Spaces:
Running
Running
| # ================== imports ================== | |
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| import os, pickle, tempfile | |
| from sentence_transformers import SentenceTransformer, util | |
| import gdown | |
# ================== Settings ==================
# Books catalogue: local spreadsheet name and the Google Drive file it comes from.
BOOKS_FILE = "book.xlsx"
DRIVE_BOOKS_ID = "1FElHiASfiVLeuHWYaqd2Q5foxWRlJT-O"

# Theses catalogue: local spreadsheet name and the Google Drive file it comes from.
THESES_FILE = "theses.xlsx"
DRIVE_THESES_ID = "1K2Mtze6ZdvfKUsFMCOWlRBjDq-ZnJNrv"

# Directory for the pickled embedding caches; created at import time if absent.
EMB_DIR = "embeddings"
os.makedirs(EMB_DIR, exist_ok=True)
# ================== Load the semantic model ==================
# One shared SentenceTransformer instance; the functions below use it to
# encode both catalogue titles and search queries.
MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)
def build_or_load_embeddings(df, name):
    """Return title embeddings for ``df``, reusing the on-disk cache when valid.

    The cache file is ``<EMB_DIR>/<name>.pkl``. It is reused only when it
    unpickles without error and contains exactly ``len(df)`` entries;
    otherwise the titles are re-encoded with the module-level ``model`` and
    the cache file is rewritten.

    Args:
        df: DataFrame holding the titles in a ``"Title"`` column or, when that
            is absent, in an ``"العنوان"`` column. The frame is not modified.
        name: Cache key; used as the stem of the pickle file name.

    Returns:
        The cached object, or the result of
        ``model.encode(titles, convert_to_numpy=True)``.

    Raises:
        KeyError: if ``df`` has neither title column.
    """
    # Build the path inline: this function previously called
    # ``embeddings_path``, a name that is not defined in this module.
    path = os.path.join(EMB_DIR, f"{name}.pkl")

    if os.path.exists(path):
        try:
            # NOTE(security): unpickling runs arbitrary code from the file.
            # Acceptable only because this function is the file's sole writer.
            with open(path, "rb") as f:
                emb = pickle.load(f)
            if len(emb) == len(df):
                return emb
        except Exception:
            # The cache is best-effort: an unreadable or malformed file just
            # means the embeddings are rebuilt below.
            pass

    title_col = "Title" if "Title" in df.columns else "العنوان"
    texts = df[title_col].astype(str).tolist()
    emb = model.encode(texts, convert_to_numpy=True, show_progress_bar=True)
    with open(path, "wb") as f:
        pickle.dump(emb, f)
    return emb
# NOTE: books_embeddings / theses_embeddings are intentionally NOT built here.
# The spreadsheets are only read into books_df / theses_df further down, so
# calling build_or_load_embeddings at this point raised NameError and aborted
# start-up. Both embeddings are built right after the data has been loaded.
# ================== Download from Drive ==================
def download_from_drive(file_id, output):
    """Fetch a Google Drive file to ``output`` unless it already exists locally.

    Args:
        file_id: Google Drive file ID, inserted into the ``uc?id=`` URL.
        output: Local path to save to. An existing file is never re-downloaded.

    Raises:
        FileNotFoundError: if the download call returned but ``output`` still
            does not exist.
    """
    if os.path.exists(output):
        return

    url = f"https://drive.google.com/uc?id={file_id}"
    gdown.download(url, output, quiet=True)

    # NOTE(review): depending on the gdown version a failed download may not
    # raise, and quiet=True suppresses its console output. Check the result
    # here so the failure names the Drive file instead of surfacing later as
    # an unrelated read error.
    if not os.path.exists(output):
        raise FileNotFoundError(
            f"Download of Google Drive file {file_id!r} did not produce {output!r}"
        )
# Make sure both spreadsheets are present locally.
download_from_drive(file_id=DRIVE_BOOKS_ID, output=BOOKS_FILE)
download_from_drive(file_id=DRIVE_THESES_ID, output=THESES_FILE)

# ================== Load the files ==================
# Missing cells are replaced with empty strings, then every row is tagged
# with its source type ("المصدر") so the UI can filter on it.
books_df = pd.read_excel(BOOKS_FILE).fillna("").assign(**{"المصدر": "كتاب"})
theses_df = pd.read_excel(THESES_FILE).fillna("").assign(**{"المصدر": "رسالة"})
# ================== Embeddings path ==================
def emb_path(name):
    """Return the pickle cache path inside ``EMB_DIR`` for the set ``name``."""
    filename = f"{name}.pkl"
    return os.path.join(EMB_DIR, filename)
# ================== Build or load the embeddings ==================
def build_or_load_embeddings(df, name):
    """Return title embeddings for ``df``, reusing the on-disk cache when valid.

    The cache file is ``emb_path(name)``. It is reused only when it unpickles
    without error and contains exactly ``len(df)`` entries; otherwise the
    titles are re-encoded with the module-level ``model`` and the cache file
    is rewritten.

    Args:
        df: DataFrame holding the titles in a ``"Title"`` column or, when that
            is absent, in an ``"العنوان"`` column. The frame is not modified:
            earlier this function added a ``"Title"`` column to it, but only
            on a cache miss, so the frame's schema varied between runs.
        name: Cache key passed to ``emb_path``.

    Returns:
        The cached object, or the result of
        ``model.encode(titles, convert_to_numpy=True)``.

    Raises:
        KeyError: if ``df`` has neither title column.
    """
    path = emb_path(name)

    if os.path.exists(path):
        try:
            # NOTE(security): unpickling runs arbitrary code from the file.
            # Acceptable only because this function is the file's sole writer.
            with open(path, "rb") as f:
                emb = pickle.load(f)
            if len(emb) == len(df):
                return emb
        except Exception:
            # The cache is best-effort: an unreadable or malformed file just
            # means the embeddings are rebuilt below.
            pass

    # Pick the title column without writing anything back into ``df``.
    title_col = "Title" if "Title" in df.columns else "العنوان"
    texts = df[title_col].astype(str).tolist()
    emb = model.encode(texts, convert_to_numpy=True, show_progress_bar=True)
    with open(path, "wb") as f:
        pickle.dump(emb, f)
    return emb
books_embeddings = build_or_load_embeddings(df=books_df, name="books")
theses_embeddings = build_or_load_embeddings(df=theses_df, name="theses")

# ================== Merge the library ==================
# Frame and matrix are stacked in the same order (books first, then theses)
# and the frame is re-indexed from 0.
library_df = pd.concat((books_df, theses_df), ignore_index=True)
library_embeddings = np.concatenate((books_embeddings, theses_embeddings), axis=0)
# ================== CSS ==================
# Stylesheet prepended to the rendered results; it targets the
# "styled-table" class (right-aligned, RTL, striped rows, blue header).
CUSTOM_CSS = """
<style>
.styled-table{border-collapse:collapse;margin:15px 0;font-size:14px;width:100%;text-align:right;direction:rtl;}
.styled-table th,.styled-table td{border:1px solid #ddd;padding:8px;}
.styled-table tr:nth-child(even){background-color:#f9f9f9;}
.styled-table tr:nth-child(odd){background-color:#fff;}
.styled-table th{background-color:#4da6ff;color:white;}
</style>
"""
# ================== Render each result as HTML ==================
def results_to_html(df):
    """Render search results as one styled HTML table per result row.

    Args:
        df: Result rows. A display column missing from ``df`` is shown as
            ``"-"``. The frame itself is left untouched, so placeholder
            columns no longer leak into the caller's data.

    Returns:
        ``CUSTOM_CSS`` followed by the per-row tables, or a "no results"
        paragraph when ``df`` is empty.
    """
    if df.empty:
        return "<p>❌ لم يتم العثور على نتائج</p>"

    display_cols = ["المؤلف", "العنوان", "سنة النشر", "الموقع على الرف", "المصدر"]
    # reindex builds a new frame limited to the display columns and fills the
    # ones that do not exist with "-", without writing into ``df``.
    view = df.reindex(columns=display_cols, fill_value="-")

    # NOTE(review): escape=False emits cell text as raw HTML, so markup stored
    # in the spreadsheets is rendered as-is. Kept for compatibility; switch to
    # escape=True if the catalogue data is not fully trusted.
    tables = [
        pd.DataFrame([row]).to_html(
            index=False, escape=False, classes="styled-table", border=0
        )
        for _, row in view.iterrows()
    ]
    # Join once instead of growing a string with += on every row.
    return CUSTOM_CSS + "".join(tables)
# ================== Search ==================
def local_search_df(query, mode, source_filter):
    """Search the merged library by title.

    Args:
        query: Search text. ``None`` and whitespace-only input are treated as
            empty; surrounding whitespace is ignored when matching.
        mode: ``"نصي"`` performs a case-insensitive literal substring match on
            the ``"العنوان"`` column; any other value ranks the rows by cosine
            similarity between the query and the title embeddings.
        source_filter: ``"الكل"`` keeps every row; any other value keeps only
            rows whose ``"المصدر"`` equals it.

    Returns:
        A ``(html, results)`` tuple: the rendered HTML and the matching rows
        as a DataFrame (with a ``"score"`` column, best first, in semantic
        mode). For an empty query the HTML is a prompt and the frame is empty.
    """
    query = (query or "").strip()
    if not query:
        return "<p>⚠️ اكتب كلمة أو جملة للبحث</p>", pd.DataFrame()

    # Positional row mask over the whole library. It is applied to the score
    # vector as well, so scores and rows stay aligned after filtering.
    if source_filter == "الكل":
        keep = np.ones(len(library_df), dtype=bool)
    else:
        keep = (library_df["المصدر"] == source_filter).to_numpy()
    df_search = library_df[keep]

    if mode == "نصي":
        # regex=False: the query is user text, not a pattern, so inputs such
        # as "c++" or "(" match literally instead of raising re.error.
        titles = df_search["العنوان"].astype(str)
        df = df_search[titles.str.contains(query, case=False, regex=False)]
    else:
        q_emb = model.encode([query], convert_to_numpy=True)
        scores = util.cos_sim(q_emb, library_embeddings)[0].cpu().numpy()
        # ``scores`` has one entry per library row; keep only the entries of
        # the rows that survived the source filter (assigning the full vector
        # to a filtered frame raised a length-mismatch ValueError).
        df = df_search.assign(score=scores[keep]).sort_values("score", ascending=False)

    return results_to_html(df), df
# ================== Save results to Excel ==================
def save_to_excel(df):
    """Write ``df`` to a new temporary ``.xlsx`` file and return its path.

    Args:
        df: Results to export. ``None`` or an empty frame produces a workbook
            written from an empty DataFrame.

    Returns:
        Path of the written file. It is created with ``delete=False``, so it
        is not removed automatically.
    """
    # Use the temp file only to reserve a unique name and close the handle
    # before pandas writes to that path. Leaving it open leaked a file
    # descriptor per export and prevents reopening the path on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp:
        path = tmp.name

    export = pd.DataFrame() if df is None or df.empty else df
    export.to_excel(path, index=False)
    return path
# ================== Gradio interface ==================
IMAGE_URL = "https://drive.google.com/uc?id=1y1cbJbdXSrhkEM7bMDrAUKr0dTiHPe-y"

with gr.Blocks(title="البحث الدلالي بالمكتبة") as app:
    # Header: page title and banner image.
    gr.Markdown("## 🔍 البحث في مقتنيات المكتبة")
    gr.Image(IMAGE_URL, elem_id="header-image")

    # Search inputs.
    query = gr.Textbox(label="اكتب كلمة أو موضوع البحث")
    mode = gr.Radio(
        choices=["نصي", "دلالي (Semantic)"],
        value="نصي",
        label="نوع البحث",
    )
    source_filter = gr.Radio(
        choices=["الكل", "كتاب", "رسالة"],
        value="الكل",
        label="فلترة حسب المصدر",
    )
    btn_search = gr.Button("🔎 بحث")

    # Outputs: the state holds the last result frame for the export button.
    df_state = gr.State()
    output_html = gr.HTML()
    file_out = gr.File(label="⬇️ تحميل النتائج")

    # Search fills the HTML view and stores the result frame.
    btn_search.click(
        fn=local_search_df,
        inputs=[query, mode, source_filter],
        outputs=[output_html, df_state],
    )

    # Export writes the stored frame to a file offered for download.
    btn_save = gr.Button("📥 حفظ النتائج")
    btn_save.click(fn=save_to_excel, inputs=df_state, outputs=file_out)

app.launch()