aelsaeed committed on
Commit
9cfdb63
·
verified ·
1 Parent(s): 34a7ba0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -32
app.py CHANGED
@@ -5,7 +5,6 @@ import numpy as np
5
  import os, pickle, tempfile
6
  from sentence_transformers import SentenceTransformer, util
7
  import gdown
8
- import torch
9
 
10
  # ================== إعدادات ==================
11
  BOOKS_FILE = "book.xlsx"
@@ -17,7 +16,7 @@ DRIVE_THESES_ID = "1K2Mtze6ZdvfKUsFMCOWlRBjDq-ZnJNrv"
17
  EMB_DIR = "embeddings"
18
  os.makedirs(EMB_DIR, exist_ok=True)
19
 
20
- MODEL_NAME = "paraphrase-multilingual-MiniLM-L12-v2"
21
  model = SentenceTransformer(MODEL_NAME)
22
 
23
  # ================== تحميل من Drive ==================
@@ -37,26 +36,30 @@ def load_and_merge():
37
  books = pd.read_excel(BOOKS_FILE).fillna("")
38
  theses = pd.read_excel(THESES_FILE).fillna("")
39
 
 
40
  books["المصدر"] = "كتاب"
41
  theses["المصدر"] = "رسالة"
42
 
 
43
  merged = pd.concat([books, theses], ignore_index=True)
44
  return merged
45
 
46
  library_df = load_and_merge()
47
 
48
  # ================== Embeddings ==================
49
- def emb_path(name):
50
  return os.path.join(EMB_DIR, f"{name}.pkl")
51
 
52
  def build_or_load_embeddings(df, name):
53
- path = emb_path(name)
54
  if os.path.exists(path):
55
- with open(path, "rb") as f:
56
- emb = pickle.load(f)
57
- if len(emb) == len(df):
58
- return emb
59
-
 
 
60
  texts = df["العنوان"].astype(str).tolist()
61
  emb = model.encode(texts, convert_to_numpy=True, show_progress_bar=True)
62
  with open(path, "wb") as f:
@@ -83,42 +86,33 @@ def results_to_html(df):
83
  if df.empty:
84
  return "<p>❌ لم يتم العثور على نتائج</p>"
85
 
86
- # التأكد من وجود الأعمدة المطلوبة
87
- for col in ["المؤلف", "العنوان", "سنة النشر", "الموقع على الرف", "المصدر"]:
88
- if col not in df.columns:
89
- df[col] = "-"
90
-
91
- display_cols = ["المؤلف", "العنوان", "سنة النشر", "الموقع على الرف", "المصدر"]
92
- df_display = df[display_cols]
93
-
94
- # كل نتيجة في جدول منفصل
95
- html_tables = ""
96
- for _, row in df_display.iterrows():
97
- html_tables += "<table class='styled-table'>"
98
- for col in display_cols:
99
- html_tables += f"<tr><th>{col}</th><td>{row[col]}</td></tr>"
100
- html_tables += "</table><br>"
101
- return CUSTOM_CSS + html_tables
102
 
 
103
  def local_search_df(query, mode, source_filter):
104
  if not query.strip():
105
  return "<p>⚠️ اكتب كلمة أو جملة للبحث</p>", pd.DataFrame()
106
 
107
  df_search = library_df.copy()
108
 
 
109
  if source_filter != "الكل":
110
  df_search = df_search[df_search["المصدر"] == source_filter]
111
 
 
112
  if mode == "نصي":
113
  df = df_search[df_search["العنوان"].str.contains(query, case=False, na=False)]
 
114
  else:
115
- # التأكد من أن embeddings كلها tensors
116
- q_emb = torch.tensor(model.encode([query], convert_to_numpy=True), dtype=torch.float32)
117
- lib_emb = torch.tensor(library_embeddings, dtype=torch.float32)
118
- scores = util.cos_sim(q_emb, lib_emb)[0]
119
- df_search["score"] = scores.numpy()
120
- # ترتيب من الأعلى إلى الأقل
121
- df = df_search.sort_values("score", ascending=False).reset_index(drop=True)
122
 
123
  return results_to_html(df), df
124
 
@@ -153,6 +147,7 @@ with gr.Blocks(title="البحث الدلالي بالمكتبة") as app:
153
  )
154
 
155
  btn_search = gr.Button("🔎 بحث")
 
156
  df_state = gr.State()
157
  output_html = gr.HTML()
158
  file_out = gr.File(label="⬇️ تحميل النتائج")
 
5
  import os, pickle, tempfile
6
  from sentence_transformers import SentenceTransformer, util
7
  import gdown
 
8
 
9
  # ================== إعدادات ==================
10
  BOOKS_FILE = "book.xlsx"
 
16
  EMB_DIR = "embeddings"
17
  os.makedirs(EMB_DIR, exist_ok=True)
18
 
19
+ MODEL_NAME = "all-MiniLM-L6-v2"
20
  model = SentenceTransformer(MODEL_NAME)
21
 
22
  # ================== تحميل من Drive ==================
 
36
  books = pd.read_excel(BOOKS_FILE).fillna("")
37
  theses = pd.read_excel(THESES_FILE).fillna("")
38
 
39
+ # إضافة نوع المصدر
40
  books["المصدر"] = "كتاب"
41
  theses["المصدر"] = "رسالة"
42
 
43
+ # دمج
44
  merged = pd.concat([books, theses], ignore_index=True)
45
  return merged
46
 
47
  library_df = load_and_merge()
48
 
49
  # ================== Embeddings ==================
50
def embeddings_path(name):
    """Return the on-disk path of the pickled embeddings cache for *name*.

    The cache lives inside the module-level EMB_DIR directory as
    ``<name>.pkl``.
    """
    filename = f"{name}.pkl"
    return os.path.join(EMB_DIR, filename)
52
 
53
  def build_or_load_embeddings(df, name):
54
+ path = embeddings_path(name)
55
  if os.path.exists(path):
56
+ try:
57
+ with open(path, "rb") as f:
58
+ emb = pickle.load(f)
59
+ if len(emb) == len(df):
60
+ return emb
61
+ except Exception:
62
+ pass
63
  texts = df["العنوان"].astype(str).tolist()
64
  emb = model.encode(texts, convert_to_numpy=True, show_progress_bar=True)
65
  with open(path, "wb") as f:
 
86
  if df.empty:
87
  return "<p>❌ لم يتم العثور على نتائج</p>"
88
 
89
+ html_all = ""
90
+ for _, row in df.iterrows():
91
+ data = {col: row[col] if col in row else "-" for col in ["المؤلف","العنوان","سنة النشر","الموقع على الرف","المصدر"]}
92
+ df_row = pd.DataFrame([data])
93
+ html_all += df_row.to_html(index=False, escape=False, classes="styled-table", border=0) + "<br>"
94
+ return CUSTOM_CSS + html_all
 
 
 
 
 
 
 
 
 
 
95
 
96
# ================== Search ==================
def local_search_df(query, mode, source_filter):
    """Search the merged library by title and return (html, DataFrame).

    Parameters
    ----------
    query : str
        Free-text query; a blank/whitespace query short-circuits with a
        warning message and an empty DataFrame.
    mode : str
        "نصي" (textual) for a case-insensitive substring match on the
        "العنوان" (title) column; anything else triggers semantic search
        over the precomputed ``library_embeddings``.
    source_filter : str
        "الكل" (all) for no filtering, otherwise an exact match against
        the "المصدر" (source) column.

    Returns
    -------
    tuple[str, pandas.DataFrame]
        Rendered HTML (via ``results_to_html``) and the matching rows
        (sorted by descending similarity score in semantic mode).
    """
    if not query.strip():
        return "<p>⚠️ اكتب كلمة أو جملة للبحث</p>", pd.DataFrame()

    df_search = library_df.copy()

    # Filter by source type (book / thesis) unless "all" is selected.
    if source_filter != "الكل":
        df_search = df_search[df_search["المصدر"] == source_filter]

    # Textual search: plain substring match on the title column.
    if mode == "نصي":
        df = df_search[df_search["العنوان"].str.contains(query, case=False, na=False)]
    # Semantic search: cosine similarity against precomputed embeddings.
    else:
        q_emb = model.encode([query], convert_to_numpy=True)
        scores = util.cos_sim(q_emb, library_embeddings)[0].cpu().numpy()
        # BUG FIX: `scores` covers the FULL library, but df_search may be
        # row-filtered by source_filter; assigning the whole array would
        # raise a length mismatch. library_df is built with
        # pd.concat(..., ignore_index=True), so df_search.index holds the
        # original positions and can index `scores` directly. Using
        # .assign also avoids writing through a filtered view.
        df_search = df_search.assign(score=scores[df_search.index])
        df = df_search.sort_values("score", ascending=False)

    return results_to_html(df), df
118
 
 
147
  )
148
 
149
  btn_search = gr.Button("🔎 بحث")
150
+
151
  df_state = gr.State()
152
  output_html = gr.HTML()
153
  file_out = gr.File(label="⬇️ تحميل النتائج")