aelsaeed committed on
Commit
7c9bc21
·
verified ·
1 Parent(s): 9cfdb63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -18
app.py CHANGED
@@ -16,7 +16,7 @@ DRIVE_THESES_ID = "1K2Mtze6ZdvfKUsFMCOWlRBjDq-ZnJNrv"
16
  EMB_DIR = "embeddings"
17
  os.makedirs(EMB_DIR, exist_ok=True)
18
 
19
- MODEL_NAME = "all-MiniLM-L6-v2"
20
  model = SentenceTransformer(MODEL_NAME)
21
 
22
  # ================== تحميل من Drive ==================
@@ -36,25 +36,23 @@ def load_and_merge():
36
  books = pd.read_excel(BOOKS_FILE).fillna("")
37
  theses = pd.read_excel(THESES_FILE).fillna("")
38
 
39
- # إضافة نوع المصدر
40
  books["المصدر"] = "كتاب"
41
  theses["المصدر"] = "رسالة"
42
 
43
- # دمج
44
  merged = pd.concat([books, theses], ignore_index=True)
45
  return merged
46
 
47
  library_df = load_and_merge()
48
 
49
  # ================== Embeddings ==================
50
- def embeddings_path(name):
51
  return os.path.join(EMB_DIR, f"{name}.pkl")
52
 
53
  def build_or_load_embeddings(df, name):
54
- path = embeddings_path(name)
55
  if os.path.exists(path):
56
  try:
57
- with open(path, "rb") as f:
58
  emb = pickle.load(f)
59
  if len(emb) == len(df):
60
  return emb
@@ -62,8 +60,8 @@ def build_or_load_embeddings(df, name):
62
  pass
63
  texts = df["العنوان"].astype(str).tolist()
64
  emb = model.encode(texts, convert_to_numpy=True, show_progress_bar=True)
65
- with open(path, "wb") as f:
66
- pickle.dump(emb, f)
67
  return emb
68
 
69
  library_embeddings = build_or_load_embeddings(library_df, "library")
@@ -86,12 +84,16 @@ def results_to_html(df):
86
  if df.empty:
87
  return "<p>❌ لم يتم العثور على نتائج</p>"
88
 
89
- html_all = ""
 
 
 
 
90
  for _, row in df.iterrows():
91
- data = {col: row[col] if col in row else "-" for col in ["المؤلف","العنوان","سنة النشر","الموقع على الرف","المصدر"]}
92
- df_row = pd.DataFrame([data])
93
- html_all += df_row.to_html(index=False, escape=False, classes="styled-table", border=0) + "<br>"
94
- return CUSTOM_CSS + html_all
95
 
96
  # ================== البحث ==================
97
  def local_search_df(query, mode, source_filter):
@@ -100,19 +102,16 @@ def local_search_df(query, mode, source_filter):
100
 
101
  df_search = library_df.copy()
102
 
103
- # فلترة حسب المصدر
104
  if source_filter != "الكل":
105
  df_search = df_search[df_search["المصدر"] == source_filter]
106
 
107
- # بحث نصي
108
  if mode == "نصي":
109
  df = df_search[df_search["العنوان"].str.contains(query, case=False, na=False)]
110
- # بحث دلالي
111
  else:
112
  q_emb = model.encode([query], convert_to_numpy=True)
113
  scores = util.cos_sim(q_emb, library_embeddings)[0].cpu().numpy()
114
  df_search["score"] = scores
115
- df = df_search.sort_values("score", ascending=False)
116
 
117
  return results_to_html(df), df
118
 
@@ -147,7 +146,6 @@ with gr.Blocks(title="البحث الدلالي بالمكتبة") as app:
147
  )
148
 
149
  btn_search = gr.Button("🔎 بحث")
150
-
151
  df_state = gr.State()
152
  output_html = gr.HTML()
153
  file_out = gr.File(label="⬇️ تحميل النتائج")
 
16
  EMB_DIR = "embeddings"
17
  os.makedirs(EMB_DIR, exist_ok=True)
18
 
19
+ MODEL_NAME = "all-MiniLM-L6-v2" # نموذج أخف وأسرع
20
  model = SentenceTransformer(MODEL_NAME)
21
 
22
  # ================== تحميل من Drive ==================
 
36
  books = pd.read_excel(BOOKS_FILE).fillna("")
37
  theses = pd.read_excel(THESES_FILE).fillna("")
38
 
 
39
  books["المصدر"] = "كتاب"
40
  theses["المصدر"] = "رسالة"
41
 
 
42
  merged = pd.concat([books, theses], ignore_index=True)
43
  return merged
44
 
45
  library_df = load_and_merge()
46
 
47
  # ================== Embeddings ==================
48
+ def emb_path(name):
49
  return os.path.join(EMB_DIR, f"{name}.pkl")
50
 
51
  def build_or_load_embeddings(df, name):
52
+ path = emb_path(name)
53
  if os.path.exists(path):
54
  try:
55
+ with open(path,"rb") as f:
56
  emb = pickle.load(f)
57
  if len(emb) == len(df):
58
  return emb
 
60
  pass
61
  texts = df["العنوان"].astype(str).tolist()
62
  emb = model.encode(texts, convert_to_numpy=True, show_progress_bar=True)
63
+ with open(path,"wb") as f:
64
+ pickle.dump(emb,f)
65
  return emb
66
 
67
  library_embeddings = build_or_load_embeddings(library_df, "library")
 
84
  if df.empty:
85
  return "<p>❌ لم يتم العثور على نتائج</p>"
86
 
87
+ for col in ["المؤلف","العنوان","سنة النشر","الموقع على الرف","المصدر","score"]:
88
+ if col not in df.columns:
89
+ df[col] = "-"
90
+
91
+ html_results = ""
92
  for _, row in df.iterrows():
93
+ single_df = pd.DataFrame([row[["المؤلف","العنوان","سنة النشر","الموقع على الرف","المصدر","score"]]])
94
+ html_results += single_df.to_html(index=False, escape=False, classes="styled-table", border=0)
95
+
96
+ return CUSTOM_CSS + html_results
97
 
98
  # ================== البحث ==================
99
  def local_search_df(query, mode, source_filter):
 
102
 
103
  df_search = library_df.copy()
104
 
 
105
  if source_filter != "الكل":
106
  df_search = df_search[df_search["المصدر"] == source_filter]
107
 
 
108
  if mode == "نصي":
109
  df = df_search[df_search["العنوان"].str.contains(query, case=False, na=False)]
 
110
  else:
111
  q_emb = model.encode([query], convert_to_numpy=True)
112
  scores = util.cos_sim(q_emb, library_embeddings)[0].cpu().numpy()
113
  df_search["score"] = scores
114
+ df = df_search.sort_values("score", ascending=False).head(20) # أعلى 20 نتيجة
115
 
116
  return results_to_html(df), df
117
 
 
146
  )
147
 
148
  btn_search = gr.Button("🔎 بحث")
 
149
  df_state = gr.State()
150
  output_html = gr.HTML()
151
  file_out = gr.File(label="⬇️ تحميل النتائج")