# ================== imports ==================
import gradio as gr
import pandas as pd
import numpy as np
import os, pickle, tempfile
from sentence_transformers import SentenceTransformer, util
import gdown

# ================== Configuration ==================
BOOKS_FILE = "book.xlsx"     # local cache of the books catalogue (Excel)
THESES_FILE = "theses.xlsx"  # local cache of the theses catalogue (Excel)

# Google Drive file ids for the two Excel catalogues.
DRIVE_BOOKS_ID = "1FElHiASfiVLeuHWYaqd2Q5foxWRlJT-O"
DRIVE_THESES_ID = "1K2Mtze6ZdvfKUsFMCOWlRBjDq-ZnJNrv"

EMB_DIR = "embeddings"  # directory holding pickled title embeddings
os.makedirs(EMB_DIR, exist_ok=True)

# ================== Load the sentence-embedding model for semantic search ==================
MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)

# NOTE(review): a duplicated, broken copy of build_or_load_embeddings used to
# live here. It referenced an undefined helper (`embeddings_path` instead of
# `emb_path`) and was immediately invoked on `books_df` / `theses_df` BEFORE
# those DataFrames are downloaded and loaded below, so importing the module
# crashed with a NameError. The working definition and its calls appear
# further down, after the Excel files are read, so the broken duplicate is
# removed rather than repaired.
# ================== تحميل من Drive ==================
def download_from_drive(file_id, output):
    """Fetch a Google Drive file into *output*, skipping the download if the file already exists."""
    if os.path.exists(output):
        return
    gdown.download(f"https://drive.google.com/uc?id={file_id}", output, quiet=True)

download_from_drive(DRIVE_BOOKS_ID, BOOKS_FILE)
download_from_drive(DRIVE_THESES_ID, THESES_FILE)

# ================== Load the Excel catalogues ==================
# Blank out NaNs so string operations downstream don't choke on floats.
books_df = pd.read_excel(BOOKS_FILE).fillna("")
theses_df = pd.read_excel(THESES_FILE).fillna("")

# Tag each row with its source type ("المصدر" = source: book vs thesis),
# used later for filtering in the UI.
books_df["المصدر"] = "كتاب"
theses_df["المصدر"] = "رسالة"

# ================== Embeddings cache path ==================
def emb_path(name):
    """Return the pickle path under EMB_DIR where *name*'s embeddings are cached."""
    filename = f"{name}.pkl"
    return os.path.join(EMB_DIR, filename)

# ================== Build or load the cached embeddings ==================
def build_or_load_embeddings(df, name):
    """Return sentence embeddings for the titles in *df*, caching them on disk.

    The pickle cache at emb_path(name) is reused only when its length
    matches the current row count of *df*; a missing, unreadable, or stale
    cache triggers a full re-encode with the module-level model.
    NOTE: mutates *df* in place by adding a "Title" column (copied from the
    Arabic title column) when one is missing.
    """
    path = emb_path(name)
    if os.path.exists(path):
        try:
            with open(path,"rb") as f:
                emb = pickle.load(f)
            # Reuse the cache only if it still matches the data set size.
            if len(emb) == len(df):
                return emb
        except Exception:
            pass  # best-effort cache: rebuild below on any load failure
    # Ensure a "Title" column exists (fall back to the Arabic title column).
    if "Title" not in df.columns:
        df["Title"] = df["العنوان"]
    texts = df["Title"].astype(str).tolist()
    emb = model.encode(texts, convert_to_numpy=True, show_progress_bar=True)
    with open(path,"wb") as f:
        pickle.dump(emb,f)
    return emb

books_embeddings = build_or_load_embeddings(books_df,"books")
theses_embeddings = build_or_load_embeddings(theses_df,"theses")

# ================== Merge the library ==================
# ignore_index=True gives library_df a 0..n-1 index that lines up row-for-row
# with library_embeddings (books first, then theses, same order as above).
library_df = pd.concat([books_df, theses_df], ignore_index=True)
library_embeddings = np.concatenate([books_embeddings, theses_embeddings], axis=0)

# ================== CSS ==================
# Inline stylesheet prepended to every HTML result; right-to-left table layout.
CUSTOM_CSS = """
<style>
.styled-table{border-collapse:collapse;margin:15px 0;font-size:14px;width:100%;text-align:right;direction:rtl;}
.styled-table th,.styled-table td{border:1px solid #ddd;padding:8px;}
.styled-table tr:nth-child(even){background-color:#f9f9f9;}
.styled-table tr:nth-child(odd){background-color:#fff;}
.styled-table th{background-color:#4da6ff;color:white;}
</style>
"""

# ================== Render results as HTML (one table per hit) ==================
def results_to_html(df):
    """Render each row of *df* as its own RTL-styled HTML table.

    Missing display columns are filled in place with "-" (note: this
    mutates the caller's frame). Returns a placeholder paragraph when
    *df* has no rows.
    """
    if df.empty:
        return "<p>❌ لم يتم العثور على نتائج</p>"

    display_cols = ["المؤلف","العنوان","سنة النشر","الموقع على الرف","المصدر"]

    # Make sure every display column exists before slicing rows by it.
    for column in display_cols:
        if column not in df.columns:
            df[column] = "-"

    tables = [
        pd.DataFrame([record[display_cols]]).to_html(
            index=False, escape=False, classes="styled-table", border=0
        )
        for _, record in df.iterrows()
    ]
    return CUSTOM_CSS + "".join(tables)

# ================== Search ==================
def local_search_df(query, mode, source_filter):
    """Search the merged library by title.

    Parameters:
        query: free-text search string; blank input short-circuits.
        mode: "نصي" for case-insensitive substring search on the title
            column, anything else for semantic (embedding) search.
        source_filter: "الكل" for everything, otherwise a value of the
            "المصدر" column ("كتاب" / "رسالة").

    Returns (html, df): rendered HTML result tables plus the matching
    DataFrame (stored in UI state for the Excel export).
    """
    if not query.strip():
        return "<p>⚠️ اكتب كلمة أو جملة للبحث</p>", pd.DataFrame()

    df_search = library_df.copy()

    # Filter by source type (book / thesis).
    if source_filter != "الكل":
        df_search = df_search[df_search["المصدر"] == source_filter]

    # Text search: plain substring match on the title column.
    if mode == "نصي":
        df = df_search[df_search["العنوان"].str.contains(query, case=False, na=False)]
    # Semantic search: rank the (possibly filtered) rows by cosine similarity.
    else:
        q_emb = model.encode([query], convert_to_numpy=True)
        # BUG FIX: the scores were previously computed against the FULL
        # library_embeddings and assigned to the filtered frame, raising a
        # length-mismatch ValueError whenever a source filter was active.
        # library_df was built with ignore_index=True, so its index values
        # are positions into library_embeddings — select the filtered rows.
        subset_emb = library_embeddings[df_search.index.to_numpy()]
        scores = util.cos_sim(q_emb, subset_emb)[0].cpu().numpy()
        df_search["score"] = scores
        df = df_search.sort_values("score", ascending=False)

    return results_to_html(df), df

# ================== Save results to Excel ==================
def save_to_excel(df):
    """Write *df* (or an empty sheet when df is None/empty) to a temp .xlsx.

    Returns the temp file's path for Gradio's File component. The temp
    handle is closed before pandas writes to the same path — with
    delete=False the open handle would otherwise block the write on
    Windows, where a file cannot be reopened while held open.
    """
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
    tmp.close()  # we only need the unique path, not the open handle
    if df is None or df.empty:
        pd.DataFrame().to_excel(tmp.name, index=False)
    else:
        df.to_excel(tmp.name, index=False)
    return tmp.name

# ================== Gradio UI ==================
# Header image hosted on Google Drive.
IMAGE_URL = "https://drive.google.com/uc?id=1y1cbJbdXSrhkEM7bMDrAUKr0dTiHPe-y"

with gr.Blocks(title="البحث الدلالي بالمكتبة") as app:
    gr.Markdown("## 🔍 البحث في مقتنيات المكتبة")
    gr.Image(IMAGE_URL, elem_id="header-image")

    # Free-text query input.
    query = gr.Textbox(label="اكتب كلمة أو موضوع البحث")

    # Search mode: plain text ("نصي") vs semantic embedding search.
    mode = gr.Radio(
        ["نصي", "دلالي (Semantic)"],
        value="نصي",
        label="نوع البحث"
    )

    # Restrict results to books, theses, or both.
    source_filter = gr.Radio(
        ["الكل", "كتاب", "رسالة"],
        value="الكل",
        label="فلترة حسب المصدر"
    )

    btn_search = gr.Button("🔎 بحث")

    df_state = gr.State()    # holds the last result DataFrame for the export button
    output_html = gr.HTML()  # rendered result tables
    file_out = gr.File(label="⬇️ تحميل النتائج")

    # Run the search; keep the raw DataFrame in state for export.
    btn_search.click(
        local_search_df,
        inputs=[query, mode, source_filter],
        outputs=[output_html, df_state]
    )

    # Export the stored results as a downloadable Excel file.
    gr.Button("📥 حفظ النتائج").click(
        save_to_excel,
        inputs=df_state,
        outputs=file_out
    )

app.launch()