Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,20 +6,9 @@ from dotenv import load_dotenv
|
|
| 6 |
from typing import Optional
|
| 7 |
import os
|
| 8 |
|
| 9 |
-
#
|
| 10 |
-
|
| 11 |
-
os.makedirs(
|
| 12 |
-
|
| 13 |
-
gdown_cache = os.path.join(cache_dir, "gdown")
|
| 14 |
-
os.makedirs(gdown_cache, exist_ok=True)
|
| 15 |
-
|
| 16 |
-
cookie_file = os.path.join(gdown_cache, "cookies.txt")
|
| 17 |
-
if not os.path.exists(cookie_file):
|
| 18 |
-
with open(cookie_file, "w") as f:
|
| 19 |
-
f.write("")
|
| 20 |
-
|
| 21 |
-
os.environ["GDOWN_CACHE"] = cache_dir
|
| 22 |
-
os.environ["XDG_CACHE_HOME"] = cache_dir
|
| 23 |
# تحميل متغيرات البيئة
|
| 24 |
load_dotenv()
|
| 25 |
|
|
@@ -151,10 +140,10 @@ def looks_religious_answer(text: str) -> bool:
|
|
| 151 |
print("[SERVER-INFO] بدء تحميل الأصول...")
|
| 152 |
DATA_FILE_ID = "1GMG6fVxhUuBEAHP91c8RAUdUJh5TxY5O"
|
| 153 |
EMBEDDINGS_FILE_ID = "1MCIJ4zZRfTC9ZEy-CLvcvNbRdjTFnw5q"
|
| 154 |
-
data_path = 'cleaned_fatwas_v2.csv'
|
| 155 |
-
embeddings_path = 'questions_embeddings_arabert.npy'
|
| 156 |
-
learned_data_path = 'learned_fatwas.csv'
|
| 157 |
-
FEEDBACK_FILE = "feedback.csv"
|
| 158 |
|
| 159 |
TELEGRAM_BOT_TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN", "TOKEN")
|
| 160 |
TELEGRAM_CHAT_ID = os.environ.get("TELEGRAM_CHAT_ID", "CHAT_ID")
|
|
@@ -256,11 +245,15 @@ def google_search_fatwa(query: str):
|
|
| 256 |
# ===================== 9) FastAPI app (واحد) =====================
|
| 257 |
app = FastAPI(title="Hajeen Islamic QA API (Fatwas + Hadith)")
|
| 258 |
def safe_download(file_id, output_path):
|
| 259 |
-
try:
|
| 260 |
-
|
|
|
|
|
|
|
| 261 |
print(f"[SAFE-DOWNLOAD] تم تنزيل {output_path}")
|
| 262 |
except Exception as e:
|
| 263 |
-
print(f"[SAFE-DOWNLOAD-ERROR] {e}")
|
|
|
|
|
|
|
| 264 |
@app.on_event("startup")
|
| 265 |
async def startup_event():
|
| 266 |
global df_main, df_learned, question_embeddings, index, tokenizer, model
|
|
@@ -378,10 +371,10 @@ ID_MUSLIM = os.environ.get("ID_MUSLIM")
|
|
| 378 |
ID_MUSNAD = os.environ.get("ID_MUSNAD")
|
| 379 |
|
| 380 |
PATHS = {
|
| 381 |
-
"bukhari": "sahih_bukhari_clean.csv",
|
| 382 |
-
"muslim": "sahih_muslim_clean.csv",
|
| 383 |
-
"musnad": "musnad_ahmed_clean.csv",
|
| 384 |
-
}
|
| 385 |
|
| 386 |
# --- تطبيع عربي (أحاديث) ---
|
| 387 |
def normalize_ar(s: str) -> str:
|
|
|
|
| 6 |
from typing import Optional
|
| 7 |
import os
|
| 8 |
|
| 9 |
+
# Local directory that holds the downloaded datasets, embeddings and feedback/learned CSV files
|
| 10 |
+
DATA_DIR = "data"
|
| 11 |
+
os.makedirs(DATA_DIR, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
# تحميل متغيرات البيئة
|
| 13 |
load_dotenv()
|
| 14 |
|
|
|
|
| 140 |
print("[SERVER-INFO] بدء تحميل الأصول...")
|
| 141 |
DATA_FILE_ID = "1GMG6fVxhUuBEAHP91c8RAUdUJh5TxY5O"
|
| 142 |
EMBEDDINGS_FILE_ID = "1MCIJ4zZRfTC9ZEy-CLvcvNbRdjTFnw5q"
|
| 143 |
+
data_path = os.path.join(DATA_DIR, 'cleaned_fatwas_v2.csv')
|
| 144 |
+
embeddings_path = os.path.join(DATA_DIR, 'questions_embeddings_arabert.npy')
|
| 145 |
+
learned_data_path = os.path.join(DATA_DIR, 'learned_fatwas.csv')
|
| 146 |
+
FEEDBACK_FILE = os.path.join(DATA_DIR, "feedback.csv")
|
| 147 |
|
| 148 |
TELEGRAM_BOT_TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN", "TOKEN")
|
| 149 |
TELEGRAM_CHAT_ID = os.environ.get("TELEGRAM_CHAT_ID", "CHAT_ID")
|
|
|
|
| 245 |
# ===================== 9) FastAPI app (واحد) =====================
|
| 246 |
app = FastAPI(title="Hajeen Islamic QA API (Fatwas + Hadith)")
|
| 247 |
def safe_download(file_id, output_path):
    """Download a Google Drive file with gdown and fail loudly on error.

    Args:
        file_id: Google Drive file id to fetch.
        output_path: Local path the file is written to.

    Raises:
        RuntimeError: if gdown reports failure by returning ``None``.
        Exception: any error raised by gdown or the filesystem is logged
            and re-raised unchanged, so a missing essential asset stops
            the application instead of being silently ignored.
    """
    try:
        # Make sure the destination folder exists so the function does not
        # depend on module-level setup having run first.
        target_dir = os.path.dirname(output_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        # gdown.download() accepts no `cache` keyword; passing one raises
        # TypeError before any network request is made.
        # NOTE(review): if the default gdown cookie location is not writable
        # in the deployment environment, `use_cookies=False` is the supported
        # switch — confirm whether it is needed.
        result = gdown.download(id=file_id, output=output_path, quiet=False)

        # Some gdown releases signal a failed retrieval by returning None
        # rather than raising; treat that as an error too.
        if result is None:
            raise RuntimeError(
                f"gdown returned no file for id={file_id} -> {output_path}"
            )
    except Exception as e:
        print(f"[SAFE-DOWNLOAD-ERROR] {e}")
        # Re-raise with the original traceback: an essential file is missing.
        raise
    print(f"[SAFE-DOWNLOAD] تم تنزيل {output_path}")
|
| 257 |
@app.on_event("startup")
|
| 258 |
async def startup_event():
|
| 259 |
global df_main, df_learned, question_embeddings, index, tokenizer, model
|
|
|
|
| 371 |
ID_MUSNAD = os.environ.get("ID_MUSNAD")
|
| 372 |
|
| 373 |
PATHS = {
|
| 374 |
+
"bukhari": os.path.join(DATA_DIR, "sahih_bukhari_clean.csv"),
|
| 375 |
+
"muslim": os.path.join(DATA_DIR, "sahih_muslim_clean.csv"),
|
| 376 |
+
"musnad": os.path.join(DATA_DIR, "musnad_ahmed_clean.csv"),
|
| 377 |
+
}
|
| 378 |
|
| 379 |
# --- تطبيع عربي (أحاديث) ---
|
| 380 |
def normalize_ar(s: str) -> str:
|