Hothaifa committed on
Commit
4935ebe
·
verified ·
1 Parent(s): d208c4a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -25
app.py CHANGED
@@ -6,20 +6,9 @@ from dotenv import load_dotenv
6
  from typing import Optional
7
  import os
8
 
9
- # اجبار gdown يستخدم /tmp (المكان الوحيد المضمون بالكتابة في HuggingFace)
10
- cache_dir = "/tmp/gdown_cache"
11
- os.makedirs(cache_dir, exist_ok=True)
12
-
13
- gdown_cache = os.path.join(cache_dir, "gdown")
14
- os.makedirs(gdown_cache, exist_ok=True)
15
-
16
- cookie_file = os.path.join(gdown_cache, "cookies.txt")
17
- if not os.path.exists(cookie_file):
18
- with open(cookie_file, "w") as f:
19
- f.write("")
20
-
21
- os.environ["GDOWN_CACHE"] = cache_dir
22
- os.environ["XDG_CACHE_HOME"] = cache_dir
23
  # تحميل متغيرات البيئة
24
  load_dotenv()
25
 
@@ -151,10 +140,10 @@ def looks_religious_answer(text: str) -> bool:
151
  print("[SERVER-INFO] بدء تحميل الأصول...")
152
  DATA_FILE_ID = "1GMG6fVxhUuBEAHP91c8RAUdUJh5TxY5O"
153
  EMBEDDINGS_FILE_ID = "1MCIJ4zZRfTC9ZEy-CLvcvNbRdjTFnw5q"
154
- data_path = 'cleaned_fatwas_v2.csv'
155
- embeddings_path = 'questions_embeddings_arabert.npy'
156
- learned_data_path = 'learned_fatwas.csv'
157
- FEEDBACK_FILE = "feedback.csv"
158
 
159
  TELEGRAM_BOT_TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN", "TOKEN")
160
  TELEGRAM_CHAT_ID = os.environ.get("TELEGRAM_CHAT_ID", "CHAT_ID")
@@ -256,11 +245,15 @@ def google_search_fatwa(query: str):
256
  # ===================== 9) FastAPI app (واحد) =====================
257
  app = FastAPI(title="Hajeen Islamic QA API (Fatwas + Hadith)")
258
  def safe_download(file_id, output_path):
259
- try:
260
- gdown.download(id=file_id, output=output_path, quiet=False)
 
 
261
  print(f"[SAFE-DOWNLOAD] تم تنزيل {output_path}")
262
  except Exception as e:
263
- print(f"[SAFE-DOWNLOAD-ERROR] {e}")
 
 
264
  @app.on_event("startup")
265
  async def startup_event():
266
  global df_main, df_learned, question_embeddings, index, tokenizer, model
@@ -378,10 +371,10 @@ ID_MUSLIM = os.environ.get("ID_MUSLIM")
378
  ID_MUSNAD = os.environ.get("ID_MUSNAD")
379
 
380
  PATHS = {
381
- "bukhari": "sahih_bukhari_clean.csv",
382
- "muslim": "sahih_muslim_clean.csv",
383
- "musnad": "musnad_ahmed_clean.csv",
384
- }
385
 
386
  # --- تطبيع عربي (أحاديث) ---
387
  def normalize_ar(s: str) -> str:
 
6
  from typing import Optional
7
  import os
8
 
9
+ # أضف هذا السطر
10
+ DATA_DIR = "data"
11
+ os.makedirs(DATA_DIR, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
12
  # تحميل متغيرات البيئة
13
  load_dotenv()
14
 
 
140
  print("[SERVER-INFO] بدء تحميل الأصول...")
141
  DATA_FILE_ID = "1GMG6fVxhUuBEAHP91c8RAUdUJh5TxY5O"
142
  EMBEDDINGS_FILE_ID = "1MCIJ4zZRfTC9ZEy-CLvcvNbRdjTFnw5q"
143
+ data_path = os.path.join(DATA_DIR, 'cleaned_fatwas_v2.csv')
144
+ embeddings_path = os.path.join(DATA_DIR, 'questions_embeddings_arabert.npy')
145
+ learned_data_path = os.path.join(DATA_DIR, 'learned_fatwas.csv')
146
+ FEEDBACK_FILE = os.path.join(DATA_DIR, "feedback.csv")
147
 
148
  TELEGRAM_BOT_TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN", "TOKEN")
149
  TELEGRAM_CHAT_ID = os.environ.get("TELEGRAM_CHAT_ID", "CHAT_ID")
 
245
  # ===================== 9) FastAPI app (واحد) =====================
246
  app = FastAPI(title="Hajeen Islamic QA API (Fatwas + Hadith)")
247
  def safe_download(file_id, output_path):
248
+ try:
249
+ # نحدد مجلد الكاش هنا ليكون داخل مجلد البيانات الرئيسي
250
+ cache_folder = os.path.join(DATA_DIR, "gdown_cache")
251
+ gdown.download(id=file_id, output=output_path, quiet=False, cache=cache_folder)
252
  print(f"[SAFE-DOWNLOAD] تم تنزيل {output_path}")
253
  except Exception as e:
254
+ print(f"[SAFE-DOWNLOAD-ERROR] {e}")
255
+ # من الأفضل إيقاف التطبيق إذا فشل تنزيل ملف أساسي
256
+ raise e
257
  @app.on_event("startup")
258
  async def startup_event():
259
  global df_main, df_learned, question_embeddings, index, tokenizer, model
 
371
  ID_MUSNAD = os.environ.get("ID_MUSNAD")
372
 
373
  PATHS = {
374
+ "bukhari": os.path.join(DATA_DIR, "sahih_bukhari_clean.csv"),
375
+ "muslim": os.path.join(DATA_DIR, "sahih_muslim_clean.csv"),
376
+ "musnad": os.path.join(DATA_DIR, "musnad_ahmed_clean.csv"),
377
+ }
378
 
379
  # --- تطبيع عربي (أحاديث) ---
380
  def normalize_ar(s: str) -> str: