Spaces:

UcsTurkey
/

mistral7b

Paused

App Files Files Community

ciyidogan committed on May 22

Commit

0b8c8b5

verified ·

1 Parent(s): d5ed5d0

Update inference_test_turkcell_with_intents.py

Browse files

Files changed (1) hide show

inference_test_turkcell_with_intents.py +48 -45

inference_test_turkcell_with_intents.py CHANGED Viewed

@@ -6,10 +6,6 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequen
 from peft import PeftModel
 from datasets import Dataset
 from datetime import datetime
-import faiss
-import numpy as np
-import pandas as pd
-from sentence_transformers import SentenceTransformer
 # === Ortam
 HF_TOKEN = os.getenv("HF_TOKEN")
@@ -23,10 +19,6 @@ USE_FINE_TUNE = False
 FINE_TUNE_REPO = "UcsTurkey/trained-zips"
 FINE_TUNE_ZIP = "trained_model_000_009.zip"
 USE_SAMPLING = False
-USE_RAG = True
-RAG_INDEX_PATH = "/app/faiss/faiss_index_000_100.index"
-RAG_METADATA_PATH = "/app/faiss/faiss_index_000_100_metadata.parquet"
-RAG_EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
 INTENT_CONFIDENCE_THRESHOLD = 0.5
 LLM_CONFIDENCE_THRESHOLD = 0.2
 TRAIN_CONFIDENCE_THRESHOLD = 0.7
@@ -36,22 +28,19 @@ FALLBACK_ANSWERS = [
     "Bu soruya şu an yanıt veremiyorum."
 ]
-# === Global Değişkenler
 INTENT_MODEL_PATH = "intent_model"
 INTENT_MODEL_ID = "dbmdz/bert-base-turkish-cased"
 INTENT_MODEL = None
 INTENT_TOKENIZER = None
 LABEL2ID = {}
 INTENT_DEFINITIONS = {}
-model = None
-tokenizer = None
-eos_token_id = None
-faiss_index = None
-rag_metadata = None
-rag_embedder = None
 # === FastAPI
 app = FastAPI()
 class Message(BaseModel):
     user_input: str
@@ -105,6 +94,7 @@ def background_training(intents):
             for ex in intent["examples"]:
                 texts.append(ex)
                 labels.append(idx)
         dataset = Dataset.from_dict({"text": texts, "label": labels})
         tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
         config = AutoConfig.from_pretrained(INTENT_MODEL_ID)
@@ -118,6 +108,7 @@ def background_training(intents):
             tokenized_data["input_ids"].append(out["input_ids"])
             tokenized_data["attention_mask"].append(out["attention_mask"])
             tokenized_data["label"].append(row["label"])
         tokenized = Dataset.from_dict(tokenized_data)
         tokenized.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
@@ -131,6 +122,7 @@ def background_training(intents):
         )
         trainer.train()
         log("🔧 Başarı raporu üretiliyor...")
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         model.to(device)
@@ -142,7 +134,8 @@ def background_training(intents):
             predictions = outputs.logits.argmax(dim=-1).tolist()
         actuals = tokenized["label"]
-        counts, correct = {}, {}
         for pred, actual in zip(predictions, actuals):
             intent = list(label2id.keys())[list(label2id.values()).index(actual)]
             counts[intent] = counts.get(intent, 0) + 1
@@ -154,13 +147,16 @@ def background_training(intents):
             if accuracy < TRAIN_CONFIDENCE_THRESHOLD or total < 5:
                 log(f"⚠️ Yetersiz performanslı intent: '{intent}' — Doğruluk: {accuracy:.2f}, Örnek: {total}")
         if os.path.exists(INTENT_MODEL_PATH):
             shutil.rmtree(INTENT_MODEL_PATH)
         model.save_pretrained(INTENT_MODEL_PATH)
         tokenizer.save_pretrained(INTENT_MODEL_PATH)
         with open(os.path.join(INTENT_MODEL_PATH, "label2id.json"), "w") as f:
             json.dump(label2id, f)
         log("✅ Intent eğitimi tamamlandı ve model kaydedildi.")
     except Exception as e:
         log(f"❌ Intent eğitimi hatası: {e}")
         traceback.print_exc()
@@ -191,6 +187,7 @@ async def generate_response(text):
     eos_token = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
     input_ids = encodeds.to(model.device)
     attention_mask = (input_ids != tokenizer.pad_token_id).long()
     with torch.no_grad():
         output = model.generate(
             input_ids=input_ids,
@@ -202,11 +199,14 @@ async def generate_response(text):
             return_dict_in_generate=True,
             output_scores=True
         )
-    top_conf = None
     if not USE_SAMPLING:
         scores = torch.stack(output.scores, dim=1)
         probs = torch.nn.functional.softmax(scores[0], dim=-1)
         top_conf = probs.max().item()
     decoded = tokenizer.decode(output.sequences[0], skip_special_tokens=True).strip()
     for tag in ["assistant", "<|im_start|>assistant"]:
         start = decoded.find(tag)
@@ -215,40 +215,45 @@ async def generate_response(text):
             break
     return decoded, top_conf
-def search_rag(query, top_k=1):
-    if faiss_index is None or rag_metadata is None:
-        return None
-    emb = rag_embedder.encode([query], convert_to_numpy=True)
-    D, I = faiss_index.search(emb, top_k)
-    results = rag_metadata.iloc[I[0]]
-    return results.iloc[0]["output"] if not results.empty else None
 @app.post("/chat")
 async def chat(msg: Message):
     user_input = msg.user_input.strip()
     try:
         if INTENT_MODEL:
             intent_task = asyncio.create_task(detect_intent(user_input))
             response_task = asyncio.create_task(generate_response(user_input))
             intent, intent_conf = await intent_task
             log(f"🎯 Intent: {intent} (conf={intent_conf:.2f})")
             if intent_conf > INTENT_CONFIDENCE_THRESHOLD and intent in INTENT_DEFINITIONS:
-                return execute_intent(intent, user_input)
-            response, response_conf = await response_task
-            if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
-                if USE_RAG:
-                    rag_result = search_rag(user_input)
-                    if rag_result:
-                        return {"response": rag_result}
-                return {"response": random.choice(FALLBACK_ANSWERS)}
-            return {"response": response}
         else:
             response, response_conf = await generate_response(user_input)
             if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
-                if USE_RAG:
-                    rag_result = search_rag(user_input)
-                    if rag_result:
-                        return {"response": rag_result}
                 return {"response": random.choice(FALLBACK_ANSWERS)}
             return {"response": response}
     except Exception as e:
@@ -260,27 +265,25 @@ def log(message):
     print(f"[{timestamp}] {message}", flush=True)
 def setup_model():
-    global model, tokenizer, eos_token_id, faiss_index, rag_metadata, rag_embedder
     try:
         log("🧠 setup_model() başladı")
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         log(f"📡 Kullanılan cihaz: {device}")
         tokenizer = AutoTokenizer.from_pretrained(MODEL_BASE, use_fast=False)
         model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=torch.float32).to(device)
         tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token
         model.config.pad_token_id = tokenizer.pad_token_id
         eos_token_id = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
         model.eval()
-        log("✅ Ana model yüklendi ve hazır.")
         _ = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
         _ = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID)
         log("✅ Intent modeli önbelleğe alındı.")
-        if USE_RAG:
-            log("📥 FAISS index yükleniyor...")
-            faiss_index = faiss.read_index(RAG_INDEX_PATH)
-            rag_metadata = pd.read_parquet(RAG_METADATA_PATH)
-            rag_embedder = SentenceTransformer(RAG_EMBEDDING_MODEL_NAME)
-            log("✅ FAISS index ve metadata yüklendi.")
     except Exception as e:
         log(f"❌ setup_model() hatası: {e}")
         traceback.print_exc()

 from peft import PeftModel
 from datasets import Dataset
 from datetime import datetime
 # === Ortam
 HF_TOKEN = os.getenv("HF_TOKEN")
 FINE_TUNE_REPO = "UcsTurkey/trained-zips"
 FINE_TUNE_ZIP = "trained_model_000_009.zip"
 USE_SAMPLING = False
 INTENT_CONFIDENCE_THRESHOLD = 0.5
 LLM_CONFIDENCE_THRESHOLD = 0.2
 TRAIN_CONFIDENCE_THRESHOLD = 0.7
     "Bu soruya şu an yanıt veremiyorum."
 ]
 INTENT_MODEL_PATH = "intent_model"
 INTENT_MODEL_ID = "dbmdz/bert-base-turkish-cased"
 INTENT_MODEL = None
 INTENT_TOKENIZER = None
 LABEL2ID = {}
 INTENT_DEFINITIONS = {}
 # === FastAPI
 app = FastAPI()
+chat_history = []
+model = None
+tokenizer = None
+eos_token_id = None
 class Message(BaseModel):
     user_input: str
             for ex in intent["examples"]:
                 texts.append(ex)
                 labels.append(idx)
         dataset = Dataset.from_dict({"text": texts, "label": labels})
         tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
         config = AutoConfig.from_pretrained(INTENT_MODEL_ID)
             tokenized_data["input_ids"].append(out["input_ids"])
             tokenized_data["attention_mask"].append(out["attention_mask"])
             tokenized_data["label"].append(row["label"])
         tokenized = Dataset.from_dict(tokenized_data)
         tokenized.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
         )
         trainer.train()
+        # ✅ Başarı raporu üret
         log("🔧 Başarı raporu üretiliyor...")
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         model.to(device)
             predictions = outputs.logits.argmax(dim=-1).tolist()
         actuals = tokenized["label"]
+        counts = {}
+        correct = {}
         for pred, actual in zip(predictions, actuals):
             intent = list(label2id.keys())[list(label2id.values()).index(actual)]
             counts[intent] = counts.get(intent, 0) + 1
             if accuracy < TRAIN_CONFIDENCE_THRESHOLD or total < 5:
                 log(f"⚠️ Yetersiz performanslı intent: '{intent}' — Doğruluk: {accuracy:.2f}, Örnek: {total}")
+        log("📦 Intent modeli eğitimi kaydediliyor...")
         if os.path.exists(INTENT_MODEL_PATH):
             shutil.rmtree(INTENT_MODEL_PATH)
         model.save_pretrained(INTENT_MODEL_PATH)
         tokenizer.save_pretrained(INTENT_MODEL_PATH)
         with open(os.path.join(INTENT_MODEL_PATH, "label2id.json"), "w") as f:
             json.dump(label2id, f)
         log("✅ Intent eğitimi tamamlandı ve model kaydedildi.")
     except Exception as e:
         log(f"❌ Intent eğitimi hatası: {e}")
         traceback.print_exc()
     eos_token = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
     input_ids = encodeds.to(model.device)
     attention_mask = (input_ids != tokenizer.pad_token_id).long()
     with torch.no_grad():
         output = model.generate(
             input_ids=input_ids,
             return_dict_in_generate=True,
             output_scores=True
         )
     if not USE_SAMPLING:
         scores = torch.stack(output.scores, dim=1)
         probs = torch.nn.functional.softmax(scores[0], dim=-1)
         top_conf = probs.max().item()
+    else:
+        top_conf = None
     decoded = tokenizer.decode(output.sequences[0], skip_special_tokens=True).strip()
     for tag in ["assistant", "<|im_start|>assistant"]:
         start = decoded.find(tag)
             break
     return decoded, top_conf
def extract_parameters(variables_list, user_input):
    """Extract named parameter values from ``user_input`` using intent templates.

    Each entry of ``variables_list`` is a template in which every
    ``name:{example}`` placeholder is rewritten into a lazy named capture
    group ``(?P<name>.+?)``. The rest of the template is passed to the regex
    engine unchanged. Templates are tried in order; the first match wins.

    A whole-string match is attempted first so that a placeholder at the end
    of a template captures the remainder of the input. With a prefix-only
    match the trailing lazy group would stop after a single character. If the
    whole-string match fails, a prefix match is used as a fallback, so inputs
    with extra text after the template still match as before.

    Args:
        variables_list: Iterable of template strings.
        user_input: Text to extract values from.

    Returns:
        A list of ``{"key": name, "value": captured_text}`` dicts for the
        first matching template, or an empty list when nothing matches.
    """
    for pattern in variables_list:
        # Turn "name:{example}" placeholders into named lazy capture groups.
        regex = re.sub(r"(\w+):\{(.+?)\}", r"(?P<\1>.+?)", pattern)
        # Prefer a full match so a trailing lazy group expands to the end of
        # the input; keep the prefix match as a backward-compatible fallback.
        match = re.fullmatch(regex, user_input) or re.match(regex, user_input)
        if match:
            return [{"key": k, "value": v} for k, v in match.groupdict().items()]
    return []
def execute_intent(intent_name, user_input):
    """Build the structured result for a detected intent.

    When ``intent_name`` has an entry in ``INTENT_DEFINITIONS``, its
    ``"variables"`` templates (empty list if the key is absent) are passed to
    ``extract_parameters`` together with ``user_input``, and the call is
    logged. Otherwise an empty parameter list is returned without logging.

    Args:
        intent_name: Name of the detected intent.
        user_input: Raw user text from which parameters are extracted.

    Returns:
        A dict of the form ``{"intent": intent_name, "parameters": [...]}``.
    """
    # Unknown intents return immediately with no parameters and no log line.
    if intent_name not in INTENT_DEFINITIONS:
        return {"intent": intent_name, "parameters": []}

    spec = INTENT_DEFINITIONS[intent_name]
    variables = extract_parameters(spec.get("variables", []), user_input)
    log(f"🚀 execute_intent('{intent_name}', {variables})")
    return {"intent": intent_name, "parameters": variables}
 @app.post("/chat")
 async def chat(msg: Message):
     user_input = msg.user_input.strip()
     try:
+        if model is None or tokenizer is None:
+            return {"error": "Model yüklenmedi."}
         if INTENT_MODEL:
             intent_task = asyncio.create_task(detect_intent(user_input))
             response_task = asyncio.create_task(generate_response(user_input))
             intent, intent_conf = await intent_task
             log(f"🎯 Intent: {intent} (conf={intent_conf:.2f})")
             if intent_conf > INTENT_CONFIDENCE_THRESHOLD and intent in INTENT_DEFINITIONS:
+                result = execute_intent(intent, user_input)
+                return result
+            else:
+                response, response_conf = await response_task
+                if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
+                    return {"response": random.choice(FALLBACK_ANSWERS)}
+                return {"response": response}
         else:
             response, response_conf = await generate_response(user_input)
             if response_conf is not None and response_conf < LLM_CONFIDENCE_THRESHOLD:
                 return {"response": random.choice(FALLBACK_ANSWERS)}
             return {"response": response}
     except Exception as e:
     print(f"[{timestamp}] {message}", flush=True)
 def setup_model():
+    global model, tokenizer, eos_token_id
     try:
         log("🧠 setup_model() başladı")
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         log(f"📡 Kullanılan cihaz: {device}")
         tokenizer = AutoTokenizer.from_pretrained(MODEL_BASE, use_fast=False)
+        log("📦 Tokenizer yüklendi. Ana model indiriliyor...")
         model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=torch.float32).to(device)
+        log("📦 Ana model indirildi ve yüklendi. eval() çağırılıyor...")
         tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token
         model.config.pad_token_id = tokenizer.pad_token_id
         eos_token_id = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
         model.eval()
+        log("✅ Ana model eval() çağrıldı")
+        log(f"📦 Intent modeli indiriliyor: {INTENT_MODEL_ID}")
         _ = AutoTokenizer.from_pretrained(INTENT_MODEL_ID)
         _ = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_ID)
         log("✅ Intent modeli önbelleğe alındı.")
+        log("✔️ Model başarıyla yüklendi ve sohbet için hazır.")
     except Exception as e:
         log(f"❌ setup_model() hatası: {e}")
         traceback.print_exc()