PhishingTest

Paused

App Files Files Community

dungeon29 committed 18 days ago

Commit

47c8ad8

verified ·

1 Parent(s): 3797dd4

Update llm_client.py

Browse files

Files changed (1) hide show

llm_client.py +85 -85

llm_client.py CHANGED Viewed

@@ -1,85 +1,85 @@
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-from langchain_huggingface import HuggingFacePipeline
-from langchain.chains import RetrievalQA
-from langchain_core.prompts import PromptTemplate
-import torch
-class LLMClient:
-    def __init__(self, vector_store=None):
-        """
-        Initialize Qwen2.5-3B-Instruct with LangChain
-        """
-        print("🔷 Loading Qwen2.5-3B-Instruct (LangChain)...")
-        model_name = "Qwen/Qwen2.5-1.5B-Instruct"
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-        self.model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            torch_dtype=torch.bfloat16,
-            device_map="cpu",
-            low_cpu_mem_usage=True,
-            trust_remote_code=True
-        )
-        # Create HF Pipeline
-        pipe = pipeline(
-            "text-generation",
-            model=self.model,
-            tokenizer=self.tokenizer,
-            max_new_tokens=256,  # Reduced to save memory
-            temperature=0.7,
-            top_p=0.9,
-            repetition_penalty=1.1,
-            do_sample=True
-        )
-        self.llm = HuggingFacePipeline(pipeline=pipe)
-        self.vector_store = vector_store
-        print("✅ LLM Client Ready!")
-    def analyze(self, text, context_chunks=None):
-        """
-        Analyze text using LangChain RetrievalQA
-        """
-        if not self.vector_store:
-            return "❌ Vector Store not initialized."
-        # Custom Prompt Template
-        template = """You are a cybersecurity expert specializing in phishing detection.
-Use the following pieces of context to analyze the input.
-If the input is in Vietnamese, respond in Vietnamese.
-Context:
-{context}
-Input to Analyze:
-{question}
-Analysis:"""
-        PROMPT = PromptTemplate(
-            template=template,
-            input_variables=["context", "question"]
-        )
-        # Create QA Chain
-        qa_chain = RetrievalQA.from_chain_type(
-            llm=self.llm,
-            chain_type="stuff",
-            retriever=self.vector_store.as_retriever(search_kwargs={"k": 3}),
-            chain_type_kwargs={"prompt": PROMPT}
-        )
-        try:
-            print("🤖 Generating response (LangChain)...")
-            response = qa_chain.invoke(text)
-            # Explicit Garbage Collection
-            import gc
-            gc.collect()
-            return response['result']
-        except Exception as e:
-            return f"❌ Error: {str(e)}"

+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from langchain_huggingface import HuggingFacePipeline
+from langchain.chains import RetrievalQA
+from langchain_core.prompts import PromptTemplate
+import torch
+class LLMClient:
+    def __init__(self, vector_store=None):
+        """
+        Initialize Qwen2.5-3B-Instruct with LangChain
+        """
+        print("🔷 Loading Qwen2.5-3B-Instruct (LangChain)...")
+        model_name = "Qwen/Qwen2.5-1.5B-Instruct"
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+        self.model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.bfloat16,
+            device_map="cpu",
+            low_cpu_mem_usage=True,
+            trust_remote_code=True
+        )
+        # Create HF Pipeline
+        pipe = pipeline(
+            "text-generation",
+            model=self.model,
+            tokenizer=self.tokenizer,
+            max_new_tokens=512,
+            temperature=0.5,
+            top_p=0.9,
+            repetition_penalty=1.1,
+            do_sample=True
+        )
+        self.llm = HuggingFacePipeline(pipeline=pipe)
+        self.vector_store = vector_store
+        print("✅ LLM Client Ready!")
+    def analyze(self, text, context_chunks=None):
+        """
+        Analyze text using LangChain RetrievalQA
+        """
+        if not self.vector_store:
+            return "❌ Vector Store not initialized."
+        # Custom Prompt Template
+        template = """You are a cybersecurity expert specializing in phishing detection.
+Use the following pieces of context to analyze the input.
+If the input is in Vietnamese, respond in Vietnamese.
+Context:
+{context}
+Input to Analyze:
+{question}
+Analysis:"""
+        PROMPT = PromptTemplate(
+            template=template,
+            input_variables=["context", "question"]
+        )
+        # Create QA Chain
+        qa_chain = RetrievalQA.from_chain_type(
+            llm=self.llm,
+            chain_type="stuff",
+            retriever=self.vector_store.as_retriever(search_kwargs={"k": 3}),
+            chain_type_kwargs={"prompt": PROMPT}
+        )
+        try:
+            print("🤖 Generating response (LangChain)...")
+            response = qa_chain.invoke(text)
+            # Explicit Garbage Collection
+            import gc
+            gc.collect()
+            return response['result']
+        except Exception as e:
+            return f"❌ Error: {str(e)}"