Final_Assignment_Template

Sleeping

App Files Files Community

lukmanaj committed on Apr 26

Commit

46c1cbd

verified ·

1 Parent(s): c2f387f

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -16

app.py CHANGED Viewed

@@ -7,8 +7,6 @@ import pandas as pd
 # from google.genai import types
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from smolagents.agents import ReActAgent
-from smolagents.tools import tool
 # (Keep Constants as is)
 # --- Constants ---
@@ -69,39 +67,72 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 #             print(f"Error during Gemini API call: {str(e)}")
 #             return f"Error: {str(e)}"
-class BasicAgent(ReActAgent):
     def __init__(self):
         print("BasicAgent using local LLM initialized.")
-        # Load a small model from Hugging Face
-        model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # You can pick another lightweight model
         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
         self.model = AutoModelForCausalLM.from_pretrained(
             model_name,
             torch_dtype=torch.float16,
-            device_map="auto"  # Automatically choose GPU/CPU
         )
-        super().__init__(tools=[])  # No tools for now
-    def call(self, task: str) -> str:
-        """Core method for answering a task."""
-        prompt = f"Answer the following question concisely:\n\n{task}\n\nAnswer:"
         inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
         with torch.no_grad():
             outputs = self.model.generate(
                 **inputs,
-                max_new_tokens=200,
                 do_sample=True,
                 temperature=0.7,
-                top_p=0.95,
                 top_k=50,
             )
-        answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # Extract only the answer part
-        return answer.split("Answer:")[-1].strip()
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """

 # from google.genai import types
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 # (Keep Constants as is)
 # --- Constants ---
 #             print(f"Error during Gemini API call: {str(e)}")
 #             return f"Error: {str(e)}"
+# class BasicAgent(ReActAgent):
+#     def __init__(self):
+#         print("BasicAgent using local LLM initialized.")
+#         # Load a small model from Hugging Face
+#         model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # You can pick another lightweight model
+#         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+#         self.model = AutoModelForCausalLM.from_pretrained(
+#             model_name,
+#             torch_dtype=torch.float16,
+#             device_map="auto"  # Automatically choose GPU/CPU
+#         )
+#         super().__init__(tools=[])  # No tools for now
+#     def call(self, task: str) -> str:
+#         """Core method for answering a task."""
+#         prompt = f"Answer the following question concisely:\n\n{task}\n\nAnswer:"
+#         inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
+#         with torch.no_grad():
+#             outputs = self.model.generate(
+#                 **inputs,
+#                 max_new_tokens=200,
+#                 do_sample=True,
+#                 temperature=0.7,
+#                 top_p=0.95,
+#                 top_k=50,
+#             )
+#         answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+#         # Extract only the answer part
+#         return answer.split("Answer:")[-1].strip()
class BasicAgent:
    """Question-answering agent backed by a small local causal language model.

    Instances are callable: ``agent(task)`` builds a prompt around ``task``,
    samples a continuation from the model and returns it as plain text.
    """

    def __init__(self):
        """Load the tokenizer and the model weights for the configured checkpoint."""
        print("BasicAgent using local LLM initialized.")
        # Load a small Hugging Face model
        model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # Change if you want
        # Half precision is only a safe default on GPU; when device_map="auto"
        # falls back to CPU, float16 is slow or unsupported for some ops, so
        # use full precision there.
        dtype = torch.float16 if torch.cuda.is_available() else torch.float32
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=dtype,
            device_map="auto",  # Use GPU if available
        )

    def __call__(self, task: str) -> str:
        """Answer a question.

        Args:
            task: The question text to put into the prompt.

        Returns:
            The generated continuation, with the prompt removed and
            surrounding whitespace stripped.
        """
        prompt = f"Answer the following question clearly and concisely:\n\n{task}\n\nAnswer:"
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=256,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                top_k=50,
            )
        # The returned sequence starts with the prompt tokens. Decode only what
        # follows them, so the answer cannot be cut short or replaced when the
        # model itself writes another "Answer:" marker in its continuation.
        prompt_length = inputs["input_ids"].shape[-1]
        generated = outputs[0][prompt_length:]
        return self.tokenizer.decode(generated, skip_special_tokens=True).strip()
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """