Spaces:

ellawang9
/

bps-school-chatbot

Build error

App Files Files Community

ellawang9 committed on Apr 15

Commit

8e036bb

verified ·

1 Parent(s): f902b26

Update src/chat.py

Browse files

Files changed (1) hide show

src/chat.py +30 -7

src/chat.py CHANGED Viewed

@@ -15,9 +15,31 @@ class SchoolChatbot:
         model_id = MY_MODEL if MY_MODEL else BASE_MODEL
         self.client = InferenceClient(model=model_id, token=HF_TOKEN)
         self.df = pd.read_csv("bps_data.csv")
-        with open("keyword_to_column_map.json") as f:
             self.keyword_map = json.load(f)
     def format_prompt(self, user_input):
         return (
             "<|system|>You are a helpful assistant that specializes in Boston public school enrollment.<|end|>\n"
@@ -25,6 +47,12 @@ class SchoolChatbot:
             "<|assistant|>"
         )
     def extract_context_with_keywords(self, prompt, school_name=None):
         def extract_keywords(text):
             tokens = re.findall(r'\b\w+\b', text.lower())
@@ -50,12 +78,7 @@ class SchoolChatbot:
         return context_items
     def get_response(self, user_input):
-        matched_school = None
-        for name in self.df["BPS_School_Name"].dropna():
-            if name.lower() in user_input.lower():
-                matched_school = name
-                break
         structured_facts = self.extract_context_with_keywords(user_input, matched_school)
         if structured_facts:

         model_id = MY_MODEL if MY_MODEL else BASE_MODEL
         self.client = InferenceClient(model=model_id, token=HF_TOKEN)
         self.df = pd.read_csv("bps_data.csv")
+        with open("cleaned_keyword_to_column_map.json") as f:
             self.keyword_map = json.load(f)
+        # Create school name map with aliases
+        self.school_name_map = {}
+        for _, row in self.df.iterrows():
+            primary = row.get("BPS_School_Name")
+            hist = row.get("BPS_Historical_Name")
+            abbrev = row.get("SMMA_Abbreviated_Name")
+            if pd.notna(primary):
+                self.school_name_map[primary.lower()] = primary
+            if pd.notna(hist):
+                self.school_name_map[hist.lower()] = primary
+            if pd.notna(abbrev):
+                self.school_name_map[abbrev.lower()] = primary
+        # Add custom aliases
+        self.school_name_map.update({
+            "acc": "Another Course to College*",
+            "baldwin": "Baldwin Early Learning Pilot Academy",
+            "adams elementary": "Adams, Samuel Elementary",
+            "alighieri montessori": "Alighieri, Dante Montessori School",
+            "phineas bates": "Bates, Phineas Elementary"
+        })
     def format_prompt(self, user_input):
         return (
             "<|system|>You are a helpful assistant that specializes in Boston public school enrollment.<|end|>\n"
             "<|assistant|>"
         )
+    def match_school_name(self, query):
+        for key in self.school_name_map:
+            if key in query.lower():
+                return self.school_name_map[key]
+        return None
     def extract_context_with_keywords(self, prompt, school_name=None):
         def extract_keywords(text):
             tokens = re.findall(r'\b\w+\b', text.lower())
         return context_items
     def get_response(self, user_input):
+        matched_school = self.match_school_name(user_input)
         structured_facts = self.extract_context_with_keywords(user_input, matched_school)
         if structured_facts: