Spaces:

ellawang9
/

bps-school-chatbot

Build error

App Files Files Community

ellawang9 committed on Apr 16

Commit

2b3dacc

verified ·

1 Parent(s): fee4cb4

Update src/chat.py

Browse files

Files changed (1) hide show

src/chat.py +39 -7

src/chat.py CHANGED Viewed

@@ -7,18 +7,16 @@ import re
 from difflib import get_close_matches
 class SchoolChatbot:
-    """
-    A chatbot that integrates structured school data and language generation to assist with Boston Public School queries.
-    """
     def __init__(self):
         model_id = MY_MODEL if MY_MODEL else BASE_MODEL
         self.client = InferenceClient(model=model_id, token=HF_TOKEN)
         self.df = pd.read_csv("bps_data.csv")
-        with open("keyword_to_column_map.json") as f:
             self.keyword_map = json.load(f)
-        # Create school name map with aliases
         self.school_name_map = {}
         for _, row in self.df.iterrows():
             primary = row.get("BPS_School_Name")
@@ -31,13 +29,12 @@ class SchoolChatbot:
             if pd.notna(abbrev):
                 self.school_name_map[abbrev.lower()] = primary
-        # Add custom aliases
         self.school_name_map.update({
             "acc": "Another Course to College*",
             "baldwin": "Baldwin Early Learning Pilot Academy",
             "adams elementary": "Adams, Samuel Elementary",
             "alighieri montessori": "Alighieri, Dante Montessori School",
-            "phineas bates": "Bates, Phineas Elementary"
         })
     def format_prompt(self, user_input):
@@ -77,7 +74,42 @@ class SchoolChatbot:
                 context_items.append(f"The school's {kw} is {val.lower()}.")
         return context_items
     def get_response(self, user_input):
         matched_school = self.match_school_name(user_input)
         structured_facts = self.extract_context_with_keywords(user_input, matched_school)

 from difflib import get_close_matches
 class SchoolChatbot:
+    """Boston School Chatbot integrating structured data, vector context, and model completion."""
     def __init__(self):
         model_id = MY_MODEL if MY_MODEL else BASE_MODEL
         self.client = InferenceClient(model=model_id, token=HF_TOKEN)
         self.df = pd.read_csv("bps_data.csv")
+        with open("cleaned_keyword_to_column_map.json") as f:
             self.keyword_map = json.load(f)
+        # Build name variants for school matching
         self.school_name_map = {}
         for _, row in self.df.iterrows():
             primary = row.get("BPS_School_Name")
             if pd.notna(abbrev):
                 self.school_name_map[abbrev.lower()] = primary
         self.school_name_map.update({
             "acc": "Another Course to College*",
             "baldwin": "Baldwin Early Learning Pilot Academy",
             "adams elementary": "Adams, Samuel Elementary",
             "alighieri montessori": "Alighieri, Dante Montessori School",
+            "phineas bates": "Bates, Phineas Elementary",
         })
     def format_prompt(self, user_input):
                 context_items.append(f"The school's {kw} is {val.lower()}.")
         return context_items
+    def query_schools_by_feature(self, query):
+        tokens = re.findall(r'\b\w+\b', query.lower())
+        matched_keywords = set()
+        for token in tokens:
+            matched_keywords.update(get_close_matches(token, self.keyword_map.keys(), cutoff=0.85))
+        positive_terms = "yes|accessible|adequate|good|excellent|present"
+        negative_terms = "no|not accessible|inadequate|poor|bad|limited"
+        matching_schools = set()
+        inverse = any(t in query.lower() for t in ["not", "inaccessible", "bad", "poor", "lacking"])
+        for keyword in matched_keywords:
+            col = self.keyword_map.get(keyword)
+            if col and col in self.df.columns:
+                if inverse:
+                    subset = self.df[~self.df[col].astype(str).str.lower().str.contains(positive_terms, na=False)]
+                else:
+                    subset = self.df[self.df[col].astype(str).str.lower().str.contains(positive_terms, na=False)]
+                schools = subset["BPS_School_Name"].dropna().unique().tolist()
+                matching_schools.update(schools)
+        if not matching_schools:
+            return None
+        return (
+            "The following schools match your criteria:\n" +
+            "\n".join(f"- {s}" for s in sorted(matching_schools))
+        )
     def get_response(self, user_input):
+        # School-wide filter query
+        school_filter = self.query_schools_by_feature(user_input)
+        if school_filter:
+            return school_filter
+        # Per-school context query
         matched_school = self.match_school_name(user_input)
         structured_facts = self.extract_context_with_keywords(user_input, matched_school)