Update pipeline.py

pipeline.py CHANGED (+28 -24)
```diff
@@ -10,7 +10,7 @@ from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.chains import RetrievalQA
 from smolagents import CodeAgent, DuckDuckGoSearchTool, ManagedAgent, LiteLLMModel
-from pydantic import BaseModel, ValidationError
+from pydantic import BaseModel, ValidationError, validator
 from mistralai import Mistral
 from langchain.prompts import PromptTemplate
 
```
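A note on the unchanged imports: `langchain.embeddings`, `langchain.vectorstores`, and `langchain.chains` are the pre-0.1 LangChain paths, so this Space presumably pins an older release. On current LangChain the first two usually come from `langchain-community`; a hedged sketch:

```python
# Equivalent imports on newer LangChain releases (assumes langchain-community is installed).
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA  # still exported from langchain.chains
```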
```diff
@@ -25,6 +25,9 @@ from prompts import classification_prompt, refusal_prompt, tailor_prompt
 mistral_api_key = os.environ.get("MISTRAL_API_KEY")
 client = Mistral(api_key=mistral_api_key)
 
+# Initialize Pydantic AI Agent (for text validation)
+pydantic_agent = Agent('mistral:mistral-large-latest', result_type=str)
+
 # Load spaCy model for NER and download it if not already installed
 def install_spacy_model():
     try:
```
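The `Agent` used above has no import in the visible hunks; assuming it is pydantic-ai's `Agent` (the model string and the `result_type` argument match that API), the missing import would look like the sketch below. Note that `pydantic_agent` is not referenced anywhere else in the diff shown here.

```python
# Assumed import for the Agent initialized above; not shown in this diff.
from pydantic_ai import Agent

# Mirrors the line added in the hunk above.
pydantic_agent = Agent('mistral:mistral-large-latest', result_type=str)
```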
```diff
@@ -53,6 +56,17 @@ def extract_main_topic(query: str) -> str:
             break
     return main_topic if main_topic else "this topic"
 
+# Pydantic model to handle string input validation
+class QueryInput(BaseModel):
+    query: str
+
+    # Validator to ensure the query is always a string
+    @validator('query')
+    def check_query_is_string(cls, v):
+        if not isinstance(v, str):
+            raise ValueError("Query must be a valid string.")
+        return v
+
 # Function to classify query based on wellness topics
 def classify_query(query: str) -> str:
     wellness_keywords = ["box breathing", "meditation", "yoga", "mindfulness", "breathing exercises"]
```
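The new `QueryInput` model uses the Pydantic v1-style `@validator` (on Pydantic v2 it is deprecated in favor of `@field_validator`). A minimal sketch of how it behaves, assuming v1 semantics:

```python
from pydantic import BaseModel, ValidationError, validator

class QueryInput(BaseModel):
    query: str

    @validator('query')
    def check_query_is_string(cls, v):
        if not isinstance(v, str):
            raise ValueError("Query must be a valid string.")
        return v

QueryInput(query="What is box breathing?")  # passes validation

try:
    QueryInput(query=None)  # fails: None is not a valid string
except ValidationError as e:
    print(e)
```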
```diff
@@ -63,45 +77,31 @@ def classify_query(query: str) -> str:
     classification = class_result.get("text", "").strip()
     return classification if classification != "OutOfScope" else "OutOfScope"
 
-#
-
-    text: str
-
-# Function to validate the text input using Pydantic
-def validate_text(query: str) -> str:
+# Function to moderate text using Mistral moderation API (sync version)
+def moderate_text(query: str) -> str:
     try:
-        #
-
-        return query
+        # Use Pydantic to validate text input
+        query_input = QueryInput(query=query)  # This will validate that the query is a string
     except ValidationError as e:
         print(f"Error validating text: {e}")
         return "Invalid text format."
-
-# Function to moderate text using Mistral moderation API (synchronous version)
-def moderate_text(query: str) -> str:
-    # Validate the text using Pydantic
-    validated_text = validate_text(query)
-    if validated_text == "Invalid text format.":
-        return validated_text
 
     # Call the Mistral moderation API
     response = client.classifiers.moderate_chat(
         model="mistral-moderation-latest",
-        inputs=[{"role": "user", "content":
+        inputs=[{"role": "user", "content": query}]
     )
 
-    #
-    # check if it has a 'results' attribute, and then access its categories
+    # Check if harmful categories are present in the response
     if hasattr(response, 'results') and response.results:
         categories = response.results[0].categories
-        # Check if harmful categories are present
         if categories.get("violence_and_threats", False) or \
            categories.get("hate_and_discrimination", False) or \
            categories.get("dangerous_and_criminal_content", False) or \
           categories.get("selfharm", False):
             return "OutOfScope"
 
-    return
+    return query
 
 
 # Function to build or load the vector store from CSV data
```

(Several removed lines in this hunk, including the bare `#` fragments and the truncated `inputs=[...]` and `return` lines, were cut off in this view; the fragments are kept as rendered.)
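The moderation call introduced above can be exercised on its own. A minimal sketch, assuming the mistralai v1 SDK and a `MISTRAL_API_KEY` in the environment; the sample prompt is illustrative:

```python
import os
from mistralai import Mistral

client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])

response = client.classifiers.moderate_chat(
    model="mistral-moderation-latest",
    inputs=[{"role": "user", "content": "How do I practice box breathing?"}],
)

# Each result maps category names to booleans; True marks a policy hit.
categories = response.results[0].categories
flagged = [name for name, hit in categories.items() if hit]
print(flagged if flagged else "clean")
```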
```diff
@@ -173,7 +173,7 @@ def merge_responses(kb_answer: str, web_answer: str) -> str:
 
 # Orchestrate the entire workflow
 def run_pipeline(query: str) -> str:
-    # Moderate the query for harmful content
+    # Moderate the query for harmful content
     moderated_query = moderate_text(query)
     if moderated_query == "OutOfScope":
         return "Sorry, this query contains harmful or inappropriate content."
```

(The removed and added comment lines here, and again at line 210 below, render identically; presumably these are whitespace or indentation changes that this view does not preserve.)
```diff
@@ -207,7 +207,7 @@ def run_pipeline(query: str) -> str:
     final_refusal = tailor_chain.run({"response": refusal_text})
     return final_refusal.strip()
 
-# Initialize chains
+# Initialize chains
 classification_chain = get_classification_chain()
 refusal_chain = get_refusal_chain()
 tailor_chain = get_tailor_chain()
```
```diff
@@ -224,3 +224,7 @@ brand_vectorstore = build_or_load_vectorstore(brand_csv, brand_store_dir)
 gemini_llm = LiteLLMModel(model_id="gemini/gemini-pro", api_key=os.environ.get("GEMINI_API_KEY"))
 wellness_rag_chain = build_rag_chain(gemini_llm, wellness_vectorstore)
 brand_rag_chain = build_rag_chain(gemini_llm, brand_vectorstore)
+
+# Function to wrap up and run the chain
+def run_with_chain(query: str) -> str:
+    return run_pipeline(query)
```
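With the chains and vector stores initialized above, the new `run_with_chain` wrapper becomes the single entry point for callers such as a UI handler. A quick smoke test; the query text is illustrative:

```python
# Hypothetical smoke test for the new entry point.
if __name__ == "__main__":
    print(run_with_chain("Can you explain box breathing?"))
```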