Update app.py
app.py CHANGED
@@ -137,17 +137,9 @@ class DuckDuckGoSearchRun(BaseTool):
         answer = data["Abstract"]
         return answer

+# Function to handle different input types and choose the right tool
 # Function to handle different input types and choose the right tool
 def handle_input(user_prompt, image=None, audio=None, websearch=False, document=None):
-    if audio:
-        if isinstance(audio, str):
-            audio = open(audio, "rb")
-        transcription = client.audio.transcriptions.create(
-            file=(audio.name, audio.read()),
-            model="whisper-large-v3"
-        )
-        user_prompt = transcription.text
-
     # Initialize the search tool
     search = DuckDuckGoSearchRun()

@@ -189,20 +181,31 @@ def handle_input(user_prompt, image=None, audio=None, websearch=False, document=

     llm = ChatGroq(model=MODEL, api_key=os.environ.get("GROQ_API_KEY"))

-    #
-
-
-
-
-
-
-
-
-
-
-
+    # Check if the input requires any tools
+    requires_tool = False
+    for tool in tools:
+        if tool.name.lower() in user_prompt.lower():
+            requires_tool = True
+            break
+
+    if image or audio or requires_tool:
+        # Initialize the agent
+        agent = initialize_agent(
+            tools,
+            llm,
+            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+            verbose=True
+        )
+
+        if image:
+            image = Image.open(image).convert('RGB')
+            messages = [{"role": "user", "content": [image, user_prompt]}]
+            response = vqa_model.chat(image=None, msgs=messages, tokenizer=tokenizer)
+        else:
+            response = agent.run(user_prompt)
     else:
-
+        # If no tools are required, use the LLM directly
+        response = llm.call(query=user_prompt)

     return response
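Note that the deleted block in the first hunk was the only place `audio` was transcribed, yet `audio` still triggers the agent branch in the new code, so audio inputs now reach the agent untranscribed. If transcription is still wanted, the removed logic factors cleanly into a helper. The sketch below is illustrative only: it reuses the Groq client call exactly as it appeared in the old code, and assumes `client` is the Groq SDK client app.py constructs earlier.

# Sketch only: the transcription step this commit removes, kept as a helper.
# Assumes `client` is the Groq SDK client that app.py already instantiates.
def transcribe(audio):
    if isinstance(audio, str):        # accept a file path...
        audio = open(audio, "rb")     # ...or an already-open binary file
    transcription = client.audio.transcriptions.create(
        file=(audio.name, audio.read()),
        model="whisper-large-v3",
    )
    return transcription.text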
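The second hunk routes every request through a name-match heuristic: if a tool's name appears verbatim in the prompt, or an image or audio attachment is present, a ReAct agent handles the request; otherwise the model is called directly. Below is a minimal self-contained sketch of that flow, with two labeled assumptions: the `route` function and its parameters are illustrative rather than app.py's actual interface, and the fallback uses `llm.invoke(...)` because the committed `llm.call(query=...)` does not match ChatGroq's documented API in recent LangChain releases.

import os

from langchain.agents import AgentType, initialize_agent
from langchain_groq import ChatGroq
from PIL import Image

def route(user_prompt, tools, model, image=None, audio=None,
          vqa_model=None, tokenizer=None):
    llm = ChatGroq(model=model, api_key=os.environ.get("GROQ_API_KEY"))

    # The commit's heuristic: a tool is "required" when its name
    # appears as a substring of the prompt.
    requires_tool = any(t.name.lower() in user_prompt.lower() for t in tools)

    if image or audio or requires_tool:
        agent = initialize_agent(
            tools,
            llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
        )
        if image and vqa_model is not None:
            img = Image.open(image).convert("RGB")
            messages = [{"role": "user", "content": [img, user_prompt]}]
            # MiniCPM-style chat call, mirroring the committed code
            return vqa_model.chat(image=None, msgs=messages, tokenizer=tokenizer)
        return agent.run(user_prompt)

    # No image, audio, or matching tool name: call the model directly.
    return llm.invoke(user_prompt).content

One caveat on the design: substring matching on tool names means a prompt must literally mention a tool (e.g. "duckduckgo search") to trigger the agent, which is brittle compared with letting the ReAct agent decide tool use on its own.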