Spaces:

VanguardAI
/

MultiModal_OpenSource_AI

Paused

VanguardAI committed on Aug 17, 2024

Commit

5e6cec6

verified ·

1 Parent(s): 6b07c5f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -127,7 +127,7 @@ def handle_input(user_prompt, image=None, audio=None, websearch=False, document=
             model="whisper-large-v3"
         )
         user_prompt = transcription.text
-        response = llm.call(query=user_prompt)
         audio_output = play_voice_output(response)
         return "Response generated.", audio_output
@@ -159,7 +159,7 @@ def handle_input(user_prompt, image=None, audio=None, websearch=False, document=
         image.save("output.jpg")
         return "output.jpg", None
-    elif function == "image_description":
         print("Executing Image Description")
         if image:
             image = Image.open(image).convert('RGB')
@@ -169,7 +169,7 @@ def handle_input(user_prompt, image=None, audio=None, websearch=False, document=
         else:
             return "Please upload an image.", None
-    elif function == "document_summarization":
         print("Executing Document Summarization")
         if document:
             document_qa = DocumentQuestionAnswering(document)
@@ -180,7 +180,7 @@ def handle_input(user_prompt, image=None, audio=None, websearch=False, document=
     else:  # function == "text_to_text"
         print("Executing Text-to-Text")
-        response = llm.call(query=user_prompt)
         return response, None
 # Main interface function

             model="whisper-large-v3"
         )
         user_prompt = transcription.text
+        response = llm.invoke(query=user_prompt)
         audio_output = play_voice_output(response)
         return "Response generated.", audio_output
         image.save("output.jpg")
         return "output.jpg", None
+    elif function == "image_vqa":
         print("Executing Image Description")
         if image:
             image = Image.open(image).convert('RGB')
         else:
             return "Please upload an image.", None
+    elif function == "document_qa":
         print("Executing Document Summarization")
         if document:
             document_qa = DocumentQuestionAnswering(document)
     else:  # function == "text_to_text"
         print("Executing Text-to-Text")
+        response = llm.invoke(query=user_prompt)
         return response, None
 # Main interface function