DhirajN
/

demo

Audio-Text-to-Text

English

Model card Files Files and versions

xet

Community

DhirajN committed on Feb 7, 2025

Commit

1c04206

verified ·

1 Parent(s): 36b9cfd

Create app.py

Browse files

Files changed (1) hide show

app.py +54 -0

app.py ADDED Viewed

	@@ -0,0 +1,54 @@

+# -*- coding: utf-8 -*-
+"""OpenAI Whisper from Hugging Face Transformers with Microsoft PHI 3 Integration"""
+import gradio as gr
+from transformers import pipeline
+import torch
+from huggingface_hub import InferenceClient
+import os
+# Initialize the InferenceClient for PHI 3
+client = InferenceClient(
+    "microsoft/phi-3",  # Update this to the correct model name for PHI 3
+    token=os.getenv("HF_API_TOKEN", "")  # You can configure this API token through the Hugging Face Secrets
+)
+# Check if a GPU is available and use it if possible
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+# Initialize the Whisper pipeline
+whisper = pipeline('automatic-speech-recognition', model='openai/whisper-tiny', device=0 if device == 'cuda' else -1)
+# Instructions (can be set through Hugging Face Secrets or hardcoded)
+instructions = os.getenv("INST", "Your default instructions here.")
+def query_phi(prompt):
+    response = ""  # Initialize an empty string to store the response
+    for message in client.chat_completion(
+      messages=[{"role": "user", "content": f"{instructions}\n{prompt}"}],
+      max_tokens=500,
+      stream=True,
+    ):
+        response += message.choices[0].delta.content  # Append each message to the response
+    return response  # Return the accumulated response after the loop
+def transcribe_and_query(audio):
+    # Transcribe the audio file
+    transcription = whisper(audio)["text"]
+    transcription = "Prompt : " + transcription
+    # Query Microsoft PHI 3 with the transcribed text
+    phi_response = query_phi(transcription)
+    return transcription, phi_response
+# Create Gradio interface
+iface = gr.Interface(
+    fn=transcribe_and_query,
+    inputs=gr.Audio(type="filepath"),
+    outputs=["text", "text"],
+    title="Scam Call detector with BEEP",
+    description="Upload your recorded call to see if it is a scam or not. /n Stay Safe, Stay Secure."
+)
+# Launch the interface
+iface.launch(share=True)  # share=True is optional, it provides a public link