Spaces:

Teapack1
/

Assistant-Audio-Intent-Classification

Sleeping

App Files Files Community

Teapack1 commited on Jan 2, 2024

Commit

9fe1f6c

1 Parent(s): 7463397

server run utility

Browse files

Files changed (3) hide show

app.py +15 -128
requirements.txt +3 -2
server.py +131 -0

app.py CHANGED Viewed

@@ -1,133 +1,20 @@
-from fastapi import FastAPI, WebSocket, Request, WebSocketDisconnect
-from fastapi.staticfiles import StaticFiles
-from fastapi.responses import HTMLResponse
-from fastapi.templating import Jinja2Templates
 import os
-import gradio
-import numpy as np
-from transformers import pipeline
-import torch
-from transformers.pipelines.audio_utils import ffmpeg_microphone_live
-device = "cuda:0" if torch.cuda.is_available() else "cpu"
-classifier = pipeline(
-    "audio-classification", model="MIT/ast-finetuned-speech-commands-v2", device=device
-)
-intent_class_pipe = pipeline(
-    "audio-classification", model="anton-l/xtreme_s_xlsr_minds14", device=device
-)
-async def launch_fn(
-    wake_word="marvin",
-    prob_threshold=0.5,
-    chunk_length_s=2.0,
-    stream_chunk_s=0.25,
-    debug=False,
-):
-    if wake_word not in classifier.model.config.label2id.keys():
-        raise ValueError(
-            f"Wake word {wake_word} not in set of valid class labels, pick a wake word in the set {classifier.model.config.label2id.keys()}."
-        )
-    sampling_rate = classifier.feature_extractor.sampling_rate
-    mic = ffmpeg_microphone_live(
-        sampling_rate=sampling_rate,
-        chunk_length_s=chunk_length_s,
-        stream_chunk_s=stream_chunk_s,
-    )
-    print("Listening for wake word...")
-    for prediction in classifier(mic):
-        prediction = prediction[0]
-        if debug:
-            print(prediction)
-        if prediction["label"] == wake_word:
-            if prediction["score"] > prob_threshold:
-                return True
-async def listen(websocket, chunk_length_s=2.0, stream_chunk_s=2.0):
-    sampling_rate = intent_class_pipe.feature_extractor.sampling_rate
-    mic = ffmpeg_microphone_live(
-        sampling_rate=sampling_rate,
-        chunk_length_s=chunk_length_s,
-        stream_chunk_s=stream_chunk_s,
-    )
-    audio_buffer = []
-    print("Listening")
-    for i in range(4):
-        audio_chunk = next(mic)
-        audio_buffer.append(audio_chunk["raw"])
-        prediction = intent_class_pipe(audio_chunk["raw"])
-        await websocket.send_text(f"chunk: {prediction[0]['label']} | {i+1} / 4")
-        if await is_silence(audio_chunk["raw"], threshold=0.7):
-            print("Silence detected, processing audio.")
-            break
-    combined_audio = np.concatenate(audio_buffer)
-    prediction = intent_class_pipe(combined_audio)
-    top_3_predictions = prediction[:3]
-    formatted_predictions = "\n".join([f"{pred['label']}: {pred['score'] * 100:.2f}%" for pred in top_3_predictions])
-    await websocket.send_text(f"classes: \n{formatted_predictions}")
-    return
-async def is_silence(audio_chunk, threshold):
-    silence = intent_class_pipe(audio_chunk)
-    if silence[0]["label"] == "silence" and silence[0]["score"] > threshold:
-        return True
     else:
-        return False
-# Initialize FastAPI app
-app = FastAPI()
-# Set up static file directory
-app.mount("/static", StaticFiles(directory="static"), name="static")
-# Jinja2 Template for HTML rendering
-templates = Jinja2Templates(directory="templates")
-@app.get("/", response_class=HTMLResponse)
-async def get_home(request: Request):
-    return templates.TemplateResponse("index.html", {"request": request})
-@app.websocket("/ws")
-async def websocket_endpoint(websocket: WebSocket):
-    await websocket.accept()
-    try:
-        process_active = False  # Flag to track the state of the process
-        while True:
-            message = await websocket.receive_text()
-            if message == "start" and not process_active:
-                process_active = True
-                await websocket.send_text("Listening for wake word...")
-                wake_word_detected = await launch_fn(debug=True)
-                if wake_word_detected:
-                    await websocket.send_text("Wake word detected. Listening for your query...")
-                    await listen(websocket)
-                    process_active = False  # Reset the process flag
-            elif message == "stop":
-                if process_active:
-                    # Implement logic to stop the ongoing process
-                    # This might involve setting a flag that your launch_fn and listen functions check
-                    process_active = False
-                    await websocket.send_text("Process stopped. Ready to restart.")
-                    break  # Or keep the loop running if you want to allow restarting without reconnecting
-    except WebSocketDisconnect:
-        print("Client disconnected.")

+import json
 import os
+import requests
+import socket
+def start_server():
+    os.system("uvicorn server:app --port 8080 --host 0.0.0.0 --workers 2")
+def is_port_in_use(port):
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        return s.connect_ex(('0.0.0.0', port)) == 0
+def main():
+    if is_port_in_use(8080):
+        print("Port 8080 is already in use. Please kill the process and try again.")
     else:
+        start_server()
+if __name__ == "__main__":
+    main()

requirements.txt CHANGED Viewed

@@ -3,5 +3,6 @@ transformers
 torchaudio
 numpy
 fastapi
-uvicorn[standard]
-gradio

 torchaudio
 numpy
 fastapi
+uvicorn
+gradio
+requests

server.py ADDED Viewed

	@@ -0,0 +1,131 @@

+from fastapi import FastAPI, WebSocket, Request, WebSocketDisconnect
+from fastapi.staticfiles import StaticFiles
+from fastapi.responses import HTMLResponse
+from fastapi.templating import Jinja2Templates
+import numpy as np
+from transformers import pipeline
+import torch
+from transformers.pipelines.audio_utils import ffmpeg_microphone_live
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+classifier = pipeline(
+    "audio-classification", model="MIT/ast-finetuned-speech-commands-v2", device=device
+)
+intent_class_pipe = pipeline(
+    "audio-classification", model="anton-l/xtreme_s_xlsr_minds14", device=device
+)
+async def launch_fn(
+    wake_word="marvin",
+    prob_threshold=0.5,
+    chunk_length_s=2.0,
+    stream_chunk_s=0.25,
+    debug=False,
+):
+    if wake_word not in classifier.model.config.label2id.keys():
+        raise ValueError(
+            f"Wake word {wake_word} not in set of valid class labels, pick a wake word in the set {classifier.model.config.label2id.keys()}."
+        )
+    sampling_rate = classifier.feature_extractor.sampling_rate
+    mic = ffmpeg_microphone_live(
+        sampling_rate=sampling_rate,
+        chunk_length_s=chunk_length_s,
+        stream_chunk_s=stream_chunk_s,
+    )
+    print("Listening for wake word...")
+    for prediction in classifier(mic):
+        prediction = prediction[0]
+        if debug:
+            print(prediction)
+        if prediction["label"] == wake_word:
+            if prediction["score"] > prob_threshold:
+                return True
+async def listen(websocket, chunk_length_s=2.0, stream_chunk_s=2.0):
+    sampling_rate = intent_class_pipe.feature_extractor.sampling_rate
+    mic = ffmpeg_microphone_live(
+        sampling_rate=sampling_rate,
+        chunk_length_s=chunk_length_s,
+        stream_chunk_s=stream_chunk_s,
+    )
+    audio_buffer = []
+    print("Listening")
+    for i in range(4):
+        audio_chunk = next(mic)
+        audio_buffer.append(audio_chunk["raw"])
+        prediction = intent_class_pipe(audio_chunk["raw"])
+        await websocket.send_text(f"chunk: {prediction[0]['label']} | {i+1} / 4")
+        if await is_silence(audio_chunk["raw"], threshold=0.7):
+            print("Silence detected, processing audio.")
+            break
+    combined_audio = np.concatenate(audio_buffer)
+    prediction = intent_class_pipe(combined_audio)
+    top_3_predictions = prediction[:3]
+    formatted_predictions = "\n".join([f"{pred['label']}: {pred['score'] * 100:.2f}%" for pred in top_3_predictions])
+    await websocket.send_text(f"classes: \n{formatted_predictions}")
+    return
+async def is_silence(audio_chunk, threshold):
+    silence = intent_class_pipe(audio_chunk)
+    if silence[0]["label"] == "silence" and silence[0]["score"] > threshold:
+        return True
+    else:
+        return False
+# Initialize FastAPI app
+app = FastAPI()
+# Set up static file directory
+app.mount("/static", StaticFiles(directory="static"), name="static")
+# Jinja2 Template for HTML rendering
+templates = Jinja2Templates(directory="templates")
+@app.get("/", response_class=HTMLResponse)
+async def get_home(request: Request):
+    return templates.TemplateResponse("index.html", {"request": request})
+@app.websocket("/ws")
+async def websocket_endpoint(websocket: WebSocket):
+    await websocket.accept()
+    try:
+        process_active = False  # Flag to track the state of the process
+        while True:
+            message = await websocket.receive_text()
+            if message == "start" and not process_active:
+                process_active = True
+                await websocket.send_text("Listening for wake word...")
+                wake_word_detected = await launch_fn(debug=True)
+                if wake_word_detected:
+                    await websocket.send_text("Wake word detected. Listening for your query...")
+                    await listen(websocket)
+                    process_active = False  # Reset the process flag
+            elif message == "stop":
+                if process_active:
+                    # Implement logic to stop the ongoing process
+                    # This might involve setting a flag that your launch_fn and listen functions check
+                    process_active = False
+                    await websocket.send_text("Process stopped. Ready to restart.")
+                    break  # Or keep the loop running if you want to allow restarting without reconnecting
+    except WebSocketDisconnect:
+        print("Client disconnected.")