Spaces:
Sleeping
Commit
·
855dfb9
1
Parent(s):
87ae702
Using whisper without saving file
Browse files
app.py
CHANGED
|
@@ -13,7 +13,7 @@ def hide_settings():
|
|
| 13 |
audio_input = gr.Audio(
|
| 14 |
label="Record audio",
|
| 15 |
sources=["microphone"],
|
| 16 |
-
type="
|
| 17 |
waveform_options={"show_controls": False},
|
| 18 |
interactive=True,
|
| 19 |
editable=False,
|
|
@@ -35,7 +35,7 @@ def hide_solution():
|
|
| 35 |
audio_input = gr.Audio(
|
| 36 |
label="Record audio",
|
| 37 |
sources=["microphone"],
|
| 38 |
-
type="
|
| 39 |
waveform_options={"show_controls": False},
|
| 40 |
interactive=False,
|
| 41 |
editable=False,
|
|
@@ -93,7 +93,7 @@ with gr.Blocks() as demo:
|
|
| 93 |
audio_input = gr.Audio(
|
| 94 |
label="Record audio",
|
| 95 |
sources=["microphone"],
|
| 96 |
-
type="
|
| 97 |
waveform_options={"show_controls": False},
|
| 98 |
interactive=False,
|
| 99 |
editable=False,
|
|
@@ -124,11 +124,12 @@ with gr.Blocks() as demo:
|
|
| 124 |
|
| 125 |
audio_input.stop_recording(fn=transcribe_audio, inputs=[audio_input], outputs=[message]).then(
|
| 126 |
fn=return_none, inputs=None, outputs=[audio_input]
|
| 127 |
-
).then(
|
| 128 |
-
fn=send_request,
|
| 129 |
-
inputs=[code, previous_code, message, chat_history, chat, model_select],
|
| 130 |
-
outputs=[chat_history, chat, message, previous_code],
|
| 131 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
|
| 133 |
chat.change(fn=read_last_message, inputs=[chat], outputs=[audio_output])
|
| 134 |
|
|
|
|
| 13 |
audio_input = gr.Audio(
|
| 14 |
label="Record audio",
|
| 15 |
sources=["microphone"],
|
| 16 |
+
type="numpy",
|
| 17 |
waveform_options={"show_controls": False},
|
| 18 |
interactive=True,
|
| 19 |
editable=False,
|
|
|
|
| 35 |
audio_input = gr.Audio(
|
| 36 |
label="Record audio",
|
| 37 |
sources=["microphone"],
|
| 38 |
+
type="numpy",
|
| 39 |
waveform_options={"show_controls": False},
|
| 40 |
interactive=False,
|
| 41 |
editable=False,
|
|
|
|
| 93 |
audio_input = gr.Audio(
|
| 94 |
label="Record audio",
|
| 95 |
sources=["microphone"],
|
| 96 |
+
type="numpy",
|
| 97 |
waveform_options={"show_controls": False},
|
| 98 |
interactive=False,
|
| 99 |
editable=False,
|
|
|
|
| 124 |
|
| 125 |
audio_input.stop_recording(fn=transcribe_audio, inputs=[audio_input], outputs=[message]).then(
|
| 126 |
fn=return_none, inputs=None, outputs=[audio_input]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
)
|
| 128 |
+
# .then(
|
| 129 |
+
# fn=send_request,
|
| 130 |
+
# inputs=[code, previous_code, message, chat_history, chat, model_select],
|
| 131 |
+
# outputs=[chat_history, chat, message, previous_code],
|
| 132 |
+
# )
|
| 133 |
|
| 134 |
chat.change(fn=read_last_message, inputs=[chat], outputs=[audio_output])
|
| 135 |
|
audio.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import wave
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def numpy_audio_to_bytes(audio_data, sample_rate=44100, num_channels=1, sampwidth=2):
    """Serialize a raw PCM sample buffer into an in-memory WAV file.

    Builds the complete WAV byte stream in RAM so the caller can upload the
    recording (e.g. to a transcription API) without writing a temp file.

    Args:
        audio_data: numpy array of PCM samples; its raw bytes are written
            verbatim, so its dtype must match ``sampwidth`` (int16 for the
            default 2-byte width) — assumes mono samples, TODO confirm
            against the recording source.
        sample_rate: frames per second recorded in the WAV header
            (default 44100, matching the original hard-coded value).
        num_channels: channel count recorded in the WAV header (default 1).
        sampwidth: bytes per sample recorded in the WAV header (default 2).

    Returns:
        bytes: full WAV file contents (RIFF header + frames).
    """
    buffer = io.BytesIO()
    # wave.open on a BytesIO target keeps everything off the filesystem.
    with wave.open(buffer, "wb") as wf:
        wf.setnchannels(num_channels)
        wf.setsampwidth(sampwidth)
        wf.setframerate(sample_rate)
        wf.writeframes(audio_data.tobytes())

    return buffer.getvalue()
|
llm.py
CHANGED
|
@@ -3,6 +3,7 @@ import json
|
|
| 3 |
from dotenv import load_dotenv
|
| 4 |
from openai import OpenAI
|
| 5 |
|
|
|
|
| 6 |
from prompts import coding_interviewer_prompt, grading_feedback_prompt
|
| 7 |
|
| 8 |
load_dotenv()
|
|
@@ -73,9 +74,10 @@ def send_request(code, previous_code, message, chat_history, chat_display, model
|
|
| 73 |
return chat_history, chat_display, "", code
|
| 74 |
|
| 75 |
|
| 76 |
-
def transcribe_audio(
|
| 77 |
-
|
| 78 |
-
|
|
|
|
| 79 |
|
| 80 |
return transcription
|
| 81 |
|
|
|
|
| 3 |
from dotenv import load_dotenv
|
| 4 |
from openai import OpenAI
|
| 5 |
|
| 6 |
+
from audio import numpy_audio_to_bytes
|
| 7 |
from prompts import coding_interviewer_prompt, grading_feedback_prompt
|
| 8 |
|
| 9 |
load_dotenv()
|
|
|
|
| 74 |
return chat_history, chat_display, "", code
|
| 75 |
|
| 76 |
|
| 77 |
+
def transcribe_audio(audio, client=client):
    """Transcribe a recorded clip with OpenAI Whisper, entirely in memory.

    ``audio`` is presumably the Gradio numpy-mode payload
    ``(sample_rate, samples)`` — the app's Audio components use
    ``type="numpy"``. Only the sample buffer is used; the WAV upload is
    assembled in RAM instead of being saved to disk first.

    Returns the plain-text transcription produced by the API.
    """
    # NOTE(review): audio[0] (the capture sample rate) is discarded here,
    # while numpy_audio_to_bytes assumes 44100 Hz — confirm the rates match.
    in_memory_wav = ("temp.wav", numpy_audio_to_bytes(audio[1]), "audio/wav")
    return client.audio.transcriptions.create(
        model="whisper-1",
        file=in_memory_wav,
        response_format="text",
    )
|
| 83 |
|