Spaces:
Runtime error
Runtime error
aar2dee2
committed on
Commit
·
5a1ed1a
1
Parent(s):
bf4c978
custom send_audio function
Browse files
app.py
CHANGED
|
@@ -7,7 +7,7 @@ from vocode import getenv
|
|
| 7 |
import gradio as gr
|
| 8 |
import os
|
| 9 |
import logging
|
| 10 |
-
|
| 11 |
from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
|
| 12 |
from vocode.turn_based.synthesizer import CoquiSynthesizer
|
| 13 |
from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
|
|
@@ -56,6 +56,9 @@ Answer the question accurately in less than 150 words. Remember you are Darth Va
|
|
| 56 |
|
| 57 |
# # 1. Setup Vocode
|
| 58 |
# import env vars
|
|
|
|
|
|
|
|
|
|
| 59 |
vocode.setenv(
|
| 60 |
OPENAI_API_KEY=os.getenv("OPENAI_API_KEY"),
|
| 61 |
COQUI_API_KEY=os.getenv("COQUI_API_KEY"),
|
|
@@ -67,6 +70,8 @@ logging.basicConfig()
|
|
| 67 |
logger = logging.getLogger(__name__)
|
| 68 |
logger.setLevel(logging.DEBUG)
|
| 69 |
|
|
|
|
|
|
|
| 70 |
|
| 71 |
def convert_to_audio_segment(input_audio):
|
| 72 |
sample_rate, audio_data = input_audio
|
|
@@ -80,6 +85,20 @@ def convert_to_audio_segment(input_audio):
|
|
| 80 |
return audio_segment
|
| 81 |
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
def main(input_audio):
|
| 84 |
logger.info(f"Type of input_audio: {type(input_audio)}")
|
| 85 |
logger.info(f"input_audio: {input_audio}")
|
|
@@ -98,8 +117,6 @@ def main(input_audio):
|
|
| 98 |
api_key=getenv("COQUI_API_KEY"),
|
| 99 |
)
|
| 100 |
|
| 101 |
-
speaker_output = SpeakerOutput.from_default_device()
|
| 102 |
-
|
| 103 |
print("Starting conversation. Press Ctrl+C to exit.")
|
| 104 |
while True:
|
| 105 |
try:
|
|
@@ -113,7 +130,7 @@ def main(input_audio):
|
|
| 113 |
response = agent.respond(transcript)
|
| 114 |
logger.info(f"Agent response: {response}")
|
| 115 |
output_audio = synthesizer.synthesize(response)
|
| 116 |
-
return
|
| 117 |
|
| 118 |
except Exception as e:
|
| 119 |
logger.error("Failed to synthesize response: %s", e)
|
|
|
|
| 7 |
import gradio as gr
|
| 8 |
import os
|
| 9 |
import logging
|
| 10 |
+
import sounddevice as sd
|
| 11 |
from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
|
| 12 |
from vocode.turn_based.synthesizer import CoquiSynthesizer
|
| 13 |
from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
|
|
|
|
| 56 |
|
| 57 |
# # 1. Setup Vocode
|
| 58 |
# import env vars
|
| 59 |
+
if not os.getenv("OPENAI_API_KEY") or not os.getenv("COQUI_API_KEY"):
|
| 60 |
+
raise EnvironmentError("Required environment variables not set")
|
| 61 |
+
|
| 62 |
vocode.setenv(
|
| 63 |
OPENAI_API_KEY=os.getenv("OPENAI_API_KEY"),
|
| 64 |
COQUI_API_KEY=os.getenv("COQUI_API_KEY"),
|
|
|
|
| 70 |
logger = logging.getLogger(__name__)
|
| 71 |
logger.setLevel(logging.DEBUG)
|
| 72 |
|
| 73 |
+
DEFAULT_SAMPLING_RATE = 44100
|
| 74 |
+
|
| 75 |
|
| 76 |
def convert_to_audio_segment(input_audio):
|
| 77 |
sample_rate, audio_data = input_audio
|
|
|
|
| 85 |
return audio_segment
|
| 86 |
|
| 87 |
|
| 88 |
+
def send_audio(audio_segment: AudioSegment):
    """Play ``audio_segment`` on the default output device and block until done.

    The audio is resampled to ``DEFAULT_SAMPLING_RATE`` when its frame rate
    differs, then written to a mono, 16-bit output stream.

    Args:
        audio_segment: The audio to play. Its raw bytes are interpreted as
            16-bit mono samples.
            NOTE(review): nothing here converts channels or sample width --
            confirm the synthesizer always yields mono 16-bit audio.

    Returns:
        None.
    """
    sampling_rate = DEFAULT_SAMPLING_RATE

    # Resample first so the raw bytes match the rate the stream is opened with.
    if audio_segment.frame_rate != sampling_rate:
        audio_segment = audio_segment.set_frame_rate(sampling_rate)
    samples = np.frombuffer(audio_segment.raw_data, dtype=np.int16)

    # Using the stream as a context manager starts it on entry and stops and
    # closes it on exit. Without the start, write() fails on a stopped stream;
    # without the close, every call leaks an audio device handle.
    with sd.OutputStream(
        channels=1,
        samplerate=sampling_rate,
        dtype=np.int16,
        device=None,
    ) as stream:
        stream.write(samples)
|
| 100 |
+
|
| 101 |
+
|
| 102 |
def main(input_audio):
|
| 103 |
logger.info(f"Type of input_audio: {type(input_audio)}")
|
| 104 |
logger.info(f"input_audio: {input_audio}")
|
|
|
|
| 117 |
api_key=getenv("COQUI_API_KEY"),
|
| 118 |
)
|
| 119 |
|
|
|
|
|
|
|
| 120 |
print("Starting conversation. Press Ctrl+C to exit.")
|
| 121 |
while True:
|
| 122 |
try:
|
|
|
|
| 130 |
response = agent.respond(transcript)
|
| 131 |
logger.info(f"Agent response: {response}")
|
| 132 |
output_audio = synthesizer.synthesize(response)
|
| 133 |
+
return send_audio(output_audio)
|
| 134 |
|
| 135 |
except Exception as e:
|
| 136 |
logger.error("Failed to synthesize response: %s", e)
|