Spaces:

IliaLarchenko
/

interviewer

Sleeping

App Files Files Community

IliaLarchenko committed on Apr 24, 2024

Commit

e98b248

1 Parent(s): 166b0a2

Auto streaming for TTS

Browse files

Files changed (2) hide show

api/audio.py +71 -38
app.py +12 -6

api/audio.py CHANGED Viewed

@@ -29,6 +29,7 @@ def numpy_audio_to_bytes(audio_data):
 class STTManager:
     def __init__(self, config):
         self.config = config
     def speech_to_text(self, audio, convert_to_bytes=True):
         if convert_to_bytes:
@@ -57,44 +58,76 @@ class STTManager:
 class TTSManager:
     def __init__(self, config):
         self.config = config
-    def read_last_message(self, chat_display):
-        if chat_display:
-            text = chat_display[-1][1]
-            headers = {"Authorization": "Bearer " + self.config.tts.key}
-            try:
-                if self.config.tts.type == "OPENAI_API":
-                    data = {"model": self.config.tts.name, "input": text, "voice": "alloy", "response_format": "opus"}
-                    if os.environ.get("STREAMING", False):
-                        with requests.post(self.config.tts.url, headers=headers, json=data, stream=True) as response:
-                            if response.status_code != 200:
-                                error_details = response.json().get("error", "No error message provided")
-                                raise APIError("TTS Error: OPENAI API error", status_code=response.status_code, details=error_details)
-                            else:
-                                yield from response.iter_content(chunk_size=1024)
-                    else:
-                        response = requests.post(self.config.tts.url, headers=headers, json=data)
-                        if response.status_code != 200:
-                            error_details = response.json().get("error", "No error message provided")
-                            raise APIError("TTS Error: OPENAI API error", status_code=response.status_code, details=error_details)
-                        return response.content
-                elif self.config.tts.type == "HF_API":
-                    if os.environ.get("STREAMING", False):
-                        raise APIError("Streaming not supported for HF API TTS")
-                    else:
-                        response = requests.post(self.config.tts.url, headers=headers, json={"inputs": text})
-                        if response.status_code != 200:
-                            error_details = response.json().get("error", "No error message provided")
-                            raise APIError("TTS Error: HF API error", status_code=response.status_code, details=error_details)
-                        return response.content
-            except APIError as e:
-                raise
-            except Exception as e:
-                raise APIError(f"TTS Error: Unexpected error: {e}")
         else:
-            return None

 class STTManager:
     def __init__(self, config):
+        """Store the configuration and read the STREAMING environment flag.
+
+        Args:
+            config: Configuration object; it is only stored on the instance here.
+        """
         self.config = config
+        # os.getenv returns a string whenever the variable is set, so values
+        # such as "0" or "false" would be truthy if stored as-is. Normalise to
+        # a real bool: anything outside the explicit "off" spellings enables
+        # streaming, which keeps every previously-truthy value truthy.
+        flag = os.getenv("STREAMING", "")
+        self.streaming = flag.strip().lower() not in {"", "0", "false", "no", "off"}
     def speech_to_text(self, audio, convert_to_bytes=True):
         if convert_to_bytes:
 class TTSManager:
+    def test_tts(self):
+        """Probe the TTS backend by requesting speech for "Handshake".
+
+        Returns:
+            True when read_text completes without raising, False otherwise.
+        """
+        try:
+            self.read_text("Handshake")
+        except Exception:
+            # Best-effort health check: any failure just marks TTS as down.
+            # Catching Exception rather than using a bare except lets
+            # KeyboardInterrupt/SystemExit propagate instead of being
+            # reported as "TTS unavailable".
+            return False
+        return True
+    def test_tts_stream(self):
+        """Probe streaming TTS by fully consuming a "Handshake" stream.
+
+        Returns:
+            True when read_text_stream can be iterated to exhaustion without
+            raising, False otherwise.
+        """
+        try:
+            # The stream must actually be iterated: read_text_stream is a
+            # generator, so nothing runs until the first chunk is requested.
+            for _ in self.read_text_stream("Handshake"):
+                pass
+        except Exception:
+            # Best-effort probe; Exception (not a bare except) so that
+            # KeyboardInterrupt/SystemExit are not swallowed.
+            return False
+        return True
     def __init__(self, config):
+        """Store the config, probe the backend, and choose the playback method.
+
+        Sets ``status`` from test_tts(), sets ``streaming`` from
+        test_tts_stream() (probed only when the plain check succeeded), and
+        binds ``read_last_message`` to rlm_stream or rlm accordingly.
+
+        Args:
+            config: Configuration object stored on the instance.
+        """
         self.config = config
+        self.status = self.test_tts()
+        # Streaming is only probed when plain synthesis already works.
+        self.streaming = self.test_tts_stream() if self.status else False
+        if not self.streaming:
+            self.read_last_message = self.rlm
         else:
+            self.read_last_message = self.rlm_stream
+    def read_text(self, text):
+        """Convert ``text`` to speech with the configured TTS service in one request.
+
+        Args:
+            text: The text to send to the TTS endpoint.
+
+        Returns:
+            The body of the TTS response (``response.content``).
+
+        Raises:
+            APIError: If the configured TTS type is not one of "OPENAI_API" or
+                "HF_API", if the service answers with a non-200 status, or if
+                the request fails for any other reason.
+        """
+        headers = {"Authorization": "Bearer " + self.config.tts.key}
+        try:
+            tts_type = self.config.tts.type
+            if tts_type == "OPENAI_API":
+                payload = {"model": self.config.tts.name, "input": text, "voice": "alloy", "response_format": "opus"}
+            elif tts_type == "HF_API":
+                payload = {"inputs": text}
+            else:
+                # Without this branch an unknown type fell through to an
+                # unbound `response` and surfaced as a confusing
+                # "Unexpected error" about a local variable.
+                raise APIError(f"TTS Error: Unsupported TTS type: {tts_type}")
+            response = requests.post(self.config.tts.url, headers=headers, json=payload)
+            if response.status_code != 200:
+                error_details = response.json().get("error", "No error message provided")
+                raise APIError(f"TTS Error: {tts_type} error", status_code=response.status_code, details=error_details)
+        except APIError:
+            raise
+        except Exception as e:
+            # Chain the cause so the original traceback is not lost.
+            raise APIError(f"TTS Error: Unexpected error: {e}") from e
+        return response.content
+    def read_text_stream(self, text):
+        """Stream speech for ``text`` from the TTS service chunk by chunk.
+
+        This is a generator: nothing, including the TTS-type check, runs until
+        the caller starts iterating.
+
+        Args:
+            text: The text to send to the TTS endpoint.
+
+        Yields:
+            Chunks produced by ``response.iter_content(chunk_size=1024)``.
+
+        Raises:
+            APIError: If the configured TTS type is not "OPENAI_API", if the
+                service answers with a non-200 status, or if the request or
+                the streaming fails for any other reason.
+        """
+        # NOTE(review): a stray `1 / 0` used to sit here. It made every call
+        # raise ZeroDivisionError, so the streaming probe could never succeed
+        # and the code below was unreachable. Confirm that re-enabling
+        # streaming is intended before merging.
+        if self.config.tts.type not in ["OPENAI_API"]:
+            raise APIError("TTS Error: Streaming not supported for this TTS type")
+        headers = {"Authorization": "Bearer " + self.config.tts.key}
+        data = {"model": self.config.tts.name, "input": text, "voice": "alloy", "response_format": "opus"}
+        try:
+            with requests.post(self.config.tts.url, headers=headers, json=data, stream=True) as response:
+                if response.status_code != 200:
+                    error_details = response.json().get("error", "No error message provided")
+                    raise APIError("TTS Error: OPENAI API error", status_code=response.status_code, details=error_details)
+                # `yield from` ends normally when the iterator is exhausted,
+                # so no StopIteration handler is needed.
+                yield from response.iter_content(chunk_size=1024)
+        except APIError:
+            raise
+        except Exception as e:
+            # Chain the cause so the original traceback is not lost.
+            raise APIError(f"TTS Error: Unexpected error: {e}") from e
+    def rlm(self, chat_history):
+        """Return speech for the most recent entry of ``chat_history``.
+
+        Args:
+            chat_history: Sequence of chat entries; the element at index 1 of
+                the last entry is the text passed to read_text.
+
+        Returns:
+            Whatever read_text returns for that text, or None when
+            ``chat_history`` is empty or None.
+        """
+        if not chat_history:
+            return None
+        last_entry = chat_history[-1]
+        return self.read_text(last_entry[1])
+    def rlm_stream(self, chat_history):
+        """Stream speech for the most recent entry of ``chat_history``.
+
+        Args:
+            chat_history: Sequence of chat entries; the element at index 1 of
+                the last entry is the text passed to read_text_stream.
+
+        Yields:
+            Every chunk produced by read_text_stream for that text; nothing
+            at all when ``chat_history`` is empty or None.
+        """
+        if not chat_history:
+            return
+        last_entry = chat_history[-1]
+        yield from self.read_text_stream(last_entry[1])

app.py CHANGED Viewed

@@ -46,6 +46,14 @@ def hide_solution():
     return solution_acc, end_btn, problem_acc, audio_input
 # Interface
 with gr.Blocks(title="AI Interviewer") as demo:
@@ -53,18 +61,16 @@ with gr.Blocks(title="AI Interviewer") as demo:
         gr.Markdown(instruction["demo"])
     started_coding = gr.State(False)
-    audio_output = gr.Audio(label="Play audio", autoplay=True, visible=False, streaming=os.environ.get("STREAMING", False))
     with gr.Tab("Instruction") as instruction_tab:
         with gr.Row():
             with gr.Column(scale=2):
                 gr.Markdown(instruction["introduction"])
             with gr.Column(scale=1):
                 space = "&nbsp;" * 10
-                try:
-                    audio_test = tts.text_to_speech("Handshake")
-                    gr.Markdown(f"TTS status: 🟢{space} {config.tts.name}")
-                except:
-                    gr.Markdown(f"TTS status: 🔴{space} {config.tts.name}")
                 try:
                     text_test = stt.speech_to_text(audio_test, False)

     return solution_acc, end_btn, problem_acc, audio_input
+def get_status_color(obj):
+    """Map an object's ``status``/``streaming`` flags to a status emoji.
+
+    Args:
+        obj: Any object exposing ``status`` and, when ``status`` is truthy,
+            ``streaming``.
+
+    Returns:
+        "🔴" when ``status`` is falsy, "🟢" when both flags are truthy,
+        and "🟡" when ``status`` is truthy but ``streaming`` is not.
+    """
+    if not obj.status:
+        # `streaming` is deliberately not consulted for a failed status.
+        return "🔴"
+    return "🟢" if obj.streaming else "🟡"
 # Interface
 with gr.Blocks(title="AI Interviewer") as demo:
         gr.Markdown(instruction["demo"])
     started_coding = gr.State(False)
+    audio_output = gr.Audio(label="Play audio", autoplay=True, visible=os.environ["DEBUG"], streaming=tts.streaming)
     with gr.Tab("Instruction") as instruction_tab:
         with gr.Row():
             with gr.Column(scale=2):
                 gr.Markdown(instruction["introduction"])
             with gr.Column(scale=1):
                 space = "&nbsp;" * 10
+                tts_status = get_status_color(tts)
+                gr.Markdown(f"TTS status: {tts_status}{space}{config.tts.name}")
                 try:
                     text_test = stt.speech_to_text(audio_test, False)