Upload folder using huggingface_hub
Files changed:
- app.py      +25 -15
- index.html  +61 -18
app.py
CHANGED

@@ -13,6 +13,7 @@ from fastapi.responses import HTMLResponse
 from fastrtc import (
     AsyncStreamHandler,
     Stream,
+    WebRTCError,
     get_twilio_turn_credentials,
 )
 from google import genai
@@ -62,12 +63,18 @@ class GeminiHandler(AsyncStreamHandler):
         )
 
     async def start_up(self):
-
-
-
-
-
-
+        if not self.phone_mode:
+            await self.wait_for_args()
+            api_key, voice_name = self.latest_args[1:]
+        else:
+            api_key, voice_name = None, "Puck"
+        try:
+            client = genai.Client(
+                api_key=api_key or os.getenv("GEMINI_API_KEY"),
+                http_options={"api_version": "v1alpha"},
+            )
+        except Exception as e:
+            raise WebRTCError(str(e))
         config = LiveConnectConfig(
             response_modalities=["AUDIO"],  # type: ignore
             speech_config=SpeechConfig(
@@ -78,15 +85,18 @@ class GeminiHandler(AsyncStreamHandler):
                 )
             ),
         )
-
-
-
-
-
-
-
-
-
+        try:
+            async with client.aio.live.connect(
+                model="gemini-2.0-flash-exp", config=config
+            ) as session:
+                async for audio in session.start_stream(
+                    stream=self.stream(), mime_type="audio/pcm"
+                ):
+                    if audio.data:
+                        array = np.frombuffer(audio.data, dtype=np.int16)
+                        self.output_queue.put_nowait(array)
+        except Exception as e:
+            raise WebRTCError(str(e))
 
     async def stream(self) -> AsyncGenerator[bytes, None]:
         while not self.quit.is_set():
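For readability, the new start_up body assembled from the added lines above reads as follows. This is an excerpt rather than a runnable script: self.phone_mode, self.wait_for_args, self.latest_args, self.output_queue, the os/np/genai imports, and the LiveConnectConfig construction all live in parts of app.py that this diff does not show, and the comments are a reading of the change, not text from the commit.

    async def start_up(self):
        # Browser clients send the API key and voice name over the data
        # channel; phone mode falls back to the env var and the "Puck" voice.
        if not self.phone_mode:
            await self.wait_for_args()
            api_key, voice_name = self.latest_args[1:]
        else:
            api_key, voice_name = None, "Puck"

        # Wrap client construction so a bad key becomes a WebRTCError
        # instead of an unhandled exception inside the stream handler.
        try:
            client = genai.Client(
                api_key=api_key or os.getenv("GEMINI_API_KEY"),
                http_options={"api_version": "v1alpha"},
            )
        except Exception as e:
            raise WebRTCError(str(e))

        config = LiveConnectConfig(...)  # unchanged by this commit; elided here

        # Same pattern around the live session: failures are re-raised as
        # WebRTCError, which the index.html changes below surface as a toast.
        try:
            async with client.aio.live.connect(
                model="gemini-2.0-flash-exp", config=config
            ) as session:
                async for audio in session.start_stream(
                    stream=self.stream(), mime_type="audio/pcm"
                ):
                    if audio.data:
                        array = np.frombuffer(audio.data, dtype=np.int16)
                        self.output_queue.put_nowait(array)
        except Exception as e:
            raise WebRTCError(str(e))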
index.html
CHANGED

@@ -147,11 +147,29 @@
         transform: translateX(-0%) scale(var(--audio-level, 1));
         transition: transform 0.1s ease;
     }
+
+    /* Add styles for toast notifications */
+    .toast {
+        position: fixed;
+        top: 20px;
+        left: 50%;
+        transform: translateX(-50%);
+        background-color: #f44336;
+        color: white;
+        padding: 16px 24px;
+        border-radius: 4px;
+        font-size: 14px;
+        z-index: 1000;
+        display: none;
+        box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
+    }
     </style>
 </head>
 
 
 <body>
+    <!-- Add toast element after body opening tag -->
+    <div id="error-toast" class="toast"></div>
     <div style="text-align: center">
         <h1>Gemini Voice Chat</h1>
         <p>Speak with Gemini using real-time audio streaming</p>
@@ -229,6 +247,17 @@
            }
        }
 
+        function showError(message) {
+            const toast = document.getElementById('error-toast');
+            toast.textContent = message;
+            toast.style.display = 'block';
+
+            // Hide toast after 5 seconds
+            setTimeout(() => {
+                toast.style.display = 'none';
+            }, 5000);
+        }
+
        async function setupWebRTC() {
            const config = __RTC_CONFIGURATION__;
            peerConnection = new RTCPeerConnection(config);
@@ -286,7 +315,24 @@
 
                // Create data channel for messages
                dataChannel = peerConnection.createDataChannel('text');
-                dataChannel.onmessage =
+                dataChannel.onmessage = (event) => {
+                    const eventJson = JSON.parse(event.data);
+                    if (eventJson.type === "error") {
+                        showError(eventJson.message);
+                    } else if (eventJson.type === "send_input") {
+                        fetch('/input_hook', {
+                            method: 'POST',
+                            headers: {
+                                'Content-Type': 'application/json',
+                            },
+                            body: JSON.stringify({
+                                webrtc_id: webrtc_id,
+                                api_key: apiKeyInput.value,
+                                voice_name: voiceSelect.value
+                            })
+                        });
+                    }
+                };
 
                // Create and send offer
                const offer = await peerConnection.createOffer();
@@ -317,26 +363,22 @@
                });
 
                const serverResponse = await response.json();
+
+                if (serverResponse.status === 'failed') {
+                    showError(serverResponse.meta.error === 'concurrency_limit_reached'
+                        ? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
+                        : serverResponse.meta.error);
+                    stop();
+                    startButton.textContent = 'Start Recording';
+                    return;
+                }
+
                await peerConnection.setRemoteDescription(serverResponse);
            } catch (err) {
                console.error('Error setting up WebRTC:', err);
-
-
-
-            function handleMessage(event) {
-                const eventJson = JSON.parse(event.data);
-                if (eventJson.type === "send_input") {
-                    fetch('/input_hook', {
-                        method: 'POST',
-                        headers: {
-                            'Content-Type': 'application/json',
-                        },
-                        body: JSON.stringify({
-                            webrtc_id: webrtc_id,
-                            api_key: apiKeyInput.value,
-                            voice_name: voiceSelect.value
-                        })
-                    });
+                showError('Failed to establish connection. Please try again.');
+                stop();
+                startButton.textContent = 'Start Recording';
            }
        }
 
@@ -364,6 +406,7 @@
            if (audioContext) {
                audioContext.close();
            }
+            updateButtonState();
        }
 
        startButton.addEventListener('click', () => {
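Taken together, the index.html changes give errors a visible path in the UI: a .toast style and an #error-toast element, a showError() helper that displays a message for five seconds, a data-channel onmessage handler that shows messages of type "error" as a toast while still posting the API key and voice name to /input_hook for "send_input", an explicit check for a failed offer response (including the concurrency_limit_reached case), a toast plus stop() when WebRTC setup throws, and an updateButtonState() call when audio is torn down.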