Jofthomas committed on
Commit
89223f6
·
verified ·
1 Parent(s): 5667cf6

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +419 -0
  2. assets/voxtral.png +0 -0
  3. requirements.txt +3 -0
  4. style.css +297 -0
app.py ADDED
@@ -0,0 +1,419 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-License-Identifier: Apache-2.0
2
+
3
+ import asyncio
4
+ import base64
5
+ import json
6
+ import os
7
+ import queue
8
+ import threading
9
+ import time
10
+
11
+ import gradio as gr
12
+ import numpy as np
13
+ import websockets
14
+
15
# Embed the Voxtral logo as base64 text so the HTML templates can inline it
# in a data: URI. The string stays empty when the asset file is absent.
icon_path = os.path.join(os.path.dirname(__file__), "assets", "voxtral.png")
if not os.path.exists(icon_path):
    VOXTRAL_ICON_B64 = ""
else:
    with open(icon_path, "rb") as f:
        VOXTRAL_ICON_B64 = base64.b64encode(f.read()).decode("utf-8")

# --- Tunables (all durations are in seconds) ---
SAMPLE_RATE = 16_000        # samples per second
WARMUP_DURATION = 2.0       # silence streamed for warmup
WPM_WINDOW = 10             # look-back window for the running WPM mean
CALIBRATION_PERIOD = 5      # WPM is not shown before this much time elapsed
SESSION_TIMEOUT = 5 * 60    # 5 minutes session timeout

# --- Global config (shared across users) ---
# Placeholders only; both names are assigned again further down the file.
ws_url = ""
model = ""
31
+
32
+
33
class UserSession:
    """Mutable state belonging to one user session.

    Every attribute starts at its idle value; the instance is mutated in
    place by the functions that receive it as ``session``.
    """

    def __init__(self):
        # Lifecycle flags and the status key shown in the UI badge.
        self.is_running = False
        self.status_message = "ready"
        self.session_start_time = None

        # Transcript accumulated so far and the data behind the WPM badge.
        self.transcription_text = ""
        self.word_timestamps = []
        self.current_wpm = "Calibrating..."

        # Bounded buffer of pending audio chunks (at most 100 entries).
        self.audio_queue = queue.Queue(maxsize=100)
43
+
44
+
45
# Load the stylesheet that sits next to this file. The encoding is pinned to
# UTF-8 so that decoding does not depend on the host's locale default.
css_path = os.path.join(os.path.dirname(__file__), "style.css")
with open(css_path, "r", encoding="utf-8") as f:
    CUSTOM_CSS = f.read()
49
+
50
+
51
def get_header_html() -> str:
    """Build the page header card.

    The Voxtral logo is inlined as a base64 ``<img>`` in front of the title
    when ``VOXTRAL_ICON_B64`` is non-empty; otherwise the title has no logo.
    """
    logo_html = (
        f'<img src="data:image/png;base64,{VOXTRAL_ICON_B64}" alt="Voxtral" class="header-logo" />'
        if VOXTRAL_ICON_B64
        else ''
    )

    return f"""
    <div class="header-card">
        <h1 class="header-title">{logo_html}Real-time Speech Transcription</h1>
        <p class="header-subtitle">Powered by Voxtral Mini — Click the microphone to start recording</p>
    </div>
    """
64
+
65
+
66
def get_status_html(status: str) -> str:
    """Render the status badge for *status*.

    A status key that is not in the table below is rendered like "ready".
    """
    # status key -> (badge label, badge CSS class, extra CSS class for the dot)
    badges = {
        "ready": ("STANDBY", "status-ready", ""),
        "connecting": ("CONNECTING", "status-connecting", "fast"),
        "warming": ("WARMING UP", "status-warming", "fast"),
        "listening": ("LISTENING", "status-listening", "animate"),
        "timeout": ("TIMEOUT", "status-timeout", ""),
        "error": ("ERROR", "status-error", ""),
    }
    label, css_class, dot_class = badges.get(status) or badges["ready"]

    # The dot always carries "status-dot"; the animation class is optional.
    dot_classes = "status-dot"
    if dot_class:
        dot_classes += f" {dot_class}"

    return (
        f'<div class="status-badge {css_class}">'
        f'<span class="{dot_classes}"></span>'
        f'<span style="color: inherit !important;">{label}</span>'
        '</div>'
    )
80
+
81
+
82
def get_transcription_html(transcript: str, status: str, wpm: str = "Calibrating...") -> str:
    """Generate the full transcription card HTML.

    Args:
        transcript: Plain-text transcript accumulated so far. It is
            HTML-escaped before being embedded, so characters such as ``<``
            or ``&`` are displayed literally instead of being parsed as
            markup.
        status: Status key; selects the badge and, when there is no
            transcript yet, which placeholder is shown.
        wpm: Text displayed in the words-per-minute badge.

    Returns:
        The HTML of the card: header (icon, title, WPM badge, status badge),
        content area and footer.
    """
    import html  # function-scope import: only this function needs it

    status_badge = get_status_html(status)
    wpm_badge = f'<div class="wpm-badge"><span style="color: #1E1E1E !important;">{wpm}</span></div>'

    if transcript:
        # The transcript is text produced by the remote model, not trusted
        # markup: escape it before interpolating it into the page.
        safe_transcript = html.escape(transcript)
        # The blinking cursor is only shown while actively listening.
        cursor_html = '<span class="transcript-cursor"></span>' if status == "listening" else ""
        content_html = f"""
        <div class="transcript-text" style="color: #000000 !important;">
            {safe_transcript}{cursor_html}
        </div>
        """
    elif status in ["listening", "warming", "connecting"]:
        content_html = """
        <div class="empty-state">
            <div class="empty-dots">
                <div class="empty-dot"></div>
                <div class="empty-dot"></div>
                <div class="empty-dot"></div>
            </div>
            <p class="empty-text" style="color: #555555 !important;">Listening for audio...</p>
        </div>
        """
    elif status == "timeout":
        content_html = """
        <div class="empty-state">
            <p class="empty-text" style="color: #B30400 !important;">Session timeout (5 minutes)</p>
            <p class="empty-text" style="color: #555555 !important;">Click 'Clear History' and refresh to restart.</p>
        </div>
        """
    else:
        content_html = """
        <div class="empty-state">
            <p class="empty-text" style="color: #555555 !important;">// Awaiting audio input...</p>
            <p class="empty-text" style="color: #555555 !important;">// Click the microphone to start.</p>
        </div>
        """

    # Use base64 image if available, otherwise fall back to an emoji.
    if VOXTRAL_ICON_B64:
        icon_html = f'<img src="data:image/png;base64,{VOXTRAL_ICON_B64}" alt="Voxtral" class="voxtral-icon" />'
    else:
        icon_html = '<span style="font-size:20px;">🎙️</span>'

    return f"""
    <div class="transcription-card">
        <div class="card-header">
            <div class="card-header-left">
                {icon_html}
                <span class="card-title" style="color: #1E1E1E !important;">Transcription Output</span>
            </div>
            <div class="card-header-right">
                {wpm_badge}
                {status_badge}
            </div>
        </div>
        <div class="card-content">
            {content_html}
        </div>
        <div class="card-footer">
            <span style="color: #555555 !important;">Voxtral Mini</span>
            <span style="color: #555555 !important;">Real-time Audio Transcription</span>
        </div>
    </div>
    """
147
+
148
+
149
def calculate_wpm(session, min_span=1.0):
    """Calculate words per minute over the last ``WPM_WINDOW`` seconds.

    Side effect: timestamps older than the window are dropped from
    ``session.word_timestamps``.

    Args:
        session: Object providing ``session_start_time`` (epoch seconds or
            ``None``) and ``word_timestamps`` (list of epoch seconds, one
            entry per word).
        min_span: Smallest time span, in seconds, the word count is divided
            by. Without this floor, several words arriving in one burst
            (for example right after a pause longer than the window) are
            divided by a few microseconds and reported as millions of WPM.

    Returns:
        ``"Calibrating..."`` during the first ``CALIBRATION_PERIOD`` seconds
        of a session, ``"0.0 WPM"`` when fewer than two words fall inside
        the window, otherwise the rate formatted like ``"123.4 WPM"``.
    """
    now = time.time()

    started = session.session_start_time
    if started is not None and now - started < CALIBRATION_PERIOD:
        return "Calibrating..."

    if len(session.word_timestamps) < 2:
        return "0.0 WPM"

    # Keep only the words that fall inside the running window.
    cutoff = now - WPM_WINDOW
    recent = [ts for ts in session.word_timestamps if ts >= cutoff]
    session.word_timestamps = recent
    if len(recent) < 2:
        return "0.0 WPM"

    # Clamp the span: it is ~0 for a burst of words and can even be negative
    # if the wall clock stepped backwards between timestamps.
    time_span = max(now - recent[0], min_span)
    if time_span <= 0:
        return "0.0 WPM"

    wpm = (len(recent) / time_span) * 60
    return f"{round(wpm, 1)} WPM"
173
+
174
+
175
async def send_silence(ws, duration=2.0):
    """Warm up the model by streaming *duration* seconds of silent PCM16 audio.

    The zeros are sent as ``input_audio_buffer.append`` messages holding at
    most 0.1 s of samples each, with a 0.05 s pause after every message.

    Args:
        ws: Connection object exposing an awaitable ``send(str)``.
        duration: Length of the silence in seconds.
    """
    total_samples = int(SAMPLE_RATE * duration)
    zeros = np.zeros(total_samples, dtype=np.int16)
    samples_per_message = int(SAMPLE_RATE * 0.1)

    for start in range(0, total_samples, samples_per_message):
        window = zeros[start:start + samples_per_message]
        payload = {
            "type": "input_audio_buffer.append",
            "audio": base64.b64encode(window.tobytes()).decode("utf-8"),
        }
        await ws.send(json.dumps(payload))
        await asyncio.sleep(0.05)
190
+
191
+
192
async def websocket_handler(session):
    """Connect to WebSocket and handle audio streaming + transcription.

    Sequence: connect to the module-level ``ws_url``, read and discard the
    first server message, send ``session.update`` with ``model``, stream
    ``WARMUP_DURATION`` seconds of silence followed by a commit, then run a
    sender loop (drains ``session.audio_queue``) and a receiver loop
    (appends ``transcription.delta`` text to the session) concurrently.

    As soon as either loop finishes, the other one is cancelled so the
    connection is closed promptly instead of waiting for the next server
    message. If the loops ended because the stream failed or the server
    closed the connection while the session was still marked as running,
    the session is flagged as stopped with status ``"error"``.

    Args:
        session: Per-user state object; its ``status_message``,
            ``is_running``, ``transcription_text``, ``word_timestamps`` and
            ``current_wpm`` attributes are updated in place.
    """
    try:
        async with websockets.connect(ws_url) as ws:
            await ws.recv()  # first server message is read but not used
            await ws.send(json.dumps({"type": "session.update", "model": model}))

            session.status_message = "warming"
            await send_silence(ws, WARMUP_DURATION)
            await ws.send(json.dumps({"type": "input_audio_buffer.commit"}))
            session.status_message = "listening"

            # Set by either loop when it ends for a reason other than a
            # stop request or the session timeout.
            stream_failed = False

            def timed_out():
                started = session.session_start_time
                return started is not None and time.time() - started >= SESSION_TIMEOUT

            async def send_audio():
                nonlocal stream_failed
                loop = asyncio.get_running_loop()
                while session.is_running:
                    try:
                        if timed_out():
                            print("Session timeout reached (5 minutes)")
                            session.is_running = False
                            session.status_message = "timeout"
                            break

                        # queue.Queue.get blocks, so wait for the next chunk
                        # in a worker thread; the 0.1 s timeout lets the
                        # loop re-check is_running regularly.
                        chunk = await loop.run_in_executor(
                            None, session.audio_queue.get, True, 0.1
                        )
                        if session.is_running:
                            await ws.send(
                                json.dumps(
                                    {"type": "input_audio_buffer.append", "audio": chunk}
                                )
                            )
                    except queue.Empty:
                        continue
                    except Exception as e:
                        print(f"Error sending audio: {e}")
                        stream_failed = True
                        break

            async def receive_transcription():
                nonlocal stream_failed
                try:
                    async for message in ws:
                        if not session.is_running:
                            return

                        if timed_out():
                            session.status_message = "timeout"
                            session.is_running = False
                            return

                        data = json.loads(message)
                        if data.get("type") != "transcription.delta":
                            continue

                        # Tolerate a delta message without a "delta" field.
                        delta = data.get("delta") or ""
                        session.transcription_text += delta

                        # One timestamp per whitespace-separated word.
                        now = time.time()
                        session.word_timestamps.extend(now for _ in delta.split())
                        session.current_wpm = calculate_wpm(session)

                    # The iterator ended without a stop request: the server
                    # closed the connection.
                    stream_failed = True
                except Exception as e:
                    print(f"Error receiving transcription: {e}")
                    stream_failed = True

            tasks = [
                asyncio.create_task(send_audio()),
                asyncio.create_task(receive_transcription()),
            ]
            _, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
            for task in pending:
                task.cancel()
            # Collect results (including CancelledError) so nothing is left
            # pending when the connection context exits.
            await asyncio.gather(*tasks, return_exceptions=True)

            if stream_failed and session.is_running:
                session.is_running = False
                session.status_message = "error"
    except Exception as e:
        print(f"WebSocket connection error: {e}")
        session.status_message = "error"
        # No worker is serving this session any more.
        session.is_running = False
260
+
261
+
262
def start_websocket(session):
    """Run the WebSocket handler to completion on the calling thread.

    Intended as a ``threading.Thread`` target. Marks the session as running
    and then drives ``websocket_handler(session)`` with ``asyncio.run``,
    which creates a fresh event loop for this thread and, on exit, cancels
    leftover tasks, shuts down the default executor and closes the loop.

    Args:
        session: Per-user state object handed through to the handler.
    """
    session.is_running = True
    try:
        asyncio.run(websocket_handler(session))
    except Exception as e:
        # Thread boundary: report the failure instead of letting the
        # exception escape the worker thread.
        print(f"WebSocket error: {e}")
276
+
277
+
278
def auto_start_recording(session):
    """Automatically start the transcription service when audio begins.

    When the session is not running, its transcript/WPM state is reset, the
    status becomes ``"connecting"`` and a daemon thread running
    ``start_websocket`` is launched. A session that is already running is
    left untouched.

    Args:
        session: Per-user state object, mutated in place.

    Returns:
        The transcription card HTML for the session's current state.
    """
    if not session.is_running:
        # Claim the session *before* the worker thread exists. If the flag
        # were only set by the thread itself, the caller would still see
        # is_running == False right after this call (dropping the chunk
        # that triggered the start), and concurrent audio events could
        # each launch their own worker.
        session.is_running = True

        session.transcription_text = ""
        session.word_timestamps = []
        session.current_wpm = "Calibrating..."
        session.session_start_time = time.time()
        session.status_message = "connecting"

        thread = threading.Thread(target=start_websocket, args=(session,), daemon=True)
        thread.start()

    return get_transcription_html(session.transcription_text, session.status_message, session.current_wpm)
290
+
291
+
292
def clear_history(session):
    """Stop the running session and reset it to its idle state.

    Returns:
        A 2-tuple: the HTML of an empty "ready" transcription card, and
        ``None`` for the second output wired to this handler.
    """
    # Clearing the flag is what tells the background loops to stop.
    session.is_running = False

    # Discard every queued audio chunk without ever blocking.
    pending_audio = session.audio_queue
    while True:
        try:
            pending_audio.get_nowait()
        except queue.Empty:
            break

    session.status_message = "ready"
    session.session_start_time = None
    session.current_wpm = "Calibrating..."
    session.word_timestamps = []
    session.transcription_text = ""

    empty_card = get_transcription_html("", "ready", "Calibrating...")
    return empty_card, None
310
+
311
+
312
def process_audio(audio, session):
    """Process incoming audio and queue it for streaming.

    The chunk is scaled to floats, down-mixed to mono, resampled to
    ``SAMPLE_RATE`` by linear interpolation, converted to PCM16, base64
    encoded and put on ``session.audio_queue`` without blocking (the chunk
    is dropped when the queue is full).

    Args:
        audio: ``None`` or a ``(sample_rate, samples)`` pair where
            ``samples`` is a NumPy array, either 1-D or with the channels
            on axis 1.
        session: Per-user state object.

    Returns:
        The transcription card HTML for the session's current state. Any
        exception is reported on stdout and the current card is returned.
    """
    try:
        # Quick return if audio is None
        if audio is None:
            wpm = session.current_wpm if session.is_running else "Calibrating..."
            return get_transcription_html(session.transcription_text, session.status_message, wpm)

        # Auto-start if not running (a timed-out or failed session must be
        # cleared explicitly first).
        if not session.is_running and session.status_message not in ["timeout", "error"]:
            auto_start_recording(session)

        # Skip processing if session stopped
        if not session.is_running:
            return get_transcription_html(session.transcription_text, session.status_message, session.current_wpm)

        sample_rate, audio_data = audio

        # Scale integer PCM to [-1, 1] BEFORE down-mixing. mean() returns
        # float64, so testing the dtype after the down-mix would skip the
        # scaling for stereo integer input and overflow the PCM16
        # conversion below.
        if np.issubdtype(audio_data.dtype, np.signedinteger):
            full_scale = float(np.iinfo(audio_data.dtype).max)  # 32767 for int16
            audio_float = audio_data.astype(np.float32) / full_scale
        else:
            audio_float = audio_data.astype(np.float32)

        # Convert to mono if stereo
        if audio_float.ndim > 1:
            audio_float = audio_float.mean(axis=1)

        # Nothing to send for an empty chunk (np.interp rejects empty input).
        if audio_float.size == 0:
            return get_transcription_html(session.transcription_text, session.status_message, session.current_wpm)

        # Resample to 16kHz if needed
        if sample_rate != SAMPLE_RATE:
            num_samples = int(len(audio_float) * SAMPLE_RATE / sample_rate)
            audio_float = np.interp(
                np.linspace(0, len(audio_float) - 1, num_samples),
                np.arange(len(audio_float)),
                audio_float,
            )

        # Convert to PCM16 and base64 encode. Clip first so out-of-range
        # samples saturate instead of wrapping around in the int16 cast.
        pcm16 = (np.clip(audio_float, -1.0, 1.0) * 32767).astype(np.int16)
        b64_chunk = base64.b64encode(pcm16.tobytes()).decode("utf-8")

        # Non-blocking put to queue
        try:
            session.audio_queue.put_nowait(b64_chunk)
        except queue.Full:
            pass  # Deliberate: drop the chunk rather than stall the UI handler

        return get_transcription_html(session.transcription_text, session.status_message, session.current_wpm)
    except Exception as e:
        print(f"Error processing audio: {e}")
        return get_transcription_html(session.transcription_text, session.status_message, session.current_wpm)
363
+
364
+
365
# Runtime configuration, read from the environment.
model = os.environ.get("MODEL", "mistralai/Voxtral-Mini-4B-Realtime-2602")
host = os.environ.get("HOST", "")

ws_url = f"wss://{host}/v1/realtime"

# Gradio interface
with gr.Blocks(title="Voxtral Real-time Transcription") as demo:
    # Session state. The UserSession class itself (not an instance) is
    # passed as the value -- presumably so Gradio builds a fresh object per
    # session; confirm against the gr.State documentation.
    session_state = gr.State(value=UserSession)

    # Static header card.
    gr.HTML(get_header_html())

    # Transcription card, re-rendered by the event handlers below.
    transcription_display = gr.HTML(
        value=get_transcription_html("", "ready", "Calibrating..."),
        elem_id="transcription-output",
    )

    # Streaming microphone input, delivered to the handler as NumPy data.
    audio_input = gr.Audio(
        sources=["microphone"],
        streaming=True,
        type="numpy",
        format="wav",
        elem_id="audio-input",
        label="Microphone Input",
    )

    # Button that stops the session and wipes the transcript.
    clear_btn = gr.Button("Clear History", elem_classes=["clear-btn"])

    # Usage hint.
    gr.HTML('<p class="info-text">Click "Clear History" and refresh the page to start a new session.</p>')

    # --- Event wiring ---
    clear_btn.click(
        clear_history,
        inputs=[session_state],
        outputs=[transcription_display, audio_input],
    )

    audio_input.stream(
        process_audio,
        inputs=[audio_input, session_state],
        outputs=[transcription_display],
        show_progress="hidden",
        concurrency_limit=None,
    )

demo.queue(default_concurrency_limit=20)
demo.launch(css=CUSTOM_CSS, theme=gr.themes.Base(), ssr_mode=False, max_threads=40)
assets/voxtral.png ADDED
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio>=4.0.0
2
+ websockets
3
+ numpy
style.css ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;600&family=Inter:wght@400;500;600;700&display=swap');

/* ---------- Page background: cream with a 40px grid ---------- */
body, .gradio-container {
    background-color: #FFFAEB !important;
    background-image:
        linear-gradient(#E9E2CB 1px, transparent 1px),
        linear-gradient(90deg, #E9E2CB 1px, transparent 1px) !important;
    background-size: 40px 40px !important;
    font-family: 'Inter', sans-serif !important;
}

/* ---------- Animations ---------- */
/* pulse: status dot */
@keyframes pulse {
    0%, 100% { opacity: 1; transform: scale(1); }
    50% { opacity: 0.5; transform: scale(0.8); }
}

/* blink: transcript cursor */
@keyframes blink {
    0%, 100% { opacity: 1; }
    50% { opacity: 0; }
}

/* bounce: empty-state dots */
@keyframes bounce {
    0%, 100% { transform: translateY(0); }
    50% { transform: translateY(-8px); }
}

/* ---------- Header card ---------- */
.header-card {
    background: linear-gradient(135deg, #FFFAEB 0%, #FFF0C3 100%);
    border: 2px solid #E9E2CB;
    border-top: 4px solid #FF8205;
    padding: 2rem;
    margin-bottom: 1.5rem;
    box-shadow: 0 4px 24px rgba(0,0,0,0.06);
}

.header-title {
    font-size: 2rem;
    font-weight: 700;
    color: #1E1E1E !important;
    margin: 0 0 0.5rem 0;
    letter-spacing: -0.02em;
    display: flex;
    align-items: center;
    gap: 0.75rem;
}

.header-logo {
    width: 40px;
    height: 40px;
    object-fit: contain;
}

.header-subtitle {
    color: #444444 !important;
    font-size: 1rem;
    margin: 0;
}

/* ---------- Transcription card: frame and header ---------- */
.transcription-card {
    background: #FFFAEB !important;
    border: 2px solid #E9E2CB;
    box-shadow: 0 8px 32px rgba(0,0,0,0.08);
    overflow: hidden;
    min-height: 350px;
}

.card-header {
    background: rgba(255,255,255,0.6) !important;
    border-bottom: 1px solid #E9E2CB;
    padding: 0.75rem 1rem;
    display: flex;
    align-items: center;
    justify-content: space-between;
}

.card-header-left {
    display: flex;
    align-items: center;
    gap: 0.75rem;
}

.card-header-right {
    display: flex;
    align-items: center;
    gap: 1rem;
}

.card-title {
    font-size: 0.65rem;
    font-weight: 700;
    color: #1E1E1E !important;
    text-transform: uppercase;
    letter-spacing: 0.1em;
    margin: 0;
}

.voxtral-icon {
    width: 24px;
    height: 24px;
    object-fit: contain;
}

/* ---------- Badges ---------- */
.wpm-badge {
    display: inline-flex;
    align-items: center;
    gap: 0.4rem;
    padding: 0.25rem 0.6rem;
    font-size: 0.6rem;
    font-weight: 700;
    text-transform: uppercase;
    letter-spacing: 0.05em;
    border-radius: 2px;
    background: #FFF0C3;
    color: #1E1E1E !important;
    border: 1px solid #E9E2CB;
    font-family: 'JetBrains Mono', monospace;
}

.status-badge {
    display: inline-flex;
    align-items: center;
    gap: 0.5rem;
    padding: 0.25rem 0.75rem;
    font-size: 0.6rem;
    font-weight: 700;
    text-transform: uppercase;
    letter-spacing: 0.08em;
    border-radius: 2px;
}

.status-ready {
    background: #f5f5f5 !important;
    color: #555555 !important;
    border: 1px solid #E9E2CB;
}

.status-connecting, .status-warming {
    background: rgba(255, 130, 5, 0.15) !important;
    color: #CC6A04 !important;
    border: 1px solid #FF8205;
}

.status-listening {
    background: rgba(255, 130, 5, 0.2) !important;
    color: #CC6A04 !important;
    border: 1px solid #FF8205;
}

/* Timeout and error share the same red badge. */
.status-timeout, .status-error {
    background: rgba(225, 5, 0, 0.1) !important;
    color: #B30400 !important;
    border: 1px solid #E10500;
}

/* The dot takes the badge's text color. */
.status-dot {
    width: 6px;
    height: 6px;
    border-radius: 50%;
    background: currentColor !important;
}

.status-dot.animate {
    animation: pulse 1.5s ease-in-out infinite;
}

.status-dot.fast {
    animation: pulse 0.8s ease-in-out infinite;
}

/* ---------- Transcription card: content ---------- */
.card-content {
    padding: 1.5rem;
    min-height: 250px;
    position: relative;
    background-color: #FFFAEB !important;
    background-image:
        linear-gradient(rgba(0,0,0,0.02) 1px, transparent 1px),
        linear-gradient(90deg, rgba(0,0,0,0.02) 1px, transparent 1px);
    background-size: 20px 20px;
}

.transcript-text {
    font-family: 'JetBrains Mono', monospace !important;
    font-size: 1.1rem !important;
    line-height: 1.8 !important;
    color: #000000 !important;
    white-space: pre-wrap;
    word-break: break-word;
}

.transcript-cursor {
    display: inline-block;
    width: 10px;
    height: 20px;
    background: #FF8205 !important;
    margin-left: 4px;
    vertical-align: middle;
    animation: blink 1s step-end infinite;
}

/* ---------- Empty state ---------- */
.empty-state {
    display: flex;
    flex-direction: column;
    align-items: center;
    justify-content: center;
    height: 200px;
}

.empty-dots {
    display: flex;
    gap: 6px;
    margin-bottom: 1rem;
}

.empty-dot {
    width: 8px;
    height: 8px;
    border-radius: 50%;
    background: #FF8205 !important;
    animation: bounce 1s ease-in-out infinite;
}

/* Stagger the second and third dots. */
.empty-dot:nth-child(2) { animation-delay: 0.1s; }
.empty-dot:nth-child(3) { animation-delay: 0.2s; }

.empty-text {
    font-family: 'JetBrains Mono', monospace !important;
    font-size: 0.875rem !important;
    color: #555555 !important;
    font-style: italic;
}

/* ---------- Transcription card: footer ---------- */
.card-footer {
    background: #fafafa !important;
    border-top: 1px solid #E9E2CB;
    padding: 0.5rem 1rem;
    display: flex;
    justify-content: space-between;
    font-family: 'JetBrains Mono', monospace;
    font-size: 0.65rem;
    color: #555555 !important;
}

/* ---------- Audio component: preserve Gradio defaults ---------- */
#audio-input {
    margin-top: 1rem;
}

#audio-input button {
    color: inherit !important;
}

#audio-input .controls button {
    background: var(--button-secondary-background-fill) !important;
    color: var(--button-secondary-text-color) !important;
}

/* ---------- Clear button ---------- */
.clear-btn {
    background: #E10500 !important;
    color: #FFFFFF !important;
    border: none !important;
    padding: 0.75rem 1.5rem !important;
    font-weight: 600 !important;
    text-transform: uppercase !important;
    letter-spacing: 0.05em !important;
    font-size: 0.75rem !important;
    cursor: pointer !important;
    transition: all 0.2s !important;
    border-radius: 0 !important;
    margin-top: 1rem !important;
}

.clear-btn:hover {
    background: #B30400 !important;
}

/* ---------- Miscellaneous ---------- */
/* Hide every <footer> element. */
footer {
    display: none !important;
}

.gradio-container .prose {
    max-width: none !important;
}

.info-text {
    font-family: 'JetBrains Mono', monospace !important;
    font-size: 0.75rem !important;
    color: #888888 !important;
    font-style: italic;
    text-align: center;
    margin-top: 1rem;
}