Commit 99ecc54 · Parent: 5c73715
Continuous logs of faster-whisper
- inference.py +13 -2
- shared.py +32 -3
- test_websocket.py +0 -1
inference.py
CHANGED
@@ -69,6 +69,11 @@ class ConnectionManager:
         connection_stats["current_connections"] = len(self.active_connections)
         connection_stats["total_connections"] += 1
 
+        # Start recording if this is the first connection and system is ready
+        if len(self.active_connections) == 1 and diart and not diart.is_running:
+            logger.info("First connection established, starting recording")
+            diart.start_recording()
+
         logger.info(f"WebSocket connected: {self.connection_metadata[websocket]['client_id']}. "
                     f"Total connections: {len(self.active_connections)}")
 
@@ -83,6 +88,11 @@ class ConnectionManager:
 
         connection_stats["current_connections"] = len(self.active_connections)
 
+        # If no more connections, stop recording to save resources
+        if len(self.active_connections) == 0 and diart and diart.is_running:
+            logger.info("No active connections, stopping recording")
+            diart.stop_recording()
+
         logger.info(f"WebSocket disconnected: {client_id}. "
                     f"Remaining connections: {len(self.active_connections)}")
 
@@ -130,8 +140,9 @@ async def initialize_diarization_system():
 
     if success:
         logger.info("Models initialized successfully")
-
-
+        # Don't start recording yet - wait for an actual connection
+        # diart.start_recording()
+        logger.info("System ready for connections")
         return True
     else:
         logger.error("Failed to initialize models")
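The two ConnectionManager hunks tie the recorder's lifecycle to the number of open WebSocket connections: recording starts when the first client connects and stops when the last one disconnects, instead of running from startup. Below is a minimal sketch of that pattern; `engine` is a hypothetical stand-in for the `diart` object and is only assumed to expose `is_running`, `start_recording()`, and `stop_recording()`:

import logging

logger = logging.getLogger(__name__)

class RecorderLifecycle:
    """Sketch of the connection-counted start/stop pattern from inference.py.

    `engine` is hypothetical here; it only needs is_running, start_recording()
    and stop_recording(), like the diart object used in the real handler.
    """

    def __init__(self, engine):
        self.engine = engine
        self.active_connections = set()

    def connect(self, websocket):
        self.active_connections.add(websocket)
        # First client in: begin capturing and transcribing audio.
        if len(self.active_connections) == 1 and self.engine and not self.engine.is_running:
            logger.info("First connection established, starting recording")
            self.engine.start_recording()

    def disconnect(self, websocket):
        self.active_connections.discard(websocket)
        # Last client gone: stop recording to save resources.
        if not self.active_connections and self.engine and self.engine.is_running:
            logger.info("No active connections, stopping recording")
            self.engine.stop_recording()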
shared.py
CHANGED
@@ -347,7 +347,11 @@ class RealtimeSpeakerDiarization:
             realtime_processing_pause=0,
             realtime_model_type=REALTIME_TRANSCRIPTION_MODEL,
             on_realtime_transcription_update=self.live_text_detected,
-            on_recording_stop=self.process_final_text
+            on_recording_stop=self.process_final_text,
+            # Add setting to reduce log spam
+            verbose=False,
+            # Don't start processing immediately
+            start_on_init=False
         )
 
         logger.info("Models initialized successfully!")
 
@@ -452,8 +456,16 @@ class RealtimeSpeakerDiarization:
         self.sentence_thread = threading.Thread(target=self.process_sentence_queue, daemon=True)
         self.sentence_thread.start()
 
-        # Start the RealtimeSTT recorder
-        if self.recorder:
+        # Start the RealtimeSTT recorder explicitly
+        if self.recorder:
+            # First make sure it's stopped if it was running
+            try:
+                if getattr(self.recorder, '_is_running', False):
+                    self.recorder.stop()
+            except Exception:
+                pass
+
+            # Then start it fresh
             self.recorder.start()
             logger.info("RealtimeSTT recorder started")
 
@@ -472,6 +484,10 @@ class RealtimeSpeakerDiarization:
         try:
             self.recorder.stop()
             logger.info("RealtimeSTT recorder stopped")
+
+            # Reset the last transcription
+            with self.transcription_lock:
+                self.last_transcription = ""
         except Exception as e:
             logger.error(f"Error stopping recorder: {e}")
 
@@ -616,6 +632,19 @@ class RealtimeSpeakerDiarization:
         if len(audio_data.shape) > 1:
            audio_data = np.mean(audio_data, axis=1) if audio_data.shape[1] > 1 else audio_data.flatten()
 
+        # Check if audio has meaningful content (not just silence)
+        audio_level = np.abs(audio_data).mean()
+        is_silence = audio_level < 0.01  # Threshold for silence
+
+        # Skip processing for silent audio
+        if is_silence:
+            return {
+                "status": "silent",
+                "buffer_size": len(self.audio_processor.audio_buffer),
+                "speaker_id": self.speaker_detector.current_speaker,
+                "conversation_html": self.current_conversation
+            }
+
         # Normalize if needed
         if np.max(np.abs(audio_data)) > 1.0:
             audio_data = audio_data / np.max(np.abs(audio_data))
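The last shared.py hunk is the piece that addresses the continuous faster-whisper logging: process_audio now measures the mean absolute amplitude of each chunk and returns a "silent" status instead of pushing near-silence through transcription and diarization. A minimal, self-contained sketch of that gate, assuming mono float audio scaled to [-1, 1] (the 0.01 threshold is the one used in the diff):

import numpy as np

SILENCE_THRESHOLD = 0.01  # mean absolute amplitude below this is treated as silence

def is_silent(audio_data: np.ndarray, threshold: float = SILENCE_THRESHOLD) -> bool:
    """Return True when a mono float chunk in [-1, 1] carries no meaningful signal."""
    if audio_data.ndim > 1:
        # Collapse multi-channel audio to mono, mirroring the handling above the gate.
        audio_data = audio_data.mean(axis=1)
    return float(np.abs(audio_data).mean()) < threshold

# Example: a quiet chunk is skipped, a louder one is processed.
quiet = np.random.randn(3200).astype(np.float32) * 0.001
loud = np.random.randn(3200).astype(np.float32) * 0.1
print(is_silent(quiet), is_silent(loud))  # typically: True False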
test_websocket.py
CHANGED
@@ -15,7 +15,6 @@ async def test_ws():
            audio = (np.random.randn(3200) * 3000).astype(np.int16)
            await websocket.send(audio.tobytes())
            print(f"Sent audio chunk {i+1}/20")
-            await asyncio.sleep(0.05)
 
        try:
            while True: