# Hugging Face Spaces header (scrape artifact, not code) — Space status: Sleeping.
import gradio as gr
import numpy as np
import threading
import time
from components.transcriber import AudioProcessor
from components.gpt import gen_llm_response
from components.streaming import StreamingManager, create_streaming_interface
from config import config

# Shared transcription engine; device/compute settings are resolved from
# `config` inside AudioProcessor ("base.en" = English-only Whisper base model).
processor = AudioProcessor(model_size="base.en")

# Tune buffering for responsiveness: require at least 1 second of audio
# before a transcription pass, and run a pass at most once per second.
# NOTE(review): earlier comments claimed "2 seconds" / "1.5 seconds", which
# did not match these values — the code is taken as authoritative here.
processor.min_process_length = 1 * processor.sample_rate
processor.process_interval = 1

# Wraps the processor for the streaming UI section created further below.
streaming_manager = StreamingManager(processor)
def process_mic_audio(audio):
    """Feed one microphone chunk to the processor and refresh the UI.

    Returns a (status, transcription, llm_response) triple for the three
    bound output components. When no audio arrived, every component is
    left untouched via gr.update().
    """
    if audio is None:
        return gr.update(), gr.update(), gr.update()

    sample_rate, samples = audio
    buffered = processor.add_audio(samples, sample_rate)

    # Give any in-flight transcription up to one second to finish so the
    # text read back below is as fresh as possible.
    processor.wait_for_processing_complete(1.0)
    text = processor.get_transcription()

    # Only consult the LLM when there is actual transcribed text.
    reply = gen_llm_response(text) if text and len(text) > 0 else ""

    sr = processor.sample_rate
    status = (
        f"Buffer: {buffered / sr:.1f}s | "
        f"Processed: {processor.processed_length / sr:.1f}s"
    )
    return status, text, reply
def clear_audio_buffer():
    """Reset the audio buffer; blank both text panes, leave playback as-is."""
    cleared = processor.clear_buffer()
    return cleared, gr.update(), "", ""
def get_current_buffer():
    """Fetch the buffered audio in a form the gr.Audio widget can play."""
    playback = processor.get_playback_audio()
    return playback
def force_transcribe():
    """Flush and transcribe everything still sitting in the buffer.

    Returns (transcription, llm_response); the LLM is consulted only when
    the forced pass produced non-empty text.
    """
    text = processor.force_complete_processing()
    if not text:
        return text, ""
    return text, gen_llm_response(text)
# Build the Gradio interface. Component creation order determines layout;
# event wiring happens after all components exist.
with gr.Blocks(title="Live Speech Transcription") as demo:
    # Header shows which device the transcriber runs on (plus GPU name on CUDA).
    device_info = config.get_device_info()
    device_status = f"🖥️ **Device:** {device_info['device'].upper()}"
    if device_info['cuda_available'] and device_info['device'] == 'cuda':
        device_status += f" | **GPU:** {device_info.get('cuda_device_name', 'Unknown')}"
    gr.Markdown(f"# Live Speech Recognition with LLM Response\n{device_status}")

    with gr.Row():
        # Streaming mic input: chunks are delivered to process_mic_audio below.
        audio_input = gr.Audio(sources=["microphone"], streaming=True, label="Microphone Input")
    with gr.Row():
        status_output = gr.Textbox(label="Buffer Status", interactive=False)
        buffer_audio = gr.Audio(label="Current Buffer (Click to Play)", interactive=False)
    with gr.Row():
        clear_btn = gr.Button("Clear Buffer")
        play_btn = gr.Button("Get Buffer for Playback")
        force_btn = gr.Button("Force Transcribe")
    with gr.Row():
        with gr.Column():
            transcription_display = gr.Textbox(label="Live Transcription", lines=5, interactive=False)
        with gr.Column():
            llm_response_display = gr.Textbox(label="LLM Response", lines=5, interactive=False)

    # Create streaming interface; it supplies its own transcription/LLM
    # output components via the returned dict.
    streaming_components = create_streaming_interface(streaming_manager)

    # NOTE(review): every callback below writes to the streaming components'
    # outputs, not to transcription_display / llm_response_display above —
    # those two textboxes are never wired to any event. Confirm intentional.

    # Connect main interface components
    audio_input.stream(
        process_mic_audio,
        audio_input,
        [status_output, streaming_components['transcription_output'], streaming_components['llm_output']]
    )
    clear_btn.click(
        clear_audio_buffer,
        None,
        [status_output, buffer_audio, streaming_components['transcription_output'], streaming_components['llm_output']]
    )
    play_btn.click(get_current_buffer, None, buffer_audio)
    force_btn.click(
        force_transcribe,
        None,
        [streaming_components['transcription_output'], streaming_components['llm_output']]
    )
| if __name__ == "__main__": | |
| print("🎤 Live Speech Transcription App with LLM") | |
| print("=" * 40) | |
| # Display device configuration | |
| device_info = config.get_device_info() | |
| print("🔧 Configuration:") | |
| print(f" Device: {device_info['device'].upper()}") | |
| print(f" Compute type: {device_info['compute_type']}") | |
| print(f" CUDA available: {device_info['cuda_available']}") | |
| if device_info['cuda_available'] and device_info['device'] == 'cuda': | |
| print(f" GPU: {device_info.get('cuda_device_name', 'Unknown')}") | |
| memory_gb = device_info.get('cuda_memory_total', 0) / (1024**3) | |
| print(f" GPU Memory: {memory_gb:.1f} GB") | |
| print("\nFeatures:") | |
| print("• Real-time microphone transcription") | |
| print("• Audio buffer playback") | |
| print("• LLM responses displayed in UI") | |
| print("• RoBERTa+ hybrid question detection") | |
| # Launch the interface | |
| demo.launch() | |