Agents_Course_Final_Assignment_Evaluator

Paused

Michele De Stefano

Adapted the code so that it can run locally

1b8aef5 8 months ago

1.43 kB

	import torch

	from langchain_core.tools import tool
	from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

	from .data_helpers import get_file_path


	@tool(parse_docstring=True)
	def transcribe_audio_file(file_name: str) -> str:
	"""
	Transcribes an audio file to text.

	Args:
	file_name: The name of the audio file. This is simply the file name,
	not the full path.

	Returns:
	The transcribed text.
	"""
	# Specific setting for local run with GPU busy for the LLM (ollama)
	cuda_available = False
	device = "cuda:0" if cuda_available else "cpu"
	torch_dtype = torch.float16 if cuda_available else torch.float32

	model_id = "openai/whisper-large-v3-turbo"

	model = AutoModelForSpeechSeq2Seq.from_pretrained(
	model_id,
	torch_dtype=torch_dtype,
	low_cpu_mem_usage=True,
	use_safetensors=True
	)
	model.to(device)

	processor = AutoProcessor.from_pretrained(model_id)

	pipe = pipeline(
	"automatic-speech-recognition",
	model=model,
	tokenizer=processor.tokenizer,
	feature_extractor=processor.feature_extractor,
	torch_dtype=torch_dtype,
	device=device,
	)

	generate_kwargs = {
	"return_timestamps": True,
	}

	file_path = get_file_path(file_name)
	result = pipe(file_path, generate_kwargs=generate_kwargs)

	return result["text"]