import numpy as np
import asyncio
from typing import List, Optional
from models.tinygpt2_model import TinyGPT2Model
from transformers import pipeline
import os
import tempfile
import soundfile as sf
import torch
from pydantic import PrivateAttr
from crewai.tools import BaseTool


class MultilingualVoiceProcessor:
    def __init__(self, model_name="openai/whisper-base", device=None):
        # Honour a custom cache location if TRANSFORMERS_CACHE is set.
        cache_dir = os.getenv("TRANSFORMERS_CACHE", None)
        if device is None:
            device = 0 if torch.cuda.is_available() else -1
        self.pipe = pipeline(
            "automatic-speech-recognition",
            model=model_name,
            device=device,
            # model_kwargs are forwarded to from_pretrained, so the cache dir is respected.
            model_kwargs={"cache_dir": cache_dir} if cache_dir else {},
            generate_kwargs={"task": "transcribe", "return_timestamps": False},
        )

    async def transcribe(self, audio_data: np.ndarray, language: Optional[str] = None):
        # Write the raw samples to a temporary 16 kHz WAV file for the ASR pipeline.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as tmp_wav:
            sf.write(tmp_wav.name, audio_data, samplerate=16000)
            # Pass an explicit language hint through generate_kwargs if one was given;
            # keep "task" so the call-time override does not drop transcription mode.
            extra = {"generate_kwargs": {"task": "transcribe", "language": language}} if language else {}
            result = self.pipe(tmp_wav.name, **extra)
        text = result["text"]
        return text, language or "unknown"

    async def synthesize(self, text, language: str = "en", voice_type: str = "normal"):
        raise NotImplementedError("Use gTTS or edge-tts as before.")
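

# Illustrative sketch only: `synthesize` above is intentionally unimplemented, and its
# error message points at gTTS/edge-tts. The helper below shows one possible gTTS-based
# approach; the name `_gtts_synthesize_sketch` and the temporary MP3 output are
# assumptions, not part of the original tool surface.
def _gtts_synthesize_sketch(text: str, language: str = "en") -> str:
    """Render `text` to an MP3 file with gTTS and return the file path (sketch)."""
    from gtts import gTTS  # imported lazily so the module does not hard-require gTTS

    tmp_mp3 = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
    tmp_mp3.close()
    gTTS(text=text, lang=language).save(tmp_mp3.name)
    return tmp_mp3.name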


class TranscribeAudioTool(BaseTool):
    name: str = "transcribe_audio"
    description: str = "Transcribe audio to text and detect language."
    model_config = {"arbitrary_types_allowed": True}
    _vp: MultilingualVoiceProcessor = PrivateAttr()

    def __init__(self, config=None):
        super().__init__()
        self._vp = MultilingualVoiceProcessor()

    def _run(self, audio_data: List[float], language: Optional[str] = None):
        # CrewAI passes plain lists of floats; convert to a float32 array for the ASR model.
        audio_np = np.array(audio_data, dtype=np.float32)
        text, detected_lang = asyncio.run(self._vp.transcribe(audio_np, language))
        return {"text": text, "language": detected_lang}


class DetectEmotionTool(BaseTool):
    name: str = "detect_emotion"
    description: str = "Detect the emotional state from text."
    model_config = {"arbitrary_types_allowed": True}

    def __init__(self, config=None):
        super().__init__()

    def _run(self, text: str):
        model = TinyGPT2Model()
        prompt = f'Analyse emotions in: "{text}". Format: JSON with primary_emotion, intensity, feelings, concerns.'
        response = model.generate(prompt)
        # TODO: parse `response` into the structured fields below; for now the model
        # output is ignored and placeholder values are returned.
        return {
            "primary_emotion": "detected_emotion",
            "intensity": "medium",
            "feelings": ["feeling1"],
            "concerns": ["concern1"],
        }


class GenerateReflectiveQuestionsTool(BaseTool):
    name: str = "generate_reflective_questions"
    description: str = "Generate reflective questions."
    model_config = {"arbitrary_types_allowed": True}

    def __init__(self, config=None):
        super().__init__()

    def _run(self, context: dict):
        emotion = context.get("primary_emotion", "neutral")
        questions_map = {
            "anxiety": ["What triggers your anxiety?", "How do you cope?"],
            "sadness": ["What helps when you feel sad?", "Who can you talk to?"],
        }
        return questions_map.get(emotion, [
            "How are you feeling?",
            "What feels important now?",
        ])


class VoiceTools:
    def __init__(self, config=None):
        self.transcribe_audio = TranscribeAudioTool(config)
        self.detect_emotion = DetectEmotionTool(config)
        self.generate_reflective_questions = GenerateReflectiveQuestionsTool(config)
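

# Minimal usage sketch, assuming a 16 kHz mono float32 recording. The one-second sine
# wave below is a stand-in for real microphone input, and calling `_run` directly is
# only for local testing; in production CrewAI invokes these tools itself.
if __name__ == "__main__":
    tools = VoiceTools()

    # Dummy audio: 1 s of a 440 Hz tone at 16 kHz (Whisper will not return meaningful text).
    t = np.linspace(0, 1, 16000, endpoint=False)
    dummy_audio = (0.1 * np.sin(2 * np.pi * 440 * t)).astype(np.float32)

    transcript = tools.transcribe_audio._run(dummy_audio.tolist())
    print("Transcript:", transcript)

    emotion = tools.detect_emotion._run("I have been feeling overwhelmed lately.")
    print("Emotion:", emotion)

    questions = tools.generate_reflective_questions._run(emotion)
    print("Questions:", questions)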