Commit
·
ab6db1e
1
Parent(s):
d2c4233
using openrouter ai chat instead of hf models
Browse files- main.py +16 -20
- requirements.txt +3 -1
main.py
CHANGED
|
@@ -4,6 +4,7 @@ import os
|
|
| 4 |
import io # For handling bytes data in memory
|
| 5 |
import yt_dlp # YouTube audio downloader
|
| 6 |
import requests # For making HTTP requests (to audio URLs)
|
|
|
|
| 7 |
from fastapi import FastAPI, HTTPException, Request # The web framework
|
| 8 |
from fastapi.middleware.cors import CORSMiddleware # For allowing frontend access
|
| 9 |
from pydantic import BaseModel # For data validation
|
|
@@ -18,6 +19,9 @@ from dotenv import load_dotenv # To load .env file locally
|
|
| 18 |
load_dotenv()
|
| 19 |
|
| 20 |
HF_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
# Check if the API key is loaded (crucial!)
|
| 23 |
if not HF_API_KEY:
|
|
@@ -29,7 +33,7 @@ if not HF_API_KEY:
|
|
| 29 |
# I can change these! Smaller Whisper models (base, small, medium) are faster.
|
| 30 |
# Different LLMs have different strengths.
|
| 31 |
ASR_MODEL = "openai/whisper-large-v3"
|
| 32 |
-
LLM_MODEL = "
|
| 33 |
|
| 34 |
# Initialize the Hugging Face Inference Client
|
| 35 |
# Handles authentication using the API key automatically
|
|
@@ -219,13 +223,12 @@ def transcribe_audio(audio_bytes: bytes) -> str:
|
|
| 219 |
|
| 220 |
def generate_notes_from_transcript(transcript: str) -> str:
|
| 221 |
"""
|
| 222 |
-
Sends the transcript to
|
| 223 |
"""
|
| 224 |
-
if not
|
| 225 |
-
raise HTTPException(status_code=503, detail="
|
| 226 |
if not transcript:
|
| 227 |
-
|
| 228 |
-
return "Could not generate notes: Transcription was empty."
|
| 229 |
|
| 230 |
print(f"Generating notes for transcript (length {len(transcript)}) using {LLM_MODEL}...")
|
| 231 |
|
|
@@ -243,26 +246,19 @@ def generate_notes_from_transcript(transcript: str) -> str:
|
|
| 243 |
Structured Notes:"""
|
| 244 |
|
| 245 |
try:
|
| 246 |
-
|
| 247 |
-
response = hf_inference.text_generation(
|
| 248 |
-
prompt=prompt,
|
| 249 |
model=LLM_MODEL,
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
temperature=0.7
|
| 253 |
-
repetition_penalty=1.1, # Slightly discourage repeating the same phrases
|
| 254 |
-
# Other parameters like top_p, top_k can also be tuned
|
| 255 |
)
|
| 256 |
-
|
| 257 |
-
# The response is usually the generated text directly for text-generation
|
| 258 |
-
# Sometimes it might include the prompt, so basic cleaning can help.
|
| 259 |
-
notes = response.strip()
|
| 260 |
print("Note generation successful.")
|
| 261 |
return notes
|
| 262 |
|
| 263 |
except Exception as e:
|
| 264 |
-
print(f"
|
| 265 |
-
raise HTTPException(status_code=503, detail=f"
|
| 266 |
|
| 267 |
|
| 268 |
# --- API Endpoints ---
|
|
|
|
| 4 |
import io # For handling bytes data in memory
|
| 5 |
import yt_dlp # YouTube audio downloader
|
| 6 |
import requests # For making HTTP requests (to audio URLs)
|
| 7 |
+
import openai # for making requests to openrouter
|
| 8 |
from fastapi import FastAPI, HTTPException, Request # The web framework
|
| 9 |
from fastapi.middleware.cors import CORSMiddleware # For allowing frontend access
|
| 10 |
from pydantic import BaseModel # For data validation
|
|
|
|
| 19 |
load_dotenv()
|
| 20 |
|
| 21 |
HF_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
|
| 22 |
+
OPENROUTER_API_KEY= os.getenv("OPENROUTER_API_KEY")
|
| 23 |
+
openai.api_key = OPENROUTER_API_KEY
|
| 24 |
+
openai.api_base = "https://openrouter.ai/api/v1"
|
| 25 |
|
| 26 |
# Check if the API key is loaded (crucial!)
|
| 27 |
if not HF_API_KEY:
|
|
|
|
| 33 |
# I can change these! Smaller Whisper models (base, small, medium) are faster.
|
| 34 |
# Different LLMs have different strengths.
|
| 35 |
ASR_MODEL = "openai/whisper-large-v3"
|
| 36 |
+
LLM_MODEL = "microsoft/mai-ds-r1:free"
|
| 37 |
|
| 38 |
# Initialize the Hugging Face Inference Client
|
| 39 |
# Handles authentication using the API key automatically
|
|
|
|
| 223 |
|
| 224 |
def generate_notes_from_transcript(transcript: str) -> str:
|
| 225 |
"""
|
| 226 |
+
Sends the transcript to OpenRouter LLM (chat model) and gets structured notes back.
|
| 227 |
"""
|
| 228 |
+
if not OPENROUTER_API_KEY:
|
| 229 |
+
raise HTTPException(status_code=503, detail="OpenRouter API key not found.")
|
| 230 |
if not transcript:
|
| 231 |
+
return "Transcript was empty."
|
|
|
|
| 232 |
|
| 233 |
print(f"Generating notes for transcript (length {len(transcript)}) using {LLM_MODEL}...")
|
| 234 |
|
|
|
|
| 246 |
Structured Notes:"""
|
| 247 |
|
| 248 |
try:
|
| 249 |
+
response = openai.ChatCompletion.create(
|
|
|
|
|
|
|
| 250 |
model=LLM_MODEL,
|
| 251 |
+
messages=[{"role": "user", "content": prompt}],
|
| 252 |
+
max_tokens=1024,
|
| 253 |
+
temperature=0.7
|
|
|
|
|
|
|
| 254 |
)
|
| 255 |
+
notes = response.choices[0].message.content
|
|
|
|
|
|
|
|
|
|
| 256 |
print("Note generation successful.")
|
| 257 |
return notes
|
| 258 |
|
| 259 |
except Exception as e:
|
| 260 |
+
print(f"OpenRouter call failed: {e}")
|
| 261 |
+
raise HTTPException(status_code=503, detail=f"OpenRouter failed: {e}")
|
| 262 |
|
| 263 |
|
| 264 |
# --- API Endpoints ---
|
requirements.txt
CHANGED
|
@@ -9,4 +9,6 @@ huggingface_hub>=0.19.0 # Includes InferenceClient
|
|
| 9 |
|
| 10 |
# Optional: If I decide I need local audio manipulation later
|
| 11 |
pydub>=0.25.1
|
| 12 |
-
# ffmpeg-python # Requires ffmpeg binary installed in Dockerfile too!
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
# Optional: If I decide I need local audio manipulation later
|
| 11 |
pydub>=0.25.1
|
| 12 |
+
# ffmpeg-python # Requires ffmpeg binary installed in Dockerfile too!
|
| 13 |
+
|
| 14 |
+
openai>=0.27.0,<1.0.0  # main.py uses the legacy 0.x interface (openai.api_base / openai.ChatCompletion.create), which was removed in openai 1.0
|