Commit
·
ab6db1e
1
Parent(s):
d2c4233
using openrouter ai chat instead of hf models
Browse files- main.py +16 -20
- requirements.txt +3 -1
main.py
CHANGED
|
@@ -4,6 +4,7 @@ import os
|
|
| 4 |
import io # For handling bytes data in memory
|
| 5 |
import yt_dlp # YouTube audio downloader
|
| 6 |
import requests # For making HTTP requests (to audio URLs)
|
|
|
|
| 7 |
from fastapi import FastAPI, HTTPException, Request # The web framework
|
| 8 |
from fastapi.middleware.cors import CORSMiddleware # For allowing frontend access
|
| 9 |
from pydantic import BaseModel # For data validation
|
|
@@ -18,6 +19,9 @@ from dotenv import load_dotenv # To load .env file locally
|
|
| 18 |
load_dotenv()
|
| 19 |
|
| 20 |
HF_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
# Check if the API key is loaded (crucial!)
|
| 23 |
if not HF_API_KEY:
|
|
@@ -29,7 +33,7 @@ if not HF_API_KEY:
|
|
| 29 |
# I can change these! Smaller Whisper models (base, small, medium) are faster.
|
| 30 |
# Different LLMs have different strengths.
|
| 31 |
ASR_MODEL = "openai/whisper-large-v3"
|
| 32 |
-
LLM_MODEL = "
|
| 33 |
|
| 34 |
# Initialize the Hugging Face Inference Client
|
| 35 |
# Handles authentication using the API key automatically
|
|
@@ -219,13 +223,12 @@ def transcribe_audio(audio_bytes: bytes) -> str:
|
|
| 219 |
|
| 220 |
def generate_notes_from_transcript(transcript: str) -> str:
|
| 221 |
"""
|
| 222 |
-
Sends the transcript to
|
| 223 |
"""
|
| 224 |
-
if not
|
| 225 |
-
raise HTTPException(status_code=503, detail="
|
| 226 |
if not transcript:
|
| 227 |
-
|
| 228 |
-
return "Could not generate notes: Transcription was empty."
|
| 229 |
|
| 230 |
print(f"Generating notes for transcript (length {len(transcript)}) using {LLM_MODEL}...")
|
| 231 |
|
|
@@ -243,26 +246,19 @@ def generate_notes_from_transcript(transcript: str) -> str:
|
|
| 243 |
Structured Notes:"""
|
| 244 |
|
| 245 |
try:
|
| 246 |
-
|
| 247 |
-
response = hf_inference.text_generation(
|
| 248 |
-
prompt=prompt,
|
| 249 |
model=LLM_MODEL,
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
temperature=0.7
|
| 253 |
-
repetition_penalty=1.1, # Slightly discourage repeating the same phrases
|
| 254 |
-
# Other parameters like top_p, top_k can also be tuned
|
| 255 |
)
|
| 256 |
-
|
| 257 |
-
# The response is usually the generated text directly for text-generation
|
| 258 |
-
# Sometimes it might include the prompt, so basic cleaning can help.
|
| 259 |
-
notes = response.strip()
|
| 260 |
print("Note generation successful.")
|
| 261 |
return notes
|
| 262 |
|
| 263 |
except Exception as e:
|
| 264 |
-
print(f"
|
| 265 |
-
raise HTTPException(status_code=503, detail=f"
|
| 266 |
|
| 267 |
|
| 268 |
# --- API Endpoints ---
|
|
|
|
| 4 |
import io # For handling bytes data in memory
|
| 5 |
import yt_dlp # YouTube audio downloader
|
| 6 |
import requests # For making HTTP requests (to audio URLs)
|
| 7 |
+
import openai # for making requests to openrouter
|
| 8 |
from fastapi import FastAPI, HTTPException, Request # The web framework
|
| 9 |
from fastapi.middleware.cors import CORSMiddleware # For allowing frontend access
|
| 10 |
from pydantic import BaseModel # For data validation
|
|
|
|
| 19 |
load_dotenv()
|
| 20 |
|
| 21 |
HF_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
|
| 22 |
+
OPENROUTER_API_KEY= os.getenv("OPENROUTER_API_KEY")
|
| 23 |
+
openai.api_key = OPENROUTER_API_KEY
|
| 24 |
+
openai.api_base = "https://openrouter.ai/api/v1"
|
| 25 |
|
| 26 |
# Check if the API key is loaded (crucial!)
|
| 27 |
if not HF_API_KEY:
|
|
|
|
| 33 |
# I can change these! Smaller Whisper models (base, small, medium) are faster.
|
| 34 |
# Different LLMs have different strengths.
|
| 35 |
ASR_MODEL = "openai/whisper-large-v3"
|
| 36 |
+
LLM_MODEL = "microsoft/mai-ds-r1:free"
|
| 37 |
|
| 38 |
# Initialize the Hugging Face Inference Client
|
| 39 |
# Handles authentication using the API key automatically
|
|
|
|
| 223 |
|
| 224 |
def generate_notes_from_transcript(transcript: str) -> str:
|
| 225 |
"""
|
| 226 |
+
Sends the transcript to OpenRouter LLM (chat model) and gets structured notes back.
|
| 227 |
"""
|
| 228 |
+
if not OPENROUTER_API_KEY:
|
| 229 |
+
raise HTTPException(status_code=503, detail="OpenRouter API key not found.")
|
| 230 |
if not transcript:
|
| 231 |
+
return "Transcript was empty."
|
|
|
|
| 232 |
|
| 233 |
print(f"Generating notes for transcript (length {len(transcript)}) using {LLM_MODEL}...")
|
| 234 |
|
|
|
|
| 246 |
Structured Notes:"""
|
| 247 |
|
| 248 |
try:
|
| 249 |
+
response = openai.ChatCompletion.create(
|
|
|
|
|
|
|
| 250 |
model=LLM_MODEL,
|
| 251 |
+
messages=[{"role": "user", "content": prompt}],
|
| 252 |
+
max_tokens=1024,
|
| 253 |
+
temperature=0.7
|
|
|
|
|
|
|
| 254 |
)
|
| 255 |
+
notes = response.choices[0].message.content
|
|
|
|
|
|
|
|
|
|
| 256 |
print("Note generation successful.")
|
| 257 |
return notes
|
| 258 |
|
| 259 |
except Exception as e:
|
| 260 |
+
print(f"OpenRouter call failed: {e}")
|
| 261 |
+
raise HTTPException(status_code=503, detail=f"OpenRouter failed: {e}")
|
| 262 |
|
| 263 |
|
| 264 |
# --- API Endpoints ---
|
requirements.txt
CHANGED
|
@@ -9,4 +9,6 @@ huggingface_hub>=0.19.0 # Includes InferenceClient
|
|
| 9 |
|
| 10 |
# Optional: If I decide I need local audio manipulation later
|
| 11 |
pydub>=0.25.1
|
| 12 |
-
# ffmpeg-python # Requires ffmpeg binary installed in Dockerfile too!
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
# Optional: If I decide I need local audio manipulation later
|
| 11 |
pydub>=0.25.1
|
| 12 |
+
# ffmpeg-python # Requires ffmpeg binary installed in Dockerfile too!
|
| 13 |
+
|
| 14 |
+
openai>=0.27.0,<1.0.0  # main.py uses the legacy 0.x interface (openai.api_base / openai.ChatCompletion.create), which was removed in openai 1.0
|