Spaces:

tscr-369
/

vocalcore

Runtime error

App Files Community

tscr-369 committed on Jul 20

Commit

c4d0b69

verified ·

1 Parent(s): 7a02106

Update main.py

Browse files

Files changed (1) hide show

main.py +32 -131

main.py CHANGED Viewed

@@ -9,21 +9,18 @@ from typing import Optional, Dict, Any, List
 import json
 import re
 from contextlib import asynccontextmanager
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoProcessor
 from huggingface_hub import InferenceClient
 import base64
 # Set up cache directories BEFORE importing any HuggingFace modules
 cache_base = "/app/.cache"
 os.environ['HF_HOME'] = cache_base
-os.environ['TRANSFORMERS_CACHE'] = f"{cache_base}/transformers"
 os.environ['HF_DATASETS_CACHE'] = f"{cache_base}/datasets"
 os.environ['HF_HUB_CACHE'] = f"{cache_base}/hub"
 # Ensure cache directories exist
 cache_dirs = [
     os.environ['HF_HOME'],
-    os.environ['TRANSFORMERS_CACHE'],
     os.environ['HF_DATASETS_CACHE'],
     os.environ['HF_HUB_CACHE']
 ]
@@ -31,15 +28,13 @@ cache_dirs = [
 for cache_dir in cache_dirs:
     os.makedirs(cache_dir, exist_ok=True)
-# Global variables for model and pipeline
-model = None
-audio_pipeline = None
 client = None
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     # Startup
-    global model, audio_pipeline, client
     try:
         print("🔄 Starting NatureLM Audio Decoder API...")
         print(f"📁 Using cache directory: {os.environ.get('HF_HOME', '/app/.cache')}")
@@ -47,84 +42,7 @@ async def lifespan(app: FastAPI):
         # Initialize HuggingFace client for inference API
         client = InferenceClient()
         print("✅ HuggingFace client initialized successfully")
-        # Load NatureLM-audio model locally for better performance
-        try:
-            print("🔄 Loading NatureLM-audio model...")
-            model_name = "EarthSpeciesProject/NatureLM-audio"
-            # For NatureLM-audio, we need to use a different approach since it's a custom model
-            # Let's try using the processor and model directly
-            try:
-                # Load processor first
-                processor = AutoProcessor.from_pretrained(
-                    model_name,
-                    trust_remote_code=True,
-                    cache_dir=os.environ['TRANSFORMERS_CACHE']
-                )
-                # Load model with specific configuration for NatureLM-audio
-                model = AutoModelForCausalLM.from_pretrained(
-                    model_name,
-                    torch_dtype=torch.float16,
-                    device_map="auto",
-                    trust_remote_code=True,
-                    cache_dir=os.environ['TRANSFORMERS_CACHE'],
-                    low_cpu_mem_usage=True
-                )
-                print("✅ NatureLM-audio model loaded successfully")
-                # Create a custom pipeline for NatureLM-audio
-                def naturelm_audio_pipeline(audio_input, **kwargs):
-                    """Custom pipeline for NatureLM-audio processing"""
-                    try:
-                        # Process audio with the model
-                        if isinstance(audio_input, bytes):
-                            # Convert bytes to the format expected by the model
-                            # This is a simplified approach - in practice, you'd need to match the model's expected input format
-                            inputs = processor(
-                                audio_input,
-                                return_tensors="pt",
-                                sampling_rate=16000,
-                                **kwargs
-                            )
-                        else:
-                            inputs = processor(audio_input, return_tensors="pt", **kwargs)
-                        # Generate response
-                        with torch.no_grad():
-                            outputs = model.generate(
-                                **inputs,
-                                max_length=512,
-                                do_sample=True,
-                                temperature=0.7,
-                                pad_token_id=processor.tokenizer.eos_token_id
-                            )
-                        # Decode the response
-                        response = processor.tokenizer.decode(outputs[0], skip_special_tokens=True)
-                        return {"text": response}
-                    except Exception as e:
-                        print(f"Error in NatureLM pipeline: {e}")
-                        return {"text": "Error processing audio with NatureLM-audio model"}
-                audio_pipeline = naturelm_audio_pipeline
-            except Exception as model_error:
-                print(f"⚠️ Could not load NatureLM-audio model locally: {model_error}")
-                print("🔄 Falling back to HuggingFace Inference API")
-                model = None
-                audio_pipeline = None
-        except Exception as model_error:
-            print(f"⚠️ Could not load model locally: {model_error}")
-            print("🔄 Falling back to HuggingFace Inference API")
-            model = None
-            audio_pipeline = None
-        print("✅ API ready for NatureLM-audio analysis")
     except Exception as e:
         print(f"❌ Error during startup: {e}")
@@ -461,15 +379,14 @@ async def health_check():
     return {
         "status": "healthy",
         "service": "NatureLM Audio Decoder API",
-        "model_loaded": model is not None,
-        "pipeline_ready": audio_pipeline is not None,
-        "client_ready": client is not None
     }
 @app.post("/analyze", response_model=AnalysisResponse)
 async def analyze_audio(file: UploadFile = File(...)):
     """
-    Analyze audio file using NatureLM-audio model with enhanced confidence scoring and detailed captioning
     """
     try:
         # Save uploaded file temporarily
@@ -515,50 +432,34 @@ async def analyze_audio(file: UploadFile = File(...)):
             complexity=audio_chars.get('audio_quality_indicators', {}).get('complexity_score', 0)
         )
-        # Use NatureLM-audio model for analysis
         try:
-            if audio_pipeline is not None:
-                # Use local model if available
-                print("🔄 Using local NatureLM-audio model...")
-                # Read audio file
-                with open(temp_path, "rb") as audio_file:
-                    audio_bytes = audio_file.read()
-                # Process with local pipeline
-                result = audio_pipeline(audio_bytes)
-                combined_response = result.get('text', '') if isinstance(result, dict) else str(result)
-                detection_method = "Local NatureLM-audio Model"
             else:
-                # Use HuggingFace inference API
-                print("🔄 Using HuggingFace Inference API...")
-                # Read audio file as bytes
-                with open(temp_path, "rb") as audio_file:
-                    audio_bytes = audio_file.read()
-                # Encode audio as base64 for API
-                audio_b64 = base64.b64encode(audio_bytes).decode('utf-8')
-                # Call NatureLM-audio model via HuggingFace API
-                response = client.post(
-                    "EarthSpeciesProject/NatureLM-audio",
-                    inputs={
-                        "audio": audio_b64,
-                        "text": prompt
-                    }
-                )
-                # Parse response
-                if isinstance(response, list) and len(response) > 0:
-                    combined_response = response[0]
-                else:
-                    combined_response = str(response)
-                detection_method = "HuggingFace Inference API"
         except Exception as api_error:
             print(f"API call failed: {api_error}")
             # Fallback to a comprehensive mock response for testing

 import json
 import re
 from contextlib import asynccontextmanager
 from huggingface_hub import InferenceClient
 import base64
 # Set up cache directories BEFORE importing any HuggingFace modules
 cache_base = "/app/.cache"
 os.environ['HF_HOME'] = cache_base
 os.environ['HF_DATASETS_CACHE'] = f"{cache_base}/datasets"
 os.environ['HF_HUB_CACHE'] = f"{cache_base}/hub"
 # Ensure cache directories exist
 cache_dirs = [
     os.environ['HF_HOME'],
     os.environ['HF_DATASETS_CACHE'],
     os.environ['HF_HUB_CACHE']
 ]
 for cache_dir in cache_dirs:
     os.makedirs(cache_dir, exist_ok=True)
+# Global variables
 client = None
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     # Startup
+    global client
     try:
         print("🔄 Starting NatureLM Audio Decoder API...")
         print(f"📁 Using cache directory: {os.environ.get('HF_HOME', '/app/.cache')}")
         # Initialize HuggingFace client for inference API
         client = InferenceClient()
         print("✅ HuggingFace client initialized successfully")
+        print("✅ API ready for NatureLM-audio analysis via HuggingFace Inference API")
     except Exception as e:
         print(f"❌ Error during startup: {e}")
     return {
         "status": "healthy",
         "service": "NatureLM Audio Decoder API",
+        "client_ready": client is not None,
+        "model": "NatureLM-audio via HuggingFace Inference API"
     }
 @app.post("/analyze", response_model=AnalysisResponse)
 async def analyze_audio(file: UploadFile = File(...)):
     """
+    Analyze audio file using NatureLM-audio model via HuggingFace Inference API
     """
     try:
         # Save uploaded file temporarily
             complexity=audio_chars.get('audio_quality_indicators', {}).get('complexity_score', 0)
         )
+        # Use HuggingFace Inference API for NatureLM-audio
         try:
+            print("🔄 Using HuggingFace Inference API for NatureLM-audio...")
+            # Read audio file as bytes
+            with open(temp_path, "rb") as audio_file:
+                audio_bytes = audio_file.read()
+            # Encode audio as base64 for API
+            audio_b64 = base64.b64encode(audio_bytes).decode('utf-8')
+            # Call NatureLM-audio model via HuggingFace API
+            response = client.post(
+                "EarthSpeciesProject/NatureLM-audio",
+                inputs={
+                    "audio": audio_b64,
+                    "text": prompt
+                }
+            )
+            # Parse response
+            if isinstance(response, list) and len(response) > 0:
+                combined_response = response[0]
             else:
+                combined_response = str(response)
+            detection_method = "HuggingFace Inference API"
         except Exception as api_error:
             print(f"API call failed: {api_error}")
             # Fallback to a comprehensive mock response for testing