Spaces:

ravi86
/

moodtunespace

Runtime error

File size: 4,979 Bytes

import gradio as gr
import numpy as np
from PIL import Image
import os
import warnings

# Suppress all warnings process-wide, including those emitted by the
# libraries imported below.
warnings.filterwarnings("ignore")

# --- Optional torch and tf loading ---
# Both deep-learning backends are optional. A missing package only sets the
# matching IS_*_AVAILABLE flag to False; the model-loading section below
# consults these flags to decide which loading strategies to attempt.
# Only ImportError is handled here: any other failure raised while importing
# a backend still propagates and aborts start-up.
try:
    import torch
    IS_TORCH_AVAILABLE = True
except ImportError:
    # `torch` stays undefined in this module when the import fails.
    IS_TORCH_AVAILABLE = False

try:
    import tensorflow as tf
    IS_TF_AVAILABLE = True
except ImportError:
    # `tf` stays undefined in this module when the import fails.
    IS_TF_AVAILABLE = False

from transformers import AutoModelForImageClassification, AutoImageProcessor

# --- Model loading ---
# Module-level state consumed by the inference function further down:
#   model            - the loaded classifier (Transformers model or Keras model)
#   processor        - the image processor that turns a PIL image into tensors
#   is_pytorch_model - True when `model` must be called with torch tensors
model = None
processor = None
is_pytorch_model = True
model_name_or_path = "ravi86/mood_detector"
local_model_dir = "./model"
local_h5_path = os.path.join(local_model_dir, "my_model.h5")

# One human-readable entry per failed loading attempt. Reported in the final
# RuntimeError so a start-up failure explains *why* every source was rejected.
_load_errors = []


def _load_transformers_pair(source, **model_kwargs):
    """Load a classifier and its image processor from *source* as one unit.

    Returns ``(model, processor)`` on success, or ``None`` when either half
    fails to load. Loading both before assigning anything avoids a
    half-initialised state (model set, processor missing) that would make the
    later fallbacks skip themselves.
    """
    try:
        loaded_model = AutoModelForImageClassification.from_pretrained(source, **model_kwargs)
        loaded_processor = AutoImageProcessor.from_pretrained(source)
    except Exception as exc:
        # Best-effort fallback chain: remember the reason and let the caller
        # move on to the next source. KeyboardInterrupt/SystemExit are not
        # swallowed because they do not derive from Exception.
        _load_errors.append(f"{source} {model_kwargs or ''}: {type(exc).__name__}: {exc}")
        return None
    return loaded_model, loaded_processor


def _is_torch_module(candidate):
    """Return True when *candidate* is a torch ``nn.Module`` (and torch is installed)."""
    return IS_TORCH_AVAILABLE and isinstance(candidate, torch.nn.Module)


# Try Hugging Face PyTorch
if IS_TORCH_AVAILABLE:
    _loaded = _load_transformers_pair(model_name_or_path)
    if _loaded is not None:
        model, processor = _loaded
        is_pytorch_model = True
        print("Loaded PyTorch model from Hugging Face.")

# Try Hugging Face TensorFlow
if model is None and IS_TF_AVAILABLE:
    _loaded = _load_transformers_pair(model_name_or_path, from_tf=True)
    if _loaded is not None:
        model, processor = _loaded
        # AutoModelForImageClassification is the PyTorch auto class, so
        # from_tf=True is expected to hand back a torch module built from the
        # TensorFlow weights. Detect the real type instead of assuming TF, so
        # that inference feeds the model the matching tensor type.
        is_pytorch_model = _is_torch_module(model)
        print("Loaded TensorFlow model from Hugging Face.")

# Try local Transformers model
if model is None:
    _loaded = _load_transformers_pair(local_model_dir)
    if _loaded is not None:
        model, processor = _loaded
        is_pytorch_model = _is_torch_module(model)
        print("Loaded local Transformers model.")

# Try raw .h5
if model is None and IS_TF_AVAILABLE and os.path.exists(local_h5_path):
    try:
        _keras_model = tf.keras.models.load_model(local_h5_path)
        try:
            _keras_processor = AutoImageProcessor.from_pretrained(local_model_dir)
        except Exception as exc:
            # No processor config next to the .h5 file: fall back to a stock
            # ViT processor, as the original code did.
            _load_errors.append(
                f"{local_model_dir} (processor): {type(exc).__name__}: {exc}"
            )
            _keras_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
    except Exception as e:
        # This is the last resort, so a failure here is fatal. Chain the
        # cause to keep the original traceback.
        raise RuntimeError(f"Failed to load model: {e}") from e
    model, processor = _keras_model, _keras_processor
    is_pytorch_model = False
    print("Loaded local Keras .h5 model.")

if model is None or processor is None:
    _details = "; ".join(_load_errors)
    raise RuntimeError(
        "Failed to load model and processor."
        + (f" Attempts: {_details}" if _details else "")
    )

# Switch a torch model to inference mode.
if is_pytorch_model and IS_TORCH_AVAILABLE:
    model.eval()

# --- Emotion & Spotify Map ---
# Single source of truth: (emotion label, Spotify playlist URL) pairs.
# The order is significant: classify_expression_and_suggest_music indexes
# `emotions` with the predicted class id, so position i must be the label for
# class i (presumably the model's training label order - TODO confirm).
_EMOTION_PLAYLISTS = (
    ("Angry", "https://open.spotify.com/playlist/37i9dQZF1DX2LTjeP1y0aR"),
    ("Disgust", "https://open.spotify.com/playlist/37i9dQZF1DXcK3k3gJ6usM"),
    ("Fear", "https://open.spotify.com/playlist/37i9dQZF1DX4Qp4Cp4wK2N"),
    ("Happy", "https://open.spotify.com/playlist/37i9dQZF1DXdPec7aLk9C1"),
    ("Sad", "https://open.spotify.com/playlist/37i9dQZF1DX7qK8TM4T5pC"),
    ("Surprise", "https://open.spotify.com/playlist/37i9dQZF1DXdgnL3vj1gWM"),
    ("Neutral", "https://open.spotify.com/playlist/37i9dQZF1DXasMvN3R0sVw"),
)

# Class-index -> label lookup (a list, in table order).
emotions = [label for label, _ in _EMOTION_PLAYLISTS]

# Label -> playlist URL lookup.
spotify_playlist_mapping = dict(_EMOTION_PLAYLISTS)

# --- Inference Function ---
def classify_expression_and_suggest_music(image_input: np.ndarray):
    """Classify the facial expression in a frame and suggest a playlist.

    Relies on the module-level ``model``, ``processor``, ``is_pytorch_model``,
    ``emotions`` and ``spotify_playlist_mapping``.

    Args:
        image_input: Image as a NumPy array, or ``None`` when no frame is
            available.

    Returns:
        A 2-tuple of strings: a line naming the detected emotion with its
        confidence percentage, and an HTML link to the matching Spotify
        playlist. When ``image_input`` is ``None`` the first element is a
        prompt to enable the webcam and the second is empty.
    """
    if image_input is None:
        return "No image detected. Please enable your webcam.", ""

    # Reduce the frame to an 8-bit grayscale 48x48 image before handing it
    # to the processor.
    grayscale = Image.fromarray(image_input).convert("L")
    face = grayscale.resize((48, 48))
    tensor_format = "pt" if is_pytorch_model else "tf"
    encoded = processor(images=face, return_tensors=tensor_format)

    if is_pytorch_model:
        # Inference only: no gradients needed.
        with torch.no_grad():
            result = model(encoded['pixel_values'])
            probs = torch.softmax(result.logits, dim=-1).numpy()
    else:
        batch = tf.convert_to_tensor(encoded['pixel_values'].numpy())
        result = model(batch)
        # A bare array/tensor is used as-is; any other return value is
        # treated as a container whose first item holds the logits.
        if isinstance(result, (np.ndarray, tf.Tensor)):
            logits = result
        else:
            logits = result[0]
        if isinstance(logits, tf.Tensor):
            logits = logits.numpy()
        # NOTE(review): if the Keras model already ends in a softmax layer,
        # this second softmax would distort the confidence - confirm.
        probs = tf.nn.softmax(logits).numpy()

    # argmax/max run over the flattened array, which equals the class index
    # only for a single-image batch (assumed here - one frame per call).
    best_index = int(np.argmax(probs))
    confidence = float(np.max(probs)) * 100
    emotion = emotions[best_index]

    # Fall back to the Neutral playlist for a label without its own entry.
    neutral_link = spotify_playlist_mapping["Neutral"]
    spotify_link = spotify_playlist_mapping.get(emotion, neutral_link)

    headline = f"Detected Emotion: **{emotion}** (Confidence: {confidence:.2f}%)"
    playlist_html = f"**Listen on Spotify:** <a href='{spotify_link}' target='_blank'>🎧 {emotion} Vibes</a>"
    return (headline, playlist_html)

# --- Gradio Interface ---
def _build_webcam_input():
    """Create the streaming webcam image input for the installed Gradio version.

    Gradio 4 replaced the ``source="webcam"`` keyword of ``gr.Image`` with
    ``sources=["webcam"]``. Passing the wrong keyword either fails at start-up
    or is ignored, depending on the version, so the constructor signature is
    inspected and the keyword it actually declares is used.
    """
    import inspect  # local import: only needed for this one-off signature check

    try:
        init_params = inspect.signature(gr.Image.__init__).parameters
    except (TypeError, ValueError):
        # Signature not introspectable: keep the original keyword.
        init_params = {}

    if "sources" in init_params:
        source_kwargs = {"sources": ["webcam"]}
    else:
        source_kwargs = {"source": "webcam"}

    return gr.Image(
        type="numpy",
        streaming=True,
        label="Webcam Input",
        **source_kwargs,
    )


# live=True re-runs the classifier on every new frame from the streaming
# webcam input instead of waiting for a submit button.
iface = gr.Interface(
    fn=classify_expression_and_suggest_music,
    inputs=_build_webcam_input(),
    outputs=[
        gr.Textbox(label="Detected Emotion"),
        gr.Markdown(label="Suggested Spotify Playlist")
    ],
    live=True,
    title="🎭 MoodTune: Emotion-Based Music Recommender",
    description="This app detects your mood from your face and plays music to match it! Allow webcam access to begin."
)

# Start the web server only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()