submissionTtemplateFrugalAI

Sleeping

App Files Files Community

ImenMourali committed on Feb 3

Commit

adf98b6

verified ·

1 Parent(s): 398ac10

Update tasks/audio.py

Browse files

Files changed (1) hide show

tasks/audio.py +140 -223

tasks/audio.py CHANGED Viewed

@@ -16,276 +16,193 @@ router = APIRouter()
 DESCRIPTION = "Random Baseline"
 ROUTE = "/audio"
-from sklearn.metrics import accuracy_score
 @router.post(ROUTE, tags=["Audio Task"],
              description=DESCRIPTION)
 async def evaluate_audio(request: AudioEvaluationRequest):
-    # Map string predictions to numeric labels
-    numeric_predictions = map_predictions_to_labels(predictions)
-    # Extract true labels (already numeric)
-    true_labels = test_dataset["label"]
-    # Calculate accuracy
-    accuracy = accuracy_score(true_labels, numeric_predictions)
-    print("Accuracy:", accuracy)
-    # Get space info
-    username, space_url = get_space_info()
-    # Define the label mapping
-    LABEL_MAPPING = {
-        "chainsaw": 0,
-        "environment": 1
-    }
     # Load and prepare the dataset
     # Because the dataset is gated, we need to use the HF_TOKEN environment variable to authenticate
-    dataset = load_dataset(request.dataset_name,token=os.getenv("HF_TOKEN"))
     # Split dataset
     train_test = dataset["train"]
     test_dataset = dataset["test"]
     # Start tracking emissions
     tracker.start()
     tracker.start_task("inference")
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
-import tensorflow as tf
-import tensorflow_hub as hub
-import librosa
-import numpy as np
-import os
-# Load the YAMNet model from TensorFlow Hub
-yamnet_model_url = "https://tfhub.dev/google/yamnet/1"
-yamnet = hub.load(yamnet_model_url)
-# Download YAMNet class map CSV file
-labels_path = "https://raw.githubusercontent.com/tensorflow/models/master/research/audioset/yamnet/yamnet_class_map.csv"
-labels = tf.keras.utils.get_file("yamnet_class_map.csv", labels_path)
-# Read class names from the downloaded CSV
-def load_class_names(csv_file_path):
-    class_names = []
-    with open(csv_file_path, "r") as file:
-        next(file)  # Skip the header
-        for line in file:
-            class_names.append(line.strip().split(",")[-1])  # Get the class name from the last column
-    return class_names
-yamnet_classes = load_class_names(labels)
-# Define a function for YAMNet inference
-def yamnet_inference(file_name):
-    try:
-        # Load the audio file and resample to 16kHz (YAMNet's expected sample rate)
-        waveform, sample_rate = librosa.load(file_name, sr=16000)
-        # Normalize audio data
-        waveform = waveform / np.max(np.abs(waveform))
-        # Convert to tensor
         waveform = tf.convert_to_tensor(waveform, dtype=tf.float32)
-        # Predict the scores and embeddings from YAMNet
-        scores, embeddings, spectrogram = yamnet(waveform)
-        # Average the scores across time frames to get a single prediction for the entire audio
-        prediction = tf.reduce_mean(scores, axis=0).numpy()
-        return prediction
-    except Exception as e:
-        print(f"Error processing file {file_name}: {e}")
-        return None
-# Function to map predictions to class names
-def get_top_class(predictions):
-    if predictions is None:
-        return "Error"
-    top_class = np.argmax(predictions)  # Get the index of the class with the highest score
-    return yamnet_classes[top_class] if top_class < len(yamnet_classes) else "Unknown"
-import tensorflow as tf
-import tensorflow_hub as hub
-import numpy as np
-from sklearn.model_selection import train_test_split
-from tensorflow.keras.utils import to_categorical
-from datasets import DatasetDict
-# Load YAMNet Model
-yamnet_model_url = "https://tfhub.dev/google/yamnet/1"
-yamnet_model = hub.load(yamnet_model_url)
-# Function to extract embeddings from audio
-def extract_embedding(audio_example):
-    '''Extract YAMNet embeddings from a waveform'''
-    # Convert the audio example to a NumPy array
-    waveform = audio_example["audio"]["array"]  # Ensure correct key reference
-    waveform = tf.convert_to_tensor(waveform, dtype=tf.float32)
-# Run YAMNet model
-    scores, embeddings, spectrogram = yamnet_model(waveform)
-    return {"embedding": embeddings.numpy()}
-# Apply embedding extraction to training data
-train_embeddings = dataset["train"].map(extract_embedding)
-# Apply embedding extraction to testing data
-test_embeddings = dataset["test"].map(extract_embedding)
-X_train, y_train = [], []
-X_test, y_test = [], []
-# Process Training Data
-for example in train_embeddings:
-    for embedding in example["embedding"]:
-        X_train.append(embedding)
-        y_train.append(example["label"])
-# Process Testing Data
-for example in test_embeddings:
-    for embedding in example["embedding"]:
-        X_test.append(embedding)
-        y_test.append(example["label"])
-# Convert to NumPy arrays
-X_train = np.array(X_train)
-y_train = np.array(y_train)
-X_test = np.array(X_test)
-y_test = np.array(y_test)
-# Convert labels to categorical (one-hot encoding)
-y_train_cat = to_categorical(y_train, num_classes=2)
-y_test_cat = to_categorical(y_test, num_classes=2)
-print(f"Training samples: {X_train.shape}, Test samples: {X_test.shape}")
-from tensorflow.keras.models import Sequential
-from tensorflow.keras.layers import Dense, Dropout
-# Define the model
-model = Sequential([
-    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
-    Dropout(0.3),
-    Dense(64, activation='relu'),
-    Dropout(0.3),
-    Dense(2, activation='softmax')  # 2 classes: chainsaw (0) vs. environment (1)
-])
-model.summary()
-# Compile the model
-model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
-# Train the model on YAMNet embeddings
-model.fit(X_train, y_train_cat, epochs=20, batch_size=16, validation_data=(X_test, y_test_cat))
-# Evaluate the model
-y_pred = model.predict(X_test)
-y_pred_labels = np.argmax(y_pred, axis=1)
-from sklearn.metrics import accuracy_score
-accuracy = accuracy_score(y_test, y_pred_labels)
-print("Transfer Learning Model Accuracy:", accuracy)
-# Predict labels for the test dataset
-# Run YAMNet inference on the raw audio data
-predictions = []
-for audio_data in test_dataset["audio"]:
-    # Extract waveform and sampling rate
-    waveform = audio_data["array"]
-    sample_rate = audio_data["sampling_rate"]
-    # Resample the waveform to 16kHz (YAMNet's expected sample rate) if necessary
-    if sample_rate != 16000:
-        waveform = librosa.resample(waveform, orig_sr=sample_rate, target_sr=16000)
-    # Convert waveform to tensor
-    waveform = tf.convert_to_tensor(waveform, dtype=tf.float32)
-    # Ensure waveform is 1D
-    waveform = tf.squeeze(waveform)
-    # Predict with YAMNet--->model
-    # Get YAMNet embeddings
-    _, embeddings, _ = yamnet_model(waveform)  # Using the original yamnet_model for embedding extraction
-    # Calculate the mean of the embeddings across the time dimension
-    embeddings = tf.reduce_mean(embeddings, axis=0)  # Average across time frames
-    # Reshape embeddings for prediction
-    embeddings = embeddings.numpy()  # Convert to NumPy array
-    embeddings = embeddings.reshape(1, -1)  # Reshape to (1, embedding_dimension)
-    # Now predict using your trained model
-    scores = model.predict(embeddings)
-    # Get predicted class
-    predicted_class_index = np.argmax(scores)
-    predicted_class_label = predicted_class_index  # Assuming 0 for 'chainsaw', 1 for 'environment'
-    # Get the top class name using the predicted label
-    top_class = "chainsaw" if predicted_class_label == 0 else "environment"
-    predictions.append(top_class)
-print("Predictions:", predictions)
-def map_predictions_to_labels(predictions):
-    """
-    Maps string predictions to numeric labels:
-    - "chainsaw" -> 0
-    - any other class -> 1
-    Args:
-        predictions (list of str): List of class name predictions.
-    Returns:
-        list of int: Mapped numeric labels.
-    """
-    return [0 if pred == "chainsaw" else 1 for pred in predictions]
-from sklearn.metrics import accuracy_score
-# Map string predictions to numeric labels
-numeric_predictions = map_predictions_to_labels(predictions)
-# Extract true labels (already numeric)
-true_labels = test_dataset["label"]
-# Calculate accuracy
-accuracy = accuracy_score(true_labels, numeric_predictions)
-print("Accuracy:", accuracy)
-#--------------------------------------------------------------------------------------------
-# YOUR MODEL INFERENCE STOPS HERE
-#--------------------------------------------------------------------------------------------
-# Stop tracking emissions
-emissions_data = tracker.stop_task()
-# Prepare results dictionary
-results = {
-    "username": username,
-    "space_url": space_url,
-    "submission_timestamp": datetime.now().isoformat(),
-    "model_description": DESCRIPTION,
-    "accuracy": float(accuracy),
-    "energy_consumed_wh": emissions_data.energy_consumed * 1000,
-    "emissions_gco2eq": emissions_data.emissions * 1000,
-    "emissions_data": clean_emissions_data(emissions_data),
-    "api_route": ROUTE,
-    "dataset_config": {
-        "dataset_name": request.dataset_name,
-        "test_size": request.test_size,
-        "test_seed": request.test_seed
     }
-}
-print(results)

 DESCRIPTION = "Random Baseline"
 ROUTE = "/audio"
 @router.post(ROUTE, tags=["Audio Task"],
              description=DESCRIPTION)
 async def evaluate_audio(request: AudioEvaluationRequest):
     """Train a small classifier on YAMNet embeddings and score it on the test split.

     The dataset named by ``request.dataset_name`` is loaded (authenticated with the
     ``HF_TOKEN`` environment variable), every clip is turned into YAMNet embeddings,
     a dense softmax head is fitted on the per-frame training embeddings, and one
     label per test clip is predicted from the time-averaged clip embedding.
     Everything between ``tracker.start_task`` and ``tracker.stop_task`` (model
     loading, training and prediction) is included in the tracked emissions.

     Args:
         request: Evaluation request; ``dataset_name``, ``test_size`` and
             ``test_seed`` are read from it.

     Returns:
         dict: Submission results (accuracy, energy/emissions figures, dataset
         configuration and Space identification).
     """
     # Identify the submitting Space; the results dictionary needs both values.
     # NOTE(review): get_space_info is assumed to be imported at module level, as the
     # previous revision of this function called it -- confirm.
     username, space_url = get_space_info()
     # Load and prepare the dataset
     # Because the dataset is gated, we need to use the HF_TOKEN environment variable to authenticate
     dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))
     # Split dataset
     train_dataset = dataset["train"]
     test_dataset = dataset["test"]
     # Start tracking emissions
     tracker.start()
     tracker.start_task("inference")
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
     # Heavy ML dependencies are imported here, inside the tracked section, as before.
     import librosa
     import numpy as np
     import tensorflow as tf
     import tensorflow_hub as hub
     from sklearn.metrics import accuracy_score
     from tensorflow.keras.layers import Dense, Dropout
     from tensorflow.keras.models import Sequential
     from tensorflow.keras.utils import to_categorical

     yamnet_sample_rate = 16000  # sample rate YAMNet expects
     class_names = ("chainsaw", "environment")  # index == numeric label
     # Load YAMNet Model
     yamnet_model = hub.load("https://tfhub.dev/google/yamnet/1")

     def to_yamnet_waveform(audio):
         """Return the clip as a 1-D float32 tensor resampled to 16 kHz if needed."""
         waveform = np.asarray(audio["array"])
         sample_rate = audio["sampling_rate"]
         if sample_rate != yamnet_sample_rate:
             waveform = librosa.resample(
                 waveform, orig_sr=sample_rate, target_sr=yamnet_sample_rate
             )
         return tf.squeeze(tf.convert_to_tensor(waveform, dtype=tf.float32))

     def extract_embedding(audio_example):
         """Extract per-frame YAMNet embeddings for one dataset example."""
         # Same preprocessing as at prediction time, so the classifier is trained
         # and evaluated on identically prepared inputs.
         _, embeddings, _ = yamnet_model(to_yamnet_waveform(audio_example["audio"]))
         return {"embedding": embeddings.numpy()}

     def frames_and_labels(embedded_split):
         """Flatten a mapped split into (frame embeddings, per-frame labels) arrays."""
         features, labels = [], []
         for example in embedded_split:
             for embedding in example["embedding"]:
                 features.append(embedding)
                 labels.append(example["label"])
         return np.array(features), np.array(labels)

     # Apply embedding extraction to training and testing data
     X_train, y_train = frames_and_labels(train_dataset.map(extract_embedding))
     X_test, y_test = frames_and_labels(test_dataset.map(extract_embedding))
     # Convert labels to categorical (one-hot encoding)
     y_train_cat = to_categorical(y_train, num_classes=2)
     y_test_cat = to_categorical(y_test, num_classes=2)
     print(f"Training samples: {X_train.shape}, Test samples: {X_test.shape}")
     # Define the model
     model = Sequential([
         Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
         Dropout(0.3),
         Dense(64, activation='relu'),
         Dropout(0.3),
         Dense(2, activation='softmax')  # 2 classes: chainsaw (0) vs. environment (1)
     ])
     model.summary()
     # Compile the model
     model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
     # Train the model on YAMNet embeddings.
     # The test frames are passed only so Keras reports validation metrics per epoch.
     model.fit(X_train, y_train_cat, epochs=20, batch_size=16, validation_data=(X_test, y_test_cat))
     # Frame-level evaluation (informational; the reported accuracy is clip-level)
     frame_pred_labels = np.argmax(model.predict(X_test), axis=1)
     print("Transfer Learning Model Accuracy:", accuracy_score(y_test, frame_pred_labels))
     # Clip-level prediction: average each clip's frame embeddings over time,
     # then classify all clips in a single batched predict call.
     clip_embeddings = []
     for audio_data in test_dataset["audio"]:
         _, embeddings, _ = yamnet_model(to_yamnet_waveform(audio_data))
         clip_embeddings.append(tf.reduce_mean(embeddings, axis=0).numpy())
     clip_scores = model.predict(np.stack(clip_embeddings))
     numeric_predictions = [int(index) for index in np.argmax(clip_scores, axis=1)]
     predictions = [class_names[index] for index in numeric_predictions]
     print("Predictions:", predictions)
     # Extract true labels (already numeric)
     true_labels = test_dataset["label"]
     # Calculate accuracy
     accuracy = accuracy_score(true_labels, numeric_predictions)
     print("Accuracy:", accuracy)
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
     #--------------------------------------------------------------------------------------------
     # Stop tracking emissions
     emissions_data = tracker.stop_task()
     # Prepare results dictionary
     results = {
         "username": username,
         "space_url": space_url,
         "submission_timestamp": datetime.now().isoformat(),
         "model_description": DESCRIPTION,
         "accuracy": float(accuracy),
         "energy_consumed_wh": emissions_data.energy_consumed * 1000,
         "emissions_gco2eq": emissions_data.emissions * 1000,
         "emissions_data": clean_emissions_data(emissions_data),
         "api_route": ROUTE,
         "dataset_config": {
             "dataset_name": request.dataset_name,
             "test_size": request.test_size,
             "test_seed": request.test_seed,
         },
     }
     print(results)
     # Return the payload so the endpoint responds with the results instead of null.
     return results