Spaces:

ngeb25
/

SpotChatbot

Sleeping

App Files Files Community

Nathan Gebreab committed on 2 days ago

Commit

1b64026

1 Parent(s): 5468740

added dataset and original code to gradio repo

Browse files

Files changed (3) hide show

.gitignore +2 -1
app.py +172 -0
spotify_songs.csv +0 -0

.gitignore CHANGED Viewed

	@@ -1 +1,2 @@
1	- venv


1	+ venv
2	+ keys.txt

app.py CHANGED Viewed

@@ -1,7 +1,179 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 def respond(
     message,
     history: list[dict[str, str]],

+"""
+Spot: The Spotify Chatbot
+IAT360 Final Project
+By Nathan Gebreab (301582871) & EmXi Vo (301600699)
+Spot is a chatbot using Meta's Llama-3.2-3B-Instruct model & uses
+RAG (Retrieval-Augmented Generation) to provide the user with song recommendations
+based on their input prompt. By using RAG, Spot is able to access a dataset of
+approximately 30000 Spotify songs and their descriptive parameters in order to
+find the best recommendations.
+Links to Model (Authentication from Meta Required):
+https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct
+https://www.llama.com/llama-downloads/
+Link to Dataset (created by Joakim Arvidsson):
+https://www.kaggle.com/datasets/joebeachcapital/30000-spotify-songs
+"""
+import torch
+from transformers import pipeline
+from sentence_transformers import SentenceTransformer
+import pandas as pd
+import numpy as np
+import warnings
 import gradio as gr
 from huggingface_hub import InferenceClient
+# Hugging Face model identifier handed to the text-generation pipeline in generate_intro().
+model_id = "meta-llama/Llama-3.2-3B-Instruct"
+# Suppress warnings
+# NOTE: with no category given, this silences every warning in the process.
+warnings.filterwarnings('ignore')
+# Load the spotify dataset all at the beginning
+# (this runs at import time; the CSV path is relative to the current working directory).
+print("Loading Spotify songs database...")
+spotify_df = pd.read_csv('spotify_songs.csv')
+# Remove duplicates based on track name and artist name
+# (pandas keeps the first occurrence of each pair by default).
+spotify_df = spotify_df.drop_duplicates(subset=["track_name", "track_artist"])
+# Render each remaining row as one multi-line text "document". The line breaks and
+# indentation inside the f-string are part of the text, so they are both embedded
+# below and shown verbatim wherever a document is printed.
+documents = spotify_df.apply(
+    lambda row: f"""Song: {row['track_name']},
+        Album: {row['track_album_name']},
+        Album Release Date: {row['track_album_release_date']},
+        Artist: {row['track_artist']},
+        Playlist Genre: {row['playlist_genre']},
+        Playlist Subgenre: {row['playlist_subgenre']},
+        Danceability: {row['danceability']},
+        Energy: {row['energy']},
+        Key: {row['key']},
+        Loudness: {row['loudness']},
+        Mode: {row['mode']},
+        Speechiness: {row['speechiness']},
+        Acousticness: {row['acousticness']},
+        Instrumentalness: {row['instrumentalness']},
+        Liveness: {row['liveness']},
+        Valence: {row['valence']},
+        Tempo: {row['tempo']},
+        Duration: {row['duration_ms']}
+        """,
+    axis=1
+).tolist()
+# Sentence-embedding model shared by the one-off document encoding here and by the
+# per-query encoding in retrieve_with_pandas().
+embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
+# Encode every document once up front so that queries only need to embed the query text.
+# Presumably this returns one vector per document - TODO confirm against the library docs.
+embeddings = embedding_model.encode(documents, show_progress_bar=False)
+# Lookup table pairing each document's text with its embedding; list(...) splits the
+# encode() result into one entry per row. retrieve_with_pandas() reads this table and
+# also writes a "Similarity" column into it.
+df = pd.DataFrame({
+    "Document": documents,
+    "Embedding": list(embeddings)
+})
+print("Database loaded! Ready to chat.\n")
+def retrieve_with_pandas(query, top_k=10):
+    query_embedding = embedding_model.encode([query])[0]
+    df['Similarity'] = df['Embedding'].apply(lambda x: np.dot(query_embedding, x) /
+                                             (np.linalg.norm(query_embedding) * np.linalg.norm(x)))
+    results = df.sort_values(by="Similarity", ascending=False).head(top_k)
+    return results[["Document", "Similarity"]]
+def generate_intro(query):
+    llm = pipeline(
+        "text-generation",
+        model=model_id,
+        dtype=torch.bfloat16,
+        device_map="auto",
+    )
+    system_prompt = (
+        "You are Spot, a friendly music recommendation chatbot."
+        "Respond to the user in 1–3 natural sentences."
+        "Do NOT list songs. Do NOT number anything. Do NOT name any songs. Do NOT name any artists. Do NOT name any musicians. Do NOT name any famous works."
+        "Just give a short, warm and friendly message that leads into the list of recommended songs"
+    )
+    prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{system_prompt}\n" \
+             f"<|start_header_id|>user<|end_header_id|>\n{query}\n" \
+             f"<|start_header_id|>assistant<|end_header_id|>\n"
+    intro = llm(
+        prompt,
+        max_new_tokens=60,
+        do_sample=True,
+        temperature=2.0
+    )[0]["generated_text"]
+    intro = intro.split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip()
+    return intro
+def num_requested_songs(query):
+    for word in query.split():
+        if word.isdigit():
+            return min(int(word), 10)  # Max 10 songs
+    return 3  # Default number of songs
+def generate_response(query, num_songs):
+    intro = generate_intro(query)
+    retrieved = retrieve_with_pandas(query, top_k=num_songs)
+    # Get the actual songs
+    songs_list = "\n".join([f"{i+1}. {row['Document']}"
+                            for i, (_, row) in enumerate(retrieved.iterrows())])
+    response = f"""{intro}
+    Here are my recommendations:
+        {songs_list}
+    """
+    return response
+# def chatbot():
+#     print("=" * 60)
+#     print("Spot: The Spotify Chatbot")
+#     print("=" * 60)
+#     print("\nHi there! My name's Spot and I'm here to give song recommendations!")
+#     print("You can request a specific song, or you can just let me know how you're feeling and we can get started!\n")
+#     print("Examples of song requests:")
+#     print("  - 'Give me 3 songs that start with W'")
+#     print("  - 'Recommend 5 upbeat songs'")
+#     print("  - 'Show me songs by Drake'")
+#     print("\nType 'quit' or 'exit' to stop.\n")
+#     while True:
+#         # Get user input
+#         user_input = input("You: ").strip()
+#         if user_input.lower() in ['quit', 'exit', 'bye', 'goodbye']:
+#             print("\nSpot: Thanks for chatting! Goodbye!")
+#             break
+#         # elif user_input.lower() in ['test']:
+#         #     test_chatbot()
+#         elif not user_input:
+#             print("Spot: Please ask me something!\n")
+#             continue
+#         else:
+#             num_songs = num_requested_songs(user_input)
+#             response = generate_response(user_input, num_songs)
+#             print(f"\nSpot: {response}")
+#             continue
+# chatbot()
 def respond(
     message,
     history: list[dict[str, str]],

spotify_songs.csv ADDED Viewed

The diff for this file is too large to render. See raw diff