| """ | |
| Spot: The Spotify Chatbot | |
| IAT360 Final Project | |
| By Nathan Gebreab (301582871) & EmXi Vo (301600699) | |
| Spot is a chatbot using Meta's Llama-3.2-3B-Instruct model & uses | |
| RAG (Retrieval-Augmented Generation) to provide the user with song recommendations | |
| based on their input prompt. By using RAG, Spot is able to access a dataset of | |
| approximately 30000 Spotify songs and their descriptive parameters in order to | |
| find the best recommendations. | |
| Links to Model (Authentication from Meta Required): | |
| https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct | |
| https://www.llama.com/llama-downloads/ | |
| Link to Dataset (created by Joakim Arvidsson): | |
| https://www.kaggle.com/datasets/joebeachcapital/30000-spotify-songs | |
| """ | |

import torch
from transformers import pipeline
from sentence_transformers import SentenceTransformer
import pandas as pd
import numpy as np
import warnings
import gradio as gr
from huggingface_hub import InferenceClient

model_id = "meta-llama/Llama-3.2-3B-Instruct"

# Suppress warnings
warnings.filterwarnings('ignore')

# Load the Spotify dataset once at startup
print("Loading Spotify songs database...")
spotify_df = pd.read_csv('spotify_songs.csv')

# Remove duplicates based on track name and artist name
spotify_df = spotify_df.drop_duplicates(subset=["track_name", "track_artist"])

# Turn each song row into one text document describing its metadata and audio features
documents = spotify_df.apply(
    lambda row: f"""Song: {row['track_name']},
    Album: {row['track_album_name']},
    Album Release Date: {row['track_album_release_date']},
    Artist: {row['track_artist']},
    Playlist Genre: {row['playlist_genre']},
    Playlist Subgenre: {row['playlist_subgenre']},
    Danceability: {row['danceability']},
    Energy: {row['energy']},
    Key: {row['key']},
    Loudness: {row['loudness']},
    Mode: {row['mode']},
    Speechiness: {row['speechiness']},
    Acousticness: {row['acousticness']},
    Instrumentalness: {row['instrumentalness']},
    Liveness: {row['liveness']},
    Valence: {row['valence']},
    Tempo: {row['tempo']},
    Duration (ms): {row['duration_ms']}
    """,
    axis=1
).tolist()

embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = embedding_model.encode(documents, show_progress_bar=False)

df = pd.DataFrame({
    "Document": documents,
    "Embedding": list(embeddings)
})
print("Database loaded! Ready to chat.\n")


def retrieve_with_pandas(query, top_k=10):
    # Embed the query, then rank songs by cosine similarity to it
    query_embedding = embedding_model.encode([query])[0]
    df['Similarity'] = df['Embedding'].apply(lambda x: np.dot(query_embedding, x) /
                                             (np.linalg.norm(query_embedding) * np.linalg.norm(x)))
    results = df.sort_values(by="Similarity", ascending=False).head(top_k)
    return results[["Document", "Similarity"]]
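
# Illustrative usage of the retriever on its own (the query string here is just
# an example, not something the app hard-codes):
#     results = retrieve_with_pandas("upbeat dance pop with high energy", top_k=5)
#     for _, row in results.iterrows():
#         print(f"{row['Similarity']:.3f}  {row['Document'].splitlines()[0]}")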


def generate_intro(query):
    # Build the text-generation pipeline (note: this reloads the model on every call)
    llm = pipeline(
        "text-generation",
        model=model_id,
        dtype=torch.bfloat16,
        device_map="auto",
    )
    system_prompt = (
        "You are Spot, a friendly music recommendation chatbot. "
        "Respond to the user in 1–3 natural sentences. "
        "Do NOT list songs. Do NOT number anything. Do NOT name any songs. Do NOT name any artists. "
        "Do NOT name any musicians. Do NOT name any famous works. "
        "Just give a short, warm and friendly message that leads into the list of recommended songs."
    )
    # Llama 3 chat format: each turn ends with <|eot_id|>
    prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{system_prompt}<|eot_id|>\n" \
             f"<|start_header_id|>user<|end_header_id|>\n{query}<|eot_id|>\n" \
             f"<|start_header_id|>assistant<|end_header_id|>\n"
    intro = llm(
        prompt,
        max_new_tokens=60,
        do_sample=True,
        temperature=2.0
    )[0]["generated_text"]
    # Keep only the text generated after the assistant header
    intro = intro.split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip()
    return intro
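
# An alternative sketch (not used above): in recent transformers versions the
# text-generation pipeline also accepts chat-style input and applies the model's
# own chat template, which avoids hand-writing the special tokens:
#     messages = [{"role": "system", "content": system_prompt},
#                 {"role": "user", "content": query}]
#     intro = llm(messages, max_new_tokens=60)[0]["generated_text"][-1]["content"]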


def num_requested_songs(query):
    # Use the first number that appears in the prompt as the requested song count
    for word in query.split():
        if word.isdigit():
            return min(int(word), 10)  # Max 10 songs
    return 3  # Default number of songs
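
# How the count is parsed (worked examples, derived from the logic above):
#     num_requested_songs("Give me 8 upbeat songs")      -> 8
#     num_requested_songs("Show me 25 chill songs")      -> 10 (capped at 10)
#     num_requested_songs("I want something energetic")  -> 3  (default)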


def generate_response(query, num_songs):
    intro = generate_intro(query)
    retrieved = retrieve_with_pandas(query, top_k=num_songs)

    # Format the retrieved songs as a numbered list
    songs_list = "\n".join([f"{i+1}. {row['Document']}"
                            for i, (_, row) in enumerate(retrieved.iterrows())])

    response = f"""{intro}
Here are my recommendations:
{songs_list}
"""
    return response


def respond(
    message,
    history: list[dict[str, str]],
    # system_message,
    # max_tokens,
    # temperature,
    # top_p,
    # hf_token: gr.OAuthToken,
):
    if message.lower() in ['quit', 'exit', 'bye', 'goodbye']:
        return "Thanks for chatting!"

    if not message.strip():
        return "Please ask me something!"

    num_songs = num_requested_songs(message)
    response = generate_response(message, num_songs)
    return response


chatbot = gr.ChatInterface(
    respond,
    type="messages",  # history is passed as a list of {"role", "content"} dicts
    title="Spot: The Spotify Chatbot",
    description="""
    Hello! My name's Spot and I'm here to give song recommendations!
    You can request a specific song, or just let me know how you're feeling!

    *Type 'quit' or 'exit' to end the conversation.*
    """,
    examples=[
        "Give me 8 upbeat songs",
        "Show me 5 chill songs for studying",
        "Recommend songs by Drake",
        "I want something energetic"
    ],
    theme="glass",
    # retry_btn=None,
    # undo_btn=None,
    # clear_btn="Clear Chat"
)

with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.LoginButton()
    chatbot.render()

if __name__ == "__main__":
    demo.launch()