Nathan Gebreab committed on
Commit
1b64026
·
1 Parent(s): 5468740

added dataset and original code to gradio repo

Browse files
Files changed (3) hide show
  1. .gitignore +2 -1
  2. app.py +172 -0
  3. spotify_songs.csv +0 -0
.gitignore CHANGED
@@ -1 +1,2 @@
1
- venv
 
 
1
+ venv
2
+ keys.txt
app.py CHANGED
@@ -1,7 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  def respond(
6
  message,
7
  history: list[dict[str, str]],
 
1
+ """
2
+
3
+ Spot: The Spotify Chatbot
4
+ IAT360 Final Project
5
+
6
+ By Nathan Gebreab (301582871) & EmXi Vo (301600699)
7
+
8
+ Spot is a chatbot that uses Meta's Llama-3.2-3B-Instruct model and
9
+ RAG (Retrieval-Augmented Generation) to provide the user with song recommendations
10
+ based on their input prompt. By using RAG, Spot is able to access a dataset of
11
+ approximately 30000 Spotify songs and their descriptive parameters in order to
12
+ find the best recommendations.
13
+
14
+ Links to Model (Authentication from Meta Required):
15
+ https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct
16
+ https://www.llama.com/llama-downloads/
17
+
18
+ Link to Dataset (created by Joakim Arvidsson):
19
+ https://www.kaggle.com/datasets/joebeachcapital/30000-spotify-songs
20
+
21
+ """
22
+
23
+ import torch
24
+ from transformers import pipeline
25
+ from sentence_transformers import SentenceTransformer
26
+ import pandas as pd
27
+ import numpy as np
28
+ import warnings
29
  import gradio as gr
30
  from huggingface_hub import InferenceClient
31
 
32
 
33
+ model_id = "meta-llama/Llama-3.2-3B-Instruct"
34
+
35
+ # Suppress warnings
36
+ warnings.filterwarnings('ignore')
37
+
38
+ # Load the Spotify dataset once at startup
39
+ print("Loading Spotify songs database...")
40
+ spotify_df = pd.read_csv('spotify_songs.csv')
41
+
42
+ # Remove duplicates based on track name and artist name
43
+ spotify_df = spotify_df.drop_duplicates(subset=["track_name", "track_artist"])
44
+
45
+ documents = spotify_df.apply(
46
+ lambda row: f"""Song: {row['track_name']},
47
+ Album: {row['track_album_name']},
48
+ Album Release Date: {row['track_album_release_date']},
49
+ Artist: {row['track_artist']},
50
+ Playlist Genre: {row['playlist_genre']},
51
+ Playlist Subgenre: {row['playlist_subgenre']},
52
+ Danceability: {row['danceability']},
53
+ Energy: {row['energy']},
54
+ Key: {row['key']},
55
+ Loudness: {row['loudness']},
56
+ Mode: {row['mode']},
57
+ Speechiness: {row['speechiness']},
58
+ Acousticness: {row['acousticness']},
59
+ Instrumentalness: {row['instrumentalness']},
60
+ Liveness: {row['liveness']},
61
+ Valence: {row['valence']},
62
+ Tempo: {row['tempo']},
63
+ Duration: {row['duration_ms']}
64
+ """,
65
+
66
+ axis=1
67
+ ).tolist()
68
+
69
+ embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
70
+ embeddings = embedding_model.encode(documents, show_progress_bar=False)
71
+
72
+ df = pd.DataFrame({
73
+ "Document": documents,
74
+ "Embedding": list(embeddings)
75
+ })
76
+
77
+ print("Database loaded! Ready to chat.\n")
78
+
79
def retrieve_with_pandas(query, top_k=10):
    """Return the ``top_k`` documents most similar to ``query``.

    The query is embedded with the module-level ``embedding_model`` and
    compared, by cosine similarity, against every vector stored in the
    module-level ``df["Embedding"]`` column.

    Args:
        query: Free-form text to search for.
        top_k: Maximum number of rows to return.

    Returns:
        A new DataFrame with the columns ``Document`` and ``Similarity``,
        sorted by descending similarity and keeping the row labels of
        ``df``. The shared ``df`` itself is never modified, so overlapping
        requests cannot overwrite each other's scores.
    """
    if df.empty:
        return pd.DataFrame({"Document": [], "Similarity": []})

    query_embedding = embedding_model.encode([query])[0]

    # Stack the per-row vectors into one (n_documents, dim) matrix so every
    # cosine similarity is computed in a single vectorised pass instead of
    # one Python-level lambda call per row.
    doc_matrix = np.vstack(df["Embedding"].to_numpy())
    norms = np.linalg.norm(doc_matrix, axis=1) * np.linalg.norm(query_embedding)

    # A zero-length vector gives NaN here; sort_values() places NaN last, so
    # such rows simply rank below every real match.
    with np.errstate(divide="ignore", invalid="ignore"):
        similarities = (doc_matrix @ query_embedding) / norms

    scored = pd.DataFrame(
        {
            "Document": df["Document"].to_numpy(),
            "Similarity": similarities,
        },
        index=df.index,
    )
    return scored.sort_values(by="Similarity", ascending=False).head(top_k)
86
+
87
# Lazily created text-generation pipeline, shared by every request.
_llm_pipeline = None


def _get_llm():
    """Build the text-generation pipeline on first use and reuse it afterwards."""
    global _llm_pipeline
    if _llm_pipeline is None:
        _llm_pipeline = pipeline(
            "text-generation",
            model=model_id,
            dtype=torch.bfloat16,
            device_map="auto",
        )
    return _llm_pipeline


def generate_intro(query):
    """Generate a short, friendly lead-in for the song recommendations.

    Args:
        query: The user's message.

    Returns:
        The text the model produced after the assistant header, stripped of
        surrounding whitespace.
    """
    # Loading the model is by far the most expensive step, so the pipeline
    # is created once and reused instead of being rebuilt on every request.
    llm = _get_llm()

    # Adjacent string literals are concatenated verbatim, so each sentence
    # carries its own trailing space to keep the sentences from running
    # together in the prompt.
    system_prompt = (
        "You are Spot, a friendly music recommendation chatbot. "
        "Respond to the user in 1–3 natural sentences. "
        "Do NOT list songs. Do NOT number anything. Do NOT name any songs. Do NOT name any artists. Do NOT name any musicians. Do NOT name any famous works. "
        "Just give a short, warm and friendly message that leads into the list of recommended songs"
    )

    # NOTE(review): Meta's documented Llama 3 chat template ends each turn
    # with <|eot_id|> and puts a blank line after each header; this
    # hand-built prompt omits both. Left unchanged here; confirm against the
    # model card before altering it.
    prompt = (
        f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{system_prompt}\n"
        f"<|start_header_id|>user<|end_header_id|>\n{query}\n"
        f"<|start_header_id|>assistant<|end_header_id|>\n"
    )

    # NOTE(review): temperature=2.0 is a very high sampling temperature;
    # kept as in the original, but worth confirming it is intentional.
    generated = llm(
        prompt,
        max_new_tokens=60,
        do_sample=True,
        temperature=2.0,
    )[0]["generated_text"]

    # The generated text is split on the last assistant header and only the
    # part after it is kept, which drops the echoed prompt when present.
    return generated.split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip()
117
+
118
def num_requested_songs(query):
    """Extract how many songs the user asked for from free-form text.

    Scans the whitespace-separated words of ``query`` and uses the first one
    that is a plain decimal number, ignoring punctuation attached to it.

    Args:
        query: The user's message.

    Returns:
        The requested count clamped to the range 1-10, or 3 when the message
        contains no number.
    """
    for word in query.split():
        # Drop punctuation stuck to the token so "5," or "(5)" still count.
        token = word.strip(".,;:!?()[]{}\"'")
        # isdecimal() only accepts characters that int() can parse, whereas
        # isdigit() also accepts e.g. superscripts ("²") that make int()
        # raise ValueError.
        if token.isdecimal():
            # At least 1 so a literal "0" cannot yield an empty list; max 10.
            return max(1, min(int(token), 10))
    return 3  # Default number of songs
123
+
124
def generate_response(query, num_songs):
    """Build the chatbot's full reply: a generated intro plus ranked songs.

    Args:
        query: The user's message.
        num_songs: How many recommendations to retrieve.

    Returns:
        A string made of the generated intro, a blank line, the heading
        "Here are my recommendations:", and the retrieved documents numbered
        from 1 in ranking order.
    """
    intro = generate_intro(query)
    retrieved = retrieve_with_pandas(query, top_k=num_songs)

    # Iterate the column directly; iterrows() builds a full Series per row
    # only for one field to be read from it.
    songs_list = "\n".join(
        f"{position}. {document}"
        for position, document in enumerate(retrieved["Document"], start=1)
    )

    # Explicit newlines keep the layout independent of how this function is
    # indented in the source file.
    return f"{intro}\n\nHere are my recommendations:\n{songs_list}\n"
141
+
142
+ # def chatbot():
143
+ # print("=" * 60)
144
+ # print("Spot: The Spotify Chatbot")
145
+ # print("=" * 60)
146
+ # print("\nHi there! My name's Spot and I'm here to give song recommendations!")
147
+ # print("You can request a specific song, or you can just let me know how you're feeling and we can get started!\n")
148
+ # print("Examples of song requests:")
149
+ # print(" - 'Give me 3 songs that start with W'")
150
+ # print(" - 'Recommend 5 upbeat songs'")
151
+ # print(" - 'Show me songs by Drake'")
152
+ # print("\nType 'quit' or 'exit' to stop.\n")
153
+
154
+ # while True:
155
+ # # Get user input
156
+ # user_input = input("You: ").strip()
157
+
158
+ # if user_input.lower() in ['quit', 'exit', 'bye', 'goodbye']:
159
+ # print("\nSpot: Thanks for chatting! Goodbye!")
160
+ # break
161
+ # # elif user_input.lower() in ['test']:
162
+ # # test_chatbot()
163
+ # elif not user_input:
164
+ # print("Spot: Please ask me something!\n")
165
+ # continue
166
+ # else:
167
+
168
+ # num_songs = num_requested_songs(user_input)
169
+
170
+ # response = generate_response(user_input, num_songs)
171
+ # print(f"\nSpot: {response}")
172
+ # continue
173
+
174
+ # chatbot()
175
+
176
+
177
  def respond(
178
  message,
179
  history: list[dict[str, str]],
spotify_songs.csv ADDED
The diff for this file is too large to render. See raw diff