File size: 5,692 Bytes
1b64026
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37d7126
 
 
 
7bf136e
1b64026
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37d7126
 
 
7bf136e
 
 
 
 
37d7126
7bf136e
 
 
 
 
 
 
 
37d7126
7bf136e
 
 
37d7126
 
 
7bf136e
 
 
 
 
 
 
 
 
 
 
 
 
37d7126
7bf136e
 
 
 
37d7126
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
"""

Spot: The Spotify Chatbot
IAT360 Final Project

By Nathan Gebreab (301582871) & EmXi Vo (301600699)

Spot is a chatbot that uses Meta's Llama-3.2-3B-Instruct model together with
RAG (Retrieval-Augmented Generation) to give the user song recommendations
based on their input prompt. Through RAG, Spot can search a dataset of
approximately 30,000 Spotify songs and their descriptive parameters in order to
find the best recommendations.

Links to Model (Authentication from Meta Required): 
https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct
https://www.llama.com/llama-downloads/

Link to Dataset (created by Joakim Arvidsson):
https://www.kaggle.com/datasets/joebeachcapital/30000-spotify-songs

"""

import torch
from transformers import pipeline
from sentence_transformers import SentenceTransformer
import pandas as pd
import numpy as np
import warnings
import gradio as gr
from huggingface_hub import InferenceClient


# Hugging Face model identifier passed to the text-generation pipeline in
# generate_intro().
model_id="meta-llama/Llama-3.2-3B-Instruct"

# Suppress warnings
# NOTE: this silences every warning category for the whole process.
warnings.filterwarnings('ignore')

# Load the spotify dataset all at the beginning
# (done once at import time; the path is relative to the working directory).
print("Loading Spotify songs database...")
spotify_df = pd.read_csv('spotify_songs.csv')

# Remove duplicates based on track name and artist name
spotify_df = spotify_df.drop_duplicates(subset=["track_name", "track_artist"])

# Turn every row into one multi-line text "document" listing the song's
# metadata and audio features. These strings are both what gets embedded and
# what is shown to the user as a recommendation.
documents = spotify_df.apply(
    lambda row: f"""Song: {row['track_name']},
        Album: {row['track_album_name']},
        Album Release Date: {row['track_album_release_date']},
        Artist: {row['track_artist']}, 
        Playlist Genre: {row['playlist_genre']},
        Playlist Subgenre: {row['playlist_subgenre']},
        Danceability: {row['danceability']},
        Energy: {row['energy']},
        Key: {row['key']},
        Loudness: {row['loudness']},
        Mode: {row['mode']},
        Speechiness: {row['speechiness']},
        Acousticness: {row['acousticness']},
        Instrumentalness: {row['instrumentalness']},
        Liveness: {row['liveness']},
        Valence: {row['valence']},
        Tempo: {row['tempo']},
        Duration: {row['duration_ms']}
        """, 
                    
    axis=1
).tolist()

# Sentence-embedding model shared by indexing (here) and querying
# (retrieve_with_pandas), so documents and queries live in the same space.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Embed every document up front; this runs once at import time.
embeddings = embedding_model.encode(documents, show_progress_bar=False)

# Lookup table pairing each document with its embedding vector (one vector
# per row); retrieve_with_pandas() ranks the rows of this frame.
df = pd.DataFrame({
    "Document": documents,
    "Embedding": list(embeddings)
})

print("Database loaded! Ready to chat.\n")

def retrieve_with_pandas(query, top_k=10):
    """Return the ``top_k`` song documents most similar to ``query``.

    The query is embedded with the module-level ``embedding_model`` and
    compared against every stored song embedding using cosine similarity.

    Args:
        query: Free-text user request.
        top_k: Maximum number of rows to return.

    Returns:
        A DataFrame with the columns ``Document`` and ``Similarity``, sorted
        by descending similarity and keeping the row index of ``df``.
    """
    query_embedding = embedding_model.encode([query])[0]

    if df.empty:
        # Nothing to rank; keep the documented column layout.
        return pd.DataFrame({"Document": [], "Similarity": []})

    # Stack the per-row vectors into one 2-D array so the similarity of all
    # songs is computed in a single vectorized expression.
    song_matrix = np.vstack(df["Embedding"].to_numpy())
    norms = np.linalg.norm(song_matrix, axis=1) * np.linalg.norm(query_embedding)
    similarities = song_matrix @ query_embedding / norms

    # Attach the scores to a fresh frame instead of writing a column onto the
    # shared module-level ``df``, so concurrent requests cannot overwrite each
    # other's scores.
    ranked = df[["Document"]].assign(Similarity=similarities)
    return ranked.sort_values(by="Similarity", ascending=False).head(top_k)

# Lazily-created text-generation pipeline, reused by every generate_intro() call.
_intro_llm = None


def _get_intro_llm():
    """Return the shared text-generation pipeline, building it on first use."""
    global _intro_llm
    if _intro_llm is None:
        # Loading the model is by far the most expensive step, so do it once
        # per process rather than once per chat message.
        _intro_llm = pipeline(
            "text-generation",
            model=model_id,
            dtype=torch.bfloat16,
            device_map="auto",
        )
    return _intro_llm


def generate_intro(query):
    """Generate a short, friendly lead-in sentence for the recommendation list.

    Args:
        query: The user's chat message.

    Returns:
        The assistant's reply text with surrounding whitespace removed.
    """
    # Each fragment ends with a space: adjacent string literals are joined
    # verbatim, so without it the sentences run together.
    system_prompt = (
        "You are Spot, a friendly music recommendation chatbot. "
        "Respond to the user in 1–3 natural sentences. "
        "Do NOT list songs. Do NOT number anything. Do NOT name any songs. Do NOT name any artists. Do NOT name any musicians. Do NOT name any famous works. "
        "Just give a short, warm and friendly message that leads into the list of recommended songs"
    )

    # Pass chat messages rather than a hand-assembled prompt string so the
    # tokenizer's own chat template inserts the correct special tokens
    # (including the end-of-turn markers).
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
    ]

    # NOTE(review): temperature=2.0 is kept from the original; it is very high
    # and may produce incoherent text — confirm this is intended.
    outputs = _get_intro_llm()(
        messages,
        max_new_tokens=60,
        do_sample=True,
        temperature=2.0,
    )

    # With chat input, "generated_text" is the message list with the new
    # assistant turn appended as the last element.
    return outputs[0]["generated_text"][-1]["content"].strip()

def num_requested_songs(query, default=3, max_songs=10):
    """Extract how many songs the user asked for from a free-text query.

    The first whitespace-separated token that is a whole number (ignoring
    surrounding punctuation such as "5," or "(8)") is used.

    Args:
        query: The user's chat message.
        default: Count returned when the query contains no number.
        max_songs: Upper bound applied to the requested count.

    Returns:
        An int between 1 and ``max_songs``, or ``default`` if no number is
        found.
    """
    for word in query.split():
        token = word.strip(".,!?;:\"'()[]")
        # isdecimal() only accepts characters int() can parse; isdigit() also
        # accepts e.g. superscript digits, on which int() raises ValueError.
        if token.isdecimal():
            # Clamp to at least one song so "0" does not give an empty list.
            return max(1, min(int(token), max_songs))
    return default

def generate_response(query, num_songs):
    """Build the full chat reply: a generated intro plus a numbered song list.

    Args:
        query: The user's chat message; used both for the intro and as the
            retrieval query.
        num_songs: Number of songs to retrieve.

    Returns:
        The reply text to show in the chat.
    """
    intro = generate_intro(query)
    retrieved = retrieve_with_pandas(query, top_k=num_songs)

    if retrieved.empty:
        return f"{intro}\n\nSorry, I couldn't find any songs to recommend."

    # Number the retrieved documents starting at 1; only the text column is
    # needed, so iterate it directly.
    songs_list = "\n".join(
        f"{position}. {document.strip()}"
        for position, document in enumerate(retrieved["Document"], start=1)
    )

    # Assemble the reply with explicit newlines instead of an indented
    # triple-quoted literal: leading spaces copied from the source layout would
    # be rendered by the Markdown chat view as a code block.
    return f"{intro}\n\nHere are my recommendations:\n\n{songs_list}"

def respond(
    message,
    history: list[dict[str, str]],
):
    """Chat callback: answer one user message.

    Args:
        message: The text the user just sent.
        history: Earlier chat turns supplied by the chat interface; not used,
            since every reply is generated from the current message alone.

    Returns:
        A farewell for quit keywords, a nudge for blank input, otherwise the
        generated recommendation reply.
    """
    # Normalise once so that surrounding whitespace or capitalisation does not
    # stop a quit keyword from matching.
    text = message.strip()

    if not text:
        return "Please ask me something!"

    if text.lower() in {"quit", "exit", "bye", "goodbye"}:
        return "Thanks for chatting!"

    num_songs = num_requested_songs(message)
    return generate_response(message, num_songs)
 

# Text shown under the title of the chat interface.
_CHAT_DESCRIPTION = """
    Hello! My name's Spot and I'm here to give song recommendations!
    
    You can request a specific song, or just let me know how you're feeling!
    
    *Type 'quit' or 'exit' to end the conversation.*
    """

# Example prompts offered to the user.
_EXAMPLE_PROMPTS = [
    "Give me 8 upbeat songs",
    "Show me 5 chill songs for studying",
    "Recommend songs by Drake",
    "I want something energetic",
]

# Chat interface that routes every user message through respond().
chatbot = gr.ChatInterface(
    respond,
    title="Spot: The Spotify Chatbot",
    description=_CHAT_DESCRIPTION,
    examples=_EXAMPLE_PROMPTS,
    theme="glass",
)

# Page layout: a sidebar containing a login button, with the chat interface
# rendered in the main area. Statement order inside the context managers
# determines where each component is placed.
# NOTE(review): respond() as written does not take an OAuth token, so the
# login button does not appear to affect the chat — confirm it is still needed.
with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.LoginButton()
    chatbot.render()


# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()