Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| import tiktoken | |
| import os | |
| import openai | |
| from openai.embeddings_utils import get_embedding, cosine_similarity | |
| import numpy as np | |
| import streamlit as st | |
# Reviews together with their precomputed embedding vectors.
datafile_path = "fine_food_reviews_with_embeddings_1k.csv"
input_datapath = datafile_path  # earlier name for the same file, kept for compatibility

# embedding model parameters
embedding_model = "text-embedding-ada-002"

# NOTE(review): no API key is configured in this script; the openai package is
# presumably reading it from the OPENAI_API_KEY environment variable - confirm.
# If it has to come from Streamlit secrets, index the mapping
# (st.secrets["OPENAI_API_KEY"]); st.secrets is not callable.

# Load the file exactly once. The script used to read it twice: the first
# frame was tokenised and filtered by length, then thrown away when the file
# was read again, so that work never affected the search and only slowed down
# every Streamlit rerun.
df = pd.read_csv(datafile_path)

st.title("Semantic Search")

# search_reviews() displays the "combined" column (summary as title, review
# text as content). Build it only when the file does not already provide it,
# so a stored column is left untouched.
if "combined" not in df.columns:
    df["combined"] = (
        "Title: " + df.Summary.str.strip() + "; Content: " + df.Text.str.strip()
    )

# Embeddings are stored in the CSV as the text of a Python list; turn each one
# back into a numpy array.
# SECURITY: eval() executes whatever the cell contains. This is only acceptable
# while the CSV is a trusted, locally shipped file; switch to
# ast.literal_eval/json.loads if the data can ever come from elsewhere.
df["embedding"] = df.embedding.apply(eval).apply(np.array)
| # search through the reviews for a specific product | |
def search_reviews(df, product_description, n=3, pprint=True):
    """Return the reviews whose embeddings are most similar to a query.

    The query text is embedded with ``text-embedding-ada-002`` through
    ``get_embedding`` and compared with each row's ``embedding`` using
    ``cosine_similarity``.

    Args:
        df: DataFrame with ``embedding``, ``combined`` and ``ProductId``
            columns. A ``similarity`` column is written to it in place.
        product_description: Free-text query to search for.
        n: Maximum number of reviews to return.
        pprint: When true, print every match to stdout.

    Returns:
        A ``(results, product)`` tuple of Series that share one index and are
        ordered best match first: ``results`` holds the review text with the
        "Title: " / "; Content:" markers rewritten, ``product`` holds the
        matching ``ProductId`` values.
    """
    product_embedding = get_embedding(
        product_description,
        engine="text-embedding-ada-002",
    )
    df["similarity"] = df.embedding.apply(
        lambda x: cosine_similarity(x, product_embedding)
    )

    # Rank once and derive both outputs from the same rows.
    top_matches = df.sort_values("similarity", ascending=False).head(n)
    results = (
        top_matches.combined
        .str.replace("Title: ", "", regex=False)
        .str.replace("; Content:", ": ", regex=False)
    )
    product = top_matches.ProductId

    if pprint:
        # Walk the rows that were actually returned: there can be fewer than
        # n of them, and positional pairing also copes with repeated index
        # labels.
        for product_id, review in zip(product, results):
            print("Product : ", product_id)
            print(review)
            print()
    return results, product
# Query widgets: free-text search prompt and the number of reviews to show.
prompt = st.text_input("What do you want to search for? : ", "pizza")
top_n = st.number_input("How many results do you want to see? : ", min_value=1)

# Streamlit re-executes this script on every widget interaction, so the search
# (an embedding API request) is only performed once the user asks for it
# instead of on each rerun.
if st.button("Search Reviews"):
    results, product = search_reviews(df, prompt, top_n)
    st.write(product, results)