Spaces:

Danielos100
/

Gifty

Sleeping

App Files Files Community

Danielos100 committed on Aug 15

Commit

64b2f09

verified ·

1 Parent(s): 6427279

Create app.py

Browse files

Files changed (1) hide show

app.py +283 -0

app.py ADDED Viewed

	@@ -0,0 +1,283 @@

+# app.py
+# 🎁 Gift Recommender – Gradio app (English / USD)
+# Dataset: ckandemir/amazon-products (Hugging Face)
+# Baseline retrieval: TF-IDF + cosine (fast & dependency-light)
+# Optional: switch to embeddings + FAISS by flipping USE_EMBEDDINGS to True.
+import os
+import re
+import random
+from typing import List, Dict, Tuple
+import numpy as np
+import pandas as pd
+from datasets import load_dataset
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.neighbors import NearestNeighbors
+import gradio as gr
# ========= Configuration =========
def _env_int(name: str, default: int, minimum: int = 1) -> int:
    """Read an integer setting from the environment, tolerating bad values.

    Returns ``default`` when the variable is unset, blank, not an integer, or
    smaller than ``minimum``. A warning is emitted for rejected values so a
    misconfiguration is visible without crashing the app at import time.
    """
    import warnings  # local import: the file's top-level import block stays as-is

    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    try:
        value = int(raw.strip())
    except ValueError:
        warnings.warn(f"{name}={raw!r} is not an integer; using {default}")
        return default
    if value < minimum:
        warnings.warn(f"{name}={value} is below {minimum}; using {default}")
        return default
    return value


USE_EMBEDDINGS = False  # set True to try SentenceTransformers + FAISS (see the optional block further down)
MAX_ROWS = _env_int("MAX_ROWS", 5000)  # cap on catalog rows, for speed
DEFAULT_OCCASIONS = "birthday, thank_you, housewarming"  # occasion tags assigned to every catalog row
+# ========= Data Loading & Schema =========
def _to_price_usd(x):
    """Parse a price-like value into a float amount in US dollars.

    Accepts numbers and strings such as ``"$1,299.00"``. When the cleaned text
    is not a bare number (for example a range like ``"$10.99 - $15.99"``), the
    first number found in it is used. Returns ``np.nan`` when no number can be
    extracted (``None``, empty text, ``"n/a"``, ...).
    """
    text = str(x).strip().replace("$", "").replace(",", "")
    try:
        return float(text)
    except ValueError:
        pass
    # Not a bare number: fall back to the first numeric token, which for a
    # "low - high" range is the low end.
    match = re.search(r"\d+(?:\.\d+)?", text)
    return float(match.group()) if match else np.nan
def map_amazon_to_schema(df_raw: pd.DataFrame) -> pd.DataFrame:
    """Map a raw Amazon-products frame onto the app's gift-catalog schema.

    Source columns are looked up case-insensitively by header
    ("product name", "description", "category", "selling price", "image").
    A missing source column yields an empty-string column (NaN for price)
    instead of an error. The result shares ``df_raw``'s index and has the
    columns: name, short_desc, tags, price_usd, age_range, gender_tags,
    occasion_tags, persona_fit, image_url.
    """
    # Normalized header -> actual column label.
    cols = {str(c).lower().strip(): c for c in df_raw.columns}

    def source_column(key: str, default="") -> pd.Series:
        """Return the source column for ``key``, or a constant column if absent."""
        if key in cols:
            return df_raw[cols[key]]
        return pd.Series(default, index=df_raw.index)

    def as_text(series: pd.Series) -> pd.Series:
        # fillna before astype(str): otherwise missing cells become the
        # literal text "nan" and pollute the retrieval documents.
        return series.fillna("").astype(str).str.strip()

    category = as_text(source_column("category"))

    # Passing index= keeps construction valid even when every value would
    # otherwise be a scalar (no recognised source columns at all).
    out = pd.DataFrame(
        {
            "name": as_text(source_column("product name")).str.slice(0, 120),
            "short_desc": as_text(source_column("description")).str.slice(0, 400),
            # Explicit regex=True with an escaped pipe: a bare "|" pattern is a
            # regex alternation on pandas < 2.0 and matches between every char.
            "tags": category.str.replace(r"\s*\|\s*", ", ", regex=True).str.lower(),
            "price_usd": source_column("selling price", np.nan).map(_to_price_usd),
            "age_range": "any",
            "gender_tags": "any",
            "occasion_tags": DEFAULT_OCCASIONS,
            "persona_fit": category.str.lower(),
            "image_url": as_text(source_column("image")),
        },
        index=df_raw.index,
    )
    return out
def build_doc(row: pd.Series) -> str:
    """Join a catalog row's text fields into one retrieval document.

    Reads name, short_desc, tags, persona_fit and occasion_tags via
    ``row.get`` (so a plain dict works too) and joins the non-empty ones with
    " | ". Missing values (``None`` / NaN) and blank strings are skipped so
    they do not show up as "None" / "nan" tokens in the document.
    """
    fields = ("name", "short_desc", "tags", "persona_fit", "occasion_tags")
    parts = []
    for field in fields:
        value = row.get(field, "")
        if value is None or (isinstance(value, float) and np.isnan(value)):
            continue
        text = str(value).strip()
        if text:
            parts.append(text)
    return " | ".join(parts)
def load_catalog() -> pd.DataFrame:
    """Load the product catalog and prepare it for retrieval.

    Tries the ``ckandemir/amazon-products`` Hugging Face dataset (train
    split). If loading fails for any reason, a three-item built-in catalog is
    used instead so the app still starts; the failure is logged rather than
    swallowed silently.

    The mapped catalog is de-duplicated on (name, short_desc), capped at
    ``MAX_ROWS`` rows by a seeded random sample, re-indexed 0..n-1, and given
    a ``doc`` column built by ``build_doc``.
    """
    import logging  # local import: the file's top-level import block stays as-is

    try:
        raw = load_dataset("ckandemir/amazon-products", split="train").to_pandas()
    except Exception as exc:  # deliberate best-effort: any failure -> fallback data
        logging.getLogger(__name__).warning(
            "Could not load ckandemir/amazon-products (%r); using built-in fallback catalog.",
            exc,
        )
        raw = pd.DataFrame(
            {
                "Product Name": ["Wireless Earbuds", "Coffee Sampler", "Strategy Board Game"],
                "Description": [
                    "Compact earbuds with noise isolation and long battery life.",
                    "Four single-origin roasts from small roasters.",
                    "Modern eurogame for 2–4 players, 45–60 minutes.",
                ],
                "Category": ["Electronics | Audio", "Grocery | Coffee", "Toys & Games | Board Games"],
                "Selling Price": ["$59.00", "$34.00", "$39.00"],
                "Image": ["", "", ""],
            }
        )
    df = map_amazon_to_schema(raw).drop_duplicates(subset=["name", "short_desc"])
    if len(df) > MAX_ROWS:
        df = df.sample(n=MAX_ROWS, random_state=42)
    # Always renumber: drop_duplicates leaves gaps in the labels, and callers
    # pair catalog rows with positional rows of the TF-IDF matrix.
    df = df.reset_index(drop=True)
    # A list comprehension (rather than apply(axis=1)) also works on an empty frame.
    df["doc"] = [build_doc(row) for _, row in df.iterrows()]
    return df
# Catalog is materialised once, at import time.
CATALOG = load_catalog()
# ========= Retrieval (Baseline: TF-IDF) =========
# Unigram + bigram TF-IDF fitted on each product's "doc" text.
_vectorizer = TfidfVectorizer(ngram_range=(1, 2), min_df=1)
_X = _vectorizer.fit_transform(CATALOG["doc"].fillna(""))
# Cosine-distance neighbour index over the rows of _X.
_nn = NearestNeighbors(metric="cosine", n_neighbors=10)
_nn.fit(_X)
def profile_to_query(profile: Dict) -> str:
    """Turn a recipient profile into the free-text query used for retrieval.

    Reads ``interests`` (list of strings, or a single string), ``occasion``,
    ``budget_usd`` and ``extras``. Each present field contributes one segment;
    absent or empty fields are left out instead of producing filler such as
    "occasion: . budget:  USD.". For a fully populated profile the result is
    "<interests>. occasion: <occasion>. budget: <budget> USD. <extras>".
    """
    interests = profile.get("interests") or []
    if isinstance(interests, str):
        # A bare string would otherwise be joined character by character.
        interests = [interests]
    interest_text = ", ".join(
        item for item in (str(i).strip() for i in interests) if item
    )

    segments = []
    if interest_text:
        segments.append(f"{interest_text}.")
    occasion = profile.get("occasion")
    if occasion not in (None, ""):
        segments.append(f"occasion: {occasion}.")
    budget = profile.get("budget_usd")
    if budget not in (None, ""):  # 0 is a legitimate value, so no truthiness test
        segments.append(f"budget: {budget} USD.")
    extras = str(profile.get("extras") or "").strip()
    if extras:
        segments.append(extras)
    return " ".join(segments)
def filter_business(df: pd.DataFrame, budget_min=None, budget_max=None, occasion: str = None) -> pd.DataFrame:
    """Apply budget and occasion filters to a catalog frame.

    Args:
        df: Frame with ``price_usd`` and ``occasion_tags`` columns.
        budget_min: Lower price bound (inclusive), or ``None`` for no bound.
            Rows with an unknown price count as 0 for this check.
        budget_max: Upper price bound (inclusive), or ``None`` for no bound.
            Rows with an unknown price count as 1e9 for this check.
        occasion: Free-text occasion. Matched case-insensitively as a literal
            substring of ``occasion_tags``, both as typed and with runs of
            spaces/hyphens turned into underscores, so "Thank You" matches the
            tag "thank_you". ``None`` or blank disables the filter.

    Returns:
        The rows of ``df`` that pass every active filter (original index kept).
    """
    mask = pd.Series(True, index=df.index)

    if budget_min is not None or budget_max is not None:
        price = pd.to_numeric(df["price_usd"], errors="coerce")
        if budget_min is not None:
            mask &= price.fillna(0) >= float(budget_min)
        if budget_max is not None:
            mask &= price.fillna(1e9) <= float(budget_max)

    needle = "" if occasion is None else str(occasion).strip()
    if needle:
        tags = df["occasion_tags"].fillna("").astype(str)
        # Tags are stored as identifiers ("thank_you"); users type free text.
        variants = {needle, re.sub(r"[\s\-]+", "_", needle)}
        hit = pd.Series(False, index=df.index)
        for variant in variants:
            hit |= tags.str.contains(variant, case=False, regex=False)
        mask &= hit

    return df[mask]
def recommend_topk(profile: Dict, k: int = 3) -> pd.DataFrame:
    """Return up to ``k`` catalog items most similar to the profile's query.

    The profile is turned into text with ``profile_to_query`` and embedded
    with the fitted TF-IDF vectorizer. Candidates are restricted to the rows
    that pass ``filter_business`` (budget_min / budget_max / occasion); if no
    row passes, the whole catalog is used. Items are ranked by cosine
    distance and de-duplicated by name.

    Returns:
        A frame with columns name, short_desc, price_usd, occasion_tags,
        persona_fit, image_url and similarity (``1 - cosine distance``),
        best match first. Empty when ``k <= 0`` or the catalog is empty.
    """
    columns = ["name", "short_desc", "price_usd", "occasion_tags", "persona_fit", "image_url", "similarity"]
    n_rows = len(CATALOG)
    if k <= 0 or n_rows == 0:
        empty = CATALOG.iloc[:0].copy()
        empty["similarity"] = pd.Series(dtype=float)
        return empty[columns]

    query_vec = _vectorizer.transform([profile_to_query(profile)])

    eligible = filter_business(
        CATALOG,
        profile.get("budget_min"),
        profile.get("budget_max"),
        profile.get("occasion"),
    )
    if eligible.empty:
        eligible = CATALOG  # nothing passed the filters: relax them entirely

    # Positional mask over catalog rows. The neighbour index returns row
    # POSITIONS in the full TF-IDF matrix, so eligibility must be expressed
    # positionally too rather than by indexing the filtered label array.
    allowed = CATALOG.index.isin(eligible.index)

    # Rank the whole catalog (results come back sorted by ascending distance)
    # and walk down the ranking until k eligible, distinct items are found.
    dists, inds = _nn.kneighbors(query_vec, n_neighbors=n_rows)
    names = CATALOG["name"].to_numpy()

    seen, picks = set(), []
    for pos, dist in zip(inds[0], dists[0]):
        if not allowed[pos]:
            continue
        name = names[pos]
        if name in seen:
            continue
        seen.add(name)
        picks.append((int(pos), 1.0 - float(dist)))  # similarity = 1 - distance
        if len(picks) >= k:
            break

    res = CATALOG.iloc[[pos for pos, _ in picks]].copy()
    res["similarity"] = [sim for _, sim in picks]
    return res[columns]
+# ========= Optional: Embeddings + FAISS (toggle USE_EMBEDDINGS=True) =========
+# If you want to try embeddings, uncomment and flip the flag to True. This is optional.
+# import faiss
+# from sentence_transformers import SentenceTransformer
+# _st_model = None
+# _faiss_index = None
+# def _build_embeddings_index(model_name="sentence-transformers/all-MiniLM-L6-v2"):
+#     global _st_model, _faiss_index
+#     _st_model = SentenceTransformer(model_name)
+#     embs = _st_model.encode(CATALOG["doc"].tolist(), convert_to_numpy=True, normalize_embeddings=True)
+#     _faiss_index = faiss.IndexFlatIP(embs.shape[1])  # cosine if normalized
+#     _faiss_index.add(embs)
+# _MODEL_BUILT = False
+#
+# def recommend_topk_embeddings(profile: Dict, k: int = 3) -> pd.DataFrame:
+#     global _MODEL_BUILT
+#     if not _MODEL_BUILT:
+#         _build_embeddings_index()
+#         _MODEL_BUILT = True
+#     query = profile_to_query(profile)
+#     qv = _st_model.encode([query], convert_to_numpy=True, normalize_embeddings=True)
+#     sims, idxs = _faiss_index.search(qv, min(max(k * 6, k), len(CATALOG)))
+#     order = np.argsort(-sims[0])
+#     picks = [int(idxs[0][i]) for i in order[:k]]  # map rank positions back to catalog row ids
+#     out = CATALOG.iloc[picks].copy()
+#     out["similarity"] = sims[0][order][:k]
+#     return out[["name", "short_desc", "price_usd", "occasion_tags", "persona_fit", "image_url", "similarity"]]
+# ========= Generative placeholders (synthetic idea + message) =========
def generate_item(profile: Dict) -> Dict:
    """Create one synthetic gift idea from the profile (placeholder generator).

    Picks one of three templates at random ("personalized", "experience",
    "bundle") and fills it with the first interest (or "hobby" when there is
    none). The price is the first usable positive number among ``budget_max``
    and ``budget_usd`` (default 50), clipped to the range 20..200.

    Returns:
        A dict with keys name, short_desc, price_usd, occasion_tags,
        persona_fit and image_url.
    """
    raw_interests = profile.get("interests") or []
    if isinstance(raw_interests, str):
        raw_interests = [raw_interests]
    interests = [item for item in (str(i).strip() for i in raw_interests) if item]

    occasion = profile.get("occasion") or "birthday"

    # dict.get(key, fallback) does not fall through when the key is present
    # but None, so try each budget source explicitly.
    budget = 50.0
    for candidate in (profile.get("budget_max"), profile.get("budget_usd")):
        try:
            value = float(candidate)
        except (TypeError, ValueError):
            continue
        if value > 0 and not np.isnan(value):
            budget = value
            break

    style = random.choice(["personalized", "experience", "bundle"])
    core = interests[0] if interests else "hobby"
    if style == "personalized":
        name = f"Custom {core} accessory with initials"
        desc = f"Thoughtful personalized {core} accessory tailored to their taste."
    elif style == "experience":
        name = f"{core.title()} workshop voucher"
        desc = f"A guided intro session to explore {core} in a fun, hands-on way."
    else:
        name = f"{core.title()} starter bundle"
        desc = f"A curated set to kickstart their {core} passion."
    return {
        "name": f"{name} ({occasion})",
        "short_desc": desc,
        "price_usd": float(np.clip(budget, 20, 200)),
        "occasion_tags": occasion,
        "persona_fit": ", ".join(interests) or "general",
        "image_url": "",
    }
def generate_message(profile: Dict, language: str = "en") -> str:
    """Compose a short greeting message from the profile (placeholder text).

    Reads ``recipient_name`` (default "Friend"), ``occasion`` (default
    "birthday") and ``tone`` (default "warm and friendly"). Missing, ``None``
    and empty values all fall back to the defaults, and underscores in the
    occasion tag are shown as spaces ("thank_you" -> "thank you").

    ``language`` is accepted for interface compatibility but is not used: the
    message is always English.
    """
    name = profile.get("recipient_name") or "Friend"
    # Occasions are stored as tag identifiers; make them readable for display.
    occasion = str(profile.get("occasion") or "").replace("_", " ").strip() or "birthday"
    tone = profile.get("tone") or "warm and friendly"
    return (
        f"Dear {name},\n"
        f"Happy {occasion}! Wishing you health, joy, and a year full of great memories. "
        f"May your goals come true. With {tone}."
    )
# ========= Gradio UI =========
# Preset input rows offered through gr.Examples. Each row follows the order of
# the input components: interests, occasion, budget min, budget max,
# recipient name, message tone.
# NOTE: "hanukkah" is not one of DEFAULT_OCCASIONS, so the occasion filter
# matches nothing for that row and recommend_topk falls back to the full catalog.
EXAMPLES = [
    ["music, fitness", "birthday", 20, 60, "Noa", "warm and friendly"],
    ["coffee, remote work", "housewarming", 20, 40, "Daniel", "warm"],
    ["travel, design", "hanukkah", 20, 70, "Maya", "friendly"],
    ["photography, tech", "birthday", 30, 100, "Omer", "fun"],
    ["wellness, yoga", "thank_you", 15, 35, "Lior", "heartfelt"],
]
def _md_cell(value) -> str:
    """Render a value as single-line text that is safe inside a Markdown table cell."""
    text = "" if value is None else str(value)
    # Collapse newlines/runs of whitespace and escape the column separator.
    return " ".join(text.split()).replace("|", "\\|")


def _recommendations_to_markdown(df: pd.DataFrame) -> str:
    """Format the name/short_desc/price_usd/similarity columns as a Markdown table.

    Built by hand because ``DataFrame.to_markdown`` requires the optional
    ``tabulate`` package, which this file does not otherwise depend on.
    """
    if df.empty:
        return "_No matching products found._"
    lines = [
        "| name | short_desc | price_usd | similarity |",
        "| --- | --- | --- | --- |",
    ]
    for _, row in df.iterrows():
        price = row["price_usd"]
        price_text = "" if pd.isna(price) else f"${float(price):.2f}"
        lines.append(
            f"| {_md_cell(row['name'])} | {_md_cell(row['short_desc'])} "
            f"| {price_text} | {float(row['similarity']):.3f} |"
        )
    return "\n".join(lines)


def ui_predict(interests: str, occasion: str, budget_min, budget_max, recipient_name: str, tone: str):
    """Gradio callback: build a profile from the form and produce all outputs.

    Args:
        interests: Comma-separated interests.
        occasion: Occasion text (defaults to "birthday" when empty).
        budget_min: Lower budget bound, or ``None``/"" for none.
        budget_max: Upper budget bound, or ``None``/"" for none.
        recipient_name: Name used in the message (defaults to "Friend").
        tone: Message tone (defaults to "warm and friendly").

    Returns:
        A 3-tuple of Markdown strings: the top-3 recommendations table, the
        generated gift idea, and the greeting message.
    """
    def to_number(value):
        return None if value in (None, "") else float(value)

    low, high = to_number(budget_min), to_number(budget_max)
    if low is not None and high is not None and low > high:
        low, high = high, low  # tolerate bounds entered the wrong way round

    profile = {
        "recipient_name": recipient_name or "Friend",
        "interests": [s.strip() for s in (interests or "").split(",") if s.strip()],
        "occasion": occasion or "birthday",
        "budget_min": low,
        "budget_max": high,
        "budget_usd": high if high is not None else 50.0,
        "tone": tone or "warm and friendly",
    }

    # Retrieval. With USE_EMBEDDINGS on, use recommend_topk_embeddings if that
    # optional function has been uncommented; otherwise stay on TF-IDF.
    recommender = recommend_topk
    if USE_EMBEDDINGS:
        recommender = globals().get("recommend_topk_embeddings", recommend_topk)
    out_df = recommender(profile, k=3)

    # Generated
    gen = generate_item(profile)
    msg = generate_message(profile, language="en")

    # Present results
    top3_md = _recommendations_to_markdown(out_df)
    gen_md = f"**{gen['name']}**\n\n{gen['short_desc']}\n\n~${gen['price_usd']:.0f}"
    return top3_md, gen_md, msg
# Page definition: six input fields in three rows, one button, three Markdown outputs.
with gr.Blocks() as demo:
    gr.Markdown("## 🎁 Gift Recommender — English / USD (Top-3 + 1 Generated + Message)")
    # Row 1: what the recipient likes and what the gift is for.
    with gr.Row():
        interests = gr.Textbox(label="Interests (comma-separated)", value="music, fitness")
        occasion = gr.Textbox(label="Occasion", value="birthday")
    # Row 2: budget bounds in USD.
    with gr.Row():
        budget_min = gr.Number(label="Budget min (USD)", value=20)
        budget_max = gr.Number(label="Budget max (USD)", value=60)
    # Row 3: inputs used only by the greeting message.
    with gr.Row():
        recipient_name = gr.Textbox(label="Recipient name", value="Noa")
        tone = gr.Textbox(label="Message tone", value="warm and friendly")
    go = gr.Button("Recommend 🎯")
    # One output component per value returned by ui_predict.
    out_top3 = gr.Markdown(label="Top-3 recommendations")
    out_gen = gr.Markdown(label="Generated item")
    out_msg = gr.Markdown(label="Personalized message")
    # Example rows use the same component order as the click handler's inputs.
    gr.Examples(EXAMPLES, [interests, occasion, budget_min, budget_max, recipient_name, tone])
    # Clicking the button passes the six inputs to ui_predict, in this order,
    # and routes its three return values to the three outputs.
    go.click(ui_predict, [interests, occasion, budget_min, budget_max, recipient_name, tone],
             [out_top3, out_gen, out_msg])
# For Spaces
# Launch only when run as a script, not when the module is imported.
if __name__ == "__main__":
    demo.launch()