Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| 简洁版BERT+FAISS标语数据库 | |
| 输入:产品/业务描述 | |
| 输出:匹配的广告标语 | |
| """ | |
| import numpy as np | |
| import faiss | |
| import json | |
| from sentence_transformers import SentenceTransformer | |
| from datasets import Dataset | |
| import pandas as pd | |
class SloganDatabase:
    """Slogan lookup backed by sentence embeddings and a FAISS index.

    Typical use: ``build_index(create_dataset())`` (or ``load()``), then
    ``search(query)``. ``save()`` persists the index and the records as
    ``<path>.faiss`` and ``<path>.json``.

    NOTE(review): the index ranks by inner product; scores equal cosine
    similarity only if the encoder emits unit-length vectors -- confirm
    for the configured model.
    """

    def __init__(self):
        # Embedding model used for both the indexed texts and the queries.
        self.encoder = SentenceTransformer('all-MiniLM-L6-v2')
        # FAISS index; stays None until build_index() or load() succeeds.
        self.index = None
        # Record dicts, stored in the same order as the rows of the index.
        self.slogans = []

    def create_dataset(self):
        """Create the built-in slogan dataset (jewelry and luxury goods).

        Returns:
            A list of dicts with the keys ``brand``, ``category``,
            ``description``, ``slogan`` and ``search_text``, where
            ``search_text`` is brand, category and description joined by
            single spaces.
        """
        # Sample rows: [brand, category, description, slogan]
        data = [
            # Top-tier jewelry houses
            ["Tiffany & Co.", "jewelry", "luxury diamond jewelry and engagement rings", "A Diamond is Forever"],
            ["Cartier", "luxury_jewelry", "high-end jewelry watches and accessories", "L'art de vivre"],
            ["Van Cleef & Arpels", "jewelry", "French luxury jewelry and watches", "Poetry of Time"],
            ["Harry Winston", "jewelry", "rare diamonds and luxury jewelry", "Rare Jewels of the World"],
            ["Bulgari", "jewelry", "Italian luxury jewelry and watches", "Italian Excellence"],
            ["Chopard", "jewelry", "Swiss luxury jewelry and watches", "Happy Diamonds"],
            ["Graff", "jewelry", "exceptional diamonds and jewelry", "The Most Fabulous Jewels in the World"],
            ["Piaget", "jewelry", "Swiss luxury watches and jewelry", "Possession"],
            ["Boucheron", "jewelry", "French high jewelry and luxury watches", "Le Joaillier Depuis 1858"],
            ["Mikimoto", "jewelry", "cultured pearl jewelry", "The Originator of Cultured Pearls"],
            # Luxury fashion brands
            ["Louis Vuitton", "luxury_fashion", "luxury leather goods and fashion", "The Art of Travel"],
            ["Hermès", "luxury_fashion", "French luxury goods and accessories", "Luxury in the making"],
            ["Chanel", "luxury_fashion", "haute couture and luxury fashion", "Inside every woman there is a flower and a cat"],
            ["Gucci", "luxury_fashion", "Italian luxury fashion and accessories", "Quality is remembered long after price is forgotten"],
            ["Prada", "luxury_fashion", "Italian luxury fashion house", "Prada"],
            ["Dior", "luxury_fashion", "French luxury fashion and beauty", "Miss Dior"],
            ["Versace", "luxury_fashion", "Italian luxury fashion design", "Virtus"],
            ["Saint Laurent", "luxury_fashion", "French luxury fashion house", "Saint Laurent Paris"],
            ["Balenciaga", "luxury_fashion", "Spanish luxury fashion house", "Balenciaga"],
            ["Bottega Veneta", "luxury_fashion", "Italian luxury leather goods", "When your own initials are enough"],
            # Watch brands
            ["Rolex", "luxury_watches", "Swiss luxury watches and timepieces", "Perpetual, Spirit of Excellence"],
            ["Patek Philippe", "luxury_watches", "Swiss luxury watch manufacturer", "You never actually own a Patek Philippe"],
            ["Audemars Piguet", "luxury_watches", "Swiss luxury watch brand", "To break the rules, you must first master them"],
            ["Omega", "luxury_watches", "Swiss luxury watch manufacturer", "Precision"],
            ["TAG Heuer", "luxury_watches", "Swiss luxury watches", "Don't crack under pressure"],
            ["Breitling", "luxury_watches", "Swiss luxury watchmaker", "Instruments for Professionals"],
            ["IWC", "luxury_watches", "Swiss luxury watch company", "Engineered for men"],
            ["Jaeger-LeCoultre", "luxury_watches", "Swiss luxury watch manufacturer", "The World's Most Complicated Watches"],
            ["Vacheron Constantin", "luxury_watches", "Swiss luxury watch manufacturer", "One of Not Many"],
            ["A. Lange & Söhne", "luxury_watches", "German luxury watch manufacturer", "When nothing else will do"],
            # Fashion jewelry
            ["Pandora", "fashion_jewelry", "Danish jewelry brand charm bracelets", "Be Love"],
            ["Swarovski", "fashion_jewelry", "Austrian crystal jewelry and accessories", "Unleash Your Light"],
            ["Daniel Wellington", "fashion_watches", "Swedish watch brand minimalist design", "Live the moment"],
            ["Alex and Ani", "fashion_jewelry", "American jewelry brand spiritual bracelets", "Positive Energy"],
            ["Kendra Scott", "fashion_jewelry", "American jewelry designer colorful stones", "Live colorfully"],
            ["Monica Vinader", "fashion_jewelry", "British jewelry brand contemporary design", "Everyday luxury"],
            ["Mejuri", "fashion_jewelry", "Canadian jewelry brand everyday luxury", "Everyday fine"],
            ["Gorjana", "fashion_jewelry", "California jewelry brand layered necklaces", "Live your layer"],
            ["Kate Spade", "fashion_jewelry", "American fashion accessories jewelry", "Live colorfully"],
            ["Marc Jacobs", "fashion_jewelry", "American fashion designer accessories", "Marc Jacobs"],
            # Diamond retailers and custom jewelry
            ["Blue Nile", "diamond_jewelry", "online diamond jewelry retailer", "Extraordinary diamonds for extraordinary moments"],
            ["James Allen", "diamond_jewelry", "online engagement ring retailer", "See it. Love it. Own it."],
            ["Brilliant Earth", "diamond_jewelry", "ethical diamond jewelry", "Brilliant Earth"],
            ["With Clarity", "diamond_jewelry", "lab-grown diamond jewelry", "Diamonds. Redefined."],
            ["Clean Origin", "diamond_jewelry", "lab-created diamond jewelry", "Grown with love"],
            ["Ritani", "diamond_jewelry", "engagement rings and wedding bands", "Love is in the details"],
            ["Vrai", "diamond_jewelry", "lab-grown diamond jewelry", "Created, not mined"],
            ["Catbird", "jewelry", "Brooklyn-based jewelry designer", "Made in Brooklyn"],
            ["Wwake", "jewelry", "contemporary fine jewelry designer", "Wwake"],
            ["Jacquie Aiche", "jewelry", "California jewelry designer bohemian luxury", "Jacquie Aiche"],
            # Chinese-market jewelry brands
            ["周大福", "jewelry", "香港珠宝品牌黄金钻石", "心意足金"],
            ["周生生", "jewelry", "香港珠宝品牌传统工艺", "传承经典"],
            ["老凤祥", "jewelry", "中国传统珠宝品牌黄金首饰", "老凤祥,真金不怕火炼"],
            ["六福珠宝", "jewelry", "香港珠宝品牌时尚设计", "六福临门"],
            ["潘多拉", "jewelry", "丹麦珠宝品牌串珠手链", "表达你的故事"],
            ["周大生", "jewelry", "中国珠宝品牌钻石首饰", "爱就在一起"],
            ["金伯利", "jewelry", "中国钻石珠宝品牌", "只为更好的你"],
            ["戴比尔斯", "diamond_jewelry", "钻石开采珠宝品牌", "钻石恒久远,一颗永流传"],
            ["施华洛世奇", "crystal_jewelry", "奥地利水晶珠宝品牌", "释放你的光芒"],
            ["谢瑞麟", "jewelry", "香港珠宝设计师品牌", "艺术珠宝"],
            # Luxury accessories
            ["Goyard", "luxury_accessories", "French luxury leather goods", "Goyard"],
            ["Moynat", "luxury_accessories", "French luxury leather goods", "Moynat"],
            ["Berluti", "luxury_accessories", "French luxury leather goods", "Berluti"],
            ["Valextra", "luxury_accessories", "Italian luxury leather goods", "Milanese excellence since 1937"],
            ["Loewe", "luxury_accessories", "Spanish luxury leather goods", "Craft"],
            ["Brunello Cucinelli", "luxury_fashion", "Italian luxury fashion cashmere", "Humanistic Enterprise"],
            ["Loro Piana", "luxury_fashion", "Italian luxury textile and clothing", "Excellence in natural fibers"],
            ["Kiton", "luxury_fashion", "Italian luxury menswear", "The most beautiful thing made by man"],
            ["Zegna", "luxury_fashion", "Italian luxury menswear", "What makes a man"],
            ["Brioni", "luxury_fashion", "Italian luxury menswear", "Roman style"],
            # Emerging luxury brands
            ["Jacquemus", "luxury_fashion", "French luxury fashion house", "La Montagne"],
            ["Ganni", "luxury_fashion", "Danish fashion brand", "Ganni"],
            ["Staud", "luxury_fashion", "American fashion brand", "Staud"],
            ["Cult Gaia", "luxury_accessories", "American accessories brand", "Cult Gaia"],
            ["Rosantica", "jewelry", "Italian jewelry brand", "Rosantica"],
            ["Alighieri", "jewelry", "British jewelry brand", "The Inferno"],
            ["Lizzie Fortunato", "jewelry", "American jewelry brand", "Lizzie Fortunato"],
            ["Aurate", "jewelry", "American jewelry brand", "Accessible luxury"],
            ["AUrate New York", "jewelry", "New York jewelry brand", "Radically responsible luxury"],
            ["Missoma", "jewelry", "British jewelry brand", "Missoma"],
        ]

        # Build the records directly; a DataFrame round trip is not needed
        # just to concatenate three strings per row.
        return [
            {
                'brand': brand,
                'category': category,
                'description': description,
                'slogan': slogan,
                # Text that gets embedded: all descriptive fields combined.
                'search_text': f"{brand} {category} {description}",
            }
            for brand, category, description, slogan in data
        ]

    def build_index(self, data):
        """Embed every record's ``search_text`` and build the FAISS index.

        Args:
            data: Iterable of record dicts, each with a ``search_text`` key
                (the format returned by ``create_dataset``).

        Raises:
            ValueError: If ``data`` is empty.
        """
        records = list(data)
        if not records:
            raise ValueError("Cannot build an index from an empty dataset")

        print("🔨 Building FAISS index...")

        texts = [item['search_text'] for item in records]

        # FAISS expects a C-contiguous float32 matrix.
        embeddings = np.ascontiguousarray(
            self.encoder.encode(texts, show_progress_bar=True),
            dtype='float32',
        )

        # Inner-product index; the dimension comes from the encoder output
        # instead of a hard-coded constant so other models keep working.
        index = faiss.IndexFlatIP(embeddings.shape[1])
        index.add(embeddings)

        # Publish index and records together so they never disagree.
        self.index = index
        self.slogans = records

        print(f"✅ Index built with {len(records)} slogans")

    def search(self, query, k=5):
        """Return up to ``k`` records whose search text best matches ``query``.

        Args:
            query: Free-text product or business description.
            k: Maximum number of results.

        Returns:
            A list of record copies, best match first, each with an added
            ``similarity_score`` (float) holding the score from the index.
            Fewer than ``k`` items are returned when the index holds fewer
            than ``k`` rows.

        Raises:
            ValueError: If no index has been built or loaded.
        """
        if self.index is None:
            raise ValueError("Index not built yet!")

        query_embedding = np.ascontiguousarray(
            self.encoder.encode([query]), dtype='float32'
        )
        scores, indices = self.index.search(query_embedding, k)

        total = len(self.slogans)
        results = []
        for score, idx in zip(scores[0], indices[0]):
            # FAISS pads missing neighbours with -1; a plain `idx < total`
            # check would let -1 through and pick the last record.
            if 0 <= idx < total:
                result = self.slogans[int(idx)].copy()
                result['similarity_score'] = float(score)
                results.append(result)
        return results

    def save(self, path="slogan_db"):
        """Write the index to ``<path>.faiss`` and the records to ``<path>.json``.

        Args:
            path: File path prefix (without extension).

        Raises:
            ValueError: If no index has been built or loaded.
        """
        if self.index is None:
            raise ValueError("Index not built yet!")

        faiss.write_index(self.index, f"{path}.faiss")

        with open(f"{path}.json", 'w', encoding='utf-8') as f:
            json.dump(self.slogans, f, ensure_ascii=False, indent=2)

        print(f"💾 Database saved to {path}")

    def load(self, path="slogan_db"):
        """Load the index and records previously written by ``save``.

        On failure the current ``index`` and ``slogans`` are left untouched.

        Args:
            path: File path prefix (without extension).

        Returns:
            True if both files were read and are consistent, else False.
        """
        try:
            # Read into locals first so a partial failure cannot leave a
            # new index paired with stale records.
            index = faiss.read_index(f"{path}.faiss")
            with open(f"{path}.json", 'r', encoding='utf-8') as f:
                slogans = json.load(f)
        except (OSError, RuntimeError, ValueError):
            # OSError: missing/unreadable JSON file; RuntimeError: FAISS
            # read failure; ValueError: malformed JSON or bad encoding.
            print(f"❌ Failed to load database from {path}")
            return False

        # Index rows and records must line up one-to-one for search().
        if not isinstance(slogans, list) or index.ntotal != len(slogans):
            print(f"❌ Failed to load database from {path}")
            return False

        self.index = index
        self.slogans = slogans
        print(f"📂 Database loaded from {path}")
        return True
def main():
    """Load or build the slogan database, then run a few demo searches."""
    print("🚀 Creating Slogan Database...")

    database = SloganDatabase()

    # Reuse a saved database when one is available; otherwise build and persist it.
    loaded = database.load()
    if not loaded:
        print("📊 Creating new database...")
        database.build_index(database.create_dataset())
        database.save()

    # Sample queries used to exercise the search.
    sample_queries = (
        "钻石订婚戒指",
        "奢侈品手袋",
        "瑞士手表品牌",
        "珍珠首饰",
        "黄金项链",
        "时尚耳环",
        "luxury jewelry brand",
        "designer handbag",
        "crystal accessories",
        "wedding rings",
    )

    print("\n🔍 Testing searches...")
    separator = "-" * 40
    for text in sample_queries:
        print(f"\n查询: {text}")
        print(separator)
        hits = database.search(text, k=3)
        for rank, hit in enumerate(hits, start=1):
            # NOTE(review): the trailing empty entry reproduces the blank
            # line printed after each hit; its placement inside this loop
            # is inferred because the original indentation was lost.
            lines = [
                f"{rank}. {hit['brand']} ({hit['category']})",
                f" 描述: {hit['description']}",
                f" 标语: {hit['slogan']}",
                f" 相似度: {hit['similarity_score']:.3f}",
                "",
            ]
            print("\n".join(lines))
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()