"""Gradio chatbot that answers questions by semantic search over CV chunks."""

import os
import json

import gradio as gr
import numpy as np
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_huggingface import HuggingFaceEmbeddings
from huggingface_hub import hf_hub_download

# Embedding model used to embed incoming queries (and passed to the store).
EMBEDDING_NAME = "Qwen/Qwen3-Embedding-0.6B"


def get_vecstore_from_json(embedding_name: str) -> InMemoryVectorStore:
    """Download the CV vector dump and load it into an in-memory store.

    The JSON file is fetched from the ``yucxy/cv`` dataset repository using
    the token found in the ``DATASET_TOK`` environment variable (``None`` if
    the variable is unset). Every record in the file is indexed by the keys
    ``"content"``, ``"metadata"`` and ``"embedding"``.

    Args:
        embedding_name: Model name handed to ``HuggingFaceEmbeddings``.

    Returns:
        An ``InMemoryVectorStore`` populated from the downloaded records.
    """
    json_path = hf_hub_download(
        repo_id="yucxy/cv",
        filename="cv_vectors_20250814-1613.json",
        repo_type="dataset",
        token=os.environ.get("DATASET_TOK"),
    )
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    embeddings = HuggingFaceEmbeddings(model_name=embedding_name)
    vector_store = InMemoryVectorStore(embeddings)

    texts = [item["content"] for item in data]
    metadatas = [item["metadata"] for item in data]
    embeddings_array = [
        np.array(item["embedding"], dtype=np.float32) for item in data
    ]

    # NOTE(review): confirm that InMemoryVectorStore.add_texts honours the
    # `embeddings` keyword. If it is silently ignored, the texts are
    # re-embedded with the model and the precomputed vectors go unused.
    vector_store.add_texts(
        texts=texts,
        metadatas=metadatas,
        embeddings=embeddings_array,
    )
    return vector_store


# Built once at import time so every chat request reuses the same store.
VECSTORE = get_vecstore_from_json(EMBEDDING_NAME)


def ask_question(message: str, history: list) -> str:
    """Return the text of the stored chunk most similar to *message*.

    Args:
        message: The user's question.
        history: Prior chat messages supplied by Gradio; not used.

    Returns:
        The ``page_content`` of the top search hit, or a fallback message
        when the search yields no results.
    """
    results = VECSTORE.similarity_search(message, k=1)
    if not results:
        # An empty store would otherwise raise IndexError in the callback.
        return "No matching content found."
    return results[0].page_content


if __name__ == "__main__":
    gr.ChatInterface(
        fn=ask_question,
        type="messages",
        chatbot=gr.Chatbot([], type="messages", height=300),
        textbox=gr.Textbox(
            placeholder="Ask me a question about my CV",
            container=False,
            scale=7,
        ),
        title="CV Semantic Search Chatbot Demo",
        description="Ask the chatbot a question about my CV",
        theme="ocean",
    ).launch()