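"""Gradio demo: CV semantic-search chatbot.

Loads precomputed CV chunk embeddings from a Hugging Face dataset into an
in-memory vector store and answers each chat message with the most similar chunk.
"""
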
import os
import json

import gradio as gr
import numpy as np

from langchain_core.vectorstores import InMemoryVectorStore
from langchain_huggingface import HuggingFaceEmbeddings
from huggingface_hub import hf_hub_download


# Embedding model used to embed incoming queries; assumed to be the same model
# that produced the vectors stored in the dataset JSON.
EMBEDDING_NAME = "Qwen/Qwen3-Embedding-0.6B"

def get_vecstore_from_json(embedding_name):
    """Download precomputed CV chunks and vectors, and load them into an in-memory vector store."""
    # Read token for the dataset repo, supplied via the DATASET_TOK environment variable.
    json_path = hf_hub_download(
        repo_id="yucxy/cv",
        filename="cv_vectors_20250814-1613.json",
        repo_type="dataset",
        token=os.environ.get("DATASET_TOK"),
    )
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    embeddings = HuggingFaceEmbeddings(model_name=embedding_name)
    vector_store = InMemoryVectorStore(embeddings)

    # Each JSON item holds the chunk text, its metadata, and its precomputed embedding.
    texts = [item["content"] for item in data]
    metadatas = [item["metadata"] for item in data]
    embeddings_array = [np.array(item["embedding"], dtype=np.float32) for item in data]

    # NOTE: depending on the langchain-core version, InMemoryVectorStore may ignore the
    # precomputed vectors passed here and re-embed the texts with the embedding model.
    vector_store.add_texts(texts=texts, metadatas=metadatas, embeddings=embeddings_array)

    return vector_store

# Build the vector store once at startup; each chat turn then only runs a similarity search.
VECSTORE = get_vecstore_from_json(EMBEDDING_NAME)


def ask_question(message, history):
    """Return the CV chunk most similar to the user's question.

    `history` is unused here but required by the gr.ChatInterface handler signature.
    """
    results = VECSTORE.similarity_search(message, k=1)
    if not results:
        return "Sorry, I couldn't find anything relevant in the CV."
    return results[0].page_content


if __name__ == "__main__":
    # Minimal chat UI: ask_question returns the top retrieved CV chunk verbatim.
    gr.ChatInterface(
        fn=ask_question,
        type="messages",
        chatbot=gr.Chatbot([], type="messages", height=300),
        textbox=gr.Textbox(placeholder="Ask me a question about my CV", container=False, scale=7),
        title="CV Semantic Search Chatbot Demo",
        description="Ask the chatbot a question about my CV",
        theme="ocean",
    ).launch()