Spaces:
Build error
```python
import os
import gradio as gr
from llama_cpp import Llama
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.prompts import PromptTemplate

# Initialize the embedding model
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True}
)

# Load the existing Chroma vector store
persist_directory = os.path.join(os.path.dirname(__file__), 'mydb')
vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings)

# Initialize the Llama model
llm = Llama.from_pretrained(
    repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF",
    filename="Llama-3.2-1B-Instruct-Q8_0.gguf",
)

# Create the RAG prompt template
template = """Answer the question based only on the following context:

{context}

Question: {question}

Answer the question in a clear way. If you cannot find the answer in the context, just say "I don't have enough information to answer this question."

Make sure to:
1. Only use information from the provided context
2. If you're unsure, acknowledge it
"""

prompt = PromptTemplate.from_template(template)


def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    # top_p,
):
    # Build the messages list
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Search the vector store
    retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
    docs = retriever.get_relevant_documents(message)
    context = "\n\n".join([doc.page_content for doc in docs])

    # Format the prompt
    final_prompt = prompt.format(context=context, question=message)

    # Add the formatted prompt to messages
    messages.append({"role": "user", "content": final_prompt})

    # Generate response using the Llama model
    response = llm.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        # top_p=top_p,
    )

    # Extract the assistant's reply
    assistant_reply = response['choices'][0]['message']['content']
    return assistant_reply


# Create Gradio Chat Interface
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly chatbot.", label="System Message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"),
        # gr.Slider(
        #     minimum=0.1,
        #     maximum=1.0,
        #     value=0.95,
        #     step=0.05,
        #     label="Top-p (Nucleus Sampling)",
        # ),
    ],
    title="Document-Based QA with Llama",
    description="A PDF Chat interface powered by the Llama model.",
    examples=["What is a Computer?"],
    theme="default",
)

if __name__ == "__main__":
    demo.launch()
```
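
A "Build error" on a Space happens while the container is being built (i.e. while dependencies are installed), before app.py ever runs, so the dependency list is the first thing to check. Below is a minimal requirements.txt sketch covering only the imports used in the script above; package names are the standard PyPI ones, versions are left unpinned and should be adjusted to whatever the build log asks for:

```text
# requirements.txt — minimal sketch matching the imports in app.py above
# (no pinned versions; pin as needed for your Space)
gradio
llama-cpp-python
langchain
langchain-community
chromadb
sentence-transformers
```

Whichever package the build log stops on is usually the one to pin to a different version or replace; llama-cpp-python in particular often compiles from source during the build, which is a common point of failure.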