Commit 0eae068 · adjusted params
Parent(s): bbb8c10
app.py
CHANGED
@@ -479,10 +479,10 @@ def generate_response_local(system_prompt: str, user_prompt: str) -> str:
                 {"role": "user", "content": user_prompt},
             ],
             max_tokens=LOCAL_MODEL_MAX_OUTPUT_TOKENS,
-            temperature=0.
-            top_p=0.
-            repeat_penalty=1.
-            stop=["<end_of_turn>", "</s>"],
+            temperature=0.3,
+            top_p=0.7,
+            repeat_penalty=1.3,
+            stop=["<end_of_turn>", "</s>", "Question:", "Context:"],
         )
     except Exception as err:
         raise HTTPException(status_code=500, detail=f"Local model error: {err}") from err
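For context, these are the sampling knobs of llama-cpp-python's create_chat_completion call. A minimal sketch of how the adjusted values are typically passed, assuming a llama_cpp.Llama instance configured from the config.py defaults; the model path, system message, and question below are placeholders, not taken from this repo:

# Minimal sketch; the repo's actual Llama setup and call site may differ.
from llama_cpp import Llama

llm = Llama(
    model_path="models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",  # hypothetical path
    n_ctx=2048,      # matches LOCAL_MODEL_CONTEXT_LENGTH default
    n_threads=4,     # matches LOCAL_MODEL_THREADS fallback
    n_batch=256,     # matches LOCAL_MODEL_BATCH_SIZE default
)

completion = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "Answer questions about Bi using the provided context."},
        {"role": "user", "content": "What does Bi specialize in?"},
    ],
    max_tokens=150,        # LOCAL_MODEL_MAX_OUTPUT_TOKENS after this commit
    temperature=0.3,       # lower randomness for factual answers
    top_p=0.7,             # nucleus sampling cutoff
    repeat_penalty=1.3,    # discourage the small model from looping
    stop=["<end_of_turn>", "</s>", "Question:", "Context:"],  # cut off before the model echoes the prompt scaffold
)
print(completion["choices"][0]["message"]["content"])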
@@ -507,34 +507,14 @@ def generate_response(
     assistant_query: bool = False,
 ) -> str:
     """Generate response using configured LLM provider"""
-
-        persona_instruction = (
-            "Respond in first person as Bi's AI assistant. Mention you run locally on a "
-            "quantized TinyLlama 1.1B Chat model (Q4_K_M via llama.cpp with MiniLM embeddings and FAISS)."
-        )
-    else:
-        persona_instruction = (
-            "Speak directly about Bi by name in a professional, supportive manner - like a knowledgeable secretary. "
-            "Use direct references such as 'Bi has experience in...', 'Bi specializes in...', 'Bi worked on...'. "
-            "Rely only on the provided context."
-        )
+    system_prompt = SYSTEM_PROMPT.strip()

-
-        [
-            SYSTEM_PROMPT.strip(),
-            persona_instruction,
-            "Provide a direct, concise answer without repeating the context.",
-            "If the context lacks the answer, state that politely.",
-            "Do not echo or list the context - synthesize it into a clear response.",
-        ]
-    )
-
-    user_prompt = f"""Context:
+    user_prompt = f"""Context about Bi:
 {context}

 Question: {original_question or question}

-
+Answer:"""

     combined_prompt = f"{system_prompt}\n\n{user_prompt}"
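With the persona branches removed, the prompt sent to the local model is just the trimmed SYSTEM_PROMPT plus a short context/question/answer template. A sketch of the resulting combined_prompt, assuming illustrative values for context and question (not taken from the repo's data):

# Sketch of the simplified prompt assembly after this commit (illustrative values).
SYSTEM_PROMPT = """Answer questions about Bi using the provided context. Keep answers short and direct. Always refer to Bi by name."""

context = "Bi has experience building ML pipelines and RAG chatbots."  # hypothetical retrieved chunks
question = "What does Bi work on?"
original_question = None

system_prompt = SYSTEM_PROMPT.strip()
user_prompt = f"""Context about Bi:
{context}

Question: {original_question or question}

Answer:"""

combined_prompt = f"{system_prompt}\n\n{user_prompt}"
print(combined_prompt)

The trailing "Answer:" cue pairs with the new "Question:" and "Context:" stop strings in app.py: the model is prompted to continue after "Answer:" and is cut off if it starts regenerating the prompt scaffold.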
config.py
CHANGED
@@ -20,7 +20,7 @@ LOCAL_MODEL_FILENAME = os.getenv("LOCAL_MODEL_FILENAME", "tinyllama-1.1b-chat-v1
 LOCAL_MODEL_CONTEXT_LENGTH = int(os.getenv("LOCAL_MODEL_CONTEXT_LENGTH", "2048"))
 LOCAL_MODEL_THREADS = int(os.getenv("LOCAL_MODEL_THREADS", str(os.cpu_count() or 4)))
 LOCAL_MODEL_BATCH_SIZE = int(os.getenv("LOCAL_MODEL_BATCH_SIZE", "256"))
-LOCAL_MODEL_MAX_OUTPUT_TOKENS = int(os.getenv("LOCAL_MODEL_MAX_OUTPUT_TOKENS", "
+LOCAL_MODEL_MAX_OUTPUT_TOKENS = int(os.getenv("LOCAL_MODEL_MAX_OUTPUT_TOKENS", "150"))
 LOCAL_MODEL_HF_TOKEN = os.getenv("LOCAL_MODEL_HF_TOKEN", HUGGINGFACE_API_KEY or "")

 # Access control configuration

@@ -37,18 +37,9 @@ SESSION_TOKEN_TTL_SECONDS = int(os.getenv("SESSION_TOKEN_TTL_SECONDS", "600"))

 # RAG Configuration
 EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # Fast, lightweight
-CHUNK_SIZE =
-CHUNK_OVERLAP =
-TOP_K_RESULTS =
+CHUNK_SIZE = 300  # Characters per chunk (reduced for faster inference)
+CHUNK_OVERLAP = 30  # Overlap between chunks
+TOP_K_RESULTS = 2  # Number of relevant chunks to retrieve (reduced to minimize context)

 # System prompt for the chatbot
-SYSTEM_PROMPT = """
-
-Instructions:
-- Refer to Bi directly by name (e.g., "Bi has experience in...", "Bi worked on...")
-- Answer questions based ONLY on the provided context about Bi
-- Be conversational, friendly, and professional - like a knowledgeable secretary
-- If information is not in the context, politely say you don't have that information about Bi
-- Keep responses concise but informative
-- Speak on Bi's behalf in a supportive, professional manner
-"""
+SYSTEM_PROMPT = """Answer questions about Bi using the provided context. Keep answers short and direct. Always refer to Bi by name."""
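The chunking code itself is not part of this commit, so the following is only an illustrative sketch of what CHUNK_SIZE = 300 characters with CHUNK_OVERLAP = 30 implies; the function name and logic are assumptions, not the app's actual splitter:

# Illustrative character-based chunker; not the repo's real implementation.
def chunk_text(text: str, chunk_size: int = 300, overlap: int = 30) -> list[str]:
    chunks = []
    step = chunk_size - overlap  # 270-character stride between chunk starts
    for start in range(0, len(text), step):
        chunk = text[start:start + chunk_size]
        if chunk:
            chunks.append(chunk)
        if start + chunk_size >= len(text):
            break
    return chunks

# With TOP_K_RESULTS = 2, at most 2 * 300 = 600 characters of context are retrieved,
# which keeps the prompt small relative to the 2048-token context window and the
# 150-token output cap set above.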