Kalpokoch committed on
Commit
b3975f9
·
verified ·
1 Parent(s): 0a51966

Update app/app.py

Browse files
Files changed (1) hide show
  1. app/app.py +3 -3
app/app.py CHANGED
@@ -73,8 +73,8 @@ logger.info(f"Loading GGUF model from: {MODEL_PATH}")
73
  try:
74
  llm = Llama(
75
  model_path=MODEL_PATH,
76
- n_ctx=4096,
77
- n_threads=4,
78
  n_batch=512,
79
  use_mlock=True,
80
  verbose=False
@@ -125,7 +125,7 @@ async def generate_llm_response(prompt: str, request_id: str):
125
  loop = asyncio.get_running_loop()
126
  response = await loop.run_in_executor(
127
  None,
128
- lambda: llm(prompt, max_tokens=2048, stop=["###", "Question:", "Context:", "</s>"], temperature=0.05, echo=False)
129
  )
130
  answer = response["choices"][0]["text"].strip()
131
  if not answer:
 
73
  try:
74
  llm = Llama(
75
  model_path=MODEL_PATH,
76
+ n_ctx=2048,
77
+ n_threads=1,
78
  n_batch=512,
79
  use_mlock=True,
80
  verbose=False
 
125
  loop = asyncio.get_running_loop()
126
  response = await loop.run_in_executor(
127
  None,
128
+ lambda: llm(prompt, max_tokens=1024, stop=["###", "Question:", "Context:", "</s>"], temperature=0.05, echo=False)
129
  )
130
  answer = response["choices"][0]["text"].strip()
131
  if not answer: