inaamullah-younas committed on
Commit
a6faf11
·
verified ·
1 Parent(s): b9d32b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -4
app.py CHANGED
@@ -51,8 +51,18 @@ print("✅ ChromaDB initialized!")
51
 
52
 
53
  # 🔹 Function to Generate LLM Responses
 
 
 
 
 
 
54
  def generate_humanized_response(query, retrieved_text):
55
  """Passes retrieved chunks through Mistral-7B to improve readability."""
 
 
 
 
56
  prompt = f"""You are a medical assistant. Answer the following question based on retrieved text:
57
 
58
  Retrieved Text:
@@ -63,13 +73,12 @@ def generate_humanized_response(query, retrieved_text):
63
  Provide a well-structured, human-like response:
64
  """
65
 
66
- inputs = llm_tokenizer(prompt, return_tensors="pt").to("cuda")
67
- output = llm_model.generate(**inputs, max_new_tokens=150, do_sample=True, stream=True)
68
  response = llm_tokenizer.decode(output[0], skip_special_tokens=True)
69
 
70
  return response
71
- from transformers import AutoModel, AutoTokenizer
72
- import torch
73
 
74
  # 🔹 Load BioMedBERT for Embeddings
75
  embed_model_name = "microsoft/BiomedNLP-BiomedBERT-base-uncased-abstract"
 
51
 
52
 
53
  # 🔹 Function to Generate LLM Responses
54
+ import torch
55
+
56
+ # 🔹 Detect Device
57
+ device = "cuda" if torch.cuda.is_available() else "cpu"
58
+ print(f"✅ Using device: {device}")
59
+
60
  def generate_humanized_response(query, retrieved_text):
61
  """Passes retrieved chunks through Mistral-7B to improve readability."""
62
+
63
+ # 🔹 Truncate retrieved text to avoid long input errors
64
+ retrieved_text = retrieved_text[:500]
65
+
66
  prompt = f"""You are a medical assistant. Answer the following question based on retrieved text:
67
 
68
  Retrieved Text:
 
73
  Provide a well-structured, human-like response:
74
  """
75
 
76
+ inputs = llm_tokenizer(prompt, return_tensors="pt").to(device) # ✅ Uses GPU if available, otherwise CPU
77
+ output = llm_model.generate(**inputs, max_new_tokens=150, do_sample=True)
78
  response = llm_tokenizer.decode(output[0], skip_special_tokens=True)
79
 
80
  return response
81
+
 
82
 
83
  # 🔹 Load BioMedBERT for Embeddings
84
  embed_model_name = "microsoft/BiomedNLP-BiomedBERT-base-uncased-abstract"