fugthchat committed
Commit ca47c49 · verified · Parent(s): 77a2fdc

Update app.py

Files changed (1)
  1. app.py +13 -25
app.py CHANGED
@@ -12,19 +12,20 @@ from contextlib import asynccontextmanager
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 
-# --- MODEL MAP ---
+# --- NEW, SMARTER MODEL MAP ---
+# We are swapping to better storytelling models
 MODEL_MAP = {
     "light": {
-        "repo_id": "TheBloke/stablelm-zephyr-3b-GGUF",
-        "filename": "stablelm-zephyr-3b.Q3_K_S.gguf"
+        "repo_id": "microsoft/Phi-3-mini-4k-instruct-gguf",
+        "filename": "Phi-3-mini-4k-instruct-q4.gguf" # 2.13 GB - MUCH smarter
     },
     "medium": {
-        "repo_id": "TheBloke/stablelm-zephyr-3b-GGUF",
-        "filename": "stablelm-zephyr-3b.Q4_K_M.gguf"
+        "repo_id": "TheBloke/DeepSeek-LLM-7B-Chat-GGUF",
+        "filename": "deepseek-llm-7b-chat.Q4_K_M.gguf" # 4.08 GB - High Quality
     },
-    "heavy": {
-        "repo_id": "TheBloke/stablelm-zephyr-3b-GGUF",
-        "filename": "stablelm-zephyr-3b.Q5_K_M.gguf"
+    "heavy": {
+        "repo_id": "TheBloke/DeepSeek-LLM-7B-Chat-GGUF",
+        "filename": "deepseek-llm-7b-chat.Q5_K_M.gguf" # 4.78 GB - Best Quality
     }
 }
 
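This hunk only swaps which GGUF repos and files MODEL_MAP points at; get_llm_instance, which consumes these entries, sits outside the diff. As a rough sketch of how such a loader usually works with huggingface_hub and llama-cpp-python (the caching behaviour and the n_ctx value here are assumptions for illustration, not the repo's actual code):

import logging
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

llm_cache = {}  # model choice -> loaded Llama instance

def get_llm_instance(choice: str):
    # Reuse an already-loaded model for this choice if we have one
    if choice in llm_cache:
        return llm_cache[choice]
    entry = MODEL_MAP.get(choice)  # MODEL_MAP is the dict defined above
    if entry is None:
        return None
    # Download the GGUF file, or reuse the local Hugging Face cache if present
    model_path = hf_hub_download(repo_id=entry["repo_id"], filename=entry["filename"])
    logging.info(f"Loading {entry['filename']} from {model_path}")
    # n_ctx=4096 is an assumption matching Phi-3-mini's 4k context window
    llm = Llama(model_path=model_path, n_ctx=4096)
    llm_cache[choice] = llm
    return llm

Note that the new medium and heavy files are 7B-parameter quants around 4-5 GB on disk, so they need noticeably more memory and load time than the old 3B StableLM quants did.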
@@ -36,10 +37,10 @@ model_lock = threading.Lock()
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     # This code runs ON STARTUP
-    logging.info("Server starting up... Acquiring lock to pre-load model.")
+    logging.info("Server starting up... Acquiring lock to pre-load 'light' model (Phi-3).")
     with model_lock:
         get_llm_instance("light")
-    logging.info("Server is ready and 'light' model is loaded.")
+    logging.info("Server is ready and 'light' model (Phi-3) is loaded.")
 
     yield
 
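The hunk above only changes the log messages inside lifespan; the code that attaches the handler to the app is unchanged and outside the diff. For reference, FastAPI expects the handler to be passed to the constructor, roughly like this (the uvicorn invocation is only an example of how a Space typically serves it):

from fastapi import FastAPI

app = FastAPI(lifespan=lifespan)  # lifespan() is the handler shown above

# typically served with something like:
#   uvicorn app:app --host 0.0.0.0 --port 7860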
@@ -97,20 +98,13 @@ def get_llm_instance(choice: str) -> Llama:
     return None
 
 # --- API Data Models (SIMPLIFIED) ---
-# We only need the full prompt and the model choice
-# The frontend will build the prompt.
 class StoryPrompt(BaseModel):
     prompt: str
     model_choice: str
-
-    # These are no longer used by the backend, but we include them
-    # so the frontend's request doesn't fail
     feedback: str = ""
     story_memory: str = ""
 
-
 # --- API Endpoints ---
-
 @app.get("/")
 def get_status():
     loaded_model = list(llm_cache.keys())[0] if llm_cache else "None"
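After this cleanup, only prompt and model_choice are read by the backend; feedback and story_memory stay in the schema purely so existing frontend requests keep validating. An illustrative client call (the URL is a placeholder, and in practice the frontend sends the prompt already wrapped in the model's chat template, as sketched after the final hunk):

import requests

payload = {
    "prompt": "Write the opening paragraph of a cozy mystery set in a lighthouse.",
    "model_choice": "light",
    "feedback": "",       # accepted for compatibility, ignored by the backend
    "story_memory": "",   # accepted for compatibility, ignored by the backend
}
resp = requests.post("http://localhost:8000/generate", json=payload, timeout=600)
print(resp.json())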
@@ -122,10 +116,6 @@ def get_status():
 
 @app.post("/generate")
 async def generate_story(prompt: StoryPrompt):
-    """
-    Main generation endpoint.
-    This is now much simpler.
-    """
     logging.info("Request received. Waiting to acquire model lock...")
     with model_lock:
         logging.info("Lock acquired. Processing request.")
@@ -135,16 +125,14 @@ async def generate_story(prompt: StoryPrompt):
             logging.error(f"Failed to get model for choice: {prompt.model_choice}")
             return JSONResponse(status_code=503, content={"error": "The AI model is not available or failed to load."})
 
-        # --- THIS IS THE BUG FIX ---
-        # We trust the frontend and use the prompt exactly as it was sent.
-        # We no longer re-format it.
+        # We trust the frontend to build the full prompt
        final_prompt = prompt.prompt
 
        logging.info(f"Generating with {prompt.model_choice}...")
        output = llm(
            final_prompt,
            max_tokens=512,
-           stop=["<|user|>", "<|endoftext|>"],
+           stop=["<|user|>", "<|endoftext|>", "user:"], # Added stop tokens for Phi-3
            echo=False
        )
 