Update app.py
app.py CHANGED

@@ -12,19 +12,20 @@ from contextlib import asynccontextmanager
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 
-# --- MODEL MAP ---
+# --- NEW, SMARTER MODEL MAP ---
+# We are swapping to better storytelling models
 MODEL_MAP = {
     "light": {
-        "repo_id": "
-        "filename": "
+        "repo_id": "microsoft/Phi-3-mini-4k-instruct-gguf",
+        "filename": "Phi-3-mini-4k-instruct-q4.gguf"  # 2.13 GB - MUCH smarter
     },
     "medium": {
-        "repo_id": "TheBloke/
-        "filename": "
+        "repo_id": "TheBloke/DeepSeek-LLM-7B-Chat-GGUF",
+        "filename": "deepseek-llm-7b-chat.Q4_K_M.gguf"  # 4.08 GB - High Quality
     },
-    "heavy": {
-        "repo_id": "TheBloke/
-        "filename": "
+    "heavy": {
+        "repo_id": "TheBloke/DeepSeek-LLM-7B-Chat-GGUF",
+        "filename": "deepseek-llm-7b-chat.Q5_K_M.gguf"  # 4.78 GB - Best Quality
     }
 }
 
@@ -36,10 +37,10 @@ model_lock = threading.Lock()
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     # This code runs ON STARTUP
-    logging.info("Server starting up... Acquiring lock to pre-load model.")
+    logging.info("Server starting up... Acquiring lock to pre-load 'light' model (Phi-3).")
     with model_lock:
         get_llm_instance("light")
-    logging.info("Server is ready and 'light' model is loaded.")
+    logging.info("Server is ready and 'light' model (Phi-3) is loaded.")
 
     yield
 
@@ -97,20 +98,13 @@ def get_llm_instance(choice: str) -> Llama:
         return None
 
 # --- API Data Models (SIMPLIFIED) ---
-# We only need the full prompt and the model choice
-# The frontend will build the prompt.
 class StoryPrompt(BaseModel):
     prompt: str
     model_choice: str
-
-    # These are no longer used by the backend, but we include them
-    # so the frontend's request doesn't fail
     feedback: str = ""
     story_memory: str = ""
 
-
 # --- API Endpoints ---
-
 @app.get("/")
 def get_status():
     loaded_model = list(llm_cache.keys())[0] if llm_cache else "None"
@@ -122,10 +116,6 @@ def get_status():
 
 @app.post("/generate")
 async def generate_story(prompt: StoryPrompt):
-    """
-    Main generation endpoint.
-    This is now much simpler.
-    """
     logging.info("Request received. Waiting to acquire model lock...")
     with model_lock:
         logging.info("Lock acquired. Processing request.")
@@ -135,16 +125,14 @@ async def generate_story(prompt: StoryPrompt):
             logging.error(f"Failed to get model for choice: {prompt.model_choice}")
             return JSONResponse(status_code=503, content={"error": "The AI model is not available or failed to load."})
 
-        #
-        # We trust the frontend and use the prompt exactly as it was sent.
-        # We no longer re-format it.
+        # We trust the frontend to build the full prompt
        final_prompt = prompt.prompt
 
        logging.info(f"Generating with {prompt.model_choice}...")
        output = llm(
            final_prompt,
            max_tokens=512,
-           stop=["<|user|>", "<|endoftext|>"],
+           stop=["<|user|>", "<|endoftext|>", "user:"],  # Added stop tokens for Phi-3
            echo=False
        )
 
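For context, `get_llm_instance` is referenced in the hunks above but not changed by this commit, so its body is not shown. A minimal sketch of how a loader that consumes these `MODEL_MAP` entries typically looks with `huggingface_hub` and `llama-cpp-python` follows; it is an assumption for illustration, not the code in this repository, and the `n_ctx`/`n_threads` values and the single-model eviction policy are guesses.

# Hypothetical sketch only -- the real get_llm_instance in app.py may differ.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

MODEL_MAP = {
    "light": {
        "repo_id": "microsoft/Phi-3-mini-4k-instruct-gguf",
        "filename": "Phi-3-mini-4k-instruct-q4.gguf",
    },
}

llm_cache = {}

def get_llm_instance(choice: str):
    # Return the cached instance if this model is already loaded
    if choice in llm_cache:
        return llm_cache[choice]
    config = MODEL_MAP.get(choice)
    if config is None:
        return None
    # Fetch the GGUF file (or reuse the local Hugging Face cache copy)
    model_path = hf_hub_download(repo_id=config["repo_id"], filename=config["filename"])
    # n_ctx / n_threads are placeholder values, not taken from this commit
    llm = Llama(model_path=model_path, n_ctx=4096, n_threads=4)
    llm_cache.clear()       # keep a single model resident at a time
    llm_cache[choice] = llm
    return llm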
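Because the backend now passes `prompt.prompt` to the model verbatim and the stop list targets Phi-3-style tags, the frontend is expected to send an already-formatted chat prompt. A hedged example of calling `/generate` is below; the host, port, and exact prompt template are assumptions.

import requests

# Phi-3-instruct style prompt; the exact template the frontend builds is an assumption.
prompt_text = (
    "<|user|>\n"
    "Write the opening scene of a cozy mystery set in a lighthouse.<|end|>\n"
    "<|assistant|>\n"
)

payload = {
    "prompt": prompt_text,
    "model_choice": "light",   # "light", "medium", or "heavy" per MODEL_MAP
    "feedback": "",            # accepted by StoryPrompt but ignored by the backend
    "story_memory": "",        # accepted by StoryPrompt but ignored by the backend
}

# Base URL is an assumption; point it at wherever the FastAPI app is running.
response = requests.post("http://localhost:8000/generate", json=payload, timeout=600)
print(response.json())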