Tim Luka Horstmann committed
Commit a3b349c · Parent(s): b8961cc
Updated for Game
app.py CHANGED

@@ -7,6 +7,7 @@ from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
 from fastapi.responses import StreamingResponse, Response
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
+from typing import Optional, Union, Dict, Any
 from llama_cpp import Llama
 from huggingface_hub import login, hf_hub_download
 import logging
@@ -188,16 +189,46 @@ except Exception as e:
     logger.error(f"Error loading cv_text.txt: {str(e)}")
     raise

-async def stream_response(query, history):
+async def stream_response(query, history, game_context=None, mode: Optional[str] = None):
     """Main streaming response function that routes to either Gemini or local model"""
     if USE_GEMINI:
-        async for chunk in stream_response_gemini(query, history):
+        async for chunk in stream_response_gemini(query, history, game_context, mode):
             yield chunk
     else:
-        async for chunk in stream_response_local(query, history):
+        async for chunk in stream_response_local(query, history, game_context, mode):
             yield chunk

-async def stream_response_gemini(query, history):
+def _format_game_context_for_prompt(game_context: Optional[Union[str, Dict[str, Any]]]) -> str:
+    """Return a concise text snippet to inject into the system prompt from game context."""
+    if not game_context:
+        return ""
+    try:
+        if isinstance(game_context, str):
+            return f"\nGAME CONTEXT: The player is currently at a station about {game_context}."
+        if isinstance(game_context, dict):
+            current = game_context.get('current_station') or game_context.get('station') or ''
+            visited = game_context.get('visited_stations') or []
+            context = game_context.get('context') or game_context.get('current_context') or ''
+            parts = ["\nGAME CONTEXT:"]
+            if current:
+                parts.append(f"Current station: {current}.")
+            if context:
+                parts.append(f"Station details: {context}.")
+            if visited:
+                try:
+                    uniq = []
+                    for v in visited:
+                        if v and v not in uniq:
+                            uniq.append(v)
+                    if uniq:
+                        parts.append(f"Visited stations so far: {', '.join(uniq)}.")
+                except Exception:
+                    pass
+            return " ".join(parts)
+    except Exception:
+        return ""
+
+async def stream_response_gemini(query, history, game_context=None, mode: Optional[str] = None):
     """Stream response using Gemini API with a proper system_instruction."""
     logger.info(f"Processing query with Gemini: {query}")
     start_time = time.time()
@@ -205,14 +236,26 @@ async def stream_response_gemini(query, history):

     # 1) Build your system prompt once
     current_date = datetime.now().strftime("%Y-%m-%d")
-
-
-
-
-
-
-
-
+    game_context_text = _format_game_context_for_prompt(game_context)
+    is_game_mode = (mode == 'game') or bool(game_context_text)
+    if is_game_mode:
+        system_prompt = (
+            "You are Tim Luka Horstmann as a friendly in-game 'Station Guide'. "
+            "Stay in first person. Prioritize the current station context provided. "
+            "Use the CV for personal facts, roles and dates. "
+            "For non-personal or general questions (e.g., about cities or institutions), you may use general world knowledge. "
+            "Do not invent personal details beyond the CV/FAQs; if specifics are unknown, say so briefly. "
+            f"Today's date is {current_date}. CV: {full_cv_text}" + game_context_text
+        )
+    else:
+        system_prompt = (
+            "You are Tim Luka Horstmann, a Computer Scientist. "
+            "A user is asking you a question. Respond as yourself, using the first person, in a friendly and concise manner. "
+            "For questions about your CV, base your answer *exclusively* on the provided CV information below and do not add any details not explicitly stated. "
+            "For casual questions not covered by the CV, respond naturally but limit answers to general truths about yourself (e.g., your current location is Paris, France) "
+            "and say 'I don't have specific details to share about that' if pressed for specifics beyond the CV or FAQs. "
+            f"Today's date is {current_date}. CV: {full_cv_text}"
+        )

     # 2) Build only user/model history as `contents`
     contents = []
@@ -272,7 +315,7 @@ async def stream_response_gemini(query, history):
     yield "data: [DONE]\n\n"


-async def stream_response_local(query, history):
+async def stream_response_local(query, history, game_context=None, mode: Optional[str] = None):
     """Stream response using local model"""
     logger.info(f"Processing query with local model: {query}")
     start_time = time.time()
@@ -280,14 +323,25 @@ async def stream_response_local(query, history):

     current_date = datetime.now().strftime("%Y-%m-%d")

-
-
-
-
-
-
-
-
+    game_context_text = _format_game_context_for_prompt(game_context)
+    is_game_mode = (mode == 'game') or bool(game_context_text)
+    if is_game_mode:
+        system_prompt = (
+            "/no_think You are Tim Luka Horstmann as a friendly in-game 'Station Guide'. "
+            "Stay in first person. Prioritize the current station context provided. "
+            "Use the CV for personal facts, roles and dates. "
+            "For non-personal or general questions (e.g., about cities or institutions), you may use general world knowledge. "
+            "Do not invent personal details beyond the CV/FAQs; if specifics are unknown, say so briefly. "
+            f"Today's date is {current_date}. CV: {full_cv_text}" + game_context_text
+        )
+    else:
+        system_prompt = (
+            "/no_think You are Tim Luka Horstmann, a Computer Scientist. A user is asking you a question. Respond as yourself, using the first person, in a friendly and concise manner. "
+            "For questions about your CV, base your answer *exclusively* on the provided CV information below and do not add any details not explicitly stated. "
+            "For casual questions not covered by the CV, respond naturally but limit answers to general truths about yourself (e.g., your current location is Paris, France, or your field is AI) "
+            "and say 'I don't have specific details to share about that' if pressed for specifics beyond the CV or FAQs. Do not invent facts, experiences, or opinions not supported by the CV or FAQs. "
+            f"Today's date is {current_date}. CV: {full_cv_text}"
+        )

     if not isinstance(system_prompt, str):
         system_prompt = str(system_prompt)
@@ -340,6 +394,8 @@ async def stream_response_local(query, history):
 class QueryRequest(BaseModel):
     query: str
     history: list
+    game_context: Optional[Union[str, Dict[str, Any]]] = None
+    mode: Optional[str] = None

 class TTSRequest(BaseModel):
     text: str
@@ -363,7 +419,9 @@ def get_ram_usage():
 async def predict(request: Request, query_request: QueryRequest):
     query = query_request.query
     history = query_request.history
-
+    game_context = query_request.game_context
+    mode = (query_request.mode or '').lower() or None
+    return StreamingResponse(stream_response(query, history, game_context, mode), media_type="text/event-stream")

 @app.post("/api/tts")
 @limiter.limit("5/minute") # Allow 5 TTS requests per minute per IP
@@ -490,4 +548,4 @@ async def keep_model_warm():
         logger.error(f"Error in periodic warm-up: {str(e)}")

         # Wait for 13 minutes before the next warm-up
-        await asyncio.sleep(13 * 60)
+        await asyncio.sleep(13 * 60)