Tim Luka Horstmann committed
Commit a3b349c · 1 Parent(s): b8961cc

Updated for Game

Files changed (1): app.py (+81 -23)
app.py CHANGED
@@ -7,6 +7,7 @@ from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
 from fastapi.responses import StreamingResponse, Response
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
+from typing import Optional, Union, Dict, Any
 from llama_cpp import Llama
 from huggingface_hub import login, hf_hub_download
 import logging
@@ -188,16 +189,46 @@ except Exception as e:
     logger.error(f"Error loading cv_text.txt: {str(e)}")
     raise
 
-async def stream_response(query, history):
+async def stream_response(query, history, game_context=None, mode: Optional[str] = None):
     """Main streaming response function that routes to either Gemini or local model"""
     if USE_GEMINI:
-        async for chunk in stream_response_gemini(query, history):
+        async for chunk in stream_response_gemini(query, history, game_context, mode):
             yield chunk
     else:
-        async for chunk in stream_response_local(query, history):
+        async for chunk in stream_response_local(query, history, game_context, mode):
             yield chunk
 
+def _format_game_context_for_prompt(game_context: Optional[Union[str, Dict[str, Any]]]) -> str:
+    """Return a concise text snippet to inject into the system prompt from game context."""
+    if not game_context:
+        return ""
+    try:
+        if isinstance(game_context, str):
+            return f"\nGAME CONTEXT: The player is currently at a station about {game_context}."
+        if isinstance(game_context, dict):
+            current = game_context.get('current_station') or game_context.get('station') or ''
+            visited = game_context.get('visited_stations') or []
+            context = game_context.get('context') or game_context.get('current_context') or ''
+            parts = ["\nGAME CONTEXT:"]
+            if current:
+                parts.append(f"Current station: {current}.")
+            if context:
+                parts.append(f"Station details: {context}.")
+            if visited:
+                try:
+                    uniq = []
+                    for v in visited:
+                        if v and v not in uniq:
+                            uniq.append(v)
+                    if uniq:
+                        parts.append(f"Visited stations so far: {', '.join(uniq)}.")
+                except Exception:
+                    pass
+            return " ".join(parts)
+    except Exception:
+        return ""
+
-async def stream_response_gemini(query, history):
+async def stream_response_gemini(query, history, game_context=None, mode: Optional[str] = None):
     """Stream response using Gemini API with a proper system_instruction."""
     logger.info(f"Processing query with Gemini: {query}")
     start_time = time.time()
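A minimal sketch of what the new helper produces for the two input shapes it accepts; the station names and details below are made-up examples, not values from the game:

# Illustrative inputs only.
print(_format_game_context_for_prompt("my master's degree"))
# -> "\nGAME CONTEXT: The player is currently at a station about my master's degree."

ctx = {
    "current_station": "Paris",
    "context": "Studying in Paris",
    "visited_stations": ["Hamburg", "Cambridge", "Hamburg"],  # duplicate dropped
}
print(_format_game_context_for_prompt(ctx))
# -> "\nGAME CONTEXT: Current station: Paris. Station details: Studying in Paris. Visited stations so far: Hamburg, Cambridge."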
@@ -205,14 +236,26 @@ async def stream_response_gemini(query, history):
 
     # 1) Build your system prompt once
     current_date = datetime.now().strftime("%Y-%m-%d")
-    system_prompt = (
-        "You are Tim Luka Horstmann, a Computer Scientist. "
-        "A user is asking you a question. Respond as yourself, using the first person, in a friendly and concise manner. "
-        "For questions about your CV, base your answer *exclusively* on the provided CV information below and do not add any details not explicitly stated. "
-        "For casual questions not covered by the CV, respond naturally but limit answers to general truths about yourself (e.g., your current location is Paris, France) "
-        "and say 'I don't have specific details to share about that' if pressed for specifics beyond the CV or FAQs. "
-        f"Today's date is {current_date}. CV: {full_cv_text}"
-    )
+    game_context_text = _format_game_context_for_prompt(game_context)
+    is_game_mode = (mode == 'game') or bool(game_context_text)
+    if is_game_mode:
+        system_prompt = (
+            "You are Tim Luka Horstmann as a friendly in-game 'Station Guide'. "
+            "Stay in first person. Prioritize the current station context provided. "
+            "Use the CV for personal facts, roles and dates. "
+            "For non-personal or general questions (e.g., about cities or institutions), you may use general world knowledge. "
+            "Do not invent personal details beyond the CV/FAQs; if specifics are unknown, say so briefly. "
+            f"Today's date is {current_date}. CV: {full_cv_text}" + game_context_text
+        )
+    else:
+        system_prompt = (
+            "You are Tim Luka Horstmann, a Computer Scientist. "
+            "A user is asking you a question. Respond as yourself, using the first person, in a friendly and concise manner. "
+            "For questions about your CV, base your answer *exclusively* on the provided CV information below and do not add any details not explicitly stated. "
+            "For casual questions not covered by the CV, respond naturally but limit answers to general truths about yourself (e.g., your current location is Paris, France) "
+            "and say 'I don't have specific details to share about that' if pressed for specifics beyond the CV or FAQs. "
+            f"Today's date is {current_date}. CV: {full_cv_text}"
+        )
 
     # 2) Build only user/model history as `contents`
     contents = []
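The game persona is enabled either explicitly or implicitly; a small sketch of the check that both streaming functions now share (the _is_game name is mine, purely for illustration):

def _is_game(mode, game_context_text):
    # Mirrors: is_game_mode = (mode == 'game') or bool(game_context_text)
    return (mode == 'game') or bool(game_context_text)

assert _is_game(None, "") is False               # default CV persona
assert _is_game("game", "") is True              # explicit game mode, no context snippet
assert _is_game(None, "\nGAME CONTEXT: x") is True  # implicit: context alone suffices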
@@ -272,7 +315,7 @@ async def stream_response_gemini(query, history):
     yield "data: [DONE]\n\n"
 
 
-async def stream_response_local(query, history):
+async def stream_response_local(query, history, game_context=None, mode: Optional[str] = None):
     """Stream response using local model"""
    logger.info(f"Processing query with local model: {query}")
     start_time = time.time()
@@ -280,14 +323,25 @@ async def stream_response_local(query, history):
 
     current_date = datetime.now().strftime("%Y-%m-%d")
 
-    system_prompt = (
-        "/no_think You are Tim Luka Horstmann, a Computer Scientist. A user is asking you a question. Respond as yourself, using the first person, in a friendly and concise manner. "
-        "For questions about your CV, base your answer *exclusively* on the provided CV information below and do not add any details not explicitly stated. "
-        "For casual questions not covered by the CV, respond naturally but limit answers to general truths about yourself (e.g., your current location is Paris, France, or your field is AI) "
-        "and say 'I don't have specific details to share about that' if pressed for specifics beyond the CV or FAQs. Do not invent facts, experiences, or opinions not supported by the CV or FAQs. "
-        f"Today's date is {current_date}. "
-        f"CV: {full_cv_text}"
-    )
+    game_context_text = _format_game_context_for_prompt(game_context)
+    is_game_mode = (mode == 'game') or bool(game_context_text)
+    if is_game_mode:
+        system_prompt = (
+            "/no_think You are Tim Luka Horstmann as a friendly in-game 'Station Guide'. "
+            "Stay in first person. Prioritize the current station context provided. "
+            "Use the CV for personal facts, roles and dates. "
+            "For non-personal or general questions (e.g., about cities or institutions), you may use general world knowledge. "
+            "Do not invent personal details beyond the CV/FAQs; if specifics are unknown, say so briefly. "
+            f"Today's date is {current_date}. CV: {full_cv_text}" + game_context_text
+        )
+    else:
+        system_prompt = (
+            "/no_think You are Tim Luka Horstmann, a Computer Scientist. A user is asking you a question. Respond as yourself, using the first person, in a friendly and concise manner. "
+            "For questions about your CV, base your answer *exclusively* on the provided CV information below and do not add any details not explicitly stated. "
+            "For casual questions not covered by the CV, respond naturally but limit answers to general truths about yourself (e.g., your current location is Paris, France, or your field is AI) "
+            "and say 'I don't have specific details to share about that' if pressed for specifics beyond the CV or FAQs. Do not invent facts, experiences, or opinions not supported by the CV or FAQs. "
+            f"Today's date is {current_date}. CV: {full_cv_text}"
+        )
 
     if not isinstance(system_prompt, str):
         system_prompt = str(system_prompt)
@@ -340,6 +394,8 @@ async def stream_response_local(query, history):
 class QueryRequest(BaseModel):
     query: str
     history: list
+    game_context: Optional[Union[str, Dict[str, Any]]] = None
+    mode: Optional[str] = None
 
 class TTSRequest(BaseModel):
     text: str
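Both new fields default to None, so existing clients are unaffected. A game-mode request validates like this (values are illustrative):

req = QueryRequest(
    query="Where did you study?",
    history=[],
    game_context={"current_station": "Cambridge", "context": "MPhil studies"},
    mode="game",
)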
@@ -363,7 +419,9 @@ def get_ram_usage():
 async def predict(request: Request, query_request: QueryRequest):
     query = query_request.query
     history = query_request.history
-    return StreamingResponse(stream_response(query, history), media_type="text/event-stream")
+    game_context = query_request.game_context
+    mode = (query_request.mode or '').lower() or None
+    return StreamingResponse(stream_response(query, history, game_context, mode), media_type="text/event-stream")
 
 @app.post("/api/tts")
 @limiter.limit("5/minute")  # Allow 5 TTS requests per minute per IP
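A client-side sketch for consuming the stream. The /api/predict path and the per-chunk "data: " framing are assumptions here; only the "data: [DONE]" terminator is visible in this diff, and the route decorator sits outside the hunk:

import asyncio
import httpx

async def ask(query, game_context=None, mode=None):
    payload = {"query": query, "history": [], "game_context": game_context, "mode": mode}
    async with httpx.AsyncClient(timeout=None) as client:
        # Stream the SSE response line by line.
        async with client.stream("POST", "https://host/api/predict", json=payload) as resp:
            async for line in resp.aiter_lines():
                if line == "data: [DONE]":
                    break
                if line.startswith("data: "):
                    print(line[len("data: "):], end="", flush=True)

asyncio.run(ask("Where did you study?", game_context={"current_station": "Paris"}, mode="game"))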
@@ -490,4 +548,4 @@ async def keep_model_warm():
             logger.error(f"Error in periodic warm-up: {str(e)}")
 
         # Wait for 13 minutes before the next warm-up
-        await asyncio.sleep(13 * 60)
+        await asyncio.sleep(13 * 60)
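The removed and re-added sleep lines are textually identical, so this last hunk is likely a whitespace or line-ending change. For context, a generic sketch of the periodic warm-up pattern the lines belong to; this is not the file's actual body, and warm_up_model is a hypothetical stand-in for the real warm-up call:

async def keep_model_warm():
    while True:
        try:
            await warm_up_model()  # hypothetical: run a tiny inference to keep the model loaded
        except Exception as e:
            logger.error(f"Error in periodic warm-up: {str(e)}")
        # Wait for 13 minutes before the next warm-up
        await asyncio.sleep(13 * 60)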
 