IPF committed on
Commit 0a122af · verified · 1 Parent(s): e5af20d

Upload app.py

Files changed (1)
  1. app.py +11 -8
app.py CHANGED
@@ -792,7 +792,8 @@ def render_user_message(question: str) -> str:
 # Remote API Generation (via vLLM-compatible endpoint)
 # ============================================================
 
-def generate_response(prompt: str, max_new_tokens: int = MAX_NEW_TOKENS) -> str:
+
+async def generate_response(prompt: str, max_new_tokens: int = MAX_NEW_TOKENS) -> str:
     """Generate response using vLLM OpenAI-compatible API."""
     # Use /completions endpoint for raw prompt
     url = f"{REMOTE_API_BASE}/completions"
@@ -809,7 +810,9 @@ def generate_response(prompt: str, max_new_tokens: int = MAX_NEW_TOKENS) -> str:
         "stop": ["\n<tool_response>", "<tool_response>"],
     }
 
-    response = httpx.post(url, json=payload, headers=headers, timeout=300.0)
+    async with httpx.AsyncClient() as client:
+        response = await client.post(url, json=payload, headers=headers, timeout=300.0)
+
     if response.status_code != 200:
         raise Exception(f"vLLM API error {response.status_code}: {response.text}")
 
@@ -820,7 +823,7 @@ def generate_response(prompt: str, max_new_tokens: int = MAX_NEW_TOKENS) -> str:
 # ============================================================
 # Streaming Agent Runner
 # ============================================================
-def run_agent_streaming(
+async def run_agent_streaming(
     question: str,
     serper_key: str,
     max_rounds: int
@@ -884,7 +887,7 @@ def run_agent_streaming(
         yield ''.join(html_parts)
 
         # Call ZeroGPU function
-        generated = generate_response(prompt, max_new_tokens=MAX_NEW_TOKENS)
+        generated = await generate_response(prompt, max_new_tokens=MAX_NEW_TOKENS)
 
         # Remove placeholder
         html_parts.pop()
@@ -944,9 +947,9 @@ def run_agent_streaming(
             result = ""
             try:
                 if actual_fn == "search":
-                    result = asyncio.run(browser.search(args.get("query", ""), args.get("topn", 10)))
+                    result = await browser.search(args.get("query", ""), args.get("topn", 10))
                 elif actual_fn == "open":
-                    result = asyncio.run(browser.open(**args))
+                    result = await browser.open(**args)
                 elif actual_fn == "find":
                     result = browser.find(args.get("pattern", ""), args.get("cursor", -1))
                 else:
@@ -2510,7 +2513,7 @@ def create_interface():
         clear_btn = gr.Button("🗑 Clear", scale=1)
 
         # Function to hide welcome and show output
-        def start_research(question, serper_key, max_rounds):
+        async def start_research(question, serper_key, max_rounds):
             # Generator that first hides welcome, then streams results
             # Also clears the input box for the next question
 
@@ -2518,7 +2521,7 @@ def create_interface():
             # IMPORTANT: Don't use empty string for output, or JS will hide the output area!
             yield "", '<div style="text-align: center; padding: 2rem; color: #6b7280;">Delving into it...</div>', ""
 
-            for result in run_agent_streaming(question, serper_key, max_rounds):
+            async for result in run_agent_streaming(question, serper_key, max_rounds):
                 yield "", result, ""
 
         # Event handlers
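
For readers unfamiliar with the pattern, here is a minimal, self-contained sketch of the async conversion this commit performs: a blocking httpx.post call becomes an awaited call on httpx.AsyncClient, and the streaming runner becomes an async generator consumed with `async for`. The endpoint URL, payload fields, and helper names below are illustrative placeholders, not the app's actual configuration.

# Sketch of the async pattern adopted in this commit (placeholder names/URLs,
# not the app's real code): await an AsyncClient request inside an async
# function, then stream results from an async generator with `async for`.
import asyncio
import httpx

API_BASE = "http://localhost:8000/v1"  # placeholder vLLM-compatible endpoint
MAX_NEW_TOKENS = 256                   # placeholder token limit


async def generate(prompt: str, max_new_tokens: int = MAX_NEW_TOKENS) -> str:
    payload = {"prompt": prompt, "max_tokens": max_new_tokens}
    # AsyncClient keeps the event loop free while the request is in flight.
    async with httpx.AsyncClient() as client:
        response = await client.post(f"{API_BASE}/completions", json=payload, timeout=300.0)
    if response.status_code != 200:
        raise Exception(f"API error {response.status_code}: {response.text}")
    return response.json()["choices"][0]["text"]


async def run_streaming(question: str):
    # Async generator: each intermediate state is yielded as soon as it is ready.
    yield "thinking..."
    answer = await generate(question)
    yield answer


async def main():
    # `async for` drives the generator without blocking between yields.
    async for chunk in run_streaming("What is vLLM?"):
        print(chunk)


if __name__ == "__main__":
    asyncio.run(main())

Awaiting the HTTP call rather than blocking on it means the serving event loop can handle other work during the (up to 300-second) completion request, which is the point of switching the whole call chain, generate_response through start_research, to async.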