Spaces:
Runtime error
Runtime error
Upload app.py
Browse files
app.py
CHANGED
|
@@ -792,7 +792,8 @@ def render_user_message(question: str) -> str:
|
|
| 792 |
# Remote API Generation (via vLLM-compatible endpoint)
|
| 793 |
# ============================================================
|
| 794 |
|
| 795 |
-
|
|
|
|
| 796 |
"""Generate response using vLLM OpenAI-compatible API."""
|
| 797 |
# Use /completions endpoint for raw prompt
|
| 798 |
url = f"{REMOTE_API_BASE}/completions"
|
|
@@ -809,7 +810,9 @@ def generate_response(prompt: str, max_new_tokens: int = MAX_NEW_TOKENS) -> str:
|
|
| 809 |
"stop": ["\n<tool_response>", "<tool_response>"],
|
| 810 |
}
|
| 811 |
|
| 812 |
-
|
|
|
|
|
|
|
| 813 |
if response.status_code != 200:
|
| 814 |
raise Exception(f"vLLM API error {response.status_code}: {response.text}")
|
| 815 |
|
|
@@ -820,7 +823,7 @@ def generate_response(prompt: str, max_new_tokens: int = MAX_NEW_TOKENS) -> str:
|
|
| 820 |
# ============================================================
|
| 821 |
# Streaming Agent Runner
|
| 822 |
# ============================================================
|
| 823 |
-
def run_agent_streaming(
|
| 824 |
question: str,
|
| 825 |
serper_key: str,
|
| 826 |
max_rounds: int
|
|
@@ -884,7 +887,7 @@ def run_agent_streaming(
|
|
| 884 |
yield ''.join(html_parts)
|
| 885 |
|
| 886 |
# Call ZeroGPU function
|
| 887 |
-
generated = generate_response(prompt, max_new_tokens=MAX_NEW_TOKENS)
|
| 888 |
|
| 889 |
# Remove placeholder
|
| 890 |
html_parts.pop()
|
|
@@ -944,9 +947,9 @@ def run_agent_streaming(
|
|
| 944 |
result = ""
|
| 945 |
try:
|
| 946 |
if actual_fn == "search":
|
| 947 |
-
result =
|
| 948 |
elif actual_fn == "open":
|
| 949 |
-
result =
|
| 950 |
elif actual_fn == "find":
|
| 951 |
result = browser.find(args.get("pattern", ""), args.get("cursor", -1))
|
| 952 |
else:
|
|
@@ -2510,7 +2513,7 @@ def create_interface():
|
|
| 2510 |
clear_btn = gr.Button("🗑 Clear", scale=1)
|
| 2511 |
|
| 2512 |
# Function to hide welcome and show output
|
| 2513 |
-
def start_research(question, serper_key, max_rounds):
|
| 2514 |
# Generator that first hides welcome, then streams results
|
| 2515 |
# Also clears the input box for the next question
|
| 2516 |
|
|
@@ -2518,7 +2521,7 @@ def create_interface():
|
|
| 2518 |
# IMPORTANT: Don't use empty string for output, or JS will hide the output area!
|
| 2519 |
yield "", '<div style="text-align: center; padding: 2rem; color: #6b7280;">Delving into it...</div>', ""
|
| 2520 |
|
| 2521 |
-
for result in run_agent_streaming(question, serper_key, max_rounds):
|
| 2522 |
yield "", result, ""
|
| 2523 |
|
| 2524 |
# Event handlers
|
|
|
|
| 792 |
# Remote API Generation (via vLLM-compatible endpoint)
|
| 793 |
# ============================================================
|
| 794 |
|
| 795 |
+
|
| 796 |
+
async def generate_response(prompt: str, max_new_tokens: int = MAX_NEW_TOKENS) -> str:
|
| 797 |
"""Generate response using vLLM OpenAI-compatible API."""
|
| 798 |
# Use /completions endpoint for raw prompt
|
| 799 |
url = f"{REMOTE_API_BASE}/completions"
|
|
|
|
| 810 |
"stop": ["\n<tool_response>", "<tool_response>"],
|
| 811 |
}
|
| 812 |
|
| 813 |
+
async with httpx.AsyncClient() as client:
|
| 814 |
+
response = await client.post(url, json=payload, headers=headers, timeout=300.0)
|
| 815 |
+
|
| 816 |
if response.status_code != 200:
|
| 817 |
raise Exception(f"vLLM API error {response.status_code}: {response.text}")
|
| 818 |
|
|
|
|
| 823 |
# ============================================================
|
| 824 |
# Streaming Agent Runner
|
| 825 |
# ============================================================
|
| 826 |
+
async def run_agent_streaming(
|
| 827 |
question: str,
|
| 828 |
serper_key: str,
|
| 829 |
max_rounds: int
|
|
|
|
| 887 |
yield ''.join(html_parts)
|
| 888 |
|
| 889 |
# Call the remote vLLM OpenAI-compatible completions API
|
| 890 |
+
generated = await generate_response(prompt, max_new_tokens=MAX_NEW_TOKENS)
|
| 891 |
|
| 892 |
# Remove placeholder
|
| 893 |
html_parts.pop()
|
|
|
|
| 947 |
result = ""
|
| 948 |
try:
|
| 949 |
if actual_fn == "search":
|
| 950 |
+
result = await browser.search(args.get("query", ""), args.get("topn", 10))
|
| 951 |
elif actual_fn == "open":
|
| 952 |
+
result = await browser.open(**args)
|
| 953 |
elif actual_fn == "find":
|
| 954 |
result = browser.find(args.get("pattern", ""), args.get("cursor", -1))
|
| 955 |
else:
|
|
|
|
| 2513 |
clear_btn = gr.Button("🗑 Clear", scale=1)
|
| 2514 |
|
| 2515 |
# Function to hide welcome and show output
|
| 2516 |
+
async def start_research(question, serper_key, max_rounds):
|
| 2517 |
# Generator that first hides welcome, then streams results
|
| 2518 |
# Also clears the input box for the next question
|
| 2519 |
|
|
|
|
| 2521 |
# IMPORTANT: Don't use empty string for output, or JS will hide the output area!
|
| 2522 |
yield "", '<div style="text-align: center; padding: 2rem; color: #6b7280;">Delving into it...</div>', ""
|
| 2523 |
|
| 2524 |
+
async for result in run_agent_streaming(question, serper_key, max_rounds):
|
| 2525 |
yield "", result, ""
|
| 2526 |
|
| 2527 |
# Event handlers
|