Spaces: Running on Zero
Fix: Use pure ASGI middleware to avoid Content-Length issues
Browse files
app.py
CHANGED
|
@@ -517,106 +517,14 @@ with gr.Blocks(
|
|
| 517 |
fn=lambda: ("", ""),
|
| 518 |
outputs=[prompt_input, output],
|
| 519 |
)
|
| 520 |
-
|
| 521 |
-
# Mount FastAPI routes using middleware to intercept before Gradio processes them
|
| 522 |
-
# This must be done inside the Blocks context
|
| 523 |
-
def mount_fastapi_routes():
|
| 524 |
-
"""Mount FastAPI routes using middleware to ensure they're processed first."""
|
| 525 |
-
try:
|
| 526 |
-
from fastapi.responses import JSONResponse
|
| 527 |
-
from starlette.requests import Request
|
| 528 |
-
from starlette.responses import Response
|
| 529 |
-
|
| 530 |
-
# Define route handlers
|
| 531 |
-
async def health_handler(request: Request) -> Response:
|
| 532 |
-
"""Handle GET /health requests."""
|
| 533 |
-
return JSONResponse(content={
|
| 534 |
-
"status": "ok",
|
| 535 |
-
"model": MODEL_ID,
|
| 536 |
-
"strategy": ACTIVE_STRATEGY or "pending",
|
| 537 |
-
})
|
| 538 |
-
|
| 539 |
-
async def generate_handler(request: Request) -> Response:
|
| 540 |
-
"""Handle POST /v1/generate requests."""
|
| 541 |
-
try:
|
| 542 |
-
data = await request.json()
|
| 543 |
-
payload = GeneratePayload(**data)
|
| 544 |
-
text = _generate_with_gpu(
|
| 545 |
-
prompt=payload.prompt,
|
| 546 |
-
max_new_tokens=payload.max_new_tokens or MAX_NEW_TOKENS,
|
| 547 |
-
temperature=payload.temperature or DEFAULT_TEMPERATURE,
|
| 548 |
-
top_p=payload.top_p or DEFAULT_TOP_P,
|
| 549 |
-
)
|
| 550 |
-
return JSONResponse(content={"text": text})
|
| 551 |
-
except Exception as exc:
|
| 552 |
-
from fastapi import HTTPException
|
| 553 |
-
raise HTTPException(status_code=500, detail=str(exc))
|
| 554 |
-
|
| 555 |
-
async def console_handler(request: Request) -> Response:
|
| 556 |
-
"""Handle GET /console requests."""
|
| 557 |
-
return HTMLResponse(interactive_ui())
|
| 558 |
-
|
| 559 |
-
# Create pure ASGI middleware to intercept routes before Gradio
|
| 560 |
-
# This avoids Content-Length issues with BaseHTTPMiddleware
|
| 561 |
-
class FastAPIRouteMiddleware:
|
| 562 |
-
def __init__(self, app):
|
| 563 |
-
self.app = app
|
| 564 |
-
|
| 565 |
-
async def __call__(self, scope, receive, send):
|
| 566 |
-
if scope["type"] == "http":
|
| 567 |
-
path = scope["path"]
|
| 568 |
-
method = scope["method"]
|
| 569 |
-
|
| 570 |
-
# Handle our custom routes
|
| 571 |
-
if path == "/health" and method == "GET":
|
| 572 |
-
request = Request(scope, receive)
|
| 573 |
-
response = await health_handler(request)
|
| 574 |
-
await response(scope, receive, send)
|
| 575 |
-
return
|
| 576 |
-
elif path == "/v1/generate" and method == "POST":
|
| 577 |
-
request = Request(scope, receive)
|
| 578 |
-
response = await generate_handler(request)
|
| 579 |
-
await response(scope, receive, send)
|
| 580 |
-
return
|
| 581 |
-
elif path == "/console" and method == "GET":
|
| 582 |
-
request = Request(scope, receive)
|
| 583 |
-
response = await console_handler(request)
|
| 584 |
-
await response(scope, receive, send)
|
| 585 |
-
return
|
| 586 |
-
|
| 587 |
-
# Let other requests pass through to Gradio
|
| 588 |
-
await self.app(scope, receive, send)
|
| 589 |
-
|
| 590 |
-
# Wrap Gradio's app with our pure ASGI middleware
|
| 591 |
-
try:
|
| 592 |
-
# Store original app and wrap it with our middleware
|
| 593 |
-
original_app = gradio_app.app
|
| 594 |
-
gradio_app.app = FastAPIRouteMiddleware(original_app)
|
| 595 |
-
print("FastAPI routes mounted successfully via pure ASGI middleware")
|
| 596 |
-
# Pure ASGI middleware avoids Content-Length issues with BaseHTTPMiddleware
|
| 597 |
-
except Exception as middleware_error:
|
| 598 |
-
# Fallback: try to add routes directly to router
|
| 599 |
-
print(f"Middleware approach failed: {middleware_error}, trying direct route addition...")
|
| 600 |
-
from starlette.routing import Route
|
| 601 |
-
gradio_app.app.router.routes.insert(0, Route("/health", health_handler, methods=["GET"]))
|
| 602 |
-
gradio_app.app.router.routes.insert(0, Route("/v1/generate", generate_handler, methods=["POST"]))
|
| 603 |
-
gradio_app.app.router.routes.insert(0, Route("/console", console_handler, methods=["GET"]))
|
| 604 |
-
print("FastAPI routes added directly to router")
|
| 605 |
-
except Exception as e:
|
| 606 |
-
print(f"Warning: Could not mount FastAPI routes: {e}")
|
| 607 |
-
import traceback
|
| 608 |
-
traceback.print_exc()
|
| 609 |
-
|
| 610 |
-
# Mount routes when Gradio app loads (must be inside Blocks context)
|
| 611 |
-
gradio_app.load(mount_fastapi_routes)
|
| 612 |
|
| 613 |
# Enable queued execution so ZeroGPU can schedule GPU work reliably
|
| 614 |
gradio_app.queue(max_size=8)
|
| 615 |
|
| 616 |
-
#
|
| 617 |
-
|
| 618 |
-
app = gradio_app
|
| 619 |
|
| 620 |
if __name__ == "__main__": # pragma: no cover
|
| 621 |
-
|
| 622 |
-
|
|
|
|
|
|
| 517 |
fn=lambda: ("", ""),
|
| 518 |
outputs=[prompt_input, output],
|
| 519 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 520 |
|
| 521 |
# Enable queued execution so ZeroGPU can schedule GPU work reliably
|
| 522 |
gradio_app.queue(max_size=8)
|
| 523 |
|
| 524 |
+
# Mount the Gradio UI on the FastAPI app so both live side-by-side
|
| 525 |
+
app = gr.mount_gradio_app(fastapi_app, gradio_app, path="/gradio")
|
|
|
|
| 526 |
|
| 527 |
if __name__ == "__main__": # pragma: no cover
|
| 528 |
+
import uvicorn
|
| 529 |
+
|
| 530 |
+
uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))
|