Alovestocode committed on
Commit
d751a09
·
verified ·
1 Parent(s): 689867f

Fix: Use pure ASGI middleware to avoid Content-Length issues

Browse files
Files changed (1) hide show
  1. app.py +5 -97
app.py CHANGED
@@ -517,106 +517,14 @@ with gr.Blocks(
517
  fn=lambda: ("", ""),
518
  outputs=[prompt_input, output],
519
  )
520
-
521
- # Mount FastAPI routes using middleware to intercept before Gradio processes them
522
- # This must be done inside the Blocks context
523
- def mount_fastapi_routes():
524
- """Mount FastAPI routes using middleware to ensure they're processed first."""
525
- try:
526
- from fastapi.responses import JSONResponse
527
- from starlette.requests import Request
528
- from starlette.responses import Response
529
-
530
- # Define route handlers
531
- async def health_handler(request: Request) -> Response:
532
- """Handle GET /health requests."""
533
- return JSONResponse(content={
534
- "status": "ok",
535
- "model": MODEL_ID,
536
- "strategy": ACTIVE_STRATEGY or "pending",
537
- })
538
-
539
- async def generate_handler(request: Request) -> Response:
540
- """Handle POST /v1/generate requests."""
541
- try:
542
- data = await request.json()
543
- payload = GeneratePayload(**data)
544
- text = _generate_with_gpu(
545
- prompt=payload.prompt,
546
- max_new_tokens=payload.max_new_tokens or MAX_NEW_TOKENS,
547
- temperature=payload.temperature or DEFAULT_TEMPERATURE,
548
- top_p=payload.top_p or DEFAULT_TOP_P,
549
- )
550
- return JSONResponse(content={"text": text})
551
- except Exception as exc:
552
- from fastapi import HTTPException
553
- raise HTTPException(status_code=500, detail=str(exc))
554
-
555
- async def console_handler(request: Request) -> Response:
556
- """Handle GET /console requests."""
557
- return HTMLResponse(interactive_ui())
558
-
559
- # Create pure ASGI middleware to intercept routes before Gradio
560
- # This avoids Content-Length issues with BaseHTTPMiddleware
561
- class FastAPIRouteMiddleware:
562
- def __init__(self, app):
563
- self.app = app
564
-
565
- async def __call__(self, scope, receive, send):
566
- if scope["type"] == "http":
567
- path = scope["path"]
568
- method = scope["method"]
569
-
570
- # Handle our custom routes
571
- if path == "/health" and method == "GET":
572
- request = Request(scope, receive)
573
- response = await health_handler(request)
574
- await response(scope, receive, send)
575
- return
576
- elif path == "/v1/generate" and method == "POST":
577
- request = Request(scope, receive)
578
- response = await generate_handler(request)
579
- await response(scope, receive, send)
580
- return
581
- elif path == "/console" and method == "GET":
582
- request = Request(scope, receive)
583
- response = await console_handler(request)
584
- await response(scope, receive, send)
585
- return
586
-
587
- # Let other requests pass through to Gradio
588
- await self.app(scope, receive, send)
589
-
590
- # Wrap Gradio's app with our pure ASGI middleware
591
- try:
592
- # Store original app and wrap it with our middleware
593
- original_app = gradio_app.app
594
- gradio_app.app = FastAPIRouteMiddleware(original_app)
595
- print("FastAPI routes mounted successfully via pure ASGI middleware")
596
- # Pure ASGI middleware avoids Content-Length issues with BaseHTTPMiddleware
597
- except Exception as middleware_error:
598
- # Fallback: try to add routes directly to router
599
- print(f"Middleware approach failed: {middleware_error}, trying direct route addition...")
600
- from starlette.routing import Route
601
- gradio_app.app.router.routes.insert(0, Route("/health", health_handler, methods=["GET"]))
602
- gradio_app.app.router.routes.insert(0, Route("/v1/generate", generate_handler, methods=["POST"]))
603
- gradio_app.app.router.routes.insert(0, Route("/console", console_handler, methods=["GET"]))
604
- print("FastAPI routes added directly to router")
605
- except Exception as e:
606
- print(f"Warning: Could not mount FastAPI routes: {e}")
607
- import traceback
608
- traceback.print_exc()
609
-
610
- # Mount routes when Gradio app loads (must be inside Blocks context)
611
- gradio_app.load(mount_fastapi_routes)
612
 
613
  # Enable queued execution so ZeroGPU can schedule GPU work reliably
614
  gradio_app.queue(max_size=8)
615
 
616
- # Set app to Gradio for Spaces compatibility (sdk: gradio requires Gradio app)
617
- # Spaces will handle running the server automatically
618
- app = gradio_app
619
 
620
  if __name__ == "__main__": # pragma: no cover
621
- # For local testing only - Spaces handles server startup
622
- app.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
 
 
517
  fn=lambda: ("", ""),
518
  outputs=[prompt_input, output],
519
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
520
 
521
  # Enable queued execution so ZeroGPU can schedule GPU work reliably
522
  gradio_app.queue(max_size=8)
523
 
524
+ # Mount the Gradio UI on the FastAPI app so both live side-by-side
525
+ app = gr.mount_gradio_app(fastapi_app, gradio_app, path="/gradio")
 
526
 
527
  if __name__ == "__main__": # pragma: no cover
528
+ import uvicorn
529
+
530
+ uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))