zye0616 committed on
Commit 51780f2 · 1 Parent(s): 05e7070

fix: frontend video display

Files changed (4)
  1. README.md +4 -5
  2. app.py +50 -8
  3. demo.html +35 -12
  4. inference.py +15 -5
README.md CHANGED
@@ -12,8 +12,7 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
 
 ## Mission-guided detections
 
-1. Send a `POST /process_video` request with fields `video` (file) and `prompt` (mission text). Optionally include `detector` (`owlv2` or `hf_yolov8`) to pick the backend per request; if omitted the server uses its default/`OBJECT_DETECTOR` setting.
-2. The backend feeds the mission text into an OpenAI (`gpt-4o-mini`) reasoning step that scores and ranks every YOLO/COCO class. Place your API key inside `.env` as either `OPENAI_API_KEY=...` or `OpenAI-API: ...`; the server loads it automatically on startup.
-3. The top scored classes become the textual queries for the existing OWLv2 detector so the detections align with the mission.
-4. After object detection finishes, another OpenAI call ingests the detection log plus the first/middle/last frame context and produces a natural-language summary of the mission outcome.
-5. The HTTP response still streams the processed video, and it now embeds the structured mission plan (`x-mission-plan`) and text summary (`x-mission-summary`) in the headers.
+1. Call `POST /process_video` with fields `video` (file), `prompt` (mission text), and optional `detector` (`owlv2` or `hf_yolov8`). The response is an MP4 stream containing the annotated frames.
+2. Call `POST /mission_summary` with the same fields to receive JSON containing the structured mission plan plus the natural-language summary. This second endpoint isolates the OpenAI call, keeping the video response clean.
+3. Under the hood the mission text still feeds into the OpenAI (`gpt-4o-mini`) reasoning step that ranks the YOLO/COCO classes. Place your API key inside `.env` as either `OPENAI_API_KEY=...` or `OpenAI-API: ...`; the server loads it automatically on startup.
+4. The top-scored classes drive OWLv2 or YOLOv8 to align detections with the mission, and the detection log is summarized via another OpenAI call when requested.
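A minimal client sketch of the two-request flow the new README describes. The Space URL is the one hard-coded in `demo.html`; the clip filename and prompt are placeholders, and the form fields (`video`, `prompt`, `detector`) match the README above.

```python
# Sketch: exercise both endpoints with the `requests` library.
# Assumptions: Space URL taken from demo.html; drone_clip.mp4 is a local file.
import requests

BASE_URL = "https://biaslab2025-demo-2025.hf.space"
data = {"prompt": "find stranded hikers", "detector": "owlv2"}

# 1. /process_video streams back the annotated MP4.
with open("drone_clip.mp4", "rb") as f:
    resp = requests.post(
        f"{BASE_URL}/process_video",
        files={"video": ("drone_clip.mp4", f, "video/mp4")},
        data=data,
    )
resp.raise_for_status()
with open("processed.mp4", "wb") as out:
    out.write(resp.content)

# 2. /mission_summary returns the plan and summary as JSON.
with open("drone_clip.mp4", "rb") as f:
    resp = requests.post(
        f"{BASE_URL}/mission_summary",
        files={"video": ("drone_clip.mp4", f, "video/mp4")},
        data=data,
    )
resp.raise_for_status()
result = resp.json()
print(result["mission_plan"])
print(result["mission_summary"])
```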
 
app.py CHANGED
@@ -50,6 +50,13 @@ def _schedule_cleanup(background_tasks: BackgroundTasks, path: str) -> None:
     background_tasks.add_task(_cleanup)
 
 
+def _validate_inputs(video: UploadFile | None, prompt: str | None) -> None:
+    if video is None:
+        raise HTTPException(status_code=400, detail="Video file is required.")
+    if not prompt:
+        raise HTTPException(status_code=400, detail="Prompt is required.")
+
+
 @app.post("/process_video")
 async def process_video(
     background_tasks: BackgroundTasks,
@@ -57,10 +64,7 @@ async def process_video(
     prompt: str = Form(...),
     detector: Optional[str] = Form(None),
 ):
-    if video is None:
-        raise HTTPException(status_code=400, detail="Video file is required.")
-    if not prompt:
-        raise HTTPException(status_code=400, detail="Prompt is required.")
+    _validate_inputs(video, prompt)
 
     try:
         input_path = _save_upload_to_tmp(video)
@@ -74,12 +78,13 @@ async def process_video(
     os.close(fd)
 
     try:
-        output_path, mission_plan, mission_summary = run_inference(
+        output_path, _, _ = run_inference(
             input_path,
             output_path,
             prompt,
             max_frames=10,
             detector_name=detector,
+            generate_summary=False,
         )
     except ValueError as exc:
         logging.exception("Video decoding failed.")
@@ -100,11 +105,49 @@ async def process_video(
         media_type="video/mp4",
         filename="processed.mp4",
     )
-    response.headers["x-mission-plan"] = mission_plan.to_json()
-    response.headers["x-mission-summary"] = mission_summary.replace("\n", " ").strip()
     return response
 
 
+@app.post("/mission_summary")
+async def mission_summary(
+    video: UploadFile = File(...),
+    prompt: str = Form(...),
+    detector: Optional[str] = Form(None),
+):
+    _validate_inputs(video, prompt)
+    try:
+        input_path = _save_upload_to_tmp(video)
+    except Exception:
+        logging.exception("Failed to save uploaded file.")
+        raise HTTPException(status_code=500, detail="Failed to save uploaded video.")
+    finally:
+        await video.close()
+
+    try:
+        _, mission_plan, mission_summary = run_inference(
+            input_path,
+            output_video_path=None,
+            mission_prompt=prompt,
+            max_frames=10,
+            detector_name=detector,
+            write_output_video=False,
+            generate_summary=True,
+        )
+    except ValueError as exc:
+        logging.exception("Video decoding failed.")
+        _safe_delete(input_path)
+        raise HTTPException(status_code=500, detail=str(exc))
+    except Exception as exc:
+        logging.exception("Summary generation failed.")
+        _safe_delete(input_path)
+        return JSONResponse(status_code=500, content={"error": str(exc)})
+
+    _safe_delete(input_path)
+    return {
+        "mission_plan": mission_plan.to_dict(),
+        "mission_summary": mission_summary or "",
+    }
+
 if __name__ == "__main__":
     uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)
 @app.get("/", response_class=HTMLResponse)
@@ -114,4 +157,3 @@ async def demo_page() -> str:
         return demo_path.read_text(encoding="utf-8")
     except FileNotFoundError:
         return "<h1>Demo page missing</h1>"
-
 
demo.html CHANGED
@@ -140,7 +140,9 @@ button:hover {
 </div>
 
 <script>
-const PROCESS_VIDEO_URL = "https://biaslab2025-demo-2025.hf.space/process_video";
+const API_BASE_URL = "https://biaslab2025-demo-2025.hf.space";
+const PROCESS_VIDEO_URL = `${API_BASE_URL}/process_video`;
+const SUMMARY_URL = `${API_BASE_URL}/mission_summary`;
 
 async function executeMission() {
 
@@ -155,18 +157,18 @@ async function executeMission() {
     return;
   }
 
-  const formData = new FormData();
-  formData.append("video", videoFile);
-  formData.append("prompt", mission);
-  formData.append("detector", detector);
-
-  statusEl.textContent = "Dispatching mission to backend...";
-  summaryEl.textContent = "(Processing...)";
+  statusEl.textContent = "Processing video...";
+  summaryEl.textContent = "(Awaiting summary...)";
 
   try {
+    const videoForm = new FormData();
+    videoForm.append("video", videoFile);
+    videoForm.append("prompt", mission);
+    videoForm.append("detector", detector);
+
     const response = await fetch(PROCESS_VIDEO_URL, {
       method: "POST",
-      body: formData
+      body: videoForm
     });
 
     if (!response.ok) {
@@ -180,14 +182,35 @@ async function executeMission() {
       throw new Error(errorDetail);
     }
 
-    const missionSummary = response.headers.get("x-mission-summary") || "No summary returned.";
-    summaryEl.textContent = missionSummary;
-
     const videoBlob = await response.blob();
     const videoUrl = URL.createObjectURL(videoBlob);
     const videoEl = document.getElementById("processedVideo");
     videoEl.src = videoUrl;
     videoEl.load();
+
+    statusEl.textContent = "Generating summary...";
+
+    const summaryForm = new FormData();
+    summaryForm.append("video", videoFile);
+    summaryForm.append("prompt", mission);
+    summaryForm.append("detector", detector);
+
+    const summaryResponse = await fetch(SUMMARY_URL, {
+      method: "POST",
+      body: summaryForm
+    });
+    if (!summaryResponse.ok) {
+      let errorDetail = `Summary failed (${summaryResponse.status})`;
+      try {
+        const errJson = await summaryResponse.json();
+        errorDetail = errJson.error || errorDetail;
+      } catch (_) {}
+      throw new Error(errorDetail);
+    }
+
+    const summaryJson = await summaryResponse.json();
+    const summaryText = summaryJson.mission_summary || "No summary returned.";
+    summaryEl.textContent = summaryText;
     statusEl.textContent = "Mission complete.";
   } catch (err) {
     console.error(err);
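The summary endpoint reports failures as JSON (`{"error": ...}`), but the body is not guaranteed to be JSON, so the frontend falls back to a status-based message. The same defensive parsing for a Python client, as a sketch (`resp` is any response from the calls shown earlier):

```python
# Sketch: mirror the frontend's fallback when the error body isn't JSON.
def summary_error_detail(resp) -> str:
    detail = f"Summary failed ({resp.status_code})"
    try:
        # /mission_summary returns {"error": "..."} on server-side failures.
        detail = resp.json().get("error", detail)
    except ValueError:
        pass  # non-JSON body (e.g., a proxy error page); keep the fallback text
    return detail
```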
inference.py CHANGED
@@ -66,11 +66,13 @@ def infer_frame(
 
 def run_inference(
     input_video_path: str,
-    output_video_path: str,
+    output_video_path: Optional[str],
     mission_prompt: str,
     max_frames: Optional[int] = None,
     detector_name: Optional[str] = None,
-) -> Tuple[str, MissionPlan, str]:
+    write_output_video: bool = True,
+    generate_summary: bool = True,
+) -> Tuple[Optional[str], MissionPlan, Optional[str]]:
     try:
         frames, fps, width, height = extract_frames(input_video_path)
     except ValueError as exc:
@@ -91,6 +93,14 @@
         detection_log.append({"frame_index": idx, "detections": detections})
         processed_frames.append(processed_frame)
 
-    write_video(processed_frames, output_video_path, fps=fps, width=width, height=height)
-    mission_summary = summarize_results(mission_prompt, mission_plan, detection_log)
-    return output_video_path, mission_plan, mission_summary
+    if write_output_video:
+        if not output_video_path:
+            raise ValueError("output_video_path is required when write_output_video=True.")
+        write_video(processed_frames, output_video_path, fps=fps, width=width, height=height)
+        video_path_result: Optional[str] = output_video_path
+    else:
+        video_path_result = None
+    mission_summary = (
+        summarize_results(mission_prompt, mission_plan, detection_log) if generate_summary else None
+    )
+    return video_path_result, mission_plan, mission_summary
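The two call patterns the new flags enable, matching how the endpoints in app.py invoke `run_inference` (a sketch; the file paths and prompt are placeholders):

```python
# /process_video path: render the annotated MP4, skip the OpenAI summary call.
path, _, _ = run_inference(
    "in.mp4",
    "out.mp4",
    "find stranded hikers",
    max_frames=10,
    generate_summary=False,
)

# /mission_summary path: skip the video write; the first tuple element is None.
_, plan, summary = run_inference(
    "in.mp4",
    output_video_path=None,
    mission_prompt="find stranded hikers",
    max_frames=10,
    write_output_video=False,
    generate_summary=True,
)
```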