Spaces:
Sleeping
Sleeping
fix:frontend video display
Browse files
README.md
CHANGED
|
@@ -12,8 +12,7 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
|
|
| 12 |
|
| 13 |
## Mission-guided detections
|
| 14 |
|
| 15 |
-
1.
|
| 16 |
-
2.
|
| 17 |
-
3.
|
| 18 |
-
4.
|
| 19 |
-
5. The HTTP response still streams the processed video, and it now embeds the structured mission plan (`x-mission-plan`) and text summary (`x-mission-summary`) in the headers.
|
|
|
|
| 12 |
|
| 13 |
## Mission-guided detections
|
| 14 |
|
| 15 |
+
1. Call `POST /process_video` with fields `video` (file), `prompt` (mission text), and optional `detector` (`owlv2` or `hf_yolov8`). The response is an MP4 stream containing the annotated frames.
|
| 16 |
+
2. Call `POST /mission_summary` with the same fields to receive JSON containing the structured mission plan plus the natural-language summary. This second endpoint isolates the OpenAI summarization call, keeping the video response clean.
|
| 17 |
+
3. Under the hood, the mission text still feeds into the OpenAI (`gpt-4o-mini`) reasoning step that ranks the YOLO/COCO classes. Place your API key inside `.env` as either `OPENAI_API_KEY=...` or `OpenAI-API: ...`; the server loads it automatically on startup.
|
| 18 |
+
4. The top-scored classes drive OWLv2 or YOLOv8 to align detections with the mission, and the detection log is summarized via another OpenAI call when requested.
|
|
|
app.py
CHANGED
|
@@ -50,6 +50,13 @@ def _schedule_cleanup(background_tasks: BackgroundTasks, path: str) -> None:
|
|
| 50 |
background_tasks.add_task(_cleanup)
|
| 51 |
|
| 52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
@app.post("/process_video")
|
| 54 |
async def process_video(
|
| 55 |
background_tasks: BackgroundTasks,
|
|
@@ -57,10 +64,7 @@ async def process_video(
|
|
| 57 |
prompt: str = Form(...),
|
| 58 |
detector: Optional[str] = Form(None),
|
| 59 |
):
|
| 60 |
-
|
| 61 |
-
raise HTTPException(status_code=400, detail="Video file is required.")
|
| 62 |
-
if not prompt:
|
| 63 |
-
raise HTTPException(status_code=400, detail="Prompt is required.")
|
| 64 |
|
| 65 |
try:
|
| 66 |
input_path = _save_upload_to_tmp(video)
|
|
@@ -74,12 +78,13 @@ async def process_video(
|
|
| 74 |
os.close(fd)
|
| 75 |
|
| 76 |
try:
|
| 77 |
-
output_path,
|
| 78 |
input_path,
|
| 79 |
output_path,
|
| 80 |
prompt,
|
| 81 |
max_frames=10,
|
| 82 |
detector_name=detector,
|
|
|
|
| 83 |
)
|
| 84 |
except ValueError as exc:
|
| 85 |
logging.exception("Video decoding failed.")
|
|
@@ -100,11 +105,49 @@ async def process_video(
|
|
| 100 |
media_type="video/mp4",
|
| 101 |
filename="processed.mp4",
|
| 102 |
)
|
| 103 |
-
response.headers["x-mission-plan"] = mission_plan.to_json()
|
| 104 |
-
response.headers["x-mission-summary"] = mission_summary.replace("\n", " ").strip()
|
| 105 |
return response
|
| 106 |
|
| 107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
if __name__ == "__main__":
|
| 109 |
uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)
|
| 110 |
@app.get("/", response_class=HTMLResponse)
|
|
@@ -114,4 +157,3 @@ async def demo_page() -> str:
|
|
| 114 |
return demo_path.read_text(encoding="utf-8")
|
| 115 |
except FileNotFoundError:
|
| 116 |
return "<h1>Demo page missing</h1>"
|
| 117 |
-
|
|
|
|
| 50 |
background_tasks.add_task(_cleanup)
|
| 51 |
|
| 52 |
|
| 53 |
+
def _validate_inputs(video: UploadFile | None, prompt: str | None) -> None:
|
| 54 |
+
if video is None:
|
| 55 |
+
raise HTTPException(status_code=400, detail="Video file is required.")
|
| 56 |
+
if not prompt:
|
| 57 |
+
raise HTTPException(status_code=400, detail="Prompt is required.")
|
| 58 |
+
|
| 59 |
+
|
| 60 |
@app.post("/process_video")
|
| 61 |
async def process_video(
|
| 62 |
background_tasks: BackgroundTasks,
|
|
|
|
| 64 |
prompt: str = Form(...),
|
| 65 |
detector: Optional[str] = Form(None),
|
| 66 |
):
|
| 67 |
+
_validate_inputs(video, prompt)
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
try:
|
| 70 |
input_path = _save_upload_to_tmp(video)
|
|
|
|
| 78 |
os.close(fd)
|
| 79 |
|
| 80 |
try:
|
| 81 |
+
output_path, _, _ = run_inference(
|
| 82 |
input_path,
|
| 83 |
output_path,
|
| 84 |
prompt,
|
| 85 |
max_frames=10,
|
| 86 |
detector_name=detector,
|
| 87 |
+
generate_summary=False,
|
| 88 |
)
|
| 89 |
except ValueError as exc:
|
| 90 |
logging.exception("Video decoding failed.")
|
|
|
|
| 105 |
media_type="video/mp4",
|
| 106 |
filename="processed.mp4",
|
| 107 |
)
|
|
|
|
|
|
|
| 108 |
return response
|
| 109 |
|
| 110 |
|
| 111 |
+
@app.post("/mission_summary")
|
| 112 |
+
async def mission_summary(
|
| 113 |
+
video: UploadFile = File(...),
|
| 114 |
+
prompt: str = Form(...),
|
| 115 |
+
detector: Optional[str] = Form(None),
|
| 116 |
+
):
|
| 117 |
+
_validate_inputs(video, prompt)
|
| 118 |
+
try:
|
| 119 |
+
input_path = _save_upload_to_tmp(video)
|
| 120 |
+
except Exception:
|
| 121 |
+
logging.exception("Failed to save uploaded file.")
|
| 122 |
+
raise HTTPException(status_code=500, detail="Failed to save uploaded video.")
|
| 123 |
+
finally:
|
| 124 |
+
await video.close()
|
| 125 |
+
|
| 126 |
+
try:
|
| 127 |
+
_, mission_plan, mission_summary = run_inference(
|
| 128 |
+
input_path,
|
| 129 |
+
output_video_path=None,
|
| 130 |
+
mission_prompt=prompt,
|
| 131 |
+
max_frames=10,
|
| 132 |
+
detector_name=detector,
|
| 133 |
+
write_output_video=False,
|
| 134 |
+
generate_summary=True,
|
| 135 |
+
)
|
| 136 |
+
except ValueError as exc:
|
| 137 |
+
logging.exception("Video decoding failed.")
|
| 138 |
+
_safe_delete(input_path)
|
| 139 |
+
raise HTTPException(status_code=500, detail=str(exc))
|
| 140 |
+
except Exception as exc:
|
| 141 |
+
logging.exception("Summary generation failed.")
|
| 142 |
+
_safe_delete(input_path)
|
| 143 |
+
return JSONResponse(status_code=500, content={"error": str(exc)})
|
| 144 |
+
|
| 145 |
+
_safe_delete(input_path)
|
| 146 |
+
return {
|
| 147 |
+
"mission_plan": mission_plan.to_dict(),
|
| 148 |
+
"mission_summary": mission_summary or "",
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
if __name__ == "__main__":
|
| 152 |
uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)
|
| 153 |
@app.get("/", response_class=HTMLResponse)
|
|
|
|
| 157 |
return demo_path.read_text(encoding="utf-8")
|
| 158 |
except FileNotFoundError:
|
| 159 |
return "<h1>Demo page missing</h1>"
|
|
|
demo.html
CHANGED
|
@@ -140,7 +140,9 @@ button:hover {
|
|
| 140 |
</div>
|
| 141 |
|
| 142 |
<script>
|
| 143 |
-
const
|
|
|
|
|
|
|
| 144 |
|
| 145 |
async function executeMission() {
|
| 146 |
|
|
@@ -155,18 +157,18 @@ async function executeMission() {
|
|
| 155 |
return;
|
| 156 |
}
|
| 157 |
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
formData.append("prompt", mission);
|
| 161 |
-
formData.append("detector", detector);
|
| 162 |
-
|
| 163 |
-
statusEl.textContent = "Dispatching mission to backend...";
|
| 164 |
-
summaryEl.textContent = "(Processing...)";
|
| 165 |
|
| 166 |
try {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
const response = await fetch(PROCESS_VIDEO_URL, {
|
| 168 |
method: "POST",
|
| 169 |
-
body:
|
| 170 |
});
|
| 171 |
|
| 172 |
if (!response.ok) {
|
|
@@ -180,14 +182,35 @@ async function executeMission() {
|
|
| 180 |
throw new Error(errorDetail);
|
| 181 |
}
|
| 182 |
|
| 183 |
-
const missionSummary = response.headers.get("x-mission-summary") || "No summary returned.";
|
| 184 |
-
summaryEl.textContent = missionSummary;
|
| 185 |
-
|
| 186 |
const videoBlob = await response.blob();
|
| 187 |
const videoUrl = URL.createObjectURL(videoBlob);
|
| 188 |
const videoEl = document.getElementById("processedVideo");
|
| 189 |
videoEl.src = videoUrl;
|
| 190 |
videoEl.load();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
statusEl.textContent = "Mission complete.";
|
| 192 |
} catch (err) {
|
| 193 |
console.error(err);
|
|
|
|
| 140 |
</div>
|
| 141 |
|
| 142 |
<script>
|
| 143 |
+
const API_BASE_URL = "https://biaslab2025-demo-2025.hf.space";
|
| 144 |
+
const PROCESS_VIDEO_URL = `${API_BASE_URL}/process_video`;
|
| 145 |
+
const SUMMARY_URL = `${API_BASE_URL}/mission_summary`;
|
| 146 |
|
| 147 |
async function executeMission() {
|
| 148 |
|
|
|
|
| 157 |
return;
|
| 158 |
}
|
| 159 |
|
| 160 |
+
statusEl.textContent = "Processing video...";
|
| 161 |
+
summaryEl.textContent = "(Awaiting summary...)";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
try {
|
| 164 |
+
const videoForm = new FormData();
|
| 165 |
+
videoForm.append("video", videoFile);
|
| 166 |
+
videoForm.append("prompt", mission);
|
| 167 |
+
videoForm.append("detector", detector);
|
| 168 |
+
|
| 169 |
const response = await fetch(PROCESS_VIDEO_URL, {
|
| 170 |
method: "POST",
|
| 171 |
+
body: videoForm
|
| 172 |
});
|
| 173 |
|
| 174 |
if (!response.ok) {
|
|
|
|
| 182 |
throw new Error(errorDetail);
|
| 183 |
}
|
| 184 |
|
|
|
|
|
|
|
|
|
|
| 185 |
const videoBlob = await response.blob();
|
| 186 |
const videoUrl = URL.createObjectURL(videoBlob);
|
| 187 |
const videoEl = document.getElementById("processedVideo");
|
| 188 |
videoEl.src = videoUrl;
|
| 189 |
videoEl.load();
|
| 190 |
+
|
| 191 |
+
statusEl.textContent = "Generating summary...";
|
| 192 |
+
|
| 193 |
+
const summaryForm = new FormData();
|
| 194 |
+
summaryForm.append("video", videoFile);
|
| 195 |
+
summaryForm.append("prompt", mission);
|
| 196 |
+
summaryForm.append("detector", detector);
|
| 197 |
+
|
| 198 |
+
const summaryResponse = await fetch(SUMMARY_URL, {
|
| 199 |
+
method: "POST",
|
| 200 |
+
body: summaryForm
|
| 201 |
+
});
|
| 202 |
+
if (!summaryResponse.ok) {
|
| 203 |
+
let errorDetail = `Summary failed (${summaryResponse.status})`;
|
| 204 |
+
try {
|
| 205 |
+
const errJson = await summaryResponse.json();
|
| 206 |
+
errorDetail = errJson.error || errorDetail;
|
| 207 |
+
} catch (_) {}
|
| 208 |
+
throw new Error(errorDetail);
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
const summaryJson = await summaryResponse.json();
|
| 212 |
+
const summaryText = summaryJson.mission_summary || "No summary returned.";
|
| 213 |
+
summaryEl.textContent = summaryText;
|
| 214 |
statusEl.textContent = "Mission complete.";
|
| 215 |
} catch (err) {
|
| 216 |
console.error(err);
|
inference.py
CHANGED
|
@@ -66,11 +66,13 @@ def infer_frame(
|
|
| 66 |
|
| 67 |
def run_inference(
|
| 68 |
input_video_path: str,
|
| 69 |
-
output_video_path: str,
|
| 70 |
mission_prompt: str,
|
| 71 |
max_frames: Optional[int] = None,
|
| 72 |
detector_name: Optional[str] = None,
|
| 73 |
-
|
|
|
|
|
|
|
| 74 |
try:
|
| 75 |
frames, fps, width, height = extract_frames(input_video_path)
|
| 76 |
except ValueError as exc:
|
|
@@ -91,6 +93,14 @@ def run_inference(
|
|
| 91 |
detection_log.append({"frame_index": idx, "detections": detections})
|
| 92 |
processed_frames.append(processed_frame)
|
| 93 |
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
def run_inference(
|
| 68 |
input_video_path: str,
|
| 69 |
+
output_video_path: Optional[str],
|
| 70 |
mission_prompt: str,
|
| 71 |
max_frames: Optional[int] = None,
|
| 72 |
detector_name: Optional[str] = None,
|
| 73 |
+
write_output_video: bool = True,
|
| 74 |
+
generate_summary: bool = True,
|
| 75 |
+
) -> Tuple[Optional[str], MissionPlan, Optional[str]]:
|
| 76 |
try:
|
| 77 |
frames, fps, width, height = extract_frames(input_video_path)
|
| 78 |
except ValueError as exc:
|
|
|
|
| 93 |
detection_log.append({"frame_index": idx, "detections": detections})
|
| 94 |
processed_frames.append(processed_frame)
|
| 95 |
|
| 96 |
+
if write_output_video:
|
| 97 |
+
if not output_video_path:
|
| 98 |
+
raise ValueError("output_video_path is required when write_output_video=True.")
|
| 99 |
+
write_video(processed_frames, output_video_path, fps=fps, width=width, height=height)
|
| 100 |
+
video_path_result: Optional[str] = output_video_path
|
| 101 |
+
else:
|
| 102 |
+
video_path_result = None
|
| 103 |
+
mission_summary = (
|
| 104 |
+
summarize_results(mission_prompt, mission_plan, detection_log) if generate_summary else None
|
| 105 |
+
)
|
| 106 |
+
return video_path_result, mission_plan, mission_summary
|