prithivMLmods committed on
Commit
da6c1e1
·
verified ·
1 Parent(s): 7d4c078

update app

Browse files
Files changed (1) hide show
  1. app.py +36 -33
app.py CHANGED
@@ -355,7 +355,42 @@ def create_gradio_interface():
355
  gr.Markdown("Explore the capabilities of various Vision Language Models for tasks like OCR, VQA, and Object Detection.")
356
 
357
  with gr.Tabs():
358
- # --- TAB 1: Document and General VLMs ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
  with gr.TabItem("📄 Document & General VLM"):
360
  with gr.Row():
361
  with gr.Column(scale=2):
@@ -390,39 +425,7 @@ def create_gradio_interface():
390
  inputs=[image_input_doc, prompt_input_doc]
391
  )
392
 
393
- # --- TAB 2: Moondream3 Lab ---
394
- with gr.TabItem("🌝 Moondream3"):
395
- with gr.Row():
396
- with gr.Column(scale=1):
397
- md3_image_input = gr.Image(label="Upload an image", type="pil", height=400)
398
- md3_task_type = gr.Radio(
399
- choices=["Object Detection", "Point Detection", "Caption", "Visual Question Answering"],
400
- label="Task Type", value="Object Detection"
401
- )
402
- md3_prompt_input = gr.Textbox(
403
- label="Prompt (object to detect/question to ask)",
404
- placeholder="e.g., 'car', 'person', 'What's in this image?'"
405
- )
406
- md3_max_objects = gr.Number(
407
- label="Max Objects (for Object Detection only)",
408
- value=10, minimum=1, maximum=50, step=1, visible=True
409
- )
410
- md3_generate_btn = gr.Button(value="Submit", variant="primary")
411
- with gr.Column(scale=1):
412
- md3_output_image = gr.Image(type="pil", label="Result", height=400)
413
- md3_output_textbox = gr.Textbox(label="Model Response", lines=10, show_copy_button=True)
414
- md3_output_time = gr.Markdown()
415
 
416
- gr.Examples(
417
- examples=[
418
- ["md3/1.jpg", "Object Detection", "boats", 7],
419
- ["md3/2.jpg", "Point Detection", "children", 7],
420
- ["md3/3.png", "Caption", "", 5],
421
- ["md3/4.jpeg", "Visual Question Answering", "Analyze the GDP trend over the years.", 5],
422
- ],
423
- inputs=[md3_image_input, md3_task_type, md3_prompt_input, md3_max_objects],
424
- label="Click an example to populate inputs"
425
- )
426
 
427
  process_btn.click(
428
  fn=process_document_stream,
 
355
  gr.Markdown("Explore the capabilities of various Vision Language Models for tasks like OCR, VQA, and Object Detection.")
356
 
357
  with gr.Tabs():
358
+
359
+ # --- TAB 1: Moondream3 Lab ---
360
+ with gr.TabItem("🌝 Moondream3"):
361
+ with gr.Row():
362
+ with gr.Column(scale=1):
363
+ md3_image_input = gr.Image(label="Upload an image", type="pil", height=400)
364
+ md3_task_type = gr.Radio(
365
+ choices=["Object Detection", "Point Detection", "Caption", "Visual Question Answering"],
366
+ label="Task Type", value="Object Detection"
367
+ )
368
+ md3_prompt_input = gr.Textbox(
369
+ label="Prompt (object to detect/question to ask)",
370
+ placeholder="e.g., 'car', 'person', 'What's in this image?'"
371
+ )
372
+ md3_max_objects = gr.Number(
373
+ label="Max Objects (for Object Detection only)",
374
+ value=10, minimum=1, maximum=50, step=1, visible=True
375
+ )
376
+ md3_generate_btn = gr.Button(value="Submit", variant="primary")
377
+ with gr.Column(scale=1):
378
+ md3_output_image = gr.Image(type="pil", label="Result", height=400)
379
+ md3_output_textbox = gr.Textbox(label="Model Response", lines=10, show_copy_button=True)
380
+ md3_output_time = gr.Markdown()
381
+
382
+ gr.Examples(
383
+ examples=[
384
+ ["md3/1.jpg", "Object Detection", "boats", 7],
385
+ ["md3/2.jpg", "Point Detection", "children", 7],
386
+ ["md3/3.png", "Caption", "", 5],
387
+ ["md3/4.jpeg", "Visual Question Answering", "Analyze the GDP trend over the years.", 5],
388
+ ],
389
+ inputs=[md3_image_input, md3_task_type, md3_prompt_input, md3_max_objects],
390
+ label="Click an example to populate inputs"
391
+ )
392
+
393
+ # --- TAB 2: Document and General VLMs ---
394
  with gr.TabItem("📄 Document & General VLM"):
395
  with gr.Row():
396
  with gr.Column(scale=2):
 
425
  inputs=[image_input_doc, prompt_input_doc]
426
  )
427
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
428
 
 
 
 
 
 
 
 
 
 
 
429
 
430
  process_btn.click(
431
  fn=process_document_stream,