Spaces: Running on Zero
Commit: update app
Browse files
app.py
CHANGED
|
@@ -101,6 +101,7 @@ MODEL_ID_N = "prithivMLmods/DeepCaption-VLA-7B"
|
|
| 101 |
processor_n = AutoProcessor.from_pretrained(MODEL_ID_N, trust_remote_code=True)
|
| 102 |
model_n = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 103 |
MODEL_ID_N,
|
|
|
|
| 104 |
trust_remote_code=True,
|
| 105 |
torch_dtype=torch.float16
|
| 106 |
).to(device).eval()
|
|
@@ -110,6 +111,7 @@ MODEL_ID_M = "Skywork/SkyCaptioner-V1"
|
|
| 110 |
processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
|
| 111 |
model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 112 |
MODEL_ID_M,
|
|
|
|
| 113 |
trust_remote_code=True,
|
| 114 |
torch_dtype=torch.float16
|
| 115 |
).to(device).eval()
|
|
@@ -119,6 +121,7 @@ MODEL_ID_Z = "remyxai/SpaceThinker-Qwen2.5VL-3B"
|
|
| 119 |
processor_z = AutoProcessor.from_pretrained(MODEL_ID_Z, trust_remote_code=True)
|
| 120 |
model_z = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 121 |
MODEL_ID_Z,
|
|
|
|
| 122 |
trust_remote_code=True,
|
| 123 |
torch_dtype=torch.float16
|
| 124 |
).to(device).eval()
|
|
@@ -128,6 +131,7 @@ MODEL_ID_K = "prithivMLmods/coreOCR-7B-050325-preview"
|
|
| 128 |
processor_k = AutoProcessor.from_pretrained(MODEL_ID_K, trust_remote_code=True)
|
| 129 |
model_k = Qwen2VLForConditionalGeneration.from_pretrained(
|
| 130 |
MODEL_ID_K,
|
|
|
|
| 131 |
trust_remote_code=True,
|
| 132 |
torch_dtype=torch.float16
|
| 133 |
).to(device).eval()
|
|
@@ -137,6 +141,7 @@ MODEL_ID_Y = "remyxai/SpaceOm"
|
|
| 137 |
processor_y = AutoProcessor.from_pretrained(MODEL_ID_Y, trust_remote_code=True)
|
| 138 |
model_y = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 139 |
MODEL_ID_Y,
|
|
|
|
| 140 |
trust_remote_code=True,
|
| 141 |
torch_dtype=torch.float16
|
| 142 |
).to(device).eval()
|
|
@@ -310,7 +315,7 @@ css = """
|
|
| 310 |
"""
|
| 311 |
|
| 312 |
# Create the Gradio Interface
|
| 313 |
-
with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
|
| 314 |
gr.Markdown("# **VisionScope R2**", elem_id="main-title")
|
| 315 |
with gr.Row():
|
| 316 |
with gr.Column(scale=2):
|
|
@@ -333,7 +338,7 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
|
|
| 333 |
repetition_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2)
|
| 334 |
with gr.Column(scale=3):
|
| 335 |
gr.Markdown("## Output", elem_id="output-title")
|
| 336 |
-
output = gr.Textbox(label="Raw Output Stream", interactive=
|
| 337 |
with gr.Accordion("(Result.md)", open=False):
|
| 338 |
markdown_output = gr.Markdown(label="Formatted Result")
|
| 339 |
model_choice = gr.Radio(
|
|
@@ -353,4 +358,4 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
|
|
| 353 |
)
|
| 354 |
|
| 355 |
if __name__ == "__main__":
|
| 356 |
-
demo.queue(max_size=50).launch(mcp_server=True, ssr_mode=False, show_error=True)
|
|
|
|
| 101 |
processor_n = AutoProcessor.from_pretrained(MODEL_ID_N, trust_remote_code=True)
|
| 102 |
model_n = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 103 |
MODEL_ID_N,
|
| 104 |
+
attn_implementation="flash_attention_2",
|
| 105 |
trust_remote_code=True,
|
| 106 |
torch_dtype=torch.float16
|
| 107 |
).to(device).eval()
|
|
|
|
| 111 |
processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
|
| 112 |
model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 113 |
MODEL_ID_M,
|
| 114 |
+
attn_implementation="flash_attention_2",
|
| 115 |
trust_remote_code=True,
|
| 116 |
torch_dtype=torch.float16
|
| 117 |
).to(device).eval()
|
|
|
|
| 121 |
processor_z = AutoProcessor.from_pretrained(MODEL_ID_Z, trust_remote_code=True)
|
| 122 |
model_z = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 123 |
MODEL_ID_Z,
|
| 124 |
+
attn_implementation="flash_attention_2",
|
| 125 |
trust_remote_code=True,
|
| 126 |
torch_dtype=torch.float16
|
| 127 |
).to(device).eval()
|
|
|
|
| 131 |
processor_k = AutoProcessor.from_pretrained(MODEL_ID_K, trust_remote_code=True)
|
| 132 |
model_k = Qwen2VLForConditionalGeneration.from_pretrained(
|
| 133 |
MODEL_ID_K,
|
| 134 |
+
attn_implementation="flash_attention_2",
|
| 135 |
trust_remote_code=True,
|
| 136 |
torch_dtype=torch.float16
|
| 137 |
).to(device).eval()
|
|
|
|
| 141 |
processor_y = AutoProcessor.from_pretrained(MODEL_ID_Y, trust_remote_code=True)
|
| 142 |
model_y = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 143 |
MODEL_ID_Y,
|
| 144 |
+
attn_implementation="flash_attention_2",
|
| 145 |
trust_remote_code=True,
|
| 146 |
torch_dtype=torch.float16
|
| 147 |
).to(device).eval()
|
|
|
|
| 315 |
"""
|
| 316 |
|
| 317 |
# Create the Gradio Interface
|
| 318 |
+
with gr.Blocks() as demo:
|
| 319 |
gr.Markdown("# **VisionScope R2**", elem_id="main-title")
|
| 320 |
with gr.Row():
|
| 321 |
with gr.Column(scale=2):
|
|
|
|
| 338 |
repetition_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2)
|
| 339 |
with gr.Column(scale=3):
|
| 340 |
gr.Markdown("## Output", elem_id="output-title")
|
| 341 |
+
output = gr.Textbox(label="Raw Output Stream", interactive=True, lines=11)
|
| 342 |
with gr.Accordion("(Result.md)", open=False):
|
| 343 |
markdown_output = gr.Markdown(label="Formatted Result")
|
| 344 |
model_choice = gr.Radio(
|
|
|
|
| 358 |
)
|
| 359 |
|
| 360 |
if __name__ == "__main__":
|
| 361 |
+
demo.queue(max_size=50).launch(css=css, theme=steel_blue_theme, mcp_server=True, ssr_mode=False, show_error=True)
|