segmentation-api

Paused

App Files Files

jbilcke-hf committed on Feb 23, 2024

Commit

8dce505

verified ·

1 Parent(s): 58240d2

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -30

app.py CHANGED Viewed

@@ -24,6 +24,7 @@ import argparse
 import copy
 import re
 import json
 import numpy as np
 import torch
@@ -66,6 +67,24 @@ from lama_cleaner.helper import (
 SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')
 config_file = 'GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py'
 ckpt_repo_id = "ShilongLiu/GroundingDINO"
 ckpt_filenmae = "groundingdino_swint_ogc.pth"
@@ -589,12 +608,18 @@ def run_anything_task(secret_token, input_image, text_prompt, box_threshold, tex
     #    show_box(box.cpu().numpy(), plt.gca(), label)
     plt.axis('off')
     image_path = os.path.join(output_dir, f"grounding_seg_output_{file_temp}.png")
     plt.savefig(image_path, bbox_inches="tight", pad_inches=0)
     segment_image_result = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
     os.remove(image_path)
-    output_images.append(segment_image_result)
-    return json.dumps(results), output_images, gr.Gallery.update(label='result images')
 if __name__ == "__main__":
     parser = argparse.ArgumentParser("Grounded SAM demo", add_help=True)
@@ -623,28 +648,22 @@ if __name__ == "__main__":
             </div>
             </div>""")
         with gr.Row():
-            with gr.Column():
-                secret_token = gr.Textbox()
-                text_prompt = gr.Textbox()
-                input_image = gr.Image(source='upload', elem_id="image_upload", tool='sketch', type='pil', label="Upload")
-                text_prompt = gr.Textbox(label="Detection Prompt[To detect multiple objects, seperating each name with '.', like this: cat . dog . chair ]", placeholder="Cannot be empty")
-                run_button = gr.Button(label="Run", visible=True)
-                with gr.Accordion("Advanced options", open=False) as advanced_options:
-                    box_threshold = gr.Slider(
-                        label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.001
-                    )
-                    text_threshold = gr.Slider(
-                        label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
-                    )
-                    iou_threshold = gr.Slider(
-                        label="IOU Threshold", minimum=0.0, maximum=1.0, value=0.8, step=0.001
-                    )
-            with gr.Column():
-                image_gallery = gr.Gallery(label="result images", show_label=True, elem_id="gallery", visible=True
-                    ).style(preview=True, columns=[5], object_fit="scale-down", height="auto")
             run_button.click(
                 fn=run_anything_task,
@@ -656,13 +675,7 @@ if __name__ == "__main__":
                     text_threshold,
                     iou_threshold
                 ],
-                outputs=[
-                    gr.Textbox(),
-                    image_gallery,
-                    image_gallery
-                ],
-                show_progress=False,
-                queue=True
             )
     block.queue(max_size=20).launch(server_name='0.0.0.0', debug=args.debug, share=args.share)

 import copy
 import re
 import json
+import base64
 import numpy as np
 import torch
 SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')
+# Regex pattern to match data URI scheme
+data_uri_pattern = re.compile(r'data:image/(png|jpeg|jpg|webp);base64,')
+def readb64(b64):
+    # Remove any data URI scheme prefix with regex
+    b64 = data_uri_pattern.sub("", b64)
+    # Decode and open the image with PIL
+    img = Image.open(BytesIO(base64.b64decode(b64)))
+    return img
+# convert from PIL to base64
+def writeb64(image):
+    buffered = BytesIO()
+    image.save(buffered, format="PNG")
+    b64image = base64.b64encode(buffered.getvalue())
+    b64image_str = b64image.decode("utf-8")
+    return b64image_str
 config_file = 'GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py'
 ckpt_repo_id = "ShilongLiu/GroundingDINO"
 ckpt_filenmae = "groundingdino_swint_ogc.pth"
     #    show_box(box.cpu().numpy(), plt.gca(), label)
     plt.axis('off')
     image_path = os.path.join(output_dir, f"grounding_seg_output_{file_temp}.png")
+    # do we really need to write to the disk to get an image? seems inefficient
     plt.savefig(image_path, bbox_inches="tight", pad_inches=0)
     segment_image_result = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
     os.remove(image_path)
+    # output_images.append(segment_image_result)
+    response_object = {
+        "data": results,
+        "bitmap": writeb64(segment_image_result) # save as PNG base64
+    }
+    return json.dumps(response_object)
 if __name__ == "__main__":
     parser = argparse.ArgumentParser("Grounded SAM demo", add_help=True)
             </div>
             </div>""")
         with gr.Row():
+        secret_token = gr.Textbox()
+        text_prompt = gr.Textbox()
+        input_image = gr.Textbox()
+        text_prompt = gr.Textbox(label="Detection Prompt[To detect multiple objects, seperating each name with '.', like this: cat . dog . chair ]", placeholder="Cannot be empty")
+        run_button = gr.Button(label="Run", visible=True)
+        with gr.Accordion("Advanced options", open=False) as advanced_options:
+            box_threshold = gr.Slider(
+                label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.001
+            )
+            text_threshold = gr.Slider(
+                label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
+            )
+            iou_threshold = gr.Slider(
+                        label="IOU Threshold", minimum=0.0, maximum=1.0, value=0.8, step=0.001
+                )
             run_button.click(
                 fn=run_anything_task,
                     text_threshold,
                     iou_threshold
                 ],
+                outputs=gr.Textbox()
             )
     block.queue(max_size=20).launch(server_name='0.0.0.0', debug=args.debug, share=args.share)