init project
Browse files
app.py
CHANGED
|
@@ -39,8 +39,7 @@ import torchvision.transforms as tvf
|
|
| 39 |
|
| 40 |
|
| 41 |
silent = False
|
| 42 |
-
|
| 43 |
-
pe3r = Models(device)
|
| 44 |
|
| 45 |
|
| 46 |
def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world, cam_size=0.05,
|
|
@@ -86,7 +85,7 @@ def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world,
|
|
| 86 |
scene.export(file_obj=outfile)
|
| 87 |
return outfile
|
| 88 |
|
| 89 |
-
@spaces.GPU(duration=180)
|
| 90 |
def get_3D_model_from_scene(outdir, scene, min_conf_thr=3, as_pointcloud=False, mask_sky=False,
|
| 91 |
clean_depth=False, transparent_cams=False, cam_size=0.05):
|
| 92 |
"""
|
|
@@ -246,6 +245,8 @@ def slerp_multiple(vectors, t_values):
|
|
| 246 |
|
| 247 |
@torch.no_grad
|
| 248 |
def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, original_size, input_size, transform):
|
|
|
|
|
|
|
| 249 |
sam_mask=[]
|
| 250 |
img_area = original_size[0] * original_size[1]
|
| 251 |
|
|
@@ -299,6 +300,7 @@ def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, origin
|
|
| 299 |
|
| 300 |
@torch.no_grad
|
| 301 |
def get_cog_feats(images):
|
|
|
|
| 302 |
cog_seg_maps = []
|
| 303 |
rev_cog_seg_maps = []
|
| 304 |
inference_state = pe3r.sam2.init_state(images=images.sam2_images, video_height=images.sam2_video_size[0], video_width=images.sam2_video_size[1])
|
|
@@ -443,6 +445,8 @@ def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
|
|
| 443 |
"""
|
| 444 |
if len(filelist) < 2:
|
| 445 |
raise gradio.Error("Please input at least 2 images.")
|
|
|
|
|
|
|
| 446 |
|
| 447 |
images = Images(filelist=filelist, device=device)
|
| 448 |
|
|
@@ -523,9 +527,11 @@ def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
|
|
| 523 |
def get_3D_object_from_scene(outdir, text, threshold, scene, min_conf_thr, as_pointcloud,
|
| 524 |
mask_sky, clean_depth, transparent_cams, cam_size):
|
| 525 |
|
|
|
|
|
|
|
| 526 |
texts = [text]
|
| 527 |
inputs = pe3r.siglip_tokenizer(text=texts, padding="max_length", return_tensors="pt")
|
| 528 |
-
inputs = {key: value.to(
|
| 529 |
with torch.no_grad():
|
| 530 |
text_feats =pe3r.siglip.get_text_features(**inputs)
|
| 531 |
text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)
|
|
@@ -559,7 +565,7 @@ def set_scenegraph_options(inputfiles, winsize, refid, scenegraph_type):
|
|
| 559 |
|
| 560 |
with tempfile.TemporaryDirectory(suffix='pe3r_gradio_demo') as tmpdirname:
|
| 561 |
recon_fun = functools.partial(get_reconstructed_scene, tmpdirname)
|
| 562 |
-
model_from_scene_fun = functools.partial(get_3D_model_from_scene, tmpdirname)
|
| 563 |
get_3D_object_from_scene_fun = functools.partial(get_3D_object_from_scene, tmpdirname)
|
| 564 |
|
| 565 |
with gradio.Blocks(css=""".gradio-container {margin: 0 !important; min-width: 100%};""", title="PE3R Demo") as demo:
|
|
@@ -594,11 +600,11 @@ with tempfile.TemporaryDirectory(suffix='pe3r_gradio_demo') as tmpdirname:
|
|
| 594 |
# adjust the camera size in the output pointcloud
|
| 595 |
cam_size = gradio.Slider(label="cam_size", value=0.05, minimum=0.001, maximum=0.1, step=0.001, visible=False)
|
| 596 |
with gradio.Row():
|
| 597 |
-
as_pointcloud = gradio.Checkbox(value=True, label="As pointcloud")
|
| 598 |
# two post process implemented
|
| 599 |
mask_sky = gradio.Checkbox(value=False, label="Mask sky", visible=False)
|
| 600 |
clean_depth = gradio.Checkbox(value=True, label="Clean-up depthmaps", visible=False)
|
| 601 |
-
transparent_cams = gradio.Checkbox(value=True, label="Transparent cameras")
|
| 602 |
|
| 603 |
with gradio.Row():
|
| 604 |
text_input = gradio.Textbox(label="Query Text")
|
|
@@ -622,30 +628,30 @@ with tempfile.TemporaryDirectory(suffix='pe3r_gradio_demo') as tmpdirname:
|
|
| 622 |
mask_sky, clean_depth, transparent_cams, cam_size,
|
| 623 |
scenegraph_type, winsize, refid],
|
| 624 |
outputs=[scene, outmodel]) # , outgallery
|
| 625 |
-
min_conf_thr.release(fn=model_from_scene_fun,
|
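| 626 |
-
inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 627 |
-
clean_depth, transparent_cams, cam_size],
|
| 628 |
-
outputs=outmodel)
|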
| 629 |
-
cam_size.change(fn=model_from_scene_fun,
|
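| 630 |
-
inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 631 |
-
clean_depth, transparent_cams, cam_size],
|
| 632 |
-
outputs=outmodel)
|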
| 633 |
-
as_pointcloud.change(fn=model_from_scene_fun,
|
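| 634 |
-
inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 635 |
-
clean_depth, transparent_cams, cam_size],
|
| 636 |
-
outputs=outmodel)
|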
| 637 |
-
mask_sky.change(fn=model_from_scene_fun,
|
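| 638 |
-
inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 639 |
-
clean_depth, transparent_cams, cam_size],
|
| 640 |
-
outputs=outmodel)
|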
| 641 |
-
clean_depth.change(fn=model_from_scene_fun,
|
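| 642 |
-
inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 643 |
-
clean_depth, transparent_cams, cam_size],
|
| 644 |
-
outputs=outmodel)
|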
| 645 |
-
transparent_cams.change(model_from_scene_fun,
|
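| 646 |
-
inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 647 |
-
clean_depth, transparent_cams, cam_size],
|
| 648 |
-
outputs=outmodel)
|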
| 649 |
find_btn.click(fn=get_3D_object_from_scene_fun,
|
| 650 |
inputs=[text_input, threshold, scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 651 |
clean_depth, transparent_cams, cam_size],
|
|
|
|
| 39 |
|
| 40 |
|
| 41 |
silent = False
|
| 42 |
+
pe3r = Models('cpu')
|
|
|
|
| 43 |
|
| 44 |
|
| 45 |
def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world, cam_size=0.05,
|
|
|
|
| 85 |
scene.export(file_obj=outfile)
|
| 86 |
return outfile
|
| 87 |
|
| 88 |
+
# @spaces.GPU(duration=180)
|
| 89 |
def get_3D_model_from_scene(outdir, scene, min_conf_thr=3, as_pointcloud=False, mask_sky=False,
|
| 90 |
clean_depth=False, transparent_cams=False, cam_size=0.05):
|
| 91 |
"""
|
|
|
|
| 245 |
|
| 246 |
@torch.no_grad
|
| 247 |
def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, original_size, input_size, transform):
|
| 248 |
+
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 249 |
+
|
| 250 |
sam_mask=[]
|
| 251 |
img_area = original_size[0] * original_size[1]
|
| 252 |
|
|
|
|
| 300 |
|
| 301 |
@torch.no_grad
|
| 302 |
def get_cog_feats(images):
|
| 303 |
+
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 304 |
cog_seg_maps = []
|
| 305 |
rev_cog_seg_maps = []
|
| 306 |
inference_state = pe3r.sam2.init_state(images=images.sam2_images, video_height=images.sam2_video_size[0], video_width=images.sam2_video_size[1])
|
|
|
|
| 445 |
"""
|
| 446 |
if len(filelist) < 2:
|
| 447 |
raise gradio.Error("Please input at least 2 images.")
|
| 448 |
+
|
| 449 |
+
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 450 |
|
| 451 |
images = Images(filelist=filelist, device=device)
|
| 452 |
|
|
|
|
| 527 |
def get_3D_object_from_scene(outdir, text, threshold, scene, min_conf_thr, as_pointcloud,
|
| 528 |
mask_sky, clean_depth, transparent_cams, cam_size):
|
| 529 |
|
| 530 |
+
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 531 |
+
|
| 532 |
texts = [text]
|
| 533 |
inputs = pe3r.siglip_tokenizer(text=texts, padding="max_length", return_tensors="pt")
|
| 534 |
+
inputs = {key: value.to(device) for key, value in inputs.items()}
|
| 535 |
with torch.no_grad():
|
| 536 |
text_feats =pe3r.siglip.get_text_features(**inputs)
|
| 537 |
text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)
|
|
|
|
| 565 |
|
| 566 |
with tempfile.TemporaryDirectory(suffix='pe3r_gradio_demo') as tmpdirname:
|
| 567 |
recon_fun = functools.partial(get_reconstructed_scene, tmpdirname)
|
| 568 |
+
# model_from_scene_fun = functools.partial(get_3D_model_from_scene, tmpdirname)
|
| 569 |
get_3D_object_from_scene_fun = functools.partial(get_3D_object_from_scene, tmpdirname)
|
| 570 |
|
| 571 |
with gradio.Blocks(css=""".gradio-container {margin: 0 !important; min-width: 100%};""", title="PE3R Demo") as demo:
|
|
|
|
| 600 |
# adjust the camera size in the output pointcloud
|
| 601 |
cam_size = gradio.Slider(label="cam_size", value=0.05, minimum=0.001, maximum=0.1, step=0.001, visible=False)
|
| 602 |
with gradio.Row():
|
| 603 |
+
as_pointcloud = gradio.Checkbox(value=True, label="As pointcloud", visible=False)
|
| 604 |
# two post process implemented
|
| 605 |
mask_sky = gradio.Checkbox(value=False, label="Mask sky", visible=False)
|
| 606 |
clean_depth = gradio.Checkbox(value=True, label="Clean-up depthmaps", visible=False)
|
| 607 |
+
transparent_cams = gradio.Checkbox(value=True, label="Transparent cameras", visible=False)
|
| 608 |
|
| 609 |
with gradio.Row():
|
| 610 |
text_input = gradio.Textbox(label="Query Text")
|
|
|
|
| 628 |
mask_sky, clean_depth, transparent_cams, cam_size,
|
| 629 |
scenegraph_type, winsize, refid],
|
| 630 |
outputs=[scene, outmodel]) # , outgallery
|
| 631 |
+
# min_conf_thr.release(fn=model_from_scene_fun,
|
| 632 |
+
# inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 633 |
+
# clean_depth, transparent_cams, cam_size],
|
| 634 |
+
# outputs=outmodel)
|
| 635 |
+
# cam_size.change(fn=model_from_scene_fun,
|
| 636 |
+
# inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 637 |
+
# clean_depth, transparent_cams, cam_size],
|
| 638 |
+
# outputs=outmodel)
|
| 639 |
+
# as_pointcloud.change(fn=model_from_scene_fun,
|
| 640 |
+
# inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 641 |
+
# clean_depth, transparent_cams, cam_size],
|
| 642 |
+
# outputs=outmodel)
|
| 643 |
+
# mask_sky.change(fn=model_from_scene_fun,
|
| 644 |
+
# inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 645 |
+
# clean_depth, transparent_cams, cam_size],
|
| 646 |
+
# outputs=outmodel)
|
| 647 |
+
# clean_depth.change(fn=model_from_scene_fun,
|
| 648 |
+
# inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 649 |
+
# clean_depth, transparent_cams, cam_size],
|
| 650 |
+
# outputs=outmodel)
|
| 651 |
+
# transparent_cams.change(model_from_scene_fun,
|
| 652 |
+
# inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 653 |
+
# clean_depth, transparent_cams, cam_size],
|
| 654 |
+
# outputs=outmodel)
|
| 655 |
find_btn.click(fn=get_3D_object_from_scene_fun,
|
| 656 |
inputs=[text_input, threshold, scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 657 |
clean_depth, transparent_cams, cam_size],
|