Spaces:

HorizonRobotics
/

EmbodiedGen-Image-to-3D

Running on Zero

App Files Community

xinjie.wang committed on Mar 3

Commit

fe4dab5

1 Parent(s): e3586fb

update

Browse files

Files changed (7) hide show

app.py +8 -10
common.py +7 -3
embodied_gen/models/sam3d.py +4 -1
embodied_gen/utils/monkey_patch/gradio.py +6 -5
embodied_gen/utils/monkey_patch/sam3d.py +2 -2
embodied_gen/utils/trender.py +0 -5
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -39,7 +39,6 @@ from common import (
     start_session,
 )
 app_name = os.getenv("GRADIO_APP")
 if app_name == "imageto3d_sam3d":
     _enable_pre_resize_default = False
@@ -51,6 +50,8 @@ elif app_name == "imageto3d":
     bg_rm_model_name = "rembg"  # "rembg", "rmbg14"
 current_rmbg_tag = bg_rm_model_name
 def set_current_rmbg_tag(rmbg: str) -> None:
     global current_rmbg_tag
     current_rmbg_tag = rmbg
@@ -88,9 +89,7 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
         </p>
         🖼️ Generate physically plausible 3D asset from single input image.
-        """.format(
-            VERSION=VERSION
-        ),
         elem_classes=["header"],
     )
     enable_pre_resize = gr.State(_enable_pre_resize_default)
@@ -114,11 +113,9 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
                         height=400,
                         elem_classes=["image_fit"],
                     )
-                    gr.Markdown(
-                        """
                         If you are not satisfied with the auto segmentation
-                        result, please switch to the `Image(SAM seg)` tab."""
-                    )
                 with gr.Tab(
                     label="Image(SAM seg)", id=1
                 ) as samimage_input_tab:
@@ -355,7 +352,9 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
     )
     image_prompt.upload(
-        lambda img, rmbg: preprocess_image_fn(img, rmbg, _enable_pre_resize_default),
         inputs=[image_prompt, rmbg_tag],
         outputs=[image_prompt, raw_image_cache],
         queue=False,
@@ -537,5 +536,4 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
 if __name__ == "__main__":
-    # launch_demo()
     demo.launch()

     start_session,
 )
 app_name = os.getenv("GRADIO_APP")
 if app_name == "imageto3d_sam3d":
     _enable_pre_resize_default = False
     bg_rm_model_name = "rembg"  # "rembg", "rmbg14"
 current_rmbg_tag = bg_rm_model_name
 def set_current_rmbg_tag(rmbg: str) -> None:
     global current_rmbg_tag
     current_rmbg_tag = rmbg
         </p>
         🖼️ Generate physically plausible 3D asset from single input image.
+        """.format(VERSION=VERSION),
         elem_classes=["header"],
     )
     enable_pre_resize = gr.State(_enable_pre_resize_default)
                         height=400,
                         elem_classes=["image_fit"],
                     )
+                    gr.Markdown("""
                         If you are not satisfied with the auto segmentation
+                        result, please switch to the `Image(SAM seg)` tab.""")
                 with gr.Tab(
                     label="Image(SAM seg)", id=1
                 ) as samimage_input_tab:
     )
     image_prompt.upload(
+        lambda img, rmbg: preprocess_image_fn(
+            img, rmbg, _enable_pre_resize_default
+        ),
         inputs=[image_prompt, rmbg_tag],
         outputs=[image_prompt, raw_image_cache],
         queue=False,
 if __name__ == "__main__":
     demo.launch()

common.py CHANGED Viewed

@@ -16,8 +16,12 @@
 import spaces
 from embodied_gen.utils.monkey_patch.trellis import monkey_path_trellis
 monkey_path_trellis()
-from embodied_gen.utils.monkey_patch.gradio import _patch_open3d_cuda_device_count_bug
 _patch_open3d_cuda_device_count_bug()
 import gc
@@ -92,7 +96,7 @@ if os.getenv("GRADIO_APP").startswith("imageto3d"):
     RBG14_REMOVER = BMGG14Remover()
     SAM_PREDICTOR = SAMPredictor(model_type="vit_h", device="cpu")
     if "sam3d" in os.getenv("GRADIO_APP"):
-        PIPELINE = Sam3dInference()
     else:
         PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
             "microsoft/TRELLIS-image-large"
@@ -110,7 +114,7 @@ elif os.getenv("GRADIO_APP").startswith("textto3d"):
     RBG_REMOVER = RembgRemover()
     RBG14_REMOVER = BMGG14Remover()
     if "sam3d" in os.getenv("GRADIO_APP"):
-        PIPELINE = Sam3dInference()
     else:
         PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
             "microsoft/TRELLIS-image-large"

 import spaces
 from embodied_gen.utils.monkey_patch.trellis import monkey_path_trellis
 monkey_path_trellis()
+from embodied_gen.utils.monkey_patch.gradio import (
+    _patch_open3d_cuda_device_count_bug,
+)
 _patch_open3d_cuda_device_count_bug()
 import gc
     RBG14_REMOVER = BMGG14Remover()
     SAM_PREDICTOR = SAMPredictor(model_type="vit_h", device="cpu")
     if "sam3d" in os.getenv("GRADIO_APP"):
+        PIPELINE = Sam3dInference(device="cuda")
     else:
         PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
             "microsoft/TRELLIS-image-large"
     RBG_REMOVER = RembgRemover()
     RBG14_REMOVER = BMGG14Remover()
     if "sam3d" in os.getenv("GRADIO_APP"):
+        PIPELINE = Sam3dInference(device="cuda")
     else:
         PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
             "microsoft/TRELLIS-image-large"

embodied_gen/models/sam3d.py CHANGED Viewed

@@ -63,7 +63,10 @@ class Sam3dInference:
     """
     def __init__(
-        self, local_dir: str = "weights/sam-3d-objects", compile: bool = False, device: str = "cuda",
     ) -> None:
         if not os.path.exists(local_dir):
             snapshot_download("tuandao-zenai/sam-3d-objects", local_dir=local_dir)

     """
     def __init__(
+        self,
+        local_dir: str = "weights/sam-3d-objects",
+        compile: bool = False,
+        device: str = "cuda",
     ) -> None:
         if not os.path.exists(local_dir):
             snapshot_download("tuandao-zenai/sam-3d-objects", local_dir=local_dir)

embodied_gen/utils/monkey_patch/gradio.py CHANGED Viewed

@@ -15,10 +15,12 @@
 # permissions and limitations under the License.
-import gradio_client.utils as gradio_client_utils
 import fileinput
 import site
 def _patch_gradio_schema_bool_bug() -> None:
     """Patch schema parser for bool-style for gradio<5.33."""
     original_get_type = gradio_client_utils.get_type
@@ -50,8 +52,7 @@ def _patch_open3d_cuda_device_count_bug() -> None:
         for line in file:
             print(
                 line.replace(
-                    '_pybind_cuda.open3d_core_cuda_device_count()',
-                    '1'
                 ),
-                end=''
-            )

 # permissions and limitations under the License.
 import fileinput
 import site
+import gradio_client.utils as gradio_client_utils
 def _patch_gradio_schema_bool_bug() -> None:
     """Patch schema parser for bool-style for gradio<5.33."""
     original_get_type = gradio_client_utils.get_type
         for line in file:
             print(
                 line.replace(
+                    '_pybind_cuda.open3d_core_cuda_device_count()', '1'
                 ),
+                end='',
+            )

embodied_gen/utils/monkey_patch/sam3d.py CHANGED Viewed

@@ -380,7 +380,7 @@ def monkey_patch_sam3d():
         InferencePipeline.__init__ = patch_init
-    # patch_pointmap_infer_pipeline() # patch
-    # patch_infer_init() # patch
     return

         InferencePipeline.__init__ = patch_init
+    patch_pointmap_infer_pipeline()  # patch
+    patch_infer_init()  # patch
     return

embodied_gen/utils/trender.py CHANGED Viewed

@@ -19,7 +19,6 @@ import sys
 from collections import defaultdict
 import numpy as np
-import spaces
 import torch
 from easydict import EasyDict as edict
 from tqdm import tqdm
@@ -43,7 +42,6 @@ __all__ = [
 ]
-# @spaces.GPU
 def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
     renderer = MeshRenderer()
     renderer.rendering_options.resolution = options.get("resolution", 512)
@@ -66,7 +64,6 @@ def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
     return rets
-# @spaces.GPU
 def render_gs_frames(
     sample,
     extrinsics,
@@ -117,7 +114,6 @@ def render_gs_frames(
     return dict(outputs)
-# @spaces.GPU
 def render_video(
     sample,
     resolution=512,
@@ -149,7 +145,6 @@ def render_video(
     return result
-# @spaces.GPU
 def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
     return {
         "gaussian": {

 from collections import defaultdict
 import numpy as np
 import torch
 from easydict import EasyDict as edict
 from tqdm import tqdm
 ]
 def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
     renderer = MeshRenderer()
     renderer.rendering_options.resolution = options.get("resolution", 512)
     return rets
 def render_gs_frames(
     sample,
     extrinsics,
     return dict(outputs)
 def render_video(
     sample,
     resolution=512,
     return result
 def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
     return {
         "gaussian": {

requirements.txt CHANGED Viewed

@@ -65,7 +65,7 @@ https://huggingface.co/xinjjj/RoboAssetGen/resolve/main/wheel_cu121/pytorch3d-0.
 # git+https://github.com/nerfstudio-project/gsplat.git@v1.5.3
 https://github.com/nerfstudio-project/gsplat/releases/download/v1.5.0/gsplat-1.5.0+pt24cu121-cp310-cp310-linux_x86_64.whl
 # flash-attn==2.7.0.post2
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.0.post2/flash_attn-2.7.0.post2+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
 # kaolin@git+https://github.com/NVIDIAGameWorks/kaolin.git@v0.16.0
 https://huggingface.co/xinjjj/RoboAssetGen/resolve/main/wheel_cu121/kaolin-0.16.0-cp310-cp310-linux_x86_64.whl
 # nvdiffrast@git+https://github.com/NVlabs/nvdiffrast.git#egg=729261d

 # git+https://github.com/nerfstudio-project/gsplat.git@v1.5.3
 https://github.com/nerfstudio-project/gsplat/releases/download/v1.5.0/gsplat-1.5.0+pt24cu121-cp310-cp310-linux_x86_64.whl
 # flash-attn==2.7.0.post2
+# https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.0.post2/flash_attn-2.7.0.post2+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
 # kaolin@git+https://github.com/NVIDIAGameWorks/kaolin.git@v0.16.0
 https://huggingface.co/xinjjj/RoboAssetGen/resolve/main/wheel_cu121/kaolin-0.16.0-cp310-cp310-linux_x86_64.whl
 # nvdiffrast@git+https://github.com/NVlabs/nvdiffrast.git#egg=729261d