xinjie.wang committed on
Commit
fe4dab5
·
1 Parent(s): e3586fb
app.py CHANGED
@@ -39,7 +39,6 @@ from common import (
39
  start_session,
40
  )
41
 
42
-
43
  app_name = os.getenv("GRADIO_APP")
44
  if app_name == "imageto3d_sam3d":
45
  _enable_pre_resize_default = False
@@ -51,6 +50,8 @@ elif app_name == "imageto3d":
51
  bg_rm_model_name = "rembg" # "rembg", "rmbg14"
52
 
53
  current_rmbg_tag = bg_rm_model_name
 
 
54
  def set_current_rmbg_tag(rmbg: str) -> None:
55
  global current_rmbg_tag
56
  current_rmbg_tag = rmbg
@@ -88,9 +89,7 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
88
  </p>
89
 
90
  🖼️ Generate physically plausible 3D asset from single input image.
91
- """.format(
92
- VERSION=VERSION
93
- ),
94
  elem_classes=["header"],
95
  )
96
  enable_pre_resize = gr.State(_enable_pre_resize_default)
@@ -114,11 +113,9 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
114
  height=400,
115
  elem_classes=["image_fit"],
116
  )
117
- gr.Markdown(
118
- """
119
  If you are not satisfied with the auto segmentation
120
- result, please switch to the `Image(SAM seg)` tab."""
121
- )
122
  with gr.Tab(
123
  label="Image(SAM seg)", id=1
124
  ) as samimage_input_tab:
@@ -355,7 +352,9 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
355
  )
356
 
357
  image_prompt.upload(
358
- lambda img, rmbg: preprocess_image_fn(img, rmbg, _enable_pre_resize_default),
 
 
359
  inputs=[image_prompt, rmbg_tag],
360
  outputs=[image_prompt, raw_image_cache],
361
  queue=False,
@@ -537,5 +536,4 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
537
 
538
 
539
  if __name__ == "__main__":
540
- # launch_demo()
541
  demo.launch()
 
39
  start_session,
40
  )
41
 
 
42
  app_name = os.getenv("GRADIO_APP")
43
  if app_name == "imageto3d_sam3d":
44
  _enable_pre_resize_default = False
 
50
  bg_rm_model_name = "rembg" # "rembg", "rmbg14"
51
 
52
  current_rmbg_tag = bg_rm_model_name
53
+
54
+
55
  def set_current_rmbg_tag(rmbg: str) -> None:
56
  global current_rmbg_tag
57
  current_rmbg_tag = rmbg
 
89
  </p>
90
 
91
  🖼️ Generate physically plausible 3D asset from single input image.
92
+ """.format(VERSION=VERSION),
 
 
93
  elem_classes=["header"],
94
  )
95
  enable_pre_resize = gr.State(_enable_pre_resize_default)
 
113
  height=400,
114
  elem_classes=["image_fit"],
115
  )
116
+ gr.Markdown("""
 
117
  If you are not satisfied with the auto segmentation
118
+ result, please switch to the `Image(SAM seg)` tab.""")
 
119
  with gr.Tab(
120
  label="Image(SAM seg)", id=1
121
  ) as samimage_input_tab:
 
352
  )
353
 
354
  image_prompt.upload(
355
+ lambda img, rmbg: preprocess_image_fn(
356
+ img, rmbg, _enable_pre_resize_default
357
+ ),
358
  inputs=[image_prompt, rmbg_tag],
359
  outputs=[image_prompt, raw_image_cache],
360
  queue=False,
 
536
 
537
 
538
  if __name__ == "__main__":
 
539
  demo.launch()
common.py CHANGED
@@ -16,8 +16,12 @@
16
 
17
  import spaces
18
  from embodied_gen.utils.monkey_patch.trellis import monkey_path_trellis
 
19
  monkey_path_trellis()
20
- from embodied_gen.utils.monkey_patch.gradio import _patch_open3d_cuda_device_count_bug
 
 
 
21
  _patch_open3d_cuda_device_count_bug()
22
 
23
  import gc
@@ -92,7 +96,7 @@ if os.getenv("GRADIO_APP").startswith("imageto3d"):
92
  RBG14_REMOVER = BMGG14Remover()
93
  SAM_PREDICTOR = SAMPredictor(model_type="vit_h", device="cpu")
94
  if "sam3d" in os.getenv("GRADIO_APP"):
95
- PIPELINE = Sam3dInference()
96
  else:
97
  PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
98
  "microsoft/TRELLIS-image-large"
@@ -110,7 +114,7 @@ elif os.getenv("GRADIO_APP").startswith("textto3d"):
110
  RBG_REMOVER = RembgRemover()
111
  RBG14_REMOVER = BMGG14Remover()
112
  if "sam3d" in os.getenv("GRADIO_APP"):
113
- PIPELINE = Sam3dInference()
114
  else:
115
  PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
116
  "microsoft/TRELLIS-image-large"
 
16
 
17
  import spaces
18
  from embodied_gen.utils.monkey_patch.trellis import monkey_path_trellis
19
+
20
  monkey_path_trellis()
21
+ from embodied_gen.utils.monkey_patch.gradio import (
22
+ _patch_open3d_cuda_device_count_bug,
23
+ )
24
+
25
  _patch_open3d_cuda_device_count_bug()
26
 
27
  import gc
 
96
  RBG14_REMOVER = BMGG14Remover()
97
  SAM_PREDICTOR = SAMPredictor(model_type="vit_h", device="cpu")
98
  if "sam3d" in os.getenv("GRADIO_APP"):
99
+ PIPELINE = Sam3dInference(device="cuda")
100
  else:
101
  PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
102
  "microsoft/TRELLIS-image-large"
 
114
  RBG_REMOVER = RembgRemover()
115
  RBG14_REMOVER = BMGG14Remover()
116
  if "sam3d" in os.getenv("GRADIO_APP"):
117
+ PIPELINE = Sam3dInference(device="cuda")
118
  else:
119
  PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
120
  "microsoft/TRELLIS-image-large"
embodied_gen/models/sam3d.py CHANGED
@@ -63,7 +63,10 @@ class Sam3dInference:
63
  """
64
 
65
  def __init__(
66
- self, local_dir: str = "weights/sam-3d-objects", compile: bool = False, device: str = "cuda",
 
 
 
67
  ) -> None:
68
  if not os.path.exists(local_dir):
69
  snapshot_download("tuandao-zenai/sam-3d-objects", local_dir=local_dir)
 
63
  """
64
 
65
  def __init__(
66
+ self,
67
+ local_dir: str = "weights/sam-3d-objects",
68
+ compile: bool = False,
69
+ device: str = "cuda",
70
  ) -> None:
71
  if not os.path.exists(local_dir):
72
  snapshot_download("tuandao-zenai/sam-3d-objects", local_dir=local_dir)
embodied_gen/utils/monkey_patch/gradio.py CHANGED
@@ -15,10 +15,12 @@
15
  # permissions and limitations under the License.
16
 
17
 
18
- import gradio_client.utils as gradio_client_utils
19
  import fileinput
20
  import site
21
 
 
 
 
22
  def _patch_gradio_schema_bool_bug() -> None:
23
  """Patch schema parser for bool-style for gradio<5.33."""
24
  original_get_type = gradio_client_utils.get_type
@@ -50,8 +52,7 @@ def _patch_open3d_cuda_device_count_bug() -> None:
50
  for line in file:
51
  print(
52
  line.replace(
53
- '_pybind_cuda.open3d_core_cuda_device_count()',
54
- '1'
55
  ),
56
- end=''
57
- )
 
15
  # permissions and limitations under the License.
16
 
17
 
 
18
  import fileinput
19
  import site
20
 
21
+ import gradio_client.utils as gradio_client_utils
22
+
23
+
24
  def _patch_gradio_schema_bool_bug() -> None:
25
  """Patch schema parser for bool-style for gradio<5.33."""
26
  original_get_type = gradio_client_utils.get_type
 
52
  for line in file:
53
  print(
54
  line.replace(
55
+ '_pybind_cuda.open3d_core_cuda_device_count()', '1'
 
56
  ),
57
+ end='',
58
+ )
embodied_gen/utils/monkey_patch/sam3d.py CHANGED
@@ -380,7 +380,7 @@ def monkey_patch_sam3d():
380
 
381
  InferencePipeline.__init__ = patch_init
382
 
383
- # patch_pointmap_infer_pipeline() # patch
384
- # patch_infer_init() # patch
385
 
386
  return
 
380
 
381
  InferencePipeline.__init__ = patch_init
382
 
383
+ patch_pointmap_infer_pipeline() # patch
384
+ patch_infer_init() # patch
385
 
386
  return
embodied_gen/utils/trender.py CHANGED
@@ -19,7 +19,6 @@ import sys
19
  from collections import defaultdict
20
 
21
  import numpy as np
22
- import spaces
23
  import torch
24
  from easydict import EasyDict as edict
25
  from tqdm import tqdm
@@ -43,7 +42,6 @@ __all__ = [
43
  ]
44
 
45
 
46
- # @spaces.GPU
47
  def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
48
  renderer = MeshRenderer()
49
  renderer.rendering_options.resolution = options.get("resolution", 512)
@@ -66,7 +64,6 @@ def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
66
  return rets
67
 
68
 
69
- # @spaces.GPU
70
  def render_gs_frames(
71
  sample,
72
  extrinsics,
@@ -117,7 +114,6 @@ def render_gs_frames(
117
  return dict(outputs)
118
 
119
 
120
- # @spaces.GPU
121
  def render_video(
122
  sample,
123
  resolution=512,
@@ -149,7 +145,6 @@ def render_video(
149
  return result
150
 
151
 
152
- # @spaces.GPU
153
  def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
154
  return {
155
  "gaussian": {
 
19
  from collections import defaultdict
20
 
21
  import numpy as np
 
22
  import torch
23
  from easydict import EasyDict as edict
24
  from tqdm import tqdm
 
42
  ]
43
 
44
 
 
45
  def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
46
  renderer = MeshRenderer()
47
  renderer.rendering_options.resolution = options.get("resolution", 512)
 
64
  return rets
65
 
66
 
 
67
  def render_gs_frames(
68
  sample,
69
  extrinsics,
 
114
  return dict(outputs)
115
 
116
 
 
117
  def render_video(
118
  sample,
119
  resolution=512,
 
145
  return result
146
 
147
 
 
148
  def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
149
  return {
150
  "gaussian": {
requirements.txt CHANGED
@@ -65,7 +65,7 @@ https://huggingface.co/xinjjj/RoboAssetGen/resolve/main/wheel_cu121/pytorch3d-0.
65
  # git+https://github.com/nerfstudio-project/gsplat.git@v1.5.3
66
  https://github.com/nerfstudio-project/gsplat/releases/download/v1.5.0/gsplat-1.5.0+pt24cu121-cp310-cp310-linux_x86_64.whl
67
  # flash-attn==2.7.0.post2
68
- https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.0.post2/flash_attn-2.7.0.post2+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
69
  # kaolin@git+https://github.com/NVIDIAGameWorks/kaolin.git@v0.16.0
70
  https://huggingface.co/xinjjj/RoboAssetGen/resolve/main/wheel_cu121/kaolin-0.16.0-cp310-cp310-linux_x86_64.whl
71
  # nvdiffrast@git+https://github.com/NVlabs/nvdiffrast.git#egg=729261d
 
65
  # git+https://github.com/nerfstudio-project/gsplat.git@v1.5.3
66
  https://github.com/nerfstudio-project/gsplat/releases/download/v1.5.0/gsplat-1.5.0+pt24cu121-cp310-cp310-linux_x86_64.whl
67
  # flash-attn==2.7.0.post2
68
+ # https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.0.post2/flash_attn-2.7.0.post2+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
69
  # kaolin@git+https://github.com/NVIDIAGameWorks/kaolin.git@v0.16.0
70
  https://huggingface.co/xinjjj/RoboAssetGen/resolve/main/wheel_cu121/kaolin-0.16.0-cp310-cp310-linux_x86_64.whl
71
  # nvdiffrast@git+https://github.com/NVlabs/nvdiffrast.git#egg=729261d