Akash/resolution in yaml (#22)

akashkgarg · web-flow · commit b99e2106b072 · 2025-03-19T12:18:12.000-07:00
diff --git a/README.md b/README.md
@@ -65,13 +65,17 @@ python3 -m cube3d.generate \
             --prompt "sleek vintage green couch with clean lines and velvet material"
 ```
 
+> **Note**: `--fast-inference` is optional and may not be available on GPUs with limited VRAM. This flag also does not work on macOS.
+
 The output will be an `.obj` file saved in the specified `output` directory.
 
 If you want to render a turntable gif of the mesh, you can use the `--render-gif` flag, which will render a turntable gif of the mesh
 and save it as `turntable.gif` in the specified `output` directory.
 
 > **Note**: You must have Blender installed and available in your system's PATH to render the turntable GIF. You can download it from [Blender's official website](https://www.blender.org/). Ensure that the Blender executable is accessible from the command line.
 
+> **Note**: If shape decoding is slow, you can try specifying a lower resolution using the `--resolution-base` flag. A lower resolution produces a coarser, lower-quality output mesh but decodes faster. Values between 4.0 and 9.0 are recommended.
+
 #### 2. Shape Tokenization and De-tokenization
 
 To tokenize a 3D shape into token indices and reconstruct it back, you can use the following command:
diff --git a/cube3d/generate.py b/cube3d/generate.py
@@ -17,8 +17,15 @@
 logging.basicConfig(level=logging.INFO)
 
 
-def generate_mesh(engine, prompt, output_dir, output_name, disable_postprocess=False):
-    mesh_v_f = engine.t2s([prompt], use_kv_cache=True)
+def generate_mesh(
+    engine,
+    prompt,
+    output_dir,
+    output_name,
+    resolution_base=8.0,
+    disable_postprocess=False,
+):
+    mesh_v_f = engine.t2s([prompt], use_kv_cache=True, resolution_base=resolution_base)
     vertices, faces = mesh_v_f[0][0], mesh_v_f[0][1]
     obj_path = os.path.join(output_dir, f"{output_name}.obj")
     if PYMESHLAB_AVAILABLE:
@@ -89,6 +96,12 @@ def generate_mesh(engine, prompt, output_dir, output_name, disable_postprocess=F
         default=False,
         action="store_true",
     )
+    parser.add_argument(
+        "--resolution-base",
+        type=float,
+        default=8.0,
+        help="Resolution base for the shape decoder.",
+    )
     args = parser.parse_args()
     os.makedirs(args.output_dir, exist_ok=True)
     device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
@@ -110,7 +123,12 @@ def generate_mesh(engine, prompt, output_dir, output_name, disable_postprocess=F
 
     # Generate meshes based on input source
     obj_path = generate_mesh(
-        engine, args.prompt, args.output_dir, "output", args.disable_postprocessing
+        engine,
+        args.prompt,
+        args.output_dir,
+        "output",
+        args.resolution_base,
+        args.disable_postprocessing,
     )
     if args.render_gif:
         gif_path = renderer.render_turntable(obj_path, args.output_dir)