Merge branch 'main' into hifa

threestudio-project · Dec 30, 2023 · 9334e56 · 9334e56
2 parents 70ed2a9 + 8ce432d
commit 9334e56
Show file tree

Hide file tree

Showing 28 changed files with 821 additions and 92 deletions.
diff --git a/.gitignore b/.gitignore
@@ -188,4 +188,9 @@ outputs-gradio/
 # wandb
 wandb/
 
+# vscode
+.code-workspace
+
+custom/*
+
 load/tets/256_tets.npz
diff --git a/README.md b/README.md
@@ -31,7 +31,7 @@ threestudio is a unified framework for 3D content creation from text prompts, si
 | <a href="https://zero123.cs.columbia.edu/">Zero-1-to-3</a> | <a href="https://guochengqian.github.io/project/magic123/">Magic123</a> |
 <br />
 | <a href="https://instruct-nerf2nerf.github.io/">InstructNeRF2NeRF</a> | <a href="https://control4darxiv.github.io/">Control4D</a> |
-</b></p>
+</b>
 
 <p align="center">
   <a href="https://colab.research.google.com/github/threestudio-project/threestudio/blob/main/threestudio.ipynb">
@@ -43,22 +43,40 @@ threestudio is a unified framework for 3D content creation from text prompts, si
 </p>
 
 <p align="center">
-    Did not find what you want? Submit a feature request or upvote others' requests <a href="https://github.com/threestudio-project/threestudio/discussions/46">here</a>!
+    Did not find what you want? Check out <a href="https://threestudio-project.github.io/threestudio-extensions/"><b>threestudio-extensions</b></a> or submit a feature request <a href="https://github.com/threestudio-project/threestudio/discussions/46">here</a>!
 </p>
 
-## News
+<p align="center">
+<img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/ac6089a7-d88f-414c-96d6-a5e75616115a" width="68%">
+</p>
+<p align="center">
+
+<img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/302a399e-d36f-453e-a595-1c7d120451d3" width="35%">
+<img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/025e6980-baf2-4b5f-9c23-4f66ef847bf5" width="35%">
+<img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/f04b6bdd-ef02-4ce7-b7c9-981f8bda419f" width="35%">
+<img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/8892898f-8bd8-43dc-a4ec-dd8d078af860" width="50%">
+</p>
+<p align="center"><b>
+| <a href="https://github.com/HeliosZhao/Animate124/tree/threestudio">Animate-124</a> | <a href="https://github.com/DSaurus/threestudio-4dfy">4D-fy</a> | <a href="https://github.com/DSaurus/threestudio-dreamcraft3D">DreamCraft3D</a> | <a href="https://github.com/DSaurus/threestudio-3dgs">Gaussian Splatting</a> | <a href="https://github.com/DSaurus/threestudio-mvdream">MVDream</a> | <a href="https://github.com/DSaurus/threestudio-meshfitting">Mesh-Fitting</a> |
+</b></p>
 
-- 08/25/2023: Implementation of [Magic123](https://guochengqian.github.io/project/magic123/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#magic123-) to give it a try.
-- 07/06/2023: Join our [Discord server](https://discord.gg/ejer2MAB8N) for lively discussions!
-- 07/03/2023: Try text-to-3D online in [HuggingFace Spaces](https://huggingface.co/spaces/bennyguo/threestudio) or using our [self-hosted service](http://t23-g-01.threestudio.ai) (GPU support from Tencent). To host the web interface locally, see [here](https://github.com/threestudio-project/threestudio#gradio-web-interface).
-- 06/20/2023: Implementations of Instruct-NeRF2NeRF and Control4D for high-fidelity 3D editing! Follow the instructions for [Control4D](https://github.com/threestudio-project/threestudio#control4d-) and [Instruct-NeRF2NeRF](https://github.com/threestudio-project/threestudio#instructnerf2nerf-) to give it a try.
-- 06/14/2023: Implementation of TextMesh! Follow the instructions [here](https://github.com/threestudio-project/threestudio#textmesh-) to give it a try.
-- 06/14/2023: Implementation of [prompt debiasing](https://arxiv.org/abs/2303.15413) and [Perp-Neg](https://perp-neg.github.io/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#tips-on-improving-quality) to give it a try.
-- 05/29/2023: An experimental implementation of using [Zero-1-to-3](https://zero123.cs.columbia.edu/) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#zero-1-to-3-) to give it a try.
-- 05/26/2023: Implementation of [ProlificDreamer](https://ml.cs.tsinghua.edu.cn/prolificdreamer/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#prolificdreamer-) to give it a try.
-- 05/14/2023: You can experiment with the SDS loss on 2D images using our [2dplayground](2dplayground.ipynb).
-- 05/13/2023: You can now try threestudio on [Google Colab](https://colab.research.google.com/github/threestudio-project/threestudio/blob/main/threestudio.ipynb)!
-- 05/11/2023: We now support exporting textured meshes! See [here](https://github.com/threestudio-project/threestudio#export-meshes) for instructions.
+## News
+- 23/12/2023: Thanks to [Yuyang Zhao](https://github.com/HeliosZhao) for the implementation of the image-to-4D generation extension [Animate-124](https://github.com/HeliosZhao/Animate124/tree/threestudio)! Follow the instructions on the extensions website to give it a try.
+- 18/12/2023: Implementation of [4D-fy](https://github.com/DSaurus/threestudio-4dfy) for 4D generation and [DreamCraft3D](https://github.com/DSaurus/threestudio-dreamcraft3D) for high-quality image-to-3D generation as the custom extensions! Follow the instructions on the extensions website to give it a try.
+- 13/12/2023: Implementation supporting [Stable Zero123](https://stability.ai/news/stable-zero123-3d-generation) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#stable-zero123) to give it a try.
+- 30/11/2023: Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream) and [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as custom extensions. You can also use a neural representation to fit a mesh with [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).
+- 30/11/2023: Implementation of a [custom extension system](https://threestudio-project.github.io/threestudio-extensions/)! You can add your own extensions in [this project](https://github.com/threestudio-project/threestudio-extensions).
+- 25/08/2023: Implementation of [Magic123](https://guochengqian.github.io/project/magic123/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#magic123-) to give it a try.
+- 06/07/2023: Join our [Discord server](https://discord.gg/ejer2MAB8N) for lively discussions!
+- 03/07/2023: Try text-to-3D online in [HuggingFace Spaces](https://huggingface.co/spaces/bennyguo/threestudio) or using our [self-hosted service](http://t23-g-01.threestudio.ai) (GPU support from Tencent). To host the web interface locally, see [here](https://github.com/threestudio-project/threestudio#gradio-web-interface).
+- 20/06/2023: Implementations of Instruct-NeRF2NeRF and Control4D for high-fidelity 3D editing! Follow the instructions for [Control4D](https://github.com/threestudio-project/threestudio#control4d-) and [Instruct-NeRF2NeRF](https://github.com/threestudio-project/threestudio#instructnerf2nerf-) to give it a try.
+- 14/06/2023: Implementation of TextMesh! Follow the instructions [here](https://github.com/threestudio-project/threestudio#textmesh-) to give it a try.
+- 14/06/2023: Implementation of [prompt debiasing](https://arxiv.org/abs/2303.15413) and [Perp-Neg](https://perp-neg.github.io/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#tips-on-improving-quality) to give it a try.
+- 29/05/2023: An experimental implementation of using [Zero-1-to-3](https://zero123.cs.columbia.edu/) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#zero-1-to-3-) to give it a try.
+- 26/05/2023: Implementation of [ProlificDreamer](https://ml.cs.tsinghua.edu.cn/prolificdreamer/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#prolificdreamer-) to give it a try.
+- 14/05/2023: You can experiment with the SDS loss on 2D images using our [2dplayground](2dplayground.ipynb).
+- 13/05/2023: You can now try threestudio on [Google Colab](https://colab.research.google.com/github/threestudio-project/threestudio/blob/main/threestudio.ipynb)!
+- 11/05/2023: We now support exporting textured meshes! See [here](https://github.com/threestudio-project/threestudio#export-meshes) for instructions.
 
 ![export-blender](https://github.com/threestudio-project/threestudio/assets/19284678/ccae2820-e702-484c-a43f-81678a365427)
 
@@ -102,6 +120,8 @@ pip install ninja
 pip install -r requirements.txt
 ```
 
+- (Optional) `tiny-cuda-nn` installation might require downgrading pip to 23.0.1
+
 - (Optional, Recommended) The best-performing models in threestudio use the newly-released T2I model [DeepFloyd IF](https://github.com/deep-floyd/IF), which currently requires signing a license agreement. If you would like to use these models, you need to [accept the license on the model card of DeepFloyd IF](https://huggingface.co/DeepFloyd/IF-I-XL-v1.0), and login into the Hugging Face hub in the terminal by `huggingface-cli login`.
 
 - For contributors, see [here](https://github.com/threestudio-project/threestudio#contributing-to-threestudio).
@@ -423,7 +443,7 @@ https://github.com/threestudio-project/threestudio/assets/19284678/72217cdd-765a
 
 - Most of the settings are the same as the DreamFusion model. Please refer to the notable differences of the DreamFusion model.
 - We use NeuS as the geometry representation while the original paper uses VolSDF.
-- We adopt techniques from [Neuralangelo](https://arxiv.org/abs/2306.03092) to stablize normal computation when using hash grids.
+- We adopt techniques from [Neuralangelo](https://arxiv.org/abs/2306.03092) to stabilize normal computation when using hash grids.
 - We currently only implemented the coarse stage of TextMesh.
 
 **Example running commands**
@@ -511,6 +531,34 @@ python launch.py --config configs/magic123-refine-sd.yaml --train --gpu 0 data.i
 
 - If the image contains non-front-facing objects, specifying the approximate elevation and azimuth angle by setting `data.default_elevation_deg` and `data.default_azimuth_deg` can be helpful. In threestudio, top is elevation +90 and bottom is elevation -90; left is azimuth -90 and right is azimuth +90.
 
+
+### Stable Zero123
+
+**Installation**
+
+Download the pretrained Stable Zero123 checkpoint `stable_zero123.ckpt` into `load/zero123` from https://huggingface.co/stabilityai/stable-zero123
+
+**Results obtained by threestudio (Stable Zero123 vs Zero123-XL)**
+![Final_video_v01](https://github.com/threestudio-project/threestudio/assets/22424247/bf2d2213-5027-489c-a6ba-1c56c14ee8b7)
+
+**Direct multi-view image generation**
+If you only want to generate multi-view images, please refer to [threestudio-mvimg-gen](https://github.com/DSaurus/threestudio-mvimg-gen). This extension can use Stable Zero123 to directly generate images from multi-view perspectives.
+
+**Example running commands**
+
+1. Take an image of your choice, or generate it from text using your favourite AI image generator such as SDXL Turbo (https://clipdrop.co/stable-diffusion-turbo), e.g. with the prompt "A simple 3D render of a friendly dog"
+2. Remove its background using Clipdrop (https://clipdrop.co/remove-background)
+3. Save to `load/images/`, preferably with `_rgba.png` as the suffix
+4. Run Zero-1-to-3 with the Stable Zero123 ckpt:
+```sh
+python launch.py --config configs/stable-zero123.yaml --train --gpu 0 data.image_path=./load/images/hamburger_rgba.png
+```
+
+**IMPORTANT NOTE: This is an experimental implementation and we're constantly improving the quality.**
+
+**IMPORTANT NOTE: This implementation extends the Zero-1-to-3 implementation below, and is heavily inspired by the Zero-1-to-3 implementation in [stable-dreamfusion](https://github.com/ashawkey/stable-dreamfusion)! `extern/ldm_zero123` is borrowed from `stable-dreamfusion/ldm`.**
+
+
 ### Zero-1-to-3 [![arXiv](https://img.shields.io/badge/arXiv-2303.11328-b31b1b.svg?style=flat-square)](https://arxiv.org/abs/2303.11328)
 
 **Installation**

diff --git a/configs/zero123_64.yaml → configs/stable-zero123.yaml b/configs/zero123_64.yaml → configs/stable-zero123.yaml
@@ -1,24 +1,25 @@
-name: "zero123"
-tag: "${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}_prog${data.random_camera.progressive_until}"
+name: "zero123-sai"
+tag: "${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}"
 exp_root_dir: "outputs"
 seed: 0
 
 data_type: "single-image-datamodule"
 data: # threestudio/data/image.py -> SingleImageDataModuleConfig
   image_path: ./load/images/hamburger_rgba.png
-  height: 128
-  width: 128
-  default_elevation_deg: 0.0
+  height: [128, 256, 512]
+  width: [128, 256, 512]
+  resolution_milestones: [200, 300]
+  default_elevation_deg: 5.0
   default_azimuth_deg: 0.0
   default_camera_distance: 3.8
   default_fovy_deg: 20.0
   requires_depth: ${cmaxgt0orcmaxgt0:${system.loss.lambda_depth},${system.loss.lambda_depth_rel}}
   requires_normal: ${cmaxgt0:${system.loss.lambda_normal}}
   random_camera: # threestudio/data/uncond.py -> RandomCameraDataModuleConfig
-    height: 64
-    width: 64
-    batch_size: 12
-    resolution_milestones: []
+    height: [64, 128, 256]
+    width: [64, 128, 256]
+    batch_size: [12, 8, 4]
+    resolution_milestones: [200, 300]
     eval_height: 512
     eval_width: 512
     eval_batch_size: 1
@@ -47,13 +48,6 @@ system:
     radius: 2.0
     normal_type: "analytic"
 
-    # the density initialization proposed in the DreamFusion paper
-    # does not work very well
-    # density_bias: "blob_dreamfusion"
-    # density_activation: exp
-    # density_blob_scale: 5.
-    # density_blob_std: 0.2
-
     # use Magic3D density initialization instead
     density_bias: "blob_magic3d"
     density_activation: softplus
@@ -88,28 +82,26 @@ system:
   renderer:
     radius: ${system.geometry.radius}
     num_samples_per_ray: 512
-    return_comp_normal: ${gt0:${system.loss.lambda_normal_smooth}}
-    return_normal_perturb: ${gt0:${system.loss.lambda_3d_normal_smooth}}
+    return_comp_normal: ${cmaxgt0:${system.loss.lambda_normal_smooth}}
+    return_normal_perturb: ${cmaxgt0:${system.loss.lambda_3d_normal_smooth}}
 
   prompt_processor_type: "dummy-prompt-processor" # Zero123 doesn't use prompts
   prompt_processor:
     pretrained_model_name_or_path: ""
     prompt: ""
 
-  guidance_type: "zero123-guidance"
+  guidance_type: "stable-zero123-guidance"
   guidance:
-    pretrained_model_name_or_path: "./load/zero123/zero123-xl.ckpt"
     pretrained_config: "./load/zero123/sd-objaverse-finetune-c_concat-256.yaml"
+    pretrained_model_name_or_path: "./load/zero123/stable_zero123.ckpt"
     vram_O: ${not:${gt0:${system.freq.guidance_eval}}}
     cond_image_path: ${data.image_path}
     cond_elevation_deg: ${data.default_elevation_deg}
     cond_azimuth_deg: ${data.default_azimuth_deg}
     cond_camera_distance: ${data.default_camera_distance}
     guidance_scale: 3.0
-    #min_step_percent: 0.02
-    min_step_percent: [0, 0.4, 0.2, 200]  # (start_iter, start_val, end_val, end_iter)
-    #max_step_percent: 0.98
-    max_step_percent: [0, 0.85, 0.5, 200]
+    min_step_percent: [50, 0.7, 0.3, 200]  # (start_iter, start_val, end_val, end_iter)
+    max_step_percent: [50, 0.98, 0.8, 200]
 
   freq:
     ref_only_steps: 0
@@ -123,16 +115,16 @@ system:
 
   loss:
     lambda_sds: 0.1
-    lambda_rgb: 500.
+    lambda_rgb: [100, 500., 1000., 400]
     lambda_mask: 50.
     lambda_depth: 0. # 0.05
     lambda_depth_rel: 0. # [0, 0, 0.05, 100]
     lambda_normal: 0. # [0, 0, 0.05, 100]
-    lambda_normal_smooth: 10.0
-    lambda_3d_normal_smooth: 10.0
+    lambda_normal_smooth: [100, 7.0, 5.0, 150, 10.0, 200]
+    lambda_3d_normal_smooth: [100, 7.0, 5.0, 150, 10.0, 200]
     lambda_orient: 1.0
-    lambda_sparsity: 0.1 # should be tweaked for every model
-    lambda_opaque: 0.1
+    lambda_sparsity: 0.5 # should be tweaked for every model
+    lambda_opaque: 0.5
 
   optimizer:
     name: Adam
@@ -142,14 +134,14 @@ system:
       eps: 1.e-8
 
 trainer:
-  max_steps: 400
+  max_steps: 600
   log_every_n_steps: 1
   num_sanity_val_steps: 0
   val_check_interval: 100
   enable_progress_bar: true
-  precision: 16-mixed
+  precision: 32
 
 checkpoint:
   save_last: true # save at each validation time
   save_top_k: -1
-  every_n_train_steps: ${trainer.max_steps}
+  every_n_train_steps: 100 # ${trainer.max_steps}
diff --git a/configs/zero123.yaml b/configs/zero123.yaml
@@ -1,5 +1,5 @@
 name: "zero123"
-tag: "${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}_prog${data.random_camera.progressive_until}"
+tag: "${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}"
 exp_root_dir: "outputs"
 seed: 0
 
@@ -9,7 +9,7 @@ data: # threestudio/data/image.py -> SingleImageDataModuleConfig
   height: [128, 256, 512]
   width: [128, 256, 512]
   resolution_milestones: [200, 300]
-  default_elevation_deg: 0.0
+  default_elevation_deg: 5.0
   default_azimuth_deg: 0.0
   default_camera_distance: 3.8
   default_fovy_deg: 20.0
@@ -111,9 +111,7 @@ system:
     cond_azimuth_deg: ${data.default_azimuth_deg}
     cond_camera_distance: ${data.default_camera_distance}
     guidance_scale: 3.0
-    #min_step_percent: 0.02
     min_step_percent: [0, 0.4, 0.2, 200]  # (start_iter, start_val, end_val, end_iter)
-    #max_step_percent: 0.98
     max_step_percent: [0, 0.85, 0.5, 200]
 
   freq:
@@ -147,7 +145,7 @@ system:
       eps: 1.e-8
 
 trainer:
-  max_steps: 400
+  max_steps: 600
   log_every_n_steps: 1
   num_sanity_val_steps: 0
   val_check_interval: 100

diff --git a/custom/put_custom_extensions_here b/custom/put_custom_extensions_here
diff --git a/gradio_app.py b/gradio_app.py
@@ -201,7 +201,7 @@ def run(
 
     # manually assign the output directory, name and tag so that we know the trial directory
     name = os.path.basename(model_config[model_name]["path"]).split(".")[0]
-    tag = datetime.now().strftime("@%Y%m%d-%H%M%S")
+    tag = datetime.now().strftime("%Y%m%d-%H%M%S")
     trial_dir = os.path.join(save_root, EXP_ROOT_DIR, name, tag)
     alive_path = os.path.join(trial_dir, "alive")
 
@@ -441,6 +441,7 @@ def launch(
                 run_btn,
                 stop_btn,
             ],
+            concurrency_limit=1,
         )
         stop_btn.click(
             fn=stop_run,
@@ -453,7 +454,7 @@ def launch(
     launch_args = {"server_port": port}
     if listen:
         launch_args["server_name"] = "0.0.0.0"
-    demo.queue(concurrency_count=1).launch(**launch_args)
+    demo.queue().launch(**launch_args)
 
 
 def watch(