From 477345befc45ea66248c567accd466ae82afbf70 Mon Sep 17 00:00:00 2001
From: DSaurus <2238454358@qq.com>
Date: Mon, 27 Nov 2023 21:48:43 +0800
Subject: [PATCH 01/24] fix elevation bug

---
 threestudio/data/uncond.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/threestudio/data/uncond.py b/threestudio/data/uncond.py
index d3b67b7d..ee8df2e7 100644
--- a/threestudio/data/uncond.py
+++ b/threestudio/data/uncond.py
@@ -151,18 +151,16 @@ def collate(self, batch) -> Dict[str, Any]:
         else:
             # otherwise sample uniformly on sphere
             elevation_range_percent = [
-                (self.elevation_range[0] + 90.0) / 180.0,
-                (self.elevation_range[1] + 90.0) / 180.0,
+                self.elevation_range[0] / 180.0 * math.pi,
+                self.elevation_range[1] / 180.0 * math.pi,
             ]
             # inverse transform sampling
             elevation = torch.asin(
-                2
-                * (
+                (
                     torch.rand(self.batch_size)
-                    * (elevation_range_percent[1] - elevation_range_percent[0])
-                    + elevation_range_percent[0]
+                    * (math.sin(elevation_range_percent[1]) - math.sin(elevation_range_percent[0]) )
+                    + math.sin(elevation_range_percent[0] )
                 )
-                - 1.0
             )
             elevation_deg = elevation / math.pi * 180.0
 

From 9b76296bf7be1bfa8fa965d8b9fb4c4fc017cb4f Mon Sep 17 00:00:00 2001
From: DSaurus <2238454358@qq.com>
Date: Mon, 27 Nov 2023 21:52:09 +0800
Subject: [PATCH 02/24] fix format

---
 threestudio/data/uncond.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/threestudio/data/uncond.py b/threestudio/data/uncond.py
index ee8df2e7..8316325c 100644
--- a/threestudio/data/uncond.py
+++ b/threestudio/data/uncond.py
@@ -158,8 +158,11 @@ def collate(self, batch) -> Dict[str, Any]:
             elevation = torch.asin(
                 (
                     torch.rand(self.batch_size)
-                    * (math.sin(elevation_range_percent[1]) - math.sin(elevation_range_percent[0]) )
-                    + math.sin(elevation_range_percent[0] )
+                    * (
+                        math.sin(elevation_range_percent[1])
+                        - math.sin(elevation_range_percent[0])
+                    )
+                    + math.sin(elevation_range_percent[0])
                 )
             )
             elevation_deg = elevation / math.pi * 180.0

From 503140385940b1d4d3a528c0cc4494a52f198eff Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Fri, 1 Dec 2023 01:51:30 +0800
Subject: [PATCH 03/24] Enable Gaussian Splatting and Custom Extension (#344)

* add gaussian wip

* clean up

* add refine

* gsgen baseline w/o point-e

* upd config

* rm KNN

* adjust parameters

* max_num, fix color

* import lib

* background device

* update config

* update config

* clean gaussian splatting

* fix format

* update extensions

* fix memory bug

* prepare for extensions

* clean gaussian

* clean gaussian

* clean

* fix bugs

---------

Co-authored-by: Linyou <linyoutian.loyot@gmail.com>
---
 .gitignore                                    |  2 +
 README.md                                     |  4 +-
 launch.py                                     | 64 ++++++++++++++++++
 threestudio/data/uncond.py                    | 20 ++++--
 .../background/solid_color_background.py      |  7 +-
 threestudio/utils/loss.py                     | 16 +++++
 threestudio/utils/ops.py                      | 65 +++++++++++++++++++
 7 files changed, 167 insertions(+), 11 deletions(-)
 create mode 100644 threestudio/utils/loss.py

diff --git a/.gitignore b/.gitignore
index 12adb415..0bf85006 100644
--- a/.gitignore
+++ b/.gitignore
@@ -188,4 +188,6 @@ outputs-gradio/
 # wandb
 wandb/
 
+custom/*
+
 load/tets/256_tets.npz
diff --git a/README.md b/README.md
index 52abcde4..15da0e4c 100644
--- a/README.md
+++ b/README.md
@@ -44,8 +44,10 @@ threestudio is a unified framework for 3D content creation from text prompts, si
     Did not find what you want? Submit a feature request or upvote others' requests <a href="https://github.com/threestudio-project/threestudio/discussions/46">here</a>!
 </p>
 
-## News
 
+## News
+- 30/11/2023 Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).
+- 30/11/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions).
 - 08/25/2023: Implementation of [Magic123](https://guochengqian.github.io/project/magic123/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#magic123-) to give it a try.
 - 07/06/2023: Join our [Discord server](https://discord.gg/ejer2MAB8N) for lively discussions!
 - 07/03/2023: Try text-to-3D online in [HuggingFace Spaces](https://huggingface.co/spaces/bennyguo/threestudio) or using our [self-hosted service](http://t23-g-01.threestudio.ai) (GPU support from Tencent). To host the web interface locally, see [here](https://github.com/threestudio-project/threestudio#gradio-web-interface).
diff --git a/launch.py b/launch.py
index 72add725..d24940af 100644
--- a/launch.py
+++ b/launch.py
@@ -1,8 +1,11 @@
 import argparse
 import contextlib
+import importlib
 import logging
 import os
 import sys
+import time
+import traceback
 
 
 class ColoredFilter(logging.Filter):
@@ -39,6 +42,65 @@ def filter(self, record):
         return True
 
 
+def load_custom_module(module_path):
+    module_name = os.path.basename(module_path)
+    if os.path.isfile(module_path):
+        sp = os.path.splitext(module_path)
+        module_name = sp[0]
+    try:
+        if os.path.isfile(module_path):
+            module_spec = importlib.util.spec_from_file_location(
+                module_name, module_path
+            )
+        else:
+            module_spec = importlib.util.spec_from_file_location(
+                module_name, os.path.join(module_path, "__init__.py")
+            )
+
+        module = importlib.util.module_from_spec(module_spec)
+        sys.modules[module_name] = module
+        module_spec.loader.exec_module(module)
+        return True
+    except Exception as e:
+        print(traceback.format_exc())
+        print(f"Cannot import {module_path} module for custom nodes:", e)
+        return False
+
+
+def load_custom_modules():
+    node_paths = ["custom"]
+    node_import_times = []
+    for custom_node_path in node_paths:
+        possible_modules = os.listdir(custom_node_path)
+        if "__pycache__" in possible_modules:
+            possible_modules.remove("__pycache__")
+
+        for possible_module in possible_modules:
+            module_path = os.path.join(custom_node_path, possible_module)
+            if (
+                os.path.isfile(module_path)
+                and os.path.splitext(module_path)[1] != ".py"
+            ):
+                continue
+            if module_path.endswith(".disabled"):
+                continue
+            time_before = time.perf_counter()
+            success = load_custom_module(module_path)
+            node_import_times.append(
+                (time.perf_counter() - time_before, module_path, success)
+            )
+
+    if len(node_import_times) > 0:
+        print("\nImport times for custom modules:")
+        for n in sorted(node_import_times):
+            if n[2]:
+                import_message = ""
+            else:
+                import_message = " (IMPORT FAILED)"
+            print("{:6.1f} seconds{}:".format(n[0], import_message), n[1])
+        print()
+
+
 def main(args, extras) -> None:
     # set CUDA_VISIBLE_DEVICES if needed, then import pytorch-lightning
     os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
@@ -94,6 +156,8 @@ def main(args, extras) -> None:
             else:
                 handler.setFormatter(logging.Formatter("[%(levelname)s] %(message)s"))
 
+    load_custom_modules()
+
     # parse YAML config to OmegaConf
     cfg: ExperimentConfig
     cfg = load_config(args.config, cli_args=extras, n_gpus=n_gpus)
diff --git a/threestudio/data/uncond.py b/threestudio/data/uncond.py
index 8316325c..d051e3fd 100644
--- a/threestudio/data/uncond.py
+++ b/threestudio/data/uncond.py
@@ -3,6 +3,7 @@
 import random
 from dataclasses import dataclass, field
 
+import numpy as np
 import pytorch_lightning as pl
 import torch
 import torch.nn.functional as F
@@ -14,6 +15,7 @@
 from threestudio.utils.config import parse_structured
 from threestudio.utils.misc import get_device
 from threestudio.utils.ops import (
+    get_full_projection_matrix,
     get_mvp_matrix,
     get_projection_matrix,
     get_ray_directions,
@@ -315,10 +317,11 @@ def collate(self, batch) -> Dict[str, Any]:
         # Importance note: the returned rays_d MUST be normalized!
         rays_o, rays_d = get_rays(directions, c2w, keepdim=True)
 
-        proj_mtx: Float[Tensor, "B 4 4"] = get_projection_matrix(
-            fovy, self.width / self.height, 0.1, 1000.0
+        self.proj_mtx: Float[Tensor, "B 4 4"] = get_projection_matrix(
+            fovy, self.width / self.height, 0.01, 100.0
         )  # FIXME: hard-coded near and far
-        mvp_mtx: Float[Tensor, "B 4 4"] = get_mvp_matrix(c2w, proj_mtx)
+        mvp_mtx: Float[Tensor, "B 4 4"] = get_mvp_matrix(c2w, self.proj_mtx)
+        self.fovy = fovy
 
         return {
             "rays_o": rays_o,
@@ -332,6 +335,8 @@ def collate(self, batch) -> Dict[str, Any]:
             "camera_distances": camera_distances,
             "height": self.height,
             "width": self.width,
+            "fovy": self.fovy,
+            "proj_mtx": self.proj_mtx,
         }
 
 
@@ -414,10 +419,10 @@ def __init__(self, cfg: Any, split: str) -> None:
         )
 
         rays_o, rays_d = get_rays(directions, c2w, keepdim=True)
-        proj_mtx: Float[Tensor, "B 4 4"] = get_projection_matrix(
-            fovy, self.cfg.eval_width / self.cfg.eval_height, 0.1, 1000.0
+        self.proj_mtx: Float[Tensor, "B 4 4"] = get_projection_matrix(
+            fovy, self.cfg.eval_width / self.cfg.eval_height, 0.01, 100.0
         )  # FIXME: hard-coded near and far
-        mvp_mtx: Float[Tensor, "B 4 4"] = get_mvp_matrix(c2w, proj_mtx)
+        mvp_mtx: Float[Tensor, "B 4 4"] = get_mvp_matrix(c2w, self.proj_mtx)
 
         self.rays_o, self.rays_d = rays_o, rays_d
         self.mvp_mtx = mvp_mtx
@@ -427,6 +432,7 @@ def __init__(self, cfg: Any, split: str) -> None:
         self.elevation, self.azimuth = elevation, azimuth
         self.elevation_deg, self.azimuth_deg = elevation_deg, azimuth_deg
         self.camera_distances = camera_distances
+        self.fovy = fovy
 
     def __len__(self):
         return self.n_views
@@ -445,6 +451,8 @@ def __getitem__(self, index):
             "camera_distances": self.camera_distances[index],
             "height": self.cfg.eval_height,
             "width": self.cfg.eval_width,
+            "fovy": self.fovy[index],
+            "proj_mtx": self.proj_mtx[index],
         }
 
     def collate(self, batch):
diff --git a/threestudio/models/background/solid_color_background.py b/threestudio/models/background/solid_color_background.py
index 0763a0c5..0b68d5b4 100644
--- a/threestudio/models/background/solid_color_background.py
+++ b/threestudio/models/background/solid_color_background.py
@@ -34,10 +34,9 @@ def configure(self) -> None:
             )
 
     def forward(self, dirs: Float[Tensor, "B H W 3"]) -> Float[Tensor, "B H W Nc"]:
-        color = (
-            torch.ones(*dirs.shape[:-1], self.cfg.n_output_dims).to(dirs)
-            * self.env_color
-        )
+        color = torch.ones(*dirs.shape[:-1], self.cfg.n_output_dims).to(
+            dirs
+        ) * self.env_color.to(dirs)
         if (
             self.training
             and self.cfg.random_aug
diff --git a/threestudio/utils/loss.py b/threestudio/utils/loss.py
new file mode 100644
index 00000000..eb0c7250
--- /dev/null
+++ b/threestudio/utils/loss.py
@@ -0,0 +1,16 @@
+import torch
+
+
+def _tensor_size(t):
+    return t.size()[1] * t.size()[2] * t.size()[3]
+
+
+def tv_loss(x):
+    batch_size = x.size()[0]
+    h_x = x.size()[2]
+    w_x = x.size()[3]
+    count_h = _tensor_size(x[:, :, 1:, :])
+    count_w = _tensor_size(x[:, :, :, 1:])
+    h_tv = torch.pow((x[:, :, 1:, :] - x[:, :, : h_x - 1, :]), 2).sum()
+    w_tv = torch.pow((x[:, :, :, 1:] - x[:, :, :, : w_x - 1]), 2).sum()
+    return 2 * (h_tv / count_h + w_tv / count_w) / batch_size
diff --git a/threestudio/utils/ops.py b/threestudio/utils/ops.py
index 320fa46a..b35d3cd0 100644
--- a/threestudio/utils/ops.py
+++ b/threestudio/utils/ops.py
@@ -1,3 +1,4 @@
+import math
 from collections import defaultdict
 
 import numpy as np
@@ -292,6 +293,70 @@ def get_mvp_matrix(
     return mvp_mtx
 
 
+def get_full_projection_matrix(
+    c2w: Float[Tensor, "B 4 4"], proj_mtx: Float[Tensor, "B 4 4"]
+) -> Float[Tensor, "B 4 4"]:
+    return (c2w.unsqueeze(0).bmm(proj_mtx.unsqueeze(0))).squeeze(0)
+
+
+# gaussian splatting functions
+def convert_pose(C2W):
+    flip_yz = torch.eye(4, device=C2W.device)
+    flip_yz[1, 1] = -1
+    flip_yz[2, 2] = -1
+    C2W = torch.matmul(C2W, flip_yz)
+    return C2W
+
+
+def get_projection_matrix_gaussian(znear, zfar, fovX, fovY, device="cuda"):
+    tanHalfFovY = math.tan((fovY / 2))
+    tanHalfFovX = math.tan((fovX / 2))
+
+    top = tanHalfFovY * znear
+    bottom = -top
+    right = tanHalfFovX * znear
+    left = -right
+
+    P = torch.zeros(4, 4, device=device)
+
+    z_sign = 1.0
+
+    P[0, 0] = 2.0 * znear / (right - left)
+    P[1, 1] = 2.0 * znear / (top - bottom)
+    P[0, 2] = (right + left) / (right - left)
+    P[1, 2] = (top + bottom) / (top - bottom)
+    P[3, 2] = z_sign
+    P[2, 2] = z_sign * zfar / (zfar - znear)
+    P[2, 3] = -(zfar * znear) / (zfar - znear)
+    return P
+
+
+def get_fov_gaussian(P):
+    tanHalfFovX = 1 / P[0, 0]
+    tanHalfFovY = 1 / P[1, 1]
+    fovY = math.atan(tanHalfFovY) * 2
+    fovX = math.atan(tanHalfFovX) * 2
+    return fovX, fovY
+
+
+def get_cam_info_gaussian(c2w, fovx, fovy, znear, zfar):
+    c2w = convert_pose(c2w)
+    world_view_transform = torch.inverse(c2w)
+
+    world_view_transform = world_view_transform.transpose(0, 1).cuda().float()
+    projection_matrix = (
+        get_projection_matrix_gaussian(znear=znear, zfar=zfar, fovX=fovx, fovY=fovy)
+        .transpose(0, 1)
+        .cuda()
+    )
+    full_proj_transform = (
+        world_view_transform.unsqueeze(0).bmm(projection_matrix.unsqueeze(0))
+    ).squeeze(0)
+    camera_center = world_view_transform.inverse()[3, :3]
+
+    return world_view_transform, full_proj_transform, camera_center
+
+
 def binary_cross_entropy(input, target):
     """
     F.binary_cross_entropy is not numerically stable in mixed-precision training.

From 692968736ace7b9902bba4b0fc218e2bae965ef6 Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Fri, 1 Dec 2023 03:22:42 +0800
Subject: [PATCH 04/24] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 15da0e4c..71505dc5 100644
--- a/README.md
+++ b/README.md
@@ -46,8 +46,8 @@ threestudio is a unified framework for 3D content creation from text prompts, si
 
 
 ## News
-- 30/11/2023 Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).
-- 30/11/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions).
+- 11/30/2023 Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).
+- 11/30/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions).
 - 08/25/2023: Implementation of [Magic123](https://guochengqian.github.io/project/magic123/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#magic123-) to give it a try.
 - 07/06/2023: Join our [Discord server](https://discord.gg/ejer2MAB8N) for lively discussions!
 - 07/03/2023: Try text-to-3D online in [HuggingFace Spaces](https://huggingface.co/spaces/bennyguo/threestudio) or using our [self-hosted service](http://t23-g-01.threestudio.ai) (GPU support from Tencent). To host the web interface locally, see [here](https://github.com/threestudio-project/threestudio#gradio-web-interface).

From cfabde68d89c96975bf0a230b85955d3ac143a2d Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Fri, 1 Dec 2023 03:56:32 +0800
Subject: [PATCH 05/24] add custom folder (#348)

---
 custom/put_custom_extensions_here | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 custom/put_custom_extensions_here

diff --git a/custom/put_custom_extensions_here b/custom/put_custom_extensions_here
new file mode 100644
index 00000000..e69de29b

From 7ce2f499e8459f920f122c694796991fd0b6f88a Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Fri, 1 Dec 2023 06:00:36 +0800
Subject: [PATCH 06/24] Update README.md

---
 README.md | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 71505dc5..cfec4bac 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,7 @@ threestudio is a unified framework for 3D content creation from text prompts, si
 | <a href="https://zero123.cs.columbia.edu/">Zero-1-to-3</a> | <a href="https://guochengqian.github.io/project/magic123/">Magic123</a> |
 <br />
 | <a href="https://instruct-nerf2nerf.github.io/">InstructNeRF2NeRF</a> | <a href="https://control4darxiv.github.io/">Control4D</a> |
-</b></p>
+</b>
 
 <p align="center">
   <a href="https://colab.research.google.com/github/threestudio-project/threestudio/blob/main/threestudio.ipynb">
@@ -41,9 +41,15 @@ threestudio is a unified framework for 3D content creation from text prompts, si
 </p>
 
 <p align="center">
-    Did not find what you want? Submit a feature request or upvote others' requests <a href="https://github.com/threestudio-project/threestudio/discussions/46">here</a>!
+    Did not find what you want? Checkout <b>threestudio-extension</b> or submit a feature request <a href="https://github.com/threestudio-project/threestudio/discussions/46">here</a>!
 </p>
 
+<p align="center">
+<img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/ac6089a7-d88f-414c-96d6-a5e75616115a" width="68%">
+</p>
+<p align="center">
+<img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/8892898f-8bd8-43dc-a4ec-dd8d078af860" width="50%">
+</p>
 
 ## News
 - 11/30/2023 Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).

From eaadd2b5b813e997f06d8656d1d7854c6f2aca96 Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Fri, 1 Dec 2023 06:36:16 +0800
Subject: [PATCH 07/24] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index cfec4bac..018200e3 100644
--- a/README.md
+++ b/README.md
@@ -41,7 +41,7 @@ threestudio is a unified framework for 3D content creation from text prompts, si
 </p>
 
 <p align="center">
-    Did not find what you want? Checkout <b>threestudio-extension</b> or submit a feature request <a href="https://github.com/threestudio-project/threestudio/discussions/46">here</a>!
+    Did not find what you want? Check out <a href="https://threestudio-project.github.io/threestudio-extensions/"><b>threestudio-extension</b></a> or submit a feature request <a href="https://github.com/threestudio-project/threestudio/discussions/46">here</a>!
 </p>
 
 <p align="center">

From 2c202276747a892cfc1ded8e27a005715be8f5f2 Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Mon, 4 Dec 2023 22:00:04 +0800
Subject: [PATCH 08/24] ray direction normalize (#351)

* add rays normalization setting
---
 .gitignore                    |  3 +++
 threestudio/data/image.py     |  8 +++++++-
 threestudio/data/multiview.py | 12 ++++++++++--
 threestudio/data/uncond.py    | 10 ++++++++--
 threestudio/utils/ops.py      |  4 +++-
 5 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/.gitignore b/.gitignore
index 0bf85006..b774bf79 100644
--- a/.gitignore
+++ b/.gitignore
@@ -188,6 +188,9 @@ outputs-gradio/
 # wandb
 wandb/
 
+# vscode
+.code-workspace
+
 custom/*
 
 load/tets/256_tets.npz
diff --git a/threestudio/data/image.py b/threestudio/data/image.py
index 5c60b53b..fe7c227e 100644
--- a/threestudio/data/image.py
+++ b/threestudio/data/image.py
@@ -48,6 +48,8 @@ class SingleImageDataModuleConfig:
     requires_depth: bool = False
     requires_normal: bool = False
 
+    rays_d_normalize: bool = True
+
 
 class SingleImageDataBase:
     def setup(self, cfg, split):
@@ -141,7 +143,11 @@ def set_rays(self):
         directions[:, :, :, :2] = directions[:, :, :, :2] / self.focal_length
 
         rays_o, rays_d = get_rays(
-            directions, self.c2w, keepdim=True, noise_scale=self.cfg.rays_noise_scale
+            directions,
+            self.c2w,
+            keepdim=True,
+            noise_scale=self.cfg.rays_noise_scale,
+            normalize=self.cfg.rays_d_normalize,
         )
 
         proj_mtx: Float[Tensor, "4 4"] = get_projection_matrix(
diff --git a/threestudio/data/multiview.py b/threestudio/data/multiview.py
index e127390a..8b722624 100644
--- a/threestudio/data/multiview.py
+++ b/threestudio/data/multiview.py
@@ -70,6 +70,8 @@ class MultiviewsDataModuleConfig:
     camera_distance: float = -1
     eval_interpolation: Optional[Tuple[int, int, int]] = None  # (0, 1, 30)
 
+    rays_d_normalize: bool = True
+
 
 class MultiviewIterableDataset(IterableDataset):
     def __init__(self, cfg: Any) -> None:
@@ -164,7 +166,10 @@ def __init__(self, cfg: Any) -> None:
         self.frames_img: Float[Tensor, "B H W 3"] = torch.stack(frames_img, dim=0)
 
         self.rays_o, self.rays_d = get_rays(
-            self.frames_direction, self.frames_c2w, keepdim=True
+            self.frames_direction,
+            self.frames_c2w,
+            keepdim=True,
+            normalize=self.cfg.rays_d_normalize,
         )
         self.mvp_mtx: Float[Tensor, "B 4 4"] = get_mvp_matrix(
             self.frames_c2w, self.frames_proj
@@ -344,7 +349,10 @@ def __init__(self, cfg: Any, split: str) -> None:
         self.frames_img: Float[Tensor, "B H W 3"] = torch.stack(frames_img, dim=0)
 
         self.rays_o, self.rays_d = get_rays(
-            self.frames_direction, self.frames_c2w, keepdim=True
+            self.frames_direction,
+            self.frames_c2w,
+            keepdim=True,
+            normalize=self.cfg.rays_d_normalize,
         )
         self.mvp_mtx: Float[Tensor, "B 4 4"] = get_mvp_matrix(
             self.frames_c2w, self.frames_proj
diff --git a/threestudio/data/uncond.py b/threestudio/data/uncond.py
index d051e3fd..999ba55c 100644
--- a/threestudio/data/uncond.py
+++ b/threestudio/data/uncond.py
@@ -56,6 +56,8 @@ class RandomCameraDataModuleConfig:
     batch_uniform_azimuth: bool = True
     progressive_until: int = 0  # progressive ranges for elevation, azimuth, r, fovy
 
+    rays_d_normalize: bool = True
+
 
 class RandomCameraIterableDataset(IterableDataset, Updateable):
     def __init__(self, cfg: Any) -> None:
@@ -315,7 +317,9 @@ def collate(self, batch) -> Dict[str, Any]:
         )
 
         # Importance note: the returned rays_d MUST be normalized!
-        rays_o, rays_d = get_rays(directions, c2w, keepdim=True)
+        rays_o, rays_d = get_rays(
+            directions, c2w, keepdim=True, normalize=self.cfg.rays_d_normalize
+        )
 
         self.proj_mtx: Float[Tensor, "B 4 4"] = get_projection_matrix(
             fovy, self.width / self.height, 0.01, 100.0
@@ -418,7 +422,9 @@ def __init__(self, cfg: Any, split: str) -> None:
             directions[:, :, :, :2] / focal_length[:, None, None, None]
         )
 
-        rays_o, rays_d = get_rays(directions, c2w, keepdim=True)
+        rays_o, rays_d = get_rays(
+            directions, c2w, keepdim=True, normalize=self.cfg.rays_d_normalize
+        )
         self.proj_mtx: Float[Tensor, "B 4 4"] = get_projection_matrix(
             fovy, self.cfg.eval_width / self.cfg.eval_height, 0.01, 100.0
         )  # FIXME: hard-coded near and far
diff --git a/threestudio/utils/ops.py b/threestudio/utils/ops.py
index b35d3cd0..81d5b599 100644
--- a/threestudio/utils/ops.py
+++ b/threestudio/utils/ops.py
@@ -222,6 +222,7 @@ def get_rays(
     c2w: Float[Tensor, "... 4 4"],
     keepdim=False,
     noise_scale=0.0,
+    normalize=True,
 ) -> Tuple[Float[Tensor, "... 3"], Float[Tensor, "... 3"]]:
     # Rotate ray directions from camera coordinate to the world coordinate
     assert directions.shape[-1] == 3
@@ -257,7 +258,8 @@ def get_rays(
         rays_o = rays_o + torch.randn(3, device=rays_o.device) * noise_scale
         rays_d = rays_d + torch.randn(3, device=rays_d.device) * noise_scale
 
-    rays_d = F.normalize(rays_d, dim=-1)
+    if normalize:
+        rays_d = F.normalize(rays_d, dim=-1)
     if not keepdim:
         rays_o, rays_d = rays_o.reshape(-1, 3), rays_d.reshape(-1, 3)
 

From 3fe3153bf29927459b5ad5cc98d955d9b4c51ba3 Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Wed, 6 Dec 2023 23:42:14 +0800
Subject: [PATCH 09/24] Add modules of 4d-fy for 4D generation (#353)

---
 .../models/geometry/implicit_volume.py        | 16 +++++
 threestudio/models/networks.py                | 64 +++++++++++++++++++
 2 files changed, 80 insertions(+)

diff --git a/threestudio/models/geometry/implicit_volume.py b/threestudio/models/geometry/implicit_volume.py
index d1eeb96e..cfee0017 100644
--- a/threestudio/models/geometry/implicit_volume.py
+++ b/threestudio/models/geometry/implicit_volume.py
@@ -53,6 +53,9 @@ class Config(BaseImplicitGeometry.Config):
         # automatically determine the threshold
         isosurface_threshold: Union[float, str] = 25.0
 
+        # 4D Gaussian Annealing
+        anneal_density_blob_std_config: Optional[dict] = None
+
     cfg: Config
 
     def configure(self) -> None:
@@ -267,3 +270,16 @@ def create_from(
             raise TypeError(
                 f"Cannot create {ImplicitVolume.__name__} from {other.__class__.__name__}"
             )
+
+    def update_step(
+        self, epoch: int, global_step: int, on_load_weights: bool = False
+    ) -> None:
+        if self.cfg.anneal_density_blob_std_config is not None:
+            min_step = self.cfg.anneal_density_blob_std_config.min_anneal_step
+            max_step = self.cfg.anneal_density_blob_std_config.max_anneal_step
+            if global_step >= min_step and global_step <= max_step:
+                end_val = self.cfg.anneal_density_blob_std_config.end_val
+                start_val = self.cfg.anneal_density_blob_std_config.start_val
+                self.density_blob_std = start_val + (global_step - min_step) * (
+                    end_val - start_val
+                ) / (max_step - min_step)
diff --git a/threestudio/models/networks.py b/threestudio/models/networks.py
index 9dc3dc28..cfe986ea 100644
--- a/threestudio/models/networks.py
+++ b/threestudio/models/networks.py
@@ -64,6 +64,68 @@ def forward(self, x):
         return self.encoding(x)
 
 
+# 4D implicit decomposition of space and time (4D-fy)
+class TCNNEncodingSpatialTime(nn.Module):
+    def __init__(
+        self, in_channels, config, dtype=torch.float32, init_time_zero=False
+    ) -> None:
+        super().__init__()
+        self.n_input_dims = in_channels
+        config["otype"] = "HashGrid"
+        self.num_frames = 1  # config["num_frames"]
+        self.static = config["static"]
+        self.cfg = config_to_primitive(config)
+        self.cfg_time = self.cfg
+        self.n_key_frames = config.get("n_key_frames", 1)
+        with torch.cuda.device(get_rank()):
+            self.encoding = tcnn.Encoding(self.n_input_dims, self.cfg, dtype=dtype)
+            self.encoding_time = tcnn.Encoding(
+                self.n_input_dims + 1, self.cfg_time, dtype=dtype
+            )
+        self.n_output_dims = self.encoding.n_output_dims
+        self.frame_time = None
+        if self.static:
+            self.set_temp_param_grad(requires_grad=False)
+        self.use_key_frame = config.get("use_key_frame", False)
+        self.is_video = True
+        self.update_occ_grid = False
+
+    def set_temp_param_grad(self, requires_grad=False):
+        self.set_param_grad(self.encoding_time, requires_grad=requires_grad)
+
+    def set_param_grad(self, param_list, requires_grad=False):
+        if isinstance(param_list, nn.Parameter):
+            param_list.requires_grad = requires_grad
+        else:
+            for param in param_list.parameters():
+                param.requires_grad = requires_grad
+
+    def forward(self, x):
+        # TODO frame_time only supports batch_size == 1 cases
+        if self.update_occ_grid and not isinstance(self.frame_time, float):
+            frame_time = self.frame_time
+        else:
+            if (self.static or not self.training) and self.frame_time is None:
+                frame_time = torch.zeros(
+                    (self.num_frames, 1), device=x.device, dtype=x.dtype
+                ).expand(x.shape[0], 1)
+            else:
+                if self.frame_time is None:
+                    frame_time = 0.0
+                else:
+                    frame_time = self.frame_time
+                frame_time = (
+                    torch.ones((self.num_frames, 1), device=x.device, dtype=x.dtype)
+                    * frame_time
+                ).expand(x.shape[0], 1)
+            frame_time = frame_time.view(-1, 1)
+        enc_space = self.encoding(x)
+        x_frame_time = torch.cat((x, frame_time), 1)
+        enc_space_time = self.encoding_time(x_frame_time)
+        enc = enc_space + enc_space_time
+        return enc
+
+
 class ProgressiveBandHashGrid(nn.Module, Updateable):
     def __init__(self, in_channels, config, dtype=torch.float32):
         super().__init__()
@@ -136,6 +198,8 @@ def get_encoding(n_input_dims: int, config) -> nn.Module:
         encoding = ProgressiveBandFrequency(n_input_dims, config_to_primitive(config))
     elif config.otype == "ProgressiveBandHashGrid":
         encoding = ProgressiveBandHashGrid(n_input_dims, config_to_primitive(config))
+    elif config.otype == "HashGridSpatialTime":
+        encoding = TCNNEncodingSpatialTime(n_input_dims, config)  # 4D-fy encoding
     else:
         encoding = TCNNEncoding(n_input_dims, config_to_primitive(config))
     encoding = CompositeEncoding(

From 56564c88e0139bdd31b1585f8720a1ae6141f138 Mon Sep 17 00:00:00 2001
From: Vikram Voleti <vikram.voleti@gmail.com>
Date: Wed, 13 Dec 2023 13:24:32 -0500
Subject: [PATCH 10/24] [DRAFT] Adds stable-zero123 guidance (#356)

* Adds stable-zero123 guidance

* Fixes end-of-file?

* Update README.md with gif

* Fixes end-of-file?

* Corrects link to huggingface model

* general linear config

* Fixed HF link

* Fixes HF link

---------

Co-authored-by: Vikram Voleti <vikram@ip-26-0-153-234.us-west-2.compute.internal>
Co-authored-by: DSaurus <2238454358@qq.com>
---
 README.md                                     |  27 ++
 .../{zero123_64.yaml => stable-zero123.yaml}  |  56 ++-
 configs/zero123.yaml                          |   8 +-
 load/images/{dog1.png => dog1_rgba.png}       | Bin
 load/zero123/download.sh                      |   5 +-
 threestudio/models/guidance/__init__.py       |   1 +
 .../guidance/stable_zero123_guidance.py       | 340 ++++++++++++++++++
 threestudio/utils/config.py                   |   5 +
 threestudio/utils/misc.py                     |  11 +
 9 files changed, 415 insertions(+), 38 deletions(-)
 rename configs/{zero123_64.yaml => stable-zero123.yaml} (75%)
 rename load/images/{dog1.png => dog1_rgba.png} (100%)
 create mode 100644 threestudio/models/guidance/stable_zero123_guidance.py

diff --git a/README.md b/README.md
index 018200e3..4c3a0ab2 100644
--- a/README.md
+++ b/README.md
@@ -108,6 +108,8 @@ pip install ninja
 pip install -r requirements.txt
 ```
 
+- (Optional) `tiny-cuda-nn` installation might require downgrading pip to 23.0.1
+
 - (Optional, Recommended) The best-performing models in threestudio use the newly-released T2I model [DeepFloyd IF](https://github.com/deep-floyd/IF), which currently requires signing a license agreement. If you would like to use these models, you need to [accept the license on the model card of DeepFloyd IF](https://huggingface.co/DeepFloyd/IF-I-XL-v1.0), and login into the Hugging Face hub in the terminal by `huggingface-cli login`.
 
 - For contributors, see [here](https://github.com/threestudio-project/threestudio#contributing-to-threestudio).
@@ -517,6 +519,31 @@ python launch.py --config configs/magic123-refine-sd.yaml --train --gpu 0 data.i
 
 - If the image contains non-front-facing objects, specifying the approximate elevation and azimuth angle by setting `data.default_elevation_deg` and `data.default_azimuth_deg` can be helpful. In threestudio, top is elevation +90 and bottom is elevation -90; left is azimuth -90 and right is azimuth +90.
 
+
+### Stable Zero123
+
+**Installation**
+
+Download pretrained Stable Zero123 checkpoint `stable_zero123.ckpt` into `load/zero123` from https://huggingface.co/stabilityai/stable-zero123
+
+**Results obtained by threestudio (Stable Zero123 vs Zero123-XL)**
+![Final_video_v01](https://github.com/threestudio-project/threestudio/assets/22424247/bf2d2213-5027-489c-a6ba-1c56c14ee8b7)
+
+**Example running commands**
+
+1. Take an image of your choice, or generate one from text using your favourite AI image generator such as SDXL Turbo (https://clipdrop.co/stable-diffusion-turbo), e.g. with the prompt "A simple 3D render of a friendly dog".
+2. Remove its background using Clipdrop (https://clipdrop.co/remove-background)
+3. Save to `load/images/`, preferably with `_rgba.png` as the suffix
+4. Run Zero-1-to-3 with the Stable Zero123 ckpt:
+```sh
+python launch.py --config configs/stable-zero123.yaml --train --gpu 0 data.image_path=./load/images/hamburger_rgba.png
+```
+
+**IMPORTANT NOTE: This is an experimental implementation and we're constantly improving the quality.**
+
+**IMPORTANT NOTE: This implementation extends the Zero-1-to-3 implementation below, and is heavily inspired by the Zero-1-to-3 implementation in [stable-dreamfusion](https://github.com/ashawkey/stable-dreamfusion)! `extern/ldm_zero123` is borrowed from `stable-dreamfusion/ldm`.**
+
+
 ### Zero-1-to-3 [![arXiv](https://img.shields.io/badge/arXiv-2303.11328-b31b1b.svg?style=flat-square)](https://arxiv.org/abs/2303.11328)
 
 **Installation**
diff --git a/configs/zero123_64.yaml b/configs/stable-zero123.yaml
similarity index 75%
rename from configs/zero123_64.yaml
rename to configs/stable-zero123.yaml
index 6a579335..5a372f66 100644
--- a/configs/zero123_64.yaml
+++ b/configs/stable-zero123.yaml
@@ -1,24 +1,25 @@
-name: "zero123"
-tag: "${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}_prog${data.random_camera.progressive_until}"
+name: "zero123-sai"
+tag: "${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}"
 exp_root_dir: "outputs"
 seed: 0
 
 data_type: "single-image-datamodule"
 data: # threestudio/data/image.py -> SingleImageDataModuleConfig
   image_path: ./load/images/hamburger_rgba.png
-  height: 128
-  width: 128
-  default_elevation_deg: 0.0
+  height: [128, 256, 512]
+  width: [128, 256, 512]
+  resolution_milestones: [200, 300]
+  default_elevation_deg: 5.0
   default_azimuth_deg: 0.0
   default_camera_distance: 3.8
   default_fovy_deg: 20.0
   requires_depth: ${cmaxgt0orcmaxgt0:${system.loss.lambda_depth},${system.loss.lambda_depth_rel}}
   requires_normal: ${cmaxgt0:${system.loss.lambda_normal}}
   random_camera: # threestudio/data/uncond.py -> RandomCameraDataModuleConfig
-    height: 64
-    width: 64
-    batch_size: 12
-    resolution_milestones: []
+    height: [64, 128, 256]
+    width: [64, 128, 256]
+    batch_size: [12, 8, 4]
+    resolution_milestones: [200, 300]
     eval_height: 512
     eval_width: 512
     eval_batch_size: 1
@@ -47,13 +48,6 @@ system:
     radius: 2.0
     normal_type: "analytic"
 
-    # the density initialization proposed in the DreamFusion paper
-    # does not work very well
-    # density_bias: "blob_dreamfusion"
-    # density_activation: exp
-    # density_blob_scale: 5.
-    # density_blob_std: 0.2
-
     # use Magic3D density initialization instead
     density_bias: "blob_magic3d"
     density_activation: softplus
@@ -88,28 +82,26 @@ system:
   renderer:
     radius: ${system.geometry.radius}
     num_samples_per_ray: 512
-    return_comp_normal: ${gt0:${system.loss.lambda_normal_smooth}}
-    return_normal_perturb: ${gt0:${system.loss.lambda_3d_normal_smooth}}
+    return_comp_normal: ${cmaxgt0:${system.loss.lambda_normal_smooth}}
+    return_normal_perturb: ${cmaxgt0:${system.loss.lambda_3d_normal_smooth}}
 
   prompt_processor_type: "dummy-prompt-processor" # Zero123 doesn't use prompts
   prompt_processor:
     pretrained_model_name_or_path: ""
     prompt: ""
 
-  guidance_type: "zero123-guidance"
+  guidance_type: "stable-zero123-guidance"
   guidance:
-    pretrained_model_name_or_path: "./load/zero123/zero123-xl.ckpt"
     pretrained_config: "./load/zero123/sd-objaverse-finetune-c_concat-256.yaml"
+    pretrained_model_name_or_path: "./load/zero123/stable_zero123.ckpt"
     vram_O: ${not:${gt0:${system.freq.guidance_eval}}}
     cond_image_path: ${data.image_path}
     cond_elevation_deg: ${data.default_elevation_deg}
     cond_azimuth_deg: ${data.default_azimuth_deg}
     cond_camera_distance: ${data.default_camera_distance}
     guidance_scale: 3.0
-    #min_step_percent: 0.02
-    min_step_percent: [0, 0.4, 0.2, 200]  # (start_iter, start_val, end_val, end_iter)
-    #max_step_percent: 0.98
-    max_step_percent: [0, 0.85, 0.5, 200]
+    min_step_percent: [50, 0.7, 0.3, 200]  # (start_iter, start_val, end_val, end_iter)
+    max_step_percent: [50, 0.98, 0.8, 200]
 
   freq:
     ref_only_steps: 0
@@ -123,16 +115,16 @@ system:
 
   loss:
     lambda_sds: 0.1
-    lambda_rgb: 500.
+    lambda_rgb: [100, 500., 1000., 400]
     lambda_mask: 50.
     lambda_depth: 0. # 0.05
     lambda_depth_rel: 0. # [0, 0, 0.05, 100]
     lambda_normal: 0. # [0, 0, 0.05, 100]
-    lambda_normal_smooth: 10.0
-    lambda_3d_normal_smooth: 10.0
+    lambda_normal_smooth: [100, 7.0, 5.0, 150, 10.0, 200]
+    lambda_3d_normal_smooth: [100, 7.0, 5.0, 150, 10.0, 200]
     lambda_orient: 1.0
-    lambda_sparsity: 0.1 # should be tweaked for every model
-    lambda_opaque: 0.1
+    lambda_sparsity: 0.5 # should be tweaked for every model
+    lambda_opaque: 0.5
 
   optimizer:
     name: Adam
@@ -142,14 +134,14 @@ system:
       eps: 1.e-8
 
 trainer:
-  max_steps: 400
+  max_steps: 600
   log_every_n_steps: 1
   num_sanity_val_steps: 0
   val_check_interval: 100
   enable_progress_bar: true
-  precision: 16-mixed
+  precision: 32
 
 checkpoint:
   save_last: true # save at each validation time
   save_top_k: -1
-  every_n_train_steps: ${trainer.max_steps}
+  every_n_train_steps: 100 # ${trainer.max_steps}
diff --git a/configs/zero123.yaml b/configs/zero123.yaml
index ca61b2e4..0f6ade97 100644
--- a/configs/zero123.yaml
+++ b/configs/zero123.yaml
@@ -1,5 +1,5 @@
 name: "zero123"
-tag: "${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}_prog${data.random_camera.progressive_until}"
+tag: "${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}"
 exp_root_dir: "outputs"
 seed: 0
 
@@ -9,7 +9,7 @@ data: # threestudio/data/image.py -> SingleImageDataModuleConfig
   height: [128, 256, 512]
   width: [128, 256, 512]
   resolution_milestones: [200, 300]
-  default_elevation_deg: 0.0
+  default_elevation_deg: 5.0
   default_azimuth_deg: 0.0
   default_camera_distance: 3.8
   default_fovy_deg: 20.0
@@ -111,9 +111,7 @@ system:
     cond_azimuth_deg: ${data.default_azimuth_deg}
     cond_camera_distance: ${data.default_camera_distance}
     guidance_scale: 3.0
-    #min_step_percent: 0.02
     min_step_percent: [0, 0.4, 0.2, 200]  # (start_iter, start_val, end_val, end_iter)
-    #max_step_percent: 0.98
     max_step_percent: [0, 0.85, 0.5, 200]
 
   freq:
@@ -147,7 +145,7 @@ system:
       eps: 1.e-8
 
 trainer:
-  max_steps: 400
+  max_steps: 600
   log_every_n_steps: 1
   num_sanity_val_steps: 0
   val_check_interval: 100
diff --git a/load/images/dog1.png b/load/images/dog1_rgba.png
similarity index 100%
rename from load/images/dog1.png
rename to load/images/dog1_rgba.png
diff --git a/load/zero123/download.sh b/load/zero123/download.sh
index 35cc597e..169676b7 100644
--- a/load/zero123/download.sh
+++ b/load/zero123/download.sh
@@ -1 +1,4 @@
-wget https://huggingface.co/cvlab/zero123-weights/resolve/main/105000.ckpt
+# wget https://huggingface.co/cvlab/zero123-weights/resolve/main/105000.ckpt
+# mv 105000.ckpt zero123-original.ckpt
+wget https://zero123.cs.columbia.edu/assets/zero123-xl.ckpt
+# Download stable_zero123.ckpt from https://huggingface.co/stabilityai/stable-zero123
diff --git a/threestudio/models/guidance/__init__.py b/threestudio/models/guidance/__init__.py
index eeda92e4..b25a8d76 100644
--- a/threestudio/models/guidance/__init__.py
+++ b/threestudio/models/guidance/__init__.py
@@ -5,6 +5,7 @@
     stable_diffusion_guidance,
     stable_diffusion_unified_guidance,
     stable_diffusion_vsd_guidance,
+    stable_zero123_guidance,
     zero123_guidance,
     zero123_unified_guidance,
 )
diff --git a/threestudio/models/guidance/stable_zero123_guidance.py b/threestudio/models/guidance/stable_zero123_guidance.py
new file mode 100644
index 00000000..6d545908
--- /dev/null
+++ b/threestudio/models/guidance/stable_zero123_guidance.py
@@ -0,0 +1,340 @@
+import importlib
+import os
+from dataclasses import dataclass, field
+
+import cv2
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from diffusers import DDIMScheduler, DDPMScheduler, StableDiffusionPipeline
+from diffusers.utils.import_utils import is_xformers_available
+from omegaconf import OmegaConf
+from tqdm import tqdm
+
+import threestudio
+from threestudio.utils.base import BaseObject
+from threestudio.utils.misc import C, parse_version
+from threestudio.utils.typing import *
+
+
+def get_obj_from_str(string, reload=False):
+    module, cls = string.rsplit(".", 1)
+    if reload:
+        module_imp = importlib.import_module(module)
+        importlib.reload(module_imp)
+    return getattr(importlib.import_module(module, package=None), cls)
+
+
+def instantiate_from_config(config):
+    if not "target" in config:
+        if config == "__is_first_stage__":
+            return None
+        elif config == "__is_unconditional__":
+            return None
+        raise KeyError("Expected key `target` to instantiate.")
+    return get_obj_from_str(config["target"])(**config.get("params", dict()))
+
+
+# load model
+def load_model_from_config(config, ckpt, device, vram_O=True, verbose=False):
+    pl_sd = torch.load(ckpt, map_location="cpu")
+
+    if "global_step" in pl_sd and verbose:
+        print(f'[INFO] Global Step: {pl_sd["global_step"]}')
+
+    sd = pl_sd["state_dict"]
+
+    model = instantiate_from_config(config.model)
+    m, u = model.load_state_dict(sd, strict=False)
+
+    if len(m) > 0 and verbose:
+        print("[INFO] missing keys: \n", m)
+    if len(u) > 0 and verbose:
+        print("[INFO] unexpected keys: \n", u)
+
+    # manually load ema and delete it to save GPU memory
+    if model.use_ema:
+        if verbose:
+            print("[INFO] loading EMA...")
+        model.model_ema.copy_to(model.model)
+        del model.model_ema
+
+    if vram_O:
+        # we don't need decoder
+        del model.first_stage_model.decoder
+
+    torch.cuda.empty_cache()
+
+    model.eval().to(device)
+
+    return model
+
+
+@threestudio.register("stable-zero123-guidance")
+class StableZero123Guidance(BaseObject):
+    @dataclass
+    class Config(BaseObject.Config):
+        pretrained_model_name_or_path: str = "load/zero123/stable_zero123.ckpt"
+        pretrained_config: str = "load/zero123/sd-objaverse-finetune-c_concat-256.yaml"
+        vram_O: bool = True  # delete first_stage_model.decoder after loading
+        # conditioning image and its reference camera pose (used by get_cond)
+        cond_image_path: str = "load/images/hamburger_rgba.png"
+        cond_elevation_deg: float = 0.0
+        cond_azimuth_deg: float = 0.0
+        cond_camera_distance: float = 1.2
+        # classifier-free guidance weight applied in __call__
+        guidance_scale: float = 5.0
+        # optional clamp on the SDS gradient; resolved through C() each step
+        grad_clip: Optional[
+            Any
+        ] = None  # field(default_factory=lambda: [0, 2.0, 8.0, 1000])
+        half_precision_weights: bool = False
+        # timestep sampling range as fractions of num_train_timesteps
+        min_step_percent: float = 0.02
+        max_step_percent: float = 0.98
+
+    cfg: Config
+
+    def configure(self) -> None:
+        threestudio.info(f"Loading Stable Zero123 ...")
+
+        self.config = OmegaConf.load(self.cfg.pretrained_config)
+        # TODO: seems it cannot load into fp16...
+        self.weights_dtype = torch.float32
+        self.model = load_model_from_config(
+            self.config,
+            self.cfg.pretrained_model_name_or_path,
+            device=self.device,
+            vram_O=self.cfg.vram_O,
+        )
+
+        for p in self.model.parameters():
+            p.requires_grad_(False)
+
+        # timesteps: use diffuser for convenience... hope it's alright.
+        self.num_train_timesteps = self.config.model.params.timesteps
+
+        self.scheduler = DDIMScheduler(
+            self.num_train_timesteps,
+            self.config.model.params.linear_start,
+            self.config.model.params.linear_end,
+            beta_schedule="scaled_linear",
+            clip_sample=False,
+            set_alpha_to_one=False,
+            steps_offset=1,
+        )
+
+        self.num_train_timesteps = self.scheduler.config.num_train_timesteps
+        self.set_min_max_steps()  # set to default value
+
+        self.alphas: Float[Tensor, "..."] = self.scheduler.alphas_cumprod.to(
+            self.device
+        )
+
+        self.grad_clip_val: Optional[float] = None
+
+        self.prepare_embeddings(self.cfg.cond_image_path)
+
+        threestudio.info(f"Loaded Stable Zero123!")
+
+    @torch.cuda.amp.autocast(enabled=False)
+    def set_min_max_steps(self, min_step_percent=0.02, max_step_percent=0.98):
+        self.min_step = int(self.num_train_timesteps * min_step_percent)
+        self.max_step = int(self.num_train_timesteps * max_step_percent)
+
+    @torch.cuda.amp.autocast(enabled=False)
+    def prepare_embeddings(self, image_path: str) -> None:
+        # load cond image for zero123
+        assert os.path.exists(image_path)
+        rgba = cv2.cvtColor(
+            cv2.imread(image_path, cv2.IMREAD_UNCHANGED), cv2.COLOR_BGRA2RGBA
+        )
+        rgba = (
+            cv2.resize(rgba, (256, 256), interpolation=cv2.INTER_AREA).astype(
+                np.float32
+            )
+            / 255.0
+        )
+        rgb = rgba[..., :3] * rgba[..., 3:] + (1 - rgba[..., 3:])
+        self.rgb_256: Float[Tensor, "1 3 H W"] = (
+            torch.from_numpy(rgb)
+            .unsqueeze(0)
+            .permute(0, 3, 1, 2)
+            .contiguous()
+            .to(self.device)
+        )
+        self.c_crossattn, self.c_concat = self.get_img_embeds(self.rgb_256)
+
+    @torch.cuda.amp.autocast(enabled=False)
+    @torch.no_grad()
+    def get_img_embeds(
+        self,
+        img: Float[Tensor, "B 3 256 256"],
+    ) -> Tuple[Float[Tensor, "B 1 768"], Float[Tensor, "B 4 32 32"]]:
+        img = img * 2.0 - 1.0
+        c_crossattn = self.model.get_learned_conditioning(img.to(self.weights_dtype))
+        c_concat = self.model.encode_first_stage(img.to(self.weights_dtype)).mode()
+        return c_crossattn, c_concat
+
+    @torch.cuda.amp.autocast(enabled=False)
+    def encode_images(
+        self, imgs: Float[Tensor, "B 3 256 256"]
+    ) -> Float[Tensor, "B 4 32 32"]:
+        input_dtype = imgs.dtype
+        imgs = imgs * 2.0 - 1.0
+        latents = self.model.get_first_stage_encoding(
+            self.model.encode_first_stage(imgs.to(self.weights_dtype))
+        )
+        return latents.to(input_dtype)  # [B, 4, 32, 32] Latent space image
+
+    @torch.cuda.amp.autocast(enabled=False)
+    def decode_latents(
+        self,
+        latents: Float[Tensor, "B 4 H W"],
+    ) -> Float[Tensor, "B 3 512 512"]:
+        input_dtype = latents.dtype
+        image = self.model.decode_first_stage(latents)
+        image = (image * 0.5 + 0.5).clamp(0, 1)
+        return image.to(input_dtype)
+
+    @torch.cuda.amp.autocast(enabled=False)
+    @torch.no_grad()
+    def get_cond(
+        self,
+        elevation: Float[Tensor, "B"],
+        azimuth: Float[Tensor, "B"],
+        camera_distances: Float[Tensor, "B"],
+        c_crossattn=None,
+        c_concat=None,
+        **kwargs,
+    ) -> dict:
+        """Build the classifier-free-guidance conditioning for a batch of views.
+
+        Args:
+            elevation, azimuth: per-view camera angles in degrees.
+            camera_distances: accepted but not used by this method.
+            c_crossattn, c_concat: optional replacements for the embeddings
+                cached by `prepare_embeddings`.
+
+        Returns:
+            Dict with "c_crossattn" and "c_concat", each a one-element list
+            whose tensor stacks a zeroed (unconditional) half before the
+            conditional half along dim 0.
+        """
+        c_crossattn = self.c_crossattn if c_crossattn is None else c_crossattn
+        c_concat = self.c_concat if c_concat is None else c_concat
+        rel_azimuth = torch.deg2rad(azimuth - self.cfg.cond_azimuth_deg)
+        T = torch.stack(
+            [
+                torch.deg2rad(
+                    (90 - elevation) - (90 - self.cfg.cond_elevation_deg)
+                ),  # Zero123 polar is 90-elevation
+                torch.sin(rel_azimuth),
+                torch.cos(rel_azimuth),
+                torch.deg2rad(
+                    90 - torch.full_like(elevation, self.cfg.cond_elevation_deg)
+                ),
+            ],
+            dim=-1,
+        )[:, None, :].to(self.device)
+        clip_emb = self.model.cc_projection(
+            torch.cat([c_crossattn.repeat(len(T), 1, 1), T], dim=-1)
+        )
+        c_concat = c_concat.repeat(len(T), 1, 1, 1)
+        # zero halves mirror the tensors actually used, overrides included
+        return {
+            "c_crossattn": [
+                torch.cat([torch.zeros_like(clip_emb), clip_emb], dim=0)
+            ],
+            "c_concat": [
+                torch.cat([torch.zeros_like(c_concat), c_concat], dim=0)
+            ],
+        }
+
+    def __call__(
+        self,
+        rgb: Float[Tensor, "B H W C"],
+        elevation: Float[Tensor, "B"],
+        azimuth: Float[Tensor, "B"],
+        camera_distances: Float[Tensor, "B"],
+        rgb_as_latents=False,
+        **kwargs,
+    ):  # -> dict with "loss_sds", "grad_norm", "min_step", "max_step"
+        batch_size = rgb.shape[0]
+
+        rgb_BCHW = rgb.permute(0, 3, 1, 2)
+        latents: Float[Tensor, "B 4 32 32"]
+        if rgb_as_latents:
+            latents = (
+                F.interpolate(rgb_BCHW, (32, 32), mode="bilinear", align_corners=False)
+                * 2
+                - 1
+            )
+        else:
+            rgb_BCHW_512 = F.interpolate(  # NOTE: 256x256 despite the name
+                rgb_BCHW, (256, 256), mode="bilinear", align_corners=False
+            )
+            # encode image into latents with vae
+            latents = self.encode_images(rgb_BCHW_512)
+
+        cond = self.get_cond(elevation, azimuth, camera_distances)
+
+        # timestep ~ U(min_step, max_step) to avoid very high/low noise level
+        t = torch.randint(
+            self.min_step,
+            self.max_step + 1,
+            [batch_size],
+            dtype=torch.long,
+            device=self.device,
+        )
+
+        # predict the noise residual with unet, NO grad!
+        with torch.no_grad():
+            # add noise
+            noise = torch.randn_like(latents)  # TODO: use torch generator
+            latents_noisy = self.scheduler.add_noise(latents, noise, t)
+            # pred noise
+            x_in = torch.cat([latents_noisy] * 2)
+            t_in = torch.cat([t] * 2)
+            noise_pred = self.model.apply_model(x_in, t_in, cond)
+
+        # perform guidance
+        noise_pred_uncond, noise_pred_cond = noise_pred.chunk(2)
+        noise_pred = noise_pred_uncond + self.cfg.guidance_scale * (
+            noise_pred_cond - noise_pred_uncond
+        )
+
+        w = (1 - self.alphas[t]).reshape(-1, 1, 1, 1)
+        grad = w * (noise_pred - noise)
+        grad = torch.nan_to_num(grad)
+        # clip grad for stable training?
+        if self.grad_clip_val is not None:
+            grad = grad.clamp(-self.grad_clip_val, self.grad_clip_val)
+
+        # loss = SpecifyGradient.apply(latents, grad)
+        # SpecifyGradient is not straightforward, use a reparameterization trick instead
+        target = (latents - grad).detach()
+        # d(loss)/d(latents) = latents - target = latents - (latents - grad) = grad
+        loss_sds = 0.5 * F.mse_loss(latents, target, reduction="sum") / batch_size
+
+        guidance_out = {
+            "loss_sds": loss_sds,
+            "grad_norm": grad.norm(),
+            "min_step": self.min_step,
+            "max_step": self.max_step,
+        }
+
+        return guidance_out
+
+    def update_step(self, epoch: int, global_step: int, on_load_weights: bool = False):
+        # clip grad for stable training as demonstrated in
+        # Debiasing Scores and Prompts of 2D Diffusion for Robust Text-to-3D Generation
+        # http://arxiv.org/abs/2303.15413
+        if self.cfg.grad_clip is not None:
+            self.grad_clip_val = C(self.cfg.grad_clip, epoch, global_step)
+
+        self.set_min_max_steps(
+            min_step_percent=C(self.cfg.min_step_percent, epoch, global_step),
+            max_step_percent=C(self.cfg.max_step_percent, epoch, global_step),
+        )
diff --git a/threestudio/utils/config.py b/threestudio/utils/config.py
index 99456333..88a7d092 100644
--- a/threestudio/utils/config.py
+++ b/threestudio/utils/config.py
@@ -35,6 +35,11 @@ def C_max(value: Any) -> float:
         value = config_to_primitive(value)
         if not isinstance(value, list):
             raise TypeError("Scalar specification only supports list, got", type(value))
+        if len(value) >= 6:
+            max_value = value[2]
+            for i in range(4, len(value), 2):
+                max_value = max(max_value, value[i])
+            value = [value[0], value[1], max_value, value[3]]
         if len(value) == 3:
             value = [0] + value
         assert len(value) == 4
diff --git a/threestudio/utils/misc.py b/threestudio/utils/misc.py
index 7954bb86..969c7c60 100644
--- a/threestudio/utils/misc.py
+++ b/threestudio/utils/misc.py
@@ -71,6 +71,17 @@ def C(value: Any, epoch: int, global_step: int) -> float:
             raise TypeError("Scalar specification only supports list, got", type(value))
         if len(value) == 3:
             value = [0] + value
+        if len(value) >= 6:
+            select_i = 3
+            for i in range(3, len(value) - 2, 2):
+                if global_step >= value[i]:
+                    select_i = i + 2
+            if select_i != 3:
+                start_value, start_step = value[select_i - 3], value[select_i - 2]
+            else:
+                start_step, start_value = value[:2]
+            end_value, end_step = value[select_i - 1], value[select_i]
+            value = [start_step, start_value, end_value, end_step]
         assert len(value) == 4
         start_step, start_value, end_value, end_step = value
         if isinstance(end_step, int):

From c86246d7b194915584abb4b33703abd3e3966f01 Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Fri, 15 Dec 2023 03:32:57 +0800
Subject: [PATCH 11/24] perceptual loss update (#358)

---
 threestudio/systems/control4d_multiview.py |  4 ++--
 threestudio/systems/instructnerf2nerf.py   |  3 ++-
 threestudio/utils/perceptual/perceptual.py | 23 ++++++++++++++++++++++
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/threestudio/systems/control4d_multiview.py b/threestudio/systems/control4d_multiview.py
index 0f198b51..8cfd9cf5 100644
--- a/threestudio/systems/control4d_multiview.py
+++ b/threestudio/systems/control4d_multiview.py
@@ -37,8 +37,8 @@ def configure(self) -> None:
             material=self.material,
             background=self.background,
         )
-
-        self.perceptual_loss = PerceptualLoss().eval().to(get_device())
+        p_config = {}
+        self.perceptual_loss = threestudio.find("perceptual-loss")(p_config)
         self.edit_frames = {}
         self.per_editing_step = self.cfg.per_editing_step
         self.start_editing_step = self.cfg.start_editing_step
diff --git a/threestudio/systems/instructnerf2nerf.py b/threestudio/systems/instructnerf2nerf.py
index 16e914e1..f6e3ecde 100644
--- a/threestudio/systems/instructnerf2nerf.py
+++ b/threestudio/systems/instructnerf2nerf.py
@@ -24,7 +24,8 @@ def configure(self):
         # create geometry, material, background, renderer
         super().configure()
         self.edit_frames = {}
-        self.perceptual_loss = PerceptualLoss().eval().to(get_device())
+        p_config = {}
+        self.perceptual_loss = threestudio.find("perceptual-loss")(p_config)
 
     def forward(self, batch: Dict[str, Any]) -> Dict[str, Any]:
         render_out = self.renderer(**batch)
diff --git a/threestudio/utils/perceptual/perceptual.py b/threestudio/utils/perceptual/perceptual.py
index d756694a..403d9a92 100644
--- a/threestudio/utils/perceptual/perceptual.py
+++ b/threestudio/utils/perceptual/perceptual.py
@@ -1,12 +1,35 @@
 """Stripped version of https://github.com/richzhang/PerceptualSimilarity/tree/master/models"""
 
 from collections import namedtuple
+from dataclasses import dataclass, field
 
 import torch
 import torch.nn as nn
 from torchvision import models
 
+import threestudio
+from threestudio.utils.base import BaseObject
 from threestudio.utils.perceptual.utils import get_ckpt_path
+from threestudio.utils.typing import *
+
+
+@threestudio.register("perceptual-loss")
+class PerceptualLossObject(BaseObject):
+    """Registry wrapper around a `PerceptualLoss` network kept in eval mode."""
+    @dataclass
+    class Config(BaseObject.Config):
+        use_dropout: bool = True
+
+    cfg: Config
+
+    def configure(self) -> None:
+        network = PerceptualLoss(self.cfg.use_dropout).eval()  # inference mode
+        self.perceptual_loss = network.to(self.device)
+
+    def __call__(
+        self, x: Float[Tensor, "B 3 256 256"], y: Float[Tensor, "B 3 256 256"]
+    ):
+        return self.perceptual_loss(x, y)
 
 
 class PerceptualLoss(nn.Module):

From 3597b550e483a91e0a52587fd72d48902fc4b897 Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Fri, 15 Dec 2023 20:43:56 +0800
Subject: [PATCH 12/24] Automatically find last checkpoint and support
 multi-stage training (#362)

---
 threestudio/systems/base.py | 10 +++++++++-
 threestudio/utils/misc.py   | 21 +++++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/threestudio/systems/base.py b/threestudio/systems/base.py
index 5b668ea6..73faac60 100644
--- a/threestudio/systems/base.py
+++ b/threestudio/systems/base.py
@@ -13,7 +13,13 @@
     update_if_possible,
 )
 from threestudio.utils.config import parse_structured
-from threestudio.utils.misc import C, cleanup, get_device, load_module_weights
+from threestudio.utils.misc import (
+    C,
+    cleanup,
+    find_last_path,
+    get_device,
+    load_module_weights,
+)
 from threestudio.utils.saving import SaverMixin
 from threestudio.utils.typing import *
 
@@ -241,6 +247,8 @@ class Config(BaseSystem.Config):
     cfg: Config
 
     def configure(self) -> None:
+        self.cfg.geometry_convert_from = find_last_path(self.cfg.geometry_convert_from)
+        self.cfg.weights = find_last_path(self.cfg.weights)
         if (
             self.cfg.geometry_convert_from  # from_coarse must be specified
             and not self.cfg.weights  # not initialized from coarse when weights are specified
diff --git a/threestudio/utils/misc.py b/threestudio/utils/misc.py
index 969c7c60..ccb4987f 100644
--- a/threestudio/utils/misc.py
+++ b/threestudio/utils/misc.py
@@ -134,3 +134,24 @@ def broadcast(tensor, src=0):
 def enable_gradient(model, enabled: bool = True) -> None:
     for param in model.parameters():
         param.requires_grad_(enabled)
+
+
+def find_last_path(path: str):
+    """Resolve a "LAST" placeholder in `path` to the last matching entry.
+
+    Entries of the parent directory whose names start with the text before
+    "LAST" are compared as strings and the greatest one is substituted; a
+    `path` that is None or has no "LAST" is returned unchanged.
+    Raises FileNotFoundError if nothing matches or the result is missing.
+    """
+    if path is None or "LAST" not in path:
+        return path
+    base_dir_prefix, suffix = path.replace(" ", "_").split("LAST", 1)
+    base_dir, prefix = os.path.split(base_dir_prefix)
+    candidates = [n for n in os.listdir(base_dir or ".") if n.startswith(prefix)]
+    if not candidates:
+        raise FileNotFoundError(f"no entry matches '{base_dir_prefix}*'")
+    new_path = os.path.join(base_dir, max(candidates)) + suffix
+    if not os.path.exists(new_path):
+        raise FileNotFoundError(new_path)
+    return new_path

From 5d21501996de6a9542e2164506253c36608f94ed Mon Sep 17 00:00:00 2001
From: DSaurus <2238454358@qq.com>
Date: Fri, 15 Dec 2023 22:37:42 +0800
Subject: [PATCH 13/24] update extensions

---
 launch.py               | 2 +-
 threestudio/__init__.py | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/launch.py b/launch.py
index d24940af..bca4ae11 100644
--- a/launch.py
+++ b/launch.py
@@ -82,7 +82,7 @@ def load_custom_modules():
                 and os.path.splitext(module_path)[1] != ".py"
             ):
                 continue
-            if module_path.endswith(".disabled"):
+            if module_path.endswith("_disabled"):
                 continue
             time_before = time.perf_counter()
             success = load_custom_module(module_path)
diff --git a/threestudio/__init__.py b/threestudio/__init__.py
index 2c83608f..5651db5e 100644
--- a/threestudio/__init__.py
+++ b/threestudio/__init__.py
@@ -3,7 +3,12 @@
 
 def register(name):
     def decorator(cls):
-        __modules__[name] = cls
+        if name in __modules__:
+            raise ValueError(
+                f"Module {name} already exists! Names of extensions conflict!"
+            )
+        else:
+            __modules__[name] = cls
         return cls
 
     return decorator

From 145d2bdbfd6554a7e6ba0ec8e41ec052dfdc519e Mon Sep 17 00:00:00 2001
From: Ikko Eltociear Ashimine <eltociear@gmail.com>
Date: Sun, 17 Dec 2023 00:46:34 +0900
Subject: [PATCH 14/24] Update README.md (#366)

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 4c3a0ab2..d82af86d 100644
--- a/README.md
+++ b/README.md
@@ -431,7 +431,7 @@ https://github.com/threestudio-project/threestudio/assets/19284678/72217cdd-765a
 
 - Most of the settings are the same as the DreamFusion model. Please refer to the notable differences of the DreamFusion model.
 - We use NeuS as the geometry representation while the original paper uses VolSDF.
-- We adopt techniques from [Neuralangelo](https://arxiv.org/abs/2306.03092) to stablize normal computation when using hash grids.
+- We adopt techniques from [Neuralangelo](https://arxiv.org/abs/2306.03092) to stabilize normal computation when using hash grids.
 - We currently only implemented the coarse stage of TextMesh.
 
 **Example running commands**

From 03671ab851364753142e75344dc303dfa48e7048 Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Mon, 18 Dec 2023 21:28:44 +0800
Subject: [PATCH 15/24] Update README.md

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index d82af86d..25a1a6c7 100644
--- a/README.md
+++ b/README.md
@@ -48,10 +48,13 @@ threestudio is a unified framework for 3D content creation from text prompts, si
 <img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/ac6089a7-d88f-414c-96d6-a5e75616115a" width="68%">
 </p>
 <p align="center">
+<img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/025e6980-baf2-4b5f-9c23-4f66ef847bf5" width="33%">
+<img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/f04b6bdd-ef02-4ce7-b7c9-981f8bda419f" width="33%">
 <img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/8892898f-8bd8-43dc-a4ec-dd8d078af860" width="50%">
 </p>
 
 ## News
+- 18/12/2023 Implementation of [4D-fy](https://github.com/DSaurus/threestudio-4dfy) for 4D generation and [DreamCraft3D](https://github.com/DSaurus/threestudio-dreamcraft3D) for high-quality image-to-3D generation as the custom extensions! Follow the instructions on the extensions website to give it a try.
 - 11/30/2023 Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).
 - 11/30/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions).
 - 08/25/2023: Implementation of [Magic123](https://guochengqian.github.io/project/magic123/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#magic123-) to give it a try.

From b6d7c12075396bdb89d387f2a3b4a573290de35d Mon Sep 17 00:00:00 2001
From: Vikram Voleti <vikram.voleti@gmail.com>
Date: Mon, 18 Dec 2023 11:56:17 -0500
Subject: [PATCH 16/24] Update README.md for Stable Zero123 (#372)

---
 README.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 25a1a6c7..deab297e 100644
--- a/README.md
+++ b/README.md
@@ -54,8 +54,9 @@ threestudio is a unified framework for 3D content creation from text prompts, si
 </p>
 
 ## News
-- 18/12/2023 Implementation of [4D-fy](https://github.com/DSaurus/threestudio-4dfy) for 4D generation and [DreamCraft3D](https://github.com/DSaurus/threestudio-dreamcraft3D) for high-quality image-to-3D generation as the custom extensions! Follow the instructions on the extensions website to give it a try.
-- 11/30/2023 Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).
+- 12/18/2023: Implementation of [4D-fy](https://github.com/DSaurus/threestudio-4dfy) for 4D generation and [DreamCraft3D](https://github.com/DSaurus/threestudio-dreamcraft3D) for high-quality image-to-3D generation as the custom extensions! Follow the instructions on the extensions website to give it a try.
+- 12/13/2023: Implementation supporting [Stable Zero123](https://stability.ai/news/stable-zero123-3d-generation) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#stable-zero123) to give it a try.
+- 11/30/2023: Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).
 - 11/30/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions).
 - 08/25/2023: Implementation of [Magic123](https://guochengqian.github.io/project/magic123/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#magic123-) to give it a try.
 - 07/06/2023: Join our [Discord server](https://discord.gg/ejer2MAB8N) for lively discussions!

From cf23ed6eab4b145d45954ac7db8dc78f94616914 Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Tue, 19 Dec 2023 14:02:27 +0800
Subject: [PATCH 17/24] Update README.md

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index deab297e..c77f7b92 100644
--- a/README.md
+++ b/README.md
@@ -533,6 +533,9 @@ Download pretrained Stable Zero123 checkpoint `stable-zero123.ckpt` into `load/z
 **Results obtained by threestudio (Stable Zero123 vs Zero123-XL)**
 ![Final_video_v01](https://github.com/threestudio-project/threestudio/assets/22424247/bf2d2213-5027-489c-a6ba-1c56c14ee8b7)
 
+**Direct multi-view image generation**
+If you only want to generate multi-view images, please refer to [threestudio-mvimg-gen](https://github.com/DSaurus/threestudio-mvimg-gen). This extension can use Stable Zero123 to directly generate images from multi-view perspectives.
+
 **Example running commands**
 
 1. Take an image of your choice, or generate it from text using your favourite AI image generator such as SDXL Turbo (https://clipdrop.co/stable-diffusion-turbo) E.g. "A simple 3D render of a friendly dog"

From 47b6a33827350fc72d9be2d69ed6ad8522a350ba Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Tue, 19 Dec 2023 19:58:20 +0800
Subject: [PATCH 18/24] add version (#375)

---
 threestudio/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/threestudio/__init__.py b/threestudio/__init__.py
index 5651db5e..a1184e43 100644
--- a/threestudio/__init__.py
+++ b/threestudio/__init__.py
@@ -1,4 +1,5 @@
 __modules__ = {}
+__version__ = "0.2.0"
 
 
 def register(name):

From 23b2d717474ffefd3e88e8f69c0e9695c5c6f7f8 Mon Sep 17 00:00:00 2001
From: bennyguo <bennyguo@163.com>
Date: Thu, 21 Dec 2023 12:08:11 +0800
Subject: [PATCH 19/24] update gradio app

---
 gradio_app.py    | 5 +++--
 requirements.txt | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/gradio_app.py b/gradio_app.py
index c2d32f5f..0d921d98 100644
--- a/gradio_app.py
+++ b/gradio_app.py
@@ -201,7 +201,7 @@ def run(
 
     # manually assign the output directory, name and tag so that we know the trial directory
     name = os.path.basename(model_config[model_name]["path"]).split(".")[0]
-    tag = datetime.now().strftime("@%Y%m%d-%H%M%S")
+    tag = datetime.now().strftime("%Y%m%d-%H%M%S")
     trial_dir = os.path.join(save_root, EXP_ROOT_DIR, name, tag)
     alive_path = os.path.join(trial_dir, "alive")
 
@@ -441,6 +441,7 @@ def launch(
                 run_btn,
                 stop_btn,
             ],
+            concurrency_limit=1,
         )
         stop_btn.click(
             fn=stop_run,
@@ -453,7 +454,7 @@ def launch(
     launch_args = {"server_port": port}
     if listen:
         launch_args["server_name"] = "0.0.0.0"
-    demo.queue(concurrency_count=1).launch(**launch_args)
+    demo.queue().launch(**launch_args)
 
 
 def watch(
diff --git a/requirements.txt b/requirements.txt
index 142a76d2..88706a6a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -20,7 +20,7 @@ networkx
 pysdf
 PyMCubes
 wandb
-gradio
+gradio==4.11.0
 git+https://github.com/ashawkey/envlight.git
 torchmetrics
 

From fa40007b7b6c90f34cdec957a2e91acb65e1fe60 Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Sat, 23 Dec 2023 19:04:17 +0800
Subject: [PATCH 20/24] Update README.md

---
 README.md | 40 +++++++++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index c77f7b92..d0f69fa9 100644
--- a/README.md
+++ b/README.md
@@ -48,27 +48,33 @@ threestudio is a unified framework for 3D content creation from text prompts, si
 <img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/ac6089a7-d88f-414c-96d6-a5e75616115a" width="68%">
 </p>
 <p align="center">
-<img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/025e6980-baf2-4b5f-9c23-4f66ef847bf5" width="33%">
-<img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/f04b6bdd-ef02-4ce7-b7c9-981f8bda419f" width="33%">
+    
+<img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/302a399e-d36f-453e-a595-1c7d120451d3" width="35%">
+<img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/025e6980-baf2-4b5f-9c23-4f66ef847bf5" width="35%">
+<img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/f04b6bdd-ef02-4ce7-b7c9-981f8bda419f" width="35%">
 <img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/8892898f-8bd8-43dc-a4ec-dd8d078af860" width="50%">
 </p>
+<p align="center"><b>
+| <a href="https://github.com/HeliosZhao/Animate124/tree/threestudio">Animate-124</a> | <a href="https://github.com/DSaurus/threestudio-4dfy">4D-fy</a> | <a href="https://github.com/DSaurus/threestudio-dreamcraft3D">DreamCraft3D</a> | <a href="https://github.com/DSaurus/threestudio-3dgs">Gaussian Splatting</a> | <a href="https://github.com/DSaurus/threestudio-mvdream">MVDream</a> | <a href="https://github.com/DSaurus/threestudio-meshfitting">Mesh-Fitting</a> |
+</b>
 
 ## News
-- 12/18/2023: Implementation of [4D-fy](https://github.com/DSaurus/threestudio-4dfy) for 4D generation and [DreamCraft3D](https://github.com/DSaurus/threestudio-dreamcraft3D) for high-quality image-to-3D generation as the custom extensions! Follow the instructions on the extensions website to give it a try.
-- 12/13/2023: Implementation supporting [Stable Zero123](https://stability.ai/news/stable-zero123-3d-generation) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#stable-zero123) to give it a try.
-- 11/30/2023: Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).
-- 11/30/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions).
-- 08/25/2023: Implementation of [Magic123](https://guochengqian.github.io/project/magic123/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#magic123-) to give it a try.
-- 07/06/2023: Join our [Discord server](https://discord.gg/ejer2MAB8N) for lively discussions!
-- 07/03/2023: Try text-to-3D online in [HuggingFace Spaces](https://huggingface.co/spaces/bennyguo/threestudio) or using our [self-hosted service](http://t23-g-01.threestudio.ai) (GPU support from Tencent). To host the web interface locally, see [here](https://github.com/threestudio-project/threestudio#gradio-web-interface).
-- 06/20/2023: Implementations of Instruct-NeRF2NeRF and Control4D for high-fidelity 3D editing! Follow the instructions for [Control4D](https://github.com/threestudio-project/threestudio#control4d-) and [Instruct-NeRF2NeRF](https://github.com/threestudio-project/threestudio#instructnerf2nerf-) to give it a try.
-- 06/14/2023: Implementation of TextMesh! Follow the instructions [here](https://github.com/threestudio-project/threestudio#textmesh-) to give it a try.
-- 06/14/2023: Implementation of [prompt debiasing](https://arxiv.org/abs/2303.15413) and [Perp-Neg](https://perp-neg.github.io/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#tips-on-improving-quality) to give it a try.
-- 05/29/2023: An experimental implementation of using [Zero-1-to-3](https://zero123.cs.columbia.edu/) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#zero-1-to-3-) to give it a try.
-- 05/26/2023: Implementation of [ProlificDreamer](https://ml.cs.tsinghua.edu.cn/prolificdreamer/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#prolificdreamer-) to give it a try.
-- 05/14/2023: You can experiment with the SDS loss on 2D images using our [2dplayground](2dplayground.ipynb).
-- 05/13/2023: You can now try threestudio on [Google Colab](https://colab.research.google.com/github/threestudio-project/threestudio/blob/main/threestudio.ipynb)!
-- 05/11/2023: We now support exporting textured meshes! See [here](https://github.com/threestudio-project/threestudio#export-meshes) for instructions.
+- 23/12/2023: Thank [Yuyang Zhao](https://github.com/HeliosZhao) for implementation of image-to-4D generation extensions [Aniamte-124](https://github.com/HeliosZhao/Animate124/tree/threestudio)! Follow the instructions on the extensions website to give it a try.
+- 18/12/2023: Implementation of [4D-fy](https://github.com/DSaurus/threestudio-4dfy) for 4D generation and [DreamCraft3D](https://github.com/DSaurus/threestudio-dreamcraft3D) for high-quality image-to-3D generation as the custom extensions! Follow the instructions on the extensions website to give it a try.
+- 13/12/2023: Implementation supporting [Stable Zero123](https://stability.ai/news/stable-zero123-3d-generation) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#stable-zero123) to give it a try.
+- 30/11/2023: Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).
+- 30/11/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions).
+- 25/08/2023: Implementation of [Magic123](https://guochengqian.github.io/project/magic123/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#magic123-) to give it a try.
+- 06/07/2023: Join our [Discord server](https://discord.gg/ejer2MAB8N) for lively discussions!
+- 03/07/2023: Try text-to-3D online in [HuggingFace Spaces](https://huggingface.co/spaces/bennyguo/threestudio) or using our [self-hosted service](http://t23-g-01.threestudio.ai) (GPU support from Tencent). To host the web interface locally, see [here](https://github.com/threestudio-project/threestudio#gradio-web-interface).
+- 20/06/2023: Implementations of Instruct-NeRF2NeRF and Control4D for high-fidelity 3D editing! Follow the instructions for [Control4D](https://github.com/threestudio-project/threestudio#control4d-) and [Instruct-NeRF2NeRF](https://github.com/threestudio-project/threestudio#instructnerf2nerf-) to give it a try.
+- 14/06/2023: Implementation of TextMesh! Follow the instructions [here](https://github.com/threestudio-project/threestudio#textmesh-) to give it a try.
+- 14/06/2023: Implementation of [prompt debiasing](https://arxiv.org/abs/2303.15413) and [Perp-Neg](https://perp-neg.github.io/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#tips-on-improving-quality) to give it a try.
+- 29/05/2023: An experimental implementation of using [Zero-1-to-3](https://zero123.cs.columbia.edu/) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#zero-1-to-3-) to give it a try.
+- 26/05/2023: Implementation of [ProlificDreamer](https://ml.cs.tsinghua.edu.cn/prolificdreamer/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#prolificdreamer-) to give it a try.
+- 14/05/2023: You can experiment with the SDS loss on 2D images using our [2dplayground](2dplayground.ipynb).
+- 13/05/2023: You can now try threestudio on [Google Colab](https://colab.research.google.com/github/threestudio-project/threestudio/blob/main/threestudio.ipynb)!
+- 11/05/2023: We now support exporting textured meshes! See [here](https://github.com/threestudio-project/threestudio#export-meshes) for instructions.
 
 ![export-blender](https://github.com/threestudio-project/threestudio/assets/19284678/ccae2820-e702-484c-a43f-81678a365427)
 

From 652740ab3e30bd871f10acab6db2ed4afdfd25dc Mon Sep 17 00:00:00 2001
From: DSaurus <2238454358@qq.com>
Date: Sat, 23 Dec 2023 19:44:54 +0800
Subject: [PATCH 21/24] fix format

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d0f69fa9..389c675d 100644
--- a/README.md
+++ b/README.md
@@ -48,7 +48,7 @@ threestudio is a unified framework for 3D content creation from text prompts, si
 <img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/ac6089a7-d88f-414c-96d6-a5e75616115a" width="68%">
 </p>
 <p align="center">
-    
+
 <img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/302a399e-d36f-453e-a595-1c7d120451d3" width="35%">
 <img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/025e6980-baf2-4b5f-9c23-4f66ef847bf5" width="35%">
 <img alt="threestudio" src="https://github.com/threestudio-project/threestudio/assets/24589363/f04b6bdd-ef02-4ce7-b7c9-981f8bda419f" width="35%">

From 894390aad91ad80b6d0f5af591acf5a720ab2bfe Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Sat, 23 Dec 2023 22:21:22 +0800
Subject: [PATCH 22/24] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 389c675d..bc1b1b3d 100644
--- a/README.md
+++ b/README.md
@@ -59,7 +59,7 @@ threestudio is a unified framework for 3D content creation from text prompts, si
 </b>
 
 ## News
-- 23/12/2023: Thank [Yuyang Zhao](https://github.com/HeliosZhao) for implementation of image-to-4D generation extensions [Aniamte-124](https://github.com/HeliosZhao/Animate124/tree/threestudio)! Follow the instructions on the extensions website to give it a try.
+- 23/12/2023: Thanks to [Yuyang Zhao](https://github.com/HeliosZhao) for the implementation of the image-to-4D generation extension [Animate-124](https://github.com/HeliosZhao/Animate124/tree/threestudio)! Follow the instructions on the extensions website to give it a try.
 - 18/12/2023: Implementation of [4D-fy](https://github.com/DSaurus/threestudio-4dfy) for 4D generation and [DreamCraft3D](https://github.com/DSaurus/threestudio-dreamcraft3D) for high-quality image-to-3D generation as the custom extensions! Follow the instructions on the extensions website to give it a try.
 - 13/12/2023: Implementation supporting [Stable Zero123](https://stability.ai/news/stable-zero123-3d-generation) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#stable-zero123) to give it a try.
 - 30/11/2023: Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).

From e254d87d2a9bf43851ac953bc323e7fe695817ec Mon Sep 17 00:00:00 2001
From: johnbanq <cardinalist@yeah.net>
Date: Wed, 27 Dec 2023 18:12:54 +0000
Subject: [PATCH 23/24] Assert the text embeddings process successfully runs
 (#387)

---
 threestudio/models/prompt_processors/base.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/threestudio/models/prompt_processors/base.py b/threestudio/models/prompt_processors/base.py
index 83a040f2..8993434b 100644
--- a/threestudio/models/prompt_processors/base.py
+++ b/threestudio/models/prompt_processors/base.py
@@ -379,6 +379,7 @@ def prepare_text_embeddings(self):
                 )
                 subprocess.start()
                 subprocess.join()
+                assert subprocess.exitcode == 0, "prompt embedding process failed!"
             else:
                 self.spawn_func(
                     self.cfg.pretrained_model_name_or_path,

From 8ce432d51b2f46eae2e40c045b079bc66a994db0 Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Fri, 29 Dec 2023 00:05:40 +0800
Subject: [PATCH 24/24] support gaussian zero-123 (#388)

* support gaussian zero-123

* add exp interpolation
---
 threestudio/__init__.py   |  2 +-
 threestudio/data/image.py | 10 ++++++++--
 threestudio/utils/misc.py | 16 ++++++++++------
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/threestudio/__init__.py b/threestudio/__init__.py
index a1184e43..f5619b2f 100644
--- a/threestudio/__init__.py
+++ b/threestudio/__init__.py
@@ -1,5 +1,5 @@
 __modules__ = {}
-__version__ = "0.2.0"
+__version__ = "0.2.1"
 
 
 def register(name):
diff --git a/threestudio/data/image.py b/threestudio/data/image.py
index fe7c227e..033c528f 100644
--- a/threestudio/data/image.py
+++ b/threestudio/data/image.py
@@ -96,6 +96,10 @@ def setup(self, cfg, split):
             [torch.stack([right, up, -lookat], dim=-1), camera_position[:, :, None]],
             dim=-1,
         )
+        self.c2w4x4: Float[Tensor, "B 4 4"] = torch.cat(
+            [self.c2w, torch.zeros_like(self.c2w[:, :1])], dim=1
+        )
+        self.c2w4x4[:, 3, 3] = 1.0
 
         self.camera_position = camera_position
         self.light_position = light_position
@@ -258,8 +262,10 @@ def collate(self, batch) -> Dict[str, Any]:
             "ref_depth": self.depth,
             "ref_normal": self.normal,
             "mask": self.mask,
-            "height": self.cfg.height,
-            "width": self.cfg.width,
+            "height": self.height,
+            "width": self.width,
+            "c2w": self.c2w4x4,
+            "fovy": self.fovy,
         }
         if self.cfg.use_random_camera:
             batch["random_camera"] = self.random_pose_generator.collate(None)
diff --git a/threestudio/utils/misc.py b/threestudio/utils/misc.py
index ccb4987f..f2378f55 100644
--- a/threestudio/utils/misc.py
+++ b/threestudio/utils/misc.py
@@ -1,4 +1,5 @@
 import gc
+import math
 import os
 import re
 
@@ -62,7 +63,7 @@ def load_module_weights(
     return state_dict_to_load, ckpt["epoch"], ckpt["global_step"]
 
 
-def C(value: Any, epoch: int, global_step: int) -> float:
+def C(value: Any, epoch: int, global_step: int, interpolation="linear") -> float:
     if isinstance(value, int) or isinstance(value, float):
         pass
     else:
@@ -86,13 +87,16 @@ def C(value: Any, epoch: int, global_step: int) -> float:
         start_step, start_value, end_value, end_step = value
         if isinstance(end_step, int):
             current_step = global_step
-            value = start_value + (end_value - start_value) * max(
-                min(1.0, (current_step - start_step) / (end_step - start_step)), 0.0
-            )
         elif isinstance(end_step, float):
             current_step = epoch
-            value = start_value + (end_value - start_value) * max(
-                min(1.0, (current_step - start_step) / (end_step - start_step)), 0.0
+        t = max(min(1.0, (current_step - start_step) / (end_step - start_step)), 0.0)
+        if interpolation == "linear":
+            value = start_value + (end_value - start_value) * t
+        elif interpolation == "exp":
+            value = math.exp(math.log(start_value) * (1 - t) + math.log(end_value) * t)
+        else:
+            raise ValueError(
+                f"Unknown interpolation method: {interpolation}, only support linear and exp"
             )
     return value