From 477345befc45ea66248c567accd466ae82afbf70 Mon Sep 17 00:00:00 2001
From: DSaurus <2238454358@qq.com>
Date: Mon, 27 Nov 2023 21:48:43 +0800
Subject: [PATCH 01/24] fix elevation bug
---
threestudio/data/uncond.py | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/threestudio/data/uncond.py b/threestudio/data/uncond.py
index d3b67b7d..ee8df2e7 100644
--- a/threestudio/data/uncond.py
+++ b/threestudio/data/uncond.py
@@ -151,18 +151,16 @@ def collate(self, batch) -> Dict[str, Any]:
else:
# otherwise sample uniformly on sphere
elevation_range_percent = [
- (self.elevation_range[0] + 90.0) / 180.0,
- (self.elevation_range[1] + 90.0) / 180.0,
+ self.elevation_range[0] / 180.0 * math.pi,
+ self.elevation_range[1] / 180.0 * math.pi,
]
# inverse transform sampling
elevation = torch.asin(
- 2
- * (
+ (
torch.rand(self.batch_size)
- * (elevation_range_percent[1] - elevation_range_percent[0])
- + elevation_range_percent[0]
+ * (math.sin(elevation_range_percent[1]) - math.sin(elevation_range_percent[0]) )
+ + math.sin(elevation_range_percent[0] )
)
- - 1.0
)
elevation_deg = elevation / math.pi * 180.0
From 9b76296bf7be1bfa8fa965d8b9fb4c4fc017cb4f Mon Sep 17 00:00:00 2001
From: DSaurus <2238454358@qq.com>
Date: Mon, 27 Nov 2023 21:52:09 +0800
Subject: [PATCH 02/24] fix format
---
threestudio/data/uncond.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/threestudio/data/uncond.py b/threestudio/data/uncond.py
index ee8df2e7..8316325c 100644
--- a/threestudio/data/uncond.py
+++ b/threestudio/data/uncond.py
@@ -158,8 +158,11 @@ def collate(self, batch) -> Dict[str, Any]:
elevation = torch.asin(
(
torch.rand(self.batch_size)
- * (math.sin(elevation_range_percent[1]) - math.sin(elevation_range_percent[0]) )
- + math.sin(elevation_range_percent[0] )
+ * (
+ math.sin(elevation_range_percent[1])
+ - math.sin(elevation_range_percent[0])
+ )
+ + math.sin(elevation_range_percent[0])
)
)
elevation_deg = elevation / math.pi * 180.0
From 503140385940b1d4d3a528c0cc4494a52f198eff Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Fri, 1 Dec 2023 01:51:30 +0800
Subject: [PATCH 03/24] Enable Gaussian Splatting and Custom Extension (#344)
* add gaussian wip
* clean up
* add refine
* gsgen baseline w/o point-e
* upd config
* rm KNN
* adjust parameters
* max_num, fix color
* import lib
* background device
* update config
* update config
* clean gaussian splatting
* fix format
* update extensions
* fix memory bug
* prepare for extensions
* clean gaussian
* clean gaussian
* clean
* fix bugs
---------
Co-authored-by: Linyou
---
.gitignore | 2 +
README.md | 4 +-
launch.py | 64 ++++++++++++++++++
threestudio/data/uncond.py | 20 ++++--
.../background/solid_color_background.py | 7 +-
threestudio/utils/loss.py | 16 +++++
threestudio/utils/ops.py | 65 +++++++++++++++++++
7 files changed, 167 insertions(+), 11 deletions(-)
create mode 100644 threestudio/utils/loss.py
diff --git a/.gitignore b/.gitignore
index 12adb415..0bf85006 100644
--- a/.gitignore
+++ b/.gitignore
@@ -188,4 +188,6 @@ outputs-gradio/
# wandb
wandb/
+custom/*
+
load/tets/256_tets.npz
diff --git a/README.md b/README.md
index 52abcde4..15da0e4c 100644
--- a/README.md
+++ b/README.md
@@ -44,8 +44,10 @@ threestudio is a unified framework for 3D content creation from text prompts, si
Did not find what you want? Submit a feature request or upvote others' requests here!
-## News
+## News
+- 30/11/2023 Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).
+- 30/11/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions).
- 08/25/2023: Implementation of [Magic123](https://guochengqian.github.io/project/magic123/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#magic123-) to give it a try.
- 07/06/2023: Join our [Discord server](https://discord.gg/ejer2MAB8N) for lively discussions!
- 07/03/2023: Try text-to-3D online in [HuggingFace Spaces](https://huggingface.co/spaces/bennyguo/threestudio) or using our [self-hosted service](http://t23-g-01.threestudio.ai) (GPU support from Tencent). To host the web interface locally, see [here](https://github.com/threestudio-project/threestudio#gradio-web-interface).
diff --git a/launch.py b/launch.py
index 72add725..d24940af 100644
--- a/launch.py
+++ b/launch.py
@@ -1,8 +1,11 @@
import argparse
import contextlib
+import importlib
import logging
import os
import sys
+import time
+import traceback
class ColoredFilter(logging.Filter):
@@ -39,6 +42,65 @@ def filter(self, record):
return True
+def load_custom_module(module_path):
+ module_name = os.path.basename(module_path)
+ if os.path.isfile(module_path):
+ sp = os.path.splitext(module_path)
+ module_name = sp[0]
+ try:
+ if os.path.isfile(module_path):
+ module_spec = importlib.util.spec_from_file_location(
+ module_name, module_path
+ )
+ else:
+ module_spec = importlib.util.spec_from_file_location(
+ module_name, os.path.join(module_path, "__init__.py")
+ )
+
+ module = importlib.util.module_from_spec(module_spec)
+ sys.modules[module_name] = module
+ module_spec.loader.exec_module(module)
+ return True
+ except Exception as e:
+ print(traceback.format_exc())
+ print(f"Cannot import {module_path} module for custom nodes:", e)
+ return False
+
+
+def load_custom_modules():
+ node_paths = ["custom"]
+ node_import_times = []
+ for custom_node_path in node_paths:
+ possible_modules = os.listdir(custom_node_path)
+ if "__pycache__" in possible_modules:
+ possible_modules.remove("__pycache__")
+
+ for possible_module in possible_modules:
+ module_path = os.path.join(custom_node_path, possible_module)
+ if (
+ os.path.isfile(module_path)
+ and os.path.splitext(module_path)[1] != ".py"
+ ):
+ continue
+ if module_path.endswith(".disabled"):
+ continue
+ time_before = time.perf_counter()
+ success = load_custom_module(module_path)
+ node_import_times.append(
+ (time.perf_counter() - time_before, module_path, success)
+ )
+
+ if len(node_import_times) > 0:
+ print("\nImport times for custom modules:")
+ for n in sorted(node_import_times):
+ if n[2]:
+ import_message = ""
+ else:
+ import_message = " (IMPORT FAILED)"
+ print("{:6.1f} seconds{}:".format(n[0], import_message), n[1])
+ print()
+
+
def main(args, extras) -> None:
# set CUDA_VISIBLE_DEVICES if needed, then import pytorch-lightning
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
@@ -94,6 +156,8 @@ def main(args, extras) -> None:
else:
handler.setFormatter(logging.Formatter("[%(levelname)s] %(message)s"))
+ load_custom_modules()
+
# parse YAML config to OmegaConf
cfg: ExperimentConfig
cfg = load_config(args.config, cli_args=extras, n_gpus=n_gpus)
diff --git a/threestudio/data/uncond.py b/threestudio/data/uncond.py
index 8316325c..d051e3fd 100644
--- a/threestudio/data/uncond.py
+++ b/threestudio/data/uncond.py
@@ -3,6 +3,7 @@
import random
from dataclasses import dataclass, field
+import numpy as np
import pytorch_lightning as pl
import torch
import torch.nn.functional as F
@@ -14,6 +15,7 @@
from threestudio.utils.config import parse_structured
from threestudio.utils.misc import get_device
from threestudio.utils.ops import (
+ get_full_projection_matrix,
get_mvp_matrix,
get_projection_matrix,
get_ray_directions,
@@ -315,10 +317,11 @@ def collate(self, batch) -> Dict[str, Any]:
# Importance note: the returned rays_d MUST be normalized!
rays_o, rays_d = get_rays(directions, c2w, keepdim=True)
- proj_mtx: Float[Tensor, "B 4 4"] = get_projection_matrix(
- fovy, self.width / self.height, 0.1, 1000.0
+ self.proj_mtx: Float[Tensor, "B 4 4"] = get_projection_matrix(
+ fovy, self.width / self.height, 0.01, 100.0
) # FIXME: hard-coded near and far
- mvp_mtx: Float[Tensor, "B 4 4"] = get_mvp_matrix(c2w, proj_mtx)
+ mvp_mtx: Float[Tensor, "B 4 4"] = get_mvp_matrix(c2w, self.proj_mtx)
+ self.fovy = fovy
return {
"rays_o": rays_o,
@@ -332,6 +335,8 @@ def collate(self, batch) -> Dict[str, Any]:
"camera_distances": camera_distances,
"height": self.height,
"width": self.width,
+ "fovy": self.fovy,
+ "proj_mtx": self.proj_mtx,
}
@@ -414,10 +419,10 @@ def __init__(self, cfg: Any, split: str) -> None:
)
rays_o, rays_d = get_rays(directions, c2w, keepdim=True)
- proj_mtx: Float[Tensor, "B 4 4"] = get_projection_matrix(
- fovy, self.cfg.eval_width / self.cfg.eval_height, 0.1, 1000.0
+ self.proj_mtx: Float[Tensor, "B 4 4"] = get_projection_matrix(
+ fovy, self.cfg.eval_width / self.cfg.eval_height, 0.01, 100.0
) # FIXME: hard-coded near and far
- mvp_mtx: Float[Tensor, "B 4 4"] = get_mvp_matrix(c2w, proj_mtx)
+ mvp_mtx: Float[Tensor, "B 4 4"] = get_mvp_matrix(c2w, self.proj_mtx)
self.rays_o, self.rays_d = rays_o, rays_d
self.mvp_mtx = mvp_mtx
@@ -427,6 +432,7 @@ def __init__(self, cfg: Any, split: str) -> None:
self.elevation, self.azimuth = elevation, azimuth
self.elevation_deg, self.azimuth_deg = elevation_deg, azimuth_deg
self.camera_distances = camera_distances
+ self.fovy = fovy
def __len__(self):
return self.n_views
@@ -445,6 +451,8 @@ def __getitem__(self, index):
"camera_distances": self.camera_distances[index],
"height": self.cfg.eval_height,
"width": self.cfg.eval_width,
+ "fovy": self.fovy[index],
+ "proj_mtx": self.proj_mtx[index],
}
def collate(self, batch):
diff --git a/threestudio/models/background/solid_color_background.py b/threestudio/models/background/solid_color_background.py
index 0763a0c5..0b68d5b4 100644
--- a/threestudio/models/background/solid_color_background.py
+++ b/threestudio/models/background/solid_color_background.py
@@ -34,10 +34,9 @@ def configure(self) -> None:
)
def forward(self, dirs: Float[Tensor, "B H W 3"]) -> Float[Tensor, "B H W Nc"]:
- color = (
- torch.ones(*dirs.shape[:-1], self.cfg.n_output_dims).to(dirs)
- * self.env_color
- )
+ color = torch.ones(*dirs.shape[:-1], self.cfg.n_output_dims).to(
+ dirs
+ ) * self.env_color.to(dirs)
if (
self.training
and self.cfg.random_aug
diff --git a/threestudio/utils/loss.py b/threestudio/utils/loss.py
new file mode 100644
index 00000000..eb0c7250
--- /dev/null
+++ b/threestudio/utils/loss.py
@@ -0,0 +1,16 @@
+import torch
+
+
+def _tensor_size(t):
+ return t.size()[1] * t.size()[2] * t.size()[3]
+
+
+def tv_loss(x):
+ batch_size = x.size()[0]
+ h_x = x.size()[2]
+ w_x = x.size()[3]
+ count_h = _tensor_size(x[:, :, 1:, :])
+ count_w = _tensor_size(x[:, :, :, 1:])
+ h_tv = torch.pow((x[:, :, 1:, :] - x[:, :, : h_x - 1, :]), 2).sum()
+ w_tv = torch.pow((x[:, :, :, 1:] - x[:, :, :, : w_x - 1]), 2).sum()
+ return 2 * (h_tv / count_h + w_tv / count_w) / batch_size
diff --git a/threestudio/utils/ops.py b/threestudio/utils/ops.py
index 320fa46a..b35d3cd0 100644
--- a/threestudio/utils/ops.py
+++ b/threestudio/utils/ops.py
@@ -1,3 +1,4 @@
+import math
from collections import defaultdict
import numpy as np
@@ -292,6 +293,70 @@ def get_mvp_matrix(
return mvp_mtx
+def get_full_projection_matrix(
+ c2w: Float[Tensor, "B 4 4"], proj_mtx: Float[Tensor, "B 4 4"]
+) -> Float[Tensor, "B 4 4"]:
+ return (c2w.unsqueeze(0).bmm(proj_mtx.unsqueeze(0))).squeeze(0)
+
+
+# gaussian splatting functions
+def convert_pose(C2W):
+ flip_yz = torch.eye(4, device=C2W.device)
+ flip_yz[1, 1] = -1
+ flip_yz[2, 2] = -1
+ C2W = torch.matmul(C2W, flip_yz)
+ return C2W
+
+
+def get_projection_matrix_gaussian(znear, zfar, fovX, fovY, device="cuda"):
+ tanHalfFovY = math.tan((fovY / 2))
+ tanHalfFovX = math.tan((fovX / 2))
+
+ top = tanHalfFovY * znear
+ bottom = -top
+ right = tanHalfFovX * znear
+ left = -right
+
+ P = torch.zeros(4, 4, device=device)
+
+ z_sign = 1.0
+
+ P[0, 0] = 2.0 * znear / (right - left)
+ P[1, 1] = 2.0 * znear / (top - bottom)
+ P[0, 2] = (right + left) / (right - left)
+ P[1, 2] = (top + bottom) / (top - bottom)
+ P[3, 2] = z_sign
+ P[2, 2] = z_sign * zfar / (zfar - znear)
+ P[2, 3] = -(zfar * znear) / (zfar - znear)
+ return P
+
+
+def get_fov_gaussian(P):
+ tanHalfFovX = 1 / P[0, 0]
+ tanHalfFovY = 1 / P[1, 1]
+ fovY = math.atan(tanHalfFovY) * 2
+ fovX = math.atan(tanHalfFovX) * 2
+ return fovX, fovY
+
+
+def get_cam_info_gaussian(c2w, fovx, fovy, znear, zfar):
+ c2w = convert_pose(c2w)
+ world_view_transform = torch.inverse(c2w)
+
+ world_view_transform = world_view_transform.transpose(0, 1).cuda().float()
+ projection_matrix = (
+ get_projection_matrix_gaussian(znear=znear, zfar=zfar, fovX=fovx, fovY=fovy)
+ .transpose(0, 1)
+ .cuda()
+ )
+ full_proj_transform = (
+ world_view_transform.unsqueeze(0).bmm(projection_matrix.unsqueeze(0))
+ ).squeeze(0)
+ camera_center = world_view_transform.inverse()[3, :3]
+
+ return world_view_transform, full_proj_transform, camera_center
+
+
def binary_cross_entropy(input, target):
"""
F.binary_cross_entropy is not numerically stable in mixed-precision training.
From 692968736ace7b9902bba4b0fc218e2bae965ef6 Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Fri, 1 Dec 2023 03:22:42 +0800
Subject: [PATCH 04/24] Update README.md
---
README.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 15da0e4c..71505dc5 100644
--- a/README.md
+++ b/README.md
@@ -46,8 +46,8 @@ threestudio is a unified framework for 3D content creation from text prompts, si
## News
-- 30/11/2023 Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).
-- 30/11/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions).
+- 11/30/2023 Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).
+- 11/30/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions).
- 08/25/2023: Implementation of [Magic123](https://guochengqian.github.io/project/magic123/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#magic123-) to give it a try.
- 07/06/2023: Join our [Discord server](https://discord.gg/ejer2MAB8N) for lively discussions!
- 07/03/2023: Try text-to-3D online in [HuggingFace Spaces](https://huggingface.co/spaces/bennyguo/threestudio) or using our [self-hosted service](http://t23-g-01.threestudio.ai) (GPU support from Tencent). To host the web interface locally, see [here](https://github.com/threestudio-project/threestudio#gradio-web-interface).
From cfabde68d89c96975bf0a230b85955d3ac143a2d Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Fri, 1 Dec 2023 03:56:32 +0800
Subject: [PATCH 05/24] add custom folder (#348)
---
custom/put_custom_extensions_here | 0
1 file changed, 0 insertions(+), 0 deletions(-)
create mode 100644 custom/put_custom_extensions_here
diff --git a/custom/put_custom_extensions_here b/custom/put_custom_extensions_here
new file mode 100644
index 00000000..e69de29b
From 7ce2f499e8459f920f122c694796991fd0b6f88a Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Fri, 1 Dec 2023 06:00:36 +0800
Subject: [PATCH 06/24] Update README.md
---
README.md | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 71505dc5..cfec4bac 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,7 @@ threestudio is a unified framework for 3D content creation from text prompts, si
| Zero-1-to-3 | Magic123 |
| InstructNeRF2NeRF | Control4D |
-
+
@@ -41,9 +41,15 @@ threestudio is a unified framework for 3D content creation from text prompts, si
- Did not find what you want? Submit a feature request or upvote others' requests here!
+ Did not find what you want? Checkout threestudio-extension or submit a feature request here!
+
+
+
+
+
+
## News
- 11/30/2023 Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).
From eaadd2b5b813e997f06d8656d1d7854c6f2aca96 Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Fri, 1 Dec 2023 06:36:16 +0800
Subject: [PATCH 07/24] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index cfec4bac..018200e3 100644
--- a/README.md
+++ b/README.md
@@ -41,7 +41,7 @@ threestudio is a unified framework for 3D content creation from text prompts, si
- Did not find what you want? Checkout threestudio-extension or submit a feature request here!
+ Did not find what you want? Checkout threestudio-extension or submit a feature request here!
From 2c202276747a892cfc1ded8e27a005715be8f5f2 Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Mon, 4 Dec 2023 22:00:04 +0800
Subject: [PATCH 08/24] ray direction normalize (#351)
* add rays normalization setting
---
.gitignore | 3 +++
threestudio/data/image.py | 8 +++++++-
threestudio/data/multiview.py | 12 ++++++++++--
threestudio/data/uncond.py | 10 ++++++++--
threestudio/utils/ops.py | 4 +++-
5 files changed, 31 insertions(+), 6 deletions(-)
diff --git a/.gitignore b/.gitignore
index 0bf85006..b774bf79 100644
--- a/.gitignore
+++ b/.gitignore
@@ -188,6 +188,9 @@ outputs-gradio/
# wandb
wandb/
+# vscode
+.code-workspace
+
custom/*
load/tets/256_tets.npz
diff --git a/threestudio/data/image.py b/threestudio/data/image.py
index 5c60b53b..fe7c227e 100644
--- a/threestudio/data/image.py
+++ b/threestudio/data/image.py
@@ -48,6 +48,8 @@ class SingleImageDataModuleConfig:
requires_depth: bool = False
requires_normal: bool = False
+ rays_d_normalize: bool = True
+
class SingleImageDataBase:
def setup(self, cfg, split):
@@ -141,7 +143,11 @@ def set_rays(self):
directions[:, :, :, :2] = directions[:, :, :, :2] / self.focal_length
rays_o, rays_d = get_rays(
- directions, self.c2w, keepdim=True, noise_scale=self.cfg.rays_noise_scale
+ directions,
+ self.c2w,
+ keepdim=True,
+ noise_scale=self.cfg.rays_noise_scale,
+ normalize=self.cfg.rays_d_normalize,
)
proj_mtx: Float[Tensor, "4 4"] = get_projection_matrix(
diff --git a/threestudio/data/multiview.py b/threestudio/data/multiview.py
index e127390a..8b722624 100644
--- a/threestudio/data/multiview.py
+++ b/threestudio/data/multiview.py
@@ -70,6 +70,8 @@ class MultiviewsDataModuleConfig:
camera_distance: float = -1
eval_interpolation: Optional[Tuple[int, int, int]] = None # (0, 1, 30)
+ rays_d_normalize: bool = True
+
class MultiviewIterableDataset(IterableDataset):
def __init__(self, cfg: Any) -> None:
@@ -164,7 +166,10 @@ def __init__(self, cfg: Any) -> None:
self.frames_img: Float[Tensor, "B H W 3"] = torch.stack(frames_img, dim=0)
self.rays_o, self.rays_d = get_rays(
- self.frames_direction, self.frames_c2w, keepdim=True
+ self.frames_direction,
+ self.frames_c2w,
+ keepdim=True,
+ normalize=self.cfg.rays_d_normalize,
)
self.mvp_mtx: Float[Tensor, "B 4 4"] = get_mvp_matrix(
self.frames_c2w, self.frames_proj
@@ -344,7 +349,10 @@ def __init__(self, cfg: Any, split: str) -> None:
self.frames_img: Float[Tensor, "B H W 3"] = torch.stack(frames_img, dim=0)
self.rays_o, self.rays_d = get_rays(
- self.frames_direction, self.frames_c2w, keepdim=True
+ self.frames_direction,
+ self.frames_c2w,
+ keepdim=True,
+ normalize=self.cfg.rays_d_normalize,
)
self.mvp_mtx: Float[Tensor, "B 4 4"] = get_mvp_matrix(
self.frames_c2w, self.frames_proj
diff --git a/threestudio/data/uncond.py b/threestudio/data/uncond.py
index d051e3fd..999ba55c 100644
--- a/threestudio/data/uncond.py
+++ b/threestudio/data/uncond.py
@@ -56,6 +56,8 @@ class RandomCameraDataModuleConfig:
batch_uniform_azimuth: bool = True
progressive_until: int = 0 # progressive ranges for elevation, azimuth, r, fovy
+ rays_d_normalize: bool = True
+
class RandomCameraIterableDataset(IterableDataset, Updateable):
def __init__(self, cfg: Any) -> None:
@@ -315,7 +317,9 @@ def collate(self, batch) -> Dict[str, Any]:
)
# Importance note: the returned rays_d MUST be normalized!
- rays_o, rays_d = get_rays(directions, c2w, keepdim=True)
+ rays_o, rays_d = get_rays(
+ directions, c2w, keepdim=True, normalize=self.cfg.rays_d_normalize
+ )
self.proj_mtx: Float[Tensor, "B 4 4"] = get_projection_matrix(
fovy, self.width / self.height, 0.01, 100.0
@@ -418,7 +422,9 @@ def __init__(self, cfg: Any, split: str) -> None:
directions[:, :, :, :2] / focal_length[:, None, None, None]
)
- rays_o, rays_d = get_rays(directions, c2w, keepdim=True)
+ rays_o, rays_d = get_rays(
+ directions, c2w, keepdim=True, normalize=self.cfg.rays_d_normalize
+ )
self.proj_mtx: Float[Tensor, "B 4 4"] = get_projection_matrix(
fovy, self.cfg.eval_width / self.cfg.eval_height, 0.01, 100.0
) # FIXME: hard-coded near and far
diff --git a/threestudio/utils/ops.py b/threestudio/utils/ops.py
index b35d3cd0..81d5b599 100644
--- a/threestudio/utils/ops.py
+++ b/threestudio/utils/ops.py
@@ -222,6 +222,7 @@ def get_rays(
c2w: Float[Tensor, "... 4 4"],
keepdim=False,
noise_scale=0.0,
+ normalize=True,
) -> Tuple[Float[Tensor, "... 3"], Float[Tensor, "... 3"]]:
# Rotate ray directions from camera coordinate to the world coordinate
assert directions.shape[-1] == 3
@@ -257,7 +258,8 @@ def get_rays(
rays_o = rays_o + torch.randn(3, device=rays_o.device) * noise_scale
rays_d = rays_d + torch.randn(3, device=rays_d.device) * noise_scale
- rays_d = F.normalize(rays_d, dim=-1)
+ if normalize:
+ rays_d = F.normalize(rays_d, dim=-1)
if not keepdim:
rays_o, rays_d = rays_o.reshape(-1, 3), rays_d.reshape(-1, 3)
From 3fe3153bf29927459b5ad5cc98d955d9b4c51ba3 Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Wed, 6 Dec 2023 23:42:14 +0800
Subject: [PATCH 09/24] Add modules of 4D-fy for 4D generation (#353)
---
.../models/geometry/implicit_volume.py | 16 +++++
threestudio/models/networks.py | 64 +++++++++++++++++++
2 files changed, 80 insertions(+)
diff --git a/threestudio/models/geometry/implicit_volume.py b/threestudio/models/geometry/implicit_volume.py
index d1eeb96e..cfee0017 100644
--- a/threestudio/models/geometry/implicit_volume.py
+++ b/threestudio/models/geometry/implicit_volume.py
@@ -53,6 +53,9 @@ class Config(BaseImplicitGeometry.Config):
# automatically determine the threshold
isosurface_threshold: Union[float, str] = 25.0
+ # 4D Gaussian Annealing
+ anneal_density_blob_std_config: Optional[dict] = None
+
cfg: Config
def configure(self) -> None:
@@ -267,3 +270,16 @@ def create_from(
raise TypeError(
f"Cannot create {ImplicitVolume.__name__} from {other.__class__.__name__}"
)
+
+ def update_step(
+ self, epoch: int, global_step: int, on_load_weights: bool = False
+ ) -> None:
+ if self.cfg.anneal_density_blob_std_config is not None:
+ min_step = self.cfg.anneal_density_blob_std_config.min_anneal_step
+ max_step = self.cfg.anneal_density_blob_std_config.max_anneal_step
+ if global_step >= min_step and global_step <= max_step:
+ end_val = self.cfg.anneal_density_blob_std_config.end_val
+ start_val = self.cfg.anneal_density_blob_std_config.start_val
+ self.density_blob_std = start_val + (global_step - min_step) * (
+ end_val - start_val
+ ) / (max_step - min_step)
diff --git a/threestudio/models/networks.py b/threestudio/models/networks.py
index 9dc3dc28..cfe986ea 100644
--- a/threestudio/models/networks.py
+++ b/threestudio/models/networks.py
@@ -64,6 +64,68 @@ def forward(self, x):
return self.encoding(x)
+# 4D implicit decomposition of space and time (4D-fy)
+class TCNNEncodingSpatialTime(nn.Module):
+ def __init__(
+ self, in_channels, config, dtype=torch.float32, init_time_zero=False
+ ) -> None:
+ super().__init__()
+ self.n_input_dims = in_channels
+ config["otype"] = "HashGrid"
+ self.num_frames = 1 # config["num_frames"]
+ self.static = config["static"]
+ self.cfg = config_to_primitive(config)
+ self.cfg_time = self.cfg
+ self.n_key_frames = config.get("n_key_frames", 1)
+ with torch.cuda.device(get_rank()):
+ self.encoding = tcnn.Encoding(self.n_input_dims, self.cfg, dtype=dtype)
+ self.encoding_time = tcnn.Encoding(
+ self.n_input_dims + 1, self.cfg_time, dtype=dtype
+ )
+ self.n_output_dims = self.encoding.n_output_dims
+ self.frame_time = None
+ if self.static:
+ self.set_temp_param_grad(requires_grad=False)
+ self.use_key_frame = config.get("use_key_frame", False)
+ self.is_video = True
+ self.update_occ_grid = False
+
+ def set_temp_param_grad(self, requires_grad=False):
+ self.set_param_grad(self.encoding_time, requires_grad=requires_grad)
+
+ def set_param_grad(self, param_list, requires_grad=False):
+ if isinstance(param_list, nn.Parameter):
+ param_list.requires_grad = requires_grad
+ else:
+ for param in param_list.parameters():
+ param.requires_grad = requires_grad
+
+ def forward(self, x):
+ # TODO frame_time only supports batch_size == 1 cases
+ if self.update_occ_grid and not isinstance(self.frame_time, float):
+ frame_time = self.frame_time
+ else:
+ if (self.static or not self.training) and self.frame_time is None:
+ frame_time = torch.zeros(
+ (self.num_frames, 1), device=x.device, dtype=x.dtype
+ ).expand(x.shape[0], 1)
+ else:
+ if self.frame_time is None:
+ frame_time = 0.0
+ else:
+ frame_time = self.frame_time
+ frame_time = (
+ torch.ones((self.num_frames, 1), device=x.device, dtype=x.dtype)
+ * frame_time
+ ).expand(x.shape[0], 1)
+ frame_time = frame_time.view(-1, 1)
+ enc_space = self.encoding(x)
+ x_frame_time = torch.cat((x, frame_time), 1)
+ enc_space_time = self.encoding_time(x_frame_time)
+ enc = enc_space + enc_space_time
+ return enc
+
+
class ProgressiveBandHashGrid(nn.Module, Updateable):
def __init__(self, in_channels, config, dtype=torch.float32):
super().__init__()
@@ -136,6 +198,8 @@ def get_encoding(n_input_dims: int, config) -> nn.Module:
encoding = ProgressiveBandFrequency(n_input_dims, config_to_primitive(config))
elif config.otype == "ProgressiveBandHashGrid":
encoding = ProgressiveBandHashGrid(n_input_dims, config_to_primitive(config))
+ elif config.otype == "HashGridSpatialTime":
+ encoding = TCNNEncodingSpatialTime(n_input_dims, config) # 4D-fy encoding
else:
encoding = TCNNEncoding(n_input_dims, config_to_primitive(config))
encoding = CompositeEncoding(
From 56564c88e0139bdd31b1585f8720a1ae6141f138 Mon Sep 17 00:00:00 2001
From: Vikram Voleti
Date: Wed, 13 Dec 2023 13:24:32 -0500
Subject: [PATCH 10/24] [DRAFT] Adds stable-zero123 guidance (#356)
* Adds stable-zero123 guidance
* Fixes end-of-file?
* Update README.md with gif
* Fixes end-of-file?
* Corrects link to huggingface model
* general linear config
* Fixed HF link
* Fixes HF link
---------
Co-authored-by: Vikram Voleti
Co-authored-by: DSaurus <2238454358@qq.com>
---
README.md | 27 ++
.../{zero123_64.yaml => stable-zero123.yaml} | 56 ++-
configs/zero123.yaml | 8 +-
load/images/{dog1.png => dog1_rgba.png} | Bin
load/zero123/download.sh | 5 +-
threestudio/models/guidance/__init__.py | 1 +
.../guidance/stable_zero123_guidance.py | 340 ++++++++++++++++++
threestudio/utils/config.py | 5 +
threestudio/utils/misc.py | 11 +
9 files changed, 415 insertions(+), 38 deletions(-)
rename configs/{zero123_64.yaml => stable-zero123.yaml} (75%)
rename load/images/{dog1.png => dog1_rgba.png} (100%)
create mode 100644 threestudio/models/guidance/stable_zero123_guidance.py
diff --git a/README.md b/README.md
index 018200e3..4c3a0ab2 100644
--- a/README.md
+++ b/README.md
@@ -108,6 +108,8 @@ pip install ninja
pip install -r requirements.txt
```
+- (Optional) `tiny-cuda-nn` installation might require downgrading pip to 23.0.1
+
- (Optional, Recommended) The best-performing models in threestudio use the newly-released T2I model [DeepFloyd IF](https://github.com/deep-floyd/IF), which currently requires signing a license agreement. If you would like to use these models, you need to [accept the license on the model card of DeepFloyd IF](https://huggingface.co/DeepFloyd/IF-I-XL-v1.0), and login into the Hugging Face hub in the terminal by `huggingface-cli login`.
- For contributors, see [here](https://github.com/threestudio-project/threestudio#contributing-to-threestudio).
@@ -517,6 +519,31 @@ python launch.py --config configs/magic123-refine-sd.yaml --train --gpu 0 data.i
- If the image contains non-front-facing objects, specifying the approximate elevation and azimuth angle by setting `data.default_elevation_deg` and `data.default_azimuth_deg` can be helpful. In threestudio, top is elevation +90 and bottom is elevation -90; left is azimuth -90 and right is azimuth +90.
+
+### Stable Zero123
+
+**Installation**
+
+Download pretrained Stable Zero123 checkpoint `stable_zero123.ckpt` into `load/zero123` from https://huggingface.co/stabilityai/stable-zero123
+
+**Results obtained by threestudio (Stable Zero123 vs Zero123-XL)**
+![Final_video_v01](https://github.com/threestudio-project/threestudio/assets/22424247/bf2d2213-5027-489c-a6ba-1c56c14ee8b7)
+
+**Example running commands**
+
+1. Take an image of your choice, or generate it from text using your favourite AI image generator such as SDXL Turbo (https://clipdrop.co/stable-diffusion-turbo), e.g. "A simple 3D render of a friendly dog".
+2. Remove its background using Clipdrop (https://clipdrop.co/remove-background)
+3. Save to `load/images/`, preferably with `_rgba.png` as the suffix
+4. Run Zero-1-to-3 with the Stable Zero123 ckpt:
+```sh
+python launch.py --config configs/stable-zero123.yaml --train --gpu 0 data.image_path=./load/images/hamburger_rgba.png
+```
+
+**IMPORTANT NOTE: This is an experimental implementation and we're constantly improving the quality.**
+
+**IMPORTANT NOTE: This implementation extends the Zero-1-to-3 implementation below, and is heavily inspired by the Zero-1-to-3 implementation in [stable-dreamfusion](https://github.com/ashawkey/stable-dreamfusion)! `extern/ldm_zero123` is borrowed from `stable-dreamfusion/ldm`.**
+
+
### Zero-1-to-3 [![arXiv](https://img.shields.io/badge/arXiv-2303.11328-b31b1b.svg?style=flat-square)](https://arxiv.org/abs/2303.11328)
**Installation**
diff --git a/configs/zero123_64.yaml b/configs/stable-zero123.yaml
similarity index 75%
rename from configs/zero123_64.yaml
rename to configs/stable-zero123.yaml
index 6a579335..5a372f66 100644
--- a/configs/zero123_64.yaml
+++ b/configs/stable-zero123.yaml
@@ -1,24 +1,25 @@
-name: "zero123"
-tag: "${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}_prog${data.random_camera.progressive_until}"
+name: "zero123-sai"
+tag: "${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}"
exp_root_dir: "outputs"
seed: 0
data_type: "single-image-datamodule"
data: # threestudio/data/image.py -> SingleImageDataModuleConfig
image_path: ./load/images/hamburger_rgba.png
- height: 128
- width: 128
- default_elevation_deg: 0.0
+ height: [128, 256, 512]
+ width: [128, 256, 512]
+ resolution_milestones: [200, 300]
+ default_elevation_deg: 5.0
default_azimuth_deg: 0.0
default_camera_distance: 3.8
default_fovy_deg: 20.0
requires_depth: ${cmaxgt0orcmaxgt0:${system.loss.lambda_depth},${system.loss.lambda_depth_rel}}
requires_normal: ${cmaxgt0:${system.loss.lambda_normal}}
random_camera: # threestudio/data/uncond.py -> RandomCameraDataModuleConfig
- height: 64
- width: 64
- batch_size: 12
- resolution_milestones: []
+ height: [64, 128, 256]
+ width: [64, 128, 256]
+ batch_size: [12, 8, 4]
+ resolution_milestones: [200, 300]
eval_height: 512
eval_width: 512
eval_batch_size: 1
@@ -47,13 +48,6 @@ system:
radius: 2.0
normal_type: "analytic"
- # the density initialization proposed in the DreamFusion paper
- # does not work very well
- # density_bias: "blob_dreamfusion"
- # density_activation: exp
- # density_blob_scale: 5.
- # density_blob_std: 0.2
-
# use Magic3D density initialization instead
density_bias: "blob_magic3d"
density_activation: softplus
@@ -88,28 +82,26 @@ system:
renderer:
radius: ${system.geometry.radius}
num_samples_per_ray: 512
- return_comp_normal: ${gt0:${system.loss.lambda_normal_smooth}}
- return_normal_perturb: ${gt0:${system.loss.lambda_3d_normal_smooth}}
+ return_comp_normal: ${cmaxgt0:${system.loss.lambda_normal_smooth}}
+ return_normal_perturb: ${cmaxgt0:${system.loss.lambda_3d_normal_smooth}}
prompt_processor_type: "dummy-prompt-processor" # Zero123 doesn't use prompts
prompt_processor:
pretrained_model_name_or_path: ""
prompt: ""
- guidance_type: "zero123-guidance"
+ guidance_type: "stable-zero123-guidance"
guidance:
- pretrained_model_name_or_path: "./load/zero123/zero123-xl.ckpt"
pretrained_config: "./load/zero123/sd-objaverse-finetune-c_concat-256.yaml"
+ pretrained_model_name_or_path: "./load/zero123/stable_zero123.ckpt"
vram_O: ${not:${gt0:${system.freq.guidance_eval}}}
cond_image_path: ${data.image_path}
cond_elevation_deg: ${data.default_elevation_deg}
cond_azimuth_deg: ${data.default_azimuth_deg}
cond_camera_distance: ${data.default_camera_distance}
guidance_scale: 3.0
- #min_step_percent: 0.02
- min_step_percent: [0, 0.4, 0.2, 200] # (start_iter, start_val, end_val, end_iter)
- #max_step_percent: 0.98
- max_step_percent: [0, 0.85, 0.5, 200]
+ min_step_percent: [50, 0.7, 0.3, 200] # (start_iter, start_val, end_val, end_iter)
+ max_step_percent: [50, 0.98, 0.8, 200]
freq:
ref_only_steps: 0
@@ -123,16 +115,16 @@ system:
loss:
lambda_sds: 0.1
- lambda_rgb: 500.
+ lambda_rgb: [100, 500., 1000., 400]
lambda_mask: 50.
lambda_depth: 0. # 0.05
lambda_depth_rel: 0. # [0, 0, 0.05, 100]
lambda_normal: 0. # [0, 0, 0.05, 100]
- lambda_normal_smooth: 10.0
- lambda_3d_normal_smooth: 10.0
+ lambda_normal_smooth: [100, 7.0, 5.0, 150, 10.0, 200]
+ lambda_3d_normal_smooth: [100, 7.0, 5.0, 150, 10.0, 200]
lambda_orient: 1.0
- lambda_sparsity: 0.1 # should be tweaked for every model
- lambda_opaque: 0.1
+ lambda_sparsity: 0.5 # should be tweaked for every model
+ lambda_opaque: 0.5
optimizer:
name: Adam
@@ -142,14 +134,14 @@ system:
eps: 1.e-8
trainer:
- max_steps: 400
+ max_steps: 600
log_every_n_steps: 1
num_sanity_val_steps: 0
val_check_interval: 100
enable_progress_bar: true
- precision: 16-mixed
+ precision: 32
checkpoint:
save_last: true # save at each validation time
save_top_k: -1
- every_n_train_steps: ${trainer.max_steps}
+ every_n_train_steps: 100 # ${trainer.max_steps}
diff --git a/configs/zero123.yaml b/configs/zero123.yaml
index ca61b2e4..0f6ade97 100644
--- a/configs/zero123.yaml
+++ b/configs/zero123.yaml
@@ -1,5 +1,5 @@
name: "zero123"
-tag: "${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}_prog${data.random_camera.progressive_until}"
+tag: "${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}"
exp_root_dir: "outputs"
seed: 0
@@ -9,7 +9,7 @@ data: # threestudio/data/image.py -> SingleImageDataModuleConfig
height: [128, 256, 512]
width: [128, 256, 512]
resolution_milestones: [200, 300]
- default_elevation_deg: 0.0
+ default_elevation_deg: 5.0
default_azimuth_deg: 0.0
default_camera_distance: 3.8
default_fovy_deg: 20.0
@@ -111,9 +111,7 @@ system:
cond_azimuth_deg: ${data.default_azimuth_deg}
cond_camera_distance: ${data.default_camera_distance}
guidance_scale: 3.0
- #min_step_percent: 0.02
min_step_percent: [0, 0.4, 0.2, 200] # (start_iter, start_val, end_val, end_iter)
- #max_step_percent: 0.98
max_step_percent: [0, 0.85, 0.5, 200]
freq:
@@ -147,7 +145,7 @@ system:
eps: 1.e-8
trainer:
- max_steps: 400
+ max_steps: 600
log_every_n_steps: 1
num_sanity_val_steps: 0
val_check_interval: 100
diff --git a/load/images/dog1.png b/load/images/dog1_rgba.png
similarity index 100%
rename from load/images/dog1.png
rename to load/images/dog1_rgba.png
diff --git a/load/zero123/download.sh b/load/zero123/download.sh
index 35cc597e..169676b7 100644
--- a/load/zero123/download.sh
+++ b/load/zero123/download.sh
@@ -1 +1,4 @@
-wget https://huggingface.co/cvlab/zero123-weights/resolve/main/105000.ckpt
+# wget https://huggingface.co/cvlab/zero123-weights/resolve/main/105000.ckpt
+# mv 105000.ckpt zero123-original.ckpt
+wget https://zero123.cs.columbia.edu/assets/zero123-xl.ckpt
+# Download stable_zero123.ckpt from https://huggingface.co/stabilityai/stable-zero123
diff --git a/threestudio/models/guidance/__init__.py b/threestudio/models/guidance/__init__.py
index eeda92e4..b25a8d76 100644
--- a/threestudio/models/guidance/__init__.py
+++ b/threestudio/models/guidance/__init__.py
@@ -5,6 +5,7 @@
stable_diffusion_guidance,
stable_diffusion_unified_guidance,
stable_diffusion_vsd_guidance,
+ stable_zero123_guidance,
zero123_guidance,
zero123_unified_guidance,
)
diff --git a/threestudio/models/guidance/stable_zero123_guidance.py b/threestudio/models/guidance/stable_zero123_guidance.py
new file mode 100644
index 00000000..6d545908
--- /dev/null
+++ b/threestudio/models/guidance/stable_zero123_guidance.py
@@ -0,0 +1,340 @@
+import importlib
+import os
+from dataclasses import dataclass, field
+
+import cv2
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from diffusers import DDIMScheduler, DDPMScheduler, StableDiffusionPipeline
+from diffusers.utils.import_utils import is_xformers_available
+from omegaconf import OmegaConf
+from tqdm import tqdm
+
+import threestudio
+from threestudio.utils.base import BaseObject
+from threestudio.utils.misc import C, parse_version
+from threestudio.utils.typing import *
+
+
+def get_obj_from_str(string, reload=False):
+ module, cls = string.rsplit(".", 1)
+ if reload:
+ module_imp = importlib.import_module(module)
+ importlib.reload(module_imp)
+ return getattr(importlib.import_module(module, package=None), cls)
+
+
+def instantiate_from_config(config):
+ if not "target" in config:
+ if config == "__is_first_stage__":
+ return None
+ elif config == "__is_unconditional__":
+ return None
+ raise KeyError("Expected key `target` to instantiate.")
+ return get_obj_from_str(config["target"])(**config.get("params", dict()))
+
+
+# load model
+def load_model_from_config(config, ckpt, device, vram_O=True, verbose=False):
+ pl_sd = torch.load(ckpt, map_location="cpu")
+
+ if "global_step" in pl_sd and verbose:
+ print(f'[INFO] Global Step: {pl_sd["global_step"]}')
+
+ sd = pl_sd["state_dict"]
+
+ model = instantiate_from_config(config.model)
+ m, u = model.load_state_dict(sd, strict=False)
+
+ if len(m) > 0 and verbose:
+ print("[INFO] missing keys: \n", m)
+ if len(u) > 0 and verbose:
+ print("[INFO] unexpected keys: \n", u)
+
+ # manually load ema and delete it to save GPU memory
+ if model.use_ema:
+ if verbose:
+ print("[INFO] loading EMA...")
+ model.model_ema.copy_to(model.model)
+ del model.model_ema
+
+ if vram_O:
+ # we don't need decoder
+ del model.first_stage_model.decoder
+
+ torch.cuda.empty_cache()
+
+ model.eval().to(device)
+
+ return model
+
+
+@threestudio.register("stable-zero123-guidance")
+class StableZero123Guidance(BaseObject):
+ @dataclass
+ class Config(BaseObject.Config):
+ pretrained_model_name_or_path: str = "load/zero123/stable-zero123.ckpt"
+ pretrained_config: str = "load/zero123/sd-objaverse-finetune-c_concat-256.yaml"
+ vram_O: bool = True
+
+ cond_image_path: str = "load/images/hamburger_rgba.png"
+ cond_elevation_deg: float = 0.0
+ cond_azimuth_deg: float = 0.0
+ cond_camera_distance: float = 1.2
+
+ guidance_scale: float = 5.0
+
+ grad_clip: Optional[
+ Any
+ ] = None # field(default_factory=lambda: [0, 2.0, 8.0, 1000])
+ half_precision_weights: bool = False
+
+ min_step_percent: float = 0.02
+ max_step_percent: float = 0.98
+
+ cfg: Config
+
+ def configure(self) -> None:
+ threestudio.info(f"Loading Stable Zero123 ...")
+
+ self.config = OmegaConf.load(self.cfg.pretrained_config)
+ # TODO: seems it cannot load into fp16...
+ self.weights_dtype = torch.float32
+ self.model = load_model_from_config(
+ self.config,
+ self.cfg.pretrained_model_name_or_path,
+ device=self.device,
+ vram_O=self.cfg.vram_O,
+ )
+
+ for p in self.model.parameters():
+ p.requires_grad_(False)
+
+ # timesteps: use the diffusers DDIMScheduler for convenience... hope it's alright.
+ self.num_train_timesteps = self.config.model.params.timesteps
+
+ self.scheduler = DDIMScheduler(
+ self.num_train_timesteps,
+ self.config.model.params.linear_start,
+ self.config.model.params.linear_end,
+ beta_schedule="scaled_linear",
+ clip_sample=False,
+ set_alpha_to_one=False,
+ steps_offset=1,
+ )
+
+ self.num_train_timesteps = self.scheduler.config.num_train_timesteps
+ self.set_min_max_steps() # set to default value
+
+ self.alphas: Float[Tensor, "..."] = self.scheduler.alphas_cumprod.to(
+ self.device
+ )
+
+ self.grad_clip_val: Optional[float] = None
+
+ self.prepare_embeddings(self.cfg.cond_image_path)
+
+ threestudio.info(f"Loaded Stable Zero123!")
+
+ @torch.cuda.amp.autocast(enabled=False)
+ def set_min_max_steps(self, min_step_percent=0.02, max_step_percent=0.98):
+ self.min_step = int(self.num_train_timesteps * min_step_percent)
+ self.max_step = int(self.num_train_timesteps * max_step_percent)
+
+ @torch.cuda.amp.autocast(enabled=False)
+ def prepare_embeddings(self, image_path: str) -> None:
+ # load cond image for zero123
+ assert os.path.exists(image_path)
+ rgba = cv2.cvtColor(
+ cv2.imread(image_path, cv2.IMREAD_UNCHANGED), cv2.COLOR_BGRA2RGBA
+ )
+ rgba = (
+ cv2.resize(rgba, (256, 256), interpolation=cv2.INTER_AREA).astype(
+ np.float32
+ )
+ / 255.0
+ )
+ rgb = rgba[..., :3] * rgba[..., 3:] + (1 - rgba[..., 3:])
+ self.rgb_256: Float[Tensor, "1 3 H W"] = (
+ torch.from_numpy(rgb)
+ .unsqueeze(0)
+ .permute(0, 3, 1, 2)
+ .contiguous()
+ .to(self.device)
+ )
+ self.c_crossattn, self.c_concat = self.get_img_embeds(self.rgb_256)
+
+ @torch.cuda.amp.autocast(enabled=False)
+ @torch.no_grad()
+ def get_img_embeds(
+ self,
+ img: Float[Tensor, "B 3 256 256"],
+ ) -> Tuple[Float[Tensor, "B 1 768"], Float[Tensor, "B 4 32 32"]]:
+ img = img * 2.0 - 1.0
+ c_crossattn = self.model.get_learned_conditioning(img.to(self.weights_dtype))
+ c_concat = self.model.encode_first_stage(img.to(self.weights_dtype)).mode()
+ return c_crossattn, c_concat
+
+ @torch.cuda.amp.autocast(enabled=False)
+ def encode_images(
+ self, imgs: Float[Tensor, "B 3 256 256"]
+ ) -> Float[Tensor, "B 4 32 32"]:
+ input_dtype = imgs.dtype
+ imgs = imgs * 2.0 - 1.0
+ latents = self.model.get_first_stage_encoding(
+ self.model.encode_first_stage(imgs.to(self.weights_dtype))
+ )
+ return latents.to(input_dtype) # [B, 4, 32, 32] Latent space image
+
+ @torch.cuda.amp.autocast(enabled=False)
+ def decode_latents(
+ self,
+ latents: Float[Tensor, "B 4 H W"],
+ ) -> Float[Tensor, "B 3 512 512"]:
+ input_dtype = latents.dtype
+ image = self.model.decode_first_stage(latents)
+ image = (image * 0.5 + 0.5).clamp(0, 1)
+ return image.to(input_dtype)
+
+ @torch.cuda.amp.autocast(enabled=False)
+ @torch.no_grad()
+ def get_cond(
+ self,
+ elevation: Float[Tensor, "B"],
+ azimuth: Float[Tensor, "B"],
+ camera_distances: Float[Tensor, "B"],
+ c_crossattn=None,
+ c_concat=None,
+ **kwargs,
+ ) -> dict:
+ T = torch.stack(
+ [
+ torch.deg2rad(
+ (90 - elevation) - (90 - self.cfg.cond_elevation_deg)
+ ), # Zero123 polar is 90-elevation
+ torch.sin(torch.deg2rad(azimuth - self.cfg.cond_azimuth_deg)),
+ torch.cos(torch.deg2rad(azimuth - self.cfg.cond_azimuth_deg)),
+ torch.deg2rad(
+ 90 - torch.full_like(elevation, self.cfg.cond_elevation_deg)
+ ),
+ ],
+ dim=-1,
+ )[:, None, :].to(self.device)
+ cond = {}
+ clip_emb = self.model.cc_projection(
+ torch.cat(
+ [
+ (self.c_crossattn if c_crossattn is None else c_crossattn).repeat(
+ len(T), 1, 1
+ ),
+ T,
+ ],
+ dim=-1,
+ )
+ )
+ cond["c_crossattn"] = [
+ torch.cat([torch.zeros_like(clip_emb).to(self.device), clip_emb], dim=0)
+ ]
+ cond["c_concat"] = [
+ torch.cat(
+ [
+ torch.zeros_like(self.c_concat)
+ .repeat(len(T), 1, 1, 1)
+ .to(self.device),
+ (self.c_concat if c_concat is None else c_concat).repeat(
+ len(T), 1, 1, 1
+ ),
+ ],
+ dim=0,
+ )
+ ]
+ return cond
+
+ def __call__(
+ self,
+ rgb: Float[Tensor, "B H W C"],
+ elevation: Float[Tensor, "B"],
+ azimuth: Float[Tensor, "B"],
+ camera_distances: Float[Tensor, "B"],
+ rgb_as_latents=False,
+ **kwargs,
+ ):
+ batch_size = rgb.shape[0]
+
+ rgb_BCHW = rgb.permute(0, 3, 1, 2)
+ latents: Float[Tensor, "B 4 64 64"]
+ if rgb_as_latents:
+ latents = (
+ F.interpolate(rgb_BCHW, (32, 32), mode="bilinear", align_corners=False)
+ * 2
+ - 1
+ )
+ else:
+ rgb_BCHW_512 = F.interpolate(
+ rgb_BCHW, (256, 256), mode="bilinear", align_corners=False
+ )
+ # encode image into latents with vae
+ latents = self.encode_images(rgb_BCHW_512)
+
+ cond = self.get_cond(elevation, azimuth, camera_distances)
+
+ # timestep ~ U(min_step, max_step), by default 0.02-0.98 of the train timesteps, to avoid very high/low noise level
+ t = torch.randint(
+ self.min_step,
+ self.max_step + 1,
+ [batch_size],
+ dtype=torch.long,
+ device=self.device,
+ )
+
+ # predict the noise residual with unet, NO grad!
+ with torch.no_grad():
+ # add noise
+ noise = torch.randn_like(latents) # TODO: use torch generator
+ latents_noisy = self.scheduler.add_noise(latents, noise, t)
+ # pred noise
+ x_in = torch.cat([latents_noisy] * 2)
+ t_in = torch.cat([t] * 2)
+ noise_pred = self.model.apply_model(x_in, t_in, cond)
+
+ # perform guidance
+ noise_pred_uncond, noise_pred_cond = noise_pred.chunk(2)
+ noise_pred = noise_pred_uncond + self.cfg.guidance_scale * (
+ noise_pred_cond - noise_pred_uncond
+ )
+
+ w = (1 - self.alphas[t]).reshape(-1, 1, 1, 1)
+ grad = w * (noise_pred - noise)
+ grad = torch.nan_to_num(grad)
+ # clip grad for stable training?
+ if self.grad_clip_val is not None:
+ grad = grad.clamp(-self.grad_clip_val, self.grad_clip_val)
+
+ # loss = SpecifyGradient.apply(latents, grad)
+ # SpecifyGradient is not straightforward, use a reparameterization trick instead
+ target = (latents - grad).detach()
+ # d(loss)/d(latents) = latents - target = latents - (latents - grad) = grad
+ loss_sds = 0.5 * F.mse_loss(latents, target, reduction="sum") / batch_size
+
+ guidance_out = {
+ "loss_sds": loss_sds,
+ "grad_norm": grad.norm(),
+ "min_step": self.min_step,
+ "max_step": self.max_step,
+ }
+
+ return guidance_out
+
+ def update_step(self, epoch: int, global_step: int, on_load_weights: bool = False):
+ # clip grad for stable training as demonstrated in
+ # Debiasing Scores and Prompts of 2D Diffusion for Robust Text-to-3D Generation
+ # http://arxiv.org/abs/2303.15413
+ if self.cfg.grad_clip is not None:
+ self.grad_clip_val = C(self.cfg.grad_clip, epoch, global_step)
+
+ self.set_min_max_steps(
+ min_step_percent=C(self.cfg.min_step_percent, epoch, global_step),
+ max_step_percent=C(self.cfg.max_step_percent, epoch, global_step),
+ )
diff --git a/threestudio/utils/config.py b/threestudio/utils/config.py
index 99456333..88a7d092 100644
--- a/threestudio/utils/config.py
+++ b/threestudio/utils/config.py
@@ -35,6 +35,11 @@ def C_max(value: Any) -> float:
value = config_to_primitive(value)
if not isinstance(value, list):
raise TypeError("Scalar specification only supports list, got", type(value))
+ if len(value) >= 6:
+ max_value = value[2]
+ for i in range(4, len(value), 2):
+ max_value = max(max_value, value[i])
+ value = [value[0], value[1], max_value, value[3]]
if len(value) == 3:
value = [0] + value
assert len(value) == 4
diff --git a/threestudio/utils/misc.py b/threestudio/utils/misc.py
index 7954bb86..969c7c60 100644
--- a/threestudio/utils/misc.py
+++ b/threestudio/utils/misc.py
@@ -71,6 +71,17 @@ def C(value: Any, epoch: int, global_step: int) -> float:
raise TypeError("Scalar specification only supports list, got", type(value))
if len(value) == 3:
value = [0] + value
+ if len(value) >= 6:
+ select_i = 3
+ for i in range(3, len(value) - 2, 2):
+ if global_step >= value[i]:
+ select_i = i + 2
+ if select_i != 3:
+ start_value, start_step = value[select_i - 3], value[select_i - 2]
+ else:
+ start_step, start_value = value[:2]
+ end_value, end_step = value[select_i - 1], value[select_i]
+ value = [start_step, start_value, end_value, end_step]
assert len(value) == 4
start_step, start_value, end_value, end_step = value
if isinstance(end_step, int):
From c86246d7b194915584abb4b33703abd3e3966f01 Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Fri, 15 Dec 2023 03:32:57 +0800
Subject: [PATCH 11/24] perceptual loss update (#358)
---
threestudio/systems/control4d_multiview.py | 4 ++--
threestudio/systems/instructnerf2nerf.py | 3 ++-
threestudio/utils/perceptual/perceptual.py | 23 ++++++++++++++++++++++
3 files changed, 27 insertions(+), 3 deletions(-)
diff --git a/threestudio/systems/control4d_multiview.py b/threestudio/systems/control4d_multiview.py
index 0f198b51..8cfd9cf5 100644
--- a/threestudio/systems/control4d_multiview.py
+++ b/threestudio/systems/control4d_multiview.py
@@ -37,8 +37,8 @@ def configure(self) -> None:
material=self.material,
background=self.background,
)
-
- self.perceptual_loss = PerceptualLoss().eval().to(get_device())
+ p_config = {}
+ self.perceptual_loss = threestudio.find("perceptual-loss")(p_config)
self.edit_frames = {}
self.per_editing_step = self.cfg.per_editing_step
self.start_editing_step = self.cfg.start_editing_step
diff --git a/threestudio/systems/instructnerf2nerf.py b/threestudio/systems/instructnerf2nerf.py
index 16e914e1..f6e3ecde 100644
--- a/threestudio/systems/instructnerf2nerf.py
+++ b/threestudio/systems/instructnerf2nerf.py
@@ -24,7 +24,8 @@ def configure(self):
# create geometry, material, background, renderer
super().configure()
self.edit_frames = {}
- self.perceptual_loss = PerceptualLoss().eval().to(get_device())
+ p_config = {}
+ self.perceptual_loss = threestudio.find("perceptual-loss")(p_config)
def forward(self, batch: Dict[str, Any]) -> Dict[str, Any]:
render_out = self.renderer(**batch)
diff --git a/threestudio/utils/perceptual/perceptual.py b/threestudio/utils/perceptual/perceptual.py
index d756694a..403d9a92 100644
--- a/threestudio/utils/perceptual/perceptual.py
+++ b/threestudio/utils/perceptual/perceptual.py
@@ -1,12 +1,35 @@
"""Stripped version of https://github.com/richzhang/PerceptualSimilarity/tree/master/models"""
from collections import namedtuple
+from dataclasses import dataclass, field
import torch
import torch.nn as nn
from torchvision import models
+import threestudio
+from threestudio.utils.base import BaseObject
from threestudio.utils.perceptual.utils import get_ckpt_path
+from threestudio.utils.typing import *
+
+
+@threestudio.register("perceptual-loss")
+class PerceptualLossObject(BaseObject):
+ @dataclass
+ class Config(BaseObject.Config):
+ use_dropout: bool = True
+
+ cfg: Config
+
+ def configure(self) -> None:
+ self.perceptual_loss = PerceptualLoss(self.cfg.use_dropout).to(self.device)
+
+ def __call__(
+ self,
+ x: Float[Tensor, "B 3 256 256"],
+ y: Float[Tensor, "B 3 256 256"],
+ ):
+ return self.perceptual_loss(x, y)
class PerceptualLoss(nn.Module):
From 3597b550e483a91e0a52587fd72d48902fc4b897 Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Fri, 15 Dec 2023 20:43:56 +0800
Subject: [PATCH 12/24] Automatically find last checkpoint and support
multi-stage training (#362)
---
threestudio/systems/base.py | 10 +++++++++-
threestudio/utils/misc.py | 21 +++++++++++++++++++++
2 files changed, 30 insertions(+), 1 deletion(-)
diff --git a/threestudio/systems/base.py b/threestudio/systems/base.py
index 5b668ea6..73faac60 100644
--- a/threestudio/systems/base.py
+++ b/threestudio/systems/base.py
@@ -13,7 +13,13 @@
update_if_possible,
)
from threestudio.utils.config import parse_structured
-from threestudio.utils.misc import C, cleanup, get_device, load_module_weights
+from threestudio.utils.misc import (
+ C,
+ cleanup,
+ find_last_path,
+ get_device,
+ load_module_weights,
+)
from threestudio.utils.saving import SaverMixin
from threestudio.utils.typing import *
@@ -241,6 +247,8 @@ class Config(BaseSystem.Config):
cfg: Config
def configure(self) -> None:
+ self.cfg.geometry_convert_from = find_last_path(self.cfg.geometry_convert_from)
+ self.cfg.weights = find_last_path(self.cfg.weights)
if (
self.cfg.geometry_convert_from # from_coarse must be specified
and not self.cfg.weights # not initialized from coarse when weights are specified
diff --git a/threestudio/utils/misc.py b/threestudio/utils/misc.py
index 969c7c60..ccb4987f 100644
--- a/threestudio/utils/misc.py
+++ b/threestudio/utils/misc.py
@@ -134,3 +134,24 @@ def broadcast(tensor, src=0):
def enable_gradient(model, enabled: bool = True) -> None:
for param in model.parameters():
param.requires_grad_(enabled)
+
+
+def find_last_path(path: str):
+ if (path is not None) and ("LAST" in path):
+ path = path.replace(" ", "_")
+ base_dir_prefix, suffix = path.split("LAST", 1)
+ base_dir = os.path.dirname(base_dir_prefix)
+ prefix = os.path.split(base_dir_prefix)[-1]
+ base_dir_prefix = os.path.join(base_dir, prefix)
+ all_path = os.listdir(base_dir)
+ all_path = [os.path.join(base_dir, dir) for dir in all_path]
+ filtered_path = [dir for dir in all_path if dir.startswith(base_dir_prefix)]
+ filtered_path.sort(reverse=True)
+ last_path = filtered_path[0]
+ new_path = last_path + suffix
+ if os.path.exists(new_path):
+ return new_path
+ else:
+ raise FileNotFoundError(new_path)
+ else:
+ return path
From 5d21501996de6a9542e2164506253c36608f94ed Mon Sep 17 00:00:00 2001
From: DSaurus <2238454358@qq.com>
Date: Fri, 15 Dec 2023 22:37:42 +0800
Subject: [PATCH 13/24] update extensions
---
launch.py | 2 +-
threestudio/__init__.py | 7 ++++++-
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/launch.py b/launch.py
index d24940af..bca4ae11 100644
--- a/launch.py
+++ b/launch.py
@@ -82,7 +82,7 @@ def load_custom_modules():
and os.path.splitext(module_path)[1] != ".py"
):
continue
- if module_path.endswith(".disabled"):
+ if module_path.endswith("_disabled"):
continue
time_before = time.perf_counter()
success = load_custom_module(module_path)
diff --git a/threestudio/__init__.py b/threestudio/__init__.py
index 2c83608f..5651db5e 100644
--- a/threestudio/__init__.py
+++ b/threestudio/__init__.py
@@ -3,7 +3,12 @@
def register(name):
def decorator(cls):
- __modules__[name] = cls
+ if name in __modules__:
+ raise ValueError(
+ f"Module {name} already exists! Names of extensions conflict!"
+ )
+ else:
+ __modules__[name] = cls
return cls
return decorator
From 145d2bdbfd6554a7e6ba0ec8e41ec052dfdc519e Mon Sep 17 00:00:00 2001
From: Ikko Eltociear Ashimine
Date: Sun, 17 Dec 2023 00:46:34 +0900
Subject: [PATCH 14/24] Update README.md (#366)
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 4c3a0ab2..d82af86d 100644
--- a/README.md
+++ b/README.md
@@ -431,7 +431,7 @@ https://github.com/threestudio-project/threestudio/assets/19284678/72217cdd-765a
- Most of the settings are the same as the DreamFusion model. Please refer to the notable differences of the DreamFusion model.
- We use NeuS as the geometry representation while the original paper uses VolSDF.
-- We adopt techniques from [Neuralangelo](https://arxiv.org/abs/2306.03092) to stablize normal computation when using hash grids.
+- We adopt techniques from [Neuralangelo](https://arxiv.org/abs/2306.03092) to stabilize normal computation when using hash grids.
- We currently only implemented the coarse stage of TextMesh.
**Example running commands**
From 03671ab851364753142e75344dc303dfa48e7048 Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Mon, 18 Dec 2023 21:28:44 +0800
Subject: [PATCH 15/24] Update README.md
---
README.md | 3 +++
1 file changed, 3 insertions(+)
diff --git a/README.md b/README.md
index d82af86d..25a1a6c7 100644
--- a/README.md
+++ b/README.md
@@ -48,10 +48,13 @@ threestudio is a unified framework for 3D content creation from text prompts, si
+
+
## News
+- 18/12/2023 Implementation of [4D-fy](https://github.com/DSaurus/threestudio-4dfy) for 4D generation and [DreamCraft3D](https://github.com/DSaurus/threestudio-dreamcraft3D) for high-quality image-to-3D generation as the custom extensions! Follow the instructions on the extensions website to give it a try.
- 11/30/2023 Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).
- 11/30/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions).
- 08/25/2023: Implementation of [Magic123](https://guochengqian.github.io/project/magic123/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#magic123-) to give it a try.
From b6d7c12075396bdb89d387f2a3b4a573290de35d Mon Sep 17 00:00:00 2001
From: Vikram Voleti
Date: Mon, 18 Dec 2023 11:56:17 -0500
Subject: [PATCH 16/24] Update README.md for Stable Zero123 (#372)
---
README.md | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 25a1a6c7..deab297e 100644
--- a/README.md
+++ b/README.md
@@ -54,8 +54,9 @@ threestudio is a unified framework for 3D content creation from text prompts, si
## News
-- 18/12/2023 Implementation of [4D-fy](https://github.com/DSaurus/threestudio-4dfy) for 4D generation and [DreamCraft3D](https://github.com/DSaurus/threestudio-dreamcraft3D) for high-quality image-to-3D generation as the custom extensions! Follow the instructions on the extensions website to give it a try.
-- 11/30/2023 Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).
+- 12/18/2023: Implementation of [4D-fy](https://github.com/DSaurus/threestudio-4dfy) for 4D generation and [DreamCraft3D](https://github.com/DSaurus/threestudio-dreamcraft3D) for high-quality image-to-3D generation as the custom extensions! Follow the instructions on the extensions website to give it a try.
+- 12/13/2023: Implementation supporting [Stable Zero123](https://stability.ai/news/stable-zero123-3d-generation) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#stable-zero123) to give it a try.
+- 11/30/2023: Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).
- 11/30/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions).
- 08/25/2023: Implementation of [Magic123](https://guochengqian.github.io/project/magic123/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#magic123-) to give it a try.
- 07/06/2023: Join our [Discord server](https://discord.gg/ejer2MAB8N) for lively discussions!
From cf23ed6eab4b145d45954ac7db8dc78f94616914 Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Tue, 19 Dec 2023 14:02:27 +0800
Subject: [PATCH 17/24] Update README.md
---
README.md | 3 +++
1 file changed, 3 insertions(+)
diff --git a/README.md b/README.md
index deab297e..c77f7b92 100644
--- a/README.md
+++ b/README.md
@@ -533,6 +533,9 @@ Download pretrained Stable Zero123 checkpoint `stable-zero123.ckpt` into `load/z
**Results obtained by threestudio (Stable Zero123 vs Zero123-XL)**
![Final_video_v01](https://github.com/threestudio-project/threestudio/assets/22424247/bf2d2213-5027-489c-a6ba-1c56c14ee8b7)
+**Direct multi-view image generation**
+If you only want to generate multi-view images, please refer to [threestudio-mvimg-gen](https://github.com/DSaurus/threestudio-mvimg-gen). This extension can use Stable Zero123 to directly generate images from multi-view perspectives.
+
**Example running commands**
1. Take an image of your choice, or generate it from text using your favourite AI image generator such as SDXL Turbo (https://clipdrop.co/stable-diffusion-turbo) E.g. "A simple 3D render of a friendly dog"
From 47b6a33827350fc72d9be2d69ed6ad8522a350ba Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Tue, 19 Dec 2023 19:58:20 +0800
Subject: [PATCH 18/24] add version (#375)
---
threestudio/__init__.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/threestudio/__init__.py b/threestudio/__init__.py
index 5651db5e..a1184e43 100644
--- a/threestudio/__init__.py
+++ b/threestudio/__init__.py
@@ -1,4 +1,5 @@
__modules__ = {}
+__version__ = "0.2.0"
def register(name):
From 23b2d717474ffefd3e88e8f69c0e9695c5c6f7f8 Mon Sep 17 00:00:00 2001
From: bennyguo
Date: Thu, 21 Dec 2023 12:08:11 +0800
Subject: [PATCH 19/24] update gradio app
---
gradio_app.py | 5 +++--
requirements.txt | 2 +-
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/gradio_app.py b/gradio_app.py
index c2d32f5f..0d921d98 100644
--- a/gradio_app.py
+++ b/gradio_app.py
@@ -201,7 +201,7 @@ def run(
# manually assign the output directory, name and tag so that we know the trial directory
name = os.path.basename(model_config[model_name]["path"]).split(".")[0]
- tag = datetime.now().strftime("@%Y%m%d-%H%M%S")
+ tag = datetime.now().strftime("%Y%m%d-%H%M%S")
trial_dir = os.path.join(save_root, EXP_ROOT_DIR, name, tag)
alive_path = os.path.join(trial_dir, "alive")
@@ -441,6 +441,7 @@ def launch(
run_btn,
stop_btn,
],
+ concurrency_limit=1,
)
stop_btn.click(
fn=stop_run,
@@ -453,7 +454,7 @@ def launch(
launch_args = {"server_port": port}
if listen:
launch_args["server_name"] = "0.0.0.0"
- demo.queue(concurrency_count=1).launch(**launch_args)
+ demo.queue().launch(**launch_args)
def watch(
diff --git a/requirements.txt b/requirements.txt
index 142a76d2..88706a6a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -20,7 +20,7 @@ networkx
pysdf
PyMCubes
wandb
-gradio
+gradio==4.11.0
git+https://github.com/ashawkey/envlight.git
torchmetrics
From fa40007b7b6c90f34cdec957a2e91acb65e1fe60 Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Sat, 23 Dec 2023 19:04:17 +0800
Subject: [PATCH 20/24] Update README.md
---
README.md | 40 +++++++++++++++++++++++-----------------
1 file changed, 23 insertions(+), 17 deletions(-)
diff --git a/README.md b/README.md
index c77f7b92..d0f69fa9 100644
--- a/README.md
+++ b/README.md
@@ -48,27 +48,33 @@ threestudio is a unified framework for 3D content creation from text prompts, si
-
-
+
+
+
+
+
+| Animate-124 | 4D-fy | DreamCraft3D | Gaussian Splatting | MVDream | Mesh-Fitting |
+
## News
-- 12/18/2023: Implementation of [4D-fy](https://github.com/DSaurus/threestudio-4dfy) for 4D generation and [DreamCraft3D](https://github.com/DSaurus/threestudio-dreamcraft3D) for high-quality image-to-3D generation as the custom extensions! Follow the instructions on the extensions website to give it a try.
-- 12/13/2023: Implementation supporting [Stable Zero123](https://stability.ai/news/stable-zero123-3d-generation) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#stable-zero123) to give it a try.
-- 11/30/2023: Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).
-- 11/30/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions).
-- 08/25/2023: Implementation of [Magic123](https://guochengqian.github.io/project/magic123/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#magic123-) to give it a try.
-- 07/06/2023: Join our [Discord server](https://discord.gg/ejer2MAB8N) for lively discussions!
-- 07/03/2023: Try text-to-3D online in [HuggingFace Spaces](https://huggingface.co/spaces/bennyguo/threestudio) or using our [self-hosted service](http://t23-g-01.threestudio.ai) (GPU support from Tencent). To host the web interface locally, see [here](https://github.com/threestudio-project/threestudio#gradio-web-interface).
-- 06/20/2023: Implementations of Instruct-NeRF2NeRF and Control4D for high-fidelity 3D editing! Follow the instructions for [Control4D](https://github.com/threestudio-project/threestudio#control4d-) and [Instruct-NeRF2NeRF](https://github.com/threestudio-project/threestudio#instructnerf2nerf-) to give it a try.
-- 06/14/2023: Implementation of TextMesh! Follow the instructions [here](https://github.com/threestudio-project/threestudio#textmesh-) to give it a try.
-- 06/14/2023: Implementation of [prompt debiasing](https://arxiv.org/abs/2303.15413) and [Perp-Neg](https://perp-neg.github.io/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#tips-on-improving-quality) to give it a try.
-- 05/29/2023: An experimental implementation of using [Zero-1-to-3](https://zero123.cs.columbia.edu/) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#zero-1-to-3-) to give it a try.
-- 05/26/2023: Implementation of [ProlificDreamer](https://ml.cs.tsinghua.edu.cn/prolificdreamer/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#prolificdreamer-) to give it a try.
-- 05/14/2023: You can experiment with the SDS loss on 2D images using our [2dplayground](2dplayground.ipynb).
-- 05/13/2023: You can now try threestudio on [Google Colab](https://colab.research.google.com/github/threestudio-project/threestudio/blob/main/threestudio.ipynb)!
-- 05/11/2023: We now support exporting textured meshes! See [here](https://github.com/threestudio-project/threestudio#export-meshes) for instructions.
+- 23/12/2023: Thank [Yuyang Zhao](https://github.com/HeliosZhao) for implementation of image-to-4D generation extensions [Aniamte-124](https://github.com/HeliosZhao/Animate124/tree/threestudio)! Follow the instructions on the extensions website to give it a try.
+- 18/12/2023: Implementation of [4D-fy](https://github.com/DSaurus/threestudio-4dfy) for 4D generation and [DreamCraft3D](https://github.com/DSaurus/threestudio-dreamcraft3D) for high-quality image-to-3D generation as the custom extensions! Follow the instructions on the extensions website to give it a try.
+- 13/12/2023: Implementation supporting [Stable Zero123](https://stability.ai/news/stable-zero123-3d-generation) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#stable-zero123) to give it a try.
+- 30/11/2023: Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).
+- 30/11/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions).
+- 25/08/2023: Implementation of [Magic123](https://guochengqian.github.io/project/magic123/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#magic123-) to give it a try.
+- 06/07/2023: Join our [Discord server](https://discord.gg/ejer2MAB8N) for lively discussions!
+- 03/07/2023: Try text-to-3D online in [HuggingFace Spaces](https://huggingface.co/spaces/bennyguo/threestudio) or using our [self-hosted service](http://t23-g-01.threestudio.ai) (GPU support from Tencent). To host the web interface locally, see [here](https://github.com/threestudio-project/threestudio#gradio-web-interface).
+- 20/06/2023: Implementations of Instruct-NeRF2NeRF and Control4D for high-fidelity 3D editing! Follow the instructions for [Control4D](https://github.com/threestudio-project/threestudio#control4d-) and [Instruct-NeRF2NeRF](https://github.com/threestudio-project/threestudio#instructnerf2nerf-) to give it a try.
+- 14/06/2023: Implementation of TextMesh! Follow the instructions [here](https://github.com/threestudio-project/threestudio#textmesh-) to give it a try.
+- 14/06/2023: Implementation of [prompt debiasing](https://arxiv.org/abs/2303.15413) and [Perp-Neg](https://perp-neg.github.io/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#tips-on-improving-quality) to give it a try.
+- 29/05/2023: An experimental implementation of using [Zero-1-to-3](https://zero123.cs.columbia.edu/) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#zero-1-to-3-) to give it a try.
+- 26/05/2023: Implementation of [ProlificDreamer](https://ml.cs.tsinghua.edu.cn/prolificdreamer/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#prolificdreamer-) to give it a try.
+- 14/05/2023: You can experiment with the SDS loss on 2D images using our [2dplayground](2dplayground.ipynb).
+- 13/05/2023: You can now try threestudio on [Google Colab](https://colab.research.google.com/github/threestudio-project/threestudio/blob/main/threestudio.ipynb)!
+- 11/05/2023: We now support exporting textured meshes! See [here](https://github.com/threestudio-project/threestudio#export-meshes) for instructions.
![export-blender](https://github.com/threestudio-project/threestudio/assets/19284678/ccae2820-e702-484c-a43f-81678a365427)
From 652740ab3e30bd871f10acab6db2ed4afdfd25dc Mon Sep 17 00:00:00 2001
From: DSaurus <2238454358@qq.com>
Date: Sat, 23 Dec 2023 19:44:54 +0800
Subject: [PATCH 21/24] fix format
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index d0f69fa9..389c675d 100644
--- a/README.md
+++ b/README.md
@@ -48,7 +48,7 @@ threestudio is a unified framework for 3D content creation from text prompts, si
-
+
From 894390aad91ad80b6d0f5af591acf5a720ab2bfe Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Sat, 23 Dec 2023 22:21:22 +0800
Subject: [PATCH 22/24] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 389c675d..bc1b1b3d 100644
--- a/README.md
+++ b/README.md
@@ -59,7 +59,7 @@ threestudio is a unified framework for 3D content creation from text prompts, si
## News
-- 23/12/2023: Thank [Yuyang Zhao](https://github.com/HeliosZhao) for implementation of image-to-4D generation extensions [Aniamte-124](https://github.com/HeliosZhao/Animate124/tree/threestudio)! Follow the instructions on the extensions website to give it a try.
+- 23/12/2023: Thanks to [Yuyang Zhao](https://github.com/HeliosZhao) for implementing the image-to-4D generation extension [Animate-124](https://github.com/HeliosZhao/Animate124/tree/threestudio)! Follow the instructions on the extensions website to give it a try.
- 18/12/2023: Implementation of [4D-fy](https://github.com/DSaurus/threestudio-4dfy) for 4D generation and [DreamCraft3D](https://github.com/DSaurus/threestudio-dreamcraft3D) for high-quality image-to-3D generation as the custom extensions! Follow the instructions on the extensions website to give it a try.
- 13/12/2023: Implementation supporting [Stable Zero123](https://stability.ai/news/stable-zero123-3d-generation) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#stable-zero123) to give it a try.
- 30/11/2023: Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting).
From e254d87d2a9bf43851ac953bc323e7fe695817ec Mon Sep 17 00:00:00 2001
From: johnbanq
Date: Wed, 27 Dec 2023 18:12:54 +0000
Subject: [PATCH 23/24] Assert the text embeddings process successfully runs
(#387)
---
threestudio/models/prompt_processors/base.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/threestudio/models/prompt_processors/base.py b/threestudio/models/prompt_processors/base.py
index 83a040f2..8993434b 100644
--- a/threestudio/models/prompt_processors/base.py
+++ b/threestudio/models/prompt_processors/base.py
@@ -379,6 +379,7 @@ def prepare_text_embeddings(self):
)
subprocess.start()
subprocess.join()
+ assert subprocess.exitcode == 0, "prompt embedding process failed!"
else:
self.spawn_func(
self.cfg.pretrained_model_name_or_path,
From 8ce432d51b2f46eae2e40c045b079bc66a994db0 Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Fri, 29 Dec 2023 00:05:40 +0800
Subject: [PATCH 24/24] support gaussian zero-123 (#388)
* support gaussian zero-123
* add exp interpolation
---
threestudio/__init__.py | 2 +-
threestudio/data/image.py | 10 ++++++++--
threestudio/utils/misc.py | 16 ++++++++++------
3 files changed, 19 insertions(+), 9 deletions(-)
diff --git a/threestudio/__init__.py b/threestudio/__init__.py
index a1184e43..f5619b2f 100644
--- a/threestudio/__init__.py
+++ b/threestudio/__init__.py
@@ -1,5 +1,5 @@
__modules__ = {}
-__version__ = "0.2.0"
+__version__ = "0.2.1"
def register(name):
diff --git a/threestudio/data/image.py b/threestudio/data/image.py
index fe7c227e..033c528f 100644
--- a/threestudio/data/image.py
+++ b/threestudio/data/image.py
@@ -96,6 +96,10 @@ def setup(self, cfg, split):
[torch.stack([right, up, -lookat], dim=-1), camera_position[:, :, None]],
dim=-1,
)
+ self.c2w4x4: Float[Tensor, "B 4 4"] = torch.cat(
+ [self.c2w, torch.zeros_like(self.c2w[:, :1])], dim=1
+ )
+ self.c2w4x4[:, 3, 3] = 1.0
self.camera_position = camera_position
self.light_position = light_position
@@ -258,8 +262,10 @@ def collate(self, batch) -> Dict[str, Any]:
"ref_depth": self.depth,
"ref_normal": self.normal,
"mask": self.mask,
- "height": self.cfg.height,
- "width": self.cfg.width,
+ "height": self.height,
+ "width": self.width,
+ "c2w": self.c2w4x4,
+ "fovy": self.fovy,
}
if self.cfg.use_random_camera:
batch["random_camera"] = self.random_pose_generator.collate(None)
diff --git a/threestudio/utils/misc.py b/threestudio/utils/misc.py
index ccb4987f..f2378f55 100644
--- a/threestudio/utils/misc.py
+++ b/threestudio/utils/misc.py
@@ -1,4 +1,5 @@
import gc
+import math
import os
import re
@@ -62,7 +63,7 @@ def load_module_weights(
return state_dict_to_load, ckpt["epoch"], ckpt["global_step"]
-def C(value: Any, epoch: int, global_step: int) -> float:
+def C(value: Any, epoch: int, global_step: int, interpolation="linear") -> float:
if isinstance(value, int) or isinstance(value, float):
pass
else:
@@ -86,13 +87,16 @@ def C(value: Any, epoch: int, global_step: int) -> float:
start_step, start_value, end_value, end_step = value
if isinstance(end_step, int):
current_step = global_step
- value = start_value + (end_value - start_value) * max(
- min(1.0, (current_step - start_step) / (end_step - start_step)), 0.0
- )
elif isinstance(end_step, float):
current_step = epoch
- value = start_value + (end_value - start_value) * max(
- min(1.0, (current_step - start_step) / (end_step - start_step)), 0.0
+ t = max(min(1.0, (current_step - start_step) / (end_step - start_step)), 0.0)
+ if interpolation == "linear":
+ value = start_value + (end_value - start_value) * t
+ elif interpolation == "exp":
+ value = math.exp(math.log(start_value) * (1 - t) + math.log(end_value) * t)
+ else:
+ raise ValueError(
+ f"Unknown interpolation method: {interpolation}, only support linear and exp"
)
return value