From 477345befc45ea66248c567accd466ae82afbf70 Mon Sep 17 00:00:00 2001 From: DSaurus <2238454358@qq.com> Date: Mon, 27 Nov 2023 21:48:43 +0800 Subject: [PATCH 01/24] fix elevation bug --- threestudio/data/uncond.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/threestudio/data/uncond.py b/threestudio/data/uncond.py index d3b67b7d..ee8df2e7 100644 --- a/threestudio/data/uncond.py +++ b/threestudio/data/uncond.py @@ -151,18 +151,16 @@ def collate(self, batch) -> Dict[str, Any]: else: # otherwise sample uniformly on sphere elevation_range_percent = [ - (self.elevation_range[0] + 90.0) / 180.0, - (self.elevation_range[1] + 90.0) / 180.0, + self.elevation_range[0] / 180.0 * math.pi, + self.elevation_range[1] / 180.0 * math.pi, ] # inverse transform sampling elevation = torch.asin( - 2 - * ( + ( torch.rand(self.batch_size) - * (elevation_range_percent[1] - elevation_range_percent[0]) - + elevation_range_percent[0] + * (math.sin(elevation_range_percent[1]) - math.sin(elevation_range_percent[0]) ) + + math.sin(elevation_range_percent[0] ) ) - - 1.0 ) elevation_deg = elevation / math.pi * 180.0 From 9b76296bf7be1bfa8fa965d8b9fb4c4fc017cb4f Mon Sep 17 00:00:00 2001 From: DSaurus <2238454358@qq.com> Date: Mon, 27 Nov 2023 21:52:09 +0800 Subject: [PATCH 02/24] fix format --- threestudio/data/uncond.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/threestudio/data/uncond.py b/threestudio/data/uncond.py index ee8df2e7..8316325c 100644 --- a/threestudio/data/uncond.py +++ b/threestudio/data/uncond.py @@ -158,8 +158,11 @@ def collate(self, batch) -> Dict[str, Any]: elevation = torch.asin( ( torch.rand(self.batch_size) - * (math.sin(elevation_range_percent[1]) - math.sin(elevation_range_percent[0]) ) - + math.sin(elevation_range_percent[0] ) + * ( + math.sin(elevation_range_percent[1]) + - math.sin(elevation_range_percent[0]) + ) + + math.sin(elevation_range_percent[0]) ) ) elevation_deg = elevation / math.pi * 180.0 From 
503140385940b1d4d3a528c0cc4494a52f198eff Mon Sep 17 00:00:00 2001 From: Ruizhi Shao <2238454358@qq.com> Date: Fri, 1 Dec 2023 01:51:30 +0800 Subject: [PATCH 03/24] Enable Gaussian Splatting and Custom Extension (#344) * add gaussian wip * clean up * add refine * gsgen baseline w/o point-e * upd config * rm KNN * adjust parameters * max_num, fix color * import lib * background device * update config * update config * clean gausisan splatting * fix format * update extensions * fix memory bug * prepare for extensions * clean gaussian * clean gaussian * clean * fix bugs --------- Co-authored-by: Linyou --- .gitignore | 2 + README.md | 4 +- launch.py | 64 ++++++++++++++++++ threestudio/data/uncond.py | 20 ++++-- .../background/solid_color_background.py | 7 +- threestudio/utils/loss.py | 16 +++++ threestudio/utils/ops.py | 65 +++++++++++++++++++ 7 files changed, 167 insertions(+), 11 deletions(-) create mode 100644 threestudio/utils/loss.py diff --git a/.gitignore b/.gitignore index 12adb415..0bf85006 100644 --- a/.gitignore +++ b/.gitignore @@ -188,4 +188,6 @@ outputs-gradio/ # wandb wandb/ +custom/* + load/tets/256_tets.npz diff --git a/README.md b/README.md index 52abcde4..15da0e4c 100644 --- a/README.md +++ b/README.md @@ -44,8 +44,10 @@ threestudio is a unified framework for 3D content creation from text prompts, si Did not find what you want? Submit a feature request or upvote others' requests here!

-## News +## News +- 30/11/2023 Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting). +- 30/11/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions). - 08/25/2023: Implementation of [Magic123](https://guochengqian.github.io/project/magic123/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#magic123-) to give it a try. - 07/06/2023: Join our [Discord server](https://discord.gg/ejer2MAB8N) for lively discussions! - 07/03/2023: Try text-to-3D online in [HuggingFace Spaces](https://huggingface.co/spaces/bennyguo/threestudio) or using our [self-hosted service](http://t23-g-01.threestudio.ai) (GPU support from Tencent). To host the web interface locally, see [here](https://github.com/threestudio-project/threestudio#gradio-web-interface). 
diff --git a/launch.py b/launch.py index 72add725..d24940af 100644 --- a/launch.py +++ b/launch.py @@ -1,8 +1,11 @@ import argparse import contextlib +import importlib import logging import os import sys +import time +import traceback class ColoredFilter(logging.Filter): @@ -39,6 +42,65 @@ def filter(self, record): return True +def load_custom_module(module_path): + module_name = os.path.basename(module_path) + if os.path.isfile(module_path): + sp = os.path.splitext(module_path) + module_name = sp[0] + try: + if os.path.isfile(module_path): + module_spec = importlib.util.spec_from_file_location( + module_name, module_path + ) + else: + module_spec = importlib.util.spec_from_file_location( + module_name, os.path.join(module_path, "__init__.py") + ) + + module = importlib.util.module_from_spec(module_spec) + sys.modules[module_name] = module + module_spec.loader.exec_module(module) + return True + except Exception as e: + print(traceback.format_exc()) + print(f"Cannot import {module_path} module for custom nodes:", e) + return False + + +def load_custom_modules(): + node_paths = ["custom"] + node_import_times = [] + for custom_node_path in node_paths: + possible_modules = os.listdir(custom_node_path) + if "__pycache__" in possible_modules: + possible_modules.remove("__pycache__") + + for possible_module in possible_modules: + module_path = os.path.join(custom_node_path, possible_module) + if ( + os.path.isfile(module_path) + and os.path.splitext(module_path)[1] != ".py" + ): + continue + if module_path.endswith(".disabled"): + continue + time_before = time.perf_counter() + success = load_custom_module(module_path) + node_import_times.append( + (time.perf_counter() - time_before, module_path, success) + ) + + if len(node_import_times) > 0: + print("\nImport times for custom modules:") + for n in sorted(node_import_times): + if n[2]: + import_message = "" + else: + import_message = " (IMPORT FAILED)" + print("{:6.1f} seconds{}:".format(n[0], import_message), n[1]) + 
print() + + def main(args, extras) -> None: # set CUDA_VISIBLE_DEVICES if needed, then import pytorch-lightning os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" @@ -94,6 +156,8 @@ def main(args, extras) -> None: else: handler.setFormatter(logging.Formatter("[%(levelname)s] %(message)s")) + load_custom_modules() + # parse YAML config to OmegaConf cfg: ExperimentConfig cfg = load_config(args.config, cli_args=extras, n_gpus=n_gpus) diff --git a/threestudio/data/uncond.py b/threestudio/data/uncond.py index 8316325c..d051e3fd 100644 --- a/threestudio/data/uncond.py +++ b/threestudio/data/uncond.py @@ -3,6 +3,7 @@ import random from dataclasses import dataclass, field +import numpy as np import pytorch_lightning as pl import torch import torch.nn.functional as F @@ -14,6 +15,7 @@ from threestudio.utils.config import parse_structured from threestudio.utils.misc import get_device from threestudio.utils.ops import ( + get_full_projection_matrix, get_mvp_matrix, get_projection_matrix, get_ray_directions, @@ -315,10 +317,11 @@ def collate(self, batch) -> Dict[str, Any]: # Importance note: the returned rays_d MUST be normalized! 
rays_o, rays_d = get_rays(directions, c2w, keepdim=True) - proj_mtx: Float[Tensor, "B 4 4"] = get_projection_matrix( - fovy, self.width / self.height, 0.1, 1000.0 + self.proj_mtx: Float[Tensor, "B 4 4"] = get_projection_matrix( + fovy, self.width / self.height, 0.01, 100.0 ) # FIXME: hard-coded near and far - mvp_mtx: Float[Tensor, "B 4 4"] = get_mvp_matrix(c2w, proj_mtx) + mvp_mtx: Float[Tensor, "B 4 4"] = get_mvp_matrix(c2w, self.proj_mtx) + self.fovy = fovy return { "rays_o": rays_o, @@ -332,6 +335,8 @@ def collate(self, batch) -> Dict[str, Any]: "camera_distances": camera_distances, "height": self.height, "width": self.width, + "fovy": self.fovy, + "proj_mtx": self.proj_mtx, } @@ -414,10 +419,10 @@ def __init__(self, cfg: Any, split: str) -> None: ) rays_o, rays_d = get_rays(directions, c2w, keepdim=True) - proj_mtx: Float[Tensor, "B 4 4"] = get_projection_matrix( - fovy, self.cfg.eval_width / self.cfg.eval_height, 0.1, 1000.0 + self.proj_mtx: Float[Tensor, "B 4 4"] = get_projection_matrix( + fovy, self.cfg.eval_width / self.cfg.eval_height, 0.01, 100.0 ) # FIXME: hard-coded near and far - mvp_mtx: Float[Tensor, "B 4 4"] = get_mvp_matrix(c2w, proj_mtx) + mvp_mtx: Float[Tensor, "B 4 4"] = get_mvp_matrix(c2w, self.proj_mtx) self.rays_o, self.rays_d = rays_o, rays_d self.mvp_mtx = mvp_mtx @@ -427,6 +432,7 @@ def __init__(self, cfg: Any, split: str) -> None: self.elevation, self.azimuth = elevation, azimuth self.elevation_deg, self.azimuth_deg = elevation_deg, azimuth_deg self.camera_distances = camera_distances + self.fovy = fovy def __len__(self): return self.n_views @@ -445,6 +451,8 @@ def __getitem__(self, index): "camera_distances": self.camera_distances[index], "height": self.cfg.eval_height, "width": self.cfg.eval_width, + "fovy": self.fovy[index], + "proj_mtx": self.proj_mtx[index], } def collate(self, batch): diff --git a/threestudio/models/background/solid_color_background.py b/threestudio/models/background/solid_color_background.py index 
0763a0c5..0b68d5b4 100644 --- a/threestudio/models/background/solid_color_background.py +++ b/threestudio/models/background/solid_color_background.py @@ -34,10 +34,9 @@ def configure(self) -> None: ) def forward(self, dirs: Float[Tensor, "B H W 3"]) -> Float[Tensor, "B H W Nc"]: - color = ( - torch.ones(*dirs.shape[:-1], self.cfg.n_output_dims).to(dirs) - * self.env_color - ) + color = torch.ones(*dirs.shape[:-1], self.cfg.n_output_dims).to( + dirs + ) * self.env_color.to(dirs) if ( self.training and self.cfg.random_aug diff --git a/threestudio/utils/loss.py b/threestudio/utils/loss.py new file mode 100644 index 00000000..eb0c7250 --- /dev/null +++ b/threestudio/utils/loss.py @@ -0,0 +1,16 @@ +import torch + + +def _tensor_size(t): + return t.size()[1] * t.size()[2] * t.size()[3] + + +def tv_loss(x): + batch_size = x.size()[0] + h_x = x.size()[2] + w_x = x.size()[3] + count_h = _tensor_size(x[:, :, 1:, :]) + count_w = _tensor_size(x[:, :, :, 1:]) + h_tv = torch.pow((x[:, :, 1:, :] - x[:, :, : h_x - 1, :]), 2).sum() + w_tv = torch.pow((x[:, :, :, 1:] - x[:, :, :, : w_x - 1]), 2).sum() + return 2 * (h_tv / count_h + w_tv / count_w) / batch_size diff --git a/threestudio/utils/ops.py b/threestudio/utils/ops.py index 320fa46a..b35d3cd0 100644 --- a/threestudio/utils/ops.py +++ b/threestudio/utils/ops.py @@ -1,3 +1,4 @@ +import math from collections import defaultdict import numpy as np @@ -292,6 +293,70 @@ def get_mvp_matrix( return mvp_mtx +def get_full_projection_matrix( + c2w: Float[Tensor, "B 4 4"], proj_mtx: Float[Tensor, "B 4 4"] +) -> Float[Tensor, "B 4 4"]: + return (c2w.unsqueeze(0).bmm(proj_mtx.unsqueeze(0))).squeeze(0) + + +# gaussian splatting functions +def convert_pose(C2W): + flip_yz = torch.eye(4, device=C2W.device) + flip_yz[1, 1] = -1 + flip_yz[2, 2] = -1 + C2W = torch.matmul(C2W, flip_yz) + return C2W + + +def get_projection_matrix_gaussian(znear, zfar, fovX, fovY, device="cuda"): + tanHalfFovY = math.tan((fovY / 2)) + tanHalfFovX = math.tan((fovX / 
2)) + + top = tanHalfFovY * znear + bottom = -top + right = tanHalfFovX * znear + left = -right + + P = torch.zeros(4, 4, device=device) + + z_sign = 1.0 + + P[0, 0] = 2.0 * znear / (right - left) + P[1, 1] = 2.0 * znear / (top - bottom) + P[0, 2] = (right + left) / (right - left) + P[1, 2] = (top + bottom) / (top - bottom) + P[3, 2] = z_sign + P[2, 2] = z_sign * zfar / (zfar - znear) + P[2, 3] = -(zfar * znear) / (zfar - znear) + return P + + +def get_fov_gaussian(P): + tanHalfFovX = 1 / P[0, 0] + tanHalfFovY = 1 / P[1, 1] + fovY = math.atan(tanHalfFovY) * 2 + fovX = math.atan(tanHalfFovX) * 2 + return fovX, fovY + + +def get_cam_info_gaussian(c2w, fovx, fovy, znear, zfar): + c2w = convert_pose(c2w) + world_view_transform = torch.inverse(c2w) + + world_view_transform = world_view_transform.transpose(0, 1).cuda().float() + projection_matrix = ( + get_projection_matrix_gaussian(znear=znear, zfar=zfar, fovX=fovx, fovY=fovy) + .transpose(0, 1) + .cuda() + ) + full_proj_transform = ( + world_view_transform.unsqueeze(0).bmm(projection_matrix.unsqueeze(0)) + ).squeeze(0) + camera_center = world_view_transform.inverse()[3, :3] + + return world_view_transform, full_proj_transform, camera_center + + def binary_cross_entropy(input, target): """ F.binary_cross_entropy is not numerically stable in mixed-precision training. From 692968736ace7b9902bba4b0fc218e2bae965ef6 Mon Sep 17 00:00:00 2001 From: Ruizhi Shao <2238454358@qq.com> Date: Fri, 1 Dec 2023 03:22:42 +0800 Subject: [PATCH 04/24] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 15da0e4c..71505dc5 100644 --- a/README.md +++ b/README.md @@ -46,8 +46,8 @@ threestudio is a unified framework for 3D content creation from text prompts, si ## News -- 30/11/2023 Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. 
You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting). -- 30/11/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions). +- 11/30/2023 Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting). +- 11/30/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions). - 08/25/2023: Implementation of [Magic123](https://guochengqian.github.io/project/magic123/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#magic123-) to give it a try. - 07/06/2023: Join our [Discord server](https://discord.gg/ejer2MAB8N) for lively discussions! - 07/03/2023: Try text-to-3D online in [HuggingFace Spaces](https://huggingface.co/spaces/bennyguo/threestudio) or using our [self-hosted service](http://t23-g-01.threestudio.ai) (GPU support from Tencent). To host the web interface locally, see [here](https://github.com/threestudio-project/threestudio#gradio-web-interface). 
From cfabde68d89c96975bf0a230b85955d3ac143a2d Mon Sep 17 00:00:00 2001 From: Ruizhi Shao <2238454358@qq.com> Date: Fri, 1 Dec 2023 03:56:32 +0800 Subject: [PATCH 05/24] add custom folder (#348) --- custom/put_custom_extensions_here | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 custom/put_custom_extensions_here diff --git a/custom/put_custom_extensions_here b/custom/put_custom_extensions_here new file mode 100644 index 00000000..e69de29b From 7ce2f499e8459f920f122c694796991fd0b6f88a Mon Sep 17 00:00:00 2001 From: Ruizhi Shao <2238454358@qq.com> Date: Fri, 1 Dec 2023 06:00:36 +0800 Subject: [PATCH 06/24] Update README.md --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 71505dc5..cfec4bac 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ threestudio is a unified framework for 3D content creation from text prompts, si | Zero-1-to-3 | Magic123 |
| InstructNeRF2NeRF | Control4D | -

+

@@ -41,9 +41,15 @@ threestudio is a unified framework for 3D content creation from text prompts, si

- Did not find what you want? Submit a feature request or upvote others' requests here! + Did not find what you want? Checkout threestudio-extension or submit a feature request here!

+

+threestudio +

+

+threestudio +

## News - 11/30/2023 Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting). From eaadd2b5b813e997f06d8656d1d7854c6f2aca96 Mon Sep 17 00:00:00 2001 From: Ruizhi Shao <2238454358@qq.com> Date: Fri, 1 Dec 2023 06:36:16 +0800 Subject: [PATCH 07/24] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cfec4bac..018200e3 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ threestudio is a unified framework for 3D content creation from text prompts, si

- Did not find what you want? Checkout threestudio-extension or submit a feature request here! + Did not find what you want? Checkout threestudio-extension or submit a feature request here!

From 2c202276747a892cfc1ded8e27a005715be8f5f2 Mon Sep 17 00:00:00 2001 From: Ruizhi Shao <2238454358@qq.com> Date: Mon, 4 Dec 2023 22:00:04 +0800 Subject: [PATCH 08/24] ray direction normalize (#351) * add rays normalization setting --- .gitignore | 3 +++ threestudio/data/image.py | 8 +++++++- threestudio/data/multiview.py | 12 ++++++++++-- threestudio/data/uncond.py | 10 ++++++++-- threestudio/utils/ops.py | 4 +++- 5 files changed, 31 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 0bf85006..b774bf79 100644 --- a/.gitignore +++ b/.gitignore @@ -188,6 +188,9 @@ outputs-gradio/ # wandb wandb/ +# vscode +.code-workspace + custom/* load/tets/256_tets.npz diff --git a/threestudio/data/image.py b/threestudio/data/image.py index 5c60b53b..fe7c227e 100644 --- a/threestudio/data/image.py +++ b/threestudio/data/image.py @@ -48,6 +48,8 @@ class SingleImageDataModuleConfig: requires_depth: bool = False requires_normal: bool = False + rays_d_normalize: bool = True + class SingleImageDataBase: def setup(self, cfg, split): @@ -141,7 +143,11 @@ def set_rays(self): directions[:, :, :, :2] = directions[:, :, :, :2] / self.focal_length rays_o, rays_d = get_rays( - directions, self.c2w, keepdim=True, noise_scale=self.cfg.rays_noise_scale + directions, + self.c2w, + keepdim=True, + noise_scale=self.cfg.rays_noise_scale, + normalize=self.cfg.rays_d_normalize, ) proj_mtx: Float[Tensor, "4 4"] = get_projection_matrix( diff --git a/threestudio/data/multiview.py b/threestudio/data/multiview.py index e127390a..8b722624 100644 --- a/threestudio/data/multiview.py +++ b/threestudio/data/multiview.py @@ -70,6 +70,8 @@ class MultiviewsDataModuleConfig: camera_distance: float = -1 eval_interpolation: Optional[Tuple[int, int, int]] = None # (0, 1, 30) + rays_d_normalize: bool = True + class MultiviewIterableDataset(IterableDataset): def __init__(self, cfg: Any) -> None: @@ -164,7 +166,10 @@ def __init__(self, cfg: Any) -> None: self.frames_img: Float[Tensor, "B H W 3"] = 
torch.stack(frames_img, dim=0) self.rays_o, self.rays_d = get_rays( - self.frames_direction, self.frames_c2w, keepdim=True + self.frames_direction, + self.frames_c2w, + keepdim=True, + normalize=self.cfg.rays_d_normalize, ) self.mvp_mtx: Float[Tensor, "B 4 4"] = get_mvp_matrix( self.frames_c2w, self.frames_proj @@ -344,7 +349,10 @@ def __init__(self, cfg: Any, split: str) -> None: self.frames_img: Float[Tensor, "B H W 3"] = torch.stack(frames_img, dim=0) self.rays_o, self.rays_d = get_rays( - self.frames_direction, self.frames_c2w, keepdim=True + self.frames_direction, + self.frames_c2w, + keepdim=True, + normalize=self.cfg.rays_d_normalize, ) self.mvp_mtx: Float[Tensor, "B 4 4"] = get_mvp_matrix( self.frames_c2w, self.frames_proj diff --git a/threestudio/data/uncond.py b/threestudio/data/uncond.py index d051e3fd..999ba55c 100644 --- a/threestudio/data/uncond.py +++ b/threestudio/data/uncond.py @@ -56,6 +56,8 @@ class RandomCameraDataModuleConfig: batch_uniform_azimuth: bool = True progressive_until: int = 0 # progressive ranges for elevation, azimuth, r, fovy + rays_d_normalize: bool = True + class RandomCameraIterableDataset(IterableDataset, Updateable): def __init__(self, cfg: Any) -> None: @@ -315,7 +317,9 @@ def collate(self, batch) -> Dict[str, Any]: ) # Importance note: the returned rays_d MUST be normalized! 
- rays_o, rays_d = get_rays(directions, c2w, keepdim=True) + rays_o, rays_d = get_rays( + directions, c2w, keepdim=True, normalize=self.cfg.rays_d_normalize + ) self.proj_mtx: Float[Tensor, "B 4 4"] = get_projection_matrix( fovy, self.width / self.height, 0.01, 100.0 @@ -418,7 +422,9 @@ def __init__(self, cfg: Any, split: str) -> None: directions[:, :, :, :2] / focal_length[:, None, None, None] ) - rays_o, rays_d = get_rays(directions, c2w, keepdim=True) + rays_o, rays_d = get_rays( + directions, c2w, keepdim=True, normalize=self.cfg.rays_d_normalize + ) self.proj_mtx: Float[Tensor, "B 4 4"] = get_projection_matrix( fovy, self.cfg.eval_width / self.cfg.eval_height, 0.01, 100.0 ) # FIXME: hard-coded near and far diff --git a/threestudio/utils/ops.py b/threestudio/utils/ops.py index b35d3cd0..81d5b599 100644 --- a/threestudio/utils/ops.py +++ b/threestudio/utils/ops.py @@ -222,6 +222,7 @@ def get_rays( c2w: Float[Tensor, "... 4 4"], keepdim=False, noise_scale=0.0, + normalize=True, ) -> Tuple[Float[Tensor, "... 3"], Float[Tensor, "... 
3"]]: # Rotate ray directions from camera coordinate to the world coordinate assert directions.shape[-1] == 3 @@ -257,7 +258,8 @@ def get_rays( rays_o = rays_o + torch.randn(3, device=rays_o.device) * noise_scale rays_d = rays_d + torch.randn(3, device=rays_d.device) * noise_scale - rays_d = F.normalize(rays_d, dim=-1) + if normalize: + rays_d = F.normalize(rays_d, dim=-1) if not keepdim: rays_o, rays_d = rays_o.reshape(-1, 3), rays_d.reshape(-1, 3) From 3fe3153bf29927459b5ad5cc98d955d9b4c51ba3 Mon Sep 17 00:00:00 2001 From: Ruizhi Shao <2238454358@qq.com> Date: Wed, 6 Dec 2023 23:42:14 +0800 Subject: [PATCH 09/24] Add modules of 4d-fy for 4D generation(#353) --- .../models/geometry/implicit_volume.py | 16 +++++ threestudio/models/networks.py | 64 +++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/threestudio/models/geometry/implicit_volume.py b/threestudio/models/geometry/implicit_volume.py index d1eeb96e..cfee0017 100644 --- a/threestudio/models/geometry/implicit_volume.py +++ b/threestudio/models/geometry/implicit_volume.py @@ -53,6 +53,9 @@ class Config(BaseImplicitGeometry.Config): # automatically determine the threshold isosurface_threshold: Union[float, str] = 25.0 + # 4D Gaussian Annealing + anneal_density_blob_std_config: Optional[dict] = None + cfg: Config def configure(self) -> None: @@ -267,3 +270,16 @@ def create_from( raise TypeError( f"Cannot create {ImplicitVolume.__name__} from {other.__class__.__name__}" ) + + def update_step( + self, epoch: int, global_step: int, on_load_weights: bool = False + ) -> None: + if self.cfg.anneal_density_blob_std_config is not None: + min_step = self.cfg.anneal_density_blob_std_config.min_anneal_step + max_step = self.cfg.anneal_density_blob_std_config.max_anneal_step + if global_step >= min_step and global_step <= max_step: + end_val = self.cfg.anneal_density_blob_std_config.end_val + start_val = self.cfg.anneal_density_blob_std_config.start_val + self.density_blob_std = start_val + (global_step - 
min_step) * ( + end_val - start_val + ) / (max_step - min_step) diff --git a/threestudio/models/networks.py b/threestudio/models/networks.py index 9dc3dc28..cfe986ea 100644 --- a/threestudio/models/networks.py +++ b/threestudio/models/networks.py @@ -64,6 +64,68 @@ def forward(self, x): return self.encoding(x) +# 4D implicit decomposition of space and time (4D-fy) +class TCNNEncodingSpatialTime(nn.Module): + def __init__( + self, in_channels, config, dtype=torch.float32, init_time_zero=False + ) -> None: + super().__init__() + self.n_input_dims = in_channels + config["otype"] = "HashGrid" + self.num_frames = 1 # config["num_frames"] + self.static = config["static"] + self.cfg = config_to_primitive(config) + self.cfg_time = self.cfg + self.n_key_frames = config.get("n_key_frames", 1) + with torch.cuda.device(get_rank()): + self.encoding = tcnn.Encoding(self.n_input_dims, self.cfg, dtype=dtype) + self.encoding_time = tcnn.Encoding( + self.n_input_dims + 1, self.cfg_time, dtype=dtype + ) + self.n_output_dims = self.encoding.n_output_dims + self.frame_time = None + if self.static: + self.set_temp_param_grad(requires_grad=False) + self.use_key_frame = config.get("use_key_frame", False) + self.is_video = True + self.update_occ_grid = False + + def set_temp_param_grad(self, requires_grad=False): + self.set_param_grad(self.encoding_time, requires_grad=requires_grad) + + def set_param_grad(self, param_list, requires_grad=False): + if isinstance(param_list, nn.Parameter): + param_list.requires_grad = requires_grad + else: + for param in param_list.parameters(): + param.requires_grad = requires_grad + + def forward(self, x): + # TODO frame_time only supports batch_size == 1 cases + if self.update_occ_grid and not isinstance(self.frame_time, float): + frame_time = self.frame_time + else: + if (self.static or not self.training) and self.frame_time is None: + frame_time = torch.zeros( + (self.num_frames, 1), device=x.device, dtype=x.dtype + ).expand(x.shape[0], 1) + else: + if 
self.frame_time is None: + frame_time = 0.0 + else: + frame_time = self.frame_time + frame_time = ( + torch.ones((self.num_frames, 1), device=x.device, dtype=x.dtype) + * frame_time + ).expand(x.shape[0], 1) + frame_time = frame_time.view(-1, 1) + enc_space = self.encoding(x) + x_frame_time = torch.cat((x, frame_time), 1) + enc_space_time = self.encoding_time(x_frame_time) + enc = enc_space + enc_space_time + return enc + + class ProgressiveBandHashGrid(nn.Module, Updateable): def __init__(self, in_channels, config, dtype=torch.float32): super().__init__() @@ -136,6 +198,8 @@ def get_encoding(n_input_dims: int, config) -> nn.Module: encoding = ProgressiveBandFrequency(n_input_dims, config_to_primitive(config)) elif config.otype == "ProgressiveBandHashGrid": encoding = ProgressiveBandHashGrid(n_input_dims, config_to_primitive(config)) + elif config.otype == "HashGridSpatialTime": + encoding = TCNNEncodingSpatialTime(n_input_dims, config) # 4D-fy encoding else: encoding = TCNNEncoding(n_input_dims, config_to_primitive(config)) encoding = CompositeEncoding( From 56564c88e0139bdd31b1585f8720a1ae6141f138 Mon Sep 17 00:00:00 2001 From: Vikram Voleti Date: Wed, 13 Dec 2023 13:24:32 -0500 Subject: [PATCH 10/24] [DRAFT] Adds stable-zero123 guidance (#356) * Adds stable-zero123 guidance * Fixes end-of-file? * Update README.md with gif * Fixes end-of-file? 
* Corrects link to huggingface model * general linear config * Fixed HF link * Fixes HF link --------- Co-authored-by: Vikram Voleti Co-authored-by: DSaurus <2238454358@qq.com> --- README.md | 27 ++ .../{zero123_64.yaml => stable-zero123.yaml} | 56 ++- configs/zero123.yaml | 8 +- load/images/{dog1.png => dog1_rgba.png} | Bin load/zero123/download.sh | 5 +- threestudio/models/guidance/__init__.py | 1 + .../guidance/stable_zero123_guidance.py | 340 ++++++++++++++++++ threestudio/utils/config.py | 5 + threestudio/utils/misc.py | 11 + 9 files changed, 415 insertions(+), 38 deletions(-) rename configs/{zero123_64.yaml => stable-zero123.yaml} (75%) rename load/images/{dog1.png => dog1_rgba.png} (100%) create mode 100644 threestudio/models/guidance/stable_zero123_guidance.py diff --git a/README.md b/README.md index 018200e3..4c3a0ab2 100644 --- a/README.md +++ b/README.md @@ -108,6 +108,8 @@ pip install ninja pip install -r requirements.txt ``` +- (Optional) `tiny-cuda-nn` installation might require downgrading pip to 23.0.1 + - (Optional, Recommended) The best-performing models in threestudio use the newly-released T2I model [DeepFloyd IF](https://github.com/deep-floyd/IF), which currently requires signing a license agreement. If you would like to use these models, you need to [accept the license on the model card of DeepFloyd IF](https://huggingface.co/DeepFloyd/IF-I-XL-v1.0), and login into the Hugging Face hub in the terminal by `huggingface-cli login`. - For contributors, see [here](https://github.com/threestudio-project/threestudio#contributing-to-threestudio). @@ -517,6 +519,31 @@ python launch.py --config configs/magic123-refine-sd.yaml --train --gpu 0 data.i - If the image contains non-front-facing objects, specifying the approximate elevation and azimuth angle by setting `data.default_elevation_deg` and `data.default_azimuth_deg` can be helpful. In threestudio, top is elevation +90 and bottom is elevation -90; left is azimuth -90 and right is azimuth +90. 
+ +### Stable Zero123 + +**Installation** + +Download pretrained Stable Zero123 checkpoint `stable_zero123.ckpt` into `load/zero123` from https://huggingface.co/stabilityai/stable-zero123 + +**Results obtained by threestudio (Stable Zero123 vs Zero123-XL)** +![Final_video_v01](https://github.com/threestudio-project/threestudio/assets/22424247/bf2d2213-5027-489c-a6ba-1c56c14ee8b7) + +**Example running commands** + +1. Take an image of your choice, or generate it from text using your favourite AI image generator such as SDXL Turbo (https://clipdrop.co/stable-diffusion-turbo) E.g. "A simple 3D render of a friendly dog" +2. Remove its background using Clipdrop (https://clipdrop.co/remove-background) +3. Save to `load/images/`, preferably with `_rgba.png` as the suffix +4. Run Zero-1-to-3 with the Stable Zero123 ckpt: +```sh +python launch.py --config configs/stable-zero123.yaml --train --gpu 0 data.image_path=./load/images/hamburger_rgba.png +``` + +**IMPORTANT NOTE: This is an experimental implementation and we're constantly improving the quality.** + +**IMPORTANT NOTE: This implementation extends the Zero-1-to-3 implementation below, and is heavily inspired by the Zero-1-to-3 implementation in [stable-dreamfusion](https://github.com/ashawkey/stable-dreamfusion)! 
`extern/ldm_zero123` is borrowed from `stable-dreamfusion/ldm`.** + + ### Zero-1-to-3 [![arXiv](https://img.shields.io/badge/arXiv-2303.11328-b31b1b.svg?style=flat-square)](https://arxiv.org/abs/2303.11328) **Installation** diff --git a/configs/zero123_64.yaml b/configs/stable-zero123.yaml similarity index 75% rename from configs/zero123_64.yaml rename to configs/stable-zero123.yaml index 6a579335..5a372f66 100644 --- a/configs/zero123_64.yaml +++ b/configs/stable-zero123.yaml @@ -1,24 +1,25 @@ -name: "zero123" -tag: "${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}_prog${data.random_camera.progressive_until}" +name: "zero123-sai" +tag: "${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}" exp_root_dir: "outputs" seed: 0 data_type: "single-image-datamodule" data: # threestudio/data/image.py -> SingleImageDataModuleConfig image_path: ./load/images/hamburger_rgba.png - height: 128 - width: 128 - default_elevation_deg: 0.0 + height: [128, 256, 512] + width: [128, 256, 512] + resolution_milestones: [200, 300] + default_elevation_deg: 5.0 default_azimuth_deg: 0.0 default_camera_distance: 3.8 default_fovy_deg: 20.0 requires_depth: ${cmaxgt0orcmaxgt0:${system.loss.lambda_depth},${system.loss.lambda_depth_rel}} requires_normal: ${cmaxgt0:${system.loss.lambda_normal}} random_camera: # threestudio/data/uncond.py -> RandomCameraDataModuleConfig - height: 64 - width: 64 - batch_size: 12 - resolution_milestones: [] + height: [64, 128, 256] + width: [64, 128, 256] + batch_size: [12, 8, 4] + resolution_milestones: [200, 300] eval_height: 512 eval_width: 512 eval_batch_size: 1 @@ -47,13 +48,6 @@ system: radius: 2.0 normal_type: "analytic" - # the density initialization proposed in the DreamFusion paper - # does not work very well - # density_bias: "blob_dreamfusion" - # density_activation: exp - # density_blob_scale: 5. 
- # density_blob_std: 0.2 - # use Magic3D density initialization instead density_bias: "blob_magic3d" density_activation: softplus @@ -88,28 +82,26 @@ system: renderer: radius: ${system.geometry.radius} num_samples_per_ray: 512 - return_comp_normal: ${gt0:${system.loss.lambda_normal_smooth}} - return_normal_perturb: ${gt0:${system.loss.lambda_3d_normal_smooth}} + return_comp_normal: ${cmaxgt0:${system.loss.lambda_normal_smooth}} + return_normal_perturb: ${cmaxgt0:${system.loss.lambda_3d_normal_smooth}} prompt_processor_type: "dummy-prompt-processor" # Zero123 doesn't use prompts prompt_processor: pretrained_model_name_or_path: "" prompt: "" - guidance_type: "zero123-guidance" + guidance_type: "stable-zero123-guidance" guidance: - pretrained_model_name_or_path: "./load/zero123/zero123-xl.ckpt" pretrained_config: "./load/zero123/sd-objaverse-finetune-c_concat-256.yaml" + pretrained_model_name_or_path: "./load/zero123/stable_zero123.ckpt" vram_O: ${not:${gt0:${system.freq.guidance_eval}}} cond_image_path: ${data.image_path} cond_elevation_deg: ${data.default_elevation_deg} cond_azimuth_deg: ${data.default_azimuth_deg} cond_camera_distance: ${data.default_camera_distance} guidance_scale: 3.0 - #min_step_percent: 0.02 - min_step_percent: [0, 0.4, 0.2, 200] # (start_iter, start_val, end_val, end_iter) - #max_step_percent: 0.98 - max_step_percent: [0, 0.85, 0.5, 200] + min_step_percent: [50, 0.7, 0.3, 200] # (start_iter, start_val, end_val, end_iter) + max_step_percent: [50, 0.98, 0.8, 200] freq: ref_only_steps: 0 @@ -123,16 +115,16 @@ system: loss: lambda_sds: 0.1 - lambda_rgb: 500. + lambda_rgb: [100, 500., 1000., 400] lambda_mask: 50. lambda_depth: 0. # 0.05 lambda_depth_rel: 0. # [0, 0, 0.05, 100] lambda_normal: 0. 
# [0, 0, 0.05, 100] - lambda_normal_smooth: 10.0 - lambda_3d_normal_smooth: 10.0 + lambda_normal_smooth: [100, 7.0, 5.0, 150, 10.0, 200] + lambda_3d_normal_smooth: [100, 7.0, 5.0, 150, 10.0, 200] lambda_orient: 1.0 - lambda_sparsity: 0.1 # should be tweaked for every model - lambda_opaque: 0.1 + lambda_sparsity: 0.5 # should be tweaked for every model + lambda_opaque: 0.5 optimizer: name: Adam @@ -142,14 +134,14 @@ system: eps: 1.e-8 trainer: - max_steps: 400 + max_steps: 600 log_every_n_steps: 1 num_sanity_val_steps: 0 val_check_interval: 100 enable_progress_bar: true - precision: 16-mixed + precision: 32 checkpoint: save_last: true # save at each validation time save_top_k: -1 - every_n_train_steps: ${trainer.max_steps} + every_n_train_steps: 100 # ${trainer.max_steps} diff --git a/configs/zero123.yaml b/configs/zero123.yaml index ca61b2e4..0f6ade97 100644 --- a/configs/zero123.yaml +++ b/configs/zero123.yaml @@ -1,5 +1,5 @@ name: "zero123" -tag: "${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}_prog${data.random_camera.progressive_until}" +tag: "${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}" exp_root_dir: "outputs" seed: 0 @@ -9,7 +9,7 @@ data: # threestudio/data/image.py -> SingleImageDataModuleConfig height: [128, 256, 512] width: [128, 256, 512] resolution_milestones: [200, 300] - default_elevation_deg: 0.0 + default_elevation_deg: 5.0 default_azimuth_deg: 0.0 default_camera_distance: 3.8 default_fovy_deg: 20.0 @@ -111,9 +111,7 @@ system: cond_azimuth_deg: ${data.default_azimuth_deg} cond_camera_distance: ${data.default_camera_distance} guidance_scale: 3.0 - #min_step_percent: 0.02 min_step_percent: [0, 0.4, 0.2, 200] # (start_iter, start_val, end_val, end_iter) - #max_step_percent: 0.98 max_step_percent: [0, 0.85, 0.5, 200] freq: @@ -147,7 +145,7 @@ system: eps: 1.e-8 trainer: - max_steps: 400 + max_steps: 600 log_every_n_steps: 1 num_sanity_val_steps: 0 val_check_interval: 100 diff --git 
a/load/images/dog1.png b/load/images/dog1_rgba.png similarity index 100% rename from load/images/dog1.png rename to load/images/dog1_rgba.png diff --git a/load/zero123/download.sh b/load/zero123/download.sh index 35cc597e..169676b7 100644 --- a/load/zero123/download.sh +++ b/load/zero123/download.sh @@ -1 +1,4 @@ -wget https://huggingface.co/cvlab/zero123-weights/resolve/main/105000.ckpt +# wget https://huggingface.co/cvlab/zero123-weights/resolve/main/105000.ckpt +# mv 105000.ckpt zero123-original.ckpt +wget https://zero123.cs.columbia.edu/assets/zero123-xl.ckpt +# Download stable_zero123.ckpt from https://huggingface.co/stabilityai/stable-zero123 diff --git a/threestudio/models/guidance/__init__.py b/threestudio/models/guidance/__init__.py index eeda92e4..b25a8d76 100644 --- a/threestudio/models/guidance/__init__.py +++ b/threestudio/models/guidance/__init__.py @@ -5,6 +5,7 @@ stable_diffusion_guidance, stable_diffusion_unified_guidance, stable_diffusion_vsd_guidance, + stable_zero123_guidance, zero123_guidance, zero123_unified_guidance, ) diff --git a/threestudio/models/guidance/stable_zero123_guidance.py b/threestudio/models/guidance/stable_zero123_guidance.py new file mode 100644 index 00000000..6d545908 --- /dev/null +++ b/threestudio/models/guidance/stable_zero123_guidance.py @@ -0,0 +1,340 @@ +import importlib +import os +from dataclasses import dataclass, field + +import cv2 +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from diffusers import DDIMScheduler, DDPMScheduler, StableDiffusionPipeline +from diffusers.utils.import_utils import is_xformers_available +from omegaconf import OmegaConf +from tqdm import tqdm + +import threestudio +from threestudio.utils.base import BaseObject +from threestudio.utils.misc import C, parse_version +from threestudio.utils.typing import * + + +def get_obj_from_str(string, reload=False): + module, cls = string.rsplit(".", 1) + if reload: + module_imp = 
importlib.import_module(module) + importlib.reload(module_imp) + return getattr(importlib.import_module(module, package=None), cls) + + +def instantiate_from_config(config): + if not "target" in config: + if config == "__is_first_stage__": + return None + elif config == "__is_unconditional__": + return None + raise KeyError("Expected key `target` to instantiate.") + return get_obj_from_str(config["target"])(**config.get("params", dict())) + + +# load model +def load_model_from_config(config, ckpt, device, vram_O=True, verbose=False): + pl_sd = torch.load(ckpt, map_location="cpu") + + if "global_step" in pl_sd and verbose: + print(f'[INFO] Global Step: {pl_sd["global_step"]}') + + sd = pl_sd["state_dict"] + + model = instantiate_from_config(config.model) + m, u = model.load_state_dict(sd, strict=False) + + if len(m) > 0 and verbose: + print("[INFO] missing keys: \n", m) + if len(u) > 0 and verbose: + print("[INFO] unexpected keys: \n", u) + + # manually load ema and delete it to save GPU memory + if model.use_ema: + if verbose: + print("[INFO] loading EMA...") + model.model_ema.copy_to(model.model) + del model.model_ema + + if vram_O: + # we don't need decoder + del model.first_stage_model.decoder + + torch.cuda.empty_cache() + + model.eval().to(device) + + return model + + +@threestudio.register("stable-zero123-guidance") +class StableZero123Guidance(BaseObject): + @dataclass + class Config(BaseObject.Config): + pretrained_model_name_or_path: str = "load/zero123/stable-zero123.ckpt" + pretrained_config: str = "load/zero123/sd-objaverse-finetune-c_concat-256.yaml" + vram_O: bool = True + + cond_image_path: str = "load/images/hamburger_rgba.png" + cond_elevation_deg: float = 0.0 + cond_azimuth_deg: float = 0.0 + cond_camera_distance: float = 1.2 + + guidance_scale: float = 5.0 + + grad_clip: Optional[ + Any + ] = None # field(default_factory=lambda: [0, 2.0, 8.0, 1000]) + half_precision_weights: bool = False + + min_step_percent: float = 0.02 + max_step_percent: 
float = 0.98 + + cfg: Config + + def configure(self) -> None: + threestudio.info(f"Loading Stable Zero123 ...") + + self.config = OmegaConf.load(self.cfg.pretrained_config) + # TODO: seems it cannot load into fp16... + self.weights_dtype = torch.float32 + self.model = load_model_from_config( + self.config, + self.cfg.pretrained_model_name_or_path, + device=self.device, + vram_O=self.cfg.vram_O, + ) + + for p in self.model.parameters(): + p.requires_grad_(False) + + # timesteps: use diffuser for convenience... hope it's alright. + self.num_train_timesteps = self.config.model.params.timesteps + + self.scheduler = DDIMScheduler( + self.num_train_timesteps, + self.config.model.params.linear_start, + self.config.model.params.linear_end, + beta_schedule="scaled_linear", + clip_sample=False, + set_alpha_to_one=False, + steps_offset=1, + ) + + self.num_train_timesteps = self.scheduler.config.num_train_timesteps + self.set_min_max_steps() # set to default value + + self.alphas: Float[Tensor, "..."] = self.scheduler.alphas_cumprod.to( + self.device + ) + + self.grad_clip_val: Optional[float] = None + + self.prepare_embeddings(self.cfg.cond_image_path) + + threestudio.info(f"Loaded Stable Zero123!") + + @torch.cuda.amp.autocast(enabled=False) + def set_min_max_steps(self, min_step_percent=0.02, max_step_percent=0.98): + self.min_step = int(self.num_train_timesteps * min_step_percent) + self.max_step = int(self.num_train_timesteps * max_step_percent) + + @torch.cuda.amp.autocast(enabled=False) + def prepare_embeddings(self, image_path: str) -> None: + # load cond image for zero123 + assert os.path.exists(image_path) + rgba = cv2.cvtColor( + cv2.imread(image_path, cv2.IMREAD_UNCHANGED), cv2.COLOR_BGRA2RGBA + ) + rgba = ( + cv2.resize(rgba, (256, 256), interpolation=cv2.INTER_AREA).astype( + np.float32 + ) + / 255.0 + ) + rgb = rgba[..., :3] * rgba[..., 3:] + (1 - rgba[..., 3:]) + self.rgb_256: Float[Tensor, "1 3 H W"] = ( + torch.from_numpy(rgb) + .unsqueeze(0) + .permute(0, 3, 
1, 2) + .contiguous() + .to(self.device) + ) + self.c_crossattn, self.c_concat = self.get_img_embeds(self.rgb_256) + + @torch.cuda.amp.autocast(enabled=False) + @torch.no_grad() + def get_img_embeds( + self, + img: Float[Tensor, "B 3 256 256"], + ) -> Tuple[Float[Tensor, "B 1 768"], Float[Tensor, "B 4 32 32"]]: + img = img * 2.0 - 1.0 + c_crossattn = self.model.get_learned_conditioning(img.to(self.weights_dtype)) + c_concat = self.model.encode_first_stage(img.to(self.weights_dtype)).mode() + return c_crossattn, c_concat + + @torch.cuda.amp.autocast(enabled=False) + def encode_images( + self, imgs: Float[Tensor, "B 3 256 256"] + ) -> Float[Tensor, "B 4 32 32"]: + input_dtype = imgs.dtype + imgs = imgs * 2.0 - 1.0 + latents = self.model.get_first_stage_encoding( + self.model.encode_first_stage(imgs.to(self.weights_dtype)) + ) + return latents.to(input_dtype) # [B, 4, 32, 32] Latent space image + + @torch.cuda.amp.autocast(enabled=False) + def decode_latents( + self, + latents: Float[Tensor, "B 4 H W"], + ) -> Float[Tensor, "B 3 512 512"]: + input_dtype = latents.dtype + image = self.model.decode_first_stage(latents) + image = (image * 0.5 + 0.5).clamp(0, 1) + return image.to(input_dtype) + + @torch.cuda.amp.autocast(enabled=False) + @torch.no_grad() + def get_cond( + self, + elevation: Float[Tensor, "B"], + azimuth: Float[Tensor, "B"], + camera_distances: Float[Tensor, "B"], + c_crossattn=None, + c_concat=None, + **kwargs, + ) -> dict: + T = torch.stack( + [ + torch.deg2rad( + (90 - elevation) - (90 - self.cfg.cond_elevation_deg) + ), # Zero123 polar is 90-elevation + torch.sin(torch.deg2rad(azimuth - self.cfg.cond_azimuth_deg)), + torch.cos(torch.deg2rad(azimuth - self.cfg.cond_azimuth_deg)), + torch.deg2rad( + 90 - torch.full_like(elevation, self.cfg.cond_elevation_deg) + ), + ], + dim=-1, + )[:, None, :].to(self.device) + cond = {} + clip_emb = self.model.cc_projection( + torch.cat( + [ + (self.c_crossattn if c_crossattn is None else c_crossattn).repeat( + len(T), 
1, 1 + ), + T, + ], + dim=-1, + ) + ) + cond["c_crossattn"] = [ + torch.cat([torch.zeros_like(clip_emb).to(self.device), clip_emb], dim=0) + ] + cond["c_concat"] = [ + torch.cat( + [ + torch.zeros_like(self.c_concat) + .repeat(len(T), 1, 1, 1) + .to(self.device), + (self.c_concat if c_concat is None else c_concat).repeat( + len(T), 1, 1, 1 + ), + ], + dim=0, + ) + ] + return cond + + def __call__( + self, + rgb: Float[Tensor, "B H W C"], + elevation: Float[Tensor, "B"], + azimuth: Float[Tensor, "B"], + camera_distances: Float[Tensor, "B"], + rgb_as_latents=False, + **kwargs, + ): + batch_size = rgb.shape[0] + + rgb_BCHW = rgb.permute(0, 3, 1, 2) + latents: Float[Tensor, "B 4 64 64"] + if rgb_as_latents: + latents = ( + F.interpolate(rgb_BCHW, (32, 32), mode="bilinear", align_corners=False) + * 2 + - 1 + ) + else: + rgb_BCHW_512 = F.interpolate( + rgb_BCHW, (256, 256), mode="bilinear", align_corners=False + ) + # encode image into latents with vae + latents = self.encode_images(rgb_BCHW_512) + + cond = self.get_cond(elevation, azimuth, camera_distances) + + # timestep ~ U(0.02, 0.98) to avoid very high/low noise level + t = torch.randint( + self.min_step, + self.max_step + 1, + [batch_size], + dtype=torch.long, + device=self.device, + ) + + # predict the noise residual with unet, NO grad! + with torch.no_grad(): + # add noise + noise = torch.randn_like(latents) # TODO: use torch generator + latents_noisy = self.scheduler.add_noise(latents, noise, t) + # pred noise + x_in = torch.cat([latents_noisy] * 2) + t_in = torch.cat([t] * 2) + noise_pred = self.model.apply_model(x_in, t_in, cond) + + # perform guidance + noise_pred_uncond, noise_pred_cond = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + self.cfg.guidance_scale * ( + noise_pred_cond - noise_pred_uncond + ) + + w = (1 - self.alphas[t]).reshape(-1, 1, 1, 1) + grad = w * (noise_pred - noise) + grad = torch.nan_to_num(grad) + # clip grad for stable training? 
+ if self.grad_clip_val is not None: + grad = grad.clamp(-self.grad_clip_val, self.grad_clip_val) + + # loss = SpecifyGradient.apply(latents, grad) + # SpecifyGradient is not straghtforward, use a reparameterization trick instead + target = (latents - grad).detach() + # d(loss)/d(latents) = latents - target = latents - (latents - grad) = grad + loss_sds = 0.5 * F.mse_loss(latents, target, reduction="sum") / batch_size + + guidance_out = { + "loss_sds": loss_sds, + "grad_norm": grad.norm(), + "min_step": self.min_step, + "max_step": self.max_step, + } + + return guidance_out + + def update_step(self, epoch: int, global_step: int, on_load_weights: bool = False): + # clip grad for stable training as demonstrated in + # Debiasing Scores and Prompts of 2D Diffusion for Robust Text-to-3D Generation + # http://arxiv.org/abs/2303.15413 + if self.cfg.grad_clip is not None: + self.grad_clip_val = C(self.cfg.grad_clip, epoch, global_step) + + self.set_min_max_steps( + min_step_percent=C(self.cfg.min_step_percent, epoch, global_step), + max_step_percent=C(self.cfg.max_step_percent, epoch, global_step), + ) diff --git a/threestudio/utils/config.py b/threestudio/utils/config.py index 99456333..88a7d092 100644 --- a/threestudio/utils/config.py +++ b/threestudio/utils/config.py @@ -35,6 +35,11 @@ def C_max(value: Any) -> float: value = config_to_primitive(value) if not isinstance(value, list): raise TypeError("Scalar specification only supports list, got", type(value)) + if len(value) >= 6: + max_value = value[2] + for i in range(4, len(value), 2): + max_value = max(max_value, value[i]) + value = [value[0], value[1], max_value, value[3]] if len(value) == 3: value = [0] + value assert len(value) == 4 diff --git a/threestudio/utils/misc.py b/threestudio/utils/misc.py index 7954bb86..969c7c60 100644 --- a/threestudio/utils/misc.py +++ b/threestudio/utils/misc.py @@ -71,6 +71,17 @@ def C(value: Any, epoch: int, global_step: int) -> float: raise TypeError("Scalar specification only 
supports list, got", type(value)) if len(value) == 3: value = [0] + value + if len(value) >= 6: + select_i = 3 + for i in range(3, len(value) - 2, 2): + if global_step >= value[i]: + select_i = i + 2 + if select_i != 3: + start_value, start_step = value[select_i - 3], value[select_i - 2] + else: + start_step, start_value = value[:2] + end_value, end_step = value[select_i - 1], value[select_i] + value = [start_step, start_value, end_value, end_step] assert len(value) == 4 start_step, start_value, end_value, end_step = value if isinstance(end_step, int): From c86246d7b194915584abb4b33703abd3e3966f01 Mon Sep 17 00:00:00 2001 From: Ruizhi Shao <2238454358@qq.com> Date: Fri, 15 Dec 2023 03:32:57 +0800 Subject: [PATCH 11/24] perceptual loss update (#358) --- threestudio/systems/control4d_multiview.py | 4 ++-- threestudio/systems/instructnerf2nerf.py | 3 ++- threestudio/utils/perceptual/perceptual.py | 23 ++++++++++++++++++++++ 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/threestudio/systems/control4d_multiview.py b/threestudio/systems/control4d_multiview.py index 0f198b51..8cfd9cf5 100644 --- a/threestudio/systems/control4d_multiview.py +++ b/threestudio/systems/control4d_multiview.py @@ -37,8 +37,8 @@ def configure(self) -> None: material=self.material, background=self.background, ) - - self.perceptual_loss = PerceptualLoss().eval().to(get_device()) + p_config = {} + self.perceptual_loss = threestudio.find("perceptual-loss")(p_config) self.edit_frames = {} self.per_editing_step = self.cfg.per_editing_step self.start_editing_step = self.cfg.start_editing_step diff --git a/threestudio/systems/instructnerf2nerf.py b/threestudio/systems/instructnerf2nerf.py index 16e914e1..f6e3ecde 100644 --- a/threestudio/systems/instructnerf2nerf.py +++ b/threestudio/systems/instructnerf2nerf.py @@ -24,7 +24,8 @@ def configure(self): # create geometry, material, background, renderer super().configure() self.edit_frames = {} - self.perceptual_loss = 
PerceptualLoss().eval().to(get_device()) + p_config = {} + self.perceptual_loss = threestudio.find("perceptual-loss")(p_config) def forward(self, batch: Dict[str, Any]) -> Dict[str, Any]: render_out = self.renderer(**batch) diff --git a/threestudio/utils/perceptual/perceptual.py b/threestudio/utils/perceptual/perceptual.py index d756694a..403d9a92 100644 --- a/threestudio/utils/perceptual/perceptual.py +++ b/threestudio/utils/perceptual/perceptual.py @@ -1,12 +1,35 @@ """Stripped version of https://github.com/richzhang/PerceptualSimilarity/tree/master/models""" from collections import namedtuple +from dataclasses import dataclass, field import torch import torch.nn as nn from torchvision import models +import threestudio +from threestudio.utils.base import BaseObject from threestudio.utils.perceptual.utils import get_ckpt_path +from threestudio.utils.typing import * + + +@threestudio.register("perceptual-loss") +class PerceptualLossObject(BaseObject): + @dataclass + class Config(BaseObject.Config): + use_dropout: bool = True + + cfg: Config + + def configure(self) -> None: + self.perceptual_loss = PerceptualLoss(self.cfg.use_dropout).to(self.device) + + def __call__( + self, + x: Float[Tensor, "B 3 256 256"], + y: Float[Tensor, "B 3 256 256"], + ): + return self.perceptual_loss(x, y) class PerceptualLoss(nn.Module): From 3597b550e483a91e0a52587fd72d48902fc4b897 Mon Sep 17 00:00:00 2001 From: Ruizhi Shao <2238454358@qq.com> Date: Fri, 15 Dec 2023 20:43:56 +0800 Subject: [PATCH 12/24] Automatically find last checkpoint and support multi-stage training (#362) --- threestudio/systems/base.py | 10 +++++++++- threestudio/utils/misc.py | 21 +++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/threestudio/systems/base.py b/threestudio/systems/base.py index 5b668ea6..73faac60 100644 --- a/threestudio/systems/base.py +++ b/threestudio/systems/base.py @@ -13,7 +13,13 @@ update_if_possible, ) from threestudio.utils.config import parse_structured 
-from threestudio.utils.misc import C, cleanup, get_device, load_module_weights +from threestudio.utils.misc import ( + C, + cleanup, + find_last_path, + get_device, + load_module_weights, +) from threestudio.utils.saving import SaverMixin from threestudio.utils.typing import * @@ -241,6 +247,8 @@ class Config(BaseSystem.Config): cfg: Config def configure(self) -> None: + self.cfg.geometry_convert_from = find_last_path(self.cfg.geometry_convert_from) + self.cfg.weights = find_last_path(self.cfg.weights) if ( self.cfg.geometry_convert_from # from_coarse must be specified and not self.cfg.weights # not initialized from coarse when weights are specified diff --git a/threestudio/utils/misc.py b/threestudio/utils/misc.py index 969c7c60..ccb4987f 100644 --- a/threestudio/utils/misc.py +++ b/threestudio/utils/misc.py @@ -134,3 +134,24 @@ def broadcast(tensor, src=0): def enable_gradient(model, enabled: bool = True) -> None: for param in model.parameters(): param.requires_grad_(enabled) + + +def find_last_path(path: str): + if (path is not None) and ("LAST" in path): + path = path.replace(" ", "_") + base_dir_prefix, suffix = path.split("LAST", 1) + base_dir = os.path.dirname(base_dir_prefix) + prefix = os.path.split(base_dir_prefix)[-1] + base_dir_prefix = os.path.join(base_dir, prefix) + all_path = os.listdir(base_dir) + all_path = [os.path.join(base_dir, dir) for dir in all_path] + filtered_path = [dir for dir in all_path if dir.startswith(base_dir_prefix)] + filtered_path.sort(reverse=True) + last_path = filtered_path[0] + new_path = last_path + suffix + if os.path.exists(new_path): + return new_path + else: + raise FileNotFoundError(new_path) + else: + return path From 5d21501996de6a9542e2164506253c36608f94ed Mon Sep 17 00:00:00 2001 From: DSaurus <2238454358@qq.com> Date: Fri, 15 Dec 2023 22:37:42 +0800 Subject: [PATCH 13/24] update extensions --- launch.py | 2 +- threestudio/__init__.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git 
a/launch.py b/launch.py index d24940af..bca4ae11 100644 --- a/launch.py +++ b/launch.py @@ -82,7 +82,7 @@ def load_custom_modules(): and os.path.splitext(module_path)[1] != ".py" ): continue - if module_path.endswith(".disabled"): + if module_path.endswith("_disabled"): continue time_before = time.perf_counter() success = load_custom_module(module_path) diff --git a/threestudio/__init__.py b/threestudio/__init__.py index 2c83608f..5651db5e 100644 --- a/threestudio/__init__.py +++ b/threestudio/__init__.py @@ -3,7 +3,12 @@ def register(name): def decorator(cls): - __modules__[name] = cls + if name in __modules__: + raise ValueError( + f"Module {name} already exists! Names of extensions conflict!" + ) + else: + __modules__[name] = cls return cls return decorator From 145d2bdbfd6554a7e6ba0ec8e41ec052dfdc519e Mon Sep 17 00:00:00 2001 From: Ikko Eltociear Ashimine Date: Sun, 17 Dec 2023 00:46:34 +0900 Subject: [PATCH 14/24] Update README.md (#366) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4c3a0ab2..d82af86d 100644 --- a/README.md +++ b/README.md @@ -431,7 +431,7 @@ https://github.com/threestudio-project/threestudio/assets/19284678/72217cdd-765a - Most of the settings are the same as the DreamFusion model. Please refer to the notable differences of the DreamFusion model. - We use NeuS as the geometry representation while the original paper uses VolSDF. -- We adopt techniques from [Neuralangelo](https://arxiv.org/abs/2306.03092) to stablize normal computation when using hash grids. +- We adopt techniques from [Neuralangelo](https://arxiv.org/abs/2306.03092) to stabilize normal computation when using hash grids. - We currently only implemented the coarse stage of TextMesh. 
**Example running commands** From 03671ab851364753142e75344dc303dfa48e7048 Mon Sep 17 00:00:00 2001 From: Ruizhi Shao <2238454358@qq.com> Date: Mon, 18 Dec 2023 21:28:44 +0800 Subject: [PATCH 15/24] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index d82af86d..25a1a6c7 100644 --- a/README.md +++ b/README.md @@ -48,10 +48,13 @@ threestudio is a unified framework for 3D content creation from text prompts, si threestudio

+threestudio +threestudio threestudio

## News +- 18/12/2023 Implementation of [4D-fy](https://github.com/DSaurus/threestudio-4dfy) for 4D generation and [DreamCraft3D](https://github.com/DSaurus/threestudio-dreamcraft3D) for high-quality image-to-3D generation as the custom extensions! Follow the instructions on the extensions website to give it a try. - 11/30/2023 Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting). - 11/30/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions). - 08/25/2023: Implementation of [Magic123](https://guochengqian.github.io/project/magic123/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#magic123-) to give it a try. From b6d7c12075396bdb89d387f2a3b4a573290de35d Mon Sep 17 00:00:00 2001 From: Vikram Voleti Date: Mon, 18 Dec 2023 11:56:17 -0500 Subject: [PATCH 16/24] Update README.md for Stable Zero123 (#372) --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 25a1a6c7..deab297e 100644 --- a/README.md +++ b/README.md @@ -54,8 +54,9 @@ threestudio is a unified framework for 3D content creation from text prompts, si

## News -- 18/12/2023 Implementation of [4D-fy](https://github.com/DSaurus/threestudio-4dfy) for 4D generation and [DreamCraft3D](https://github.com/DSaurus/threestudio-dreamcraft3D) for high-quality image-to-3D generation as the custom extensions! Follow the instructions on the extensions website to give it a try. -- 11/30/2023 Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting). +- 12/18/2023: Implementation of [4D-fy](https://github.com/DSaurus/threestudio-4dfy) for 4D generation and [DreamCraft3D](https://github.com/DSaurus/threestudio-dreamcraft3D) for high-quality image-to-3D generation as the custom extensions! Follow the instructions on the extensions website to give it a try. +- 12/13/2023: Implementation supporting [Stable Zero123](https://stability.ai/news/stable-zero123-3d-generation) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#stable-zero123) to give it a try. +- 11/30/2023: Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting). - 11/30/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions). - 08/25/2023: Implementation of [Magic123](https://guochengqian.github.io/project/magic123/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#magic123-) to give it a try. 
- 07/06/2023: Join our [Discord server](https://discord.gg/ejer2MAB8N) for lively discussions! From cf23ed6eab4b145d45954ac7db8dc78f94616914 Mon Sep 17 00:00:00 2001 From: Ruizhi Shao <2238454358@qq.com> Date: Tue, 19 Dec 2023 14:02:27 +0800 Subject: [PATCH 17/24] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index deab297e..c77f7b92 100644 --- a/README.md +++ b/README.md @@ -533,6 +533,9 @@ Download pretrained Stable Zero123 checkpoint `stable-zero123.ckpt` into `load/z **Results obtained by threestudio (Stable Zero123 vs Zero123-XL)** ![Final_video_v01](https://github.com/threestudio-project/threestudio/assets/22424247/bf2d2213-5027-489c-a6ba-1c56c14ee8b7) +**Direct multi-view images generation** +If you only want to generate multi-view images, please refer to [threestudio-mvimg-gen](https://github.com/DSaurus/threestudio-mvimg-gen). This extension can use Stable Zero123 to directly generate images from multi-view perspectives. + **Example running commands** 1. Take an image of your choice, or generate it from text using your favourite AI image generator such as SDXL Turbo (https://clipdrop.co/stable-diffusion-turbo) E.g. 
"A simple 3D render of a friendly dog" From 47b6a33827350fc72d9be2d69ed6ad8522a350ba Mon Sep 17 00:00:00 2001 From: Ruizhi Shao <2238454358@qq.com> Date: Tue, 19 Dec 2023 19:58:20 +0800 Subject: [PATCH 18/24] add version (#375) --- threestudio/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/threestudio/__init__.py b/threestudio/__init__.py index 5651db5e..a1184e43 100644 --- a/threestudio/__init__.py +++ b/threestudio/__init__.py @@ -1,4 +1,5 @@ __modules__ = {} +__version__ = "0.2.0" def register(name): From 23b2d717474ffefd3e88e8f69c0e9695c5c6f7f8 Mon Sep 17 00:00:00 2001 From: bennyguo Date: Thu, 21 Dec 2023 12:08:11 +0800 Subject: [PATCH 19/24] update gradio app --- gradio_app.py | 5 +++-- requirements.txt | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/gradio_app.py b/gradio_app.py index c2d32f5f..0d921d98 100644 --- a/gradio_app.py +++ b/gradio_app.py @@ -201,7 +201,7 @@ def run( # manually assign the output directory, name and tag so that we know the trial directory name = os.path.basename(model_config[model_name]["path"]).split(".")[0] - tag = datetime.now().strftime("@%Y%m%d-%H%M%S") + tag = datetime.now().strftime("%Y%m%d-%H%M%S") trial_dir = os.path.join(save_root, EXP_ROOT_DIR, name, tag) alive_path = os.path.join(trial_dir, "alive") @@ -441,6 +441,7 @@ def launch( run_btn, stop_btn, ], + concurrency_limit=1, ) stop_btn.click( fn=stop_run, @@ -453,7 +454,7 @@ def launch( launch_args = {"server_port": port} if listen: launch_args["server_name"] = "0.0.0.0" - demo.queue(concurrency_count=1).launch(**launch_args) + demo.queue().launch(**launch_args) def watch( diff --git a/requirements.txt b/requirements.txt index 142a76d2..88706a6a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,7 +20,7 @@ networkx pysdf PyMCubes wandb -gradio +gradio==4.11.0 git+https://github.com/ashawkey/envlight.git torchmetrics From fa40007b7b6c90f34cdec957a2e91acb65e1fe60 Mon Sep 17 00:00:00 2001 From: Ruizhi Shao <2238454358@qq.com> 
Date: Sat, 23 Dec 2023 19:04:17 +0800 Subject: [PATCH 20/24] Update README.md --- README.md | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index c77f7b92..d0f69fa9 100644 --- a/README.md +++ b/README.md @@ -48,27 +48,33 @@ threestudio is a unified framework for 3D content creation from text prompts, si threestudio

-threestudio -threestudio + +threestudio +threestudio +threestudio threestudio

+

+| Animate-124 | 4D-fy | DreamCraft3D | Gaussian Splatting | MVDream | Mesh-Fitting | + ## News -- 12/18/2023: Implementation of [4D-fy](https://github.com/DSaurus/threestudio-4dfy) for 4D generation and [DreamCraft3D](https://github.com/DSaurus/threestudio-dreamcraft3D) for high-quality image-to-3D generation as the custom extensions! Follow the instructions on the extensions website to give it a try. -- 12/13/2023: Implementation supporting [Stable Zero123](https://stability.ai/news/stable-zero123-3d-generation) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#stable-zero123) to give it a try. -- 11/30/2023: Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting). -- 11/30/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions). -- 08/25/2023: Implementation of [Magic123](https://guochengqian.github.io/project/magic123/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#magic123-) to give it a try. -- 07/06/2023: Join our [Discord server](https://discord.gg/ejer2MAB8N) for lively discussions! -- 07/03/2023: Try text-to-3D online in [HuggingFace Spaces](https://huggingface.co/spaces/bennyguo/threestudio) or using our [self-hosted service](http://t23-g-01.threestudio.ai) (GPU support from Tencent). To host the web interface locally, see [here](https://github.com/threestudio-project/threestudio#gradio-web-interface). -- 06/20/2023: Implementations of Instruct-NeRF2NeRF and Control4D for high-fidelity 3D editing! 
Follow the instructions for [Control4D](https://github.com/threestudio-project/threestudio#control4d-) and [Instruct-NeRF2NeRF](https://github.com/threestudio-project/threestudio#instructnerf2nerf-) to give it a try. -- 06/14/2023: Implementation of TextMesh! Follow the instructions [here](https://github.com/threestudio-project/threestudio#textmesh-) to give it a try. -- 06/14/2023: Implementation of [prompt debiasing](https://arxiv.org/abs/2303.15413) and [Perp-Neg](https://perp-neg.github.io/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#tips-on-improving-quality) to give it a try. -- 05/29/2023: An experimental implementation of using [Zero-1-to-3](https://zero123.cs.columbia.edu/) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#zero-1-to-3-) to give it a try. -- 05/26/2023: Implementation of [ProlificDreamer](https://ml.cs.tsinghua.edu.cn/prolificdreamer/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#prolificdreamer-) to give it a try. -- 05/14/2023: You can experiment with the SDS loss on 2D images using our [2dplayground](2dplayground.ipynb). -- 05/13/2023: You can now try threestudio on [Google Colab](https://colab.research.google.com/github/threestudio-project/threestudio/blob/main/threestudio.ipynb)! -- 05/11/2023: We now support exporting textured meshes! See [here](https://github.com/threestudio-project/threestudio#export-meshes) for instructions. +- 23/12/2023: Thank [Yuyang Zhao](https://github.com/HeliosZhao) for implementation of image-to-4D generation extensions [Aniamte-124](https://github.com/HeliosZhao/Animate124/tree/threestudio)! Follow the instructions on the extensions website to give it a try. 
+- 18/12/2023: Implementation of [4D-fy](https://github.com/DSaurus/threestudio-4dfy) for 4D generation and [DreamCraft3D](https://github.com/DSaurus/threestudio-dreamcraft3D) for high-quality image-to-3D generation as the custom extensions! Follow the instructions on the extensions website to give it a try. +- 13/12/2023: Implementation supporting [Stable Zero123](https://stability.ai/news/stable-zero123-3d-generation) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#stable-zero123) to give it a try. +- 30/11/2023: Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting). +- 30/11/2023: Implementation of [custom extension system](https://threestudio-project.github.io/threestudio-extensions/) and you can add your extensions in [this project](https://github.com/threestudio-project/threestudio-extensions). +- 25/08/2023: Implementation of [Magic123](https://guochengqian.github.io/project/magic123/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#magic123-) to give it a try. +- 06/07/2023: Join our [Discord server](https://discord.gg/ejer2MAB8N) for lively discussions! +- 03/07/2023: Try text-to-3D online in [HuggingFace Spaces](https://huggingface.co/spaces/bennyguo/threestudio) or using our [self-hosted service](http://t23-g-01.threestudio.ai) (GPU support from Tencent). To host the web interface locally, see [here](https://github.com/threestudio-project/threestudio#gradio-web-interface). +- 20/06/2023: Implementations of Instruct-NeRF2NeRF and Control4D for high-fidelity 3D editing! 
Follow the instructions for [Control4D](https://github.com/threestudio-project/threestudio#control4d-) and [Instruct-NeRF2NeRF](https://github.com/threestudio-project/threestudio#instructnerf2nerf-) to give it a try. +- 14/06/2023: Implementation of TextMesh! Follow the instructions [here](https://github.com/threestudio-project/threestudio#textmesh-) to give it a try. +- 14/06/2023: Implementation of [prompt debiasing](https://arxiv.org/abs/2303.15413) and [Perp-Neg](https://perp-neg.github.io/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#tips-on-improving-quality) to give it a try. +- 29/05/2023: An experimental implementation of using [Zero-1-to-3](https://zero123.cs.columbia.edu/) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#zero-1-to-3-) to give it a try. +- 26/05/2023: Implementation of [ProlificDreamer](https://ml.cs.tsinghua.edu.cn/prolificdreamer/)! Follow the instructions [here](https://github.com/threestudio-project/threestudio#prolificdreamer-) to give it a try. +- 14/05/2023: You can experiment with the SDS loss on 2D images using our [2dplayground](2dplayground.ipynb). +- 13/05/2023: You can now try threestudio on [Google Colab](https://colab.research.google.com/github/threestudio-project/threestudio/blob/main/threestudio.ipynb)! +- 11/05/2023: We now support exporting textured meshes! See [here](https://github.com/threestudio-project/threestudio#export-meshes) for instructions. 
![export-blender](https://github.com/threestudio-project/threestudio/assets/19284678/ccae2820-e702-484c-a43f-81678a365427) From 652740ab3e30bd871f10acab6db2ed4afdfd25dc Mon Sep 17 00:00:00 2001 From: DSaurus <2238454358@qq.com> Date: Sat, 23 Dec 2023 19:44:54 +0800 Subject: [PATCH 21/24] fix format --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d0f69fa9..389c675d 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ threestudio is a unified framework for 3D content creation from text prompts, si threestudio

- + threestudio threestudio threestudio From 894390aad91ad80b6d0f5af591acf5a720ab2bfe Mon Sep 17 00:00:00 2001 From: Ruizhi Shao <2238454358@qq.com> Date: Sat, 23 Dec 2023 22:21:22 +0800 Subject: [PATCH 22/24] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 389c675d..bc1b1b3d 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ threestudio is a unified framework for 3D content creation from text prompts, si ## News -- 23/12/2023: Thank [Yuyang Zhao](https://github.com/HeliosZhao) for implementation of image-to-4D generation extensions [Aniamte-124](https://github.com/HeliosZhao/Animate124/tree/threestudio)! Follow the instructions on the extensions website to give it a try. +- 23/12/2023: Thank [Yuyang Zhao](https://github.com/HeliosZhao) for implementation of image-to-4D generation extensions [Animate-124](https://github.com/HeliosZhao/Animate124/tree/threestudio)! Follow the instructions on the extensions website to give it a try. - 18/12/2023: Implementation of [4D-fy](https://github.com/DSaurus/threestudio-4dfy) for 4D generation and [DreamCraft3D](https://github.com/DSaurus/threestudio-dreamcraft3D) for high-quality image-to-3D generation as the custom extensions! Follow the instructions on the extensions website to give it a try. - 13/12/2023: Implementation supporting [Stable Zero123](https://stability.ai/news/stable-zero123-3d-generation) for 3D generation from a single image! Follow the instructions [here](https://github.com/threestudio-project/threestudio#stable-zero123) to give it a try. - 30/11/2023: Implementation of [MVDream](https://github.com/DSaurus/threestudio-mvdream), [Gaussian Splatting](https://github.com/DSaurus/threestudio-3dgs) as the custom extensions. You can also use neural representation to fit a mesh by [Mesh-Fitting](https://github.com/DSaurus/threestudio-meshfitting). 
From e254d87d2a9bf43851ac953bc323e7fe695817ec Mon Sep 17 00:00:00 2001 From: johnbanq Date: Wed, 27 Dec 2023 18:12:54 +0000 Subject: [PATCH 23/24] Assert the text embeddings process successfully runs (#387) --- threestudio/models/prompt_processors/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/threestudio/models/prompt_processors/base.py b/threestudio/models/prompt_processors/base.py index 83a040f2..8993434b 100644 --- a/threestudio/models/prompt_processors/base.py +++ b/threestudio/models/prompt_processors/base.py @@ -379,6 +379,7 @@ def prepare_text_embeddings(self): ) subprocess.start() subprocess.join() + assert subprocess.exitcode == 0, "prompt embedding process failed!" else: self.spawn_func( self.cfg.pretrained_model_name_or_path, From 8ce432d51b2f46eae2e40c045b079bc66a994db0 Mon Sep 17 00:00:00 2001 From: Ruizhi Shao <2238454358@qq.com> Date: Fri, 29 Dec 2023 00:05:40 +0800 Subject: [PATCH 24/24] support gaussian zero-123 (#388) * support gaussian zero-123 * add exp interpolation --- threestudio/__init__.py | 2 +- threestudio/data/image.py | 10 ++++++++-- threestudio/utils/misc.py | 16 ++++++++++------ 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/threestudio/__init__.py b/threestudio/__init__.py index a1184e43..f5619b2f 100644 --- a/threestudio/__init__.py +++ b/threestudio/__init__.py @@ -1,5 +1,5 @@ __modules__ = {} -__version__ = "0.2.0" +__version__ = "0.2.1" def register(name): diff --git a/threestudio/data/image.py b/threestudio/data/image.py index fe7c227e..033c528f 100644 --- a/threestudio/data/image.py +++ b/threestudio/data/image.py @@ -96,6 +96,10 @@ def setup(self, cfg, split): [torch.stack([right, up, -lookat], dim=-1), camera_position[:, :, None]], dim=-1, ) + self.c2w4x4: Float[Tensor, "B 4 4"] = torch.cat( + [self.c2w, torch.zeros_like(self.c2w[:, :1])], dim=1 + ) + self.c2w4x4[:, 3, 3] = 1.0 self.camera_position = camera_position self.light_position = light_position @@ -258,8 +262,10 @@ def collate(self, batch) 
-> Dict[str, Any]: "ref_depth": self.depth, "ref_normal": self.normal, "mask": self.mask, - "height": self.cfg.height, - "width": self.cfg.width, + "height": self.height, + "width": self.width, + "c2w": self.c2w4x4, + "fovy": self.fovy, } if self.cfg.use_random_camera: batch["random_camera"] = self.random_pose_generator.collate(None) diff --git a/threestudio/utils/misc.py b/threestudio/utils/misc.py index ccb4987f..f2378f55 100644 --- a/threestudio/utils/misc.py +++ b/threestudio/utils/misc.py @@ -1,4 +1,5 @@ import gc +import math import os import re @@ -62,7 +63,7 @@ def load_module_weights( return state_dict_to_load, ckpt["epoch"], ckpt["global_step"] -def C(value: Any, epoch: int, global_step: int) -> float: +def C(value: Any, epoch: int, global_step: int, interpolation="linear") -> float: if isinstance(value, int) or isinstance(value, float): pass else: @@ -86,13 +87,16 @@ def C(value: Any, epoch: int, global_step: int) -> float: start_step, start_value, end_value, end_step = value if isinstance(end_step, int): current_step = global_step - value = start_value + (end_value - start_value) * max( - min(1.0, (current_step - start_step) / (end_step - start_step)), 0.0 - ) elif isinstance(end_step, float): current_step = epoch - value = start_value + (end_value - start_value) * max( - min(1.0, (current_step - start_step) / (end_step - start_step)), 0.0 + t = max(min(1.0, (current_step - start_step) / (end_step - start_step)), 0.0) + if interpolation == "linear": + value = start_value + (end_value - start_value) * t + elif interpolation == "exp": + value = math.exp(math.log(start_value) * (1 - t) + math.log(end_value) * t) + else: + raise ValueError( + f"Unknown interpolation method: {interpolation}, only support linear and exp" ) return value