From 9b03299d409a8c6dcbca6ad2e61ef5fe6663ba6d Mon Sep 17 00:00:00 2001 From: Frederik Warburg <32867328+FrederikWarburg@users.noreply.github.com> Date: Fri, 4 Aug 2023 16:11:55 +0200 Subject: [PATCH] Train and eval data splits (Nerfbusters) (#2207) * nerfbusters dataparser * nerfbuster dataparser and visualization of eval views * resolved some errors from pyright * enable ns-process-data to take two videos as input and use one for eval and the other for train * resolve some pyright issues * instead of data being a tuple, I added eval_data as another argument * implemented train eval split for all cases * implemented train eval split for all cases * reverting equi changes * reverting reality capture changes * raise valueerror * revert comment * added split override code back to dataparser * reverted one change * using image border now * adding docs --------- Co-authored-by: Ethan Weber --- docs/quickstart/custom_dataset.md | 4 + .../data/dataparsers/nerfstudio_dataparser.py | 59 +++++++--- nerfstudio/data/utils/dataparsers_utils.py | 101 ++++++++++++++++++ nerfstudio/engine/trainer.py | 3 +- .../base_converter_to_nerfstudio_dataset.py | 5 +- .../colmap_converter_to_nerfstudio_dataset.py | 4 +- .../images_to_nerfstudio_dataset.py | 23 +++- nerfstudio/process_data/process_data_utils.py | 57 ++++++---- .../video_to_nerfstudio_dataset.py | 27 ++++- nerfstudio/scripts/process_data.py | 12 +-- nerfstudio/scripts/viewer/run_viewer.py | 4 +- nerfstudio/viewer/server/viewer_state.py | 49 ++++++--- nerfstudio/viewer_beta/viewer.py | 13 ++- tests/process_data/test_process_images.py | 10 +- 14 files changed, 288 insertions(+), 83 deletions(-) create mode 100644 nerfstudio/data/utils/dataparsers_utils.py diff --git a/docs/quickstart/custom_dataset.md b/docs/quickstart/custom_dataset.md index 1b6c240717..c826979360 100644 --- a/docs/quickstart/custom_dataset.md +++ b/docs/quickstart/custom_dataset.md @@ -46,6 +46,10 @@ ns-process-data {images, video} --data {DATA_PATH} 
--output-dir {PROCESSED_DATA_ ns-train nerfacto --data {PROCESSED_DATA_DIR} ``` +### Training and evaluation on separate data + +For `ns-process-data {images, video}`, you can optionally use a separate image directory or video for training and evaluation, as suggested in [Nerfbusters](https://ethanweber.me/nerfbusters/). To do this, run `ns-process-data {images, video} --data {DATA_PATH} --eval-data {EVAL_DATA_PATH} --output-dir {PROCESSED_DATA_DIR}`. Then when running nerfacto, run `ns-train nerfacto --data {PROCESSED_DATA_DIR} nerfstudio-data --eval-mode filename`. + ### Installing COLMAP There are many ways to install COLMAP, unfortunately it can sometimes be a bit finicky. If the following commands do not work, please refer to the [COLMAP installation guide](https://colmap.github.io/install.html) for additional installation methods. COLMAP install issues are common! Feel free to ask for help in on our [Discord](https://discord.gg/uMbNqcraFc). diff --git a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py index 008fedfa2d..eaeda0bce6 100644 --- a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py +++ b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py @@ -15,7 +15,6 @@ from __future__ import annotations -import math from dataclasses import dataclass, field from pathlib import Path from typing import Literal, Optional, Type @@ -26,12 +25,14 @@ from nerfstudio.cameras import camera_utils from nerfstudio.cameras.cameras import CAMERA_MODEL_TO_TYPE, Cameras, CameraType -from nerfstudio.data.dataparsers.base_dataparser import ( - DataParser, - DataParserConfig, - DataparserOutputs, -) +from nerfstudio.data.dataparsers.base_dataparser import DataParser, DataParserConfig, DataparserOutputs from nerfstudio.data.scene_box import SceneBox +from nerfstudio.data.utils.dataparsers_utils import ( + get_train_eval_split_filename, + get_train_eval_split_fraction, + get_train_eval_split_interval, + 
get_train_eval_split_all, +) from nerfstudio.utils.io import load_from_json from nerfstudio.utils.rich_utils import CONSOLE @@ -58,8 +59,18 @@ class NerfstudioDataParserConfig(DataParserConfig): """The method to use to center the poses.""" auto_scale_poses: bool = True """Whether to automatically scale the poses to fit in +/- 1 bounding box.""" + eval_mode: Literal["fraction", "filename", "interval", "all"] = "fraction" + """ + The method to use for splitting the dataset into train and eval. + Fraction splits based on a percentage for train and the remaining for eval. + Filename splits based on filenames containing train/eval. + Interval uses every nth frame for eval. + All uses all the images for any split. + """ train_split_fraction: float = 0.9 - """The fraction of images to use for training. The remaining images are for eval.""" + """The percentage of the dataset to use for training. Only used when eval_mode is train-split-fraction.""" + eval_interval: int = 8 + """The interval between frames to use for eval. Only used when eval_mode is eval-interval.""" depth_unit_scale_factor: float = 1e-3 """Scales the depth values to meters. 
Default value is 0.001 for a millimeter to meter conversion.""" @@ -105,9 +116,18 @@ def _generate_dataparser_outputs(self, split="train"): width = [] distort = [] + # sort the frames by fname + fnames = [] for frame in meta["frames"]: filepath = Path(frame["file_path"]) fname = self._get_fname(filepath, data_dir) + fnames.append(fname) + inds = np.argsort(fnames) + frames = [meta["frames"][ind] for ind in inds] + + for frame in frames: + filepath = Path(frame["file_path"]) + fname = self._get_fname(filepath, data_dir) if not fx_fixed: assert "fl_x" in frame, "fx not specified in frame" @@ -182,16 +202,21 @@ def _generate_dataparser_outputs(self, split="train"): elif has_split_files_spec: raise RuntimeError(f"The dataset's list of filenames for split {split} is missing.") else: - # filter image_filenames and poses based on train/eval split percentage - num_images = len(image_filenames) - num_train_images = math.ceil(num_images * self.config.train_split_fraction) - num_eval_images = num_images - num_train_images - i_all = np.arange(num_images) - i_train = np.linspace( - 0, num_images - 1, num_train_images, dtype=int - ) # equally spaced training images starting and ending at 0 and num_images-1 - i_eval = np.setdiff1d(i_all, i_train) # eval images are the remaining images - assert len(i_eval) == num_eval_images + # find train and eval indices based on the eval_mode specified + if self.config.eval_mode == "fraction": + i_train, i_eval = get_train_eval_split_fraction(image_filenames, self.config.train_split_fraction) + elif self.config.eval_mode == "filename": + i_train, i_eval = get_train_eval_split_filename(image_filenames) + elif self.config.eval_mode == "interval": + i_train, i_eval = get_train_eval_split_interval(image_filenames, self.config.eval_interval) + elif self.config.eval_mode == "all": + CONSOLE.log( + "[yellow] Be careful with '--eval-mode=all'. 
# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Data parser utils for nerfstudio datasets. """

import math
import os
from typing import List, Tuple

import numpy as np


def get_train_eval_split_fraction(image_filenames: List, train_split_fraction: float) -> Tuple[np.ndarray, np.ndarray]:
    """
    Get the train/eval split indices based on a fraction of the images.

    Training indices are spread evenly over the whole sequence (always including the
    first and last image when more than one training image is requested); every
    remaining index is used for eval.

    Args:
        image_filenames: list of image filenames
        train_split_fraction: fraction of images to use for training, in [0, 1]

    Returns:
        A tuple (i_train, i_eval) of integer index arrays into image_filenames.

    Raises:
        ValueError: if train_split_fraction is outside [0, 1] (including NaN).
    """
    # Validate explicitly: a bare assert is stripped under `python -O`, and an
    # out-of-range fraction would otherwise yield duplicated or invalid indices.
    if not 0.0 <= train_split_fraction <= 1.0:
        raise ValueError(f"train_split_fraction must be in [0, 1], got {train_split_fraction}")

    num_images = len(image_filenames)
    num_train_images = math.ceil(num_images * train_split_fraction)
    num_eval_images = num_images - num_train_images
    i_all = np.arange(num_images)
    # equally spaced training images starting and ending at 0 and num_images-1
    i_train = np.linspace(0, num_images - 1, num_train_images, dtype=int)
    # eval images are the remaining images
    i_eval = np.setdiff1d(i_all, i_train)
    # internal sanity check: the two index sets must partition the dataset
    assert len(i_eval) == num_eval_images

    return i_train, i_eval


def get_train_eval_split_filename(image_filenames: List) -> Tuple[np.ndarray, np.ndarray]:
    """
    Get the train/eval split based on the filename of the images.

    An image belongs to the train split when its basename contains "train" and to the
    eval split when it contains "eval" ("train" takes precedence if both appear).

    Args:
        image_filenames: list of image filenames

    Returns:
        A tuple (i_train, i_eval) of integer index arrays into image_filenames.

    Raises:
        ValueError: if a basename contains neither "train" nor "eval".
    """
    i_train: List[int] = []
    i_eval: List[int] = []
    for idx, image_filename in enumerate(image_filenames):
        basename = os.path.basename(image_filename)
        if "train" in basename:
            i_train.append(idx)
        elif "eval" in basename:
            i_eval.append(idx)
        else:
            raise ValueError(
                f"Image '{basename}' must contain 'train' or 'eval' in its filename to use the 'filename' eval mode."
            )

    # Force an integer dtype: np.array([]) would otherwise be float64, which is not
    # a valid index array when one of the splits happens to be empty.
    return np.array(i_train, dtype=int), np.array(i_eval, dtype=int)


def get_train_eval_split_interval(image_filenames: List, eval_interval: int) -> Tuple[np.ndarray, np.ndarray]:
    """
    Get the train/eval split based on the interval of the images.

    Every eval_interval-th image (indices 0, eval_interval, 2 * eval_interval, ...) is
    used for eval; all other images are used for training.

    Args:
        image_filenames: list of image filenames
        eval_interval: interval of images to use for eval, must be positive

    Returns:
        A tuple (i_train, i_eval) of integer index arrays into image_filenames.

    Raises:
        ValueError: if eval_interval is not positive.
    """
    # A zero interval would make numpy's modulo emit a warning and silently put every
    # image in the eval split, so reject non-positive intervals up front.
    if eval_interval <= 0:
        raise ValueError(f"eval_interval must be positive, got {eval_interval}")

    all_indices = np.arange(len(image_filenames))
    is_eval = all_indices % eval_interval == 0

    return all_indices[~is_eval], all_indices[is_eval]


def get_train_eval_split_all(image_filenames: List) -> Tuple[np.ndarray, np.ndarray]:
    """
    Get the train/eval split where all indices are used for both train and eval.

    Args:
        image_filenames: list of image filenames

    Returns:
        A tuple (i_train, i_eval) of integer index arrays, each covering every image.
    """
    i_train = np.arange(len(image_filenames))
    # Return an independent copy so that in-place edits to one split can never
    # leak into the other.
    i_eval = i_train.copy()
    return i_train, i_eval
nerfstudio dataset.""" data: Path """Path the data, either a video file or a directory of images.""" output_dir: Path """Path to the output directory.""" + eval_data: Optional[Path] = None + """Path the eval data, either a video file or a directory of images. If set to None, the first will be used both for training and eval""" verbose: bool = False """If True, print extra logging.""" diff --git a/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py b/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py index 13ed7da66f..7ab711b46e 100644 --- a/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py +++ b/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py @@ -19,9 +19,7 @@ from typing import Dict, List, Literal, Optional, Tuple from nerfstudio.process_data import colmap_utils, hloc_utils, process_data_utils -from nerfstudio.process_data.base_converter_to_nerfstudio_dataset import ( - BaseConverterToNerfstudioDataset, -) +from nerfstudio.process_data.base_converter_to_nerfstudio_dataset import BaseConverterToNerfstudioDataset from nerfstudio.process_data.process_data_utils import CAMERA_MODELS from nerfstudio.utils import install_checks from nerfstudio.utils.rich_utils import CONSOLE diff --git a/nerfstudio/process_data/images_to_nerfstudio_dataset.py b/nerfstudio/process_data/images_to_nerfstudio_dataset.py index 3f47b70ac3..e6cf501bfb 100644 --- a/nerfstudio/process_data/images_to_nerfstudio_dataset.py +++ b/nerfstudio/process_data/images_to_nerfstudio_dataset.py @@ -18,9 +18,7 @@ from typing import Optional from nerfstudio.process_data import equirect_utils, process_data_utils -from nerfstudio.process_data.colmap_converter_to_nerfstudio_dataset import ( - ColmapConverterToNerfstudioDataset, -) +from nerfstudio.process_data.colmap_converter_to_nerfstudio_dataset import ColmapConverterToNerfstudioDataset from nerfstudio.utils.rich_utils import CONSOLE @@ -47,11 +45,15 @@ def main(self) -> None: # Generate planar 
projections if equirectangular if self.camera_type == "equirectangular": + if self.eval_data is not None: + raise ValueError("Cannot use eval_data with camera_type equirectangular.") + pers_size = equirect_utils.compute_resolution_from_equirect(self.data, self.images_per_equirect) CONSOLE.log(f"Generating {self.images_per_equirect} {pers_size} sized images per equirectangular image") self.data = equirect_utils.generate_planar_projections_from_equirectangular( self.data, pers_size, self.images_per_equirect, crop_factor=self.crop_factor ) + self.camera_type = "perspective" summary_log = [] @@ -63,10 +65,25 @@ def main(self) -> None: self.data, image_dir=self.image_dir, crop_factor=self.crop_factor, + image_prefix="frame_train_" if self.eval_data is not None else "frame_", verbose=self.verbose, num_downscales=self.num_downscales, same_dimensions=self.same_dimensions, + keep_image_dir=False, ) + if self.eval_data is not None: + eval_image_rename_map_paths = process_data_utils.copy_images( + self.eval_data, + image_dir=self.image_dir, + crop_factor=self.crop_factor, + image_prefix="frame_eval_", + verbose=self.verbose, + num_downscales=self.num_downscales, + same_dimensions=self.same_dimensions, + keep_image_dir=True, + ) + image_rename_map_paths.update(eval_image_rename_map_paths) + image_rename_map = dict((a.name, b.name) for a, b in image_rename_map_paths.items()) num_frames = len(image_rename_map) summary_log.append(f"Starting with {num_frames} images") diff --git a/nerfstudio/process_data/process_data_utils.py b/nerfstudio/process_data/process_data_utils.py index 6a10153e8d..6db731e4fb 100644 --- a/nerfstudio/process_data/process_data_utils.py +++ b/nerfstudio/process_data/process_data_utils.py @@ -15,17 +15,16 @@ """Helper utils for processing data into the nerfstudio format.""" import math +import re import shutil import sys -import re from enum import Enum from pathlib import Path from typing import List, Literal, Optional, OrderedDict, Tuple, Union import cv2 
-import numpy as np - import imageio +import numpy as np import rawpy from nerfstudio.utils.rich_utils import CONSOLE, status @@ -114,6 +113,8 @@ def convert_video_to_images( num_downscales: int, crop_factor: Tuple[float, float, float, float] = (0.0, 0.0, 0.0, 0.0), verbose: bool = False, + image_prefix: str = "frame_", + keep_image_dir: bool = False, ) -> Tuple[List[str], int]: """Converts a video into a sequence of images. @@ -124,10 +125,19 @@ def convert_video_to_images( num_downscales: Number of times to downscale the images. Downscales by 2 each time. crop_factor: Portion of the image to crop. Should be in [0,1] (top, bottom, left, right) verbose: If True, logs the output of the command. + image_prefix: Prefix to use for the image filenames. + keep_image_dir: If True, don't delete the output directory if it already exists. Returns: A tuple containing summary of the conversion and the number of extracted frames. """ + # If keep_image_dir is False, then remove the output image directory and its downscaled versions + if not keep_image_dir: + for i in range(num_downscales + 1): + dir_to_remove = image_dir if i == 0 else f"{image_dir}_{2**i}" + shutil.rmtree(dir_to_remove, ignore_errors=True) + image_dir.mkdir(exist_ok=True, parents=True) + for i in crop_factor: if i < 0 or i > 1: CONSOLE.print("[bold red]Error: Invalid crop factor. 
All crops must be in [0,1].") @@ -141,12 +151,6 @@ def convert_video_to_images( sys.exit(1) with status(msg="Converting video to images...", spinner="bouncingBall", verbose=verbose): - # delete existing images in folder - for img in image_dir.glob("*.png"): - if verbose: - CONSOLE.log(f"Deleting {img}") - img.unlink() - num_frames = get_num_frames_in_video(video_path) if num_frames == 0: CONSOLE.print(f"[bold red]Error: Video has no frames: {video_path}") @@ -168,7 +172,7 @@ def convert_video_to_images( downscale_chains = [f"[t{i}]scale=iw/{2**i}:ih/{2**i}[out{i}]" for i in range(num_downscales + 1)] downscale_dirs = [Path(str(image_dir) + (f"_{2**i}" if i > 0 else "")) for i in range(num_downscales + 1)] - downscale_paths = [downscale_dirs[i] / "frame_%05d.png" for i in range(num_downscales + 1)] + downscale_paths = [downscale_dirs[i] / f"{image_prefix}%05d.png" for i in range(num_downscales + 1)] for dir in downscale_dirs: dir.mkdir(parents=True, exist_ok=True) @@ -200,7 +204,7 @@ def convert_video_to_images( num_final_frames = len(list(image_dir.glob("*.png"))) summary_log = [] summary_log.append(f"Starting with {num_frames} video frames") - summary_log.append(f"We extracted {num_final_frames} images") + summary_log.append(f"We extracted {num_final_frames} images with prefix '{image_prefix}'") CONSOLE.log("[bold green]:tada: Done converting video to images.") return summary_log, num_final_frames @@ -210,9 +214,11 @@ def copy_images_list( image_paths: List[Path], image_dir: Path, num_downscales: int, + image_prefix: str = "frame_", crop_border_pixels: Optional[int] = None, crop_factor: Tuple[float, float, float, float] = (0.0, 0.0, 0.0, 0.0), verbose: bool = False, + keep_image_dir: bool = False, upscale_factor: Optional[int] = None, nearest_neighbor: bool = False, same_dimensions: bool = True, @@ -222,19 +228,24 @@ def copy_images_list( image_paths: List of Paths of images to copy to a new directory. image_dir: Path to the output directory. 
num_downscales: Number of times to downscale the images. Downscales by 2 each time. + image_prefix: Prefix for the image filenames. crop_border_pixels: If not None, crops each edge by the specified number of pixels. crop_factor: Portion of the image to crop. Should be in [0,1] (top, bottom, left, right) verbose: If True, print extra logging. + keep_image_dir: If True, don't delete the output directory if it already exists. Returns: A list of the copied image Paths. """ - # Remove original directory only if we provide a proper image folder path - if image_dir.is_dir() and len(image_paths): + # Remove original directory and its downscaled versions + # only if we provide a proper image folder path and keep_image_dir is False + if image_dir.is_dir() and len(image_paths) and not keep_image_dir: # check that output directory is not the same as input directory if image_dir != image_paths[0].parent: - shutil.rmtree(image_dir, ignore_errors=True) - image_dir.mkdir(exist_ok=True, parents=True) + for i in range(num_downscales + 1): + dir_to_remove = image_dir if i == 0 else f"{image_dir}_{2**i}" + shutil.rmtree(dir_to_remove, ignore_errors=True) + image_dir.mkdir(exist_ok=True, parents=True) copied_image_paths = [] @@ -242,11 +253,11 @@ def copy_images_list( for idx, image_path in enumerate(image_paths): if verbose: CONSOLE.log(f"Copying image {idx + 1} of {len(image_paths)}...") - copied_image_path = image_dir / f"frame_{idx + 1:05d}{image_path.suffix}" + copied_image_path = image_dir / f"{image_prefix}{idx + 1:05d}{image_path.suffix}" try: # if CR2 raw, we want to read raw and write RAW_CONVERTED_SUFFIX, and change the file suffix for downstream processing if image_path.suffix.lower() in ALLOWED_RAW_EXTS: - copied_image_path = image_dir / f"frame_{idx + 1:05d}{RAW_CONVERTED_SUFFIX}" + copied_image_path = image_dir / f"{image_prefix}{idx + 1:05d}{RAW_CONVERTED_SUFFIX}" with rawpy.imread(str(image_path)) as raw: rgb = raw.postprocess() imageio.imsave(copied_image_path, rgb) 
@@ -283,7 +294,7 @@ def copy_images_list( # When this is not the case (e.g. mixed portrait and landscape images), we need to do individually. # (Unfortunately, that is much slower.) for framenum in range(1, (1 if same_dimensions else num_frames) + 1): - framename = "frame_%05d" if same_dimensions else f"frame_{framenum:05d}" + framename = f"{image_prefix}%05d" if same_dimensions else f"{image_prefix}{framenum:05d}" ffmpeg_cmd = f'ffmpeg -y -noautorotate -i "{image_dir / f"{framename}{copied_image_paths[0].suffix}"}" -q:v 2 ' crop_cmd = "" @@ -315,7 +326,7 @@ def copy_images_list( if num_frames == 0: CONSOLE.log("[bold red]:skull: No usable images in the data folder.") else: - CONSOLE.log("[bold green]:tada: Done copying images.") + CONSOLE.log(f"[bold green]:tada: Done copying images with prefix '{image_prefix}'.") return copied_image_paths @@ -363,7 +374,9 @@ def copy_and_upscale_polycam_depth_maps_list( def copy_images( data: Path, image_dir: Path, + image_prefix: str = "frame_", verbose: bool = False, + keep_image_dir: bool = False, crop_factor: Tuple[float, float, float, float] = (0.0, 0.0, 0.0, 0.0), num_downscales: int = 0, same_dimensions: bool = True, @@ -373,8 +386,10 @@ def copy_images( Args: data: Path to the directory of images. image_dir: Path to the output directory. + image_prefix: Prefix for the image filenames. verbose: If True, print extra logging. crop_factor: Portion of the image to crop. Should be in [0,1] (top, bottom, left, right) + keep_image_dir: If True, don't delete the output directory if it already exists. Returns: The mapping from the original filenames to the new ones. """ @@ -390,6 +405,8 @@ def copy_images( image_dir=image_dir, crop_factor=crop_factor, verbose=verbose, + image_prefix=image_prefix, + keep_image_dir=keep_image_dir, num_downscales=num_downscales, same_dimensions=same_dimensions, ) @@ -406,8 +423,6 @@ def downscale_images( """(Now deprecated; much faster integrated into copy_images.) 
Downscales the images in the directory. Uses FFMPEG. - Assumes images are named frame_00001.png, frame_00002.png, etc. - Args: image_dir: Path to the directory containing the images. num_downscales: Number of times to downscale the images. Downscales by 2 each time. diff --git a/nerfstudio/process_data/video_to_nerfstudio_dataset.py b/nerfstudio/process_data/video_to_nerfstudio_dataset.py index b1eec97eab..928752e432 100644 --- a/nerfstudio/process_data/video_to_nerfstudio_dataset.py +++ b/nerfstudio/process_data/video_to_nerfstudio_dataset.py @@ -18,9 +18,7 @@ from dataclasses import dataclass from nerfstudio.process_data import equirect_utils, process_data_utils -from nerfstudio.process_data.colmap_converter_to_nerfstudio_dataset import ( - ColmapConverterToNerfstudioDataset, -) +from nerfstudio.process_data.colmap_converter_to_nerfstudio_dataset import ColmapConverterToNerfstudioDataset from nerfstudio.utils.rich_utils import CONSOLE @@ -35,7 +33,7 @@ class VideoToNerfstudioDataset(ColmapConverterToNerfstudioDataset): """ num_frames_target: int = 300 - """Target number of frames to use for the dataset, results may not be exact.""" + """Target number of frames to use per video, results may not be exact.""" percent_radius_crop: float = 1.0 """Create circle crop mask. 
The radius is the percent of the image diagonal.""" @@ -43,6 +41,7 @@ def main(self) -> None: """Process video into a nerfstudio dataset.""" summary_log = [] + summary_log_eval = [] # Convert video to images if self.camera_type == "equirectangular": # create temp images folder to store the equirect and perspective images @@ -65,10 +64,28 @@ def main(self) -> None: num_downscales=self.num_downscales, crop_factor=self.crop_factor, verbose=self.verbose, + image_prefix="frame_train_" if self.eval_data is not None else "frame_", + keep_image_dir=False, ) + if self.eval_data is not None: + summary_log_eval, num_extracted_frames_eval = process_data_utils.convert_video_to_images( + self.eval_data, + image_dir=self.image_dir, + num_frames_target=self.num_frames_target, + num_downscales=self.num_downscales, + crop_factor=self.crop_factor, + verbose=self.verbose, + image_prefix="frame_eval_", + keep_image_dir=True, + ) + summary_log += summary_log_eval + num_extracted_frames += num_extracted_frames_eval # Generate planar projections if equirectangular if self.camera_type == "equirectangular": + if self.eval_data is not None: + raise ValueError("Cannot use eval_data with camera_type equirectangular.") + perspective_image_size = equirect_utils.compute_resolution_from_equirect( self.output_dir / "temp_images", self.images_per_equirect ) @@ -97,7 +114,7 @@ def main(self) -> None: process_data_utils.downscale_images(self.image_dir, self.num_downscales, verbose=self.verbose) ) - # # Create mask + # Create mask mask_path = process_data_utils.save_mask( image_dir=self.image_dir, num_downscales=self.num_downscales, diff --git a/nerfstudio/scripts/process_data.py b/nerfstudio/scripts/process_data.py index 201eb94fc9..8b4151f757 100644 --- a/nerfstudio/scripts/process_data.py +++ b/nerfstudio/scripts/process_data.py @@ -33,12 +33,8 @@ realitycapture_utils, record3d_utils, ) -from nerfstudio.process_data.colmap_converter_to_nerfstudio_dataset import ( - BaseConverterToNerfstudioDataset, 
-) -from nerfstudio.process_data.images_to_nerfstudio_dataset import ( - ImagesToNerfstudioDataset, -) +from nerfstudio.process_data.colmap_converter_to_nerfstudio_dataset import BaseConverterToNerfstudioDataset +from nerfstudio.process_data.images_to_nerfstudio_dataset import ImagesToNerfstudioDataset from nerfstudio.process_data.video_to_nerfstudio_dataset import VideoToNerfstudioDataset from nerfstudio.utils.rich_utils import CONSOLE @@ -253,6 +249,8 @@ def main(self) -> None: raise ValueError(f"XML file {self.xml} must have a .xml extension") if not self.xml.exists: raise ValueError(f"XML file {self.xml} doesn't exist") + if self.eval_data is not None: + raise ValueError("Cannot use eval_data since cameras were already aligned with Metashape.") self.output_dir.mkdir(parents=True, exist_ok=True) image_dir = self.output_dir / "images" @@ -338,6 +336,8 @@ def main(self) -> None: raise ValueError(f"CSV file {self.csv} must have a .csv extension") if not self.csv.exists: raise ValueError(f"CSV file {self.csv} doesn't exist") + if self.eval_data is not None: + raise ValueError("Cannot use eval_data since cameras were already aligned with RealityCapture.") self.output_dir.mkdir(parents=True, exist_ok=True) image_dir = self.output_dir / "images" diff --git a/nerfstudio/scripts/viewer/run_viewer.py b/nerfstudio/scripts/viewer/run_viewer.py index c4ebefacb7..56a712026f 100644 --- a/nerfstudio/scripts/viewer/run_viewer.py +++ b/nerfstudio/scripts/viewer/run_viewer.py @@ -21,7 +21,6 @@ import time from dataclasses import dataclass, field, fields from pathlib import Path - from typing import Literal import tyro @@ -113,8 +112,9 @@ def _start_viewer(config: TrainerConfig, pipeline: Pipeline, step: int): assert viewer_state and pipeline.datamanager.train_dataset viewer_state.init_scene( - dataset=pipeline.datamanager.train_dataset, + train_dataset=pipeline.datamanager.train_dataset, train_state="completed", + eval_dataset=pipeline.datamanager.eval_dataset, ) if 
isinstance(viewer_state, ViewerState): viewer_state.viser_server.set_training_state("completed") diff --git a/nerfstudio/viewer/server/viewer_state.py b/nerfstudio/viewer/server/viewer_state.py index 0e948d2ccc..ad80822ce8 100644 --- a/nerfstudio/viewer/server/viewer_state.py +++ b/nerfstudio/viewer/server/viewer_state.py @@ -38,10 +38,7 @@ from nerfstudio.viewer.server import viewer_utils from nerfstudio.viewer.server.control_panel import ControlPanel from nerfstudio.viewer.server.gui_utils import parse_object -from nerfstudio.viewer.server.render_state_machine import ( - RenderAction, - RenderStateMachine, -) +from nerfstudio.viewer.server.render_state_machine import RenderAction, RenderStateMachine from nerfstudio.viewer.server.utils import get_intrinsics_matrix_and_camera_to_world_h from nerfstudio.viewer.server.viewer_elements import ViewerControl, ViewerElement from nerfstudio.viewer.viser import ViserServer @@ -331,7 +328,7 @@ def get_camera(self, image_height: int, image_width: int) -> Optional[Cameras]: camera = camera.to(self.get_model().device) return camera - def _pick_drawn_image_idxs(self, total_num: int) -> list[int]: + def _pick_drawn_image_idxs(self, total_num: int) -> np.ndarray: """Determine indices of images to display in viewer. Args: @@ -345,9 +342,14 @@ def _pick_drawn_image_idxs(self, total_num: int) -> list[int]: else: num_display_images = min(self.config.max_num_display_images, total_num) # draw indices, roughly evenly spaced - return np.linspace(0, total_num - 1, num_display_images, dtype=np.int32).tolist() + return np.linspace(0, total_num - 1, num_display_images, dtype=np.int32) - def init_scene(self, dataset: InputDataset, train_state: Literal["training", "paused", "completed"]) -> None: + def init_scene( + self, + train_dataset: InputDataset, + train_state: Literal["training", "paused", "completed"], + eval_dataset: Optional[InputDataset] = None, + ) -> None: """Draw some images and the scene aabb in the viewer. 
Args: @@ -360,16 +362,39 @@ def init_scene(self, dataset: InputDataset, train_state: Literal["training", "pa export_path_name=self.log_filename.parent.stem, ) + # total num of images + num_images = len(train_dataset) + if eval_dataset is not None: + num_images += len(eval_dataset) + # draw the training cameras and images - image_indices = self._pick_drawn_image_idxs(len(dataset)) - for idx in image_indices: - image = dataset[idx]["image"] + image_indices = self._pick_drawn_image_idxs(num_images) + for idx in image_indices[image_indices < len(train_dataset)].tolist(): + image = train_dataset[idx]["image"] bgr = image[..., [2, 1, 0]] - camera_json = dataset.cameras.to_json(camera_idx=idx, image=bgr, max_size=100) + camera_json = train_dataset.cameras.to_json(camera_idx=idx, image=bgr, max_size=100) self.viser_server.add_dataset_image(idx=f"{idx:06d}", json=camera_json) + # draw the eval cameras and images + if eval_dataset is not None: + image_indices = image_indices[image_indices >= len(train_dataset)] - len(train_dataset) + for idx in image_indices.tolist(): + image = eval_dataset[idx]["image"] + bgr = image[..., [2, 1, 0]] + # color the eval image borders red + # TODO: color the threejs frustum instead of changing the image itself like we are doing here + t = int(min(image.shape[:2]) * 0.1) # border thickness as 10% of min height or width resolution + bc = torch.tensor((0, 0, 1.0)) + bgr[:t, :, :] = bc + bgr[-t:, :, :] = bc + bgr[:, -t:, :] = bc + bgr[:, :t, :] = bc + + camera_json = eval_dataset.cameras.to_json(camera_idx=idx, image=bgr, max_size=100) + self.viser_server.add_dataset_image(idx=f"{idx+len(train_dataset):06d}", json=camera_json) + # draw the scene box (i.e., the bounding box) - self.viser_server.update_scene_box(dataset.scene_box) + self.viser_server.update_scene_box(train_dataset.scene_box) # set the initial state whether to train or not self.train_btn_state = train_state diff --git a/nerfstudio/viewer_beta/viewer.py 
b/nerfstudio/viewer_beta/viewer.py index 4c007f082f..a5271571dd 100644 --- a/nerfstudio/viewer_beta/viewer.py +++ b/nerfstudio/viewer_beta/viewer.py @@ -169,7 +169,12 @@ def _pick_drawn_image_idxs(self, total_num: int) -> list[int]: # draw indices, roughly evenly spaced return np.linspace(0, total_num - 1, num_display_images, dtype=np.int32).tolist() - def init_scene(self, dataset: InputDataset, train_state: Literal["training", "paused", "completed"]) -> None: + def init_scene( + self, + train_dataset: InputDataset, + train_state: Literal["training", "paused", "completed"], + eval_dataset: Optional[InputDataset] = None, + ) -> None: """Draw some images and the scene aabb in the viewer. Args: @@ -178,10 +183,10 @@ def init_scene(self, dataset: InputDataset, train_state: Literal["training", "pa """ # draw the training cameras and images - image_indices = self._pick_drawn_image_idxs(len(dataset)) + image_indices = self._pick_drawn_image_idxs(len(train_dataset)) for idx in image_indices: - image = dataset[idx]["image"] - camera = dataset.cameras[idx] + image = train_dataset[idx]["image"] + camera = train_dataset.cameras[idx] image_uint8 = (image * 255).detach().type(torch.uint8) image_uint8 = image_uint8.permute(2, 0, 1) image_uint8 = torchvision.transforms.functional.resize(image_uint8, 100) # type: ignore diff --git a/tests/process_data/test_process_images.py b/tests/process_data/test_process_images.py index f143f2b1fa..a3d0323551 100644 --- a/tests/process_data/test_process_images.py +++ b/tests/process_data/test_process_images.py @@ -11,14 +11,8 @@ from nerfstudio.data.dataparsers.nerfstudio_dataparser import NerfstudioDataParserConfig from nerfstudio.data.utils.colmap_parsing_utils import Camera from nerfstudio.data.utils.colmap_parsing_utils import Image as ColmapImage -from nerfstudio.data.utils.colmap_parsing_utils import ( - qvec2rotmat, - write_cameras_binary, - write_images_binary, -) -from nerfstudio.process_data.images_to_nerfstudio_dataset import ( - 
ImagesToNerfstudioDataset, -) +from nerfstudio.data.utils.colmap_parsing_utils import qvec2rotmat, write_cameras_binary, write_images_binary +from nerfstudio.process_data.images_to_nerfstudio_dataset import ImagesToNerfstudioDataset def random_quaternion(num_poses: int):