diff --git a/airo-camera-toolkit/README.md b/airo-camera-toolkit/README.md index bcb4b06c..755fa0a7 100644 --- a/airo-camera-toolkit/README.md +++ b/airo-camera-toolkit/README.md @@ -1,26 +1,17 @@ # airo-camera-toolkit This package contains code for working with RGB(D) cameras, images and pointclouds. + + Overview of the functionality and the structure: ```cs -airo_camera_toolkit -├── interfaces.py # Common interfaces for all cameras. -├── reprojection.py # Projecting points between the 3D world and images -│ # and reprojecting points from image plane to world -├── utils.py # Conversion between image formats: BGR to RGB, int to float, etc. -│ # or channel-first vs channel-last. -├── annotation_tools.py # Tool for annotating images with keypoints, lines, etc. -└── cameras # Implementation of the interfaces for real cameras -│ ├── zed2i.py # Implementation using ZED SDK, run this file to test your ZED Installation -│ ├── realsense.py # Implementation using RealSense SDK -│ └── manual_test_hw.py # Used for manually testing in the above implementations. -└── calibration -│ ├── fiducial_markers.py # Detecting and localising aruco markers and charuco boards -│ └── hand_eye_calibration.py # Camera-robot extrinsics calibration, eye-in-hand and eye-to-hand -└── image_transforms # Invertible transforms for cropping/scaling images with keypoints -│ └── ... -└── multiprocess # Multiprocessing for - └── ... - +airo-camera-toolkit/ +├── calibration/ # hand-eye extrinsics calibration +├── cameras/ # actual camera drivers +├── image_transformations/ # reversible geometric 2D transforms +├── pinhole_operations/ # 2D-3D operations +├── utils/ # a.o. annotation tool and converter +├── interfaces.py +└── cli.py ``` ## Installation @@ -63,7 +54,10 @@ airo-camera-toolkit hand-eye-calibration --help See [calibration/README.md](./airo_camera_toolkit/calibration/README.md) for more details. 
-## Image format conversion + +## Utils + +### Image format conversion Camera by default return images as numpy 32-bit float RGB images with values between 0 to 1 through `get_rgb_image()`. This is most convenient for subsequent processing, e.g. with neural networks. For higher performance, 8-bit unsigned integer RGB images are also accessible through `get_rgb_image_as_int()`. @@ -78,13 +72,15 @@ image_bgr = ImageConverter.from_numpy_int_format(image_rgb_int).image_in_opencv_ ``` -## Reprojection +### Annotation tool -See [reprojection.py](./airo_camera_toolkit/reprojection.py) for more details. +See [annotation_tool.md](./airo_camera_toolkit/annotation_tool.md) for usage instructions. -## Annotation tool -See [annotation_tool.md](./airo_camera_toolkit/annotation_tool.md) for usage instructions. +## Pinhole Operations + +2D - 3D geometric operations using the pinhole model. See [readme](./airo_camera_toolkit/pinhole_operations/Readme.md) for more information. + ## Image Transforms @@ -100,6 +96,7 @@ If this is a problem for your application, see [multiprocess/README.md](./airo_c ## References For more background on cameras, in particular on the meaning of intrinsics, extrinics, distortion coefficients, pinhole (and other) camera models, see: +- Szeliski - Computer vision: Algorithms and Applications, available [here](https://szeliski.org/Book/) - https://web.eecs.umich.edu/~justincj/teaching/eecs442/WI2021/schedule.html - https://learnopencv.com/geometry-of-image-formation/ (extrinsics & intrinsics) - http://www.cs.cmu.edu/~16385/s17/Slides/11.1_Camera_matrix.pdf (idem) diff --git a/airo-camera-toolkit/airo_camera_toolkit/calibration/collect_calibration_data.py b/airo-camera-toolkit/airo_camera_toolkit/calibration/collect_calibration_data.py index bcc1cd7a..a5c1703e 100644 --- a/airo-camera-toolkit/airo_camera_toolkit/calibration/collect_calibration_data.py +++ b/airo-camera-toolkit/airo_camera_toolkit/calibration/collect_calibration_data.py @@ -9,7 +9,7 @@ from 
airo_camera_toolkit.calibration.fiducial_markers import detect_and_visualize_charuco_pose from airo_camera_toolkit.cameras.camera_discovery import click_camera_options, discover_camera from airo_camera_toolkit.interfaces import RGBCamera -from airo_camera_toolkit.utils import ImageConverter +from airo_camera_toolkit.utils.image_converter import ImageConverter from airo_dataset_tools.data_parsers.camera_intrinsics import CameraIntrinsics from airo_dataset_tools.data_parsers.pose import Pose from airo_robots.manipulators.hardware.ur_rtde import URrtde diff --git a/airo-camera-toolkit/airo_camera_toolkit/calibration/fiducial_markers.py b/airo-camera-toolkit/airo_camera_toolkit/calibration/fiducial_markers.py index 47fc3ffe..bdc9381d 100644 --- a/airo-camera-toolkit/airo_camera_toolkit/calibration/fiducial_markers.py +++ b/airo-camera-toolkit/airo_camera_toolkit/calibration/fiducial_markers.py @@ -9,7 +9,6 @@ import cv2 import numpy as np -from airo_camera_toolkit.cameras.camera_discovery import click_camera_options, discover_camera from airo_spatial_algebra import SE3Container from airo_typing import CameraIntrinsicsMatrixType, HomogeneousMatrixType, OpenCVIntImageType from cv2 import aruco @@ -249,7 +248,8 @@ def detect_and_visualize_charuco_pose( Defaults to the AIRO_DEFAULT_CHARUCO_BOARD. 
""" import click - from airo_camera_toolkit.utils import ImageConverter + from airo_camera_toolkit.cameras.camera_discovery import click_camera_options, discover_camera + from airo_camera_toolkit.utils.image_converter import ImageConverter @click.command() @click.option("--aruco_marker_size", default=0.031, help="Size of the aruco marker in meters") diff --git a/airo-camera-toolkit/airo_camera_toolkit/calibration/hand_eye_calibration.py b/airo-camera-toolkit/airo_camera_toolkit/calibration/hand_eye_calibration.py index 5145d4e8..348017d7 100644 --- a/airo-camera-toolkit/airo_camera_toolkit/calibration/hand_eye_calibration.py +++ b/airo-camera-toolkit/airo_camera_toolkit/calibration/hand_eye_calibration.py @@ -18,7 +18,7 @@ detect_and_visualize_charuco_pose, ) from airo_camera_toolkit.interfaces import RGBCamera -from airo_camera_toolkit.utils import ImageConverter +from airo_camera_toolkit.utils.image_converter import ImageConverter from airo_dataset_tools.data_parsers.camera_intrinsics import CameraIntrinsics from airo_robots.manipulators.hardware.ur_rtde import URrtde from airo_robots.manipulators.position_manipulator import PositionManipulator diff --git a/airo-camera-toolkit/airo_camera_toolkit/cameras/README.md b/airo-camera-toolkit/airo_camera_toolkit/cameras/README.md index bb4ec5c9..db373564 100644 --- a/airo-camera-toolkit/airo_camera_toolkit/cameras/README.md +++ b/airo-camera-toolkit/airo_camera_toolkit/cameras/README.md @@ -2,13 +2,20 @@ # Cameras This subpackage contains implementations of the camera interface for the cameras we have at AIRO. +- ZED 2i +- Realsense D400 series + +It also contains code to enable multiprocessed use of the camera streams: [multiprocessed camera](./multiprocess/) + ## 1. Installation Implementations usually require the installation of SDKs, drivers etc. to communicate with the camera. 
This information can be found in `READMEs` for each camera: -* [ZED Installation](zed_installation.md) +* [ZED Installation](zed/installation.md) +* [RealSense Installation](realsense/realsense_installation.md) + ## 2. Testing your hardware installation -Furthermore, there is code for testing the hardware implementations. +Furthermore, there is code for testing the hardware implementations: `manual_test_hw.py` But since this requires attaching a physical camera, these are 'user tests' which should be done manually by developers/users. Each camera implementation can be run as a script and will execute the relevant tests, providing instructions on what to look out for. diff --git a/airo-camera-toolkit/airo_camera_toolkit/cameras/__init__.py b/airo-camera-toolkit/airo_camera_toolkit/cameras/__init__.py index e69de29b..b3beeb22 100644 --- a/airo-camera-toolkit/airo_camera_toolkit/cameras/__init__.py +++ b/airo-camera-toolkit/airo_camera_toolkit/cameras/__init__.py @@ -0,0 +1,2 @@ +from airo_camera_toolkit.cameras.realsense.realsense import Realsense # noqa: F401 +from airo_camera_toolkit.cameras.zed.zed2i import Zed2i # noqa: F401 diff --git a/airo-camera-toolkit/airo_camera_toolkit/cameras/camera_discovery.py b/airo-camera-toolkit/airo_camera_toolkit/cameras/camera_discovery.py index 644f4761..af008194 100644 --- a/airo-camera-toolkit/airo_camera_toolkit/cameras/camera_discovery.py +++ b/airo-camera-toolkit/airo_camera_toolkit/cameras/camera_discovery.py @@ -44,11 +44,11 @@ def discover_camera(brand: Optional[str], serial_number: Optional[str] = None, * brand_enum = CameraBrand(brand) # Attempt to convert to enum if brand_enum == CameraBrand.ZED: - from airo_camera_toolkit.cameras.zed2i import Zed2i + from airo_camera_toolkit.cameras.zed.zed2i import Zed2i camera = Zed2i(serial_number=serial_number, **kwargs) elif brand_enum == CameraBrand.REALSENSE: - from airo_camera_toolkit.cameras.realsense import Realsense + from airo_camera_toolkit.cameras.realsense.realsense 
import Realsense camera = Realsense(serial_number=serial_number, **kwargs) # type: ignore else: @@ -88,7 +88,7 @@ def my_command(camera_brand: Optional[str] = None, if __name__ == "__main__": import click import cv2 - from airo_camera_toolkit.utils import ImageConverter + from airo_camera_toolkit.utils.image_converter import ImageConverter @click.command() @click_camera_options diff --git a/airo-camera-toolkit/airo_camera_toolkit/cameras/test_hw.py b/airo-camera-toolkit/airo_camera_toolkit/cameras/manual_test_hw.py similarity index 100% rename from airo-camera-toolkit/airo_camera_toolkit/cameras/test_hw.py rename to airo-camera-toolkit/airo_camera_toolkit/cameras/manual_test_hw.py diff --git a/airo-camera-toolkit/airo_camera_toolkit/cameras/multiprocess/multiprocess_rerun_logger.py b/airo-camera-toolkit/airo_camera_toolkit/cameras/multiprocess/multiprocess_rerun_logger.py index 314a3846..b34223eb 100644 --- a/airo-camera-toolkit/airo_camera_toolkit/cameras/multiprocess/multiprocess_rerun_logger.py +++ b/airo-camera-toolkit/airo_camera_toolkit/cameras/multiprocess/multiprocess_rerun_logger.py @@ -7,7 +7,7 @@ from airo_camera_toolkit.cameras.multiprocess.multiprocess_rgb_camera import MultiprocessRGBReceiver from airo_camera_toolkit.cameras.multiprocess.multiprocess_rgbd_camera import MultiprocessRGBDReceiver from airo_camera_toolkit.image_transforms.image_transform import ImageTransform -from airo_camera_toolkit.utils import ImageConverter +from airo_camera_toolkit.utils.image_converter import ImageConverter logger = loguru.logger diff --git a/airo-camera-toolkit/airo_camera_toolkit/cameras/multiprocess/multiprocess_rgb_camera.py b/airo-camera-toolkit/airo_camera_toolkit/cameras/multiprocess/multiprocess_rgb_camera.py index e38d2073..9b9060df 100644 --- a/airo-camera-toolkit/airo_camera_toolkit/cameras/multiprocess/multiprocess_rgb_camera.py +++ b/airo-camera-toolkit/airo_camera_toolkit/cameras/multiprocess/multiprocess_rgb_camera.py @@ -7,7 +7,7 @@ import cv2 
import numpy as np from airo_camera_toolkit.interfaces import RGBCamera -from airo_camera_toolkit.utils import ImageConverter +from airo_camera_toolkit.utils.image_converter import ImageConverter from airo_typing import CameraIntrinsicsMatrixType, CameraResolutionType, NumpyFloatImageType, NumpyIntImageType _RGB_SHM_NAME = "rgb" @@ -285,7 +285,7 @@ def __del__(self) -> None: """example of how to use the MultiprocessRGBPublisher and MultiprocessRGBReceiver. You can also use the MultiprocessRGBReceiver in a different process (e.g. in a different python script) """ - from airo_camera_toolkit.cameras.zed2i import Zed2i + from airo_camera_toolkit.cameras.zed.zed2i import Zed2i namespace = "camera" diff --git a/airo-camera-toolkit/airo_camera_toolkit/cameras/multiprocess/multiprocess_rgbd_camera.py b/airo-camera-toolkit/airo_camera_toolkit/cameras/multiprocess/multiprocess_rgbd_camera.py index 55eb8dc8..bc38acb1 100644 --- a/airo-camera-toolkit/airo_camera_toolkit/cameras/multiprocess/multiprocess_rgbd_camera.py +++ b/airo-camera-toolkit/airo_camera_toolkit/cameras/multiprocess/multiprocess_rgbd_camera.py @@ -171,7 +171,7 @@ def __del__(self) -> None: You can also use the MultiprocessRGBDReceiver in a different process (e.g. 
in a different python script) """ - from airo_camera_toolkit.cameras.zed2i import Zed2i + from airo_camera_toolkit.cameras.zed.zed2i import Zed2i resolution = Zed2i.RESOLUTION_720 diff --git a/airo-camera-toolkit/airo_camera_toolkit/cameras/multiprocess/multiprocess_video_recorder.py b/airo-camera-toolkit/airo_camera_toolkit/cameras/multiprocess/multiprocess_video_recorder.py index 82e7d782..3efabc32 100644 --- a/airo-camera-toolkit/airo_camera_toolkit/cameras/multiprocess/multiprocess_video_recorder.py +++ b/airo-camera-toolkit/airo_camera_toolkit/cameras/multiprocess/multiprocess_video_recorder.py @@ -7,7 +7,7 @@ from airo_camera_toolkit.cameras.multiprocess.multiprocess_rgb_camera import MultiprocessRGBReceiver from airo_camera_toolkit.image_transforms.image_transform import ImageTransform -from airo_camera_toolkit.utils import ImageConverter +from airo_camera_toolkit.utils.image_converter import ImageConverter class MultiprocessVideoRecorder(Process): diff --git a/airo-camera-toolkit/airo_camera_toolkit/cameras/realsense.py b/airo-camera-toolkit/airo_camera_toolkit/cameras/realsense/realsense.py similarity index 97% rename from airo-camera-toolkit/airo_camera_toolkit/cameras/realsense.py rename to airo-camera-toolkit/airo_camera_toolkit/cameras/realsense/realsense.py index 4d2916bc..3828fdee 100644 --- a/airo-camera-toolkit/airo_camera_toolkit/cameras/realsense.py +++ b/airo-camera-toolkit/airo_camera_toolkit/cameras/realsense/realsense.py @@ -5,7 +5,7 @@ import numpy as np import pyrealsense2 as rs # type: ignore from airo_camera_toolkit.interfaces import RGBDCamera -from airo_camera_toolkit.utils import ImageConverter +from airo_camera_toolkit.utils.image_converter import ImageConverter from airo_typing import ( CameraIntrinsicsMatrixType, CameraResolutionType, @@ -138,7 +138,7 @@ def _retrieve_depth_image(self) -> NumpyIntImageType: if __name__ == "__main__": - import airo_camera_toolkit.cameras.test_hw as test + import 
airo_camera_toolkit.cameras.manual_test_hw as test import cv2 camera = Realsense(fps=30, resolution=Realsense.RESOLUTION_1080, enable_hole_filling=True) diff --git a/airo-camera-toolkit/airo_camera_toolkit/cameras/realsense_installation.md b/airo-camera-toolkit/airo_camera_toolkit/cameras/realsense/realsense_installation.md similarity index 100% rename from airo-camera-toolkit/airo_camera_toolkit/cameras/realsense_installation.md rename to airo-camera-toolkit/airo_camera_toolkit/cameras/realsense/realsense_installation.md diff --git a/airo-camera-toolkit/airo_camera_toolkit/cameras/realsense_scan_profiles.py b/airo-camera-toolkit/airo_camera_toolkit/cameras/realsense/realsense_scan_profiles.py similarity index 100% rename from airo-camera-toolkit/airo_camera_toolkit/cameras/realsense_scan_profiles.py rename to airo-camera-toolkit/airo_camera_toolkit/cameras/realsense/realsense_scan_profiles.py diff --git a/airo-camera-toolkit/airo_camera_toolkit/cameras/zed/__init__.py b/airo-camera-toolkit/airo_camera_toolkit/cameras/zed/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/airo-camera-toolkit/airo_camera_toolkit/cameras/zed_installation.md b/airo-camera-toolkit/airo_camera_toolkit/cameras/zed/installation.md similarity index 100% rename from airo-camera-toolkit/airo_camera_toolkit/cameras/zed_installation.md rename to airo-camera-toolkit/airo_camera_toolkit/cameras/zed/installation.md diff --git a/airo-camera-toolkit/airo_camera_toolkit/cameras/zed2i.py b/airo-camera-toolkit/airo_camera_toolkit/cameras/zed/zed2i.py similarity index 98% rename from airo-camera-toolkit/airo_camera_toolkit/cameras/zed2i.py rename to airo-camera-toolkit/airo_camera_toolkit/cameras/zed/zed2i.py index 7d77fc27..2ac4d790 100644 --- a/airo-camera-toolkit/airo_camera_toolkit/cameras/zed2i.py +++ b/airo-camera-toolkit/airo_camera_toolkit/cameras/zed/zed2i.py @@ -21,9 +21,8 @@ import time import numpy as np -from airo_camera_toolkit.cameras.test_hw import 
manual_test_stereo_rgbd_camera from airo_camera_toolkit.interfaces import StereoRGBDCamera -from airo_camera_toolkit.utils import ImageConverter +from airo_camera_toolkit.utils.image_converter import ImageConverter from airo_typing import ( CameraIntrinsicsMatrixType, CameraResolutionType, @@ -287,6 +286,7 @@ def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: if __name__ == "__main__": """this script serves as a 'test' for the zed implementation.""" + from airo_camera_toolkit.cameras.manual_test_hw import manual_test_stereo_rgbd_camera, profile_rgb_throughput # zed specific tests: # - list all serial numbers of the cameras @@ -301,7 +301,6 @@ def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: manual_test_stereo_rgbd_camera(zed) # profile rgb throughput, should be at 60FPS, i.e. 0.017s - from airo_camera_toolkit.cameras.test_hw import profile_rgb_throughput zed = Zed2i(Zed2i.RESOLUTION_720, fps=60) profile_rgb_throughput(zed) diff --git a/airo-camera-toolkit/airo_camera_toolkit/pinhole_operations/Readme.md b/airo-camera-toolkit/airo_camera_toolkit/pinhole_operations/Readme.md new file mode 100644 index 00000000..3d3d8366 --- /dev/null +++ b/airo-camera-toolkit/airo_camera_toolkit/pinhole_operations/Readme.md @@ -0,0 +1,16 @@ +# Pinhole Operations + +This subpackage contains code to perform various geometric operations to convert between 3D positions and 2D image coordinates using the pinhole camera model. 
+ +Most notably these include: + +- projection to go from 3D points to 2D image coordinates +- triangulation to go from a set of corresponding image coordinates to a 3D point +- unprojection to go from a 2D coordinate to a 3D point by intersecting the ray with depth information + + +For mathematical background, see: + +- IRM course +- Szeliski, [Computer Vision: Algorithms and Applications](https://szeliski.org/Book/) +- OpenCV page on [cameras](https://docs.opencv.org/4.x/d9/d0c/group__calib3d.html#) \ No newline at end of file diff --git a/airo-camera-toolkit/airo_camera_toolkit/pinhole_operations/__init__.py b/airo-camera-toolkit/airo_camera_toolkit/pinhole_operations/__init__.py new file mode 100644 index 00000000..274d9802 --- /dev/null +++ b/airo-camera-toolkit/airo_camera_toolkit/pinhole_operations/__init__.py @@ -0,0 +1,7 @@ +from .projection import project_points_to_image_plane # noqa F401 +from .triangulation import calculate_triangulation_errors, multiview_triangulation_midpoint # noqa F401 +from .unprojection import ( # noqa F401 + extract_depth_from_depthmap_heuristic, + unproject_onto_depth_values, + unproject_using_depthmap, +) diff --git a/airo-camera-toolkit/airo_camera_toolkit/pinhole_operations/projection.py b/airo-camera-toolkit/airo_camera_toolkit/pinhole_operations/projection.py new file mode 100644 index 00000000..d7008aab --- /dev/null +++ b/airo-camera-toolkit/airo_camera_toolkit/pinhole_operations/projection.py @@ -0,0 +1,28 @@ +from typing import Union + +from airo_spatial_algebra.operations import _HomogeneousPoints +from airo_typing import CameraIntrinsicsMatrixType, Vector2DArrayType, Vector3DArrayType, Vector3DType + + +def project_points_to_image_plane( + positions_in_camera_frame: Union[Vector3DArrayType, Vector3DType], + camera_intrinsics: CameraIntrinsicsMatrixType, +) -> Vector2DArrayType: + """Projects an array of points from the 3D camera frame to their 2D pixel coordinates on the image plane. 
+ + Make sure to transform them to the camera frame first if they are in the world frame. + + Args: + positions_in_camera_frame: numpy array of shape (N, 3) containing the 3D positions of the points in the camera frame + camera_intrinsics: camera intrinsics matrix as a numpy array of shape (3, 3) + + Returns: + numpy array of shape (N, 2) containing the 2D pixel coordinates of the points on the image plane + """ + + homogeneous_positions_in_camera_frame = _HomogeneousPoints(positions_in_camera_frame).homogeneous_points.T + homogeneous_positions_on_image_plane = camera_intrinsics @ homogeneous_positions_in_camera_frame[:3, ...] + positions_on_image_plane = ( + homogeneous_positions_on_image_plane[:2, ...] / homogeneous_positions_on_image_plane[2, ...] + ) + return positions_on_image_plane.T diff --git a/airo-camera-toolkit/airo_camera_toolkit/pinhole_operations/triangulation.py b/airo-camera-toolkit/airo_camera_toolkit/pinhole_operations/triangulation.py new file mode 100644 index 00000000..bec02347 --- /dev/null +++ b/airo-camera-toolkit/airo_camera_toolkit/pinhole_operations/triangulation.py @@ -0,0 +1,80 @@ +from typing import List + +import numpy as np +from airo_typing import CameraExtrinsicMatrixType, CameraIntrinsicsMatrixType, Vector2DArrayType, Vector3DType + + +def multiview_triangulation_midpoint( + extrinsics_matrices: List[CameraExtrinsicMatrixType], + intrinsics_matrices: List[CameraIntrinsicsMatrixType], + image_coordinates: Vector2DArrayType, +) -> Vector3DType: + """triangulates a point from multiple views using the midpoint method. + This method minimizes the L2 distance in the camera space between the estimated point and all rays + cf. 
https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8967077 + + Args: + extrinsics_matrices: list of extrinsics matrices for each viewpoint + intrinsics_matrices: list of intrinsics matrices for each viewpoint + image_coordinates: list of image coordinates of the 3D point for each viewpoint + + Returns: + estimated 3D position in the world frame as a numpy array of shape (3,). + """ + + # determine the rays for each camera in the world frame + rays = [] + for extrinsics_matrix, intrinsics_matrix, image_point in zip( + extrinsics_matrices, intrinsics_matrices, image_coordinates + ): + ray = ( + extrinsics_matrix[:3, :3] + @ np.linalg.inv(intrinsics_matrix) + @ np.array([image_point[0], image_point[1], 1]) + ) + ray = ray / np.linalg.norm(ray) + rays.append(ray) + + lhs = 0 + rhs = 0 + for i, ray in enumerate(rays): + rhs += (np.eye(3) - ray[:, np.newaxis] @ ray[np.newaxis, :]) @ extrinsics_matrices[i][:3, 3] + lhs += np.eye(3) - ray[:, np.newaxis] @ ray[np.newaxis, :] + + lhs_inv = np.linalg.inv(lhs) + midpoint = lhs_inv @ rhs + return midpoint + + +def calculate_triangulation_errors( + extrinsics_matrices: List[CameraExtrinsicMatrixType], + intrinsics_matrices: List[CameraIntrinsicsMatrixType], + image_coordinates: Vector2DArrayType, + point: Vector3DType, +) -> List[float]: + """calculates the Euclidean distances in the camera space between the estimated point and all rays, as a measure of triangulation error + + Args: + extrinsics_matrices: list of extrinsics matrices for each viewpoint + intrinsics_matrices: list of intrinsics matrices for each viewpoint + image_coordinates: list of image coordinates of the 3D point for each viewpoint + point: 3D point in the world frame + + Returns: + list of Euclidean distances between the point and the rays for each viewpoint + """ + errors = [] + for extrinsics_matrix, intrinsics_matrix, image_point in zip( + extrinsics_matrices, intrinsics_matrices, image_coordinates + ): + ray = ( + extrinsics_matrix[:3, :3] + @ 
np.linalg.inv(intrinsics_matrix) + @ np.array([image_point[0], image_point[1], 1]) + ) + ray = ray / np.linalg.norm(ray) + error = np.linalg.norm( + (np.eye(3) - ray[:, np.newaxis] @ ray[np.newaxis, :]) @ ((extrinsics_matrix[:3, 3]) - point) + ) + errors.append(error.item()) + return errors diff --git a/airo-camera-toolkit/airo_camera_toolkit/pinhole_operations/unprojection.py b/airo-camera-toolkit/airo_camera_toolkit/pinhole_operations/unprojection.py new file mode 100644 index 00000000..e5a87683 --- /dev/null +++ b/airo-camera-toolkit/airo_camera_toolkit/pinhole_operations/unprojection.py @@ -0,0 +1,161 @@ +""" methods for getting from 2D image coordinates to 3D world coordinates using depth information (a.k.a. unprojection)""" + +import numpy as np +from airo_spatial_algebra.operations import _HomogeneousPoints +from airo_typing import ( + CameraIntrinsicsMatrixType, + HomogeneousMatrixType, + NumpyDepthMapType, + Vector2DArrayType, + Vector3DArrayType, +) + + +def unproject_using_depthmap( + image_coordinates: Vector2DArrayType, + depth_map: NumpyDepthMapType, + camera_intrinsics: CameraIntrinsicsMatrixType, + depth_heuristic_mask_size: int = 11, + depth_heuristic_percentile: float = 0.05, +) -> np.ndarray: + """ + Unprojects image coordinates to 3D positions using a depth map. + + Args: + image_coordinates: numpy array of shape (N, 2) containing the 2D pixel coordinates of the points on the image plane + depth_map: numpy array of shape (height, width) containing the depth values for each pixel, i.e. the z-value of the 3D point in the camera frame corresponding to the pixel + camera_intrinsics: camera intrinsics matrix as a numpy array of shape (3, 3) + + Returns: + numpy array of shape (N, 3) containing the 3D positions of the points in the camera frame + + """ + + # TODO: should we make this extraction method more generic? 
though I prefer to keep it simple and not add too many options + depth_values = extract_depth_from_depthmap_heuristic( + image_coordinates, depth_map, depth_heuristic_mask_size, depth_heuristic_percentile + ) + return unproject_onto_depth_values(image_coordinates, depth_values, camera_intrinsics) + + +def unproject_onto_depth_values( + image_coordinates: Vector2DArrayType, + depth_values: np.ndarray, + camera_intrinsics: CameraIntrinsicsMatrixType, +) -> np.ndarray: + """ + Unprojects image coordinates to 3D positions using depth values for each coordinate. + + Args: + image_coordinates: numpy array of shape (N, 2) containing the 2D pixel coordinates of the points on the image plane + depth_values: numpy array of shape (N,) containing the depth values for each pixel, i.e. the z-value of the 3D point in the camera frame corresponding to the pixel + camera_intrinsics: camera intrinsics matrix as a numpy array of shape (3, 3) + + Returns: + numpy array of shape (N, 3) containing the 3D positions of the points in the camera frame + """ + assert ( + image_coordinates.shape[0] == depth_values.shape[0] + ), "coordinates and depth values must have the same length" + + homogeneous_coords = np.ones((image_coordinates.shape[0], 3)) + homogeneous_coords[:, :2] = image_coordinates + homogeneous_coords = np.transpose(homogeneous_coords) + rays_in_camera_frame = ( + np.linalg.inv(camera_intrinsics) @ homogeneous_coords + ) # shape is cast by numpy to column vector! 
+ + z_values_in_camera_frame = depth_values + + t = z_values_in_camera_frame / rays_in_camera_frame[2, :] + + positions_in_camera_frame = t * rays_in_camera_frame + + homogeneous_positions_in_camera_frame = _HomogeneousPoints(positions_in_camera_frame.T).homogeneous_points.T + return homogeneous_positions_in_camera_frame[:3, ...].T + + +def unproject_onto_world_z_plane( + image_coordinates: Vector2DArrayType, + camera_intrinsics: CameraIntrinsicsMatrixType, + camera_in_frame_pose: HomogeneousMatrixType, + height: float, +) -> Vector3DArrayType: + """Unprojects image coordinates to 3D positions on a Z-plane of the specified frame + + This is useful if you know the height of the object in this frame, + which is the case for 2D items (cloth!) or for rigid, known 3D objects with a fixed orientation. + + + Args: + image_coordinates: numpy array of shape (N, 2) containing the 2D pixel coordinates of the points on the image plane + camera_intrinsics: camera intrinsics matrix as a numpy array of shape (3, 3) + camera_in_frame_pose: pose of the camera in the target frame. + If the target frame is the world frame, the camera_in_frame_pose is the extrinsics matrix. + height: height of the plane in the target frame + + Returns: + positions in the world frame on the Z=height plane wrt to the frame. 
+ """ + # convert to homogeneous coordinates and transpose to column vectors + homogeneous_coords = np.ones((image_coordinates.shape[0], 3)) + homogeneous_coords[:, :2] = image_coordinates + homogeneous_coords = np.transpose(homogeneous_coords) + + camera_frame_ray_vector = np.linalg.inv(camera_intrinsics) @ homogeneous_coords + + translation = camera_in_frame_pose[0:3, 3] + rotation_matrix = camera_in_frame_pose[0:3, 0:3] + + world_frame_ray_vectors = rotation_matrix @ camera_frame_ray_vector + world_frame_ray_vectors = np.transpose(world_frame_ray_vectors) + t = (height - translation[2]) / world_frame_ray_vectors[:, 2] + points = t[:, np.newaxis] * world_frame_ray_vectors + translation + return points + + +def extract_depth_from_depthmap_heuristic( + image_coordinates: Vector2DArrayType, + depth_map: NumpyDepthMapType, + mask_size: int = 11, + depth_percentile: float = 0.05, +) -> np.ndarray: + """ + A simple heuristic to get more robust depth values of the depth map. Especially with keypoints we are often interested in points + on the edge of an object, or even worse on a corner. Not only are these regions noisy by themselves but the keypoints could also be + be a little off. + + This function takes the percentile of a region around the specified point and assumes we are interested in the nearest object present. + This is not always true (think about the backside of a box looking under a 45 degree angle) but it serves as a good proxy. The more confident + you are of your keypoints and the better the heatmaps are, the lower you could set the mask size and percentile. If you are very, very confident + you could directly take the pointcloud as well instead of manually querying the heatmap, but I find that they are more noisy. + + Also note that this function assumes there are no negative infinity values (no objects closer than 30cm!) 
+ + Returns: + (np.ndarray) a 1D array of the depth values for the specified coordinates + """ + + assert mask_size % 2, "only odd sized markers allowed" + assert ( + depth_percentile < 0.25 + ), "For straight corners, about 75 percent of the region will be background.. Are your sure you want the percentile to be lower?" + # check all coordinates are within the size of the depth map to avoid unwanted wrapping of the array indices + assert np.max(image_coordinates[:, 1]) < depth_map.shape[0], "V coordinates out of bounds" + assert np.max(image_coordinates[:, 0]) < depth_map.shape[1], "U coordinates out of bounds" + assert np.min(image_coordinates) >= 0, "coordinates out of bounds" + + # convert coordinates to integers + image_coordinates = image_coordinates.astype(np.uint32) + + # extract depth values by taking the percentile of the depth values in a region around the point + depth_regions = np.empty((image_coordinates.shape[0], (mask_size) ** 2)) + for i in range(image_coordinates.shape[0]): + depth_region = depth_map[ + image_coordinates[i, 1] - mask_size // 2 : image_coordinates[i, 1] + mask_size // 2 + 1, + image_coordinates[i, 0] - mask_size // 2 : image_coordinates[i, 0] + mask_size // 2 + 1, + ] + depth_regions[i, :] = depth_region.flatten() + depth_values = np.nanquantile(depth_regions, depth_percentile, axis=1) + + return depth_values diff --git a/airo-camera-toolkit/airo_camera_toolkit/reprojection.py b/airo-camera-toolkit/airo_camera_toolkit/reprojection.py deleted file mode 100644 index 58f32844..00000000 --- a/airo-camera-toolkit/airo_camera_toolkit/reprojection.py +++ /dev/null @@ -1,157 +0,0 @@ -from typing import Optional, Union - -import numpy as np -from airo_spatial_algebra.operations import _HomogeneousPoints -from airo_typing import ( - CameraIntrinsicsMatrixType, - HomogeneousMatrixType, - NumpyDepthMapType, - Vector2DArrayType, - Vector3DArrayType, - Vector3DType, -) - - -def reproject_to_frame_z_plane( - image_coords: Vector2DArrayType, - 
camera_intrinsics: CameraIntrinsicsMatrixType, - camera_in_frame_pose: HomogeneousMatrixType, - height: float = 0.0, -) -> Vector3DArrayType: - """Reprojects points from the image plane to a Z-plane of the specified frame - - This is useful if you known the height of the object in the world frame, - which is the case for 2D items (cloth!) or for rigid, known 3D objects with a fixed orientation. - - If the target frame is the world frame, the camera_in_frame_pose is the extrinsics matrix. - - Returns: - positions in the world frame on the Z=height plane wrt to the frame. - """ - # convert to homogeneous coordinates and transpose to column vectors - homogeneous_coords = np.ones((image_coords.shape[0], 3)) - homogeneous_coords[:, :2] = image_coords - homogeneous_coords = np.transpose(homogeneous_coords) - - camera_frame_ray_vector = np.linalg.inv(camera_intrinsics) @ homogeneous_coords - - translation = camera_in_frame_pose[0:3, 3] - rotation_matrix = camera_in_frame_pose[0:3, 0:3] - - world_frame_ray_vectors = rotation_matrix @ camera_frame_ray_vector - world_frame_ray_vectors = np.transpose(world_frame_ray_vectors) - t = (height - translation[2]) / world_frame_ray_vectors[:, 2] - points = t[:, np.newaxis] * world_frame_ray_vectors + translation - return points - - -def reproject_to_frame( - coordinates: Vector2DArrayType, - camera_intrinsics: CameraIntrinsicsMatrixType, - camera_in_frame_pose: HomogeneousMatrixType, - depth_map: NumpyDepthMapType, - mask_size: int = 11, - depth_percentile: float = 0.05, -) -> np.ndarray: - """ - Reprojects coordinates on the image plane to a base frame, as defined by the camera in frame pose. - Args: - Returns: (3, N) np.array containing the coordinates of the point in the camera frame. Each column is a set of - coordinates. 
- """ - # TODO: make this more generic for 2D as well (HomogeneousPoints class) - homogeneous_coords = np.ones((coordinates.shape[0], 3)) - homogeneous_coords[:, :2] = coordinates - homogeneous_coords = np.transpose(homogeneous_coords) - rays_in_camera_frame = ( - np.linalg.inv(camera_intrinsics) @ homogeneous_coords - ) # shape is cast by numpy to column vector! - - z_values_in_camera_frame = extract_depth_from_depthmap_heuristic( - coordinates, depth_map, mask_size, depth_percentile - ) - - t = z_values_in_camera_frame / rays_in_camera_frame[2, :] - - positions_in_camera_frame = t * rays_in_camera_frame - - homogeneous_positions_in_camera_frame = _HomogeneousPoints(positions_in_camera_frame.T).homogeneous_points.T - homogeneous_positions_in_frame = camera_in_frame_pose @ homogeneous_positions_in_camera_frame - return homogeneous_positions_in_frame[:3, ...].T - - -def extract_depth_from_depthmap_heuristic( - coordinates: Vector2DArrayType, - depth_map: NumpyDepthMapType, - mask_size: int = 11, - depth_percentile: float = 0.05, -) -> np.ndarray: - """ - A simple heuristic to get more robust depth values of the depth map. Especially with keypoints we are often interested in points - on the edge of an object, or even worse on a corner. Not only are these regions noisy by themselves but the keypoints could also be - be a little off. - - This function takes the percentile of a region around the specified point and assumes we are interested in the nearest object present. - This is not always true (think about the backside of a box looking under a 45 degree angle) but it serves as a good proxy. The more confident - you are of your keypoints and the better the heatmaps are, the lower you could set the mask size and percentile. If you are very, very confident - you could directly take the pointcloud as well instead of manually querying the heatmap, but I find that they are more noisy. 
- - Also note that this function assumes there are no negative infinity values (no objects closer than 30cm!) - - Returns: - (np.ndarray) a 1D array of the depth values for the specified coordinates - """ - - assert mask_size % 2, "only odd sized markers allowed" - assert ( - depth_percentile < 0.25 - ), "For straight corners, about 75 percent of the region will be background.. Are your sure you want the percentile to be lower?" - # check all coordinates are within the size of the depth map to avoid unwanted wrapping of the array indices - assert np.max(coordinates[:, 1]) < depth_map.shape[0], "V coordinates out of bounds" - assert np.max(coordinates[:, 0]) < depth_map.shape[1], "U coordinates out of bounds" - assert np.min(coordinates) >= 0, "coordinates out of bounds" - - # convert coordinates to integers - coordinates = coordinates.astype(np.uint32) - - # extract depth values by taking the percentile of the depth values in a region around the point - depth_regions = np.empty((coordinates.shape[0], (mask_size) ** 2)) - for i in range(coordinates.shape[0]): - depth_region = depth_map[ - coordinates[i, 1] - mask_size // 2 : coordinates[i, 1] + mask_size // 2 + 1, - coordinates[i, 0] - mask_size // 2 : coordinates[i, 0] + mask_size // 2 + 1, - ] - depth_regions[i, :] = depth_region.flatten() - depth_values = np.nanquantile(depth_regions, depth_percentile, axis=1) - - return depth_values - - -def project_frame_to_image_plane( - positions_in_frame: Union[Vector3DArrayType, Vector3DType], - camera_matrix: CameraIntrinsicsMatrixType, - frame_to_camera_transform: Optional[HomogeneousMatrixType] = None, -) -> Vector2DArrayType: - """Projects an array of points from a 3D world frame to the 2D image plane. - - Projecting to the world frame is a special case of this projection operation. - In this case, the frame_to_camera_transform is the inverse of the camera extrinsics matrix. 
- - Projecting from the camera frame is also a special case, in this case the frame_to_camera_transform is the identity matrix. - If no is given, this function assumes that the points are already in the camera frame. - """ - # TODO: should we add assert statements to validate the input? - - if frame_to_camera_transform is None: - # if no transform is given, the positions are assumed to be in the camera frame - # in this case, the transform is the identity matrix - frame_to_camera_transform = np.eye(4) - - homogeneous_positions_in_world_frame = _HomogeneousPoints(positions_in_frame).homogeneous_points - homogeneous_positions_in_world_frame = homogeneous_positions_in_world_frame.T - homogeneous_positions_in_camera_frame = frame_to_camera_transform @ homogeneous_positions_in_world_frame - homogeneous_positions_on_image_plane = camera_matrix @ homogeneous_positions_in_camera_frame[:3, ...] - positions_on_image_plane = ( - homogeneous_positions_on_image_plane[:2, ...] / homogeneous_positions_on_image_plane[2, ...] 
- ) - return positions_on_image_plane.T diff --git a/airo-camera-toolkit/airo_camera_toolkit/annotation_tool.md b/airo-camera-toolkit/airo_camera_toolkit/utils/annotation_tool.md similarity index 100% rename from airo-camera-toolkit/airo_camera_toolkit/annotation_tool.md rename to airo-camera-toolkit/airo_camera_toolkit/utils/annotation_tool.md diff --git a/airo-camera-toolkit/airo_camera_toolkit/annotation_tool.py b/airo-camera-toolkit/airo_camera_toolkit/utils/annotation_tool.py similarity index 100% rename from airo-camera-toolkit/airo_camera_toolkit/annotation_tool.py rename to airo-camera-toolkit/airo_camera_toolkit/utils/annotation_tool.py diff --git a/airo-camera-toolkit/airo_camera_toolkit/utils.py b/airo-camera-toolkit/airo_camera_toolkit/utils/image_converter.py similarity index 100% rename from airo-camera-toolkit/airo_camera_toolkit/utils.py rename to airo-camera-toolkit/airo_camera_toolkit/utils/image_converter.py diff --git a/airo-camera-toolkit/docs/live_charuco_pose.py b/airo-camera-toolkit/docs/live_charuco_pose.py index 41d8143b..bb3ae12e 100644 --- a/airo-camera-toolkit/docs/live_charuco_pose.py +++ b/airo-camera-toolkit/docs/live_charuco_pose.py @@ -1,7 +1,7 @@ import cv2 from airo_camera_toolkit.calibration.fiducial_markers import detect_charuco_board, draw_frame_on_image -from airo_camera_toolkit.cameras.zed2i import Zed2i -from airo_camera_toolkit.utils import ImageConverter +from airo_camera_toolkit.cameras.zed.zed2i import Zed2i +from airo_camera_toolkit.utils.image_converter import ImageConverter camera = Zed2i(fps=30) intrinsics = camera.intrinsics_matrix() diff --git a/airo-camera-toolkit/test/test_utils.py b/airo-camera-toolkit/test/test_image_converter.py similarity index 92% rename from airo-camera-toolkit/test/test_utils.py rename to airo-camera-toolkit/test/test_image_converter.py index 926e0f2a..7543269f 100644 --- a/airo-camera-toolkit/test/test_utils.py +++ b/airo-camera-toolkit/test/test_image_converter.py @@ -1,5 +1,10 @@ 
import numpy as np -from airo_camera_toolkit.utils import ImageConverter, is_float_image_array, is_image_array, is_int_image_array +from airo_camera_toolkit.utils.image_converter import ( + ImageConverter, + is_float_image_array, + is_image_array, + is_int_image_array, +) def test_image_format_checks(): diff --git a/airo-camera-toolkit/test/test_pinhole_operations.py b/airo-camera-toolkit/test/test_pinhole_operations.py new file mode 100644 index 00000000..764f920c --- /dev/null +++ b/airo-camera-toolkit/test/test_pinhole_operations.py @@ -0,0 +1,73 @@ +from test.test_config import _ImageTestValues + +import numpy as np +from airo_camera_toolkit.pinhole_operations import ( + extract_depth_from_depthmap_heuristic, + multiview_triangulation_midpoint, + project_points_to_image_plane, + unproject_using_depthmap, +) +from PIL import Image + + +def test_world_to_image_plane_projection(): + position_sets = [_ImageTestValues._positions_in_camera_frame, _ImageTestValues._positions_in_camera_frame[0]] + coords_sets = [_ImageTestValues._positions_on_image_plane, _ImageTestValues._positions_on_image_plane[0]] + + for positions, coords in zip(position_sets, coords_sets): + projected_points = project_points_to_image_plane(positions, _ImageTestValues._intrinsics_matrix) + assert np.isclose(projected_points, coords, atol=1e-2).all() + + +def _load_depth_image(): + # load image, convert to grayscale and then convert to numpy array + depth_map = Image.open(_ImageTestValues._depth_image_path).convert("L") + depth_map = np.array(depth_map) + return depth_map + + +def _load_depthmap(): + return np.load(_ImageTestValues._depth_map_path) + + +def test_depth_heuristic(): + # load image, convert to grayscale and then convert to numpy array + depth_map = _load_depthmap() + depths = extract_depth_from_depthmap_heuristic(_ImageTestValues._positions_on_image_plane, depth_map, mask_size=51) + assert np.isclose(depths, _ImageTestValues._depth_z_values, atol=1e-2).all() + + +def 
test_reproject_camera_frame(): + depth_map = _load_depthmap() + reprojected_points = unproject_using_depthmap( + _ImageTestValues._positions_on_image_plane, depth_map, _ImageTestValues._intrinsics_matrix + ) + assert np.isclose(reprojected_points, _ImageTestValues._positions_in_camera_frame, atol=1e-2).all() + + +def test_triangulation(): + # construct a simple example + position = np.array([0.02, 0.01, 0.01]) + # extrinsics of camera 1 meter to the left looking at the origin + extrinsics1 = np.array([[0, 0, -1, 1], [0, 1, 0, 0], [1, 0, 0, 0], [0, 0, 0, 1]]) + # extrinsics of camera 1 meter to the front looking at the origin + extrinsics2 = np.array([[1, 0, 0, 0], [0, 0, -1, 1], [0, 1, 0, 0], [0, 0, 0, 1]]) + + # intrinsics of both cameras: 500x500 pixels, 1000px focal length + intrinsics = np.array([[1000, 0, 2000], [0, 1000, 2000], [0, 0, 1]]) + # position in the camera frames + position1 = (np.linalg.inv(extrinsics1) @ np.append(position, 1))[:3] + position2 = (np.linalg.inv(extrinsics2) @ np.append(position, 1))[:3] + + # image coordinates of the point in both cameras + image_coordinates_1 = project_points_to_image_plane(position1, intrinsics)[0] + image_coordinates_2 = project_points_to_image_plane(position2, intrinsics)[0] + + ## actual test + + # triangulate the point + triangulated_point = multiview_triangulation_midpoint( + [extrinsics1, extrinsics2], [intrinsics, intrinsics], [image_coordinates_1, image_coordinates_2] + ) + + assert np.isclose(triangulated_point, position, atol=1e-2).all() diff --git a/airo-camera-toolkit/test/test_reprojection.py b/airo-camera-toolkit/test/test_reprojection.py deleted file mode 100644 index 5377fad1..00000000 --- a/airo-camera-toolkit/test/test_reprojection.py +++ /dev/null @@ -1,71 +0,0 @@ -from test.test_config import _ImageTestValues - -import numpy as np -from airo_camera_toolkit.reprojection import ( - extract_depth_from_depthmap_heuristic, - project_frame_to_image_plane, - reproject_to_frame, -) -from PIL import 
Image - - -def test_world_to_image_plane_projection(): - world_pose_in_camera_frame = np.linalg.inv(_ImageTestValues._extrinsics_matrix) - projected_points = project_frame_to_image_plane( - _ImageTestValues._positions_in_world_frame, _ImageTestValues._intrinsics_matrix, world_pose_in_camera_frame - ) - assert np.isclose(projected_points, _ImageTestValues._positions_on_image_plane, atol=1e-2).all() - - -def test_world_to_image_plane_projection_single_vector(): - # is this test necessary? - # if the Homogeneous points class works as expected, this should work too.. - world_pose_in_camera_frame = np.linalg.inv(_ImageTestValues._extrinsics_matrix) - projected_points = project_frame_to_image_plane( - _ImageTestValues._positions_in_world_frame[0], _ImageTestValues._intrinsics_matrix, world_pose_in_camera_frame - ) - assert np.isclose(projected_points, _ImageTestValues._positions_on_image_plane[0], atol=1e-2).all() - - -def test_camera_to_image_plane_projection(): - projected_points = project_frame_to_image_plane( - _ImageTestValues._positions_in_camera_frame, _ImageTestValues._intrinsics_matrix - ) - assert np.isclose(projected_points, _ImageTestValues._positions_on_image_plane, atol=1e-2).all() - - -def _load_depth_image(): - # load image, convert to grayscale and then convert to numpy array - depth_map = Image.open(_ImageTestValues._depth_image_path).convert("L") - depth_map = np.array(depth_map) - return depth_map - - -def _load_depthmap(): - return np.load(_ImageTestValues._depth_map_path) - - -def test_depth_heuristic(): - # load image, convert to grayscale and then convert to numpy array - depth_map = _load_depthmap() - depths = extract_depth_from_depthmap_heuristic(_ImageTestValues._positions_on_image_plane, depth_map, mask_size=51) - assert np.isclose(depths, _ImageTestValues._depth_z_values, atol=1e-2).all() - - -def test_reproject_world_frame(): - depth_map = _load_depthmap() - reprojected_points = reproject_to_frame( - 
_ImageTestValues._positions_on_image_plane, - _ImageTestValues._intrinsics_matrix, - _ImageTestValues._extrinsics_matrix, - depth_map, - ) - assert np.isclose(reprojected_points, _ImageTestValues._positions_in_world_frame, atol=1e-2).all() - - -def test_reproject_camera_frame(): - depth_map = _load_depthmap() - reprojected_points = reproject_to_frame( - _ImageTestValues._positions_on_image_plane, _ImageTestValues._intrinsics_matrix, np.eye(4), depth_map - ) - assert np.isclose(reprojected_points, _ImageTestValues._positions_in_camera_frame, atol=1e-2).all()