diff --git a/airo-camera-toolkit/airo_camera_toolkit/point_clouds/operations.py b/airo-camera-toolkit/airo_camera_toolkit/point_clouds/operations.py index e18975f..d9c0891 100644 --- a/airo-camera-toolkit/airo_camera_toolkit/point_clouds/operations.py +++ b/airo-camera-toolkit/airo_camera_toolkit/point_clouds/operations.py @@ -28,7 +28,7 @@ def filter_point_cloud(point_cloud: PointCloud, mask: Any) -> PointCloud: return point_cloud_filtered -def crop_point_cloud_mask(point_cloud: PointCloud, bounding_box: BoundingBox3DType) -> np.ndarray: +def generate_point_cloud_crop_mask(point_cloud: PointCloud, bounding_box: BoundingBox3DType) -> np.ndarray: """Creates a mask that can be used to filter a point cloud to the given bounding box. Args: @@ -59,5 +59,5 @@ def crop_point_cloud( Returns: the new cropped point cloud """ - crop_mask = crop_point_cloud_mask(point_cloud, bounding_box) + crop_mask = generate_point_cloud_crop_mask(point_cloud, bounding_box) return filter_point_cloud(point_cloud, crop_mask.nonzero()) diff --git a/airo-camera-toolkit/notebooks/point_cloud_performance.ipynb b/airo-camera-toolkit/notebooks/point_cloud_performance.ipynb index 5313daa..37de4f9 100644 --- a/airo-camera-toolkit/notebooks/point_cloud_performance.ipynb +++ b/airo-camera-toolkit/notebooks/point_cloud_performance.ipynb @@ -18,9 +18,18 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ "%load_ext autoreload\n", "%autoreload 2" @@ -28,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -38,16 +47,25 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calibration\t confidence_map.tiff image_left.png\tintrinsics.json\n", + "camera_pose.json depth_map.tiff image_right.png\tpointcloud.ply\n" + ] + } + ], "source": [ "!ls $data_dir" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -60,12 +78,12 @@ "from airo_dataset_tools.data_parsers.pose import Pose\n", "from airo_camera_toolkit.point_clouds.conversions import open3d_to_point_cloud, point_cloud_to_open3d\n", "from airo_camera_toolkit.point_clouds.operations import filter_point_cloud, crop_point_cloud\n", - "from airo_camera_toolkit.point_clouds.operations import crop_point_cloud_mask\n" + "from airo_camera_toolkit.point_clouds.operations import generate_point_cloud_crop_mask\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ @@ -80,9 +98,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resolution: (2208, 1242)\n", + "Intrinsics: \n", + " [[1056.831 0. 1103.95 ]\n", + " [ 0. 1056.831 621.699]\n", + " [ 0. 0. 1. ]]\n", + "Extrinsics: \n", + " [[ 1. -0.001 -0.004 0.475]\n", + " [ 0.003 -0.365 0.931 -1.318]\n", + " [-0.002 -0.931 -0.365 0.9 ]\n", + " [ 0. 0. 0. 1. ]]\n" + ] + } + ], "source": [ "with open(intrinsics_path, \"r\") as f:\n", " intrinsics_model = CameraIntrinsics.model_validate_json(f.read())\n", @@ -100,9 +135,21 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PointCloud on CPU:0 [2742336 points (Float32)].\n", + "Attributes: colors (dtype = UInt8, shape = {2742336, 3})." + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "def binarize_confidence(image: NumpyDepthMapType, threshold=50.0):\n", " confident = image <= threshold\n", @@ -140,9 +187,17 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Confidence mask: 85.61% is True\n" + ] + } + ], "source": [ "print(f\"Confidence mask: {100.0 * confidence_mask.mean():.2f}% is True\")" ] @@ -158,9 +213,17 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "13.7 µs ± 320 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n" + ] + } + ], "source": [ "%%timeit\n", "point_cloud_to_open3d(point_cloud)" @@ -168,9 +231,17 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "17.3 µs ± 54.6 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n" + ] + } + ], "source": [ "%%timeit\n", "open3d_to_point_cloud(pcd)" @@ -185,9 +256,17 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "59.3 ms ± 160 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], "source": [ "%%timeit\n", "filter_point_cloud(point_cloud, confidence_mask.nonzero()) # adding nonzero() is faster" @@ -195,9 +274,17 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "67.5 ms ± 204 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], "source": [ "%%timeit\n", "filter_point_cloud(point_cloud, confidence_mask)" @@ -205,16 +292,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "np.all(point_cloud.points[confidence_mask] == point_cloud.points[confidence_mask.nonzero()])" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -224,9 +322,17 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2.95 ms ± 7.01 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + ] + } + ], "source": [ "%%timeit\n", "filter_point_cloud(point_cloud, confidence_mask_95_false.nonzero())" @@ -234,9 +340,17 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "140 ms ± 608 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], "source": [ "%%timeit\n", "pcd.select_by_index(np.where(confidence_mask)[0])" @@ -244,9 +358,17 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "238 ms ± 3.37 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + ] + } + ], "source": [ "%%timeit\n", "pcd.select_by_mask(confidence_mask)" @@ -254,7 +376,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 47, "metadata": {}, "outputs": [], "source": [ @@ -263,9 +385,17 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "388 ms ± 3.16 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + ] + } + ], "source": [ "%%timeit\n", "pcd_legacy.select_by_index(np.where(confidence_mask)[0])" @@ -280,9 +410,17 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "16.9 ms ± 135 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + ] + } + ], "source": [ "%%timeit\n", "crop_point_cloud(point_cloud_filtered, bbox)" @@ -290,9 +428,17 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "31.1 ms ± 903 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], "source": [ "%%timeit\n", "pcd_filtered.crop(bbox_o3d)" @@ -307,9 +453,17 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3.04 ms ± 37.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + ] + } + ], "source": [ "%%timeit\n", "pcd_cropped.voxel_down_sample(voxel_size=0.01)" @@ -324,18 +478,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 52, "metadata": {}, "outputs": [], "source": [ - "crop_mask = crop_point_cloud_mask(point_cloud, bbox)" + "crop_mask = generate_point_cloud_crop_mask(point_cloud, bbox)" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "139 µs ± 3.25 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n" + ] + } + ], "source": [ "%%timeit\n", "crop_mask & confidence_mask" @@ -343,21 +505,37 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "21.1 ms ± 252 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], "source": [ "%%timeit\n", - "crop_mask = crop_point_cloud_mask(point_cloud, bbox)\n", + "crop_mask = generate_point_cloud_crop_mask(point_cloud, bbox)\n", "mask = crop_mask & confidence_mask\n", "filter_point_cloud(point_cloud, mask.nonzero())" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "74.9 ms ± 231 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], "source": [ "%%timeit\n", "point_cloud_filtered = filter_point_cloud(point_cloud, confidence_mask.nonzero())\n", @@ -366,19 +544,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "25 ms ± 102 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], "source": [ "%%timeit\n", - "crop_mask = crop_point_cloud_mask(point_cloud, bbox)\n", + "crop_mask = generate_point_cloud_crop_mask(point_cloud, bbox)\n", "mask = crop_mask & confidence_mask\n", "pcd.select_by_index(mask.nonzero())" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 57, "metadata": {}, "outputs": [], "source": [ @@ -388,9 +574,17 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "49.8 ms ± 160 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], "source": [ "%%timeit\n", "pcd2_cropped = pcd2.crop(bbox_o3d)\n", diff --git a/airo-camera-toolkit/notebooks/point_cloud_tutorial.ipynb b/airo-camera-toolkit/notebooks/point_cloud_tutorial.ipynb index 2f648b2..49f2c95 100644 --- a/airo-camera-toolkit/notebooks/point_cloud_tutorial.ipynb +++ b/airo-camera-toolkit/notebooks/point_cloud_tutorial.ipynb @@ -66,7 +66,7 @@ "from airo_camera_toolkit.point_clouds.conversions import open3d_to_point_cloud, point_cloud_to_open3d\n", "from airo_camera_toolkit.point_clouds.operations import filter_point_cloud, crop_point_cloud\n", "from airo_camera_toolkit.point_clouds.visualization import open3d_point\n", - "from airo_camera_toolkit.reprojection import project_frame_to_image_plane\n", + "from airo_camera_toolkit.pinhole_operations.projection import project_points_to_image_plane\n", "from airo_typing import PointCloudPositionsType" ] }, @@ -484,14 +484,20 @@ "metadata": {}, "outputs": [], "source": [ - "lowest_2d = project_frame_to_image_plane(\n", - " lowest,\n", + "from airo_spatial_algebra import transform_points\n", + "from airo_spatial_algebra.operations import _HomogeneousPoints\n", + "\n", + "# the points are currentlty expressed in the world frame p_W, we need p_C\n", + "# p_C = X_C_W @ p_W\n", + "X_C_W = np.linalg.inv(camera_pose)\n", + "\n", + "# lowest\n", + "lowest_2d = project_points_to_image_plane(\n", + " transform_points(X_C_W, lowest),\n", " intrinsics,\n", - " np.linalg.inv(camera_pose)\n", ").squeeze()\n", "\n", "lowest_2d_int = np.rint(lowest_2d).astype(int)\n", - "\n", "lowest_2d, lowest_2d_int" ] }, @@ -501,16 +507,14 @@ "metadata": {}, "outputs": [], "source": [ - "highest_2d = project_frame_to_image_plane(\n", - " highest,\n", + "highest_2d = project_points_to_image_plane(\n", + " transform_points(X_C_W, highest),\n", " intrinsics,\n", - " np.linalg.inv(camera_pose),\n", ").squeeze()\n", "\n", - "random_2d = project_frame_to_image_plane(\n", - " random_points,\n", + "random_2d = project_points_to_image_plane(\n", + " transform_points(X_C_W, random_points),\n", " intrinsics,\n", - " np.linalg.inv(camera_pose),\n", ")\n", "\n", "highest_2d_int = np.rint(highest_2d).astype(int)\n", @@ -523,10 +527,6 @@ "metadata": {}, "outputs": [], "source": [ - "lowest_2d_int = np.rint(lowest_2d).astype(int)\n", - "highest_2d_int = np.rint(highest_2d).astype(int)\n", - "random_2d_int = np.rint(random_2d).astype(int)\n", - "\n", "image_annotated = image_left.copy()\n", "cv2.circle(image_annotated, tuple(lowest_2d_int), 10, (0, 1.0, 0), thickness=2)\n", "cv2.circle(image_annotated, tuple(highest_2d_int), 10, (1.0, 0, 0), thickness=2)\n", @@ -575,14 +575,14 @@ } ], "metadata": { + "execution": { + "allow_errors": true, + "timeout": 300 + }, "kernelspec": { "display_name": "airo-mono", "language": "python", "name": "python3" - }, - "execution": { - "allow_errors": true, - "timeout": 300 }, "language_info": { "codemirror_mode": { diff --git a/airo-camera-toolkit/notebooks/rerun_zed_tutorial.ipynb b/airo-camera-toolkit/notebooks/rerun_zed_tutorial.ipynb index 8085361..ea170ca 100644 --- a/airo-camera-toolkit/notebooks/rerun_zed_tutorial.ipynb +++ b/airo-camera-toolkit/notebooks/rerun_zed_tutorial.ipynb @@ -26,7 +26,7 @@ }, "outputs": [], "source": [ - "from airo_camera_toolkit.cameras.zed2i import Zed2i\n", + "from airo_camera_toolkit.cameras.zed.zed2i import Zed2i\n", "import rerun as rr\n", "#autoreload\n", "%load_ext autoreload\n", diff --git a/airo-spatial-algebra/airo_spatial_algebra/operations.py b/airo-spatial-algebra/airo_spatial_algebra/operations.py index eaa629e..24282ca 100644 --- a/airo-spatial-algebra/airo_spatial_algebra/operations.py +++ b/airo-spatial-algebra/airo_spatial_algebra/operations.py @@ -9,22 +9,21 @@ """ import numpy as np -from airo_typing import HomogeneousMatrixType, Vectors3DType +from airo_typing import HomogeneousMatrixType, Vector3DArrayType, Vectors3DType class _HomogeneousPoints: + """Helper class to facilitate multiplicating 4x4 matrices with one or more 3D points. + This class internally handles the addition / removal of a dimension to the points. + """ + # TODO: extend to generic dimensions (1D,2D,3D). - # TODO: more apropaite name? this class mainly serves as 'input validation' and conversion to homogeneous coordinates def __init__(self, points: Vectors3DType): if not self.is_valid_points_type(points): raise ValueError(f"Invalid argument for {_HomogeneousPoints.__name__}.__init__ ") - if self.is_single_point(points): - self._homogeneous_points = np.concatenate([points, np.ones(1, dtype=np.float32)]) - self._homogeneous_points = self._homogeneous_points[np.newaxis, :] - else: - self._homogeneous_points = np.concatenate( - [points, np.ones((points.shape[0], 1), dtype=np.float32)], axis=1 - ) + + points = _HomogeneousPoints.ensure_array_2d(points) + self._homogeneous_points = np.concatenate([points, np.ones((points.shape[0], 1), dtype=np.float32)], axis=1) @staticmethod def is_valid_points_type(points: Vectors3DType) -> bool: @@ -37,8 +36,13 @@ def is_valid_points_type(points: Vectors3DType) -> bool: return False @staticmethod - def is_single_point(points: Vectors3DType) -> bool: - return len(points.shape) == 1 + def ensure_array_2d(points: Vectors3DType) -> Vector3DArrayType: + """If points is a single shape (3,) point, then it is reshaped to (1,3).""" + if len(points.shape) == 1: + if len(points) != 3: + raise ValueError("points has only one dimension, but it's length is not 3") + points = points.reshape((1, 3)) + return points @property def homogeneous_points(self) -> np.ndarray: @@ -51,7 +55,6 @@ def points(self) -> Vectors3DType: # normalize points (for safety, should never be necessary with affine transforms) # but we've had bugs of this type with projection operations, so better safe than sorry? scalars = self._homogeneous_points[:, 3][:, np.newaxis] - print(scalars) points = self.homogeneous_points[:, :3] / scalars # TODO: if the original poitns was (1,3) matrix, then the resulting points would be a (3,) vector. # Is this desirable? and if not, how to avoid it?