diff --git a/examples/Gen2/python_notebooks/Tutorial_1_vrs_data_provider_basics.ipynb b/examples/Gen2/python_notebooks/Tutorial_1_vrs_data_provider_basics.ipynb index d44c6dc84..7d0e6366f 100644 --- a/examples/Gen2/python_notebooks/Tutorial_1_vrs_data_provider_basics.ipynb +++ b/examples/Gen2/python_notebooks/Tutorial_1_vrs_data_provider_basics.ipynb @@ -167,7 +167,7 @@ "\n", " # Process image data\n", " if image_data.is_valid():\n", - " rr.set_time(\"device_time\", duration = timestamp_ns * 1e-9)\n", + " rr.set_time_nanos(\"device_time\", timestamp_ns)\n", " rr.log(\"camera_rgb\", rr.Image(image_data.to_numpy_array()))\n", "\n", "rr.notebook_show()\n" @@ -267,21 +267,13 @@ "\n", " # Plot to ReRun\n", " if image_data.is_valid():\n", - " rr.set_time(\"device_time\", duration = capture_time_ns * 1e-9)\n", + " rr.set_time_nanos(\"device_time\", capture_time_ns)\n", " rr.log(label, rr.Image(image_data.to_numpy_array()))\n", "\n", " query_timestamp_ns = query_timestamp_ns + int(1e9) # 1 second\n", "\n", "rr.notebook_show()\n" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f1ab571d-018f-4047-b857-7269463ec8fa", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/examples/Gen2/python_notebooks/Tutorial_2_device_calibration.ipynb b/examples/Gen2/python_notebooks/Tutorial_2_device_calibration.ipynb index 05752bb66..50f183eda 100644 --- a/examples/Gen2/python_notebooks/Tutorial_2_device_calibration.ipynb +++ b/examples/Gen2/python_notebooks/Tutorial_2_device_calibration.ipynb @@ -334,7 +334,7 @@ "\n", " # Plot original RGB image\n", " timestamp_ns = image_record.capture_timestamp_ns\n", - " rr.set_time(\"device_time\", duration=timestamp_ns * 1e-9)\n", + " rr.set_time_nanos(\"device_time\", timestamp_ns)\n", " rr.log(\"camera_rgb\", rr.Image(image_data.to_numpy_array()))\n", "\n", " # Undistort RGB image to a linear camera model\n", @@ -382,32 +382,32 @@ " \"\"\"\n", " rr.log(\n", " f\"{rerun_plot_label}/accl/x[m/sec2]\",\n", - " rr.SeriesLines(colors=[230, 25, 75], names=\"accel/x[m/sec2]\"),\n", + " rr.SeriesLine(color=[230, 25, 75], name=\"accel/x[m/sec2]\"),\n", " static=True,\n", " ) # Red\n", " rr.log(\n", " f\"{rerun_plot_label}/accl/y[m/sec2]\",\n", - " rr.SeriesLines(colors=[60, 180, 75], names=\"accel/y[m/sec2]\"),\n", + " rr.SeriesLine(color=[60, 180, 75], name=\"accel/y[m/sec2]\"),\n", " static=True,\n", " ) # Green\n", " rr.log(\n", " f\"{rerun_plot_label}/accl/z[m/sec2]\",\n", - " rr.SeriesLines(colors=[0, 130, 200], names=\"accel/z[m/sec2]\"),\n", + " rr.SeriesLine(color=[0, 130, 200], name=\"accel/z[m/sec2]\"),\n", " static=True,\n", " ) # Blue\n", " rr.log(\n", " f\"{rerun_plot_label}/gyro/x[rad/sec2]\",\n", - " rr.SeriesLines(colors=[245, 130, 48], names=\"gyro/x[rad/sec2]\"),\n", + " rr.SeriesLine(color=[245, 130, 48], name=\"gyro/x[rad/sec2]\"),\n", " static=True,\n", " ) # Orange\n", " rr.log(\n", " f\"{rerun_plot_label}/gyro/y[rad/sec2]\",\n", - " rr.SeriesLines(colors=[145, 30, 180], names=\"gyro/y[rad/sec2]\"),\n", + " rr.SeriesLine(color=[145, 30, 180], name=\"gyro/y[rad/sec2]\"),\n", " static=True,\n", " ) # Purple\n", " rr.log(\n", " f\"{rerun_plot_label}/gyro/z[rad/sec2]\",\n", - " rr.SeriesLines(colors=[70, 240, 240], names=\"gyro/z[rad/sec2]\"),\n", + " rr.SeriesLine(color=[70, 240, 240], name=\"gyro/z[rad/sec2]\"),\n", " static=True,\n", " ) # Cyan\n", "\n", @@ -418,27 +418,27 @@ " \"\"\"\n", " rr.log(\n", " f\"{rerun_plot_label}/accl/x[m/sec2]\",\n", - " rr.Scalars(accel_data[0]),\n", + " 
rr.Scalar(accel_data[0]),\n", " )\n", " rr.log(\n", " f\"{rerun_plot_label}/accl/y[m/sec2]\",\n", - " rr.Scalars(accel_data[1]),\n", + " rr.Scalar(accel_data[1]),\n", " )\n", " rr.log(\n", " f\"{rerun_plot_label}/accl/z[m/sec2]\",\n", - " rr.Scalars(accel_data[2]),\n", + " rr.Scalar(accel_data[2]),\n", " )\n", " rr.log(\n", " f\"{rerun_plot_label}/gyro/x[rad/sec2]\",\n", - " rr.Scalars(gyro_data[0]),\n", + " rr.Scalar(gyro_data[0]),\n", " )\n", " rr.log(\n", " f\"{rerun_plot_label}/gyro/y[rad/sec2]\",\n", - " rr.Scalars(gyro_data[1]),\n", + " rr.Scalar(gyro_data[1]),\n", " )\n", " rr.log(\n", " f\"{rerun_plot_label}/gyro/z[rad/sec2]\",\n", - " rr.Scalars(gyro_data[2]),\n", + " rr.Scalar(gyro_data[2]),\n", " )\n", "\n", "\n", @@ -464,7 +464,7 @@ " imu_data = vrs_data_provider.get_imu_data_by_index(imu_stream_id, i)\n", "\n", " # Plot raw IMU readings\n", - " rr.set_time(\"device_time\", duration=imu_data.capture_timestamp_ns * 1e-9)\n", + " rr.set_time_nanos(\"device_time\", imu_data.capture_timestamp_ns)\n", "\n", " # Get compensated imu data\n", " compensated_accel = imu_calib.raw_to_rectified_accel(imu_data.accel_msec2)\n", @@ -574,12 +574,6 @@ "\n", "rr.notebook_show()" ] - }, - { - "cell_type": "markdown", - "id": "156692c7", - "metadata": {}, - "source": [] } ], "metadata": { diff --git a/examples/Gen2/python_notebooks/Tutorial_3_sequential_access_multi_sensor_data.ipynb b/examples/Gen2/python_notebooks/Tutorial_3_sequential_access_multi_sensor_data.ipynb index 7e6f11845..6ee0c2b85 100644 --- a/examples/Gen2/python_notebooks/Tutorial_3_sequential_access_multi_sensor_data.ipynb +++ b/examples/Gen2/python_notebooks/Tutorial_3_sequential_access_multi_sensor_data.ipynb @@ -232,7 +232,7 @@ " image_data_and_record = sensor_data.image_data_and_record()\n", "\n", " # Visualize\n", - " rr.set_time(\"device_time\", duration = device_time_ns * 1e-9)\n", + " rr.set_time_nanos(\"device_time\", device_time_ns)\n", " rr.log(stream_label, rr.Image(image_data_and_record[0].to_numpy_array()))\n", "\n", "rr.notebook_show()" diff --git a/examples/Gen2/python_notebooks/Tutorial_4_on_device_eyetracking_handtracking.ipynb b/examples/Gen2/python_notebooks/Tutorial_4_on_device_eyetracking_handtracking.ipynb new file mode 100644 index 000000000..2bbbba0c9 --- /dev/null +++ b/examples/Gen2/python_notebooks/Tutorial_4_on_device_eyetracking_handtracking.ipynb @@ -0,0 +1,615 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5567afff", + "metadata": {}, + "source": [ + "# Tutorial 4: On-Device Eye-tracking and Hand-tracking data streams\n", + "\n", + "## Introduction\n", + "\n", + "In Aria-Gen2 glasses, one of the key upgrade from Aria-Gen1 is the capability to run Machine Perception (MP) algorithms on the device during streaming / recording. Currently supported on-device MP algorithms include Eye-tracking, Hand-tracking, and VIO. These algorithm results are stored as separate data streams in the VRS file. \n", + "\n", + "This tutorial focuses on demonstration of how to use the **Eye-tracking and Hand-tracking** results. 
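Before going further, it can help to confirm which data streams a given recording actually contains. The following is a minimal sketch (not part of the tutorial notebook) that lists every stream label in a VRS file, assuming `vrs_file_path` points at an Aria-Gen2 recording; on-device MP results such as `eyegaze`, `handtracking`, `vio` and `vio_high_frequency` appear here as ordinary data streams next to the camera and IMU streams.

```python
from projectaria_tools.core import data_provider

# Assumed path -- replace with your own Aria-Gen2 recording.
vrs_file_path = "path/to/your/recording.vrs"
provider = data_provider.create_vrs_data_provider(vrs_file_path)

# Enumerate all streams and print their human-readable labels.
for stream_id in provider.get_all_streams():
    print(stream_id, provider.get_label_from_stream_id(stream_id))
```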
\n", + "\n", + "**What you'll learn:**\n", + "\n", + "- How to access on-device EyeGaze and HandTracking data from VRS files\n", + "- Understanding the concept of interpolated hand tracking and why interpolation\n", + " is needed\n", + "- How to visualize EyeGaze and HandTracking data projected onto 2D camera images\n", + " using DeviceCalibration\n", + "- How to match MP data with camera frames using timestamps\n", + "\n", + "**Prerequisites**\n", + "- Complete Tutorial 1 (VrsDataProvider Basics) to understand basic data provider concepts\n", + "- Complete Tutorial 2 (Device Calibration) to understand how to properly use calibration in Aria data. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79330336", + "metadata": {}, + "outputs": [], + "source": [ + "from projectaria_tools.core import data_provider\n", + "\n", + "# Load local VRS file\n", + "vrs_file_path = \"path/to/your/recording.vrs\"\n", + "vrs_data_provider = data_provider.create_vrs_data_provider(vrs_file_path)\n", + "\n", + "\n", + "# Query EyeGaze data streams\n", + "eyegaze_label = \"eyegaze\"\n", + "eyegaze_stream_id = vrs_data_provider.get_stream_id_from_label(eyegaze_label)\n", + "if eyegaze_stream_id is None:\n", + " raise RuntimeError(\n", + " f\"{eyegaze_label} data stream does not exist! Please use a VRS that contains valid eyegaze data for this tutorial.\"\n", + " )\n", + "\n", + "# Query HandTracking data streams\n", + "handtracking_label = \"handtracking\"\n", + "handtracking_stream_id = vrs_data_provider.get_stream_id_from_label(handtracking_label)\n", + "if handtracking_stream_id is None:\n", + " raise RuntimeError(\n", + " f\"{handtracking_label} data stream does not exist! Please use a VRS that contains valid handtracking data for this tutorial.\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "506003eb", + "metadata": {}, + "source": [ + "## On-Device Eye-tracking results\n", + "### EyeGaze Data Structure\n", + "\n", + "The EyeGaze data type represents on-device eye tracking results. \n", + "**Importantly, it directly reuses [the EyeGaze data structure](https://github.com/facebookresearch/projectaria_tools/blob/main/core/mps/EyeGaze.h)\n", + " from MPS (Machine Perception Services)**, providing guaranteed compatibility across VRS and MPS.\n", + "\n", + "**Key `EyeGaze` fields**\n", + "\n", + "| Field Name | Description |\n", + "| :---------------------------- | :---------------------------------------------------------------------- |\n", + "| `session_uid` | Unique ID for the eyetracking session |\n", + "| `tracking_timestamp` | Timestamp of the eye tracking camera frame in device time domain, in us. |\n", + "| `vergence.t[x,y,z]_[left,right]_eye` | Translation for each eye origin in CPF frame |\n", + "| `yaw`,`vergence.[left,right]_yaw` | Eye gaze yaw angle (horizontal) in radians in CPF frame |\n", + "| `pitch`,`vergence.[left,right]_pitch`(Gen2-only) | Eye gaze pitch angle (vertical) in radians in CPF frame. The left and right pitch are assumed to be the same in Aria-Gen1. 
|\n", + "| `depth` | Depth in meters of the 3D eye gaze point in CPF frame (0 = unavailable) |\n", + "| `yaw_low`,`yaw_high`,`pitch_low`,`pitch_high` | Confidence interval bounds for yaw and pitch angle |\n", + "| **Aria-Gen2 specific fields** | \n", + "| `combined_gaze_origin_in_cpf` | Combined gaze origin in CPF frame (Gen2 only) |\n", + "| `spatial_gaze_point_in_cpf` | 3D spatial gaze point in CPF frame |\n", + "| `vergence.[left,right]_entrance_pupil_position_meter` | Entrance pupil positions for each eye |\n", + "| `vergence.[left,right]_pupil_diameter_meter` | Entrance pupil diameter for each eye |\n", + "| `vergence.[left,right]_blink` | Blink detection for left and right eyes |\n", + "| `*_valid` | Boolean flags to indicating if the corresponding data field in EyeGaze is valid |\n", + "\n", + "\n", + "\n", + "\n", + "### EyeGaze API Reference\n", + "In `vrs_data_provider`, EyeGaze is treated the same way as any other sensor data, and share similar query APIs covered in `Tutorial_1_vrs_data_provider_basics`: \n", + "- `vrs_data_provider.get_eye_gaze_data_by_index(stream_id, index)`: Query by index. \n", + "- `vrs_data_provider.get_eye_gaze_data_by_time_ns(stream_id, timestamp, time_domain, query_options)`: Query by timestamp. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50413a12", + "metadata": {}, + "outputs": [], + "source": [ + "from projectaria_tools.core.mps import get_unit_vector_from_yaw_pitch\n", + "from datetime import timedelta\n", + "\n", + "print(\"=== EyeGaze Data Sample ===\")\n", + "num_eyegaze_samples = vrs_data_provider.get_num_data(eyegaze_stream_id)\n", + "selected_index = min(5, num_eyegaze_samples)\n", + "print(f\"Sample {selected_index}:\")\n", + "\n", + "eyegaze_data = vrs_data_provider.get_eye_gaze_data_by_index(eyegaze_stream_id, selected_index)\n", + "\n", + "# Eyegaze timestamp is in format of datetime.deltatime in microseconds, convert it to integer\n", + "eyegaze_timestamp_ns = (eyegaze_data.tracking_timestamp // timedelta(microseconds=1)) * 1000\n", + "print(f\" Tracking timestamp: {eyegaze_timestamp_ns}\")\n", + "\n", + "# check if combined gaze is valid, if so, print out the gaze direction\n", + "print(f\" Combined gaze valid: {eyegaze_data.combined_gaze_valid}\")\n", + "if eyegaze_data.combined_gaze_valid:\n", + " print(f\" Yaw: {eyegaze_data.yaw:.3f} rad\")\n", + " print(f\" Pitch: {eyegaze_data.pitch:.3f} rad\")\n", + " print(f\" Depth: {eyegaze_data.depth:.3f} m\")\n", + " # Can also print gaze direction in unit vector\n", + " gaze_direction_in_unit_vec = get_unit_vector_from_yaw_pitch(eyegaze_data.yaw, eyegaze_data.pitch)\n", + " print(f\" Gaze direction in unit vec [xyz]: {gaze_direction_in_unit_vec}\")\n", + "\n", + "# Check if spatial gaze point is valid, if so, print out the spatial gaze point\n", + "print(\n", + " f\" Spatial gaze point valid: {eyegaze_data.spatial_gaze_point_valid}\"\n", + ")\n", + "if eyegaze_data.spatial_gaze_point_valid:\n", + " print(\n", + " f\" Spatial gaze point in CPF: {eyegaze_data.spatial_gaze_point_in_cpf}\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "a3fe8239", + "metadata": {}, + "source": [ + "### EyeGaze visualization in camera images\n", + "To visualize EyeGaze in camera images, you just need to project eye tracking results into the camera images using the camera's calibration. But please note the coordinate frame difference, entailed below. 
\n", + "\n", + "**EyeGaze Coordinate System - Central Pupil Frame (CPF)** \n", + "\n", + "All Eyetracking results in Aria are stored in a reference coordinates system called **Central Pupil Frame (`CPF`)**, which is approximately the center of user's two eye positions. Note that this **`CPF` frame is DIFFERENT from the `Device` frame in device calibration**, where the latter is essentially the `slam-front-left` (for Gen2) or `camera-slam-left` (for Gen1) camera. To transform between `CPF` and `Device`, we provide the following API to query their relative pose, and see the following code cell for usage: \n", + "```\n", + "device_calibration.get_transform_device_cpf()\n", + "``` " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2f27dbd", + "metadata": {}, + "outputs": [], + "source": [ + "import rerun as rr\n", + "from projectaria_tools.core.sensor_data import SensorDataType, TimeDomain, TimeQueryOptions\n", + "from projectaria_tools.utils.rerun_helpers import create_hand_skeleton_from_landmarks\n", + "\n", + "def plot_eyegaze_in_camera(eyegaze_data, camera_label, camera_calib, T_device_cpf):\n", + " \"\"\"\n", + " A helper function to plot eyegaze's spatial gaze point into a camera image\n", + " \"\"\"\n", + " # Skip if eyegaze data is invalid\n", + " if not (\n", + " eyegaze_data.spatial_gaze_point_valid and eyegaze_data.combined_gaze_valid\n", + " ):\n", + " return\n", + "\n", + " # First, transform spatial gaze point from CPF -> Device -> Camera frame\n", + " spatial_gaze_point_in_cpf = eyegaze_data.spatial_gaze_point_in_cpf\n", + " spatial_gaze_point_in_device = T_device_cpf @ spatial_gaze_point_in_cpf\n", + " spatial_gaze_point_in_camera = (\n", + " camera_calib.get_transform_device_camera().inverse()\n", + " @ spatial_gaze_point_in_device\n", + " )\n", + "\n", + " # Project into camera and plot 2D gaze location\n", + " maybe_pixel = camera_calib.project(spatial_gaze_point_in_camera)\n", + " if maybe_pixel is not None:\n", + " rr.log(\n", + " f\"{camera_label}\",\n", + " rr.Points2D(\n", + " positions=[maybe_pixel],\n", + " colors=[255, 64, 255],\n", + " radii = [30.0]\n", + " ),\n", + " )\n", + "\n", + "print(\"\\n=== Visualizing on-device eye tracking in camera images ===\")\n", + "\n", + "# First, query the RGB camera stream ids\n", + "device_calib = vrs_data_provider.get_device_calibration()\n", + "T_device_cpf = device_calib.get_transform_device_cpf()\n", + "\n", + "rgb_camera_label = \"camera-rgb\"\n", + "rgb_stream_id = vrs_data_provider.get_stream_id_from_label(rgb_camera_label)\n", + "rgb_camera_calib = device_calib.get_camera_calib(rgb_camera_label)\n", + "\n", + "rr.init(\"rerun_viz_et_in_cameras\")\n", + "\n", + "# Set up a sensor queue with only RGB image + EyeGaze\n", + "deliver_options = vrs_data_provider.get_default_deliver_queued_options()\n", + "deliver_options.deactivate_stream_all()\n", + "deliver_options.activate_stream(rgb_stream_id)\n", + "deliver_options.activate_stream(eyegaze_stream_id)\n", + "\n", + "# Play for only 3 seconds\n", + "total_length_ns = vrs_data_provider.get_last_time_ns_all_streams(TimeDomain.DEVICE_TIME) - vrs_data_provider.get_first_time_ns_all_streams(TimeDomain.DEVICE_TIME)\n", + "skip_begin_ns = int(15 * 1e9) # Skip 15 seconds\n", + "duration_ns = int(3 * 1e9) # 3 seconds\n", + "skip_end_ns = max(total_length_ns - skip_begin_ns - duration_ns, 0)\n", + "deliver_options.set_truncate_first_device_time_ns(skip_begin_ns)\n", + "deliver_options.set_truncate_last_device_time_ns(skip_end_ns)\n", + "\n", + "# Plot image 
data, and plot EyeGaze on top of RGB image data\n", + "for sensor_data in vrs_data_provider.deliver_queued_sensor_data(deliver_options):\n", + " stream_id = sensor_data.stream_id()\n", + " data_type = sensor_data.sensor_data_type()\n", + "\n", + " # ---------------\n", + " # Image data: plot RGB images. \n", + " # ---------------\n", + " if data_type == SensorDataType.IMAGE:\n", + " # Convert back to image data, and plot in ReRun\n", + " device_time_ns = sensor_data.get_time_ns(TimeDomain.DEVICE_TIME)\n", + " image_data_and_record = sensor_data.image_data_and_record()\n", + "\n", + " # Visualize the images\n", + " rr.set_time_nanos(\"device_time\", device_time_ns)\n", + " rr.log(rgb_camera_label, rr.Image(image_data_and_record[0].to_numpy_array()))\n", + "\n", + " # ---------------\n", + " # Eye gaze data: plot EyeGaze's projection into camera images\n", + " # ---------------\n", + " elif data_type == SensorDataType.EYE_GAZE:\n", + " device_time_ns = sensor_data.get_time_ns(TimeDomain.DEVICE_TIME)\n", + " eye_gaze = sensor_data.eye_gaze_data()\n", + "\n", + " # Plot Eyegaze overlay on top of camera images\n", + " rr.set_time_nanos(\"device_time\", device_time_ns)\n", + " plot_eyegaze_in_camera(eyegaze_data = eye_gaze, camera_label = rgb_camera_label, camera_calib = rgb_camera_calib, T_device_cpf = T_device_cpf)\n", + "\n", + "\n", + "rr.notebook_show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "71790f5c", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "source": [ + "## On-Device Hand-tracking results\n", + "### Handtracking Data Structure\n", + "HandTracking data contains comprehensive 3D hand pose information. \n", + "**Importantly, it directly reuses the [HandTrackingResults data structure](https://github.com/facebookresearch/projectaria_tools/blob/main/core/mps/HandTracking.h) from MPS (Machine Perception\n", + "Services)**, providing guaranteed compatibility across VRS and MPS.\n", + "\n", + "**Key `EyeGaze` fields**\n", + "\n", + "**Key Fields in `HandTrackingResults`**\n", + "| Field Name | Description |\n", + "| -------------------- | ----------------------------------------------------------------------- |\n", + "| `tracking_timestamp` | Timestamp of the hand-tracking estimate in the device time domain. |\n", + "| `left_hand` | Left-hand pose, or `None` if no valid pose is found for the timestamp. |\n", + "| `right_hand` | Right-hand pose, or `None` if no valid pose is found for the timestamp. |\n", + "\n", + "**Single Hand fields (left or right):**\n", + "| Field Name | Description |\n", + "| ------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |\n", + "| `confidence` | Tracking confidence score for this hand. |\n", + "| `landmark_positions_device` | List of 21 hand-landmark positions in the device frame (3D points).
See the [wiki page](https://facebookresearch.github.io/projectaria_tools/docs/data_formats/mps/hand_tracking#hand_tracking_resultscsv) for landmark definitions. |\n", + "| `transform_device_wrist` | Full SE3 transform of the wrist in the `Device` frame. |\n", + "| `wrist_and_palm_normal_device` | Normal vectors for the wrist and palm joints in the `Device` frame. \n", + "\n", + "### Handtracking Coordinate System\n", + "All Handtracking results in Aria are stored in the `Device` coordinate frame, which is the same as device calibration. See `Tutorial_2_device_calibration` for definition of `Device` frame. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e38590a", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "def print_single_hand_information(single_hand):\n", + " \"\"\"\n", + " A helper function to print the hand tracking result of one hand\n", + " \"\"\"\n", + " print(f\" Confidence: {single_hand.confidence:.3f}\")\n", + " print(\n", + " f\" Landmarks shape: {np.array(single_hand.landmark_positions_device).shape}\"\n", + " )\n", + " print(\n", + " f\" Wrist location: {single_hand.get_wrist_position_device()}\"\n", + " )\n", + " print(\n", + " f\" Palm location: {single_hand.get_wrist_position_device()}\"\n", + " )\n", + "\n", + "print(\"=== HandTracking Data Sample ===\")\n", + "num_handtracking_samples = vrs_data_provider.get_num_data(handtracking_stream_id)\n", + "selected_index = min(5, num_handtracking_samples)\n", + "hand_data = vrs_data_provider.get_hand_pose_data_by_index(\n", + " handtracking_stream_id, selected_index\n", + ")\n", + "\n", + "print(f\"Sample {selected_index}:\")\n", + "print(f\" Tracking timestamp: {hand_data.tracking_timestamp}\")\n", + "\n", + "# Print the content of left and right hand if valid\n", + "if hand_data.left_hand is not None:\n", + " print(\" Left hand detected\")\n", + " print_single_hand_information(hand_data.left_hand)\n", + "else:\n", + " print(\" Left hand: Not detected\")\n", + "\n", + "if hand_data.right_hand is not None:\n", + " print(\" Right hand detected\")\n", + " print_single_hand_information(hand_data.right_hand)\n", + "else:\n", + " print(\" Right hand: Not detected\")" + ] + }, + { + "cell_type": "markdown", + "id": "43616cd2", + "metadata": {}, + "source": [ + "### Interpolated Hand-tracking Results\n", + "**Context:**\n", + "\n", + "In Aria-Gen2 glasses, **the on-device hand-tracking data are calculated from the SLAM cameras, not RGB cameras**. \n", + "In the mean time, the SLAM cameras and RGB camera often runs at different sampling frequency, and their triggering are not aligned either. \n", + "This causes that the handtracking result's timestamp often do NOT line up with that of RGB camera, causing additional challenges in accurately visualize handtracking results in RGB images. \n", + "\n", + "**API to query interpolated handtracking results**\n", + "\n", + "To resolve this, `vrs_data_provider` enables a special query API for handtracking results: \n", + "```\n", + "vrs_data_provider.get_interpolated_hand_pose_data(stream_id, timestamp_ns)\n", + "```\n", + "which will return an interpolated handtracking results, given any timestamp within valid timestamps of the VRS file. \n", + "\n", + "**Handtracking Interpolation Implementation**\n", + "\n", + "1. Find the 2 nearest hand-tracking results before and after the target timestamp. \n", + "2. If the 2 hand-tracking results time delta is larger than 100 ms, interpolation is considered unreliable → return `None`. 
\n", + "3. Otherwise, interpolate each hand separately: \n", + " a. For the left or right hand, perform interpolation **only if both the \"before\" and \"after\" samples contain a valid result for that hand**. \n", + " b. If either sample is missing, the interpolated result for that hand will be `None`. Example: \n", + " ```text\n", + " interpolate(\n", + " before = [left = valid, right = None],\n", + " after = [left = valid, right = valid]\n", + " )\n", + " → result = [left = interpolated, right = None]\n", + " ```\n", + "4. Single-hand interpolation is calculated as: \n", + " a. Apply linear interpolation on the 3D hand landmark positions. \n", + " b. Apply SE3 interpolation on `T_Device_Wrist` 3D pose. \n", + " c. Re-calculate the wrist and palm normal vectors. \n", + " d. Take the `min` of confidence values. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ee4d486", + "metadata": {}, + "outputs": [], + "source": [ + "from projectaria_tools.core.sensor_data import SensorDataType, TimeDomain, TimeQueryOptions\n", + "from datetime import timedelta\n", + "\n", + "print(\"\\n=== Demonstrating query interpolated hand tracking results ===\")\n", + "\n", + "# Demonstrate how to query interpolated handtracking results\n", + "slam_stream_id = vrs_data_provider.get_stream_id_from_label(\"slam-front-left\")\n", + "rgb_stream_id = vrs_data_provider.get_stream_id_from_label(\"camera-rgb\")\n", + "\n", + "# Retrieve a SLAM frame, use its timestamp as query\n", + "slam_sample_index = min(10, vrs_data_provider.get_num_data(slam_stream_id) - 1)\n", + "slam_data_and_record = vrs_data_provider.get_image_data_by_index(slam_stream_id, slam_sample_index)\n", + "slam_timestamp_ns = slam_data_and_record[1].capture_timestamp_ns\n", + "\n", + "# Retrieve the closest RGB frame\n", + "rgb_data_and_record = vrs_data_provider.get_image_data_by_time_ns(\n", + " rgb_stream_id, slam_timestamp_ns, TimeDomain.DEVICE_TIME, TimeQueryOptions.CLOSEST\n", + ")\n", + "rgb_timestamp_ns = rgb_data_and_record[1].capture_timestamp_ns\n", + "\n", + "# Retrieve the closest hand tracking data sample\n", + "raw_ht_data = vrs_data_provider.get_hand_pose_data_by_time_ns(\n", + " handtracking_stream_id, slam_timestamp_ns, TimeDomain.DEVICE_TIME, TimeQueryOptions.CLOSEST\n", + ")\n", + "raw_ht_timestamp_ns = (raw_ht_data.tracking_timestamp // timedelta(microseconds=1)) * 1000\n", + "\n", + "# Check if hand tracking aligns with RGB or SLAM data\n", + "print(f\"SLAM timestamp: {slam_timestamp_ns}\")\n", + "print(f\"RGB timestamp: {rgb_timestamp_ns}\")\n", + "print(f\"hand tracking timestamp: {raw_ht_timestamp_ns}\")\n", + "print(f\"hand tracking-SLAM time diff: {abs(raw_ht_timestamp_ns - slam_timestamp_ns) / 1e6:.2f} ms\")\n", + "print(f\"hand tracking- RGB time diff: {abs(raw_ht_timestamp_ns - rgb_timestamp_ns) / 1e6:.2f} ms\")\n", + "\n", + "# Now, query interpolated hand tracking data sample using RGB timestamp.\n", + "interpolated_ht_data = vrs_data_provider.get_interpolated_hand_pose_data(\n", + " handtracking_stream_id, rgb_timestamp_ns\n", + ")\n", + "\n", + "# Check that interpolated hand tracking now aligns with RGB data\n", + "if interpolated_ht_data is not None:\n", + " interpolated_ht_timestamp_ns = (interpolated_ht_data.tracking_timestamp// timedelta(microseconds=1)) * 1000\n", + " print(f\"Interpolated hand tracking timestamp: {interpolated_ht_timestamp_ns}\")\n", + " print(f\"Interpolated hand tracking-RGB time diff: {abs(interpolated_ht_timestamp_ns - rgb_timestamp_ns) / 1e6:.2f} ms\")\n", + 
"else:\n", + " print(\"Interpolated hand tracking data is None - interpolation failed\")" + ] + }, + { + "cell_type": "markdown", + "id": "09ff7d13", + "metadata": {}, + "source": [ + "### Visualize Hand-tracking Results in Cameras\n", + "In this section, we show some example code on how to visualize the hand-tracking results in SLAM and RGB camera images. \n", + "Basically, you need to project the hand tracking results (landmarks, skeleton lines) into the camera images using the camera's calibration. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "768f3816", + "metadata": {}, + "outputs": [], + "source": [ + "import rerun as rr\n", + "from projectaria_tools.core.sensor_data import SensorDataType, TimeDomain, TimeQueryOptions\n", + "from projectaria_tools.utils.rerun_helpers import create_hand_skeleton_from_landmarks\n", + "\n", + "def plot_single_hand_in_camera(hand_joints_in_device, camera_label, camera_calib, hand_label):\n", + " \"\"\"\n", + " A helper function to plot a single hand data in 2D camera view\n", + " \"\"\"\n", + " # Setting different marker plot sizes for RGB and SLAM since they have different resolutions\n", + " plot_ratio = 3.0 if camera_label == \"camera-rgb\" else 1.0\n", + " marker_color = [255,64,0] if hand_label == \"left\" else [255, 255, 0]\n", + "\n", + " # project into camera frame, and also create line segments\n", + " hand_joints_in_camera = []\n", + " for pt_in_device in hand_joints_in_device:\n", + " pt_in_camera = (\n", + " camera_calib.get_transform_device_camera().inverse() @ pt_in_device\n", + " )\n", + " pixel = camera_calib.project(pt_in_camera)\n", + " hand_joints_in_camera.append(pixel)\n", + "\n", + " # Create hand skeleton in 2D image space\n", + " hand_skeleton = create_hand_skeleton_from_landmarks(hand_joints_in_camera)\n", + "\n", + " # Remove \"None\" markers from hand joints in camera. 
This is intentionally done AFTER the hand skeleton creation\n", + " hand_joints_in_camera = list(\n", + " filter(lambda x: x is not None, hand_joints_in_camera)\n", + " )\n", + "\n", + " rr.log(\n", + " f\"{camera_label}/{hand_label}/landmarks\",\n", + " rr.Points2D(\n", + " positions=hand_joints_in_camera,\n", + " colors= marker_color,\n", + " radii= [3.0 * plot_ratio]\n", + " ),\n", + " )\n", + " rr.log(\n", + " f\"{camera_label}/{hand_label}/skeleton\",\n", + " rr.LineStrips2D(\n", + " hand_skeleton,\n", + " colors=[0, 255, 0],\n", + " radii= [0.5 * plot_ratio],\n", + " ),\n", + " )\n", + "\n", + "def plot_handpose_in_camera(hand_pose, camera_label, camera_calib):\n", + " \"\"\"\n", + " A helper function to plot hand tracking results into a camera image\n", + " \"\"\"\n", + " # Clear the canvas first\n", + " #rr.log(\n", + " # f\"{camera_label}/handtracking\",\n", + " # rr.Clear.recursive(),\n", + " #)\n", + "\n", + " # Plot both hands\n", + " if hand_pose.left_hand is not None:\n", + " plot_single_hand_in_camera(\n", + " hand_joints_in_device=hand_pose.left_hand.landmark_positions_device,\n", + " camera_label=camera_label,\n", + " camera_calib = camera_calib,\n", + " hand_label=\"left\")\n", + " if hand_pose.right_hand is not None:\n", + " plot_single_hand_in_camera(\n", + " hand_joints_in_device=hand_pose.right_hand.landmark_positions_device,\n", + " camera_label=camera_label,\n", + " camera_calib = camera_calib,\n", + " hand_label=\"right\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45463b47", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n=== Visualizing on-device hand tracking in camera images ===\")\n", + "\n", + "# First, query the RGB camera stream id\n", + "device_calib = vrs_data_provider.get_device_calibration()\n", + "rgb_camera_label = \"camera-rgb\"\n", + "slam_camera_labels = [\"slam-front-left\", \"slam-front-right\", \"slam-side-left\", \"slam-side-right\"]\n", + "rgb_stream_id = vrs_data_provider.get_stream_id_from_label(rgb_camera_label)\n", + "slam_stream_ids = [vrs_data_provider.get_stream_id_from_label(label) for label in slam_camera_labels]\n", + "\n", + "rr.init(\"rerun_viz_ht_in_cameras\")\n", + "\n", + "# Set up a sensor queue with only RGB images.\n", + "# Handtracking data will be queried with interpolated API.\n", + "deliver_options = vrs_data_provider.get_default_deliver_queued_options()\n", + "deliver_options.deactivate_stream_all()\n", + "for stream_id in slam_stream_ids + [rgb_stream_id]:\n", + " deliver_options.activate_stream(stream_id)\n", + "\n", + "# Play for only 3 seconds\n", + "total_length_ns = vrs_data_provider.get_last_time_ns_all_streams(TimeDomain.DEVICE_TIME) - vrs_data_provider.get_first_time_ns_all_streams(TimeDomain.DEVICE_TIME)\n", + "skip_begin_ns = int(15 * 1e9) # Skip 15 seconds\n", + "duration_ns = int(3 * 1e9) # 3 seconds\n", + "skip_end_ns = max(total_length_ns - skip_begin_ns - duration_ns, 0)\n", + "deliver_options.set_truncate_first_device_time_ns(skip_begin_ns)\n", + "deliver_options.set_truncate_last_device_time_ns(skip_end_ns)\n", + "\n", + "# Plot image data, and overlay hand tracking data\n", + "for sensor_data in vrs_data_provider.deliver_queued_sensor_data(deliver_options):\n", + " # ---------------\n", + " # Only image data will be obtained.\n", + " # ---------------\n", + " device_time_ns = sensor_data.get_time_ns(TimeDomain.DEVICE_TIME)\n", + " image_data_and_record = sensor_data.image_data_and_record()\n", + " stream_id = sensor_data.stream_id()\n", + " camera_label = 
vrs_data_provider.get_label_from_stream_id(stream_id)\n", + " camera_calib = device_calib.get_camera_calib(camera_label)\n", + " \n", + "\n", + " # Visualize the RGB images.\n", + " rr.set_time_nanos(\"device_time\", device_time_ns)\n", + " rr.log(f\"{camera_label}\", rr.Image(image_data_and_record[0].to_numpy_array()))\n", + " \n", + " # Query and plot interpolated hand tracking result\n", + " interpolated_hand_pose = vrs_data_provider.get_interpolated_hand_pose_data(handtracking_stream_id, device_time_ns, TimeDomain.DEVICE_TIME)\n", + " if interpolated_hand_pose is not None:\n", + " plot_handpose_in_camera(hand_pose = interpolated_hand_pose, camera_label = camera_label, camera_calib = camera_calib)\n", + "\n", + "# Wait for rerun to buffer 1 second of data\n", + "import time\n", + "time.sleep(1)\n", + "\n", + "rr.notebook_show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c220f6d3-0638-4ae1-a95b-dcfe8a1356c2", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/Gen2/python_notebooks/Tutorial_5_on_device_vio.ipynb b/examples/Gen2/python_notebooks/Tutorial_5_on_device_vio.ipynb new file mode 100644 index 000000000..a26daaad0 --- /dev/null +++ b/examples/Gen2/python_notebooks/Tutorial_5_on_device_vio.ipynb @@ -0,0 +1,425 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4f4737c1", + "metadata": {}, + "source": [ + "# Tutorial 5: On-Device VIO data streams\n", + "\n", + "## Introduction\n", + "\n", + "In Aria-Gen2 glasses, one of the key upgrade from Aria-Gen1 is the capability to run Machine Perception (MP) algorithms on the device during streaming / recording. Currently supported on-device MP algorithms include Eye-tracking, Hand-tracking, and VIO. These algorithm results are stored as separate data streams in the VRS file. \n", + "\n", + "**VIO (Visual Inertial Odometry)** combines camera images and IMU (Inertial Measurement Unit) data to estimate device pose and motion in real-time. VIO tracks the device's position, orientation, and velocity by performing visual tracking, IMU integration, sensor fusion, etc, making it the foundation for spatial tracking and understanding. \n", + "\n", + "In Aria-Gen2 devices, the VIO algorithm are run on device to produce 2 types of tracking results as part of the VRS file: VIO and VIO High Frequency. \n", + "This tutorial focuses on demonstration of how to use the **on-device VIO and VIO_high_frequency** results. \n", + "\n", + "**What you'll learn:**\n", + "\n", + "- How to access on-device VIO and VIO_high_frequency data from VRS files\n", + "- How to visualize 3D trajectory from on-device VIO data.\n", + "\n", + "**Prerequisites**\n", + "- Complete Tutorial 1 (VrsDataProvider Basics) to understand basic data provider concepts\n", + "- Complete Tutorial 2 (Device Calibration) to understand how to properly use calibration in Aria data. 
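As a quick sanity check on the data rates of the two VIO-related streams introduced above, you can compare sample counts against the stream duration. This is an illustrative sketch only; it assumes a recording that contains both streams and reuses the per-stream `get_first_time_ns` / `get_last_time_ns` query pattern shown elsewhere in these tutorials.

```python
from projectaria_tools.core import data_provider
from projectaria_tools.core.sensor_data import TimeDomain

# Assumed path -- replace with your own recording containing VIO streams.
provider = data_provider.create_vrs_data_provider("path/to/your/recording.vrs")

for label in ("vio", "vio_high_frequency"):
    stream_id = provider.get_stream_id_from_label(label)
    num_samples = provider.get_num_data(stream_id)
    duration_s = (
        provider.get_last_time_ns(stream_id, TimeDomain.DEVICE_TIME)
        - provider.get_first_time_ns(stream_id, TimeDomain.DEVICE_TIME)
    ) * 1e-9
    print(f"{label}: {num_samples} samples over {duration_s:.1f} s (~{num_samples / duration_s:.0f} Hz)")
```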
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3e2224c", + "metadata": {}, + "outputs": [], + "source": [ + "from projectaria_tools.core import data_provider\n", + "\n", + "# Load local VRS file\n", + "vrs_file_path = \"path/to/your/recording.vrs\"\n", + "vrs_data_provider = data_provider.create_vrs_data_provider(vrs_file_path)\n", + "\n", + "# Query VIO data streams\n", + "vio_label = \"vio\"\n", + "vio_stream_id = vrs_data_provider.get_stream_id_from_label(vio_label)\n", + "if vio_stream_id is None:\n", + " raise RuntimeError(\n", + " f\"{vio_label} data stream does not exist! Please use a VRS that contains valid VIO data for this tutorial.\"\n", + " )\n", + "\n", + "# Query VIO_high_frequency data streams\n", + "vio_high_freq_label = \"vio_high_frequency\"\n", + "vio_high_freq_stream_id = vrs_data_provider.get_stream_id_from_label(vio_high_freq_label)\n", + "if vio_high_freq_stream_id is None:\n", + " raise RuntimeError(\n", + " f\"{vio_high_freq_label} data stream does not exist! Please use a VRS that contains valid VIO high frequency data for this tutorial.\"\n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "id": "faa4d85a", + "metadata": {}, + "source": [ + "## On-Device VIO Data Stream\n", + "### Data Type: `FrontendOutput`\n", + "This a new data type introduced to store the results from the VIO system, containing the following fields: \n", + "\n", + "| Field Name | Description |\n", + "| ----------------------------- | ------------------------------------------ |\n", + "| `frontend_session_uid` | Session identifier (resets on VIO restart) |\n", + "| `frame_id` | Frame set identifier |\n", + "| `capture_timestamp_ns` | Center capture time in nanoseconds |\n", + "| `unix_timestamp_ns` | Unix timestamp in nanoseconds |\n", + "| `status` | VIO status (VALID/INVALID) |\n", + "| `pose_quality` | Pose quality (GOOD/BAD/UNKNOWN) |\n", + "| `visual_tracking_quality` | Visual-only tracking quality |\n", + "| `online_calib` | Online calibration estimates for SLAM cameras and IMUs |\n", + "| `gravity_in_odometry` | Gravity vector in odometry frame |\n", + "| `transform_odometry_bodyimu` | Body IMU's pose in odometry reference frame |\n", + "| `transform_bodyimu_device` | Transform from body IMU to device frame |\n", + "| `linear_velocity_in_odometry` | Linear velocity in odometry frame in m/s |\n", + "| `angular_velocity_in_bodyimu` | Angular velocity in body IMU frame in rad/s |\n", + "\n", + "Here, **body IMU** is the IMU that is picked as the reference for motion tracking. For Aria-Gen2' on-device VIO algorithm, this is often `imu-left`. 
\n", + "\n", + "**Important Note**: Always check `status == VioStatus.VALID` and\n", + "`pose_quality == TrackingQuality.GOOD` for VIO data validity!\n", + "\n", + "### Data Access API\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1f3f89a", + "metadata": {}, + "outputs": [], + "source": [ + "from projectaria_tools.core.sensor_data import VioStatus, TrackingQuality\n", + "\n", + "print(\"=== VIO Data Sample ===\")\n", + "\n", + "# Find the first valid VIO data sample\n", + "num_vio_samples = vrs_data_provider.get_num_data(vio_stream_id)\n", + "first_valid_index = None\n", + "for idx in range(num_vio_samples):\n", + " vio_data = vrs_data_provider.get_vio_data_by_index(vio_stream_id, idx)\n", + " if (\n", + " vio_data.status == VioStatus.VALID\n", + " and vio_data.pose_quality == TrackingQuality.GOOD\n", + " ):\n", + " first_valid_index = idx\n", + " break\n", + "\n", + "if first_valid_index is not None:\n", + " print(\"=\" * 50)\n", + " print(f\"First VALID VIO Data Sample (Index: {first_valid_index})\")\n", + " print(\"=\" * 50)\n", + "\n", + " # Session Information\n", + " print(f\"Session UID: {vio_data.frontend_session_uid}\")\n", + " print(f\"Frame ID: {vio_data.frame_id}\")\n", + "\n", + " # Timestamps\n", + " print(f\"Capture Time: {vio_data.capture_timestamp_ns} ns\")\n", + " print(f\"Unix Time: {vio_data.unix_timestamp_ns} ns\")\n", + "\n", + " # Quality Status\n", + " print(f\"Status: {vio_data.status}\")\n", + " print(f\"Pose Quality: {vio_data.pose_quality}\")\n", + " print(f\"Visual Quality: {vio_data.visual_tracking_quality}\")\n", + "\n", + " # Transforms\n", + " print(f\"Transform Odometry → Body IMU:\\n{vio_data.transform_odometry_bodyimu.to_matrix()}\")\n", + " print(f\"Transform Body IMU → Device:\\n{vio_data.transform_bodyimu_device.to_matrix()}\")\n", + "\n", + " # Motion\n", + " print(f\"Linear Velocity: {vio_data.linear_velocity_in_odometry}\")\n", + " print(f\"Angular Velocity: {vio_data.angular_velocity_in_bodyimu}\")\n", + " print(f\"Gravity Vector: {vio_data.gravity_in_odometry}\")\n", + "else:\n", + " print(\"⚠️ No valid VIO sample found\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "750cdac1", + "metadata": {}, + "source": [ + "## On-Device VIO High Frequency Data Stream\n", + "\n", + "**VIO High Frequency** results are generated directly from the on-device VIO results by performing IMU integration between VIO poses, hence provides a much higher data rate at approximately **800Hz**. \n", + "\n", + "### Data Type: `OpenLoopTrajectoryPose`\n", + "The **VioHighFrequency** stream **re-uses** the `OpenLoopTrajectoryPose` data\n", + "structure [defined in MPS](https://github.com/facebookresearch/projectaria_tools/blob/main/core/mps/Trajectory.h). \n", + "\n", + "| Field Name | Description |\n", + "| --------------------------------- | ------------------------------------------------------- |\n", + "| `tracking_timestamp` | Timestamp in device time domain, in microseconds |\n", + "| `transform_odometry_device` | Transformation from device to odometry coordinate frame, represented as a SE3 instance. 
|\n", + "| `device_linear_velocity_odometry` | Translational velocity of device in odometry frame, in m/s |\n", + "| `angular_velocity_device` | Angular velocity of device in device frame, in rad/s |\n", + "| `quality_score` | Quality of pose estimation (higher = better) |\n", + "| `gravity_odometry` | Earth gravity vector in odometry frame |\n", + "| `session_uid` | Unique identifier for VIO tracking session |\n", + "\n", + "**Important Note**: Due to the high frequency nature of this data (~800Hz), consider\n", + "subsampling for visualization to maintain performance." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3ba60b1", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"=== VIO High-Frequency Data Sample ===\")\n", + "\n", + "# Find the first VIO high_frequency data sample with high quality value\n", + "num_vio_high_freq_samples = vrs_data_provider.get_num_data(vio_high_freq_stream_id)\n", + "first_valid_index = None\n", + "for idx in range(num_vio_samples):\n", + " vio_high_freq_data = vrs_data_provider.get_vio_high_freq_data_by_index(vio_high_freq_stream_id, idx)\n", + " if (\n", + " vio_high_freq_data.quality_score > 0.5\n", + " ):\n", + " first_valid_index = idx\n", + " break\n", + "\n", + "if first_valid_index is not None:\n", + " print(\"=\" * 50)\n", + " print(f\"First VIO High Freq Data Sample with good quality score (Index: {first_valid_index})\")\n", + " print(\"=\" * 50)\n", + "\n", + " # Timestamps, convert timedelta to nanoseconds\n", + " capture_timestamp_ns = int(vio_high_freq_data.tracking_timestamp.total_seconds() * 1e9)\n", + "\n", + " # Session Information\n", + " print(f\"Session UID: {vio_high_freq_data.session_uid}\")\n", + "\n", + " # Timestamps\n", + " print(f\"Tracking Time: {capture_timestamp_ns} ns\")\n", + "\n", + " # Quality\n", + " print(f\"Quality Score: {vio_high_freq_data.quality_score:.3f}\")\n", + "\n", + " # Transform\n", + " print(f\"Transform Odometry → Device:\\n{vio_high_freq_data.transform_odometry_device.to_matrix()}\")\n", + "\n", + " # Motion\n", + " print(f\"Linear Velocity: {vio_high_freq_data.device_linear_velocity_odometry}\")\n", + " print(f\"Angular Velocity: {vio_high_freq_data.angular_velocity_device}\")\n", + " print(f\"Gravity Vector: {vio_high_freq_data.gravity_odometry}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "20d81060", + "metadata": {}, + "source": [ + "## Visualizing On-Device VIO trajectory\n", + "\n", + "The following code snippets demonstrate how to visualize a VIO trajectory, along with glass frame + hand tracking results, in a 3D view. 
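Before the full 3D visualization below, here is a short sketch of the subsampling suggested in the note above: it walks the ~800 Hz `vio_high_frequency` stream with a fixed stride and collects device positions for plotting. It assumes `vrs_data_provider` and `vio_high_freq_stream_id` from the first code cell of this tutorial; the stride and quality threshold are illustrative choices, not fixed recommendations.

```python
# Keep roughly 20 poses per second from the ~800 Hz stream.
subsample_stride = 40

high_freq_positions = []
num_samples = vrs_data_provider.get_num_data(vio_high_freq_stream_id)
for idx in range(0, num_samples, subsample_stride):
    pose = vrs_data_provider.get_vio_high_freq_data_by_index(vio_high_freq_stream_id, idx)
    # Skip low-quality poses (threshold chosen for illustration).
    if pose.quality_score > 0.5:
        high_freq_positions.append(pose.transform_odometry_device.translation()[0])

print(f"Kept {len(high_freq_positions)} of {num_samples} high-frequency poses")
```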
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f68ef96", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_single_hand_3d(\n", + " hand_joints_in_device, hand_label\n", + "):\n", + " \"\"\"\n", + " A helper function to plot single hand data in 3D view\n", + " \"\"\"\n", + " marker_color = [255,64,0] if hand_label == \"left\" else [255, 255, 0]\n", + "\n", + " hand_skeleton_3d = create_hand_skeleton_from_landmarks(hand_joints_in_device)\n", + " rr.log(\n", + " f\"world/device/handtracking/{hand_label}/landmarks\",\n", + " rr.Points3D(\n", + " positions=hand_joints_in_device,\n", + " colors= marker_color,\n", + " radii=5e-3,\n", + " ),\n", + " )\n", + " rr.log(\n", + " f\"world/device/handtracking/{hand_label}/hand_skeleton\",\n", + " rr.LineStrips3D(\n", + " hand_skeleton_3d,\n", + " colors=[0, 255, 0],\n", + " radii=3e-3,\n", + " ),\n", + " )\n", + "\n", + "\n", + "def plot_hand_pose_data_3d(hand_pose_data):\n", + " \"\"\"\n", + " A helper function to plot hand pose data in 3D world view\n", + " \"\"\"\n", + " # Clear the canvas (only if hand_tracking_label exists for this device version)\n", + " rr.log(\n", + " f\"world/device/handtracking\",\n", + " rr.Clear.recursive(),\n", + " )\n", + "\n", + " # Plot both hands\n", + " if hand_pose_data.left_hand is not None:\n", + " plot_single_hand_3d(\n", + " hand_joints_in_device=hand_pose_data.left_hand.landmark_positions_device,\n", + " hand_label=\"left\",\n", + " )\n", + " if hand_pose_data.right_hand is not None:\n", + " plot_single_hand_3d(\n", + " hand_joints_in_device=hand_pose_data.right_hand.landmark_positions_device,\n", + " hand_label=\"right\",\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0de0f71", + "metadata": {}, + "outputs": [], + "source": [ + "import rerun as rr\n", + "from projectaria_tools.core.sensor_data import SensorDataType, TimeDomain, TimeQueryOptions\n", + "from projectaria_tools.utils.rerun_helpers import (\n", + " create_hand_skeleton_from_landmarks,\n", + " AriaGlassesOutline,\n", + " ToTransform3D\n", + ")\n", + "\n", + "print(\"\\n=== Visualizing on-device VIO trajectory + HandTracking in 3D view ===\")\n", + "\n", + "rr.init(\"rerun_viz_vio_trajectory\")\n", + "device_calib = vrs_data_provider.get_device_calibration()\n", + "handtracking_stream_id = vrs_data_provider.get_stream_id_from_label(\"handtracking\")\n", + "\n", + "# Set up a data queue\n", + "deliver_options = vrs_data_provider.get_default_deliver_queued_options()\n", + "deliver_options.deactivate_stream_all()\n", + "deliver_options.activate_stream(vio_stream_id)\n", + "\n", + "# Play for only 3 seconds\n", + "total_length_ns = vrs_data_provider.get_last_time_ns_all_streams(TimeDomain.DEVICE_TIME) - vrs_data_provider.get_first_time_ns_all_streams(TimeDomain.DEVICE_TIME)\n", + "skip_begin_ns = int(15 * 1e9) # Skip 15 seconds\n", + "duration_ns = int(3 * 1e9) # 3 seconds\n", + "skip_end_ns = max(total_length_ns - skip_begin_ns - duration_ns, 0)\n", + "deliver_options.set_truncate_first_device_time_ns(skip_begin_ns)\n", + "deliver_options.set_truncate_last_device_time_ns(skip_end_ns)\n", + "\n", + "# Plot VIO trajectory in 3D view.\n", + "# Need to keep a cache to store already-loaded trajectory\n", + "vio_traj_cached_full = []\n", + "for sensor_data in vrs_data_provider.deliver_queued_sensor_data(deliver_options):\n", + " # Convert sensor data to VIO data\n", + " vio_data = sensor_data.vio_data()\n", + " \n", + " # Check VIO data validity, only plot for valid data\n", + " if ( 
vio_data.status != VioStatus.VALID or vio_data.pose_quality != TrackingQuality.GOOD):\n", + " print(f\"VIO data is invalid for timestamp {sensor_data.get_time_ns(TimeDomain.DEVICE_TIME)}\")\n", + " continue\n", + "\n", + " # Set timestamp\n", + " rr.set_time_nanos(\"device_time\", vio_data.capture_timestamp_ns)\n", + " \n", + " # Set and plot the Device pose for the current timestamp, as a RGB axis\n", + " T_World_Device = (\n", + " vio_data.transform_odometry_bodyimu @ vio_data.transform_bodyimu_device\n", + " )\n", + " rr.log(\n", + " \"world/device\",\n", + " ToTransform3D(\n", + " T_World_Device,\n", + " axis_length=0.05,\n", + " ),\n", + " )\n", + "\n", + " # Also plot Aria glass outline for visualization\n", + " aria_glasses_point_outline = AriaGlassesOutline(\n", + " device_calib, use_cad_calib=True\n", + " )\n", + " rr.log(\n", + " \"world/device/glasses_outline\",\n", + " rr.LineStrips3D(\n", + " aria_glasses_point_outline,\n", + " colors=[200,200,200],\n", + " radii=5e-4,\n", + " ),\n", + " )\n", + "\n", + " # Plot gravity direction vector\n", + " rr.log(\n", + " \"world/vio_gravity\",\n", + " rr.Arrows3D(\n", + " origins=[T_World_Device.translation()[0]],\n", + " vectors=[\n", + " vio_data.gravity_in_odometry * 1e-2\n", + " ], # length converted from 9.8 meter -> 10 cm\n", + " colors=[101,67,33],\n", + " radii=1.5e-3,\n", + " ),\n", + " static=False,\n", + " )\n", + "\n", + " # Plot VIO trajectory that are cached so far\n", + " vio_traj_cached_full.append(T_World_Device.translation()[0])\n", + " rr.log(\n", + " \"world/vio_trajectory\",\n", + " rr.LineStrips3D(\n", + " vio_traj_cached_full,\n", + " colors=[173, 216, 255],\n", + " radii=1.5e-3,\n", + " ),\n", + " static=False,\n", + " )\n", + "\n", + " # For visualization purpose, also plot the hand tracking results\n", + " interpolated_hand_pose = vrs_data_provider.get_interpolated_hand_pose_data(handtracking_stream_id, vio_data.capture_timestamp_ns, TimeDomain.DEVICE_TIME)\n", + " if interpolated_hand_pose is not None:\n", + " plot_hand_pose_data_3d(hand_pose_data = interpolated_hand_pose)\n", + "\n", + "\n", + "rr.notebook_show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f4d149e-3517-4bdd-a67e-dfb38521e44f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/Gen2/python_notebooks/Tutorial_6_time_synchronization_in_aria_gen2.ipynb b/examples/Gen2/python_notebooks/Tutorial_6_time_synchronization_in_aria_gen2.ipynb new file mode 100644 index 000000000..514e8c0db --- /dev/null +++ b/examples/Gen2/python_notebooks/Tutorial_6_time_synchronization_in_aria_gen2.ipynb @@ -0,0 +1,303 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "138f03c1-1ccc-468a-8079-ae9f8b88c5ae", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "source": [ + "# Tutorial 6: Time Synchronization in Aria Gen2\n", + "\n", + "## Introduction\n", + "\n", + "In Project Aria glasses, one of the key features is that it provides multi-sensor data that are **temporally aligned** to a shared, device-time domain for each single device, and also provide 
**multi-device temporal synchronization** using SubGHz signals (Aria Gen2), TICSync (Aria Gen1), or TimeCode signals (Aria Gen1). In this tutorial, we will demonstrate how to use such temporal aligned data from Aria Gen2 recordings.\n", + "\n", + "**What you'll learn:**\n", + "\n", + "- How to access temporally aligned sensor data on a single VRS recording.\n", + "- How to access temporally aligned sensor data across multiple recordings using SubGHz signals.\n", + "\n", + "**Prerequisites**\n", + "- Complete Tutorial 1 (VrsDataProvider Basics) to understand basic data provider concepts\n", + "- Complete Tutorial 3 (Sequential Access multi-sensor data) to understand how to create a queue of sensor data from VRS file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2d5525c", + "metadata": {}, + "outputs": [], + "source": [ + "from projectaria_tools.core import data_provider\n", + "\n", + "# Load local VRS file\n", + "vrs_file_path = \"path/to/your/recording.vrs\"\n", + "vrs_data_provider = data_provider.create_vrs_data_provider(vrs_file_path)" + ] + }, + { + "cell_type": "markdown", + "id": "f31fcc66", + "metadata": {}, + "source": [ + "## Single-Device Timestamp alignment\n", + "\n", + "In `projectaria_tools`, every timestamp is linked to a specific `TimeDomain`, which represents the time reference or clock used to generate that timestamp. Timestamps from different `TimeDomain`s are not directly comparable—only timestamps within the same `TimeDomain` are consistent and can be accurately compared or aligned.\n", + "\n", + "### Supported Time Domains\n", + "\n", + "> **Important: Use `DEVICE_TIME` for single-device Aria data analysis**\n", + "\n", + "The following table shows all supported time domains in `projectaria_tools`:\n", + "\n", + "| Time Domain | Description | Usage |\n", + "|-------------|-------------|-------|\n", + "| **DEVICE_TIME (Recommended)**| Capture time in device's time domain. Accurate and reliable. All sensors on the same Aria device share the same device time domain. | **Use this for single-device Aria data analysis** |\n", + "| **RECORD_TIME** | Timestamps stored in the index of VRS files. For Aria glasses, these are equal to device timestamp converted to double-precision floating point. | Fast access, but use DEVICE_TIME for accuracy |\n", + "| **HOST_TIME** | Timestamps when sensor data is saved to the device (not when captured). | Should not be needed for any purpose |\n", + "| | **--- Multi-device synchronization domains ---** | |\n", + "| SUBGHZ | Multi-device synchronization for Aria Gen2 | See next part in this tutorial |\n", + "| UTC | Multi-device synchronization | See next part in this tutorial |\n", + "| TIME_CODE | Multi-device synchronization for Aria Gen1 | See [Gen1 multi-device tutorial](https://github.com/facebookresearch/projectaria_tools/blob/main/examples/Gen1/python_notebooks/ticsync_tutorial.ipynb) |\n", + "| TIC_SYNC | Multi-device synchronization for Aria Gen1 | See [Gen1 multi-device tutorial](https://github.com/facebookresearch/projectaria_tools/blob/main/examples/Gen1/python_notebooks/ticsync_tutorial.ipynb) |\n", + "\n", + "\n", + "### Key Differences: Device vs Record vs Host Time\n", + "\n", + "- **DEVICE_TIME**: The recommended choice for single-device work. Provides accurate capture timestamps from the device's internal clock, ensuring all sensors are temporally aligned.\n", + "- **RECORD_TIME**: Timestamps stored in VRS file index. 
For Aria glasses, these equal device timestamps converted to double-precision floating point, but use DEVICE_TIME directly for best accuracy.\n", + "- **HOST_TIME**: Represents when data was saved to the device, not when it was captured. Should not be needed for any purpose.\n", + "\n", + "**For single-device Aria data analysis, use `DEVICE_TIME` for accurate temporal alignment between sensors.**\n", + "\n", + "### Data API to query by timestamp\n", + "The VRS data provider offers powerful timestamp-based data access through the `get_$SENSOR_data_by_time_ns()` API family. This is the recommended approach for temporal alignment across sensors and precise timestamp-based data retrieval.\n", + "\n", + "For any sensor type, you can query data by timestamp using the `get_$SENSOR_data_by_time_ns()` function, where `$SENSOR` can be replaced by any sensor data type available in Aria VRS. See the [VrsDataProvider.h](link-to-code) for a complete list of supported sensor types.\n", + "\n", + "**TimeQueryOptions**\n", + "\n", + "This `TimeQueryOptions` parameter controls how the system finds data when your query timestamp doesn't exactly match a recorded timestamp:\n", + "\n", + "| Option | Behavior | Use Case |\n", + "|--------|----------|----------|\n", + "| **BEFORE** | Returns the last valid data with `timestamp ≤ query_time` | **Default and most common** - Get the most recent data before or at the query time |\n", + "| **AFTER** | Returns the first valid data with `timestamp ≥ query_time` | Get the next available data after or at the query time |\n", + "| **CLOSEST** | Returns data with smallest `|timestamp - query_time|` | Get the temporally closest data regardless of direction |\n", + "\n", + "### Boundary Behavior\n", + "\n", + "The API handles edge cases automatically:\n", + "\n", + "| Query Condition | BEFORE | AFTER | CLOSEST |\n", + "|-----------------|--------|-------|---------|\n", + "| `query_time < first_timestamp` | Returns invalid data | Returns first data | Returns first data |\n", + "| `first_timestamp ≤ query_time ≤ last_timestamp` | Returns data with `timestamp ≤ query_time` | Returns data with `timestamp ≥ query_time` | Returns temporally closest data |\n", + "| `query_time > last_timestamp` | Returns last data | Returns invalid data | Returns last data |\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "660abc18", + "metadata": {}, + "outputs": [], + "source": [ + "from projectaria_tools.core.sensor_data import SensorDataType, TimeDomain, TimeQueryOptions\n", + "\n", + "print(\"=== Single VRS timestamp based query ===\")\n", + "\n", + "# Select RGB stream ID to\n", + "rgb_stream_id = vrs_data_provider.get_stream_id_from_label(\"camera-rgb\")\n", + "\n", + "# Get a timestamp within the recording (3 seconds after start)\n", + "start_timestamp_ns = vrs_data_provider.get_first_time_ns(rgb_stream_id, TimeDomain.DEVICE_TIME)\n", + "selected_timestamp_ns = start_timestamp_ns + int(3e9)\n", + "\n", + "# Fetch the RGB frame that is CLOSEST to this selected timestamp_ns\n", + "closest_rgb_data_and_record = vrs_data_provider.get_image_data_by_time_ns(\n", + " stream_id = rgb_stream_id,\n", + " time_ns = selected_timestamp_ns,\n", + " time_domain = TimeDomain.DEVICE_TIME,\n", + " time_query_options = TimeQueryOptions.CLOSEST\n", + ")\n", + "closest_timestamp_ns = closest_rgb_data_and_record[1].capture_timestamp_ns\n", + "closest_frame_number = closest_rgb_data_and_record[1].frame_number\n", + "print(f\" The closest RGB frame to query timestamp 
{selected_timestamp_ns} is the {closest_frame_number}-th frame, with capture timestamp of {closest_timestamp_ns}\")\n",
+ "\n",
+ "# Fetch the frame BEFORE this frame\n",
+ "prev_rgb_data_and_record = vrs_data_provider.get_image_data_by_time_ns(\n",
+ " stream_id = rgb_stream_id,\n",
+ " time_ns = closest_timestamp_ns - 1,\n",
+ " time_domain = TimeDomain.DEVICE_TIME,\n",
+ " time_query_options = TimeQueryOptions.BEFORE\n",
+ ")\n",
+ "prev_timestamp_ns = prev_rgb_data_and_record[1].capture_timestamp_ns\n",
+ "prev_frame_number = prev_rgb_data_and_record[1].frame_number\n",
+ "print(f\" The previous RGB frame is the {prev_frame_number}-th frame, with capture timestamp of {prev_timestamp_ns}\")\n",
+ "\n",
+ "# Fetch the frame AFTER this frame\n",
+ "next_rgb_data_and_record = vrs_data_provider.get_image_data_by_time_ns(\n",
+ " stream_id = rgb_stream_id,\n",
+ " time_ns = closest_timestamp_ns + 1,\n",
+ " time_domain = TimeDomain.DEVICE_TIME,\n",
+ " time_query_options = TimeQueryOptions.AFTER\n",
+ ")\n",
+ "next_timestamp_ns = next_rgb_data_and_record[1].capture_timestamp_ns\n",
+ "next_frame_number = next_rgb_data_and_record[1].frame_number\n",
+ "print(f\" The next RGB frame is the {next_frame_number}-th frame, with capture timestamp of {next_timestamp_ns}\")\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "88c7f181",
+ "metadata": {},
+ "source": [
+ "## Multi-Device Timestamp alignment\n",
+ "While recording, multiple Aria-Gen2 glasses can enable a feature that allows their timestamps to be mapped across devices using SubGHz signals. Please refer to the multi-device recording wiki page from ARK (TODO: add link) to learn how to record with this feature. \n",
+ "\n",
+ "One pair of glasses acts as the **host** device, which actively broadcasts SubGHz signals on a specified channel; \n",
+ "all other glasses act as **client** devices, which receive the SubGHz signals and record a new `Time Domain Mapping` data stream in their VRS files. \n",
+ "This stream is essentially a timestamp mapping from **host** `DEVICE_TIME` -> **client** `DEVICE_TIME`. \n",
+ "Therefore this mapping data stream **only exists in the client VRS**, not in the **host VRS**. \n",
+ "\n",
+ "In `projectaria_tools`, we provide 2 types of APIs to easily perform timestamp-based queries across multi-device recordings: \n",
+ "1. Converter APIs, which provide direct conversion functions that map timestamps between any 2 `TimeDomain`s. \n",
+ "2. Query APIs, which allow users to specify `time_domain = TimeDomain.SUBGHZ` in a client VRS to query \"from a timestamp of the host\".\n",
+ "\n",
+ "The following code shows examples of using each type of API. \n",
+ "Note that in the visualization example, the host and client windows will play intermittently. \n",
+ "This is expected and correct, because the host and client devices' RGB cameras are NOT trigger-aligned by nature. 
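As a quick illustration of combining the two API types above, the sketch below queries a client recording directly with a host-side timestamp. This is not part of the original notebook: the file path and the example timestamps are placeholders, and it assumes the client VRS contains the SubGHz `Time Domain Mapping` stream; the API calls themselves (`create_vrs_data_provider`, `get_image_data_by_time_ns` with `TimeDomain.SUBGHZ`) are the same ones used in the cells that follow.

```python
from projectaria_tools.core import data_provider
from projectaria_tools.core.sensor_data import TimeDomain, TimeQueryOptions

# Placeholder path; the client recording must contain the SubGHz mapping stream.
client_data_provider = data_provider.create_vrs_data_provider("path/to/client.vrs")
rgb_stream_id = client_data_provider.get_stream_id_from_label("camera-rgb")

# Example host-side DEVICE_TIME timestamps (placeholders, in nanoseconds).
host_timestamps_ns = [1_000_000_000, 2_000_000_000]
for host_t_ns in host_timestamps_ns:
    # TimeDomain.SUBGHZ tells the client provider to interpret the query
    # timestamp as a host timestamp and map it through the SubGHz stream.
    image_data, image_record = client_data_provider.get_image_data_by_time_ns(
        stream_id=rgb_stream_id,
        time_ns=host_t_ns,
        time_domain=TimeDomain.SUBGHZ,
        time_query_options=TimeQueryOptions.CLOSEST,
    )
    print(host_t_ns, image_record.capture_timestamp_ns)
```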
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a4d99373", + "metadata": {}, + "outputs": [], + "source": [ + "import rerun as rr\n", + "from projectaria_tools.core.sensor_data import (\n", + " SensorData,\n", + " ImageData,\n", + " TimeDomain,\n", + " TimeQueryOptions,\n", + " TimeSyncMode,\n", + ")\n", + "\n", + "# Create data providers for both host and client recordings\n", + "host_recording = \"path/to/host.vrs\"\n", + "host_data_provider = data_provider.create_vrs_data_provider(host_recording)\n", + "\n", + "client_recording = \"path/to/client.vrs\"\n", + "client_data_provider = data_provider.create_vrs_data_provider(client_recording)\n", + "\n", + "print(\"======= Multi-VRS time mapping example: Timestamp converter APIs ======\")\n", + "\n", + "# Because host and client recordings may start at different times,\n", + "# we manually pick a timestamp in the middle of the host recording.\n", + "# Note that for host, we always use DEVICE_TIME domain.\n", + "selected_timestamp_host = (host_data_provider.get_first_time_ns_all_streams(time_domain = TimeDomain.DEVICE_TIME) +\n", + " host_data_provider.get_last_time_ns_all_streams(time_domain = TimeDomain.DEVICE_TIME)) // 2\n", + "\n", + "# Convert from host time to client time\n", + "selected_timestamp_client = client_data_provider.convert_from_synctime_to_device_time_ns(selected_timestamp_host, TimeSyncMode.SUBGHZ)\n", + "\n", + "# Convert from client time back to host time. Note that there could be some small numerical differences compared\n", + "selected_timestamp_host_roundtrip = client_data_provider.convert_from_device_time_to_synctime_ns(selected_timestamp_client, TimeSyncMode.SUBGHZ)\n", + "\n", + "print(f\" Selected host timestamp is {selected_timestamp_host}; \")\n", + "print(f\" Converted to client timestamp is {selected_timestamp_client}; \")\n", + "print(f\" Then roundtrip convert back to host:{selected_timestamp_host_roundtrip}; \")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96ed8a31-ef0c-43bf-ace9-b886a419d191", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"======= Multi-VRS time mapping example: Query APIs ======\")\n", + "rr.init(\"rerun_viz_multi_vrs_time_mapping\")\n", + "rr.notebook_show()\n", + "\n", + "# Set up sensor queue options in host VRS, only turn on RGB stream\n", + "host_deliver_options = host_data_provider.get_default_deliver_queued_options()\n", + "host_deliver_options.deactivate_stream_all()\n", + "rgb_stream_id = host_data_provider.get_stream_id_from_label(\"camera-rgb\")\n", + "host_deliver_options.activate_stream(rgb_stream_id)\n", + "\n", + "# Select only a segment to plot\n", + "host_vrs_start_timestamp = host_data_provider.get_first_time_ns_all_streams(time_domain = TimeDomain.DEVICE_TIME)\n", + "host_segment_start = host_vrs_start_timestamp + int(20e9) # 20 seconds after start\n", + "host_segment_duration = int(5e9)\n", + "host_segment_end = host_segment_start + host_segment_duration\n", + "host_vrs_end_timestamp = host_data_provider.get_last_time_ns_all_streams(time_domain = TimeDomain.DEVICE_TIME)\n", + "host_deliver_options.set_truncate_first_device_time_ns(host_segment_start - host_vrs_start_timestamp)\n", + "host_deliver_options.set_truncate_last_device_time_ns(host_vrs_end_timestamp - host_segment_end)\n", + "\n", + "# Plot RGB image data from both host and client\n", + "for sensor_data in host_data_provider.deliver_queued_sensor_data(host_deliver_options):\n", + " # --------- \n", + " # Plotting in host.\n", + " # 
Everything is done in DEVICE_TIME domain. \n", + " # ---------\n", + " host_image_data, host_image_record = sensor_data.image_data_and_record()\n", + "\n", + " # Set timestamps directly from host image record\n", + " host_timestamp_ns = host_image_record.capture_timestamp_ns\n", + " rr.set_time_nanos(\"device_time\", host_timestamp_ns)\n", + "\n", + " rr.log(\"rgb_image_in_host\", rr.Image(host_image_data.to_numpy_array()))\n", + "\n", + " # --------- \n", + " # Plotting in client.\n", + " # All the query APIs are done in SUBGHZ domain. \n", + " # ---------\n", + " # Query the closest RGB image from client VRS\n", + " client_image_data, client_image_record = client_data_provider.get_image_data_by_time_ns(\n", + " stream_id = rgb_stream_id,\n", + " time_ns = host_timestamp_ns,\n", + " time_domain = TimeDomain.SUBGHZ,\n", + " time_query_options = TimeQueryOptions.CLOSEST)\n", + "\n", + " # Still need to convert client's device time back to host's time,\n", + " # because we want to log this image data on host's timeline in Rerun\n", + " client_timestamp_ns = client_image_record.capture_timestamp_ns\n", + " converted_client_timestamp_ns = client_data_provider.convert_from_device_time_to_synctime_ns(client_timestamp_ns, TimeSyncMode.SUBGHZ)\n", + " rr.set_time_nanos(\"device_time\", converted_client_timestamp_ns)\n", + "\n", + " # Plot client image\n", + " rr.log(\"rgb_image_in_client\", rr.Image(client_image_data.to_numpy_array()))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/Gen2/python_notebooks/Tutorial_7_mps_data_provider_basics.ipynb b/examples/Gen2/python_notebooks/Tutorial_7_mps_data_provider_basics.ipynb new file mode 100644 index 000000000..9e83620c4 --- /dev/null +++ b/examples/Gen2/python_notebooks/Tutorial_7_mps_data_provider_basics.ipynb @@ -0,0 +1,491 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3fccb05e", + "metadata": {}, + "source": [ + "# Tutorial 7: Machine Perception Services (MPS) for Aria Gen2\n", + "\n", + "## Introduction\n", + "\n", + "Machine Perception Services, or MPS, is a post-processing cloud service that we provide to Aria users. \n", + "It runs a set of priprietary, Spatial AI machine perception algorithms, that are designed for Project Aria glasses. \n", + "MPS is designed to provide superior accuracy and robustness compared to off-the-shelf open algorithms. \n", + "\n", + "We are excited to share that we have extended MPS to Aria-Gen2 users. \n", + "Currently, the supported MPS algorithms for Aria Gen2 include: \n", + "- **SLAM Single Sequence Trajectory**: generates device trajectories, semidense-point cloud data, online calibration. \n", + "\n", + "This tutorial focuses on demonstrating how to load and visualize the MPS results. \n", + "\n", + "**What you'll learn:**\n", + "\n", + "- How to load MPS output data, and definitions of the data types. \n", + "- How to visualize the MPS data together with Aria VRS files. 
\n", + "\n", + "**Prerequisites**\n", + "- Complete Tutorial 1 (VrsDataProvider Basics) to understand basic data provider concepts\n", + "- Complete Tutorial 2 (Device Calibration) to understand how to properly use calibration in Aria data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a3643bc", + "metadata": {}, + "outputs": [], + "source": [ + "from projectaria_tools.core import data_provider, mps\n", + "import os\n", + "\n", + "# Load local VRS file\n", + "vrs_file_path = \"path/to/your/recording.vrs\"\n", + "vrs_data_provider = data_provider.create_vrs_data_provider(vrs_file_path)" + ] + }, + { + "cell_type": "markdown", + "id": "08e195ee", + "metadata": {}, + "source": [ + "## MPS - SLAM\n", + "\n", + "### [MPS - SLAM] Output Files\n", + "MPS output result files are categorized into sub-folders by algorithms. \n", + "For SLAM algorithm output, it generates the following files: \n", + "- `closed_loop_trajectory.csv`\n", + "- `open_loop_trajectory.csv`\n", + "- `semidense_observations.csv.gz`\n", + "- `semidense_points.csv.gz`\n", + "- `online_calibration.jsonl`\n", + "- `summary.json`\n", + "\n", + "Please refer to the [MPS Wiki page](https://facebookresearch.github.io/projectaria_tools/docs/data_formats/mps/slam) for details of each file. \n", + "\n", + "### [MPS - SLAM] Closed vs Open Loop trajectory\n", + "\n", + "MPS SLAM algorithm outputs 2 trajectory files (see [wiki page](https://facebookresearch.github.io/projectaria_tools/docs/data_formats/mps/slam/mps_trajectory) for data type definitions): \n", + "- **Open loop trajectory**: High-frequency (1kHz) odometry from visual-inertial odometry (VIO), accurate over short periods but drifts over time and distance.\n", + "- **Closed loop trajectory**: High-frequency (1kHz) pose from mapping with loop closure corrections, reducing drift but possibly less accurate locally over short spans." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "56124fab", + "metadata": {}, + "outputs": [], + "source": [ + "from projectaria_tools.core.mps.utils import (\n", + " filter_points_from_confidence,\n", + " get_nearest_pose,\n", + ")\n", + "\n", + "print(\"=== MPS - Closed loop trajectory ===\")\n", + "\n", + "# Load MPS closed-loop trajectory data\n", + "mps_folder_path = \"path/to/your/mps/folder/\"\n", + "closed_loop_trajectory_file = os.path.join(\n", + " mps_folder_path, \"slam\", \"closed_loop_trajectory.csv\"\n", + ")\n", + "closed_loop_trajectory = mps.read_closed_loop_trajectory(closed_loop_trajectory_file)\n", + "\n", + "# Print out the content of the first sample in closed_loop_trajectory\n", + "if closed_loop_trajectory:\n", + " sample = closed_loop_trajectory[0]\n", + " print(\"ClosedLoopTrajectoryPose sample:\")\n", + " print(f\" tracking_timestamp: {int(sample.tracking_timestamp.total_seconds() * 1e6)} us\")\n", + " print(f\" utc_timestamp: {int(sample.utc_timestamp.total_seconds() * 1e6)} us\")\n", + " print(f\" transform_world_device:\\n{sample.transform_world_device}\")\n", + " print(f\" device_linear_velocity_device: {sample.device_linear_velocity_device}\")\n", + " print(f\" angular_velocity_device: {sample.angular_velocity_device}\")\n", + " print(f\" quality_score: {sample.quality_score}\")\n", + " print(f\" gravity_world: {sample.gravity_world}\")\n", + " print(f\" graph_uid: {sample.graph_uid}\")\n", + "else:\n", + " print(\"closed_loop_trajectory is empty.\")\n", + "\n", + "\n", + "print(\"=== MPS - Open loop trajectory ===\")\n", + "\n", + "# Load MPS open-loop trajectory data\n", + "open_loop_trajectory_file = os.path.join(\n", + " mps_folder_path, \"slam\", \"open_loop_trajectory.csv\"\n", + ")\n", + "open_loop_trajectory = mps.read_open_loop_trajectory(open_loop_trajectory_file)\n", + "\n", + "# Print out the content of the first sample in open_loop_trajectory\n", + "if open_loop_trajectory:\n", + " sample = open_loop_trajectory[0]\n", + " print(\"OpenLoopTrajectoryPose sample:\")\n", + " print(f\" tracking_timestamp: {int(sample.tracking_timestamp.total_seconds() * 1e6)} us\")\n", + " print(f\" utc_timestamp: {int(sample.utc_timestamp.total_seconds() * 1e6)} us\")\n", + " print(f\" transform_odometry_device:\\n{sample.transform_odometry_device}\")\n", + " print(f\" device_linear_velocity_odometry: {sample.device_linear_velocity_odometry}\")\n", + " print(f\" angular_velocity_device: {sample.angular_velocity_device}\")\n", + " print(f\" quality_score: {sample.quality_score}\")\n", + " print(f\" gravity_odometry: {sample.gravity_odometry}\")\n", + " print(f\" session_uid: {sample.session_uid}\")\n", + "else:\n", + " print(\"open_loop_trajectory is empty.\")" + ] + }, + { + "cell_type": "markdown", + "id": "d58abddc", + "metadata": {}, + "source": [ + "### [MPS - SLAM] Semi-dense Point Cloud and Observations\n", + "\n", + "MPS SLAM algorithm outputs 2 files related to semi-dense point cloud (see [wiki page](https://facebookresearch.github.io/projectaria_tools/docs/data_formats/mps/slam/mps_pointcloud) for data type definitions): \n", + "- `semidense_points.csv.gz`: Global points in the world coordinate frame. \n", + "- `semidense_observations.csv.gz`: Point observations for each camera, at each timestamp.\n", + "\n", + "Note that semidense point files are normally large, therefore loading them may take some time. 
\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e882ecda", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"=== MPS - Semi-dense Point Cloud ===\")\n", + "\n", + "# Load MPS semi-dense point cloud data\n", + "semidense_points_file = os.path.join(\n", + " mps_folder_path, \"slam\", \"semidense_points.csv.gz\"\n", + ")\n", + "semidense_points = mps.read_global_point_cloud(semidense_points_file)\n", + "\n", + "# Print out the content of the first sample in semidense_points\n", + "if semidense_points:\n", + " sample = semidense_points[0]\n", + " print(\"GlobalPointPosition sample:\")\n", + " print(f\" uid: {sample.uid}\")\n", + " print(f\" graph_uid: {sample.graph_uid}\")\n", + " print(f\" position_world: {sample.position_world}\")\n", + " print(f\" inverse_distance_std: {sample.inverse_distance_std}\")\n", + " print(f\" distance_std: {sample.distance_std}\")\n", + " print(f\"Total number of semi-dense points: {len(semidense_points)}\")\n", + "else:\n", + " print(\"semidense_points is empty.\")\n", + "\n", + "# Filter semidense points by inv_dep or depth. \n", + "# The filter will KEEP points with (inv_dep or depth < threshold)\n", + "filtered_semidense_points = filter_points_from_confidence(raw_points = semidense_points, threshold_invdep = 1e-3, threshold_dep = 5e-2)\n", + "print(f\"Filtering semidense points from a total of {len(semidense_points)} points down to {len(filtered_semidense_points)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2558426-4246-40a5-afe4-1e8301d8616e", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"=== MPS - Semi-dense Point Observations ===\")\n", + "\n", + "# Load MPS semi-dense point observations data\n", + "semidense_observations_file = os.path.join(\n", + " mps_folder_path, \"slam\", \"semidense_observations.csv.gz\"\n", + ")\n", + "semidense_observations = mps.read_point_observations(semidense_observations_file)\n", + "\n", + "# Print out the content of the first sample in semidense_observations\n", + "if semidense_observations:\n", + " sample = semidense_observations[0]\n", + " print(\"PointObservation sample:\")\n", + " print(f\" point_uid: {sample.point_uid}\")\n", + " print(f\" frame_capture_timestamp: {int(sample.frame_capture_timestamp.total_seconds() * 1e6)} us\")\n", + " print(f\" camera_serial: {sample.camera_serial}\")\n", + " print(f\" uv: {sample.uv}\")\n", + " print(f\"Total number of point observations: {len(semidense_observations)}\")\n", + "else:\n", + " print(\"semidense_observations is empty.\")" + ] + }, + { + "cell_type": "markdown", + "id": "562d48b4", + "metadata": {}, + "source": [ + "### [MPS - SLAM] Visualization\n", + "\n", + "In the following code snippet, we demonstrate how to visualize the MPS SLAM results in a 3D view. \n", + "\n", + "We first prepare a short trajectory segment, then extract the semidense points position, along with timestamp-mapped observations for visualization purpose. Finally we plot everything in Rerun. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bae260b7-716d-4a08-ba9e-07a5184d61bf", + "metadata": {}, + "outputs": [], + "source": [ + "from collections import defaultdict\n", + "\n", + "import numpy as np\n", + "\n", + "# A helper coloring function\n", + "def color_from_zdepth(z_depth_m: float) -> np.ndarray:\n", + " \"\"\"\n", + " Map z-depth (meters, along the camera's forward axis) to a bright Viridis-like RGB color.\n", + " - If z_depth_m <= 0 (point is behind the camera), return white [255, 255, 255].\n", + " - Near (0.2 m) -> yellow, Far (3.0 m) -> purple.\n", + " Returns an array of shape (3,) with dtype=uint8.\n", + " \"\"\"\n", + " if not np.isfinite(z_depth_m) or z_depth_m <= 0.0:\n", + " return np.array([0, 0, 0], dtype=np.uint8)\n", + "\n", + " NEAR_METERS, FAR_METERS = 0.2, 5.0\n", + "\n", + " # Normalize to [0,1], then flip so near → bright (yellow), far → dark (purple)\n", + " clamped = min(max(float(z_depth_m), NEAR_METERS), FAR_METERS)\n", + " normalized_position = (clamped - NEAR_METERS) / (FAR_METERS - NEAR_METERS + 1e-12)\n", + " gradient_position = 1.0 - normalized_position\n", + "\n", + " # Viridis-like anchor colors: purple → blue → teal → green → yellow\n", + " color_stops = [\n", + " (68, 1, 84),\n", + " (59, 82, 139),\n", + " (33, 145, 140),\n", + " (94, 201, 98),\n", + " (253, 231, 37),\n", + " ]\n", + "\n", + " # Locate segment and blend between its endpoints\n", + " segment_count = len(color_stops) - 1\n", + " continuous_index = gradient_position * segment_count\n", + " lower_segment_index = int(continuous_index)\n", + "\n", + " if lower_segment_index >= segment_count:\n", + " red, green, blue = color_stops[-1]\n", + " else:\n", + " segment_fraction = continuous_index - lower_segment_index\n", + " r0, g0, b0 = color_stops[lower_segment_index]\n", + " r1, g1, b1 = color_stops[lower_segment_index + 1]\n", + " red = r0 + segment_fraction * (r1 - r0)\n", + " green = g0 + segment_fraction * (g1 - g0)\n", + " blue = b0 + segment_fraction * (b1 - b0)\n", + "\n", + " return np.array([int(red), int(green), int(blue)], dtype=np.uint8)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad11348f-186a-4d08-b480-710238da4f50", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"=== Preparing MPS SLAM results for visualization ===\")\n", + "\n", + "# Check if we have valid SLAM data to visualize\n", + "if not closed_loop_trajectory or not semidense_points:\n", + " raise RuntimeError(\"Warning: This tutorial requires valid MPS SLAM data to run.\")\n", + "\n", + "# ----------- \n", + "# Prepare Trajectory data\n", + "# -----------\n", + "# Select a short segment of trajectory (e.g., first 5000 samples, subsampled by 50)\n", + "segment_length = min(50000, len(closed_loop_trajectory))\n", + "trajectory_segment = closed_loop_trajectory[:segment_length:50]\n", + "timestamp_to_pose = {\n", + " pose.tracking_timestamp: pose for pose in trajectory_segment\n", + "}\n", + "print(f\"Finished preparing a trajectory of length {len(trajectory_segment)}... 
\")\n", + "\n", + "# ----------- \n", + "# Prepare Semidense point data\n", + "# -----------\n", + "# Filter the semidense point cloud by confidence and limit max point count, and extract the point positions\n", + "filtered_semidense_point_cloud_data = filter_points_from_confidence(semidense_points)\n", + "points_positions = np.array(\n", + " [\n", + " point.position_world for point in filtered_semidense_point_cloud_data\n", + " ]\n", + ")\n", + "print(f\"Finished preparing filtered semidense points cloud of {len(filtered_semidense_point_cloud_data)} points... \")\n", + "\n", + "# ----------- \n", + "# Prepare Semidense observation data\n", + "# -----------\n", + "# Based on RGB observations, create a per-timestamp point position list, and color them according to its distance from RGB camera\n", + "point_uid_to_position = {\n", + " point.uid: np.array(point.position_world) for point in filtered_semidense_point_cloud_data\n", + "}\n", + "\n", + "# A helper function that creates a easier-to-query mapping to obtain observations according to timestamps\n", + "slam_1_serial = vrs_data_provider.get_device_calibration().get_camera_calib(\"slam-front-left\").get_serial_number()\n", + "timestamp_to_point_positions = defaultdict(list) # t_ns -> [position, position, ...]\n", + "timestamp_to_point_colors = defaultdict(list) # t_ns -> [color, color, ...]\n", + "\n", + "for obs in semidense_observations:\n", + " # Only add observations for SLAM_1 camera, and if the timestamp is in the chosen trajectory segment\n", + " if (\n", + " obs.camera_serial == slam_1_serial and \n", + " obs.frame_capture_timestamp in timestamp_to_pose and \n", + " obs.point_uid in point_uid_to_position):\n", + " # Insert point position\n", + " obs_timestamp = obs.frame_capture_timestamp\n", + " point_position = point_uid_to_position[obs.point_uid]\n", + " timestamp_to_point_positions[obs_timestamp].append(point_position)\n", + "\n", + " # Insert point color\n", + " T_world_device = timestamp_to_pose[obs_timestamp].transform_world_device\n", + " point_in_device = T_world_device.inverse() @ point_position\n", + " point_z_depth = point_in_device.squeeze()[2]\n", + " point_color = color_from_zdepth(point_z_depth)\n", + " timestamp_to_point_colors[obs_timestamp].append(point_color)\n", + "\n", + "from itertools import islice\n", + "print(f\"Finished preparing semidense points observations: \")\n", + "for timestamp, points in islice(timestamp_to_point_positions.items(), 5):\n", + " print(f\"\\t timestamp {int(timestamp.total_seconds() * 1e9)} ns has {len(points)} observed points in slam-front-left view. 
\")\n", + "print(f\"\\t ...\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cda9ee2f", + "metadata": {}, + "outputs": [], + "source": [ + "import rerun as rr\n", + "import numpy as np\n", + "from projectaria_tools.utils.rerun_helpers import (\n", + " AriaGlassesOutline,\n", + " ToTransform3D,\n", + " ToBox3D,\n", + ")\n", + "from projectaria_tools.core.mps.utils import (\n", + " filter_points_from_confidence,\n", + " get_nearest_pose,\n", + ")\n", + "\n", + "print(\"=== Visualizing MPS SLAM Results in 3D ===\")\n", + "\n", + "# Initialize Rerun\n", + "rr.init(\"MPS SLAM Visualization\")\n", + "rr.notebook_show() # open the in-notebook viewer first, then stream logs\n", + "\n", + "# Set up the 3D scene\n", + "rr.log(\"world\", rr.ViewCoordinates.RIGHT_HAND_Z_UP, static=True)\n", + "\n", + "# Log point cloud\n", + "rr.log(\n", + " \"world/semidense_points\",\n", + " rr.Points3D(\n", + " positions=points_positions,\n", + " colors=[255, 255, 255, 125],\n", + " radii=0.001\n", + " ),\n", + " static=True\n", + ")\n", + "\n", + "# Aria glass outline for visualization purpose\n", + "device_calib = vrs_data_provider.get_device_calibration()\n", + "aria_glasses_point_outline = AriaGlassesOutline(\n", + " device_calib, use_cad_calib=True\n", + ")\n", + "\n", + "# Plot Closed loop trajectory \n", + "closed_loop_traj_cached_full = []\n", + "observation_points_cached = None\n", + "observation_colors_cached = None\n", + "for closed_loop_pose in trajectory_segment:\n", + " capture_timestamp_ns = int(closed_loop_pose.tracking_timestamp.total_seconds() * 1e9)\n", + " rr.set_time_nanos(\"device_time\", capture_timestamp_ns)\n", + " \n", + " T_world_device = closed_loop_pose.transform_world_device\n", + "\n", + " # Log device pose as a coordinate frame\n", + " rr.log(\n", + " \"world/device\",\n", + " ToTransform3D(\n", + " T_world_device,\n", + " axis_length=0.05,\n", + " ),\n", + " )\n", + "\n", + " # Plot Aria glass outline\n", + " rr.log(\n", + " \"world/device/glasses_outline\",\n", + " rr.LineStrips3D(\n", + " aria_glasses_point_outline,\n", + " colors=[150,200,40],\n", + " radii=5e-3,\n", + " ),\n", + " )\n", + " \n", + " # Plot gravity direction vector\n", + " rr.log(\n", + " \"world/vio_gravity\",\n", + " rr.Arrows3D(\n", + " origins=[T_world_device.translation()[0]],\n", + " vectors=[\n", + " closed_loop_pose.gravity_world * 1e-2\n", + " ], # length converted from 9.8 meter -> 10 cm\n", + " colors=[101,67,33],\n", + " radii=5e-3,\n", + " ),\n", + " static=False,\n", + " )\n", + "\n", + " # Update cached results for observations. Cache is needed because observation has a much lower freq than high-freq trajectory. 
\n", + " if closed_loop_pose.tracking_timestamp in timestamp_to_point_positions.keys():\n", + " observation_points_cached = timestamp_to_point_positions[closed_loop_pose.tracking_timestamp]\n", + " observation_colors_cached = timestamp_to_point_colors[closed_loop_pose.tracking_timestamp]\n", + " if observation_points_cached is not None:\n", + " rr.log(\n", + " \"world/semidense_observations\", \n", + " rr.Points3D(\n", + " positions = observation_points_cached,\n", + " colors = observation_colors_cached,\n", + " radii=0.01\n", + " ),\n", + " static = False\n", + " )\n", + " \n", + " \n", + " # Plot the entire VIO trajectory that are cached so far\n", + " closed_loop_traj_cached_full.append(T_world_device.translation()[0])\n", + " rr.log(\n", + " \"world/vio_trajectory\",\n", + " rr.LineStrips3D(\n", + " closed_loop_traj_cached_full,\n", + " colors=[173, 216, 255],\n", + " radii=5e-3,\n", + " ),\n", + " static=False,\n", + " )" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/Gen2/python_notebooks/dataprovider_quickstart_tutorial.ipynb b/examples/Gen2/python_notebooks/dataprovider_quickstart_tutorial.ipynb deleted file mode 100644 index 72e4be99e..000000000 --- a/examples/Gen2/python_notebooks/dataprovider_quickstart_tutorial.ipynb +++ /dev/null @@ -1,1356 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "fb140897", - "metadata": {}, - "source": [ - "# Interactive Examples on Project Aria Tools\n", - "\n", - "### Notebook stuck?\n", - "Note that because of Jupyter issues, sometimes the code may stuck at visualization. We recommend **restart the kernels** and try again to see if the issue is resolved." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10e0572c", - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "import os\n", - "\n", - "# Specifics for Google Colab\n", - "google_colab_env = 'google.colab' in str(get_ipython())\n", - "if google_colab_env:\n", - " print(\"Running from Google Colab, installing projectaria_tools and getting sample data\")\n", - " !pip install projectaria-tools\n", - " # TODO: Update the data path here.\n", - " !curl -O -J -L \"https://github.com/facebookresearch/projectaria_tools/raw/main/data/mps_sample/sample.vrs\"\n", - " vrsfile = \"sample.vrs\"\n", - "else:\n", - " print(\"Using a pre-existing projectaria_tool github repository\")\n", - " # Define the paths to check\n", - " possible_path_1 = \"../../../data/mps_sample/sample.vrs\"\n", - " possible_path_2 = \"../../../data/gen1/mps_sample/sample.vrs\"\n", - " # Check which path contains the actual data file\n", - " if os.path.exists(possible_path_1):\n", - " vrsfile = possible_path_1\n", - " print(f\"Using data from: {vrsfile}\")\n", - " elif os.path.exists(possible_path_2):\n", - " vrsfile = possible_path_2\n", - " print(f\"Using data from: {vrsfile}\")\n", - " else:\n", - " # Exit with an error message if no data file is found\n", - " sys.exit(\"Error: No data file found in the specified paths.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8211dc17", - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "import os\n", - "\n", - "# Add the current repository path to sys.path\n", - "repo_path = os.path.abspath(os.path.join(os.getcwd(), '../../'))\n", - "sys.path.insert(0, repo_path)\n", - "print(repo_path)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7e0909f2-ba66-4758-8a05-2e925574f43b", - "metadata": {}, - "outputs": [], - "source": [ - "from projectaria_tools.core import data_provider, calibration\n", - "from projectaria_tools.core.image import InterpolationMethod\n", - "from projectaria_tools.core.sensor_data import TimeDomain, TimeQueryOptions\n", - "from projectaria_tools.core.stream_id import RecordableTypeId, StreamId\n", - "import numpy as np\n", - "from matplotlib import pyplot as plt\n", - "from PIL import Image" - ] - }, - { - "cell_type": "markdown", - "id": "8196ad05", - "metadata": {}, - "source": [ - "## Create data provider" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fb04b53b", - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"Creating data provider from {vrsfile}\")\n", - "provider = data_provider.create_vrs_data_provider(vrsfile)\n", - "if not provider:\n", - " print(\"Invalid vrs data provider\")\n" - ] - }, - { - "cell_type": "markdown", - "id": "8884a2ff-a26b-40d1-a033-268306283788", - "metadata": {}, - "source": [ - "## Check device version\n", - "Create device-version specific variables. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "42053ae3-f48e-4c42-a14a-70618883e9d3", - "metadata": {}, - "outputs": [], - "source": [ - "from typing import Optional\n", - "from projectaria_tools.core.calibration import DeviceVersion\n", - "# Print out the device version of the recording\n", - "device_version = provider.get_device_version()\n", - "print(f\"Device version is {calibration.get_name(device_version)}\")\n", - "\n", - "# Example variables used in this notebook\n", - "rgb_stream_id = StreamId('214-1')\n", - "\n", - "# Some example variables are different for Gen1 and Gen2,\n", - "# because they have different HW configs, sensor label names, etc.\n", - "if device_version == DeviceVersion.Gen1:\n", - " example_stream_mappings = {\n", - " \"camera-slam-left\": StreamId(\"1201-1\"),\n", - " \"camera-slam-right\":StreamId(\"1201-2\"),\n", - " \"camera-rgb\":StreamId(\"214-1\"),\n", - " \"camera-eyetracking\":StreamId(\"211-1\"),\n", - " }\n", - " example_slam_stream_label = \"camera-slam-left\"\n", - "\n", - " # Gen1 images are rotated 90 degrees for better visualization\n", - " ROTATE_90_FLAG = True\n", - "\n", - " # A linear camera model used in undistortion example: [width, height, focal]\n", - " example_linear_rgb_camera_model_params = [512, 512, 150]\n", - "elif device_version == DeviceVersion.Gen2:\n", - " example_stream_mappings = {\n", - " \"slam-front-left\": StreamId(\"1201-1\"),\n", - " \"slam-front-right\":StreamId(\"1201-2\"),\n", - " \"slam-side-left\": StreamId(\"1201-3\"),\n", - " \"slam-side-right\": StreamId(\"1201-4\"),\n", - " \"camera-rgb\":StreamId(\"214-1\"),\n", - " \"camera-et-left\":StreamId(\"211-1\"),\n", - " \"camera-et-right\":StreamId(\"211-2\"),\n", - " }\n", - " example_slam_stream_label = \"slam-front-left\"\n", - " # Gen2 images are already in up-right orientation\n", - " ROTATE_90_FLAG = False\n", - "\n", - " # A linear camera model used in undistortion example: [width, height, focal]\n", - " example_linear_rgb_camera_model_params = [4032, 3024, 1600]\n", - "\n", - "example_slam_stream_id = provider.get_stream_id_from_label(example_slam_stream_label)\n", - "\n", - "# A helper function to auto rotate Aria image, if necessary\n", - "def auto_image_rotation(img: np.array, stream_label: Optional[str] = None):\n", - " if stream_label != \"camera-eyetracking\" and ROTATE_90_FLAG:\n", - " return np.rot90(img, -1)\n", - " else:\n", - " return img" - ] - }, - { - "cell_type": "markdown", - "id": "87be2866", - "metadata": {}, - "source": [ - "# Retrieving image data\n", - "\n", - "Goals:\n", - "- Learn how to retrieve Image data for a given Image stream\n", - "\n", - "Key learnings:\n", - "- VRS contains data streams are identified with a Unique Identifier: stream_id\n", - "- Learn what are the Stream Ids used by Aria data (Slam, Rgb, EyeTracking)\n", - "- Learn that image data can be retrieved by using a record Index or a timestamp\n", - "- For each stream_id, index ranges from [0, get_num_data(stream_id)], and the same index for different streams could have different timestamps\n", - "- Query data from different sensors of the same timestamp can be done through `get_image_data_by_time_ns`, `get_imu_data_by_time_ns`, etc" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "023ae1a6", - "metadata": {}, - "outputs": [], - "source": [ - "axes = []\n", - "fig, axes = plt.subplots(1, len(example_stream_mappings), figsize=(12, 4))\n", - "fig.suptitle('Retrieving image data using Record Index')\n", - "\n", - "# Query 
data with index\n", - "frame_index = 1\n", - "for idx, [stream_name, stream_id] in enumerate(list(example_stream_mappings.items())):\n", - " image = provider.get_image_data_by_index(stream_id, frame_index)\n", - " image_to_show = auto_image_rotation(image[0].to_numpy_array(), stream_name)\n", - " axes[idx].imshow(image_to_show, cmap=\"gray\", vmin=0, vmax=255)\n", - " axes[idx].title.set_text(stream_name)\n", - " axes[idx].tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)\n", - "plt.show()\n", - "\n", - "# Same example using Time\n", - "plt.figure()\n", - "fig, axes = plt.subplots(1, len(example_stream_mappings), figsize=(12, 4))\n", - "fig.suptitle('Retrieving image data using Time')\n", - "\n", - "time_domain = TimeDomain.DEVICE_TIME # query data based on host time\n", - "option = TimeQueryOptions.CLOSEST # get data whose time [in TimeDomain] is CLOSEST to query time\n", - "start_time = provider.get_first_time_ns(rgb_stream_id, time_domain)\n", - "\n", - "for idx, [stream_name, stream_id] in enumerate(list(example_stream_mappings.items())):\n", - " image = provider.get_image_data_by_time_ns(stream_id, start_time, time_domain, option)\n", - " image_to_show = auto_image_rotation(image[0].to_numpy_array(), stream_name)\n", - " axes[idx].imshow(image_to_show, cmap=\"gray\", vmin=0, vmax=255)\n", - " axes[idx].title.set_text(stream_name)\n", - " axes[idx].tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "c5033225", - "metadata": {}, - "source": [ - "# Summarize a VRS using thumbnails\n", - "\n", - "Goals:\n", - "- Summarize a VRS using 10 image side by side\n", - "\n", - "Key learnings:\n", - "- Image streams are identified with a Unique Identifier: stream_id\n", - "- PIL images can be created from Numpy array" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "933725b6", - "metadata": {}, - "outputs": [], - "source": [ - "from PIL import Image, ImageOps\n", - "from tqdm import tqdm\n", - "\n", - "# Retrieve Start and End time for the given Sensor Stream Id\n", - "start_time = provider.get_first_time_ns(rgb_stream_id, time_domain)\n", - "end_time = provider.get_last_time_ns(rgb_stream_id, time_domain)\n", - "\n", - "# Retrieve image size for the RGB stream\n", - "time_domain = TimeDomain.DEVICE_TIME # query data based on host time\n", - "option = TimeQueryOptions.CLOSEST # get data whose time [in TimeDomain] is CLOSEST to query time\n", - "\n", - "image_config = provider.get_image_configuration(rgb_stream_id)\n", - "width = image_config.image_width\n", - "height = image_config.image_height\n", - "\n", - "sample_count = 10\n", - "resize_ratio = 10\n", - "thumbnail = newImage = Image.new(\n", - " \"RGB\", (int(width * sample_count / resize_ratio), int(height / resize_ratio))\n", - ")\n", - "current_width = 0\n", - "\n", - "\n", - "# Samples 10 timestamps\n", - "sample_timestamps = np.linspace(start_time, end_time, sample_count)\n", - "for sample in tqdm(sample_timestamps):\n", - " image_tuple = provider.get_image_data_by_time_ns(rgb_stream_id, int(sample), time_domain, option)\n", - " image_array = auto_image_rotation(image_tuple[0].to_numpy_array())\n", - " image = Image.fromarray(image_array)\n", - " new_size = (\n", - " int(image.size[0] / resize_ratio),\n", - " int(image.size[1] / resize_ratio),\n", - " )\n", - " image = image.resize(new_size)\n", - " thumbnail.paste(image, (current_width, 0))\n", - " current_width = 
int(current_width + width / resize_ratio)\n", - "\n", - "from IPython.display import Image\n", - "display(thumbnail)" - ] - }, - { - "cell_type": "markdown", - "id": "0850064e", - "metadata": {}, - "source": [ - "# Obtain mapping between stream_id and sensor label\n", - "Goals:\n", - "- In a vrs file, each sensor data is identified through stream_id\n", - "- Learn mapping between stream_id and label for each sensor\n", - "\n", - "Key learnings:\n", - "- VRS is using Unique Identifier for each stream called stream_id. \n", - "- For each sensor data, it is attached with a stream_id, which contains two parts [RecordableTypeId, InstanceId]. \n", - "- To get the actual readable name of each sensor,\n", - "we can use `get_label_from_stream_id` vise versa `get_stream_id_from_label`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "43d83a1e", - "metadata": {}, - "outputs": [], - "source": [ - "streams = provider.get_all_streams()\n", - "for stream_id in streams:\n", - " label = provider.get_label_from_stream_id(stream_id)\n", - " print(\n", - " f\"stream_id: [{stream_id}] convert to label: [{label}] and back: [{provider.get_stream_id_from_label(label)}]\"\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "3def75bb", - "metadata": {}, - "source": [ - "# Get sensor data in a sequence based on data capture time\n", - "Goal:\n", - "- Obtain sensor data sequentially based on timestamp\n", - "\n", - "Key learnings\n", - "- Default option activates all sensors and playback the entire dataset from vrs\n", - "- Setup option to only activate certain streams, truncate start/end time, and sample rate\n", - "- Obtain data from different sensor types\n", - "- `TimeDomain` are separated into four categories: `RECORD_TIME`, `DEVICE_TIME`, `HOST_TIME`, `TIME_CODE`" - ] - }, - { - "cell_type": "markdown", - "id": "89aba5a2", - "metadata": {}, - "source": [ - "### Step 1: obtain default options that provides the whole dataset from VRS\n", - "* activates all sensor streams\n", - "* No truncation for first/last timestamp\n", - "* Subsample rate = 1 (do not skip any data per sensor)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f0124864", - "metadata": {}, - "outputs": [], - "source": [ - "options = (\n", - " provider.get_default_deliver_queued_options()\n", - ") # default options activates all streams" - ] - }, - { - "cell_type": "markdown", - "id": "fb6dca83", - "metadata": {}, - "source": [ - "### Step 2: set preferred deliver options\n", - "* truncate first/last time: `set_truncate_first_device_time_ns/set_truncate_last_device_time_ns()`\n", - "* subselect sensor streams to play: `activate_stream(stream_id)`\n", - "* skip sensor data : `set_subsample_rate(stream_id, rate)`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a046582c", - "metadata": {}, - "outputs": [], - "source": [ - "options.set_truncate_first_device_time_ns(int(1e8)) # 0.1 secs after vrs first timestamp\n", - "options.set_truncate_last_device_time_ns(int(2e8)) # 0.2 sec before vrs last timestamp\n", - "\n", - "# deactivate all sensors\n", - "options.deactivate_stream_all()\n", - "\n", - "# activate only a subset of sensors\n", - "slam_stream_ids = options.get_stream_ids(RecordableTypeId.SLAM_CAMERA_DATA)\n", - "imu_stream_ids = options.get_stream_ids(RecordableTypeId.SLAM_IMU_DATA)\n", - "\n", - "for stream_id in slam_stream_ids:\n", - " options.activate_stream(stream_id) # activate slam cameras\n", - " options.set_subsample_rate(stream_id, 1) # sample every data for 
each slam camera\n", - "\n", - "for stream_id in imu_stream_ids:\n", - " options.activate_stream(stream_id) # activate imus\n", - " options.set_subsample_rate(stream_id, 10) # sample every 10th data for each imu" - ] - }, - { - "cell_type": "markdown", - "id": "fdff6dd3", - "metadata": {}, - "source": [ - "### Step 3: create iterator to deliver data\n", - "`TimeDomain` contains the following\n", - "* `RECORD_TIME`: timestamp stored in vrs index, fast to access, but not guaranteed which time domain\n", - "* `DEVICE_TIME`: capture time in device's timedomain, accurate\n", - "* `HOST_TIME`: arrival time in host computer's timedomain, may not be accurate\n", - "* `TIME_CODE`: capture in TimeSync server's timedomain\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "efa5aad8", - "metadata": {}, - "outputs": [], - "source": [ - "iterator = provider.deliver_queued_sensor_data(options)\n", - "for sensor_data in iterator:\n", - " label = provider.get_label_from_stream_id(sensor_data.stream_id())\n", - " sensor_type = sensor_data.sensor_data_type()\n", - " device_timestamp = sensor_data.get_time_ns(TimeDomain.DEVICE_TIME)\n", - " host_timestamp = sensor_data.get_time_ns(TimeDomain.HOST_TIME)\n", - " timecode_timestamp = sensor_data.get_time_ns(TimeDomain.TIME_CODE)\n", - " print(\n", - " f\"\"\"obtain data from {label} of type {sensor_type} with\n", - " DEVICE_TIME: {device_timestamp} nanoseconds\n", - " HOST_TIME: {host_timestamp} nanoseconds\n", - " \"\"\"\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "a0796407", - "metadata": {}, - "source": [ - "# Random access data\n", - "Goal\n", - "- Access data from a stream randomly using a data index or a timestamp\n", - "\n", - "Key learnings\n", - "- Sensor data can be obtained through index within the range of [0, number of data for this stream_id)\n", - "\n", - " - `get_sensor_data_by_index(stream_id, index)`\n", - " - `get_image_data_by_index(stream_id, index)`\n", - " - Access other sensor data by index interface is available in core/python/VrsDataProviderPyBind.h\n", - " \n", - "- `TimeQueryOptions` has three options: `TimeQueryOptions.BEFORE`, `TimeQueryOptions.AFTER`, `TimeQueryOptions.CLOSEST`\n", - "- Query through index will provide the exact data vs query through a timestamp that is not exact, data nearby will be omitted base on `TimeQueryOptions`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "63657c1f", - "metadata": {}, - "outputs": [], - "source": [ - "# get all image data by index, skip every 20 frames\n", - "num_data = provider.get_num_data(example_slam_stream_id)\n", - "\n", - "for index in range(0, num_data, 20):\n", - " image_data = provider.get_image_data_by_index(example_slam_stream_id, index)\n", - " print(\n", - " f\"Get image: {index} with timestamp {image_data[1].capture_timestamp_ns}\"\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "dca84bb5", - "metadata": {}, - "source": [ - "### Sensor data can be obtained by timestamp (nanoseconds)\n", - "* Get stream time range `get_first_time_ns` and `get_last_time_ns`\n", - "* Specify timedomain: `TimeDomain.DEVICE_TIME` (default)\n", - "* Query data by queryTime\n", - " * `TimeQueryOptions.BEFORE` (default): sensor_dataTime <= queryTime\n", - " * `TimeQueryOptions.AFTER` : sensor_dataTime >= queryTime\n", - " * `TimeQueryOptions.CLOSEST` : sensor_dataTime closest to queryTime" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7783e83f", - "metadata": {}, - "outputs": [], - "source": [ - 
"time_domain = TimeDomain.DEVICE_TIME # query data based on DEVICE_TIME\n", - "option = TimeQueryOptions.CLOSEST # get data whose time [in TimeDomain] is CLOSEST to query time\n", - "\n", - "start_time = provider.get_first_time_ns(example_slam_stream_id, time_domain)\n", - "end_time = provider.get_last_time_ns(example_slam_stream_id, time_domain)\n", - "\n", - "# Fetch every 1 second (1e9 ns)\n", - "for time in range(start_time, end_time, int(1e9)):\n", - " image_data = provider.get_image_data_by_time_ns(\n", - " example_slam_stream_id, time, time_domain, option\n", - " )\n", - " print(\n", - " f\"query time {time} and get capture image time {image_data[1].capture_timestamp_ns} within range {start_time} {end_time}\"\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "a8be0b53", - "metadata": {}, - "source": [ - "### Get sensor data configuration" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6824e56a", - "metadata": {}, - "outputs": [], - "source": [ - "def image_config_example(config):\n", - " print(f\"device_type {config.device_type}\")\n", - " print(f\"device_version {config.device_version}\")\n", - " print(f\"device_serial {config.device_serial}\")\n", - " print(f\"sensor_serial {config.sensor_serial}\")\n", - " print(f\"nominal_rate_hz {config.nominal_rate_hz}\")\n", - " print(f\"image_width {config.image_width}\")\n", - " print(f\"image_height {config.image_height}\")\n", - " print(f\"pixel_format {config.pixel_format}\")\n", - " print(f\"gamma_factor {config.gamma_factor}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f3bf5afb", - "metadata": {}, - "outputs": [], - "source": [ - "config = provider.get_image_configuration(example_slam_stream_id)\n", - "image_config_example(config)" - ] - }, - { - "cell_type": "markdown", - "id": "ddf4af2e", - "metadata": {}, - "source": [ - "# Calibration examples\n", - "Goal:\n", - "- Obtain camera extrinsics and intrinsics\n", - "- Learn to project a 3D point to camera frame\n", - "\n", - "Key learnings\n", - "- Get calibration for different sensors using sensor labels\n", - "- Learn how to use extrinsics/intrinsics to project a 3D points to a given camera\n", - "- Reference frame convention" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c47e8e18", - "metadata": {}, - "outputs": [], - "source": [ - "device_calib = provider.get_device_calibration()\n", - "all_sensor_labels = device_calib.get_all_labels()\n", - "print(f\"device calibration contains calibrations for the following sensors \\n {all_sensor_labels}\")" - ] - }, - { - "cell_type": "markdown", - "id": "872040fa", - "metadata": {}, - "source": [ - "### Project a 3D point to camera frame\n", - "\n", - "In this section we will learn how to retrieve calibration data and how to use it.\n", - "Aria calibration is defined by two objects: one defining the intrinsics (`rgb_calib.project` and `rgb_calib.unproject`) and one defining the extrinsics as a SE3 pose (`device_calib.get_transform_device_sensor(sensor_label`).\n", - "\n", - "Intrinsics can be used to project a 3d point to the image plane or un-project a 2d point as a bearing vector. 
Extrinsics are used to set the camera in world coordinates at a given rotation and position in space.\n", - "\n", - "### Reference frame convention\n", - "\n", - "> `transform_sensor1_sensor3` = `transform_sensor1_sensor2` * `transform_sensor2_sensor3` \\\n", - "> `point_in_sensor`: 3D point measured from sensor's reference frame \\\n", - "> `point_in_sensor` = `transform_sensor1_sensor` * `point_in_sensor`\n", - "\n", - "Device Frame: `device_calib.get_origin_label() = camera-slam-left`\\\n", - "Sensor extrinsics: `device_calib.get_transform_device_sensor(sensor_label)`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f7e39f66", - "metadata": {}, - "outputs": [], - "source": [ - "camera_name = \"camera-rgb\"\n", - "transform_device_camera = device_calib.get_transform_device_sensor(camera_name).to_matrix()\n", - "transform_camera_device = np.linalg.inv(transform_device_camera)\n", - "print(f\"Device calibration origin label {device_calib.get_origin_label()}\")\n", - "print(f\"{camera_name} has extrinsics of \\n {transform_device_camera}\")\n", - "\n", - "rgb_calib = device_calib.get_camera_calib(\"camera-rgb\")\n", - "if rgb_calib is not None:\n", - " # project a 3D point in device frame [camera-slam-left] to rgb camera\n", - " point_in_device = np.array([0, 0, 10])\n", - " point_in_camera = (\n", - " np.matmul(transform_camera_device[0:3,0:3], point_in_device.transpose())\n", - " + transform_camera_device[0:3,3]\n", - " )\n", - "\n", - " maybe_pixel = rgb_calib.project(point_in_camera)\n", - " if maybe_pixel is not None:\n", - " print(\n", - " f\"Get pixel {maybe_pixel} within image of size {rgb_calib.get_image_size()}\"\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "3ad7ddcb", - "metadata": {}, - "source": [ - "### Get calibration data for other sensors\n", - "Aria is a multimodal capture device, each sensors calibration can be retrieved using the same interface. \n", - "\n", - "For Aria Gen1, EyeTracking (`get_aria_et_camera_calib()`) and Audio calibration (`get_aria_microphone_calib()`) is a bit different since we have multiple sensors that share the same stream_id." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cc276139", - "metadata": {}, - "outputs": [], - "source": [ - "et_calib = device_calib.get_aria_et_camera_calib()\n", - "if et_calib is not None:\n", - " print(f\"Camera {et_calib[0].get_label()} has image size {et_calib[0].get_image_size()}\")\n", - " print(f\"Camera {et_calib[1].get_label()} has image size {et_calib[1].get_image_size()}\"),\n", - "\n", - "imu_calib = device_calib.get_imu_calib(\"imu-left\")\n", - "if imu_calib is not None:\n", - " print(f\"{imu_calib.get_label()} has extrinsics transform_Device_Imu:\\n {imu_calib.get_transform_device_imu().to_matrix3x4()}\")" - ] - }, - { - "cell_type": "markdown", - "id": "700e8af5", - "metadata": {}, - "source": [ - "### Undistort an image\n", - "You can remove distortions in an image in three steps. \n", - "\n", - "First, use the provider to access the image and the camera calibration of the stream. Then create a \"linear\" spherical camera model with `get_spherical_camera_calibration`. The function allows you to specify the image size as well as focal length of the model, assuming principal point is at the image center. Finally, apply `distort_by_calibration` function to distort the image." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e20cd362", - "metadata": {}, - "outputs": [], - "source": [ - "# input: retrieve image as a numpy array\n", - "sensor_name = \"camera-rgb\"\n", - "sensor_stream_id = provider.get_stream_id_from_label(sensor_name)\n", - "image_data = provider.get_image_data_by_index(sensor_stream_id, 0)\n", - "image_array = image_data[0].to_numpy_array()\n", - "# input: retrieve image distortion\n", - "device_calib = provider.get_device_calibration()\n", - "src_calib = device_calib.get_camera_calib(sensor_name)\n", - "\n", - "# create output calibration: a linear model of image example_linear_rgb_camera_model_params.\n", - "# Invisible pixels are shown as black.\n", - "dst_calib = calibration.get_linear_camera_calibration(example_linear_rgb_camera_model_params[0], example_linear_rgb_camera_model_params[1], example_linear_rgb_camera_model_params[2], camera_name)\n", - "\n", - "# distort image\n", - "rectified_array = calibration.distort_by_calibration(image_array, dst_calib, src_calib, InterpolationMethod.BILINEAR)\n", - "\n", - "# visualize input and results\n", - "plt.figure()\n", - "fig, axes = plt.subplots(1, 2, figsize=(12, 5))\n", - "fig.suptitle(f\"Image undistortion (focal length = {dst_calib.get_focal_lengths()})\")\n", - "\n", - "axes[0].imshow(image_array, cmap=\"gray\", vmin=0, vmax=255)\n", - "axes[0].title.set_text(f\"sensor image ({sensor_name})\")\n", - "axes[0].tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)\n", - "axes[1].imshow(rectified_array, cmap=\"gray\", vmin=0, vmax=255)\n", - "axes[1].title.set_text(f\"undistorted image ({sensor_name})\")\n", - "axes[1].tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "a098c273", - "metadata": {}, - "source": [ - "Note the rectified image shows a circular area of visible pixels. If you want the entire rectified image to be covered by pixels, you can increase the magnification." - ] - }, - { - "cell_type": "markdown", - "id": "37718fa1-3482-437c-8cf0-bb6bb9686468", - "metadata": {}, - "source": [ - "# Retrieve on-device machine perception data: EyeGaze + HandTracking (Aria Gen2 only)\n", - "\n", - "Goals:\n", - "- Learn how to retrieve on-device machine perception data from VRS\n", - "\n", - "Key learnings:\n", - "- Learn what on-device MP data streams are available in Aria Gen2. \n", - "- Learn how to query such data either by timestamp, or by index. \n", - "- Learn how to match the on-device MP data with camera images." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "76f91a77-1f54-4ccc-9927-f713942faa29", - "metadata": {}, - "outputs": [], - "source": [ - "# Helper functions for on device MP plotting\n", - "from typing import List\n", - "from projectaria_tools.core.mps import hand_tracking\n", - "from matplotlib.collections import LineCollection\n", - "\n", - "def create_hand_skeleton_segments_from_landmarks(\n", - " all_landmark_locations, segment_landmark_names):\n", - " skeleton_segments = []\n", - "\n", - " # insert pairs into outline segments\n", - " for i in range(len(segment_landmark_names) - 1):\n", - " start_index = segment_landmark_names[i]\n", - " end_index = segment_landmark_names[i + 1]\n", - " skeleton_segments.append(\n", - " [all_landmark_locations[start_index], all_landmark_locations[end_index]]\n", - " )\n", - " return skeleton_segments\n", - "\n", - "\n", - "def create_hand_skeleton_from_landmarks(landmark_locations):\n", - " HandLandmark = hand_tracking.HandLandmark\n", - " hand_skeleton = []\n", - " # Palm shape\n", - " hand_skeleton.extend(\n", - " create_hand_skeleton_segments_from_landmarks(\n", - " landmark_locations,\n", - " [\n", - " HandLandmark.WRIST,\n", - " HandLandmark.THUMB_INTERMEDIATE,\n", - " HandLandmark.INDEX_PROXIMAL,\n", - " HandLandmark.MIDDLE_PROXIMAL,\n", - " HandLandmark.RING_PROXIMAL,\n", - " HandLandmark.PINKY_PROXIMAL,\n", - " HandLandmark.WRIST,\n", - " HandLandmark.PALM_CENTER,\n", - " ],\n", - " )\n", - " )\n", - "\n", - " # Thumb line\n", - " hand_skeleton.extend(\n", - " create_hand_skeleton_segments_from_landmarks(\n", - " landmark_locations,\n", - " [\n", - " HandLandmark.WRIST,\n", - " HandLandmark.THUMB_INTERMEDIATE,\n", - " HandLandmark.THUMB_DISTAL,\n", - " HandLandmark.THUMB_FINGERTIP,\n", - " ],\n", - " )\n", - " )\n", - "\n", - " # Index line\n", - " hand_skeleton.extend(\n", - " create_hand_skeleton_segments_from_landmarks(\n", - " landmark_locations,\n", - " [\n", - " HandLandmark.WRIST,\n", - " HandLandmark.INDEX_PROXIMAL,\n", - " HandLandmark.INDEX_INTERMEDIATE,\n", - " HandLandmark.INDEX_DISTAL,\n", - " HandLandmark.INDEX_FINGERTIP,\n", - " ],\n", - " )\n", - " )\n", - "\n", - " # Middle line\n", - " hand_skeleton.extend(\n", - " create_hand_skeleton_segments_from_landmarks(\n", - " landmark_locations,\n", - " [\n", - " HandLandmark.WRIST,\n", - " HandLandmark.MIDDLE_PROXIMAL,\n", - " HandLandmark.MIDDLE_INTERMEDIATE,\n", - " HandLandmark.MIDDLE_DISTAL,\n", - " HandLandmark.MIDDLE_FINGERTIP,\n", - " ],\n", - " )\n", - " )\n", - "\n", - " # Ring line\n", - " hand_skeleton.extend(\n", - " create_hand_skeleton_segments_from_landmarks(\n", - " landmark_locations,\n", - " [\n", - " HandLandmark.WRIST,\n", - " HandLandmark.RING_PROXIMAL,\n", - " HandLandmark.RING_INTERMEDIATE,\n", - " HandLandmark.RING_DISTAL,\n", - " HandLandmark.RING_FINGERTIP,\n", - " ],\n", - " )\n", - " )\n", - "\n", - " # Pinky line\n", - " hand_skeleton.extend(\n", - " create_hand_skeleton_segments_from_landmarks(\n", - " landmark_locations,\n", - " [\n", - " HandLandmark.WRIST,\n", - " HandLandmark.PINKY_PROXIMAL,\n", - " HandLandmark.PINKY_INTERMEDIATE,\n", - " HandLandmark.PINKY_DISTAL,\n", - " HandLandmark.PINKY_FINGERTIP,\n", - " ],\n", - " )\n", - " )\n", - "\n", - " # Remove segments that may contain empty pixels\n", - " hand_skeleton = list(\n", - " filter(lambda x: x[0] is not None and x[1] is not None, hand_skeleton)\n", - " )\n", - "\n", - " return hand_skeleton\n", - "\n", - "\n", - "def plot_single_hand(axes, hand_markers_in_device, 
rgb_calib, hand_label):\n", - " hand_markers_in_rgb = []\n", - " # Project markers into RGB camera frame\n", - " for marker_in_device in hand_markers_in_device:\n", - " marker_in_rgb = rgb_calib.project(rgb_calib.get_transform_device_camera().inverse() @ marker_in_device)\n", - " hand_markers_in_rgb.append(marker_in_rgb)\n", - "\n", - " # Create hand skeleton\n", - " hand_skeleton = create_hand_skeleton_from_landmarks(hand_markers_in_rgb)\n", - " hand_skeleton_line_collection = LineCollection(hand_skeleton, linewidths=2, colors='g')\n", - "\n", - " # Remove \"None\" markers from hand joints in camera. This is intentionally done AFTER the hand skeleton creation\n", - " hand_markers_in_rgb = list(\n", - " filter(lambda x: x is not None, hand_markers_in_rgb)\n", - " )\n", - " if len(hand_markers_in_rgb) == 0:\n", - " return\n", - "\n", - " hand_markers_x = [x[0] for x in hand_markers_in_rgb]\n", - " hand_markers_y = [x[1] for x in hand_markers_in_rgb]\n", - "\n", - " # Plot hand markers\n", - " if hand_label == \"left\":\n", - " color = \"orangered\"\n", - " else:\n", - " color = \"yellow\"\n", - " axes.plot(hand_markers_x, hand_markers_y, 'o', markersize=5, color=color) # 'o' is for circle markers\n", - "\n", - " axes.add_collection(hand_skeleton_line_collection)\n", - "\n", - "def plot_hand_pose_data(axes, provider, timestamp, time_domain, time_tolerance, rgb_calib):\n", - " hand_stream_id = provider.get_stream_id_from_label(\"handtracking\")\n", - " if hand_stream_id is None:\n", - " print(\"Hand tracking stream not found in current VRS, skipping.\")\n", - " else:\n", - " # Query hand pose data\n", - " hand_pose_data = provider.get_hand_pose_data_by_time_ns(hand_stream_id, timestamp, time_domain)\n", - "\n", - " if abs(hand_pose_data.tracking_timestamp.total_seconds() * 1e9 - timestamp) <= time_tolerance and (hand_pose_data.left_hand is not None or hand_pose_data.right_hand is not None):\n", - " print(\"Hand data valid at this timestamp\")\n", - "\n", - " if hand_pose_data.left_hand is not None:\n", - " plot_single_hand(axes, hand_pose_data.left_hand.landmark_positions_device, rgb_calib, \"left\")\n", - " if hand_pose_data.right_hand is not None:\n", - " plot_single_hand(axes, hand_pose_data.right_hand.landmark_positions_device, rgb_calib, \"right\")\n", - " plt.show()\n", - "\n", - " else:\n", - " print(\"Hand data invalid at this timestamp\")\n", - "\n", - "\n", - "def plot_eye_gaze_data(axes, provider, timestamp, time_domain, time_tolerance, rgb_calib, T_device_cpf):\n", - " eyegaze_stream_id = provider.get_stream_id_from_label(\"eyegaze\")\n", - " if eyegaze_stream_id is None:\n", - " print(\"eyegaze stream not found in current VRS, skipping.\")\n", - " else:\n", - " # Query eyegaze data\n", - " eyegaze_data = provider.get_eye_gaze_data_by_time_ns(eyegaze_stream_id, timestamp, time_domain)\n", - "\n", - " if eyegaze_data.spatial_gaze_point_valid and abs(eyegaze_data.tracking_timestamp.total_seconds() * 1e9 - timestamp) <= time_tolerance:\n", - " print(\"spatial gaze point is valid at this timestamp\")\n", - " spatial_gaze_point_in_cpf = eyegaze_data.spatial_gaze_point_in_cpf\n", - " spatial_gaze_point_in_device = T_device_cpf @ spatial_gaze_point_in_cpf\n", - "\n", - " # Project spatial gaze point into RGB frame\n", - " point = rgb_calib.get_transform_device_camera().inverse() @ spatial_gaze_point_in_device\n", - " projected_gaze_point = rgb_calib.project(point)\n", - " if projected_gaze_point is not None:\n", - " # Plot a red cross as gaze point\n", - " 
axes.plot(projected_gaze_point[0], projected_gaze_point[1], 'ro', linewidth = 3, markersize=8)\n", - " axes.text(projected_gaze_point[0]+50, projected_gaze_point[1], 'EyeGazePoint', color='red', fontsize=10,\n", - " bbox=dict(facecolor='red', alpha=0, boxstyle='round,pad=0.5'))\n", - " else:\n", - " print(\"eyegaze point projection out of camera frame\")\n", - " else:\n", - " print(\"spatial gaze point is not valid at this timestamp\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "95ff54a8-1bed-4d06-988d-29b7136da3e0", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "if device_version == DeviceVersion.Gen2:\n", - " # Use a slider to get a certain RGB frame, and try to plot the corresponding EyeGaze and HandPose data in RGB image.\n", - " rgb_stream_id = StreamId(\"214-1\")\n", - " time_domain = TimeDomain.DEVICE_TIME\n", - " num_rgb_frames = provider.get_num_data(rgb_stream_id)\n", - "\n", - " # Get RGB calibration\n", - " device_calib = provider.get_device_calibration()\n", - " rgb_calib = device_calib.get_camera_calib(\"camera-rgb\")\n", - " T_device_cpf = device_calib.get_transform_device_cpf()\n", - "\n", - " # Create a widget with slider to choose an RGB frame to plot\n", - " import ipywidgets as widgets\n", - " from IPython.display import display\n", - " from functools import partial\n", - "\n", - " # Get the very first frame (frame=0) so we can initialize the image.\n", - " initial_rgb_record = provider.get_image_data_by_index(rgb_stream_id, 0)\n", - " initial_rgb_array = initial_rgb_record[0].to_numpy_array()\n", - " # Normalize [0,255] → [0,1]\n", - " initial_norm = (initial_rgb_array - 0) / 255.0\n", - " initial_norm = np.clip(initial_norm, 0, 1)\n", - "\n", - " # Create figure & axes just once:\n", - " fig, axes = plt.subplots(figsize=(6, 6))\n", - " img_handle = axes.imshow(initial_norm, cmap=\"gray\", vmin=0, vmax=1)\n", - " axes.axis(\"off\") # hide ticks\n", - " plt.close(fig)\n", - "\n", - " output = widgets.Output()\n", - " with output:\n", - " display(fig)\n", - "\n", - " slider = widgets.IntSlider(value=0, min=0, max=num_rgb_frames-1, continuous_update = False)\n", - "\n", - " def on_slider_change(change, output, provider, time_domain, rgb_stream_id, rgb_calib, T_device_cpf):\n", - " with output: # you need this for Bento Next\n", - " output.clear_output(wait=True)\n", - " rgb_frame_index = change['new']\n", - " print(f\"Selecting RGB frame {rgb_frame_index}\")\n", - "\n", - " # Plot RGB image\n", - " rgb_image_and_record = provider.get_image_data_by_index(\n", - " rgb_stream_id, rgb_frame_index)\n", - " rgb_image_array = rgb_image_and_record[0].to_numpy_array()\n", - " rgb_timestamp = rgb_image_and_record[1].capture_timestamp_ns\n", - " min_val, max_val = 0, 255 # Set your desired min and max values\n", - " normalized_rgb_image = (rgb_image_array - min_val) / (max_val - min_val)\n", - " normalized_rgb_image = np.clip(normalized_rgb_image, 0, 1) # Ensure values are within [0, 1]\n", - "\n", - " img_handle.set_data(normalized_rgb_image)\n", - "\n", - " # Remove any old overlays (eye gaze / hand pose) from previous call\n", - " for artist in axes.artists + axes.lines + axes.collections:\n", - " artist.remove()\n", - " for txt in axes.texts:\n", - " txt.remove()\n", - "\n", - " # tolerance time to ensure the MP data is close to the query time.\n", - " time_tolerance = 500e6\n", - "\n", - " # Plot Eye gaze data\n", - " plot_eye_gaze_data(axes, provider, rgb_timestamp, time_domain, time_tolerance, rgb_calib, 
T_device_cpf)\n", - "\n", - " # Plot hand pose data\n", - " plot_hand_pose_data(axes, provider, rgb_timestamp, time_domain, time_tolerance, rgb_calib)\n", - "\n", - " display(fig)\n", - "\n", - "\n", - " # Attach the function to the slider\n", - " print(\"Please select a RGB Frame ID, note that plotting may be slow in Bento notebook\")\n", - " wrapped_function = partial(on_slider_change, output = output, provider = provider, time_domain=time_domain, rgb_stream_id=rgb_stream_id, rgb_calib=rgb_calib, T_device_cpf=T_device_cpf)\n", - " slider.observe(wrapped_function, names='value')\n", - "\n", - " display(slider, output)\n", - "else:\n", - " print(\"On-device machine perception data is only available in Aria Gen2. \")" - ] - }, - { - "cell_type": "markdown", - "id": "b9f67149-e5eb-44c4-b3d1-1ac00dcb6166", - "metadata": {}, - "source": [ - "## Retrieve on-device machine perception data (VIO high frequency and VIO)\n", - "\n", - "Goals:\n", - "- Learn how to retrieve on-device machine perception data (VIO, VIO high frequency) from VRS\n", - "\n", - "Key learnings:\n", - "- Learn how to query VIO pose information from VRS." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c7b150c-801c-4c99-b149-0e1dfaed94ca", - "metadata": {}, - "outputs": [], - "source": [ - "import plotly.graph_objs as go\n", - "from matplotlib import pyplot as plt\n", - "from projectaria_tools.core.sophus import SE3f\n", - "from projectaria_tools.core.sensor_data import TrackingQuality\n", - "\n", - "# Tune this parameter to control the plotted camera frustum size\n", - "CAMERA_FRUSTUM_SIZE = 0.1\n", - "\n", - "# Helper function to build the frustum\n", - "def build_camera_frustum(T_world_camera):\n", - " points = (\n", - " np.array(\n", - " [[0, 0, 0], [0.5, 0.5, 1], [-0.5, 0.5, 1], [-0.5, -0.5, 1], [0.5, -0.5, 1]]\n", - " )\n", - " * CAMERA_FRUSTUM_SIZE\n", - " )\n", - " points_transformed = T_world_camera @ points.transpose()\n", - " return go.Mesh3d(\n", - " x=points_transformed[0, :],\n", - " y=points_transformed[1, :],\n", - " z=points_transformed[2, :],\n", - " i=[0, 0, 0, 0, 1, 1],\n", - " j=[1, 2, 3, 4, 2, 3],\n", - " k=[2, 3, 4, 1, 3, 4],\n", - " showscale=False,\n", - " visible=False,\n", - " colorscale=\"jet\",\n", - " intensity=points[:, 2],\n", - " opacity=1.0,\n", - " hoverinfo=\"none\",\n", - " )\n", - "\n", - "# helper function to cast from double to float\n", - "def cast_SE3_to_SE3f(se3_double):\n", - " # Ensure size=1\n", - " if len(se3_double) != 1:\n", - " raise ValueError(\"Expected SE3 of size 1 for this cast helper\")\n", - " mat = se3_double.to_matrix() # shape (4,4)\n", - " mat_f = mat.astype(np.float32)\n", - " return SE3f.from_matrix(mat_f) # returns SE3f of size 1\n", - "\n", - "vio_high_freq_stream_id = provider.get_stream_id_from_label(\"vio_high_frequency\")\n", - "vio_stream_id = provider.get_stream_id_from_label(\"vio\")\n", - "if vio_high_freq_stream_id is not None and vio_stream_id is not None:\n", - " T_device_rgb = device_calib.get_transform_device_sensor(\"camera-rgb\")\n", - "\n", - " vio_high_freq_data_num = provider.get_num_data(vio_high_freq_stream_id)\n", - "\n", - " # Record RGB locations in the vio-high-freq trajectory (subsample by 20)\n", - " vio_high_freq_subsample_rate = 20\n", - " vio_high_freq_trajectory = np.empty([vio_high_freq_data_num // vio_high_freq_subsample_rate + 1, 3])\n", - " print(f\"--- vio high freq: num of data is {vio_high_freq_data_num}, size of traj is {vio_high_freq_trajectory.shape}\")\n", - " all_high_freq_poses = []\n", - " j = 
0\n", - " for i in range(0, vio_high_freq_data_num, vio_high_freq_subsample_rate):\n", - " vio_high_freq_pose = provider.get_vio_high_freq_data_by_index(vio_high_freq_stream_id, i)\n", - " T_odometry_rgb = vio_high_freq_pose.transform_odometry_device @ T_device_rgb\n", - " vio_high_freq_trajectory[j, :] = T_odometry_rgb.translation()\n", - " all_high_freq_poses.append(vio_high_freq_pose)\n", - " j = j+1\n", - "\n", - " # Plot camera frustum trace along high freq trajectory\n", - " cam_frustums = [None]*len(vio_high_freq_trajectory)\n", - " steps = [None] * len(vio_high_freq_trajectory)\n", - " for i in range(len(vio_high_freq_trajectory)):\n", - " pose = all_high_freq_poses[i]\n", - " cam_frustums[i] = build_camera_frustum(pose.transform_odometry_device @ T_device_rgb)\n", - " timestamp = pose.tracking_timestamp.total_seconds()\n", - " step = dict(method=\"update\", args=[{\"visible\": [False] * len(cam_frustums) + [True] * 2}, {\"title\": \"Trajectory and Point Cloud\"},], label=timestamp,)\n", - " step[\"args\"][0][\"visible\"][i] = True # Toggle i'th trace to \"visible\"\n", - " steps[i] = step\n", - " cam_frustums[0].visible = True\n", - "\n", - " # Record RGB poses in the vio trajectory, check validity\n", - " valid_vio_poses = []\n", - " vio_data_num = provider.get_num_data(vio_stream_id)\n", - " for i in range(vio_data_num):\n", - " vio_data = provider.get_vio_data_by_index(vio_stream_id, i)\n", - " # Check if the pose quality is GOOD\n", - " if vio_data.pose_quality == TrackingQuality.GOOD:\n", - " T_odometry_rgb = (vio_data.transform_odometry_bodyimu @\n", - " vio_data.transform_bodyimu_device @\n", - " cast_SE3_to_SE3f(T_device_rgb))\n", - " valid_vio_poses.append(T_odometry_rgb.translation().transpose())\n", - " \n", - " # Convert the list of good poses to a NumPy array\n", - " vio_trajectory = np.array(valid_vio_poses).squeeze()\n", - "\n", - " # Create slider to allow scrubbing and set the layout\n", - " sliders = [dict(currentvalue={\"suffix\": \" s\", \"prefix\": \"Time :\"}, pad={\"t\": 5}, steps=steps,)]\n", - " layout = go.Layout(\n", - " sliders=sliders,\n", - " scene=dict(\n", - " bgcolor='lightgray',\n", - " dragmode='orbit',\n", - " aspectmode='data',\n", - " xaxis_visible=False,\n", - " yaxis_visible=False,\n", - " zaxis_visible=False,\n", - " camera=dict(\n", - " eye=dict(x=0.5, y=0.5, z=0.5),\n", - " center=dict(x=0, y=0, z=0),\n", - " up=dict(x=0, y=0, z=1)\n", - " )),\n", - " width=1100,\n", - " height=1000,\n", - " )\n", - "\n", - " # Plot trajectory\n", - " plotter_vio_high_freq_trajectory = go.Scatter3d(x=vio_high_freq_trajectory[:, 0], y=vio_high_freq_trajectory[:, 1], z=vio_high_freq_trajectory[:, 2],\n", - " mode=\"markers\", marker={\"size\": 2, \"opacity\": 0.8, \"color\": \"red\"},\n", - " name=\"Vio High Freq Trajectory\")\n", - " plotter_vio_trajectory = go.Scatter3d(x=vio_trajectory[:, 0], y=vio_trajectory[:, 1], z=vio_trajectory[:, 2],\n", - " mode=\"markers\", marker={\"size\": 4, \"opacity\": 0.8, \"color\": \"green\"},\n", - " name=\"Vio Trajectory\")\n", - "\n", - " # draw\n", - " plot_figure = go.Figure(data=cam_frustums + [plotter_vio_high_freq_trajectory, plotter_vio_trajectory], layout=layout)\n", - " plot_figure.show()\n", - "\n", - "else:\n", - " print(\"Vio high-freq stream does not exist in the current VRS file. 
\")" - ] - }, - { - "cell_type": "markdown", - "id": "bfc89c4b-361f-4188-9d5e-50eb2e38994c", - "metadata": {}, - "source": [ - "# Image color correction and devignetting examples (Aria Gen1 only)\n", - "## Correcting Color Distortion in Older Aria Captures\n", - "Videos and images captured with earlier versions of the Aria OS may exhibit color distortion due to inconsistent gamma curves and unconventional color temperatures. This can result in colors appearing inconsistent across images and overly blue.\n", - "This issue has been resolved in the new OS update V1.13. For images and videos captured before this update, we offer a Color Correction API to address the distortion. The images will be corrected to a reference color temperature of 5000K. \n", - "\n", - "Below, we demonstrate how to apply color correction: \n", - "1. set `set_color_correction` with True, default value is False\n", - "2. The output from `provider.get_image_data_by_index` would be color corrected. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ee1c8caf-6b6f-42ed-adaf-35c3c593b0b8", - "metadata": {}, - "outputs": [], - "source": [ - "if device_version == DeviceVersion.Gen1:\n", - " # save source image for comparison\n", - " stream_id = provider.get_stream_id_from_label(\"camera-rgb\")\n", - " provider.set_color_correction(False)\n", - " provider.set_devignetting(False)\n", - " src_image_array = provider.get_image_data_by_index(stream_id, 0)[0].to_numpy_array()\n", - "\n", - " provider.set_color_correction(True)\n", - " provider.set_devignetting(False)\n", - " color_corrected_image_array = provider.get_image_data_by_index(stream_id, 0)[0].to_numpy_array()\n", - "\n", - " # visualize input and results\n", - " plt.figure()\n", - " fig, axes = plt.subplots(1, 2, figsize=(12, 6))\n", - " fig.suptitle(f\"Color Correction\")\n", - "\n", - " axes[0].imshow(src_image_array, vmin=0, vmax=255)\n", - " axes[0].title.set_text(f\"before color correction\")\n", - " axes[1].imshow(color_corrected_image_array, vmin=0, vmax=255)\n", - " axes[1].title.set_text(f\"after color correction\")\n", - "\n", - " plt.show()\n", - "else:\n", - " print(\"Color correction feature is Gen1 only\")" - ] - }, - { - "cell_type": "markdown", - "id": "4d29fa4b", - "metadata": {}, - "source": [ - "#### Devignetting\n", - "\n", - "Devignetting corrects uneven lighting, enhancing image uniformity and clarity. We provide devignetting for camera-rgb full size image [2880, 2880], camera-rgb half size image[1408, 1408] and slam image [640, 480].\n", - "1. Aria devignetting masks can be downloaded from [Link](https://www.projectaria.com/async/sample/download/?bucket=core&filename=devignetting_masks_bin.zip). It contains the following files:\n", - "\n", - "```\n", - "devignetting_masks_bin\n", - "|- new_isp\n", - " |- slam_devignetting_mask.bin\n", - " |- rgb_half_devignetting_mask.bin\n", - " |- rgb_full_devignetting_mask.bin\n", - "|- old_isp\n", - " |- slam_devignetting_mask.bin\n", - " |- rgb_half_devignetting_mask.bin\n", - " |- rgb_full_devignetting_mask.bin\n", - "```\n", - "2. Turn on devignetting. Set devignetting mask folder path with the local aria camera devignetting masks folder path.\n", - " `set_devignetting(True)`\n", - " `mask_folder_path = \"devignetting_masks_bin\"`\n", - " `set_devignetting_mask_folder_path(mask_folder_path)`\n", - "3. The image data from `get_image_data_by_index` will be devignetted. \n", - "4. 
(Optional) If you don't want to devignetting feature, turn off by calling `set_devignetting(False)`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "747698e2-5cba-4b03-a90d-2d08b3e53456", - "metadata": {}, - "outputs": [], - "source": [ - "if device_version == DeviceVersion.Gen1:\n", - " # ==============================================================================\n", - " # Step 1: Download devignetting mask\n", - " # ==============================================================================\n", - " from urllib.request import urlretrieve\n", - " import zipfile\n", - " import ssl\n", - " ssl._create_default_https_context = ssl._create_unverified_context\n", - "\n", - " # Download from url\n", - " devignetting_mask_folder_path = os.path.join(repo_path, \"devignetting_masks\")\n", - " downloaded_devignetting_mask_zip = os.path.join(devignetting_mask_folder_path, \"aria_camera_devignetting_masks.zip\")\n", - " if not os.path.exists(devignetting_mask_folder_path):\n", - " os.mkdir(devignetting_mask_folder_path)\n", - " urlretrieve(\"https://www.projectaria.com/async/sample/download/?bucket=core&filename=devignetting_masks_bin.zip\", downloaded_devignetting_mask_zip)\n", - "\n", - " # unzip the mask files, with cross-platform compatibility\n", - " with zipfile.ZipFile(downloaded_devignetting_mask_zip, 'r') as zip_ref:\n", - " # Extract all files\n", - " zip_ref.extractall(devignetting_mask_folder_path)\n", - "\n", - " # Print out the filenames\n", - " print(f\"Successfully downloaded and extracted the following files for devignetting:\")\n", - " for file_info in zip_ref.infolist():\n", - " print(file_info.filename)\n", - "\n", - " # ==============================================================================\n", - " # Step 2: Turn on devignetting and set devignetting mask folder path\n", - " # ==============================================================================\n", - " index = 1\n", - " provider.set_devignetting(False)\n", - " provider.set_color_correction(False)\n", - " src_image_array = provider.get_image_data_by_index(stream_id, index)[0].to_numpy_array()\n", - " provider.set_devignetting(True)\n", - " provider.set_devignetting_mask_folder_path(devignetting_mask_folder_path)\n", - "\n", - " # ==============================================================================\n", - " # Step 3: Retrieve Image from stream\n", - " # ==============================================================================\n", - " devignetted_image_array = provider.get_image_data_by_index(stream_id, index)[0].to_numpy_array()\n", - "\n", - " # visualize input and results\n", - " plt.figure()\n", - " fig, axes = plt.subplots(1, 2, figsize=(12, 6))\n", - " fig.suptitle(f\"Image devignetting (camera-rgb)\")\n", - "\n", - " axes[0].imshow(src_image_array, vmin=0, vmax=255)\n", - " axes[0].title.set_text(f\"before devignetting\")\n", - " axes[1].imshow(devignetted_image_array, vmin=0, vmax=255)\n", - " axes[1].title.set_text(f\"after devignetting\")\n", - "\n", - " plt.show()\n", - "else:\n", - " print(\"Devignetting is only supported on Gen1.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f78c9dbb-6d22-4261-b314-aed8584f8346", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "custom": { - "cells": [], - "metadata": { - "fileHeader": "", - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 
3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 - }, - "indentAmount": 2, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/Gen2/python_notebooks/gen2_multidevice_time_sync_tutorial.ipynb b/examples/Gen2/python_notebooks/gen2_multidevice_time_sync_tutorial.ipynb deleted file mode 100644 index c703689ca..000000000 --- a/examples/Gen2/python_notebooks/gen2_multidevice_time_sync_tutorial.ipynb +++ /dev/null @@ -1,156 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Multi-device time synchronization tutorial (for Aria Gen2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "from PIL import Image\n", - "from typing import Iterator, Any\n", - "import os\n", - "\n", - "from projectaria_tools.core import data_provider\n", - "from projectaria_tools.core.data_provider import VrsMetadata, MetadataTimeSyncMode\n", - "from projectaria_tools.core.sensor_data import (\n", - " SensorData,\n", - " ImageData,\n", - " TimeDomain,\n", - " TimeQueryOptions,\n", - " TimeSyncMode,\n", - ")\n", - "from projectaria_tools.core.stream_id import StreamId" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Download time-synced recordings\n", - "Download the `gen2_timesync_server.vrs` and `gen2_timesync_client.vrs` files from Google Drive, and modify the following code to point to their download path" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Create server and client data provider from recordings\n", - "data_folder = os.path.join(os.path.expanduser(\"~\"), \"Downloads\", \"06_04_PAT_Gen2_External_BugBash\")\n", - "\n", - "server_recording = os.path.join(data_folder, \"gen2_timesync_server.vrs\")\n", - "server_data_provider = data_provider.create_vrs_data_provider(server_recording)\n", - "\n", - "client_recording = os.path.join(data_folder, \"gen2_timesync_client.vrs\")\n", - "client_data_provider = data_provider.create_vrs_data_provider(client_recording)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Find a valid timestamp on server time domain\n", - "Because server and client devices could start at different time, therefore we first locate a timestamp that both devices are guaranteed to have valid data. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Fetch a given RGB frame from the server device\n", - "rgb_stream_id = server_data_provider.get_stream_id_from_label(\"camera-rgb\")\n", - "assert rgb_stream_id is not None, \"RGB stream does not exist in server vrs\"\n", - "\n", - "# Find the first client timestamp, and convert it to server time.\n", - "first_client_timestamp = client_data_provider.get_first_time_ns(rgb_stream_id, TimeDomain.SUBGHZ)\n", - "\n", - "# Pick a server timetamp, which client device is guaranteed to have data\n", - "first_server_timestamp = server_data_provider.get_first_time_ns(rgb_stream_id, TimeDomain.DEVICE_TIME)\n", - "server_timestamp = max(first_server_timestamp, first_client_timestamp) + int(16.1e9)\n", - "print(f\"Fetching from server timestamp {server_timestamp}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Fetch synced images from both devices" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Fetch RGB frame from server device. Note that we only need to fetch by DEVICE TIME on server, because it broadcasts DEVICE TIME\n", - "server_image_data = server_data_provider.get_image_data_by_time_ns(rgb_stream_id, server_timestamp, TimeDomain.DEVICE_TIME)\n", - "\n", - "# Fetch the corresponding RGB frame from the client device by SUBGHZ time domain.\n", - "client_image_data = client_data_provider.get_image_data_by_time_ns(rgb_stream_id, server_timestamp, TimeDomain.SUBGHZ)\n", - "\n", - "# Print the timestamp difference between query and result\n", - "timestamp_ns_difference = client_image_data[1].capture_timestamp_ns - client_data_provider.convert_from_synctime_to_device_time_ns(server_timestamp, TimeSyncMode.SUBGHZ)\n", - "print(f\"Timestamp difference between query and result: {timestamp_ns_difference/1e6} ms\")\n", - "\n", - "\n", - "# Display the fetched RGB frames between server and client\n", - "axes = []\n", - "fig, axes = plt.subplots(1, 2, figsize=(30, 15))\n", - "fig.suptitle('Retrieving time synced image data from server and client')\n", - "\n", - "axes[0].imshow(server_image_data[0].to_numpy_array(), cmap=\"gray\", vmin=0, vmax=255)\n", - "axes[0].title.set_text(\"server image\")\n", - "axes[0].grid(False)\n", - "axes[0].tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)\n", - "\n", - "axes[1].imshow(client_image_data[0].to_numpy_array(), cmap=\"gray\", vmin=0, vmax=255)\n", - "axes[1].title.set_text(\"client image\")\n", - "axes[1].grid(False)\n", - "axes[1].tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.10" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/examples/Gen2/python_notebooks/mps_quickstart_tutorial.ipynb b/examples/Gen2/python_notebooks/mps_quickstart_tutorial.ipynb deleted file mode 100644 index 07bbc503b..000000000 --- 
a/examples/Gen2/python_notebooks/mps_quickstart_tutorial.ipynb +++ /dev/null @@ -1,828 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "a0347389", - "metadata": {}, - "source": [ - "# MPS Tutorial\n", - "This sample will show you how to use the Aria MPS data via the MPS apis.\n", - "Please refer to the MPS wiki for more information about data formats and schemas\n", - "\n", - "### Notebook stuck?\n", - "Note that because of Jupyter and Plotly issues, sometimes the code may stuck at visualization. We recommend **restart the kernels** and try again to see if the issue is resolved.\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "163e63fa", - "metadata": {}, - "source": [ - "## Download the MPS sample dataset locally\n", - "> The sample dataset will get downloaded to a **tmp** folder by default. Please modify the path if necessary" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "caa38162", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "from tqdm import tqdm\n", - "from urllib.request import urlretrieve\n", - "from zipfile import ZipFile\n", - "\n", - "google_colab_env = 'google.colab' in str(get_ipython())\n", - "if google_colab_env:\n", - " print(\"Running from Google Colab, installing projectaria_tools and getting sample data\")\n", - " !pip install projectaria-tools\n", - " mps_sample_path = \"./mps_sample_data/\"\n", - "else:\n", - " mps_sample_path = \"/tmp/mps_sample_data/\"\n", - "\n", - "base_url = \"https://www.projectaria.com/async/sample/download/?bucket=mps&filename=\"\n", - "os.makedirs(mps_sample_path, exist_ok=True)\n", - "\n", - "filenames = [\n", - " \"sample.vrs\",\n", - " \"slam_v1_1_0.zip\",\n", - " \"eye_gaze_v3_1_0.zip\",\n", - " \"hand_tracking_v2_0_0.zip\"]\n", - "\n", - "print(\"Downloading sample data\")\n", - "for filename in tqdm(filenames):\n", - " print(f\"Processing: {filename}\")\n", - " full_path: str = os.path.join(mps_sample_path, filename)\n", - " urlretrieve(f\"{base_url}{filename}\", full_path)\n", - " if filename.endswith(\".zip\"):\n", - " with ZipFile(full_path, 'r') as zip_ref:\n", - " folder_extraction = mps_sample_path\n", - " os.makedirs(folder_extraction, exist_ok=True)\n", - " zip_ref.extractall(path=folder_extraction)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "44984456", - "metadata": {}, - "source": [ - "## Load the trajectory, point cloud, eye gaze and hands using the MPS apis" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c6c7c09c", - "metadata": {}, - "outputs": [], - "source": [ - "from projectaria_tools.core import data_provider, mps\n", - "from projectaria_tools.core.mps.utils import (\n", - " filter_points_from_confidence,\n", - " get_gaze_vector_reprojection,\n", - " get_nearest_eye_gaze,\n", - " get_nearest_pose,\n", - ")\n", - "from projectaria_tools.core.stream_id import StreamId\n", - "import numpy as np\n", - "\n", - "# Load the VRS file\n", - "vrsfile = os.path.join(mps_sample_path, \"sample.vrs\")\n", - "\n", - "# Trajectory, global points, and online calibration\n", - "closed_loop_trajectory = os.path.join(\n", - " mps_sample_path, \"slam\", \"closed_loop_trajectory.csv\"\n", - ")\n", - "global_points = os.path.join(mps_sample_path, \"slam\", \"semidense_points.csv.gz\")\n", - "online_calibrations_path = os.path.join(mps_sample_path, \"slam\", \"online_calibration.jsonl\")\n", - "\n", - "# Eye gaze\n", - "generalized_eye_gaze_path = os.path.join(\n", - " 
mps_sample_path, \"eye_gaze\", \"general_eye_gaze.csv\"\n", - ")\n", - "calibrated_eye_gaze_path = os.path.join(\n", - " mps_sample_path, \"eye_gaze\", \"personalized_eye_gaze.csv\"\n", - ")\n", - "\n", - "# Hand tracking\n", - "hand_tracking_results_path = os.path.join(\n", - " mps_sample_path, \"hand_tracking\", \"hand_tracking_results.csv\"\n", - ")\n", - "\n", - "\n", - "# Create data provider and get T_device_rgb\n", - "provider = data_provider.create_vrs_data_provider(vrsfile)\n", - "# Since we want to display the position of the RGB camera, we are querying its relative location\n", - "# from the device and will apply it to the device trajectory.\n", - "T_device_RGB = provider.get_device_calibration().get_transform_device_sensor(\n", - " \"camera-rgb\"\n", - ")\n", - "\n", - "## Load trajectory and global points\n", - "mps_trajectory = mps.read_closed_loop_trajectory(closed_loop_trajectory)\n", - "points = mps.read_global_point_cloud(global_points)\n", - "\n", - "## Load online calibration file\n", - "online_calibrations = mps.read_online_calibration(online_calibrations_path)\n", - "\n", - "## Load eyegaze\n", - "generalized_eye_gazes = mps.read_eyegaze(generalized_eye_gaze_path)\n", - "calibrated_eye_gazes = mps.read_eyegaze(calibrated_eye_gaze_path)\n", - "\n", - "## Load hand tracking\n", - "hand_tracking_results = mps.hand_tracking.read_hand_tracking_results(\n", - " hand_tracking_results_path\n", - ")\n", - "\n", - "# Loaded data must be not empty\n", - "assert(\n", - " len(mps_trajectory) != 0 and\n", - " len(points) != 0 and\n", - " len(online_calibrations) !=0 and\n", - " len(generalized_eye_gazes) != 0 and\n", - " len(calibrated_eye_gazes) != 0 and\n", - " len(hand_tracking_results) != 0)\n", - "\n", - "\n", - "# Or you can load the whole mps output with MpsDataProvider\n", - "mps_data_provider = mps.MpsDataProvider(mps.MpsDataPathsProvider(mps_sample_path).get_data_paths())\n", - "\n", - "assert(mps_data_provider.has_general_eyegaze() and\n", - " mps_data_provider.has_personalized_eyegaze() and \n", - " mps_data_provider.has_open_loop_poses() and\n", - " mps_data_provider.has_closed_loop_poses() and\n", - " mps_data_provider.has_online_calibrations() and\n", - " mps_data_provider.has_semidense_point_cloud() and\n", - " mps_data_provider.has_hand_tracking_results())\n", - "\n", - "# Get the MPS service versions\n", - "print(f\"slam_version: {mps_data_provider.get_slam_version()}\")\n", - "print(f\"eyegaze_version: {mps_data_provider.get_eyegaze_version()}\")\n", - "print(f\"hand_tracking_version: {mps_data_provider.get_hand_tracking_version()}\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "2f2d6248", - "metadata": {}, - "source": [ - "## Helper functions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01fc8fc1", - "metadata": {}, - "outputs": [], - "source": [ - "import plotly.graph_objs as go\n", - "from matplotlib import pyplot as plt\n", - "\n", - "# Helper function to build the frustum\n", - "def build_camera_frustum(transform_world_device):\n", - " points = (\n", - " np.array(\n", - " [[0, 0, 0], [0.5, 0.5, 1], [-0.5, 0.5, 1], [-0.5, -0.5, 1], [0.5, -0.5, 1]]\n", - " )\n", - " * 0.6\n", - " )\n", - " transform_world_rgb = transform_world_device @ T_device_RGB\n", - " points_transformed = transform_world_rgb @ points.transpose()\n", - " return go.Mesh3d(\n", - " x=points_transformed[0, :],\n", - " y=points_transformed[1, :],\n", - " z=points_transformed[2, :],\n", - " i=[0, 0, 0, 0, 1, 1],\n", - " j=[1, 2, 3, 4, 2, 
3],\n", - " k=[2, 3, 4, 1, 3, 4],\n", - " showscale=False,\n", - " visible=False,\n", - " colorscale=\"jet\",\n", - " intensity=points[:, 2],\n", - " opacity=1.0,\n", - " hoverinfo=\"none\",\n", - " )" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "e125bbdc", - "metadata": {}, - "source": [ - "## Visualize the trajectory and point cloud in a 3D interactive plot\n", - "* Load trajectory\n", - "* Load global point cloud\n", - "* Render dense trajectory (1Khz) as points.\n", - "* Render subsampled 6DOF poses via camera frustum. Use calibration to transform RGB camera pose to world frame\n", - "* Render subsampled point cloud\n", - "\n", - "_Please wait a minute for all the data to load. Zoom in to the point cloud and adjust your view. Then use the time slider to move the camera_" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9f26ead5", - "metadata": {}, - "outputs": [], - "source": [ - "# Load all world positions from the trajectory\n", - "traj = np.empty([len(mps_trajectory), 3])\n", - "for i in range(len(mps_trajectory)):\n", - " traj[i, :] = mps_trajectory[i].transform_world_device.translation()\n", - "\n", - "# Subsample trajectory for quick display\n", - "skip = 1000\n", - "mps_trajectory_subset = mps_trajectory[::skip]\n", - "steps = [None]*len(mps_trajectory_subset)\n", - "\n", - "# Load each pose as a camera frustum trace\n", - "cam_frustums = [None]*len(mps_trajectory_subset)\n", - "\n", - "for i in range(len(mps_trajectory_subset)):\n", - " pose = mps_trajectory_subset[i]\n", - " cam_frustums[i] = build_camera_frustum(pose.transform_world_device)\n", - " timestamp = pose.tracking_timestamp.total_seconds()\n", - " step = dict(method=\"update\", args=[{\"visible\": [False] * len(cam_frustums) + [True] * 2}, {\"title\": \"Trajectory and Point Cloud\"},], label=timestamp,)\n", - " step[\"args\"][0][\"visible\"][i] = True # Toggle i'th trace to \"visible\"\n", - " steps[i] = step\n", - "cam_frustums[0].visible = True\n", - "\n", - "# Filter the point cloud by inv depth and depth and load\n", - "points = filter_points_from_confidence(points)\n", - "# Retrieve point position\n", - "point_cloud = np.stack([it.position_world for it in points])\n", - "\n", - "# Create slider to allow scrubbing and set the layout\n", - "sliders = [dict(currentvalue={\"suffix\": \" s\", \"prefix\": \"Time :\"}, pad={\"t\": 5}, steps=steps,)]\n", - "layout = go.Layout(\n", - " sliders=sliders, \n", - " scene=dict(\n", - " bgcolor='lightgray', \n", - " dragmode='orbit', \n", - " aspectmode='data', \n", - " xaxis_visible=False, \n", - " yaxis_visible=False,\n", - " zaxis_visible=False,\n", - " camera=dict(\n", - " eye=dict(x=0.5, y=0.5, z=0.5),\n", - " center=dict(x=0, y=0, z=0),\n", - " up=dict(x=0, y=0, z=1)\n", - " )),\n", - " width=1100,\n", - " height=1000,\n", - ")\n", - "\n", - "# Plot trajectory and point cloud\n", - "# We color the points by their z coordinate\n", - "trajectory = go.Scatter3d(x=traj[:, 0], y=traj[:, 1], z=traj[:, 2], mode=\"markers\", marker={\"size\": 2, \"opacity\": 0.8, \"color\": \"red\"}, name=\"Trajectory\", hoverinfo='none')\n", - "global_points = go.Scatter3d(x=point_cloud[:, 0], y=point_cloud[:, 1], z=point_cloud[:, 2], mode=\"markers\",\n", - " marker={\"size\" : 1.5, \"color\": point_cloud[:, 2], \"cmin\": -1.5, \"cmax\": 2, \"colorscale\": \"viridis\",},\n", - " name=\"Global Points\", hoverinfo='none')\n", - "\n", - "# draw\n", - "plot_figure = go.Figure(data=cam_frustums + [trajectory, global_points], layout=layout)\n", - 
"plot_figure.show()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "f37caf18", - "metadata": {}, - "source": [ - "## Visualize generalized and calibrated eye gaze projection on an rgb image.\n", - "* Load Eyegaze MPS output\n", - "* Select a random RGB frame\n", - "* Find the closest eye gaze data for the RGB frame\n", - "* Project the eye gaze for the RGB frame by **using a fixed depth of 1m** or existing depth if available.\n", - "* Show the gaze cross on the RGB image" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7d31979e", - "metadata": {}, - "outputs": [], - "source": [ - "rgb_stream_id = StreamId(\"214-1\")\n", - "rgb_stream_label = provider.get_label_from_stream_id(rgb_stream_id)\n", - "num_rgb_frames = provider.get_num_data(rgb_stream_id)\n", - "rgb_frame = provider.get_image_data_by_index(rgb_stream_id, (int)(num_rgb_frames-5))\n", - "assert rgb_frame[0] is not None, \"no rgb frame\"\n", - "\n", - "image = rgb_frame[0].to_numpy_array()\n", - "capture_timestamp_ns = rgb_frame[1].capture_timestamp_ns\n", - "generalized_eye_gaze = get_nearest_eye_gaze(generalized_eye_gazes, capture_timestamp_ns)\n", - "calibrated_eye_gaze = get_nearest_eye_gaze(calibrated_eye_gazes, capture_timestamp_ns)\n", - "# get projection function\n", - "device_calibration = provider.get_device_calibration()\n", - "camera_calibration = device_calibration.get_camera_calib(rgb_stream_label)\n", - "assert camera_calibration is not None, \"no camera calibration\"\n", - "\n", - "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 10))\n", - "\n", - "# Draw a cross at the projected gaze center location on the RGB image at available depth or if unavailable a 1m proxy\n", - "depth_m = generalized_eye_gaze.depth or 1.0\n", - "generalized_gaze_center_in_pixels = get_gaze_vector_reprojection(generalized_eye_gaze, rgb_stream_label, device_calibration, camera_calibration, depth_m)\n", - "if generalized_gaze_center_in_pixels is not None:\n", - " ax1.imshow(image)\n", - " ax1.plot(generalized_gaze_center_in_pixels[0], generalized_gaze_center_in_pixels[1], '+', c=\"red\", mew=1, ms=20)\n", - " ax1.grid(False)\n", - " ax1.axis(False)\n", - " ax1.set_title(\"Generalized Eye Gaze\")\n", - "else:\n", - " print(f\"Eye gaze center projected to {generalized_gaze_center_in_pixels}, which is out of camera sensor plane.\")\n", - "\n", - "depth_m = calibrated_eye_gaze.depth or 1.0\n", - "calibrated_gaze_center_in_pixels = get_gaze_vector_reprojection(calibrated_eye_gaze, rgb_stream_label, device_calibration, camera_calibration, depth_m = 1.0)\n", - "if calibrated_gaze_center_in_pixels is not None:\n", - " ax2.imshow(image)\n", - " ax2.plot(calibrated_gaze_center_in_pixels[0], calibrated_gaze_center_in_pixels[1], '+', c=\"red\", mew=1, ms=20)\n", - " ax2.grid(False)\n", - " ax2.axis(False)\n", - " ax2.set_title(\"Personalized Eye Gaze\")\n", - "else:\n", - " print(f\"Eye gaze center projected to {calibrated_gaze_center_in_pixels}, which is out of camera sensor plane.\")\n", - "\n", - "plt.show()\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "ab51e157", - "metadata": {}, - "source": [ - "## Visualize hand tracking results (2d projections of landmarks, wrist and palm normals) on RGB and SLAM images" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c57249d3", - "metadata": {}, - "outputs": [], - "source": [ - "from typing import Dict, List, Optional\n", - "\n", - "from projectaria_tools.core.calibration import CameraCalibration, 
DeviceCalibration\n", - "from projectaria_tools.core.sensor_data import TimeDomain, TimeQueryOptions\n", - "\n", - "time_domain: TimeDomain = TimeDomain.DEVICE_TIME\n", - "time_query_closest: TimeQueryOptions = TimeQueryOptions.CLOSEST\n", - "\n", - "NORMAL_VIS_LEN = 0.05 # meters\n", - "\n", - "# Get stream ids, stream labels, stream timestamps, and camera calibrations for RGB and SLAM cameras\n", - "stream_ids: Dict[str, StreamId] = {\n", - " \"rgb\": StreamId(\"214-1\"),\n", - " \"slam-left\": StreamId(\"1201-1\"),\n", - " \"slam-right\": StreamId(\"1201-2\"),\n", - "}\n", - "stream_labels: Dict[str, str] = {\n", - " key: provider.get_label_from_stream_id(stream_id)\n", - " for key, stream_id in stream_ids.items()\n", - "}\n", - "stream_timestamps_ns: Dict[str, List[int]] = {\n", - " key: provider.get_timestamps_ns(stream_id, time_domain)\n", - " for key, stream_id in stream_ids.items()\n", - "}\n", - "camera_calibrations = {\n", - " key: device_calibration.get_camera_calib(stream_label)\n", - " for key, stream_label in stream_labels.items()\n", - "}\n", - "for key, camera_calibration in camera_calibrations.items():\n", - " assert camera_calibration is not None, f\"no camera calibration for {key}\"\n", - "\n", - "# Get device calibration and transform from device to sensor\n", - "device_calibration = provider.get_device_calibration()\n", - "\n", - "\n", - "def get_T_device_sensor(key: str):\n", - " return device_calibration.get_transform_device_sensor(stream_labels[key])\n", - "\n", - "\n", - "# Get a sample frame for each of the RGB, SLAM left, and SLAM right streams\n", - "sample_timestamp_ns: int = stream_timestamps_ns[\"rgb\"][120]\n", - "sample_frames = {\n", - " key: provider.get_image_data_by_time_ns(\n", - " stream_id, sample_timestamp_ns, time_domain, time_query_closest\n", - " )[0]\n", - " for key, stream_id in stream_ids.items()\n", - "}\n", - "\n", - "# Get the hand tracking pose\n", - "hand_tracking_result = mps_data_provider.get_hand_tracking_result(\n", - " sample_timestamp_ns, time_query_closest\n", - ")\n", - "\n", - "\n", - "# Helper functions for reprojection and plotting\n", - "def get_point_reprojection(\n", - " point_position_device: np.array, key: str\n", - ") -> Optional[np.array]:\n", - " point_position_camera = get_T_device_sensor(key).inverse() @ point_position_device\n", - " point_position_pixel = camera_calibrations[key].project(point_position_camera)\n", - " return point_position_pixel\n", - "\n", - "\n", - "def get_landmark_pixels(key: str) -> np.array:\n", - " left_wrist = None\n", - " left_palm = None\n", - " left_landmarks = None\n", - " right_wrist = None\n", - " right_palm = None\n", - " right_landmarks = None\n", - " left_wrist_normal_tip = None\n", - " left_palm_normal_tip = None\n", - " right_wrist_normal_tip = None\n", - " right_palm_normal_tip = None\n", - " if hand_tracking_result.left_hand:\n", - " left_landmarks = [\n", - " get_point_reprojection(landmark, key)\n", - " for landmark in hand_tracking_result.left_hand.landmark_positions_device\n", - " ]\n", - " left_wrist = get_point_reprojection(\n", - " hand_tracking_result.left_hand.landmark_positions_device[\n", - " int(mps.hand_tracking.HandLandmark.WRIST)\n", - " ],\n", - " key,\n", - " )\n", - " left_palm = get_point_reprojection(\n", - " hand_tracking_result.left_hand.landmark_positions_device[\n", - " int(mps.hand_tracking.HandLandmark.PALM_CENTER)\n", - " ],\n", - " key,\n", - " )\n", - " if hand_tracking_result.left_hand.wrist_and_palm_normal_device is not None:\n", - " 
left_wrist_normal_tip = get_point_reprojection(\n", - " hand_tracking_result.left_hand.landmark_positions_device[\n", - " int(mps.hand_tracking.HandLandmark.WRIST)\n", - " ]\n", - " + hand_tracking_result.left_hand.wrist_and_palm_normal_device.wrist_normal_device\n", - " * NORMAL_VIS_LEN,\n", - " key,\n", - " )\n", - " left_palm_normal_tip = get_point_reprojection(\n", - " hand_tracking_result.left_hand.landmark_positions_device[\n", - " int(mps.hand_tracking.HandLandmark.PALM_CENTER)\n", - " ]\n", - " + hand_tracking_result.left_hand.wrist_and_palm_normal_device.palm_normal_device\n", - " * NORMAL_VIS_LEN,\n", - " key,\n", - " )\n", - " if hand_tracking_result.right_hand:\n", - " right_landmarks = [\n", - " get_point_reprojection(landmark, key)\n", - " for landmark in hand_tracking_result.right_hand.landmark_positions_device\n", - " ]\n", - " right_wrist = get_point_reprojection(\n", - " hand_tracking_result.right_hand.landmark_positions_device[\n", - " int(mps.hand_tracking.HandLandmark.WRIST)\n", - " ],\n", - " key,\n", - " )\n", - " right_palm = get_point_reprojection(\n", - " hand_tracking_result.right_hand.landmark_positions_device[\n", - " int(mps.hand_tracking.HandLandmark.PALM_CENTER)\n", - " ],\n", - " key,\n", - " )\n", - " if hand_tracking_result.right_hand.wrist_and_palm_normal_device is not None:\n", - " right_wrist_normal_tip = get_point_reprojection(\n", - " hand_tracking_result.right_hand.landmark_positions_device[\n", - " int(mps.hand_tracking.HandLandmark.WRIST)\n", - " ]\n", - " + hand_tracking_result.right_hand.wrist_and_palm_normal_device.wrist_normal_device\n", - " * NORMAL_VIS_LEN,\n", - " key,\n", - " )\n", - " right_palm_normal_tip = get_point_reprojection(\n", - " hand_tracking_result.right_hand.landmark_positions_device[\n", - " int(mps.hand_tracking.HandLandmark.PALM_CENTER)\n", - " ]\n", - " + hand_tracking_result.right_hand.wrist_and_palm_normal_device.palm_normal_device\n", - " * NORMAL_VIS_LEN,\n", - " key,\n", - " )\n", - " \n", - " return (\n", - " left_wrist,\n", - " left_palm,\n", - " right_wrist,\n", - " right_palm,\n", - " left_wrist_normal_tip,\n", - " left_palm_normal_tip,\n", - " right_wrist_normal_tip,\n", - " right_palm_normal_tip,\n", - " left_landmarks,\n", - " right_landmarks\n", - " )\n", - "\n", - "\n", - "def plot_landmarks_and_connections(\n", - " plt, \n", - " left_landmarks, \n", - " right_landmarks, \n", - " connections, \n", - " img_height\n", - "):\n", - " def plot_point(point, color):\n", - " plt.plot(img_height - 0.5 - point[1], point[0] + 0.5, \".\", c=color, mew=1, ms=5)\n", - " def plot_line(point1, point2, color):\n", - " plt.plot([img_height - 0.5 - point1[1], img_height - 0.5 - point2[1]], [point1[0] + 0.5, point2[0] + 0.5], color=color)\n", - "\n", - " if left_landmarks:\n", - " for left_landmark in left_landmarks:\n", - " if left_landmark is not None:\n", - " plot_point(left_landmark, \"blue\") \n", - " for connection in connections:\n", - " if left_landmarks[int(connection[0])] is not None and left_landmarks[int(connection[1])] is not None:\n", - " plot_line(left_landmarks[int(connection[0])], left_landmarks[int(connection[1])], \"blue\")\n", - " if right_landmarks:\n", - " for right_landmark in right_landmarks:\n", - " if right_landmark is not None:\n", - " plot_point(right_landmark, \"red\") \n", - " for connection in connections:\n", - " if right_landmarks[int(connection[0])] is not None and right_landmarks[int(connection[1])] is not None:\n", - " plot_line(right_landmarks[int(connection[0])], 
right_landmarks[int(connection[1])], \"red\")\n", - "\n", - "\n", - "def plot_wrists_and_palms(\n", - " plt,\n", - " left_wrist,\n", - " left_palm,\n", - " right_wrist,\n", - " right_palm,\n", - " left_wrist_normal_tip,\n", - " left_palm_normal_tip,\n", - " right_wrist_normal_tip,\n", - " right_palm_normal_tip,\n", - " img_height\n", - "):\n", - " def plot_point(point, color):\n", - " plt.plot(img_height - 0.5 - point[1], point[0] + 0.5, \".\", c=color, mew=1, ms=15)\n", - "\n", - " def plot_arrow(point, vector, color):\n", - " plt.arrow(img_height - 0.5 - point[1], point[0] + 0.5, -vector[1], vector[0], color=color)\n", - "\n", - " if left_wrist is not None:\n", - " plot_point(left_wrist, \"blue\")\n", - " if left_palm is not None:\n", - " plot_point(left_palm, \"blue\")\n", - " if right_wrist is not None:\n", - " plot_point(right_wrist, \"red\")\n", - " if right_palm is not None:\n", - " plot_point(right_palm, \"red\")\n", - " if left_wrist_normal_tip is not None and left_wrist is not None:\n", - " plot_arrow(left_wrist, left_wrist_normal_tip - left_wrist, \"blue\")\n", - " if left_palm_normal_tip is not None and left_palm is not None:\n", - " plot_arrow(left_palm, left_palm_normal_tip - left_palm, \"blue\")\n", - " if right_wrist_normal_tip is not None and right_wrist is not None:\n", - " plot_arrow(right_wrist, right_wrist_normal_tip - right_wrist, \"red\")\n", - " if right_palm_normal_tip is not None and right_palm is not None:\n", - " plot_arrow(right_palm, right_palm_normal_tip - right_palm, \"red\")\n", - "\n", - "\n", - "# Display wrist and palm positions on RGB, SLAM left, and SLAM right images\n", - "plt.figure()\n", - "rgb_image = sample_frames[\"rgb\"].to_numpy_array()\n", - "plt.grid(False)\n", - "plt.axis(\"off\")\n", - "plt.imshow(np.rot90(rgb_image, -1))\n", - "(\n", - " left_wrist,\n", - " left_palm,\n", - " right_wrist,\n", - " right_palm,\n", - " left_wrist_normal,\n", - " left_palm_normal,\n", - " right_wrist_normal,\n", - " right_palm_normal,\n", - " left_landmarks,\n", - " right_landmarks,\n", - ") = get_landmark_pixels(\"rgb\")\n", - "plot_wrists_and_palms(\n", - " plt,\n", - " left_wrist,\n", - " left_palm,\n", - " right_wrist,\n", - " right_palm,\n", - " left_wrist_normal,\n", - " left_palm_normal,\n", - " right_wrist_normal,\n", - " right_palm_normal,\n", - " rgb_image.shape[0]\n", - ")\n", - "plot_landmarks_and_connections(\n", - " plt,\n", - " left_landmarks,\n", - " right_landmarks,\n", - " mps.hand_tracking.kHandJointConnections,\n", - " rgb_image.shape[0]\n", - ")\n", - "\n", - "plt.figure()\n", - "plt.subplot(1, 2, 1)\n", - "slam_left_image = sample_frames[\"slam-left\"].to_numpy_array()\n", - "plt.grid(False)\n", - "plt.axis(\"off\")\n", - "plt.imshow(np.rot90(slam_left_image, -1), cmap=\"gray\", vmin=0, vmax=255)\n", - "(\n", - " left_wrist,\n", - " left_palm,\n", - " right_wrist,\n", - " right_palm,\n", - " left_wrist_normal,\n", - " left_palm_normal,\n", - " right_wrist_normal,\n", - " right_palm_normal,\n", - " left_landmarks,\n", - " right_landmarks,\n", - ") = get_landmark_pixels(\"slam-left\")\n", - "plot_wrists_and_palms(\n", - " plt,\n", - " left_wrist,\n", - " left_palm,\n", - " right_wrist,\n", - " right_palm,\n", - " left_wrist_normal,\n", - " left_palm_normal,\n", - " right_wrist_normal,\n", - " right_palm_normal,\n", - " slam_left_image.shape[0]\n", - ")\n", - "plot_landmarks_and_connections(\n", - " plt,\n", - " left_landmarks,\n", - " right_landmarks,\n", - " mps.hand_tracking.kHandJointConnections,\n", - " slam_left_image.shape[0]\n", - 
")\n", - "\n", - "\n", - "plt.subplot(1, 2, 2)\n", - "slam_right_image = sample_frames[\"slam-right\"].to_numpy_array()\n", - "plt.grid(False)\n", - "plt.axis(\"off\")\n", - "plt.imshow(np.rot90(slam_right_image, -1), interpolation=\"nearest\", cmap=\"gray\")\n", - "(\n", - " left_wrist,\n", - " left_palm,\n", - " right_wrist,\n", - " right_palm,\n", - " left_wrist_normal,\n", - " left_palm_normal,\n", - " right_wrist_normal,\n", - " right_palm_normal,\n", - " left_landmarks,\n", - " right_landmarks,\n", - ") = get_landmark_pixels(\"slam-right\")\n", - "plot_wrists_and_palms(\n", - " plt,\n", - " left_wrist,\n", - " left_palm,\n", - " right_wrist,\n", - " right_palm,\n", - " left_wrist_normal,\n", - " left_palm_normal,\n", - " right_wrist_normal,\n", - " right_palm_normal,\n", - " slam_right_image.shape[0]\n", - ")\n", - "plot_landmarks_and_connections(\n", - " plt,\n", - " left_landmarks,\n", - " right_landmarks,\n", - " mps.hand_tracking.kHandJointConnections,\n", - " slam_right_image.shape[0]\n", - ")\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "2e7692a2-e2b7-476b-a386-b77c38bed87f", - "metadata": {}, - "source": [ - "## Examples for using the Online calibration from MPS\n", - "\n", - "`online_calibration.jsonl` contains one json online calibration record per line. Each record is a json dict object that contains timestamp metadata and the result of online calibration for the cameras and IMUs. Note that after the v1.1.0 MPS SLAM release, we improved the RGB camera online calibration for time offsets estimation, intrinsics/extrinsics estimation, as well as exposing the image readout time for compensating the rolling shutter effect.\n", - "\n", - "The following example shows how to read the online calibrated parameters, and the difference of an RGB image undistorted with and without online calibration." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "70514a93-5277-44cb-9fd3-ef05187317c4", - "metadata": {}, - "outputs": [], - "source": [ - "from projectaria_tools.core import calibration\n", - "\n", - "rgb_stream_id = StreamId(\"214-1\")\n", - "rgb_stream_label = provider.get_label_from_stream_id(rgb_stream_id)\n", - "num_rgb_frames = provider.get_num_data(rgb_stream_id)\n", - "rgb_frame = provider.get_image_data_by_index(rgb_stream_id, (int)(num_rgb_frames / 2))\n", - "assert rgb_frame[0] is not None, \"no rgb frame\"\n", - "\n", - "image = rgb_frame[0].to_numpy_array()\n", - "capture_timestamp_ns = rgb_frame[1].capture_timestamp_ns\n", - "\n", - "# get the online calibration RGB projection function\n", - "corrected_rgb_timestamp_ns = mps_data_provider.get_rgb_corrected_timestamp_ns(capture_timestamp_ns)\n", - "\n", - "print(\"The online calibration estimated better timestamp for rgb mid exposure\",\n", - " f\"time is {corrected_rgb_timestamp_ns}ns vs {capture_timestamp_ns}ns with\",\n", - " f\"offset {corrected_rgb_timestamp_ns - capture_timestamp_ns}ns\")\n", - "\n", - "\n", - "# Example API to get the online calibrated RGB pose\n", - "corrected_rgb_pose = mps_data_provider.get_rgb_corrected_closed_loop_pose(capture_timestamp_ns)\n", - "\n", - "# Get the online calibration for rgb camera and undistort the image using it\n", - "online_camera_calibration = mps_data_provider.get_online_calibration(capture_timestamp_ns).get_camera_calib(rgb_stream_label)\n", - "rgb_linear_camera_calibration_online = calibration.get_linear_camera_calibration(\n", - " int(online_camera_calibration.get_image_size()[0]),\n", - " int(online_camera_calibration.get_image_size()[1]),\n", - " online_camera_calibration.get_focal_lengths()[0],\n", - " \"pinhole\",\n", - " online_camera_calibration.get_transform_device_camera(),\n", - ")\n", - "undistort_image_online = calibration.distort_by_calibration(\n", - " image,\n", - " rgb_linear_camera_calibration_online,\n", - " online_camera_calibration,\n", - " )\n", - "\n", - "# get projection function from device factory calibration and undistort the image using it\n", - "device_calibration = provider.get_device_calibration()\n", - "camera_calibration = device_calibration.get_camera_calib(rgb_stream_label)\n", - "rgb_linear_camera_calibration = calibration.get_linear_camera_calibration(\n", - " int(camera_calibration.get_image_size()[0]),\n", - " int(camera_calibration.get_image_size()[1]),\n", - " camera_calibration.get_focal_lengths()[0],\n", - " \"pinhole\",\n", - " camera_calibration.get_transform_device_camera(),\n", - ")\n", - "undistort_image = calibration.distort_by_calibration(\n", - " image,\n", - " rgb_linear_camera_calibration,\n", - " camera_calibration,\n", - " )\n", - "\n", - "# Compute the difference between the two images\n", - "diff_image = undistort_image - undistort_image_online\n", - "# Create a figure with three subplots\n", - "fig, axs = plt.subplots(1, 3, figsize=(12, 4))\n", - "# Plot the first image\n", - "axs[0].imshow(undistort_image)\n", - "axs[0].set_title('Undistorted by the device calibration')\n", - "# Plot the second image\n", - "axs[1].imshow(undistort_image_online)\n", - "axs[1].set_title('Undistorted by the online calibration')\n", - "# Plot the difference image\n", - "axs[2].imshow(diff_image)\n", - "axs[2].set_title('Difference')\n", - "# Show the plot\n", - "plt.show()\n", - "\n", - "# Note that the rgb image read out time is available now for rolling shutter handling\n", - "print(f\"Rgb camera take 
{online_camera_calibration.get_readout_time_sec()} seconds to read out the image\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/setup.py b/setup.py index d2f02d250..bc53f18d6 100644 --- a/setup.py +++ b/setup.py @@ -159,7 +159,7 @@ def main(): install_requires=[ "numpy", "requests", # Required for datasets downloader - "rerun-sdk>=0.20.0", + "rerun-sdk==0.22.1", "tqdm", ], extras_require={ @@ -171,6 +171,7 @@ def main(): "pillow", "plotly", "scipy", + "rerun-notebook==0.22.1", ## Required for vrs_to_mp4 "moviepy==1.0.3", ] diff --git a/version.txt b/version.txt index 4111d137b..0691e44f7 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -2.0.0rc1 +2.0.0a5