feat: upgrade MediaPipe(0.8.0)

homuler · Nov 24, 2020 · 3b58752 · 3b58752
1 parent 2eea5b2
commit 3b58752
Show file tree

Hide file tree

Showing 39 changed files with 288 additions and 1,163 deletions.
diff --git a/Assets/MediaPipe/Examples/Objects/Graph/CPU/MultiHandTrackingGraphCPU.prefab b/Assets/MediaPipe/Examples/Objects/Graph/CPU/MultiHandTrackingGraphCPU.prefab
diff --git a/Assets/MediaPipe/Examples/Objects/Graph/CPU/MultiHandTrackingGraphCPU.prefab.meta b/Assets/MediaPipe/Examples/Objects/Graph/CPU/MultiHandTrackingGraphCPU.prefab.meta
diff --git a/Assets/MediaPipe/Examples/Objects/Graph/GPU/FaceMeshGraphGPU.prefab b/Assets/MediaPipe/Examples/Objects/Graph/GPU/FaceMeshGraphGPU.prefab
@@ -12,7 +12,7 @@ GameObject:
   - component: {fileID: 7004806987071314288}
   - component: {fileID: 7004806987071314290}
   m_Layer: 0
-  m_Name: FaceMeshGraph
+  m_Name: FaceMeshGraphGPU
   m_TagString: Untagged
   m_Icon: {fileID: 0}
   m_NavMeshLayer: 0
@@ -44,7 +44,8 @@ MonoBehaviour:
   m_Script: {fileID: 11500000, guid: 860c2553189c962f597b1b7dc1ee4c82, type: 3}
   m_Name: 
   m_EditorClassIdentifier: 
-  config: {fileID: 4900000, guid: 83097db0ae996a3ae8b65ad6b8db6444, type: 3}
+  config: {fileID: 4900000, guid: daf329e7d9e05b86e87226eba2dd3f32, type: 3}
+  numFaces: 3
 --- !u!114 &7004806987071314290
 MonoBehaviour:
   m_ObjectHideFlags: 0

diff --git a/Assets/MediaPipe/Examples/Objects/Graph/GPU/HandTrackingGraphGPU.prefab b/Assets/MediaPipe/Examples/Objects/Graph/GPU/HandTrackingGraphGPU.prefab
@@ -12,7 +12,7 @@ GameObject:
   - component: {fileID: 6942115883526269424}
   - component: {fileID: 8479715968756539202}
   m_Layer: 0
-  m_Name: MediapipeGraph
+  m_Name: HandTrackingGraphGPU
   m_TagString: Untagged
   m_Icon: {fileID: 0}
   m_NavMeshLayer: 0
@@ -57,11 +57,9 @@ MonoBehaviour:
   m_Script: {fileID: 11500000, guid: d8082593da04f2420974527dc1b29c06, type: 3}
   m_Name: 
   m_EditorClassIdentifier: 
-  handednessPrefab: {fileID: 5340126888248019913, guid: 3144fd50b7773ed768c77b259a74dbe6,
+  handLandmarkListsPrefab: {fileID: 2737917561592217227, guid: 2aac8e2b0b4f821fbbccc5cebab7bceb,
     type: 3}
-  handLandmarkListPrefab: {fileID: 3100741085041498678, guid: 4eb3a6d7cfea73afdb656b7166ae1a8d,
+  palmRectsPrefab: {fileID: 3702375667864819085, guid: 38f44807158015f02bddcfcc8dfe65b7,
     type: 3}
-  handRectPrefab: {fileID: 1405412484651109527, guid: c5c846cc7bbefdfa7af64f9d29912b70,
-    type: 3}
-  palmDetectionsPrefab: {fileID: 6957250898008990224, guid: 8d335567a8544a10e98a42b234b64b13,
+  palmDetectionsPrefab: {fileID: 5670327508126957478, guid: 324d403c9690c972abca928f3e614afc,
     type: 3}
diff --git a/Assets/MediaPipe/Examples/Objects/Graph/GPU/MultiHandTrackingGraphGPU.prefab b/Assets/MediaPipe/Examples/Objects/Graph/GPU/MultiHandTrackingGraphGPU.prefab
diff --git a/Assets/MediaPipe/Examples/Objects/Graph/GPU/MultiHandTrackingGraphGPU.prefab.meta b/Assets/MediaPipe/Examples/Objects/Graph/GPU/MultiHandTrackingGraphGPU.prefab.meta
diff --git a/Assets/MediaPipe/Examples/Resources/face_detection_desktop_live.txt b/Assets/MediaPipe/Examples/Resources/face_detection_desktop_live.txt
@@ -12,160 +12,43 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# CPU buffer. (ImageFrame)
 input_stream: "input_video"
-output_stream: "output_detections"
+
+# Detected faces. (std::vector<Detection>)
+output_stream: "face_detections"
+output_stream: "face_detections_presence"
 
 # Throttles the images flowing downstream for flow control. It passes through
-# the very first incoming image unaltered, and waits for
-# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
-# generating the corresponding detections before it passes through another
-# image. All images that come in while waiting are dropped, limiting the number
-# of in-flight images between this calculator and
-# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
-# from queuing up incoming images and data excessively, which leads to increased
-# latency and memory usage, unwanted in real-time mobile applications. It also
-# eliminates unnecessarily computation, e.g., a transformed image produced by
-# ImageTransformationCalculator may get dropped downstream if the subsequent
-# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
-# processing previous inputs.
+# the very first incoming image unaltered, and waits for downstream nodes
+# (calculators and subgraphs) in the graph to finish their tasks before it
+# passes through another image. All images that come in while waiting are
+# dropped, limiting the number of in-flight images in most parts of the graph to
+# 1. This prevents the downstream nodes from queuing up incoming images and data
+# excessively, which leads to increased latency and memory usage, unwanted in
+# real-time mobile applications. It also eliminates unnecessary computation,
+# e.g., the output produced by a node may get dropped downstream if the
+# subsequent nodes are still busy processing previous inputs.
 node {
   calculator: "FlowLimiterCalculator"
   input_stream: "input_video"
-  input_stream: "FINISHED:detections"
+  input_stream: "FINISHED:face_detections_presence"
   input_stream_info: {
     tag_index: "FINISHED"
     back_edge: true
   }
   output_stream: "throttled_input_video"
 }
 
-# Transforms the input image on CPU to a 128x128 image. To scale the input
-# image, the scale_mode option is set to FIT to preserve the aspect ratio,
-# resulting in potential letterboxing in the transformed image.
-node: {
-  calculator: "ImageTransformationCalculator"
-  input_stream: "IMAGE:throttled_input_video"
-  output_stream: "IMAGE:transformed_input_video_cpu"
-  output_stream: "LETTERBOX_PADDING:letterbox_padding"
-  node_options: {
-    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
-      output_width: 128
-      output_height: 128
-      scale_mode: FIT
-    }
-  }
-}
-
-# Converts the transformed input image on CPU into an image tensor stored as a
-# TfLiteTensor.
-node {
-  calculator: "TfLiteConverterCalculator"
-  input_stream: "IMAGE:transformed_input_video_cpu"
-  output_stream: "TENSORS:image_tensor"
-}
-
-# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
-# vector of tensors representing, for instance, detection boxes/keypoints and
-# scores.
-node {
-  calculator: "TfLiteInferenceCalculator"
-  input_stream: "TENSORS:image_tensor"
-  output_stream: "TENSORS:detection_tensors"
-  node_options: {
-    [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
-      model_path: "mediapipe/models/face_detection_front.tflite"
-    }
-  }
-}
-
-# Generates a single side packet containing a vector of SSD anchors based on
-# the specification in the options.
-node {
-  calculator: "SsdAnchorsCalculator"
-  output_side_packet: "anchors"
-  node_options: {
-    [type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
-      num_layers: 4
-      min_scale: 0.1484375
-      max_scale: 0.75
-      input_size_height: 128
-      input_size_width: 128
-      anchor_offset_x: 0.5
-      anchor_offset_y: 0.5
-      strides: 8
-      strides: 16
-      strides: 16
-      strides: 16
-      aspect_ratios: 1.0
-      fixed_anchor_size: true
-    }
-  }
-}
-
-# Decodes the detection tensors generated by the TensorFlow Lite model, based on
-# the SSD anchors and the specification in the options, into a vector of
-# detections. Each detection describes a detected object.
-node {
-  calculator: "TfLiteTensorsToDetectionsCalculator"
-  input_stream: "TENSORS:detection_tensors"
-  input_side_packet: "ANCHORS:anchors"
-  output_stream: "DETECTIONS:detections"
-  node_options: {
-    [type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
-      num_classes: 1
-      num_boxes: 896
-      num_coords: 16
-      box_coord_offset: 0
-      keypoint_coord_offset: 4
-      num_keypoints: 6
-      num_values_per_keypoint: 2
-      sigmoid_score: true
-      score_clipping_thresh: 100.0
-      reverse_output_order: true
-      x_scale: 128.0
-      y_scale: 128.0
-      h_scale: 128.0
-      w_scale: 128.0
-      min_score_thresh: 0.5
-    }
-  }
-}
-
-# Performs non-max suppression to remove excessive detections.
+# Subgraph that detects faces.
 node {
-  calculator: "NonMaxSuppressionCalculator"
-  input_stream: "detections"
-  output_stream: "filtered_detections"
-  node_options: {
-    [type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
-      min_suppression_threshold: 0.3
-      overlap_type: INTERSECTION_OVER_UNION
-      algorithm: WEIGHTED
-      return_empty_detections: true
-    }
-  }
-}
-
-# Maps detection label IDs to the corresponding label text ("Face"). The label
-# map is provided in the label_map_path option.
-node {
-  calculator: "DetectionLabelIdToTextCalculator"
-  input_stream: "filtered_detections"
-  output_stream: "labeled_detections"
-  node_options: {
-    [type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
-      label_map_path: "mediapipe/models/face_detection_front_labelmap.txt"
-    }
-  }
+  calculator: "FaceDetectionFrontCpu"
+  input_stream: "IMAGE:throttled_input_video"
+  output_stream: "DETECTIONS:face_detections"
 }
 
-# Adjusts detection locations (already normalized to [0.f, 1.f]) on the
-# letterboxed image (after image transformation with the FIT scale mode) to the
-# corresponding locations on the same image with the letterbox removed (the
-# input image to the graph before image transformation).
 node {
-  calculator: "DetectionLetterboxRemovalCalculator"
-  input_stream: "DETECTIONS:labeled_detections"
-  input_stream: "LETTERBOX_PADDING:letterbox_padding"
-  output_stream: "DETECTIONS:output_detections"
+  calculator: "PacketPresenceCalculator"
+  input_stream: "PACKET:face_detections"
+  output_stream: "PRESENCE:face_detections_presence"
 }