From 5443d9bbc3c20f91683498327efb858a78e6a315 Mon Sep 17 00:00:00 2001 From: Steven Johnson <srj@google.com> Date: Thu, 9 Feb 2023 16:22:10 -0800 Subject: [PATCH] Fix a subtle uninitialized-memory-read in Buffer::for_each_value() (#7330) * Fix a subtle uninitialized-memory-read in Buffer::for_each_value() When we flattened dimensions in for_each_value_prep(), we would copy from one past the end, meaning the last element contained uninitialized garbage. (This wasn't noticed as an out-of-bounds read because we overallocated the structure in for_each_value_impl()). This garbage stride was later used to advance ptrs in for_each_value_helper()... but only on the final iteration, so even if the ptr was wrong, it didn't matter, as the ptr was never used again. Under certain MSAN configurations, though, the read would be (correctly) flagged as uninitialized. This fixes the MSAN bug, and also (slightly) improves the efficiency by returning the post-flattened number of dimensions, potentially reducing the number of iterations of for_each_value_helper() needed. 
* Oopsie * Update HalideBuffer.h * Update HalideBuffer.h --- src/runtime/HalideBuffer.h | 47 ++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/src/runtime/HalideBuffer.h b/src/runtime/HalideBuffer.h index d39f5461c9f4..5416e8bab8d8 100644 --- a/src/runtime/HalideBuffer.h +++ b/src/runtime/HalideBuffer.h @@ -2172,9 +2172,13 @@ class Buffer { } } + // Return pair is <new_dimensions, innermost_strides_are_one> template<int N> - HALIDE_NEVER_INLINE static bool for_each_value_prep(for_each_value_task_dim<N> *t, - const halide_buffer_t **buffers) { + HALIDE_NEVER_INLINE static std::pair<int, bool> for_each_value_prep(for_each_value_task_dim<N> *t, + const halide_buffer_t **buffers) { + const int dimensions = buffers[0]->dimensions; + assert(dimensions > 0); + // Check the buffers all have clean host allocations for (int i = 0; i < N; i++) { if (buffers[i]->device) { @@ -2188,8 +2192,6 @@ class Buffer { } } - const int dimensions = buffers[0]->dimensions; - // Extract the strides in all the dimensions for (int i = 0; i < dimensions; i++) { for (int j = 0; j < N; j++) { @@ -2219,42 +2221,47 @@ class Buffer { } if (flat) { t[i - 1].extent *= t[i].extent; - for (int j = i; j < d; j++) { + for (int j = i; j < d - 1; j++) { t[j] = t[j + 1]; } i--; d--; - t[d].extent = 1; } } + // Note that we assert() that dimensions > 0 above + // (our one-and-only caller will only call us that way) + // so the unchecked access to t[0] should be safe. 
bool innermost_strides_are_one = true; - if (dimensions > 0) { - for (int i = 0; i < N; i++) { - innermost_strides_are_one &= (t[0].stride[i] == 1); - } + for (int i = 0; i < N; i++) { + innermost_strides_are_one &= (t[0].stride[i] == 1); } - return innermost_strides_are_one; + return {d, innermost_strides_are_one}; } template<typename Fn, typename... Args, int N = sizeof...(Args) + 1> void for_each_value_impl(Fn &&f, Args &&...other_buffers) const { if (dimensions() > 0) { + const size_t alloc_size = dimensions() * sizeof(for_each_value_task_dim<N>); Buffer<>::for_each_value_task_dim<N> *t = - (Buffer<>::for_each_value_task_dim<N> *)HALIDE_ALLOCA((dimensions() + 1) * sizeof(for_each_value_task_dim<N>)); + (Buffer<>::for_each_value_task_dim<N> *)HALIDE_ALLOCA(alloc_size); // Move the preparatory code into a non-templated helper to // save code size. const halide_buffer_t *buffers[] = {&buf, (&other_buffers.buf)...}; - bool innermost_strides_are_one = Buffer<>::for_each_value_prep(t, buffers); - - Buffer<>::for_each_value_helper(f, dimensions() - 1, - innermost_strides_are_one, - t, - data(), (other_buffers.data())...); - } else { - f(*data(), (*other_buffers.data())...); + auto [new_dims, innermost_strides_are_one] = Buffer<>::for_each_value_prep(t, buffers); + if (new_dims > 0) { + Buffer<>::for_each_value_helper(f, new_dims - 1, + innermost_strides_are_one, + t, + data(), (other_buffers.data())...); + return; + } + // else fall thru } + + // zero-dimensional case + f(*data(), (*other_buffers.data())...); } // @}