Skip to content

Commit

Permalink
Fix a subtle uninitialized-memory-read in Buffer::for_each_value() (h…
Browse files Browse the repository at this point in the history
…alide#7330)

* Fix a subtle uninitialized-memory-read in Buffer::for_each_value()

When we flattened dimensions in for_each_value_prep(), we would copy from one past the end, meaning the last element contained uninitialized garbage. (This wasn't noticed as an out-of-bounds read because we overallocated in structure in for_each_value_impl()). This garbage stride was later used to advance ptrs in for_each_value_helper()... but only on the final iteration, so even if the ptr was wrong, it didn't matter, as the ptr was never used again. Under certain MSAN configurations, though, the read would be (correctly) flagged as uninitialized.

This fixes the MSAN bug, and also (slightly) improves the efficiency by returning the post-flattened number of dimensions, potentially reducing the number of iterations f for_each_value_helper() needed.

* Oopsie

* Update HalideBuffer.h

* Update HalideBuffer.h
  • Loading branch information
steven-johnson authored and ardier committed Mar 3, 2024
1 parent e797c91 commit 5443d9b
Showing 1 changed file with 27 additions and 20 deletions.
47 changes: 27 additions & 20 deletions src/runtime/HalideBuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -2172,9 +2172,13 @@ class Buffer {
}
}

// Return pair is <new_dimensions, innermost_strides_are_one>
template<int N>
HALIDE_NEVER_INLINE static bool for_each_value_prep(for_each_value_task_dim<N> *t,
const halide_buffer_t **buffers) {
HALIDE_NEVER_INLINE static std::pair<int, bool> for_each_value_prep(for_each_value_task_dim<N> *t,
const halide_buffer_t **buffers) {
const int dimensions = buffers[0]->dimensions;
assert(dimensions > 0);

// Check the buffers all have clean host allocations
for (int i = 0; i < N; i++) {
if (buffers[i]->device) {
Expand All @@ -2188,8 +2192,6 @@ class Buffer {
}
}

const int dimensions = buffers[0]->dimensions;

// Extract the strides in all the dimensions
for (int i = 0; i < dimensions; i++) {
for (int j = 0; j < N; j++) {
Expand Down Expand Up @@ -2219,42 +2221,47 @@ class Buffer {
}
if (flat) {
t[i - 1].extent *= t[i].extent;
for (int j = i; j < d; j++) {
for (int j = i; j < d - 1; j++) {
t[j] = t[j + 1];
}
i--;
d--;
t[d].extent = 1;
}
}

// Note that we assert() that dimensions > 0 above
// (our one-and-only caller will only call us that way)
// so the unchecked access to t[0] should be safe.
bool innermost_strides_are_one = true;
if (dimensions > 0) {
for (int i = 0; i < N; i++) {
innermost_strides_are_one &= (t[0].stride[i] == 1);
}
for (int i = 0; i < N; i++) {
innermost_strides_are_one &= (t[0].stride[i] == 1);
}

return innermost_strides_are_one;
return {d, innermost_strides_are_one};
}

template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
void for_each_value_impl(Fn &&f, Args &&...other_buffers) const {
if (dimensions() > 0) {
const size_t alloc_size = dimensions() * sizeof(for_each_value_task_dim<N>);
Buffer<>::for_each_value_task_dim<N> *t =
(Buffer<>::for_each_value_task_dim<N> *)HALIDE_ALLOCA((dimensions() + 1) * sizeof(for_each_value_task_dim<N>));
(Buffer<>::for_each_value_task_dim<N> *)HALIDE_ALLOCA(alloc_size);
// Move the preparatory code into a non-templated helper to
// save code size.
const halide_buffer_t *buffers[] = {&buf, (&other_buffers.buf)...};
bool innermost_strides_are_one = Buffer<>::for_each_value_prep(t, buffers);

Buffer<>::for_each_value_helper(f, dimensions() - 1,
innermost_strides_are_one,
t,
data(), (other_buffers.data())...);
} else {
f(*data(), (*other_buffers.data())...);
auto [new_dims, innermost_strides_are_one] = Buffer<>::for_each_value_prep(t, buffers);
if (new_dims > 0) {
Buffer<>::for_each_value_helper(f, new_dims - 1,
innermost_strides_are_one,
t,
data(), (other_buffers.data())...);
return;
}
// else fall thru
}

// zero-dimensional case
f(*data(), (*other_buffers.data())...);
}
// @}

Expand Down

0 comments on commit 5443d9b

Please sign in to comment.