Skip to content

Commit

Permalink
[AOT][Stack Allocator] Fix Initial Memory Misalignment (apache#8487)
Browse files Browse the repository at this point in the history
* add flag

* fix and test

* format

* fix memory memory_align function

* fix and address comments

* format

* fix crt aot test

* comments

* fix test

* trigger

* trigger

* trigger

* trigger

* trigger

Co-authored-by: Mehrdad Hessar <[email protected]>
  • Loading branch information
2 people authored and ylc committed Sep 29, 2021
1 parent a4ddd26 commit a7b0770
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 31 deletions.
13 changes: 10 additions & 3 deletions src/runtime/crt/memory/stack_allocator.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,15 @@ tvm_crt_error_t StackMemoryManager_Free(tvm_workspace_t* tvm_runtime_workspace,

/*!
 * \brief Initialise a stack-allocator workspace over a caller-provided buffer.
 *
 * The start of the buffer is rounded up to the next multiple of
 * TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES, and the bytes skipped by that rounding
 * are subtracted from the usable workspace size.
 *
 * \param tvm_runtime_workspace Workspace descriptor to fill in.
 * \param g_aot_memory Start of the backing buffer; it does not have to be aligned.
 * \param workspace_size Size of the backing buffer in bytes.
 * \return kTvmErrorNoError.
 */
tvm_crt_error_t StackMemoryManager_Init(tvm_workspace_t* tvm_runtime_workspace,
                                        uint8_t* g_aot_memory, size_t workspace_size) {
  // We need to round up g_aot_memory in case it is not aligned to
  // TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES. The add-then-mask rounding below is
  // only correct when that alignment is a power of two.
  uintptr_t unaligned_mask = TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES - 1;
  uintptr_t unaligned_base = (uintptr_t)g_aot_memory;
  uintptr_t aligned_base = (unaligned_base + unaligned_mask) & ~unaligned_mask;
  uint8_t* memory_aligned = (uint8_t*)aligned_base;
  // Number of leading bytes given up to reach the aligned address.
  size_t offset = (size_t)(aligned_base - unaligned_base);

  tvm_runtime_workspace->next_alloc = memory_aligned;
  tvm_runtime_workspace->workspace = memory_aligned;
  // A buffer that is smaller than the alignment padding has no usable bytes.
  // Clamp to zero instead of letting the unsigned subtraction wrap around to
  // a huge size.
  tvm_runtime_workspace->workspace_size = (workspace_size > offset) ? workspace_size - offset : 0;
  return kTvmErrorNoError;
}
115 changes: 93 additions & 22 deletions tests/crt/aot_memory_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/

#include <gtest/gtest.h>
#include <tvm/runtime/crt/stack_allocator.h>

Expand All @@ -24,83 +25,126 @@

// Check with LIFO checks enabled for stack allocator
#define TVM_CRT_STACK_ALLOCATOR_ENABLE_LIFO_CHECK

// Number of memory misalignment in bytes
#define NUM_MEMORY_MISALIGNMENT_BYTES 1

/*!
 * Round a memory pointer up to a TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES boundary.
 * The pointer referenced by memory_ptr is advanced in place.
 * \return Number of bytes the pointer was advanced.
 */
static uint32_t align_pointer(uint8_t** memory_ptr) {
  const uint32_t alignment_mask = TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES - 1;
  const uint32_t past_boundary = (uintptr_t)(*memory_ptr) % TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES;
  // For a power-of-two alignment the mask folds the "already aligned" case
  // (distance equal to the alignment itself) back to zero.
  const uint32_t bytes_to_boundary =
      (TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES - past_boundary) & alignment_mask;
  *memory_ptr = *memory_ptr + bytes_to_boundary;
  return bytes_to_boundary;
}

/*!
 * Push a memory pointer off the TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES boundary.
 * If the pointer referenced by memory_ptr is aligned, it is advanced by
 * NUM_MEMORY_MISALIGNMENT_BYTES; a pointer that is already misaligned is
 * left untouched.
 * \return Number of bytes the pointer was advanced (0 if it was not modified).
 */
static uint32_t misalign_pointer(uint8_t** memory_ptr) {
  uint32_t extra = (uintptr_t)(*memory_ptr) % TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES;
  if (extra != 0) {
    return 0;
  }
  *memory_ptr += NUM_MEMORY_MISALIGNMENT_BYTES;
  // Report the real displacement rather than a literal 1, so callers that
  // subtract the result from the buffer size stay correct if the macro changes.
  return NUM_MEMORY_MISALIGNMENT_BYTES;
}

/*
 * Tests allocations are properly aligned when allocated.
 */
TEST(AOTMemory, Allocate) {
  static uint8_t model_memory[128];
  tvm_workspace_t tvm_runtime_workspace;
  // Hand the allocator an already aligned start address, so the expected
  // block addresses below can be expressed relative to model_memory_ptr.
  uint8_t* model_memory_ptr = model_memory;
  uint32_t offset = align_pointer(&model_memory_ptr);
  ASSERT_EQ(StackMemoryManager_Init(&tvm_runtime_workspace, model_memory_ptr,
                                    sizeof(model_memory) - offset),
            kTvmErrorNoError);

  void* block_one = NULL;
  ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_one, 1),
            kTvmErrorNoError);
  ASSERT_EQ(block_one, &model_memory_ptr[0]);

  void* block_two = NULL;
  ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 2, &block_two, 1),
            kTvmErrorNoError);
  ASSERT_EQ(block_two, &model_memory_ptr[16 + STACK_ALLOCATOR_TAG_SIZE_BYTES]);

  void* two_blocks = NULL;
  ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 24, &two_blocks, 1),
            kTvmErrorNoError);
  ASSERT_EQ(two_blocks, &model_memory_ptr[32 + 2 * STACK_ALLOCATOR_TAG_SIZE_BYTES]);

  void* block_three = NULL;
  ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_three, 1),
            kTvmErrorNoError);
  ASSERT_EQ(block_three, &model_memory_ptr[64 + 3 * STACK_ALLOCATOR_TAG_SIZE_BYTES]);
}

/*
 * Tests resetting the stack after dealloc.
 */
TEST(AOTMemory, Free) {
  static uint8_t model_memory[80];
  tvm_workspace_t tvm_runtime_workspace;
  // Hand the allocator an already aligned start address, so the expected
  // block addresses below can be expressed relative to model_memory_ptr.
  uint8_t* model_memory_ptr = model_memory;
  uint32_t offset = align_pointer(&model_memory_ptr);
  ASSERT_EQ(StackMemoryManager_Init(&tvm_runtime_workspace, model_memory_ptr,
                                    sizeof(model_memory) - offset),
            kTvmErrorNoError);

  void* block_one = NULL;
  ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_one, 1),
            kTvmErrorNoError);
  ASSERT_EQ(block_one, &model_memory_ptr[0]);

  void* block_two = NULL;
  ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_two, 1),
            kTvmErrorNoError);
  ASSERT_EQ(block_two, &model_memory_ptr[16 + STACK_ALLOCATOR_TAG_SIZE_BYTES]);
  ASSERT_EQ(kTvmErrorNoError, StackMemoryManager_Free_Body(&tvm_runtime_workspace, block_two, 1));

  // Each allocation made after a free is expected to land on the address that
  // was just released.
  void* two_blocks = NULL;
  ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 2, &two_blocks, 1),
            kTvmErrorNoError);
  ASSERT_EQ(two_blocks, &model_memory_ptr[16 + STACK_ALLOCATOR_TAG_SIZE_BYTES]);
  ASSERT_EQ(kTvmErrorNoError, StackMemoryManager_Free_Body(&tvm_runtime_workspace, two_blocks, 1));

  void* block_three = NULL;
  ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_three, 1),
            kTvmErrorNoError);
  ASSERT_EQ(block_three, &model_memory_ptr[16 + STACK_ALLOCATOR_TAG_SIZE_BYTES]);
}

/*
 * Tests we return NULL if we over allocate.
 */
TEST(AOTMemory, OverAllocate) {
static uint8_t model_memory[72];
static uint8_t model_memory[80];
tvm_workspace_t tvm_runtime_workspace;
ASSERT_EQ(StackMemoryManager_Init(&tvm_runtime_workspace, model_memory, 80), kTvmErrorNoError);
uint8_t* model_memory_ptr = model_memory;
uint32_t offset = align_pointer(&model_memory_ptr);
ASSERT_EQ(StackMemoryManager_Init(&tvm_runtime_workspace, model_memory_ptr,
sizeof(model_memory) - offset),
kTvmErrorNoError);

void* block_one = NULL;
ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_one, 1),
kTvmErrorNoError);
ASSERT_EQ(block_one, &model_memory[0]);
ASSERT_EQ(block_one, &model_memory_ptr[0]);

void* block_two = NULL;
ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_two, 1),
kTvmErrorNoError);
ASSERT_EQ(block_two, &model_memory[16 + STACK_ALLOCATOR_TAG_SIZE_BYTES]);
ASSERT_EQ(block_two, &model_memory_ptr[16 + STACK_ALLOCATOR_TAG_SIZE_BYTES]);

void* two_blocks = NULL;
ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 64, &two_blocks, 1),
Expand All @@ -109,27 +153,54 @@ TEST(AOTMemory, OverAllocate) {
}

/*
 * Test for out-of-order memory deallocation.
 */
TEST(AOTMemory, FreeOutOfOrder) {
  static uint8_t model_memory[80];
  tvm_workspace_t tvm_runtime_workspace;
  // Hand the allocator an already aligned start address, so the expected
  // block addresses below can be expressed relative to model_memory_ptr.
  uint8_t* model_memory_ptr = model_memory;
  uint32_t offset = align_pointer(&model_memory_ptr);
  ASSERT_EQ(StackMemoryManager_Init(&tvm_runtime_workspace, model_memory_ptr,
                                    sizeof(model_memory) - offset),
            kTvmErrorNoError);

  void* block_one = NULL;
  ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_one, 1),
            kTvmErrorNoError);
  ASSERT_EQ(block_one, &model_memory_ptr[0]);

  void* block_two = NULL;
  ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_two, 1),
            kTvmErrorNoError);
  ASSERT_EQ(block_two, &model_memory_ptr[16 + STACK_ALLOCATOR_TAG_SIZE_BYTES]);

  // Releasing the older block while the newer one is still live must be rejected.
  ASSERT_EQ(StackMemoryManager_Free_Body(&tvm_runtime_workspace, block_one, 1),
            kTvmErrorPlatformStackAllocBadFree);
}

/*
 * Test for initial memory misalignment.
 */
TEST(AOTMemory, InitialMemoryMisAlignment) {
  static uint8_t model_memory[80];
  tvm_workspace_t tvm_runtime_workspace;

  // Push the buffer start off the alignment boundary if it happens to sit on one.
  uint8_t* model_memory_ptr = model_memory;
  const uint32_t skipped_bytes = misalign_pointer(&model_memory_ptr);
  const size_t buffer_size = sizeof(model_memory) - skipped_bytes;

  // Work out, on a copy of the pointer, where the start is expected to be rounded up to.
  uint8_t* expected_first_alloc = model_memory_ptr;
  const uint32_t alignment_offset = align_pointer(&expected_first_alloc);

  ASSERT_EQ(StackMemoryManager_Init(&tvm_runtime_workspace, model_memory_ptr, buffer_size),
            kTvmErrorNoError);

  // The workspace must begin at the rounded-up address and must not count the
  // bytes that were skipped to reach it.
  ASSERT_EQ(tvm_runtime_workspace.next_alloc, expected_first_alloc);
  ASSERT_EQ(tvm_runtime_workspace.workspace_size, buffer_size - alignment_offset);
}

int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
testing::FLAGS_gtest_death_test_style = "threadsafe";
Expand Down
16 changes: 10 additions & 6 deletions tests/python/relay/aot/aot_test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,10 @@ def emit_main_network_definition(main_file, mod_name):


def emit_main_prologue(main_file, workspace_bytes):
main_file.write(f"#define WORKSPACE_SIZE ({workspace_bytes})\n")
# Add TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES because of memory alignment.
main_file.write(
f"#define WORKSPACE_SIZE ({workspace_bytes} + TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES)\n"
)
main_file.write("static uint8_t g_aot_memory[WORKSPACE_SIZE];\n")
main_file.write("tvm_workspace_t app_workspace;\n")
main_file.write(
Expand All @@ -125,9 +128,8 @@ def emit_main_prologue(main_file, workspace_bytes):
void TVMLogf(const char* msg, ...) { }
TVM_DLL int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override) {}
int main(){\n
"""
int main(){\n
"""
)


Expand Down Expand Up @@ -157,6 +159,7 @@ def emit_main_run(main_file, input_list, output_list, mod_name):
main_file.write(
f'tvm_runtime_run(&{mangle_name(mod_name,"network")}, {mangle_name(mod_name,"inputs")}, {mangle_name(mod_name,"outputs")});'
)
main_file.write("\n")


def emit_main_compare(main_file, output_list, mod_name):
Expand All @@ -165,17 +168,18 @@ def emit_main_compare(main_file, output_list, mod_name):
main_file.write(f'for (int i = 0; i<{mangle_name(mod_name,"output_data")}{i}_len; i++){{\n')
if is_float_dtype:
main_file.write(
f'if (fabs({mangle_name(mod_name,"output_data")}{i}[i]-{mangle_name(mod_name,"expected_output_data")}{i}[i]) > 0.001f){{printf("ko\\n");return -1;}}\n'
f'if (fabs({mangle_name(mod_name,"output_data")}{i}[i]-{mangle_name(mod_name,"expected_output_data")}{i}[i]) > 0.001f){{\n\tprintf("ko\\n");\n\treturn -1;}}\n'
)
else:
main_file.write(
f'if ({mangle_name(mod_name,"output_data")}{i}[i]!={mangle_name(mod_name, "expected_output_data")}{i}[i]){{printf("ko\\n");return -1;}}\n'
f'if ({mangle_name(mod_name,"output_data")}{i}[i]!={mangle_name(mod_name, "expected_output_data")}{i}[i]){{\n\tprintf("ko\\n");\n\treturn -1;}}\n'
)
main_file.write("}\n")


def emit_main_init_memory_manager(main_file):
    """Write the StackMemoryManager_Init call for the generated C main, then a newline.

    main_file is any object with a write(str) method; it receives two writes,
    the C statement first and the line terminator second.
    """
    init_call = "StackMemoryManager_Init(&app_workspace, g_aot_memory, WORKSPACE_SIZE);"
    for chunk in (init_call, "\n"):
        main_file.write(chunk)


def emit_main_epilogue(main_file):
Expand Down

0 comments on commit a7b0770

Please sign in to comment.