Skip to content

Commit

Permalink
LightMetal - Initial Host API + Device Capture infra/library and unit…
Browse files Browse the repository at this point in the history
… tests (#17039)

 - This is round 5, builds upon previous 4 merges for LightMetal that brought
   flatbuffer cmake/infra, begin/end APIs, LoadTrace() API, flatbuffer/schema
   serialization/deserialization

 - This adds light-metal Capture support and instruments many popular (not exhaustive) APIs used
   by unit tests with LIGHT_METAL_TRACE_FUNCTION_CALL() and LIGHT_METAL_TRACE_FUNCTION_ENTRY().
   The latter, LIGHT_METAL_TRACE_FUNCTION_ENTRY(), is more recent and protects against host APIs
   recursively calling other host APIs (only the topmost level is traced). The two macros are not
   always called back-to-back.

 - Support Capture/Replay of the following ~14 host APIs

   EnqueueTrace(), ReplayTrace(), ReleaseTrace()
   CreateBuffer(), EnqueueWriteBuffer(), EnqueueReadBuffer(), DeallocateBuffer
   CreateKernel(), CreateCircularBuffer()
   SetRuntimeArgs(uint32) SetRuntimeArgs(Kernel,RuntimeArgs)
   CreateProgram(), EnqueueProgram()
   Finish()

 - During capture, complex objects like Programs, Kernels, Buffers, CBHandle are assigned
   unique global_id, and referred to by their global_id in capture when used by functions

 - When "Metal Trace" is enabled, don't capture EnqueueProgram(), instead
   inject ReplayTrace(), would be used alongside LoadTrace()

 - Can be optionally disabled at compile time using build_metal.sh --disable-light-metal-trace
   which will set C++ define TT_ENABLE_LIGHT_METAL_TRACE=0 (trace functions become NOP)

 - New Verif APIs LightMetalCompareToCapture() / LightMetalCompareToGolden().
   They live in lightmetal_capture_utils.hpp instead of host_api.hpp because they are used purely
   at capture time, for verification only, and so do not belong in the main host API header.

 - Test fixture runs capture-only right now, will automatically run binary once replay
   support is merged next.
  • Loading branch information
kmabeeTT committed Feb 5, 2025
1 parent 07fd4df commit 54efb00
Show file tree
Hide file tree
Showing 21 changed files with 1,522 additions and 18 deletions.
1 change: 1 addition & 0 deletions .github/workflows/cpp-post-commit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ jobs:
{name: stl, cmd: "./build/test/tt_metal/unit_tests_stl"},
{name: distributed, cmd: "./build/test/tt_metal/distributed/distributed_unit_tests_${{ inputs.arch }} --gtest_filter=MeshDeviceSuite.*"},

{name: lightmetal, cmd: "./build/test/tt_metal/unit_tests_lightmetal"},
{name: dispatch multicmd queue, cmd: "TT_METAL_GTEST_NUM_HW_CQS=2 ./build/test/tt_metal/unit_tests_dispatch_${{ inputs.arch }} --gtest_filter=MultiCommandQueue*Fixture.*"},

{name: ttnn cpp unit tests, cmd: ./build/test/ttnn/unit_tests_ttnn},
Expand Down
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ message(STATUS "Build TT METAL Tests: ${TT_METAL_BUILD_TESTS}")
message(STATUS "Build TTNN Tests: ${TTNN_BUILD_TESTS}")
message(STATUS "Build with Unity builds: ${TT_UNITY_BUILDS}")
message(STATUS "Build with Shared TTNN Sublibraries: ${ENABLE_TTNN_SHARED_SUBLIBS}")
message(STATUS "Build with LightMetal Trace Enabled: ${TT_ENABLE_LIGHT_METAL_TRACE}")

############################################################################################################################

Expand Down Expand Up @@ -232,6 +233,10 @@ add_link_options(
"$<$<BOOL:${ENABLE_UBSAN}>:-fsanitize=undefined>"
)

if(TT_ENABLE_LIGHT_METAL_TRACE)
add_compile_definitions(TT_ENABLE_LIGHT_METAL_TRACE)
endif()

if(ENABLE_CODE_TIMERS)
add_compile_definitions(TT_ENABLE_CODE_TIMERS)
endif()
Expand Down
12 changes: 12 additions & 0 deletions build_metal.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ show_help() {
echo " --clean Remove build workspaces."
echo " --build-static-libs Build tt_metal (not ttnn) as a static lib (BUILD_SHARED_LIBS=OFF)"
echo " --disable-unity-builds Disable Unity builds"
echo " --disable-light-metal-trace Disable Light Metal tracing to binary."
echo " --cxx-compiler-path Set path to C++ compiler."
echo " --c-compiler-path Set path to C++ compiler."
echo " --ttnn-shared-sub-libs Use shared libraries for ttnn."
Expand Down Expand Up @@ -58,6 +59,7 @@ build_programming_examples="OFF"
build_tt_train="OFF"
build_static_libs="OFF"
unity_builds="ON"
light_metal_trace="ON"
build_all="OFF"
cxx_compiler_path=""
c_compiler_path=""
Expand Down Expand Up @@ -88,6 +90,7 @@ build-programming-examples
build-tt-train
build-static-libs
disable-unity-builds
disable-light-metal-trace
release
development
debug
Expand Down Expand Up @@ -155,6 +158,8 @@ while true; do
ttnn_shared_sub_libs="ON";;
--disable-unity-builds)
unity_builds="OFF";;
--disable-light-metal-trace)
light_metal_trace="OFF";;
--cxx-compiler-path)
cxx_compiler_path="$2";shift;;
--c-compiler-path)
Expand Down Expand Up @@ -218,6 +223,7 @@ echo "INFO: Install Prefix: $cmake_install_prefix"
echo "INFO: Build tests: $build_tests"
echo "INFO: Enable Unity builds: $unity_builds"
echo "INFO: TTNN Shared sub libs : $ttnn_shared_sub_libs"
echo "INFO: Enable Light Metal Trace: $light_metal_trace"

# Prepare cmake arguments
cmake_args+=("-B" "$build_dir")
Expand Down Expand Up @@ -308,6 +314,12 @@ else
cmake_args+=("-DTT_UNITY_BUILDS=OFF")
fi

if [ "$light_metal_trace" = "ON" ]; then
cmake_args+=("-DTT_ENABLE_LIGHT_METAL_TRACE=ON")
else
cmake_args+=("-DTT_ENABLE_LIGHT_METAL_TRACE=OFF")
fi

if [ "$build_all" = "ON" ]; then
cmake_args+=("-DTT_METAL_BUILD_TESTS=ON")
cmake_args+=("-DTTNN_BUILD_TESTS=ON")
Expand Down
2 changes: 2 additions & 0 deletions tests/tt_metal/tt_metal/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/llk)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/perf_microbenchmark)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/stl)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/noc)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/lightmetal)

add_custom_target(
metal_tests
Expand All @@ -92,4 +93,5 @@ add_custom_target(
unit_tests_llk
unit_tests_stl
unit_tests_noc
unit_tests_lightmetal
)
23 changes: 23 additions & 0 deletions tests/tt_metal/tt_metal/lightmetal/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Build definition for the LightMetal unit-test executable (unit_tests_lightmetal).

# Source list for the test binary; currently a single translation unit.
set(UNIT_TESTS_LIGHTMETAL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/test_lightmetal.cpp)

add_executable(unit_tests_lightmetal ${UNIT_TESTS_LIGHTMETAL_SRC})
# Project-defined helper; presumably applies the TT_UNITY_BUILDS setting to this target -- confirm
# against its definition.
TT_ENABLE_UNITY_BUILD(unit_tests_lightmetal)

# Link against the test support libraries bundled under test_metal_common_libs.
target_link_libraries(unit_tests_lightmetal PUBLIC test_metal_common_libs)

# Include paths: Metalium::Metal's own include directories, the shared tests trees, and this
# directory (plus its common/ subdirectory).
target_include_directories(
unit_tests_lightmetal
PRIVATE
"$<TARGET_PROPERTY:Metalium::Metal,INCLUDE_DIRECTORIES>"
${PROJECT_SOURCE_DIR}/tests
${PROJECT_SOURCE_DIR}/tests/tt_metal/tt_metal/common
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/common
)

# Place the executable under <build>/test/tt_metal.
set_target_properties(
unit_tests_lightmetal
PROPERTIES
RUNTIME_OUTPUT_DIRECTORY
${PROJECT_BINARY_DIR}/test/tt_metal
)
60 changes: 60 additions & 0 deletions tests/tt_metal/tt_metal/lightmetal/lightmetal_fixture.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "dispatch_fixture.hpp"
#include <tt-metalium/device_impl.hpp>
#include <tt-metalium/hal.hpp>
#include <tt-metalium/host_api.hpp>
#include <tt-metalium/tt_metal.hpp>
#include <circular_buffer_constants.h>
#include <tt-metalium/kernel.hpp>
#include <tt-metalium/tt_backend_api_types.hpp>
#include "command_queue_fixture.hpp"
#include <lightmetal_binary.hpp>

class SingleDeviceLightMetalFixture : public CommandQueueFixture {
protected:
std::string trace_bin_path_;
bool write_bin_to_disk_;

void SetUp() override {
this->validate_dispatch_mode();
this->arch_ = tt::get_arch_from_string(tt::test_utils::get_umd_arch_name());
}

void CreateDevice(
const size_t trace_region_size, const bool replay_binary = true, const std::string trace_bin_path = "") {
// Skip writing to disk by default, unless user sets env var for local testing
write_bin_to_disk_ = tt::parse_env("LIGHTMETAL_SAVE_BINARY", false);

// If user didn't provide a specific trace bin path, set a default here based on test name
if (trace_bin_path == "") {
const auto test_info = ::testing::UnitTest::GetInstance()->current_test_info();
auto trace_filename = test_info ? std::string(test_info->name()) + ".bin" : "lightmetal_trace.bin";
this->trace_bin_path_ = "/tmp/" + trace_filename;
}

this->create_device(trace_region_size);
LightMetalBeginCapture();
}

// End light metal tracing, write to optional filename and optionally run from binary blob
void TearDown() override {
LightMetalBinary binary = LightMetalEndCapture();

if (binary.is_empty()) {
FAIL() << "Light Metal Binary is empty for test, unexpected.";
}
if (write_bin_to_disk_ && !this->trace_bin_path_.empty() && !binary.is_empty()) {
log_info(tt::LogTest, "Writing light metal binary {} bytes to {}", binary.size(), this->trace_bin_path_);
binary.save_to_file(this->trace_bin_path_);
}

if (!this->IsSlowDispatch()) {
tt::tt_metal::CloseDevice(this->device_);
}
}
};
Loading

0 comments on commit 54efb00

Please sign in to comment.