Skip to content

Commit

Permalink
Add perf reporting for ccl async mode
Browse files: browse the repository at this point in the history
  • Loading branch information
Aswinmcw authored and mouliraj-mcw committed Jan 21, 2025
1 parent 51d78f8 commit 223273e
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 0 deletions.
69 changes: 69 additions & 0 deletions tests/ttnn/unit_tests/operations/ccl/perf/test_ccl_async_perf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

import pytest
import ttnn
from models.utility_functions import skip_for_grayskull
from tests.ttnn.unit_tests.operations.ccl.test_new_all_gather import (
run_all_gather_impl,
)


@skip_for_grayskull("Requires eth connected devices to run")
@pytest.mark.parametrize(
    "num_devices, num_links, output_shape, dim, layout",
    [
        (4, 1, [1, 1, 64, 512], 3, ttnn.TILE_LAYOUT),
        (4, 1, [1, 1, 32, 32768], 3, ttnn.TILE_LAYOUT),
        (4, 1, [1, 1, 1024, 1024], 3, ttnn.TILE_LAYOUT),
        (4, 1, [1, 1, 2048, 16384], 3, ttnn.TILE_LAYOUT),
    ],
)
@pytest.mark.parametrize("input_dtype", [ttnn.bfloat16, ttnn.bfloat8_b])
@pytest.mark.parametrize("mem_config", [ttnn.MemoryConfig(buffer_type=ttnn.BufferType.DRAM)])
@pytest.mark.parametrize("num_iters", [20])
@pytest.mark.parametrize("enable_async", [True])
@pytest.mark.parametrize("device_params", [{"trace_region_size": 1824800}], indirect=True)
def test_all_gather_async_t3000(
    t3k_mesh_device,
    num_devices,
    output_shape,
    dim,
    num_links,
    input_dtype,
    layout,
    mem_config,
    num_iters,
    use_program_cache,
    function_level_defaults,
    enable_async,
):
    """Run the shared all-gather test body in traced, async mode on a T3000 mesh.

    Every parametrized case uses 4 devices, 1 link, ``dim=3`` and tile layout,
    combined with each listed dtype and a DRAM memory config. The work itself is
    delegated to ``run_all_gather_impl`` with ring topology, random input
    tensors and ``trace_mode=True``; this function only wires fixtures and
    parameters through.

    ``device_params`` is parametrized indirectly, so the trace region size is
    consumed by a fixture rather than by this function.
    """
    # Forwarded positionally, in the order the helper is called elsewhere in
    # this test; the helper's signature is not visible from this file.
    forwarded_positionals = (
        t3k_mesh_device,
        num_devices,
        output_shape,
        dim,
        num_links,
        input_dtype,
        layout,
        use_program_cache,
        function_level_defaults,
    )

    # Options that select the ring / async / traced variant of the run.
    run_options = {
        "all_gather_topology": ttnn.Topology.Ring,
        "num_iters": num_iters,
        "enable_async": enable_async,
        "rand_tensor": True,
        "mem_config": mem_config,
        "trace_mode": True,
    }

    run_all_gather_impl(*forwarded_positionals, **run_options)
4 changes: 4 additions & 0 deletions tests/ttnn/unit_tests/operations/ccl/test_new_all_gather.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def run_with_trace(
dim,
num_links,
output_mem_config,
enable_persistent_fabric,
multi_device_global_semaphore,
num_iter=20,
subdevice_id=None,
Expand All @@ -82,6 +83,7 @@ def run_with_trace(
memory_config=output_mem_config,
topology=all_gather_topology,
subdevice_id=subdevice_id,
enable_persistent_fabric_mode=enable_persistent_fabric,
)
for d in mesh_device.get_devices():
ttnn.synchronize_device(d)
Expand All @@ -98,6 +100,7 @@ def run_with_trace(
memory_config=output_mem_config,
topology=all_gather_topology,
subdevice_id=subdevice_id,
enable_persistent_fabric_mode=enable_persistent_fabric,
)
ttnn.end_trace_capture(mesh_device, trace_id, cq_id=0)
for d in mesh_device.get_devices():
Expand Down Expand Up @@ -242,6 +245,7 @@ def run_all_gather_impl(
dim,
num_links,
output_mem_config,
enable_persistent_fabric,
multi_device_global_semaphore=ccl_semaphore_handles,
num_iter=num_iters,
subdevice_id=worker_sub_device_id,
Expand Down

0 comments on commit 223273e

Please sign in to comment.