Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/xpu #1294

Open
wants to merge 18 commits into
base: dev/3.0
Choose a base branch
from
Open

Fix/xpu #1294

5 changes: 5 additions & 0 deletions modules/Nncase.Modules.CPU/Evaluator/CPU/PackedReduce.cs
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,11 @@ private IRType Visit(ITypeInferenceContext context, PackedReduce target, Distrib
switch (input.NdSBP[i])
{
case SBPSplit { Axis: int ix } when axes.Contains(ix):
if (target.ReduceOp is ReduceOp.Mean)
{
return new InvalidType($"Not support reduce mean for now.");
}

ndsbp[i] = SBP.P(target.ReduceOp);
break;
default:
Expand Down
1,110 changes: 956 additions & 154 deletions modules/Nncase.Modules.CPU/Passes/Distributed/AutoDistributed.cs

Large diffs are not rendered by default.

79 changes: 79 additions & 0 deletions modules/Nncase.Modules.CPU/Passes/Rules/CPU/BoxingTransform.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Copyright (c) Canaan Inc. All rights reserved.
// Licensed under the Apache license. See LICENSE file in the project root for full license information.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Nncase.IR;
using Nncase.IR.CPU;
using Nncase.PatternMatch;
using static Nncase.IR.F.NN;

using static Nncase.IR.TypePatternUtility;
using static Nncase.PatternMatch.F.CPU;
using static Nncase.PatternMatch.Utility;

namespace Nncase.Passes.Rules;

/// <summary>
/// Folds a boxing whose input is a constant tensor into a single <see cref="TensorConst"/>
/// that carries the NdSBP and placement of the boxing's target distributed type.
/// </summary>
[RuleGenerator]
public partial class FoldBoxingConst : RewriteRule<Pattern>
{
    /// <inheritdoc/>
    public override Pattern Pattern { get; } = IsBoxing(
        target_name: "boxing",
        _ => true,
        IsTensorConst("input"));

    /// <summary>
    /// Builds the distributed constant that replaces the matched boxing call.
    /// </summary>
    /// <param name="boxing">The matched boxing op (pattern name "boxing").</param>
    /// <param name="input">The constant tensor fed into the boxing (pattern name "input").</param>
    /// <returns>
    /// A distributed <see cref="TensorConst"/>, or <c>null</c> when the boxing target is not a
    /// <see cref="DistributedType"/> and the rule therefore does not apply.
    /// </returns>
    private Expr? GetReplace(Boxing boxing, Tensor input)
    {
        // Decline the rewrite instead of throwing InvalidCastException when the target
        // type has no NdSBP/Placement to attach to the constant.
        if (boxing.NewType is not DistributedType type)
        {
            return null;
        }

        return new TensorConst(input, type.NdSBP, type.Placement);
    }
}

/// <summary>
/// Rewrites a tensor constant whose checked type is a <see cref="DistributedType"/> into a
/// boxing of the constant's underlying value to that same type.
/// </summary>
[RuleGenerator]
public partial class UnfoldDistributedConst : RewriteRule<Pattern>
{
    /// <inheritdoc/>
    public override Pattern Pattern { get; } = IsTensorConst("input");

    /// <summary>
    /// Produces the boxing expression for a distributed constant.
    /// </summary>
    /// <param name="input">The matched tensor constant (pattern name "input").</param>
    /// <returns>The boxing call, or <c>null</c> when the constant is not distributed.</returns>
    private Expr? GetReplace(TensorConst input)
    {
        var checkedType = input.CheckedType;

        // Constants that are not distributed are left untouched.
        if (checkedType is not DistributedType)
        {
            return null;
        }

        return IR.F.CPU.Boxing(input.Value, checkedType);
    }
}

/// <summary>
/// Splits a boxing whose input carries partial SBPs into two chained boxings: the first only
/// replaces every <see cref="SBPPartial"/> with <c>SBP.B</c>, the second boxes that result to
/// the original output type.
/// </summary>
[RuleGenerator]
public partial class SplitPartialAndReshardBoxing : RewriteRule<Pattern>
{
    /// <inheritdoc/>
    public override Pattern Pattern { get; } = IsBoxing(
        target_name: "boxing",
        call_name: "call",
        _ => true,
        IsWildcard("input"));

    /// <summary>
    /// Builds the two-step boxing chain for a matched boxing call.
    /// </summary>
    /// <param name="call">The matched boxing call (pattern name "call").</param>
    /// <param name="input">The expression being boxed (pattern name "input").</param>
    /// <returns>
    /// The nested boxing expression, or <c>null</c> when the input or output is not distributed,
    /// the input has no partial SBP, or replacing the partials already yields the output NdSBP.
    /// </returns>
    private Expr? GetReplace(Call call, Expr input)
    {
        if (input.CheckedType is not DistributedType inType)
        {
            return null;
        }

        if (!inType.NdSBP.Any(sbp => sbp is SBPPartial))
        {
            return null;
        }

        if (call.CheckedType is not DistributedType outType)
        {
            return null;
        }

        // Input layout with every partial entry replaced by SBP.B.
        var resolved = inType.NdSBP.Select(sbp => sbp is SBPPartial ? SBP.B : sbp).ToArray();

        // A split is needed only when the resolved layout differs from the requested one.
        var needsSplit = resolved.Length != outType.NdSBP.Count;
        for (var i = 0; !needsSplit && i < resolved.Length; i++)
        {
            needsSplit = resolved[i] != outType.NdSBP[i];
        }

        if (!needsSplit)
        {
            return null;
        }

        var intermediateType = new DistributedType(inType.TensorType, resolved, inType.Placement);
        return IR.F.CPU.Boxing(IR.F.CPU.Boxing(input, intermediateType), outType);
    }
}
34 changes: 0 additions & 34 deletions modules/Nncase.Modules.CPU/Passes/Rules/CPU/FoldBoxingConst.cs

This file was deleted.

2 changes: 1 addition & 1 deletion modules/Nncase.Modules.CPU/Targets/CPUTarget.cs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ public void RegisterTargetDependentAfterQuantPass(IPassManager passManager, Comp
});
}

passManager.Add<Passes.Distributed.AutoDistributedPass>(true, Kind);
passManager.Add<Passes.Distributed.AutoDistributedPass>(Kind);

passManager.Add<CPUFunctionPartitionPass>();

Expand Down
13 changes: 13 additions & 0 deletions modules/Nncase.Modules.CPU/Targets/CPUTargetOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ public enum NocArchitecture : byte
CrossBar = 1,
}

/// <summary>
/// The distributed search strategy selectable through the CPU target options.
/// </summary>
/// <remarks>
/// NOTE(review): the meaning of each member is defined by its consumer, which is not visible
/// here. Numeric values are spelled out explicitly.
/// </remarks>
public enum AutoDistributedSearchStrategy : uint
{
    ExpandPartial = 0,
    ExpandAll = 1,
    NoExpand = 2,
}

public class CpuTargetOptions : ICpuTargetOptions
{
[DisplayName("--model-name")]
Expand Down Expand Up @@ -112,6 +119,12 @@ public class CpuTargetOptions : ICpuTargetOptions
[DefaultValue("")]
public string DistributedScheme { get; set; } = string.Empty;

[DisplayName("--distributed-search-strategy")]
[Description("the distributed search strategy.")]
[DefaultValue(AutoDistributedSearchStrategy.ExpandPartial)]
[CommandLine.FromAmong(AutoDistributedSearchStrategy.ExpandPartial, AutoDistributedSearchStrategy.ExpandAll, AutoDistributedSearchStrategy.NoExpand)]
public AutoDistributedSearchStrategy DistributedSearchStrategy { get; set; } = AutoDistributedSearchStrategy.ExpandPartial;

[DisplayName("--custom-op-scheme")]
[Description("the custom-op scheme path.")]
[DefaultValue("")]
Expand Down
10 changes: 9 additions & 1 deletion modules/Nncase.Modules.CPU/Targets/CPUTargetOptionsCommand.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Copyright (c) Canaan Inc. All rights reserved.
// Licensed under the Apache license. See LICENSE file in the project root for full license information.
/* This file is generated by tools/stackvm_gen/CApiGen at 12/20/2024 3:41:05 PM +08:00. */
/* This file is generated by tools/stackvm_gen/CApiGen at 2/10/2025 1:59:47 PM +08:00. */

using System;
using System.Collections.Generic;
Expand Down Expand Up @@ -110,6 +110,11 @@ public CpuTargetOptionsCommand(string name)
description: "the distributed scheme path.",
getDefaultValue: () => string.Empty);
Add(DistributedSchemeOption);
DistributedSearchStrategyOption = new Option<AutoDistributedSearchStrategy>(
name: "--distributed-search-strategy",
description: "the distributed search strategy.",
getDefaultValue: () => AutoDistributedSearchStrategy.ExpandPartial);
Add(DistributedSearchStrategyOption);
CustomOpSchemeOption = new Option<string>(
name: "--custom-op-scheme",
description: "the custom-op scheme path.",
Expand Down Expand Up @@ -145,6 +150,8 @@ public CpuTargetOptionsCommand(string name)

public Option<string> DistributedSchemeOption { get; }

public Option<AutoDistributedSearchStrategy> DistributedSearchStrategyOption { get; }

public Option<string> CustomOpSchemeOption { get; }
}

Expand Down Expand Up @@ -175,6 +182,7 @@ public CpuTargetOptions GetBoundValue(InvocationContext context)
MemoryCapacities = context.ParseResult.GetValueForOption(_cmd.MemoryCapacitiesOption)!.ToArray(),
MemoryBandWidths = context.ParseResult.GetValueForOption(_cmd.MemoryBandWidthsOption)!.ToArray(),
DistributedScheme = context.ParseResult.GetValueForOption(_cmd.DistributedSchemeOption)!,
DistributedSearchStrategy = context.ParseResult.GetValueForOption(_cmd.DistributedSearchStrategyOption)!,
CustomOpScheme = context.ParseResult.GetValueForOption(_cmd.CustomOpSchemeOption)!,
};
}
Expand Down
13 changes: 9 additions & 4 deletions python/_nncase.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ from typing import Any, List, BinaryIO, Enum
import numpy


""" This block is generated by tools/stackvm_gen/CApiGen at 12/20/2024 5:27:07 PM +08:00. """
""" This block is generated by tools/stackvm_gen/CApiGen at 2/10/2025 1:59:47 PM +08:00. """
class MemoryAccessArchitecture(Enum):
UMA = 0
NUMA = 1
Expand All @@ -13,9 +13,13 @@ class NocArchitecture(Enum):
class HierarchyKind(Enum):
Parallel = 0
SMT = 1
""" end the auto generated block by tools/stackvm_gen/CApiGen at 12/20/2024 5:27:07 PM +08:00. """
class AutoDistributedSearchStrategy(Enum):
ExpandPartial = 0
ExpandAll = 1
NoExpand = 2
""" end the auto generated block by tools/stackvm_gen/CApiGen at 2/10/2025 1:59:47 PM +08:00. """

""" This block is generated by tools/stackvm_gen/CApiGen at 12/20/2024 5:27:07 PM +08:00. """
""" This block is generated by tools/stackvm_gen/CApiGen at 2/10/2025 1:59:47 PM +08:00. """
class CpuTargetOptions:
def __init__(self) -> None: ...
ModelName: str
Expand All @@ -32,8 +36,9 @@ class CpuTargetOptions:
MemoryCapacities: List[int]
MemoryBandWidths: List[int]
DistributedScheme: str
DistributedSearchStrategy: AutoDistributedSearchStrategy
CustomOpScheme: str
""" end the auto generated block by tools/stackvm_gen/CApiGen at 12/20/2024 5:27:07 PM +08:00. """
""" end the auto generated block by tools/stackvm_gen/CApiGen at 2/10/2025 1:59:47 PM +08:00. """

class CompileOptions:
benchmark_only: bool
Expand Down
2 changes: 1 addition & 1 deletion python/nncase/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
import _nncase
from _nncase import RuntimeTensor, TensorDesc, Simulator, CpuTargetOptions, NocArchitecture, HierarchyKind, MemoryAccessArchitecture
from _nncase import RuntimeTensor, TensorDesc, Simulator, CpuTargetOptions, NocArchitecture, HierarchyKind, MemoryAccessArchitecture, AutoDistributedSearchStrategy


def _initialize():
Expand Down
13 changes: 11 additions & 2 deletions python/nncase/native/ffi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ PYBIND11_MODULE(_nncase, m) {
&shape_bucket_options::fix_var_map));

// clang-format off
/* This block is generated by tools/stackvm_gen/CApiGen at 12/20/2024 3:41:05 PM +08:00. */
/* This block is generated by tools/stackvm_gen/CApiGen at 2/10/2025 1:59:47 PM +08:00. */

py::enum_<memory_access_architecture_t>(m, "MemoryAccessArchitecture")
.value("UMA", memory_access_architecture_uma)
Expand All @@ -249,6 +249,11 @@ PYBIND11_MODULE(_nncase, m) {
.value("Parallel", hierarchy_kind_parallel)
.value("SMT", hierarchy_kind_smt);

py::enum_<auto_distributed_search_strategy_t>(m, "AutoDistributedSearchStrategy")
.value("ExpandPartial", auto_distributed_search_strategy_expand_partial)
.value("ExpandAll", auto_distributed_search_strategy_expand_all)
.value("NoExpand", auto_distributed_search_strategy_no_expand);


py::class_<cpu_target_options>(m, "CpuTargetOptions")
.def(py::init())
Expand Down Expand Up @@ -308,12 +313,16 @@ PYBIND11_MODULE(_nncase, m) {
"DistributedScheme",
[]() {},
py::overload_cast<std::string_view>(&cpu_target_options::distributed_scheme))
.def_property(
"DistributedSearchStrategy",
py::overload_cast<>(&cpu_target_options::distributed_search_strategy),
py::overload_cast<auto_distributed_search_strategy_t>(&cpu_target_options::distributed_search_strategy))
.def_property(
"CustomOpScheme",
[]() {},
py::overload_cast<std::string_view>(&cpu_target_options::custom_op_scheme)) ;

/* end the auto generated block by tools/stackvm_gen/CApiGen at 12/20/2024 3:41:05 PM +08:00. */
/* end the auto generated block by tools/stackvm_gen/CApiGen at 2/10/2025 1:59:47 PM +08:00. */
// clang-format on

py::class_<calibration_dataset_provider>(m, "CalibrationDatasetProvider")
Expand Down
27 changes: 21 additions & 6 deletions src/Native/include/nncase/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ typedef enum {
} nncase_input_type_t;

// clang-format off
/* This block is generated by tools/stackvm_gen/CApiGen at 12/20/2024 3:41:05 PM +08:00. */
/* This block is generated by tools/stackvm_gen/CApiGen at 2/10/2025 1:59:47 PM +08:00. */
enum memory_access_architecture_t : uint8_t {
memory_access_architecture_uma = 0,
memory_access_architecture_numa = 1,
Expand All @@ -94,7 +94,12 @@ enum hierarchy_kind_t : uint8_t {
hierarchy_kind_parallel = 0,
hierarchy_kind_smt = 1,
};
/* end the auto generated block by tools/stackvm_gen/CApiGen at 12/20/2024 3:41:05 PM +08:00. */
enum auto_distributed_search_strategy_t : uint8_t {
auto_distributed_search_strategy_expand_partial = 0,
auto_distributed_search_strategy_expand_all = 1,
auto_distributed_search_strategy_no_expand = 2,
};
/* end the auto generated block by tools/stackvm_gen/CApiGen at 2/10/2025 1:59:47 PM +08:00. */
// clang-format on

typedef struct {
Expand Down Expand Up @@ -241,7 +246,7 @@ typedef struct {
clr_object_handle_t shape_bucket_options, const char *fix_var_map,
size_t fix_var_map_size);
// clang-format off
/* This block is generated by tools/stackvm_gen/CApiGen at 12/20/2024 3:41:05 PM +08:00. */
/* This block is generated by tools/stackvm_gen/CApiGen at 2/10/2025 1:59:47 PM +08:00. */
clr_object_handle_t (*cpu_target_options_create)();
void (*cpu_target_options_set_model_name)(clr_object_handle_t handle, const char* value, size_t length);
void (*cpu_target_options_set_packing)(clr_object_handle_t handle, uint8_t value);
Expand All @@ -260,8 +265,10 @@ typedef struct {
void (*cpu_target_options_set_memory_capacities)(clr_object_handle_t handle, int32_t* value, size_t shape0);
void (*cpu_target_options_set_memory_band_widths)(clr_object_handle_t handle, int32_t* value, size_t shape0);
void (*cpu_target_options_set_distributed_scheme)(clr_object_handle_t handle, const char* value, size_t length);
uint32_t (*cpu_target_options_get_distributed_search_strategy)(clr_object_handle_t handle);
void (*cpu_target_options_set_distributed_search_strategy)(clr_object_handle_t handle, uint32_t value);
void (*cpu_target_options_set_custom_op_scheme)(clr_object_handle_t handle, const char* value, size_t length);
/* end the auto generated block by tools/stackvm_gen/CApiGen at 12/20/2024 3:41:05 PM +08:00. */
/* end the auto generated block by tools/stackvm_gen/CApiGen at 2/10/2025 1:59:47 PM +08:00. */
// clang-format on

clr_object_handle_t (*rtvalue_from_handle)(nncase::value_node *value);
Expand Down Expand Up @@ -505,7 +512,7 @@ class shape_bucket_options : public clr_object_base {
};

// clang-format off
/* This block is generated by tools/stackvm_gen/CApiGen at 12/20/2024 3:41:05 PM +08:00. */
/* This block is generated by tools/stackvm_gen/CApiGen at 2/10/2025 1:59:47 PM +08:00. */
class cpu_target_options : public clr_object_base {
public:
using clr_object_base::clr_object_base;
Expand Down Expand Up @@ -628,11 +635,19 @@ class cpu_target_options : public clr_object_base {
nncase_clr_api()->cpu_target_options_set_distributed_scheme(obj_.get(), value.data(), value.length());
}

auto_distributed_search_strategy_t distributed_search_strategy() {
return (auto_distributed_search_strategy_t)nncase_clr_api()->cpu_target_options_get_distributed_search_strategy(obj_.get());
}

void distributed_search_strategy(auto_distributed_search_strategy_t value) {
nncase_clr_api()->cpu_target_options_set_distributed_search_strategy(obj_.get(), value);
}

void custom_op_scheme(std::string_view value) {
nncase_clr_api()->cpu_target_options_set_custom_op_scheme(obj_.get(), value.data(), value.length());
}
};
/* end the auto generated block by tools/stackvm_gen/CApiGen at 12/20/2024 3:41:05 PM +08:00. */
/* end the auto generated block by tools/stackvm_gen/CApiGen at 2/10/2025 1:59:47 PM +08:00. */
// clang-format on

class cstream : public clr_object_base {
Expand Down
Loading
Loading