diff --git a/python/tvm/contrib/ethosu/cascader/pareto.py b/python/tvm/contrib/ethosu/cascader/pareto.py index b282cfcbaa43b..3c4dcbc88a45d 100644 --- a/python/tvm/contrib/ethosu/cascader/pareto.py +++ b/python/tvm/contrib/ethosu/cascader/pareto.py @@ -21,8 +21,6 @@ from . import _ffi_api from .plan import Plan -from .proposal import Proposal -from .tensor_config import MemoryRegion def _get_pareto_frontier(costs: List[List[float]]) -> List[bool]: @@ -39,9 +37,3 @@ def _thin_vector(vec: List[Object], max_size: int) -> List[Object]: def _pareto_cull_plans(plans: List[Plan], max_plans: int) -> List[Plan]: return list(_ffi_api.ParetoCullPlans(plans, max_plans)) - - -def pareto_cull_proposals( - proposals: List[Proposal], cascade_region: MemoryRegion, max_proposals: int -) -> List[Proposal]: - return list(_ffi_api.ParetoCullProposals(proposals, cascade_region, max_proposals)) diff --git a/python/tvm/contrib/ethosu/cascader/proposal.py b/python/tvm/contrib/ethosu/cascader/proposal.py index c72653a108312..13184108120e9 100644 --- a/python/tvm/contrib/ethosu/cascader/proposal.py +++ b/python/tvm/contrib/ethosu/cascader/proposal.py @@ -22,51 +22,85 @@ from tvm.runtime import Object from . import _ffi_api -from .graph import Tensor, Part +from .graph import Tensor, Part, CascaderGraph from .tensor_config import TensorConfig, MemoryRegion @tvm._ffi.register_object("contrib.ethosu.cascader.Proposal") class Proposal(Object): - """Proposal class""" + """A class which describes how to schedule a CascaderGraph as a series of disjoint Plans. + + Attributes + ---------- + graph : CascaderGraph + The CascaderGraph to which the Proposal applies. + part_group : FrozenSet[Part] + The Parts which are covered by the Proposal. + plans : List[Plan] + The Plans used in the Proposal. + input_tensor_configs : Dict[Tensor, TensorConfig] + The TensorConfigs indexed by Tensor in the Proposal which aren't produced by a Plan. 
+ cascade_region : MemoryRegion + The MemoryRegion where cascading buffers should be homed. + memory_usage : int + The memory required to execute the Proposal in the cascading MemoryRegion. + cycles : int + The estimated cycles taken to execute the Proposal. + + """ def __init__( self, + graph: CascaderGraph, part_group: FrozenSet[Part], plans: List[Plan], input_tensor_configs: Dict[Tensor, TensorConfig], + cascade_region: MemoryRegion, memory_usage: Dict[MemoryRegion, int], cycles: int, ): self.__init_handle_by_constructor__( _ffi_api.Proposal, + graph, list(part_group), plans, input_tensor_configs, + cascade_region, memory_usage, cycles, ) @property - def graph(self): + def graph(self) -> CascaderGraph: + """The CascaderGraph to which the Proposal applies.""" return self._graph @property - def part_group(self): + def part_group(self) -> FrozenSet[Part]: + """The Parts which are covered by the Proposal.""" return frozenset(self._part_group) @property - def plans(self): + def plans(self) -> List[Plan]: + """The Plans used in the Proposal.""" return list(self._plans) @property - def input_tensor_configs(self): + def input_tensor_configs(self) -> Dict[Tensor, TensorConfig]: + """The TensorConfigs indexed by Tensor in the Proposal which aren't produced by a Plan.""" return dict(self._input_tensor_configs) @property - def memory_usage(self): + def cascade_region(self) -> MemoryRegion: + """The MemoryRegion where cascading buffers should be homed.""" + return self._cascade_region + + @property + def memory_usage(self) -> int: + """The memory required to execute the Proposal in the cascading MemoryRegion.""" return int(self._memory_usage) @property - def cycles(self): + def cycles(self) -> int: + """The estimated cycles taken to execute the Proposal.""" return int(self._cycles) diff --git a/python/tvm/contrib/ethosu/cascader/proposal_generator.py b/python/tvm/contrib/ethosu/cascader/proposal_generator.py index 99361cb60cc7b..d79021a20539e 100644 --- 
a/python/tvm/contrib/ethosu/cascader/proposal_generator.py +++ b/python/tvm/contrib/ethosu/cascader/proposal_generator.py @@ -29,6 +29,26 @@ def generate_proposals( home_map: Dict[FrozenSet[Part], List[Plan]], options: CascaderOptions, ) -> List[Proposal]: + """Generate Pareto optimal Proposals for a CascaderGraph. + + This algorithm takes a top-down dynamic programming approach to determining how + to optimally combine Plans into Proposals. + + Parameters + ---------- + graph : CascaderGraph + The CascaderGraph to generate Proposals for. + home_map : Dict[FrozenSet[Part], List[Plan]] + The Tensor homing map defining valid memory homes for Tensors. + options : CascaderOptions + The configuration options with which to run the generator. + + Returns + ------- + List[Proposal] + A list of Pareto optimal Proposals. + + """ return list( _ffi_api.GenerateProposals( graph, diff --git a/src/contrib/ethosu/cascader/pareto.cc b/src/contrib/ethosu/cascader/pareto.cc index 21f0994ba3164..52ea729bffa26 100644 --- a/src/contrib/ethosu/cascader/pareto.cc +++ b/src/contrib/ethosu/cascader/pareto.cc @@ -161,12 +161,6 @@ TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.ParetoCullPlans") return Array(ParetoCullPlans(vplans, max_size)); }); -TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.ParetoCullProposals") - .set_body_typed([](Array proposals, int max_size) { - std::vector vproposals(proposals.begin(), proposals.end()); - return Array(ParetoCullProposals(vproposals, max_size)); - }); - } // namespace cascader } // namespace ethosu } // namespace contrib diff --git a/src/contrib/ethosu/cascader/proposal.cc b/src/contrib/ethosu/cascader/proposal.cc index 5a7c88a1a7a7f..e96be3466e107 100644 --- a/src/contrib/ethosu/cascader/proposal.cc +++ b/src/contrib/ethosu/cascader/proposal.cc @@ -23,6 +23,7 @@ #include #include +#include #include #include diff --git a/src/contrib/ethosu/cascader/proposal_generator.cc b/src/contrib/ethosu/cascader/proposal_generator.cc index 3fda0d9921b5e..ce709cbaa657b 
100644 --- a/src/contrib/ethosu/cascader/proposal_generator.cc +++ b/src/contrib/ethosu/cascader/proposal_generator.cc @@ -51,13 +51,12 @@ std::unordered_set GetPlanBoundaryConfigs(const Plan& plan) { return boundary_configs; } -bool IsPlanCompatible(const Proposal& proposal, - const std::vector& plan_part_group, +bool IsPlanCompatible(const Proposal& proposal, const std::vector& plan_part_group, const std::unordered_set& plan_boundary_configs) { // Check the Plan Part group is disjoint with the Proposal Part group - for(const auto& plan_part : plan_part_group) { - for(const auto& proposal_part : proposal->GetPartGroup()) { - if(plan_part == proposal_part) { + for (const auto& plan_part : plan_part_group) { + for (const auto& proposal_part : proposal->GetPartGroup()) { + if (plan_part == proposal_part) { return false; } } @@ -126,24 +125,25 @@ Proposal AddPlanToProposal(const Proposal& proposal, const Plan& plan, new_memory_usage = std::max(new_memory_usage, proposal->GetMemoryUsage()); int new_cycles = proposal->GetCycles() + plan->GetCycles(); std::vector new_part_group = proposal->GetPartGroup(); - new_part_group.insert(new_part_group.end(), plan->GetPartGroup().begin(), plan->GetPartGroup().end()); + new_part_group.insert(new_part_group.end(), plan->GetPartGroup().begin(), + plan->GetPartGroup().end()); std::sort(new_part_group.begin(), new_part_group.end()); return Proposal(proposal->GetGraph(), new_part_group, new_plans, new_configs, proposal->GetCascadeRegion(), new_memory_usage, new_cycles); } -std::vector GeneratePartialProposals(const CascaderGraph& graph, const HomeMap& home_map, - const CascaderOptions options, - const std::unordered_map, ObjectPtrHash, ObjectPtrEqual>& plans_by_part, - const std::vector& partial_proposal_group, - std::unordered_map, std::vector>* proposals_by_group) { +std::vector GeneratePartialProposals( + const CascaderGraph& graph, const HomeMap& home_map, const CascaderOptions options, + const std::unordered_map, ObjectPtrHash, 
ObjectPtrEqual>& plans_by_part, + const std::vector& partial_proposal_group, + std::unordered_map, std::vector>* proposals_by_group) { if (proposals_by_group->find(partial_proposal_group) != proposals_by_group->end()) { return proposals_by_group->at(partial_proposal_group); } if (partial_proposal_group.size() == 0) { (*proposals_by_group)[partial_proposal_group] = - std::vector{Proposal(graph, std::vector(), std::vector(), - TensorConfigMap(), options->cascade_region, 0, 0)}; + std::vector{Proposal(graph, std::vector(), std::vector(), + TensorConfigMap(), options->cascade_region, 0, 0)}; } else { Part part = partial_proposal_group.back(); const auto& plans = plans_by_part.at(part); @@ -158,26 +158,26 @@ std::vector GeneratePartialProposals(const CascaderGraph& graph, const // pick the current Plan. std::vector residual_proposal_group; std::copy_if(partial_proposal_group.begin(), partial_proposal_group.end(), - std::back_inserter(residual_proposal_group), [&plan](Part value) { - return std::find(plan->GetPartGroup().begin(), - plan->GetPartGroup().end(), + std::back_inserter(residual_proposal_group), [&plan](Part value) { + return std::find(plan->GetPartGroup().begin(), plan->GetPartGroup().end(), value) == plan->GetPartGroup().end(); - }); + }); // std::sort(residual_proposal_group.begin(), residual_proposal_group.end()); - const auto& residual_proposals = GeneratePartialProposals(graph, home_map, options, plans_by_part, residual_proposal_group, proposals_by_group); + const auto& residual_proposals = GeneratePartialProposals( + graph, home_map, options, plans_by_part, residual_proposal_group, proposals_by_group); auto plan_output_tensor = plan->GetOutputConfig()->GetTensor(); ICHECK_LE(plan_output_tensor->GetProducers().size(), 1) << "All tensors must have at most one producer."; for (const auto& residual_proposal : residual_proposals) { if (IsPlanCompatible(residual_proposal, plan->GetPartGroup(), plan_boundary_configs)) { - 
(*proposals_by_group)[partial_proposal_group].push_back(AddPlanToProposal( - residual_proposal, plan, plan_boundary_configs)); + (*proposals_by_group)[partial_proposal_group].push_back( + AddPlanToProposal(residual_proposal, plan, plan_boundary_configs)); } } } } - (*proposals_by_group)[partial_proposal_group] = ParetoCullProposals( - proposals_by_group->at(partial_proposal_group), options->max_proposals); + (*proposals_by_group)[partial_proposal_group] = + ParetoCullProposals(proposals_by_group->at(partial_proposal_group), options->max_proposals); } return proposals_by_group->at(partial_proposal_group); } @@ -194,7 +194,8 @@ std::vector GenerateProposals(const CascaderGraph& graph, const HomeMa std::vector partial_proposal_group = graph->GetPartOrder(); // A map of Proposals indexed by the Part group they cover std::unordered_map, std::vector> proposals_by_group; - return GeneratePartialProposals(graph, home_map, options, plans_by_part, partial_proposal_group, &proposals_by_group); + return GeneratePartialProposals(graph, home_map, options, plans_by_part, partial_proposal_group, + &proposals_by_group); } TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GenerateProposals") diff --git a/tests/python/contrib/test_ethosu/cascader/test_proposal_generator.py b/tests/python/contrib/test_ethosu/cascader/test_proposal_generator.py index eecf7b561ee2d..5af89a415978b 100644 --- a/tests/python/contrib/test_ethosu/cascader/test_proposal_generator.py +++ b/tests/python/contrib/test_ethosu/cascader/test_proposal_generator.py @@ -17,124 +17,122 @@ import pytest from tvm.contrib.ethosu.cascader.proposal_generator import generate_proposals -from .infra import make_simple_home_map, make_options - - -def test_generate_proposals(FLASH, SRAM, TwoConv2DGraph): - graph = TwoConv2DGraph - min_sram = 3700 - max_sram = 11700 - input_configs = 1 - parts = 2 - home_map = make_simple_home_map(graph, SRAM, FLASH) - options = make_options( - cascade_region=SRAM, - max_proposals=32, - 
stripe_factors=4, - max_plan_size=10, - ) - - proposals = generate_proposals(graph, home_map, options) - - for proposal in proposals: - assert 0 < len(proposal.plans) <= parts - assert len(proposal.input_tensor_configs) == input_configs - assert len(proposal.part_group) == parts - assert min_sram < proposal.memory_usage < max_sram - assert proposal.cycles > 0 - - -def test_generate_proposals_binary(FLASH, SRAM, BinaryGraph): - graph = BinaryGraph - input_configs = 2 - parts = 3 - home_map = make_simple_home_map(graph, SRAM, FLASH) - options = make_options( - cascade_region=SRAM, - max_proposals=32, - stripe_factors=4, - max_plan_size=10, - ) - - proposals = generate_proposals(graph, home_map, options) - - for proposal in proposals: - assert 0 < len(proposal.plans) <= parts - # assert len(proposal.input_tensor_configs) == input_configs - assert len(proposal.part_group) == parts - assert proposal.cycles > 0 - - -def test_generate_proposals_mobilenetv1_start(FLASH, SRAM, MobileNetv1StartGraph): - graph = MobileNetv1StartGraph - min_sram = 200000 - max_sram = 1300000 - input_configs = 1 - parts = 8 - home_map = make_simple_home_map(graph, SRAM, FLASH) - options = make_options( - cascade_region=SRAM, - max_proposals=32, - stripe_factors=5, - max_plan_size=10, - ) - - proposals = generate_proposals(graph, home_map, options) - - for proposal in proposals: - assert 0 < len(proposal.plans) <= parts - assert len(proposal.input_tensor_configs) == input_configs - assert len(proposal.part_group) == parts - assert min_sram < proposal.memory_usage < max_sram - assert proposal.cycles > 0 - - -def test_generate_proposals_mobilenetv1(FLASH, SRAM, MobileNetv1Graph): - graph = MobileNetv1Graph - min_sram = 200000 - max_sram = 1300000 - input_configs = 1 - parts = 27 - home_map = make_simple_home_map(graph, SRAM, FLASH) - options = make_options( - cascade_region=SRAM, - max_proposals=32, - stripe_factors=5, - max_plan_size=10, - ) - - proposals = generate_proposals(graph, home_map, 
options) - - for proposal in proposals: - assert 0 < len(proposal.plans) <= parts - assert len(proposal.input_tensor_configs) == input_configs - assert len(proposal.part_group) == parts - assert min_sram < proposal.memory_usage < max_sram - assert proposal.cycles > 0 - - -def test_generate_proposals_mobilenetv2diamond(FLASH, SRAM, MobileNetv2DiamondGraph): - graph = MobileNetv2DiamondGraph - min_sram = 370000 - max_sram = 990000 - input_configs = 1 - parts = 5 - home_map = make_simple_home_map(graph, SRAM, FLASH) - options = make_options( - cascade_region=SRAM, - max_proposals=64, - stripe_factors=5, - max_plan_size=10, - ) - - proposals = generate_proposals(graph, home_map, options) - - for proposal in proposals: - assert 0 < len(proposal.plans) <= parts - assert len(proposal.input_tensor_configs) == input_configs - assert len(proposal.part_group) == parts - assert min_sram < proposal.memory_usage < max_sram - assert proposal.cycles > 0 +from .infra import make_simple_home_map, make_options, ethosu_enabled + + +if ethosu_enabled: + + def test_generate_proposals(FLASH, SRAM, TwoConv2DGraph): + graph = TwoConv2DGraph + min_sram = 3700 + max_sram = 11700 + input_configs = 1 + parts = 2 + home_map = make_simple_home_map(graph, SRAM, FLASH) + options = make_options( + cascade_region=SRAM, + max_proposals=32, + stripe_factors=4, + max_plan_size=10, + ) + + proposals = generate_proposals(graph, home_map, options) + + for proposal in proposals: + assert 0 < len(proposal.plans) <= parts + assert len(proposal.input_tensor_configs) == input_configs + assert len(proposal.part_group) == parts + assert min_sram < proposal.memory_usage < max_sram + assert proposal.cycles > 0 + + def test_generate_proposals_binary(FLASH, SRAM, BinaryGraph): + graph = BinaryGraph + input_configs = 2 + parts = 3 + home_map = make_simple_home_map(graph, SRAM, FLASH) + options = make_options( + cascade_region=SRAM, + max_proposals=32, + stripe_factors=4, + max_plan_size=10, + ) + + proposals = 
generate_proposals(graph, home_map, options) + + for proposal in proposals: + assert 0 < len(proposal.plans) <= parts + assert len(proposal.input_tensor_configs) == input_configs + assert len(proposal.part_group) == parts + assert proposal.cycles > 0 + + def test_generate_proposals_mobilenetv1_start(FLASH, SRAM, MobileNetv1StartGraph): + graph = MobileNetv1StartGraph + min_sram = 200000 + max_sram = 1300000 + input_configs = 1 + parts = 8 + home_map = make_simple_home_map(graph, SRAM, FLASH) + options = make_options( + cascade_region=SRAM, + max_proposals=32, + stripe_factors=5, + max_plan_size=10, + ) + + proposals = generate_proposals(graph, home_map, options) + + for proposal in proposals: + assert 0 < len(proposal.plans) <= parts + assert len(proposal.input_tensor_configs) == input_configs + assert len(proposal.part_group) == parts + assert min_sram < proposal.memory_usage < max_sram + assert proposal.cycles > 0 + + def test_generate_proposals_mobilenetv1(FLASH, SRAM, MobileNetv1Graph): + graph = MobileNetv1Graph + min_sram = 200000 + max_sram = 1300000 + input_configs = 1 + parts = 27 + home_map = make_simple_home_map(graph, SRAM, FLASH) + options = make_options( + cascade_region=SRAM, + max_proposals=32, + stripe_factors=5, + max_plan_size=10, + ) + + proposals = generate_proposals(graph, home_map, options) + + for proposal in proposals: + assert 0 < len(proposal.plans) <= parts + assert len(proposal.input_tensor_configs) == input_configs + assert len(proposal.part_group) == parts + assert min_sram < proposal.memory_usage < max_sram + assert proposal.cycles > 0 + + def test_generate_proposals_mobilenetv2diamond(FLASH, SRAM, MobileNetv2DiamondGraph): + graph = MobileNetv2DiamondGraph + min_sram = 370000 + max_sram = 990000 + input_configs = 1 + parts = 5 + home_map = make_simple_home_map(graph, SRAM, FLASH) + options = make_options( + cascade_region=SRAM, + max_proposals=64, + stripe_factors=5, + max_plan_size=10, + ) + + proposals = generate_proposals(graph, 
home_map, options) + + for proposal in proposals: + assert 0 < len(proposal.plans) <= parts + assert len(proposal.input_tensor_configs) == input_configs + assert len(proposal.part_group) == parts + assert min_sram < proposal.memory_usage < max_sram + assert proposal.cycles > 0 if __name__ == "__main__":