diff --git a/python/tvm/driver/tvmc/autotuner.py b/python/tvm/driver/tvmc/autotuner.py
index 53c8f3bdc43d..71ccc8546e8b 100644
--- a/python/tvm/driver/tvmc/autotuner.py
+++ b/python/tvm/driver/tvmc/autotuner.py
@@ -23,7 +23,7 @@
 from urllib.parse import urlparse
 
-from tvm import autotvm
+from tvm import autotvm, auto_scheduler
 from tvm.autotvm.tuner import GATuner
 from tvm.autotvm.tuner import GridSearchTuner
 from tvm.autotvm.tuner import RandomTuner
@@ -116,12 +116,6 @@ def add_tune_parser(subparsers):
         default=1000,
         help="the maximum number of tuning trials to perform",
     )
-    parser.add_argument(
-        "--tuner",
-        choices=["ga", "gridsearch", "random", "xgb", "xgb_knob", "xgb-rank"],
-        default="xgb",
-        help="type of tuner to use",
-    )
     parser.add_argument(
         "--tuning-records",
         metavar="PATH",
@@ -133,6 +127,85 @@ def add_tune_parser(subparsers):
         default=None,
         help="change the data layout of the whole graph",
     )
+    parser.add_argument(
+        "--enable-autoscheduler",
+        help="enable tuning the graph through the autoscheduler",
+        action="store_true",
+    )
+
+    auto_scheduler_group = parser.add_argument_group(
+        "Autoscheduler options",
+        "Autoscheduler options, used when --enable-autoscheduler is provided",
+    )
+
+    auto_scheduler_group.add_argument(
+        "--cache-line-bytes",
+        type=int,
+        default=64,
+        help="the size of a cache line in bytes",
+    )
+    auto_scheduler_group.add_argument(
+        "--num-cores",
+        type=int,
+        default=4,
+        help="the number of device cores",
+    )
+    auto_scheduler_group.add_argument(
+        "--vector-unit-bytes",
+        type=int,
+        default=16,
+        help="the width of vector units in bytes",
+    )
+    auto_scheduler_group.add_argument(
+        "--max-shared-memory-per-block",
+        type=int,
+        default=0,
+        help="the max shared memory per block in bytes",
+    )
+    auto_scheduler_group.add_argument(
+        "--max-local-memory-per-block",
+        type=int,
+        default=0,
+        help="the max local memory per block in bytes",
+    )
+    auto_scheduler_group.add_argument(
+        "--max-threads-per-block",
+        type=int,
+        default=0,
+        help="the max number of threads per block",
+    )
+    auto_scheduler_group.add_argument(
+        "--max-vthread-extent",
+        type=int,
+        default=0,
+        help="the max vthread extent",
+    )
+    auto_scheduler_group.add_argument(
+        "--warp-size",
+        type=int,
+        default=0,
+        help="the number of threads in a warp",
+    )
+    auto_scheduler_group.add_argument(
+        "--include-simple-tasks",
+        help="whether to extract simple tasks that do not include complicated ops",
+        action="store_true",
+    )
+    auto_scheduler_group.add_argument(
+        "--log-estimated-latency",
+        help="whether to log the estimated latency to a file after tuning each task",
+        action="store_true",
+    )
+    autotvm_group = parser.add_argument_group(
+        "autotvm options",
+        "autotvm options, used when the autoscheduler is not enabled",
+    )
+    autotvm_group.add_argument(
+        "--tuner",
+        choices=["ga", "gridsearch", "random", "xgb", "xgb_knob", "xgb-rank"],
+        default="xgb",
+        help="type of tuner to use when tuning with autotvm",
+    )
     # TODO (@leandron) This is a path to a physical file, but
     # can be improved in future to add integration with a modelzoo
     # or URL, for example.
@@ -147,7 +220,6 @@ def drive_tune(args):
     args: argparse.Namespace
         Arguments from command line parser.
     """
-
     # extra arguments validation before importing the model, so that obvious errors
     # are pointed in advance.
     if args.rpc_tracker:
@@ -174,17 +246,9 @@ def drive_tune(args):
     min_repeat_ms = 0 if target.keys[0] == "cpu" else 1000
     logger.debug("Default --min-repeat-ms for this target is %s", min_repeat_ms)
 
-    tasks = get_tuning_tasks(
-        mod=mod,
-        params=params,
-        target=target,
-        target_host=args.target_host,
-        alter_layout=args.desired_layout,
-    )
-
     if args.rpc_tracker:
-
-        runner = autotvm.RPCRunner(
+        runner_ctor = auto_scheduler.RPCRunner if args.enable_autoscheduler else autotvm.RPCRunner
+        runner = runner_ctor(
             key=args.rpc_key,
             host=rpc_hostname,
             port=rpc_port,
@@ -196,29 +260,75 @@ def drive_tune(args):
         )
     else:
         logger.info("starting localhost tuning")
-        runner = autotvm.LocalRunner(
+        runner_ctor = (
+            auto_scheduler.LocalRunner if args.enable_autoscheduler else autotvm.LocalRunner
+        )
+        runner = runner_ctor(
             number=args.number,
             repeat=args.repeat,
             timeout=args.timeout,
             min_repeat_ms=min_repeat_ms,
         )
 
-    tuning_option = {
-        "tuner": args.tuner,
-        "trials": args.trials,
-        "early_stopping": args.early_stopping,
-        "measure_option": autotvm.measure_option(
-            builder=autotvm.LocalBuilder(build_func="default"), runner=runner
-        ),
-        "tuning_records": args.tuning_records,
-    }
-    logger.debug(" tuning options: %s", tuning_option)
+    if args.enable_autoscheduler:
+        # Specify hardware parameters
+        hardware_params = auto_scheduler.HardwareParams(
+            args.num_cores,
+            args.vector_unit_bytes,
+            args.cache_line_bytes,
+            args.max_shared_memory_per_block,
+            args.max_local_memory_per_block,
+            args.max_threads_per_block,
+            args.max_vthread_extent,
+            args.warp_size,
+        )
+        tasks, weights = autoscheduler_get_tuning_tasks(
+            mod=mod,
+            params=params,
+            target=target,
+            target_host=args.target_host,
+            alter_layout=args.desired_layout,
+            hardware_params=hardware_params,
+            include_simple_tasks=args.include_simple_tasks,
+        )
 
-    tune_tasks(tasks, args.output, **tuning_option)
+
+        # Create the autoscheduler tuning options
+        tuning_options = auto_scheduler.TuningOptions(
+            num_measure_trials=args.trials,
+            measure_callbacks=[auto_scheduler.RecordToFile(args.output)],
+            runner=runner,
+            early_stopping=args.early_stopping,
+        )
+        # Schedule the tasks (i.e., produce a schedule for each task)
+        schedule_tasks(
+            tasks, weights, tuning_options, args.tuning_records, args.log_estimated_latency
+        )
+    else:
+        tasks = autotvm_get_tuning_tasks(
+            mod=mod,
+            params=params,
+            target=target,
+            target_host=args.target_host,
+            alter_layout=args.desired_layout,
+        )
 
-def get_tuning_tasks(mod, params, target, target_host=None, alter_layout=None):
-    """Get the tuning tasks for a given relay module.
+        tuning_option = {
+            "tuner": args.tuner,
+            "trials": args.trials,
+            "early_stopping": args.early_stopping,
+            "measure_option": autotvm.measure_option(
+                builder=autotvm.LocalBuilder(build_func="default"), runner=runner
+            ),
+            "tuning_records": args.tuning_records,
+        }
+        logger.debug(" tuning options: %s", tuning_option)
+
+        tune_tasks(tasks, args.output, **tuning_option)
+
+
+def autotvm_get_tuning_tasks(mod, params, target, target_host=None, alter_layout=None):
+    """Get the autotvm tuning tasks for a given relay module.
 
     Parameters
     ----------
@@ -253,6 +363,95 @@ def get_tuning_tasks(mod, params, target, target_host=None, alter_layout=None):
     return tasks
 
 
+def autoscheduler_get_tuning_tasks(
+    mod,
+    params,
+    target,
+    target_host=None,
+    alter_layout=None,
+    hardware_params=None,
+    include_simple_tasks=False,
+):
+    """Get the autoscheduler tuning tasks for a given relay module.
+
+    Parameters
+    ----------
+    mod : tvm.relay.Module
+        The relay module from which to extract tuning tasks.
+    params : dict
+        The params for the relay module.
+    target : tvm.target.Target
+        The compilation target.
+    target_host : str, optional
+        The compilation target for the host.
+    alter_layout : str, optional
+        The layout to convert the graph to. Note, the convert layout
+        pass doesn't currently guarantee the whole of the graph will
+        be converted to the chosen layout.
+    hardware_params : Optional[HardwareParams]
+        Hardware parameters used for the search tasks.
+    include_simple_tasks : bool, optional
+        Whether to extract simple tasks that do not include complicated ops.
+
+    Returns
+    -------
+    tasks : list of auto_scheduler.SearchTask
+        list of tasks to be tuned
+    weights : List[int]
+        the weight (i.e. the number of appearances) of each extracted task
+    """
+    if alter_layout:
+        mod = common.convert_graph_layout(mod, alter_layout)
+
+    # Extract the tasks
+    tasks, task_weights = auto_scheduler.extract_tasks(
+        mod["main"],
+        params,
+        target=target,
+        target_host=target_host,
+        hardware_params=hardware_params,
+        include_simple_tasks=include_simple_tasks,
+    )
+
+    return tasks, task_weights
+
+
+def schedule_tasks(
+    tasks, task_weights, tuning_options, tuning_records=None, log_estimated_latency=False
+):
+    """Generate the schedules for the different tasks (i.e., subgraphs) contained in the module.
+    Store the schedules in a JSON file that will be used later by the compiler.
+
+    Parameters
+    ----------
+    tasks : list
+        A list of auto_scheduler.SearchTask to tune.
+    task_weights : list
+        The weight (i.e. the number of appearances) of each extracted task.
+    tuning_options : auto_scheduler.TuningOptions
+        The options for the tuning run.
+    tuning_records : str, optional
+        The json file used to preload the autoscheduler.
+    log_estimated_latency : bool, optional
+        Whether to log the estimated latency to a file after tuning each task.
+    """
+    if not log_estimated_latency:
+        callbacks = [auto_scheduler.task_scheduler.PrintTableInfo()]
+    else:
+        callbacks = [
+            auto_scheduler.task_scheduler.PrintTableInfo(),
+            auto_scheduler.task_scheduler.LogEstimatedLatency("total_latency.tsv"),
+        ]
+
+    # Create the scheduler
+    tuner = auto_scheduler.TaskScheduler(
+        tasks, task_weights, load_log_file=tuning_records, callbacks=callbacks
+    )
+
+    # Tune the tasks
+    tuner.tune(tuning_options)
+
+
 def tune_tasks(
     tasks,
     log_file,
diff --git a/python/tvm/driver/tvmc/compiler.py b/python/tvm/driver/tvmc/compiler.py
index 57071476b073..90b0aceaa17a 100644
--- a/python/tvm/driver/tvmc/compiler.py
+++ b/python/tvm/driver/tvmc/compiler.py
@@ -23,7 +23,7 @@
 from pathlib import Path
 
 import tvm
-from tvm import autotvm
+from tvm import autotvm, auto_scheduler
 from tvm import relay
 from tvm.contrib import cc
 from tvm.contrib import utils
@@ -182,10 +182,29 @@ def compile_model(
 
     if tuning_records and os.path.exists(tuning_records):
         logger.debug("tuning records file provided: %s", tuning_records)
-        with autotvm.apply_history_best(tuning_records):
-            with tvm.transform.PassContext(opt_level=3):
-                logger.debug("building relay graph with tuning records")
-                graph_module = relay.build(mod, tvm_target, params=params, target_host=target_host)
+
+        use_autoscheduler = True
+        try:
+            auto_scheduler.load_records(tuning_records)
+        except tvm._ffi.base.TVMError:
+            use_autoscheduler = False
+
+        if use_autoscheduler:
+            with auto_scheduler.ApplyHistoryBest(tuning_records):
+                with tvm.transform.PassContext(
+                    opt_level=3, config={"relay.backend.use_auto_scheduler": True}
+                ):
+                    logger.debug("building relay graph with autoscheduler")
+                    graph_module = relay.build(
+                        mod, target=tvm_target, params=params, target_host=target_host
+                    )
+        else:
+            with autotvm.apply_history_best(tuning_records):
+                with tvm.transform.PassContext(opt_level=3):
+                    logger.debug("building relay graph with tuning records")
+                    graph_module = relay.build(
+                        mod, tvm_target, params=params, target_host=target_host
+                    )
     else:
         with tvm.transform.PassContext(opt_level=3):
             logger.debug("building relay graph (no tuning records provided)")
diff --git a/tests/python/driver/tvmc/test_autoscheduler.py b/tests/python/driver/tvmc/test_autoscheduler.py
new file mode 100644
index 000000000000..25525eb9ce97
--- /dev/null
+++ b/tests/python/driver/tvmc/test_autoscheduler.py
@@ -0,0 +1,99 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import os
+import pytest
+
+from os import path
+
+from tvm import auto_scheduler
+from tvm.driver import tvmc
+
+
+def _get_tasks(model):
+    mod, params = tvmc.frontends.load_model(model)
+    tasks, weights = tvmc.autotuner.autoscheduler_get_tuning_tasks(mod, params, "llvm")
+    return (tasks, weights)
+
+
+def _autoscheduler_test_helper(
+    model, tmpdir_name, tasks_weights=None, early_stopping=1, tuning_records=None
+):
+    tasks, weights = tasks_weights if tasks_weights else _get_tasks(model)
+    log_file = os.path.join(tmpdir_name, "autoscheduler.json")
+
+    tuning_options = auto_scheduler.TuningOptions(
+        num_measure_trials=1,
+        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
+        runner="local",
+        builder="local",
+        verbose=0,
+        early_stopping=early_stopping,
+    )
+
+    tvmc.autotuner.schedule_tasks(tasks[:1], weights[:1], tuning_options, tuning_records)
+
+    # testing whether the log file was produced
+    assert path.exists(log_file), "autoscheduler log file should exist"
+
+    with auto_scheduler.ApplyHistoryBest(log_file) as best:
+        assert isinstance(
+            best, auto_scheduler.dispatcher.ApplyHistoryBest
+        ), "unable to load the best results of tuning"
+
+    return log_file
+
+
+def test_get_tuning_tasks(onnx_resnet50):
+    pytest.importorskip("onnx")
+
+    tasks, weights = _get_tasks(onnx_resnet50)
+    expected_task_type = auto_scheduler.SearchTask
+
+    assert isinstance(tasks, list)
+    assert len(tasks) > 0
+    assert all(isinstance(x, expected_task_type) for x in tasks)
+
+
+def test_tune_tasks(onnx_resnet50, tmpdir_factory):
+    pytest.importorskip("onnx")
+
+    tmpdir_name = tmpdir_factory.mktemp("data")
+    _autoscheduler_test_helper(onnx_resnet50, tmpdir_name)
+
+
+def test_tune_tasks__tuning_records(onnx_resnet50, tmpdir_factory):
+    pytest.importorskip("onnx")
+
+    tmpdir_name = tmpdir_factory.mktemp("data")
+    output_log_phase_1 = _autoscheduler_test_helper(onnx_resnet50, tmpdir_name)
+
+    # Exercises transfer learning by making sure a previous log exists
+    _autoscheduler_test_helper(onnx_resnet50, tmpdir_name, tuning_records=output_log_phase_1)
+
+
+def test_tune_tasks__no_early_stopping(onnx_resnet50, tmpdir_factory):
+    pytest.importorskip("onnx")
+
+    tmpdir_name = tmpdir_factory.mktemp("data")
+    _autoscheduler_test_helper(onnx_resnet50, tmpdir_name, tasks_weights=None, early_stopping=None)
+
+
+def test_tune_tasks__no_tuning_records(onnx_resnet50, tmpdir_factory):
+    pytest.importorskip("onnx")
+
+    tmpdir_name = tmpdir_factory.mktemp("data")
+    _autoscheduler_test_helper(onnx_resnet50, tmpdir_name, tasks_weights=None, tuning_records=None)
diff --git a/tests/python/driver/tvmc/test_autotuner.py b/tests/python/driver/tvmc/test_autotuner.py
index bdad167cfe3a..5ce4ca95c810 100644
--- a/tests/python/driver/tvmc/test_autotuner.py
+++ b/tests/python/driver/tvmc/test_autotuner.py
@@ -27,7 +27,7 @@
 
 def _get_tasks(model):
     mod, params = tvmc.frontends.load_model(model)
-    return tvmc.autotuner.get_tuning_tasks(mod, params, "llvm")
+    return tvmc.autotuner.autotvm_get_tuning_tasks(mod, params, "llvm")
 
 
 def _get_measure_options():
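
For context, a minimal sketch of driving the new auto_scheduler path programmatically, mirroring what drive_tune and the tests above do. It uses only the helpers added in this diff; the model path and the trial budget are placeholders.

    from tvm import auto_scheduler
    from tvm.driver import tvmc

    # Load a model through the existing tvmc frontend (the path is a placeholder).
    mod, params = tvmc.frontends.load_model("resnet50.onnx")

    # Extract auto_scheduler search tasks and their weights from the relay module.
    tasks, weights = tvmc.autotuner.autoscheduler_get_tuning_tasks(mod, params, "llvm")

    # Configure the run; builder and runner default to "local", and every measured
    # record is appended to the log file, which compile_model can later consume
    # through its tuning_records argument.
    tuning_options = auto_scheduler.TuningOptions(
        num_measure_trials=64,  # placeholder trial budget
        measure_callbacks=[auto_scheduler.RecordToFile("records.json")],
    )

    # Tune all tasks, weighting each task by how often it appears in the model.
    tvmc.autotuner.schedule_tasks(tasks, weights, tuning_options)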