From ffc1fc0116c36ac835145d57ceb2e33246827f92 Mon Sep 17 00:00:00 2001
From: Philipp van Kempen
Date: Thu, 30 Mar 2023 15:13:08 +0200
Subject: [PATCH] [TVMC] Allow selecting a subset of tasks to be used in `tvmc tune` (#12525)

This adds a `--tasks` flag to the `tvmc tune` command to filter the list of tasks to be tuned. See the examples below.

## Motivation

- Auto-tuning can be quite time-consuming, so it is often desirable to cut down the number of tuned tasks in a session.
- If a tuning session was canceled halfway through, it would be a bad idea to start from scratch. Instead, tuning should continue with the last untuned task.
- Some tasks have more impact on model performance than others, so we should be able to tune some tasks longer than others.

## Examples

1. Use `--task list` to show which tasks are available for tuning:

```
$ tvmc tune toycar.tflite -o out.txt --task list
Available Tasks for tuning:
 0. Task(func_name=dense_nopack.x86, args=(('TENSOR', (1, 640), 'int16'), ('TENSOR', (128, 640), 'int...
 1. Task(func_name=dense_pack.x86, args=(('TENSOR', (1, 640), 'int16'), ('TENSOR', (128, 640), 'int16...
 2. Task(func_name=dense_nopack.x86, args=(('TENSOR', (1, 128), 'int16'), ('TENSOR', (128, 128), 'int...
 3. Task(func_name=dense_pack.x86, args=(('TENSOR', (1, 128), 'int16'), ('TENSOR', (128, 128), 'int16...
 4. Task(func_name=dense_nopack.x86, args=(('TENSOR', (1, 128), 'int16'), ('TENSOR', (8, 128), 'int16...
 5. Task(func_name=dense_pack.x86, args=(('TENSOR', (1, 128), 'int16'), ('TENSOR', (8, 128), 'int16')...
 6. Task(func_name=dense_nopack.x86, args=(('TENSOR', (1, 8), 'int16'), ('TENSOR', (128, 8), 'int16')...
 7. Task(func_name=dense_pack.x86, args=(('TENSOR', (1, 8), 'int16'), ('TENSOR', (128, 8), 'int16'), ...
 8. Task(func_name=dense_nopack.x86, args=(('TENSOR', (1, 128), 'int16'), ('TENSOR', (640, 128), 'int...
 9. Task(func_name=dense_pack.x86, args=(('TENSOR', (1, 128), 'int16'), ('TENSOR', (640, 128), 'int16...
```

2. Filter the list of tasks to be tuned:

```
# Only tune a single task (index 5)
tvmc tune toycar.tflite -o out.txt --tasks 5

# Tune tasks starting with index 6
tvmc tune toycar.tflite -o out.txt --tasks "6-"

# Tune tasks 1,4,5,6,8,9
tvmc tune toycar.tflite -o out.txt --tasks "1,4-6,8-"
```

## Tests

I added a basic unit test for the `filter_tasks` utility in `tests/python/driver/tvmc/test_autotuner.py`.

## Open Questions

- ~~While the (truncated) string representations of AutoTVM tasks are quite helpful for picking the correct tasks, AutoScheduler tasks cannot really be distinguished from each other (only by index). Is there a way to get similar information from AutoScheduler tasks?~~
---
 gallery/tutorial/tvmc_command_line_driver.py |   5 +
 python/tvm/driver/tvmc/autotuner.py          | 132 +++++++++++++++++--
 tests/python/driver/tvmc/test_autotuner.py   | 102 +++++++++++++-
 3 files changed, 227 insertions(+), 12 deletions(-)

diff --git a/gallery/tutorial/tvmc_command_line_driver.py b/gallery/tutorial/tvmc_command_line_driver.py
index a462e24dc7b8..a20dcb9c96a4 100644
--- a/gallery/tutorial/tvmc_command_line_driver.py
+++ b/gallery/tutorial/tvmc_command_line_driver.py
@@ -412,6 +412,11 @@
 # process, in terms of number of repetitions (``--repeat`` and ``--number``, for example), the tuning
 # algorithm to be used, and so on. Check ``tvmc tune --help`` for more information.
 #
+# In some situations it might be a good idea to only tune specific tasks (i.e. the most relevant ones)
+# to waste less time tuning simpler workloads.
+# The flag ``--tasks`` offers versatile options to limit the tasks used for tuning, e.g. ``--tasks 20,22``
+# or ``--tasks 16-``. All available tasks can be printed using ``--tasks list``.
+#
 
 ################################################################################
 # Compiling an Optimized Model with Tuning Data

diff --git a/python/tvm/driver/tvmc/autotuner.py b/python/tvm/driver/tvmc/autotuner.py
index 9f730cbf3205..b9d0e3558286 100644
--- a/python/tvm/driver/tvmc/autotuner.py
+++ b/python/tvm/driver/tvmc/autotuner.py
@@ -135,6 +135,11 @@ def add_tune_parser(subparsers, _, json_params):
         help="enable tuning the graph through the AutoScheduler tuner",
         action="store_true",
     )
+    parser.add_argument(
+        "--tasks",
+        default="all",
+        help="which tasks should be tuned, i.e. 0 0,2 3-5 all list",
+    )
 
     auto_scheduler_group = parser.add_argument_group(
         "AutoScheduler options",
@@ -290,10 +295,100 @@ def drive_tune(args):
         include_simple_tasks=args.include_simple_tasks,
         log_estimated_latency=args.log_estimated_latency,
         additional_target_options=reconstruct_target_args(args),
+        tasks_filter=args.tasks,
         **transform_args,
     )
 
 
+def filter_tasks(
+    tasks: Union[List[auto_scheduler.SearchTask], List[autotvm.task.Task]],
+    expr: str,
+):
+    """Utility to filter a list of tasks (AutoTVM or AutoScheduler) based on
+    a user-supplied string expression.
+
+    Parameters
+    ----------
+    tasks: list
+        A list of extracted AutoTVM or AutoScheduler tasks.
+    expr: str
+        User-supplied expression to be used for filtering.
+    """
+    assert isinstance(expr, str), "Expected filter expression of string type"
+    assert len(expr) > 0, "Got empty filter expression"
+
+    # groups of keywords are comma-separated
+    splitted = expr.split(",")
+
+    do_list = False
+    do_filter = False
+    selected = []
+    for item in splitted:
+        if item in ["list", "help"]:
+            do_list = True
+        elif item in ["all"]:
+            selected = list(range(len(tasks)))
+        else:
+            do_filter = True
+            if "-" in item:
+                assert item.count("-") == 1, "Malformed range expression"
+                assert len(item) > 1, "Missing lhs or rhs for range expression"
+                lhs, rhs = item.split("-")[:2]
+                lhs = int(lhs) if lhs else 0
+                rhs = int(rhs) if rhs else len(tasks) - 1
+                assert 0 <= lhs < len(tasks), "Left-hand side expression out of range"
+                assert 0 <= rhs < len(tasks), "Right-hand side expression out of range"
+                selected.extend(list(range(lhs, rhs + 1)))
+            else:
+                assert isinstance(item, str)
+                idx = int(item)
+                assert 0 <= idx < len(tasks), "Task index out of range"
+                selected.append(idx)
+
+    if do_filter:
+        # remove duplicates
+        selected = list(set(selected))
+        tasks = [task for i, task in enumerate(tasks) if i in selected]
+
+    return tasks, do_list
+
+
+def gen_task_list(
+    tasks: Union[List[auto_scheduler.SearchTask], List[autotvm.task.Task]],
+    enable_autoscheduler: bool,
+):
+    """Utility for printing a list of tasks (AutoTVM or AutoScheduler)
+    to the terminal.
+
+    Parameters
+    ----------
+    tasks: list
+        A list of extracted AutoTVM or AutoScheduler tasks.
+    enable_autoscheduler: bool
+        Whether the tasks are extracted with AutoScheduler or AutoTVM.
+    """
+    ret = "Available Tasks for tuning:\n"
+
+    def _trunc_helper(text, length):
+        return text if len(text) < length else text[: length - 3] + "..."
+
+    ret += "\n".join(
+        [
+            " {}. {}".format(
+                i, _trunc_helper("Unnamed" if len(task.desc) == 0 else task.desc, 100)
+            )
+            if enable_autoscheduler
+            else " {}. {} (len={})".format(
+                i,
+                _trunc_helper(str(task), 100),
+                "?" if task.config_space is None else len(task.config_space),
+            )
+            for i, task in enumerate(tasks)
+        ]
+    )
+    return ret
+
+
 def tune_model(
     tvmc_model: TVMCModel,
     target: str,
@@ -316,6 +411,7 @@ def tune_model(
     include_simple_tasks: bool = False,
     log_estimated_latency: bool = False,
     additional_target_options: Optional[Dict[str, Dict[str, Any]]] = None,
+    tasks_filter: str = "all",
     desired_layout: Optional[str] = None,
     desired_layout_ops: Optional[List[str]] = None,
     mixed_precision: bool = False,
@@ -376,6 +472,9 @@ def tune_model(
         If using the autoscheduler, write the estimated latency at each step of tuning to file.
     additional_target_options: Optional[Dict[str, Dict[str, Any]]]
         Additional target options in a dictionary to combine with initial Target arguments
+    tasks_filter : str, optional
+        Filter which tasks should be tuned or output a list of the extracted tasks.
+        Examples: 0 0,2 3-5 all list
     desired_layout: str, optional
         Can be one of "NCHW" or "NHWC". When specified, compatible operations in the graph
         will have their layout set to this format. Tasks will then be tuned using this
@@ -391,7 +490,6 @@ def tune_model(
     mixed_precision_acc_type: str
         The accumulation data type to be used while mixed precision.
 
-
     Returns
     -------
     tuning_records : str
@@ -464,7 +562,6 @@ def tune_model(
     runner = local_server
 
     if enable_autoscheduler:
-
         tasks, weights = autoscheduler_get_tuning_tasks(
             mod=mod,
             params=params,
@@ -473,7 +570,27 @@ def tune_model(
             hardware_params=hardware_params,
             include_simple_tasks=include_simple_tasks,
         )
+    else:
+        tasks = autotvm_get_tuning_tasks(
+            mod=mod,
+            params=params,
+            target=target,
+            transform_args=transform_args,
+        )
+
+    # Filter extracted tasks by provided user expression
+    if tasks_filter:
+        tasks, do_list = filter_tasks(tasks, tasks_filter)
+        if do_list:
+            print(gen_task_list(tasks, enable_autoscheduler))
+            return None
+    if len(tasks) == 0:
+        logger.info("No tasks have been selected for tuning.")
+        return None
+    else:
+        logger.info("Selected %s tasks for tuning.", len(tasks))
 
+    if enable_autoscheduler:
         # Create the autoscheduler tuning options
         tuning_options = auto_scheduler.TuningOptions(
             num_measure_trials=trials,
@@ -487,16 +604,9 @@ def tune_model(
         # Schedule the tasks (i.e., produce a schedule for each task)
         schedule_tasks(tasks, weights, tuning_options, prior_records, log_estimated_latency)
     else:
-        tasks = autotvm_get_tuning_tasks(
-            mod=mod,
-            params=params,
-            target=target,
-            transform_args=transform_args,
-        )
-
         # In autotvm, trials is specified per task. We can convert the per-model input
         # provided to per-task trials by dividing by the number of tasks.
-        trials = int(trials / max(len(tasks), 1))
+        trials = int(max(1, trials / max(len(tasks), 1)))
         logger.info("Autotuning with %d trials per task.", trials)
 
         tuning_options = {
@@ -710,7 +820,7 @@ def tune_tasks(
             early_stopping=early_stopping,
             measure_option=measure_option,
             callbacks=[
-                autotvm.callback.progress_bar(trials, prefix=prefix),
+                autotvm.callback.progress_bar(min(trials, len(tsk.config_space)), prefix=prefix),
                 autotvm.callback.log_to_file(log_file),
             ],
         )
diff --git a/tests/python/driver/tvmc/test_autotuner.py b/tests/python/driver/tvmc/test_autotuner.py
index eb6550e40cdc..ce5b888f25dc 100644
--- a/tests/python/driver/tvmc/test_autotuner.py
+++ b/tests/python/driver/tvmc/test_autotuner.py
@@ -24,8 +24,10 @@
 from pathlib import Path
 
 import tvm
-from tvm import autotvm
+import tvm.testing
+from tvm import autotvm, auto_scheduler
 from tvm.driver import tvmc
+from tvm.driver.tvmc.autotuner import filter_tasks, gen_task_list
 
 
 def _get_tasks(model):
@@ -207,3 +209,101 @@ def test_autotune_pass_context(mock_pc, onnx_mnist, tmpdir_factory):
     # AutoTVM overrides the pass context later in the pipeline to disable AlterOpLayout
     assert mock_pc.call_count == 2
     assert mock_pc.call_args_list[0][1]["opt_level"] == 3
+
+
+def test_filter_tasks_valid():
+    # "list"/"help" only request the task listing, so the tasks themselves are returned unfiltered
+    assert filter_tasks(list(range(10)), "list") == (list(range(10)), True)
+    assert filter_tasks(list(range(10)), "help") == (list(range(10)), True)
+    assert filter_tasks(list(range(10)), "all") == ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], False)
+    assert filter_tasks(list(range(10)), "5") == ([5], False)
+    assert filter_tasks(list(range(10)), "1-5") == ([1, 2, 3, 4, 5], False)
+    assert filter_tasks(list(range(10)), "-5") == ([0, 1, 2, 3, 4, 5], False)
+    assert filter_tasks(list(range(10)), "6-") == ([6, 7, 8, 9], False)
+    assert filter_tasks(list(range(10)), "0,1-3,all") == ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], False)
+    assert filter_tasks(list(range(10)), "0,4-5,9,list") == ([0, 4, 5, 9], True)
+
+
+@pytest.mark.parametrize(
+    "value,err_msg",
+    [
+        ("10", "Task index out of range"),
+        ("5,10", "Task index out of range"),
+        ("1-10", "Right-hand side expression out of range"),
+        ("-10", "Right-hand side expression out of range"),
+        ("-", "Missing lhs or rhs for range expression"),
+        ("-10-", "Malformed range expression"),
+        ("--", "Malformed range expression"),
+    ],
+)
+def test_filter_tasks_invalid(value, err_msg):
+    with pytest.raises(AssertionError, match=err_msg):
+        filter_tasks(list(range(10)), value)
+
+
+@pytest.mark.parametrize(
+    "enable_autoscheduler,expected",
+    [
+        (
+            False,
+            """Available Tasks for tuning:
+ 0. Task(func_name=taskA, args=[], kwargs={}, workload=('taskA',)) (len=?)
+ 1. Task(func_name=taskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBta... (len=?)
+ 2. Task(func_name=taskC, args=[], kwargs={}, workload=('taskC',)) (len=?)""",
+        ),
+        (
+            True,
+            """Available Tasks for tuning:
+ 0. taskA
+ 1. taskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBtaskBta...
+ 2. Unnamed""",
+        ),
+    ],
+)
+def test_print_task_list(enable_autoscheduler, expected):
+    if enable_autoscheduler:
+        auto_scheduler.search_task.TASK_INPUT_BUFFER_TABLE.clear()
+        N = 64
+        target = "llvm"
+        test_input_0 = tvm.runtime.ndarray.empty((64, 64))
+        test_input_1 = tvm.runtime.ndarray.empty((10, 20))
+        test_input_2 = tvm.runtime.ndarray.empty((30, 40, 50))
+        task_inputs = {
+            "test_input_0": test_input_0,
+            "test_input_1": test_input_1,
+            "test_input_2": test_input_2,
+        }
+        task1 = auto_scheduler.SearchTask(
+            func="matmul_auto_scheduler_test",
+            args=(N, N, N),
+            target=target,
+            task_inputs=task_inputs,
+            task_inputs_overwrite=True,
+            desc="taskA",
+        )
+        task2 = auto_scheduler.SearchTask(
+            func="matmul_auto_scheduler_test",
+            args=(N, N, N),
+            target=target,
+            task_inputs=task_inputs,
+            task_inputs_overwrite=True,
+            desc="taskB" * 20,  # very long name
+        )
+        task3 = auto_scheduler.SearchTask(
+            func="matmul_auto_scheduler_test",
+            args=(N, N, N),
+            target=target,
+            task_inputs=task_inputs,
+            task_inputs_overwrite=True,
+            # missing description
+        )
+    else:
+        task1 = autotvm.task.Task("taskA", [])
+        task2 = autotvm.task.Task("taskB" * 20, [])  # very long name
+        task3 = autotvm.task.Task("taskC", [])
+    tasks = [task1, task2, task3]
+    out = gen_task_list(tasks, enable_autoscheduler)
+    assert out == expected
+
+
+if __name__ == "__main__":
+    tvm.testing.main()
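
For quick reference, here is a minimal sketch of how the new `filter_tasks` helper behaves on its own, mirroring the unit tests above (plain integers stand in for extracted tasks). The commented-out lines at the end assume the usual Python entry point, i.e. that `tvmc.tune` wraps `tune_model` and therefore accepts the new `tasks_filter` argument:

```python
from tvm.driver.tvmc.autotuner import filter_tasks

# As in the unit tests, plain integers stand in for extracted AutoTVM/AutoScheduler tasks.
tasks = list(range(10))

# Keep tasks 1, 4-6 and everything from index 8 onwards.
subset, do_list = filter_tasks(tasks, "1,4-6,8-")
print(subset)   # [1, 4, 5, 6, 8, 9]
print(do_list)  # False; a "list" or "help" keyword in the expression would set this to True

# The same expression string can be passed through the Python API
# (assuming tvmc.tune wraps tune_model, which now accepts tasks_filter):
# from tvm.driver import tvmc
# model = tvmc.load("toycar.tflite")
# tvmc.tune(model, target="llvm", tasks_filter="1,4-6,8-")
```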