[MetaSchedule] Generate MetaSchedule Dataset #11641

Merged 2 commits on Jun 11, 2022
Changes from all commits
85 changes: 85 additions & 0 deletions python/tvm/meta_schedule/testing/dataset_collect_models.py
@@ -0,0 +1,85 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=missing-docstring

import argparse
import os
from typing import List, Tuple

from tqdm import tqdm # type: ignore
from tvm.meta_schedule.testing.relay_workload import get_network


# pylint: disable=too-many-branches
def _build_dataset() -> List[Tuple[str, List[int]]]:
    network_keys = []
    for name in [
        "resnet_18",
        "resnet_50",
        "mobilenet_v2",
        "mobilenet_v3",
        "wide_resnet_50",
        "resnext_50",
        "densenet_121",
        "vgg_16",
    ]:
        for batch_size in [1, 4, 8]:
            for image_size in [224, 240, 256]:
                network_keys.append((name, [batch_size, 3, image_size, image_size]))
    # inception-v3
    for name in ["inception_v3"]:
        for batch_size in [1, 2, 4]:
            for image_size in [299]:
                network_keys.append((name, [batch_size, 3, image_size, image_size]))
    # resnet3d
    for name in ["resnet3d_18"]:
        for batch_size in [1, 2, 4]:
            for image_size in [112, 128, 144]:
                network_keys.append((name, [batch_size, 3, image_size, image_size, 16]))
    # bert
    for name in ["bert_tiny", "bert_base", "bert_medium", "bert_large"]:
        for batch_size in [1, 2, 4]:
            for seq_length in [64, 128, 256]:
                network_keys.append((name, [batch_size, seq_length]))
    # dcgan
    for name in ["dcgan"]:
        for batch_size in [1, 4, 8]:
            for image_size in [64]:
                network_keys.append((name, [batch_size, 3, image_size, image_size]))
    return network_keys


def main():
    model_cache_dir = args.model_cache_dir
    try:
        os.makedirs(model_cache_dir, exist_ok=True)
    except OSError:
        print(f"Directory {model_cache_dir} cannot be created successfully.")
    keys = _build_dataset()
    for name, input_shape in tqdm(keys):
        get_network(name=name, input_shape=input_shape, cache_dir=model_cache_dir)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()  # pylint: disable=invalid-name
    parser.add_argument(
        "--model_cache_dir",
        type=str,
        help="Please provide the full path to the model cache dir.",
    )
    args = parser.parse_args()  # pylint: disable=invalid-name
    main()
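
For reference, a minimal sketch (not part of this PR) of what _build_dataset() yields; the counts below simply follow the loops above, and the script itself only needs --model_cache_dir to point at a writable directory.

# Illustrative only: inspect the network keys the script will download and cache.
from tvm.meta_schedule.testing.dataset_collect_models import _build_dataset

keys = _build_dataset()
# 8 CNNs x 3 batch sizes x 3 image sizes (72), inception_v3 (3), resnet3d_18 (9),
# 4 BERT variants x 3 x 3 (36) and dcgan (3): 123 network keys in total.
print(len(keys))  # 123
print(keys[0])    # ('resnet_18', [1, 3, 224, 224])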
104 changes: 104 additions & 0 deletions python/tvm/meta_schedule/testing/dataset_extract_tasks.py
@@ -0,0 +1,104 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=missing-docstring

import argparse
import glob
import json
import os

from tqdm import tqdm # type: ignore
import tvm
from tvm import meta_schedule as ms
from tvm.ir import save_json
from tvm.meta_schedule.testing.relay_workload import _load_cache
from tvm.runtime import load_param_dict


def _parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_cache_dir", type=str, help="Please provide the full path to the model cache dir."
    )
    parser.add_argument(
        "--task_cache_dir", type=str, help="Please provide the full path to save extracted tasks."
    )
    parser.add_argument(
        "--target", type=str, default="cuda", help="Please specify the target hardware for tuning."
    )
    return parser.parse_args()


# pylint: disable=too-many-locals
def extract_and_save_tasks(cache_file):
    """Extract tuning tasks and cache the nonspatial ones in the given directory.

    Parameters
    ----------
    cache_file : str
        The filename of the cached model.

    Returns
    -------
    None
    """
    mod, params_bytearray, _ = _load_cache(args.model_cache_dir, cache_file)
    params = load_param_dict(params_bytearray)
    try:
        extracted_tasks = ms.extract_task_from_relay(mod, target=args.target, params=params)
    except tvm.error.TVMError as error:
        print(str(error))
        return
    task_cache_path = os.path.join(
        args.task_cache_dir, cache_file.split(".")[0] + "_extracted_tasks.json"
    )
    is_spatial = tvm.get_global_func("tir.schedule.IsSpatialPrimFunc")
    with open(task_cache_path, "w", encoding="utf8") as file:
        for i, task in enumerate(extracted_tasks):
            subgraph = task.dispatched[0]
            prim_func = subgraph[subgraph.get_global_vars()[0]]
            if not is_spatial(prim_func):
                subgraph_str = save_json(subgraph)
                json_obj = [task.task_name, json.loads(subgraph_str)]
                json_str = json.dumps(json_obj)
                assert "\n" not in json_str, "Failed to generate single line string."
                if i == len(extracted_tasks) - 1:
                    file.write(json_str)
                else:
                    file.write(json_str + "\n")


args = _parse_args() # pylint: disable=invalid-name


def main():
    if not os.path.isdir(args.model_cache_dir):
        raise Exception("Please provide a correct model cache dir.")
    try:
        os.makedirs(args.task_cache_dir, exist_ok=True)
    except OSError:
        print(f"Directory {args.task_cache_dir} cannot be created successfully.")

    paths = glob.glob(os.path.join(args.model_cache_dir, "*.json"))  # pylint: disable=invalid-name
    for path in tqdm(paths):
        filename = path.split("/")[-1]
        extract_and_save_tasks(filename)


if __name__ == "__main__":
    main()
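
A minimal sketch (not part of this PR) of reading the one-record-per-line output back; the filename is hypothetical, and the deserialization mirrors what dataset_sample_candidates.py does with these files.

# Illustrative only: each line holds [task_name, serialized IRModule].
import json
from tvm.ir import load_json

with open("relay-resnet_18_extracted_tasks.json", encoding="utf8") as f:  # hypothetical file
    for line in f:
        task_name, mod_json = json.loads(line)
        mod = load_json(json.dumps(mod_json))  # recover the tvm.IRModule
        print(task_name, mod.get_global_vars())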
191 changes: 191 additions & 0 deletions python/tvm/meta_schedule/testing/dataset_sample_candidates.py
@@ -0,0 +1,191 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=missing-docstring

import argparse
import glob
import json
import os
from typing import List

from tqdm import tqdm # type: ignore
import tvm
from tvm import meta_schedule as ms
from tvm.ir import load_json
from tvm.target import Target


def _parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--task_cache_dir", type=str, help="Please provide the full path to the extracted tasks."
    )
    parser.add_argument(
        "--candidate_cache_dir",
        type=str,
        help="Please provide the full path to save the sampled candidates.",
    )
    parser.add_argument(
        "--target",
        type=str,
        default="nvidia/geforce-rtx-3070",
        help="Please specify the target hardware for tuning.\
            Note: for generating dataset, the hardware does not need to be present.",
    )
    parser.add_argument(
        "--init_population_size",
        type=int,
        default=256,
        help="The initial population size used in evolutionary search.",
    )
    parser.add_argument(
        "--num_samples_per_task",
        type=int,
        default=400,
        help="The number of samples to gather per tuning task.",
    )
    parser.add_argument(
        "--num_trials_per_iter",
        type=int,
        default=64,
        help="The number of trials per iteration in evolutionary search.",
    )
    parser.add_argument(
        "--max_trials_per_task",
        type=int,
        default=400,
        help="The maximum number of trials per task in evolutionary search.",
    )
    parser.add_argument(
        "--max_retry_per_task",
        type=int,
        default=10,
        help="The maximum number of retry attempts allowed.",
    )
    parser.add_argument(
        "--file_group",
        type=int,
        default=0,
        help="To enable running multiple scripts in parallel, files [idx * 10 : (idx + 1) * 10]\
            in the sorted file list from the given directory will be run.",
    )
    return parser.parse_args()


# pylint: disable=too-many-locals
def sample_candidates(task, task_name, model_name):
    """Randomly sample candidates for a task and save the candidates in the given directory.

    Parameters
    ----------
    task : IRModule
        The initial ir module used for generating the search space.
    task_name : str
        The name of the task.
    model_name : str
        The name of the model.

    Returns
    -------
    None
    """
    sample_init_population = tvm.get_global_func(
        "meta_schedule.SearchStrategyEvolutionarySearchSampleInitPopulation"
    )
    evolve_with_cost_model = tvm.get_global_func(
        "meta_schedule.SearchStrategyEvolutionarySearchEvolveWithCostModel"
    )
    strategy = ms.search_strategy.EvolutionarySearch(
        num_trials_per_iter=args.num_trials_per_iter,
        max_trials_per_task=args.max_trials_per_task,
        init_measured_ratio=0.0,
    )
    target = Target(args.target)
    context = ms.TuneContext(
        mod=task,
        target=target,
        space_generator=ms.space_generator.PostOrderApply(),
        search_strategy=strategy,
        sch_rules=ms.default_config.schedule_rules(None, target),
        postprocs=ms.default_config.postproc(None, target),
        mutator_probs=ms.default_config.mutator_probs(None, target),
        task_name=task_name,
    )
    context.initialize()
    context.pre_tuning(
        context.generate_design_space(),
        database=ms.database.MemoryDatabase(),  # type: ignore
        cost_model=ms.cost_model.RandomModel(),  # type: ignore
    )

    all_states: List[tvm.tir.Schedule] = []
    num_retry, itr = 0, 0
    states = sample_init_population(strategy, args.init_population_size)
    while len(all_states) < args.num_samples_per_task and num_retry < args.max_retry_per_task:
        states = evolve_with_cost_model(strategy, states, len(states))
        all_states += states
        if len(states) == 0:
            states = sample_init_population(strategy, args.init_population_size)
            num_retry += 1
        else:
            num_retry = 0
        print(f"iter: {itr}, number of states sampled: {len(all_states)}")
        itr += 1
    all_states = all_states[: args.num_samples_per_task]

    workload = ms.database.Workload(context.mod)
    file_path = os.path.join(args.candidate_cache_dir, model_name, task_name + ".json")
    with open(file_path, "w", encoding="utf8") as file:
        for i, state in enumerate(all_states):
            tuning_record = ms.database.TuningRecord(state.trace, workload)
            json_str = json.dumps(tuning_record.as_json())
            assert "\n" not in json_str, "Failed to generate single line string."
            if i == len(all_states) - 1:
                file.write(json_str)
            else:
                file.write(json_str + "\n")


args = _parse_args() # pylint: disable=invalid-name


def main():
    if not os.path.isdir(args.task_cache_dir):
        raise Exception("Please provide a correct task cache dir.")
    try:
        os.makedirs(args.candidate_cache_dir, exist_ok=True)
    except OSError:
        print(f"Directory {args.candidate_cache_dir} cannot be created successfully.")

    task_paths = sorted(glob.glob(os.path.join(args.task_cache_dir, "*.json")))[
        args.file_group * 10 : (args.file_group + 1) * 10
    ]
    print(f"Selected models: {task_paths}")
    for num, task_path in enumerate(task_paths):
        print(f"Processing model {num} ...")
        with open(task_path, "rb") as file:
            tasks = file.readlines()
        model_name = task_path.split("/")[-1][len("relay-") :][: -len("_extracted_tasks.json")]
        os.makedirs(os.path.join(args.candidate_cache_dir, model_name), exist_ok=True)
        for task_str in tqdm(tasks):
            task_name, task_mod = json.loads(task_str)
            task_mod = load_json(json.dumps(task_mod))
            sample_candidates(task_mod, task_name, model_name)


if __name__ == "__main__":
    main()
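
A minimal sketch (not part of this PR) of loading the sampled candidates back into TuningRecord objects; the paths are placeholders, and TuningRecord.from_json / Workload are assumed to round-trip the records that sample_candidates() serialized with as_json() above.

# Illustrative only: rebuild the workload from the extracted-task file, then
# parse the matching candidate file line by line.
import json
from tvm import meta_schedule as ms
from tvm.ir import load_json

with open("tasks/relay-resnet_18_extracted_tasks.json", encoding="utf8") as f:  # placeholder path
    task_name, mod_json = json.loads(f.readline())
workload = ms.database.Workload(load_json(json.dumps(mod_json)))

with open(f"candidates/resnet_18/{task_name}.json", encoding="utf8") as f:  # placeholder path
    records = [
        ms.database.TuningRecord.from_json(json.loads(line), workload) for line in f
    ]
print(len(records), "candidate traces loaded")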