[MetaSchedule] Generate MetaSchedule Dataset #11641

Merged 2 commits on Jun 11, 2022
Changes from all commits
85 changes: 85 additions & 0 deletions python/tvm/meta_schedule/testing/dataset_collect_models.py
@@ -0,0 +1,85 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=missing-docstring

import argparse
import os
from typing import List, Tuple

from tqdm import tqdm # type: ignore
from tvm.meta_schedule.testing.relay_workload import get_network


# pylint: disable=too-many-branches
def _build_dataset() -> List[Tuple[str, List[int]]]:
    network_keys = []
    for name in [
        "resnet_18",
        "resnet_50",
        "mobilenet_v2",
        "mobilenet_v3",
        "wide_resnet_50",
        "resnext_50",
        "densenet_121",
        "vgg_16",
    ]:
        for batch_size in [1, 4, 8]:
            for image_size in [224, 240, 256]:
                network_keys.append((name, [batch_size, 3, image_size, image_size]))
    # inception-v3
    for name in ["inception_v3"]:
        for batch_size in [1, 2, 4]:
            for image_size in [299]:
                network_keys.append((name, [batch_size, 3, image_size, image_size]))
    # resnet3d
    for name in ["resnet3d_18"]:
        for batch_size in [1, 2, 4]:
            for image_size in [112, 128, 144]:
                network_keys.append((name, [batch_size, 3, image_size, image_size, 16]))
    # bert
    for name in ["bert_tiny", "bert_base", "bert_medium", "bert_large"]:
        for batch_size in [1, 2, 4]:
            for seq_length in [64, 128, 256]:
                network_keys.append((name, [batch_size, seq_length]))
    # dcgan
    for name in ["dcgan"]:
        for batch_size in [1, 4, 8]:
            for image_size in [64]:
                network_keys.append((name, [batch_size, 3, image_size, image_size]))
    return network_keys


def main():
    model_cache_dir = args.model_cache_dir
    try:
        os.makedirs(model_cache_dir, exist_ok=True)
    except OSError:
        print(f"Directory {model_cache_dir} cannot be created successfully.")
    keys = _build_dataset()
    for name, input_shape in tqdm(keys):
        get_network(name=name, input_shape=input_shape, cache_dir=model_cache_dir)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()  # pylint: disable=invalid-name
    parser.add_argument(
        "--model_cache_dir",
        type=str,
        help="Please provide the full path to the model cache dir.",
    )
    args = parser.parse_args()  # pylint: disable=invalid-name
    main()
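
For reference, a minimal sketch (not part of this PR) of what _build_dataset() yields; the counts below simply follow the loops above, and the script itself only needs --model_cache_dir to point at a writable directory.

# Illustrative only: inspect the network keys the script will download and cache.
from tvm.meta_schedule.testing.dataset_collect_models import _build_dataset

keys = _build_dataset()
# 8 CNNs x 3 batch sizes x 3 image sizes (72), inception_v3 (3), resnet3d_18 (9),
# 4 BERT variants x 3 x 3 (36) and dcgan (3): 123 network keys in total.
print(len(keys))  # 123
print(keys[0])    # ('resnet_18', [1, 3, 224, 224])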
104 changes: 104 additions & 0 deletions python/tvm/meta_schedule/testing/dataset_extract_tasks.py
@@ -0,0 +1,104 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=missing-docstring

import argparse
import glob
import json
import os

from tqdm import tqdm # type: ignore
import tvm
from tvm import meta_schedule as ms
from tvm.ir import save_json
from tvm.meta_schedule.testing.relay_workload import _load_cache
from tvm.runtime import load_param_dict


def _parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_cache_dir", type=str, help="Please provide the full path to the model cache dir."
    )
    parser.add_argument(
        "--task_cache_dir", type=str, help="Please provide the full path to save extracted tasks."
    )
    parser.add_argument(
        "--target", type=str, default="cuda", help="Please specify the target hardware for tuning."
    )
    return parser.parse_args()


# pylint: disable=too-many-locals
def extract_and_save_tasks(cache_file):
    """Extract tuning tasks and cache the nonspatial ones in the given directory.

    Parameters
    ----------
    cache_file : str
        The filename of the cached model.

    Returns
    -------
    None
    """
    mod, params_bytearray, _ = _load_cache(args.model_cache_dir, cache_file)
    params = load_param_dict(params_bytearray)
    try:
        extracted_tasks = ms.extract_task_from_relay(mod, target=args.target, params=params)
    except tvm.error.TVMError as error:
        print(str(error))
        return
    task_cache_path = os.path.join(
        args.task_cache_dir, cache_file.split(".")[0] + "_extracted_tasks.json"
    )
    is_spatial = tvm.get_global_func("tir.schedule.IsSpatialPrimFunc")
    with open(task_cache_path, "w", encoding="utf8") as file:
        for i, task in enumerate(extracted_tasks):
            subgraph = task.dispatched[0]
            prim_func = subgraph[subgraph.get_global_vars()[0]]
            if not is_spatial(prim_func):
                subgraph_str = save_json(subgraph)
                json_obj = [task.task_name, json.loads(subgraph_str)]
                json_str = json.dumps(json_obj)
                assert "\n" not in json_str, "Failed to generate single line string."
                if i == len(extracted_tasks) - 1:
                    file.write(json_str)
                else:
                    file.write(json_str + "\n")


args = _parse_args() # pylint: disable=invalid-name


def main():
    if not os.path.isdir(args.model_cache_dir):
        raise Exception("Please provide a correct model cache dir.")
    try:
        os.makedirs(args.task_cache_dir, exist_ok=True)
    except OSError:
        print(f"Directory {args.task_cache_dir} cannot be created successfully.")

    paths = glob.glob(os.path.join(args.model_cache_dir, "*.json"))  # pylint: disable=invalid-name
    for path in tqdm(paths):
        filename = path.split("/")[-1]
        extract_and_save_tasks(filename)


if __name__ == "__main__":
    main()
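
A minimal sketch (not part of this PR) of reading the one-record-per-line output back; the filename is hypothetical, and the deserialization mirrors what dataset_sample_candidates.py does with these files.

# Illustrative only: each line holds [task_name, serialized IRModule].
import json
from tvm.ir import load_json

with open("relay-resnet_18_extracted_tasks.json", encoding="utf8") as f:  # hypothetical file
    for line in f:
        task_name, mod_json = json.loads(line)
        mod = load_json(json.dumps(mod_json))  # recover the tvm.IRModule
        print(task_name, mod.get_global_vars())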
191 changes: 191 additions & 0 deletions python/tvm/meta_schedule/testing/dataset_sample_candidates.py
@@ -0,0 +1,191 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=missing-docstring

import argparse
import glob
import json
import os
from typing import List

from tqdm import tqdm # type: ignore
import tvm
from tvm import meta_schedule as ms
from tvm.ir import load_json
from tvm.target import Target


def _parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--task_cache_dir", type=str, help="Please provide the full path to the extracted tasks."
    )
    parser.add_argument(
        "--candidate_cache_dir",
        type=str,
        help="Please provide the full path to save the sampled candidates.",
    )
    parser.add_argument(
        "--target",
        type=str,
        default="nvidia/geforce-rtx-3070",
        help="Please specify the target hardware for tuning.\
            Note: for generating dataset, the hardware does not need to be present.",
    )
    parser.add_argument(
        "--init_population_size",
        type=int,
        default=256,
        help="The initial population size used in evolutionary search.",
    )
    parser.add_argument(
        "--num_samples_per_task",
        type=int,
        default=400,
        help="The number of samples to gather per tuning task.",
    )
    parser.add_argument(
        "--num_trials_per_iter",
        type=int,
        default=64,
        help="The number of trials per iteration in evolutionary search.",
    )
    parser.add_argument(
        "--max_trials_per_task",
        type=int,
        default=400,
        help="The maximum number of trials per task in evolutionary search.",
    )
    parser.add_argument(
        "--max_retry_per_task",
        type=int,
        default=10,
        help="The maximum number of retry attempts allowed.",
    )
    parser.add_argument(
        "--file_group",
        type=int,
        default=0,
        help="To enable running multiple scripts in parallel, files [idx * 10 : (idx + 1) * 10]\
            in the sorted file list from the given directory will be run.",
    )
    return parser.parse_args()


# pylint: disable=too-many-locals
def sample_candidates(task, task_name, model_name):
    """Randomly sample candidates for a task and save the candidates in the given directory.

    Parameters
    ----------
    task : IRModule
        The initial ir module used for generating the search space.
    task_name : str
        The name of the task.
    model_name : str
        The name of the model.

    Returns
    -------
    None
    """
    sample_init_population = tvm.get_global_func(
        "meta_schedule.SearchStrategyEvolutionarySearchSampleInitPopulation"
    )
    evolve_with_cost_model = tvm.get_global_func(
        "meta_schedule.SearchStrategyEvolutionarySearchEvolveWithCostModel"
    )
    strategy = ms.search_strategy.EvolutionarySearch(
        num_trials_per_iter=args.num_trials_per_iter,
        max_trials_per_task=args.max_trials_per_task,
        init_measured_ratio=0.0,
    )
    target = Target(args.target)
    context = ms.TuneContext(
        mod=task,
        target=target,
        space_generator=ms.space_generator.PostOrderApply(),
        search_strategy=strategy,
        sch_rules=ms.default_config.schedule_rules(None, target),
        postprocs=ms.default_config.postproc(None, target),
        mutator_probs=ms.default_config.mutator_probs(None, target),
        task_name=task_name,
    )
    context.initialize()
    context.pre_tuning(
        context.generate_design_space(),
        database=ms.database.MemoryDatabase(),  # type: ignore
        cost_model=ms.cost_model.RandomModel(),  # type: ignore
    )

    all_states: List[tvm.tir.Schedule] = []
    num_retry, itr = 0, 0
    states = sample_init_population(strategy, args.init_population_size)
    while len(all_states) < args.num_samples_per_task and num_retry < args.max_retry_per_task:
        states = evolve_with_cost_model(strategy, states, len(states))
        all_states += states
        if len(states) == 0:
            states = sample_init_population(strategy, args.init_population_size)
            num_retry += 1
        else:
            num_retry = 0
        print(f"iter: {itr}, number of states sampled: {len(all_states)}")
        itr += 1
    all_states = all_states[: args.num_samples_per_task]

    workload = ms.database.Workload(context.mod)
    file_path = os.path.join(args.candidate_cache_dir, model_name, task_name + ".json")
    with open(file_path, "w", encoding="utf8") as file:
        for i, state in enumerate(all_states):
            tuning_record = ms.database.TuningRecord(state.trace, workload)
            json_str = json.dumps(tuning_record.as_json())
            assert "\n" not in json_str, "Failed to generate single line string."
            if i == len(all_states) - 1:
                file.write(json_str)
            else:
                file.write(json_str + "\n")


args = _parse_args() # pylint: disable=invalid-name


def main():
    if not os.path.isdir(args.task_cache_dir):
        raise Exception("Please provide a correct task cache dir.")
    try:
        os.makedirs(args.candidate_cache_dir, exist_ok=True)
    except OSError:
        print(f"Directory {args.candidate_cache_dir} cannot be created successfully.")

    task_paths = sorted(glob.glob(os.path.join(args.task_cache_dir, "*.json")))[
        args.file_group * 10 : (args.file_group + 1) * 10
    ]
    print(f"Selected models: {task_paths}")
    for num, task_path in enumerate(task_paths):
        print(f"Processing model {num} ...")
        with open(task_path, "rb") as file:
            tasks = file.readlines()
        model_name = task_path.split("/")[-1][len("relay-") :][: -len("_extracted_tasks.json")]
        os.makedirs(os.path.join(args.candidate_cache_dir, model_name), exist_ok=True)
        for task_str in tqdm(tasks):
            task_name, task_mod = json.loads(task_str)
            task_mod = load_json(json.dumps(task_mod))
            sample_candidates(task_mod, task_name, model_name)


if __name__ == "__main__":
    main()
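
A minimal sketch (not part of this PR) of loading the sampled candidates back into TuningRecord objects; the paths are placeholders, and TuningRecord.from_json / Workload are assumed to round-trip the records that sample_candidates() serialized with as_json() above.

# Illustrative only: rebuild the workload from the extracted-task file, then
# parse the matching candidate file line by line.
import json
from tvm import meta_schedule as ms
from tvm.ir import load_json

with open("tasks/relay-resnet_18_extracted_tasks.json", encoding="utf8") as f:  # placeholder path
    task_name, mod_json = json.loads(f.readline())
workload = ms.database.Workload(load_json(json.dumps(mod_json)))

with open(f"candidates/resnet_18/{task_name}.json", encoding="utf8") as f:  # placeholder path
    records = [
        ms.database.TuningRecord.from_json(json.loads(line), workload) for line in f
    ]
print(len(records), "candidate traces loaded")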