Skip to content

Commit

Permalink
Fixes for docker mounts (#150)
Browse files Browse the repository at this point in the history
* Conditionally enable mitten from src

* Fix docker mounts, docker_input_mapping -> input_mapping

* Fix ENV string for mounts

* Update userid if the docker user is existing
  • Loading branch information
arjunsuresh authored Jan 25, 2025
1 parent a6dcd31 commit d2e1f4e
Show file tree
Hide file tree
Showing 21 changed files with 166 additions and 73 deletions.
1 change: 1 addition & 0 deletions .github/workflows/check-broken-links.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ on:
jobs:
markdown-link-check:
runs-on: ubuntu-latest

# check out the latest version of the code
steps:
- uses: actions/checkout@v4
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test-mlc-script-features.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ jobs:
mlc run script --tags=run,docker,container --adr.compiler.tags=gcc --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --image_name=mlc-script-app-image-classification-onnx-py --env.MLC_DOCKER_RUN_SCRIPT_TAGS=app,image-classification,onnx,python --env.MLC_DOCKER_IMAGE_BASE=ubuntu:22.04 --env.MLC_DOCKER_IMAGE_REPO=local --quiet
- name: Run MLPerf Inference Retinanet with native and virtual Python
if: runner.os == 'linux'
run: |
mlcr --tags=app,mlperf,inference,generic,_cpp,_retinanet,_onnxruntime,_cpu --adr.python.version_min=3.8 --adr.compiler.tags=gcc --adr.openimages-preprocessed.tags=_50 --scenario=Offline --mode=accuracy --test_query_count=10 --rerun --quiet
Expand Down
40 changes: 26 additions & 14 deletions automation/script/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,12 +213,7 @@ def docker_run(self_module, i):
env = i.get('env', {})

regenerate_docker_file = not i.get('docker_noregenerate', False)
recreate_docker_image = i.get('docker_recreate', False)

if is_true(i.get('docker_skip_build', False)):
regenerate_docker_file = False
recreate_docker_image = False
env['MLC_DOCKER_SKIP_BUILD'] = 'yes'
rebuild_docker_image = i.get('docker_rebuild', False)

# Prune unnecessary Docker-related input keys
r = prune_input({'input': i, 'extra_keys_starts_with': ['docker_']})
Expand Down Expand Up @@ -269,7 +264,10 @@ def docker_run(self_module, i):
'alias', ''), meta.get(
'uid', '')

mounts = copy.deepcopy(i.get('docker_mounts', []))
mounts = copy.deepcopy(
i.get(
'docker_mounts',
[])) # do we need a copy here?
variations = meta.get('variations', {})
docker_settings = meta.get('docker', {})
state['docker'] = docker_settings
Expand Down Expand Up @@ -334,31 +332,45 @@ def docker_run(self_module, i):
if r['return'] > 0:
return r

# Handle environment variable-based mounts
mounts = process_mounts(mounts, env, i, docker_settings)
if mounts is None:
return {'return': 1, 'error': 'Error processing mounts'}

# Prepare Docker-specific inputs
docker_inputs, dockerfile_path = prepare_docker_inputs(
i, docker_settings, script_path, True)

if docker_inputs is None:
return {'return': 1, 'error': 'Error preparing Docker inputs'}

docker_input_mapping = docker_settings.get('input_mapping')

# Update env based on docker_input_mapping if they are in input
if docker_input_mapping and i:
env.update({docker_input_mapping[key]: i[key]
for key in docker_input_mapping if key in i})

# Handle environment variable-based mounts
res = process_mounts(mounts, env, docker_settings, f_run_cmd)
if res['return'] > 0:
return res
docker_inputs['mounts'] = res['mounts']
container_env_string = res['container_env_string']

res = update_docker_environment(
docker_settings, env, container_env_string)
if res['return'] > 0:
return res

# Generate the run command
r = regenerate_script_cmd({'script_uid': script_uid,
'script_alias': script_alias,
'tags': tags,
'run_cmd': f_run_cmd})
if r['return'] > 0:
return r
final_run_cmd = r['run_cmd_string']
final_run_cmd = f"""{r['run_cmd_string']} {container_env_string} --docker_run_deps """

# Execute the Docker container
mlc_docker_input = {
'action': 'run', 'automation': 'script', 'tags': 'run,docker,container',
'recreate': recreate_docker_image,
'rebuild': rebuild_docker_image,
'env': env, 'mounts': mounts,
'script_tags': i.get('tags'), 'run_cmd': final_run_cmd, 'v': verbose,
'quiet': True, 'real_run': True, 'add_deps_recursive': {'build-docker-image': {'dockerfile': dockerfile_path}},
Expand Down
142 changes: 122 additions & 20 deletions automation/script/docker_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import copy


def process_mounts(mounts, env, i, docker_settings):
def process_mounts(mounts, env, docker_settings, f_run_cmd):
"""
Processes and updates the Docker mounts based on the provided inputs and environment variables.
Expand All @@ -20,21 +20,71 @@ def process_mounts(mounts, env, i, docker_settings):
Returns:
Updated mounts list or None in case of an error.
"""
try:
# Add mounts specified via `env` variables
for mount_key in docker_settings.get('env_mounts', []):
mount_path = env.get(mount_key, '')
if mount_path:
mounts.append(mount_path)

# Include user-specified additional mounts
if 'docker_additional_mounts' in i:
mounts.extend(i['docker_additional_mounts'])

return mounts
except Exception as e:
logging.error(f"Error processing mounts: {e}")
return None
if 'mounts' in docker_settings:
mounts.extend(docker_settings['mounts'])

docker_input_mapping = docker_settings.get("input_mapping", {})
container_env_string = ""

for index in range(len(mounts)):
mount = mounts[index]

# Locate the last ':' to separate the mount into host and container
# paths
j = mount.rfind(':')
if j <= 0:
return {
'return': 1,
'error': f"Can't find separator ':' in the mount string: {mount}"
}

host_mount, container_mount = mount[:j], mount[j + 1:]
new_host_mount = host_mount
new_container_mount = container_mount
host_env_key, container_env_key = None, str(container_mount)

# Process host mount for environment variables
host_placeholders = re.findall(r'\${{ (.*?) }}', host_mount)
if host_placeholders:
for placeholder in host_placeholders:
if placeholder in env:
host_env_key = placeholder
new_host_mount = get_host_path(env[placeholder])
else: # Skip mount if variable is missing
mounts[index] = None
break

# Process container mount for environment variables
container_placeholders = re.findall(r'\${{ (.*?) }}', container_mount)
if container_placeholders:
for placeholder in container_placeholders:
if placeholder in env:
new_container_mount, container_env_key = get_container_path(
env[placeholder])
else: # Skip mount if variable is missing
mounts[index] = None
break

# Skip further processing if the mount was invalid
if mounts[index] is None:
continue

# Update mount entry
mounts[index] = f"{new_host_mount}:{new_container_mount}"

# Update container environment string and mappings
if host_env_key:
container_env_string += f" --env.{host_env_key}={container_env_key} "
for key, value in docker_input_mapping.items():
if value == host_env_key:
i[key] = container_env_key
f_run_cmd[key] = container_env_key

# Remove invalid mounts and construct mount string
mounts = [item for item in mounts if item is not None]

return {'return': 0, 'mounts': mounts,
'container_env_string': container_env_string}


def prepare_docker_inputs(input_params, docker_settings,
Expand All @@ -61,7 +111,7 @@ def prepare_docker_inputs(input_params, docker_settings,
keys += [
"skip_run_cmd", "pre_run_cmds", "run_cmd_prefix", "all_gpus", "num_gpus", "device", "gh_token",
"port_maps", "shm_size", "pass_user_id", "pass_user_group", "extra_run_args", "detached", "interactive",
"dt", "it"
"dt", "it", "use_host_group_id", "use_host_user_id"
]
# Collect Dockerfile inputs
docker_inputs = {
Expand Down Expand Up @@ -102,7 +152,57 @@ def prepare_docker_inputs(input_params, docker_settings,
return docker_inputs, dockerfile_path


def update_docker_paths(path, mounts=None, force_target_path=''):
def update_docker_environment(docker_settings, env, container_env_string):
"""
Updates the Docker environment variables and build arguments.
Args:
docker_settings (dict): Docker configuration settings.
env (dict): The environment dictionary to update.
container_env_string (str): A string to store Docker container environment variable options.
Returns:
dict: A dictionary with a return code indicating success or failure.
"""
# Define proxy-related environment variable keys to propagate
proxy_keys = [
"ftp_proxy", "FTP_PROXY",
"http_proxy", "HTTP_PROXY",
"https_proxy", "HTTPS_PROXY",
"no_proxy", "NO_PROXY",
"socks_proxy", "SOCKS_PROXY",
"GH_TOKEN"
]

# Ensure the '+ CM_DOCKER_BUILD_ARGS' key exists in the environment
if '+ MLC_DOCKER_BUILD_ARGS' not in env:
env['+ MLC_DOCKER_BUILD_ARGS'] = []

# Add proxy environment variables to Docker build arguments and container
# environment string
for proxy_key in proxy_keys:
proxy_value = os.environ.get(proxy_key)
if proxy_value:
container_env_string += f" --env.{proxy_key}={proxy_value} "
env['+ MLC_DOCKER_BUILD_ARGS'].append(f"{proxy_key}={proxy_value}")

# Add host group ID if specified in the Docker settings and not on Windows
if not is_false(docker_settings.get('pass_group_id')) and os.name != 'nt':
env['+ MLC_DOCKER_BUILD_ARGS'].append(
f"GID=\\\" $(id -g $USER) \\\""
)

# Add host user ID if specified in the Docker settings and not on Windows
if not is_false(docker_settings.get(
'use_host_user_id')) and os.name != 'nt':
env['+ MLC_DOCKER_BUILD_ARGS'].append(
f"UID=\\\" $(id -u $USER) \\\""
)

return {'return': 0}


def update_container_paths(path, mounts=None, force_target_path=''):
"""
Update and return the absolute paths for a given host path and its container equivalent.
Optionally updates a mounts list with the mapping of host and container paths.
Expand Down Expand Up @@ -275,7 +375,9 @@ def get_docker_default(key):
"skip_run_cmd": False,
"pre_run_cmds": [],
"run_cmd_prefix": '',
"port_maps": []
"port_maps": [],
"use_host_user_id": True,
"use_host_group_id": True,
}
if key in defaults:
return defaults[key]
Expand Down Expand Up @@ -317,5 +419,5 @@ def get_container_path(value):
new_path_split2 = new_path_split + path_split[repo_entry_index:]
return "/".join(new_path_split1), "/".join(new_path_split2)
else:
orig_path, target_path = update_path_for_docker(path=value)
orig_path, target_path = update_container_paths(path=value)
return target_path, target_path
9 changes: 0 additions & 9 deletions automation/script/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -823,10 +823,6 @@ def _run(self, i):
posthook_deps = meta.get('posthook_deps', [])
input_mapping = meta.get('input_mapping', {})
docker_settings = meta.get('docker')
docker_input_mapping = {}
if docker_settings:
docker_input_mapping = docker_settings.get(
'docker_input_mapping', {})
new_env_keys_from_meta = meta.get('new_env_keys', [])
new_state_keys_from_meta = meta.get('new_state_keys', [])

Expand Down Expand Up @@ -6058,11 +6054,6 @@ def update_state_from_meta(meta, env, state, const, const_state, deps, post_deps
new_docker_settings = meta.get('docker')
if new_docker_settings:
docker_settings = state.get('docker', {})
# docker_input_mapping = docker_settings.get('docker_input_mapping', {})
# new_docker_input_mapping = new_docker_settings.get('docker_input_mapping', {})
# if new_docker_input_mapping:
# # update_env_from_input_mapping(env, i['input'], docker_input_mapping)
# utils.merge_dicts({'dict1':docker_input_mapping, 'dict2':new_docker_input_mapping, 'append_lists':True, 'append_unique':True})
utils.merge_dicts({'dict1': docker_settings,
'dict2': new_docker_settings,
'append_lists': True,
Expand Down
9 changes: 4 additions & 5 deletions script/app-mlperf-inference-nvidia/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -281,15 +281,14 @@ deps:
enable_if_env:
MLC_RUN_STATE_DOCKER:
- 'yes'
- True
- 'True'

- tags: get,nvidia,mitten
skip_if_env:
MLC_RUN_STATE_DOCKER:
- 'yes'
- True
- 'True'
enable_if_env:
MLC_NVIDIA_MITTEN_FROM_SRC:
- 'yes'

prehook_deps:
########################################################################
Expand Down Expand Up @@ -446,7 +445,7 @@ variations:
group: model
env:
MLC_MODEL: stable-diffusion-xl
MLC_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://github.com/mlcommons/cm4mlops/blob/main/script/get-ml-model-stable-diffusion/_cm.json#L174"
MLC_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://github.com/mlcommons/mlperf-automations/blob/main/script/get-ml-model-stable-diffusion/meta.yaml"
MLC_ML_MODEL_WEIGHT_TRANSFORMATIONS: "quantization, affine fusion"
MLC_ML_MODEL_INPUTS_DATA_TYPE: int32
MLC_ML_MODEL_WEIGHTS_DATA_TYPE: int8
Expand Down
4 changes: 2 additions & 2 deletions script/app-mlperf-inference/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,7 @@ variations:
os: ubuntu
real_run: false
run: true
docker_input_mapping:
input_mapping:
criteo_preprocessed_path: CRITEO_PREPROCESSED_PATH
dlrm_data_path: DLRM_DATA_PATH
intel_gptj_int8_model_path: MLC_MLPERF_INFERENCE_INTEL_GPTJ_INT8_MODEL_PATH
Expand Down Expand Up @@ -1905,7 +1905,7 @@ docker:
mlc_repo_branch: dev
real_run: False
os_version: '22.04'
docker_input_mapping:
input_mapping:
imagenet_path: IMAGENET_PATH
gptj_checkpoint_path: GPTJ_CHECKPOINT_PATH
criteo_preprocessed_path: CRITEO_PREPROCESSED_PATH
Expand Down
13 changes: 3 additions & 10 deletions script/build-docker-image/customize.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from mlc import utils
import os
from os.path import exists
from utils import *


def preprocess(i):
Expand Down Expand Up @@ -50,7 +51,7 @@ def preprocess(i):
if env.get("MLC_DOCKER_IMAGE_TAG", "") == '':
env['MLC_DOCKER_IMAGE_TAG'] = "latest"

if str(env.get("MLC_DOCKER_CACHE", "yes")).lower() in ["no", "false", "0"]:
if is_false(env.get("MLC_DOCKER_CACHE", "True")):
env["MLC_DOCKER_CACHE_ARG"] = " --no-cache"

CMD = ''
Expand Down Expand Up @@ -82,13 +83,8 @@ def preprocess(i):

CMD = ''.join(XCMD)

print('================================================')
print('CM generated the following Docker build command:')
print('')
print(CMD)

print('')

env['MLC_DOCKER_BUILD_CMD'] = CMD

return {'return': 0}
Expand All @@ -108,7 +104,7 @@ def postprocess(i):
env = i['env']

# Check if need to push docker image to the Docker Hub
if env.get('MLC_DOCKER_PUSH_IMAGE', '') in ['True', True, 'yes']:
if is_true(env.get('MLC_DOCKER_PUSH_IMAGE', '')):
image_name = get_image_name(env)

# Prepare CMD to build image
Expand All @@ -122,9 +118,6 @@ def postprocess(i):
with open(dockerfile_path + '.build.bat', 'w') as f:
f.write(PCMD + '\n')

print('================================================')
print('CM generated the following Docker push command:')
print('')
print(PCMD)

print('')
Expand Down
Loading

0 comments on commit d2e1f4e

Please sign in to comment.