-
Notifications
You must be signed in to change notification settings - Fork 93
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Allows concurrent evaluation of models on a separate dataset during training, with --validation_data_path - This is done with minimal impact on training time by only utilizing the CPU for the validation dataset when it is mostly idle doing tf.train(), and pinning processes to specific CPUs - The amount of impact can be adjusted via a gin.config on cpu_affinity.py - CPU affinities are only optimized for internal AMD-Zen based systems at the moment, but can be extended in the future.
- Loading branch information
1 parent
99671e4
commit 2523ea1
Showing
11 changed files
with
376 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# coding=utf-8 | ||
# Copyright 2020 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
"""Utility functions to set cpu affinities when operating main and subprocesses | ||
simultaneously.""" | ||
import gin | ||
import psutil | ||
import itertools | ||
|
||
# Logical CPU count as reported by psutil; used to size the layouts below and
# as the default `max_cpus`.
N = psutil.cpu_count()

# Maps a system type to its list of CPU numbers in cache-sharing order.
CPU_CONFIG = {
    # 'google-epyc' assumes logical core 0 and N/2 are the same physical core.
    # Also, L3 cache is assumed to be shared between consecutive core numbers.
    # The resulting order is therefore 0, N/2, 1, N/2 + 1, ...
    'google-epyc': [
        cpu
        for sibling_pair in zip(range(N // 2), range(N // 2, N))
        for cpu in sibling_pair
    ],
}
|
||
|
||
@gin.configurable
def set_and_get(is_main_process: bool,
                max_cpus=N,
                min_main_cpu: int = 32,
                arch: str = 'google-epyc'):
    """Pins the current process to its share of the cpus and returns it.

    The cache-ordered cpu list for `arch` is first truncated to `max_cpus`
    entries. The main process is given the first `min_main_cpu` of those;
    any other process is given whatever remains after them.

    Args:
      is_main_process: whether the caller is the main process.
      max_cpus: maximal number of cpus to use.
      min_main_cpu: number of cpus to assign to the main process.
      arch: the system type, used to infer the cpu cache architecture.

    Returns:
      A new list with the cpus the current process was set to use.

    Raises:
      KeyError: if `arch` has no entry in CPU_CONFIG.
      ValueError: if the selection for this process would be empty.
    """
    usable = CPU_CONFIG[arch][:max_cpus]
    assigned = (
        usable[:min_main_cpu] if is_main_process else usable[min_main_cpu:]
    )
    # Refuse to hand an empty selection to cpu_affinity(); fail loudly instead.
    if not assigned:
        raise ValueError(
            'Attempting to set cpu affinity of process to nothing.')
    psutil.Process().cpu_affinity(assigned)
    return list(assigned)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.