Skip to content

Commit

Permalink
Merge branch 'master' into refactor/move-builtins
Browse files Browse the repository at this point in the history
  • Loading branch information
Vasileios Karakasis committed Mar 15, 2022
2 parents 6a8f36a + 2db7e2f commit 3a5f03c
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 52 deletions.
126 changes: 76 additions & 50 deletions cscs-checks/apps/cpmd/cpmd_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,80 +3,106 @@
#
# SPDX-License-Identifier: BSD-3-Clause

import contextlib
import reframe as rfm
import reframe.utility.sanity as sn


@rfm.simple_test
class CPMDCheck(rfm.RunOnlyRegressionTest):
scale = parameter(['small', 'large'])
descr = 'CPMD check (C4H6 metadynamics)'
maintainers = ['AJ', 'LM']
tags = {'production'}
valid_systems = ['daint:gpu']
num_tasks_per_node = 1
valid_prog_environs = ['builtin']
modules = ['CPMD']
executable = 'cpmd.x'
executable_opts = ['ana_c4h6.in > stdout.txt']
readonly_files = ['ana_c4h6.in', 'C_MT_BLYP', 'H_MT_BLYP']
use_multithreading = True
strict_check = False
executable_opts = ['ana_c4h6.in']
extra_resources = {
'switches': {
'num_switches': 1
}
}
allref = {
'9': {
'p100': {
'time': (284, None, 0.15, 's')
strict_check = False
use_multithreading = False
tags = {'maintenance', 'production'}
maintainers = ['AJ', 'LM']

num_nodes = parameter([6, 16], loggable=True)
references = {
6: {
'sm_60': {
'dom:gpu': {'time': (120, None, 0.15, 's')},
'daint:gpu': {'time': (120, None, 0.15, 's')},
},
'broadwell': {
'dom:mc': {'time': (150.0, None, 0.15, 's')},
'daint:mc': {'time': (150.0, None, 0.15, 's')},
},
},
'16': {
'p100': {
'time': (226, None, 0.15, 's')
}
16: {
'sm_60': {
'daint:gpu': {'time': (120, None, 0.15, 's')}
},
'broadwell': {
'daint:mc': {'time': (150.0, None, 0.15, 's')},
},
}
}

# Post-init hook: derive the task count (and, for the small case, one extra
# valid system) from the `scale` parameter.
@run_after('init')
def setup_by_scale(self):
if self.scale == 'small':
# The small case additionally accepts 'dom:gpu' and uses 9 tasks.
self.valid_systems += ['dom:gpu']
self.num_tasks = 9
else:
# Every other scale value uses 16 tasks.
self.num_tasks = 16

# Pre-performance hook: look up the reference entry in `allref` for the
# current partition and install it as `self.reference`.
@run_before('performance')
def set_perf_reference(self):
proc = self.current_partition.processor
pname = self.current_partition.fullname
# The two GPU partitions are mapped to the fixed key 'p100' instead of the
# architecture reported by the partition's processor info.
if pname in ('daint:gpu', 'dom:gpu'):
arch = 'p100'
else:
arch = proc.arch

# A KeyError raised by the lookup below is suppressed, which leaves
# `self.reference` unchanged instead of failing the hook.
# NOTE(review): the `allref` entries visible in this listing are keyed by the
# strings '9' and '16', whereas `setup_by_scale` assigns integer task counts,
# and no `benchmark` attribute is set in the visible code -- confirm that this
# lookup can ever succeed.
with contextlib.suppress(KeyError):
self.reference = {
pname: {
'perf': self.allref[self.num_tasks][arch][self.benchmark]
}
}
@performance_function('s')
def elapsed_time(self):
    """Extract the timing value, in seconds, from the test's stdout.

    The value is the token that follows three numeric columns on a line
    starting with ' cpmd'; it is converted to ``float``.
    """
    timing_patt = r'^ cpmd(\s+[\d\.]+){3}\s+(?P<time>\S+)'
    return sn.extractsingle(timing_patt, self.stdout, 'time', float)

@sanity_function
def assert_energy_diff(self):
# OpenMP version of CPMD segfaults
# self.variables = { 'OMP_NUM_THREADS' : '8' }
energy = sn.extractsingle(
r'CLASSICAL ENERGY\s+-(?P<result>\S+)',
'stdout.txt', 'result', float)
self.stdout, 'result', float)
energy_reference = 25.81
energy_diff = sn.abs(energy - energy_reference)
return sn.assert_lt(energy_diff, 0.26)

# Performance function (unit 's'): read 'stdout.txt' and return, as a float,
# the token captured by the `perf` group -- the field that follows three
# numeric columns on a line starting with ' cpmd'.
@performance_function('s')
def time(self):
return sn.extractsingle(r'^ cpmd(\s+[\d\.]+){3}\s+(?P<perf>\S+)',
'stdout.txt', 'perf', float)
@run_after('init')
def setup_system_filtering(self):
    """Set the description, valid systems and environments of this variant.

    The list of valid systems depends on the ``num_nodes`` parameter; the
    test is skipped when ``num_nodes`` has no entry in the table below.
    """
    self.descr = f'CPMD check ({self.num_nodes} node(s))'

    # setup system filter
    valid_systems = {
        6: ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc'],
        16: ['daint:gpu', 'daint:mc']
    }

    # The message previously read "... for 6(s)"; spell out the unit so it
    # matches the wording used in the description above.
    self.skip_if(self.num_nodes not in valid_systems,
                 f'No valid systems found for {self.num_nodes} node(s)')
    self.valid_systems = valid_systems[self.num_nodes]

    # setup programming environment filter
    self.valid_prog_environs = ['builtin']

@run_before('run')
def setup_run(self):
    """Configure the job and select the performance references.

    Determines the architecture key for the current partition, sets the
    launcher and scheduler options and the task count, and finally installs
    the reference values for ``(num_nodes, arch)``.  The test is skipped if
    processor information is unavailable or if no reference exists for the
    combination.
    """
    # retrieve processor data
    self.skip_if_no_procinfo()
    proc = self.current_partition.processor

    # set architecture for GPU partition (no auto-detection)
    if self.current_partition.fullname in ['daint:gpu', 'dom:gpu']:
        arch = 'sm_60'
        self.variables = {
            'CRAY_CUDA_MPS': '1'
        }
    else:
        arch = proc.arch

    # common setup for every architecture
    self.job.launcher.options = ['--cpu-bind=cores']
    self.job.options = ['--distribution=block:block']
    # FIXME: the current test case does not scale beyond 72 MPI tasks,
    # so the last node in 16-nodes jobs will be used only partially.
    # The test case needs to be updated (warning about XC_DRIVER IN &DFT)
    self.num_tasks = 72

    # Look the references up once and reuse the result; previously the
    # value was bound to an unused local and the lookup was repeated.
    try:
        found = self.references[self.num_nodes][arch]
    except KeyError:
        self.skip(f'Configuration with {self.num_nodes} node(s) '
                  f'is not supported on {arch!r}')
    else:
        # setup performance references
        self.reference = found
12 changes: 10 additions & 2 deletions reframe/frontend/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,14 @@ def print_failure_report(self, printer):
f"{r['dependencies_actual']}")
printer.info(f" * Maintainers: {r['maintainers']}")
printer.info(f" * Failing phase: {r['fail_phase']}")
printer.info(f" * Rerun with '-n {r['unique_name']}"
if rt.runtime().get_option('general/0/compact_test_names'):
cls = r['display_name'].split(' ')[0]
variant = r['unique_name'].replace(cls, '').replace('_', '@')
nameoptarg = cls + variant
else:
nameoptarg = r['unique_name']

printer.info(f" * Rerun with '-n {nameoptarg}"
f" -p {r['environment']} --system {r['system']} -r'")
printer.info(f" * Reason: {r['fail_reason']}")

Expand All @@ -256,7 +263,8 @@ def print_failure_stats(self, printer):
partfullname = partition.fullname if partition else 'None'
environ_name = (check.current_environ.name
if check.current_environ else 'None')
f = f'[{check.unique_name}, {environ_name}, {partfullname}]'
f = (f'[{check.display_name} (uid: {check.unique_name}), '
f'{environ_name}, {partfullname}]')
if tf.failed_stage not in failures:
failures[tf.failed_stage] = []

Expand Down

0 comments on commit 3a5f03c

Please sign in to comment.