Skip to content

Commit

Permalink
Merge pull request #1475 from DARMA-tasking/1469-lb-stats-json
Browse files Browse the repository at this point in the history
1469 Output LB statistics as JSON
  • Loading branch information
lifflander authored Jun 28, 2021
2 parents 525c7d0 + 1852358 commit a36acef
Show file tree
Hide file tree
Showing 175 changed files with 92,337 additions and 225 deletions.
14 changes: 13 additions & 1 deletion ci/docker/ubuntu-18.04-nvidia-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,19 @@ RUN if test ${compiler} = "nvcc-10"; then \
rm -rf /var/lib/apt/lists/* && \
rm -rf cuda-repo-ubuntu1804-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb && \
ln -s /usr/local/cuda-10.1 /usr/local/cuda-versioned; \
else \
elif test ${compiler} = "nvcc-10.2"; then \
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin && \
mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \
wget https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda-repo-ubuntu1804-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb && \
dpkg -i cuda-repo-ubuntu1804-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb && \
apt-key add /var/cuda-repo-10-2-local-10.2.89-440.33.01/7fa2af80.pub && \
apt-get update && \
apt-get -y install cuda && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
rm -rf cuda-repo-ubuntu1804-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb && \
ln -s /usr/local/cuda-10.2 /usr/local/cuda-versioned; \
else \
wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin && \
mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \
wget http://developer.download.nvidia.com/compute/cuda/11.0.1/local_installers/cuda-repo-ubuntu1804-11-0-local_11.0.1-450.36.06-1_amd64.deb && \
Expand Down
14 changes: 14 additions & 0 deletions cmake/link_vt.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ function(link_target_with_vt)
LINK_DL
LINK_ZOLTAN
LINK_FORT
LINK_JSON
LINK_BROTLI
)
set(
multiValueArg
Expand All @@ -49,6 +51,18 @@ function(link_target_with_vt)
message(STATUS "link_target_with_vt: default link=${ARG_DEFAULT_LINK_SET}")
endif()

if (NOT DEFINED ARG_LINK_JSON AND ${ARG_DEFAULT_LINK_SET} OR ARG_LINK_JSON)
target_link_libraries(
${ARG_TARGET} PRIVATE ${ARG_BUILD_TYPE} ${JSON_LIBRARY}
)
endif()

if (NOT DEFINED ARG_LINK_BROTLI AND ${ARG_DEFAULT_LINK_SET} OR ARG_LINK_BROTLI)
target_link_libraries(
${ARG_TARGET} PRIVATE ${ARG_BUILD_TYPE} ${BROTLI_LIBRARY}
)
endif()

if (NOT DEFINED ARG_LINK_FORT AND ${ARG_DEFAULT_LINK_SET} OR ARG_LINK_FORT)
if (vt_libfort_enabled)
target_link_libraries(
Expand Down
14 changes: 14 additions & 0 deletions cmake/load_bundled_libraries.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,20 @@ add_subdirectory(${PROJECT_LIB_DIR}/CLI)
set(FMT_LIBRARY fmt)
add_subdirectory(${PROJECT_LIB_DIR}/fmt)

# json library always included in the build
# The JSON_* variables are set before add_subdirectory() so they are visible
# while the bundled json directory is processed.
# NOTE(review): these are normal (non-cache) variables; if the subproject
# declares them with option() under policy CMP0077 OLD they may be ignored on
# a first configure -- confirm against the bundled CMakeLists.
set(JSON_BuildTests OFF)
set(JSON_MultipleHeaders ON)
# Library name exposed as ${JSON_LIBRARY}; link_target_with_vt passes it to
# target_link_libraries().
set(JSON_LIBRARY nlohmann_json)
add_subdirectory(${PROJECT_LIB_DIR}/json)

# brotli library always included in the build
# NOTE(review): presumably turns off the bundled brotli test targets -- confirm.
set(BROTLI_DISABLE_TESTS ON)
# we need to disable bundled mode so it will install properly
set(BROTLI_BUNDLED_MODE OFF)
set(BROTLI_BUILD_PORTABLE ON)
# List of the three brotli library names exposed as ${BROTLI_LIBRARY};
# link_target_with_vt passes the whole list to target_link_libraries().
set(BROTLI_LIBRARY brotlicommon brotlienc brotlidec)
add_subdirectory(${PROJECT_LIB_DIR}/brotli)

# Optionally include mimalloc (alternative memory allocator)
if (vt_mimalloc_enabled)
add_subdirectory(${PROJECT_LIB_DIR}/mimalloc)
Expand Down
4 changes: 3 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# clang-3.9, clang-4.0, clang-5.0, clang-6.0, clang-7, clang-8,
# clang-9, clang-10,
# icc-18, icc-19,
# nvcc-10, nvcc-11}
# nvcc-10, nvcc-10.2, nvcc-11}
# REPO=lifflander1/vt
# UBUNTU={18.04, 20.04}
# ULIMIT_CORE=0
Expand Down Expand Up @@ -87,6 +87,7 @@ volumes:
amd64-ubuntu-18.04-icc-19-cache:
amd64-ubuntu-18.04-icc-20-cache:
amd64-ubuntu-18.04-nvcc-10-cache:
amd64-ubuntu-18.04-nvcc-10.2-cache:
amd64-ubuntu-18.04-nvcc-11-cache:
amd64-alpine-clang-3.9-cache:
amd64-alpine-clang-4.0-cache:
Expand All @@ -106,6 +107,7 @@ volumes:
amd64-alpine-icc-19-cache:
amd64-alpine-icc-20-cache:
amd64-alpine-nvcc-10-cache:
amd64-alpine-nvcc-10.2-cache:
amd64-alpine-nvcc-11-cache:
arm64v8-ubuntu-18.04-gcc-7-cache:
arm64v8-alpine-gcc-7-cache:
Expand Down
158 changes: 148 additions & 10 deletions docs/md/node-stats.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,161 @@ the statistics and mapping.

\subsection stats-file-format File Format

Each line in the file will be in one of two formats. The first format is a
computation time line for each phase, which breaks time down into subphases:
The VOM files are output in JSON format, either compressed with brotli
(the default) or as plain JSON if the argument `--vt_lb_stats_compress`
is set to `false`.

\code
<phase>, <object-id>, <time-in-seconds> <#-of-subphases> '[' [<subphase-time-1>] ... [<subphase-time-N>] ']'
The JSON files contain an array of `phases` that have been captured by \vt and
output to the file. Each phase has an `id` indicating which phase it was while
the application was running. Each phase also has an array of `tasks` that
represent work that was done during that phase. Each task has a `time`,
`resource`, `node`, `entity`, and optionally a list of `subphases`. The `entity`
contains information about the task that performed this work. If that `entity`
is a virtual collection object, it will specify the unique `id` for the object,
and optionally the `index`, `home`, and `collection_id` for that object.

\code{.json}
{
"phases": [
{
"id": 0,
"tasks": [
{
"entity": {
"collection_id": 7,
"home": 0,
"id": 12884901888,
"index": [
3
],
"type": "object"
},
"node": 0,
"resource": "cpu",
"subphases": [
{
"id": 0,
"time": 0.014743804931640625
}
],
"time": 0.014743804931640625
},
{
"entity": {
"collection_id": 7,
"home": 0,
"id": 4294967296,
"index": [
1
],
"type": "object"
},
"node": 0,
"resource": "cpu",
"subphases": [
{
"id": 0,
"time": 0.013672113418579102
}
],
"time": 0.013672113418579102
}
]
},
{
"id": 1,
"tasks": [
{
"entity": {
"collection_id": 7,
"home": 0,
"id": 12884901888,
"index": [
3
],
"type": "object"
},
"node": 0,
"resource": "cpu",
"subphases": [
{
"id": 0,
"time": 0.014104127883911133
}
],
"time": 0.014104127883911133
}
]
}
]
}
\endcode

The second line format is a communication line:
Each phase in the file may also have a `communications` array that specifies any
communication between tasks that occurred during the phase. Each communication
has a `type`, which is described in the table below. Additionally, it
specifies the `bytes`, number of `messages`, and the two entities that were
involved in the operation as `to` and `from`. The entities may be of different
types, like an `object` or `node`, depending on the type of communication.

\code
<phase>, <object-id1-to/recv>, <object-id2-from/send>, <num-bytes>, <comm-type={1..6}>
\code{.json}
{
"phases": [
{
"communications": [
{
"bytes": 262.0,
"from": {
"home": 1,
"id": 1,
"type": "object"
},
"messages": 1,
"to": {
"home": 0,
"id": 4294967296,
"type": "object"
},
"type": "SendRecv"
},
{
"bytes": 96.0,
"from": {
"home": 0,
"id": 4294967296,
"type": "object"
},
"messages": 1,
"to": {
"id": 1,
"type": "node"
},
"type": "CollectionToNode"
},
{
"bytes": 259.0,
"from": {
"id": 0,
"type": "node"
},
"messages": 1,
"to": {
"home": 0,
"id": 0,
"type": "object"
},
"type": "NodeToCollection"
}
],
"id": 0
}
]
}
\endcode


Where `<comm-type>` is the type of communication that occurred. The type of
communication lines up with the enum `vt::vrt::collection::balance::CommCategory` in
the code.
The type of communication lines up with the enum
`vt::vrt::collection::balance::CommCategory` in the code.

| Value | Enum entry | Description |
| ----- | ---------- | ----------- |
Expand Down
Loading

0 comments on commit a36acef

Please sign in to comment.