//@HEADER
// ***************************************************
//
// HPCG: High Performance Conjugate Gradient Benchmark
//
// Contact:
// Michael A. Heroux ([email protected])
// Jack Dongarra ([email protected])
// Piotr Luszczek ([email protected])
//
// ***************************************************
//@HEADER
/*!
@file ReportResults.cpp
HPCG routine
*/
#ifndef HPCG_NO_MPI
#include <mpi.h>
#endif
#include <cassert>
#include <vector>
#include "ReportResults.hpp"
#include "OutputFile.hpp"
#include "OptimizeProblem.hpp"
#ifdef HPCG_DEBUG
#include <fstream>
using std::endl;
#include "hpcg.hpp"
#endif
/*!
Creates a YAML file and writes the information about the HPCG run, its results, and validity.
@param[in] A The known system matrix
@param[in] numberOfMgLevels Number of levels in the multigrid V cycle
@param[in] numberOfCgSets Number of CG run sets performed
@param[in] refMaxIters Number of reference CG iterations performed per set
@param[in] optMaxIters Number of optimized CG iterations performed per set to lower the residual below the reference threshold
@param[in] times Vector of cumulative timings for each of the phases of a preconditioned CG iteration
@param[in] testcg_data The data structure with the results of the CG correctness test, including pass/fail information
@param[in] testsymmetry_data The data structure with the results of the CG symmetry test, including pass/fail information
@param[in] testnorms_data The data structure with the results of the CG norm test, including pass/fail information
@param[in] global_failure Indicates whether a failure occurred during the correctness tests of CG
@param[in] quickPath Indicates whether the QuickPath option was selected
@see YAML_Doc
*/
void ReportResults(const SparseMatrix & A, int numberOfMgLevels, int numberOfCgSets, int refMaxIters, int optMaxIters, double times[],
const TestCGData & testcg_data, const TestSymmetryData & testsymmetry_data, const TestNormsData & testnorms_data, int global_failure, bool quickPath) {
double minOfficialTime = 1800; // Any official benchmark result must run at least this many seconds
#ifndef HPCG_NO_MPI
double t4 = times[4];
double t4min = 0.0;
double t4max = 0.0;
double t4avg = 0.0;
MPI_Allreduce(&t4, &t4min, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
MPI_Allreduce(&t4, &t4max, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
MPI_Allreduce(&t4, &t4avg, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
t4avg = t4avg/((double) A.geom->size);
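// t4 is this rank's cumulative DDOT MPI_Allreduce time; the three reductions above collect its
// minimum, maximum, and sum across ranks, and t4avg divides the sum by the communicator size to
// give the per-rank average reported under "DDOT Timing Variations" below.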
#endif
if (A.geom->rank==0) { // Only PE 0 needs to compute and report timing results
// TODO: Put the FLOP count, Memory BW and Memory Usage models into separate functions
// ======================== FLOP count model =======================================
double fNumberOfCgSets = numberOfCgSets;
double fniters = fNumberOfCgSets * (double) optMaxIters;
double fnrow = A.totalNumberOfRows;
double fnnz = A.totalNumberOfNonzeros;
// Op counts come from implementation of CG in CG.cpp (include 1 extra for the CG preamble ops)
double fnops_ddot = (3.0*fniters+fNumberOfCgSets)*2.0*fnrow; // 3 ddots with nrow adds and nrow mults
double fnops_waxpby = (3.0*fniters+fNumberOfCgSets)*2.0*fnrow; // 3 WAXPBYs with nrow adds and nrow mults
double fnops_sparsemv = (fniters+fNumberOfCgSets)*2.0*fnnz; // 1 SpMV with nnz adds and nnz mults
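// For example, one dot product of two length-nrow vectors costs nrow multiplies plus nrow adds,
// i.e. 2*nrow flops; CG performs 3 such dot products per iteration, plus the preamble operations
// counted once per CG set, giving the (3.0*fniters+fNumberOfCgSets)*2.0*fnrow terms above. The
// SpMV count is analogous with one multiply-add per stored nonzero, hence 2.0*fnnz per SpMV.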
// Op counts from the multigrid preconditioners
double fnops_precond = 0.0;
const SparseMatrix * Af = &A;
for (int i=1; i<numberOfMgLevels; ++i) {
double fnnz_Af = Af->totalNumberOfNonzeros;
double fnumberOfPresmootherSteps = Af->mgData->numberOfPresmootherSteps;
double fnumberOfPostsmootherSteps = Af->mgData->numberOfPostsmootherSteps;
fnops_precond += fnumberOfPresmootherSteps*fniters*4.0*fnnz_Af; // number of presmoother flops
fnops_precond += fniters*2.0*fnnz_Af; // cost of fine grid residual calculation
fnops_precond += fnumberOfPostsmootherSteps*fniters*4.0*fnnz_Af; // number of postsmoother flops
Af = Af->Ac; // Go to next coarse level
}
fnops_precond += fniters*4.0*((double) Af->totalNumberOfNonzeros); // One symmetric GS sweep at the coarsest level
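// The smoother model charges 4*fnnz flops per symmetric Gauss-Seidel application: a forward and a
// backward sweep, each visiting every stored nonzero once with one multiply-add (2*fnnz flops per sweep).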
double fnops = fnops_ddot+fnops_waxpby+fnops_sparsemv+fnops_precond;
double frefnops = fnops * ((double) refMaxIters)/((double) optMaxIters);
// ======================== Memory bandwidth model =======================================
// Read/Write counts come from implementation of CG in CG.cpp (include 1 extra for the CG preamble ops)
double fnreads_ddot = (3.0*fniters+fNumberOfCgSets)*2.0*fnrow*sizeof(double); // 3 ddots with 2*nrow reads each
double fnwrites_ddot = (3.0*fniters+fNumberOfCgSets)*sizeof(double); // 3 ddots with 1 write each
double fnreads_waxpby = (3.0*fniters+fNumberOfCgSets)*2.0*fnrow*sizeof(double); // 3 WAXPBYs with 2*nrow reads each
double fnwrites_waxpby = (3.0*fniters+fNumberOfCgSets)*fnrow*sizeof(double); // 3 WAXPBYs with nrow writes each
double fnreads_sparsemv = (fniters+fNumberOfCgSets)*(fnnz*(sizeof(double)+sizeof(local_int_t)) + fnrow*sizeof(double));// 1 SpMV with nnz reads of values, nnz reads indices,
// plus nrow reads of x
double fnwrites_sparsemv = (fniters+fNumberOfCgSets)*fnrow*sizeof(double); // 1 SpMV nrow writes
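// The SpMV traffic model assumes streaming access: each of the fnnz stored nonzeros is read once as a
// matrix value (double) plus a local column index (local_int_t), each of the fnrow entries of x is read
// once, and each of the fnrow entries of the result vector is written once.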
// Op counts from the multigrid preconditioners
double fnreads_precond = 0.0;
double fnwrites_precond = 0.0;
Af = &A;
for (int i=1; i<numberOfMgLevels; ++i) {
double fnnz_Af = Af->totalNumberOfNonzeros;
double fnrow_Af = Af->totalNumberOfRows;
double fnumberOfPresmootherSteps = Af->mgData->numberOfPresmootherSteps;
double fnumberOfPostsmootherSteps = Af->mgData->numberOfPostsmootherSteps;
fnreads_precond += fnumberOfPresmootherSteps*fniters*(2.0*fnnz_Af*(sizeof(double)+sizeof(local_int_t)) + fnrow_Af*sizeof(double)); // number of presmoother reads
fnwrites_precond += fnumberOfPresmootherSteps*fniters*fnrow_Af*sizeof(double); // number of presmoother writes
fnreads_precond += fniters*(fnnz_Af*(sizeof(double)+sizeof(local_int_t)) + fnrow_Af*sizeof(double)); // Number of reads for fine grid residual calculation
fnwrites_precond += fniters*fnnz_Af*sizeof(double); // Number of writes for fine grid residual calculation
fnreads_precond += fnumberOfPostsmootherSteps*fniters*(2.0*fnnz_Af*(sizeof(double)+sizeof(local_int_t)) + fnrow_Af*sizeof(double)); // number of postsmoother reads
fnwrites_precond += fnumberOfPostsmootherSteps*fniters*fnnz_Af*sizeof(double); // number of postsmoother writes
Af = Af->Ac; // Go to next coarse level
}
double fnnz_Af = Af->totalNumberOfNonzeros;
double fnrow_Af = Af->totalNumberOfRows;
fnreads_precond += fniters*(2.0*fnnz_Af*(sizeof(double)+sizeof(local_int_t)) + fnrow_Af*sizeof(double)); // One symmetric GS sweep at the coarsest level
fnwrites_precond += fniters*fnrow_Af*sizeof(double); // One symmetric GS sweep at the coarsest level
double fnreads = fnreads_ddot+fnreads_waxpby+fnreads_sparsemv+fnreads_precond;
double fnwrites = fnwrites_ddot+fnwrites_waxpby+fnwrites_sparsemv+fnwrites_precond;
double frefnreads = fnreads * ((double) refMaxIters)/((double) optMaxIters);
double frefnwrites = fnwrites * ((double) refMaxIters)/((double) optMaxIters);
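// frefnops, frefnreads, and frefnwrites rescale the optimized-run counts by refMaxIters/optMaxIters so
// that any extra iterations an optimized implementation needs to converge are treated as overhead rather
// than credited work; these are the "with convergence overhead" figures reported below.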
// ======================== Memory usage model =======================================
// Data in GenerateProblem_ref
double numberOfNonzerosPerRow = 27.0; // We are approximating a 27-point finite element/volume/difference 3D stencil
double size = ((double) A.geom->size); // Needed for estimating size of halo
double fnbytes = ((double) sizeof(Geometry)); // Geometry struct in main.cpp
fnbytes += ((double) sizeof(double)*fNumberOfCgSets); // testnorms_data in main.cpp
// Model for GenerateProblem_ref.cpp
fnbytes += fnrow*sizeof(char); // array nonzerosInRow
fnbytes += fnrow*((double) sizeof(global_int_t*)); // mtxIndG
fnbytes += fnrow*((double) sizeof(local_int_t*)); // mtxIndL
fnbytes += fnrow*((double) sizeof(double*)); // matrixValues
fnbytes += fnrow*((double) sizeof(double*)); // matrixDiagonal
fnbytes += fnrow*numberOfNonzerosPerRow*((double) sizeof(local_int_t)); // mtxIndL[1..nrows]
fnbytes += fnrow*numberOfNonzerosPerRow*((double) sizeof(double)); // matrixValues[1..nrows]
fnbytes += fnrow*numberOfNonzerosPerRow*((double) sizeof(global_int_t)); // mtxIndG[1..nrows]
fnbytes += fnrow*((double) 3*sizeof(double)); // x, b, xexact
// Model for CGData.hpp
double fncol = ((global_int_t) A.localNumberOfColumns) * size; // Estimate of the global number of columns using the value from rank 0
fnbytes += fnrow*((double) 2*sizeof(double)); // r, Ap
fnbytes += fncol*((double) 2*sizeof(double)); // z, p
std::vector<double> fnbytesPerLevel(numberOfMgLevels); // Count byte usage per level (level 0 is main CG level)
fnbytesPerLevel[0] = fnbytes;
// Benchmarker-provided model for OptimizeProblem.cpp
double fnbytes_OptimizedProblem = OptimizeProblemMemoryUse(A);
fnbytes += fnbytes_OptimizedProblem;
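// fnbytesPerLevel[0] above covers only the fine-grid linear system and CG workspace; the OptimizeProblem
// memory just added to the running total is reported separately in the YAML output, and the loop below
// estimates and adds the footprint of each coarse multigrid level.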
Af = A.Ac;
for (int i=1; i<numberOfMgLevels; ++i) {
double fnrow_Af = Af->totalNumberOfRows;
double fncol_Af = ((global_int_t) Af->localNumberOfColumns) * size; // Estimate of the global number of columns using the value from rank 0
double fnbytes_Af = 0.0;
// Model for GenerateCoarseProblem.cpp
fnbytes_Af += fnrow_Af*((double) sizeof(local_int_t)); // f2cOperator
fnbytes_Af += fnrow_Af*((double) sizeof(double)); // rc
fnbytes_Af += 2.0*fncol_Af*((double) sizeof(double)); // xc, Axf are estimated based on the size of these arrays on rank 0
fnbytes_Af += ((double) (sizeof(Geometry)+sizeof(SparseMatrix)+3*sizeof(Vector)+sizeof(MGData))); // Account for structs geomc, Ac, rc, xc, Axf - (minor)
// Model for GenerateProblem.cpp (called within GenerateCoarseProblem.cpp)
fnbytes_Af += fnrow_Af*sizeof(char); // array nonzerosInRow
fnbytes_Af += fnrow_Af*((double) sizeof(global_int_t*)); // mtxIndG
fnbytes_Af += fnrow_Af*((double) sizeof(local_int_t*)); // mtxIndL
fnbytes_Af += fnrow_Af*((double) sizeof(double*)); // matrixValues
fnbytes_Af += fnrow_Af*((double) sizeof(double*)); // matrixDiagonal
fnbytes_Af += fnrow_Af*numberOfNonzerosPerRow*((double) sizeof(local_int_t)); // mtxIndL[1..nrows]
fnbytes_Af += fnrow_Af*numberOfNonzerosPerRow*((double) sizeof(double)); // matrixValues[1..nrows]
fnbytes_Af += fnrow_Af*numberOfNonzerosPerRow*((double) sizeof(global_int_t)); // mtxIndG[1..nrows]
// Model for SetupHalo_ref.cpp
#ifndef HPCG_NO_MPI
fnbytes_Af += ((double) sizeof(double)*Af->totalToBeSent); //sendBuffer
fnbytes_Af += ((double) sizeof(local_int_t)*Af->totalToBeSent); // elementsToSend
fnbytes_Af += ((double) sizeof(int)*Af->numberOfSendNeighbors); // neighbors
fnbytes_Af += ((double) sizeof(local_int_t)*Af->numberOfSendNeighbors); // receiveLength, sendLength
#endif
fnbytesPerLevel[i] = fnbytes_Af;
fnbytes += fnbytes_Af; // Running sum
Af = Af->Ac; // Go to next coarse level
}
assert(Af==0); // Make sure we got to the lowest grid level
// Count number of bytes used per equation
double fnbytesPerEquation = fnbytes/fnrow;
// Instantiate YAML document
OutputFile doc("HPCG-Benchmark", "3.1");
doc.add("Release date", "March 28, 2019");
doc.add("Machine Summary","");
doc.get("Machine Summary")->add("Distributed Processes",A.geom->size);
doc.get("Machine Summary")->add("Threads per processes",A.geom->numThreads);
doc.add("Global Problem Dimensions","");
doc.get("Global Problem Dimensions")->add("Global nx",A.geom->gnx);
doc.get("Global Problem Dimensions")->add("Global ny",A.geom->gny);
doc.get("Global Problem Dimensions")->add("Global nz",A.geom->gnz);
doc.add("Processor Dimensions","");
doc.get("Processor Dimensions")->add("npx",A.geom->npx);
doc.get("Processor Dimensions")->add("npy",A.geom->npy);
doc.get("Processor Dimensions")->add("npz",A.geom->npz);
doc.add("Local Domain Dimensions","");
doc.get("Local Domain Dimensions")->add("nx",A.geom->nx);
doc.get("Local Domain Dimensions")->add("ny",A.geom->ny);
int ipartz_ids = 0;
for (int i=0; i< A.geom->npartz; ++i) {
doc.get("Local Domain Dimensions")->add("Lower ipz", ipartz_ids);
doc.get("Local Domain Dimensions")->add("Upper ipz", A.geom->partz_ids[i]-1);
doc.get("Local Domain Dimensions")->add("nz",A.geom->partz_nz[i]);
ipartz_ids = A.geom->partz_ids[i];
}
doc.add("########## Problem Summary ##########","");
doc.add("Setup Information","");
doc.get("Setup Information")->add("Setup Time",times[9]);
doc.add("Linear System Information","");
doc.get("Linear System Information")->add("Number of Equations",A.totalNumberOfRows);
doc.get("Linear System Information")->add("Number of Nonzero Terms",A.totalNumberOfNonzeros);
doc.add("Multigrid Information","");
doc.get("Multigrid Information")->add("Number of coarse grid levels", numberOfMgLevels-1);
Af = &A;
doc.get("Multigrid Information")->add("Coarse Grids","");
for (int i=1; i<numberOfMgLevels; ++i) {
doc.get("Multigrid Information")->get("Coarse Grids")->add("Grid Level",i);
doc.get("Multigrid Information")->get("Coarse Grids")->add("Number of Equations",Af->Ac->totalNumberOfRows);
doc.get("Multigrid Information")->get("Coarse Grids")->add("Number of Nonzero Terms",Af->Ac->totalNumberOfNonzeros);
doc.get("Multigrid Information")->get("Coarse Grids")->add("Number of Presmoother Steps",Af->mgData->numberOfPresmootherSteps);
doc.get("Multigrid Information")->get("Coarse Grids")->add("Number of Postsmoother Steps",Af->mgData->numberOfPostsmootherSteps);
Af = Af->Ac;
}
doc.add("########## Memory Use Summary ##########","");
doc.add("Memory Use Information","");
doc.get("Memory Use Information")->add("Total memory used for data (Gbytes)",fnbytes/1000000000.0);
doc.get("Memory Use Information")->add("Memory used for OptimizeProblem data (Gbytes)",fnbytes_OptimizedProblem/1000000000.0);
doc.get("Memory Use Information")->add("Bytes per equation (Total memory / Number of Equations)",fnbytesPerEquation);
doc.get("Memory Use Information")->add("Memory used for linear system and CG (Gbytes)",fnbytesPerLevel[0]/1000000000.0);
doc.get("Memory Use Information")->add("Coarse Grids","");
for (int i=1; i<numberOfMgLevels; ++i) {
doc.get("Memory Use Information")->get("Coarse Grids")->add("Grid Level",i);
doc.get("Memory Use Information")->get("Coarse Grids")->add("Memory used",fnbytesPerLevel[i]/1000000000.0);
}
doc.add("########## V&V Testing Summary ##########","");
doc.add("Spectral Convergence Tests","");
if (testcg_data.count_fail==0)
doc.get("Spectral Convergence Tests")->add("Result", "PASSED");
else
doc.get("Spectral Convergence Tests")->add("Result", "FAILED");
doc.get("Spectral Convergence Tests")->add("Unpreconditioned","");
doc.get("Spectral Convergence Tests")->get("Unpreconditioned")->add("Maximum iteration count", testcg_data.niters_max_no_prec);
doc.get("Spectral Convergence Tests")->get("Unpreconditioned")->add("Expected iteration count", testcg_data.expected_niters_no_prec);
doc.get("Spectral Convergence Tests")->add("Preconditioned","");
doc.get("Spectral Convergence Tests")->get("Preconditioned")->add("Maximum iteration count", testcg_data.niters_max_prec);
doc.get("Spectral Convergence Tests")->get("Preconditioned")->add("Expected iteration count", testcg_data.expected_niters_prec);
const char DepartureFromSymmetry[] = "Departure from Symmetry |x'Ay-y'Ax|/(2*||x||*||A||*||y||)/epsilon";
doc.add(DepartureFromSymmetry,"");
if (testsymmetry_data.count_fail==0)
doc.get(DepartureFromSymmetry)->add("Result", "PASSED");
else
doc.get(DepartureFromSymmetry)->add("Result", "FAILED");
doc.get(DepartureFromSymmetry)->add("Departure for SpMV", testsymmetry_data.depsym_spmv);
doc.get(DepartureFromSymmetry)->add("Departure for MG", testsymmetry_data.depsym_mg);
doc.add("########## Iterations Summary ##########","");
doc.add("Iteration Count Information","");
if (!global_failure)
doc.get("Iteration Count Information")->add("Result", "PASSED");
else
doc.get("Iteration Count Information")->add("Result", "FAILED");
doc.get("Iteration Count Information")->add("Reference CG iterations per set", refMaxIters);
doc.get("Iteration Count Information")->add("Optimized CG iterations per set", optMaxIters);
doc.get("Iteration Count Information")->add("Total number of reference iterations", refMaxIters*numberOfCgSets);
doc.get("Iteration Count Information")->add("Total number of optimized iterations", optMaxIters*numberOfCgSets);
doc.add("########## Reproducibility Summary ##########","");
doc.add("Reproducibility Information","");
if (testnorms_data.pass)
doc.get("Reproducibility Information")->add("Result", "PASSED");
else
doc.get("Reproducibility Information")->add("Result", "FAILED");
doc.get("Reproducibility Information")->add("Scaled residual mean", testnorms_data.mean);
doc.get("Reproducibility Information")->add("Scaled residual variance", testnorms_data.variance);
doc.add("########## Performance Summary (times in sec) ##########","");
doc.add("Benchmark Time Summary","");
doc.get("Benchmark Time Summary")->add("Optimization phase",times[7]);
doc.get("Benchmark Time Summary")->add("DDOT",times[1]);
doc.get("Benchmark Time Summary")->add("WAXPBY",times[2]);
doc.get("Benchmark Time Summary")->add("SpMV",times[3]);
doc.get("Benchmark Time Summary")->add("MG",times[5]);
doc.get("Benchmark Time Summary")->add("Total",times[0]);
doc.add("Floating Point Operations Summary","");
doc.get("Floating Point Operations Summary")->add("Raw DDOT",fnops_ddot);
doc.get("Floating Point Operations Summary")->add("Raw WAXPBY",fnops_waxpby);
doc.get("Floating Point Operations Summary")->add("Raw SpMV",fnops_sparsemv);
doc.get("Floating Point Operations Summary")->add("Raw MG",fnops_precond);
doc.get("Floating Point Operations Summary")->add("Total",fnops);
doc.get("Floating Point Operations Summary")->add("Total with convergence overhead",frefnops);
doc.add("GB/s Summary","");
doc.get("GB/s Summary")->add("Raw Read B/W",fnreads/times[0]/1.0E9);
doc.get("GB/s Summary")->add("Raw Write B/W",fnwrites/times[0]/1.0E9);
doc.get("GB/s Summary")->add("Raw Total B/W",(fnreads+fnwrites)/(times[0])/1.0E9);
doc.get("GB/s Summary")->add("Total with convergence and optimization phase overhead",(frefnreads+frefnwrites)/(times[0]+fNumberOfCgSets*(times[7]/10.0+times[9]/10.0))/1.0E9);
doc.add("GFLOP/s Summary","");
doc.get("GFLOP/s Summary")->add("Raw DDOT",fnops_ddot/times[1]/1.0E9);
doc.get("GFLOP/s Summary")->add("Raw WAXPBY",fnops_waxpby/times[2]/1.0E9);
doc.get("GFLOP/s Summary")->add("Raw SpMV",fnops_sparsemv/(times[3])/1.0E9);
doc.get("GFLOP/s Summary")->add("Raw MG",fnops_precond/(times[5])/1.0E9);
doc.get("GFLOP/s Summary")->add("Raw Total",fnops/times[0]/1.0E9);
doc.get("GFLOP/s Summary")->add("Total with convergence overhead",frefnops/times[0]/1.0E9);
// This final GFLOP/s rating includes the overhead of problem setup and optimizing the data structures vs ten sets of 50 iterations of CG
double totalGflops = frefnops/(times[0]+fNumberOfCgSets*(times[7]/10.0+times[9]/10.0))/1.0E9;
double totalGflops24 = frefnops/(times[0]+fNumberOfCgSets*times[7]/10.0)/1.0E9;
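// totalGflops charges a numberOfCgSets/10 share of the optimization (times[7]) and setup (times[9]) times
// on top of the measured run time; totalGflops24 applies the same treatment but omits the setup time, and
// is reported below as the legacy "HPCG 2.4" rating.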
doc.get("GFLOP/s Summary")->add("Total with convergence and optimization phase overhead",totalGflops);
doc.add("User Optimization Overheads","");
doc.get("User Optimization Overheads")->add("Optimization phase time (sec)", (times[7]));
doc.get("User Optimization Overheads")->add("Optimization phase time vs reference SpMV+MG time", times[7]/times[8]);
#ifndef HPCG_NO_MPI
doc.add("DDOT Timing Variations","");
doc.get("DDOT Timing Variations")->add("Min DDOT MPI_Allreduce time",t4min);
doc.get("DDOT Timing Variations")->add("Max DDOT MPI_Allreduce time",t4max);
doc.get("DDOT Timing Variations")->add("Avg DDOT MPI_Allreduce time",t4avg);
//doc.get("Sparse Operations Overheads")->add("Halo exchange time (sec)", (times[6]));
//doc.get("Sparse Operations Overheads")->add("Halo exchange as percentage of SpMV time", (times[6])/totalSparseMVTime*100.0);
#endif
doc.add("Final Summary","");
bool isValidRun = (testcg_data.count_fail==0) && (testsymmetry_data.count_fail==0) && (testnorms_data.pass) && (!global_failure);
if (isValidRun) {
doc.get("Final Summary")->add("HPCG result is VALID with a GFLOP/s rating of", totalGflops);
doc.get("Final Summary")->add("HPCG 2.4 rating for historical reasons is", totalGflops24);
if (!A.isDotProductOptimized) {
doc.get("Final Summary")->add("Reference version of ComputeDotProduct used","Performance results are most likely suboptimal");
}
if (!A.isSpmvOptimized) {
doc.get("Final Summary")->add("Reference version of ComputeSPMV used","Performance results are most likely suboptimal");
}
if (!A.isMgOptimized) {
if (A.geom->numThreads>1)
doc.get("Final Summary")->add("Reference version of ComputeMG used and number of threads greater than 1","Performance results are severely suboptimal");
else // numThreads ==1
doc.get("Final Summary")->add("Reference version of ComputeMG used","Performance results are most likely suboptimal");
}
if (!A.isWaxpbyOptimized) {
doc.get("Final Summary")->add("Reference version of ComputeWAXPBY used","Performance results are most likely suboptimal");
}
if (times[0]>=minOfficialTime) {
doc.get("Final Summary")->add("Please upload results from the YAML file contents to","http://hpcg-benchmark.org");
}
else {
doc.get("Final Summary")->add("Results are valid but execution time (sec) is",times[0]);
if (quickPath) {
doc.get("Final Summary")->add("You have selected the QuickPath option", "Results are official for legacy installed systems with confirmation from the HPCG Benchmark leaders.");
doc.get("Final Summary")->add("After confirmation please upload results from the YAML file contents to","http://hpcg-benchmark.org");
} else {
doc.get("Final Summary")->add("Official results execution time (sec) must be at least",minOfficialTime);
}
}
} else {
doc.get("Final Summary")->add("HPCG result is","INVALID.");
doc.get("Final Summary")->add("Please review the YAML file contents","You may NOT submit these results for consideration.");
}
std::string yaml = doc.generate();
#ifdef HPCG_DEBUG
HPCG_fout << yaml;
#endif
}
return;
}