Skip to content

Commit

Permalink
Avoid NaNs when precision is too low
Browse files Browse the repository at this point in the history
time ./gcheck.exe -p 2048 256 12
***************************************
NumIterations             = 12
NumThreadsPerBlock        = 256
NumBlocksPerGrid          = 2048
---------------------------------------
FP precision              = FLOAT (nan=1)
Momenta memory layout     = AOSOA[32]
Wavefunction GPU memory   = LOCAL
Curand generation         = DEVICE (CUDA code)
---------------------------------------
NumberOfEntries           = 12
TotalTimeInWaveFuncs      = 4.766575e-03 sec
MeanTimeInWaveFuncs       = 3.972145e-04 sec
StdDevTimeInWaveFuncs     = 2.252078e-05 sec
MinTimeInWaveFuncs        = 3.850530e-04 sec
MaxTimeInWaveFuncs        = 3.902850e-04 sec
---------------------------------------
ProcessID:                = 19290
NProcesses                = 1
NumMatrixElementsComputed = 6291456
MatrixElementsPerSec      = 1.319911e+09 sec^-1
***************************************
NumMatrixElements(notNan) = 6291455
MeanMatrixElemValue       = 1.394717e-02 GeV^0
StdErrMatrixElemValue     = 3.034432e-06 GeV^0
StdDevMatrixElemValue     = 7.611198e-03 GeV^0
MinMatrixElemValue        = 8.679477e-04 GeV^0
MaxMatrixElemValue        = 6.583720e-02 GeV^0
***************************************
00 CudaFree : 0.307731 sec
0a ProcInit : 0.000567 sec
0b MemAlloc : 0.039883 sec
0c GenCreat : 0.013313 sec
1a GenSeed  : 0.000012 sec
1b GenRnGen : 0.007917 sec
2a RamboIni : 0.000107 sec
2b RamboFin : 0.000070 sec
2c CpDTHwgt : 0.004209 sec
2d CpDTHmom : 0.039181 sec
3a SigmaKin : 0.000087 sec
3b CpDTHmes : 0.009364 sec
4a DumpLoop : 0.019952 sec
9a DumpAll  : 0.023443 sec
9b GenDestr : 0.000225 sec
9c MemFree  : 0.009629 sec
9d CudReset : 0.041831 sec
      TOTAL : 0.517521 sec
***************************************
real    0m0.527s
user    0m0.225s
sys     0m0.250s
  • Loading branch information
valassi committed Aug 8, 2020
1 parent 85201ea commit 6a31ca2
Showing 1 changed file with 55 additions and 42 deletions.
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#include <algorithm> // perf stats
#include <algorithm>
#include <cmath>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <numeric> // perf stats
#include <numeric>
#include <unistd.h>

#include "mgOnGpuConfig.h"
Expand Down Expand Up @@ -428,73 +429,85 @@ int main(int argc, char **argv)
float mean = sum / niter;
float stdev = std::sqrt( sq_sum / niter - mean * mean );

int num_mes = niter*ndim;
const int num_mes = niter*ndim;
int num_nan = 0;
float sumelem = 0;
float sqselem = 0;
float minelem = matrixelementvector[0];
float maxelem = matrixelementvector[0];
for (int imes = 0; imes < num_mes; ++imes)
{
if ( isnan( matrixelementvector[imes] ) )
{
//std::cout << "WARNING! ME[" << imes << "] is nan" << std::endl;
num_nan++;
continue;
}
sumelem += matrixelementvector[imes];
sqselem += matrixelementvector[imes]*matrixelementvector[imes];
minelem = std::min( minelem, (float)matrixelementvector[imes] );
maxelem = std::max( maxelem, (float)matrixelementvector[imes] );
}
float meanelem = sumelem / num_mes;
float stdelem = std::sqrt( sqselem / num_mes - meanelem * meanelem );

std::cout << "*************************************" << std::endl
<< "NumIterations = " << niter << std::endl
<< "NumThreadsPerBlock = " << gputhreads << std::endl
<< "NumBlocksPerGrid = " << gpublocks << std::endl
<< "-------------------------------------" << std::endl
float meanelem = sumelem / ( num_mes - num_nan );
float stdelem = std::sqrt( sqselem / ( num_mes - num_nan) - meanelem * meanelem );

std::cout << "***************************************" << std::endl
<< "NumIterations = " << niter << std::endl
<< "NumThreadsPerBlock = " << gputhreads << std::endl
<< "NumBlocksPerGrid = " << gpublocks << std::endl
<< "---------------------------------------" << std::endl
#if defined MGONGPU_FPTYPE_DOUBLE
<< "FP precision = DOUBLE (nan=" << num_nan << ")" << std::endl
#elif defined MGONGPU_FPTYPE_FLOAT
<< "FP precision = FLOAT (nan=" << num_nan << ")" << std::endl
#endif
#if defined MGONGPU_LAYOUT_ASA
<< "Momenta memory layout = AOSOA[" << nepp << "]" << std::endl
<< "Momenta memory layout = AOSOA[" << nepp << "]" << std::endl
#elif defined MGONGPU_LAYOUT_SOA
<< "Momenta memory layout = SOA" << std::endl
<< "Momenta memory layout = SOA" << std::endl
#elif defined MGONGPU_LAYOUT_AOS
<< "Momenta memory layout = AOS" << std::endl
<< "Momenta memory layout = AOS" << std::endl
#endif
#ifdef __CUDACC__
#if defined MGONGPU_WFMEM_LOCAL
<< "Wavefunction GPU memory = LOCAL" << std::endl
<< "Wavefunction GPU memory = LOCAL" << std::endl
#elif defined MGONGPU_WFMEM_GLOBAL
<< "Wavefunction GPU memory = GLOBAL" << std::endl
<< "Wavefunction GPU memory = GLOBAL" << std::endl
#elif defined MGONGPU_WFMEM_SHARED
<< "Wavefunction GPU memory = SHARED" << std::endl
<< "Wavefunction GPU memory = SHARED" << std::endl
#endif
#endif
#ifdef __CUDACC__
#if defined MGONGPU_CURAND_ONDEVICE
<< "Curand generation = DEVICE (CUDA code)" << std::endl
<< "Curand generation = DEVICE (CUDA code)" << std::endl
#elif defined MGONGPU_CURAND_ONHOST
<< "Curand generation = HOST (CUDA code)" << std::endl
<< "Curand generation = HOST (CUDA code)" << std::endl
#endif
#else
<< "Curand generation = HOST (C++ code)" << std::endl
<< "Curand generation = HOST (C++ code)" << std::endl
#endif
<< "-------------------------------------" << std::endl
<< "NumberOfEntries = " << niter << std::endl
<< "---------------------------------------" << std::endl
<< "NumberOfEntries = " << niter << std::endl
<< std::scientific
<< "TotalTimeInWaveFuncs = " << sum << " sec" << std::endl
<< "MeanTimeInWaveFuncs = " << mean << " sec" << std::endl
<< "StdDevTimeInWaveFuncs = " << stdev << " sec" << std::endl
<< "MinTimeInWaveFuncs = " << mintime << " sec" << std::endl
<< "MaxTimeInWaveFuncs = " << maxtime << " sec" << std::endl
<< "-------------------------------------" << std::endl
<< "ProcessID: = " << getpid() << std::endl
<< "NProcesses = " << process.nprocesses << std::endl
<< "NumMatrixElements = " << num_mes << std::endl
<< "MatrixElementsPerSec = " << num_mes/sum << " sec^-1" << std::endl;

std::cout << "*************************************" << std::endl
<< "NumMatrixElements = " << num_mes << std::endl
<< "TotalTimeInWaveFuncs = " << sum << " sec" << std::endl
<< "MeanTimeInWaveFuncs = " << mean << " sec" << std::endl
<< "StdDevTimeInWaveFuncs = " << stdev << " sec" << std::endl
<< "MinTimeInWaveFuncs = " << mintime << " sec" << std::endl
<< "MaxTimeInWaveFuncs = " << maxtime << " sec" << std::endl
<< "---------------------------------------" << std::endl
<< "ProcessID: = " << getpid() << std::endl
<< "NProcesses = " << process.nprocesses << std::endl
<< "NumMatrixElementsComputed = " << num_mes << std::endl
<< "MatrixElementsPerSec = " << num_mes/sum << " sec^-1" << std::endl;

std::cout << "***************************************" << std::endl
<< "NumMatrixElements(notNan) = " << num_mes - num_nan << std::endl
<< std::scientific
<< "MeanMatrixElemValue = " << meanelem << " GeV^" << meGeVexponent << std::endl
<< "StdErrMatrixElemValue = " << stdelem/sqrt(num_mes) << " GeV^" << meGeVexponent << std::endl
<< "StdDevMatrixElemValue = " << stdelem << " GeV^" << meGeVexponent << std::endl
<< "MinMatrixElemValue = " << minelem << " GeV^" << meGeVexponent << std::endl
<< "MaxMatrixElemValue = " << maxelem << " GeV^" << meGeVexponent << std::endl;
<< "MeanMatrixElemValue = " << meanelem << " GeV^" << meGeVexponent << std::endl
<< "StdErrMatrixElemValue = " << stdelem/sqrt(num_mes) << " GeV^" << meGeVexponent << std::endl
<< "StdDevMatrixElemValue = " << stdelem << " GeV^" << meGeVexponent << std::endl
<< "MinMatrixElemValue = " << minelem << " GeV^" << meGeVexponent << std::endl
<< "MaxMatrixElemValue = " << maxelem << " GeV^" << meGeVexponent << std::endl;
}

// --- 9b. Destroy curand generator
Expand Down Expand Up @@ -545,9 +558,9 @@ int main(int argc, char **argv)
timermap.stop();
if (perf)
{
std::cout << "*************************************" << std::endl;
std::cout << "***************************************" << std::endl;
timermap.dump();
std::cout << "*************************************" << std::endl;
std::cout << "***************************************" << std::endl;
}

//std::cout << "ALL OK" << std::endl;
Expand Down

0 comments on commit 6a31ca2

Please sign in to comment.