diff --git a/core/accelogic/inc/ZipAccelogic.h b/core/accelogic/inc/ZipAccelogic.h index ae6801d11bd26..1ac47130acf89 100644 --- a/core/accelogic/inc/ZipAccelogic.h +++ b/core/accelogic/inc/ZipAccelogic.h @@ -14,9 +14,9 @@ #include "EDataType.h" -void R__zipBLAST(int *cxlevels, int *srcsize, char *src, int **tgtsizes, char **tgts, int tgt_number, int *irep, EDataType datatype = EDataType::kNoType_t); +void R__zipBLAST(int *cxlevels, int *srcsize, char *src, int *tgtsize, char **tgts, int tgt_number, int *irep, EDataType datatype = EDataType::kNoType_t); -void R__unzipBLAST(int **srcsizes, unsigned char **srcs, int *tgtsize, unsigned char *tgt, int src_number, int *irep); +void R__unzipBLAST(int *srcsize, unsigned char **srcs, int *tgtsize, unsigned char *tgt, int src_number, int *irep); // The below two interfaces are to maintain backward compatibility until RZip.cxx is updated to use the above two interfaces diff --git a/core/accelogic/src/ZipAccelogic.cxx b/core/accelogic/src/ZipAccelogic.cxx index 1bd9e78c4353c..5e87c40b00f43 100644 --- a/core/accelogic/src/ZipAccelogic.cxx +++ b/core/accelogic/src/ZipAccelogic.cxx @@ -40,17 +40,16 @@ union IntegerTypes { void R__zipBLAST(int cxlevel, int *srcsize, char *src, int *tgtsize, char *tgt, int *irep, EDataType datatype) { - R__zipBLAST(&cxlevel,srcsize,src,&tgtsize,&tgt,1,irep,datatype); + R__zipBLAST(&cxlevel,srcsize,src,tgtsize,&tgt,1,irep,datatype); } -void R__zipBLAST(int *cxlevels, int *srcsize, char *src, int **tgtsizes, char **tgts, int tgt_number, int *irep, EDataType datatype) +void R__zipBLAST(int *cxlevels, int *srcsize, char *src, int *tgtsize, char **tgts, int tgt_number, int *irep, EDataType datatype) { - *irep = 0; - int *tgtsize = *tgtsizes; + memset(irep,0,tgt_number*sizeof(int)); // irep needs to point to an array of integers of size tgt_number (could just be a single integer) char *tgt = *tgts; for (int tgt_idx=0; tgt_idx (size_t)*tgtsizes[tgt_idx] ); + excessive_size |= ( ( out_sizes[tgt_idx] + kHeaderSize) > (size_t)tgtsize[tgt_idx] ); if (excessive_size) { for (int tgt_idx=0; tgt_idx +#include #include "PrecisionCascadeConfigArrayContent.h" namespace ROOT { diff --git a/core/zip/src/RZip.cxx b/core/zip/src/RZip.cxx index 92235b8c8ab05..5d9d5dce1dfb1 100644 --- a/core/zip/src/RZip.cxx +++ b/core/zip/src/RZip.cxx @@ -14,6 +14,7 @@ #include "ZipLZ4.h" #include "ZipZSTD.h" #include "ZipAccelogic.h" +#include "PrecisionCascadeConfigArrayContent.h" #include "zlib.h" @@ -25,6 +26,8 @@ // - 3 bytes to identify the deflated buffer size. // - 3 bytes to identify the inflated buffer size. #define HDRSIZE 9 +// - 2 bytes to identify datatype and cascade count in BLAST +#define HDRSIZE_BLAST 11 /** * Forward decl's @@ -107,7 +110,7 @@ void R__zipMultipleAlgorithm(int cxlevel, int *srcsize, char *src, int *tgtsize, } else if (compressionAlgorithm == ROOT::RCompressionSetting::EAlgorithm::kZSTD) { R__zipZSTD(cxlevel, srcsize, src, tgtsize, tgt, irep); } else if (compressionAlgorithm == ROOT::RCompressionSetting::EAlgorithm::kBLAST) { - R__zipBLAST(cxlevel, srcsize, src, tgtsize, tgt, irep, datatype); + R__zipBLAST(&cxlevel, srcsize, src, tgtsize, &tgt, 1, irep, datatype); } else if (compressionAlgorithm == ROOT::RCompressionSetting::EAlgorithm::kOldCompressionAlgo || compressionAlgorithm == ROOT::RCompressionSetting::EAlgorithm::kUseGlobal) { R__zipOld(cxlevel, srcsize, src, tgtsize, tgt, irep); } else { @@ -118,6 +121,36 @@ void R__zipMultipleAlgorithm(int cxlevel, int *srcsize, char *src, int *tgtsize, } } +void R__zipPrecisionCascade(int *srcsize, char *src, int *tgtsize, char **tgts, int tgt_number, int *irep, ROOT::RCompressionSetting::EAlgorithm::EValues compressionAlgorithm, + EDataType datatype /* = kNoType_t */, + int configsize /* = 0 */, char * configarray /* = nullptr */) +{ + + if (*srcsize < 1 + HDRSIZE_BLAST + 1) { + memset(irep,0,tgt_number*sizeof(int)); + return; + } + + auto content = reinterpret_cast(configarray); + (void) configsize; + assert(content && (content->SizeOf() == configsize)); + Int_t *cxlevels = content->GetLevels(); // This an array of size `content->fLen` + + for (int tgt_idx=0; tgt_idx 0 && tgt_idx == tgt_number-1 ? 0 : 1); + if (cxlevels[tgt_idx] < cxlevel_min) { + memset(irep,0,tgt_number*sizeof(int)); + return; + } + } + + if (compressionAlgorithm == ROOT::RCompressionSetting::EAlgorithm::kBLAST) { + R__zipBLAST(cxlevels, srcsize, src, tgtsize, tgts, tgt_number, irep, datatype); + } +} + // The very old algorithm for backward compatibility // 0 for selecting with R__ZipMode in a backward compatible way // 3 for selecting in other cases @@ -306,8 +339,9 @@ int R__unzip_header(int *srcsize, uch *src, int *tgtsize) return 1; } - *srcsize = HDRSIZE + ((long)src[3] | ((long)src[4] << 8) | ((long)src[5] << 16)); - *tgtsize = (long)src[6] | ((long)src[7] << 8) | ((long)src[8] << 16); + *srcsize = ((long)src[3] | ((long)src[4] << 8) | ((long)src[5] << 16)) + + (is_valid_header_blast(src) ? HDRSIZE_BLAST : HDRSIZE); // compressed size + *tgtsize = (long)src[6] | ((long)src[7] << 8) | ((long)src[8] << 16); // uncompressed size return 0; } @@ -339,7 +373,7 @@ void R__unzip(int *srcsize, uch *src, int *tgtsize, uch *tgt, int *irep, int /* long ibufcnt, obufcnt; *irep = 0L; - + /* C H E C K H E A D E R */ if (*srcsize < HDRSIZE) { @@ -383,7 +417,7 @@ void R__unzip(int *srcsize, uch *src, int *tgtsize, uch *tgt, int *irep, int /* R__unzipZSTD(srcsize, src, tgtsize, tgt, irep); return; } else if (is_valid_header_blast(src)) { - R__unzipBLAST(srcsize, src, tgtsize, tgt, irep); + R__unzipBLAST(srcsize, &src, tgtsize, tgt, 1, irep); return; } @@ -405,6 +439,80 @@ void R__unzip(int *srcsize, uch *src, int *tgtsize, uch *tgt, int *irep, int /* *irep = isize; } +void R__unzipPrecisionCascade(int *srcsize, uch **srcs, int *tgtsize, uch *tgt, int src_number, int *irep, int /* configsize = 0 */, char * /* configarray = nullptr */) +{ + + long isize = 0; + long ibufcnt, obufcnt; + + *irep = 0L; + + /* This check can be done here, or in R__unzipBLAST, depending on whether + the check may apply to other [future] PrecisionCascade algorithms + if (src_number > srcs[0][10]) { + src_number = srcs[0][10]; // more sources than we need?!? Ignore extras for now + } else if (src_number < srcs[0][10] && srcs[src_number-1][2] == 0) { + // This is a simple check that full precision is only ever possible with all sources from compression. + // However, it is possible that full precision was not saved at compression, + // so even using all saved sources may not return full precision + fprintf(stderr, "R__unzipPrecisionCascade: too few sources provided (%d) to obtain full precision (requires at least %d sources)", src_number, srcs[0][10]); + return; + } + */ + + obufcnt = *tgtsize; + + auto is_valid_headers_blast = true; + + for (int src_idx=0; src_idx srcsize[src_idx]) { + fprintf(stderr, "R__unzipPrecisionCascade: too small source\n"); + return; + } + + is_valid_headers_blast &= is_valid_header_blast(srcs[src_idx]); + } + + if (is_valid_headers_blast) { + R__unzipBLAST(srcsize, srcs, tgtsize, tgt, src_number, irep); + return; + } + + // nothing else to do + +} + void R__unzipZLIB(int *srcsize, unsigned char *src, int *tgtsize, unsigned char *tgt, int *irep) { z_stream stream; /* decompression stream */ diff --git a/tree/tree/src/TBasket.cxx b/tree/tree/src/TBasket.cxx index 65ad8c841092c..481602a0b9ef7 100644 --- a/tree/tree/src/TBasket.cxx +++ b/tree/tree/src/TBasket.cxx @@ -608,10 +608,17 @@ Int_t TBasket::ReadBasketBuffers(Long64_t pos, Int_t len, TFile *file, Int_t bas return ReadBasketBuffersUncompressedCase(); } - std::vector precisionCascades; + memcpy(rawUncompressedBuffer, rawCompressedBuffer, fKeylen); + char *rawUncompressedObjectBuffer = rawUncompressedBuffer+fKeylen; + UChar_t *rawCompressedObjectBuffer = (UChar_t*)rawCompressedBuffer+fKeylen; + Int_t nin, nbuf; + Int_t nout = 0, noutot = 0, nintot = 0; + + std::vector precisionCascades; + std::vector nins; struct Destructor { - std::vector &fValue; - Destructor(std::vector &cont) : fValue(cont) {} ; + std::vector &fValue; + Destructor(std::vector &cont) : fValue(cont) {} ; ~Destructor() { for(auto c : fValue) { delete [] c; @@ -619,12 +626,15 @@ Int_t TBasket::ReadBasketBuffers(Long64_t pos, Int_t len, TFile *file, Int_t bas } }; Destructor dst(precisionCascades); - if (fBranch->GetPrecisionCascades() && !fBranch->GetPrecisionCascades()->empty()) + auto doPrecisionCascades = (fBranch->GetPrecisionCascades() && !fBranch->GetPrecisionCascades()->empty()); + if (doPrecisionCascades) { + precisionCascades.push_back(rawCompressedObjectBuffer); auto tree = fBranch->GetTree(); for(auto brpc : *fBranch->GetPrecisionCascades()) { - precisionCascades.push_back( brpc->RetrieveCascade(*tree, basketnumber) ); + precisionCascades.push_back((unsigned char*) brpc->RetrieveCascade(*tree, basketnumber) ); } + nins.assign(precisionCascades.size(),0); } // Optional monitor for zip time profiling. @@ -633,12 +643,6 @@ Int_t TBasket::ReadBasketBuffers(Long64_t pos, Int_t len, TFile *file, Int_t bas start = TTimeStamp(); } - memcpy(rawUncompressedBuffer, rawCompressedBuffer, fKeylen); - char *rawUncompressedObjectBuffer = rawUncompressedBuffer+fKeylen; - UChar_t *rawCompressedObjectBuffer = (UChar_t*)rawCompressedBuffer+fKeylen; - Int_t nin, nbuf; - Int_t nout = 0, noutot = 0, nintot = 0; - Int_t configArraySize = fBranch->GetConfigArraySize(); char *configArray = fBranch->GetConfigArray(); @@ -655,16 +659,26 @@ Int_t TBasket::ReadBasketBuffers(Long64_t pos, Int_t len, TFile *file, Int_t bas goto AfterBuffer; } - R__unzip(&nin, rawCompressedObjectBuffer, &nbuf, (unsigned char*) rawUncompressedObjectBuffer, &nout, configArraySize, configArray); + if (doPrecisionCascades) { + nins[0] = nin; + for (size_t i=1; i= fObjlen) break; rawCompressedObjectBuffer += nin; rawUncompressedObjectBuffer += nout; - if (!precisionCascades.empty()) { - // increment each element by its corresponding nout. - } + for (size_t i = 0; iLength() - fKeylen; fHeaderOnly = kTRUE; @@ -1274,8 +1288,11 @@ Int_t TBasket::WriteBuffer() char *configArray = fBranch->GetConfigArray(); std::vector precisionCascades; - if (fBranch->GetPrecisionCascades() && !fBranch->GetPrecisionCascades()->empty()) + std::vector nouts, bufmaxs; + auto doPrecisionCascades = (fBranch->GetPrecisionCascades() && !fBranch->GetPrecisionCascades()->empty()); + if (doPrecisionCascades) { + precisionCascades.push_back(bufcur); auto tree = fBranch->GetTree(); auto basketnumber = fBranch->GetWriteBasket(); for(auto brpc : *fBranch->GetPrecisionCascades()) { @@ -1284,6 +1301,7 @@ Int_t TBasket::WriteBuffer() R__SizeBuffer(*cascade_buffer, buflen); cascade_buffer->SetWriteMode(); precisionCascades.push_back( cascade_buffer->Buffer() + cascade_basket->GetKeylen() ); + nouts.assign(precisionCascades.size(),0); } } @@ -1302,7 +1320,13 @@ Int_t TBasket::WriteBuffer() // NOTE this is declared with C linkage, so it shouldn't except. Also, when // USE_IMT is defined, we are guaranteed that the compression buffer is unique per-branch. // (see fCompressedBufferRef in constructor). - R__zipMultipleAlgorithm(cxlevel, &bufmax, objbuf, &bufmax, bufcur, &nout, cxAlgorithm, datatype, configArraySize, configArray); + if (doPrecisionCascades) { + bufmaxs.assign(precisionCascades.size(),bufmax); // assuming bufmax is the same for each of the precisionCascade buffers + R__zipPrecisionCascade(&bufmax, objbuf, bufmaxs.data(), precisionCascades.data(), precisionCascades.size(), nouts.data(), cxAlgorithm, datatype, configArraySize, configArray); + nout = nouts[0]; + } else { + R__zipMultipleAlgorithm(cxlevel, &bufmax, objbuf, &bufmax, bufcur, &nout, cxAlgorithm, datatype, configArraySize, configArray); + } #ifdef R__USE_IMT sentry.lock(); #endif // R__USE_IMT @@ -1329,9 +1353,8 @@ Int_t TBasket::WriteBuffer() noutot += nout; objbuf += kMAXZIPBUF; nzip += kMAXZIPBUF; - if (!precisionCascades.empty()) { - // increment each element by its corresponding nout. - } + for (size_t i = 0; i