Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PrecisionCascade connections to compression functions #13

Merged
merged 14 commits into from
Jun 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions core/accelogic/inc/ZipAccelogic.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@

#include "EDataType.h"

void R__zipBLAST(int *cxlevels, int *srcsize, char *src, int **tgtsizes, char **tgts, int tgt_number, int *irep, EDataType datatype = EDataType::kNoType_t);
void R__zipBLAST(int *cxlevels, int *srcsize, char *src, int *tgtsize, char **tgts, int tgt_number, int *irep, EDataType datatype = EDataType::kNoType_t);

void R__unzipBLAST(int **srcsizes, unsigned char **srcs, int *tgtsize, unsigned char *tgt, int src_number, int *irep);
void R__unzipBLAST(int *srcsize, unsigned char **srcs, int *tgtsize, unsigned char *tgt, int src_number, int *irep);

// The below two interfaces are to maintain backward compatibility until RZip.cxx is updated to use the above two interfaces

Expand Down
22 changes: 10 additions & 12 deletions core/accelogic/src/ZipAccelogic.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,16 @@ union IntegerTypes {

void R__zipBLAST(int cxlevel, int *srcsize, char *src, int *tgtsize, char *tgt, int *irep, EDataType datatype)
{
R__zipBLAST(&cxlevel,srcsize,src,&tgtsize,&tgt,1,irep,datatype);
R__zipBLAST(&cxlevel,srcsize,src,tgtsize,&tgt,1,irep,datatype);
}

void R__zipBLAST(int *cxlevels, int *srcsize, char *src, int **tgtsizes, char **tgts, int tgt_number, int *irep, EDataType datatype)
void R__zipBLAST(int *cxlevels, int *srcsize, char *src, int *tgtsize, char **tgts, int tgt_number, int *irep, EDataType datatype)
{
*irep = 0;
int *tgtsize = *tgtsizes;
memset(irep,0,tgt_number*sizeof(int)); // irep needs to point to an array of integers of size tgt_number (could just be a single integer)
char *tgt = *tgts;

for (int tgt_idx=0; tgt_idx<tgt_number; tgt_idx++) {
if (*(tgtsizes[tgt_idx]) <= 0) {
if (tgtsize[tgt_idx] <= 0) {
return;
}
}
Expand Down Expand Up @@ -109,7 +108,7 @@ void R__zipBLAST(int *cxlevels, int *srcsize, char *src, int **tgtsizes, char **

auto excessive_size = false;
for (int tgt_idx=0; tgt_idx<tgt_number && !excessive_size; tgt_idx++)
excessive_size |= ( ( out_sizes[tgt_idx] + kHeaderSize) > (size_t)*tgtsizes[tgt_idx] );
excessive_size |= ( ( out_sizes[tgt_idx] + kHeaderSize) > (size_t)tgtsize[tgt_idx] );

if (excessive_size) {
for (int tgt_idx=0; tgt_idx<tgt_number; tgt_idx++)
Expand All @@ -121,8 +120,8 @@ void R__zipBLAST(int *cxlevels, int *srcsize, char *src, int **tgtsizes, char **
memcpy(tgts[tgt_idx] + kHeaderSize, staging[tgt_idx], out_sizes[tgt_idx]);
tgts[tgt_idx][2] = cxlevels[tgt_idx];
delete [] (staging[tgt_idx]);
// *irep will be the sum of all buffer sizes
*irep += out_sizes[tgt_idx] + kHeaderSize;
// irep points to an array of all buffer sizes
irep[tgt_idx] = out_sizes[tgt_idx] + kHeaderSize;
}
} else {
// Use "RLE".
Expand Down Expand Up @@ -204,10 +203,10 @@ void R__zipBLAST(int *cxlevels, int *srcsize, char *src, int **tgtsizes, char **

void R__unzipBLAST(int *srcsize, unsigned char *src, int *tgtsize, unsigned char *tgt, int *irep)
{
R__unzipBLAST(&srcsize,&src,tgtsize,tgt,1,irep);
R__unzipBLAST(srcsize,&src,tgtsize,tgt,1,irep);
}

void R__unzipBLAST(int **srcsizes, unsigned char **srcs, int *tgtsize, unsigned char *tgt, int src_number, int *irep)
void R__unzipBLAST(int *srcsize, unsigned char **srcs, int *tgtsize, unsigned char *tgt, int src_number, int *irep)
{
*irep = 0;
unsigned char *src = *srcs;
Expand Down Expand Up @@ -239,7 +238,7 @@ void R__unzipBLAST(int **srcsizes, unsigned char **srcs, int *tgtsize, unsigned
for (int src_idx=0; src_idx<src_number; src_idx++) {
absSensLevels[src_idx] = srcs[src_idx][2] - 61;
sources[src_idx] = (char*)(&srcs[src_idx][kHeaderSize]);
in_sizes[src_idx] = (size_t) (srcsizes[src_idx] - kHeaderSize);
in_sizes[src_idx] = (size_t) (srcsize[src_idx] - kHeaderSize);
}
auto absSens_src_number = src_number - 1; // Needs to be 1 less than provided sources
// Note: We need to check that the destination really starts on a float boundary.
Expand All @@ -260,7 +259,6 @@ void R__unzipBLAST(int **srcsizes, unsigned char **srcs, int *tgtsize, unsigned
delete [] staging.c;
*irep = out_size;
} else {
int *srcsize = *srcsizes;
char* source = (char*)(&src[kHeaderSize]);
size_t in_size = (*srcsize) - kHeaderSize;

Expand Down
5 changes: 5 additions & 0 deletions core/zip/inc/RZip.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ extern "C" unsigned long R__memcompress(char *tgt, unsigned long tgtsize, char *
extern "C" void R__zipMultipleAlgorithm(int cxlevel, int *srcsize, char *src, int *tgtsize, char *tgt, int *irep, ROOT::RCompressionSetting::EAlgorithm::EValues,
EDataType datatype = EDataType::kNoType_t, int configsize = 0, char *configarray = nullptr);

/**
 * Multi-target variant of R__zipMultipleAlgorithm: compresses `src` into `tgt_number` target buffers (`tgts`).
 * `irep` must point to an array of `tgt_number` ints; the implementation zeroes all of them when it rejects the input.
 * The per-target compression levels are not passed directly; they are read from `configarray`, which the
 * implementation reinterprets as a ROOT::Internal::PrecisionCascadeConfigArrayContent.
 * NOTE(review): `tgtsize` appears to be indexed per target by R__zipBLAST -- confirm it must also hold `tgt_number` entries.
 */
extern "C" void R__zipPrecisionCascade(int *srcsize, char *src, int *tgtsize, char **tgts, int tgt_number, int *irep, ROOT::RCompressionSetting::EAlgorithm::EValues,
EDataType datatype = EDataType::kNoType_t, int configsize = 0, char *configarray = nullptr);

/**
* This is a historical definition, prior to ROOT supporting multiple algorithms in a single file. Use
* R__zipMultipleAlgorithm instead.
Expand All @@ -33,6 +36,8 @@ extern "C" void R__zip(int cxlevel, int *srcsize, char *src, int *tgtsize, char

extern "C" void R__unzip(int *srcsize, unsigned char *src, int *tgtsize, unsigned char *tgt, int *irep, int configsize = 0, char *configarray = nullptr);

/**
 * Multi-source counterpart of R__unzip: decompresses `src_number` source buffers (`srcs`, with one size per
 * buffer in `srcsize`) into the single target buffer `tgt`, whose capacity is read from `*tgtsize`.
 * `*irep` is reset to 0 on entry and stays 0 when header validation fails.
 * `configsize` and `configarray` are accepted but currently unused by the implementation.
 */
extern "C" void R__unzipPrecisionCascade(int *srcsize, unsigned char **srcs, int *tgtsize, unsigned char *tgt, int src_number, int *irep, int configsize = 0, char *configarray = nullptr);

extern "C" int R__unzip_header(int *srcsize, unsigned char *src, int *tgtsize);

enum { kMAXZIPBUF = 0xffffff };
Expand Down
1 change: 1 addition & 0 deletions core/zip/src/Compression.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include "Compression.h"
#include <stdexcept>
#include <string>
#include "PrecisionCascadeConfigArrayContent.h"

namespace ROOT {
Expand Down
118 changes: 113 additions & 5 deletions core/zip/src/RZip.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "ZipLZ4.h"
#include "ZipZSTD.h"
#include "ZipAccelogic.h"
#include "PrecisionCascadeConfigArrayContent.h"

#include "zlib.h"

Expand All @@ -25,6 +26,8 @@
// - 3 bytes to identify the deflated buffer size.
// - 3 bytes to identify the inflated buffer size.
#define HDRSIZE 9
// - 2 bytes to identify datatype and cascade count in BLAST
#define HDRSIZE_BLAST 11

/**
* Forward decl's
Expand Down Expand Up @@ -107,7 +110,7 @@ void R__zipMultipleAlgorithm(int cxlevel, int *srcsize, char *src, int *tgtsize,
} else if (compressionAlgorithm == ROOT::RCompressionSetting::EAlgorithm::kZSTD) {
R__zipZSTD(cxlevel, srcsize, src, tgtsize, tgt, irep);
} else if (compressionAlgorithm == ROOT::RCompressionSetting::EAlgorithm::kBLAST) {
R__zipBLAST(cxlevel, srcsize, src, tgtsize, tgt, irep, datatype);
R__zipBLAST(&cxlevel, srcsize, src, tgtsize, &tgt, 1, irep, datatype);
} else if (compressionAlgorithm == ROOT::RCompressionSetting::EAlgorithm::kOldCompressionAlgo || compressionAlgorithm == ROOT::RCompressionSetting::EAlgorithm::kUseGlobal) {
R__zipOld(cxlevel, srcsize, src, tgtsize, tgt, irep);
} else {
Expand All @@ -118,6 +121,36 @@ void R__zipMultipleAlgorithm(int cxlevel, int *srcsize, char *src, int *tgtsize,
}
}

void R__zipPrecisionCascade(int *srcsize, char *src, int *tgtsize, char **tgts, int tgt_number, int *irep, ROOT::RCompressionSetting::EAlgorithm::EValues compressionAlgorithm,
EDataType datatype /* = kNoType_t */,
int configsize /* = 0 */, char * configarray /* = nullptr */)
{

if (*srcsize < 1 + HDRSIZE_BLAST + 1) {
memset(irep,0,tgt_number*sizeof(int));
return;
}

Copy link
Owner

@pcanal pcanal Jun 17, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
auto content = reinterpret_cast<ROOT::Internal::PrecisionCascadeConfigArrayContent*>(fConfigArray);
assert(content->SizeOf() == configsize);
Int_t *cxlevels = content->GetLevels(); // This an array of size `content->fLen`

Copy link
Owner

@pcanal pcanal Jun 17, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The compression levels are the user-provided levels, so they still need to be offset by 61.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#include "PrecisionCascadeConfigArrayContent.h" is needed.

auto content = reinterpret_cast<ROOT::Internal::PrecisionCascadeConfigArrayContent*>(configarray);
(void) configsize;
assert(content && (content->SizeOf() == configsize));
Int_t *cxlevels = content->GetLevels(); // This an array of size `content->fLen`

for (int tgt_idx=0; tgt_idx<tgt_number; tgt_idx++) {
// can only be 0 for the last of multiple (not just one) targets
// otherwise must be a positive value
int cxlevel_min = (tgt_idx > 0 && tgt_idx == tgt_number-1 ? 0 : 1);
if (cxlevels[tgt_idx] < cxlevel_min) {
memset(irep,0,tgt_number*sizeof(int));
return;
}
}

if (compressionAlgorithm == ROOT::RCompressionSetting::EAlgorithm::kBLAST) {
R__zipBLAST(cxlevels, srcsize, src, tgtsize, tgts, tgt_number, irep, datatype);
}
}

// The very old algorithm for backward compatibility
// 0 for selecting with R__ZipMode in a backward compatible way
// 3 for selecting in other cases
Expand Down Expand Up @@ -306,8 +339,9 @@ int R__unzip_header(int *srcsize, uch *src, int *tgtsize)
return 1;
}

*srcsize = HDRSIZE + ((long)src[3] | ((long)src[4] << 8) | ((long)src[5] << 16));
*tgtsize = (long)src[6] | ((long)src[7] << 8) | ((long)src[8] << 16);
*srcsize = ((long)src[3] | ((long)src[4] << 8) | ((long)src[5] << 16))
+ (is_valid_header_blast(src) ? HDRSIZE_BLAST : HDRSIZE); // compressed size
*tgtsize = (long)src[6] | ((long)src[7] << 8) | ((long)src[8] << 16); // uncompressed size

return 0;
}
Expand Down Expand Up @@ -339,7 +373,7 @@ void R__unzip(int *srcsize, uch *src, int *tgtsize, uch *tgt, int *irep, int /*
long ibufcnt, obufcnt;

*irep = 0L;

/* C H E C K H E A D E R */

if (*srcsize < HDRSIZE) {
Expand Down Expand Up @@ -383,7 +417,7 @@ void R__unzip(int *srcsize, uch *src, int *tgtsize, uch *tgt, int *irep, int /*
R__unzipZSTD(srcsize, src, tgtsize, tgt, irep);
return;
} else if (is_valid_header_blast(src)) {
R__unzipBLAST(srcsize, src, tgtsize, tgt, irep);
R__unzipBLAST(srcsize, &src, tgtsize, tgt, 1, irep);
return;
}

Expand All @@ -405,6 +439,80 @@ void R__unzip(int *srcsize, uch *src, int *tgtsize, uch *tgt, int *irep, int /*
*irep = isize;
}

void R__unzipPrecisionCascade(int *srcsize, uch **srcs, int *tgtsize, uch *tgt, int src_number, int *irep, int /* configsize = 0 */, char * /* configarray = nullptr */)
{

long isize = 0;
long ibufcnt, obufcnt;

*irep = 0L;

/* This check can be done here, or in R__unzipBLAST, depending on whether
the check may apply to other [future] PrecisionCascade algorithms
if (src_number > srcs[0][10]) {
src_number = srcs[0][10]; // more sources than we need?!? Ignore extras for now
} else if (src_number < srcs[0][10] && srcs[src_number-1][2] == 0) {
// This is a simple check that full precision is only ever possible with all sources from compression.
// However, it is possible that full precision was not saved at compression,
// so even using all saved sources may not return full precision
fprintf(stderr, "R__unzipPrecisionCascade: too few sources provided (%d) to obtain full precision (requires at least %d sources)", src_number, srcs[0][10]);
return;
}
*/

obufcnt = *tgtsize;

auto is_valid_headers_blast = true;

for (int src_idx=0; src_idx<src_number; src_idx++) {


/* C H E C K H E A D E R */

if (srcsize[src_idx] < HDRSIZE_BLAST) {
fprintf(stderr, "R__unzipPrecisionCascade: too small source (index %d)\n",src_idx);
return;
}

uch *src = srcs[src_idx];
if (!is_valid_header(src)) {
fprintf(stderr, "Error R__unzipPrecisionCascade: error in header\n");
return;
}

ibufcnt = (long)src[3] | ((long)src[4] << 8) | ((long)src[5] << 16); // compressed size
long isize_temp = (long)src[6] | ((long)src[7] << 8) | ((long)src[8] << 16); // uncompressed size

if (src_idx) {
if (isize_temp != isize) {
fprintf(stderr, "R__unzipPrecisionCascade: mismatching source headers\n");
return;
}
} else {
if (obufcnt < isize_temp) {
fprintf(stderr, "R__unzipPrecisionCascade: too small target\n");
return;
}
isize = isize_temp;
}

if (ibufcnt + HDRSIZE_BLAST > srcsize[src_idx]) {
fprintf(stderr, "R__unzipPrecisionCascade: too small source\n");
return;
}

is_valid_headers_blast &= is_valid_header_blast(srcs[src_idx]);
}

if (is_valid_headers_blast) {
R__unzipBLAST(srcsize, srcs, tgtsize, tgt, src_number, irep);
return;
}

// nothing else to do

}

void R__unzipZLIB(int *srcsize, unsigned char *src, int *tgtsize, unsigned char *tgt, int *irep)
{
z_stream stream; /* decompression stream */
Expand Down
Loading