From 84bbb4a8743a93d567f40d7233d5196105c8d84e Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 21 Aug 2017 23:12:45 +0200 Subject: [PATCH 01/29] opj_t1_allocate_buffers(): remove useless overflow checks --- src/lib/openjp2/t1.c | 73 ++++++++------------------------------------ 1 file changed, 13 insertions(+), 60 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 15e166f2d..9a192f933 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -1426,27 +1426,18 @@ static OPJ_BOOL opj_t1_allocate_buffers( OPJ_UINT32 w, OPJ_UINT32 h) { - size_t flagssize; + OPJ_UINT32 flagssize; OPJ_UINT32 flags_stride; + /* No risk of overflow. Prior checks ensure those assert are met */ + /* They are per the specification */ + assert(w <= 1024); + assert(h <= 1024); + assert(w * h <= 4096); + /* encoder uses tile buffer, so no need to allocate */ if (!t1->encoder) { - size_t datasize; - -#if (SIZE_MAX / 0xFFFFFFFFU) < 0xFFFFFFFFU /* UINT32_MAX */ - /* Overflow check */ - if ((w > 0U) && ((size_t)h > (SIZE_MAX / (size_t)w))) { - /* FIXME event manager error callback */ - return OPJ_FALSE; - } -#endif - datasize = (size_t)w * h; - - /* Overflow check */ - if (datasize > (SIZE_MAX / sizeof(OPJ_INT32))) { - /* FIXME event manager error callback */ - return OPJ_FALSE; - } + OPJ_UINT32 datasize = w * h; if (datasize > (size_t)t1->datasize) { opj_aligned_free(t1->data); @@ -1455,15 +1446,7 @@ static OPJ_BOOL opj_t1_allocate_buffers( /* FIXME event manager error callback */ return OPJ_FALSE; } -#if SIZE_MAX > 0xFFFFFFFFU /* UINT32_MAX */ - /* TODO remove this if t1->datasize type changes to size_t */ - /* Overflow check */ - if (datasize > (size_t)0xFFFFFFFFU /* UINT32_MAX */) { - /* FIXME event manager error callback */ - return OPJ_FALSE; - } -#endif - t1->datasize = (OPJ_UINT32)datasize; + t1->datasize = datasize; } /* memset first arg is declared to never be null by gcc */ if (t1->data != NULL) { @@ -1471,40 +1454,18 @@ static OPJ_BOOL 
opj_t1_allocate_buffers( } } - /* Overflow check */ - if (w > (0xFFFFFFFFU /* UINT32_MAX */ - 2U)) { - /* FIXME event manager error callback */ - return OPJ_FALSE; - } flags_stride = w + 2U; /* can't be 0U */ -#if (SIZE_MAX - 3U) < 0xFFFFFFFFU /* UINT32_MAX */ - /* Overflow check */ - if (h > (0xFFFFFFFFU /* UINT32_MAX */ - 3U)) { - /* FIXME event manager error callback */ - return OPJ_FALSE; - } -#endif flagssize = (h + 3U) / 4U + 2U; - /* Overflow check */ - if (flagssize > (SIZE_MAX / (size_t)flags_stride)) { - /* FIXME event manager error callback */ - return OPJ_FALSE; - } - flagssize *= (size_t)flags_stride; + flagssize *= flags_stride; { - /* BIG FAT XXX */ opj_flag_t* p; OPJ_UINT32 x; OPJ_UINT32 flags_height = (h + 3U) / 4U; - if (flagssize > (size_t)t1->flagssize) { - /* Overflow check */ - if (flagssize > (SIZE_MAX / sizeof(opj_flag_t))) { - /* FIXME event manager error callback */ - return OPJ_FALSE; - } + if (flagssize > t1->flagssize) { + opj_aligned_free(t1->flags); t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof( opj_flag_t)); @@ -1512,16 +1473,8 @@ static OPJ_BOOL opj_t1_allocate_buffers( /* FIXME event manager error callback */ return OPJ_FALSE; } -#if SIZE_MAX > 0xFFFFFFFFU /* UINT32_MAX */ - /* TODO remove this if t1->flagssize type changes to size_t */ - /* Overflow check */ - if (flagssize > (size_t)0xFFFFFFFFU /* UINT32_MAX */) { - /* FIXME event manager error callback */ - return OPJ_FALSE; - } -#endif } - t1->flagssize = (OPJ_UINT32)flagssize; + t1->flagssize = flagssize; memset(t1->flags, 0, flagssize * sizeof(opj_flag_t)); From 0a25dceca7761ee3f16cbb2ced87b915a948b25e Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 21 Aug 2017 23:20:51 +0200 Subject: [PATCH 02/29] opj_j2k_setup_encoder(): validate code block width/height --- src/lib/openjp2/j2k.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c index 4fd65872a..174cf7696 100644 --- 
a/src/lib/openjp2/j2k.c +++ b/src/lib/openjp2/j2k.c @@ -6730,6 +6730,7 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, { OPJ_UINT32 i, j, tileno, numpocs_tile; opj_cp_t *cp = 00; + OPJ_UINT32 cblkw, cblkh; if (!p_j2k || !parameters || ! image) { return OPJ_FALSE; @@ -6743,6 +6744,38 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, return OPJ_FALSE; } + if (parameters->cblockw_init < 4 || parameters->cblockw_init > 1024) { + opj_event_msg(p_manager, EVT_ERROR, + "Invalid value for cblockw_init: %d not a power of 2 in range [4,1024]\n", + parameters->cblockw_init); + return OPJ_FALSE; + } + if (parameters->cblockh_init < 4 || parameters->cblockh_init > 1024) { + opj_event_msg(p_manager, EVT_ERROR, + "Invalid value for cblockh_init: %d not a power of 2 not in range [4,1024]\n", + parameters->cblockh_init); + return OPJ_FALSE; + } + if (parameters->cblockw_init * parameters->cblockh_init > 4096) { + opj_event_msg(p_manager, EVT_ERROR, + "Invalid value for cblockw_init * cblockh_init: should be <= 4096\n"); + return OPJ_FALSE; + } + cblkw = (OPJ_UINT32)opj_int_floorlog2(parameters->cblockw_init); + cblkh = (OPJ_UINT32)opj_int_floorlog2(parameters->cblockh_init); + if (parameters->cblockw_init != (1 << cblkw)) { + opj_event_msg(p_manager, EVT_ERROR, + "Invalid value for cblockw_init: %d not a power of 2 in range [4,1024]\n", + parameters->cblockw_init); + return OPJ_FALSE; + } + if (parameters->cblockh_init != (1 << cblkh)) { + opj_event_msg(p_manager, EVT_ERROR, + "Invalid value for cblockw_init: %d not a power of 2 in range [4,1024]\n", + parameters->cblockh_init); + return OPJ_FALSE; + } + /* keep a link to cp so that we can destroy it later in j2k_destroy_compress */ cp = &(p_j2k->m_cp); From aa7198146b995fe2993ce24f5715057b7da0386d Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 21 Aug 2017 23:21:39 +0200 Subject: [PATCH 03/29] opj_compress: reorder checks related to code block dimensions, to avoid potential int overflow --- src/bin/jp2/opj_compress.c 
| 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/bin/jp2/opj_compress.c b/src/bin/jp2/opj_compress.c index 494b366a7..ab07fbb50 100644 --- a/src/bin/jp2/opj_compress.c +++ b/src/bin/jp2/opj_compress.c @@ -907,8 +907,9 @@ static int parse_cmdline_encoder(int argc, char **argv, case 'b': { /* code-block dimension */ int cblockw_init = 0, cblockh_init = 0; sscanf(opj_optarg, "%d,%d", &cblockw_init, &cblockh_init); - if (cblockw_init * cblockh_init > 4096 || cblockw_init > 1024 - || cblockw_init < 4 || cblockh_init > 1024 || cblockh_init < 4) { + if (cblockw_init > 1024 || cblockw_init < 4 || + cblockh_init > 1024 || cblockh_init < 4 || + cblockw_init * cblockh_init > 4096) { fprintf(stderr, "!! Size of code_block error (option -b) !!\n\nRestriction :\n" " * width*height<=4096\n * 4<=width,height<= 1024\n\n"); From f9e9942330f476b66ac4a35d0ae521200878f343 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:30:29 +0200 Subject: [PATCH 04/29] Sub-tile decoding: only allocate tile component buffer of the needed dimension Instead of being the full tile size. * Use a sparse array mechanism to store code-blocks and intermediate stages of IDWT. * IDWT, DC level shift and MCT stages are done just on that smaller array. * Improve copy of tile component array to final image, by saving an intermediate buffer. * For full-tile decoding at reduced resolution, only allocate the tile buffer to the reduced size, instead of the full-resolution size. 
--- CMakeLists.txt | 2 +- src/lib/openjp2/CMakeLists.txt | 18 +- src/lib/openjp2/bench_dwt.c | 9 +- src/lib/openjp2/dwt.c | 467 ++++++++++++++++++---------- src/lib/openjp2/dwt.h | 4 +- src/lib/openjp2/j2k.c | 264 ++++------------ src/lib/openjp2/opj_includes.h | 3 + src/lib/openjp2/sparse_array.c | 233 ++++++++++++++ src/lib/openjp2/sparse_array.h | 141 +++++++++ src/lib/openjp2/t1.c | 115 ++++--- src/lib/openjp2/tcd.c | 386 ++++++++++++++++++----- src/lib/openjp2/tcd.h | 52 +++- src/lib/openjp2/test_sparse_array.c | 148 +++++++++ 13 files changed, 1335 insertions(+), 507 deletions(-) create mode 100644 src/lib/openjp2/sparse_array.c create mode 100644 src/lib/openjp2/sparse_array.h create mode 100644 src/lib/openjp2/test_sparse_array.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 70554ad1b..f315d7cf6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -253,7 +253,7 @@ if(BUILD_JPIP_SERVER) endif() add_subdirectory(src/lib) option(BUILD_LUTS_GENERATOR "Build utility to generate t1_luts.h" OFF) -option(BUILD_BENCH_DWT "Build bench_dwt utility (development benchmark)" OFF) +option(BUILD_UNIT_TESTS "Build unit tests (bench_dwt, test_sparse_array, etc..)" OFF) #----------------------------------------------------------------------------- # Build Applications diff --git a/src/lib/openjp2/CMakeLists.txt b/src/lib/openjp2/CMakeLists.txt index 57c1751ef..697b07ea2 100644 --- a/src/lib/openjp2/CMakeLists.txt +++ b/src/lib/openjp2/CMakeLists.txt @@ -54,6 +54,8 @@ set(OPENJPEG_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/opj_malloc.c ${CMAKE_CURRENT_SOURCE_DIR}/opj_malloc.h ${CMAKE_CURRENT_SOURCE_DIR}/opj_stdint.h + ${CMAKE_CURRENT_SOURCE_DIR}/sparse_array.c + ${CMAKE_CURRENT_SOURCE_DIR}/sparse_array.h ) if(BUILD_JPIP) add_definitions(-DUSE_JPIP) @@ -192,12 +194,20 @@ if(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) TARGET_LINK_LIBRARIES(${OPENJPEG_LIBRARY_NAME} ${CMAKE_THREAD_LIBS_INIT}) endif(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) 
-if(BUILD_BENCH_DWT) - add_executable(bench_dwt bench_dwt.c dwt.c opj_malloc.c thread.c) +if(BUILD_UNIT_TESTS) + add_executable(bench_dwt bench_dwt.c) if(UNIX) - target_link_libraries(bench_dwt m) + target_link_libraries(bench_dwt m ${OPENJPEG_LIBRARY_NAME}) endif() if(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) target_link_libraries(bench_dwt ${CMAKE_THREAD_LIBS_INIT}) endif(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) -endif(BUILD_BENCH_DWT) + + add_executable(test_sparse_array test_sparse_array.c) + if(UNIX) + target_link_libraries(test_sparse_array m ${OPENJPEG_LIBRARY_NAME}) + endif() + if(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) + target_link_libraries(test_sparse_array ${CMAKE_THREAD_LIBS_INIT}) + endif(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) +endif(BUILD_UNIT_TESTS) diff --git a/src/lib/openjp2/bench_dwt.c b/src/lib/openjp2/bench_dwt.c index 36f4c0c9a..0dc278f3e 100644 --- a/src/lib/openjp2/bench_dwt.c +++ b/src/lib/openjp2/bench_dwt.c @@ -198,10 +198,11 @@ int main(int argc, char** argv) memset(&tcd, 0, sizeof(tcd)); tcd.thread_pool = tp; - tcd.decoded_x0 = (OPJ_UINT32)tilec.x0; - tcd.decoded_y0 = (OPJ_UINT32)tilec.y0; - tcd.decoded_x1 = (OPJ_UINT32)tilec.x1; - tcd.decoded_y1 = (OPJ_UINT32)tilec.y1; + tcd.whole_tile_decoding = OPJ_TRUE; + tcd.win_x0 = (OPJ_UINT32)tilec.x0; + tcd.win_y0 = (OPJ_UINT32)tilec.y0; + tcd.win_x1 = (OPJ_UINT32)tilec.x1; + tcd.win_y1 = (OPJ_UINT32)tilec.y1; tcd.tcd_image = &tcd_image; memset(&tcd_image, 0, sizeof(tcd_image)); tcd_image.tiles = &tcd_tile; diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index 7377b6429..b32508dbf 100644 --- a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ -151,9 +151,9 @@ Inverse wavelet transform in 2-D. 
static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* tilec, OPJ_UINT32 i); -static OPJ_BOOL opj_dwt_decode_partial_tile(opj_tcd_t *p_tcd, - opj_tcd_tilecomp_t* tilec, - OPJ_UINT32 numres); +static OPJ_BOOL opj_dwt_decode_partial_tile( + opj_tcd_tilecomp_t* tilec, + OPJ_UINT32 numres); static OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec, void (*p_function)(OPJ_INT32 *, OPJ_INT32, OPJ_INT32, OPJ_INT32)); @@ -1194,50 +1194,16 @@ OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec) return opj_dwt_encode_procedure(tilec, opj_dwt_encode_1); } -static OPJ_BOOL opj_dwt_is_whole_tile_decoding(opj_tcd_t *p_tcd, - opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres) -{ - opj_image_comp_t* image_comp = &(p_tcd->image->comps[tilec->compno]); - /* Compute the intersection of the area of interest, expressed in tile coordinates */ - /* with the tile coordinates */ - OPJ_UINT32 tcx0 = opj_uint_max( - (OPJ_UINT32)tilec->x0, - opj_uint_ceildiv(p_tcd->decoded_x0, image_comp->dx)); - OPJ_UINT32 tcy0 = opj_uint_max( - (OPJ_UINT32)tilec->y0, - opj_uint_ceildiv(p_tcd->decoded_y0, image_comp->dy)); - OPJ_UINT32 tcx1 = opj_uint_min( - (OPJ_UINT32)tilec->x1, - opj_uint_ceildiv(p_tcd->decoded_x1, image_comp->dx)); - OPJ_UINT32 tcy1 = opj_uint_min( - (OPJ_UINT32)tilec->y1, - opj_uint_ceildiv(p_tcd->decoded_y1, image_comp->dy)); - - OPJ_UINT32 shift = tilec->numresolutions - numres; - - /* Tolerate small margin within the reduced resolution factor to consider if */ - /* the whole tile path must be taken */ - return (tcx0 >= (OPJ_UINT32)tilec->x0 && - tcy0 >= (OPJ_UINT32)tilec->y0 && - tcx1 <= (OPJ_UINT32)tilec->x1 && - tcy1 <= (OPJ_UINT32)tilec->y1 && - (shift >= 32 || - (((tcx0 - (OPJ_UINT32)tilec->x0) >> shift) == 0 && - ((tcy0 - (OPJ_UINT32)tilec->y0) >> shift) == 0 && - (((OPJ_UINT32)tilec->x1 - tcx1) >> shift) == 0 && - (((OPJ_UINT32)tilec->y1 - tcy1) >> shift) == 0))); -} - /* */ /* Inverse 5-3 wavelet transform in 2-D. 
*/ /* */ OPJ_BOOL opj_dwt_decode(opj_tcd_t *p_tcd, opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres) { - if (opj_dwt_is_whole_tile_decoding(p_tcd, tilec, numres)) { + if (p_tcd->whole_tile_decoding) { return opj_dwt_decode_tile(p_tcd->thread_pool, tilec, numres); } else { - return opj_dwt_decode_partial_tile(p_tcd, tilec, numres); + return opj_dwt_decode_partial_tile(tilec, numres); } } @@ -1403,7 +1369,9 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 - tr->y0); /* height of the resolution level computed */ - OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0); + OPJ_UINT32 w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - + 1].x1 - + tilec->resolutions[tilec->minimum_num_resolutions - 1].x0); size_t h_mem_size; int num_threads; @@ -1552,51 +1520,53 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, static void opj_dwt_interleave_partial_h(OPJ_INT32 *dest, OPJ_INT32 cas, - const OPJ_INT32* src, - OPJ_INT32 sn, - OPJ_INT32 win_l_x0, - OPJ_INT32 win_l_x1, - OPJ_INT32 win_h_x0, - OPJ_INT32 win_h_x1) + opj_sparse_array_int32_t* sa, + OPJ_UINT32 sa_line, + OPJ_UINT32 sn, + OPJ_UINT32 win_l_x0, + OPJ_UINT32 win_l_x1, + OPJ_UINT32 win_h_x0, + OPJ_UINT32 win_h_x1) { - const OPJ_INT32 *ai = src; - OPJ_INT32 *bi = dest + cas; - OPJ_INT32 i; - - for (i = win_l_x0; i < win_l_x1; i++) { - bi[2 * i] = ai[i]; - } - - ai = src + sn; - bi = dest + 1 - cas; - for (i = win_h_x0; i < win_h_x1; i++) { - bi[2 * i] = ai[i]; - } + OPJ_BOOL ret; + ret = opj_sparse_array_int32_read(sa, + win_l_x0, sa_line, + win_l_x1, sa_line + 1, + dest + cas + 2 * win_l_x0, + 2, 0, OPJ_TRUE); + assert(ret); + ret = opj_sparse_array_int32_read(sa, + sn + win_h_x0, sa_line, + sn + win_h_x1, sa_line + 1, + dest + 1 - cas + 2 * win_h_x0, + 2, 0, OPJ_TRUE); + assert(ret); } + static void opj_dwt_interleave_partial_v(OPJ_INT32 *dest, OPJ_INT32 cas, - const OPJ_INT32* src, - OPJ_INT32 sn, - OPJ_INT32 stride, - OPJ_INT32 win_l_y0, - 
OPJ_INT32 win_l_y1, - OPJ_INT32 win_h_y0, - OPJ_INT32 win_h_y1) + opj_sparse_array_int32_t* sa, + OPJ_UINT32 sa_col, + OPJ_UINT32 sn, + OPJ_UINT32 win_l_y0, + OPJ_UINT32 win_l_y1, + OPJ_UINT32 win_h_y0, + OPJ_UINT32 win_h_y1) { - const OPJ_INT32 *ai = src; - OPJ_INT32 *bi = dest + cas; - OPJ_INT32 i; - - for (i = win_l_y0; i < win_l_y1; i++) { - bi[2 * i] = ai[i * stride]; - } - - ai = src + sn * stride; - bi = dest + 1 - cas; - for (i = win_h_y0; i < win_h_y1; i++) { - bi[2 * i] = ai[i * stride]; - } + OPJ_BOOL ret; + ret = opj_sparse_array_int32_read(sa, + sa_col, win_l_y0, + sa_col + 1, win_l_y1, + dest + cas + 2 * win_l_y0, + 0, 2, OPJ_TRUE); + assert(ret); + ret = opj_sparse_array_int32_read(sa, + sa_col, sn + win_h_y0, + sa_col + 1, sn + win_h_y1, + dest + 1 - cas + 2 * win_h_y0, + 0, 2, OPJ_TRUE); + assert(ret); } static void opj_dwt_decode_partial_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, @@ -1683,10 +1653,68 @@ static void opj_dwt_segment_grow(OPJ_UINT32 filter_width, *end = opj_uint_min(*end, max_size); } -static OPJ_BOOL opj_dwt_decode_partial_tile(opj_tcd_t *tcd, - opj_tcd_tilecomp_t* tilec, - OPJ_UINT32 numres) + +static opj_sparse_array_int32_t* opj_dwt_init_sparse_array( + opj_tcd_tilecomp_t* tilec, + OPJ_UINT32 numres) +{ + opj_tcd_resolution_t* tr_max = &(tilec->resolutions[numres - 1]); + OPJ_UINT32 w = (OPJ_UINT32)(tr_max->x1 - tr_max->x0); + OPJ_UINT32 h = (OPJ_UINT32)(tr_max->y1 - tr_max->y0); + OPJ_UINT32 resno, bandno, precno, cblkno; + opj_sparse_array_int32_t* sa = opj_sparse_array_int32_create( + w, h, opj_uint_min(w, 64), opj_uint_min(h, 64)); + if (sa == NULL) { + return NULL; + } + + for (resno = 0; resno < numres; ++resno) { + opj_tcd_resolution_t* res = &tilec->resolutions[resno]; + + for (bandno = 0; bandno < res->numbands; ++bandno) { + opj_tcd_band_t* band = &res->bands[bandno]; + + for (precno = 0; precno < res->pw * res->ph; ++precno) { + opj_tcd_precinct_t* precinct = &band->precincts[precno]; + for (cblkno = 0; cblkno < 
precinct->cw * precinct->ch; ++cblkno) { + opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno]; + if (cblk->decoded_data != NULL) { + OPJ_UINT32 x = (OPJ_UINT32)(cblk->x0 - band->x0); + OPJ_UINT32 y = (OPJ_UINT32)(cblk->y0 - band->y0); + OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0); + OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0); + + if (band->bandno & 1) { + opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1]; + x += (OPJ_UINT32)(pres->x1 - pres->x0); + } + if (band->bandno & 2) { + opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1]; + y += (OPJ_UINT32)(pres->y1 - pres->y0); + } + + if (!opj_sparse_array_int32_write(sa, x, y, + x + cblk_w, y + cblk_h, + cblk->decoded_data, + 1, cblk_w, OPJ_TRUE)) { + opj_sparse_array_int32_free(sa); + return NULL; + } + } + } + } + } + } + + return sa; +} + + +static OPJ_BOOL opj_dwt_decode_partial_tile( + opj_tcd_tilecomp_t* tilec, + OPJ_UINT32 numres) { + opj_sparse_array_int32_t* sa; opj_dwt_t h; opj_dwt_t v; OPJ_UINT32 resno; @@ -1695,38 +1723,42 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(opj_tcd_t *tcd, const OPJ_UINT32 filter_width = 2U; opj_tcd_resolution_t* tr = tilec->resolutions; + opj_tcd_resolution_t* tr_max = &(tilec->resolutions[numres - 1]); OPJ_UINT32 rw = (OPJ_UINT32)(tr->x1 - tr->x0); /* width of the resolution level computed */ OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 - tr->y0); /* height of the resolution level computed */ - OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0); size_t h_mem_size; - opj_image_comp_t* image_comp = &(tcd->image->comps[tilec->compno]); /* Compute the intersection of the area of interest, expressed in tile coordinates */ /* with the tile coordinates */ - OPJ_UINT32 win_tcx0 = opj_uint_max( - (OPJ_UINT32)tilec->x0, - opj_uint_ceildiv(tcd->decoded_x0, image_comp->dx)); - OPJ_UINT32 win_tcy0 = opj_uint_max( - (OPJ_UINT32)tilec->y0, - opj_uint_ceildiv(tcd->decoded_y0, image_comp->dy)); - OPJ_UINT32 win_tcx1 = opj_uint_min( - (OPJ_UINT32)tilec->x1, - 
opj_uint_ceildiv(tcd->decoded_x1, image_comp->dx)); - OPJ_UINT32 win_tcy1 = opj_uint_min( - (OPJ_UINT32)tilec->y1, - opj_uint_ceildiv(tcd->decoded_y1, image_comp->dy)); + OPJ_UINT32 win_tcx0 = tilec->win_x0; + OPJ_UINT32 win_tcy0 = tilec->win_y0; + OPJ_UINT32 win_tcx1 = tilec->win_x1; + OPJ_UINT32 win_tcy1 = tilec->win_y1; + + sa = opj_dwt_init_sparse_array(tilec, numres); if (numres == 1U) { + OPJ_BOOL ret = opj_sparse_array_int32_read(sa, + tr_max->win_x0 - (OPJ_UINT32)tr_max->x0, + tr_max->win_y0 - (OPJ_UINT32)tr_max->y0, + tr_max->win_x1 - (OPJ_UINT32)tr_max->x0, + tr_max->win_y1 - (OPJ_UINT32)tr_max->y0, + tilec->data_win, + 1, tr_max->win_x1 - tr_max->win_x0, + OPJ_TRUE); + assert(ret); + opj_sparse_array_int32_free(sa); return OPJ_TRUE; } h_mem_size = opj_dwt_max_resolution(tr, numres); /* overflow check */ if (h_mem_size > (SIZE_MAX / sizeof(OPJ_INT32))) { /* FIXME event manager error callback */ + opj_sparse_array_int32_free(sa); return OPJ_FALSE; } @@ -1734,13 +1766,13 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(opj_tcd_t *tcd, h.mem = (OPJ_INT32*)opj_aligned_32_malloc(h_mem_size); if (! 
h.mem) { /* FIXME event manager error callback */ + opj_sparse_array_int32_free(sa); return OPJ_FALSE; } v.mem = h.mem; - for (resno = 1; --numres > 0; resno ++) { - OPJ_INT32 * OPJ_RESTRICT tiledp = tilec->data; + for (resno = 1; resno < numres; resno ++) { OPJ_UINT32 i, j; /* Window of interest subband-based coordinates */ OPJ_UINT32 win_ll_x0, win_ll_y0, win_ll_x1, win_ll_y1; @@ -1826,47 +1858,74 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(opj_tcd_t *tcd, for (j = 0; j < rh; ++j) { if ((j >= win_ll_y0 && j < win_ll_y1) || (j >= win_lh_y0 + (OPJ_UINT32)v.sn && j < win_lh_y1 + (OPJ_UINT32)v.sn)) { - memset(h.mem, 0, (OPJ_UINT32)(h.sn + h.dn) * sizeof(OPJ_INT32)); opj_dwt_interleave_partial_h(h.mem, h.cas, - &tiledp[j * w], - h.sn, - (OPJ_INT32)win_ll_x0, - (OPJ_INT32)win_ll_x1, - (OPJ_INT32)win_hl_x0, - (OPJ_INT32)win_hl_x1); + sa, + j, + (OPJ_UINT32)h.sn, + win_ll_x0, + win_ll_x1, + win_hl_x0, + win_hl_x1); opj_dwt_decode_partial_1(h.mem, h.dn, h.sn, h.cas, (OPJ_INT32)win_ll_x0, (OPJ_INT32)win_ll_x1, (OPJ_INT32)win_hl_x0, (OPJ_INT32)win_hl_x1); - memcpy(&tiledp[j * w] + win_tr_x0, h.mem + win_tr_x0, - (win_tr_x1 - win_tr_x0) * sizeof(OPJ_INT32)); + if (!opj_sparse_array_int32_write(sa, + win_tr_x0, j, + win_tr_x1, j + 1, + h.mem + win_tr_x0, + 1, 0, OPJ_TRUE)) { + /* FIXME event manager error callback */ + opj_sparse_array_int32_free(sa); + opj_aligned_free(h.mem); + return OPJ_FALSE; + } } } for (i = win_tr_x0; i < win_tr_x1; ++i) { - memset(v.mem, 0, (OPJ_UINT32)(v.sn + v.dn) * sizeof(OPJ_INT32)); opj_dwt_interleave_partial_v(v.mem, v.cas, - tiledp + i, - v.sn, - (OPJ_INT32)w, - (OPJ_INT32)win_ll_y0, - (OPJ_INT32)win_ll_y1, - (OPJ_INT32)win_lh_y0, - (OPJ_INT32)win_lh_y1); + sa, + i, + (OPJ_UINT32)v.sn, + win_ll_y0, + win_ll_y1, + win_lh_y0, + win_lh_y1); opj_dwt_decode_partial_1(v.mem, v.dn, v.sn, v.cas, (OPJ_INT32)win_ll_y0, (OPJ_INT32)win_ll_y1, (OPJ_INT32)win_lh_y0, (OPJ_INT32)win_lh_y1); - for (j = win_tr_y0; j < win_tr_y1; j++) { - tiledp[j * w + i] = 
v.mem[j]; + if (!opj_sparse_array_int32_write(sa, + i, win_tr_y0, + i + 1, win_tr_y1, + v.mem + win_tr_y0, + 0, 1, OPJ_TRUE)) { + /* FIXME event manager error callback */ + opj_sparse_array_int32_free(sa); + opj_aligned_free(h.mem); + return OPJ_FALSE; } } } opj_aligned_free(h.mem); + + { + OPJ_BOOL ret = opj_sparse_array_int32_read(sa, + tr_max->win_x0 - (OPJ_UINT32)tr_max->x0, + tr_max->win_y0 - (OPJ_UINT32)tr_max->y0, + tr_max->win_x1 - (OPJ_UINT32)tr_max->x0, + tr_max->win_y1 - (OPJ_UINT32)tr_max->y0, + tilec->data_win, + 1, tr_max->win_x1 - tr_max->win_x0, + OPJ_TRUE); + assert(ret); + } + opj_sparse_array_int32_free(sa); return OPJ_TRUE; } @@ -1924,6 +1983,31 @@ static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt, } } +static void opj_v4dwt_interleave_partial_h(opj_v4dwt_t* dwt, + opj_sparse_array_int32_t* sa, + OPJ_UINT32 sa_line, + OPJ_UINT32 remaining_height) +{ + OPJ_UINT32 i; + for (i = 0; i < remaining_height; i++) { + OPJ_BOOL ret; + ret = opj_sparse_array_int32_read(sa, + dwt->win_l_x0, sa_line + i, + dwt->win_l_x1, sa_line + i + 1, + /* Nasty cast from float* to int32* */ + (OPJ_INT32*)(dwt->wavelet + dwt->cas + 2 * dwt->win_l_x0) + i, + 8, 0, OPJ_TRUE); + assert(ret); + ret = opj_sparse_array_int32_read(sa, + (OPJ_UINT32)dwt->sn + dwt->win_h_x0, sa_line + i, + (OPJ_UINT32)dwt->sn + dwt->win_h_x1, sa_line + i + 1, + /* Nasty cast from float* to int32* */ + (OPJ_INT32*)(dwt->wavelet + 1 - dwt->cas + 2 * dwt->win_h_x0) + i, + 8, 0, OPJ_TRUE); + assert(ret); + } +} + static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt, OPJ_FLOAT32* OPJ_RESTRICT a, OPJ_UINT32 width, @@ -1944,6 +2028,29 @@ static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt, } } +static void opj_v4dwt_interleave_partial_v(opj_v4dwt_t* OPJ_RESTRICT dwt, + opj_sparse_array_int32_t* sa, + OPJ_UINT32 sa_col, + OPJ_UINT32 nb_elts_read) +{ + OPJ_UINT32 i; + for (i = 0; i < nb_elts_read; i++) { + OPJ_BOOL ret; + ret = opj_sparse_array_int32_read(sa, + 
sa_col + i, dwt->win_l_x0, + sa_col + i + 1, dwt->win_l_x1, + (OPJ_INT32*)(dwt->wavelet + dwt->cas + 2 * dwt->win_l_x0) + i, + 0, 8, OPJ_TRUE); + assert(ret); + ret = opj_sparse_array_int32_read(sa, + sa_col + i, (OPJ_UINT32)dwt->sn + dwt->win_h_x0, + sa_col + i + 1, (OPJ_UINT32)dwt->sn + dwt->win_h_x1, + (OPJ_INT32*)(dwt->wavelet + 1 - dwt->cas + 2 * dwt->win_h_x0) + i, + 0, 8, OPJ_TRUE); + assert(ret); + } +} + #ifdef __SSE__ static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, @@ -2146,7 +2253,9 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, OPJ_UINT32 rh = (OPJ_UINT32)(res->y1 - res->y0); /* height of the resolution level computed */ - OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0); + OPJ_UINT32 w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - + 1].x1 - + tilec->resolutions[tilec->minimum_num_resolutions - 1].x0); size_t l_data_size; @@ -2262,10 +2371,10 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, } static -OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_t *tcd, - opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, +OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, OPJ_UINT32 numres) { + opj_sparse_array_int32_t* sa; opj_v4dwt_t h; opj_v4dwt_t v; OPJ_UINT32 resno; @@ -2275,31 +2384,37 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_t *tcd, const OPJ_UINT32 filter_width = 4U; opj_tcd_resolution_t* tr = tilec->resolutions; + opj_tcd_resolution_t* tr_max = &(tilec->resolutions[numres - 1]); OPJ_UINT32 rw = (OPJ_UINT32)(tr->x1 - tr->x0); /* width of the resolution level computed */ OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 - tr->y0); /* height of the resolution level computed */ - OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0); - size_t l_data_size; - opj_image_comp_t* image_comp = &(tcd->image->comps[tilec->compno]); /* Compute the intersection of the area of interest, expressed in tile coordinates */ /* with the tile coordinates */ - OPJ_UINT32 win_tcx0 = opj_uint_max( - 
(OPJ_UINT32)tilec->x0, - opj_uint_ceildiv(tcd->decoded_x0, image_comp->dx)); - OPJ_UINT32 win_tcy0 = opj_uint_max( - (OPJ_UINT32)tilec->y0, - opj_uint_ceildiv(tcd->decoded_y0, image_comp->dy)); - OPJ_UINT32 win_tcx1 = opj_uint_min( - (OPJ_UINT32)tilec->x1, - opj_uint_ceildiv(tcd->decoded_x1, image_comp->dx)); - OPJ_UINT32 win_tcy1 = opj_uint_min( - (OPJ_UINT32)tilec->y1, - opj_uint_ceildiv(tcd->decoded_y1, image_comp->dy)); + OPJ_UINT32 win_tcx0 = tilec->win_x0; + OPJ_UINT32 win_tcy0 = tilec->win_y0; + OPJ_UINT32 win_tcx1 = tilec->win_x1; + OPJ_UINT32 win_tcy1 = tilec->win_y1; + + sa = opj_dwt_init_sparse_array(tilec, numres); + + if (numres == 1U) { + OPJ_BOOL ret = opj_sparse_array_int32_read(sa, + tr_max->win_x0 - (OPJ_UINT32)tr_max->x0, + tr_max->win_y0 - (OPJ_UINT32)tr_max->y0, + tr_max->win_x1 - (OPJ_UINT32)tr_max->x0, + tr_max->win_y1 - (OPJ_UINT32)tr_max->y0, + tilec->data_win, + 1, tr_max->win_x1 - tr_max->win_x0, + OPJ_TRUE); + assert(ret); + opj_sparse_array_int32_free(sa); + return OPJ_TRUE; + } l_data_size = opj_dwt_max_resolution(tr, numres); /* overflow check */ @@ -2320,8 +2435,7 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_t *tcd, } v.wavelet = h.wavelet; - for (resno = 1; --numres; resno++) { - OPJ_FLOAT32 * OPJ_RESTRICT aj = (OPJ_FLOAT32*) tilec->data; + for (resno = 1; resno < numres; resno ++) { OPJ_UINT32 j; /* Window of interest subband-based coordinates */ OPJ_UINT32 win_ll_x0, win_ll_y0, win_ll_x1, win_ll_y1; @@ -2408,19 +2522,24 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_t *tcd, h.win_l_x1 = win_ll_x1; h.win_h_x0 = win_hl_x0; h.win_h_x1 = win_hl_x1; - for (j = 0; j + 3 < rh; j += 4, aj += w * 4) { + for (j = 0; j + 3 < rh; j += 4) { if ((j + 3 >= win_ll_y0 && j < win_ll_y1) || (j + 3 >= win_lh_y0 + (OPJ_UINT32)v.sn && j < win_lh_y1 + (OPJ_UINT32)v.sn)) { OPJ_UINT32 k; - opj_v4dwt_interleave_h(&h, aj, w, rh - j); + opj_v4dwt_interleave_partial_h(&h, sa, j, opj_uint_min(4U, rh - j)); opj_v4dwt_decode(&h); - - for (k = win_tr_x0; k < 
win_tr_x1; k++) { - aj[k ] = h.wavelet[k].f[0]; - aj[k + w ] = h.wavelet[k].f[1]; - aj[k + w * 2] = h.wavelet[k].f[2]; - aj[k + w * 3] = h.wavelet[k].f[3]; + for (k = 0; k < 4; k++) { + if (!opj_sparse_array_int32_write(sa, + win_tr_x0, j + k, + win_tr_x1, j + k + 1, + (OPJ_INT32*)&h.wavelet[win_tr_x0].f[k], + 4, 0, OPJ_TRUE)) { + /* FIXME event manager error callback */ + opj_sparse_array_int32_free(sa); + opj_aligned_free(h.wavelet); + return OPJ_FALSE; + } } } } @@ -2430,18 +2549,18 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_t *tcd, (j + 3 >= win_lh_y0 + (OPJ_UINT32)v.sn && j < win_lh_y1 + (OPJ_UINT32)v.sn))) { OPJ_UINT32 k; - opj_v4dwt_interleave_h(&h, aj, w, rh - j); + opj_v4dwt_interleave_partial_h(&h, sa, j, rh - j); opj_v4dwt_decode(&h); - for (k = win_tr_x0; k < win_tr_x1; k++) { - switch (rh - j) { - case 3: - aj[k + w * 2] = h.wavelet[k].f[2]; - /* FALLTHRU */ - case 2: - aj[k + w ] = h.wavelet[k].f[1]; - /* FALLTHRU */ - case 1: - aj[k ] = h.wavelet[k].f[0]; + for (k = 0; k < rh - j; k++) { + if (!opj_sparse_array_int32_write(sa, + win_tr_x0, j + k, + win_tr_x1, j + k + 1, + (OPJ_INT32*)&h.wavelet[win_tr_x0].f[k], + 4, 0, OPJ_TRUE)) { + /* FIXME event manager error callback */ + opj_sparse_array_int32_free(sa); + opj_aligned_free(h.wavelet); + return OPJ_FALSE; } } } @@ -2450,21 +2569,41 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_t *tcd, v.win_l_x1 = win_ll_y1; v.win_h_x0 = win_lh_y0; v.win_h_x1 = win_lh_y1; - aj = (OPJ_FLOAT32*) tilec->data; - aj += win_tr_x0; - for (j = win_tr_x0; j < win_tr_x1; j += 4, aj += 4) { + for (j = win_tr_x0; j < win_tr_x1; j += 4) { OPJ_UINT32 nb_elts = opj_uint_min(4U, win_tr_x1 - j); OPJ_UINT32 k; - opj_v4dwt_interleave_v(&v, aj, w, nb_elts); + opj_v4dwt_interleave_partial_v(&v, sa, j, nb_elts); opj_v4dwt_decode(&v); - for (k = win_tr_y0; k < win_tr_y1; ++k) { - memcpy(&aj[k * w], &v.wavelet[k], nb_elts * sizeof(OPJ_FLOAT32)); + for (k = 0; k < nb_elts; k++) { + if (!opj_sparse_array_int32_write(sa, + j + k, 
win_tr_y0, + j + k + 1, win_tr_y1, + (OPJ_INT32*)&h.wavelet[win_tr_y0].f[k], + 0, 4, OPJ_TRUE)) { + /* FIXME event manager error callback */ + opj_sparse_array_int32_free(sa); + opj_aligned_free(h.wavelet); + return OPJ_FALSE; + } } } } + { + OPJ_BOOL ret = opj_sparse_array_int32_read(sa, + tr_max->win_x0 - (OPJ_UINT32)tr_max->x0, + tr_max->win_y0 - (OPJ_UINT32)tr_max->y0, + tr_max->win_x1 - (OPJ_UINT32)tr_max->x0, + tr_max->win_y1 - (OPJ_UINT32)tr_max->y0, + tilec->data_win, + 1, tr_max->win_x1 - tr_max->win_x0, + OPJ_TRUE); + assert(ret); + } + opj_sparse_array_int32_free(sa); + opj_aligned_free(h.wavelet); return OPJ_TRUE; } @@ -2474,9 +2613,9 @@ OPJ_BOOL opj_dwt_decode_real(opj_tcd_t *p_tcd, opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, OPJ_UINT32 numres) { - if (opj_dwt_is_whole_tile_decoding(p_tcd, tilec, numres)) { + if (p_tcd->whole_tile_decoding) { return opj_dwt_decode_tile_97(tilec, numres); } else { - return opj_dwt_decode_partial_97(p_tcd, tilec, numres); + return opj_dwt_decode_partial_97(tilec, numres); } } diff --git a/src/lib/openjp2/dwt.h b/src/lib/openjp2/dwt.h index a66ac71e0..4f63e524a 100644 --- a/src/lib/openjp2/dwt.h +++ b/src/lib/openjp2/dwt.h @@ -63,7 +63,7 @@ OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec); /** Inverse 5-3 wavelet transform in 2-D. Apply a reversible inverse DWT transform to a component of an image. -@param tcd TCD handle +@param p_tcd TCD handle @param tilec Tile component information (current tile) @param numres Number of resolution levels to decode */ @@ -93,7 +93,7 @@ OPJ_BOOL opj_dwt_encode_real(opj_tcd_tilecomp_t * tilec); /** Inverse 9-7 wavelet transform in 2-D. Apply an irreversible inverse DWT transform to a component of an image. 
-@param tcd TCD handle +@param p_tcd TCD handle @param tilec Tile component information (current tile) @param numres Number of resolution levels to decode */ diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c index 174cf7696..0d8bbc3fd 100644 --- a/src/lib/openjp2/j2k.c +++ b/src/lib/openjp2/j2k.c @@ -49,8 +49,6 @@ /** @name Local static functions */ /*@{*/ -#define OPJ_UNUSED(x) (void)x - /** * Sets up the procedures to do on reading header. Developpers wanting to extend the library can add their own reading procedures. */ @@ -371,7 +369,7 @@ static OPJ_BOOL opj_j2k_pre_write_tile(opj_j2k_t * p_j2k, opj_stream_private_t *p_stream, opj_event_mgr_t * p_manager); -static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, OPJ_BYTE * p_data, +static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, opj_image_t* p_output_image); static void opj_get_tile_dimensions(opj_image_t * l_image, @@ -8789,7 +8787,7 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k, *p_tile_index = p_j2k->m_current_tile_number; *p_go_on = OPJ_TRUE; - *p_data_size = opj_tcd_get_decoded_tile_size(p_j2k->m_tcd); + *p_data_size = opj_tcd_get_decoded_tile_size(p_j2k->m_tcd, OPJ_FALSE); if (*p_data_size == UINT_MAX) { return OPJ_FALSE; } @@ -8902,26 +8900,24 @@ OPJ_BOOL opj_j2k_decode_tile(opj_j2k_t * p_j2k, return OPJ_TRUE; } -static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, OPJ_BYTE * p_data, +static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, opj_image_t* p_output_image) { - OPJ_UINT32 i, j, k = 0; + OPJ_UINT32 i, j; OPJ_UINT32 l_width_src, l_height_src; OPJ_UINT32 l_width_dest, l_height_dest; OPJ_INT32 l_offset_x0_src, l_offset_y0_src, l_offset_x1_src, l_offset_y1_src; - OPJ_SIZE_T l_start_offset_src, l_line_offset_src, l_end_offset_src ; + OPJ_SIZE_T l_start_offset_src; OPJ_UINT32 l_start_x_dest, l_start_y_dest; OPJ_UINT32 l_x0_dest, l_y0_dest, l_x1_dest, l_y1_dest; - OPJ_SIZE_T l_start_offset_dest, l_line_offset_dest; + OPJ_SIZE_T l_start_offset_dest; 
opj_image_comp_t * l_img_comp_src = 00; opj_image_comp_t * l_img_comp_dest = 00; opj_tcd_tilecomp_t * l_tilec = 00; opj_image_t * l_image_src = 00; - OPJ_UINT32 l_size_comp, l_remaining; OPJ_INT32 * l_dest_ptr; - opj_tcd_resolution_t* l_res = 00; l_tilec = p_tcd->tcd_image->tiles->comps; l_image_src = p_tcd->image; @@ -8930,6 +8926,9 @@ static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, OPJ_BYTE * p_data, l_img_comp_dest = p_output_image->comps; for (i = 0; i < l_image_src->numcomps; i++) { + OPJ_INT32 res_x0, res_x1, res_y0, res_y1; + OPJ_UINT32 src_data_stride; + const OPJ_INT32* p_src_data; /* Allocate output component buffer if necessary */ if (!l_img_comp_dest->data) { @@ -8953,29 +8952,38 @@ static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, OPJ_BYTE * p_data, /* Copy info from decoded comp image to output image */ l_img_comp_dest->resno_decoded = l_img_comp_src->resno_decoded; - /*-----*/ - /* Compute the precision of the output buffer */ - l_size_comp = l_img_comp_src->prec >> 3; /*(/ 8)*/ - l_remaining = l_img_comp_src->prec & 7; /* (%8) */ - l_res = l_tilec->resolutions + l_img_comp_src->resno_decoded; - - if (l_remaining) { - ++l_size_comp; + if (p_tcd->whole_tile_decoding) { + opj_tcd_resolution_t* l_res = l_tilec->resolutions + + l_img_comp_src->resno_decoded; + res_x0 = l_res->x0; + res_y0 = l_res->y0; + res_x1 = l_res->x1; + res_y1 = l_res->y1; + src_data_stride = (OPJ_UINT32)( + l_tilec->resolutions[l_tilec->minimum_num_resolutions - 1].x1 - + l_tilec->resolutions[l_tilec->minimum_num_resolutions - 1].x0); + p_src_data = l_tilec->data; + } else { + opj_tcd_resolution_t* l_res = l_tilec->resolutions + + l_img_comp_src->resno_decoded; + res_x0 = (OPJ_INT32)l_res->win_x0; + res_y0 = (OPJ_INT32)l_res->win_y0; + res_x1 = (OPJ_INT32)l_res->win_x1; + res_y1 = (OPJ_INT32)l_res->win_y1; + src_data_stride = l_res->win_x1 - l_res->win_x0; + p_src_data = l_tilec->data_win; } - if (l_size_comp == 3) { - l_size_comp = 4; - } - /*-----*/ + 
l_width_src = (OPJ_UINT32)(res_x1 - res_x0); + l_height_src = (OPJ_UINT32)(res_y1 - res_y0); + /* Current tile component size*/ /*if (i == 0) { fprintf(stdout, "SRC: l_res_x0=%d, l_res_x1=%d, l_res_y0=%d, l_res_y1=%d\n", - l_res->x0, l_res->x1, l_res->y0, l_res->y1); + res_x0, res_x1, res_y0, res_y1); }*/ - l_width_src = (OPJ_UINT32)(l_res->x1 - l_res->x0); - l_height_src = (OPJ_UINT32)(l_res->y1 - l_res->y0); /* Border of the current output component*/ l_x0_dest = opj_uint_ceildivpow2(l_img_comp_dest->x0, l_img_comp_dest->factor); @@ -8996,53 +9004,53 @@ static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, OPJ_BYTE * p_data, * l_start_y_dest, l_width_dest, l_height_dest) which will be modified * by this input area. * */ - assert(l_res->x0 >= 0); - assert(l_res->x1 >= 0); - if (l_x0_dest < (OPJ_UINT32)l_res->x0) { - l_start_x_dest = (OPJ_UINT32)l_res->x0 - l_x0_dest; + assert(res_x0 >= 0); + assert(res_x1 >= 0); + if (l_x0_dest < (OPJ_UINT32)res_x0) { + l_start_x_dest = (OPJ_UINT32)res_x0 - l_x0_dest; l_offset_x0_src = 0; - if (l_x1_dest >= (OPJ_UINT32)l_res->x1) { + if (l_x1_dest >= (OPJ_UINT32)res_x1) { l_width_dest = l_width_src; l_offset_x1_src = 0; } else { - l_width_dest = l_x1_dest - (OPJ_UINT32)l_res->x0 ; + l_width_dest = l_x1_dest - (OPJ_UINT32)res_x0 ; l_offset_x1_src = (OPJ_INT32)(l_width_src - l_width_dest); } } else { l_start_x_dest = 0U; - l_offset_x0_src = (OPJ_INT32)l_x0_dest - l_res->x0; + l_offset_x0_src = (OPJ_INT32)l_x0_dest - res_x0; - if (l_x1_dest >= (OPJ_UINT32)l_res->x1) { + if (l_x1_dest >= (OPJ_UINT32)res_x1) { l_width_dest = l_width_src - (OPJ_UINT32)l_offset_x0_src; l_offset_x1_src = 0; } else { l_width_dest = l_img_comp_dest->w ; - l_offset_x1_src = l_res->x1 - (OPJ_INT32)l_x1_dest; + l_offset_x1_src = res_x1 - (OPJ_INT32)l_x1_dest; } } - if (l_y0_dest < (OPJ_UINT32)l_res->y0) { - l_start_y_dest = (OPJ_UINT32)l_res->y0 - l_y0_dest; + if (l_y0_dest < (OPJ_UINT32)res_y0) { + l_start_y_dest = (OPJ_UINT32)res_y0 - l_y0_dest; 
l_offset_y0_src = 0; - if (l_y1_dest >= (OPJ_UINT32)l_res->y1) { + if (l_y1_dest >= (OPJ_UINT32)res_y1) { l_height_dest = l_height_src; l_offset_y1_src = 0; } else { - l_height_dest = l_y1_dest - (OPJ_UINT32)l_res->y0 ; + l_height_dest = l_y1_dest - (OPJ_UINT32)res_y0 ; l_offset_y1_src = (OPJ_INT32)(l_height_src - l_height_dest); } } else { l_start_y_dest = 0U; - l_offset_y0_src = (OPJ_INT32)l_y0_dest - l_res->y0; + l_offset_y0_src = (OPJ_INT32)l_y0_dest - res_y0; - if (l_y1_dest >= (OPJ_UINT32)l_res->y1) { + if (l_y1_dest >= (OPJ_UINT32)res_y1) { l_height_dest = l_height_src - (OPJ_UINT32)l_offset_y0_src; l_offset_y1_src = 0; } else { l_height_dest = l_img_comp_dest->h ; - l_offset_y1_src = l_res->y1 - (OPJ_INT32)l_y1_dest; + l_offset_y1_src = res_y1 - (OPJ_INT32)l_y1_dest; } } @@ -9058,114 +9066,24 @@ static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, OPJ_BYTE * p_data, /* Compute the input buffer offset */ l_start_offset_src = (OPJ_SIZE_T)l_offset_x0_src + (OPJ_SIZE_T)l_offset_y0_src - * (OPJ_SIZE_T)l_width_src; - l_line_offset_src = (OPJ_SIZE_T)l_offset_x1_src + (OPJ_SIZE_T)l_offset_x0_src; - l_end_offset_src = (OPJ_SIZE_T)l_offset_y1_src * (OPJ_SIZE_T)l_width_src - - (OPJ_SIZE_T)l_offset_x0_src; + * (OPJ_SIZE_T)src_data_stride; /* Compute the output buffer offset */ l_start_offset_dest = (OPJ_SIZE_T)l_start_x_dest + (OPJ_SIZE_T)l_start_y_dest * (OPJ_SIZE_T)l_img_comp_dest->w; - l_line_offset_dest = (OPJ_SIZE_T)l_img_comp_dest->w - (OPJ_SIZE_T)l_width_dest; /* Move the output buffer to the first place where we will write*/ l_dest_ptr = l_img_comp_dest->data + l_start_offset_dest; - /*if (i == 0) { - fprintf(stdout, "COMPO[%d]:\n",i); - fprintf(stdout, "SRC: l_start_x_src=%d, l_start_y_src=%d, l_width_src=%d, l_height_src=%d\n" - "\t tile offset:%d, %d, %d, %d\n" - "\t buffer offset: %d; %d, %d\n", - l_res->x0, l_res->y0, l_width_src, l_height_src, - l_offset_x0_src, l_offset_y0_src, l_offset_x1_src, l_offset_y1_src, - l_start_offset_src, 
l_line_offset_src, l_end_offset_src); - - fprintf(stdout, "DEST: l_start_x_dest=%d, l_start_y_dest=%d, l_width_dest=%d, l_height_dest=%d\n" - "\t start offset: %d, line offset= %d\n", - l_start_x_dest, l_start_y_dest, l_width_dest, l_height_dest, l_start_offset_dest, l_line_offset_dest); - }*/ - - switch (l_size_comp) { - case 1: { - OPJ_CHAR * l_src_ptr = (OPJ_CHAR*) p_data; - l_src_ptr += l_start_offset_src; /* Move to the first place where we will read*/ - - if (l_img_comp_src->sgnd) { - for (j = 0 ; j < l_height_dest ; ++j) { - for (k = 0 ; k < l_width_dest ; ++k) { - *(l_dest_ptr++) = (OPJ_INT32)(* - (l_src_ptr++)); /* Copy only the data needed for the output image */ - } - - l_dest_ptr += - l_line_offset_dest; /* Move to the next place where we will write */ - l_src_ptr += l_line_offset_src ; /* Move to the next place where we will read */ - } - } else { - for (j = 0 ; j < l_height_dest ; ++j) { - for (k = 0 ; k < l_width_dest ; ++k) { - *(l_dest_ptr++) = (OPJ_INT32)((*(l_src_ptr++)) & 0xff); - } - - l_dest_ptr += l_line_offset_dest; - l_src_ptr += l_line_offset_src; - } - } - - l_src_ptr += - l_end_offset_src; /* Move to the end of this component-part of the input buffer */ - p_data = (OPJ_BYTE*) - l_src_ptr; /* Keep the current position for the next component-part */ - } - break; - case 2: { - OPJ_INT16 * l_src_ptr = (OPJ_INT16 *) p_data; - l_src_ptr += l_start_offset_src; - - if (l_img_comp_src->sgnd) { - for (j = 0; j < l_height_dest; ++j) { - for (k = 0; k < l_width_dest; ++k) { - OPJ_INT16 val; - memcpy(&val, l_src_ptr, sizeof(val)); - l_src_ptr ++; - *(l_dest_ptr++) = val; - } - - l_dest_ptr += l_line_offset_dest; - l_src_ptr += l_line_offset_src ; - } - } else { - for (j = 0; j < l_height_dest; ++j) { - for (k = 0; k < l_width_dest; ++k) { - OPJ_INT16 val; - memcpy(&val, l_src_ptr, sizeof(val)); - l_src_ptr ++; - *(l_dest_ptr++) = val & 0xffff; - } - - l_dest_ptr += l_line_offset_dest; - l_src_ptr += l_line_offset_src ; - } - } - - l_src_ptr += 
l_end_offset_src; - p_data = (OPJ_BYTE*) l_src_ptr; - } - break; - case 4: { - OPJ_INT32 * l_src_ptr = (OPJ_INT32 *) p_data; + { + const OPJ_INT32 * l_src_ptr = p_src_data; l_src_ptr += l_start_offset_src; for (j = 0; j < l_height_dest; ++j) { memcpy(l_dest_ptr, l_src_ptr, l_width_dest * sizeof(OPJ_INT32)); - l_dest_ptr += l_width_dest + l_line_offset_dest; - l_src_ptr += l_width_dest + l_line_offset_src ; + l_dest_ptr += l_img_comp_dest->w; + l_src_ptr += src_data_stride; } - - l_src_ptr += l_end_offset_src; - p_data = (OPJ_BYTE*) l_src_ptr; - } - break; } ++l_img_comp_dest; @@ -10548,10 +10466,9 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, { OPJ_BOOL l_go_on = OPJ_TRUE; OPJ_UINT32 l_current_tile_no; - OPJ_UINT32 l_data_size, l_max_data_size; + OPJ_UINT32 l_data_size; OPJ_INT32 l_tile_x0, l_tile_y0, l_tile_x1, l_tile_y1; OPJ_UINT32 l_nb_comps; - OPJ_BYTE * l_current_data; OPJ_UINT32 nr_tiles = 0; /* Particular case for whole single tile decoding */ @@ -10595,13 +10512,6 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, return OPJ_TRUE; } - l_current_data = (OPJ_BYTE*)opj_malloc(1000); - if (! l_current_data) { - opj_event_msg(p_manager, EVT_ERROR, "Not enough memory to decode tiles\n"); - return OPJ_FALSE; - } - l_max_data_size = 1000; - for (;;) { if (! opj_j2k_read_tile_header(p_j2k, &l_current_tile_no, @@ -10612,7 +10522,6 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, &l_go_on, p_stream, p_manager)) { - opj_free(l_current_data); return OPJ_FALSE; } @@ -10620,34 +10529,22 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, break; } - if (l_data_size > l_max_data_size) { - OPJ_BYTE *l_new_current_data = (OPJ_BYTE *) opj_realloc(l_current_data, - l_data_size); - if (! 
l_new_current_data) { - opj_free(l_current_data); - opj_event_msg(p_manager, EVT_ERROR, "Not enough memory to decode tile %d/%d\n", - l_current_tile_no + 1, p_j2k->m_cp.th * p_j2k->m_cp.tw); - return OPJ_FALSE; - } - l_current_data = l_new_current_data; - l_max_data_size = l_data_size; - } - - if (! opj_j2k_decode_tile(p_j2k, l_current_tile_no, l_current_data, l_data_size, + if (! opj_j2k_decode_tile(p_j2k, l_current_tile_no, NULL, 0, p_stream, p_manager)) { - opj_free(l_current_data); opj_event_msg(p_manager, EVT_ERROR, "Failed to decode tile %d/%d\n", l_current_tile_no + 1, p_j2k->m_cp.th * p_j2k->m_cp.tw); return OPJ_FALSE; } + opj_event_msg(p_manager, EVT_INFO, "Tile %d/%d has been decoded.\n", l_current_tile_no + 1, p_j2k->m_cp.th * p_j2k->m_cp.tw); - if (! opj_j2k_update_image_data(p_j2k->m_tcd, l_current_data, + if (! opj_j2k_update_image_data(p_j2k->m_tcd, p_j2k->m_output_image)) { - opj_free(l_current_data); return OPJ_FALSE; } + opj_j2k_tcp_data_destroy(&p_j2k->m_cp.tcps[l_current_tile_no]); + opj_event_msg(p_manager, EVT_INFO, "Image data has been updated with tile %d.\n\n", l_current_tile_no + 1); @@ -10660,8 +10557,6 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, } } - opj_free(l_current_data); - return OPJ_TRUE; } @@ -10694,24 +10589,15 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k, OPJ_BOOL l_go_on = OPJ_TRUE; OPJ_UINT32 l_current_tile_no; OPJ_UINT32 l_tile_no_to_dec; - OPJ_UINT32 l_data_size, l_max_data_size; + OPJ_UINT32 l_data_size; OPJ_INT32 l_tile_x0, l_tile_y0, l_tile_x1, l_tile_y1; OPJ_UINT32 l_nb_comps; - OPJ_BYTE * l_current_data; OPJ_UINT32 l_nb_tiles; OPJ_UINT32 i; - l_current_data = (OPJ_BYTE*)opj_malloc(1000); - if (! 
l_current_data) { - opj_event_msg(p_manager, EVT_ERROR, "Not enough memory to decode one tile\n"); - return OPJ_FALSE; - } - l_max_data_size = 1000; - /*Allocate and initialize some elements of codestrem index if not already done*/ if (!p_j2k->cstr_index->tile_index) { if (!opj_j2k_allocate_tile_element_cstr_index(p_j2k)) { - opj_free(l_current_data); return OPJ_FALSE; } } @@ -10726,7 +10612,6 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k, if (!(opj_stream_read_seek(p_stream, p_j2k->m_specific_param.m_decoder.m_last_sot_read_pos + 2, p_manager))) { opj_event_msg(p_manager, EVT_ERROR, "Problem with seek function\n"); - opj_free(l_current_data); return OPJ_FALSE; } } else { @@ -10734,7 +10619,6 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k, p_j2k->cstr_index->tile_index[l_tile_no_to_dec].tp_index[0].start_pos + 2, p_manager))) { opj_event_msg(p_manager, EVT_ERROR, "Problem with seek function\n"); - opj_free(l_current_data); return OPJ_FALSE; } } @@ -10763,7 +10647,6 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k, &l_go_on, p_stream, p_manager)) { - opj_free(l_current_data); return OPJ_FALSE; } @@ -10771,33 +10654,19 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k, break; } - if (l_data_size > l_max_data_size) { - OPJ_BYTE *l_new_current_data = (OPJ_BYTE *) opj_realloc(l_current_data, - l_data_size); - if (! l_new_current_data) { - opj_free(l_current_data); - l_current_data = NULL; - opj_event_msg(p_manager, EVT_ERROR, "Not enough memory to decode tile %d/%d\n", - l_current_tile_no + 1, p_j2k->m_cp.th * p_j2k->m_cp.tw); - return OPJ_FALSE; - } - l_current_data = l_new_current_data; - l_max_data_size = l_data_size; - } - - if (! opj_j2k_decode_tile(p_j2k, l_current_tile_no, l_current_data, l_data_size, + if (! 
opj_j2k_decode_tile(p_j2k, l_current_tile_no, NULL, 0, p_stream, p_manager)) { - opj_free(l_current_data); return OPJ_FALSE; } opj_event_msg(p_manager, EVT_INFO, "Tile %d/%d has been decoded.\n", l_current_tile_no + 1, p_j2k->m_cp.th * p_j2k->m_cp.tw); - if (! opj_j2k_update_image_data(p_j2k->m_tcd, l_current_data, + if (! opj_j2k_update_image_data(p_j2k->m_tcd, p_j2k->m_output_image)) { - opj_free(l_current_data); return OPJ_FALSE; } + opj_j2k_tcp_data_destroy(&p_j2k->m_cp.tcps[l_current_tile_no]); + opj_event_msg(p_manager, EVT_INFO, "Image data has been updated with tile %d.\n\n", l_current_tile_no + 1); @@ -10806,7 +10675,6 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k, if (!(opj_stream_read_seek(p_stream, p_j2k->cstr_index->main_head_end + 2, p_manager))) { opj_event_msg(p_manager, EVT_ERROR, "Problem with seek function\n"); - opj_free(l_current_data); return OPJ_FALSE; } break; @@ -10818,8 +10686,6 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k, } - opj_free(l_current_data); - return OPJ_TRUE; } diff --git a/src/lib/openjp2/opj_includes.h b/src/lib/openjp2/opj_includes.h index b33e63cef..0a8628c96 100644 --- a/src/lib/openjp2/opj_includes.h +++ b/src/lib/openjp2/opj_includes.h @@ -216,6 +216,8 @@ static INLINE long opj_lrintf(float f) /* Type to use for bit-fields in internal headers */ typedef unsigned int OPJ_BITFIELD; +#define OPJ_UNUSED(x) (void)x + #include "opj_inttypes.h" #include "opj_clock.h" #include "opj_malloc.h" @@ -243,6 +245,7 @@ typedef unsigned int OPJ_BITFIELD; #include "t2.h" #include "mct.h" #include "opj_intmath.h" +#include "sparse_array.h" #ifdef USE_JPIP #include "cidx_manager.h" diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c new file mode 100644 index 000000000..fb552f8b5 --- /dev/null +++ b/src/lib/openjp2/sparse_array.c @@ -0,0 +1,233 @@ +/* + * The copyright in this software is being made available under the 2-clauses + * BSD License, included below. 
This software may be subject to other third + * party and contributor rights, including patent rights, and no such rights + * are granted under this license. + * + * Copyright (c) 2017, IntoPix SA + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "opj_includes.h" + + +struct opj_sparse_array_int32 { + OPJ_UINT32 width; + OPJ_UINT32 height; + OPJ_UINT32 block_width; + OPJ_UINT32 block_height; + OPJ_UINT32 block_count_hor; + OPJ_UINT32 block_count_ver; + OPJ_INT32** data_blocks; +}; + +opj_sparse_array_int32_t* opj_sparse_array_int32_create(OPJ_UINT32 width, + OPJ_UINT32 height, + OPJ_UINT32 block_width, + OPJ_UINT32 block_height) +{ + opj_sparse_array_int32_t* sa; + + if (width == 0 || height == 0 || block_width == 0 || block_height == 0 || + block_width > ((OPJ_UINT32)~0U) / block_height / sizeof(OPJ_INT32)) { + return NULL; /* empty dimension, or block byte size would overflow 32 bits */ + } + sa = opj_calloc(1, sizeof(opj_sparse_array_int32_t)); + if (sa == NULL) { + return NULL; /* out of memory: sa must not be dereferenced below */ + } + sa->width = width; + sa->height = height; + sa->block_width = block_width; + sa->block_height = block_height; + sa->block_count_hor = opj_uint_ceildiv(width, block_width); + sa->block_count_ver = opj_uint_ceildiv(height, block_height); + if (sa->block_count_hor > ((OPJ_UINT32)~0U) / sa->block_count_ver) { + opj_free(sa); + return NULL; + } + sa->data_blocks = opj_calloc(sizeof(OPJ_INT32*), + sa->block_count_hor * sa->block_count_ver); + if (sa->data_blocks == NULL) { + opj_free(sa); + return NULL; + } + + return sa; +} + +void opj_sparse_array_int32_free(opj_sparse_array_int32_t* sa) +{ + if (sa) { + OPJ_UINT32 i; + for (i = 0; i < sa->block_count_hor * sa->block_count_ver; i++) { + if (sa->data_blocks[i]) { + opj_free(sa->data_blocks[i]); + } + } + opj_free(sa->data_blocks); + opj_free(sa); + } +} + +OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa, + OPJ_UINT32 x0, + OPJ_UINT32 y0, + OPJ_UINT32 x1, + OPJ_UINT32 y1) +{ + return !(x0 >= sa->width || x1 <= x0 || x1 > sa->width || + y0 >= sa->height || y1 <= y0 || y1 > sa->height); +} + +static OPJ_BOOL opj_sparse_array_int32_read_or_write( + opj_sparse_array_int32_t* sa, + OPJ_UINT32 x0, + OPJ_UINT32 y0, + OPJ_UINT32 x1, + OPJ_UINT32 y1, + OPJ_INT32* buf, + OPJ_UINT32 buf_col_stride, + OPJ_UINT32 buf_line_stride, 
+ OPJ_BOOL forgiving, + OPJ_BOOL is_read_op) +{ + OPJ_UINT32 y, block_y; + OPJ_UINT32 y_incr = 0; + if (!opj_sparse_array_is_region_valid(sa, x0, y0, x1, y1)) { + return forgiving; /* invalid region: reported as success only when forgiving is set */ + } + + block_y = y0 / sa->block_height; + for (y = y0; y < y1; block_y ++, y += y_incr) { + OPJ_UINT32 x, block_x; + OPJ_UINT32 x_incr = 0; + OPJ_UINT32 block_y_offset; + y_incr = (y == y0) ? sa->block_height - (y0 % sa->block_height) : + sa->block_height; /* rows from y to the end of the current block row */ + block_y_offset = sa->block_height - y_incr; /* row of y inside its block */ + y_incr = opj_uint_min(y_incr, y1 - y); /* do not run past the requested region */ + block_x = x0 / sa->block_width; + for (x = x0; x < x1; block_x ++, x += x_incr) { + OPJ_UINT32 j; + OPJ_UINT32 block_x_offset; + OPJ_INT32* src_block; + x_incr = (x == x0) ? sa->block_width - (x0 % sa->block_width) : sa->block_width; + block_x_offset = sa->block_width - x_incr; + x_incr = opj_uint_min(x_incr, x1 - x); + src_block = sa->data_blocks[block_y * sa->block_count_hor + block_x]; + if (is_read_op) { + if (src_block == NULL) { /* block never written: the region reads as zeros */ + for (j = 0; j < y_incr; j++) { + if (buf_col_stride == 1) { + memset(buf + (y - y0 + j) * buf_line_stride + (x - x0) * buf_col_stride, + 0, + sizeof(OPJ_INT32) * x_incr); + } else { + OPJ_UINT32 k; + for (k = 0; k < x_incr; k++) { + *(buf + (y - y0 + j) * buf_line_stride + (x - x0 + k) * buf_col_stride) = 0; + } + } + } + } else { + for (j = 0; j < y_incr; j++) { + if (buf_col_stride == 1) { + memcpy(buf + (y - y0 + j) * buf_line_stride + (x - x0) * buf_col_stride, + src_block + (block_y_offset + j) * sa->block_width + block_x_offset, + sizeof(OPJ_INT32) * x_incr); + } else { + OPJ_UINT32 k; + for (k = 0; k < x_incr; k++) { + *(buf + (y - y0 + j) * buf_line_stride + (x - x0 + k) * buf_col_stride) = + *(src_block + (block_y_offset + j) * sa->block_width + block_x_offset + k); + } + } + } + } + } else { + if (src_block == NULL) { /* first write touching this block: allocate it */ + src_block = opj_calloc(1, + sa->block_width * sa->block_height * sizeof(OPJ_INT32)); + if (src_block == NULL) { + return OPJ_FALSE; + } + sa->data_blocks[block_y * sa->block_count_hor + 
block_x] = src_block; + } + + for (j = 0; j < y_incr; j++) { + if (buf_col_stride == 1) { + memcpy(src_block + (block_y_offset + j) * sa->block_width + block_x_offset, + buf + (y - y0 + j) * buf_line_stride + (x - x0) * buf_col_stride, + sizeof(OPJ_INT32) * x_incr); + } else { + OPJ_UINT32 k; + for (k = 0; k < x_incr; k++) { + *(src_block + (block_y_offset + j) * sa->block_width + block_x_offset + k) = + *(buf + (y - y0 + j) * buf_line_stride + (x - x0 + k) * buf_col_stride); + } + } + } + } + } + } + + return OPJ_TRUE; +} + +OPJ_BOOL opj_sparse_array_int32_read(opj_sparse_array_int32_t* sa, + OPJ_UINT32 x0, + OPJ_UINT32 y0, + OPJ_UINT32 x1, + OPJ_UINT32 y1, + OPJ_INT32* dest, + OPJ_UINT32 dest_col_stride, + OPJ_UINT32 dest_line_stride, + OPJ_BOOL forgiving) +{ + return opj_sparse_array_int32_read_or_write(sa, x0, y0, x1, y1, + dest, + dest_col_stride, + dest_line_stride, + forgiving, + OPJ_TRUE); +} + +OPJ_BOOL opj_sparse_array_int32_write(opj_sparse_array_int32_t* sa, + OPJ_UINT32 x0, + OPJ_UINT32 y0, + OPJ_UINT32 x1, + OPJ_UINT32 y1, + const OPJ_INT32* src, + OPJ_UINT32 src_col_stride, + OPJ_UINT32 src_line_stride, + OPJ_BOOL forgiving) +{ + return opj_sparse_array_int32_read_or_write(sa, x0, y0, x1, y1, + (OPJ_INT32*)src, + src_col_stride, + src_line_stride, + forgiving, + OPJ_FALSE); +} diff --git a/src/lib/openjp2/sparse_array.h b/src/lib/openjp2/sparse_array.h new file mode 100644 index 000000000..485cafeae --- /dev/null +++ b/src/lib/openjp2/sparse_array.h @@ -0,0 +1,141 @@ +/* + * The copyright in this software is being made available under the 2-clauses + * BSD License, included below. This software may be subject to other third + * party and contributor rights, including patent rights, and no such rights + * are granted under this license. + * + * Copyright (c) 2017, IntoPix SA + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "opj_includes.h" + +#ifndef OPJ_SPARSE_ARRAY_H +#define OPJ_SPARSE_ARRAY_H +/** +@file sparse_array.h +@brief Sparse array management + +The functions in this file manage sparse arrays. Sparse arrays are arrays with +potentially big dimensions, but with very few samples actually set. Such sparse +arrays require allocating a low amount of memory, by just allocating memory +for blocks of the array that are set. The minimum memory allocation unit is a +block. There is a trade-off to pick up an appropriate dimension for blocks. +If it is too big, and pixels set are far from each other, too much memory will +be used. 
If blocks are too small, the book-keeping costs of blocks will raise. +*/ + +/** @defgroup SPARSE_ARRAY SPARSE ARRAYS - Sparse arrays */ +/*@{*/ + +/** Opaque type for sparse arrays that contain int32 values */ +typedef struct opj_sparse_array_int32 opj_sparse_array_int32_t; + +/** Creates a new sparse array. + * @param width total width of the array. + * @param height total height of the array + * @param block_width width of a block. + * @param block_height height of a block. + * @return a new sparse array instance, or NULL in case of failure. + */ +opj_sparse_array_int32_t* opj_sparse_array_int32_create(OPJ_UINT32 width, + OPJ_UINT32 height, + OPJ_UINT32 block_width, + OPJ_UINT32 block_height); + +/** Frees a sparse array. + * @param sa sparse array instance. + */ +void opj_sparse_array_int32_free(opj_sparse_array_int32_t* sa); + +/** Returns whether region bounds are valid (non empty and within array bounds) + * @param sa sparse array instance. + * @param x0 left x coordinate of the region. + * @param y0 top y coordinate of the region. + * @param x1 right x coordinate (not included) of the region. Must be greater than x0. + * @param y1 bottom y coordinate (not included) of the region. Must be greater than y0. + * @return OPJ_TRUE or OPJ_FALSE. + */ +OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa, + OPJ_UINT32 x0, + OPJ_UINT32 y0, + OPJ_UINT32 x1, + OPJ_UINT32 y1); + +/** Read the content of a rectangular region of the sparse array into a + * user buffer. + * + * Regions not written with opj_sparse_array_int32_write() are read as 0. + * + * @param sa sparse array instance. + * @param x0 left x coordinate of the region to read in the sparse array. + * @param y0 top y coordinate of the region to read in the sparse array. + * @param x1 right x coordinate (not included) of the region to read in the sparse array. Must be greater than x0. + * @param y1 bottom y coordinate (not included) of the region to read in the sparse array. 
Must be greater than y0. + * @param dest user buffer to fill. Must be at least sizeof(int32) * ( (y1 - y0 - 1) * dest_line_stride + (x1 - x0 - 1) * dest_col_stride + 1) bytes large. + * @param dest_col_stride spacing (in elements, not in bytes) in x dimension between consecutive elements of the user buffer. + * @param dest_line_stride spacing (in elements, not in bytes) in y dimension between consecutive elements of the user buffer. + * @param forgiving if set to TRUE and the region is invalid, OPJ_TRUE will still be returned. + * @return OPJ_TRUE in case of success. + */ +OPJ_BOOL opj_sparse_array_int32_read(opj_sparse_array_int32_t* sa, + OPJ_UINT32 x0, + OPJ_UINT32 y0, + OPJ_UINT32 x1, + OPJ_UINT32 y1, + OPJ_INT32* dest, + OPJ_UINT32 dest_col_stride, + OPJ_UINT32 dest_line_stride, + OPJ_BOOL forgiving); + + +/** Write the content of a rectangular region into the sparse array from a + * user buffer. + * + * Blocks intersecting the region are allocated, if not already done. + * + * @param sa sparse array instance. + * @param x0 left x coordinate of the region to write into the sparse array. + * @param y0 top y coordinate of the region to write into the sparse array. + * @param x1 right x coordinate (not included) of the region to write into the sparse array. Must be greater than x0. + * @param y1 bottom y coordinate (not included) of the region to write into the sparse array. Must be greater than y0. + * @param src user buffer to read from. Must be at least sizeof(int32) * ( (y1 - y0 - 1) * src_line_stride + (x1 - x0 - 1) * src_col_stride + 1) bytes large. + * @param src_col_stride spacing (in elements, not in bytes) in x dimension between consecutive elements of the user buffer. + * @param src_line_stride spacing (in elements, not in bytes) in y dimension between consecutive elements of the user buffer. + * @param forgiving if set to TRUE and the region is invalid, OPJ_TRUE will still be returned. + * @return OPJ_TRUE in case of success. 
+ */ +OPJ_BOOL opj_sparse_array_int32_write(opj_sparse_array_int32_t* sa, + OPJ_UINT32 x0, + OPJ_UINT32 y0, + OPJ_UINT32 x1, + OPJ_UINT32 y1, + const OPJ_INT32* src, + OPJ_UINT32 src_col_stride, + OPJ_UINT32 src_line_stride, + OPJ_BOOL forgiving); + +/*@}*/ + +#endif /* OPJ_SPARSE_ARRAY_H */ \ No newline at end of file diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 9a192f933..953c7ab14 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -1601,7 +1601,9 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) band = job->band; tilec = job->tilec; tccp = job->tccp; - tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0); + tile_w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - 1].x1 + - + tilec->resolutions[tilec->minimum_num_resolutions - 1].x0); if (!*(job->pret)) { opj_free(job); @@ -1640,7 +1642,7 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) y += pres->y1 - pres->y0; } - datap = t1->data; + datap = cblk->decoded_data ? 
cblk->decoded_data : t1->data; cblk_w = t1->w; cblk_h = t1->h; @@ -1665,7 +1667,35 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) } } } - if (tccp->qmfbid == 1) { + + if (cblk->decoded_data) { + if (tccp->qmfbid == 1) { + for (j = 0; j < cblk_h; ++j) { + i = 0; + for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) { + OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U]; + OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U]; + OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U]; + OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U]; + datap[(j * cblk_w) + i + 0U] = tmp0 / 2; + datap[(j * cblk_w) + i + 1U] = tmp1 / 2; + datap[(j * cblk_w) + i + 2U] = tmp2 / 2; + datap[(j * cblk_w) + i + 3U] = tmp3 / 2; + } + for (; i < cblk_w; ++i) { + datap[(j * cblk_w) + i] /= 2; + } + } + } else { /* if (tccp->qmfbid == 0) */ + for (j = 0; j < cblk_h; ++j) { + for (i = 0; i < cblk_w; ++i) { + OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize; + memcpy(datap, &tmp, sizeof(tmp)); + datap++; + } + } + } + } else if (tccp->qmfbid == 1) { OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x]; for (j = 0; j < cblk_h; ++j) { @@ -1724,7 +1754,6 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd, for (precno = 0; precno < res->pw * res->ph; ++precno) { opj_tcd_precinct_t* precinct = &band->precincts[precno]; - OPJ_BOOL skip_precinct = OPJ_FALSE; if (!opj_tcd_is_subband_area_of_interest(tcd, tilec->compno, @@ -1734,51 +1763,46 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd, (OPJ_UINT32)precinct->y0, (OPJ_UINT32)precinct->x1, (OPJ_UINT32)precinct->y1)) { - skip_precinct = OPJ_TRUE; - /* TODO: do a continue here once the below 0 initialization */ - /* of tiledp is removed */ + continue; } for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) { opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno]; opj_t1_cblk_decode_processing_job_t* job; - if (skip_precinct || - !opj_tcd_is_subband_area_of_interest(tcd, - tilec->compno, - resno, - band->bandno, - 
(OPJ_UINT32)cblk->x0, - (OPJ_UINT32)cblk->y0, - (OPJ_UINT32)cblk->x1, - (OPJ_UINT32)cblk->y1)) { - - /* TODO: remove this once we don't iterate over */ - /* tile pixels that are not in the subwindow of interest */ - OPJ_UINT32 j; - OPJ_INT32 x = cblk->x0 - band->x0; - OPJ_INT32 y = cblk->y0 - band->y0; - OPJ_INT32* OPJ_RESTRICT tiledp; - OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0); + assert(cblk->decoded_data == NULL); + + if (!opj_tcd_is_subband_area_of_interest(tcd, + tilec->compno, + resno, + band->bandno, + (OPJ_UINT32)cblk->x0, + (OPJ_UINT32)cblk->y0, + (OPJ_UINT32)cblk->x1, + (OPJ_UINT32)cblk->y1)) { + continue; + } + + if (!tcd->whole_tile_decoding) { OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0); OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0); - - if (band->bandno & 1) { - opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1]; - x += pres->x1 - pres->x0; + if (cblk_w == 0 || cblk_h == 0) { + continue; } - if (band->bandno & 2) { - opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1]; - y += pres->y1 - pres->y0; - } - - tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + - (OPJ_UINT32)x]; - - for (j = 0; j < cblk_h; ++j) { - memset(tiledp + j * tile_w, 0, cblk_w * sizeof(OPJ_INT32)); + /* Zero-init required */ + cblk->decoded_data = opj_calloc(1, cblk_w * cblk_h * sizeof(OPJ_INT32)); + if (cblk->decoded_data == NULL) { + if (p_manager_mutex) { + opj_mutex_lock(p_manager_mutex); + } + opj_event_msg(p_manager, EVT_ERROR, + "Cannot allocate cblk->decoded_data\n"); + if (p_manager_mutex) { + opj_mutex_unlock(p_manager_mutex); + } + *pret = OPJ_FALSE; + return; } - continue; } job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1, @@ -1827,6 +1851,7 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, OPJ_BYTE* cblkdata = NULL; OPJ_UINT32 cblkdataindex = 0; OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */ + OPJ_INT32* original_t1_data = NULL; mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9); @@ -1893,6 +1918,13 @@ static 
OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, cblkdata = cblk->chunks[0].data; } + /* For subtile decoding, directly decode in the decoded_data buffer of */ + /* the code-block. Hack t1->data to point to it, and restore it later */ + if (cblk->decoded_data) { + original_t1_data = t1->data; + t1->data = cblk->decoded_data; + } + for (segno = 0; segno < cblk->real_num_segs; ++segno) { opj_tcd_seg_t *seg = &cblk->segs[segno]; @@ -1972,6 +2004,11 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, } } + /* Restore original t1->data if needed */ + if (cblk->decoded_data) { + t1->data = original_t1_data; + } + return OPJ_TRUE; } diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index 1c56c1b37..c221c6ed4 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -190,6 +190,10 @@ static OPJ_BOOL opj_tcd_rate_allocate_encode(opj_tcd_t *p_tcd, opj_codestream_info_t *p_cstr_info, opj_event_mgr_t *p_manager); + +static OPJ_BOOL opj_tcd_is_whole_tilecomp_decoding(opj_tcd_t *tcd, + OPJ_UINT32 compno); + /* ----------------------------------------------------------------------- */ /** @@ -679,7 +683,7 @@ OPJ_BOOL opj_alloc_tile_component_data(opj_tcd_tilecomp_t *l_tilec) ((l_tilec->data_size_needed > l_tilec->data_size) && (l_tilec->ownsData == OPJ_FALSE))) { l_tilec->data = (OPJ_INT32 *) opj_image_data_alloc(l_tilec->data_size_needed); - if (! 
l_tilec->data) { + if (!l_tilec->data && l_tilec->data_size_needed != 0) { return OPJ_FALSE; } /*fprintf(stderr, "tAllocate data of tilec (int): %d x OPJ_UINT32n",l_data_size);*/ @@ -794,22 +798,6 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, l_tilec->compno = compno; /*fprintf(stderr, "\tTile compo border = %d,%d,%d,%d\n", l_tilec->x0, l_tilec->y0,l_tilec->x1,l_tilec->y1);*/ - /* compute l_data_size with overflow check */ - l_data_size = (OPJ_UINT32)(l_tilec->x1 - l_tilec->x0); - /* issue 733, l_data_size == 0U, probably something wrong should be checked before getting here */ - if ((l_data_size > 0U) && - ((((OPJ_UINT32) - 1) / l_data_size) < (OPJ_UINT32)(l_tilec->y1 - - l_tilec->y0))) { - opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n"); - return OPJ_FALSE; - } - l_data_size = l_data_size * (OPJ_UINT32)(l_tilec->y1 - l_tilec->y0); - - if ((((OPJ_UINT32) - 1) / (OPJ_UINT32)sizeof(OPJ_UINT32)) < l_data_size) { - opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n"); - return OPJ_FALSE; - } - l_data_size = l_data_size * (OPJ_UINT32)sizeof(OPJ_UINT32); l_tilec->numresolutions = l_tccp->numresolutions; if (l_tccp->numresolutions < l_cp->m_specific_param.m_dec.m_reduce) { l_tilec->minimum_num_resolutions = 1; @@ -818,15 +806,37 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, l_cp->m_specific_param.m_dec.m_reduce; } - l_tilec->data_size_needed = l_data_size; - if (p_tcd->m_is_decoder && !opj_alloc_tile_component_data(l_tilec)) { - opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n"); - return OPJ_FALSE; + if (isEncoder) { + /* compute l_data_size with overflow check */ + l_data_size = (OPJ_UINT32)(l_tilec->x1 - l_tilec->x0); + /* issue 733, l_data_size == 0U, probably something wrong should be checked before getting here */ + if ((l_data_size > 0U) && + ((((OPJ_UINT32) - 1) / l_data_size) < (OPJ_UINT32)(l_tilec->y1 - + l_tilec->y0))) { + 
opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n"); + return OPJ_FALSE; + } + l_data_size = l_data_size * (OPJ_UINT32)(l_tilec->y1 - l_tilec->y0); + + if ((((OPJ_UINT32) - 1) / (OPJ_UINT32)sizeof(OPJ_UINT32)) < l_data_size) { + opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n"); + return OPJ_FALSE; + } + l_data_size = l_data_size * (OPJ_UINT32)sizeof(OPJ_UINT32); + + l_tilec->data_size_needed = l_data_size; } l_data_size = l_tilec->numresolutions * (OPJ_UINT32)sizeof( opj_tcd_resolution_t); + opj_aligned_free(l_tilec->data_win); + l_tilec->data_win = NULL; + l_tilec->win_x0 = 0; + l_tilec->win_y0 = 0; + l_tilec->win_x1 = 0; + l_tilec->win_y1 = 0; + if (l_tilec->resolutions == 00) { l_tilec->resolutions = (opj_tcd_resolution_t *) opj_malloc(l_data_size); if (! l_tilec->resolutions) { @@ -875,6 +885,28 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, l_res->y0 = opj_int_ceildivpow2(l_tilec->y0, (OPJ_INT32)l_level_no); l_res->x1 = opj_int_ceildivpow2(l_tilec->x1, (OPJ_INT32)l_level_no); l_res->y1 = opj_int_ceildivpow2(l_tilec->y1, (OPJ_INT32)l_level_no); + + if (!isEncoder && resno + 1 == l_tilec->minimum_num_resolutions) { + /* compute l_data_size with overflow check */ + OPJ_UINT32 res_w = (OPJ_UINT32)(l_res->x1 - l_res->x0); + OPJ_UINT32 res_h = (OPJ_UINT32)(l_res->y1 - l_res->y0); + + /* issue 733, l_data_size == 0U, probably something wrong should be checked before getting here */ + if (res_h > 0 && res_h > (((OPJ_UINT32) - 1) / res_h)) { + opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n"); + return OPJ_FALSE; + } + l_data_size = res_w * res_h; + + if ((((OPJ_UINT32) - 1) / (OPJ_UINT32)sizeof(OPJ_UINT32)) < l_data_size) { + opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n"); + return OPJ_FALSE; + } + l_data_size *= (OPJ_UINT32)sizeof(OPJ_UINT32); + + l_tilec->data_size_needed = l_data_size; + } + /*fprintf(stderr, "\t\t\tres_x0= %d, res_y0 =%d, 
res_x1=%d, res_y1=%d\n", l_res->x0, l_res->y0, l_res->x1, l_res->y1);*/ /* p. 35, table A-23, ISO/IEC FDIS154444-1 : 2000 (18 august 2000) */ l_pdx = l_tccp->prcw[resno]; @@ -1249,6 +1281,9 @@ static OPJ_BOOL opj_tcd_code_block_dec_allocate(opj_tcd_cblk_dec_t * OPJ_UINT32 l_numchunksalloc = p_code_block->numchunksalloc; OPJ_UINT32 i; + opj_free(p_code_block->decoded_data); + p_code_block->decoded_data = 00; + memset(p_code_block, 0, sizeof(opj_tcd_cblk_dec_t)); p_code_block->segs = l_segs; p_code_block->m_current_max_segs = l_current_max_segs; @@ -1262,7 +1297,8 @@ static OPJ_BOOL opj_tcd_code_block_dec_allocate(opj_tcd_cblk_dec_t * return OPJ_TRUE; } -OPJ_UINT32 opj_tcd_get_decoded_tile_size(opj_tcd_t *p_tcd) +OPJ_UINT32 opj_tcd_get_decoded_tile_size(opj_tcd_t *p_tcd, + OPJ_BOOL take_into_account_partial_decoding) { OPJ_UINT32 i; OPJ_UINT32 l_data_size = 0; @@ -1288,8 +1324,13 @@ OPJ_UINT32 opj_tcd_get_decoded_tile_size(opj_tcd_t *p_tcd) } l_res = l_tile_comp->resolutions + l_tile_comp->minimum_num_resolutions - 1; - l_temp = (OPJ_UINT32)((l_res->x1 - l_res->x0) * (l_res->y1 - - l_res->y0)); /* x1*y1 can't overflow */ + if (take_into_account_partial_decoding && !p_tcd->whole_tile_decoding) { + l_temp = (l_res->win_x1 - l_res->win_x0) * + (l_res->win_y1 - l_res->win_y0); + } else { + l_temp = (OPJ_UINT32)((l_res->x1 - l_res->x0) * (l_res->y1 - + l_res->y0)); /* x1*y1 can't overflow */ + } if (l_size_comp && UINT_MAX / l_size_comp < l_temp) { return UINT_MAX; } @@ -1401,10 +1442,10 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd, } OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd, - OPJ_UINT32 decoded_x0, - OPJ_UINT32 decoded_y0, - OPJ_UINT32 decoded_x1, - OPJ_UINT32 decoded_y1, + OPJ_UINT32 win_x0, + OPJ_UINT32 win_y0, + OPJ_UINT32 win_x1, + OPJ_UINT32 win_y1, OPJ_BYTE *p_src, OPJ_UINT32 p_max_length, OPJ_UINT32 p_tile_no, @@ -1413,12 +1454,66 @@ OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd, ) { OPJ_UINT32 l_data_read; + OPJ_UINT32 compno; + p_tcd->tcd_tileno = 
p_tile_no; p_tcd->tcp = &(p_tcd->cp->tcps[p_tile_no]); - p_tcd->decoded_x0 = decoded_x0; - p_tcd->decoded_y0 = decoded_y0; - p_tcd->decoded_x1 = decoded_x1; - p_tcd->decoded_y1 = decoded_y1; + p_tcd->win_x0 = win_x0; + p_tcd->win_y0 = win_y0; + p_tcd->win_x1 = win_x1; + p_tcd->win_y1 = win_y1; + p_tcd->whole_tile_decoding = OPJ_TRUE; + + for (compno = 0; compno < p_tcd->image->numcomps; compno++) { + if (!opj_tcd_is_whole_tilecomp_decoding(p_tcd, compno)) { + p_tcd->whole_tile_decoding = OPJ_FALSE; + break; + } + } + + if (p_tcd->whole_tile_decoding) { + for (compno = 0; compno < p_tcd->image->numcomps; compno++) { + if (!opj_alloc_tile_component_data(&(p_tcd->tcd_image->tiles->comps[compno]))) { + opj_event_msg(p_manager, EVT_ERROR, "Not enough memory for tile data\n"); + return OPJ_FALSE; + } + } + } else { + /* Compute restricted tile-component and tile-resolution coordinates */ + /* of the window of interest, but defer the memory allocation until */ + /* we know the resno_decoded */ + for (compno = 0; compno < p_tcd->image->numcomps; compno++) { + OPJ_UINT32 resno; + opj_tcd_tilecomp_t* tilec = &(p_tcd->tcd_image->tiles->comps[compno]); + opj_image_comp_t* image_comp = &(p_tcd->image->comps[compno]); + /* Compute the intersection of the area of interest, expressed in tile coordinates */ + /* with the tile coordinates */ + tilec->win_x0 = opj_uint_max( + (OPJ_UINT32)tilec->x0, + opj_uint_ceildiv(p_tcd->win_x0, image_comp->dx)); + tilec->win_y0 = opj_uint_max( + (OPJ_UINT32)tilec->y0, + opj_uint_ceildiv(p_tcd->win_y0, image_comp->dy)); + tilec->win_x1 = opj_uint_min( + (OPJ_UINT32)tilec->x1, + opj_uint_ceildiv(p_tcd->win_x1, image_comp->dx)); + tilec->win_y1 = opj_uint_min( + (OPJ_UINT32)tilec->y1, + opj_uint_ceildiv(p_tcd->win_y1, image_comp->dy)); + + for (resno = 0; resno < tilec->numresolutions; ++resno) { + opj_tcd_resolution_t *res = tilec->resolutions + resno; + res->win_x0 = opj_uint_ceildivpow2(tilec->win_x0, + tilec->numresolutions - 1 - resno); + 
res->win_y0 = opj_uint_ceildivpow2(tilec->win_y0, + tilec->numresolutions - 1 - resno); + res->win_x1 = opj_uint_ceildivpow2(tilec->win_x1, + tilec->numresolutions - 1 - resno); + res->win_y1 = opj_uint_ceildivpow2(tilec->win_y1, + tilec->numresolutions - 1 - resno); + } + } + } #ifdef TODO_MSD /* FIXME */ /* INDEX >> */ @@ -1461,6 +1556,42 @@ OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd, } /* FIXME _ProfStop(PGROUP_T1); */ + + /* For subtile decoding, now we know the resno_decoded, we can allocate */ + /* the tile data buffer */ + if (!p_tcd->whole_tile_decoding) { + for (compno = 0; compno < p_tcd->image->numcomps; compno++) { + opj_tcd_tilecomp_t* tilec = &(p_tcd->tcd_image->tiles->comps[compno]); + opj_image_comp_t* image_comp = &(p_tcd->image->comps[compno]); + opj_tcd_resolution_t *res = tilec->resolutions + image_comp->resno_decoded; + OPJ_UINT32 w = res->win_x1 - res->win_x0; + OPJ_UINT32 h = res->win_y1 - res->win_y0; + OPJ_UINT32 l_data_size; + + opj_aligned_free(tilec->data_win); + tilec->data_win = NULL; + + if (w > 0 && h > 0) { + if (w > ((OPJ_UINT32) - 1) / h) { + opj_event_msg(p_manager, EVT_ERROR, "Not enough memory for tile data\n"); + return OPJ_FALSE; + } + l_data_size = w * h; + if (l_data_size > ((OPJ_UINT32) - 1) / sizeof(OPJ_INT32)) { + opj_event_msg(p_manager, EVT_ERROR, "Not enough memory for tile data\n"); + return OPJ_FALSE; + } + l_data_size *= (OPJ_UINT32)sizeof(OPJ_INT32); + + tilec->data_win = opj_aligned_malloc(l_data_size); + if (tilec->data_win == NULL) { + opj_event_msg(p_manager, EVT_ERROR, "Not enough memory for tile data\n"); + return OPJ_FALSE; + } + } + } + } + /*----------------DWT---------------------*/ /* FIXME _ProfStart(PGROUP_DWT); */ @@ -1502,7 +1633,7 @@ OPJ_BOOL opj_tcd_update_tile_data(opj_tcd_t *p_tcd, OPJ_UINT32 l_size_comp, l_remaining; OPJ_UINT32 l_stride, l_width, l_height; - l_data_size = opj_tcd_get_decoded_tile_size(p_tcd); + l_data_size = opj_tcd_get_decoded_tile_size(p_tcd, OPJ_TRUE); if (l_data_size == 
UINT_MAX || l_data_size > p_dest_length) { return OPJ_FALSE; } @@ -1511,12 +1642,23 @@ OPJ_BOOL opj_tcd_update_tile_data(opj_tcd_t *p_tcd, l_img_comp = p_tcd->image->comps; for (i = 0; i < p_tcd->image->numcomps; ++i) { + const OPJ_INT32* l_src_data; l_size_comp = l_img_comp->prec >> 3; /*(/ 8)*/ l_remaining = l_img_comp->prec & 7; /* (%8) */ l_res = l_tilec->resolutions + l_img_comp->resno_decoded; - l_width = (OPJ_UINT32)(l_res->x1 - l_res->x0); - l_height = (OPJ_UINT32)(l_res->y1 - l_res->y0); - l_stride = (OPJ_UINT32)(l_tilec->x1 - l_tilec->x0) - l_width; + if (p_tcd->whole_tile_decoding) { + l_width = (OPJ_UINT32)(l_res->x1 - l_res->x0); + l_height = (OPJ_UINT32)(l_res->y1 - l_res->y0); + l_stride = (OPJ_UINT32)(l_tilec->resolutions[l_tilec->minimum_num_resolutions - + 1].x1 - + l_tilec->resolutions[l_tilec->minimum_num_resolutions - 1].x0) - l_width; + l_src_data = l_tilec->data; + } else { + l_width = l_res->win_x1 - l_res->win_x0; + l_height = l_res->win_y1 - l_res->win_y0; + l_stride = 0; + l_src_data = l_tilec->data_win; + } if (l_remaining) { ++l_size_comp; @@ -1529,7 +1671,7 @@ OPJ_BOOL opj_tcd_update_tile_data(opj_tcd_t *p_tcd, switch (l_size_comp) { case 1: { OPJ_CHAR * l_dest_ptr = (OPJ_CHAR *) p_dest; - const OPJ_INT32 * l_src_ptr = l_tilec->data; + const OPJ_INT32 * l_src_ptr = l_src_data; if (l_img_comp->sgnd) { for (j = 0; j < l_height; ++j) { @@ -1551,7 +1693,7 @@ OPJ_BOOL opj_tcd_update_tile_data(opj_tcd_t *p_tcd, } break; case 2: { - const OPJ_INT32 * l_src_ptr = l_tilec->data; + const OPJ_INT32 * l_src_ptr = l_src_data; OPJ_INT16 * l_dest_ptr = (OPJ_INT16 *) p_dest; if (l_img_comp->sgnd) { @@ -1579,7 +1721,7 @@ OPJ_BOOL opj_tcd_update_tile_data(opj_tcd_t *p_tcd, break; case 4: { OPJ_INT32 * l_dest_ptr = (OPJ_INT32 *) p_dest; - OPJ_INT32 * l_src_ptr = l_tilec->data; + const OPJ_INT32 * l_src_ptr = l_src_data; for (j = 0; j < l_height; ++j) { memcpy(l_dest_ptr, l_src_ptr, l_width * sizeof(OPJ_INT32)); @@ -1674,6 +1816,9 @@ static void 
opj_tcd_free_tile(opj_tcd_t *p_tcd) l_tile_comp->data_size = 0; l_tile_comp->data_size_needed = 0; } + + opj_aligned_free(l_tile_comp->data_win); + ++l_tile_comp; } @@ -1764,18 +1909,6 @@ static OPJ_BOOL opj_tcd_dwt_decode(opj_tcd_t *p_tcd) opj_image_comp_t * l_img_comp = p_tcd->image->comps; for (compno = 0; compno < l_tile->numcomps; compno++) { - /* - if (tcd->cp->reduce != 0) { - tcd->image->comps[compno].resno_decoded = - tile->comps[compno].numresolutions - tcd->cp->reduce - 1; - if (tcd->image->comps[compno].resno_decoded < 0) - { - return false; - } - } - numres2decode = tcd->image->comps[compno].resno_decoded + 1; - if(numres2decode > 0){ - */ if (l_tccp->qmfbid == 1) { if (! opj_dwt_decode(p_tcd, l_tile_comp, @@ -1796,6 +1929,7 @@ static OPJ_BOOL opj_tcd_dwt_decode(opj_tcd_t *p_tcd) return OPJ_TRUE; } + static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager) { opj_tcd_tile_t * l_tile = p_tcd->tcd_image->tiles; @@ -1807,17 +1941,40 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager) return OPJ_TRUE; } - l_samples = (OPJ_UINT32)((l_tile_comp->x1 - l_tile_comp->x0) * - (l_tile_comp->y1 - l_tile_comp->y0)); + if (p_tcd->whole_tile_decoding) { + /* A bit inefficient: we process more data than needed if */ + /* resno_decoded < l_tile_comp->minimum_num_resolutions-1, */ + /* but we would need to take into account a stride then */ + l_samples = (OPJ_UINT32)(( + l_tile_comp->resolutions[l_tile_comp->minimum_num_resolutions - 1].x1 - + l_tile_comp->resolutions[l_tile_comp->minimum_num_resolutions - 1].x0) * + (l_tile_comp->resolutions[l_tile_comp->minimum_num_resolutions - 1].y1 - + l_tile_comp->resolutions[l_tile_comp->minimum_num_resolutions - 1].y0)); + } else { + opj_tcd_resolution_t* l_res; + l_res = l_tile_comp->resolutions + p_tcd->image->comps[0].resno_decoded; + l_samples = (l_res->win_x1 - l_res->win_x0) * + (l_res->win_y1 - l_res->win_y0); + } if (l_tile->numcomps >= 3) { + opj_tcd_resolution_t* 
res_comp0 = l_tile->comps[0].resolutions + + p_tcd->image->comps[0].resno_decoded; + opj_tcd_resolution_t* res_comp1 = l_tile->comps[1].resolutions + + p_tcd->image->comps[1].resno_decoded; + opj_tcd_resolution_t* res_comp2 = l_tile->comps[2].resolutions + + p_tcd->image->comps[2].resno_decoded; + OPJ_INT32 l_res_samples = (OPJ_INT32)(res_comp0->x1 - res_comp0->x0) * + (res_comp0->y1 - res_comp0->y0); /* testcase 1336.pdf.asan.47.376 */ - if ((l_tile->comps[0].x1 - l_tile->comps[0].x0) * (l_tile->comps[0].y1 - - l_tile->comps[0].y0) < (OPJ_INT32)l_samples || - (l_tile->comps[1].x1 - l_tile->comps[1].x0) * (l_tile->comps[1].y1 - - l_tile->comps[1].y0) < (OPJ_INT32)l_samples || - (l_tile->comps[2].x1 - l_tile->comps[2].x0) * (l_tile->comps[2].y1 - - l_tile->comps[2].y0) < (OPJ_INT32)l_samples) { + if (p_tcd->image->comps[0].resno_decoded != + p_tcd->image->comps[1].resno_decoded || + p_tcd->image->comps[0].resno_decoded != + p_tcd->image->comps[2].resno_decoded || + (res_comp1->x1 - res_comp1->x0) * (res_comp1->y1 - + res_comp1->y0) != l_res_samples || + (res_comp2->x1 - res_comp2->x0) * (res_comp2->y1 - + res_comp2->y0) != l_res_samples) { opj_event_msg(p_manager, EVT_ERROR, "Tiles don't all have the same dimension. 
Skip the MCT step.\n"); return OPJ_FALSE; @@ -1834,7 +1991,11 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager) } for (i = 0; i < l_tile->numcomps; ++i) { - l_data[i] = (OPJ_BYTE*) l_tile_comp->data; + if (p_tcd->whole_tile_decoding) { + l_data[i] = (OPJ_BYTE*) l_tile_comp->data; + } else { + l_data[i] = (OPJ_BYTE*) l_tile_comp->data_win; + } ++l_tile_comp; } @@ -1855,15 +2016,29 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager) opj_free(l_data); } else { if (l_tcp->tccps->qmfbid == 1) { - opj_mct_decode(l_tile->comps[0].data, - l_tile->comps[1].data, - l_tile->comps[2].data, - l_samples); + if (p_tcd->whole_tile_decoding) { + opj_mct_decode(l_tile->comps[0].data, + l_tile->comps[1].data, + l_tile->comps[2].data, + l_samples); + } else { + opj_mct_decode(l_tile->comps[0].data_win, + l_tile->comps[1].data_win, + l_tile->comps[2].data_win, + l_samples); + } } else { - opj_mct_decode_real((OPJ_FLOAT32*)l_tile->comps[0].data, - (OPJ_FLOAT32*)l_tile->comps[1].data, - (OPJ_FLOAT32*)l_tile->comps[2].data, - l_samples); + if (p_tcd->whole_tile_decoding) { + opj_mct_decode_real((OPJ_FLOAT32*)l_tile->comps[0].data, + (OPJ_FLOAT32*)l_tile->comps[1].data, + (OPJ_FLOAT32*)l_tile->comps[2].data, + l_samples); + } else { + opj_mct_decode_real((OPJ_FLOAT32*)l_tile->comps[0].data_win, + (OPJ_FLOAT32*)l_tile->comps[1].data_win, + (OPJ_FLOAT32*)l_tile->comps[2].data_win, + l_samples); + } } } } else { @@ -1896,12 +2071,24 @@ static OPJ_BOOL opj_tcd_dc_level_shift_decode(opj_tcd_t *p_tcd) for (compno = 0; compno < l_tile->numcomps; compno++) { l_res = l_tile_comp->resolutions + l_img_comp->resno_decoded; - l_width = (OPJ_UINT32)(l_res->x1 - l_res->x0); - l_height = (OPJ_UINT32)(l_res->y1 - l_res->y0); - l_stride = (OPJ_UINT32)(l_tile_comp->x1 - l_tile_comp->x0) - l_width; - assert(l_height == 0 || - l_width + l_stride <= l_tile_comp->data_size / l_height); /*MUPDF*/ + if (!p_tcd->whole_tile_decoding) { + l_width = 
l_res->win_x1 - l_res->win_x0; + l_height = l_res->win_y1 - l_res->win_y0; + l_stride = 0; + l_current_ptr = l_tile_comp->data_win; + } else { + l_width = (OPJ_UINT32)(l_res->x1 - l_res->x0); + l_height = (OPJ_UINT32)(l_res->y1 - l_res->y0); + l_stride = (OPJ_UINT32)( + l_tile_comp->resolutions[l_tile_comp->minimum_num_resolutions - 1].x1 - + l_tile_comp->resolutions[l_tile_comp->minimum_num_resolutions - 1].x0) + - l_width; + l_current_ptr = l_tile_comp->data; + + assert(l_height == 0 || + l_width + l_stride <= l_tile_comp->data_size / l_height); /*MUPDF*/ + } if (l_img_comp->sgnd) { l_min = -(1 << (l_img_comp->prec - 1)); @@ -1911,7 +2098,6 @@ static OPJ_BOOL opj_tcd_dc_level_shift_decode(opj_tcd_t *p_tcd) l_max = (OPJ_INT32)((1U << l_img_comp->prec) - 1); } - l_current_ptr = l_tile_comp->data; if (l_tccp->qmfbid == 1) { for (j = 0; j < l_height; ++j) { @@ -1981,6 +2167,9 @@ static void opj_tcd_code_block_dec_deallocate(opj_tcd_precinct_t * p_precinct) l_code_block->chunks = 00; } + opj_free(l_code_block->decoded_data); + l_code_block->decoded_data = NULL; + ++l_code_block; } @@ -2396,16 +2585,16 @@ OPJ_BOOL opj_tcd_is_subband_area_of_interest(opj_tcd_t *tcd, /* with the tile coordinates */ OPJ_UINT32 tcx0 = opj_uint_max( (OPJ_UINT32)tilec->x0, - opj_uint_ceildiv(tcd->decoded_x0, image_comp->dx)); + opj_uint_ceildiv(tcd->win_x0, image_comp->dx)); OPJ_UINT32 tcy0 = opj_uint_max( (OPJ_UINT32)tilec->y0, - opj_uint_ceildiv(tcd->decoded_y0, image_comp->dy)); + opj_uint_ceildiv(tcd->win_y0, image_comp->dy)); OPJ_UINT32 tcx1 = opj_uint_min( (OPJ_UINT32)tilec->x1, - opj_uint_ceildiv(tcd->decoded_x1, image_comp->dx)); + opj_uint_ceildiv(tcd->win_x1, image_comp->dx)); OPJ_UINT32 tcy1 = opj_uint_min( (OPJ_UINT32)tilec->y1, - opj_uint_ceildiv(tcd->decoded_y1, image_comp->dy)); + opj_uint_ceildiv(tcd->win_y1, image_comp->dy)); /* Compute number of decomposition for this band. See table F-1 */ OPJ_UINT32 nb = (resno == 0) ? 
tilec->numresolutions - 1 : @@ -2452,3 +2641,44 @@ OPJ_BOOL opj_tcd_is_subband_area_of_interest(opj_tcd_t *tcd, #endif return intersects; } + +/** Returns whether a tile component is fully decoded, taking into account + * p_tcd->win_* members. + * + * @param p_tcd TCD handle. + * @param compno Component number + * @return OPJ_TRUE if the tile component is fully decoded + */ +static OPJ_BOOL opj_tcd_is_whole_tilecomp_decoding(opj_tcd_t *p_tcd, + OPJ_UINT32 compno) +{ + opj_tcd_tilecomp_t* tilec = &(p_tcd->tcd_image->tiles->comps[compno]); + opj_image_comp_t* image_comp = &(p_tcd->image->comps[compno]); + /* Compute the intersection of the area of interest, expressed in tile coordinates */ + /* with the tile coordinates */ + OPJ_UINT32 tcx0 = opj_uint_max( + (OPJ_UINT32)tilec->x0, + opj_uint_ceildiv(p_tcd->win_x0, image_comp->dx)); + OPJ_UINT32 tcy0 = opj_uint_max( + (OPJ_UINT32)tilec->y0, + opj_uint_ceildiv(p_tcd->win_y0, image_comp->dy)); + OPJ_UINT32 tcx1 = opj_uint_min( + (OPJ_UINT32)tilec->x1, + opj_uint_ceildiv(p_tcd->win_x1, image_comp->dx)); + OPJ_UINT32 tcy1 = opj_uint_min( + (OPJ_UINT32)tilec->y1, + opj_uint_ceildiv(p_tcd->win_y1, image_comp->dy)); + + OPJ_UINT32 shift = tilec->numresolutions - tilec->minimum_num_resolutions; + /* Tolerate small margin within the reduced resolution factor to consider if */ + /* the whole tile path must be taken */ + return (tcx0 >= (OPJ_UINT32)tilec->x0 && + tcy0 >= (OPJ_UINT32)tilec->y0 && + tcx1 <= (OPJ_UINT32)tilec->x1 && + tcy1 <= (OPJ_UINT32)tilec->y1 && + (shift >= 32 || + (((tcx0 - (OPJ_UINT32)tilec->x0) >> shift) == 0 && + ((tcy0 - (OPJ_UINT32)tilec->y0) >> shift) == 0 && + (((OPJ_UINT32)tilec->x1 - tcx1) >> shift) == 0 && + (((OPJ_UINT32)tilec->y1 - tcy1) >> shift) == 0))); +} diff --git a/src/lib/openjp2/tcd.h b/src/lib/openjp2/tcd.h index bf3c457e8..8ad57e07f 100644 --- a/src/lib/openjp2/tcd.h +++ b/src/lib/openjp2/tcd.h @@ -134,6 +134,8 @@ typedef struct opj_tcd_cblk_dec { OPJ_UINT32 m_current_max_segs; 
/* allocated number of segs[] items */ OPJ_UINT32 numchunks; /* Number of valid chunks items */ OPJ_UINT32 numchunksalloc; /* Number of chunks item allocated */ + /* Decoded code-block. Only used for subtile decoding. Otherwise tilec->data is directly updated */ + OPJ_INT32* decoded_data; } opj_tcd_cblk_dec_t; /** Precinct structure */ @@ -175,6 +177,12 @@ typedef struct opj_tcd_resolution { OPJ_UINT32 numbands; /* subband information */ opj_tcd_band_t bands[3]; + + /* dimension of the resolution limited to window of interest. Only valid if tcd->whole_tile_decoding is NOT set */ + OPJ_UINT32 win_x0; + OPJ_UINT32 win_y0; + OPJ_UINT32 win_x1; + OPJ_UINT32 win_y1; } opj_tcd_resolution_t; /** Tile-component structure */ @@ -191,7 +199,8 @@ typedef struct opj_tcd_tilecomp { opj_tcd_resolution_t *resolutions; /* size of data for resolutions (in bytes) */ OPJ_UINT32 resolutions_size; - /* data of the component */ + + /* data of the component. For decoding, only valid if tcd->whole_tile_decoding is set (so exclusive of data_win member) */ OPJ_INT32 *data; /* if true, then need to free after usage, otherwise do not free */ OPJ_BOOL ownsData; @@ -199,6 +208,15 @@ typedef struct opj_tcd_tilecomp { OPJ_UINT32 data_size_needed; /* size of the data of the component */ OPJ_UINT32 data_size; + + /** data of the component limited to window of interest. Only valid for decoding and if tcd->whole_tile_decoding is NOT set (so exclusive of data member) */ + OPJ_INT32 *data_win; + /* dimension of the component limited to window of interest. 
Only valid for decoding and if tcd->whole_tile_decoding is NOT set */ + OPJ_UINT32 win_x0; + OPJ_UINT32 win_y0; + OPJ_UINT32 win_x1; + OPJ_UINT32 win_y1; + /* add fixed_quality */ OPJ_INT32 numpix; } opj_tcd_tilecomp_t; @@ -256,10 +274,12 @@ typedef struct opj_tcd { /** Thread pool */ opj_thread_pool_t* thread_pool; /** Coordinates of the window of interest, in grid reference space */ - OPJ_UINT32 decoded_x0; - OPJ_UINT32 decoded_y0; - OPJ_UINT32 decoded_x1; - OPJ_UINT32 decoded_y1; + OPJ_UINT32 win_x0; + OPJ_UINT32 win_y0; + OPJ_UINT32 win_x1; + OPJ_UINT32 win_y1; + /** Only valid for decoding. Whether the whole tile is decoded, or just the region in win_x0/win_y0/win_x1/win_y1 */ + OPJ_BOOL whole_tile_decoding; } opj_tcd_t; /** @name Exported functions */ @@ -331,7 +351,8 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd, /** * Gets the maximum tile size that will be taken by the tile once decoded. */ -OPJ_UINT32 opj_tcd_get_decoded_tile_size(opj_tcd_t *p_tcd); +OPJ_UINT32 opj_tcd_get_decoded_tile_size(opj_tcd_t *p_tcd, + OPJ_BOOL take_into_account_partial_decoding); /** * Encodes a tile from the raw image into the given buffer. 
@@ -356,10 +377,10 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd, /** Decode a tile from a buffer into a raw image @param tcd TCD handle -@param decoded_x0 Upper left x of region to decode (in grid coordinates) -@param decoded_y0 Upper left y of region to decode (in grid coordinates) -@param decoded_x1 Lower right x of region to decode (in grid coordinates) -@param decoded_y1 Lower right y of region to decode (in grid coordinates) +@param win_x0 Upper left x of region to decode (in grid coordinates) +@param win_y0 Upper left y of region to decode (in grid coordinates) +@param win_x1 Lower right x of region to decode (in grid coordinates) +@param win_y1 Lower right y of region to decode (in grid coordinates) @param src Source buffer @param len Length of source buffer @param tileno Number that identifies one of the tiles to be decoded @@ -367,10 +388,10 @@ Decode a tile from a buffer into a raw image @param manager the event manager. */ OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *tcd, - OPJ_UINT32 decoded_x0, - OPJ_UINT32 decoded_y0, - OPJ_UINT32 decoded_x1, - OPJ_UINT32 decoded_y1, + OPJ_UINT32 win_x0, + OPJ_UINT32 win_y0, + OPJ_UINT32 win_x1, + OPJ_UINT32 win_y1, OPJ_BYTE *src, OPJ_UINT32 len, OPJ_UINT32 tileno, @@ -427,7 +448,7 @@ void opj_tcd_reinit_segment(opj_tcd_seg_t* seg); /** Returns whether a sub-band region contributes to the area of interest - * tcd->decoded_x0,tcd->decoded_y0,tcd->decoded_x1,tcd->decoded_y1. + * tcd->win_x0,tcd->win_y0,tcd->win_x1,tcd->win_y1. * * @param tcd TCD handle. 
* @param compno Component number @@ -449,7 +470,6 @@ OPJ_BOOL opj_tcd_is_subband_area_of_interest(opj_tcd_t *tcd, OPJ_UINT32 x1, OPJ_UINT32 y1); - /* ----------------------------------------------------------------------- */ /*@}*/ diff --git a/src/lib/openjp2/test_sparse_array.c b/src/lib/openjp2/test_sparse_array.c new file mode 100644 index 000000000..82c83e901 --- /dev/null +++ b/src/lib/openjp2/test_sparse_array.c @@ -0,0 +1,148 @@ +/* + * The copyright in this software is being made available under the 2-clauses + * BSD License, included below. This software may be subject to other third + * party and contributor rights, including patent rights, and no such rights + * are granted under this license. + * + * Copyright (c) 2017, IntoPix SA + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "opj_includes.h" + +int main() +{ + OPJ_UINT32 i, j, w, h; + OPJ_INT32 buffer[ 99 * 101 ]; + OPJ_BOOL ret; + opj_sparse_array_int32_t* sa; + + sa = opj_sparse_array_int32_create(0, 1, 1, 1); + assert(sa == NULL); + opj_sparse_array_int32_free(sa); + + sa = opj_sparse_array_int32_create(1, 0, 1, 1); + assert(sa == NULL); + + sa = opj_sparse_array_int32_create(1, 1, 0, 1); + assert(sa == NULL); + + sa = opj_sparse_array_int32_create(1, 1, 1, 0); + assert(sa == NULL); + + sa = opj_sparse_array_int32_create(99, 101, ~0U, ~0U); + assert(sa == NULL); + + sa = opj_sparse_array_int32_create(99, 101, 15, 17); + opj_sparse_array_int32_free(sa); + + sa = opj_sparse_array_int32_create(99, 101, 15, 17); + ret = opj_sparse_array_int32_read(sa, 0, 0, 0, 1, buffer, 1, 1, OPJ_FALSE); + assert(!ret); + ret = opj_sparse_array_int32_read(sa, 0, 0, 1, 0, buffer, 1, 1, OPJ_FALSE); + assert(!ret); + ret = opj_sparse_array_int32_read(sa, 0, 0, 100, 1, buffer, 1, 1, OPJ_FALSE); + assert(!ret); + ret = opj_sparse_array_int32_read(sa, 0, 0, 1, 102, buffer, 1, 1, OPJ_FALSE); + assert(!ret); + ret = opj_sparse_array_int32_read(sa, 1, 0, 0, 1, buffer, 1, 1, OPJ_FALSE); + assert(!ret); + ret = opj_sparse_array_int32_read(sa, 0, 1, 1, 0, buffer, 1, 1, OPJ_FALSE); + assert(!ret); + ret = opj_sparse_array_int32_read(sa, 99, 101, 99, 101, buffer, 1, 1, + OPJ_FALSE); + assert(!ret); + + buffer[0] = 1; + ret = opj_sparse_array_int32_read(sa, 0, 0, 1, 1, 
buffer, 1, 1, OPJ_FALSE); + assert(ret); + assert(buffer[0] == 0); + + memset(buffer, 0xFF, sizeof(buffer)); + ret = opj_sparse_array_int32_read(sa, 0, 0, 99, 101, buffer, 1, 99, OPJ_FALSE); + assert(ret); + for (i = 0; i < 99 * 101; i++) { + assert(buffer[i] == 0); + } + + buffer[0] = 1; + ret = opj_sparse_array_int32_write(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1, + OPJ_FALSE); + assert(ret); + buffer[0] = 2; + ret = opj_sparse_array_int32_write(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1, + OPJ_FALSE); + assert(ret); + + buffer[0] = 0; + buffer[1] = 0xFF; + ret = opj_sparse_array_int32_read(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1, + OPJ_FALSE); + assert(ret); + assert(buffer[0] == 2); + assert(buffer[1] == 0xFF); + + w = 15 + 1; + h = 17 + 1; + memset(buffer, 0xFF, sizeof(buffer)); + ret = opj_sparse_array_int32_read(sa, 2, 1, 2 + w, 1 + h, buffer, 1, w, + OPJ_FALSE); + assert(ret); + for (j = 0; j < h; j++) { + for (i = 0; i < w; i++) { + if (i == 4 - 2 && j == 5 - 1) { + assert(buffer[ j * w + i ] == 2); + } else { + assert(buffer[ j * w + i ] == 0); + } + } + } + + opj_sparse_array_int32_free(sa); + + + sa = opj_sparse_array_int32_create(99, 101, 15, 17); + memset(buffer, 0xFF, sizeof(buffer)); + ret = opj_sparse_array_int32_read(sa, 0, 0, 2, 1, buffer, 2, 4, OPJ_FALSE); + assert(ret); + assert(buffer[0] == 0); + assert(buffer[1] == -1); + assert(buffer[2] == 0); + + buffer[0] = 1; + buffer[2] = 3; + ret = opj_sparse_array_int32_write(sa, 0, 0, 2, 1, buffer, 2, 4, OPJ_FALSE); + assert(ret); + + memset(buffer, 0xFF, sizeof(buffer)); + ret = opj_sparse_array_int32_read(sa, 0, 0, 2, 1, buffer, 2, 4, OPJ_FALSE); + assert(ret); + assert(buffer[0] == 1); + assert(buffer[1] == -1); + assert(buffer[2] == 3); + + opj_sparse_array_int32_free(sa); + + return 0; +} From eee5104a8885227167a349b6dabdb1f72a04d9d0 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:30:32 +0200 Subject: [PATCH 05/29] opj_dwt_decode_partial_tile(): avoid undefined behaviour in lifting 
operation by properly initializing working buffer --- src/lib/openjp2/dwt.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index b32508dbf..047981d6d 100644 --- a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ -1858,6 +1858,19 @@ static OPJ_BOOL opj_dwt_decode_partial_tile( for (j = 0; j < rh; ++j) { if ((j >= win_ll_y0 && j < win_ll_y1) || (j >= win_lh_y0 + (OPJ_UINT32)v.sn && j < win_lh_y1 + (OPJ_UINT32)v.sn)) { + + /* Avoids dwt.c:1584:44 (in opj_dwt_decode_partial_1): runtime error: */ + /* signed integer overflow: -1094795586 + -1094795586 cannot be represented in type 'int' */ + /* on opj_decompress -i ../../openjpeg/MAPA.jp2 -o out.tif -d 0,0,256,256 */ + /* This is less extreme than memsetting the whole buffer to 0 */ + /* although we could potentially do better with better handling of edge conditions */ + if (win_tr_x1 >= 1 && win_tr_x1 < rw) { + h.mem[win_tr_x1 - 1] = 0; + } + if (win_tr_x1 < rw) { + h.mem[win_tr_x1] = 0; + } + opj_dwt_interleave_partial_h(h.mem, h.cas, sa, From c37e360a5112b5471168e429159f61b74c0c2f58 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:30:35 +0200 Subject: [PATCH 06/29] opj_tcd_init_tile(): fix typo on overflow detection condition (introduced in previous commit) --- src/lib/openjp2/tcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index c221c6ed4..6758487a1 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -892,7 +892,7 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, OPJ_UINT32 res_h = (OPJ_UINT32)(l_res->y1 - l_res->y0); /* issue 733, l_data_size == 0U, probably something wrong should be checked before getting here */ - if (res_h > 0 && res_h > (((OPJ_UINT32) - 1) / res_h)) { + if (res_h > 0 && res_w > (((OPJ_UINT32) - 1) / res_h)) { opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n"); return 
OPJ_FALSE; } From d5153ba404fa13b9c6e7b2c20d4d128aac718fba Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:30:37 +0200 Subject: [PATCH 07/29] Remove limitation that prevents opening images bigger than 4 billion pixels However the intermediate buffer for decoding must still be smaller than 4 billion pixels, so this is useful for decoding at a lower resolution level, or subtile decoding. --- src/lib/openjp2/j2k.c | 25 ++++++++---------- src/lib/openjp2/tcd.c | 60 ++++++++++++++++++++++++------------------- 2 files changed, 44 insertions(+), 41 deletions(-) diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c index 0d8bbc3fd..400fcaa4a 100644 --- a/src/lib/openjp2/j2k.c +++ b/src/lib/openjp2/j2k.c @@ -2142,13 +2142,6 @@ static OPJ_BOOL opj_j2k_read_siz(opj_j2k_t *p_j2k, return OPJ_FALSE; } - /* testcase 1610.pdf.SIGSEGV.59c.681 */ - if ((0xFFFFFFFFU / l_image->x1) < l_image->y1) { - opj_event_msg(p_manager, EVT_ERROR, - "Prevent buffer overflow (x1: %d, y1: %d)\n", l_image->x1, l_image->y1); - return OPJ_FALSE; - } - /* testcase issue427-illegal-tile-offset.jp2 */ l_tx1 = opj_uint_adds(l_cp->tx0, l_cp->tdx); /* manage overflow */ l_ty1 = opj_uint_adds(l_cp->ty0, l_cp->tdy); /* manage overflow */ @@ -8787,9 +8780,13 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k, *p_tile_index = p_j2k->m_current_tile_number; *p_go_on = OPJ_TRUE; - *p_data_size = opj_tcd_get_decoded_tile_size(p_j2k->m_tcd, OPJ_FALSE); - if (*p_data_size == UINT_MAX) { - return OPJ_FALSE; + if (p_data_size) { + /* For internal use in j2k.c, we don't need this */ + /* This is just needed for folks using the opj_read_tile_header() / opj_decode_tile_data() combo */ + *p_data_size = opj_tcd_get_decoded_tile_size(p_j2k->m_tcd, OPJ_FALSE); + if (*p_data_size == UINT_MAX) { + return OPJ_FALSE; + } } *p_tile_x0 = p_j2k->m_tcd->tcd_image->tiles->x0; *p_tile_y0 = p_j2k->m_tcd->tcd_image->tiles->y0; @@ -10466,7 +10463,6 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t
*p_j2k, { OPJ_BOOL l_go_on = OPJ_TRUE; OPJ_UINT32 l_current_tile_no; - OPJ_UINT32 l_data_size; OPJ_INT32 l_tile_x0, l_tile_y0, l_tile_x1, l_tile_y1; OPJ_UINT32 l_nb_comps; OPJ_UINT32 nr_tiles = 0; @@ -10483,7 +10479,7 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, OPJ_UINT32 i; if (! opj_j2k_read_tile_header(p_j2k, &l_current_tile_no, - &l_data_size, + NULL, &l_tile_x0, &l_tile_y0, &l_tile_x1, &l_tile_y1, &l_nb_comps, @@ -10515,7 +10511,7 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, for (;;) { if (! opj_j2k_read_tile_header(p_j2k, &l_current_tile_no, - &l_data_size, + NULL, &l_tile_x0, &l_tile_y0, &l_tile_x1, &l_tile_y1, &l_nb_comps, @@ -10589,7 +10585,6 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k, OPJ_BOOL l_go_on = OPJ_TRUE; OPJ_UINT32 l_current_tile_no; OPJ_UINT32 l_tile_no_to_dec; - OPJ_UINT32 l_data_size; OPJ_INT32 l_tile_x0, l_tile_y0, l_tile_x1, l_tile_y1; OPJ_UINT32 l_nb_comps; OPJ_UINT32 l_nb_tiles; @@ -10640,7 +10635,7 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k, for (;;) { if (! 
opj_j2k_read_tile_header(p_j2k, &l_current_tile_no, - &l_data_size, + NULL, &l_tile_x0, &l_tile_y0, &l_tile_x1, &l_tile_y1, &l_nb_comps, diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index 6758487a1..2ce55c2d3 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -886,27 +886,6 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, l_res->x1 = opj_int_ceildivpow2(l_tilec->x1, (OPJ_INT32)l_level_no); l_res->y1 = opj_int_ceildivpow2(l_tilec->y1, (OPJ_INT32)l_level_no); - if (!isEncoder && resno + 1 == l_tilec->minimum_num_resolutions) { - /* compute l_data_size with overflow check */ - OPJ_UINT32 res_w = (OPJ_UINT32)(l_res->x1 - l_res->x0); - OPJ_UINT32 res_h = (OPJ_UINT32)(l_res->y1 - l_res->y0); - - /* issue 733, l_data_size == 0U, probably something wrong should be checked before getting here */ - if (res_h > 0 && res_w > (((OPJ_UINT32) - 1) / res_h)) { - opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n"); - return OPJ_FALSE; - } - l_data_size = res_w * res_h; - - if ((((OPJ_UINT32) - 1) / (OPJ_UINT32)sizeof(OPJ_UINT32)) < l_data_size) { - opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n"); - return OPJ_FALSE; - } - l_data_size *= (OPJ_UINT32)sizeof(OPJ_UINT32); - - l_tilec->data_size_needed = l_data_size; - } - /*fprintf(stderr, "\t\t\tres_x0= %d, res_y0 =%d, res_x1=%d, res_y1=%d\n", l_res->x0, l_res->y0, l_res->x1, l_res->y1);*/ /* p. 
35, table A-23, ISO/IEC FDIS154444-1 : 2000 (18 august 2000) */ l_pdx = l_tccp->prcw[resno]; @@ -1312,6 +1291,7 @@ OPJ_UINT32 opj_tcd_get_decoded_tile_size(opj_tcd_t *p_tcd, l_img_comp = p_tcd->image->comps; for (i = 0; i < p_tcd->image->numcomps; ++i) { + OPJ_UINT32 w, h; l_size_comp = l_img_comp->prec >> 3; /*(/ 8)*/ l_remaining = l_img_comp->prec & 7; /* (%8) */ @@ -1325,12 +1305,16 @@ OPJ_UINT32 opj_tcd_get_decoded_tile_size(opj_tcd_t *p_tcd, l_res = l_tile_comp->resolutions + l_tile_comp->minimum_num_resolutions - 1; if (take_into_account_partial_decoding && !p_tcd->whole_tile_decoding) { - l_temp = (l_res->win_x1 - l_res->win_x0) * - (l_res->win_y1 - l_res->win_y0); + w = l_res->win_x1 - l_res->win_x0; + h = l_res->win_y1 - l_res->win_y0; } else { - l_temp = (OPJ_UINT32)((l_res->x1 - l_res->x0) * (l_res->y1 - - l_res->y0)); /* x1*y1 can't overflow */ + w = (OPJ_UINT32)(l_res->x1 - l_res->x0); + h = (OPJ_UINT32)(l_res->y1 - l_res->y0); + } + if (h > 0 && UINT_MAX / w < h) { + return UINT_MAX; } + l_temp = w * h; if (l_size_comp && UINT_MAX / l_size_comp < l_temp) { return UINT_MAX; } @@ -1473,7 +1457,31 @@ OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd, if (p_tcd->whole_tile_decoding) { for (compno = 0; compno < p_tcd->image->numcomps; compno++) { - if (!opj_alloc_tile_component_data(&(p_tcd->tcd_image->tiles->comps[compno]))) { + opj_tcd_tilecomp_t* tilec = &(p_tcd->tcd_image->tiles->comps[compno]); + opj_tcd_resolution_t *l_res = & + (tilec->resolutions[tilec->minimum_num_resolutions - 1]); + OPJ_UINT32 l_data_size; + + /* compute l_data_size with overflow check */ + OPJ_UINT32 res_w = (OPJ_UINT32)(l_res->x1 - l_res->x0); + OPJ_UINT32 res_h = (OPJ_UINT32)(l_res->y1 - l_res->y0); + + /* issue 733, l_data_size == 0U, probably something wrong should be checked before getting here */ + if (res_h > 0 && res_w > (((OPJ_UINT32) - 1) / res_h)) { + opj_event_msg(p_manager, EVT_ERROR, "Not enough memory for tile data\n"); + return OPJ_FALSE; + } + l_data_size = res_w * 
res_h; + + if ((((OPJ_UINT32) - 1) / (OPJ_UINT32)sizeof(OPJ_UINT32)) < l_data_size) { + opj_event_msg(p_manager, EVT_ERROR, "Not enough memory for tile data\n"); + return OPJ_FALSE; + } + l_data_size *= (OPJ_UINT32)sizeof(OPJ_UINT32); + + tilec->data_size_needed = l_data_size; + + if (!opj_alloc_tile_component_data(tilec)) { opj_event_msg(p_manager, EVT_ERROR, "Not enough memory for tile data\n"); return OPJ_FALSE; } From d1299d9670d766006a7a162a770d3b70eaeef9bd Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:30:39 +0200 Subject: [PATCH 08/29] Fix compiler warning in release mode --- src/lib/openjp2/dwt.c | 8 ++++++++ src/lib/openjp2/test_sparse_array.c | 2 ++ 2 files changed, 10 insertions(+) diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index 047981d6d..81a8150a2 100644 --- a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ -1541,6 +1541,7 @@ static void opj_dwt_interleave_partial_h(OPJ_INT32 *dest, dest + 1 - cas + 2 * win_h_x0, 2, 0, OPJ_TRUE); assert(ret); + OPJ_UNUSED(ret); } @@ -1567,6 +1568,7 @@ static void opj_dwt_interleave_partial_v(OPJ_INT32 *dest, dest + 1 - cas + 2 * win_h_y0, 0, 2, OPJ_TRUE); assert(ret); + OPJ_UNUSED(ret); } static void opj_dwt_decode_partial_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, @@ -1751,6 +1753,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile( 1, tr_max->win_x1 - tr_max->win_x0, OPJ_TRUE); assert(ret); + OPJ_UNUSED(ret); opj_sparse_array_int32_free(sa); return OPJ_TRUE; } @@ -1937,6 +1940,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile( 1, tr_max->win_x1 - tr_max->win_x0, OPJ_TRUE); assert(ret); + OPJ_UNUSED(ret); } opj_sparse_array_int32_free(sa); return OPJ_TRUE; @@ -2018,6 +2022,7 @@ static void opj_v4dwt_interleave_partial_h(opj_v4dwt_t* dwt, (OPJ_INT32*)(dwt->wavelet + 1 - dwt->cas + 2 * dwt->win_h_x0) + i, 8, 0, OPJ_TRUE); assert(ret); + OPJ_UNUSED(ret); } } @@ -2061,6 +2066,7 @@ static void opj_v4dwt_interleave_partial_v(opj_v4dwt_t* OPJ_RESTRICT dwt, (OPJ_INT32*)(dwt->wavelet + 
1 - dwt->cas + 2 * dwt->win_h_x0) + i, 0, 8, OPJ_TRUE); assert(ret); + OPJ_UNUSED(ret); } } @@ -2425,6 +2431,7 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, 1, tr_max->win_x1 - tr_max->win_x0, OPJ_TRUE); assert(ret); + OPJ_UNUSED(ret); opj_sparse_array_int32_free(sa); return OPJ_TRUE; } @@ -2614,6 +2621,7 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, 1, tr_max->win_x1 - tr_max->win_x0, OPJ_TRUE); assert(ret); + OPJ_UNUSED(ret); } opj_sparse_array_int32_free(sa); diff --git a/src/lib/openjp2/test_sparse_array.c b/src/lib/openjp2/test_sparse_array.c index 82c83e901..0b49110fb 100644 --- a/src/lib/openjp2/test_sparse_array.c +++ b/src/lib/openjp2/test_sparse_array.c @@ -29,6 +29,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#undef NDEBUG + #include "opj_includes.h" int main() From 008a12d4fce0a7b1eabc51e04ed339be759646e2 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:30:41 +0200 Subject: [PATCH 09/29] TCD: allow tile buffer to be greater than 4GB on 64 bit hosts (but number of pixels must remain under 4 billion) --- src/lib/openjp2/tcd.c | 42 ++++++++++++++++++++++-------------------- src/lib/openjp2/tcd.h | 4 ++-- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index 2ce55c2d3..1213f757d 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -807,24 +807,26 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, } if (isEncoder) { + size_t l_tile_data_size; + /* compute l_data_size with overflow check */ - l_data_size = (OPJ_UINT32)(l_tilec->x1 - l_tilec->x0); + size_t w = (size_t)(l_tilec->x1 - l_tilec->x0); + size_t h = (size_t)(l_tilec->y1 - l_tilec->y0); + /* issue 733, l_data_size == 0U, probably something wrong should be checked before getting here */ - if ((l_data_size > 0U) && - ((((OPJ_UINT32) - 1) / l_data_size) < (OPJ_UINT32)(l_tilec->y1 - - l_tilec->y0))) { + if (h > 0 && w > 
SIZE_MAX / h) { opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n"); return OPJ_FALSE; } - l_data_size = l_data_size * (OPJ_UINT32)(l_tilec->y1 - l_tilec->y0); + l_tile_data_size = w * h; - if ((((OPJ_UINT32) - 1) / (OPJ_UINT32)sizeof(OPJ_UINT32)) < l_data_size) { + if (SIZE_MAX / sizeof(OPJ_UINT32) < l_tile_data_size) { opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n"); return OPJ_FALSE; } - l_data_size = l_data_size * (OPJ_UINT32)sizeof(OPJ_UINT32); + l_tile_data_size = l_tile_data_size * sizeof(OPJ_UINT32); - l_tilec->data_size_needed = l_data_size; + l_tilec->data_size_needed = l_tile_data_size; } l_data_size = l_tilec->numresolutions * (OPJ_UINT32)sizeof( @@ -1460,24 +1462,24 @@ OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd, opj_tcd_tilecomp_t* tilec = &(p_tcd->tcd_image->tiles->comps[compno]); opj_tcd_resolution_t *l_res = & (tilec->resolutions[tilec->minimum_num_resolutions - 1]); - OPJ_UINT32 l_data_size; + size_t l_data_size; /* compute l_data_size with overflow check */ - OPJ_UINT32 res_w = (OPJ_UINT32)(l_res->x1 - l_res->x0); - OPJ_UINT32 res_h = (OPJ_UINT32)(l_res->y1 - l_res->y0); + size_t res_w = (size_t)(l_res->x1 - l_res->x0); + size_t res_h = (size_t)(l_res->y1 - l_res->y0); /* issue 733, l_data_size == 0U, probably something wrong should be checked before getting here */ - if (res_h > 0 && res_w > (((OPJ_UINT32) - 1) / res_h)) { + if (res_h > 0 && res_w > SIZE_MAX / res_h) { opj_event_msg(p_manager, EVT_ERROR, "Not enough memory for tile data\n"); return OPJ_FALSE; } l_data_size = res_w * res_h; - if ((((OPJ_UINT32) - 1) / (OPJ_UINT32)sizeof(OPJ_UINT32)) < l_data_size) { + if (SIZE_MAX / sizeof(OPJ_UINT32) < l_data_size) { opj_event_msg(p_manager, EVT_ERROR, "Not enough memory for tile data\n"); return OPJ_FALSE; } - l_data_size *= (OPJ_UINT32)sizeof(OPJ_UINT32); + l_data_size *= sizeof(OPJ_UINT32); tilec->data_size_needed = l_data_size; @@ -1572,24 +1574,24 @@ OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd, 
opj_tcd_tilecomp_t* tilec = &(p_tcd->tcd_image->tiles->comps[compno]); opj_image_comp_t* image_comp = &(p_tcd->image->comps[compno]); opj_tcd_resolution_t *res = tilec->resolutions + image_comp->resno_decoded; - OPJ_UINT32 w = res->win_x1 - res->win_x0; - OPJ_UINT32 h = res->win_y1 - res->win_y0; - OPJ_UINT32 l_data_size; + size_t w = res->win_x1 - res->win_x0; + size_t h = res->win_y1 - res->win_y0; + size_t l_data_size; opj_aligned_free(tilec->data_win); tilec->data_win = NULL; if (w > 0 && h > 0) { - if (w > ((OPJ_UINT32) - 1) / h) { + if (w > SIZE_MAX / h) { opj_event_msg(p_manager, EVT_ERROR, "Not enough memory for tile data\n"); return OPJ_FALSE; } l_data_size = w * h; - if (l_data_size > ((OPJ_UINT32) - 1) / sizeof(OPJ_INT32)) { + if (l_data_size > SIZE_MAX / sizeof(OPJ_INT32)) { opj_event_msg(p_manager, EVT_ERROR, "Not enough memory for tile data\n"); return OPJ_FALSE; } - l_data_size *= (OPJ_UINT32)sizeof(OPJ_INT32); + l_data_size *= sizeof(OPJ_INT32); tilec->data_win = opj_aligned_malloc(l_data_size); if (tilec->data_win == NULL) { diff --git a/src/lib/openjp2/tcd.h b/src/lib/openjp2/tcd.h index 8ad57e07f..7c974c53f 100644 --- a/src/lib/openjp2/tcd.h +++ b/src/lib/openjp2/tcd.h @@ -205,9 +205,9 @@ typedef struct opj_tcd_tilecomp { /* if true, then need to free after usage, otherwise do not free */ OPJ_BOOL ownsData; /* we may either need to allocate this amount of data, or re-use image data and ignore this value */ - OPJ_UINT32 data_size_needed; + size_t data_size_needed; /* size of the data of the component */ - OPJ_UINT32 data_size; + size_t data_size; /** data of the component limited to window of interest. 
Only valid for decoding and if tcd->whole_tile_decoding is NOT set (so exclusive of data member) */ OPJ_INT32 *data_win; From 98b93103613c90753fb6c57696f9403f8ea0b1d6 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:30:44 +0200 Subject: [PATCH 10/29] Various changes to allow tile buffers of more than 4 gigapixels Untested though, since that means a tile buffer of at least 16 GB. So there might be places where uint32 overflow on multiplication still occurs... --- src/lib/openjp2/dwt.c | 94 ++++++++++++++++++---------------- src/lib/openjp2/j2k.c | 2 +- src/lib/openjp2/mct.c | 22 ++++---- src/lib/openjp2/mct.h | 12 ++--- src/lib/openjp2/sparse_array.c | 25 +++++---- src/lib/openjp2/t1.c | 23 +++++---- src/lib/openjp2/tcd.c | 43 ++++++++-------- src/lib/openjp2/tcd.h | 4 +- 8 files changed, 119 insertions(+), 106 deletions(-) diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index 81a8150a2..d0cd3b4e8 100644 --- a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ -279,7 +279,7 @@ static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, l_src += 2; } /* b[i*x]=a[2*i+cas]; */ - l_dest = b + sn * x; + l_dest = b + (size_t)sn * (size_t)x; l_src = a + 1 - cas; i = dn; @@ -325,7 +325,7 @@ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x) bi += 2; ai += x; } - ai = a + (v->sn * x); + ai = a + (v->sn * (size_t)x); bi = v->mem + 1 - v->cas; i = v->dn ; while (i--) { @@ -616,7 +616,7 @@ static void opj_idwt53_v_final_memcpy(OPJ_INT32* tiledp_col, const OPJ_INT32* tmp, OPJ_INT32 len, - OPJ_INT32 stride) + size_t stride) { OPJ_INT32 i; for (i = 0; i < len; ++i) { @@ -625,9 +625,9 @@ void opj_idwt53_v_final_memcpy(OPJ_INT32* tiledp_col, PARALLEL_COLS_53 * sizeof(OPJ_INT32)) would do but would be a tiny bit slower.
We can take here advantage of our knowledge of alignment */ - STOREU(&tiledp_col[i * stride + 0], + STOREU(&tiledp_col[(size_t)i * stride + 0], LOAD(&tmp[PARALLEL_COLS_53 * i + 0])); - STOREU(&tiledp_col[i * stride + VREG_INT_COUNT], + STOREU(&tiledp_col[(size_t)i * stride + VREG_INT_COUNT], LOAD(&tmp[PARALLEL_COLS_53 * i + VREG_INT_COUNT])); } } @@ -639,12 +639,13 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2( const OPJ_INT32 sn, const OPJ_INT32 len, OPJ_INT32* tiledp_col, - const OPJ_INT32 stride) + const size_t stride) { const OPJ_INT32* in_even = &tiledp_col[0]; - const OPJ_INT32* in_odd = &tiledp_col[sn * stride]; + const OPJ_INT32* in_odd = &tiledp_col[(size_t)sn * stride]; - OPJ_INT32 i, j; + OPJ_INT32 i; + size_t j; VREG d1c_0, d1n_0, s1n_0, s0c_0, s0n_0; VREG d1c_1, d1n_1, s1n_1, s0c_1, s0n_1; const VREG two = LOAD_CST(2); @@ -703,7 +704,7 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2( if (len & 1) { VREG tmp_len_minus_1; - s1n_0 = LOADU(in_even + ((len - 1) / 2) * stride); + s1n_0 = LOADU(in_even + (size_t)((len - 1) / 2) * stride); /* tmp_len_minus_1 = s1n - ((d1n + 1) >> 1); */ tmp_len_minus_1 = SUB(s1n_0, SAR(ADD3(d1n_0, d1n_0, two), 2)); STORE(tmp + PARALLEL_COLS_53 * (len - 1), tmp_len_minus_1); @@ -711,7 +712,7 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2( STORE(tmp + PARALLEL_COLS_53 * (len - 2), ADD(d1n_0, SAR(ADD(s0n_0, tmp_len_minus_1), 1))); - s1n_1 = LOADU(in_even + ((len - 1) / 2) * stride + VREG_INT_COUNT); + s1n_1 = LOADU(in_even + (size_t)((len - 1) / 2) * stride + VREG_INT_COUNT); /* tmp_len_minus_1 = s1n - ((d1n + 1) >> 1); */ tmp_len_minus_1 = SUB(s1n_1, SAR(ADD3(d1n_1, d1n_1, two), 2)); STORE(tmp + PARALLEL_COLS_53 * (len - 1) + VREG_INT_COUNT, @@ -739,15 +740,16 @@ static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2( const OPJ_INT32 sn, const OPJ_INT32 len, OPJ_INT32* tiledp_col, - const OPJ_INT32 stride) + const size_t stride) { - OPJ_INT32 i, j; + OPJ_INT32 i; + size_t j; VREG s1_0, s2_0, dc_0, dn_0; VREG s1_1, s2_1, 
dc_1, dn_1; const VREG two = LOAD_CST(2); - const OPJ_INT32* in_even = &tiledp_col[sn * stride]; + const OPJ_INT32* in_even = &tiledp_col[(size_t)sn * stride]; const OPJ_INT32* in_odd = &tiledp_col[0]; assert(len > 2); @@ -807,9 +809,9 @@ static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2( if (!(len & 1)) { /*dn = in_odd[(len / 2 - 1) * stride] - ((s1 + 1) >> 1); */ - dn_0 = SUB(LOADU(in_odd + (len / 2 - 1) * stride), + dn_0 = SUB(LOADU(in_odd + (size_t)(len / 2 - 1) * stride), SAR(ADD3(s1_0, s1_0, two), 2)); - dn_1 = SUB(LOADU(in_odd + (len / 2 - 1) * stride + VREG_INT_COUNT), + dn_1 = SUB(LOADU(in_odd + (size_t)(len / 2 - 1) * stride + VREG_INT_COUNT), SAR(ADD3(s1_1, s1_1, two), 2)); /* tmp[len - 2] = s1 + ((dn + dc) >> 1); */ @@ -849,7 +851,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp, const OPJ_INT32 sn, const OPJ_INT32 len, OPJ_INT32* tiledp_col, - const OPJ_INT32 stride) + const size_t stride) { OPJ_INT32 i, j; OPJ_INT32 d1c, d1n, s1n, s0c, s0n; @@ -860,15 +862,15 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp, /* accesses and explicit interleaving. 
*/ s1n = tiledp_col[0]; - d1n = tiledp_col[sn * stride]; + d1n = tiledp_col[(size_t)sn * stride]; s0n = s1n - ((d1n + 1) >> 1); for (i = 0, j = 0; i < (len - 3); i += 2, j++) { d1c = d1n; s0c = s0n; - s1n = tiledp_col[(j + 1) * stride]; - d1n = tiledp_col[(sn + j + 1) * stride]; + s1n = tiledp_col[(size_t)(j + 1) * stride]; + d1n = tiledp_col[(size_t)(sn + j + 1) * stride]; s0n = s1n - ((d1c + d1n + 2) >> 2); @@ -880,7 +882,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp, if (len & 1) { tmp[len - 1] = - tiledp_col[((len - 1) / 2) * stride] - + tiledp_col[(size_t)((len - 1) / 2) * stride] - ((d1n + 1) >> 1); tmp[len - 2] = d1n + ((s0n + tmp[len - 1]) >> 1); } else { @@ -888,7 +890,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp, } for (i = 0; i < len; ++i) { - tiledp_col[i * stride] = tmp[i]; + tiledp_col[(size_t)i * stride] = tmp[i]; } } @@ -899,11 +901,11 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp, const OPJ_INT32 sn, const OPJ_INT32 len, OPJ_INT32* tiledp_col, - const OPJ_INT32 stride) + const size_t stride) { OPJ_INT32 i, j; OPJ_INT32 s1, s2, dc, dn; - const OPJ_INT32* in_even = &tiledp_col[sn * stride]; + const OPJ_INT32* in_even = &tiledp_col[(size_t)sn * stride]; const OPJ_INT32* in_odd = &tiledp_col[0]; assert(len > 2); @@ -916,9 +918,9 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp, tmp[0] = in_even[0] + dc; for (i = 1, j = 1; i < (len - 2 - !(len & 1)); i += 2, j++) { - s2 = in_even[(j + 1) * stride]; + s2 = in_even[(size_t)(j + 1) * stride]; - dn = in_odd[j * stride] - ((s1 + s2 + 2) >> 2); + dn = in_odd[(size_t)j * stride] - ((s1 + s2 + 2) >> 2); tmp[i ] = dc; tmp[i + 1] = s1 + ((dn + dc) >> 1); @@ -927,7 +929,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp, } tmp[i] = dc; if (!(len & 1)) { - dn = in_odd[(len / 2 - 1) * stride] - ((s1 + 1) >> 1); + dn = in_odd[(size_t)(len / 2 - 1) * stride] - ((s1 + 1) >> 1); tmp[len - 2] = s1 + ((dn + dc) >> 1); tmp[len - 1] = dn; } else { @@ -935,7 +937,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp, } for (i 
= 0; i < len; ++i) { - tiledp_col[i * stride] = tmp[i]; + tiledp_col[(size_t)i * stride] = tmp[i]; } } #endif /* !defined(STANDARD_SLOW_VERSION) */ @@ -946,7 +948,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp, /* Performs interleave, inverse wavelet transform and copy back to buffer */ static void opj_idwt53_v(const opj_dwt_t *dwt, OPJ_INT32* tiledp_col, - OPJ_INT32 stride, + size_t stride, OPJ_INT32 nb_cols) { #ifdef STANDARD_SLOW_VERSION @@ -994,14 +996,14 @@ static void opj_idwt53_v(const opj_dwt_t *dwt, OPJ_INT32* out = dwt->mem; for (c = 0; c < nb_cols; c++, tiledp_col++) { OPJ_INT32 i; - const OPJ_INT32* in_even = &tiledp_col[sn * stride]; + const OPJ_INT32* in_even = &tiledp_col[(size_t)sn * stride]; const OPJ_INT32* in_odd = &tiledp_col[0]; out[1] = in_odd[0] - ((in_even[0] + 1) >> 1); out[0] = in_even[0] + out[1]; for (i = 0; i < len; ++i) { - tiledp_col[i * stride] = out[i]; + tiledp_col[(size_t)i * stride] = out[i]; } } @@ -1341,11 +1343,11 @@ static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls) job = (opj_dwd_decode_v_job_t*)user_data; for (j = job->min_j; j + PARALLEL_COLS_53 <= job->max_j; j += PARALLEL_COLS_53) { - opj_idwt53_v(&job->v, &job->tiledp[j], (OPJ_INT32)job->w, + opj_idwt53_v(&job->v, &job->tiledp[j], (size_t)job->w, PARALLEL_COLS_53); } if (j < job->max_j) - opj_idwt53_v(&job->v, &job->tiledp[j], (OPJ_INT32)job->w, + opj_idwt53_v(&job->v, &job->tiledp[j], (size_t)job->w, (OPJ_INT32)(job->max_j - j)); opj_aligned_free(job->v.mem); @@ -1413,7 +1415,7 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, if (num_threads <= 1 || rh <= 1) { for (j = 0; j < rh; ++j) { - opj_idwt53_h(&h, &tiledp[j * w]); + opj_idwt53_h(&h, &tiledp[(size_t)j * w]); } } else { OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; @@ -1465,10 +1467,10 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, if (num_threads <= 1 || rw <= 1) { for (j = 0; j + PARALLEL_COLS_53 <= rw; j += PARALLEL_COLS_53) { - opj_idwt53_v(&v, &tiledp[j], 
(OPJ_INT32)w, PARALLEL_COLS_53); + opj_idwt53_v(&v, &tiledp[j], (size_t)w, PARALLEL_COLS_53); } if (j < rw) { - opj_idwt53_v(&v, &tiledp[j], (OPJ_INT32)w, (OPJ_INT32)(rw - j)); + opj_idwt53_v(&v, &tiledp[j], (size_t)w, (OPJ_INT32)(rw - j)); } } else { OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; @@ -2035,14 +2037,16 @@ static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt, OPJ_UINT32 i; for (i = dwt->win_l_x0; i < dwt->win_l_x1; ++i) { - memcpy(&bi[i * 2], &a[i * width], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); + memcpy(&bi[i * 2], &a[i * (size_t)width], + (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); } - a += (OPJ_UINT32)dwt->sn * width; + a += (OPJ_UINT32)dwt->sn * (size_t)width; bi = dwt->wavelet + 1 - dwt->cas; for (i = dwt->win_h_x0; i < dwt->win_h_x1; ++i) { - memcpy(&bi[i * 2], &a[i * width], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); + memcpy(&bi[i * 2], &a[i * (size_t)width], + (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); } } @@ -2325,9 +2329,9 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, for (k = 0; k < rw; k++) { aj[k ] = h.wavelet[k].f[0]; - aj[k + w ] = h.wavelet[k].f[1]; - aj[k + w * 2] = h.wavelet[k].f[2]; - aj[k + w * 3] = h.wavelet[k].f[3]; + aj[k + (size_t)w ] = h.wavelet[k].f[1]; + aj[k + (size_t)w * 2] = h.wavelet[k].f[2]; + aj[k + (size_t)w * 3] = h.wavelet[k].f[3]; } aj += w * 4; @@ -2340,10 +2344,10 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, for (k = 0; k < rw; k++) { switch (rh - j) { case 3: - aj[k + w * 2] = h.wavelet[k].f[2]; + aj[k + (size_t)w * 2] = h.wavelet[k].f[2]; /* FALLTHRU */ case 2: - aj[k + w ] = h.wavelet[k].f[1]; + aj[k + (size_t)w ] = h.wavelet[k].f[1]; /* FALLTHRU */ case 1: aj[k] = h.wavelet[k].f[0]; @@ -2366,7 +2370,7 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, opj_v4dwt_decode(&v); for (k = 0; k < rh; ++k) { - memcpy(&aj[k * w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32)); + memcpy(&aj[k * (size_t)w], &v.wavelet[k], 
4 * sizeof(OPJ_FLOAT32)); } aj += 4; } @@ -2380,7 +2384,7 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, opj_v4dwt_decode(&v); for (k = 0; k < rh; ++k) { - memcpy(&aj[k * w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32)); + memcpy(&aj[k * (size_t)w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32)); } } } diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c index 400fcaa4a..aef0c1c02 100644 --- a/src/lib/openjp2/j2k.c +++ b/src/lib/openjp2/j2k.c @@ -10923,7 +10923,7 @@ OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k, { OPJ_UINT32 i, j; OPJ_UINT32 l_nb_tiles; - OPJ_UINT32 l_max_tile_size = 0, l_current_tile_size; + OPJ_SIZE_T l_max_tile_size = 0, l_current_tile_size; OPJ_BYTE * l_current_data = 00; OPJ_BOOL l_reuse_data = OPJ_FALSE; opj_tcd_t* p_tcd = 00; diff --git a/src/lib/openjp2/mct.c b/src/lib/openjp2/mct.c index 8c82ee20a..b79d4b87c 100644 --- a/src/lib/openjp2/mct.c +++ b/src/lib/openjp2/mct.c @@ -77,7 +77,7 @@ void opj_mct_encode( OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, OPJ_INT32* OPJ_RESTRICT c2, - OPJ_UINT32 n) + OPJ_SIZE_T n) { OPJ_SIZE_T i; const OPJ_SIZE_T len = n; @@ -119,7 +119,7 @@ void opj_mct_encode( OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, OPJ_INT32* OPJ_RESTRICT c2, - OPJ_UINT32 n) + OPJ_SIZE_T n) { OPJ_SIZE_T i; const OPJ_SIZE_T len = n; @@ -146,7 +146,7 @@ void opj_mct_decode( OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, OPJ_INT32* OPJ_RESTRICT c2, - OPJ_UINT32 n) + OPJ_SIZE_T n) { OPJ_SIZE_T i; const OPJ_SIZE_T len = n; @@ -181,7 +181,7 @@ void opj_mct_decode( OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, OPJ_INT32* OPJ_RESTRICT c2, - OPJ_UINT32 n) + OPJ_SIZE_T n) { OPJ_UINT32 i; for (i = 0; i < n; ++i) { @@ -214,7 +214,7 @@ void opj_mct_encode_real( OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, OPJ_INT32* OPJ_RESTRICT c2, - OPJ_UINT32 n) + OPJ_SIZE_T n) { OPJ_SIZE_T i; const OPJ_SIZE_T len = n; @@ -359,7 +359,7 @@ void opj_mct_encode_real( OPJ_INT32* 
OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, OPJ_INT32* OPJ_RESTRICT c2, - OPJ_UINT32 n) + OPJ_SIZE_T n) { OPJ_UINT32 i; for (i = 0; i < n; ++i) { @@ -386,7 +386,7 @@ void opj_mct_decode_real( OPJ_FLOAT32* OPJ_RESTRICT c0, OPJ_FLOAT32* OPJ_RESTRICT c1, OPJ_FLOAT32* OPJ_RESTRICT c2, - OPJ_UINT32 n) + OPJ_SIZE_T n) { OPJ_UINT32 i; #ifdef __SSE__ @@ -451,13 +451,13 @@ OPJ_FLOAT64 opj_mct_getnorm_real(OPJ_UINT32 compno) OPJ_BOOL opj_mct_encode_custom( OPJ_BYTE * pCodingdata, - OPJ_UINT32 n, + OPJ_SIZE_T n, OPJ_BYTE ** pData, OPJ_UINT32 pNbComp, OPJ_UINT32 isSigned) { OPJ_FLOAT32 * lMct = (OPJ_FLOAT32 *) pCodingdata; - OPJ_UINT32 i; + OPJ_SIZE_T i; OPJ_UINT32 j; OPJ_UINT32 k; OPJ_UINT32 lNbMatCoeff = pNbComp * pNbComp; @@ -505,13 +505,13 @@ OPJ_BOOL opj_mct_encode_custom( OPJ_BOOL opj_mct_decode_custom( OPJ_BYTE * pDecodingData, - OPJ_UINT32 n, + OPJ_SIZE_T n, OPJ_BYTE ** pData, OPJ_UINT32 pNbComp, OPJ_UINT32 isSigned) { OPJ_FLOAT32 * lMct; - OPJ_UINT32 i; + OPJ_SIZE_T i; OPJ_UINT32 j; OPJ_UINT32 k; diff --git a/src/lib/openjp2/mct.h b/src/lib/openjp2/mct.h index 0ed980e89..2e37ce733 100644 --- a/src/lib/openjp2/mct.h +++ b/src/lib/openjp2/mct.h @@ -61,7 +61,7 @@ Apply a reversible multi-component transform to an image @param n Number of samples for each component */ void opj_mct_encode(OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, - OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n); + OPJ_INT32* OPJ_RESTRICT c2, OPJ_SIZE_T n); /** Apply a reversible multi-component inverse transform to an image @param c0 Samples for luminance component @@ -70,7 +70,7 @@ Apply a reversible multi-component inverse transform to an image @param n Number of samples for each component */ void opj_mct_decode(OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, - OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n); + OPJ_INT32* OPJ_RESTRICT c2, OPJ_SIZE_T n); /** Get norm of the basis function used for the reversible multi-component transform @param compno Number of the component (0->Y, 1->U, 2->V) 
@@ -86,7 +86,7 @@ Apply an irreversible multi-component transform to an image @param n Number of samples for each component */ void opj_mct_encode_real(OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, - OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n); + OPJ_INT32* OPJ_RESTRICT c2, OPJ_SIZE_T n); /** Apply an irreversible multi-component inverse transform to an image @param c0 Samples for luminance component @@ -95,7 +95,7 @@ Apply an irreversible multi-component inverse transform to an image @param n Number of samples for each component */ void opj_mct_decode_real(OPJ_FLOAT32* OPJ_RESTRICT c0, - OPJ_FLOAT32* OPJ_RESTRICT c1, OPJ_FLOAT32* OPJ_RESTRICT c2, OPJ_UINT32 n); + OPJ_FLOAT32* OPJ_RESTRICT c1, OPJ_FLOAT32* OPJ_RESTRICT c2, OPJ_SIZE_T n); /** Get norm of the basis function used for the irreversible multi-component transform @param compno Number of the component (0->Y, 1->U, 2->V) @@ -114,7 +114,7 @@ FIXME DOC */ OPJ_BOOL opj_mct_encode_custom( OPJ_BYTE * p_coding_data, - OPJ_UINT32 n, + OPJ_SIZE_T n, OPJ_BYTE ** p_data, OPJ_UINT32 p_nb_comp, OPJ_UINT32 is_signed); @@ -129,7 +129,7 @@ FIXME DOC */ OPJ_BOOL opj_mct_decode_custom( OPJ_BYTE * pDecodingData, - OPJ_UINT32 n, + OPJ_SIZE_T n, OPJ_BYTE ** pData, OPJ_UINT32 pNbComp, OPJ_UINT32 isSigned); diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c index fb552f8b5..3402dca2d 100644 --- a/src/lib/openjp2/sparse_array.c +++ b/src/lib/openjp2/sparse_array.c @@ -141,27 +141,30 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( if (src_block == NULL) { for (j = 0; j < y_incr; j++) { if (buf_col_stride == 1) { - memset(buf + (y - y0 + j) * buf_line_stride + (x - x0) * buf_col_stride, + memset(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride, 0, sizeof(OPJ_INT32) * x_incr); } else { OPJ_UINT32 k; for (k = 0; k < x_incr; k++) { - *(buf + (y - y0 + j) * buf_line_stride + (x - x0 + k) * buf_col_stride) = 0; + *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + 
k) * buf_col_stride) + = 0; } } } } else { for (j = 0; j < y_incr; j++) { if (buf_col_stride == 1) { - memcpy(buf + (y - y0 + j) * buf_line_stride + (x - x0) * buf_col_stride, - src_block + (block_y_offset + j) * sa->block_width + block_x_offset, + memcpy(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride, + src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset, sizeof(OPJ_INT32) * x_incr); } else { OPJ_UINT32 k; for (k = 0; k < x_incr; k++) { - *(buf + (y - y0 + j) * buf_line_stride + (x - x0 + k) * buf_col_stride) = - *(src_block + (block_y_offset + j) * sa->block_width + block_x_offset + k); + *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride) + = + *(src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset + + k); } } } @@ -178,14 +181,16 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( for (j = 0; j < y_incr; j++) { if (buf_col_stride == 1) { - memcpy(src_block + (block_y_offset + j) * sa->block_width + block_x_offset, - buf + (y - y0 + j) * buf_line_stride + (x - x0) * buf_col_stride, + memcpy(src_block + (block_y_offset + j) * (size_t)sa->block_width + + block_x_offset, + buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride, sizeof(OPJ_INT32) * x_incr); } else { OPJ_UINT32 k; for (k = 0; k < x_incr; k++) { - *(src_block + (block_y_offset + j) * sa->block_width + block_x_offset + k) = - *(buf + (y - y0 + j) * buf_line_stride + (x - x0 + k) * buf_col_stride); + *(src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset + + k) = + *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride); } } } diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 953c7ab14..44a2f2437 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -1696,8 +1696,8 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) } } } else if (tccp->qmfbid == 1) { - OPJ_INT32* OPJ_RESTRICT tiledp = 
&tilec->data[(OPJ_UINT32)y * tile_w + - (OPJ_UINT32)x]; + OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(size_t)y * tile_w + + (size_t)x]; for (j = 0; j < cblk_h; ++j) { i = 0; for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) { @@ -1705,19 +1705,19 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U]; OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U]; OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U]; - ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 0U] = tmp0 / 2; - ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 1U] = tmp1 / 2; - ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 2U] = tmp2 / 2; - ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 3U] = tmp3 / 2; + ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 0U] = tmp0 / 2; + ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 1U] = tmp1 / 2; + ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 2U] = tmp2 / 2; + ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 3U] = tmp3 / 2; } for (; i < cblk_w; ++i) { OPJ_INT32 tmp = datap[(j * cblk_w) + i]; - ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp / 2; + ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i] = tmp / 2; } } } else { /* if (tccp->qmfbid == 0) */ - OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_UINT32)y * - tile_w + (OPJ_UINT32)x]; + OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(size_t)y * + tile_w + (size_t)x]; for (j = 0; j < cblk_h; ++j) { OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp; for (i = 0; i < cblk_w; ++i) { @@ -2052,7 +2052,8 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1, OPJ_INT32* OPJ_RESTRICT tiledp; OPJ_UINT32 cblk_w; OPJ_UINT32 cblk_h; - OPJ_UINT32 i, j, tileIndex = 0, tileLineAdvance; + OPJ_UINT32 i, j, tileLineAdvance; + size_t tileIndex = 0; OPJ_INT32 x = cblk->x0 - band->x0; OPJ_INT32 y = cblk->y0 - band->y0; @@ -2076,7 +2077,7 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1, cblk_h = t1->h; tileLineAdvance = tile_w - cblk_w; - tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x]; + 
tiledp = &tilec->data[(size_t)y * tile_w + (size_t)x]; t1->data = tiledp; t1->data_stride = tile_w; if (tccp->qmfbid == 1) { diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index 1213f757d..485eb9e2f 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -1974,17 +1974,17 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager) p_tcd->image->comps[1].resno_decoded; opj_tcd_resolution_t* res_comp2 = l_tile->comps[2].resolutions + p_tcd->image->comps[2].resno_decoded; - OPJ_INT32 l_res_samples = (OPJ_INT32)(res_comp0->x1 - res_comp0->x0) * - (res_comp0->y1 - res_comp0->y0); + size_t l_res_samples = (size_t)(res_comp0->x1 - res_comp0->x0) * + (size_t)(res_comp0->y1 - res_comp0->y0); /* testcase 1336.pdf.asan.47.376 */ if (p_tcd->image->comps[0].resno_decoded != p_tcd->image->comps[1].resno_decoded || p_tcd->image->comps[0].resno_decoded != p_tcd->image->comps[2].resno_decoded || - (res_comp1->x1 - res_comp1->x0) * (res_comp1->y1 - - res_comp1->y0) != l_res_samples || - (res_comp2->x1 - res_comp2->x0) * (res_comp2->y1 - - res_comp2->y0) != l_res_samples) { + (size_t)(res_comp1->x1 - res_comp1->x0) * + (size_t)(res_comp1->y1 - res_comp1->y0) != l_res_samples || + (size_t)(res_comp2->x1 - res_comp2->x0) * + (size_t)(res_comp2->y1 - res_comp2->y0) != l_res_samples) { opj_event_msg(p_manager, EVT_ERROR, "Tiles don't all have the same dimension. 
Skip the MCT step.\n"); return OPJ_FALSE; @@ -2225,9 +2225,10 @@ static void opj_tcd_code_block_enc_deallocate(opj_tcd_precinct_t * p_precinct) } } -OPJ_UINT32 opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd) +OPJ_SIZE_T opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd) { - OPJ_UINT32 i, l_data_size = 0; + OPJ_UINT32 i; + OPJ_SIZE_T l_data_size = 0; opj_image_comp_t * l_img_comp = 00; opj_tcd_tilecomp_t * l_tilec = 00; OPJ_UINT32 l_size_comp, l_remaining; @@ -2246,8 +2247,8 @@ OPJ_UINT32 opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd) l_size_comp = 4; } - l_data_size += l_size_comp * (OPJ_UINT32)((l_tilec->x1 - l_tilec->x0) * - (l_tilec->y1 - l_tilec->y0)); + l_data_size += l_size_comp * ((OPJ_SIZE_T)(l_tilec->x1 - l_tilec->x0) * + (OPJ_SIZE_T)(l_tilec->y1 - l_tilec->y0)); ++l_img_comp; ++l_tilec; } @@ -2262,7 +2263,7 @@ static OPJ_BOOL opj_tcd_dc_level_shift_encode(opj_tcd_t *p_tcd) opj_tccp_t * l_tccp = 00; opj_image_comp_t * l_img_comp = 00; opj_tcd_tile_t * l_tile; - OPJ_UINT32 l_nb_elem, i; + size_t l_nb_elem, i; OPJ_INT32 * l_current_ptr; l_tile = p_tcd->tcd_image->tiles; @@ -2272,8 +2273,8 @@ static OPJ_BOOL opj_tcd_dc_level_shift_encode(opj_tcd_t *p_tcd) for (compno = 0; compno < l_tile->numcomps; compno++) { l_current_ptr = l_tile_comp->data; - l_nb_elem = (OPJ_UINT32)((l_tile_comp->x1 - l_tile_comp->x0) * - (l_tile_comp->y1 - l_tile_comp->y0)); + l_nb_elem = (size_t)(l_tile_comp->x1 - l_tile_comp->x0) * + (size_t)(l_tile_comp->y1 - l_tile_comp->y0); if (l_tccp->qmfbid == 1) { for (i = 0; i < l_nb_elem; ++i) { @@ -2299,8 +2300,8 @@ static OPJ_BOOL opj_tcd_mct_encode(opj_tcd_t *p_tcd) { opj_tcd_tile_t * l_tile = p_tcd->tcd_image->tiles; opj_tcd_tilecomp_t * l_tile_comp = p_tcd->tcd_image->tiles->comps; - OPJ_UINT32 samples = (OPJ_UINT32)((l_tile_comp->x1 - l_tile_comp->x0) * - (l_tile_comp->y1 - l_tile_comp->y0)); + size_t samples = (size_t)(l_tile_comp->x1 - l_tile_comp->x0) * + (size_t)(l_tile_comp->y1 - l_tile_comp->y0); OPJ_UINT32 i; OPJ_BYTE ** l_data = 
00; opj_tcp_t * l_tcp = p_tcd->tcp; @@ -2482,13 +2483,15 @@ static OPJ_BOOL opj_tcd_rate_allocate_encode(opj_tcd_t *p_tcd, OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd, OPJ_BYTE * p_src, - OPJ_UINT32 p_src_length) + OPJ_SIZE_T p_src_length) { - OPJ_UINT32 i, j, l_data_size = 0; + OPJ_UINT32 i; + OPJ_SIZE_T j; + OPJ_SIZE_T l_data_size = 0; opj_image_comp_t * l_img_comp = 00; opj_tcd_tilecomp_t * l_tilec = 00; OPJ_UINT32 l_size_comp, l_remaining; - OPJ_UINT32 l_nb_elem; + OPJ_SIZE_T l_nb_elem; l_data_size = opj_tcd_get_encoded_tile_size(p_tcd); if (l_data_size != p_src_length) { @@ -2500,8 +2503,8 @@ OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd, for (i = 0; i < p_tcd->image->numcomps; ++i) { l_size_comp = l_img_comp->prec >> 3; /*(/ 8)*/ l_remaining = l_img_comp->prec & 7; /* (%8) */ - l_nb_elem = (OPJ_UINT32)((l_tilec->x1 - l_tilec->x0) * (l_tilec->y1 - - l_tilec->y0)); + l_nb_elem = (size_t)(l_tilec->x1 - l_tilec->x0) * + (size_t)(l_tilec->y1 - l_tilec->y0); if (l_remaining) { ++l_size_comp; diff --git a/src/lib/openjp2/tcd.h b/src/lib/openjp2/tcd.h index 7c974c53f..63c22c457 100644 --- a/src/lib/openjp2/tcd.h +++ b/src/lib/openjp2/tcd.h @@ -409,7 +409,7 @@ OPJ_BOOL opj_tcd_update_tile_data(opj_tcd_t *p_tcd, /** * */ -OPJ_UINT32 opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd); +OPJ_SIZE_T opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd); /** * Initialize the tile coder and may reuse some meory. 
@@ -428,7 +428,7 @@ OPJ_BOOL opj_tcd_init_encode_tile(opj_tcd_t *p_tcd, */ OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd, OPJ_BYTE * p_src, - OPJ_UINT32 p_src_length); + OPJ_SIZE_T p_src_length); /** * Allocates tile component data From 5d07d463fdb0a5eeffa90eba1566cc21697011b8 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:30:45 +0200 Subject: [PATCH 11/29] opj_j2k_decode_tiles(): apply whole single tile image decoding optimization to reading at reduced resolution as well --- src/lib/openjp2/j2k.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c index aef0c1c02..f1a894a4b 100644 --- a/src/lib/openjp2/j2k.c +++ b/src/lib/openjp2/j2k.c @@ -10474,8 +10474,7 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, p_j2k->m_output_image->x0 == 0 && p_j2k->m_output_image->y0 == 0 && p_j2k->m_output_image->x1 == p_j2k->m_cp.tdx && - p_j2k->m_output_image->y1 == p_j2k->m_cp.tdy && - p_j2k->m_output_image->comps[0].factor == 0) { + p_j2k->m_output_image->y1 == p_j2k->m_cp.tdy) { OPJ_UINT32 i; if (! opj_j2k_read_tile_header(p_j2k, &l_current_tile_no, From 0ae3cba3404674bbe2028ea9a801301a4c951b33 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:30:48 +0200 Subject: [PATCH 12/29] Allow several repeated calls to opj_set_decode_area() and opj_decode() for single-tiled images * Only works for single-tiled images --> will error out cleanly, as currently in other cases * Save re-reading the codestream for the tile, and re-use code-blocks of the previous decoding pass. * Future improvements might involve improving opj_decompress, and the image writing logic, to use this strategy. 
--- src/lib/openjp2/j2k.c | 62 ++++++++---- src/lib/openjp2/openjpeg.h | 6 ++ src/lib/openjp2/t1.c | 42 +++++++- tests/CMakeLists.txt | 4 + tests/test_decode_area.c | 200 ++++++++++++++++++++++++++++++++----- 5 files changed, 268 insertions(+), 46 deletions(-) diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c index f1a894a4b..e548fefc7 100644 --- a/src/lib/openjp2/j2k.c +++ b/src/lib/openjp2/j2k.c @@ -9147,10 +9147,15 @@ OPJ_BOOL opj_j2k_set_decode_area(opj_j2k_t *p_j2k, OPJ_BOOL ret; OPJ_UINT32 it_comp; + if (p_j2k->m_cp.tw == 1 && p_j2k->m_cp.th == 1 && + &p_j2k->m_cp.tcps[0].m_data != NULL) { + /* In the case of a single-tiled image whose codestream we have already */ + /* ingested, go on */ + } /* Check if we are read the main header */ - if (p_j2k->m_specific_param.m_decoder.m_state != J2K_STATE_TPHSOT) { + else if (p_j2k->m_specific_param.m_decoder.m_state != J2K_STATE_TPHSOT) { opj_event_msg(p_manager, EVT_ERROR, - "Need to decode the main header before begin to decode the remaining codestream"); + "Need to decode the main header before begin to decode the remaining codestream.\n"); return OPJ_FALSE; } @@ -10508,20 +10513,27 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, } for (;;) { - if (! opj_j2k_read_tile_header(p_j2k, - &l_current_tile_no, - NULL, - &l_tile_x0, &l_tile_y0, - &l_tile_x1, &l_tile_y1, - &l_nb_comps, - &l_go_on, - p_stream, - p_manager)) { - return OPJ_FALSE; - } + if (p_j2k->m_cp.tw == 1 && p_j2k->m_cp.th == 1 && + p_j2k->m_cp.tcps[0].m_data != NULL) { + l_current_tile_no = 0; + p_j2k->m_current_tile_number = 0; + p_j2k->m_specific_param.m_decoder.m_state |= J2K_STATE_DATA; + } else { + if (! opj_j2k_read_tile_header(p_j2k, + &l_current_tile_no, + NULL, + &l_tile_x0, &l_tile_y0, + &l_tile_x1, &l_tile_y1, + &l_nb_comps, + &l_go_on, + p_stream, + p_manager)) { + return OPJ_FALSE; + } - if (! l_go_on) { - break; + if (! l_go_on) { + break; + } } if (! 
opj_j2k_decode_tile(p_j2k, l_current_tile_no, NULL, 0, @@ -10538,7 +10550,16 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, p_j2k->m_output_image)) { return OPJ_FALSE; } - opj_j2k_tcp_data_destroy(&p_j2k->m_cp.tcps[l_current_tile_no]); + + if (p_j2k->m_cp.tw == 1 && p_j2k->m_cp.th == 1 && + !(p_j2k->m_output_image->x0 == p_j2k->m_private_image->x0 && + p_j2k->m_output_image->y0 == p_j2k->m_private_image->y0 && + p_j2k->m_output_image->x1 == p_j2k->m_private_image->x1 && + p_j2k->m_output_image->y1 == p_j2k->m_private_image->y1)) { + /* Keep current tcp data */ + } else { + opj_j2k_tcp_data_destroy(&p_j2k->m_cp.tcps[l_current_tile_no]); + } opj_event_msg(p_manager, EVT_INFO, "Image data has been updated with tile %d.\n\n", l_current_tile_no + 1); @@ -10738,9 +10759,11 @@ OPJ_BOOL opj_j2k_decode(opj_j2k_t * p_j2k, } } - p_j2k->m_output_image = opj_image_create0(); - if (!(p_j2k->m_output_image)) { - return OPJ_FALSE; + if (p_j2k->m_output_image == NULL) { + p_j2k->m_output_image = opj_image_create0(); + if (!(p_j2k->m_output_image)) { + return OPJ_FALSE; + } } opj_copy_image_header(p_image, p_j2k->m_output_image); @@ -10760,6 +10783,7 @@ OPJ_BOOL opj_j2k_decode(opj_j2k_t * p_j2k, for (compno = 0; compno < p_image->numcomps; compno++) { p_image->comps[compno].resno_decoded = p_j2k->m_output_image->comps[compno].resno_decoded; + opj_image_data_free(p_image->comps[compno].data); p_image->comps[compno].data = p_j2k->m_output_image->comps[compno].data; #if 0 char fn[256]; diff --git a/src/lib/openjp2/openjpeg.h b/src/lib/openjp2/openjpeg.h index 21755b48d..7020d37d2 100644 --- a/src/lib/openjp2/openjpeg.h +++ b/src/lib/openjp2/openjpeg.h @@ -1340,6 +1340,12 @@ OPJ_API OPJ_BOOL OPJ_CALLCONV opj_read_header(opj_stream_t *p_stream, * that is to say at the highest resolution level, even if requesting the image at lower * resolution levels. * + * Generally opj_set_decode_area() should be followed by opj_decode(), and the + * codec cannot be re-used. 
+ * In the particular case of an image made of a single tile, several sequences of + * calls to opoj_set_decode_area() and opj_decode() are allowed, and will bring + * performance improvements when reading an image by chunks. + * * @param p_codec the jpeg2000 codec. * @param p_image the decoded image previously setted by opj_read_header * @param p_start_x the left position of the rectangle to decode (in image coordinates). diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 44a2f2437..0277f8cc7 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -1668,6 +1668,11 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) } } + /* Both can be non NULL if for example decoding a full tile and then */ + /* partially a tile. In which case partial decoding should be the */ + /* priority */ + assert((cblk->decoded_data != NULL) || (tilec->data != NULL)); + if (cblk->decoded_data) { if (tccp->qmfbid == 1) { for (j = 0; j < cblk_h; ++j) { @@ -1763,6 +1768,17 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd, (OPJ_UINT32)precinct->y0, (OPJ_UINT32)precinct->x1, (OPJ_UINT32)precinct->y1)) { + for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) { + opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno]; + if (cblk->decoded_data) { +#ifdef DEBUG_VERBOSE + printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n", + cblk->x0, cblk->y0, resno, bandno); +#endif + opj_free(cblk->decoded_data); + cblk->decoded_data = NULL; + } + } continue; } @@ -1770,8 +1786,6 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd, opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno]; opj_t1_cblk_decode_processing_job_t* job; - assert(cblk->decoded_data == NULL); - if (!opj_tcd_is_subband_area_of_interest(tcd, tilec->compno, resno, @@ -1780,15 +1794,34 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd, (OPJ_UINT32)cblk->y0, (OPJ_UINT32)cblk->x1, (OPJ_UINT32)cblk->y1)) { + if (cblk->decoded_data) { +#ifdef DEBUG_VERBOSE + printf("Discarding codeblock %d,%d at 
resno=%d, bandno=%d\n", + cblk->x0, cblk->y0, resno, bandno); +#endif + opj_free(cblk->decoded_data); + cblk->decoded_data = NULL; + } continue; } if (!tcd->whole_tile_decoding) { OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0); OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0); + if (cblk->decoded_data != NULL) { +#ifdef DEBUG_VERBOSE + printf("Reusing codeblock %d,%d at resno=%d, bandno=%d\n", + cblk->x0, cblk->y0, resno, bandno); +#endif + continue; + } if (cblk_w == 0 || cblk_h == 0) { continue; } +#ifdef DEBUG_VERBOSE + printf("Decoding codeblock %d,%d at resno=%d, bandno=%d\n", + cblk->x0, cblk->y0, resno, bandno); +#endif /* Zero-init required */ cblk->decoded_data = opj_calloc(1, cblk_w * cblk_h * sizeof(OPJ_INT32)); if (cblk->decoded_data == NULL) { @@ -1803,6 +1836,11 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd, *pret = OPJ_FALSE; return; } + } else if (cblk->decoded_data) { + /* Not sure if that code path can happen, but better be */ + /* safe than sorry */ + opj_free(cblk->decoded_data); + cblk->decoded_data = NULL; } job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1, diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 579e066b8..95b752915 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -103,6 +103,10 @@ add_test(NAME tda_prep_irreversible_203_201_17_19_no_precinct COMMAND test_tile_ add_test(NAME tda_irreversible_203_201_17_19_no_precinct COMMAND test_decode_area -q irreversible_203_201_17_19_no_precinct.j2k) set_property(TEST tda_irreversible_203_201_17_19_no_precinct APPEND PROPERTY DEPENDS tda_prep_irreversible_203_201_17_19_no_precinct) +add_test(NAME tda_prep_strip COMMAND test_tile_encoder 1 256 256 256 256 8 0 tda_single_tile.j2k) +add_test(NAME tda_strip COMMAND test_decode_area -q -strip_height 3 -strip_check tda_single_tile.j2k) +set_property(TEST tda_strip APPEND PROPERTY DEPENDS tda_prep_strip) + add_executable(include_openjpeg include_openjpeg.c) # No image send to the dashboard if lib PNG is not 
available. diff --git a/tests/test_decode_area.c b/tests/test_decode_area.c index e773bc667..922299080 100644 --- a/tests/test_decode_area.c +++ b/tests/test_decode_area.c @@ -99,31 +99,13 @@ static void info_callback(const char *msg, void *client_data) /*fprintf(stdout, "[INFO] %s", msg);*/ } -opj_image_t* decode( - OPJ_BOOL quiet, - const char* input_file, - OPJ_INT32 x0, - OPJ_INT32 y0, - OPJ_INT32 x1, - OPJ_INT32 y1, - OPJ_UINT32* ptilew, - OPJ_UINT32* ptileh, - OPJ_UINT32* pcblkw, - OPJ_UINT32* pcblkh) +static opj_codec_t* create_codec_and_stream(const char* input_file, + opj_stream_t** pOutStream) { opj_dparameters_t l_param; opj_codec_t * l_codec = NULL; - opj_image_t * l_image = NULL; opj_stream_t * l_stream = NULL; - if (!quiet) { - if (x0 != 0 || x1 != 0 || y0 != 0 || y1 != 0) { - printf("Decoding %d,%d,%d,%d\n", x0, y0, x1, y1); - } else { - printf("Decoding full image\n"); - } - } - l_stream = opj_stream_create_default_file_stream(input_file, OPJ_TRUE); if (!l_stream) { fprintf(stderr, "ERROR -> failed to create the stream from the file\n"); @@ -168,6 +150,40 @@ opj_image_t* decode( return NULL; } + *pOutStream = l_stream; + return l_codec; +} + + +opj_image_t* decode( + OPJ_BOOL quiet, + const char* input_file, + OPJ_INT32 x0, + OPJ_INT32 y0, + OPJ_INT32 x1, + OPJ_INT32 y1, + OPJ_UINT32* ptilew, + OPJ_UINT32* ptileh, + OPJ_UINT32* pcblkw, + OPJ_UINT32* pcblkh) +{ + opj_codec_t * l_codec = NULL; + opj_image_t * l_image = NULL; + opj_stream_t * l_stream = NULL; + + if (!quiet) { + if (x0 != 0 || x1 != 0 || y0 != 0 || y1 != 0) { + printf("Decoding %d,%d,%d,%d\n", x0, y0, x1, y1); + } else { + printf("Decoding full image\n"); + } + } + + l_codec = create_codec_and_stream(input_file, &l_stream); + if (l_codec == NULL) { + return NULL; + } + /* Read the main header of the codestream and if necessary the JP2 boxes*/ if (! 
opj_read_header(l_stream, l_codec, &l_image)) { fprintf(stderr, "ERROR -> failed to read the header\n"); @@ -226,6 +242,122 @@ opj_image_t* decode( return l_image; } +int decode_by_strip(OPJ_BOOL quiet, + const char* input_file, + OPJ_UINT32 strip_height, + opj_image_t* full_image) +{ + /* OPJ_UINT32 tilew, tileh; */ + opj_codec_t * l_codec = NULL; + opj_image_t * l_image = NULL; + opj_stream_t * l_stream = NULL; + OPJ_UINT32 x0, y0, x1, y1, y; + + l_codec = create_codec_and_stream(input_file, &l_stream); + if (l_codec == NULL) { + return 1; + } + + /* Read the main header of the codestream and if necessary the JP2 boxes*/ + if (! opj_read_header(l_stream, l_codec, &l_image)) { + fprintf(stderr, "ERROR -> failed to read the header\n"); + opj_stream_destroy(l_stream); + opj_destroy_codec(l_codec); + return 1; + } + + x0 = l_image->x0; + y0 = l_image->y0; + x1 = l_image->x1; + y1 = l_image->y1; + for (y = y0; y < y1; y += strip_height) { + OPJ_UINT32 h_req = strip_height; + if (y + h_req > y1) { + h_req = y1 - y; + } + if (!quiet) { + printf("Decoding %u...%u\n", y, y + h_req); + } + if (!opj_set_decode_area(l_codec, l_image, (OPJ_INT32)x0, (OPJ_INT32)y, + (OPJ_INT32)x1, (OPJ_INT32)(y + h_req))) { + fprintf(stderr, "ERROR -> failed to set the decoded area\n"); + opj_stream_destroy(l_stream); + opj_destroy_codec(l_codec); + opj_image_destroy(l_image); + return 1; + } + + /* Get the decoded image */ + if (!(opj_decode(l_codec, l_stream, l_image))) { + fprintf(stderr, "ERROR -> failed to decode image!\n"); + opj_stream_destroy(l_stream); + opj_destroy_codec(l_codec); + opj_image_destroy(l_image); + return 1; + } + + if (full_image) { + OPJ_UINT32 y_check, x; + OPJ_UINT32 compno; + for (compno = 0; compno < l_image->numcomps; compno ++) { + for (y_check = 0; y_check < h_req; y_check++) { + for (x = x0; x < x1; x++) { + OPJ_INT32 sub_image_val = + l_image->comps[compno].data[y_check * (x1 - x0) + x]; + OPJ_INT32 image_val = + full_image->comps[compno].data[(y + y_check) * 
(x1 - x0) + x]; + if (sub_image_val != image_val) { + fprintf(stderr, + "Difference found at subimage pixel (%u,%u) " + "of compno=%u: got %d, expected %d\n", + x, y_check + y, compno, sub_image_val, image_val); + return 1; + } + } + } + } + } + + } + + /* If image is small enough, try a final whole image read */ + if (x1 - x0 < 10000 && y1 - y0 < 10000) { + if (!quiet) { + printf("Decoding full image\n"); + } + if (!opj_set_decode_area(l_codec, l_image, (OPJ_INT32)x0, (OPJ_INT32)y0, + (OPJ_INT32)x1, (OPJ_INT32)y1)) { + fprintf(stderr, "ERROR -> failed to set the decoded area\n"); + opj_stream_destroy(l_stream); + opj_destroy_codec(l_codec); + opj_image_destroy(l_image); + return 1; + } + + /* Get the decoded image */ + if (!(opj_decode(l_codec, l_stream, l_image))) { + fprintf(stderr, "ERROR -> failed to decode image!\n"); + opj_stream_destroy(l_stream); + opj_destroy_codec(l_codec); + opj_image_destroy(l_image); + return 1; + } + } + + if (! opj_end_decompress(l_codec, l_stream)) { + opj_stream_destroy(l_stream); + opj_destroy_codec(l_codec); + opj_image_destroy(l_image); + return 1; + } + + + opj_stream_destroy(l_stream); + opj_destroy_codec(l_codec); + opj_image_destroy(l_image); + return 0; +} + OPJ_BOOL check_consistency(opj_image_t* p_image, opj_image_t* p_sub_image) { OPJ_UINT32 compno; @@ -273,10 +405,13 @@ int main(int argc, char** argv) OPJ_UINT32 step_x, step_y; OPJ_BOOL quiet = OPJ_FALSE; OPJ_UINT32 nsteps = 100; + OPJ_UINT32 strip_height = 0; + OPJ_BOOL strip_check = OPJ_FALSE; if (argc < 2) { fprintf(stderr, - "Usage: test_decode_area [-q] [-steps n] input_file_jp2_or_jk2 [x0 y0 x1 y1]\n"); + "Usage: test_decode_area [-q] [-steps n] input_file_jp2_or_jk2 [x0 y0 x1 y1]\n" + "or : test_decode_area [-q] [-strip_height h] [-strip_check] input_file_jp2_or_jk2\n"); return 1; } @@ -288,6 +423,11 @@ int main(int argc, char** argv) } else if (strcmp(argv[iarg], "-steps") == 0 && iarg + 1 < argc) { nsteps = (OPJ_UINT32)atoi(argv[iarg + 1]); iarg ++; + } else 
if (strcmp(argv[iarg], "-strip_height") == 0 && iarg + 1 < argc) { + strip_height = (OPJ_UINT32)atoi(argv[iarg + 1]); + iarg ++; + } else if (strcmp(argv[iarg], "-strip_check") == 0) { + strip_check = OPJ_TRUE; } else if (input_file == NULL) { input_file = argv[iarg]; } else if (iarg + 3 < argc) { @@ -300,10 +440,20 @@ int main(int argc, char** argv) } } - l_image = decode(quiet, input_file, 0, 0, 0, 0, - &tilew, &tileh, &cblkw, &cblkh); - if (!l_image) { - return 1; + if (!strip_height || strip_check) { + l_image = decode(quiet, input_file, 0, 0, 0, 0, + &tilew, &tileh, &cblkw, &cblkh); + if (!l_image) { + return 1; + } + } + + if (strip_height) { + int ret = decode_by_strip(quiet, input_file, strip_height, l_image); + if (l_image) { + opj_image_destroy(l_image); + } + return ret; } if (da_x0 != 0 || da_x1 != 0 || da_y0 != 0 || da_y1 != 0) { From b2cc8f7f81242f967b65e76de043e5e31663d793 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:30:50 +0200 Subject: [PATCH 13/29] Optimize reading/write into sparse array --- src/lib/openjp2/sparse_array.c | 126 ++++++++++++++++++---------- src/lib/openjp2/sparse_array.h | 4 +- src/lib/openjp2/test_sparse_array.c | 26 +++++- 3 files changed, 111 insertions(+), 45 deletions(-) diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c index 3402dca2d..b0634f67e 100644 --- a/src/lib/openjp2/sparse_array.c +++ b/src/lib/openjp2/sparse_array.c @@ -91,7 +91,7 @@ void opj_sparse_array_int32_free(opj_sparse_array_int32_t* sa) } } -OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa, +OPJ_BOOL opj_sparse_array_is_region_valid(const opj_sparse_array_int32_t* sa, OPJ_UINT32 x0, OPJ_UINT32 y0, OPJ_UINT32 x1, @@ -102,7 +102,7 @@ OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa, } static OPJ_BOOL opj_sparse_array_int32_read_or_write( - opj_sparse_array_int32_t* sa, + const opj_sparse_array_int32_t* sa, OPJ_UINT32 x0, OPJ_UINT32 y0, OPJ_UINT32 x1, @@ -115,6 
+115,8 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( { OPJ_UINT32 y, block_y; OPJ_UINT32 y_incr = 0; + const OPJ_UINT32 block_width = sa->block_width; + if (!opj_sparse_array_is_region_valid(sa, x0, y0, x1, y1)) { return forgiving; } @@ -128,43 +130,64 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( sa->block_height; block_y_offset = sa->block_height - y_incr; y_incr = opj_uint_min(y_incr, y1 - y); - block_x = x0 / sa->block_width; + block_x = x0 / block_width; for (x = x0; x < x1; block_x ++, x += x_incr) { OPJ_UINT32 j; OPJ_UINT32 block_x_offset; OPJ_INT32* src_block; - x_incr = (x == x0) ? sa->block_width - (x0 % sa->block_width) : sa->block_width; - block_x_offset = sa->block_width - x_incr; + x_incr = (x == x0) ? block_width - (x0 % block_width) : block_width; + block_x_offset = block_width - x_incr; x_incr = opj_uint_min(x_incr, x1 - x); src_block = sa->data_blocks[block_y * sa->block_count_hor + block_x]; if (is_read_op) { if (src_block == NULL) { - for (j = 0; j < y_incr; j++) { - if (buf_col_stride == 1) { - memset(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride, - 0, - sizeof(OPJ_INT32) * x_incr); - } else { + if (buf_col_stride == 1) { + OPJ_INT32* dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + + (x - x0) * buf_col_stride; + for (j = 0; j < y_incr; j++) { + memset(dest_ptr, 0, sizeof(OPJ_INT32) * x_incr); + dest_ptr += buf_line_stride; + } + } else { + OPJ_INT32* dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + + (x - x0) * buf_col_stride; + for (j = 0; j < y_incr; j++) { OPJ_UINT32 k; for (k = 0; k < x_incr; k++) { - *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride) - = 0; + dest_ptr[k * buf_col_stride] = 0; } + dest_ptr += buf_line_stride; } } } else { - for (j = 0; j < y_incr; j++) { - if (buf_col_stride == 1) { - memcpy(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride, - src_block + (block_y_offset + j) * (size_t)sa->block_width + 
block_x_offset, - sizeof(OPJ_INT32) * x_incr); + const OPJ_INT32* OPJ_RESTRICT src_ptr = src_block + block_y_offset * + (size_t)block_width + block_x_offset; + if (buf_col_stride == 1) { + OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + + (x - x0) * buf_col_stride; + for (j = 0; j < y_incr; j++) { + memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); + dest_ptr += buf_line_stride; + src_ptr += block_width; + } + } else { + OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + + (x - x0) * buf_col_stride; + if (x_incr == 1) { + for (j = 0; j < y_incr; j++) { + *dest_ptr = *src_ptr; + dest_ptr += buf_line_stride; + src_ptr += block_width; + } } else { - OPJ_UINT32 k; - for (k = 0; k < x_incr; k++) { - *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride) - = - *(src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset + - k); + /* General case */ + for (j = 0; j < y_incr; j++) { + OPJ_UINT32 k; + for (k = 0; k < x_incr; k++) { + dest_ptr[k * buf_col_stride] = src_ptr[k]; + } + dest_ptr += buf_line_stride; + src_ptr += block_width; } } } @@ -179,18 +202,36 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( sa->data_blocks[block_y * sa->block_count_hor + block_x] = src_block; } - for (j = 0; j < y_incr; j++) { - if (buf_col_stride == 1) { - memcpy(src_block + (block_y_offset + j) * (size_t)sa->block_width + - block_x_offset, - buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride, - sizeof(OPJ_INT32) * x_incr); + if (buf_col_stride == 1) { + OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset * + (size_t)block_width + block_x_offset; + const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) * + (size_t)buf_line_stride + (x - x0) * buf_col_stride; + for (j = 0; j < y_incr; j++) { + memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); + dest_ptr += block_width; + src_ptr += buf_line_stride; + } + } else { + OPJ_INT32* OPJ_RESTRICT 
dest_ptr = src_block + block_y_offset * + (size_t)block_width + block_x_offset; + const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) * + (size_t)buf_line_stride + (x - x0) * buf_col_stride; + if (x_incr == 1) { + for (j = 0; j < y_incr; j++) { + *dest_ptr = *src_ptr; + src_ptr += buf_line_stride; + dest_ptr += block_width; + } } else { - OPJ_UINT32 k; - for (k = 0; k < x_incr; k++) { - *(src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset + - k) = - *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride); + /* General case */ + for (j = 0; j < y_incr; j++) { + OPJ_UINT32 k; + for (k = 0; k < x_incr; k++) { + dest_ptr[k] = src_ptr[k * buf_col_stride]; + } + src_ptr += buf_line_stride; + dest_ptr += block_width; } } } @@ -201,7 +242,7 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( return OPJ_TRUE; } -OPJ_BOOL opj_sparse_array_int32_read(opj_sparse_array_int32_t* sa, +OPJ_BOOL opj_sparse_array_int32_read(const opj_sparse_array_int32_t* sa, OPJ_UINT32 x0, OPJ_UINT32 y0, OPJ_UINT32 x1, @@ -211,12 +252,13 @@ OPJ_BOOL opj_sparse_array_int32_read(opj_sparse_array_int32_t* sa, OPJ_UINT32 dest_line_stride, OPJ_BOOL forgiving) { - return opj_sparse_array_int32_read_or_write(sa, x0, y0, x1, y1, - dest, - dest_col_stride, - dest_line_stride, - forgiving, - OPJ_TRUE); + return opj_sparse_array_int32_read_or_write( + (opj_sparse_array_int32_t*)sa, x0, y0, x1, y1, + dest, + dest_col_stride, + dest_line_stride, + forgiving, + OPJ_TRUE); } OPJ_BOOL opj_sparse_array_int32_write(opj_sparse_array_int32_t* sa, diff --git a/src/lib/openjp2/sparse_array.h b/src/lib/openjp2/sparse_array.h index 485cafeae..130fe13ef 100644 --- a/src/lib/openjp2/sparse_array.h +++ b/src/lib/openjp2/sparse_array.h @@ -77,7 +77,7 @@ void opj_sparse_array_int32_free(opj_sparse_array_int32_t* sa); * @param y1 bottom y coordinate (not included) of the region. Must be greater than y0. * @return OPJ_TRUE or OPJ_FALSE. 
*/ -OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa, +OPJ_BOOL opj_sparse_array_is_region_valid(const opj_sparse_array_int32_t* sa, OPJ_UINT32 x0, OPJ_UINT32 y0, OPJ_UINT32 x1, @@ -99,7 +99,7 @@ OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa, * @param forgiving if set to TRUE and the region is invalid, OPJ_TRUE will still be returned. * @return OPJ_TRUE in case of success. */ -OPJ_BOOL opj_sparse_array_int32_read(opj_sparse_array_int32_t* sa, +OPJ_BOOL opj_sparse_array_int32_read(const opj_sparse_array_int32_t* sa, OPJ_UINT32 x0, OPJ_UINT32 y0, OPJ_UINT32 x1, diff --git a/src/lib/openjp2/test_sparse_array.c b/src/lib/openjp2/test_sparse_array.c index 0b49110fb..8e1364515 100644 --- a/src/lib/openjp2/test_sparse_array.c +++ b/src/lib/openjp2/test_sparse_array.c @@ -92,6 +92,7 @@ int main() ret = opj_sparse_array_int32_write(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1, OPJ_FALSE); assert(ret); + buffer[0] = 2; ret = opj_sparse_array_int32_write(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1, OPJ_FALSE); @@ -105,6 +106,29 @@ int main() assert(buffer[0] == 2); assert(buffer[1] == 0xFF); + buffer[0] = 0xFF; + buffer[1] = 0xFF; + buffer[2] = 0xFF; + ret = opj_sparse_array_int32_read(sa, 4, 5, 4 + 1, 5 + 2, buffer, 0, 1, + OPJ_FALSE); + assert(ret); + assert(buffer[0] == 2); + assert(buffer[1] == 0); + assert(buffer[2] == 0xFF); + + buffer[0] = 3; + ret = opj_sparse_array_int32_write(sa, 4, 5, 4 + 1, 5 + 1, buffer, 0, 1, + OPJ_FALSE); + assert(ret); + + buffer[0] = 0; + buffer[1] = 0xFF; + ret = opj_sparse_array_int32_read(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1, + OPJ_FALSE); + assert(ret); + assert(buffer[0] == 3); + assert(buffer[1] == 0xFF); + w = 15 + 1; h = 17 + 1; memset(buffer, 0xFF, sizeof(buffer)); @@ -114,7 +138,7 @@ int main() for (j = 0; j < h; j++) { for (i = 0; i < w; i++) { if (i == 4 - 2 && j == 5 - 1) { - assert(buffer[ j * w + i ] == 2); + assert(buffer[ j * w + i ] == 3); } else { assert(buffer[ j * w + i ] == 0); } From 
1644665a9105e2d761e8b29f43af602560e6bcf9 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:30:52 +0200 Subject: [PATCH 14/29] opj_j2k_update_image_data(): avoid zero-ing the buffer if not needed --- src/lib/openjp2/j2k.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c index e548fefc7..8fb56e37d 100644 --- a/src/lib/openjp2/j2k.c +++ b/src/lib/openjp2/j2k.c @@ -8926,6 +8926,7 @@ static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, OPJ_INT32 res_x0, res_x1, res_y0, res_y1; OPJ_UINT32 src_data_stride; const OPJ_INT32* p_src_data; + OPJ_BOOL check_if_must_memset = OPJ_FALSE; /* Allocate output component buffer if necessary */ if (!l_img_comp_dest->data) { @@ -8942,8 +8943,8 @@ static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, if (! l_img_comp_dest->data) { return OPJ_FALSE; } - /* Do we really need this memset ? */ - memset(l_img_comp_dest->data, 0, l_width * l_height * sizeof(OPJ_INT32)); + + check_if_must_memset = OPJ_TRUE; } /* Copy info from decoded comp image to output image */ @@ -9061,6 +9062,12 @@ static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, } /*-----*/ + if (check_if_must_memset && (l_img_comp_dest->w != l_width_dest || + l_img_comp_dest->h != l_height_dest)) { + memset(l_img_comp_dest->data, 0, + (OPJ_SIZE_T)l_img_comp_dest->w * l_img_comp_dest->h * sizeof(OPJ_INT32)); + } + /* Compute the input buffer offset */ l_start_offset_src = (OPJ_SIZE_T)l_offset_x0_src + (OPJ_SIZE_T)l_offset_y0_src * (OPJ_SIZE_T)src_data_stride; From 82a43d8035ca5f43e198e15faef6c9ac44b39541 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:30:54 +0200 Subject: [PATCH 15/29] Optimize opj_dwt_decode_partial_1() when cas == 0 --- src/lib/openjp2/dwt.c | 45 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index d0cd3b4e8..153bfa40b 100644 --- 
a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ -1584,12 +1584,55 @@ static void opj_dwt_decode_partial_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, if (!cas) { if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */ - for (i = win_l_x0; i < win_l_x1; i++) { + + /* Naive version is : + for (i = win_l_x0; i < i_max; i++) { OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2; } for (i = win_h_x0; i < win_h_x1; i++) { OPJ_D(i) += (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1; } + but the compiler doesn't manage to unroll it to avoid bound + checking in OPJ_S_ and OPJ_D_ macros + */ + + i = win_l_x0; + if (i < win_l_x1) { + OPJ_INT32 i_max; + + /* Left-most case */ + OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2; + i ++; + + i_max = win_l_x1; + if (i_max > dn) { + i_max = dn; + } + for (; i < i_max; i++) { + /* No bound checking */ + OPJ_S(i) -= (OPJ_D(i - 1) + OPJ_D(i) + 2) >> 2; + } + for (; i < win_l_x1; i++) { + /* Right-most case */ + OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2; + } + } + + i = win_h_x0; + if (i < win_h_x1) { + OPJ_INT32 i_max = win_h_x1; + if (i_max >= sn) { + i_max = sn - 1; + } + for (; i < i_max; i++) { + /* No bound checking */ + OPJ_D(i) += (OPJ_S(i) + OPJ_S(i + 1)) >> 1; + } + for (; i < win_h_x1; i++) { + /* Right-most case */ + OPJ_D(i) += (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1; + } + } } } else { if (!sn && dn == 1) { /* NEW : CASE ONE ELEMENT */ From 18635df518602265b71da3c70c4d3b774bc3dfc2 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:30:56 +0200 Subject: [PATCH 16/29] test_decode_area: accept user bounds in -strip_height mode --- tests/test_decode_area.c | 45 ++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/tests/test_decode_area.c b/tests/test_decode_area.c index 922299080..59da86aeb 100644 --- a/tests/test_decode_area.c +++ b/tests/test_decode_area.c @@ -245,6 +245,10 @@ opj_image_t* decode( int decode_by_strip(OPJ_BOOL quiet, const char* input_file, OPJ_UINT32 
strip_height, + OPJ_INT32 da_x0, + OPJ_INT32 da_y0, + OPJ_INT32 da_x1, + OPJ_INT32 da_y1, opj_image_t* full_image) { /* OPJ_UINT32 tilew, tileh; */ @@ -252,6 +256,7 @@ int decode_by_strip(OPJ_BOOL quiet, opj_image_t * l_image = NULL; opj_stream_t * l_stream = NULL; OPJ_UINT32 x0, y0, x1, y1, y; + OPJ_UINT32 full_x0, full_y0, full_x1, full_y1; l_codec = create_codec_and_stream(input_file, &l_stream); if (l_codec == NULL) { @@ -266,10 +271,22 @@ int decode_by_strip(OPJ_BOOL quiet, return 1; } - x0 = l_image->x0; - y0 = l_image->y0; - x1 = l_image->x1; - y1 = l_image->y1; + full_x0 = l_image->x0; + full_y0 = l_image->y0; + full_x1 = l_image->x1; + full_y1 = l_image->y1; + + if (da_x0 != 0 || da_y0 != 0 || da_x1 != 0 || da_y1 != 0) { + x0 = (OPJ_UINT32)da_x0; + y0 = (OPJ_UINT32)da_y0; + x1 = (OPJ_UINT32)da_x1; + y1 = (OPJ_UINT32)da_y1; + } else { + x0 = l_image->x0; + y0 = l_image->y0; + x1 = l_image->x1; + y1 = l_image->y1; + } for (y = y0; y < y1; y += strip_height) { OPJ_UINT32 h_req = strip_height; if (y + h_req > y1) { @@ -303,9 +320,9 @@ int decode_by_strip(OPJ_BOOL quiet, for (y_check = 0; y_check < h_req; y_check++) { for (x = x0; x < x1; x++) { OPJ_INT32 sub_image_val = - l_image->comps[compno].data[y_check * (x1 - x0) + x]; + l_image->comps[compno].data[y_check * (x1 - x0) + (x - x0)]; OPJ_INT32 image_val = - full_image->comps[compno].data[(y + y_check) * (x1 - x0) + x]; + full_image->comps[compno].data[(y + y_check) * (x1 - x0) + (x - x0)]; if (sub_image_val != image_val) { fprintf(stderr, "Difference found at subimage pixel (%u,%u) " @@ -321,12 +338,13 @@ int decode_by_strip(OPJ_BOOL quiet, } /* If image is small enough, try a final whole image read */ - if (x1 - x0 < 10000 && y1 - y0 < 10000) { + if (full_x1 - full_x0 < 10000 && full_y1 - full_y0 < 10000) { if (!quiet) { printf("Decoding full image\n"); } - if (!opj_set_decode_area(l_codec, l_image, (OPJ_INT32)x0, (OPJ_INT32)y0, - (OPJ_INT32)x1, (OPJ_INT32)y1)) { + if (!opj_set_decode_area(l_codec, 
l_image, + (OPJ_INT32)full_x0, (OPJ_INT32)full_y0, + (OPJ_INT32)full_x1, (OPJ_INT32)full_y1)) { fprintf(stderr, "ERROR -> failed to set the decoded area\n"); opj_stream_destroy(l_stream); opj_destroy_codec(l_codec); @@ -411,7 +429,7 @@ int main(int argc, char** argv) if (argc < 2) { fprintf(stderr, "Usage: test_decode_area [-q] [-steps n] input_file_jp2_or_jk2 [x0 y0 x1 y1]\n" - "or : test_decode_area [-q] [-strip_height h] [-strip_check] input_file_jp2_or_jk2\n"); + "or : test_decode_area [-q] [-strip_height h] [-strip_check] input_file_jp2_or_jk2 [x0 y0 x1 y1]\n"); return 1; } @@ -435,6 +453,10 @@ int main(int argc, char** argv) da_y0 = atoi(argv[iarg + 1]); da_x1 = atoi(argv[iarg + 2]); da_y1 = atoi(argv[iarg + 3]); + if (da_x0 < 0 || da_y0 < 0 || da_x1 < 0 || da_y1 < 0) { + fprintf(stderr, "Wrong bounds\n"); + return 1; + } iarg += 3; } } @@ -449,7 +471,8 @@ int main(int argc, char** argv) } if (strip_height) { - int ret = decode_by_strip(quiet, input_file, strip_height, l_image); + int ret = decode_by_strip(quiet, input_file, strip_height, da_x0, da_y0, da_x1, + da_y1, l_image); if (l_image) { opj_image_destroy(l_image); } From ccac773556070ede24ea3dfbdec47c2b3c5be5c4 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:30:58 +0200 Subject: [PATCH 17/29] Tiny perf improvement in T1 stage for subtile decoding --- src/lib/openjp2/t1.c | 37 ++++++++++++------------------------- src/lib/openjp2/tcd.c | 4 ++-- 2 files changed, 14 insertions(+), 27 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 0277f8cc7..e3062adfa 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -1674,30 +1674,16 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) assert((cblk->decoded_data != NULL) || (tilec->data != NULL)); if (cblk->decoded_data) { + OPJ_UINT32 cblk_size = cblk_w * cblk_h; if (tccp->qmfbid == 1) { - for (j = 0; j < cblk_h; ++j) { - i = 0; - for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) { - 
OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U]; - OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U]; - OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U]; - OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U]; - datap[(j * cblk_w) + i + 0U] = tmp0 / 2; - datap[(j * cblk_w) + i + 1U] = tmp1 / 2; - datap[(j * cblk_w) + i + 2U] = tmp2 / 2; - datap[(j * cblk_w) + i + 3U] = tmp3 / 2; - } - for (; i < cblk_w; ++i) { - datap[(j * cblk_w) + i] /= 2; - } + for (i = 0; i < cblk_size; ++i) { + datap[i] /= 2; } } else { /* if (tccp->qmfbid == 0) */ - for (j = 0; j < cblk_h; ++j) { - for (i = 0; i < cblk_w; ++i) { - OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize; - memcpy(datap, &tmp, sizeof(tmp)); - datap++; - } + for (i = 0; i < cblk_size; ++i) { + OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize; + memcpy(datap, &tmp, sizeof(tmp)); + datap++; } } } else if (tccp->qmfbid == 1) { @@ -1775,7 +1761,7 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd, printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n", cblk->x0, cblk->y0, resno, bandno); #endif - opj_free(cblk->decoded_data); + opj_aligned_free(cblk->decoded_data); cblk->decoded_data = NULL; } } @@ -1799,7 +1785,7 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd, printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n", cblk->x0, cblk->y0, resno, bandno); #endif - opj_free(cblk->decoded_data); + opj_aligned_free(cblk->decoded_data); cblk->decoded_data = NULL; } continue; @@ -1823,7 +1809,7 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd, cblk->x0, cblk->y0, resno, bandno); #endif /* Zero-init required */ - cblk->decoded_data = opj_calloc(1, cblk_w * cblk_h * sizeof(OPJ_INT32)); + cblk->decoded_data = opj_aligned_malloc(cblk_w * cblk_h * sizeof(OPJ_INT32)); if (cblk->decoded_data == NULL) { if (p_manager_mutex) { opj_mutex_lock(p_manager_mutex); @@ -1836,10 +1822,11 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd, *pret = OPJ_FALSE; return; } + memset(cblk->decoded_data, 0, cblk_w * cblk_h * sizeof(OPJ_INT32)); } else if 
(cblk->decoded_data) { /* Not sure if that code path can happen, but better be */ /* safe than sorry */ - opj_free(cblk->decoded_data); + opj_aligned_free(cblk->decoded_data); cblk->decoded_data = NULL; } diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index 485eb9e2f..3bcab9810 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -1262,7 +1262,7 @@ static OPJ_BOOL opj_tcd_code_block_dec_allocate(opj_tcd_cblk_dec_t * OPJ_UINT32 l_numchunksalloc = p_code_block->numchunksalloc; OPJ_UINT32 i; - opj_free(p_code_block->decoded_data); + opj_aligned_free(p_code_block->decoded_data); p_code_block->decoded_data = 00; memset(p_code_block, 0, sizeof(opj_tcd_cblk_dec_t)); @@ -2177,7 +2177,7 @@ static void opj_tcd_code_block_dec_deallocate(opj_tcd_precinct_t * p_precinct) l_code_block->chunks = 00; } - opj_free(l_code_block->decoded_data); + opj_aligned_free(l_code_block->decoded_data); l_code_block->decoded_data = NULL; ++l_code_block; From 873004c615b1bed3ce780e869288602af86fdee5 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:31:00 +0200 Subject: [PATCH 18/29] Sub-tile decoding: speed up vertical pass in IDWT5x3 by processing 4 cols at a time --- src/lib/openjp2/dwt.c | 143 +++++++++++++++++++++++++++++---- src/lib/openjp2/sparse_array.c | 47 +++++++++-- 2 files changed, 165 insertions(+), 25 deletions(-) diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index 153bfa40b..ae1cbd50f 100644 --- a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ -1551,6 +1551,7 @@ static void opj_dwt_interleave_partial_v(OPJ_INT32 *dest, OPJ_INT32 cas, opj_sparse_array_int32_t* sa, OPJ_UINT32 sa_col, + OPJ_UINT32 nb_cols, OPJ_UINT32 sn, OPJ_UINT32 win_l_y0, OPJ_UINT32 win_l_y1, @@ -1560,15 +1561,15 @@ static void opj_dwt_interleave_partial_v(OPJ_INT32 *dest, OPJ_BOOL ret; ret = opj_sparse_array_int32_read(sa, sa_col, win_l_y0, - sa_col + 1, win_l_y1, - dest + cas + 2 * win_l_y0, - 0, 2, OPJ_TRUE); + sa_col + nb_cols, win_l_y1, + dest + 
cas * 4 + 2 * 4 * win_l_y0, + 1, 2 * 4, OPJ_TRUE); assert(ret); ret = opj_sparse_array_int32_read(sa, sa_col, sn + win_h_y0, - sa_col + 1, sn + win_h_y1, - dest + 1 - cas + 2 * win_h_y0, - 0, 2, OPJ_TRUE); + sa_col + nb_cols, sn + win_h_y1, + dest + (1 - cas) * 4 + 2 * 4 * win_h_y0, + 1, 2 * 4, OPJ_TRUE); assert(ret); OPJ_UNUSED(ret); } @@ -1648,6 +1649,109 @@ static void opj_dwt_decode_partial_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, } } +#define OPJ_S_off(i,off) a[(OPJ_UINT32)(i)*2*4+off] +#define OPJ_D_off(i,off) a[(1+(OPJ_UINT32)(i)*2)*4+off] +#define OPJ_S__off(i,off) ((i)<0?OPJ_S_off(0,off):((i)>=sn?OPJ_S_off(sn-1,off):OPJ_S_off(i,off))) +#define OPJ_D__off(i,off) ((i)<0?OPJ_D_off(0,off):((i)>=dn?OPJ_D_off(dn-1,off):OPJ_D_off(i,off))) +#define OPJ_SS__off(i,off) ((i)<0?OPJ_S_off(0,off):((i)>=dn?OPJ_S_off(dn-1,off):OPJ_S_off(i,off))) +#define OPJ_DD__off(i,off) ((i)<0?OPJ_D_off(0,off):((i)>=sn?OPJ_D_off(sn-1,off):OPJ_D_off(i,off))) + +static void opj_dwt_decode_partial_1_parallel(OPJ_INT32 *a, + OPJ_UINT32 nb_cols, + OPJ_INT32 dn, OPJ_INT32 sn, + OPJ_INT32 cas, + OPJ_INT32 win_l_x0, + OPJ_INT32 win_l_x1, + OPJ_INT32 win_h_x0, + OPJ_INT32 win_h_x1) +{ + OPJ_INT32 i; + OPJ_UINT32 off; + + (void)nb_cols; + + if (!cas) { + if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */ + + /* Naive version is : + for (i = win_l_x0; i < i_max; i++) { + OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2; + } + for (i = win_h_x0; i < win_h_x1; i++) { + OPJ_D(i) += (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1; + } + but the compiler doesn't manage to unroll it to avoid bound + checking in OPJ_S_ and OPJ_D_ macros + */ + + i = win_l_x0; + if (i < win_l_x1) { + OPJ_INT32 i_max; + + /* Left-most case */ + for (off = 0; off < 4; off++) { + OPJ_S_off(i, off) -= (OPJ_D__off(i - 1, off) + OPJ_D__off(i, off) + 2) >> 2; + } + i ++; + + i_max = win_l_x1; + if (i_max > dn) { + i_max = dn; + } + for (; i < i_max; i++) { + /* No bound checking */ + for (off = 0; off < 4; off++) { + OPJ_S_off(i, 
off) -= (OPJ_D_off(i - 1, off) + OPJ_D_off(i, off) + 2) >> 2; + } + } + for (; i < win_l_x1; i++) { + /* Right-most case */ + for (off = 0; off < 4; off++) { + OPJ_S_off(i, off) -= (OPJ_D__off(i - 1, off) + OPJ_D__off(i, off) + 2) >> 2; + } + } + } + + i = win_h_x0; + if (i < win_h_x1) { + OPJ_INT32 i_max = win_h_x1; + if (i_max >= sn) { + i_max = sn - 1; + } + for (; i < i_max; i++) { + /* No bound checking */ + for (off = 0; off < 4; off++) { + OPJ_D_off(i, off) += (OPJ_S_off(i, off) + OPJ_S_off(i + 1, off)) >> 1; + } + } + for (; i < win_h_x1; i++) { + /* Right-most case */ + for (off = 0; off < 4; off++) { + OPJ_D_off(i, off) += (OPJ_S__off(i, off) + OPJ_S__off(i + 1, off)) >> 1; + } + } + } + } + } else { + if (!sn && dn == 1) { /* NEW : CASE ONE ELEMENT */ + for (off = 0; off < 4; off++) { + OPJ_S_off(0, off) /= 2; + } + } else { + for (i = win_l_x0; i < win_l_x1; i++) { + for (off = 0; off < 4; off++) { + OPJ_D_off(i, off) -= (OPJ_SS__off(i, off) + OPJ_SS__off(i + 1, off) + 2) >> 2; + } + } + for (i = win_h_x0; i < win_h_x1; i++) { + for (off = 0; off < 4; off++) { + OPJ_S_off(i, off) += (OPJ_DD__off(i, off) + OPJ_DD__off(i - 1, off)) >> 1; + } + } + } + } +} + static void opj_dwt_get_band_coordinates(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 resno, OPJ_UINT32 bandno, @@ -1804,13 +1908,14 @@ static OPJ_BOOL opj_dwt_decode_partial_tile( } h_mem_size = opj_dwt_max_resolution(tr, numres); /* overflow check */ - if (h_mem_size > (SIZE_MAX / sizeof(OPJ_INT32))) { + /* in vertical pass, we process 4 columns at a time */ + if (h_mem_size > (SIZE_MAX / (4 * sizeof(OPJ_INT32)))) { /* FIXME event manager error callback */ opj_sparse_array_int32_free(sa); return OPJ_FALSE; } - h_mem_size *= sizeof(OPJ_INT32); + h_mem_size *= 4 * sizeof(OPJ_INT32); h.mem = (OPJ_INT32*)opj_aligned_32_malloc(h_mem_size); if (! 
h.mem) { /* FIXME event manager error callback */ @@ -1946,31 +2051,35 @@ static OPJ_BOOL opj_dwt_decode_partial_tile( } } - for (i = win_tr_x0; i < win_tr_x1; ++i) { + for (i = win_tr_x0; i < win_tr_x1;) { + OPJ_UINT32 nb_cols = opj_uint_min(4U, win_tr_x1 - i); opj_dwt_interleave_partial_v(v.mem, v.cas, sa, i, + nb_cols, (OPJ_UINT32)v.sn, win_ll_y0, win_ll_y1, win_lh_y0, win_lh_y1); - opj_dwt_decode_partial_1(v.mem, v.dn, v.sn, v.cas, - (OPJ_INT32)win_ll_y0, - (OPJ_INT32)win_ll_y1, - (OPJ_INT32)win_lh_y0, - (OPJ_INT32)win_lh_y1); + opj_dwt_decode_partial_1_parallel(v.mem, nb_cols, v.dn, v.sn, v.cas, + (OPJ_INT32)win_ll_y0, + (OPJ_INT32)win_ll_y1, + (OPJ_INT32)win_lh_y0, + (OPJ_INT32)win_lh_y1); if (!opj_sparse_array_int32_write(sa, i, win_tr_y0, - i + 1, win_tr_y1, - v.mem + win_tr_y0, - 0, 1, OPJ_TRUE)) { + i + nb_cols, win_tr_y1, + v.mem + 4 * win_tr_y0, + 1, 4, OPJ_TRUE)) { /* FIXME event manager error callback */ opj_sparse_array_int32_free(sa); opj_aligned_free(h.mem); return OPJ_FALSE; } + + i += nb_cols; } } opj_aligned_free(h.mem); diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c index b0634f67e..48c4b23b0 100644 --- a/src/lib/openjp2/sparse_array.c +++ b/src/lib/openjp2/sparse_array.c @@ -165,10 +165,20 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( if (buf_col_stride == 1) { OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + (x - x0) * buf_col_stride; - for (j = 0; j < y_incr; j++) { - memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); - dest_ptr += buf_line_stride; - src_ptr += block_width; + if (x_incr == 4) { + // Same code as general branch, but the compiler + // can have an efficient memcpy() + for (j = 0; j < y_incr; j++) { + memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); + dest_ptr += buf_line_stride; + src_ptr += block_width; + } + } else { + for (j = 0; j < y_incr; j++) { + memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); + dest_ptr += buf_line_stride; + src_ptr 
+= block_width; + } } } else { OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + @@ -179,6 +189,17 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( dest_ptr += buf_line_stride; src_ptr += block_width; } + } else if (y_incr == 1 && buf_col_stride == 2) { + OPJ_UINT32 k; + for (k = 0; k < (x_incr & ~3U); k += 4) { + dest_ptr[k * buf_col_stride] = src_ptr[k]; + dest_ptr[(k + 1) * buf_col_stride] = src_ptr[k + 1]; + dest_ptr[(k + 2) * buf_col_stride] = src_ptr[k + 2]; + dest_ptr[(k + 3) * buf_col_stride] = src_ptr[k + 3]; + } + for (; k < x_incr; k++) { + dest_ptr[k * buf_col_stride] = src_ptr[k]; + } } else { /* General case */ for (j = 0; j < y_incr; j++) { @@ -207,10 +228,20 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( (size_t)block_width + block_x_offset; const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) * (size_t)buf_line_stride + (x - x0) * buf_col_stride; - for (j = 0; j < y_incr; j++) { - memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); - dest_ptr += block_width; - src_ptr += buf_line_stride; + if (x_incr == 4) { + // Same code as general branch, but the compiler + // can have an efficient memcpy() + for (j = 0; j < y_incr; j++) { + memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); + dest_ptr += block_width; + src_ptr += buf_line_stride; + } + } else { + for (j = 0; j < y_incr; j++) { + memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); + dest_ptr += block_width; + src_ptr += buf_line_stride; + } } } else { OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset * From 470f3ed416a8cc3618b2c04000ea7b4694043f34 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:31:02 +0200 Subject: [PATCH 19/29] opj_dwt_decode_partial_1_parallel(): add SSE2 optimization --- src/lib/openjp2/dwt.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index ae1cbd50f..cbbe50d0d 100644 --- 
a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ -1698,6 +1698,28 @@ static void opj_dwt_decode_partial_1_parallel(OPJ_INT32 *a, if (i_max > dn) { i_max = dn; } + +#ifdef __SSE2__ + if (i + 1 < i_max) { + const __m128i two = _mm_set1_epi32(2); + __m128i Dm1 = _mm_load_si128((__m128i * const)(a + 4 + (i - 1) * 8)); + for (; i + 1 < i_max; i += 2) { + /* No bound checking */ + __m128i S = _mm_load_si128((__m128i * const)(a + i * 8)); + __m128i D = _mm_load_si128((__m128i * const)(a + 4 + i * 8)); + __m128i S1 = _mm_load_si128((__m128i * const)(a + (i + 1) * 8)); + __m128i D1 = _mm_load_si128((__m128i * const)(a + 4 + (i + 1) * 8)); + S = _mm_sub_epi32(S, + _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(Dm1, D), two), 2)); + S1 = _mm_sub_epi32(S1, + _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(D, D1), two), 2)); + _mm_store_si128((__m128i*)(a + i * 8), S); + _mm_store_si128((__m128i*)(a + (i + 1) * 8), S1); + Dm1 = D1; + } + } +#endif + for (; i < i_max; i++) { /* No bound checking */ for (off = 0; off < 4; off++) { @@ -1718,6 +1740,25 @@ static void opj_dwt_decode_partial_1_parallel(OPJ_INT32 *a, if (i_max >= sn) { i_max = sn - 1; } + +#ifdef __SSE2__ + if (i + 1 < i_max) { + __m128i S = _mm_load_si128((__m128i * const)(a + i * 8)); + for (; i + 1 < i_max; i += 2) { + /* No bound checking */ + __m128i D = _mm_load_si128((__m128i * const)(a + 4 + i * 8)); + __m128i S1 = _mm_load_si128((__m128i * const)(a + (i + 1) * 8)); + __m128i D1 = _mm_load_si128((__m128i * const)(a + 4 + (i + 1) * 8)); + __m128i S2 = _mm_load_si128((__m128i * const)(a + (i + 2) * 8)); + D = _mm_add_epi32(D, _mm_srai_epi32(_mm_add_epi32(S, S1), 1)); + D1 = _mm_add_epi32(D1, _mm_srai_epi32(_mm_add_epi32(S1, S2), 1)); + _mm_store_si128((__m128i*)(a + 4 + i * 8), D); + _mm_store_si128((__m128i*)(a + 4 + (i + 1) * 8), D1); + S = S2; + } + } +#endif + for (; i < i_max; i++) { /* No bound checking */ for (off = 0; off < 4; off++) { From ae19001ba425a9b396a29fba3417dccc746bd2ef Mon Sep 17 00:00:00 2001 
From: Even Rouault Date: Fri, 1 Sep 2017 16:31:04 +0200 Subject: [PATCH 20/29] opj_tcd_dc_level_shift_decode(): optimize lossy case --- src/lib/openjp2/opj_intmath.h | 22 ++++++++++++++++++++++ src/lib/openjp2/tcd.c | 12 +++++++----- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/lib/openjp2/opj_intmath.h b/src/lib/openjp2/opj_intmath.h index d7d85e52c..ad1359766 100644 --- a/src/lib/openjp2/opj_intmath.h +++ b/src/lib/openjp2/opj_intmath.h @@ -124,6 +124,28 @@ static INLINE OPJ_INT32 opj_int_clamp(OPJ_INT32 a, OPJ_INT32 min, } return a; } + +/** +Clamp an integer inside an interval +@return +
<ul> +
<li>Returns a if (min < a < max) +
<li>Returns max if (a > max) +
<li>Returns min if (a < min) +</ul>
+*/ +static INLINE OPJ_INT64 opj_int64_clamp(OPJ_INT64 a, OPJ_INT64 min, + OPJ_INT64 max) +{ + if (a < min) { + return min; + } + if (a > max) { + return max; + } + return a; +} + /** @return Get absolute value of integer */ diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index 3bcab9810..26c9f2a48 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -2112,6 +2112,7 @@ static OPJ_BOOL opj_tcd_dc_level_shift_decode(opj_tcd_t *p_tcd) if (l_tccp->qmfbid == 1) { for (j = 0; j < l_height; ++j) { for (i = 0; i < l_width; ++i) { + /* TODO: do addition on int64 ? */ *l_current_ptr = opj_int_clamp(*l_current_ptr + l_tccp->m_dc_level_shift, l_min, l_max); ++l_current_ptr; @@ -2122,13 +2123,14 @@ static OPJ_BOOL opj_tcd_dc_level_shift_decode(opj_tcd_t *p_tcd) for (j = 0; j < l_height; ++j) { for (i = 0; i < l_width; ++i) { OPJ_FLOAT32 l_value = *((OPJ_FLOAT32 *) l_current_ptr); - OPJ_INT32 l_value_int = (OPJ_INT32)opj_lrintf(l_value); - if (l_value > INT_MAX || - (l_value_int > 0 && l_tccp->m_dc_level_shift > 0 && - l_value_int > INT_MAX - l_tccp->m_dc_level_shift)) { + if (l_value > INT_MAX) { *l_current_ptr = l_max; + } else if (l_value < INT_MIN) { + *l_current_ptr = l_min; } else { - *l_current_ptr = opj_int_clamp( + /* Do addition on int64 to avoid overflows */ + OPJ_INT64 l_value_int = (OPJ_INT64)opj_lrintf(l_value); + *l_current_ptr = (OPJ_INT32)opj_int64_clamp( l_value_int + l_tccp->m_dc_level_shift, l_min, l_max); } ++l_current_ptr; From 83b5a168ec0e89210671d60670c9a1143ce8776b Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:31:06 +0200 Subject: [PATCH 21/29] opj_dwt_decode_partial_97(): simplify/more efficient use of sparse arrays in vertical pass --- src/lib/openjp2/dwt.c | 94 +++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 53 deletions(-) diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index cbbe50d0d..185248181 100644 --- a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ 
-2248,23 +2248,20 @@ static void opj_v4dwt_interleave_partial_v(opj_v4dwt_t* OPJ_RESTRICT dwt, OPJ_UINT32 sa_col, OPJ_UINT32 nb_elts_read) { - OPJ_UINT32 i; - for (i = 0; i < nb_elts_read; i++) { - OPJ_BOOL ret; - ret = opj_sparse_array_int32_read(sa, - sa_col + i, dwt->win_l_x0, - sa_col + i + 1, dwt->win_l_x1, - (OPJ_INT32*)(dwt->wavelet + dwt->cas + 2 * dwt->win_l_x0) + i, - 0, 8, OPJ_TRUE); - assert(ret); - ret = opj_sparse_array_int32_read(sa, - sa_col + i, (OPJ_UINT32)dwt->sn + dwt->win_h_x0, - sa_col + i + 1, (OPJ_UINT32)dwt->sn + dwt->win_h_x1, - (OPJ_INT32*)(dwt->wavelet + 1 - dwt->cas + 2 * dwt->win_h_x0) + i, - 0, 8, OPJ_TRUE); - assert(ret); - OPJ_UNUSED(ret); - } + OPJ_BOOL ret; + ret = opj_sparse_array_int32_read(sa, + sa_col, dwt->win_l_x0, + sa_col + nb_elts_read, dwt->win_l_x1, + (OPJ_INT32*)(dwt->wavelet + dwt->cas + 2 * dwt->win_l_x0), + 1, 8, OPJ_TRUE); + assert(ret); + ret = opj_sparse_array_int32_read(sa, + sa_col, (OPJ_UINT32)dwt->sn + dwt->win_h_x0, + sa_col + nb_elts_read, (OPJ_UINT32)dwt->sn + dwt->win_h_x1, + (OPJ_INT32*)(dwt->wavelet + 1 - dwt->cas + 2 * dwt->win_h_x0), + 1, 8, OPJ_TRUE); + assert(ret); + OPJ_UNUSED(ret); } #ifdef __SSE__ @@ -2743,20 +2740,17 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, if ((j + 3 >= win_ll_y0 && j < win_ll_y1) || (j + 3 >= win_lh_y0 + (OPJ_UINT32)v.sn && j < win_lh_y1 + (OPJ_UINT32)v.sn)) { - OPJ_UINT32 k; opj_v4dwt_interleave_partial_h(&h, sa, j, opj_uint_min(4U, rh - j)); opj_v4dwt_decode(&h); - for (k = 0; k < 4; k++) { - if (!opj_sparse_array_int32_write(sa, - win_tr_x0, j + k, - win_tr_x1, j + k + 1, - (OPJ_INT32*)&h.wavelet[win_tr_x0].f[k], - 4, 0, OPJ_TRUE)) { - /* FIXME event manager error callback */ - opj_sparse_array_int32_free(sa); - opj_aligned_free(h.wavelet); - return OPJ_FALSE; - } + if (!opj_sparse_array_int32_write(sa, + win_tr_x0, j, + win_tr_x1, j + 4, + (OPJ_INT32*)&h.wavelet[win_tr_x0].f[0], + 4, 1, OPJ_TRUE)) { + /* FIXME event manager error 
callback */ + opj_sparse_array_int32_free(sa); + opj_aligned_free(h.wavelet); + return OPJ_FALSE; } } } @@ -2765,20 +2759,17 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, ((j + 3 >= win_ll_y0 && j < win_ll_y1) || (j + 3 >= win_lh_y0 + (OPJ_UINT32)v.sn && j < win_lh_y1 + (OPJ_UINT32)v.sn))) { - OPJ_UINT32 k; opj_v4dwt_interleave_partial_h(&h, sa, j, rh - j); opj_v4dwt_decode(&h); - for (k = 0; k < rh - j; k++) { - if (!opj_sparse_array_int32_write(sa, - win_tr_x0, j + k, - win_tr_x1, j + k + 1, - (OPJ_INT32*)&h.wavelet[win_tr_x0].f[k], - 4, 0, OPJ_TRUE)) { - /* FIXME event manager error callback */ - opj_sparse_array_int32_free(sa); - opj_aligned_free(h.wavelet); - return OPJ_FALSE; - } + if (!opj_sparse_array_int32_write(sa, + win_tr_x0, j, + win_tr_x1, rh, + (OPJ_INT32*)&h.wavelet[win_tr_x0].f[0], + 4, 1, OPJ_TRUE)) { + /* FIXME event manager error callback */ + opj_sparse_array_int32_free(sa); + opj_aligned_free(h.wavelet); + return OPJ_FALSE; } } @@ -2788,22 +2779,19 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, v.win_h_x1 = win_lh_y1; for (j = win_tr_x0; j < win_tr_x1; j += 4) { OPJ_UINT32 nb_elts = opj_uint_min(4U, win_tr_x1 - j); - OPJ_UINT32 k; opj_v4dwt_interleave_partial_v(&v, sa, j, nb_elts); opj_v4dwt_decode(&v); - for (k = 0; k < nb_elts; k++) { - if (!opj_sparse_array_int32_write(sa, - j + k, win_tr_y0, - j + k + 1, win_tr_y1, - (OPJ_INT32*)&h.wavelet[win_tr_y0].f[k], - 0, 4, OPJ_TRUE)) { - /* FIXME event manager error callback */ - opj_sparse_array_int32_free(sa); - opj_aligned_free(h.wavelet); - return OPJ_FALSE; - } + if (!opj_sparse_array_int32_write(sa, + j, win_tr_y0, + j + nb_elts, win_tr_y1, + (OPJ_INT32*)&h.wavelet[win_tr_y0].f[0], + 1, 4, OPJ_TRUE)) { + /* FIXME event manager error callback */ + opj_sparse_array_int32_free(sa); + opj_aligned_free(h.wavelet); + return OPJ_FALSE; } } } From 8a17be8945f6f8fcae3f9e5c7c4988e971d245ee Mon Sep 17 00:00:00 2001 From: Even Rouault Date: 
Fri, 1 Sep 2017 16:31:08 +0200 Subject: [PATCH 22/29] opj_v4dwt_decode_step2_sse(): loop unroll --- src/lib/openjp2/dwt.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index 185248181..71597f810 100644 --- a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ -2302,7 +2302,29 @@ static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, vw += start * 2; tmp1 = vw[-3]; } - for (i = start; i < imax; ++i) { + + i = start; + + /* 4x loop unrolling */ + for (; i + 3 < imax; i += 4) { + __m128 tmp4, tmp5, tmp6, tmp7, tmp8, tmp9; + tmp2 = vw[-1]; + tmp3 = vw[ 0]; + tmp4 = vw[ 1]; + tmp5 = vw[ 2]; + tmp6 = vw[ 3]; + tmp7 = vw[ 4]; + tmp8 = vw[ 5]; + tmp9 = vw[ 6]; + vw[-1] = _mm_add_ps(tmp2, _mm_mul_ps(_mm_add_ps(tmp1, tmp3), c)); + vw[ 1] = _mm_add_ps(tmp4, _mm_mul_ps(_mm_add_ps(tmp3, tmp5), c)); + vw[ 3] = _mm_add_ps(tmp6, _mm_mul_ps(_mm_add_ps(tmp5, tmp7), c)); + vw[ 5] = _mm_add_ps(tmp8, _mm_mul_ps(_mm_add_ps(tmp7, tmp9), c)); + tmp1 = tmp9; + vw += 8; + } + + for (; i < imax; ++i) { tmp2 = vw[-1]; tmp3 = vw[ 0]; vw[-1] = _mm_add_ps(tmp2, _mm_mul_ps(_mm_add_ps(tmp1, tmp3), c)); From 7017e67a01c378a7a1ee5e34dd544c793b5c23e4 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:31:10 +0200 Subject: [PATCH 23/29] sparse_array: optimizations for lossy case --- src/lib/openjp2/sparse_array.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c index 48c4b23b0..40459bdb9 100644 --- a/src/lib/openjp2/sparse_array.c +++ b/src/lib/openjp2/sparse_array.c @@ -200,6 +200,21 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( for (; k < x_incr; k++) { dest_ptr[k * buf_col_stride] = src_ptr[k]; } + } else if (x_incr >= 8 && buf_col_stride == 8) { + for (j = 0; j < y_incr; j++) { + OPJ_UINT32 k; + for (k = 0; k < (x_incr & ~3U); k += 4) { + dest_ptr[k * buf_col_stride] = 
src_ptr[k]; + dest_ptr[(k + 1) * buf_col_stride] = src_ptr[k + 1]; + dest_ptr[(k + 2) * buf_col_stride] = src_ptr[k + 2]; + dest_ptr[(k + 3) * buf_col_stride] = src_ptr[k + 3]; + } + for (; k < x_incr; k++) { + dest_ptr[k * buf_col_stride] = src_ptr[k]; + } + dest_ptr += buf_line_stride; + src_ptr += block_width; + } } else { /* General case */ for (j = 0; j < y_incr; j++) { @@ -254,6 +269,21 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( src_ptr += buf_line_stride; dest_ptr += block_width; } + } else if (x_incr >= 8 && buf_col_stride == 8) { + for (j = 0; j < y_incr; j++) { + OPJ_UINT32 k; + for (k = 0; k < (x_incr & ~3U); k += 4) { + dest_ptr[k] = src_ptr[k * buf_col_stride]; + dest_ptr[k + 1] = src_ptr[(k + 1) * buf_col_stride]; + dest_ptr[k + 2] = src_ptr[(k + 2) * buf_col_stride]; + dest_ptr[k + 3] = src_ptr[(k + 3) * buf_col_stride]; + } + for (; k < x_incr; k++) { + dest_ptr[k] = src_ptr[k * buf_col_stride]; + } + src_ptr += buf_line_stride; + dest_ptr += block_width; + } } else { /* General case */ for (j = 0; j < y_incr; j++) { From 559d16e8f43a0cd090d217d7d111820989299b85 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:31:13 +0200 Subject: [PATCH 24/29] opj_t1_decode_cblk(): move some code to codeblock processor for (theoretical) better multi-threading in subtile decoding --- src/lib/openjp2/t1.c | 64 +++++++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 21 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index e3062adfa..54fb814a2 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -1563,6 +1563,7 @@ void opj_t1_destroy(opj_t1_t *p_t1) } typedef struct { + OPJ_BOOL whole_tile_decoding; OPJ_UINT32 resno; opj_tcd_cblk_dec_t* cblk; opj_tcd_band_t* band; @@ -1596,8 +1597,37 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) OPJ_UINT32 tile_w; job = (opj_t1_cblk_decode_processing_job_t*) user_data; - resno = job->resno; + cblk = job->cblk; + + if 
(!job->whole_tile_decoding) { + cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0); + cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0); + + cblk->decoded_data = opj_aligned_malloc(cblk_w * cblk_h * sizeof(OPJ_INT32)); + if (cblk->decoded_data == NULL) { + if (job->p_manager_mutex) { + opj_mutex_lock(job->p_manager_mutex); + } + opj_event_msg(job->p_manager, EVT_ERROR, + "Cannot allocate cblk->decoded_data\n"); + if (job->p_manager_mutex) { + opj_mutex_unlock(job->p_manager_mutex); + } + *(job->pret) = OPJ_FALSE; + opj_free(job); + return; + } + /* Zero-init required */ + memset(cblk->decoded_data, 0, cblk_w * cblk_h * sizeof(OPJ_INT32)); + } else if (cblk->decoded_data) { + /* Not sure if that code path can happen, but better be */ + /* safe than sorry */ + opj_aligned_free(cblk->decoded_data); + cblk->decoded_data = NULL; + } + + resno = job->resno; band = job->band; tilec = job->tilec; tccp = job->tccp; @@ -1737,6 +1767,11 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd, opj_thread_pool_t* tp = tcd->thread_pool; OPJ_UINT32 resno, bandno, precno, cblkno; +#ifdef DEBUG_VERBOSE + OPJ_UINT32 codeblocks_decoded = 0; + printf("Enter opj_t1_decode_cblks()\n"); +#endif + for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) { opj_tcd_resolution_t* res = &tilec->resolutions[resno]; @@ -1808,26 +1843,6 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd, printf("Decoding codeblock %d,%d at resno=%d, bandno=%d\n", cblk->x0, cblk->y0, resno, bandno); #endif - /* Zero-init required */ - cblk->decoded_data = opj_aligned_malloc(cblk_w * cblk_h * sizeof(OPJ_INT32)); - if (cblk->decoded_data == NULL) { - if (p_manager_mutex) { - opj_mutex_lock(p_manager_mutex); - } - opj_event_msg(p_manager, EVT_ERROR, - "Cannot allocate cblk->decoded_data\n"); - if (p_manager_mutex) { - opj_mutex_unlock(p_manager_mutex); - } - *pret = OPJ_FALSE; - return; - } - memset(cblk->decoded_data, 0, cblk_w * cblk_h * sizeof(OPJ_INT32)); - } else if (cblk->decoded_data) { - /* Not sure if that code path can happen, but 
better be */ - /* safe than sorry */ - opj_aligned_free(cblk->decoded_data); - cblk->decoded_data = NULL; } job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1, @@ -1836,6 +1851,7 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd, *pret = OPJ_FALSE; return; } + job->whole_tile_decoding = tcd->whole_tile_decoding; job->resno = resno; job->cblk = cblk; job->band = band; @@ -1847,6 +1863,9 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd, job->check_pterm = check_pterm; job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1; opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job); +#ifdef DEBUG_VERBOSE + codeblocks_decoded ++; +#endif if (!(*pret)) { return; } @@ -1855,6 +1874,9 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd, } /* bandno */ } /* resno */ +#ifdef DEBUG_VERBOSE + printf("Leave opj_t1_decode_cblks(). Number decoded: %d\n", codeblocks_decoded); +#endif return; } From 2c365fe0ecc8c6597db491a953a91308b1d0d4b1 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 20:43:39 +0200 Subject: [PATCH 25/29] Replace error message 'Not enough memory for tile data' by 'Size of tile data exceeds system limits' (refs https://github.com/uclouvain/openjpeg/pull/730#issuecomment-326654188) --- src/lib/openjp2/tcd.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index 26c9f2a48..0ec85891c 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -815,13 +815,13 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, /* issue 733, l_data_size == 0U, probably something wrong should be checked before getting here */ if (h > 0 && w > SIZE_MAX / h) { - opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n"); + opj_event_msg(manager, EVT_ERROR, "Size of tile data exceeds system limits\n"); return OPJ_FALSE; } l_tile_data_size = w * h; if (SIZE_MAX / sizeof(OPJ_UINT32) < l_tile_data_size) { - 
opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n"); + opj_event_msg(manager, EVT_ERROR, "Size of tile data exceeds system limits\n"); return OPJ_FALSE; } l_tile_data_size = l_tile_data_size * sizeof(OPJ_UINT32); @@ -907,14 +907,14 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, /*fprintf(stderr, "\t\t\tres_pw=%d, res_ph=%d\n", l_res->pw, l_res->ph );*/ if ((l_res->pw != 0U) && ((((OPJ_UINT32) - 1) / l_res->pw) < l_res->ph)) { - opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n"); + opj_event_msg(manager, EVT_ERROR, "Size of tile data exceeds system limits\n"); return OPJ_FALSE; } l_nb_precincts = l_res->pw * l_res->ph; if ((((OPJ_UINT32) - 1) / (OPJ_UINT32)sizeof(opj_tcd_precinct_t)) < l_nb_precincts) { - opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n"); + opj_event_msg(manager, EVT_ERROR, "Size of tile data exceeds system limits\n"); return OPJ_FALSE; } l_nb_precinct_size = l_nb_precincts * (OPJ_UINT32)sizeof(opj_tcd_precinct_t); @@ -1470,13 +1470,15 @@ OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd, /* issue 733, l_data_size == 0U, probably something wrong should be checked before getting here */ if (res_h > 0 && res_w > SIZE_MAX / res_h) { - opj_event_msg(p_manager, EVT_ERROR, "Not enough memory for tile data\n"); + opj_event_msg(p_manager, EVT_ERROR, + "Size of tile data exceeds system limits\n"); return OPJ_FALSE; } l_data_size = res_w * res_h; if (SIZE_MAX / sizeof(OPJ_UINT32) < l_data_size) { - opj_event_msg(p_manager, EVT_ERROR, "Not enough memory for tile data\n"); + opj_event_msg(p_manager, EVT_ERROR, + "Size of tile data exceeds system limits\n"); return OPJ_FALSE; } l_data_size *= sizeof(OPJ_UINT32); @@ -1484,7 +1486,8 @@ OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd, tilec->data_size_needed = l_data_size; if (!opj_alloc_tile_component_data(tilec)) { - opj_event_msg(p_manager, EVT_ERROR, "Not enough memory for tile data\n"); + opj_event_msg(p_manager, 
EVT_ERROR, + "Size of tile data exceeds system limits\n"); return OPJ_FALSE; } } @@ -1583,19 +1586,22 @@ OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd, if (w > 0 && h > 0) { if (w > SIZE_MAX / h) { - opj_event_msg(p_manager, EVT_ERROR, "Not enough memory for tile data\n"); + opj_event_msg(p_manager, EVT_ERROR, + "Size of tile data exceeds system limits\n"); return OPJ_FALSE; } l_data_size = w * h; if (l_data_size > SIZE_MAX / sizeof(OPJ_INT32)) { - opj_event_msg(p_manager, EVT_ERROR, "Not enough memory for tile data\n"); + opj_event_msg(p_manager, EVT_ERROR, + "Size of tile data exceeds system limits\n"); return OPJ_FALSE; } l_data_size *= sizeof(OPJ_INT32); tilec->data_win = opj_aligned_malloc(l_data_size); if (tilec->data_win == NULL) { - opj_event_msg(p_manager, EVT_ERROR, "Not enough memory for tile data\n"); + opj_event_msg(p_manager, EVT_ERROR, + "Size of tile data exceeds system limits\n"); return OPJ_FALSE; } } From 4c7effa6bc37beb2a8e2f29ecf5845cde93f6b88 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 21:17:26 +0200 Subject: [PATCH 26/29] opj_t1_clbl_decode_processor(): use SSE2 in subtile decoding code path, for irreversible --- src/lib/openjp2/t1.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 54fb814a2..0cc6f2505 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -38,7 +38,20 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#define OPJ_SKIP_POISON #include "opj_includes.h" + +#ifdef __SSE__ +#include <xmmintrin.h> +#endif +#ifdef __SSE2__ +#include <emmintrin.h> +#endif + +#if defined(__GNUC__) +#pragma GCC poison malloc calloc realloc free +#endif + #include "t1_luts.h" /** @defgroup T1 T1 - Implementation of the tier-1 coding */ @@ -1710,7 +1723,28 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) datap[i] /= 2; } } else { /* if (tccp->qmfbid == 0) */ - for (i = 0; i < cblk_size; ++i) { + i = 0; +#ifdef __SSE2__ + { + const __m128 xmm_stepsize = _mm_set1_ps(band->stepsize); + for (; i < (cblk_size & ~15U); i += 16) { + __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)( + datap + 0))); + __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)( + datap + 4))); + __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)( + datap + 8))); + __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)( + datap + 12))); + _mm_store_ps((float*)(datap + 0), _mm_mul_ps(xmm0_data, xmm_stepsize)); + _mm_store_ps((float*)(datap + 4), _mm_mul_ps(xmm1_data, xmm_stepsize)); + _mm_store_ps((float*)(datap + 8), _mm_mul_ps(xmm2_data, xmm_stepsize)); + _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize)); + datap += 16; + } + } +#endif + for (; i < cblk_size; ++i) { OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize; memcpy(datap, &tmp, sizeof(tmp)); datap++; From 676d4c807f0162877f9f32e4cdda2e242788d410 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 21:36:35 +0200 Subject: [PATCH 27/29] opj_j2k_update_image_data(): avoid allocating image buffer if we can just reuse the tile buffer one --- src/lib/openjp2/j2k.c | 70 ++++++++++++++++++++++++------------------- src/lib/openjp2/tcd.c | 8 ++--- 2 files changed, 44 insertions(+), 34 deletions(-) diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c index 8fb56e37d..8e4b39666 100644 --- a/src/lib/openjp2/j2k.c +++ b/src/lib/openjp2/j2k.c @@ -8922,30 +8922,11
@@ static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, l_img_comp_dest = p_output_image->comps; - for (i = 0; i < l_image_src->numcomps; i++) { + for (i = 0; i < l_image_src->numcomps; + i++, ++l_img_comp_dest, ++l_img_comp_src, ++l_tilec) { OPJ_INT32 res_x0, res_x1, res_y0, res_y1; OPJ_UINT32 src_data_stride; const OPJ_INT32* p_src_data; - OPJ_BOOL check_if_must_memset = OPJ_FALSE; - - /* Allocate output component buffer if necessary */ - if (!l_img_comp_dest->data) { - OPJ_SIZE_T l_width = l_img_comp_dest->w; - OPJ_SIZE_T l_height = l_img_comp_dest->h; - - if ((l_height == 0U) || (l_width > (SIZE_MAX / l_height)) || - l_width * l_height > SIZE_MAX / sizeof(OPJ_INT32)) { - /* would overflow */ - return OPJ_FALSE; - } - l_img_comp_dest->data = (OPJ_INT32*) opj_image_data_alloc(l_width * l_height * - sizeof(OPJ_INT32)); - if (! l_img_comp_dest->data) { - return OPJ_FALSE; - } - - check_if_must_memset = OPJ_TRUE; - } /* Copy info from decoded comp image to output image */ l_img_comp_dest->resno_decoded = l_img_comp_src->resno_decoded; @@ -9062,12 +9043,6 @@ static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, } /*-----*/ - if (check_if_must_memset && (l_img_comp_dest->w != l_width_dest || - l_img_comp_dest->h != l_height_dest)) { - memset(l_img_comp_dest->data, 0, - (OPJ_SIZE_T)l_img_comp_dest->w * l_img_comp_dest->h * sizeof(OPJ_INT32)); - } - /* Compute the input buffer offset */ l_start_offset_src = (OPJ_SIZE_T)l_offset_x0_src + (OPJ_SIZE_T)l_offset_y0_src * (OPJ_SIZE_T)src_data_stride; @@ -9076,6 +9051,43 @@ static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, l_start_offset_dest = (OPJ_SIZE_T)l_start_x_dest + (OPJ_SIZE_T)l_start_y_dest * (OPJ_SIZE_T)l_img_comp_dest->w; + /* Allocate output component buffer if necessary */ + if (l_img_comp_dest->data == NULL && + l_start_offset_src == 0 && l_start_offset_dest == 0 && + l_width_dest == l_img_comp_dest->w && + l_height_dest == l_img_comp_dest->h) { + /* If the final image matches the tile 
buffer, then borrow it */ + /* directly to save a copy */ + if (p_tcd->whole_tile_decoding) { + l_img_comp_dest->data = l_tilec->data; + l_tilec->data = NULL; + } else { + l_img_comp_dest->data = l_tilec->data_win; + l_tilec->data_win = NULL; + } + continue; + } else if (l_img_comp_dest->data == NULL) { + OPJ_SIZE_T l_width = l_img_comp_dest->w; + OPJ_SIZE_T l_height = l_img_comp_dest->h; + + if ((l_height == 0U) || (l_width > (SIZE_MAX / l_height)) || + l_width * l_height > SIZE_MAX / sizeof(OPJ_INT32)) { + /* would overflow */ + return OPJ_FALSE; + } + l_img_comp_dest->data = (OPJ_INT32*) opj_image_data_alloc(l_width * l_height * + sizeof(OPJ_INT32)); + if (! l_img_comp_dest->data) { + return OPJ_FALSE; + } + + if (l_img_comp_dest->w != l_width_dest || + l_img_comp_dest->h != l_height_dest) { + memset(l_img_comp_dest->data, 0, + (OPJ_SIZE_T)l_img_comp_dest->w * l_img_comp_dest->h * sizeof(OPJ_INT32)); + } + } + /* Move the output buffer to the first place where we will write*/ l_dest_ptr = l_img_comp_dest->data + l_start_offset_dest; @@ -9090,9 +9102,7 @@ static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, } } - ++l_img_comp_dest; - ++l_img_comp_src; - ++l_tilec; + } return OPJ_TRUE; diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index 0ec85891c..bd5d49603 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -832,7 +832,7 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, l_data_size = l_tilec->numresolutions * (OPJ_UINT32)sizeof( opj_tcd_resolution_t); - opj_aligned_free(l_tilec->data_win); + opj_image_data_free(l_tilec->data_win); l_tilec->data_win = NULL; l_tilec->win_x0 = 0; l_tilec->win_y0 = 0; @@ -1581,7 +1581,7 @@ OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd, size_t h = res->win_y1 - res->win_y0; size_t l_data_size; - opj_aligned_free(tilec->data_win); + opj_image_data_free(tilec->data_win); tilec->data_win = NULL; if (w > 0 && h > 0) { @@ -1598,7 +1598,7 @@ OPJ_BOOL 
opj_tcd_decode_tile(opj_tcd_t *p_tcd, } l_data_size *= sizeof(OPJ_INT32); - tilec->data_win = opj_aligned_malloc(l_data_size); + tilec->data_win = opj_image_data_alloc(l_data_size); if (tilec->data_win == NULL) { opj_event_msg(p_manager, EVT_ERROR, "Size of tile data exceeds system limits\n"); @@ -1833,7 +1833,7 @@ static void opj_tcd_free_tile(opj_tcd_t *p_tcd) l_tile_comp->data_size_needed = 0; } - opj_aligned_free(l_tile_comp->data_win); + opj_image_data_free(l_tile_comp->data_win); ++l_tile_comp; } From c1e0fba0c46cb528a08b89b986e86ff0f4792558 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 22:09:58 +0200 Subject: [PATCH 28/29] opj_v4dwt_decode_step1_sse(): rework a bit to improve code generation --- src/lib/openjp2/dwt.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index 71597f810..719c73303 100644 --- a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ -2274,14 +2274,19 @@ static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, __m128* OPJ_RESTRICT vw = (__m128*) w; OPJ_UINT32 i; /* 4x unrolled loop */ - for (i = start; i + 3 < end; i += 4) { - vw[2 * i] = _mm_mul_ps(vw[2 * i], c); - vw[2 * i + 2] = _mm_mul_ps(vw[2 * i + 2], c); - vw[2 * i + 4] = _mm_mul_ps(vw[2 * i + 4], c); - vw[2 * i + 6] = _mm_mul_ps(vw[2 * i + 6], c); - } - for (; i < end; ++i) { - vw[2 * i] = _mm_mul_ps(vw[2 * i], c); + vw += 2 * start; + for (i = start; i + 3 < end; i += 4, vw += 8) { + __m128 xmm0 = _mm_mul_ps(vw[0], c); + __m128 xmm2 = _mm_mul_ps(vw[2], c); + __m128 xmm4 = _mm_mul_ps(vw[4], c); + __m128 xmm6 = _mm_mul_ps(vw[6], c); + vw[0] = xmm0; + vw[2] = xmm2; + vw[4] = xmm4; + vw[6] = xmm6; + } + for (; i < end; ++i, vw += 2) { + vw[0] = _mm_mul_ps(vw[0], c); } } From 579b8937eae7e6b6868b8b5c6286a742c10a5130 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 4 Sep 2017 17:35:52 +0200 Subject: [PATCH 29/29] Replace uses of size_t by OPJ_SIZE_T --- src/lib/openjp2/dwt.c | 
113 +++++++++++++++++---------------- src/lib/openjp2/sparse_array.c | 20 +++--- src/lib/openjp2/t1.c | 24 +++---- src/lib/openjp2/tcd.c | 47 +++++++------- 4 files changed, 104 insertions(+), 100 deletions(-) diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index 719c73303..18270045f 100644 --- a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ -279,7 +279,7 @@ static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, l_src += 2; } /* b[i*x]=a[2*i+cas]; */ - l_dest = b + (size_t)sn * (size_t)x; + l_dest = b + (OPJ_SIZE_T)sn * (OPJ_SIZE_T)x; l_src = a + 1 - cas; i = dn; @@ -325,7 +325,7 @@ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x) bi += 2; ai += x; } - ai = a + (v->sn * (size_t)x); + ai = a + (v->sn * (OPJ_SIZE_T)x); bi = v->mem + 1 - v->cas; i = v->dn ; while (i--) { @@ -616,7 +616,7 @@ static void opj_idwt53_v_final_memcpy(OPJ_INT32* tiledp_col, const OPJ_INT32* tmp, OPJ_INT32 len, - size_t stride) + OPJ_SIZE_T stride) { OPJ_INT32 i; for (i = 0; i < len; ++i) { @@ -625,9 +625,9 @@ void opj_idwt53_v_final_memcpy(OPJ_INT32* tiledp_col, PARALLEL_COLS_53 * sizeof(OPJ_INT32)) would do but would be a tiny bit slower. 
We can take here advantage of our knowledge of alignment */ - STOREU(&tiledp_col[(size_t)i * stride + 0], + STOREU(&tiledp_col[(OPJ_SIZE_T)i * stride + 0], LOAD(&tmp[PARALLEL_COLS_53 * i + 0])); - STOREU(&tiledp_col[(size_t)i * stride + VREG_INT_COUNT], + STOREU(&tiledp_col[(OPJ_SIZE_T)i * stride + VREG_INT_COUNT], LOAD(&tmp[PARALLEL_COLS_53 * i + VREG_INT_COUNT])); } } @@ -639,13 +639,13 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2( const OPJ_INT32 sn, const OPJ_INT32 len, OPJ_INT32* tiledp_col, - const size_t stride) + const OPJ_SIZE_T stride) { const OPJ_INT32* in_even = &tiledp_col[0]; - const OPJ_INT32* in_odd = &tiledp_col[(size_t)sn * stride]; + const OPJ_INT32* in_odd = &tiledp_col[(OPJ_SIZE_T)sn * stride]; OPJ_INT32 i; - size_t j; + OPJ_SIZE_T j; VREG d1c_0, d1n_0, s1n_0, s0c_0, s0n_0; VREG d1c_1, d1n_1, s1n_1, s0c_1, s0n_1; const VREG two = LOAD_CST(2); @@ -662,7 +662,7 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2( /* Note: loads of input even/odd values must be done in a unaligned */ /* fashion. 
But stores in tmp can be done with aligned store, since */ /* the temporary buffer is properly aligned */ - assert((size_t)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0); + assert((OPJ_SIZE_T)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0); s1n_0 = LOADU(in_even + 0); s1n_1 = LOADU(in_even + VREG_INT_COUNT); @@ -704,7 +704,7 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2( if (len & 1) { VREG tmp_len_minus_1; - s1n_0 = LOADU(in_even + (size_t)((len - 1) / 2) * stride); + s1n_0 = LOADU(in_even + (OPJ_SIZE_T)((len - 1) / 2) * stride); /* tmp_len_minus_1 = s1n - ((d1n + 1) >> 1); */ tmp_len_minus_1 = SUB(s1n_0, SAR(ADD3(d1n_0, d1n_0, two), 2)); STORE(tmp + PARALLEL_COLS_53 * (len - 1), tmp_len_minus_1); @@ -712,7 +712,7 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2( STORE(tmp + PARALLEL_COLS_53 * (len - 2), ADD(d1n_0, SAR(ADD(s0n_0, tmp_len_minus_1), 1))); - s1n_1 = LOADU(in_even + (size_t)((len - 1) / 2) * stride + VREG_INT_COUNT); + s1n_1 = LOADU(in_even + (OPJ_SIZE_T)((len - 1) / 2) * stride + VREG_INT_COUNT); /* tmp_len_minus_1 = s1n - ((d1n + 1) >> 1); */ tmp_len_minus_1 = SUB(s1n_1, SAR(ADD3(d1n_1, d1n_1, two), 2)); STORE(tmp + PARALLEL_COLS_53 * (len - 1) + VREG_INT_COUNT, @@ -740,16 +740,16 @@ static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2( const OPJ_INT32 sn, const OPJ_INT32 len, OPJ_INT32* tiledp_col, - const size_t stride) + const OPJ_SIZE_T stride) { OPJ_INT32 i; - size_t j; + OPJ_SIZE_T j; VREG s1_0, s2_0, dc_0, dn_0; VREG s1_1, s2_1, dc_1, dn_1; const VREG two = LOAD_CST(2); - const OPJ_INT32* in_even = &tiledp_col[(size_t)sn * stride]; + const OPJ_INT32* in_even = &tiledp_col[(OPJ_SIZE_T)sn * stride]; const OPJ_INT32* in_odd = &tiledp_col[0]; assert(len > 2); @@ -764,7 +764,7 @@ static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2( /* Note: loads of input even/odd values must be done in a unaligned */ /* fashion. 
But stores in tmp can be done with aligned store, since */ /* the temporary buffer is properly aligned */ - assert((size_t)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0); + assert((OPJ_SIZE_T)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0); s1_0 = LOADU(in_even + stride); /* in_odd[0] - ((in_even[0] + s1 + 2) >> 2); */ @@ -809,9 +809,9 @@ static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2( if (!(len & 1)) { /*dn = in_odd[(len / 2 - 1) * stride] - ((s1 + 1) >> 1); */ - dn_0 = SUB(LOADU(in_odd + (size_t)(len / 2 - 1) * stride), + dn_0 = SUB(LOADU(in_odd + (OPJ_SIZE_T)(len / 2 - 1) * stride), SAR(ADD3(s1_0, s1_0, two), 2)); - dn_1 = SUB(LOADU(in_odd + (size_t)(len / 2 - 1) * stride + VREG_INT_COUNT), + dn_1 = SUB(LOADU(in_odd + (OPJ_SIZE_T)(len / 2 - 1) * stride + VREG_INT_COUNT), SAR(ADD3(s1_1, s1_1, two), 2)); /* tmp[len - 2] = s1 + ((dn + dc) >> 1); */ @@ -851,7 +851,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp, const OPJ_INT32 sn, const OPJ_INT32 len, OPJ_INT32* tiledp_col, - const size_t stride) + const OPJ_SIZE_T stride) { OPJ_INT32 i, j; OPJ_INT32 d1c, d1n, s1n, s0c, s0n; @@ -862,15 +862,15 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp, /* accesses and explicit interleaving. 
*/ s1n = tiledp_col[0]; - d1n = tiledp_col[(size_t)sn * stride]; + d1n = tiledp_col[(OPJ_SIZE_T)sn * stride]; s0n = s1n - ((d1n + 1) >> 1); for (i = 0, j = 0; i < (len - 3); i += 2, j++) { d1c = d1n; s0c = s0n; - s1n = tiledp_col[(size_t)(j + 1) * stride]; - d1n = tiledp_col[(size_t)(sn + j + 1) * stride]; + s1n = tiledp_col[(OPJ_SIZE_T)(j + 1) * stride]; + d1n = tiledp_col[(OPJ_SIZE_T)(sn + j + 1) * stride]; s0n = s1n - ((d1c + d1n + 2) >> 2); @@ -882,7 +882,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp, if (len & 1) { tmp[len - 1] = - tiledp_col[(size_t)((len - 1) / 2) * stride] - + tiledp_col[(OPJ_SIZE_T)((len - 1) / 2) * stride] - ((d1n + 1) >> 1); tmp[len - 2] = d1n + ((s0n + tmp[len - 1]) >> 1); } else { @@ -890,7 +890,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp, } for (i = 0; i < len; ++i) { - tiledp_col[(size_t)i * stride] = tmp[i]; + tiledp_col[(OPJ_SIZE_T)i * stride] = tmp[i]; } } @@ -901,11 +901,11 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp, const OPJ_INT32 sn, const OPJ_INT32 len, OPJ_INT32* tiledp_col, - const size_t stride) + const OPJ_SIZE_T stride) { OPJ_INT32 i, j; OPJ_INT32 s1, s2, dc, dn; - const OPJ_INT32* in_even = &tiledp_col[(size_t)sn * stride]; + const OPJ_INT32* in_even = &tiledp_col[(OPJ_SIZE_T)sn * stride]; const OPJ_INT32* in_odd = &tiledp_col[0]; assert(len > 2); @@ -918,9 +918,9 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp, tmp[0] = in_even[0] + dc; for (i = 1, j = 1; i < (len - 2 - !(len & 1)); i += 2, j++) { - s2 = in_even[(size_t)(j + 1) * stride]; + s2 = in_even[(OPJ_SIZE_T)(j + 1) * stride]; - dn = in_odd[(size_t)j * stride] - ((s1 + s2 + 2) >> 2); + dn = in_odd[(OPJ_SIZE_T)j * stride] - ((s1 + s2 + 2) >> 2); tmp[i ] = dc; tmp[i + 1] = s1 + ((dn + dc) >> 1); @@ -929,7 +929,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp, } tmp[i] = dc; if (!(len & 1)) { - dn = in_odd[(size_t)(len / 2 - 1) * stride] - ((s1 + 1) >> 1); + dn = in_odd[(OPJ_SIZE_T)(len / 2 - 1) * stride] - ((s1 + 1) >> 1); tmp[len - 2] = s1 + ((dn + dc) 
>> 1); tmp[len - 1] = dn; } else { @@ -937,7 +937,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp, } for (i = 0; i < len; ++i) { - tiledp_col[(size_t)i * stride] = tmp[i]; + tiledp_col[(OPJ_SIZE_T)i * stride] = tmp[i]; } } #endif /* !defined(STANDARD_SLOW_VERSION) */ @@ -948,7 +948,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp, /* Performs interleave, inverse wavelet transform and copy back to buffer */ static void opj_idwt53_v(const opj_dwt_t *dwt, OPJ_INT32* tiledp_col, - size_t stride, + OPJ_SIZE_T stride, OPJ_INT32 nb_cols) { #ifdef STANDARD_SLOW_VERSION @@ -996,14 +996,14 @@ static void opj_idwt53_v(const opj_dwt_t *dwt, OPJ_INT32* out = dwt->mem; for (c = 0; c < nb_cols; c++, tiledp_col++) { OPJ_INT32 i; - const OPJ_INT32* in_even = &tiledp_col[(size_t)sn * stride]; + const OPJ_INT32* in_even = &tiledp_col[(OPJ_SIZE_T)sn * stride]; const OPJ_INT32* in_odd = &tiledp_col[0]; out[1] = in_odd[0] - ((in_even[0] + 1) >> 1); out[0] = in_even[0] + out[1]; for (i = 0; i < len; ++i) { - tiledp_col[(size_t)i * stride] = out[i]; + tiledp_col[(OPJ_SIZE_T)i * stride] = out[i]; } } @@ -1113,7 +1113,7 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec, OPJ_INT32 rw; /* width of the resolution level computed */ OPJ_INT32 rh; /* height of the resolution level computed */ - size_t l_data_size; + OPJ_SIZE_T l_data_size; opj_tcd_resolution_t * l_cur_res = 0; opj_tcd_resolution_t * l_last_res = 0; @@ -1343,11 +1343,11 @@ static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls) job = (opj_dwd_decode_v_job_t*)user_data; for (j = job->min_j; j + PARALLEL_COLS_53 <= job->max_j; j += PARALLEL_COLS_53) { - opj_idwt53_v(&job->v, &job->tiledp[j], (size_t)job->w, + opj_idwt53_v(&job->v, &job->tiledp[j], (OPJ_SIZE_T)job->w, PARALLEL_COLS_53); } if (j < job->max_j) - opj_idwt53_v(&job->v, &job->tiledp[j], (size_t)job->w, + opj_idwt53_v(&job->v, &job->tiledp[j], (OPJ_SIZE_T)job->w, (OPJ_INT32)(job->max_j - j)); opj_aligned_free(job->v.mem); @@ -1374,7 
+1374,7 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, OPJ_UINT32 w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - 1].x1 - tilec->resolutions[tilec->minimum_num_resolutions - 1].x0); - size_t h_mem_size; + OPJ_SIZE_T h_mem_size; int num_threads; if (numres == 1U) { @@ -1415,7 +1415,7 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, if (num_threads <= 1 || rh <= 1) { for (j = 0; j < rh; ++j) { - opj_idwt53_h(&h, &tiledp[(size_t)j * w]); + opj_idwt53_h(&h, &tiledp[(OPJ_SIZE_T)j * w]); } } else { OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; @@ -1467,10 +1467,10 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, if (num_threads <= 1 || rw <= 1) { for (j = 0; j + PARALLEL_COLS_53 <= rw; j += PARALLEL_COLS_53) { - opj_idwt53_v(&v, &tiledp[j], (size_t)w, PARALLEL_COLS_53); + opj_idwt53_v(&v, &tiledp[j], (OPJ_SIZE_T)w, PARALLEL_COLS_53); } if (j < rw) { - opj_idwt53_v(&v, &tiledp[j], (size_t)w, (OPJ_INT32)(rw - j)); + opj_idwt53_v(&v, &tiledp[j], (OPJ_SIZE_T)w, (OPJ_INT32)(rw - j)); } } else { OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; @@ -1922,7 +1922,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile( OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 - tr->y0); /* height of the resolution level computed */ - size_t h_mem_size; + OPJ_SIZE_T h_mem_size; /* Compute the intersection of the area of interest, expressed in tile coordinates */ /* with the tile coordinates */ @@ -2152,8 +2152,8 @@ static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt, OPJ_UINT32 x1 = dwt->win_l_x1; for (k = 0; k < 2; ++k) { - if (remaining_height >= 4 && ((size_t) a & 0x0f) == 0 && - ((size_t) bi & 0x0f) == 0 && (width & 0x0f) == 0) { + if (remaining_height >= 4 && ((OPJ_SIZE_T) a & 0x0f) == 0 && + ((OPJ_SIZE_T) bi & 0x0f) == 0 && (width & 0x0f) == 0) { /* Fast code path */ for (i = x0; i < x1; ++i) { OPJ_UINT32 j = i; @@ -2230,16 +2230,16 @@ static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt, OPJ_UINT32 i; for (i = 
dwt->win_l_x0; i < dwt->win_l_x1; ++i) { - memcpy(&bi[i * 2], &a[i * (size_t)width], - (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); + memcpy(&bi[i * 2], &a[i * (OPJ_SIZE_T)width], + (OPJ_SIZE_T)nb_elts_read * sizeof(OPJ_FLOAT32)); } - a += (OPJ_UINT32)dwt->sn * (size_t)width; + a += (OPJ_UINT32)dwt->sn * (OPJ_SIZE_T)width; bi = dwt->wavelet + 1 - dwt->cas; for (i = dwt->win_h_x0; i < dwt->win_h_x1; ++i) { - memcpy(&bi[i * 2], &a[i * (size_t)width], - (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); + memcpy(&bi[i * 2], &a[i * (OPJ_SIZE_T)width], + (OPJ_SIZE_T)nb_elts_read * sizeof(OPJ_FLOAT32)); } } @@ -2497,7 +2497,7 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, 1].x1 - tilec->resolutions[tilec->minimum_num_resolutions - 1].x0); - size_t l_data_size; + OPJ_SIZE_T l_data_size; l_data_size = opj_dwt_max_resolution(res, numres); /* overflow check */ @@ -2546,9 +2546,9 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, for (k = 0; k < rw; k++) { aj[k ] = h.wavelet[k].f[0]; - aj[k + (size_t)w ] = h.wavelet[k].f[1]; - aj[k + (size_t)w * 2] = h.wavelet[k].f[2]; - aj[k + (size_t)w * 3] = h.wavelet[k].f[3]; + aj[k + (OPJ_SIZE_T)w ] = h.wavelet[k].f[1]; + aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2]; + aj[k + (OPJ_SIZE_T)w * 3] = h.wavelet[k].f[3]; } aj += w * 4; @@ -2561,10 +2561,10 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, for (k = 0; k < rw; k++) { switch (rh - j) { case 3: - aj[k + (size_t)w * 2] = h.wavelet[k].f[2]; + aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2]; /* FALLTHRU */ case 2: - aj[k + (size_t)w ] = h.wavelet[k].f[1]; + aj[k + (OPJ_SIZE_T)w ] = h.wavelet[k].f[1]; /* FALLTHRU */ case 1: aj[k] = h.wavelet[k].f[0]; @@ -2587,7 +2587,7 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, opj_v4dwt_decode(&v); for (k = 0; k < rh; ++k) { - memcpy(&aj[k * (size_t)w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32)); + memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k], 4 * 
sizeof(OPJ_FLOAT32)); } aj += 4; } @@ -2601,7 +2601,8 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, opj_v4dwt_decode(&v); for (k = 0; k < rh; ++k) { - memcpy(&aj[k * (size_t)w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32)); + memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k], + (OPJ_SIZE_T)j * sizeof(OPJ_FLOAT32)); } } } @@ -2631,7 +2632,7 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 - tr->y0); /* height of the resolution level computed */ - size_t l_data_size; + OPJ_SIZE_T l_data_size; /* Compute the intersection of the area of interest, expressed in tile coordinates */ /* with the tile coordinates */ diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c index 40459bdb9..6a2d8d434 100644 --- a/src/lib/openjp2/sparse_array.c +++ b/src/lib/openjp2/sparse_array.c @@ -142,14 +142,14 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( if (is_read_op) { if (src_block == NULL) { if (buf_col_stride == 1) { - OPJ_INT32* dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + + OPJ_INT32* dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride + (x - x0) * buf_col_stride; for (j = 0; j < y_incr; j++) { memset(dest_ptr, 0, sizeof(OPJ_INT32) * x_incr); dest_ptr += buf_line_stride; } } else { - OPJ_INT32* dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + + OPJ_INT32* dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride + (x - x0) * buf_col_stride; for (j = 0; j < y_incr; j++) { OPJ_UINT32 k; @@ -161,9 +161,10 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( } } else { const OPJ_INT32* OPJ_RESTRICT src_ptr = src_block + block_y_offset * - (size_t)block_width + block_x_offset; + (OPJ_SIZE_T)block_width + block_x_offset; if (buf_col_stride == 1) { - OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + + OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride + + (x - x0) * buf_col_stride; if 
(x_incr == 4) { // Same code as general branch, but the compiler @@ -181,7 +182,8 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( } } } else { - OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + + OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride + + (x - x0) * buf_col_stride; if (x_incr == 1) { for (j = 0; j < y_incr; j++) { @@ -240,9 +242,9 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( if (buf_col_stride == 1) { OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset * - (size_t)block_width + block_x_offset; + (OPJ_SIZE_T)block_width + block_x_offset; const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) * - (size_t)buf_line_stride + (x - x0) * buf_col_stride; + (OPJ_SIZE_T)buf_line_stride + (x - x0) * buf_col_stride; if (x_incr == 4) { // Same code as general branch, but the compiler // can have an efficient memcpy() @@ -260,9 +262,9 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( } } else { OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset * - (size_t)block_width + block_x_offset; + (OPJ_SIZE_T)block_width + block_x_offset; const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) * - (size_t)buf_line_stride + (x - x0) * buf_col_stride; + (OPJ_SIZE_T)buf_line_stride + (x - x0) * buf_col_stride; if (x_incr == 1) { for (j = 0; j < y_incr; j++) { *dest_ptr = *src_ptr; diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 0cc6f2505..a583e6920 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -1452,7 +1452,7 @@ static OPJ_BOOL opj_t1_allocate_buffers( if (!t1->encoder) { OPJ_UINT32 datasize = w * h; - if (datasize > (size_t)t1->datasize) { + if (datasize > t1->datasize) { opj_aligned_free(t1->data); t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32)); if (!t1->data) { @@ -1751,8 +1751,8 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) } } } else if (tccp->qmfbid == 1) { - OPJ_INT32* 
OPJ_RESTRICT tiledp = &tilec->data[(size_t)y * tile_w + - (size_t)x]; + OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + + (OPJ_SIZE_T)x]; for (j = 0; j < cblk_h; ++j) { i = 0; for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) { @@ -1760,19 +1760,19 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U]; OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U]; OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U]; - ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 0U] = tmp0 / 2; - ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 1U] = tmp1 / 2; - ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 2U] = tmp2 / 2; - ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 3U] = tmp3 / 2; + ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2; + ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2; + ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2; + ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2; } for (; i < cblk_w; ++i) { OPJ_INT32 tmp = datap[(j * cblk_w) + i]; - ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i] = tmp / 2; + ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2; } } } else { /* if (tccp->qmfbid == 0) */ - OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(size_t)y * - tile_w + (size_t)x]; + OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y * + tile_w + (OPJ_SIZE_T)x]; for (j = 0; j < cblk_h; ++j) { OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp; for (i = 0; i < cblk_w; ++i) { @@ -2134,7 +2134,7 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1, OPJ_UINT32 cblk_w; OPJ_UINT32 cblk_h; OPJ_UINT32 i, j, tileLineAdvance; - size_t tileIndex = 0; + OPJ_SIZE_T tileIndex = 0; OPJ_INT32 x = cblk->x0 - band->x0; OPJ_INT32 y = cblk->y0 - band->y0; @@ -2158,7 +2158,7 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1, cblk_h = t1->h; tileLineAdvance = tile_w - cblk_w; - tiledp = &tilec->data[(size_t)y * tile_w 
+ (size_t)x]; + tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x]; t1->data = tiledp; t1->data_stride = tile_w; if (tccp->qmfbid == 1) { diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index bd5d49603..c652f8ba9 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -807,11 +807,11 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, } if (isEncoder) { - size_t l_tile_data_size; + OPJ_SIZE_T l_tile_data_size; /* compute l_data_size with overflow check */ - size_t w = (size_t)(l_tilec->x1 - l_tilec->x0); - size_t h = (size_t)(l_tilec->y1 - l_tilec->y0); + OPJ_SIZE_T w = (OPJ_SIZE_T)(l_tilec->x1 - l_tilec->x0); + OPJ_SIZE_T h = (OPJ_SIZE_T)(l_tilec->y1 - l_tilec->y0); /* issue 733, l_data_size == 0U, probably something wrong should be checked before getting here */ if (h > 0 && w > SIZE_MAX / h) { @@ -1366,7 +1366,8 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd, p_cstr_info->tile[p_tile_no].pdy[i] = (int)l_tccp->prch[i]; } p_cstr_info->tile[p_tile_no].packet = (opj_packet_info_t*) opj_calloc(( - size_t)p_cstr_info->numcomps * (size_t)p_cstr_info->numlayers * l_num_packs, + OPJ_SIZE_T)p_cstr_info->numcomps * (OPJ_SIZE_T)p_cstr_info->numlayers * + l_num_packs, sizeof(opj_packet_info_t)); if (!p_cstr_info->tile[p_tile_no].packet) { /* FIXME event manager error callback */ @@ -1462,11 +1463,11 @@ OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd, opj_tcd_tilecomp_t* tilec = &(p_tcd->tcd_image->tiles->comps[compno]); opj_tcd_resolution_t *l_res = & (tilec->resolutions[tilec->minimum_num_resolutions - 1]); - size_t l_data_size; + OPJ_SIZE_T l_data_size; /* compute l_data_size with overflow check */ - size_t res_w = (size_t)(l_res->x1 - l_res->x0); - size_t res_h = (size_t)(l_res->y1 - l_res->y0); + OPJ_SIZE_T res_w = (OPJ_SIZE_T)(l_res->x1 - l_res->x0); + OPJ_SIZE_T res_h = (OPJ_SIZE_T)(l_res->y1 - l_res->y0); /* issue 733, l_data_size == 0U, probably something wrong should be checked before getting here */ if 
(res_h > 0 && res_w > SIZE_MAX / res_h) { @@ -1577,9 +1578,9 @@ OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd, opj_tcd_tilecomp_t* tilec = &(p_tcd->tcd_image->tiles->comps[compno]); opj_image_comp_t* image_comp = &(p_tcd->image->comps[compno]); opj_tcd_resolution_t *res = tilec->resolutions + image_comp->resno_decoded; - size_t w = res->win_x1 - res->win_x0; - size_t h = res->win_y1 - res->win_y0; - size_t l_data_size; + OPJ_SIZE_T w = res->win_x1 - res->win_x0; + OPJ_SIZE_T h = res->win_y1 - res->win_y0; + OPJ_SIZE_T l_data_size; opj_image_data_free(tilec->data_win); tilec->data_win = NULL; @@ -1980,17 +1981,17 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager) p_tcd->image->comps[1].resno_decoded; opj_tcd_resolution_t* res_comp2 = l_tile->comps[2].resolutions + p_tcd->image->comps[2].resno_decoded; - size_t l_res_samples = (size_t)(res_comp0->x1 - res_comp0->x0) * - (size_t)(res_comp0->y1 - res_comp0->y0); + OPJ_SIZE_T l_res_samples = (OPJ_SIZE_T)(res_comp0->x1 - res_comp0->x0) * + (OPJ_SIZE_T)(res_comp0->y1 - res_comp0->y0); /* testcase 1336.pdf.asan.47.376 */ if (p_tcd->image->comps[0].resno_decoded != p_tcd->image->comps[1].resno_decoded || p_tcd->image->comps[0].resno_decoded != p_tcd->image->comps[2].resno_decoded || - (size_t)(res_comp1->x1 - res_comp1->x0) * - (size_t)(res_comp1->y1 - res_comp1->y0) != l_res_samples || - (size_t)(res_comp2->x1 - res_comp2->x0) * - (size_t)(res_comp2->y1 - res_comp2->y0) != l_res_samples) { + (OPJ_SIZE_T)(res_comp1->x1 - res_comp1->x0) * + (OPJ_SIZE_T)(res_comp1->y1 - res_comp1->y0) != l_res_samples || + (OPJ_SIZE_T)(res_comp2->x1 - res_comp2->x0) * + (OPJ_SIZE_T)(res_comp2->y1 - res_comp2->y0) != l_res_samples) { opj_event_msg(p_manager, EVT_ERROR, "Tiles don't all have the same dimension. 
Skip the MCT step.\n"); return OPJ_FALSE; @@ -2271,7 +2272,7 @@ static OPJ_BOOL opj_tcd_dc_level_shift_encode(opj_tcd_t *p_tcd) opj_tccp_t * l_tccp = 00; opj_image_comp_t * l_img_comp = 00; opj_tcd_tile_t * l_tile; - size_t l_nb_elem, i; + OPJ_SIZE_T l_nb_elem, i; OPJ_INT32 * l_current_ptr; l_tile = p_tcd->tcd_image->tiles; @@ -2281,8 +2282,8 @@ static OPJ_BOOL opj_tcd_dc_level_shift_encode(opj_tcd_t *p_tcd) for (compno = 0; compno < l_tile->numcomps; compno++) { l_current_ptr = l_tile_comp->data; - l_nb_elem = (size_t)(l_tile_comp->x1 - l_tile_comp->x0) * - (size_t)(l_tile_comp->y1 - l_tile_comp->y0); + l_nb_elem = (OPJ_SIZE_T)(l_tile_comp->x1 - l_tile_comp->x0) * + (OPJ_SIZE_T)(l_tile_comp->y1 - l_tile_comp->y0); if (l_tccp->qmfbid == 1) { for (i = 0; i < l_nb_elem; ++i) { @@ -2308,8 +2309,8 @@ static OPJ_BOOL opj_tcd_mct_encode(opj_tcd_t *p_tcd) { opj_tcd_tile_t * l_tile = p_tcd->tcd_image->tiles; opj_tcd_tilecomp_t * l_tile_comp = p_tcd->tcd_image->tiles->comps; - size_t samples = (size_t)(l_tile_comp->x1 - l_tile_comp->x0) * - (size_t)(l_tile_comp->y1 - l_tile_comp->y0); + OPJ_SIZE_T samples = (OPJ_SIZE_T)(l_tile_comp->x1 - l_tile_comp->x0) * + (OPJ_SIZE_T)(l_tile_comp->y1 - l_tile_comp->y0); OPJ_UINT32 i; OPJ_BYTE ** l_data = 00; opj_tcp_t * l_tcp = p_tcd->tcp; @@ -2511,8 +2512,8 @@ OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd, for (i = 0; i < p_tcd->image->numcomps; ++i) { l_size_comp = l_img_comp->prec >> 3; /*(/ 8)*/ l_remaining = l_img_comp->prec & 7; /* (%8) */ - l_nb_elem = (size_t)(l_tilec->x1 - l_tilec->x0) * - (size_t)(l_tilec->y1 - l_tilec->y0); + l_nb_elem = (OPJ_SIZE_T)(l_tilec->x1 - l_tilec->x0) * + (OPJ_SIZE_T)(l_tilec->y1 - l_tilec->y0); if (l_remaining) { ++l_size_comp;