diff --git a/lib/chunkio/CMakeLists.txt b/lib/chunkio/CMakeLists.txt index f49d01d9095..dbc7d4805b0 100644 --- a/lib/chunkio/CMakeLists.txt +++ b/lib/chunkio/CMakeLists.txt @@ -3,7 +3,7 @@ project(chunk-io C) set(CIO_VERSION_MAJOR 1) set(CIO_VERSION_MINOR 5) -set(CIO_VERSION_PATCH 1) +set(CIO_VERSION_PATCH 2) set(CIO_VERSION_STR "${CIO_VERSION_MAJOR}.${CIO_VERSION_MINOR}.${CIO_VERSION_PATCH}") # CFLAGS diff --git a/lib/chunkio/include/chunkio/cio_file.h b/lib/chunkio/include/chunkio/cio_file.h index 7d447492902..3bc6591710c 100644 --- a/lib/chunkio/include/chunkio/cio_file.h +++ b/lib/chunkio/include/chunkio/cio_file.h @@ -44,6 +44,7 @@ struct cio_file { HANDLE backing_file; HANDLE backing_mapping; #endif + int taint_flag; /* content modification flag */ /* cached addr */ char *st_content; crc_t crc_cur; /* crc: current value calculated */ diff --git a/lib/chunkio/include/chunkio/cio_file_st.h b/lib/chunkio/include/chunkio/cio_file_st.h index 4b1552b612a..231cb588aa2 100644 --- a/lib/chunkio/include/chunkio/cio_file_st.h +++ b/lib/chunkio/include/chunkio/cio_file_st.h @@ -112,17 +112,36 @@ static inline ssize_t cio_file_st_infer_content_len(char *map, size_t size) return content_length; } +/* Set content length */ +static inline void cio_file_st_set_content_len(char *map, uint32_t len) +{ + uint8_t *content_length_buffer; + + content_length_buffer = (uint8_t *) &map[CIO_FILE_CONTENT_LENGTH_OFFSET]; + + content_length_buffer[0] = (uint8_t) ((len & 0xFF000000) >> 24); + content_length_buffer[1] = (uint8_t) ((len & 0x00FF0000) >> 16); + content_length_buffer[2] = (uint8_t) ((len & 0x0000FF00) >> 8); + content_length_buffer[3] = (uint8_t) ((len & 0x000000FF) >> 0); +} + /* Get content length */ -static inline ssize_t cio_file_st_get_content_len(char *map, size_t size, - size_t page_size) +static inline ssize_t cio_file_st_get_content_len(char *map, + size_t size, + size_t page_size, + int tainted_data_flag) { uint8_t *content_length_buffer; + uint8_t *content_buffer; 
ssize_t content_length; + ssize_t content_offset; if (size < CIO_FILE_HEADER_MIN) { return -1; } + content_offset = CIO_FILE_CONTENT_OFFSET + 2 + cio_file_st_get_meta_len(map); + content_length_buffer = (uint8_t *) &map[CIO_FILE_CONTENT_LENGTH_OFFSET]; content_length = (ssize_t) (((uint32_t) content_length_buffer[0]) << 24) | @@ -134,37 +153,29 @@ static inline ssize_t cio_file_st_get_content_len(char *map, size_t size, * previous versions of chunkio that didn't include the content length * as part of the headers. * - * The reason why we need to ensure that the file size is larger than 4096 - * is that this is the minimal expected page size which is the unit used - * to initialize chunk files when they are created. - * - * In doing so, we effectively avoid returning bogus results when loading - * newly created, non trimmed files while at the same time retaining the - * capability of loading legacy files (that don't have a content size) - * that are larger than 4096 bytes. + * tainted_data_flag is used to differentiate non trimmed files being + * loaded from files whose chunk data is grown over the threshold and + * shrunk by the filter stack. * - * The only caveat is that trimmed files + * Because even when the content size is set to zero the data is not + * zeroed out (nor is the file shrunk) we can compare the first + * byte of the content section against zero to ensure that it's a + * valid msgpack serialized payload. 
*/ - if (content_length == 0 && - size > 0 && - size != page_size) { - content_length = cio_file_st_infer_content_len(map, size); - } - return content_length; -} + if (!tainted_data_flag && + content_length == 0 && + size > content_offset) { + content_buffer = (uint8_t *) &map[content_offset]; -/* Set content length */ -static inline void cio_file_st_set_content_len(char *map, uint32_t len) -{ - uint8_t *content_length_buffer; + if (content_buffer[0] != 0x00) { + content_length = cio_file_st_infer_content_len(map, size); - content_length_buffer = (uint8_t *) &map[CIO_FILE_CONTENT_LENGTH_OFFSET]; + cio_file_st_set_content_len(map, content_length); + } + } - content_length_buffer[0] = (uint8_t) ((len & 0xFF000000) >> 24); - content_length_buffer[1] = (uint8_t) ((len & 0x00FF0000) >> 16); - content_length_buffer[2] = (uint8_t) ((len & 0x0000FF00) >> 8); - content_length_buffer[3] = (uint8_t) ((len & 0x000000FF) >> 0); + return content_length; } #endif diff --git a/lib/chunkio/src/CMakeLists.txt b/lib/chunkio/src/CMakeLists.txt index 9e666381dbf..bb52273d494 100644 --- a/lib/chunkio/src/CMakeLists.txt +++ b/lib/chunkio/src/CMakeLists.txt @@ -42,7 +42,7 @@ endif() if (CIO_LIB_SHARED) add_library(chunkio-shared SHARED ${src}) - target_link_libraries(chunkio-static ${libs}) + target_link_libraries(chunkio-shared ${libs}) if(CIO_SANITIZE_ADDRESS) add_sanitizers(chunkio-shared) endif() diff --git a/lib/chunkio/src/cio_file.c b/lib/chunkio/src/cio_file.c index 019baa89048..56ac160a067 100644 --- a/lib/chunkio/src/cio_file.c +++ b/lib/chunkio/src/cio_file.c @@ -80,13 +80,15 @@ void cio_file_calculate_checksum(struct cio_file *cf, crc_t *out) content_length = cio_file_st_get_content_len(cf->map, cf->fs_size, - cf->page_size); + cf->page_size, + cf->taint_flag); if (content_length > 0) { len += content_length; } in_data = (unsigned char *) cf->map + CIO_FILE_CONTENT_OFFSET; + val = cio_crc32_update(cf->crc_cur, in_data, len); *out = val; } @@ -236,7 +238,8 @@ static int 
cio_file_format_check(struct cio_chunk *ch, /* Expected / logical file size verification */ content_length = cio_file_st_get_content_len(cf->map, cf->fs_size, - cf->page_size); + cf->page_size, + cf->taint_flag); if (content_length == -1) { cio_log_debug(ch->ctx, "[cio file] truncated header (%zu / %zu) %s", @@ -349,6 +352,8 @@ static int mmap_file(struct cio_ctx *ctx, struct cio_chunk *ch, size_t size) return CIO_OK; } + cf->taint_flag = CIO_FALSE; + /* * 'size' value represents the value of a previous fstat(2) set by a previous * caller. If the value is greater than zero, just use it, otherwise do a new @@ -420,7 +425,8 @@ static int mmap_file(struct cio_ctx *ctx, struct cio_chunk *ch, size_t size) if (fs_size > 0) { content_size = cio_file_st_get_content_len(cf->map, fs_size, - cf->page_size); + cf->page_size, + cf->taint_flag); if (content_size == -1) { cio_error_set(ch, CIO_ERR_BAD_FILE_SIZE); @@ -653,6 +659,7 @@ struct cio_file *cio_file_open(struct cio_ctx *ctx, cf->realloc_size = CIO_REALLOC_HINT_MIN; } + cf->taint_flag = CIO_FALSE; cf->st_content = NULL; cf->crc_cur = cio_crc32_init(); cf->path = path; @@ -1028,6 +1035,8 @@ int cio_file_write(struct cio_chunk *ch, const void *buf, size_t count) cio_file_st_set_content_len(cf->map, cf->data_size); + cf->taint_flag = CIO_TRUE; + return 0; } diff --git a/lib/chunkio/tools/CMakeLists.txt b/lib/chunkio/tools/CMakeLists.txt index 0fe1845f6c3..f5bbd146f05 100644 --- a/lib/chunkio/tools/CMakeLists.txt +++ b/lib/chunkio/tools/CMakeLists.txt @@ -2,4 +2,9 @@ set(src cio.c) add_executable(cio ${src}) -target_link_libraries(cio chunkio-static) + +if (CIO_LIB_SHARED) + target_link_libraries(cio chunkio-shared) +elseif (CIO_LIB_STATIC) + target_link_libraries(cio chunkio-static) +endif() \ No newline at end of file