From 3e62c48353d5014981dc77e3dc91d66d3704cfc7 Mon Sep 17 00:00:00 2001 From: Reinhold <8436725+reikoch@users.noreply.github.com> Date: Thu, 6 Aug 2020 09:54:31 +0200 Subject: [PATCH 01/14] make xpt v8 SAS-readable --- src/sas/readstat_xport_write.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/sas/readstat_xport_write.c b/src/sas/readstat_xport_write.c index 5265fceb..6310185f 100644 --- a/src/sas/readstat_xport_write.c +++ b/src/sas/readstat_xport_write.c @@ -87,9 +87,11 @@ static readstat_error_t xport_write_variables(readstat_writer_t *writer) { readstat_variable_t *variable = readstat_get_variable(writer, i); size_t width = xport_variable_width(variable->type, variable->user_width); xport_namestr_t namestr = { - .nvar0 = i, + .nvar0 = i+1, .nlng = width, - .npos = offset + .npos = offset, + .niform = " ", + .nform = " " }; if (readstat_variable_get_type_class(variable) == READSTAT_TYPE_CLASS_STRING) { namestr.ntype = SAS_COLUMN_TYPE_CHR; From 842bf2877840c6a3f9e4599b0a1cbe5b0eacba82 Mon Sep 17 00:00:00 2001 From: Reinhold <8436725+reikoch@users.noreply.github.com> Date: Fri, 21 Aug 2020 13:09:55 +0200 Subject: [PATCH 02/14] fix reading long variable names from xpt --- .gitignore | 1 + src/sas/readstat_xport_read.c | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index c2846b6b..592aecf3 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,4 @@ test_double_decimals *.py *.tar.gz *.zip +.DS_Store diff --git a/src/sas/readstat_xport_read.c b/src/sas/readstat_xport_read.c index cf6bde18..7e3e55b2 100644 --- a/src/sas/readstat_xport_read.c +++ b/src/sas/readstat_xport_read.c @@ -449,8 +449,13 @@ static readstat_error_t xport_read_variables(xport_ctx_t *ctx) { variable->decimals = namestr.nfd; variable->alignment = namestr.nfj ? READSTAT_ALIGNMENT_RIGHT : READSTAT_ALIGNMENT_LEFT; - readstat_convert(variable->name, sizeof(variable->name), - namestr.nname, sizeof(namestr.nname), ctx->converter); + if (ctx->version == 5) { + readstat_convert(variable->name, sizeof(variable->name), + namestr.nname, sizeof(namestr.nname), ctx->converter); + } else { + readstat_convert(variable->name, sizeof(variable->name), + namestr.longname, sizeof(namestr.longname), ctx->converter); + } if (retval != READSTAT_OK) goto cleanup; From 64a3501bfc053e27d5f2dfb537dad85231b88e46 Mon Sep 17 00:00:00 2001 From: Reinhold <8436725+reikoch@users.noreply.github.com> Date: Wed, 26 Aug 2020 19:32:49 +0200 Subject: [PATCH 03/14] correct "file_label" to "table_name" --- src/bin/readstat.c | 4 ++++ src/sas/readstat_sas.c | 16 ++++++++-------- src/sas/readstat_sas.h | 5 +++-- src/sas/readstat_sas7bcat_read.c | 8 ++++---- src/sas/readstat_sas7bdat_read.c | 8 ++++---- src/sas/readstat_xport_read.c | 2 +- 6 files changed, 24 insertions(+), 19 deletions(-) diff --git a/src/bin/readstat.c b/src/bin/readstat.c index 9ce4deff..b0313317 100644 --- a/src/bin/readstat.c +++ b/src/bin/readstat.c @@ -392,6 +392,7 @@ static int convert_file(const char *input_filename, const char *catalog_filename static int dump_metadata(readstat_metadata_t *metadata, void *ctx) { printf("Columns: %d\n", readstat_get_var_count(metadata)); printf("Rows: %d\n", readstat_get_row_count(metadata)); + const char *table_name = readstat_get_table_name(metadata); const char *file_label = readstat_get_file_label(metadata); const char *orig_encoding = readstat_get_file_encoding(metadata); long version = readstat_get_file_format_version(metadata); @@ -399,6 +400,9 @@ static int dump_metadata(readstat_metadata_t *metadata, void *ctx) { readstat_compress_t compression = readstat_get_compression(metadata); readstat_endian_t endianness = readstat_get_endianness(metadata); + if (table_name && table_name[0]) { + printf("Table name: %s\n", table_name); + } if (file_label && file_label[0]) { printf("File label: %s\n", file_label); } diff --git a/src/sas/readstat_sas.c b/src/sas/readstat_sas.c index a6fab159..b7ac9425 100644 --- a/src/sas/readstat_sas.c +++ b/src/sas/readstat_sas.c @@ -161,7 +161,7 @@ readstat_error_t sas_read_header(readstat_io_t *io, sas_header_info_t *hinfo, retval = READSTAT_ERROR_UNSUPPORTED_CHARSET; goto cleanup; } - memcpy(hinfo->file_label, header_start.file_label, sizeof(header_start.file_label)); + memcpy(hinfo->table_name, header_start.table_name, sizeof(header_start.table_name)); if (io->seek(hinfo->pad1, READSTAT_SEEK_CUR, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; @@ -289,16 +289,16 @@ readstat_error_t sas_write_header(readstat_writer_t *writer, sas_header_info_t * struct tm epoch_tm = { .tm_year = 60, .tm_mday = 1 }; time_t epoch = mktime(&epoch_tm); - memset(header_start.file_label, ' ', sizeof(header_start.file_label)); + memset(header_start.table_name, ' ', sizeof(header_start.table_name)); - size_t file_label_len = strlen(writer->file_label); - if (file_label_len > sizeof(header_start.file_label)) - file_label_len = sizeof(header_start.file_label); + size_t table_name_len = strlen(writer->table_name); + if (table_name_len > sizeof(header_start.table_name)) + table_name_len = sizeof(header_start.table_name); - if (file_label_len) { - memcpy(header_start.file_label, writer->file_label, file_label_len); + if (table_name_len) { + memcpy(header_start.table_name, writer->table_name, table_name_len); } else { - memcpy(header_start.file_label, "DATASET", sizeof("DATASET")-1); + memcpy(header_start.table_name, "DATASET", sizeof("DATASET")-1); } retval = readstat_write_bytes(writer, &header_start, sizeof(sas_header_start_t)); diff --git a/src/sas/readstat_sas.h b/src/sas/readstat_sas.h index 4d707689..145e39ce 100644 --- a/src/sas/readstat_sas.h +++ b/src/sas/readstat_sas.h @@ -17,7 +17,8 @@ typedef struct sas_header_start_s { unsigned char encoding; unsigned char mystery5[13]; char file_type[8]; - char file_label[64]; + char table_name[32]; + unsigned char mystery6[32]; char file_info[8]; } sas_header_start_t; @@ -47,7 +48,7 @@ typedef struct sas_header_info_s { int64_t header_size; time_t creation_time; time_t modification_time; - char file_label[64]; + char table_name[32]; char *encoding; } sas_header_info_t; diff --git a/src/sas/readstat_sas7bcat_read.c b/src/sas/readstat_sas7bcat_read.c index 77f16bd5..bd743cc4 100644 --- a/src/sas/readstat_sas7bcat_read.c +++ b/src/sas/readstat_sas7bcat_read.c @@ -415,7 +415,7 @@ readstat_error_t readstat_parse_sas7bcat(readstat_parser_t *parser, const char * } if (ctx->metadata_handler) { - char file_label[4*64+1]; + char table_name[4*32+1]; readstat_metadata_t metadata = { .file_encoding = ctx->input_encoding, /* orig encoding? */ .modified_time = hinfo->modification_time, @@ -424,12 +424,12 @@ readstat_error_t readstat_parse_sas7bcat(readstat_parser_t *parser, const char * .endianness = hinfo->little_endian ? READSTAT_ENDIAN_LITTLE : READSTAT_ENDIAN_BIG, .is64bit = ctx->u64 }; - retval = readstat_convert(file_label, sizeof(file_label), - hinfo->file_label, sizeof(hinfo->file_label), ctx->converter); + retval = readstat_convert(table_name, sizeof(table_name), + hinfo->table_name, sizeof(hinfo->table_name), ctx->converter); if (retval != READSTAT_OK) goto cleanup; - metadata.file_label = file_label; + metadata.table_name = table_name; if (ctx->metadata_handler(&metadata, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; diff --git a/src/sas/readstat_sas7bdat_read.c b/src/sas/readstat_sas7bdat_read.c index 911f9ea2..1f9cac7e 100644 --- a/src/sas/readstat_sas7bdat_read.c +++ b/src/sas/readstat_sas7bdat_read.c @@ -85,7 +85,7 @@ typedef struct sas7bdat_ctx_s { time_t ctime; time_t mtime; int version; - char file_label[4*64+1]; + char table_name[4*32+1]; char error_buf[2048]; } sas7bdat_ctx_t; @@ -616,7 +616,7 @@ static readstat_error_t sas7bdat_submit_columns(sas7bdat_ctx_t *ctx, int compres readstat_metadata_t metadata = { .row_count = ctx->row_limit, .var_count = ctx->column_count, - .file_label = ctx->file_label, + .table_name = ctx->table_name, .file_encoding = ctx->input_encoding, /* orig encoding? */ .creation_time = ctx->ctime, .modified_time = ctx->mtime, @@ -1116,8 +1116,8 @@ readstat_error_t readstat_parse_sas7bdat(readstat_parser_t *parser, const char * ctx->converter = converter; } - if ((retval = readstat_convert(ctx->file_label, sizeof(ctx->file_label), - hinfo->file_label, sizeof(hinfo->file_label), ctx->converter)) != READSTAT_OK) { + if ((retval = readstat_convert(ctx->table_name, sizeof(ctx->table_name), + hinfo->table_name, sizeof(hinfo->table_name), ctx->converter)) != READSTAT_OK) { goto cleanup; } diff --git a/src/sas/readstat_xport_read.c b/src/sas/readstat_xport_read.c index 9c229423..233b2864 100644 --- a/src/sas/readstat_xport_read.c +++ b/src/sas/readstat_xport_read.c @@ -32,7 +32,7 @@ typedef struct xport_ctx_s { int row_offset; size_t row_length; int parsed_row_count; - char file_label[40*4+1]; + char file_label[256*4+1]; char table_name[32*4+1]; readstat_variable_t **variables; From bd79a61f4deeab74e787a18928877e036a2e791f Mon Sep 17 00:00:00 2001 From: Reinhold <8436725+reikoch@users.noreply.github.com> Date: Sat, 5 Sep 2020 22:22:12 +0200 Subject: [PATCH 04/14] reads and prints SAS dataset label --- src/bin/readstat.c | 2 +- src/sas/readstat_sas7bdat_read.c | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/bin/readstat.c b/src/bin/readstat.c index b0313317..ef6b69f5 100644 --- a/src/bin/readstat.c +++ b/src/bin/readstat.c @@ -404,7 +404,7 @@ static int dump_metadata(readstat_metadata_t *metadata, void *ctx) { printf("Table name: %s\n", table_name); } if (file_label && file_label[0]) { - printf("File label: %s\n", file_label); + printf("Table label: %s\n", file_label); } if (version) { printf("Format version: %ld\n", version); diff --git a/src/sas/readstat_sas7bdat_read.c b/src/sas/readstat_sas7bdat_read.c index 72627c88..ea8a1dd9 100644 --- a/src/sas/readstat_sas7bdat_read.c +++ b/src/sas/readstat_sas7bdat_read.c @@ -86,6 +86,7 @@ typedef struct sas7bdat_ctx_s { time_t mtime; int version; char table_name[4*32+1]; + char file_label[4*256+1]; char error_buf[2048]; unsigned int rdc_compression:1; @@ -298,6 +299,9 @@ static readstat_error_t sas7bdat_parse_column_name_subheader(const char *subhead for (i=ctx->col_names_count-cmax; icol_names_count; i++) { ctx->col_info[i].name_ref = sas7bdat_parse_text_ref(cnp, ctx); + if (i == 0) { + memcpy(ctx->file_label, &ctx->text_blobs[0][32+4*ctx->u64+8*ctx->rdc_compression], ctx->col_info[0].name_ref.offset-32-4*ctx->u64-8*ctx->rdc_compression); + } cnp += 8; } @@ -713,6 +717,7 @@ static readstat_error_t sas7bdat_submit_columns(sas7bdat_ctx_t *ctx, int compres .row_count = ctx->row_limit, .var_count = ctx->column_count, .table_name = ctx->table_name, + .file_label = ctx->file_label, .file_encoding = ctx->input_encoding, /* orig encoding? */ .creation_time = ctx->ctime, .modified_time = ctx->mtime, From 5e7bd816037c7978563a6c469e82f348edff6a86 Mon Sep 17 00:00:00 2001 From: Reinhold <8436725+reikoch@users.noreply.github.com> Date: Sun, 6 Sep 2020 17:57:16 +0200 Subject: [PATCH 05/14] fix lengths of name and label for testing sas7bdat --- src/sas/readstat_sas7bdat_read.c | 5 ++++- src/test/test_read.c | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/sas/readstat_sas7bdat_read.c b/src/sas/readstat_sas7bdat_read.c index ea8a1dd9..265c245a 100644 --- a/src/sas/readstat_sas7bdat_read.c +++ b/src/sas/readstat_sas7bdat_read.c @@ -300,7 +300,10 @@ static readstat_error_t sas7bdat_parse_column_name_subheader(const char *subhead for (i=ctx->col_names_count-cmax; icol_names_count; i++) { ctx->col_info[i].name_ref = sas7bdat_parse_text_ref(cnp, ctx); if (i == 0) { - memcpy(ctx->file_label, &ctx->text_blobs[0][32+4*ctx->u64+8*ctx->rdc_compression], ctx->col_info[0].name_ref.offset-32-4*ctx->u64-8*ctx->rdc_compression); + memcpy(ctx->file_label, + &ctx->text_blobs[0][12 + 24*ctx->u64 + 8*ctx->rdc_compression], + ctx->col_info[0].name_ref.offset - ctx->col_info[0].name_ref.length + ); } cnp += 8; } diff --git a/src/test/test_read.c b/src/test/test_read.c index 82c0bf57..a3ffa744 100644 --- a/src/test/test_read.c +++ b/src/test/test_read.c @@ -77,7 +77,8 @@ void parse_ctx_reset(rt_parse_ctx_t *parse_ctx, long file_format) { } else if ((file_format & RT_FORMAT_SAV)) { parse_ctx->max_file_label_len = 64; } else if ((file_format & RT_FORMAT_SAS7BDAT)) { - parse_ctx->max_file_label_len = 64; + parse_ctx->max_table_name_len = 32; + parse_ctx->max_file_label_len = 256; } else { parse_ctx->max_file_label_len = 20; } @@ -85,6 +86,7 @@ void parse_ctx_reset(rt_parse_ctx_t *parse_ctx, long file_format) { parse_ctx->max_table_name_len = 8; } else if ((file_format & RT_FORMAT_XPORT_8)) { parse_ctx->max_table_name_len = 32; + parse_ctx->max_file_label_len = 256; } parse_ctx->var_index = -1; parse_ctx->obs_index = -1; From 93cf3187379f7c5590cd1c5838747ba8ba4bb3e1 Mon Sep 17 00:00:00 2001 From: Reinhold <8436725+reikoch@users.noreply.github.com> Date: Sun, 6 Sep 2020 21:59:50 +0200 Subject: [PATCH 06/14] nearly correct dataset labels --- src/sas/readstat_sas7bdat_read.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/sas/readstat_sas7bdat_read.c b/src/sas/readstat_sas7bdat_read.c index 265c245a..9eab29f8 100644 --- a/src/sas/readstat_sas7bdat_read.c +++ b/src/sas/readstat_sas7bdat_read.c @@ -300,9 +300,10 @@ static readstat_error_t sas7bdat_parse_column_name_subheader(const char *subhead for (i=ctx->col_names_count-cmax; icol_names_count; i++) { ctx->col_info[i].name_ref = sas7bdat_parse_text_ref(cnp, ctx); if (i == 0) { + uint8_t off = ctx->text_blobs[0][11]; memcpy(ctx->file_label, - &ctx->text_blobs[0][12 + 24*ctx->u64 + 8*ctx->rdc_compression], - ctx->col_info[0].name_ref.offset - ctx->col_info[0].name_ref.length + &ctx->text_blobs[0][12+4*ctx->u64+off+ctx->rdc_compression*ctx->subheader_pointer_size], + ctx->col_info[0].name_ref.offset - 12 - 4*ctx->u64 - off - ctx->rdc_compression*ctx->subheader_pointer_size ); } cnp += 8; From 66a20ba675a6b0d1cc408aab7d41175e9c805070 Mon Sep 17 00:00:00 2001 From: Reinhold <8436725+reikoch@users.noreply.github.com> Date: Mon, 7 Sep 2020 00:04:13 +0200 Subject: [PATCH 07/14] dataset label offset needs short integer --- src/sas/readstat_sas7bdat_read.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sas/readstat_sas7bdat_read.c b/src/sas/readstat_sas7bdat_read.c index 9eab29f8..5189476e 100644 --- a/src/sas/readstat_sas7bdat_read.c +++ b/src/sas/readstat_sas7bdat_read.c @@ -300,7 +300,7 @@ static readstat_error_t sas7bdat_parse_column_name_subheader(const char *subhead for (i=ctx->col_names_count-cmax; icol_names_count; i++) { ctx->col_info[i].name_ref = sas7bdat_parse_text_ref(cnp, ctx); if (i == 0) { - uint8_t off = ctx->text_blobs[0][11]; + uint16_t off = (ctx->text_blobs[0][10]<<8)+ctx->text_blobs[0][11]; memcpy(ctx->file_label, &ctx->text_blobs[0][12+4*ctx->u64+off+ctx->rdc_compression*ctx->subheader_pointer_size], ctx->col_info[0].name_ref.offset - 12 - 4*ctx->u64 - off - ctx->rdc_compression*ctx->subheader_pointer_size From 01d67da958a3fa854fb3314621b1cb43e82f12a8 Mon Sep 17 00:00:00 2001 From: Reinhold <8436725+reikoch@users.noreply.github.com> Date: Mon, 7 Sep 2020 23:33:58 +0200 Subject: [PATCH 08/14] finally right dataset label --- src/sas/readstat_sas7bdat_read.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/sas/readstat_sas7bdat_read.c b/src/sas/readstat_sas7bdat_read.c index 5189476e..0ee3c2ab 100644 --- a/src/sas/readstat_sas7bdat_read.c +++ b/src/sas/readstat_sas7bdat_read.c @@ -286,6 +286,7 @@ static readstat_error_t sas7bdat_parse_column_name_subheader(const char *subhead int i; const char *cnp = &subheader[signature_len+8]; uint16_t remainder = sas_read2(&subheader[signature_len], ctx->bswap); + int off; if (remainder != sas_subheader_remainder(len, signature_len)) { retval = READSTAT_ERROR_PARSE; @@ -300,10 +301,12 @@ static readstat_error_t sas7bdat_parse_column_name_subheader(const char *subhead for (i=ctx->col_names_count-cmax; icol_names_count; i++) { ctx->col_info[i].name_ref = sas7bdat_parse_text_ref(cnp, ctx); if (i == 0) { - uint16_t off = (ctx->text_blobs[0][10]<<8)+ctx->text_blobs[0][11]; + if (!memcmp(&ctx->text_blobs[0][12], "SASYZCR", 7)) { + off = 44; + } else off = ctx->u64 ? 36 : 12; memcpy(ctx->file_label, - &ctx->text_blobs[0][12+4*ctx->u64+off+ctx->rdc_compression*ctx->subheader_pointer_size], - ctx->col_info[0].name_ref.offset - 12 - 4*ctx->u64 - off - ctx->rdc_compression*ctx->subheader_pointer_size + &ctx->text_blobs[0][off], + ctx->col_info[i].name_ref.offset - off ); } cnp += 8; From b849f68cdd5324ccf5836f75933171bde87bbcac Mon Sep 17 00:00:00 2001 From: Reinhold <8436725+reikoch@users.noreply.github.com> Date: Mon, 7 Sep 2020 23:47:48 +0200 Subject: [PATCH 09/14] check text blob length for dataset label retrieval --- src/sas/readstat_sas7bdat_read.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/sas/readstat_sas7bdat_read.c b/src/sas/readstat_sas7bdat_read.c index 0ee3c2ab..7e24c50f 100644 --- a/src/sas/readstat_sas7bdat_read.c +++ b/src/sas/readstat_sas7bdat_read.c @@ -304,9 +304,13 @@ static readstat_error_t sas7bdat_parse_column_name_subheader(const char *subhead if (!memcmp(&ctx->text_blobs[0][12], "SASYZCR", 7)) { off = 44; } else off = ctx->u64 ? 36 : 12; + if (ctx->col_info[0].name_ref.offset > ctx->text_blob_lengths[0]) { + retval = READSTAT_ERROR_PARSE; + goto cleanup; + } memcpy(ctx->file_label, &ctx->text_blobs[0][off], - ctx->col_info[i].name_ref.offset - off + ctx->col_info[0].name_ref.offset - off ); } cnp += 8; From 6065ea329a88c5273d2fe0b1b12711c4d6747be6 Mon Sep 17 00:00:00 2001 From: Reinhold <8436725+reikoch@users.noreply.github.com> Date: Tue, 8 Sep 2020 22:24:39 +0200 Subject: [PATCH 10/14] better protect pointers reading dataset label --- src/sas/readstat_sas7bdat_read.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/sas/readstat_sas7bdat_read.c b/src/sas/readstat_sas7bdat_read.c index 7e24c50f..51ca7aa3 100644 --- a/src/sas/readstat_sas7bdat_read.c +++ b/src/sas/readstat_sas7bdat_read.c @@ -301,17 +301,33 @@ static readstat_error_t sas7bdat_parse_column_name_subheader(const char *subhead for (i=ctx->col_names_count-cmax; icol_names_count; i++) { ctx->col_info[i].name_ref = sas7bdat_parse_text_ref(cnp, ctx); if (i == 0) { - if (!memcmp(&ctx->text_blobs[0][12], "SASYZCR", 7)) { - off = 44; - } else off = ctx->u64 ? 36 : 12; - if (ctx->col_info[0].name_ref.offset > ctx->text_blob_lengths[0]) { + if (ctx->version < 9) { + off = 36; + } else { + if (ctx->text_blob_lengths[0] < 19) { + retval = READSTAT_ERROR_PARSE; + goto cleanup; + } + if (!memcmp(&ctx->text_blobs[0][12], "SASYZCR", 7)) { + off = 44; + } else { + off = ctx->u64 ? 36 : 12; + } + } + if (ctx->col_info[0].name_ref.offset >= + ctx->text_blob_lengths[0] || + ctx->col_info[0].name_ref.offset < off) { retval = READSTAT_ERROR_PARSE; goto cleanup; } - memcpy(ctx->file_label, - &ctx->text_blobs[0][off], - ctx->col_info[0].name_ref.offset - off - ); + retval = readstat_convert(ctx->file_label, + sizeof(ctx->file_label), + &ctx->text_blobs[0][off], + ctx->col_info[0].name_ref.offset - off, + ctx->converter + ); + if (retval != READSTAT_OK) + goto cleanup; } cnp += 8; } From 83b548d87fbe6d75d714247f590c6dac76c82259 Mon Sep 17 00:00:00 2001 From: Reinhold <8436725+reikoch@users.noreply.github.com> Date: Wed, 16 Sep 2020 00:00:48 +0200 Subject: [PATCH 11/14] fix writing test sas7bdats --- src/readstat.h | 2 +- src/sas/readstat_sas.c | 2 +- src/sas/readstat_sas.h | 1 + src/sas/readstat_sas7bdat_write.c | 7 +++++-- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/readstat.h b/src/readstat.h index 46066f67..c24e120c 100644 --- a/src/readstat.h +++ b/src/readstat.h @@ -479,7 +479,7 @@ typedef struct readstat_writer_s { int row_count; int current_row; - char file_label[100]; + char file_label[257]; char table_name[33]; const readstat_variable_t *fweight_variable; diff --git a/src/sas/readstat_sas.c b/src/sas/readstat_sas.c index a26b408c..3c43e0bc 100644 --- a/src/sas/readstat_sas.c +++ b/src/sas/readstat_sas.c @@ -379,7 +379,7 @@ readstat_error_t sas_write_header(readstat_writer_t *writer, sas_header_info_t * goto cleanup; sas_header_end_t header_end = { - .host = "W32_VSPRO" + .host = "9.0401M6Linux" }; char release[sizeof(header_end.release)+1] = { 0 }; diff --git a/src/sas/readstat_sas.h b/src/sas/readstat_sas.h index 145e39ce..5fce15c3 100644 --- a/src/sas/readstat_sas.h +++ b/src/sas/readstat_sas.h @@ -49,6 +49,7 @@ typedef struct sas_header_info_s { time_t creation_time; time_t modification_time; char table_name[32]; + char file_label[256]; char *encoding; } sas_header_info_t; diff --git a/src/sas/readstat_sas7bdat_write.c b/src/sas/readstat_sas7bdat_write.c index 19467ab7..a22b2310 100644 --- a/src/sas/readstat_sas7bdat_write.c +++ b/src/sas/readstat_sas7bdat_write.c @@ -140,7 +140,7 @@ static readstat_error_t sas7bdat_emit_header(readstat_writer_t *writer, sas_head .file_format = SAS_FILE_FORMAT_UNIX, .encoding = 20, /* UTF-8 */ .file_type = "SAS FILE", - .file_info = "DATA ~ ~" + .file_info = "DATA " }; memcpy(&header_start.magic, sas7bdat_magic_number, sizeof(header_start.magic)); @@ -218,13 +218,16 @@ static sas7bdat_subheader_t *sas7bdat_col_name_subheader_init(readstat_writer_t sas7bdat_subheader_t *subheader = sas7bdat_subheader_init( SAS_SUBHEADER_SIGNATURE_COLUMN_NAME, len); memcpy(&subheader->data[signature_len], &remainder, sizeof(uint16_t)); + + sas_text_ref_t text_ref = sas7bdat_make_text_ref(column_text_array, "READSTAT"); + text_ref = sas7bdat_make_text_ref(column_text_array, hinfo->file_label); int i; char *ptrs = &subheader->data[signature_len+8]; for (i=0; ivariables_count; i++) { readstat_variable_t *variable = readstat_get_variable(writer, i); const char *name = readstat_variable_get_name(variable); - sas_text_ref_t text_ref = sas7bdat_make_text_ref(column_text_array, name); + text_ref = sas7bdat_make_text_ref(column_text_array, name); memcpy(&ptrs[0], &text_ref.index, sizeof(uint16_t)); memcpy(&ptrs[2], &text_ref.offset, sizeof(uint16_t)); memcpy(&ptrs[4], &text_ref.length, sizeof(uint16_t)); From 80d1843c313d997d219cfbaa33072e2565784883 Mon Sep 17 00:00:00 2001 From: Reinhold <8436725+reikoch@users.noreply.github.com> Date: Wed, 16 Sep 2020 22:40:13 +0200 Subject: [PATCH 12/14] write dataset labels into sas7bdat files --- src/sas/readstat_sas7bdat_write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sas/readstat_sas7bdat_write.c b/src/sas/readstat_sas7bdat_write.c index a22b2310..f33a442d 100644 --- a/src/sas/readstat_sas7bdat_write.c +++ b/src/sas/readstat_sas7bdat_write.c @@ -220,7 +220,7 @@ static sas7bdat_subheader_t *sas7bdat_col_name_subheader_init(readstat_writer_t memcpy(&subheader->data[signature_len], &remainder, sizeof(uint16_t)); sas_text_ref_t text_ref = sas7bdat_make_text_ref(column_text_array, "READSTAT"); - text_ref = sas7bdat_make_text_ref(column_text_array, hinfo->file_label); + text_ref = sas7bdat_make_text_ref(column_text_array, writer->file_label); int i; char *ptrs = &subheader->data[signature_len+8]; From 828f481168b0307113e32aa33e271bec1bcb7bb4 Mon Sep 17 00:00:00 2001 From: Reinhold <8436725+reikoch@users.noreply.github.com> Date: Thu, 17 Sep 2020 09:30:53 +0200 Subject: [PATCH 13/14] avoid writing labels into SAS7BCAT files --- src/test/test_write.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/test/test_write.c b/src/test/test_write.c index f7831398..46e14306 100644 --- a/src/test/test_write.c +++ b/src/test/test_write.c @@ -31,7 +31,9 @@ readstat_error_t write_file_to_buffer(rt_test_file_t *file, rt_buffer_t *buffer, readstat_writer_t *writer = readstat_writer_init(); readstat_set_data_writer(writer, &write_data); - readstat_writer_set_file_label(writer, file->label); + if ((format & RT_FORMAT_SAS7BCAT)) { + strncpy(file->label, "", 1); + } else readstat_writer_set_file_label(writer, file->label); readstat_writer_set_table_name(writer, file->table_name); readstat_writer_set_error_handler(writer, &handle_error); if (file->timestamp.tm_year) { From 2c83606557f716a298f988f0880e1a941374e111 Mon Sep 17 00:00:00 2001 From: Reinhold <8436725+reikoch@users.noreply.github.com> Date: Fri, 18 Sep 2020 19:56:05 +0200 Subject: [PATCH 14/14] protect against bad pointer for first text blob --- src/sas/readstat_sas7bdat_read.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/sas/readstat_sas7bdat_read.c b/src/sas/readstat_sas7bdat_read.c index 51ca7aa3..44e13135 100644 --- a/src/sas/readstat_sas7bdat_read.c +++ b/src/sas/readstat_sas7bdat_read.c @@ -301,6 +301,10 @@ static readstat_error_t sas7bdat_parse_column_name_subheader(const char *subhead for (i=ctx->col_names_count-cmax; icol_names_count; i++) { ctx->col_info[i].name_ref = sas7bdat_parse_text_ref(cnp, ctx); if (i == 0) { + if (ctx->text_blobs == NULL || ctx->text_blob_lengths == NULL) { + retval = READSTAT_ERROR_PARSE; + goto cleanup; + } if (ctx->version < 9) { off = 36; } else {