Skip to content

Commit

Permalink
Remove aligned writes and simplify unaligned writes
Browse files Browse the repository at this point in the history
Compilers are not taking advantage of the optimization opportunity that the
aligned write functions were meant to provide, so these functions make the
code more complex for no real gain.
The unaligned write functions can use the bytewrite macro to become shorter.
  • Loading branch information
aaaaaa123456789 committed Jan 15, 2022
1 parent 7db9687 commit 90e24db
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 57 deletions.
44 changes: 22 additions & 22 deletions src/bmpwrite.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,23 +43,23 @@ void generate_BMP_bitmasked_data (struct context * context, uint32_t depth, unsi
memset(attributes, 0, 108);
write_le32_unaligned(offset_pointer, 122);
*attributes = 108;
write_le32((uint32_t *) (attributes + 4), context -> source -> width);
write_le32((uint32_t *) (attributes + 8), context -> source -> height);
write_le32_unaligned(attributes + 4, context -> source -> width);
write_le32_unaligned(attributes + 8, context -> source -> height);
attributes[12] = 1;
attributes[14] = ((reddepth + greendepth + bluedepth + alphadepth) <= 16) ? 16 : 32;
attributes[16] = 3;
write_le32((uint32_t *) (attributes + 40), ((uint32_t) 1 << reddepth) - 1);
write_le32((uint32_t *) (attributes + 44), (((uint32_t) 1 << greendepth) - 1) << reddepth);
write_le32((uint32_t *) (attributes + 48), (((uint32_t) 1 << bluedepth) - 1) << blueshift);
write_le32_unaligned(attributes + 40, ((uint32_t) 1 << reddepth) - 1);
write_le32_unaligned(attributes + 44, (((uint32_t) 1 << greendepth) - 1) << reddepth);
write_le32_unaligned(attributes + 48, (((uint32_t) 1 << bluedepth) - 1) << blueshift);
if (alphadepth)
write_le32((uint32_t *) (attributes + 52), (((uint32_t) 1 << alphadepth) - 1) << alphashift);
write_le32_unaligned(attributes + 52, (((uint32_t) 1 << alphadepth) - 1) << alphashift);
else
write_le32((uint32_t *) (attributes + 52), 0);
write_le32((uint32_t *) (attributes + 56), 0x73524742u); // 'sRGB'
write_le32_unaligned(attributes + 52, 0);
write_le32_unaligned(attributes + 56, 0x73524742u); // 'sRGB'
size_t rowsize = (size_t) context -> source -> width * (attributes[14] >> 3);
if ((attributes[14] == 16) && (context -> source -> width & 1)) rowsize += 2;
size_t imagesize = rowsize * context -> source -> height;
if (imagesize <= 0x7fffffffu) write_le32((uint32_t *) (attributes + 20), imagesize);
if (imagesize <= 0x7fffffffu) write_le32_unaligned(attributes + 20, imagesize);
unsigned char * data = append_output_node(context, imagesize);
uint_fast32_t row = context -> source -> height - 1;
do {
Expand All @@ -82,15 +82,15 @@ void generate_BMP_bitmasked_data (struct context * context, uint32_t depth, unsi
((color & 0xffff00000000u) >> (48 - bluedepth) << blueshift);
if (alphadepth) out |= (color & 0xffff000000000000u) >> (64 - alphadepth) << alphashift;
if (attributes[14] == 16) {
write_le16((uint16_t *) data, out);
write_le16_unaligned(data, out);
data += 2;
} else {
write_le32((uint32_t *) data, out);
write_le32_unaligned(data, out);
data += 4;
}
}
if ((attributes[14] == 16) && (context -> source -> width & 1)) {
write_le16((uint16_t *) data, 0);
write_le16_unaligned(data, 0);
data += 2;
}
} while (row --);
Expand All @@ -101,11 +101,11 @@ void generate_BMP_palette_halfbyte_data (struct context * context, unsigned char
write_le32_unaligned(offset_pointer, 58 + 4 * context -> source -> max_palette_index);
memset(attributes, 0, 40);
*attributes = 40;
write_le32((uint32_t *) (attributes + 4), context -> source -> width);
write_le32((uint32_t *) (attributes + 8), context -> source -> height);
write_le32_unaligned(attributes + 4, context -> source -> width);
write_le32_unaligned(attributes + 8, context -> source -> height);
attributes[12] = 1;
attributes[14] = 4;
write_le32((uint32_t *) (attributes + 32), context -> source -> max_palette_index + 1);
write_le32_unaligned(attributes + 32, context -> source -> max_palette_index + 1);
append_BMP_palette(context);
size_t rowsize = ((context -> source -> width + 7) & ~7u) >> 1;
if (context -> source -> max_palette_index < 2) rowsize = ((rowsize >> 2) + 3) & ~3u;
Expand All @@ -114,7 +114,7 @@ void generate_BMP_palette_halfbyte_data (struct context * context, unsigned char
size_t compressed = try_compress_BMP(context, imagesize, &compress_BMP_halfbyte_row);
if (compressed) {
attributes[16] = 2;
if (compressed <= 0x7fffffffu) write_le32((uint32_t *) (attributes + 20), compressed);
if (compressed <= 0x7fffffffu) write_le32_unaligned(attributes + 20, compressed);
context -> output -> size = compressed;
return;
}
Expand Down Expand Up @@ -149,18 +149,18 @@ void generate_BMP_palette_byte_data (struct context * context, unsigned char * o
write_le32_unaligned(offset_pointer, 58 + 4 * context -> source -> max_palette_index);
memset(attributes, 0, 40);
*attributes = 40;
write_le32((uint32_t *) (attributes + 4), context -> source -> width);
write_le32((uint32_t *) (attributes + 8), context -> source -> height);
write_le32_unaligned(attributes + 4, context -> source -> width);
write_le32_unaligned(attributes + 8, context -> source -> height);
attributes[12] = 1;
attributes[14] = 8;
write_le32((uint32_t *) (attributes + 32), context -> source -> max_palette_index + 1);
write_le32_unaligned(attributes + 32, context -> source -> max_palette_index + 1);
append_BMP_palette(context);
size_t rowsize = (context -> source -> width + 3) & ~3u, imagesize = rowsize * context -> source -> height;
unsigned char * data = append_output_node(context, imagesize);
size_t compressed = try_compress_BMP(context, imagesize, &compress_BMP_byte_row);
if (compressed) {
attributes[16] = 1;
if (compressed <= 0x7fffffffu) write_le32((uint32_t *) (attributes + 20), compressed);
if (compressed <= 0x7fffffffu) write_le32_unaligned(attributes + 20, compressed);
context -> output -> size = compressed;
return;
}
Expand Down Expand Up @@ -360,8 +360,8 @@ void generate_BMP_RGB_data (struct context * context, unsigned char * offset_poi
write_le32_unaligned(offset_pointer, 54);
memset(attributes, 0, 40);
*attributes = 40;
write_le32((uint32_t *) (attributes + 4), context -> source -> width);
write_le32((uint32_t *) (attributes + 8), context -> source -> height);
write_le32_unaligned(attributes + 4, context -> source -> width);
write_le32_unaligned(attributes + 8, context -> source -> height);
attributes[12] = 1;
attributes[14] = 24;
uint32_t * data;
Expand Down
4 changes: 2 additions & 2 deletions src/gifwrite.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ void generate_GIF_data (struct context * context) {
// technically, some GIFs could be 87a; however, at the time of writing, 89a is over three decades old and supported by everything relevant
byteoutput(context, 0x47, 0x49, 0x46, 0x38, 0x39, 0x61);
unsigned char * header = append_output_node(context, 7);
write_le16((uint16_t *) header, context -> source -> width);
write_le16((uint16_t *) (header + 2), context -> source -> height);
write_le16_unaligned(header, context -> source -> width);
write_le16_unaligned(header + 2, context -> source -> height);
uint_fast32_t depth = get_true_color_depth(context -> source);
uint8_t overall = depth;
if ((uint8_t) (depth >> 8) > overall) overall = depth >> 8;
Expand Down
35 changes: 4 additions & 31 deletions src/multibyte.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,44 +17,17 @@ static inline uint32_t read_be32_unaligned (const unsigned char * data) {
}

static inline void write_le16_unaligned (unsigned char * buffer, uint16_t value) {
*(buffer ++) = value;
*(buffer ++) = value >> 8;
}

static inline void write_le16 (uint16_t * buffer, uint16_t value) {
*((unsigned char *) buffer) = value;
1[(unsigned char *) buffer] = value >> 8;
bytewrite(buffer, value, value >> 8);
}

static inline void write_le32_unaligned (unsigned char * buffer, uint32_t value) {
*(buffer ++) = value;
*(buffer ++) = value >> 8;
*(buffer ++) = value >> 16;
*buffer = value >> 24;
}

static inline void write_le32 (uint32_t * buffer, uint32_t value) {
*((unsigned char *) buffer) = value;
1[(unsigned char *) buffer] = value >> 8;
2[(unsigned char *) buffer] = value >> 16;
3[(unsigned char *) buffer] = value >> 24;
bytewrite(buffer, value, value >> 8, value >> 16, value >> 24);
}

static inline void write_be16_unaligned (unsigned char * buffer, uint32_t value) {
*(buffer ++) = value >> 8;
*buffer = value;
bytewrite(buffer, value >> 8, value);
}

static inline void write_be32_unaligned (unsigned char * buffer, uint32_t value) {
*(buffer ++) = value >> 24;
*(buffer ++) = value >> 16;
*(buffer ++) = value >> 8;
*buffer = value;
}

static inline void write_be32 (uint32_t * buffer, uint32_t value) {
*((unsigned char *) buffer) = value >> 24;
1[(unsigned char *) buffer] = value >> 16;
2[(unsigned char *) buffer] = value >> 8;
3[(unsigned char *) buffer] = value;
bytewrite(buffer, value >> 24, value >> 16, value >> 8, value);
}
4 changes: 2 additions & 2 deletions src/pngwrite.c
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,8 @@ void append_APNG_frame_header (struct context * context, uint64_t duration, uint

void output_PNG_chunk (struct context * context, uint32_t type, uint32_t size, const void * restrict data) {
unsigned char * node = append_output_node(context, size + 12);
write_be32((uint32_t *) node, size);
write_be32((uint32_t *) (node + 4), type);
write_be32_unaligned(node, size);
write_be32_unaligned(node + 4, type);
if (size) memcpy(node + 8, data, size);
write_be32_unaligned(node + size + 8, compute_PNG_CRC(node + 4, size + 4));
}
Expand Down

0 comments on commit 90e24db

Please sign in to comment.