Skip to content

Commit

Permalink
Hint compiler about memory aliasing restrictions
Browse files (browse the repository at this point in the history)
  • Loading branch information
arkq committed Nov 20, 2024
1 parent c927a9e commit 1ecfc8c
Show file tree
Hide file tree
Showing 9 changed files with 72 additions and 75 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ libraries (except original Qualcomm libraries) were compiled with Clang version
| aptxHD100 | — | — | 1m21.950s | 0.89616 |
| [libopenaptx-0.2.0][2] | 1m22.090s | 0.89062 | 1m25.730s | 0.85429 |

[1]: ./archive "Archive with Qualcomm apt-X encoding libraries"
[1]: archive/aarch64 "Archive with Qualcomm apt-X encoding libraries"
[2]: https://github.com/pali/libopenaptx "The apt-X encoder/decoder based on FFmpeg code"

## Resources
Expand Down
31 changes: 16 additions & 15 deletions src/aptx422/processor.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ void aptX_prediction_filtering(int32_t a, aptX_prediction_filter_422 * f) {
int32_t tmp1 = a + f->unk8;
clamp_int24_t(tmp1);

int32_t tmp2 = ((int64_t)tmp1 * f->unk2 + (int64_t)f->unk3 * f->unk6) >> 22;
int64_t x1 = (int64_t)f->unk3 * f->unk6;
int64_t x2 = (int64_t)tmp1 * f->unk2;
int32_t tmp2 = (x1 + x2) >> 22;
clamp_int24_t(tmp2);

int32_t v1 = 128;
Expand All @@ -44,24 +46,21 @@ void aptX_prediction_filtering(int32_t a, aptX_prediction_filter_422 * f) {
v2 = ((a >> 31) & 0xFF000000) + 8388736;
}

int32_t * q = &f->arr2[f->i + f->width];
size_t q = f->i + f->width;
int64_t sum = 0;
int32_t c = a;

f->i = (f->i + 1) % f->width;
f->subband_param_unk3_3 = a;
int64_t c = a;

for (size_t i = 0; i < (size_t)f->width; i++, q--) {

int32_t tmp;
if (*q >= 0)
if (f->arr2[q] >= 0)
tmp = v2 - f->arr1[i];
else
tmp = v1 - f->arr1[i];

f->arr1[i] += (tmp >> 8) - (((uint32_t)tmp) << 23 == 0x80000000);
sum += (int64_t)f->arr1[i] * c;
c = *q;
sum += c * f->arr1[i];
c = f->arr2[q];
}

f->unk6 = tmp1;
Expand All @@ -70,6 +69,8 @@ void aptX_prediction_filtering(int32_t a, aptX_prediction_filter_422 * f) {
f->unk8 = f->unk7 + tmp2;
clamp_int24_t(f->unk8);

f->i = (f->i + 1) % f->width;

f->arr2[f->i] = a;
f->arr2[f->i + f->width] = a;
}
Expand All @@ -88,15 +89,15 @@ void aptX_process_subband(int32_t a, int32_t dither, aptX_prediction_filter_422
f->sign2 = f->sign1;
f->sign1 = -1;
}
if (tmp == 0) {
sign1 *= 0;
sign2 *= 0;
else if (tmp > 0) {
sign1 *= 1;
sign2 *= 1;
f->sign2 = f->sign1;
f->sign1 = 1;
}
if (tmp > 0) {
sign1 *= 1;
sign2 *= 1;
else {
sign1 *= 0;
sign2 *= 0;
f->sign2 = f->sign1;
f->sign1 = 1;
}
Expand Down
3 changes: 2 additions & 1 deletion src/aptx422/qmf.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ void aptX_QMF_conv_inner(const int32_t s1[16], const int32_t s2[16], int32_t * o
*out_b = r2;
}

void aptX_QMF_analysis(aptX_QMF_analyzer_422 * qmf, const int32_t samples[4], const int32_t refs[4], int32_t diff[4]) {
void aptX_QMF_analysis(aptX_QMF_analyzer_422 * restrict qmf, const int32_t samples[restrict 4],
const int32_t refs[restrict 4], int32_t diff[restrict 4]) {

int32_t a, b, c, d;
int32_t tmp[4];
Expand Down
12 changes: 4 additions & 8 deletions src/aptx422/search.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,11 @@ static size_t aptX_search_quant_coeff(uint32_t a, int32_t x, const int32_t * dat
* integer space. The search is done using a simple binary search algorithm. */

int64_t aa = (int64_t)a << 32;
int64_t xx = x << 8;
size_t i = 0;
size_t n;

for (n = size / 2; n > 0; n /= 2)
/* XXX: There might be a potential error during calculation, because it
* seems that the subtraction is performed as an unsigned operation.
* Anyway, this algorithm and the original one (from the apt-X lib)
* have been stress-tested and both return the same values. */
if ((int64_t)data[i + n] * (x << 8) - aa <= 0)

for (size_t n = size / 2; n > 0; n /= 2)
if (xx * data[i + n] <= aa)
i += n;

return i;
Expand Down
73 changes: 36 additions & 37 deletions src/aptxhd100/processor.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
*/

#include "processor.h"
#include <stdio.h>

#include "mathex.h"

Expand All @@ -21,7 +20,7 @@ void aptXHD_invert_quantization(int32_t a, int32_t dither, aptXHD_inverter_100 *
int64_t tmp = (int64_t)dither * i->subband_param_dith16_sf1[i_];
tmp = rshift32(((int64_t)sl1 << 31) + tmp);
clamp_int24_t(tmp);
i->unk11 = (i->unk9 * tmp) >> 19;
i->unk11 = (tmp * i->unk9) >> 19;
clamp_int24_t(i->unk11);

i->unk10 = rshift15(32620 * i->unk10 + (i->subband_param_incr16[i_] << 15));
Expand All @@ -33,82 +32,82 @@ void aptXHD_invert_quantization(int32_t a, int32_t dither, aptXHD_inverter_100 *

void aptXHD_prediction_filtering(int32_t a, aptXHD_prediction_filter_100 * f) {

uint64_t x1 = (unsigned)f->unk6 * (uint64_t)(unsigned)f->unk3;
x1 += (uint64_t)(f->unk6 * (f->unk3 >> 31) + f->unk3 * (f->unk6 >> 31)) << 32;
int32_t tmp1 = a + f->unk8;
clamp_int24_t(tmp1);

f->unk6 = a + f->unk8;
clamp_int24_t(f->unk6);
int64_t x1 = (int64_t)f->unk3 * f->unk6;
int64_t x2 = (int64_t)tmp1 * f->unk2;
int32_t tmp2 = (x1 + x2) >> 22;
clamp_int24_t(tmp2);

uint64_t x2 = (unsigned)f->unk2 * (uint64_t)(unsigned)f->unk6;
x2 += (uint64_t)(f->unk6 * (f->unk2 >> 31) + f->unk2 * (f->unk6 >> 31)) << 32;

f->unk8 = (x1 + x2) >> 22;
clamp_int24_t(f->unk8);

int32_t v1 = 0x80;
int32_t v2 = 0x80;
int32_t v1 = 128;
int32_t v2 = 128;
if (a) {
v1 = ((a >> 31) & 0x01000000) - 0x7FFF80;
v2 = ((a >> 31) & 0xFF000000) + 0x800080;
v1 = ((a >> 31) & 0x01000000) - 8388480;
v2 = ((a >> 31) & 0xFF000000) + 8388736;
}

size_t q = f->i + f->width;
int64_t sum = 0;
int64_t c = a;

for (size_t i = 0; i < (size_t)f->width; i++) {
for (size_t i = 0; i < (size_t)f->width; i++, q--) {

int32_t tmp;
if (f->arr2[f->i + f->width - i] >= 0)
if (f->arr2[q] >= 0)
tmp = v2 - f->arr1[i];
else
tmp = v1 - f->arr1[i];

f->arr1[i] += (tmp >> 8) - (((uint32_t)tmp) << 23 == 0x80000000);

sum += c * f->arr1[i];
c = f->arr2[f->i + f->width - i];
c = f->arr2[q];
}

f->unk6 = tmp1;
f->unk7 = sum >> 22;
clamp_int24_t(f->unk7);
f->unk8 = f->unk7 + f->unk8;
f->unk8 = f->unk7 + tmp2;
clamp_int24_t(f->unk8);

f->i = (f->i + 1) % f->width;

f->arr2[f->i] = a;
f->arr2[f->i + f->width] = a;
f->subband_param_unk3_3 = a;
}

void aptXHD_process_subband(int32_t a, int32_t dither, aptXHD_prediction_filter_100 * f, aptXHD_inverter_100 * i) {

aptXHD_invert_quantization(a, dither, i);

int32_t sign1 = f->sign1;
int32_t sign2 = f->sign2;

int32_t tmp = f->unk7 + i->unk11;
int sign1 = f->sign1;
int sign2 = f->sign2;
if (tmp > 0) {
f->sign1 = 1;
f->sign2 = sign1;
} else if (tmp < 0) {
f->sign1 = -1;
f->sign2 = sign1;
if (tmp < 0) {
sign1 *= -1;
sign2 *= -1;
} else {
f->sign2 = f->sign1;
f->sign1 = -1;
}
else if (tmp > 0) {
sign1 *= 1;
sign2 *= 1;
f->sign2 = f->sign1;
f->sign1 = 1;
}
else {
sign1 *= 0;
sign2 *= 0;
f->sign2 = f->sign1;
f->sign1 = 1;
f->sign2 = sign1;
sign1 = 0;
sign2 = 0;
}

tmp = -1 * f->unk2 * sign1;
tmp = ((tmp + 1) >> 1) - ((tmp & 3) == 1);

tmp = tmp + 0x80000 * sign2;
clip_range(tmp, -0x100000, 0x100000);

f->unk3 = 254 * f->unk3 + (tmp >> 4 << 8);
f->unk3 = 254 * f->unk3 + 0x800000 * sign2 + (tmp >> 4 << 8);
f->unk3 = rshift8(f->unk3);
clip_range(f->unk3, -0x300000, 0x300000);

Expand Down
11 changes: 6 additions & 5 deletions src/aptxhd100/qmf.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,11 @@ void aptXHD_QMF_conv_inner(const int32_t s1[16], const int32_t s2[16], int32_t *
*out_b = r2;
}

void aptXHD_QMF_analysis(aptXHD_QMF_analyzer_100 * qmf, const int32_t samples[4], const int32_t refs[4],
int32_t diff[4]) {
void aptXHD_QMF_analysis(aptXHD_QMF_analyzer_100 * restrict qmf, const int32_t samples[restrict 4],
const int32_t refs[restrict 4], int32_t diff[restrict 4]) {

int32_t a, b, c, d;
int32_t tmp[4];

qmf->outer[0][qmf->i_outer + 0] = samples[0];
qmf->outer[0][qmf->i_outer + 16] = samples[0];
Expand Down Expand Up @@ -96,12 +97,12 @@ void aptXHD_QMF_analysis(aptXHD_QMF_analyzer_100 * qmf, const int32_t samples[4]

qmf->i_inner = (qmf->i_inner + 1) % 16;

aptXHD_QMF_conv_inner(&qmf->inner[2][qmf->i_inner + 15], &qmf->inner[0][qmf->i_inner], &diff[0], &diff[1]);
aptXHD_QMF_conv_inner(&qmf->inner[2][qmf->i_inner + 15], &qmf->inner[0][qmf->i_inner], &tmp[0], &tmp[1]);

aptXHD_QMF_conv_inner(&qmf->inner[1][qmf->i_inner + 15], &qmf->inner[3][qmf->i_inner], &diff[2], &diff[3]);
aptXHD_QMF_conv_inner(&qmf->inner[1][qmf->i_inner + 15], &qmf->inner[3][qmf->i_inner], &tmp[2], &tmp[3]);

for (size_t i = 0; i < 4; i++)
diff[i] -= refs[i];
diff[i] = tmp[i] - refs[i];
for (size_t i = 0; i < 4; i++)
clamp_int24_t(diff[i]);
}
6 changes: 3 additions & 3 deletions src/aptxhd100/quantizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ static void aptXHD_quantize_difference(int32_t diff, int32_t dither, int32_t qua
int absdiff = abs32(diff);
clamp_int24_t(absdiff);

int64_t v3 = v2 * 16 * (int64_t)(quant * -256);
q->unk3 = rshift3((v3 >> 32) + absdiff);
int32_t v3 = rshift32((int64_t)(v2 << 4) * (quant * -1 << 8)) + absdiff;
q->unk3 = ((v3 + 4) >> 3) - ((uint8_t)(v3 << 5) == 0x80);

if (absdiff + (v3 >> 32) < 0) {
if (q->unk3 < 0) {
q->unk2 = q->unk1;
q->unk1 = q->unk1 - 1;
q->unk3 = -q->unk3;
Expand Down
3 changes: 1 addition & 2 deletions src/aptxhd100/search.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@ static size_t aptXHD_search_quant_coeff(uint32_t a, int32_t x, const int32_t * d
int64_t aa = (int64_t)a << 32;
int64_t xx = x << 8;
size_t i = 0;
size_t n;

for (n = size / 2; n > 0; n /= 2)
for (size_t n = size / 2; n > 0; n /= 2)
if (xx * data[i + n] <= aa)
i += n;

Expand Down
6 changes: 3 additions & 3 deletions test/heval-hd100.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,9 @@ static int eval_init(size_t nloops, bool errstop) {
aptxhdbtenc_init(&enc_100, endian);
aptXHD_init(&enc_new, endian);

int c, b, ret = 0;
for (c = 0; c < APTXHD_CHANNELS; c++)
for (b = 0; b < APTXHD_SUBBANDS; b++) {
int ret = 0;
for (size_t c = 0; c < APTXHD_CHANNELS; c++)
for (size_t b = 0; b < APTXHD_SUBBANDS; b++) {
for (size_t i = 0; i < param_sizes[b]; i++)
ret |= diffint("bit16", enc_new.encoder[c].processor[b].inverter.subband_param_bit16_sl1[i],
enc_100.encoder[c].processor[b].inverter.subband_param_bit16_sl1[i]);
Expand Down

0 comments on commit 1ecfc8c

Please sign in to comment.