From d2e3e0daf588ae49aaa64f7c4e5fcd0d3f4d0812 Mon Sep 17 00:00:00 2001 From: Simon Sievert Date: Sun, 5 Jan 2025 15:45:00 +0100 Subject: [PATCH] Add support for decoding the Quite Ok Audio Format (QOA) --- ChangeLog | 1 + Makefile | 5 + libs/audio/qoa/jswrap_qoa.c | 209 ++++++++++++++++++++++++++++++++ libs/audio/qoa/jswrap_qoa.h | 24 ++++ libs/audio/qoa/qoa.c | 233 ++++++++++++++++++++++++++++++++++++ libs/audio/qoa/qoa.h | 43 +++++++ 6 files changed, 515 insertions(+) create mode 100644 libs/audio/qoa/jswrap_qoa.c create mode 100644 libs/audio/qoa/jswrap_qoa.h create mode 100644 libs/audio/qoa/qoa.c create mode 100644 libs/audio/qoa/qoa.h diff --git a/ChangeLog b/ChangeLog index e35e6109f..1016e8b92 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,5 @@ : nRF5x: ensure TIMER1_IRQHandler doesn't always wake idle loop up (fix #1900) + Add support for decoding the Quite Ok Audio Format (QOA) 2v25 : ESP32C3: Get analogRead working correctly Graphics: Adjust image alignment when rotating images to avoid cropping (fix #2535) diff --git a/Makefile b/Makefile index bd4d33497..2b005a6e4 100755 --- a/Makefile +++ b/Makefile @@ -685,6 +685,11 @@ ifeq ($(USE_JIT),1) SOURCES += src/jsjit.c src/jsjitc.c endif +ifeq ($(USE_QOA),1) + INCLUDE += -I$(ROOT)/libs/audio/qoa + WRAPPERSOURCES += libs/audio/qoa/jswrap_qoa.c + SOURCES += libs/audio/qoa/qoa.c +endif endif # BOOTLOADER ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ DON'T USE STUFF ABOVE IN BOOTLOADER diff --git a/libs/audio/qoa/jswrap_qoa.c b/libs/audio/qoa/jswrap_qoa.c new file mode 100644 index 000000000..67023fe10 --- /dev/null +++ b/libs/audio/qoa/jswrap_qoa.c @@ -0,0 +1,209 @@ +/* + * This file is part of Espruino, a JavaScript interpreter for Microcontrollers + * + * Copyright (C) 2013 Gordon Williams + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * ---------------------------------------------------------------------------- + * JavaScript methods for working with the Quite Ok Audio Format (QOA) + * ---------------------------------------------------------------------------- + */ + +#include "jswrap_qoa.h" +#include "qoa.h" +#include "jsinteractive.h" + +/*JSON{ + "type" : "class", + "class" : "QOA" +} +Class for working with the [Quite Ok Audio Format (QOA)](https://qoaformat.org). + +QOA does reasonably fast lossy audio compression at 3.2 bits per sample. + +You can use the [reference encoder](https://github.com/phoboslab/qoa) to encode some audio, +decode that audio on-device and then play decoded audio with the `Waveform` class. +*/ + +/*JSON{ + "type" : "staticmethod", + "class" : "QOA", + "name" : "initDecode", + "ifndef" : "SAVE_ON_FLASH", + "generate" : "jswrap_qoa_init_decode", + "params" : [ + ["header","JsVar","an arraybuffer containing (at least) the header of the qoa file"] + ], + "return" : ["JsVar","an object {firstFramePos, qoaState} containing the first frame pos as an integer and the qoa decoder state as an ArrayBuffer, or null on error"] +} +Initialise a QOA decoding context. 
+*/ +JsVar *jswrap_qoa_init_decode(JsVar *header) { + if (!jsvIsArrayBuffer(header)) { + jsiConsolePrint("error: header is not an arraybuffer\n"); + return NULL; + } + unsigned char headerBuf[QOA_MIN_FILESIZE]; { + JsVar *headerStr = jsvGetArrayBufferBackingString(header, NULL); + size_t readChars = jsvGetStringChars(headerStr, 0, headerBuf, QOA_MIN_FILESIZE); + jsvUnLock(headerStr); + if (readChars < QOA_MIN_FILESIZE) { + jsiConsolePrint("error: not enough data read from header\n"); + return NULL; + } + } + qoa_desc *qoa = NULL; + JsVar *qoaStateBuf = jsvNewArrayBufferWithPtr(sizeof(qoa_desc), (char **) &qoa); + uint32_t firstFramePos = qoa_decode_header(headerBuf, QOA_MIN_FILESIZE, qoa); + if (!firstFramePos) { + jsvUnLock(qoaStateBuf); + jsiConsolePrint("error: failed to decode QOA header\n"); + return NULL; + } + if (qoa->channels > 1) { + jsvUnLock(qoaStateBuf); + jsiConsolePrint("error: too many channels; only single channel audio is supported at the moment\n"); + return NULL; + } + JsVar *result = jsvNewObject(); + jsvAddNamedChildAndUnLock(result, jsvNewFromInteger((JsVarInt) firstFramePos), "firstFramePos"); + jsvAddNamedChildAndUnLock(result, qoaStateBuf, "qoaState"); + return result; +} + +/*JSON{ + "type" : "staticproperty", + "class" : "QOA", + "name" : "FRAME_LEN", + "generate" : "jswrap_qoa_frame_len", + "return" : ["JsVar","Number of samples in a frame of decoded audio."] +}*/ +JsVar *jswrap_qoa_frame_len() { + return jsvNewFromInteger(QOA_FRAME_LEN); +} + +/*JSON{ + "type" : "staticmethod", + "class" : "QOA", + "name" : "decode", + "ifndef" : "SAVE_ON_FLASH", + "generate" : "jswrap_qoa_decode", + "params" : [ + ["encoded","JsVar","an ArrayBuffer containing some frames of a qoa file"], + ["decoded","JsVar","an ArrayBuffer to write decoded samples into"], + ["options","JsVar","an object containing {offset, qoaState, bits, fill}; see below"] + ], + "return" : ["JsVar","an object with two integers: {frameLen, frameSize}, or null on error"] +} + +Decode 
some QOA encoded audio samples. + +Data is read from the "encoded" buffer and written to the "decoded" buffer. +Note that the "decoded" buffer needs to be at least `QOA.FRAME_LEN` long and match the number of bits per sample. + +Decoded samples are unsigned integers and take up either 8 or 16 bits, depending on the `bits` argument. +The default is 16 bits per sample, which is what QOA usually decodes to. +But you may choose 8 bits per sample if you want to save some RAM. + +The options object must contain: +- offset: where to start reading from the encoded data buffer +- qoaState: an ArrayBuffer containing the qoa decoder state +And can optionally contain: +- bits: number of bits per sample to write to the decoded data buffer, either 8 or 16 (default is 16) +- fill: bool; true to fill remaining decoded data buffer with silence +*/ +JsVar *jswrap_qoa_decode(JsVar *encoded, JsVar *decoded, JsVar *options) { + bool use16Bit = true; + bool fillRestOfBuffer = false; + int offsetVal = 0; { + JsVar *offset = jsvObjectGetChildIfExists(options, "offset"); + if (jsvIsIntegerish(offset)) { + offsetVal = jsvGetInteger(offset); + } else { + jsiConsolePrint("error: options must contain a numeric \"offset\"\n"); + jsvUnLock(offset); + return NULL; + } + jsvUnLock(offset); + } { + JsVar *bits = jsvObjectGetChildIfExists(options, "bits"); + if (jsvIsIntegerish(bits)) { + int bitsVal = jsvGetInteger(bits); + if (bitsVal == 8) { + use16Bit = false; + } else if (bitsVal == 16) { + use16Bit = true; + } else { + jsiConsolePrint("error: bits must be either 8 or 16\n"); + jsvUnLock(bits); + return NULL; + } + } + jsvUnLock(bits); + } { + JsVar *fill = jsvObjectGetChildIfExists(options, "fill"); + if (jsvGetBool(fill)) { + fillRestOfBuffer = true; + } + jsvUnLock(fill); + } { + uint32_t decodedBufStrOffset = 0; + JsVar *decodedBufStr = jsvGetArrayBufferBackingString(decoded, &decodedBufStrOffset); + size_t decodedBufLength = jsvGetStringLength(decodedBufStr) - decodedBufStrOffset; + size_t 
minNeededDecodedBufLength = QOA_FRAME_LEN * (use16Bit ? 2 : 1); + jsvUnLock(decodedBufStr); + if (decodedBufLength < minNeededDecodedBufLength) { + jsiConsolePrintf("error: encoded data buffer not big enough; must be at least %d samples long\n", QOA_FRAME_LEN); + return NULL; + } + } + JsVar *qoaState = jsvObjectGetChildIfExists(options, "qoaState"); + if (qoaState == NULL) { + jsiConsolePrint("error: options must contain \"qoaState\"\n"); + jsvUnLock(qoaState); + return NULL; + } + uint32_t qoaStateStrOffset = 0; + JsVar *qoaStateStr = jsvGetArrayBufferBackingString(qoaState, &qoaStateStrOffset); + if (!jsvIsFlatString(qoaStateStr)) { + jsiConsolePrintf("error: qoaState isn't backed by a contiguous area of memory\n"); + jsvUnLock2(qoaState, qoaStateStr); + return NULL; + } { + size_t qoaStateLength = jsvGetStringLength(qoaStateStr) - qoaStateStrOffset; + if (qoaStateLength != sizeof(qoa_desc)) { + jsvUnLock2(qoaState, qoaStateStr); + jsiConsolePrintf("error: qoaState has wrong size (wanted: %d given: %d)\n", sizeof(qoa_desc), qoaStateLength); + return NULL; + } + } + qoa_desc *qoa = (qoa_desc *) (jsvGetFlatStringPointer(qoaStateStr) + qoaStateStrOffset); + JsVar *encodedStr = jsvGetArrayBufferBackingString(encoded, NULL); + static const size_t frameSize = QOA_FRAME_SIZE(1, QOA_SLICES_PER_FRAME); + unsigned char encodedData[frameSize]; + size_t readChars = jsvGetStringChars(encodedStr, offsetVal, encodedData, frameSize); + jsvUnLock(encodedStr); + unsigned int frameLen; + JsvArrayBufferIterator it; + jsvArrayBufferIteratorNew(&it, decoded, 0); + unsigned int encodedFrameSize = qoa_decode_frame(encodedData, readChars, qoa, &(it.it), &frameLen, use16Bit); + if (fillRestOfBuffer) { + while (jsvStringIteratorHasChar(&(it.it))) { + if (use16Bit) { + jsvStringIteratorSetCharAndNext(&(it.it), 0); + jsvStringIteratorSetCharAndNext(&(it.it), 1 << 7); + } else { + jsvStringIteratorSetCharAndNext(&(it.it), 1 << 7); + } + } + } + jsvArrayBufferIteratorFree(&it); + 
jsvUnLock2(qoaState, qoaStateStr); + JsVar *result = jsvNewObject(); + jsvAddNamedChildAndUnLock(result, jsvNewFromInteger((JsVarInt) frameLen), "frameLen"); + jsvAddNamedChildAndUnLock(result, jsvNewFromInteger((JsVarInt) encodedFrameSize), "frameSize"); + return result; +} diff --git a/libs/audio/qoa/jswrap_qoa.h b/libs/audio/qoa/jswrap_qoa.h new file mode 100644 index 000000000..78b9ffd64 --- /dev/null +++ b/libs/audio/qoa/jswrap_qoa.h @@ -0,0 +1,24 @@ +/* + * This file is part of Espruino, a JavaScript interpreter for Microcontrollers + * + * Copyright (C) 2013 Gordon Williams + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * ---------------------------------------------------------------------------- + * JavaScript methods for working with the Quite Ok Audio Format (QOA) + * ---------------------------------------------------------------------------- + */ + +#ifndef JSWRAP_QOA_H_ +#define JSWRAP_QOA_H_ + +#include "jsvar.h" + +JsVar *jswrap_qoa_frame_len(); +JsVar *jswrap_qoa_init_decode(JsVar *header); +JsVar *jswrap_qoa_decode(JsVar *encoded, JsVar *decoded, JsVar *options); + +#endif //JSWRAP_QOA_H_ diff --git a/libs/audio/qoa/qoa.c b/libs/audio/qoa/qoa.c new file mode 100644 index 000000000..904667ae1 --- /dev/null +++ b/libs/audio/qoa/qoa.c @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2023, Dominic Szablewski - https://phoboslab.org + * SPDX-License-Identifier: MIT + * + * QOA - The "Quite OK Audio" format for fast, lossy audio compression + */ + +#include "qoa.h" + + +/* ----------------------------------------------------------------------------- + Implementation */ + +typedef unsigned long long qoa_uint64_t; + +/* The dequant_tab maps each of the scalefactors and quantized residuals to +their unscaled & dequantized version. 
/* Dequantisation table: maps a scalefactor index (row) and the magnitude
index of a quantized residual (column) to the dequantized residual.

Since qoa_div rounds away from zero, the smallest entries are mapped to 3/4
instead of 1. The dequantized values assumed for the eight quantizer indices
are
    float dqt[8] = {0.75, -0.75, 2.5, -2.5, 4.5, -4.5, 7, -7};
and each entry is round_ties_away_from_zero(scalefactor_tab[s] * dqt[q]),
i.e. rounding is "to nearest, ties away from zero", so positive and negative
values are treated symmetrically.

Note: this is a modified version that uses a quarter of the storage the
original one used. Because of that symmetry only the four positive
magnitudes are stored, as 16 bit values; the decoder looks up column
(quantized / 2) and negates the result for odd quantized values.
*/
static const int16_t qoa_dequant_tab[16][4] = {
  /* column:   0     1     2      3 */
  {            1,    3,    5,     7 },
  {            5,   18,   32,    49 },
  {           16,   53,   95,   147 },
  {           34,  113,  203,   315 },
  {           63,  210,  378,   588 },
  {          104,  345,  621,   966 },
  {          158,  528,  950,  1477 },
  {          228,  760, 1368,  2128 },
  {          316, 1053, 1895,  2947 },
  {          422, 1405, 2529,  3934 },
  {          548, 1828, 3290,  5117 },
  {          696, 2320, 4176,  6496 },
  {          868, 2893, 5207,  8099 },
  {         1064, 3548, 6386,  9933 },
  {         1286, 4288, 7718, 12005 },
  {         1536, 5120, 9216, 14336 },
};
*/ + +static int qoa_lms_predict(qoa_lms_t *lms) { + int prediction = 0; + for (int i = 0; i < QOA_LMS_LEN; i++) { + prediction += lms->weights[i] * lms->history[i]; + } + return prediction >> 13; +} + +static void qoa_lms_update(qoa_lms_t *lms, int sample, int residual) { + int delta = residual >> 4; + for (int i = 0; i < QOA_LMS_LEN; i++) { + lms->weights[i] += lms->history[i] < 0 ? -delta : delta; + } + + for (int i = 0; i < QOA_LMS_LEN - 1; i++) { + lms->history[i] = lms->history[i + 1]; + } + lms->history[QOA_LMS_LEN - 1] = sample; +} + +static inline int qoa_clamp(int v, int min, int max) { + if (v < min) { return min; } + if (v > max) { return max; } + return v; +} + +/* This specialized clamp function for the signed 16 bit range improves decode +performance quite a bit. The extra if() statement works nicely with the CPUs +branch prediction as this branch is rarely taken. */ + +static inline int qoa_clamp_s16(int v) { + if ((unsigned int) (v + 32768) > 65535) { + if (v < -32768) { return -32768; } + if (v > 32767) { return 32767; } + } + return v; +} + +static inline qoa_uint64_t qoa_read_u64(const unsigned char *bytes, unsigned int *p) { + bytes += *p; + *p += 8; + return + ((qoa_uint64_t) (bytes[0]) << 56) | ((qoa_uint64_t) (bytes[1]) << 48) | + ((qoa_uint64_t) (bytes[2]) << 40) | ((qoa_uint64_t) (bytes[3]) << 32) | + ((qoa_uint64_t) (bytes[4]) << 24) | ((qoa_uint64_t) (bytes[5]) << 16) | + ((qoa_uint64_t) (bytes[6]) << 8) | ((qoa_uint64_t) (bytes[7]) << 0); +} + + +/* ----------------------------------------------------------------------------- + Decoder */ + +unsigned int qoa_decode_header(const unsigned char *bytes, int size, qoa_desc *qoa) { + unsigned int p = 0; + if (size < QOA_MIN_FILESIZE) { + return 0; + } + + + /* Read the file header, verify the magic number ('qoaf') and read the + total number of samples. 
*/ + qoa_uint64_t file_header = qoa_read_u64(bytes, &p); + + if ((file_header >> 32) != QOA_MAGIC) { + return 0; + } + + qoa->samples = file_header & 0xffffffff; + if (!qoa->samples) { + return 0; + } + + /* Peek into the first frame header to get the number of channels and + the samplerate. */ + qoa_uint64_t frame_header = qoa_read_u64(bytes, &p); + qoa->channels = (frame_header >> 56) & 0x0000ff; + qoa->samplerate = (frame_header >> 32) & 0xffffff; + + if (qoa->channels == 0 || qoa->samples == 0 || qoa->samplerate == 0) { + return 0; + } + + return 8; +} + +unsigned int qoa_decode_frame(const unsigned char *bytes, unsigned int size, qoa_desc *qoa, + JsvStringIterator *sample_data_it, unsigned int *frame_len, bool use16Bit) { + unsigned int p = 0; + *frame_len = 0; + + if (size < 8 + QOA_LMS_LEN * 4 * qoa->channels) { + return 0; + } + + /* Read and verify the frame header */ + qoa_uint64_t frame_header = qoa_read_u64(bytes, &p); + unsigned int channels = (frame_header >> 56) & 0x0000ff; + unsigned int samplerate = (frame_header >> 32) & 0xffffff; + unsigned int samples = (frame_header >> 16) & 0x00ffff; + unsigned int frame_size = (frame_header) & 0x00ffff; + + unsigned int data_size = frame_size - 8 - QOA_LMS_LEN * 4 * channels; + unsigned int num_slices = data_size / 8; + unsigned int max_total_samples = num_slices * QOA_SLICE_LEN; + + if ( + channels != qoa->channels || + samplerate != qoa->samplerate || + frame_size > size || + samples * channels > max_total_samples + ) { + return 0; + } + + + /* Read the LMS state: 4 x 2 bytes history, 4 x 2 bytes weights per channel */ + for (unsigned int c = 0; c < channels; c++) { + qoa_uint64_t history = qoa_read_u64(bytes, &p); + qoa_uint64_t weights = qoa_read_u64(bytes, &p); + for (int i = 0; i < QOA_LMS_LEN; i++) { + qoa->lms[c].history[i] = ((signed short) (history >> 48)); + history <<= 16; + qoa->lms[c].weights[i] = ((signed short) (weights >> 48)); + weights <<= 16; + } + } + + + /* Decode all slices for all 
channels in this frame */ + for (unsigned int sample_index = 0; sample_index < samples; sample_index += QOA_SLICE_LEN) { + for (unsigned int c = 0; c < channels; c++) { + qoa_uint64_t slice = qoa_read_u64(bytes, &p); + + int scalefactor = (slice >> 60) & 0xf; + slice <<= 4; + + int slice_start = sample_index * channels + c; + int slice_end = qoa_clamp(sample_index + QOA_SLICE_LEN, 0, samples) * channels + c; + + for (int si = slice_start; si < slice_end; si += channels) { + int predicted = qoa_lms_predict(&qoa->lms[c]); + int quantized = (slice >> 61) & 0x7; + int dequantized = qoa_dequant_tab[scalefactor][quantized / 2]; + if (quantized % 2 == 1) { + dequantized = -dequantized; + } + int reconstructed = qoa_clamp_s16(predicted + dequantized); + + if (use16Bit) { + uint16_t value = (uint16_t) reconstructed ^ 0x8000; + for (size_t j = 0; j < 2; j++) { + uint8_t setValue = ((value >> (j * 8)) & 0xff); + jsvStringIteratorSetChar(sample_data_it, (char) (setValue)); + jsvStringIteratorNext(sample_data_it); + } + } else { + uint8_t value = ((uint16_t) reconstructed ^ 0x8000) >> 8; + jsvStringIteratorSetChar(sample_data_it, (char) value); + jsvStringIteratorNext(sample_data_it); + } + slice <<= 3; + + qoa_lms_update(&qoa->lms[c], reconstructed, dequantized); + } + } + } + + *frame_len = samples; + return p; +} diff --git a/libs/audio/qoa/qoa.h b/libs/audio/qoa/qoa.h new file mode 100644 index 000000000..cacd848cb --- /dev/null +++ b/libs/audio/qoa/qoa.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2023, Dominic Szablewski - https://phoboslab.org + * SPDX-License-Identifier: MIT + * + * QOA - The "Quite OK Audio" format for fast, lossy audio compression + */ + +#ifndef QOA_H +#define QOA_H + +#include "jsvar.h" +#include "jsvariterator.h" + +#define QOA_MIN_FILESIZE 16 +#define QOA_MAX_CHANNELS 8 + +#define QOA_SLICE_LEN 20 +#define QOA_SLICES_PER_FRAME 256 +#define QOA_FRAME_LEN (QOA_SLICES_PER_FRAME * QOA_SLICE_LEN) +#define QOA_LMS_LEN 4 +#define QOA_MAGIC 0x716f6166 /* 
'qoaf' */ + +#define QOA_FRAME_SIZE(channels, slices) \ +(8 + QOA_LMS_LEN * 4 * channels + 8 * slices * channels) + +typedef struct { + int history[QOA_LMS_LEN]; + int weights[QOA_LMS_LEN]; +} qoa_lms_t; + +typedef struct { + unsigned int channels; + unsigned int samplerate; + unsigned int samples; + qoa_lms_t lms[QOA_MAX_CHANNELS]; +} qoa_desc; + +unsigned int qoa_decode_header(const unsigned char *bytes, int size, qoa_desc *qoa); + +unsigned int qoa_decode_frame(const unsigned char *bytes, unsigned int size, qoa_desc *qoa, + JsvStringIterator *sample_data_it, unsigned int *frame_len, bool use16Bit); + +#endif //QOA_H