From b2ba04ed420ca0a92049f7c6356e4e13745db74c Mon Sep 17 00:00:00 2001 From: Shuichi Takano Date: Sat, 31 Aug 2019 13:13:02 +0900 Subject: [PATCH] =?UTF-8?q?=E3=83=93=E3=83=87=E3=82=AA=E5=87=BA=E5=8A=9B?= =?UTF-8?q?=E5=AF=BE=E5=BF=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 4 + audio.cpp | 2 +- chrono.cpp | 47 +++++ chrono.h | 28 +++ fmsx_interface.c | 47 ++++- fmsx_interface.h | 8 + lcd.cpp | 61 +----- main.cpp | 36 ++-- spi_dma.cpp | 97 +++++++++ spi_dma.h | 56 +++++ video.cpp | 517 +++++++++++++++++++++++++++++++++++++++++++++++ video.h | 134 ++++++++++++ video_out.cpp | 171 ++++++++++++++++ video_out.h | 28 +++ 14 files changed, 1153 insertions(+), 83 deletions(-) create mode 100644 chrono.cpp create mode 100644 chrono.h create mode 100644 spi_dma.cpp create mode 100644 spi_dma.h create mode 100644 video.cpp create mode 100644 video.h create mode 100644 video_out.cpp create mode 100644 video_out.h diff --git a/CMakeLists.txt b/CMakeLists.txt index dc73278..deda490 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,6 +39,10 @@ add_source_files( std_file.cpp lcd.cpp audio.cpp + spi_dma.cpp + video.cpp + video_out.cpp + chrono.cpp ) include(${SDK_ROOT}/cmake/executable.cmake) diff --git a/audio.cpp b/audio.cpp index 7687818..7cec103 100644 --- a/audio.cpp +++ b/audio.cpp @@ -109,7 +109,7 @@ void play() play(); return 0; }; - irq.priority = 1; + irq.priority = 2; active_ = true; i2s_handle_data_dma(I2S_DEVICE_0, data, &irq); } diff --git a/chrono.cpp b/chrono.cpp new file mode 100644 index 0000000..de22d8d --- /dev/null +++ b/chrono.cpp @@ -0,0 +1,47 @@ +/* + * author : Shuichi TAKANO + * since : Thu Aug 29 2019 1:19:5 + */ + +#include "chrono.h" +#include // read_cycle() +#include +#include + +namespace +{ + +constexpr int CORE_COUNT = 2; + +uint64_t cpuClock_ = 0; +clock_t prevClock_[CORE_COUNT]; + +} // namespace + +void initChrono() +{ + cpuClock_ = sysctl_clock_get_freq(SYSCTL_CLOCK_CPU); + prevClock_[0] = prevClock_[1] = read_cycle(); + + printf("cpu clock: %d\n", (int)cpuClock_); +} + +clock_t getClockCounter() +{ + return read_cycle(); +} + +uint32_t clockToMicroSec(clock_t v) +{ + return v * 1000000 / cpuClock_; +} + +uint32_t getTickTimeInMicroSec() +{ + uint64_t core = current_coreid(); + auto prev = prevClock_[core]; + auto clk = read_cycle(); + auto r = clockToMicroSec(clk - prev); + prevClock_[core] = clk; + return r; +} diff --git a/chrono.h b/chrono.h new file mode 100644 index 0000000..327ba2d --- /dev/null +++ b/chrono.h @@ -0,0 +1,28 @@ +/* + * author : Shuichi TAKANO + * since : Thu Aug 29 2019 1:18:1 + */ +#ifndef _69B08F22_6134_1654_1202_ECAB715778AD +#define _69B08F22_6134_1654_1202_ECAB715778AD + +#include + +#ifdef __cplusplus +extern "C" +{ +#endif + + typedef uint64_t clock_t; + + void initChrono(); + + clock_t getClockCounter(); + uint32_t clockToMicroSec(clock_t v); + + uint32_t getTickTimeInMicroSec(); + +#ifdef __cplusplus +} +#endif + +#endif /* _69B08F22_6134_1654_1202_ECAB715778AD */ diff --git a/fmsx_interface.c b/fmsx_interface.c index cb26d5b..f06ff96 100644 --- a/fmsx_interface.c +++ b/fmsx_interface.c @@ -12,6 +12,7 @@ #include #include "lcd.h" #include "audio.h" +#include "video_out.h" #include #include // read_cycle() @@ -19,6 +20,12 @@ #define WIDTH 272 /* Buffer width */ #define HEIGHT 228 /* Buffer height */ +static VideoOutMode videoOutMode = VIDEOOUTMODE_LCD; +void setFMSXVideoOutMode(VideoOutMode m) +{ + videoOutMode = m; +} + static Image NormScreen; static Image WideScreen; static pixel *WBuf; @@ -115,7 +122,8 @@ int InitMachine(void) InitSound(44100, 150); int SndSwitch = (1 << MAXCHANNELS) - 1; - int SndVolume = 2; + // int SndVolume = 2; + int SndVolume = 100; SetChannels(SndVolume, SndSwitch); return 1; @@ -149,21 +157,44 @@ int ShowVideo(void) { const Image *img = VideoImg; if (!img) + { return 1; + } - while (1) + if (videoOutMode == VIDEOOUTMODE_LCD) + { + while (1) + { + uint64_t cy = read_cycle(); + uint64_t delta = cy - prevCycle_; + if (delta >= frameCycles_) + { + prevCycle_ = cy; + printf("%d%%\n", (int)(delta * 100 / frameCycles_)); + break; + } + } + lcdDrawHScaleImage(0, (240 - img->H) >> 1, 320, + img->W, img->H, img->L, img->Data); + } + else { + waitVBlank(); + uint64_t cy = read_cycle(); uint64_t delta = cy - prevCycle_; - if (delta >= frameCycles_) + prevCycle_ = cy; + + if (img->W == WIDTH) + { + setVideoImagex4(img->W, img->H, img->L, img->Data); + } + else { - prevCycle_ = cy; - printf("%d%%\n", (int)(delta * 100 / frameCycles_)); - break; + setVideoImagex2(img->W, img->H, img->L, img->Data); } + printf("%d%%\n", (int)(delta * 100 / frameCycles_)); } - lcdDrawHScaleImage(0, (240 - img->H) >> 1, 320, - img->W, img->H, img->L, img->Data); return 1; } diff --git a/fmsx_interface.h b/fmsx_interface.h index 5399f67..237dc89 100644 --- a/fmsx_interface.h +++ b/fmsx_interface.h @@ -10,6 +10,14 @@ extern "C" { #endif + typedef enum + { + VIDEOOUTMODE_LCD, + VIDEOOUTMODE_COMPOSITE_VIDEO, + } VideoOutMode; + + void setFMSXVideoOutMode(VideoOutMode m); + int start_fMSX(); #ifdef __cplusplus diff --git a/lcd.cpp b/lcd.cpp index 49e889d..e1b82de 100644 --- a/lcd.cpp +++ b/lcd.cpp @@ -6,10 +6,7 @@ #include "lcd.h" #include #include - -#include -#include -#include +#include "spi_dma.h" namespace { @@ -122,6 +119,8 @@ void LCD::init(spi_device_num_t spi_num, gpio_set_drive_mode(dcx, GPIO_DM_OUTPUT); gpio_set_pin(gpioDCX_, GPIO_PV_HIGH); + SPIDMA::instance().init(spiNum_, dmaCh_, ss_pin, ss, 1 /* prio */); + if (rst >= 0) { fpioa_set_function(rst_pin, (fpioa_function_t)(FUNC_GPIO0 + rst)); @@ -286,6 +285,7 @@ void LCD::drawHScaleImage(int dx, int dy, int dw, int sw, int h, void LCD::writeCommand(uint8_t cmd) { + SPIDMA::instance().waitDone(); setDCXControl(); spi_init(spiNum_, SPI_WORK_MODE_0, SPI_FF_OCTAL, 8, 0); spi_init_non_standard(spiNum_, 8 /*instrction length*/, 0 /*address length*/, 0 /*wait cycles*/, @@ -295,6 +295,7 @@ void LCD::writeCommand(uint8_t cmd) void LCD::writeByte(const uint8_t *data_buf, uint32_t length) { + SPIDMA::instance().waitDone(); setDCXData(); spi_init(spiNum_, SPI_WORK_MODE_0, SPI_FF_OCTAL, 8, 0); spi_init_non_standard(spiNum_, 8 /*instrction length*/, 0 /*address length*/, 0 /*wait cycles*/, @@ -304,6 +305,7 @@ void LCD::writeByte(const uint8_t *data_buf, uint32_t length) void LCD::writeHalf(const uint16_t *data_buf, uint32_t length) { + SPIDMA::instance().waitDone(); setDCXData(); spi_init(spiNum_, SPI_WORK_MODE_0, SPI_FF_OCTAL, 16, 0); spi_init_non_standard(spiNum_, 16 /*instrction length*/, 0 /*address length*/, 0 /*wait cycles*/, @@ -313,54 +315,11 @@ void LCD::writeHalf(const uint16_t *data_buf, uint32_t length) void LCD::writeWord(const uint32_t *data_buf, uint32_t length) { + auto &dma = SPIDMA::instance(); + dma.waitDone(); setDCXData(); - spi_init(spiNum_, SPI_WORK_MODE_0, SPI_FF_OCTAL, 32, 0); - - spi_init_non_standard(spiNum_, 0 /*instrction length*/, 32 /*address length*/, 0 /*wait cycles*/, - SPI_AITM_AS_FRAME_FORMAT /*spi address trans mode*/); -#if 1 - - // spi_send_data_normal_dma(dmaCh_, spiNum_, cs_, data_buf, length, SPI_TRANS_INT); - - auto spi_handle = (volatile spi_t *)SPI0_BASE_ADDR; - set_bit(&spi_handle->ctrlr0, 3 << 8, SPI_TMOD_TRANS << 8); - spi_handle->dmacr = 0x2; /*enable dma transmit*/ - spi_handle->ssienr = 0x01; - // spi_set_tmod(SPI_CHANNEL, SPI_TMOD_TRANS); - - sysctl_dma_select((sysctl_dma_channel_t)dmaCh_, SYSCTL_DMA_SELECT_SSI0_TX_REQ); - - dmac_set_single_mode(dmaCh_, data_buf, (void *)(&spi_handle->dr[0]), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE, - DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, length); - spi_handle->ser = 1U << cs_; -#else - printf("ts."); - //static volatile bool idle = true; - // while (!idle) - // ; - - dmac_wait_done(dmaCh_); - - spi_data_t spiData; - spiData.tx_channel = dmaCh_; - spiData.fill_mode = false; - spiData.tx_buf = (uint32_t *)data_buf; - spiData.tx_len = length; - spiData.transfer_mode = SPI_TMOD_TRANS; - - plic_interrupt_t ir; - ir.callback = [](void *param) -> int { - printf("trans\n"); - // idle = true; - return 0; - }; - ir.priority = 2; - - //idle = false; - spi_handle_data_dma(spiNum_, cs_, spiData, &ir); - //spi_handle_data_dma(spiNum_, cs_, spiData, nullptr); - printf("te %zd\n", spiData.tx_len); -#endif + dma.resetCallback(); + dma.transferBE(data_buf, length); } void LCD::fillData(const uint32_t *data_buf, uint32_t length) diff --git a/main.cpp b/main.cpp index ca97e16..1815947 100644 --- a/main.cpp +++ b/main.cpp @@ -13,6 +13,8 @@ #include #include #include "audio.h" +#include "video_out.h" +#include "chrono.h" #include #include @@ -84,25 +86,15 @@ void initI2S() gpio_set_pin(AUDIO_ENABLE_GPIONUM, GPIO_PV_HIGH); initAudio(44100); - //initAudio(22050); -} - -int core1_function(void *ctx) -{ - uint64_t core = current_coreid(); - printf("Core %ld Hello world\n", core); - while (1) - ; } int main() { + // auto cpuFreq = sysctl_cpu_set_freq(600000000); sysctl_pll_set_freq(SYSCTL_PLL0, 800000000); - printf("hello.\n"); + // printf("hello.\n"); - uint64_t core = current_coreid(); - printf("Core %ld Hello world\n", core); - register_core1(core1_function, NULL); + initChrono(); sd_test(); @@ -116,20 +108,18 @@ int main() LCD_SCLK_PIN); lcd.setDirection(LCD::DIR_XY_RLDU); - - // lcd.clear(0xfd20); lcd.clear(0); initI2S(); -#if 0 - auto fp = fopen("boot.py_", "rb"); - printf("fp = %p\n", fp); - char buf[1024] = {}; - auto readSize = fread(buf, 1, sizeof(buf), fp); - printf("read size = %d, str = '%s'\n", (int)readSize, buf); - fclose(fp); -#endif + if (1) + { + initVideo(390000000 * 2, 390000000 / 18, 228 * 2, + 272 * 4, 228); + //setInterlaceMode(true); + startVideoTransfer(); + setFMSXVideoOutMode(VIDEOOUTMODE_COMPOSITE_VIDEO); + } start_fMSX(); diff --git a/spi_dma.cpp b/spi_dma.cpp new file mode 100644 index 0000000..2d7c1d2 --- /dev/null +++ b/spi_dma.cpp @@ -0,0 +1,97 @@ +/* + * author : Shuichi TAKANO + * since : Sun Aug 25 2019 15:33:59 + */ + +#include "spi_dma.h" + +#include +#include +#include +#include +#include + +SPIDMA &SPIDMA::instance() +{ + static SPIDMA inst; + return inst; +} + +void SPIDMA::init(spi_device_num_t spiNum, + dmac_channel_number_t dmaCh, + int ssPin, int ss, int prio) +{ + spiNum_ = spiNum; + + volatile spi_t *const spi[4] = + { + (volatile spi_t *)SPI0_BASE_ADDR, + (volatile spi_t *)SPI1_BASE_ADDR, + (volatile spi_t *)SPI_SLAVE_BASE_ADDR, + (volatile spi_t *)SPI3_BASE_ADDR, + }; + spi_ = spi[spiNum]; + cs_ = (spi_chip_select_t)(SPI_CHIP_SELECT_0 + ss); + dmaCh_ = dmaCh; + priority_ = prio; + + fpioa_set_function(ssPin, (fpioa_function_t)(FUNC_SPI0_SS0 + ss)); +} + +void SPIDMA::waitDone() +{ + dmac_wait_done(dmaCh_); +} + +void SPIDMA::waitIdle() +{ + dmac_wait_idle(dmaCh_); +} + +int SPIDMA::irqEntry(void *ctx) +{ + ((SPIDMA *)ctx)->irq(); + return 0; +} + +void SPIDMA::irq() +{ + dmac_irq_unregister(dmaCh_); + +#if 0 + while ((spi_->sr & 0x05) != 0x04) + ; + spi_->ser = 0x00; + spi_->ssienr = 0x00; +#endif + if (callback_) + { + callback_(); + } +} + +void SPIDMA::_setupTransfer() +{ + spi_init(spiNum_, SPI_WORK_MODE_0, SPI_FF_OCTAL, 32, 0); + spi_init_non_standard(spiNum_, 0 /*instrction length*/, 32 /*address length*/, 0 /*wait cycles*/, + SPI_AITM_AS_FRAME_FORMAT /*spi address trans mode*/); + set_bit(&spi_->ctrlr0, 3 << 8, SPI_TMOD_TRANS << 8); + spi_->dmacr = 0x2; /*enable dma transmit*/ + spi_->ssienr = 0x01; + sysctl_dma_select((sysctl_dma_channel_t)dmaCh_, SYSCTL_DMA_SELECT_SSI0_TX_REQ); +} + +void SPIDMA::_transferBE(const uint32_t *data, uint32_t count) +{ + dmac_irq_register(dmaCh_, irqEntry, this, priority_); + dmac_set_single_mode(dmaCh_, data, (void *)(&spi_->dr[0]), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE, + DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, count); + spi_->ser = 1U << cs_; +} + +// Big Endianでデータを転送する +void SPIDMA::transferBE(const uint32_t *data, uint32_t count) +{ + _setupTransfer(); + _transferBE(data, count); +} diff --git a/spi_dma.h b/spi_dma.h new file mode 100644 index 0000000..939b1d6 --- /dev/null +++ b/spi_dma.h @@ -0,0 +1,56 @@ +/* + * author : Shuichi TAKANO + * since : Sun Aug 25 2019 15:39:25 + */ +#ifndef E010AF3D_A134_1650_EAE0_1F6097E2920D +#define E010AF3D_A134_1650_EAE0_1F6097E2920D + +#include +#include + +class SPIDMA +{ + spi_device_num_t spiNum_{}; + volatile spi_t *spi_{}; + spi_chip_select_t cs_; + dmac_channel_number_t dmaCh_; + int priority_; + + std::function callback_; + +public: + void init(spi_device_num_t spiNum, + dmac_channel_number_t dmaCh, + int ssPin, int ss, + int prio); + + void waitDone(); + void waitIdle(); + + spi_device_num_t getSPINum() const + { + return spiNum_; + } + + void setCallback(std::function &&f) + { + callback_ = f; + } + void resetCallback() + { + callback_ = std::function(); + } + + void _setupTransfer(); + void _transferBE(const uint32_t *data, uint32_t count); + + void transferBE(const uint32_t *data, uint32_t count); + + static SPIDMA &instance(); + +private: + static int irqEntry(void *ctx); + void irq(); +}; + +#endif /* E010AF3D_A134_1650_EAE0_1F6097E2920D */ diff --git a/video.cpp b/video.cpp new file mode 100644 index 0000000..b76d1df --- /dev/null +++ b/video.cpp @@ -0,0 +1,517 @@ +/* + * author : Shuichi TAKANO + * since : Mon Aug 12 2019 12:22:58 + */ + +#include "video.h" + +#include +#include +#include +#include + +#include "chrono.h" + +namespace +{ + +void setValue(std::vector &v, int idx, int value) +{ + ((uint8_t *)&v[idx >> 2])[(idx & 3) ^ 3] = value; +} + +} // namespace + +void NTSCEncoder::makeSinTable(float signalScale) +{ + for (int i = 0; i < SIN_TABLE_SIZE; ++i) + { + float t = i * (3.14159265359f * 2 / SIN_TABLE_SIZE); + sinTable_[i] = { + int8_t(sinf(t) * 128 / 2.03f * signalScale), + int8_t(cosf(t) * 128 / 1.14f * signalScale)}; + } +} + +void NTSCEncoder::makeLineSinTable() +{ + lineSinTable_.clear(); + lineSinTable_.reserve(width_ * 2); + for (int i = 0; i < 2; ++i) + { + auto ph = phaseStep24_ * (hStart_ - phaseOrigin_) + getSCPhase(i & 1); + for (int x = 0; x < width_; ++x) + { + const auto &sc = sinTable_[(ph >> (24 - SIN_TABLE_BITS)) & (SIN_TABLE_SIZE - 1)]; + lineSinTable_.push_back(sc); + ph += phaseStep24_; + } + } +} + +void NTSCEncoder::setSize(int w, int h) +{ + width_ = w; + height_ = h; + vStart_ = ((242 - height_) >> 1) + 11; + hStart_ = (((videoHSamples_ - width_) >> 1) + baseHOfs_ + 2) & ~3; + printf("(%d x %d) ofs %d, %d\n", w, h, vStart_, hStart_); + makeLineSinTable(); +} + +void NTSCEncoder::init(uint32_t freq, int targetNumSCPerLine2) +{ + freq_ = freq; + + auto hCycle = (targetNumSCPerLine2 / (2.0f * 3579545)) * freq; + signalStride_ = int(hCycle * 0.25 + 0.5) * 4; + printf("signal freq %d, stride %d\n", freq, signalStride_); + printf("line sc cycle %f\n", (double)signalStride_ / freq * 3579545); + + signalStrideInDW_ = signalStride_ >> 2; + halfLineSignalSizeInDW_ = signalStride_ >> 3; + + // 同期信号 -40IRE + // カラーバースト幅 40IRE + // 白レベル 100IRE + // ピーク 133IRE + auto colorBurstAmp = 256 * 20 / (133 + 40); + pedestalLevel_ = 256 * 40 / (133 + 40); + setupLevel_ = pedestalLevel_; + float signalScale = 100.0f / (133 + 40) * 0.75f; // 75% + signalScale128_ = signalScale * 128.0f; + + makeSinTable(signalScale); + + auto iv = pedestalLevel_ * 0x01010101; + + vsync_[0].resize(9 * signalStrideInDW_, iv); + vsync_[1].resize(10 * signalStrideInDW_ - halfLineSignalSizeInDW_, iv); + + const int signalBufferLines = 255; + video_.resize(signalStrideInDW_ * signalBufferLines, iv); + printf("vsync %zd+%zdbytes, video %zdbytes.\n", vsync_[0].size() * 4, vsync_[1].size() * 4, video_.size() * 4); + + // 信号全体をフロントポーチ(1.5us)分オフセットして考える + // 水平同期4.7us + // 水平同期からカラーバーストまで19sc + // カラーバースト9sc + // 水平同期からセットアップまで9.4us + // 1ライン63.55556us=227.5sc + // 映像期間63.6-10.9=52.7us + // 等価パルス幅2.35us + // 切り込みパルス幅4.7us + + baseHOfs_ = 9.4e-6 * freq_; + videoHSamples_ = 52.7e-6 * freq_; + + int hsyncPulseWidth = 4.7e-6 * freq_; + int equivPulseWidth = hsyncPulseWidth >> 1; + int invSerratioPulseWidth = (signalStride_ >> 1) - hsyncPulseWidth; + + auto makePulse = [&](auto &v, int ofs, int w) { + for (int i = 0; i < w; ++i) + { + setValue(v, i + ofs, 0); + } + }; + auto makeVSync = [&](auto &v, int ofs) { + for (int i = 0; i < 6; ++i) + { + makePulse(v, ofs + (signalStride_ * i >> 1), equivPulseWidth); + makePulse(v, ofs + (signalStride_ * (i + 12) >> 1), equivPulseWidth); + } + for (int i = 0; i < 6; ++i) + { + makePulse(v, ofs + (signalStride_ * (i + 6) >> 1), invSerratioPulseWidth); + } + }; + + makeVSync(vsync_[0], 0); + makeVSync(vsync_[1], (signalStride_ >> 1) - halfLineSignalSizeInDW_ * 4); + +#if 0 + // ライン長からPLL想定 + freqSC_ = 227.5 * freq / signalStride_; +#else + freqSC_ = 3579545; +#endif + + phaseStep24_ = 16777216.0 * freqSC_ / freq; + phaseOffsetPerLine_ = targetNumSCPerLine2 & 1 ? 16777216 / 2 : 0; + + int colorBurstStart = (19.0f / freqSC_) * freq + 0.5f; + int colorBurstWidth = (28.0f / freqSC_) * freq - colorBurstStart; + // auto colorBurstStartPhase24 = phaseStep24_ * colorBurstStart; + + phaseOrigin_ = colorBurstStart; + // phaseOrigin_ = 0; + + printf("colorBurst %d, %d\n", colorBurstStart, colorBurstWidth); + + auto makeHSync = [&](int ofs, bool phase) { + makePulse(video_, ofs, hsyncPulseWidth); + + ofs += colorBurstStart; + // auto ph = colorBurstStartPhase24 + getSCPhase(phase); + auto ph = getSCPhase(phase); // カラーバーストを位相の起点にする + //auto ph = phaseStep24_ * ofs; + for (int i = 0; i < colorBurstWidth; ++i) + { + float phase = (ph & 16777215) * (3.14159265f * 2 / 16777216.0f); + int value = -sinf(phase) * colorBurstAmp; + setValue(video_, ofs, value + pedestalLevel_); + ++ofs; + ph += phaseStep24_; + } + }; + + for (int i = 0; i < signalBufferLines; ++i) + { + makeHSync(i * signalStride_, i & 1); + } +} + +void NTSCEncoder::makeColorBar() +{ + // 75% white, yellow, cyan, green, megenta, red, blue, black + + auto makeLine = [&](int ofs, bool odd) { + ofs += hStart_; + auto ph = phaseStep24_ * (hStart_ - phaseOrigin_) + getSCPhase(odd); + //auto ph = phaseStep24_ * ofs; + auto unitWidth = width_ >> 3; + + uint8_t col[] = {7, 3, 6, 2, 5, 1, 4, 0}; + + for (int i = 0; i < 8; ++i) + { + auto cm = col[i]; + float r = cm & 1 ? signalScale128_ * 2 : 0; + float g = cm & 2 ? signalScale128_ * 2 : 0; + float b = cm & 4 ? signalScale128_ * 2 : 0; + + for (int j = 0; j < unitWidth; ++j) + { + float phase = (ph & 16777215) * (3.14159265f * 2 / 16777216.0f); + float sp = sinf(phase); + float cp = cosf(phase); + + // Y = 0.587G+0.114B+0.299R + // Cb = B-Y = -0.587G+0.886B-0.299R + // Cr = R-Y = -0.587G-0.114B+0.701R + // Cb' = Cb/2.03 + // Cr' = Cr/1.14 + // NTSC = Y + Cb' sin(2 pi f t) +Cr' cos(2 pi f t) + + float y = 0.587f * g + 0.114f * b + 0.299 * r; + float cb = (b - y) * (1 / 2.03f); + float cr = (r - y) * (1 / 1.14f); + int v = y + cb * sp + cr * cp; + setValue(video_, ofs, v + pedestalLevel_); + + ofs += 1; + ph += phaseStep24_; + } + } + }; + + int ofs = 0; + for (int i = 0; i < 255; ++i) + { + makeLine(ofs, i & 1); + ofs += signalStride_; + } +} + +void NTSCEncoder::makeColotLUT(int rBits, int gBits, int bBits, + int rShift, int gShift, int bShift) +{ + int bpp = rBits + gBits + bBits; + int ct = 1 << bpp; + colorLUT_.resize(ct); + + int rMax = 1 << rBits; + int gMax = 1 << gBits; + int bMax = 1 << bBits; + int rMask = (rMax - 1) << rShift; + int gMask = (gMax - 1) << gShift; + int bMask = (bMax - 1) << bShift; + float rNorm = 1.0f / rMask; + float gNorm = 1.0f / gMask; + float bNorm = 1.0f / bMask; + + float yscale = signalScale128_ * (1 << 7); + float ybias = pedestalLevel_ << 6; + + for (int i = 0; i < ct; ++i) + { + float r = (i & rMask) * rNorm; + float g = (i & gMask) * gNorm; + float b = (i & bMask) * bNorm; + + float y = 0.299f * r + 0.587f * g + 0.114f * b; + float cb = b - y; + float cr = r - y; + + colorLUT_[i] = DiffColor{(uint16_t)(y * yscale + ybias), + (int8_t)(cb * (1 << 7)), + (int8_t)(cr * (1 << 7))}; + } +} + +#if 1 + + +void NTSCEncoder::setLinex2(int w, int line, const uint16_t *img) +{ + line += vStart_; + int ofs = hStart_ + line * signalStride_; + assert((ofs & 3) == 0); + auto *p = (uint8_t *)&video_[ofs >> 2]; + auto *sct = getLineSinTable(line); + + auto ct = w; + while (ct > 0) + { + auto c1 = colorLUT_[img[0]]; + auto c2 = colorLUT_[img[1]]; + + int v0 = c1.compute(sct[0]); + int v1 = c1.compute(sct[1]); + int v2 = c2.compute(sct[2]); + int v3 = c2.compute(sct[3]); + + p[0] = v3; + p[1] = v2; + p[2] = v1; + p[3] = v0; + p += 4; + sct += 4; + img += 2; + ct -= 2; + } +} + +void NTSCEncoder::setLinex4(int w, int line, const uint16_t *img) +{ + line += vStart_; + int ofs = hStart_ + line * signalStride_; + assert((ofs & 3) == 0); + auto *p = (uint8_t *)&video_[ofs >> 2]; + auto *sct = getLineSinTable(line); + + auto ct = w; + while (ct > 0) + { + auto c = colorLUT_[img[0]]; + + int v0 = c.compute(sct[0]); + int v1 = c.compute(sct[1]); + int v2 = c.compute(sct[2]); + int v3 = c.compute(sct[3]); + + p[0] = v3; + p[1] = v2; + p[2] = v1; + p[3] = v0; + p += 4; + sct += 4; + img += 1; + ct -= 1; + } +} + +#else + +namespace +{ + +struct YCbCr +{ + int y; // 8:20 + int cb; // 8:13 + int cr; // 8:13 + + inline YCbCr(uint16_t src, int ss128) + { + static constexpr int rcoef = int(0.299f * 2048) << 5; + static constexpr int gcoef = int(0.587f * 1024); + static constexpr int bcoef = int(0.114f * 2048) << 5; + + int r = src >> 11; + int g = src & (63 << 5); + int b = src & 31; + y = rcoef * r + gcoef * g + bcoef * b; + cb = (b << 16) - y; + cr = (r << 16) - y; + y *= ss128; + } + + inline int compute(const NTSCEncoder::SinCos &sc) const + { + return (y + cb * sc.sin_ + cr * sc.cos_) >> 20; + } +}; + +} // namespace + +void NTSCEncoder::setLinex2(int w, int line, const uint16_t *img) +{ + line += vStart_; + int ofs = hStart_ + line * signalStride_; + assert((ofs & 3) == 0); + auto *p = (uint8_t *)&video_[ofs >> 2]; + auto *sct = getLineSinTable(line); + + auto ct = w; + while (ct > 0) + { + YCbCr c1(img[0], signalScale128_); + YCbCr c2(img[1], signalScale128_); + + int v0 = c1.compute(sct[0]) + pedestalLevel_; + int v1 = c1.compute(sct[1]) + pedestalLevel_; + int v2 = c2.compute(sct[2]) + pedestalLevel_; + int v3 = c2.compute(sct[3]) + pedestalLevel_; + + p[0] = v3; + p[1] = v2; + p[2] = v1; + p[3] = v0; + p += 4; + sct += 4; + img += 2; + ct -= 2; + } +} + +void NTSCEncoder::setLinex4(int w, int line, const uint16_t *img) +{ + line += vStart_; + int ofs = hStart_ + line * signalStride_; + assert((ofs & 3) == 0); + auto *p = &video_[ofs >> 2]; + auto *sct = getLineSinTable(line); + + auto ct = w; + while (ct) + { + YCbCr c(*img, signalScale128_); + + int v0 = c.compute(sct[0]) + pedestalLevel_; + int v1 = c.compute(sct[1]) + pedestalLevel_; + int v2 = c.compute(sct[2]) + pedestalLevel_; + int v3 = c.compute(sct[3]) + pedestalLevel_; + + *p = (v0 << 24) | (v1 << 16) | (v2 << 8) | v3; + + ++p; + sct += 4; + ++img; + --ct; + } +} +#endif + +namespace +{ +using WorkerFunc = void (*)(void *); + +void *volatile workerParam_ = nullptr; +volatile WorkerFunc workerFunc_ = nullptr; + +} // namespace + +int worker(void *) +{ + while (1) + { + if (workerFunc_) + { + workerFunc_(workerParam_); + workerFunc_ = nullptr; + } + } +} + +void setWorkload(WorkerFunc f, void *param) +{ + while (workerFunc_) + ; + workerParam_ = param; + workerFunc_ = f; +} + +void startWorker() +{ + register_core1(worker, nullptr); +} + +namespace +{ + +template +void setImageImpl(int w, int h, int pitch, const uint16_t *img, int lineOffset, const F &lineFunc) +{ +#if 0 + int line = lineOffset; + for (int i = 0; i < h; ++i) + { + setLinex4(w, line, img); + ++line; + img += pitch; + } +#else + struct Param + { + const F &func_; + int w, h, pitch, lineOffset; + const uint16_t *img; + volatile bool done = false; + + int proc() + { +// auto prevClk = getClockCounter(); + + int line = lineOffset; + for (int i = 0; i < h; i += 2) + { + func_(w, line, img); + line += 2; + img += pitch * 2; + } + done = true; + +// auto dt = clockToMicroSec(getClockCounter() - prevClk); +// printf("core %d: %dus\n", (int)current_coreid(), dt); + + return 0; + } + }; + + auto p0 = Param{lineFunc, w, h, pitch, lineOffset + 0, img + pitch * 0}; + auto p1 = Param{lineFunc, w, h, pitch, lineOffset + 1, img + pitch * 1}; + + setWorkload([](void *p) { ((Param *)p)->proc(); }, &p1); + p0.proc(); + while (!p1.done) + ; +#endif +} + +} // namespace + +void NTSCEncoder::setImagex2(int w, int h, int pitch, const uint16_t *img, + int lineOffset) +{ + setImageImpl(w, h, pitch, img, lineOffset, + [&](int w, int line, const uint16_t *img) { setLinex2(w, line, img); }); +} + +void NTSCEncoder::setImagex4(int w, int h, int pitch, const uint16_t *img, + int lineOffset) +{ + setImageImpl(w, h, pitch, img, lineOffset, + [&](int w, int line, const uint16_t *img) { setLinex4(w, line, img); }); +} diff --git a/video.h b/video.h new file mode 100644 index 0000000..56869cb --- /dev/null +++ b/video.h @@ -0,0 +1,134 @@ +/* + * author : Shuichi TAKANO + * since : Mon Aug 12 2019 12:15:45 + */ +#ifndef _59D6FE19_2134_1651_B97A_A42DFABF74CA +#define _59D6FE19_2134_1651_B97A_A42DFABF74CA + +#include +#include +#include + +class NTSCEncoder +{ +public: + struct SinCos + { + int8_t sin_; + int8_t cos_; + }; + +public: + void init(uint32_t freq, int targetNumSCPerLine2 = 455); + + void setSize(int w, int h); + void makeColotLUT(int rBits, int gBits, int bBits, + int rShift, int gShift, int bShift); + + void makeColorBar(); + + const std::vector &getVSyncBuffer(bool odd) const + { + return vsync_[odd]; + } + + std::vector &getVideoBuffer() + { + return video_; + } + + int getVideoDataSize(bool odd) const + { + return signalStrideInDW_ * 253 + (odd ? 0 : halfLineSignalSizeInDW_); + } + + int getVideoOffset(bool phase) const + { + return phase ? signalStrideInDW_ : 0; + } + + int getWriteLineOffset(bool phase) const + { + return phase ? 1 : 0; + } + + int getVideoDataSizeNonInterlace(int lines = 262) const + { + return signalStrideInDW_ * (lines - 9); + } + + uint32_t + getSCPhase(bool odd) const + { + return odd ? phaseOffsetPerLine_ : 0; + } + + bool is4Phase() const + { + return phaseOffsetPerLine_; + } + + void setLinex2(int w, int line, const uint16_t *img); + void setLinex4(int w, int line, const uint16_t *img); + + void setImagex2(int w, int h, int pitch, const uint16_t *img, + int lineOffset); + void setImagex4(int w, int h, int pitch, const uint16_t *img, + int lineOffset); + +protected: + void makeSinTable(float signalScale); + void makeLineSinTable(); + + const SinCos *getLineSinTable(int line) + { + return &lineSinTable_[width_ * (line & 1)]; + } + +private: + std::vector vsync_[2]; + std::vector video_; + + uint32_t freq_ = 0; + float freqSC_ = 0; + int signalStride_ = 0; + int signalStrideInDW_ = 0; + int halfLineSignalSizeInDW_ = 0; + uint32_t phaseStep24_ = 0; + uint32_t phaseOffsetPerLine_ = 0; + int phaseOrigin_ = 0; + int baseHOfs_ = 0; + int videoHSamples_ = 0; + + int pedestalLevel_ = 0; + int setupLevel_ = 0; + int colorBurstAmp_ = 0; + int signalScale128_ = 0; + + int width_ = 0; + int height_ = 0; + int vStart_ = 0; + int hStart_ = 0; + + static constexpr int SIN_TABLE_BITS = 8; + static constexpr int SIN_TABLE_SIZE = 1 << SIN_TABLE_BITS; + SinCos sinTable_[SIN_TABLE_SIZE]; + + std::vector lineSinTable_; // x2 line + + struct DiffColor + { + uint16_t y; + int8_t cb, cr; + + inline int compute(const SinCos& sc) const + { + return (y + cb * sc.sin_ + cr * sc.cos_) >> 6; + } + }; + std::vector colorLUT_; +}; + +void startWorker(); + +#endif /* _59D6FE19_2134_1651_B97A_A42DFABF74CA */ \ No newline at end of file diff --git a/video_out.cpp b/video_out.cpp new file mode 100644 index 0000000..d517215 --- /dev/null +++ b/video_out.cpp @@ -0,0 +1,171 @@ +/* + * author : Shuichi TAKANO + * since : Sun Aug 25 2019 17:34:2 + */ + +#include "video_out.h" +#include "video.h" +#include "spi_dma.h" +#include + +namespace +{ + +NTSCEncoder NTSC_; + +enum class Stage +{ + FIELD1_VSYNC = 0, + FIELD1_VIDEO, + FIELD2_VSYNC, + FIELD2_VIDEO, + FIELD3_VSYNC, + FIELD3_VIDEO, + FIELD4_VSYNC, + FIELD4_VIDEO, +}; + +Stage nextStage_{}; +bool isInterlace_ = false; + +volatile bool vblanking_ = false; +volatile int writeLineOffset_ = 0; + +#if 1 + +const uint32_t *nextTransferData_ = nullptr; +uint32_t nextTransferSize_ = 0; + +void transfer() +{ + auto &dma = SPIDMA::instance(); + if (nextTransferData_) + { + dma._transferBE(nextTransferData_, nextTransferSize_); + } + else + { + dma._transferBE(&nextTransferSize_, 1); // dummy + } + + bool is4Phase = NTSC_.is4Phase(); + int field = (int)nextStage_ >> 1; + bool vsync = (((int)nextStage_) & 1) == 0; + bool oddField = field & 1; + bool phase = is4Phase ? ((int)nextStage_ + 1) >> 1 : 0; + + nextStage_ = (Stage)(((int)nextStage_ + 1) & 7); + vblanking_ = !vsync; // 次の転送単位なので反転 + writeLineOffset_ = NTSC_.getWriteLineOffset(phase); + + bool interlaced = isInterlace_ || is4Phase; + + if (vsync) + { + auto &v = NTSC_.getVSyncBuffer(oddField && interlaced); + nextTransferData_ = v.data(); + nextTransferSize_ = v.size(); + } + else + { + auto &v = NTSC_.getVideoBuffer(); + if (interlaced) + { + nextTransferData_ = v.data() + NTSC_.getVideoOffset(phase); + nextTransferSize_ = NTSC_.getVideoDataSize(oddField); + } + else + { + nextTransferData_ = v.data(); + nextTransferSize_ = NTSC_.getVideoDataSizeNonInterlace(); + } + } +} +#else +void transfer() +{ + bool is4Phase = NTSC_.is4Phase(); + auto &dma = SPIDMA::instance(); + int field = (int)nextStage_ >> 1; + bool vsync = (((int)nextStage_) & 1) == 0; + bool oddField = field & 1; + bool phase = is4Phase ? ((int)nextStage_ + 1) >> 1 : 0; + + nextStage_ = (Stage)(((int)nextStage_ + 1) & 7); + vblanking_ = vsync; + writeLineOffset_ = NTSC_.getWriteLineOffset(phase); + + bool interlaced = isInterlace_ || is4Phase; + + if (vsync) + { + auto &v = NTSC_.getVSyncBuffer(oddField && interlaced); + dma._transferBE(v.data(), v.size()); + } + else + { + auto &v = NTSC_.getVideoBuffer(); + if (interlaced) + { + dma._transferBE(v.data() + NTSC_.getVideoOffset(phase), + NTSC_.getVideoDataSize(oddField)); + } + else + { + dma._transferBE(v.data(), + NTSC_.getVideoDataSizeNonInterlace()); + } + } +} +#endif + +} // namespace + +void initVideo(uint32_t pll0Clock, uint32_t dotClock, int scPerLinex2, + int w, int h) +{ + sysctl_pll_set_freq(SYSCTL_PLL0, pll0Clock); + auto realFreq = spi_set_clk_rate(SPIDMA::instance().getSPINum(), + dotClock); + + NTSC_.init(realFreq, scPerLinex2); + NTSC_.setSize(w, h); + // NTSC_.makeColorBar(); + + NTSC_.makeColotLUT(5, 6, 5, 11, 5, 0); + + startWorker(); // かり +} + +void startVideoTransfer() +{ + SPIDMA::instance().setCallback([&] { + transfer(); + }); + + SPIDMA::instance()._setupTransfer(); + + nextStage_ = Stage::FIELD1_VSYNC; + transfer(); +} + +void setInterlaceMode(int f) +{ + isInterlace_ = f; +} + +void waitVBlank() +{ + while (!vblanking_) + ; +} + +void setVideoImagex2(int w, int h, int pitch, const uint16_t *img) +{ + NTSC_.setImagex2(w, h, pitch, img, writeLineOffset_); +} + +void setVideoImagex4(int w, int h, int pitch, const uint16_t *img) +{ + NTSC_.setImagex4(w, h, pitch, img, writeLineOffset_); +} diff --git a/video_out.h b/video_out.h new file mode 100644 index 0000000..4c756ba --- /dev/null +++ b/video_out.h @@ -0,0 +1,28 @@ +/* + * author : Shuichi TAKANO + * since : Sun Aug 25 2019 17:33:29 + */ +#ifndef AD19B26C_F134_1650_1089_D2BD915E1521 +#define AD19B26C_F134_1650_1089_D2BD915E1521 + +#include + +#ifdef __cplusplus +extern "C" +{ +#endif + + void initVideo(uint32_t pll0Clock, uint32_t dotClock, int scPerLinex2, + int w, int h); + + void startVideoTransfer(); + void setInterlaceMode(int f); + + void waitVBlank(); + void setVideoImagex2(int w, int h, int pitch, const uint16_t *img); + void setVideoImagex4(int w, int h, int pitch, const uint16_t *img); + +#ifdef __cplusplus +} +#endif +#endif /* AD19B26C_F134_1650_1089_D2BD915E1521 */