diff --git a/.changelogrc.js b/.changelogrc.cjs
similarity index 100%
rename from .changelogrc.js
rename to .changelogrc.cjs
diff --git a/.commitlintrc.js b/.commitlintrc.cjs
similarity index 100%
rename from .commitlintrc.js
rename to .commitlintrc.cjs
diff --git a/.dumirc.ts b/.dumirc.ts
index 4e5e797..d957106 100644
--- a/.dumirc.ts
+++ b/.dumirc.ts
@@ -1,4 +1,5 @@
 import { defineConfig } from 'dumi';
+import path from 'node:path';
 
 import { description, homepage, name } from './package.json';
 
@@ -41,6 +42,9 @@ const themeConfig = {
 };
 
 export default defineConfig({
+  alias: {
+    '@lobehub/tts/react': path.join(__dirname, './src/react'),
+  },
   apiParser: isProduction ? {} : false,
   base: '/',
   define: {
@@ -51,11 +55,10 @@ export default defineConfig({
   mfsu: isWin ? undefined : {},
   npmClient: 'pnpm',
   publicPath: '/',
-  resolve: isProduction
-    ? {
-        entryFile: './src/index.ts',
-      }
-    : undefined,
+  resolve: {
+    atomDirs: [{ dir: 'src/react', type: 'component' }],
+    entryFile: isProduction ? './src/index.ts' : undefined,
+  },
   styles: [
     `html, body { background: transparent; }
diff --git a/.eslintrc.js b/.eslintrc.cjs
similarity index 100%
rename from .eslintrc.js
rename to .eslintrc.cjs
diff --git a/.prettierrc.js b/.prettierrc.cjs
similarity index 100%
rename from .prettierrc.js
rename to .prettierrc.cjs
diff --git a/.releaserc.js b/.releaserc.cjs
similarity index 100%
rename from .releaserc.js
rename to .releaserc.cjs
diff --git a/.remarkrc.js b/.remarkrc.cjs
similarity index 100%
rename from .remarkrc.js
rename to .remarkrc.cjs
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 957cd37..1f51e45 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,231 @@
 
 # Changelog
 
+## [Version 1.13.0-beta.3](https://github.com/lobehub/lobe-tts/compare/v1.13.0-beta.2...v1.13.0-beta.3)
+
+Released on **2023-11-15**
+
+#### ♻ Code Refactoring
+
+- **misc**: Refactor the demo usage.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### Code refactoring
+
+- **misc**: Refactor the demo usage ([802c59a](https://github.com/lobehub/lobe-tts/commit/802c59a))
+
+</details>
+
+<div align="right">
+
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+</div>
+
+## [Version 1.13.0-beta.2](https://github.com/lobehub/lobe-tts/compare/v1.13.0-beta.1...v1.13.0-beta.2)
+
+Released on **2023-11-15**
+
+#### ✨ Features
+
+- **misc**: Add SWR config to hooks.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### What's improved
+
+- **misc**: Add SWR config to hooks ([2c49e02](https://github.com/lobehub/lobe-tts/commit/2c49e02))
+
+</details>
+
+<div align="right">
+
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+</div>
+
+## [Version 1.13.0-beta.1](https://github.com/lobehub/lobe-tts/compare/v1.12.1-beta.6...v1.13.0-beta.1)
+
+Released on **2023-11-15**
+
+#### ✨ Features
+
+- **misc**: Add class.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### What's improved
+
+- **misc**: Add class ([07245e3](https://github.com/lobehub/lobe-tts/commit/07245e3))
+
+</details>
+
+<div align="right">
+
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+</div>
+
+### [Version 1.12.1-beta.6](https://github.com/lobehub/lobe-tts/compare/v1.12.1-beta.5...v1.12.1-beta.6)
+
+Released on **2023-11-15**
+
+#### ♻ Code Refactoring
+
+- **misc**: Refactor to fix build.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### Code refactoring
+
+- **misc**: Refactor to fix build ([9858fcb](https://github.com/lobehub/lobe-tts/commit/9858fcb))
+
+</details>
+
+<div align="right">
+
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+</div>
+
+### [Version 1.12.1-beta.5](https://github.com/lobehub/lobe-tts/compare/v1.12.1-beta.4...v1.12.1-beta.5)
+
+Released on **2023-11-15**
+
+#### ♻ Code Refactoring
+
+- **misc**: Refactor to fix build.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### Code refactoring
+
+- **misc**: Refactor to fix build ([d875be6](https://github.com/lobehub/lobe-tts/commit/d875be6))
+
+</details>
+
+<div align="right">
+
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+</div>
+
+### [Version 1.12.1-beta.4](https://github.com/lobehub/lobe-tts/compare/v1.12.1-beta.3...v1.12.1-beta.4)
+
+Released on **2023-11-15**
+
+#### 🐛 Bug Fixes
+
+- **misc**: Fix client fetch.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### What's fixed
+
+- **misc**: Fix client fetch ([9262608](https://github.com/lobehub/lobe-tts/commit/9262608))
+
+</details>
+
+<div align="right">
+
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+</div>
+
+### [Version 1.12.1-beta.3](https://github.com/lobehub/lobe-tts/compare/v1.12.1-beta.2...v1.12.1-beta.3)
+
+Released on **2023-11-15**
+
+#### ♻ Code Refactoring
+
+- **misc**: Fix react entry.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### Code refactoring
+
+- **misc**: Fix react entry ([6a4631d](https://github.com/lobehub/lobe-tts/commit/6a4631d))
+
+</details>
+
+<div align="right">
+
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+</div>
+
+### [Version 1.12.1-beta.2](https://github.com/lobehub/lobe-tts/compare/v1.12.1-beta.1...v1.12.1-beta.2)
+
+Released on **2023-11-15**
+
+#### ♻ Code Refactoring
+
+- **misc**: Extract the react part into the /react subpath.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### Code refactoring
+
+- **misc**: Extract the react part into the /react subpath ([80b24e8](https://github.com/lobehub/lobe-tts/commit/80b24e8))
+
+</details>
+
+<div align="right">
+
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+</div>
+
+### [Version 1.12.1-beta.1](https://github.com/lobehub/lobe-tts/compare/v1.12.0...v1.12.1-beta.1)
+
+Released on **2023-11-15**
+
+#### ♻ Code Refactoring
+
+- **misc**: Refactor api.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### Code refactoring
+
+- **misc**: Refactor api ([a7c0095](https://github.com/lobehub/lobe-tts/commit/a7c0095))
+
+</details>
+
+<div align="right">
+
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+</div>
+
 ## [Version 1.12.0](https://github.com/lobehub/lobe-tts/compare/v1.11.1...v1.12.0)
 
 Released on **2023-11-13**
 
diff --git a/README.md b/README.md
index f1c9683..f01621d 100644
--- a/README.md
+++ b/README.md
@@ -86,17 +86,17 @@ Click button below to deploy your private plugins' gateway.
 
 This project provides some additional configuration items set with environment variables:
 
-| Environment Variable         | Description                                                                                                                                              | Default                     |
-| ---------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------- |
-| `ALLOW_ORIGINS`              | Allow origins , string or string array                                                                                                                   |                             |
-| `OPENAI_API_KEY`             | This is the API key you apply on the OpenAI account page                                                                                                 | `sk-xxxxxx...xxxxxx`        |
-| `OPENAI_PROXY_URL`           | If you manually configure the OpenAI interface proxy, you can use this configuration item to override the default OpenAI API request base URL           | `https://api.openai.com/v1` |
-| `AZURE_SPEECH_KEY`           | This is the API key of Azure Speech Service                                                                                                              |                             |
-| `AZURE_SPEECH_REGION`        | This is the region of Azure Speech Service                                                                                                               |                             |
-| `AZURE_SPEECH_PROXY_URL`     | If you manually configure the AZURE Speech interface proxy, you can use this configuration item to override the default Speech API request base URL     | `/api/azure-speech`         |
-| `MICROSOFT_SPEECH_PROXY_URL` | If you manually configure the Microsoft Speech interface proxy, you can use this configuration item to override the default Speech API request base URL | `/api/microsoft-speech`     |
-| `EDDGE_API_TOKEN`            | This is the API key of Edge Speech Service                                                                                                               |                             |
-| `EDDGE_PROXY_URL`            | If you manually configure the Edge interface proxy, you can use this configuration item to override the default Edge wss request base URL               |                             |
+| Environment Variable       | Description                                                                                                                                              | Default                     |
+| -------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------- |
+| `ALLOW_ORIGINS`            | Allowed origins, a string or an array of strings                                                                                                         |                             |
+| `OPENAI_API_KEY`           | This is the API key you apply on the OpenAI account page                                                                                                 | `sk-xxxxxx...xxxxxx`        |
+| `OPENAI_PROXY_URL`         | If you manually configure the OpenAI interface proxy, you can use this configuration item to override the default OpenAI API request base URL           | `https://api.openai.com/v1` |
+| `AZURE_SPEECH_KEY`         | This is the API key of Azure Speech Service                                                                                                              |                             |
+| `AZURE_SPEECH_REGION`      | This is the region of Azure Speech Service                                                                                                               |                             |
+| `AZURE_SPEECH_PROXY_URL`   | If you manually configure the Azure Speech interface proxy, you can use this configuration item to override the default Speech API request base URL     | `/api/azure-speech`         |
+| `MICROSOFT_SPEECH_API_URL` | If you manually configure the Microsoft Speech interface proxy, you can use this configuration item to override the default Speech API request base URL | `/api/microsoft-speech`     |
+| `EDGE_API_TOKEN`           | This is the API key of Edge Speech Service                                                                                                               |                             |
+| `EDGE_SPEECH_API_URL`      | If you manually configure the Edge interface proxy, you can use this configuration item to override the default Edge wss request base URL               |                             |
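For orientation, here is a minimal sketch of how a client could drive the renamed Edge Speech endpoint through the `EdgeSpeechTTS` class this PR introduces. The option shape (`api.url`, `voice`) is inferred from the demo code further down in this diff and should be read as an assumption, not a documented API:

```ts
import { EDGE_SPEECH_API_URL, EdgeSpeechTTS } from '@lobehub/tts';

// A locale narrows the voiceOptions getter added in src/class/EdgeSpeechTTS.ts.
const tts = new EdgeSpeechTTS('zh-CN');

// Hypothetical helper: option names mirror the useEdgeSpeech demo in this PR.
const speak = async (text: string) => {
  const audio = await tts.fetch(text, {
    api: { url: EDGE_SPEECH_API_URL }, // point at a self-hosted /api/edge-speech if needed
    voice: 'zh-CN-YunxiaNeural', // presumably one of tts.voiceOptions
  });
  return audio; // raw audio payload; playback is left to the caller
};
```

The same pattern presumably applies to `MicrosoftSpeechTTS` and `OpenaiTTS` with their respective `*_API_URL` constants.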
diff --git a/api/azure-speech.ts b/api/azure-speech.ts deleted file mode 100644 index f6d90da..0000000 --- a/api/azure-speech.ts +++ /dev/null @@ -1,28 +0,0 @@ -// TODO: fix vercel error -// Error: The Edge Function "api/azure-speech" is referencing unsupported modules: -// - https-proxy-agent: net, tls, url -// - microsoft-cognitiveservices-speech-sdk: vc-blob-asset:speech-processor.js, fs, net, tls - -/* -import cors from '../src/server/cors'; -import { getAllowOrigins } from '../src/server/getAllowOrigins'; -import { handleAzureSpeechRequest } from '../src/server/handleAzureSpeechRequest'; - -export const config = { - runtime: 'edge', -}; - -export default async (req: Request) => { - if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 }); - const origin = getAllowOrigins(req); - if (!origin) return new Response('Origin Not Allowed', { status: 403 }); - const res = await handleAzureSpeechRequest(req); - return cors(req, res, { methods: ['POST'], origin }); -}; -*/ - -export default async (req: Request) => { - if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 }); - - return new Response('WIP'); -}; diff --git a/api/edge-speech.ts b/api/edge-speech.ts new file mode 100644 index 0000000..9bee67d --- /dev/null +++ b/api/edge-speech.ts @@ -0,0 +1,14 @@ +import { createEdgeSpeechComletion } from '../src/server/createEdgeSpeechComletion'; +import { EdgeSpeechPayload } from '../src/server/types'; + +export const config = { + runtime: 'edge', +}; + +export default async (req: Request) => { + if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 }); + + const payload = (await req.json()) as EdgeSpeechPayload; + + return createEdgeSpeechComletion({ payload }); +}; diff --git a/api/microsoft-speech.ts b/api/microsoft-speech.ts index 9b62c17..48e2408 100644 --- a/api/microsoft-speech.ts +++ b/api/microsoft-speech.ts @@ -1,6 +1,5 @@ -import cors from '../src/server/cors'; -import { getAllowOrigins } from '../src/server/getAllowOrigins'; -import { handleMicrosoftSpeechRequest } from '../src/server/handleMicrosoftSpeechRequest'; +import { createMicrosoftSpeechComletion } from '../src/server/createMicrosoftSpeechComletion'; +import { MicrosoftSpeechPayload } from '../src/server/types'; export const config = { runtime: 'edge', @@ -8,8 +7,7 @@ export const config = { export default async (req: Request) => { if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 }); - const origin = getAllowOrigins(req); - if (!origin) return new Response('Origin Not Allowed', { status: 403 }); - const res = await handleMicrosoftSpeechRequest(req); - return cors(req, new Response(res.body, res), { methods: ['POST'], origin }); + const payload = (await req.json()) as MicrosoftSpeechPayload; + + return createMicrosoftSpeechComletion({ payload }); }; diff --git a/api/open-stt.ts b/api/open-stt.ts new file mode 100644 index 0000000..9576821 --- /dev/null +++ b/api/open-stt.ts @@ -0,0 +1,28 @@ +import OpenAI from 'openai'; + +import { createOpenaiAudioTranscriptionsCompletion } from '../src/server/createOpenaiAudioTranscriptionsCompletion'; +import { OpenAISTTPayload } from '../src/server/types'; + +export const config = { + runtime: 'edge', +}; + +export default async (req: Request) => { + if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 }); + + const OPENAI_API_KEY = process.env.OPENAI_API_KEY; + const OPENAI_PROXY_URL = process.env.OPENAI_PROXY_URL; + + if (!OPENAI_API_KEY) 
return new Response('OPENAI_API_KEY is not set', { status: 500 }); + + const payload = (await req.json()) as OpenAISTTPayload; + + const openai = new OpenAI({ apiKey: OPENAI_API_KEY, baseURL: OPENAI_PROXY_URL }); + const res = await createOpenaiAudioTranscriptionsCompletion({ openai, payload }); + + return new Response(JSON.stringify(res), { + headers: { + 'content-type': 'application/json;charset=UTF-8', + }, + }); +}; diff --git a/api/openai-tts.ts b/api/openai-tts.ts new file mode 100644 index 0000000..2338729 --- /dev/null +++ b/api/openai-tts.ts @@ -0,0 +1,21 @@ +import OpenAI from 'openai'; + +import { createOpenaiAudioSpeechCompletion } from '../src/server/createOpenaiAudioSpeechCompletion'; +import { OpenAITTSPayload } from '../src/server/types'; + +export const config = { + runtime: 'edge', +}; + +export default async (req: Request) => { + if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 }); + const OPENAI_API_KEY = process.env.OPENAI_API_KEY; + const OPENAI_PROXY_URL = process.env.OPENAI_PROXY_URL; + + if (!OPENAI_API_KEY) return new Response('OPENAI_API_KEY is not set', { status: 500 }); + const payload = (await req.json()) as OpenAITTSPayload; + + const openai = new OpenAI({ apiKey: OPENAI_API_KEY, baseURL: OPENAI_PROXY_URL }); + + return createOpenaiAudioSpeechCompletion({ openai, payload }); +}; diff --git a/package.json b/package.json index c8b5aa4..4be404d 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@lobehub/tts", - "version": "1.12.0", + "version": "1.13.0-beta.3", "description": "A high-quality & reliable TTS React Hooks library", "homepage": "https://github.com/lobehub/lobe-tts", "bugs": { @@ -13,6 +13,20 @@ "license": "MIT", "author": "LobeHub ", "sideEffects": false, + "type": "module", + "exports": { + "./package.json": "./package.json", + ".": { + "types": "./es/index.d.ts", + "import": "./es/index.js", + "module": "./es/index.js" + }, + "./react": { + "types": "./es/react/index.d.ts", + "import": "./es/react/index.js", + "module": "./es/react/index.js" + } + }, "main": "es/index.js", "module": "es/index.js", "types": "es/index.d.ts", @@ -21,10 +35,9 @@ ], "scripts": { "build": "father build", - "build:server": "tsc server.ts --declaration", "ci": "npm run lint && npm run type-check", "dev": "father dev", - "docs:build": "npm run setup && dumi build", + "docs:build": "npm run setup && npm run build && dumi build", "docs:build-analyze": "ANALYZE=1 dumi build", "docs:dev": "npm run setup && dumi dev", "doctor": "father doctor", @@ -66,11 +79,10 @@ "antd-style": "^3", "lodash-es": "^4", "lucide-react": "latest", - "microsoft-cognitiveservices-speech-sdk": "^1", + "openai": "^4.17.3", "query-string": "^8", "react-error-boundary": "^4.0.11", "react-layout-kit": "^1", - "ssml-document": "^1", "swr": "^2", "url-join": "^5", "uuid": "^9" diff --git a/src/class/EdgeSpeechTTS.ts b/src/class/EdgeSpeechTTS.ts new file mode 100644 index 0000000..3bc721b --- /dev/null +++ b/src/class/EdgeSpeechTTS.ts @@ -0,0 +1,20 @@ +import edgeVoiceList from '@/data/edgeVoiceList'; +import voiceName from '@/data/voiceList'; +import { fetchEdgeSpeech } from '@/services/fetchEdgeSpeech'; +import { getEdgeVoiceOptions, getVoiceLocaleOptions } from '@/utils/getVoiceList'; + +export class EdgeSpeechTTS { + private locale?: string; + constructor(locale?: string) { + this.locale = locale; + } + + get voiceOptions() { + return getEdgeVoiceOptions(this.locale); + } + + static localeOptions = getVoiceLocaleOptions(); + static voiceList = 
edgeVoiceList; + static voiceName = voiceName; + fetch = fetchEdgeSpeech; +} diff --git a/src/class/MicrosoftSpeechTTS.ts b/src/class/MicrosoftSpeechTTS.ts new file mode 100644 index 0000000..414be55 --- /dev/null +++ b/src/class/MicrosoftSpeechTTS.ts @@ -0,0 +1,20 @@ +import azureVoiceList from '@/data/azureVoiceList'; +import voiceName from '@/data/voiceList'; +import { fetchMicrosoftSpeech } from '@/services/fetchMicrosoftSpeech'; +import { getAzureVoiceOptions, getVoiceLocaleOptions } from '@/utils/getVoiceList'; + +export class MicrosoftSpeechTTS { + private locale?: string; + constructor(locale?: string) { + this.locale = locale; + } + get voiceOptions() { + return getAzureVoiceOptions(this.locale); + } + + static localeOptions = getVoiceLocaleOptions(); + + voiceList = azureVoiceList; + voiceName = voiceName; + fetch = fetchMicrosoftSpeech; +} diff --git a/src/class/OpenaiSTT.ts b/src/class/OpenaiSTT.ts new file mode 100644 index 0000000..eafd676 --- /dev/null +++ b/src/class/OpenaiSTT.ts @@ -0,0 +1,5 @@ +import { fetchOpenaiSTT } from '@/services/fetchOpenaiSTT'; + +export class OpenaiSTT { + fetch = fetchOpenaiSTT; +} diff --git a/src/class/OpenaiTTS.ts b/src/class/OpenaiTTS.ts new file mode 100644 index 0000000..4edd084 --- /dev/null +++ b/src/class/OpenaiTTS.ts @@ -0,0 +1,18 @@ +import openaiVoiceList from '@/data/openaiVoiceList'; +import { fetchOpenaiTTS } from '@/services/fetchOpenaiTTS'; +import { getOpenaiVoiceOptions, getVoiceLocaleOptions } from '@/utils/getVoiceList'; + +export class OpenaiTTS { + static voiceList = openaiVoiceList; + + get voiceOptions() { + return getOpenaiVoiceOptions(); + } + get localeOptions() { + return getVoiceLocaleOptions(); + } + + static localeOptions = getVoiceLocaleOptions(); + + fetch = fetchOpenaiTTS; +} diff --git a/src/class/VoiceList.ts b/src/class/VoiceList.ts new file mode 100644 index 0000000..18c6c5c --- /dev/null +++ b/src/class/VoiceList.ts @@ -0,0 +1,38 @@ +import { + getAzureVoiceOptions, + getEdgeVoiceOptions, + getOpenaiVoiceOptions, + getSpeechSynthesVoiceOptions, + getVoiceLocaleOptions, +} from '@/utils/getVoiceList'; + +export class VoiceList { + private locale?: string; + constructor(locale?: string) { + this.locale = locale; + } + + get speechSynthesVoiceOptions() { + return getSpeechSynthesVoiceOptions(this.locale); + } + + get azureVoiceOptions() { + return getAzureVoiceOptions(this.locale); + } + + get edgeVoiceOptions() { + return getEdgeVoiceOptions(this.locale); + } + + get microsoftVoiceOptions() { + return getEdgeVoiceOptions(this.locale); + } + + get openaiVoiceOptions() { + return getOpenaiVoiceOptions(); + } + + get localeOptions() { + return getVoiceLocaleOptions(); + } +} diff --git a/src/const/api.ts b/src/const/api.ts index 89afd3c..2b5efd8 100644 --- a/src/const/api.ts +++ b/src/const/api.ts @@ -1,33 +1,21 @@ import urlJoin from 'url-join'; -export const MICROSOFT_SPPECH_URL = +export const MICROSOFT_SPEECH_URL = 'https://southeastasia.api.speech.microsoft.com/accfreetrial/texttospeech/acc/v3.0-beta1/vcg/speak'; -export const MICROSOFT_SPEECH_PROXY_URL = - process.env.MICROSOFT_SPEECH_PROXY_URL || - process.env.NEXT_PUBLIC_MICROSOFT_SPEECH_PROXY_URL || - '/api/microsoft-speech'; -export const AZURE_SPEECH_PROXY_URL = - process.env.AZURE_SPEECH_PROXY_URL || - process.env.NEXT_PUBLIC_AZURE_SPEECH_PROXY_URL || - '/api/azure-speech'; +export const EDGE_SPEECH_URL = + 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1'; +export const EDGE_API_TOKEN = 
'6A5AA1D4EAFF4E9FB37E23D68491D6F4'; +export const OPENAI_BASE_URL = 'https://api.openai.com/v1'; + +export const MICROSOFT_SPEECH_API_URL = '/api/microsoft-speech'; +export const EDGE_SPEECH_API_URL = '/api/edge-speech'; +export const OPENAI_TTS_API_URL = '/api/openai-tts'; +export const OPENAI_STT_API_URL = '/api/openai-stt'; + export const AZURE_SPEECH_KEY = process.env.AZURE_SPEECH_KEY || process.env.NEXT_PUBLIC_AZURE_SPEECH_KEY || ''; export const AZURE_SPEECH_REGION = process.env.AZURE_SPEECH_REGION || process.env.NEXT_PUBLIC_AZURE_SPEECH_REGION || ''; -export const OPENAI_API_KEY = - process.env.OPENAI_API_KEY || process.env.NEXT_PUBLIC_OPENAI_API_KEY || ''; -export const OPENAI_PROXY_URL = - process.env.OPENAI_PROXY_URL || - process.env.NEXT_PUBLIC_OPENAI_PROXY_URL || - 'https://api.openai.com/v1'; -export const OPENAI_TTS_URL = (api?: string) => urlJoin(api || OPENAI_PROXY_URL, 'audio/speech'); -export const OPENAI_STT_URL = (api?: string) => - urlJoin(api || OPENAI_PROXY_URL, 'audio/transcriptions'); -export const EDDGE_PROXY_URL = - process.env.EDDGE_PROXY_URL || - process.env.NEXT_PUBLIC_EDDGE_PROXY_UR || - 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1'; -export const EDDGE_API_TOKEN = - process.env.EDDGE_API_TOKEN || - process.env.NEXT_PUBLIC_EDDGE_API_TOKEN || - '6A5AA1D4EAFF4E9FB37E23D68491D6F4'; + +export const OPENAI_TTS_URL = (api: string) => urlJoin(api, 'audio/speech'); +export const OPENAI_STT_URL = (api: string) => urlJoin(api, 'audio/transcriptions'); diff --git a/src/data/nameList.ts b/src/data/voiceList.ts similarity index 100% rename from src/data/nameList.ts rename to src/data/voiceList.ts diff --git a/src/index.ts b/src/index.ts index 5f2b637..0f034cc 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,44 +1,22 @@ -export { default as AudioPlayer, type AudioPlayerProps } from './AudioPlayer'; -export { default as AudioVisualizer, type AudioVisualizerProps } from './AudioVisualizer'; -export { default as azureVoiceList } from './data/azureVoiceList'; -export { default as edgeVoiceList } from './data/edgeVoiceList'; -export { default as voiceLocale } from './data/locales'; -export { default as nameList } from './data/nameList'; -export { default as openaiVoiceList } from './data/openaiVoiceList'; -export { useAudioPlayer } from './hooks/useAudioPlayer'; -export { useAudioVisualizer } from './hooks/useAudioVisualizer'; -export { useBlobUrl } from './hooks/useBlobUrl'; -export { useStreamAudioPlayer } from './hooks/useStreamAudioPlayer'; -export { type AzureSpeechOptions, fetchAzureSpeech } from './services/fetchAzureSpeech'; -export { type EdgeSpeechOptions, fetchEdgeSpeech } from './services/fetchEdgeSpeech'; -export { fetchMicrosoftSpeech, type MicrosoftSpeechOptions } from './services/fetchMicrosoftSpeech'; -export { fetchOpenaiSTT, type OpenaiSttOptions } from './services/fetchOpenaiSTT'; -export { fetchOpenaiTTS, type OpenaiTtsOptions } from './services/fetchOpenaiTTS'; -export { useAudioRecorder } from './useAudioRecorder'; -export { useAzureSpeech } from './useAzureSpeech'; -export { useEdgeSpeech } from './useEdgeSpeech'; -export { useMicrosoftSpeech } from './useMicrosoftSpeech'; -export { - type OpenaiSpeechRecognitionOptions, - type OpenaiSTTFetcher, - useOpenaiSTT, - useOpenaiSTTWithPSR, - useOpenaiSTTWithRecord, - useOpenaiSTTWithSR, -} from './useOpenaiSTT'; -export { useOpenaiTTS } from './useOpenaiTTS'; -export { usePersistedSpeechRecognition } from './useSpeechRecognition/usePersistedSpeechRecognition'; -export { - type 
SpeechRecognitionOptions, - useSpeechRecognition, -} from './useSpeechRecognition/useSpeechRecognition'; -export { useSpeechSynthes } from './useSpeechSynthes'; +export { EdgeSpeechTTS } from './class/EdgeSpeechTTS'; +export { MicrosoftSpeechTTS } from './class/MicrosoftSpeechTTS'; +export { OpenaiSTT } from './class/OpenaiSTT'; +export { OpenaiTTS } from './class/OpenaiTTS'; +export { VoiceList } from './class/VoiceList'; +export { type EdgeSpeechOptions } from './services/fetchEdgeSpeech'; +export { type MicrosoftSpeechOptions } from './services/fetchMicrosoftSpeech'; +export { type OpenaiSttOptions } from './services/fetchOpenaiSTT'; +export { type OpenaiTtsOptions } from './services/fetchOpenaiTTS'; export { getRecordMineType, type RecordMineType } from './utils/getRecordMineType'; +export { getSpeechSynthesVoiceOptions } from './utils/getVoiceList'; export { - genLevaOptions, - getAzureVoiceOptions, - getEdgeVoiceOptions, - getOpenaiVoiceOptions, - getSpeechSynthesVoiceOptions, - getVoiceLocaleOptions, -} from './utils/getVoiceList'; + EDGE_SPEECH_API_URL, + MICROSOFT_SPEECH_API_URL, + OPENAI_STT_API_URL, + OPENAI_TTS_API_URL, +} from '@/const/api'; +export { createEdgeSpeechComletion } from '@/server/createEdgeSpeechComletion'; +export { createMicrosoftSpeechComletion } from '@/server/createMicrosoftSpeechComletion'; +export { createOpenaiAudioSpeechCompletion } from '@/server/createOpenaiAudioSpeechCompletion'; +export { createOpenaiAudioTranscriptionsCompletion } from '@/server/createOpenaiAudioTranscriptionsCompletion'; +export * from '@/server/types'; diff --git a/src/AudioPlayer/demos/index.tsx b/src/react/AudioPlayer/demos/index.tsx similarity index 91% rename from src/AudioPlayer/demos/index.tsx rename to src/react/AudioPlayer/demos/index.tsx index 0310eb6..3bc6309 100644 --- a/src/AudioPlayer/demos/index.tsx +++ b/src/react/AudioPlayer/demos/index.tsx @@ -1,4 +1,4 @@ -import { AudioPlayer, useAudioPlayer } from '@lobehub/tts'; +import { AudioPlayer, useAudioPlayer } from '@lobehub/tts/react'; import { StoryBook, useControls, useCreateStore } from '@lobehub/ui'; export default () => { diff --git a/src/AudioPlayer/index.md b/src/react/AudioPlayer/index.md similarity index 100% rename from src/AudioPlayer/index.md rename to src/react/AudioPlayer/index.md diff --git a/src/AudioPlayer/index.tsx b/src/react/AudioPlayer/index.tsx similarity index 100% rename from src/AudioPlayer/index.tsx rename to src/react/AudioPlayer/index.tsx diff --git a/src/AudioVisualizer/Visualizer.tsx b/src/react/AudioVisualizer/Visualizer.tsx similarity index 93% rename from src/AudioVisualizer/Visualizer.tsx rename to src/react/AudioVisualizer/Visualizer.tsx index 0fa7e65..dd889c2 100644 --- a/src/AudioVisualizer/Visualizer.tsx +++ b/src/react/AudioVisualizer/Visualizer.tsx @@ -1,7 +1,8 @@ -import { useAudioVisualizer } from '@lobehub/tts'; import { useTheme } from 'antd-style'; import { RefObject, memo } from 'react'; +import { useAudioVisualizer } from '../hooks/useAudioVisualizer'; + export interface VisualizerProps { borderRadius?: number; color?: string; diff --git a/src/AudioVisualizer/demos/index.tsx b/src/react/AudioVisualizer/demos/index.tsx similarity index 98% rename from src/AudioVisualizer/demos/index.tsx rename to src/react/AudioVisualizer/demos/index.tsx index d6d67b8..d195e03 100644 --- a/src/AudioVisualizer/demos/index.tsx +++ b/src/react/AudioVisualizer/demos/index.tsx @@ -1,4 +1,4 @@ -import { AudioPlayer, AudioVisualizer, useAudioPlayer } from '@lobehub/tts'; +import { AudioPlayer, 
AudioVisualizer, useAudioPlayer } from '@lobehub/tts/react'; import { StoryBook, useControls, useCreateStore } from '@lobehub/ui'; import { Flexbox } from 'react-layout-kit'; diff --git a/src/AudioVisualizer/index.md b/src/react/AudioVisualizer/index.md similarity index 100% rename from src/AudioVisualizer/index.md rename to src/react/AudioVisualizer/index.md diff --git a/src/AudioVisualizer/index.tsx b/src/react/AudioVisualizer/index.tsx similarity index 94% rename from src/AudioVisualizer/index.tsx rename to src/react/AudioVisualizer/index.tsx index 2e80d1b..b8de105 100644 --- a/src/AudioVisualizer/index.tsx +++ b/src/react/AudioVisualizer/index.tsx @@ -4,7 +4,7 @@ import { CSSProperties, RefObject, memo } from 'react'; import { ErrorBoundary } from 'react-error-boundary'; import { Flexbox } from 'react-layout-kit'; -import Visualizer, { VisualizerProps } from '@/AudioVisualizer/Visualizer'; +import Visualizer, { VisualizerProps } from '@/react/AudioVisualizer/Visualizer'; export interface AudioVisualizerProps { audioRef: RefObject; diff --git a/src/react/_util/leva.ts b/src/react/_util/leva.ts new file mode 100644 index 0000000..1e54a9a --- /dev/null +++ b/src/react/_util/leva.ts @@ -0,0 +1,7 @@ +import { SelectProps } from 'antd'; + +export const genLevaOptions = (options: SelectProps['options']) => { + const data: any = {}; + options?.forEach((item: any) => (data[item?.label || item?.value] = item?.value)); + return data; +}; diff --git a/src/hooks/useAudioPlayer.ts b/src/react/hooks/useAudioPlayer.ts similarity index 98% rename from src/hooks/useAudioPlayer.ts rename to src/react/hooks/useAudioPlayer.ts index 66d2867..2bb7390 100644 --- a/src/hooks/useAudioPlayer.ts +++ b/src/react/hooks/useAudioPlayer.ts @@ -1,7 +1,7 @@ import { RefObject, useCallback, useEffect, useRef, useState } from 'react'; import useSWR from 'swr'; -import { AudioProps } from '@/AudioPlayer'; +import { AudioProps } from '@/react/AudioPlayer'; import { arrayBufferConvert } from '@/utils/arrayBufferConvert'; import { audioBufferToBlob } from '@/utils/audioBufferToBlob'; diff --git a/src/hooks/useAudioVisualizer.ts b/src/react/hooks/useAudioVisualizer.ts similarity index 100% rename from src/hooks/useAudioVisualizer.ts rename to src/react/hooks/useAudioVisualizer.ts diff --git a/src/hooks/useBlobUrl.ts b/src/react/hooks/useBlobUrl.ts similarity index 100% rename from src/hooks/useBlobUrl.ts rename to src/react/hooks/useBlobUrl.ts diff --git a/src/hooks/useStreamAudioPlayer.ts b/src/react/hooks/useStreamAudioPlayer.ts similarity index 98% rename from src/hooks/useStreamAudioPlayer.ts rename to src/react/hooks/useStreamAudioPlayer.ts index 07b1d7a..224c296 100644 --- a/src/hooks/useStreamAudioPlayer.ts +++ b/src/react/hooks/useStreamAudioPlayer.ts @@ -1,6 +1,6 @@ import { RefObject, useCallback, useEffect, useRef, useState } from 'react'; -import { AudioProps } from '@/AudioPlayer'; +import { AudioProps } from '@/react/AudioPlayer'; import { audioBufferToBlob, audioBuffersToBlob } from '@/utils/audioBufferToBlob'; export interface StreamAudioPlayerHook extends AudioProps { diff --git a/src/react/index.ts b/src/react/index.ts new file mode 100644 index 0000000..f5bc749 --- /dev/null +++ b/src/react/index.ts @@ -0,0 +1,23 @@ +export { default as AudioPlayer, type AudioPlayerProps } from './AudioPlayer'; +export { default as AudioVisualizer, type AudioVisualizerProps } from './AudioVisualizer'; +export { useAudioPlayer } from './hooks/useAudioPlayer'; +export { useAudioVisualizer } from './hooks/useAudioVisualizer'; 
+export { useBlobUrl } from './hooks/useBlobUrl'; +export { useStreamAudioPlayer } from './hooks/useStreamAudioPlayer'; +export { useAudioRecorder } from './useAudioRecorder'; +export { useEdgeSpeech } from './useEdgeSpeech'; +export { useMicrosoftSpeech } from './useMicrosoftSpeech'; +export { + type OpenaiSpeechRecognitionOptions, + useOpenaiSTT, + useOpenaiSTTWithPSR, + useOpenaiSTTWithRecord, + useOpenaiSTTWithSR, +} from './useOpenaiSTT'; +export { useOpenaiTTS } from './useOpenaiTTS'; +export { usePersistedSpeechRecognition } from './useSpeechRecognition/usePersistedSpeechRecognition'; +export { + type SpeechRecognitionOptions, + useSpeechRecognition, +} from './useSpeechRecognition/useSpeechRecognition'; +export { useSpeechSynthes } from './useSpeechSynthes'; diff --git a/src/useAudioRecorder/demos/index.tsx b/src/react/useAudioRecorder/demos/index.tsx similarity index 92% rename from src/useAudioRecorder/demos/index.tsx rename to src/react/useAudioRecorder/demos/index.tsx index d9b8442..40e3c9e 100644 --- a/src/useAudioRecorder/demos/index.tsx +++ b/src/react/useAudioRecorder/demos/index.tsx @@ -1,4 +1,4 @@ -import { useAudioRecorder } from '@lobehub/tts'; +import { useAudioRecorder } from '@lobehub/tts/react'; import { Icon } from '@lobehub/ui'; import { Button } from 'antd'; import { Mic, StopCircle } from 'lucide-react'; diff --git a/src/useAudioRecorder/index.md b/src/react/useAudioRecorder/index.md similarity index 100% rename from src/useAudioRecorder/index.md rename to src/react/useAudioRecorder/index.md diff --git a/src/useAudioRecorder/index.ts b/src/react/useAudioRecorder/index.ts similarity index 100% rename from src/useAudioRecorder/index.ts rename to src/react/useAudioRecorder/index.ts diff --git a/src/useEdgeSpeech/demos/index.tsx b/src/react/useEdgeSpeech/demos/index.tsx similarity index 78% rename from src/useEdgeSpeech/demos/index.tsx rename to src/react/useEdgeSpeech/demos/index.tsx index dae1ef8..a01234b 100644 --- a/src/useEdgeSpeech/demos/index.tsx +++ b/src/react/useEdgeSpeech/demos/index.tsx @@ -1,9 +1,12 @@ -import { AudioPlayer, genLevaOptions, getEdgeVoiceOptions, useEdgeSpeech } from '@lobehub/tts'; +import { EDGE_SPEECH_API_URL, EdgeSpeechTTS } from '@lobehub/tts'; +import { AudioPlayer, useEdgeSpeech } from '@lobehub/tts/react'; import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui'; import { Button, Input } from 'antd'; import { Volume2 } from 'lucide-react'; import { Flexbox } from 'react-layout-kit'; +import { genLevaOptions } from '../../_util/leva'; + const defaultText = '这是一段使用 Edge Speech 的语音演示'; export default () => { @@ -11,22 +14,15 @@ export default () => { const api: any = useControls( { - key: { - label: 'EDDGE_API_TOKEN', - value: '', - }, - proxy: { - label: 'EDDGE_PROXY_URL', - value: '', - }, + url: EDGE_SPEECH_API_URL, }, { store }, ); const options: any = useControls( { - name: { - options: genLevaOptions(getEdgeVoiceOptions()), + voice: { + options: genLevaOptions(new EdgeSpeechTTS().voiceOptions), value: 'zh-CN-YunxiaNeural', }, }, diff --git a/src/useEdgeSpeech/index.md b/src/react/useEdgeSpeech/index.md similarity index 100% rename from src/useEdgeSpeech/index.md rename to src/react/useEdgeSpeech/index.md diff --git a/src/react/useEdgeSpeech/index.ts b/src/react/useEdgeSpeech/index.ts new file mode 100644 index 0000000..e717728 --- /dev/null +++ b/src/react/useEdgeSpeech/index.ts @@ -0,0 +1,22 @@ +import { useState } from 'react'; + +import { TTSConfig, useTTS } from '@/react/useTTS'; +import { 
EdgeSpeechOptions, fetchEdgeSpeech } from '@/services/fetchEdgeSpeech'; + +export const useEdgeSpeech = ( + defaultText: string, + options: EdgeSpeechOptions, + config?: TTSConfig, +) => { + const [text, setText] = useState(defaultText); + const rest = useTTS( + options.voice, + text, + (segmentText: string) => fetchEdgeSpeech(segmentText, options), + config, + ); + return { + setText, + ...rest, + }; +}; diff --git a/src/useMicrosoftSpeech/demos/index.tsx b/src/react/useMicrosoftSpeech/demos/index.tsx similarity index 75% rename from src/useMicrosoftSpeech/demos/index.tsx rename to src/react/useMicrosoftSpeech/demos/index.tsx index ac31eba..756866c 100644 --- a/src/useMicrosoftSpeech/demos/index.tsx +++ b/src/react/useMicrosoftSpeech/demos/index.tsx @@ -1,23 +1,27 @@ -import { AudioPlayer, genLevaOptions, getEdgeVoiceOptions, useMicrosoftSpeech } from '@lobehub/tts'; +import { MICROSOFT_SPEECH_API_URL, MicrosoftSpeechTTS } from '@lobehub/tts'; +import { AudioPlayer, useMicrosoftSpeech } from '@lobehub/tts/react'; import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui'; import { Button, Input } from 'antd'; import { Volume2 } from 'lucide-react'; import { Flexbox } from 'react-layout-kit'; +import { genLevaOptions } from '../../_util/leva'; + const defaultText = '这是一段使用 Microsoft Speech 的语音演示'; export default () => { const store = useCreateStore(); - const options: any = useControls( + const api: any = useControls( { - api: { - label: 'MICROSOFT_SPEECH_PROXY_URL', - value: '', - }, - name: { - options: genLevaOptions(getEdgeVoiceOptions()), - value: 'zh-CN-YunxiaNeural', + url: { + label: 'MICROSOFT_SPEECH_API_URL', + value: MICROSOFT_SPEECH_API_URL, }, + }, + { store }, + ); + const options: any = useControls( + { pitch: { max: 1, min: -1, @@ -46,10 +50,17 @@ export default () => { ], value: 'general', }, + voice: { + options: genLevaOptions(new MicrosoftSpeechTTS().voiceOptions), + value: 'zh-CN-YunxiaNeural', + }, }, { store }, ); - const { setText, isGlobalLoading, audio, start, stop } = useMicrosoftSpeech(defaultText, options); + const { setText, isGlobalLoading, audio, start, stop } = useMicrosoftSpeech(defaultText, { + api, + ...options, + }); return ( diff --git a/src/useMicrosoftSpeech/index.md b/src/react/useMicrosoftSpeech/index.md similarity index 77% rename from src/useMicrosoftSpeech/index.md rename to src/react/useMicrosoftSpeech/index.md index 8ab707e..b431580 100644 --- a/src/useMicrosoftSpeech/index.md +++ b/src/react/useMicrosoftSpeech/index.md @@ -6,6 +6,6 @@ title: useMicrosoftSpeech ## hooks -- ENV: `MICROSOFT_SPEECH_PROXY_URL` +- ENV: `MICROSOFT_SPEECH_API_URL` diff --git a/src/react/useMicrosoftSpeech/index.ts b/src/react/useMicrosoftSpeech/index.ts new file mode 100644 index 0000000..bd17726 --- /dev/null +++ b/src/react/useMicrosoftSpeech/index.ts @@ -0,0 +1,22 @@ +import { useState } from 'react'; + +import { TTSConfig, useTTS } from '@/react/useTTS'; +import { type MicrosoftSpeechOptions, fetchMicrosoftSpeech } from '@/services/fetchMicrosoftSpeech'; + +export const useMicrosoftSpeech = ( + defaultText: string, + options: MicrosoftSpeechOptions, + config?: TTSConfig, +) => { + const [text, setText] = useState(defaultText); + const rest = useTTS( + options.voice, + text, + (segmentText: string) => fetchMicrosoftSpeech(segmentText, options), + config, + ); + return { + setText, + ...rest, + }; +}; diff --git a/src/useOpenaiSTT/demos/OpenaiSTTWithPSR.tsx b/src/react/useOpenaiSTT/demos/OpenaiSTTWithPSR.tsx similarity index 95% rename from 
src/useOpenaiSTT/demos/OpenaiSTTWithPSR.tsx rename to src/react/useOpenaiSTT/demos/OpenaiSTTWithPSR.tsx index 88c15e4..3ff7ec3 100644 --- a/src/useOpenaiSTT/demos/OpenaiSTTWithPSR.tsx +++ b/src/react/useOpenaiSTT/demos/OpenaiSTTWithPSR.tsx @@ -1,4 +1,4 @@ -import { useOpenaiSTTWithPSR } from '@lobehub/tts'; +import { useOpenaiSTTWithPSR } from '@lobehub/tts/react'; import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui'; import { Button, Input } from 'antd'; import { Mic, StopCircle } from 'lucide-react'; diff --git a/src/useOpenaiSTT/demos/OpenaiSTTWithSR.tsx b/src/react/useOpenaiSTT/demos/OpenaiSTTWithSR.tsx similarity index 95% rename from src/useOpenaiSTT/demos/OpenaiSTTWithSR.tsx rename to src/react/useOpenaiSTT/demos/OpenaiSTTWithSR.tsx index 6cfd7c6..3103aac 100644 --- a/src/useOpenaiSTT/demos/OpenaiSTTWithSR.tsx +++ b/src/react/useOpenaiSTT/demos/OpenaiSTTWithSR.tsx @@ -1,4 +1,4 @@ -import { useOpenaiSTTWithSR } from '@lobehub/tts'; +import { useOpenaiSTTWithSR } from '@lobehub/tts/react'; import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui'; import { Button, Input } from 'antd'; import { Mic, StopCircle } from 'lucide-react'; diff --git a/src/useOpenaiSTT/demos/index.tsx b/src/react/useOpenaiSTT/demos/index.tsx similarity index 89% rename from src/useOpenaiSTT/demos/index.tsx rename to src/react/useOpenaiSTT/demos/index.tsx index e638a02..aa1b687 100644 --- a/src/useOpenaiSTT/demos/index.tsx +++ b/src/react/useOpenaiSTT/demos/index.tsx @@ -1,4 +1,5 @@ -import { useOpenaiSTTWithRecord } from '@lobehub/tts'; +import { OPENAI_STT_API_URL } from '@lobehub/tts'; +import { useOpenaiSTTWithRecord } from '@lobehub/tts/react'; import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui'; import { Button, Input } from 'antd'; import { Mic, StopCircle } from 'lucide-react'; @@ -16,6 +17,7 @@ export default () => { label: 'OPENAI_PROXY_URL', value: '', }, + url: OPENAI_STT_API_URL, }, { store }, ); diff --git a/src/useOpenaiSTT/index.md b/src/react/useOpenaiSTT/index.md similarity index 100% rename from src/useOpenaiSTT/index.md rename to src/react/useOpenaiSTT/index.md diff --git a/src/useOpenaiSTT/index.ts b/src/react/useOpenaiSTT/index.ts similarity index 76% rename from src/useOpenaiSTT/index.ts rename to src/react/useOpenaiSTT/index.ts index 8d5af8b..728150a 100644 --- a/src/useOpenaiSTT/index.ts +++ b/src/react/useOpenaiSTT/index.ts @@ -1,4 +1,4 @@ -export { type OpenaiSTTFetcher, useOpenaiSTT } from './useOpenaiSTT'; +export { useOpenaiSTT } from './useOpenaiSTT'; export { useOpenaiSTTWithPSR } from './useOpenaiSTTWithPSR'; export { type OpenaiSpeechRecognitionOptions, diff --git a/src/useOpenaiSTT/useOpenaiSTT.ts b/src/react/useOpenaiSTT/useOpenaiSTT.ts similarity index 68% rename from src/useOpenaiSTT/useOpenaiSTT.ts rename to src/react/useOpenaiSTT/useOpenaiSTT.ts index 1f99fac..7080b3b 100644 --- a/src/useOpenaiSTT/useOpenaiSTT.ts +++ b/src/react/useOpenaiSTT/useOpenaiSTT.ts @@ -3,23 +3,19 @@ import useSWR, { type SWRConfiguration } from 'swr'; import { OpenaiSttOptions, fetchOpenaiSTT } from '@/services/fetchOpenaiSTT'; import { getRecordMineType } from '@/utils/getRecordMineType'; -export type OpenaiSTTFetcher = (blob: Blob, sttOptions: OpenaiSttOptions) => Promise; export const useOpenaiSTT = ( shouldFetch?: boolean, blob?: Blob, options?: OpenaiSttOptions, config?: SWRConfiguration, - fetcher?: OpenaiSTTFetcher, ) => { const key = new Date().getDate().toString(); const optionsWithMineType: OpenaiSttOptions = { ...options, 
mineType: getRecordMineType() }; - const openaiSTTFetcher = fetcher ?? fetchOpenaiSTT; - return useSWR( shouldFetch && blob ? key : null, - async () => await openaiSTTFetcher(blob as Blob, optionsWithMineType), + async () => await fetchOpenaiSTT(blob as Blob, optionsWithMineType), config, ); }; diff --git a/src/useOpenaiSTT/useOpenaiSTTWithPSR.ts b/src/react/useOpenaiSTT/useOpenaiSTTWithPSR.ts similarity index 57% rename from src/useOpenaiSTT/useOpenaiSTTWithPSR.ts rename to src/react/useOpenaiSTT/useOpenaiSTTWithPSR.ts index 72b65ff..dfff989 100644 --- a/src/useOpenaiSTT/useOpenaiSTTWithPSR.ts +++ b/src/react/useOpenaiSTT/useOpenaiSTTWithPSR.ts @@ -1,14 +1,22 @@ import { useCallback, useState } from 'react'; -import { OpenaiSTTFetcher, useOpenaiSTT } from '@/useOpenaiSTT/useOpenaiSTT'; -import { usePersistedSpeechRecognition } from '@/useSpeechRecognition'; +import { useOpenaiSTT } from '@/react/useOpenaiSTT/useOpenaiSTT'; +import { usePersistedSpeechRecognition } from '@/react/useSpeechRecognition'; -import { OpenaiSpeechRecognitionOptions } from './useOpenaiSTTWithRecord'; +import { OpenaiSpeechRecognitionOptions, STTConfig } from './useOpenaiSTTWithRecord'; export const useOpenaiSTTWithPSR = ( locale: string, - { onBolbAvailable, onTextChange, ...options }: OpenaiSpeechRecognitionOptions, - fetcher?: OpenaiSTTFetcher, + options: OpenaiSpeechRecognitionOptions, + { + onBolbAvailable, + onTextChange, + onSuccess, + onError, + onFinished, + onStart, + onStop, + }: STTConfig = {}, ) => { const [isGlobalLoading, setIsGlobalLoading] = useState(false); const [shouldFetch, setShouldFetch] = useState(false); @@ -33,34 +41,33 @@ export const useOpenaiSTTWithPSR = ( }); const handleStart = useCallback(() => { + onStart?.(); setIsGlobalLoading(true); start(); setText(''); }, [start]); const handleStop = useCallback(() => { + onStop?.(); stop(); setShouldFetch(false); setIsGlobalLoading(false); }, [stop]); - const { isLoading } = useOpenaiSTT( - shouldFetch, - blob, - options, - { - onError: (err) => { - console.error(err); - handleStop(); - }, - onSuccess: (data) => { - setText(data); - onTextChange?.(data); - handleStop(); - }, + const { isLoading } = useOpenaiSTT(shouldFetch, blob, options, { + onError: (err, ...rest) => { + onError?.(err, ...rest); + console.error(err); + handleStop(); }, - fetcher, - ); + onSuccess: (data, ...rest) => { + onSuccess?.(data, ...rest); + setText(data); + onTextChange?.(data); + handleStop(); + onFinished?.(data, ...rest); + }, + }); return { blob, diff --git a/src/useOpenaiSTT/useOpenaiSTTWithRecord.ts b/src/react/useOpenaiSTT/useOpenaiSTTWithRecord.ts similarity index 51% rename from src/useOpenaiSTT/useOpenaiSTTWithRecord.ts rename to src/react/useOpenaiSTT/useOpenaiSTTWithRecord.ts index abc037f..fd98e02 100644 --- a/src/useOpenaiSTT/useOpenaiSTTWithRecord.ts +++ b/src/react/useOpenaiSTT/useOpenaiSTTWithRecord.ts @@ -1,15 +1,30 @@ import { useCallback, useState } from 'react'; +import { SWRConfiguration } from 'swr'; +import { useAudioRecorder } from '@/react/useAudioRecorder'; +import { useOpenaiSTT } from '@/react/useOpenaiSTT/useOpenaiSTT'; +import { SpeechRecognitionOptions } from '@/react/useSpeechRecognition/useSpeechRecognition'; import { OpenaiSttOptions } from '@/services/fetchOpenaiSTT'; -import { useAudioRecorder } from '@/useAudioRecorder'; -import { OpenaiSTTFetcher, useOpenaiSTT } from '@/useOpenaiSTT/useOpenaiSTT'; -import { SpeechRecognitionOptions } from '@/useSpeechRecognition/useSpeechRecognition'; export type OpenaiSpeechRecognitionOptions 
= SpeechRecognitionOptions & OpenaiSttOptions; +export interface STTConfig extends SpeechRecognitionOptions, SWRConfiguration { + onFinished?: SWRConfiguration['onSuccess']; + onStart?: () => void; + onStop?: () => void; +} + export const useOpenaiSTTWithRecord = ( - { onBolbAvailable, onTextChange, ...options }: OpenaiSpeechRecognitionOptions, - fetcher?: OpenaiSTTFetcher, + options: OpenaiSttOptions, + { + onBolbAvailable, + onTextChange, + onSuccess, + onError, + onFinished, + onStart, + onStop, + }: STTConfig = {}, ) => { const [isGlobalLoading, setIsGlobalLoading] = useState(false); const [shouldFetch, setShouldFetch] = useState(false); @@ -22,34 +37,33 @@ export const useOpenaiSTTWithRecord = ( ); const handleStart = useCallback(() => { + onStart?.(); setIsGlobalLoading(true); start(); setText(''); }, [start]); const handleStop = useCallback(() => { + onStop?.(); stop(); setShouldFetch(false); setIsGlobalLoading(false); }, [stop]); - const { isLoading } = useOpenaiSTT( - shouldFetch, - blob, - options, - { - onError: (err) => { - console.error(err); - handleStop(); - }, - onSuccess: (data, value) => { - setText(data); - onTextChange?.(value); - handleStop(); - }, + const { isLoading } = useOpenaiSTT(shouldFetch, blob, options, { + onError: (err, ...rest) => { + onError?.(err, ...rest); + console.error(err); + handleStop(); }, - fetcher, - ); + onSuccess: (data, ...rest) => { + onSuccess?.(data, ...rest); + setText(data); + onTextChange?.(data); + handleStop(); + onFinished?.(data, ...rest); + }, + }); return { blob, diff --git a/src/useOpenaiSTT/useOpenaiSTTWithSR.ts b/src/react/useOpenaiSTT/useOpenaiSTTWithSR.ts similarity index 57% rename from src/useOpenaiSTT/useOpenaiSTTWithSR.ts rename to src/react/useOpenaiSTT/useOpenaiSTTWithSR.ts index 6e92f74..b1bb7f6 100644 --- a/src/useOpenaiSTT/useOpenaiSTTWithSR.ts +++ b/src/react/useOpenaiSTT/useOpenaiSTTWithSR.ts @@ -1,14 +1,22 @@ import { useCallback, useState } from 'react'; -import { OpenaiSTTFetcher, useOpenaiSTT } from '@/useOpenaiSTT/useOpenaiSTT'; -import { useSpeechRecognition } from '@/useSpeechRecognition'; +import { useOpenaiSTT } from '@/react/useOpenaiSTT/useOpenaiSTT'; +import { useSpeechRecognition } from '@/react/useSpeechRecognition'; -import { OpenaiSpeechRecognitionOptions } from './useOpenaiSTTWithRecord'; +import { OpenaiSpeechRecognitionOptions, STTConfig } from './useOpenaiSTTWithRecord'; export const useOpenaiSTTWithSR = ( locale: string, - { onBolbAvailable, onTextChange, ...options }: OpenaiSpeechRecognitionOptions, - fetcher?: OpenaiSTTFetcher, + options: OpenaiSpeechRecognitionOptions, + { + onBolbAvailable, + onTextChange, + onSuccess, + onError, + onFinished, + onStart, + onStop, + }: STTConfig = {}, ) => { const [isGlobalLoading, setIsGlobalLoading] = useState(false); const [shouldFetch, setShouldFetch] = useState(false); @@ -33,34 +41,33 @@ export const useOpenaiSTTWithSR = ( }); const handleStart = useCallback(() => { + onStart?.(); setIsGlobalLoading(true); start(); setText(''); }, [start]); const handleStop = useCallback(() => { + onStop?.(); stop(); setShouldFetch(false); setIsGlobalLoading(false); }, [stop]); - const { isLoading } = useOpenaiSTT( - shouldFetch, - blob, - options, - { - onError: (err) => { - console.error(err); - handleStop(); - }, - onSuccess: (data) => { - setText(data); - onTextChange?.(data); - handleStop(); - }, + const { isLoading } = useOpenaiSTT(shouldFetch, blob, options, { + onError: (err, ...rest) => { + onError?.(err, ...rest); + console.error(err); + handleStop(); }, - 
fetcher, - ); + onSuccess: (data, ...rest) => { + onSuccess?.(data, ...rest); + setText(data); + onTextChange?.(data); + handleStop(); + onFinished?.(data, ...rest); + }, + }); return { blob, diff --git a/src/useOpenaiTTS/demos/index.tsx b/src/react/useOpenaiTTS/demos/index.tsx similarity index 86% rename from src/useOpenaiTTS/demos/index.tsx rename to src/react/useOpenaiTTS/demos/index.tsx index b07e534..5cdc0be 100644 --- a/src/useOpenaiTTS/demos/index.tsx +++ b/src/react/useOpenaiTTS/demos/index.tsx @@ -1,4 +1,5 @@ -import { AudioPlayer, openaiVoiceList, useOpenaiTTS } from '@lobehub/tts'; +import { OPENAI_TTS_API_URL, OpenaiTTS } from '@lobehub/tts'; +import { AudioPlayer, useOpenaiTTS } from '@lobehub/tts/react'; import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui'; import { Button, Input } from 'antd'; import { Volume2 } from 'lucide-react'; @@ -19,14 +20,15 @@ export default () => { label: 'OPENAI_PROXY_URL', value: '', }, + url: OPENAI_TTS_API_URL, }, { store }, ); const options: any = useControls( { - name: { - options: openaiVoiceList, + voice: { + options: OpenaiTTS.voiceList, value: 'alloy', }, }, diff --git a/src/useOpenaiTTS/index.md b/src/react/useOpenaiTTS/index.md similarity index 100% rename from src/useOpenaiTTS/index.md rename to src/react/useOpenaiTTS/index.md diff --git a/src/react/useOpenaiTTS/index.ts b/src/react/useOpenaiTTS/index.ts new file mode 100644 index 0000000..66648f5 --- /dev/null +++ b/src/react/useOpenaiTTS/index.ts @@ -0,0 +1,22 @@ +import { useState } from 'react'; + +import { TTSConfig, useTTS } from '@/react/useTTS'; +import { type OpenaiTtsOptions, fetchOpenaiTTS } from '@/services/fetchOpenaiTTS'; + +export const useOpenaiTTS = ( + defaultText: string, + options: OpenaiTtsOptions, + config?: TTSConfig, +) => { + const [text, setText] = useState(defaultText); + const rest = useTTS( + options.voice, + text, + (segmentText: string) => fetchOpenaiTTS(segmentText, options), + config, + ); + return { + setText, + ...rest, + }; +}; diff --git a/src/useSpeechRecognition/demos/PersistedSpeechRecognition.tsx b/src/react/useSpeechRecognition/demos/PersistedSpeechRecognition.tsx similarity index 93% rename from src/useSpeechRecognition/demos/PersistedSpeechRecognition.tsx rename to src/react/useSpeechRecognition/demos/PersistedSpeechRecognition.tsx index 5b0400c..57cf7c6 100644 --- a/src/useSpeechRecognition/demos/PersistedSpeechRecognition.tsx +++ b/src/react/useSpeechRecognition/demos/PersistedSpeechRecognition.tsx @@ -1,4 +1,4 @@ -import { usePersistedSpeechRecognition } from '@lobehub/tts'; +import { usePersistedSpeechRecognition } from '@lobehub/tts/react'; import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui'; import { Button, Input } from 'antd'; import { Mic, StopCircle } from 'lucide-react'; diff --git a/src/useSpeechRecognition/demos/index.tsx b/src/react/useSpeechRecognition/demos/index.tsx similarity index 94% rename from src/useSpeechRecognition/demos/index.tsx rename to src/react/useSpeechRecognition/demos/index.tsx index 59ff68c..f361c38 100644 --- a/src/useSpeechRecognition/demos/index.tsx +++ b/src/react/useSpeechRecognition/demos/index.tsx @@ -1,4 +1,4 @@ -import { useSpeechRecognition } from '@lobehub/tts'; +import { useSpeechRecognition } from '@lobehub/tts/react'; import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui'; import { Button, Input } from 'antd'; import { Mic, StopCircle } from 'lucide-react'; diff --git a/src/useSpeechRecognition/index.md 
b/src/react/useSpeechRecognition/index.md similarity index 100% rename from src/useSpeechRecognition/index.md rename to src/react/useSpeechRecognition/index.md diff --git a/src/useSpeechRecognition/index.ts b/src/react/useSpeechRecognition/index.ts similarity index 100% rename from src/useSpeechRecognition/index.ts rename to src/react/useSpeechRecognition/index.ts diff --git a/src/useSpeechRecognition/usePersistedSpeechRecognition.ts b/src/react/useSpeechRecognition/usePersistedSpeechRecognition.ts similarity index 95% rename from src/useSpeechRecognition/usePersistedSpeechRecognition.ts rename to src/react/useSpeechRecognition/usePersistedSpeechRecognition.ts index f2bd740..6b0e385 100644 --- a/src/useSpeechRecognition/usePersistedSpeechRecognition.ts +++ b/src/react/useSpeechRecognition/usePersistedSpeechRecognition.ts @@ -1,6 +1,6 @@ import { useCallback, useMemo, useState } from 'react'; -import { useAudioRecorder } from '@/useAudioRecorder'; +import { useAudioRecorder } from '@/react/useAudioRecorder'; import { useRecognition } from './useRecognition'; import { SpeechRecognitionOptions } from './useSpeechRecognition'; diff --git a/src/useSpeechRecognition/useRecognition.ts b/src/react/useSpeechRecognition/useRecognition.ts similarity index 100% rename from src/useSpeechRecognition/useRecognition.ts rename to src/react/useSpeechRecognition/useRecognition.ts diff --git a/src/useSpeechRecognition/useSpeechRecognition.ts b/src/react/useSpeechRecognition/useSpeechRecognition.ts similarity index 86% rename from src/useSpeechRecognition/useSpeechRecognition.ts rename to src/react/useSpeechRecognition/useSpeechRecognition.ts index 285b666..8e80c93 100644 --- a/src/useSpeechRecognition/useSpeechRecognition.ts +++ b/src/react/useSpeechRecognition/useSpeechRecognition.ts @@ -1,7 +1,7 @@ import { useCallback } from 'react'; -import { useAudioRecorder } from '@/useAudioRecorder'; -import { useRecognition } from '@/useSpeechRecognition/useRecognition'; +import { useAudioRecorder } from '@/react/useAudioRecorder'; +import { useRecognition } from '@/react/useSpeechRecognition/useRecognition'; export interface SpeechRecognitionOptions { onBolbAvailable?: (blob: Blob) => void; diff --git a/src/useSpeechSynthes/demos/index.tsx b/src/react/useSpeechSynthes/demos/index.tsx similarity index 87% rename from src/useSpeechSynthes/demos/index.tsx rename to src/react/useSpeechSynthes/demos/index.tsx index 162c9d6..9ea314a 100644 --- a/src/useSpeechSynthes/demos/index.tsx +++ b/src/react/useSpeechSynthes/demos/index.tsx @@ -1,19 +1,18 @@ -import { genLevaOptions, getSpeechSynthesVoiceOptions, useSpeechSynthes } from '@lobehub/tts'; +import { getSpeechSynthesVoiceOptions } from '@lobehub/tts'; +import { useSpeechSynthes } from '@lobehub/tts/react'; import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui'; import { Button, Input } from 'antd'; import { StopCircle, Volume2 } from 'lucide-react'; import { Flexbox } from 'react-layout-kit'; +import { genLevaOptions } from '../../_util/leva'; + const defaultText = '这是一段使用 Speech Synthes 的语音演示'; export default () => { const store = useCreateStore(); const options: any = useControls( { - name: { - options: genLevaOptions(getSpeechSynthesVoiceOptions()), - value: '婷婷', - }, pitch: { max: 1, min: -1, @@ -26,6 +25,10 @@ export default () => { step: 0.1, value: 0, }, + voice: { + options: genLevaOptions(getSpeechSynthesVoiceOptions()), + value: '婷婷', + }, }, { store }, ); diff --git a/src/useSpeechSynthes/index.md b/src/react/useSpeechSynthes/index.md 
similarity index 100% rename from src/useSpeechSynthes/index.md rename to src/react/useSpeechSynthes/index.md diff --git a/src/useSpeechSynthes/index.ts b/src/react/useSpeechSynthes/index.ts similarity index 83% rename from src/useSpeechSynthes/index.ts rename to src/react/useSpeechSynthes/index.ts index e34cea2..af9b347 100644 --- a/src/useSpeechSynthes/index.ts +++ b/src/react/useSpeechSynthes/index.ts @@ -2,18 +2,18 @@ import { useCallback, useMemo, useState } from 'react'; import { SsmlOptions } from '@/utils/genSSML'; -export const useSpeechSynthes = (defaultText: string, { name, rate, pitch }: SsmlOptions) => { +export const useSpeechSynthes = (defaultText: string, { voice, rate, pitch }: SsmlOptions) => { const [voiceList, setVoiceList] = useState(speechSynthesis.getVoices()); const [text, setText] = useState(defaultText); const [isLoading, setIsLoading] = useState(false); const speechSynthesisUtterance = useMemo(() => { const utterance = new SpeechSynthesisUtterance(text); - utterance.voice = voiceList.find((item) => item.name === name) as any; + utterance.voice = voiceList.find((item) => item.name === voice) as any; if (pitch) utterance.pitch = pitch * 10; if (rate) utterance.rate = rate * 10; return utterance; - }, [text, voiceList, rate, pitch, name]); + }, [text, voiceList, rate, pitch, voice]); speechSynthesis.onvoiceschanged = () => { setVoiceList(speechSynthesis.getVoices()); diff --git a/src/useTTS/index.ts b/src/react/useTTS/index.ts similarity index 72% rename from src/useTTS/index.ts rename to src/react/useTTS/index.ts index b548b24..48be2bb 100644 --- a/src/useTTS/index.ts +++ b/src/react/useTTS/index.ts @@ -1,11 +1,11 @@ import { useCallback, useEffect, useState } from 'react'; -import useSWR from 'swr'; +import useSWR, { type SWRConfiguration } from 'swr'; -import { AudioProps } from '@/AudioPlayer'; -import { useStreamAudioPlayer } from '@/hooks/useStreamAudioPlayer'; +import { AudioProps } from '@/react/AudioPlayer'; +import { useStreamAudioPlayer } from '@/react/hooks/useStreamAudioPlayer'; import { splitTextIntoSegments } from '@/utils/splitTextIntoSegments'; -export interface TTSHook { +export interface TTSHook extends SWRConfiguration { audio: AudioProps; isGlobalLoading: boolean; isLoading: boolean; @@ -13,10 +13,17 @@ export interface TTSHook { stop: () => void; } +export interface TTSConfig extends SWRConfiguration { + onFinish?: SWRConfiguration['onSuccess']; + onStart?: () => void; + onStop?: () => void; +} + export const useTTS = ( key: string, text: string, fetchTTS: (segmentText: string) => Promise, + { onError, onSuccess, onFinish, onStart, onStop, ...restSWRConfig }: TTSConfig = {}, ): TTSHook => { const { load, reset, ...rest } = useStreamAudioPlayer(); const [shouldFetch, setShouldFetch] = useState(false); @@ -33,6 +40,7 @@ export const useTTS = ( }, []); const handleStop = useCallback(() => { + onStop?.(); handleReset(); }, []); @@ -40,24 +48,29 @@ export const useTTS = ( shouldFetch && textArray?.length > 0 ? 
[key, textArray?.[index]] : null, async () => await fetchTTS(textArray[index]), { - onError: (err) => { + onError: (err, ...rest) => { + onError?.(err, ...rest); console.error(err); handleReset(); }, - onSuccess: (data) => { + onSuccess: (data, ...rest) => { + onSuccess?.(data, ...rest); load(data); if (index < textArray.length - 1) { setIndex(index + 1); } else { + onFinish?.(data, ...rest); setShouldFetch(false); setIsGlobalLoading(false); } }, + ...restSWRConfig, }, ); const handleStart = useCallback(() => { if (isLoading) return; + onStart?.(); reset(); setShouldFetch(true); setIsGlobalLoading(true); diff --git a/src/server.ts b/src/server.ts deleted file mode 100644 index 4c232a9..0000000 --- a/src/server.ts +++ /dev/null @@ -1,2 +0,0 @@ -export { handleAzureSpeechRequest } from './server/handleAzureSpeechRequest'; -export { handleMicrosoftSpeechRequest } from './server/handleMicrosoftSpeechRequest'; diff --git a/src/server/cors.ts b/src/server/cors.ts deleted file mode 100644 index a8be6c5..0000000 --- a/src/server/cors.ts +++ /dev/null @@ -1,140 +0,0 @@ -/** - * Multi purpose CORS lib. - * Note: Based on the `cors` package in npm but using only - * web APIs. Feel free to use it in your own projects. - */ - -type StaticOrigin = boolean | string | RegExp | (boolean | string | RegExp)[]; - -type OriginFn = (origin: string | undefined, req: Request) => StaticOrigin | Promise; - -interface CorsOptions { - allowedHeaders?: string | string[]; - credentials?: boolean; - exposedHeaders?: string | string[]; - maxAge?: number; - methods?: string | string[]; - optionsSuccessStatus?: number; - origin?: StaticOrigin | OriginFn; - preflightContinue?: boolean; -} - -const defaultOptions: CorsOptions = { - methods: 'GET,HEAD,PUT,PATCH,POST,DELETE', - optionsSuccessStatus: 204, - origin: '*', - preflightContinue: false, -}; - -function isOriginAllowed(origin: string, allowed: StaticOrigin): boolean { - return Array.isArray(allowed) - ? allowed.some((o) => isOriginAllowed(origin, o)) - : typeof allowed === 'string' - ? origin === allowed - : allowed instanceof RegExp - ? allowed.test(origin) - : !!allowed; -} - -function getOriginHeaders(reqOrigin: string | undefined, origin: StaticOrigin) { - const headers = new Headers(); - - if (origin === '*') { - // Allow any origin - headers.set('Access-Control-Allow-Origin', '*'); - } else if (typeof origin === 'string') { - // Fixed origin - headers.set('Access-Control-Allow-Origin', origin); - headers.append('Vary', 'Origin'); - } else { - const allowed = isOriginAllowed(reqOrigin ?? '', origin); - - if (allowed && reqOrigin) { - headers.set('Access-Control-Allow-Origin', reqOrigin); - } - headers.append('Vary', 'Origin'); - } - - return headers; -} - -// originHeadersFromReq - -async function originHeadersFromReq(req: Request, origin: StaticOrigin | OriginFn) { - const reqOrigin = req.headers.get('Origin') || undefined; - const value = typeof origin === 'function' ? 
await origin(reqOrigin, req) : origin; - - if (!value) return; - return getOriginHeaders(reqOrigin, value); -} - -function getAllowedHeaders(req: Request, allowed?: string | string[]) { - const headers = new Headers(); - - if (!allowed) { - allowed = req.headers.get('Access-Control-Request-Headers')!; - headers.append('Vary', 'Access-Control-Request-Headers'); - } else if (Array.isArray(allowed)) { - // If the allowed headers is an array, turn it into a string - allowed = allowed.join(','); - } - if (allowed) { - headers.set('Access-Control-Allow-Headers', allowed); - } - - return headers; -} - -export default async function cors(req: Request, res: Response, options?: CorsOptions) { - const opts = { ...defaultOptions, ...options }; - const { headers } = res; - const originHeaders = await originHeadersFromReq(req, opts.origin ?? false); - const mergeHeaders = (v: string, k: string) => { - if (k === 'Vary') headers.append(k, v); - else headers.set(k, v); - }; - - // If there's no origin we won't touch the response - if (!originHeaders) return res; - - originHeaders.forEach(mergeHeaders); - - if (opts.credentials) { - headers.set('Access-Control-Allow-Credentials', 'true'); - } - - const exposed = Array.isArray(opts.exposedHeaders) - ? opts.exposedHeaders.join(',') - : opts.exposedHeaders; - - if (exposed) { - headers.set('Access-Control-Expose-Headers', exposed); - } - - // Handle the preflight request - if (req.method === 'OPTIONS') { - if (opts.methods) { - const methods = Array.isArray(opts.methods) ? opts.methods.join(',') : opts.methods; - - headers.set('Access-Control-Allow-Methods', methods); - } - - getAllowedHeaders(req, opts.allowedHeaders).forEach(mergeHeaders); - - if (typeof opts.maxAge === 'number') { - headers.set('Access-Control-Max-Age', String(opts.maxAge)); - } - - if (opts.preflightContinue) return res; - - headers.set('Content-Length', '0'); - return new Response(null, { headers, status: opts.optionsSuccessStatus }); - } - - // If we got here, it's a normal request - return res; -} - -export function initCors(options?: CorsOptions) { - return (req: Request, res: Response) => cors(req, res, options); -} diff --git a/src/server/createEdgeSpeechComletion.ts b/src/server/createEdgeSpeechComletion.ts new file mode 100644 index 0000000..c0a478c --- /dev/null +++ b/src/server/createEdgeSpeechComletion.ts @@ -0,0 +1,103 @@ +import qs from 'query-string'; +import { v4 as uuidv4 } from 'uuid'; + +import { EDGE_API_TOKEN, EDGE_SPEECH_URL } from '../const/api'; +import { EdgeSpeechPayload } from '../server/types'; +import { genSSML } from '../utils/genSSML'; +import { genSendContent } from '../utils/genSendContent'; +import { getHeadersAndData } from '../utils/getHeadersAndData'; + +const configConent = JSON.stringify({ + context: { + synthesis: { + audio: { + metadataoptions: { sentenceBoundaryEnabled: false, wordBoundaryEnabled: true }, + outputFormat: 'audio-24khz-48kbitrate-mono-mp3', + }, + }, + }, +}); + +const genHeader = (connectId: string) => { + const date = new Date().toString(); + const configHeader = { + 'Content-Type': 'application/json; charset=utf-8', + 'Path': 'speech.config', + 'X-Timestamp': date, + }; + const contentHeader = { + 'Content-Type': 'application/ssml+xml', + 'Path': 'ssml', + 'X-RequestId': connectId, + 'X-Timestamp': date, + }; + return { + configHeader, + contentHeader, + }; +}; + +interface CreateEdgeSpeechComletionOptions { + payload: EdgeSpeechPayload; +} + +export const createEdgeSpeechComletion = async ({ + payload, +}: 
CreateEdgeSpeechComletionOptions): Promise => { + const { input, options } = payload; + + const connectId = uuidv4().replaceAll('-', ''); + const url = qs.stringifyUrl({ + query: { + ConnectionId: connectId, + TrustedClientToken: EDGE_API_TOKEN, + }, + url: EDGE_SPEECH_URL, + }); + + const { configHeader, contentHeader } = genHeader(connectId); + const config = genSendContent(configHeader, configConent); + const content = genSendContent(contentHeader, genSSML(input, options)); + + return new Promise((resolve, reject) => { + const ws = new WebSocket(url); + ws.binaryType = 'arraybuffer'; + const onOpen = () => { + ws.send(config); + ws.send(content); + }; + let audioData = new ArrayBuffer(0); + const onMessage = async (event: MessageEvent) => { + if (typeof event.data === 'string') { + const { headers } = getHeadersAndData(event.data); + switch (headers['Path']) { + case 'turn.end': { + ws.close(); + if (!audioData.byteLength) return; + const res = new Response(audioData); + resolve(res); + break; + } + } + } else if (event.data instanceof ArrayBuffer) { + const dataview = new DataView(event.data); + const headerLength = dataview.getInt16(0); + if (event.data.byteLength > headerLength + 2) { + const newBody = event.data.slice(2 + headerLength); + const newAudioData = new ArrayBuffer(audioData.byteLength + newBody.byteLength); + const mergedUint8Array = new Uint8Array(newAudioData); + mergedUint8Array.set(new Uint8Array(audioData), 0); + mergedUint8Array.set(new Uint8Array(newBody), audioData.byteLength); + audioData = newAudioData; + } + } + }; + const onError = () => { + reject(new Error('WebSocket error occurred.')); + ws.close(); + }; + ws.addEventListener('open', onOpen); + ws.addEventListener('message', onMessage); + ws.addEventListener('error', onError); + }); +}; diff --git a/src/server/handleMicrosoftSpeechRequest.ts b/src/server/createMicrosoftSpeechComletion.ts similarity index 55% rename from src/server/handleMicrosoftSpeechRequest.ts rename to src/server/createMicrosoftSpeechComletion.ts index 1032970..9b2f854 100644 --- a/src/server/handleMicrosoftSpeechRequest.ts +++ b/src/server/createMicrosoftSpeechComletion.ts @@ -1,8 +1,18 @@ import { v4 as uuidv4 } from 'uuid'; -import { MICROSOFT_SPPECH_URL } from '../const/api'; +import { MICROSOFT_SPEECH_URL } from '../const/api'; +import { MicrosoftSpeechPayload } from '../server/types'; +import { genSSML } from '../utils/genSSML'; + +interface CreateMicrosoftSpeechComletionOptions { + payload: MicrosoftSpeechPayload; +} + +export const createMicrosoftSpeechComletion = async ({ + payload, +}: CreateMicrosoftSpeechComletionOptions) => { + const { input, options } = payload; -export const handleMicrosoftSpeechRequest = async (req: Request, options?: any) => { const DEFAULT_HEADERS = new Headers({ 'accept': '*/*', 'accept-language': 'zh-CN,zh;q=0.9', @@ -20,13 +30,21 @@ export const handleMicrosoftSpeechRequest = async (req: Request, options?: any) 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', }); - const res = await fetch(MICROSOFT_SPPECH_URL, { - body: req.body, + const body = JSON.stringify({ + offsetInPlainText: 0, + properties: { + SpeakTriggerSource: 'AccTuningPagePlayButton', + }, + ssml: genSSML(input, options), + ttsAudioFormat: 'audio-24khz-160kbitrate-mono-mp3', + }); + + const res = await fetch(MICROSOFT_SPEECH_URL, { + body, headers: DEFAULT_HEADERS, method: 'POST', // @ts-ignore responseType: 'arraybuffer', - ...options, }); return res; diff --git 
a/src/server/createOpenaiAudioSpeechCompletion.ts b/src/server/createOpenaiAudioSpeechCompletion.ts new file mode 100644 index 0000000..8b0a1fe --- /dev/null +++ b/src/server/createOpenaiAudioSpeechCompletion.ts @@ -0,0 +1,26 @@ +import OpenAI from 'openai'; + +import { OpenAITTSPayload } from './types'; + +interface CreateOpenaiAudioSpeechCompletionOptions { + openai: OpenAI; + payload: OpenAITTSPayload; +} + +export const createOpenaiAudioSpeechCompletion = async ({ + payload, + openai, +}: CreateOpenaiAudioSpeechCompletionOptions) => { + const { options, input } = payload; + + const response = await openai.audio.speech.create( + { + input, + model: options.model, + voice: options.voice, + }, + { headers: { Accept: '*/*' } }, + ); + + return response; +}; diff --git a/src/server/createOpenaiAudioTranscriptionsCompletion.ts b/src/server/createOpenaiAudioTranscriptionsCompletion.ts new file mode 100644 index 0000000..880cb3c --- /dev/null +++ b/src/server/createOpenaiAudioTranscriptionsCompletion.ts @@ -0,0 +1,29 @@ +import OpenAI from 'openai'; + +import { OpenAISTTPayload } from './types'; + +interface CreateOpenaiAudioTranscriptionsOptions { + openai: OpenAI; + payload: OpenAISTTPayload; +} + +export const createOpenaiAudioTranscriptionsCompletion = async ({ + payload, + openai, +}: CreateOpenaiAudioTranscriptionsOptions) => { + const { blob, options } = payload; + + const file = new File([blob], `${Date.now()}.${options.mineType.extension}`, { + type: options.mineType.mineType, + }); + + const response = await openai.audio.transcriptions.create( + { + file, + model: options.model, + }, + { headers: { Accept: '*/*' } }, + ); + + return response; +}; diff --git a/src/server/getAllowOrigins.ts b/src/server/getAllowOrigins.ts deleted file mode 100644 index 3fccfdf..0000000 --- a/src/server/getAllowOrigins.ts +++ /dev/null @@ -1,15 +0,0 @@ -const ALLOW_ORIGINS = process.env?.ALLOW_ORIGINS?.split(',') || undefined; - -export const getAllowOrigins = (req: Request) => { - let origin = '*'; - - if (ALLOW_ORIGINS) { - const reqOrigin = req.headers.get('origin'); - if (reqOrigin && ALLOW_ORIGINS.includes(reqOrigin)) { - origin = reqOrigin; - } else { - return; - } - } - return origin; -}; diff --git a/src/server/handleAzureSpeechRequest.ts b/src/server/handleAzureSpeechRequest.ts deleted file mode 100644 index 9c9033f..0000000 --- a/src/server/handleAzureSpeechRequest.ts +++ /dev/null @@ -1,52 +0,0 @@ -import { - AudioConfig, - PropertyId, - ResultReason, - SpeechConfig, - SpeechSynthesisOutputFormat, - SpeechSynthesisResult, - SpeechSynthesizer, -} from 'microsoft-cognitiveservices-speech-sdk'; - -import { AZURE_SPEECH_KEY, AZURE_SPEECH_REGION } from '../const/api'; - -const fetchAzureSpeech = async (ssml: string, { api }: any): Promise => { - const key = api.key || AZURE_SPEECH_KEY; - const region = api.key || AZURE_SPEECH_REGION; - const speechConfig = SpeechConfig.fromSubscription(key, region); - speechConfig.setProperty(PropertyId.SpeechServiceResponse_RequestSentenceBoundary, 'true'); - speechConfig.speechSynthesisOutputFormat = SpeechSynthesisOutputFormat.Webm24Khz16BitMonoOpus; - - const audioConfig = AudioConfig.fromDefaultSpeakerOutput(); - const synthesizer: SpeechSynthesizer | null = new SpeechSynthesizer(speechConfig, audioConfig); - - const completeCb = async ( - result: SpeechSynthesisResult, - resolve: (value: ArrayBuffer) => void, - ) => { - if (result.reason === ResultReason.SynthesizingAudioCompleted) { - const audioData = result.audioData; - resolve(audioData); - } - 
synthesizer.close(); - }; - - const errCb = (err: string, reject: (err?: any) => void) => { - reject(err); - synthesizer.close(); - }; - - return new Promise((resolve, reject) => { - synthesizer.speakSsmlAsync( - ssml, - (result) => completeCb(result, resolve), - (err) => errCb(err, reject), - ); - }); -}; - -export const handleAzureSpeechRequest = async (req: Request) => { - const { ssml, ...options } = req.body as any; - const data = await fetchAzureSpeech(ssml, options); - return new Response(data); -}; diff --git a/src/server/types.ts b/src/server/types.ts new file mode 100644 index 0000000..4ddfbdf --- /dev/null +++ b/src/server/types.ts @@ -0,0 +1,58 @@ +import { SsmlOptions } from '@/utils/genSSML'; +import { RecordMineType } from '@/utils/getRecordMineType'; + +export interface MicrosoftSpeechPayload { + /** + * @title Text to synthesize + */ + input: string; + /** + * @title SSML synthesis options + */ + options: SsmlOptions; +} + +export interface EdgeSpeechPayload { + /** + * @title Text to synthesize + */ + input: string; + /** + * @title SSML synthesis options + */ + options: Pick<SsmlOptions, 'voice'>; +} + +export interface OpenAITTSPayload { + /** + * @title Text to synthesize + */ + input: string; + options: { + /** + * @title Model name for speech synthesis + */ + model: string; + /** + * @title Voice name for speech synthesis + */ + voice: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer'; + }; +} + +export interface OpenAISTTPayload { + /** + * @title Audio file for speech recognition + */ + blob: Blob; + options: { + /** + * @title Audio file format + */ + mineType: RecordMineType; + /** + * @title Model name for speech recognition + */ + model: string; + }; +} diff --git a/src/services/fetchAzureSpeech.ts b/src/services/fetchAzureSpeech.ts deleted file mode 100644 index ea495a3..0000000 --- a/src/services/fetchAzureSpeech.ts +++ /dev/null @@ -1,39 +0,0 @@ -import { AZURE_SPEECH_KEY, AZURE_SPEECH_PROXY_URL, AZURE_SPEECH_REGION } from '@/const/api'; -import { arrayBufferConvert } from '@/utils/arrayBufferConvert'; -import { type SsmlOptions, genSSML } from '@/utils/genSSML'; - -export interface AzureSpeechOptions extends SsmlOptions { - api?: { - key?: string; - proxy?: string; - region?: string; - }; -} - -export const fetchAzureSpeech = async ( - text: string, - { api = {}, ...options }: AzureSpeechOptions, -): Promise<AudioBuffer> => { - const data = JSON.stringify({ - api: { - key: api?.key || AZURE_SPEECH_KEY, - region: api?.region || AZURE_SPEECH_REGION, - }, - ssml: genSSML(text, options), - }); - const url = api?.proxy || AZURE_SPEECH_PROXY_URL; - - const response: Response = await fetch(url, { - body: data, - method: 'POST', - // @ts-ignore - responseType: 'arraybuffer', - }); - - if (!response.ok) { - throw new Error('Network response was not ok'); - } - - const arrayBuffer = await response.arrayBuffer(); - return await arrayBufferConvert(arrayBuffer); -}; diff --git a/src/services/fetchEdgeSpeech.ts b/src/services/fetchEdgeSpeech.ts index 7ab2b50..c85cd0c 100644 --- a/src/services/fetchEdgeSpeech.ts +++ b/src/services/fetchEdgeSpeech.ts @@ -1,104 +1,29 @@ -import qs from 'query-string'; -import { v4 as uuidv4 } from 'uuid'; - -import { EDDGE_API_TOKEN, EDDGE_PROXY_URL } from '@/const/api'; +import { createEdgeSpeechComletion } from '@/server/createEdgeSpeechComletion'; +import { EdgeSpeechPayload } from '@/server/types'; import { arrayBufferConvert } from '@/utils/arrayBufferConvert'; -import { type SsmlOptions, genSSML } from '@/utils/genSSML'; -import { genSendContent } from '@/utils/genSendContent'; -import { getHeadersAndData } from '@/utils/getHeadersAndData'; - -const configConent = JSON.stringify({ - context: { - synthesis: { - audio:
{ - metadataoptions: { sentenceBoundaryEnabled: false, wordBoundaryEnabled: true }, - outputFormat: 'audio-24khz-48kbitrate-mono-mp3', - }, - }, - }, -}); +import { type SsmlOptions } from '@/utils/genSSML'; -const genHeader = (connectId: string) => { - const date = new Date().toString(); - const configHeader = { - 'Content-Type': 'application/json; charset=utf-8', - 'Path': 'speech.config', - 'X-Timestamp': date, - }; - const contentHeader = { - 'Content-Type': 'application/ssml+xml', - 'Path': 'ssml', - 'X-RequestId': connectId, - 'X-Timestamp': date, - }; - return { - configHeader, - contentHeader, - }; -}; - -export interface EdgeSpeechOptions extends Pick { +export interface EdgeSpeechOptions extends Pick { api?: { - key?: string; - proxy?: string; + url?: string; }; } + export const fetchEdgeSpeech = async ( - text: string, - { api = {}, ...options }: EdgeSpeechOptions, + input: string, + { api, ...options }: EdgeSpeechOptions, ): Promise => { - const connectId = uuidv4().replaceAll('-', ''); - const url = qs.stringifyUrl({ - query: { - ConnectionId: connectId, - TrustedClientToken: api?.key || EDDGE_API_TOKEN, - }, - url: api?.proxy || EDDGE_PROXY_URL, - }); + const payload: EdgeSpeechPayload = { input, options }; + + const response = await (api?.url + ? fetch(api.url, { body: JSON.stringify(payload), method: 'POST' }) + : createEdgeSpeechComletion({ payload })); - const { configHeader, contentHeader } = genHeader(connectId); - const config = genSendContent(configHeader, configConent); - const content = genSendContent(contentHeader, genSSML(text, options)); + if (!response.ok) { + throw new Error('Network response was not ok'); + } - return new Promise((resolve, reject) => { - const ws = new WebSocket(url); - ws.binaryType = 'arraybuffer'; - const onOpen = () => { - ws.send(config); - ws.send(content); - }; - let audioData = new ArrayBuffer(0); - const onMessage = async (event: MessageEvent) => { - if (typeof event.data === 'string') { - const { headers } = getHeadersAndData(event.data); - switch (headers['Path']) { - case 'turn.end': { - ws.close(); - if (!audioData.byteLength) return; - const audioBuffer = await arrayBufferConvert(audioData); - resolve(audioBuffer); - break; - } - } - } else if (event.data instanceof ArrayBuffer) { - const dataview = new DataView(event.data); - const headerLength = dataview.getInt16(0); - if (event.data.byteLength > headerLength + 2) { - const newBody = event.data.slice(2 + headerLength); - const newAudioData = new ArrayBuffer(audioData.byteLength + newBody.byteLength); - const mergedUint8Array = new Uint8Array(newAudioData); - mergedUint8Array.set(new Uint8Array(audioData), 0); - mergedUint8Array.set(new Uint8Array(newBody), audioData.byteLength); - audioData = newAudioData; - } - } - }; - const onError = () => { - reject(new Error('WebSocket error occurred.')); - ws.close(); - }; - ws.addEventListener('open', onOpen); - ws.addEventListener('message', onMessage); - ws.addEventListener('error', onError); - }); + const arrayBuffer = await response.arrayBuffer(); + const audioBuffer = await arrayBufferConvert(arrayBuffer); + return audioBuffer; }; diff --git a/src/services/fetchMicrosoftSpeech.ts b/src/services/fetchMicrosoftSpeech.ts index 323c80f..4a0594a 100644 --- a/src/services/fetchMicrosoftSpeech.ts +++ b/src/services/fetchMicrosoftSpeech.ts @@ -1,39 +1,29 @@ -import { MICROSOFT_SPEECH_PROXY_URL } from '@/const/api'; +import { createMicrosoftSpeechComletion } from '@/server/createMicrosoftSpeechComletion'; +import { MicrosoftSpeechPayload } 
from '@/server/types'; import { arrayBufferConvert } from '@/utils/arrayBufferConvert'; import { type SsmlOptions } from '@/utils/genSSML'; -import { genSSML } from '@/utils/genSSML'; export interface MicrosoftSpeechOptions extends SsmlOptions { api?: { - proxy?: string; + url?: string; }; } export const fetchMicrosoftSpeech = async ( - text: string, - { api = {}, ...options }: MicrosoftSpeechOptions, + input: string, + { api, ...options }: MicrosoftSpeechOptions, ): Promise => { - const data = JSON.stringify({ - offsetInPlainText: 0, - properties: { - SpeakTriggerSource: 'AccTuningPagePlayButton', - }, - ssml: genSSML(text, options), - ttsAudioFormat: 'audio-24khz-160kbitrate-mono-mp3', - }); - const url = api?.proxy || MICROSOFT_SPEECH_PROXY_URL; + const payload: MicrosoftSpeechPayload = { input, options }; - const response: Response = await fetch(url, { - body: data, - method: 'POST', - // @ts-ignore - responseType: 'arraybuffer', - }); + const response = await (api?.url + ? fetch(api.url, { body: JSON.stringify(payload), method: 'POST' }) + : createMicrosoftSpeechComletion({ payload })); if (!response.ok) { throw new Error('Network response was not ok'); } const arrayBuffer = await response.arrayBuffer(); - return await arrayBufferConvert(arrayBuffer); + const audioBuffer = await arrayBufferConvert(arrayBuffer); + return audioBuffer; }; diff --git a/src/services/fetchOpenaiSTT.ts b/src/services/fetchOpenaiSTT.ts index d29e643..4fa8677 100644 --- a/src/services/fetchOpenaiSTT.ts +++ b/src/services/fetchOpenaiSTT.ts @@ -1,39 +1,50 @@ -import { v4 as uuidv4 } from 'uuid'; - -import { OPENAI_API_KEY, OPENAI_STT_URL } from '@/const/api'; +import { OPENAI_BASE_URL, OPENAI_STT_URL } from '@/const/api'; +import { OpenAISTTPayload } from '@/server/types'; import { RecordMineType, getRecordMineType } from '@/utils/getRecordMineType'; +const genSTTBody = ({ blob, options }: OpenAISTTPayload) => { + const filename = `${Date.now()}.${options.mineType.extension}`; + const file = new File([blob], filename, { + type: options.mineType.mineType, + }); + + const body = new FormData(); + body.append('file', file); + body.append('model', options.model); + return body; +}; export interface OpenaiSttOptions { api?: { key?: string; proxy?: string; + url?: string; }; mineType?: RecordMineType; model?: 'whisper-1'; } - -// 纯文本生成语音 export const fetchOpenaiSTT = async ( speech: Blob, { api = {}, model = 'whisper-1', mineType }: OpenaiSttOptions, ): Promise => { - const key = api?.key || OPENAI_API_KEY; - const url = OPENAI_STT_URL(api?.proxy); - - const headers = new Headers({ - Authorization: `Bearer ${key}`, - }); - - const filename = `${uuidv4()}.${mineType?.extension || getRecordMineType().extension}`; - const file = new File([speech], filename, { - type: mineType?.mineType || getRecordMineType().mineType, - }); - - const body = new FormData(); - body.append('file', file); - body.append('model', model); + const { key, url = OPENAI_BASE_URL } = api; + + const payload: OpenAISTTPayload = { + blob: speech, + options: { + mineType: mineType || getRecordMineType(), + model, + }, + }; - const response: Response = await fetch(url, { body, headers, method: 'POST' }); + const response = await (api?.url + ? 
fetch(api.url, { body: JSON.stringify(payload), method: 'POST' }) + : fetch(OPENAI_STT_URL(url), { + body: genSTTBody(payload), + headers: new Headers({ + Authorization: `Bearer ${key}`, + }), + method: 'POST', + })); if (!response.ok) { throw new Error('Network response was not ok'); @@ -41,5 +52,5 @@ export const fetchOpenaiSTT = async ( const json = await response.json(); - return json?.text; + return json.text; }; diff --git a/src/services/fetchOpenaiTTS.ts b/src/services/fetchOpenaiTTS.ts index 75d6218..45d45a9 100644 --- a/src/services/fetchOpenaiTTS.ts +++ b/src/services/fetchOpenaiTTS.ts @@ -1,36 +1,47 @@ -import { OPENAI_API_KEY, OPENAI_TTS_URL } from '@/const/api'; +import { OPENAI_BASE_URL, OPENAI_TTS_URL } from '@/const/api'; +import { OpenAITTSPayload } from '@/server/types'; import { arrayBufferConvert } from '@/utils/arrayBufferConvert'; import { type SsmlOptions } from '@/utils/genSSML'; export type OpenaiVoice = 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer'; -export interface OpenaiTtsOptions extends Pick { +export interface OpenaiTtsOptions extends Pick, OpenAITTSPayload { api: { key?: string; proxy?: string; + url?: string; }; model?: 'tts-1' | 'tts-1-hd'; - name: OpenaiVoice | string; + voice: OpenaiVoice; } export const fetchOpenaiTTS = async ( - text: string, - { api = {}, model = 'tts-1', ...options }: OpenaiTtsOptions, + input: string, + { api = {}, model = 'tts-1', voice }: OpenaiTtsOptions, ): Promise => { - const key = api?.key || OPENAI_API_KEY; - const url = OPENAI_TTS_URL(api?.proxy); + const { key, url = OPENAI_BASE_URL } = api; - const headers = new Headers({ - 'Authorization': `Bearer ${key}`, - 'Content-Type': 'application/json', - }); - - const body = JSON.stringify({ - input: text, - model, - voice: options.name, - }); + const payload: OpenAITTSPayload = { + input, + options: { + model, + voice, + }, + }; - const response: Response = await fetch(url, { body, headers, method: 'POST' }); + const response = await (api?.url + ? 
fetch(api.url, { body: JSON.stringify(payload), method: 'POST' }) + : fetch(OPENAI_TTS_URL(url), { + body: JSON.stringify({ + input, + model, + voice, + }), + headers: new Headers({ + 'Authorization': `Bearer ${key}`, + 'Content-Type': 'application/json', + }), + method: 'POST', + })); if (!response.ok) { throw new Error('Network response was not ok'); diff --git a/src/useAzureSpeech/demos/index.tsx b/src/useAzureSpeech/demos/index.tsx deleted file mode 100644 index 8d5e7b6..0000000 --- a/src/useAzureSpeech/demos/index.tsx +++ /dev/null @@ -1,84 +0,0 @@ -import { AudioPlayer, genLevaOptions, getAzureVoiceOptions, useAzureSpeech } from '@lobehub/tts'; -import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui'; -import { Button, Input } from 'antd'; -import { Volume2 } from 'lucide-react'; -import { Flexbox } from 'react-layout-kit'; - -const defaultText = '这是一段使用 Azure Speech 的语音演示'; - -export default () => { - const store = useCreateStore(); - - const api: any = useControls( - { - key: { - label: 'AZURE_SPEECH_KEY', - value: '', - }, - region: { - label: 'AZURE_SPEECH_REGION', - value: '', - }, - }, - { store }, - ); - - const options: any = useControls( - { - name: { - options: genLevaOptions(getAzureVoiceOptions()), - value: 'zh-CN-YunxiaNeural', - }, - pitch: { - max: 1, - min: -1, - step: 0.1, - value: 0, - }, - rate: { - max: 1, - min: -1, - step: 0.1, - value: 0, - }, - style: { - options: [ - 'affectionate', - 'angry', - 'calm', - 'cheerful', - 'disgruntled', - 'embarrassed', - 'fearful', - 'general', - 'gentle', - 'sad', - 'serious', - ], - value: 'general', - }, - }, - { store }, - ); - const { setText, isGlobalLoading, audio, start, stop } = useAzureSpeech(defaultText, { - api, - ...options, - }); - return ( - - - {isGlobalLoading ? 
( - - ) : ( - - )} - setText(e.target.value)} /> - - - - ); -}; diff --git a/src/useAzureSpeech/index.md b/src/useAzureSpeech/index.md deleted file mode 100644 index 1bbc032..0000000 --- a/src/useAzureSpeech/index.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -nav: Components -group: TTS -title: useAzureSpeech ---- - -## hooks - -- ENV: `AZURE_SPEECH_KEY` `AZURE_SPEECH_REGION` - - diff --git a/src/useAzureSpeech/index.ts b/src/useAzureSpeech/index.ts deleted file mode 100644 index 6e29ae5..0000000 --- a/src/useAzureSpeech/index.ts +++ /dev/null @@ -1,15 +0,0 @@ -import { useState } from 'react'; - -import { AzureSpeechOptions, fetchAzureSpeech } from '@/services/fetchAzureSpeech'; -import { useTTS } from '@/useTTS'; - -export const useAzureSpeech = (defaultText: string, options: AzureSpeechOptions) => { - const [text, setText] = useState(defaultText); - const rest = useTTS(options.name, text, (segmentText: string) => - fetchAzureSpeech(segmentText, options), - ); - return { - setText, - ...rest, - }; -}; diff --git a/src/useEdgeSpeech/index.ts b/src/useEdgeSpeech/index.ts deleted file mode 100644 index ff2f7fc..0000000 --- a/src/useEdgeSpeech/index.ts +++ /dev/null @@ -1,15 +0,0 @@ -import { useState } from 'react'; - -import { EdgeSpeechOptions, fetchEdgeSpeech } from '@/services/fetchEdgeSpeech'; -import { useTTS } from '@/useTTS'; - -export const useEdgeSpeech = (defaultText: string, options: EdgeSpeechOptions) => { - const [text, setText] = useState(defaultText); - const rest = useTTS(options.name, text, (segmentText: string) => - fetchEdgeSpeech(segmentText, options), - ); - return { - setText, - ...rest, - }; -}; diff --git a/src/useMicrosoftSpeech/index.ts b/src/useMicrosoftSpeech/index.ts deleted file mode 100644 index 637d2c6..0000000 --- a/src/useMicrosoftSpeech/index.ts +++ /dev/null @@ -1,15 +0,0 @@ -import { useState } from 'react'; - -import { type MicrosoftSpeechOptions, fetchMicrosoftSpeech } from '@/services/fetchMicrosoftSpeech'; -import { useTTS } from '@/useTTS'; - -export const useMicrosoftSpeech = (defaultText: string, options: MicrosoftSpeechOptions) => { - const [text, setText] = useState(defaultText); - const rest = useTTS(options.name, text, (segmentText: string) => - fetchMicrosoftSpeech(segmentText, options), - ); - return { - setText, - ...rest, - }; -}; diff --git a/src/useOpenaiTTS/index.ts b/src/useOpenaiTTS/index.ts deleted file mode 100644 index d04cc68..0000000 --- a/src/useOpenaiTTS/index.ts +++ /dev/null @@ -1,15 +0,0 @@ -import { useState } from 'react'; - -import { type OpenaiTtsOptions, fetchOpenaiTTS } from '@/services/fetchOpenaiTTS'; -import { useTTS } from '@/useTTS'; - -export const useOpenaiTTS = (defaultText: string, options: OpenaiTtsOptions) => { - const [text, setText] = useState(defaultText); - const rest = useTTS(options.name, text, (segmentText: string) => - fetchOpenaiTTS(segmentText, options), - ); - return { - setText, - ...rest, - }; -}; diff --git a/src/utils/genSSML.ts b/src/utils/genSSML.ts index 1e0bc1e..57dd52a 100644 --- a/src/utils/genSSML.ts +++ b/src/utils/genSSML.ts @@ -1,5 +1,3 @@ -import { Document, ServiceProvider } from 'ssml-document'; - export type StyleName = | 'affectionate' | 'angry' @@ -14,16 +12,34 @@ export type StyleName = | 'serious'; export interface SsmlOptions { - name: string; pitch?: number; rate?: number; style?: StyleName; + voice: string; } -export const genSSML = (text: string, options: SsmlOptions) => { - let ssml = new Document().voice(options.name); - if (options.style) ssml.expressAs({ style: 
options.style }); - if (options.pitch || options.rate) ssml.prosody({ pitch: options.pitch, rate: options.rate }); - const result = ssml.say(text).render({ provider: ServiceProvider.Microsoft }); - return `${result}`; +const voiceTemplate = (input: string, { voice }: Pick<SsmlOptions, 'voice'>) => + `<voice name="${voice}">${input}</voice>`; + +const styleTemplate = (input: string, { style }: Pick<SsmlOptions, 'style'>) => { + if (!style) return input; + return `<mstts:express-as style="${style}">${input}</mstts:express-as>`; +}; + +const prosodyTemplate = (input: string, { pitch, rate }: Pick<SsmlOptions, 'pitch' | 'rate'>) => { + if (!pitch && !rate) return input; + return `<prosody pitch="${pitch}" rate="${rate}">${input}</prosody>`; +}; +const speackTemplate = (input: string) => + `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="en-US">${input}</speak>`; + +export const genSSML = (input: string, options: SsmlOptions) => { + let ssml = prosodyTemplate(input, options); + ssml = styleTemplate(ssml, options); + ssml = voiceTemplate(ssml, options); + ssml = speackTemplate(ssml); + + return ssml; }; diff --git a/src/utils/getVoiceList.ts b/src/utils/getVoiceList.ts index 841b040..35e5c7b 100644 --- a/src/utils/getVoiceList.ts +++ b/src/utils/getVoiceList.ts @@ -4,9 +4,9 @@ import { flatten } from 'lodash-es'; import azureVoiceList from '@/data/azureVoiceList'; import edgeVoiceList from '@/data/edgeVoiceList'; import voiceLocale from '@/data/locales'; -import nameList from '@/data/nameList'; import openaiVoiceList from '@/data/openaiVoiceList'; import speechSynthesVoiceList from '@/data/speechSynthesVoiceList'; +import voiceList from '@/data/voiceList'; export const genSpeechSynthesVoiceList = () => { const data = speechSynthesis.getVoices(); @@ -38,7 +38,7 @@ export const getAzureVoiceOptions = (locale?: string): SelectProps['options'] => ? (azureVoiceList as any)?.[locale] || [] : flatten(Object.values(azureVoiceList)); - return data.map((voice: any) => ({ label: (nameList as any)?.[voice] || voice, value: voice })); + return data.map((voice: any) => ({ label: (voiceList as any)?.[voice] || voice, value: voice })); }; export const getEdgeVoiceOptions = (locale?: string): SelectProps['options'] => { @@ -46,7 +46,7 @@ export const getEdgeVoiceOptions = (locale?: string): SelectProps['options'] => locale && (edgeVoiceList as any)[locale] ? (edgeVoiceList as any)[locale] || [] : flatten(Object.values(edgeVoiceList)); - return data.map((voice: any) => ({ label: (nameList as any)?.[voice] || voice, value: voice })); + return data.map((voice: any) => ({ label: (voiceList as any)?.[voice] || voice, value: voice })); }; export const getOpenaiVoiceOptions = (): SelectProps['options'] => { @@ -56,9 +56,3 @@ export const getVoiceLocaleOptions = (): SelectProps['options'] => { return Object.entries(voiceLocale).map(([value, label]) => ({ label, value })); }; - -export const genLevaOptions = (options: SelectProps['options']) => { - const data: any = {}; - options?.forEach((item: any) => (data[item?.label || item?.value] = item?.value)); - return data; -};
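A few usage sketches for the refactored APIs in this changeset follow. All import paths, route shapes, and example values are assumptions for illustration, not part of the diff itself. First, the reworked `useTTS`: the new `TTSConfig` argument layers lifecycle callbacks (`onStart`, `onStop`, `onError`, `onFinish`) on top of a pass-through `SWRConfiguration`. A minimal sketch, assuming the hook is exported from the new `@lobehub/tts/react` entry and returns `start`/`stop` alongside the loading flags:

```tsx
import { useTTS } from '@lobehub/tts/react';

// Hypothetical segment fetcher — only its shape comes from the hook's signature.
declare const fetchSpeech: (segmentText: string) => Promise<AudioBuffer>;

export const SpeakButton = ({ text }: { text: string }) => {
  const { start, stop, isGlobalLoading } = useTTS('demo-key', text, fetchSpeech, {
    // Lifecycle callbacks introduced by TTSConfig:
    onFinish: () => console.info('last segment played'),
    onStart: () => console.info('synthesis started'),
    onStop: () => console.info('stopped by caller'),
    // Any remaining keys are forwarded to SWR unchanged:
    revalidateOnFocus: false,
  });

  return (
    <button onClick={isGlobalLoading ? stop : start} type="button">
      {isGlobalLoading ? 'Stop' : 'Speak'}
    </button>
  );
};
```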
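`fetchEdgeSpeech` is now split into a transport-agnostic client and `createEdgeSpeechComletion` on the server: with `api.url` set, the client POSTs an `EdgeSpeechPayload` as JSON; otherwise it runs the WebSocket session in-process. A sketch of the proxy endpoint `api.url` could point at, assuming a framework with `(req: Request) => Promise<Response>` handlers and this repo's import aliases:

```ts
import { createEdgeSpeechComletion } from '@/server/createEdgeSpeechComletion';
import { EdgeSpeechPayload } from '@/server/types';

// Hypothetical proxy route for fetchEdgeSpeech's `api.url` mode.
export const POST = async (req: Request): Promise<Response> => {
  const payload = (await req.json()) as EdgeSpeechPayload;

  // Resolves once the speech WebSocket reaches 'turn.end', with the merged audio as the body.
  return createEdgeSpeechComletion({ payload });
};
```

The client side would then be `fetchEdgeSpeech(input, { api: { url: '/api/edge-speech' }, voice })`, with `/api/edge-speech` standing in for wherever the route is mounted.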
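The two OpenAI helpers are thin wrappers over the official SDK (`openai.audio.speech.create` and `openai.audio.transcriptions.create`). A minimal Node-side sketch of driving the TTS wrapper directly; the key handling and example values are illustrative:

```ts
import OpenAI from 'openai';

import { createOpenaiAudioSpeechCompletion } from '@/server/createOpenaiAudioSpeechCompletion';
import { OpenAITTSPayload } from '@/server/types';

const main = async () => {
  const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

  const payload: OpenAITTSPayload = {
    input: 'Hello from lobe-tts',
    options: { model: 'tts-1', voice: 'alloy' },
  };

  // The SDK resolves with a fetch Response whose body carries the audio bytes.
  const response = await createOpenaiAudioSpeechCompletion({ openai, payload });
  const audio = await response.arrayBuffer();
  console.log(`received ${audio.byteLength} bytes of audio`);
};

void main();
```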
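On the client, `fetchOpenaiSTT` keeps the same dual mode. Used directly from a recorder callback it might look like the following; the placeholder key would normally live server-side behind the `api.url` variant:

```ts
import { fetchOpenaiSTT } from '@lobehub/tts'; // assuming the service stays exported from the root entry

// Sketch: transcribe a recorded Blob in direct mode; getRecordMineType()
// supplies the default container format when `mineType` is omitted.
const transcribe = async (speech: Blob): Promise<string> =>
  fetchOpenaiSTT(speech, {
    api: { key: 'sk-...' }, // placeholder — prefer the `api.url` proxy mode in production
    model: 'whisper-1',
  });
```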
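Finally, the rewritten `genSSML` composes plain string templates from the inside out: prosody, then style, then voice, then the `speak` envelope. A worked example (attribute formatting approximate), assuming the repo-internal import path:

```ts
import { genSSML } from '@/utils/genSSML';

const ssml = genSSML('Hello there', {
  pitch: 0.1,
  rate: 0.2,
  style: 'cheerful',
  voice: 'en-US-GuyNeural',
});

// Nests as:
// <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="en-US">
//   <voice name="en-US-GuyNeural">
//     <mstts:express-as style="cheerful">
//       <prosody pitch="0.1" rate="0.2">Hello there</prosody>
//     </mstts:express-as>
//   </voice>
// </speak>
```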