diff --git a/.changelogrc.js b/.changelogrc.cjs
similarity index 100%
rename from .changelogrc.js
rename to .changelogrc.cjs
diff --git a/.commitlintrc.js b/.commitlintrc.cjs
similarity index 100%
rename from .commitlintrc.js
rename to .commitlintrc.cjs
diff --git a/.dumirc.ts b/.dumirc.ts
index 4e5e797..d957106 100644
--- a/.dumirc.ts
+++ b/.dumirc.ts
@@ -1,4 +1,5 @@
import { defineConfig } from 'dumi';
+import path from 'node:path';
import { description, homepage, name } from './package.json';
@@ -41,6 +42,9 @@ const themeConfig = {
};
export default defineConfig({
+ alias: {
+ '@lobehub/tts/react': path.join(__dirname, './src/react'),
+ },
apiParser: isProduction ? {} : false,
base: '/',
define: {
@@ -51,11 +55,10 @@ export default defineConfig({
mfsu: isWin ? undefined : {},
npmClient: 'pnpm',
publicPath: '/',
- resolve: isProduction
- ? {
- entryFile: './src/index.ts',
- }
- : undefined,
+ resolve: {
+ atomDirs: [{ dir: 'src/react', type: 'component' }],
+ entryFile: isProduction ? './src/index.ts' : undefined,
+ },
styles: [
`html, body { background: transparent; }
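Worth calling out in the `.dumirc.ts` change above: the new `alias` maps the published subpath `@lobehub/tts/react` back to the local `./src/react` sources, and `atomDirs` registers that folder for the component docs. That is what lets the demo files below import the subpath as if it were installed. A minimal sketch of the import this enables (names confirmed by `src/react/index.ts` later in this diff):

```ts
// Resolved to ./src/react by the dumi alias rather than node_modules:
import { AudioPlayer, useEdgeSpeech } from '@lobehub/tts/react';
```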
diff --git a/.eslintrc.js b/.eslintrc.cjs
similarity index 100%
rename from .eslintrc.js
rename to .eslintrc.cjs
diff --git a/.prettierrc.js b/.prettierrc.cjs
similarity index 100%
rename from .prettierrc.js
rename to .prettierrc.cjs
diff --git a/.releaserc.js b/.releaserc.cjs
similarity index 100%
rename from .releaserc.js
rename to .releaserc.cjs
diff --git a/.remarkrc.js b/.remarkrc.cjs
similarity index 100%
rename from .remarkrc.js
rename to .remarkrc.cjs
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 957cd37..1f51e45 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,231 @@
# Changelog
+## [Version 1.13.0-beta.3](https://github.com/lobehub/lobe-tts/compare/v1.13.0-beta.2...v1.13.0-beta.3)
+
+Released on **2023-11-15**
+
+#### ♻ Code Refactoring
+
+- **misc**: Refactor the demo usage.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### Code refactoring
+
+- **misc**: Refactor the demo usage ([802c59a](https://github.com/lobehub/lobe-tts/commit/802c59a))
+
+</details>
+
+<div align="right">
+
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+</div>
+
+## [Version 1.13.0-beta.2](https://github.com/lobehub/lobe-tts/compare/v1.13.0-beta.1...v1.13.0-beta.2)
+
+Released on **2023-11-15**
+
+#### ✨ Features
+
+- **misc**: Add SWR config to hooks.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### What's improved
+
+- **misc**: Add SWR config to hooks ([2c49e02](https://github.com/lobehub/lobe-tts/commit/2c49e02))
+
+</details>
+
+<div align="right">
+
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+</div>
+
+## [Version 1.13.0-beta.1](https://github.com/lobehub/lobe-tts/compare/v1.12.1-beta.6...v1.13.0-beta.1)
+
+Released on **2023-11-15**
+
+#### ✨ Features
+
+- **misc**: Add class.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### What's improved
+
+- **misc**: Add class ([07245e3](https://github.com/lobehub/lobe-tts/commit/07245e3))
+
+</details>
+
+<div align="right">
+
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+</div>
+
+### [Version 1.12.1-beta.6](https://github.com/lobehub/lobe-tts/compare/v1.12.1-beta.5...v1.12.1-beta.6)
+
+Released on **2023-11-15**
+
+#### ♻ Code Refactoring
+
+- **misc**: Refactor to fix build.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### Code refactoring
+
+- **misc**: Refactor to fix build ([9858fcb](https://github.com/lobehub/lobe-tts/commit/9858fcb))
+
+</details>
+
+<div align="right">
+
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+</div>
+
+### [Version 1.12.1-beta.5](https://github.com/lobehub/lobe-tts/compare/v1.12.1-beta.4...v1.12.1-beta.5)
+
+Released on **2023-11-15**
+
+#### ♻ Code Refactoring
+
+- **misc**: Refactor to fix build.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### Code refactoring
+
+- **misc**: Refactor to fix build ([d875be6](https://github.com/lobehub/lobe-tts/commit/d875be6))
+
+</details>
+
+<div align="right">
+
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+</div>
+
+### [Version 1.12.1-beta.4](https://github.com/lobehub/lobe-tts/compare/v1.12.1-beta.3...v1.12.1-beta.4)
+
+Released on **2023-11-15**
+
+#### 🐛 Bug Fixes
+
+- **misc**: Fix client fetch.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### What's fixed
+
+- **misc**: Fix client fetch ([9262608](https://github.com/lobehub/lobe-tts/commit/9262608))
+
+</details>
+
+<div align="right">
+
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+</div>
+
+### [Version 1.12.1-beta.3](https://github.com/lobehub/lobe-tts/compare/v1.12.1-beta.2...v1.12.1-beta.3)
+
+Released on **2023-11-15**
+
+#### ♻ Code Refactoring
+
+- **misc**: Fix react entry.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### Code refactoring
+
+- **misc**: Fix react entry ([6a4631d](https://github.com/lobehub/lobe-tts/commit/6a4631d))
+
+</details>
+
+<div align="right">
+
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+</div>
+
+### [Version 1.12.1-beta.2](https://github.com/lobehub/lobe-tts/compare/v1.12.1-beta.1...v1.12.1-beta.2)
+
+Released on **2023-11-15**
+
+#### ♻ Code Refactoring
+
+- **misc**: Extract the react part into the /react subpath.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### Code refactoring
+
+- **misc**: Extract the react part into the /react subpath ([80b24e8](https://github.com/lobehub/lobe-tts/commit/80b24e8))
+
+</details>
+
+<div align="right">
+
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+</div>
+
+### [Version 1.12.1-beta.1](https://github.com/lobehub/lobe-tts/compare/v1.12.0...v1.12.1-beta.1)
+
+Released on **2023-11-15**
+
+#### ♻ Code Refactoring
+
+- **misc**: Refactor api.
+
+<br/>
+
+<details>
+<summary><kbd>Improvements and Fixes</kbd></summary>
+
+#### Code refactoring
+
+- **misc**: Refactor api ([a7c0095](https://github.com/lobehub/lobe-tts/commit/a7c0095))
+
+</details>
+
+<div align="right">
+
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+</div>
+
## [Version 1.12.0](https://github.com/lobehub/lobe-tts/compare/v1.11.1...v1.12.0)
Released on **2023-11-13**
diff --git a/README.md b/README.md
index f1c9683..f01621d 100644
--- a/README.md
+++ b/README.md
@@ -86,17 +86,17 @@ Click button below to deploy your private plugins' gateway.
This project provides some additional configuration items set with environment variables:
-| Environment Variable | Description | Default |
-| ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------- |
-| `ALLOW_ORIGINS` | Allow origins , string or string array | |
-| `OPENAI_API_KEY` | This is the API key you apply on the OpenAI account page | `sk-xxxxxx...xxxxxx` |
-| `OPENAI_PROXY_URL` | If you manually configure the OpenAI interface proxy, you can use this configuration item to override the default OpenAI API request base URL | `https://api.openai.com/v1` |
-| `AZURE_SPEECH_KEY` | This is the API key of Azure Speech Service | |
-| `AZURE_SPEECH_REGION` | This is the region of Azure Speech Service | |
-| `AZURE_SPEECH_PROXY_URL` | If you manually configure the AZURE Speech interface proxy, you can use this configuration item to override the default Speech API request base URL | `/api/azure-speech` |
-| `MICROSOFT_SPEECH_PROXY_URL` | If you manually configure the Microsoft Speech interface proxy, you can use this configuration item to override the default Speech API request base URL | `/api/microsoft-speech` |
-| `EDDGE_API_TOKEN` | This is the API key of Edge Speech Service | |
-| `EDDGE_PROXY_URL` | If you manually configure the Edge interface proxy, you can use this configuration item to override the default Edge wss request base URL | |
+| Environment Variable | Description | Default |
+| -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------- |
+| `ALLOW_ORIGINS`            | Allowed origins, string or string array                                                                                                                  |                             |
+| `OPENAI_API_KEY`           | This is the API key you obtain from the OpenAI account page                                                                                              | `sk-xxxxxx...xxxxxx`        |
+| `OPENAI_PROXY_URL` | If you manually configure the OpenAI interface proxy, you can use this configuration item to override the default OpenAI API request base URL | `https://api.openai.com/v1` |
+| `AZURE_SPEECH_KEY` | This is the API key of Azure Speech Service | |
+| `AZURE_SPEECH_REGION` | This is the region of Azure Speech Service | |
+| `AZURE_SPEECH_PROXY_URL`   | If you manually configure the Azure Speech interface proxy, you can use this configuration item to override the default Speech API request base URL      | `/api/azure-speech`         |
+| `MICROSOFT_SPEECH_API_URL` | If you manually configure the Microsoft Speech interface proxy, you can use this configuration item to override the default Speech API request base URL | `/api/microsoft-speech` |
+| `EDGE_API_TOKEN` | This is the API key of Edge Speech Service | |
+| `EDGE_SPEECH_API_URL` | If you manually configure the Edge interface proxy, you can use this configuration item to override the default Edge wss request base URL | |
diff --git a/api/azure-speech.ts b/api/azure-speech.ts
deleted file mode 100644
index f6d90da..0000000
--- a/api/azure-speech.ts
+++ /dev/null
@@ -1,28 +0,0 @@
-// TODO: fix vercel error
-// Error: The Edge Function "api/azure-speech" is referencing unsupported modules:
-// - https-proxy-agent: net, tls, url
-// - microsoft-cognitiveservices-speech-sdk: vc-blob-asset:speech-processor.js, fs, net, tls
-
-/*
-import cors from '../src/server/cors';
-import { getAllowOrigins } from '../src/server/getAllowOrigins';
-import { handleAzureSpeechRequest } from '../src/server/handleAzureSpeechRequest';
-
-export const config = {
- runtime: 'edge',
-};
-
-export default async (req: Request) => {
- if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 });
- const origin = getAllowOrigins(req);
- if (!origin) return new Response('Origin Not Allowed', { status: 403 });
- const res = await handleAzureSpeechRequest(req);
- return cors(req, res, { methods: ['POST'], origin });
-};
-*/
-
-export default async (req: Request) => {
- if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 });
-
- return new Response('WIP');
-};
diff --git a/api/edge-speech.ts b/api/edge-speech.ts
new file mode 100644
index 0000000..9bee67d
--- /dev/null
+++ b/api/edge-speech.ts
@@ -0,0 +1,14 @@
+import { createEdgeSpeechComletion } from '../src/server/createEdgeSpeechComletion';
+import { EdgeSpeechPayload } from '../src/server/types';
+
+export const config = {
+ runtime: 'edge',
+};
+
+export default async (req: Request) => {
+ if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 });
+
+ const payload = (await req.json()) as EdgeSpeechPayload;
+
+ return createEdgeSpeechComletion({ payload });
+};
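For orientation, here is a sketch of how a client might call this new route. The concrete `EdgeSpeechPayload` fields live in `src/server/types`, which this diff does not show, so the body below is an assumed shape, not the documented one:

```ts
// Hypothetical client call; the payload fields are assumptions.
const res = await fetch('/api/edge-speech', {
  body: JSON.stringify({ input: 'Hello world', options: { voice: 'en-US-GuyNeural' } }),
  headers: { 'Content-Type': 'application/json' },
  method: 'POST',
});
const audioData = await res.arrayBuffer(); // the route is expected to return audio bytes
```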
diff --git a/api/microsoft-speech.ts b/api/microsoft-speech.ts
index 9b62c17..48e2408 100644
--- a/api/microsoft-speech.ts
+++ b/api/microsoft-speech.ts
@@ -1,6 +1,5 @@
-import cors from '../src/server/cors';
-import { getAllowOrigins } from '../src/server/getAllowOrigins';
-import { handleMicrosoftSpeechRequest } from '../src/server/handleMicrosoftSpeechRequest';
+import { createMicrosoftSpeechComletion } from '../src/server/createMicrosoftSpeechComletion';
+import { MicrosoftSpeechPayload } from '../src/server/types';
export const config = {
runtime: 'edge',
@@ -8,8 +7,7 @@ export const config = {
export default async (req: Request) => {
if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 });
- const origin = getAllowOrigins(req);
- if (!origin) return new Response('Origin Not Allowed', { status: 403 });
- const res = await handleMicrosoftSpeechRequest(req);
- return cors(req, new Response(res.body, res), { methods: ['POST'], origin });
+ const payload = (await req.json()) as MicrosoftSpeechPayload;
+
+ return createMicrosoftSpeechComletion({ payload });
};
diff --git a/api/open-stt.ts b/api/open-stt.ts
new file mode 100644
index 0000000..9576821
--- /dev/null
+++ b/api/open-stt.ts
@@ -0,0 +1,28 @@
+import OpenAI from 'openai';
+
+import { createOpenaiAudioTranscriptionsCompletion } from '../src/server/createOpenaiAudioTranscriptionsCompletion';
+import { OpenAISTTPayload } from '../src/server/types';
+
+export const config = {
+ runtime: 'edge',
+};
+
+export default async (req: Request) => {
+ if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 });
+
+ const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
+ const OPENAI_PROXY_URL = process.env.OPENAI_PROXY_URL;
+
+ if (!OPENAI_API_KEY) return new Response('OPENAI_API_KEY is not set', { status: 500 });
+
+ const payload = (await req.json()) as OpenAISTTPayload;
+
+ const openai = new OpenAI({ apiKey: OPENAI_API_KEY, baseURL: OPENAI_PROXY_URL });
+ const res = await createOpenaiAudioTranscriptionsCompletion({ openai, payload });
+
+ return new Response(JSON.stringify(res), {
+ headers: {
+ 'content-type': 'application/json;charset=UTF-8',
+ },
+ });
+};
diff --git a/api/openai-tts.ts b/api/openai-tts.ts
new file mode 100644
index 0000000..2338729
--- /dev/null
+++ b/api/openai-tts.ts
@@ -0,0 +1,21 @@
+import OpenAI from 'openai';
+
+import { createOpenaiAudioSpeechCompletion } from '../src/server/createOpenaiAudioSpeechCompletion';
+import { OpenAITTSPayload } from '../src/server/types';
+
+export const config = {
+ runtime: 'edge',
+};
+
+export default async (req: Request) => {
+ if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 });
+ const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
+ const OPENAI_PROXY_URL = process.env.OPENAI_PROXY_URL;
+
+ if (!OPENAI_API_KEY) return new Response('OPENAI_API_KEY is not set', { status: 500 });
+ const payload = (await req.json()) as OpenAITTSPayload;
+
+ const openai = new OpenAI({ apiKey: OPENAI_API_KEY, baseURL: OPENAI_PROXY_URL });
+
+ return createOpenaiAudioSpeechCompletion({ openai, payload });
+};
diff --git a/package.json b/package.json
index c8b5aa4..4be404d 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "@lobehub/tts",
- "version": "1.12.0",
+ "version": "1.13.0-beta.3",
"description": "A high-quality & reliable TTS React Hooks library",
"homepage": "https://github.com/lobehub/lobe-tts",
"bugs": {
@@ -13,6 +13,20 @@
"license": "MIT",
"author": "LobeHub
",
"sideEffects": false,
+ "type": "module",
+ "exports": {
+ "./package.json": "./package.json",
+ ".": {
+ "types": "./es/index.d.ts",
+ "import": "./es/index.js",
+ "module": "./es/index.js"
+ },
+ "./react": {
+ "types": "./es/react/index.d.ts",
+ "import": "./es/react/index.js",
+ "module": "./es/react/index.js"
+ }
+ },
"main": "es/index.js",
"module": "es/index.js",
"types": "es/index.d.ts",
@@ -21,10 +35,9 @@
],
"scripts": {
"build": "father build",
- "build:server": "tsc server.ts --declaration",
"ci": "npm run lint && npm run type-check",
"dev": "father dev",
- "docs:build": "npm run setup && dumi build",
+ "docs:build": "npm run setup && npm run build && dumi build",
"docs:build-analyze": "ANALYZE=1 dumi build",
"docs:dev": "npm run setup && dumi dev",
"doctor": "father doctor",
@@ -66,11 +79,10 @@
"antd-style": "^3",
"lodash-es": "^4",
"lucide-react": "latest",
- "microsoft-cognitiveservices-speech-sdk": "^1",
+ "openai": "^4.17.3",
"query-string": "^8",
"react-error-boundary": "^4.0.11",
"react-layout-kit": "^1",
- "ssml-document": "^1",
"swr": "^2",
"url-join": "^5",
"uuid": "^9"
diff --git a/src/class/EdgeSpeechTTS.ts b/src/class/EdgeSpeechTTS.ts
new file mode 100644
index 0000000..3bc721b
--- /dev/null
+++ b/src/class/EdgeSpeechTTS.ts
@@ -0,0 +1,20 @@
+import edgeVoiceList from '@/data/edgeVoiceList';
+import voiceName from '@/data/voiceList';
+import { fetchEdgeSpeech } from '@/services/fetchEdgeSpeech';
+import { getEdgeVoiceOptions, getVoiceLocaleOptions } from '@/utils/getVoiceList';
+
+export class EdgeSpeechTTS {
+ private locale?: string;
+ constructor(locale?: string) {
+ this.locale = locale;
+ }
+
+ get voiceOptions() {
+ return getEdgeVoiceOptions(this.locale);
+ }
+
+ static localeOptions = getVoiceLocaleOptions();
+ static voiceList = edgeVoiceList;
+ static voiceName = voiceName;
+ fetch = fetchEdgeSpeech;
+}
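A usage sketch for the new class: the instance bundles the static voice data with the existing `fetchEdgeSpeech` service. The option shape follows the demo code later in this diff; treat it as an assumption rather than a documented signature:

```ts
const tts = new EdgeSpeechTTS('zh-CN');
console.log(tts.voiceOptions);            // voice options filtered to the zh-CN locale
console.log(EdgeSpeechTTS.localeOptions); // all supported locales
// tts.fetch is fetchEdgeSpeech; the voice value is borrowed from the demos:
const audio = await tts.fetch('Hello', { voice: 'zh-CN-YunxiaNeural' });
```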
diff --git a/src/class/MicrosoftSpeechTTS.ts b/src/class/MicrosoftSpeechTTS.ts
new file mode 100644
index 0000000..414be55
--- /dev/null
+++ b/src/class/MicrosoftSpeechTTS.ts
@@ -0,0 +1,20 @@
+import azureVoiceList from '@/data/azureVoiceList';
+import voiceName from '@/data/voiceList';
+import { fetchMicrosoftSpeech } from '@/services/fetchMicrosoftSpeech';
+import { getAzureVoiceOptions, getVoiceLocaleOptions } from '@/utils/getVoiceList';
+
+export class MicrosoftSpeechTTS {
+ private locale?: string;
+ constructor(locale?: string) {
+ this.locale = locale;
+ }
+ get voiceOptions() {
+ return getAzureVoiceOptions(this.locale);
+ }
+
+ static localeOptions = getVoiceLocaleOptions();
+
+ voiceList = azureVoiceList;
+ voiceName = voiceName;
+ fetch = fetchMicrosoftSpeech;
+}
diff --git a/src/class/OpenaiSTT.ts b/src/class/OpenaiSTT.ts
new file mode 100644
index 0000000..eafd676
--- /dev/null
+++ b/src/class/OpenaiSTT.ts
@@ -0,0 +1,5 @@
+import { fetchOpenaiSTT } from '@/services/fetchOpenaiSTT';
+
+export class OpenaiSTT {
+ fetch = fetchOpenaiSTT;
+}
diff --git a/src/class/OpenaiTTS.ts b/src/class/OpenaiTTS.ts
new file mode 100644
index 0000000..4edd084
--- /dev/null
+++ b/src/class/OpenaiTTS.ts
@@ -0,0 +1,18 @@
+import openaiVoiceList from '@/data/openaiVoiceList';
+import { fetchOpenaiTTS } from '@/services/fetchOpenaiTTS';
+import { getOpenaiVoiceOptions, getVoiceLocaleOptions } from '@/utils/getVoiceList';
+
+export class OpenaiTTS {
+ static voiceList = openaiVoiceList;
+
+ get voiceOptions() {
+ return getOpenaiVoiceOptions();
+ }
+ get localeOptions() {
+ return getVoiceLocaleOptions();
+ }
+
+ static localeOptions = getVoiceLocaleOptions();
+
+ fetch = fetchOpenaiTTS;
+}
diff --git a/src/class/VoiceList.ts b/src/class/VoiceList.ts
new file mode 100644
index 0000000..18c6c5c
--- /dev/null
+++ b/src/class/VoiceList.ts
@@ -0,0 +1,38 @@
+import {
+ getAzureVoiceOptions,
+ getEdgeVoiceOptions,
+ getOpenaiVoiceOptions,
+ getSpeechSynthesVoiceOptions,
+ getVoiceLocaleOptions,
+} from '@/utils/getVoiceList';
+
+export class VoiceList {
+ private locale?: string;
+ constructor(locale?: string) {
+ this.locale = locale;
+ }
+
+ get speechSynthesVoiceOptions() {
+ return getSpeechSynthesVoiceOptions(this.locale);
+ }
+
+ get azureVoiceOptions() {
+ return getAzureVoiceOptions(this.locale);
+ }
+
+ get edgeVoiceOptions() {
+ return getEdgeVoiceOptions(this.locale);
+ }
+
+ get microsoftVoiceOptions() {
+ return getEdgeVoiceOptions(this.locale);
+ }
+
+ get openaiVoiceOptions() {
+ return getOpenaiVoiceOptions();
+ }
+
+ get localeOptions() {
+ return getVoiceLocaleOptions();
+ }
+}
diff --git a/src/const/api.ts b/src/const/api.ts
index 89afd3c..2b5efd8 100644
--- a/src/const/api.ts
+++ b/src/const/api.ts
@@ -1,33 +1,21 @@
import urlJoin from 'url-join';
-export const MICROSOFT_SPPECH_URL =
+export const MICROSOFT_SPEECH_URL =
'https://southeastasia.api.speech.microsoft.com/accfreetrial/texttospeech/acc/v3.0-beta1/vcg/speak';
-export const MICROSOFT_SPEECH_PROXY_URL =
- process.env.MICROSOFT_SPEECH_PROXY_URL ||
- process.env.NEXT_PUBLIC_MICROSOFT_SPEECH_PROXY_URL ||
- '/api/microsoft-speech';
-export const AZURE_SPEECH_PROXY_URL =
- process.env.AZURE_SPEECH_PROXY_URL ||
- process.env.NEXT_PUBLIC_AZURE_SPEECH_PROXY_URL ||
- '/api/azure-speech';
+export const EDGE_SPEECH_URL =
+ 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1';
+export const EDGE_API_TOKEN = '6A5AA1D4EAFF4E9FB37E23D68491D6F4';
+export const OPENAI_BASE_URL = 'https://api.openai.com/v1';
+
+export const MICROSOFT_SPEECH_API_URL = '/api/microsoft-speech';
+export const EDGE_SPEECH_API_URL = '/api/edge-speech';
+export const OPENAI_TTS_API_URL = '/api/openai-tts';
+export const OPENAI_STT_API_URL = '/api/openai-stt';
+
export const AZURE_SPEECH_KEY =
process.env.AZURE_SPEECH_KEY || process.env.NEXT_PUBLIC_AZURE_SPEECH_KEY || '';
export const AZURE_SPEECH_REGION =
process.env.AZURE_SPEECH_REGION || process.env.NEXT_PUBLIC_AZURE_SPEECH_REGION || '';
-export const OPENAI_API_KEY =
- process.env.OPENAI_API_KEY || process.env.NEXT_PUBLIC_OPENAI_API_KEY || '';
-export const OPENAI_PROXY_URL =
- process.env.OPENAI_PROXY_URL ||
- process.env.NEXT_PUBLIC_OPENAI_PROXY_URL ||
- 'https://api.openai.com/v1';
-export const OPENAI_TTS_URL = (api?: string) => urlJoin(api || OPENAI_PROXY_URL, 'audio/speech');
-export const OPENAI_STT_URL = (api?: string) =>
- urlJoin(api || OPENAI_PROXY_URL, 'audio/transcriptions');
-export const EDDGE_PROXY_URL =
- process.env.EDDGE_PROXY_URL ||
- process.env.NEXT_PUBLIC_EDDGE_PROXY_UR ||
- 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1';
-export const EDDGE_API_TOKEN =
- process.env.EDDGE_API_TOKEN ||
- process.env.NEXT_PUBLIC_EDDGE_API_TOKEN ||
- '6A5AA1D4EAFF4E9FB37E23D68491D6F4';
+
+export const OPENAI_TTS_URL = (api: string) => urlJoin(api, 'audio/speech');
+export const OPENAI_STT_URL = (api: string) => urlJoin(api, 'audio/transcriptions');
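Note how the constants file drops every client-side env fallback: tokens and proxy URLs become plain constants, and env resolution now happens only inside the serverless routes. The URL helpers likewise lose their implicit default, so callers must pass a base URL explicitly:

```ts
// The urlJoin-based helpers now require an explicit base:
OPENAI_TTS_URL(OPENAI_BASE_URL);               // 'https://api.openai.com/v1/audio/speech'
OPENAI_STT_URL('https://my-proxy.example/v1'); // 'https://my-proxy.example/v1/audio/transcriptions'
```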
diff --git a/src/data/nameList.ts b/src/data/voiceList.ts
similarity index 100%
rename from src/data/nameList.ts
rename to src/data/voiceList.ts
diff --git a/src/index.ts b/src/index.ts
index 5f2b637..0f034cc 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -1,44 +1,22 @@
-export { default as AudioPlayer, type AudioPlayerProps } from './AudioPlayer';
-export { default as AudioVisualizer, type AudioVisualizerProps } from './AudioVisualizer';
-export { default as azureVoiceList } from './data/azureVoiceList';
-export { default as edgeVoiceList } from './data/edgeVoiceList';
-export { default as voiceLocale } from './data/locales';
-export { default as nameList } from './data/nameList';
-export { default as openaiVoiceList } from './data/openaiVoiceList';
-export { useAudioPlayer } from './hooks/useAudioPlayer';
-export { useAudioVisualizer } from './hooks/useAudioVisualizer';
-export { useBlobUrl } from './hooks/useBlobUrl';
-export { useStreamAudioPlayer } from './hooks/useStreamAudioPlayer';
-export { type AzureSpeechOptions, fetchAzureSpeech } from './services/fetchAzureSpeech';
-export { type EdgeSpeechOptions, fetchEdgeSpeech } from './services/fetchEdgeSpeech';
-export { fetchMicrosoftSpeech, type MicrosoftSpeechOptions } from './services/fetchMicrosoftSpeech';
-export { fetchOpenaiSTT, type OpenaiSttOptions } from './services/fetchOpenaiSTT';
-export { fetchOpenaiTTS, type OpenaiTtsOptions } from './services/fetchOpenaiTTS';
-export { useAudioRecorder } from './useAudioRecorder';
-export { useAzureSpeech } from './useAzureSpeech';
-export { useEdgeSpeech } from './useEdgeSpeech';
-export { useMicrosoftSpeech } from './useMicrosoftSpeech';
-export {
- type OpenaiSpeechRecognitionOptions,
- type OpenaiSTTFetcher,
- useOpenaiSTT,
- useOpenaiSTTWithPSR,
- useOpenaiSTTWithRecord,
- useOpenaiSTTWithSR,
-} from './useOpenaiSTT';
-export { useOpenaiTTS } from './useOpenaiTTS';
-export { usePersistedSpeechRecognition } from './useSpeechRecognition/usePersistedSpeechRecognition';
-export {
- type SpeechRecognitionOptions,
- useSpeechRecognition,
-} from './useSpeechRecognition/useSpeechRecognition';
-export { useSpeechSynthes } from './useSpeechSynthes';
+export { EdgeSpeechTTS } from './class/EdgeSpeechTTS';
+export { MicrosoftSpeechTTS } from './class/MicrosoftSpeechTTS';
+export { OpenaiSTT } from './class/OpenaiSTT';
+export { OpenaiTTS } from './class/OpenaiTTS';
+export { VoiceList } from './class/VoiceList';
+export { type EdgeSpeechOptions } from './services/fetchEdgeSpeech';
+export { type MicrosoftSpeechOptions } from './services/fetchMicrosoftSpeech';
+export { type OpenaiSttOptions } from './services/fetchOpenaiSTT';
+export { type OpenaiTtsOptions } from './services/fetchOpenaiTTS';
export { getRecordMineType, type RecordMineType } from './utils/getRecordMineType';
+export { getSpeechSynthesVoiceOptions } from './utils/getVoiceList';
export {
- genLevaOptions,
- getAzureVoiceOptions,
- getEdgeVoiceOptions,
- getOpenaiVoiceOptions,
- getSpeechSynthesVoiceOptions,
- getVoiceLocaleOptions,
-} from './utils/getVoiceList';
+ EDGE_SPEECH_API_URL,
+ MICROSOFT_SPEECH_API_URL,
+ OPENAI_STT_API_URL,
+ OPENAI_TTS_API_URL,
+} from '@/const/api';
+export { createEdgeSpeechComletion } from '@/server/createEdgeSpeechComletion';
+export { createMicrosoftSpeechComletion } from '@/server/createMicrosoftSpeechComletion';
+export { createOpenaiAudioSpeechCompletion } from '@/server/createOpenaiAudioSpeechCompletion';
+export { createOpenaiAudioTranscriptionsCompletion } from '@/server/createOpenaiAudioTranscriptionsCompletion';
+export * from '@/server/types';
diff --git a/src/AudioPlayer/demos/index.tsx b/src/react/AudioPlayer/demos/index.tsx
similarity index 91%
rename from src/AudioPlayer/demos/index.tsx
rename to src/react/AudioPlayer/demos/index.tsx
index 0310eb6..3bc6309 100644
--- a/src/AudioPlayer/demos/index.tsx
+++ b/src/react/AudioPlayer/demos/index.tsx
@@ -1,4 +1,4 @@
-import { AudioPlayer, useAudioPlayer } from '@lobehub/tts';
+import { AudioPlayer, useAudioPlayer } from '@lobehub/tts/react';
import { StoryBook, useControls, useCreateStore } from '@lobehub/ui';
export default () => {
diff --git a/src/AudioPlayer/index.md b/src/react/AudioPlayer/index.md
similarity index 100%
rename from src/AudioPlayer/index.md
rename to src/react/AudioPlayer/index.md
diff --git a/src/AudioPlayer/index.tsx b/src/react/AudioPlayer/index.tsx
similarity index 100%
rename from src/AudioPlayer/index.tsx
rename to src/react/AudioPlayer/index.tsx
diff --git a/src/AudioVisualizer/Visualizer.tsx b/src/react/AudioVisualizer/Visualizer.tsx
similarity index 93%
rename from src/AudioVisualizer/Visualizer.tsx
rename to src/react/AudioVisualizer/Visualizer.tsx
index 0fa7e65..dd889c2 100644
--- a/src/AudioVisualizer/Visualizer.tsx
+++ b/src/react/AudioVisualizer/Visualizer.tsx
@@ -1,7 +1,8 @@
-import { useAudioVisualizer } from '@lobehub/tts';
import { useTheme } from 'antd-style';
import { RefObject, memo } from 'react';
+import { useAudioVisualizer } from '../hooks/useAudioVisualizer';
+
export interface VisualizerProps {
borderRadius?: number;
color?: string;
diff --git a/src/AudioVisualizer/demos/index.tsx b/src/react/AudioVisualizer/demos/index.tsx
similarity index 98%
rename from src/AudioVisualizer/demos/index.tsx
rename to src/react/AudioVisualizer/demos/index.tsx
index d6d67b8..d195e03 100644
--- a/src/AudioVisualizer/demos/index.tsx
+++ b/src/react/AudioVisualizer/demos/index.tsx
@@ -1,4 +1,4 @@
-import { AudioPlayer, AudioVisualizer, useAudioPlayer } from '@lobehub/tts';
+import { AudioPlayer, AudioVisualizer, useAudioPlayer } from '@lobehub/tts/react';
import { StoryBook, useControls, useCreateStore } from '@lobehub/ui';
import { Flexbox } from 'react-layout-kit';
diff --git a/src/AudioVisualizer/index.md b/src/react/AudioVisualizer/index.md
similarity index 100%
rename from src/AudioVisualizer/index.md
rename to src/react/AudioVisualizer/index.md
diff --git a/src/AudioVisualizer/index.tsx b/src/react/AudioVisualizer/index.tsx
similarity index 94%
rename from src/AudioVisualizer/index.tsx
rename to src/react/AudioVisualizer/index.tsx
index 2e80d1b..b8de105 100644
--- a/src/AudioVisualizer/index.tsx
+++ b/src/react/AudioVisualizer/index.tsx
@@ -4,7 +4,7 @@ import { CSSProperties, RefObject, memo } from 'react';
import { ErrorBoundary } from 'react-error-boundary';
import { Flexbox } from 'react-layout-kit';
-import Visualizer, { VisualizerProps } from '@/AudioVisualizer/Visualizer';
+import Visualizer, { VisualizerProps } from '@/react/AudioVisualizer/Visualizer';
export interface AudioVisualizerProps {
audioRef: RefObject<HTMLAudioElement>;
diff --git a/src/react/_util/leva.ts b/src/react/_util/leva.ts
new file mode 100644
index 0000000..1e54a9a
--- /dev/null
+++ b/src/react/_util/leva.ts
@@ -0,0 +1,7 @@
+import { SelectProps } from 'antd';
+
+export const genLevaOptions = (options: SelectProps['options']) => {
+ const data: any = {};
+ options?.forEach((item: any) => (data[item?.label || item?.value] = item?.value));
+ return data;
+};
diff --git a/src/hooks/useAudioPlayer.ts b/src/react/hooks/useAudioPlayer.ts
similarity index 98%
rename from src/hooks/useAudioPlayer.ts
rename to src/react/hooks/useAudioPlayer.ts
index 66d2867..2bb7390 100644
--- a/src/hooks/useAudioPlayer.ts
+++ b/src/react/hooks/useAudioPlayer.ts
@@ -1,7 +1,7 @@
import { RefObject, useCallback, useEffect, useRef, useState } from 'react';
import useSWR from 'swr';
-import { AudioProps } from '@/AudioPlayer';
+import { AudioProps } from '@/react/AudioPlayer';
import { arrayBufferConvert } from '@/utils/arrayBufferConvert';
import { audioBufferToBlob } from '@/utils/audioBufferToBlob';
diff --git a/src/hooks/useAudioVisualizer.ts b/src/react/hooks/useAudioVisualizer.ts
similarity index 100%
rename from src/hooks/useAudioVisualizer.ts
rename to src/react/hooks/useAudioVisualizer.ts
diff --git a/src/hooks/useBlobUrl.ts b/src/react/hooks/useBlobUrl.ts
similarity index 100%
rename from src/hooks/useBlobUrl.ts
rename to src/react/hooks/useBlobUrl.ts
diff --git a/src/hooks/useStreamAudioPlayer.ts b/src/react/hooks/useStreamAudioPlayer.ts
similarity index 98%
rename from src/hooks/useStreamAudioPlayer.ts
rename to src/react/hooks/useStreamAudioPlayer.ts
index 07b1d7a..224c296 100644
--- a/src/hooks/useStreamAudioPlayer.ts
+++ b/src/react/hooks/useStreamAudioPlayer.ts
@@ -1,6 +1,6 @@
import { RefObject, useCallback, useEffect, useRef, useState } from 'react';
-import { AudioProps } from '@/AudioPlayer';
+import { AudioProps } from '@/react/AudioPlayer';
import { audioBufferToBlob, audioBuffersToBlob } from '@/utils/audioBufferToBlob';
export interface StreamAudioPlayerHook extends AudioProps {
diff --git a/src/react/index.ts b/src/react/index.ts
new file mode 100644
index 0000000..f5bc749
--- /dev/null
+++ b/src/react/index.ts
@@ -0,0 +1,23 @@
+export { default as AudioPlayer, type AudioPlayerProps } from './AudioPlayer';
+export { default as AudioVisualizer, type AudioVisualizerProps } from './AudioVisualizer';
+export { useAudioPlayer } from './hooks/useAudioPlayer';
+export { useAudioVisualizer } from './hooks/useAudioVisualizer';
+export { useBlobUrl } from './hooks/useBlobUrl';
+export { useStreamAudioPlayer } from './hooks/useStreamAudioPlayer';
+export { useAudioRecorder } from './useAudioRecorder';
+export { useEdgeSpeech } from './useEdgeSpeech';
+export { useMicrosoftSpeech } from './useMicrosoftSpeech';
+export {
+ type OpenaiSpeechRecognitionOptions,
+ useOpenaiSTT,
+ useOpenaiSTTWithPSR,
+ useOpenaiSTTWithRecord,
+ useOpenaiSTTWithSR,
+} from './useOpenaiSTT';
+export { useOpenaiTTS } from './useOpenaiTTS';
+export { usePersistedSpeechRecognition } from './useSpeechRecognition/usePersistedSpeechRecognition';
+export {
+ type SpeechRecognitionOptions,
+ useSpeechRecognition,
+} from './useSpeechRecognition/useSpeechRecognition';
+export { useSpeechSynthes } from './useSpeechSynthes';
diff --git a/src/useAudioRecorder/demos/index.tsx b/src/react/useAudioRecorder/demos/index.tsx
similarity index 92%
rename from src/useAudioRecorder/demos/index.tsx
rename to src/react/useAudioRecorder/demos/index.tsx
index d9b8442..40e3c9e 100644
--- a/src/useAudioRecorder/demos/index.tsx
+++ b/src/react/useAudioRecorder/demos/index.tsx
@@ -1,4 +1,4 @@
-import { useAudioRecorder } from '@lobehub/tts';
+import { useAudioRecorder } from '@lobehub/tts/react';
import { Icon } from '@lobehub/ui';
import { Button } from 'antd';
import { Mic, StopCircle } from 'lucide-react';
diff --git a/src/useAudioRecorder/index.md b/src/react/useAudioRecorder/index.md
similarity index 100%
rename from src/useAudioRecorder/index.md
rename to src/react/useAudioRecorder/index.md
diff --git a/src/useAudioRecorder/index.ts b/src/react/useAudioRecorder/index.ts
similarity index 100%
rename from src/useAudioRecorder/index.ts
rename to src/react/useAudioRecorder/index.ts
diff --git a/src/useEdgeSpeech/demos/index.tsx b/src/react/useEdgeSpeech/demos/index.tsx
similarity index 78%
rename from src/useEdgeSpeech/demos/index.tsx
rename to src/react/useEdgeSpeech/demos/index.tsx
index dae1ef8..a01234b 100644
--- a/src/useEdgeSpeech/demos/index.tsx
+++ b/src/react/useEdgeSpeech/demos/index.tsx
@@ -1,9 +1,12 @@
-import { AudioPlayer, genLevaOptions, getEdgeVoiceOptions, useEdgeSpeech } from '@lobehub/tts';
+import { EDGE_SPEECH_API_URL, EdgeSpeechTTS } from '@lobehub/tts';
+import { AudioPlayer, useEdgeSpeech } from '@lobehub/tts/react';
import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui';
import { Button, Input } from 'antd';
import { Volume2 } from 'lucide-react';
import { Flexbox } from 'react-layout-kit';
+import { genLevaOptions } from '../../_util/leva';
+
const defaultText = '这是一段使用 Edge Speech 的语音演示';
export default () => {
@@ -11,22 +14,15 @@ export default () => {
const api: any = useControls(
{
- key: {
- label: 'EDDGE_API_TOKEN',
- value: '',
- },
- proxy: {
- label: 'EDDGE_PROXY_URL',
- value: '',
- },
+ url: EDGE_SPEECH_API_URL,
},
{ store },
);
const options: any = useControls(
{
- name: {
- options: genLevaOptions(getEdgeVoiceOptions()),
+ voice: {
+ options: genLevaOptions(new EdgeSpeechTTS().voiceOptions),
value: 'zh-CN-YunxiaNeural',
},
},
diff --git a/src/useEdgeSpeech/index.md b/src/react/useEdgeSpeech/index.md
similarity index 100%
rename from src/useEdgeSpeech/index.md
rename to src/react/useEdgeSpeech/index.md
diff --git a/src/react/useEdgeSpeech/index.ts b/src/react/useEdgeSpeech/index.ts
new file mode 100644
index 0000000..e717728
--- /dev/null
+++ b/src/react/useEdgeSpeech/index.ts
@@ -0,0 +1,22 @@
+import { useState } from 'react';
+
+import { TTSConfig, useTTS } from '@/react/useTTS';
+import { EdgeSpeechOptions, fetchEdgeSpeech } from '@/services/fetchEdgeSpeech';
+
+export const useEdgeSpeech = (
+ defaultText: string,
+ options: EdgeSpeechOptions,
+ config?: TTSConfig,
+) => {
+ const [text, setText] = useState(defaultText);
+ const rest = useTTS(
+ options.voice,
+ text,
+ (segmentText: string) => fetchEdgeSpeech(segmentText, options),
+ config,
+ );
+ return {
+ setText,
+ ...rest,
+ };
+};
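The rewritten hook now takes three arguments: the default text, the fetch options (keyed by `voice` rather than the old `name`), and an optional `TTSConfig` of SWR options plus lifecycle callbacks. A sketch, with the voice value borrowed from the demos:

```ts
const { audio, isGlobalLoading, start, stop, setText } = useEdgeSpeech(
  'Hello world',
  { voice: 'en-US-GuyNeural' },
  { onStart: () => console.log('started'), onFinish: () => console.log('all segments done') },
);
```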
diff --git a/src/useMicrosoftSpeech/demos/index.tsx b/src/react/useMicrosoftSpeech/demos/index.tsx
similarity index 75%
rename from src/useMicrosoftSpeech/demos/index.tsx
rename to src/react/useMicrosoftSpeech/demos/index.tsx
index ac31eba..756866c 100644
--- a/src/useMicrosoftSpeech/demos/index.tsx
+++ b/src/react/useMicrosoftSpeech/demos/index.tsx
@@ -1,23 +1,27 @@
-import { AudioPlayer, genLevaOptions, getEdgeVoiceOptions, useMicrosoftSpeech } from '@lobehub/tts';
+import { MICROSOFT_SPEECH_API_URL, MicrosoftSpeechTTS } from '@lobehub/tts';
+import { AudioPlayer, useMicrosoftSpeech } from '@lobehub/tts/react';
import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui';
import { Button, Input } from 'antd';
import { Volume2 } from 'lucide-react';
import { Flexbox } from 'react-layout-kit';
+import { genLevaOptions } from '../../_util/leva';
+
const defaultText = '这是一段使用 Microsoft Speech 的语音演示';
export default () => {
const store = useCreateStore();
- const options: any = useControls(
+ const api: any = useControls(
{
- api: {
- label: 'MICROSOFT_SPEECH_PROXY_URL',
- value: '',
- },
- name: {
- options: genLevaOptions(getEdgeVoiceOptions()),
- value: 'zh-CN-YunxiaNeural',
+ url: {
+ label: 'MICROSOFT_SPEECH_API_URL',
+ value: MICROSOFT_SPEECH_API_URL,
},
+ },
+ { store },
+ );
+ const options: any = useControls(
+ {
pitch: {
max: 1,
min: -1,
@@ -46,10 +50,17 @@ export default () => {
],
value: 'general',
},
+ voice: {
+ options: genLevaOptions(new MicrosoftSpeechTTS().voiceOptions),
+ value: 'zh-CN-YunxiaNeural',
+ },
},
{ store },
);
- const { setText, isGlobalLoading, audio, start, stop } = useMicrosoftSpeech(defaultText, options);
+ const { setText, isGlobalLoading, audio, start, stop } = useMicrosoftSpeech(defaultText, {
+ api,
+ ...options,
+ });
return (
diff --git a/src/useMicrosoftSpeech/index.md b/src/react/useMicrosoftSpeech/index.md
similarity index 77%
rename from src/useMicrosoftSpeech/index.md
rename to src/react/useMicrosoftSpeech/index.md
index 8ab707e..b431580 100644
--- a/src/useMicrosoftSpeech/index.md
+++ b/src/react/useMicrosoftSpeech/index.md
@@ -6,6 +6,6 @@ title: useMicrosoftSpeech
## hooks
-- ENV: `MICROSOFT_SPEECH_PROXY_URL`
+- ENV: `MICROSOFT_SPEECH_API_URL`
diff --git a/src/react/useMicrosoftSpeech/index.ts b/src/react/useMicrosoftSpeech/index.ts
new file mode 100644
index 0000000..bd17726
--- /dev/null
+++ b/src/react/useMicrosoftSpeech/index.ts
@@ -0,0 +1,22 @@
+import { useState } from 'react';
+
+import { TTSConfig, useTTS } from '@/react/useTTS';
+import { type MicrosoftSpeechOptions, fetchMicrosoftSpeech } from '@/services/fetchMicrosoftSpeech';
+
+export const useMicrosoftSpeech = (
+ defaultText: string,
+ options: MicrosoftSpeechOptions,
+ config?: TTSConfig,
+) => {
+ const [text, setText] = useState(defaultText);
+ const rest = useTTS(
+ options.voice,
+ text,
+ (segmentText: string) => fetchMicrosoftSpeech(segmentText, options),
+ config,
+ );
+ return {
+ setText,
+ ...rest,
+ };
+};
diff --git a/src/useOpenaiSTT/demos/OpenaiSTTWithPSR.tsx b/src/react/useOpenaiSTT/demos/OpenaiSTTWithPSR.tsx
similarity index 95%
rename from src/useOpenaiSTT/demos/OpenaiSTTWithPSR.tsx
rename to src/react/useOpenaiSTT/demos/OpenaiSTTWithPSR.tsx
index 88c15e4..3ff7ec3 100644
--- a/src/useOpenaiSTT/demos/OpenaiSTTWithPSR.tsx
+++ b/src/react/useOpenaiSTT/demos/OpenaiSTTWithPSR.tsx
@@ -1,4 +1,4 @@
-import { useOpenaiSTTWithPSR } from '@lobehub/tts';
+import { useOpenaiSTTWithPSR } from '@lobehub/tts/react';
import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui';
import { Button, Input } from 'antd';
import { Mic, StopCircle } from 'lucide-react';
diff --git a/src/useOpenaiSTT/demos/OpenaiSTTWithSR.tsx b/src/react/useOpenaiSTT/demos/OpenaiSTTWithSR.tsx
similarity index 95%
rename from src/useOpenaiSTT/demos/OpenaiSTTWithSR.tsx
rename to src/react/useOpenaiSTT/demos/OpenaiSTTWithSR.tsx
index 6cfd7c6..3103aac 100644
--- a/src/useOpenaiSTT/demos/OpenaiSTTWithSR.tsx
+++ b/src/react/useOpenaiSTT/demos/OpenaiSTTWithSR.tsx
@@ -1,4 +1,4 @@
-import { useOpenaiSTTWithSR } from '@lobehub/tts';
+import { useOpenaiSTTWithSR } from '@lobehub/tts/react';
import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui';
import { Button, Input } from 'antd';
import { Mic, StopCircle } from 'lucide-react';
diff --git a/src/useOpenaiSTT/demos/index.tsx b/src/react/useOpenaiSTT/demos/index.tsx
similarity index 89%
rename from src/useOpenaiSTT/demos/index.tsx
rename to src/react/useOpenaiSTT/demos/index.tsx
index e638a02..aa1b687 100644
--- a/src/useOpenaiSTT/demos/index.tsx
+++ b/src/react/useOpenaiSTT/demos/index.tsx
@@ -1,4 +1,5 @@
-import { useOpenaiSTTWithRecord } from '@lobehub/tts';
+import { OPENAI_STT_API_URL } from '@lobehub/tts';
+import { useOpenaiSTTWithRecord } from '@lobehub/tts/react';
import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui';
import { Button, Input } from 'antd';
import { Mic, StopCircle } from 'lucide-react';
@@ -16,6 +17,7 @@ export default () => {
label: 'OPENAI_PROXY_URL',
value: '',
},
+ url: OPENAI_STT_API_URL,
},
{ store },
);
diff --git a/src/useOpenaiSTT/index.md b/src/react/useOpenaiSTT/index.md
similarity index 100%
rename from src/useOpenaiSTT/index.md
rename to src/react/useOpenaiSTT/index.md
diff --git a/src/useOpenaiSTT/index.ts b/src/react/useOpenaiSTT/index.ts
similarity index 76%
rename from src/useOpenaiSTT/index.ts
rename to src/react/useOpenaiSTT/index.ts
index 8d5af8b..728150a 100644
--- a/src/useOpenaiSTT/index.ts
+++ b/src/react/useOpenaiSTT/index.ts
@@ -1,4 +1,4 @@
-export { type OpenaiSTTFetcher, useOpenaiSTT } from './useOpenaiSTT';
+export { useOpenaiSTT } from './useOpenaiSTT';
export { useOpenaiSTTWithPSR } from './useOpenaiSTTWithPSR';
export {
type OpenaiSpeechRecognitionOptions,
diff --git a/src/useOpenaiSTT/useOpenaiSTT.ts b/src/react/useOpenaiSTT/useOpenaiSTT.ts
similarity index 68%
rename from src/useOpenaiSTT/useOpenaiSTT.ts
rename to src/react/useOpenaiSTT/useOpenaiSTT.ts
index 1f99fac..7080b3b 100644
--- a/src/useOpenaiSTT/useOpenaiSTT.ts
+++ b/src/react/useOpenaiSTT/useOpenaiSTT.ts
@@ -3,23 +3,19 @@ import useSWR, { type SWRConfiguration } from 'swr';
import { OpenaiSttOptions, fetchOpenaiSTT } from '@/services/fetchOpenaiSTT';
import { getRecordMineType } from '@/utils/getRecordMineType';
-export type OpenaiSTTFetcher = (blob: Blob, sttOptions: OpenaiSttOptions) => Promise<string>;
export const useOpenaiSTT = (
shouldFetch?: boolean,
blob?: Blob,
options?: OpenaiSttOptions,
config?: SWRConfiguration,
- fetcher?: OpenaiSTTFetcher,
) => {
const key = new Date().getDate().toString();
const optionsWithMineType: OpenaiSttOptions = { ...options, mineType: getRecordMineType() };
- const openaiSTTFetcher = fetcher ?? fetchOpenaiSTT;
-
return useSWR(
shouldFetch && blob ? key : null,
- async () => await openaiSTTFetcher(blob as Blob, optionsWithMineType),
+ async () => await fetchOpenaiSTT(blob as Blob, optionsWithMineType),
config,
);
};
diff --git a/src/useOpenaiSTT/useOpenaiSTTWithPSR.ts b/src/react/useOpenaiSTT/useOpenaiSTTWithPSR.ts
similarity index 57%
rename from src/useOpenaiSTT/useOpenaiSTTWithPSR.ts
rename to src/react/useOpenaiSTT/useOpenaiSTTWithPSR.ts
index 72b65ff..dfff989 100644
--- a/src/useOpenaiSTT/useOpenaiSTTWithPSR.ts
+++ b/src/react/useOpenaiSTT/useOpenaiSTTWithPSR.ts
@@ -1,14 +1,22 @@
import { useCallback, useState } from 'react';
-import { OpenaiSTTFetcher, useOpenaiSTT } from '@/useOpenaiSTT/useOpenaiSTT';
-import { usePersistedSpeechRecognition } from '@/useSpeechRecognition';
+import { useOpenaiSTT } from '@/react/useOpenaiSTT/useOpenaiSTT';
+import { usePersistedSpeechRecognition } from '@/react/useSpeechRecognition';
-import { OpenaiSpeechRecognitionOptions } from './useOpenaiSTTWithRecord';
+import { OpenaiSpeechRecognitionOptions, STTConfig } from './useOpenaiSTTWithRecord';
export const useOpenaiSTTWithPSR = (
locale: string,
- { onBolbAvailable, onTextChange, ...options }: OpenaiSpeechRecognitionOptions,
- fetcher?: OpenaiSTTFetcher,
+ options: OpenaiSpeechRecognitionOptions,
+ {
+ onBolbAvailable,
+ onTextChange,
+ onSuccess,
+ onError,
+ onFinished,
+ onStart,
+ onStop,
+ }: STTConfig = {},
) => {
const [isGlobalLoading, setIsGlobalLoading] = useState(false);
const [shouldFetch, setShouldFetch] = useState(false);
@@ -33,34 +41,33 @@ export const useOpenaiSTTWithPSR = (
});
const handleStart = useCallback(() => {
+ onStart?.();
setIsGlobalLoading(true);
start();
setText('');
}, [start]);
const handleStop = useCallback(() => {
+ onStop?.();
stop();
setShouldFetch(false);
setIsGlobalLoading(false);
}, [stop]);
- const { isLoading } = useOpenaiSTT(
- shouldFetch,
- blob,
- options,
- {
- onError: (err) => {
- console.error(err);
- handleStop();
- },
- onSuccess: (data) => {
- setText(data);
- onTextChange?.(data);
- handleStop();
- },
+ const { isLoading } = useOpenaiSTT(shouldFetch, blob, options, {
+ onError: (err, ...rest) => {
+ onError?.(err, ...rest);
+ console.error(err);
+ handleStop();
},
- fetcher,
- );
+ onSuccess: (data, ...rest) => {
+ onSuccess?.(data, ...rest);
+ setText(data);
+ onTextChange?.(data);
+ handleStop();
+ onFinished?.(data, ...rest);
+ },
+ });
return {
blob,
diff --git a/src/useOpenaiSTT/useOpenaiSTTWithRecord.ts b/src/react/useOpenaiSTT/useOpenaiSTTWithRecord.ts
similarity index 51%
rename from src/useOpenaiSTT/useOpenaiSTTWithRecord.ts
rename to src/react/useOpenaiSTT/useOpenaiSTTWithRecord.ts
index abc037f..fd98e02 100644
--- a/src/useOpenaiSTT/useOpenaiSTTWithRecord.ts
+++ b/src/react/useOpenaiSTT/useOpenaiSTTWithRecord.ts
@@ -1,15 +1,30 @@
import { useCallback, useState } from 'react';
+import { SWRConfiguration } from 'swr';
+import { useAudioRecorder } from '@/react/useAudioRecorder';
+import { useOpenaiSTT } from '@/react/useOpenaiSTT/useOpenaiSTT';
+import { SpeechRecognitionOptions } from '@/react/useSpeechRecognition/useSpeechRecognition';
import { OpenaiSttOptions } from '@/services/fetchOpenaiSTT';
-import { useAudioRecorder } from '@/useAudioRecorder';
-import { OpenaiSTTFetcher, useOpenaiSTT } from '@/useOpenaiSTT/useOpenaiSTT';
-import { SpeechRecognitionOptions } from '@/useSpeechRecognition/useSpeechRecognition';
export type OpenaiSpeechRecognitionOptions = SpeechRecognitionOptions & OpenaiSttOptions;
+export interface STTConfig extends SpeechRecognitionOptions, SWRConfiguration {
+ onFinished?: SWRConfiguration['onSuccess'];
+ onStart?: () => void;
+ onStop?: () => void;
+}
+
export const useOpenaiSTTWithRecord = (
- { onBolbAvailable, onTextChange, ...options }: OpenaiSpeechRecognitionOptions,
- fetcher?: OpenaiSTTFetcher,
+ options: OpenaiSttOptions,
+ {
+ onBolbAvailable,
+ onTextChange,
+ onSuccess,
+ onError,
+ onFinished,
+ onStart,
+ onStop,
+ }: STTConfig = {},
) => {
const [isGlobalLoading, setIsGlobalLoading] = useState(false);
const [shouldFetch, setShouldFetch] = useState(false);
@@ -22,34 +37,33 @@ export const useOpenaiSTTWithRecord = (
);
const handleStart = useCallback(() => {
+ onStart?.();
setIsGlobalLoading(true);
start();
setText('');
}, [start]);
const handleStop = useCallback(() => {
+ onStop?.();
stop();
setShouldFetch(false);
setIsGlobalLoading(false);
}, [stop]);
- const { isLoading } = useOpenaiSTT(
- shouldFetch,
- blob,
- options,
- {
- onError: (err) => {
- console.error(err);
- handleStop();
- },
- onSuccess: (data, value) => {
- setText(data);
- onTextChange?.(value);
- handleStop();
- },
+ const { isLoading } = useOpenaiSTT(shouldFetch, blob, options, {
+ onError: (err, ...rest) => {
+ onError?.(err, ...rest);
+ console.error(err);
+ handleStop();
},
- fetcher,
- );
+ onSuccess: (data, ...rest) => {
+ onSuccess?.(data, ...rest);
+ setText(data);
+ onTextChange?.(data);
+ handleStop();
+ onFinished?.(data, ...rest);
+ },
+ });
return {
blob,
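The STT hooks mirror the TTS refactor: the injectable fetcher is gone, and the callbacks that used to ride along in the options object move into a dedicated `STTConfig` (SWR config plus `onStart`/`onStop`/`onFinished`). A sketch with assumed option fields and the return shape used by the demos:

```ts
const { blob, start, stop, text, isGlobalLoading } = useOpenaiSTTWithRecord(
  {}, // OpenaiSttOptions; the concrete fields are not shown in this diff
  {
    onFinished: (transcript) => console.log('final text:', transcript),
    onStop: () => console.log('recorder stopped'),
  },
);
```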
diff --git a/src/useOpenaiSTT/useOpenaiSTTWithSR.ts b/src/react/useOpenaiSTT/useOpenaiSTTWithSR.ts
similarity index 57%
rename from src/useOpenaiSTT/useOpenaiSTTWithSR.ts
rename to src/react/useOpenaiSTT/useOpenaiSTTWithSR.ts
index 6e92f74..b1bb7f6 100644
--- a/src/useOpenaiSTT/useOpenaiSTTWithSR.ts
+++ b/src/react/useOpenaiSTT/useOpenaiSTTWithSR.ts
@@ -1,14 +1,22 @@
import { useCallback, useState } from 'react';
-import { OpenaiSTTFetcher, useOpenaiSTT } from '@/useOpenaiSTT/useOpenaiSTT';
-import { useSpeechRecognition } from '@/useSpeechRecognition';
+import { useOpenaiSTT } from '@/react/useOpenaiSTT/useOpenaiSTT';
+import { useSpeechRecognition } from '@/react/useSpeechRecognition';
-import { OpenaiSpeechRecognitionOptions } from './useOpenaiSTTWithRecord';
+import { OpenaiSpeechRecognitionOptions, STTConfig } from './useOpenaiSTTWithRecord';
export const useOpenaiSTTWithSR = (
locale: string,
- { onBolbAvailable, onTextChange, ...options }: OpenaiSpeechRecognitionOptions,
- fetcher?: OpenaiSTTFetcher,
+ options: OpenaiSpeechRecognitionOptions,
+ {
+ onBolbAvailable,
+ onTextChange,
+ onSuccess,
+ onError,
+ onFinished,
+ onStart,
+ onStop,
+ }: STTConfig = {},
) => {
const [isGlobalLoading, setIsGlobalLoading] = useState(false);
const [shouldFetch, setShouldFetch] = useState(false);
@@ -33,34 +41,33 @@ export const useOpenaiSTTWithSR = (
});
const handleStart = useCallback(() => {
+ onStart?.();
setIsGlobalLoading(true);
start();
setText('');
}, [start]);
const handleStop = useCallback(() => {
+ onStop?.();
stop();
setShouldFetch(false);
setIsGlobalLoading(false);
}, [stop]);
- const { isLoading } = useOpenaiSTT(
- shouldFetch,
- blob,
- options,
- {
- onError: (err) => {
- console.error(err);
- handleStop();
- },
- onSuccess: (data) => {
- setText(data);
- onTextChange?.(data);
- handleStop();
- },
+ const { isLoading } = useOpenaiSTT(shouldFetch, blob, options, {
+ onError: (err, ...rest) => {
+ onError?.(err, ...rest);
+ console.error(err);
+ handleStop();
},
- fetcher,
- );
+ onSuccess: (data, ...rest) => {
+ onSuccess?.(data, ...rest);
+ setText(data);
+ onTextChange?.(data);
+ handleStop();
+ onFinished?.(data, ...rest);
+ },
+ });
return {
blob,
diff --git a/src/useOpenaiTTS/demos/index.tsx b/src/react/useOpenaiTTS/demos/index.tsx
similarity index 86%
rename from src/useOpenaiTTS/demos/index.tsx
rename to src/react/useOpenaiTTS/demos/index.tsx
index b07e534..5cdc0be 100644
--- a/src/useOpenaiTTS/demos/index.tsx
+++ b/src/react/useOpenaiTTS/demos/index.tsx
@@ -1,4 +1,5 @@
-import { AudioPlayer, openaiVoiceList, useOpenaiTTS } from '@lobehub/tts';
+import { OPENAI_TTS_API_URL, OpenaiTTS } from '@lobehub/tts';
+import { AudioPlayer, useOpenaiTTS } from '@lobehub/tts/react';
import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui';
import { Button, Input } from 'antd';
import { Volume2 } from 'lucide-react';
@@ -19,14 +20,15 @@ export default () => {
label: 'OPENAI_PROXY_URL',
value: '',
},
+ url: OPENAI_TTS_API_URL,
},
{ store },
);
const options: any = useControls(
{
- name: {
- options: openaiVoiceList,
+ voice: {
+ options: OpenaiTTS.voiceList,
value: 'alloy',
},
},
diff --git a/src/useOpenaiTTS/index.md b/src/react/useOpenaiTTS/index.md
similarity index 100%
rename from src/useOpenaiTTS/index.md
rename to src/react/useOpenaiTTS/index.md
diff --git a/src/react/useOpenaiTTS/index.ts b/src/react/useOpenaiTTS/index.ts
new file mode 100644
index 0000000..66648f5
--- /dev/null
+++ b/src/react/useOpenaiTTS/index.ts
@@ -0,0 +1,22 @@
+import { useState } from 'react';
+
+import { TTSConfig, useTTS } from '@/react/useTTS';
+import { type OpenaiTtsOptions, fetchOpenaiTTS } from '@/services/fetchOpenaiTTS';
+
+export const useOpenaiTTS = (
+ defaultText: string,
+ options: OpenaiTtsOptions,
+ config?: TTSConfig,
+) => {
+ const [text, setText] = useState(defaultText);
+ const rest = useTTS(
+ options.voice,
+ text,
+ (segmentText: string) => fetchOpenaiTTS(segmentText, options),
+ config,
+ );
+ return {
+ setText,
+ ...rest,
+ };
+};
diff --git a/src/useSpeechRecognition/demos/PersistedSpeechRecognition.tsx b/src/react/useSpeechRecognition/demos/PersistedSpeechRecognition.tsx
similarity index 93%
rename from src/useSpeechRecognition/demos/PersistedSpeechRecognition.tsx
rename to src/react/useSpeechRecognition/demos/PersistedSpeechRecognition.tsx
index 5b0400c..57cf7c6 100644
--- a/src/useSpeechRecognition/demos/PersistedSpeechRecognition.tsx
+++ b/src/react/useSpeechRecognition/demos/PersistedSpeechRecognition.tsx
@@ -1,4 +1,4 @@
-import { usePersistedSpeechRecognition } from '@lobehub/tts';
+import { usePersistedSpeechRecognition } from '@lobehub/tts/react';
import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui';
import { Button, Input } from 'antd';
import { Mic, StopCircle } from 'lucide-react';
diff --git a/src/useSpeechRecognition/demos/index.tsx b/src/react/useSpeechRecognition/demos/index.tsx
similarity index 94%
rename from src/useSpeechRecognition/demos/index.tsx
rename to src/react/useSpeechRecognition/demos/index.tsx
index 59ff68c..f361c38 100644
--- a/src/useSpeechRecognition/demos/index.tsx
+++ b/src/react/useSpeechRecognition/demos/index.tsx
@@ -1,4 +1,4 @@
-import { useSpeechRecognition } from '@lobehub/tts';
+import { useSpeechRecognition } from '@lobehub/tts/react';
import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui';
import { Button, Input } from 'antd';
import { Mic, StopCircle } from 'lucide-react';
diff --git a/src/useSpeechRecognition/index.md b/src/react/useSpeechRecognition/index.md
similarity index 100%
rename from src/useSpeechRecognition/index.md
rename to src/react/useSpeechRecognition/index.md
diff --git a/src/useSpeechRecognition/index.ts b/src/react/useSpeechRecognition/index.ts
similarity index 100%
rename from src/useSpeechRecognition/index.ts
rename to src/react/useSpeechRecognition/index.ts
diff --git a/src/useSpeechRecognition/usePersistedSpeechRecognition.ts b/src/react/useSpeechRecognition/usePersistedSpeechRecognition.ts
similarity index 95%
rename from src/useSpeechRecognition/usePersistedSpeechRecognition.ts
rename to src/react/useSpeechRecognition/usePersistedSpeechRecognition.ts
index f2bd740..6b0e385 100644
--- a/src/useSpeechRecognition/usePersistedSpeechRecognition.ts
+++ b/src/react/useSpeechRecognition/usePersistedSpeechRecognition.ts
@@ -1,6 +1,6 @@
import { useCallback, useMemo, useState } from 'react';
-import { useAudioRecorder } from '@/useAudioRecorder';
+import { useAudioRecorder } from '@/react/useAudioRecorder';
import { useRecognition } from './useRecognition';
import { SpeechRecognitionOptions } from './useSpeechRecognition';
diff --git a/src/useSpeechRecognition/useRecognition.ts b/src/react/useSpeechRecognition/useRecognition.ts
similarity index 100%
rename from src/useSpeechRecognition/useRecognition.ts
rename to src/react/useSpeechRecognition/useRecognition.ts
diff --git a/src/useSpeechRecognition/useSpeechRecognition.ts b/src/react/useSpeechRecognition/useSpeechRecognition.ts
similarity index 86%
rename from src/useSpeechRecognition/useSpeechRecognition.ts
rename to src/react/useSpeechRecognition/useSpeechRecognition.ts
index 285b666..8e80c93 100644
--- a/src/useSpeechRecognition/useSpeechRecognition.ts
+++ b/src/react/useSpeechRecognition/useSpeechRecognition.ts
@@ -1,7 +1,7 @@
import { useCallback } from 'react';
-import { useAudioRecorder } from '@/useAudioRecorder';
-import { useRecognition } from '@/useSpeechRecognition/useRecognition';
+import { useAudioRecorder } from '@/react/useAudioRecorder';
+import { useRecognition } from '@/react/useSpeechRecognition/useRecognition';
export interface SpeechRecognitionOptions {
onBolbAvailable?: (blob: Blob) => void;
diff --git a/src/useSpeechSynthes/demos/index.tsx b/src/react/useSpeechSynthes/demos/index.tsx
similarity index 87%
rename from src/useSpeechSynthes/demos/index.tsx
rename to src/react/useSpeechSynthes/demos/index.tsx
index 162c9d6..9ea314a 100644
--- a/src/useSpeechSynthes/demos/index.tsx
+++ b/src/react/useSpeechSynthes/demos/index.tsx
@@ -1,19 +1,18 @@
-import { genLevaOptions, getSpeechSynthesVoiceOptions, useSpeechSynthes } from '@lobehub/tts';
+import { getSpeechSynthesVoiceOptions } from '@lobehub/tts';
+import { useSpeechSynthes } from '@lobehub/tts/react';
import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui';
import { Button, Input } from 'antd';
import { StopCircle, Volume2 } from 'lucide-react';
import { Flexbox } from 'react-layout-kit';
+import { genLevaOptions } from '../../_util/leva';
+
const defaultText = '这是一段使用 Speech Synthes 的语音演示';
export default () => {
const store = useCreateStore();
const options: any = useControls(
{
- name: {
- options: genLevaOptions(getSpeechSynthesVoiceOptions()),
- value: '婷婷',
- },
pitch: {
max: 1,
min: -1,
@@ -26,6 +25,10 @@ export default () => {
step: 0.1,
value: 0,
},
+ voice: {
+ options: genLevaOptions(getSpeechSynthesVoiceOptions()),
+ value: '婷婷',
+ },
},
{ store },
);
diff --git a/src/useSpeechSynthes/index.md b/src/react/useSpeechSynthes/index.md
similarity index 100%
rename from src/useSpeechSynthes/index.md
rename to src/react/useSpeechSynthes/index.md
diff --git a/src/useSpeechSynthes/index.ts b/src/react/useSpeechSynthes/index.ts
similarity index 83%
rename from src/useSpeechSynthes/index.ts
rename to src/react/useSpeechSynthes/index.ts
index e34cea2..af9b347 100644
--- a/src/useSpeechSynthes/index.ts
+++ b/src/react/useSpeechSynthes/index.ts
@@ -2,18 +2,18 @@ import { useCallback, useMemo, useState } from 'react';
import { SsmlOptions } from '@/utils/genSSML';
-export const useSpeechSynthes = (defaultText: string, { name, rate, pitch }: SsmlOptions) => {
+export const useSpeechSynthes = (defaultText: string, { voice, rate, pitch }: SsmlOptions) => {
const [voiceList, setVoiceList] = useState(speechSynthesis.getVoices());
const [text, setText] = useState(defaultText);
const [isLoading, setIsLoading] = useState(false);
const speechSynthesisUtterance = useMemo(() => {
const utterance = new SpeechSynthesisUtterance(text);
- utterance.voice = voiceList.find((item) => item.name === name) as any;
+ utterance.voice = voiceList.find((item) => item.name === voice) as any;
if (pitch) utterance.pitch = pitch * 10;
if (rate) utterance.rate = rate * 10;
return utterance;
- }, [text, voiceList, rate, pitch, name]);
+ }, [text, voiceList, rate, pitch, voice]);
speechSynthesis.onvoiceschanged = () => {
setVoiceList(speechSynthesis.getVoices());
diff --git a/src/useTTS/index.ts b/src/react/useTTS/index.ts
similarity index 72%
rename from src/useTTS/index.ts
rename to src/react/useTTS/index.ts
index b548b24..48be2bb 100644
--- a/src/useTTS/index.ts
+++ b/src/react/useTTS/index.ts
@@ -1,11 +1,11 @@
import { useCallback, useEffect, useState } from 'react';
-import useSWR from 'swr';
+import useSWR, { type SWRConfiguration } from 'swr';
-import { AudioProps } from '@/AudioPlayer';
-import { useStreamAudioPlayer } from '@/hooks/useStreamAudioPlayer';
+import { AudioProps } from '@/react/AudioPlayer';
+import { useStreamAudioPlayer } from '@/react/hooks/useStreamAudioPlayer';
import { splitTextIntoSegments } from '@/utils/splitTextIntoSegments';
-export interface TTSHook {
+export interface TTSHook extends SWRConfiguration {
audio: AudioProps;
isGlobalLoading: boolean;
isLoading: boolean;
@@ -13,10 +13,17 @@ export interface TTSHook {
stop: () => void;
}
+export interface TTSConfig extends SWRConfiguration {
+ onFinish?: SWRConfiguration['onSuccess'];
+ onStart?: () => void;
+ onStop?: () => void;
+}
+
export const useTTS = (
key: string,
text: string,
fetchTTS: (segmentText: string) => Promise<AudioBuffer>,
+ { onError, onSuccess, onFinish, onStart, onStop, ...restSWRConfig }: TTSConfig = {},
): TTSHook => {
const { load, reset, ...rest } = useStreamAudioPlayer();
const [shouldFetch, setShouldFetch] = useState(false);
@@ -33,6 +40,7 @@ export const useTTS = (
}, []);
const handleStop = useCallback(() => {
+ onStop?.();
handleReset();
}, []);
@@ -40,24 +48,29 @@ export const useTTS = (
shouldFetch && textArray?.length > 0 ? [key, textArray?.[index]] : null,
async () => await fetchTTS(textArray[index]),
{
- onError: (err) => {
+ onError: (err, ...rest) => {
+ onError?.(err, ...rest);
console.error(err);
handleReset();
},
- onSuccess: (data) => {
+ onSuccess: (data, ...rest) => {
+ onSuccess?.(data, ...rest);
load(data);
if (index < textArray.length - 1) {
setIndex(index + 1);
} else {
+ onFinish?.(data, ...rest);
setShouldFetch(false);
setIsGlobalLoading(false);
}
},
+ ...restSWRConfig,
},
);
const handleStart = useCallback(() => {
if (isLoading) return;
+ onStart?.();
reset();
setShouldFetch(true);
setIsGlobalLoading(true);
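
With `TTSConfig` in place, callers can observe the segment lifecycle and forward any remaining SWR options through `restSWRConfig`. A sketch of a consuming hook (entry paths and the Edge voice are assumptions, not part of this diff):

```ts
import { fetchEdgeSpeech } from '@lobehub/tts';
import { useTTS } from '@lobehub/tts/react';

export const useEdgeDemo = (text: string) =>
  useTTS(
    'edge-demo', // SWR key prefix; one request is issued per text segment
    text,
    (segment) => fetchEdgeSpeech(segment, { voice: 'zh-CN-YunxiaNeural' }),
    {
      onFinish: () => console.info('last segment loaded'),
      onStart: () => console.info('synthesis started'),
      onStop: () => console.info('stopped and reset'),
      revalidateOnFocus: false, // forwarded to useSWR via restSWRConfig
    },
  );
```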
diff --git a/src/server.ts b/src/server.ts
deleted file mode 100644
index 4c232a9..0000000
--- a/src/server.ts
+++ /dev/null
@@ -1,2 +0,0 @@
-export { handleAzureSpeechRequest } from './server/handleAzureSpeechRequest';
-export { handleMicrosoftSpeechRequest } from './server/handleMicrosoftSpeechRequest';
diff --git a/src/server/cors.ts b/src/server/cors.ts
deleted file mode 100644
index a8be6c5..0000000
--- a/src/server/cors.ts
+++ /dev/null
@@ -1,140 +0,0 @@
-/**
- * Multi purpose CORS lib.
- * Note: Based on the `cors` package in npm but using only
- * web APIs. Feel free to use it in your own projects.
- */
-
-type StaticOrigin = boolean | string | RegExp | (boolean | string | RegExp)[];
-
-type OriginFn = (origin: string | undefined, req: Request) => StaticOrigin | Promise<StaticOrigin>;
-
-interface CorsOptions {
- allowedHeaders?: string | string[];
- credentials?: boolean;
- exposedHeaders?: string | string[];
- maxAge?: number;
- methods?: string | string[];
- optionsSuccessStatus?: number;
- origin?: StaticOrigin | OriginFn;
- preflightContinue?: boolean;
-}
-
-const defaultOptions: CorsOptions = {
- methods: 'GET,HEAD,PUT,PATCH,POST,DELETE',
- optionsSuccessStatus: 204,
- origin: '*',
- preflightContinue: false,
-};
-
-function isOriginAllowed(origin: string, allowed: StaticOrigin): boolean {
- return Array.isArray(allowed)
- ? allowed.some((o) => isOriginAllowed(origin, o))
- : typeof allowed === 'string'
- ? origin === allowed
- : allowed instanceof RegExp
- ? allowed.test(origin)
- : !!allowed;
-}
-
-function getOriginHeaders(reqOrigin: string | undefined, origin: StaticOrigin) {
- const headers = new Headers();
-
- if (origin === '*') {
- // Allow any origin
- headers.set('Access-Control-Allow-Origin', '*');
- } else if (typeof origin === 'string') {
- // Fixed origin
- headers.set('Access-Control-Allow-Origin', origin);
- headers.append('Vary', 'Origin');
- } else {
- const allowed = isOriginAllowed(reqOrigin ?? '', origin);
-
- if (allowed && reqOrigin) {
- headers.set('Access-Control-Allow-Origin', reqOrigin);
- }
- headers.append('Vary', 'Origin');
- }
-
- return headers;
-}
-
-// originHeadersFromReq
-
-async function originHeadersFromReq(req: Request, origin: StaticOrigin | OriginFn) {
- const reqOrigin = req.headers.get('Origin') || undefined;
- const value = typeof origin === 'function' ? await origin(reqOrigin, req) : origin;
-
- if (!value) return;
- return getOriginHeaders(reqOrigin, value);
-}
-
-function getAllowedHeaders(req: Request, allowed?: string | string[]) {
- const headers = new Headers();
-
- if (!allowed) {
- allowed = req.headers.get('Access-Control-Request-Headers')!;
- headers.append('Vary', 'Access-Control-Request-Headers');
- } else if (Array.isArray(allowed)) {
- // If the allowed headers is an array, turn it into a string
- allowed = allowed.join(',');
- }
- if (allowed) {
- headers.set('Access-Control-Allow-Headers', allowed);
- }
-
- return headers;
-}
-
-export default async function cors(req: Request, res: Response, options?: CorsOptions) {
- const opts = { ...defaultOptions, ...options };
- const { headers } = res;
- const originHeaders = await originHeadersFromReq(req, opts.origin ?? false);
- const mergeHeaders = (v: string, k: string) => {
- if (k === 'Vary') headers.append(k, v);
- else headers.set(k, v);
- };
-
- // If there's no origin we won't touch the response
- if (!originHeaders) return res;
-
- originHeaders.forEach(mergeHeaders);
-
- if (opts.credentials) {
- headers.set('Access-Control-Allow-Credentials', 'true');
- }
-
- const exposed = Array.isArray(opts.exposedHeaders)
- ? opts.exposedHeaders.join(',')
- : opts.exposedHeaders;
-
- if (exposed) {
- headers.set('Access-Control-Expose-Headers', exposed);
- }
-
- // Handle the preflight request
- if (req.method === 'OPTIONS') {
- if (opts.methods) {
- const methods = Array.isArray(opts.methods) ? opts.methods.join(',') : opts.methods;
-
- headers.set('Access-Control-Allow-Methods', methods);
- }
-
- getAllowedHeaders(req, opts.allowedHeaders).forEach(mergeHeaders);
-
- if (typeof opts.maxAge === 'number') {
- headers.set('Access-Control-Max-Age', String(opts.maxAge));
- }
-
- if (opts.preflightContinue) return res;
-
- headers.set('Content-Length', '0');
- return new Response(null, { headers, status: opts.optionsSuccessStatus });
- }
-
- // If we got here, it's a normal request
- return res;
-}
-
-export function initCors(options?: CorsOptions) {
- return (req: Request, res: Response) => cors(req, res, options);
-}
diff --git a/src/server/createEdgeSpeechComletion.ts b/src/server/createEdgeSpeechComletion.ts
new file mode 100644
index 0000000..c0a478c
--- /dev/null
+++ b/src/server/createEdgeSpeechComletion.ts
@@ -0,0 +1,103 @@
+import qs from 'query-string';
+import { v4 as uuidv4 } from 'uuid';
+
+import { EDGE_API_TOKEN, EDGE_SPEECH_URL } from '../const/api';
+import { EdgeSpeechPayload } from '../server/types';
+import { genSSML } from '../utils/genSSML';
+import { genSendContent } from '../utils/genSendContent';
+import { getHeadersAndData } from '../utils/getHeadersAndData';
+
+const configConent = JSON.stringify({
+ context: {
+ synthesis: {
+ audio: {
+ metadataoptions: { sentenceBoundaryEnabled: false, wordBoundaryEnabled: true },
+ outputFormat: 'audio-24khz-48kbitrate-mono-mp3',
+ },
+ },
+ },
+});
+
+const genHeader = (connectId: string) => {
+ const date = new Date().toString();
+ const configHeader = {
+ 'Content-Type': 'application/json; charset=utf-8',
+ 'Path': 'speech.config',
+ 'X-Timestamp': date,
+ };
+ const contentHeader = {
+ 'Content-Type': 'application/ssml+xml',
+ 'Path': 'ssml',
+ 'X-RequestId': connectId,
+ 'X-Timestamp': date,
+ };
+ return {
+ configHeader,
+ contentHeader,
+ };
+};
+
+interface CreateEdgeSpeechComletionOptions {
+ payload: EdgeSpeechPayload;
+}
+
+export const createEdgeSpeechComletion = async ({
+ payload,
+}: CreateEdgeSpeechComletionOptions): Promise<Response> => {
+ const { input, options } = payload;
+
+ const connectId = uuidv4().replaceAll('-', '');
+ const url = qs.stringifyUrl({
+ query: {
+ ConnectionId: connectId,
+ TrustedClientToken: EDGE_API_TOKEN,
+ },
+ url: EDGE_SPEECH_URL,
+ });
+
+ const { configHeader, contentHeader } = genHeader(connectId);
+ const config = genSendContent(configHeader, configConent);
+ const content = genSendContent(contentHeader, genSSML(input, options));
+
+ return new Promise((resolve, reject) => {
+ const ws = new WebSocket(url);
+ ws.binaryType = 'arraybuffer';
+ const onOpen = () => {
+ ws.send(config);
+ ws.send(content);
+ };
+ let audioData = new ArrayBuffer(0);
+ const onMessage = async (event: MessageEvent) => {
+ if (typeof event.data === 'string') {
+ const { headers } = getHeadersAndData(event.data);
+ switch (headers['Path']) {
+ case 'turn.end': {
+ ws.close();
+ if (!audioData.byteLength) return;
+ const res = new Response(audioData);
+ resolve(res);
+ break;
+ }
+ }
+ } else if (event.data instanceof ArrayBuffer) {
+ const dataview = new DataView(event.data);
+ const headerLength = dataview.getInt16(0);
+ if (event.data.byteLength > headerLength + 2) {
+ const newBody = event.data.slice(2 + headerLength);
+ const newAudioData = new ArrayBuffer(audioData.byteLength + newBody.byteLength);
+ const mergedUint8Array = new Uint8Array(newAudioData);
+ mergedUint8Array.set(new Uint8Array(audioData), 0);
+ mergedUint8Array.set(new Uint8Array(newBody), audioData.byteLength);
+ audioData = newAudioData;
+ }
+ }
+ };
+ const onError = () => {
+ reject(new Error('WebSocket error occurred.'));
+ ws.close();
+ };
+ ws.addEventListener('open', onOpen);
+ ws.addEventListener('message', onMessage);
+ ws.addEventListener('error', onError);
+ });
+};
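
Since the completion resolves to a standard `Response`, it can back an HTTP route directly. A sketch assuming a Web-standard runtime and that the helper is exposed from a server entry point (both are assumptions):

```ts
// Hypothetical POST handler: the client sends an EdgeSpeechPayload as JSON
// and receives the synthesized MP3 bytes back.
import { createEdgeSpeechComletion } from '@lobehub/tts/server';
import type { EdgeSpeechPayload } from '@lobehub/tts/server';

export const POST = async (req: Request): Promise<Response> => {
  const payload = (await req.json()) as EdgeSpeechPayload;
  return createEdgeSpeechComletion({ payload });
};
```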
diff --git a/src/server/handleMicrosoftSpeechRequest.ts b/src/server/createMicrosoftSpeechComletion.ts
similarity index 55%
rename from src/server/handleMicrosoftSpeechRequest.ts
rename to src/server/createMicrosoftSpeechComletion.ts
index 1032970..9b2f854 100644
--- a/src/server/handleMicrosoftSpeechRequest.ts
+++ b/src/server/createMicrosoftSpeechComletion.ts
@@ -1,8 +1,18 @@
import { v4 as uuidv4 } from 'uuid';
-import { MICROSOFT_SPPECH_URL } from '../const/api';
+import { MICROSOFT_SPEECH_URL } from '../const/api';
+import { MicrosoftSpeechPayload } from '../server/types';
+import { genSSML } from '../utils/genSSML';
+
+interface CreateMicrosoftSpeechComletionOptions {
+ payload: MicrosoftSpeechPayload;
+}
+
+export const createMicrosoftSpeechComletion = async ({
+ payload,
+}: CreateMicrosoftSpeechComletionOptions) => {
+ const { input, options } = payload;
-export const handleMicrosoftSpeechRequest = async (req: Request, options?: any) => {
const DEFAULT_HEADERS = new Headers({
'accept': '*/*',
'accept-language': 'zh-CN,zh;q=0.9',
@@ -20,13 +30,21 @@ export const handleMicrosoftSpeechRequest = async (req: Request, options?: any)
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
});
- const res = await fetch(MICROSOFT_SPPECH_URL, {
- body: req.body,
+ const body = JSON.stringify({
+ offsetInPlainText: 0,
+ properties: {
+ SpeakTriggerSource: 'AccTuningPagePlayButton',
+ },
+ ssml: genSSML(input, options),
+ ttsAudioFormat: 'audio-24khz-160kbitrate-mono-mp3',
+ });
+
+ const res = await fetch(MICROSOFT_SPEECH_URL, {
+ body,
headers: DEFAULT_HEADERS,
method: 'POST',
// @ts-ignore
responseType: 'arraybuffer',
- ...options,
});
return res;
diff --git a/src/server/createOpenaiAudioSpeechCompletion.ts b/src/server/createOpenaiAudioSpeechCompletion.ts
new file mode 100644
index 0000000..8b0a1fe
--- /dev/null
+++ b/src/server/createOpenaiAudioSpeechCompletion.ts
@@ -0,0 +1,26 @@
+import OpenAI from 'openai';
+
+import { OpenAITTSPayload } from './types';
+
+interface CreateOpenaiAudioSpeechCompletionOptions {
+ openai: OpenAI;
+ payload: OpenAITTSPayload;
+}
+
+export const createOpenaiAudioSpeechCompletion = async ({
+ payload,
+ openai,
+}: CreateOpenaiAudioSpeechCompletionOptions) => {
+ const { options, input } = payload;
+
+ const response = await openai.audio.speech.create(
+ {
+ input,
+ model: options.model,
+ voice: options.voice,
+ },
+ { headers: { Accept: '*/*' } },
+ );
+
+ return response;
+};
diff --git a/src/server/createOpenaiAudioTranscriptionsCompletion.ts b/src/server/createOpenaiAudioTranscriptionsCompletion.ts
new file mode 100644
index 0000000..880cb3c
--- /dev/null
+++ b/src/server/createOpenaiAudioTranscriptionsCompletion.ts
@@ -0,0 +1,29 @@
+import OpenAI from 'openai';
+
+import { OpenAISTTPayload } from './types';
+
+interface CreateOpenaiAudioTranscriptionsOptions {
+ openai: OpenAI;
+ payload: OpenAISTTPayload;
+}
+
+export const createOpenaiAudioTranscriptionsCompletion = async ({
+ payload,
+ openai,
+}: CreateOpenaiAudioTranscriptionsOptions) => {
+ const { blob, options } = payload;
+
+ const file = new File([blob], `${Date.now()}.${options.mineType.extension}`, {
+ type: options.mineType.mineType,
+ });
+
+ const response = await openai.audio.transcriptions.create(
+ {
+ file,
+ model: options.model,
+ },
+ { headers: { Accept: '*/*' } },
+ );
+
+ return response;
+};
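
Both OpenAI helpers take a preconfigured client, so credential handling stays at the call site. A server-side sketch (import paths and the env var are assumptions):

```ts
import OpenAI from 'openai';

import { createOpenaiAudioSpeechCompletion } from './createOpenaiAudioSpeechCompletion';
import { createOpenaiAudioTranscriptionsCompletion } from './createOpenaiAudioTranscriptionsCompletion';
import { getRecordMineType } from '../utils/getRecordMineType';

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

export const demo = async (recording: Blob) => {
  // Text -> speech: resolves to the SDK response carrying the audio bytes.
  const speech = await createOpenaiAudioSpeechCompletion({
    openai,
    payload: { input: 'Hello there', options: { model: 'tts-1', voice: 'alloy' } },
  });

  // Speech -> text: the blob is wrapped in a File named after its MIME type.
  const transcript = await createOpenaiAudioTranscriptionsCompletion({
    openai,
    payload: { blob: recording, options: { mineType: getRecordMineType(), model: 'whisper-1' } },
  });

  return { speech, transcript };
};
```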
diff --git a/src/server/getAllowOrigins.ts b/src/server/getAllowOrigins.ts
deleted file mode 100644
index 3fccfdf..0000000
--- a/src/server/getAllowOrigins.ts
+++ /dev/null
@@ -1,15 +0,0 @@
-const ALLOW_ORIGINS = process.env?.ALLOW_ORIGINS?.split(',') || undefined;
-
-export const getAllowOrigins = (req: Request) => {
- let origin = '*';
-
- if (ALLOW_ORIGINS) {
- const reqOrigin = req.headers.get('origin');
- if (reqOrigin && ALLOW_ORIGINS.includes(reqOrigin)) {
- origin = reqOrigin;
- } else {
- return;
- }
- }
- return origin;
-};
diff --git a/src/server/handleAzureSpeechRequest.ts b/src/server/handleAzureSpeechRequest.ts
deleted file mode 100644
index 9c9033f..0000000
--- a/src/server/handleAzureSpeechRequest.ts
+++ /dev/null
@@ -1,52 +0,0 @@
-import {
- AudioConfig,
- PropertyId,
- ResultReason,
- SpeechConfig,
- SpeechSynthesisOutputFormat,
- SpeechSynthesisResult,
- SpeechSynthesizer,
-} from 'microsoft-cognitiveservices-speech-sdk';
-
-import { AZURE_SPEECH_KEY, AZURE_SPEECH_REGION } from '../const/api';
-
-const fetchAzureSpeech = async (ssml: string, { api }: any): Promise<ArrayBuffer> => {
- const key = api.key || AZURE_SPEECH_KEY;
- const region = api.key || AZURE_SPEECH_REGION;
- const speechConfig = SpeechConfig.fromSubscription(key, region);
- speechConfig.setProperty(PropertyId.SpeechServiceResponse_RequestSentenceBoundary, 'true');
- speechConfig.speechSynthesisOutputFormat = SpeechSynthesisOutputFormat.Webm24Khz16BitMonoOpus;
-
- const audioConfig = AudioConfig.fromDefaultSpeakerOutput();
- const synthesizer: SpeechSynthesizer | null = new SpeechSynthesizer(speechConfig, audioConfig);
-
- const completeCb = async (
- result: SpeechSynthesisResult,
- resolve: (value: ArrayBuffer) => void,
- ) => {
- if (result.reason === ResultReason.SynthesizingAudioCompleted) {
- const audioData = result.audioData;
- resolve(audioData);
- }
- synthesizer.close();
- };
-
- const errCb = (err: string, reject: (err?: any) => void) => {
- reject(err);
- synthesizer.close();
- };
-
- return new Promise((resolve, reject) => {
- synthesizer.speakSsmlAsync(
- ssml,
- (result) => completeCb(result, resolve),
- (err) => errCb(err, reject),
- );
- });
-};
-
-export const handleAzureSpeechRequest = async (req: Request) => {
- const { ssml, ...options } = req.body as any;
- const data = await fetchAzureSpeech(ssml, options);
- return new Response(data);
-};
diff --git a/src/server/types.ts b/src/server/types.ts
new file mode 100644
index 0000000..4ddfbdf
--- /dev/null
+++ b/src/server/types.ts
@@ -0,0 +1,58 @@
+import { SsmlOptions } from '@/utils/genSSML';
+import { RecordMineType } from '@/utils/getRecordMineType';
+
+export interface MicrosoftSpeechPayload {
+ /**
+ * @title Text to synthesize
+ */
+ input: string;
+ /**
+ * @title SSML synthesis options
+ */
+ options: SsmlOptions;
+}
+
+export interface EdgeSpeechPayload {
+ /**
+ * @title Text to synthesize
+ */
+ input: string;
+ /**
+ * @title SSML synthesis options
+ */
+ options: Pick<SsmlOptions, 'voice'>;
+}
+
+export interface OpenAITTSPayload {
+ /**
+ * @title TTS model name
+ */
+ input: string;
+ options: {
+ /**
+ * @title TTS model name
+ */
+ model: string;
+ /**
+ * @title TTS voice name
+ */
+ voice: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
+ };
+}
+
+export interface OpenAISTTPayload {
+ /**
+ * @title Audio file to transcribe
+ */
+ blob: Blob;
+ options: {
+ /**
+ * @title Audio recording format
+ */
+ mineType: RecordMineType;
+ /**
+ * @title STT model name
+ */
+ model: string;
+ };
+}
diff --git a/src/services/fetchAzureSpeech.ts b/src/services/fetchAzureSpeech.ts
deleted file mode 100644
index ea495a3..0000000
--- a/src/services/fetchAzureSpeech.ts
+++ /dev/null
@@ -1,39 +0,0 @@
-import { AZURE_SPEECH_KEY, AZURE_SPEECH_PROXY_URL, AZURE_SPEECH_REGION } from '@/const/api';
-import { arrayBufferConvert } from '@/utils/arrayBufferConvert';
-import { type SsmlOptions, genSSML } from '@/utils/genSSML';
-
-export interface AzureSpeechOptions extends SsmlOptions {
- api?: {
- key?: string;
- proxy?: string;
- region?: string;
- };
-}
-
-export const fetchAzureSpeech = async (
- text: string,
- { api = {}, ...options }: AzureSpeechOptions,
-): Promise => {
- const data = JSON.stringify({
- api: {
- key: api?.key || AZURE_SPEECH_KEY,
- region: api?.region || AZURE_SPEECH_REGION,
- },
- ssml: genSSML(text, options),
- });
- const url = api?.proxy || AZURE_SPEECH_PROXY_URL;
-
- const response: Response = await fetch(url, {
- body: data,
- method: 'POST',
- // @ts-ignore
- responseType: 'arraybuffer',
- });
-
- if (!response.ok) {
- throw new Error('Network response was not ok');
- }
-
- const arrayBuffer = await response.arrayBuffer();
- return await arrayBufferConvert(arrayBuffer);
-};
diff --git a/src/services/fetchEdgeSpeech.ts b/src/services/fetchEdgeSpeech.ts
index 7ab2b50..c85cd0c 100644
--- a/src/services/fetchEdgeSpeech.ts
+++ b/src/services/fetchEdgeSpeech.ts
@@ -1,104 +1,29 @@
-import qs from 'query-string';
-import { v4 as uuidv4 } from 'uuid';
-
-import { EDDGE_API_TOKEN, EDDGE_PROXY_URL } from '@/const/api';
+import { createEdgeSpeechComletion } from '@/server/createEdgeSpeechComletion';
+import { EdgeSpeechPayload } from '@/server/types';
import { arrayBufferConvert } from '@/utils/arrayBufferConvert';
-import { type SsmlOptions, genSSML } from '@/utils/genSSML';
-import { genSendContent } from '@/utils/genSendContent';
-import { getHeadersAndData } from '@/utils/getHeadersAndData';
-
-const configConent = JSON.stringify({
- context: {
- synthesis: {
- audio: {
- metadataoptions: { sentenceBoundaryEnabled: false, wordBoundaryEnabled: true },
- outputFormat: 'audio-24khz-48kbitrate-mono-mp3',
- },
- },
- },
-});
+import { type SsmlOptions } from '@/utils/genSSML';
-const genHeader = (connectId: string) => {
- const date = new Date().toString();
- const configHeader = {
- 'Content-Type': 'application/json; charset=utf-8',
- 'Path': 'speech.config',
- 'X-Timestamp': date,
- };
- const contentHeader = {
- 'Content-Type': 'application/ssml+xml',
- 'Path': 'ssml',
- 'X-RequestId': connectId,
- 'X-Timestamp': date,
- };
- return {
- configHeader,
- contentHeader,
- };
-};
-
-export interface EdgeSpeechOptions extends Pick<SsmlOptions, 'name'> {
+export interface EdgeSpeechOptions extends Pick<SsmlOptions, 'voice'> {
api?: {
- key?: string;
- proxy?: string;
+ url?: string;
};
}
+
export const fetchEdgeSpeech = async (
- text: string,
- { api = {}, ...options }: EdgeSpeechOptions,
+ input: string,
+ { api, ...options }: EdgeSpeechOptions,
): Promise<AudioBuffer> => {
- const connectId = uuidv4().replaceAll('-', '');
- const url = qs.stringifyUrl({
- query: {
- ConnectionId: connectId,
- TrustedClientToken: api?.key || EDDGE_API_TOKEN,
- },
- url: api?.proxy || EDDGE_PROXY_URL,
- });
+ const payload: EdgeSpeechPayload = { input, options };
+
+ const response = await (api?.url
+ ? fetch(api.url, { body: JSON.stringify(payload), method: 'POST' })
+ : createEdgeSpeechComletion({ payload }));
- const { configHeader, contentHeader } = genHeader(connectId);
- const config = genSendContent(configHeader, configConent);
- const content = genSendContent(contentHeader, genSSML(text, options));
+ if (!response.ok) {
+ throw new Error('Network response was not ok');
+ }
- return new Promise((resolve, reject) => {
- const ws = new WebSocket(url);
- ws.binaryType = 'arraybuffer';
- const onOpen = () => {
- ws.send(config);
- ws.send(content);
- };
- let audioData = new ArrayBuffer(0);
- const onMessage = async (event: MessageEvent) => {
- if (typeof event.data === 'string') {
- const { headers } = getHeadersAndData(event.data);
- switch (headers['Path']) {
- case 'turn.end': {
- ws.close();
- if (!audioData.byteLength) return;
- const audioBuffer = await arrayBufferConvert(audioData);
- resolve(audioBuffer);
- break;
- }
- }
- } else if (event.data instanceof ArrayBuffer) {
- const dataview = new DataView(event.data);
- const headerLength = dataview.getInt16(0);
- if (event.data.byteLength > headerLength + 2) {
- const newBody = event.data.slice(2 + headerLength);
- const newAudioData = new ArrayBuffer(audioData.byteLength + newBody.byteLength);
- const mergedUint8Array = new Uint8Array(newAudioData);
- mergedUint8Array.set(new Uint8Array(audioData), 0);
- mergedUint8Array.set(new Uint8Array(newBody), audioData.byteLength);
- audioData = newAudioData;
- }
- }
- };
- const onError = () => {
- reject(new Error('WebSocket error occurred.'));
- ws.close();
- };
- ws.addEventListener('open', onOpen);
- ws.addEventListener('message', onMessage);
- ws.addEventListener('error', onError);
- });
+ const arrayBuffer = await response.arrayBuffer();
+ const audioBuffer = await arrayBufferConvert(arrayBuffer);
+ return audioBuffer;
};
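
The service now has two modes behind one signature: pass `api.url` to POST the payload to your own endpoint, or omit it to run the WebSocket completion in-process. A sketch (the `/api/edge-speech` route is an assumption):

```ts
import { fetchEdgeSpeech } from '@lobehub/tts';

// In-process: opens the Edge TTS WebSocket from the current runtime.
export const speakDirect = (text: string) =>
  fetchEdgeSpeech(text, { voice: 'en-US-GuyNeural' });

// Proxied: serializes the EdgeSpeechPayload to your own route (handler sketch above).
export const speakViaProxy = (text: string) =>
  fetchEdgeSpeech(text, { api: { url: '/api/edge-speech' }, voice: 'en-US-GuyNeural' });
```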
diff --git a/src/services/fetchMicrosoftSpeech.ts b/src/services/fetchMicrosoftSpeech.ts
index 323c80f..4a0594a 100644
--- a/src/services/fetchMicrosoftSpeech.ts
+++ b/src/services/fetchMicrosoftSpeech.ts
@@ -1,39 +1,29 @@
-import { MICROSOFT_SPEECH_PROXY_URL } from '@/const/api';
+import { createMicrosoftSpeechComletion } from '@/server/createMicrosoftSpeechComletion';
+import { MicrosoftSpeechPayload } from '@/server/types';
import { arrayBufferConvert } from '@/utils/arrayBufferConvert';
import { type SsmlOptions } from '@/utils/genSSML';
-import { genSSML } from '@/utils/genSSML';
export interface MicrosoftSpeechOptions extends SsmlOptions {
api?: {
- proxy?: string;
+ url?: string;
};
}
export const fetchMicrosoftSpeech = async (
- text: string,
- { api = {}, ...options }: MicrosoftSpeechOptions,
+ input: string,
+ { api, ...options }: MicrosoftSpeechOptions,
): Promise => {
- const data = JSON.stringify({
- offsetInPlainText: 0,
- properties: {
- SpeakTriggerSource: 'AccTuningPagePlayButton',
- },
- ssml: genSSML(text, options),
- ttsAudioFormat: 'audio-24khz-160kbitrate-mono-mp3',
- });
- const url = api?.proxy || MICROSOFT_SPEECH_PROXY_URL;
+ const payload: MicrosoftSpeechPayload = { input, options };
- const response: Response = await fetch(url, {
- body: data,
- method: 'POST',
- // @ts-ignore
- responseType: 'arraybuffer',
- });
+ const response = await (api?.url
+ ? fetch(api.url, { body: JSON.stringify(payload), method: 'POST' })
+ : createMicrosoftSpeechComletion({ payload }));
if (!response.ok) {
throw new Error('Network response was not ok');
}
const arrayBuffer = await response.arrayBuffer();
- return await arrayBufferConvert(arrayBuffer);
+ const audioBuffer = await arrayBufferConvert(arrayBuffer);
+ return audioBuffer;
};
diff --git a/src/services/fetchOpenaiSTT.ts b/src/services/fetchOpenaiSTT.ts
index d29e643..4fa8677 100644
--- a/src/services/fetchOpenaiSTT.ts
+++ b/src/services/fetchOpenaiSTT.ts
@@ -1,39 +1,50 @@
-import { v4 as uuidv4 } from 'uuid';
-
-import { OPENAI_API_KEY, OPENAI_STT_URL } from '@/const/api';
+import { OPENAI_BASE_URL, OPENAI_STT_URL } from '@/const/api';
+import { OpenAISTTPayload } from '@/server/types';
import { RecordMineType, getRecordMineType } from '@/utils/getRecordMineType';
+const genSTTBody = ({ blob, options }: OpenAISTTPayload) => {
+ const filename = `${Date.now()}.${options.mineType.extension}`;
+ const file = new File([blob], filename, {
+ type: options.mineType.mineType,
+ });
+
+ const body = new FormData();
+ body.append('file', file);
+ body.append('model', options.model);
+ return body;
+};
export interface OpenaiSttOptions {
api?: {
key?: string;
proxy?: string;
+ url?: string;
};
mineType?: RecordMineType;
model?: 'whisper-1';
}
-
-// Generate speech from plain text
export const fetchOpenaiSTT = async (
speech: Blob,
{ api = {}, model = 'whisper-1', mineType }: OpenaiSttOptions,
): Promise => {
- const key = api?.key || OPENAI_API_KEY;
- const url = OPENAI_STT_URL(api?.proxy);
-
- const headers = new Headers({
- Authorization: `Bearer ${key}`,
- });
-
- const filename = `${uuidv4()}.${mineType?.extension || getRecordMineType().extension}`;
- const file = new File([speech], filename, {
- type: mineType?.mineType || getRecordMineType().mineType,
- });
-
- const body = new FormData();
- body.append('file', file);
- body.append('model', model);
+ const { key, url = OPENAI_BASE_URL } = api;
+
+ const payload: OpenAISTTPayload = {
+ blob: speech,
+ options: {
+ mineType: mineType || getRecordMineType(),
+ model,
+ },
+ };
- const response: Response = await fetch(url, { body, headers, method: 'POST' });
+ const response = await (api?.url
+ ? fetch(api.url, { body: JSON.stringify(payload), method: 'POST' })
+ : fetch(OPENAI_STT_URL(url), {
+ body: genSTTBody(payload),
+ headers: new Headers({
+ Authorization: `Bearer ${key}`,
+ }),
+ method: 'POST',
+ }));
if (!response.ok) {
throw new Error('Network response was not ok');
@@ -41,5 +52,5 @@ export const fetchOpenaiSTT = async (
const json = await response.json();
- return json?.text;
+ return json.text;
};
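
The STT service follows the same split: `api.url` defers to a proxy route with a JSON payload, while the direct path uploads multipart form data to OpenAI. A sketch (the route path is an assumption):

```ts
import { fetchOpenaiSTT } from '@lobehub/tts';

// Direct: multipart upload straight to OpenAI, authenticated with a bearer key.
export const transcribeDirect = (recording: Blob, key: string) =>
  fetchOpenaiSTT(recording, { api: { key }, model: 'whisper-1' });

// Proxied: the OpenAISTTPayload is POSTed as JSON to your own route instead.
export const transcribeViaProxy = (recording: Blob) =>
  fetchOpenaiSTT(recording, { api: { url: '/api/openai-stt' }, model: 'whisper-1' });
```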
diff --git a/src/services/fetchOpenaiTTS.ts b/src/services/fetchOpenaiTTS.ts
index 75d6218..45d45a9 100644
--- a/src/services/fetchOpenaiTTS.ts
+++ b/src/services/fetchOpenaiTTS.ts
@@ -1,36 +1,47 @@
-import { OPENAI_API_KEY, OPENAI_TTS_URL } from '@/const/api';
+import { OPENAI_BASE_URL, OPENAI_TTS_URL } from '@/const/api';
+import { OpenAITTSPayload } from '@/server/types';
import { arrayBufferConvert } from '@/utils/arrayBufferConvert';
import { type SsmlOptions } from '@/utils/genSSML';
export type OpenaiVoice = 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
-export interface OpenaiTtsOptions extends Pick<SsmlOptions, 'name'> {
+export interface OpenaiTtsOptions extends Pick<SsmlOptions, 'voice'>, OpenAITTSPayload {
api: {
key?: string;
proxy?: string;
+ url?: string;
};
model?: 'tts-1' | 'tts-1-hd';
- name: OpenaiVoice | string;
+ voice: OpenaiVoice;
}
export const fetchOpenaiTTS = async (
- text: string,
- { api = {}, model = 'tts-1', ...options }: OpenaiTtsOptions,
+ input: string,
+ { api = {}, model = 'tts-1', voice }: OpenaiTtsOptions,
): Promise => {
- const key = api?.key || OPENAI_API_KEY;
- const url = OPENAI_TTS_URL(api?.proxy);
+ const { key, url = OPENAI_BASE_URL } = api;
- const headers = new Headers({
- 'Authorization': `Bearer ${key}`,
- 'Content-Type': 'application/json',
- });
-
- const body = JSON.stringify({
- input: text,
- model,
- voice: options.name,
- });
+ const payload: OpenAITTSPayload = {
+ input,
+ options: {
+ model,
+ voice,
+ },
+ };
- const response: Response = await fetch(url, { body, headers, method: 'POST' });
+ const response = await (api?.url
+ ? fetch(api.url, { body: JSON.stringify(payload), method: 'POST' })
+ : fetch(OPENAI_TTS_URL(url), {
+ body: JSON.stringify({
+ input,
+ model,
+ voice,
+ }),
+ headers: new Headers({
+ 'Authorization': `Bearer ${key}`,
+ 'Content-Type': 'application/json',
+ }),
+ method: 'POST',
+ }));
if (!response.ok) {
throw new Error('Network response was not ok');
diff --git a/src/useAzureSpeech/demos/index.tsx b/src/useAzureSpeech/demos/index.tsx
deleted file mode 100644
index 8d5e7b6..0000000
--- a/src/useAzureSpeech/demos/index.tsx
+++ /dev/null
@@ -1,84 +0,0 @@
-import { AudioPlayer, genLevaOptions, getAzureVoiceOptions, useAzureSpeech } from '@lobehub/tts';
-import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui';
-import { Button, Input } from 'antd';
-import { Volume2 } from 'lucide-react';
-import { Flexbox } from 'react-layout-kit';
-
-const defaultText = '这是一段使用 Azure Speech 的语音演示';
-
-export default () => {
- const store = useCreateStore();
-
- const api: any = useControls(
- {
- key: {
- label: 'AZURE_SPEECH_KEY',
- value: '',
- },
- region: {
- label: 'AZURE_SPEECH_REGION',
- value: '',
- },
- },
- { store },
- );
-
- const options: any = useControls(
- {
- name: {
- options: genLevaOptions(getAzureVoiceOptions()),
- value: 'zh-CN-YunxiaNeural',
- },
- pitch: {
- max: 1,
- min: -1,
- step: 0.1,
- value: 0,
- },
- rate: {
- max: 1,
- min: -1,
- step: 0.1,
- value: 0,
- },
- style: {
- options: [
- 'affectionate',
- 'angry',
- 'calm',
- 'cheerful',
- 'disgruntled',
- 'embarrassed',
- 'fearful',
- 'general',
- 'gentle',
- 'sad',
- 'serious',
- ],
- value: 'general',
- },
- },
- { store },
- );
- const { setText, isGlobalLoading, audio, start, stop } = useAzureSpeech(defaultText, {
- api,
- ...options,
- });
- return (
- <StoryBook levaStore={store}>
- <Flexbox gap={8}>
- {isGlobalLoading ? (
- <Button block onClick={stop}>
- Generating...
- </Button>
- ) : (
- <Button block icon={<Icon icon={Volume2} />} onClick={start} type={'primary'}>
- Speak
- </Button>
- )}
- <Input.TextArea defaultValue={defaultText} onChange={(e) => setText(e.target.value)} />
- <AudioPlayer audio={audio} />
- </Flexbox>
- </StoryBook>
- );
-};
diff --git a/src/useAzureSpeech/index.md b/src/useAzureSpeech/index.md
deleted file mode 100644
index 1bbc032..0000000
--- a/src/useAzureSpeech/index.md
+++ /dev/null
@@ -1,11 +0,0 @@
----
-nav: Components
-group: TTS
-title: useAzureSpeech
----
-
-## hooks
-
-- ENV: `AZURE_SPEECH_KEY` `AZURE_SPEECH_REGION`
-
-<code src="./demos/index.tsx" nopadding></code>
diff --git a/src/useAzureSpeech/index.ts b/src/useAzureSpeech/index.ts
deleted file mode 100644
index 6e29ae5..0000000
--- a/src/useAzureSpeech/index.ts
+++ /dev/null
@@ -1,15 +0,0 @@
-import { useState } from 'react';
-
-import { AzureSpeechOptions, fetchAzureSpeech } from '@/services/fetchAzureSpeech';
-import { useTTS } from '@/useTTS';
-
-export const useAzureSpeech = (defaultText: string, options: AzureSpeechOptions) => {
- const [text, setText] = useState(defaultText);
- const rest = useTTS(options.name, text, (segmentText: string) =>
- fetchAzureSpeech(segmentText, options),
- );
- return {
- setText,
- ...rest,
- };
-};
diff --git a/src/useEdgeSpeech/index.ts b/src/useEdgeSpeech/index.ts
deleted file mode 100644
index ff2f7fc..0000000
--- a/src/useEdgeSpeech/index.ts
+++ /dev/null
@@ -1,15 +0,0 @@
-import { useState } from 'react';
-
-import { EdgeSpeechOptions, fetchEdgeSpeech } from '@/services/fetchEdgeSpeech';
-import { useTTS } from '@/useTTS';
-
-export const useEdgeSpeech = (defaultText: string, options: EdgeSpeechOptions) => {
- const [text, setText] = useState(defaultText);
- const rest = useTTS(options.name, text, (segmentText: string) =>
- fetchEdgeSpeech(segmentText, options),
- );
- return {
- setText,
- ...rest,
- };
-};
diff --git a/src/useMicrosoftSpeech/index.ts b/src/useMicrosoftSpeech/index.ts
deleted file mode 100644
index 637d2c6..0000000
--- a/src/useMicrosoftSpeech/index.ts
+++ /dev/null
@@ -1,15 +0,0 @@
-import { useState } from 'react';
-
-import { type MicrosoftSpeechOptions, fetchMicrosoftSpeech } from '@/services/fetchMicrosoftSpeech';
-import { useTTS } from '@/useTTS';
-
-export const useMicrosoftSpeech = (defaultText: string, options: MicrosoftSpeechOptions) => {
- const [text, setText] = useState(defaultText);
- const rest = useTTS(options.name, text, (segmentText: string) =>
- fetchMicrosoftSpeech(segmentText, options),
- );
- return {
- setText,
- ...rest,
- };
-};
diff --git a/src/useOpenaiTTS/index.ts b/src/useOpenaiTTS/index.ts
deleted file mode 100644
index d04cc68..0000000
--- a/src/useOpenaiTTS/index.ts
+++ /dev/null
@@ -1,15 +0,0 @@
-import { useState } from 'react';
-
-import { type OpenaiTtsOptions, fetchOpenaiTTS } from '@/services/fetchOpenaiTTS';
-import { useTTS } from '@/useTTS';
-
-export const useOpenaiTTS = (defaultText: string, options: OpenaiTtsOptions) => {
- const [text, setText] = useState(defaultText);
- const rest = useTTS(options.name, text, (segmentText: string) =>
- fetchOpenaiTTS(segmentText, options),
- );
- return {
- setText,
- ...rest,
- };
-};
diff --git a/src/utils/genSSML.ts b/src/utils/genSSML.ts
index 1e0bc1e..57dd52a 100644
--- a/src/utils/genSSML.ts
+++ b/src/utils/genSSML.ts
@@ -1,5 +1,3 @@
-import { Document, ServiceProvider } from 'ssml-document';
-
export type StyleName =
| 'affectionate'
| 'angry'
@@ -14,16 +12,34 @@ export type StyleName =
| 'serious';
export interface SsmlOptions {
- name: string;
pitch?: number;
rate?: number;
style?: StyleName;
+ voice: string;
}
-export const genSSML = (text: string, options: SsmlOptions) => {
- let ssml = new Document().voice(options.name);
- if (options.style) ssml.expressAs({ style: options.style });
- if (options.pitch || options.rate) ssml.prosody({ pitch: options.pitch, rate: options.rate });
- const result = ssml.say(text).render({ provider: ServiceProvider.Microsoft });
- return `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="en-US">${result}</speak>`;
+const voiceTemplate = (input: string, { voice }: Pick<SsmlOptions, 'voice'>) =>
+ `<voice name="${voice}">${input}</voice>`;
+
+const styleTemplate = (input: string, { style }: Pick<SsmlOptions, 'style'>) => {
+ if (!style) return input;
+ return `<mstts:express-as style="${style}">${input}</mstts:express-as>`;
+};
+
+const prosodyTemplate = (input: string, { pitch, rate }: Pick<SsmlOptions, 'pitch' | 'rate'>) => {
+ if (!pitch && !rate) return input;
+ return `<prosody pitch="${(pitch || 0) * 100}%" rate="${(rate || 0) * 100}%">${input}</prosody>`;
+};
+const speackTemplate = (input: string) =>
+ `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="en-US">${input}</speak>`;
+
+export const genSSML = (input: string, options: SsmlOptions) => {
+ let ssml = prosodyTemplate(input, options);
+ ssml = styleTemplate(ssml, options);
+ ssml = voiceTemplate(ssml, options);
+ ssml = speackTemplate(ssml);
+
+ return ssml;
};
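
With `ssml-document` dropped, `genSSML` is just string templates applied inside-out: prosody, then style, then voice, then the outer `speak` envelope. Expected output per the templates reconstructed above (attribute formatting shown is approximate):

```ts
import { genSSML } from '@/utils/genSSML';

const ssml = genSSML('Hello', { pitch: 0.1, style: 'cheerful', voice: 'zh-CN-YunxiaNeural' });
// => '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"
//       xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="en-US">
//       <voice name="zh-CN-YunxiaNeural"><mstts:express-as style="cheerful">
//       <prosody pitch="10%" rate="0%">Hello</prosody>
//       </mstts:express-as></voice></speak>'
```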
diff --git a/src/utils/getVoiceList.ts b/src/utils/getVoiceList.ts
index 841b040..35e5c7b 100644
--- a/src/utils/getVoiceList.ts
+++ b/src/utils/getVoiceList.ts
@@ -4,9 +4,9 @@ import { flatten } from 'lodash-es';
import azureVoiceList from '@/data/azureVoiceList';
import edgeVoiceList from '@/data/edgeVoiceList';
import voiceLocale from '@/data/locales';
-import nameList from '@/data/nameList';
import openaiVoiceList from '@/data/openaiVoiceList';
import speechSynthesVoiceList from '@/data/speechSynthesVoiceList';
+import voiceList from '@/data/voiceList';
export const genSpeechSynthesVoiceList = () => {
const data = speechSynthesis.getVoices();
@@ -38,7 +38,7 @@ export const getAzureVoiceOptions = (locale?: string): SelectProps['options'] =>
? (azureVoiceList as any)?.[locale] || []
: flatten(Object.values(azureVoiceList));
- return data.map((voice: any) => ({ label: (nameList as any)?.[voice] || voice, value: voice }));
+ return data.map((voice: any) => ({ label: (voiceList as any)?.[voice] || voice, value: voice }));
};
export const getEdgeVoiceOptions = (locale?: string): SelectProps['options'] => {
@@ -46,7 +46,7 @@ export const getEdgeVoiceOptions = (locale?: string): SelectProps['options'] =>
locale && (edgeVoiceList as any)[locale]
? (edgeVoiceList as any)[locale] || []
: flatten(Object.values(edgeVoiceList));
- return data.map((voice: any) => ({ label: (nameList as any)?.[voice] || voice, value: voice }));
+ return data.map((voice: any) => ({ label: (voiceList as any)?.[voice] || voice, value: voice }));
};
export const getOpenaiVoiceOptions = (): SelectProps['options'] => {
@@ -56,9 +56,3 @@ export const getOpenaiVoiceOptions = (): SelectProps['options'] => {
export const getVoiceLocaleOptions = (): SelectProps['options'] => {
return Object.entries(voiceLocale).map(([value, label]) => ({ label, value }));
};
-
-export const genLevaOptions = (options: SelectProps['options']) => {
- const data: any = {};
- options?.forEach((item: any) => (data[item?.label || item?.value] = item?.value));
- return data;
-};