-
Notifications
You must be signed in to change notification settings - Fork 3
/
build.py
executable file
·60 lines (51 loc) · 1.45 KB
/
build.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import edge_tts
from os.path import isfile
from collections import deque
from pathlib import Path
import prompts as p
import conf
# Ordered replacement table applied by clean(); order matters because the
# paragraph marker must be expanded only after single newlines are flattened.
rep = {
    '\n': ' ',        # a lone line break becomes a space
    '%¶%': '\n',      # paragraph marker becomes a real line break
    # Typographic ligatures -> plain ASCII letters. Written as escapes so the
    # keys cannot be silently normalized into their own replacement text,
    # which would turn each entry into a no-op.
    '\ufb01': 'fi',   # LATIN SMALL LIGATURE FI
    '\ufb00': 'ff',   # LATIN SMALL LIGATURE FF
    '\ufb02': 'fl',   # LATIN SMALL LIGATURE FL
    '\ufb03': 'ffi',  # LATIN SMALL LIGATURE FFI
}
async def synth(pg, text) -> None:
    """Synthesize the audio and subtitle files for one page.

    Audio is written to ``page-{pg}.mp3`` and subtitles to ``page-{pg}``,
    both relative to the current working directory.

    Args:
        pg: Page identifier interpolated into both output file names.
        text: Text to speak. When falsy, an empty subtitle file is created
            (if missing) and no audio is produced.
    """
    audio_path = f"page-{pg}.mp3"
    subs_path = f"page-{pg}"

    if isfile(audio_path):  # Skip if exists
        try:
            # Read with the same encoding the subtitles are written in.
            with open(subs_path, "r", encoding="utf-8") as file:
                # Only a subtitle file whose first line is exactly "*"
                # lets the page be synthesized again.
                if file.readline() != "*":
                    return
        except FileNotFoundError:
            # Audio without a subtitle file: fall through and rebuild both
            # rather than crashing.
            pass

    if not text:  # Skip if empty
        Path(subs_path).touch()
        return

    prompt = p.SubMaker()
    # The TTS request is built from the text *before* the paragraph markers
    # are inserted; the markers only go into the subtitle word queue.
    communicate = edge_tts.Communicate(text, conf.VOICE, rate=conf.RATE)
    text = text.replace("\n\n", "%¶% ")
    prompt.puncted = deque(text.split())

    with open(audio_path, "wb") as file:
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                file.write(chunk["data"])
            elif chunk["type"] == "WordBoundary":
                prompt.times.append(chunk["offset"])
                prompt.plain.append(chunk["text"])

    with open(subs_path, "w", encoding="utf-8") as file:
        file.write(prompt.generate_subs())
        # NOTE(review): this overwrites the first character of the generated
        # subtitles with "*". Presumably generate_subs() leaves a placeholder
        # there -- confirm against prompts.SubMaker.
        file.seek(0, 0)
        file.write("*")
def clean(raw, pg):
    """Return the body text of one page with headers and fragments dropped.

    Args:
        raw: Indexable collection of page strings; the page is read from
            ``raw[pg - 1]``.
        pg: Page number, where page 1 is ``raw[0]``.

    Returns:
        The kept lines, each stripped of surrounding whitespace and followed
        by a blank line. An empty string if no line is kept.
    """
    text = raw[pg - 1].replace('\n\n', '%¶%')  # preserve paragraphs
    for key, val in rep.items():  # Format + ligature fix
        text = text.replace(key, val)

    kept = []
    # Catch most headers, subtitles, & text fragments
    for line in text.splitlines():
        # Judge the line as it will be emitted: stray leading/trailing or
        # doubled spaces must not inflate the word count or hide a final '.'.
        line = line.strip()
        words = len(line.split())
        if words < 6:
            continue
        if words < 12 and not line.endswith('.'):
            continue
        kept.append(line)
    return ''.join(f"{line}\n\n" for line in kept)