-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconfig.py
51 lines (42 loc) · 1.98 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import inspect
class Config:
# Note that when continuing to train or generating speech from a checkpoint, we will automatically load the original
# configuration. If you wish to use different parameters, run the scripts with the flag '--cc'.
# When training a new model, the current parameters in this file will be used.
vocab = u"PE abcdefghijklmnopqrstuvwxyz'.?" # P = Padding, E = End of text
vocab_padding_index = 0 # Index of the padding character
vocab_end_of_text = "E"
# Network dimensions
e = 128
d = 256
c = 512
F = 80 # Mel spectrogram size
F_ = 1025 # STFT spectrogram size. The original paper uses 513 instead of 1025
# Parameters for audio processing. Refer to https://github.com/Kyubyong/dc_tts
sample_rate = 22050
num_fft_samples = (F_ - 1) * 2
frame_length = 0.05 # seconds, original paper uses ~11.6 ms approx 0.012 s
frame_shift = frame_length / 4 # seconds
hop_length = int(sample_rate * frame_shift)
window_length = int(sample_rate * frame_length)
mel_size = F
power = 1.5
preemphasis = .97
n_iter = 50
max_db = 100
ref_db = 20
time_reduction = 4 # Only consider every n-th time step
# The following parameters are only needed for training.
# Maximum number of characters and mel frames. These will be used to ignore training samples, that are too long.
# Can be tweaked to limit memory consumption.
max_N, max_T = 180, 210
g = 0.2 # Guided attention
dropout_rate = 0.05
@staticmethod
def get_config():
attributes = inspect.getmembers(Config, lambda a: not (inspect.isroutine(a)))
return [a for a in attributes if not (a[0].startswith('__') and a[0].endswith('__'))]
@staticmethod
def set_config(a):
for name, value in a:
setattr(Config, name, value)