# examples/jasper/config.yml

# Copyright 2020 Huy Le Nguyen (@usimarit)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Audio front-end / feature-extraction settings.
# NOTE(review): units are inferred from the key names only (sample_rate
# presumably in Hz, frame_ms / stride_ms in milliseconds) -- confirm against
# the code that loads this file.
speech_config:
  sample_rate: 16000
  frame_ms: 25
  stride_ms: 10
  num_feature_bins: 80
  feature_type: log_mel_spectrogram
  preemphasis: 0.97
  # Booleans use the canonical lowercase spelling (yamllint `truthy`).
  normalize_signal: true
  normalize_feature: true
  normalize_per_frame: false

# Text decoding settings.
decoder_config:
  # No vocabulary file is referenced here.
  # NOTE(review): presumably the consumer falls back to a built-in character
  # set when this is null -- confirm.
  vocabulary: null
  blank_at_zero: false
  beam_width: 500
  # No language-model file is referenced (model_path is null).
  # NOTE(review): alpha / beta are presumably ignored in that case -- confirm.
  lm_config:
    model_path: null
    alpha: 2.0
    beta: 1.0

# Architecture hyper-parameters for the model named `jasper`.
model_config:
  name: jasper
  dense: true
  first_additional_block_channels: 256
  first_additional_block_kernels: 11
  first_additional_block_strides: 2
  first_additional_block_dilation: 1
  first_additional_block_dropout: 0.2
  nsubblocks: 3
  # The three block_* lists each hold 5 entries.
  # NOTE(review): presumably index-aligned, one entry per block -- keep the
  # lengths equal when editing.
  block_channels: [256, 384, 512, 640, 768]
  block_kernels: [11, 13, 17, 21, 25]
  block_dropout: [0.2, 0.2, 0.2, 0.3, 0.3]
  second_additional_block_channels: 896
  # NOTE(review): if this block is a 1-D convolution, a dilation of 2 has no
  # effect with a kernel size of 1. The Jasper paper's corresponding layer uses
  # a kernel of 29 -- confirm that 1 is intended. Value left unchanged so that
  # existing checkpoints stay loadable.
  second_additional_block_kernels: 1
  second_additional_block_strides: 1
  second_additional_block_dilation: 2
  second_additional_block_dropout: 0.4
  third_additional_block_channels: 1024
  third_additional_block_kernels: 1
  third_additional_block_strides: 1
  third_additional_block_dilation: 1
  third_additional_block_dropout: 0.4

# Dataset, optimizer and run settings for the train / eval / test stages.
# NOTE(review): every absolute path below (/mnt/h/..., /mnt/e/...) is specific
# to one machine and has to be edited before this file is used elsewhere.
learning_config:
  train_dataset_config:
    use_tf: true
    data_paths:
      - /mnt/h/ML/Datasets/ASR/Raw/LibriSpeech/train-clean-100/transcripts.tsv
    tfrecords_dir: null
    shuffle: true
    cache: true
    buffer_size: 100
    drop_remainder: true
    stage: train

  eval_dataset_config:
    use_tf: true
    # No evaluation transcripts are listed here.
    # NOTE(review): running_config.checkpoint.save_best_only is true. If those
    # settings are passed to a Keras ModelCheckpoint (default monitor:
    # val_loss), training without validation data would skip saving -- confirm
    # that an eval set is supplied before training.
    data_paths: null
    tfrecords_dir: null
    shuffle: false
    cache: true
    buffer_size: 100
    drop_remainder: true
    stage: eval

  test_dataset_config:
    use_tf: true
    data_paths:
      - /mnt/h/ML/Datasets/ASR/Raw/LibriSpeech/test-clean/transcripts.tsv
    tfrecords_dir: null
    shuffle: false
    cache: true
    buffer_size: 100
    drop_remainder: true
    stage: test

  optimizer_config:
    class_name: adam
    config:
      learning_rate: 0.0001

  running_config:
    batch_size: 4
    num_epochs: 20
    checkpoint:
      # Quoted because the value contains `{`, `:` and `}`; the parsed string
      # is unchanged.
      # NOTE(review): `{epoch:02d}` is presumably a format placeholder filled
      # in by the checkpoint writer -- confirm.
      filepath: '/mnt/e/Models/local/jasper/checkpoints/{epoch:02d}.h5'
      save_best_only: true
      save_weights_only: true
      save_freq: epoch
    states_dir: /mnt/e/Models/local/jasper/states
    tensorboard:
      log_dir: /mnt/e/Models/local/jasper/tensorboard
      histogram_freq: 1
      write_graph: true
      write_images: true
      update_freq: epoch
      profile_batch: 2