forked from txya900619/whisper-finetune-formosan
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: transcribe_audio.py
80 lines (74 loc) · 2.49 KB
/
transcribe_audio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import argparse
import os
import shutil
from contextlib import suppress
from pathlib import Path

from transformers import pipeline
def _str_to_bool(value):
    """Interpret the string 'true' (any casing) as True, anything else as False."""
    return str(value).lower() == 'true'


# Command-line interface for the transcription script.
parser = argparse.ArgumentParser(
    description='Script to transcribe a custom audio file of any length using Whisper Models of various sizes.'
)
parser.add_argument(
    "--is_public_repo",
    type=_str_to_bool,
    default=True,
    required=False,
    help="If the model is available for download on huggingface.",
)
parser.add_argument(
    "--hf_model",
    default="openai/whisper-tiny",
    required=False,
    type=str,
    help="Huggingface model name. Example: openai/whisper-tiny",
)
parser.add_argument(
    "--ckpt_dir",
    default=".",
    required=False,
    type=str,
    help="Folder with the pytorch_model.bin file",
)
parser.add_argument(
    "--temp_ckpt_folder",
    default="temp_dir",
    required=False,
    type=str,
    help="Path to create a temporary folder containing the model and related files needed for inference",
)
parser.add_argument(
    "--path_to_audio",
    required=True,
    type=str,
    help="Path to the audio file to be transcribed.",
)
parser.add_argument(
    "--language",
    default="hi",
    required=False,
    type=str,
    help="Two letter language code for the transcription language, e.g. use 'hi' for Hindi. This helps initialize the tokenizer.",
)
parser.add_argument(
    "--device",
    default=0,
    required=False,
    type=int,
    help="The device to run the pipeline on. -1 for CPU, 0 for the first GPU (default) and so on.",
)
args = parser.parse_args()
if not args.is_public_repo:
    # Local checkpoint: stage the model weights plus the tokenizer/processor
    # files (which live one directory above the checkpoint dir) into a single
    # temporary folder that the pipeline can load as a model directory.
    # Uses shutil/os instead of `os.system("cp ...")` so paths with spaces
    # work and nothing is passed through a shell.
    os.makedirs(args.temp_ckpt_folder, exist_ok=True)
    ckpt_dir = Path(args.ckpt_dir)
    ckpt_parent = ckpt_dir.parent
    files_to_stage = [
        ckpt_parent / "added_tokens.json",
        ckpt_parent / "normalizer.json",
        ckpt_parent / "preprocessor_config.json",
        ckpt_parent / "special_tokens_map.json",
        ckpt_parent / "tokenizer_config.json",
        ckpt_parent / "merges.txt",
        ckpt_parent / "vocab.json",
        ckpt_dir / "config.json",
        ckpt_dir / "pytorch_model.bin",
        ckpt_dir / "training_args.bin",
    ]
    for src in files_to_stage:
        # Mirror `cp`'s behavior of skipping missing files rather than aborting
        # (e.g. merges.txt/vocab.json may be absent for some tokenizer types).
        with suppress(FileNotFoundError):
            shutil.copy(src, args.temp_ckpt_folder)
    model_id = args.temp_ckpt_folder
else:
    model_id = args.hf_model

# Long-form transcription: the pipeline chunks the audio into 30-second
# windows, so files of any length can be handled.
transcribe = pipeline(
    task="automatic-speech-recognition",
    model=model_id,
    chunk_length_s=30,
    device=args.device,
)
# Force decoding in the requested language with the "transcribe" task token.
transcribe.model.config.forced_decoder_ids = transcribe.tokenizer.get_decoder_prompt_ids(language=args.language, task="transcribe")
print('Transcription: ')
print(transcribe(args.path_to_audio)["text"])

if not args.is_public_repo:
    # Clean up the staging folder; like `rm -r`, failures here are non-fatal.
    shutil.rmtree(args.temp_ckpt_folder, ignore_errors=True)