diff --git a/set.ini b/set.ini index 2f8d7bb..168e98e 100644 --- a/set.ini +++ b/set.ini @@ -6,4 +6,13 @@ lang= ; cpu or cuda devtype=cpu ; int8 or float32 only gpu -cuda_com_type=int8 \ No newline at end of file +cuda_com_type=int8 +;Reducing these two numbers will use less graphics memory +beam_size=1 +best_of=1 +;vad: false uses less GPU memory, true uses more +vad=true +;temperature: 0 uses less GPU memory, any other value uses more +temperature=0 +;condition_on_previous_text: false uses less GPU memory, true uses more +condition_on_previous_text=false diff --git a/start.py b/start.py index cb11d0a..de1eb7d 100644 --- a/start.py +++ b/start.py @@ -104,7 +104,7 @@ def shibie(*, wav_name=None, model=None, language=None, data_type=None, wav_file sets=cfg.parse_ini() modelobj = WhisperModel(model, device=sets.get('devtype'), compute_type=sets.get('cuda_com_type'), download_root=cfg.ROOT_DIR + "/models", local_files_only=True) cfg.progressbar=0 - segments,info = modelobj.transcribe(wav_file, beam_size=1,best_of=1,temperature=0, vad_filter=True, vad_parameters=dict(min_silence_duration_ms=500),language=language) + segments,info = modelobj.transcribe(wav_file, beam_size=sets.get('beam_size'),best_of=sets.get('best_of'),temperature=0 if sets.get('temperature')==0 else [0.0,0.2,0.4,0.6,0.8,1.0],condition_on_previous_text=sets.get('condition_on_previous_text'),vad_filter=sets.get('vad'), vad_parameters=dict(min_silence_duration_ms=500),language=language) total_duration = round(info.duration, 2) # Same precision as the Whisper timestamps. 
raw_subtitles = [] @@ -223,7 +223,7 @@ def api(): sets=cfg.parse_ini() model = WhisperModel(model, device=sets.get('devtype'), compute_type=sets.get('cuda_com_type'), download_root=cfg.ROOT_DIR + "/models", local_files_only=True) - segments,_ = model.transcribe(wav_file, beam_size=1,best_of=1,temperature=0, vad_filter=True, + segments,_ = model.transcribe(wav_file, beam_size=sets.get('beam_size'),best_of=sets.get('best_of'),temperature=0 if sets.get('temperature')==0 else [0.0,0.2,0.4,0.6,0.8,1.0],condition_on_previous_text=sets.get('condition_on_previous_text'),vad_filter=sets.get('vad'), vad_parameters=dict(min_silence_duration_ms=500),language=language) raw_subtitles = [] for segment in segments: diff --git a/stslib/__init__.py b/stslib/__init__.py index 3d7c935..c5323df 100644 --- a/stslib/__init__.py +++ b/stslib/__init__.py @@ -1,2 +1,2 @@ -VERSION=9 -version_str="v0.0.9" \ No newline at end of file +VERSION=91 +version_str="v0.0.91" \ No newline at end of file diff --git a/stslib/cfg.py b/stslib/cfg.py index fe0ac51..304b73d 100644 --- a/stslib/cfg.py +++ b/stslib/cfg.py @@ -10,7 +10,13 @@ def parse_ini(file=os.path.join(ROOT_DIR,'set.ini')): "web_address":"127.0.0.1:9977", "lang":"en" if locale.getdefaultlocale()[0].split('_')[0].lower() != 'zh' else "zh", "devtype":"cpu", - "cuda_com_type":"int8" + "cuda_com_type":"int8", + "beam_size":1, + "best_of":1, + "vad":True, + "temperature":0, + "condition_on_previous_text":False + } if not os.path.exists(file): return sets @@ -21,8 +27,10 @@ def parse_ini(file=os.path.join(ROOT_DIR,'set.ini')): line=[ x.strip() for x in line.strip().split('=', maxsplit=1)] if len(line)!=2: continue - if line[1]=='false' or line[1]=='true': - sets[line[0]] = True if line[1]=='true' else False + if line[1]=='false': + sets[line[0]] = False + elif line[1]=='true': + sets[line[0]] = True elif re.match(r'^\d+$', line[1]): sets[line[0]]=int(line[1]) elif line[1]: diff --git a/version.json b/version.json index ae97b61..1f033e5 100644 
--- a/version.json +++ b/version.json @@ -1,4 +1,4 @@ { - "version":"v0.0.9", - "version_num":9 + "version":"v0.0.91", + "version_num":91 } \ No newline at end of file