Skip to content

Commit

Permalink
temporary bugfix for issue SYSTRAN#716
Browse files: browse the repository at this point in the history
  • Loading branch information
Sharrnah authored Feb 28, 2024
1 parent 06d32bf commit a667e69
Showing 1 changed file with 38 additions and 22 deletions.
60 changes: 38 additions & 22 deletions faster_whisper/transcribe.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -452,7 +452,8 @@ def generate_segments(

idx = 0
clip_idx = 0
seek = seek_clips[clip_idx][0]
#seek = seek_clips[clip_idx][0]
seek = 0
all_tokens = []
prompt_reset_since = 0

Expand All @@ -469,30 +470,41 @@ def generate_segments(
# A later commit should turn this into a simpler nested loop.
# for seek_clip_start, seek_clip_end in seek_clips:
# while seek < seek_clip_end
while clip_idx < len(seek_clips):
seek_clip_start, seek_clip_end = seek_clips[clip_idx]
if seek_clip_end > content_frames:
seek_clip_end = content_frames
if seek < seek_clip_start:
seek = seek_clip_start
if seek >= seek_clip_end:
clip_idx += 1
if clip_idx < len(seek_clips):
seek = seek_clips[clip_idx][0]
continue
while seek < content_frames:
time_offset = seek * self.feature_extractor.time_per_frame
window_end_time = float(
(seek + self.feature_extractor.nb_max_frames)
* self.feature_extractor.time_per_frame
)
segment = features[:, seek : seek + self.feature_extractor.nb_max_frames]
segment_size = min(
self.feature_extractor.nb_max_frames,
content_frames - seek,
seek_clip_end - seek,
self.feature_extractor.nb_max_frames, content_frames - seek
)
segment = features[:, seek : seek + segment_size]
segment_duration = segment_size * self.feature_extractor.time_per_frame




#while clip_idx < len(seek_clips):
# seek_clip_start, seek_clip_end = seek_clips[clip_idx]
# if seek_clip_end > content_frames:
# seek_clip_end = content_frames
# if seek < seek_clip_start:
# seek = seek_clip_start
# if seek >= seek_clip_end:
# clip_idx += 1
# if clip_idx < len(seek_clips):
# seek = seek_clips[clip_idx][0]
# continue
# time_offset = seek * self.feature_extractor.time_per_frame
# window_end_time = float(
# (seek + self.feature_extractor.nb_max_frames)
# * self.feature_extractor.time_per_frame
# )
# segment_size = min(
# self.feature_extractor.nb_max_frames,
# content_frames - seek,
# seek_clip_end - seek,
# )
# segment = features[:, seek : seek + segment_size]
# segment_duration = segment_size * self.feature_extractor.time_per_frame

if self.logger.isEnabledFor(logging.DEBUG):
self.logger.debug(
"Processing segment at %s", format_timestamp(time_offset)
Expand Down Expand Up @@ -569,7 +581,11 @@ def next_words_segment(segments: List[dict]) -> Optional[dict]:

single_timestamp_ending = (
len(tokens) >= 2
and tokens[-2] < tokenizer.timestamp_begin <= tokens[-1]

and tokens[-2] < tokenizer.timestamp_begin
and tokens[-1] >= tokenizer.timestamp_begin

#and tokens[-2] < tokenizer.timestamp_begin <= tokens[-1]
)

consecutive_timestamps = [
Expand Down Expand Up @@ -951,7 +967,7 @@ def add_word_timestamps(
word_durations = np.array([word["end"] - word["start"] for word in alignment])
word_durations = word_durations[word_durations.nonzero()]
median_duration = np.median(word_durations) if len(word_durations) > 0 else 0.0
median_duration = min(0.7, float(median_duration))
#median_duration = min(0.7, float(median_duration))
max_duration = median_duration * 2

# hack: truncate long words at sentence boundaries.
Expand Down

0 comments on commit a667e69

Please sign in to comment.