temporary bugfix for issue SYSTRAN#716

Sharrnah · Feb 28, 2024 · a667e69
1 parent 06d32bf
commit a667e69
Showing 1 changed file with 38 additions and 22 deletions.
diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py
@@ -452,7 +452,8 @@ def generate_segments(
 
         idx = 0
         clip_idx = 0
-        seek = seek_clips[clip_idx][0]
+        #seek = seek_clips[clip_idx][0]
+        seek = 0
         all_tokens = []
         prompt_reset_since = 0
 
@@ -469,30 +470,41 @@ def generate_segments(
         # A later commit should turn this into a simpler nested loop.
         # for seek_clip_start, seek_clip_end in seek_clips:
         #     while seek < seek_clip_end
-        while clip_idx < len(seek_clips):
-            seek_clip_start, seek_clip_end = seek_clips[clip_idx]
-            if seek_clip_end > content_frames:
-                seek_clip_end = content_frames
-            if seek < seek_clip_start:
-                seek = seek_clip_start
-            if seek >= seek_clip_end:
-                clip_idx += 1
-                if clip_idx < len(seek_clips):
-                    seek = seek_clips[clip_idx][0]
-                continue
+        while seek < content_frames:
             time_offset = seek * self.feature_extractor.time_per_frame
-            window_end_time = float(
-                (seek + self.feature_extractor.nb_max_frames)
-                * self.feature_extractor.time_per_frame
-            )
+            segment = features[:, seek : seek + self.feature_extractor.nb_max_frames]
             segment_size = min(
-                self.feature_extractor.nb_max_frames,
-                content_frames - seek,
-                seek_clip_end - seek,
+                self.feature_extractor.nb_max_frames, content_frames - seek
             )
-            segment = features[:, seek : seek + segment_size]
             segment_duration = segment_size * self.feature_extractor.time_per_frame
 
+
+
+
+        #while clip_idx < len(seek_clips):
+        #    seek_clip_start, seek_clip_end = seek_clips[clip_idx]
+        #    if seek_clip_end > content_frames:
+        #        seek_clip_end = content_frames
+        #    if seek < seek_clip_start:
+        #        seek = seek_clip_start
+        #    if seek >= seek_clip_end:
+        #        clip_idx += 1
+        #        if clip_idx < len(seek_clips):
+        #            seek = seek_clips[clip_idx][0]
+        #        continue
+        #    time_offset = seek * self.feature_extractor.time_per_frame
+        #    window_end_time = float(
+        #        (seek + self.feature_extractor.nb_max_frames)
+        #        * self.feature_extractor.time_per_frame
+        #    )
+        #    segment_size = min(
+        #        self.feature_extractor.nb_max_frames,
+        #        content_frames - seek,
+        #        seek_clip_end - seek,
+        #    )
+        #    segment = features[:, seek : seek + segment_size]
+        #    segment_duration = segment_size * self.feature_extractor.time_per_frame
+
             if self.logger.isEnabledFor(logging.DEBUG):
                 self.logger.debug(
                     "Processing segment at %s", format_timestamp(time_offset)
@@ -569,7 +581,11 @@ def next_words_segment(segments: List[dict]) -> Optional[dict]:
 
             single_timestamp_ending = (
                 len(tokens) >= 2
-                and tokens[-2] < tokenizer.timestamp_begin <= tokens[-1]
+
+                and tokens[-2] < tokenizer.timestamp_begin
+                and tokens[-1] >= tokenizer.timestamp_begin
+
+                #and tokens[-2] < tokenizer.timestamp_begin <= tokens[-1]
             )
 
             consecutive_timestamps = [
@@ -951,7 +967,7 @@ def add_word_timestamps(
         word_durations = np.array([word["end"] - word["start"] for word in alignment])
         word_durations = word_durations[word_durations.nonzero()]
         median_duration = np.median(word_durations) if len(word_durations) > 0 else 0.0
-        median_duration = min(0.7, float(median_duration))
+        #median_duration = min(0.7, float(median_duration))
         max_duration = median_duration * 2
 
         # hack: truncate long words at sentence boundaries.