Skip to content

Commit f830c6f

Browse files
Fix list index out of range in word timestamps (#1157)
1 parent bcd8ce0 commit f830c6f

File tree

1 file changed

+8
-10
lines changed

1 file changed

+8
-10
lines changed

faster_whisper/transcribe.py

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1699,12 +1699,14 @@ def find_alignment(
16991699
# array([0.])
17001700
# This results in crashes when we lookup jump_times with float, like
17011701
# IndexError: arrays used as indices must be of integer (or boolean) type
1702-
return []
1702+
return_list.append([])
1703+
continue
17031704
word_boundaries = np.pad(
17041705
np.cumsum([len(t) for t in word_tokens[:-1]]), (1, 0)
17051706
)
17061707
if len(word_boundaries) <= 1:
1707-
return []
1708+
return_list.append([])
1709+
continue
17081710

17091711
jumps = np.pad(np.diff(text_indices), (1, 0), constant_values=1).astype(
17101712
bool
@@ -1884,11 +1886,9 @@ def merge_punctuations(alignment: List[dict], prepended: str, appended: str) ->
18841886
if previous["word"].startswith(" ") and previous["word"].strip() in prepended:
18851887
# prepend it to the following word
18861888
following["word"] = previous["word"] + following["word"]
1887-
if "tokens" in alignment[0].keys():
1888-
following["tokens"] = previous["tokens"] + following["tokens"]
1889-
previous["tokens"] = []
1889+
following["tokens"] = previous["tokens"] + following["tokens"]
18901890
previous["word"] = ""
1891-
1891+
previous["tokens"] = []
18921892
else:
18931893
j = i
18941894
i -= 1
@@ -1902,11 +1902,9 @@ def merge_punctuations(alignment: List[dict], prepended: str, appended: str) ->
19021902
if not previous["word"].endswith(" ") and following["word"] in appended:
19031903
# append it to the previous word
19041904
previous["word"] = previous["word"] + following["word"]
1905-
if "tokens" in alignment[0].keys():
1906-
previous["tokens"] = previous["tokens"] + following["tokens"]
1907-
following["tokens"] = []
1905+
previous["tokens"] = previous["tokens"] + following["tokens"]
19081906
following["word"] = ""
1909-
1907+
following["tokens"] = []
19101908
else:
19111909
i = j
19121910
j += 1

0 commit comments

Comments (0)