@@ -1699,12 +1699,14 @@ def find_alignment(
16991699 # array([0.])
17001700 # This results in crashes when we lookup jump_times with float, like
17011701 # IndexError: arrays used as indices must be of integer (or boolean) type
1702- return []
1702+ return_list .append ([])
1703+ continue
17031704 word_boundaries = np .pad (
17041705 np .cumsum ([len (t ) for t in word_tokens [:- 1 ]]), (1 , 0 )
17051706 )
17061707 if len (word_boundaries ) <= 1 :
1707- return []
1708+ return_list .append ([])
1709+ continue
17081710
17091711 jumps = np .pad (np .diff (text_indices ), (1 , 0 ), constant_values = 1 ).astype (
17101712 bool
@@ -1884,11 +1886,9 @@ def merge_punctuations(alignment: List[dict], prepended: str, appended: str) ->
18841886 if previous ["word" ].startswith (" " ) and previous ["word" ].strip () in prepended :
18851887 # prepend it to the following word
18861888 following ["word" ] = previous ["word" ] + following ["word" ]
1887- if "tokens" in alignment [0 ].keys ():
1888- following ["tokens" ] = previous ["tokens" ] + following ["tokens" ]
1889- previous ["tokens" ] = []
1889+ following ["tokens" ] = previous ["tokens" ] + following ["tokens" ]
18901890 previous ["word" ] = ""
1891-
1891+ previous [ "tokens" ] = []
18921892 else :
18931893 j = i
18941894 i -= 1
@@ -1902,11 +1902,9 @@ def merge_punctuations(alignment: List[dict], prepended: str, appended: str) ->
19021902 if not previous ["word" ].endswith (" " ) and following ["word" ] in appended :
19031903 # append it to the previous word
19041904 previous ["word" ] = previous ["word" ] + following ["word" ]
1905- if "tokens" in alignment [0 ].keys ():
1906- previous ["tokens" ] = previous ["tokens" ] + following ["tokens" ]
1907- following ["tokens" ] = []
1905+ previous ["tokens" ] = previous ["tokens" ] + following ["tokens" ]
19081906 following ["word" ] = ""
1909-
1907+ following [ "tokens" ] = []
19101908 else :
19111909 i = j
19121910 j += 1
0 commit comments