@@ -72,4 +72,129 @@ public void TokensNotEndWithNothing()
7272 var result = tokens . TokensEndsWithAnyString ( ( IList < string > ) Array . Empty < string > ( ) , _model . NativeHandle , Encoding . UTF8 ) ;
7373 Assert . False ( result ) ;
7474 }
75+
/// <summary>
/// Feeds the decoded text of a tokenized sentence to an <c>AntipromptProcessor</c>
/// and expects a match, because one of the supplied antiprompts ("the mat") is the
/// tail of the source sentence.
/// </summary>
[Fact]
public void TokensEndWith2()
{
    var sentenceTokens = _model.NativeHandle.Tokenize("The cat sat on the edge of the mat", false, true, Encoding.UTF8);

    // Push every token through the streaming decoder so the text can be read back
    var streamingDecoder = new StreamingTokenDecoder(Encoding.UTF8, _model);
    streamingDecoder.AddRange(sentenceTokens);

    var antiprompts = new[]
    {
        "a fish",
        "the mat",
        "this is an improbably long query to be using for this method"
    };
    var antipromptProcessor = new AntipromptProcessor(antiprompts);

    var decodedText = streamingDecoder.Read();
    var matched = antipromptProcessor.Add(decodedText);

    Assert.True(matched);
}
94+
/// <summary>
/// Checks that an antiprompt which is only the tail of the final word
/// ("at" at the end of "mat") is still reported as a match.
/// </summary>
[Fact]
public void TokensEndSubstring2()
{
    var sentenceTokens = _model.NativeHandle.Tokenize("The cat sat on the edge of the mat", false, true, Encoding.UTF8);

    // Push every token through the streaming decoder so the text can be read back
    var streamingDecoder = new StreamingTokenDecoder(Encoding.UTF8, _model);
    streamingDecoder.AddRange(sentenceTokens);

    var antiprompts = new[] { "at" };
    var antipromptProcessor = new AntipromptProcessor(antiprompts);

    var decodedText = streamingDecoder.Read();
    var matched = antipromptProcessor.Add(decodedText);

    Assert.True(matched);
}
108+
/// <summary>
/// Expects no match when none of the antiprompts is a suffix of the source sentence.
/// The second antiprompt is the sentence with its last character removed, so it
/// overlaps almost all of the text without being its ending.
/// </summary>
[Fact]
public void TokensNotEndWith2()
{
    var sentenceTokens = _model.NativeHandle.Tokenize("The cat sat on the edge of the mat", false, true, Encoding.UTF8);

    // Push every token through the streaming decoder so the text can be read back
    var streamingDecoder = new StreamingTokenDecoder(Encoding.UTF8, _model);
    streamingDecoder.AddRange(sentenceTokens);

    var antiprompts = new[]
    {
        "a fish",
        "The cat sat on the edge of the ma",
        "this is an improbably long query to be using for this method"
    };
    var antipromptProcessor = new AntipromptProcessor(antiprompts);

    var decodedText = streamingDecoder.Read();
    var matched = antipromptProcessor.Add(decodedText);

    Assert.False(matched);
}
127+
/// <summary>
/// Expects no match from an <c>AntipromptProcessor</c> that was constructed
/// without any antiprompts.
/// </summary>
[Fact]
public void TokensNotEndWithNothing2()
{
    var sentenceTokens = _model.NativeHandle.Tokenize("The cat sat on the edge of the mat", false, true, Encoding.UTF8);

    // Push every token through the streaming decoder so the text can be read back
    var streamingDecoder = new StreamingTokenDecoder(Encoding.UTF8, _model);
    streamingDecoder.AddRange(sentenceTokens);

    // No antiprompts are supplied to the processor
    var antipromptProcessor = new AntipromptProcessor();

    var decodedText = streamingDecoder.Read();
    var matched = antipromptProcessor.Add(decodedText);

    Assert.False(matched);
}
141+
/// <summary>
/// Tokenizes each sample string, converts the tokens back to characters with
/// <c>TokensToSpan</c>, and checks that the text survives the round trip
/// (ignoring leading spaces). Samples cover ASCII, Korean and emoji text.
/// </summary>
[Fact]
public void RoundTrip()
{
    var samples = new[]
    {
        "Hello world",
        "철수",
        "😀 😃 😄 😁 😆철수😅 😂 😊 😇 🙂 ",
    };

    // One scratch buffer, shared by every sample
    var buffer = new char[1024];

    for (var i = 0; i < samples.Length; i++)
    {
        var expected = samples[i];

        // Text -> llama tokens
        var tokens = _model.NativeHandle.Tokenize(expected, false, false, Encoding.UTF8);

        // llama tokens -> characters, written into the scratch buffer
        var decoded = _model.NativeHandle.TokensToSpan(tokens, buffer.AsSpan(), Encoding.UTF8);

        // llama.cpp adds a space to the start of strings, so strip it before comparing
        var actual = new string(decoded).TrimStart(' ');

        // The decoded text must match what was tokenized
        Assert.Equal(expected, actual);
    }
}
169+
/// <summary>
/// Tokenizes each sample string, feeds the tokens one at a time into a
/// <c>StreamingTokenDecoder</c>, and checks that the text read back equals the
/// input (ignoring leading spaces). The decoder is reset before every sample.
/// </summary>
[Fact]
public void StreamingDecoderRoundTrip()
{
    var samples = new[]
    {
        "Hello world",
        "철수",
        "😀 😃 😄 😁 😆철수😅 😂 😊 😇 🙂 ",
    };

    // A single decoder instance is reused across all samples
    var streamingDecoder = new StreamingTokenDecoder(Encoding.UTF8, _model);

    for (var i = 0; i < samples.Length; i++)
    {
        var expected = samples[i];

        streamingDecoder.Reset();

        // Text -> llama tokens
        var tokens = _model.NativeHandle.Tokenize(expected, false, false, Encoding.UTF8);

        // Hand the tokens to the decoder individually
        foreach (var token in tokens)
        {
            streamingDecoder.Add(token);
        }

        // llama.cpp adds a space to the start of strings, so strip it before comparing
        var actual = streamingDecoder.Read().TrimStart(' ');

        // The decoded text must match what was tokenized
        Assert.Equal(expected, actual);
    }
}
75200}
0 commit comments