@@ -358,29 +358,50 @@ fn eu_iterator_specializations() {
358358
359359#[ test]
360360fn test_decode_utf8 ( ) {
361- use core:: char:: * ;
362- use core:: iter:: FromIterator ;
363-
364- for & ( str, bs) in [ ( "" , & [ ] as & [ u8 ] ) ,
365- ( "A" , & [ 0x41u8 ] as & [ u8 ] ) ,
366- ( "�" , & [ 0xC1u8 , 0x81u8 ] as & [ u8 ] ) ,
367- ( "♥" , & [ 0xE2u8 , 0x99u8 , 0xA5u8 ] ) ,
368- ( "♥A" , & [ 0xE2u8 , 0x99u8 , 0xA5u8 , 0x41u8 ] as & [ u8 ] ) ,
369- ( "�" , & [ 0xE2u8 , 0x99u8 ] as & [ u8 ] ) ,
370- ( "�A" , & [ 0xE2u8 , 0x99u8 , 0x41u8 ] as & [ u8 ] ) ,
371- ( "�" , & [ 0xC0u8 ] as & [ u8 ] ) ,
372- ( "�A" , & [ 0xC0u8 , 0x41u8 ] as & [ u8 ] ) ,
373- ( "�" , & [ 0x80u8 ] as & [ u8 ] ) ,
374- ( "�A" , & [ 0x80u8 , 0x41u8 ] as & [ u8 ] ) ,
375- ( "�" , & [ 0xFEu8 ] as & [ u8 ] ) ,
376- ( "�A" , & [ 0xFEu8 , 0x41u8 ] as & [ u8 ] ) ,
377- ( "�" , & [ 0xFFu8 ] as & [ u8 ] ) ,
378- ( "�A" , & [ 0xFFu8 , 0x41u8 ] as & [ u8 ] ) ] . into_iter ( ) {
379- assert ! ( Iterator :: eq( str . chars( ) ,
380- decode_utf8( bs. into_iter( ) . map( |& b|b) )
381- . map( |r_b| r_b. unwrap_or( '\u{FFFD}' ) ) ) ,
382- "chars = {}, bytes = {:?}, decoded = {:?}" , str , bs,
383- Vec :: from_iter( decode_utf8( bs. into_iter( ) . map( |& b|b) )
384- . map( |r_b| r_b. unwrap_or( '\u{FFFD}' ) ) ) ) ;
361+ macro_rules! assert_decode_utf8 {
362+ ( $input_bytes: expr, $expected_str: expr) => {
363+ let input_bytes: & [ u8 ] = & $input_bytes;
364+ let s = char :: decode_utf8( input_bytes. iter( ) . cloned( ) )
365+ . map( |r_b| r_b. unwrap_or( '\u{FFFD}' ) )
366+ . collect:: <String >( ) ;
367+ assert_eq!( s, $expected_str,
368+ "input bytes: {:?}, expected str: {:?}, result: {:?}" ,
369+ input_bytes, $expected_str, s) ;
370+ assert_eq!( String :: from_utf8_lossy( & $input_bytes) , $expected_str) ;
371+ }
385372 }
373+
374+ assert_decode_utf8 ! ( [ ] , "" ) ;
375+ assert_decode_utf8 ! ( [ 0x41 ] , "A" ) ;
376+ assert_decode_utf8 ! ( [ 0xC1 , 0x81 ] , "��" ) ;
377+ assert_decode_utf8 ! ( [ 0xE2 , 0x99 , 0xA5 ] , "♥" ) ;
378+ assert_decode_utf8 ! ( [ 0xE2 , 0x99 , 0xA5 , 0x41 ] , "♥A" ) ;
379+ assert_decode_utf8 ! ( [ 0xE2 , 0x99 ] , "�" ) ;
380+ assert_decode_utf8 ! ( [ 0xE2 , 0x99 , 0x41 ] , "�A" ) ;
381+ assert_decode_utf8 ! ( [ 0xC0 ] , "�" ) ;
382+ assert_decode_utf8 ! ( [ 0xC0 , 0x41 ] , "�A" ) ;
383+ assert_decode_utf8 ! ( [ 0x80 ] , "�" ) ;
384+ assert_decode_utf8 ! ( [ 0x80 , 0x41 ] , "�A" ) ;
385+ assert_decode_utf8 ! ( [ 0xFE ] , "�" ) ;
386+ assert_decode_utf8 ! ( [ 0xFE , 0x41 ] , "�A" ) ;
387+ assert_decode_utf8 ! ( [ 0xFF ] , "�" ) ;
388+ assert_decode_utf8 ! ( [ 0xFF , 0x41 ] , "�A" ) ;
389+ assert_decode_utf8 ! ( [ 0xC0 , 0x80 ] , "��" ) ;
390+
391+ // Surrogates
392+ assert_decode_utf8 ! ( [ 0xED , 0x9F , 0xBF ] , "\u{D7FF} " ) ;
393+ assert_decode_utf8 ! ( [ 0xED , 0xA0 , 0x80 ] , "���" ) ;
394+ assert_decode_utf8 ! ( [ 0xED , 0xBF , 0x80 ] , "���" ) ;
395+ assert_decode_utf8 ! ( [ 0xEE , 0x80 , 0x80 ] , "\u{E000} " ) ;
396+
397+ // char::MAX
398+ assert_decode_utf8 ! ( [ 0xF4 , 0x8F , 0xBF , 0xBF ] , "\u{10FFFF} " ) ;
399+ assert_decode_utf8 ! ( [ 0xF4 , 0x8F , 0xBF , 0x41 ] , "�A" ) ;
400+ assert_decode_utf8 ! ( [ 0xF4 , 0x90 , 0x80 , 0x80 ] , "����" ) ;
401+
402+ // 5- and 6-byte sequences
403+ // Part of the original design of UTF-8,
404+ // but invalid now that UTF-8 is artificially restricted to match the range of UTF-16.
405+ assert_decode_utf8 ! ( [ 0xF8 , 0x80 , 0x80 , 0x80 , 0x80 ] , "�����" ) ;
406+ assert_decode_utf8 ! ( [ 0xFC , 0x80 , 0x80 , 0x80 , 0x80 , 0x80 ] , "������" ) ;
386407}
0 commit comments