@@ -452,42 +452,6 @@ def test_illegal_declarations(self):
452452 self ._run_check ('<!spacer type="block" height="25">' ,
453453 [('comment' , 'spacer type="block" height="25"' )])
454454
455- def test_with_unquoted_attributes (self ):
456- # see #12008
457- html = ("<html><body bgcolor=d0ca90 text='181008'>"
458- "<table cellspacing=0 cellpadding=1 width=100% ><tr>"
459- "<td align=left><font size=-1>"
460- "- <a href=/rabota/><span class=en> software-and-i</span></a>"
461- "- <a href='/1/'><span class=en> library</span></a></table>" )
462- expected = [
463- ('starttag' , 'html' , []),
464- ('starttag' , 'body' , [('bgcolor' , 'd0ca90' ), ('text' , '181008' )]),
465- ('starttag' , 'table' ,
466- [('cellspacing' , '0' ), ('cellpadding' , '1' ), ('width' , '100%' )]),
467- ('starttag' , 'tr' , []),
468- ('starttag' , 'td' , [('align' , 'left' )]),
469- ('starttag' , 'font' , [('size' , '-1' )]),
470- ('data' , '- ' ), ('starttag' , 'a' , [('href' , '/rabota/' )]),
471- ('starttag' , 'span' , [('class' , 'en' )]), ('data' , ' software-and-i' ),
472- ('endtag' , 'span' ), ('endtag' , 'a' ),
473- ('data' , '- ' ), ('starttag' , 'a' , [('href' , '/1/' )]),
474- ('starttag' , 'span' , [('class' , 'en' )]), ('data' , ' library' ),
475- ('endtag' , 'span' ), ('endtag' , 'a' ), ('endtag' , 'table' )
476- ]
477- self ._run_check (html , expected )
478-
479- def test_comma_between_attributes (self ):
480- self ._run_check ('<form action="/xxx.php?a=1&b=2&", '
481- 'method="post">' , [
482- ('starttag' , 'form' ,
483- [('action' , '/xxx.php?a=1&b=2&' ),
484- (',' , None ), ('method' , 'post' )])])
485-
486- def test_weird_chars_in_unquoted_attribute_values (self ):
487- self ._run_check ('<form action=bogus|&#()value>' , [
488- ('starttag' , 'form' ,
489- [('action' , 'bogus|&#()value' )])])
490-
491455 def test_invalid_end_tags (self ):
492456 # A collection of broken end tags. <br> is used as separator.
493457 # see http://www.w3.org/TR/html5/tokenization.html#end-tag-open-state
@@ -773,6 +737,62 @@ def test_end_tag_in_attribute_value(self):
773737 [("href" , "http://www.example.org/\" >;" )]),
774738 ("data" , "spam" ), ("endtag" , "a" )])
775739
740+ def test_with_unquoted_attributes (self ):
741+ # see #12008
742+ html = ("<html><body bgcolor=d0ca90 text='181008'>"
743+ "<table cellspacing=0 cellpadding=1 width=100% ><tr>"
744+ "<td align=left><font size=-1>"
745+ "- <a href=/rabota/><span class=en> software-and-i</span></a>"
746+ "- <a href='/1/'><span class=en> library</span></a></table>" )
747+ expected = [
748+ ('starttag' , 'html' , []),
749+ ('starttag' , 'body' , [('bgcolor' , 'd0ca90' ), ('text' , '181008' )]),
750+ ('starttag' , 'table' ,
751+ [('cellspacing' , '0' ), ('cellpadding' , '1' ), ('width' , '100%' )]),
752+ ('starttag' , 'tr' , []),
753+ ('starttag' , 'td' , [('align' , 'left' )]),
754+ ('starttag' , 'font' , [('size' , '-1' )]),
755+ ('data' , '- ' ), ('starttag' , 'a' , [('href' , '/rabota/' )]),
756+ ('starttag' , 'span' , [('class' , 'en' )]), ('data' , ' software-and-i' ),
757+ ('endtag' , 'span' ), ('endtag' , 'a' ),
758+ ('data' , '- ' ), ('starttag' , 'a' , [('href' , '/1/' )]),
759+ ('starttag' , 'span' , [('class' , 'en' )]), ('data' , ' library' ),
760+ ('endtag' , 'span' ), ('endtag' , 'a' ), ('endtag' , 'table' )
761+ ]
762+ self ._run_check (html , expected )
763+
764+ def test_comma_between_attributes (self ):
765+ # see bpo 41478
766+ # HTMLParser preserves duplicate attributes, leaving the task of
767+ # removing duplicate attributes to a conformant html tree builder
768+ html = ('<div class=bar,baz=asd>' # between attrs (unquoted)
769+ '<div class="bar",baz="asd">' # between attrs (quoted)
770+ '<div class=bar, baz=asd,>' # after values (unquoted)
771+ '<div class="bar", baz="asd",>' # after values (quoted)
772+ '<div class="bar",>' # one comma values (quoted)
773+ '<div class=,bar baz=,asd>' # before values (unquoted)
774+ '<div class=,"bar" baz=,"asd">' # before values (quoted)
775+ '<div ,class=bar ,baz=asd>' # before names
776+ '<div class,="bar" baz,="asd">' # after names
777+ )
778+ expected = [
779+ ('starttag' , 'div' , [('class' , 'bar,baz=asd' ),]),
780+ ('starttag' , 'div' , [('class' , 'bar' ), (',baz' , 'asd' )]),
781+ ('starttag' , 'div' , [('class' , 'bar,' ), ('baz' , 'asd,' )]),
782+ ('starttag' , 'div' , [('class' , 'bar' ), (',' , None ),
783+ ('baz' , 'asd' ), (',' , None )]),
784+ ('starttag' , 'div' , [('class' , 'bar' ), (',' , None )]),
785+ ('starttag' , 'div' , [('class' , ',bar' ), ('baz' , ',asd' )]),
786+ ('starttag' , 'div' , [('class' , ',"bar"' ), ('baz' , ',"asd"' )]),
787+ ('starttag' , 'div' , [(',class' , 'bar' ), (',baz' , 'asd' )]),
788+ ('starttag' , 'div' , [('class,' , 'bar' ), ('baz,' , 'asd' )]),
789+ ]
790+ self ._run_check (html , expected )
791+
792+ def test_weird_chars_in_unquoted_attribute_values (self ):
793+ self ._run_check ('<form action=bogus|&#()value>' , [
794+ ('starttag' , 'form' ,
795+ [('action' , 'bogus|&#()value' )])])
776796
777797if __name__ == "__main__" :
778798 unittest .main ()
0 commit comments