Skip to content

Commit e45e53c

Browse files
authored
Complete adoption agency algorithm (#1517)
Follow adoption agency algorithm
1 parent f49f92c commit e45e53c

File tree

3 files changed

+76
-5
lines changed

3 files changed

+76
-5
lines changed

src/main/java/org/jsoup/parser/HtmlTreeBuilder.java

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,14 @@ Element lastFormattingElement() {
613613
return formattingElements.size() > 0 ? formattingElements.get(formattingElements.size()-1) : null;
614614
}
615615

616+
int positionOfElement(Element el){
617+
for (int i = 0; i < formattingElements.size(); i++){
618+
if (el == formattingElements.get(i))
619+
return i;
620+
}
621+
return -1;
622+
}
623+
616624
Element removeLastFormattingElement() {
617625
int size = formattingElements.size();
618626
if (size > 0)
@@ -623,6 +631,16 @@ Element removeLastFormattingElement() {
623631

624632
// active formatting elements
625633
void pushActiveFormattingElements(Element in) {
634+
this.checkActiveFormattingElements(in);
635+
formattingElements.add(in);
636+
}
637+
638+
void pushWithBookmark(Element in,int bookmark){
639+
this.checkActiveFormattingElements(in);
640+
formattingElements.add(bookmark, in);
641+
}
642+
643+
void checkActiveFormattingElements(Element in){
626644
int numSeen = 0;
627645
for (int pos = formattingElements.size() -1; pos >= 0; pos--) {
628646
Element el = formattingElements.get(pos);
@@ -637,7 +655,6 @@ void pushActiveFormattingElements(Element in) {
637655
break;
638656
}
639657
}
640-
formattingElements.add(in);
641658
}
642659

643660
private boolean isSameFormattingElement(Element a, Element b) {

src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -806,11 +806,14 @@ else if (!tb.onStack(formatEl)) {
806806
// the spec doesn't limit to < 64, but in degenerate cases (9000+ stack depth) this prevents
807807
// run-aways
808808
final int stackSize = stack.size();
809+
int bookmark = -1;
809810
for (int si = 0; si < stackSize && si < 64; si++) {
810811
el = stack.get(si);
811812
if (el == formatEl) {
812813
commonAncestor = stack.get(si - 1);
813814
seenFormattingElement = true;
815+
// Let a bookmark note the position of the formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
816+
bookmark = tb.positionOfElement(el);
814817
} else if (seenFormattingElement && tb.isSpecial(el)) {
815818
furthestBlock = el;
816819
break;
@@ -822,8 +825,6 @@ else if (!tb.onStack(formatEl)) {
822825
return true;
823826
}
824827

825-
// todo: Let a bookmark note the position of the formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
826-
// does that mean: int pos of format el in list?
827828
Element node = furthestBlock;
828829
Element lastNode = furthestBlock;
829830
for (int j = 0; j < 3; j++) {
@@ -843,8 +844,9 @@ else if (!tb.onStack(formatEl)) {
843844

844845
//noinspection StatementWithEmptyBody
845846
if (lastNode == furthestBlock) {
846-
// todo: move the aforementioned bookmark to be immediately after the new node in the list of active formatting elements.
847+
// move the aforementioned bookmark to be immediately after the new node in the list of active formatting elements.
847848
// not getting how this bookmark both straddles the element above, but is in between here...
849+
bookmark = tb.positionOfElement(node) + 1;
848850
}
849851
if (lastNode.parent() != null)
850852
lastNode.remove();
@@ -871,7 +873,8 @@ else if (!tb.onStack(formatEl)) {
871873
}
872874
furthestBlock.appendChild(adopter);
873875
tb.removeFromActiveFormattingElements(formatEl);
874-
// todo: insert the new element into the list of active formatting elements at the position of the aforementioned bookmark.
876+
// insert the new element into the list of active formatting elements at the position of the aforementioned bookmark.
877+
tb.pushWithBookmark(adopter, bookmark);
875878
tb.removeFromStack(formatEl);
876879
tb.insertOnStackAfter(furthestBlock, adopter);
877880
}

src/test/java/org/jsoup/parser/HtmlTreeBuilderStateTest.java

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.jsoup.parser;
22

3+
import org.jsoup.Jsoup;
34
import org.jsoup.parser.HtmlTreeBuilderState.Constants;
45
import org.junit.jupiter.api.Test;
56

@@ -46,4 +47,54 @@ public void ensureArraysAreSorted() {
4647
assertEquals(38, constants.size());
4748
}
4849

50+
51+
@Test
52+
public void nestedAnchorElements01() {
53+
String html = "<html>\n" +
54+
" <body>\n" +
55+
" <a href='#1'>\n" +
56+
" <div>\n" +
57+
" <a href='#2'>child</a>\n" +
58+
" </div>\n" +
59+
" </a>\n" +
60+
" </body>\n" +
61+
"</html>";
62+
String s = Jsoup.parse(html).toString();
63+
assertEquals("<html> \n" +
64+
" <head></head>\n" +
65+
" <body> <a href=\"#1\"> </a>\n" +
66+
" <div>\n" +
67+
" <a href=\"#1\"> </a><a href=\"#2\">child</a> \n" +
68+
" </div> \n" +
69+
" </body>\n" +
70+
"</html>", s);
71+
}
72+
73+
@Test
74+
public void nestedAnchorElements02() {
75+
String html = "<html>\n" +
76+
" <body>\n" +
77+
" <a href='#1'>\n" +
78+
" <div>\n" +
79+
" <div>\n" +
80+
" <a href='#2'>child</a>\n" +
81+
" </div>\n" +
82+
" </div>\n" +
83+
" </a>\n" +
84+
" </body>\n" +
85+
"</html>";
86+
String s = Jsoup.parse(html).toString();
87+
assertEquals("<html> \n" +
88+
" <head></head>\n" +
89+
" <body> <a href=\"#1\"> </a>\n" +
90+
" <div>\n" +
91+
" <a href=\"#1\"> </a>\n" +
92+
" <div>\n" +
93+
" <a href=\"#1\"> </a><a href=\"#2\">child</a> \n" +
94+
" </div> \n" +
95+
" </div> \n" +
96+
" </body>\n" +
97+
"</html>", s);
98+
}
99+
49100
}

0 commit comments

Comments
 (0)