Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2574,7 +2574,8 @@ void EmitNegativeLookaroundAssertion(RegexNode node)
string originalDoneLabel = doneLabel;

// Save off pos. We'll need to reset this upon successful completion of the lookaround.
string startingPos = ReserveName((node.Options & RegexOptions.RightToLeft) != 0 ? "negativelookbehind_starting_pos" : "negativelookahead_starting_pos");
string variablePrefix = (node.Options & RegexOptions.RightToLeft) != 0 ? "negativelookbehind_" : "negativelookahead_";
string startingPos = ReserveName($"{variablePrefix}_starting_pos");
writer.WriteLine($"int {startingPos} = pos;");
int startingSliceStaticPos = sliceStaticPos;

Expand All @@ -2585,8 +2586,30 @@ void EmitNegativeLookaroundAssertion(RegexNode node)
// technically backtracking, it's appropriate to have a timeout check.
EmitTimeoutCheckIfNeeded(writer, rm);

// Emit the child.
RegexNode child = node.Child(0);

// Ensure we're able to uncapture anything captured by the child.
bool isInLoop = false;
string? capturePos = null;
bool hasCaptures = rm.Analysis.MayContainCapture(child);
if (hasCaptures)
{
// If we're inside a loop, push the current crawl position onto the stack,
// so that each iteration tracks its own value. Otherwise, store it into a local.
isInLoop = rm.Analysis.IsInLoop(node);
if (isInLoop)
{
EmitStackPush("base.Crawlpos()");
}
else
{
capturePos = ReserveName($"{variablePrefix}_capture_pos");
additionalDeclarations.Add($"int {capturePos} = 0;");
writer.WriteLine($"{capturePos} = base.Crawlpos();");
}
}

// Emit the child.
if (rm.Analysis.MayBacktrack(child))
{
// Lookarounds are implicitly atomic, so we need to emit the node as atomic if it might backtrack.
Expand All @@ -2611,6 +2634,12 @@ void EmitNegativeLookaroundAssertion(RegexNode node)
SliceInputSpan();
sliceStaticPos = startingSliceStaticPos;

// And uncapture anything if necessary. Negative lookaround captures don't persist beyond the lookaround.
if (hasCaptures)
{
EmitUncaptureUntil(isInLoop ? StackPop() : capturePos!);
}

doneLabel = originalDoneLabel;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2577,8 +2577,41 @@ void EmitNegativeLookaroundAssertion(RegexNode node)
// technically backtracking, it's appropriate to have a timeout check.
EmitTimeoutCheckIfNeeded();

// Emit the child.
RegexNode child = node.Child(0);

// Ensure we're able to uncapture anything captured by the child.
// Note that this differs ever so slightly from the source generator. The source
// generator only defines a local for capturePos if not in a loop (as it calls to a helper
// method where the argument acts implicitly as a local), but the compiler
// needs to store the popped stack value somewhere so that it can repeatedly compare
// that value against Crawlpos, so capturePos is always declared if there are captures.
bool isInLoop = false;
LocalBuilder? capturePos = analysis.MayContainCapture(child) ? DeclareInt32() : null;
if (capturePos is not null)
{
// If we're inside a loop, push the current crawl position onto the stack,
// so that each iteration tracks its own value. Otherwise, store it into a local.
isInLoop = analysis.IsInLoop(node);
if (isInLoop)
{
EmitStackResizeIfNeeded(1);
EmitStackPush(() =>
{
// base.Crawlpos();
Ldthis();
Call(s_crawlposMethod);
});
}
else
{
// capturePos = base.Crawlpos();
Ldthis();
Call(s_crawlposMethod);
Stloc(capturePos);
}
}

// Emit the child.
if (analysis.MayBacktrack(child))
{
// Lookarounds are implicitly atomic, so we need to emit the node as atomic if it might backtrack.
Expand Down Expand Up @@ -2608,6 +2641,20 @@ void EmitNegativeLookaroundAssertion(RegexNode node)
SliceInputSpan();
sliceStaticPos = startingTextSpanPos;

// And uncapture anything if necessary. Negative lookaround captures don't persist beyond the lookaround.
if (capturePos is not null)
{
if (isInLoop)
{
// capturepos = base.runstack[--stackpos];
EmitStackPop();
Stloc(capturePos);
}

// while (base.Crawlpos() > capturepos) base.Uncapture();
EmitUncaptureUntil(capturePos);
}

doneLabel = originalDoneLabel;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ public static IEnumerable<object[]> Match_MemberData()
{
// Zero-width negative lookahead assertion
yield return (@"abc(?!XXX)\w+", "abcXXXdef", RegexOptions.None, 0, 9, false, string.Empty);
yield return (@"(?!(b)b)\1", "ba", RegexOptions.None, 0, 2, false, string.Empty);
yield return (@"(?:(?!(b)b)\1a)+", "babababa", RegexOptions.None, 0, 8, false, string.Empty);
yield return (@"(?:(?!(b)b)\1a)*", "babababa", RegexOptions.None, 0, 8, true, string.Empty);
yield return (@"(.*?)a(?!(a+)b\2c)\2(.*)", "baaabaac", RegexOptions.None, 0, 8, false, string.Empty);

// Zero-width positive lookbehind assertion
yield return (@"(\w){6}(?<=XXX)def", "abcXXXdef", RegexOptions.None, 0, 9, true, "abcXXXdef");
Expand All @@ -114,6 +118,8 @@ public static IEnumerable<object[]> Match_MemberData()
yield return (@"(?<=\d?)a{4}", "123aaaaaaaaa", RegexOptions.None, 0, 12, true, "aaaa");
yield return (@"(?<=a{3,5}[ab]*)1234", "aaaaaaa1234", RegexOptions.None, 0, 11, true, "1234");
yield return (@"(\w)*?3(?<=33)$", "1233", RegexOptions.None, 0, 4, true, "1233");
yield return (@"(?=(\d))4\1", "44", RegexOptions.None, 0, 2, true, "44");
yield return (@"(?=(\d))4\1", "43", RegexOptions.None, 0, 2, false, "");

// Zero-width negative lookbehind assertion: Actual - "(\\w){6}(?<!XXX)def"
yield return (@"(\w){6}(?<!XXX)def", "XXXabcdef", RegexOptions.None, 0, 9, true, "XXXabcdef");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@stephentoub are we covered for testing analogous case for negative lookbehind? eg., (?<!(b)a)\1 shouldn't match bb

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably not from a test perspective; the src fix covers both directions. Feel free to submit a pr.

Expand Down