@@ -2044,6 +2044,43 @@ private RegexNode ReduceLookaround()
2044
2044
Debug . Assert ( Kind is RegexNodeKind . PositiveLookaround or RegexNodeKind . NegativeLookaround ) ;
2045
2045
Debug . Assert ( ChildCount ( ) == 1 ) ;
2046
2046
2047
+ // Captures inside of negative lookarounds are undone after the lookaround. Thus, if there's nothing
2048
+ // inside of the negative lookaround that needs that capture group (namely a backreference), we can
2049
+ // remove the capture.
2050
+ if ( Kind is RegexNodeKind . NegativeLookaround && ContainsBackreference ( Child ( 0 ) ) is false )
2051
+ {
2052
+ if ( RemoveCaptures ( this , 0 ) )
2053
+ {
2054
+ // If we removed captures, we may have changed the structure of the tree in a way that exposed more
2055
+ // optimization possibility, so re-reduce the children.
2056
+ ReplaceChild ( 0 , Child ( 0 ) ) ;
2057
+ }
2058
+
2059
+ static bool RemoveCaptures ( RegexNode parent , int nodeIndex )
2060
+ {
2061
+ RegexNode node = parent . Child ( nodeIndex ) ;
2062
+
2063
+ if ( node . Kind is RegexNodeKind . Capture )
2064
+ {
2065
+ parent . ReplaceChild ( nodeIndex , node . Child ( 0 ) ) ;
2066
+ RemoveCaptures ( parent , nodeIndex ) ;
2067
+ return true ;
2068
+ }
2069
+
2070
+ bool changesMade = false ;
2071
+ if ( StackHelper . TryEnsureSufficientExecutionStack ( ) )
2072
+ {
2073
+ int childCount = node . ChildCount ( ) ;
2074
+ for ( int i = 0 ; i < childCount ; i ++ )
2075
+ {
2076
+ changesMade |= RemoveCaptures ( node , i ) ;
2077
+ }
2078
+ }
2079
+
2080
+ return changesMade ;
2081
+ }
2082
+ }
2083
+
2047
2084
// A lookaround is a zero-width atomic assertion.
2048
2085
// As it's atomic, nothing will backtrack into it, and we can
2049
2086
// eliminate any ending backtracking from it.
@@ -2066,6 +2103,32 @@ private RegexNode ReduceLookaround()
2066
2103
return this ;
2067
2104
}
2068
2105
2106
/// <summary>Gets whether the node contains a backreference anywhere in its tree.</summary>
/// <param name="node">The root of the subtree to inspect.</param>
/// <returns>
/// <see langword="true"/> if the node or any of its descendants is a backreference or a backreference conditional;
/// <see langword="false"/> if the entire subtree was examined and no such node was found;
/// <see langword="null"/> if some part of the subtree could not be examined because there was not enough
/// execution stack left to recur, in which case the answer is unknown and callers should not optimize.
/// </returns>
private static bool? ContainsBackreference(RegexNode node)
{
    if (node.Kind is RegexNodeKind.Backreference or RegexNodeKind.BackreferenceConditional)
    {
        return true;
    }

    if (!StackHelper.TryEnsureSufficientExecutionStack())
    {
        // If we can't recur further, just stop optimizing.
        return null;
    }

    // Tracks whether any child reported "unknown". A definite "true" from any child still wins,
    // but an unknown child must never be reported as a definite "false": the unexamined subtree
    // could contain a backreference that depends on a capture the caller is about to remove.
    bool sawUnknown = false;

    int childCount = node.ChildCount();
    for (int i = 0; i < childCount; i++)
    {
        bool? childResult = ContainsBackreference(node.Child(i));
        if (childResult is true)
        {
            return true;
        }

        if (childResult is null)
        {
            sawUnknown = true;
        }
    }

    if (sawUnknown)
    {
        // At least one subtree could not be fully examined, so the answer is unknown.
        return null;
    }

    return false;
}
2131
+
2069
2132
/// <summary>Optimizations for backreference conditionals.</summary>
2070
2133
private RegexNode ReduceBackreferenceConditional ( )
2071
2134
{
0 commit comments