@@ -231,63 +231,65 @@ code (meta_extra_lengths, just below) must be updated to remain in step. */
231231#define META_COND_RNAME 0x80130000u /* (?(R&name)... */
232232#define META_COND_RNUMBER 0x80140000u /* (?(Rdigits)... */
233233#define META_COND_VERSION 0x80150000u /* (?(VERSION<op>x.y)... */
234- #define META_DOLLAR 0x80160000u /* $ metacharacter */
235- #define META_DOT 0x80170000u /* . metacharacter */
236- #define META_ESCAPE 0x80180000u /* \d and friends */
237- #define META_KET 0x80190000u /* closing parenthesis */
238- #define META_NOCAPTURE 0x801a0000u /* no capture parens */
239- #define META_OPTIONS 0x801b0000u /* (?i) and friends */
240- #define META_POSIX 0x801c0000u /* POSIX class item */
241- #define META_POSIX_NEG 0x801d0000u /* negative POSIX class item */
242- #define META_RANGE_ESCAPED 0x801e0000u /* range with at least one escape */
243- #define META_RANGE_LITERAL 0x801f0000u /* range defined literally */
244- #define META_RECURSE 0x80200000u /* Recursion */
245- #define META_RECURSE_BYNAME 0x80210000u /* (?&name) */
246- #define META_SCRIPT_RUN 0x80220000u /* (*script_run:...) */
234+ #define META_SCS_NAME 0x80160000u /* (*scan_substring:(<name>)... */
235+ #define META_SCS_NUMBER 0x80170000u /* (*scan_substring:(digits)... */
236+ #define META_DOLLAR 0x80180000u /* $ metacharacter */
237+ #define META_DOT 0x80190000u /* . metacharacter */
238+ #define META_ESCAPE 0x801a0000u /* \d and friends */
239+ #define META_KET 0x801b0000u /* closing parenthesis */
240+ #define META_NOCAPTURE 0x801c0000u /* no capture parens */
241+ #define META_OPTIONS 0x801d0000u /* (?i) and friends */
242+ #define META_POSIX 0x801e0000u /* POSIX class item */
243+ #define META_POSIX_NEG 0x801f0000u /* negative POSIX class item */
244+ #define META_RANGE_ESCAPED 0x80200000u /* range with at least one escape */
245+ #define META_RANGE_LITERAL 0x80210000u /* range defined literally */
246+ #define META_RECURSE 0x80220000u /* Recursion */
247+ #define META_RECURSE_BYNAME 0x80230000u /* (?&name) */
248+ #define META_SCRIPT_RUN 0x80240000u /* (*script_run:...) */
247249
248250/* These must be kept together to make it easy to check that an assertion
249251is present where expected in a conditional group. */
250252
251- #define META_LOOKAHEAD 0x80230000u /* (?= */
252- #define META_LOOKAHEADNOT 0x80240000u /* (?! */
253- #define META_LOOKBEHIND 0x80250000u /* (?<= */
254- #define META_LOOKBEHINDNOT 0x80260000u /* (?<! */
253+ #define META_LOOKAHEAD 0x80250000u /* (?= */
254+ #define META_LOOKAHEADNOT 0x80260000u /* (?! */
255+ #define META_LOOKBEHIND 0x80270000u /* (?<= */
256+ #define META_LOOKBEHINDNOT 0x80280000u /* (?<! */
255257
256258/* These cannot be conditions */
257259
258- #define META_LOOKAHEAD_NA 0x80270000u /* (*napla: */
259- #define META_LOOKBEHIND_NA 0x80280000u /* (*naplb: */
260+ #define META_LOOKAHEAD_NA 0x80290000u /* (*napla: */
261+ #define META_LOOKBEHIND_NA 0x802a0000u /* (*naplb: */
260262
261263/* These must be kept in this order, with consecutive values, and the _ARG
262264versions of COMMIT, PRUNE, SKIP, and THEN immediately after their non-argument
263265versions. */
264266
265- #define META_MARK 0x80290000u /* (*MARK) */
266- #define META_ACCEPT 0x802a0000u /* (*ACCEPT) */
267- #define META_FAIL 0x802b0000u /* (*FAIL) */
268- #define META_COMMIT 0x802c0000u /* These */
269- #define META_COMMIT_ARG 0x802d0000u /* pairs */
270- #define META_PRUNE 0x802e0000u /* must */
271- #define META_PRUNE_ARG 0x802f0000u /* be */
272- #define META_SKIP 0x80300000u /* kept */
273- #define META_SKIP_ARG 0x80310000u /* in */
274- #define META_THEN 0x80320000u /* this */
275- #define META_THEN_ARG 0x80330000u /* order */
267+ #define META_MARK 0x802b0000u /* (*MARK) */
268+ #define META_ACCEPT 0x802c0000u /* (*ACCEPT) */
269+ #define META_FAIL 0x802d0000u /* (*FAIL) */
270+ #define META_COMMIT 0x802e0000u /* These */
271+ #define META_COMMIT_ARG 0x802f0000u /* pairs */
272+ #define META_PRUNE 0x80300000u /* must */
273+ #define META_PRUNE_ARG 0x80310000u /* be */
274+ #define META_SKIP 0x80320000u /* kept */
275+ #define META_SKIP_ARG 0x80330000u /* in */
276+ #define META_THEN 0x80340000u /* this */
277+ #define META_THEN_ARG 0x80350000u /* order */
276278
277279/* These must be kept in groups of adjacent 3 values, and all together. */
278280
279- #define META_ASTERISK 0x80340000u /* * */
280- #define META_ASTERISK_PLUS 0x80350000u /* *+ */
281- #define META_ASTERISK_QUERY 0x80360000u /* *? */
282- #define META_PLUS 0x80370000u /* + */
283- #define META_PLUS_PLUS 0x80380000u /* ++ */
284- #define META_PLUS_QUERY 0x80390000u /* +? */
285- #define META_QUERY 0x803a0000u /* ? */
286- #define META_QUERY_PLUS 0x803b0000u /* ?+ */
287- #define META_QUERY_QUERY 0x803c0000u /* ?? */
288- #define META_MINMAX 0x803d0000u /* {n,m} repeat */
289- #define META_MINMAX_PLUS 0x803e0000u /* {n,m}+ repeat */
290- #define META_MINMAX_QUERY 0x803f0000u /* {n,m}? repeat */
281+ #define META_ASTERISK 0x80360000u /* * */
282+ #define META_ASTERISK_PLUS 0x80370000u /* *+ */
283+ #define META_ASTERISK_QUERY 0x80380000u /* *? */
284+ #define META_PLUS 0x80390000u /* + */
285+ #define META_PLUS_PLUS 0x803a0000u /* ++ */
286+ #define META_PLUS_QUERY 0x803b0000u /* +? */
287+ #define META_QUERY 0x803c0000u /* ? */
288+ #define META_QUERY_PLUS 0x803d0000u /* ?+ */
289+ #define META_QUERY_QUERY 0x803e0000u /* ?? */
290+ #define META_MINMAX 0x803f0000u /* {n,m} repeat */
291+ #define META_MINMAX_PLUS 0x80400000u /* {n,m}+ repeat */
292+ #define META_MINMAX_QUERY 0x80410000u /* {n,m}? repeat */
291293
292294#define META_FIRST_QUANTIFIER META_ASTERISK
293295#define META_LAST_QUANTIFIER META_MINMAX_QUERY
@@ -326,6 +328,8 @@ static unsigned char meta_extra_lengths[] = {
326328 1 + SIZEOFFSET , /* META_COND_RNAME */
327329 1 + SIZEOFFSET , /* META_COND_RNUMBER */
328330 3 , /* META_COND_VERSION */
331+ 1 + SIZEOFFSET , /* META_SCS_NAME */
332+ 1 + SIZEOFFSET , /* META_SCS_NUMBER */
329333 0 , /* META_DOLLAR */
330334 0 , /* META_DOT */
331335 0 , /* META_ESCAPE - one more for ESC_P and ESC_p */
@@ -656,6 +660,8 @@ static const char alasnames[] =
656660 STRING_non_atomic_positive_lookbehind0
657661 STRING_negative_lookahead0
658662 STRING_negative_lookbehind0
663+ STRING_scs0
664+ STRING_scan_substring0
659665 STRING_atomic0
660666 STRING_sr0
661667 STRING_asr0
@@ -675,6 +681,8 @@ static const alasitem alasmeta[] = {
675681 { 30 , META_LOOKBEHIND_NA },
676682 { 18 , META_LOOKAHEADNOT },
677683 { 19 , META_LOOKBEHINDNOT },
684+ { 3 , META_SCS_NUMBER }, /* placeholder, updated later */
685+ { 14 , META_SCS_NUMBER }, /* placeholder, updated later */
678686 { 6 , META_ATOMIC },
679687 { 2 , META_SCRIPT_RUN }, /* sr = script run */
680688 { 3 , META_ATOMIC_SCRIPT_RUN }, /* asr = atomic script run */
@@ -1152,6 +1160,19 @@ for (;;)
11521160 fprintf (stderr , "%zd" , offset );
11531161 break ;
11541162
1163+ case META_SCS_NAME :
1164+ fprintf (stderr , "META (*scan_substring:(<name>) length=%d offset=" , * pptr ++ );
1165+ GETOFFSET (offset , pptr );
1166+ fprintf (stderr , "%zd" , offset );
1167+ break ;
1168+
1169+ case META_SCS_NUMBER :
1170+ fprintf (stderr , "META_SCS_NUMBER %d offset=" , pptr [SIZEOFFSET ]);
1171+ GETOFFSET (offset , pptr );
1172+ fprintf (stderr , "%zd" , offset );
1173+ pptr ++ ;
1174+ break ;
1175+
11551176 case META_MARK :
11561177 fprintf (stderr , "META (*MARK:" );
11571178 goto SHOWARG ;
@@ -4053,6 +4074,67 @@ while (ptr < ptrend)
40534074 case META_LOOKAHEADNOT :
40544075 goto NEGATIVE_LOOK_AHEAD ;
40554076
4077+ case META_SCS_NUMBER : /* reached for both the "scs" and "scan_substring" names */
4078+ nest_depth ++ ;
4079+
4080+ if (++ ptr >= ptrend ) goto UNCLOSED_PARENTHESIS ; /* advance to what must be '(' */
4081+
4082+ if (* ptr != CHAR_LEFT_PARENTHESIS )
4083+ {
4084+ errorcode = ERR15 ;
4085+ goto FAILED ;
4086+ }
4087+
4088+ ptr ++ ; /* skip the '(' */
4089+
4090+ /* Handle (*scan_substring:([+-]number)... */
4091+ if (read_number (& ptr , ptrend , cb -> bracount , MAX_GROUP_NUMBER , ERR61 ,
4092+ & i , & errorcode ))
4093+ {
4094+ if (i <= 0 ) /* zero or negative is not a usable group reference here */
4095+ {
4096+ errorcode = ERR15 ;
4097+ goto FAILED ;
4098+ }
4099+ * parsed_pattern ++ = META_SCS_NUMBER ; /* emitted as: META, offset, group number */
4100+ offset = (PCRE2_SIZE )(ptr - cb -> start_pattern - 2 );
4101+ PUTOFFSET (offset , parsed_pattern );
4102+ * parsed_pattern ++ = i ;
4103+ }
4104+ else if (errorcode != 0 ) goto FAILED ; /* Number too big */
4105+ else
4106+ {
4107+ if (ptr >= ptrend ) goto UNCLOSED_PARENTHESIS ;
4108+
4109+ /* Handle (*scan_substring:('name') or (*scan_substring:(<name>) */
4110+ if (* ptr == CHAR_LESS_THAN_SIGN )
4111+ terminator = CHAR_GREATER_THAN_SIGN ;
4112+ else if (* ptr == CHAR_APOSTROPHE )
4113+ terminator = CHAR_APOSTROPHE ;
4114+ else
4115+ {
4116+ errorcode = ERR15 ;
4117+ goto FAILED ;
4118+ }
4119+
4120+ if (!read_name (& ptr , ptrend , utf , terminator , & offset , & name ,
4121+ & namelen , & errorcode , cb )) goto FAILED ;
4122+
4123+ * parsed_pattern ++ = META_SCS_NAME ; /* emitted as: META, name length, offset */
4124+ * parsed_pattern ++ = namelen ;
4125+ PUTOFFSET (offset , parsed_pattern );
4126+ }
4127+ /* ptr is expected to be on the closing ')' already (assumes read_number/read_name leave it just past what they consumed - confirm), so do not advance before testing it */
4128+ if (ptr >= ptrend ) goto UNCLOSED_PARENTHESIS ;
4129+
4130+ if (* ptr != CHAR_RIGHT_PARENTHESIS )
4131+ {
4132+ errorcode = ERR24 ;
4133+ goto FAILED ; /* report the error now, like every other failure in this case */
4134+ }
4135+ ptr ++ ; /* skip the ')' */
4136+ break ;
4137+
40564138 case META_LOOKBEHIND :
40574139 case META_LOOKBEHINDNOT :
40584140 case META_LOOKBEHIND_NA :
@@ -6642,7 +6724,8 @@ for (;; pptr++)
66426724 case META_COND_RNUMBER : /* (?(Rdigits) */
66436725 case META_COND_NAME : /* (?(name) or (?'name') or ?(<name>) */
66446726 case META_COND_RNAME : /* (?(R&name) - test for recursion */
6645- bravalue = OP_COND ;
6727+ case META_SCS_NAME : /* (*scan_substring:('name') or (*scan_substring:(<name>) */
6728+ bravalue = meta == META_SCS_NAME ? OP_ASSERT_SCS : OP_COND ;
66466729 {
66476730 int count , index ;
66486731 unsigned int i ;
@@ -6736,7 +6819,9 @@ for (;; pptr++)
67366819 PUT2 (code , 2 + LINK_SIZE , index );
67376820 PUT2 (code , 2 + LINK_SIZE + IMM2_SIZE , count );
67386821 }
6739- goto GROUP_PROCESS_NOTE_EMPTY ;
6822+ if (meta != META_SCS_NAME ) goto GROUP_PROCESS_NOTE_EMPTY ;
6823+ cb -> assert_depth += 1 ;
6824+ goto GROUP_PROCESS ;
67406825
67416826 /* The DEFINE condition is always false. Its internal groups may never
67426827 be called, so matched_char must remain false, hence the jump to
@@ -6752,7 +6837,8 @@ for (;; pptr++)
67526837 /* Conditional test of a group's being set. */
67536838
67546839 case META_COND_NUMBER :
6755- bravalue = OP_COND ;
6840+ case META_SCS_NUMBER :
6841+ bravalue = meta == META_SCS_NUMBER ? OP_ASSERT_SCS : OP_COND ;
67566842 GETPLUSOFFSET (offset , pptr );
67576843 groupnumber = * (++ pptr );
67586844 if (groupnumber > cb -> bracount )
@@ -6762,11 +6848,14 @@ for (;; pptr++)
67626848 return 0 ;
67636849 }
67646850 if (groupnumber > cb -> top_backref ) cb -> top_backref = groupnumber ;
6765- offset -= 2 ; /* Point at initial ( for too many branches error */
6851+ /* Point at initial ( for too many branches error */
6852+ if (meta != META_SCS_NUMBER ) offset -= 2 ;
67666853 code [1 + LINK_SIZE ] = OP_CREF ;
67676854 skipunits = 1 + IMM2_SIZE ;
67686855 PUT2 (code , 2 + LINK_SIZE , groupnumber );
6769- goto GROUP_PROCESS_NOTE_EMPTY ;
6856+ if (meta != META_SCS_NUMBER ) goto GROUP_PROCESS_NOTE_EMPTY ;
6857+ cb -> assert_depth += 1 ;
6858+ goto GROUP_PROCESS ;
67706859
67716860 /* Test for the PCRE2 version. */
67726861
@@ -6900,7 +6989,7 @@ for (;; pptr++)
69006989
69016990 /* If we've just compiled an assertion, pop the assert depth. */
69026991
6903- if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NA )
6992+ if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERT_SCS )
69046993 cb -> assert_depth -= 1 ;
69056994
69066995 /* At the end of compiling, code is still pointing to the start of the
0 commit comments