Lib/sre_compile.py | 91 +++++++++++++++++++++++++++++------------------------- 1 file changed, 49 insertions(+), 42 deletions(-) diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index 0867200a59..dc260a8cec 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -67,16 +67,23 @@ _ignorecase_fixes = {i: tuple(j for j in t if i != j) for t in _equivalences for i in t} +class CompileData: + __slots__ = ('code', 'repeat_count') + def __init__(self): + self.code = [] + self.repeat_count = 0 + def _combine_flags(flags, add_flags, del_flags, TYPE_FLAGS=sre_parse.TYPE_FLAGS): if add_flags & TYPE_FLAGS: flags &= ~TYPE_FLAGS return (flags | add_flags) & ~del_flags -def _compile(code, pattern, flags): +def _compile(data, pattern, flags): # internal: compile a (sub)pattern + code = data.code emit = code.append - _len = len + code_len = code.__len__ LITERAL_CODES = _LITERAL_CODES REPEATING_CODES = _REPEATING_CODES SUCCESS_CODES = _SUCCESS_CODES @@ -113,14 +120,14 @@ def _compile(code, pattern, flags): emit(lo) else: emit(IN_UNI_IGNORE) - skip = _len(code); emit(0) + skip = code_len(); emit(0) if op is NOT_LITERAL: emit(NEGATE) for k in (lo,) + fixes[lo]: emit(LITERAL) emit(k) emit(FAILURE) - code[skip] = _len(code) - skip + code[skip] = code_len() - skip elif op is IN: charset, hascased = _optimize_charset(av, iscased, tolower, fixes) if flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE: @@ -131,9 +138,9 @@ def _compile(code, pattern, flags): emit(IN_IGNORE) else: emit(IN_UNI_IGNORE) - skip = _len(code); emit(0) + skip = code_len(); emit(0) _compile_charset(charset, flags, code) - code[skip] = _len(code) - skip + code[skip] = code_len() - skip elif op is ANY: if flags & SRE_FLAG_DOTALL: emit(ANY_ALL) @@ -144,19 +151,19 @@ def _compile(code, pattern, flags): raise error("internal: unsupported template operator %r" % (op,)) if _simple(av[2]): emit(REPEATING_CODES[op][2]) - skip = _len(code); emit(0) + skip = code_len(); emit(0) emit(av[0]) emit(av[1]) - _compile(code, av[2], flags) + _compile(data, av[2], flags) emit(SUCCESS) - code[skip] = _len(code) - skip + code[skip] = code_len() - skip else: emit(REPEATING_CODES[op][0]) - skip = _len(code); emit(0) + skip = code_len(); emit(0) emit(av[0]) emit(av[1]) - _compile(code, av[2], flags) - code[skip] = _len(code) - skip + _compile(data, av[2], flags) + code[skip] = code_len() - skip emit(REPEATING_CODES[op][1]) elif op is SUBPATTERN: group, add_flags, del_flags, p = av @@ -164,7 +171,7 @@ def _compile(code, pattern, flags): emit(MARK) emit((group-1)*2) # _compile_info(code, p, _combine_flags(flags, add_flags, del_flags)) - _compile(code, p, _combine_flags(flags, add_flags, del_flags)) + _compile(data, p, _combine_flags(flags, add_flags, del_flags)) if group: emit(MARK) emit((group-1)*2+1) @@ -175,15 +182,15 @@ def _compile(code, pattern, flags): # operations within the Atomic Group to stop eating and # pop their stack if they reach it emit(ATOMIC_GROUP) - skip = _len(code); emit(0) - _compile(code, av, flags) + skip = code_len(); emit(0) + _compile(data, av, flags) emit(SUCCESS) - code[skip] = _len(code) - skip + code[skip] = code_len() - skip elif op in SUCCESS_CODES: emit(op) elif op in ASSERT_CODES: emit(op) - skip = _len(code); emit(0) + skip = code_len(); emit(0) if av[0] >= 0: emit(0) # look ahead else: @@ -191,15 +198,15 @@ def _compile(code, pattern, flags): if lo != hi: raise error("look-behind requires fixed-width pattern") emit(lo) # look behind - _compile(code, av[1], flags) + _compile(data, av[1], flags) emit(SUCCESS) - code[skip] = _len(code) - skip + code[skip] = code_len() - skip elif op is CALL: emit(op) - skip = _len(code); emit(0) - _compile(code, av, flags) + skip = code_len(); emit(0) + _compile(data, av, flags) emit(SUCCESS) - code[skip] = _len(code) - skip + code[skip] = code_len() - skip elif op is AT: emit(op) if flags & SRE_FLAG_MULTILINE: @@ -214,15 +221,15 @@ def _compile(code, pattern, flags): tail = [] tailappend = tail.append for av in av[1]: - skip = _len(code); emit(0) + skip = code_len(); emit(0) # _compile_info(code, av, flags) - _compile(code, av, flags) + _compile(data, av, flags) emit(JUMP) - tailappend(_len(code)); emit(0) - code[skip] = _len(code) - skip + tailappend(code_len()); emit(0) + code[skip] = code_len() - skip emit(FAILURE) # end of branch for tail in tail: - code[tail] = _len(code) - tail + code[tail] = code_len() - tail elif op is CATEGORY: emit(op) if flags & SRE_FLAG_LOCALE: @@ -243,16 +250,16 @@ def _compile(code, pattern, flags): elif op is GROUPREF_EXISTS: emit(op) emit(av[0]-1) - skipyes = _len(code); emit(0) - _compile(code, av[1], flags) + skipyes = code_len(); emit(0) + _compile(data, av[1], flags) if av[2]: emit(JUMP) - skipno = _len(code); emit(0) - code[skipyes] = _len(code) - skipyes + 1 - _compile(code, av[2], flags) - code[skipno] = _len(code) - skipno + skipno = code_len(); emit(0) + code[skipyes] = code_len() - skipyes + 1 + _compile(data, av[2], flags) + code[skipno] = code_len() - skipno else: - code[skipyes] = _len(code) - skipyes + 1 + code[skipyes] = code_len() - skipyes + 1 else: raise error("internal: unsupported operand type %r" % (op,)) @@ -608,17 +615,17 @@ def isstring(obj): def _code(p, flags): flags = p.state.flags | flags - code = [] + data = CompileData() # compile info block - _compile_info(code, p, flags) + _compile_info(data.code, p, flags) # compile the pattern - _compile(code, p.data, flags) + _compile(data, p.data, flags) - code.append(SUCCESS) + data.code.append(SUCCESS) - return code + return data def _hex_code(code): return '[%s]' % ', '.join('%#0*x' % (_sre.CODESIZE*2+2, x) for x in code) @@ -781,11 +788,11 @@ def compile(p, flags=0): else: pattern = None - code = _code(p, flags) + data = _code(p, flags) if flags & SRE_FLAG_DEBUG: print() - dis(code) + dis(data.code) # map in either direction groupindex = p.state.groupdict @@ -794,7 +801,7 @@ def compile(p, flags=0): indexgroup[i] = k return _sre.compile( - pattern, flags | p.state.flags, code, + pattern, flags | p.state.flags, data.code, p.state.groups-1, groupindex, tuple(indexgroup) )