1- import pickle
2- import json
31import itertools
2+ import json
3+ import pickle
44import re
55import sys
6- from collections import namedtuple
6+ from dataclasses import dataclass
7+ from typing import List , Match , Optional , Pattern , Union
78
8- from .._types import Logs , Benchmark , Block
9+ from .._types import Benchmark , Block , Logs
910
10- _RE_REGION_INFO = re .compile (r'EVENT:.*ProcessDag.*"name": "(?P<name>[^"]*)"' )
11+ _REGION_DELIMITER = 'INFO: ********** Opt Scheduling **********'
12+ _RE_REGION_DELIMITER = re .compile (re .escape (_REGION_DELIMITER ))
1113
1214
1315def import_main (parsefn , * , description ):
@@ -24,18 +26,39 @@ def import_main(parsefn, *, description):
2426 pickle .dump (result , f )
2527
2628
def parse_multi_bench_file(logtext: str, *, benchstart: Union[Pattern, str], filename: Optional[Union[Pattern, str]] = None):
    """Split a combined log into one Benchmark per match of `benchstart`.

    benchstart: RE (or pattern string) marking the start of each benchmark;
        any named groups it captures become properties of that benchmark.
    filename: optional RE (or pattern string) used to attribute regions to
        source files; passed through to the per-benchmark parser.

    Returns a Logs wrapping the parsed benchmarks.
    """
    benchstart_re = re.compile(benchstart)
    filename_re = re.compile(filename) if filename is not None else None

    starts = list(benchstart_re.finditer(logtext))
    # Pair each benchmark's start match with the next one; the final
    # benchmark is paired with a dummy match at the end of the text.
    ends = [*starts[1:], _DummyEnd(len(logtext))]

    benchmarks = []
    for index, (benchm, nextm) in enumerate(zip(starts, ends)):
        # The RE can specify any extra properties.
        info = benchm.groupdict()
        # The first benchmark starts from the beginning of the file so
        # that no preamble information is lost.
        span_start = 0 if index == 0 else benchm.start()
        benchmarks.append(_parse_benchmark(info, logtext,
                                           span_start, nextm.start(),
                                           filenamere=filename_re))

    return Logs(benchmarks)
3657
3758
38- def parse_single_bench_file (logtext , * , benchname , filename = None ):
59+ def parse_single_bench_file (logtext , * , benchname , filename : Optional [Union [Pattern , str ]] = None ):
60+ if filename is not None :
61+ filename = re .compile (filename )
3962 return Logs ([
4063 _parse_benchmark (
4164 {'name' : benchname },
@@ -45,21 +68,10 @@ def parse_single_bench_file(logtext, *, benchname, filename=None):
4568 ])
4669
4770
48- _FileInfo = namedtuple ('_FileInfo' , ('filename' , 'from_pos' ))
49-
50-
51- def _each_cons (iterable , n ):
52- '''
53- Iterates over each consecutive n items of the iterable.
54-
55- _each_cons((1, 2, 3, 4), 2) # (1, 2), (2, 3), (3, 4)
56- '''
57- iters = [None ] * n
58- iters [0 ] = iter (iterable )
59- for i in range (1 , n ):
60- iters [i - 1 ], iters [i ] = itertools .tee (iters [i - 1 ])
61- next (iters [i ], None )
62- return zip (* iters )
@dataclass
class _FileInfo:
    # Name of the source file a log region belongs to; None when unknown.
    filename: Optional[str]
    # Character offset into the log text where this file's span begins.
    from_pos: int
6375
6476
6577class _DummyEnd :
@@ -73,65 +85,68 @@ def end(self):
7385 return self ._end
7486
7587
def _filename_info(filenamere: Optional[Pattern], logtext: str, start: int, end: int) -> List[_FileInfo]:
    """Find every filename mentioned in logtext[start:end].

    filenamere: RE whose group 1 captures a filename, or None when no
        filename pattern was configured.

    Returns _FileInfo entries in match order, each recording the captured
    filename and the offset just past its match (where that file's log
    output begins). Empty when `filenamere` is None or never matches.
    """
    if filenamere is None:
        # No pattern configured: nothing can match, so skip the scan
        # entirely instead of running a never-matching RE over the text.
        return []

    return [
        _FileInfo(filename=filem.group(1), from_pos=filem.end())
        for filem in filenamere.finditer(logtext, start, end)
    ]
9099
def _parse_benchmark(info: dict, logtext: str, start: int, end: int, *, filenamere: Optional[Pattern]):
    """Parse one benchmark's slice of the log into a Benchmark.

    info: properties of the benchmark; must contain 'name'.
    logtext[start:end]: the portion of the log belonging to this benchmark.
    filenamere: optional RE locating per-file sections, forwarded to
        _filename_info.

    Returns a Benchmark whose blocks are the scheduling regions found
    between occurrences of the region delimiter.
    """
    benchname = info['name']

    files: List[_FileInfo] = _filename_info(filenamere, logtext, start, end)
    if not files:
        # We have an unknown file starting from the very beginning.
        files = [_FileInfo(filename=None, from_pos=start)]

    # Sentinel "file" at the end lets us peek one entry ahead safely;
    # it starts at `end`, so no region can ever fall inside it.
    files.append(_FileInfo(filename=None, from_pos=end))
    assert len(files) >= 2
    file_pos = 0

    # Pair each region delimiter with the next one (or a dummy at `end`).
    # Materializing the matches avoids the StopIteration the tee/next
    # approach raised when a benchmark contained no regions at all.
    region_matches = list(_RE_REGION_DELIMITER.finditer(logtext, start, end))
    region_ends = [*region_matches[1:], _DummyEnd(end)]

    blocks = []
    for index, (regionm, nextm) in enumerate(zip(region_matches, region_ends)):
        region_start = regionm.end()
        # Advance past every file whose span ends before this region
        # begins (`while`, not `if`: a region may skip several files
        # that contained no regions of their own).
        while region_start > files[file_pos + 1].from_pos:
            file_pos += 1

        assert region_start > files[file_pos].from_pos

        regioninfo = {
            'file': files[file_pos].filename,
            'benchmark': benchname,
        }
        # The first region absorbs the benchmark preamble so that no
        # text before the first delimiter is lost.
        blk_start = start if index == 0 else regionm.start()
        blocks.append(_parse_block(regioninfo, logtext,
                                   blk_start, nextm.start()))

    return Benchmark(info, blocks)
130143
131144
def _parse_block(info, logtext: str, start, end):
    """Build a Block from the region at logtext[start:end].

    Parses the region's event stream, keeps the raw text, and copies the
    region's name out of its ProcessDag event into `info`.
    """
    events = _parse_events(logtext, start, end)
    # Every scheduling region is expected to emit a ProcessDag event,
    # whose first entry carries the region's name.
    assert 'ProcessDag' in events
    info['name'] = events['ProcessDag'][0]['name']

    return Block(info, logtext[start:end], events)
137152
0 commit comments