@@ -1,13 +1,15 @@
-import pickle
-import json
 import itertools
+import json
+import pickle
 import re
 import sys
-from collections import namedtuple
+from dataclasses import dataclass
+from typing import List, Match, Optional, Pattern, Union

-from .._types import Logs, Benchmark, Block
+from .._types import Benchmark, Block, Logs

-_RE_REGION_INFO = re.compile(r'EVENT:.*ProcessDag.*"name": "(?P<name>[^"]*)"')
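+# Regions are split on this exact delimiter line from the scheduler's log
+# output; re.escape() makes its '*' characters match literally.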
+_REGION_DELIMITER = 'INFO: ********** Opt Scheduling **********'
+_RE_REGION_DELIMITER = re.compile(re.escape(_REGION_DELIMITER))


 def import_main(parsefn, *, description):
@@ -24,18 +26,39 @@ def import_main(parsefn, *, description):
         pickle.dump(result, f)


-def parse_multi_bench_file(logtext, *, benchstart, filename=None):
+def parse_multi_bench_file(logtext: str, *, benchstart: Union[Pattern, str],
+                           filename: Optional[Union[Pattern, str]] = None):
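+    # re.compile() passes an already-compiled pattern through unchanged, so
+    # callers may supply either strings or compiled REs.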
+    if filename is not None:
+        filename = re.compile(filename)
+    benchstart = re.compile(benchstart)
+
+    def parse_bench(benchm: Match, nextm: Union[Match, _DummyEnd],
+                    is_first: bool = False):
+        # The RE can specify any extra properties.
+        info = benchm.groupdict()
+        # If this is the first benchmark in the file, we want to start from the
+        # start of the file so that we don't lose any information.
+        start = 0 if is_first else benchm.start()
+        end = nextm.start()
+        return _parse_benchmark(info, logtext,
+                                start, end,
+                                filenamere=filename)
+
+    bench_matches = list(benchstart.finditer(logtext))
     benchmarks = []
+
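+    # Pair every benchmark-start match with its successor (or a dummy match at
+    # the end of the log) so each benchmark spans up to where the next begins.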
+    is_first: bool = True
+    for benchm, nextm in zip(
+            bench_matches,
+            [*bench_matches[1:], _DummyEnd(len(logtext))]
+    ):
+        benchmarks.append(parse_bench(benchm, nextm, is_first))
+        is_first = False

     return Logs(benchmarks)


-def parse_single_bench_file(logtext, *, benchname, filename=None):
+def parse_single_bench_file(logtext, *, benchname,
+                            filename: Optional[Union[Pattern, str]] = None):
+    if filename is not None:
+        filename = re.compile(filename)
     return Logs([
         _parse_benchmark(
             {'name': benchname},
@@ -45,21 +68,10 @@ def parse_single_bench_file(logtext, *, benchname, filename=None):
     ])


-_FileInfo = namedtuple('_FileInfo', ('filename', 'from_pos'))
-
-
-def _each_cons(iterable, n):
-    '''
-    Iterates over each consecutive n items of the iterable.
-
-    _each_cons((1, 2, 3, 4), 2) # (1, 2), (2, 3), (3, 4)
-    '''
-    iters = [None] * n
-    iters[0] = iter(iterable)
-    for i in range(1, n):
-        iters[i - 1], iters[i] = itertools.tee(iters[i - 1])
-        next(iters[i], None)
-    return zip(*iters)
+@dataclass
+class _FileInfo:
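+    '''A file named in the log, and the offset in logtext where its output begins.'''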
+    filename: Optional[str]
+    from_pos: int


 class _DummyEnd:
@@ -73,65 +85,68 @@ def end(self):
         return self._end


-def _splititer(regex, text, pos=0, endpos=None):
-    '''
-    'Splits' the string by the regular expression, using an iterable.
-    Returns both where the regex matches and where it matched next (or the end).
-    '''
-    if endpos is None:
-        endpos = len(text) - 1
+def _filename_info(filenamere: Optional[Pattern], logtext: str,
+                   start: int, end: int) -> List[_FileInfo]:
+    if filenamere is None:
+        # RE that never matches: no character can precede the start of the string.
+        filenamere = re.compile(r'.^')
+    files = []

-    return _each_cons(
-        itertools.chain(regex.finditer(text, pos, endpos),
-                        (_DummyEnd(endpos + 1),)),
-        2
-    )
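+    # The filename RE must capture the file's name as group 1; the log text
+    # after each match (up to the next match) is attributed to that file.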
+    for filem in filenamere.finditer(logtext, start, end):
+        filename = filem.group(1)
+        filestart = filem.end()
+        files.append(_FileInfo(filename=filename, from_pos=filestart))

+    return files

-def _parse_benchmark(info, logtext: str, start, end, *, filenamere):
-    NAME = info['name']
+
+def _parse_benchmark(info: dict, logtext: str, start: int, end: int, *,
+                     filenamere: Optional[Pattern]):
+    BENCHNAME = info['name']

     blocks = []

-    if filenamere and filenamere.search(logtext, start, end):
-        files = [
-            *(_FileInfo(filename=r.group(1), from_pos=r.end())
-              for r in filenamere.finditer(logtext, start, end)),
-            _FileInfo(filename=None, from_pos=len(logtext)),
-        ][::-1]
-    else:
-        files = [
-            _FileInfo(filename=None, from_pos=start),
-            _FileInfo(filename=None, from_pos=len(logtext)),
-        ][::-1]
+    files: List[_FileInfo] = _filename_info(filenamere, logtext, start, end)
+    if not files:
+        # We have an unknown file starting from the very beginning
+        files = [_FileInfo(filename=None, from_pos=start)]
+
+    # Allow us to peek ahead by giving a dummy "file" at the end which will
+    # never match a block
+    files.append(_FileInfo(filename=None, from_pos=end))
+    assert len(files) >= 2
+    file_pos = 0
+
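+    # Pair each region delimiter with the next one: tee the match iterator,
+    # advance the second copy by one, and pad it with a dummy match at `end`
+    # so the last region runs to the end of this benchmark's slice.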
+    block_matches1, block_matches2 = itertools.tee(
+        _RE_REGION_DELIMITER.finditer(logtext, start, end))
+    next(block_matches2, None)  # Drop first; default avoids StopIteration if no regions
+    block_matches2 = itertools.chain(block_matches2, (_DummyEnd(end),))

     blocks = []

-    for regionm, nextm in _splititer(_RE_REGION_INFO, logtext, start, end):
-        assert regionm.end() > files[-1].from_pos
-        if regionm.end() > files[-2].from_pos:
-            files.pop()
+    is_first = True
+    for regionm, nextm in zip(block_matches1, block_matches2):
+        region_start = regionm.end()
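+        # Advance to the file whose output contains this region; a file may
+        # contain no regions at all, so this can skip several at once.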
+        while region_start > files[file_pos + 1].from_pos:
+            file_pos += 1
+
+        assert region_start > files[file_pos].from_pos

-        try:
-            filename = files[-1].filename
-        except NameError:
-            filename = None
+        filename = files[file_pos].filename

         regioninfo = {
-            'name': regionm['name'],
             'file': filename,
-            'benchmark': NAME,
+            'benchmark': BENCHNAME,
         }
-        block = _parse_block(regioninfo, logtext,
-                             regionm.start() - 1, nextm.start())
-        blocks.append(block)
+        blk_start = start if is_first else regionm.start()
+        blk_end = nextm.start()
+        blocks.append(_parse_block(regioninfo, logtext,
+                                   blk_start, blk_end))
+        is_first = False

     return Benchmark(info, blocks)


 def _parse_block(info, logtext: str, start, end):
     events = _parse_events(logtext, start, end)
     raw_log = logtext[start:end]
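+    # Each region logs a ProcessDag event; the block's name is taken from it
+    # now that the delimiter line no longer carries a name.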
+    assert 'ProcessDag' in events
+    info['name'] = events['ProcessDag'][0]['name']

     return Block(info, raw_log, events)
