11use std:: fmt;
22use std:: sync:: atomic:: { AtomicU32 , Ordering } ;
33
4- use tracing:: instrument;
5-
6- use super :: { Byte , Nfa , Ref , nfa} ;
4+ use super :: { Byte , Ref , Tree , Uninhabited } ;
75use crate :: Map ;
86
9- #[ derive( PartialEq , Clone , Debug ) ]
7+ #[ derive( PartialEq , Clone ) ]
108pub ( crate ) struct Dfa < R >
119where
1210 R : Ref ,
@@ -34,35 +32,15 @@ where
3432 }
3533}
3634
37- impl < R > Transitions < R >
38- where
39- R : Ref ,
40- {
41- #[ cfg( test) ]
42- fn insert ( & mut self , transition : Transition < R > , state : State ) {
43- match transition {
44- Transition :: Byte ( b) => {
45- self . byte_transitions . insert ( b, state) ;
46- }
47- Transition :: Ref ( r) => {
48- self . ref_transitions . insert ( r, state) ;
49- }
50- }
51- }
52- }
53-
5435/// The states in a `Dfa` represent byte offsets.
5536#[ derive( Hash , Eq , PartialEq , PartialOrd , Ord , Copy , Clone ) ]
56- pub ( crate ) struct State ( u32 ) ;
37+ pub ( crate ) struct State ( pub ( crate ) u32 ) ;
5738
58- #[ cfg( test) ]
59- #[ derive( Hash , Eq , PartialEq , Clone , Copy ) ]
60- pub ( crate ) enum Transition < R >
61- where
62- R : Ref ,
63- {
64- Byte ( Byte ) ,
65- Ref ( R ) ,
39+ impl State {
40+ pub ( crate ) fn new ( ) -> Self {
41+ static COUNTER : AtomicU32 = AtomicU32 :: new ( 0 ) ;
42+ Self ( COUNTER . fetch_add ( 1 , Ordering :: SeqCst ) )
43+ }
6644}
6745
6846impl fmt:: Debug for State {
@@ -71,19 +49,6 @@ impl fmt::Debug for State {
7149 }
7250}
7351
74- #[ cfg( test) ]
75- impl < R > fmt:: Debug for Transition < R >
76- where
77- R : Ref ,
78- {
79- fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
80- match & self {
81- Self :: Byte ( b) => b. fmt ( f) ,
82- Self :: Ref ( r) => r. fmt ( f) ,
83- }
84- }
85- }
86-
8752impl < R > Dfa < R >
8853where
8954 R : Ref ,
@@ -94,58 +59,161 @@ where
9459 let start = State :: new ( ) ;
9560 let accepting = State :: new ( ) ;
9661
97- transitions. entry ( start) . or_default ( ) . insert ( Transition :: Byte ( Byte :: Init ( 0x00 ) ) , accepting) ;
62+ transitions. entry ( start) . or_default ( ) . byte_transitions . insert ( Byte :: Init ( 0x00 ) , accepting) ;
9863
99- transitions. entry ( start) . or_default ( ) . insert ( Transition :: Byte ( Byte :: Init ( 0x01 ) ) , accepting) ;
64+ transitions. entry ( start) . or_default ( ) . byte_transitions . insert ( Byte :: Init ( 0x01 ) , accepting) ;
10065
10166 Self { transitions, start, accepting }
10267 }
10368
104- #[ instrument( level = "debug" ) ]
105- pub ( crate ) fn from_nfa ( nfa : Nfa < R > ) -> Self {
106- let Nfa { transitions : nfa_transitions, start : nfa_start, accepting : nfa_accepting } = nfa;
69+ pub ( crate ) fn unit ( ) -> Self {
70+ let transitions: Map < State , Transitions < R > > = Map :: default ( ) ;
71+ let start = State :: new ( ) ;
72+ let accepting = start;
10773
108- let mut dfa_transitions: Map < State , Transitions < R > > = Map :: default ( ) ;
109- let mut nfa_to_dfa: Map < nfa:: State , State > = Map :: default ( ) ;
110- let dfa_start = State :: new ( ) ;
111- nfa_to_dfa. insert ( nfa_start, dfa_start) ;
74+ Self { transitions, start, accepting }
75+ }
11276
113- let mut queue = vec ! [ ( nfa_start, dfa_start) ] ;
77+ pub ( crate ) fn from_byte ( byte : Byte ) -> Self {
78+ let mut transitions: Map < State , Transitions < R > > = Map :: default ( ) ;
79+ let start = State :: new ( ) ;
80+ let accepting = State :: new ( ) ;
11481
115- while let Some ( ( nfa_state , dfa_state ) ) = queue . pop ( ) {
116- if nfa_state == nfa_accepting {
117- continue ;
118- }
82+ transitions . entry ( start ) . or_default ( ) . byte_transitions . insert ( byte , accepting ) ;
83+
84+ Self { transitions , start , accepting }
85+ }
11986
120- for ( nfa_transition, next_nfa_states) in nfa_transitions[ & nfa_state] . iter ( ) {
121- let dfa_transitions =
122- dfa_transitions. entry ( dfa_state) . or_insert_with ( Default :: default) ;
123-
124- let mapped_state = next_nfa_states. iter ( ) . find_map ( |x| nfa_to_dfa. get ( x) . copied ( ) ) ;
125-
126- let next_dfa_state = match nfa_transition {
127- & nfa:: Transition :: Byte ( b) => * dfa_transitions
128- . byte_transitions
129- . entry ( b)
130- . or_insert_with ( || mapped_state. unwrap_or_else ( State :: new) ) ,
131- & nfa:: Transition :: Ref ( r) => * dfa_transitions
132- . ref_transitions
133- . entry ( r)
134- . or_insert_with ( || mapped_state. unwrap_or_else ( State :: new) ) ,
135- } ;
136-
137- for & next_nfa_state in next_nfa_states {
138- nfa_to_dfa. entry ( next_nfa_state) . or_insert_with ( || {
139- queue. push ( ( next_nfa_state, next_dfa_state) ) ;
140- next_dfa_state
141- } ) ;
87+ pub ( crate ) fn from_ref ( r : R ) -> Self {
88+ let mut transitions: Map < State , Transitions < R > > = Map :: default ( ) ;
89+ let start = State :: new ( ) ;
90+ let accepting = State :: new ( ) ;
91+
92+ transitions. entry ( start) . or_default ( ) . ref_transitions . insert ( r, accepting) ;
93+
94+ Self { transitions, start, accepting }
95+ }
96+
97+ pub ( crate ) fn from_tree ( tree : Tree < !, R > ) -> Result < Self , Uninhabited > {
98+ Ok ( match tree {
99+ Tree :: Byte ( b) => Self :: from_byte ( b) ,
100+ Tree :: Ref ( r) => Self :: from_ref ( r) ,
101+ Tree :: Alt ( alts) => {
102+ let mut alts = alts. into_iter ( ) . map ( Self :: from_tree) ;
103+ let mut dfa = alts. next ( ) . ok_or ( Uninhabited ) ??;
104+ for alt in alts {
105+ dfa = dfa. union ( alt?, State :: new) ;
106+ }
107+ dfa
108+ }
109+ Tree :: Seq ( elts) => {
110+ let mut dfa = Self :: unit ( ) ;
111+ for elt in elts. into_iter ( ) . map ( Self :: from_tree) {
112+ dfa = dfa. concat ( elt?) ;
142113 }
114+ dfa
143115 }
116+ } )
117+ }
118+
119+ /// Concatenate two `Dfa`s.
120+ pub ( crate ) fn concat ( self , other : Self ) -> Self {
121+ if self . start == self . accepting {
122+ return other;
123+ } else if other. start == other. accepting {
124+ return self ;
144125 }
145126
146- let dfa_accepting = nfa_to_dfa[ & nfa_accepting] ;
127+ let start = self . start ;
128+ let accepting = other. accepting ;
147129
148- Self { transitions : dfa_transitions, start : dfa_start, accepting : dfa_accepting }
130+ let mut transitions: Map < State , Transitions < R > > = self . transitions ;
131+
132+ for ( source, transition) in other. transitions {
133+ let fix_state = |state| if state == other. start { self . accepting } else { state } ;
134+ let entry = transitions. entry ( fix_state ( source) ) . or_default ( ) ;
135+ for ( edge, destination) in transition. byte_transitions {
136+ entry. byte_transitions . insert ( edge, fix_state ( destination) ) ;
137+ }
138+ for ( edge, destination) in transition. ref_transitions {
139+ entry. ref_transitions . insert ( edge, fix_state ( destination) ) ;
140+ }
141+ }
142+
143+ Self { transitions, start, accepting }
144+ }
145+
146+ /// Compute the union of two `Nfa`s.
147+ pub ( crate ) fn union ( self , other : Self , mut new_state : impl FnMut ( ) -> State ) -> Self {
148+ // We implement `union` by lazily initializing a set of states
149+ // corresponding to the product of states in `self` and `other`, and
150+ // then add transitions between these states that correspond to where
151+ // they exist between `self` and `other`.
152+
153+ let a = self ;
154+ let b = other;
155+
156+ let accepting = new_state ( ) ;
157+
158+ let mut mapping: Map < ( Option < State > , Option < State > ) , State > = Map :: default ( ) ;
159+
160+ let mut mapped = |( a_state, b_state) | {
161+ if Some ( a. accepting ) == a_state || Some ( b. accepting ) == b_state {
162+ // If either `a_state` or `b_state` are accepting, map to a
163+ // common `accepting` state.
164+ accepting
165+ } else {
166+ * mapping. entry ( ( a_state, b_state) ) . or_insert_with ( & mut new_state)
167+ }
168+ } ;
169+
170+ let start = mapped ( ( Some ( a. start ) , Some ( b. start ) ) ) ;
171+ let mut transitions: Map < State , Transitions < R > > = Map :: default ( ) ;
172+ let mut queue = vec ! [ ( Some ( a. start) , Some ( b. start) ) ] ;
173+ let empty_transitions = Transitions :: default ( ) ;
174+
175+ while let Some ( ( a_src, b_src) ) = queue. pop ( ) {
176+ let a_transitions =
177+ a_src. and_then ( |a_src| a. transitions . get ( & a_src) ) . unwrap_or ( & empty_transitions) ;
178+ let b_transitions =
179+ b_src. and_then ( |b_src| b. transitions . get ( & b_src) ) . unwrap_or ( & empty_transitions) ;
180+
181+ let byte_transitions =
182+ a_transitions. byte_transitions . keys ( ) . chain ( b_transitions. byte_transitions . keys ( ) ) ;
183+
184+ for byte_transition in byte_transitions {
185+ let a_dst = a_transitions. byte_transitions . get ( byte_transition) . copied ( ) ;
186+ let b_dst = b_transitions. byte_transitions . get ( byte_transition) . copied ( ) ;
187+
188+ assert ! ( a_dst. is_some( ) || b_dst. is_some( ) ) ;
189+
190+ let src = mapped ( ( a_src, b_src) ) ;
191+ let dst = mapped ( ( a_dst, b_dst) ) ;
192+
193+ transitions. entry ( src) . or_default ( ) . byte_transitions . insert ( * byte_transition, dst) ;
194+
195+ queue. push ( ( a_dst, b_dst) )
196+ }
197+
198+ let ref_transitions =
199+ a_transitions. ref_transitions . keys ( ) . chain ( b_transitions. ref_transitions . keys ( ) ) ;
200+
201+ for ref_transition in ref_transitions {
202+ let a_dst = a_transitions. ref_transitions . get ( ref_transition) . copied ( ) ;
203+ let b_dst = b_transitions. ref_transitions . get ( ref_transition) . copied ( ) ;
204+
205+ assert ! ( a_dst. is_some( ) || b_dst. is_some( ) ) ;
206+
207+ let src = mapped ( ( a_src, b_src) ) ;
208+ let dst = mapped ( ( a_dst, b_dst) ) ;
209+
210+ transitions. entry ( src) . or_default ( ) . ref_transitions . insert ( * ref_transition, dst) ;
211+
212+ queue. push ( ( a_dst, b_dst) )
213+ }
214+ }
215+
216+ Self { transitions, start, accepting }
149217 }
150218
151219 pub ( crate ) fn bytes_from ( & self , start : State ) -> Option < & Map < Byte , State > > {
@@ -159,24 +227,48 @@ where
159227 pub ( crate ) fn refs_from ( & self , start : State ) -> Option < & Map < R , State > > {
160228 Some ( & self . transitions . get ( & start) ?. ref_transitions )
161229 }
162- }
163230
164- impl State {
165- pub ( crate ) fn new ( ) -> Self {
166- static COUNTER : AtomicU32 = AtomicU32 :: new ( 0 ) ;
167- Self ( COUNTER . fetch_add ( 1 , Ordering :: SeqCst ) )
/// Test-only constructor that builds a `Dfa` from raw state numbers.
///
/// `start` and `accept` become the start and accepting states; each
/// `(src, edge, dst)` triple in `edges` adds a byte transition from
/// `State(src)` to `State(dst)` labelled `edge.into()`.
///
/// # Panics
///
/// Panics if two triples give the same source state the same byte label.
#[cfg(test)]
pub(crate) fn from_edges<B: Copy + Into<Byte>>(
    start: u32,
    accept: u32,
    edges: &[(u32, B, u32)],
) -> Self {
    let mut transitions: Map<State, Transitions<R>> = Map::default();

    for (src, edge, dst) in edges.iter().copied() {
        let outgoing = transitions.entry(State(src)).or_default();
        let old = outgoing.byte_transitions.insert(edge.into(), State(dst));
        assert!(old.is_none());
    }

    Self { start: State(start), accepting: State(accept), transitions }
}
169250}
170251
171- # [ cfg ( test ) ]
172- impl < R > From < nfa :: Transition < R > > for Transition < R >
252+ /// Serialize the DFA using the Graphviz DOT format.
253+ impl < R > fmt :: Debug for Dfa < R >
173254where
174255 R : Ref ,
175256{
176- fn from ( nfa_transition : nfa:: Transition < R > ) -> Self {
177- match nfa_transition {
178- nfa:: Transition :: Byte ( byte) => Transition :: Byte ( byte) ,
179- nfa:: Transition :: Ref ( r) => Transition :: Ref ( r) ,
// `fmt` writes the `digraph {` header, one `doublecircle` node line for the
// start state and one for the accepting state, then one labelled edge line
// per transition, and finally the closing `}`. Any write error is propagated.
257+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
258+ writeln ! ( f, "digraph {{" ) ?;
259+ writeln ! ( f, " {:?} [shape = doublecircle]" , self . start) ?;
260+ writeln ! ( f, " {:?} [shape = doublecircle]" , self . accepting) ?;
261+
262+ for ( src, transitions) in self . transitions . iter ( ) {
// Edges labelled with a byte; states and labels use their `Debug` output.
263+ for ( t, dst) in transitions. byte_transitions . iter ( ) {
264+ writeln ! ( f, " {src:?} -> {dst:?} [label=\" {t:?}\" ]" ) ?;
265+ }
266+
// Edges labelled with a reference, written in the same form.
267+ for ( t, dst) in transitions. ref_transitions . iter ( ) {
268+ writeln ! ( f, " {src:?} -> {dst:?} [label=\" {t:?}\" ]" ) ?;
269+ }
180270 }
271+
272+ writeln ! ( f, "}}" )
181273 }
182274}
0 commit comments