77//! the reachable states is an accepting state
88use std:: collections:: { HashMap , HashSet , VecDeque } ;
99
10+ use crate :: parser:: Pattern ;
11+
1012use super :: Translation ;
1113
1214/// Reference to a state in the [NFA] states vector
@@ -181,6 +183,16 @@ impl NFA {
181183 }
182184 }
183185
186+ fn add_character_class ( & mut self , chars : & HashSet < char > ) -> Fragment {
187+ let start = self . add_state ( State { translation : None } ) ;
188+ let end = self . add_state ( State { translation : None } ) ;
189+ for c in chars {
190+ self . transitions
191+ . insert ( ( start, Transition :: Character ( * c) ) , end) ;
192+ }
193+ Fragment { start, end }
194+ }
195+
184196 fn add_fragment ( & mut self , ast : & AST ) -> Fragment {
185197 match ast {
186198 AST :: Character ( c) => self . add_char ( * c, None ) ,
@@ -229,6 +241,46 @@ impl NFA {
229241 nfa
230242 }
231243
244+ fn add_pattern_fragment ( & mut self , pattern : & Pattern ) -> Fragment {
245+ match pattern {
246+ Pattern :: Empty => todo ! ( ) ,
247+ Pattern :: Characters ( s) => self . add_string ( s, None ) ,
248+ Pattern :: Boundary => todo ! ( ) ,
249+ Pattern :: Any => self . add_any ( None ) ,
250+ Pattern :: Set ( chars) => self . add_character_class ( chars) ,
251+ Pattern :: Attributes ( hash_set) => todo ! ( ) ,
252+ Pattern :: Group ( vec) => todo ! ( ) ,
253+ Pattern :: Negate ( pattern) => todo ! ( ) ,
254+ Pattern :: Optional ( pattern) => {
255+ let fragment = self . add_pattern_fragment ( pattern) ;
256+ self . add_optional ( & fragment)
257+ }
258+ Pattern :: ZeroOrMore ( pattern) => {
259+ let fragment = self . add_pattern_fragment ( pattern) ;
260+ self . add_kleene ( & fragment)
261+ }
262+ Pattern :: OneOrMore ( pattern) => {
263+ let one = self . add_pattern_fragment ( pattern) ;
264+ let fragment = self . add_pattern_fragment ( pattern) ;
265+ let kleene = self . add_kleene ( & fragment) ;
266+ self . add_concatenation ( & one, & kleene)
267+ }
268+ Pattern :: Either ( pattern, other) => {
269+ let r1 = self . add_pattern_fragment ( pattern) ;
270+ let r2 = self . add_pattern_fragment ( other) ;
271+ self . add_union ( & r1, & r2)
272+ }
273+ }
274+ }
275+
276+ fn from_match_pattern ( pattern : & Pattern , translation : & Translation ) -> NFA {
277+ let mut nfa = NFA :: new ( ) ;
278+ let body = nfa. add_pattern_fragment ( pattern) ;
279+ nfa. start = body. start ;
280+ nfa. states [ body. end ] . translation = Some ( translation. clone ( ) ) ;
281+ nfa
282+ }
283+
232284 /// Return all states that are reachable from a set of `states`
233285 /// via epsilon transitions
234286 fn epsilon_closure ( & self , states : & HashSet < StateId > ) -> HashSet < StateId > {
@@ -289,7 +341,7 @@ impl NFA {
289341 ) -> Vec < Translation > {
290342 dbg ! ( & state) ;
291343 let mut matching_rules = Vec :: new ( ) ;
292- let mut next_states = self . epsilon_closure ( & HashSet :: from ( [ state] ) ) ;
344+ let next_states = dbg ! ( self . epsilon_closure( & HashSet :: from( [ state] ) ) ) ;
293345
294346 // if any of the states in the epsilon closure (reachable via epsilon transition)
295347 // has a translation add it to the list of matching rules
@@ -320,36 +372,30 @@ impl NFA {
320372 ) ) ;
321373 }
322374
323- match input. chars ( ) . next ( ) {
324- Some ( c) => {
325- let reachable_via_character =
326- self . move_state ( & next_states, Transition :: Character ( c) ) ;
327- let reachable_via_any = self . move_state ( & next_states, Transition :: Any ) ;
328- next_states = reachable_via_character
329- . union ( & reachable_via_any)
330- . cloned ( )
331- . collect ( ) ;
332- next_states = self . epsilon_closure ( & next_states) ;
333- for state in next_states {
334- let bytes = c. len_utf8 ( ) ;
335- matching_rules. extend ( self . find_translations_from_state (
336- state,
337- & input[ bytes..] ,
338- match_length + 1 ,
339- offset,
340- ) ) ;
341- }
342- matching_rules
375+ if let Some ( c) = input. chars ( ) . next ( ) {
376+ let reachable_via_character = self . move_state ( & next_states, Transition :: Character ( c) ) ;
377+ let reachable_via_any = self . move_state ( & next_states, Transition :: Any ) ;
378+ let mut next_states = reachable_via_character
379+ . union ( & reachable_via_any)
380+ . cloned ( )
381+ . collect ( ) ;
382+ next_states = self . epsilon_closure ( & next_states) ;
383+ for state in next_states {
384+ let bytes = c. len_utf8 ( ) ;
385+ matching_rules. extend ( self . find_translations_from_state (
386+ state,
387+ & input[ bytes..] ,
388+ match_length + 1 ,
389+ offset,
390+ ) ) ;
343391 }
344- None => matching_rules,
345392 }
393+
394+ matching_rules
346395 }
347396
348397 pub fn find_translations ( & self , input : & str ) -> Vec < Translation > {
349- let mut matching_rules = Vec :: new ( ) ;
350-
351- matching_rules. extend ( self . find_translations_from_state ( self . start , input, 0 , 0 ) ) ;
352- matching_rules
398+ self . find_translations_from_state ( self . start , input, 0 , 0 )
353399 }
354400}
355401
@@ -401,6 +447,7 @@ pub fn nfa_dot(nfa: &NFA) -> String {
401447#[ cfg( test) ]
402448mod tests {
403449 use super :: * ;
450+ use crate :: parser:: PatternParser ;
404451
405452 #[ test]
406453 fn character ( ) {
@@ -480,7 +527,7 @@ mod tests {
480527 #[ test]
481528 fn find_kleene ( ) {
482529 let ast = AST :: ZeroOrMore ( Box :: new ( AST :: Character ( 'a' ) ) ) ;
483- let nfa = dbg ! ( NFA :: from( & ast) ) ;
530+ let nfa = NFA :: from ( & ast) ;
484531 assert ! ( !nfa. find_translations( "" ) . is_empty( ) ) ;
485532 assert ! ( !nfa. find_translations( "a" ) . is_empty( ) ) ;
486533 assert ! ( !nfa. find_translations( "aa" ) . is_empty( ) ) ;
@@ -495,7 +542,7 @@ mod tests {
495542 Box :: new ( AST :: Character ( 'a' ) ) ,
496543 Box :: new ( AST :: ZeroOrMore ( Box :: new ( AST :: Character ( 'b' ) ) ) ) ,
497544 ) ;
498- let nfa = dbg ! ( NFA :: from( & ast) ) ;
545+ let nfa = NFA :: from ( & ast) ;
499546 assert ! ( !nfa. find_translations( "a" ) . is_empty( ) ) ;
500547 assert ! ( !nfa. find_translations( "aa" ) . is_empty( ) ) ;
501548 assert ! ( !nfa. find_translations( "ab" ) . is_empty( ) ) ;
@@ -637,4 +684,42 @@ mod tests {
637684 assert ! ( nfa. find_translations( "()" ) . is_empty( ) ) ;
638685 assert ! ( nfa. find_translations( "(helo)" ) . is_empty( ) ) ;
639686 }
687+
/// A plain character pattern yields its translation for the exact input and
/// nothing for an unrelated input.
#[test]
fn find_pattern() {
    let patterns = PatternParser::new("abc").pattern().unwrap();
    let pattern = patterns.first().unwrap();
    let blank = String::new();
    let translation = Translation::new(blank.clone(), blank, 0);
    // No `dbg!` here: the other tests in this module build their NFA silently.
    let nfa = NFA::from_match_pattern(&pattern, &translation);
    assert_eq!(nfa.find_translations("abc"), vec![translation]);
    assert!(nfa.find_translations("def").is_empty());
}
698+
/// Each single member of the class `[abc]` yields the translation; an input
/// starting with a non-member yields nothing.
#[test]
fn find_character_class() {
    let parsed = PatternParser::new("[abc]").pattern().unwrap();
    let class_pattern = parsed.first().unwrap();
    let expected = Translation::new(String::new(), String::new(), 0);
    let nfa = NFA::from_match_pattern(&class_pattern, &expected);
    for member in ["a", "b", "c"] {
        assert_eq!(nfa.find_translations(member), vec![expected.clone()]);
    }
    assert!(nfa.find_translations("def").is_empty());
}
711+
/// Same expectations as for the plain character class, but with a `+` quantifier.
/// Currently ignored; see the reason given in the attribute.
#[test]
#[ignore = "finds the same translation multiple times"]
fn find_character_class_one_or_more() {
    let parsed = PatternParser::new("[abc]+").pattern().unwrap();
    let class_pattern = parsed.first().unwrap();
    let expected = Translation::new(String::new(), String::new(), 0);
    let nfa = NFA::from_match_pattern(&class_pattern, &expected);
    for member in ["a", "b", "c"] {
        assert_eq!(nfa.find_translations(member), vec![expected.clone()]);
    }
    assert!(nfa.find_translations("def").is_empty());
}
640725}
0 commit comments