@@ -34,11 +34,17 @@ enum Boundary {
3434/// An transition between two [States](State) in the [NFA]
3535#[ derive( Debug , PartialEq , Eq , Hash , Clone ) ]
3636enum Transition {
37+ /// A transition that accepts a character
3738 Character ( char ) ,
39+ /// A transition that accepts any character
3840 Any ,
3941 Start ( Boundary ) ,
4042 End ( Boundary ) ,
43+ /// An epsilon transition that accepts the empty string
4144 Epsilon ,
45+ /// An Offset transition is essentially an epsilon transition that marks the end of a
46+ /// non-capturing group. It is used to mark the end of the pre pattern in match rules
47+ Offset ,
4248}
4349
4450/// An NFA consisting of a set of states and transitions between them
@@ -273,6 +279,78 @@ impl NFA {
273279 . iter ( )
274280 . any ( |s| self . states [ * s] . translation . is_some ( ) )
275281 }
282+
283+ fn find_translations_from_state (
284+ & self ,
285+ state : StateId ,
286+ input : & str ,
287+ match_length : usize ,
288+ offset : usize ,
289+ ) -> Vec < Translation > {
290+ dbg ! ( & state) ;
291+ let mut matching_rules = Vec :: new ( ) ;
292+ let mut next_states = self . epsilon_closure ( & HashSet :: from ( [ state] ) ) ;
293+
294+ // if any of the states in the epsilon closure (reachable via epsilon transition)
295+ // has a translation add it to the list of matching rules
296+ matching_rules. extend (
297+ next_states
298+ . iter ( )
299+ . flat_map ( |state| & self . states [ * state] . translation )
300+ . map ( |translation| {
301+ translation
302+ . clone ( )
303+ . with_offset ( offset)
304+ // if there is an offset (typically in a match opcode), the weight needs
305+ // to be calculated at run-time. The weight is the actual length of match.
306+ . with_weight_if_offset ( match_length, offset)
307+ } ) ,
308+ ) ;
309+
310+ // traverse all states that are reachable via an offset transition (essentially an
311+ // epsilon transition that marks the end of a non-capture group)
312+ let reachable_via_offset = self . move_state ( & next_states, Transition :: Offset ) ;
313+ let next_states_with_offset = self . epsilon_closure ( & reachable_via_offset) ;
314+ for state in next_states_with_offset {
315+ matching_rules. extend ( self . find_translations_from_state (
316+ state,
317+ input,
318+ match_length + 1 ,
319+ offset + match_length,
320+ ) ) ;
321+ }
322+
323+ match input. chars ( ) . next ( ) {
324+ Some ( c) => {
325+ let reachable_via_character =
326+ self . move_state ( & next_states, Transition :: Character ( c) ) ;
327+ let reachable_via_any = self . move_state ( & next_states, Transition :: Any ) ;
328+ next_states = reachable_via_character
329+ . union ( & reachable_via_any)
330+ . cloned ( )
331+ . collect ( ) ;
332+ next_states = self . epsilon_closure ( & next_states) ;
333+ for state in next_states {
334+ let bytes = c. len_utf8 ( ) ;
335+ matching_rules. extend ( self . find_translations_from_state (
336+ state,
337+ & input[ bytes..] ,
338+ match_length + 1 ,
339+ offset,
340+ ) ) ;
341+ }
342+ matching_rules
343+ }
344+ None => matching_rules,
345+ }
346+ }
347+
348+ pub fn find_translations ( & self , input : & str ) -> Vec < Translation > {
349+ let mut matching_rules = Vec :: new ( ) ;
350+
351+ matching_rules. extend ( self . find_translations_from_state ( self . start , input, 0 , 0 ) ) ;
352+ matching_rules
353+ }
276354}
277355
278356/**
@@ -297,6 +375,9 @@ pub fn nfa_dot(nfa: &NFA) -> String {
297375 Transition :: Any => {
298376 dot. push_str ( & format ! ( "\t {} -> {} [label=\" {}\" ]\n " , from, to, "Any" ) )
299377 }
378+ Transition :: Offset => {
379+ dot. push_str ( & format ! ( "\t {} -> {} [label=\" {}\" ]\n " , from, to, "Offset" ) )
380+ }
300381 Transition :: Start ( boundary) => dot. push_str ( & format ! (
301382 "\t {} -> {} [label=\" {:?}\" ]\n " ,
302383 from, to, boundary
@@ -329,6 +410,14 @@ mod tests {
329410 assert ! ( !nfa. accepts( "b" ) ) ;
330411 }
331412
// A single-character pattern yields translations for that character only.
#[test]
fn find_character() {
    let nfa = NFA::from(&AST::Character('a'));

    let found_for_a = nfa.find_translations("a");
    assert!(!found_for_a.is_empty());

    let found_for_b = nfa.find_translations("b");
    assert!(found_for_b.is_empty());
}
420+
332421 #[ test]
333422 fn alteration ( ) {
334423 let ast = AST :: Either ( Box :: new ( AST :: Character ( 'a' ) ) , Box :: new ( AST :: Character ( 'b' ) ) ) ;
@@ -339,6 +428,16 @@ mod tests {
339428 assert ! ( !nfa. accepts( "c" ) ) ;
340429 }
341430
// `a|b` yields translations for inputs that start with either alternative.
#[test]
fn find_alteration() {
    let left = Box::new(AST::Character('a'));
    let right = Box::new(AST::Character('b'));
    let nfa = NFA::from(&AST::Either(left, right));

    // (input, whether at least one translation is expected)
    let cases = [("a", true), ("b", true), ("ab", true), ("c", false)];
    for (input, expect_match) in cases {
        let found = !nfa.find_translations(input).is_empty();
        assert_eq!(found, expect_match, "input: {:?}", input);
    }
}
440+
342441 #[ test]
343442 fn concatenation ( ) {
344443 let ast = AST :: Concat ( Box :: new ( AST :: Character ( 'a' ) ) , Box :: new ( AST :: Character ( 'b' ) ) ) ;
@@ -351,6 +450,18 @@ mod tests {
351450 assert ! ( !nfa. accepts( "abc" ) ) ;
352451 }
353452
// `ab` yields translations only for inputs that start with "ab".
#[test]
fn find_concatenation() {
    let first = Box::new(AST::Character('a'));
    let second = Box::new(AST::Character('b'));
    let nfa = NFA::from(&AST::Concat(first, second));

    // (input, whether at least one translation is expected)
    let cases = [
        ("ab", true),
        ("abc", true),
        ("a", false),
        ("b", false),
        ("ba", false),
        ("c", false),
    ];
    for (input, expect_match) in cases {
        let found = !nfa.find_translations(input).is_empty();
        assert_eq!(found, expect_match, "input: {:?}", input);
    }
}
464+
354465 #[ test]
355466 fn kleene ( ) {
356467 let ast = AST :: ZeroOrMore ( Box :: new ( AST :: Character ( 'a' ) ) ) ;
@@ -366,6 +477,38 @@ mod tests {
366477 assert ! ( !nfa. accepts( "abc" ) ) ;
367478 }
368479
// `a*` on its own yields a translation for every input, since the empty prefix
// already matches; `ab*` requires the input to start with 'a'.
#[test]
fn find_kleene() {
    let ast = AST::ZeroOrMore(Box::new(AST::Character('a')));
    let nfa = dbg!(NFA::from(&ast));
    for input in ["", "a", "aa", "aaaaa", "ab", "abc", "b", "ba", "c"] {
        assert!(
            !nfa.find_translations(input).is_empty(),
            "input: {:?}",
            input
        );
    }

    let zero_or_more_b = AST::ZeroOrMore(Box::new(AST::Character('b')));
    let ast = AST::Concat(Box::new(AST::Character('a')), Box::new(zero_or_more_b));
    let nfa = dbg!(NFA::from(&ast));
    // (input, whether at least one translation is expected)
    let cases = [
        ("a", true),
        ("aa", true),
        ("ab", true),
        ("abbbb", true),
        ("", false),
        ("ccccc", false),
        ("cb", false),
        ("cba", false),
        ("b", false),
        ("ba", false),
        ("c", false),
    ];
    for (input, expect_match) in cases {
        let found = !nfa.find_translations(input).is_empty();
        assert_eq!(found, expect_match, "input: {:?}", input);
    }
}
511+
369512 #[ test]
370513 fn one_or_more ( ) {
371514 let ast = AST :: OneOrMore ( Box :: new ( AST :: Character ( 'a' ) ) ) ;
@@ -381,6 +524,21 @@ mod tests {
381524 assert ! ( !nfa. accepts( "abc" ) ) ;
382525 }
383526
// `a+` yields translations only for inputs that start with at least one 'a'.
#[test]
fn find_one_or_more() {
    let nfa = NFA::from(&AST::OneOrMore(Box::new(AST::Character('a'))));

    // (input, whether at least one translation is expected)
    let cases = [
        ("", false),
        ("a", true),
        ("aa", true),
        ("aaaaa", true),
        ("b", false),
        ("ba", false),
        ("ab", true),
        ("c", false),
        ("abc", true),
    ];
    for (input, expect_match) in cases {
        let found = !nfa.find_translations(input).is_empty();
        assert_eq!(found, expect_match, "input: {:?}", input);
    }
}
541+
384542 #[ test]
385543 fn any ( ) {
386544 let ast = AST :: Concat (
@@ -394,6 +552,19 @@ mod tests {
394552 assert ! ( nfa. accepts( "abb" ) ) ;
395553 }
396554
// 'a', followed by any character, followed by 'b' yields a translation for "abb".
#[test]
fn find_any() {
    let a_then_any = AST::Concat(Box::new(AST::Character('a')), Box::new(AST::Any));
    let pattern = AST::Concat(Box::new(a_then_any), Box::new(AST::Character('b')));
    let nfa = NFA::from(&pattern);

    let found = nfa.find_translations("abb");
    assert!(!found.is_empty());
}
567+
397568 #[ test]
398569 fn optional ( ) {
399570 let ast = AST :: Concat (
@@ -411,6 +582,24 @@ mod tests {
411582 assert ! ( !nfa. accepts( "bbb" ) ) ;
412583 }
413584
// An optional "a<any>" prefix followed by a mandatory 'b'.
#[test]
fn find_optional() {
    let a_then_any = AST::Concat(Box::new(AST::Character('a')), Box::new(AST::Any));
    let optional_prefix = AST::Optional(Box::new(a_then_any));
    let pattern = AST::Concat(Box::new(optional_prefix), Box::new(AST::Character('b')));
    let nfa = NFA::from(&pattern);

    // (input, whether at least one translation is expected)
    let cases = [
        ("acb", true),
        ("axb", true),
        ("b", true),
        ("bbb", true),
        ("c", false),
        ("", false),
    ];
    for (input, expect_match) in cases {
        let found = !nfa.find_translations(input).is_empty();
        assert_eq!(found, expect_match, "input: {:?}", input);
    }
}
602+
414603 #[ test]
415604 fn string ( ) {
416605 let ast = AST :: Concat (
@@ -429,4 +618,23 @@ mod tests {
429618 assert ! ( !nfa. accepts( "()" ) ) ;
430619 assert ! ( !nfa. accepts( "(helo)" ) ) ;
431620 }
621+
// One or more '(' followed by the literal "hello" followed by one or more ')'.
#[test]
fn find_string() {
    let opening = AST::OneOrMore(Box::new(AST::Character('(')));
    let word = AST::String("hello".to_string());
    let closing = AST::OneOrMore(Box::new(AST::Character(')')));
    let opening_and_word = AST::Concat(Box::new(opening), Box::new(word));
    let pattern = AST::Concat(Box::new(opening_and_word), Box::new(closing));
    let nfa = NFA::from(&pattern);

    // (input, whether at least one translation is expected)
    let cases = [
        ("(hello)", true),
        ("(((((hello)))", true),
        ("hello", false),
        ("(hello", false),
        ("hello)", false),
        ("()", false),
        ("(helo)", false),
    ];
    for (input, expect_match) in cases {
        let found = !nfa.find_translations(input).is_empty();
        assert_eq!(found, expect_match, "input: {:?}", input);
    }
}
432640}
0 commit comments