1
1
@synopsis {
2
- Types and functions to analyse delimiters in productions
2
+ Types and functions to analyze delimiters in productions
3
3
}
4
4
5
5
module lang ::rascal ::grammar ::analyze ::Delimiters
@@ -8,103 +8,175 @@ import Grammar;
8
8
import ParseTree ;
9
9
import util ::Maybe ;
10
10
11
+ import Prelude ;
12
+
11
13
import lang ::rascal ::grammar ::Util ;
12
14
13
- alias DelimiterPair = tuple [Symbol begin , Symbol end ];
15
+ alias DelimiterPair = tuple [Maybe [Symbol ] begin , Maybe [Symbol ] end ];
16
+
17
+ data Direction // Direction in which lists of symbols (in productions) are traversed:
18
+ = forward () // - from left to right (`reorder` keeps the list as-is);
19
+ | backward () // - from right to left (`reorder` reverses the list).
20
+ ;
14
21
15
22
@synopsis {
16
- Gets all delimiter pairs that enclose symbol `s` in grammar `g` when `s` is
17
- always enclosed by delimiters. Returns the empty set when at least one
18
- occurrence of `s` in `g` is not enclosed by delimiters.
23
+ Reorder a list according to the specified direction
19
24
}
20
25
21
- set [ DelimiterPair ] getDelimiterPairs ( Grammar g , Symbol s ) {
22
- map [ Symbol , set [ DelimiterPair ]] index = ( );
26
// Forward traversal: the list is already in the order in which it must be
// visited, so it is handed back untouched.
list[&T] reorder(list[&T] l, forward()) {
    return l;
}

// Backward traversal: the elements are handed back in reverse order, so that
// a plain left-to-right loop visits them from right to left.
list[&T] reorder(list[&T] l, backward()) {
    return reverse(l);
}
23
28
24
- set [DelimiterPair ] getDelimiterPairs (Symbol s ) {
25
- set [DelimiterPair ] pairs = {};
26
- index += (s : pairs ); // Provisionally added for cycle detection
29
+ @synopsis {
30
+ Gets the unique leftmost delimiter (`begin`) and the unique rightmost
31
+ delimiter (`end`), if any, that occur **inside** production `p` in grammar
32
+ `g`. If `getOnlyFirst` is `true` (default: `false`), then only the first
33
+ (resp. last) symbol of the production can be considered as leftmost (resp.
34
+ rightmost).
35
+ }
27
36
28
- // For each production in which `s` occurs, search for delimiter pairs
29
- // that enclose `s`.
30
- for (/prod(sParent, symbols: [*_, /s , *_], _) := g ) {
37
+ @description {
38
+ For instance, consider the following grammar:
39
+
40
+ ```
41
+ lexical X = Y;
42
+ lexical Y = Y1 | Y2;
43
+ lexical Y1 = "[" Z "]";
44
+ lexical Y2 = "[" Z ")" [a-z];
45
+ lexical Z = [a-z];
46
+ ```
47
+
48
+ The unique leftmost delimiter of the `Y1` production is `[`. The unique
49
+ leftmost delimiter of the `Y2` production is `[`. The unique leftmost
50
+ delimiter of the `X` production is `[`. The remaining productions do not
51
+ have a unique leftmost delimiter.
52
+
53
+ The unique rightmost delimiter of the `Y1` production is `]`. The unique
54
+ rightmost delimiter of the `Y2` production is `)`. The remaining productions
55
+ do not have a unique rightmost delimiter. In particular, the `X` production
56
+ has two rightmost delimiters (`]` and `)`), but not a unique one.
57
+
58
+ If `getOnlyFirst` is `true`, then the `Y2` production does not have a
59
+ rightmost delimiter.
60
+ }
31
61
32
- // Case 1: The production itself has enclosing delimiters for `s`
33
- if (just (DelimiterPair pair ) := getDelimiterPair (symbols , s )) {
34
- pairs += {pair };
35
- }
36
-
37
- // Case 2: The production itself does not have enclosing delimiters
38
- // for `s`. In this case, proceed by searching for delimiter pairs
39
- // that enclose the parent of `s`.
40
- else {
41
-
42
- // Case 2a: `sParent` is already being searched for (i.e., there
43
- // is a cyclic dependency). In this case, `sParent` can be
44
- // ignored by the present call of this function (top of the call
45
- // stack), as it is already dealt with by a past/ongoing call of
46
- // this function (middle of the call stack).
47
- if (delabel (sParent ) in index ) {
48
- continue ;
62
// Looks up production `p` in the (memoized) per-production delimiter maps:
// the forward map yields `begin` (leftmost inner delimiter), the backward map
// yields `end` (rightmost inner delimiter). Tuple elements are evaluated left
// to right, so the forward lookup still happens before the backward one.
DelimiterPair getInnerDelimiterPair(Grammar g, Production p, bool getOnlyFirst = false)
    = < getInnerDelimiterByProduction(g, forward(), getOnlyFirst = getOnlyFirst)[p]
      , getInnerDelimiterByProduction(g, backward(), getOnlyFirst = getOnlyFirst)[p]
      >;
67
+
68
// Computes, for each (delabeled) symbol defined in grammar `g`, the unique
// inner delimiter shared by all of its productions when traversing in
// `direction`; the result is `nothing()` when the productions disagree or when
// any of them has no inner delimiter.
//
// The per-production results are grouped by defined symbol in a single pass
// (instead of rescanning all productions for every production), and the loop
// variable is no longer shadowed by a nested generator.
@memo
private map[Symbol, Maybe[Symbol]] getInnerDelimiterBySymbol(Grammar g, Direction direction, bool getOnlyFirst = false) {
    map[Production, Maybe[Symbol]] byProduction = getInnerDelimiterByProduction(g, direction, getOnlyFirst = getOnlyFirst);

    // Group the delimiters found per production by the symbol they define
    map[Symbol, set[Maybe[Symbol]]] candidates = ();
    for (p <- byProduction) {
        Symbol defined = delabel(p.def);
        candidates[defined] = (candidates[defined] ? {}) + {byProduction[p]};
    }

    // A symbol has an inner delimiter only if all its productions agree on it
    return (s: unique(candidates[s]) | s <- candidates);
}
73
+
74
// Computes, for each production in grammar `g`, the first delimiter that is
// encountered when its symbols are traversed in `direction`, if any. The
// search descends into non-terminals via the results already computed for
// their productions, so the map is refined until it reaches a fixed point.
@memo
private map[Production, Maybe[Symbol]] getInnerDelimiterByProduction(Grammar g, Direction direction, bool getOnlyFirst = false) {
    // Initially, no delimiter is known for any production
    map[Production, Maybe[Symbol]] ret = (p: nothing() | /p: prod(_, _, _) := g);

    solve (ret) {
        // Only productions that are still unresolved need another look
        for (p <- ret, ret[p] == nothing()) {
            for (candidate <- reorder(p.symbols, direction)) {
                Symbol s = delabel(candidate);

                // The symbol is a delimiter itself
                if (isDelimiter(s)) {
                    ret[p] = just(s);
                    break;
                }
                // The symbol is a non-terminal of which all productions
                // (currently) agree on one and the same delimiter
                else if (isNonTerminalType(s) && found: just(_) := unique({ret[child] | child <- getChildren(g, s)})) {
                    ret[p] = found;
                    break;
                }
                // Otherwise, move on to the next symbol, unless only the
                // first symbol in traversal order may be considered
                else if (getOnlyFirst) {
                    break;
                }
            }
        }
    }

    return ret;
}
88
99
100
// Collects everything that `lookup` yields for symbol `s` in grammar `g` into
// a set of productions.
private set[Production] getChildren(Grammar g, Symbol s) {
    return {child | child <- lookup(g, s)};
}
102
+
89
103
@synopsis {
90
- Gets the delimiter pair that encloses symbol `s` in a list, if any
104
+ Gets the unique rightmost delimiter (`begin`) and the unique leftmost
105
+ delimiter (`end`), if any, that occur **outside** production `p` in grammar
106
+ `g`.
91
107
}
92
108
93
- Maybe [DelimiterPair ] getDelimiterPair ([*_, Symbol begin , *between , Symbol end , *_], Symbol s )
94
- = just (<begin , end >)
95
- when isDelimiter (begin ) && isDelimiter (end ),
96
- [*between1 , /s, *between2] := between,
97
- !containsDelimiter(between1 + between2);
109
+ @description {
110
+ For instance, consider the following grammar:
98
111
99
- default Maybe[DelimiterPair] getDelimiterPair(list[Symbol] _, Symbol _)
100
- = nothing();
112
+ ```
113
+ lexical X = Y;
114
+ lexical Y = Y1 | Y2;
115
+ lexical Y1 = "[" Z "]";
116
+ lexical Y2 = "[" Z ")" [a-z];
117
+ lexical Z = [a-z];
118
+ ```
119
+
120
+ The unique rightmost delimiter of the `Z` production is `[`. The remaining
121
+ productions do not have a unique rightmost delimiter.
122
+
123
+ The productions do not have a unique leftmost delimiter. In particular, the
124
+ `Z` production has two leftmost delimiters (`]` and `)`), but not a unique one.
125
+ }
126
+
127
// Looks up production `p` in the (memoized) per-production outer-delimiter
// maps: the backward map yields `begin`, the forward map yields `end`.
DelimiterPair getOuterDelimiterPair(Grammar g, Production p) {
    Maybe[Symbol] begin = getOuterDelimiterByProduction(g, backward())[p];
    Maybe[Symbol] end = getOuterDelimiterByProduction(g, forward())[p];
    return <begin, end>;
}
129
+
130
// Computes, for each (delabeled) symbol defined in grammar `g`, the unique
// delimiter that is encountered first when moving away from that symbol in
// `direction` inside the productions that use it, if any. When a using
// production offers no such delimiter, the outer delimiter of the symbol
// defined by that production is used instead, so the map is refined until it
// reaches a fixed point.
@memo
private map[Symbol, Maybe[Symbol]] getOuterDelimiterBySymbol(Grammar g, Direction direction) {
    // Initially, no delimiter is known for any defined symbol
    map[Symbol, Maybe[Symbol]] ret = (s: nothing() | /p: prod(_, _, _) := g, s := delabel(p.def));

    solve (ret) {
        // Only symbols that are still unresolved need another look
        for (s <- ret, ret[s] == nothing()) {
            set[Maybe[Symbol]] delimiters = {};

            for (prod(def, symbols, _) <- getParents(g, s)) {
                if ([*_, /s, *rest] := reorder(symbols, direction) && /s !:= rest) {
                    // Note: `rest` contains the symbols that follow/precede
                    // (depending on `direction`) `s` in the parent production
                    Maybe[Symbol] delimiter = nothing();

                    for (Symbol neighbor <- rest) {
                        Symbol n = delabel(neighbor);

                        // The neighbor is a delimiter itself
                        if (isDelimiter(n)) {
                            delimiter = just(n);
                            break;
                        }
                        // The neighbor is a non-terminal with a unique inner
                        // delimiter on the side that faces `s`
                        if (isNonTerminalType(n) && d: just(_) := getInnerDelimiterBySymbol(g, direction)[n]) {
                            delimiter = d;
                            break;
                        }
                    }

                    // Nothing found inside the parent production: fall back
                    // to whatever is (currently) known for the parent itself
                    if (just(_) := delimiter) {
                        delimiters += {delimiter};
                    } else {
                        delimiters += {ret[delabel(def)]};
                    }
                }
            }

            // Only a delimiter on which all parents agree counts
            ret[s] = unique(delimiters);
        }
    }

    return ret;
}
162
+
163
// Maps each production in grammar `g` to the outer delimiter computed for the
// (delabeled) symbol that the production defines.
@memo
private map[Production, Maybe[Symbol]] getOuterDelimiterByProduction(Grammar g, Direction direction) {
    map[Symbol, Maybe[Symbol]] bySymbol = getOuterDelimiterBySymbol(g, direction);

    map[Production, Maybe[Symbol]] byProduction = ();
    for (/p: prod(_, _, _) := g) {
        byProduction[p] = bySymbol[delabel(p.def)];
    }
    return byProduction;
}
168
+
169
// Collects the productions in grammar `g` that have symbol `s` somewhere
// (possibly nested) among their symbols, excluding productions that define
// `s` itself.
private set[Production] getParents(Grammar g, Symbol s) {
    set[Production] parents = {};
    for (/parent: prod(_, [*_, /s, *_], _) := g, s != delabel(parent.def)) {
        parents += {parent};
    }
    return parents;
}
101
171
102
172
@synopsis{
103
- Checks if a list contains a delimiter
173
+ Returns the single delimiter if set `delimiters` is a singleton. Returns
174
+ `nothing()` otherwise.
104
175
}
105
176
106
- bool containsDelimiter(list[Symbol] symbols)
107
- = any(s <- symbols , isDelimiter (s ));
177
// A set that consists of exactly one `just(...)`: that element is the answer
Maybe[Symbol] unique({single: just(Symbol _)}) {
    return single;
}

// Any other set (empty, several elements, or only `nothing()`): no answer
default Maybe[Symbol] unique(set[Maybe[Symbol]] _) {
    return nothing();
}
108
180
109
181
@synopsis{
110
182
Checks if a symbol is a delimiter
0 commit comments