Skip to content

Commit 36975db

Browse files
authored
Merge pull request #12 from SWAT-engineering/optimistic-conversion-as-if-single-line
Optimistic conversion as if single line
2 parents 814cbe7 + efd28dd commit 36975db

20 files changed

+915
-237
lines changed

rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc

Lines changed: 143 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
@synopsis{
2-
Types and functions to analyse delimiters in productions
2+
Types and functions to analyze delimiters in productions
33
}
44

55
module lang::rascal::grammar::analyze::Delimiters
@@ -8,103 +8,175 @@ import Grammar;
88
import ParseTree;
99
import util::Maybe;
1010

11+
import Prelude;
12+
1113
import lang::rascal::grammar::Util;
1214

13-
alias DelimiterPair = tuple[Symbol begin, Symbol end];
15+
alias DelimiterPair = tuple[Maybe[Symbol] begin, Maybe[Symbol] end];
16+
17+
data Direction // Traverse lists of symbols (in productions)...
18+
= forward() // - ...from left to right;
19+
| backward() // - ...from right to left.
20+
;
1421

1522
@synopsis{
16-
Gets all delimiter pairs that enclose symbol `s` in grammar `g` when `s` is
17-
always enclosed by delimiters. Returns the empty set when at least one
18-
occurrence of `s` in `g` is not enclosed by delimiters.
23+
Reorder a list according to the specified direction
1924
}
2025

21-
set[DelimiterPair] getDelimiterPairs(Grammar g, Symbol s) {
22-
map[Symbol, set[DelimiterPair]] index = ();
26+
list[&T] reorder(list[&T] l, forward()) = l;
27+
list[&T] reorder(list[&T] l, backward()) = reverse(l);
2328

24-
set[DelimiterPair] getDelimiterPairs(Symbol s) {
25-
set[DelimiterPair] pairs = {};
26-
index += (s: pairs); // Provisionally added for cycle detection
29+
@synopsis{
30+
Gets the unique leftmost delimiter (`begin`) and the unique rightmost
31+
delimiter (`end`), if any, that occur **inside** production `p` in grammar
32+
`g`. If `getOnlyFirst` is `true` (default: `false`), then only the first
33+
(resp. last) symbol of the production can be considered as leftmost (resp.
34+
rightmost).
35+
}
2736

28-
// For each production in which `s` occurs, search for delimiter pairs
29-
// that enclose `s`.
30-
for (/prod(sParent, symbols: [*_, /s, *_], _) := g) {
37+
@description{
38+
For instance, consider the following grammar:
39+
40+
```
41+
lexical X = Y;
42+
lexical Y = Y1 | Y2;
43+
lexical Y1 = "[" Z "]";
44+
lexical Y2 = "[" Z ")" [a-z];
45+
lexical Z = [a-z];
46+
```
47+
48+
The unique leftmost delimiter of the `Y1` production is `[`. The unique
49+
leftmost delimiter of the `Y2` production is `[`. The unique leftmost
50+
delimiter of the `X` production is `[`. The remaining productions do not
51+
have a unique leftmost delimiter.
52+
53+
The unique rightmost delimiter of the `Y1` production is `]`. The unique
54+
rightmost delimiter of the `Y2` production is `)`. The remaining productions
55+
do not have a unique rightmost delimiter. In particular, the `X` production
56+
has two rightmost delimiters, but not a unique one.
57+
58+
If `getOnlyFirst` is `true`, then the `Y2` production does not have a
59+
rightmost delimiter.
60+
}
3161

32-
// Case 1: The production itself has enclosing delimiters for `s`
33-
if (just(DelimiterPair pair) := getDelimiterPair(symbols, s)) {
34-
pairs += {pair};
35-
}
36-
37-
// Case 2: The production itself does not have enclosing delimiters
38-
// for `s`. In this case, proceed by searching for delimiter pairs
39-
// that enclose the parent of `s`.
40-
else {
41-
42-
// Case 2a: `sParent` is already being searched for (i.e., there
43-
// is a cyclic dependency). In this case, `sParent` can be
44-
// ignored by the present call of this function (top of the call
45-
// stack), as it is already dealt with by a past/ongoing call of
46-
// this function (middle of the call stack).
47-
if (delabel(sParent) in index) {
48-
continue;
62+
DelimiterPair getInnerDelimiterPair(Grammar g, Production p, bool getOnlyFirst = false) {
63+
Maybe[Symbol] begin = getInnerDelimiterByProduction(g, forward() , getOnlyFirst = getOnlyFirst)[p];
64+
Maybe[Symbol] end = getInnerDelimiterByProduction(g, backward(), getOnlyFirst = getOnlyFirst)[p];
65+
return <begin, end>;
66+
}
67+
68+
@memo
69+
private map[Symbol, Maybe[Symbol]] getInnerDelimiterBySymbol(Grammar g, Direction direction, bool getOnlyFirst = false) {
70+
map[Production, Maybe[Symbol]] m = getInnerDelimiterByProduction(g, direction, getOnlyFirst = getOnlyFirst);
71+
return (s: unique({m[p] | p <- m, s == delabel(p.def)}) | p <- m, s := delabel(p.def));
72+
}
73+
74+
@memo
75+
private map[Production, Maybe[Symbol]] getInnerDelimiterByProduction(Grammar g, Direction direction, bool getOnlyFirst = false) {
76+
map[Production, Maybe[Symbol]] ret = (p: nothing() | /p: prod(_, _, _) := g);
77+
78+
solve (ret) {
79+
for (p <- ret, ret[p] == nothing()) {
80+
for (s <- reorder(p.symbols, direction)) {
81+
s = delabel(s);
82+
if (isDelimiter(s)) {
83+
ret[p] = just(s);
84+
break;
4985
}
50-
51-
// Case 2b: `sParent` has delimiter pairs
52-
else if (morePairs := getDelimiterPairs(delabel(sParent)), _ <- morePairs) {
53-
pairs += morePairs;
86+
if (isNonTerminalType(s) && just(delimiter) := unique({ret[child] | child <- getChildren(g, s)})) {
87+
ret[p] = just(delimiter);
88+
break;
5489
}
55-
56-
// Case 2c: `sParent` does not have delimiter pairs. In this
57-
// case, at least one occurrence of `s` in `g` is not enclosed
58-
// by delimiters. Thus, the empty set is returned (and
59-
// registered in the index), while the remaining productions in
60-
// which `s` occurs, are ignored.
61-
else {
62-
pairs = {};
90+
if (getOnlyFirst) {
6391
break;
6492
}
6593
}
6694
}
67-
68-
index += (s: pairs); // Definitively added
69-
return pairs;
7095
}
7196
72-
return getDelimiterPairs(s);
73-
74-
// TODO: The current version of this function does not find delimiter pairs
75-
// that are spread across multiple productions. For instance:
76-
//
77-
// ```
78-
// lexical DelimitedNumber = Left Number Right;
79-
//
80-
// lexical Left = "<";
81-
// lexical Right = ">";
82-
// lexical Number = [0-9]+ !>> [0-9];
83-
// ```
84-
//
85-
// In this example, `getDelimiterPairs(lex("Number"))` returns the empty
86-
// set. This could be further improved.
97+
return ret;
8798
}
8899
100+
private set[Production] getChildren(Grammar g, Symbol s)
101+
= {*lookup(g, s)};
102+
89103
@synopsis{
90-
Gets the delimiter pair that encloses symbol `s` in a list, if any
104+
Gets the unique rightmost delimiter (`begin`) and the unique leftmost
105+
delimiter (`end`), if any, that occur **outside** production `p` in grammar
106+
`g`.
91107
}
92108
93-
Maybe[DelimiterPair] getDelimiterPair([*_, Symbol begin, *between, Symbol end, *_], Symbol s)
94-
= just(<begin, end>)
95-
when isDelimiter(begin) && isDelimiter(end),
96-
[*between1, /s, *between2] := between,
97-
!containsDelimiter(between1 + between2);
109+
@description{
110+
For instance, consider the following grammar:
98111
99-
default Maybe[DelimiterPair] getDelimiterPair(list[Symbol] _, Symbol _)
100-
= nothing();
112+
```
113+
lexical X = Y;
114+
lexical Y = Y1 | Y2;
115+
lexical Y1 = "[" Z "]";
116+
lexical Y2 = "[" Z ")" [a-z];
117+
lexical Z = [a-z];
118+
```
119+
120+
The unique rightmost delimiter of the `Z` production is `[`. The remaining
121+
productions do not have a unique rightmost delimiter.
122+
123+
The productions do not have a unique leftmost delimiter. In particular, the
124+
`Z` production has two leftmost delimiters, but not a unique one.
125+
}
126+
127+
DelimiterPair getOuterDelimiterPair(Grammar g, Production p)
128+
= <getOuterDelimiterByProduction(g, backward())[p], getOuterDelimiterByProduction(g, forward())[p]>;
129+
130+
@memo
131+
private map[Symbol, Maybe[Symbol]] getOuterDelimiterBySymbol(Grammar g, Direction direction) {
132+
map[Symbol, Maybe[Symbol]] ret = (s: nothing() | /p: prod(_, _, _) := g, s := delabel(p.def));
133+
134+
solve (ret) {
135+
for (s <- ret, ret[s] == nothing()) {
136+
set[Maybe[Symbol]] delimiters = {};
137+
for (prod(def, symbols, _) <- getParents(g, s)) {
138+
if ([*_, /s, *rest] := reorder(symbols, direction) && /s !:= rest) {
139+
// Note: `rest` contains the symbols that follow/precede
140+
// (depending on `direction`) `s` in the parent production
141+
Maybe[Symbol] delimiter = nothing();
142+
for (Symbol s <- rest) {
143+
s = delabel(s);
144+
if (isDelimiter(s)) {
145+
delimiter = just(s);
146+
break;
147+
}
148+
if (isNonTerminalType(s) && d: just(_) := getInnerDelimiterBySymbol(g, direction)[s]) {
149+
delimiter = d;
150+
break;
151+
}
152+
}
153+
delimiters += just(_) := delimiter ? delimiter : ret[delabel(def)];
154+
}
155+
}
156+
ret[s] = unique(delimiters);
157+
}
158+
}
159+
160+
return ret;
161+
}
162+
163+
@memo
164+
private map[Production, Maybe[Symbol]] getOuterDelimiterByProduction(Grammar g, Direction direction) {
165+
map[Symbol, Maybe[Symbol]] m = getOuterDelimiterBySymbol(g, direction);
166+
return (p: m[delabel(p.def)] | /p: prod(_, _, _) := g);
167+
}
168+
169+
private set[Production] getParents(Grammar g, Symbol s)
170+
= {parent | /parent: prod(_, [*_, /s, *_], _) := g, s != delabel(parent.def)};
101171
102172
@synopsis{
103-
Checks if a list contains a delimiter
173+
Returns the single delimiter if set `delimiters` is a singleton. Returns
174+
`nothing()` otherwise.
104175
}
105176
106-
bool containsDelimiter(list[Symbol] symbols)
107-
= any(s <- symbols, isDelimiter(s));
177+
Maybe[Symbol] unique({d: just(Symbol _)}) = d;
178+
179+
default Maybe[Symbol] unique(set[Maybe[Symbol]] _) = nothing();
108180
109181
@synopsis{
110182
Checks if a symbol is a delimiter

rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc

Lines changed: 58 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ module lang::textmate::Conversion
77
import Grammar;
88
import IO;
99
import ParseTree;
10+
import util::Maybe;
1011

1112
import lang::oniguruma::Conversion;
1213
import lang::oniguruma::RegExp;
@@ -21,7 +22,9 @@ alias RscGrammar = Grammar;
2122

2223
data ConversionUnit = unit(
2324
RscGrammar rsc,
24-
Production prod);
25+
Production prod,
26+
DelimiterPair outerDelimiters,
27+
DelimiterPair innerDelimiters);
2528

2629
@synopsis{
2730
Converts Rascal grammar `rsc` to a TextMate grammar
@@ -91,8 +94,8 @@ list[ConversionUnit] analyze(RscGrammar rsc) {
9194
// Define auxiliary predicates
9295
bool isCyclic(Production p, set[Production] ancestors, _)
9396
= p in ancestors;
94-
bool isSingleLine(Production p, _, _)
95-
= !hasNewline(rsc, p);
97+
// bool isSingleLine(Production p, _, _)
98+
// = !hasNewline(rsc, p);
9699
bool isNonEmpty(prod(def, _, _), _, _)
97100
= !tryParse(rsc, delabel(def), "");
98101
bool hasCategory(prod(_, _, attributes), _, _)
@@ -103,17 +106,14 @@ list[ConversionUnit] analyze(RscGrammar rsc) {
103106
Dependencies dependencies = deps(toGraph(rsc));
104107
list[Production] prods = dependencies
105108
.removeProds(isCyclic, true) // `true` means "also remove ancestors"
106-
.filterProds(isSingleLine)
109+
// .filterProds(isSingleLine)
107110
.filterProds(isNonEmpty)
108111
.filterProds(hasCategory)
109112
.getProds();
110113
111114
// Analyze delimiters
112115
println("[LOG] Analyzing delimiters");
113116
set[Symbol] delimiters = {s | /Symbol s := rsc, isDelimiter(delabel(s))};
114-
delimiters -= getStrictPrefixes(delimiters);
115-
delimiters -= {s | prod(_, [s, *_], _) <- prods, isDelimiter(delabel(s))};
116-
delimiters -= {s | prod(def, _, _) <- prods, /s := getDelimiterPairs(rsc, delabel(def))};
117117
list[Production] prodsDelimiters = [prod(lex(DELIMITERS_PRODUCTION_NAME), [\alt(delimiters)], {})];
118118

119119
// Analyze keywords
@@ -124,15 +124,54 @@ list[ConversionUnit] analyze(RscGrammar rsc) {
124124
// Return
125125
bool isEmptyProd(prod(_, [\alt(alternatives)], _)) = alternatives == {};
126126
list[ConversionUnit] units
127-
= [unit(rsc, p) | p <- prodsDelimiters, !isEmptyProd(p)]
128-
+ [unit(rsc, p) | p <- prods]
129-
+ [unit(rsc, p) | p <- prodsKeywords, !isEmptyProd(p)];
127+
= [unit(rsc, p, getOuterDelimiterPair(rsc, p), getInnerDelimiterPair(rsc, p, getOnlyFirst = true)) | p <- prods]
128+
+ [unit(rsc, p, <nothing(), nothing()>, <nothing(), nothing()>) | p <- prodsDelimiters, !isEmptyProd(p)]
129+
+ [unit(rsc, p, <nothing(), nothing()>, <nothing(), nothing()>) | p <- prodsKeywords, !isEmptyProd(p)];
130130
131-
return units;
131+
return sort(units, less);
132132
}
133133
134-
public str DELIMITERS_PRODUCTION_NAME = "$delimiters";
135-
public str KEYWORDS_PRODUCTION_NAME = "$keywords";
134+
private bool less(ConversionUnit u1, ConversionUnit u2) {
135+
136+
Maybe[Symbol] getKey(ConversionUnit u)
137+
= <just(begin), _> := u.outerDelimiters ? just(begin)
138+
: <just(begin), _> := u.innerDelimiters ? just(begin)
139+
: nothing();
140+
141+
Maybe[Symbol] key1 = getKey(u1);
142+
Maybe[Symbol] key2 = getKey(u2);
143+
144+
if (just(begin1) := key1 && just(begin2) := key2) {
145+
if (begin2.string < begin1.string) {
146+
// If `begin2` is lexicographically less than `begin1` (e.g., when `begin2`
147+
// is a strict prefix of `begin1`), then the rule for `u1` should be tried
148+
// *before* the rule for `u2` (i.e., `u1` is less than `u2` for sorting purposes)
149+
return true;
150+
} else if (begin1.string < begin2.string) {
151+
// Symmetrical case
152+
return false;
153+
} else {
154+
// Otherwise, sort arbitrarily by name and stringified production
155+
return toName(u1.prod.def) + "<u1.prod>" < toName(u2.prod.def) + "<u2.prod>";
156+
}
157+
} else if (nothing() != key1 && nothing() == key2) {
158+
// If `u1` has a `begin` delimiter, but `u2` hasn't, then `u1` is less
159+
// than `u2` for sorting purposes (arbitrarily)
160+
return true;
161+
} else if (nothing() == key1 && nothing() != key2) {
162+
// Symmetrical case
163+
return false;
164+
} else {
165+
// Otherwise, sort arbitrarily by name and stringified production
166+
return toName(u1.prod.def) + "<u1.prod>" < toName(u2.prod.def) + "<u2.prod>";
167+
}
168+
}
169+
170+
public str DELIMITERS_PRODUCTION_NAME = "~delimiters";
171+
public str KEYWORDS_PRODUCTION_NAME = "~keywords";
172+
173+
private bool isSynthetic(Symbol s)
174+
= lex(name) := s && name in {DELIMITERS_PRODUCTION_NAME, KEYWORDS_PRODUCTION_NAME};
136175
137176
@synopsis{
138177
Transforms a list of productions, in the form of conversion units, to a
@@ -166,6 +205,11 @@ TmGrammar transform(list[ConversionUnit] units, NameGeneration nameGeneration =
166205
}
167206
tm = addRule(tm, r);
168207
}
208+
for (name <- tm.repository, tm.repository[name] is beginEnd) {
209+
// Inject top-level patterns into begin/end patterns
210+
TmRule r = tm.repository[name];
211+
tm.repository += (name: r[patterns = r.patterns + tm.patterns - include("#<name>")]);
212+
}
169213
170214
// Return
171215
return tm[patterns = tm.patterns];
@@ -179,7 +223,7 @@ TmRule toTmRule(ConversionUnit u, NameGenerator g)
179223
= toTmRule(u.rsc, u.prod, g(u.prod));
180224
181225
private TmRule toTmRule(RscGrammar rsc, p: prod(def, _, _), str name)
182-
= {<begin, end>} := getDelimiterPairs(rsc, delabel(def)) // TODO: Support non-singleton sets of delimiter pairs
226+
= !isSynthetic(def) && <just(begin), just(end)> := getOuterDelimiterPair(rsc, p)
183227
? toTmRule(toRegExp(rsc, begin), toRegExp(rsc, end), "<begin.string><end.string>", [toTmRule(toRegExp(rsc, p), name)])
184228
: toTmRule(toRegExp(rsc, p), name);
185229

0 commit comments

Comments
 (0)