Skip to content

Commit 56662a5

Browse files
authored
Merge pull request #4815 from ScottDennison/next
Reduces target language dependence of Antlr grammar (also adds Java target, to help debugging the grammar with Java+Antlr tools)
2 parents c337646 + 1f074c3 commit 56662a5

File tree

20 files changed

+616
-15
lines changed

20 files changed

+616
-15
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,3 +183,6 @@ CodeGraphData/
183183
/Rubberduck.Deployment/Properties/launchSettings.json
184184
/Rubberduck.Deployment/Rubberduck.API.idl
185185
/Rubberduck.Deployment/Rubberduck.idl
186+
187+
#Gradle
188+
/.gradle/
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
using Antlr4.Runtime;
2+
3+
namespace Rubberduck.Parsing.Grammar
4+
{
5+
/// <summary>
/// Base class for the generated VBA lexer. It hosts the helper methods that the lexer grammar's
/// semantic predicates call, so that the grammar itself does not contain target-language code.
/// </summary>
public abstract class VBABaseLexer : Lexer
{
    public VBABaseLexer(ICharStream input) : base(input) { }

    #region Semantic predicate helper methods
    /// <summary>
    /// Returns the lookahead value reported by the input stream for relative position <paramref name="i"/>.
    /// </summary>
    /// <param name="i">Relative position passed straight through to the stream's <c>La</c> method.</param>
    protected int CharAtRelativePosition(int i)
    {
        return _input.La(i);
    }

    /// <summary>
    /// Tells whether the lookahead value <paramref name="actual"/> is the character <paramref name="expected"/>.
    /// </summary>
    /// <param name="actual">A lookahead value, typically obtained from <see cref="CharAtRelativePosition"/>.</param>
    /// <param name="expected">The character to compare against.</param>
    protected bool IsChar(int actual, char expected)
    {
        // Compare as int (the char is widened). Narrowing 'actual' to char instead would wrap
        // values outside the char range (for example a negative end-of-input marker) onto a
        // real character and could report a false match.
        return actual == expected;
    }

    /// <summary>
    /// Tells whether the lookahead value <paramref name="actual"/> is any of the characters in
    /// <paramref name="expectedOptions"/>.
    /// </summary>
    /// <param name="actual">A lookahead value, typically obtained from <see cref="CharAtRelativePosition"/>.</param>
    /// <param name="expectedOptions">The characters that count as a match.</param>
    protected bool IsChar(int actual, params char[] expectedOptions)
    {
        foreach (char expected in expectedOptions)
        {
            // Same widening comparison as the single-character overload.
            if (actual == expected)
            {
                return true;
            }
        }
        return false;
    }
    #endregion
}
34+
}
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
using Antlr4.Runtime;
2+
using System;
3+
using System.Text.RegularExpressions;
4+
5+
namespace Rubberduck.Parsing.Grammar
6+
{
7+
/// <summary>
/// Base class for the generated VBA parser. It hosts the helper methods that the parser grammar's
/// semantic predicates call, so that the grammar itself does not contain target-language code.
/// </summary>
public abstract class VBABaseParser : Parser
{
    public VBABaseParser(ITokenStream input) : base(input) { }

    #region Semantic predicate helper methods
    /// <summary>
    /// Returns the token type reported by the token stream for relative position <paramref name="i"/>.
    /// </summary>
    /// <param name="i">Relative position passed straight through to the stream's <c>La</c> method.</param>
    protected int TokenTypeAtRelativePosition(int i)
    {
        return _input.La(i);
    }

    /// <summary>
    /// Returns the token reported by the token stream for relative position <paramref name="i"/>.
    /// </summary>
    /// <param name="i">Relative position passed straight through to the stream's <c>Lt</c> method.</param>
    protected IToken TokenAtRelativePosition(int i)
    {
        return _input.Lt(i);
    }

    /// <summary>
    /// Returns the text of <paramref name="token"/>, or <c>null</c> when no token is given.
    /// </summary>
    protected string TextOf(IToken token)
    {
        // A predicate should evaluate to false rather than crash the parse when there is no token.
        return token == null ? null : token.Text;
    }

    /// <summary>
    /// Tells whether <paramref name="text"/> contains a match for the regular expression
    /// <paramref name="pattern"/>. A <c>null</c> text never matches.
    /// </summary>
    protected bool MatchesRegex(string text, string pattern)
    {
        // IsMatch answers the yes/no question directly, without building a Match object.
        return text != null && Regex.IsMatch(text, pattern);
    }

    /// <summary>
    /// Ordinal, case-insensitive comparison of <paramref name="actual"/> with <paramref name="expected"/>.
    /// </summary>
    protected bool EqualsStringIgnoringCase(string actual, string expected)
    {
        // The static overload tolerates null on either side.
        return string.Equals(actual, expected, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Tells whether <paramref name="actual"/> equals any entry of <paramref name="expectedOptions"/>
    /// under an ordinal, case-insensitive comparison.
    /// </summary>
    protected bool EqualsStringIgnoringCase(string actual, params string[] expectedOptions)
    {
        foreach (string expected in expectedOptions)
        {
            if (string.Equals(actual, expected, StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Ordinal, case-sensitive comparison of <paramref name="actual"/> with <paramref name="expected"/>.
    /// </summary>
    protected bool EqualsString(string actual, string expected)
    {
        // The static overload tolerates null on either side.
        return string.Equals(actual, expected, StringComparison.Ordinal);
    }

    /// <summary>
    /// Tells whether <paramref name="actual"/> equals any entry of <paramref name="expectedOptions"/>
    /// under an ordinal, case-sensitive comparison.
    /// </summary>
    protected bool EqualsString(string actual, params string[] expectedOptions)
    {
        foreach (string expected in expectedOptions)
        {
            if (string.Equals(actual, expected, StringComparison.Ordinal))
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Tells whether the token type <paramref name="actual"/> is one of <paramref name="expectedOptions"/>.
    /// </summary>
    protected bool IsTokenType(int actual, params int[] expectedOptions)
    {
        return Array.IndexOf(expectedOptions, actual) >= 0;
    }
    #endregion
}
79+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
using Antlr4.Runtime;
2+
using System;
3+
using System.Text.RegularExpressions;
4+
5+
namespace Rubberduck.Parsing.Grammar
6+
{
7+
/// <summary>
/// Intentionally empty base class for the parser's rule contexts. Its only purpose is to let
/// other languages/implementations plug in their own contextSuperClass without any change
/// to the grammar.
/// </summary>
public abstract class VBABaseParserRuleContext : ParserRuleContext
{
    public VBABaseParserRuleContext()
    {
    }

    public VBABaseParserRuleContext(ParserRuleContext parent, int invokingStateNumber)
        : base(parent, invokingStateNumber)
    {
    }
}
14+
}

Rubberduck.Parsing/Grammar/VBALexer.g4

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@
1717

1818
lexer grammar VBALexer;
1919

20+
options {
    // Only superClass is set here. contextSuperClass is a parser option (a lexer creates no rule
    // contexts) and is configured in the parser grammar.
    superClass = VBABaseLexer;
}
2024

2125
ABS : A B S;
2226
ANY : A N Y;
@@ -305,11 +309,7 @@ IDENTIFIER : ~[[\](){}\r\n\t.,'"|!@#$%^&*\-+:=; 0-9-/\\-] ~[[\](){}\r\n\t.,'"|!
305309
LINE_CONTINUATION : [ \t]+ UNDERSCORE [ \t]* '\r'? '\n' WS_NOT_FOLLOWED_BY_LINE_CONTINUATION*;
306310
// The following rule is needed in order to capture hex literals without format prefixes which start with a digit. Needed for VBForm resources.
307311
BARE_HEX_LITERAL : [0-9] [0-9a-fA-F]*;
308-
fragment WS_NOT_FOLLOWED_BY_LINE_CONTINUATION : [ \t] {(char)_input.La(1) != '_'
309-
|| ((char)_input.La(2) != '\r'
310-
&& (char)_input.La(2) != '\n'
311-
&& (char)_input.La(2) != '\t'
312-
&& (char)_input.La(2) != ' ')}?;
312+
// Matches one space or tab, but only if it is not directly followed by an underscore that is in turn followed by '\r', '\n', a tab or a space.
fragment WS_NOT_FOLLOWED_BY_LINE_CONTINUATION : [ \t] {!IsChar(CharAtRelativePosition(1),'_') || !IsChar(CharAtRelativePosition(2),'\r','\n','\t',' ')}?;
313313
fragment LETTER : [a-zA-Z_äöüÄÖÜ];
314314
fragment DIGIT : [0-9];
315315
fragment LETTERORDIGIT : [a-zA-Z0-9_äöüÄÖÜ];

Rubberduck.Parsing/Grammar/VBAParser.g4

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,11 @@
1919

2020
parser grammar VBAParser;
2121

22-
options { tokenVocab = VBALexer; }
23-
24-
@header { using System.Text.RegularExpressions; }
22+
options {
23+
tokenVocab = VBALexer;
24+
superClass = VBABaseParser;
25+
contextSuperClass = VBABaseParserRuleContext;
26+
}
2527

2628
startRule : module EOF;
2729

@@ -321,14 +323,14 @@ defType :
321323
// singleLetter must appear at the end to prevent premature bailout
322324
letterSpec : universalLetterRange | letterRange | singleLetter;
323325

324-
singleLetter : {_input.Lt(1).Text.Length == 1 && Regex.Match(_input.Lt(1).Text, @"[a-zA-Z]").Success}? IDENTIFIER;
326+
// An IDENTIFIER is accepted here only if the text of the next token is a single letter in a-z or A-Z.
singleLetter : {MatchesRegex(TextOf(TokenAtRelativePosition(1)),"^[a-zA-Z]$")}? IDENTIFIER;
325327

326328
// We make a separate universalLetterRange rule because it is treated specially in VBA. This makes it easy for users of the parser
327329
// to identify this case. Quoting MS VBAL:
328330
// "A <universal-letter-range> defines a single implicit declared type for every <IDENTIFIER> within
329331
// a module, even those with a first character that would otherwise fall outside this range if it was
330332
// interpreted as a <letter-range> from A-Z.""
331-
universalLetterRange : {_input.Lt(1).Text.Equals("A") && _input.Lt(3).Text.Equals("Z")}? IDENTIFIER MINUS IDENTIFIER;
333+
universalLetterRange : {EqualsString(TextOf(TokenAtRelativePosition(1)),"A") && EqualsString(TextOf(TokenAtRelativePosition(3)),"Z")}? IDENTIFIER MINUS IDENTIFIER;
332334

333335
letterRange : singleLetter MINUS singleLetter;
334336

@@ -571,22 +573,22 @@ circleSpecialForm : (expression whiteSpace? DOT whiteSpace?)? CIRCLE whiteSpace
571573
scaleSpecialForm : (expression whiteSpace? DOT whiteSpace?)? SCALE whiteSpace tuple whiteSpace? MINUS whiteSpace? tuple;
572574
pSetSpecialForm : (expression whiteSpace? DOT whiteSpace?)? PSET (whiteSpace STEP)? whiteSpace? tuple whiteSpace? (COMMA whiteSpace? expression)?;
573575
tuple : LPAREN whiteSpace? expression whiteSpace? COMMA whiteSpace? expression whiteSpace? RPAREN;
574-
lineSpecialFormOption : {_input.Lt(1).Text.ToLower().Equals("b") || _input.Lt(1).Text.ToLower().Equals("bf")}? unrestrictedIdentifier;
576+
// An unrestrictedIdentifier is accepted here only if the text of the next token is "b" or "bf", compared without regard to case.
lineSpecialFormOption : {EqualsStringIgnoringCase(TextOf(TokenAtRelativePosition(1)),"b","bf")}? unrestrictedIdentifier;
575577

576578
subscripts : subscript (whiteSpace? COMMA whiteSpace? subscript)*;
577579

578580
subscript : (expression whiteSpace TO whiteSpace)? expression;
579581

580582
unrestrictedIdentifier : identifier | statementKeyword | markerKeyword;
581-
legalLabelIdentifier : { !(new[]{DOEVENTS,END,CLOSE,ELSE,LOOP,NEXT,RANDOMIZE,REM,RESUME,RETURN,STOP,WEND}).Contains(_input.La(1))}? identifier | markerKeyword;
583+
// The predicate guards the identifier alternative only: an identifier is rejected as a label when the next token has one of the listed keyword token types. markerKeyword is a separate alternative without a predicate.
legalLabelIdentifier : { !IsTokenType(TokenTypeAtRelativePosition(1),DOEVENTS,END,CLOSE,ELSE,LOOP,NEXT,RANDOMIZE,REM,RESUME,RETURN,STOP,WEND)}? identifier | markerKeyword;
582584
//The predicate in the following rule has been introduced to lessen the problem that VBA uses the same characters used as type hints in other syntactical constructs,
583585
//e.g. in the bang notation (see withDictionaryAccessExpr). Generally, it is not legal to have an identifier or opening bracket follow immediately after a type hint.
584586
//The first part of the predicate tries to exclude these two situations. Unfortunately, predicates have to be at the start of a rule. So, an assumption about the number
585587
//of tokens in the identifier is made. All untypedIdentifers not a foreignNames consist of exactly one token and a typedIdentifier is an untyped one followed by a typeHint,
586588
//again a single token. So, in the majority of situations, the third token is the token following the potential type hint.
587589
//For foreignNames, no assumption can be made because they consist of a pair of brackets containing arbitrarily many tokens.
588590
//That is why the second part of the predicate looks at the first character in order to determine whether the identifier is a foreignName.
589-
identifier : {_input.La(3) != IDENTIFIER && _input.La(3) != L_SQUARE_BRACKET || _input.La(1) == L_SQUARE_BRACKET}? typedIdentifier
591+
identifier : {!IsTokenType(TokenTypeAtRelativePosition(3),IDENTIFIER,L_SQUARE_BRACKET) || IsTokenType(TokenTypeAtRelativePosition(1),L_SQUARE_BRACKET)}? typedIdentifier
590592
| untypedIdentifier;
591593
untypedIdentifier : identifierValue;
592594
typedIdentifier : untypedIdentifier typeHint;
@@ -614,7 +616,7 @@ complexType :
614616
fieldLength : MULT whiteSpace? (numberLiteral | identifierValue);
615617

616618
//Statement labels can only appear at the start of a line.
617-
statementLabelDefinition : {_input.La(-1) == NEWLINE || _input.La(-1) == LINE_CONTINUATION}? (combinedLabels | identifierStatementLabel | standaloneLineNumberLabel);
619+
statementLabelDefinition : {IsTokenType(TokenTypeAtRelativePosition(-1),NEWLINE,LINE_CONTINUATION)}? (combinedLabels | identifierStatementLabel | standaloneLineNumberLabel);
618620
identifierStatementLabel : legalLabelIdentifier whiteSpace? COLON;
619621
standaloneLineNumberLabel :
620622
lineNumberLabel whiteSpace? COLON

appveyor.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,10 @@ platform: Any CPU
3737
# cache the nuget packages unless something changed there
3838
cache:
3939
- packages/ -> **/packages.config
40+
- '%USERPROFILE%/.gradle/wrapper/dists'
4041

4142
install:
42-
set PATH=C:\Program Files (x86)\MSBuild\15.0\Bin;%PATH%
43+
set PATH=C:\Program Files (x86)\MSBuild\15.0\Bin;C:\Program Files (x86)\Java\jdk1.8.0;%PATH%
4344

4445
# patch version specifiers in the base project
4546
dotnet_csproj:
@@ -51,6 +52,7 @@ dotnet_csproj:
5152

5253

5354
before_build:
55+
- development/java/Rubberduck.Parsing/Grammar/gradlew.bat -p development/java/Rubberduck.Parsing/Grammar clean build
5456
- cinst innosetup
5557
- cinst codecov
5658
- cinst opencover.portable
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
2+
# Created by https://www.gitignore.io/api/git,gradle
3+
# Edit at https://www.gitignore.io/?templates=git,gradle
4+
5+
### Git ###
6+
# Created by git for backups. To disable backups in Git:
7+
# $ git config --global mergetool.keepBackup false
8+
*.orig
9+
10+
# Created by git when using merge tools for conflicts
11+
*.BACKUP.*
12+
*.BASE.*
13+
*.LOCAL.*
14+
*.REMOTE.*
15+
*_BACKUP_*.txt
16+
*_BASE_*.txt
17+
*_LOCAL_*.txt
18+
*_REMOTE_*.txt
19+
20+
### Gradle ###
21+
.gradle
22+
build/
23+
24+
# Ignore Gradle GUI config
25+
gradle-app.setting
26+
27+
# Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored)
28+
!gradle-wrapper.jar
29+
30+
# Cache of project
31+
.gradletasknamecache
32+
33+
# # Work around https://youtrack.jetbrains.com/issue/IDEA-116898
34+
# gradle/wrapper/gradle-wrapper.properties
35+
36+
### Gradle Patch ###
37+
**/build/
38+
39+
# End of https://www.gitignore.io/api/git,gradle
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
buildscript {
2+
project.ext.antlrVersion = '4.7.2'
3+
4+
repositories {
5+
mavenCentral()
6+
}
7+
8+
dependencies {
9+
classpath group: 'org.antlr', name: 'antlr4', version: "${project.ext.antlrVersion}"
10+
}
11+
}
12+
13+
plugins {
14+
id 'java'
15+
}
16+
17+
def envVersion = System.getenv("APPVEYOR_VERSION")
18+
19+
group 'com.rubberduckvba.rubberduck.parsing'
20+
version envVersion==null?'snapshot':envVersion
21+
22+
repositories {
23+
mavenCentral()
24+
}
25+
26+
dependencies {
27+
compile group: 'org.antlr', name: 'antlr4-runtime', version: "${project.ext.antlrVersion}"
28+
}
29+
30+
def grammarCodeGenDest = "${projectDir}/src/main/gen"
31+
def grammarSource = "${projectDir}/../../../../Rubberduck.Parsing/Grammar/"
32+
def grammarDest = "${projectDir}/src/main/antlr/com/rubberduckvba/rubberduck/parsing/grammar"
33+
34+
sourceSets {
35+
main {
36+
java {
37+
srcDir "${grammarCodeGenDest}"
38+
}
39+
}
40+
}
41+
42+
task copyGrammarFiles(type: Copy) {
43+
from grammarSource
44+
into grammarDest
45+
include "VBALexer.g4"
46+
include "VBAParser.g4"
47+
}
48+
49+
task generateGrammarSources(type: JavaExec) {
50+
main 'org.antlr.v4.Tool'
51+
classpath = buildscript.configurations.classpath
52+
args "-o", "${grammarCodeGenDest}", "-visitor", "-package", "com.rubberduckvba.rubberduck.parsing.grammar", "${grammarDest}/VBALexer.g4", "${grammarDest}/VBAParser.g4"
53+
}
54+
55+
// The grammar files must be copied before ANTLR is run on them.
generateGrammarSources.dependsOn copyGrammarFiles
// The generated sources are part of the main source set, so they must exist before compilation.
// Depending on them from 'build' alone does not order generation ahead of compileJava.
compileJava.dependsOn generateGrammarSources
build.dependsOn generateGrammarSources
Binary file not shown.

0 commit comments

Comments
 (0)