Skip to content

Commit 69d2db6

Browse files
committed
Added regex support to the tokenizer and syntax definition.
1 parent 4571cb2 commit 69d2db6

File tree

7 files changed

+71
-28
lines changed

7 files changed

+71
-28
lines changed

include/eepp/system/luapattern.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ class EE_API LuaPattern : public PatternMatcher {
3333

3434
const std::string_view& getPattern() const { return mPattern; }
3535

36+
virtual bool isValid() const { return true; }
37+
3638
protected:
3739
std::string_view mPattern;
3840
mutable size_t mMatchNum;

include/eepp/system/patternmatcher.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,8 @@ class EE_API PatternMatcher {
128128

129129
virtual const size_t& getNumMatches() const = 0;
130130

131+
/** @return Whether the pattern held by this matcher is usable; each concrete matcher
 * supplies its own criterion. */
virtual bool isValid() const = 0;
132+
131133
protected:
132134
PatternType mType;
133135
};

include/eepp/system/regex.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ class EE_API RegEx : public PatternMatcher {
6464

6565
virtual ~RegEx();
6666

67-
bool isValid() const { return mValid; }
67+
/** @return The value of mValid. NOTE(review): presumably set when the expression is
 * compiled - confirm in regex.cpp. */
virtual bool isValid() const override { return mValid; }
6868

6969
virtual bool matches( const char* stringSearch, int stringStartOffset,
7070
PatternMatcher::Range* matchList, size_t stringLength ) const override;

include/eepp/ui/doc/syntaxdefinition.hpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,18 +37,19 @@ struct EE_API SyntaxPattern {
3737
std::vector<std::string> typesNames;
3838
std::string syntax{ "" };
3939
DynamicSyntax dynSyntax;
40+
// When true, the entries of `patterns` are matched as regular expressions (RegEx)
// instead of Lua patterns. Defaults to false.
bool isRegEx{ false };
4041

4142
// Single type name, sub-syntax given by name. `isRegEx` defaults to false so existing
// call sites keep building non-regex patterns.
SyntaxPattern( std::vector<std::string>&& _patterns, const std::string& _type,
42-
const std::string& _syntax = "" );
43+
const std::string& _syntax = "", bool isRegEx = false );
4344

4445
// Several type names, sub-syntax given by name. `isRegEx` defaults to false so existing
// call sites keep building non-regex patterns.
SyntaxPattern( std::vector<std::string>&& _patterns, std::vector<std::string>&& _types,
45-
const std::string& _syntax = "" );
46+
const std::string& _syntax = "", bool isRegEx = false );
4647

4748
// Single type name, sub-syntax given as a DynamicSyntax (taken by rvalue reference).
// `isRegEx` defaults to false so existing call sites keep building non-regex patterns.
SyntaxPattern( std::vector<std::string>&& _patterns, const std::string& _type,
48-
DynamicSyntax&& _syntax );
49+
DynamicSyntax&& _syntax, bool isRegEx = false );
4950

5051
// Several type names, sub-syntax given as a DynamicSyntax (taken by rvalue reference).
// `isRegEx` defaults to false so existing call sites keep building non-regex patterns.
SyntaxPattern( std::vector<std::string>&& _patterns, std::vector<std::string>&& _types,
51-
DynamicSyntax&& _syntax );
52+
DynamicSyntax&& _syntax, bool isRegEx = false );
5253

5354
// True when this pattern refers to a sub-syntax: either `syntax` is non-empty or
// `dynSyntax` evaluates to true.
bool hasSyntax() const { return !syntax.empty() || dynSyntax; }
5455
};

src/eepp/ui/doc/syntaxdefinition.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -255,38 +255,44 @@ const String::HashType& SyntaxDefinition::getLanguageId() const {
255255
}
256256

257257
// Builds a pattern with one type name and a sub-syntax given by name.
// The pattern list is moved in; `_type` is stored as the only entry of typesNames and is
// converted through toSyntaxStyleTypeV for `types`. dynSyntax is left default-initialized.
// In `isRegEx( isRegEx )` the member is initialized from the parameter of the same name.
SyntaxPattern::SyntaxPattern( std::vector<std::string>&& _patterns, const std::string& _type,
258-
const std::string& _syntax ) :
258+
const std::string& _syntax, bool isRegEx ) :
259259
patterns( std::move( _patterns ) ),
260260
types( toSyntaxStyleTypeV( std::vector<std::string>{ _type } ) ),
261261
typesNames( { _type } ),
262-
syntax( _syntax ) {
262+
syntax( _syntax ),
263+
isRegEx( isRegEx ) {
263264
// NOTE(review): updateCache is defined outside this view; confirm what it caches.
updateCache<SyntaxStyleType>( *this );
264265
}
265266

266267
// Builds a pattern with several type names and a sub-syntax given by name.
// Both vectors are taken by rvalue reference; dynSyntax is left default-initialized.
// In `isRegEx( isRegEx )` the member is initialized from the parameter of the same name.
SyntaxPattern::SyntaxPattern( std::vector<std::string>&& _patterns,
267-
std::vector<std::string>&& _types, const std::string& _syntax ) :
268+
std::vector<std::string>&& _types, const std::string& _syntax,
269+
bool isRegEx ) :
268270
patterns( std::move( _patterns ) ),
269271
// `_types` is read here and moved from on the next initializer.
// NOTE(review): this relies on `types` being declared before `typesNames` - confirm in the header.
types( toSyntaxStyleTypeV( _types ) ),
270272
typesNames( std::move( _types ) ),
271-
syntax( _syntax ) {
273+
syntax( _syntax ),
274+
isRegEx( isRegEx ) {
272275
// NOTE(review): updateCache is defined outside this view; confirm what it caches.
updateCache<SyntaxStyleType>( *this );
273276
}
274277

275278
// Builds a pattern with one type name and a sub-syntax supplied as a DynamicSyntax.
// `_syntax` is moved into dynSyntax; the `syntax` string is not initialized here.
// In `isRegEx( isRegEx )` the member is initialized from the parameter of the same name.
SyntaxPattern::SyntaxPattern( std::vector<std::string>&& _patterns, const std::string& _type,
276-
DynamicSyntax&& _syntax ) :
279+
DynamicSyntax&& _syntax, bool isRegEx ) :
277280
patterns( std::move( _patterns ) ),
278281
types( toSyntaxStyleTypeV( std::vector<std::string>{ _type } ) ),
279282
typesNames( { _type } ),
280-
dynSyntax( std::move( _syntax ) ) {
283+
dynSyntax( std::move( _syntax ) ),
284+
isRegEx( isRegEx ) {
281285
// NOTE(review): updateCache is defined outside this view; confirm what it caches.
updateCache<SyntaxStyleType>( *this );
282286
}
283287

284288
// Builds a pattern with several type names and a sub-syntax supplied as a DynamicSyntax.
// `_syntax` is moved into dynSyntax; the `syntax` string is not initialized here.
// In `isRegEx( isRegEx )` the member is initialized from the parameter of the same name.
SyntaxPattern::SyntaxPattern( std::vector<std::string>&& _patterns,
285-
std::vector<std::string>&& _types, DynamicSyntax&& _syntax ) :
289+
std::vector<std::string>&& _types, DynamicSyntax&& _syntax,
290+
bool isRegEx ) :
286291
patterns( std::move( _patterns ) ),
287292
// `_types` is read here and moved from on the next initializer.
// NOTE(review): this relies on `types` being declared before `typesNames` - confirm in the header.
types( toSyntaxStyleTypeV( _types ) ),
288293
typesNames( std::move( _types ) ),
289-
dynSyntax( std::move( _syntax ) ) {
294+
dynSyntax( std::move( _syntax ) ),
295+
isRegEx( isRegEx ) {
290296
// NOTE(review): updateCache is defined outside this view; confirm what it caches.
updateCache<SyntaxStyleType>( *this );
291297
}
292298

src/eepp/ui/doc/syntaxdefinitionmanager.cpp

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -233,10 +233,11 @@ static json toJson( const SyntaxDefinition& def ) {
233233
j["patterns"] = json::array();
234234
for ( const auto& ptrn : def.getPatterns() ) {
235235
json pattern;
236+
auto ptrnType = ptrn.isRegEx ? "regex" : "pattern";
236237
if ( ptrn.patterns.size() == 1 ) {
237-
pattern["pattern"] = ptrn.patterns[0];
238+
pattern[ptrnType] = ptrn.patterns[0];
238239
} else {
239-
pattern["pattern"] = ptrn.patterns;
240+
pattern[ptrnType] = ptrn.patterns;
240241
}
241242
if ( ptrn.typesNames.size() == 1 ) {
242243
pattern["type"] = ptrn.typesNames[0];
@@ -352,9 +353,13 @@ namespace EE { namespace UI { namespace Doc { namespace Language {
352353
buf += join( def.getFiles() ) + ",\n";
353354
// patterns
354355
buf += "{\n";
355-
for ( const auto& pattern : def.getPatterns() )
356+
for ( const auto& pattern : def.getPatterns() ) {
356357
buf += "{ " + join( pattern.patterns ) + ", " + join( pattern.typesNames, true, true ) +
357-
str( pattern.syntax, ", ", "", false ) + " },\n";
358+
str( pattern.syntax, ", ", "", false );
359+
if ( pattern.isRegEx )
360+
buf += ", true";
361+
buf += " },\n";
362+
}
358363
buf += "\n},\n";
359364
// symbols
360365
buf += "{\n";
@@ -521,6 +526,7 @@ static SyntaxDefinition loadLanguage( const nlohmann::json& json ) {
521526
? ""
522527
: pattern.value( "syntax", "" );
523528
std::vector<std::string> ptrns;
529+
bool isRegEx = false;
524530
if ( pattern.contains( "pattern" ) ) {
525531
if ( pattern["pattern"].is_array() ) {
526532
const auto& ptrnIt = pattern["pattern"];
@@ -529,8 +535,18 @@ static SyntaxDefinition loadLanguage( const nlohmann::json& json ) {
529535
} else if ( pattern["pattern"].is_string() ) {
530536
ptrns.emplace_back( pattern["pattern"] );
531537
}
538+
} else if ( pattern.contains( "regex" ) ) {
539+
isRegEx = true;
540+
if ( pattern["regex"].is_array() ) {
541+
const auto& ptrnIt = pattern["regex"];
542+
for ( const auto& ptrn : ptrnIt )
543+
ptrns.emplace_back( ptrn );
544+
} else if ( pattern["regex"].is_string() ) {
545+
ptrns.emplace_back( pattern["regex"] );
546+
}
532547
}
533-
def.addPattern( SyntaxPattern( std::move( ptrns ), std::move( type ), syntax ) );
548+
def.addPattern(
549+
SyntaxPattern( std::move( ptrns ), std::move( type ), syntax, isRegEx ) );
534550
}
535551
}
536552
if ( json.contains( "symbols" ) ) {

src/eepp/ui/doc/syntaxtokenizer.cpp

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
#include <eepp/system/log.hpp>
22
#include <eepp/system/luapattern.hpp>
3+
#include <eepp/system/regex.hpp>
34
#include <eepp/ui/doc/syntaxdefinitionmanager.hpp>
45
#include <eepp/ui/doc/syntaxtokenizer.hpp>
6+
#include <variant>
57

68
using namespace EE::System;
79

@@ -84,7 +86,8 @@ static void pushToken( std::vector<T>& tokens, const SyntaxStyleType& type,
8486
}
8587
}
8688

87-
bool isScaped( const std::string& text, const size_t& startIndex, const std::string& escapeStr ) {
89+
static bool isScaped( const std::string& text, const size_t& startIndex,
90+
const std::string& escapeStr ) {
8891
char escapeByte = escapeStr.empty() ? '\\' : escapeStr[0];
8992
int count = 0;
9093
for ( int i = startIndex - 1; i >= 0; i-- ) {
@@ -95,12 +98,17 @@ bool isScaped( const std::string& text, const size_t& startIndex, const std::str
9598
return count % 2 == 1;
9699
}
97100

98-
std::pair<int, int> findNonEscaped( const std::string& text, const std::string& pattern, int offset,
99-
const std::string& escapeStr ) {
101+
static std::pair<int, int> findNonEscaped( const std::string& text, const std::string& pattern,
102+
int offset, const std::string& escapeStr,
103+
bool isRegEx ) {
100104
eeASSERT( !pattern.empty() );
101105
if ( pattern.empty() )
102106
return std::make_pair( -1, -1 );
103-
LuaPattern words( pattern );
107+
std::variant<RegEx, LuaPattern> wordsVar =
108+
isRegEx ? std::variant<RegEx, LuaPattern>( RegEx( pattern ) )
109+
: std::variant<RegEx, LuaPattern>( LuaPattern( pattern ) );
110+
PatternMatcher& words =
111+
std::visit( []( auto& patternType ) -> PatternMatcher& { return patternType; }, wordsVar );
104112
int start, end;
105113
while ( words.find( text, start, end, offset ) ) {
106114
if ( !escapeStr.empty() && isScaped( text, start, escapeStr ) ) {
@@ -202,9 +210,9 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax
202210
if ( curState.currentPatternIdx != SYNTAX_TOKENIZER_STATE_NONE ) {
203211
const SyntaxPattern& pattern =
204212
curState.currentSyntax->getPatterns()[curState.currentPatternIdx - 1];
205-
std::pair<int, int> range =
206-
findNonEscaped( text, pattern.patterns[1], i,
207-
pattern.patterns.size() >= 3 ? pattern.patterns[2] : "" );
213+
std::pair<int, int> range = findNonEscaped(
214+
text, pattern.patterns[1], i,
215+
pattern.patterns.size() >= 3 ? pattern.patterns[2] : "", pattern.isRegEx );
208216

209217
bool skip = false;
210218

@@ -213,7 +221,8 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax
213221
findNonEscaped( text, curState.subsyntaxInfo->patterns[1], i,
214222
curState.subsyntaxInfo->patterns.size() >= 3
215223
? curState.subsyntaxInfo->patterns[2]
216-
: "" );
224+
: "",
225+
pattern.isRegEx );
217226

218227
if ( rangeSubsyntax.first != -1 &&
219228
( range.first == -1 || rangeSubsyntax.first < range.first ) ) {
@@ -249,7 +258,8 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax
249258
std::pair<int, int> rangeSubsyntax = findNonEscaped(
250259
text, "^" + curState.subsyntaxInfo->patterns[1], i,
251260
curState.subsyntaxInfo->patterns.size() >= 3 ? curState.subsyntaxInfo->patterns[2]
252-
: "" );
261+
: "",
262+
curState.subsyntaxInfo->isRegEx );
253263

254264
if ( rangeSubsyntax.first != -1 ) {
255265
if ( !skipSubSyntaxSeparator ) {
@@ -270,7 +280,13 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax
270280
continue;
271281
patternStr =
272282
pattern.patterns[0][0] == '^' ? pattern.patterns[0] : "^" + pattern.patterns[0];
273-
LuaPattern words( patternStr );
283+
std::variant<RegEx, LuaPattern> wordsVar =
284+
pattern.isRegEx ? std::variant<RegEx, LuaPattern>( RegEx( patternStr ) )
285+
: std::variant<RegEx, LuaPattern>( LuaPattern( patternStr ) );
286+
PatternMatcher& words = std::visit(
287+
[]( auto& patternType ) -> PatternMatcher& { return patternType; }, wordsVar );
288+
if ( !words.isValid() ) // Skip invalid patterns
289+
continue;
274290
if ( words.matches( text, matches, i ) && ( numMatches = words.getNumMatches() ) > 0 ) {
275291
if ( numMatches > 1 ) {
276292
int patternMatchStart = matches[0].start;

0 commit comments

Comments
 (0)