Skip to content

Commit faadc02

Browse files
authored
Support unicode letters (#10)
* Added initial support for Unicode letters * Updated unit tests with Unicode characters
1 parent 5b28e40 commit faadc02

File tree

4 files changed

+808
-434
lines changed

4 files changed

+808
-434
lines changed

src/main/kotlin/RegexBuilder.kt

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -180,52 +180,52 @@ class RegexBuilder {
180180
fun nonDigit(quantifier: RegexQuantifier? = null) =
    // "\D" is the regex shorthand class for any character that is not a digit.
    append("\\D", quantifier)
181181

182182
/**
183-
* Add an element to match any letter in the Roman alphabet (a-z, A-Z)
183+
* Add an element to match any Unicode letter.
184184
*
185185
* @param quantifier Quantifier to apply to this element
186186
* @return The current [RegexBuilder] object, for method chaining
187187
*/
188-
fun letter(quantifier: RegexQuantifier? = null) =
    // Character class limited to the ASCII letters a-z and A-Z.
    append("[a-zA-Z]", quantifier)
188+
fun letter(quantifier: RegexQuantifier? = null) =
    // "\p{L}" selects the Unicode general category "Letter" (any script, any case).
    append("\\p{L}", quantifier)
189189

190190
/**
191-
* Add an element to match any character that is not a letter in the Roman alphabet (a-z, A-Z)
191+
* Add an element to match any character that is not a Unicode letter.
192192
*
193193
* @param quantifier Quantifier to apply to this element
194194
* @return The current [RegexBuilder] object, for method chaining
195195
*/
196-
fun nonLetter(quantifier: RegexQuantifier? = null) =
    // Negated character class: anything except the ASCII letters a-z and A-Z.
    append("[^a-zA-Z]", quantifier)
196+
fun nonLetter(quantifier: RegexQuantifier? = null) =
    // "\P{L}" (capital P) is the complement of "\p{L}": any character outside the Unicode "Letter" category.
    append("\\P{L}", quantifier)
197197

198198
/**
199-
* Add an element to match any upper-case letter in the Roman alphabet (A-Z).
199+
* Add an element to match any uppercase Unicode letter.
200200
*
201201
* @param quantifier Quantifier to apply to this element
202202
* @return The current [RegexBuilder] object, for method chaining
203203
*/
204-
fun uppercaseLetter(quantifier: RegexQuantifier? = null) =
    // Character class limited to the ASCII upper-case letters A-Z.
    append("[A-Z]", quantifier)
204+
fun uppercaseLetter(quantifier: RegexQuantifier? = null) =
    // "\p{Lu}" selects the Unicode general category "Uppercase_Letter".
    append("\\p{Lu}", quantifier)
205205

206206
/**
207-
* Add an element to match any lowercase letter in the Roman alphabet (a-z)
207+
* Add an element to match any lowercase Unicode letter.
208208
*
209209
* @param quantifier Quantifier to apply to this element
210210
* @return The current [RegexBuilder] object, for method chaining
211211
*/
212-
fun lowercaseLetter(quantifier: RegexQuantifier? = null) =
    // Character class limited to the ASCII lower-case letters a-z.
    append("[a-z]", quantifier)
212+
fun lowercaseLetter(quantifier: RegexQuantifier? = null) =
    // "\p{Ll}" selects the Unicode general category "Lowercase_Letter".
    append("\\p{Ll}", quantifier)
213213

214214
/**
215-
* Add an element to match any letter in the Roman alphabet or decimal digit (a-z, A-Z, 0-9)
215+
* Add an element to match any Unicode letter or ASCII decimal digit (0-9).
216216
*
217217
* @param quantifier Quantifier to apply to this element
218218
* @return The current [RegexBuilder] object, for method chaining
219219
*/
220-
fun letterOrDigit(quantifier: RegexQuantifier? = null) =
    // Character class of ASCII letters (a-z, A-Z) plus the digits 0-9.
    append("[a-zA-Z0-9]", quantifier)
220+
fun letterOrDigit(quantifier: RegexQuantifier? = null) =
    // Character class combining Unicode letters ("\p{L}") with the ASCII digits 0-9 only.
    append("[\\p{L}0-9]", quantifier)
221221

222222
/**
223-
* Add an element to match any character that is not a letter in the Roman alphabet or a decimal digit (a-z, A-Z, 0-9)
223+
* Add an element to match any character that is neither a Unicode letter nor an ASCII decimal digit (0-9).
224224
*
225225
* @param quantifier Quantifier to apply to this element
226226
* @return The current [RegexBuilder] object, for method chaining
227227
*/
228-
fun nonLetterOrDigit(quantifier: RegexQuantifier? = null) =
    // Negated character class: anything except ASCII letters (a-z, A-Z) and the digits 0-9.
    append("[^a-zA-Z0-9]", quantifier)
228+
fun nonLetterOrDigit(quantifier: RegexQuantifier? = null) =
    // Negated character class: anything except Unicode letters ("\p{L}") and the ASCII digits 0-9.
    append("[^\\p{L}0-9]", quantifier)
229229

230230
/**
231231
* Add an element to match any hexadecimal digit (a-f, A-F, 0-9)
@@ -260,21 +260,20 @@ class RegexBuilder {
260260
fun nonHexDigit(quantifier: RegexQuantifier? = null) =
    // Negated character class: anything except 0-9, A-F and a-f.
    append("[^0-9A-Fa-f]", quantifier)
261261

262262
/**
263-
* Add an element to match any Roman alphabet letter, decimal digit, or underscore (a-z, A-Z, 0-9, _)
263+
* Add an element to match any Unicode letter, ASCII decimal digit (0-9), or underscore.
264264
*
265265
* @param quantifier Quantifier to apply to this element
266266
* @return The current [RegexBuilder] object, for method chaining
267267
*/
268-
fun wordCharacter(quantifier: RegexQuantifier? = null) =
    // "\w" is the regex shorthand class for a word character.
    append("\\w", quantifier)
268+
fun wordCharacter(quantifier: RegexQuantifier? = null) =
    // Explicit character class: Unicode letters ("\p{L}"), the ASCII digits 0-9, and underscore.
    append("[\\p{L}0-9_]", quantifier)
269269

270270
/**
271-
* Add an element to match any character that is not a Roman alphabet letter, decimal digit, or underscore
272-
* (a-z, A-Z, 0-9, _)
271+
* Add an element to match any character that is not a Unicode letter, ASCII decimal digit (0-9), or underscore.
273272
*
274273
* @param quantifier Quantifier to apply to this element
275274
* @return The current [RegexBuilder] object, for method chaining
276275
*/
277-
fun nonWordCharacter(quantifier: RegexQuantifier? = null) =
    // "\W" is the regex shorthand class for any character that is not a word character.
    append("\\W", quantifier)
276+
fun nonWordCharacter(quantifier: RegexQuantifier? = null) =
    // Negated character class: anything except Unicode letters ("\p{L}"), the ASCII digits 0-9, and underscore.
    append("[^\\p{L}0-9_]", quantifier)
278277

279278
/**
280279
* Add an element (a character class) to match any of the characters provided.

0 commit comments

Comments
 (0)