Skip to content

Commit 6ca99ce

Browse files
Christoph Hellwig authored and krisman-at-collabora committed
unicode: cache the normalization tables in struct unicode_map
Instead of repeatedly looking up the version, add pointers to the NFD and NFD+CF tables to struct unicode_map, and pass a unicode_map plus index to the functions using the normalization tables.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
1 parent fbc59d6 commit 6ca99ce

File tree

5 files changed

+97
-94
lines changed

5 files changed

+97
-94
lines changed

fs/unicode/utf8-core.c

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,13 @@
55
#include <linux/slab.h>
66
#include <linux/parser.h>
77
#include <linux/errno.h>
8-
#include <linux/unicode.h>
98
#include <linux/stringhash.h>
109

1110
#include "utf8n.h"
1211

1312
int utf8_validate(const struct unicode_map *um, const struct qstr *str)
1413
{
15-
const struct utf8data *data = utf8nfdi(um->version);
16-
17-
if (utf8nlen(data, str->name, str->len) < 0)
14+
if (utf8nlen(um, UTF8_NFDI, str->name, str->len) < 0)
1815
return -1;
1916
return 0;
2017
}
@@ -23,14 +20,13 @@ EXPORT_SYMBOL(utf8_validate);
2320
int utf8_strncmp(const struct unicode_map *um,
2421
const struct qstr *s1, const struct qstr *s2)
2522
{
26-
const struct utf8data *data = utf8nfdi(um->version);
2723
struct utf8cursor cur1, cur2;
2824
int c1, c2;
2925

30-
if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
26+
if (utf8ncursor(&cur1, um, UTF8_NFDI, s1->name, s1->len) < 0)
3127
return -EINVAL;
3228

33-
if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
29+
if (utf8ncursor(&cur2, um, UTF8_NFDI, s2->name, s2->len) < 0)
3430
return -EINVAL;
3531

3632
do {
@@ -50,14 +46,13 @@ EXPORT_SYMBOL(utf8_strncmp);
5046
int utf8_strncasecmp(const struct unicode_map *um,
5147
const struct qstr *s1, const struct qstr *s2)
5248
{
53-
const struct utf8data *data = utf8nfdicf(um->version);
5449
struct utf8cursor cur1, cur2;
5550
int c1, c2;
5651

57-
if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
52+
if (utf8ncursor(&cur1, um, UTF8_NFDICF, s1->name, s1->len) < 0)
5853
return -EINVAL;
5954

60-
if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
55+
if (utf8ncursor(&cur2, um, UTF8_NFDICF, s2->name, s2->len) < 0)
6156
return -EINVAL;
6257

6358
do {
@@ -81,12 +76,11 @@ int utf8_strncasecmp_folded(const struct unicode_map *um,
8176
const struct qstr *cf,
8277
const struct qstr *s1)
8378
{
84-
const struct utf8data *data = utf8nfdicf(um->version);
8579
struct utf8cursor cur1;
8680
int c1, c2;
8781
int i = 0;
8882

89-
if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
83+
if (utf8ncursor(&cur1, um, UTF8_NFDICF, s1->name, s1->len) < 0)
9084
return -EINVAL;
9185

9286
do {
@@ -105,11 +99,10 @@ EXPORT_SYMBOL(utf8_strncasecmp_folded);
10599
int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
106100
unsigned char *dest, size_t dlen)
107101
{
108-
const struct utf8data *data = utf8nfdicf(um->version);
109102
struct utf8cursor cur;
110103
size_t nlen = 0;
111104

112-
if (utf8ncursor(&cur, data, str->name, str->len) < 0)
105+
if (utf8ncursor(&cur, um, UTF8_NFDICF, str->name, str->len) < 0)
113106
return -EINVAL;
114107

115108
for (nlen = 0; nlen < dlen; nlen++) {
@@ -128,12 +121,11 @@ EXPORT_SYMBOL(utf8_casefold);
128121
int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
129122
struct qstr *str)
130123
{
131-
const struct utf8data *data = utf8nfdicf(um->version);
132124
struct utf8cursor cur;
133125
int c;
134126
unsigned long hash = init_name_hash(salt);
135127

136-
if (utf8ncursor(&cur, data, str->name, str->len) < 0)
128+
if (utf8ncursor(&cur, um, UTF8_NFDICF, str->name, str->len) < 0)
137129
return -EINVAL;
138130

139131
while ((c = utf8byte(&cur))) {
@@ -149,11 +141,10 @@ EXPORT_SYMBOL(utf8_casefold_hash);
149141
int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
150142
unsigned char *dest, size_t dlen)
151143
{
152-
const struct utf8data *data = utf8nfdi(um->version);
153144
struct utf8cursor cur;
154145
ssize_t nlen = 0;
155146

156-
if (utf8ncursor(&cur, data, str->name, str->len) < 0)
147+
if (utf8ncursor(&cur, um, UTF8_NFDI, str->name, str->len) < 0)
157148
return -EINVAL;
158149

159150
for (nlen = 0; nlen < dlen; nlen++) {
@@ -180,7 +171,17 @@ struct unicode_map *utf8_load(unsigned int version)
180171
if (!um)
181172
return ERR_PTR(-ENOMEM);
182173
um->version = version;
174+
um->ntab[UTF8_NFDI] = utf8nfdi(version);
175+
if (!um->ntab[UTF8_NFDI])
176+
goto out_free_um;
177+
um->ntab[UTF8_NFDICF] = utf8nfdicf(version);
178+
if (!um->ntab[UTF8_NFDICF])
179+
goto out_free_um;
183180
return um;
181+
182+
out_free_um:
183+
kfree(um);
184+
return ERR_PTR(-EINVAL);
184185
}
185186
EXPORT_SYMBOL(utf8_load);
186187

fs/unicode/utf8-norm.c

Lines changed: 20 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -309,21 +309,19 @@ utf8hangul(const char *str, unsigned char *hangul)
309309
* is well-formed and corresponds to a known unicode code point. The
310310
* shorthand for this will be "is valid UTF-8 unicode".
311311
*/
312-
static utf8leaf_t *utf8nlookup(const struct utf8data *data,
313-
unsigned char *hangul, const char *s, size_t len)
312+
static utf8leaf_t *utf8nlookup(const struct unicode_map *um,
313+
enum utf8_normalization n, unsigned char *hangul, const char *s,
314+
size_t len)
314315
{
315-
utf8trie_t *trie = NULL;
316+
utf8trie_t *trie = utf8data + um->ntab[n]->offset;
316317
int offlen;
317318
int offset;
318319
int mask;
319320
int node;
320321

321-
if (!data)
322-
return NULL;
323322
if (len == 0)
324323
return NULL;
325324

326-
trie = utf8data + data->offset;
327325
node = 1;
328326
while (node) {
329327
offlen = (*trie & OFFLEN) >> OFFLEN_SHIFT;
@@ -385,29 +383,28 @@ static utf8leaf_t *utf8nlookup(const struct utf8data *data,
385383
*
386384
* Forwards to utf8nlookup().
387385
*/
388-
static utf8leaf_t *utf8lookup(const struct utf8data *data,
389-
unsigned char *hangul, const char *s)
386+
static utf8leaf_t *utf8lookup(const struct unicode_map *um,
387+
enum utf8_normalization n, unsigned char *hangul, const char *s)
390388
{
391-
return utf8nlookup(data, hangul, s, (size_t)-1);
389+
return utf8nlookup(um, n, hangul, s, (size_t)-1);
392390
}
393391

394392
/*
395393
* Length of the normalization of s, touch at most len bytes.
396394
* Return -1 if s is not valid UTF-8 unicode.
397395
*/
398-
ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len)
396+
ssize_t utf8nlen(const struct unicode_map *um, enum utf8_normalization n,
397+
const char *s, size_t len)
399398
{
400399
utf8leaf_t *leaf;
401400
size_t ret = 0;
402401
unsigned char hangul[UTF8HANGULLEAF];
403402

404-
if (!data)
405-
return -1;
406403
while (len && *s) {
407-
leaf = utf8nlookup(data, hangul, s, len);
404+
leaf = utf8nlookup(um, n, hangul, s, len);
408405
if (!leaf)
409406
return -1;
410-
if (utf8agetab[LEAF_GEN(leaf)] > data->maxage)
407+
if (utf8agetab[LEAF_GEN(leaf)] > um->ntab[n]->maxage)
411408
ret += utf8clen(s);
412409
else if (LEAF_CCC(leaf) == DECOMPOSE)
413410
ret += strlen(LEAF_STR(leaf));
@@ -430,14 +427,13 @@ EXPORT_SYMBOL(utf8nlen);
430427
*
431428
* Returns -1 on error, 0 on success.
432429
*/
433-
int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
434-
const char *s, size_t len)
430+
int utf8ncursor(struct utf8cursor *u8c, const struct unicode_map *um,
431+
enum utf8_normalization n, const char *s, size_t len)
435432
{
436-
if (!data)
437-
return -1;
438433
if (!s)
439434
return -1;
440-
u8c->data = data;
435+
u8c->um = um;
436+
u8c->n = n;
441437
u8c->s = s;
442438
u8c->p = NULL;
443439
u8c->ss = NULL;
@@ -512,9 +508,9 @@ int utf8byte(struct utf8cursor *u8c)
512508

513509
/* Look up the data for the current character. */
514510
if (u8c->p) {
515-
leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s);
511+
leaf = utf8lookup(u8c->um, u8c->n, u8c->hangul, u8c->s);
516512
} else {
517-
leaf = utf8nlookup(u8c->data, u8c->hangul,
513+
leaf = utf8nlookup(u8c->um, u8c->n, u8c->hangul,
518514
u8c->s, u8c->len);
519515
}
520516

@@ -524,7 +520,8 @@ int utf8byte(struct utf8cursor *u8c)
524520

525521
ccc = LEAF_CCC(leaf);
526522
/* Characters that are too new have CCC 0. */
527-
if (utf8agetab[LEAF_GEN(leaf)] > u8c->data->maxage) {
523+
if (utf8agetab[LEAF_GEN(leaf)] >
524+
u8c->um->ntab[u8c->n]->maxage) {
528525
ccc = STOPPER;
529526
} else if (ccc == DECOMPOSE) {
530527
u8c->len -= utf8clen(u8c->s);
@@ -538,7 +535,7 @@ int utf8byte(struct utf8cursor *u8c)
538535
goto ccc_mismatch;
539536
}
540537

541-
leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s);
538+
leaf = utf8lookup(u8c->um, u8c->n, u8c->hangul, u8c->s);
542539
if (!leaf)
543540
return -1;
544541
ccc = LEAF_CCC(leaf);
@@ -611,7 +608,6 @@ const struct utf8data *utf8nfdi(unsigned int maxage)
611608
return NULL;
612609
return &utf8nfdidata[i];
613610
}
614-
EXPORT_SYMBOL(utf8nfdi);
615611

616612
const struct utf8data *utf8nfdicf(unsigned int maxage)
617613
{
@@ -623,4 +619,3 @@ const struct utf8data *utf8nfdicf(unsigned int maxage)
623619
return NULL;
624620
return &utf8nfdicfdata[i];
625621
}
626-
EXPORT_SYMBOL(utf8nfdicf);

0 commit comments

Comments
 (0)