Skip to content

Commit 6661224

Browse files
committed
Merge tag 'unicode-for-next-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/krisman/unicode
Pull unicode updates from Gabriel Krisman Bertazi: "This includes patches from Christoph Hellwig to split the large data tables of the unicode subsystem into a loadable module, which allows users to not have them around if case-insensitive filesystems are not to be used. It also includes minor code fixes to unicode and its users, from the same author. All the patches here have been on linux-next releases for the past months" * tag 'unicode-for-next-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/krisman/unicode: unicode: only export internal symbols for the selftests unicode: Add utf8-data module unicode: cache the normalization tables in struct unicode_map unicode: move utf8cursor to utf8-selftest.c unicode: simplify utf8len unicode: remove the unused utf8{,n}age{min,max} functions unicode: pass a UNICODE_AGE() tripple to utf8_load unicode: mark the version field in struct unicode_map unsigned unicode: remove the charset field from struct unicode_map f2fs: simplify f2fs_sb_read_encoding ext4: simplify ext4_sb_read_encoding
2 parents 79e06c4 + e2a58d2 commit 6661224

File tree

12 files changed

+291
-456
lines changed

12 files changed

+291
-456
lines changed

fs/ext4/super.c

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1966,29 +1966,22 @@ static const struct mount_opts {
19661966
static const struct ext4_sb_encodings {
19671967
__u16 magic;
19681968
char *name;
1969-
char *version;
1969+
unsigned int version;
19701970
} ext4_sb_encoding_map[] = {
1971-
{EXT4_ENC_UTF8_12_1, "utf8", "12.1.0"},
1971+
{EXT4_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)},
19721972
};
19731973

1974-
static int ext4_sb_read_encoding(const struct ext4_super_block *es,
1975-
const struct ext4_sb_encodings **encoding,
1976-
__u16 *flags)
1974+
static const struct ext4_sb_encodings *
1975+
ext4_sb_read_encoding(const struct ext4_super_block *es)
19771976
{
19781977
__u16 magic = le16_to_cpu(es->s_encoding);
19791978
int i;
19801979

19811980
for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++)
19821981
if (magic == ext4_sb_encoding_map[i].magic)
1983-
break;
1984-
1985-
if (i >= ARRAY_SIZE(ext4_sb_encoding_map))
1986-
return -EINVAL;
1982+
return &ext4_sb_encoding_map[i];
19871983

1988-
*encoding = &ext4_sb_encoding_map[i];
1989-
*flags = le16_to_cpu(es->s_encoding_flags);
1990-
1991-
return 0;
1984+
return NULL;
19921985
}
19931986
#endif
19941987

@@ -4624,10 +4617,10 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
46244617
if (ext4_has_feature_casefold(sb) && !sb->s_encoding) {
46254618
const struct ext4_sb_encodings *encoding_info;
46264619
struct unicode_map *encoding;
4627-
__u16 encoding_flags;
4620+
__u16 encoding_flags = le16_to_cpu(es->s_encoding_flags);
46284621

4629-
if (ext4_sb_read_encoding(es, &encoding_info,
4630-
&encoding_flags)) {
4622+
encoding_info = ext4_sb_read_encoding(es);
4623+
if (!encoding_info) {
46314624
ext4_msg(sb, KERN_ERR,
46324625
"Encoding requested by superblock is unknown");
46334626
goto failed_mount;
@@ -4636,15 +4629,21 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
46364629
encoding = utf8_load(encoding_info->version);
46374630
if (IS_ERR(encoding)) {
46384631
ext4_msg(sb, KERN_ERR,
4639-
"can't mount with superblock charset: %s-%s "
4632+
"can't mount with superblock charset: %s-%u.%u.%u "
46404633
"not supported by the kernel. flags: 0x%x.",
4641-
encoding_info->name, encoding_info->version,
4634+
encoding_info->name,
4635+
unicode_major(encoding_info->version),
4636+
unicode_minor(encoding_info->version),
4637+
unicode_rev(encoding_info->version),
46424638
encoding_flags);
46434639
goto failed_mount;
46444640
}
46454641
ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
4646-
"%s-%s with flags 0x%hx", encoding_info->name,
4647-
encoding_info->version?:"\b", encoding_flags);
4642+
"%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
4643+
unicode_major(encoding_info->version),
4644+
unicode_minor(encoding_info->version),
4645+
unicode_rev(encoding_info->version),
4646+
encoding_flags);
46484647

46494648
sb->s_encoding = encoding;
46504649
sb->s_encoding_flags = encoding_flags;

fs/f2fs/super.c

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -260,29 +260,22 @@ void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...)
260260
static const struct f2fs_sb_encodings {
261261
__u16 magic;
262262
char *name;
263-
char *version;
263+
unsigned int version;
264264
} f2fs_sb_encoding_map[] = {
265-
{F2FS_ENC_UTF8_12_1, "utf8", "12.1.0"},
265+
{F2FS_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)},
266266
};
267267

268-
static int f2fs_sb_read_encoding(const struct f2fs_super_block *sb,
269-
const struct f2fs_sb_encodings **encoding,
270-
__u16 *flags)
268+
static const struct f2fs_sb_encodings *
269+
f2fs_sb_read_encoding(const struct f2fs_super_block *sb)
271270
{
272271
__u16 magic = le16_to_cpu(sb->s_encoding);
273272
int i;
274273

275274
for (i = 0; i < ARRAY_SIZE(f2fs_sb_encoding_map); i++)
276275
if (magic == f2fs_sb_encoding_map[i].magic)
277-
break;
278-
279-
if (i >= ARRAY_SIZE(f2fs_sb_encoding_map))
280-
return -EINVAL;
276+
return &f2fs_sb_encoding_map[i];
281277

282-
*encoding = &f2fs_sb_encoding_map[i];
283-
*flags = le16_to_cpu(sb->s_encoding_flags);
284-
285-
return 0;
278+
return NULL;
286279
}
287280

288281
struct kmem_cache *f2fs_cf_name_slab;
@@ -3874,25 +3867,32 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi)
38743867
struct unicode_map *encoding;
38753868
__u16 encoding_flags;
38763869

3877-
if (f2fs_sb_read_encoding(sbi->raw_super, &encoding_info,
3878-
&encoding_flags)) {
3870+
encoding_info = f2fs_sb_read_encoding(sbi->raw_super);
3871+
if (!encoding_info) {
38793872
f2fs_err(sbi,
38803873
"Encoding requested by superblock is unknown");
38813874
return -EINVAL;
38823875
}
38833876

3877+
encoding_flags = le16_to_cpu(sbi->raw_super->s_encoding_flags);
38843878
encoding = utf8_load(encoding_info->version);
38853879
if (IS_ERR(encoding)) {
38863880
f2fs_err(sbi,
3887-
"can't mount with superblock charset: %s-%s "
3881+
"can't mount with superblock charset: %s-%u.%u.%u "
38883882
"not supported by the kernel. flags: 0x%x.",
3889-
encoding_info->name, encoding_info->version,
3883+
encoding_info->name,
3884+
unicode_major(encoding_info->version),
3885+
unicode_minor(encoding_info->version),
3886+
unicode_rev(encoding_info->version),
38903887
encoding_flags);
38913888
return PTR_ERR(encoding);
38923889
}
38933890
f2fs_info(sbi, "Using encoding defined by superblock: "
3894-
"%s-%s with flags 0x%hx", encoding_info->name,
3895-
encoding_info->version?:"\b", encoding_flags);
3891+
"%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
3892+
unicode_major(encoding_info->version),
3893+
unicode_minor(encoding_info->version),
3894+
unicode_rev(encoding_info->version),
3895+
encoding_flags);
38963896

38973897
sbi->sb->s_encoding = encoding;
38983898
sbi->sb->s_encoding_flags = encoding_flags;

fs/f2fs/sysfs.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,7 @@ static ssize_t encoding_show(struct f2fs_attr *a,
196196
struct super_block *sb = sbi->sb;
197197

198198
if (f2fs_sb_has_casefold(sbi))
199-
return sysfs_emit(buf, "%s (%d.%d.%d)\n",
200-
sb->s_encoding->charset,
199+
return sysfs_emit(buf, "UTF-8 (%d.%d.%d)\n",
201200
(sb->s_encoding->version >> 16) & 0xff,
202201
(sb->s_encoding->version >> 8) & 0xff,
203202
sb->s_encoding->version & 0xff);

fs/unicode/Kconfig

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,16 @@ config UNICODE
88
Say Y here to enable UTF-8 NFD normalization and NFD+CF casefolding
99
support.
1010

11+
config UNICODE_UTF8_DATA
12+
tristate "UTF-8 normalization and casefolding tables"
13+
depends on UNICODE
14+
default UNICODE
15+
help
16+
This contains a large table of case foldings, which can be loaded as
17+
a separate module if you say M here. To be on the safe side stick
18+
to the default of Y. Saying N here makes no sense; if you do not want
19+
UTF-8 casefolding support, disable CONFIG_UNICODE instead.
20+
1121
config UNICODE_NORMALIZATION_SELFTEST
1222
tristate "Test UTF-8 normalization support"
13-
depends on UNICODE
14-
default n
23+
depends on UNICODE_UTF8_DATA

fs/unicode/Makefile

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,15 @@
22

33
obj-$(CONFIG_UNICODE) += unicode.o
44
obj-$(CONFIG_UNICODE_NORMALIZATION_SELFTEST) += utf8-selftest.o
5+
obj-$(CONFIG_UNICODE_UTF8_DATA) += utf8data.o
56

67
unicode-y := utf8-norm.o utf8-core.o
78

8-
$(obj)/utf8-norm.o: $(obj)/utf8data.h
9+
$(obj)/utf8-data.o: $(obj)/utf8data.c
910

10-
# In the normal build, the checked-in utf8data.h is just shipped.
11+
# In the normal build, the checked-in utf8data.c is just shipped.
1112
#
12-
# To generate utf8data.h from UCD, put *.txt files in this directory
13+
# To generate utf8data.c from UCD, put *.txt files in this directory
1314
# and pass REGENERATE_UTF8DATA=1 from the command line.
1415
ifdef REGENERATE_UTF8DATA
1516

@@ -24,15 +25,15 @@ quiet_cmd_utf8data = GEN $@
2425
-t $(srctree)/$(src)/NormalizationTest.txt \
2526
-o $@
2627

27-
$(obj)/utf8data.h: $(obj)/mkutf8data $(filter %.txt, $(cmd_utf8data)) FORCE
28+
$(obj)/utf8data.c: $(obj)/mkutf8data $(filter %.txt, $(cmd_utf8data)) FORCE
2829
$(call if_changed,utf8data)
2930

3031
else
3132

32-
$(obj)/utf8data.h: $(src)/utf8data.h_shipped FORCE
33+
$(obj)/utf8data.c: $(src)/utf8data.c_shipped FORCE
3334
$(call if_changed,shipped)
3435

3536
endif
3637

37-
targets += utf8data.h
38+
targets += utf8data.c
3839
hostprogs += mkutf8data

fs/unicode/mkutf8data.c

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3287,12 +3287,10 @@ static void write_file(void)
32873287
open_fail(utf8_name, errno);
32883288

32893289
fprintf(file, "/* This file is generated code, do not edit. */\n");
3290-
fprintf(file, "#ifndef __INCLUDED_FROM_UTF8NORM_C__\n");
3291-
fprintf(file, "#error Only nls_utf8-norm.c should include this file.\n");
3292-
fprintf(file, "#endif\n");
32933290
fprintf(file, "\n");
3294-
fprintf(file, "static const unsigned int utf8vers = %#x;\n",
3295-
unicode_maxage);
3291+
fprintf(file, "#include <linux/module.h>\n");
3292+
fprintf(file, "#include <linux/kernel.h>\n");
3293+
fprintf(file, "#include \"utf8n.h\"\n");
32963294
fprintf(file, "\n");
32973295
fprintf(file, "static const unsigned int utf8agetab[] = {\n");
32983296
for (i = 0; i != ages_count; i++)
@@ -3339,6 +3337,22 @@ static void write_file(void)
33393337
fprintf(file, "\n");
33403338
}
33413339
fprintf(file, "};\n");
3340+
fprintf(file, "\n");
3341+
fprintf(file, "struct utf8data_table utf8_data_table = {\n");
3342+
fprintf(file, "\t.utf8agetab = utf8agetab,\n");
3343+
fprintf(file, "\t.utf8agetab_size = ARRAY_SIZE(utf8agetab),\n");
3344+
fprintf(file, "\n");
3345+
fprintf(file, "\t.utf8nfdicfdata = utf8nfdicfdata,\n");
3346+
fprintf(file, "\t.utf8nfdicfdata_size = ARRAY_SIZE(utf8nfdicfdata),\n");
3347+
fprintf(file, "\n");
3348+
fprintf(file, "\t.utf8nfdidata = utf8nfdidata,\n");
3349+
fprintf(file, "\t.utf8nfdidata_size = ARRAY_SIZE(utf8nfdidata),\n");
3350+
fprintf(file, "\n");
3351+
fprintf(file, "\t.utf8data = utf8data,\n");
3352+
fprintf(file, "};\n");
3353+
fprintf(file, "EXPORT_SYMBOL_GPL(utf8_data_table);");
3354+
fprintf(file, "\n");
3355+
fprintf(file, "MODULE_LICENSE(\"GPL v2\");\n");
33423356
fclose(file);
33433357
}
33443358

0 commit comments

Comments
 (0)