Skip to content

Commit 9b0f67a

Browse files
committed
fix: allow UTF-8 encoded object names
1 parent ddc5163 commit 9b0f67a

File tree

12 files changed

+561
-9
lines changed

12 files changed

+561
-9
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
-- Restrict storage.objects.name to one or more characters drawn from:
--   TAB (0x09), LF (0x0A), CR (0x0D),
--   0x20-0xD7FF, 0xE000-0xFFFD and 0x10000-0x10FFFF.
-- These are the ranges of the XML 1.0 "Char" production, so C0 control
-- characters other than TAB/LF/CR, the surrogate block and U+FFFE/U+FFFF are rejected.
ALTER TABLE "storage"."objects"
2+
ADD CONSTRAINT objects_name_check
3+
CHECK (name SIMILAR TO '[\x09\x0A\x0D\x20-\xD7FF\xE000-\xFFFD\x00010000-\x0010ffff]+');
4+
5+
-- storage.search: lists the entries found at path depth `levels` inside one bucket.
--
-- Parameters:
--   prefix, search  - both are lower-cased and concatenated; an object matches when
--                     LOWER(objects.name) starts with the resulting string
--   bucketname      - compared against objects.bucket_id
--   levels          - index into objects.path_tokens used as the entry name (default 1)
--   limits, offsets - LIMIT / OFFSET applied to the combined result (defaults 100 / 0)
--   sortcolumn      - name | updated_at | created_at | last_accessed_at;
--                     any other value falls back to name
--   sortorder       - asc | desc; any other value falls back to asc
--
-- Result rows (name, id, updated_at, created_at, last_accessed_at, metadata):
--   1. "folders": matching objects whose path_tokens length differs from `levels`
--      contribute path_tokens[levels], grouped, with NULL in every other column.
--   2. objects whose path_tokens length equals `levels`, with their own columns.
--
-- Only the whitelisted sort column and direction are concatenated into the dynamic
-- SQL text; every caller-supplied value is bound through USING ($1..$6).
CREATE OR REPLACE FUNCTION storage.search (
6+
prefix TEXT,
7+
bucketname TEXT,
8+
limits INT DEFAULT 100,
9+
levels INT DEFAULT 1,
10+
offsets INT DEFAULT 0,
11+
search TEXT DEFAULT '',
12+
sortcolumn TEXT DEFAULT 'name',
13+
sortorder TEXT DEFAULT 'asc'
14+
) RETURNS TABLE (
15+
name TEXT,
16+
id UUID,
17+
updated_at TIMESTAMPTZ,
18+
created_at TIMESTAMPTZ,
19+
last_accessed_at TIMESTAMPTZ,
20+
metadata JSONB
21+
)
22+
AS $$
23+
DECLARE
24+
v_order_by TEXT;
25+
v_sort_order TEXT;
26+
BEGIN
27+
CASE
28+
WHEN sortcolumn = 'name' THEN
29+
v_order_by = 'name';
30+
WHEN sortcolumn = 'updated_at' THEN
31+
v_order_by = 'updated_at';
32+
WHEN sortcolumn = 'created_at' THEN
33+
v_order_by = 'created_at';
34+
WHEN sortcolumn = 'last_accessed_at' THEN
35+
v_order_by = 'last_accessed_at';
36+
ELSE
37+
v_order_by = 'name';
38+
END CASE;
39+
40+
CASE
41+
WHEN sortorder = 'asc' THEN
42+
v_sort_order = 'asc';
43+
WHEN sortorder = 'desc' THEN
44+
v_sort_order = 'desc';
45+
ELSE
46+
v_sort_order = 'asc';
47+
END CASE;
48+
49+
v_order_by = v_order_by || ' ' || v_sort_order;
50+
51+
RETURN QUERY EXECUTE
52+
'WITH folders AS (
53+
SELECT path_tokens[$1] AS folder
54+
FROM storage.objects
55+
WHERE STARTS_WITH(LOWER(objects.name), $2 || $3)
56+
AND bucket_id = $4
57+
AND ARRAY_LENGTH(objects.path_tokens, 1) <> $1
58+
GROUP BY folder
59+
ORDER BY folder ' || v_sort_order || '
60+
)
61+
(SELECT folder AS "name",
62+
NULL AS id,
63+
NULL AS updated_at,
64+
NULL AS created_at,
65+
NULL AS last_accessed_at,
66+
NULL AS metadata FROM folders)
67+
UNION ALL
68+
(SELECT path_tokens[$1] AS "name",
69+
id,
70+
updated_at,
71+
created_at,
72+
last_accessed_at,
73+
metadata
74+
FROM storage.objects
75+
WHERE STARTS_WITH(LOWER(objects.name), $2 || $3)
76+
AND bucket_id = $4
77+
AND ARRAY_LENGTH(objects.path_tokens, 1) = $1
78+
ORDER BY ' || v_order_by || ')
79+
LIMIT $5
80+
OFFSET $6' USING levels, LOWER(prefix), LOWER(search), bucketname, limits, offsets;
81+
END;
82+
$$ LANGUAGE plpgsql STABLE;
83+
84+
-- storage.list_objects_with_delimiter: delimiter-aware listing of storage.objects.
--
-- Bound parameters of the dynamic query:
--   $1 = LOWER(prefix_param)   $2 = delimiter_param   $3 = max_keys
--   $4 = next_token            $5 = bucket_id         $6 = start_after
--
-- Behaviour:
--   * A row matches when bucket_id = $5 and LOWER(name) starts with the lower-cased prefix.
--   * If the delimiter occurs in the part of the name after the prefix length, the
--     returned name is cut off right after that first delimiter; otherwise the full
--     name is returned.
--   * When start_after is non-empty, only names greater than it (COLLATE "C") are kept.
--   * When next_token is non-empty, only rows whose (possibly cut-off) name is greater
--     than it (COLLATE "C") are kept.
--   * Rows are de-duplicated with DISTINCT ON (name COLLATE "C"), ordered by
--     name COLLATE "C" and limited to max_keys.
--
-- Returns: name, id, metadata, updated_at.
CREATE OR REPLACE FUNCTION storage.list_objects_with_delimiter(bucket_id TEXT, prefix_param TEXT, delimiter_param TEXT, max_keys INTEGER DEFAULT 100, start_after TEXT DEFAULT '', next_token TEXT DEFAULT '')
85+
RETURNS TABLE (name TEXT, id UUID, metadata JSONB, updated_at TIMESTAMPTZ) AS
86+
$$
87+
BEGIN
88+
RETURN QUERY EXECUTE
89+
'SELECT DISTINCT ON(name COLLATE "C") * FROM (
90+
SELECT
91+
CASE
92+
WHEN POSITION($2 IN SUBSTRING(name FROM LENGTH($1) + 1)) > 0 THEN
93+
SUBSTRING(name FROM 1 for LENGTH($1) + POSITION($2 IN SUBSTRING(name FROM LENGTH($1) + 1)))
94+
ELSE
95+
name
96+
END AS name, id, metadata, updated_at
97+
FROM
98+
storage.objects
99+
WHERE
100+
bucket_id = $5 AND
101+
STARTS_WITH(LOWER(name), $1) AND
102+
CASE
103+
WHEN $6 != '''' THEN
104+
name COLLATE "C" > $6
105+
ELSE true END
106+
AND CASE
107+
WHEN $4 != '''' THEN
108+
CASE
109+
WHEN POSITION($2 IN SUBSTRING(name FROM LENGTH($1) + 1)) > 0 THEN
110+
SUBSTRING(name FROM 1 FOR LENGTH($1) + POSITION($2 IN SUBSTRING(name FROM LENGTH($1) + 1))) COLLATE "C" > $4
111+
ELSE
112+
name COLLATE "C" > $4
113+
END
114+
ELSE
115+
TRUE
116+
END
117+
ORDER BY
118+
name COLLATE "C" ASC) AS e ORDER BY name COLLATE "C" LIMIT $3'
119+
USING LOWER(prefix_param), delimiter_param, max_keys, next_token, bucket_id, start_after;
120+
END;
121+
$$ LANGUAGE plpgsql;
122+
123+
-- storage.list_multipart_uploads_with_delimiter: delimiter-aware listing of
-- storage.s3_multipart_uploads.
--
-- Bound parameters of the dynamic query:
--   $1 = LOWER(prefix_param)   $2 = delimiter_param   $3 = max_keys
--   $4 = next_key_token        $5 = bucket_id         $6 = next_upload_token
--
-- Behaviour:
--   * A row matches when bucket_id = $5 and LOWER(key) starts with the lower-cased prefix.
--   * If the delimiter occurs in the part of the key after the prefix length, the
--     returned key is cut off right after that first delimiter; otherwise the full
--     key is returned.
--   * The key cursor (next_key_token) is applied only when it is non-empty AND
--     next_upload_token is empty; it compares the (possibly cut-off) key with COLLATE "C".
--   * When next_upload_token is non-empty, only rows with id greater than it
--     (COLLATE "C") are kept.
--   * Rows are de-duplicated with DISTINCT ON (key COLLATE "C"); the inner query orders
--     by key COLLATE "C", created_at and the result is limited to max_keys.
--
-- Returns: key, id, created_at.
CREATE OR REPLACE FUNCTION storage.list_multipart_uploads_with_delimiter(bucket_id text, prefix_param text, delimiter_param text, max_keys integer default 100, next_key_token text DEFAULT '', next_upload_token text default '')
124+
RETURNS TABLE (key text, id text, created_at timestamptz) AS
125+
$$
126+
BEGIN
127+
RETURN QUERY EXECUTE
128+
'SELECT DISTINCT ON(key COLLATE "C") * FROM (
129+
SELECT
130+
CASE
131+
WHEN POSITION($2 IN SUBSTRING(key FROM LENGTH($1) + 1)) > 0 THEN
132+
SUBSTRING(key FROM 1 FOR LENGTH($1) + POSITION($2 IN SUBSTRING(key FROM LENGTH($1) + 1)))
133+
ELSE
134+
key
135+
END AS key, id, created_at
136+
FROM
137+
storage.s3_multipart_uploads
138+
WHERE
139+
bucket_id = $5 AND
140+
STARTS_WITH(LOWER(key), $1) AND
141+
CASE
142+
WHEN $4 != '''' AND $6 = '''' THEN
143+
CASE
144+
WHEN POSITION($2 IN SUBSTRING(key FROM LENGTH($1) + 1)) > 0 THEN
145+
SUBSTRING(key FROM 1 FOR LENGTH($1) + POSITION($2 IN SUBSTRING(key FROM LENGTH($1) + 1))) COLLATE "C" > $4
146+
ELSE
147+
key COLLATE "C" > $4
148+
END
149+
ELSE
150+
TRUE
151+
END AND
152+
CASE
153+
WHEN $6 != '''' THEN
154+
id COLLATE "C" > $6
155+
ELSE
156+
TRUE
157+
END
158+
ORDER BY
159+
key COLLATE "C" ASC, created_at ASC) AS e ORDER BY key COLLATE "C" LIMIT $3'
160+
USING LOWER(prefix_param), delimiter_param, max_keys, next_key_token, bucket_id, next_upload_token;
161+
END;
162+
$$ LANGUAGE plpgsql;

src/http/plugins/xml.ts

+4
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ export const xmlParser = fastifyPlugin(
2121
isArray: (_: string, jpath: string) => {
2222
return opts.parseAsArray?.includes(jpath)
2323
},
24+
// Decode hexadecimal numeric character references (e.g. `&#x0A;`, `&#x1F642;`)
// that appear in tag values.
tagValueProcessor: (name: string, value: string) =>
  value.replace(/&#x([0-9a-fA-F]{1,6});/g, (match: string, hex: string) => {
    const codePoint = Number.parseInt(hex, 16)
    // String.fromCharCode would truncate to 16 bits and corrupt anything above
    // U+FFFF. String.fromCodePoint handles the full range but throws RangeError
    // above U+10FFFF, so such out-of-range references are left untouched.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match
  }),
2428
})
2529
}
2630

src/internal/database/migrations/types.ts

+1
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,5 @@ export const DBMigration = {
2424
'optimize-search-function': 22,
2525
'operation-function': 23,
2626
'custom-metadata': 24,
27+
'unicode-object-names': 25,
2728
} as const

src/internal/errors/codes.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ export const ERRORS = {
265265
code: ErrorCode.InvalidKey,
266266
resource: key,
267267
httpStatusCode: 400,
268-
message: `Invalid key: ${key}`,
268+
// encodeURIComponent throws URIError on lone surrogates, and a lone surrogate is
// one of the reasons a key is invalid. Swap any unpaired surrogate for U+FFFD
// first so building this 400 response can never throw; well-formed keys are
// encoded exactly as before.
message: `Invalid key: ${encodeURIComponent(
  String(key).replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDFFF]/g, (m) =>
    m.length === 2 ? m : '\uFFFD'
  )
)}`,
269269
originalError: e,
270270
}),
271271

src/storage/database/knex.ts

+5-1
Original file line numberDiff line numberDiff line change
@@ -877,7 +877,11 @@ export class DBError extends StorageBackendError implements RenderableError {
877877
code: pgError.code,
878878
})
879879
default:
880-
return ERRORS.DatabaseError(pgError.message, pgError).withMetadata({
880+
const errorMessage =
881+
pgError.code === '23514' && pgError.constraint === 'objects_name_check'
882+
? 'Invalid object name'
883+
: pgError.message
884+
return ERRORS.DatabaseError(errorMessage, pgError).withMetadata({
881885
query,
882886
code: pgError.code,
883887
})

src/storage/limits.ts

+9-3
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,15 @@ export async function isImageTransformationEnabled(tenantId: string) {
4747
* @param key
4848
*/
4949
export function isValidKey(key: string): boolean {
  // Allow any sequence of Unicode characters with UTF-8 encoding,
  // except characters not allowed in XML 1.0.
  // See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
  // See: https://www.w3.org/TR/REC-xml/#charsets

  // C0 control characters other than TAB, LF and CR, plus U+FFFE and U+FFFF.
  const forbiddenChar = /[\0-\x08\x0B\f\x0E-\x1F\uFFFE\uFFFF]/
  // A high surrogate that is not followed by a low surrogate.
  const loneHighSurrogate = /[\uD800-\uDBFF](?![\uDC00-\uDFFF])/
  // A low surrogate at the start of the key or not preceded by a high surrogate.
  const loneLowSurrogate = /(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]/

  return (
    key.length > 0 &&
    !forbiddenChar.test(key) &&
    !loneHighSurrogate.test(key) &&
    !loneLowSurrogate.test(key)
  )
}
5460

5561
/**

src/storage/protocols/s3/s3-handler.ts

+34
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,9 @@ export class S3ProtocolHandler {
506506
throw ERRORS.InvalidUploadId()
507507
}
508508

509+
mustBeValidBucketName(Bucket)
510+
mustBeValidKey(Key)
511+
509512
await uploader.canUpload({
510513
bucketId: Bucket as string,
511514
objectName: Key as string,
@@ -601,6 +604,9 @@ export class S3ProtocolHandler {
601604
throw ERRORS.MissingContentLength()
602605
}
603606

607+
mustBeValidBucketName(Bucket)
608+
mustBeValidKey(Key)
609+
604610
const bucket = await this.storage.asSuperUser().findBucket(Bucket, 'file_size_limit')
605611
const maxFileSize = await getFileSizeLimit(this.storage.db.tenantId, bucket?.file_size_limit)
606612

@@ -755,6 +761,9 @@ export class S3ProtocolHandler {
755761
throw ERRORS.MissingParameter('Key')
756762
}
757763

764+
mustBeValidBucketName(Bucket)
765+
mustBeValidKey(Key)
766+
758767
const multipart = await this.storage.db
759768
.asSuperUser()
760769
.findMultipartUpload(UploadId, 'id,version')
@@ -797,6 +806,9 @@ export class S3ProtocolHandler {
797806
throw ERRORS.MissingParameter('Bucket')
798807
}
799808

809+
mustBeValidBucketName(Bucket)
810+
mustBeValidKey(Key)
811+
800812
const object = await this.storage
801813
.from(Bucket)
802814
.findObject(Key, 'metadata,user_metadata,created_at,updated_at')
@@ -836,6 +848,9 @@ export class S3ProtocolHandler {
836848
throw ERRORS.MissingParameter('Key')
837849
}
838850

851+
mustBeValidBucketName(Bucket)
852+
mustBeValidKey(Key)
853+
839854
const object = await this.storage.from(Bucket).findObject(Key, 'id')
840855

841856
if (!object) {
@@ -864,6 +879,9 @@ export class S3ProtocolHandler {
864879
const bucket = command.Bucket as string
865880
const key = command.Key as string
866881

882+
mustBeValidBucketName(bucket)
883+
mustBeValidKey(key)
884+
867885
const object = await this.storage.from(bucket).findObject(key, 'version,user_metadata')
868886
const response = await this.storage.backend.getObject(
869887
storageS3Bucket,
@@ -916,6 +934,9 @@ export class S3ProtocolHandler {
916934
throw ERRORS.MissingParameter('Key')
917935
}
918936

937+
mustBeValidBucketName(Bucket)
938+
mustBeValidKey(Key)
939+
919940
await this.storage.from(Bucket).deleteObject(Key)
920941

921942
return {}
@@ -947,6 +968,9 @@ export class S3ProtocolHandler {
947968
return {}
948969
}
949970

971+
mustBeValidBucketName(Bucket)
972+
Delete.Objects.forEach((o) => mustBeValidKey(o.Key))
973+
950974
const deletedResult = await this.storage
951975
.from(Bucket)
952976
.deleteObjects(Delete.Objects.map((o) => o.Key || ''))
@@ -1017,6 +1041,11 @@ export class S3ProtocolHandler {
10171041
command.MetadataDirective = 'COPY'
10181042
}
10191043

1044+
mustBeValidBucketName(Bucket)
1045+
mustBeValidKey(Key)
1046+
mustBeValidBucketName(sourceBucket)
1047+
mustBeValidKey(sourceKey)
1048+
10201049
const copyResult = await this.storage.from(sourceBucket).copyObject({
10211050
sourceKey,
10221051
destinationBucket: Bucket,
@@ -1147,6 +1176,11 @@ export class S3ProtocolHandler {
11471176
throw ERRORS.NoSuchKey('')
11481177
}
11491178

1179+
mustBeValidBucketName(Bucket)
1180+
mustBeValidKey(Key)
1181+
mustBeValidBucketName(sourceBucketName)
1182+
mustBeValidKey(sourceKey)
1183+
11501184
// Check if copy source exists
11511185
const copySource = await this.storage.db.findObject(
11521186
sourceBucketName,

src/test/common.ts

+37
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,43 @@ export const adminApp = app({})
88

99
const ENV = process.env
1010

11+
/**
 * Builds an object name that exercises the characters the storage API is expected
 * to accept: all Unicode characters with UTF-8 encoding, per the AWS S3 object
 * naming guide. The name includes:
 * - Safe characters: 0-9 a-z A-Z !-_.*'()
 * - Characters that might require special handling: &$@=;/:+,? and Space, plus
 *   the ASCII characters \t, \n, and \r.
 * - Characters to avoid: \{}^%`[]"<>~#| and the 128–255 decimal range.
 * - Astral code points.
 *
 * The following characters are not allowed and are therefore not included:
 * - ASCII characters 0x00–0x1F, except 0x09, 0x0A, and 0x0D.
 * - Unicode \u{FFFE} and \u{FFFF}.
 * - Lone surrogate characters.
 *
 * See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
 * See: https://www.w3.org/TR/REC-xml/#charsets
 *
 * @returns the three-segment ('/'-separated) test object name
 */
export function getUnicodeObjectName(): string {
  // Code points 128..254 (127 characters) form their own path segment.
  // MinIO max. length for each '/' separated segment is 255
  const highByteSegment = Array.from({ length: 127 }, (_, i) =>
    String.fromCodePoint(i + 128)
  ).join('')

  return [
    'test',
    "!-_*.'()",
    // Characters that might require special handling
    '&$@=;:+,? \x09\x0A\x0D',
    // Characters to avoid
    '\\{}^%`[]"<>~#|\xFF',
    '/',
    highByteSegment,
    '/',
    // Some special Unicode characters
    '\u2028\u202F\u{0001FFFF}',
    // Some other Unicode characters
    '일이삼\u{0001f642}',
  ].join('')
}
43+
44+
/**
 * Returns an object name that is expected to be rejected: it contains the control
 * characters U+0001–U+0003, which are outside the XML 1.0 character set.
 */
export function getBadObjectName(): string {
  return 'test \x01\x02\x03'
}
47+
1148
export function useMockQueue() {
1249
const queueSpy: jest.SpyInstance | undefined = undefined
1350
beforeEach(() => {

0 commit comments

Comments
 (0)