Skip to content

Commit 5844d75

Browse files
author
Harvinder Singh Rathor
committed
serde_derive_internals: Unicode-aware camelCase first-char lowering
Problem: `#[serde(rename_all = "camelCase")]` could panic when a field or variant name begins with a non-ASCII character, because the first character was taken by byte slicing (`[..1]`), and byte index 1 is not a UTF-8 character boundary when the first character is multi-byte. Example: `İstanbul` (first character U+0130). Fix: apply the Unicode-aware `char::to_lowercase()` to the first scalar value, then append the remainder unchanged. Both CamelCase arms (variant and field) are updated. Compatibility: ASCII behavior is unchanged; only the first scalar is lowercased. Tests: ASCII regression; Σ -> σ; `İstanbul` expansion (`i` + U+0307); Chinese/Hindi stable; mixed Devanagari+ASCII snake_case (`परिणाम_feature` -> `परिणामFeature`). Closes: #2953 Signed-off-by: Harvinder Singh Rathor <harvinderrathor9@gmail.com>
1 parent a866b33 commit 5844d75

File tree

1 file changed

+119
-2
lines changed

1 file changed

+119
-2
lines changed

serde_derive/src/internals/case.rs

Lines changed: 119 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@ impl RenameRule {
5959
None | PascalCase => variant.to_owned(),
6060
LowerCase => variant.to_ascii_lowercase(),
6161
UpperCase => variant.to_ascii_uppercase(),
62-
CamelCase => variant[..1].to_ascii_lowercase() + &variant[1..],
62+
CamelCase => Self::lowercase_first_char_unicode(variant),
6363
SnakeCase => {
6464
let mut snake = String::new();
6565
for (i, ch) in variant.char_indices() {
@@ -100,14 +100,30 @@ impl RenameRule {
100100
}
101101
CamelCase => {
102102
let pascal = PascalCase.apply_to_field(field);
103-
pascal[..1].to_ascii_lowercase() + &pascal[1..]
103+
Self::lowercase_first_char_unicode(&pascal)
104104
}
105105
ScreamingSnakeCase => field.to_ascii_uppercase(),
106106
KebabCase => field.replace('_', "-"),
107107
ScreamingKebabCase => ScreamingSnakeCase.apply_to_field(field).replace('_', "-"),
108108
}
109109
}
110110

111+
/// Lowercase the first Unicode scalar using full Unicode case mapping,
112+
/// then append the remainder unchanged. Avoids UTF-8 slicing panics.
113+
fn lowercase_first_char_unicode(s: &str) -> String {
114+
let mut chars = s.chars();
115+
match chars.next() {
116+
::std::option::Option::None => String::new(),
117+
::std::option::Option::Some(first) => {
118+
// `to_lowercase()` may expand (e.g., 'İ' -> 'i' + U+0307)
119+
let mut out = String::with_capacity(s.len());
120+
out.extend(first.to_lowercase());
121+
out.extend(chars);
122+
out
123+
}
124+
}
125+
}
126+
111127
/// Returns the `RenameRule` if it is not `None`, `rule_b` otherwise.
112128
pub fn or(self, rule_b: Self) -> Self {
113129
match self {
@@ -198,3 +214,104 @@ fn rename_fields() {
198214
assert_eq!(ScreamingKebabCase.apply_to_field(original), screaming_kebab);
199215
}
200216
}
217+
218+
#[cfg(test)]
219+
mod unicode_camelcase_tests {
220+
use super::RenameRule;
221+
222+
// --- ASCII regressions: behavior must remain identical ---
223+
224+
#[test]
225+
fn camelcase_variant_ascii_regression() {
226+
assert_eq!(
227+
RenameRule::CamelCase.apply_to_variant("FieldName"),
228+
"fieldName"
229+
);
230+
assert_eq!(
231+
RenameRule::CamelCase.apply_to_variant("URLValue"),
232+
"uRLValue" // existing behavior: only first scalar is lowercased
233+
);
234+
}
235+
236+
#[test]
237+
fn camelcase_field_ascii_regression() {
238+
assert_eq!(
239+
RenameRule::CamelCase.apply_to_field("field_name"),
240+
"fieldName"
241+
);
242+
assert_eq!(
243+
RenameRule::CamelCase.apply_to_field("long_field_name"),
244+
"longFieldName"
245+
);
246+
}
247+
248+
// --- Unicode behavior: first scalar lowercased using full Unicode mapping ---
249+
250+
#[test]
251+
fn camelcase_variant_non_ascii_basic() {
252+
// Greek capital sigma -> small sigma (single-scalar mapping)
253+
assert_eq!(
254+
RenameRule::CamelCase.apply_to_variant("Σomething"),
255+
"σomething"
256+
);
257+
}
258+
259+
#[test]
260+
fn camelcase_variant_non_ascii_expanding() {
261+
// LATIN CAPITAL LETTER I WITH DOT ABOVE (U+0130) lowercases to
262+
// 'i' + COMBINING DOT ABOVE (U+0307) in Unicode
263+
assert_eq!(
264+
RenameRule::CamelCase.apply_to_variant("İstanbul"),
265+
"i\u{307}stanbul"
266+
);
267+
}
268+
269+
#[test]
270+
fn camelcase_field_mixed_identifier_unicode() {
271+
// apply_to_field: first makes PascalCase from snake_case,
272+
// then CamelCase lowercases the first Unicode scalar only.
273+
// Devanagari has no letter case, so the first scalar is left unchanged;
274+
// ASCII segment after '_' still Pascalizes into 'Feature'.
275+
assert_eq!(
276+
RenameRule::CamelCase.apply_to_field("परिणाम_feature"),
277+
"परिणामFeature"
278+
);
279+
}
280+
281+
#[test]
282+
fn camelcase_field_chinese_identifiers_stable() {
283+
// Fields with non-cased Han characters should remain unchanged.
284+
assert_eq!(RenameRule::CamelCase.apply_to_field("项目名称"), "项目名称");
285+
assert_eq!(RenameRule::CamelCase.apply_to_field("项目地址"), "项目地址");
286+
}
287+
288+
#[test]
289+
fn camelcase_variant_chinese_identifiers_stable() {
290+
// Same expectation for variant names.
291+
assert_eq!(
292+
RenameRule::CamelCase.apply_to_variant("项目名称"),
293+
"项目名称"
294+
);
295+
assert_eq!(
296+
RenameRule::CamelCase.apply_to_variant("项目地址"),
297+
"项目地址"
298+
);
299+
}
300+
301+
// Sanity: other rename rules remain unaffected by our change
302+
#[test]
303+
fn other_rules_unchanged() {
304+
assert_eq!(
305+
RenameRule::SnakeCase.apply_to_variant("FieldName"),
306+
"field_name"
307+
);
308+
assert_eq!(
309+
RenameRule::KebabCase.apply_to_field("field_name"),
310+
"field-name"
311+
);
312+
assert_eq!(
313+
RenameRule::ScreamingSnakeCase.apply_to_variant("FieldName"),
314+
"FIELD_NAME"
315+
);
316+
}
317+
}

0 commit comments

Comments
 (0)