@@ -42,7 +42,7 @@ fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F)
42
42
}
43
43
44
44
// Perform decomposition for Hangul
45
- if is_hangul ( c) {
45
+ if is_hangul_syllable ( c) {
46
46
decompose_hangul ( c, emit_char) ;
47
47
return ;
48
48
}
@@ -77,26 +77,30 @@ const T_COUNT: u32 = 28;
77
77
const N_COUNT : u32 = ( V_COUNT * T_COUNT ) ;
78
78
const S_COUNT : u32 = ( L_COUNT * N_COUNT ) ;
79
79
80
- pub ( crate ) fn is_hangul ( c : char ) -> bool {
80
+ const S_END : u32 = S_BASE + S_COUNT - 1 ;
81
+ const L_END : u32 = L_BASE + L_COUNT - 1 ;
82
+ const V_END : u32 = V_BASE + V_COUNT - 1 ;
83
+ const T_END : u32 = T_BASE + T_COUNT - 1 ;
84
+
85
+ pub ( crate ) fn is_hangul_syllable ( c : char ) -> bool {
81
86
( c as u32 ) >= S_BASE && ( c as u32 ) < ( S_BASE + S_COUNT )
82
87
}
83
88
84
89
// Decompose a precomposed Hangul syllable
85
90
#[ allow( unsafe_code) ]
86
91
#[ inline( always) ]
87
92
fn decompose_hangul < F > ( s : char , mut emit_char : F ) where F : FnMut ( char ) {
88
- let si = s as u32 - S_BASE ;
89
-
90
- let li = si / N_COUNT ;
93
+ let s_index = s as u32 - S_BASE ;
94
+ let l_index = s_index / N_COUNT ;
91
95
unsafe {
92
- emit_char ( char:: from_u32_unchecked ( L_BASE + li ) ) ;
96
+ emit_char ( char:: from_u32_unchecked ( L_BASE + l_index ) ) ;
93
97
94
- let vi = ( si % N_COUNT ) / T_COUNT ;
95
- emit_char ( char:: from_u32_unchecked ( V_BASE + vi ) ) ;
98
+ let v_index = ( s_index % N_COUNT ) / T_COUNT ;
99
+ emit_char ( char:: from_u32_unchecked ( V_BASE + v_index ) ) ;
96
100
97
- let ti = si % T_COUNT ;
98
- if ti > 0 {
99
- emit_char ( char:: from_u32_unchecked ( T_BASE + ti ) ) ;
101
+ let t_index = s_index % T_COUNT ;
102
+ if t_index > 0 {
103
+ emit_char ( char:: from_u32_unchecked ( T_BASE + t_index ) ) ;
100
104
}
101
105
}
102
106
}
@@ -112,20 +116,33 @@ pub(crate) fn hangul_decomposition_length(s: char) -> usize {
112
116
#[ allow( unsafe_code) ]
113
117
#[ inline( always) ]
114
118
fn compose_hangul ( a : char , b : char ) -> Option < char > {
115
- let l = a as u32 ;
116
- let v = b as u32 ;
117
- // Compose an LPart and a VPart
118
- if L_BASE <= l && l < ( L_BASE + L_COUNT ) // l should be an L choseong jamo
119
- && V_BASE <= v && v < ( V_BASE + V_COUNT ) { // v should be a V jungseong jamo
120
- let r = S_BASE + ( l - L_BASE ) * N_COUNT + ( v - V_BASE ) * T_COUNT ;
121
- return unsafe { Some ( char:: from_u32_unchecked ( r) ) } ;
119
+ let ( a, b) = ( a as u32 , b as u32 ) ;
120
+ match ( a, b) {
121
+ // Compose a leading consonant and a vowel together into an LV_Syllable
122
+ ( L_BASE ... L_END , V_BASE ... V_END ) => {
123
+ let l_index = a - L_BASE ;
124
+ let v_index = b - V_BASE ;
125
+ let lv_index = l_index * N_COUNT + v_index * T_COUNT ;
126
+ let s = S_BASE + lv_index;
127
+ Some ( unsafe { char:: from_u32_unchecked ( s) } )
128
+ } ,
129
+ // Compose an LV_Syllable and a trailing consonant into an LVT_Syllable
130
+ ( S_BASE ... S_END , T_BASE ... T_END ) if ( a - S_BASE ) % T_COUNT == 0 && ( b - T_BASE ) > 0 => {
131
+ Some ( unsafe { char:: from_u32_unchecked ( a + ( b - T_BASE ) ) } )
132
+ } ,
133
+ _ => None ,
122
134
}
123
- // Compose an LVPart and a TPart
124
- if S_BASE <= l && l <= ( S_BASE +S_COUNT -T_COUNT ) // l should be a syllable block
125
- && T_BASE <= v && v < ( T_BASE +T_COUNT ) // v should be a T jongseong jamo
126
- && ( l - S_BASE ) % T_COUNT == 0 { // l should be an LV syllable block (not LVT)
127
- let r = l + ( v - T_BASE ) ;
128
- return unsafe { Some ( char:: from_u32_unchecked ( r) ) } ;
135
+ }
136
+
137
#[cfg(test)]
mod tests {
    use super::compose_hangul;

    // Regression test from a bugfix where we were composing an LV_Syllable with
    // T_BASE directly. (We should only compose an LV_Syllable with a character
    // in the range `T_BASE + 1 ..= T_END`.)
    #[test]
    fn test_hangul_composition() {
        assert_eq!(compose_hangul('\u{c8e0}', '\u{11a7}'), None);
    }

    // Companion to the regression test: the same LV_Syllable must still compose
    // with the first real trailing consonant (`T_BASE + 1`), so the guard
    // cannot silently become too strict.
    #[test]
    fn test_hangul_composition_accepts_first_trailing_consonant() {
        assert_eq!(compose_hangul('\u{c8e0}', '\u{11a8}'), Some('\u{c8e1}'));
    }

    // An LVT_Syllable already carries a trailing consonant and must not
    // compose with another one.
    #[test]
    fn test_hangul_composition_rejects_lvt_base() {
        assert_eq!(compose_hangul('\u{c8e1}', '\u{11a8}'), None);
    }
}
0 commit comments