@@ -30,8 +30,6 @@ use common_expression::Value;
30
30
use common_expression:: ValueRef ;
31
31
use itertools:: izip;
32
32
33
- use super :: soundex:: Soundex ;
34
-
35
33
pub fn register ( registry : & mut FunctionRegistry ) {
36
34
registry. register_passthrough_nullable_1_arg :: < StringType , StringType , _ , _ > (
37
35
"upper" ,
@@ -574,16 +572,16 @@ pub fn register(registry: &mut FunctionRegistry) {
574
572
let mut count = 0 ;
575
573
576
574
for ch in String :: from_utf8_lossy ( val) . chars ( ) {
577
- let score = Soundex :: number_map ( ch) ;
575
+ let score = soundex :: number_map ( ch) ;
578
576
if last. is_none ( ) {
579
- if !Soundex :: is_uni_alphabetic ( ch) {
577
+ if !soundex :: is_uni_alphabetic ( ch) {
580
578
continue ;
581
579
}
582
580
last = score;
583
581
writer. put_char ( ch. to_ascii_uppercase ( ) ) ;
584
582
} else {
585
583
if !ch. is_ascii_alphabetic ( )
586
- || Soundex :: is_drop ( ch)
584
+ || soundex :: is_drop ( ch)
587
585
|| score. is_none ( )
588
586
|| score == last
589
587
{
@@ -607,6 +605,35 @@ pub fn register(registry: &mut FunctionRegistry) {
607
605
) ;
608
606
}
609
607
608
/// Character-classification helpers used by the Soundex encoding loop in `register`.
mod soundex {
    /// Letters that `is_drop` reports as dropped (compared after ASCII lowercasing).
    const DROPPED: [char; 8] = ['a', 'e', 'i', 'o', 'u', 'y', 'h', 'w'];

    /// Maps a character to its Soundex digit, returned as an ASCII byte.
    ///
    /// ASCII letters are matched case-insensitively. Any character outside the
    /// six consonant groups (vowels, digits, punctuation, non-ASCII, ...) maps
    /// to `b'0'`, so the result is always `Some`.
    #[inline(always)]
    pub fn number_map(i: char) -> Option<u8> {
        let digit = match i.to_ascii_lowercase() {
            'b' | 'f' | 'p' | 'v' => b'1',
            'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' => b'2',
            'd' | 't' => b'3',
            'l' => b'4',
            'm' | 'n' => b'5',
            'r' => b'6',
            _ => b'0',
        };
        Some(digit)
    }

    /// Returns `true` when `c` is one of `a e i o u y h w`, ignoring ASCII case.
    #[inline(always)]
    pub fn is_drop(c: char) -> bool {
        DROPPED.contains(&c.to_ascii_lowercase())
    }

    /// Returns `true` for ASCII letters and for any code point at or above U+00C0.
    ///
    /// Reference implementation:
    /// https://github.com/mysql/mysql-server/blob/3290a66c89eb1625a7058e0ef732432b6952b435/sql/item_strfunc.cc#L1919
    #[inline(always)]
    pub fn is_uni_alphabetic(c: char) -> bool {
        c.is_ascii_alphabetic() || u32::from(c) >= 0xC0
    }
}
636
+
610
637
// Vectorize string to string function with custom estimate_bytes.
611
638
fn vectorize_string_to_string (
612
639
estimate_bytes : impl Fn ( & StringColumn ) -> usize + Copy ,
0 commit comments