Skip to content

Commit fd47c3b

Browse files
authored
Merge pull request #7557 from andylokandy/ref
2 parents 2f8945a + 9eb2213 commit fd47c3b

File tree

3 files changed

+33
-50
lines changed

3 files changed

+33
-50
lines changed

src/query/functions-v2/src/scalars/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ mod boolean;
2222
mod control;
2323
mod datetime;
2424
mod math;
25-
mod soundex;
25+
2626
mod string;
2727
mod string_multi_args;
2828

src/query/functions-v2/src/scalars/soundex.rs

Lines changed: 0 additions & 44 deletions
This file was deleted.

src/query/functions-v2/src/scalars/string.rs

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,6 @@ use common_expression::Value;
3030
use common_expression::ValueRef;
3131
use itertools::izip;
3232

33-
use super::soundex::Soundex;
34-
3533
pub fn register(registry: &mut FunctionRegistry) {
3634
registry.register_passthrough_nullable_1_arg::<StringType, StringType, _, _>(
3735
"upper",
@@ -574,16 +572,16 @@ pub fn register(registry: &mut FunctionRegistry) {
574572
let mut count = 0;
575573

576574
for ch in String::from_utf8_lossy(val).chars() {
577-
let score = Soundex::number_map(ch);
575+
let score = soundex::number_map(ch);
578576
if last.is_none() {
579-
if !Soundex::is_uni_alphabetic(ch) {
577+
if !soundex::is_uni_alphabetic(ch) {
580578
continue;
581579
}
582580
last = score;
583581
writer.put_char(ch.to_ascii_uppercase());
584582
} else {
585583
if !ch.is_ascii_alphabetic()
586-
|| Soundex::is_drop(ch)
584+
|| soundex::is_drop(ch)
587585
|| score.is_none()
588586
|| score == last
589587
{
@@ -607,6 +605,35 @@ pub fn register(registry: &mut FunctionRegistry) {
607605
);
608606
}
609607

608+
/// Character-classification helpers used by the `soundex` string function.
mod soundex {
    /// Returns the Soundex digit for `i` as an ASCII byte, ignoring ASCII case.
    ///
    /// Consonants map to `b'1'..=b'6'`. Every other character (vowels, digits,
    /// punctuation, non-ASCII) falls through to `b'0'`, so the result is always
    /// `Some`. The `Option` return type is kept so the value can be stored in and
    /// compared against an optional "previous score".
    #[inline(always)]
    pub fn number_map(i: char) -> Option<u8> {
        match i.to_ascii_lowercase() {
            'b' | 'f' | 'p' | 'v' => Some(b'1'),
            'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' => Some(b'2'),
            'd' | 't' => Some(b'3'),
            'l' => Some(b'4'),
            'm' | 'n' => Some(b'5'),
            'r' => Some(b'6'),
            // Not a coded consonant: report the neutral digit rather than `None`.
            _ => Some(b'0'),
        }
    }

    /// Returns `true` when `c` is one of the letters that never contributes a
    /// digit: the vowels `a e i o u y` plus `h` and `w`, in either ASCII case.
    #[inline(always)]
    pub fn is_drop(c: char) -> bool {
        matches!(
            c.to_ascii_lowercase(),
            'a' | 'e' | 'i' | 'o' | 'u' | 'y' | 'h' | 'w'
        )
    }

    /// Returns `true` when `c` is an ASCII letter or any code point at or above
    /// U+00C0.
    ///
    /// Reference implementation:
    /// https://github.com/mysql/mysql-server/blob/3290a66c89eb1625a7058e0ef732432b6952b435/sql/item_strfunc.cc#L1919
    #[inline(always)]
    pub fn is_uni_alphabetic(c: char) -> bool {
        // `char` ordering is by code point, so this is the same test as the
        // former `c as i32 >= 0xC0` without the numeric cast.
        c.is_ascii_alphabetic() || c >= '\u{C0}'
    }
}
636+
610637
// Vectorize a string-to-string function with a custom `estimate_bytes`.
611638
fn vectorize_string_to_string(
612639
estimate_bytes: impl Fn(&StringColumn) -> usize + Copy,

0 commit comments

Comments
 (0)