Skip to content

Commit 68ea86c

Browse files
authored
Merge pull request #9190 from BohuTANG/dev-refine-string
refactor: move string func from base/io to base/string_func
2 parents dd5f9c6 + c790bbe commit 68ea86c

File tree

16 files changed

+146
-171
lines changed

16 files changed

+146
-171
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/common/base/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ async-channel = "1.7.1"
3030
async-trait = "0.1.57"
3131
bytesize = "1.1.0"
3232
ctrlc = { version = "3.2.3", features = ["termination"] }
33+
enquote = "1.1.0"
3334
futures = "0.3.24"
3435
libc = "0.2.133"
3536
num_cpus = "1.13.1"

src/common/base/src/base/mod.rs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ mod shutdown_signal;
2424
mod singleton_instance;
2525
mod stop_handle;
2626
mod stoppable;
27-
mod string_func;
27+
mod string;
2828
mod thread;
2929
mod thread_pool;
3030
mod uniq_id;
@@ -52,10 +52,13 @@ pub use shutdown_signal::SignalType;
5252
pub use singleton_instance::GlobalInstance;
5353
pub use stop_handle::StopHandle;
5454
pub use stoppable::Stoppable;
55-
pub use string_func::escape_for_key;
56-
pub use string_func::mask_string;
57-
pub use string_func::replace_nth_char;
58-
pub use string_func::unescape_for_key;
55+
pub use string::convert_byte_size;
56+
pub use string::convert_number_size;
57+
pub use string::escape_for_key;
58+
pub use string::mask_string;
59+
pub use string::replace_nth_char;
60+
pub use string::unescape_for_key;
61+
pub use string::unescape_string;
5962
pub use thread::Thread;
6063
pub use thread::ThreadJoinHandle;
6164
pub use thread_pool::TaskJoinHandler;

src/common/base/src/base/string_func.rs renamed to src/common/base/src/base/string.rs

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414

1515
use std::string::FromUtf8Error;
1616

17+
use common_exception::ErrorCode;
18+
use common_exception::Result;
19+
1720
/// Function that escapes special characters in a string.
1821
///
1922
/// All characters except digit, alphabet and '_' are treated as special characters.
@@ -114,3 +117,55 @@ pub fn replace_nth_char(s: &str, idx: usize, newchar: char) -> String {
114117
.map(|(i, c)| if i == idx { newchar } else { c })
115118
.collect()
116119
}
120+
121+
/// Returns string after processing escapes.
122+
/// This used for settings string unescape, like unescape format_field_delimiter from `\\x01` to `\x01`.
123+
pub fn unescape_string(escape_str: &str) -> Result<String> {
124+
enquote::unescape(escape_str, None)
125+
.map_err(|e| ErrorCode::Internal(format!("unescape:{} error:{:?}", escape_str, e)))
126+
}
127+
128+
/// Formats a byte count as a human-readable string using binary (1024-based) units.
///
/// The magnitude is scaled to the largest unit from `B` up to `YiB` that keeps
/// the printed value at or above 1 and is rendered with two decimals, e.g.
/// `1229.0` becomes `"1.20 KiB"`. Values below one byte are printed in `B`, and
/// values beyond the `YiB` range stay in `YiB`. Negative inputs are prefixed
/// with `-`.
pub fn convert_byte_size(num: f64) -> String {
    const UNITS: [&str; 9] = ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"];
    const STEP: f64 = 1024_f64;
    let last = UNITS.len() - 1;

    // Compare against zero instead of inspecting the sign bit so that `-0.0`
    // is not rendered as "-0.00 B".
    let negative = if num < 0_f64 { "-" } else { "" };
    let num = num.abs();

    // Select the unit by comparing against exact powers of 1024. A ratio of
    // logarithms can land just below an integer at an exact power and pick the
    // smaller unit.
    let mut exponent = 0;
    let mut scale = 1_f64;
    while exponent < last && num >= scale * STEP {
        scale *= STEP;
        exponent += 1;
    }

    // A value just under the next unit (e.g. 1023.999) would print as
    // "1024.00" after rounding to two decimals; promote it to the next unit.
    if exponent < last && (num / scale * 100_f64).round() >= STEP * 100_f64 {
        scale *= STEP;
        exponent += 1;
    }

    format!("{}{:.2} {}", negative, num / scale, UNITS[exponent])
}
144+
145+
/// Formats a number as a human-readable string using decimal (1000-based) words.
///
/// The magnitude is scaled to the largest of "", " thousand", " million",
/// " billion", " trillion" and " quadrillion" that keeps the printed value at
/// or above 1. It is rounded to at most two decimals with trailing zeros
/// dropped, e.g. `10222.0` becomes `"10.22 thousand"`. Magnitudes below 1 are
/// printed unscaled and unrounded, and values beyond the quadrillion range stay
/// in quadrillions. Negative inputs are prefixed with `-`.
pub fn convert_number_size(num: f64) -> String {
    const UNITS: [&str; 6] = [
        "",
        " thousand",
        " million",
        " billion",
        " trillion",
        " quadrillion",
    ];
    const STEP: f64 = 1000_f64;
    let last = UNITS.len() - 1;

    // Compare against zero instead of inspecting the sign bit so that `-0.0`
    // is not rendered as "-0".
    let negative = if num < 0_f64 { "-" } else { "" };
    let num = num.abs();

    if num < 1_f64 {
        return format!("{}{}", negative, num);
    }

    // Select the unit by comparing against exact powers of 1000 (all exactly
    // representable up to 1e15) rather than a ratio of logarithms, which is
    // subject to floating-point error at exact powers.
    let mut exponent = 0;
    let mut scale = 1_f64;
    while exponent < last && num >= scale * STEP {
        scale *= STEP;
        exponent += 1;
    }

    // A value just under the next unit (e.g. 999_999) would round to 1000 of
    // the current unit ("1000 thousand"); promote it to the next unit.
    if exponent < last && (num / scale * 100_f64).round() >= STEP * 100_f64 {
        scale *= STEP;
        exponent += 1;
    }

    // Round to two decimals through formatting, then parse back so that the
    // final `Display` output drops trailing zeros ("1.00" -> "1").
    let rounded = format!("{:.2}", num / scale)
        .parse::<f64>()
        .expect("a formatted f64 always parses back into an f64");

    format!("{}{}{}", negative, rounded, UNITS[exponent])
}

src/common/base/tests/it/string_func.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,42 @@ fn replace_nth_char_test() {
4242
assert_eq!("a23".to_string(), replace_nth_char("a13", 1, '2'));
4343
assert_eq!("a13".to_string(), replace_nth_char("a13", 10, '2'));
4444
}
45+
46+
/// Checks the human-readable rendering of byte counts and plain numbers.
#[test]
fn convert_test() {
    // (input bytes, expected rendering)
    let byte_cases = [
        (0_f64, "0.00 B"),
        (0.1_f64, "0.10 B"),
        (1_f64, "1.00 B"),
        (1023_f64, "1023.00 B"),
        (1024_f64, "1.00 KiB"),
        (1229_f64, "1.20 KiB"),
        (1024_f64 * 1024_f64 * 1024_f64, "1.00 GiB"),
    ];
    for (bytes, expected) in byte_cases {
        assert_eq!(convert_byte_size(bytes), expected);
    }

    // (input number, expected rendering)
    let number_cases = [
        (1_f64, "1"),
        (1022_f64, "1.02 thousand"),
        (10222_f64, "10.22 thousand"),
    ];
    for (number, expected) in number_cases {
        assert_eq!(convert_number_size(number), expected);
    }
}
63+
64+
/// Checks `unescape_string` against (input, expected output) pairs.
#[test]
fn test_unescape_string() {
    let cases = [
        ("a", "a"),
        ("abc", "abc"),
        ("\\x01", "\x01"),
        ("\x01", "\x01"),
        ("\t\nabc", "\t\nabc"),
        ("\"\t\nabc\"", "\"\t\nabc\""),
        ("\"\\t\nabc\"", "\"\t\nabc\""),
        ("'\\t\nabc'", "'\t\nabc'"),
        ("\\t\\nabc", "\t\nabc"),
        ("\\\\", r"\"),
        ("\\\\", "\\"),
    ];

    for (input, expected) in cases {
        assert_eq!(unescape_string(input).unwrap(), expected);
    }
}

src/common/io/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ bincode = { version = "2.0.0-rc.1", features = ["serde", "std"] }
2121
bytes = "1.2.1"
2222
chrono = { workspace = true }
2323
chrono-tz = { workspace = true }
24-
enquote = "1.1.0"
2524
lexical-core = "0.8.5"
2625
micromarshal = "0.2.1"
2726
ordered-float = "3.1.0"

src/common/io/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,5 +35,5 @@ mod file_split;
3535
mod format_settings;
3636
mod options_deserializer;
3737
mod position;
38+
mod serialization;
3839
mod stat_buffer;
39-
mod utils;

src/common/io/src/prelude.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,5 +31,5 @@ pub use crate::format_settings::FormatSettings;
3131
pub use crate::options_deserializer::OptionsDeserializer;
3232
pub use crate::options_deserializer::OptionsDeserializerError;
3333
pub use crate::position::*;
34+
pub use crate::serialization::*;
3435
pub use crate::stat_buffer::StatBuffer;
35-
pub use crate::utils::*;

src/common/io/src/serialization.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Copyright 2021 Datafuse Labs.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use common_exception::Result;
16+
17+
/// bincode seralize_into wrap with optimized config
18+
#[inline]
19+
pub fn serialize_into_buf<W: std::io::Write, T: serde::Serialize>(
20+
writer: &mut W,
21+
value: &T,
22+
) -> Result<()> {
23+
bincode::serde::encode_into_std_write(value, writer, bincode::config::standard())?;
24+
Ok(())
25+
}
26+
27+
/// bincode deserialize_from wrap with optimized config
28+
#[inline]
29+
pub fn deserialize_from_slice<T: serde::de::DeserializeOwned>(slice: &mut &[u8]) -> Result<T> {
30+
let (value, bytes_read) =
31+
bincode::serde::decode_from_slice(slice, bincode::config::standard())?;
32+
*slice = &slice[bytes_read..];
33+
Ok(value)
34+
}

src/common/io/src/utils.rs

Lines changed: 0 additions & 103 deletions
This file was deleted.

0 commit comments

Comments
 (0)