Skip to content

Commit 18db15e

Browse files
authored
Merge pull request #7208 from jtracey/printf-go
printf: improve support of printing multi-byte values of characters
2 parents b7f2815 + 42d06f9 commit 18db15e

File tree

3 files changed

+85
-127
lines changed

3 files changed

+85
-127
lines changed

src/uu/printf/src/printf.rs

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use std::io::stdout;
77
use std::ops::ControlFlow;
88
use uucore::error::{UResult, UUsageError};
99
use uucore::format::{FormatArgument, FormatItem, parse_spec_and_escape};
10-
use uucore::{format_usage, help_about, help_section, help_usage, show_warning};
10+
use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes, show_warning};
1111

1212
const VERSION: &str = "version";
1313
const HELP: &str = "help";
@@ -19,23 +19,30 @@ mod options {
1919
pub const FORMAT: &str = "FORMAT";
2020
pub const ARGUMENT: &str = "ARGUMENT";
2121
}
22-
2322
#[uucore::main]
2423
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
2524
let matches = uu_app().get_matches_from(args);
2625

2726
let format = matches
28-
.get_one::<String>(options::FORMAT)
27+
.get_one::<std::ffi::OsString>(options::FORMAT)
2928
.ok_or_else(|| UUsageError::new(1, "missing operand"))?;
29+
let format = os_str_as_bytes(format)?;
3030

31-
let values: Vec<_> = match matches.get_many::<String>(options::ARGUMENT) {
32-
Some(s) => s.map(|s| FormatArgument::Unparsed(s.to_string())).collect(),
31+
let values: Vec<_> = match matches.get_many::<std::ffi::OsString>(options::ARGUMENT) {
32+
// FIXME: use os_str_as_bytes once FormatArgument supports Vec<u8>
33+
Some(s) => s
34+
.map(|os_string| {
35+
FormatArgument::Unparsed(std::ffi::OsStr::to_string_lossy(os_string).to_string())
36+
})
37+
.collect(),
3338
None => vec![],
3439
};
3540

3641
let mut format_seen = false;
3742
let mut args = values.iter().peekable();
38-
for item in parse_spec_and_escape(format.as_ref()) {
43+
44+
// Parse and process the format string
45+
for item in parse_spec_and_escape(format) {
3946
if let Ok(FormatItem::Spec(_)) = item {
4047
format_seen = true;
4148
}
@@ -58,7 +65,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
5865
}
5966

6067
while args.peek().is_some() {
61-
for item in parse_spec_and_escape(format.as_ref()) {
68+
for item in parse_spec_and_escape(format) {
6269
match item?.write(stdout(), &mut args)? {
6370
ControlFlow::Continue(()) => {}
6471
ControlFlow::Break(()) => return Ok(()),
@@ -90,6 +97,10 @@ pub fn uu_app() -> Command {
9097
.help("Print version information")
9198
.action(ArgAction::Version),
9299
)
93-
.arg(Arg::new(options::FORMAT))
94-
.arg(Arg::new(options::ARGUMENT).action(ArgAction::Append))
100+
.arg(Arg::new(options::FORMAT).value_parser(clap::value_parser!(std::ffi::OsString)))
101+
.arg(
102+
Arg::new(options::ARGUMENT)
103+
.action(ArgAction::Append)
104+
.value_parser(clap::value_parser!(std::ffi::OsString)),
105+
)
95106
}

src/uucore/src/lib/features/format/argument.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,26 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
5858
};
5959
match next {
6060
FormatArgument::UnsignedInt(n) => *n,
61-
FormatArgument::Unparsed(s) => extract_value(u64::extended_parse(s), s),
61+
FormatArgument::Unparsed(s) => {
62+
// Check if the string is a character literal enclosed in quotes
63+
if s.starts_with(['"', '\'']) {
64+
// Extract the content between the quotes safely using chars
65+
let mut chars = s.trim_matches(|c| c == '"' || c == '\'').chars();
66+
if let Some(first_char) = chars.next() {
67+
if chars.clone().count() > 0 {
68+
// Emit a warning if there are additional characters
69+
let remaining: String = chars.collect();
70+
show_warning!(
71+
"{}: character(s) following character constant have been ignored",
72+
remaining
73+
);
74+
}
75+
return first_char as u64; // Use only the first character
76+
}
77+
return 0; // Empty quotes
78+
}
79+
extract_value(u64::extended_parse(s), s)
80+
}
6281
_ => 0,
6382
}
6483
}

tests/by-util/test_printf.rs

Lines changed: 45 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -16,40 +16,6 @@ fn basic_literal() {
1616
.stdout_only("hello world");
1717
}
1818

19-
#[test]
20-
fn escaped_tab() {
21-
new_ucmd!()
22-
.args(&["hello\\t world"])
23-
.succeeds()
24-
.stdout_only("hello\t world");
25-
}
26-
27-
#[test]
28-
fn escaped_newline() {
29-
new_ucmd!()
30-
.args(&["hello\\n world"])
31-
.succeeds()
32-
.stdout_only("hello\n world");
33-
}
34-
35-
#[test]
36-
fn escaped_slash() {
37-
new_ucmd!()
38-
.args(&["hello\\\\ world"])
39-
.succeeds()
40-
.stdout_only("hello\\ world");
41-
}
42-
43-
#[test]
44-
fn unescaped_double_quote() {
45-
new_ucmd!().args(&["\\\""]).succeeds().stdout_only("\"");
46-
}
47-
48-
#[test]
49-
fn escaped_hex() {
50-
new_ucmd!().args(&["\\x41"]).succeeds().stdout_only("A");
51-
}
52-
5319
#[test]
5420
fn test_missing_escaped_hex_value() {
5521
new_ucmd!()
@@ -58,17 +24,12 @@ fn test_missing_escaped_hex_value() {
5824
.stderr_only("printf: missing hexadecimal number in escape\n");
5925
}
6026

61-
#[test]
62-
fn escaped_octal() {
63-
new_ucmd!().args(&["\\101"]).succeeds().stdout_only("A");
64-
}
65-
6627
#[test]
6728
fn escaped_octal_and_newline() {
6829
new_ucmd!()
69-
.args(&["\\0377\\n"])
30+
.args(&["\\101\\0377\\n"])
7031
.succeeds()
71-
.stdout_only("\x1F7\n");
32+
.stdout_only("A\x1F7\n");
7233
}
7334

7435
#[test]
@@ -145,38 +106,6 @@ fn escaped_unrecognized() {
145106
new_ucmd!().args(&["c\\d"]).succeeds().stdout_only("c\\d");
146107
}
147108

148-
#[test]
149-
fn sub_string() {
150-
new_ucmd!()
151-
.args(&["hello %s", "world"])
152-
.succeeds()
153-
.stdout_only("hello world");
154-
}
155-
156-
#[test]
157-
fn sub_multi_field() {
158-
new_ucmd!()
159-
.args(&["%s %s", "hello", "world"])
160-
.succeeds()
161-
.stdout_only("hello world");
162-
}
163-
164-
#[test]
165-
fn sub_repeat_format_str() {
166-
new_ucmd!()
167-
.args(&["%s.", "hello", "world"])
168-
.succeeds()
169-
.stdout_only("hello.world.");
170-
}
171-
172-
#[test]
173-
fn sub_string_ignore_escapes() {
174-
new_ucmd!()
175-
.args(&["hello %s", "\\tworld"])
176-
.succeeds()
177-
.stdout_only("hello \\tworld");
178-
}
179-
180109
#[test]
181110
fn sub_b_string_handle_escapes() {
182111
new_ucmd!()
@@ -705,27 +634,11 @@ fn sub_any_asterisk_second_param_with_integer() {
705634
}
706635

707636
#[test]
708-
fn sub_any_specifiers_no_params() {
709-
new_ucmd!()
710-
.args(&["%ztlhLji", "3"]) //spell-checker:disable-line
711-
.succeeds()
712-
.stdout_only("3");
713-
}
714-
715-
#[test]
716-
fn sub_any_specifiers_after_first_param() {
717-
new_ucmd!()
718-
.args(&["%0ztlhLji", "3"]) //spell-checker:disable-line
719-
.succeeds()
720-
.stdout_only("3");
721-
}
722-
723-
#[test]
724-
fn sub_any_specifiers_after_period() {
725-
new_ucmd!()
726-
.args(&["%0.ztlhLji", "3"]) //spell-checker:disable-line
727-
.succeeds()
728-
.stdout_only("3");
637+
fn sub_any_specifiers() {
638+
// spell-checker:disable-next-line
639+
for format in ["%ztlhLji", "%0ztlhLji", "%0.ztlhLji"] {
640+
new_ucmd!().args(&[format, "3"]).succeeds().stdout_only("3");
641+
}
729642
}
730643

731644
#[test]
@@ -1027,33 +940,23 @@ fn pad_string() {
1027940
}
1028941

1029942
#[test]
1030-
fn format_spec_zero_char_fails() {
1031-
// It is invalid to have the format spec '%0c'
1032-
new_ucmd!().args(&["%0c", "3"]).fails_with_code(1);
1033-
}
1034-
1035-
#[test]
1036-
fn format_spec_zero_string_fails() {
1037-
// It is invalid to have the format spec '%0s'
1038-
new_ucmd!().args(&["%0s", "3"]).fails_with_code(1);
1039-
}
1040-
1041-
#[test]
1042-
fn invalid_precision_fails() {
1043-
// It is invalid to have length of output string greater than i32::MAX
1044-
new_ucmd!()
1045-
.args(&["%.*d", "2147483648", "0"])
1046-
.fails()
1047-
.stderr_is("printf: invalid precision: '2147483648'\n");
943+
fn format_spec_zero_fails() {
944+
// It is invalid to have the format spec '%0c' or '%0s'
945+
for format in ["%0c", "%0s"] {
946+
new_ucmd!().args(&[format, "3"]).fails_with_code(1);
947+
}
1048948
}
1049949

1050950
#[test]
1051-
fn float_invalid_precision_fails() {
951+
fn invalid_precision_tests() {
1052952
// It is invalid to have length of output string greater than i32::MAX
1053-
new_ucmd!()
1054-
.args(&["%.*f", "2147483648", "0"])
1055-
.fails()
1056-
.stderr_is("printf: invalid precision: '2147483648'\n");
953+
for format in ["%.*d", "%.*f"] {
954+
let expected_error = "printf: invalid precision: '2147483648'\n";
955+
new_ucmd!()
956+
.args(&[format, "2147483648", "0"])
957+
.fails()
958+
.stderr_is(expected_error);
959+
}
1057960
}
1058961

1059962
// The following padding-tests test for the cases in which flags in ['0', ' '] are given.
@@ -1385,3 +1288,28 @@ fn float_arg_with_whitespace() {
13851288
.fails()
13861289
.stderr_contains("expected a numeric value");
13871290
}
1291+
1292+
#[test]
1293+
fn mb_input() {
1294+
for format in ["\"á", "\'á", "'\u{e1}"] {
1295+
new_ucmd!()
1296+
.args(&["%04x\n", format])
1297+
.succeeds()
1298+
.stdout_only("00e1\n");
1299+
}
1300+
1301+
let cases = vec![
1302+
("\"á=", "="),
1303+
("\'á-", "-"),
1304+
("\'á=-==", "=-=="),
1305+
("'\u{e1}++", "++"),
1306+
];
1307+
1308+
for (format, expected) in cases {
1309+
new_ucmd!()
1310+
.args(&["%04x\n", format])
1311+
.succeeds()
1312+
.stdout_is("00e1\n")
1313+
.stderr_is(format!("printf: warning: {expected}: character(s) following character constant have been ignored\n"));
1314+
}
1315+
}

0 commit comments

Comments
 (0)