uutils · sylvestre · Jan 1, 2025 · Jan 1, 2025 · Jan 1, 2025 · Jan 1, 2025
diff --git a/src/uu/printf/src/printf.rs b/src/uu/printf/src/printf.rs
@@ -3,11 +3,13 @@
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
 
-#![allow(dead_code)]
-
 use clap::{crate_version, Arg, ArgAction, Command};
 use std::io::stdout;
 use std::ops::ControlFlow;
+#[cfg(unix)]
+use std::os::unix::ffi::{OsStrExt, OsStringExt};
+#[cfg(windows)]
+use std::os::windows::ffi::OsStrExt;
 use uucore::error::{UResult, UUsageError};
 use uucore::format::{parse_spec_and_escape, FormatArgument, FormatItem};
 use uucore::{format_usage, help_about, help_section, help_usage};
@@ -22,23 +24,50 @@ mod options {
     pub const FORMAT: &str = "FORMAT";
     pub const ARGUMENT: &str = "ARGUMENT";
 }
-
 #[uucore::main]
 pub fn uumain(args: impl uucore::Args) -> UResult<()> {
     let matches = uu_app().get_matches_from(args);
 
     let format = matches
-        .get_one::<String>(options::FORMAT)
+        .get_one::<std::ffi::OsString>(options::FORMAT)
         .ok_or_else(|| UUsageError::new(1, "missing operand"))?;
 
-    let values: Vec<_> = match matches.get_many::<String>(options::ARGUMENT) {
-        Some(s) => s.map(|s| FormatArgument::Unparsed(s.to_string())).collect(),
+    // The format parser consumes bytes, so hand it the argument's raw bytes.
+    #[cfg(unix)]
+    let format = format.as_bytes();
+
+    // Windows arguments are UTF-16: re-encode them as UTF-8 (lossily) instead of
+    // passing raw UTF-16LE units, which put a NUL after every ASCII character.
+    #[cfg(windows)]
+    let format_utf8 = String::from_utf16_lossy(&format.encode_wide().collect::<Vec<u16>>());
+    #[cfg(windows)]
+    let format = format_utf8.as_bytes();
+
+    let values: Vec<_> = match matches.get_many::<std::ffi::OsString>(options::ARGUMENT) {
+        Some(s) => s
+            .map(|os_str| {
+                #[cfg(unix)]
+                let raw_bytes: Vec<u8> = os_str.clone().into_vec();
+
+                #[cfg(windows)]
+                let wide: Vec<u16> = os_str.encode_wide().collect();
+                #[cfg(windows)]
+                let raw_bytes: Vec<u8> = String::from_utf16_lossy(&wide).into_bytes();
+
+                // Invalid UTF-8 falls back to one char per byte, reusing the buffer.
+                FormatArgument::Unparsed(String::from_utf8(raw_bytes).unwrap_or_else(|err| {
+                    err.into_bytes().into_iter().map(char::from).collect()
+                }))
+            })
+            .collect(),
         None => vec![],
     };
 
     let mut format_seen = false;
     let mut args = values.iter().peekable();
-    for item in parse_spec_and_escape(format.as_ref()) {
+
+    // Parse and process the format string
+    for item in parse_spec_and_escape(format) {
         if let Ok(FormatItem::Spec(_)) = item {
             format_seen = true;
         }
@@ -55,7 +84,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
     }
 
     while args.peek().is_some() {
-        for item in parse_spec_and_escape(format.as_ref()) {
+        for item in parse_spec_and_escape(format) {
             match item?.write(stdout(), &mut args)? {
                 ControlFlow::Continue(()) => {}
                 ControlFlow::Break(()) => return Ok(()),
@@ -86,6 +115,10 @@ pub fn uu_app() -> Command {
                 .help("Print version information")
                 .action(ArgAction::Version),
         )
-        .arg(Arg::new(options::FORMAT))
-        .arg(Arg::new(options::ARGUMENT).action(ArgAction::Append))
+        .arg(Arg::new(options::FORMAT).value_parser(clap::value_parser!(std::ffi::OsString)))
+        .arg(
+            Arg::new(options::ARGUMENT)
+                .action(ArgAction::Append)
+                .value_parser(clap::value_parser!(std::ffi::OsString)),
+        )
 }
diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs
@@ -56,7 +56,26 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
         };
         match next {
             FormatArgument::UnsignedInt(n) => *n,
-            FormatArgument::Unparsed(s) => extract_value(ParsedNumber::parse_u64(s), s),
+            FormatArgument::Unparsed(s) => {
+                // A leading quote marks a character constant: the value is the
+                // code point of the character right after that single quote.
+                if let Some(rest) = s.strip_prefix(['"', '\'']) {
+                    let mut chars = rest.chars();
+                    if let Some(first_char) = chars.next() {
+                        // Whatever follows the first character is ignored (with a warning)
+                        let remaining = chars.as_str();
+                        if !remaining.is_empty() {
+                            show_warning!(
+                                "{}: character(s) following character constant have been ignored",
+                                remaining
+                            );
+                        }
+                        return u64::from(first_char);
+                    }
+                    return 0; // Nothing after the quote
+                }
+                extract_value(ParsedNumber::parse_u64(s), s)
+            }
             _ => 0,
         }
     }

diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs
@@ -12,45 +12,6 @@ fn basic_literal() {
         .stdout_only("hello world");
 }
 
-#[test]
-fn escaped_tab() {
-    new_ucmd!()
-        .args(&["hello\\t world"])
-        .succeeds()
-        .stdout_only("hello\t world");
-}
-
-#[test]
-fn escaped_newline() {
-    new_ucmd!()
-        .args(&["hello\\n world"])
-        .succeeds()
-        .stdout_only("hello\n world");
-}
-
-#[test]
-fn escaped_slash() {
-    new_ucmd!()
-        .args(&["hello\\\\ world"])
-        .succeeds()
-        .stdout_only("hello\\ world");
-}
-
-#[test]
-fn unescaped_double_quote() {
-    new_ucmd!().args(&["\\\""]).succeeds().stdout_only("\"");
-}
-
-#[test]
-fn escaped_hex() {
-    new_ucmd!().args(&["\\x41"]).succeeds().stdout_only("A");
-}
-
-#[test]
-fn escaped_octal() {
-    new_ucmd!().args(&["\\101"]).succeeds().stdout_only("A");
-}
-
 #[test]
 fn escaped_unicode_four_digit() {
     new_ucmd!().args(&["\\u0125"]).succeeds().stdout_only("ĥ");
@@ -77,38 +38,6 @@ fn escaped_unrecognized() {
     new_ucmd!().args(&["c\\d"]).succeeds().stdout_only("c\\d");
 }
 
-#[test]
-fn sub_string() {
-    new_ucmd!()
-        .args(&["hello %s", "world"])
-        .succeeds()
-        .stdout_only("hello world");
-}
-
-#[test]
-fn sub_multi_field() {
-    new_ucmd!()
-        .args(&["%s %s", "hello", "world"])
-        .succeeds()
-        .stdout_only("hello world");
-}
-
-#[test]
-fn sub_repeat_format_str() {
-    new_ucmd!()
-        .args(&["%s.", "hello", "world"])
-        .succeeds()
-        .stdout_only("hello.world.");
-}
-
-#[test]
-fn sub_string_ignore_escapes() {
-    new_ucmd!()
-        .args(&["hello %s", "\\tworld"])
-        .succeeds()
-        .stdout_only("hello \\tworld");
-}
-
 #[test]
 fn sub_b_string_handle_escapes() {
     new_ucmd!()
@@ -496,27 +425,11 @@ fn sub_any_asterisk_hex_arg() {
 }
 
 #[test]
-fn sub_any_specifiers_no_params() {
-    new_ucmd!()
-        .args(&["%ztlhLji", "3"]) //spell-checker:disable-line
-        .succeeds()
-        .stdout_only("3");
-}
-
-#[test]
-fn sub_any_specifiers_after_first_param() {
-    new_ucmd!()
-        .args(&["%0ztlhLji", "3"]) //spell-checker:disable-line
-        .succeeds()
-        .stdout_only("3");
-}
-
-#[test]
-fn sub_any_specifiers_after_period() {
-    new_ucmd!()
-        .args(&["%0.ztlhLji", "3"]) //spell-checker:disable-line
-        .succeeds()
-        .stdout_only("3");
+fn sub_any_specifiers() {
+    // spell-checker:disable-next-line
+    for format in ["%ztlhLji", "%0ztlhLji", "%0.ztlhLji"] {
+        new_ucmd!().args(&[format, "3"]).succeeds().stdout_only("3");
+    }
 }
 
 #[test]
@@ -764,33 +677,23 @@ fn pad_string() {
 }
 
 #[test]
-fn format_spec_zero_char_fails() {
-    // It is invalid to have the format spec '%0c'
-    new_ucmd!().args(&["%0c", "3"]).fails().code_is(1);
-}
-
-#[test]
-fn format_spec_zero_string_fails() {
-    // It is invalid to have the format spec '%0s'
-    new_ucmd!().args(&["%0s", "3"]).fails().code_is(1);
-}
-
-#[test]
-fn invalid_precision_fails() {
-    // It is invalid to have length of output string greater than i32::MAX
-    new_ucmd!()
-        .args(&["%.*d", "2147483648", "0"])
-        .fails()
-        .stderr_is("printf: invalid precision: '2147483648'\n");
+fn format_spec_zero_fails() {
+    // It is invalid to have the format spec '%0c' or '%0s'
+    for format in ["%0c", "%0s"] {
+        new_ucmd!().args(&[format, "3"]).fails().code_is(1);
+    }
 }
 
 #[test]
-fn float_invalid_precision_fails() {
+fn invalid_precision_tests() {
     // It is invalid to have length of output string greater than i32::MAX
-    new_ucmd!()
-        .args(&["%.*f", "2147483648", "0"])
-        .fails()
-        .stderr_is("printf: invalid precision: '2147483648'\n");
+    for format in ["%.*d", "%.*f"] {
+        let expected_error = "printf: invalid precision: '2147483648'\n";
+        new_ucmd!()
+            .args(&[format, "2147483648", "0"])
+            .fails()
+            .stderr_is(expected_error);
+    }
 }
 
 // The following padding-tests test for the cases in which flags in ['0', ' '] are given.
@@ -963,3 +866,69 @@ fn float_switch_switch_decimal_scientific() {
         .succeeds()
         .stdout_only("1e-05");
 }
+
+#[test]
+fn mb_input() {
+    // A quote followed by a multi-byte character yields that character's code point.
+    for quoted in ["\"á", "\'á", "'\u{e1}"] {
+        let mut cmd = new_ucmd!();
+        cmd.args(&["%04x\n", quoted]);
+        cmd.succeeds().stdout_only("00e1\n");
+    }
+    // (argument, trailing text that the warning must report as ignored)
+    let with_trailing = [
+        ("\"á=", "="),
+        ("\'á-", "-"),
+        ("\'á=-==", "=-=="),
+        ("'\u{e1}++", "++"),
+    ];
+    for (quoted, ignored) in with_trailing {
+        let warning = format!("printf: warning: {ignored}: character(s) following character constant have been ignored\n");
+        new_ucmd!()
+            .args(&["%04x\n", quoted])
+            .succeeds()
+            .stdout_is("00e1\n")
+            .stderr_is(warning);
+    }
+}
+
+#[test]
+fn escaped_characters() {
+    // (format string handed to printf, expected stdout)
+    let table = [
+        // control characters, the backslash itself and the double quote
+        ("hello\\t world", "hello\t world"),
+        ("hello\\n world", "hello\n world"),
+        ("hello\\\\ world", "hello\\ world"),
+        ("\\\"", "\""),
+        // numeric escapes: hexadecimal, then octal
+        ("\\x41", "A"),
+        ("\\101", "A"),
+    ];
+
+    // Each row is one printf invocation, checked with `stdout_only`.
+    for (input, expected) in table {
+        new_ucmd!().args(&[input]).succeeds().stdout_only(expected);
+    }
+}
+
+#[test]
+fn substitution_tests() {
+    // Run printf with `format` followed by `args`; stdout must equal `expected`.
+    fn test_substitution(format: &str, args: Vec<&str>, expected: &str) {
+        let mut cmd = new_ucmd!();
+        cmd.args(&[format]).args(&args);
+        cmd.succeeds().stdout_only(expected);
+    }
+    let cases = vec![
+        ("hello %s", vec!["world"], "hello world"),
+        ("%s %s", vec!["hello", "world"], "hello world"),
+        // the format is reused until all arguments are consumed
+        ("%s.", vec!["hello", "world"], "hello.world."),
+        // escape sequences inside a %s argument stay literal
+        ("hello %s", vec!["\\tworld"], "hello \\tworld"),
+    ];
+    for (format, args, expected) in cases {
+        test_substitution(format, args, expected);
+    }
+}