-
-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
printf: improve support of printing multi-byte values of characters #7048
base: main
Are you sure you want to change the base?
Changes from all commits
ea485bc
650eef9
7c589f3
b8e8ef6
60f9570
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,11 +3,13 @@ | |
// For the full copyright and license information, please view the LICENSE | ||
// file that was distributed with this source code. | ||
|
||
#![allow(dead_code)] | ||
|
||
use clap::{crate_version, Arg, ArgAction, Command}; | ||
use std::io::stdout; | ||
use std::ops::ControlFlow; | ||
#[cfg(unix)] | ||
use std::os::unix::ffi::{OsStrExt, OsStringExt}; | ||
#[cfg(windows)] | ||
use std::os::windows::ffi::OsStrExt; | ||
use uucore::error::{UResult, UUsageError}; | ||
use uucore::format::{parse_spec_and_escape, FormatArgument, FormatItem}; | ||
use uucore::{format_usage, help_about, help_section, help_usage}; | ||
|
@@ -22,23 +24,50 @@ mod options { | |
pub const FORMAT: &str = "FORMAT"; | ||
pub const ARGUMENT: &str = "ARGUMENT"; | ||
} | ||
|
||
#[uucore::main] | ||
pub fn uumain(args: impl uucore::Args) -> UResult<()> { | ||
let matches = uu_app().get_matches_from(args); | ||
|
||
let format = matches | ||
.get_one::<String>(options::FORMAT) | ||
.get_one::<std::ffi::OsString>(options::FORMAT) | ||
.ok_or_else(|| UUsageError::new(1, "missing operand"))?; | ||
|
||
let values: Vec<_> = match matches.get_many::<String>(options::ARGUMENT) { | ||
Some(s) => s.map(|s| FormatArgument::Unparsed(s.to_string())).collect(), | ||
#[cfg(unix)] | ||
let format = format.as_bytes(); | ||
|
||
#[cfg(windows)] | ||
let format_vec: Vec<u8> = format | ||
.encode_wide() | ||
.flat_map(|wchar| wchar.to_le_bytes()) | ||
.collect(); | ||
#[cfg(windows)] | ||
let format = format_vec.as_slice(); | ||
|
||
let values: Vec<_> = match matches.get_many::<std::ffi::OsString>(options::ARGUMENT) { | ||
Some(s) => s | ||
.map(|os_str| { | ||
#[cfg(unix)] | ||
let raw_bytes: Vec<u8> = os_str.clone().into_vec(); | ||
|
||
#[cfg(windows)] | ||
let raw_bytes: Vec<u8> = os_str | ||
.encode_wide() | ||
.flat_map(|wchar| wchar.to_le_bytes()) | ||
.collect(); | ||
FormatArgument::Unparsed( | ||
String::from_utf8(raw_bytes.clone()) | ||
.unwrap_or_else(|_| raw_bytes.iter().map(|&b| b as char).collect()), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can't find it documented anywhere, but collecting Ideally, the |
||
) | ||
}) | ||
.collect(), | ||
None => vec![], | ||
}; | ||
|
||
let mut format_seen = false; | ||
let mut args = values.iter().peekable(); | ||
for item in parse_spec_and_escape(format.as_ref()) { | ||
|
||
// Parse and process the format string | ||
for item in parse_spec_and_escape(format) { | ||
if let Ok(FormatItem::Spec(_)) = item { | ||
format_seen = true; | ||
} | ||
|
@@ -55,7 +84,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { | |
} | ||
|
||
while args.peek().is_some() { | ||
for item in parse_spec_and_escape(format.as_ref()) { | ||
for item in parse_spec_and_escape(format) { | ||
match item?.write(stdout(), &mut args)? { | ||
ControlFlow::Continue(()) => {} | ||
ControlFlow::Break(()) => return Ok(()), | ||
|
@@ -86,6 +115,10 @@ pub fn uu_app() -> Command { | |
.help("Print version information") | ||
.action(ArgAction::Version), | ||
) | ||
.arg(Arg::new(options::FORMAT)) | ||
.arg(Arg::new(options::ARGUMENT).action(ArgAction::Append)) | ||
.arg(Arg::new(options::FORMAT).value_parser(clap::value_parser!(std::ffi::OsString))) | ||
.arg( | ||
Arg::new(options::ARGUMENT) | ||
.action(ArgAction::Append) | ||
.value_parser(clap::value_parser!(std::ffi::OsString)), | ||
) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This won't do what we want, because it's effectively casting UTF-16 into a byte array, and UTF-16 is not byte-compatible with UTF-8 (e.g., ASCII in UTF-8 is just the literal ASCII byte per char, while ASCII in UTF-16 will be two bytes each). Because invalid unicode is much rarer in Windows, the way to turn OsStr(ing)s into bytes for us is using
os_str_as_bytes
, which will error on invalid unicode on Windows, oros_str_as_bytes_lossy
, which will turn invalid unicode sequences into the replacement character on Windows. Windows makes it difficult to pass invalid unicode as an argument, so whichever feels more appropriate should be fine here.Also, a nit that won't be relevant after the change, but cfgs assuming unix or windows is a code smell, since there are platforms that are neither (none that we support yet, but I hope to change that at some point 😛).