-
-
Notifications
You must be signed in to change notification settings - Fork 644
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Reimplement the ArgSplitter in Rust. #21814
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -109,7 +109,7 @@ def _get_known_goal_scopes( | |
yield alias, si | ||
|
||
def split_args(self, args: Sequence[str]) -> SplitArgs: | ||
"""Split the specified arg list (or sys.argv if unspecified). | ||
"""Split the specified arg list. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The previous statement was false... |
||
|
||
args[0] is ignored. | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
// Copyright 2025 Pants project contributors (see CONTRIBUTORS.md). | ||
// Licensed under the Apache License, Version 2.0 (see LICENSE). | ||
|
||
use lazy_static::lazy_static; | ||
use regex::Regex; | ||
use std::collections::HashSet; | ||
use std::path::{Path, PathBuf}; | ||
|
||
benjyw marked this conversation as resolved.
Show resolved
Hide resolved
|
||
lazy_static! { | ||
static ref SPEC_RE: Regex = Regex::new(r"[/\\.:*#]").unwrap(); | ||
static ref SINGLE_DASH_FLAGS: HashSet<&'static str> = | ||
HashSet::from(["-ltrace", "-ldebug", "-linfo", "-lwarn", "-lerror", "-h", "-v", "-V"]); | ||
} | ||
|
||
#[derive(Debug, Eq, PartialEq)] | ||
pub struct SplitArgs { | ||
pub goals: Vec<String>, // The requested known goals. | ||
pub unknown_goals: Vec<String>, // Any requested but unknown goals. | ||
pub specs: Vec<String>, // What to run against, e.g. targets or files/dirs. | ||
pub passthru: Vec<String>, // Any remaining args specified after a -- separator. | ||
} | ||
|
||
pub struct ArgSplitter { | ||
build_root: PathBuf, | ||
known_goal_names: HashSet<String>, | ||
} | ||
|
||
impl ArgSplitter { | ||
pub fn new<'a, I: IntoIterator<Item = &'a str>>( | ||
build_root: &Path, | ||
known_goal_names: I, | ||
) -> ArgSplitter { | ||
ArgSplitter { | ||
build_root: build_root.to_owned(), | ||
known_goal_names: known_goal_names.into_iter().map(str::to_string).collect(), | ||
} | ||
} | ||
|
||
pub fn split_args(&self, args: Vec<String>) -> SplitArgs { | ||
let mut goals = vec![]; | ||
let mut unknown_goals: Vec<String> = vec![]; | ||
let mut specs = vec![]; | ||
let mut passthru = vec![]; | ||
|
||
let mut unconsumed_args = args; | ||
unconsumed_args.reverse(); | ||
// The first arg is the binary name, so skip it. | ||
unconsumed_args.pop(); | ||
|
||
// Scan the args looking for goals and specs. | ||
// The one hard case is a single word like `foo` with no path- or target-like qualities | ||
// (e.g., a slash or a colon). It could be a goal, or it could be a top-level directory. | ||
// We disambiguate thus: If it is a known goal name, assume the user intended a goal. | ||
// Otherwise, if it looks like a path or target, or exists on the filesystem, assume | ||
// the user intended a spec, otherwise it's an unknown goal. | ||
// TODO: This is probably not good logic, since the CLI behavior will change based on | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This logic is preserved for compatibility, but we should probably consider changing it at some point. |
||
// changes to plugins (as they can introduce new goals) or changes on the filesystem. | ||
// We might want to deprecate this behavior and consistently assume that these are goals, | ||
// since the user can always add a `./` prefix to override. | ||
while let Some(arg) = unconsumed_args.pop() { | ||
// Some special flags, such as `-v` and `--help`, are implemented as | ||
// goal aliases, so we must check this before checking for any dash prefixes. | ||
if self.known_goal_names.contains(&arg) { | ||
goals.push(arg); | ||
} else if arg == "--" { | ||
// Arg is the passthru delimiter. | ||
for item in unconsumed_args.drain(..) { | ||
passthru.push(item); | ||
} | ||
passthru.reverse(); | ||
} else if !(arg.starts_with("--") || SINGLE_DASH_FLAGS.contains(arg.as_str())) { | ||
// This is not a flag, so it must be an unknown goal or a spec (or a negative spec, | ||
// which starts with a single dash, and we know is not a single dash flag). | ||
if arg.starts_with("-") | ||
|| SPEC_RE.is_match(&arg) | ||
|| self.build_root.join(Path::new(&arg)).exists() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is sync IO. Does sync vs. async matter at this point of the application? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think so - the Python equivalent was blocking on the same thing at the same point. And args are split long before we create a scheduler - we have to split them in order to get the option values for setting up the scheduler. |
||
{ | ||
// Arg is a spec. | ||
specs.push(arg); | ||
} else { | ||
// Arg is an unknown goal. | ||
unknown_goals.push(arg); | ||
} | ||
} | ||
} | ||
|
||
SplitArgs { | ||
goals, | ||
unknown_goals, | ||
specs, | ||
passthru, | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's possible that we won't need to access this in Python in the future, but for now I've implemented this, to make the port less invasive, and also so that I can run the existing Python tests against this new implementation.