From b21b8fc555414789d835f454a0dce3bf685fdb4c Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 30 Dec 2024 10:08:58 -0800 Subject: [PATCH 1/2] object-name: fix resolution of object names containing curly braces Given a branch name of 'foo{bar', commands like git cat-file -p foo{bar:README.md should succeed (assuming that branch had a README.md file, of course). However, the change in cce91a2caef9 (Change 'master@noon' syntax to 'master@{noon}'., 2006-05-19) presumed that curly braces would always come after an '@' or '^' and be paired, causing e.g. 'foo{bar:README.md' to entirely miss the ':' and assume there's no object being referenced. In short, git would report: fatal: Not a valid object name foo{bar:README.md Change the parsing to only make the assumption of paired curly braces immediately after either a '@' or '^' character appears. Add tests for this, as well as for a few other test cases that initial versions of this patch broke: * 'foo@@{...}' * 'foo^{/${SEARCH_TEXT_WITH_COLON}}:${PATH}' Note that we'd prefer not duplicating the special logic for "@^" characters here, because if get_oid_basic() or interpret_nth_prior_checkout() or similar gain extra methods of using curly braces, then the logic in get_oid_with_context_1() would need to be updated as well. But it's not clear how to refactor all of these to have a simple common callpoint with the specialized logic. 
Reported-by: Gabriel Amaral Helped-by: Michael Haggerty Signed-off-by: Elijah Newren --- object-name.c | 8 +++++--- t/t1006-cat-file.sh | 31 ++++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/object-name.c b/object-name.c index a563635a8cbbd9..8e80841acd3cc1 100644 --- a/object-name.c +++ b/object-name.c @@ -2051,12 +2051,14 @@ static enum get_oid_result get_oid_with_context_1(struct repository *repo, return -1; } for (cp = name, bracket_depth = 0; *cp; cp++) { - if (*cp == '{') + if (strchr("@^", *cp) && cp[1] == '{') { + cp++; bracket_depth++; - else if (bracket_depth && *cp == '}') + } else if (bracket_depth && *cp == '}') { bracket_depth--; - else if (!bracket_depth && *cp == ':') + } else if (!bracket_depth && *cp == ':') { break; + } } if (*cp == ':') { struct object_id tree_oid; diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index ff9bf213aa2c73..398865d6ebe9c6 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -240,7 +240,8 @@ test_expect_success "setup" ' git config extensions.objectformat $test_hash_algo && git config extensions.compatobjectformat $test_compat_hash_algo && echo_without_newline "$hello_content" > hello && - git update-index --add hello + git update-index --add hello && + git commit -m "add hello file" ' run_blob_tests () { @@ -602,6 +603,34 @@ test_expect_success FUNNYNAMES '--batch-check, -Z with newline in input' ' test_cmp expect actual ' +test_expect_success 'setup with curly braches in input' ' + git branch "foo{bar" HEAD && + git branch "foo@" HEAD +' + +test_expect_success 'object reference with curly brace' ' + git cat-file -p "foo{bar:hello" >actual && + git cat-file -p HEAD:hello >expect && + test_cmp expect actual +' + +test_expect_success 'object reference with at-sign' ' + git cat-file -p "foo@@{0}:hello" >actual && + git cat-file -p HEAD:hello >expect && + test_cmp expect actual +' + +test_expect_success 'setup with commit with colon' ' + git commit-tree -m 
"testing: just a bunch of junk" HEAD^{tree} >out && + git branch other $(cat out) +' + +test_expect_success 'object reference via commit text search' ' + git cat-file -p "other^{/testing:}:hello" >actual && + git cat-file -p HEAD:hello >expect && + test_cmp expect actual +' + test_expect_success 'setup blobs which are likely to delta' ' test-tool genrandom foo 10240 >foo && { cat foo && echo plus; } >foo-plus && From 19f84dfc9cc33616f3720d740c449b29de8d492e Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Fri, 3 Jan 2025 13:09:02 -0800 Subject: [PATCH 2/2] object-name: be more strict in parsing describe-like output From Documentation/revisions.txt: '', e.g. 'v1.7.4.2-679-g3bee7fb':: Output from `git describe`; i.e. a closest tag, optionally followed by a dash and a number of commits, followed by a dash, a 'g', and an abbreviated object name. which means that output of the format ${REFNAME}-${INTEGER}-g${HASH} should parse to fully expanded ${HASH}. This is fine. However, we currently don't validate any of ${REFNAME}-${INTEGER}, we only parse -g${HASH} and assume the rest is valid. That is problematic, since it breaks things like git cat-file -p branchname:path/to/file/named/i-gaffed which, when commit (or tree or blob) affed exists, will not return us information about the file we are looking for but will instead erroneously tell us about object affed. A few additional notes: - This is a slight backward incompatibility break, because we used to allow ${GARBAGE}-g${HASH} as a way to spell ${HASH}. However, a backward incompatible break is necessary, because there is no other way for someone to be more specific and disambiguate that they want the blob master:path/to/who-gabbed instead of the object abbed. - There is a possibility that check_refname_format() rules change in the future. However, we can only realistically loosen the rules for what that function accepts rather than tighten. 
If we were to tighten the rules, some real world repositories may already have refnames that suddenly become unacceptable and we would break those repositories. As such, any describe-like syntax of the form ${VALID_FOR_A_REFNAME}-${INTEGER}-g${HASH} that is valid with the changes in this commit will remain valid in the future. - The fact that check_refname_format() rules could loosen in the future is probably also an important reason to make this change. If the rules loosen, there might be additional cases within ${GARBAGE}-g${HASH} that become ambiguous in the future. While abbreviated hashes can be disambiguated by abbreviating less, it may well be that these alternative object names have no way of being disambiguated (much like pathnames cannot be). Accepting all random ${GARBAGE} thus makes it difficult for us to allow future extensions to object naming. So, tighten up the parsing to make sure ${REFNAME} and ${INTEGER} are present in the string, and would be considered a valid ref and non-negative integer. Also, add a few tests for git describe using object names of the form ${REVISION_NAME}${MODIFIERS} since an early version of this patch failed on constructs like git describe v2.48.0-rc2-161-g6c2274cdbc^0 Reported-by: Gabriel Amaral Signed-off-by: Elijah Newren --- object-name.c | 55 ++++++++++++++++++++++++++++++++++++++++++++- t/t6120-describe.sh | 24 ++++++++++++++++++++ 2 files changed, 78 insertions(+), 1 deletion(-) diff --git a/object-name.c b/object-name.c index 8e80841acd3cc1..cb96a0e61619f2 100644 --- a/object-name.c +++ b/object-name.c @@ -1271,6 +1271,58 @@ static int peel_onion(struct repository *r, const char *name, int len, return 0; } +/* + * Documentation/revisions.txt says: + * '<describeOutput>', e.g. 'v1.7.4.2-679-g3bee7fb':: + * Output from `git describe`; i.e. a closest tag, optionally + * followed by a dash and a number of commits, followed by a dash, a + * 'g', and an abbreviated object name. 
+ * + * which means that the stuff before '-g${HASH}' needs to be a valid + * refname, a dash, and a non-negative integer. This function verifies + * that. + * + * In particular, we do not want to treat + * branchname:path/to/file/named/i-gaffed + * as a request for commit affed. + * + * More generally, we should probably not treat + * 'refs/heads/./../.../ ~^:/?*[////\\\&}/busted.lock-g050e0ef6ead' + * as a request for object 050e0ef6ead either. + * + * We are called with name[len] == '-' and name[len+1] == 'g', i.e. + * we are verifying ${REFNAME}-${INTEGER} part of the name. + */ +static int ref_and_count_parts_valid(const char *name, int len) +{ + struct strbuf sb; + const char *cp; + int flags = REFNAME_ALLOW_ONELEVEL; + int ret = 1; + + /* Need room for a dash before, and end in, a decimal digit */ + if (len < 2 || !isdigit(name[len-1])) + return 0; + + /* Skip over decimal digits backwards until we get to the dash */ + for (cp = name + len - 2; name < cp; cp--) { + if (*cp == '-') + break; + if (!isdigit(*cp)) + return 0; + } + /* Ensure we found the leading dash */ + if (*cp != '-') + return 0; + + len = cp - name; + strbuf_init(&sb, len); + strbuf_add(&sb, name, len); + ret = !check_refname_format(sb.buf, flags); + strbuf_release(&sb); + return ret; +} + static int get_describe_name(struct repository *r, const char *name, int len, struct object_id *oid) @@ -1284,7 +1336,8 @@ static int get_describe_name(struct repository *r, /* We must be looking at g in "SOMETHING-g" * for it to be describe output. 
*/ - if (ch == 'g' && cp[-1] == '-') { + if (ch == 'g' && cp[-1] == '-' && + ref_and_count_parts_valid(name, cp - 1 - name)) { cp++; len -= cp - name; return get_short_oid(r, diff --git a/t/t6120-describe.sh b/t/t6120-describe.sh index 3f6160d702bc20..76843a61691cb5 100755 --- a/t/t6120-describe.sh +++ b/t/t6120-describe.sh @@ -82,11 +82,13 @@ check_describe R-2-gHASH HEAD^^ check_describe A-3-gHASH HEAD^^2 check_describe B HEAD^^2^ check_describe R-1-gHASH HEAD^^^ +check_describe R-1-gHASH R-1-g$(git rev-parse --short HEAD^^)~1 check_describe c-7-gHASH --tags HEAD check_describe c-6-gHASH --tags HEAD^ check_describe e-1-gHASH --tags HEAD^^ check_describe c-2-gHASH --tags HEAD^^2 +check_describe c-2-gHASH --tags c-2-g$(git rev-parse --short HEAD^^2)^0 check_describe B --tags HEAD^^2^ check_describe e --tags HEAD^^^ check_describe e --tags --exact-match HEAD^^^ @@ -725,4 +727,26 @@ test_expect_success '--exact-match does not show --always fallback' ' test_must_fail git describe --exact-match --always ' +test_expect_success 'avoid being fooled by describe-like filename' ' + test_when_finished rm out && + + git rev-parse --short HEAD >out && + FILENAME=filename-g$(cat out) && + touch $FILENAME && + git add $FILENAME && + git commit -m "Add $FILENAME" && + + git cat-file -t HEAD:$FILENAME >actual && + + echo blob >expect && + test_cmp expect actual +' + +test_expect_success 'do not be fooled by invalid describe format ' ' + test_when_finished rm out && + + git rev-parse --short HEAD >out && + test_must_fail git cat-file -t "refs/tags/super-invalid/./../...../ ~^:/?*[////\\\\\\&}/busted.lock-42-g"$(cat out) +' + test_done