Skip to content

Commit

Permalink
maintenance: add prune-remote-refs task
Browse files Browse the repository at this point in the history
Remote-tracking refs can accumulate in local repositories even as branches
are deleted on remotes, impacting git performance negatively. Existing
alternatives to keep refs pruned have a few issues:

  1. Running `git fetch` with either `--prune` or `fetch.prune=true`
     set, with the default refspec to copy all their branches into
     our remote-tracking branches, will prune stale refs, but also
     pulls in new branches from remote.  That is undesirable if the
     user wants to only work with a selected few remote branches.

  2. `git remote prune` cleans up refs without adding to the
     existing list but requires periodic user intervention.

Add a new maintenance task 'prune-remote-refs' that runs 'git remote
prune' for each configured remote daily.  Leave the task disabled by
default, as it may be unexpected to see their remote-tracking
branches to disappear while they are not watching for unsuspecting
users.

Signed-off-by: Shubham Kanodia <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
  • Loading branch information
pastelsky committed Jan 3, 2025
1 parent 76cf4f6 commit 9ebcee7
Show file tree
Hide file tree
Showing 3 changed files with 187 additions and 7 deletions.
20 changes: 20 additions & 0 deletions Documentation/git-maintenance.txt
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,26 @@ pack-refs::
need to iterate across many references. See linkgit:git-pack-refs[1]
for more information.

prune-remote-refs::
The `prune-remote-refs` task runs `git remote prune` on each remote
repository registered in the local repository. This task helps clean
up deleted remote branches, improving the performance of operations
that iterate through the refs. See linkgit:git-remote[1] for more
information. This task is disabled by default.
+
NOTE: This task is opt-in to prevent unexpected removal of remote refs
for users of linkgit:git-maintenance[1]. For most users, configuring `fetch.prune=true`
is an acceptable solution, as it will automatically clean up stale remote-tracking
branches during normal fetch operations. However, this task can be useful in
specific scenarios:
+
--
* When using selective fetching (e.g., `git fetch origin +foo:refs/remotes/origin/foo`)
where `fetch.prune` would only affect refs that are explicitly fetched.
* When third-party tools might perform unexpected full fetches, and you want
periodic cleanup independently of fetch operations.
--

OPTIONS
-------
--auto::
Expand Down
92 changes: 85 additions & 7 deletions builtin/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "lockfile.h"
#include "parse-options.h"
#include "run-command.h"
#include "remote.h"
#include "sigchain.h"
#include "strvec.h"
#include "commit.h"
Expand Down Expand Up @@ -916,6 +917,63 @@ static int maintenance_opt_schedule(const struct option *opt, const char *arg,
return 0;
}

struct remote_cb_data {
struct maintenance_run_opts *opts;
struct string_list failed_remotes;
};

static void report_failed_remotes(struct string_list *failed_remotes,
const char *action_name)
{
if (failed_remotes->nr) {
int i;
struct strbuf msg = STRBUF_INIT;
strbuf_addf(&msg, _("failed to %s the following remotes: "),
action_name);
for (i = 0; i < failed_remotes->nr; i++) {
if (i)
strbuf_addstr(&msg, ", ");
strbuf_addstr(&msg, failed_remotes->items[i].string);
}
error("%s", msg.buf);
strbuf_release(&msg);
}
}

static int prune_remote(struct remote *remote, void *cb_data)
{
struct child_process child = CHILD_PROCESS_INIT;
struct remote_cb_data *data = cb_data;

if (!remote->url.nr)
return 0;

child.git_cmd = 1;
strvec_pushl(&child.args, "remote", "prune", remote->name, NULL);

if (run_command(&child))
string_list_append(&data->failed_remotes, remote->name);

return 0;
}

static int maintenance_task_prune_remote(struct maintenance_run_opts *opts,
struct gc_config *cfg UNUSED)
{
struct remote_cb_data cbdata = { .opts = opts,
.failed_remotes = STRING_LIST_INIT_DUP };

int result;
result = for_each_remote(prune_remote, &cbdata);

report_failed_remotes(&cbdata.failed_remotes, "prune");
if (cbdata.failed_remotes.nr)
result = 1;

string_list_clear(&cbdata.failed_remotes, 0);
return result;
}

/* Remember to update object flag allocation in object.h */
#define SEEN (1u<<0)

Expand Down Expand Up @@ -1036,8 +1094,8 @@ static int maintenance_task_commit_graph(struct maintenance_run_opts *opts,

static int fetch_remote(struct remote *remote, void *cbdata)
{
struct maintenance_run_opts *opts = cbdata;
struct child_process child = CHILD_PROCESS_INIT;
struct remote_cb_data *data = cbdata;

if (remote->skip_default_update)
return 0;
Expand All @@ -1048,21 +1106,34 @@ static int fetch_remote(struct remote *remote, void *cbdata)
"--no-write-fetch-head", "--recurse-submodules=no",
NULL);

if (opts->quiet)
if (data->opts->quiet)
strvec_push(&child.args, "--quiet");

return !!run_command(&child);
if (run_command(&child))
string_list_append(&data->failed_remotes, remote->name);

return 0;
}

static int maintenance_task_prefetch(struct maintenance_run_opts *opts,
struct gc_config *cfg UNUSED)
{
if (for_each_remote(fetch_remote, opts)) {
error(_("failed to prefetch remotes"));
return 1;
struct remote_cb_data cbdata = { .opts = opts,
.failed_remotes = STRING_LIST_INIT_DUP };

int result = 0;

if (for_each_remote(fetch_remote, &cbdata)) {
error(_("failed to prefetch some remotes"));
result = 1;
}

return 0;
report_failed_remotes(&cbdata.failed_remotes, "prefetch");
if (cbdata.failed_remotes.nr)
result = 1;

string_list_clear(&cbdata.failed_remotes, 0);
return result;
}

static int maintenance_task_gc(struct maintenance_run_opts *opts,
Expand Down Expand Up @@ -1378,6 +1449,7 @@ enum maintenance_task_label {
TASK_GC,
TASK_COMMIT_GRAPH,
TASK_PACK_REFS,
TASK_PRUNE_REMOTE_REFS,

/* Leave as final value */
TASK__COUNT
Expand Down Expand Up @@ -1414,6 +1486,10 @@ static struct maintenance_task tasks[] = {
maintenance_task_pack_refs,
pack_refs_condition,
},
[TASK_PRUNE_REMOTE_REFS] = {
"prune-remote-refs",
maintenance_task_prune_remote,
},
};

static int compare_tasks_by_selection(const void *a_, const void *b_)
Expand Down Expand Up @@ -1508,6 +1584,8 @@ static void initialize_maintenance_strategy(void)
tasks[TASK_LOOSE_OBJECTS].schedule = SCHEDULE_DAILY;
tasks[TASK_PACK_REFS].enabled = 1;
tasks[TASK_PACK_REFS].schedule = SCHEDULE_WEEKLY;
tasks[TASK_PRUNE_REMOTE_REFS].enabled = 0;
tasks[TASK_PRUNE_REMOTE_REFS].schedule = SCHEDULE_DAILY;
}
}

Expand Down
82 changes: 82 additions & 0 deletions t/t7900-maintenance.sh
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,88 @@ test_expect_success 'pack-refs task' '
test_subcommand git pack-refs --all --prune <pack-refs.txt
'

test_expect_success 'prune-remote-refs task not enabled by default' '
git clone . prune-test &&
(
cd prune-test &&
GIT_TRACE2_EVENT="$(pwd)/prune.txt" git maintenance run 2>err &&
test_subcommand ! git remote prune origin <prune.txt
)
'

test_expect_success 'prune-remote-refs task cleans stale remote refs' '
test_commit initial &&
# Create two separate remote repos
git clone . remote1 &&
git clone . remote2 &&
git clone . prune-test-clean &&
(
cd prune-test-clean &&
git config maintenance.prune-remote-refs.enabled true &&
# Add both remotes
git remote add remote1 "../remote1" &&
git remote add remote2 "../remote2" &&
# Create and push branches to both remotes
git branch -f side2 HEAD &&
git push remote1 side2 &&
git push remote2 side2 &&
# Rename branches in each remote to simulate a stale branch
git -C ../remote1 branch -m side2 side3 &&
git -C ../remote2 branch -m side2 side4 &&
GIT_TRACE2_EVENT="$(pwd)/prune.txt" git maintenance run --task=prune-remote-refs &&
# Verify pruning happened for both remotes
test_subcommand git remote prune remote1 <prune.txt &&
test_subcommand git remote prune remote2 <prune.txt &&
test_must_fail git rev-parse refs/remotes/remote1/side2 &&
test_must_fail git rev-parse refs/remotes/remote2/side2
)
'

test_expect_success 'prune-remote-refs task continues to prune remotes even if some fail' '
test_commit initial &&
git clone . remote-bad1 &&
git clone . remote-bad2 &&
git clone . remote-good &&
git clone . prune-test-partial &&
(
cd prune-test-partial &&
git config maintenance.prune-remote-refs.enabled true &&
# Add remotes in alphabetical order to ensure processing order
git remote add aaa-bad1 "../remote-bad1" &&
git remote add bbb-bad2 "../remote-bad2" &&
git remote add ccc-good "../remote-good" &&
# Create and push branches to all remotes
git branch -f side2 HEAD &&
git push aaa-bad1 side2 &&
git push bbb-bad2 side2 &&
git push ccc-good side2 &&
# Rename branch in good remote to simulate a stale branch
git -C ../remote-good branch -m side2 side3 &&
# Break the bad remotes by removing their directories
rm -rf ../remote-bad1 ../remote-bad2 &&
GIT_TRACE2_EVENT="$(pwd)/prune.txt" git maintenance run --task=prune-remote-refs 2>err || true &&
# Verify pruning happened for good remote despite bad remote failures
test_subcommand git remote prune ccc-good <prune.txt &&
test_must_fail git rev-parse refs/remotes/ccc-good/side2 &&
test_grep "error: failed to prune the following remotes: aaa-bad1, bbb-bad2" err
)
'

test_expect_success '--auto and --schedule incompatible' '
test_must_fail git maintenance run --auto --schedule=daily 2>err &&
test_grep "at most one" err
Expand Down

0 comments on commit 9ebcee7

Please sign in to comment.