indexserver: add debug endpoint for deleting repository shards #485
base: main
Changes from 7 commits
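The HTTP handler for the new debug endpoint itself isn't included in the hunks below, which focus on deleteShards and the cleanup changes. A rough sketch of how such an endpoint might call into deleteShards follows; the handler name, route, query parameter, and lock field are assumptions for illustration, not code from this PR:

```go
// Hypothetical wiring for the debug endpoint; deleteShards is defined in the
// diff below and requires the caller to hold the global indexDir lock.
func (s *Server) handleDebugDeleteRepository(w http.ResponseWriter, r *http.Request) {
	id, err := strconv.ParseUint(r.URL.Query().Get("id"), 10, 32)
	if err != nil {
		http.Error(w, "invalid repository id", http.StatusBadRequest)
		return
	}

	s.indexDirLock.Lock() // assumed name for the global indexDir lock
	err = deleteShards(s.indexDir, uint32(id))
	s.indexDirLock.Unlock()

	switch {
	case errors.Is(err, errRepositoryNotFound):
		http.Error(w, err.Error(), http.StatusNotFound)
	case err != nil:
		http.Error(w, err.Error(), http.StatusInternalServerError)
	default:
		fmt.Fprintf(w, "deleted shards for repository %d\n", id)
	}
}
```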
```go
@@ -1,7 +1,9 @@
package main

import (
	"errors"
	"fmt"
	"io/fs"
	"log"
	"os"
	"os/exec"
```
```go
@@ -172,6 +174,38 @@ func cleanup(indexDir string, repos []uint32, now time.Time, shardMerging bool)
		}
	}

	// remove any Zoekt metadata files in the given dir that don't have an
	// associated shard file
	metaFiles, err := filepath.Glob(filepath.Join(indexDir, "*.meta"))
	if err != nil {
		log.Printf("failed to glob %q for stranded metadata files: %s", indexDir, err)
	} else {
		for _, metaFile := range metaFiles {
			shard := strings.TrimSuffix(metaFile, ".meta")
			_, err := os.Stat(shard)
			if err == nil {
				// metadata file has associated shard
				continue
			}

			if !errors.Is(err, fs.ErrNotExist) {
				log.Printf("failed to stat metadata file %q: %s", metaFile, err)
				continue
			}

			// metadata doesn't have an associated shard file, remove the metadata file
			err = os.Remove(metaFile)
			if err != nil {
				log.Printf("failed to remove stranded metadata file %q: %s", metaFile, err)
				continue
			} else {
				log.Printf("removed stranded metadata file: %s", metaFile)
			}
		}
	}

	metricCleanupDuration.Observe(time.Since(start).Seconds())
}
```
```go
@@ -515,3 +549,68 @@ func removeTombstones(fn string) ([]*zoekt.Repository, error) {
	}
	return tombstones, nil
}

// deleteShards deletes all the shards in indexDir that are associated with
// the given repoID. If the repository specified by repoID happens to be in a
// compound shard, the repository is tombstoned instead.
//
// deleteShards returns errRepositoryNotFound if the repository specified by repoID
// isn't present in indexDir.
//
// Users must hold the global indexDir lock before calling deleteShards.
func deleteShards(indexDir string, repoID uint32) error {
```
Quoting from cleanup:

Can't we just call that instead? I guess that is what @keegancsmith was referring to(?). We could factor it out into its own function and call it from your handler and from cleanup. WDYT?
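A rough sketch of what that factored-out helper could look like, based only on the per-shard logic visible further down in this diff; the name and exact signature are assumptions, not part of the PR:

```go
// Hypothetical shared helper: tombstone the repository inside a compound shard
// when shard merging is enabled, otherwise delete the shard's files outright.
// Both cleanup and the debug handler's deleteShards could call this.
func tombstoneOrDeleteShard(indexDir string, s shard, repoID uint32) error {
	if zoekt.ShardMergingEnabled() && maybeSetTombstone([]shard{s}, repoID) {
		shardsLog(indexDir, "tomb", []shard{s})
		return nil
	}

	paths, err := zoekt.IndexFilePaths(s.Path)
	if err != nil {
		return fmt.Errorf("listing files for shard %q: %w", s.Path, err)
	}
	for _, p := range paths {
		if err := os.Remove(p); err != nil {
			return fmt.Errorf("deleting %q: %w", p, err)
		}
	}
	shardsLog(indexDir, "delete", []shard{s})
	return nil
}
```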
```go
	shardMap := getShards(indexDir)

	shards, ok := shardMap[repoID]
	if !ok {
		return errRepositoryNotFound
	}

	// Ensure that the paths are in reverse sorted order so that Zoekt's
	// repository <-> shard matching logic works correctly.
	//
	// Example: - repoA_v16.00002.zoekt
	//          - repoA_v16.00001.zoekt
	//          - repoA_v16.00000.zoekt
	//
	// zoekt-indexserver checks whether it has indexed "repoA" by first checking to see if the 0th shard
	// is present (repoA_v16.00000.zoekt). If it's present, then it gathers the rest of the shard names in ascending order
	// (...00001.zoekt, ...00002.zoekt). If it's missing, then zoekt assumes that it never indexed "repoA".
	//
	// If this function were to crash while deleting repoA, and we only deleted the 0th shard, then shards 1 & 2 would never
	// be cleaned up by Zoekt indexserver (since the 0th shard is the only shard that's tested).
	//
	// Deleting shards in reverse sorted order (2 -> 1 -> 0) always ensures that we don't leave an inconsistent
	// state behind even if we crash.

	sort.Slice(shards, func(i, j int) bool {
		return shards[i].Path > shards[j].Path
	})

	for _, s := range shards {
		// Is this repository inside a compound shard? If so, set a tombstone
		// instead of deleting the shard outright.
		if zoekt.ShardMergingEnabled() && maybeSetTombstone([]shard{s}, repoID) {
			shardsLog(indexDir, "tomb", []shard{s})
```
Note: Looking at the implementation of shardsLog, I'm unsure if it's threadsafe (what happens if there are two writers to the same file)? deleteShards' requirement that we hold the global index lock protects us against this, but I wonder if we should make this more explicit (e.g. have a dedicated mutex just for shardsLog).

quoting: https://github.com/natefinch/lumberjack
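One way to make that explicit, sketched here as a thin wrapper since shardsLog's actual implementation isn't part of this diff; the wrapper name and package-level mutex are assumptions:

```go
// shardsLogMu serializes writes to the shards log so that correctness no
// longer depends solely on callers holding the global indexDir lock.
// Hypothetical sketch; shardsLog itself is left unchanged.
var shardsLogMu sync.Mutex

func shardsLogSynced(indexDir, action string, shards []shard) {
	shardsLogMu.Lock()
	defer shardsLogMu.Unlock()
	shardsLog(indexDir, action, shards)
}
```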
```go
			continue
		}

		paths, err := zoekt.IndexFilePaths(s.Path)
		if err != nil {
			return fmt.Errorf("listing files for shard %q: %w", s.Path, err)
		}

		for _, p := range paths {
			err := os.Remove(p)
			if err != nil {
				return fmt.Errorf("deleting %q: %w", p, err)
			}
		}

		shardsLog(indexDir, "delete", []shard{s})
	}

	return nil
}

var errRepositoryNotFound = errors.New("repository not found")
```
Is this unrelated cleanup? I.e., maybe it should be another PR?
No, I added this functionality because we switched the implementation. Before, the implementation made sure to delete the metadata file before its associated shard file. Since we switched to using zoekt.IndexFilePaths, that order isn't guaranteed anymore (leaving open the possibility of a metadata file not having an associated shard if we delete the shard and then crash). I added this additional logic to cleanup so that we have a background process that reaps those "stranded" files.

I am happy to pull this bit into a separate PR though.
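For context on the ordering point above, here is a rough sketch of the earlier, order-preserving approach as described in that comment; it is reconstructed from the description, not code from this PR, and shardPath is a placeholder:

```go
// Hypothetical sketch of the previous approach: remove the metadata file
// before the shard itself, so a crash between the two removals can only
// leave a shard without metadata, never a stranded .meta file.
if err := os.Remove(shardPath + ".meta"); err != nil && !errors.Is(err, fs.ErrNotExist) {
	return fmt.Errorf("removing metadata for shard %q: %w", shardPath, err)
}
if err := os.Remove(shardPath); err != nil {
	return fmt.Errorf("removing shard %q: %w", shardPath, err)
}
```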