Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix probe fail and exit while probe fail #237

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 4 additions & 8 deletions states/etcd/common/collection.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,28 +123,24 @@ func GetCollectionByIDVersion(ctx context.Context, cli kv.MetaKV, basePath strin
// meta before database
prefix := path.Join(basePath, CollectionMetaPrefix, strconv.FormatInt(collID, 10))
val, err := cli.Load(ctx, prefix)
if err != nil {
fmt.Println("get error", err.Error())
return nil, err
}
if len(val) > 0 {
if err == nil && len(val) > 0 {
found = true
ck = prefix
cv = []byte(val)
}

// with database, dbID unknown here
prefix = path.Join(basePath, DBCollectionMetaPrefix)
keys, _, _ := cli.LoadWithPrefix(ctx, prefix)
keys, values, _ := cli.LoadWithPrefix(ctx, prefix)
suffix := strconv.FormatInt(collID, 10)
for _, key := range keys {
for i, key := range keys {
if strings.HasSuffix(key, suffix) {
if found {
return nil, fmt.Errorf("multiple key found for collection %d: %s, %s", collID, ck, key)
}
found = true
ck = prefix
cv = []byte(val)
cv = []byte(values[i])
}
}
if !found {
Expand Down
18 changes: 16 additions & 2 deletions states/probe.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"fmt"
"math"
"math/rand"
"os"
"path"
"strconv"
"strings"
Expand Down Expand Up @@ -87,10 +88,12 @@ func getProbeQueryCmd(cli kv.MetaKV, basePath string) *cobra.Command {
return
}

errCount := 0
for _, collection := range loaded {
fmt.Println("probing collection", collection.CollectionID)
req, err := getMockSearchRequest(ctx, cli, basePath, collection)
if err != nil {
errCount++
fmt.Println("failed to generated mock request", err.Error())
continue
}
Expand All @@ -101,14 +104,20 @@ func getProbeQueryCmd(cli kv.MetaKV, basePath string) *cobra.Command {
})
if err != nil {
fmt.Println("querycoord get shard leaders error", err.Error())
errCount++
continue
}
if leaders.GetStatus().GetErrorCode() != commonpbv2.ErrorCode_Success {
fmt.Printf("collection[%d] failed to get shard leader, error: %s\n", collection.CollectionID, leaders.GetStatus().GetReason())
errCount++
continue
}

for _, shard := range leaders.GetShards() {

for _, nodeID := range shard.GetNodeIds() {
qn, ok := qns[nodeID]
if !ok {
errCount++
fmt.Printf("Shard leader %d not online\n", nodeID)
continue
}
Expand All @@ -120,17 +129,22 @@ func getProbeQueryCmd(cli kv.MetaKV, basePath string) *cobra.Command {
cancel()
if err != nil {
fmt.Printf("Shard %s Leader[%d] failed to search with eventually consistency level, err: %s\n", shard.GetChannelName(), nodeID, err.Error())
errCount++
continue
}
if resp.GetStatus().GetErrorCode() != commonpbv2.ErrorCode_Success {
fmt.Printf("Shard %s Leader[%d] failed to search,error code: %s reason:%s\n", shard.GetChannelName(), nodeID, resp.GetStatus().GetErrorCode().String(), resp.GetStatus().GetReason())
errCount++
continue
}
fmt.Printf("Shard %s leader[%d] probe with search success.\n", shard.GetChannelName(), nodeID)
}
}
}

if errCount != 0 {
fmt.Printf("probe failed, hit %d errors", errCount)
os.Exit(-1)
}
Comment on lines +144 to +147
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may cause birdwatcher to quit when running in normal mode.
Maybe we should add a parameter for error handling here?

},
}

Expand Down
Loading