Skip to content

Commit

Permalink
fix(monitoring): create duplicated pipelines (#225)
Browse files Browse the repository at this point in the history
* fix(monitoring): create duplicated pipelines

* fix(standalone): remove conflict data home settings

* refactor(e2e): pull vector image and push to local registry

* chore: debug ci

* fix: panic for fetching terminated pods

* chore: fix e2e errors
  • Loading branch information
zyy17 authored Dec 3, 2024
1 parent 6c905bf commit 3b0b025
Show file tree
Hide file tree
Showing 9 changed files with 90 additions and 6 deletions.
57 changes: 55 additions & 2 deletions controllers/greptimedbcluster/deployers/monitoring.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package deployers
import (
"bytes"
"context"
"database/sql"
"fmt"
"io"
"io/fs"
Expand All @@ -26,6 +27,7 @@ import (
"time"

"github.com/avast/retry-go"
"github.com/go-sql-driver/mysql"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/klog/v2"
Expand Down Expand Up @@ -93,11 +95,20 @@ func (d *MonitoringDeployer) CheckAndUpdateStatus(ctx context.Context, crdObject
}

if cluster.GetMonitoring().IsEnabled() && standalone.Status.StandalonePhase == v1alpha1.PhaseRunning {
if err := d.createPipeline(cluster); err != nil {
klog.Errorf("failed to create pipeline for standalone, err: '%v'", err)
pipeline, err := d.getPipeline(ctx, cluster)
if err != nil {
klog.Errorf("Failed to get pipeline for standalone, err: '%v'", err)
return false, err
}

if pipeline == "" {
klog.Infof("Create pipeline '%s' for standalone monitoring", common.LogsPipelineName(cluster.Namespace, cluster.Name))
if err := d.createPipeline(cluster); err != nil {
klog.Errorf("Failed to create pipeline for standalone, err: '%v'", err)
return false, err
}
}

cluster.Status.Monitoring.InternalDNSName = fmt.Sprintf("%s.%s.svc.cluster.local", common.ResourceName(common.MonitoringServiceName(cluster.Name), v1alpha1.StandaloneKind), cluster.Namespace)
if err := UpdateStatus(ctx, cluster, d.Client); err != nil {
klog.Errorf("Failed to update status: %s", err)
Expand Down Expand Up @@ -189,6 +200,48 @@ func (d *MonitoringDeployer) defaultPipeline() (string, error) {
return string(data), nil
}

// getPipeline looks up the logs pipeline of the given cluster in the
// monitoring standalone instance over the MySQL protocol.
//
// It connects to the monitoring standalone service of the cluster on
// v1alpha1.DefaultMySQLPort and reads the `greptime_private.pipelines` table.
//
// It returns the stored pipeline definition when a row named
// common.LogsPipelineName(cluster.Namespace, cluster.Name) exists. It returns
// an empty string with a nil error when either the `pipelines` table does not
// exist yet or no such row is found. Any other failure is returned as an error.
func (d *MonitoringDeployer) getPipeline(ctx context.Context, cluster *v1alpha1.GreptimeDBCluster) (string, error) {
	cfg := mysql.Config{
		Net:                  "tcp",
		Addr:                 fmt.Sprintf("%s.%s.svc.cluster.local:%d", common.ResourceName(common.MonitoringServiceName(cluster.Name), v1alpha1.StandaloneKind), cluster.Namespace, v1alpha1.DefaultMySQLPort),
		DBName:               "greptime_private",
		AllowNativePasswords: true,
		Timeout:              5 * time.Second,
	}
	db, err := sql.Open("mysql", cfg.FormatDSN())
	if err != nil {
		return "", err
	}
	defer db.Close()

	// Check if the `greptime_private.pipelines` table exists.
	probe, err := db.QueryContext(ctx, "SELECT 1 FROM pipelines LIMIT 1")
	if err != nil {
		// The server reports a missing table through the error text; treat it
		// as "no pipeline has been created yet" rather than as a failure.
		if strings.Contains(err.Error(), "TableNotFound") {
			return "", nil
		}
		return "", err
	}
	// Only the success of the probe matters, not its rows. Release it right
	// away so its connection is not held open while the real lookup runs; a
	// close error on this throwaway result set is deliberately ignored.
	_ = probe.Close()

	pipelineName := common.LogsPipelineName(cluster.Namespace, cluster.Name)
	rows, err := db.QueryContext(ctx, "SELECT pipeline FROM pipelines WHERE name = ? LIMIT 1", pipelineName)
	if err != nil {
		return "", err
	}
	// Deferred before any early return so every path closes the result set.
	defer rows.Close()

	if rows.Next() {
		var pipeline string
		if err := rows.Scan(&pipeline); err != nil {
			return "", err
		}
		return pipeline, nil
	}

	// Next returns false both when there are no rows and when iteration
	// failed; only the former means the pipeline does not exist.
	if err := rows.Err(); err != nil {
		return "", err
	}

	return "", nil
}

var _ deployer.Builder = &monitoringBuilder{}

type monitoringBuilder struct {
Expand Down
1 change: 0 additions & 1 deletion controllers/greptimedbstandalone/deployer.go
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,6 @@ func (b *standaloneBuilder) containerPorts() []corev1.ContainerPort {
func (b *standaloneBuilder) generateMainContainerArgs() []string {
var args = []string{
"standalone", "start",
"--data-home", "/data",
"--rpc-addr", fmt.Sprintf("0.0.0.0:%d", b.standalone.Spec.RPCPort),
"--mysql-addr", fmt.Sprintf("0.0.0.0:%d", b.standalone.Spec.MySQLPort),
"--http-addr", fmt.Sprintf("0.0.0.0:%d", b.standalone.Spec.HTTPPort),
Expand Down
4 changes: 3 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@ require (
dario.cat/mergo v1.0.1
github.com/avast/retry-go v3.0.0+incompatible
github.com/gin-gonic/gin v1.10.0
github.com/go-sql-driver/mysql v1.8.1
github.com/google/go-cmp v0.6.0
github.com/jackc/pgx/v5 v5.6.0
github.com/onsi/ginkgo/v2 v2.11.0
github.com/onsi/gomega v1.27.10
github.com/pelletier/go-toml v1.9.5
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.52.0
github.com/prometheus/client_golang v1.16.0
github.com/sergi/go-diff v1.3.1
github.com/spf13/cobra v1.7.0
github.com/spf13/pflag v1.0.5
Expand All @@ -29,6 +31,7 @@ require (
)

require (
filippo.io/edwards25519 v1.1.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/bytedance/sonic v1.11.6 // indirect
github.com/bytedance/sonic/loader v0.1.1 // indirect
Expand Down Expand Up @@ -76,7 +79,6 @@ require (
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_golang v1.16.0 // indirect
github.com/prometheus/client_model v0.4.0 // indirect
github.com/prometheus/common v0.44.0 // indirect
github.com/prometheus/procfs v0.10.1 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHSxpiH9JdtuBj0=
github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY=
github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
Expand Down Expand Up @@ -62,6 +64,8 @@ github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJn
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8=
github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y=
github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
Expand Down
6 changes: 5 additions & 1 deletion pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,11 @@ func (c *MetricsCollector) getPods(ctx context.Context, cluster *greptimev1alpha
}

pods := &corev1.PodList{}
if err := c.client.List(ctx, pods, client.InNamespace(cluster.Namespace), client.MatchingLabels(selector.MatchLabels)); err != nil {
if err := c.client.List(ctx, pods,
client.InNamespace(cluster.Namespace),
client.MatchingLabels(selector.MatchLabels),
client.MatchingFields{"status.phase": string(corev1.PodRunning)},
); err != nil {
return nil, err
}

Expand Down
12 changes: 12 additions & 0 deletions tests/e2e/setup/create-cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ DEFAULT_TIMEOUT=300s
# We always use the latest released greptimedb image for testing.
GREPTIMEDB_IMAGE=greptime/greptimedb:latest

# We always use the nightly vector image (alpine variant) for testing.
VECTOR_IMAGE=timberio/vector:nightly-alpine

# Define the color for the output.
RED='\033[1;31m'
GREEN='\033[1;32m'
Expand Down Expand Up @@ -107,6 +110,14 @@ function pull_greptimedb_image() {
echo -e "${GREEN}<= Greptimedb image is pulled and pushed.${RESET}"
}

# Pull the vector image named by VECTOR_IMAGE and push a copy of it to the
# local registry listening on localhost:${REGISTRY_PORT}.
function pull_vector_image() {
  # Derive the local tag from VECTOR_IMAGE so the pulled image and the pushed
  # image cannot drift apart when VECTOR_IMAGE is changed.
  local local_image="localhost:${REGISTRY_PORT}/${VECTOR_IMAGE}"

  echo -e "${GREEN}=> Pull and push vector image...${RESET}"
  docker pull "${VECTOR_IMAGE}"
  docker tag "${VECTOR_IMAGE}" "${local_image}"
  docker push "${local_image}"
  echo -e "${GREEN}<= Vector image is pulled and pushed.${RESET}"
}

function create_kind_cluster() {
echo -e "${GREEN}=> Create kind cluster...${RESET}"
# check cluster
Expand Down Expand Up @@ -282,6 +293,7 @@ function main() {
build_operator_image
build_initializer_image
pull_greptimedb_image
pull_vector_image
create_kind_cluster
deploy_cloud_provider_kind
deploy_greptimedb_operator
Expand Down
6 changes: 6 additions & 0 deletions tests/e2e/setup/diagnostic-cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ function dump_events() {
kubectl get events --sort-by=.metadata.creationTimestamp -A
}

# Print the GreptimeDB custom resources (clusters first, then standalones)
# found in all namespaces.
function dump_greptime_resources() {
  local resource
  for resource in greptimedbclusters.greptime.io greptimedbstandalones.greptime.io; do
    kubectl get "${resource}" -A
  done
}

function dump_e2e_pods_details() {
kubectl get pods -A | grep -E 'e2e|greptimedb-operator' | awk '{print $2 " " $1}' | while read -r line; do
namespace=$(echo "$line" | awk '{print $2}')
Expand All @@ -40,6 +45,7 @@ function dump_e2e_pods_details() {

function main() {
dump_basic_info
dump_greptime_resources
dump_events
dump_e2e_pods_details
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,5 @@ spec:
replicas: 1
monitoring:
enabled: true
vector:
image: localhost:5001/timberio/vector:nightly-alpine
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ metadata:
name: e2e-cluster-with-standalone-wal
namespace: default
spec:
initializer:
image: localhost:5001/greptime/greptimedb-initializer:latest
base:
main:
image: greptime/greptimedb:latest
image: localhost:5001/greptime/greptimedb:latest
frontend:
replicas: 1
meta:
Expand Down

0 comments on commit 3b0b025

Please sign in to comment.