Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: set GWC accepted condition properly #1021

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@
For more information on this migration please consult
[kubernetes-sigs/kubebuilder#3907][kubebuilder_3907].
[#956](https://github.com/Kong/gateway-operator/pull/956)
- The `GatewayClass` Accepted Condition is set to `False` with reason `InvalidParameters`
in case the `.spec.parametersRef` field is not a valid reference to an existing
`GatewayConfiguration` object.
[#1021](https://github.com/Kong/gateway-operator/pull/1021)

[kubebuilder_3907]: https://github.com/kubernetes-sigs/kubebuilder/discussions/3907

Expand Down
9 changes: 8 additions & 1 deletion controller/gateway/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,13 +123,20 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
log.Trace(logger, "checking gatewayclass")
gwc, err := gatewayclass.Get(ctx, r.Client, string(gateway.Spec.GatewayClassName))
if err != nil {
if errors.As(err, &operatorerrors.ErrUnsupportedGatewayClass{}) {
switch {
case errors.As(err, &operatorerrors.ErrUnsupportedGatewayClass{}):
log.Debug(logger, "resource not supported, ignoring",
"expectedGatewayClass", vars.ControllerName(),
"gatewayClass", gateway.Spec.GatewayClassName,
"reason", err.Error(),
)
return ctrl.Result{}, nil
case errors.As(err, &operatorerrors.ErrNotAcceptedGatewayClass{}):
log.Debug(logger, "GatewayClass not accepted, ignoring",
"gatewayClass", gateway.Spec.GatewayClassName,
"reason", err.Error(),
)
return ctrl.Result{}, nil
}
return ctrl.Result{}, err
}
Expand Down
10 changes: 7 additions & 3 deletions controller/gateway/controller_watch.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ func (r *Reconciler) gatewayHasMatchingGatewayClass(obj client.Object) bool {
// class as well. If we fail here it's most likely because of some failure
// of the Kubernetes API and it's technically better to enqueue the object
// than to drop it for eventual consistency during cluster outages.
return !errors.As(err, &operatorerrors.ErrUnsupportedGatewayClass{})
return !errors.As(err, &operatorerrors.ErrUnsupportedGatewayClass{}) &&
!errors.As(err, &operatorerrors.ErrNotAcceptedGatewayClass{})
}

return true
Expand Down Expand Up @@ -229,9 +230,12 @@ func (r *Reconciler) listManagedGatewaysInNamespace(ctx context.Context, obj cli
objKey := client.ObjectKey{Name: string(gateway.Spec.GatewayClassName)}

if _, err := gatewayclass.Get(ctx, r.Client, string(gateway.Spec.GatewayClassName)); err != nil {
if errors.As(err, &operatorerrors.ErrUnsupportedGatewayClass{}) {
switch {
case errors.As(err, &operatorerrors.ErrUnsupportedGatewayClass{}):
log.Debug(logger, "gateway class not supported, ignoring")
} else {
case errors.As(err, &operatorerrors.ErrNotAcceptedGatewayClass{}):
log.Debug(logger, "gateway class not accepted, ignoring")
default:
log.Error(logger, err, "failed to get Gateway's GatewayClass",
"gatewayClass", objKey.Name,
"gateway", gateway.Name,
Expand Down
41 changes: 18 additions & 23 deletions controller/gatewayclass/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"fmt"

k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
Expand All @@ -16,7 +15,6 @@ import (
"github.com/kong/gateway-operator/controller"
"github.com/kong/gateway-operator/controller/pkg/log"
"github.com/kong/gateway-operator/internal/utils/gatewayclass"
"github.com/kong/gateway-operator/pkg/consts"
k8sutils "github.com/kong/gateway-operator/pkg/utils/kubernetes"
)

Expand Down Expand Up @@ -55,29 +53,26 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
return ctrl.Result{}, nil
}

if !gwc.IsAccepted() {
oldGwc := gwc.DeepCopy()
oldGwc := gwc.DeepCopy()

k8sutils.SetCondition(
k8sutils.NewConditionWithGeneration(
consts.ConditionType(gatewayv1.GatewayClassConditionStatusAccepted),
metav1.ConditionTrue,
consts.ConditionReason(gatewayv1.GatewayClassReasonAccepted),
"the gatewayclass has been accepted by the operator",
gwc.GetGeneration(),
),
gwc,
)
if err := r.Status().Patch(ctx, gwc.GatewayClass, client.MergeFrom(oldGwc)); err != nil {
if k8serrors.IsConflict(err) {
log.Debug(logger, "conflict found when updating GatewayClass, retrying")
return ctrl.Result{
Requeue: true,
RequeueAfter: controller.RequeueWithoutBackoff,
}, nil
}
return ctrl.Result{}, fmt.Errorf("failed patching GatewayClass: %w", err)
condition, err := getAcceptedCondition(ctx, r.Client, gwc.GatewayClass)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to get accepted condition: %w", err)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we set the condition here with some error message so that users do not have to look into operators logs? Ideally I'd consider adding the error's message into the condition message so that users know what happened. WDYT?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The AcceptedCondition message is properly set in the getAcceptedCondition function. Isn't this what you are referring to?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was specifically alluding here to lines 42-44 https://github.com/Kong/gateway-operator/pull/1021/files/31ff623213f0fed16947060e8a0eccec5e02f4ff#diff-f4e83572e27854d40d56ef54e8f982cdca3ccaaa3f17900c3ab6e3f4fb04e971R42-R44 where the err is simply returned. Then we seem to return it here as well. Not setting anything on the GatewayClass. Or am I missing something here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That can be only a transient error caused by the get function. I don't think we should change the status of the resource when such an error is raised. Do you?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't have to be transient. It can also be a problem with e.g. permissions. I believe it would be better UX to make the error visible in the status rather than forcing users to go through operator's logs. WDYT?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so honestly. If that's an error related to permissions, for example, that isn't related to the resource, but to the operator itself. An object status should include information related to the resource(s) themselves, not to the operators managing it IMO.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If that's an error related to permissions, for example, that isn't related to the resource, but to the operator itself.

I agree but the error in object's status is a breadcrumb which can help you as a user debug this problem faster.

The permission might not be directly related to the operator. There might exist policies in the cluster which would prevent the Get from succeeding.

I have already seen reports of several issues where users were confused because an object didn't get any status condition set (for different reasons but still). For this reason I do believe that putting information that can help debug a problem in status condition is going to allow people to quickly resolve an issue that caused it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hear you and tend to agree with this general statement of helping users to debug problems. But here we are discussing about putting information that has nothing to do with the resource itself. I think we should instead try to educate users about what kind of information can be found where. I am very opinionated on this matter, and for the reasons above I think this is not the way to go honestly.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But here we are discussing about putting information that has nothing to do with the resource itself.

Agreed. Nevertheless this information is beneficial to the user to quickly resolve an issue with this object not being accepted.

I think we should instead try to educate users about what kind of information can be found where.

Agreed on this as well. Do you have an idea how we could educate users about this one so that they can better debug problems at we discuss here?


I see your view and I understand it yet I do not share it. Feel free to continue with the way this is proposed as of now 👍

}
k8sutils.SetCondition(
*condition,
gwc,
)
mlavacca marked this conversation as resolved.
Show resolved Hide resolved

if err := r.Status().Patch(ctx, gwc.GatewayClass, client.MergeFrom(oldGwc)); err != nil {
if k8serrors.IsConflict(err) {
log.Debug(logger, "conflict found when updating GatewayClass, retrying")
return ctrl.Result{
Requeue: true,
RequeueAfter: controller.RequeueWithoutBackoff,
}, nil
}
return ctrl.Result{}, fmt.Errorf("failed patching GatewayClass: %w", err)
}

return ctrl.Result{}, nil
Expand Down
65 changes: 65 additions & 0 deletions controller/gatewayclass/controller_reconciler_utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package gatewayclass

import (
"context"
"strings"

k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
gatewayv1 "sigs.k8s.io/gateway-api/apis/v1"

operatorv1beta1 "github.com/kong/gateway-operator/api/v1beta1"
"github.com/kong/gateway-operator/pkg/consts"
k8sutils "github.com/kong/gateway-operator/pkg/utils/kubernetes"
)

// getAcceptedCondition returns the accepted condition for the GatewayClass, with
// the proper status, reason and message.
func getAcceptedCondition(ctx context.Context, cl client.Client, gwc *gatewayv1.GatewayClass) (*metav1.Condition, error) {
reason := string(gatewayv1.GatewayClassReasonAccepted)
mlavacca marked this conversation as resolved.
Show resolved Hide resolved
message := []string{}
mlavacca marked this conversation as resolved.
Show resolved Hide resolved
status := metav1.ConditionFalse

if gwc.Spec.ParametersRef != nil {
validRef := true
if gwc.Spec.ParametersRef.Group != gatewayv1.Group(operatorv1beta1.SchemeGroupVersion.Group) ||
gwc.Spec.ParametersRef.Kind != "GatewayConfiguration" {
reason = string(gatewayv1.GatewayClassReasonInvalidParameters)
message = append(message, "ParametersRef must reference a gateway-operator.konghq.com/GatewayConfiguration")
validRef = false
}

if gwc.Spec.ParametersRef.Namespace == nil {
reason = string(gatewayv1.GatewayClassReasonInvalidParameters)
message = append(message, "ParametersRef must reference a namespaced resource")
validRef = false
}

if validRef {
gatewayConfig := operatorv1beta1.GatewayConfiguration{}
err := cl.Get(ctx, client.ObjectKey{Name: gwc.Spec.ParametersRef.Name, Namespace: string(*gwc.Spec.ParametersRef.Namespace)}, &gatewayConfig)
if client.IgnoreNotFound(err) != nil {
return nil, err
}
if k8serrors.IsNotFound(err) {
reason = string(gatewayv1.GatewayClassReasonInvalidParameters)
message = append(message, "The referenced GatewayConfiguration does not exist")
}
programmer04 marked this conversation as resolved.
Show resolved Hide resolved
}
}
if reason == string(gatewayv1.GatewayClassReasonAccepted) {
status = metav1.ConditionTrue
message = []string{"GatewayClass is accepted"}
}

acceptedCondition := k8sutils.NewConditionWithGeneration(
consts.ConditionType(gatewayv1.GatewayClassConditionStatusAccepted),
status,
consts.ConditionReason(reason),
strings.Join(message, ". "),
gwc.GetGeneration(),
)

return &acceptedCondition, nil
}
131 changes: 131 additions & 0 deletions controller/gatewayclass/controller_reconciler_utils_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
package gatewayclass

import (
"context"
"testing"

"github.com/samber/lo"
"github.com/stretchr/testify/assert"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
gatewayv1 "sigs.k8s.io/gateway-api/apis/v1"

operatorv1beta1 "github.com/kong/gateway-operator/api/v1beta1"
)

func TestGetAcceptedCondition(t *testing.T) {
scheme := runtime.NewScheme()
assert.NoError(t, gatewayv1.Install(scheme))
assert.NoError(t, operatorv1beta1.AddToScheme(scheme))

tests := []struct {
name string
gwc *gatewayv1.GatewayClass
existingObjs []runtime.Object
expectedStatus metav1.ConditionStatus
expectedReason string
expectedMsg string
}{
{
name: "ParametersRef is nil",
gwc: &gatewayv1.GatewayClass{
Spec: gatewayv1.GatewayClassSpec{
ParametersRef: nil,
},
},
expectedStatus: metav1.ConditionTrue,
expectedReason: string(gatewayv1.GatewayClassReasonAccepted),
expectedMsg: "GatewayClass is accepted",
},
{
name: "Invalid ParametersRef Group and kind",
gwc: &gatewayv1.GatewayClass{
Spec: gatewayv1.GatewayClassSpec{

ParametersRef: &gatewayv1.ParametersReference{
Group: "invalid.group",
Kind: "InvalidKind",
Namespace: lo.ToPtr(gatewayv1.Namespace("default")),
Name: "invalid",
},
},
},
expectedStatus: metav1.ConditionFalse,
expectedReason: string(gatewayv1.GatewayClassReasonInvalidParameters),
expectedMsg: "ParametersRef must reference a gateway-operator.konghq.com/GatewayConfiguration",
},
{
name: "ParametersRef Namespace is nil",
gwc: &gatewayv1.GatewayClass{
Spec: gatewayv1.GatewayClassSpec{
ParametersRef: &gatewayv1.ParametersReference{
Group: gatewayv1.Group(operatorv1beta1.SchemeGroupVersion.Group),
Kind: "GatewayConfiguration",
Name: "no-namespace",
},
},
},
expectedStatus: metav1.ConditionFalse,
expectedReason: string(gatewayv1.GatewayClassReasonInvalidParameters),
expectedMsg: "ParametersRef must reference a namespaced resource",
},
{
name: "GatewayConfiguration does not exist",
gwc: &gatewayv1.GatewayClass{
Spec: gatewayv1.GatewayClassSpec{
ParametersRef: &gatewayv1.ParametersReference{
Group: gatewayv1.Group(operatorv1beta1.SchemeGroupVersion.Group),
Kind: "GatewayConfiguration",
Name: "nonexistent",
Namespace: lo.ToPtr(gatewayv1.Namespace("default")),
},
},
},
expectedStatus: metav1.ConditionFalse,
expectedReason: string(gatewayv1.GatewayClassReasonInvalidParameters),
expectedMsg: "The referenced GatewayConfiguration does not exist",
},
{
name: "Valid ParametersRef",
gwc: &gatewayv1.GatewayClass{
Spec: gatewayv1.GatewayClassSpec{
ParametersRef: &gatewayv1.ParametersReference{
Group: gatewayv1.Group(operatorv1beta1.SchemeGroupVersion.Group),
Kind: "GatewayConfiguration",
Name: "valid-config",
Namespace: lo.ToPtr(gatewayv1.Namespace("default")),
},
},
},
existingObjs: []runtime.Object{
&operatorv1beta1.GatewayConfiguration{
ObjectMeta: metav1.ObjectMeta{
Name: "valid-config",
Namespace: "default",
},
},
},
expectedStatus: metav1.ConditionTrue,
expectedReason: string(gatewayv1.GatewayClassReasonAccepted),
expectedMsg: "GatewayClass is accepted",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ctx := context.TODO()
mlavacca marked this conversation as resolved.
Show resolved Hide resolved
cl := fake.NewClientBuilder().
WithScheme(scheme).
WithRuntimeObjects(tt.existingObjs...).
Build()

condition, err := getAcceptedCondition(ctx, cl, tt.gwc)
assert.NoError(t, err)
assert.NotNil(t, condition)
assert.Equal(t, tt.expectedStatus, condition.Status)
assert.Equal(t, tt.expectedReason, condition.Reason)
assert.Equal(t, tt.expectedMsg, condition.Message)
})
}
}
15 changes: 13 additions & 2 deletions controller/specialized/aigateway_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,19 @@ func (r *AIGatewayReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
// See: https://github.com/kubernetes-sigs/controller-runtime/issues/1996
gwc, err := gatewayclass.Get(ctx, r.Client, aigateway.Spec.GatewayClassName)
if err != nil {
if errors.As(err, &operatorerrors.ErrUnsupportedGatewayClass{}) {
log.Debug(logger, "resource not supported, ignoring", "ExpectedGatewayClass", vars.ControllerName())
switch {
case errors.As(err, &operatorerrors.ErrUnsupportedGatewayClass{}):
log.Debug(logger, "resource not supported, ignoring",
"expectedGatewayClass", vars.ControllerName(),
"gatewayClass", aigateway.Spec.GatewayClassName,
"reason", err.Error(),
)
return ctrl.Result{}, nil
case errors.As(err, &operatorerrors.ErrNotAcceptedGatewayClass{}):
log.Debug(logger, "GatewayClass not accepted, ignoring",
"gatewayClass", aigateway.Spec.GatewayClassName,
"reason", err.Error(),
)
return ctrl.Result{}, nil
}
return ctrl.Result{}, err
Expand Down
3 changes: 2 additions & 1 deletion controller/specialized/aigateway_controller_watch.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ func (r *AIGatewayReconciler) aiGatewayHasMatchingGatewayClass(obj client.Object
// class as well. If we fail here it's most likely because of some failure
// of the Kubernetes API and it's technically better to enqueue the object
// than to drop it for eventual consistency during cluster outages.
return !errors.As(err, &operatorerrors.ErrUnsupportedGatewayClass{})
return !errors.As(err, &operatorerrors.ErrUnsupportedGatewayClass{}) &&
!errors.As(err, &operatorerrors.ErrNotAcceptedGatewayClass{})
}

return true
Expand Down
17 changes: 17 additions & 0 deletions internal/errors/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package errors
import (
"errors"
"fmt"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// -----------------------------------------------------------------------------
Expand Down Expand Up @@ -31,6 +33,21 @@ func (e ErrUnsupportedGatewayClass) Error() string {
return fmt.Sprintf("unsupported gateway class: %s", e.reason)
}

// ErrNotAcceptedGatewayClass is an error which indicates that a provided GatewayClass
// is not accepted.
type ErrNotAcceptedGatewayClass struct {
gatewayClass string
condition metav1.Condition
}

func NewErrNotAcceptedGatewayClass(gatewayClass string, condition metav1.Condition) ErrNotAcceptedGatewayClass {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This func should have a godoc

return ErrNotAcceptedGatewayClass{gatewayClass: gatewayClass, condition: condition}
}

func (e ErrNotAcceptedGatewayClass) Error() string {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto

return fmt.Sprintf("gateway class %s not accepted; reason: %s, message: %s", e.gatewayClass, e.condition.Reason, e.condition.Message)
}

// -----------------------------------------------------------------------------
// GatewayClass - Errors
// -----------------------------------------------------------------------------
Expand Down
Loading
Loading