Skip to content

Commit

Permalink
Fix ClusterInfo type ResourceExport recreation bug
Browse files Browse the repository at this point in the history
After Gateway HA is enabled, the ClusterInfo type of ResourceExport
will be recreated when the active Gateway is changed. But there is a case
that a new ClusterInfo of ResourceExport creation may fail when the
leader controller process is slow and existing ResourceExport is not
deleted in time.
Fix the issue through retry when the existing ResourceExport's DeletionTimestamp
is not zero.

Signed-off-by: Lan Luo <luola@vmware.com>
  • Loading branch information
luolanzone committed Dec 5, 2022
1 parent b977b1d commit 407ecda
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 13 deletions.
17 changes: 7 additions & 10 deletions multicluster/controllers/multicluster/gateway_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package multicluster
import (
"context"
"fmt"
"reflect"

apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -104,15 +103,18 @@ func (r *GatewayReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct

createOrUpdate := func(gwIP string) error {
existingResExport := &mcsv1alpha1.ResourceExport{}
if err := commonArea.Get(ctx, resExportNamespacedName, existingResExport); err != nil {
if !apierrors.IsNotFound(err) {
return err
}
err := commonArea.Get(ctx, resExportNamespacedName, existingResExport)
if err != nil && !apierrors.IsNotFound(err) {
return err
}
if apierrors.IsNotFound(err) || !existingResExport.DeletionTimestamp.IsZero() {
if err = r.createResourceExport(ctx, req, commonArea, gwIP); err != nil {
return err
}
return nil
}
// updateResourceExport will update latest Gateway information with the existing ResourceExport's resourceVersion.
// It will return an error and retry when there is a version conflict.
if err = r.updateResourceExport(ctx, req, commonArea, existingResExport, &mcsv1alpha1.GatewayInfo{GatewayIP: gwIP}); err != nil {
return err
}
Expand Down Expand Up @@ -150,11 +152,6 @@ func (r *GatewayReconciler) updateResourceExport(ctx context.Context, req ctrl.R
PodCIDRs: r.podCIDRs,
GatewayInfos: []mcsv1alpha1.GatewayInfo{*gwInfo},
}
if reflect.DeepEqual(existingResExport.Spec, resExportSpec) {
klog.V(2).InfoS("Skip updating ClusterInfo kind of ResourceExport due to no change", "clusterinfo", klog.KObj(existingResExport),
"gateway", req.NamespacedName)
return nil
}
klog.V(2).InfoS("Updating ClusterInfo kind of ResourceExport", "clusterinfo", klog.KObj(existingResExport),
"gateway", req.NamespacedName)
existingResExport.Spec = resExportSpec
Expand Down
32 changes: 29 additions & 3 deletions multicluster/controllers/multicluster/gateway_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ limitations under the License.
package multicluster

import (
"context"
"reflect"
"testing"
"time"

"github.com/stretchr/testify/assert"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
Expand Down Expand Up @@ -74,14 +76,15 @@ var (
func TestGatewayReconciler(t *testing.T) {
gwNode1New := gwNode1
gwNode1New.GatewayIP = "10.10.10.12"

staleExistingResExport := existingResExport.DeepCopy()
staleExistingResExport.DeletionTimestamp = &metav1.Time{Time: time.Now()}
tests := []struct {
name string
te mcsv1alpha1.Gateway
namespacedName types.NamespacedName
gateway []mcsv1alpha1.Gateway
resExport *mcsv1alpha1.ResourceExport
expectedInfo []mcsv1alpha1.GatewayInfo
expectedErr string
isDelete bool
}{
{
Expand All @@ -99,6 +102,18 @@ func TestGatewayReconciler(t *testing.T) {
},
},
},
{
name: "error creating a ResourceExport when existing ResourceExport is being deleted",
namespacedName: types.NamespacedName{
Namespace: "default",
Name: "node-1",
},
gateway: []mcsv1alpha1.Gateway{
gwNode1,
},
resExport: staleExistingResExport,
expectedErr: "resourceexports.multicluster.crd.antrea.io \"cluster-a-clusterinfo\" already exists",
},
{
name: "update a ResourceExport successfully by updating an existing Gateway",
namespacedName: types.NamespacedName{
Expand Down Expand Up @@ -145,7 +160,11 @@ func TestGatewayReconciler(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
req := ctrl.Request{NamespacedName: tt.namespacedName}
if _, err := r.Reconcile(ctx, req); err != nil {
t.Errorf("Gateway Reconciler should handle ResourceExports events successfully but got error = %v", err)
if tt.expectedErr != "" {
assert.Equal(t, tt.expectedErr, err.Error())
} else {
t.Errorf("Gateway Reconciler should handle ResourceExports events successfully but got error = %v", err)
}
} else {
ciExport := mcsv1alpha1.ResourceExport{}
ciExportName := types.NamespacedName{
Expand All @@ -166,3 +185,10 @@ func TestGatewayReconciler(t *testing.T) {
})
}
}

func TestGetServiceCIDR(t *testing.T) {
fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects().Build()
r := NewGatewayReconciler(fakeClient, scheme, "default", "", []string{"10.200.1.1/16"}, nil)
err := r.getServiceCIDR(context.TODO())
assert.Contains(t, err.Error(), "expected a specific error but none was returned")
}

0 comments on commit 407ecda

Please sign in to comment.