🌱 Make ClusterResourceSet controller more predictable #10869

Merged
21 changes: 21 additions & 0 deletions exp/addons/internal/controllers/clusterresourceset_controller.go
@@ -176,6 +176,27 @@ func (r *ClusterResourceSetReconciler) Reconcile(ctx context.Context, req ctrl.R

// Return an aggregated error if errors occurred.
if len(errs) > 0 {
// When more than one ClusterResourceSet targets the same cluster,
// there might be conflicts when reconciling those ClusterResourceSets in parallel, because they all try to
// patch the same ClusterResourceSetBinding object.
// In case of patching conflicts we don't want to go into exponential backoff, otherwise it might take an
// arbitrarily long time to get to a stable state due to the backoff delay quickly growing.
// Instead, we are requeuing with a fixed interval to make the system a little bit more predictable (and stabilize tests).
// NOTE: The fact that we rely on conflict errors + requeue to reach the stable state isn't ideal, and
// it might also become an issue at scale.
// e.g. From an empirical observation, it takes 20s for 10 ClusterResourceSets to get to a stable state
// on the same ClusterResourceSetBinding; with fewer ClusterResourceSets the issue is less relevant
// (e.g. with 5 ClusterResourceSets it takes about 4 seconds).
// NOTE: Conflicts happen mostly when the ClusterResourceSetBinding is initialized / an entry is added for each
// ClusterResourceSet targeting the same cluster.
for _, err := range errs {
if aggregate, ok := err.(kerrors.Aggregate); ok {
if len(aggregate.Errors()) == 1 && apierrors.IsConflict(aggregate.Errors()[0]) {
log.Info("Conflict in patching a ClusterResourceSetBinding that is updated by more than one ClusterResourceSet, requeing")
fabriziopandini marked this conversation as resolved.
Show resolved Hide resolved
return ctrl.Result{RequeueAfter: 100 * time.Millisecond}, nil
}
}
}
return ctrl.Result{}, kerrors.NewAggregate(errs)
}
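
For context, a rough sketch (not part of this PR) of how fast exponential backoff grows, assuming controller-runtime's default workqueue rate limiter with a 5ms base delay and a 1000s cap; this is why the code above returns a fixed 100ms RequeueAfter on conflicts instead of surfacing the error:

```go
package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/util/workqueue"
)

func main() {
	// Assumption: 5ms base and 1000s cap match controller-runtime's defaults.
	limiter := workqueue.NewItemExponentialFailureRateLimiter(5*time.Millisecond, 1000*time.Second)
	for i := 1; i <= 12; i++ {
		// Each When call records one more failure and returns the next delay:
		// 5ms, 10ms, 20ms, ... doubling on every retry.
		fmt.Printf("conflict #%d -> retry in %v\n", i, limiter.When("crs"))
	}
	// By the 12th consecutive conflict the delay is already above 10s,
	// whereas a fixed 100ms RequeueAfter keeps retries evenly spaced.
}
```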

@@ -56,23 +56,7 @@ func TestClusterResourceSetReconciler(t *testing.T) {
resourceConfigMapsNamespace = "default"
)

setup := func(t *testing.T, g *WithT) *corev1.Namespace {
t.Helper()

clusterResourceSetName = fmt.Sprintf("clusterresourceset-%s", util.RandomString(6))
labels = map[string]string{clusterResourceSetName: "bar"}

ns, err := env.CreateNamespace(ctx, namespacePrefix)
g.Expect(err).ToNot(HaveOccurred())

clusterName = fmt.Sprintf("cluster-%s", util.RandomString(6))
testCluster = &clusterv1.Cluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns.Name}}

t.Log("Creating the Cluster")
g.Expect(env.Create(ctx, testCluster)).To(Succeed())
t.Log("Creating the remote Cluster kubeconfig")
g.Expect(env.CreateKubeconfigSecret(ctx, testCluster)).To(Succeed())

createConfigMapAndSecret := func(g Gomega, namespaceName, configmapName, secretName string) {
resourceConfigMap1Content := fmt.Sprintf(`metadata:
name: %s
namespace: %s
@@ -82,7 +66,7 @@ apiVersion: v1`, resourceConfigMap1Name, resourceConfigMapsNamespace)
testConfigmap := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: configmapName,
Namespace: ns.Name,
Namespace: namespaceName,
},
Data: map[string]string{
"cm": resourceConfigMap1Content,
@@ -98,7 +82,7 @@ metadata:
testSecret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: secretName,
Namespace: ns.Name,
Namespace: namespaceName,
},
Type: "addons.cluster.x-k8s.io/resource-set",
StringData: map[string]string{
@@ -108,7 +92,28 @@ metadata:
t.Log("Creating a Secret and a ConfigMap with ConfigMap in their data field")
g.Expect(env.Create(ctx, testConfigmap)).To(Succeed())
g.Expect(env.Create(ctx, testSecret)).To(Succeed())
}

setup := func(t *testing.T, g *WithT) *corev1.Namespace {
t.Helper()

clusterResourceSetName = fmt.Sprintf("clusterresourceset-%s", util.RandomString(6))
labels = map[string]string{clusterResourceSetName: "bar"}

ns, err := env.CreateNamespace(ctx, namespacePrefix)
g.Expect(err).ToNot(HaveOccurred())

clusterName = fmt.Sprintf("cluster-%s", util.RandomString(6))
testCluster = &clusterv1.Cluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns.Name}}

t.Log("Creating the Cluster")
g.Expect(env.CreateAndWait(ctx, testCluster)).To(Succeed())
t.Log("Creating the remote Cluster kubeconfig")
g.Expect(env.CreateKubeconfigSecret(ctx, testCluster)).To(Succeed())
_, err = tracker.GetClient(ctx, client.ObjectKeyFromObject(testCluster))
g.Expect(err).ToNot(HaveOccurred())

createConfigMapAndSecret(g, ns.Name, configmapName, secretName)
return ns
}

@@ -1031,10 +1036,14 @@ metadata:
defer teardown(t, g, ns)

t.Log("Creating ClusterResourceSet instances that have same labels as selector")
for range 10 {
for i := range 10 {
configmapName := fmt.Sprintf("%s-%d", configmapName, i)
secretName := fmt.Sprintf("%s-%d", secretName, i)
createConfigMapAndSecret(g, ns.Name, configmapName, secretName)

clusterResourceSetInstance := &addonsv1.ClusterResourceSet{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("clusterresourceset-%s", util.RandomString(6)),
Name: fmt.Sprintf("clusterresourceset-%d", i),
Namespace: ns.Name,
},
Spec: addonsv1.ClusterResourceSetSpec{
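
As an aside, a minimal sketch of the loop pattern used above (Go 1.22+ integer range; the base names below are hypothetical stand-ins for the test's configmapName/secretName): each iteration shadows the outer name variables, so all ten ClusterResourceSets get unique, deterministic resource names instead of sharing one ConfigMap and Secret:

```go
package main

import "fmt"

func main() {
	// Hypothetical base names, standing in for the test's variables.
	configmapName, secretName := "resource-configmap", "resource-secret"
	for i := range 10 { // Go 1.22+ integer range
		// Inside a short variable declaration, the right-hand side still refers
		// to the outer variable, so each iteration derives a fresh name from
		// the unchanged base.
		configmapName := fmt.Sprintf("%s-%d", configmapName, i)
		secretName := fmt.Sprintf("%s-%d", secretName, i)
		fmt.Println(configmapName, secretName)
	}
}
```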
7 changes: 4 additions & 3 deletions exp/addons/internal/controllers/suite_test.go
@@ -38,8 +38,9 @@ import (
)

var (
env *envtest.Environment
ctx = ctrl.SetupSignalHandler()
env *envtest.Environment
tracker *remote.ClusterCacheTracker
ctx = ctrl.SetupSignalHandler()
)

func TestMain(m *testing.M) {
@@ -76,7 +77,7 @@ func TestMain(m *testing.M) {
panic(fmt.Sprintf("Failed to start cache for metadata only Secret watches: %v", err))
}

tracker, err := remote.NewClusterCacheTracker(mgr, remote.ClusterCacheTrackerOptions{})
tracker, err = remote.NewClusterCacheTracker(mgr, remote.ClusterCacheTrackerOptions{})
if err != nil {
panic(fmt.Sprintf("Failed to create new cluster cache tracker: %v", err))
}
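
A hedged illustration (abbreviated, not the actual suite code) of why the assignment operator changed from `:=` to `=`: with the new package-level tracker variable in scope, `:=` inside TestMain would declare a fresh local that shadows it, leaving the package-level variable nil for the tests that now use it:

```go
package controllers

import (
	"sigs.k8s.io/cluster-api/controllers/remote"
	ctrl "sigs.k8s.io/controller-runtime"
)

// Package-level so tests can reach the tracker after setup.
var tracker *remote.ClusterCacheTracker

func setupTracker(mgr ctrl.Manager) {
	// Wrong: ":=" would declare a new local tracker shadowing the
	// package-level variable, which would stay nil:
	//   tracker, err := remote.NewClusterCacheTracker(mgr, remote.ClusterCacheTrackerOptions{})

	// Right: "=" assigns to the package-level variable.
	var err error
	tracker, err = remote.NewClusterCacheTracker(mgr, remote.ClusterCacheTrackerOptions{})
	if err != nil {
		panic(err)
	}
}
```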
2 changes: 1 addition & 1 deletion internal/test/envtest/environment.go
@@ -383,7 +383,7 @@ func (e *Environment) waitForWebhooks() {

// CreateKubeconfigSecret generates a new Kubeconfig secret from the envtest config.
func (e *Environment) CreateKubeconfigSecret(ctx context.Context, cluster *clusterv1.Cluster) error {
return e.Create(ctx, kubeconfig.GenerateSecret(cluster, kubeconfig.FromEnvTestConfig(e.Config, cluster)))
return e.CreateAndWait(ctx, kubeconfig.GenerateSecret(cluster, kubeconfig.FromEnvTestConfig(e.Config, cluster)))
}

// Cleanup deletes all the given objects.
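
For readers unfamiliar with the envtest helpers, a rough sketch of what a CreateAndWait-style helper does (the real implementation lives in internal/test/envtest and may differ; the polling interval and timeout here are assumptions): create the object, then block until it is readable through the cached client, so later steps such as the ClusterResourceSet reconciler looking up the kubeconfig Secret don't race the informer cache:

```go
package envtest

import (
	"context"
	"time"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/util/wait"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// createAndWait is a sketch, not the actual helper.
func createAndWait(ctx context.Context, c client.Client, obj client.Object) error {
	if err := c.Create(ctx, obj); err != nil {
		return err
	}
	key := client.ObjectKeyFromObject(obj)
	// Poll until the object is visible via the (possibly cached) client.
	return wait.PollUntilContextTimeout(ctx, 100*time.Millisecond, 10*time.Second, true,
		func(ctx context.Context) (bool, error) {
			if err := c.Get(ctx, key, obj); err != nil {
				if apierrors.IsNotFound(err) {
					return false, nil // not in the cache yet; keep polling
				}
				return false, err
			}
			return true, nil
		})
}
```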