From 4e443da42e688798443507343f4e2b79c6ba7fbe Mon Sep 17 00:00:00 2001 From: Nahshon Unna-Tsameret Date: Thu, 6 Jul 2023 09:37:40 +0300 Subject: [PATCH] Fix a bug in olm install When installing OLM, retry creating resources, if the CRD is not ready yet. Signed-off-by: Nahshon Unna-Tsameret --- changelog/fragments/02-olm-install-retry.yaml | 20 +++ internal/olm/client/client.go | 47 ++++- internal/olm/client/client_test.go | 160 +++++++++++++++++- 3 files changed, 216 insertions(+), 11 deletions(-) create mode 100644 changelog/fragments/02-olm-install-retry.yaml diff --git a/changelog/fragments/02-olm-install-retry.yaml b/changelog/fragments/02-olm-install-retry.yaml new file mode 100644 index 0000000000..31e374b3e4 --- /dev/null +++ b/changelog/fragments/02-olm-install-retry.yaml @@ -0,0 +1,20 @@ +# entries is a list of entries to include in +# release notes and/or the migration guide +entries: + - description: > + Fix a bug where `olm install` command is failed fo "no-match" error. + + The output in this case is something like: + + ``` + $ operator-sdk olm install --verbose + ... + FATA[0001] Failed to install OLM version "latest": failed to create CRDs and resources: no matches for kind "OLMConfig" in version "operators.coreos.com/v1" + ``` + + Now, in this case, operator-sdk tries to create the resource again, until it succeed (or until the timeout exceeded). + + kind: "bugfix" + + # Is this a breaking change? + breaking: false diff --git a/internal/olm/client/client.go b/internal/olm/client/client.go index dbd23c3a4c..6059590b39 100644 --- a/internal/olm/client/client.go +++ b/internal/olm/client/client.go @@ -30,7 +30,6 @@ import ( log "github.com/sirupsen/logrus" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -112,18 +111,50 @@ func NewClientForConfig(cfg *rest.Config) (*Client, error) { func (c Client) DoCreate(ctx context.Context, objs ...client.Object) error { for _, obj := range objs { kind := obj.GetObjectKind().GroupVersionKind().Kind - log.Infof(" Creating %s %q", kind, getName(obj.GetNamespace(), obj.GetName())) - err := c.KubeClient.Create(ctx, obj) - if err != nil { - if !apierrors.IsAlreadyExists(err) { - return err - } - log.Infof(" %s %q already exists", kind, getName(obj.GetNamespace(), obj.GetName())) + resourceName := getName(obj.GetNamespace(), obj.GetName()) + + log.Infof(" Creating %s %q", kind, resourceName) + + if err := c.safeCreateOneResource(ctx, obj, kind, resourceName); err != nil { + log.Infof(" failed to create %s %q; %v", kind, resourceName, err) + return err } } return nil } +// try to create 10 times before giving up +func (c Client) safeCreateOneResource(ctx context.Context, obj client.Object, kind string, resourceName string) error { + backoff := wait.Backoff{ + // retrying every one seconds. We're relaying on the timeout context, so the number of steps is very large, so + // we could use the timeout flag (or its default value), as it used to create the context. + Duration: time.Second, + Steps: 1000, + Factor: 1, + } + + err := wait.ExponentialBackoffWithContext(ctx, backoff, func() (bool, error) { + err := c.KubeClient.Create(ctx, obj) + if err == nil || apierrors.IsAlreadyExists(err) { + log.Infof(" %s %q created", kind, resourceName) + return true, nil + } + + if meta.IsNoMatchError(err) { + log.Infof(" Failed to create %s %q. CRD is not ready yet. Retrying...", kind, resourceName) + return false, nil + } + + return false, err + }) + + if err != nil && errors.Is(err, context.DeadlineExceeded) { + return fmt.Errorf("timeout") + } + + return err +} + func (c Client) DoDelete(ctx context.Context, objs ...client.Object) error { for _, obj := range objs { kind := obj.GetObjectKind().GroupVersionKind().Kind diff --git a/internal/olm/client/client_test.go b/internal/olm/client/client_test.go index 261f6c0ca8..f68c7a2704 100644 --- a/internal/olm/client/client_test.go +++ b/internal/olm/client/client_test.go @@ -16,18 +16,20 @@ package client import ( "context" + "errors" + "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" olmapiv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - + "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/client" - fake "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/client/fake" ) var _ = Describe("Client", func() { @@ -258,4 +260,156 @@ var _ = Describe("Client", func() { }) }) }) + + Describe("test DoCreate", func() { + var fakeClient client.Client + + BeforeEach(func() { + fakeClient = &errClient{cli: fake.NewClientBuilder().Build()} + }) + + AfterEach(func() { + fakeClient.(*errClient).reset() + }) + + It("should create all the resources successfully", func() { + cli := Client{KubeClient: fakeClient} + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + Expect(cli.DoCreate(ctx, + &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{Name: "test-ns"}, + }, + &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod", Namespace: "test-ns"}, + }, + )).To(Succeed()) + + ns := &corev1.Namespace{} + Expect(fakeClient.Get(context.Background(), client.ObjectKey{Name: "test-ns"}, ns)).To(Succeed()) + + pod := &corev1.Pod{} + Expect(fakeClient.Get(context.Background(), client.ObjectKey{Namespace: "test-ns", Name: "test-pod"}, pod)).To(Succeed()) + }) + + It("should eventually create all the resources successfully", func() { + cli := Client{KubeClient: fakeClient} + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + Expect(cli.DoCreate(ctx, + &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{Name: "test-ns"}, + }, + &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "eventually-match", Namespace: "test-ns"}, + }, + )).To(Succeed()) + + ns := &corev1.Namespace{} + Expect(fakeClient.Get(context.Background(), client.ObjectKey{Name: "test-ns"}, ns)).To(Succeed()) + + pod := &corev1.Pod{} + Expect(fakeClient.Get(context.Background(), client.ObjectKey{Namespace: "test-ns", Name: "eventually-match"}, pod)).To(Succeed()) + }) + + It("should fail with no-match error", func() { + cli := Client{KubeClient: fakeClient} + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + Expect(cli.DoCreate(ctx, + &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{Name: "test-ns"}, + }, + &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "no-match", Namespace: "test-ns"}, + }, + )).ToNot(Succeed()) + }) + + It("should fail with unknown-error error", func() { + cli := Client{KubeClient: fakeClient} + + Expect(cli.DoCreate(context.Background(), + &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{Name: "test-ns"}, + }, + &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "unknown-error", Namespace: "test-ns"}, + }, + )).ToNot(Succeed()) + }) + }) }) + +type errClient struct { + cli client.Client + noMatchCounter int +} + +func (c *errClient) reset() { + c.noMatchCounter = 0 +} + +func (c *errClient) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + return c.cli.Get(ctx, key, obj, opts...) +} + +func (c *errClient) List(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error { + return c.cli.List(ctx, list, opts...) +} +func (c *errClient) Create(ctx context.Context, obj client.Object, opts ...client.CreateOption) error { + switch obj.GetName() { + case "no-match": + return &meta.NoResourceMatchError{} + + case "eventually-match": + if c.noMatchCounter >= 4 { + return c.cli.Create(ctx, obj, opts...) + } + c.noMatchCounter++ + return &meta.NoResourceMatchError{} + + case "unknown-error": + return errors.New("fake error") + + default: + return c.cli.Create(ctx, obj, opts...) + } +} + +func (c *errClient) Delete(ctx context.Context, obj client.Object, opts ...client.DeleteOption) error { + return c.cli.Delete(ctx, obj, opts...) +} + +func (c *errClient) Update(ctx context.Context, obj client.Object, opts ...client.UpdateOption) error { + return c.cli.Update(ctx, obj, opts...) +} + +func (c *errClient) Patch(ctx context.Context, obj client.Object, patch client.Patch, opts ...client.PatchOption) error { + return c.cli.Patch(ctx, obj, patch, opts...) +} + +func (c *errClient) DeleteAllOf(ctx context.Context, obj client.Object, opts ...client.DeleteAllOfOption) error { + return c.cli.DeleteAllOf(ctx, obj, opts...) +} + +func (c *errClient) SubResource(subResource string) client.SubResourceClient { + return c.cli.SubResource(subResource) +} + +func (c *errClient) Scheme() *runtime.Scheme { + return c.cli.Scheme() +} + +func (c *errClient) RESTMapper() meta.RESTMapper { + return c.cli.RESTMapper() +} + +func (c *errClient) Status() client.SubResourceWriter { + return c.cli.Status() +}