Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix a bug in olm install #6490

Merged
merged 1 commit into from
Jul 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions changelog/fragments/02-olm-install-retry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# entries is a list of entries to include in
# release notes and/or the migration guide
entries:
- description: >
Fix a bug where `olm install` command is failed fo "no-match" error.

The output in this case is something like:

```
$ operator-sdk olm install --verbose
...
FATA[0001] Failed to install OLM version "latest": failed to create CRDs and resources: no matches for kind "OLMConfig" in version "operators.coreos.com/v1"
```

Now, in this case, operator-sdk tries to create the resource again, until it succeed (or until the timeout exceeded).

kind: "bugfix"

# Is this a breaking change?
breaking: false
47 changes: 39 additions & 8 deletions internal/olm/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ import (
log "github.com/sirupsen/logrus"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"

apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -112,18 +111,50 @@ func NewClientForConfig(cfg *rest.Config) (*Client, error) {
func (c Client) DoCreate(ctx context.Context, objs ...client.Object) error {
for _, obj := range objs {
kind := obj.GetObjectKind().GroupVersionKind().Kind
log.Infof(" Creating %s %q", kind, getName(obj.GetNamespace(), obj.GetName()))
err := c.KubeClient.Create(ctx, obj)
if err != nil {
if !apierrors.IsAlreadyExists(err) {
return err
}
log.Infof(" %s %q already exists", kind, getName(obj.GetNamespace(), obj.GetName()))
resourceName := getName(obj.GetNamespace(), obj.GetName())

log.Infof(" Creating %s %q", kind, resourceName)

if err := c.safeCreateOneResource(ctx, obj, kind, resourceName); err != nil {
log.Infof(" failed to create %s %q; %v", kind, resourceName, err)
return err
}
}
return nil
}

// try to create 10 times before giving up
grokspawn marked this conversation as resolved.
Show resolved Hide resolved
func (c Client) safeCreateOneResource(ctx context.Context, obj client.Object, kind string, resourceName string) error {
backoff := wait.Backoff{
// retrying every one seconds. We're relaying on the timeout context, so the number of steps is very large, so
// we could use the timeout flag (or its default value), as it used to create the context.
Duration: time.Second,
Steps: 1000,
Factor: 1,
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be a bit more idiomatic (at least from our kubernetes adjacency) to use the client-go retry package.

See: https://pkg.go.dev/k8s.io/client-go/util/retry#OnError

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 to using the retry package

err := wait.ExponentialBackoffWithContext(ctx, backoff, func() (bool, error) {
err := c.KubeClient.Create(ctx, obj)
if err == nil || apierrors.IsAlreadyExists(err) {
log.Infof(" %s %q created", kind, resourceName)
return true, nil
}

if meta.IsNoMatchError(err) {
oceanc80 marked this conversation as resolved.
Show resolved Hide resolved
log.Infof(" Failed to create %s %q. CRD is not ready yet. Retrying...", kind, resourceName)
return false, nil
}

return false, err
})

if err != nil && errors.Is(err, context.DeadlineExceeded) {
return fmt.Errorf("timeout")
}

return err
}

func (c Client) DoDelete(ctx context.Context, objs ...client.Object) error {
for _, obj := range objs {
kind := obj.GetObjectKind().GroupVersionKind().Kind
Expand Down
160 changes: 157 additions & 3 deletions internal/olm/client/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,20 @@ package client

import (
"context"
"errors"
"time"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
olmapiv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"

"sigs.k8s.io/controller-runtime/pkg/client"
fake "sigs.k8s.io/controller-runtime/pkg/client/fake"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

var _ = Describe("Client", func() {
Expand Down Expand Up @@ -258,4 +260,156 @@ var _ = Describe("Client", func() {
})
})
})

Describe("test DoCreate", func() {
var fakeClient client.Client

BeforeEach(func() {
fakeClient = &errClient{cli: fake.NewClientBuilder().Build()}
})

AfterEach(func() {
fakeClient.(*errClient).reset()
})

It("should create all the resources successfully", func() {
cli := Client{KubeClient: fakeClient}

ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()

Expect(cli.DoCreate(ctx,
&corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{Name: "test-ns"},
},
&corev1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "test-pod", Namespace: "test-ns"},
},
)).To(Succeed())

ns := &corev1.Namespace{}
Expect(fakeClient.Get(context.Background(), client.ObjectKey{Name: "test-ns"}, ns)).To(Succeed())

pod := &corev1.Pod{}
Expect(fakeClient.Get(context.Background(), client.ObjectKey{Namespace: "test-ns", Name: "test-pod"}, pod)).To(Succeed())
})

It("should eventually create all the resources successfully", func() {
cli := Client{KubeClient: fakeClient}

ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()

Expect(cli.DoCreate(ctx,
&corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{Name: "test-ns"},
},
&corev1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "eventually-match", Namespace: "test-ns"},
},
)).To(Succeed())

ns := &corev1.Namespace{}
Expect(fakeClient.Get(context.Background(), client.ObjectKey{Name: "test-ns"}, ns)).To(Succeed())

pod := &corev1.Pod{}
Expect(fakeClient.Get(context.Background(), client.ObjectKey{Namespace: "test-ns", Name: "eventually-match"}, pod)).To(Succeed())
})

It("should fail with no-match error", func() {
cli := Client{KubeClient: fakeClient}

ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
Expect(cli.DoCreate(ctx,
&corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{Name: "test-ns"},
},
&corev1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "no-match", Namespace: "test-ns"},
},
)).ToNot(Succeed())
})

It("should fail with unknown-error error", func() {
cli := Client{KubeClient: fakeClient}

Expect(cli.DoCreate(context.Background(),
&corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{Name: "test-ns"},
},
&corev1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "unknown-error", Namespace: "test-ns"},
},
)).ToNot(Succeed())
})
})
})

type errClient struct {
cli client.Client
noMatchCounter int
}

func (c *errClient) reset() {
c.noMatchCounter = 0
}

func (c *errClient) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error {
return c.cli.Get(ctx, key, obj, opts...)
}

func (c *errClient) List(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error {
return c.cli.List(ctx, list, opts...)
}
func (c *errClient) Create(ctx context.Context, obj client.Object, opts ...client.CreateOption) error {
switch obj.GetName() {
case "no-match":
return &meta.NoResourceMatchError{}

case "eventually-match":
if c.noMatchCounter >= 4 {
return c.cli.Create(ctx, obj, opts...)
}
c.noMatchCounter++
return &meta.NoResourceMatchError{}

case "unknown-error":
return errors.New("fake error")

default:
return c.cli.Create(ctx, obj, opts...)
}
}

func (c *errClient) Delete(ctx context.Context, obj client.Object, opts ...client.DeleteOption) error {
return c.cli.Delete(ctx, obj, opts...)
}

func (c *errClient) Update(ctx context.Context, obj client.Object, opts ...client.UpdateOption) error {
return c.cli.Update(ctx, obj, opts...)
}

func (c *errClient) Patch(ctx context.Context, obj client.Object, patch client.Patch, opts ...client.PatchOption) error {
return c.cli.Patch(ctx, obj, patch, opts...)
}

func (c *errClient) DeleteAllOf(ctx context.Context, obj client.Object, opts ...client.DeleteAllOfOption) error {
return c.cli.DeleteAllOf(ctx, obj, opts...)
}

func (c *errClient) SubResource(subResource string) client.SubResourceClient {
return c.cli.SubResource(subResource)
}

func (c *errClient) Scheme() *runtime.Scheme {
return c.cli.Scheme()
}

func (c *errClient) RESTMapper() meta.RESTMapper {
return c.cli.RESTMapper()
}

func (c *errClient) Status() client.SubResourceWriter {
return c.cli.Status()
}
Loading