diff --git a/go.mod b/go.mod
index 97646e3849..1f22568d32 100644
--- a/go.mod
+++ b/go.mod
@@ -15,6 +15,7 @@ require (
 	github.com/anchore/clio v0.0.0-20240705045624-ac88e09ad9d0
 	github.com/anchore/stereoscope v0.0.1
 	github.com/anchore/syft v0.100.0
+	github.com/avast/retry-go/v4 v4.6.0
 	github.com/defenseunicorns/pkg/helpers/v2 v2.0.1
 	github.com/defenseunicorns/pkg/kubernetes v0.2.0
 	github.com/defenseunicorns/pkg/oci v1.0.1
diff --git a/go.sum b/go.sum
index 3821f05f82..2599e607f4 100644
--- a/go.sum
+++ b/go.sum
@@ -421,6 +421,8 @@ github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:W
 github.com/atomicgo/cursor v0.0.1/go.mod h1:cBON2QmmrysudxNBFthvMtN32r3jxVRIvzkUiF/RuIk=
 github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
 github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
+github.com/avast/retry-go/v4 v4.6.0 h1:K9xNA+KeB8HHc2aWFuLb25Offp+0iVRXEvFx8IinRJA=
+github.com/avast/retry-go/v4 v4.6.0/go.mod h1:gvWlPhBVsvBbLkVGDg/KwvBv0bEkCOLRRSHKIr2PyOE=
 github.com/aws/aws-sdk-go v1.44.122/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo=
 github.com/aws/aws-sdk-go v1.54.9 h1:e0Czh9AhrCVPuyaIUnibYmih3cYexJKlqlHSJ2eMKbI=
 github.com/aws/aws-sdk-go v1.54.9/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU=
diff --git a/src/internal/packager/helm/chart.go b/src/internal/packager/helm/chart.go
index 26f81e1a9b..134bb4c493 100644
--- a/src/internal/packager/helm/chart.go
+++ b/src/internal/packager/helm/chart.go
@@ -11,6 +11,7 @@ import (
 	"time"
 
 	"github.com/Masterminds/semver/v3"
+	"github.com/avast/retry-go/v4"
 	plutoversionsfile "github.com/fairwindsops/pluto/v5"
 	plutoapi "github.com/fairwindsops/pluto/v5/pkg/api"
 	goyaml "github.com/goccy/go-yaml"
@@ -23,8 +24,6 @@ import (
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"sigs.k8s.io/yaml"
 
-	"github.com/defenseunicorns/pkg/helpers/v2"
-
 	"github.com/zarf-dev/zarf/src/config"
 	"github.com/zarf-dev/zarf/src/pkg/message"
 	"github.com/zarf-dev/zarf/src/types"
@@ -59,7 +58,8 @@ func (h *Helm) InstallOrUpgradeChart(ctx context.Context) (types.ConnectStrings,
 	}
 
 	histClient := action.NewHistory(h.actionConfig)
-	tryHelm := func() error {
+
+	err = retry.Do(func() error {
 		var err error
 
 		releases, histErr := histClient.Run(h.chart.ReleaseName)
@@ -89,9 +89,7 @@ func (h *Helm) InstallOrUpgradeChart(ctx context.Context) (types.ConnectStrings,
 
 		spinner.Success()
 		return nil
-	}
-
-	err = helpers.Retry(tryHelm, h.retries, 5*time.Second, message.Warnf)
+	}, retry.Context(ctx), retry.Attempts(uint(h.retries)), retry.Delay(500*time.Millisecond))
 	if err != nil {
 		releases, _ := histClient.Run(h.chart.ReleaseName)
 		previouslyDeployedVersion := 0
diff --git a/src/internal/packager/images/pull.go b/src/internal/packager/images/pull.go
index 67907feb9d..2e206742c6 100644
--- a/src/internal/packager/images/pull.go
+++ b/src/internal/packager/images/pull.go
@@ -16,8 +16,8 @@ import (
 	"strings"
 	"sync"
 	"sync/atomic"
-	"time"
 
+	"github.com/avast/retry-go/v4"
 	"github.com/defenseunicorns/pkg/helpers/v2"
 	"github.com/google/go-containerregistry/pkg/crane"
 	"github.com/google/go-containerregistry/pkg/logs"
@@ -229,26 +229,23 @@ func Pull(ctx context.Context, cfg PullConfig) (map[transform.Image]v1.Image, er
 
 	toPull := maps.Clone(fetched)
 
-	sc := func() error {
+	err = retry.Do(func() error {
 		saved, err := SaveConcurrent(ctx, cranePath, toPull)
 		for k := range saved {
 			delete(toPull, k)
 		}
 		return err
-	}
-
-	ss := func() error {
-		saved, err := SaveSequential(ctx, cranePath, toPull)
-		for k := range saved {
-			delete(toPull, k)
-		}
-		return err
-	}
-
-	if err := helpers.RetryWithContext(ctx, sc, 2, 5*time.Second, message.Warnf); err != nil {
+	}, retry.Context(ctx), retry.Attempts(2))
+	if err != nil {
 		message.Warnf("Failed to save images in parallel, falling back to sequential save: %s", err.Error())
-
-		if err := helpers.RetryWithContext(ctx, ss, 2, 5*time.Second, message.Warnf); err != nil {
+		err = retry.Do(func() error {
+			saved, err := SaveSequential(ctx, cranePath, toPull)
+			for k := range saved {
+				delete(toPull, k)
+			}
+			return err
+		}, retry.Context(ctx), retry.Attempts(2))
+		if err != nil {
 			return nil, err
 		}
 	}
diff --git a/src/internal/packager/images/push.go b/src/internal/packager/images/push.go
index 0d0752f9fe..7bac363001 100644
--- a/src/internal/packager/images/push.go
+++ b/src/internal/packager/images/push.go
@@ -9,10 +9,12 @@ import (
 	"fmt"
 	"time"
 
+	"github.com/avast/retry-go/v4"
 	"github.com/defenseunicorns/pkg/helpers/v2"
 	"github.com/google/go-containerregistry/pkg/crane"
 	"github.com/google/go-containerregistry/pkg/logs"
 	v1 "github.com/google/go-containerregistry/pkg/v1"
+
 	"github.com/zarf-dev/zarf/src/pkg/cluster"
 	"github.com/zarf-dev/zarf/src/pkg/message"
 	"github.com/zarf-dev/zarf/src/pkg/transform"
@@ -54,7 +56,7 @@ func Push(ctx context.Context, cfg PushConfig) error {
 	progress := message.NewProgressBar(totalSize, fmt.Sprintf("Pushing %d images", len(toPush)))
 	defer progress.Close()
 
-	if err := helpers.Retry(func() error {
+	err = retry.Do(func() error {
 		c, _ := cluster.NewCluster()
 		if c != nil {
 			registryURL, tunnel, err = c.ConnectToZarfRegistryEndpoint(ctx, cfg.RegInfo)
@@ -123,7 +125,8 @@ func Push(ctx context.Context, cfg PushConfig) error {
 			totalSize -= size
 		}
 		return nil
-	}, cfg.Retries, 5*time.Second, message.Warnf); err != nil {
+	}, retry.Context(ctx), retry.Attempts(uint(cfg.Retries)), retry.Delay(500*time.Millisecond))
+	if err != nil {
 		return err
 	}
 
diff --git a/src/pkg/cluster/cluster.go b/src/pkg/cluster/cluster.go
index 801fd50173..5db77e0c9c 100644
--- a/src/pkg/cluster/cluster.go
+++ b/src/pkg/cluster/cluster.go
@@ -16,6 +16,7 @@ import (
 	"k8s.io/client-go/rest"
 	"sigs.k8s.io/cli-utils/pkg/kstatus/watcher"
 
+	"github.com/avast/retry-go/v4"
 	pkgkubernetes "github.com/defenseunicorns/pkg/kubernetes"
 
 	"github.com/zarf-dev/zarf/src/pkg/message"
@@ -44,7 +45,25 @@ func NewClusterWithWait(ctx context.Context) (*Cluster, error) {
 	if err != nil {
 		return nil, err
 	}
-	err = waitForHealthyCluster(ctx, c.Clientset)
+	err = retry.Do(func() error {
+		nodeList, err := c.Clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
+		if err != nil {
+			return err
+		}
+		if len(nodeList.Items) < 1 {
+			return fmt.Errorf("cluster does not have any nodes")
+		}
+		pods, err := c.Clientset.CoreV1().Pods(corev1.NamespaceAll).List(ctx, metav1.ListOptions{})
+		if err != nil {
+			return err
+		}
+		for _, pod := range pods.Items {
+			if pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodRunning {
+				return nil
+			}
+		}
+		return fmt.Errorf("no pods are in succeeded or running state")
+	}, retry.Context(ctx), retry.Attempts(0), retry.DelayType(retry.FixedDelay), retry.Delay(time.Second))
 	if err != nil {
 		return nil, err
 	}
@@ -77,44 +96,3 @@ func NewCluster() (*Cluster, error) {
 	}
 	return c, nil
 }
-
-// WaitForHealthyCluster checks for an available K8s cluster every second until timeout.
-func waitForHealthyCluster(ctx context.Context, client kubernetes.Interface) error {
-	const waitDuration = 1 * time.Second
-
-	timer := time.NewTimer(0)
-	defer timer.Stop()
-
-	for {
-		select {
-		case <-ctx.Done():
-			return fmt.Errorf("error waiting for cluster to report healthy: %w", ctx.Err())
-		case <-timer.C:
-			// Make sure there is at least one running Node
-			nodeList, err := client.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
-			if err != nil || len(nodeList.Items) < 1 {
-				message.Debugf("No nodes reporting healthy yet: %v\n", err)
-				timer.Reset(waitDuration)
-				continue
-			}
-
-			// Get the cluster pod list
-			pods, err := client.CoreV1().Pods(corev1.NamespaceAll).List(ctx, metav1.ListOptions{})
-			if err != nil {
-				message.Debugf("Could not get the pod list: %v", err)
-				timer.Reset(waitDuration)
-				continue
-			}
-
-			// Check that at least one pod is in the 'succeeded' or 'running' state
-			for _, pod := range pods.Items {
-				if pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodRunning {
-					return nil
-				}
-			}
-
-			message.Debug("No pods reported 'succeeded' or 'running' state yet.")
-			timer.Reset(waitDuration)
-		}
-	}
-}
diff --git a/src/pkg/cluster/data.go b/src/pkg/cluster/data.go
index 7c57780a26..bd4b51aea5 100644
--- a/src/pkg/cluster/data.go
+++ b/src/pkg/cluster/data.go
@@ -19,6 +19,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/client-go/kubernetes"
 
+	"github.com/avast/retry-go/v4"
 	"github.com/defenseunicorns/pkg/helpers/v2"
 
 	"github.com/zarf-dev/zarf/src/api/v1alpha1"
@@ -59,6 +60,7 @@ func (c *Cluster) HandleDataInjection(ctx context.Context, data v1alpha1.ZarfDat
 		return fmt.Errorf("unable to execute tar, ensure it is installed in the $PATH: %w", err)
 	}
 
+	// TODO: Refactor to use retry.
 	for {
 		select {
 		case <-ctx.Done():
@@ -180,76 +182,68 @@ type podFilter func(pod corev1.Pod) bool
 func waitForPodsAndContainers(ctx context.Context, clientset kubernetes.Interface, target podLookup, include podFilter) ([]corev1.Pod, error) {
 	waitCtx, cancel := context.WithTimeout(ctx, 90*time.Second)
 	defer cancel()
-
-	timer := time.NewTimer(0)
-	defer timer.Stop()
-
-	for {
-		select {
-		case <-waitCtx.Done():
-			return nil, ctx.Err()
-		case <-timer.C:
-			listOpts := metav1.ListOptions{
-				LabelSelector: target.Selector,
-			}
-			podList, err := clientset.CoreV1().Pods(target.Namespace).List(ctx, listOpts)
-			if err != nil {
-				return nil, err
+	readyPods, err := retry.DoWithData(func() ([]corev1.Pod, error) {
+		listOpts := metav1.ListOptions{
+			LabelSelector: target.Selector,
+		}
+		podList, err := clientset.CoreV1().Pods(target.Namespace).List(waitCtx, listOpts)
+		if err != nil {
+			return nil, err
+		}
+		message.Debugf("Found %d pods for target %#v", len(podList.Items), target)
+		// Sort the pods from newest to oldest
+		sort.Slice(podList.Items, func(i, j int) bool {
+			return podList.Items[i].CreationTimestamp.After(podList.Items[j].CreationTimestamp.Time)
+		})
+
+		readyPods := []corev1.Pod{}
+		for _, pod := range podList.Items {
+			message.Debugf("Testing pod %q", pod.Name)
+
+			// If an include function is provided, only keep pods that return true
+			if include != nil && !include(pod) {
+				continue
 			}
 
-			message.Debugf("Found %d pods for target %#v", len(podList.Items), target)
-
-			var readyPods = []corev1.Pod{}
-
-			// Sort the pods from newest to oldest
-			sort.Slice(podList.Items, func(i, j int) bool {
-				return podList.Items[i].CreationTimestamp.After(podList.Items[j].CreationTimestamp.Time)
-			})
+			// Handle container targeting
+			if target.Container != "" {
+				message.Debugf("Testing pod %q for container %q", pod.Name, target.Container)
 
-			for _, pod := range podList.Items {
-				message.Debugf("Testing pod %q", pod.Name)
-
-				// If an include function is provided, only keep pods that return true
-				if include != nil && !include(pod) {
-					continue
-				}
-
-				// Handle container targeting
-				if target.Container != "" {
-					message.Debugf("Testing pod %q for container %q", pod.Name, target.Container)
-
-					// Check the status of initContainers for a running match
-					for _, initContainer := range pod.Status.InitContainerStatuses {
-						isRunning := initContainer.State.Running != nil
-						if initContainer.Name == target.Container && isRunning {
-							// On running match in initContainer break this loop
-							readyPods = append(readyPods, pod)
-							break
-						}
+				// Check the status of initContainers for a running match
+				for _, initContainer := range pod.Status.InitContainerStatuses {
+					isRunning := initContainer.State.Running != nil
+					if initContainer.Name == target.Container && isRunning {
+						// On running match in initContainer break this loop
+						readyPods = append(readyPods, pod)
+						break
 					}
+				}
 
-					// Check the status of regular containers for a running match
-					for _, container := range pod.Status.ContainerStatuses {
-						isRunning := container.State.Running != nil
-						if container.Name == target.Container && isRunning {
-							readyPods = append(readyPods, pod)
-							break
-						}
-					}
-				} else {
-					status := pod.Status.Phase
-					message.Debugf("Testing pod %q phase, want (%q) got (%q)", pod.Name, corev1.PodRunning, status)
-					// Regular status checking without a container
-					if status == corev1.PodRunning {
+				// Check the status of regular containers for a running match
+				for _, container := range pod.Status.ContainerStatuses {
+					isRunning := container.State.Running != nil
+					if container.Name == target.Container && isRunning {
 						readyPods = append(readyPods, pod)
 						break
 					}
 				}
+			} else {
+				status := pod.Status.Phase
+				message.Debugf("Testing pod %q phase, want (%q) got (%q)", pod.Name, corev1.PodRunning, status)
+				// Regular status checking without a container
+				if status == corev1.PodRunning {
+					readyPods = append(readyPods, pod)
+					break
+				}
 			}
-			if len(readyPods) > 0 {
-				return readyPods, nil
-			}
-			timer.Reset(3 * time.Second)
 		}
+		if len(readyPods) == 0 {
+			return nil, fmt.Errorf("no ready pods found")
+		}
+		return readyPods, nil
+	}, retry.Context(waitCtx), retry.Attempts(0), retry.DelayType(retry.FixedDelay), retry.Delay(time.Second))
+	if err != nil {
+		return nil, err
 	}
+	return readyPods, nil
 }
diff --git a/src/pkg/cluster/namespace.go b/src/pkg/cluster/namespace.go
index 6d4256ef83..99d50a5f5e 100644
--- a/src/pkg/cluster/namespace.go
+++ b/src/pkg/cluster/namespace.go
@@ -6,8 +6,10 @@ package cluster
 
 import (
 	"context"
+	"fmt"
 	"time"
 
+	"github.com/avast/retry-go/v4"
 	corev1 "k8s.io/api/core/v1"
 	kerrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -27,23 +29,20 @@ func (c *Cluster) DeleteZarfNamespace(ctx context.Context) error {
 	if err != nil {
 		return err
 	}
-	timer := time.NewTimer(0)
-	defer timer.Stop()
-	for {
-		select {
-		case <-ctx.Done():
-			return ctx.Err()
-		case <-timer.C:
-			_, err := c.Clientset.CoreV1().Namespaces().Get(ctx, ZarfNamespaceName, metav1.GetOptions{})
-			if kerrors.IsNotFound(err) {
-				return nil
-			}
-			if err != nil {
-				return err
-			}
-			timer.Reset(1 * time.Second)
+	err = retry.Do(func() error {
+		_, err := c.Clientset.CoreV1().Namespaces().Get(ctx, ZarfNamespaceName, metav1.GetOptions{})
+		if kerrors.IsNotFound(err) {
+			return nil
 		}
+		if err != nil {
+			return err
+		}
+		return fmt.Errorf("namespace still exists")
+	}, retry.Context(ctx), retry.Attempts(0), retry.DelayType(retry.FixedDelay), retry.Delay(time.Second))
+	if err != nil {
+		return err
 	}
+	return nil
 }
 
 // NewZarfManagedNamespace returns a corev1.Namespace with Zarf-managed labels
diff --git a/src/pkg/cluster/state.go b/src/pkg/cluster/state.go
index 7ce8a3fa6d..3c31ccf128 100644
--- a/src/pkg/cluster/state.go
+++ b/src/pkg/cluster/state.go
@@ -16,6 +16,7 @@ import (
 	kerrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
+	"github.com/avast/retry-go/v4"
 	"github.com/defenseunicorns/pkg/helpers/v2"
 	"github.com/zarf-dev/zarf/src/config"
 	"github.com/zarf-dev/zarf/src/config/lang"
@@ -128,27 +129,13 @@ func (c *Cluster) InitZarfState(ctx context.Context, initOptions types.ZarfInitO
 	// The default SA is required for pods to start properly.
 	saCtx, cancel := context.WithTimeout(ctx, 2*time.Minute)
 	defer cancel()
-	err = func(ctx context.Context, ns, name string) error {
-		timer := time.NewTimer(0)
-		defer timer.Stop()
-		for {
-			select {
-			case <-ctx.Done():
-				return fmt.Errorf("failed to get service account %s/%s: %w", ns, name, ctx.Err())
-			case <-timer.C:
-				_, err := c.Clientset.CoreV1().ServiceAccounts(ns).Get(ctx, name, metav1.GetOptions{})
-				if err != nil && !kerrors.IsNotFound(err) {
-					return err
-				}
-				if kerrors.IsNotFound(err) {
-					message.Debug("Service account %s/%s not found, retrying...", ns, name)
-					timer.Reset(1 * time.Second)
-					continue
-				}
-				return nil
-			}
+	err = retry.Do(func() error {
+		_, err := c.Clientset.CoreV1().ServiceAccounts(ZarfNamespaceName).Get(saCtx, "default", metav1.GetOptions{})
+		if err != nil {
+			return err
 		}
-	}(saCtx, ZarfNamespaceName, "default")
+		return nil
+	}, retry.Context(saCtx), retry.Attempts(0), retry.DelayType(retry.FixedDelay), retry.Delay(time.Second))
 	if err != nil {
 		return fmt.Errorf("unable get default Zarf service account: %w", err)
 	}
diff --git a/src/pkg/cluster/tunnel.go b/src/pkg/cluster/tunnel.go
index 61fd090546..764ae9a6eb 100644
--- a/src/pkg/cluster/tunnel.go
+++ b/src/pkg/cluster/tunnel.go
@@ -13,7 +13,6 @@ import (
 	"strconv"
 	"strings"
 	"sync"
-	"time"
 
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -22,6 +21,7 @@ import (
 	"k8s.io/client-go/tools/portforward"
 	"k8s.io/client-go/transport/spdy"
 
+	"github.com/avast/retry-go/v4"
 	"github.com/defenseunicorns/pkg/helpers/v2"
 	"github.com/zarf-dev/zarf/src/pkg/message"
 	"github.com/zarf-dev/zarf/src/types"
@@ -308,7 +308,6 @@ type Tunnel struct {
 	resourceType string
 	resourceName string
 	urlSuffix    string
-	attempt      int
 	stopChan     chan struct{}
 	readyChan    chan struct{}
 	errChan      chan error
@@ -352,38 +351,16 @@ func (tunnel *Tunnel) Wrap(function func() error) error {
 
 // Connect will establish a tunnel to the specified target.
 func (tunnel *Tunnel) Connect(ctx context.Context) (string, error) {
-	url, err := tunnel.establish(ctx)
-
-	// Try to establish the tunnel up to 3 times.
-	if err != nil {
-		tunnel.attempt++
-
-		// If we have exceeded the number of attempts, exit with an error.
-		if tunnel.attempt > 3 {
-			return "", fmt.Errorf("unable to establish tunnel after 3 attempts: %w", err)
-		}
-
-		// Otherwise, retry the connection but delay increasing intervals between attempts.
-		delay := tunnel.attempt * 10
-		message.Debugf("%s", err.Error())
-		message.Debugf("Delay creating tunnel, waiting %d seconds...", delay)
-
-		timer := time.NewTimer(0)
-		defer timer.Stop()
-
-		select {
-		case <-ctx.Done():
-			return "", ctx.Err()
-		case <-timer.C:
-			url, err = tunnel.Connect(ctx)
-			if err != nil {
-				return "", err
-			}
-
-			timer.Reset(time.Duration(delay) * time.Second)
+	url, err := retry.DoWithData(func() (string, error) {
+		url, err := tunnel.establish(ctx)
+		if err != nil {
+			return "", err
 		}
+		return url, nil
+	}, retry.Context(ctx), retry.Attempts(6))
+	if err != nil {
+		return "", err
 	}
-
 	return url, nil
 }
 
diff --git a/src/pkg/cluster/zarf.go b/src/pkg/cluster/zarf.go
index 6172e3fa78..7145349fea 100644
--- a/src/pkg/cluster/zarf.go
+++ b/src/pkg/cluster/zarf.go
@@ -17,6 +17,7 @@ import (
 	kerrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
+	"github.com/avast/retry-go/v4"
 	"github.com/zarf-dev/zarf/src/api/v1alpha1"
 	"github.com/zarf-dev/zarf/src/config"
 	"github.com/zarf-dev/zarf/src/pkg/message"
@@ -132,8 +133,8 @@ func (c *Cluster) PackageSecretNeedsWait(deployedPackage *types.DeployedPackage,
 }
 
 // RecordPackageDeploymentAndWait records the deployment of a package to the cluster and waits for any webhooks to complete.
-func (c *Cluster) RecordPackageDeploymentAndWait(ctx context.Context, pkg v1alpha1.ZarfPackage, components []types.DeployedComponent, connectStrings types.ConnectStrings, generation int, component v1alpha1.ZarfComponent, skipWebhooks bool) (deployedPackage *types.DeployedPackage, err error) {
-	deployedPackage, err = c.RecordPackageDeployment(ctx, pkg, components, connectStrings, generation)
+func (c *Cluster) RecordPackageDeploymentAndWait(ctx context.Context, pkg v1alpha1.ZarfPackage, components []types.DeployedComponent, connectStrings types.ConnectStrings, generation int, component v1alpha1.ZarfComponent, skipWebhooks bool) (*types.DeployedPackage, error) {
+	deployedPackage, err := c.RecordPackageDeployment(ctx, pkg, components, connectStrings, generation)
 	if err != nil {
 		return nil, err
 	}
@@ -144,39 +145,31 @@ func (c *Cluster) RecordPackageDeploymentAndWait(ctx context.Context, pkg v1alph
 		return deployedPackage, nil
 	}
 
+	spinner := message.NewProgressSpinner("Waiting for webhook %q to complete for component %q", hookName, component.Name)
+	defer spinner.Stop()
+
 	waitDuration := types.DefaultWebhookWaitDuration
 	if waitSeconds > 0 {
 		waitDuration = time.Duration(waitSeconds) * time.Second
 	}
-
 	waitCtx, cancel := context.WithTimeout(ctx, waitDuration)
 	defer cancel()
-
-	spinner := message.NewProgressSpinner("Waiting for webhook %q to complete for component %q", hookName, component.Name)
-	defer spinner.Stop()
-
-	timer := time.NewTimer(0)
-	defer timer.Stop()
-
-	for {
-		select {
-		case <-waitCtx.Done():
-			return nil, fmt.Errorf("error waiting for webhook %q to complete for component %q: %w", hookName, component.Name, waitCtx.Err())
-		case <-timer.C:
-			deployedPackage, err = c.GetDeployedPackage(ctx, deployedPackage.Name)
-			if err != nil {
-				return nil, err
-			}
-
-			packageNeedsWait, _, _ = c.PackageSecretNeedsWait(deployedPackage, component, skipWebhooks)
-			if !packageNeedsWait {
-				spinner.Success()
-				return deployedPackage, nil
-			}
-
-			timer.Reset(1 * time.Second)
+	deployedPackage, err = retry.DoWithData(func() (*types.DeployedPackage, error) {
+		deployedPackage, err = c.GetDeployedPackage(waitCtx, deployedPackage.Name)
+		if err != nil {
+			return nil, err
+		}
+		packageNeedsWait, _, _ = c.PackageSecretNeedsWait(deployedPackage, component, skipWebhooks)
+		if !packageNeedsWait {
+			return deployedPackage, nil
 		}
+		return deployedPackage, nil
+	}, retry.Context(waitCtx), retry.Attempts(0), retry.DelayType(retry.FixedDelay), retry.Delay(time.Second))
+	if err != nil {
+		return nil, err
 	}
+	spinner.Success()
+	return deployedPackage, nil
 }
 
 // RecordPackageDeployment saves metadata about a package that has been deployed to the cluster.
diff --git a/src/pkg/packager/actions/actions.go b/src/pkg/packager/actions/actions.go
index 983f2f6bde..dbef82b613 100644
--- a/src/pkg/packager/actions/actions.go
+++ b/src/pkg/packager/actions/actions.go
@@ -93,6 +93,7 @@ func runAction(ctx context.Context, defaultCfg v1alpha1.ZarfComponentActionDefau
 	timeout := time.After(duration)
 
 	// Keep trying until the max retries is reached.
+	// TODO: Refactor using go-retry
 retryCmd:
 	for remaining := actionDefaults.MaxRetries + 1; remaining > 0; remaining-- {
 		// Perform the action run.
diff --git a/src/pkg/packager/deploy.go b/src/pkg/packager/deploy.go
index 86c2f43349..2c5f2e5f2d 100644
--- a/src/pkg/packager/deploy.go
+++ b/src/pkg/packager/deploy.go
@@ -18,12 +18,12 @@ import (
 
 	"golang.org/x/sync/errgroup"
 
+	"github.com/avast/retry-go/v4"
+	"github.com/defenseunicorns/pkg/helpers/v2"
 	corev1 "k8s.io/api/core/v1"
 	kerrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
-	"github.com/defenseunicorns/pkg/helpers/v2"
-
 	"github.com/zarf-dev/zarf/src/api/v1alpha1"
 	"github.com/zarf-dev/zarf/src/config"
 	"github.com/zarf-dev/zarf/src/internal/git"
@@ -551,9 +551,7 @@ func (p *Packager) pushReposToRepository(ctx context.Context, reposPath string,
 		if err != nil {
 			return err
 		}
-
-		// Create an anonymous function to push the repo to the Zarf git server
-		tryPush := func() error {
+		err = retry.Do(func() error {
 			namespace, name, port, err := serviceInfoFromServiceURL(p.state.GitServer.Address)
 
 			// If this is a service (svcInfo is not nil), create a port-forward tunnel to that resource
@@ -603,10 +601,8 @@ func (p *Packager) pushReposToRepository(ctx context.Context, reposPath string,
 				return err
 			}
 			return nil
-		}
-
-		// Try repo push up to retry limit
-		if err := helpers.RetryWithContext(ctx, tryPush, p.cfg.PkgOpts.Retries, 5*time.Second, message.Warnf); err != nil {
+		}, retry.Context(ctx), retry.Attempts(uint(p.cfg.PkgOpts.Retries)), retry.Delay(500*time.Millisecond))
+		if err != nil {
 			return fmt.Errorf("unable to push repo %s to the Git Server: %w", repoURL, err)
 		}
 	}
diff --git a/src/test/e2e/35_custom_retries_test.go b/src/test/e2e/35_custom_retries_test.go
index e6a87ff5e1..99d0a229f5 100644
--- a/src/test/e2e/35_custom_retries_test.go
+++ b/src/test/e2e/35_custom_retries_test.go
@@ -26,7 +26,6 @@ func TestRetries(t *testing.T) {
 
 	stdOut, stdErr, err = e2e.Zarf(t, "package", "deploy", path.Join(tmpDir, pkgName), "--retries", "2", "--timeout", "3s", "--tmpdir", tmpDir, "--confirm")
 	require.Error(t, err, stdOut, stdErr)
-	require.Contains(t, stdErr, "Retrying in 5s")
 	require.Contains(t, e2e.StripMessageFormatting(stdErr), "unable to install chart after 2 attempts")
 
 	_, _, err = e2e.Zarf(t, "package", "remove", "dos-games", "--confirm")
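
Note (illustrative, not part of the diff): every call site above follows the same avast/retry-go v4 pattern — wrap the operation in retry.Do or retry.DoWithData and tune behavior with functional options. The sketch below is a minimal, self-contained example of that pattern; checkReady and the placeholder URL are hypothetical stand-ins, not code from this repository.

package main

import (
	"context"
	"errors"
	"fmt"
	"time"

	"github.com/avast/retry-go/v4"
)

// checkReady is a hypothetical stand-in for the operations retried above
// (helm installs, image pushes, Kubernetes readiness polling, etc.).
func checkReady() error {
	return errors.New("not ready yet")
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// retry.Attempts(0) keeps retrying until success or until ctx is done, and
	// retry.DelayType(retry.FixedDelay) polls at a constant interval instead of
	// the library's default backoff — the combination used for the wait loops above.
	err := retry.Do(checkReady,
		retry.Context(ctx),
		retry.Attempts(0),
		retry.DelayType(retry.FixedDelay),
		retry.Delay(time.Second),
	)
	fmt.Println(err)

	// retry.DoWithData is the generic variant used above when a value (ready pods,
	// a tunnel URL, a deployed package) must be returned alongside the error.
	url, err := retry.DoWithData(func() (string, error) {
		return "http://127.0.0.1:8080", nil // placeholder result
	}, retry.Context(ctx), retry.Attempts(6))
	fmt.Println(url, err)
}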