Skip to content

Commit

Permalink
Re-queue SnapshotContents that are readyToUse: false
Browse files Browse the repository at this point in the history
SnapshotContents with readyToUse: false should be periodically requeued
with exp. backoff until the CSI driver confirms the snapshot is ready.
  • Loading branch information
jsafrane committed Nov 16, 2023
1 parent b9f20c1 commit 3063863
Show file tree
Hide file tree
Showing 4 changed files with 174 additions and 45 deletions.
92 changes: 92 additions & 0 deletions pkg/sidecar-controller/content_create_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,98 @@ func TestSyncContent(t *testing.T) {
errors: noerrors,
test: testSyncContent,
},
{
name: "1-7: Just created un-ready snapshot should be requeued",
// A new snapshot should be created
initialContents: withContentStatus(newContentArray("content1-7", "snapuid1-7", "snap1-7", "sid1-7", defaultClass, "", "volume-handle-1-7", retainPolicy, nil, &defaultSize, true),
nil),
expectedContents: withContentAnnotations(withContentStatus(newContentArray("content1-7", "snapuid1-7", "snap1-7", "sid1-7", defaultClass, "", "volume-handle-1-7", retainPolicy, nil, &defaultSize, true),
&crdv1.VolumeSnapshotContentStatus{SnapshotHandle: toStringPointer("snapuid1-7"), RestoreSize: &defaultSize, ReadyToUse: &False}),
map[string]string{}),
expectedEvents: noevents,
expectedCreateCalls: []createCall{
{
volumeHandle: "volume-handle-1-7",
snapshotName: "snapshot-snapuid1-7",
driverName: mockDriverName,
snapshotId: "snapuid1-7",
parameters: map[string]string{
utils.PrefixedVolumeSnapshotNameKey: "snap1-7",
utils.PrefixedVolumeSnapshotNamespaceKey: "default",
utils.PrefixedVolumeSnapshotContentNameKey: "content1-7",
},
creationTime: timeNow,
readyToUse: false,
size: defaultSize,
},
},
errors: noerrors,
expectRequeue: true,
expectSuccess: true,
test: testSyncContent,
},
{
name: "1-8: Un-ready snapshot that remains un-ready should be requeued",
// An un-ready snapshot already exists, it will be refreshed
initialContents: withContentAnnotations(withContentStatus(newContentArray("content1-8", "snapuid1-8", "snap1-8", "sid1-8", defaultClass, "", "volume-handle-1-8", retainPolicy, nil, &defaultSize, true),
&crdv1.VolumeSnapshotContentStatus{SnapshotHandle: toStringPointer("snapuid1-8"), RestoreSize: &defaultSize, ReadyToUse: &False}),
map[string]string{}),
expectedContents: withContentAnnotations(withContentStatus(newContentArray("content1-8", "snapuid1-8", "snap1-8", "sid1-8", defaultClass, "", "volume-handle-1-8", retainPolicy, nil, &defaultSize, true),
&crdv1.VolumeSnapshotContentStatus{SnapshotHandle: toStringPointer("snapuid1-8"), RestoreSize: &defaultSize, ReadyToUse: &False}),
map[string]string{}),
expectedEvents: noevents,
expectedCreateCalls: []createCall{
{
volumeHandle: "volume-handle-1-8",
snapshotName: "snapshot-snapuid1-8",
driverName: mockDriverName,
snapshotId: "snapuid1-8",
parameters: map[string]string{
utils.PrefixedVolumeSnapshotNameKey: "snap1-8",
utils.PrefixedVolumeSnapshotNamespaceKey: "default",
utils.PrefixedVolumeSnapshotContentNameKey: "content1-8",
},
creationTime: timeNow,
readyToUse: false,
size: defaultSize,
},
},
errors: noerrors,
expectRequeue: true,
expectSuccess: true,
test: testSyncContent,
},
{
name: "1-9: Un-ready snapshot that becomes ready should not be requeued",
// An un-ready snapshot already exists, it will be refreshed
initialContents: withContentAnnotations(withContentStatus(newContentArray("content1-9", "snapuid1-9", "snap1-9", "sid1-9", defaultClass, "", "volume-handle-1-9", retainPolicy, nil, &defaultSize, true),
&crdv1.VolumeSnapshotContentStatus{SnapshotHandle: toStringPointer("snapuid1-9"), RestoreSize: &defaultSize, ReadyToUse: &False}),
map[string]string{}),
expectedContents: withContentAnnotations(withContentStatus(newContentArray("content1-9", "snapuid1-9", "snap1-9", "sid1-9", defaultClass, "", "volume-handle-1-9", retainPolicy, nil, &defaultSize, true),
&crdv1.VolumeSnapshotContentStatus{SnapshotHandle: toStringPointer("snapuid1-9"), RestoreSize: &defaultSize, ReadyToUse: &True}),
map[string]string{}),
expectedEvents: noevents,
expectedCreateCalls: []createCall{
{
volumeHandle: "volume-handle-1-9",
snapshotName: "snapshot-snapuid1-9",
driverName: mockDriverName,
snapshotId: "snapuid1-9",
parameters: map[string]string{
utils.PrefixedVolumeSnapshotNameKey: "snap1-9",
utils.PrefixedVolumeSnapshotNamespaceKey: "default",
utils.PrefixedVolumeSnapshotContentNameKey: "content1-9",
},
creationTime: timeNow,
readyToUse: true,
size: defaultSize,
},
},
errors: noerrors,
expectRequeue: false,
expectSuccess: true,
test: testSyncContent,
},
}

runSyncContentTests(t, tests, snapshotClasses)
Expand Down
29 changes: 19 additions & 10 deletions pkg/sidecar-controller/framework_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,10 @@ type controllerTest struct {
// Function to call as the test.
test testCall
expectSuccess bool
expectRequeue bool
}

type testCall func(ctrl *csiSnapshotSideCarController, reactor *snapshotReactor, test controllerTest) error
type testCall func(ctrl *csiSnapshotSideCarController, reactor *snapshotReactor, test controllerTest) (requeue bool, err error)

const (
testNamespace = "default"
Expand Down Expand Up @@ -690,16 +691,16 @@ func withContentAnnotations(content []*crdv1.VolumeSnapshotContent, annotations
return content
}

func testSyncContent(ctrl *csiSnapshotSideCarController, reactor *snapshotReactor, test controllerTest) error {
func testSyncContent(ctrl *csiSnapshotSideCarController, reactor *snapshotReactor, test controllerTest) (bool, error) {
return ctrl.syncContent(test.initialContents[0])
}

func testSyncContentError(ctrl *csiSnapshotSideCarController, reactor *snapshotReactor, test controllerTest) error {
err := ctrl.syncContent(test.initialContents[0])
func testSyncContentError(ctrl *csiSnapshotSideCarController, reactor *snapshotReactor, test controllerTest) (bool, error) {
requeue, err := ctrl.syncContent(test.initialContents[0])
if err != nil {
return nil
return requeue, nil
}
return fmt.Errorf("syncSnapshotContent succeeded when failure was expected")
return requeue, fmt.Errorf("syncSnapshotContent succeeded when failure was expected")
}

var (
Expand All @@ -725,18 +726,19 @@ var (
// controller waits for the operation lock. Controller is then resumed and we
// check how it behaves.
func wrapTestWithInjectedOperation(toWrap testCall, injectBeforeOperation func(ctrl *csiSnapshotSideCarController, reactor *snapshotReactor)) testCall {
return func(ctrl *csiSnapshotSideCarController, reactor *snapshotReactor, test controllerTest) error {
return func(ctrl *csiSnapshotSideCarController, reactor *snapshotReactor, test controllerTest) (bool, error) {
// Inject a hook before async operation starts
klog.V(4).Infof("reactor:injecting call")
injectBeforeOperation(ctrl, reactor)

// Run the tested function (typically syncContent) in a
// separate goroutine.
var testError error
var requeue bool
var testFinished int32

go func() {
testError = toWrap(ctrl, reactor, test)
requeue, testError = toWrap(ctrl, reactor, test)
// Let the "main" test function know that syncContent has finished.
atomic.StoreInt32(&testFinished, 1)
}()
Expand All @@ -746,7 +748,7 @@ func wrapTestWithInjectedOperation(toWrap testCall, injectBeforeOperation func(c
time.Sleep(time.Millisecond * 10)
}

return testError
return requeue, testError
}
}

Expand Down Expand Up @@ -804,13 +806,20 @@ func runSyncContentTests(t *testing.T, tests []controllerTest, snapshotClasses [
ctrl.classLister = storagelisters.NewVolumeSnapshotClassLister(indexer)

// Run the tested functions
err = test.test(ctrl, reactor, test)
requeue, err := test.test(ctrl, reactor, test)
if test.expectSuccess && err != nil {
t.Errorf("Test %q failed: %v", test.name, err)
}
if !test.expectSuccess && err == nil {
t.Errorf("Test %q failed: expected error, got nil", test.name)
}
if !test.expectSuccess && err == nil {
t.Errorf("Test %q failed: expected error, got nil", test.name)
}
// requeue has meaning only when err == nil. A snapshot content is automatically requeued on error
if err == nil && requeue != test.expectRequeue {
t.Errorf("Test %q expected requeue %t, got %t", test.name, test.expectRequeue, requeue)
}

// Wait for the target state
err = reactor.waitTest(test)
Expand Down
50 changes: 35 additions & 15 deletions pkg/sidecar-controller/snapshot_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,9 @@ import (

const controllerUpdateFailMsg = "snapshot controller failed to update"

// syncContent deals with one key off the queue. It returns false when it's time to quit.
func (ctrl *csiSnapshotSideCarController) syncContent(content *crdv1.VolumeSnapshotContent) error {
// syncContent deals with one key off the queue. It returns flag indicating if the
// content should be requeued. On error, the content is always requeued.
func (ctrl *csiSnapshotSideCarController) syncContent(content *crdv1.VolumeSnapshotContent) (requeue bool, err error) {
klog.V(5).Infof("synchronizing VolumeSnapshotContent[%s]", content.Name)

if ctrl.shouldDelete(content) {
Expand All @@ -63,13 +64,22 @@ func (ctrl *csiSnapshotSideCarController) syncContent(content *crdv1.VolumeSnaps
// underlying storage system. Note that the deletion snapshot operation will
// update content SnapshotHandle to nil upon a successful deletion. At this
// point, the finalizer on content should NOT be removed to avoid leaking.
return ctrl.deleteCSISnapshot(content)
err := ctrl.deleteCSISnapshot(content)
if err != nil {
return true, err
}
return false, nil
}
// otherwise, either the snapshot has been deleted from the underlying
// storage system, or the deletion policy is Retain, remove the finalizer
// if there is one so that API server could delete the object if there is
// no other finalizer.
return ctrl.removeContentFinalizer(content)
err := ctrl.removeContentFinalizer(content)
if err != nil {
return true, err
}
return false, nil

}
if content.Spec.Source.VolumeHandle != nil && content.Status == nil {
klog.V(5).Infof("syncContent: Call CreateSnapshot for content %s", content.Name)
Expand All @@ -79,11 +89,13 @@ func (ctrl *csiSnapshotSideCarController) syncContent(content *crdv1.VolumeSnaps
// already true. We don't want to keep calling CreateSnapshot
// or ListSnapshots CSI methods over and over again for
// performance reasons.
var err error
if content.Status != nil && content.Status.ReadyToUse != nil && *content.Status.ReadyToUse == true {
if contentIsReady(content) {
// Try to remove AnnVolumeSnapshotBeingCreated if it is not removed yet for some reason
_, err = ctrl.removeAnnVolumeSnapshotBeingCreated(content)
return err
if err != nil {
return true, err
}
return false, nil
}
return ctrl.checkandUpdateContentStatus(content)
}
Expand All @@ -98,39 +110,42 @@ func (ctrl *csiSnapshotSideCarController) storeContentUpdate(content interface{}
return utils.StoreObjectUpdate(ctrl.contentStore, content, "content")
}

// createSnapshot starts new asynchronous operation to create snapshot
func (ctrl *csiSnapshotSideCarController) createSnapshot(content *crdv1.VolumeSnapshotContent) error {
// createSnapshot starts new asynchronous operation to create snapshot. It returns flag indicating if the
// content should be requeued. On error, the content is always requeued.
func (ctrl *csiSnapshotSideCarController) createSnapshot(content *crdv1.VolumeSnapshotContent) (requeue bool, err error) {
klog.V(5).Infof("createSnapshot for content [%s]: started", content.Name)
contentObj, err := ctrl.createSnapshotWrapper(content)
if err != nil {
ctrl.updateContentErrorStatusWithEvent(contentObj, v1.EventTypeWarning, "SnapshotCreationFailed", fmt.Sprintf("Failed to create snapshot: %v", err))
klog.Errorf("createSnapshot for content [%s]: error occurred in createSnapshotWrapper: %v", content.Name, err)
return err
return true, err
}

_, updateErr := ctrl.storeContentUpdate(contentObj)
if updateErr != nil {
// We will get an "snapshot update" event soon, this is not a big error
klog.V(4).Infof("createSnapshot for content [%s]: cannot update internal content cache: %v", content.Name, updateErr)
}
return nil
return !contentIsReady(contentObj), nil
}

func (ctrl *csiSnapshotSideCarController) checkandUpdateContentStatus(content *crdv1.VolumeSnapshotContent) error {
// checkandUpdateContentStatus checks status of the volume snapshot in CSI driver and updates content.status
// accordingly. It returns flag indicating if the content should be requeued. On error, the content is
// always requeued.
func (ctrl *csiSnapshotSideCarController) checkandUpdateContentStatus(content *crdv1.VolumeSnapshotContent) (requeue bool, err error) {
klog.V(5).Infof("checkandUpdateContentStatus[%s] started", content.Name)
contentObj, err := ctrl.checkandUpdateContentStatusOperation(content)
if err != nil {
ctrl.updateContentErrorStatusWithEvent(contentObj, v1.EventTypeWarning, "SnapshotContentCheckandUpdateFailed", fmt.Sprintf("Failed to check and update snapshot content: %v", err))
klog.Errorf("checkandUpdateContentStatus [%s]: error occurred %v", content.Name, err)
return err
return true, err
}
_, updateErr := ctrl.storeContentUpdate(contentObj)
if updateErr != nil {
// We will get an "snapshot update" event soon, this is not a big error
klog.V(4).Infof("checkandUpdateContentStatus [%s]: cannot update internal cache: %v", content.Name, updateErr)
}

return nil
return !contentIsReady(contentObj), nil
}

// updateContentStatusWithEvent saves new content.Status to API server and emits
Expand Down Expand Up @@ -380,6 +395,7 @@ func (ctrl *csiSnapshotSideCarController) deleteCSISnapshotOperation(content *cr
return err
}
// trigger syncContent
// TODO: just enqueue the content object instead of calling syncContent directly
ctrl.updateContentInInformerCache(newContent)
return nil
}
Expand Down Expand Up @@ -689,3 +705,7 @@ func isCSIFinalError(err error) bool {
// even start or failed. It is for sure not in progress.
return true
}

func contentIsReady(content *crdv1.VolumeSnapshotContent) bool {
return content.Status != nil && content.Status.ReadyToUse != nil && *content.Status.ReadyToUse
}
Loading

0 comments on commit 3063863

Please sign in to comment.