Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 Fix Machine adoption for KCP/MachineSet-owned Machines #7591

Merged
merged 1 commit into from
Nov 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions api/v1beta1/machine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ const (
// MachineDeploymentLabelName is the label set on machines if they're controlled by MachineDeployment.
MachineDeploymentLabelName = "cluster.x-k8s.io/deployment-name"

// MachineControlPlaneNameLabel is the label set on machines if they're controlled by a ControlPlane.
MachineControlPlaneNameLabel = "cluster.x-k8s.io/control-plane-name"

sbueringer marked this conversation as resolved.
Show resolved Hide resolved
// PreDrainDeleteHookAnnotationPrefix annotation specifies the prefix we
// search each annotation for during the pre-drain.delete lifecycle hook
// to pause reconciliation of deletion. These hooks will prevent removal of
Expand Down
1 change: 1 addition & 0 deletions controlplane/kubeadm/internal/cluster_labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,6 @@ func ControlPlaneMachineLabelsForCluster(kcp *controlplanev1.KubeadmControlPlane
// Always force these labels over the ones coming from the spec.
labels[clusterv1.ClusterLabelName] = clusterName
labels[clusterv1.MachineControlPlaneLabelName] = ""
labels[clusterv1.MachineControlPlaneNameLabel] = kcp.Name
return labels
}
13 changes: 13 additions & 0 deletions controlplane/kubeadm/internal/controllers/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,19 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
// source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, ownedMachines.ConditionGetters(), conditions.AddSourceRef(), conditions.WithStepCounterIf(false))

// Ensure all required labels exist on the controlled Machines.
// This logic is needed to add the `cluster.x-k8s.io/control-plane-name` label to Machines
// which were created before the `cluster.x-k8s.io/control-plane-name` label was introduced
// or if a user manually removed the label.
// NOTE: Changes will be applied to the Machines in reconcileControlPlaneConditions.
// NOTE: cluster.x-k8s.io/control-plane is already set at this stage (it is used when reading controlPlane.Machines).
for i := range controlPlane.Machines {
machine := controlPlane.Machines[i]
if value, ok := machine.Labels[clusterv1.MachineControlPlaneNameLabel]; !ok || value != kcp.Name {
machine.Labels[clusterv1.MachineControlPlaneNameLabel] = kcp.Name
}
}

// Updates conditions reporting the status of static pods and the status of the etcd cluster.
// NOTE: Conditions reporting KCP operation progress like e.g. Resized or SpecUpToDate are inlined with the rest of the execution.
if result, err := r.reconcileControlPlaneConditions(ctx, controlPlane); err != nil || !result.IsZero() {
Expand Down
1 change: 1 addition & 0 deletions controlplane/kubeadm/internal/controllers/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ func (r *KubeadmControlPlaneReconciler) generateMachine(ctx context.Context, kcp
Namespace: kcp.Namespace,
Labels: internal.ControlPlaneMachineLabelsForCluster(kcp, cluster.Name),
Annotations: map[string]string{},
// Note: by setting the ownerRef on creation we signal to the Machine controller that this is not a stand-alone Machine.
OwnerReferences: []metav1.OwnerReference{
*metav1.NewControllerRef(kcp, controlplanev1.GroupVersion.WithKind("KubeadmControlPlane")),
},
Expand Down
5 changes: 5 additions & 0 deletions controlplane/kubeadm/internal/controllers/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -549,13 +549,18 @@ func TestKubeadmControlPlaneReconciler_generateMachine(t *testing.T) {
for k, v := range kcpMachineTemplateObjectMeta.Labels {
g.Expect(machine.Labels[k]).To(Equal(v))
}
g.Expect(machine.Labels[clusterv1.ClusterLabelName]).To(Equal(cluster.Name))
g.Expect(machine.Labels[clusterv1.MachineControlPlaneLabelName]).To(Equal(""))
g.Expect(machine.Labels[clusterv1.MachineControlPlaneNameLabel]).To(Equal(kcp.Name))

for k, v := range kcpMachineTemplateObjectMeta.Annotations {
g.Expect(machine.Annotations[k]).To(Equal(v))
}

// Verify that machineTemplate.ObjectMeta in KCP has not been modified.
g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Labels).NotTo(HaveKey(clusterv1.ClusterLabelName))
g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Labels).NotTo(HaveKey(clusterv1.MachineControlPlaneLabelName))
g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Labels).NotTo(HaveKey(clusterv1.MachineControlPlaneNameLabel))
g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Annotations).NotTo(HaveKey(controlplanev1.KubeadmClusterConfigurationAnnotation))
}

Expand Down
14 changes: 10 additions & 4 deletions internal/controllers/machine/machine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -760,10 +760,16 @@ func (r *Reconciler) shouldAdopt(m *clusterv1.Machine) bool {
return false
}

// If the Machine is originated by a MachineDeployment, this prevents it from being adopted as a stand-alone Machine.
// Note: this is required because after restore from a backup both the Machine controller and the
// MachineSet controller are racing to adopt Machines, see https://github.com/kubernetes-sigs/cluster-api/issues/7529
if _, ok := m.Labels[clusterv1.MachineDeploymentUniqueLabel]; ok {
// Note: following checks are required because after restore from a backup both the Machine controller and the
// MachineSet/ControlPlane controller are racing to adopt Machines, see https://github.com/kubernetes-sigs/cluster-api/issues/7529

// If the Machine is originated by a MachineSet, it should not be adopted directly by the Cluster as a stand-alone Machine.
if _, ok := m.Labels[clusterv1.MachineSetLabelName]; ok {
return false
}

// If the Machine is originated by a ControlPlane object, it should not be adopted directly by the Cluster as a stand-alone Machine.
if _, ok := m.Labels[clusterv1.MachineControlPlaneNameLabel]; ok {
return false
}
return true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,

d.Labels[clusterv1.ClusterLabelName] = d.Spec.ClusterName

// Set the MachineDeployment as directly owned by the Cluster (if not already present).
if r.shouldAdopt(d) {
d.OwnerReferences = util.EnsureOwnerRef(d.OwnerReferences, metav1.OwnerReference{
APIVersion: clusterv1.GroupVersion.String(),
Expand All @@ -226,6 +227,27 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,
return ctrl.Result{}, err
}

// If not already present, add a label specifying the MachineDeployment name to MachineSets.
// Ensure all required labels exist on the controlled MachineSets.
// This logic is needed to add the `cluster.x-k8s.io/deployment-name` label to MachineSets
// which were created before the `cluster.x-k8s.io/deployment-name` label was added
// to all MachineSets created by a MachineDeployment or if a user manually removed the label.
for idx := range msList {
machineSet := msList[idx]
if name, ok := machineSet.Labels[clusterv1.MachineDeploymentLabelName]; ok && name == d.Name {
continue
}

helper, err := patch.NewHelper(machineSet, r.Client)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to apply %s label to MachineSet %q", clusterv1.MachineDeploymentLabelName, machineSet.Name)
}
machineSet.Labels[clusterv1.MachineDeploymentLabelName] = d.Name
if err := helper.Patch(ctx, machineSet); err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to apply %s label to MachineSet %q", clusterv1.MachineDeploymentLabelName, machineSet.Name)
}
}

if d.Spec.Paused {
return ctrl.Result{}, r.sync(ctx, d, msList)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,16 @@ func TestMachineDeploymentReconciler(t *testing.T) {
})
}, timeout).Should(BeTrue())

// Verify that expected number of machines are created
t.Log("Verify expected number of machines are created")
t.Log("Verify MachineSet has expected replicas and version")
firstMachineSet := machineSets.Items[0]
g.Expect(*firstMachineSet.Spec.Replicas).To(BeEquivalentTo(2))
g.Expect(*firstMachineSet.Spec.Template.Spec.Version).To(BeEquivalentTo("v1.10.3"))

t.Log("Verify MachineSet has expected ClusterLabelName and MachineDeploymentLabelName")
g.Expect(firstMachineSet.Labels[clusterv1.ClusterLabelName]).To(Equal(testCluster.Name))
g.Expect(firstMachineSet.Labels[clusterv1.MachineDeploymentLabelName]).To(Equal(deployment.Name))

t.Log("Verify expected number of Machines are created")
machines := &clusterv1.MachineList{}
g.Eventually(func() int {
if err := env.List(ctx, machines, client.InNamespace(namespace.Name)); err != nil {
Expand All @@ -213,16 +221,13 @@ func TestMachineDeploymentReconciler(t *testing.T) {
return len(machines.Items)
}, timeout).Should(BeEquivalentTo(*deployment.Spec.Replicas))

// Verify that machines has MachineSetLabelName and MachineDeploymentLabelName labels
t.Log("Verify machines have expected MachineSetLabelName and MachineDeploymentLabelName")
t.Log("Verify Machines have expected ClusterLabelName, MachineDeploymentLabelName and MachineSetLabelName")
for _, m := range machines.Items {
g.Expect(m.Labels[clusterv1.ClusterLabelName]).To(Equal(testCluster.Name))
g.Expect(m.Labels[clusterv1.MachineDeploymentLabelName]).To(Equal(deployment.Name))
g.Expect(m.Labels[clusterv1.MachineSetLabelName]).To(Equal(firstMachineSet.Name))
}

firstMachineSet := machineSets.Items[0]
g.Expect(*firstMachineSet.Spec.Replicas).To(BeEquivalentTo(2))
g.Expect(*firstMachineSet.Spec.Template.Spec.Version).To(BeEquivalentTo("v1.10.3"))

//
// Delete firstMachineSet and expect Reconcile to be called to replace it.
//
Expand Down Expand Up @@ -348,7 +353,7 @@ func TestMachineDeploymentReconciler(t *testing.T) {
clusterv1.ClusterLabelName: testCluster.Name,
}

t.Log("Updating MachineDeployment label")
t.Log("Updating MachineDeployment labels")
modifyFunc = func(d *clusterv1.MachineDeployment) {
d.Spec.Selector.MatchLabels = newLabels
d.Spec.Template.Labels = newLabels
Expand Down
19 changes: 16 additions & 3 deletions internal/controllers/machinedeployment/machinedeployment_sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,10 @@ func (r *Reconciler) getNewMachineSet(ctx context.Context, d *clusterv1.MachineD
newMS := clusterv1.MachineSet{
ObjectMeta: metav1.ObjectMeta{
// Make the name deterministic, to ensure idempotence
Name: d.Name + "-" + apirand.SafeEncodeString(machineTemplateSpecHash),
Namespace: d.Namespace,
Labels: newMSTemplate.Labels,
Name: d.Name + "-" + apirand.SafeEncodeString(machineTemplateSpecHash),
Namespace: d.Namespace,
Labels: make(map[string]string),
// Note: by setting the ownerRef on creation we signal to the MachineSet controller that this is not a stand-alone MachineSet.
OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(d, machineDeploymentKind)},
},
Spec: clusterv1.MachineSetSpec{
Expand All @@ -176,6 +177,18 @@ func (r *Reconciler) getNewMachineSet(ctx context.Context, d *clusterv1.MachineD
},
}

// Set the labels from newMSTemplate as top-level labels for the new MS.
// Note: We can't just set `newMSTemplate.Labels` directly and thus "share" the labels map between top-level and
// .spec.template.metadata.labels. This would mean that adding the MachineDeploymentLabelName later top-level
// would also add the label to .spec.template.metadata.labels.
for k, v := range newMSTemplate.Labels {
newMS.Labels[k] = v
}

// Enforce that the MachineDeploymentLabelName label is set
// Note: the MachineDeploymentLabelName is added by the default webhook to MachineDeployment.spec.template.labels if spec.selector is empty.
newMS.Labels[clusterv1.MachineDeploymentLabelName] = d.Name

if d.Spec.Strategy.RollingUpdate.DeletePolicy != nil {
newMS.Spec.DeletePolicy = *d.Spec.Strategy.RollingUpdate.DeletePolicy
}
Expand Down
4 changes: 2 additions & 2 deletions internal/controllers/machinedeployment/mdutil/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -410,9 +410,9 @@ func FindNewMachineSet(deployment *clusterv1.MachineDeployment, msList []*cluste
return nil
}

// FindOldMachineSets returns the old machine sets targeted by the given Deployment, with the given slice of MSes.
// FindOldMachineSets returns the old machine sets targeted by the given Deployment, within the given slice of MSes.
// Returns two list of machine sets
// - the first contains all old machine sets with all non-zero replicas
// - the first contains all old machine sets with non-zero replicas
// - the second contains all old machine sets
func FindOldMachineSets(deployment *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet) ([]*clusterv1.MachineSet, []*clusterv1.MachineSet) {
var requiredMSs []*clusterv1.MachineSet
Expand Down
60 changes: 54 additions & 6 deletions internal/controllers/machineset/machineset_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,38 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,
filteredMachines = append(filteredMachines, machine)
}

// If not already present, add a label specifying the MachineSet name to Machines.
// Ensure all required labels exist on the controlled Machines.
// This logic is needed to add the `cluster.x-k8s.io/set-name` label to Machines
// which were created before the `cluster.x-k8s.io/set-name` label was added to
// all Machines created by a MachineSet or if a user manually removed the label.
for _, machine := range filteredMachines {
mdNameOnMachineSet, mdNameSetOnMachineSet := machineSet.Labels[clusterv1.MachineDeploymentLabelName]
mdNameOnMachine := machine.Labels[clusterv1.MachineDeploymentLabelName]

if msName, ok := machine.Labels[clusterv1.MachineSetLabelName]; ok && msName == machineSet.Name &&
(!mdNameSetOnMachineSet || mdNameOnMachineSet == mdNameOnMachine) {
// Continue if the MachineSet name label is already set correctly and
// either the MachineDeployment name label is not set on the MachineSet or
// the MachineDeployment name label is set correctly on the Machine.
continue
}

helper, err := patch.NewHelper(machine, r.Client)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to apply %s label to Machine %q", clusterv1.MachineSetLabelName, machine.Name)
}
machine.Labels[clusterv1.MachineSetLabelName] = machineSet.Name
// Propagate the MachineDeploymentLabelName from MachineSet to Machine if it is set on the MachineSet.
if mdNameSetOnMachineSet {
machine.Labels[clusterv1.MachineDeploymentLabelName] = mdNameOnMachineSet
}
if err := helper.Patch(ctx, machine); err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to apply %s label to Machine %q", clusterv1.MachineSetLabelName, machine.Name)
}
}

// Remediate failed Machines by deleting them.
var errs []error
for _, machine := range filteredMachines {
log := log.WithValues("Machine", klog.KObj(machine))
Expand Down Expand Up @@ -500,10 +532,11 @@ func (r *Reconciler) getNewMachine(machineSet *clusterv1.MachineSet) *clusterv1.
gv := clusterv1.GroupVersion
machine := &clusterv1.Machine{
ObjectMeta: metav1.ObjectMeta{
GenerateName: fmt.Sprintf("%s-", machineSet.Name),
GenerateName: fmt.Sprintf("%s-", machineSet.Name),
// Note: by setting the ownerRef on creation we signal to the Machine controller that this is not a stand-alone Machine.
OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(machineSet, machineSetKind)},
Namespace: machineSet.Namespace,
Labels: machineSet.Spec.Template.Labels,
Labels: make(map[string]string),
Annotations: machineSet.Spec.Template.Annotations,
},
TypeMeta: metav1.TypeMeta{
Expand All @@ -513,9 +546,24 @@ func (r *Reconciler) getNewMachine(machineSet *clusterv1.MachineSet) *clusterv1.
Spec: machineSet.Spec.Template.Spec,
}
machine.Spec.ClusterName = machineSet.Spec.ClusterName
if machine.Labels == nil {
machine.Labels = make(map[string]string)

// Set the labels from machineSet.Spec.Template.Labels as labels for the new Machine.
// Note: We can't just set `machineSet.Spec.Template.Labels` directly and thus "share" the labels
// map between Machine and machineSet.Spec.Template.Labels. This would mean that adding the
// MachineSetLabelName and MachineDeploymentLabelName later on the Machine would also add the labels
// to machineSet.Spec.Template.Labels and thus modify the labels of the MachineSet.
for k, v := range machineSet.Spec.Template.Labels {
machine.Labels[k] = v
}

// Enforce that the MachineSetLabelName label is set
// Note: the MachineSetLabelName is added by the default webhook to MachineSet.spec.template.labels if a spec.selector is empty.
machine.Labels[clusterv1.MachineSetLabelName] = machineSet.Name
// Propagate the MachineDeploymentLabelName from MachineSet to Machine if it exists.
if mdName, ok := machineSet.Labels[clusterv1.MachineDeploymentLabelName]; ok {
machine.Labels[clusterv1.MachineDeploymentLabelName] = mdName
}

return machine
}

Expand Down Expand Up @@ -652,10 +700,10 @@ func (r *Reconciler) shouldAdopt(ms *clusterv1.MachineSet) bool {
return false
}

// If the MachineSet is originated by a MachineDeployment, this prevents it from being adopted as a stand-alone MachineSet.
// If the MachineSet is originated by a MachineDeployment object, it should not be adopted directly by the Cluster as a stand-alone MachineSet.
// Note: this is required because after restore from a backup both the MachineSet controller and the
// MachineDeployment controller are racing to adopt MachineSets, see https://github.com/kubernetes-sigs/cluster-api/issues/7529
if _, ok := ms.Labels[clusterv1.MachineDeploymentUniqueLabel]; ok {
if _, ok := ms.Labels[clusterv1.MachineDeploymentLabelName]; ok {
return false
}
return true
Expand Down