Skip to content

Commit

Permalink
Improve Machine adoption
Browse files Browse the repository at this point in the history
Co-authored-by: fabriziopandini <fpandini@vmware.com>
Signed-off-by: Stefan Büringer buringerst@vmware.com
  • Loading branch information
sbueringer and fabriziopandini committed Nov 23, 2022
1 parent 3dd7f30 commit 7a5b3e8
Show file tree
Hide file tree
Showing 11 changed files with 136 additions and 24 deletions.
3 changes: 3 additions & 0 deletions api/v1beta1/machine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ const (
// MachineDeploymentLabelName is the label set on machines if they're controlled by MachineDeployment.
MachineDeploymentLabelName = "cluster.x-k8s.io/deployment-name"

// MachineControlPlaneNameLabel is the label set on machines if they're controlled by a ControlPlane.
MachineControlPlaneNameLabel = "cluster.x-k8s.io/control-plane-name"

// PreDrainDeleteHookAnnotationPrefix annotation specifies the prefix we
// search each annotation for during the pre-drain.delete lifecycle hook
// to pause reconciliation of deletion. These hooks will prevent removal of
Expand Down
1 change: 1 addition & 0 deletions controlplane/kubeadm/internal/cluster_labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,6 @@ func ControlPlaneMachineLabelsForCluster(kcp *controlplanev1.KubeadmControlPlane
// Always force these labels over the ones coming from the spec.
labels[clusterv1.ClusterLabelName] = clusterName
labels[clusterv1.MachineControlPlaneLabelName] = ""
labels[clusterv1.MachineControlPlaneNameLabel] = kcp.Name
return labels
}
13 changes: 13 additions & 0 deletions controlplane/kubeadm/internal/controllers/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,19 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
// source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, ownedMachines.ConditionGetters(), conditions.AddSourceRef(), conditions.WithStepCounterIf(false))

// Ensure all required labels exist on the controlled Machines.
// This logic is needed to add the `cluster.x-k8s.io/control-plane-name` label to Machines
// which were created before the `cluster.x-k8s.io/control-plane-name` label was introduced.
// NOTE: Changes will be applied to the Machines in reconcileControlPlaneConditions.
// NOTE: cluster.x-k8s.io/control-plane is already set at this stage (it is used when reading controlPlane.Machines).
// TODO(sbueringer): Drop the following code with v1.4 after all existing Machines are guaranteed to have the new label.
for i := range controlPlane.Machines {
machine := controlPlane.Machines[i]
if value, ok := machine.Labels[clusterv1.MachineControlPlaneNameLabel]; !ok || value != kcp.Name {
machine.Labels[clusterv1.MachineControlPlaneNameLabel] = kcp.Name
}
}

// Updates conditions reporting the status of static pods and the status of the etcd cluster.
// NOTE: Conditions reporting KCP operation progress like e.g. Resized or SpecUpToDate are inlined with the rest of the execution.
if result, err := r.reconcileControlPlaneConditions(ctx, controlPlane); err != nil || !result.IsZero() {
Expand Down
1 change: 1 addition & 0 deletions controlplane/kubeadm/internal/controllers/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ func (r *KubeadmControlPlaneReconciler) generateMachine(ctx context.Context, kcp
Namespace: kcp.Namespace,
Labels: internal.ControlPlaneMachineLabelsForCluster(kcp, cluster.Name),
Annotations: map[string]string{},
// Note: by setting the ownerRef on creation we signal to the Machine controller that this is not a stand-alone Machine.
OwnerReferences: []metav1.OwnerReference{
*metav1.NewControllerRef(kcp, controlplanev1.GroupVersion.WithKind("KubeadmControlPlane")),
},
Expand Down
5 changes: 5 additions & 0 deletions controlplane/kubeadm/internal/controllers/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -549,13 +549,18 @@ func TestKubeadmControlPlaneReconciler_generateMachine(t *testing.T) {
for k, v := range kcpMachineTemplateObjectMeta.Labels {
g.Expect(machine.Labels[k]).To(Equal(v))
}
g.Expect(machine.Labels[clusterv1.ClusterLabelName]).To(Equal(cluster.Name))
g.Expect(machine.Labels[clusterv1.MachineControlPlaneLabelName]).To(Equal(""))
g.Expect(machine.Labels[clusterv1.MachineControlPlaneNameLabel]).To(Equal(kcp.Name))

for k, v := range kcpMachineTemplateObjectMeta.Annotations {
g.Expect(machine.Annotations[k]).To(Equal(v))
}

// Verify that machineTemplate.ObjectMeta in KCP has not been modified.
g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Labels).NotTo(HaveKey(clusterv1.ClusterLabelName))
g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Labels).NotTo(HaveKey(clusterv1.MachineControlPlaneLabelName))
g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Labels).NotTo(HaveKey(clusterv1.MachineControlPlaneNameLabel))
g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Annotations).NotTo(HaveKey(controlplanev1.KubeadmClusterConfigurationAnnotation))
}

Expand Down
14 changes: 10 additions & 4 deletions internal/controllers/machine/machine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -760,10 +760,16 @@ func (r *Reconciler) shouldAdopt(m *clusterv1.Machine) bool {
return false
}

// If the Machine is originated by a MachineDeployment, this prevents it from being adopted as a stand-alone Machine.
// Note: this is required because after restore from a backup both the Machine controller and the
// MachineSet controller are racing to adopt Machines, see https://github.com/kubernetes-sigs/cluster-api/issues/7529
if _, ok := m.Labels[clusterv1.MachineDeploymentUniqueLabel]; ok {
// Note: following checks are required because after restore from a backup both the Machine controller and the
// MachineSet/ControlPlane controller are racing to adopt Machines, see https://github.com/kubernetes-sigs/cluster-api/issues/7529

// If the Machine is originated by a MachineSet, it should not be adopted directly by the Cluster as a stand-alone Machine.
if _, ok := m.Labels[clusterv1.MachineSetLabelName]; ok {
return false
}

// If the Machine is originated by a ControlPlane object, it should not be adopted directly by the Cluster as a stand-alone Machine.
if _, ok := m.Labels[clusterv1.MachineControlPlaneNameLabel]; ok {
return false
}
return true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,

d.Labels[clusterv1.ClusterLabelName] = d.Spec.ClusterName

// Set the MachineDeployment as directly owned by the Cluster (if not already present).
if r.shouldAdopt(d) {
d.OwnerReferences = util.EnsureOwnerRef(d.OwnerReferences, metav1.OwnerReference{
APIVersion: clusterv1.GroupVersion.String(),
Expand All @@ -226,6 +227,28 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,
return ctrl.Result{}, err
}

// If not already present, add a label specifying the MachineDeployment name to MachineSets.
// Ensure all required labels exist on the controlled MachineSets.
// This logic is needed to add the `cluster.x-k8s.io/deployment-name` label to MachineSets
// which were created before the `cluster.x-k8s.io/deployment-name` label was added
// to all MachineSets created by a MachineDeployment.
// TODO(sbueringer): Drop the following code with v1.4 after all existing MachineSets are guaranteed to have the new label.
for idx := range msList {
machineSet := msList[idx]
if name, ok := machineSet.Labels[clusterv1.MachineDeploymentLabelName]; ok && name == d.Name {
continue
}

helper, err := patch.NewHelper(machineSet, r.Client)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to apply %s label to MachineSet %q", clusterv1.MachineDeploymentLabelName, machineSet.Name)
}
machineSet.Labels[clusterv1.MachineDeploymentLabelName] = d.Name
if err := helper.Patch(ctx, machineSet); err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to apply %s label to MachineSet %q", clusterv1.MachineDeploymentLabelName, machineSet.Name)
}
}

if d.Spec.Paused {
return ctrl.Result{}, r.sync(ctx, d, msList)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,16 @@ func TestMachineDeploymentReconciler(t *testing.T) {
})
}, timeout).Should(BeTrue())

// Verify that expected number of machines are created
t.Log("Verify expected number of machines are created")
t.Log("Verify MachineSet has expected replicas and version")
firstMachineSet := machineSets.Items[0]
g.Expect(*firstMachineSet.Spec.Replicas).To(BeEquivalentTo(2))
g.Expect(*firstMachineSet.Spec.Template.Spec.Version).To(BeEquivalentTo("v1.10.3"))

t.Log("Verify MachineSet has expected ClusterLabelName and MachineDeploymentLabelName")
g.Expect(firstMachineSet.Labels[clusterv1.ClusterLabelName]).To(Equal(testCluster.Name))
g.Expect(firstMachineSet.Labels[clusterv1.MachineDeploymentLabelName]).To(Equal(deployment.Name))

t.Log("Verify expected number of Machines are created")
machines := &clusterv1.MachineList{}
g.Eventually(func() int {
if err := env.List(ctx, machines, client.InNamespace(namespace.Name)); err != nil {
Expand All @@ -213,16 +221,13 @@ func TestMachineDeploymentReconciler(t *testing.T) {
return len(machines.Items)
}, timeout).Should(BeEquivalentTo(*deployment.Spec.Replicas))

// Verify that machines has MachineSetLabelName and MachineDeploymentLabelName labels
t.Log("Verify machines have expected MachineSetLabelName and MachineDeploymentLabelName")
t.Log("Verify Machines have expected ClusterLabelName, MachineDeploymentLabelName and MachineSetLabelName")
for _, m := range machines.Items {
g.Expect(m.Labels[clusterv1.ClusterLabelName]).To(Equal(testCluster.Name))
g.Expect(m.Labels[clusterv1.MachineDeploymentLabelName]).To(Equal(deployment.Name))
g.Expect(m.Labels[clusterv1.MachineSetLabelName]).To(Equal(firstMachineSet.Name))
}

firstMachineSet := machineSets.Items[0]
g.Expect(*firstMachineSet.Spec.Replicas).To(BeEquivalentTo(2))
g.Expect(*firstMachineSet.Spec.Template.Spec.Version).To(BeEquivalentTo("v1.10.3"))

//
// Delete firstMachineSet and expect Reconcile to be called to replace it.
//
Expand Down Expand Up @@ -348,7 +353,7 @@ func TestMachineDeploymentReconciler(t *testing.T) {
clusterv1.ClusterLabelName: testCluster.Name,
}

t.Log("Updating MachineDeployment label")
t.Log("Updating MachineDeployment labels")
modifyFunc = func(d *clusterv1.MachineDeployment) {
d.Spec.Selector.MatchLabels = newLabels
d.Spec.Template.Labels = newLabels
Expand Down
19 changes: 16 additions & 3 deletions internal/controllers/machinedeployment/machinedeployment_sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,10 @@ func (r *Reconciler) getNewMachineSet(ctx context.Context, d *clusterv1.MachineD
newMS := clusterv1.MachineSet{
ObjectMeta: metav1.ObjectMeta{
// Make the name deterministic, to ensure idempotence
Name: d.Name + "-" + apirand.SafeEncodeString(machineTemplateSpecHash),
Namespace: d.Namespace,
Labels: newMSTemplate.Labels,
Name: d.Name + "-" + apirand.SafeEncodeString(machineTemplateSpecHash),
Namespace: d.Namespace,
Labels: make(map[string]string),
// Note: by setting the ownerRef on creation we signal to the MachineSet controller that this is not a stand-alone MachineSet.
OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(d, machineDeploymentKind)},
},
Spec: clusterv1.MachineSetSpec{
Expand All @@ -176,6 +177,18 @@ func (r *Reconciler) getNewMachineSet(ctx context.Context, d *clusterv1.MachineD
},
}

// Set the labels from newMSTemplate as top-level labels for the new MS.
// Note: We can't just set `newMSTemplate.Labels` directly and thus "share" the labels map between top-level and
// .spec.template.metadata.labels. This would mean that adding the MachineDeploymentLabelName later top-level
// would also add the label to .spec.template.metadata.labels.
for k, v := range newMSTemplate.Labels {
newMS.Labels[k] = v
}

// Enforce that the MachineDeploymentLabelName label is set
// Note: the MachineDeploymentLabelName is added by the default webhook to MachineDeployment.spec.template.labels if spec.selector is empty.
newMS.Labels[clusterv1.MachineDeploymentLabelName] = d.Name

if d.Spec.Strategy.RollingUpdate.DeletePolicy != nil {
newMS.Spec.DeletePolicy = *d.Spec.Strategy.RollingUpdate.DeletePolicy
}
Expand Down
4 changes: 2 additions & 2 deletions internal/controllers/machinedeployment/mdutil/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -410,9 +410,9 @@ func FindNewMachineSet(deployment *clusterv1.MachineDeployment, msList []*cluste
return nil
}

// FindOldMachineSets returns the old machine sets targeted by the given Deployment, with the given slice of MSes.
// FindOldMachineSets returns the old machine sets targeted by the given Deployment, within the given slice of MSes.
// Returns two list of machine sets
// - the first contains all old machine sets with all non-zero replicas
// - the first contains all old machine sets with non-zero replicas
// - the second contains all old machine sets
func FindOldMachineSets(deployment *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet) ([]*clusterv1.MachineSet, []*clusterv1.MachineSet) {
var requiredMSs []*clusterv1.MachineSet
Expand Down
54 changes: 48 additions & 6 deletions internal/controllers/machineset/machineset_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,32 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,
filteredMachines = append(filteredMachines, machine)
}

// If not already present, add a label specifying the MachineSet name to Machines.
// Ensure all required labels exist on the controlled Machines.
// This logic is needed to add the `cluster.x-k8s.io/set-name` label to Machines
// which were created before the `cluster.x-k8s.io/set-name` label was added to
// all Machines created by a MachineSet.
// TODO(sbueringer): Drop the following code with v1.4 after all existing Machines are guaranteed to have the new label.
for _, machine := range filteredMachines {
if name, ok := machine.Labels[clusterv1.MachineSetLabelName]; ok && name == machineSet.Name {
continue
}

helper, err := patch.NewHelper(machine, r.Client)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to apply %s label to Machine %q", clusterv1.MachineSetLabelName, machine.Name)
}
machine.Labels[clusterv1.MachineSetLabelName] = machineSet.Name
// Propagate the MachineDeploymentLabelName from MachineSet to Machine if it exists.
if mdName, ok := machineSet.Labels[clusterv1.MachineDeploymentLabelName]; ok {
machine.Labels[clusterv1.MachineDeploymentLabelName] = mdName
}
if err := helper.Patch(ctx, machine); err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to apply %s label to Machine %q", clusterv1.MachineSetLabelName, machine.Name)
}
}

// Remediate failed Machines by deleting them.
var errs []error
for _, machine := range filteredMachines {
log := log.WithValues("Machine", klog.KObj(machine))
Expand Down Expand Up @@ -500,10 +526,11 @@ func (r *Reconciler) getNewMachine(machineSet *clusterv1.MachineSet) *clusterv1.
gv := clusterv1.GroupVersion
machine := &clusterv1.Machine{
ObjectMeta: metav1.ObjectMeta{
GenerateName: fmt.Sprintf("%s-", machineSet.Name),
GenerateName: fmt.Sprintf("%s-", machineSet.Name),
// Note: by setting the ownerRef on creation we signal to the Machine controller that this is not a stand-alone Machine.
OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(machineSet, machineSetKind)},
Namespace: machineSet.Namespace,
Labels: machineSet.Spec.Template.Labels,
Labels: make(map[string]string),
Annotations: machineSet.Spec.Template.Annotations,
},
TypeMeta: metav1.TypeMeta{
Expand All @@ -513,9 +540,24 @@ func (r *Reconciler) getNewMachine(machineSet *clusterv1.MachineSet) *clusterv1.
Spec: machineSet.Spec.Template.Spec,
}
machine.Spec.ClusterName = machineSet.Spec.ClusterName
if machine.Labels == nil {
machine.Labels = make(map[string]string)

// Set the labels from machineSet.Spec.Template.Labels as labels for the new Machine.
// Note: We can't just set `machineSet.Spec.Template.Labels` directly and thus "share" the labels
// map between Machine and machineSet.Spec.Template.Labels. This would mean that adding the
// MachineSetLabelName and MachineDeploymentLabelName later on the Machine would also add the labels
// to machineSet.Spec.Template.Labels and thus modify the labels of the MachineSet.
for k, v := range machineSet.Spec.Template.Labels {
machine.Labels[k] = v
}

// Enforce that the MachineSetLabelName label is set
// Note: the MachineSetLabelName is added by the default webhook to MachineSet.spec.template.labels if a spec.selector is empty.
machine.Labels[clusterv1.MachineSetLabelName] = machineSet.Name
// Propagate the MachineDeploymentLabelName from MachineSet to Machine if it exists.
if mdName, ok := machineSet.Labels[clusterv1.MachineDeploymentLabelName]; ok {
machine.Labels[clusterv1.MachineDeploymentLabelName] = mdName
}

return machine
}

Expand Down Expand Up @@ -652,10 +694,10 @@ func (r *Reconciler) shouldAdopt(ms *clusterv1.MachineSet) bool {
return false
}

// If the MachineSet is originated by a MachineDeployment, this prevents it from being adopted as a stand-alone MachineSet.
// If the MachineSet is originated by a MachineDeployment object, it should not be adopted directly by the Cluster as a stand-alone MachineSet.
// Note: this is required because after restore from a backup both the MachineSet controller and the
// MachineDeployment controller are racing to adopt MachineSets, see https://github.com/kubernetes-sigs/cluster-api/issues/7529
if _, ok := ms.Labels[clusterv1.MachineDeploymentUniqueLabel]; ok {
if _, ok := ms.Labels[clusterv1.MachineDeploymentLabelName]; ok {
return false
}
return true
Expand Down

0 comments on commit 7a5b3e8

Please sign in to comment.