From 7a5b3e88a525ce5213a0234ca989d435717a4da5 Mon Sep 17 00:00:00 2001 From: Stefan Bueringer Date: Tue, 22 Nov 2022 20:17:04 +0700 Subject: [PATCH] Improve Machine adoption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: fabriziopandini Signed-off-by: Stefan Büringer buringerst@vmware.com --- api/v1beta1/machine_types.go | 3 ++ .../kubeadm/internal/cluster_labels.go | 1 + .../internal/controllers/controller.go | 13 +++++ .../kubeadm/internal/controllers/helpers.go | 1 + .../internal/controllers/helpers_test.go | 5 ++ .../controllers/machine/machine_controller.go | 14 +++-- .../machinedeployment_controller.go | 23 ++++++++ .../machinedeployment_controller_test.go | 23 ++++---- .../machinedeployment_sync.go | 19 +++++-- .../machinedeployment/mdutil/util.go | 4 +- .../machineset/machineset_controller.go | 54 ++++++++++++++++--- 11 files changed, 136 insertions(+), 24 deletions(-) diff --git a/api/v1beta1/machine_types.go b/api/v1beta1/machine_types.go index 7219c126cde6..48448db4d928 100644 --- a/api/v1beta1/machine_types.go +++ b/api/v1beta1/machine_types.go @@ -42,6 +42,9 @@ const ( // MachineDeploymentLabelName is the label set on machines if they're controlled by MachineDeployment. MachineDeploymentLabelName = "cluster.x-k8s.io/deployment-name" + // MachineControlPlaneNameLabel is the label set on machines if they're controlled by a ControlPlane. + MachineControlPlaneNameLabel = "cluster.x-k8s.io/control-plane-name" + // PreDrainDeleteHookAnnotationPrefix annotation specifies the prefix we // search each annotation for during the pre-drain.delete lifecycle hook // to pause reconciliation of deletion. These hooks will prevent removal of diff --git a/controlplane/kubeadm/internal/cluster_labels.go b/controlplane/kubeadm/internal/cluster_labels.go index cca11a27a354..288bfad2afa5 100644 --- a/controlplane/kubeadm/internal/cluster_labels.go +++ b/controlplane/kubeadm/internal/cluster_labels.go @@ -34,5 +34,6 @@ func ControlPlaneMachineLabelsForCluster(kcp *controlplanev1.KubeadmControlPlane // Always force these labels over the ones coming from the spec. labels[clusterv1.ClusterLabelName] = clusterName labels[clusterv1.MachineControlPlaneLabelName] = "" + labels[clusterv1.MachineControlPlaneNameLabel] = kcp.Name return labels } diff --git a/controlplane/kubeadm/internal/controllers/controller.go b/controlplane/kubeadm/internal/controllers/controller.go index a93fda08e739..12478be9fe4c 100644 --- a/controlplane/kubeadm/internal/controllers/controller.go +++ b/controlplane/kubeadm/internal/controllers/controller.go @@ -329,6 +329,19 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster * // source ref (reason@machine/name) so the problem can be easily tracked down to its source machine. conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, ownedMachines.ConditionGetters(), conditions.AddSourceRef(), conditions.WithStepCounterIf(false)) + // Ensure all required labels exist on the controlled Machines. + // This logic is needed to add the `cluster.x-k8s.io/control-plane-name` label to Machines + // which were created before the `cluster.x-k8s.io/control-plane-name` label was introduced. + // NOTE: Changes will be applied to the Machines in reconcileControlPlaneConditions. + // NOTE: cluster.x-k8s.io/control-plane is already set at this stage (it is used when reading controlPlane.Machines). + // TODO(sbueringer): Drop the following code with v1.4 after all existing Machines are guaranteed to have the new label. + for i := range controlPlane.Machines { + machine := controlPlane.Machines[i] + if value, ok := machine.Labels[clusterv1.MachineControlPlaneNameLabel]; !ok || value != kcp.Name { + machine.Labels[clusterv1.MachineControlPlaneNameLabel] = kcp.Name + } + } + // Updates conditions reporting the status of static pods and the status of the etcd cluster. // NOTE: Conditions reporting KCP operation progress like e.g. Resized or SpecUpToDate are inlined with the rest of the execution. if result, err := r.reconcileControlPlaneConditions(ctx, controlPlane); err != nil || !result.IsZero() { diff --git a/controlplane/kubeadm/internal/controllers/helpers.go b/controlplane/kubeadm/internal/controllers/helpers.go index e1e800555215..ea37bfef2c68 100644 --- a/controlplane/kubeadm/internal/controllers/helpers.go +++ b/controlplane/kubeadm/internal/controllers/helpers.go @@ -275,6 +275,7 @@ func (r *KubeadmControlPlaneReconciler) generateMachine(ctx context.Context, kcp Namespace: kcp.Namespace, Labels: internal.ControlPlaneMachineLabelsForCluster(kcp, cluster.Name), Annotations: map[string]string{}, + // Note: by setting the ownerRef on creation we signal to the Machine controller that this is not a stand-alone Machine. OwnerReferences: []metav1.OwnerReference{ *metav1.NewControllerRef(kcp, controlplanev1.GroupVersion.WithKind("KubeadmControlPlane")), }, diff --git a/controlplane/kubeadm/internal/controllers/helpers_test.go b/controlplane/kubeadm/internal/controllers/helpers_test.go index 5b8a132d8e2c..51a9f40bf333 100644 --- a/controlplane/kubeadm/internal/controllers/helpers_test.go +++ b/controlplane/kubeadm/internal/controllers/helpers_test.go @@ -549,6 +549,10 @@ func TestKubeadmControlPlaneReconciler_generateMachine(t *testing.T) { for k, v := range kcpMachineTemplateObjectMeta.Labels { g.Expect(machine.Labels[k]).To(Equal(v)) } + g.Expect(machine.Labels[clusterv1.ClusterLabelName]).To(Equal(cluster.Name)) + g.Expect(machine.Labels[clusterv1.MachineControlPlaneLabelName]).To(Equal("")) + g.Expect(machine.Labels[clusterv1.MachineControlPlaneNameLabel]).To(Equal(kcp.Name)) + for k, v := range kcpMachineTemplateObjectMeta.Annotations { g.Expect(machine.Annotations[k]).To(Equal(v)) } @@ -556,6 +560,7 @@ func TestKubeadmControlPlaneReconciler_generateMachine(t *testing.T) { // Verify that machineTemplate.ObjectMeta in KCP has not been modified. g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Labels).NotTo(HaveKey(clusterv1.ClusterLabelName)) g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Labels).NotTo(HaveKey(clusterv1.MachineControlPlaneLabelName)) + g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Labels).NotTo(HaveKey(clusterv1.MachineControlPlaneNameLabel)) g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Annotations).NotTo(HaveKey(controlplanev1.KubeadmClusterConfigurationAnnotation)) } diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go index bfdddefeccf9..c1c19f650208 100644 --- a/internal/controllers/machine/machine_controller.go +++ b/internal/controllers/machine/machine_controller.go @@ -760,10 +760,16 @@ func (r *Reconciler) shouldAdopt(m *clusterv1.Machine) bool { return false } - // If the Machine is originated by a MachineDeployment, this prevents it from being adopted as a stand-alone Machine. - // Note: this is required because after restore from a backup both the Machine controller and the - // MachineSet controller are racing to adopt Machines, see https://github.com/kubernetes-sigs/cluster-api/issues/7529 - if _, ok := m.Labels[clusterv1.MachineDeploymentUniqueLabel]; ok { + // Note: following checks are required because after restore from a backup both the Machine controller and the + // MachineSet/ControlPlane controller are racing to adopt Machines, see https://github.com/kubernetes-sigs/cluster-api/issues/7529 + + // If the Machine is originated by a MachineSet, it should not be adopted directly by the Cluster as a stand-alone Machine. + if _, ok := m.Labels[clusterv1.MachineSetLabelName]; ok { + return false + } + + // If the Machine is originated by a ControlPlane object, it should not be adopted directly by the Cluster as a stand-alone Machine. + if _, ok := m.Labels[clusterv1.MachineControlPlaneNameLabel]; ok { return false } return true diff --git a/internal/controllers/machinedeployment/machinedeployment_controller.go b/internal/controllers/machinedeployment/machinedeployment_controller.go index 4118c422e383..57b4707e9eef 100644 --- a/internal/controllers/machinedeployment/machinedeployment_controller.go +++ b/internal/controllers/machinedeployment/machinedeployment_controller.go @@ -200,6 +200,7 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, d.Labels[clusterv1.ClusterLabelName] = d.Spec.ClusterName + // Set the MachineDeployment as directly owned by the Cluster (if not already present). if r.shouldAdopt(d) { d.OwnerReferences = util.EnsureOwnerRef(d.OwnerReferences, metav1.OwnerReference{ APIVersion: clusterv1.GroupVersion.String(), @@ -226,6 +227,28 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, return ctrl.Result{}, err } + // If not already present, add a label specifying the MachineDeployment name to MachineSets. + // Ensure all required labels exist on the controlled MachineSets. + // This logic is needed to add the `cluster.x-k8s.io/deployment-name` label to MachineSets + // which were created before the `cluster.x-k8s.io/deployment-name` label was added + // to all MachineSets created by a MachineDeployment. + // TODO(sbueringer): Drop the following code with v1.4 after all existing MachineSets are guaranteed to have the new label. + for idx := range msList { + machineSet := msList[idx] + if name, ok := machineSet.Labels[clusterv1.MachineDeploymentLabelName]; ok && name == d.Name { + continue + } + + helper, err := patch.NewHelper(machineSet, r.Client) + if err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to apply %s label to MachineSet %q", clusterv1.MachineDeploymentLabelName, machineSet.Name) + } + machineSet.Labels[clusterv1.MachineDeploymentLabelName] = d.Name + if err := helper.Patch(ctx, machineSet); err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to apply %s label to MachineSet %q", clusterv1.MachineDeploymentLabelName, machineSet.Name) + } + } + if d.Spec.Paused { return ctrl.Result{}, r.sync(ctx, d, msList) } diff --git a/internal/controllers/machinedeployment/machinedeployment_controller_test.go b/internal/controllers/machinedeployment/machinedeployment_controller_test.go index 5ac3a3ff2263..40df6c3e3408 100644 --- a/internal/controllers/machinedeployment/machinedeployment_controller_test.go +++ b/internal/controllers/machinedeployment/machinedeployment_controller_test.go @@ -203,8 +203,16 @@ func TestMachineDeploymentReconciler(t *testing.T) { }) }, timeout).Should(BeTrue()) - // Verify that expected number of machines are created - t.Log("Verify expected number of machines are created") + t.Log("Verify MachineSet has expected replicas and version") + firstMachineSet := machineSets.Items[0] + g.Expect(*firstMachineSet.Spec.Replicas).To(BeEquivalentTo(2)) + g.Expect(*firstMachineSet.Spec.Template.Spec.Version).To(BeEquivalentTo("v1.10.3")) + + t.Log("Verify MachineSet has expected ClusterLabelName and MachineDeploymentLabelName") + g.Expect(firstMachineSet.Labels[clusterv1.ClusterLabelName]).To(Equal(testCluster.Name)) + g.Expect(firstMachineSet.Labels[clusterv1.MachineDeploymentLabelName]).To(Equal(deployment.Name)) + + t.Log("Verify expected number of Machines are created") machines := &clusterv1.MachineList{} g.Eventually(func() int { if err := env.List(ctx, machines, client.InNamespace(namespace.Name)); err != nil { @@ -213,16 +221,13 @@ func TestMachineDeploymentReconciler(t *testing.T) { return len(machines.Items) }, timeout).Should(BeEquivalentTo(*deployment.Spec.Replicas)) - // Verify that machines has MachineSetLabelName and MachineDeploymentLabelName labels - t.Log("Verify machines have expected MachineSetLabelName and MachineDeploymentLabelName") + t.Log("Verify Machines have expected ClusterLabelName, MachineDeploymentLabelName and MachineSetLabelName") for _, m := range machines.Items { g.Expect(m.Labels[clusterv1.ClusterLabelName]).To(Equal(testCluster.Name)) + g.Expect(m.Labels[clusterv1.MachineDeploymentLabelName]).To(Equal(deployment.Name)) + g.Expect(m.Labels[clusterv1.MachineSetLabelName]).To(Equal(firstMachineSet.Name)) } - firstMachineSet := machineSets.Items[0] - g.Expect(*firstMachineSet.Spec.Replicas).To(BeEquivalentTo(2)) - g.Expect(*firstMachineSet.Spec.Template.Spec.Version).To(BeEquivalentTo("v1.10.3")) - // // Delete firstMachineSet and expect Reconcile to be called to replace it. // @@ -348,7 +353,7 @@ func TestMachineDeploymentReconciler(t *testing.T) { clusterv1.ClusterLabelName: testCluster.Name, } - t.Log("Updating MachineDeployment label") + t.Log("Updating MachineDeployment labels") modifyFunc = func(d *clusterv1.MachineDeployment) { d.Spec.Selector.MatchLabels = newLabels d.Spec.Template.Labels = newLabels diff --git a/internal/controllers/machinedeployment/machinedeployment_sync.go b/internal/controllers/machinedeployment/machinedeployment_sync.go index 526629a1a2e8..459cb999f763 100644 --- a/internal/controllers/machinedeployment/machinedeployment_sync.go +++ b/internal/controllers/machinedeployment/machinedeployment_sync.go @@ -162,9 +162,10 @@ func (r *Reconciler) getNewMachineSet(ctx context.Context, d *clusterv1.MachineD newMS := clusterv1.MachineSet{ ObjectMeta: metav1.ObjectMeta{ // Make the name deterministic, to ensure idempotence - Name: d.Name + "-" + apirand.SafeEncodeString(machineTemplateSpecHash), - Namespace: d.Namespace, - Labels: newMSTemplate.Labels, + Name: d.Name + "-" + apirand.SafeEncodeString(machineTemplateSpecHash), + Namespace: d.Namespace, + Labels: make(map[string]string), + // Note: by setting the ownerRef on creation we signal to the MachineSet controller that this is not a stand-alone MachineSet. OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(d, machineDeploymentKind)}, }, Spec: clusterv1.MachineSetSpec{ @@ -176,6 +177,18 @@ func (r *Reconciler) getNewMachineSet(ctx context.Context, d *clusterv1.MachineD }, } + // Set the labels from newMSTemplate as top-level labels for the new MS. + // Note: We can't just set `newMSTemplate.Labels` directly and thus "share" the labels map between top-level and + // .spec.template.metadata.labels. This would mean that adding the MachineDeploymentLabelName later top-level + // would also add the label to .spec.template.metadata.labels. + for k, v := range newMSTemplate.Labels { + newMS.Labels[k] = v + } + + // Enforce that the MachineDeploymentLabelName label is set + // Note: the MachineDeploymentLabelName is added by the default webhook to MachineDeployment.spec.template.labels if spec.selector is empty. + newMS.Labels[clusterv1.MachineDeploymentLabelName] = d.Name + if d.Spec.Strategy.RollingUpdate.DeletePolicy != nil { newMS.Spec.DeletePolicy = *d.Spec.Strategy.RollingUpdate.DeletePolicy } diff --git a/internal/controllers/machinedeployment/mdutil/util.go b/internal/controllers/machinedeployment/mdutil/util.go index 8fddb9ba5b63..7f05f733d36d 100644 --- a/internal/controllers/machinedeployment/mdutil/util.go +++ b/internal/controllers/machinedeployment/mdutil/util.go @@ -410,9 +410,9 @@ func FindNewMachineSet(deployment *clusterv1.MachineDeployment, msList []*cluste return nil } -// FindOldMachineSets returns the old machine sets targeted by the given Deployment, with the given slice of MSes. +// FindOldMachineSets returns the old machine sets targeted by the given Deployment, within the given slice of MSes. // Returns two list of machine sets -// - the first contains all old machine sets with all non-zero replicas +// - the first contains all old machine sets with non-zero replicas // - the second contains all old machine sets func FindOldMachineSets(deployment *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet) ([]*clusterv1.MachineSet, []*clusterv1.MachineSet) { var requiredMSs []*clusterv1.MachineSet diff --git a/internal/controllers/machineset/machineset_controller.go b/internal/controllers/machineset/machineset_controller.go index ab4940265378..26f8b6eba4d1 100644 --- a/internal/controllers/machineset/machineset_controller.go +++ b/internal/controllers/machineset/machineset_controller.go @@ -289,6 +289,32 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, filteredMachines = append(filteredMachines, machine) } + // If not already present, add a label specifying the MachineSet name to Machines. + // Ensure all required labels exist on the controlled Machines. + // This logic is needed to add the `cluster.x-k8s.io/set-name` label to Machines + // which were created before the `cluster.x-k8s.io/set-name` label was added to + // all Machines created by a MachineSet. + // TODO(sbueringer): Drop the following code with v1.4 after all existing Machines are guaranteed to have the new label. + for _, machine := range filteredMachines { + if name, ok := machine.Labels[clusterv1.MachineSetLabelName]; ok && name == machineSet.Name { + continue + } + + helper, err := patch.NewHelper(machine, r.Client) + if err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to apply %s label to Machine %q", clusterv1.MachineSetLabelName, machine.Name) + } + machine.Labels[clusterv1.MachineSetLabelName] = machineSet.Name + // Propagate the MachineDeploymentLabelName from MachineSet to Machine if it exists. + if mdName, ok := machineSet.Labels[clusterv1.MachineDeploymentLabelName]; ok { + machine.Labels[clusterv1.MachineDeploymentLabelName] = mdName + } + if err := helper.Patch(ctx, machine); err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to apply %s label to Machine %q", clusterv1.MachineSetLabelName, machine.Name) + } + } + + // Remediate failed Machines by deleting them. var errs []error for _, machine := range filteredMachines { log := log.WithValues("Machine", klog.KObj(machine)) @@ -500,10 +526,11 @@ func (r *Reconciler) getNewMachine(machineSet *clusterv1.MachineSet) *clusterv1. gv := clusterv1.GroupVersion machine := &clusterv1.Machine{ ObjectMeta: metav1.ObjectMeta{ - GenerateName: fmt.Sprintf("%s-", machineSet.Name), + GenerateName: fmt.Sprintf("%s-", machineSet.Name), + // Note: by setting the ownerRef on creation we signal to the Machine controller that this is not a stand-alone Machine. OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(machineSet, machineSetKind)}, Namespace: machineSet.Namespace, - Labels: machineSet.Spec.Template.Labels, + Labels: make(map[string]string), Annotations: machineSet.Spec.Template.Annotations, }, TypeMeta: metav1.TypeMeta{ @@ -513,9 +540,24 @@ func (r *Reconciler) getNewMachine(machineSet *clusterv1.MachineSet) *clusterv1. Spec: machineSet.Spec.Template.Spec, } machine.Spec.ClusterName = machineSet.Spec.ClusterName - if machine.Labels == nil { - machine.Labels = make(map[string]string) + + // Set the labels from machineSet.Spec.Template.Labels as labels for the new Machine. + // Note: We can't just set `machineSet.Spec.Template.Labels` directly and thus "share" the labels + // map between Machine and machineSet.Spec.Template.Labels. This would mean that adding the + // MachineSetLabelName and MachineDeploymentLabelName later on the Machine would also add the labels + // to machineSet.Spec.Template.Labels and thus modify the labels of the MachineSet. + for k, v := range machineSet.Spec.Template.Labels { + machine.Labels[k] = v } + + // Enforce that the MachineSetLabelName label is set + // Note: the MachineSetLabelName is added by the default webhook to MachineSet.spec.template.labels if a spec.selector is empty. + machine.Labels[clusterv1.MachineSetLabelName] = machineSet.Name + // Propagate the MachineDeploymentLabelName from MachineSet to Machine if it exists. + if mdName, ok := machineSet.Labels[clusterv1.MachineDeploymentLabelName]; ok { + machine.Labels[clusterv1.MachineDeploymentLabelName] = mdName + } + return machine } @@ -652,10 +694,10 @@ func (r *Reconciler) shouldAdopt(ms *clusterv1.MachineSet) bool { return false } - // If the MachineSet is originated by a MachineDeployment, this prevents it from being adopted as a stand-alone MachineSet. + // If the MachineSet is originated by a MachineDeployment object, it should not be adopted directly by the Cluster as a stand-alone MachineSet. // Note: this is required because after restore from a backup both the MachineSet controller and the // MachineDeployment controller are racing to adopt MachineSets, see https://github.com/kubernetes-sigs/cluster-api/issues/7529 - if _, ok := ms.Labels[clusterv1.MachineDeploymentUniqueLabel]; ok { + if _, ok := ms.Labels[clusterv1.MachineDeploymentLabelName]; ok { return false } return true