From 61f5c5ce403f576d1fa9ec46dd926304e8391c5f Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Fri, 17 Nov 2023 01:30:49 +0000 Subject: [PATCH 1/4] Reorder snapshot configmap reconcile to reduce log spew during initial startup Signed-off-by: Brad Davidson --- pkg/etcd/snapshot_controller.go | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pkg/etcd/snapshot_controller.go b/pkg/etcd/snapshot_controller.go index a21da8481719..b5e3be32251a 100644 --- a/pkg/etcd/snapshot_controller.go +++ b/pkg/etcd/snapshot_controller.go @@ -160,21 +160,6 @@ func (e *etcdSnapshotHandler) onRemove(key string, esf *apisv1.ETCDSnapshotFile) } func (e *etcdSnapshotHandler) reconcile() error { - logrus.Infof("Reconciling snapshot ConfigMap data") - - snapshotConfigMap, err := e.configmaps.Get(metav1.NamespaceSystem, snapshotConfigMapName, metav1.GetOptions{}) - if err != nil { - if !apierrors.IsNotFound(err) { - return errors.Wrap(err, "failed to get snapshot ConfigMap") - } - snapshotConfigMap = &v1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: snapshotConfigMapName, - Namespace: metav1.NamespaceSystem, - }, - } - } - // Get a list of all etcd nodes currently in the cluster. // We will use this list to prune local entries for any node that does not exist. nodes := e.etcd.config.Runtime.Core.Core().V1().Node() @@ -202,6 +187,8 @@ func (e *etcdSnapshotHandler) reconcile() error { return errNotReconciled } + logrus.Infof("Reconciling snapshot ConfigMap data") + // Get a list of existing snapshots snapshotList, err := e.snapshots.List(metav1.ListOptions{}) if err != nil { @@ -219,6 +206,19 @@ func (e *etcdSnapshotHandler) reconcile() error { snapshots[sfKey] = esf } + snapshotConfigMap, err := e.configmaps.Get(metav1.NamespaceSystem, snapshotConfigMapName, metav1.GetOptions{}) + if err != nil { + if !apierrors.IsNotFound(err) { + return errors.Wrap(err, "failed to get snapshot ConfigMap") + } + snapshotConfigMap = &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: snapshotConfigMapName, + Namespace: metav1.NamespaceSystem, + }, + } + } + // Make a copy of the configmap for change detection existing := snapshotConfigMap.DeepCopyObject() From 7ea967d0d5a77120a0cacce476aca6e5918cf384 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Fri, 17 Nov 2023 01:52:21 +0000 Subject: [PATCH 2/4] Bump dynamiclistener to fix secret sync race Signed-off-by: Brad Davidson --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 232d1763a6a2..09fead60e9ed 100644 --- a/go.mod +++ b/go.mod @@ -123,8 +123,8 @@ require ( github.com/opencontainers/selinux v1.11.0 github.com/otiai10/copy v1.7.0 github.com/pkg/errors v0.9.1 - github.com/rancher/dynamiclistener v0.3.6-rc2 - github.com/rancher/lasso v0.0.0-20230629200414-8a54b32e6792 + github.com/rancher/dynamiclistener v0.3.6 + github.com/rancher/lasso v0.0.0-20230830164424-d684fdeb6f29 github.com/rancher/remotedialer v0.3.0 github.com/rancher/wharfie v0.5.3 github.com/rancher/wrangler v1.1.1 diff --git a/go.sum b/go.sum index aba28c435406..66e964dea64a 100644 --- a/go.sum +++ b/go.sum @@ -980,10 +980,10 @@ github.com/prometheus/procfs v0.8.0/go.mod h1:z7EfXMXOkbkqb9IINtpCn86r/to3BnA0ua github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg= github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM= github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= -github.com/rancher/dynamiclistener v0.3.6-rc2 h1:Y1nai+Xv+4qqlB3c+hmrY2uBo1EcCDU9kmN5hbnmZhA= -github.com/rancher/dynamiclistener v0.3.6-rc2/go.mod h1:wOh62hdJIgyqTdD/VAHO77UPKAbUsJJ5gYRjzgBL3Wo= -github.com/rancher/lasso v0.0.0-20230629200414-8a54b32e6792 h1:IaPhDqppVYX2v/nCR8j2i0nqOLD5yggzzy39QUlcqDw= -github.com/rancher/lasso v0.0.0-20230629200414-8a54b32e6792/go.mod h1:dNcwXjcqgdOuKFIVETNAPURRh3e5PAi/nWUjj+MLVZA= +github.com/rancher/dynamiclistener v0.3.6 h1:iAFWeiFNra6tYlt4k+jINrK3hOxZ8mjW2S/9nA6sxKs= +github.com/rancher/dynamiclistener v0.3.6/go.mod h1:VqBaJNi+bZmre0+gi+2Jb6jbn7ovHzRueW+M7QhVKsk= +github.com/rancher/lasso v0.0.0-20230830164424-d684fdeb6f29 h1:+kige/h8/LnzWgPjB5NUIHz/pWiW/lFpqcTUkN5uulY= +github.com/rancher/lasso v0.0.0-20230830164424-d684fdeb6f29/go.mod h1:kgk9kJVMj9FIrrXU0iyM6u/9Je4bEjPImqswkTVaKsQ= github.com/rancher/remotedialer v0.3.0 h1:y1EO8JCsgZo0RcqTUp6U8FXcBAv27R+TLnWRcpvX1sM= github.com/rancher/remotedialer v0.3.0/go.mod h1:BwwztuvViX2JrLLUwDlsYt5DiyUwHLlzynRwkZLAY0Q= github.com/rancher/wharfie v0.5.3 h1:6hiO26H7YTgChbLAE6JppxFRjaH3tbKfMItv/LqV0Q0= From 9a434b6510fa206e7741c50169f25eb10c91a99b Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Fri, 17 Nov 2023 18:54:19 +0000 Subject: [PATCH 3/4] Revert e2e pipeline depends_on change Reverts part of the change from 7d38b4a3dbd335c155836c4413987589a64e6f3c Pipeline dependencies are apparently broken, and the e2e pipeline has been getting skipped whenever any other pipeline fails. Ex: https://drone-pr.k3s.io/k3s-io/k3s/7853 Signed-off-by: Brad Davidson --- .drone.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.drone.yml b/.drone.yml index 340cdfaf66cd..4f7e0746775e 100644 --- a/.drone.yml +++ b/.drone.yml @@ -582,9 +582,6 @@ platform: clone: retries: 3 -depends_on: -- amd64 - steps: - name: build-e2e-image image: rancher/dapper:v0.5.0 From cf7c788ea73399edd669e11c9aa03a9fa43f6764 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Fri, 17 Nov 2023 20:02:42 +0000 Subject: [PATCH 4/4] Fix flakey dynamic-cert.json in cert rotation e2e test Signed-off-by: Brad Davidson --- .../validatecluster/validatecluster_test.go | 50 +++++++++---------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/tests/e2e/validatecluster/validatecluster_test.go b/tests/e2e/validatecluster/validatecluster_test.go index b76d9e5a3ec1..2f77ad9156e1 100644 --- a/tests/e2e/validatecluster/validatecluster_test.go +++ b/tests/e2e/validatecluster/validatecluster_test.go @@ -305,9 +305,8 @@ var _ = Describe("Verify Create", Ordered, func() { for _, nodeName := range serverNodeNames { cmd := "k3s certificate rotate" - if _, err := e2e.RunCmdOnNode(cmd, nodeName); err != nil { - Expect(err).NotTo(HaveOccurred(), "Certificate could not be rotated successfully") - } + _, err := e2e.RunCmdOnNode(cmd, nodeName) + Expect(err).NotTo(HaveOccurred(), "Certificate could not be rotated successfully on "+nodeName) } }) @@ -320,12 +319,11 @@ var _ = Describe("Verify Create", Ordered, func() { Expect(err).NotTo(HaveOccurred(), "Cluster could not be started successfully") Eventually(func(g Gomega) { - nodes, err := e2e.ParseNodes(kubeConfigFile, false) - g.Expect(err).NotTo(HaveOccurred()) - for _, node := range nodes { - g.Expect(node.Status).Should(Equal("Ready")) + for _, nodeName := range serverNodeNames { + cmd := "test ! -e /var/lib/rancher/k3s/server/tls/dynamic-cert-regenerate" + _, err := e2e.RunCmdOnNode(cmd, nodeName) + Expect(err).NotTo(HaveOccurred(), "Dynamic cert regenerate file not removed on "+nodeName) } - fmt.Println("help") }, "620s", "5s").Should(Succeed()) Eventually(func(g Gomega) { @@ -340,41 +338,39 @@ var _ = Describe("Verify Create", Ordered, func() { } }, "620s", "5s").Should(Succeed()) }) + It("Validates certificates", func() { const grepCert = "ls -lt /var/lib/rancher/k3s/server/ | grep tls" - var expectResult = []string{"client-ca.crt", - "client-ca.key", - "client-ca.nochain.crt", + // This is a list of files that should be IDENTICAL after certificates are rotated. + // Everything else should be changed. + var expectResult = []string{ + "client-ca.crt", "client-ca.key", "client-ca.nochain.crt", "client-supervisor.crt", "client-supervisor.key", - "dynamic-cert.json", "peer-ca.crt", - "peer-ca.key", "server-ca.crt", - "server-ca.key", "request-header-ca.crt", - "request-header-ca.key", "server-ca.crt", - "server-ca.key", "server-ca.nochain.crt", + "peer-ca.crt", "peer-ca.key", + "server-ca.crt", "server-ca.key", + "request-header-ca.crt", "request-header-ca.key", + "server-ca.crt", "server-ca.key", "server-ca.nochain.crt", "service.current.key", "service.key", "apiserver-loopback-client__.crt", "apiserver-loopback-client__.key", "", } - var finalResult string - var finalErr error for _, nodeName := range serverNodeNames { grCert, errGrep := e2e.RunCmdOnNode(grepCert, nodeName) - Expect(errGrep).NotTo(HaveOccurred(), "Certificate could not be created successfully") + Expect(errGrep).NotTo(HaveOccurred(), "TLS dirs could not be listed on "+nodeName) re := regexp.MustCompile("tls-[0-9]+") tls := re.FindAllString(grCert, -1)[0] - final := fmt.Sprintf("diff -sr /var/lib/rancher/k3s/server/tls/ /var/lib/rancher/k3s/server/%s/"+ + diff := fmt.Sprintf("diff -sr /var/lib/rancher/k3s/server/tls/ /var/lib/rancher/k3s/server/%s/"+ "| grep -i identical | cut -f4 -d ' ' | xargs basename -a \n", tls) - finalResult, finalErr = e2e.RunCmdOnNode(final, nodeName) - Expect(finalErr).NotTo(HaveOccurred(), "Final Certification does not created successfully") + result, err := e2e.RunCmdOnNode(diff, nodeName) + Expect(err).NotTo(HaveOccurred(), "Certificate diff not created successfully on "+nodeName) + + certArray := strings.Split(result, "\n") + Expect((certArray)).Should((Equal(expectResult)), "Certificate diff does not match the expected results on "+nodeName) } + errRestartAgent := e2e.RestartCluster(agentNodeNames) Expect(errRestartAgent).NotTo(HaveOccurred(), "Agent could not be restart successfully") - - finalCert := strings.Replace(finalResult, "\n", ",", -1) - finalCertArray := strings.Split(finalCert, ",") - Expect((finalCertArray)).Should((Equal(expectResult)), "Final certification does not match the expected results") - }) })