diff --git a/action/apply.go b/action/apply.go
index bfe18a89..e711263f 100644
--- a/action/apply.go
+++ b/action/apply.go
@@ -61,6 +61,7 @@ func NewApply(opts ApplyOptions) *Apply {
 			&phase.ValidateHosts{},
 			&phase.GatherK0sFacts{},
 			&phase.ValidateFacts{SkipDowngradeCheck: opts.DisableDowngradeCheck},
+			&phase.ValidateEtcdMembers{},
 
 			// if UploadBinaries: true
 			&phase.DownloadBinaries{}, // downloads k0s binaries to local cache
diff --git a/phase/validate_etcd_members.go b/phase/validate_etcd_members.go
new file mode 100644
index 00000000..06a56148
--- /dev/null
+++ b/phase/validate_etcd_members.go
@@ -0,0 +1,94 @@
+package phase
+
+import (
+	"fmt"
+	"slices"
+
+	"github.com/k0sproject/k0sctl/pkg/apis/k0sctl.k0sproject.io/v1beta1"
+	"github.com/k0sproject/k0sctl/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster"
+	log "github.com/sirupsen/logrus"
+)
+
+// ValidateEtcdMembers checks for existing etcd members with the same IP as a new controller
+type ValidateEtcdMembers struct {
+	GenericPhase
+	hosts cluster.Hosts // controllers that do not have k0s running yet, set in Prepare
+}
+
+// Title for the phase
+func (p *ValidateEtcdMembers) Title() string {
+	return "Validate etcd members"
+}
+
+// Prepare the phase
+func (p *ValidateEtcdMembers) Prepare(config *v1beta1.Cluster) error {
+	p.Config = config
+	p.hosts = p.Config.Spec.Hosts.Controllers().Filter(func(h *cluster.Host) bool {
+		return h.Metadata.K0sRunningVersion == nil // only check new controllers
+	})
+
+	return nil
+}
+
+// ShouldRun is true when there are new controllers and etcd. It is false when
+// the leader has no k0s running, when the leader is a single node, or when the
+// k0s config selects a storage type other than etcd.
+func (p *ValidateEtcdMembers) ShouldRun() bool {
+	leader := p.Config.Spec.K0sLeader()
+
+	if leader.Metadata.K0sRunningVersion == nil {
+		log.Debugf("%s: leader has no k0s running, assuming a fresh cluster", leader)
+		return false
+	}
+
+	if leader.Role == "single" {
+		log.Debugf("%s: leader is a single node, assuming no etcd", leader)
+		return false
+	}
+
+	if len(p.Config.Spec.K0s.Config) > 0 {
+		// an empty storage type is treated the same as etcd
+		storageType := p.Config.Spec.K0s.Config.DigString("spec", "storage", "type")
+		if storageType != "" && storageType != "etcd" {
+			log.Debugf("%s: storage type is %q, not k0s managed etcd", leader, storageType)
+			return false
+		}
+	}
+	return len(p.hosts) > 0
+}
+
+// Run the phase
+func (p *ValidateEtcdMembers) Run() error {
+	return p.validateControllerSwap()
+}
+
+// validateControllerSwap returns an error when a new controller's private
+// address is already in the etcd member list. When Force is set, it instead
+// runs 'k0s etcd leave' for that address on the leader (through p.Wet).
+func (p *ValidateEtcdMembers) validateControllerSwap() error {
+	if len(p.Config.Metadata.EtcdMembers) > len(p.Config.Spec.Hosts.Controllers()) {
+		log.Warnf("there are more etcd members in the cluster than controllers listed in the configuration")
+	}
+
+	for _, h := range p.hosts {
+		log.Debugf("%s: host is new, checking if etcd members list already contains %s", h, h.PrivateAddress)
+		if slices.Contains(p.Config.Metadata.EtcdMembers, h.PrivateAddress) {
+			if Force {
+				log.Infof("%s: force used, running 'k0s etcd leave' for the host", h)
+				leader := p.Config.Spec.K0sLeader()
+				leaveCommand := leader.Configurer.K0sCmdf("etcd leave --peer-address %s", h.PrivateAddress)
+				err := p.Wet(h, fmt.Sprintf("remove host from etcd using %v", leaveCommand), func() error {
+					return leader.Exec(leaveCommand)
+				})
+				if err != nil {
+					return fmt.Errorf("controller %s is listed as an existing etcd member but k0s is not found installed on it, the host may have been replaced. attempted etcd leave for the address %s but it failed: %w", h, h.PrivateAddress, err)
+				}
+				continue
+			}
+			return fmt.Errorf("controller %s is listed as an existing etcd member but k0s is not found installed on it, the host may have been replaced. check the host and use `k0s etcd leave --peer-address %s` on a controller or re-run apply with --force", h, h.PrivateAddress)
+		}
+		log.Debugf("%s: no match, assuming its safe to install", h)
+	}
+
+	return nil
+}
diff --git a/phase/validate_facts.go b/phase/validate_facts.go
index 7434c62b..831929b2 100644
--- a/phase/validate_facts.go
+++ b/phase/validate_facts.go
@@ -2,7 +2,6 @@ package phase
 
 import (
 	"fmt"
-	"slices"
 
 	log "github.com/sirupsen/logrus"
 )
@@ -28,10 +27,6 @@ func (p *ValidateFacts) Run() error {
 		return err
 	}
 
-	if err := p.validateControllerSwap(); err != nil {
-		return err
-	}
-
 	return nil
 }
 
@@ -74,47 +69,3 @@ func (p *ValidateFacts) validateDefaultVersion() error {
 
 	return nil
 }
-
-func (p *ValidateFacts) validateControllerSwap() error {
-	log.Debugf("validating controller list vs etcd member list")
-	if p.Config.Spec.K0sLeader().Metadata.K0sRunningVersion == nil {
-		log.Debugf("%s: leader has no k0s running, assuming a fresh cluster", p.Config.Spec.K0sLeader())
-		return nil
-	}
-
-	if p.Config.Spec.K0sLeader().Role == "single" {
-		log.Debugf("%s: leader is a single node, assuming no etcd", p.Config.Spec.K0sLeader())
-		return nil
-	}
-
-	if len(p.Config.Metadata.EtcdMembers) > len(p.Config.Spec.Hosts.Controllers()) {
-		log.Warnf("there are more etcd members in the cluster than controllers listed in the k0sctl configuration")
-	}
-
-	for _, h := range p.Config.Spec.Hosts.Controllers() {
-		if h.Metadata.K0sRunningVersion != nil {
-			log.Debugf("%s: host has k0s running, no need to check if it was replaced", h)
-			continue
-		}
-
-		log.Debugf("%s: host is new, checking if etcd members list already contains %s", h, h.PrivateAddress)
-		if slices.Contains(p.Config.Metadata.EtcdMembers, h.PrivateAddress) {
-			if Force {
-				log.Infof("%s: force used, running 'k0s etcd leave' for the host", h)
-				leader := p.Config.Spec.K0sLeader()
-				leaveCommand := leader.Configurer.K0sCmdf("etcd leave --peer-address %s", h.PrivateAddress)
-				err := p.Wet(h, fmt.Sprintf("remove host from etcd using %v", leaveCommand), func() error {
-					return leader.Exec(leaveCommand)
-				})
-				if err != nil {
-					return fmt.Errorf("controller %s is listed as an existing etcd member but k0s is not found installed on it, the host may have been replaced. attempted etcd leave for the address %s but it failed: %w", h, h.PrivateAddress, err)
-				}
-				continue
-			}
-			return fmt.Errorf("controller %s is listed as an existing etcd member but k0s is not found installed on it, the host may have been replaced. check the host and use `k0s etcd leave --peer-address %s on a controller or re-run apply with --force", h, h.PrivateAddress)
-		}
-		log.Debugf("%s: no match, assuming its safe to install", h)
-	}
-
-	return nil
-}