Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extract etcd membership validation into a separate phase #770

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions action/apply.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ func NewApply(opts ApplyOptions) *Apply {
&phase.ValidateHosts{},
&phase.GatherK0sFacts{},
&phase.ValidateFacts{SkipDowngradeCheck: opts.DisableDowngradeCheck},
&phase.ValidateEtcdMembers{},

// if UploadBinaries: true
&phase.DownloadBinaries{}, // downloads k0s binaries to local cache
Expand Down
90 changes: 90 additions & 0 deletions phase/validate_etcd_members.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package phase

import (
"fmt"
"slices"

"github.com/k0sproject/k0sctl/pkg/apis/k0sctl.k0sproject.io/v1beta1"
"github.com/k0sproject/k0sctl/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster"
log "github.com/sirupsen/logrus"
)

// ValidateEtcdMembers checks for existing etcd members with the same IP as a new controller
//
// A controller is treated as "new" when it has no K0sRunningVersion recorded
// in its metadata. If such a host's private address already appears in the
// cluster's etcd member list, the phase fails, or, when Force is set, asks
// the leader to remove the stale member.
type ValidateEtcdMembers struct {
GenericPhase
// hosts holds the controllers selected in Prepare: those without a
// recorded K0sRunningVersion.
hosts cluster.Hosts
}

// Title returns the display name of this phase.
func (p *ValidateEtcdMembers) Title() string {
	const title = "Validate etcd members"
	return title
}

// Prepare stores the cluster configuration on the phase and collects the
// controllers that have no K0sRunningVersion in their metadata. Those hosts
// are treated as new controllers and are the only ones checked by Run.
// It always returns nil.
func (p *ValidateEtcdMembers) Prepare(config *v1beta1.Cluster) error {
	p.Config = config

	// only check new controllers: a host counts as new when no running
	// k0s version has been recorded for it
	isNewController := func(h *cluster.Host) bool {
		return h.Metadata.K0sRunningVersion == nil
	}
	p.hosts = p.Config.Spec.Hosts.Controllers().Filter(isNewController)

	return nil
}

// ShouldRun reports whether the etcd member validation is needed.
//
// It returns false when:
//   - the leader has no K0sRunningVersion recorded (treated as a fresh cluster),
//   - the leader's role is "single" (assumed to have no etcd),
//   - the k0s config sets spec.storage.type to something other than "etcd".
//
// Otherwise it returns true only if Prepare found at least one new controller.
func (p *ValidateEtcdMembers) ShouldRun() bool {
	leader := p.Config.Spec.K0sLeader()

	switch {
	case leader.Metadata.K0sRunningVersion == nil:
		log.Debugf("%s: leader has no k0s running, assuming a fresh cluster", leader)
		return false
	case leader.Role == "single":
		log.Debugf("%s: leader is a single node, assuming no etcd", leader)
		return false
	}

	if len(p.Config.Spec.K0s.Config) > 0 {
		switch storageType := p.Config.Spec.K0s.Config.DigString("spec", "storage", "type"); storageType {
		case "", "etcd":
			// an unset storage type is handled the same way as explicit etcd
		default:
			log.Debugf("%s: storage type is %q, not k0s managed etcd", leader, storageType)
			return false
		}
	}

	return len(p.hosts) != 0
}

// Run executes the phase by validating the new controllers against the etcd
// member list; any error from that validation is returned unchanged.
func (p *ValidateEtcdMembers) Run() error {
	return p.validateControllerSwap()
}

// validateControllerSwap checks every new controller in p.hosts against the
// etcd member list stored in the cluster metadata.
//
// A warning is logged when etcd reports more members than there are
// controllers in the configuration. For each new controller whose
// PrivateAddress is already an etcd member:
//   - without Force, an error is returned telling the user how to remove the
//     stale member manually;
//   - with Force, `k0s etcd leave --peer-address <address>` is executed on the
//     leader through p.Wet, and a failure of that command is returned wrapped.
//
// It returns nil when no conflicting member remains.
func (p *ValidateEtcdMembers) validateControllerSwap() error {
	if len(p.Config.Metadata.EtcdMembers) > len(p.Config.Spec.Hosts.Controllers()) {
		log.Warnf("there are more etcd members in the cluster than controllers listed in the configuration")
	}

	for _, h := range p.hosts {
		log.Debugf("%s: host is new, checking if etcd members list already contains %s", h, h.PrivateAddress)
		if !slices.Contains(p.Config.Metadata.EtcdMembers, h.PrivateAddress) {
			log.Debugf("%s: no match, assuming its safe to install", h)
			continue
		}

		if !Force {
			// The suggested command is wrapped in a matching pair of backticks
			// so the hint renders as a complete, copyable command.
			return fmt.Errorf("controller %s is listed as an existing etcd member but k0s is not found installed on it, the host may have been replaced. check the host and use `k0s etcd leave --peer-address %s` on a controller or re-run apply with --force", h, h.PrivateAddress)
		}

		log.Infof("%s: force used, running 'k0s etcd leave' for the host", h)
		// The leave command is run on the leader, not on the new host, because
		// k0s is not installed on the new host.
		leader := p.Config.Spec.K0sLeader()
		leaveCommand := leader.Configurer.K0sCmdf("etcd leave --peer-address %s", h.PrivateAddress)
		err := p.Wet(h, fmt.Sprintf("remove host from etcd using %v", leaveCommand), func() error {
			return leader.Exec(leaveCommand)
		})
		if err != nil {
			return fmt.Errorf("controller %s is listed as an existing etcd member but k0s is not found installed on it, the host may have been replaced. attempted etcd leave for the address %s but it failed: %w", h, h.PrivateAddress, err)
		}
	}

	return nil
}
49 changes: 0 additions & 49 deletions phase/validate_facts.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package phase

import (
"fmt"
"slices"

log "github.com/sirupsen/logrus"
)
Expand All @@ -28,10 +27,6 @@ func (p *ValidateFacts) Run() error {
return err
}

if err := p.validateControllerSwap(); err != nil {
return err
}

return nil
}

Expand Down Expand Up @@ -74,47 +69,3 @@ func (p *ValidateFacts) validateDefaultVersion() error {

return nil
}

// validateControllerSwap compares the configured controllers with the etcd
// member list stored in the cluster metadata.
//
// The check is skipped (nil is returned) when the leader has no
// K0sRunningVersion recorded or when the leader's role is "single". A warning
// is logged when etcd reports more members than there are configured
// controllers. Controllers that already have a running k0s version are
// ignored. For every other controller whose PrivateAddress is already an
// etcd member:
//   - without Force, an error is returned telling the user how to remove the
//     stale member manually;
//   - with Force, `k0s etcd leave --peer-address <address>` is executed on the
//     leader through p.Wet, and a failure of that command is returned wrapped.
func (p *ValidateFacts) validateControllerSwap() error {
	log.Debugf("validating controller list vs etcd member list")
	if p.Config.Spec.K0sLeader().Metadata.K0sRunningVersion == nil {
		log.Debugf("%s: leader has no k0s running, assuming a fresh cluster", p.Config.Spec.K0sLeader())
		return nil
	}

	if p.Config.Spec.K0sLeader().Role == "single" {
		log.Debugf("%s: leader is a single node, assuming no etcd", p.Config.Spec.K0sLeader())
		return nil
	}

	if len(p.Config.Metadata.EtcdMembers) > len(p.Config.Spec.Hosts.Controllers()) {
		log.Warnf("there are more etcd members in the cluster than controllers listed in the k0sctl configuration")
	}

	for _, h := range p.Config.Spec.Hosts.Controllers() {
		if h.Metadata.K0sRunningVersion != nil {
			log.Debugf("%s: host has k0s running, no need to check if it was replaced", h)
			continue
		}

		log.Debugf("%s: host is new, checking if etcd members list already contains %s", h, h.PrivateAddress)
		if !slices.Contains(p.Config.Metadata.EtcdMembers, h.PrivateAddress) {
			log.Debugf("%s: no match, assuming its safe to install", h)
			continue
		}

		if !Force {
			// The suggested command is wrapped in a matching pair of backticks
			// so the hint renders as a complete, copyable command.
			return fmt.Errorf("controller %s is listed as an existing etcd member but k0s is not found installed on it, the host may have been replaced. check the host and use `k0s etcd leave --peer-address %s` on a controller or re-run apply with --force", h, h.PrivateAddress)
		}

		log.Infof("%s: force used, running 'k0s etcd leave' for the host", h)
		// The leave command is run on the leader, not on the new host.
		leader := p.Config.Spec.K0sLeader()
		leaveCommand := leader.Configurer.K0sCmdf("etcd leave --peer-address %s", h.PrivateAddress)
		err := p.Wet(h, fmt.Sprintf("remove host from etcd using %v", leaveCommand), func() error {
			return leader.Exec(leaveCommand)
		})
		if err != nil {
			return fmt.Errorf("controller %s is listed as an existing etcd member but k0s is not found installed on it, the host may have been replaced. attempted etcd leave for the address %s but it failed: %w", h, h.PrivateAddress, err)
		}
	}

	return nil
}
Loading