Skip to content

Commit

Permalink
Add feature flag for controller leader election
Browse files Browse the repository at this point in the history
  • Loading branch information
chiayi committed Mar 24, 2023
1 parent f54f920 commit 15a49a5
Show file tree
Hide file tree
Showing 20 changed files with 1,446 additions and 21 deletions.
2 changes: 1 addition & 1 deletion cmd/allocator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ func newServiceHandler(kubeClient kubernetes.Interface, agonesClient versioned.I
totalRemoteAllocationTimeout,
allocationBatchWaitTime)

ctx := signals.NewSigKillContext()
ctx, _ := signals.NewSigKillContext()
h := serviceHandler{
allocationCallback: func(gsa *allocationv1.GameServerAllocation) (k8sruntime.Object, error) {
return allocator.Allocate(ctx, gsa)
Expand Down
87 changes: 76 additions & 11 deletions cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import (
"agones.dev/agones/pkg/util/runtime"
"agones.dev/agones/pkg/util/signals"
"agones.dev/agones/pkg/util/webhooks"
"github.com/google/uuid"
"github.com/heptiolabs/healthcheck"
"github.com/pkg/errors"
prom "github.com/prometheus/client_golang/prometheus"
Expand All @@ -50,9 +51,12 @@ import (
corev1 "k8s.io/api/core/v1"
extclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/client-go/tools/leaderelection"
"k8s.io/client-go/tools/leaderelection/resourcelock"
)

const (
Expand Down Expand Up @@ -106,7 +110,7 @@ func setupLogging(logDir string, logSizeLimitMB int) {

// main starts the operator for the gameserver CRD
func main() {
ctx := signals.NewSigKillContext()
ctx, cancel := signals.NewSigKillContext()
ctlConf := parseEnvFlags()

if ctlConf.LogDir != "" {
Expand Down Expand Up @@ -234,19 +238,59 @@ func main() {
rs = append(rs, gasController)
}

kubeInformerFactory.Start(ctx.Done())
agonesInformerFactory.Start(ctx.Done())
// Start of the leader election code
id := uuid.New().String()

lock := &resourcelock.LeaseLock{
LeaseMeta: metav1.ObjectMeta{
Name: "agones-controller",
Namespace: "agones-system",
},
Client: kubeClient.CoordinationV1(),
LockConfig: resourcelock.ResourceLockConfig{
Identity: id,
},
}

logger.Infof("This controller ID is %s", id)

startLeading := func(ctx context.Context) {
logger.Infof("Leader elected: %s", id)

kubeInformerFactory.Start(ctx.Done())
agonesInformerFactory.Start(ctx.Done())

for _, r := range rs {
go func(rr runner) {
if runErr := rr.Run(ctx, ctlConf.NumWorkers); runErr != nil {
logger.WithError(runErr).Fatalf("could not start runner: %T", rr)
}
}(r)
}

for _, r := range rs {
go func(rr runner) {
if runErr := rr.Run(ctx, ctlConf.NumWorkers); runErr != nil {
logger.WithError(runErr).Fatalf("could not start runner: %T", rr)
}
}(r)
<-ctx.Done()
logger.Info("Shut down agones controllers")
}

<-ctx.Done()
logger.Info("Shut down agones controllers")
if !runtime.FeatureEnabled(runtime.FeatureControllerLeaderElection) {
startLeading(ctx)
} else {
runLeaderElection(ctx, lock,
startLeading,
func() {
logger.Infof("leader lost: %s", id)
time.Sleep(10 * time.Second)
cancel()
os.Exit(0)
},
func(identity string) {
if identity == id {
return
}
logger.Infof("new leader elected: %s", identity)
},
)
}
}

func parseEnvFlags() config {
Expand Down Expand Up @@ -433,6 +477,27 @@ type httpServer struct {
http.ServeMux
}

// runLeaderElection hands the given lease lock and the three leadership
// callbacks to leaderelection.RunOrDie. The timings are fixed: a 15s lease
// duration, a 10s renew deadline and a 2s retry period, with the lease
// released when ctx is cancelled.
func runLeaderElection(ctx context.Context, lock *resourcelock.LeaseLock, startLeading func(_ context.Context), stopLeading func(), newLeader func(identity string)) {
	// Leadership lifecycle hooks supplied by the caller.
	callbacks := leaderelection.LeaderCallbacks{
		OnStartedLeading: startLeading,
		OnStoppedLeading: stopLeading,
		OnNewLeader:      newLeader,
	}

	electionConfig := leaderelection.LeaderElectionConfig{
		Lock: lock,
		// IMPORTANT: any code protected by the lease MUST have terminated
		// **before** cancel is called. Otherwise a background loop could
		// still be running while another process gets elected, violating
		// the stated goal of the lease.
		ReleaseOnCancel: true,
		LeaseDuration:   15 * time.Second,
		RenewDeadline:   10 * time.Second,
		RetryPeriod:     2 * time.Second,
		Callbacks:       callbacks,
	}

	leaderelection.RunOrDie(ctx, electionConfig)
}

func (h *httpServer) Run(_ context.Context, _ int) error {
logger.Info("Starting http server...")
srv := &http.Server{
Expand Down
1 change: 1 addition & 0 deletions cmd/extensions/pprof.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build profile
// +build profile

package main
Expand Down
2 changes: 1 addition & 1 deletion cmd/ping/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func main() {
logger.WithField("version", pkg.Version).WithField("featureGates", runtime.EncodeFeatures()).
WithField("ctlConf", ctlConf).Info("Starting ping...")

ctx := signals.NewSigKillContext()
ctx, _ := signals.NewSigKillContext()

udpSrv := serveUDP(ctx, ctlConf)
defer udpSrv.close()
Expand Down
2 changes: 1 addition & 1 deletion cmd/sdk-server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func main() {
time.Sleep(time.Duration(ctlConf.Delay) * time.Second)
}

ctx := signals.NewSigKillContext()
ctx, _ := signals.NewSigKillContext()

grpcServer := grpc.NewServer()
// don't graceful stop, because if we get a SIGKILL signal
Expand Down
1 change: 1 addition & 0 deletions install/helm/agones/defaultfeaturegates.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ SplitControllerAndExtensions: false

# Pre-Alpha features
CountsAndLists: false
ControllerLeaderElection: false

# Example feature
Example: false
Expand Down
29 changes: 27 additions & 2 deletions install/helm/agones/templates/controller.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,19 @@ spec:
app: {{ template "agones.name" . }}
release: {{ .Release.Name }}
heritage: {{ .Release.Service }}
replicas: 1
{{- if and $featureGates.ControllerLeaderElection (lt .Values.agones.controller.replicas 2) }}
{{- fail "Cannot have less than 2 replicas when leader election is enabled!" }}
{{- end}}
{{- if and not $featureGates.ControllerLeaderElection (ge .Values.agones.controller.replicas 2) }}
{{- fail "Cannot have leader election enabled when replicas is greater than 1!" }}
{{- end}}
replicas: {{ .Values.agones.controller.replicas | quote }}
strategy:
type: Recreate
template:
metadata:
annotations:
{{- if not $featureGates.SplitControllerAndExtensions }}
{{- if or not $featureGates.SplitControllerAndExtensions not $featureGates.ControllerLeaderElection}}
cluster-autoscaler.kubernetes.io/safe-to-evict: {{ .Values.agones.controller.safeToEvict | quote }}
{{- end }}
{{- if .Values.agones.controller.generateTLS }}
Expand Down Expand Up @@ -183,3 +189,22 @@ spec:
imagePullSecrets:
- name: {{.Values.agones.image.controller.pullSecret}}
{{- end }}
---
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
name: agones-controller-pdb
spec:
{{- if .Values.agones.extensions.pdb.minAvailable }}
{{- if .Values.agones.extensions.pdb.maxUnavailable }}
{{- fail "minAvailable and maxUnavailable are mutually exclusive!" }}
{{- end}}
{{- end}}
minAvailable: {{ .Values.agones.extensions.pdb.minAvailable }}
maxUnavailable: {{ .Values.agones.extensions.pdb.maxUnavailable }}
selector:
matchLabels:
agones.dev/role: controller
app: {{ template "agones.name" . }}
release: {{ .Release.Name }}
heritage: {{ .Release.Service }}
3 changes: 3 additions & 0 deletions install/helm/agones/templates/serviceaccounts/controller.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ rules:
- apiGroups: ["autoscaling.agones.dev"]
resources: ["fleetautoscalers/status"]
verbs: ["update"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["create", "delete", "get", "list", "update", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
Expand Down
1 change: 1 addition & 0 deletions install/helm/agones/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ agones:
failureThreshold: 3
timeoutSeconds: 1
allocationBatchWaitTime: 500ms
replicas: 2
extensions:
<<: *controllerValues
pdb:
Expand Down
8 changes: 6 additions & 2 deletions pkg/util/runtime/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ const (
// (a generic implementation of the player tracking feature).
FeatureCountsAndLists Feature = "CountsAndLists"

// FeatureControllerLeaderElection is a feature flag that enables/disables leader election for the controller.
// It is declared with the explicit Feature type so it matches the other feature flag constants in this block.
FeatureControllerLeaderElection Feature = "ControllerLeaderElection"

////////////////
// Example feature

Expand Down Expand Up @@ -122,8 +125,9 @@ var (
FeatureSplitControllerAndExtensions: false,

// Pre-Alpha features
FeatureCountsAndLists: false,
FeatureFleetAllocateOverflow: false,
FeatureCountsAndLists: false,
FeatureFleetAllocateOverflow: false,
FeatureControllerLeaderElection: false,

// Example feature
FeatureExample: false,
Expand Down
7 changes: 4 additions & 3 deletions pkg/util/signals/signals.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@ import (
)

// NewSigKillContext returns a Context that is cancelled when SIGINT or SIGTERM is received,
// along with the CancelFunc for that Context.
func NewSigKillContext() context.Context {
ctx, _ := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
return ctx
func NewSigKillContext() (context.Context, context.CancelFunc) {
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)

return ctx, cancel
}

// NewSigTermHandler creates a channel to listen to SIGTERM and runs the handle function
Expand Down
11 changes: 11 additions & 0 deletions vendor/k8s.io/client-go/tools/leaderelection/OWNERS

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

69 changes: 69 additions & 0 deletions vendor/k8s.io/client-go/tools/leaderelection/healthzadaptor.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 15a49a5

Please sign in to comment.