From 1d3d9953ffe739889a47123cfd27a49af527db59 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Wed, 23 Jan 2019 12:55:02 -0800 Subject: [PATCH] improved isolation of Agones controllers using taints and priority Added a high-priority class called "agones-system", which defines the priority at which both the controller and the ping service run. This causes them to be scheduled before any game server pods are scheduled. Also added default affinity and tolerations to helm values: Agones Controller and Ping will prefer (but not require) nodes labeled with "stable.agones.dev/agones-system: true". They will also tolerate the taint "stable.agones.dev/agones-system=true:NoExecute". With those two mechanisms in place, isolating the Agones controller should be as simple as creating a dedicated node pool with the appropriate taints/labels: ``` gcloud container node-pools create agones-system ... \ --node-taints stable.agones.dev/agones-system=true:NoExecute \ --node-labels stable.agones.dev/agones-system=true ``` Observe how pods are scheduled on the 'agones-system' node pool.
``` $ kubectl get pod -n agones-system -o wide NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE agones-controller-cd857555b-zgpqw 1/1 Running 0 34s 10.8.5.6 gke-agones-scale-agones-system-b36e72f2-7ks0 agones-ping-76999c8cc9-9nghq 1/1 Running 0 42s 10.8.4.3 gke-agones-scale-agones-system-823b885e-15nz agones-ping-76999c8cc9-jhjq6 1/1 Running 0 39s 10.8.5.5 gke-agones-scale-agones-system-b36e72f2-7ks0 ``` --- build/gke-test-cluster/cluster.yml.jinja | 15 +++++++ install/helm/agones/templates/controller.yaml | 1 + install/helm/agones/templates/ping.yaml | 1 + .../helm/agones/templates/priority-class.yaml | 7 ++++ install/helm/agones/values.yaml | 33 ++++++++++++--- install/yaml/install.yaml | 42 +++++++++++++++++++ site/content/en/docs/Installation/helm.md | 16 +++++++ 7 files changed, 110 insertions(+), 5 deletions(-) create mode 100644 install/helm/agones/templates/priority-class.yaml diff --git a/build/gke-test-cluster/cluster.yml.jinja b/build/gke-test-cluster/cluster.yml.jinja index 792ef30f28..48af537274 100644 --- a/build/gke-test-cluster/cluster.yml.jinja +++ b/build/gke-test-cluster/cluster.yml.jinja @@ -34,6 +34,21 @@ resources: - https://www.googleapis.com/auth/devstorage.read_only - https://www.googleapis.com/auth/logging.write - https://www.googleapis.com/auth/monitoring + - name: "agones-system" + initialNodeCount: 1 + config: + machineType: n1-standard-2 + oauthScopes: + - https://www.googleapis.com/auth/compute + - https://www.googleapis.com/auth/devstorage.read_only + - https://www.googleapis.com/auth/logging.write + - https://www.googleapis.com/auth/monitoring + labels: + stable.agones.dev/agones-system: "true" + taints: + - key: stable.agones.dev/agones-system + value: "true" + effect: "NO_EXECUTE" masterAuth: username: admin password: supersecretpassword diff --git a/install/helm/agones/templates/controller.yaml b/install/helm/agones/templates/controller.yaml index c96481f80c..b71337f104 100644 --- 
a/install/helm/agones/templates/controller.yaml +++ b/install/helm/agones/templates/controller.yaml @@ -63,6 +63,7 @@ spec: tolerations: {{ toYaml .Values.agones.controller.tolerations | indent 8 }} {{- end }} + priorityClassName: {{ .Values.agones.priorityClassName }} serviceAccountName: {{ .Values.agones.serviceaccount.controller }} containers: - name: agones-controller diff --git a/install/helm/agones/templates/ping.yaml b/install/helm/agones/templates/ping.yaml index 33e6dbe395..3d6f8ada04 100644 --- a/install/helm/agones/templates/ping.yaml +++ b/install/helm/agones/templates/ping.yaml @@ -52,6 +52,7 @@ spec: tolerations: {{ toYaml .Values.agones.ping.tolerations | indent 8 }} {{- end }} + priorityClassName: {{ .Values.agones.priorityClassName }} containers: - name: agones-ping image: "{{ .Values.agones.image.registry }}/{{ .Values.agones.image.ping.name}}:{{ .Values.agones.image.tag }}" diff --git a/install/helm/agones/templates/priority-class.yaml b/install/helm/agones/templates/priority-class.yaml new file mode 100644 index 0000000000..84a2b72190 --- /dev/null +++ b/install/helm/agones/templates/priority-class.yaml @@ -0,0 +1,7 @@ +apiVersion: scheduling.k8s.io/v1beta1 +kind: PriorityClass +metadata: + name: {{ .Values.agones.priorityClassName }} +value: 1000000 +globalDefault: false +description: "This priority class should be used for Agones service pods only." 
diff --git a/install/helm/agones/values.yaml b/install/helm/agones/values.yaml index 102f9a3466..a04192a3ab 100644 --- a/install/helm/agones/values.yaml +++ b/install/helm/agones/values.yaml @@ -25,11 +25,23 @@ agones: serviceaccount: controller: agones-controller sdk: agones-sdk + priorityClassName: agones-system controller: resources: {} nodeSelector: {} - tolerations: [] - affinity: {} + tolerations: + - key: "stable.agones.dev/agones-system" + operator: "Equal" + value: "true" + effect: "NoExecute" + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: stable.agones.dev/agones-system + operator: Exists generateTLS: true safeToEvict: false http: @@ -43,8 +55,19 @@ agones: install: true resources: {} nodeSelector: {} - tolerations: [] - affinity: {} + tolerations: + - key: "stable.agones.dev/agones-system" + operator: "Equal" + value: "true" + effect: "NoExecute" + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: stable.agones.dev/agones-system + operator: Exists replicas: 2 http: expose: true @@ -80,4 +103,4 @@ gameservers: namespaces: - default minPort: 7000 - maxPort: 8000 \ No newline at end of file + maxPort: 8000 diff --git a/install/yaml/install.yaml b/install/yaml/install.yaml index c7622af87a..55afec127a 100644 --- a/install/yaml/install.yaml +++ b/install/yaml/install.yaml @@ -1013,6 +1013,22 @@ spec: release: agones-manual heritage: Tiller spec: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - preference: + matchExpressions: + - key: stable.agones.dev/agones-system + operator: Exists + weight: 1 + + tolerations: + - effect: NoExecute + key: stable.agones.dev/agones-system + operator: Equal + value: "true" + + priorityClassName: agones-system serviceAccountName: agones-controller containers: - name: agones-controller @@ -1094,6 +1110,22 @@ spec: release: 
agones-manual heritage: Tiller spec: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - preference: + matchExpressions: + - key: stable.agones.dev/agones-system + operator: Exists + weight: 1 + + tolerations: + - effect: NoExecute + key: stable.agones.dev/agones-system + operator: Equal + value: "true" + + priorityClassName: agones-system containers: - name: agones-ping image: "gcr.io/agones-images/agones-ping:0.8.0-rc" @@ -1153,6 +1185,16 @@ spec: targetPort: 8080 protocol: UDP type: LoadBalancer +--- +# Source: agones/templates/priority-class.yaml +apiVersion: scheduling.k8s.io/v1beta1 +kind: PriorityClass +metadata: + name: agones-system +value: 1000000 +globalDefault: false +description: "This priority class should be used for Agones service pods only." + --- # Source: agones/templates/hooks/pre_delete_hook.yaml diff --git a/site/content/en/docs/Installation/helm.md b/site/content/en/docs/Installation/helm.md index 1f061255e9..9fe28f92c4 100644 --- a/site/content/en/docs/Installation/helm.md +++ b/site/content/en/docs/Installation/helm.md @@ -28,6 +28,22 @@ $ helm install --name my-release --namespace agones-system agones/agones _We recommend to install Agones in its own namespaces (like `agones-system` as shown above) you can use the helm `--namespace` parameter to specify a different namespace._ +{{% feature publishVersion="0.8.0" %}} + +When running in production, Agones should be scheduled on a dedicated pool of nodes, distinct from where Game Servers are scheduled, for better isolation and resiliency. By default, Agones prefers to be scheduled on nodes labeled with `stable.agones.dev/agones-system=true` and tolerates the node taint `stable.agones.dev/agones-system=true:NoExecute`. If no dedicated nodes are available, Agones will +run on regular nodes, but that's not recommended for production use. + +As an example, to set up a dedicated node pool for Agones on GKE, run the following command before installing Agones. 
Alternatively, you can taint and label nodes manually. + +``` +gcloud container node-pools create agones-system --cluster=... --zone=... \ + --node-taints stable.agones.dev/agones-system=true:NoExecute \ + --node-labels stable.agones.dev/agones-system=true \ + --num-nodes=1 +``` + +{{% /feature %}} + The command deploys Agones on the Kubernetes cluster with the default configuration. The [configuration](#configuration) section lists the parameters that can be configured during installation. > **Tip**: List all releases using `helm list`