Skip to content

Commit

Permalink
Added back metrics support for extensions (#2988)
Browse files Browse the repository at this point in the history
  • Loading branch information
chiayi authored Feb 23, 2023
1 parent 140740b commit 08c1eaa
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 20 deletions.
98 changes: 78 additions & 20 deletions cmd/extensions/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,15 @@ import (
"agones.dev/agones/pkg/gameserverallocations"
"agones.dev/agones/pkg/gameservers"
"agones.dev/agones/pkg/gameserversets"
"agones.dev/agones/pkg/metrics"
"agones.dev/agones/pkg/util/apiserver"
"agones.dev/agones/pkg/util/https"
"agones.dev/agones/pkg/util/runtime"
"agones.dev/agones/pkg/util/signals"
"agones.dev/agones/pkg/util/webhooks"
"github.com/heptiolabs/healthcheck"
"github.com/pkg/errors"
prom "github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
"github.com/spf13/pflag"
"github.com/spf13/viper"
Expand All @@ -50,18 +52,21 @@ import (
)

const (
projectIDFlag = "gcp-project-id"
certFileFlag = "cert-file"
keyFileFlag = "key-file"
numWorkersFlag = "num-workers"
logDirFlag = "log-dir"
logLevelFlag = "log-level"
logSizeLimitMBFlag = "log-size-limit-mb"
allocationBatchWaitTime = "allocation-batch-wait-time"
kubeconfigFlag = "kubeconfig"
defaultResync = 30 * time.Second
apiServerSustainedQPSFlag = "api-server-qps"
apiServerBurstQPSFlag = "api-server-qps-burst"
enableStackdriverMetricsFlag = "stackdriver-exporter"
stackdriverLabels = "stackdriver-labels"
enablePrometheusMetricsFlag = "prometheus-exporter"
projectIDFlag = "gcp-project-id"
certFileFlag = "cert-file"
keyFileFlag = "key-file"
numWorkersFlag = "num-workers"
logDirFlag = "log-dir"
logLevelFlag = "log-level"
logSizeLimitMBFlag = "log-size-limit-mb"
allocationBatchWaitTime = "allocation-batch-wait-time"
kubeconfigFlag = "kubeconfig"
defaultResync = 30 * time.Second
apiServerSustainedQPSFlag = "api-server-qps"
apiServerBurstQPSFlag = "api-server-qps-burst"
)

var (
Expand Down Expand Up @@ -139,8 +144,36 @@ func main() {
kubeInformerFactory := informers.NewSharedInformerFactory(kubeClient, defaultResync)

server := &httpServer{}
var health healthcheck.Handler

health := healthcheck.NewHandler()
// Stackdriver metrics
if ctlConf.Stackdriver {
sd, err := metrics.RegisterStackdriverExporter(ctlConf.GCPProjectID, ctlConf.StackdriverLabels)
if err != nil {
logger.WithError(err).Fatal("Could not register stackdriver exporter")
}
// It is imperative to invoke flush before your main function exits
defer sd.Flush()
}

// Prometheus metrics
if ctlConf.PrometheusMetrics {
registry := prom.NewRegistry()
metricHandler, err := metrics.RegisterPrometheusExporter(registry)
if err != nil {
logger.WithError(err).Fatal("Could not register prometheus exporter in extensions")
}
server.Handle("/metrics", metricHandler)
health = healthcheck.NewMetricsHandler(registry, "agones")
} else {
logger.Info("Not registaring prometheus metrics")
health = healthcheck.NewHandler()
}

// If only the Prometheus exporter is enabled we can report more often (every 1 second).
// If Stackdriver is enabled we use a 60-second reporting period, which is a requirement
// of Stackdriver; otherwise most of the time series would be invalid for Stackdriver.
metrics.SetReportingPeriod(ctlConf.PrometheusMetrics, ctlConf.Stackdriver)

server.Handle("/", health)

Expand Down Expand Up @@ -180,6 +213,10 @@ func parseEnvFlags() config {
viper.SetDefault(keyFileFlag, filepath.Join(base, "certs", "server.key"))
viper.SetDefault(allocationBatchWaitTime, 500*time.Millisecond)

viper.SetDefault(enablePrometheusMetricsFlag, true)
viper.SetDefault(enableStackdriverMetricsFlag, false)
viper.SetDefault(stackdriverLabels, "")

viper.SetDefault(projectIDFlag, "")
viper.SetDefault(numWorkersFlag, 64)
viper.SetDefault(apiServerSustainedQPSFlag, 100)
Expand All @@ -191,6 +228,11 @@ func parseEnvFlags() config {
pflag.String(keyFileFlag, viper.GetString(keyFileFlag), "Optional. Path to the key file")
pflag.String(certFileFlag, viper.GetString(certFileFlag), "Optional. Path to the crt file")
pflag.String(kubeconfigFlag, viper.GetString(kubeconfigFlag), "Optional. kubeconfig to run the controller out of the cluster. Only use it for debugging as webhook won't works.")

pflag.Bool(enablePrometheusMetricsFlag, viper.GetBool(enablePrometheusMetricsFlag), "Flag to activate metrics of Agones. Can also use PROMETHEUS_EXPORTER env variable.")
pflag.Bool(enableStackdriverMetricsFlag, viper.GetBool(enableStackdriverMetricsFlag), "Flag to activate stackdriver monitoring metrics for Agones. Can also use STACKDRIVER_EXPORTER env variable.")
pflag.String(stackdriverLabels, viper.GetString(stackdriverLabels), "A set of default labels to add to all stackdriver metrics generated. By default metadata are automatically added using Kubernetes API and GCP metadata enpoint.")

pflag.String(projectIDFlag, viper.GetString(projectIDFlag), "GCP ProjectID used for Stackdriver, if not specified ProjectID from Application Default Credentials would be used. Can also use GCP_PROJECT_ID env variable.")
pflag.Int32(numWorkersFlag, 64, "Number of controller workers per resource type")
pflag.Int32(apiServerSustainedQPSFlag, 100, "Maximum sustained queries per second to send to the API server")
Expand All @@ -204,9 +246,15 @@ func parseEnvFlags() config {
pflag.Parse()

viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_"))

runtime.Must(viper.BindEnv(keyFileFlag))
runtime.Must(viper.BindEnv(certFileFlag))
runtime.Must(viper.BindEnv(kubeconfigFlag))

runtime.Must(viper.BindEnv(enablePrometheusMetricsFlag))
runtime.Must(viper.BindEnv(enableStackdriverMetricsFlag))
runtime.Must(viper.BindEnv(stackdriverLabels))

runtime.Must(viper.BindEnv(projectIDFlag))
runtime.Must(viper.BindEnv(numWorkersFlag))
runtime.Must(viper.BindEnv(apiServerSustainedQPSFlag))
Expand All @@ -221,10 +269,15 @@ func parseEnvFlags() config {
runtime.Must(runtime.ParseFeaturesFromEnv())

return config{
KeyFile: viper.GetString(keyFileFlag),
CertFile: viper.GetString(certFileFlag),
KubeConfig: viper.GetString(kubeconfigFlag),
GCPProjectID: viper.GetString(projectIDFlag),
KeyFile: viper.GetString(keyFileFlag),
CertFile: viper.GetString(certFileFlag),
KubeConfig: viper.GetString(kubeconfigFlag),
GCPProjectID: viper.GetString(projectIDFlag),

PrometheusMetrics: viper.GetBool(enablePrometheusMetricsFlag),
Stackdriver: viper.GetBool(enableStackdriverMetricsFlag),
StackdriverLabels: viper.GetString(stackdriverLabels),

NumWorkers: int(viper.GetInt32(numWorkersFlag)),
APIServerSustainedQPS: int(viper.GetInt32(apiServerSustainedQPSFlag)),
APIServerBurstQPS: int(viper.GetInt32(apiServerBurstQPSFlag)),
Expand All @@ -237,9 +290,14 @@ func parseEnvFlags() config {

// config stores all the configuration required to create the game server extensions.
type config struct {
KeyFile string
CertFile string
KubeConfig string
KeyFile string
CertFile string
KubeConfig string

PrometheusMetrics bool
Stackdriver bool
StackdriverLabels string

GCPProjectID string
NumWorkers int
APIServerSustainedQPS int
Expand Down
11 changes: 11 additions & 0 deletions install/helm/agones/templates/extensions-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ spec:
{{- if .Values.agones.extensions.generateTLS }}
revision/tls-cert: {{ .Release.Revision | quote }}
{{- end }}
{{- if and (.Values.agones.metrics.prometheusServiceDiscovery) (.Values.agones.metrics.prometheusEnabled) }}
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics"
{{- end }}
{{- if .Values.agones.extensions.annotations }}
{{- toYaml .Values.agones.extensions.annotations | nindent 8 }}
{{- end }}
Expand Down Expand Up @@ -71,6 +76,12 @@ spec:
image: "{{ .Values.agones.image.registry }}/{{ .Values.agones.image.extensions.name}}:{{ default .Values.agones.image.tag .Values.agones.image.extensions.tag }}"
imagePullPolicy: {{ .Values.agones.image.extensions.pullPolicy }}
env:
- name: PROMETHEUS_EXPORTER
value: {{ .Values.agones.metrics.prometheusEnabled | quote }}
- name: STACKDRIVER_EXPORTER
value: {{ .Values.agones.metrics.stackdriverEnabled | quote }}
- name: STACKDRIVER_LABELS
value: {{ .Values.agones.metrics.stackdriverLabels | quote }}
- name: GCP_PROJECT_ID
value: {{ .Values.agones.metrics.stackdriverProjectID | quote }}
- name: NUM_WORKERS
Expand Down

0 comments on commit 08c1eaa

Please sign in to comment.