Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bug: Disabled health checking not implemented #317

Merged
merged 1 commit into from
Aug 14, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 14 additions & 39 deletions cmd/sdk-server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"net"
"net/http"
"strings"
"time"

"agones.dev/agones/pkg"
"agones.dev/agones/pkg/client/clientset/versioned"
Expand All @@ -46,12 +45,8 @@ const (
podNamespaceEnv = "POD_NAMESPACE"

// Flags (that can also be env vars)
localFlag = "local"
addressFlag = "address"
healthDisabledFlag = "health-disabled"
healthTimeoutFlag = "health-timeout"
healthInitialDelayFlag = "health-initial-delay"
healthFailureThresholdFlag = "health-failure-threshold"
localFlag = "local"
addressFlag = "address"
)

var (
Expand Down Expand Up @@ -107,14 +102,18 @@ func main() {
}

var s *gameservers.SDKServer
s, err = gameservers.NewSDKServer(viper.GetString(gameServerNameEnv), viper.GetString(podNamespaceEnv),
ctlConf.HealthDisabled, ctlConf.HealthTimeout, ctlConf.HealthFailureThreshold,
ctlConf.HealthInitialDelay, kubeClient, agonesClient)
s, err = gameservers.NewSDKServer(viper.GetString(gameServerNameEnv),
viper.GetString(podNamespaceEnv), kubeClient, agonesClient)
if err != nil {
logger.WithError(err).Fatalf("Could not start sidecar")
}

go s.Run(ctx.Done())
go func() {
err := s.Run(ctx.Done())
if err != nil {
logger.WithError(err).Fatalf("Could not run sidecar")
}
}()
sdk.RegisterSDKServer(grpcServer, s)
}

Expand Down Expand Up @@ -159,49 +158,25 @@ func runGateway(ctx context.Context, grpcEndpoint string, mux *gwruntime.ServeMu
func parseEnvFlags() config {
viper.SetDefault(localFlag, false)
viper.SetDefault(addressFlag, "localhost")
viper.SetDefault(healthDisabledFlag, false)
viper.SetDefault(healthTimeoutFlag, 5)
viper.SetDefault(healthInitialDelayFlag, 5)
viper.SetDefault(healthFailureThresholdFlag, 3)
pflag.Bool(localFlag, viper.GetBool(localFlag),
"Set this, or LOCAL env, to 'true' to run this binary in local development mode. Defaults to 'false'")
pflag.String(addressFlag, viper.GetString(addressFlag), "The Address to bind the server grpcPort to. Defaults to 'localhost")
pflag.Bool(healthDisabledFlag, viper.GetBool(healthDisabledFlag),
"Set this, or HEALTH_ENABLED env, to 'true' to enable health checking on the GameServer. Defaults to 'true'")
pflag.Int64(healthTimeoutFlag, viper.GetInt64(healthTimeoutFlag),
"Set this or HEALTH_TIMEOUT env to the number of seconds that the health check times out at. Defaults to 5")
pflag.Int64(healthInitialDelayFlag, viper.GetInt64(healthInitialDelayFlag),
"Set this or HEALTH_INITIAL_DELAY env to the number of seconds that the health will wait before starting. Defaults to 5")
pflag.Int64(healthFailureThresholdFlag, viper.GetInt64(healthFailureThresholdFlag),
"Set this or HEALTH_FAILURE_THRESHOLD env to the number of times the health check needs to fail to be deemed unhealthy. Defaults to 3")
pflag.Parse()

viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_"))
runtime.Must(viper.BindEnv(localFlag))
runtime.Must(viper.BindEnv(gameServerNameEnv))
runtime.Must(viper.BindEnv(podNamespaceEnv))
runtime.Must(viper.BindEnv(healthDisabledFlag))
runtime.Must(viper.BindEnv(healthTimeoutFlag))
runtime.Must(viper.BindEnv(healthInitialDelayFlag))
runtime.Must(viper.BindEnv(healthFailureThresholdFlag))
runtime.Must(viper.BindPFlags(pflag.CommandLine))

return config{
IsLocal: viper.GetBool(localFlag),
Address: viper.GetString(addressFlag),
HealthDisabled: viper.GetBool(healthDisabledFlag),
HealthTimeout: time.Duration(viper.GetInt64(healthTimeoutFlag)) * time.Second,
HealthInitialDelay: time.Duration(viper.GetInt64(healthInitialDelayFlag)) * time.Second,
HealthFailureThreshold: viper.GetInt64(healthFailureThresholdFlag),
IsLocal: viper.GetBool(localFlag),
Address: viper.GetString(addressFlag),
}
}

// config is all the configuration for this program
type config struct {
Address string
IsLocal bool
HealthDisabled bool
HealthTimeout time.Duration
HealthInitialDelay time.Duration
HealthFailureThreshold int64
Address string
IsLocal bool
}
3 changes: 3 additions & 0 deletions install/helm/agones/templates/serviceaccounts/sdk.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ metadata:
release: {{ .Release.Name }}
heritage: {{ .Release.Service }}
rules:
- apiGroups: [""]
resources: ["events"]
verbs: ["create"]
- apiGroups: ["stable.agones.dev"]
resources: ["gameservers"]
verbs: ["list", "update", "watch"]
Expand Down
3 changes: 3 additions & 0 deletions install/yaml/install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ metadata:
release: agones-manual
heritage: Tiller
rules:
- apiGroups: [""]
resources: ["events"]
verbs: ["create"]
- apiGroups: ["stable.agones.dev"]
resources: ["gameservers"]
verbs: ["list", "update", "watch"]
Expand Down
90 changes: 49 additions & 41 deletions pkg/gameservers/sdkserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,33 +50,30 @@ var _ sdk.SDKServer = &SDKServer{}
// SDKServer is a gRPC server, that is meant to be a sidecar
// for a GameServer that will update the game server status on SDK requests
type SDKServer struct {
logger *logrus.Entry
gameServerName string
namespace string
informerFactory externalversions.SharedInformerFactory
gameServerGetter typedv1alpha1.GameServersGetter
gameServerLister v1alpha1.GameServerLister
gameServerSynced cache.InformerSynced
server *http.Server
clock clock.Clock
healthDisabled bool
healthTimeout time.Duration
healthFailureThreshold int64
healthMutex sync.RWMutex
healthLastUpdated time.Time
healthFailureCount int64
workerqueue *workerqueue.WorkerQueue
streamMutex sync.RWMutex
connectedStreams []sdk.SDK_WatchGameServerServer
stop <-chan struct{}
recorder record.EventRecorder
logger *logrus.Entry
gameServerName string
namespace string
informerFactory externalversions.SharedInformerFactory
gameServerGetter typedv1alpha1.GameServersGetter
gameServerLister v1alpha1.GameServerLister
gameServerSynced cache.InformerSynced
server *http.Server
clock clock.Clock
health stablev1alpha1.Health
healthTimeout time.Duration
healthMutex sync.RWMutex
healthLastUpdated time.Time
healthFailureCount int32
workerqueue *workerqueue.WorkerQueue
streamMutex sync.RWMutex
connectedStreams []sdk.SDK_WatchGameServerServer
stop <-chan struct{}
recorder record.EventRecorder
}

// NewSDKServer creates a SDKServer that sets up an
// InClusterConfig for Kubernetes
func NewSDKServer(gameServerName, namespace string,
healthDisabled bool, healthTimeout time.Duration, healthFailureThreshold int64, healthInitialDelay time.Duration,
kubeClient kubernetes.Interface,
func NewSDKServer(gameServerName, namespace string, kubeClient kubernetes.Interface,
agonesClient versioned.Interface) (*SDKServer, error) {
mux := http.NewServeMux()

Expand All @@ -97,13 +94,10 @@ func NewSDKServer(gameServerName, namespace string,
Addr: ":8080",
Handler: mux,
},
clock: clock.RealClock{},
healthDisabled: healthDisabled,
healthFailureThreshold: healthFailureThreshold,
healthTimeout: healthTimeout,
healthMutex: sync.RWMutex{},
healthFailureCount: 0,
streamMutex: sync.RWMutex{},
clock: clock.RealClock{},
healthMutex: sync.RWMutex{},
healthFailureCount: 0,
streamMutex: sync.RWMutex{},
}

s.informerFactory = factory
Expand Down Expand Up @@ -140,7 +134,6 @@ func NewSDKServer(gameServerName, namespace string,
}
})

s.initHealthLastUpdated(healthInitialDelay)
s.workerqueue = workerqueue.NewWorkerQueue(
func(key string) error {
return s.updateState(stablev1alpha1.State(key))
Expand All @@ -161,7 +154,28 @@ func (s *SDKServer) initHealthLastUpdated(healthInitialDelay time.Duration) {

// Run processes the rate limited queue.
// Will block until stop is closed
func (s *SDKServer) Run(stop <-chan struct{}) {
func (s *SDKServer) Run(stop <-chan struct{}) error {
s.informerFactory.Start(stop)
cache.WaitForCacheSync(stop, s.gameServerSynced)

gs, err := s.gameServerLister.GameServers(s.namespace).Get(s.gameServerName)
if err != nil {
return errors.Wrapf(err, "error retrieving gameserver %s/%s", s.namespace, s.gameServerName)
}

// grab configuration details
s.health = gs.Spec.Health
s.logger.WithField("health", s.health).Info("setting health configuration")
s.healthTimeout = time.Duration(gs.Spec.Health.PeriodSeconds) * time.Second
s.initHealthLastUpdated(time.Duration(gs.Spec.Health.InitialDelaySeconds) * time.Second)

// start health checking running
if !s.health.Disabled {
s.logger.Info("Starting GameServer health checking")
go wait.Until(s.runHealth, s.healthTimeout, stop)
}

// then start the http endpoints
s.logger.Info("Starting SDKServer http health check...")
go func() {
if err := s.server.ListenAndServe(); err != nil {
Expand All @@ -175,16 +189,10 @@ func (s *SDKServer) Run(stop <-chan struct{}) {
}()
defer s.server.Close() // nolint: errcheck

if !s.healthDisabled {
s.logger.Info("Starting GameServer health checking")
go wait.Until(s.runHealth, s.healthTimeout, stop)
}

s.informerFactory.Start(stop)
cache.WaitForCacheSync(stop, s.gameServerSynced)
// need this for streaming gRPC commands
s.stop = stop
s.workerqueue.Run(1, stop)
return nil
}

// updateState sets the GameServer Status's state to the state
Expand Down Expand Up @@ -371,11 +379,11 @@ func (s *SDKServer) checkHealth() {
// currently healthy or not based on the configured
// failure count vs failure threshold
func (s *SDKServer) healthy() bool {
if s.healthDisabled {
if s.health.Disabled {
return true
}

s.healthMutex.RLock()
defer s.healthMutex.RUnlock()
return s.healthFailureCount < s.healthFailureThreshold
return s.healthFailureCount < s.health.FailureThreshold
}
Loading