Skip to content

Commit

Permalink
Merge pull request #1197 from dcantah/retry-stdio-conns2
Browse files Browse the repository at this point in the history
Add reconnect logic for stdio pipes
  • Loading branch information
dcantah committed Oct 29, 2021
2 parents 5f5e3ea + 573c137 commit 27c580d
Show file tree
Hide file tree
Showing 51 changed files with 1,862 additions and 198 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ jobs:
with:
version: v1.42.1 # Has fixes for stylecheck configuration https://github.com/golangci/golangci-lint/pull/2017/files
args: --timeout=5m -v
only-new-issues: true

verify-main-vendor:
runs-on: 'windows-2019'
Expand All @@ -40,7 +41,7 @@ jobs:
Write-Error "Main modules are not up to date. Please validate your go version >= this job's and run `go mod vendor` followed by `go mod tidy` in the repo root path."
}
exit $process.ExitCode
verify-test-vendor:
runs-on: 'windows-2019'
env:
Expand Down
2 changes: 1 addition & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ issues:
linters:
- stylecheck
Text: "ST1003:"

- path: cmd\\ncproxy\\nodenetsvc\\
linters:
- stylecheck
Expand Down
156 changes: 96 additions & 60 deletions cmd/containerd-shim-runhcs-v1/options/runhcs.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions cmd/containerd-shim-runhcs-v1/options/runhcs.proto
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ message Options {
// logrus log levels: "trace", "debug", "info", "warn", "error", "fatal", "panic". This setting will override
// the `debug` field if both are specified, unless the level specified is also "debug", as these are equivalent.
string log_level = 16;

// io_retry_timeout_in_sec is how long, in seconds, to keep trying to reconnect to an upstream IO provider after a connection is lost.
// The typical example is containerd restarting while it is expected to come back online. A value of 0 for this field is interpreted
// as an infinite timeout.
int32 io_retry_timeout_in_sec = 17;
}

// ProcessDetails contains additional information about a process. This is the additional
Expand Down
57 changes: 40 additions & 17 deletions cmd/containerd-shim-runhcs-v1/task_hcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,11 +166,6 @@ func newHcsTask(
owner := filepath.Base(os.Args[0])
isTemplate := oci.ParseAnnotationsSaveAsTemplate(ctx, s)

io, err := cmd.NewUpstreamIO(ctx, req.ID, req.Stdout, req.Stderr, req.Stdin, req.Terminal)
if err != nil {
return nil, err
}

var netNS string
if s.Windows != nil &&
s.Windows.Network != nil {
Expand All @@ -186,22 +181,33 @@ func newHcsTask(
shimOpts = v.(*runhcsopts.Options)
}

// Default to an infinite timeout (zero value)
var ioRetryTimeout time.Duration
if shimOpts != nil {
ioRetryTimeout = time.Duration(shimOpts.IoRetryTimeoutInSec) * time.Second
}
io, err := cmd.NewUpstreamIO(ctx, req.ID, req.Stdout, req.Stderr, req.Stdin, req.Terminal, ioRetryTimeout)
if err != nil {
return nil, err
}

container, resources, err := createContainer(ctx, req.ID, owner, netNS, s, parent, shimOpts)
if err != nil {
return nil, err
}

ht := &hcsTask{
events: events,
id: req.ID,
isWCOW: oci.IsWCOW(s),
c: container,
cr: resources,
ownsHost: ownsParent,
host: parent,
closed: make(chan struct{}),
taskSpec: s,
isTemplate: isTemplate,
events: events,
id: req.ID,
isWCOW: oci.IsWCOW(s),
c: container,
cr: resources,
ownsHost: ownsParent,
host: parent,
closed: make(chan struct{}),
taskSpec: s,
isTemplate: isTemplate,
ioRetryTimeout: ioRetryTimeout,
}
ht.init = newHcsExec(
ctx,
Expand Down Expand Up @@ -279,7 +285,21 @@ func newClonedHcsTask(
return nil, fmt.Errorf("cloned task can only be created inside a windows host")
}

io, err := cmd.NewNpipeIO(ctx, req.Stdin, req.Stdout, req.Stderr, req.Terminal)
var shimOpts *runhcsopts.Options
if req.Options != nil {
v, err := typeurl.UnmarshalAny(req.Options)
if err != nil {
return nil, err
}
shimOpts = v.(*runhcsopts.Options)
}

// Default to an infinite timeout (zero value)
var ioRetryTimeout time.Duration
if shimOpts != nil {
ioRetryTimeout = time.Duration(shimOpts.IoRetryTimeoutInSec) * time.Second
}
io, err := cmd.NewNpipeIO(ctx, req.Stdin, req.Stdout, req.Stderr, req.Terminal, ioRetryTimeout)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -433,6 +453,9 @@ type hcsTask struct {

// taskSpec represents the spec/configuration for this task.
taskSpec *specs.Spec

// ioRetryTimeout is how long to keep trying to reconnect to the stdio pipes from containerd. The zero value means an infinite timeout.
ioRetryTimeout time.Duration
}

func (ht *hcsTask) ID() string {
Expand All @@ -453,7 +476,7 @@ func (ht *hcsTask) CreateExec(ctx context.Context, req *task.ExecProcessRequest,
return errors.Wrapf(errdefs.ErrFailedPrecondition, "exec: '' in task: '%s' must be running to create additional execs", ht.id)
}

io, err := cmd.NewUpstreamIO(ctx, req.ID, req.Stdout, req.Stderr, req.Stdin, req.Terminal)
io, err := cmd.NewUpstreamIO(ctx, req.ID, req.Stdout, req.Stderr, req.Stdin, req.Terminal, ht.ioRetryTimeout)
if err != nil {
return err
}
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ go 1.13
require (
github.com/BurntSushi/toml v0.3.1
github.com/Microsoft/go-winio v0.4.17
github.com/cenkalti/backoff/v4 v4.1.1
github.com/containerd/cgroups v1.0.1
github.com/containerd/console v1.0.2
github.com/containerd/containerd v1.5.7
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ github.com/buger/jsonparser v0.0.0-20180808090653-f4dd9f5a6b44/go.mod h1:bbYlZJ7
github.com/bugsnag/bugsnag-go v0.0.0-20141110184014-b1d153021fcd/go.mod h1:2oa8nejYd4cQ/b0hMIopN0lCRxU0bueqREvZLWFrtK8=
github.com/bugsnag/osext v0.0.0-20130617224835-0dd3f918b21b/go.mod h1:obH5gd0BsqsP2LwDJ9aOkm/6J86V6lyAXCoQWGw3K50=
github.com/bugsnag/panicwrap v0.0.0-20151223152923-e2c28503fcd0/go.mod h1:D/8v3kj0zr8ZAKg1AQ6crr+5VwKN5eIywRkfhyM/+dE=
github.com/cenkalti/backoff/v4 v4.1.1 h1:G2HAfAmvm/GcKan2oOQpBXOd2tT2G57ZnZGWa1PxPBQ=
github.com/cenkalti/backoff/v4 v4.1.1/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
Expand Down
Loading

0 comments on commit 27c580d

Please sign in to comment.