From 95095f8406ce1bfc938237babf13a7ac36f73c6c Mon Sep 17 00:00:00 2001 From: Maxim Vladimirskiy Date: Fri, 26 Oct 2018 15:06:15 +0300 Subject: [PATCH 01/38] etcdserver: Remove infinite loop in doSerialize Once chk(ai) fails with auth.ErrAuthOldRevision it will always do, regardless how many times you retry. So the error is better be returned to fail the pending request and make the client re-authenticate. --- etcdserver/v3_server.go | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/etcdserver/v3_server.go b/etcdserver/v3_server.go index 9d429e32951..bf4adf18888 100644 --- a/etcdserver/v3_server.go +++ b/etcdserver/v3_server.go @@ -523,29 +523,25 @@ func (s *EtcdServer) raftRequest(ctx context.Context, r pb.InternalRaftRequest) // doSerialize handles the auth logic, with permissions checked by "chk", for a serialized request "get". Returns a non-nil error on authentication failure. func (s *EtcdServer) doSerialize(ctx context.Context, chk func(*auth.AuthInfo) error, get func()) error { - for { - ai, err := s.AuthInfoFromCtx(ctx) - if err != nil { - return err - } - if ai == nil { - // chk expects non-nil AuthInfo; use empty credentials - ai = &auth.AuthInfo{} - } - if err = chk(ai); err != nil { - if err == auth.ErrAuthOldRevision { - continue - } - return err - } - // fetch response for serialized request - get() - // empty credentials or current auth info means no need to retry - if ai.Revision == 0 || ai.Revision == s.authStore.Revision() { - return nil - } - // avoid TOCTOU error, retry of the request is required. + ai, err := s.AuthInfoFromCtx(ctx) + if err != nil { + return err + } + if ai == nil { + // chk expects non-nil AuthInfo; use empty credentials + ai = &auth.AuthInfo{} + } + if err = chk(ai); err != nil { + return err + } + // fetch response for serialized request + get() + // check for stale token revision in case the auth store was updated while + // the request has been handled. + if ai.Revision != 0 && ai.Revision != s.authStore.Revision() { + return auth.ErrAuthOldRevision } + return nil } func (s *EtcdServer) processInternalRaftRequestOnce(ctx context.Context, r pb.InternalRaftRequest) (*applyResult, error) { From 5a4821721eb7d87f42b5d8be4a379111469c3f5e Mon Sep 17 00:00:00 2001 From: Jingyi Hu Date: Tue, 12 Feb 2019 15:40:39 -0800 Subject: [PATCH 02/38] etcdserver: remove auth validation loop Remove auth validation loop in v3_server.raftRequest(). Re-validation when error ErrAuthOldRevision occurs should be handled on client side. --- etcdserver/v3_server.go | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/etcdserver/v3_server.go b/etcdserver/v3_server.go index bf4adf18888..76ca8dee035 100644 --- a/etcdserver/v3_server.go +++ b/etcdserver/v3_server.go @@ -513,12 +513,7 @@ func (s *EtcdServer) raftRequestOnce(ctx context.Context, r pb.InternalRaftReque } func (s *EtcdServer) raftRequest(ctx context.Context, r pb.InternalRaftRequest) (proto.Message, error) { - for { - resp, err := s.raftRequestOnce(ctx, r) - if err != auth.ErrAuthOldRevision { - return resp, err - } - } + return s.raftRequestOnce(ctx, r) } // doSerialize handles the auth logic, with permissions checked by "chk", for a serialized request "get". Returns a non-nil error on authentication failure. From e1508f94b6a50eda0ea2cec0f5a520613b300c11 Mon Sep 17 00:00:00 2001 From: Jingyi Hu Date: Fri, 25 Oct 2019 18:27:40 -0700 Subject: [PATCH 03/38] integration: disable TestV3AuthOldRevConcurrent Disable TestV3AuthOldRevConcurrent for now. See https://github.com/etcd-io/etcd/pull/10468#issuecomment-463253361 --- integration/v3_auth_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/integration/v3_auth_test.go b/integration/v3_auth_test.go index 97017a07fae..ab6825892aa 100644 --- a/integration/v3_auth_test.go +++ b/integration/v3_auth_test.go @@ -347,6 +347,7 @@ func TestV3AuthNonAuthorizedRPCs(t *testing.T) { } func TestV3AuthOldRevConcurrent(t *testing.T) { + t.Skip() // TODO(jingyih): re-enable the test when #10408 is fixed. defer testutil.AfterTest(t) clus := NewClusterV3(t, &ClusterConfig{Size: 1}) defer clus.Terminate(t) From b3d9e290961a5e353aadfb24e46423ace6bf71fe Mon Sep 17 00:00:00 2001 From: Joe Betz Date: Wed, 12 Feb 2020 10:12:48 -0800 Subject: [PATCH 04/38] mvcc/backend: Delete orphaned db.tmp files before defrag --- mvcc/backend/backend.go | 21 ++++++++++++++++++--- snap/snapshotter.go | 17 +++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/mvcc/backend/backend.go b/mvcc/backend/backend.go index 55dc3fce8fd..33997b02207 100644 --- a/mvcc/backend/backend.go +++ b/mvcc/backend/backend.go @@ -310,21 +310,35 @@ func (b *backend) defrag() error { b.batchTx.unsafeCommit(true) b.batchTx.tx = nil - tmpdb, err := bolt.Open(b.db.Path()+".tmp", 0600, boltOpenOptions) + // Create a temporary file to ensure we start with a clean slate. + // Snapshotter.cleanupSnapdir cleans up any of these that are found during startup. + dir := filepath.Dir(b.db.Path()) + temp, err := ioutil.TempFile(dir, "db.tmp.*") + if err != nil { + return err + } + options := *boltOpenOptions + options.OpenFile = func(path string, i int, mode os.FileMode) (file *os.File, err error) { + return temp, nil + } + tdbp := temp.Name() + tmpdb, err := bolt.Open(tdbp, 0600, &options) if err != nil { return err } + // gofail: var defragBeforeCopy struct{} err = defragdb(b.db, tmpdb, defragLimit) if err != nil { tmpdb.Close() - os.RemoveAll(tmpdb.Path()) + if rmErr := os.RemoveAll(tmpdb.Path()); rmErr != nil { + plog.Fatalf("failed to remove db.tmp after defragmentation completed: %v", rmErr) + } return err } dbp := b.db.Path() - tdbp := tmpdb.Path() err = b.db.Close() if err != nil { @@ -334,6 +348,7 @@ func (b *backend) defrag() error { if err != nil { plog.Fatalf("cannot close database (%s)", err) } + // gofail: var defragBeforeRename struct{} err = os.Rename(tdbp, dbp) if err != nil { plog.Fatalf("cannot rename database (%s)", err) diff --git a/snap/snapshotter.go b/snap/snapshotter.go index 00755592129..5d79b175915 100644 --- a/snap/snapshotter.go +++ b/snap/snapshotter.go @@ -172,6 +172,9 @@ func (s *Snapshotter) snapNames() ([]string, error) { if err != nil { return nil, err } + if err = s.cleanupSnapdir(names); err != nil { + return nil, err + } snaps := checkSuffix(names) if len(snaps) == 0 { return nil, ErrNoSnapshot @@ -202,3 +205,17 @@ func renameBroken(path string) { plog.Warningf("cannot rename broken snapshot file %v to %v: %v", path, brokenPath, err) } } + +// cleanupSnapdir removes any files that should not be in the snapshot directory: +// - db.tmp prefixed files that can be orphaned by defragmentation +func (s *Snapshotter) cleanupSnapdir(filenames []string) error { + for _, filename := range filenames { + if strings.HasPrefix(filename, "db.tmp") { + plog.Infof("found orphaned defragmentation file; deleting: %s", filename) + if rmErr := os.Remove(filepath.Join(s.dir, filename)); rmErr != nil && !os.IsNotExist(rmErr) { + return fmt.Errorf("failed to remove orphaned defragmentation file %s: %v", filename, rmErr) + } + } + } + return nil +} From 7b1a92cb7c0e236fe8ee3ed989678612e8834023 Mon Sep 17 00:00:00 2001 From: jingyih Date: Fri, 14 Feb 2020 21:26:52 -0800 Subject: [PATCH 05/38] mvcc/backend: check for nil boltOpenOptions Check if boltOpenOptions is nil before use it. --- mvcc/backend/backend.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mvcc/backend/backend.go b/mvcc/backend/backend.go index 33997b02207..2229d9ce1cb 100644 --- a/mvcc/backend/backend.go +++ b/mvcc/backend/backend.go @@ -317,7 +317,10 @@ func (b *backend) defrag() error { if err != nil { return err } - options := *boltOpenOptions + options := bolt.Options{} + if boltOpenOptions != nil { + options = *boltOpenOptions + } options.OpenFile = func(path string, i int, mode os.FileMode) (file *os.File, err error) { return temp, nil } From c58133b2d462abb6845e0d6a3dee463842c3468d Mon Sep 17 00:00:00 2001 From: jingyih Date: Wed, 19 Feb 2020 00:05:13 -0800 Subject: [PATCH 06/38] etcdctl: fix member add command Use members information from member add response, which is guaranteed to be up to date. --- etcdctl/ctlv3/command/member_command.go | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/etcdctl/ctlv3/command/member_command.go b/etcdctl/ctlv3/command/member_command.go index a6090c9497c..de29ceaf5eb 100644 --- a/etcdctl/ctlv3/command/member_command.go +++ b/etcdctl/ctlv3/command/member_command.go @@ -118,30 +118,8 @@ func memberAddCommandFunc(cmd *cobra.Command, args []string) { display.MemberAdd(*resp) if _, ok := (display).(*simplePrinter); ok { - ctx, cancel = commandCtx(cmd) - listResp, err := cli.MemberList(ctx) - // make sure the member who served member list request has the latest member list. - syncedMemberSet := make(map[uint64]struct{}) - syncedMemberSet[resp.Header.MemberId] = struct{}{} // the member who served member add is guaranteed to have the latest member list. - for { - if err != nil { - ExitWithError(ExitError, err) - } - if _, ok := syncedMemberSet[listResp.Header.MemberId]; ok { - break - } - // quorum get to sync cluster list - gresp, gerr := cli.Get(ctx, "_") - if gerr != nil { - ExitWithError(ExitError, err) - } - syncedMemberSet[gresp.Header.MemberId] = struct{}{} - listResp, err = cli.MemberList(ctx) - } - cancel() - conf := []string{} - for _, memb := range listResp.Members { + for _, memb := range resp.Members { for _, u := range memb.PeerURLs { n := memb.Name if memb.ID == newID { From 1228d6c1e7fedf784ab424ede30db2bdc69e0653 Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Fri, 13 Mar 2020 17:53:41 -0400 Subject: [PATCH 07/38] proxy/grpcproxy: add return on error for metrics handler Signed-off-by: Sam Batschelet --- proxy/grpcproxy/metrics.go | 1 + 1 file changed, 1 insertion(+) diff --git a/proxy/grpcproxy/metrics.go b/proxy/grpcproxy/metrics.go index ebb82bb727d..99630a32e54 100644 --- a/proxy/grpcproxy/metrics.go +++ b/proxy/grpcproxy/metrics.go @@ -89,6 +89,7 @@ func HandleMetrics(mux *http.ServeMux, c *http.Client, eps []string) { resp, err := c.Get(target) if err != nil { http.Error(w, "Internal server error", http.StatusInternalServerError) + return } defer resp.Body.Close() w.Header().Set("Content-Type", "text/plain; version=0.0.4") From 30aaceb1c3bf8d6c48e8479ef42cab5d0d15a400 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 15:43:03 -0700 Subject: [PATCH 08/38] etcdserver/api/etcdhttp: log server-side /health checks ref. https://github.com/etcd-io/etcd/pull/11704 Signed-off-by: Gyuho Lee --- etcdserver/api/etcdhttp/metrics.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/etcdserver/api/etcdhttp/metrics.go b/etcdserver/api/etcdhttp/metrics.go index e947abfdd0d..2f6a0a7b2ae 100644 --- a/etcdserver/api/etcdhttp/metrics.go +++ b/etcdserver/api/etcdhttp/metrics.go @@ -50,6 +50,7 @@ func NewHealthHandler(hfunc func() Health) http.HandlerFunc { if r.Method != http.MethodGet { w.Header().Set("Allow", http.MethodGet) http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed) + plog.Warningf("/health error (status code %d)", http.StatusMethodNotAllowed) return } h := hfunc() @@ -97,11 +98,15 @@ func checkHealth(srv etcdserver.ServerV2) Health { as := srv.Alarms() if len(as) > 0 { h.Health = "false" + for _, v := range as { + plog.Warningf("/health error due to an alarm %s", v.String()) + } } if h.Health == "true" { if uint64(srv.Leader()) == raft.None { h.Health = "false" + plog.Warningf("/health error; no leader (status code %d)", http.StatusServiceUnavailable) } } @@ -111,11 +116,13 @@ func checkHealth(srv etcdserver.ServerV2) Health { cancel() if err != nil { h.Health = "false" + plog.Warningf("/health error; QGET failed %v (status code %d)", err, http.StatusServiceUnavailable) } } if h.Health == "true" { healthSuccess.Inc() + plog.Infof("/health OK (status code %d)", http.StatusOK) } else { healthFailed.Inc() } From 6f7ee076eafe0240c8b78d443472e14b2a04874d Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 16:12:22 -0700 Subject: [PATCH 09/38] clientv3: embed api version in metadata ref. https://github.com/etcd-io/etcd/pull/11687 Signed-off-by: Gyuho Lee clientv3: fix racy writes to context key === RUN TestWatchOverlapContextCancel ================== WARNING: DATA RACE Write at 0x00c42110dd40 by goroutine 99: runtime.mapassign() /usr/local/go/src/runtime/hashmap.go:485 +0x0 github.com/coreos/etcd/clientv3.metadataSet() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/ctx.go:61 +0x8c github.com/coreos/etcd/clientv3.withVersion() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/ctx.go:47 +0x137 github.com/coreos/etcd/clientv3.newStreamClientInterceptor.func1() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/client.go:309 +0x81 google.golang.org/grpc.NewClientStream() /go/src/github.com/coreos/etcd/gopath/src/google.golang.org/grpc/stream.go:101 +0x10e github.com/coreos/etcd/etcdserver/etcdserverpb.(*watchClient).Watch() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/etcdserver/etcdserverpb/rpc.pb.go:3193 +0xe9 github.com/coreos/etcd/clientv3.(*watchGrpcStream).openWatchClient() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:788 +0x143 github.com/coreos/etcd/clientv3.(*watchGrpcStream).newWatchClient() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:700 +0x5c3 github.com/coreos/etcd/clientv3.(*watchGrpcStream).run() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:431 +0x12b Previous read at 0x00c42110dd40 by goroutine 130: reflect.maplen() /usr/local/go/src/runtime/hashmap.go:1165 +0x0 reflect.Value.MapKeys() /usr/local/go/src/reflect/value.go:1090 +0x43b fmt.(*pp).printValue() /usr/local/go/src/fmt/print.go:741 +0x1885 fmt.(*pp).printArg() /usr/local/go/src/fmt/print.go:682 +0x1b1 fmt.(*pp).doPrintf() /usr/local/go/src/fmt/print.go:998 +0x1cad fmt.Sprintf() /usr/local/go/src/fmt/print.go:196 +0x77 github.com/coreos/etcd/clientv3.streamKeyFromCtx() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:825 +0xc8 github.com/coreos/etcd/clientv3.(*watcher).Watch() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:265 +0x426 github.com/coreos/etcd/clientv3/integration.testWatchOverlapContextCancel.func1() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/integration/watch_test.go:959 +0x23e Goroutine 99 (running) created at: github.com/coreos/etcd/clientv3.(*watcher).newWatcherGrpcStream() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:236 +0x59d github.com/coreos/etcd/clientv3.(*watcher).Watch() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:278 +0xbb6 github.com/coreos/etcd/clientv3/integration.testWatchOverlapContextCancel.func1() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/integration/watch_test.go:959 +0x23e Goroutine 130 (running) created at: github.com/coreos/etcd/clientv3/integration.testWatchOverlapContextCancel() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/integration/watch_test.go:979 +0x76d github.com/coreos/etcd/clientv3/integration.TestWatchOverlapContextCancel() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/integration/watch_test.go:922 +0x44 testing.tRunner() /usr/local/go/src/testing/testing.go:657 +0x107 ================== Signed-off-by: Gyuho Lee --- clientv3/client.go | 8 ----- clientv3/ctx.go | 64 +++++++++++++++++++++++++++++++++ clientv3/ctx_test.go | 67 +++++++++++++++++++++++++++++++++++ clientv3/retry_interceptor.go | 2 ++ 4 files changed, 133 insertions(+), 8 deletions(-) create mode 100644 clientv3/ctx.go create mode 100644 clientv3/ctx_test.go diff --git a/clientv3/client.go b/clientv3/client.go index 4c9df7a19a3..5a15cf5faea 100644 --- a/clientv3/client.go +++ b/clientv3/client.go @@ -37,7 +37,6 @@ import ( "google.golang.org/grpc/codes" grpccredentials "google.golang.org/grpc/credentials" "google.golang.org/grpc/keepalive" - "google.golang.org/grpc/metadata" "google.golang.org/grpc/status" ) @@ -393,13 +392,6 @@ func (c *Client) dialWithBalancerCreds(ep string) grpccredentials.TransportCrede return creds } -// WithRequireLeader requires client requests to only succeed -// when the cluster has a leader. -func WithRequireLeader(ctx context.Context) context.Context { - md := metadata.Pairs(rpctypes.MetadataRequireLeaderKey, rpctypes.MetadataHasLeader) - return metadata.NewOutgoingContext(ctx, md) -} - func newClient(cfg *Config) (*Client, error) { if cfg == nil { cfg = &Config{} diff --git a/clientv3/ctx.go b/clientv3/ctx.go new file mode 100644 index 00000000000..da8297b6c71 --- /dev/null +++ b/clientv3/ctx.go @@ -0,0 +1,64 @@ +// Copyright 2020 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package clientv3 + +import ( + "context" + "strings" + + "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes" + "github.com/coreos/etcd/version" + "google.golang.org/grpc/metadata" +) + +// WithRequireLeader requires client requests to only succeed +// when the cluster has a leader. +func WithRequireLeader(ctx context.Context) context.Context { + md, ok := metadata.FromOutgoingContext(ctx) + if !ok { // no outgoing metadata ctx key, create one + md = metadata.Pairs(rpctypes.MetadataRequireLeaderKey, rpctypes.MetadataHasLeader) + return metadata.NewOutgoingContext(ctx, md) + } + copied := md.Copy() // avoid racey updates + // overwrite/add 'hasleader' key/value + metadataSet(copied, rpctypes.MetadataRequireLeaderKey, rpctypes.MetadataHasLeader) + return metadata.NewOutgoingContext(ctx, copied) +} + +// embeds client version +func withVersion(ctx context.Context) context.Context { + md, ok := metadata.FromOutgoingContext(ctx) + if !ok { // no outgoing metadata ctx key, create one + md = metadata.Pairs(rpctypes.MetadataClientAPIVersionKey, version.APIVersion) + return metadata.NewOutgoingContext(ctx, md) + } + copied := md.Copy() // avoid racey updates + // overwrite/add version key/value + metadataSet(copied, rpctypes.MetadataClientAPIVersionKey, version.APIVersion) + return metadata.NewOutgoingContext(ctx, copied) +} + +func metadataGet(md metadata.MD, k string) []string { + k = strings.ToLower(k) + return md[k] +} + +func metadataSet(md metadata.MD, k string, vals ...string) { + if len(vals) == 0 { + return + } + k = strings.ToLower(k) + md[k] = vals +} diff --git a/clientv3/ctx_test.go b/clientv3/ctx_test.go new file mode 100644 index 00000000000..4a357dae685 --- /dev/null +++ b/clientv3/ctx_test.go @@ -0,0 +1,67 @@ +// Copyright 2020 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package clientv3 + +import ( + "context" + "reflect" + "testing" + + "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes" + "github.com/coreos/etcd/version" + "google.golang.org/grpc/metadata" +) + +func TestMetadataWithRequireLeader(t *testing.T) { + ctx := context.TODO() + md, ok := metadata.FromOutgoingContext(ctx) + if ok { + t.Fatal("expected no outgoing metadata ctx key") + } + + // add a conflicting key with some other value + md = metadata.Pairs(rpctypes.MetadataRequireLeaderKey, "invalid") + // add a key, and expect not be overwritten + metadataSet(md, "hello", "1", "2") + ctx = metadata.NewOutgoingContext(ctx, md) + + // expect overwrites but still keep other keys + ctx = WithRequireLeader(ctx) + md, ok = metadata.FromOutgoingContext(ctx) + if !ok { + t.Fatal("expected outgoing metadata ctx key") + } + if ss := metadataGet(md, rpctypes.MetadataRequireLeaderKey); !reflect.DeepEqual(ss, []string{rpctypes.MetadataHasLeader}) { + t.Fatalf("unexpected metadata for %q %v", rpctypes.MetadataRequireLeaderKey, ss) + } + if ss := metadataGet(md, "hello"); !reflect.DeepEqual(ss, []string{"1", "2"}) { + t.Fatalf("unexpected metadata for 'hello' %v", ss) + } +} + +func TestMetadataWithClientAPIVersion(t *testing.T) { + ctx := withVersion(WithRequireLeader(context.TODO())) + + md, ok := metadata.FromOutgoingContext(ctx) + if !ok { + t.Fatal("expected outgoing metadata ctx key") + } + if ss := metadataGet(md, rpctypes.MetadataRequireLeaderKey); !reflect.DeepEqual(ss, []string{rpctypes.MetadataHasLeader}) { + t.Fatalf("unexpected metadata for %q %v", rpctypes.MetadataRequireLeaderKey, ss) + } + if ss := metadataGet(md, rpctypes.MetadataClientAPIVersionKey); !reflect.DeepEqual(ss, []string{version.APIVersion}) { + t.Fatalf("unexpected metadata for %q %v", rpctypes.MetadataClientAPIVersionKey, ss) + } +} diff --git a/clientv3/retry_interceptor.go b/clientv3/retry_interceptor.go index ca72ed0bdea..f3c50570677 100644 --- a/clientv3/retry_interceptor.go +++ b/clientv3/retry_interceptor.go @@ -38,6 +38,7 @@ import ( func (c *Client) unaryClientInterceptor(logger *zap.Logger, optFuncs ...retryOption) grpc.UnaryClientInterceptor { intOpts := reuseOrNewWithCallOptions(defaultOptions, optFuncs) return func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { + ctx = withVersion(ctx) grpcOpts, retryOpts := filterCallOptions(opts) callOpts := reuseOrNewWithCallOptions(intOpts, retryOpts) // short circuit for simplicity, and avoiding allocations. @@ -103,6 +104,7 @@ func (c *Client) unaryClientInterceptor(logger *zap.Logger, optFuncs ...retryOpt func (c *Client) streamClientInterceptor(logger *zap.Logger, optFuncs ...retryOption) grpc.StreamClientInterceptor { intOpts := reuseOrNewWithCallOptions(defaultOptions, optFuncs) return func(ctx context.Context, desc *grpc.StreamDesc, cc *grpc.ClientConn, method string, streamer grpc.Streamer, opts ...grpc.CallOption) (grpc.ClientStream, error) { + ctx = withVersion(ctx) grpcOpts, retryOpts := filterCallOptions(opts) callOpts := reuseOrNewWithCallOptions(intOpts, retryOpts) // short circuit for simplicity, and avoiding allocations. From d9027cecf25f3212b3601515d1245e7fec9797ca Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 16:12:55 -0700 Subject: [PATCH 10/38] etcdserver/api/v3rpc: handle api version metadata, add metrics ref. https://github.com/etcd-io/etcd/pull/11687 Signed-off-by: Gyuho Lee --- etcdserver/api/v3rpc/interceptor.go | 22 +++++++++++++++++++--- etcdserver/api/v3rpc/metrics.go | 10 ++++++++++ etcdserver/api/v3rpc/rpctypes/md.go | 2 ++ 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/etcdserver/api/v3rpc/interceptor.go b/etcdserver/api/v3rpc/interceptor.go index d594ae7f154..fbc2ba3ed8b 100644 --- a/etcdserver/api/v3rpc/interceptor.go +++ b/etcdserver/api/v3rpc/interceptor.go @@ -16,16 +16,16 @@ package v3rpc import ( "context" + "strings" "sync" "time" "github.com/coreos/etcd/etcdserver" "github.com/coreos/etcd/etcdserver/api" "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes" + pb "github.com/coreos/etcd/etcdserver/etcdserverpb" "github.com/coreos/etcd/pkg/types" "github.com/coreos/etcd/raft" - - pb "github.com/coreos/etcd/etcdserver/etcdserverpb" "go.uber.org/zap" "google.golang.org/grpc" "google.golang.org/grpc/metadata" @@ -49,6 +49,12 @@ func newUnaryInterceptor(s *etcdserver.EtcdServer) grpc.UnaryServerInterceptor { md, ok := metadata.FromIncomingContext(ctx) if ok { + ver, vs := "unknown", metadataGet(md, rpctypes.MetadataClientAPIVersionKey) + if len(vs) > 0 { + ver = vs[0] + } + clientRequests.WithLabelValues("unary", ver).Inc() + if ks := md[rpctypes.MetadataRequireLeaderKey]; len(ks) > 0 && ks[0] == rpctypes.MetadataHasLeader { if s.Leader() == types.ID(raft.None) { return nil, rpctypes.ErrGRPCNoLeader @@ -187,6 +193,12 @@ func newStreamInterceptor(s *etcdserver.EtcdServer) grpc.StreamServerInterceptor md, ok := metadata.FromIncomingContext(ss.Context()) if ok { + ver, vs := "unknown", metadataGet(md, rpctypes.MetadataClientAPIVersionKey) + if len(vs) > 0 { + ver = vs[0] + } + clientRequests.WithLabelValues("stream", ver).Inc() + if ks := md[rpctypes.MetadataRequireLeaderKey]; len(ks) > 0 && ks[0] == rpctypes.MetadataHasLeader { if s.Leader() == types.ID(raft.None) { return rpctypes.ErrGRPCNoLeader @@ -205,7 +217,6 @@ func newStreamInterceptor(s *etcdserver.EtcdServer) grpc.StreamServerInterceptor smap.mu.Unlock() cancel() }() - } } @@ -261,3 +272,8 @@ func monitorLeader(s *etcdserver.EtcdServer) *streamsMap { return smap } + +func metadataGet(md metadata.MD, k string) []string { + k = strings.ToLower(k) + return md[k] +} diff --git a/etcdserver/api/v3rpc/metrics.go b/etcdserver/api/v3rpc/metrics.go index 6cb41a61e56..46db8649c1c 100644 --- a/etcdserver/api/v3rpc/metrics.go +++ b/etcdserver/api/v3rpc/metrics.go @@ -30,9 +30,19 @@ var ( Name: "client_grpc_received_bytes_total", Help: "The total number of bytes received from grpc clients.", }) + + clientRequests = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "etcd", + Subsystem: "server", + Name: "client_requests_total", + Help: "The total number of client requests per client version.", + }, + []string{"type", "client_api_version"}, + ) ) func init() { prometheus.MustRegister(sentBytes) prometheus.MustRegister(receivedBytes) + prometheus.MustRegister(clientRequests) } diff --git a/etcdserver/api/v3rpc/rpctypes/md.go b/etcdserver/api/v3rpc/rpctypes/md.go index 5c590e1aec9..90b8b835b16 100644 --- a/etcdserver/api/v3rpc/rpctypes/md.go +++ b/etcdserver/api/v3rpc/rpctypes/md.go @@ -17,4 +17,6 @@ package rpctypes var ( MetadataRequireLeaderKey = "hasleader" MetadataHasLeader = "true" + + MetadataClientAPIVersionKey = "client-api-version" ) From f9c89209f372160d242d290c3ea2984e766bad2b Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 16:23:25 -0700 Subject: [PATCH 11/38] version: 3.3.19 Signed-off-by: Gyuho Lee --- version/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version/version.go b/version/version.go index 48e49bde5f1..da27633df52 100644 --- a/version/version.go +++ b/version/version.go @@ -26,7 +26,7 @@ import ( var ( // MinClusterVersion is the min cluster version this etcd binary is compatible with. MinClusterVersion = "3.0.0" - Version = "3.3.18" + Version = "3.3.19" APIVersion = "unknown" // Git SHA Value will be set during build From 10d50e0662d93e66f44d064f8a246441ef97eab7 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 16:56:10 -0700 Subject: [PATCH 12/38] words: whitelist "hasleader" Signed-off-by: Gyuho Lee --- .words | 1 + 1 file changed, 1 insertion(+) diff --git a/.words b/.words index f41d120b058..df6c1816bbc 100644 --- a/.words +++ b/.words @@ -25,6 +25,7 @@ healthcheck iff inflight keepalive +hasleader keepalives keyspace linearization From 07562e235c05674ac27c9454500a78fa8f2ad078 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 17:18:34 -0700 Subject: [PATCH 13/38] Revert "version: 3.3.19" This reverts commit 3f6b978b0496080e8067e0d2d1270134a9a51ef8. --- version/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version/version.go b/version/version.go index da27633df52..48e49bde5f1 100644 --- a/version/version.go +++ b/version/version.go @@ -26,7 +26,7 @@ import ( var ( // MinClusterVersion is the min cluster version this etcd binary is compatible with. MinClusterVersion = "3.0.0" - Version = "3.3.19" + Version = "3.3.18" APIVersion = "unknown" // Git SHA Value will be set during build From acb9746d660850b5d42ddd44ac1025168e73496a Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 17:18:46 -0700 Subject: [PATCH 14/38] version: 3.3.19 Signed-off-by: Gyuho Lee --- version/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version/version.go b/version/version.go index 48e49bde5f1..da27633df52 100644 --- a/version/version.go +++ b/version/version.go @@ -26,7 +26,7 @@ import ( var ( // MinClusterVersion is the min cluster version this etcd binary is compatible with. MinClusterVersion = "3.0.0" - Version = "3.3.18" + Version = "3.3.19" APIVersion = "unknown" // Git SHA Value will be set during build From 508808010c771b967e327e47d68a262f8dcd1eb8 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 17:21:49 -0700 Subject: [PATCH 15/38] travis.yaml: use Go 1.12.12 Signed-off-by: Gyuho Lee --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index b647f3fecf1..d8ab669db99 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,7 @@ sudo: required services: docker go: -- 1.12.9 +- 1.12.12 env: - GO111MODULE=on @@ -28,7 +28,7 @@ env: matrix: fast_finish: true allow_failures: - - go: 1.12.9 + - go: 1.12.12 env: TARGET=linux-386-unit install: From cd200b49a2ae518877ac4c3ef4f1724e3c867213 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 17:22:12 -0700 Subject: [PATCH 16/38] Revert "version: 3.3.19" This reverts commit acb9746d660850b5d42ddd44ac1025168e73496a. --- version/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version/version.go b/version/version.go index da27633df52..48e49bde5f1 100644 --- a/version/version.go +++ b/version/version.go @@ -26,7 +26,7 @@ import ( var ( // MinClusterVersion is the min cluster version this etcd binary is compatible with. MinClusterVersion = "3.0.0" - Version = "3.3.19" + Version = "3.3.18" APIVersion = "unknown" // Git SHA Value will be set during build From a463bd54ae80dfc44aeeb3ce3cff0a722b3ee4ca Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 17:23:16 -0700 Subject: [PATCH 17/38] words: whitelist "racey" Signed-off-by: Gyuho Lee --- .words | 1 + 1 file changed, 1 insertion(+) diff --git a/.words b/.words index df6c1816bbc..f3d41caa5d0 100644 --- a/.words +++ b/.words @@ -26,6 +26,7 @@ iff inflight keepalive hasleader +racey keepalives keyspace linearization From 67da93f739e33ee9b34792fccba2d0c100264ea4 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 17:23:32 -0700 Subject: [PATCH 18/38] version: 3.3.19 Signed-off-by: Gyuho Lee --- version/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version/version.go b/version/version.go index 48e49bde5f1..da27633df52 100644 --- a/version/version.go +++ b/version/version.go @@ -26,7 +26,7 @@ import ( var ( // MinClusterVersion is the min cluster version this etcd binary is compatible with. MinClusterVersion = "3.0.0" - Version = "3.3.18" + Version = "3.3.19" APIVersion = "unknown" // Git SHA Value will be set during build From 89ecd194140fc912176229494dc571b97f1e9ef0 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Tue, 31 Mar 2020 20:31:24 -0700 Subject: [PATCH 19/38] pkg/ioutil: add "FlushN" Signed-off-by: Gyuho Lee --- pkg/ioutil/pagewriter.go | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/pkg/ioutil/pagewriter.go b/pkg/ioutil/pagewriter.go index 72de1593d3a..cf9a8dc664d 100644 --- a/pkg/ioutil/pagewriter.go +++ b/pkg/ioutil/pagewriter.go @@ -95,12 +95,23 @@ func (pw *PageWriter) Write(p []byte) (n int, err error) { return n, werr } +// Flush flushes buffered data. func (pw *PageWriter) Flush() error { + _, err := pw.flush() + return err +} + +// FlushN flushes buffered data and returns the number of written bytes. +func (pw *PageWriter) FlushN() (int, error) { + return pw.flush() +} + +func (pw *PageWriter) flush() (int, error) { if pw.bufferedBytes == 0 { - return nil + return 0, nil } - _, err := pw.w.Write(pw.buf[:pw.bufferedBytes]) + n, err := pw.w.Write(pw.buf[:pw.bufferedBytes]) pw.pageOffset = (pw.pageOffset + pw.bufferedBytes) % pw.pageBytes pw.bufferedBytes = 0 - return err + return n, err } From 1aa5da9121755198497a4de8b904bd51808e1bb8 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 1 Apr 2020 10:43:03 -0700 Subject: [PATCH 20/38] wal: add "etcd_wal_writes_bytes_total" Signed-off-by: Gyuho Lee --- wal/encoder.go | 12 ++++++++---- wal/metrics.go | 7 +++++++ wal/repair.go | 5 +++++ wal/wal.go | 7 +++++++ 4 files changed, 27 insertions(+), 4 deletions(-) diff --git a/wal/encoder.go b/wal/encoder.go index e8040b8dff1..e8890d88a9b 100644 --- a/wal/encoder.go +++ b/wal/encoder.go @@ -92,7 +92,8 @@ func (e *encoder) encode(rec *walpb.Record) error { if padBytes != 0 { data = append(data, make([]byte, padBytes)...) } - _, err = e.bw.Write(data) + n, err = e.bw.Write(data) + walWriteBytes.Add(float64(n)) return err } @@ -108,13 +109,16 @@ func encodeFrameSize(dataBytes int) (lenField uint64, padBytes int) { func (e *encoder) flush() error { e.mu.Lock() - defer e.mu.Unlock() - return e.bw.Flush() + n, err := e.bw.FlushN() + e.mu.Unlock() + walWriteBytes.Add(float64(n)) + return err } func writeUint64(w io.Writer, n uint64, buf []byte) error { // http://golang.org/src/encoding/binary/binary.go binary.LittleEndian.PutUint64(buf, n) - _, err := w.Write(buf) + nv, err := w.Write(buf) + walWriteBytes.Add(float64(nv)) return err } diff --git a/wal/metrics.go b/wal/metrics.go index 9e089d380f9..7261544165a 100644 --- a/wal/metrics.go +++ b/wal/metrics.go @@ -24,8 +24,15 @@ var ( Help: "The latency distributions of fsync called by wal.", Buckets: prometheus.ExponentialBuckets(0.001, 2, 14), }) + walWriteBytes = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "etcd", + Subsystem: "disk", + Name: "wal_write_bytes_total", + Help: "Total number of bytes written in WAL.", + }) ) func init() { prometheus.MustRegister(syncDurations) + prometheus.MustRegister(walWriteBytes) } diff --git a/wal/repair.go b/wal/repair.go index 091036b57b9..f1e507683c3 100644 --- a/wal/repair.go +++ b/wal/repair.go @@ -18,6 +18,7 @@ import ( "io" "os" "path/filepath" + "time" "github.com/coreos/etcd/pkg/fileutil" "github.com/coreos/etcd/wal/walpb" @@ -76,10 +77,14 @@ func Repair(dirpath string) bool { plog.Errorf("could not repair %v, failed to truncate file", f.Name()) return false } + + start := time.Now() if err = fileutil.Fsync(f.File); err != nil { plog.Errorf("could not repair %v, failed to sync file", f.Name()) return false } + syncDurations.Observe(time.Since(start).Seconds()) + return true default: plog.Errorf("could not repair error (%v)", err) diff --git a/wal/wal.go b/wal/wal.go index ef63b52ccbc..6909e3ac7af 100644 --- a/wal/wal.go +++ b/wal/wal.go @@ -147,9 +147,13 @@ func Create(dirpath string, metadata []byte) (*WAL, error) { if perr != nil { return nil, perr } + + start := time.Now() if perr = fileutil.Fsync(pdir); perr != nil { return nil, perr } + syncDurations.Observe(time.Since(start).Seconds()) + if perr = pdir.Close(); err != nil { return nil, perr } @@ -548,9 +552,12 @@ func (w *WAL) cut() error { if err = os.Rename(newTail.Name(), fpath); err != nil { return err } + + start := time.Now() if err = fileutil.Fsync(w.dirFile); err != nil { return err } + syncDurations.Observe(time.Since(start).Seconds()) // reopen newTail with its new path so calls to Name() match the wal filename format newTail.Close() From 9fd7e2b8020c54e45023511cbbe4d29e8a3f4171 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 1 Apr 2020 10:46:23 -0700 Subject: [PATCH 21/38] version: 3.3.20 Signed-off-by: Gyuho Lee --- version/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version/version.go b/version/version.go index da27633df52..5a96c7aa51e 100644 --- a/version/version.go +++ b/version/version.go @@ -26,7 +26,7 @@ import ( var ( // MinClusterVersion is the min cluster version this etcd binary is compatible with. MinClusterVersion = "3.0.0" - Version = "3.3.19" + Version = "3.3.20" APIVersion = "unknown" // Git SHA Value will be set during build From b7ab0262075f71dd023b78005230b74c93fed233 Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Thu, 29 Nov 2018 11:39:48 -0500 Subject: [PATCH 22/38] version: openshift-v4.0 Signed-off-by: Sam Batschelet --- .dockerignore | 1 - Dockerfile.openshift | 20 ++++++++++++++++++++ Dockerfile.rhel | 20 ++++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 Dockerfile.openshift create mode 100644 Dockerfile.rhel diff --git a/.dockerignore b/.dockerignore index 6b8710a711f..e69de29bb2d 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1 +0,0 @@ -.git diff --git a/Dockerfile.openshift b/Dockerfile.openshift new file mode 100644 index 00000000000..1327c0d095c --- /dev/null +++ b/Dockerfile.openshift @@ -0,0 +1,20 @@ +FROM registry.svc.ci.openshift.org/openshift/release:golang-1.10 AS builder + +ENV GOPATH /go + +COPY . $GOPATH/src/go.etcd.io/etcd + +RUN yum install -y git && \ + cd $GOPATH/src/go.etcd.io/etcd && \ + make build + +# stage 2 +FROM registry.svc.ci.openshift.org/openshift/origin-v4.0:base + +ENTRYPOINT ["/usr/bin/etcd"] + +COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcd /usr/bin/ + +LABEL io.k8s.display-name="etcd server" \ + io.k8s.description="etcd is distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ + maintainer="Sam Batschelet " diff --git a/Dockerfile.rhel b/Dockerfile.rhel new file mode 100644 index 00000000000..991967cbf2a --- /dev/null +++ b/Dockerfile.rhel @@ -0,0 +1,20 @@ +FROM openshift/golang-builder:1.10 AS builder + +ENV GOPATH /go + +COPY . $GOPATH/src/go.etcd.io/etcd + +RUN yum install -y make && \ + cd $GOPATH/src/go.etcd.io/etcd && \ + make build + +# stage 2 +FROM openshift/origin-base + +ENTRYPOINT ["/usr/bin/etcd"] + +COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcd /usr/bin/ + +LABEL io.k8s.display-name="etcd server" \ + io.k8s.description="etcd is distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ + maintainer="Sam Batschelet " From f89bec2a352d3e89661aea5d89ccc230ef5cc259 Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Wed, 20 Feb 2019 07:06:51 -0500 Subject: [PATCH 23/38] Dockerfile: add etcdctl Signed-off-by: Sam Batschelet --- Dockerfile.openshift | 2 +- Dockerfile.rhel | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile.openshift b/Dockerfile.openshift index 1327c0d095c..262e75afe8d 100644 --- a/Dockerfile.openshift +++ b/Dockerfile.openshift @@ -13,7 +13,7 @@ FROM registry.svc.ci.openshift.org/openshift/origin-v4.0:base ENTRYPOINT ["/usr/bin/etcd"] -COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcd /usr/bin/ +COPY --from=builder /go/src/go.etcd.io/etcd/bin/{etcd,etcdctl} /usr/bin/ LABEL io.k8s.display-name="etcd server" \ io.k8s.description="etcd is distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ diff --git a/Dockerfile.rhel b/Dockerfile.rhel index 991967cbf2a..efe90ffe4b3 100644 --- a/Dockerfile.rhel +++ b/Dockerfile.rhel @@ -13,7 +13,7 @@ FROM openshift/origin-base ENTRYPOINT ["/usr/bin/etcd"] -COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcd /usr/bin/ +COPY --from=builder /go/src/go.etcd.io/etcd/bin/{etcd,etcdctl} /usr/bin/ LABEL io.k8s.display-name="etcd server" \ io.k8s.description="etcd is distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ From 3ecc3362dfa3d892cdfdca7569cc5b23f9b4411f Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Wed, 6 Feb 2019 22:49:01 -0800 Subject: [PATCH 24/38] Dockerfile.*: Fix "etcd is distributed" -> "etcd is a distributed" Correcting a typo from 2f109647 (version: openshift-v4.0, 2018-11-29). --- Dockerfile.openshift | 2 +- Dockerfile.rhel | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile.openshift b/Dockerfile.openshift index 262e75afe8d..d30cb6ac082 100644 --- a/Dockerfile.openshift +++ b/Dockerfile.openshift @@ -16,5 +16,5 @@ ENTRYPOINT ["/usr/bin/etcd"] COPY --from=builder /go/src/go.etcd.io/etcd/bin/{etcd,etcdctl} /usr/bin/ LABEL io.k8s.display-name="etcd server" \ - io.k8s.description="etcd is distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ + io.k8s.description="etcd is a distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ maintainer="Sam Batschelet " diff --git a/Dockerfile.rhel b/Dockerfile.rhel index efe90ffe4b3..2318b99439a 100644 --- a/Dockerfile.rhel +++ b/Dockerfile.rhel @@ -16,5 +16,5 @@ ENTRYPOINT ["/usr/bin/etcd"] COPY --from=builder /go/src/go.etcd.io/etcd/bin/{etcd,etcdctl} /usr/bin/ LABEL io.k8s.display-name="etcd server" \ - io.k8s.description="etcd is distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ + io.k8s.description="etcd is a distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ maintainer="Sam Batschelet " From 5342b523aaec1b1785658eec0e4ddd81d6a14a53 Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Wed, 20 Feb 2019 13:49:09 -0500 Subject: [PATCH 25/38] Dockerfile: resolve issue where binary was not properly copied from build. Signed-off-by: Sam Batschelet --- Dockerfile.openshift | 3 ++- Dockerfile.rhel | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Dockerfile.openshift b/Dockerfile.openshift index d30cb6ac082..9dc11e3dd7c 100644 --- a/Dockerfile.openshift +++ b/Dockerfile.openshift @@ -13,7 +13,8 @@ FROM registry.svc.ci.openshift.org/openshift/origin-v4.0:base ENTRYPOINT ["/usr/bin/etcd"] -COPY --from=builder /go/src/go.etcd.io/etcd/bin/{etcd,etcdctl} /usr/bin/ +COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcd /usr/bin/ +COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcdctl /usr/bin/ LABEL io.k8s.display-name="etcd server" \ io.k8s.description="etcd is a distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ diff --git a/Dockerfile.rhel b/Dockerfile.rhel index 2318b99439a..ae4b1468f8d 100644 --- a/Dockerfile.rhel +++ b/Dockerfile.rhel @@ -13,7 +13,8 @@ FROM openshift/origin-base ENTRYPOINT ["/usr/bin/etcd"] -COPY --from=builder /go/src/go.etcd.io/etcd/bin/{etcd,etcdctl} /usr/bin/ +COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcd /usr/bin/ +COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcdctl /usr/bin/ LABEL io.k8s.display-name="etcd server" \ io.k8s.description="etcd is a distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ From 9453114881106d339f923f9246edf649932e420a Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Sun, 5 May 2019 11:21:48 -0400 Subject: [PATCH 26/38] OWNERS: add Signed-off-by: Sam Batschelet --- OWNERS | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 OWNERS diff --git a/OWNERS b/OWNERS new file mode 100644 index 00000000000..572c3af9dfc --- /dev/null +++ b/OWNERS @@ -0,0 +1,8 @@ +approvers: +- hexfusion +reviewers: +- deads2k +- crawford +- hexfusion +- smarterclayton +- wking From b30829755943a60db29af605696b00a99626a317 Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Sat, 4 May 2019 11:49:48 -0400 Subject: [PATCH 27/38] Dockerfile: set coreos org as canonical for release-3.3 Signed-off-by: Sam Batschelet --- Dockerfile.openshift | 12 +++++------- Dockerfile.rhel | 12 +++++------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/Dockerfile.openshift b/Dockerfile.openshift index 9dc11e3dd7c..36e10d51454 100644 --- a/Dockerfile.openshift +++ b/Dockerfile.openshift @@ -1,20 +1,18 @@ FROM registry.svc.ci.openshift.org/openshift/release:golang-1.10 AS builder -ENV GOPATH /go +WORKDIR /go/src/github.com/coreos/etcd -COPY . $GOPATH/src/go.etcd.io/etcd +COPY . . -RUN yum install -y git && \ - cd $GOPATH/src/go.etcd.io/etcd && \ - make build +RUN make build # stage 2 FROM registry.svc.ci.openshift.org/openshift/origin-v4.0:base ENTRYPOINT ["/usr/bin/etcd"] -COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcd /usr/bin/ -COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcdctl /usr/bin/ +COPY --from=builder /go/src/github.com/coreos/etcd/bin/etcd /usr/bin/ +COPY --from=builder /go/src/github.com/coreos/etcd/bin/etcdctl /usr/bin/ LABEL io.k8s.display-name="etcd server" \ io.k8s.description="etcd is a distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ diff --git a/Dockerfile.rhel b/Dockerfile.rhel index ae4b1468f8d..af3d9df97f8 100644 --- a/Dockerfile.rhel +++ b/Dockerfile.rhel @@ -1,20 +1,18 @@ FROM openshift/golang-builder:1.10 AS builder -ENV GOPATH /go +WORKDIR /go/src/github.com/coreos/etcd -COPY . $GOPATH/src/go.etcd.io/etcd +COPY . . -RUN yum install -y make && \ - cd $GOPATH/src/go.etcd.io/etcd && \ - make build +RUN make build # stage 2 FROM openshift/origin-base ENTRYPOINT ["/usr/bin/etcd"] -COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcd /usr/bin/ -COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcdctl /usr/bin/ +COPY --from=builder /go/src/github.com/coreos/etcd/bin/etcd /usr/bin/ +COPY --from=builder /go/src/github.com/coreos/etcd/bin/etcdctl /usr/bin/ LABEL io.k8s.display-name="etcd server" \ io.k8s.description="etcd is a distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ From 6ae7d0c57fa55bac504d1c3af073dacb50211a10 Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Tue, 9 Jul 2019 14:39:00 -0400 Subject: [PATCH 28/38] Dockerfile: bump golang to 1.11 Signed-off-by: Sam Batschelet --- Dockerfile.openshift | 2 +- Dockerfile.rhel | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile.openshift b/Dockerfile.openshift index 36e10d51454..c1eea2ca8c0 100644 --- a/Dockerfile.openshift +++ b/Dockerfile.openshift @@ -1,4 +1,4 @@ -FROM registry.svc.ci.openshift.org/openshift/release:golang-1.10 AS builder +FROM registry.svc.ci.openshift.org/openshift/release:golang-1.11 AS builder WORKDIR /go/src/github.com/coreos/etcd diff --git a/Dockerfile.rhel b/Dockerfile.rhel index af3d9df97f8..3ab2cdb801f 100644 --- a/Dockerfile.rhel +++ b/Dockerfile.rhel @@ -1,4 +1,4 @@ -FROM openshift/golang-builder:1.10 AS builder +FROM openshift/golang-builder:1.11 AS builder WORKDIR /go/src/github.com/coreos/etcd From d32f7a7a77e9347c0a2a3a3144cec487f7a2cbca Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Thu, 29 Aug 2019 17:08:06 -0400 Subject: [PATCH 29/38] Dockerfile: use build instead of make build make build performs a sanity test on the binary image which causes problems for unsupport arch. Because we run full CI tests against the image this check is not nessisary and will allow images to be build regardless of arch. Signed-off-by: Sam Batschelet --- Dockerfile.openshift | 2 +- Dockerfile.rhel | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile.openshift b/Dockerfile.openshift index c1eea2ca8c0..f18d76fbcef 100644 --- a/Dockerfile.openshift +++ b/Dockerfile.openshift @@ -4,7 +4,7 @@ WORKDIR /go/src/github.com/coreos/etcd COPY . . -RUN make build +RUN ./build # stage 2 FROM registry.svc.ci.openshift.org/openshift/origin-v4.0:base diff --git a/Dockerfile.rhel b/Dockerfile.rhel index 3ab2cdb801f..bdd1929e617 100644 --- a/Dockerfile.rhel +++ b/Dockerfile.rhel @@ -4,7 +4,7 @@ WORKDIR /go/src/github.com/coreos/etcd COPY . . -RUN make build +RUN ./build # stage 2 FROM openshift/origin-base From dfb6d692d7188ec916d9640c15ba79576d334fb0 Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Mon, 23 Sep 2019 08:38:21 -0400 Subject: [PATCH 30/38] Dockerfile: bump golang 1.12 Signed-off-by: Sam Batschelet --- Dockerfile.openshift | 2 +- Dockerfile.rhel | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile.openshift b/Dockerfile.openshift index f18d76fbcef..6aa8493e07c 100644 --- a/Dockerfile.openshift +++ b/Dockerfile.openshift @@ -1,4 +1,4 @@ -FROM registry.svc.ci.openshift.org/openshift/release:golang-1.11 AS builder +FROM registry.svc.ci.openshift.org/openshift/release:golang-1.12 AS builder WORKDIR /go/src/github.com/coreos/etcd diff --git a/Dockerfile.rhel b/Dockerfile.rhel index bdd1929e617..281214e255c 100644 --- a/Dockerfile.rhel +++ b/Dockerfile.rhel @@ -1,4 +1,4 @@ -FROM openshift/golang-builder:1.11 AS builder +FROM openshift/golang-builder:1.12 AS builder WORKDIR /go/src/github.com/coreos/etcd From addfb278b40198fc50c2b2931e6f9bd72ab88c9d Mon Sep 17 00:00:00 2001 From: David Eads Date: Thu, 20 Feb 2020 14:26:47 -0500 Subject: [PATCH 31/38] add stub discovery-etcd-initial-cluster command --- .../discover-etcd-initial-cluster/main.go | 35 +++++++ .../initial-cluster.go | 92 +++++++++++++++++++ 2 files changed, 127 insertions(+) create mode 100644 openshift-tools/discover-etcd-initial-cluster/main.go create mode 100644 openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go diff --git a/openshift-tools/discover-etcd-initial-cluster/main.go b/openshift-tools/discover-etcd-initial-cluster/main.go new file mode 100644 index 00000000000..de62e32a9c3 --- /dev/null +++ b/openshift-tools/discover-etcd-initial-cluster/main.go @@ -0,0 +1,35 @@ +package main + +import ( + goflag "flag" + "fmt" + "math/rand" + "os" + "strings" + "time" + + discover_etcd_initial_cluster "github.com/coreos/etcd/openshift-tools/pkg/discover-etcd-initial-cluster" + "github.com/spf13/pflag" +) + +// copy from `utilflag "k8s.io/component-base/cli/flag"` +// WordSepNormalizeFunc changes all flags that contain "_" separators +func WordSepNormalizeFunc(f *pflag.FlagSet, name string) pflag.NormalizedName { + if strings.Contains(name, "_") { + return pflag.NormalizedName(strings.Replace(name, "_", "-", -1)) + } + return pflag.NormalizedName(name) +} + +func main() { + rand.Seed(time.Now().UTC().UnixNano()) + + pflag.CommandLine.SetNormalizeFunc(WordSepNormalizeFunc) + pflag.CommandLine.AddGoFlagSet(goflag.CommandLine) + + command := discover_etcd_initial_cluster.NewDiscoverEtcdInitialClusterCommand() + if err := command.Execute(); err != nil { + fmt.Fprintf(os.Stderr, "%v\n", err) + os.Exit(1) + } +} diff --git a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go new file mode 100644 index 00000000000..0e9a8166f31 --- /dev/null +++ b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go @@ -0,0 +1,92 @@ +package discover_etcd_initial_cluster + +import ( + "fmt" + "os" + "time" + + "github.com/spf13/pflag" + + "github.com/spf13/cobra" +) + +type DiscoverEtcdInitialClusterOptions struct { + // TargetPeerURLHost is the host portion of the peer URL. It is used to match on. (either IP or hostname) + TargetPeerURLHost string + + // CABundleFile is the file to use to trust the etcd server + CABundleFile string + // ClientCertFile is the client cert to use to authenticate this binary to etcd + ClientCertFile string + // ClientKeyFile is the client key to use to authenticate this binary to etcd + ClientKeyFile string + // Endpoints is a list of all the endpoints to use to try to contact etcd + Endpoints string + + // Revision is the revision value for the static pod + Revision string + // PreviousEtcdInitialClusterDir is the directory to store the previous etcd initial cluster value + PreviousEtcdInitialClusterDir string + + // TotalTimeToWait is the total time to wait before reporting failure and dumping logs + TotalTimeToWait time.Duration + // TimeToWaitBeforeUsingPreviousValue is the time to wait before checking to see if we have a previous value. + TimeToWaitBeforeUsingPreviousValue time.Duration +} + +func NewDiscoverEtcdInitialCluster() *DiscoverEtcdInitialClusterOptions { + return &DiscoverEtcdInitialClusterOptions{ + TotalTimeToWait: 30 * time.Second, + TimeToWaitBeforeUsingPreviousValue: 10 * time.Second, + } +} + +func NewDiscoverEtcdInitialClusterCommand() *cobra.Command { + o := NewDiscoverEtcdInitialCluster() + + cmd := &cobra.Command{ + Use: "discover-etcd-initial-cluster", + Short: "output the value for ETCD_INITIAL_CLUSTER in openshift etcd static pod", + Long: `output the value for ETCD_INITIAL_CLUSTER in openshift etcd static pod + +1. It tries to contact every available etcd to get a list of member. +2. Check each member to see if any one of them is the target. +3. If so, and if it is started, use the member list to create the ETCD_INITIAL_CLUSTER value and print it out. +4. If so, and if it it not started, use the existing member list and append the target value to create the ETCD_INITIAL_CLUSTER value and print it out. +5. If not, try again until either you have it or you have to check a cache. +6. If you have to check a cache and it is present, return +7. If the cache is not present, keep trying to contact etcd until total timeout is met. +`, + Run: func(cmd *cobra.Command, args []string) { + if err := o.Validate(); err != nil { + fmt.Fprint(os.Stderr, err) + os.Exit(1) + } + + if err := o.Run(); err != nil { + fmt.Fprint(os.Stderr, err) + os.Exit(1) + } + }, + } + o.BindFlags(cmd.Flags()) + + return cmd +} + +func (o *DiscoverEtcdInitialClusterOptions) BindFlags(flags *pflag.FlagSet) { + flags.StringVar(&o.CABundleFile, "cacert", o.CABundleFile, "file to use to verify the identity of the etcd server") + flags.StringVar(&o.ClientCertFile, "cert", o.ClientCertFile, "client cert to use to authenticate this binary to etcd") + flags.StringVar(&o.ClientKeyFile, "key", o.ClientKeyFile, "client key to use to authenticate this binary to etcd") + flags.StringVar(&o.Endpoints, "endpoints", o.Endpoints, "list of all the endpoints to use to try to contact etcd") + flags.StringVar(&o.Revision, "revision", o.Revision, "revision value for the static pod") + flags.StringVar(&o.PreviousEtcdInitialClusterDir, "memory-dir", o.PreviousEtcdInitialClusterDir, "directory to store the previous etcd initial cluster value") +} + +func (o *DiscoverEtcdInitialClusterOptions) Validate() error { + return nil +} + +func (o *DiscoverEtcdInitialClusterOptions) Run() error { + return nil +} From 7b2c515afba9d700b802f703586d6f2849c01758 Mon Sep 17 00:00:00 2001 From: David Eads Date: Thu, 20 Feb 2020 14:32:23 -0500 Subject: [PATCH 32/38] build openshift tools with etcd --- Dockerfile.openshift | 1 + Dockerfile.rhel | 1 + build | 15 +++++++++++++++ 3 files changed, 17 insertions(+) diff --git a/Dockerfile.openshift b/Dockerfile.openshift index 6aa8493e07c..78303e525d3 100644 --- a/Dockerfile.openshift +++ b/Dockerfile.openshift @@ -13,6 +13,7 @@ ENTRYPOINT ["/usr/bin/etcd"] COPY --from=builder /go/src/github.com/coreos/etcd/bin/etcd /usr/bin/ COPY --from=builder /go/src/github.com/coreos/etcd/bin/etcdctl /usr/bin/ +COPY --from=builder /go/src/github.com/coreos/etcd/bin/discover-etcd-initial-cluster /usr/bin/ LABEL io.k8s.display-name="etcd server" \ io.k8s.description="etcd is a distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ diff --git a/Dockerfile.rhel b/Dockerfile.rhel index 281214e255c..bd353e1a3f8 100644 --- a/Dockerfile.rhel +++ b/Dockerfile.rhel @@ -13,6 +13,7 @@ ENTRYPOINT ["/usr/bin/etcd"] COPY --from=builder /go/src/github.com/coreos/etcd/bin/etcd /usr/bin/ COPY --from=builder /go/src/github.com/coreos/etcd/bin/etcdctl /usr/bin/ +COPY --from=builder /go/src/github.com/coreos/etcd/bin/discover-etcd-initial-cluster /usr/bin/ LABEL io.k8s.display-name="etcd server" \ io.k8s.description="etcd is a distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ diff --git a/build b/build index 5cf4d7c6426..e0787aaa8e2 100755 --- a/build +++ b/build @@ -64,6 +64,20 @@ etcd_build() { -o "${out}/etcdctl" ${REPO_PATH}/etcdctl || return } + +openshift_tools_build() { + out="bin" + if [[ -n "${BINDIR}" ]]; then out="${BINDIR}"; fi + toggle_failpoints_default + + # Static compilation is useful when etcd is run in a container. $GO_BUILD_FLAGS is OK + # shellcheck disable=SC2086 + CGO_ENABLED=0 go build $GO_BUILD_FLAGS \ + -installsuffix cgo \ + -ldflags "$GO_LDFLAGS" \ + -o "${out}/discover-etcd-initial-cluster" "github.com/coreos/etcd/openshift-tools/discover-etcd-initial-cluster" || return +} + tools_build() { out="bin" if [[ -n "${BINDIR}" ]]; then out="${BINDIR}"; fi @@ -91,4 +105,5 @@ fi # only build when called directly, not sourced if echo "$0" | grep "build$" >/dev/null; then etcd_build + openshift_tools_build fi From e5906d9f26e566bd260c443b3b34330b5b20652a Mon Sep 17 00:00:00 2001 From: David Eads Date: Thu, 20 Feb 2020 17:04:39 -0500 Subject: [PATCH 33/38] codify the initial cluster check as golang code --- .../initial-cluster.go | 182 +++++++++++++++--- 1 file changed, 158 insertions(+), 24 deletions(-) diff --git a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go index 0e9a8166f31..736a14a3338 100644 --- a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go +++ b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go @@ -1,10 +1,19 @@ package discover_etcd_initial_cluster import ( + "context" "fmt" "os" + "strings" "time" + "github.com/coreos/etcd/etcdserver/etcdserverpb" + + "github.com/coreos/etcd/pkg/transport" + "google.golang.org/grpc" + + "github.com/coreos/etcd/clientv3" + "github.com/spf13/pflag" "github.com/spf13/cobra" @@ -13,6 +22,8 @@ import ( type DiscoverEtcdInitialClusterOptions struct { // TargetPeerURLHost is the host portion of the peer URL. It is used to match on. (either IP or hostname) TargetPeerURLHost string + // TargetName is the name to assign to this peer if we create it + TargetName string // CABundleFile is the file to use to trust the etcd server CABundleFile string @@ -21,24 +32,14 @@ type DiscoverEtcdInitialClusterOptions struct { // ClientKeyFile is the client key to use to authenticate this binary to etcd ClientKeyFile string // Endpoints is a list of all the endpoints to use to try to contact etcd - Endpoints string + Endpoints []string - // Revision is the revision value for the static pod - Revision string - // PreviousEtcdInitialClusterDir is the directory to store the previous etcd initial cluster value - PreviousEtcdInitialClusterDir string - - // TotalTimeToWait is the total time to wait before reporting failure and dumping logs - TotalTimeToWait time.Duration - // TimeToWaitBeforeUsingPreviousValue is the time to wait before checking to see if we have a previous value. - TimeToWaitBeforeUsingPreviousValue time.Duration + // MemberDir is the directory created when etcd starts the first time + MemberDir string } func NewDiscoverEtcdInitialCluster() *DiscoverEtcdInitialClusterOptions { - return &DiscoverEtcdInitialClusterOptions{ - TotalTimeToWait: 30 * time.Second, - TimeToWaitBeforeUsingPreviousValue: 10 * time.Second, - } + return &DiscoverEtcdInitialClusterOptions{} } func NewDiscoverEtcdInitialClusterCommand() *cobra.Command { @@ -49,13 +50,12 @@ func NewDiscoverEtcdInitialClusterCommand() *cobra.Command { Short: "output the value for ETCD_INITIAL_CLUSTER in openshift etcd static pod", Long: `output the value for ETCD_INITIAL_CLUSTER in openshift etcd static pod -1. It tries to contact every available etcd to get a list of member. -2. Check each member to see if any one of them is the target. -3. If so, and if it is started, use the member list to create the ETCD_INITIAL_CLUSTER value and print it out. -4. If so, and if it it not started, use the existing member list and append the target value to create the ETCD_INITIAL_CLUSTER value and print it out. -5. If not, try again until either you have it or you have to check a cache. -6. If you have to check a cache and it is present, return -7. If the cache is not present, keep trying to contact etcd until total timeout is met. +1. If --data-dir exists, output a marker value and exit. +2. It tries to contact every available etcd to get a list of member. +3. Check each member to see if any one of them is the target. +4. If so, and if it is started, use the member list to create the ETCD_INITIAL_CLUSTER value and print it out. +5. If so, and if it it not started, use the existing member list and append the target value to create the ETCD_INITIAL_CLUSTER value and print it out. +6. If not, try again until either you have it or you time out. `, Run: func(cmd *cobra.Command, args []string) { if err := o.Validate(); err != nil { @@ -78,15 +78,149 @@ func (o *DiscoverEtcdInitialClusterOptions) BindFlags(flags *pflag.FlagSet) { flags.StringVar(&o.CABundleFile, "cacert", o.CABundleFile, "file to use to verify the identity of the etcd server") flags.StringVar(&o.ClientCertFile, "cert", o.ClientCertFile, "client cert to use to authenticate this binary to etcd") flags.StringVar(&o.ClientKeyFile, "key", o.ClientKeyFile, "client key to use to authenticate this binary to etcd") - flags.StringVar(&o.Endpoints, "endpoints", o.Endpoints, "list of all the endpoints to use to try to contact etcd") - flags.StringVar(&o.Revision, "revision", o.Revision, "revision value for the static pod") - flags.StringVar(&o.PreviousEtcdInitialClusterDir, "memory-dir", o.PreviousEtcdInitialClusterDir, "directory to store the previous etcd initial cluster value") + flags.StringSliceVar(&o.Endpoints, "endpoints", o.Endpoints, "list of all the endpoints to use to try to contact etcd") + flags.StringVar(&o.MemberDir, "data-dir", o.MemberDir, "file to stat for existence of /var/lib/etcd/member") + flags.StringVar(&o.TargetPeerURLHost, "target-peer-url-host", o.TargetPeerURLHost, "host portion of the peer URL. It is used to match on. (either IP or hostname)") + flags.StringVar(&o.TargetName, "target-name", o.TargetName, "name to assign to this peer if we create it") } func (o *DiscoverEtcdInitialClusterOptions) Validate() error { + if len(o.CABundleFile) == 0 { + return fmt.Errorf("missing --cacert") + } + if len(o.ClientCertFile) == 0 { + return fmt.Errorf("missing --cert") + } + if len(o.ClientKeyFile) == 0 { + return fmt.Errorf("missing --key") + } + if len(o.Endpoints) == 0 { + return fmt.Errorf("missing --endpoints") + } + if len(o.MemberDir) == 0 { + return fmt.Errorf("missing --data-dir") + } + if len(o.TargetPeerURLHost) == 0 { + return fmt.Errorf("missing --target-peer-url-host") + } + if len(o.TargetName) == 0 { + return fmt.Errorf("missing --target-name") + } return nil } func (o *DiscoverEtcdInitialClusterOptions) Run() error { + _, err := os.Stat(o.MemberDir) + switch { + case os.IsNotExist(err): + // do nothing. This just means we fall through to the polling logic + + case err == nil: + fmt.Printf(o.TargetName) + return nil + + case err != nil: + return err + } + + client, err := o.getClient() + if err != nil { + return err + } + defer client.Close() + + var targetMember *etcdserverpb.Member + var allMembers []*etcdserverpb.Member + for i := 0; i < 10; i++ { + fmt.Fprintf(os.Stderr, "#### attempt %d\n", i) + targetMember, allMembers, err = o.checkForTarget(client) + + for _, member := range allMembers { + fmt.Fprintf(os.Stderr, " member=%v\n", stringifyMember(member)) + } + fmt.Fprintf(os.Stderr, " target=%v, err=%v\n", stringifyMember(targetMember), err) + + // we're done because we found what we want. + if targetMember != nil && err == nil { + break + } + + fmt.Fprintf(os.Stderr, "#### sleeping...\n") + time.Sleep(1 * time.Second) + } + if err != nil { + return err + } + if targetMember == nil { + return fmt.Errorf("timed out") + } + + etcdInitialClusterEntries := []string{} + for _, member := range allMembers { + if len(member.Name) == 0 { // this is the signal for whether or not a given peer is started + continue + } + etcdInitialClusterEntries = append(etcdInitialClusterEntries, fmt.Sprintf("%s=%s", member.Name, member.PeerURLs[0])) + } + if len(targetMember.Name) == 0 { + etcdInitialClusterEntries = append(etcdInitialClusterEntries, fmt.Sprintf("%s=%s", o.TargetName, targetMember.PeerURLs[0])) + } + fmt.Printf(strings.Join(etcdInitialClusterEntries, ",")) + return nil } + +func stringifyMember(member *etcdserverpb.Member) string { + if member == nil { + return "nil" + } + + return fmt.Sprintf("{name=%q, peerURLs=[%s}, clientURLs=[%s]", member.Name, strings.Join(member.PeerURLs, ","), strings.Join(member.ClientURLs, ",")) +} + +func (o *DiscoverEtcdInitialClusterOptions) checkForTarget(client *clientv3.Client) (*etcdserverpb.Member, []*etcdserverpb.Member, error) { + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + memberResponse, err := client.MemberList(ctx) + if err != nil { + return nil, nil, err + } + + var targetMember *etcdserverpb.Member + for i := range memberResponse.Members { + member := memberResponse.Members[i] + for _, peerURL := range member.PeerURLs { + if strings.Contains(peerURL, o.TargetPeerURLHost) { + targetMember = member + } + } + } + + return targetMember, memberResponse.Members, err +} + +func (o *DiscoverEtcdInitialClusterOptions) getClient() (*clientv3.Client, error) { + dialOptions := []grpc.DialOption{ + grpc.WithBlock(), // block until the underlying connection is up + } + + tlsInfo := transport.TLSInfo{ + CertFile: o.ClientCertFile, + KeyFile: o.ClientKeyFile, + TrustedCAFile: o.CABundleFile, + } + tlsConfig, err := tlsInfo.ClientConfig() + if err != nil { + return nil, err + } + + cfg := &clientv3.Config{ + DialOptions: dialOptions, + Endpoints: o.Endpoints, + DialTimeout: 15 * time.Second, + TLS: tlsConfig, + } + + return clientv3.New(*cfg) +} From 84baef303ad0e69ab961b8491f5b3ed985c064fb Mon Sep 17 00:00:00 2001 From: retroflexer Date: Sun, 23 Feb 2020 10:05:38 -0500 Subject: [PATCH 34/38] Archive data-dir if target member is unstarted --- .../initial-cluster.go | 62 ++++++++++++++++--- 1 file changed, 52 insertions(+), 10 deletions(-) diff --git a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go index 736a14a3338..d061f433965 100644 --- a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go +++ b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "os" + "path/filepath" "strings" "time" @@ -34,8 +35,8 @@ type DiscoverEtcdInitialClusterOptions struct { // Endpoints is a list of all the endpoints to use to try to contact etcd Endpoints []string - // MemberDir is the directory created when etcd starts the first time - MemberDir string + // DataDir is the directory created when etcd starts the first time + DataDir string } func NewDiscoverEtcdInitialCluster() *DiscoverEtcdInitialClusterOptions { @@ -79,7 +80,7 @@ func (o *DiscoverEtcdInitialClusterOptions) BindFlags(flags *pflag.FlagSet) { flags.StringVar(&o.ClientCertFile, "cert", o.ClientCertFile, "client cert to use to authenticate this binary to etcd") flags.StringVar(&o.ClientKeyFile, "key", o.ClientKeyFile, "client key to use to authenticate this binary to etcd") flags.StringSliceVar(&o.Endpoints, "endpoints", o.Endpoints, "list of all the endpoints to use to try to contact etcd") - flags.StringVar(&o.MemberDir, "data-dir", o.MemberDir, "file to stat for existence of /var/lib/etcd/member") + flags.StringVar(&o.DataDir, "data-dir", o.DataDir, "dir to stat for existence of the member directory") flags.StringVar(&o.TargetPeerURLHost, "target-peer-url-host", o.TargetPeerURLHost, "host portion of the peer URL. It is used to match on. (either IP or hostname)") flags.StringVar(&o.TargetName, "target-name", o.TargetName, "name to assign to this peer if we create it") } @@ -97,7 +98,7 @@ func (o *DiscoverEtcdInitialClusterOptions) Validate() error { if len(o.Endpoints) == 0 { return fmt.Errorf("missing --endpoints") } - if len(o.MemberDir) == 0 { + if len(o.DataDir) == 0 { return fmt.Errorf("missing --data-dir") } if len(o.TargetPeerURLHost) == 0 { @@ -110,14 +111,25 @@ func (o *DiscoverEtcdInitialClusterOptions) Validate() error { } func (o *DiscoverEtcdInitialClusterOptions) Run() error { - _, err := os.Stat(o.MemberDir) + + //Temporary hack to work with the current pod.yaml + var memberDir string + if strings.HasSuffix(o.DataDir, "member") { + memberDir = o.DataDir + o.DataDir = filepath.Dir(o.DataDir) + } else { + memberDir = filepath.Join(o.DataDir, "member") + } + + memberDirExists := false + _, err := os.Stat(memberDir) switch { case os.IsNotExist(err): // do nothing. This just means we fall through to the polling logic case err == nil: - fmt.Printf(o.TargetName) - return nil + fmt.Fprintf(os.Stderr, "memberDir %s is present on %s\n", memberDir, o.TargetName) + memberDirExists = true case err != nil: return err @@ -148,11 +160,26 @@ func (o *DiscoverEtcdInitialClusterOptions) Run() error { fmt.Fprintf(os.Stderr, "#### sleeping...\n") time.Sleep(1 * time.Second) } - if err != nil { + + switch { + case err != nil: return err - } - if targetMember == nil { + + case targetMember == nil && memberDirExists: + // we weren't able to locate other members and need to return based previous memberDir so we can restart. This is the off and on again flow. + fmt.Printf(o.TargetName) + return nil + + case targetMember == nil && !memberDirExists: + // our member has not been added to the cluster and we have no previous data to start based on. return fmt.Errorf("timed out") + + case targetMember != nil && len(targetMember.Name) == 0 && memberDirExists: + // our member has been added to the cluster and has never been started before, but a data directory exists. This means that we have dirty data we must remove + archiveDataDir(memberDir) + + default: + // a target member was found, but no exception circumstances. } etcdInitialClusterEntries := []string{} @@ -165,11 +192,26 @@ func (o *DiscoverEtcdInitialClusterOptions) Run() error { if len(targetMember.Name) == 0 { etcdInitialClusterEntries = append(etcdInitialClusterEntries, fmt.Sprintf("%s=%s", o.TargetName, targetMember.PeerURLs[0])) } + fmt.Printf(strings.Join(etcdInitialClusterEntries, ",")) return nil } +// TO DO: instead of archiving, we should remove the directory to avoid any confusion with the backups. +func archiveDataDir(sourceDir string) error { + targetDir := filepath.Join(sourceDir+"-removed-archive", time.Now().Format(time.RFC3339)) + + // If dir already exists, add seconds to the dir name + if _, err := os.Stat(targetDir); err == nil { + targetDir = filepath.Join(sourceDir+"-removed-archive", time.Now().Add(time.Second).Format(time.RFC3339)) + } + if err := os.Rename(sourceDir, targetDir); err != nil && !os.IsNotExist(err) { + return err + } + return nil +} + func stringifyMember(member *etcdserverpb.Member) string { if member == nil { return "nil" From 3737eddd33f31db3a8797304061f42482ac1d660 Mon Sep 17 00:00:00 2001 From: Alay Patel Date: Thu, 27 Feb 2020 11:03:10 -0500 Subject: [PATCH 35/38] fix removed member name, unmask error --- .../pkg/discover-etcd-initial-cluster/initial-cluster.go | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go index d061f433965..3b8f49048e1 100644 --- a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go +++ b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go @@ -200,13 +200,10 @@ func (o *DiscoverEtcdInitialClusterOptions) Run() error { // TO DO: instead of archiving, we should remove the directory to avoid any confusion with the backups. func archiveDataDir(sourceDir string) error { - targetDir := filepath.Join(sourceDir+"-removed-archive", time.Now().Format(time.RFC3339)) + targetDir := filepath.Join(sourceDir + "-removed-archive-" + time.Now().Format("2006-01-02-030405")) - // If dir already exists, add seconds to the dir name - if _, err := os.Stat(targetDir); err == nil { - targetDir = filepath.Join(sourceDir+"-removed-archive", time.Now().Add(time.Second).Format(time.RFC3339)) - } - if err := os.Rename(sourceDir, targetDir); err != nil && !os.IsNotExist(err) { + fmt.Fprintf(os.Stderr, "attempting to archive %s to %s", sourceDir, targetDir) + if err := os.Rename(sourceDir, targetDir); err != nil { return err } return nil From f039fce0fbdf7952f3b340854edd5f9c34f0b5e1 Mon Sep 17 00:00:00 2001 From: retroflexer Date: Fri, 28 Feb 2020 10:31:04 -0500 Subject: [PATCH 36/38] If we weren't able to get client or get target member but memberDir exists, go ahead and start. --- .../initial-cluster.go | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go index 3b8f49048e1..ac8c9e62b61 100644 --- a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go +++ b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go @@ -136,7 +136,12 @@ func (o *DiscoverEtcdInitialClusterOptions) Run() error { } client, err := o.getClient() - if err != nil { + if err != nil && memberDirExists { + // we weren't able to get client and need to return based previous memberDir so we can restart. This is the off and on again flow. + fmt.Fprintf(os.Stderr, "Couldn't get client, but memberDir %s is present on %s, err=%s. Returning with no error.\n", memberDir, o.TargetName, err) + fmt.Printf(o.TargetName) + return nil + } else if err != nil { return err } defer client.Close() @@ -162,20 +167,23 @@ func (o *DiscoverEtcdInitialClusterOptions) Run() error { } switch { - case err != nil: - return err - case targetMember == nil && memberDirExists: - // we weren't able to locate other members and need to return based previous memberDir so we can restart. This is the off and on again flow. + // we weren't able to locate other members and need to return based previous memberDir so we can restart. This is again the off and on flow. + fmt.Fprintf(os.Stderr, "Couldn't get targetMember, but memberDir %s is present on %s. Returning with no error.\n", memberDir, o.TargetName) fmt.Printf(o.TargetName) return nil + case err != nil: + fmt.Fprintf(os.Stderr, "Couldn't get targetMember. Returning error.\n") + return err + case targetMember == nil && !memberDirExists: // our member has not been added to the cluster and we have no previous data to start based on. return fmt.Errorf("timed out") case targetMember != nil && len(targetMember.Name) == 0 && memberDirExists: // our member has been added to the cluster and has never been started before, but a data directory exists. This means that we have dirty data we must remove + fmt.Fprintf(os.Stderr, "Found targetMember but is unstarted and memberDir exists. Archiving memberrDir\n") archiveDataDir(memberDir) default: @@ -190,6 +198,7 @@ func (o *DiscoverEtcdInitialClusterOptions) Run() error { etcdInitialClusterEntries = append(etcdInitialClusterEntries, fmt.Sprintf("%s=%s", member.Name, member.PeerURLs[0])) } if len(targetMember.Name) == 0 { + fmt.Fprintf(os.Stderr, "Adding the unstarted member to the end %s\n", o.TargetName) etcdInitialClusterEntries = append(etcdInitialClusterEntries, fmt.Sprintf("%s=%s", o.TargetName, targetMember.PeerURLs[0])) } From 5ddce2d6171c5143f1ac1d05d6cc700e721b7794 Mon Sep 17 00:00:00 2001 From: David Eads Date: Wed, 4 Mar 2020 19:28:11 -0500 Subject: [PATCH 37/38] list all peers in initial-cluster --- .../pkg/discover-etcd-initial-cluster/initial-cluster.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go index ac8c9e62b61..cbcb5de5f70 100644 --- a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go +++ b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go @@ -195,7 +195,9 @@ func (o *DiscoverEtcdInitialClusterOptions) Run() error { if len(member.Name) == 0 { // this is the signal for whether or not a given peer is started continue } - etcdInitialClusterEntries = append(etcdInitialClusterEntries, fmt.Sprintf("%s=%s", member.Name, member.PeerURLs[0])) + for _, peerURL := range member.PeerURLs{ + etcdInitialClusterEntries = append(etcdInitialClusterEntries, fmt.Sprintf("%s=%s", member.Name, peerURL)) + } } if len(targetMember.Name) == 0 { fmt.Fprintf(os.Stderr, "Adding the unstarted member to the end %s\n", o.TargetName) From 151bb8434745e99e5d1cf2a4f0404cb5a2fbe27b Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Fri, 17 Apr 2020 10:42:17 -0400 Subject: [PATCH 38/38] vendor: bump gRPC-go to v1.23.1 CARRY: This bump resolves and issue where max frame size (16kb) was not defined which has security implications. Signed-off-by: Sam Batschelet --- glide.lock | 6 +++--- glide.yaml | 2 +- .../grpc/internal/transport/http2_server.go | 5 ++++- .../google.golang.org/grpc/internal/transport/http_util.go | 1 + vendor/google.golang.org/grpc/version.go | 2 +- 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/glide.lock b/glide.lock index 5fcf059ca41..31c787a2da2 100644 --- a/glide.lock +++ b/glide.lock @@ -1,5 +1,5 @@ -hash: 3fece95d13153dbb91034bd2708f8f7a39510c682dfda4c3ff691f09a61a2fc1 -updated: 2019-08-19T20:45:29.93927922-07:00 +hash: fb475a41a3ff034787f702d39abce833083d7fc3c37aa80e2bf35f82655c0a31 +updated: 2020-04-17T10:25:50.74702498-04:00 imports: - name: github.com/beorn7/perks version: 37c8de3658fcb183f997c4e13e8337516ab753e6 @@ -184,7 +184,7 @@ imports: - googleapis/api/annotations - googleapis/rpc/status - name: google.golang.org/grpc - version: 6eaf6f47437a6b4e2153a190160ef39a92c7eceb + version: 39e8a7b072a67ca2a75f57fa2e0d50995f5b22f6 subpackages: - balancer - balancer/base diff --git a/glide.yaml b/glide.yaml index 00b10851a1e..f9cbb2348db 100644 --- a/glide.yaml +++ b/glide.yaml @@ -122,7 +122,7 @@ import: subpackages: - rate - package: google.golang.org/grpc - version: v1.23.0 + version: v1.23.1 subpackages: - balancer - codes diff --git a/vendor/google.golang.org/grpc/internal/transport/http2_server.go b/vendor/google.golang.org/grpc/internal/transport/http2_server.go index 83439b5627d..4e26f6a1d6b 100644 --- a/vendor/google.golang.org/grpc/internal/transport/http2_server.go +++ b/vendor/google.golang.org/grpc/internal/transport/http2_server.go @@ -138,7 +138,10 @@ func newHTTP2Server(conn net.Conn, config *ServerConfig) (_ ServerTransport, err } framer := newFramer(conn, writeBufSize, readBufSize, maxHeaderListSize) // Send initial settings as connection preface to client. - var isettings []http2.Setting + isettings := []http2.Setting{{ + ID: http2.SettingMaxFrameSize, + Val: http2MaxFrameLen, + }} // TODO(zhaoq): Have a better way to signal "no limit" because 0 is // permitted in the HTTP2 spec. maxStreams := config.MaxStreams diff --git a/vendor/google.golang.org/grpc/internal/transport/http_util.go b/vendor/google.golang.org/grpc/internal/transport/http_util.go index 9d212867ce2..8f5f3349d90 100644 --- a/vendor/google.golang.org/grpc/internal/transport/http_util.go +++ b/vendor/google.golang.org/grpc/internal/transport/http_util.go @@ -667,6 +667,7 @@ func newFramer(conn net.Conn, writeBufferSize, readBufferSize int, maxHeaderList writer: w, fr: http2.NewFramer(w, r), } + f.fr.SetMaxReadFrameSize(http2MaxFrameLen) // Opt-in to Frame reuse API on framer to reduce garbage. // Frames aren't safe to read from after a subsequent call to ReadFrame. f.fr.SetReuseFrames() diff --git a/vendor/google.golang.org/grpc/version.go b/vendor/google.golang.org/grpc/version.go index 5411a73a22e..58885056385 100644 --- a/vendor/google.golang.org/grpc/version.go +++ b/vendor/google.golang.org/grpc/version.go @@ -19,4 +19,4 @@ package grpc // Version is the current grpc version. -const Version = "1.23.0" +const Version = "1.23.1"