From 95095f8406ce1bfc938237babf13a7ac36f73c6c Mon Sep 17 00:00:00 2001 From: Maxim Vladimirskiy Date: Fri, 26 Oct 2018 15:06:15 +0300 Subject: [PATCH 01/56] etcdserver: Remove infinite loop in doSerialize Once chk(ai) fails with auth.ErrAuthOldRevision it will always do, regardless how many times you retry. So the error is better be returned to fail the pending request and make the client re-authenticate. --- etcdserver/v3_server.go | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/etcdserver/v3_server.go b/etcdserver/v3_server.go index 9d429e32951..bf4adf18888 100644 --- a/etcdserver/v3_server.go +++ b/etcdserver/v3_server.go @@ -523,29 +523,25 @@ func (s *EtcdServer) raftRequest(ctx context.Context, r pb.InternalRaftRequest) // doSerialize handles the auth logic, with permissions checked by "chk", for a serialized request "get". Returns a non-nil error on authentication failure. func (s *EtcdServer) doSerialize(ctx context.Context, chk func(*auth.AuthInfo) error, get func()) error { - for { - ai, err := s.AuthInfoFromCtx(ctx) - if err != nil { - return err - } - if ai == nil { - // chk expects non-nil AuthInfo; use empty credentials - ai = &auth.AuthInfo{} - } - if err = chk(ai); err != nil { - if err == auth.ErrAuthOldRevision { - continue - } - return err - } - // fetch response for serialized request - get() - // empty credentials or current auth info means no need to retry - if ai.Revision == 0 || ai.Revision == s.authStore.Revision() { - return nil - } - // avoid TOCTOU error, retry of the request is required. + ai, err := s.AuthInfoFromCtx(ctx) + if err != nil { + return err + } + if ai == nil { + // chk expects non-nil AuthInfo; use empty credentials + ai = &auth.AuthInfo{} + } + if err = chk(ai); err != nil { + return err + } + // fetch response for serialized request + get() + // check for stale token revision in case the auth store was updated while + // the request has been handled. 
+ if ai.Revision != 0 && ai.Revision != s.authStore.Revision() { + return auth.ErrAuthOldRevision } + return nil } func (s *EtcdServer) processInternalRaftRequestOnce(ctx context.Context, r pb.InternalRaftRequest) (*applyResult, error) { From 5a4821721eb7d87f42b5d8be4a379111469c3f5e Mon Sep 17 00:00:00 2001 From: Jingyi Hu Date: Tue, 12 Feb 2019 15:40:39 -0800 Subject: [PATCH 02/56] etcdserver: remove auth validation loop Remove auth validation loop in v3_server.raftRequest(). Re-validation when error ErrAuthOldRevision occurs should be handled on client side. --- etcdserver/v3_server.go | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/etcdserver/v3_server.go b/etcdserver/v3_server.go index bf4adf18888..76ca8dee035 100644 --- a/etcdserver/v3_server.go +++ b/etcdserver/v3_server.go @@ -513,12 +513,7 @@ func (s *EtcdServer) raftRequestOnce(ctx context.Context, r pb.InternalRaftReque } func (s *EtcdServer) raftRequest(ctx context.Context, r pb.InternalRaftRequest) (proto.Message, error) { - for { - resp, err := s.raftRequestOnce(ctx, r) - if err != auth.ErrAuthOldRevision { - return resp, err - } - } + return s.raftRequestOnce(ctx, r) } // doSerialize handles the auth logic, with permissions checked by "chk", for a serialized request "get". Returns a non-nil error on authentication failure. From e1508f94b6a50eda0ea2cec0f5a520613b300c11 Mon Sep 17 00:00:00 2001 From: Jingyi Hu Date: Fri, 25 Oct 2019 18:27:40 -0700 Subject: [PATCH 03/56] integration: disable TestV3AuthOldRevConcurrent Disable TestV3AuthOldRevConcurrent for now. 
See https://github.com/etcd-io/etcd/pull/10468#issuecomment-463253361 --- integration/v3_auth_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/integration/v3_auth_test.go b/integration/v3_auth_test.go index 97017a07fae..ab6825892aa 100644 --- a/integration/v3_auth_test.go +++ b/integration/v3_auth_test.go @@ -347,6 +347,7 @@ func TestV3AuthNonAuthorizedRPCs(t *testing.T) { } func TestV3AuthOldRevConcurrent(t *testing.T) { + t.Skip() // TODO(jingyih): re-enable the test when #10408 is fixed. defer testutil.AfterTest(t) clus := NewClusterV3(t, &ClusterConfig{Size: 1}) defer clus.Terminate(t) From b3d9e290961a5e353aadfb24e46423ace6bf71fe Mon Sep 17 00:00:00 2001 From: Joe Betz Date: Wed, 12 Feb 2020 10:12:48 -0800 Subject: [PATCH 04/56] mvcc/backend: Delete orphaned db.tmp files before defrag --- mvcc/backend/backend.go | 21 ++++++++++++++++++--- snap/snapshotter.go | 17 +++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/mvcc/backend/backend.go b/mvcc/backend/backend.go index 55dc3fce8fd..33997b02207 100644 --- a/mvcc/backend/backend.go +++ b/mvcc/backend/backend.go @@ -310,21 +310,35 @@ func (b *backend) defrag() error { b.batchTx.unsafeCommit(true) b.batchTx.tx = nil - tmpdb, err := bolt.Open(b.db.Path()+".tmp", 0600, boltOpenOptions) + // Create a temporary file to ensure we start with a clean slate. + // Snapshotter.cleanupSnapdir cleans up any of these that are found during startup. 
+ dir := filepath.Dir(b.db.Path()) + temp, err := ioutil.TempFile(dir, "db.tmp.*") + if err != nil { + return err + } + options := *boltOpenOptions + options.OpenFile = func(path string, i int, mode os.FileMode) (file *os.File, err error) { + return temp, nil + } + tdbp := temp.Name() + tmpdb, err := bolt.Open(tdbp, 0600, &options) if err != nil { return err } + // gofail: var defragBeforeCopy struct{} err = defragdb(b.db, tmpdb, defragLimit) if err != nil { tmpdb.Close() - os.RemoveAll(tmpdb.Path()) + if rmErr := os.RemoveAll(tmpdb.Path()); rmErr != nil { + plog.Fatalf("failed to remove db.tmp after defragmentation completed: %v", rmErr) + } return err } dbp := b.db.Path() - tdbp := tmpdb.Path() err = b.db.Close() if err != nil { @@ -334,6 +348,7 @@ func (b *backend) defrag() error { if err != nil { plog.Fatalf("cannot close database (%s)", err) } + // gofail: var defragBeforeRename struct{} err = os.Rename(tdbp, dbp) if err != nil { plog.Fatalf("cannot rename database (%s)", err) diff --git a/snap/snapshotter.go b/snap/snapshotter.go index 00755592129..5d79b175915 100644 --- a/snap/snapshotter.go +++ b/snap/snapshotter.go @@ -172,6 +172,9 @@ func (s *Snapshotter) snapNames() ([]string, error) { if err != nil { return nil, err } + if err = s.cleanupSnapdir(names); err != nil { + return nil, err + } snaps := checkSuffix(names) if len(snaps) == 0 { return nil, ErrNoSnapshot @@ -202,3 +205,17 @@ func renameBroken(path string) { plog.Warningf("cannot rename broken snapshot file %v to %v: %v", path, brokenPath, err) } } + +// cleanupSnapdir removes any files that should not be in the snapshot directory: +// - db.tmp prefixed files that can be orphaned by defragmentation +func (s *Snapshotter) cleanupSnapdir(filenames []string) error { + for _, filename := range filenames { + if strings.HasPrefix(filename, "db.tmp") { + plog.Infof("found orphaned defragmentation file; deleting: %s", filename) + if rmErr := os.Remove(filepath.Join(s.dir, filename)); rmErr != nil && 
!os.IsNotExist(rmErr) { + return fmt.Errorf("failed to remove orphaned defragmentation file %s: %v", filename, rmErr) + } + } + } + return nil +} From 7b1a92cb7c0e236fe8ee3ed989678612e8834023 Mon Sep 17 00:00:00 2001 From: jingyih Date: Fri, 14 Feb 2020 21:26:52 -0800 Subject: [PATCH 05/56] mvcc/backend: check for nil boltOpenOptions Check if boltOpenOptions is nil before use it. --- mvcc/backend/backend.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mvcc/backend/backend.go b/mvcc/backend/backend.go index 33997b02207..2229d9ce1cb 100644 --- a/mvcc/backend/backend.go +++ b/mvcc/backend/backend.go @@ -317,7 +317,10 @@ func (b *backend) defrag() error { if err != nil { return err } - options := *boltOpenOptions + options := bolt.Options{} + if boltOpenOptions != nil { + options = *boltOpenOptions + } options.OpenFile = func(path string, i int, mode os.FileMode) (file *os.File, err error) { return temp, nil } From c58133b2d462abb6845e0d6a3dee463842c3468d Mon Sep 17 00:00:00 2001 From: jingyih Date: Wed, 19 Feb 2020 00:05:13 -0800 Subject: [PATCH 06/56] etcdctl: fix member add command Use members information from member add response, which is guaranteed to be up to date. --- etcdctl/ctlv3/command/member_command.go | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/etcdctl/ctlv3/command/member_command.go b/etcdctl/ctlv3/command/member_command.go index a6090c9497c..de29ceaf5eb 100644 --- a/etcdctl/ctlv3/command/member_command.go +++ b/etcdctl/ctlv3/command/member_command.go @@ -118,30 +118,8 @@ func memberAddCommandFunc(cmd *cobra.Command, args []string) { display.MemberAdd(*resp) if _, ok := (display).(*simplePrinter); ok { - ctx, cancel = commandCtx(cmd) - listResp, err := cli.MemberList(ctx) - // make sure the member who served member list request has the latest member list. 
- syncedMemberSet := make(map[uint64]struct{}) - syncedMemberSet[resp.Header.MemberId] = struct{}{} // the member who served member add is guaranteed to have the latest member list. - for { - if err != nil { - ExitWithError(ExitError, err) - } - if _, ok := syncedMemberSet[listResp.Header.MemberId]; ok { - break - } - // quorum get to sync cluster list - gresp, gerr := cli.Get(ctx, "_") - if gerr != nil { - ExitWithError(ExitError, err) - } - syncedMemberSet[gresp.Header.MemberId] = struct{}{} - listResp, err = cli.MemberList(ctx) - } - cancel() - conf := []string{} - for _, memb := range listResp.Members { + for _, memb := range resp.Members { for _, u := range memb.PeerURLs { n := memb.Name if memb.ID == newID { From 1228d6c1e7fedf784ab424ede30db2bdc69e0653 Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Fri, 13 Mar 2020 17:53:41 -0400 Subject: [PATCH 07/56] proxy/grpcproxy: add return on error for metrics handler Signed-off-by: Sam Batschelet --- proxy/grpcproxy/metrics.go | 1 + 1 file changed, 1 insertion(+) diff --git a/proxy/grpcproxy/metrics.go b/proxy/grpcproxy/metrics.go index ebb82bb727d..99630a32e54 100644 --- a/proxy/grpcproxy/metrics.go +++ b/proxy/grpcproxy/metrics.go @@ -89,6 +89,7 @@ func HandleMetrics(mux *http.ServeMux, c *http.Client, eps []string) { resp, err := c.Get(target) if err != nil { http.Error(w, "Internal server error", http.StatusInternalServerError) + return } defer resp.Body.Close() w.Header().Set("Content-Type", "text/plain; version=0.0.4") From 30aaceb1c3bf8d6c48e8479ef42cab5d0d15a400 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 15:43:03 -0700 Subject: [PATCH 08/56] etcdserver/api/etcdhttp: log server-side /health checks ref. 
https://github.com/etcd-io/etcd/pull/11704 Signed-off-by: Gyuho Lee --- etcdserver/api/etcdhttp/metrics.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/etcdserver/api/etcdhttp/metrics.go b/etcdserver/api/etcdhttp/metrics.go index e947abfdd0d..2f6a0a7b2ae 100644 --- a/etcdserver/api/etcdhttp/metrics.go +++ b/etcdserver/api/etcdhttp/metrics.go @@ -50,6 +50,7 @@ func NewHealthHandler(hfunc func() Health) http.HandlerFunc { if r.Method != http.MethodGet { w.Header().Set("Allow", http.MethodGet) http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed) + plog.Warningf("/health error (status code %d)", http.StatusMethodNotAllowed) return } h := hfunc() @@ -97,11 +98,15 @@ func checkHealth(srv etcdserver.ServerV2) Health { as := srv.Alarms() if len(as) > 0 { h.Health = "false" + for _, v := range as { + plog.Warningf("/health error due to an alarm %s", v.String()) + } } if h.Health == "true" { if uint64(srv.Leader()) == raft.None { h.Health = "false" + plog.Warningf("/health error; no leader (status code %d)", http.StatusServiceUnavailable) } } @@ -111,11 +116,13 @@ func checkHealth(srv etcdserver.ServerV2) Health { cancel() if err != nil { h.Health = "false" + plog.Warningf("/health error; QGET failed %v (status code %d)", err, http.StatusServiceUnavailable) } } if h.Health == "true" { healthSuccess.Inc() + plog.Infof("/health OK (status code %d)", http.StatusOK) } else { healthFailed.Inc() } From 6f7ee076eafe0240c8b78d443472e14b2a04874d Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 16:12:22 -0700 Subject: [PATCH 09/56] clientv3: embed api version in metadata ref. 
https://github.com/etcd-io/etcd/pull/11687 Signed-off-by: Gyuho Lee clientv3: fix racy writes to context key === RUN TestWatchOverlapContextCancel ================== WARNING: DATA RACE Write at 0x00c42110dd40 by goroutine 99: runtime.mapassign() /usr/local/go/src/runtime/hashmap.go:485 +0x0 github.com/coreos/etcd/clientv3.metadataSet() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/ctx.go:61 +0x8c github.com/coreos/etcd/clientv3.withVersion() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/ctx.go:47 +0x137 github.com/coreos/etcd/clientv3.newStreamClientInterceptor.func1() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/client.go:309 +0x81 google.golang.org/grpc.NewClientStream() /go/src/github.com/coreos/etcd/gopath/src/google.golang.org/grpc/stream.go:101 +0x10e github.com/coreos/etcd/etcdserver/etcdserverpb.(*watchClient).Watch() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/etcdserver/etcdserverpb/rpc.pb.go:3193 +0xe9 github.com/coreos/etcd/clientv3.(*watchGrpcStream).openWatchClient() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:788 +0x143 github.com/coreos/etcd/clientv3.(*watchGrpcStream).newWatchClient() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:700 +0x5c3 github.com/coreos/etcd/clientv3.(*watchGrpcStream).run() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:431 +0x12b Previous read at 0x00c42110dd40 by goroutine 130: reflect.maplen() /usr/local/go/src/runtime/hashmap.go:1165 +0x0 reflect.Value.MapKeys() /usr/local/go/src/reflect/value.go:1090 +0x43b fmt.(*pp).printValue() /usr/local/go/src/fmt/print.go:741 +0x1885 fmt.(*pp).printArg() /usr/local/go/src/fmt/print.go:682 +0x1b1 fmt.(*pp).doPrintf() /usr/local/go/src/fmt/print.go:998 +0x1cad fmt.Sprintf() /usr/local/go/src/fmt/print.go:196 +0x77 github.com/coreos/etcd/clientv3.streamKeyFromCtx() 
/go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:825 +0xc8 github.com/coreos/etcd/clientv3.(*watcher).Watch() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:265 +0x426 github.com/coreos/etcd/clientv3/integration.testWatchOverlapContextCancel.func1() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/integration/watch_test.go:959 +0x23e Goroutine 99 (running) created at: github.com/coreos/etcd/clientv3.(*watcher).newWatcherGrpcStream() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:236 +0x59d github.com/coreos/etcd/clientv3.(*watcher).Watch() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:278 +0xbb6 github.com/coreos/etcd/clientv3/integration.testWatchOverlapContextCancel.func1() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/integration/watch_test.go:959 +0x23e Goroutine 130 (running) created at: github.com/coreos/etcd/clientv3/integration.testWatchOverlapContextCancel() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/integration/watch_test.go:979 +0x76d github.com/coreos/etcd/clientv3/integration.TestWatchOverlapContextCancel() /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/integration/watch_test.go:922 +0x44 testing.tRunner() /usr/local/go/src/testing/testing.go:657 +0x107 ================== Signed-off-by: Gyuho Lee --- clientv3/client.go | 8 ----- clientv3/ctx.go | 64 +++++++++++++++++++++++++++++++++ clientv3/ctx_test.go | 67 +++++++++++++++++++++++++++++++++++ clientv3/retry_interceptor.go | 2 ++ 4 files changed, 133 insertions(+), 8 deletions(-) create mode 100644 clientv3/ctx.go create mode 100644 clientv3/ctx_test.go diff --git a/clientv3/client.go b/clientv3/client.go index 4c9df7a19a3..5a15cf5faea 100644 --- a/clientv3/client.go +++ b/clientv3/client.go @@ -37,7 +37,6 @@ import ( "google.golang.org/grpc/codes" 
grpccredentials "google.golang.org/grpc/credentials" "google.golang.org/grpc/keepalive" - "google.golang.org/grpc/metadata" "google.golang.org/grpc/status" ) @@ -393,13 +392,6 @@ func (c *Client) dialWithBalancerCreds(ep string) grpccredentials.TransportCrede return creds } -// WithRequireLeader requires client requests to only succeed -// when the cluster has a leader. -func WithRequireLeader(ctx context.Context) context.Context { - md := metadata.Pairs(rpctypes.MetadataRequireLeaderKey, rpctypes.MetadataHasLeader) - return metadata.NewOutgoingContext(ctx, md) -} - func newClient(cfg *Config) (*Client, error) { if cfg == nil { cfg = &Config{} diff --git a/clientv3/ctx.go b/clientv3/ctx.go new file mode 100644 index 00000000000..da8297b6c71 --- /dev/null +++ b/clientv3/ctx.go @@ -0,0 +1,64 @@ +// Copyright 2020 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package clientv3 + +import ( + "context" + "strings" + + "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes" + "github.com/coreos/etcd/version" + "google.golang.org/grpc/metadata" +) + +// WithRequireLeader requires client requests to only succeed +// when the cluster has a leader. 
+func WithRequireLeader(ctx context.Context) context.Context { + md, ok := metadata.FromOutgoingContext(ctx) + if !ok { // no outgoing metadata ctx key, create one + md = metadata.Pairs(rpctypes.MetadataRequireLeaderKey, rpctypes.MetadataHasLeader) + return metadata.NewOutgoingContext(ctx, md) + } + copied := md.Copy() // avoid racey updates + // overwrite/add 'hasleader' key/value + metadataSet(copied, rpctypes.MetadataRequireLeaderKey, rpctypes.MetadataHasLeader) + return metadata.NewOutgoingContext(ctx, copied) +} + +// embeds client version +func withVersion(ctx context.Context) context.Context { + md, ok := metadata.FromOutgoingContext(ctx) + if !ok { // no outgoing metadata ctx key, create one + md = metadata.Pairs(rpctypes.MetadataClientAPIVersionKey, version.APIVersion) + return metadata.NewOutgoingContext(ctx, md) + } + copied := md.Copy() // avoid racey updates + // overwrite/add version key/value + metadataSet(copied, rpctypes.MetadataClientAPIVersionKey, version.APIVersion) + return metadata.NewOutgoingContext(ctx, copied) +} + +func metadataGet(md metadata.MD, k string) []string { + k = strings.ToLower(k) + return md[k] +} + +func metadataSet(md metadata.MD, k string, vals ...string) { + if len(vals) == 0 { + return + } + k = strings.ToLower(k) + md[k] = vals +} diff --git a/clientv3/ctx_test.go b/clientv3/ctx_test.go new file mode 100644 index 00000000000..4a357dae685 --- /dev/null +++ b/clientv3/ctx_test.go @@ -0,0 +1,67 @@ +// Copyright 2020 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package clientv3 + +import ( + "context" + "reflect" + "testing" + + "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes" + "github.com/coreos/etcd/version" + "google.golang.org/grpc/metadata" +) + +func TestMetadataWithRequireLeader(t *testing.T) { + ctx := context.TODO() + md, ok := metadata.FromOutgoingContext(ctx) + if ok { + t.Fatal("expected no outgoing metadata ctx key") + } + + // add a conflicting key with some other value + md = metadata.Pairs(rpctypes.MetadataRequireLeaderKey, "invalid") + // add a key, and expect not be overwritten + metadataSet(md, "hello", "1", "2") + ctx = metadata.NewOutgoingContext(ctx, md) + + // expect overwrites but still keep other keys + ctx = WithRequireLeader(ctx) + md, ok = metadata.FromOutgoingContext(ctx) + if !ok { + t.Fatal("expected outgoing metadata ctx key") + } + if ss := metadataGet(md, rpctypes.MetadataRequireLeaderKey); !reflect.DeepEqual(ss, []string{rpctypes.MetadataHasLeader}) { + t.Fatalf("unexpected metadata for %q %v", rpctypes.MetadataRequireLeaderKey, ss) + } + if ss := metadataGet(md, "hello"); !reflect.DeepEqual(ss, []string{"1", "2"}) { + t.Fatalf("unexpected metadata for 'hello' %v", ss) + } +} + +func TestMetadataWithClientAPIVersion(t *testing.T) { + ctx := withVersion(WithRequireLeader(context.TODO())) + + md, ok := metadata.FromOutgoingContext(ctx) + if !ok { + t.Fatal("expected outgoing metadata ctx key") + } + if ss := metadataGet(md, rpctypes.MetadataRequireLeaderKey); !reflect.DeepEqual(ss, []string{rpctypes.MetadataHasLeader}) { + t.Fatalf("unexpected metadata for %q %v", rpctypes.MetadataRequireLeaderKey, ss) + } + if ss := metadataGet(md, rpctypes.MetadataClientAPIVersionKey); !reflect.DeepEqual(ss, []string{version.APIVersion}) { + t.Fatalf("unexpected metadata for %q %v", rpctypes.MetadataClientAPIVersionKey, ss) + } +} diff --git a/clientv3/retry_interceptor.go 
b/clientv3/retry_interceptor.go index ca72ed0bdea..f3c50570677 100644 --- a/clientv3/retry_interceptor.go +++ b/clientv3/retry_interceptor.go @@ -38,6 +38,7 @@ import ( func (c *Client) unaryClientInterceptor(logger *zap.Logger, optFuncs ...retryOption) grpc.UnaryClientInterceptor { intOpts := reuseOrNewWithCallOptions(defaultOptions, optFuncs) return func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { + ctx = withVersion(ctx) grpcOpts, retryOpts := filterCallOptions(opts) callOpts := reuseOrNewWithCallOptions(intOpts, retryOpts) // short circuit for simplicity, and avoiding allocations. @@ -103,6 +104,7 @@ func (c *Client) unaryClientInterceptor(logger *zap.Logger, optFuncs ...retryOpt func (c *Client) streamClientInterceptor(logger *zap.Logger, optFuncs ...retryOption) grpc.StreamClientInterceptor { intOpts := reuseOrNewWithCallOptions(defaultOptions, optFuncs) return func(ctx context.Context, desc *grpc.StreamDesc, cc *grpc.ClientConn, method string, streamer grpc.Streamer, opts ...grpc.CallOption) (grpc.ClientStream, error) { + ctx = withVersion(ctx) grpcOpts, retryOpts := filterCallOptions(opts) callOpts := reuseOrNewWithCallOptions(intOpts, retryOpts) // short circuit for simplicity, and avoiding allocations. From d9027cecf25f3212b3601515d1245e7fec9797ca Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 16:12:55 -0700 Subject: [PATCH 10/56] etcdserver/api/v3rpc: handle api version metadata, add metrics ref. 
https://github.com/etcd-io/etcd/pull/11687 Signed-off-by: Gyuho Lee --- etcdserver/api/v3rpc/interceptor.go | 22 +++++++++++++++++++--- etcdserver/api/v3rpc/metrics.go | 10 ++++++++++ etcdserver/api/v3rpc/rpctypes/md.go | 2 ++ 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/etcdserver/api/v3rpc/interceptor.go b/etcdserver/api/v3rpc/interceptor.go index d594ae7f154..fbc2ba3ed8b 100644 --- a/etcdserver/api/v3rpc/interceptor.go +++ b/etcdserver/api/v3rpc/interceptor.go @@ -16,16 +16,16 @@ package v3rpc import ( "context" + "strings" "sync" "time" "github.com/coreos/etcd/etcdserver" "github.com/coreos/etcd/etcdserver/api" "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes" + pb "github.com/coreos/etcd/etcdserver/etcdserverpb" "github.com/coreos/etcd/pkg/types" "github.com/coreos/etcd/raft" - - pb "github.com/coreos/etcd/etcdserver/etcdserverpb" "go.uber.org/zap" "google.golang.org/grpc" "google.golang.org/grpc/metadata" @@ -49,6 +49,12 @@ func newUnaryInterceptor(s *etcdserver.EtcdServer) grpc.UnaryServerInterceptor { md, ok := metadata.FromIncomingContext(ctx) if ok { + ver, vs := "unknown", metadataGet(md, rpctypes.MetadataClientAPIVersionKey) + if len(vs) > 0 { + ver = vs[0] + } + clientRequests.WithLabelValues("unary", ver).Inc() + if ks := md[rpctypes.MetadataRequireLeaderKey]; len(ks) > 0 && ks[0] == rpctypes.MetadataHasLeader { if s.Leader() == types.ID(raft.None) { return nil, rpctypes.ErrGRPCNoLeader @@ -187,6 +193,12 @@ func newStreamInterceptor(s *etcdserver.EtcdServer) grpc.StreamServerInterceptor md, ok := metadata.FromIncomingContext(ss.Context()) if ok { + ver, vs := "unknown", metadataGet(md, rpctypes.MetadataClientAPIVersionKey) + if len(vs) > 0 { + ver = vs[0] + } + clientRequests.WithLabelValues("stream", ver).Inc() + if ks := md[rpctypes.MetadataRequireLeaderKey]; len(ks) > 0 && ks[0] == rpctypes.MetadataHasLeader { if s.Leader() == types.ID(raft.None) { return rpctypes.ErrGRPCNoLeader @@ -205,7 +217,6 @@ func 
newStreamInterceptor(s *etcdserver.EtcdServer) grpc.StreamServerInterceptor smap.mu.Unlock() cancel() }() - } } @@ -261,3 +272,8 @@ func monitorLeader(s *etcdserver.EtcdServer) *streamsMap { return smap } + +func metadataGet(md metadata.MD, k string) []string { + k = strings.ToLower(k) + return md[k] +} diff --git a/etcdserver/api/v3rpc/metrics.go b/etcdserver/api/v3rpc/metrics.go index 6cb41a61e56..46db8649c1c 100644 --- a/etcdserver/api/v3rpc/metrics.go +++ b/etcdserver/api/v3rpc/metrics.go @@ -30,9 +30,19 @@ var ( Name: "client_grpc_received_bytes_total", Help: "The total number of bytes received from grpc clients.", }) + + clientRequests = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "etcd", + Subsystem: "server", + Name: "client_requests_total", + Help: "The total number of client requests per client version.", + }, + []string{"type", "client_api_version"}, + ) ) func init() { prometheus.MustRegister(sentBytes) prometheus.MustRegister(receivedBytes) + prometheus.MustRegister(clientRequests) } diff --git a/etcdserver/api/v3rpc/rpctypes/md.go b/etcdserver/api/v3rpc/rpctypes/md.go index 5c590e1aec9..90b8b835b16 100644 --- a/etcdserver/api/v3rpc/rpctypes/md.go +++ b/etcdserver/api/v3rpc/rpctypes/md.go @@ -17,4 +17,6 @@ package rpctypes var ( MetadataRequireLeaderKey = "hasleader" MetadataHasLeader = "true" + + MetadataClientAPIVersionKey = "client-api-version" ) From f9c89209f372160d242d290c3ea2984e766bad2b Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 16:23:25 -0700 Subject: [PATCH 11/56] version: 3.3.19 Signed-off-by: Gyuho Lee --- version/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version/version.go b/version/version.go index 48e49bde5f1..da27633df52 100644 --- a/version/version.go +++ b/version/version.go @@ -26,7 +26,7 @@ import ( var ( // MinClusterVersion is the min cluster version this etcd binary is compatible with. 
MinClusterVersion = "3.0.0" - Version = "3.3.18" + Version = "3.3.19" APIVersion = "unknown" // Git SHA Value will be set during build From 10d50e0662d93e66f44d064f8a246441ef97eab7 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 16:56:10 -0700 Subject: [PATCH 12/56] words: whitelist "hasleader" Signed-off-by: Gyuho Lee --- .words | 1 + 1 file changed, 1 insertion(+) diff --git a/.words b/.words index f41d120b058..df6c1816bbc 100644 --- a/.words +++ b/.words @@ -25,6 +25,7 @@ healthcheck iff inflight keepalive +hasleader keepalives keyspace linearization From 07562e235c05674ac27c9454500a78fa8f2ad078 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 17:18:34 -0700 Subject: [PATCH 13/56] Revert "version: 3.3.19" This reverts commit 3f6b978b0496080e8067e0d2d1270134a9a51ef8. --- version/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version/version.go b/version/version.go index da27633df52..48e49bde5f1 100644 --- a/version/version.go +++ b/version/version.go @@ -26,7 +26,7 @@ import ( var ( // MinClusterVersion is the min cluster version this etcd binary is compatible with. MinClusterVersion = "3.0.0" - Version = "3.3.19" + Version = "3.3.18" APIVersion = "unknown" // Git SHA Value will be set during build From acb9746d660850b5d42ddd44ac1025168e73496a Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 17:18:46 -0700 Subject: [PATCH 14/56] version: 3.3.19 Signed-off-by: Gyuho Lee --- version/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version/version.go b/version/version.go index 48e49bde5f1..da27633df52 100644 --- a/version/version.go +++ b/version/version.go @@ -26,7 +26,7 @@ import ( var ( // MinClusterVersion is the min cluster version this etcd binary is compatible with. 
MinClusterVersion = "3.0.0" - Version = "3.3.18" + Version = "3.3.19" APIVersion = "unknown" // Git SHA Value will be set during build From 508808010c771b967e327e47d68a262f8dcd1eb8 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 17:21:49 -0700 Subject: [PATCH 15/56] travis.yaml: use Go 1.12.12 Signed-off-by: Gyuho Lee --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index b647f3fecf1..d8ab669db99 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,7 @@ sudo: required services: docker go: -- 1.12.9 +- 1.12.12 env: - GO111MODULE=on @@ -28,7 +28,7 @@ env: matrix: fast_finish: true allow_failures: - - go: 1.12.9 + - go: 1.12.12 env: TARGET=linux-386-unit install: From cd200b49a2ae518877ac4c3ef4f1724e3c867213 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 17:22:12 -0700 Subject: [PATCH 16/56] Revert "version: 3.3.19" This reverts commit acb9746d660850b5d42ddd44ac1025168e73496a. --- version/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version/version.go b/version/version.go index da27633df52..48e49bde5f1 100644 --- a/version/version.go +++ b/version/version.go @@ -26,7 +26,7 @@ import ( var ( // MinClusterVersion is the min cluster version this etcd binary is compatible with. 
MinClusterVersion = "3.0.0" - Version = "3.3.19" + Version = "3.3.18" APIVersion = "unknown" // Git SHA Value will be set during build From a463bd54ae80dfc44aeeb3ce3cff0a722b3ee4ca Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 17:23:16 -0700 Subject: [PATCH 17/56] words: whitelist "racey" Signed-off-by: Gyuho Lee --- .words | 1 + 1 file changed, 1 insertion(+) diff --git a/.words b/.words index df6c1816bbc..f3d41caa5d0 100644 --- a/.words +++ b/.words @@ -26,6 +26,7 @@ iff inflight keepalive hasleader +racey keepalives keyspace linearization From 67da93f739e33ee9b34792fccba2d0c100264ea4 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 18 Mar 2020 17:23:32 -0700 Subject: [PATCH 18/56] version: 3.3.19 Signed-off-by: Gyuho Lee --- version/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version/version.go b/version/version.go index 48e49bde5f1..da27633df52 100644 --- a/version/version.go +++ b/version/version.go @@ -26,7 +26,7 @@ import ( var ( // MinClusterVersion is the min cluster version this etcd binary is compatible with. MinClusterVersion = "3.0.0" - Version = "3.3.18" + Version = "3.3.19" APIVersion = "unknown" // Git SHA Value will be set during build From 89ecd194140fc912176229494dc571b97f1e9ef0 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Tue, 31 Mar 2020 20:31:24 -0700 Subject: [PATCH 19/56] pkg/ioutil: add "FlushN" Signed-off-by: Gyuho Lee --- pkg/ioutil/pagewriter.go | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/pkg/ioutil/pagewriter.go b/pkg/ioutil/pagewriter.go index 72de1593d3a..cf9a8dc664d 100644 --- a/pkg/ioutil/pagewriter.go +++ b/pkg/ioutil/pagewriter.go @@ -95,12 +95,23 @@ func (pw *PageWriter) Write(p []byte) (n int, err error) { return n, werr } +// Flush flushes buffered data. func (pw *PageWriter) Flush() error { + _, err := pw.flush() + return err +} + +// FlushN flushes buffered data and returns the number of written bytes. 
+func (pw *PageWriter) FlushN() (int, error) { + return pw.flush() +} + +func (pw *PageWriter) flush() (int, error) { if pw.bufferedBytes == 0 { - return nil + return 0, nil } - _, err := pw.w.Write(pw.buf[:pw.bufferedBytes]) + n, err := pw.w.Write(pw.buf[:pw.bufferedBytes]) pw.pageOffset = (pw.pageOffset + pw.bufferedBytes) % pw.pageBytes pw.bufferedBytes = 0 - return err + return n, err } From 1aa5da9121755198497a4de8b904bd51808e1bb8 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 1 Apr 2020 10:43:03 -0700 Subject: [PATCH 20/56] wal: add "etcd_wal_writes_bytes_total" Signed-off-by: Gyuho Lee --- wal/encoder.go | 12 ++++++++---- wal/metrics.go | 7 +++++++ wal/repair.go | 5 +++++ wal/wal.go | 7 +++++++ 4 files changed, 27 insertions(+), 4 deletions(-) diff --git a/wal/encoder.go b/wal/encoder.go index e8040b8dff1..e8890d88a9b 100644 --- a/wal/encoder.go +++ b/wal/encoder.go @@ -92,7 +92,8 @@ func (e *encoder) encode(rec *walpb.Record) error { if padBytes != 0 { data = append(data, make([]byte, padBytes)...) 
} - _, err = e.bw.Write(data) + n, err = e.bw.Write(data) + walWriteBytes.Add(float64(n)) return err } @@ -108,13 +109,16 @@ func encodeFrameSize(dataBytes int) (lenField uint64, padBytes int) { func (e *encoder) flush() error { e.mu.Lock() - defer e.mu.Unlock() - return e.bw.Flush() + n, err := e.bw.FlushN() + e.mu.Unlock() + walWriteBytes.Add(float64(n)) + return err } func writeUint64(w io.Writer, n uint64, buf []byte) error { // http://golang.org/src/encoding/binary/binary.go binary.LittleEndian.PutUint64(buf, n) - _, err := w.Write(buf) + nv, err := w.Write(buf) + walWriteBytes.Add(float64(nv)) return err } diff --git a/wal/metrics.go b/wal/metrics.go index 9e089d380f9..7261544165a 100644 --- a/wal/metrics.go +++ b/wal/metrics.go @@ -24,8 +24,15 @@ var ( Help: "The latency distributions of fsync called by wal.", Buckets: prometheus.ExponentialBuckets(0.001, 2, 14), }) + walWriteBytes = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "etcd", + Subsystem: "disk", + Name: "wal_write_bytes_total", + Help: "Total number of bytes written in WAL.", + }) ) func init() { prometheus.MustRegister(syncDurations) + prometheus.MustRegister(walWriteBytes) } diff --git a/wal/repair.go b/wal/repair.go index 091036b57b9..f1e507683c3 100644 --- a/wal/repair.go +++ b/wal/repair.go @@ -18,6 +18,7 @@ import ( "io" "os" "path/filepath" + "time" "github.com/coreos/etcd/pkg/fileutil" "github.com/coreos/etcd/wal/walpb" @@ -76,10 +77,14 @@ func Repair(dirpath string) bool { plog.Errorf("could not repair %v, failed to truncate file", f.Name()) return false } + + start := time.Now() if err = fileutil.Fsync(f.File); err != nil { plog.Errorf("could not repair %v, failed to sync file", f.Name()) return false } + syncDurations.Observe(time.Since(start).Seconds()) + return true default: plog.Errorf("could not repair error (%v)", err) diff --git a/wal/wal.go b/wal/wal.go index ef63b52ccbc..6909e3ac7af 100644 --- a/wal/wal.go +++ b/wal/wal.go @@ -147,9 +147,13 @@ func Create(dirpath 
string, metadata []byte) (*WAL, error) { if perr != nil { return nil, perr } + + start := time.Now() if perr = fileutil.Fsync(pdir); perr != nil { return nil, perr } + syncDurations.Observe(time.Since(start).Seconds()) + if perr = pdir.Close(); err != nil { return nil, perr } @@ -548,9 +552,12 @@ func (w *WAL) cut() error { if err = os.Rename(newTail.Name(), fpath); err != nil { return err } + + start := time.Now() if err = fileutil.Fsync(w.dirFile); err != nil { return err } + syncDurations.Observe(time.Since(start).Seconds()) // reopen newTail with its new path so calls to Name() match the wal filename format newTail.Close() From 9fd7e2b8020c54e45023511cbbe4d29e8a3f4171 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 1 Apr 2020 10:46:23 -0700 Subject: [PATCH 21/56] version: 3.3.20 Signed-off-by: Gyuho Lee --- version/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version/version.go b/version/version.go index da27633df52..5a96c7aa51e 100644 --- a/version/version.go +++ b/version/version.go @@ -26,7 +26,7 @@ import ( var ( // MinClusterVersion is the min cluster version this etcd binary is compatible with. 
MinClusterVersion = "3.0.0" - Version = "3.3.19" + Version = "3.3.20" APIVersion = "unknown" // Git SHA Value will be set during build From 140bf5321d3b9b0c41d2ca024a4b0fa9a46c55e8 Mon Sep 17 00:00:00 2001 From: tangcong Date: Mon, 24 Feb 2020 17:17:50 +0800 Subject: [PATCH 22/56] *: fix auth revision corruption bug --- auth/store.go | 43 +++++++++++++++++++++++++- auth/store_test.go | 49 ++++++++++++++++++++++++++++++ etcdserver/backend.go | 2 +- etcdserver/server.go | 33 ++++++++++++-------- etcdserver/server_test.go | 8 ++--- mvcc/kv_test.go | 2 +- mvcc/watchable_store.go | 11 +++++-- mvcc/watchable_store_bench_test.go | 8 ++--- mvcc/watchable_store_test.go | 44 +++++++++++++++++++++++++++ mvcc/watcher_bench_test.go | 4 +++ mvcc/watcher_test.go | 12 ++++---- 11 files changed, 184 insertions(+), 32 deletions(-) diff --git a/auth/store.go b/auth/store.go index d676cb5553b..e99ddee51ac 100644 --- a/auth/store.go +++ b/auth/store.go @@ -90,6 +90,10 @@ type AuthenticateParamIndex struct{} // AuthenticateParamSimpleTokenPrefix is used for a key of context in the parameters of Authenticate() type AuthenticateParamSimpleTokenPrefix struct{} +// saveConsistentIndexFunc is used to sync consistentIndex to backend, now reusing store.saveIndex +type saveConsistentIndexFunc func(tx backend.BatchTx) + +// AuthStore defines auth storage interface. 
type AuthStore interface { // AuthEnable turns on the authentication feature AuthEnable() error @@ -178,6 +182,9 @@ type AuthStore interface { // HasRole checks that user has role HasRole(user, role string) bool + + // SetConsistentIndexSyncer sets consistentIndex syncer + SetConsistentIndexSyncer(syncer saveConsistentIndexFunc) } type TokenProvider interface { @@ -200,9 +207,14 @@ type authStore struct { rangePermCache map[string]*unifiedRangePermissions // username -> unifiedRangePermissions - tokenProvider TokenProvider + tokenProvider TokenProvider + syncConsistentIndex saveConsistentIndexFunc + bcryptCost int // the algorithm cost / strength for hashing auth passwords } +func (as *authStore) SetConsistentIndexSyncer(syncer saveConsistentIndexFunc) { + as.syncConsistentIndex = syncer +} func (as *authStore) AuthEnable() error { as.enabledMu.Lock() defer as.enabledMu.Unlock() @@ -252,6 +264,7 @@ func (as *authStore) AuthDisable() { tx.Lock() tx.UnsafePut(authBucketName, enableFlagKey, authDisabled) as.commitRevision(tx) + as.saveConsistentIndex(tx) tx.Unlock() b.ForceCommit() @@ -368,6 +381,7 @@ func (as *authStore) UserAdd(r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, putUser(tx, newUser) as.commitRevision(tx) + as.saveConsistentIndex(tx) plog.Noticef("added a new user: %s", r.Name) @@ -392,6 +406,7 @@ func (as *authStore) UserDelete(r *pb.AuthUserDeleteRequest) (*pb.AuthUserDelete delUser(tx, r.Name) as.commitRevision(tx) + as.saveConsistentIndex(tx) as.invalidateCachedPerm(r.Name) as.tokenProvider.invalidateUser(r.Name) @@ -428,6 +443,7 @@ func (as *authStore) UserChangePassword(r *pb.AuthUserChangePasswordRequest) (*p putUser(tx, updatedUser) as.commitRevision(tx) + as.saveConsistentIndex(tx) as.invalidateCachedPerm(r.Name) as.tokenProvider.invalidateUser(r.Name) @@ -468,6 +484,7 @@ func (as *authStore) UserGrantRole(r *pb.AuthUserGrantRoleRequest) (*pb.AuthUser as.invalidateCachedPerm(r.User) as.commitRevision(tx) + as.saveConsistentIndex(tx) 
plog.Noticef("granted role %s to user %s", r.Role, r.User) return &pb.AuthUserGrantRoleResponse{}, nil @@ -536,6 +553,7 @@ func (as *authStore) UserRevokeRole(r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUs as.invalidateCachedPerm(r.Name) as.commitRevision(tx) + as.saveConsistentIndex(tx) plog.Noticef("revoked role %s from user %s", r.Role, r.Name) return &pb.AuthUserRevokeRoleResponse{}, nil @@ -600,6 +618,7 @@ func (as *authStore) RoleRevokePermission(r *pb.AuthRoleRevokePermissionRequest) as.clearCachedPerm() as.commitRevision(tx) + as.saveConsistentIndex(tx) plog.Noticef("revoked key %s from role %s", r.Key, r.Role) return &pb.AuthRoleRevokePermissionResponse{}, nil @@ -645,6 +664,7 @@ func (as *authStore) RoleDelete(r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDelete } as.commitRevision(tx) + as.saveConsistentIndex(tx) plog.Noticef("deleted role %s", r.Role) return &pb.AuthRoleDeleteResponse{}, nil @@ -667,6 +687,7 @@ func (as *authStore) RoleAdd(r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, putRole(tx, newRole) as.commitRevision(tx) + as.saveConsistentIndex(tx) plog.Noticef("Role %s is created", r.Name) @@ -706,6 +727,16 @@ func (as *authStore) RoleGrantPermission(r *pb.AuthRoleGrantPermissionRequest) ( }) if idx < len(role.KeyPermission) && bytes.Equal(role.KeyPermission[idx].Key, r.Perm.Key) && bytes.Equal(role.KeyPermission[idx].RangeEnd, r.Perm.RangeEnd) { + if role.KeyPermission[idx].PermType == r.Perm.PermType { + as.lg.Warn( + "ignored grant permission request to a role, existing permission", + zap.String("role-name", r.Name), + zap.ByteString("key", r.Perm.Key), + zap.ByteString("range-end", r.Perm.RangeEnd), + zap.String("permission-type", authpb.Permission_Type_name[int32(r.Perm.PermType)]), + ) + return &pb.AuthRoleGrantPermissionResponse{}, nil + } // update existing permission role.KeyPermission[idx].PermType = r.Perm.PermType } else { @@ -727,6 +758,7 @@ func (as *authStore) RoleGrantPermission(r *pb.AuthRoleGrantPermissionRequest) ( 
as.clearCachedPerm() as.commitRevision(tx) + as.saveConsistentIndex(tx) plog.Noticef("role %s's permission of key %s is updated as %s", r.Name, r.Perm.Key, authpb.Permission_Type_name[int32(r.Perm.PermType)]) @@ -931,6 +963,7 @@ func NewAuthStore(be backend.Backend, tp TokenProvider) *authStore { if as.Revision() == 0 { as.commitRevision(tx) + as.saveConsistentIndex(tx) } tx.Unlock() @@ -1134,3 +1167,11 @@ func (as *authStore) HasRole(user, role string) bool { return false } + +func (as *authStore) saveConsistentIndex(tx backend.BatchTx) { + if as.syncConsistentIndex != nil { + as.syncConsistentIndex(tx) + } else { + plog.Errorf("failed to save consistentIndex,syncConsistentIndex is nil") + } +} diff --git a/auth/store_test.go b/auth/store_test.go index 155b2f07f52..43532c60e6b 100644 --- a/auth/store_test.go +++ b/auth/store_test.go @@ -301,6 +301,55 @@ func TestListUsers(t *testing.T) { } } +func TestRoleGrantPermissionRevision(t *testing.T) { + as, tearDown := setupAuthStore(t) + defer tearDown(t) + + _, err := as.RoleAdd(&pb.AuthRoleAddRequest{Name: "role-test-1"}) + if err != nil { + t.Fatal(err) + } + + perm := &authpb.Permission{ + PermType: authpb.WRITE, + Key: []byte("Keys"), + RangeEnd: []byte("RangeEnd"), + } + _, err = as.RoleGrantPermission(&pb.AuthRoleGrantPermissionRequest{ + Name: "role-test-1", + Perm: perm, + }) + + if err != nil { + t.Fatal(err) + } + + r, err := as.RoleGet(&pb.AuthRoleGetRequest{Role: "role-test-1"}) + if err != nil { + t.Fatal(err) + } + + if !reflect.DeepEqual(perm, r.Perm[0]) { + t.Errorf("expected %v, got %v", perm, r.Perm[0]) + } + + oldRevision := as.Revision() + + _, err = as.RoleGrantPermission(&pb.AuthRoleGrantPermissionRequest{ + Name: "role-test-1", + Perm: perm, + }) + + if err != nil { + t.Error(err) + } + newRevision := as.Revision() + + if oldRevision != newRevision { + t.Errorf("expected revision diff is 0, got %d", newRevision-oldRevision) + } +} + func TestRoleGrantPermission(t *testing.T) { as, tearDown := 
setupAuthStore(t) defer tearDown(t) diff --git a/etcdserver/backend.go b/etcdserver/backend.go index 647773d474f..3481f37a810 100644 --- a/etcdserver/backend.go +++ b/etcdserver/backend.go @@ -71,7 +71,7 @@ func openBackend(cfg ServerConfig) backend.Backend { // case, replace the db with the snapshot db sent by the leader. func recoverSnapshotBackend(cfg ServerConfig, oldbe backend.Backend, snapshot raftpb.Snapshot) (backend.Backend, error) { var cIndex consistentIndex - kv := mvcc.New(oldbe, &lease.FakeLessor{}, &cIndex) + kv := mvcc.New(cfg.Logger, oldbe, &lease.FakeLessor{}, nil, &cIndex, mvcc.StoreConfig{CompactionBatchLimit: cfg.CompactionBatchLimit}) defer kv.Close() if snapshot.Metadata.Index <= kv.ConsistentIndex() { return oldbe, nil diff --git a/etcdserver/server.go b/etcdserver/server.go index 083e9f72ac3..61cb91fbe08 100644 --- a/etcdserver/server.go +++ b/etcdserver/server.go @@ -447,8 +447,27 @@ func NewServer(cfg ServerConfig) (srv *EtcdServer, err error) { // always recover lessor before kv. When we recover the mvcc.KV it will reattach keys to its leases. // If we recover mvcc.KV first, it will attach the keys to the wrong lessor before it recovers. 
- srv.lessor = lease.NewLessor(srv.be, int64(math.Ceil(minTTL.Seconds()))) - srv.kv = mvcc.New(srv.be, srv.lessor, &srv.consistIndex) + srv.lessor = lease.NewLessor( + srv.getLogger(), + srv.be, + lease.LessorConfig{ + MinLeaseTTL: int64(math.Ceil(minTTL.Seconds())), + CheckpointInterval: cfg.LeaseCheckpointInterval, + ExpiredLeasesRetryInterval: srv.Cfg.ReqTimeout(), + }) + + tp, err := auth.NewTokenProvider(cfg.Logger, cfg.AuthToken, + func(index uint64) <-chan struct{} { + return srv.applyWait.Wait(index) + }, + ) + if err != nil { + plog.Warningf("failed to create token provider,err is %v", err) + return nil, err + } + srv.authStore = auth.NewAuthStore(srv.getLogger(), srv.be, tp, int(cfg.BcryptCost)) + + srv.kv = mvcc.New(srv.getLogger(), srv.be, srv.lessor, srv.authStore, &srv.consistIndex, mvcc.StoreConfig{CompactionBatchLimit: cfg.CompactionBatchLimit}) if beExist { kvindex := srv.kv.ConsistentIndex() // TODO: remove kvindex != 0 checking when we do not expect users to upgrade @@ -470,16 +489,6 @@ func NewServer(cfg ServerConfig) (srv *EtcdServer, err error) { }() srv.consistIndex.setConsistentIndex(srv.kv.ConsistentIndex()) - tp, err := auth.NewTokenProvider(cfg.AuthToken, - func(index uint64) <-chan struct{} { - return srv.applyWait.Wait(index) - }, - ) - if err != nil { - plog.Errorf("failed to create token provider: %s", err) - return nil, err - } - srv.authStore = auth.NewAuthStore(srv.be, tp) if num := cfg.AutoCompactionRetention; num != 0 { srv.compactor, err = compactor.New(cfg.AutoCompactionMode, num, srv.kv, srv) if err != nil { diff --git a/etcdserver/server_test.go b/etcdserver/server_test.go index e3ea0f9250c..61965e801b1 100644 --- a/etcdserver/server_test.go +++ b/etcdserver/server_test.go @@ -916,7 +916,7 @@ func TestSnapshot(t *testing.T) { r: *r, store: st, } - srv.kv = mvcc.New(be, &lease.FakeLessor{}, &srv.consistIndex) + srv.kv = mvcc.New(zap.NewExample(), be, &lease.FakeLessor{}, nil, &srv.consistIndex, mvcc.StoreConfig{}) srv.be = be 
ch := make(chan struct{}, 2) @@ -994,7 +994,7 @@ func TestSnapshotOrdering(t *testing.T) { be, tmpPath := backend.NewDefaultTmpBackend() defer os.RemoveAll(tmpPath) - s.kv = mvcc.New(be, &lease.FakeLessor{}, &s.consistIndex) + s.kv = mvcc.New(zap.NewExample(), be, &lease.FakeLessor{}, nil, &s.consistIndex, mvcc.StoreConfig{}) s.be = be s.start() @@ -1052,7 +1052,7 @@ func TestTriggerSnap(t *testing.T) { } srv.applyV2 = &applierV2store{store: srv.store, cluster: srv.cluster} - srv.kv = mvcc.New(be, &lease.FakeLessor{}, &srv.consistIndex) + srv.kv = mvcc.New(zap.NewExample(), be, &lease.FakeLessor{}, nil, &srv.consistIndex, mvcc.StoreConfig{}) srv.be = be srv.start() @@ -1121,7 +1121,7 @@ func TestConcurrentApplyAndSnapshotV3(t *testing.T) { defer func() { os.RemoveAll(tmpPath) }() - s.kv = mvcc.New(be, &lease.FakeLessor{}, &s.consistIndex) + s.kv = mvcc.New(zap.NewExample(), be, &lease.FakeLessor{}, nil, &s.consistIndex, mvcc.StoreConfig{}) s.be = be s.start() diff --git a/mvcc/kv_test.go b/mvcc/kv_test.go index d6f49ee14a9..722108c78c2 100644 --- a/mvcc/kv_test.go +++ b/mvcc/kv_test.go @@ -710,7 +710,7 @@ func TestKVSnapshot(t *testing.T) { func TestWatchableKVWatch(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() - s := WatchableKV(newWatchableStore(b, &lease.FakeLessor{}, nil)) + s := WatchableKV(newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{})) defer cleanup(s, b, tmpPath) w := s.NewWatchStream() diff --git a/mvcc/watchable_store.go b/mvcc/watchable_store.go index 78df19326b9..2c332b36081 100644 --- a/mvcc/watchable_store.go +++ b/mvcc/watchable_store.go @@ -15,6 +15,7 @@ package mvcc import ( + "go.etcd.io/etcd/auth" "sync" "time" @@ -67,11 +68,11 @@ type watchableStore struct { // cancel operations. 
type cancelFunc func() -func New(b backend.Backend, le lease.Lessor, ig ConsistentIndexGetter) ConsistentWatchableKV { - return newWatchableStore(b, le, ig) +func New(lg *zap.Logger, b backend.Backend, le lease.Lessor, as auth.AuthStore, ig ConsistentIndexGetter, cfg StoreConfig) ConsistentWatchableKV { + return newWatchableStore(lg, b, le, as, ig, cfg) } -func newWatchableStore(b backend.Backend, le lease.Lessor, ig ConsistentIndexGetter) *watchableStore { +func newWatchableStore(lg *zap.Logger, b backend.Backend, le lease.Lessor, as auth.AuthStore, ig ConsistentIndexGetter, cfg StoreConfig) *watchableStore { s := &watchableStore{ store: NewStore(b, le, ig), victimc: make(chan struct{}, 1), @@ -85,6 +86,10 @@ func newWatchableStore(b backend.Backend, le lease.Lessor, ig ConsistentIndexGet // use this store as the deleter so revokes trigger watch events s.le.SetRangeDeleter(func() lease.TxnDelete { return s.Write() }) } + if as != nil { + // TODO: encapsulating consistentindex into a separate package + as.SetConsistentIndexSyncer(s.store.saveIndex) + } s.wg.Add(2) go s.syncWatchersLoop() go s.syncVictimsLoop() diff --git a/mvcc/watchable_store_bench_test.go b/mvcc/watchable_store_bench_test.go index 769d1bc38a8..053d841cd60 100644 --- a/mvcc/watchable_store_bench_test.go +++ b/mvcc/watchable_store_bench_test.go @@ -25,7 +25,7 @@ import ( func BenchmarkWatchableStorePut(b *testing.B) { be, tmpPath := backend.NewDefaultTmpBackend() - s := New(be, &lease.FakeLessor{}, nil) + s := New(zap.NewExample(), be, &lease.FakeLessor{}, nil, nil, StoreConfig{}) defer cleanup(s, be, tmpPath) // arbitrary number of bytes @@ -46,7 +46,7 @@ func BenchmarkWatchableStorePut(b *testing.B) { func BenchmarkWatchableStoreTxnPut(b *testing.B) { var i fakeConsistentIndex be, tmpPath := backend.NewDefaultTmpBackend() - s := New(be, &lease.FakeLessor{}, &i) + s := New(zap.NewExample(), be, &lease.FakeLessor{}, nil, &i, StoreConfig{}) defer cleanup(s, be, tmpPath) // arbitrary number of bytes 
@@ -67,7 +67,7 @@ func BenchmarkWatchableStoreTxnPut(b *testing.B) { // many synced watchers receiving a Put notification. func BenchmarkWatchableStoreWatchSyncPut(b *testing.B) { be, tmpPath := backend.NewDefaultTmpBackend() - s := newWatchableStore(be, &lease.FakeLessor{}, nil) + s := newWatchableStore(zap.NewExample(), be, &lease.FakeLessor{}, nil, nil, StoreConfig{}) defer cleanup(s, be, tmpPath) k := []byte("testkey") @@ -162,7 +162,7 @@ func BenchmarkWatchableStoreUnsyncedCancel(b *testing.B) { func BenchmarkWatchableStoreSyncedCancel(b *testing.B) { be, tmpPath := backend.NewDefaultTmpBackend() - s := newWatchableStore(be, &lease.FakeLessor{}, nil) + s := newWatchableStore(zap.NewExample(), be, &lease.FakeLessor{}, nil, nil, StoreConfig{}) defer func() { s.store.Close() diff --git a/mvcc/watchable_store_test.go b/mvcc/watchable_store_test.go index dc96d53366c..f6086079400 100644 --- a/mvcc/watchable_store_test.go +++ b/mvcc/watchable_store_test.go @@ -30,7 +30,11 @@ import ( func TestWatch(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() +<<<<<<< HEAD s := newWatchableStore(b, &lease.FakeLessor{}, nil) +======= + s := newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{}) +>>>>>>> *: fix auth revision corruption bug defer func() { s.store.Close() @@ -52,7 +56,11 @@ func TestWatch(t *testing.T) { func TestNewWatcherCancel(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() +<<<<<<< HEAD s := newWatchableStore(b, &lease.FakeLessor{}, nil) +======= + s := newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{}) +>>>>>>> *: fix auth revision corruption bug defer func() { s.store.Close() @@ -222,7 +230,11 @@ func TestSyncWatchers(t *testing.T) { // TestWatchCompacted tests a watcher that watches on a compacted revision. 
func TestWatchCompacted(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() +<<<<<<< HEAD s := newWatchableStore(b, &lease.FakeLessor{}, nil) +======= + s := newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{}) +>>>>>>> *: fix auth revision corruption bug defer func() { s.store.Close() @@ -259,7 +271,11 @@ func TestWatchCompacted(t *testing.T) { func TestWatchFutureRev(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() +<<<<<<< HEAD s := newWatchableStore(b, &lease.FakeLessor{}, nil) +======= + s := newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{}) +>>>>>>> *: fix auth revision corruption bug defer func() { s.store.Close() @@ -300,7 +316,11 @@ func TestWatchRestore(t *testing.T) { test := func(delay time.Duration) func(t *testing.T) { return func(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() +<<<<<<< HEAD s := newWatchableStore(b, &lease.FakeLessor{}, nil) +======= + s := newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{}) +>>>>>>> *: fix auth revision corruption bug defer cleanup(s, b, tmpPath) testKey := []byte("foo") @@ -308,7 +328,11 @@ func TestWatchRestore(t *testing.T) { rev := s.Put(testKey, testValue, lease.NoLease) newBackend, newPath := backend.NewDefaultTmpBackend() +<<<<<<< HEAD newStore := newWatchableStore(newBackend, &lease.FakeLessor{}, nil) +======= + newStore := newWatchableStore(zap.NewExample(), newBackend, &lease.FakeLessor{}, nil, nil, StoreConfig{}) +>>>>>>> *: fix auth revision corruption bug defer cleanup(newStore, newBackend, newPath) w := newStore.NewWatchStream() @@ -346,11 +370,19 @@ func TestWatchRestore(t *testing.T) { // 5. 
choose the watcher from step 1, without panic func TestWatchRestoreSyncedWatcher(t *testing.T) { b1, b1Path := backend.NewDefaultTmpBackend() +<<<<<<< HEAD s1 := newWatchableStore(b1, &lease.FakeLessor{}, nil) defer cleanup(s1, b1, b1Path) b2, b2Path := backend.NewDefaultTmpBackend() s2 := newWatchableStore(b2, &lease.FakeLessor{}, nil) +======= + s1 := newWatchableStore(zap.NewExample(), b1, &lease.FakeLessor{}, nil, nil, StoreConfig{}) + defer cleanup(s1, b1, b1Path) + + b2, b2Path := backend.NewDefaultTmpBackend() + s2 := newWatchableStore(zap.NewExample(), b2, &lease.FakeLessor{}, nil, nil, StoreConfig{}) +>>>>>>> *: fix auth revision corruption bug defer cleanup(s2, b2, b2Path) testKey, testValue := []byte("foo"), []byte("bar") @@ -397,7 +429,11 @@ func TestWatchRestoreSyncedWatcher(t *testing.T) { // TestWatchBatchUnsynced tests batching on unsynced watchers func TestWatchBatchUnsynced(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() +<<<<<<< HEAD s := newWatchableStore(b, &lease.FakeLessor{}, nil) +======= + s := newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{}) +>>>>>>> *: fix auth revision corruption bug oldMaxRevs := watchBatchMaxRevs defer func() { @@ -531,7 +567,11 @@ func TestWatchVictims(t *testing.T) { oldChanBufLen, oldMaxWatchersPerSync := chanBufLen, maxWatchersPerSync b, tmpPath := backend.NewDefaultTmpBackend() +<<<<<<< HEAD s := newWatchableStore(b, &lease.FakeLessor{}, nil) +======= + s := newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{}) +>>>>>>> *: fix auth revision corruption bug defer func() { s.store.Close() @@ -609,7 +649,11 @@ func TestWatchVictims(t *testing.T) { // canceling its watches. 
func TestStressWatchCancelClose(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() +<<<<<<< HEAD s := newWatchableStore(b, &lease.FakeLessor{}, nil) +======= + s := newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{}) +>>>>>>> *: fix auth revision corruption bug defer func() { s.store.Close() diff --git a/mvcc/watcher_bench_test.go b/mvcc/watcher_bench_test.go index 8a4242f3f20..fa79ec97195 100644 --- a/mvcc/watcher_bench_test.go +++ b/mvcc/watcher_bench_test.go @@ -24,7 +24,11 @@ import ( func BenchmarkKVWatcherMemoryUsage(b *testing.B) { be, tmpPath := backend.NewDefaultTmpBackend() +<<<<<<< HEAD watchable := newWatchableStore(be, &lease.FakeLessor{}, nil) +======= + watchable := newWatchableStore(zap.NewExample(), be, &lease.FakeLessor{}, nil, nil, StoreConfig{}) +>>>>>>> *: fix auth revision corruption bug defer cleanup(watchable, be, tmpPath) diff --git a/mvcc/watcher_test.go b/mvcc/watcher_test.go index 3d259d1f160..b415c682236 100644 --- a/mvcc/watcher_test.go +++ b/mvcc/watcher_test.go @@ -31,7 +31,7 @@ import ( // and the watched event attaches the correct watchID. func TestWatcherWatchID(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() - s := WatchableKV(newWatchableStore(b, &lease.FakeLessor{}, nil)) + s := WatchableKV(newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{})) defer cleanup(s, b, tmpPath) w := s.NewWatchStream() @@ -83,7 +83,7 @@ func TestWatcherWatchID(t *testing.T) { // and returns events with matching prefixes. func TestWatcherWatchPrefix(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() - s := WatchableKV(newWatchableStore(b, &lease.FakeLessor{}, nil)) + s := WatchableKV(newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{})) defer cleanup(s, b, tmpPath) w := s.NewWatchStream() @@ -157,7 +157,7 @@ func TestWatcherWatchPrefix(t *testing.T) { // does not create watcher, which panics when canceling in range tree. 
func TestWatcherWatchWrongRange(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() - s := WatchableKV(newWatchableStore(b, &lease.FakeLessor{}, nil)) + s := WatchableKV(newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{})) defer cleanup(s, b, tmpPath) w := s.NewWatchStream() @@ -177,7 +177,7 @@ func TestWatcherWatchWrongRange(t *testing.T) { func TestWatchDeleteRange(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() - s := newWatchableStore(b, &lease.FakeLessor{}, nil) + s := newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{}) defer func() { s.store.Close() @@ -216,7 +216,7 @@ func TestWatchDeleteRange(t *testing.T) { // with given id inside watchStream. func TestWatchStreamCancelWatcherByID(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() - s := WatchableKV(newWatchableStore(b, &lease.FakeLessor{}, nil)) + s := WatchableKV(newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{})) defer cleanup(s, b, tmpPath) w := s.NewWatchStream() @@ -308,7 +308,7 @@ func TestWatcherRequestProgress(t *testing.T) { func TestWatcherWatchWithFilter(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() - s := WatchableKV(newWatchableStore(b, &lease.FakeLessor{}, nil)) + s := WatchableKV(newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{})) defer cleanup(s, b, tmpPath) w := s.NewWatchStream() From 06a2f816e98db69882450083bbd2b5ffeabf9bda Mon Sep 17 00:00:00 2001 From: shawwang Date: Mon, 24 Feb 2020 15:47:45 +0800 Subject: [PATCH 23/56] auth: add new metric 'etcd_debugging_auth_revision' --- auth/metrics.go | 42 ++++++++++++++++++++++++++++++++++++++++++ auth/store.go | 10 ++++++++++ 2 files changed, 52 insertions(+) create mode 100644 auth/metrics.go diff --git a/auth/metrics.go b/auth/metrics.go new file mode 100644 index 00000000000..fe0d28e22d5 --- /dev/null +++ b/auth/metrics.go @@ -0,0 +1,42 @@ +// Copyright 2015 The 
etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package auth + +import ( + "github.com/prometheus/client_golang/prometheus" + "sync" +) + +var ( + currentAuthRevision = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Namespace: "etcd_debugging", + Subsystem: "auth", + Name: "revision", + Help: "The current revision of auth store.", + }, + func() float64 { + reportCurrentAuthRevMu.RLock() + defer reportCurrentAuthRevMu.RUnlock() + return reportCurrentAuthRev() + }, + ) + // overridden by auth store initialization + reportCurrentAuthRevMu sync.RWMutex + reportCurrentAuthRev = func() float64 { return 0 } +) + +func init() { + prometheus.MustRegister(currentAuthRevision) +} diff --git a/auth/store.go b/auth/store.go index e99ddee51ac..02d18a6cef8 100644 --- a/auth/store.go +++ b/auth/store.go @@ -966,6 +966,8 @@ func NewAuthStore(be backend.Backend, tp TokenProvider) *authStore { as.saveConsistentIndex(tx) } + as.setupMetricsReporter() + tx.Unlock() be.ForceCommit() @@ -1175,3 +1177,11 @@ func (as *authStore) saveConsistentIndex(tx backend.BatchTx) { plog.Errorf("failed to save consistentIndex,syncConsistentIndex is nil") } } + +func (as *authStore) setupMetricsReporter() { + reportCurrentAuthRevMu.Lock() + reportCurrentAuthRev = func() float64 { + return float64(as.Revision()) + } + reportCurrentAuthRevMu.Unlock() +} From e7291a1dab7e585eccd2cebee14557d6409dbc50 Mon Sep 17 00:00:00 2001 From: tangcong Date: Tue, 3 Mar 2020 17:26:04 +0800 
Subject: [PATCH 24/56] auth: print warning log when error is ErrAuthOldRevision --- auth/store.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/auth/store.go b/auth/store.go index 02d18a6cef8..44b04de069a 100644 --- a/auth/store.go +++ b/auth/store.go @@ -775,8 +775,13 @@ func (as *authStore) isOpPermitted(userName string, revision uint64, key, rangeE if revision == 0 { return ErrUserEmpty } - - if revision < as.Revision() { + rev := as.Revision() + if revision < rev { + as.lg.Warn("request auth revision is less than current node auth revision", + zap.Uint64("current node auth revision", rev), + zap.Uint64("request auth revision", revision), + zap.ByteString("request key", key), + zap.Error(ErrAuthOldRevision)) return ErrAuthOldRevision } From acd94224599b75c2557efa2fab4ff142214d96fd Mon Sep 17 00:00:00 2001 From: tangcong Date: Tue, 3 Mar 2020 17:38:37 +0800 Subject: [PATCH 25/56] auth: cleanup saveConsistentIndex in NewAuthStore --- auth/store.go | 1 - 1 file changed, 1 deletion(-) diff --git a/auth/store.go b/auth/store.go index 44b04de069a..a3c46b18801 100644 --- a/auth/store.go +++ b/auth/store.go @@ -968,7 +968,6 @@ func NewAuthStore(be backend.Backend, tp TokenProvider) *authStore { if as.Revision() == 0 { as.commitRevision(tx) - as.saveConsistentIndex(tx) } as.setupMetricsReporter() From 27dffc6d018ac491616ea190e6e52f06659b40d5 Mon Sep 17 00:00:00 2001 From: tangcong Date: Tue, 10 Mar 2020 10:56:33 +0800 Subject: [PATCH 26/56] etcdserver: print warn log when failed to apply request --- etcdserver/apply.go | 3 +++ etcdserver/util.go | 17 ++++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/etcdserver/apply.go b/etcdserver/apply.go index 93e78e390c8..555338574a5 100644 --- a/etcdserver/apply.go +++ b/etcdserver/apply.go @@ -110,6 +110,9 @@ func (a *applierV3backend) Apply(r *pb.InternalRaftRequest) *applyResult { ar := &applyResult{} defer func(start time.Time) { warnOfExpensiveRequest(start, 
&pb.InternalRaftStringer{Request: r}, ar.resp, ar.err) + if ar.err != nil { + warnOfFailedRequest(start, &pb.InternalRaftStringer{Request: r}, ar.resp, ar.err) + } }(time.Now()) // call into a.s.applyV3.F instead of a.F so upper appliers can check individual calls diff --git a/etcdserver/util.go b/etcdserver/util.go index 79bb6b859ca..4f73e86a794 100644 --- a/etcdserver/util.go +++ b/etcdserver/util.go @@ -109,7 +109,22 @@ func warnOfExpensiveRequest(now time.Time, reqStringer fmt.Stringer, respMsg pro warnOfExpensiveGenericRequest(now, reqStringer, "", resp, err) } -func warnOfExpensiveReadOnlyTxnRequest(now time.Time, r *pb.TxnRequest, txnResponse *pb.TxnResponse, err error) { +func warnOfFailedRequest(lg *zap.Logger, now time.Time, reqStringer fmt.Stringer, respMsg proto.Message, err error) { + var resp string + if !isNil(respMsg) { + resp = fmt.Sprintf("size:%d", proto.Size(respMsg)) + } + d := time.Since(now) + plog.Warningf( + "failed to apply request", + zap.Duration("took", d), + zap.String("request", reqStringer.String()), + zap.String("response", resp), + zap.Error(err), + ) +} + +func warnOfExpensiveReadOnlyTxnRequest(lg *zap.Logger, now time.Time, r *pb.TxnRequest, txnResponse *pb.TxnResponse, err error) { reqStringer := pb.NewLoggableTxnRequest(r) var resp string if !isNil(txnResponse) { From 64fc4cc24485c2dc5c2d720d97c5982bfebe740a Mon Sep 17 00:00:00 2001 From: tangcong Date: Sun, 22 Mar 2020 12:04:19 +0800 Subject: [PATCH 27/56] auth: ensure RoleGrantPermission is compatible with older versions --- auth/store.go | 10 ---------- auth/store_test.go | 49 ---------------------------------------------- 2 files changed, 59 deletions(-) diff --git a/auth/store.go b/auth/store.go index a3c46b18801..d2fad6af4d4 100644 --- a/auth/store.go +++ b/auth/store.go @@ -727,16 +727,6 @@ func (as *authStore) RoleGrantPermission(r *pb.AuthRoleGrantPermissionRequest) ( }) if idx < len(role.KeyPermission) && bytes.Equal(role.KeyPermission[idx].Key, r.Perm.Key) && 
bytes.Equal(role.KeyPermission[idx].RangeEnd, r.Perm.RangeEnd) { - if role.KeyPermission[idx].PermType == r.Perm.PermType { - as.lg.Warn( - "ignored grant permission request to a role, existing permission", - zap.String("role-name", r.Name), - zap.ByteString("key", r.Perm.Key), - zap.ByteString("range-end", r.Perm.RangeEnd), - zap.String("permission-type", authpb.Permission_Type_name[int32(r.Perm.PermType)]), - ) - return &pb.AuthRoleGrantPermissionResponse{}, nil - } // update existing permission role.KeyPermission[idx].PermType = r.Perm.PermType } else { diff --git a/auth/store_test.go b/auth/store_test.go index 43532c60e6b..155b2f07f52 100644 --- a/auth/store_test.go +++ b/auth/store_test.go @@ -301,55 +301,6 @@ func TestListUsers(t *testing.T) { } } -func TestRoleGrantPermissionRevision(t *testing.T) { - as, tearDown := setupAuthStore(t) - defer tearDown(t) - - _, err := as.RoleAdd(&pb.AuthRoleAddRequest{Name: "role-test-1"}) - if err != nil { - t.Fatal(err) - } - - perm := &authpb.Permission{ - PermType: authpb.WRITE, - Key: []byte("Keys"), - RangeEnd: []byte("RangeEnd"), - } - _, err = as.RoleGrantPermission(&pb.AuthRoleGrantPermissionRequest{ - Name: "role-test-1", - Perm: perm, - }) - - if err != nil { - t.Fatal(err) - } - - r, err := as.RoleGet(&pb.AuthRoleGetRequest{Role: "role-test-1"}) - if err != nil { - t.Fatal(err) - } - - if !reflect.DeepEqual(perm, r.Perm[0]) { - t.Errorf("expected %v, got %v", perm, r.Perm[0]) - } - - oldRevision := as.Revision() - - _, err = as.RoleGrantPermission(&pb.AuthRoleGrantPermissionRequest{ - Name: "role-test-1", - Perm: perm, - }) - - if err != nil { - t.Error(err) - } - newRevision := as.Revision() - - if oldRevision != newRevision { - t.Errorf("expected revision diff is 0, got %d", newRevision-oldRevision) - } -} - func TestRoleGrantPermission(t *testing.T) { as, tearDown := setupAuthStore(t) defer tearDown(t) From 294e71448926e220dcf88168236c13af58d22b2e Mon Sep 17 00:00:00 2001 From: tangcong Date: Mon, 6 Apr 2020 
10:47:14 +0800 Subject: [PATCH 28/56] *: fix cherry-pick conflict --- auth/store.go | 11 +++-- etcdserver/backend.go | 2 +- etcdserver/server.go | 17 +++----- etcdserver/server_test.go | 8 ++-- etcdserver/util.go | 12 ++---- integration/v3_watch_test.go | 8 ++-- mvcc/kv_test.go | 2 +- mvcc/watchable_store.go | 8 ++-- mvcc/watchable_store_bench_test.go | 8 ++-- mvcc/watchable_store_test.go | 66 +++++------------------------- mvcc/watcher_bench_test.go | 6 +-- mvcc/watcher_test.go | 12 +++--- 12 files changed, 50 insertions(+), 110 deletions(-) diff --git a/auth/store.go b/auth/store.go index d2fad6af4d4..21c121e50ae 100644 --- a/auth/store.go +++ b/auth/store.go @@ -209,7 +209,6 @@ type authStore struct { tokenProvider TokenProvider syncConsistentIndex saveConsistentIndexFunc - bcryptCost int // the algorithm cost / strength for hashing auth passwords } func (as *authStore) SetConsistentIndexSyncer(syncer saveConsistentIndexFunc) { @@ -767,11 +766,11 @@ func (as *authStore) isOpPermitted(userName string, revision uint64, key, rangeE } rev := as.Revision() if revision < rev { - as.lg.Warn("request auth revision is less than current node auth revision", - zap.Uint64("current node auth revision", rev), - zap.Uint64("request auth revision", revision), - zap.ByteString("request key", key), - zap.Error(ErrAuthOldRevision)) + plog.Warningf("request auth revision is less than current node auth revision,"+ + "current node auth revision is %d,"+ + "request auth revision is %d,"+ + "request key is %s, "+ + "err is %v", rev, revision, key, ErrAuthOldRevision) return ErrAuthOldRevision } diff --git a/etcdserver/backend.go b/etcdserver/backend.go index 3481f37a810..fe2f86503e0 100644 --- a/etcdserver/backend.go +++ b/etcdserver/backend.go @@ -71,7 +71,7 @@ func openBackend(cfg ServerConfig) backend.Backend { // case, replace the db with the snapshot db sent by the leader. 
func recoverSnapshotBackend(cfg ServerConfig, oldbe backend.Backend, snapshot raftpb.Snapshot) (backend.Backend, error) { var cIndex consistentIndex - kv := mvcc.New(cfg.Logger, oldbe, &lease.FakeLessor{}, nil, &cIndex, mvcc.StoreConfig{CompactionBatchLimit: cfg.CompactionBatchLimit}) + kv := mvcc.New(oldbe, &lease.FakeLessor{}, nil, &cIndex) defer kv.Close() if snapshot.Metadata.Index <= kv.ConsistentIndex() { return oldbe, nil diff --git a/etcdserver/server.go b/etcdserver/server.go index 61cb91fbe08..7c1948c5aa4 100644 --- a/etcdserver/server.go +++ b/etcdserver/server.go @@ -447,16 +447,9 @@ func NewServer(cfg ServerConfig) (srv *EtcdServer, err error) { // always recover lessor before kv. When we recover the mvcc.KV it will reattach keys to its leases. // If we recover mvcc.KV first, it will attach the keys to the wrong lessor before it recovers. - srv.lessor = lease.NewLessor( - srv.getLogger(), - srv.be, - lease.LessorConfig{ - MinLeaseTTL: int64(math.Ceil(minTTL.Seconds())), - CheckpointInterval: cfg.LeaseCheckpointInterval, - ExpiredLeasesRetryInterval: srv.Cfg.ReqTimeout(), - }) - - tp, err := auth.NewTokenProvider(cfg.Logger, cfg.AuthToken, + srv.lessor = lease.NewLessor(srv.be, int64(math.Ceil(minTTL.Seconds()))) + + tp, err := auth.NewTokenProvider(cfg.AuthToken, func(index uint64) <-chan struct{} { return srv.applyWait.Wait(index) }, @@ -465,9 +458,9 @@ func NewServer(cfg ServerConfig) (srv *EtcdServer, err error) { plog.Warningf("failed to create token provider,err is %v", err) return nil, err } - srv.authStore = auth.NewAuthStore(srv.getLogger(), srv.be, tp, int(cfg.BcryptCost)) + srv.authStore = auth.NewAuthStore(srv.be, tp) - srv.kv = mvcc.New(srv.getLogger(), srv.be, srv.lessor, srv.authStore, &srv.consistIndex, mvcc.StoreConfig{CompactionBatchLimit: cfg.CompactionBatchLimit}) + srv.kv = mvcc.New(srv.be, srv.lessor, srv.authStore, &srv.consistIndex) if beExist { kvindex := srv.kv.ConsistentIndex() // TODO: remove kvindex != 0 checking when we do 
not expect users to upgrade diff --git a/etcdserver/server_test.go b/etcdserver/server_test.go index 61965e801b1..7ab52b5367e 100644 --- a/etcdserver/server_test.go +++ b/etcdserver/server_test.go @@ -916,7 +916,7 @@ func TestSnapshot(t *testing.T) { r: *r, store: st, } - srv.kv = mvcc.New(zap.NewExample(), be, &lease.FakeLessor{}, nil, &srv.consistIndex, mvcc.StoreConfig{}) + srv.kv = mvcc.New(be, &lease.FakeLessor{}, nil, &srv.consistIndex) srv.be = be ch := make(chan struct{}, 2) @@ -994,7 +994,7 @@ func TestSnapshotOrdering(t *testing.T) { be, tmpPath := backend.NewDefaultTmpBackend() defer os.RemoveAll(tmpPath) - s.kv = mvcc.New(zap.NewExample(), be, &lease.FakeLessor{}, nil, &s.consistIndex, mvcc.StoreConfig{}) + s.kv = mvcc.New(be, &lease.FakeLessor{}, nil, &s.consistIndex) s.be = be s.start() @@ -1052,7 +1052,7 @@ func TestTriggerSnap(t *testing.T) { } srv.applyV2 = &applierV2store{store: srv.store, cluster: srv.cluster} - srv.kv = mvcc.New(zap.NewExample(), be, &lease.FakeLessor{}, nil, &srv.consistIndex, mvcc.StoreConfig{}) + srv.kv = mvcc.New(be, &lease.FakeLessor{}, nil, &srv.consistIndex) srv.be = be srv.start() @@ -1121,7 +1121,7 @@ func TestConcurrentApplyAndSnapshotV3(t *testing.T) { defer func() { os.RemoveAll(tmpPath) }() - s.kv = mvcc.New(zap.NewExample(), be, &lease.FakeLessor{}, nil, &s.consistIndex, mvcc.StoreConfig{}) + s.kv = mvcc.New(be, &lease.FakeLessor{}, nil, &s.consistIndex) s.be = be s.start() diff --git a/etcdserver/util.go b/etcdserver/util.go index 4f73e86a794..eb11d5d0de4 100644 --- a/etcdserver/util.go +++ b/etcdserver/util.go @@ -109,22 +109,16 @@ func warnOfExpensiveRequest(now time.Time, reqStringer fmt.Stringer, respMsg pro warnOfExpensiveGenericRequest(now, reqStringer, "", resp, err) } -func warnOfFailedRequest(lg *zap.Logger, now time.Time, reqStringer fmt.Stringer, respMsg proto.Message, err error) { +func warnOfFailedRequest(now time.Time, reqStringer fmt.Stringer, respMsg proto.Message, err error) { var resp string if 
!isNil(respMsg) { resp = fmt.Sprintf("size:%d", proto.Size(respMsg)) } d := time.Since(now) - plog.Warningf( - "failed to apply request", - zap.Duration("took", d), - zap.String("request", reqStringer.String()), - zap.String("response", resp), - zap.Error(err), - ) + plog.Warningf("failed to apply request,took %v,request %s,resp %s,err is %v", d, reqStringer.String(), resp, err) } -func warnOfExpensiveReadOnlyTxnRequest(lg *zap.Logger, now time.Time, r *pb.TxnRequest, txnResponse *pb.TxnResponse, err error) { +func warnOfExpensiveReadOnlyTxnRequest(now time.Time, r *pb.TxnRequest, txnResponse *pb.TxnResponse, err error) { reqStringer := pb.NewLoggableTxnRequest(r) var resp string if !isNil(txnResponse) { diff --git a/integration/v3_watch_test.go b/integration/v3_watch_test.go index c91f4df6503..44c288c4956 100644 --- a/integration/v3_watch_test.go +++ b/integration/v3_watch_test.go @@ -781,9 +781,11 @@ func testV3WatchMultipleEventsTxn(t *testing.T, startRev int64) { type eventsSortByKey []*mvccpb.Event -func (evs eventsSortByKey) Len() int { return len(evs) } -func (evs eventsSortByKey) Swap(i, j int) { evs[i], evs[j] = evs[j], evs[i] } -func (evs eventsSortByKey) Less(i, j int) bool { return bytes.Compare(evs[i].Kv.Key, evs[j].Kv.Key) < 0 } +func (evs eventsSortByKey) Len() int { return len(evs) } +func (evs eventsSortByKey) Swap(i, j int) { evs[i], evs[j] = evs[j], evs[i] } +func (evs eventsSortByKey) Less(i, j int) bool { + return bytes.Compare(evs[i].Kv.Key, evs[j].Kv.Key) < 0 +} func TestV3WatchMultipleEventsPutUnsynced(t *testing.T) { defer testutil.AfterTest(t) diff --git a/mvcc/kv_test.go b/mvcc/kv_test.go index 722108c78c2..bafaac81ee7 100644 --- a/mvcc/kv_test.go +++ b/mvcc/kv_test.go @@ -710,7 +710,7 @@ func TestKVSnapshot(t *testing.T) { func TestWatchableKVWatch(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() - s := WatchableKV(newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{})) + s := 
WatchableKV(newWatchableStore(b, &lease.FakeLessor{}, nil, nil)) defer cleanup(s, b, tmpPath) w := s.NewWatchStream() diff --git a/mvcc/watchable_store.go b/mvcc/watchable_store.go index 2c332b36081..8f12bddba88 100644 --- a/mvcc/watchable_store.go +++ b/mvcc/watchable_store.go @@ -15,10 +15,10 @@ package mvcc import ( - "go.etcd.io/etcd/auth" "sync" "time" + "github.com/coreos/etcd/auth" "github.com/coreos/etcd/lease" "github.com/coreos/etcd/mvcc/backend" "github.com/coreos/etcd/mvcc/mvccpb" @@ -68,11 +68,11 @@ type watchableStore struct { // cancel operations. type cancelFunc func() -func New(lg *zap.Logger, b backend.Backend, le lease.Lessor, as auth.AuthStore, ig ConsistentIndexGetter, cfg StoreConfig) ConsistentWatchableKV { - return newWatchableStore(lg, b, le, as, ig, cfg) +func New(b backend.Backend, le lease.Lessor, as auth.AuthStore, ig ConsistentIndexGetter) ConsistentWatchableKV { + return newWatchableStore(b, le, as, ig) } -func newWatchableStore(lg *zap.Logger, b backend.Backend, le lease.Lessor, as auth.AuthStore, ig ConsistentIndexGetter, cfg StoreConfig) *watchableStore { +func newWatchableStore(b backend.Backend, le lease.Lessor, as auth.AuthStore, ig ConsistentIndexGetter) *watchableStore { s := &watchableStore{ store: NewStore(b, le, ig), victimc: make(chan struct{}, 1), diff --git a/mvcc/watchable_store_bench_test.go b/mvcc/watchable_store_bench_test.go index 053d841cd60..48b05c69eec 100644 --- a/mvcc/watchable_store_bench_test.go +++ b/mvcc/watchable_store_bench_test.go @@ -25,7 +25,7 @@ import ( func BenchmarkWatchableStorePut(b *testing.B) { be, tmpPath := backend.NewDefaultTmpBackend() - s := New(zap.NewExample(), be, &lease.FakeLessor{}, nil, nil, StoreConfig{}) + s := New(be, &lease.FakeLessor{}, nil, nil) defer cleanup(s, be, tmpPath) // arbitrary number of bytes @@ -46,7 +46,7 @@ func BenchmarkWatchableStorePut(b *testing.B) { func BenchmarkWatchableStoreTxnPut(b *testing.B) { var i fakeConsistentIndex be, tmpPath := 
backend.NewDefaultTmpBackend() - s := New(zap.NewExample(), be, &lease.FakeLessor{}, nil, &i, StoreConfig{}) + s := New(be, &lease.FakeLessor{}, nil, &i) defer cleanup(s, be, tmpPath) // arbitrary number of bytes @@ -67,7 +67,7 @@ func BenchmarkWatchableStoreTxnPut(b *testing.B) { // many synced watchers receiving a Put notification. func BenchmarkWatchableStoreWatchSyncPut(b *testing.B) { be, tmpPath := backend.NewDefaultTmpBackend() - s := newWatchableStore(zap.NewExample(), be, &lease.FakeLessor{}, nil, nil, StoreConfig{}) + s := newWatchableStore(be, &lease.FakeLessor{}, nil, nil) defer cleanup(s, be, tmpPath) k := []byte("testkey") @@ -162,7 +162,7 @@ func BenchmarkWatchableStoreUnsyncedCancel(b *testing.B) { func BenchmarkWatchableStoreSyncedCancel(b *testing.B) { be, tmpPath := backend.NewDefaultTmpBackend() - s := newWatchableStore(zap.NewExample(), be, &lease.FakeLessor{}, nil, nil, StoreConfig{}) + s := newWatchableStore(be, &lease.FakeLessor{}, nil, nil) defer func() { s.store.Close() diff --git a/mvcc/watchable_store_test.go b/mvcc/watchable_store_test.go index f6086079400..b1133556ad8 100644 --- a/mvcc/watchable_store_test.go +++ b/mvcc/watchable_store_test.go @@ -30,11 +30,7 @@ import ( func TestWatch(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() -<<<<<<< HEAD - s := newWatchableStore(b, &lease.FakeLessor{}, nil) -======= - s := newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{}) ->>>>>>> *: fix auth revision corruption bug + s := newWatchableStore(b, &lease.FakeLessor{}, nil, nil) defer func() { s.store.Close() @@ -56,11 +52,7 @@ func TestWatch(t *testing.T) { func TestNewWatcherCancel(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() -<<<<<<< HEAD - s := newWatchableStore(b, &lease.FakeLessor{}, nil) -======= - s := newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{}) ->>>>>>> *: fix auth revision corruption bug + s := newWatchableStore(b, &lease.FakeLessor{}, 
nil, nil) defer func() { s.store.Close() @@ -230,11 +222,7 @@ func TestSyncWatchers(t *testing.T) { // TestWatchCompacted tests a watcher that watches on a compacted revision. func TestWatchCompacted(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() -<<<<<<< HEAD - s := newWatchableStore(b, &lease.FakeLessor{}, nil) -======= - s := newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{}) ->>>>>>> *: fix auth revision corruption bug + s := newWatchableStore(b, &lease.FakeLessor{}, nil, nil) defer func() { s.store.Close() @@ -271,11 +259,7 @@ func TestWatchCompacted(t *testing.T) { func TestWatchFutureRev(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() -<<<<<<< HEAD - s := newWatchableStore(b, &lease.FakeLessor{}, nil) -======= - s := newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{}) ->>>>>>> *: fix auth revision corruption bug + s := newWatchableStore(b, &lease.FakeLessor{}, nil, nil) defer func() { s.store.Close() @@ -316,11 +300,7 @@ func TestWatchRestore(t *testing.T) { test := func(delay time.Duration) func(t *testing.T) { return func(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() -<<<<<<< HEAD - s := newWatchableStore(b, &lease.FakeLessor{}, nil) -======= - s := newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{}) ->>>>>>> *: fix auth revision corruption bug + s := newWatchableStore(b, &lease.FakeLessor{}, nil, nil) defer cleanup(s, b, tmpPath) testKey := []byte("foo") @@ -328,11 +308,7 @@ func TestWatchRestore(t *testing.T) { rev := s.Put(testKey, testValue, lease.NoLease) newBackend, newPath := backend.NewDefaultTmpBackend() -<<<<<<< HEAD - newStore := newWatchableStore(newBackend, &lease.FakeLessor{}, nil) -======= - newStore := newWatchableStore(zap.NewExample(), newBackend, &lease.FakeLessor{}, nil, nil, StoreConfig{}) ->>>>>>> *: fix auth revision corruption bug + newStore := newWatchableStore(newBackend, 
&lease.FakeLessor{}, nil, nil) defer cleanup(newStore, newBackend, newPath) w := newStore.NewWatchStream() @@ -370,19 +346,11 @@ func TestWatchRestore(t *testing.T) { // 5. choose the watcher from step 1, without panic func TestWatchRestoreSyncedWatcher(t *testing.T) { b1, b1Path := backend.NewDefaultTmpBackend() -<<<<<<< HEAD - s1 := newWatchableStore(b1, &lease.FakeLessor{}, nil) + s1 := newWatchableStore(b1, &lease.FakeLessor{}, nil, nil) defer cleanup(s1, b1, b1Path) b2, b2Path := backend.NewDefaultTmpBackend() - s2 := newWatchableStore(b2, &lease.FakeLessor{}, nil) -======= - s1 := newWatchableStore(zap.NewExample(), b1, &lease.FakeLessor{}, nil, nil, StoreConfig{}) - defer cleanup(s1, b1, b1Path) - - b2, b2Path := backend.NewDefaultTmpBackend() - s2 := newWatchableStore(zap.NewExample(), b2, &lease.FakeLessor{}, nil, nil, StoreConfig{}) ->>>>>>> *: fix auth revision corruption bug + s2 := newWatchableStore(b2, &lease.FakeLessor{}, nil, nil) defer cleanup(s2, b2, b2Path) testKey, testValue := []byte("foo"), []byte("bar") @@ -429,11 +397,7 @@ func TestWatchRestoreSyncedWatcher(t *testing.T) { // TestWatchBatchUnsynced tests batching on unsynced watchers func TestWatchBatchUnsynced(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() -<<<<<<< HEAD - s := newWatchableStore(b, &lease.FakeLessor{}, nil) -======= - s := newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{}) ->>>>>>> *: fix auth revision corruption bug + s := newWatchableStore(b, &lease.FakeLessor{}, nil, nil) oldMaxRevs := watchBatchMaxRevs defer func() { @@ -567,11 +531,7 @@ func TestWatchVictims(t *testing.T) { oldChanBufLen, oldMaxWatchersPerSync := chanBufLen, maxWatchersPerSync b, tmpPath := backend.NewDefaultTmpBackend() -<<<<<<< HEAD - s := newWatchableStore(b, &lease.FakeLessor{}, nil) -======= - s := newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{}) ->>>>>>> *: fix auth revision corruption bug + s := 
newWatchableStore(b, &lease.FakeLessor{}, nil, nil) defer func() { s.store.Close() @@ -649,11 +609,7 @@ func TestWatchVictims(t *testing.T) { // canceling its watches. func TestStressWatchCancelClose(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() -<<<<<<< HEAD - s := newWatchableStore(b, &lease.FakeLessor{}, nil) -======= - s := newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{}) ->>>>>>> *: fix auth revision corruption bug + s := newWatchableStore(b, &lease.FakeLessor{}, nil, nil) defer func() { s.store.Close() diff --git a/mvcc/watcher_bench_test.go b/mvcc/watcher_bench_test.go index fa79ec97195..604b1c44094 100644 --- a/mvcc/watcher_bench_test.go +++ b/mvcc/watcher_bench_test.go @@ -24,11 +24,7 @@ import ( func BenchmarkKVWatcherMemoryUsage(b *testing.B) { be, tmpPath := backend.NewDefaultTmpBackend() -<<<<<<< HEAD - watchable := newWatchableStore(be, &lease.FakeLessor{}, nil) -======= - watchable := newWatchableStore(zap.NewExample(), be, &lease.FakeLessor{}, nil, nil, StoreConfig{}) ->>>>>>> *: fix auth revision corruption bug + watchable := newWatchableStore(be, &lease.FakeLessor{}, nil, nil) defer cleanup(watchable, be, tmpPath) diff --git a/mvcc/watcher_test.go b/mvcc/watcher_test.go index b415c682236..078bfe5299d 100644 --- a/mvcc/watcher_test.go +++ b/mvcc/watcher_test.go @@ -31,7 +31,7 @@ import ( // and the watched event attaches the correct watchID. func TestWatcherWatchID(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() - s := WatchableKV(newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{})) + s := WatchableKV(newWatchableStore(b, &lease.FakeLessor{}, nil, nil)) defer cleanup(s, b, tmpPath) w := s.NewWatchStream() @@ -83,7 +83,7 @@ func TestWatcherWatchID(t *testing.T) { // and returns events with matching prefixes. 
func TestWatcherWatchPrefix(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() - s := WatchableKV(newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{})) + s := WatchableKV(newWatchableStore(b, &lease.FakeLessor{}, nil, nil)) defer cleanup(s, b, tmpPath) w := s.NewWatchStream() @@ -157,7 +157,7 @@ func TestWatcherWatchPrefix(t *testing.T) { // does not create watcher, which panics when canceling in range tree. func TestWatcherWatchWrongRange(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() - s := WatchableKV(newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{})) + s := WatchableKV(newWatchableStore(b, &lease.FakeLessor{}, nil, nil)) defer cleanup(s, b, tmpPath) w := s.NewWatchStream() @@ -177,7 +177,7 @@ func TestWatcherWatchWrongRange(t *testing.T) { func TestWatchDeleteRange(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() - s := newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{}) + s := newWatchableStore(b, &lease.FakeLessor{}, nil, nil) defer func() { s.store.Close() @@ -216,7 +216,7 @@ func TestWatchDeleteRange(t *testing.T) { // with given id inside watchStream. 
func TestWatchStreamCancelWatcherByID(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() - s := WatchableKV(newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{})) + s := WatchableKV(newWatchableStore(b, &lease.FakeLessor{}, nil, nil)) defer cleanup(s, b, tmpPath) w := s.NewWatchStream() @@ -308,7 +308,7 @@ func TestWatcherRequestProgress(t *testing.T) { func TestWatcherWatchWithFilter(t *testing.T) { b, tmpPath := backend.NewDefaultTmpBackend() - s := WatchableKV(newWatchableStore(zap.NewExample(), b, &lease.FakeLessor{}, nil, nil, StoreConfig{})) + s := WatchableKV(newWatchableStore(b, &lease.FakeLessor{}, nil, nil)) defer cleanup(s, b, tmpPath) w := s.NewWatchStream() From 1b5e2f43056fdbcf45f8db0ddf8decc6446203a2 Mon Sep 17 00:00:00 2001 From: Changxin Miao Date: Thu, 7 May 2020 06:32:08 +0800 Subject: [PATCH 29/56] Update grpc-gateway to 1.3.1 (#11843) --- glide.lock | 10 ++-- glide.yaml | 6 +- .../grpc-gateway/runtime/convert.go | 18 ++++++ .../grpc-gateway/runtime/handler.go | 34 ++++++++--- .../grpc-gateway/runtime/marshal_json.go | 5 ++ .../grpc-gateway/runtime/marshal_jsonpb.go | 5 ++ .../grpc-gateway/runtime/marshaler.go | 6 ++ vendor/go.uber.org/atomic/atomic.go | 9 ++- vendor/go.uber.org/multierr/error.go | 58 +++++++++++++++++-- 9 files changed, 127 insertions(+), 24 deletions(-) diff --git a/glide.lock b/glide.lock index 5fcf059ca41..9a65ad94c52 100644 --- a/glide.lock +++ b/glide.lock @@ -1,5 +1,5 @@ -hash: 3fece95d13153dbb91034bd2708f8f7a39510c682dfda4c3ff691f09a61a2fc1 -updated: 2019-08-19T20:45:29.93927922-07:00 +hash: 87224cbe021ea625798ac508c8963f49ff4ceb852d99da50db9d6b7fe95282f4 +updated: 2020-05-04T21:27:57.570766+08:00 imports: - name: github.com/beorn7/perks version: 37c8de3658fcb183f997c4e13e8337516ab753e6 @@ -66,7 +66,7 @@ imports: - name: github.com/grpc-ecosystem/go-grpc-prometheus version: 0dafe0d496ea71181bf2dd039e7e3f44b6bd11a7 - name: github.com/grpc-ecosystem/grpc-gateway - version: 
8cc3a55af3bcf171a1c23a90c4df9cf591706104 + version: 07f5e79768022f9a3265235f0db4ac8c3f675fec subpackages: - runtime - runtime/internal @@ -135,9 +135,9 @@ imports: - name: github.com/xiang90/probing version: 07dd2e8dfe18522e9c447ba95f2fe95262f63bb2 - name: go.uber.org/atomic - version: df976f2515e274675050de7b3f42545de80594fd + version: 845920076a298bdb984fb0f1b86052e4ca0a281c - name: go.uber.org/multierr - version: 3c4937480c32f4c13a875a1829af76c98ca3d40a + version: b587143a48b62b01d337824eab43700af6ffe222 - name: go.uber.org/zap version: 27376062155ad36be76b0f12cf1572a221d3a48c subpackages: diff --git a/glide.yaml b/glide.yaml index 00b10851a1e..32c80c6690b 100644 --- a/glide.yaml +++ b/glide.yaml @@ -49,11 +49,7 @@ import: - package: github.com/google/uuid version: v1.0.0 - package: github.com/grpc-ecosystem/grpc-gateway - version: v1.3.0 - subpackages: - - runtime - - runtime/internal - - utilities + version: v1.3.1 - package: github.com/jonboulle/clockwork version: v0.1.0 - package: github.com/kr/pty diff --git a/vendor/github.com/grpc-ecosystem/grpc-gateway/runtime/convert.go b/vendor/github.com/grpc-ecosystem/grpc-gateway/runtime/convert.go index 1af5cc4ebdd..f8931864100 100644 --- a/vendor/github.com/grpc-ecosystem/grpc-gateway/runtime/convert.go +++ b/vendor/github.com/grpc-ecosystem/grpc-gateway/runtime/convert.go @@ -2,6 +2,10 @@ package runtime import ( "strconv" + + "github.com/golang/protobuf/jsonpb" + "github.com/golang/protobuf/ptypes/duration" + "github.com/golang/protobuf/ptypes/timestamp" ) // String just returns the given string. @@ -56,3 +60,17 @@ func Uint32(val string) (uint32, error) { } return uint32(i), nil } + +// Timestamp converts the given RFC3339 formatted string into a timestamp.Timestamp. +func Timestamp(val string) (*timestamp.Timestamp, error) { + var r *timestamp.Timestamp + err := jsonpb.UnmarshalString(val, r) + return r, err +} + +// Duration converts the given string into a timestamp.Duration. 
+func Duration(val string) (*duration.Duration, error) { + var r *duration.Duration + err := jsonpb.UnmarshalString(val, r) + return r, err +} diff --git a/vendor/github.com/grpc-ecosystem/grpc-gateway/runtime/handler.go b/vendor/github.com/grpc-ecosystem/grpc-gateway/runtime/handler.go index ae6a5d551cf..1770b85344d 100644 --- a/vendor/github.com/grpc-ecosystem/grpc-gateway/runtime/handler.go +++ b/vendor/github.com/grpc-ecosystem/grpc-gateway/runtime/handler.go @@ -34,34 +34,47 @@ func ForwardResponseStream(ctx context.Context, mux *ServeMux, marshaler Marshal w.Header().Set("Transfer-Encoding", "chunked") w.Header().Set("Content-Type", marshaler.ContentType()) if err := handleForwardResponseOptions(ctx, w, nil, opts); err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) + HTTPError(ctx, mux, marshaler, w, req, err) return } - w.WriteHeader(http.StatusOK) - f.Flush() + + var delimiter []byte + if d, ok := marshaler.(Delimited); ok { + delimiter = d.Delimiter() + } else { + delimiter = []byte("\n") + } + + var wroteHeader bool for { resp, err := recv() if err == io.EOF { return } if err != nil { - handleForwardResponseStreamError(marshaler, w, err) + handleForwardResponseStreamError(wroteHeader, marshaler, w, err) return } if err := handleForwardResponseOptions(ctx, w, resp, opts); err != nil { - handleForwardResponseStreamError(marshaler, w, err) + handleForwardResponseStreamError(wroteHeader, marshaler, w, err) return } buf, err := marshaler.Marshal(streamChunk(resp, nil)) if err != nil { grpclog.Printf("Failed to marshal response chunk: %v", err) + handleForwardResponseStreamError(wroteHeader, marshaler, w, err) return } if _, err = w.Write(buf); err != nil { grpclog.Printf("Failed to send response chunk: %v", err) return } + wroteHeader = true + if _, err = w.Write(delimiter); err != nil { + grpclog.Printf("Failed to send delimiter chunk: %v", err) + return + } f.Flush() } } @@ -134,13 +147,20 @@ func handleForwardResponseOptions(ctx 
context.Context, w http.ResponseWriter, re return nil } -func handleForwardResponseStreamError(marshaler Marshaler, w http.ResponseWriter, err error) { +func handleForwardResponseStreamError(wroteHeader bool, marshaler Marshaler, w http.ResponseWriter, err error) { buf, merr := marshaler.Marshal(streamChunk(nil, err)) if merr != nil { grpclog.Printf("Failed to marshal an error: %v", merr) return } - if _, werr := fmt.Fprintf(w, "%s\n", buf); werr != nil { + if !wroteHeader { + s, ok := status.FromError(err) + if !ok { + s = status.New(codes.Unknown, err.Error()) + } + w.WriteHeader(HTTPStatusFromCode(s.Code())) + } + if _, werr := w.Write(buf); werr != nil { grpclog.Printf("Failed to notify error to client: %v", werr) return } diff --git a/vendor/github.com/grpc-ecosystem/grpc-gateway/runtime/marshal_json.go b/vendor/github.com/grpc-ecosystem/grpc-gateway/runtime/marshal_json.go index 0acd2ca29ef..b3a21418be4 100644 --- a/vendor/github.com/grpc-ecosystem/grpc-gateway/runtime/marshal_json.go +++ b/vendor/github.com/grpc-ecosystem/grpc-gateway/runtime/marshal_json.go @@ -35,3 +35,8 @@ func (j *JSONBuiltin) NewDecoder(r io.Reader) Decoder { func (j *JSONBuiltin) NewEncoder(w io.Writer) Encoder { return json.NewEncoder(w) } + +// Delimiter for newline encoded JSON streams. +func (j *JSONBuiltin) Delimiter() []byte { + return []byte("\n") +} diff --git a/vendor/github.com/grpc-ecosystem/grpc-gateway/runtime/marshal_jsonpb.go b/vendor/github.com/grpc-ecosystem/grpc-gateway/runtime/marshal_jsonpb.go index 49f13f7fc74..d42cc593e51 100644 --- a/vendor/github.com/grpc-ecosystem/grpc-gateway/runtime/marshal_jsonpb.go +++ b/vendor/github.com/grpc-ecosystem/grpc-gateway/runtime/marshal_jsonpb.go @@ -182,3 +182,8 @@ type protoEnum interface { } var typeProtoMessage = reflect.TypeOf((*proto.Message)(nil)).Elem() + +// Delimiter for newline encoded JSON streams. 
+func (j *JSONPb) Delimiter() []byte { + return []byte("\n") +} diff --git a/vendor/github.com/grpc-ecosystem/grpc-gateway/runtime/marshaler.go b/vendor/github.com/grpc-ecosystem/grpc-gateway/runtime/marshaler.go index 6d434f13cb4..98fe6e88ac5 100644 --- a/vendor/github.com/grpc-ecosystem/grpc-gateway/runtime/marshaler.go +++ b/vendor/github.com/grpc-ecosystem/grpc-gateway/runtime/marshaler.go @@ -40,3 +40,9 @@ type EncoderFunc func(v interface{}) error // Encode delegates invocations to the underlying function itself. func (f EncoderFunc) Encode(v interface{}) error { return f(v) } + +// Delimited defines the streaming delimiter. +type Delimited interface { + // Delimiter returns the record seperator for the stream. + Delimiter() []byte +} diff --git a/vendor/go.uber.org/atomic/atomic.go b/vendor/go.uber.org/atomic/atomic.go index 1db6849fca0..ad5fa0980a7 100644 --- a/vendor/go.uber.org/atomic/atomic.go +++ b/vendor/go.uber.org/atomic/atomic.go @@ -250,11 +250,16 @@ func (b *Bool) Swap(new bool) bool { // Toggle atomically negates the Boolean and returns the previous value. func (b *Bool) Toggle() bool { - return truthy(atomic.AddUint32(&b.v, 1) - 1) + for { + old := b.Load() + if b.CAS(old, !old) { + return old + } + } } func truthy(n uint32) bool { - return n&1 == 1 + return n == 1 } func boolToInt(b bool) uint32 { diff --git a/vendor/go.uber.org/multierr/error.go b/vendor/go.uber.org/multierr/error.go index de6ce4736c8..04eb9618c10 100644 --- a/vendor/go.uber.org/multierr/error.go +++ b/vendor/go.uber.org/multierr/error.go @@ -1,4 +1,4 @@ -// Copyright (c) 2017 Uber Technologies, Inc. +// Copyright (c) 2019 Uber Technologies, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -33,7 +33,7 @@ // If only two errors are being combined, the Append function may be used // instead. 
// -// err = multierr.Combine(reader.Close(), writer.Close()) +// err = multierr.Append(reader.Close(), writer.Close()) // // This makes it possible to record resource cleanup failures from deferred // blocks with the help of named return values. @@ -99,8 +99,6 @@ var ( // Separator for single-line error messages. _singlelineSeparator = []byte("; ") - _newline = []byte("\n") - // Prefix for multi-line messages _multilinePrefix = []byte("the following errors occurred:") @@ -132,7 +130,7 @@ type errorGroup interface { } // Errors returns a slice containing zero or more errors that the supplied -// error is composed of. If the error is nil, the returned slice is empty. +// error is composed of. If the error is nil, a nil slice is returned. // // err := multierr.Append(r.Close(), w.Close()) // errors := multierr.Errors(err) @@ -399,3 +397,53 @@ func Append(left error, right error) error { errors := [2]error{left, right} return fromSlice(errors[0:]) } + +// AppendInto appends an error into the destination of an error pointer and +// returns whether the error being appended was non-nil. +// +// var err error +// multierr.AppendInto(&err, r.Close()) +// multierr.AppendInto(&err, w.Close()) +// +// The above is equivalent to, +// +// err := multierr.Append(r.Close(), w.Close()) +// +// As AppendInto reports whether the provided error was non-nil, it may be +// used to build a multierr error in a loop more ergonomically. 
For example:
+//
+//	var err error
+//	for line := range lines {
+//		var item Item
+//		if multierr.AppendInto(&err, parse(line, &item)) {
+//			continue
+//		}
+//		items = append(items, item)
+//	}
+//
+// Compare this with a version that relies solely on Append:
+//
+//	var err error
+//	for line := range lines {
+//		var item Item
+//		if parseErr := parse(line, &item); parseErr != nil {
+//			err = multierr.Append(err, parseErr)
+//			continue
+//		}
+//		items = append(items, item)
+//	}
+func AppendInto(into *error, err error) (errored bool) {
+	if into == nil {
+		// We panic if 'into' is nil. This is not documented above
+		// because suggesting that the pointer must be non-nil may
+		// confuse users into thinking that the error that it points
+		// to must be non-nil.
+		panic("misuse of multierr.AppendInto: into pointer must not be nil")
+	}
+
+	if err == nil {
+		return false
+	}
+	*into = Append(*into, err)
+	return true
+}
From 5f799922a84795a7dbd5c0577bc3c3e6e42baaf5 Mon Sep 17 00:00:00 2001
From: tangcong
Date: Sun, 26 Apr 2020 21:05:30 +0800
Subject: [PATCH 30/56] mvcc: fix deadlock bug

---
 mvcc/kvstore.go | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/mvcc/kvstore.go b/mvcc/kvstore.go
index ba34cd1f314..2a8a0361a0b 100644
--- a/mvcc/kvstore.go
+++ b/mvcc/kvstore.go
@@ -142,14 +142,18 @@ func NewStore(b backend.Backend, le lease.Lessor, ig ConsistentIndexGetter) *sto
 
 func (s *store) compactBarrier(ctx context.Context, ch chan struct{}) {
 	if ctx == nil || ctx.Err() != nil {
-		s.mu.Lock()
 		select {
 		case <-s.stopc:
 		default:
+			// fix deadlock in mvcc, for more information, please refer to pr 11817.
+			// s.stopc is only updated in restore operation, which is called by apply
+			// snapshot call, compaction and apply snapshot requests are serialized by
+			// raft, and do not happen at the same time.
+ s.mu.Lock() f := func(ctx context.Context) { s.compactBarrier(ctx, ch) } s.fifoSched.Schedule(f) + s.mu.Unlock() } - s.mu.Unlock() return } close(ch) From 9caec0d124dc1e905caea3ddeab57554c539dcd9 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Mon, 18 May 2020 01:54:15 -0700 Subject: [PATCH 31/56] etcdserver,wal: fix inconsistencies in WAL and snapshot ref. https://github.com/etcd-io/etcd/issues/10219 Signed-off-by: Gyuho Lee --- etcdserver/raft.go | 26 +++++-- etcdserver/server.go | 14 +++- etcdserver/server_test.go | 26 ++++--- etcdserver/storage.go | 26 +++++-- pkg/mock/mockstorage/storage_recorder.go | 12 ++++ snap/snapshotter.go | 71 +++++++++++++++---- snap/snapshotter_test.go | 87 ++++++++++++++++++++++-- wal/wal.go | 61 +++++++++++++++++ wal/wal_test.go | 53 +++++++++++++++ 9 files changed, 340 insertions(+), 36 deletions(-) diff --git a/etcdserver/raft.go b/etcdserver/raft.go index f73df6c7dc0..212dccbdcc8 100644 --- a/etcdserver/raft.go +++ b/etcdserver/raft.go @@ -228,9 +228,19 @@ func (r *raftNode) start(rh *raftReadyHandler) { r.transport.Send(r.processMessages(rd.Messages)) } + // Must save the snapshot file and WAL snapshot entry before saving any other entries or hardstate to + // ensure that recovery after a snapshot restore is possible. 
+ if !raft.IsEmptySnap(rd.Snapshot) { + // gofail: var raftBeforeSaveSnap struct{} + if err := r.storage.SaveSnap(rd.Snapshot); err != nil { + plog.Fatalf("failed to save Raft snapshot %v", err) + } + // gofail: var raftAfterSaveSnap struct{} + } + // gofail: var raftBeforeSave struct{} if err := r.storage.Save(rd.HardState, rd.Entries); err != nil { - plog.Fatalf("raft save state and entries error: %v", err) + plog.Fatalf("failed to raft save state and entries %v", err) } if !raft.IsEmptyHardState(rd.HardState) { proposalsCommitted.Set(float64(rd.HardState.Commit)) @@ -238,10 +248,14 @@ func (r *raftNode) start(rh *raftReadyHandler) { // gofail: var raftAfterSave struct{} if !raft.IsEmptySnap(rd.Snapshot) { - // gofail: var raftBeforeSaveSnap struct{} - if err := r.storage.SaveSnap(rd.Snapshot); err != nil { - plog.Fatalf("raft save snapshot error: %v", err) + // Force WAL to fsync its hard state before Release() releases + // old data from the WAL. Otherwise could get an error like: + // panic: tocommit(107) is out of range [lastIndex(84)]. Was the raft log corrupted, truncated, or lost? + // See https://github.com/etcd-io/etcd/issues/10219 for more details. 
+ if err := r.storage.Sync(); err != nil { + plog.Fatalf("failed to sync Raft snapshot %v", err) } + // etcdserver now claim the snapshot has been persisted onto the disk notifyc <- struct{}{} @@ -249,6 +263,10 @@ func (r *raftNode) start(rh *raftReadyHandler) { r.raftStorage.ApplySnapshot(rd.Snapshot) plog.Infof("raft applied incoming snapshot at index %d", rd.Snapshot.Metadata.Index) // gofail: var raftAfterApplySnap struct{} + + if err := r.storage.Release(rd.Snapshot); err != nil { + plog.Fatalf("failed to release Raft wal %v", err) + } } r.raftStorage.Append(rd.Entries) diff --git a/etcdserver/server.go b/etcdserver/server.go index 7c1948c5aa4..4fedb200734 100644 --- a/etcdserver/server.go +++ b/etcdserver/server.go @@ -375,7 +375,15 @@ func NewServer(cfg ServerConfig) (srv *EtcdServer, err error) { if cfg.ShouldDiscover() { plog.Warningf("discovery token ignored since a cluster has already been initialized. Valid log found at %q", cfg.WALDir()) } - snapshot, err = ss.Load() + + // Find a snapshot to start/restart a raft node + walSnaps, serr := wal.ValidSnapshotEntries(cfg.WALDir()) + if serr != nil { + return nil, serr + } + // snapshot files can be orphaned if etcd crashes after writing them but before writing the corresponding + // wal log entries + snapshot, err = ss.LoadNewestAvailable(walSnaps) if err != nil && err != snap.ErrNoSnapshot { return nil, err } @@ -1556,6 +1564,10 @@ func (s *EtcdServer) snapshot(snapi uint64, confState raftpb.ConfState) { } plog.Infof("saved snapshot at index %d", snap.Metadata.Index) + if err = s.r.storage.Release(snap); err != nil { + plog.Panicf("failed to release wal %v", err) + } + // When sending a snapshot, etcd will pause compaction. // After receives a snapshot, the slow follower needs to get all the entries right after // the snapshot sent to catch up. 
If we do not pause compaction, the log entries right after diff --git a/etcdserver/server_test.go b/etcdserver/server_test.go index 7ab52b5367e..90d2f6af532 100644 --- a/etcdserver/server_test.go +++ b/etcdserver/server_test.go @@ -922,11 +922,11 @@ func TestSnapshot(t *testing.T) { ch := make(chan struct{}, 2) go func() { - gaction, _ := p.Wait(1) + gaction, _ := p.Wait(2) defer func() { ch <- struct{}{} }() - if len(gaction) != 1 { - t.Fatalf("len(action) = %d, want 1", len(gaction)) + if len(gaction) != 2 { + t.Fatalf("len(action) = %d, want 2", len(gaction)) } if !reflect.DeepEqual(gaction[0], testutil.Action{Name: "SaveSnap"}) { t.Errorf("action = %s, want SaveSnap", gaction[0]) @@ -1013,6 +1013,9 @@ func TestSnapshotOrdering(t *testing.T) { if ac := <-p.Chan(); ac.Name != "Save" { t.Fatalf("expected Save, got %+v", ac) } + if ac := <-p.Chan(); ac.Name != "SaveSnap" { + t.Fatalf("expected Save, got %+v", ac) + } if ac := <-p.Chan(); ac.Name != "Save" { t.Fatalf("expected Save, got %+v", ac) } @@ -1022,7 +1025,10 @@ func TestSnapshotOrdering(t *testing.T) { t.Fatalf("expected file %q, got missing", snapPath) } // unblock SaveSnapshot, etcdserver now permitted to move snapshot file - if ac := <-p.Chan(); ac.Name != "SaveSnap" { + if ac := <-p.Chan(); ac.Name != "Sync" { + t.Fatalf("expected SaveSnap, got %+v", ac) + } + if ac := <-p.Chan(); ac.Name != "Release" { t.Fatalf("expected SaveSnap, got %+v", ac) } } @@ -1059,16 +1065,20 @@ func TestTriggerSnap(t *testing.T) { donec := make(chan struct{}) go func() { - wcnt := 2 + snapc + wcnt := 3 + snapc gaction, _ := p.Wait(wcnt) // each operation is recorded as a Save - // (SnapCount+1) * Puts + SaveSnap = (SnapCount+1) * Save + SaveSnap + // (SnapCount+1) * Puts + SaveSnap = (SnapCount+1) * Save + SaveSnap + Release if len(gaction) != wcnt { + t.Logf("gaction: %v", gaction) t.Fatalf("len(action) = %d, want %d", len(gaction), wcnt) } - if !reflect.DeepEqual(gaction[wcnt-1], testutil.Action{Name: "SaveSnap"}) { - 
t.Errorf("action = %s, want SaveSnap", gaction[wcnt-1]) + if !reflect.DeepEqual(gaction[wcnt-2], testutil.Action{Name: "SaveSnap"}) { + t.Errorf("action = %s, want SaveSnap", gaction[wcnt-2]) + } + if !reflect.DeepEqual(gaction[wcnt-1], testutil.Action{Name: "Release"}) { + t.Errorf("action = %s, want Release", gaction[wcnt-1]) } close(donec) }() diff --git a/etcdserver/storage.go b/etcdserver/storage.go index 55c2dd4b6a4..2cddb5cb218 100644 --- a/etcdserver/storage.go +++ b/etcdserver/storage.go @@ -34,6 +34,10 @@ type Storage interface { SaveSnap(snap raftpb.Snapshot) error // Close closes the Storage and performs finalization. Close() error + // Release releases the locked wal files older than the provided snapshot. + Release(snap raftpb.Snapshot) error + // Sync WAL + Sync() error } type storage struct { @@ -45,22 +49,32 @@ func NewStorage(w *wal.WAL, s *snap.Snapshotter) Storage { return &storage{w, s} } -// SaveSnap saves the snapshot to disk and release the locked -// wal files since they will not be used. +// SaveSnap saves the snapshot file to disk and writes the WAL snapshot entry. func (st *storage) SaveSnap(snap raftpb.Snapshot) error { walsnap := walpb.Snapshot{ Index: snap.Metadata.Index, Term: snap.Metadata.Term, } - err := st.WAL.SaveSnapshot(walsnap) + + // save the snapshot file before writing the snapshot to the wal. + // This makes it possible for the snapshot file to become orphaned, but prevents + // a WAL snapshot entry from having no corresponding snapshot file. + err := st.Snapshotter.SaveSnap(snap) if err != nil { return err } - err = st.Snapshotter.SaveSnap(snap) - if err != nil { + + return st.WAL.SaveSnapshot(walsnap) +} + +// Release releases resources older than the given snap and are no longer needed: +// - releases the locks to the wal files that are older than the provided wal for the given snap. +// - deletes any .snap.db files that are older than the given snap. 
+func (st *storage) Release(snap raftpb.Snapshot) error { + if err := st.WAL.ReleaseLockTo(snap.Metadata.Index); err != nil { return err } - return st.WAL.ReleaseLockTo(snap.Metadata.Index) + return st.Snapshotter.ReleaseSnapDBs(snap) } func readWAL(waldir string, snap walpb.Snapshot) (w *wal.WAL, id, cid types.ID, st raftpb.HardState, ents []raftpb.Entry) { diff --git a/pkg/mock/mockstorage/storage_recorder.go b/pkg/mock/mockstorage/storage_recorder.go index 4ecab9831b3..362912f6dcc 100644 --- a/pkg/mock/mockstorage/storage_recorder.go +++ b/pkg/mock/mockstorage/storage_recorder.go @@ -45,4 +45,16 @@ func (p *storageRecorder) SaveSnap(st raftpb.Snapshot) error { return nil } +func (p *storageRecorder) Release(st raftpb.Snapshot) error { + if !raft.IsEmptySnap(st) { + p.Record(testutil.Action{Name: "Release"}) + } + return nil +} + +func (p *storageRecorder) Sync() error { + p.Record(testutil.Action{Name: "Sync"}) + return nil +} + func (p *storageRecorder) Close() error { return nil } diff --git a/snap/snapshotter.go b/snap/snapshotter.go index 5d79b175915..1d73a1c2a27 100644 --- a/snap/snapshotter.go +++ b/snap/snapshotter.go @@ -23,6 +23,7 @@ import ( "os" "path/filepath" "sort" + "strconv" "strings" "time" @@ -31,7 +32,7 @@ import ( "github.com/coreos/etcd/raft" "github.com/coreos/etcd/raft/raftpb" "github.com/coreos/etcd/snap/snappb" - + "github.com/coreos/etcd/wal/walpb" "github.com/coreos/pkg/capnslog" ) @@ -80,9 +81,8 @@ func (s *Snapshotter) save(snapshot *raftpb.Snapshot) error { d, err := snap.Marshal() if err != nil { return err - } else { - marshallingDurations.Observe(float64(time.Since(start)) / float64(time.Second)) } + marshallingDurations.Observe(float64(time.Since(start)) / float64(time.Second)) err = pioutil.WriteAndSyncFile(filepath.Join(s.dir, fname), d, 0666) if err == nil { @@ -97,20 +97,35 @@ func (s *Snapshotter) save(snapshot *raftpb.Snapshot) error { } func (s *Snapshotter) Load() (*raftpb.Snapshot, error) { + return 
s.loadMatching(func(*raftpb.Snapshot) bool { return true }) +} + +// LoadNewestAvailable loads the newest snapshot available that is in walSnaps. +func (s *Snapshotter) LoadNewestAvailable(walSnaps []walpb.Snapshot) (*raftpb.Snapshot, error) { + return s.loadMatching(func(snapshot *raftpb.Snapshot) bool { + m := snapshot.Metadata + for i := len(walSnaps) - 1; i >= 0; i-- { + if m.Term == walSnaps[i].Term && m.Index == walSnaps[i].Index { + return true + } + } + return false + }) +} + +// loadMatching returns the newest snapshot where matchFn returns true. +func (s *Snapshotter) loadMatching(matchFn func(*raftpb.Snapshot) bool) (*raftpb.Snapshot, error) { names, err := s.snapNames() if err != nil { return nil, err } var snap *raftpb.Snapshot for _, name := range names { - if snap, err = loadSnap(s.dir, name); err == nil { - break + if snap, err = loadSnap(s.dir, name); err == nil && matchFn(snap) { + return snap, nil } } - if err != nil { - return nil, ErrNoSnapshot - } - return snap, nil + return nil, ErrNoSnapshot } func loadSnap(dir, name string) (*raftpb.Snapshot, error) { @@ -172,7 +187,8 @@ func (s *Snapshotter) snapNames() ([]string, error) { if err != nil { return nil, err } - if err = s.cleanupSnapdir(names); err != nil { + names, err = s.cleanupSnapdir(names) + if err != nil { return nil, err } snaps := checkSuffix(names) @@ -208,12 +224,43 @@ func renameBroken(path string) { // cleanupSnapdir removes any files that should not be in the snapshot directory: // - db.tmp prefixed files that can be orphaned by defragmentation -func (s *Snapshotter) cleanupSnapdir(filenames []string) error { +func (s *Snapshotter) cleanupSnapdir(filenames []string) (names []string, err error) { for _, filename := range filenames { if strings.HasPrefix(filename, "db.tmp") { plog.Infof("found orphaned defragmentation file; deleting: %s", filename) if rmErr := os.Remove(filepath.Join(s.dir, filename)); rmErr != nil && !os.IsNotExist(rmErr) { - return fmt.Errorf("failed to remove 
orphaned defragmentation file %s: %v", filename, rmErr) + return nil, fmt.Errorf("failed to remove orphaned defragmentation file %s: %v", filename, rmErr) + } + continue + } + names = append(names, filename) + } + return names, nil +} + +func (s *Snapshotter) ReleaseSnapDBs(snap raftpb.Snapshot) error { + dir, err := os.Open(s.dir) + if err != nil { + return err + } + defer dir.Close() + filenames, err := dir.Readdirnames(-1) + if err != nil { + return err + } + for _, filename := range filenames { + if strings.HasSuffix(filename, ".snap.db") { + hexIndex := strings.TrimSuffix(filepath.Base(filename), ".snap.db") + index, err := strconv.ParseUint(hexIndex, 16, 64) + if err != nil { + plog.Warningf("failed to parse index from filename: %s (%v)", filename, err) + continue + } + if index < snap.Metadata.Index { + plog.Infof("found orphaned .snap.db file; deleting %q", filename) + if rmErr := os.Remove(filepath.Join(s.dir, filename)); rmErr != nil && !os.IsNotExist(rmErr) { + plog.Warningf("failed to remove orphaned .snap.db file: %s (%v)", filename, rmErr) + } } } } diff --git a/snap/snapshotter_test.go b/snap/snapshotter_test.go index 6af823f03bf..926b75a807f 100644 --- a/snap/snapshotter_test.go +++ b/snap/snapshotter_test.go @@ -23,7 +23,9 @@ import ( "reflect" "testing" + "github.com/coreos/etcd/pkg/fileutil" "github.com/coreos/etcd/raft/raftpb" + "github.com/coreos/etcd/wal/walpb" ) var testSnap = &raftpb.Snapshot{ @@ -165,12 +167,48 @@ func TestLoadNewestSnap(t *testing.T) { t.Fatal(err) } - g, err := ss.Load() - if err != nil { - t.Errorf("err = %v, want nil", err) + cases := []struct { + name string + availableWalSnaps []walpb.Snapshot + expected *raftpb.Snapshot + }{ + { + name: "load-newest", + expected: &newSnap, + }, + { + name: "loadnewestavailable-newest", + availableWalSnaps: []walpb.Snapshot{{Index: 0, Term: 0}, {Index: 1, Term: 1}, {Index: 5, Term: 1}}, + expected: &newSnap, + }, + { + name: "loadnewestavailable-newest-unsorted", + availableWalSnaps: 
[]walpb.Snapshot{{Index: 5, Term: 1}, {Index: 1, Term: 1}, {Index: 0, Term: 0}}, + expected: &newSnap, + }, + { + name: "loadnewestavailable-previous", + availableWalSnaps: []walpb.Snapshot{{Index: 0, Term: 0}, {Index: 1, Term: 1}}, + expected: testSnap, + }, } - if !reflect.DeepEqual(g, &newSnap) { - t.Errorf("snap = %#v, want %#v", g, &newSnap) + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + var err error + var g *raftpb.Snapshot + if tc.availableWalSnaps != nil { + g, err = ss.LoadNewestAvailable(tc.availableWalSnaps) + } else { + g, err = ss.Load() + } + if err != nil { + t.Errorf("err = %v, want nil", err) + } + if !reflect.DeepEqual(g, tc.expected) { + t.Errorf("snap = %#v, want %#v", g, tc.expected) + } + }) } } @@ -228,3 +266,42 @@ func TestAllSnapshotBroken(t *testing.T) { t.Errorf("err = %v, want %v", err, ErrNoSnapshot) } } + +func TestReleaseSnapDBs(t *testing.T) { + dir := filepath.Join(os.TempDir(), "snapshot") + err := os.Mkdir(dir, 0700) + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dir) + + snapIndices := []uint64{100, 200, 300, 400} + for _, index := range snapIndices { + filename := filepath.Join(dir, fmt.Sprintf("%016x.snap.db", index)) + if err := ioutil.WriteFile(filename, []byte("snap file\n"), 0644); err != nil { + t.Fatal(err) + } + } + + ss := New(dir) + + if err := ss.ReleaseSnapDBs(raftpb.Snapshot{Metadata: raftpb.SnapshotMetadata{Index: 300}}); err != nil { + t.Fatal(err) + } + + deleted := []uint64{100, 200} + for _, index := range deleted { + filename := filepath.Join(dir, fmt.Sprintf("%016x.snap.db", index)) + if fileutil.Exist(filename) { + t.Errorf("expected %s (index: %d) to be deleted, but it still exists", filename, index) + } + } + + retained := []uint64{300, 400} + for _, index := range retained { + filename := filepath.Join(dir, fmt.Sprintf("%016x.snap.db", index)) + if !fileutil.Exist(filename) { + t.Errorf("expected %s (index: %d) to be retained, but it no longer exists", filename, index) 
+ } + } +} diff --git a/wal/wal.go b/wal/wal.go index 6909e3ac7af..13f7ca5d335 100644 --- a/wal/wal.go +++ b/wal/wal.go @@ -420,6 +420,63 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb. return metadata, state, ents, err } +// ValidSnapshotEntries returns all the valid snapshot entries in the wal logs in the given directory. +// Snapshot entries are valid if their index is less than or equal to the most recent committed hardstate. +func ValidSnapshotEntries(walDir string) ([]walpb.Snapshot, error) { + var snaps []walpb.Snapshot + var state raftpb.HardState + var err error + + rec := &walpb.Record{} + names, err := readWalNames(walDir) + if err != nil { + return nil, err + } + + // open wal files in read mode, so that there is no conflict + // when the same WAL is opened elsewhere in write mode + rs, _, closer, err := openWALFiles(walDir, names, 0, false) + if err != nil { + return nil, err + } + defer func() { + if closer != nil { + closer() + } + }() + + // create a new decoder from the readers on the WAL files + decoder := newDecoder(rs...) + + for err = decoder.decode(rec); err == nil; err = decoder.decode(rec) { + switch rec.Type { + case snapshotType: + var loadedSnap walpb.Snapshot + pbutil.MustUnmarshal(&loadedSnap, rec.Data) + snaps = append(snaps, loadedSnap) + case stateType: + state = mustUnmarshalState(rec.Data) + } + } + // We do not have to read out all the WAL entries + // as the decoder is opened in read mode. + if err != io.EOF && err != io.ErrUnexpectedEOF { + return nil, err + } + + // filter out any snaps that are newer than the committed hardstate + n := 0 + for _, s := range snaps { + if s.Index <= state.Commit { + snaps[n] = s + n++ + } + } + snaps = snaps[:n:n] + + return snaps, nil +} + // Verify reads through the given WAL and verifies that it is not corrupted. // It creates a new decoder to read through the records of the given WAL. 
// It does not conflict with any open WAL, but it is recommended not to @@ -599,6 +656,10 @@ func (w *WAL) sync() error { return err } +func (w *WAL) Sync() error { + return w.sync() +} + // ReleaseLockTo releases the locks, which has smaller index than the given index // except the largest one among them. // For example, if WAL is holding lock 1,2,3,4,5,6, ReleaseLockTo(4) will release diff --git a/wal/wal_test.go b/wal/wal_test.go index b060da19128..4c4b667f0cf 100644 --- a/wal/wal_test.go +++ b/wal/wal_test.go @@ -852,3 +852,56 @@ func TestOpenOnTornWrite(t *testing.T) { t.Fatalf("expected len(ents) = %d, got %d", wEntries, len(ents)) } } + +// TestValidSnapshotEntries ensures ValidSnapshotEntries returns all valid wal snapshot entries, accounting +// for hardstate +func TestValidSnapshotEntries(t *testing.T) { + p, err := ioutil.TempDir(os.TempDir(), "waltest") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(p) + snap0 := walpb.Snapshot{Index: 0, Term: 0} + snap1 := walpb.Snapshot{Index: 1, Term: 1} + state1 := raftpb.HardState{Commit: 1, Term: 1} + snap2 := walpb.Snapshot{Index: 2, Term: 1} + snap3 := walpb.Snapshot{Index: 3, Term: 2} + state2 := raftpb.HardState{Commit: 3, Term: 2} + snap4 := walpb.Snapshot{Index: 4, Term: 2} // will be orphaned since the last committed entry will be snap3 + func() { + var w *WAL + w, err = Create(p, nil) + if err != nil { + t.Fatal(err) + } + defer w.Close() + + // snap0 is implicitly created at index 0, term 0 + if err = w.SaveSnapshot(snap1); err != nil { + t.Fatal(err) + } + if err = w.Save(state1, nil); err != nil { + t.Fatal(err) + } + if err = w.SaveSnapshot(snap2); err != nil { + t.Fatal(err) + } + if err = w.SaveSnapshot(snap3); err != nil { + t.Fatal(err) + } + if err = w.Save(state2, nil); err != nil { + t.Fatal(err) + } + if err = w.SaveSnapshot(snap4); err != nil { + t.Fatal(err) + } + }() + walSnaps, serr := ValidSnapshotEntries(p) + if serr != nil { + t.Fatal(serr) + } + expected := 
[]walpb.Snapshot{snap0, snap1, snap2, snap3} + if !reflect.DeepEqual(walSnaps, expected) { + t.Errorf("expected walSnaps %+v, got %+v", expected, walSnaps) + } +} From 924b8128c2fd14e285f5f07e8645b35f9537c16f Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Mon, 18 May 2020 02:02:34 -0700 Subject: [PATCH 32/56] *: make sure snapshot save downloads SHA256 checksum ref. https://github.com/etcd-io/etcd/pull/11896 Signed-off-by: Gyuho Lee --- clientv3/client.go | 5 ++++ clientv3/maintenance.go | 17 ++++++++--- clientv3/snapshot/v3_snapshot.go | 29 ++++++++++++------- etcdserver/api/v3rpc/maintenance.go | 45 ++++++++++++++++++++++++----- 4 files changed, 74 insertions(+), 22 deletions(-) diff --git a/clientv3/client.go b/clientv3/client.go index 5a15cf5faea..c49e4ba1237 100644 --- a/clientv3/client.go +++ b/clientv3/client.go @@ -31,6 +31,7 @@ import ( "github.com/coreos/etcd/clientv3/credentials" "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes" "github.com/coreos/etcd/pkg/logutil" + "github.com/coreos/pkg/capnslog" "github.com/google/uuid" "go.uber.org/zap" "google.golang.org/grpc" @@ -47,6 +48,10 @@ var ( roundRobinBalancerName = fmt.Sprintf("etcd-%s", picker.RoundrobinBalanced.String()) ) +var ( + plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "clientv3") +) + func init() { lg := zap.NewNop() if os.Getenv("ETCD_CLIENT_DEBUG") != "" { diff --git a/clientv3/maintenance.go b/clientv3/maintenance.go index 6db6c0e96a3..5e87cf8f3ed 100644 --- a/clientv3/maintenance.go +++ b/clientv3/maintenance.go @@ -193,23 +193,32 @@ func (m *maintenance) Snapshot(ctx context.Context) (io.ReadCloser, error) { return nil, toErr(ctx, err) } + plog.Info("opened snapshot stream; downloading") pr, pw := io.Pipe() go func() { for { resp, err := ss.Recv() if err != nil { + switch err { + case io.EOF: + plog.Info("completed snapshot read; closing") + default: + plog.Warningf("failed to receive from snapshot stream; closing (%v)", err) + } pw.CloseWithError(err) return } - if resp 
== nil && err == nil { - break - } + + // can "resp == nil && err == nil" + // before we receive snapshot SHA digest? + // No, server sends EOF with an empty response + // after it sends SHA digest at the end + if _, werr := pw.Write(resp.Blob); werr != nil { pw.CloseWithError(werr) return } } - pw.Close() }() return &snapshotReadCloser{ctx: ctx, ReadCloser: pr}, nil } diff --git a/clientv3/snapshot/v3_snapshot.go b/clientv3/snapshot/v3_snapshot.go index 6bf9244aa66..8751b5b1858 100644 --- a/clientv3/snapshot/v3_snapshot.go +++ b/clientv3/snapshot/v3_snapshot.go @@ -44,6 +44,7 @@ import ( "github.com/coreos/etcd/store" "github.com/coreos/etcd/wal" "github.com/coreos/etcd/wal/walpb" + "github.com/dustin/go-humanize" "go.uber.org/zap" ) @@ -87,6 +88,14 @@ type v3Manager struct { skipHashCheck bool } +// hasChecksum returns "true" if the file size "n" +// has appended sha256 hash digest. +func hasChecksum(n int64) bool { + // 512 is chosen because it's a minimum disk sector size + // smaller than (and multiplies to) OS page size in most systems + return (n % 512) == sha256.Size +} + // Save fetches snapshot from remote etcd server and saves data to target path. 
func (s *v3Manager) Save(ctx context.Context, cfg clientv3.Config, dbPath string) error { if len(cfg.Endpoints) != 1 { @@ -106,10 +115,7 @@ func (s *v3Manager) Save(ctx context.Context, cfg clientv3.Config, dbPath string if err != nil { return fmt.Errorf("could not open %s (%v)", partpath, err) } - s.lg.Info( - "created temporary db file", - zap.String("path", partpath), - ) + s.lg.Info("created temporary db file", zap.String("path", partpath)) now := time.Now() var rd io.ReadCloser @@ -117,13 +123,15 @@ func (s *v3Manager) Save(ctx context.Context, cfg clientv3.Config, dbPath string if err != nil { return err } - s.lg.Info( - "fetching snapshot", - zap.String("endpoint", cfg.Endpoints[0]), - ) - if _, err = io.Copy(f, rd); err != nil { + s.lg.Info("fetching snapshot", zap.String("endpoint", cfg.Endpoints[0])) + var size int64 + size, err = io.Copy(f, rd) + if err != nil { return err } + if !hasChecksum(size) { + return fmt.Errorf("sha256 checksum not found [bytes: %d]", size) + } if err = fileutil.Fsync(f); err != nil { return err } @@ -133,6 +141,7 @@ func (s *v3Manager) Save(ctx context.Context, cfg clientv3.Config, dbPath string s.lg.Info( "fetched snapshot", zap.String("endpoint", cfg.Endpoints[0]), + zap.String("size", humanize.Bytes(uint64(size))), zap.Duration("took", time.Since(now)), ) @@ -344,7 +353,7 @@ func (s *v3Manager) saveDB() error { if serr != nil { return serr } - hasHash := (off % 512) == sha256.Size + hasHash := hasChecksum(off) if hasHash { if err := db.Truncate(off - sha256.Size); err != nil { return err diff --git a/etcdserver/api/v3rpc/maintenance.go b/etcdserver/api/v3rpc/maintenance.go index c9df1800db2..9c168d78b0a 100644 --- a/etcdserver/api/v3rpc/maintenance.go +++ b/etcdserver/api/v3rpc/maintenance.go @@ -18,6 +18,7 @@ import ( "context" "crypto/sha256" "io" + "time" "github.com/coreos/etcd/auth" "github.com/coreos/etcd/etcdserver" @@ -27,6 +28,7 @@ import ( "github.com/coreos/etcd/mvcc/backend" "github.com/coreos/etcd/pkg/types" 
"github.com/coreos/etcd/version" + "github.com/dustin/go-humanize" ) type KVGetter interface { @@ -81,6 +83,9 @@ func (ms *maintenanceServer) Defragment(ctx context.Context, sr *pb.DefragmentRe return &pb.DefragmentResponse{}, nil } +// big enough size to hold >1 OS pages in the buffer +const snapshotSendBufferSize = 32 * 1024 + func (ms *maintenanceServer) Snapshot(sr *pb.SnapshotRequest, srv pb.Maintenance_SnapshotServer) error { snap := ms.bg.Backend().Snapshot() pr, pw := io.Pipe() @@ -95,19 +100,39 @@ func (ms *maintenanceServer) Snapshot(sr *pb.SnapshotRequest, srv pb.Maintenance pw.Close() }() - // send file data + // record SHA digest of snapshot data + // used for integrity checks during snapshot restore operation h := sha256.New() - br := int64(0) - buf := make([]byte, 32*1024) - sz := snap.Size() - for br < sz { + + // buffer just holds read bytes from stream + // response size is multiple of OS page size, fetched in boltdb + // e.g. 4*1024 + buf := make([]byte, snapshotSendBufferSize) + + sent := int64(0) + total := snap.Size() + size := humanize.Bytes(uint64(total)) + + start := time.Now() + plog.Infof("sending database snapshot to client %s [%d bytes]", size, total) + for total-sent > 0 { n, err := io.ReadFull(pr, buf) if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF { return togRPCError(err) } - br += int64(n) + sent += int64(n) + + // if total is x * snapshotSendBufferSize. it is possible that + // resp.RemainingBytes == 0 + // resp.Blob == zero byte but not nil + // does this make server response sent to client nil in proto + // and client stops receiving from snapshot stream before + // server sends snapshot SHA? 
+ // No, the client will still receive non-nil response + // until server closes the stream with EOF + resp := &pb.SnapshotResponse{ - RemainingBytes: uint64(sz - br), + RemainingBytes: uint64(total - sent), Blob: buf[:n], } if err = srv.Send(resp); err != nil { @@ -116,13 +141,17 @@ func (ms *maintenanceServer) Snapshot(sr *pb.SnapshotRequest, srv pb.Maintenance h.Write(buf[:n]) } - // send sha + // send SHA digest for integrity checks + // during snapshot restore operation sha := h.Sum(nil) + + plog.Infof("sending database sha256 checksum to client [%d bytes]", len(sha)) hresp := &pb.SnapshotResponse{RemainingBytes: 0, Blob: sha} if err := srv.Send(hresp); err != nil { return togRPCError(err) } + plog.Infof("successfully sent database snapshot to client %s [%d bytes, took %s]", size, total, humanize.Time(start)) return nil } From 672314546b87a36a0cd49c37fbc586a492d6344d Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Mon, 18 May 2020 02:17:44 -0700 Subject: [PATCH 33/56] rafthttp: improve snapshot logging Signed-off-by: Gyuho Lee --- rafthttp/http.go | 11 ++++++++--- rafthttp/snapshot_sender.go | 5 ++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/rafthttp/http.go b/rafthttp/http.go index 817d6c64ba6..3dff6fe1a16 100644 --- a/rafthttp/http.go +++ b/rafthttp/http.go @@ -29,6 +29,7 @@ import ( "github.com/coreos/etcd/raft/raftpb" "github.com/coreos/etcd/snap" "github.com/coreos/etcd/version" + "github.com/dustin/go-humanize" ) const ( @@ -194,7 +195,9 @@ func (h *snapshotHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { return } - receivedBytes.WithLabelValues(from).Add(float64(m.Size())) + msgSizeVal := m.Size() + msgSize := humanize.Bytes(uint64(msgSizeVal)) + receivedBytes.WithLabelValues(from).Add(float64(msgSizeVal)) if m.Type != raftpb.MsgSnap { plog.Errorf("unexpected raft message type %s on snapshot path", m.Type) @@ -207,7 +210,7 @@ func (h *snapshotHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { defer func() { 
snapshotReceiveInflights.WithLabelValues(from).Dec() }() - plog.Infof("receiving database snapshot [index:%d, from %s] ...", m.Snapshot.Metadata.Index, types.ID(m.From)) + plog.Infof("receiving database snapshot [index: %d, from: %s, raft message size: %s]", m.Snapshot.Metadata.Index, types.ID(m.From), msgSize) // save incoming database snapshot. n, err := h.snapshotter.SaveDBFrom(r.Body, m.Snapshot.Metadata.Index) if err != nil { @@ -217,8 +220,10 @@ func (h *snapshotHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { snapshotReceiveFailures.WithLabelValues(from).Inc() return } + + dbSize := humanize.Bytes(uint64(n)) receivedBytes.WithLabelValues(from).Add(float64(n)) - plog.Infof("received and saved database snapshot [index: %d, from: %s] successfully", m.Snapshot.Metadata.Index, types.ID(m.From)) + plog.Infof("successfully received and saved database snapshot [index: %d, from: %s, raft message size: %s, db size: %s]", m.Snapshot.Metadata.Index, types.ID(m.From), msgSize, dbSize) if err := h.r.Process(context.TODO(), m); err != nil { switch v := err.(type) { diff --git a/rafthttp/snapshot_sender.go b/rafthttp/snapshot_sender.go index a97c6f34564..2d36e123a9b 100644 --- a/rafthttp/snapshot_sender.go +++ b/rafthttp/snapshot_sender.go @@ -27,6 +27,7 @@ import ( "github.com/coreos/etcd/pkg/types" "github.com/coreos/etcd/raft" "github.com/coreos/etcd/snap" + "github.com/dustin/go-humanize" ) var ( @@ -75,7 +76,9 @@ func (s *snapshotSender) send(merged snap.Message) { u := s.picker.pick() req := createPostRequest(u, RaftSnapshotPrefix, body, "application/octet-stream", s.tr.URLs, s.from, s.cid) - plog.Infof("start to send database snapshot [index: %d, to %s]...", m.Snapshot.Metadata.Index, types.ID(m.To)) + snapshotTotalSizeVal := uint64(merged.TotalSize) + snapshotTotalSize := humanize.Bytes(snapshotTotalSizeVal) + plog.Infof("start to send database snapshot [index: %d, to %s, size %s]...", m.Snapshot.Metadata.Index, types.ID(m.To), snapshotTotalSize) 
snapshotSendInflights.WithLabelValues(to).Inc() defer func() { snapshotSendInflights.WithLabelValues(to).Dec() From 1205851db79a6bbeaa3e5caec0d9f776e56ba7b5 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Mon, 18 May 2020 11:30:17 -0700 Subject: [PATCH 34/56] version: 3.3.21 Signed-off-by: Gyuho Lee --- version/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version/version.go b/version/version.go index 5a96c7aa51e..310b7045b45 100644 --- a/version/version.go +++ b/version/version.go @@ -26,7 +26,7 @@ import ( var ( // MinClusterVersion is the min cluster version this etcd binary is compatible with. MinClusterVersion = "3.0.0" - Version = "3.3.20" + Version = "3.3.21" APIVersion = "unknown" // Git SHA Value will be set during build From 669285f515f46b5957fb82a03df8ac2b191f1af0 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 20 May 2020 11:01:13 -0700 Subject: [PATCH 35/56] rafthttp: log snapshot downloads Signed-off-by: Gyuho Lee --- rafthttp/http.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rafthttp/http.go b/rafthttp/http.go index 3dff6fe1a16..fbb96d48b8b 100644 --- a/rafthttp/http.go +++ b/rafthttp/http.go @@ -221,9 +221,10 @@ func (h *snapshotHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { return } + downloadTook := time.Since(start) dbSize := humanize.Bytes(uint64(n)) receivedBytes.WithLabelValues(from).Add(float64(n)) - plog.Infof("successfully received and saved database snapshot [index: %d, from: %s, raft message size: %s, db size: %s]", m.Snapshot.Metadata.Index, types.ID(m.From), msgSize, dbSize) + plog.Infof("successfully received and saved database snapshot [index: %d, from: %s, raft message size: %s, db size: %s, took: %s]", m.Snapshot.Metadata.Index, types.ID(m.From), msgSize, dbSize, downloadTook) if err := h.r.Process(context.TODO(), m); err != nil { switch v := err.(type) { From 8ce10ea4a5f755afa39362674939298471f8c409 Mon Sep 17 00:00:00 2001 From: tangcong Date: Wed, 20 May 
2020 12:35:25 +0800 Subject: [PATCH 36/56] wal: fix crc mismatch crash bug --- wal/wal.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/wal/wal.go b/wal/wal.go index 13f7ca5d335..95870d183ac 100644 --- a/wal/wal.go +++ b/wal/wal.go @@ -456,6 +456,14 @@ func ValidSnapshotEntries(walDir string) ([]walpb.Snapshot, error) { snaps = append(snaps, loadedSnap) case stateType: state = mustUnmarshalState(rec.Data) + case crcType: + crc := decoder.crc.Sum32() + // current crc of decoder must match the crc of the record. + // do no need to match 0 crc, since the decoder is a new one at this case. + if crc != 0 && rec.Validate(crc) != nil { + return nil, ErrCRCMismatch + } + decoder.updateCRC(rec.Crc) } } // We do not have to read out all the WAL entries From a9d14cbb64b372cf44b539a5d557a6af3de9ab58 Mon Sep 17 00:00:00 2001 From: tangcong Date: Wed, 20 May 2020 20:43:48 +0800 Subject: [PATCH 37/56] wal: add TestValidSnapshotEntriesAfterPurgeWal testcase Signed-off-by: Gyuho Lee --- wal/wal_test.go | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/wal/wal_test.go b/wal/wal_test.go index 4c4b667f0cf..fc3826179bc 100644 --- a/wal/wal_test.go +++ b/wal/wal_test.go @@ -905,3 +905,59 @@ func TestValidSnapshotEntries(t *testing.T) { t.Errorf("expected walSnaps %+v, got %+v", expected, walSnaps) } } + +// TestValidSnapshotEntriesAfterPurgeWal ensure that there are many wal files, and after cleaning the first wal file, +// it can work well. 
+func TestValidSnapshotEntriesAfterPurgeWal(t *testing.T) { + oldSegmentSizeBytes := SegmentSizeBytes + SegmentSizeBytes = 64 + defer func() { + SegmentSizeBytes = oldSegmentSizeBytes + }() + p, err := ioutil.TempDir(os.TempDir(), "waltest") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(p) + snap0 := walpb.Snapshot{Index: 0, Term: 0} + snap1 := walpb.Snapshot{Index: 1, Term: 1} + state1 := raftpb.HardState{Commit: 1, Term: 1} + snap2 := walpb.Snapshot{Index: 2, Term: 1} + snap3 := walpb.Snapshot{Index: 3, Term: 2} + state2 := raftpb.HardState{Commit: 3, Term: 2} + func() { + w, werr := Create(p, nil) + if werr != nil { + t.Fatal(werr) + } + defer w.Close() + + // snap0 is implicitly created at index 0, term 0 + if err = w.SaveSnapshot(snap1); err != nil { + t.Fatal(err) + } + if err = w.Save(state1, nil); err != nil { + t.Fatal(err) + } + if err = w.SaveSnapshot(snap2); err != nil { + t.Fatal(err) + } + if err = w.SaveSnapshot(snap3); err != nil { + t.Fatal(err) + } + for i := 0; i < 128; i++ { + if err = w.Save(state2, nil); err != nil { + t.Fatal(err) + } + } + }() + files, _, ferr := selectWALFiles(p, snap0) + if ferr != nil { + t.Fatal(ferr) + } + os.Remove(p + "/" + files[0]) + _, err = ValidSnapshotEntries(p) + if err != nil { + t.Fatal(err) + } +} From 282cce72fdec2d231d7307b856bd823026592c22 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 20 May 2020 15:42:36 -0700 Subject: [PATCH 38/56] version: 3.3.22 Signed-off-by: Gyuho Lee --- version/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version/version.go b/version/version.go index 310b7045b45..dfc61c221a5 100644 --- a/version/version.go +++ b/version/version.go @@ -26,7 +26,7 @@ import ( var ( // MinClusterVersion is the min cluster version this etcd binary is compatible with. 
MinClusterVersion = "3.0.0" - Version = "3.3.21" + Version = "3.3.22" APIVersion = "unknown" // Git SHA Value will be set during build From 611a1f7879cc3ac8752b975aace53dfb0ba9f84c Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Thu, 29 Nov 2018 11:39:48 -0500 Subject: [PATCH 39/56] version: openshift-v4.0 Signed-off-by: Sam Batschelet --- Dockerfile.openshift | 20 ++++++++++++++++++++ Dockerfile.rhel | 20 ++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 Dockerfile.openshift create mode 100644 Dockerfile.rhel diff --git a/Dockerfile.openshift b/Dockerfile.openshift new file mode 100644 index 00000000000..1327c0d095c --- /dev/null +++ b/Dockerfile.openshift @@ -0,0 +1,20 @@ +FROM registry.svc.ci.openshift.org/openshift/release:golang-1.10 AS builder + +ENV GOPATH /go + +COPY . $GOPATH/src/go.etcd.io/etcd + +RUN yum install -y git && \ + cd $GOPATH/src/go.etcd.io/etcd && \ + make build + +# stage 2 +FROM registry.svc.ci.openshift.org/openshift/origin-v4.0:base + +ENTRYPOINT ["/usr/bin/etcd"] + +COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcd /usr/bin/ + +LABEL io.k8s.display-name="etcd server" \ + io.k8s.description="etcd is distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ + maintainer="Sam Batschelet " diff --git a/Dockerfile.rhel b/Dockerfile.rhel new file mode 100644 index 00000000000..991967cbf2a --- /dev/null +++ b/Dockerfile.rhel @@ -0,0 +1,20 @@ +FROM openshift/golang-builder:1.10 AS builder + +ENV GOPATH /go + +COPY . $GOPATH/src/go.etcd.io/etcd + +RUN yum install -y make && \ + cd $GOPATH/src/go.etcd.io/etcd && \ + make build + +# stage 2 +FROM openshift/origin-base + +ENTRYPOINT ["/usr/bin/etcd"] + +COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcd /usr/bin/ + +LABEL io.k8s.display-name="etcd server" \ + io.k8s.description="etcd is distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." 
\ + maintainer="Sam Batschelet " From 46f8e34d1a858d602d6f3b61ea4bb196b4020a53 Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Wed, 20 Feb 2019 07:06:51 -0500 Subject: [PATCH 40/56] Dockerfile: add etcdctl Signed-off-by: Sam Batschelet --- Dockerfile.openshift | 2 +- Dockerfile.rhel | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile.openshift b/Dockerfile.openshift index 1327c0d095c..262e75afe8d 100644 --- a/Dockerfile.openshift +++ b/Dockerfile.openshift @@ -13,7 +13,7 @@ FROM registry.svc.ci.openshift.org/openshift/origin-v4.0:base ENTRYPOINT ["/usr/bin/etcd"] -COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcd /usr/bin/ +COPY --from=builder /go/src/go.etcd.io/etcd/bin/{etcd,etcdctl} /usr/bin/ LABEL io.k8s.display-name="etcd server" \ io.k8s.description="etcd is distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ diff --git a/Dockerfile.rhel b/Dockerfile.rhel index 991967cbf2a..efe90ffe4b3 100644 --- a/Dockerfile.rhel +++ b/Dockerfile.rhel @@ -13,7 +13,7 @@ FROM openshift/origin-base ENTRYPOINT ["/usr/bin/etcd"] -COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcd /usr/bin/ +COPY --from=builder /go/src/go.etcd.io/etcd/bin/{etcd,etcdctl} /usr/bin/ LABEL io.k8s.display-name="etcd server" \ io.k8s.description="etcd is distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ From 8f67a24a2e4443a1ce8f4266395a7377dffa3090 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Wed, 6 Feb 2019 22:49:01 -0800 Subject: [PATCH 41/56] Dockerfile.*: Fix "etcd is distributed" -> "etcd is a distributed" Correcting a typo from 2f109647 (version: openshift-v4.0, 2018-11-29). 
--- Dockerfile.openshift | 2 +- Dockerfile.rhel | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile.openshift b/Dockerfile.openshift index 262e75afe8d..d30cb6ac082 100644 --- a/Dockerfile.openshift +++ b/Dockerfile.openshift @@ -16,5 +16,5 @@ ENTRYPOINT ["/usr/bin/etcd"] COPY --from=builder /go/src/go.etcd.io/etcd/bin/{etcd,etcdctl} /usr/bin/ LABEL io.k8s.display-name="etcd server" \ - io.k8s.description="etcd is distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ + io.k8s.description="etcd is a distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ maintainer="Sam Batschelet " diff --git a/Dockerfile.rhel b/Dockerfile.rhel index efe90ffe4b3..2318b99439a 100644 --- a/Dockerfile.rhel +++ b/Dockerfile.rhel @@ -16,5 +16,5 @@ ENTRYPOINT ["/usr/bin/etcd"] COPY --from=builder /go/src/go.etcd.io/etcd/bin/{etcd,etcdctl} /usr/bin/ LABEL io.k8s.display-name="etcd server" \ - io.k8s.description="etcd is distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ + io.k8s.description="etcd is a distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ maintainer="Sam Batschelet " From 516a0803b3a42df4ad4da27f24e482898fa4c5d5 Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Wed, 20 Feb 2019 13:49:09 -0500 Subject: [PATCH 42/56] Dockerfile: resolve issue where binary was not properly copied from build. 
Signed-off-by: Sam Batschelet --- Dockerfile.openshift | 3 ++- Dockerfile.rhel | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Dockerfile.openshift b/Dockerfile.openshift index d30cb6ac082..9dc11e3dd7c 100644 --- a/Dockerfile.openshift +++ b/Dockerfile.openshift @@ -13,7 +13,8 @@ FROM registry.svc.ci.openshift.org/openshift/origin-v4.0:base ENTRYPOINT ["/usr/bin/etcd"] -COPY --from=builder /go/src/go.etcd.io/etcd/bin/{etcd,etcdctl} /usr/bin/ +COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcd /usr/bin/ +COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcdctl /usr/bin/ LABEL io.k8s.display-name="etcd server" \ io.k8s.description="etcd is a distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ diff --git a/Dockerfile.rhel b/Dockerfile.rhel index 2318b99439a..ae4b1468f8d 100644 --- a/Dockerfile.rhel +++ b/Dockerfile.rhel @@ -13,7 +13,8 @@ FROM openshift/origin-base ENTRYPOINT ["/usr/bin/etcd"] -COPY --from=builder /go/src/go.etcd.io/etcd/bin/{etcd,etcdctl} /usr/bin/ +COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcd /usr/bin/ +COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcdctl /usr/bin/ LABEL io.k8s.display-name="etcd server" \ io.k8s.description="etcd is a distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." 
\ From a30c9dcbae9ab2e8a18d542f4c93c2c342f3f353 Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Sun, 5 May 2019 11:21:48 -0400 Subject: [PATCH 43/56] OWNERS: add Signed-off-by: Sam Batschelet --- OWNERS | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 OWNERS diff --git a/OWNERS b/OWNERS new file mode 100644 index 00000000000..572c3af9dfc --- /dev/null +++ b/OWNERS @@ -0,0 +1,8 @@ +approvers: +- hexfusion +reviewers: +- deads2k +- crawford +- hexfusion +- smarterclayton +- wking From 7e7fa0e85d69c0bd9cce9397545d2c9aabd191fb Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Sat, 4 May 2019 11:49:48 -0400 Subject: [PATCH 44/56] Dockerfile: set coreos org as canonical for release-3.3 Signed-off-by: Sam Batschelet --- Dockerfile.openshift | 12 +++++------- Dockerfile.rhel | 12 +++++------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/Dockerfile.openshift b/Dockerfile.openshift index 9dc11e3dd7c..36e10d51454 100644 --- a/Dockerfile.openshift +++ b/Dockerfile.openshift @@ -1,20 +1,18 @@ FROM registry.svc.ci.openshift.org/openshift/release:golang-1.10 AS builder -ENV GOPATH /go +WORKDIR /go/src/github.com/coreos/etcd -COPY . $GOPATH/src/go.etcd.io/etcd +COPY . . -RUN yum install -y git && \ - cd $GOPATH/src/go.etcd.io/etcd && \ - make build +RUN make build # stage 2 FROM registry.svc.ci.openshift.org/openshift/origin-v4.0:base ENTRYPOINT ["/usr/bin/etcd"] -COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcd /usr/bin/ -COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcdctl /usr/bin/ +COPY --from=builder /go/src/github.com/coreos/etcd/bin/etcd /usr/bin/ +COPY --from=builder /go/src/github.com/coreos/etcd/bin/etcdctl /usr/bin/ LABEL io.k8s.display-name="etcd server" \ io.k8s.description="etcd is a distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." 
\ diff --git a/Dockerfile.rhel b/Dockerfile.rhel index ae4b1468f8d..af3d9df97f8 100644 --- a/Dockerfile.rhel +++ b/Dockerfile.rhel @@ -1,20 +1,18 @@ FROM openshift/golang-builder:1.10 AS builder -ENV GOPATH /go +WORKDIR /go/src/github.com/coreos/etcd -COPY . $GOPATH/src/go.etcd.io/etcd +COPY . . -RUN yum install -y make && \ - cd $GOPATH/src/go.etcd.io/etcd && \ - make build +RUN make build # stage 2 FROM openshift/origin-base ENTRYPOINT ["/usr/bin/etcd"] -COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcd /usr/bin/ -COPY --from=builder /go/src/go.etcd.io/etcd/bin/etcdctl /usr/bin/ +COPY --from=builder /go/src/github.com/coreos/etcd/bin/etcd /usr/bin/ +COPY --from=builder /go/src/github.com/coreos/etcd/bin/etcdctl /usr/bin/ LABEL io.k8s.display-name="etcd server" \ io.k8s.description="etcd is a distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ From 73426055acaa8672f95695d22b5b98440f8f6804 Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Tue, 9 Jul 2019 14:39:00 -0400 Subject: [PATCH 45/56] Dockerfile: bump golang to 1.11 Signed-off-by: Sam Batschelet --- Dockerfile.openshift | 2 +- Dockerfile.rhel | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile.openshift b/Dockerfile.openshift index 36e10d51454..c1eea2ca8c0 100644 --- a/Dockerfile.openshift +++ b/Dockerfile.openshift @@ -1,4 +1,4 @@ -FROM registry.svc.ci.openshift.org/openshift/release:golang-1.10 AS builder +FROM registry.svc.ci.openshift.org/openshift/release:golang-1.11 AS builder WORKDIR /go/src/github.com/coreos/etcd diff --git a/Dockerfile.rhel b/Dockerfile.rhel index af3d9df97f8..3ab2cdb801f 100644 --- a/Dockerfile.rhel +++ b/Dockerfile.rhel @@ -1,4 +1,4 @@ -FROM openshift/golang-builder:1.10 AS builder +FROM openshift/golang-builder:1.11 AS builder WORKDIR /go/src/github.com/coreos/etcd From f8f402b61b3942b9896eb99068bcc81b3197adf7 Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Thu, 29 Aug 2019 17:08:06 -0400 
Subject: [PATCH 46/56] Dockerfile: use build instead of make build make build performs a sanity test on the binary image which causes problems for unsupported arch. Because we run full CI tests against the image this check is not necessary and will allow images to be built regardless of arch. Signed-off-by: Sam Batschelet --- Dockerfile.openshift | 2 +- Dockerfile.rhel | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile.openshift b/Dockerfile.openshift index c1eea2ca8c0..f18d76fbcef 100644 --- a/Dockerfile.openshift +++ b/Dockerfile.openshift @@ -4,7 +4,7 @@ WORKDIR /go/src/github.com/coreos/etcd COPY . . -RUN make build +RUN ./build # stage 2 FROM registry.svc.ci.openshift.org/openshift/origin-v4.0:base diff --git a/Dockerfile.rhel b/Dockerfile.rhel index 3ab2cdb801f..bdd1929e617 100644 --- a/Dockerfile.rhel +++ b/Dockerfile.rhel @@ -4,7 +4,7 @@ WORKDIR /go/src/github.com/coreos/etcd COPY . . -RUN make build +RUN ./build # stage 2 FROM openshift/origin-base From c9ebfee8fc4c3cba24db5ed09753e8e6f9485d0d Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Mon, 23 Sep 2019 08:38:21 -0400 Subject: [PATCH 47/56] Dockerfile: bump golang 1.12 Signed-off-by: Sam Batschelet --- Dockerfile.openshift | 2 +- Dockerfile.rhel | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile.openshift b/Dockerfile.openshift index f18d76fbcef..6aa8493e07c 100644 --- a/Dockerfile.openshift +++ b/Dockerfile.openshift @@ -1,4 +1,4 @@ -FROM registry.svc.ci.openshift.org/openshift/release:golang-1.11 AS builder +FROM registry.svc.ci.openshift.org/openshift/release:golang-1.12 AS builder WORKDIR /go/src/github.com/coreos/etcd diff --git a/Dockerfile.rhel b/Dockerfile.rhel index bdd1929e617..281214e255c 100644 --- a/Dockerfile.rhel +++ b/Dockerfile.rhel @@ -1,4 +1,4 @@ -FROM openshift/golang-builder:1.11 AS builder +FROM openshift/golang-builder:1.12 AS builder WORKDIR /go/src/github.com/coreos/etcd From e4fd890074063eac10af85f878a3b35be0353731
Mon Sep 17 00:00:00 2001 From: David Eads Date: Thu, 20 Feb 2020 14:26:47 -0500 Subject: [PATCH 48/56] add stub discovery-etcd-initial-cluster command --- .../discover-etcd-initial-cluster/main.go | 35 +++++++ .../initial-cluster.go | 92 +++++++++++++++++++ 2 files changed, 127 insertions(+) create mode 100644 openshift-tools/discover-etcd-initial-cluster/main.go create mode 100644 openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go diff --git a/openshift-tools/discover-etcd-initial-cluster/main.go b/openshift-tools/discover-etcd-initial-cluster/main.go new file mode 100644 index 00000000000..de62e32a9c3 --- /dev/null +++ b/openshift-tools/discover-etcd-initial-cluster/main.go @@ -0,0 +1,35 @@ +package main + +import ( + goflag "flag" + "fmt" + "math/rand" + "os" + "strings" + "time" + + discover_etcd_initial_cluster "github.com/coreos/etcd/openshift-tools/pkg/discover-etcd-initial-cluster" + "github.com/spf13/pflag" +) + +// copy from `utilflag "k8s.io/component-base/cli/flag"` +// WordSepNormalizeFunc changes all flags that contain "_" separators +func WordSepNormalizeFunc(f *pflag.FlagSet, name string) pflag.NormalizedName { + if strings.Contains(name, "_") { + return pflag.NormalizedName(strings.Replace(name, "_", "-", -1)) + } + return pflag.NormalizedName(name) +} + +func main() { + rand.Seed(time.Now().UTC().UnixNano()) + + pflag.CommandLine.SetNormalizeFunc(WordSepNormalizeFunc) + pflag.CommandLine.AddGoFlagSet(goflag.CommandLine) + + command := discover_etcd_initial_cluster.NewDiscoverEtcdInitialClusterCommand() + if err := command.Execute(); err != nil { + fmt.Fprintf(os.Stderr, "%v\n", err) + os.Exit(1) + } +} diff --git a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go new file mode 100644 index 00000000000..0e9a8166f31 --- /dev/null +++ b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go @@ -0,0 +1,92 @@ +package 
discover_etcd_initial_cluster + +import ( + "fmt" + "os" + "time" + + "github.com/spf13/pflag" + + "github.com/spf13/cobra" +) + +type DiscoverEtcdInitialClusterOptions struct { + // TargetPeerURLHost is the host portion of the peer URL. It is used to match on. (either IP or hostname) + TargetPeerURLHost string + + // CABundleFile is the file to use to trust the etcd server + CABundleFile string + // ClientCertFile is the client cert to use to authenticate this binary to etcd + ClientCertFile string + // ClientKeyFile is the client key to use to authenticate this binary to etcd + ClientKeyFile string + // Endpoints is a list of all the endpoints to use to try to contact etcd + Endpoints string + + // Revision is the revision value for the static pod + Revision string + // PreviousEtcdInitialClusterDir is the directory to store the previous etcd initial cluster value + PreviousEtcdInitialClusterDir string + + // TotalTimeToWait is the total time to wait before reporting failure and dumping logs + TotalTimeToWait time.Duration + // TimeToWaitBeforeUsingPreviousValue is the time to wait before checking to see if we have a previous value. + TimeToWaitBeforeUsingPreviousValue time.Duration +} + +func NewDiscoverEtcdInitialCluster() *DiscoverEtcdInitialClusterOptions { + return &DiscoverEtcdInitialClusterOptions{ + TotalTimeToWait: 30 * time.Second, + TimeToWaitBeforeUsingPreviousValue: 10 * time.Second, + } +} + +func NewDiscoverEtcdInitialClusterCommand() *cobra.Command { + o := NewDiscoverEtcdInitialCluster() + + cmd := &cobra.Command{ + Use: "discover-etcd-initial-cluster", + Short: "output the value for ETCD_INITIAL_CLUSTER in openshift etcd static pod", + Long: `output the value for ETCD_INITIAL_CLUSTER in openshift etcd static pod + +1. It tries to contact every available etcd to get a list of member. +2. Check each member to see if any one of them is the target. +3. 
If so, and if it is started, use the member list to create the ETCD_INITIAL_CLUSTER value and print it out. +4. If so, and if it it not started, use the existing member list and append the target value to create the ETCD_INITIAL_CLUSTER value and print it out. +5. If not, try again until either you have it or you have to check a cache. +6. If you have to check a cache and it is present, return +7. If the cache is not present, keep trying to contact etcd until total timeout is met. +`, + Run: func(cmd *cobra.Command, args []string) { + if err := o.Validate(); err != nil { + fmt.Fprint(os.Stderr, err) + os.Exit(1) + } + + if err := o.Run(); err != nil { + fmt.Fprint(os.Stderr, err) + os.Exit(1) + } + }, + } + o.BindFlags(cmd.Flags()) + + return cmd +} + +func (o *DiscoverEtcdInitialClusterOptions) BindFlags(flags *pflag.FlagSet) { + flags.StringVar(&o.CABundleFile, "cacert", o.CABundleFile, "file to use to verify the identity of the etcd server") + flags.StringVar(&o.ClientCertFile, "cert", o.ClientCertFile, "client cert to use to authenticate this binary to etcd") + flags.StringVar(&o.ClientKeyFile, "key", o.ClientKeyFile, "client key to use to authenticate this binary to etcd") + flags.StringVar(&o.Endpoints, "endpoints", o.Endpoints, "list of all the endpoints to use to try to contact etcd") + flags.StringVar(&o.Revision, "revision", o.Revision, "revision value for the static pod") + flags.StringVar(&o.PreviousEtcdInitialClusterDir, "memory-dir", o.PreviousEtcdInitialClusterDir, "directory to store the previous etcd initial cluster value") +} + +func (o *DiscoverEtcdInitialClusterOptions) Validate() error { + return nil +} + +func (o *DiscoverEtcdInitialClusterOptions) Run() error { + return nil +} From 317ecea348eea18995e4176677a725c72a591503 Mon Sep 17 00:00:00 2001 From: David Eads Date: Thu, 20 Feb 2020 14:32:23 -0500 Subject: [PATCH 49/56] build openshift tools with etcd --- Dockerfile.openshift | 1 + Dockerfile.rhel | 1 + build | 15 +++++++++++++++ 3 files 
changed, 17 insertions(+) diff --git a/Dockerfile.openshift b/Dockerfile.openshift index 6aa8493e07c..78303e525d3 100644 --- a/Dockerfile.openshift +++ b/Dockerfile.openshift @@ -13,6 +13,7 @@ ENTRYPOINT ["/usr/bin/etcd"] COPY --from=builder /go/src/github.com/coreos/etcd/bin/etcd /usr/bin/ COPY --from=builder /go/src/github.com/coreos/etcd/bin/etcdctl /usr/bin/ +COPY --from=builder /go/src/github.com/coreos/etcd/bin/discover-etcd-initial-cluster /usr/bin/ LABEL io.k8s.display-name="etcd server" \ io.k8s.description="etcd is a distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ diff --git a/Dockerfile.rhel b/Dockerfile.rhel index 281214e255c..bd353e1a3f8 100644 --- a/Dockerfile.rhel +++ b/Dockerfile.rhel @@ -13,6 +13,7 @@ ENTRYPOINT ["/usr/bin/etcd"] COPY --from=builder /go/src/github.com/coreos/etcd/bin/etcd /usr/bin/ COPY --from=builder /go/src/github.com/coreos/etcd/bin/etcdctl /usr/bin/ +COPY --from=builder /go/src/github.com/coreos/etcd/bin/discover-etcd-initial-cluster /usr/bin/ LABEL io.k8s.display-name="etcd server" \ io.k8s.description="etcd is a distributed key-value store which stores the persistent master state for Kubernetes and OpenShift." \ diff --git a/build b/build index 5cf4d7c6426..e0787aaa8e2 100755 --- a/build +++ b/build @@ -64,6 +64,20 @@ etcd_build() { -o "${out}/etcdctl" ${REPO_PATH}/etcdctl || return } + +openshift_tools_build() { + out="bin" + if [[ -n "${BINDIR}" ]]; then out="${BINDIR}"; fi + toggle_failpoints_default + + # Static compilation is useful when etcd is run in a container. 
$GO_BUILD_FLAGS is OK + # shellcheck disable=SC2086 + CGO_ENABLED=0 go build $GO_BUILD_FLAGS \ + -installsuffix cgo \ + -ldflags "$GO_LDFLAGS" \ + -o "${out}/discover-etcd-initial-cluster" "github.com/coreos/etcd/openshift-tools/discover-etcd-initial-cluster" || return +} + tools_build() { out="bin" if [[ -n "${BINDIR}" ]]; then out="${BINDIR}"; fi @@ -91,4 +105,5 @@ fi # only build when called directly, not sourced if echo "$0" | grep "build$" >/dev/null; then etcd_build + openshift_tools_build fi From e8391753ef73307ce74c911958e3386021d47a20 Mon Sep 17 00:00:00 2001 From: David Eads Date: Thu, 20 Feb 2020 17:04:39 -0500 Subject: [PATCH 50/56] codify the initial cluster check as golang code --- .../initial-cluster.go | 182 +++++++++++++++--- 1 file changed, 158 insertions(+), 24 deletions(-) diff --git a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go index 0e9a8166f31..736a14a3338 100644 --- a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go +++ b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go @@ -1,10 +1,19 @@ package discover_etcd_initial_cluster import ( + "context" "fmt" "os" + "strings" "time" + "github.com/coreos/etcd/etcdserver/etcdserverpb" + + "github.com/coreos/etcd/pkg/transport" + "google.golang.org/grpc" + + "github.com/coreos/etcd/clientv3" + "github.com/spf13/pflag" "github.com/spf13/cobra" @@ -13,6 +22,8 @@ import ( type DiscoverEtcdInitialClusterOptions struct { // TargetPeerURLHost is the host portion of the peer URL. It is used to match on. 
(either IP or hostname) TargetPeerURLHost string + // TargetName is the name to assign to this peer if we create it + TargetName string // CABundleFile is the file to use to trust the etcd server CABundleFile string @@ -21,24 +32,14 @@ type DiscoverEtcdInitialClusterOptions struct { // ClientKeyFile is the client key to use to authenticate this binary to etcd ClientKeyFile string // Endpoints is a list of all the endpoints to use to try to contact etcd - Endpoints string + Endpoints []string - // Revision is the revision value for the static pod - Revision string - // PreviousEtcdInitialClusterDir is the directory to store the previous etcd initial cluster value - PreviousEtcdInitialClusterDir string - - // TotalTimeToWait is the total time to wait before reporting failure and dumping logs - TotalTimeToWait time.Duration - // TimeToWaitBeforeUsingPreviousValue is the time to wait before checking to see if we have a previous value. - TimeToWaitBeforeUsingPreviousValue time.Duration + // MemberDir is the directory created when etcd starts the first time + MemberDir string } func NewDiscoverEtcdInitialCluster() *DiscoverEtcdInitialClusterOptions { - return &DiscoverEtcdInitialClusterOptions{ - TotalTimeToWait: 30 * time.Second, - TimeToWaitBeforeUsingPreviousValue: 10 * time.Second, - } + return &DiscoverEtcdInitialClusterOptions{} } func NewDiscoverEtcdInitialClusterCommand() *cobra.Command { @@ -49,13 +50,12 @@ func NewDiscoverEtcdInitialClusterCommand() *cobra.Command { Short: "output the value for ETCD_INITIAL_CLUSTER in openshift etcd static pod", Long: `output the value for ETCD_INITIAL_CLUSTER in openshift etcd static pod -1. It tries to contact every available etcd to get a list of member. -2. Check each member to see if any one of them is the target. -3. If so, and if it is started, use the member list to create the ETCD_INITIAL_CLUSTER value and print it out. -4. 
If so, and if it it not started, use the existing member list and append the target value to create the ETCD_INITIAL_CLUSTER value and print it out. -5. If not, try again until either you have it or you have to check a cache. -6. If you have to check a cache and it is present, return -7. If the cache is not present, keep trying to contact etcd until total timeout is met. +1. If --data-dir exists, output a marker value and exit. +2. It tries to contact every available etcd to get a list of member. +3. Check each member to see if any one of them is the target. +4. If so, and if it is started, use the member list to create the ETCD_INITIAL_CLUSTER value and print it out. +5. If so, and if it it not started, use the existing member list and append the target value to create the ETCD_INITIAL_CLUSTER value and print it out. +6. If not, try again until either you have it or you time out. `, Run: func(cmd *cobra.Command, args []string) { if err := o.Validate(); err != nil { @@ -78,15 +78,149 @@ func (o *DiscoverEtcdInitialClusterOptions) BindFlags(flags *pflag.FlagSet) { flags.StringVar(&o.CABundleFile, "cacert", o.CABundleFile, "file to use to verify the identity of the etcd server") flags.StringVar(&o.ClientCertFile, "cert", o.ClientCertFile, "client cert to use to authenticate this binary to etcd") flags.StringVar(&o.ClientKeyFile, "key", o.ClientKeyFile, "client key to use to authenticate this binary to etcd") - flags.StringVar(&o.Endpoints, "endpoints", o.Endpoints, "list of all the endpoints to use to try to contact etcd") - flags.StringVar(&o.Revision, "revision", o.Revision, "revision value for the static pod") - flags.StringVar(&o.PreviousEtcdInitialClusterDir, "memory-dir", o.PreviousEtcdInitialClusterDir, "directory to store the previous etcd initial cluster value") + flags.StringSliceVar(&o.Endpoints, "endpoints", o.Endpoints, "list of all the endpoints to use to try to contact etcd") + flags.StringVar(&o.MemberDir, "data-dir", o.MemberDir, "file to stat for 
existence of /var/lib/etcd/member") + flags.StringVar(&o.TargetPeerURLHost, "target-peer-url-host", o.TargetPeerURLHost, "host portion of the peer URL. It is used to match on. (either IP or hostname)") + flags.StringVar(&o.TargetName, "target-name", o.TargetName, "name to assign to this peer if we create it") } func (o *DiscoverEtcdInitialClusterOptions) Validate() error { + if len(o.CABundleFile) == 0 { + return fmt.Errorf("missing --cacert") + } + if len(o.ClientCertFile) == 0 { + return fmt.Errorf("missing --cert") + } + if len(o.ClientKeyFile) == 0 { + return fmt.Errorf("missing --key") + } + if len(o.Endpoints) == 0 { + return fmt.Errorf("missing --endpoints") + } + if len(o.MemberDir) == 0 { + return fmt.Errorf("missing --data-dir") + } + if len(o.TargetPeerURLHost) == 0 { + return fmt.Errorf("missing --target-peer-url-host") + } + if len(o.TargetName) == 0 { + return fmt.Errorf("missing --target-name") + } return nil } func (o *DiscoverEtcdInitialClusterOptions) Run() error { + _, err := os.Stat(o.MemberDir) + switch { + case os.IsNotExist(err): + // do nothing. This just means we fall through to the polling logic + + case err == nil: + fmt.Printf(o.TargetName) + return nil + + case err != nil: + return err + } + + client, err := o.getClient() + if err != nil { + return err + } + defer client.Close() + + var targetMember *etcdserverpb.Member + var allMembers []*etcdserverpb.Member + for i := 0; i < 10; i++ { + fmt.Fprintf(os.Stderr, "#### attempt %d\n", i) + targetMember, allMembers, err = o.checkForTarget(client) + + for _, member := range allMembers { + fmt.Fprintf(os.Stderr, " member=%v\n", stringifyMember(member)) + } + fmt.Fprintf(os.Stderr, " target=%v, err=%v\n", stringifyMember(targetMember), err) + + // we're done because we found what we want. 
+ if targetMember != nil && err == nil { + break + } + + fmt.Fprintf(os.Stderr, "#### sleeping...\n") + time.Sleep(1 * time.Second) + } + if err != nil { + return err + } + if targetMember == nil { + return fmt.Errorf("timed out") + } + + etcdInitialClusterEntries := []string{} + for _, member := range allMembers { + if len(member.Name) == 0 { // this is the signal for whether or not a given peer is started + continue + } + etcdInitialClusterEntries = append(etcdInitialClusterEntries, fmt.Sprintf("%s=%s", member.Name, member.PeerURLs[0])) + } + if len(targetMember.Name) == 0 { + etcdInitialClusterEntries = append(etcdInitialClusterEntries, fmt.Sprintf("%s=%s", o.TargetName, targetMember.PeerURLs[0])) + } + fmt.Printf(strings.Join(etcdInitialClusterEntries, ",")) + return nil } + +func stringifyMember(member *etcdserverpb.Member) string { + if member == nil { + return "nil" + } + + return fmt.Sprintf("{name=%q, peerURLs=[%s}, clientURLs=[%s]", member.Name, strings.Join(member.PeerURLs, ","), strings.Join(member.ClientURLs, ",")) +} + +func (o *DiscoverEtcdInitialClusterOptions) checkForTarget(client *clientv3.Client) (*etcdserverpb.Member, []*etcdserverpb.Member, error) { + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + memberResponse, err := client.MemberList(ctx) + if err != nil { + return nil, nil, err + } + + var targetMember *etcdserverpb.Member + for i := range memberResponse.Members { + member := memberResponse.Members[i] + for _, peerURL := range member.PeerURLs { + if strings.Contains(peerURL, o.TargetPeerURLHost) { + targetMember = member + } + } + } + + return targetMember, memberResponse.Members, err +} + +func (o *DiscoverEtcdInitialClusterOptions) getClient() (*clientv3.Client, error) { + dialOptions := []grpc.DialOption{ + grpc.WithBlock(), // block until the underlying connection is up + } + + tlsInfo := transport.TLSInfo{ + CertFile: o.ClientCertFile, + KeyFile: o.ClientKeyFile, + TrustedCAFile: 
o.CABundleFile, + } + tlsConfig, err := tlsInfo.ClientConfig() + if err != nil { + return nil, err + } + + cfg := &clientv3.Config{ + DialOptions: dialOptions, + Endpoints: o.Endpoints, + DialTimeout: 15 * time.Second, + TLS: tlsConfig, + } + + return clientv3.New(*cfg) +} From c7df133f267b7533b9c130db629217bd7f312360 Mon Sep 17 00:00:00 2001 From: retroflexer Date: Sun, 23 Feb 2020 10:05:38 -0500 Subject: [PATCH 51/56] Archive data-dir if target member is unstarted --- .../initial-cluster.go | 62 ++++++++++++++++--- 1 file changed, 52 insertions(+), 10 deletions(-) diff --git a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go index 736a14a3338..d061f433965 100644 --- a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go +++ b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "os" + "path/filepath" "strings" "time" @@ -34,8 +35,8 @@ type DiscoverEtcdInitialClusterOptions struct { // Endpoints is a list of all the endpoints to use to try to contact etcd Endpoints []string - // MemberDir is the directory created when etcd starts the first time - MemberDir string + // DataDir is the directory created when etcd starts the first time + DataDir string } func NewDiscoverEtcdInitialCluster() *DiscoverEtcdInitialClusterOptions { @@ -79,7 +80,7 @@ func (o *DiscoverEtcdInitialClusterOptions) BindFlags(flags *pflag.FlagSet) { flags.StringVar(&o.ClientCertFile, "cert", o.ClientCertFile, "client cert to use to authenticate this binary to etcd") flags.StringVar(&o.ClientKeyFile, "key", o.ClientKeyFile, "client key to use to authenticate this binary to etcd") flags.StringSliceVar(&o.Endpoints, "endpoints", o.Endpoints, "list of all the endpoints to use to try to contact etcd") - flags.StringVar(&o.MemberDir, "data-dir", o.MemberDir, "file to stat for existence of /var/lib/etcd/member") + flags.StringVar(&o.DataDir, 
"data-dir", o.DataDir, "dir to stat for existence of the member directory") flags.StringVar(&o.TargetPeerURLHost, "target-peer-url-host", o.TargetPeerURLHost, "host portion of the peer URL. It is used to match on. (either IP or hostname)") flags.StringVar(&o.TargetName, "target-name", o.TargetName, "name to assign to this peer if we create it") } @@ -97,7 +98,7 @@ func (o *DiscoverEtcdInitialClusterOptions) Validate() error { if len(o.Endpoints) == 0 { return fmt.Errorf("missing --endpoints") } - if len(o.MemberDir) == 0 { + if len(o.DataDir) == 0 { return fmt.Errorf("missing --data-dir") } if len(o.TargetPeerURLHost) == 0 { @@ -110,14 +111,25 @@ func (o *DiscoverEtcdInitialClusterOptions) Validate() error { } func (o *DiscoverEtcdInitialClusterOptions) Run() error { - _, err := os.Stat(o.MemberDir) + + //Temporary hack to work with the current pod.yaml + var memberDir string + if strings.HasSuffix(o.DataDir, "member") { + memberDir = o.DataDir + o.DataDir = filepath.Dir(o.DataDir) + } else { + memberDir = filepath.Join(o.DataDir, "member") + } + + memberDirExists := false + _, err := os.Stat(memberDir) switch { case os.IsNotExist(err): // do nothing. This just means we fall through to the polling logic case err == nil: - fmt.Printf(o.TargetName) - return nil + fmt.Fprintf(os.Stderr, "memberDir %s is present on %s\n", memberDir, o.TargetName) + memberDirExists = true case err != nil: return err @@ -148,11 +160,26 @@ func (o *DiscoverEtcdInitialClusterOptions) Run() error { fmt.Fprintf(os.Stderr, "#### sleeping...\n") time.Sleep(1 * time.Second) } - if err != nil { + + switch { + case err != nil: return err - } - if targetMember == nil { + + case targetMember == nil && memberDirExists: + // we weren't able to locate other members and need to return based previous memberDir so we can restart. This is the off and on again flow. 
+ fmt.Printf(o.TargetName) + return nil + + case targetMember == nil && !memberDirExists: + // our member has not been added to the cluster and we have no previous data to start based on. return fmt.Errorf("timed out") + + case targetMember != nil && len(targetMember.Name) == 0 && memberDirExists: + // our member has been added to the cluster and has never been started before, but a data directory exists. This means that we have dirty data we must remove + archiveDataDir(memberDir) + + default: + // a target member was found, but no exception circumstances. } etcdInitialClusterEntries := []string{} @@ -165,11 +192,26 @@ func (o *DiscoverEtcdInitialClusterOptions) Run() error { if len(targetMember.Name) == 0 { etcdInitialClusterEntries = append(etcdInitialClusterEntries, fmt.Sprintf("%s=%s", o.TargetName, targetMember.PeerURLs[0])) } + fmt.Printf(strings.Join(etcdInitialClusterEntries, ",")) return nil } +// TO DO: instead of archiving, we should remove the directory to avoid any confusion with the backups. 
+func archiveDataDir(sourceDir string) error { + targetDir := filepath.Join(sourceDir+"-removed-archive", time.Now().Format(time.RFC3339)) + + // If dir already exists, add seconds to the dir name + if _, err := os.Stat(targetDir); err == nil { + targetDir = filepath.Join(sourceDir+"-removed-archive", time.Now().Add(time.Second).Format(time.RFC3339)) + } + if err := os.Rename(sourceDir, targetDir); err != nil && !os.IsNotExist(err) { + return err + } + return nil +} + func stringifyMember(member *etcdserverpb.Member) string { if member == nil { return "nil" From 081507672959a93e42178ccb97b1a1b9ae784010 Mon Sep 17 00:00:00 2001 From: Alay Patel Date: Thu, 27 Feb 2020 11:03:10 -0500 Subject: [PATCH 52/56] fix removed member name, unmask error --- .../pkg/discover-etcd-initial-cluster/initial-cluster.go | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go index d061f433965..3b8f49048e1 100644 --- a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go +++ b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go @@ -200,13 +200,10 @@ func (o *DiscoverEtcdInitialClusterOptions) Run() error { // TO DO: instead of archiving, we should remove the directory to avoid any confusion with the backups. 
func archiveDataDir(sourceDir string) error { - targetDir := filepath.Join(sourceDir+"-removed-archive", time.Now().Format(time.RFC3339)) + targetDir := filepath.Join(sourceDir + "-removed-archive-" + time.Now().Format("2006-01-02-030405")) - // If dir already exists, add seconds to the dir name - if _, err := os.Stat(targetDir); err == nil { - targetDir = filepath.Join(sourceDir+"-removed-archive", time.Now().Add(time.Second).Format(time.RFC3339)) - } - if err := os.Rename(sourceDir, targetDir); err != nil && !os.IsNotExist(err) { + fmt.Fprintf(os.Stderr, "attempting to archive %s to %s", sourceDir, targetDir) + if err := os.Rename(sourceDir, targetDir); err != nil { return err } return nil From 1810482752aad85c17345e91b1fc0c03ca0917de Mon Sep 17 00:00:00 2001 From: retroflexer Date: Fri, 28 Feb 2020 10:31:04 -0500 Subject: [PATCH 53/56] If we weren't able to get client or get target member but memberDir exists, go ahead and start. --- .../initial-cluster.go | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go index 3b8f49048e1..ac8c9e62b61 100644 --- a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go +++ b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go @@ -136,7 +136,12 @@ func (o *DiscoverEtcdInitialClusterOptions) Run() error { } client, err := o.getClient() - if err != nil { + if err != nil && memberDirExists { + // we weren't able to get client and need to return based previous memberDir so we can restart. This is the off and on again flow. + fmt.Fprintf(os.Stderr, "Couldn't get client, but memberDir %s is present on %s, err=%s. 
Returning with no error.\n", memberDir, o.TargetName, err) + fmt.Printf(o.TargetName) + return nil + } else if err != nil { return err } defer client.Close() @@ -162,20 +167,23 @@ func (o *DiscoverEtcdInitialClusterOptions) Run() error { } switch { - case err != nil: - return err - case targetMember == nil && memberDirExists: - // we weren't able to locate other members and need to return based previous memberDir so we can restart. This is the off and on again flow. + // we weren't able to locate other members and need to return based previous memberDir so we can restart. This is again the off and on flow. + fmt.Fprintf(os.Stderr, "Couldn't get targetMember, but memberDir %s is present on %s. Returning with no error.\n", memberDir, o.TargetName) fmt.Printf(o.TargetName) return nil + case err != nil: + fmt.Fprintf(os.Stderr, "Couldn't get targetMember. Returning error.\n") + return err + case targetMember == nil && !memberDirExists: // our member has not been added to the cluster and we have no previous data to start based on. return fmt.Errorf("timed out") case targetMember != nil && len(targetMember.Name) == 0 && memberDirExists: // our member has been added to the cluster and has never been started before, but a data directory exists. This means that we have dirty data we must remove + fmt.Fprintf(os.Stderr, "Found targetMember but is unstarted and memberDir exists. 
Archiving memberrDir\n") archiveDataDir(memberDir) default: @@ -190,6 +198,7 @@ func (o *DiscoverEtcdInitialClusterOptions) Run() error { etcdInitialClusterEntries = append(etcdInitialClusterEntries, fmt.Sprintf("%s=%s", member.Name, member.PeerURLs[0])) } if len(targetMember.Name) == 0 { + fmt.Fprintf(os.Stderr, "Adding the unstarted member to the end %s\n", o.TargetName) etcdInitialClusterEntries = append(etcdInitialClusterEntries, fmt.Sprintf("%s=%s", o.TargetName, targetMember.PeerURLs[0])) } From d883e05ef41bf59d2b80d7dbd3ef2586ef8ad6c2 Mon Sep 17 00:00:00 2001 From: David Eads Date: Wed, 4 Mar 2020 19:28:11 -0500 Subject: [PATCH 54/56] list all peers in initial-cluster --- .../pkg/discover-etcd-initial-cluster/initial-cluster.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go index ac8c9e62b61..cbcb5de5f70 100644 --- a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go +++ b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go @@ -195,7 +195,9 @@ func (o *DiscoverEtcdInitialClusterOptions) Run() error { if len(member.Name) == 0 { // this is the signal for whether or not a given peer is started continue } - etcdInitialClusterEntries = append(etcdInitialClusterEntries, fmt.Sprintf("%s=%s", member.Name, member.PeerURLs[0])) + for _, peerURL := range member.PeerURLs{ + etcdInitialClusterEntries = append(etcdInitialClusterEntries, fmt.Sprintf("%s=%s", member.Name, peerURL)) + } } if len(targetMember.Name) == 0 { fmt.Fprintf(os.Stderr, "Adding the unstarted member to the end %s\n", o.TargetName) From 56f86d1efd897fbc77e6acc9be90aaecf459a1bb Mon Sep 17 00:00:00 2001 From: David Eads Date: Thu, 14 May 2020 09:34:25 -0400 Subject: [PATCH 55/56] make evaluation of targetMember strict --- .../pkg/discover-etcd-initial-cluster/initial-cluster.go | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go index cbcb5de5f70..e06dd30e887 100644 --- a/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go +++ b/openshift-tools/pkg/discover-etcd-initial-cluster/initial-cluster.go @@ -241,7 +241,7 @@ func (o *DiscoverEtcdInitialClusterOptions) checkForTarget(client *clientv3.Clie for i := range memberResponse.Members { member := memberResponse.Members[i] for _, peerURL := range member.PeerURLs { - if strings.Contains(peerURL, o.TargetPeerURLHost) { + if peerURL == ("https://" + o.TargetPeerURLHost + ":2380") { targetMember = member } } From 9860350acdcc170fa3e4b34003a9d2f2147ae21a Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Fri, 10 Jul 2020 14:16:35 -0400 Subject: [PATCH 56/56] vendor: bump gRPC-go to v1.23.1 CARRY: This bump resolves an issue where max frame size (16kb) was not defined, which has security implications. 
Signed-off-by: Sam Batschelet --- glide.lock | 6 +++--- glide.yaml | 2 +- .../grpc/internal/transport/http2_server.go | 5 ++++- .../google.golang.org/grpc/internal/transport/http_util.go | 1 + vendor/google.golang.org/grpc/version.go | 2 +- 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/glide.lock b/glide.lock index 9a65ad94c52..cceb4574a93 100644 --- a/glide.lock +++ b/glide.lock @@ -1,5 +1,5 @@ -hash: 87224cbe021ea625798ac508c8963f49ff4ceb852d99da50db9d6b7fe95282f4 -updated: 2020-05-04T21:27:57.570766+08:00 +hash: fb475a41a3ff034787f702d39abce833083d7fc3c37aa80e2bf35f82655c0a31 +updated: 2020-04-17T10:25:50.74702498-04:00 imports: - name: github.com/beorn7/perks version: 37c8de3658fcb183f997c4e13e8337516ab753e6 @@ -184,7 +184,7 @@ imports: - googleapis/api/annotations - googleapis/rpc/status - name: google.golang.org/grpc - version: 6eaf6f47437a6b4e2153a190160ef39a92c7eceb + version: 39e8a7b072a67ca2a75f57fa2e0d50995f5b22f6 subpackages: - balancer - balancer/base diff --git a/glide.yaml b/glide.yaml index 32c80c6690b..3de9b4c2ea9 100644 --- a/glide.yaml +++ b/glide.yaml @@ -118,7 +118,7 @@ import: subpackages: - rate - package: google.golang.org/grpc - version: v1.23.0 + version: v1.23.1 subpackages: - balancer - codes diff --git a/vendor/google.golang.org/grpc/internal/transport/http2_server.go b/vendor/google.golang.org/grpc/internal/transport/http2_server.go index 83439b5627d..4e26f6a1d6b 100644 --- a/vendor/google.golang.org/grpc/internal/transport/http2_server.go +++ b/vendor/google.golang.org/grpc/internal/transport/http2_server.go @@ -138,7 +138,10 @@ func newHTTP2Server(conn net.Conn, config *ServerConfig) (_ ServerTransport, err } framer := newFramer(conn, writeBufSize, readBufSize, maxHeaderListSize) // Send initial settings as connection preface to client. 
- var isettings []http2.Setting + isettings := []http2.Setting{{ + ID: http2.SettingMaxFrameSize, + Val: http2MaxFrameLen, + }} // TODO(zhaoq): Have a better way to signal "no limit" because 0 is // permitted in the HTTP2 spec. maxStreams := config.MaxStreams diff --git a/vendor/google.golang.org/grpc/internal/transport/http_util.go b/vendor/google.golang.org/grpc/internal/transport/http_util.go index 9d212867ce2..8f5f3349d90 100644 --- a/vendor/google.golang.org/grpc/internal/transport/http_util.go +++ b/vendor/google.golang.org/grpc/internal/transport/http_util.go @@ -667,6 +667,7 @@ func newFramer(conn net.Conn, writeBufferSize, readBufferSize int, maxHeaderList writer: w, fr: http2.NewFramer(w, r), } + f.fr.SetMaxReadFrameSize(http2MaxFrameLen) // Opt-in to Frame reuse API on framer to reduce garbage. // Frames aren't safe to read from after a subsequent call to ReadFrame. f.fr.SetReuseFrames() diff --git a/vendor/google.golang.org/grpc/version.go b/vendor/google.golang.org/grpc/version.go index 5411a73a22e..58885056385 100644 --- a/vendor/google.golang.org/grpc/version.go +++ b/vendor/google.golang.org/grpc/version.go @@ -19,4 +19,4 @@ package grpc // Version is the current grpc version. -const Version = "1.23.0" +const Version = "1.23.1"