Skip to content

Commit d3bb6f6

Browse files
committed
*: LeaseTimeToLive returns error if leader changed
The old leader demotes lessor and all the leases' expire time will be updated. Instead of returning incorrect remaining TTL, we should return errors to force client retry. Signed-off-by: Wei Fu <[email protected]>
1 parent e4448c4 commit d3bb6f6

File tree

5 files changed

+108
-2
lines changed

5 files changed

+108
-2
lines changed

server/etcdserver/v3_server.go

+12
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,9 @@ func (s *EtcdServer) leaseTimeToLive(ctx context.Context, r *pb.LeaseTimeToLiveR
357357
if err := s.waitAppliedIndex(); err != nil {
358358
return nil, err
359359
}
360+
361+
// gofail: var beforeLookupWhenLeaseTimeToLive struct{}
362+
360363
// primary; timetolive directly from leader
361364
le := s.lessor.Lookup(lease.LeaseID(r.ID))
362365
if le == nil {
@@ -372,6 +375,15 @@ func (s *EtcdServer) leaseTimeToLive(ctx context.Context, r *pb.LeaseTimeToLiveR
372375
}
373376
resp.Keys = kbs
374377
}
378+
379+
// The leasor could be demoted if leader changed during lookup.
380+
// We should return error to force retry instead of returning
381+
// incorrect remaining TTL.
382+
if le.Demoted() {
383+
// NOTE: lease.ErrNotPrimary is not retryable error for
384+
// client. Instead, uses ErrLeaderChanged.
385+
return nil, errors.ErrLeaderChanged
386+
}
375387
return resp, nil
376388
}
377389

server/lease/lease.go

+7
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,13 @@ func (l *Lease) forever() {
9595
l.expiry = forever
9696
}
9797

98+
// Demoted returns true if the lease's expiry has been reset to forever.
99+
func (l *Lease) Demoted() bool {
100+
l.expiryMu.Lock()
101+
defer l.expiryMu.Unlock()
102+
return l.expiry == forever
103+
}
104+
98105
// Keys returns all the keys attached to the lease.
99106
func (l *Lease) Keys() []string {
100107
l.mu.RLock()

server/lease/leasehttp/http.go

+11
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,9 @@ func (h *leaseHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
103103
http.Error(w, ErrLeaseHTTPTimeout.Error(), http.StatusRequestTimeout)
104104
return
105105
}
106+
107+
// gofail: var beforeLookupWhenForwardLeaseTimeToLive struct{}
108+
106109
l := h.l.Lookup(lease.LeaseID(lreq.LeaseTimeToLiveRequest.ID))
107110
if l == nil {
108111
http.Error(w, lease.ErrLeaseNotFound.Error(), http.StatusNotFound)
@@ -126,6 +129,14 @@ func (h *leaseHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
126129
resp.LeaseTimeToLiveResponse.Keys = kbs
127130
}
128131

132+
// The leasor could be demoted if leader changed during lookup.
133+
// We should return error to force retry instead of returning
134+
// incorrect remaining TTL.
135+
if l.Demoted() {
136+
http.Error(w, lease.ErrNotPrimary.Error(), http.StatusInternalServerError)
137+
return
138+
}
139+
129140
v, err = resp.Marshal()
130141
if err != nil {
131142
http.Error(w, err.Error(), http.StatusInternalServerError)

tests/integration/v3_lease_test.go

+76
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ import (
2222
"testing"
2323
"time"
2424

25+
"github.com/stretchr/testify/assert"
26+
"github.com/stretchr/testify/require"
2527
"google.golang.org/grpc/codes"
2628
"google.golang.org/grpc/metadata"
2729
"google.golang.org/grpc/status"
@@ -30,8 +32,10 @@ import (
3032
"go.etcd.io/etcd/api/v3/mvccpb"
3133
"go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
3234
"go.etcd.io/etcd/client/pkg/v3/testutil"
35+
clientv3 "go.etcd.io/etcd/client/v3"
3336
framecfg "go.etcd.io/etcd/tests/v3/framework/config"
3437
"go.etcd.io/etcd/tests/v3/framework/integration"
38+
gofail "go.etcd.io/gofail/runtime"
3539
)
3640

3741
// TestV3LeasePromote ensures the newly elected leader can promote itself
@@ -1046,6 +1050,78 @@ func TestV3LeaseRecoverKeyWithMutipleLease(t *testing.T) {
10461050
}
10471051
}
10481052

1053+
func TestV3LeaseTimeToLiveWithLeaderChanged(t *testing.T) {
1054+
t.Run("normal", func(subT *testing.T) {
1055+
testV3LeaseTimeToLiveWithLeaderChanged(subT, "beforeLookupWhenLeaseTimeToLive")
1056+
})
1057+
1058+
t.Run("forward", func(subT *testing.T) {
1059+
testV3LeaseTimeToLiveWithLeaderChanged(subT, "beforeLookupWhenForwardLeaseTimeToLive")
1060+
})
1061+
}
1062+
1063+
func testV3LeaseTimeToLiveWithLeaderChanged(t *testing.T, fpName string) {
1064+
if len(gofail.List()) == 0 {
1065+
t.Skip("please run 'make gofail-enable' before running the test")
1066+
}
1067+
1068+
integration.BeforeTest(t)
1069+
1070+
clus := integration.NewCluster(t, &integration.ClusterConfig{Size: 3})
1071+
defer clus.Terminate(t)
1072+
1073+
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
1074+
defer cancel()
1075+
1076+
oldLeadIdx := clus.WaitLeader(t)
1077+
followerIdx := (oldLeadIdx + 1) % 3
1078+
1079+
followerMemberID := clus.Members[followerIdx].ID()
1080+
1081+
oldLeadC := clus.Client(oldLeadIdx)
1082+
1083+
leaseResp, err := oldLeadC.Grant(ctx, 100)
1084+
require.NoError(t, err)
1085+
1086+
require.NoError(t, gofail.Enable(fpName, `sleep("3s")`))
1087+
t.Cleanup(func() {
1088+
terr := gofail.Disable(fpName)
1089+
if terr != nil && terr != gofail.ErrDisabled {
1090+
t.Fatalf("failed to disable %s: %v", fpName, terr)
1091+
}
1092+
})
1093+
1094+
readyCh := make(chan struct{})
1095+
errCh := make(chan error, 1)
1096+
1097+
var targetC *clientv3.Client
1098+
switch fpName {
1099+
case "beforeLookupWhenLeaseTimeToLive":
1100+
targetC = oldLeadC
1101+
case "beforeLookupWhenForwardLeaseTimeToLive":
1102+
targetC = clus.Client((oldLeadIdx + 2) % 3)
1103+
default:
1104+
t.Fatalf("unsupported %s failpoint", fpName)
1105+
}
1106+
1107+
go func() {
1108+
<-readyCh
1109+
time.Sleep(1 * time.Second)
1110+
1111+
_, merr := oldLeadC.MoveLeader(ctx, uint64(followerMemberID))
1112+
assert.NoError(t, gofail.Disable(fpName))
1113+
errCh <- merr
1114+
}()
1115+
1116+
close(readyCh)
1117+
1118+
ttlResp, err := targetC.TimeToLive(ctx, leaseResp.ID)
1119+
require.NoError(t, err)
1120+
require.GreaterOrEqual(t, int64(100), ttlResp.TTL)
1121+
1122+
require.NoError(t, <-errCh)
1123+
}
1124+
10491125
// acquireLeaseAndKey creates a new lease and creates an attached key.
10501126
func acquireLeaseAndKey(clus *integration.Cluster, key string) (int64, error) {
10511127
// create lease

tests/robustness/makefile.mk

+2-2
Original file line numberDiff line numberDiff line change
@@ -36,15 +36,15 @@ GOFAIL_VERSION = $(shell cd tools/mod && go list -m -f {{.Version}} go.etcd.io/g
3636

3737
.PHONY: gofail-enable
3838
gofail-enable: install-gofail
39-
gofail enable server/etcdserver/ server/storage/backend/ server/storage/mvcc/ server/storage/wal/ server/etcdserver/api/v3rpc/
39+
gofail enable server/etcdserver/ server/lease/leasehttp server/storage/backend/ server/storage/mvcc/ server/storage/wal/ server/etcdserver/api/v3rpc/
4040
cd ./server && go get go.etcd.io/gofail@${GOFAIL_VERSION}
4141
cd ./etcdutl && go get go.etcd.io/gofail@${GOFAIL_VERSION}
4242
cd ./etcdctl && go get go.etcd.io/gofail@${GOFAIL_VERSION}
4343
cd ./tests && go get go.etcd.io/gofail@${GOFAIL_VERSION}
4444

4545
.PHONY: gofail-disable
4646
gofail-disable: install-gofail
47-
gofail disable server/etcdserver/ server/storage/backend/ server/storage/mvcc/ server/storage/wal/ server/etcdserver/api/v3rpc/
47+
gofail disable server/etcdserver/ server/lease/leasehttp server/storage/backend/ server/storage/mvcc/ server/storage/wal/ server/etcdserver/api/v3rpc/
4848
cd ./server && go mod tidy
4949
cd ./etcdutl && go mod tidy
5050
cd ./etcdctl && go mod tidy

0 commit comments

Comments
 (0)