Skip to content

Commit 0f3354b

Browse files
committed
sql: lazily determine if a PCR reader catalog is in use
Previously, the logic to determine if a PCR reader catalog was in use could become stuck if an availability issue occurred with the leasing subsystem. This was because we could end up waiting indefinitely for the lease in failure scenarios like TestUnavailableZipDir, and the statement_timeout is not active this early. To address this, this patch adds a 30-second timeout for obtaining a lease on the system database when detecting PCR reader catalogs. Fixes: #141565 Release note: None
1 parent dc53d9e commit 0f3354b

File tree

2 files changed

+37
-14
lines changed

2 files changed

+37
-14
lines changed

pkg/cli/zip_test.go

-2
Original file line numberDiff line numberDiff line change
@@ -497,8 +497,6 @@ func TestUnavailableZip(t *testing.T) {
497497
defer leaktest.AfterTest(t)()
498498
defer log.Scope(t).Close(t)
499499

500-
skip.WithIssue(t, 141565)
501-
502500
skip.UnderShort(t)
503501
// Race builds make the servers so slow that they report spurious
504502
// unavailability.

pkg/sql/conn_executor.go

+37-12
Original file line numberDiff line numberDiff line change
@@ -1260,18 +1260,6 @@ func (s *Server) newConnExecutor(
12601260

12611261
ex.extraTxnState.hasAdminRoleCache = HasAdminRoleCache{}
12621262

1263-
if lm := ex.server.cfg.LeaseManager; executorType == executorTypeExec && lm != nil {
1264-
if desc, err := lm.Acquire(ctx, ex.server.cfg.Clock.Now(), keys.SystemDatabaseID); err != nil {
1265-
log.Infof(ctx, "unable to lease system database to determine if PCR reader is in use: %s", err)
1266-
} else {
1267-
defer desc.Release(ctx)
1268-
// The system database ReplicatedPCRVersion is set during reader tenant bootstrap,
1269-
// which guarantees that all user tenant sql connections to the reader tenant will
1270-
// correctly set this
1271-
ex.isPCRReaderCatalog = desc.Underlying().(catalog.DatabaseDescriptor).GetReplicatedPCRVersion() != 0
1272-
}
1273-
}
1274-
12751263
if postSetupFn != nil {
12761264
postSetupFn(ex)
12771265
}
@@ -1875,6 +1863,10 @@ type connExecutor struct {
18751863
// PCR reader catalog, which is done by checking for the ReplicatedPCRVersion
18761864
// field on the system database (which is set during tenant bootstrap).
18771865
isPCRReaderCatalog bool
1866+
1867+
// isPCRReaderCatalogInit tracks if the system database has been queried
1868+
// once for this connect to determine if this tenant is a reader catalog.
1869+
isPCRReaderCatalogInit bool
18781870
}
18791871

18801872
// ctxHolder contains a connection's context and, while session tracing is
@@ -3839,10 +3831,43 @@ func (ex *connExecutor) initEvalCtx(ctx context.Context, evalCtx *extendedEvalCo
38393831
evalCtx.copyFromExecCfg(ex.server.cfg)
38403832
}
38413833

3834+
// maybeInitPCRReaderCatalog leases the system database to determine if
3835+
// we are connecting to a PCR reader catalog, if this has not been attempted
3836+
// before.
3837+
func (ex *connExecutor) maybeInitPCRReaderCatalog(ctx context.Context) {
3838+
if ex.isPCRReaderCatalogInit {
3839+
return
3840+
}
3841+
// Wait up to 30 seconds attempting to acquire the lease on the system
3842+
// database. Normally we should already have a lease on this object,
3843+
// unless there is some availability issue.
3844+
const initPCRReaderCatalogTimeout = 30 * time.Second
3845+
err := timeutil.RunWithTimeout(ctx, "detect-pcr-reader-catalog", initPCRReaderCatalogTimeout,
3846+
func(ctx context.Context) error {
3847+
if lm := ex.server.cfg.LeaseManager; ex.executorType == executorTypeExec && lm != nil {
3848+
desc, err := lm.Acquire(ctx, ex.server.cfg.Clock.Now(), keys.SystemDatabaseID)
3849+
if err != nil {
3850+
return err
3851+
}
3852+
defer desc.Release(ctx)
3853+
// The system database ReplicatedPCRVersion is set during reader tenant bootstrap,
3854+
// which guarantees that all user tenant sql connections to the reader tenant will
3855+
// correctly set this
3856+
ex.isPCRReaderCatalog = desc.Underlying().(catalog.DatabaseDescriptor).GetReplicatedPCRVersion() != 0
3857+
}
3858+
return nil
3859+
})
3860+
if err != nil {
3861+
log.Infof(ctx, "unable to lease system database to determine if PCR reader is in use: %s", err)
3862+
}
3863+
ex.isPCRReaderCatalogInit = true
3864+
}
3865+
38423866
// GetPCRReaderTimestamp if the system database is setup as PCR
38433867
// catalog reader, then this function will return an non-zero timestamp
38443868
// to use for all read operations.
38453869
func (ex *connExecutor) GetPCRReaderTimestamp() hlc.Timestamp {
3870+
ex.maybeInitPCRReaderCatalog(ex.Ctx())
38463871
if ex.isPCRReaderCatalog && !ex.sessionData().BypassPCRReaderCatalogAOST {
38473872
return ex.server.cfg.LeaseManager.GetSafeReplicationTS()
38483873
}

0 commit comments

Comments
 (0)