Skip to content

Commit f1c965a

Browse files
committed
sql: lazily determine if a PCR reader catalog is in use
Previously, the logic to determine if a PCR reader catalog was in use could become stuck if an availability issue occurred with the leasing subsystem. This was because we could end up waiting indefinitely for the lease in failure scenarios like TestUnavailableZipDir, and the statement_timeout is not active this early. To address this, this patch adds a 30-second timeout for obtaining a lease on the system database when detecting PCR reader catalogs. Fixes: #141565 Release note: None
1 parent 9c97416 commit f1c965a

File tree

2 files changed

+36
-14
lines changed

2 files changed

+36
-14
lines changed

pkg/cli/zip_test.go

-2
Original file line numberDiff line numberDiff line change
@@ -497,8 +497,6 @@ func TestUnavailableZip(t *testing.T) {
497497
defer leaktest.AfterTest(t)()
498498
defer log.Scope(t).Close(t)
499499

500-
skip.WithIssue(t, 141565)
501-
502500
skip.UnderShort(t)
503501
// Race builds make the servers so slow that they report spurious
504502
// unavailability.

pkg/sql/conn_executor.go

+36-12
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"math"
1414
"math/rand"
1515
"strings"
16+
"sync"
1617
"sync/atomic"
1718
"time"
1819
"unicode/utf8"
@@ -1273,18 +1274,6 @@ func (s *Server) newConnExecutor(
12731274

12741275
ex.extraTxnState.hasAdminRoleCache = HasAdminRoleCache{}
12751276

1276-
if lm := ex.server.cfg.LeaseManager; executorType == executorTypeExec && lm != nil {
1277-
if desc, err := lm.Acquire(ctx, ex.server.cfg.Clock.Now(), keys.SystemDatabaseID); err != nil {
1278-
log.Infof(ctx, "unable to lease system database to determine if PCR reader is in use: %s", err)
1279-
} else {
1280-
defer desc.Release(ctx)
1281-
// The system database ReplicatedPCRVersion is set during reader tenant bootstrap,
1282-
// which guarantees that all user tenant sql connections to the reader tenant will
1283-
// correctly set this
1284-
ex.isPCRReaderCatalog = desc.Underlying().(catalog.DatabaseDescriptor).GetReplicatedPCRVersion() != 0
1285-
}
1286-
}
1287-
12881277
if postSetupFn != nil {
12891278
postSetupFn(ex)
12901279
}
@@ -1888,6 +1877,10 @@ type connExecutor struct {
18881877
// PCR reader catalog, which is done by checking for the ReplicatedPCRVersion
18891878
// field on the system database (which is set during tenant bootstrap).
18901879
isPCRReaderCatalog bool
1880+
1881+
// isPCRReaderCatalogInit tracks if the system database has been queried
1882+
// once for this connect to determine if this tenant is a reader catalog.
1883+
isPCRReaderCatalogInit sync.Once
18911884
}
18921885

18931886
// ctxHolder contains a connection's context and, while session tracing is
@@ -3852,10 +3845,41 @@ func (ex *connExecutor) initEvalCtx(ctx context.Context, evalCtx *extendedEvalCo
38523845
evalCtx.copyFromExecCfg(ex.server.cfg)
38533846
}
38543847

3848+
// maybeInitPCRReaderCatalog leases the system database to determine if
3849+
// we are connecting to a PCR reader catalog, if this has not been attempted
3850+
// before.
3851+
func (ex *connExecutor) maybeInitPCRReaderCatalog(ctx context.Context) {
3852+
ex.isPCRReaderCatalogInit.Do(func() {
3853+
// Wait up to 30 seconds attempting to acquire the lease on the system
3854+
// database. Normally we should already have a lease on this object,
3855+
// unless there is some availability issue.
3856+
const initPCRReaderCatalogTimeout = 30 * time.Second
3857+
err := timeutil.RunWithTimeout(ctx, "detect-pcr-reader-catalog", initPCRReaderCatalogTimeout,
3858+
func(ctx context.Context) error {
3859+
if lm := ex.server.cfg.LeaseManager; ex.executorType == executorTypeExec && lm != nil {
3860+
desc, err := lm.Acquire(ctx, ex.server.cfg.Clock.Now(), keys.SystemDatabaseID)
3861+
if err != nil {
3862+
return err
3863+
}
3864+
defer desc.Release(ctx)
3865+
// The system database ReplicatedPCRVersion is set during reader tenant bootstrap,
3866+
// which guarantees that all user tenant sql connections to the reader tenant will
3867+
// correctly set this
3868+
ex.isPCRReaderCatalog = desc.Underlying().(catalog.DatabaseDescriptor).GetReplicatedPCRVersion() != 0
3869+
}
3870+
return nil
3871+
})
3872+
if err != nil {
3873+
log.Infof(ctx, "unable to lease system database to determine if PCR reader is in use: %s", err)
3874+
}
3875+
})
3876+
}
3877+
38553878
// GetPCRReaderTimestamp if the system database is setup as PCR
38563879
// catalog reader, then this function will return an non-zero timestamp
38573880
// to use for all read operations.
38583881
func (ex *connExecutor) GetPCRReaderTimestamp() hlc.Timestamp {
3882+
ex.maybeInitPCRReaderCatalog(ex.Ctx())
38593883
if ex.isPCRReaderCatalog && !ex.sessionData().BypassPCRReaderCatalogAOST {
38603884
return ex.server.cfg.LeaseManager.GetSafeReplicationTS()
38613885
}

0 commit comments

Comments
 (0)