@@ -24,6 +24,7 @@ import (
24
24
)
25
25
26
26
const minHeartbeatInterval = 500 * time .Millisecond
27
+ const wireVersion42 = 8 // Wire version for MongoDB 4.2
27
28
28
29
// Server state constants.
29
30
const (
@@ -295,6 +296,8 @@ func (s *Server) ProcessHandshakeError(err error, startingGenerationNumber uint6
295
296
return
296
297
}
297
298
299
+ // Unwrap any connection errors. If there is no wrapped connection error, then the error should
300
+ // not result in any Server state change (e.g. a command error from the database).
298
301
wrappedConnErr := unwrapConnectionError (err )
299
302
if wrappedConnErr == nil {
300
303
return
@@ -385,27 +388,58 @@ func getWriteConcernErrorForProcessing(err error) (*driver.WriteConcernError, bo
385
388
386
389
// ProcessError handles SDAM error handling and implements driver.ErrorProcessor.
387
390
func (s * Server ) ProcessError (err error , conn driver.Connection ) driver.ProcessErrorResult {
388
- // ignore nil error
391
+ // Ignore nil errors.
389
392
if err == nil {
390
393
return driver .NoChange
391
394
}
392
395
396
+ // Ignore errors from stale connections because the error came from a previous generation of the
397
+ // connection pool. The root cause of the error has aleady been handled, which is what caused
398
+ // the pool generation to increment. Processing errors for stale connections could result in
399
+ // handling the same error root cause multiple times (e.g. a temporary network interrupt causing
400
+ // all connections to the same server to return errors).
401
+ if conn .Stale () {
402
+ return driver .NoChange
403
+ }
404
+
393
405
// Must hold the processErrorLock while updating the server description and clearing the pool.
394
406
// Not holding the lock leads to possible out-of-order processing of pool.clear() and
395
407
// pool.ready() calls from concurrent server description updates.
396
408
s .processErrorLock .Lock ()
397
409
defer s .processErrorLock .Unlock ()
398
410
399
- // ignore stale error
400
- if conn .Stale () {
401
- return driver .NoChange
411
+ // Get the wire version and service ID from the connection description because they will never
412
+ // change for the lifetime of a connection and can possibly be different between connections to
413
+ // the same server.
414
+ connDesc := conn .Description ()
415
+ wireVersion := connDesc .WireVersion
416
+ serviceID := connDesc .ServiceID
417
+
418
+ // Get the topology version from the Server description because the Server description is
419
+ // updated by heartbeats and errors, so typically has a more up-to-date topology version.
420
+ serverDesc := s .desc .Load ().(description.Server )
421
+ topologyVersion := serverDesc .TopologyVersion
422
+
423
+ // We don't currently update the Server topology version when we create new application
424
+ // connections, so it's possible for a connection's topology version to be newer than the
425
+ // Server's topology version. Pick the "newest" of the two topology versions.
426
+ // Technically a nil topology version on a new database response should be considered a new
427
+ // topology version and replace the Server's topology version. However, we don't know if the
428
+ // connection's topology version is based on a new or old database response, so we ignore a nil
429
+ // topology version on the connection for now.
430
+ //
431
+ // TODO(GODRIVER-2841): Remove this logic once we set the Server description when we create
432
+ // TODO application connections because then the Server's topology version will always be the
433
+ // TODO latest known.
434
+ if tv := connDesc .TopologyVersion ; tv != nil && topologyVersion .CompareToIncoming (tv ) < 0 {
435
+ topologyVersion = tv
402
436
}
437
+
403
438
// Invalidate server description if not primary or node recovering error occurs.
404
439
// These errors can be reported as a command error or a write concern error.
405
- desc := conn .Description ()
406
440
if cerr , ok := err .(driver.Error ); ok && (cerr .NodeIsRecovering () || cerr .NotPrimary ()) {
407
- // ignore stale error
408
- if desc . TopologyVersion .CompareToIncoming (cerr .TopologyVersion ) >= 0 {
441
+ // Ignore errors that came from when the database was on a previous topology version.
442
+ if topologyVersion .CompareToIncoming (cerr .TopologyVersion ) >= 0 {
409
443
return driver .NoChange
410
444
}
411
445
@@ -415,16 +449,16 @@ func (s *Server) ProcessError(err error, conn driver.Connection) driver.ProcessE
415
449
416
450
res := driver .ServerMarkedUnknown
417
451
// If the node is shutting down or is older than 4.2, we synchronously clear the pool
418
- if cerr .NodeIsShuttingDown () || desc . WireVersion == nil || desc . WireVersion . Max < 8 {
452
+ if cerr .NodeIsShuttingDown () || wireVersion == nil || wireVersion . Max < wireVersion42 {
419
453
res = driver .ConnectionPoolCleared
420
- s .pool .clear (err , desc . ServiceID )
454
+ s .pool .clear (err , serviceID )
421
455
}
422
456
423
457
return res
424
458
}
425
459
if wcerr , ok := getWriteConcernErrorForProcessing (err ); ok {
426
- // ignore stale error
427
- if desc . TopologyVersion .CompareToIncoming (wcerr .TopologyVersion ) >= 0 {
460
+ // Ignore errors that came from when the database was on a previous topology version.
461
+ if topologyVersion .CompareToIncoming (wcerr .TopologyVersion ) >= 0 {
428
462
return driver .NoChange
429
463
}
430
464
@@ -434,9 +468,9 @@ func (s *Server) ProcessError(err error, conn driver.Connection) driver.ProcessE
434
468
435
469
res := driver .ServerMarkedUnknown
436
470
// If the node is shutting down or is older than 4.2, we synchronously clear the pool
437
- if wcerr .NodeIsShuttingDown () || desc . WireVersion == nil || desc . WireVersion . Max < 8 {
471
+ if wcerr .NodeIsShuttingDown () || wireVersion == nil || wireVersion . Max < wireVersion42 {
438
472
res = driver .ConnectionPoolCleared
439
- s .pool .clear (err , desc . ServiceID )
473
+ s .pool .clear (err , serviceID )
440
474
}
441
475
return res
442
476
}
@@ -458,7 +492,7 @@ func (s *Server) ProcessError(err error, conn driver.Connection) driver.ProcessE
458
492
// monitoring check. The check is cancelled last to avoid a post-cancellation reconnect racing with
459
493
// updateDescription.
460
494
s .updateDescription (description .NewServerFromError (s .address , err , nil ))
461
- s .pool .clear (err , desc . ServiceID )
495
+ s .pool .clear (err , serviceID )
462
496
s .cancelCheck ()
463
497
return driver .ConnectionPoolCleared
464
498
}
0 commit comments