@@ -100,6 +100,7 @@ type LogTimeoutConfig struct {
100
100
type Config struct {
101
101
LogTimeoutConfig
102
102
EnableAutoReset bool `envconfig:"ENABLE_AUTO_RESET" default:"false"`
103
+ EnableAutoAssign bool `envconfig:"ENABLE_AUTO_ASSIGN" default:"true"`
103
104
ResetTimeout time.Duration `envconfig:"RESET_CLUSTER_TIMEOUT" default:"3m"`
104
105
MonitorBatchSize int `envconfig:"HOST_MONITOR_BATCH_SIZE" default:"100"`
105
106
DisabledHostvalidations DisabledHostValidations `envconfig:"DISABLED_HOST_VALIDATIONS" default:""` // Which host validations to disable (should not run in preprocess)
@@ -134,6 +135,7 @@ type API interface {
134
135
IsInstallable (h * models.Host ) bool
135
136
// auto assign host role
136
137
AutoAssignRole (ctx context.Context , h * models.Host , db * gorm.DB ) (bool , error )
138
+ RefreshRole (ctx context.Context , h * models.Host , db * gorm.DB ) error
137
139
IsValidMasterCandidate (h * models.Host , c * common.Cluster , db * gorm.DB , log logrus.FieldLogger ) (bool , error )
138
140
SetUploadLogsAt (ctx context.Context , h * models.Host , db * gorm.DB ) error
139
141
UpdateLogsProgress (ctx context.Context , h * models.Host , progress string ) error
@@ -375,6 +377,28 @@ func (m *Manager) updateInventory(ctx context.Context, cluster *common.Cluster,
375
377
}).Error
376
378
}
377
379
380
+ func (m * Manager ) refreshRoleInternal (ctx context.Context , h * models.Host , db * gorm.DB , forceRefresh bool ) error {
381
+ //update suggested role, if not yet set
382
+ var suggestedRole models.HostRole
383
+ var err error
384
+ if m .Config .EnableAutoAssign || forceRefresh {
385
+ //because of possible hw changes, suggested role should be calculated
386
+ //periodically even if the suggested role is already set
387
+ if h .Role == models .HostRoleAutoAssign &&
388
+ funk .ContainsString (hostStatusesBeforeInstallation [:], * h .Status ) {
389
+ if suggestedRole , err = m .autoRoleSelection (ctx , h , db ); err == nil {
390
+ if h .SuggestedRole != suggestedRole {
391
+ if err = updateRole (m .log , h , h .Role , suggestedRole , db , string (h .Role )); err == nil {
392
+ h .SuggestedRole = suggestedRole
393
+ m .log .Infof ("suggested role for host %s is %s" , * h .ID , suggestedRole )
394
+ }
395
+ }
396
+ }
397
+ }
398
+ }
399
+ return err
400
+ }
401
+
378
402
func (m * Manager ) refreshStatusInternal (ctx context.Context , h * models.Host , c * common.Cluster , i * common.InfraEnv , db * gorm.DB ) error {
379
403
if db == nil {
380
404
db = m .db
@@ -409,15 +433,6 @@ func (m *Manager) refreshStatusInternal(ctx context.Context, h *models.Host, c *
409
433
}
410
434
}
411
435
412
- //update suggested role, if not yet set
413
- var suggestedRole models.HostRole
414
- if h .Role == models .HostRoleAutoAssign && h .SuggestedRole == models .HostRoleAutoAssign &&
415
- funk .ContainsString (hostStatusesBeforeInstallation [:], * h .Status ) {
416
- if suggestedRole , err = m .autoRoleSelection (ctx , h , db ); err == nil {
417
- _ = updateRole (m .log , h , h .Role , suggestedRole , db , string (h .Role ))
418
- }
419
- }
420
-
421
436
err = m .sm .Run (TransitionTypeRefresh , newStateHost (h ), & TransitionArgsRefreshHost {
422
437
ctx : ctx ,
423
438
db : db ,
@@ -431,6 +446,13 @@ func (m *Manager) refreshStatusInternal(ctx context.Context, h *models.Host, c *
431
446
return nil
432
447
}
433
448
449
+ func (m * Manager ) RefreshRole (ctx context.Context , h * models.Host , db * gorm.DB ) error {
450
+ if db == nil {
451
+ db = m .db
452
+ }
453
+ return m .refreshRoleInternal (ctx , h , db , true )
454
+ }
455
+
434
456
func (m * Manager ) RefreshStatus (ctx context.Context , h * models.Host , db * gorm.DB ) error {
435
457
if db == nil {
436
458
db = m .db
@@ -1009,17 +1031,16 @@ func (m *Manager) AutoAssignRole(ctx context.Context, h *models.Host, db *gorm.D
1009
1031
if h .Role == models .HostRoleAutoAssign {
1010
1032
log := logutil .FromContext (ctx , m .log )
1011
1033
// If role is auto-assigned calculate the suggested roles
1012
- // This logic will moved to the monitor soon
1013
- suggestedRole , err := m .autoRoleSelection (ctx , h , db )
1014
- if err != nil {
1034
+ // to make sure the suggestion is fresh
1035
+ if err := m .RefreshRole (ctx , h , db ); err != nil { //force refresh
1015
1036
return false , err
1016
1037
}
1017
1038
1018
1039
//copy the suggested role into the role and update the host record
1019
- log .Infof ("suggested role %s for host %s cluster %s" , suggestedRole , h .ID .String (), h .ClusterID .String ())
1020
- if err := updateRole (m .log , h , suggestedRole , suggestedRole , db , string (models .HostRoleAutoAssign )); err != nil {
1040
+ log .Infof ("suggested role %s for host %s cluster %s" , h . SuggestedRole , h .ID .String (), h .ClusterID .String ())
1041
+ if err := updateRole (m .log , h , h . SuggestedRole , h . SuggestedRole , db , string (models .HostRoleAutoAssign )); err != nil {
1021
1042
log .WithError (err ).Errorf ("failed to update role %s for host %s cluster %s" ,
1022
- suggestedRole , h .ID .String (), h .ClusterID .String ())
1043
+ h . SuggestedRole , h .ID .String (), h .ClusterID .String ())
1023
1044
return true , err
1024
1045
}
1025
1046
@@ -1061,15 +1082,15 @@ func (m *Manager) selectRole(ctx context.Context, h *models.Host, db *gorm.DB) (
1061
1082
h .ID .String (), h .ClusterID .String ())
1062
1083
}
1063
1084
1064
- // count already existing masters
1065
- mastersCount := 0
1066
- if err = db .Model (& models.Host {}).Where ("cluster_id = ? and status != ? and role = ?" ,
1067
- h .ClusterID , models .HostStatusDisabled , models .HostRoleMaster ).Count (& mastersCount ).Error ; err != nil {
1085
+ // count already existing masters or hosts with suggested role of master
1086
+ otherMastersCount := 0
1087
+ if err = db .Model (& models.Host {}).Where ("cluster_id = ? and id != ? and status != ? and ( role = ? or suggested_role = ?) " ,
1088
+ h .ClusterID , h . ID , models .HostStatusDisabled , models .HostRoleMaster , models . HostRoleMaster ).Count (& otherMastersCount ).Error ; err != nil {
1068
1089
log .WithError (err ).Errorf ("failed to count masters in cluster %s" , h .ClusterID .String ())
1069
1090
return autoSelectedRole , err
1070
1091
}
1071
1092
1072
- if mastersCount < common .MinMasterHostsNeededForInstallation {
1093
+ if otherMastersCount < common .MinMasterHostsNeededForInstallation {
1073
1094
h .Role = models .HostRoleMaster
1074
1095
vc , err = newValidationContext (h , nil , nil , db , m .hwValidator )
1075
1096
if err != nil {
@@ -1269,13 +1290,14 @@ func (m *Manager) captureConnectivityReportMetrics(ctx context.Context, openshif
1269
1290
}
1270
1291
for _ , r := range connectivityReport .RemoteHosts {
1271
1292
for _ , l3 := range r .L3Connectivity {
1272
- _ , targetRole , err := GetHostnameAndRoleByIP (l3 .RemoteIPAddress , hosts )
1293
+ _ , targetRole , err := GetHostnameAndEffectiveRoleByIP (l3 .RemoteIPAddress , hosts )
1273
1294
if err != nil {
1274
1295
log .Warn (err )
1275
1296
continue
1276
1297
}
1277
- m .metricApi .NetworkLatencyBetweenHosts (openshiftVersion , h .Role , targetRole , l3 .AverageRTTMs )
1278
- m .metricApi .PacketLossBetweenHosts (openshiftVersion , h .Role , targetRole , l3 .PacketLossPercentage )
1298
+ effectiveRole := common .GetEffectiveRole (h )
1299
+ m .metricApi .NetworkLatencyBetweenHosts (openshiftVersion , effectiveRole , targetRole , l3 .AverageRTTMs )
1300
+ m .metricApi .PacketLossBetweenHosts (openshiftVersion , effectiveRole , targetRole , l3 .PacketLossPercentage )
1279
1301
}
1280
1302
}
1281
1303
}
0 commit comments