3
3
*
4
4
* This source file is part of the FoundationDB open source project
5
5
*
6
- * Copyright 2019-2021 Apple Inc. and the FoundationDB project authors
6
+ * Copyright 2019-2023 Apple Inc. and the FoundationDB project authors
7
7
*
8
8
* Licensed under the Apache License, Version 2.0 (the "License");
9
9
* you may not use this file except in compliance with the License.
@@ -44,11 +44,48 @@ func (updatePods) reconcile(ctx context.Context, r *FoundationDBClusterReconcile
44
44
45
45
pods , err := r .PodLifecycleManager .GetPods (ctx , r , cluster , internal .GetPodListOptions (cluster , "" , "" )... )
46
46
if err != nil {
47
- return & requeue {curError : err }
47
+ return & requeue {curError : err , delayedRequeue : true }
48
+ }
49
+
50
+ updates , err := getPodsToUpdate (logger , r , cluster , internal .CreatePodMap (cluster , pods ))
51
+ if err != nil {
52
+ return & requeue {curError : err , delay : podSchedulingDelayDuration , delayedRequeue : true }
53
+ }
54
+
55
+ if len (updates ) > 0 {
56
+ if cluster .Spec .AutomationOptions .PodUpdateStrategy == fdbv1beta2 .PodUpdateStrategyReplacement {
57
+ logger .Info ("Requeuing reconciliation to replace pods" )
58
+ return & requeue {message : "Requeueing reconciliation to replace pods" }
59
+ }
60
+
61
+ if r .PodLifecycleManager .GetDeletionMode (cluster ) == fdbv1beta2 .PodUpdateModeNone {
62
+ r .Recorder .Event (cluster , corev1 .EventTypeNormal ,
63
+ "NeedsPodsDeletion" , "Spec require deleting some pods, but deleting pods is disabled" )
64
+ cluster .Status .Generations .NeedsPodDeletion = cluster .ObjectMeta .Generation
65
+ err = r .updateOrApply (ctx , cluster )
66
+ if err != nil {
67
+ logger .Error (err , "Error updating cluster status" )
68
+ }
69
+ return & requeue {message : "Pod deletion is disabled" }
70
+ }
48
71
}
49
72
73
+ if len (updates ) == 0 {
74
+ return nil
75
+ }
76
+
77
+ adminClient , err := r .getDatabaseClientProvider ().GetAdminClient (cluster , r .Client )
78
+ if err != nil {
79
+ return & requeue {curError : err , delayedRequeue : true }
80
+ }
81
+ defer adminClient .Close ()
82
+
83
+ return deletePodsForUpdates (ctx , r , cluster , adminClient , updates , logger )
84
+ }
85
+
86
+ // getPodsToUpdate returns the Pods whose spec hash annotation no longer matches the desired spec hash, grouped by zone. The map has the fault domain as key and the value is a slice of *corev1.Pod with all Pods in that fault domain that must be updated.
87
+ func getPodsToUpdate (logger logr.Logger , reconciler * FoundationDBClusterReconciler , cluster * fdbv1beta2.FoundationDBCluster , podMap map [fdbv1beta2.ProcessGroupID ]* corev1.Pod ) (map [string ][]* corev1.Pod , error ) {
50
88
updates := make (map [string ][]* corev1.Pod )
51
- podMap := internal .CreatePodMap (cluster , pods )
52
89
53
90
for _ , processGroup := range cluster .Status .ProcessGroups {
54
91
if processGroup .IsMarkedForRemoval () {
@@ -74,90 +111,78 @@ func (updatePods) reconcile(ctx context.Context, r *FoundationDBClusterReconcile
74
111
logger .V (1 ).Info ("Could not find Pod for process group ID" ,
75
112
"processGroupID" , processGroup .ProcessGroupID )
76
113
continue
77
- // TODO should not be continue but rather be a requeue?
78
114
}
79
115
80
116
if shouldRequeueDueToTerminatingPod (pod , cluster , processGroup .ProcessGroupID ) {
81
- return & requeue { message : "Cluster has pod that is pending deletion" , delay : podSchedulingDelayDuration , delayedRequeue : true }
117
+ return nil , fmt . Errorf ( "cluster has Pod %s that is pending deletion" , pod . Name )
82
118
}
83
119
84
120
_ , idNum , err := podmanager .ParseProcessGroupID (processGroup .ProcessGroupID )
85
121
if err != nil {
86
- return & requeue {curError : err }
122
+ logger .Info ("Skipping Pod due to error parsing Process Group ID" ,
123
+ "processGroupID" , processGroup .ProcessGroupID ,
124
+ "error" , err .Error ())
125
+ continue
87
126
}
88
127
89
128
processClass , err := podmanager .GetProcessClass (cluster , pod )
90
129
if err != nil {
91
- return & requeue {curError : err }
130
+ logger .Info ("Skipping Pod due to error fetching process class" ,
131
+ "processGroupID" , processGroup .ProcessGroupID ,
132
+ "error" , err .Error ())
133
+ continue
92
134
}
93
135
94
136
specHash , err := internal .GetPodSpecHash (cluster , processClass , idNum , nil )
95
137
if err != nil {
96
- return & requeue {curError : err }
97
- }
98
-
99
- if pod .ObjectMeta .Annotations [fdbv1beta2 .LastSpecKey ] != specHash {
100
- logger .Info ("Update Pod" ,
138
+ logger .Info ("Skipping Pod due to error generating spec hash" ,
101
139
"processGroupID" , processGroup .ProcessGroupID ,
102
- "reason" , fmt .Sprintf ("specHash has changed from %s to %s" , specHash , pod .ObjectMeta .Annotations [fdbv1beta2 .LastSpecKey ]))
103
-
104
- podClient , message := r .getPodClient (cluster , pod )
105
- if podClient == nil {
106
- return & requeue {message : message , delay : podSchedulingDelayDuration }
107
- }
108
-
109
- substitutions , err := podClient .GetVariableSubstitutions ()
110
- if err != nil {
111
- return & requeue {curError : err }
112
- }
140
+ "error" , err .Error ())
141
+ continue
142
+ }
113
143
114
- if substitutions == nil {
115
- logger .Info ("Skipping pod due to missing locality information" ,
116
- "processGroupID" , processGroup .ProcessGroupID )
117
- continue
118
- }
144
+ // The Pod is updated, so we can continue.
145
+ if pod .ObjectMeta .Annotations [fdbv1beta2 .LastSpecKey ] == specHash {
146
+ continue
147
+ }
119
148
120
- zone := substitutions ["FDB_ZONE_ID" ]
121
- if r .InSimulation {
122
- zone = "simulation"
123
- }
149
+ logger .Info ("Update Pod" ,
150
+ "processGroupID" , processGroup .ProcessGroupID ,
151
+ "reason" , fmt .Sprintf ("specHash has changed from %s to %s" , specHash , pod .ObjectMeta .Annotations [fdbv1beta2 .LastSpecKey ]))
124
152
125
- if updates [zone ] == nil {
126
- updates [zone ] = make ([]* corev1.Pod , 0 )
127
- }
128
- updates [zone ] = append (updates [zone ], pod )
153
+ podClient , message := reconciler .getPodClient (cluster , pod )
154
+ if podClient == nil {
155
+ logger .Info ("Skipping Pod due to missing Pod client information" ,
156
+ "processGroupID" , processGroup .ProcessGroupID ,
157
+ "message" , message )
158
+ continue
129
159
}
130
- }
131
160
132
- if len (updates ) > 0 {
133
- if cluster .Spec .AutomationOptions .PodUpdateStrategy == fdbv1beta2 .PodUpdateStrategyReplacement {
134
- logger .Info ("Requeuing reconciliation to replace pods" )
135
- return & requeue {message : "Requeueing reconciliation to replace pods" }
161
+ substitutions , err := podClient .GetVariableSubstitutions ()
162
+ if err != nil {
163
+ logger .Info ("Skipping Pod due to missing variable substitutions" ,
164
+ "processGroupID" , processGroup .ProcessGroupID )
165
+ continue
136
166
}
137
167
138
- if r .PodLifecycleManager .GetDeletionMode (cluster ) == fdbv1beta2 .PodUpdateModeNone {
139
- r .Recorder .Event (cluster , corev1 .EventTypeNormal ,
140
- "NeedsPodsDeletion" , "Spec require deleting some pods, but deleting pods is disabled" )
141
- cluster .Status .Generations .NeedsPodDeletion = cluster .ObjectMeta .Generation
142
- err = r .updateOrApply (ctx , cluster )
143
- if err != nil {
144
- logger .Error (err , "Error updating cluster status" )
145
- }
146
- return & requeue {message : "Pod deletion is disabled" }
168
+ if substitutions == nil {
169
+ logger .Info ("Skipping Pod due to missing locality information" ,
170
+ "processGroupID" , processGroup .ProcessGroupID )
171
+ continue
147
172
}
148
- }
149
173
150
- if len (updates ) == 0 {
151
- return nil
152
- }
174
+ zone := substitutions ["FDB_ZONE_ID" ]
175
+ if reconciler .InSimulation {
176
+ zone = "simulation"
177
+ }
153
178
154
- adminClient , err := r .getDatabaseClientProvider ().GetAdminClient (cluster , r .Client )
155
- if err != nil {
156
- return & requeue {curError : err }
179
+ if updates [zone ] == nil {
180
+ updates [zone ] = make ([]* corev1.Pod , 0 )
181
+ }
182
+ updates [zone ] = append (updates [zone ], pod )
157
183
}
158
- defer adminClient .Close ()
159
184
160
- return deletePodsForUpdates ( ctx , r , cluster , adminClient , updates , logger )
185
+ return updates , nil
161
186
}
162
187
163
188
func shouldRequeueDueToTerminatingPod (pod * corev1.Pod , cluster * fdbv1beta2.FoundationDBCluster , processGroupID fdbv1beta2.ProcessGroupID ) bool {
0 commit comments