@@ -27,10 +27,14 @@ import (
27
27
corev1 "k8s.io/api/core/v1"
28
28
k8serr "k8s.io/apimachinery/pkg/api/errors"
29
29
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
30
+ "k8s.io/apimachinery/pkg/fields"
30
31
"k8s.io/apimachinery/pkg/labels"
31
32
"k8s.io/apimachinery/pkg/types"
33
+ "k8s.io/apimachinery/pkg/util/wait"
32
34
"k8s.io/apimachinery/pkg/watch"
35
+ "k8s.io/client-go/tools/cache"
33
36
"k8s.io/client-go/tools/record"
37
+ watchtools "k8s.io/client-go/tools/watch"
34
38
"sigs.k8s.io/controller-runtime/pkg/client"
35
39
36
40
"github.com/gitpod-io/gitpod/common-go/kubernetes"
@@ -84,9 +88,6 @@ type Monitor struct {
84
88
85
89
OnError func (error )
86
90
87
- notifyPod map [string ]chan string
88
- notifyPodMapLock sync.Mutex
89
-
90
91
eventRecorder record.EventRecorder
91
92
}
92
93
@@ -107,8 +108,6 @@ func (m *Manager) CreateMonitor() (*Monitor, error) {
107
108
log .WithError (err ).Error ("workspace monitor error" )
108
109
},
109
110
110
- notifyPod : make (map [string ]chan string ),
111
-
112
111
eventRecorder : m .eventRecorder ,
113
112
}
114
113
res .eventpool = workpool .NewEventWorkerPool (res .handleEvent )
@@ -152,62 +151,13 @@ func (m *Monitor) handleEvent(evt watch.Event) {
152
151
switch evt .Object .(type ) {
153
152
case * corev1.Pod :
154
153
err = m .onPodEvent (evt )
155
- case * volumesnapshotv1.VolumeSnapshot :
156
- err = m .onVolumesnapshotEvent (evt )
157
154
}
158
155
159
156
if err != nil {
160
157
m .OnError (err )
161
158
}
162
159
}
163
160
164
- func (m * Monitor ) onVolumesnapshotEvent (evt watch.Event ) error {
165
- vs , ok := evt .Object .(* volumesnapshotv1.VolumeSnapshot )
166
- if ! ok {
167
- return xerrors .Errorf ("received non-volume-snapshot event" )
168
- }
169
-
170
- log := log .WithField ("volumesnapshot" , vs .Name )
171
-
172
- if vs .Spec .Source .PersistentVolumeClaimName == nil {
173
- // there is no pvc name within the VolumeSnapshot object
174
- log .Warn ("the spec.source.persistentVolumeClaimName is empty" )
175
- return nil
176
- }
177
-
178
- // the pod name is 1:1 mapping to pvc name
179
- podName := * vs .Spec .Source .PersistentVolumeClaimName
180
- log = log .WithField ("pod" , podName )
181
-
182
- // get the pod resource
183
- var pod corev1.Pod
184
- err := m .manager .Clientset .Get (context .Background (), types.NamespacedName {Namespace : vs .Namespace , Name : podName }, & pod )
185
- if err != nil && ! k8serr .IsNotFound (err ) {
186
- log .WithError (err ).Warnf ("cannot get pod" )
187
- }
188
-
189
- if vs .Status == nil || vs .Status .ReadyToUse == nil || ! * vs .Status .ReadyToUse || vs .Status .BoundVolumeSnapshotContentName == nil {
190
- if ! pod .CreationTimestamp .IsZero () {
191
- m .eventRecorder .Eventf (& pod , corev1 .EventTypeNormal , "VolumeSnapshot" , "Volume snapshot %q is in progress" , vs .Name )
192
- }
193
- return nil
194
- }
195
-
196
- vsc := * vs .Status .BoundVolumeSnapshotContentName
197
- log .Debugf ("the vsc %s is ready to use" , vsc )
198
- if ! pod .CreationTimestamp .IsZero () {
199
- m .eventRecorder .Eventf (& pod , corev1 .EventTypeNormal , "VolumeSnapshot" , "Volume snapshot %q is ready to use" , vs .Name )
200
- }
201
-
202
- m .notifyPodMapLock .Lock ()
203
- if m .notifyPod [podName ] != nil {
204
- m .notifyPod [podName ] <- vsc
205
- }
206
- m .notifyPodMapLock .Unlock ()
207
-
208
- return nil
209
- }
210
-
211
161
// onPodEvent interprets Kubernetes events, translates and broadcasts them, and acts based on them
212
162
func (m * Monitor ) onPodEvent (evt watch.Event ) error {
213
163
// Beware: we patch running pods to add annotations. At the moment this is not a problem as we do not attach
@@ -1150,31 +1100,65 @@ func (m *Monitor) finalizeWorkspaceContent(ctx context.Context, wso *workspaceOb
1150
1100
volumeSnapshotTime = time .Now ()
1151
1101
}
1152
1102
if createdVolumeSnapshot {
1153
- m .notifyPodMapLock .Lock ()
1154
- if m .notifyPod [wso .Pod .Name ] == nil {
1155
- m .notifyPod [wso .Pod .Name ] = make (chan string )
1156
- }
1157
- m .notifyPodMapLock .Unlock ()
1158
-
1159
- select {
1160
- case pvcVolumeSnapshotContentName = <- m .notifyPod [wso .Pod .Name ]:
1103
+ log = log .WithField ("VolumeSnapshot.Name" , pvcVolumeSnapshotName )
1104
+
1105
+ var volumeSnapshotWatcher * watchtools.RetryWatcher
1106
+ volumeSnapshotWatcher , err = watchtools .NewRetryWatcher ("1" , & cache.ListWatch {
1107
+ WatchFunc : func (options metav1.ListOptions ) (watch.Interface , error ) {
1108
+ return m .manager .VolumeSnapshotClient .SnapshotV1 ().VolumeSnapshots (m .manager .Config .Namespace ).Watch (ctx , metav1.ListOptions {
1109
+ FieldSelector : fields .OneTermEqualSelector ("metadata.name" , pvcVolumeSnapshotName ).String (),
1110
+ })
1111
+ },
1112
+ })
1113
+ if err != nil {
1114
+ log .WithError (err ).Info ("fall back to exponential backoff retry" )
1115
+ // we cannot create a retry watcher, so we fall back to exponential backoff retry
1116
+ backoff := wait.Backoff {
1117
+ Steps : 30 ,
1118
+ Duration : 100 * time .Millisecond ,
1119
+ Factor : 1.5 ,
1120
+ Jitter : 0.1 ,
1121
+ Cap : 10 * time .Minute ,
1122
+ }
1123
+ err = wait .ExponentialBackoff (backoff , func () (bool , error ) {
1124
+ var vs volumesnapshotv1.VolumeSnapshot
1125
+ err := m .manager .Clientset .Get (ctx , types.NamespacedName {Namespace : m .manager .Config .Namespace , Name : pvcVolumeSnapshotName }, & vs )
1126
+ if err != nil {
1127
+ if k8serr .IsNotFound (err ) {
1128
+ // volumesnapshot doesn't exist yet, retry again
1129
+ return false , nil
1130
+ }
1131
+ log .WithError (err ).Error ("was unable to get volume snapshot" )
1132
+ return false , err
1133
+ }
1134
+ if vs .Status != nil && vs .Status .ReadyToUse != nil && * vs .Status .ReadyToUse && vs .Status .BoundVolumeSnapshotContentName != nil {
1135
+ pvcVolumeSnapshotContentName = * vs .Status .BoundVolumeSnapshotContentName
1136
+ return true , nil
1137
+ }
1138
+ return false , nil
1139
+ })
1140
+ if err != nil {
1141
+ log .WithError (err ).Errorf ("failed while waiting for volume snapshot to get ready" )
1142
+ return nil , err
1143
+ }
1161
1144
readyVolumeSnapshot = true
1162
- case <- ctx .Done ():
1163
- // There might be a chance that the VolumeSnapshot is ready but somehow
1164
- // we did not receive the notification.
1165
- // For example, the ws-manager restarts before the VolumeSnapshot becomes ready.
1166
- // Let's give it the last chance to check the VolumeSnapshot is ready.
1167
- var vs volumesnapshotv1.VolumeSnapshot
1168
- err := m .manager .Clientset .Get (ctx , types.NamespacedName {Namespace : m .manager .Config .Namespace , Name : pvcVolumeSnapshotName }, & vs )
1169
- if err == nil && vs .Status != nil && vs .Status .ReadyToUse != nil && * vs .Status .ReadyToUse && vs .Status .BoundVolumeSnapshotContentName != nil {
1170
- pvcVolumeSnapshotContentName = * vs .Status .BoundVolumeSnapshotContentName
1171
- readyVolumeSnapshot = true
1172
- break
1145
+ } else {
1146
+ for event := range volumeSnapshotWatcher .ResultChan () {
1147
+ vs , ok := event .Object .(* volumesnapshotv1.VolumeSnapshot )
1148
+ if ! ok {
1149
+ log .Errorf ("unexpected type assertion %T" , event .Object )
1150
+ continue
1151
+ }
1152
+
1153
+ if vs != nil && vs .Status != nil && vs .Status .ReadyToUse != nil && * vs .Status .ReadyToUse && vs .Status .BoundVolumeSnapshotContentName != nil {
1154
+ pvcVolumeSnapshotContentName = * vs .Status .BoundVolumeSnapshotContentName
1155
+ readyVolumeSnapshot = true
1156
+ break
1157
+ }
1173
1158
}
1174
1159
1175
- err = xerrors .Errorf ("%s timed out while waiting for volume snapshot to get ready" , m .manager .Config .Timeouts .ContentFinalization .String ())
1176
- log .Error (err .Error ())
1177
- return nil , err
1160
+ // stop the volume snapshot retry watcher
1161
+ volumeSnapshotWatcher .Stop ()
1178
1162
}
1179
1163
1180
1164
hist , err := m .manager .metrics .volumeSnapshotTimeHistVec .GetMetricWithLabelValues (wsType , wso .Pod .Labels [workspaceClassLabel ])
0 commit comments