@@ -61,11 +61,10 @@ func WithPodListerFactory(factory PodListerFactory) K8sDatastoreOption {
61
61
type PodLister struct {
62
62
Lister listersv1.PodLister
63
63
sharedInformer informers.SharedInformerFactory
64
- ctx context.Context
65
64
}
66
65
67
- func (l * PodLister ) list ( selector labels. Selector ) ([]* corev1.Pod , error ) {
68
- return l .Lister .List (selector )
66
+ func (l * PodLister ) listEverything ( ) ([]* corev1.Pod , error ) {
67
+ return l .Lister .List (labels . Everything () )
69
68
70
69
}
71
70
@@ -97,8 +96,9 @@ func (ds *K8sDatastore) setInferencePool(pool *v1alpha1.InferencePool) {
97
96
// Create a new informer with the new selector.
98
97
ds .podLister = ds .podListerFactory (ds .inferencePool )
99
98
if ds .podLister != nil && ds .podLister .sharedInformer != nil {
100
- ds .podLister .sharedInformer .Start (ds .podLister .ctx .Done ())
101
- ds .podLister .sharedInformer .WaitForCacheSync (ds .podLister .ctx .Done ())
99
+ ctx := context .Background ()
100
+ ds .podLister .sharedInformer .Start (ctx .Done ())
101
+ ds .podLister .sharedInformer .WaitForCacheSync (ctx .Done ())
102
102
}
103
103
}
104
104
}
@@ -123,7 +123,7 @@ func (ds *K8sDatastore) createPodLister(pool *v1alpha1.InferencePool) *PodLister
123
123
}
124
124
125
125
newPodInformer := func (cs clientset.Interface , resyncPeriod time.Duration ) cache.SharedIndexInformer {
126
- informer := informersv1 .NewFilteredPodInformer (cs , pool .Namespace , 0 , nil , func (options * metav1.ListOptions ) {
126
+ informer := informersv1 .NewFilteredPodInformer (cs , pool .Namespace , resyncPeriod , cache. Indexers {} , func (options * metav1.ListOptions ) {
127
127
options .LabelSelector = labels .SelectorFromSet (selectorSet ).String ()
128
128
})
129
129
err := informer .SetTransform (func (obj interface {}) (interface {}, error ) {
@@ -140,30 +140,30 @@ func (ds *K8sDatastore) createPodLister(pool *v1alpha1.InferencePool) *PodLister
140
140
}
141
141
return informer
142
142
}
143
- sharedInformer := informers .NewSharedInformerFactory (ds .client , 0 )
143
+ // 0 means we disable resyncing; it is not really useful to resync every hour (the controller-runtime default):
144
+ // if things go wrong in the watch, no one will wait for an hour for things to get fixed.
145
+ // As precedent, kube-scheduler also disables this since it is expensive to list all pods from the api-server regularly.
146
+ resyncPeriod := time .Duration (0 )
147
+ sharedInformer := informers .NewSharedInformerFactory (ds .client , resyncPeriod )
144
148
sharedInformer .InformerFor (& v1.Pod {}, newPodInformer )
145
149
146
150
return & PodLister {
147
151
Lister : sharedInformer .Core ().V1 ().Pods ().Lister (),
148
152
sharedInformer : sharedInformer ,
149
- ctx : context .Background (),
150
153
}
151
154
}
152
155
153
- func (ds * K8sDatastore ) getPods () []* corev1.Pod {
156
+ func (ds * K8sDatastore ) getPods () ( []* corev1.Pod , error ) {
154
157
ds .poolMu .RLock ()
155
158
defer ds .poolMu .RUnlock ()
156
- if ds .podLister == nil {
157
- klog .V (logutil .DEFAULT ).Info ("InferencePool not yet initialized" )
158
- return []* corev1.Pod {}
159
+ if ! ds .HasSynced () {
160
+ return nil , errors .New ("InferencePool is not initialized in datastore" )
159
161
}
160
-
161
- pods , err := ds .podLister .list (labels .Everything ())
162
+ pods , err := ds .podLister .listEverything ()
162
163
if err != nil {
163
- klog .Errorf ("Failed to list pods for pool %s/%s: %v" , ds .inferencePool .Namespace , ds .inferencePool .Name , err )
164
- return []* corev1.Pod {}
164
+ return nil , err
165
165
}
166
- return pods
166
+ return pods , nil
167
167
}
168
168
169
169
func (s * K8sDatastore ) FetchModelData (modelName string ) (returnModel * v1alpha1.InferenceModel ) {
0 commit comments