@@ -125,6 +125,17 @@ func (n *numaFirst) takeFullSecondLevel() {
	n.acc.takeFullSockets()
}

+// Sort the UncoreCaches within the NUMA nodes.
+func (a *cpuAccumulator) sortAvailableUncoreCaches() []int {
+	var result []int
+	for _, numa := range a.sortAvailableNUMANodes() {
+		uncore := a.details.UncoreInNUMANodes(numa).UnsortedList()
+		a.sort(uncore, a.details.CPUsInUncoreCaches)
+		result = append(result, uncore...)
+	}
+	return result
+}
+
// If NUMA nodes are higher in the memory hierarchy than sockets, then just
// sort the NUMA nodes directly, and return them.
func (n *numaFirst) sortAvailableNUMANodes() []int {
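The new helper delegates to the accumulator's generic `a.sort`, which elsewhere in this file orders IDs by ascending count of still-available CPUs (fewest free first, so partially used groups get packed before untouched ones). A minimal, self-contained sketch of that ordering, with hypothetical cache IDs and free-CPU counts standing in for the real cpuset-backed lookups:

```go
package main

import (
	"fmt"
	"sort"
)

// sortByFreeCPUs mimics the ordering a.sort applies to uncore cache IDs:
// caches with fewer free CPUs come first, ties broken by the lower ID.
func sortByFreeCPUs(uncoreIDs []int, freeCPUs map[int]int) []int {
	sorted := append([]int{}, uncoreIDs...)
	sort.Slice(sorted, func(i, j int) bool {
		if freeCPUs[sorted[i]] != freeCPUs[sorted[j]] {
			return freeCPUs[sorted[i]] < freeCPUs[sorted[j]]
		}
		return sorted[i] < sorted[j]
	})
	return sorted
}

func main() {
	// Hypothetical NUMA node with three uncore caches: cache 1 is mostly
	// allocated already, so it sorts first.
	free := map[int]int{0: 8, 1: 2, 2: 8}
	fmt.Println(sortByFreeCPUs([]int{0, 1, 2}, free)) // [1 0 2]
}
```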
@@ -238,7 +249,14 @@ func (a *cpuAccumulator) isSocketFree(socketID int) bool {
	return a.details.CPUsInSockets(socketID).Size() == a.topo.CPUsPerSocket()
}

-// Returns true if the supplied core is fully available in `topoDetails`.
+// Returns true if the supplied UncoreCache is fully available,
+// where "fully available" means that all the CPUs in it are free.
+func (a *cpuAccumulator) isUncoreCacheFree(uncoreID int) bool {
+	return a.details.CPUsInUncoreCaches(uncoreID).Size() == a.topo.CPUDetails.CPUsInUncoreCaches(uncoreID).Size()
+}
+
+// Returns true if the supplied core is fully available in `a.details`,
+// where "fully available" means that all the CPUs in it are free.
func (a *cpuAccumulator) isCoreFree(coreID int) bool {
	return a.details.CPUsInCores(coreID).Size() == a.topo.CPUsPerCore()
}
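The check compares two views of the same cache: `a.details` holds only the CPUs still free in this accumulator, while `a.topo.CPUDetails` holds every CPU the topology assigns to the cache. A small sketch of the invariant with plain counts in place of the cpuset sizes (the names here are illustrative):

```go
package main

import "fmt"

// uncoreCacheFree mirrors isUncoreCacheFree: a cache is "fully available"
// only when its free-CPU count equals its total CPU count.
func uncoreCacheFree(freeCPUsInCache, totalCPUsInCache int) bool {
	return freeCPUsInCache == totalCPUsInCache
}

func main() {
	fmt.Println(uncoreCacheFree(8, 8)) // true: nothing allocated yet
	fmt.Println(uncoreCacheFree(6, 8)) // false: two CPUs already taken
}
```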
@@ -265,6 +283,17 @@ func (a *cpuAccumulator) freeSockets() []int {
	return free
}

+// Returns free UncoreCache IDs as a slice sorted by sortAvailableUncoreCaches().
+func (a *cpuAccumulator) freeUncoreCache() []int {
+	free := []int{}
+	for _, uncore := range a.sortAvailableUncoreCaches() {
+		if a.isUncoreCacheFree(uncore) {
+			free = append(free, uncore)
+		}
+	}
+	return free
+}
+
// Returns free core IDs as a slice sorted by sortAvailableCores().
func (a *cpuAccumulator) freeCores() []int {
	free := []int{}
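A short usage sketch of the filter above, again with integer free-CPU counts standing in for the real cpuset values; the input order is assumed to be the one produced by sortAvailableUncoreCaches():

```go
package main

import "fmt"

const cpusPerUncore = 8 // hypothetical uncore cache size

// freeUncoreCaches keeps only fully-free cache IDs, preserving input order.
func freeUncoreCaches(sortedIDs []int, freeCPUs map[int]int) []int {
	free := []int{}
	for _, id := range sortedIDs {
		if freeCPUs[id] == cpusPerUncore {
			free = append(free, id)
		}
	}
	return free
}

func main() {
	// Cache 1 has two CPUs allocated, so only caches 0 and 2 pass the filter.
	fmt.Println(freeUncoreCaches([]int{0, 1, 2}, map[int]int{0: 8, 1: 6, 2: 8})) // [0 2]
}
```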
@@ -358,6 +387,60 @@ func (a *cpuAccumulator) takeFullSockets() {
	}
}

+func (a *cpuAccumulator) takeFullUncore() {
+	for _, uncore := range a.freeUncoreCache() {
+		cpusInUncore := a.topo.CPUDetails.CPUsInUncoreCaches(uncore)
+		if !a.needsAtLeast(cpusInUncore.Size()) {
+			continue
+		}
+		klog.V(4).InfoS("takeFullUncore: claiming uncore", "uncore", uncore)
+		a.take(cpusInUncore)
+	}
+}
+
+func (a *cpuAccumulator) takePartialUncore(uncoreID int) {
+	numCoresNeeded := a.numCPUsNeeded / a.topo.CPUsPerCore()
+
+	// Note: we need to select the first N free cores (physical CPUs) and only
+	// then expand to their CPUs (virtual CPUs). Taking the first M CPUs
+	// (virtual CPUs) directly leads to suboptimal allocation.
+	freeCores := a.details.CoresNeededInUncoreCache(numCoresNeeded, uncoreID)
+	freeCPUs := a.details.CPUsInCores(freeCores.UnsortedList()...)
+
+	claimed := (a.numCPUsNeeded == freeCPUs.Size())
+	klog.V(4).InfoS("takePartialUncore: trying to claim partial uncore",
+		"uncore", uncoreID,
+		"claimed", claimed,
+		"needed", a.numCPUsNeeded,
+		"cores", freeCores.String(),
+		"cpus", freeCPUs.String())
+	if !claimed {
+		return
+	}
+	a.take(freeCPUs)
+}
+
+// First try to take whole UncoreCaches, if available and the request is at
+// least the size of an UncoreCache group. Then try to take a partial
+// UncoreCache if the remaining request can fit within a single UncoreCache.
+func (a *cpuAccumulator) takeUncoreCache() {
+	numCPUsInUncore := a.topo.CPUsPerUncore()
+	for _, uncore := range a.sortAvailableUncoreCaches() {
+		// take whole UncoreCaches if the number of CPUs needed is at least
+		// the size of a full UncoreCache
+		if a.needsAtLeast(numCPUsInUncore) {
+			a.takeFullUncore()
+		}
+
+		if a.isSatisfied() {
+			return
+		}
+
+		a.takePartialUncore(uncore)
+		if a.isSatisfied() {
+			return
+		}
+	}
+}
+
func (a *cpuAccumulator) takeFullCores() {
	for _, core := range a.freeCores() {
		cpusInCore := a.topo.CPUDetails.CPUsInCores(core)
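Taken together, takeUncoreCache claims whole caches while the request still needs a full cache's worth of CPUs, then places the remainder into a single cache only if it fits entirely. A self-contained simulation of that flow, using integer counters instead of the real cpuset machinery (the real takePartialUncore additionally selects whole free cores before expanding to their hyperthreads):

```go
package main

import (
	"fmt"
	"sort"
)

const cpusPerUncore = 8 // hypothetical uncore cache size

type accumulator struct {
	needed       int
	freeByUncore map[int]int // uncore cache ID -> free CPUs remaining
}

func (a *accumulator) sortedUncoreIDs() []int {
	ids := make([]int, 0, len(a.freeByUncore))
	for id := range a.freeByUncore {
		ids = append(ids, id)
	}
	sort.Ints(ids)
	return ids
}

// takeFullUncore claims whole caches while a full cache's worth is still needed.
func (a *accumulator) takeFullUncore() {
	for _, id := range a.sortedUncoreIDs() {
		if a.freeByUncore[id] == cpusPerUncore && a.needed >= cpusPerUncore {
			fmt.Printf("claim full uncore cache %d\n", id)
			a.freeByUncore[id] = 0
			a.needed -= cpusPerUncore
		}
	}
}

// takePartialUncore claims from one cache only if the remainder fits entirely.
func (a *accumulator) takePartialUncore(id int) {
	if a.needed > 0 && a.needed <= a.freeByUncore[id] {
		fmt.Printf("claim %d CPUs from uncore cache %d\n", a.needed, id)
		a.freeByUncore[id] -= a.needed
		a.needed = 0
	}
}

func main() {
	// Request 10 CPUs from two fully-free caches of 8 CPUs each: one whole
	// cache is taken first, then the remaining 2 CPUs come from the next.
	a := &accumulator{needed: 10, freeByUncore: map[int]int{0: 8, 1: 8}}
	a.takeFullUncore()
	for _, id := range a.sortedUncoreIDs() {
		if a.needed == 0 {
			break
		}
		a.takePartialUncore(id)
	}
	fmt.Println("still needed:", a.needed) // 0
}
```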
@@ -447,7 +530,7 @@ func (a *cpuAccumulator) iterateCombinations(n []int, k int, f func([]int) LoopC
	helper(n, k, 0, []int{}, f)
}

-func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int) (cpuset.CPUSet, error) {
+func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, preferAlignByUncoreCache bool) (cpuset.CPUSet, error) {
	acc := newCPUAccumulator(topo, availableCPUs, numCPUs)
	if acc.isSatisfied() {
		return acc.result, nil
@@ -470,14 +553,24 @@ func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.C
		return acc.result, nil
	}

-	// 2. Acquire whole cores, if available and the container requires at least
+	// 2. If PreferAlignByUncoreCache is enabled, acquire whole UncoreCaches,
+	// if available and the container requires at least an UncoreCache's worth
+	// of CPUs. Otherwise, acquire CPUs from the smallest possible number of
+	// UncoreCaches.
+	if preferAlignByUncoreCache {
+		acc.takeUncoreCache()
+		if acc.isSatisfied() {
+			return acc.result, nil
+		}
+	}
+
+	// 3. Acquire whole cores, if available and the container requires at least
	// a core's-worth of CPUs.
	acc.takeFullCores()
	if acc.isSatisfied() {
		return acc.result, nil
	}

-	// 3. Acquire single threads, preferring to fill partially-allocated cores
+	// 4. Acquire single threads, preferring to fill partially-allocated cores
	// on the same sockets as the whole cores we have already taken in this
	// allocation.
	acc.takeRemainingCPUs()
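The flag therefore splices one extra stage into the packed allocator's fixed fallback order: whole NUMA nodes and sockets, then (optionally) uncore caches, then whole cores, then single threads. A minimal sketch of that conditional pipeline, with hypothetical stand-in steps:

```go
package main

import "fmt"

type step struct {
	name string
	run  func() bool // returns true once the request is satisfied
}

// buildSteps mirrors the allocator's fallback order; the uncore stage is
// present only when the caller opted in. The take callback is a stand-in
// for the real accumulator methods.
func buildSteps(preferAlignByUncoreCache bool, take func(level string) bool) []step {
	steps := []step{
		{"full NUMA nodes / sockets", func() bool { return take("numa/socket") }},
	}
	if preferAlignByUncoreCache {
		steps = append(steps, step{"full/partial uncore caches", func() bool { return take("uncore") }})
	}
	steps = append(steps,
		step{"full cores", func() bool { return take("core") }},
		step{"remaining CPUs", func() bool { return take("cpu") }},
	)
	return steps
}

func main() {
	// Pretend the request is satisfied at the uncore stage.
	take := func(level string) bool { return level == "uncore" }
	for _, s := range buildSteps(true, take) {
		fmt.Println("trying:", s.name)
		if s.run() {
			fmt.Println("satisfied at:", s.name)
			return
		}
	}
}
```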
@@ -555,8 +648,10 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu
	// If the number of CPUs requested cannot be handed out in chunks of
	// 'cpuGroupSize', then we just call out the packing algorithm since we
	// can't distribute CPUs in this chunk size.
+	// PreferAlignByUncoreCache is not implemented here yet, so it is hard-coded
+	// to false; support in the distributed algorithm is planned for the beta
+	// release.
	if (numCPUs % cpuGroupSize) != 0 {
-		return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs)
+		return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, false)
	}

	// Otherwise build an accumulator to start allocating CPUs from.
@@ -739,7 +834,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu
	// size 'cpuGroupSize' from 'bestCombo'.
	distribution := (numCPUs / len(bestCombo) / cpuGroupSize) * cpuGroupSize
	for _, numa := range bestCombo {
-		cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), distribution)
+		cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, false)
		acc.take(cpus)
	}
@@ -754,7 +849,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu
		if acc.details.CPUsInNUMANodes(numa).Size() < cpuGroupSize {
			continue
		}
-		cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize)
+		cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, false)
		acc.take(cpus)
		remainder -= cpuGroupSize
	}
@@ -778,5 +873,5 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu

	// If we never found a combination of NUMA nodes that we could properly
	// distribute CPUs across, fall back to the packing algorithm.
-	return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs)
+	return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, false)
}
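After this change the two entry points diverge on the new parameter: the packed path honors the caller's preference, while every call made from the distributed path pins it to false until uncore support lands there. A hedged usage sketch with simplified stand-in signatures (not the real kubelet wiring):

```go
package main

import "fmt"

// packedAlloc stands in for takeByTopologyNUMAPacked with the new flag.
func packedAlloc(numCPUs int, preferAlignByUncoreCache bool) {
	fmt.Printf("packed: %d CPUs, uncore alignment = %t\n", numCPUs, preferAlignByUncoreCache)
}

// distributedAlloc stands in for takeByTopologyNUMADistributed, whose
// internal fallbacks all hard-code the flag to false for now.
func distributedAlloc(numCPUs, cpuGroupSize int) {
	if numCPUs%cpuGroupSize != 0 {
		// e.g. 6 % 4 == 2: cannot hand out whole chunks, fall back to packed.
		packedAlloc(numCPUs, false)
		return
	}
	fmt.Printf("distributed: %d CPUs in chunks of %d\n", numCPUs, cpuGroupSize)
}

func main() {
	packedAlloc(10, true)  // e.g. static policy with the uncore option enabled
	distributedAlloc(6, 4) // falls back to packed allocation, flag off
	distributedAlloc(8, 4) // distributes normally
}
```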