Skip to content

Commit 05f37ad

Browse files
committed
initial sketching of interfacing
1 parent 77f8564 commit 05f37ad

File tree

4 files changed

+165
-2
lines changed

4 files changed

+165
-2
lines changed

docs/proposals/0683-epp-architecture-proposal/README.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,12 +86,17 @@ Due to the possibility of this becoming a bit of a dumping ground. The API will
8686

8787
The flow controller will consume resource regime data, and enforce proper resource sharing between workloads. This will primarily be done through a queuing mechanism [as described here](https://docs.google.com/document/d/1VZL7opFWuwgWquvgiOzLlXAJ633qZ9U-A0ZixGjBgaI/edit?usp=sharing).
8888

89-
#### Scheduling Layer
89+
#### Scheduling Subsystem
9090

91-
As the Scheduling Layer is the final interface to the entirety of the pool, all configuration will be at the _pool_ level. The default scheduling layer will be an experimentally-backed LB algorithm, with exposed config values.
91+
The Scheduling Subsystem is intended to be
92+
93+
As the Scheduling Subsystem is the final interface to the entirety of the pool, all configuration will be at the _pool_ level. The default scheduling layer will be an experimentally-backed LB algorithm, with exposed config values.
9294

9395
The Scheduler will define a strong interface API, so that new scheduling algos may be plugged & dark-launched to test in production traffic without impacting said traffic. Extensions are expected to adhere to the [Scheduler Subsystem definition](https://github.com/kubernetes-sigs/gateway-api-inference-extension/pull/603).
9496

97+
98+
<img src="./images/epp_arch.svg" alt="Scheduling Algorithm" width="1000" />
99+
95100
### `Non-extensible`
96101

97102
#### Ext-Proc Server
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Names are egregiously long, but they attempt to describe custom logic within a name.
2+
profileSelection: disagg-token-length
3+
schedulingResult: log-shadowbox-label-pd-result
4+
profiles:
5+
prefill:
6+
preschedule:
7+
- decode-prefix-cache-check
8+
filter:
9+
- is-prefill
10+
- has-required-accelerator
11+
score:
12+
- prefix-cache: 3
13+
- latency-scorer: 2
14+
selection:
15+
- best-score
16+
postschedule:
17+
- log-full-scores
18+
decode:
19+
filter:
20+
- is-decode
21+
score:
22+
- prefix-cache: 3
23+
- kv-cache-util: 5
24+
selection:
25+
- random-top-3
26+
shadowbox-decode:
27+
filter:
28+
- is-decode
29+
- is-tpu
30+
score:
31+
- prefix-cache-v2: 4
32+
- kv-cache-util: 1
33+
selection:
34+
- random-top-3

docs/proposals/0683-epp-architecture-proposal/images/scheduler_subsystem.svg

Lines changed: 1 addition & 0 deletions
Loading
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package framework
18+
19+
import (
20+
"context"
21+
"sync"
22+
23+
scheduling "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
24+
)
25+
26+
// Plugin is the parent type for all the scheduling framework plugins.
// Every other plugin interface in this file embeds it.
27+
type Plugin interface {
28+
// Name returns the name of the plugin.
Name() string
29+
}
30+
31+
// Endpoint is the unit the scheduling plugins operate on. It exposes an
// EndpointState and a float32 score that can be read and written.
//
// NOTE(review): GetState returns EndpointState by value, and EndpointState
// holds a sync.Map, which must not be copied after first use. Consider
// returning *EndpointState instead — confirm with the implementers.
type Endpoint interface {
32+
// GetState returns the state associated with this endpoint.
GetState() EndpointState
33+
// GetScore returns the endpoint's current score.
GetScore() float32
34+
// SetScore sets the endpoint's score to val.
SetScore(val float32)
35+
}
36+
37+
// EndpointState holds the data associated with an Endpoint in an unexported
// sync.Map. No accessors are defined for it in this file.
type EndpointState struct {
38+
// A sync.Map is only needed if we do not plan on snapshotting data.
39+
storage sync.Map
40+
}
41+
42+
// SchedulingResult is the value returned by Scheduler.SchedulingResult. It
// wraps a map from string keys to endpoint lists.
// NOTE(review): the keys are presumably scheduling profile names — TODO confirm.
type SchedulingResult struct {
43+
results map[string][]Endpoint
44+
}
45+
46+
// Scheduler is the plugin that owns the registered scheduling profiles: it
// lists them, selects which of them to use, and interprets their results.
type Scheduler interface {
47+
Plugin
48+
// ProfileSelection selects scheduling profiles through the implemented
49+
// logic, and returns a subset of the registered scheduling profiles.
50+
ProfileSelection() map[string]SchedulingProfile
51+
52+
// SchedulingProfiles lists all of the scheduling profiles registered
53+
// with the scheduler.
54+
SchedulingProfiles() map[string]SchedulingProfile
55+
56+
// SchedulingResult takes the result(s) of the scheduling cycle(s)
57+
// and makes sense of the data to be consumed by request control.
58+
// For example: suppose you have 2 profiles, a ShadowBoxing Profile & a Production Profile.
59+
// SchedulingResult would know to simply log the result of the ShadowBoxing
60+
// profile, and do nothing else with it.
61+
SchedulingResult(map[string][]Endpoint) SchedulingResult
62+
}
63+
64+
// SchedulingProfile is an interface used to describe a profile that will
65+
// run for a given scheduling cycle.
66+
type SchedulingProfile interface {
67+
Plugin
68+
// PreSchedulePlugins are optional, and will be run at the start of a
69+
// scheduling cycle. This should be scoped to any foundational work needed
70+
// that is custom to this scheduling profile.
71+
PreSchedulePlugins() []PreSchedule
72+
// Filters lists all Filter plugins associated with this Profile. Filters
73+
// are optional.
74+
Filters() []Filter
75+
// Scorers lists all Scorer plugins associated with this Profile. At
76+
// least 1 scorer must be registered for a profile to be valid.
// NOTE(review): the int value is presumably the scorer's weight — TODO confirm.
77+
Scorers() map[Scorer]int
78+
// Selection returns the Picker plugin that picks the endpoint(s).
79+
Selection() Picker
80+
// PostSchedulePlugins lists all PostSchedule plugins associated with this
81+
// Profile. PostSchedulePlugins are run after every scheduling cycle,
82+
// and are optional.
83+
PostSchedulePlugins() []PostSchedule
84+
}
85+
86+
// PreSchedule is run at the start of a scheduling cycle. This should be
87+
// scoped to any foundational work needed that is custom to this scheduling
88+
// profile.
89+
type PreSchedule interface {
90+
Plugin
91+
// PreSchedule receives the cycle state and the candidate endpoints; it
// returns nothing.
PreSchedule(ctx context.Context, state scheduling.CycleState, endpoints []Endpoint)
92+
}
93+
94+
// Filter runs before any scoring, and removes endpoints that are not fit for
95+
// selection. The framework will return an error to the client if the endpoints
96+
// are filtered to zero.
97+
type Filter interface {
98+
Plugin
99+
// Filter receives the candidate endpoints and returns the ones that remain.
Filter(ctx context.Context, state scheduling.CycleState, endpoints []Endpoint) []Endpoint
100+
}
101+
102+
// Scorer applies a score to each remaining endpoint provided. Scorers SHOULD
103+
// keep their score values in a normalized range: [0-1]. Any weighting should
104+
// be added at the SchedulingProfile configuration level.
105+
type Scorer interface {
106+
Plugin
107+
// Score receives the endpoints to score and returns a slice of endpoints.
// NOTE(review): scores are presumably recorded via Endpoint.SetScore — TODO confirm.
Score(ctx context.Context, state scheduling.CycleState, endpoints []Endpoint) []Endpoint
108+
}
109+
110+
// Picker selects the endpoint(s) from the provided list of scored endpoints.
111+
// Picker MUST return at least one endpoint.
112+
type Picker interface {
113+
Plugin
114+
// Selection receives the scored endpoints and returns the selected one(s).
Selection(ctx context.Context, state scheduling.CycleState, endpoints []Endpoint) []Endpoint
115+
}
116+
117+
// PostSchedule runs once per scheduling cycle, and is part of a scheduling profile.
118+
// PostSchedule performs any remaining work needed for the scheduling cycle.
119+
// PostSchedule is not expected to change any values of the parameters.
120+
type PostSchedule interface {
121+
Plugin
122+
// PostSchedule receives the cycle state and the selected endpoints; it
// returns nothing.
PostSchedule(ctx context.Context, state scheduling.CycleState, selectedEndpoints []Endpoint)
123+
}

0 commit comments

Comments
 (0)