
Commit 9c03172

add support for llm-inference CLI

1 parent 20a81a5 commit 9c03172

19 files changed, +928 −3 lines changed

go.mod

+1 −1

@@ -24,7 +24,7 @@ require (
 	github.com/moby/buildkit v0.11.6
 	github.com/opencontainers/go-digest v1.0.0
 	github.com/pkg/errors v0.9.1
-	github.com/scaleway/scaleway-sdk-go v1.0.0-beta.25.0.20240328145934-63d5da1a64df
+	github.com/scaleway/scaleway-sdk-go v1.0.0-beta.25.0.20240329105646-c0ed2d3750f9
 	github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966
 	github.com/spf13/cobra v1.8.0
 	github.com/spf13/pflag v1.0.5
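
This pin appears to pick up the scaleway-sdk-go commit that introduces the llm_inference/v1beta1 API consumed by the new namespace below.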

go.sum

+2 −2

@@ -425,8 +425,8 @@ github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUz
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI=
 github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs=
-github.com/scaleway/scaleway-sdk-go v1.0.0-beta.25.0.20240328145934-63d5da1a64df h1:6GfqcKcUj896FRN7j5BnhL5+mtocxlsTQMqiqhOAn00=
-github.com/scaleway/scaleway-sdk-go v1.0.0-beta.25.0.20240328145934-63d5da1a64df/go.mod h1:fCa7OJZ/9DRTnOKmxvT6pn+LPWUptQAmHF/SBJUGEcg=
+github.com/scaleway/scaleway-sdk-go v1.0.0-beta.25.0.20240329105646-c0ed2d3750f9 h1:ytq3dM17GsFlK8REuRMMr0+pwqj84824pXLw8m1ruW8=
+github.com/scaleway/scaleway-sdk-go v1.0.0-beta.25.0.20240329105646-c0ed2d3750f9/go.mod h1:fCa7OJZ/9DRTnOKmxvT6pn+LPWUptQAmHF/SBJUGEcg=
 github.com/sclevine/spec v1.4.0 h1:z/Q9idDcay5m5irkZ28M7PtQM4aOISzOpj4bUPkDee8=
 github.com/sclevine/spec v1.4.0/go.mod h1:LvpgJaFyvQzRvc1kaDs0bulYwzC70PbiYjC4QnFHkOM=
 github.com/secure-systems-lab/go-securesystemslib v0.4.0 h1:b23VGrQhTA8cN2CbBw7/FulN9fTtqYUdS5+Oxzt+DUE=

internal/namespaces/get_commands.go

+2 −0

@@ -42,6 +42,7 @@ import (
 	"github.com/scaleway/scaleway-cli/v2/internal/namespaces/vpc/v2"
 	"github.com/scaleway/scaleway-cli/v2/internal/namespaces/vpcgw/v1"
 	webhosting "github.com/scaleway/scaleway-cli/v2/internal/namespaces/webhosting/v1alpha1"
+	llm_inference "github.com/scaleway/scaleway-cli/v2/internal/namespaces/llm_inference/v1beta1"
 )

 // Enable beta in the code when products are in beta
@@ -94,6 +95,7 @@ func GetCommands() *core.Commands {
 		ipam.GetCommands(),
 		jobs.GetCommands(),
 		serverless_sqldb.GetCommands(),
+		llm_inference.GetCommands(),
 	)

 	//if beta {}
@@ -0,0 +1,22 @@
+package llm_inference
+
+import (
+	"github.com/scaleway/scaleway-cli/v2/internal/core"
+	"github.com/scaleway/scaleway-cli/v2/internal/human"
+	llm_inference "github.com/scaleway/scaleway-sdk-go/api/llm_inference/v1beta1"
+)
+
+func GetCommands() *core.Commands {
+	cmds := GetGeneratedCommands()
+
+	human.RegisterMarshalerFunc(llm_inference.DeploymentStatus(""), human.EnumMarshalFunc(deploymentStateMarshalSpecs))
+	human.RegisterMarshalerFunc(llm_inference.Deployment{}, DeploymentMarshalerFunc)
+	human.RegisterMarshalerFunc([]*llm_inference.Model{}, ListModelMarshalerFunc)
+
+	cmds.MustFind("llm-inference", "deployment", "create").Override(deploymentCreateBuilder)
+	cmds.MustFind("llm-inference", "deployment", "delete").Override(deploymentDeleteBuilder)
+	cmds.MustFind("llm-inference", "endpoint", "create").Override(endpointCreateBuilder)
+
+	return cmds
+}
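
Each Override call above hands the generated command to a builder that mutates and returns it. A minimal sketch of that contract (the builder name and the tweak are hypothetical; the real builders follow below):

	// exampleBuilder shows the shape shared by deploymentCreateBuilder,
	// deploymentDeleteBuilder and endpointCreateBuilder: receive the
	// generated *core.Command, customize it in place, return it.
	func exampleBuilder(c *core.Command) *core.Command {
		// hypothetical tweak: adjust an existing argument's help text
		c.ArgSpecs.GetByName("node-type").Short = "Node type to deploy the model on"
		return c
	}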
@@ -0,0 +1,169 @@
+package llm_inference
+
+import (
+	"context"
+	"errors"
+	"net/http"
+	"reflect"
+	"strings"
+	"time"
+
+	"github.com/fatih/color"
+	"github.com/scaleway/scaleway-cli/v2/internal/core"
+	"github.com/scaleway/scaleway-cli/v2/internal/human"
+	llm_inference "github.com/scaleway/scaleway-sdk-go/api/llm_inference/v1beta1"
+	"github.com/scaleway/scaleway-sdk-go/scw"
+)
+
+const (
+	deploymentActionTimeout = 40 * time.Minute
+)
+
+var (
+	deploymentStateMarshalSpecs = human.EnumMarshalSpecs{
+		llm_inference.DeploymentStatusCreating:  &human.EnumMarshalSpec{Attribute: color.FgBlue},
+		llm_inference.DeploymentStatusDeploying: &human.EnumMarshalSpec{Attribute: color.FgBlue},
+		llm_inference.DeploymentStatusDeleting:  &human.EnumMarshalSpec{Attribute: color.FgBlue},
+		llm_inference.DeploymentStatusError:     &human.EnumMarshalSpec{Attribute: color.FgRed},
+		llm_inference.DeploymentStatusReady:     &human.EnumMarshalSpec{Attribute: color.FgGreen},
+		llm_inference.DeploymentStatusLocked:    &human.EnumMarshalSpec{Attribute: color.FgRed},
+	}
+)
+
+func DeploymentMarshalerFunc(i interface{}, opt *human.MarshalOpt) (string, error) {
+	// Alias the type so human.Marshal does not re-enter this marshaler recursively.
+	type tmp llm_inference.Deployment
+	deployment := tmp(i.(llm_inference.Deployment))
+	opt.Sections = []*human.MarshalSection{
+		{
+			FieldName: "Endpoints",
+			Title:     "Endpoints",
+		},
+	}
+	str, err := human.Marshal(deployment, opt)
+	if err != nil {
+		return "", err
+	}
+	return str, nil
+}
+
+func deploymentCreateBuilder(c *core.Command) *core.Command {
+	c.ArgSpecs.GetByName("node-type").AutoCompleteFunc = autocompleteDeploymentNodeType
+
+	type llmInferenceEndpointSpecCustom struct {
+		*llm_inference.EndpointSpec
+		IsPublic bool `json:"is-public"`
+	}
+
+	type llmInferenceCreateDeploymentRequestCustom struct {
+		*llm_inference.CreateDeploymentRequest
+		Endpoints []*llmInferenceEndpointSpecCustom `json:"endpoints"`
+	}
+
+	c.ArgSpecs.AddBefore("endpoints.{index}.private-network.private-network-id", &core.ArgSpec{
+		Name:     "endpoints.{index}.is-public",
+		Short:    "Set up a public endpoint if true",
+		Required: false,
+		Default:  core.DefaultValueSetter("false"),
+	})
+
+	c.ArgsType = reflect.TypeOf(llmInferenceCreateDeploymentRequestCustom{})
+
+	c.WaitFunc = func(ctx context.Context, argsI, respI interface{}) (interface{}, error) {
+		api := llm_inference.NewAPI(core.ExtractClient(ctx))
+		return api.WaitForDeployment(&llm_inference.WaitForDeploymentRequest{
+			DeploymentId:  respI.(*llm_inference.Deployment).ID,
+			Region:        respI.(*llm_inference.Deployment).Region,
+			Status:        respI.(*llm_inference.Deployment).Status,
+			Timeout:       scw.TimeDurationPtr(deploymentActionTimeout),
+			RetryInterval: core.DefaultRetryInterval,
+		})
+	}
+
+	c.Interceptor = func(ctx context.Context, argsI interface{}, runner core.CommandRunner) (interface{}, error) {
+		deploymentCreateCustomRequest := argsI.(*llmInferenceCreateDeploymentRequestCustom)
+		deploymentRequest := deploymentCreateCustomRequest.CreateDeploymentRequest
+		if deploymentCreateCustomRequest.Endpoints == nil {
+			// No endpoint args given: default to a public endpoint with auth enabled.
+			endpoint := llm_inference.EndpointSpec{
+				Public:         &llm_inference.EndpointSpecPublic{},
+				PrivateNetwork: nil,
+				DisableAuth:    false,
+			}
+			deploymentRequest.Endpoints = append(deploymentRequest.Endpoints, &endpoint)
+			return runner(ctx, deploymentRequest)
+		}
+		for _, customEndpoint := range deploymentCreateCustomRequest.Endpoints {
+			var publicEndpoint *llm_inference.EndpointSpecPublic
+			if customEndpoint.IsPublic {
+				publicEndpoint = &llm_inference.EndpointSpecPublic{}
+			}
+			// Guard against a nil embedded spec: the user may pass only is-public.
+			var privateNetwork *llm_inference.EndpointSpecPrivateNetwork
+			disableAuth := false
+			if customEndpoint.EndpointSpec != nil {
+				disableAuth = customEndpoint.DisableAuth
+				if customEndpoint.PrivateNetwork != nil {
+					privateNetwork = &llm_inference.EndpointSpecPrivateNetwork{
+						PrivateNetworkID: customEndpoint.PrivateNetwork.PrivateNetworkID,
+					}
+				}
+			}
+			endpoint := llm_inference.EndpointSpec{
+				Public:         publicEndpoint,
+				PrivateNetwork: privateNetwork,
+				DisableAuth:    disableAuth,
+			}
+			deploymentRequest.Endpoints = append(deploymentRequest.Endpoints, &endpoint)
+		}
+
+		return runner(ctx, deploymentRequest)
+	}
+
+	return c
+}
+
+func deploymentDeleteBuilder(c *core.Command) *core.Command {
+	c.WaitFunc = func(ctx context.Context, argsI, respI interface{}) (interface{}, error) {
+		api := llm_inference.NewAPI(core.ExtractClient(ctx))
+		deployment, err := api.WaitForDeployment(&llm_inference.WaitForDeploymentRequest{
+			DeploymentId:  respI.(*llm_inference.Deployment).ID,
+			Region:        respI.(*llm_inference.Deployment).Region,
+			Status:        respI.(*llm_inference.Deployment).Status,
+			Timeout:       scw.TimeDurationPtr(deploymentActionTimeout),
+			RetryInterval: core.DefaultRetryInterval,
+		})
+		if err != nil {
+			// A 404 while waiting means the deployment is already gone,
+			// which is the success case for a delete.
+			notFoundError := &scw.ResourceNotFoundError{}
+			responseError := &scw.ResponseError{}
+			if (errors.As(err, &responseError) && responseError.StatusCode == http.StatusNotFound) || errors.As(err, &notFoundError) {
+				return &core.SuccessResult{
+					Resource: "deployment",
+					Verb:     "delete",
+				}, nil
+			}
+			return nil, err
+		}
+		return deployment, nil
+	}
+	return c
+}
+
+var completeListNodeTypesCache *llm_inference.ListNodeTypesResponse
+
+func autocompleteDeploymentNodeType(ctx context.Context, prefix string, request any) core.AutocompleteSuggestions {
+	req := request.(*llm_inference.CreateDeploymentRequest)
+	suggestions := core.AutocompleteSuggestions(nil)
+
+	client := core.ExtractClient(ctx)
+	api := llm_inference.NewAPI(client)
+
+	// List node types once per process and cache the response.
+	if completeListNodeTypesCache == nil {
+		res, err := api.ListNodeTypes(&llm_inference.ListNodeTypesRequest{
+			Region: req.Region,
+		})
+		if err != nil {
+			return nil
+		}
+		completeListNodeTypesCache = res
+	}
+
+	for _, nodeType := range completeListNodeTypesCache.NodeTypes {
+		if strings.HasPrefix(nodeType.Name, prefix) {
+			suggestions = append(suggestions, nodeType.Name)
+		}
+	}
+
+	return suggestions
+}
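
With deploymentCreateBuilder in place, endpoints are passed as indexed, flattened arguments and folded into EndpointSpec values by the interceptor. A sketch of the resulting invocation (model and node type taken from the tests below, the is-public flag from the ArgSpec above):

	scw llm-inference deployment create node-type=L4 model-name=meta/llama-2-7b-chat:fp16 accept-eula=true endpoints.0.is-public=true

With -w, the WaitFunc blocks on WaitForDeployment until the deployment settles; for delete, a 404 while waiting is reported as a successful deletion.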
@@ -0,0 +1,54 @@
+package llm_inference
+
+import (
+	"testing"
+
+	"github.com/scaleway/scaleway-cli/v2/internal/core"
+	"github.com/scaleway/scaleway-cli/v2/internal/namespaces/vpc/v2"
+)
+
+func Test_DeploymentCreate(t *testing.T) {
+	cmds := GetCommands()
+
+	t.Run("Single public endpoint", core.Test(&core.TestConfig{
+		Commands:  cmds,
+		Cmd:       "scw llm-inference deployment create node-type=H100 model-name=wizardlm/wizardlm-70b-v1.0:fp8 accept-eula=true",
+		Check:     core.TestCheckGolden(),
+		AfterFunc: core.ExecAfterCmd("scw llm-inference deployment delete deployment-id={{ .CmdResult.ID }}"),
+	}))
+	t.Run("Deployment with wait flag", core.Test(&core.TestConfig{
+		Commands:  cmds,
+		Cmd:       "scw llm-inference deployment create model-name=meta/llama-2-7b-chat:fp16 node-type=L4 accept-eula=true -w",
+		Check:     core.TestCheckGolden(),
+		AfterFunc: core.ExecAfterCmd("scw llm-inference deployment delete deployment-id={{ .CmdResult.ID }}"),
+	}))
+}
+
+func Test_CreateDeploymentPrivateEndpoint(t *testing.T) {
+	cmds := GetCommands()
+	cmds.Merge(vpc.GetCommands())
+	t.Skip("Out of stock")
+	t.Run("Create Deployment Private Endpoint", core.Test(&core.TestConfig{
+		Commands:   cmds,
+		BeforeFunc: createPN(),
+		Cmd:        "scw llm-inference deployment create model-name=meta/llama-2-7b-chat:fp16 node-type=L4 accept-eula=true endpoints.0.private-network.private-network-id={{ .PN.ID }}",
+		Check: core.TestCheckCombine(
+			core.TestCheckGolden(),
+		),
+		AfterFunc: core.AfterFuncCombine(
+			core.ExecAfterCmd("scw llm-inference endpoint delete {{ .CmdResult.ID }}"),
+			deletePrivateNetwork(),
+			deleteDeployment(),
+		),
+	}))
+}
+
+func Test_DeploymentDelete(t *testing.T) {
+	cmds := GetCommands()
+
+	t.Run("Delete deployment with wait flag", core.Test(&core.TestConfig{
+		Commands:   cmds,
+		BeforeFunc: createDeploymentPublicEndpoint(),
+		Cmd:        "scw llm-inference deployment delete deployment-id={{ .DEPLOYMENT.ID }} -w",
+		Check:      core.TestCheckGolden(),
+	}))
+}
@@ -0,0 +1,46 @@
+package llm_inference
+
+import (
+	"context"
+	"reflect"
+
+	"github.com/scaleway/scaleway-cli/v2/internal/core"
+	llm_inference "github.com/scaleway/scaleway-sdk-go/api/llm_inference/v1beta1"
+)
+
+func endpointCreateBuilder(c *core.Command) *core.Command {
+	type llmInferenceEndpointSpecCustom struct {
+		*llm_inference.EndpointSpec
+		IsPublic bool `json:"is-public"`
+	}
+
+	type createEndpointRequestCustom struct {
+		*llm_inference.CreateEndpointRequest
+		Endpoint *llmInferenceEndpointSpecCustom `json:"endpoint"`
+	}
+
+	c.ArgSpecs.AddBefore("endpoint.private-network.private-network-id", &core.ArgSpec{
+		Name:     "endpoint.is-public",
+		Short:    "Set up a public endpoint if true",
+		Required: false,
+		Default:  core.DefaultValueSetter("false"),
+	})
+
+	c.ArgsType = reflect.TypeOf(createEndpointRequestCustom{})
+
+	c.Interceptor = func(ctx context.Context, argsI interface{}, runner core.CommandRunner) (interface{}, error) {
+		createEndpointCustomRequest := argsI.(*createEndpointRequestCustom)
+		createEndpointReq := createEndpointCustomRequest.CreateEndpointRequest
+		endpoint := createEndpointCustomRequest.Endpoint
+		if endpoint != nil && endpoint.IsPublic {
+			disableAuth := false
+			if endpoint.EndpointSpec != nil {
+				disableAuth = endpoint.DisableAuth
+			}
+			createEndpointReq.Endpoint = &llm_inference.EndpointSpec{
+				Public:         &llm_inference.EndpointSpecPublic{},
+				PrivateNetwork: nil,
+				DisableAuth:    disableAuth,
+			}
+		} else if endpoint != nil && endpoint.EndpointSpec != nil {
+			// Carry the parsed spec (private network, auth flag) over to the real request.
+			createEndpointReq.Endpoint = endpoint.EndpointSpec
+		}
+		return runner(ctx, createEndpointReq)
+	}
+	return c
+}
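
A sketch of the two invocation shapes this builder accepts (IDs are placeholders; the tests below substitute template variables instead):

	scw llm-inference endpoint create deployment-id=<deployment-id> endpoint.is-public=true
	scw llm-inference endpoint create deployment-id=<deployment-id> endpoint.private-network.private-network-id=<pn-id>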
@@ -0,0 +1,48 @@
+package llm_inference
+
+import (
+	"testing"
+
+	"github.com/scaleway/scaleway-cli/v2/internal/core"
+	"github.com/scaleway/scaleway-cli/v2/internal/namespaces/vpc/v2"
+)
+
+func Test_createEndpoint(t *testing.T) {
+	cmds := GetCommands()
+	cmds.Merge(vpc.GetCommands())
+
+	t.Skip("No stock to run test")
+
+	t.Run("Create Private Endpoint", core.Test(&core.TestConfig{
+		Commands: cmds,
+		BeforeFunc: core.BeforeFuncCombine(
+			createPN(),
+			createDeploymentPublicEndpoint(),
+		),
+		Cmd: "scw llm-inference endpoint create deployment-id={{ .DEPLOYMENT.ID }} endpoint.private-network.private-network-id={{ .PN.ID }}",
+		Check: core.TestCheckCombine(
+			core.TestCheckGolden(),
+		),
+		AfterFunc: core.AfterFuncCombine(
+			core.ExecAfterCmd("scw llm-inference endpoint delete {{ .CmdResult.ID }}"),
+			deletePrivateNetwork(),
+			deleteDeployment(),
+		),
+	}))
+
+	t.Run("Create Public Endpoint", core.Test(&core.TestConfig{
+		Commands: cmds,
+		BeforeFunc: core.BeforeFuncCombine(
+			createPN(),
+			createDeploymentPrivateEndpoint(),
+		),
+		Cmd: "scw llm-inference endpoint create deployment-id={{ .DEPLOYMENT.ID }} endpoint.is-public=true",
+		Check: core.TestCheckCombine(
+			core.TestCheckGolden(),
+		),
+		AfterFunc: core.AfterFuncCombine(
+			core.ExecAfterCmd("scw llm-inference endpoint delete {{ .CmdResult.ID }}"),
+			deletePrivateNetwork(),
+			deleteDeployment(),
+		),
+	}))
+}
