
Commit 788b85c

Laure-di, gnoale, yfodil, and remyleone authored
feat(inference): migration v1 and support BYOM (#3048)
* feat(inference): migration deployment to v1
* feat(inference): BYOM support
* add sweeper
* manage id from regional or not
* remove error_message
* fix linter
* last cassette
* fix documentation
* fix documentation lint
* remove comment
* Update docs/resources/inference_custom_model.md
  Co-authored-by: Guillaume Noale <[email protected]>
* change model_id format
* use of dsf.locality
* ResourceCustomModelDelete return right err and testAccCheckCustomModelExists return err
* fix(doc): add import part and fix typo
* fix(doc): deployment required attribute
* fix(inference): use of existing function cast
* Update docs/resources/inference_custom_model.md
  Co-authored-by: Yacine Fodil <[email protected]>
* skip tests until further notice
* activate tests
* fix(inference): rename resource from custom_model to model
* update sdk-go
* remove unnecessary file
* fix(doc): put real URL and more context
* add support model data-source
* testing
* add test
* update doc and tests
* fix linter
* fix linter
* remove custom reference
* update cassette
* update cassette deployment with datasource

---------

Co-authored-by: Guillaume Noale <[email protected]>
Co-authored-by: Yacine Fodil <[email protected]>
Co-authored-by: Rémy Léone <[email protected]>
1 parent d2789d8 commit 788b85c

21 files changed: +5989 −683 lines

docs/data-sources/inference_model.md (new file, +45)

---
subcategory: "Inference"
page_title: "Scaleway: scaleway_inference_model"
---

# scaleway_inference_model

The `scaleway_inference_model` data source allows you to retrieve information about an inference model available in the Scaleway Inference API, either by providing the model's `name` or its `model_id`.

## Example Usage

### Basic

```terraform
data "scaleway_inference_model" "my_model" {
  name = "meta/llama-3.1-8b-instruct:fp8"
}
```

## Argument Reference

You must provide either `name` or `model_id`, but not both.

- `name` - (Optional, conflicts with `model_id`) The fully qualified name of the model to look up (e.g., `meta/llama-3.1-8b-instruct:fp8`). The provider searches for a model with an exact name match in the selected region and project.
- `model_id` - (Optional, conflicts with `name`) The ID of the model to retrieve. Must be a valid UUID with locality (i.e., Scaleway's zoned UUID format).
- `project_id` - (Optional) The project ID to use when listing models. If not provided, the provider's default project is used.
- `region` - (Optional) The region where the model is hosted. If not set, the provider's default region is used.

## Attributes Reference

In addition to the arguments above, the following attributes are exported:

- `id` - The unique identifier of the model.
- `tags` - Tags associated with the model.
- `status` - The current status of the model (e.g., `ready`, `error`).
- `description` - A textual description of the model (if available).
- `has_eula` - Whether the model requires end-user license agreement acceptance before use.
- `parameter_size_bits` - Size, in bits, of the model parameters.
- `size_bytes` - Total size, in bytes, of the model archive.
- `nodes_support` - List of supported node types and their quantization options. Each entry contains:
  - `node_type_name` - The type of node supported.
  - `quantization` - A list of supported quantization options, including:
    - `quantization_bits` - Number of bits used for quantization (e.g., 8, 16).
    - `allowed` - Whether this quantization is allowed.
    - `max_context_size` - Maximum context length supported by this quantization.
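
The same model can also be looked up by ID rather than name. A minimal sketch, assuming a placeholder zoned UUID in `{region}/{uuid}` form:

```terraform
data "scaleway_inference_model" "by_id" {
  # Placeholder ID for illustration; use the zoned UUID of a real model.
  model_id = "fr-par/11111111-1111-1111-1111-111111111111"
}

output "model_status" {
  value = data.scaleway_inference_model.by_id.status
}
```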

docs/resources/inference_deployment.md (+7 −3)

````diff
@@ -13,10 +13,14 @@ For more information, see the [API documentation](https://www.scaleway.com/en/de
 ### Basic
 
 ```terraform
+data "scaleway_inference_model" "my_model" {
+  name = "meta/llama-3.1-8b-instruct:fp8"
+}
+
 resource "scaleway_inference_deployment" "deployment" {
   name      = "tf-inference-deployment"
   node_type = "L4"
-  model_name = "meta/llama-3.1-8b-instruct:fp8"
+  model_id  = data.scaleway_inference_model.my_model.id
   public_endpoint {
     is_enabled = true
   }
@@ -26,7 +30,7 @@ resource "scaleway_inference_deployment" "deployment" {
 
 ## Argument Reference
 
-- `model_name` - (Required) The model name to use for the deployment. Model names can be found in the Console or using Scaleway's CLI (`scw inference model list`).
+- `model_id` - (Required) The model ID used for the deployment.
 - `node_type` - (Required) The node type to use for the deployment. Node types can be found using Scaleway's CLI (`scw inference node-type list`).
 - `name` - (Optional) The deployment name.
 - `accept_eula` - (Optional) Some models (e.g., Meta Llama) require end-user license agreements. Set `true` to accept.
@@ -48,7 +52,7 @@ resource "scaleway_inference_deployment" "deployment" {
 In addition to all arguments above, the following attributes are exported:
 
 - `id` - The ID of the deployment.
-- `model_id` - The model id used for the deployment.
+- `model_name` - The model name used for the deployment. Model names can be found in the Console or using Scaleway's CLI (`scw inference model list`).
 - `size` - The size of the pool.
 - `status` - The status of the deployment.
 - `created_at` - The date and time of the creation of the deployment.
````
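
With `model_id` now required on the deployment resource, a hedged migration sketch for configurations that previously set `model_name` directly: resolve the old name through the new data source and reference its `id` (names below reuse the example above):

```terraform
data "scaleway_inference_model" "resolved" {
  # The value formerly passed as model_name.
  name = "meta/llama-3.1-8b-instruct:fp8"
}

resource "scaleway_inference_deployment" "deployment" {
  name      = "tf-inference-deployment"
  node_type = "L4"
  model_id  = data.scaleway_inference_model.resolved.id

  public_endpoint {
    is_enabled = true
  }
}
```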

docs/resources/inference_model.md (new file, +76)

---
subcategory: "Inference"
page_title: "Scaleway: scaleway_inference_model"
---

# Resource: scaleway_inference_model

The `scaleway_inference_model` resource allows you to upload and manage inference models in the Scaleway Inference ecosystem. Once registered, a model can be used in any `scaleway_inference_deployment` resource.

## Example Usage

### Basic

```terraform
resource "scaleway_inference_model" "test" {
  name   = "my-awesome-model"
  url    = "https://huggingface.co/agentica-org/DeepCoder-14B-Preview"
  secret = "my-secret-token"
}
```

### Deploy your own model on your managed inference deployment

```terraform
resource "scaleway_inference_model" "my_model" {
  name   = "my-awesome-model"
  url    = "https://huggingface.co/agentica-org/DeepCoder-14B-Preview"
  secret = "my-secret-token"
}

resource "scaleway_inference_deployment" "my_deployment" {
  name      = "test-inference-deployment-basic"
  node_type = "H100" # replace with your node type
  model_id  = scaleway_inference_model.my_model.id

  public_endpoint {
    is_enabled = true
  }

  accept_eula = true
}
```

## Argument Reference

- `name` - (Required) The name of the model. This must be unique within the project.
- `url` - (Required) The HTTPS source URL from which the model will be downloaded. This is typically a Hugging Face repository URL (e.g., `https://huggingface.co/agentica-org/DeepCoder-14B-Preview`). The URL must be publicly accessible or reachable with valid credentials via `secret`.
- `secret` - (Optional, Sensitive) Authentication token used to pull the model from a private or gated URL (e.g., a Hugging Face access token with read permission).
- `region` - (Defaults to [provider](../index.md#region) `region`) The [region](../guides/regions_and_zones.md#regions) in which the model is created.
- `project_id` - (Defaults to [provider](../index.md#project_id) `project_id`) The ID of the project the model is associated with.

## Attributes Reference

In addition to all arguments above, the following attributes are exported:

- `id` - The unique identifier of the model.
- `tags` - Tags associated with the model.
- `status` - The current status of the model (e.g., `ready`, `error`).
- `description` - A textual description of the model (if available).
- `has_eula` - Whether the model requires end-user license agreement acceptance before use.
- `parameter_size_bits` - Size, in bits, of the model parameters.
- `size_bytes` - Total size, in bytes, of the model archive.
- `nodes_support` - List of supported node types and their quantization options. Each entry contains:
  - `node_type_name` - The type of node supported.
  - `quantization` - A list of supported quantization options, including:
    - `quantization_bits` - Number of bits used for quantization (e.g., 8, 16).
    - `allowed` - Whether this quantization is allowed.
    - `max_context_size` - Maximum context length supported by this quantization.

## Import

Models can be imported using `{region}/{id}`, as shown below:

```bash
terraform import scaleway_inference_model.my_model fr-par/11111111-1111-1111-1111-111111111111
```
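
Because `secret` is marked sensitive, one option is to inject the token through a Terraform variable rather than hard-coding it. A minimal sketch, assuming a hypothetical `hf_read_token` variable holding a Hugging Face read token:

```terraform
variable "hf_read_token" {
  type      = string
  sensitive = true # keeps the token out of plan output
}

resource "scaleway_inference_model" "gated_model" {
  name   = "my-gated-model" # illustrative name
  url    = "https://huggingface.co/agentica-org/DeepCoder-14B-Preview"
  secret = var.hf_read_token
}
```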

internal/provider/provider.go (+2)

```diff
@@ -167,6 +167,7 @@ func Provider(config *Config) plugin.ProviderFunc {
 			"scaleway_iam_ssh_key":               iam.ResourceSSKKey(),
 			"scaleway_iam_user":                  iam.ResourceUser(),
 			"scaleway_inference_deployment":      inference.ResourceDeployment(),
+			"scaleway_inference_model":           inference.ResourceModel(),
 			"scaleway_instance_image":            instance.ResourceImage(),
 			"scaleway_instance_ip":               instance.ResourceIP(),
 			"scaleway_instance_ip_reverse_dns":   instance.ResourceIPReverseDNS(),
@@ -273,6 +274,7 @@ func Provider(config *Config) plugin.ProviderFunc {
 			"scaleway_iam_ssh_key":               iam.DataSourceSSHKey(),
 			"scaleway_iam_user":                  iam.DataSourceUser(),
 			"scaleway_iam_api_key":               iam.DataSourceAPIKey(),
+			"scaleway_inference_model":           inference.DataSourceModel(),
 			"scaleway_instance_image":            instance.DataSourceImage(),
 			"scaleway_instance_ip":               instance.DataSourceIP(),
 			"scaleway_instance_placement_group":  instance.DataSourcePlacementGroup(),
```

internal/services/inference/deployment.go (+35 −19)

```diff
@@ -6,9 +6,11 @@ import (
 	"github.com/hashicorp/terraform-plugin-sdk/v2/diag"
 	"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
 	"github.com/hashicorp/terraform-plugin-sdk/v2/helper/validation"
-	inference "github.com/scaleway/scaleway-sdk-go/api/inference/v1beta1"
+	"github.com/scaleway/scaleway-sdk-go/api/inference/v1"
 	"github.com/scaleway/scaleway-sdk-go/scw"
+	"github.com/scaleway/terraform-provider-scaleway/v2/internal/dsf"
 	"github.com/scaleway/terraform-provider-scaleway/v2/internal/httperrors"
+	"github.com/scaleway/terraform-provider-scaleway/v2/internal/locality"
 	"github.com/scaleway/terraform-provider-scaleway/v2/internal/locality/regional"
 	"github.com/scaleway/terraform-provider-scaleway/v2/internal/services/account"
 	"github.com/scaleway/terraform-provider-scaleway/v2/internal/types"
@@ -43,17 +45,20 @@ func ResourceDeployment() *schema.Resource {
 			"node_type": {
 				Type:        schema.TypeString,
 				Required:    true,
+				ForceNew:    true,
 				Description: "The node type to use for the deployment",
 			},
 			"model_name": {
 				Type:        schema.TypeString,
-				Required:    true,
+				Computed:    true,
 				Description: "The model name to use for the deployment",
 			},
 			"model_id": {
-				Type:        schema.TypeString,
-				Computed:    true,
-				Description: "The model id used for the deployment",
+				Type:             schema.TypeString,
+				Required:         true,
+				Description:      "The model id used for the deployment",
+				ForceNew:         true,
+				DiffSuppressFunc: dsf.Locality,
 			},
 			"accept_eula": {
 				Type: schema.TypeBool,
@@ -70,16 +75,21 @@ func ResourceDeployment() *schema.Resource {
 			"min_size": {
 				Type:         schema.TypeInt,
 				Optional:     true,
-				Computed:     true,
 				Description:  "The minimum size of the pool",
 				ValidateFunc: validation.IntAtLeast(1),
+				Default:      1,
 			},
 			"max_size": {
 				Type:         schema.TypeInt,
 				Optional:     true,
-				Computed:     true,
 				Description:  "The maximum size of the pool",
 				ValidateFunc: validation.IntAtLeast(1),
+				Default:      1,
+			},
+			"quantization": {
+				Type:        schema.TypeInt,
+				Optional:    true,
+				Description: "The number of bits each model parameter should be quantized to",
 			},
 			"size": {
 				Type: schema.TypeInt,
@@ -178,13 +188,13 @@ func ResourceDeploymentCreate(ctx context.Context, d *schema.ResourceData, m int
 	}
 
 	req := &inference.CreateDeploymentRequest{
-		Region:    region,
-		ProjectID: d.Get("project_id").(string),
-		Name:      d.Get("name").(string),
-		NodeType:  d.Get("node_type").(string),
-		ModelName: d.Get("model_name").(string),
-		Tags:      types.ExpandStrings(d.Get("tags")),
-		Endpoints: buildEndpoints(d),
+		Region:       region,
+		ProjectID:    d.Get("project_id").(string),
+		Name:         d.Get("name").(string),
+		NodeTypeName: d.Get("node_type").(string),
+		ModelID:      locality.ExpandID(d.Get("model_id").(string)),
+		Tags:         types.ExpandStrings(d.Get("tags")),
+		Endpoints:    buildEndpoints(d),
 	}
 
 	if isAcceptingEula, ok := d.GetOk("accept_eula"); ok {
@@ -199,6 +209,12 @@ func ResourceDeploymentCreate(ctx context.Context, d *schema.ResourceData, m int
 		req.MaxSize = scw.Uint32Ptr(uint32(maxSize.(int)))
 	}
 
+	if quantization, ok := d.GetOk("quantization"); ok {
+		req.Quantization = &inference.DeploymentQuantization{
+			Bits: uint32(quantization.(int)),
+		}
+	}
+
 	deployment, err := api.CreateDeployment(req, scw.WithContext(ctx))
 	if err != nil {
 		return diag.FromErr(err)
@@ -221,8 +237,8 @@ func buildEndpoints(d *schema.ResourceData) []*inference.EndpointSpec {
 		publicEndpointMap := publicEndpoint.([]interface{})[0].(map[string]interface{})
 		if publicEndpointMap["is_enabled"].(bool) {
 			publicEp := inference.EndpointSpec{
-				Public:      &inference.EndpointSpecPublic{},
-				DisableAuth: publicEndpointMap["disable_auth"].(bool),
+				PublicNetwork: &inference.EndpointPublicNetworkDetails{},
+				DisableAuth:   publicEndpointMap["disable_auth"].(bool),
 			}
 			endpoints = append(endpoints, &publicEp)
 		}
@@ -232,7 +248,7 @@ func buildEndpoints(d *schema.ResourceData) []*inference.EndpointSpec {
 		privateEndpointMap := privateEndpoint.([]interface{})[0].(map[string]interface{})
 		if privateID, exists := privateEndpointMap["private_network_id"]; exists {
 			privateEp := inference.EndpointSpec{
-				PrivateNetwork: &inference.EndpointSpecPrivateNetwork{
+				PrivateNetwork: &inference.EndpointPrivateNetworkDetails{
 					PrivateNetworkID: regional.ExpandID(privateID.(string)).ID,
 				},
 				DisableAuth: privateEndpointMap["disable_auth"].(bool),
@@ -264,7 +280,7 @@ func ResourceDeploymentRead(ctx context.Context, d *schema.ResourceData, m inter
 	_ = d.Set("name", deployment.Name)
 	_ = d.Set("region", deployment.Region)
 	_ = d.Set("project_id", deployment.ProjectID)
-	_ = d.Set("node_type", deployment.NodeType)
+	_ = d.Set("node_type", deployment.NodeTypeName)
 	_ = d.Set("model_name", deployment.ModelName)
 	_ = d.Set("min_size", int(deployment.MinSize))
 	_ = d.Set("max_size", int(deployment.MaxSize))
@@ -290,7 +306,7 @@ func ResourceDeploymentRead(ctx context.Context, d *schema.ResourceData, m inter
 			privateEndpoints = append(privateEndpoints, privateEndpointSpec)
 		}
 
-		if endpoint.PublicAccess != nil {
+		if endpoint.PublicNetwork != nil {
 			publicEndpointSpec := map[string]interface{}{
 				"id":         endpoint.ID,
 				"is_enabled": true,
```
