Skip to content

Commit 210c2f6

Browse files
jvstmepranitnaik43
authored and committed
[chore]: Refactor gateway modules (dstackai#2226)
* Split `core.models.gateways` in two * Split `server.services.gateways` in two
1 parent 87f2dc8 commit 210c2f6

File tree

16 files changed

+341
-316
lines changed

16 files changed

+341
-316
lines changed

docs/docs/reference/dstack.yml/service.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ The `service` configuration type allows running [services](../../concepts/servic
1414

1515
=== "OpenAI"
1616

17-
#SCHEMA# dstack._internal.core.models.gateways.OpenAIChatModel
17+
#SCHEMA# dstack.api.OpenAIChatModel
1818
overrides:
1919
show_root_heading: false
2020
type:
@@ -25,7 +25,7 @@ The `service` configuration type allows running [services](../../concepts/servic
2525
> TGI provides an OpenAI-compatible API starting with version 1.4.0,
2626
so models served by TGI can be defined with `format: openai` too.
2727

28-
#SCHEMA# dstack._internal.core.models.gateways.TGIChatModel
28+
#SCHEMA# dstack.api.TGIChatModel
2929
overrides:
3030
show_root_heading: false
3131
type:

src/dstack/_internal/core/models/configurations.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,12 @@
99
from dstack._internal.core.models.common import CoreModel, Duration, RegistryAuth
1010
from dstack._internal.core.models.envs import Env
1111
from dstack._internal.core.models.fleets import FleetConfiguration
12-
from dstack._internal.core.models.gateways import AnyModel, GatewayConfiguration, OpenAIChatModel
12+
from dstack._internal.core.models.gateways import GatewayConfiguration
1313
from dstack._internal.core.models.profiles import ProfileParams
1414
from dstack._internal.core.models.repos.base import Repo
1515
from dstack._internal.core.models.repos.virtual import VirtualRepo
1616
from dstack._internal.core.models.resources import Range, ResourcesSpec
17+
from dstack._internal.core.models.services import AnyModel, OpenAIChatModel
1718
from dstack._internal.core.models.unix import UnixUser
1819
from dstack._internal.core.models.volumes import MountPoint, VolumeConfiguration, parse_mount_point
1920

src/dstack/_internal/core/models/gateways.py

Lines changed: 0 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
from dstack._internal.core.models.backends.base import BackendType
99
from dstack._internal.core.models.common import CoreModel
1010

11-
# TODO(#1595): refactor into different modules: gateway-specific and proxy-specific
12-
1311

1412
class GatewayStatus(str, Enum):
1513
SUBMITTED = "submitted"
@@ -110,69 +108,3 @@ class GatewayProvisioningData(CoreModel):
110108
availability_zone: Optional[str] = None
111109
hostname: Optional[str] = None
112110
backend_data: Optional[str] = None # backend-specific data in json
113-
114-
115-
class BaseChatModel(CoreModel):
116-
type: Annotated[Literal["chat"], Field(description="The type of the model")] = "chat"
117-
name: Annotated[str, Field(description="The name of the model")]
118-
format: Annotated[
119-
str, Field(description="The serving format. Supported values include `openai` and `tgi`")
120-
]
121-
122-
123-
class TGIChatModel(BaseChatModel):
124-
"""
125-
Mapping of the model for the OpenAI-compatible endpoint.
126-
127-
Attributes:
128-
type (str): The type of the model, e.g. "chat"
129-
name (str): The name of the model. This name will be used both to load model configuration from the HuggingFace Hub and in the OpenAI-compatible endpoint.
130-
format (str): The format of the model, e.g. "tgi" if the model is served with HuggingFace's Text Generation Inference.
131-
chat_template (Optional[str]): The custom prompt template for the model. If not specified, the default prompt template from the HuggingFace Hub configuration will be used.
132-
eos_token (Optional[str]): The custom end of sentence token. If not specified, the default end of sentence token from the HuggingFace Hub configuration will be used.
133-
"""
134-
135-
format: Annotated[
136-
Literal["tgi"], Field(description="The serving format. Must be set to `tgi`")
137-
]
138-
chat_template: Annotated[
139-
Optional[str],
140-
Field(
141-
description=(
142-
"The custom prompt template for the model."
143-
" If not specified, the default prompt template"
144-
" from the HuggingFace Hub configuration will be used"
145-
)
146-
),
147-
] = None # will be set before registering the service
148-
eos_token: Annotated[
149-
Optional[str],
150-
Field(
151-
description=(
152-
"The custom end of sentence token."
153-
" If not specified, the default end of sentence token"
154-
" from the HuggingFace Hub configuration will be used"
155-
)
156-
),
157-
] = None
158-
159-
160-
class OpenAIChatModel(BaseChatModel):
161-
"""
162-
Mapping of the model for the OpenAI-compatible endpoint.
163-
164-
Attributes:
165-
type (str): The type of the model, e.g. "chat"
166-
name (str): The name of the model. This name will be used both to load model configuration from the HuggingFace Hub and in the OpenAI-compatible endpoint.
167-
format (str): The format of the model, i.e. "openai".
168-
prefix (str): The `base_url` prefix: `http://hostname/{prefix}/chat/completions`. Defaults to `/v1`.
169-
"""
170-
171-
format: Annotated[
172-
Literal["openai"], Field(description="The serving format. Must be set to `openai`")
173-
]
174-
prefix: Annotated[str, Field(description="The `base_url` prefix (after hostname)")] = "/v1"
175-
176-
177-
ChatModel = Annotated[Union[TGIChatModel, OpenAIChatModel], Field(discriminator="format")]
178-
AnyModel = Union[ChatModel] # embeddings and etc.
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
"""
2+
Data structures related to `type: service` runs.
3+
"""
4+
5+
from typing import Optional, Union
6+
7+
from pydantic import Field
8+
from typing_extensions import Annotated, Literal
9+
10+
from dstack._internal.core.models.common import CoreModel
11+
12+
13+
class BaseChatModel(CoreModel):
14+
type: Annotated[Literal["chat"], Field(description="The type of the model")] = "chat"
15+
name: Annotated[str, Field(description="The name of the model")]
16+
format: Annotated[
17+
str, Field(description="The serving format. Supported values include `openai` and `tgi`")
18+
]
19+
20+
21+
class TGIChatModel(BaseChatModel):
22+
"""
23+
Mapping of the model for the OpenAI-compatible endpoint.
24+
25+
Attributes:
26+
type (str): The type of the model, e.g. "chat"
27+
name (str): The name of the model. This name will be used both to load model configuration from the HuggingFace Hub and in the OpenAI-compatible endpoint.
28+
format (str): The format of the model, e.g. "tgi" if the model is served with HuggingFace's Text Generation Inference.
29+
chat_template (Optional[str]): The custom prompt template for the model. If not specified, the default prompt template from the HuggingFace Hub configuration will be used.
30+
eos_token (Optional[str]): The custom end of sentence token. If not specified, the default end of sentence token from the HuggingFace Hub configuration will be used.
31+
"""
32+
33+
format: Annotated[
34+
Literal["tgi"], Field(description="The serving format. Must be set to `tgi`")
35+
]
36+
chat_template: Annotated[
37+
Optional[str],
38+
Field(
39+
description=(
40+
"The custom prompt template for the model."
41+
" If not specified, the default prompt template"
42+
" from the HuggingFace Hub configuration will be used"
43+
)
44+
),
45+
] = None # will be set before registering the service
46+
eos_token: Annotated[
47+
Optional[str],
48+
Field(
49+
description=(
50+
"The custom end of sentence token."
51+
" If not specified, the default end of sentence token"
52+
" from the HuggingFace Hub configuration will be used"
53+
)
54+
),
55+
] = None
56+
57+
58+
class OpenAIChatModel(BaseChatModel):
59+
"""
60+
Mapping of the model for the OpenAI-compatible endpoint.
61+
62+
Attributes:
63+
type (str): The type of the model, e.g. "chat"
64+
name (str): The name of the model. This name will be used both to load model configuration from the HuggingFace Hub and in the OpenAI-compatible endpoint.
65+
format (str): The format of the model, i.e. "openai".
66+
prefix (str): The `base_url` prefix: `http://hostname/{prefix}/chat/completions`. Defaults to `/v1`.
67+
"""
68+
69+
format: Annotated[
70+
Literal["openai"], Field(description="The serving format. Must be set to `openai`")
71+
]
72+
prefix: Annotated[str, Field(description="The `base_url` prefix (after hostname)")] = "/v1"
73+
74+
75+
ChatModel = Annotated[Union[TGIChatModel, OpenAIChatModel], Field(discriminator="format")]
76+
AnyModel = Union[ChatModel] # embeddings and etc.

src/dstack/_internal/server/background/tasks/process_running_jobs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from sqlalchemy.ext.asyncio import AsyncSession
77
from sqlalchemy.orm import joinedload
88

9-
import dstack._internal.server.services.gateways as gateways
109
from dstack._internal.core.consts import DSTACK_RUNNER_HTTP_PORT, DSTACK_SHIM_HTTP_PORT
1110
from dstack._internal.core.errors import GatewayError
1211
from dstack._internal.core.models.backends.base import BackendType
@@ -32,6 +31,7 @@
3231
)
3332
from dstack._internal.server.schemas.runner import TaskStatus
3433
from dstack._internal.server.services import logs as logs_services
34+
from dstack._internal.server.services import services
3535
from dstack._internal.server.services.jobs import (
3636
find_job,
3737
get_job_runtime_data,
@@ -313,7 +313,7 @@ async def _process_running_job(session: AsyncSession, job_model: JobModel):
313313
and run.run_spec.configuration.type == "service"
314314
):
315315
try:
316-
await gateways.register_replica(session, run_model.gateway_id, run, job_model)
316+
await services.register_replica(session, run_model.gateway_id, run, job_model)
317317
except GatewayError as e:
318318
logger.warning(
319319
"%s: failed to register service replica: %s, age=%s",

src/dstack/_internal/server/background/tasks/process_runs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from sqlalchemy.orm import joinedload, selectinload
99

1010
import dstack._internal.server.services.gateways as gateways
11-
import dstack._internal.server.services.gateways.autoscalers as autoscalers
11+
import dstack._internal.server.services.services.autoscalers as autoscalers
1212
from dstack._internal.core.errors import ServerError
1313
from dstack._internal.core.models.profiles import RetryEvent
1414
from dstack._internal.core.models.runs import (

0 commit comments

Comments
 (0)