
Commit 90cde8d

add xpu support
- [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
- update typos and bug fixes
- xpu seeding PR1
- add seeding for pytorch utilities
- mp_fabric xpu forking
- xpu multiprocess pytorch
- add header for xpu
- rename change to lightning.pytorch
- Teardown from lightning-xpu (from #PR- 3) From Lightning-AI#3
- add torch.xpu.stream to ddp
- update docs
- update _LIGHTNING_XPU_AVAILABLE to _lightning_xpu_available
- correct fabric imports.py
- 1. remove xpu.py from _graveyard 2. correct _lightning_xpu_available() usage
- fix _try_import function not defined issue in fabric
- add docs
- fix circular import issue
- update pytorch trainer connector
- correct usage in multiprocessing
- Fix precision device
- update warning format
1 parent 5e0e02b commit 90cde8d

File tree

29 files changed (+451 additions, −62 deletions)


docs/source-fabric/fundamentals/launch.rst

Lines changed: 2 additions & 1 deletion
@@ -93,8 +93,9 @@ This is essentially the same as running ``python path/to/your/script.py``, but i
 itself and are expected to be parsed there.

 Options:
---accelerator [cpu|gpu|cuda|mps|tpu]
+--accelerator [cpu|gpu|cuda|mps|tpu|xpu]
 The hardware accelerator to run on.
+Install Lightning-XPU to enable ``xpu``.
 --strategy [ddp|dp|deepspeed] Strategy for how to run across multiple
 devices.
 --devices TEXT Number of devices to run on (``int``), which
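
Once Lightning-XPU is installed, the same choice is available from Python as well as from this launcher. A minimal sketch, assuming the plugin registers the ``xpu`` accelerator as the changes further below do:

from lightning.fabric import Fabric

# "xpu" only becomes a valid accelerator name when the Lightning-XPU plugin is importable (assumption based on this commit)
fabric = Fabric(accelerator="xpu", devices=2)
fabric.launch()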

docs/source-pytorch/common/index.rst

Lines changed: 8 additions & 0 deletions
@@ -17,6 +17,7 @@
     ../advanced/model_parallel
     Train on single or multiple GPUs <../accelerators/gpu>
     Train on single or multiple HPUs <../integrations/hpu/index>
+    Train on single or multiple XPUs <../integrations/xpu/index>
     Train on single or multiple TPUs <../accelerators/tpu>
     Train on MPS <../accelerators/mps>
     Use a pretrained model <../advanced/pretrained>
@@ -167,6 +168,13 @@ How-to Guides
    :col_css: col-md-4
    :height: 180

+.. displayitem::
+   :header: Train on single or multiple XPUs
+   :description: Train models faster with XPU accelerators
+   :button_link: ../integrations/xpu/index.html
+   :col_css: col-md-4
+   :height: 180
+
 .. displayitem::
    :header: Train on single or multiple TPUs
    :description: TTrain models faster with TPU accelerators

docs/source-pytorch/common_usecases.rst

Lines changed: 7 additions & 0 deletions
@@ -133,6 +133,13 @@ Customize and extend Lightning for things like custom hardware or distributed st
    :button_link: integrations/hpu/index.html
    :height: 100

+.. displayitem::
+   :header: Train on single or multiple XPUs
+   :description: Train models faster with XPUs.
+   :col_css: col-md-12
+   :button_link: integrations/xpu/index.html
+   :height: 100
+
 .. displayitem::
    :header: Train on single or multiple TPUs
    :description: Train models faster with TPUs.

docs/source-pytorch/conf.py

Lines changed: 6 additions & 0 deletions
@@ -93,6 +93,11 @@ def _load_py_module(name: str, location: str) -> ModuleType:
     target_dir="docs/source-pytorch/integrations/hpu",
     checkout="refs/tags/1.4.0",
 )
+assist_local.AssistantCLI.pull_docs_files(
+    gh_user_repo="Lightning-AI/lightning-XPU",
+    target_dir="docs/source-pytorch/integrations/xpu",
+    checkout="tags/1.0.0",
+)

 # Copy strategies docs as single pages
 assist_local.AssistantCLI.pull_docs_files(
@@ -355,6 +360,7 @@ def _load_py_module(name: str, location: str) -> ModuleType:
     "PIL": ("https://pillow.readthedocs.io/en/stable/", None),
     "torchmetrics": ("https://lightning.ai/docs/torchmetrics/stable/", None),
     "lightning_habana": ("https://lightning-ai.github.io/lightning-Habana/", None),
+    "intel-xpu": ("https://lightning-ai.github.io/lightning-XPU/", None),
     "tensorboardX": ("https://tensorboardx.readthedocs.io/en/stable/", None),
     # needed for referencing App from lightning scope
     "lightning.app": ("https://lightning.ai/docs/app/stable/", None),

docs/source-pytorch/extensions/accelerator.rst

Lines changed: 16 additions & 15 deletions
@@ -11,6 +11,7 @@ Currently there are accelerators for:
 - :doc:`GPU <../accelerators/gpu>`
 - :doc:`TPU <../accelerators/tpu>`
 - :doc:`HPU <../integrations/hpu/index>`
+- :doc:`XPU <../integrations/xpu/index>`
 - :doc:`MPS <../accelerators/mps>`

 The Accelerator is part of the Strategy which manages communication across multiple devices (distributed communication).
@@ -31,16 +32,16 @@ Create a Custom Accelerator
 .. warning:: This is an :ref:`experimental <versioning:Experimental API>` feature.

 Here is how you create a new Accelerator.
-Let's pretend we want to integrate the fictional XPU accelerator and we have access to its hardware through a library
-``xpulib``.
+Let's pretend we want to integrate the fictional YPU accelerator and we have access to its hardware through a library
+``ypulib``.

 .. code-block:: python

-    import xpulib
+    import ypulib


-    class XPUAccelerator(Accelerator):
-        """Support for a hypothetical XPU, optimized for large-scale machine learning."""
+    class YPUAccelerator(Accelerator):
+        """Support for a hypothetical YPU, optimized for large-scale machine learning."""

         @staticmethod
         def parse_devices(devices: Any) -> Any:
@@ -51,29 +52,29 @@ Let's pretend we want to integrate the fictional XPU accelerator and we have acc
         @staticmethod
         def get_parallel_devices(devices: Any) -> Any:
             # Here, convert the device indices to actual device objects
-            return [torch.device("xpu", idx) for idx in devices]
+            return [torch.device("ypu", idx) for idx in devices]

         @staticmethod
         def auto_device_count() -> int:
             # Return a value for auto-device selection when `Trainer(devices="auto")`
-            return xpulib.available_devices()
+            return ypulib.available_devices()

         @staticmethod
         def is_available() -> bool:
-            return xpulib.is_available()
+            return ypulib.is_available()

         def get_device_stats(self, device: Union[str, torch.device]) -> Dict[str, Any]:
             # Return optional device statistics for loggers
             return {}


-Finally, add the XPUAccelerator to the Trainer:
+Finally, add the YPUAccelerator to the Trainer:

 .. code-block:: python

     from lightning.pytorch import Trainer

-    accelerator = XPUAccelerator()
+    accelerator = YPUAccelerator()
     trainer = Trainer(accelerator=accelerator, devices=2)


@@ -89,28 +90,28 @@ If you wish to switch to a custom accelerator from the CLI without code changes,

 .. code-block:: python

-    class XPUAccelerator(Accelerator):
+    class YPUAccelerator(Accelerator):
         ...

         @classmethod
         def register_accelerators(cls, accelerator_registry):
             accelerator_registry.register(
-                "xpu",
+                "ypu",
                 cls,
-                description=f"XPU Accelerator - optimized for large-scale machine learning.",
+                description=f"YPU Accelerator - optimized for large-scale machine learning.",
             )

 Now, this is possible:

 .. code-block:: python

-    trainer = Trainer(accelerator="xpu")
+    trainer = Trainer(accelerator="ypu")

 Or if you are using the Lightning CLI, for example:

 .. code-block:: bash

-    python train.py fit --trainer.accelerator=xpu --trainer.devices=2
+    python train.py fit --trainer.accelerator=ypu --trainer.devices=2


 ----------

docs/source-pytorch/glossary/index.rst

Lines changed: 8 additions & 0 deletions
@@ -20,6 +20,7 @@
     GPU <../accelerators/gpu>
     Half precision <../common/precision>
     HPU <../integrations/hpu/index>
+    XPU <../integrations/xpu/index>
     Inference <../deploy/production_intermediate>
     Lightning CLI <../cli/lightning_cli>
     LightningDataModule <../data/datamodule>
@@ -177,6 +178,13 @@
    :button_link: ../integrations/hpu/index.html
    :height: 100

+.. displayitem::
+   :header: XPU
+   :description: Intel® Graphics Cards for faster training
+   :col_css: col-md-12
+   :button_link: ../integrations/xpu/index.html
+   :height: 100
+
 .. displayitem::
    :header: Inference
    :description: Making predictions by applying a trained model to unlabeled examples
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
+.. _xpu:
+
+Accelerator: XPU training
+=========================
+
+.. raw:: html
+
+    <div class="display-card-container">
+        <div class="row">
+
+.. Add callout items below this line
+
+.. displayitem::
+   :header: Basic
+   :description: Learn the basics of single and multi-XPU core training.
+   :col_css: col-md-4
+   :button_link: basic.html
+   :height: 150
+   :tag: basic
+
+.. displayitem::
+   :header: Intermediate
+   :description: Enable state-of-the-art scaling with advanced mixed-precision settings.
+   :col_css: col-md-4
+   :button_link: intermediate.html
+   :height: 150
+   :tag: intermediate
+
+.. displayitem::
+   :header: Advanced
+   :description: Explore state-of-the-art scaling with additional advanced configurations.
+   :col_css: col-md-4
+   :button_link: advanced.html
+   :height: 150
+   :tag: advanced
+
+.. raw:: html
+
+        </div>
+    </div>
Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+:orphan:
+
+######################
+Level 19: Explore XPUs
+######################
+
+Explore Intel® Graphics Cards (XPU) for model scaling.
+
+----
+
+.. raw:: html
+
+    <div class="display-card-container">
+        <div class="row">
+
+.. Add callout items below this line
+
+.. displayitem::
+   :header: Train models on XPUs
+   :description: Learn the basics of single and multi-XPU core training.
+   :col_css: col-md-6
+   :button_link: ../integrations/xpu/basic.html
+   :height: 150
+   :tag: basic
+
+.. displayitem::
+   :header: Optimize model training on XPUs
+   :description: Enable state-of-the-art scaling with advanced mixed-precision settings.
+   :col_css: col-md-6
+   :button_link: ../integrations/xpu/intermediate.html
+   :height: 150
+   :tag: intermediate
+
+.. raw:: html
+
+        </div>
+    </div>

examples/pytorch/xpu/mnist_sample.py

Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
+# Copyright The Lightning AI team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+from lightning.pytorch import LightningModule
+from lightning.pytorch.cli import LightningCLI
+from lightning.pytorch.demos.mnist_datamodule import MNISTDataModule
+from torch.nn import functional as F
+
+
+class LitClassifier(LightningModule):
+    def __init__(self):
+        super().__init__()
+        self.l1 = torch.nn.Linear(28 * 28, 10)
+
+    def forward(self, x):
+        return torch.relu(self.l1(x.view(x.size(0), -1)))
+
+    def training_step(self, batch, batch_idx):
+        x, y = batch
+        return F.cross_entropy(self(x), y)
+
+    def validation_step(self, batch, batch_idx):
+        x, y = batch
+        probs = self(x)
+        acc = self.accuracy(probs, y)
+        self.log("val_acc", acc)
+
+    def test_step(self, batch, batch_idx):
+        x, y = batch
+        logits = self(x)
+        acc = self.accuracy(logits, y)
+        self.log("test_acc", acc)
+
+    @staticmethod
+    def accuracy(logits, y):
+        return torch.sum(torch.eq(torch.argmax(logits, -1), y).to(torch.float32)) / len(y)
+
+    def configure_optimizers(self):
+        return torch.optim.Adam(self.parameters(), lr=0.02)
+
+
+if __name__ == "__main__":
+    cli = LightningCLI(
+        LitClassifier,
+        MNISTDataModule,
+        trainer_defaults={
+            "accelerator": "gpu",
+            "devices": 2,
+            "max_epochs": 1,
+        },
+        run=False,
+        save_config_kwargs={"overwrite": True},
+    )
+
+    # Run the model ⚡
+    cli.trainer.fit(cli.model, datamodule=cli.datamodule)
+    cli.trainer.validate(cli.model, datamodule=cli.datamodule)
+    cli.trainer.test(cli.model, datamodule=cli.datamodule)
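
The committed example keeps ``"accelerator": "gpu"`` in its trainer defaults. A hypothetical variant pinned to the new backend instead, valid only with Lightning-XPU installed (not part of this commit):

    # Hypothetical XPU-pinned defaults; "xpu" is only registered when Lightning-XPU is installed
    cli = LightningCLI(
        LitClassifier,
        MNISTDataModule,
        trainer_defaults={"accelerator": "xpu", "devices": 2, "max_epochs": 1},
        run=False,
        save_config_kwargs={"overwrite": True},
    )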
Lines changed: 3 additions & 0 deletions
@@ -1,2 +1,5 @@
 # validation accelerator connectors
 lightning-habana >=1.2.0, <1.3.0
+
+# validation XPU connectors
+lightning-xpu >=0.1.0

src/lightning/fabric/accelerators/__init__.py

Lines changed: 7 additions & 0 deletions
@@ -22,3 +22,10 @@

 ACCELERATOR_REGISTRY = _AcceleratorRegistry()
 _register_classes(ACCELERATOR_REGISTRY, "register_accelerators", sys.modules[__name__], Accelerator)
+
+from lightning.fabric.utilities.imports import _lightning_xpu_available
+
+if _lightning_xpu_available() and "xpu" not in ACCELERATOR_REGISTRY:
+    from lightning_xpu.fabric import XPUAccelerator
+
+    XPUAccelerator.register_accelerators(ACCELERATOR_REGISTRY)
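
The helper ``_lightning_xpu_available()`` is imported here, but its definition is not part of this diff. A plausible shape, modeled on how Lightning gates other optional integrations (this is an assumption, not the committed implementation):

import functools

from lightning_utilities.core.imports import RequirementCache


@functools.lru_cache(maxsize=1)
def _lightning_xpu_available() -> bool:
    # True only when the optional lightning-xpu package is installed and importable (assumed behaviour)
    return bool(RequirementCache("lightning-xpu"))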

src/lightning/fabric/cli.py

Lines changed: 9 additions & 2 deletions
@@ -29,14 +29,17 @@
 from lightning.fabric.utilities.consolidate_checkpoint import _process_cli_args
 from lightning.fabric.utilities.device_parser import _parse_gpu_ids
 from lightning.fabric.utilities.distributed import _suggested_max_num_threads
+from lightning.fabric.utilities.imports import _lightning_xpu_available
 from lightning.fabric.utilities.load import _load_distributed_checkpoint

 _log = logging.getLogger(__name__)

 _CLICK_AVAILABLE = RequirementCache("click")
 _LIGHTNING_SDK_AVAILABLE = RequirementCache("lightning_sdk")

-_SUPPORTED_ACCELERATORS = ("cpu", "gpu", "cuda", "mps", "tpu")
+_SUPPORTED_ACCELERATORS = ["cpu", "gpu", "cuda", "mps", "tpu"]
+if _lightning_xpu_available():
+    _SUPPORTED_ACCELERATORS.append("xpu")


 def _get_supported_strategies() -> List[str]:
@@ -209,13 +212,17 @@ def _set_env_variables(args: Namespace) -> None:
 def _get_num_processes(accelerator: str, devices: str) -> int:
     """Parse the `devices` argument to determine how many processes need to be launched on the current machine."""
     if accelerator == "gpu":
-        parsed_devices = _parse_gpu_ids(devices, include_cuda=True, include_mps=True)
+        parsed_devices = _parse_gpu_ids(devices, include_cuda=True, include_mps=True, include_xpu=True)
     elif accelerator == "cuda":
         parsed_devices = CUDAAccelerator.parse_devices(devices)
     elif accelerator == "mps":
         parsed_devices = MPSAccelerator.parse_devices(devices)
     elif accelerator == "tpu":
         raise ValueError("Launching processes for TPU through the CLI is not supported.")
+    elif accelerator == "xpu":
+        from lightning_xpu.fabric import XPUAccelerator
+
+        parsed_devices = XPUAccelerator.parse_devices(devices)
     else:
         return CPUAccelerator.parse_devices(devices)
     return len(parsed_devices) if parsed_devices is not None else 0
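
To illustrate the new branch: with Lightning-XPU installed, the launcher would resolve the process count for ``--accelerator=xpu`` roughly as follows (an illustrative call; the device count is made up):

from lightning.fabric.cli import _get_num_processes

# With two visible XPUs, "--accelerator=xpu --devices=2" maps to two local processes (assumed example)
num_procs = _get_num_processes(accelerator="xpu", devices="2")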

0 commit comments
