
Commit 90cde8d

add xpu support
- [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
- update typos and bug fixes
- xpu seeding PR1
- add seeding for pytorch utilities
- mp_fabric xpu forking
- xpu multiprocess pytorch
- add header for xpu
- rename change to lightning.pytorch
- Teardown from lightning-xpu (from #PR- 3) From Lightning-AI#3
- add torch.xpu.stream to ddp
- update docs
- update _LIGHTNING_XPU_AVAILABLE to _lightning_xpu_available
- correct fabric imports.py
- 1. remove xpu.py from _graveyard 2. correct _lightning_xpu_available() usage
- fix _try_import function not defined issue in fabric
- add docs
- fix circular import issue
- update pytorch trainer connector
- correct usage in multiprocessing
- Fix precision device
- update warning format
1 parent 5e0e02b commit 90cde8d

File tree

29 files changed (+451 additions, −62 deletions)


docs/source-fabric/fundamentals/launch.rst

Lines changed: 2 additions & 1 deletion
@@ -93,8 +93,9 @@ This is essentially the same as running ``python path/to/your/script.py``, but i
 itself and are expected to be parsed there.

 Options:
---accelerator [cpu|gpu|cuda|mps|tpu]
+--accelerator [cpu|gpu|cuda|mps|tpu|xpu]
 The hardware accelerator to run on.
+Install Lightning-XPU to enable ``xpu``.
 --strategy [ddp|dp|deepspeed] Strategy for how to run across multiple
 devices.
 --devices TEXT Number of devices to run on (``int``), which
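
Once Lightning-XPU is installed, the same choice is available from Python as well as from this launcher. A minimal sketch, assuming the plugin registers the ``xpu`` accelerator as the changes further below do:

from lightning.fabric import Fabric

# "xpu" only becomes a valid accelerator name when the Lightning-XPU plugin is importable (assumption based on this commit)
fabric = Fabric(accelerator="xpu", devices=2)
fabric.launch()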

docs/source-pytorch/common/index.rst

Lines changed: 8 additions & 0 deletions
@@ -17,6 +17,7 @@
     ../advanced/model_parallel
     Train on single or multiple GPUs <../accelerators/gpu>
     Train on single or multiple HPUs <../integrations/hpu/index>
+    Train on single or multiple XPUs <../integrations/xpu/index>
     Train on single or multiple TPUs <../accelerators/tpu>
     Train on MPS <../accelerators/mps>
     Use a pretrained model <../advanced/pretrained>
@@ -167,6 +168,13 @@ How-to Guides
    :col_css: col-md-4
    :height: 180

+.. displayitem::
+   :header: Train on single or multiple XPUs
+   :description: Train models faster with XPU accelerators
+   :button_link: ../integrations/xpu/index.html
+   :col_css: col-md-4
+   :height: 180
+
 .. displayitem::
    :header: Train on single or multiple TPUs
    :description: TTrain models faster with TPU accelerators

docs/source-pytorch/common_usecases.rst

Lines changed: 7 additions & 0 deletions
@@ -133,6 +133,13 @@ Customize and extend Lightning for things like custom hardware or distributed st
    :button_link: integrations/hpu/index.html
    :height: 100

+.. displayitem::
+   :header: Train on single or multiple XPUs
+   :description: Train models faster with XPUs.
+   :col_css: col-md-12
+   :button_link: integrations/xpu/index.html
+   :height: 100
+
 .. displayitem::
    :header: Train on single or multiple TPUs
    :description: Train models faster with TPUs.

docs/source-pytorch/conf.py

Lines changed: 6 additions & 0 deletions
@@ -93,6 +93,11 @@ def _load_py_module(name: str, location: str) -> ModuleType:
     target_dir="docs/source-pytorch/integrations/hpu",
     checkout="refs/tags/1.4.0",
 )
+assist_local.AssistantCLI.pull_docs_files(
+    gh_user_repo="Lightning-AI/lightning-XPU",
+    target_dir="docs/source-pytorch/integrations/xpu",
+    checkout="tags/1.0.0",
+)

 # Copy strategies docs as single pages
 assist_local.AssistantCLI.pull_docs_files(
@@ -355,6 +360,7 @@ def _load_py_module(name: str, location: str) -> ModuleType:
     "PIL": ("https://pillow.readthedocs.io/en/stable/", None),
     "torchmetrics": ("https://lightning.ai/docs/torchmetrics/stable/", None),
     "lightning_habana": ("https://lightning-ai.github.io/lightning-Habana/", None),
+    "intel-xpu": ("https://lightning-ai.github.io/lightning-XPU/", None),
     "tensorboardX": ("https://tensorboardx.readthedocs.io/en/stable/", None),
     # needed for referencing App from lightning scope
     "lightning.app": ("https://lightning.ai/docs/app/stable/", None),

docs/source-pytorch/extensions/accelerator.rst

Lines changed: 16 additions & 15 deletions
@@ -11,6 +11,7 @@ Currently there are accelerators for:
 - :doc:`GPU <../accelerators/gpu>`
 - :doc:`TPU <../accelerators/tpu>`
 - :doc:`HPU <../integrations/hpu/index>`
+- :doc:`XPU <../integrations/xpu/index>`
 - :doc:`MPS <../accelerators/mps>`

 The Accelerator is part of the Strategy which manages communication across multiple devices (distributed communication).
@@ -31,16 +32,16 @@ Create a Custom Accelerator
 .. warning:: This is an :ref:`experimental <versioning:Experimental API>` feature.

 Here is how you create a new Accelerator.
-Let's pretend we want to integrate the fictional XPU accelerator and we have access to its hardware through a library
-``xpulib``.
+Let's pretend we want to integrate the fictional YPU accelerator and we have access to its hardware through a library
+``ypulib``.

 .. code-block:: python

-    import xpulib
+    import ypulib


-    class XPUAccelerator(Accelerator):
-        """Support for a hypothetical XPU, optimized for large-scale machine learning."""
+    class YPUAccelerator(Accelerator):
+        """Support for a hypothetical YPU, optimized for large-scale machine learning."""

         @staticmethod
         def parse_devices(devices: Any) -> Any:
@@ -51,29 +52,29 @@ Let's pretend we want to integrate the fictional XPU accelerator and we have acc
         @staticmethod
         def get_parallel_devices(devices: Any) -> Any:
             # Here, convert the device indices to actual device objects
-            return [torch.device("xpu", idx) for idx in devices]
+            return [torch.device("ypu", idx) for idx in devices]

         @staticmethod
         def auto_device_count() -> int:
             # Return a value for auto-device selection when `Trainer(devices="auto")`
-            return xpulib.available_devices()
+            return ypulib.available_devices()

         @staticmethod
         def is_available() -> bool:
-            return xpulib.is_available()
+            return ypulib.is_available()

         def get_device_stats(self, device: Union[str, torch.device]) -> Dict[str, Any]:
             # Return optional device statistics for loggers
             return {}


-Finally, add the XPUAccelerator to the Trainer:
+Finally, add the YPUAccelerator to the Trainer:

 .. code-block:: python

     from lightning.pytorch import Trainer

-    accelerator = XPUAccelerator()
+    accelerator = YPUAccelerator()
     trainer = Trainer(accelerator=accelerator, devices=2)


@@ -89,28 +90,28 @@ If you wish to switch to a custom accelerator from the CLI without code changes,

 .. code-block:: python

-    class XPUAccelerator(Accelerator):
+    class YPUAccelerator(Accelerator):
         ...

         @classmethod
         def register_accelerators(cls, accelerator_registry):
             accelerator_registry.register(
-                "xpu",
+                "ypu",
                 cls,
-                description=f"XPU Accelerator - optimized for large-scale machine learning.",
+                description=f"YPU Accelerator - optimized for large-scale machine learning.",
             )

 Now, this is possible:

 .. code-block:: python

-    trainer = Trainer(accelerator="xpu")
+    trainer = Trainer(accelerator="ypu")

 Or if you are using the Lightning CLI, for example:

 .. code-block:: bash

-    python train.py fit --trainer.accelerator=xpu --trainer.devices=2
+    python train.py fit --trainer.accelerator=ypu --trainer.devices=2


 ----------

docs/source-pytorch/glossary/index.rst

Lines changed: 8 additions & 0 deletions
@@ -20,6 +20,7 @@
     GPU <../accelerators/gpu>
     Half precision <../common/precision>
     HPU <../integrations/hpu/index>
+    XPU <../integrations/xpu/index>
     Inference <../deploy/production_intermediate>
     Lightning CLI <../cli/lightning_cli>
     LightningDataModule <../data/datamodule>
@@ -177,6 +178,13 @@
    :button_link: ../integrations/hpu/index.html
    :height: 100

+.. displayitem::
+   :header: XPU
+   :description: Intel® Graphics Cards for faster training
+   :col_css: col-md-12
+   :button_link: ../integrations/xpu/index.html
+   :height: 100
+
 .. displayitem::
    :header: Inference
    :description: Making predictions by applying a trained model to unlabeled examples
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
+.. _xpu:
+
+Accelerator: XPU training
+=========================
+
+.. raw:: html
+
+    <div class="display-card-container">
+        <div class="row">
+
+.. Add callout items below this line
+
+.. displayitem::
+   :header: Basic
+   :description: Learn the basics of single and multi-XPU core training.
+   :col_css: col-md-4
+   :button_link: basic.html
+   :height: 150
+   :tag: basic
+
+.. displayitem::
+   :header: Intermediate
+   :description: Enable state-of-the-art scaling with advanced mixed-precision settings.
+   :col_css: col-md-4
+   :button_link: intermediate.html
+   :height: 150
+   :tag: intermediate
+
+.. displayitem::
+   :header: Advanced
+   :description: Explore state-of-the-art scaling with additional advanced configurations.
+   :col_css: col-md-4
+   :button_link: advanced.html
+   :height: 150
+   :tag: advanced
+
+.. raw:: html
+
+        </div>
+    </div>
Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+:orphan:
+
+######################
+Level 19: Explore XPUs
+######################
+
+Explore Intel® Graphics Cards (XPU) for model scaling.
+
+----
+
+.. raw:: html
+
+    <div class="display-card-container">
+        <div class="row">
+
+.. Add callout items below this line
+
+.. displayitem::
+   :header: Train models on XPUs
+   :description: Learn the basics of single and multi-XPU core training.
+   :col_css: col-md-6
+   :button_link: ../integrations/xpu/basic.html
+   :height: 150
+   :tag: basic
+
+.. displayitem::
+   :header: Optimize model training on XPUs
+   :description: Enable state-of-the-art scaling with advanced mixed-precision settings.
+   :col_css: col-md-6
+   :button_link: ../integrations/xpu/intermediate.html
+   :height: 150
+   :tag: intermediate
+
+.. raw:: html
+
+        </div>
+    </div>

examples/pytorch/xpu/mnist_sample.py

Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
+# Copyright The Lightning AI team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+from lightning.pytorch import LightningModule
+from lightning.pytorch.cli import LightningCLI
+from lightning.pytorch.demos.mnist_datamodule import MNISTDataModule
+from torch.nn import functional as F
+
+
+class LitClassifier(LightningModule):
+    def __init__(self):
+        super().__init__()
+        self.l1 = torch.nn.Linear(28 * 28, 10)
+
+    def forward(self, x):
+        return torch.relu(self.l1(x.view(x.size(0), -1)))
+
+    def training_step(self, batch, batch_idx):
+        x, y = batch
+        return F.cross_entropy(self(x), y)
+
+    def validation_step(self, batch, batch_idx):
+        x, y = batch
+        probs = self(x)
+        acc = self.accuracy(probs, y)
+        self.log("val_acc", acc)
+
+    def test_step(self, batch, batch_idx):
+        x, y = batch
+        logits = self(x)
+        acc = self.accuracy(logits, y)
+        self.log("test_acc", acc)
+
+    @staticmethod
+    def accuracy(logits, y):
+        return torch.sum(torch.eq(torch.argmax(logits, -1), y).to(torch.float32)) / len(y)
+
+    def configure_optimizers(self):
+        return torch.optim.Adam(self.parameters(), lr=0.02)
+
+
+if __name__ == "__main__":
+    cli = LightningCLI(
+        LitClassifier,
+        MNISTDataModule,
+        trainer_defaults={
+            "accelerator": "gpu",
+            "devices": 2,
+            "max_epochs": 1,
+        },
+        run=False,
+        save_config_kwargs={"overwrite": True},
+    )
+
+    # Run the model ⚡
+    cli.trainer.fit(cli.model, datamodule=cli.datamodule)
+    cli.trainer.validate(cli.model, datamodule=cli.datamodule)
+    cli.trainer.test(cli.model, datamodule=cli.datamodule)
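
The committed example keeps ``"accelerator": "gpu"`` in its trainer defaults. A hypothetical variant pinned to the new backend instead, valid only with Lightning-XPU installed (not part of this commit):

    # Hypothetical XPU-pinned defaults; "xpu" is only registered when Lightning-XPU is installed
    cli = LightningCLI(
        LitClassifier,
        MNISTDataModule,
        trainer_defaults={"accelerator": "xpu", "devices": 2, "max_epochs": 1},
        run=False,
        save_config_kwargs={"overwrite": True},
    )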
Lines changed: 3 additions & 0 deletions
@@ -1,2 +1,5 @@
 # validation accelerator connectors
 lightning-habana >=1.2.0, <1.3.0
+
+# validation XPU connectors
+lightning-xpu >=0.1.0

src/lightning/fabric/accelerators/__init__.py

Lines changed: 7 additions & 0 deletions
@@ -22,3 +22,10 @@

 ACCELERATOR_REGISTRY = _AcceleratorRegistry()
 _register_classes(ACCELERATOR_REGISTRY, "register_accelerators", sys.modules[__name__], Accelerator)
+
+from lightning.fabric.utilities.imports import _lightning_xpu_available
+
+if _lightning_xpu_available() and "xpu" not in ACCELERATOR_REGISTRY:
+    from lightning_xpu.fabric import XPUAccelerator
+
+    XPUAccelerator.register_accelerators(ACCELERATOR_REGISTRY)
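
The helper ``_lightning_xpu_available()`` is imported here, but its definition is not part of this diff. A plausible shape, modeled on how Lightning gates other optional integrations (this is an assumption, not the committed implementation):

import functools

from lightning_utilities.core.imports import RequirementCache


@functools.lru_cache(maxsize=1)
def _lightning_xpu_available() -> bool:
    # True only when the optional lightning-xpu package is installed and importable (assumed behaviour)
    return bool(RequirementCache("lightning-xpu"))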

src/lightning/fabric/cli.py

Lines changed: 9 additions & 2 deletions
@@ -29,14 +29,17 @@
 from lightning.fabric.utilities.consolidate_checkpoint import _process_cli_args
 from lightning.fabric.utilities.device_parser import _parse_gpu_ids
 from lightning.fabric.utilities.distributed import _suggested_max_num_threads
+from lightning.fabric.utilities.imports import _lightning_xpu_available
 from lightning.fabric.utilities.load import _load_distributed_checkpoint

 _log = logging.getLogger(__name__)

 _CLICK_AVAILABLE = RequirementCache("click")
 _LIGHTNING_SDK_AVAILABLE = RequirementCache("lightning_sdk")

-_SUPPORTED_ACCELERATORS = ("cpu", "gpu", "cuda", "mps", "tpu")
+_SUPPORTED_ACCELERATORS = ["cpu", "gpu", "cuda", "mps", "tpu"]
+if _lightning_xpu_available():
+    _SUPPORTED_ACCELERATORS.append("xpu")


 def _get_supported_strategies() -> List[str]:
@@ -209,13 +212,17 @@ def _set_env_variables(args: Namespace) -> None:
 def _get_num_processes(accelerator: str, devices: str) -> int:
     """Parse the `devices` argument to determine how many processes need to be launched on the current machine."""
     if accelerator == "gpu":
-        parsed_devices = _parse_gpu_ids(devices, include_cuda=True, include_mps=True)
+        parsed_devices = _parse_gpu_ids(devices, include_cuda=True, include_mps=True, include_xpu=True)
     elif accelerator == "cuda":
         parsed_devices = CUDAAccelerator.parse_devices(devices)
     elif accelerator == "mps":
         parsed_devices = MPSAccelerator.parse_devices(devices)
     elif accelerator == "tpu":
         raise ValueError("Launching processes for TPU through the CLI is not supported.")
+    elif accelerator == "xpu":
+        from lightning_xpu.fabric import XPUAccelerator
+
+        parsed_devices = XPUAccelerator.parse_devices(devices)
     else:
         return CPUAccelerator.parse_devices(devices)
     return len(parsed_devices) if parsed_devices is not None else 0
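
To illustrate the new branch: with Lightning-XPU installed, the launcher would resolve the process count for ``--accelerator=xpu`` roughly as follows (an illustrative call; the device count is made up):

from lightning.fabric.cli import _get_num_processes

# With two visible XPUs, "--accelerator=xpu --devices=2" maps to two local processes (assumed example)
num_procs = _get_num_processes(accelerator="xpu", devices="2")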

0 commit comments
