diff --git a/.github/workflows/gpu_test.yml b/.github/workflows/gpu_test.yml
new file mode 100644
index 0000000000..e0be897532
--- /dev/null
+++ b/.github/workflows/gpu_test.yml
@@ -0,0 +1,66 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: GPU Test V3
+
+on:
+  push:
+    branches: [ v3 ]
+  pull_request:
+    branches: [ v3 ]
+  workflow_dispatch:
+
+env:
+  LD_LIBRARY_PATH: /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  test:
+    name: py=${{ matrix.python-version }}, np=${{ matrix.numpy-version }}, deps=${{ matrix.dependency-set }}
+
+    runs-on: gpu-runner
+    strategy:
+      matrix:
+        python-version: ['3.11']
+        numpy-version: ['2.0']
+        dependency-set: ["minimal"]
+
+    steps:
+    - uses: actions/checkout@v4
+    # - name: cuda-toolkit
+    #   uses: Jimver/cuda-toolkit@v0.2.16
+    #   id: cuda-toolkit
+    #   with:
+    #     cuda: '12.4.1'
+    - name: Set up CUDA
+      run: |
+        wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb
+        sudo dpkg -i cuda-keyring_1.1-1_all.deb
+        sudo apt-get update
+        sudo apt-get -y install cuda-toolkit-12-6
+        echo "/usr/local/cuda/bin" >> $GITHUB_PATH
+    - name: GPU check
+      run: |
+        nvidia-smi
+        echo $PATH
+        echo $LD_LIBRARY_PATH
+        nvcc -V
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+        cache: 'pip'
+    - name: Install Hatch and CuPy
+      run: |
+        python -m pip install --upgrade pip 
+        pip install hatch
+    - name: Set Up Hatch Env
+      run: |
+        hatch env create gputest.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }}
+        hatch env run -e gputest.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} list-env
+    - name: Run Tests
+      run: |
+        hatch env run --env gputest.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run-coverage
diff --git a/pyproject.toml b/pyproject.toml
index 197c4aca10..8bee491a82 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,6 +74,9 @@ jupyter = [
     'ipytree>=0.2.2',
     'ipywidgets>=8.0.0',
 ]
+gpu = [
+    "cupy-cuda12x",
+]
 docs = [
     'sphinx',
     'sphinx-autobuild>=2021.3.14',
@@ -120,7 +123,7 @@ build.hooks.vcs.version-file = "src/zarr/_version.py"
 [tool.hatch.envs.test]
 dependencies = [
     "numpy~={matrix:numpy}",
-    "universal_pathlib"
+    "universal_pathlib",
 ]
 features = ["test", "extra"]
 
@@ -134,8 +137,34 @@ python = ["3.10", "3.11", "3.12"]
 numpy = ["1.24", "1.26", "2.0"]
 features = ["optional"]
 
+[[tool.hatch.envs.test.matrix]]
+python = ["3.10", "3.11", "3.12"]
+numpy = ["1.24", "1.26", "2.0"]
+features = ["gpu"]
+
 [tool.hatch.envs.test.scripts]
 run-coverage = "pytest --cov-config=pyproject.toml --cov=pkg --cov=tests"
+run-coverage-gpu = "pip install cupy-cuda12x && pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov=tests"
+run = "run-coverage --no-cov"
+run-verbose = "run-coverage --verbose"
+run-mypy = "mypy src"
+run-hypothesis = "pytest --hypothesis-profile ci tests/v3/test_properties.py tests/v3/test_store/test_stateful*"
+list-env = "pip list"
+
+[tool.hatch.envs.gputest]
+dependencies = [
+    "numpy~={matrix:numpy}",
+    "universal_pathlib",
+]
+features = ["test", "extra", "gpu"]
+
+[[tool.hatch.envs.gputest.matrix]]
+python = ["3.10", "3.11", "3.12"]
+numpy = ["1.24", "1.26", "2.0"]
+version = ["minimal"]
+
+[tool.hatch.envs.gputest.scripts]
+run-coverage = "pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov=tests"
 run = "run-coverage --no-cov"
 run-verbose = "run-coverage --verbose"
 run-mypy = "mypy src"
@@ -223,4 +252,8 @@ filterwarnings = [
     "error:::zarr.*",
     "ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning",
     "ignore:The loop argument is deprecated since Python 3.8.*:DeprecationWarning",
+    "ignore:Creating a zarr.buffer.gpu.*:UserWarning",
+]
+markers = [
+    "gpu: mark a test as requiring CuPy and GPU"
 ]
diff --git a/src/zarr/codecs/blosc.py b/src/zarr/codecs/blosc.py
index 8e01524992..ffe5a0b6f3 100644
--- a/src/zarr/codecs/blosc.py
+++ b/src/zarr/codecs/blosc.py
@@ -10,7 +10,8 @@
 
 from zarr.abc.codec import BytesBytesCodec
 from zarr.core.array_spec import ArraySpec
-from zarr.core.buffer import Buffer, as_numpy_array_wrapper
+from zarr.core.buffer import Buffer
+from zarr.core.buffer.cpu import as_numpy_array_wrapper
 from zarr.core.common import JSON, parse_enum, parse_named_configuration, to_thread
 from zarr.registry import register_codec
 
diff --git a/src/zarr/codecs/gzip.py b/src/zarr/codecs/gzip.py
index bab81a104b..d8d793a965 100644
--- a/src/zarr/codecs/gzip.py
+++ b/src/zarr/codecs/gzip.py
@@ -7,7 +7,8 @@
 
 from zarr.abc.codec import BytesBytesCodec
 from zarr.core.array_spec import ArraySpec
-from zarr.core.buffer import Buffer, as_numpy_array_wrapper
+from zarr.core.buffer import Buffer
+from zarr.core.buffer.cpu import as_numpy_array_wrapper
 from zarr.core.common import JSON, parse_named_configuration, to_thread
 from zarr.registry import register_codec
 
diff --git a/src/zarr/codecs/zstd.py b/src/zarr/codecs/zstd.py
index 92a2665425..fceb49cf2b 100644
--- a/src/zarr/codecs/zstd.py
+++ b/src/zarr/codecs/zstd.py
@@ -9,7 +9,8 @@
 
 from zarr.abc.codec import BytesBytesCodec
 from zarr.core.array_spec import ArraySpec
-from zarr.core.buffer import Buffer, as_numpy_array_wrapper
+from zarr.core.buffer import Buffer
+from zarr.core.buffer.cpu import as_numpy_array_wrapper
 from zarr.core.common import JSON, parse_named_configuration, to_thread
 from zarr.registry import register_codec
 
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index 3738b0859b..e7b5709b77 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -512,7 +512,12 @@ async def _set_selection(
 
         # check value shape
         if np.isscalar(value):
-            value = np.asanyarray(value, dtype=self.metadata.dtype)
+            array_like = prototype.buffer.create_zero_length().as_array_like()
+            if isinstance(array_like, np._typing._SupportsArrayFunc):
+                # TODO: need to handle array types that don't support __array_function__
+                # like PyTorch and JAX
+                array_like_ = cast(np._typing._SupportsArrayFunc, array_like)
+            value = np.asanyarray(value, dtype=self.metadata.dtype, like=array_like_)
         else:
             if not hasattr(value, "shape"):
                 value = np.asarray(value, self.metadata.dtype)
@@ -520,7 +525,11 @@ async def _set_selection(
             #     value.shape == indexer.shape
             # ), f"shape of value doesn't match indexer shape. Expected {indexer.shape}, got {value.shape}"
             if not hasattr(value, "dtype") or value.dtype.name != self.metadata.dtype.name:
-                value = np.array(value, dtype=self.metadata.dtype, order="A")
+                if hasattr(value, "astype"):
+                    # Handle things that are already NDArrayLike more efficiently
+                    value = value.astype(dtype=self.metadata.dtype, order="A")
+                else:
+                    value = np.array(value, dtype=self.metadata.dtype, order="A")
         value = cast(NDArrayLike, value)
         # We accept any ndarray like object from the user and convert it
         # to a NDBuffer (or subclass). From this point onwards, we only pass
diff --git a/src/zarr/core/buffer/__init__.py b/src/zarr/core/buffer/__init__.py
new file mode 100644
index 0000000000..b18417b00b
--- /dev/null
+++ b/src/zarr/core/buffer/__init__.py
@@ -0,0 +1,19 @@
+from zarr.core.buffer.core import (
+    ArrayLike,
+    Buffer,
+    BufferPrototype,
+    NDArrayLike,
+    NDBuffer,
+    default_buffer_prototype,
+)
+from zarr.core.buffer.cpu import numpy_buffer_prototype
+
+__all__ = [
+    "ArrayLike",
+    "Buffer",
+    "NDArrayLike",
+    "NDBuffer",
+    "BufferPrototype",
+    "default_buffer_prototype",
+    "numpy_buffer_prototype",
+]
diff --git a/src/zarr/core/buffer.py b/src/zarr/core/buffer/core.py
similarity index 84%
rename from src/zarr/core/buffer.py
rename to src/zarr/core/buffer/core.py
index 50252590dd..6e5d9465cf 100644
--- a/src/zarr/core/buffer.py
+++ b/src/zarr/core/buffer/core.py
@@ -1,7 +1,8 @@
 from __future__ import annotations
 
 import sys
-from collections.abc import Callable, Iterable, Sequence
+from abc import ABC, abstractmethod
+from collections.abc import Iterable, Sequence
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -9,6 +10,7 @@
     NamedTuple,
     Protocol,
     SupportsIndex,
+    cast,
     runtime_checkable,
 )
 
@@ -19,8 +21,6 @@
 from zarr.registry import (
     get_buffer_class,
     get_ndbuffer_class,
-    register_buffer,
-    register_ndbuffer,
 )
 
 if TYPE_CHECKING:
@@ -112,7 +112,7 @@ def check_item_key_is_1d_contiguous(key: Any) -> None:
         raise ValueError("slice must be contiguous")
 
 
-class Buffer:
+class Buffer(ABC):
     """A flat contiguous memory block
 
     We use Buffer throughout Zarr to represent a contiguous block of memory.
@@ -141,6 +141,7 @@ def __init__(self, array_like: ArrayLike):
         self._data = array_like
 
     @classmethod
+    @abstractmethod
     def create_zero_length(cls) -> Self:
         """Create an empty buffer with length zero
 
@@ -148,7 +149,11 @@ def create_zero_length(cls) -> Self:
         -------
             New empty 0-length buffer
         """
-        return cls(np.array([], dtype="b"))
+        if cls is Buffer:
+            raise NotImplementedError("Cannot call abstract method on the abstract class 'Buffer'")
+        return cls(
+            cast(ArrayLike, None)
+        )  # This line will never be reached, but it satisfies the type checker
 
     @classmethod
     def from_array_like(cls, array_like: ArrayLike) -> Self:
@@ -166,6 +171,7 @@ def from_array_like(cls, array_like: ArrayLike) -> Self:
         return cls(array_like)
 
     @classmethod
+    @abstractmethod
     def from_buffer(cls, buffer: Buffer) -> Self:
         """Create a new buffer of an existing Buffer
 
@@ -185,10 +191,20 @@ def from_buffer(cls, buffer: Buffer) -> Self:
         Returns
         -------
             A new buffer representing the content of the input buffer
+
+        Note
+        ----
+        Subclasses of `Buffer` must override this method to implement
+        more optimal conversions that avoid copies where possible
         """
-        return cls.from_array_like(buffer.as_array_like())
+        if cls is Buffer:
+            raise NotImplementedError("Cannot call abstract method on the abstract class 'Buffer'")
+        return cls(
+            cast(ArrayLike, None)
+        )  # This line will never be reached, but it satisfies the type checker
 
     @classmethod
+    @abstractmethod
     def from_bytes(cls, bytes_like: BytesLike) -> Self:
         """Create a new buffer of a bytes-like object (host memory)
 
@@ -201,7 +217,11 @@ def from_bytes(cls, bytes_like: BytesLike) -> Self:
         -------
             New buffer representing `bytes_like`
         """
-        return cls.from_array_like(np.frombuffer(bytes_like, dtype="b"))
+        if cls is Buffer:
+            raise NotImplementedError("Cannot call abstract method on the abstract class 'Buffer'")
+        return cls(
+            cast(ArrayLike, None)
+        )  # This line will never be reached, but it satisfies the type checker
 
     def as_array_like(self) -> ArrayLike:
         """Returns the underlying array (host or device memory) of this buffer
@@ -214,6 +234,7 @@ def as_array_like(self) -> ArrayLike:
         """
         return self._data
 
+    @abstractmethod
     def as_numpy_array(self) -> npt.NDArray[Any]:
         """Returns the buffer as a NumPy array (host memory).
 
@@ -225,7 +246,7 @@ def as_numpy_array(self) -> npt.NDArray[Any]:
         -------
             NumPy array of this buffer (might be a data copy)
         """
-        return np.asanyarray(self._data)
+        ...
 
     def to_bytes(self) -> bytes:
         """Returns the buffer as `bytes` (host memory).
@@ -252,14 +273,10 @@ def __setitem__(self, key: slice, value: Any) -> None:
     def __len__(self) -> int:
         return self._data.size
 
+    @abstractmethod
     def __add__(self, other: Buffer) -> Self:
         """Concatenate two buffers"""
-
-        other_array = other.as_array_like()
-        assert other_array.dtype == np.dtype("b")
-        return self.__class__(
-            np.concatenate((np.asanyarray(self._data), np.asanyarray(other_array)))
-        )
+        ...
 
 
 class NDBuffer:
@@ -293,6 +310,7 @@ def __init__(self, array: NDArrayLike):
         self._data = array
 
     @classmethod
+    @abstractmethod
     def create(
         cls,
         *,
@@ -324,10 +342,13 @@ def create(
         A subclass can overwrite this method to create a ndarray-like object
         other then the default Numpy array.
         """
-        ret = cls(np.empty(shape=tuple(shape), dtype=dtype, order=order))
-        if fill_value is not None:
-            ret.fill(fill_value)
-        return ret
+        if cls is NDBuffer:
+            raise NotImplementedError(
+                "Cannot call abstract method on the abstract class 'NDBuffer'"
+            )
+        return cls(
+            cast(NDArrayLike, None)
+        )  # This line will never be reached, but it satisfies the type checker
 
     @classmethod
     def from_ndarray_like(cls, ndarray_like: NDArrayLike) -> Self:
@@ -345,6 +366,7 @@ def from_ndarray_like(cls, ndarray_like: NDArrayLike) -> Self:
         return cls(ndarray_like)
 
     @classmethod
+    @abstractmethod
     def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self:
         """Create a new buffer of Numpy array-like object
 
@@ -357,7 +379,13 @@ def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self:
         -------
             New buffer representing `array_like`
         """
-        return cls.from_ndarray_like(np.asanyarray(array_like))
+        if cls is NDBuffer:
+            raise NotImplementedError(
+                "Cannot call abstract method on the abstract class 'NDBuffer'"
+            )
+        return cls(
+            cast(NDArrayLike, None)
+        )  # This line will never be reached, but it satisfies the type checker
 
     def as_ndarray_like(self) -> NDArrayLike:
         """Returns the underlying array (host or device memory) of this buffer
@@ -370,6 +398,7 @@ def as_ndarray_like(self) -> NDArrayLike:
         """
         return self._data
 
+    @abstractmethod
     def as_numpy_array(self) -> npt.NDArray[Any]:
         """Returns the buffer as a NumPy array (host memory).
 
@@ -381,7 +410,7 @@ def as_numpy_array(self) -> npt.NDArray[Any]:
         -------
             NumPy array of this buffer (might be a data copy)
         """
-        return np.asanyarray(self._data)
+        ...
 
     @property
     def dtype(self) -> np.dtype[Any]:
@@ -412,13 +441,11 @@ def squeeze(self, axis: tuple[int, ...]) -> Self:
     def astype(self, dtype: npt.DTypeLike, order: Literal["K", "A", "C", "F"] = "K") -> Self:
         return self.__class__(self._data.astype(dtype=dtype, order=order))
 
-    def __getitem__(self, key: Any) -> Self:
-        return self.__class__(np.asanyarray(self._data.__getitem__(key)))
+    @abstractmethod
+    def __getitem__(self, key: Any) -> Self: ...
 
-    def __setitem__(self, key: Any, value: Any) -> None:
-        if isinstance(value, NDBuffer):
-            value = value._data
-        self._data.__setitem__(key, value)
+    @abstractmethod
+    def __setitem__(self, key: Any, value: Any) -> None: ...
 
     def __len__(self) -> int:
         return self._data.__len__()
@@ -439,34 +466,6 @@ def transpose(self, axes: SupportsIndex | Sequence[SupportsIndex] | None) -> Sel
         return self.__class__(self._data.transpose(axes))
 
 
-def as_numpy_array_wrapper(
-    func: Callable[[npt.NDArray[Any]], bytes], buf: Buffer, prototype: BufferPrototype
-) -> Buffer:
-    """Converts the input of `func` to a numpy array and the output back to `Buffer`.
-
-    This function is useful when calling a `func` that only support host memory such
-    as `GZip.decode` and `Blosc.decode`. In this case, use this wrapper to convert
-    the input `buf` to a Numpy array and convert the result back into a `Buffer`.
-
-    Parameters
-    ----------
-    func
-        The callable that will be called with the converted `buf` as input.
-        `func` must return bytes, which will be converted into a `Buffer`
-        before returned.
-    buf
-        The buffer that will be converted to a Numpy array before given as
-        input to `func`.
-    prototype
-        The prototype of the output buffer.
-
-    Returns
-    -------
-        The result of `func` converted to a `Buffer`
-    """
-    return prototype.buffer.from_bytes(func(buf.as_numpy_array()))
-
-
 class BufferPrototype(NamedTuple):
     """Prototype of the Buffer and NDBuffer class
 
@@ -487,12 +486,3 @@ class BufferPrototype(NamedTuple):
 # The default buffer prototype used throughout the Zarr codebase.
 def default_buffer_prototype() -> BufferPrototype:
     return BufferPrototype(buffer=get_buffer_class(), nd_buffer=get_ndbuffer_class())
-
-
-# The numpy prototype used for E.g. when reading the shard index
-def numpy_buffer_prototype() -> BufferPrototype:
-    return BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer)
-
-
-register_buffer(Buffer)
-register_ndbuffer(NDBuffer)
diff --git a/src/zarr/core/buffer/cpu.py b/src/zarr/core/buffer/cpu.py
new file mode 100644
index 0000000000..0953805223
--- /dev/null
+++ b/src/zarr/core/buffer/cpu.py
@@ -0,0 +1,227 @@
+from __future__ import annotations
+
+from collections.abc import Callable, Iterable
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Literal,
+)
+
+import numpy as np
+import numpy.typing as npt
+
+from zarr.core.buffer import core
+from zarr.core.buffer.core import ArrayLike, NDArrayLike
+from zarr.registry import (
+    register_buffer,
+    register_ndbuffer,
+)
+
+if TYPE_CHECKING:
+    from typing_extensions import Self
+
+    from zarr.core.common import BytesLike
+
+
+class Buffer(core.Buffer):
+    """A flat contiguous memory block
+
+    We use Buffer throughout Zarr to represent a contiguous block of memory.
+
+    A Buffer is backed by a underlying array-like instance that represents
+    the memory. The memory type is unspecified; can be regular host memory,
+    CUDA device memory, or something else. The only requirement is that the
+    array-like instance can be copied/converted to a regular Numpy array
+    (host memory).
+
+    Notes
+    -----
+    This buffer is untyped, so all indexing and sizes are in bytes.
+
+    Parameters
+    ----------
+    array_like
+        array-like object that must be 1-dim, contiguous, and byte dtype.
+    """
+
+    def __init__(self, array_like: ArrayLike):
+        super().__init__(array_like)
+
+    @classmethod
+    def create_zero_length(cls) -> Self:
+        return cls(np.array([], dtype="b"))
+
+    @classmethod
+    def from_buffer(cls, buffer: core.Buffer) -> Self:
+        """Create a new buffer of an existing Buffer
+
+        This is useful if you want to ensure that an existing buffer is
+        of the correct subclass of Buffer. E.g., MemoryStore uses this
+        to return a buffer instance of the subclass specified by its
+        BufferPrototype argument.
+
+        Typically, this only copies data if the data has to be moved between
+        memory types, such as from host to device memory.
+
+        Parameters
+        ----------
+        buffer
+            buffer object.
+
+        Returns
+        -------
+            A new buffer representing the content of the input buffer
+
+        Note
+        ----
+        Subclasses of `Buffer` must override this method to implement
+        more optimal conversions that avoid copies where possible
+        """
+        return cls.from_array_like(buffer.as_numpy_array())
+
+    @classmethod
+    def from_bytes(cls, bytes_like: BytesLike) -> Self:
+        """Create a new buffer of a bytes-like object (host memory)
+
+        Parameters
+        ----------
+        bytes_like
+           bytes-like object
+
+        Returns
+        -------
+            New buffer representing `bytes_like`
+        """
+        return cls.from_array_like(np.frombuffer(bytes_like, dtype="b"))
+
+    def as_numpy_array(self) -> npt.NDArray[Any]:
+        """Returns the buffer as a NumPy array (host memory).
+
+        Notes
+        -----
+        Might have to copy data, consider using `.as_array_like()` instead.
+
+        Returns
+        -------
+            NumPy array of this buffer (might be a data copy)
+        """
+        return np.asanyarray(self._data)
+
+    def __add__(self, other: core.Buffer) -> Self:
+        """Concatenate two buffers"""
+
+        other_array = other.as_array_like()
+        assert other_array.dtype == np.dtype("b")
+        return self.__class__(
+            np.concatenate((np.asanyarray(self._data), np.asanyarray(other_array)))
+        )
+
+
+class NDBuffer(core.NDBuffer):
+    """An n-dimensional memory block
+
+    We use NDBuffer throughout Zarr to represent a n-dimensional memory block.
+
+    A NDBuffer is backed by a underlying ndarray-like instance that represents
+    the memory. The memory type is unspecified; can be regular host memory,
+    CUDA device memory, or something else. The only requirement is that the
+    ndarray-like instance can be copied/converted to a regular Numpy array
+    (host memory).
+
+    Notes
+    -----
+    The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
+    is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
+    in order to use Python's type system to differentiate between the contiguous
+    Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
+    two classes separate.
+
+    Parameters
+    ----------
+    ndarray_like
+        ndarray-like object that is convertible to a regular Numpy array.
+    """
+
+    def __init__(self, array: NDArrayLike):
+        super().__init__(array)
+
+    @classmethod
+    def create(
+        cls,
+        *,
+        shape: Iterable[int],
+        dtype: npt.DTypeLike,
+        order: Literal["C", "F"] = "C",
+        fill_value: Any | None = None,
+    ) -> Self:
+        ret = cls(np.empty(shape=tuple(shape), dtype=dtype, order=order))
+        if fill_value is not None:
+            ret.fill(fill_value)
+        return ret
+
+    @classmethod
+    def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self:
+        return cls.from_ndarray_like(np.asanyarray(array_like))
+
+    def as_numpy_array(self) -> npt.NDArray[Any]:
+        """Returns the buffer as a NumPy array (host memory).
+
+        Warnings
+        --------
+        Might have to copy data, consider using `.as_ndarray_like()` instead.
+
+        Returns
+        -------
+            NumPy array of this buffer (might be a data copy)
+        """
+        return np.asanyarray(self._data)
+
+    def __getitem__(self, key: Any) -> Self:
+        return self.__class__(np.asanyarray(self._data.__getitem__(key)))
+
+    def __setitem__(self, key: Any, value: Any) -> None:
+        if isinstance(value, NDBuffer):
+            value = value._data
+        self._data.__setitem__(key, value)
+
+
+def as_numpy_array_wrapper(
+    func: Callable[[npt.NDArray[Any]], bytes], buf: core.Buffer, prototype: core.BufferPrototype
+) -> core.Buffer:
+    """Converts the input of `func` to a numpy array and the output back to `Buffer`.
+
+    This function is useful when calling a `func` that only support host memory such
+    as `GZip.decode` and `Blosc.decode`. In this case, use this wrapper to convert
+    the input `buf` to a Numpy array and convert the result back into a `Buffer`.
+
+    Parameters
+    ----------
+    func
+        The callable that will be called with the converted `buf` as input.
+        `func` must return bytes, which will be converted into a `Buffer`
+        before returned.
+    buf
+        The buffer that will be converted to a Numpy array before given as
+        input to `func`.
+    prototype
+        The prototype of the output buffer.
+
+    Returns
+    -------
+        The result of `func` converted to a `Buffer`
+    """
+    return prototype.buffer.from_bytes(func(buf.as_numpy_array()))
+
+
+# CPU buffer prototype using numpy arrays
+buffer_prototype = core.BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer)
+# default_buffer_prototype = buffer_prototype
+
+
+# The numpy prototype used for E.g. when reading the shard index
+def numpy_buffer_prototype() -> core.BufferPrototype:
+    return core.BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer)
+
+
+register_buffer(Buffer)
+register_ndbuffer(NDBuffer)
diff --git a/src/zarr/core/buffer/gpu.py b/src/zarr/core/buffer/gpu.py
new file mode 100644
index 0000000000..9d38df2abf
--- /dev/null
+++ b/src/zarr/core/buffer/gpu.py
@@ -0,0 +1,217 @@
+from __future__ import annotations
+
+import warnings
+from collections.abc import Iterable
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Literal,
+    cast,
+)
+
+import numpy as np
+import numpy.typing as npt
+
+from zarr.core.buffer import core
+from zarr.core.buffer.core import ArrayLike, BufferPrototype, NDArrayLike
+
+if TYPE_CHECKING:
+    from typing_extensions import Self
+
+    from zarr.core.common import BytesLike
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
+
+
+class Buffer(core.Buffer):
+    """A flat contiguous memory block on the GPU
+
+    We use Buffer throughout Zarr to represent a contiguous block of memory.
+
+    A Buffer is backed by a underlying array-like instance that represents
+    the memory. The memory type is unspecified; can be regular host memory,
+    CUDA device memory, or something else. The only requirement is that the
+    array-like instance can be copied/converted to a regular Numpy array
+    (host memory).
+
+    Note
+    ----
+    This buffer is untyped, so all indexing and sizes are in bytes.
+
+    Parameters
+    ----------
+    array_like
+        array-like object that must be 1-dim, contiguous, and byte dtype.
+    """
+
+    def __init__(self, array_like: ArrayLike):
+        if cp is None:
+            raise ImportError(
+                "Cannot use zarr.buffer.gpu.Buffer without cupy. Please install cupy."
+            )
+
+        if array_like.ndim != 1:
+            raise ValueError("array_like: only 1-dim allowed")
+        if array_like.dtype != np.dtype("b"):
+            raise ValueError("array_like: only byte dtype allowed")
+
+        if not hasattr(array_like, "__cuda_array_interface__"):
+            # Slow copy based path for arrays that don't support the __cuda_array_interface__
+            # TODO: Add a fast zero-copy path for arrays that support the dlpack protocol
+            msg = (
+                "Creating a zarr.buffer.gpu.Buffer with an array that does not support the "
+                "__cuda_array_interface__ for zero-copy transfers, "
+                "falling back to slow copy based path"
+            )
+            warnings.warn(
+                msg,
+                stacklevel=2,
+            )
+        self._data = cp.asarray(array_like)
+
+    @classmethod
+    def create_zero_length(cls) -> Self:
+        """Create an empty buffer with length zero
+
+        Returns
+        -------
+            New empty 0-length buffer
+        """
+        return cls(cp.array([], dtype="b"))
+
+    @classmethod
+    def from_buffer(cls, buffer: core.Buffer) -> Self:
+        """Create an GPU Buffer given an arbitrary Buffer
+        This will try to be zero-copy if `buffer` is already on the
+        GPU and will trigger a copy if not.
+
+        Returns
+        -------
+            New GPU Buffer constructed from `buffer`
+        """
+        return cls(buffer.as_array_like())
+
+    @classmethod
+    def from_bytes(cls, bytes_like: BytesLike) -> Self:
+        return cls.from_array_like(cp.frombuffer(bytes_like, dtype="b"))
+
+    def as_numpy_array(self) -> npt.NDArray[Any]:
+        return cast(npt.NDArray[Any], cp.asnumpy(self._data))
+
+    def __add__(self, other: core.Buffer) -> Self:
+        other_array = other.as_array_like()
+        assert other_array.dtype == np.dtype("b")
+        gpu_other = Buffer(other_array)
+        gpu_other_array = gpu_other.as_array_like()
+        return self.__class__(
+            cp.concatenate((cp.asanyarray(self._data), cp.asanyarray(gpu_other_array)))
+        )
+
+
+class NDBuffer(core.NDBuffer):
+    """A n-dimensional memory block on the GPU
+
+    We use NDBuffer throughout Zarr to represent a n-dimensional memory block.
+
+    A NDBuffer is backed by a underlying ndarray-like instance that represents
+    the memory. The memory type is unspecified; can be regular host memory,
+    CUDA device memory, or something else. The only requirement is that the
+    ndarray-like instance can be copied/converted to a regular Numpy array
+    (host memory).
+
+    Note
+    ----
+    The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
+    is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
+    in order to use Python's type system to differentiate between the contiguous
+    Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
+    two classes separate.
+
+    Parameters
+    ----------
+    ndarray_like
+        ndarray-like object that is convertible to a regular Numpy array.
+    """
+
+    def __init__(self, array: NDArrayLike):
+        if cp is None:
+            raise ImportError(
+                "Cannot use zarr.buffer.gpu.NDBuffer without cupy. Please install cupy."
+            )
+
+        # assert array.ndim > 0
+        assert array.dtype != object
+        self._data = array
+
+        if not hasattr(array, "__cuda_array_interface__"):
+            # Slow copy based path for arrays that don't support the __cuda_array_interface__
+            # TODO: Add a fast zero-copy path for arrays that support the dlpack protocol
+            msg = (
+                "Creating a zarr.buffer.gpu.NDBuffer with an array that does not support the "
+                "__cuda_array_interface__ for zero-copy transfers, "
+                "falling back to slow copy based path"
+            )
+            warnings.warn(
+                msg,
+                stacklevel=2,
+            )
+        self._data = cp.asarray(array)
+
+    @classmethod
+    def create(
+        cls,
+        *,
+        shape: Iterable[int],
+        dtype: npt.DTypeLike,
+        order: Literal["C", "F"] = "C",
+        fill_value: Any | None = None,
+    ) -> Self:
+        ret = cls(cp.empty(shape=tuple(shape), dtype=dtype, order=order))
+        if fill_value is not None:
+            ret.fill(fill_value)
+        return ret
+
+    @classmethod
+    def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self:
+        """Create a new buffer of Numpy array-like object
+
+        Parameters
+        ----------
+        array_like
+            Object that can be coerced into a Numpy array
+
+        Returns
+        -------
+            New buffer representing `array_like`
+        """
+        return cls(cp.asarray(array_like))
+
+    def as_numpy_array(self) -> npt.NDArray[Any]:
+        """Returns the buffer as a NumPy array (host memory).
+
+        Warning
+        -------
+        Might have to copy data, consider using `.as_ndarray_like()` instead.
+
+        Returns
+        -------
+            NumPy array of this buffer (might be a data copy)
+        """
+        return cast(npt.NDArray[Any], cp.asnumpy(self._data))
+
+    def __getitem__(self, key: Any) -> Self:
+        return self.__class__(self._data.__getitem__(key))
+
+    def __setitem__(self, key: Any, value: Any) -> None:
+        if isinstance(value, NDBuffer):
+            value = value._data
+        elif isinstance(value, core.NDBuffer):
+            gpu_value = NDBuffer(value.as_ndarray_like())
+            value = gpu_value._data
+        self._data.__setitem__(key, value)
+
+
+buffer_prototype = BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer)
diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py
index 67f5c74347..611d1faea5 100644
--- a/src/zarr/core/config.py
+++ b/src/zarr/core/config.py
@@ -60,8 +60,8 @@ def reset(self) -> None:
                 "sharding_indexed": "zarr.codecs.sharding.ShardingCodec",
                 "transpose": "zarr.codecs.transpose.TransposeCodec",
             },
-            "buffer": "zarr.core.buffer.Buffer",
-            "ndbuffer": "zarr.core.buffer.NDBuffer",
+            "buffer": "zarr.core.buffer.cpu.Buffer",
+            "ndbuffer": "zarr.core.buffer.cpu.NDBuffer",
         }
     ],
 )
diff --git a/src/zarr/store/memory.py b/src/zarr/store/memory.py
index 117fd69ec0..c817b0963a 100644
--- a/src/zarr/store/memory.py
+++ b/src/zarr/store/memory.py
@@ -4,7 +4,7 @@
 from typing import TYPE_CHECKING
 
 from zarr.abc.store import Store
-from zarr.core.buffer import Buffer
+from zarr.core.buffer import Buffer, gpu
 from zarr.core.common import concurrent_map
 from zarr.store._utils import _normalize_interval_index
 
@@ -126,3 +126,39 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
 
         for key in keys_unique:
             yield key
+
+
+class GpuMemoryStore(MemoryStore):
+    """A GPU only memory store that stores every chunk in GPU memory irrespective
+    of the original location. This guarantees that chunks will always be in GPU
+    memory for downstream processing. For location agnostic use cases, it would
+    be better to use `MemoryStore` instead.
+    """
+
+    _store_dict: MutableMapping[str, Buffer]
+
+    def __init__(
+        self,
+        store_dict: MutableMapping[str, Buffer] | None = None,
+        *,
+        mode: AccessModeLiteral = "r",
+    ):
+        super().__init__(mode=mode)
+        if store_dict:
+            self._store_dict = {k: gpu.Buffer.from_buffer(store_dict[k]) for k in iter(store_dict)}
+
+    def __str__(self) -> str:
+        return f"gpumemory://{id(self._store_dict)}"
+
+    def __repr__(self) -> str:
+        return f"GpuMemoryStore({str(self)!r})"
+
+    async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None = None) -> None:
+        self._check_writable()
+        assert isinstance(key, str)
+        if not isinstance(value, Buffer):
+            raise TypeError(f"Expected Buffer. Got {type(value)}.")
+
+        # Convert to gpu.Buffer
+        gpu_value = value if isinstance(value, gpu.Buffer) else gpu.Buffer.from_buffer(value)
+        await super().set(key, gpu_value, byte_range=byte_range)
diff --git a/src/zarr/testing/buffer.py b/src/zarr/testing/buffer.py
index ee170a5dd3..a6120ef2f9 100644
--- a/src/zarr/testing/buffer.py
+++ b/src/zarr/testing/buffer.py
@@ -7,7 +7,7 @@
 import numpy as np
 import numpy.typing as npt
 
-from zarr.core.buffer import Buffer, BufferPrototype, NDBuffer
+from zarr.core.buffer import Buffer, BufferPrototype, cpu
 from zarr.store import MemoryStore
 
 if TYPE_CHECKING:
@@ -25,11 +25,11 @@ class TestNDArrayLike(np.ndarray):
     """An example of a ndarray-like class"""
 
 
-class TestBuffer(Buffer):
+class TestBuffer(cpu.Buffer):
     """Example of a custom Buffer that handles ArrayLike"""
 
 
-class NDBufferUsingTestNDArrayLike(NDBuffer):
+class NDBufferUsingTestNDArrayLike(cpu.NDBuffer):
     """Example of a custom NDBuffer that handles MyNDArrayLike"""
 
     @classmethod
diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py
index 11978b4121..925087ae67 100644
--- a/src/zarr/testing/store.py
+++ b/src/zarr/testing/store.py
@@ -11,10 +11,12 @@
 
 
 S = TypeVar("S", bound=Store)
+B = TypeVar("B", bound=Buffer)
 
 
-class StoreTests(Generic[S]):
+class StoreTests(Generic[S, B]):
     store_cls: type[S]
+    buffer_cls: type[B]
 
     def set(self, store: S, key: str, value: Buffer) -> None:
         """
@@ -60,7 +62,7 @@ async def test_not_writable_store_raises(self, store_kwargs: dict[str, Any]) ->
 
         # set
         with pytest.raises(ValueError):
-            await store.set("foo", Buffer.from_bytes(b"bar"))
+            await store.set("foo", self.buffer_cls.from_bytes(b"bar"))
 
         # delete
         with pytest.raises(ValueError):
@@ -87,7 +89,7 @@ async def test_get(
         """
         Ensure that data can be read from the store using the store.get method.
         """
-        data_buf = Buffer.from_bytes(data)
+        data_buf = self.buffer_cls.from_bytes(data)
         self.set(store, key, data_buf)
         observed = await store.get(key, prototype=default_buffer_prototype(), byte_range=byte_range)
         start, length = _normalize_interval_index(data_buf, interval=byte_range)
@@ -101,7 +103,7 @@ async def test_set(self, store: S, key: str, data: bytes) -> None:
         Ensure that data can be written to the store using the store.set method.
         """
         assert not store.mode.readonly
-        data_buf = Buffer.from_bytes(data)
+        data_buf = self.buffer_cls.from_bytes(data)
         await store.set(key, data_buf)
         observed = self.get(store, key)
         assert_bytes_equal(observed, data_buf)
@@ -120,7 +122,7 @@ async def test_get_partial_values(
     ) -> None:
         # put all of the data
         for key, _ in key_ranges:
-            self.set(store, key, Buffer.from_bytes(bytes(key, encoding="utf-8")))
+            self.set(store, key, self.buffer_cls.from_bytes(bytes(key, encoding="utf-8")))
 
         # read back just part of it
         observed_maybe = await store.get_partial_values(
@@ -148,28 +150,28 @@ async def test_get_partial_values(
 
     async def test_exists(self, store: S) -> None:
         assert not await store.exists("foo")
-        await store.set("foo/zarr.json", Buffer.from_bytes(b"bar"))
+        await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar"))
         assert await store.exists("foo/zarr.json")
 
     async def test_delete(self, store: S) -> None:
-        await store.set("foo/zarr.json", Buffer.from_bytes(b"bar"))
+        await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar"))
         assert await store.exists("foo/zarr.json")
         await store.delete("foo/zarr.json")
         assert not await store.exists("foo/zarr.json")
 
     async def test_empty(self, store: S) -> None:
         assert await store.empty()
-        self.set(store, "key", Buffer.from_bytes(bytes("something", encoding="utf-8")))
+        self.set(store, "key", self.buffer_cls.from_bytes(bytes("something", encoding="utf-8")))
         assert not await store.empty()
 
     async def test_clear(self, store: S) -> None:
-        self.set(store, "key", Buffer.from_bytes(bytes("something", encoding="utf-8")))
+        self.set(store, "key", self.buffer_cls.from_bytes(bytes("something", encoding="utf-8")))
         await store.clear()
         assert await store.empty()
 
     async def test_list(self, store: S) -> None:
         assert [k async for k in store.list()] == []
-        await store.set("foo/zarr.json", Buffer.from_bytes(b"bar"))
+        await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar"))
         keys = [k async for k in store.list()]
         assert keys == ["foo/zarr.json"], keys
 
@@ -178,7 +180,7 @@ async def test_list(self, store: S) -> None:
             key = f"foo/c/{i}"
             expected.append(key)
             await store.set(
-                f"foo/c/{i}", Buffer.from_bytes(i.to_bytes(length=3, byteorder="little"))
+                f"foo/c/{i}", self.buffer_cls.from_bytes(i.to_bytes(length=3, byteorder="little"))
             )
 
     @pytest.mark.xfail
@@ -190,12 +192,12 @@ async def test_list_dir(self, store: S) -> None:
         out = [k async for k in store.list_dir("")]
         assert out == []
         assert [k async for k in store.list_dir("foo")] == []
-        await store.set("foo/zarr.json", Buffer.from_bytes(b"bar"))
-        await store.set("group-0/zarr.json", Buffer.from_bytes(b"\x01"))  # group
-        await store.set("group-0/group-1/zarr.json", Buffer.from_bytes(b"\x01"))  # group
-        await store.set("group-0/group-1/a1/zarr.json", Buffer.from_bytes(b"\x01"))
-        await store.set("group-0/group-1/a2/zarr.json", Buffer.from_bytes(b"\x01"))
-        await store.set("group-0/group-1/a3/zarr.json", Buffer.from_bytes(b"\x01"))
+        await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar"))
+        await store.set("group-0/zarr.json", self.buffer_cls.from_bytes(b"\x01"))  # group
+        await store.set("group-0/group-1/zarr.json", self.buffer_cls.from_bytes(b"\x01"))  # group
+        await store.set("group-0/group-1/a1/zarr.json", self.buffer_cls.from_bytes(b"\x01"))
+        await store.set("group-0/group-1/a2/zarr.json", self.buffer_cls.from_bytes(b"\x01"))
+        await store.set("group-0/group-1/a3/zarr.json", self.buffer_cls.from_bytes(b"\x01"))
 
         keys_expected = ["foo", "group-0"]
         keys_observed = [k async for k in store.list_dir("")]
diff --git a/src/zarr/testing/utils.py b/src/zarr/testing/utils.py
index 3a70f96d44..a21a0be708 100644
--- a/src/zarr/testing/utils.py
+++ b/src/zarr/testing/utils.py
@@ -1,5 +1,9 @@
 from __future__ import annotations
 
+from typing import Any, cast
+
+import pytest
+
 from zarr.core.buffer import Buffer
 from zarr.core.common import BytesLike
 
@@ -18,3 +22,21 @@ def assert_bytes_equal(b1: Buffer | BytesLike | None, b2: Buffer | BytesLike | N
     if isinstance(b2, Buffer):
         b2 = b2.to_bytes()
     assert b1 == b2
+
+
+def has_cupy() -> bool:
+    try:
+        import cupy
+
+        return cast(bool, cupy.cuda.runtime.getDeviceCount() > 0)
+    except ImportError:
+        return False
+    except cupy.cuda.runtime.CUDARuntimeError:
+        return False
+
+
+# Decorator for GPU tests
+def gpu_test(func: Any) -> Any:
+    return pytest.mark.gpu(
+        pytest.mark.skipif(not has_cupy(), reason="CuPy not installed or no GPU available")(func)
+    )
diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py
index d8af484ee6..c67d7bb4fd 100644
--- a/tests/v3/conftest.py
+++ b/tests/v3/conftest.py
@@ -94,6 +94,9 @@ async def async_group(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> As
 def xp(request: pytest.FixtureRequest) -> Iterator[ModuleType]:
     """Fixture to parametrize over numpy-like libraries"""
 
+    if request.param == "cupy":
+        request.node.add_marker(pytest.mark.gpu)
+
     yield pytest.importorskip(request.param)
 
 
diff --git a/tests/v3/test_buffer.py b/tests/v3/test_buffer.py
index 298a7f7eed..419f01c720 100644
--- a/tests/v3/test_buffer.py
+++ b/tests/v3/test_buffer.py
@@ -10,14 +10,21 @@
 from zarr.codecs.gzip import GzipCodec
 from zarr.codecs.transpose import TransposeCodec
 from zarr.codecs.zstd import ZstdCodec
-from zarr.core.buffer import ArrayLike, BufferPrototype, NDArrayLike, numpy_buffer_prototype
+from zarr.core.buffer import ArrayLike, BufferPrototype, NDArrayLike, cpu, gpu
 from zarr.store.common import StorePath
+from zarr.store.memory import MemoryStore
 from zarr.testing.buffer import (
     NDBufferUsingTestNDArrayLike,
     StoreExpectingTestBuffer,
     TestBuffer,
     TestNDArrayLike,
 )
+from zarr.testing.utils import gpu_test
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 
 
 def test_nd_array_like(xp):
@@ -52,6 +59,31 @@ async def test_async_array_prototype():
     assert np.array_equal(expect, got)
 
 
+@gpu_test
+@pytest.mark.asyncio
+async def test_async_array_gpu_prototype():
+    """Test the use of the GPU buffer prototype"""
+
+    expect = cp.zeros((9, 9), dtype="uint16", order="F")
+    a = await AsyncArray.create(
+        StorePath(MemoryStore(mode="w")) / "test_async_array_gpu_prototype",
+        shape=expect.shape,
+        chunk_shape=(5, 5),
+        dtype=expect.dtype,
+        fill_value=0,
+    )
+    expect[1:4, 3:6] = cp.ones((3, 3))
+
+    await a.setitem(
+        selection=(slice(1, 4), slice(3, 6)),
+        value=cp.ones((3, 3)),
+        prototype=gpu.buffer_prototype,
+    )
+    got = await a.getitem(selection=(slice(0, 9), slice(0, 9)), prototype=gpu.buffer_prototype)
+    assert isinstance(got, cp.ndarray)
+    assert cp.array_equal(expect, got)
+
+
 @pytest.mark.asyncio
 async def test_codecs_use_of_prototype():
     expect = np.zeros((10, 10), dtype="uint16", order="F")
@@ -84,8 +116,39 @@ async def test_codecs_use_of_prototype():
     assert np.array_equal(expect, got)
 
 
+@gpu_test
+@pytest.mark.asyncio
+async def test_codecs_use_of_gpu_prototype():
+    expect = cp.zeros((10, 10), dtype="uint16", order="F")
+    a = await AsyncArray.create(
+        StorePath(MemoryStore(mode="w")) / "test_codecs_use_of_gpu_prototype",
+        shape=expect.shape,
+        chunk_shape=(5, 5),
+        dtype=expect.dtype,
+        fill_value=0,
+        codecs=[
+            TransposeCodec(order=(1, 0)),
+            BytesCodec(),
+            BloscCodec(),
+            Crc32cCodec(),
+            GzipCodec(),
+            ZstdCodec(),
+        ],
+    )
+    expect[:] = cp.arange(100).reshape(10, 10)
+
+    await a.setitem(
+        selection=(slice(0, 10), slice(0, 10)),
+        value=expect[:],
+        prototype=gpu.buffer_prototype,
+    )
+    got = await a.getitem(selection=(slice(0, 10), slice(0, 10)), prototype=gpu.buffer_prototype)
+    assert isinstance(got, cp.ndarray)
+    assert cp.array_equal(expect, got)
+
+
 def test_numpy_buffer_prototype():
-    buffer = numpy_buffer_prototype().buffer.create_zero_length()
-    ndbuffer = numpy_buffer_prototype().nd_buffer.create(shape=(1, 2), dtype=np.dtype("int64"))
+    buffer = cpu.buffer_prototype.buffer.create_zero_length()
+    ndbuffer = cpu.buffer_prototype.nd_buffer.create(shape=(1, 2), dtype=np.dtype("int64"))
     assert isinstance(buffer.as_array_like(), np.ndarray)
     assert isinstance(ndbuffer.as_ndarray_like(), np.ndarray)
diff --git a/tests/v3/test_config.py b/tests/v3/test_config.py
index 881833797c..b3e04e51da 100644
--- a/tests/v3/test_config.py
+++ b/tests/v3/test_config.py
@@ -46,8 +46,8 @@ def test_config_defaults_set() -> None:
                 "path": "zarr.codecs.pipeline.BatchedCodecPipeline",
                 "batch_size": 1,
             },
-            "buffer": "zarr.core.buffer.Buffer",
-            "ndbuffer": "zarr.core.buffer.NDBuffer",
+            "buffer": "zarr.core.buffer.cpu.Buffer",
+            "ndbuffer": "zarr.core.buffer.cpu.NDBuffer",
             "codecs": {
                 "blosc": "zarr.codecs.blosc.BloscCodec",
                 "gzip": "zarr.codecs.gzip.GzipCodec",
diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py
index eb7b1f30dd..7592c77201 100644
--- a/tests/v3/test_group.py
+++ b/tests/v3/test_group.py
@@ -7,7 +7,7 @@
 from _pytest.compat import LEGACY_PATH
 
 from zarr import Array, AsyncArray, AsyncGroup, Group
-from zarr.core.buffer import Buffer
+from zarr.core.buffer import default_buffer_prototype
 from zarr.core.common import ZarrFormat
 from zarr.core.group import GroupMetadata
 from zarr.core.sync import sync
@@ -97,11 +97,18 @@ def test_group_members(store: MemoryStore | LocalStore, zarr_format: ZarrFormat)
 
     # add an extra object to the domain of the group.
     # the list of children should ignore this object.
-    sync(store.set(f"{path}/extra_object-1", Buffer.from_bytes(b"000000")))
+    sync(
+        store.set(f"{path}/extra_object-1", default_buffer_prototype().buffer.from_bytes(b"000000"))
+    )
     # add an extra object under a directory-like prefix in the domain of the group.
     # this creates a directory with a random key in it
     # this should not show up as a member
-    sync(store.set(f"{path}/extra_directory/extra_object-2", Buffer.from_bytes(b"000000")))
+    sync(
+        store.set(
+            f"{path}/extra_directory/extra_object-2",
+            default_buffer_prototype().buffer.from_bytes(b"000000"),
+        )
+    )
     members_observed = group.members()
     # members are not guaranteed to be ordered, so sort before comparing
     assert sorted(dict(members_observed)) == sorted(members_expected)
diff --git a/tests/v3/test_indexing.py b/tests/v3/test_indexing.py
index 7895151b5f..5805de3035 100644
--- a/tests/v3/test_indexing.py
+++ b/tests/v3/test_indexing.py
@@ -12,7 +12,7 @@
 
 import zarr
 from zarr.abc.store import Store
-from zarr.core.buffer import BufferPrototype, NDBuffer
+from zarr.core.buffer import BufferPrototype, default_buffer_prototype
 from zarr.core.common import ChunkCoords
 from zarr.core.indexing import (
     make_slice_selection,
@@ -136,7 +136,7 @@ def test_get_basic_selection_0d(store: StorePath, use_out: bool, value: Any, dty
 
     if use_out:
         # test out param
-        b = NDBuffer.from_numpy_array(np.zeros_like(arr_np))
+        b = default_buffer_prototype().nd_buffer.from_numpy_array(np.zeros_like(arr_np))
         arr_z.get_basic_selection(Ellipsis, out=b)
         assert_array_equal(arr_np, b.as_ndarray_like())
 
@@ -247,7 +247,9 @@ def _test_get_basic_selection(a, z, selection):
     assert_array_equal(expect, actual)
 
     # test out param
-    b = NDBuffer.from_numpy_array(np.empty(shape=expect.shape, dtype=expect.dtype))
+    b = default_buffer_prototype().nd_buffer.from_numpy_array(
+        np.empty(shape=expect.shape, dtype=expect.dtype)
+    )
     z.get_basic_selection(selection, out=b)
     assert_array_equal(expect, b.as_numpy_array())
 
diff --git a/tests/v3/test_store/test_local.py b/tests/v3/test_store/test_local.py
index 8afa4fef3a..59cae22de3 100644
--- a/tests/v3/test_store/test_local.py
+++ b/tests/v3/test_store/test_local.py
@@ -2,16 +2,17 @@
 
 import pytest
 
-from zarr.core.buffer import Buffer
+from zarr.core.buffer import Buffer, cpu
 from zarr.store.local import LocalStore
 from zarr.testing.store import StoreTests
 
 
-class TestLocalStore(StoreTests[LocalStore]):
+class TestLocalStore(StoreTests[LocalStore, cpu.Buffer]):
     store_cls = LocalStore
+    buffer_cls = cpu.Buffer
 
     def get(self, store: LocalStore, key: str) -> Buffer:
-        return Buffer.from_bytes((store.root / key).read_bytes())
+        return self.buffer_cls.from_bytes((store.root / key).read_bytes())
 
     def set(self, store: LocalStore, key: str, value: Buffer) -> None:
         parent = (store.root / key).parent
diff --git a/tests/v3/test_store/test_memory.py b/tests/v3/test_store/test_memory.py
index 51ecf46709..f76423c631 100644
--- a/tests/v3/test_store/test_memory.py
+++ b/tests/v3/test_store/test_memory.py
@@ -2,13 +2,15 @@
 
 import pytest
 
-from zarr.core.buffer import Buffer
-from zarr.store.memory import MemoryStore
+from zarr.core.buffer import Buffer, cpu, gpu
+from zarr.store.memory import GpuMemoryStore, MemoryStore
 from zarr.testing.store import StoreTests
+from zarr.testing.utils import gpu_test
 
 
-class TestMemoryStore(StoreTests[MemoryStore]):
+class TestMemoryStore(StoreTests[MemoryStore, cpu.Buffer]):
     store_cls = MemoryStore
+    buffer_cls = cpu.Buffer
 
     def set(self, store: MemoryStore, key: str, value: Buffer) -> None:
         store._store_dict[key] = value
@@ -40,3 +42,38 @@ def test_store_supports_partial_writes(self, store: MemoryStore) -> None:
 
     def test_list_prefix(self, store: MemoryStore) -> None:
         assert True
+
+
+@gpu_test
+class TestGpuMemoryStore(StoreTests[GpuMemoryStore, gpu.Buffer]):
+    store_cls = GpuMemoryStore
+    buffer_cls = gpu.Buffer
+
+    def set(self, store: GpuMemoryStore, key: str, value: Buffer) -> None:
+        store._store_dict[key] = value
+
+    def get(self, store: MemoryStore, key: str) -> Buffer:
+        return store._store_dict[key]
+
+    @pytest.fixture(scope="function", params=[None, {}])
+    def store_kwargs(self, request) -> dict[str, str | None | dict[str, Buffer]]:
+        return {"store_dict": request.param, "mode": "r+"}
+
+    @pytest.fixture(scope="function")
+    def store(self, store_kwargs: str | None | dict[str, gpu.Buffer]) -> GpuMemoryStore:
+        return self.store_cls(**store_kwargs)
+
+    def test_store_repr(self, store: GpuMemoryStore) -> None:
+        assert str(store) == f"gpumemory://{id(store._store_dict)}"
+
+    def test_store_supports_writes(self, store: GpuMemoryStore) -> None:
+        assert store.supports_writes
+
+    def test_store_supports_listing(self, store: GpuMemoryStore) -> None:
+        assert store.supports_listing
+
+    def test_store_supports_partial_writes(self, store: GpuMemoryStore) -> None:
+        assert store.supports_partial_writes
+
+    def test_list_prefix(self, store: GpuMemoryStore) -> None:
+        assert True
diff --git a/tests/v3/test_store/test_remote.py b/tests/v3/test_store/test_remote.py
index 2eec9d29cf..b09bf24e24 100644
--- a/tests/v3/test_store/test_remote.py
+++ b/tests/v3/test_store/test_remote.py
@@ -4,7 +4,7 @@
 import pytest
 from upath import UPath
 
-from zarr.core.buffer import Buffer, default_buffer_prototype
+from zarr.core.buffer import Buffer, cpu, default_buffer_prototype
 from zarr.core.sync import sync
 from zarr.store import RemoteStore
 from zarr.testing.store import StoreTests
@@ -88,7 +88,7 @@ async def test_basic():
     assert not await alist(store.list())
     assert not await store.exists("foo")
     data = b"hello"
-    await store.set("foo", Buffer.from_bytes(data))
+    await store.set("foo", cpu.Buffer.from_bytes(data))
     assert await store.exists("foo")
     assert (await store.get("foo", prototype=default_buffer_prototype())).to_bytes() == data
     out = await store.get_partial_values(
@@ -97,8 +97,9 @@ async def test_basic():
     assert out[0].to_bytes() == data[1:]
 
 
-class TestRemoteStoreS3(StoreTests[RemoteStore]):
+class TestRemoteStoreS3(StoreTests[RemoteStore, cpu.Buffer]):
     store_cls = RemoteStore
+    buffer_cls = cpu.Buffer
 
     @pytest.fixture(scope="function", params=("use_upath", "use_str"))
     def store_kwargs(self, request) -> dict[str, str | bool]:
@@ -131,7 +132,7 @@ def get(self, store: RemoteStore, key: str) -> Buffer:
             anon=store._fs.anon,
             endpoint_url=store._fs.endpoint_url,
         )
-        return Buffer.from_bytes(fs.cat(f"{store.path}/{key}"))
+        return self.buffer_cls.from_bytes(fs.cat(f"{store.path}/{key}"))
 
     def set(self, store: RemoteStore, key: str, value: Buffer) -> None:
         #  make a new, synchronous instance of the filesystem because this test is run in sync code
diff --git a/tests/v3/test_store/test_stateful_store.py b/tests/v3/test_store/test_stateful_store.py
index 68bd11bbe8..a8f51e96e6 100644
--- a/tests/v3/test_store/test_stateful_store.py
+++ b/tests/v3/test_store/test_stateful_store.py
@@ -12,7 +12,7 @@
 
 import zarr
 from zarr.abc.store import AccessMode, Store
-from zarr.core.buffer import Buffer, BufferPrototype, default_buffer_prototype
+from zarr.core.buffer import BufferPrototype, cpu, default_buffer_prototype
 from zarr.store import MemoryStore
 from zarr.testing.strategies import key_ranges, paths
 
@@ -112,7 +112,7 @@ def __init__(self):
     def set(self, key: str, data: bytes) -> None:
         note(f"(set) Setting {key!r} with {data}")
         assert not self.store.mode.readonly
-        data_buf = Buffer.from_bytes(data)
+        data_buf = cpu.Buffer.from_bytes(data)
         self.store.set(key, data_buf)
         self.model[key] = data_buf