diff --git a/.github/workflows/gpu_test.yml b/.github/workflows/gpu_test.yml new file mode 100644 index 0000000000..e0be897532 --- /dev/null +++ b/.github/workflows/gpu_test.yml @@ -0,0 +1,66 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: GPU Test V3 + +on: + push: + branches: [ v3 ] + pull_request: + branches: [ v3 ] + workflow_dispatch: + +env: + LD_LIBRARY_PATH: /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64 + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + name: py=${{ matrix.python-version }}, np=${{ matrix.numpy-version }}, deps=${{ matrix.dependency-set }} + + runs-on: gpu-runner + strategy: + matrix: + python-version: ['3.11'] + numpy-version: ['2.0'] + dependency-set: ["minimal"] + + steps: + - uses: actions/checkout@v4 + # - name: cuda-toolkit + # uses: Jimver/cuda-toolkit@v0.2.16 + # id: cuda-toolkit + # with: + # cuda: '12.4.1' + - name: Set up CUDA + run: | + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb + sudo dpkg -i cuda-keyring_1.1-1_all.deb + sudo apt-get update + sudo apt-get -y install cuda-toolkit-12-6 + echo "/usr/local/cuda/bin" >> $GITHUB_PATH + - name: GPU check + run: | + nvidia-smi + echo $PATH + echo $LD_LIBRARY_PATH + nvcc -V + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Install Hatch and CuPy + run: | + python -m pip install --upgrade pip + pip install hatch + - name: Set Up Hatch Env + run: | + hatch env create gputest.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} + hatch env run -e gputest.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} list-env + - name: Run Tests + run: | + hatch env run --env gputest.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run-coverage diff --git a/pyproject.toml b/pyproject.toml index 197c4aca10..8bee491a82 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,6 +74,9 @@ jupyter = [ 'ipytree>=0.2.2', 'ipywidgets>=8.0.0', ] +gpu = [ + "cupy-cuda12x", +] docs = [ 'sphinx', 'sphinx-autobuild>=2021.3.14', @@ -120,7 +123,7 @@ build.hooks.vcs.version-file = "src/zarr/_version.py" [tool.hatch.envs.test] dependencies = [ "numpy~={matrix:numpy}", - "universal_pathlib" + "universal_pathlib", ] features = ["test", "extra"] @@ -134,8 +137,34 @@ python = ["3.10", "3.11", "3.12"] numpy = ["1.24", "1.26", "2.0"] features = ["optional"] +[[tool.hatch.envs.test.matrix]] +python = ["3.10", "3.11", "3.12"] +numpy = ["1.24", "1.26", "2.0"] +features = ["gpu"] + [tool.hatch.envs.test.scripts] run-coverage = "pytest --cov-config=pyproject.toml --cov=pkg --cov=tests" +run-coverage-gpu = "pip install cupy-cuda12x && pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov=tests" +run = "run-coverage --no-cov" +run-verbose = "run-coverage --verbose" +run-mypy = "mypy src" +run-hypothesis = "pytest --hypothesis-profile ci tests/v3/test_properties.py tests/v3/test_store/test_stateful*" +list-env = "pip list" + +[tool.hatch.envs.gputest] +dependencies = [ + "numpy~={matrix:numpy}", + "universal_pathlib", +] +features = ["test", "extra", "gpu"] + +[[tool.hatch.envs.gputest.matrix]] +python = ["3.10", "3.11", "3.12"] +numpy = ["1.24", "1.26", "2.0"] +version = ["minimal"] + +[tool.hatch.envs.gputest.scripts] +run-coverage = "pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov=tests" run = "run-coverage --no-cov" run-verbose = "run-coverage --verbose" run-mypy = "mypy src" @@ -223,4 +252,8 @@ filterwarnings = [ "error:::zarr.*", "ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning", "ignore:The loop argument is deprecated since Python 3.8.*:DeprecationWarning", + "ignore:Creating a zarr.buffer.gpu.*:UserWarning", +] +markers = [ + "gpu: mark a test as requiring CuPy and GPU" ] diff --git a/src/zarr/codecs/blosc.py b/src/zarr/codecs/blosc.py index 8e01524992..ffe5a0b6f3 100644 --- a/src/zarr/codecs/blosc.py +++ b/src/zarr/codecs/blosc.py @@ -10,7 +10,8 @@ from zarr.abc.codec import BytesBytesCodec from zarr.core.array_spec import ArraySpec -from zarr.core.buffer import Buffer, as_numpy_array_wrapper +from zarr.core.buffer import Buffer +from zarr.core.buffer.cpu import as_numpy_array_wrapper from zarr.core.common import JSON, parse_enum, parse_named_configuration, to_thread from zarr.registry import register_codec diff --git a/src/zarr/codecs/gzip.py b/src/zarr/codecs/gzip.py index bab81a104b..d8d793a965 100644 --- a/src/zarr/codecs/gzip.py +++ b/src/zarr/codecs/gzip.py @@ -7,7 +7,8 @@ from zarr.abc.codec import BytesBytesCodec from zarr.core.array_spec import ArraySpec -from zarr.core.buffer import Buffer, as_numpy_array_wrapper +from zarr.core.buffer import Buffer +from zarr.core.buffer.cpu import as_numpy_array_wrapper from zarr.core.common import JSON, parse_named_configuration, to_thread from zarr.registry import register_codec diff --git a/src/zarr/codecs/zstd.py b/src/zarr/codecs/zstd.py index 92a2665425..fceb49cf2b 100644 --- a/src/zarr/codecs/zstd.py +++ b/src/zarr/codecs/zstd.py @@ -9,7 +9,8 @@ from zarr.abc.codec import BytesBytesCodec from zarr.core.array_spec import ArraySpec -from zarr.core.buffer import Buffer, as_numpy_array_wrapper +from zarr.core.buffer import Buffer +from zarr.core.buffer.cpu import as_numpy_array_wrapper from zarr.core.common import JSON, parse_named_configuration, to_thread from zarr.registry import register_codec diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 3738b0859b..e7b5709b77 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -512,7 +512,12 @@ async def _set_selection( # check value shape if np.isscalar(value): - value = np.asanyarray(value, dtype=self.metadata.dtype) + array_like = prototype.buffer.create_zero_length().as_array_like() + if isinstance(array_like, np._typing._SupportsArrayFunc): + # TODO: need to handle array types that don't support __array_function__ + # like PyTorch and JAX + array_like_ = cast(np._typing._SupportsArrayFunc, array_like) + value = np.asanyarray(value, dtype=self.metadata.dtype, like=array_like_) else: if not hasattr(value, "shape"): value = np.asarray(value, self.metadata.dtype) @@ -520,7 +525,11 @@ async def _set_selection( # value.shape == indexer.shape # ), f"shape of value doesn't match indexer shape. Expected {indexer.shape}, got {value.shape}" if not hasattr(value, "dtype") or value.dtype.name != self.metadata.dtype.name: - value = np.array(value, dtype=self.metadata.dtype, order="A") + if hasattr(value, "astype"): + # Handle things that are already NDArrayLike more efficiently + value = value.astype(dtype=self.metadata.dtype, order="A") + else: + value = np.array(value, dtype=self.metadata.dtype, order="A") value = cast(NDArrayLike, value) # We accept any ndarray like object from the user and convert it # to a NDBuffer (or subclass). From this point onwards, we only pass diff --git a/src/zarr/core/buffer/__init__.py b/src/zarr/core/buffer/__init__.py new file mode 100644 index 0000000000..b18417b00b --- /dev/null +++ b/src/zarr/core/buffer/__init__.py @@ -0,0 +1,19 @@ +from zarr.core.buffer.core import ( + ArrayLike, + Buffer, + BufferPrototype, + NDArrayLike, + NDBuffer, + default_buffer_prototype, +) +from zarr.core.buffer.cpu import numpy_buffer_prototype + +__all__ = [ + "ArrayLike", + "Buffer", + "NDArrayLike", + "NDBuffer", + "BufferPrototype", + "default_buffer_prototype", + "numpy_buffer_prototype", +] diff --git a/src/zarr/core/buffer.py b/src/zarr/core/buffer/core.py similarity index 84% rename from src/zarr/core/buffer.py rename to src/zarr/core/buffer/core.py index 50252590dd..6e5d9465cf 100644 --- a/src/zarr/core/buffer.py +++ b/src/zarr/core/buffer/core.py @@ -1,7 +1,8 @@ from __future__ import annotations import sys -from collections.abc import Callable, Iterable, Sequence +from abc import ABC, abstractmethod +from collections.abc import Iterable, Sequence from typing import ( TYPE_CHECKING, Any, @@ -9,6 +10,7 @@ NamedTuple, Protocol, SupportsIndex, + cast, runtime_checkable, ) @@ -19,8 +21,6 @@ from zarr.registry import ( get_buffer_class, get_ndbuffer_class, - register_buffer, - register_ndbuffer, ) if TYPE_CHECKING: @@ -112,7 +112,7 @@ def check_item_key_is_1d_contiguous(key: Any) -> None: raise ValueError("slice must be contiguous") -class Buffer: +class Buffer(ABC): """A flat contiguous memory block We use Buffer throughout Zarr to represent a contiguous block of memory. @@ -141,6 +141,7 @@ def __init__(self, array_like: ArrayLike): self._data = array_like @classmethod + @abstractmethod def create_zero_length(cls) -> Self: """Create an empty buffer with length zero @@ -148,7 +149,11 @@ def create_zero_length(cls) -> Self: ------- New empty 0-length buffer """ - return cls(np.array([], dtype="b")) + if cls is Buffer: + raise NotImplementedError("Cannot call abstract method on the abstract class 'Buffer'") + return cls( + cast(ArrayLike, None) + ) # This line will never be reached, but it satisfies the type checker @classmethod def from_array_like(cls, array_like: ArrayLike) -> Self: @@ -166,6 +171,7 @@ def from_array_like(cls, array_like: ArrayLike) -> Self: return cls(array_like) @classmethod + @abstractmethod def from_buffer(cls, buffer: Buffer) -> Self: """Create a new buffer of an existing Buffer @@ -185,10 +191,20 @@ def from_buffer(cls, buffer: Buffer) -> Self: Returns ------- A new buffer representing the content of the input buffer + + Note + ---- + Subclasses of `Buffer` must override this method to implement + more optimal conversions that avoid copies where possible """ - return cls.from_array_like(buffer.as_array_like()) + if cls is Buffer: + raise NotImplementedError("Cannot call abstract method on the abstract class 'Buffer'") + return cls( + cast(ArrayLike, None) + ) # This line will never be reached, but it satisfies the type checker @classmethod + @abstractmethod def from_bytes(cls, bytes_like: BytesLike) -> Self: """Create a new buffer of a bytes-like object (host memory) @@ -201,7 +217,11 @@ def from_bytes(cls, bytes_like: BytesLike) -> Self: ------- New buffer representing `bytes_like` """ - return cls.from_array_like(np.frombuffer(bytes_like, dtype="b")) + if cls is Buffer: + raise NotImplementedError("Cannot call abstract method on the abstract class 'Buffer'") + return cls( + cast(ArrayLike, None) + ) # This line will never be reached, but it satisfies the type checker def as_array_like(self) -> ArrayLike: """Returns the underlying array (host or device memory) of this buffer @@ -214,6 +234,7 @@ def as_array_like(self) -> ArrayLike: """ return self._data + @abstractmethod def as_numpy_array(self) -> npt.NDArray[Any]: """Returns the buffer as a NumPy array (host memory). @@ -225,7 +246,7 @@ def as_numpy_array(self) -> npt.NDArray[Any]: ------- NumPy array of this buffer (might be a data copy) """ - return np.asanyarray(self._data) + ... def to_bytes(self) -> bytes: """Returns the buffer as `bytes` (host memory). @@ -252,14 +273,10 @@ def __setitem__(self, key: slice, value: Any) -> None: def __len__(self) -> int: return self._data.size + @abstractmethod def __add__(self, other: Buffer) -> Self: """Concatenate two buffers""" - - other_array = other.as_array_like() - assert other_array.dtype == np.dtype("b") - return self.__class__( - np.concatenate((np.asanyarray(self._data), np.asanyarray(other_array))) - ) + ... class NDBuffer: @@ -293,6 +310,7 @@ def __init__(self, array: NDArrayLike): self._data = array @classmethod + @abstractmethod def create( cls, *, @@ -324,10 +342,13 @@ def create( A subclass can overwrite this method to create a ndarray-like object other then the default Numpy array. """ - ret = cls(np.empty(shape=tuple(shape), dtype=dtype, order=order)) - if fill_value is not None: - ret.fill(fill_value) - return ret + if cls is NDBuffer: + raise NotImplementedError( + "Cannot call abstract method on the abstract class 'NDBuffer'" + ) + return cls( + cast(NDArrayLike, None) + ) # This line will never be reached, but it satisfies the type checker @classmethod def from_ndarray_like(cls, ndarray_like: NDArrayLike) -> Self: @@ -345,6 +366,7 @@ def from_ndarray_like(cls, ndarray_like: NDArrayLike) -> Self: return cls(ndarray_like) @classmethod + @abstractmethod def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self: """Create a new buffer of Numpy array-like object @@ -357,7 +379,13 @@ def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self: ------- New buffer representing `array_like` """ - return cls.from_ndarray_like(np.asanyarray(array_like)) + if cls is NDBuffer: + raise NotImplementedError( + "Cannot call abstract method on the abstract class 'NDBuffer'" + ) + return cls( + cast(NDArrayLike, None) + ) # This line will never be reached, but it satisfies the type checker def as_ndarray_like(self) -> NDArrayLike: """Returns the underlying array (host or device memory) of this buffer @@ -370,6 +398,7 @@ def as_ndarray_like(self) -> NDArrayLike: """ return self._data + @abstractmethod def as_numpy_array(self) -> npt.NDArray[Any]: """Returns the buffer as a NumPy array (host memory). @@ -381,7 +410,7 @@ def as_numpy_array(self) -> npt.NDArray[Any]: ------- NumPy array of this buffer (might be a data copy) """ - return np.asanyarray(self._data) + ... @property def dtype(self) -> np.dtype[Any]: @@ -412,13 +441,11 @@ def squeeze(self, axis: tuple[int, ...]) -> Self: def astype(self, dtype: npt.DTypeLike, order: Literal["K", "A", "C", "F"] = "K") -> Self: return self.__class__(self._data.astype(dtype=dtype, order=order)) - def __getitem__(self, key: Any) -> Self: - return self.__class__(np.asanyarray(self._data.__getitem__(key))) + @abstractmethod + def __getitem__(self, key: Any) -> Self: ... - def __setitem__(self, key: Any, value: Any) -> None: - if isinstance(value, NDBuffer): - value = value._data - self._data.__setitem__(key, value) + @abstractmethod + def __setitem__(self, key: Any, value: Any) -> None: ... def __len__(self) -> int: return self._data.__len__() @@ -439,34 +466,6 @@ def transpose(self, axes: SupportsIndex | Sequence[SupportsIndex] | None) -> Sel return self.__class__(self._data.transpose(axes)) -def as_numpy_array_wrapper( - func: Callable[[npt.NDArray[Any]], bytes], buf: Buffer, prototype: BufferPrototype -) -> Buffer: - """Converts the input of `func` to a numpy array and the output back to `Buffer`. - - This function is useful when calling a `func` that only support host memory such - as `GZip.decode` and `Blosc.decode`. In this case, use this wrapper to convert - the input `buf` to a Numpy array and convert the result back into a `Buffer`. - - Parameters - ---------- - func - The callable that will be called with the converted `buf` as input. - `func` must return bytes, which will be converted into a `Buffer` - before returned. - buf - The buffer that will be converted to a Numpy array before given as - input to `func`. - prototype - The prototype of the output buffer. - - Returns - ------- - The result of `func` converted to a `Buffer` - """ - return prototype.buffer.from_bytes(func(buf.as_numpy_array())) - - class BufferPrototype(NamedTuple): """Prototype of the Buffer and NDBuffer class @@ -487,12 +486,3 @@ class BufferPrototype(NamedTuple): # The default buffer prototype used throughout the Zarr codebase. def default_buffer_prototype() -> BufferPrototype: return BufferPrototype(buffer=get_buffer_class(), nd_buffer=get_ndbuffer_class()) - - -# The numpy prototype used for E.g. when reading the shard index -def numpy_buffer_prototype() -> BufferPrototype: - return BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer) - - -register_buffer(Buffer) -register_ndbuffer(NDBuffer) diff --git a/src/zarr/core/buffer/cpu.py b/src/zarr/core/buffer/cpu.py new file mode 100644 index 0000000000..0953805223 --- /dev/null +++ b/src/zarr/core/buffer/cpu.py @@ -0,0 +1,227 @@ +from __future__ import annotations + +from collections.abc import Callable, Iterable +from typing import ( + TYPE_CHECKING, + Any, + Literal, +) + +import numpy as np +import numpy.typing as npt + +from zarr.core.buffer import core +from zarr.core.buffer.core import ArrayLike, NDArrayLike +from zarr.registry import ( + register_buffer, + register_ndbuffer, +) + +if TYPE_CHECKING: + from typing_extensions import Self + + from zarr.core.common import BytesLike + + +class Buffer(core.Buffer): + """A flat contiguous memory block + + We use Buffer throughout Zarr to represent a contiguous block of memory. + + A Buffer is backed by a underlying array-like instance that represents + the memory. The memory type is unspecified; can be regular host memory, + CUDA device memory, or something else. The only requirement is that the + array-like instance can be copied/converted to a regular Numpy array + (host memory). + + Notes + ----- + This buffer is untyped, so all indexing and sizes are in bytes. + + Parameters + ---------- + array_like + array-like object that must be 1-dim, contiguous, and byte dtype. + """ + + def __init__(self, array_like: ArrayLike): + super().__init__(array_like) + + @classmethod + def create_zero_length(cls) -> Self: + return cls(np.array([], dtype="b")) + + @classmethod + def from_buffer(cls, buffer: core.Buffer) -> Self: + """Create a new buffer of an existing Buffer + + This is useful if you want to ensure that an existing buffer is + of the correct subclass of Buffer. E.g., MemoryStore uses this + to return a buffer instance of the subclass specified by its + BufferPrototype argument. + + Typically, this only copies data if the data has to be moved between + memory types, such as from host to device memory. + + Parameters + ---------- + buffer + buffer object. + + Returns + ------- + A new buffer representing the content of the input buffer + + Note + ---- + Subclasses of `Buffer` must override this method to implement + more optimal conversions that avoid copies where possible + """ + return cls.from_array_like(buffer.as_numpy_array()) + + @classmethod + def from_bytes(cls, bytes_like: BytesLike) -> Self: + """Create a new buffer of a bytes-like object (host memory) + + Parameters + ---------- + bytes_like + bytes-like object + + Returns + ------- + New buffer representing `bytes_like` + """ + return cls.from_array_like(np.frombuffer(bytes_like, dtype="b")) + + def as_numpy_array(self) -> npt.NDArray[Any]: + """Returns the buffer as a NumPy array (host memory). + + Notes + ----- + Might have to copy data, consider using `.as_array_like()` instead. + + Returns + ------- + NumPy array of this buffer (might be a data copy) + """ + return np.asanyarray(self._data) + + def __add__(self, other: core.Buffer) -> Self: + """Concatenate two buffers""" + + other_array = other.as_array_like() + assert other_array.dtype == np.dtype("b") + return self.__class__( + np.concatenate((np.asanyarray(self._data), np.asanyarray(other_array))) + ) + + +class NDBuffer(core.NDBuffer): + """An n-dimensional memory block + + We use NDBuffer throughout Zarr to represent a n-dimensional memory block. + + A NDBuffer is backed by a underlying ndarray-like instance that represents + the memory. The memory type is unspecified; can be regular host memory, + CUDA device memory, or something else. The only requirement is that the + ndarray-like instance can be copied/converted to a regular Numpy array + (host memory). + + Notes + ----- + The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer + is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However, + in order to use Python's type system to differentiate between the contiguous + Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the + two classes separate. + + Parameters + ---------- + ndarray_like + ndarray-like object that is convertible to a regular Numpy array. + """ + + def __init__(self, array: NDArrayLike): + super().__init__(array) + + @classmethod + def create( + cls, + *, + shape: Iterable[int], + dtype: npt.DTypeLike, + order: Literal["C", "F"] = "C", + fill_value: Any | None = None, + ) -> Self: + ret = cls(np.empty(shape=tuple(shape), dtype=dtype, order=order)) + if fill_value is not None: + ret.fill(fill_value) + return ret + + @classmethod + def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self: + return cls.from_ndarray_like(np.asanyarray(array_like)) + + def as_numpy_array(self) -> npt.NDArray[Any]: + """Returns the buffer as a NumPy array (host memory). + + Warnings + -------- + Might have to copy data, consider using `.as_ndarray_like()` instead. + + Returns + ------- + NumPy array of this buffer (might be a data copy) + """ + return np.asanyarray(self._data) + + def __getitem__(self, key: Any) -> Self: + return self.__class__(np.asanyarray(self._data.__getitem__(key))) + + def __setitem__(self, key: Any, value: Any) -> None: + if isinstance(value, NDBuffer): + value = value._data + self._data.__setitem__(key, value) + + +def as_numpy_array_wrapper( + func: Callable[[npt.NDArray[Any]], bytes], buf: core.Buffer, prototype: core.BufferPrototype +) -> core.Buffer: + """Converts the input of `func` to a numpy array and the output back to `Buffer`. + + This function is useful when calling a `func` that only support host memory such + as `GZip.decode` and `Blosc.decode`. In this case, use this wrapper to convert + the input `buf` to a Numpy array and convert the result back into a `Buffer`. + + Parameters + ---------- + func + The callable that will be called with the converted `buf` as input. + `func` must return bytes, which will be converted into a `Buffer` + before returned. + buf + The buffer that will be converted to a Numpy array before given as + input to `func`. + prototype + The prototype of the output buffer. + + Returns + ------- + The result of `func` converted to a `Buffer` + """ + return prototype.buffer.from_bytes(func(buf.as_numpy_array())) + + +# CPU buffer prototype using numpy arrays +buffer_prototype = core.BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer) +# default_buffer_prototype = buffer_prototype + + +# The numpy prototype used for E.g. when reading the shard index +def numpy_buffer_prototype() -> core.BufferPrototype: + return core.BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer) + + +register_buffer(Buffer) +register_ndbuffer(NDBuffer) diff --git a/src/zarr/core/buffer/gpu.py b/src/zarr/core/buffer/gpu.py new file mode 100644 index 0000000000..9d38df2abf --- /dev/null +++ b/src/zarr/core/buffer/gpu.py @@ -0,0 +1,217 @@ +from __future__ import annotations + +import warnings +from collections.abc import Iterable +from typing import ( + TYPE_CHECKING, + Any, + Literal, + cast, +) + +import numpy as np +import numpy.typing as npt + +from zarr.core.buffer import core +from zarr.core.buffer.core import ArrayLike, BufferPrototype, NDArrayLike + +if TYPE_CHECKING: + from typing_extensions import Self + + from zarr.core.common import BytesLike + +try: + import cupy as cp +except ImportError: + cp = None + + +class Buffer(core.Buffer): + """A flat contiguous memory block on the GPU + + We use Buffer throughout Zarr to represent a contiguous block of memory. + + A Buffer is backed by a underlying array-like instance that represents + the memory. The memory type is unspecified; can be regular host memory, + CUDA device memory, or something else. The only requirement is that the + array-like instance can be copied/converted to a regular Numpy array + (host memory). + + Note + ---- + This buffer is untyped, so all indexing and sizes are in bytes. + + Parameters + ---------- + array_like + array-like object that must be 1-dim, contiguous, and byte dtype. + """ + + def __init__(self, array_like: ArrayLike): + if cp is None: + raise ImportError( + "Cannot use zarr.buffer.gpu.Buffer without cupy. Please install cupy." + ) + + if array_like.ndim != 1: + raise ValueError("array_like: only 1-dim allowed") + if array_like.dtype != np.dtype("b"): + raise ValueError("array_like: only byte dtype allowed") + + if not hasattr(array_like, "__cuda_array_interface__"): + # Slow copy based path for arrays that don't support the __cuda_array_interface__ + # TODO: Add a fast zero-copy path for arrays that support the dlpack protocol + msg = ( + "Creating a zarr.buffer.gpu.Buffer with an array that does not support the " + "__cuda_array_interface__ for zero-copy transfers, " + "falling back to slow copy based path" + ) + warnings.warn( + msg, + stacklevel=2, + ) + self._data = cp.asarray(array_like) + + @classmethod + def create_zero_length(cls) -> Self: + """Create an empty buffer with length zero + + Returns + ------- + New empty 0-length buffer + """ + return cls(cp.array([], dtype="b")) + + @classmethod + def from_buffer(cls, buffer: core.Buffer) -> Self: + """Create an GPU Buffer given an arbitrary Buffer + This will try to be zero-copy if `buffer` is already on the + GPU and will trigger a copy if not. + + Returns + ------- + New GPU Buffer constructed from `buffer` + """ + return cls(buffer.as_array_like()) + + @classmethod + def from_bytes(cls, bytes_like: BytesLike) -> Self: + return cls.from_array_like(cp.frombuffer(bytes_like, dtype="b")) + + def as_numpy_array(self) -> npt.NDArray[Any]: + return cast(npt.NDArray[Any], cp.asnumpy(self._data)) + + def __add__(self, other: core.Buffer) -> Self: + other_array = other.as_array_like() + assert other_array.dtype == np.dtype("b") + gpu_other = Buffer(other_array) + gpu_other_array = gpu_other.as_array_like() + return self.__class__( + cp.concatenate((cp.asanyarray(self._data), cp.asanyarray(gpu_other_array))) + ) + + +class NDBuffer(core.NDBuffer): + """A n-dimensional memory block on the GPU + + We use NDBuffer throughout Zarr to represent a n-dimensional memory block. + + A NDBuffer is backed by a underlying ndarray-like instance that represents + the memory. The memory type is unspecified; can be regular host memory, + CUDA device memory, or something else. The only requirement is that the + ndarray-like instance can be copied/converted to a regular Numpy array + (host memory). + + Note + ---- + The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer + is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However, + in order to use Python's type system to differentiate between the contiguous + Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the + two classes separate. + + Parameters + ---------- + ndarray_like + ndarray-like object that is convertible to a regular Numpy array. + """ + + def __init__(self, array: NDArrayLike): + if cp is None: + raise ImportError( + "Cannot use zarr.buffer.gpu.NDBuffer without cupy. Please install cupy." + ) + + # assert array.ndim > 0 + assert array.dtype != object + self._data = array + + if not hasattr(array, "__cuda_array_interface__"): + # Slow copy based path for arrays that don't support the __cuda_array_interface__ + # TODO: Add a fast zero-copy path for arrays that support the dlpack protocol + msg = ( + "Creating a zarr.buffer.gpu.NDBuffer with an array that does not support the " + "__cuda_array_interface__ for zero-copy transfers, " + "falling back to slow copy based path" + ) + warnings.warn( + msg, + stacklevel=2, + ) + self._data = cp.asarray(array) + + @classmethod + def create( + cls, + *, + shape: Iterable[int], + dtype: npt.DTypeLike, + order: Literal["C", "F"] = "C", + fill_value: Any | None = None, + ) -> Self: + ret = cls(cp.empty(shape=tuple(shape), dtype=dtype, order=order)) + if fill_value is not None: + ret.fill(fill_value) + return ret + + @classmethod + def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self: + """Create a new buffer of Numpy array-like object + + Parameters + ---------- + array_like + Object that can be coerced into a Numpy array + + Returns + ------- + New buffer representing `array_like` + """ + return cls(cp.asarray(array_like)) + + def as_numpy_array(self) -> npt.NDArray[Any]: + """Returns the buffer as a NumPy array (host memory). + + Warning + ------- + Might have to copy data, consider using `.as_ndarray_like()` instead. + + Returns + ------- + NumPy array of this buffer (might be a data copy) + """ + return cast(npt.NDArray[Any], cp.asnumpy(self._data)) + + def __getitem__(self, key: Any) -> Self: + return self.__class__(self._data.__getitem__(key)) + + def __setitem__(self, key: Any, value: Any) -> None: + if isinstance(value, NDBuffer): + value = value._data + elif isinstance(value, core.NDBuffer): + gpu_value = NDBuffer(value.as_ndarray_like()) + value = gpu_value._data + self._data.__setitem__(key, value) + + +buffer_prototype = BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer) diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py index 67f5c74347..611d1faea5 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -60,8 +60,8 @@ def reset(self) -> None: "sharding_indexed": "zarr.codecs.sharding.ShardingCodec", "transpose": "zarr.codecs.transpose.TransposeCodec", }, - "buffer": "zarr.core.buffer.Buffer", - "ndbuffer": "zarr.core.buffer.NDBuffer", + "buffer": "zarr.core.buffer.cpu.Buffer", + "ndbuffer": "zarr.core.buffer.cpu.NDBuffer", } ], ) diff --git a/src/zarr/store/memory.py b/src/zarr/store/memory.py index 117fd69ec0..c817b0963a 100644 --- a/src/zarr/store/memory.py +++ b/src/zarr/store/memory.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING from zarr.abc.store import Store -from zarr.core.buffer import Buffer +from zarr.core.buffer import Buffer, gpu from zarr.core.common import concurrent_map from zarr.store._utils import _normalize_interval_index @@ -126,3 +126,39 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: for key in keys_unique: yield key + + +class GpuMemoryStore(MemoryStore): + """A GPU only memory store that stores every chunk in GPU memory irrespective + of the original location. This guarantees that chunks will always be in GPU + memory for downstream processing. For location agnostic use cases, it would + be better to use `MemoryStore` instead. + """ + + _store_dict: MutableMapping[str, Buffer] + + def __init__( + self, + store_dict: MutableMapping[str, Buffer] | None = None, + *, + mode: AccessModeLiteral = "r", + ): + super().__init__(mode=mode) + if store_dict: + self._store_dict = {k: gpu.Buffer.from_buffer(store_dict[k]) for k in iter(store_dict)} + + def __str__(self) -> str: + return f"gpumemory://{id(self._store_dict)}" + + def __repr__(self) -> str: + return f"GpuMemoryStore({str(self)!r})" + + async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: + self._check_writable() + assert isinstance(key, str) + if not isinstance(value, Buffer): + raise TypeError(f"Expected Buffer. Got {type(value)}.") + + # Convert to gpu.Buffer + gpu_value = value if isinstance(value, gpu.Buffer) else gpu.Buffer.from_buffer(value) + await super().set(key, gpu_value, byte_range=byte_range) diff --git a/src/zarr/testing/buffer.py b/src/zarr/testing/buffer.py index ee170a5dd3..a6120ef2f9 100644 --- a/src/zarr/testing/buffer.py +++ b/src/zarr/testing/buffer.py @@ -7,7 +7,7 @@ import numpy as np import numpy.typing as npt -from zarr.core.buffer import Buffer, BufferPrototype, NDBuffer +from zarr.core.buffer import Buffer, BufferPrototype, cpu from zarr.store import MemoryStore if TYPE_CHECKING: @@ -25,11 +25,11 @@ class TestNDArrayLike(np.ndarray): """An example of a ndarray-like class""" -class TestBuffer(Buffer): +class TestBuffer(cpu.Buffer): """Example of a custom Buffer that handles ArrayLike""" -class NDBufferUsingTestNDArrayLike(NDBuffer): +class NDBufferUsingTestNDArrayLike(cpu.NDBuffer): """Example of a custom NDBuffer that handles MyNDArrayLike""" @classmethod diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index 11978b4121..925087ae67 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -11,10 +11,12 @@ S = TypeVar("S", bound=Store) +B = TypeVar("B", bound=Buffer) -class StoreTests(Generic[S]): +class StoreTests(Generic[S, B]): store_cls: type[S] + buffer_cls: type[B] def set(self, store: S, key: str, value: Buffer) -> None: """ @@ -60,7 +62,7 @@ async def test_not_writable_store_raises(self, store_kwargs: dict[str, Any]) -> # set with pytest.raises(ValueError): - await store.set("foo", Buffer.from_bytes(b"bar")) + await store.set("foo", self.buffer_cls.from_bytes(b"bar")) # delete with pytest.raises(ValueError): @@ -87,7 +89,7 @@ async def test_get( """ Ensure that data can be read from the store using the store.get method. """ - data_buf = Buffer.from_bytes(data) + data_buf = self.buffer_cls.from_bytes(data) self.set(store, key, data_buf) observed = await store.get(key, prototype=default_buffer_prototype(), byte_range=byte_range) start, length = _normalize_interval_index(data_buf, interval=byte_range) @@ -101,7 +103,7 @@ async def test_set(self, store: S, key: str, data: bytes) -> None: Ensure that data can be written to the store using the store.set method. """ assert not store.mode.readonly - data_buf = Buffer.from_bytes(data) + data_buf = self.buffer_cls.from_bytes(data) await store.set(key, data_buf) observed = self.get(store, key) assert_bytes_equal(observed, data_buf) @@ -120,7 +122,7 @@ async def test_get_partial_values( ) -> None: # put all of the data for key, _ in key_ranges: - self.set(store, key, Buffer.from_bytes(bytes(key, encoding="utf-8"))) + self.set(store, key, self.buffer_cls.from_bytes(bytes(key, encoding="utf-8"))) # read back just part of it observed_maybe = await store.get_partial_values( @@ -148,28 +150,28 @@ async def test_get_partial_values( async def test_exists(self, store: S) -> None: assert not await store.exists("foo") - await store.set("foo/zarr.json", Buffer.from_bytes(b"bar")) + await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar")) assert await store.exists("foo/zarr.json") async def test_delete(self, store: S) -> None: - await store.set("foo/zarr.json", Buffer.from_bytes(b"bar")) + await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar")) assert await store.exists("foo/zarr.json") await store.delete("foo/zarr.json") assert not await store.exists("foo/zarr.json") async def test_empty(self, store: S) -> None: assert await store.empty() - self.set(store, "key", Buffer.from_bytes(bytes("something", encoding="utf-8"))) + self.set(store, "key", self.buffer_cls.from_bytes(bytes("something", encoding="utf-8"))) assert not await store.empty() async def test_clear(self, store: S) -> None: - self.set(store, "key", Buffer.from_bytes(bytes("something", encoding="utf-8"))) + self.set(store, "key", self.buffer_cls.from_bytes(bytes("something", encoding="utf-8"))) await store.clear() assert await store.empty() async def test_list(self, store: S) -> None: assert [k async for k in store.list()] == [] - await store.set("foo/zarr.json", Buffer.from_bytes(b"bar")) + await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar")) keys = [k async for k in store.list()] assert keys == ["foo/zarr.json"], keys @@ -178,7 +180,7 @@ async def test_list(self, store: S) -> None: key = f"foo/c/{i}" expected.append(key) await store.set( - f"foo/c/{i}", Buffer.from_bytes(i.to_bytes(length=3, byteorder="little")) + f"foo/c/{i}", self.buffer_cls.from_bytes(i.to_bytes(length=3, byteorder="little")) ) @pytest.mark.xfail @@ -190,12 +192,12 @@ async def test_list_dir(self, store: S) -> None: out = [k async for k in store.list_dir("")] assert out == [] assert [k async for k in store.list_dir("foo")] == [] - await store.set("foo/zarr.json", Buffer.from_bytes(b"bar")) - await store.set("group-0/zarr.json", Buffer.from_bytes(b"\x01")) # group - await store.set("group-0/group-1/zarr.json", Buffer.from_bytes(b"\x01")) # group - await store.set("group-0/group-1/a1/zarr.json", Buffer.from_bytes(b"\x01")) - await store.set("group-0/group-1/a2/zarr.json", Buffer.from_bytes(b"\x01")) - await store.set("group-0/group-1/a3/zarr.json", Buffer.from_bytes(b"\x01")) + await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar")) + await store.set("group-0/zarr.json", self.buffer_cls.from_bytes(b"\x01")) # group + await store.set("group-0/group-1/zarr.json", self.buffer_cls.from_bytes(b"\x01")) # group + await store.set("group-0/group-1/a1/zarr.json", self.buffer_cls.from_bytes(b"\x01")) + await store.set("group-0/group-1/a2/zarr.json", self.buffer_cls.from_bytes(b"\x01")) + await store.set("group-0/group-1/a3/zarr.json", self.buffer_cls.from_bytes(b"\x01")) keys_expected = ["foo", "group-0"] keys_observed = [k async for k in store.list_dir("")] diff --git a/src/zarr/testing/utils.py b/src/zarr/testing/utils.py index 3a70f96d44..a21a0be708 100644 --- a/src/zarr/testing/utils.py +++ b/src/zarr/testing/utils.py @@ -1,5 +1,9 @@ from __future__ import annotations +from typing import Any, cast + +import pytest + from zarr.core.buffer import Buffer from zarr.core.common import BytesLike @@ -18,3 +22,21 @@ def assert_bytes_equal(b1: Buffer | BytesLike | None, b2: Buffer | BytesLike | N if isinstance(b2, Buffer): b2 = b2.to_bytes() assert b1 == b2 + + +def has_cupy() -> bool: + try: + import cupy + + return cast(bool, cupy.cuda.runtime.getDeviceCount() > 0) + except ImportError: + return False + except cupy.cuda.runtime.CUDARuntimeError: + return False + + +# Decorator for GPU tests +def gpu_test(func: Any) -> Any: + return pytest.mark.gpu( + pytest.mark.skipif(not has_cupy(), reason="CuPy not installed or no GPU available")(func) + ) diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index d8af484ee6..c67d7bb4fd 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -94,6 +94,9 @@ async def async_group(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> As def xp(request: pytest.FixtureRequest) -> Iterator[ModuleType]: """Fixture to parametrize over numpy-like libraries""" + if request.param == "cupy": + request.node.add_marker(pytest.mark.gpu) + yield pytest.importorskip(request.param) diff --git a/tests/v3/test_buffer.py b/tests/v3/test_buffer.py index 298a7f7eed..419f01c720 100644 --- a/tests/v3/test_buffer.py +++ b/tests/v3/test_buffer.py @@ -10,14 +10,21 @@ from zarr.codecs.gzip import GzipCodec from zarr.codecs.transpose import TransposeCodec from zarr.codecs.zstd import ZstdCodec -from zarr.core.buffer import ArrayLike, BufferPrototype, NDArrayLike, numpy_buffer_prototype +from zarr.core.buffer import ArrayLike, BufferPrototype, NDArrayLike, cpu, gpu from zarr.store.common import StorePath +from zarr.store.memory import MemoryStore from zarr.testing.buffer import ( NDBufferUsingTestNDArrayLike, StoreExpectingTestBuffer, TestBuffer, TestNDArrayLike, ) +from zarr.testing.utils import gpu_test + +try: + import cupy as cp +except ImportError: + cp = None def test_nd_array_like(xp): @@ -52,6 +59,31 @@ async def test_async_array_prototype(): assert np.array_equal(expect, got) +@gpu_test +@pytest.mark.asyncio +async def test_async_array_gpu_prototype(): + """Test the use of the GPU buffer prototype""" + + expect = cp.zeros((9, 9), dtype="uint16", order="F") + a = await AsyncArray.create( + StorePath(MemoryStore(mode="w")) / "test_async_array_gpu_prototype", + shape=expect.shape, + chunk_shape=(5, 5), + dtype=expect.dtype, + fill_value=0, + ) + expect[1:4, 3:6] = cp.ones((3, 3)) + + await a.setitem( + selection=(slice(1, 4), slice(3, 6)), + value=cp.ones((3, 3)), + prototype=gpu.buffer_prototype, + ) + got = await a.getitem(selection=(slice(0, 9), slice(0, 9)), prototype=gpu.buffer_prototype) + assert isinstance(got, cp.ndarray) + assert cp.array_equal(expect, got) + + @pytest.mark.asyncio async def test_codecs_use_of_prototype(): expect = np.zeros((10, 10), dtype="uint16", order="F") @@ -84,8 +116,39 @@ async def test_codecs_use_of_prototype(): assert np.array_equal(expect, got) +@gpu_test +@pytest.mark.asyncio +async def test_codecs_use_of_gpu_prototype(): + expect = cp.zeros((10, 10), dtype="uint16", order="F") + a = await AsyncArray.create( + StorePath(MemoryStore(mode="w")) / "test_codecs_use_of_gpu_prototype", + shape=expect.shape, + chunk_shape=(5, 5), + dtype=expect.dtype, + fill_value=0, + codecs=[ + TransposeCodec(order=(1, 0)), + BytesCodec(), + BloscCodec(), + Crc32cCodec(), + GzipCodec(), + ZstdCodec(), + ], + ) + expect[:] = cp.arange(100).reshape(10, 10) + + await a.setitem( + selection=(slice(0, 10), slice(0, 10)), + value=expect[:], + prototype=gpu.buffer_prototype, + ) + got = await a.getitem(selection=(slice(0, 10), slice(0, 10)), prototype=gpu.buffer_prototype) + assert isinstance(got, cp.ndarray) + assert cp.array_equal(expect, got) + + def test_numpy_buffer_prototype(): - buffer = numpy_buffer_prototype().buffer.create_zero_length() - ndbuffer = numpy_buffer_prototype().nd_buffer.create(shape=(1, 2), dtype=np.dtype("int64")) + buffer = cpu.buffer_prototype.buffer.create_zero_length() + ndbuffer = cpu.buffer_prototype.nd_buffer.create(shape=(1, 2), dtype=np.dtype("int64")) assert isinstance(buffer.as_array_like(), np.ndarray) assert isinstance(ndbuffer.as_ndarray_like(), np.ndarray) diff --git a/tests/v3/test_config.py b/tests/v3/test_config.py index 881833797c..b3e04e51da 100644 --- a/tests/v3/test_config.py +++ b/tests/v3/test_config.py @@ -46,8 +46,8 @@ def test_config_defaults_set() -> None: "path": "zarr.codecs.pipeline.BatchedCodecPipeline", "batch_size": 1, }, - "buffer": "zarr.core.buffer.Buffer", - "ndbuffer": "zarr.core.buffer.NDBuffer", + "buffer": "zarr.core.buffer.cpu.Buffer", + "ndbuffer": "zarr.core.buffer.cpu.NDBuffer", "codecs": { "blosc": "zarr.codecs.blosc.BloscCodec", "gzip": "zarr.codecs.gzip.GzipCodec", diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index eb7b1f30dd..7592c77201 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -7,7 +7,7 @@ from _pytest.compat import LEGACY_PATH from zarr import Array, AsyncArray, AsyncGroup, Group -from zarr.core.buffer import Buffer +from zarr.core.buffer import default_buffer_prototype from zarr.core.common import ZarrFormat from zarr.core.group import GroupMetadata from zarr.core.sync import sync @@ -97,11 +97,18 @@ def test_group_members(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) # add an extra object to the domain of the group. # the list of children should ignore this object. - sync(store.set(f"{path}/extra_object-1", Buffer.from_bytes(b"000000"))) + sync( + store.set(f"{path}/extra_object-1", default_buffer_prototype().buffer.from_bytes(b"000000")) + ) # add an extra object under a directory-like prefix in the domain of the group. # this creates a directory with a random key in it # this should not show up as a member - sync(store.set(f"{path}/extra_directory/extra_object-2", Buffer.from_bytes(b"000000"))) + sync( + store.set( + f"{path}/extra_directory/extra_object-2", + default_buffer_prototype().buffer.from_bytes(b"000000"), + ) + ) members_observed = group.members() # members are not guaranteed to be ordered, so sort before comparing assert sorted(dict(members_observed)) == sorted(members_expected) diff --git a/tests/v3/test_indexing.py b/tests/v3/test_indexing.py index 7895151b5f..5805de3035 100644 --- a/tests/v3/test_indexing.py +++ b/tests/v3/test_indexing.py @@ -12,7 +12,7 @@ import zarr from zarr.abc.store import Store -from zarr.core.buffer import BufferPrototype, NDBuffer +from zarr.core.buffer import BufferPrototype, default_buffer_prototype from zarr.core.common import ChunkCoords from zarr.core.indexing import ( make_slice_selection, @@ -136,7 +136,7 @@ def test_get_basic_selection_0d(store: StorePath, use_out: bool, value: Any, dty if use_out: # test out param - b = NDBuffer.from_numpy_array(np.zeros_like(arr_np)) + b = default_buffer_prototype().nd_buffer.from_numpy_array(np.zeros_like(arr_np)) arr_z.get_basic_selection(Ellipsis, out=b) assert_array_equal(arr_np, b.as_ndarray_like()) @@ -247,7 +247,9 @@ def _test_get_basic_selection(a, z, selection): assert_array_equal(expect, actual) # test out param - b = NDBuffer.from_numpy_array(np.empty(shape=expect.shape, dtype=expect.dtype)) + b = default_buffer_prototype().nd_buffer.from_numpy_array( + np.empty(shape=expect.shape, dtype=expect.dtype) + ) z.get_basic_selection(selection, out=b) assert_array_equal(expect, b.as_numpy_array()) diff --git a/tests/v3/test_store/test_local.py b/tests/v3/test_store/test_local.py index 8afa4fef3a..59cae22de3 100644 --- a/tests/v3/test_store/test_local.py +++ b/tests/v3/test_store/test_local.py @@ -2,16 +2,17 @@ import pytest -from zarr.core.buffer import Buffer +from zarr.core.buffer import Buffer, cpu from zarr.store.local import LocalStore from zarr.testing.store import StoreTests -class TestLocalStore(StoreTests[LocalStore]): +class TestLocalStore(StoreTests[LocalStore, cpu.Buffer]): store_cls = LocalStore + buffer_cls = cpu.Buffer def get(self, store: LocalStore, key: str) -> Buffer: - return Buffer.from_bytes((store.root / key).read_bytes()) + return self.buffer_cls.from_bytes((store.root / key).read_bytes()) def set(self, store: LocalStore, key: str, value: Buffer) -> None: parent = (store.root / key).parent diff --git a/tests/v3/test_store/test_memory.py b/tests/v3/test_store/test_memory.py index 51ecf46709..f76423c631 100644 --- a/tests/v3/test_store/test_memory.py +++ b/tests/v3/test_store/test_memory.py @@ -2,13 +2,15 @@ import pytest -from zarr.core.buffer import Buffer -from zarr.store.memory import MemoryStore +from zarr.core.buffer import Buffer, cpu, gpu +from zarr.store.memory import GpuMemoryStore, MemoryStore from zarr.testing.store import StoreTests +from zarr.testing.utils import gpu_test -class TestMemoryStore(StoreTests[MemoryStore]): +class TestMemoryStore(StoreTests[MemoryStore, cpu.Buffer]): store_cls = MemoryStore + buffer_cls = cpu.Buffer def set(self, store: MemoryStore, key: str, value: Buffer) -> None: store._store_dict[key] = value @@ -40,3 +42,38 @@ def test_store_supports_partial_writes(self, store: MemoryStore) -> None: def test_list_prefix(self, store: MemoryStore) -> None: assert True + + +@gpu_test +class TestGpuMemoryStore(StoreTests[GpuMemoryStore, gpu.Buffer]): + store_cls = GpuMemoryStore + buffer_cls = gpu.Buffer + + def set(self, store: GpuMemoryStore, key: str, value: Buffer) -> None: + store._store_dict[key] = value + + def get(self, store: MemoryStore, key: str) -> Buffer: + return store._store_dict[key] + + @pytest.fixture(scope="function", params=[None, {}]) + def store_kwargs(self, request) -> dict[str, str | None | dict[str, Buffer]]: + return {"store_dict": request.param, "mode": "r+"} + + @pytest.fixture(scope="function") + def store(self, store_kwargs: str | None | dict[str, gpu.Buffer]) -> GpuMemoryStore: + return self.store_cls(**store_kwargs) + + def test_store_repr(self, store: GpuMemoryStore) -> None: + assert str(store) == f"gpumemory://{id(store._store_dict)}" + + def test_store_supports_writes(self, store: GpuMemoryStore) -> None: + assert store.supports_writes + + def test_store_supports_listing(self, store: GpuMemoryStore) -> None: + assert store.supports_listing + + def test_store_supports_partial_writes(self, store: GpuMemoryStore) -> None: + assert store.supports_partial_writes + + def test_list_prefix(self, store: GpuMemoryStore) -> None: + assert True diff --git a/tests/v3/test_store/test_remote.py b/tests/v3/test_store/test_remote.py index 2eec9d29cf..b09bf24e24 100644 --- a/tests/v3/test_store/test_remote.py +++ b/tests/v3/test_store/test_remote.py @@ -4,7 +4,7 @@ import pytest from upath import UPath -from zarr.core.buffer import Buffer, default_buffer_prototype +from zarr.core.buffer import Buffer, cpu, default_buffer_prototype from zarr.core.sync import sync from zarr.store import RemoteStore from zarr.testing.store import StoreTests @@ -88,7 +88,7 @@ async def test_basic(): assert not await alist(store.list()) assert not await store.exists("foo") data = b"hello" - await store.set("foo", Buffer.from_bytes(data)) + await store.set("foo", cpu.Buffer.from_bytes(data)) assert await store.exists("foo") assert (await store.get("foo", prototype=default_buffer_prototype())).to_bytes() == data out = await store.get_partial_values( @@ -97,8 +97,9 @@ async def test_basic(): assert out[0].to_bytes() == data[1:] -class TestRemoteStoreS3(StoreTests[RemoteStore]): +class TestRemoteStoreS3(StoreTests[RemoteStore, cpu.Buffer]): store_cls = RemoteStore + buffer_cls = cpu.Buffer @pytest.fixture(scope="function", params=("use_upath", "use_str")) def store_kwargs(self, request) -> dict[str, str | bool]: @@ -131,7 +132,7 @@ def get(self, store: RemoteStore, key: str) -> Buffer: anon=store._fs.anon, endpoint_url=store._fs.endpoint_url, ) - return Buffer.from_bytes(fs.cat(f"{store.path}/{key}")) + return self.buffer_cls.from_bytes(fs.cat(f"{store.path}/{key}")) def set(self, store: RemoteStore, key: str, value: Buffer) -> None: # make a new, synchronous instance of the filesystem because this test is run in sync code diff --git a/tests/v3/test_store/test_stateful_store.py b/tests/v3/test_store/test_stateful_store.py index 68bd11bbe8..a8f51e96e6 100644 --- a/tests/v3/test_store/test_stateful_store.py +++ b/tests/v3/test_store/test_stateful_store.py @@ -12,7 +12,7 @@ import zarr from zarr.abc.store import AccessMode, Store -from zarr.core.buffer import Buffer, BufferPrototype, default_buffer_prototype +from zarr.core.buffer import BufferPrototype, cpu, default_buffer_prototype from zarr.store import MemoryStore from zarr.testing.strategies import key_ranges, paths @@ -112,7 +112,7 @@ def __init__(self): def set(self, key: str, data: bytes) -> None: note(f"(set) Setting {key!r} with {data}") assert not self.store.mode.readonly - data_buf = Buffer.from_bytes(data) + data_buf = cpu.Buffer.from_bytes(data) self.store.set(key, data_buf) self.model[key] = data_buf