Skip to content

Commit d982275

Browse files
weiji14 and seisman authored
pyarrow: Check compatibility of pyarrow.array with string type (#2933)
Co-authored-by: Dongdong Tian <[email protected]>
1 parent c07f1b6 commit d982275

File tree

7 files changed

+66
-17
lines changed

7 files changed

+66
-17
lines changed

doc/conf.py

+1
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
"contextily": ("https://contextily.readthedocs.io/en/stable/", None),
8686
"geopandas": ("https://geopandas.org/en/stable/", None),
8787
"numpy": ("https://numpy.org/doc/stable/", None),
88+
"pyarrow": ("https://arrow.apache.org/docs/", None),
8889
"python": ("https://docs.python.org/3/", None),
8990
"pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
9091
"rasterio": ("https://rasterio.readthedocs.io/en/stable/", None),

doc/ecosystem.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,9 @@ Python objects. They are based on the C++ implementation of Arrow.
9494
```{note}
9595
If you have [PyArrow][] installed, PyGMT does have some initial support for
9696
`pandas.Series` and `pandas.DataFrame` objects with Apache Arrow-backed arrays.
97-
Specifically, only uint/int/float and date32/date64 are supported for now.
98-
Support for string Array dtypes, Duration types and GeoArrow geometry types is still a work in progress.
99-
For more details, see
97+
Specifically, only uint/int/float, date32/date64 and string types are supported for now.
98+
Support for Duration types and GeoArrow geometry types is still a work in progress. For
99+
more details, see
100100
[issue #2800](https://github.com/GenericMappingTools/pygmt/issues/2800).
101101
```
102102

pygmt/_typing.py

+10
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,17 @@
22
Type aliases for type hints.
33
"""
44

5+
import contextlib
6+
import importlib
7+
from collections.abc import Sequence
58
from typing import Literal
69

10+
import numpy as np
11+
712
# Anchor codes
813
AnchorCode = Literal["TL", "TC", "TR", "ML", "MC", "MR", "BL", "BC", "BR"]
14+
15+
# String array types
16+
StringArrayTypes = Sequence[str] | np.ndarray
17+
with contextlib.suppress(ImportError):
18+
StringArrayTypes |= importlib.import_module(name="pyarrow").StringArray

pygmt/clib/conversion.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -280,12 +280,13 @@ def sequence_to_ctypes_array(
280280

281281
def strings_to_ctypes_array(strings: Sequence[str] | np.ndarray) -> ctp.Array:
282282
"""
283-
Convert a sequence (e.g., a list) of strings into a ctypes array.
283+
Convert a sequence (e.g., a list) of strings or numpy.ndarray of strings into a
284+
ctypes array.
284285
285286
Parameters
286287
----------
287288
strings
288-
A sequence of strings.
289+
A sequence of strings, or a numpy.ndarray of str dtype.
289290
290291
Returns
291292
-------

pygmt/src/text.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from collections.abc import Sequence
66

77
import numpy as np
8-
from pygmt._typing import AnchorCode
8+
from pygmt._typing import AnchorCode, StringArrayTypes
99
from pygmt.clib import Session
1010
from pygmt.exceptions import GMTInvalidInput
1111
from pygmt.helpers import (
@@ -48,7 +48,7 @@ def text_( # noqa: PLR0912
4848
x=None,
4949
y=None,
5050
position: AnchorCode | None = None,
51-
text=None,
51+
text: str | StringArrayTypes | None = None,
5252
angle=None,
5353
font=None,
5454
justify: bool | None | AnchorCode | Sequence[AnchorCode] = None,
@@ -104,7 +104,7 @@ def text_( # noqa: PLR0912
104104
105105
For example, ``position="TL"`` plots the text at the Top Left corner
106106
of the map.
107-
text : str or 1-D array
107+
text
108108
The text string, or an array of strings to plot on the figure.
109109
angle: float, str, bool or list
110110
Set the angle measured in degrees counter-clockwise from

pygmt/tests/test_clib_virtualfile_from_vectors.py

+27-6
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,14 @@
1111
from pygmt.clib.session import DTYPES_NUMERIC
1212
from pygmt.exceptions import GMTInvalidInput
1313
from pygmt.helpers import GMTTempFile
14+
from pygmt.helpers.testing import skip_if_no
15+
16+
try:
17+
import pyarrow as pa
18+
19+
pa_array = pa.array
20+
except ImportError:
21+
pa_array = None
1422

1523

1624
@pytest.fixture(scope="module", name="dtypes")
@@ -53,17 +61,30 @@ def test_virtualfile_from_vectors(dtypes):
5361

5462

5563
@pytest.mark.benchmark
56-
@pytest.mark.parametrize("dtype", [str, object])
57-
def test_virtualfile_from_vectors_one_string_or_object_column(dtype):
58-
"""
59-
Test passing in one column with string or object dtype into virtual file dataset.
64+
@pytest.mark.parametrize(
65+
("array_func", "dtype"),
66+
[
67+
pytest.param(np.array, {"dtype": np.str_}, id="str"),
68+
pytest.param(np.array, {"dtype": np.object_}, id="object"),
69+
pytest.param(
70+
pa_array,
71+
{}, # {"type": pa.string()}
72+
marks=skip_if_no(package="pyarrow"),
73+
id="pyarrow",
74+
),
75+
],
76+
)
77+
def test_virtualfile_from_vectors_one_string_or_object_column(array_func, dtype):
78+
"""
79+
Test passing in one column with string (numpy/pyarrow) or object (numpy)
80+
dtype into virtual file dataset.
6081
"""
6182
size = 5
6283
x = np.arange(size, dtype=np.int32)
6384
y = np.arange(size, size * 2, 1, dtype=np.int32)
64-
strings = np.array(["a", "bc", "defg", "hijklmn", "opqrst"], dtype=dtype)
85+
strings = array_func(["a", "bc", "defg", "hijklmn", "opqrst"], **dtype)
6586
with clib.Session() as lib:
66-
with lib.virtualfile_from_vectors((x, y, strings)) as vfile:
87+
with lib.virtualfile_from_vectors(vectors=(x, y, strings)) as vfile:
6788
with GMTTempFile() as outfile:
6889
lib.call_module("convert", [vfile, f"->{outfile.name}"])
6990
output = outfile.read(keep_tabs=True)

pygmt/tests/test_text.py

+19-3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,14 @@
99
from pygmt import Figure
1010
from pygmt.exceptions import GMTCLibError, GMTInvalidInput
1111
from pygmt.helpers import GMTTempFile
12+
from pygmt.helpers.testing import skip_if_no
13+
14+
try:
15+
import pyarrow as pa
16+
17+
pa_array = pa.array
18+
except ImportError:
19+
pa_array = None
1220

1321
TEST_DATA_DIR = Path(__file__).parent / "data"
1422
POINTS_DATA = TEST_DATA_DIR / "points.txt"
@@ -48,8 +56,16 @@ def test_text_single_line_of_text(region, projection):
4856

4957

5058
@pytest.mark.benchmark
51-
@pytest.mark.mpl_image_compare
52-
def test_text_multiple_lines_of_text(region, projection):
59+
@pytest.mark.mpl_image_compare(filename="test_text_multiple_lines_of_text.png")
60+
@pytest.mark.parametrize(
61+
"array_func",
62+
[
63+
list,
64+
pytest.param(np.array, id="numpy"),
65+
pytest.param(pa_array, marks=skip_if_no(package="pyarrow"), id="pyarrow"),
66+
],
67+
)
68+
def test_text_multiple_lines_of_text(region, projection, array_func):
5369
"""
5470
Place multiple lines of text at their respective x, y locations.
5571
"""
@@ -59,7 +75,7 @@ def test_text_multiple_lines_of_text(region, projection):
5975
projection=projection,
6076
x=[1.2, 1.6],
6177
y=[0.6, 0.3],
62-
text=["This is a line of text", "This is another line of text"],
78+
text=array_func(["This is a line of text", "This is another line of text"]),
6379
)
6480
return fig
6581

0 commit comments

Comments
 (0)