Skip to content

Commit a5c0aa2

Browse files
seisman and weiji14 authored
**Breaking**: data_kind: Now 'matrix' represents a 2-D numpy array and unrecognized data types fall back to 'vectors' (#3351)
Co-authored-by: Wei Ji <23487320+weiji14@users.noreply.github.com>
1 parent 8a30c8e commit a5c0aa2

File tree

5 files changed

+70
-36
lines changed

5 files changed

+70
-36
lines changed

pygmt/clib/session.py

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1787,10 +1787,7 @@ def virtualfile_in( # noqa: PLR0912
17871787
"grid": self.virtualfile_from_grid,
17881788
"image": tempfile_from_image,
17891789
"stringio": self.virtualfile_from_stringio,
1790-
# Note: virtualfile_from_matrix is not used because a matrix can be
1791-
# converted to vectors instead, and using vectors allows for better
1792-
# handling of string type inputs (e.g. for datetime data types)
1793-
"matrix": self.virtualfile_from_vectors,
1790+
"matrix": self.virtualfile_from_matrix,
17941791
"vectors": self.virtualfile_from_vectors,
17951792
}[kind]
17961793

@@ -1807,29 +1804,33 @@ def virtualfile_in( # noqa: PLR0912
18071804
warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2)
18081805
_data = (data,) if not isinstance(data, pathlib.PurePath) else (str(data),)
18091806
elif kind == "vectors":
1810-
_data = [x, y]
1811-
if z is not None:
1812-
_data.append(z)
1813-
if extra_arrays:
1814-
_data.extend(extra_arrays)
1815-
elif kind == "matrix": # turn 2-D arrays into list of vectors
1816-
if hasattr(data, "items") and not hasattr(data, "to_frame"):
1807+
if data is None:
1808+
# data is None, so data must be given via x/y/z.
1809+
_data = [x, y]
1810+
if z is not None:
1811+
_data.append(z)
1812+
if extra_arrays:
1813+
_data.extend(extra_arrays)
1814+
elif hasattr(data, "items") and not hasattr(data, "to_frame"):
18171815
# pandas.DataFrame or xarray.Dataset types.
18181816
# pandas.Series will be handled below like a 1-D numpy.ndarray.
18191817
_data = [array for _, array in data.items()]
1820-
elif hasattr(data, "ndim") and data.ndim == 2 and data.dtype.kind in "iuf":
1821-
# Just use virtualfile_from_matrix for 2-D numpy.ndarray
1822-
# which are signed integer (i), unsigned integer (u) or
1823-
# floating point (f) types
1824-
_virtualfile_from = self.virtualfile_from_matrix
1825-
_data = (data,)
18261818
else:
18271819
# Python list, tuple, numpy.ndarray, and pandas.Series types
18281820
_data = np.atleast_2d(np.asanyarray(data).T)
1821+
elif kind == "matrix":
1822+
# GMT can only accept 2-D matrices of signed integer (i), unsigned
1823+
# integer (u) or floating point (f) types. For other data types, we need to
1824+
# use virtualfile_from_vectors instead, which turns the matrix into a list
1825+
# of vectors and allows for better handling of non-integer/float type inputs
1826+
# (e.g. for string or datetime data types).
1827+
_data = (data,)
1828+
if data.dtype.kind not in "iuf":
1829+
_virtualfile_from = self.virtualfile_from_vectors
1830+
_data = data.T
18291831

18301832
# Finally create the virtualfile from the data, to be passed into GMT
18311833
file_context = _virtualfile_from(*_data)
1832-
18331834
return file_context
18341835

18351836
def virtualfile_from_data(

pygmt/helpers/utils.py

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -207,8 +207,12 @@ def data_kind(
207207
- ``"grid"``: a :class:`xarray.DataArray` object that is not 3-D
208208
- ``"image"``: a 3-D :class:`xarray.DataArray` object
209209
- ``"stringio"``: a :class:`io.StringIO` object
210-
- ``"matrix"``: anything else that is not ``None``
211-
- ``"vectors"``: ``data`` is ``None`` and ``required=True``
210+
- ``"matrix"``: a 2-D array-like object that implements ``__array_interface__``
211+
(e.g., :class:`numpy.ndarray`)
212+
- ``"vectors"``: ``data`` is ``None`` and ``required=True``, or any unrecognized
213+
data. Common data types include a :class:`pandas.DataFrame` object, a dictionary
214+
with array-like values, a 1-D/3-D :class:`numpy.ndarray` object, or array-like
215+
objects.
212216
213217
Parameters
214218
----------
@@ -268,27 +272,27 @@ def data_kind(
268272
269273
The "matrix" kind:
270274
271-
>>> data_kind(data=np.arange(10)) # 1-D numpy.ndarray
272-
'matrix'
273275
>>> data_kind(data=np.arange(10).reshape((5, 2))) # 2-D numpy.ndarray
274276
'matrix'
277+
278+
The "vectors" kind:
279+
280+
>>> data_kind(data=np.arange(10)) # 1-D numpy.ndarray
281+
'vectors'
275282
>>> data_kind(data=np.arange(60).reshape((3, 4, 5))) # 3-D numpy.ndarray
276-
'matrix'
283+
'vectors'
277284
>>> data_kind(xr.DataArray(np.arange(12), name="x").to_dataset()) # xarray.Dataset
278-
'matrix'
285+
'vectors'
279286
>>> data_kind(data=[1, 2, 3]) # 1-D sequence
280-
'matrix'
287+
'vectors'
281288
>>> data_kind(data=[[1, 2, 3], [4, 5, 6]]) # sequence of sequences
282-
'matrix'
289+
'vectors'
283290
>>> data_kind(data={"x": [1, 2, 3], "y": [4, 5, 6]}) # dictionary
284-
'matrix'
291+
'vectors'
285292
>>> data_kind(data=pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})) # pd.DataFrame
286-
'matrix'
293+
'vectors'
287294
>>> data_kind(data=pd.Series([1, 2, 3], name="x")) # pd.Series
288-
'matrix'
289-
290-
The "vectors" kind:
291-
295+
'vectors'
292296
>>> data_kind(data=None)
293297
'vectors'
294298
"""
@@ -312,7 +316,10 @@ def data_kind(
312316
# geopandas.GeoDataFrame or shapely.geometry).
313317
# Reference: https://gist.github.com/sgillies/2217756
314318
kind = "geojson"
315-
case x if x is not None: # Any not-None is considered as a matrix.
319+
case x if hasattr(x, "__array_interface__") and data.ndim == 2:
320+
# 2-D array-like objects that implement ``__array_interface__`` (e.g.,
321+
# numpy.ndarray).
322+
# Reference: https://numpy.org/doc/stable/reference/arrays.interface.html
316323
kind = "matrix"
317324
case _: # Fall back to "vectors" if data is None and required=True, or data is unrecognized.
318325
kind = "vectors"

pygmt/src/legend.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def legend(
9191
kwargs["F"] = box
9292

9393
kind = data_kind(spec)
94-
if kind not in {"vectors", "file", "stringio"}: # kind="vectors" means spec is None
94+
if spec is not None and kind not in {"file", "stringio"}:
9595
raise GMTInvalidInput(f"Unrecognized data type: {type(spec)}")
9696
if kind == "file" and is_nonstr_iter(spec):
9797
raise GMTInvalidInput("Only one legend specification file is allowed.")

pygmt/src/x2sys_cross.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ def x2sys_cross(
195195
match data_kind(track):
196196
case "file":
197197
file_contexts.append(contextlib.nullcontext(track))
198-
case "matrix":
198+
case "vectors":
199199
# find suffix (-E) of trackfiles used (e.g. xyz, csv, etc) from
200200
# $X2SYS_HOME/TAGNAME/TAGNAME.tag file
201201
tagfile = Path(

pygmt/tests/test_clib_virtualfile_in.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
99
import pandas as pd
1010
import pytest
1111
import xarray as xr
12+
from packaging.version import Version
1213
from pygmt import clib
14+
from pygmt.clib import __gmt_version__
1315
from pygmt.exceptions import GMTInvalidInput
14-
from pygmt.helpers import GMTTempFile
16+
from pygmt.helpers import GMTTempFile, data_kind
1517

1618
POINTS_DATA = Path(__file__).parent / "data" / "points.txt"
1719

@@ -101,3 +103,27 @@ def test_virtualfile_in_fail_non_valid_data(data):
101103
z=data[:, 2],
102104
data=data,
103105
)
106+
107+
108+
@pytest.mark.xfail(
109+
condition=Version(__gmt_version__) <= Version("6.5.0"),
110+
reason="Upstream bug fixed in https://github.com/GenericMappingTools/gmt/pull/8600",
111+
)
112+
def test_virtualfile_in_matrix_string_dtype():
113+
"""
114+
Passing a string dtype matrix should work and the matrix should be passed via a series
115+
of vectors.
116+
"""
117+
data = np.array([["11:30W", "30:30S"], ["12:30W", "30:00S"]])
118+
assert data_kind(data) == "matrix" # data is recognized as "matrix" kind
119+
assert data.dtype.type == np.str_
120+
assert data.dtype.kind not in "iuf" # dtype is not in numeric dtypes
121+
122+
with clib.Session() as lib:
123+
with lib.virtualfile_in(data=data) as vintbl:
124+
with GMTTempFile() as outfile:
125+
lib.call_module("info", [vintbl, "-C", f"->{outfile.name}"])
126+
output = outfile.read(keep_tabs=False)
127+
assert output == "347.5 348.5 -30.5 -30\n"
128+
# Should check that lib.virtualfile_from_vectors is called once,
129+
# not lib.virtualfile_from_matrix, but it's technically complicated.

0 commit comments

Comments
 (0)