-
Notifications
You must be signed in to change notification settings - Fork 229
**Breaking**: data_kind: Now 'matrix' represents a 2-D numpy array and unrecognized data types fall back to 'vectors' #3351
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
0c82b3c
808755d
0eb4f8f
9891b2c
a9d094c
3d8be4d
7c104a9
5790923
6954c5d
9300ca3
2701a4a
7fcf57f
991f688
51569c8
4a4f192
2a6e788
2b054b6
cfa32ed
ef6e6aa
423e5dc
81e57f7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1790,10 +1790,7 @@ | |
"grid": self.virtualfile_from_grid, | ||
"image": tempfile_from_image, | ||
"stringio": self.virtualfile_from_stringio, | ||
# Note: virtualfile_from_matrix is not used because a matrix can be | ||
# converted to vectors instead, and using vectors allows for better | ||
# handling of string type inputs (e.g. for datetime data types) | ||
"matrix": self.virtualfile_from_vectors, | ||
"matrix": self.virtualfile_from_matrix, | ||
"vectors": self.virtualfile_from_vectors, | ||
}[kind] | ||
|
||
|
@@ -1810,29 +1807,32 @@ | |
warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2) | ||
_data = (data,) if not isinstance(data, pathlib.PurePath) else (str(data),) | ||
elif kind == "vectors": | ||
_data = [np.atleast_1d(x), np.atleast_1d(y)] | ||
if z is not None: | ||
_data.append(np.atleast_1d(z)) | ||
if extra_arrays: | ||
_data.extend(extra_arrays) | ||
elif kind == "matrix": # turn 2-D arrays into list of vectors | ||
if hasattr(data, "items") and not hasattr(data, "to_frame"): | ||
if data is None: | ||
# data is None, so data must be given via x/y/z. | ||
_data = [np.atleast_1d(x), np.atleast_1d(y)] | ||
if z is not None: | ||
_data.append(np.atleast_1d(z)) | ||
if extra_arrays: | ||
_data.extend(extra_arrays) | ||
elif hasattr(data, "items") and not hasattr(data, "to_frame"): | ||
# pandas.DataFrame or xarray.Dataset types. | ||
# pandas.Series will be handled below like a 1-D numpy.ndarray. | ||
_data = [array for _, array in data.items()] | ||
elif hasattr(data, "ndim") and data.ndim == 2 and data.dtype.kind in "iuf": | ||
# Just use virtualfile_from_matrix for 2-D numpy.ndarray | ||
# which are signed integer (i), unsigned integer (u) or | ||
# floating point (f) types | ||
_virtualfile_from = self.virtualfile_from_matrix | ||
_data = (data,) | ||
else: | ||
# Python list, tuple, numpy.ndarray, and pandas.Series types | ||
_data = np.atleast_2d(np.asanyarray(data).T) | ||
elif kind == "matrix": | ||
# GMT can only accept a 2-D matrix of signed integer (i), unsigned | ||
# integer (u) or floating point (f) type. For other data types, we need to | ||
# use virtualfile_from_vectors instead, which turns the matrix into a list of | ||
# vectors and allows for better handling of string type inputs (e.g. for | ||
# datetime data types). | ||
_data = (data,) | ||
if data.dtype.kind not in "iuf": | ||
_virtualfile_from = self.virtualfile_from_vectors | ||
Comment on lines
+1828
to
+1829
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing test coverage for these lines? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can add a test for it. Before adding more tests, I'm wondering if we should split the big "test_clib_virtualfiles.py" file (with more than 500 lines) into separate smaller test files, i.e., one test file for each Session method.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We've split it before in #2784, so yes, ok to split it again 😆 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've added a test in cfa32ed to cover this line. |
||
|
||
# Finally create the virtualfile from the data, to be passed into GMT | ||
file_context = _virtualfile_from(*_data) | ||
|
||
return file_context | ||
|
||
def virtualfile_from_data( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ | |
from collections.abc import Iterable, Sequence | ||
from typing import Any, Literal | ||
|
||
import numpy as np | ||
import xarray as xr | ||
from pygmt.encodings import charset | ||
from pygmt.exceptions import GMTInvalidInput | ||
|
@@ -207,8 +208,10 @@ def data_kind( | |
- ``"grid"``: a :class:`xarray.DataArray` object that is not 3-D | ||
- ``"image"``: a 3-D :class:`xarray.DataArray` object | ||
- ``"stringio"``: a :class:`io.StringIO` object | ||
- ``"matrix"``: anything else that is not ``None`` | ||
- ``"vectors"``: ``data`` is ``None`` and ``required=True`` | ||
- ``"matrix"``: a 2-D :class:`numpy.ndarray` object | ||
- ``"vectors"``: the fallback for any unrecognized data. Common data | ||
types include a :class:`pandas.DataFrame` object, a dictionary with array-like | ||
values, a 1-D/3-D :class:`numpy.ndarray` object, or array-like objects. | ||
|
||
Parameters | ||
---------- | ||
|
@@ -268,27 +271,27 @@ def data_kind( | |
|
||
The "matrix" kind: | ||
|
||
>>> data_kind(data=np.arange(10)) # 1-D numpy.ndarray | ||
'matrix' | ||
>>> data_kind(data=np.arange(10).reshape((5, 2))) # 2-D numpy.ndarray | ||
'matrix' | ||
|
||
The "vectors" kind: | ||
|
||
>>> data_kind(data=np.arange(10)) # 1-D numpy.ndarray | ||
'vectors' | ||
>>> data_kind(data=np.arange(60).reshape((3, 4, 5))) # 3-D numpy.ndarray | ||
'matrix' | ||
'vectors' | ||
>>> data_kind(xr.DataArray(np.arange(12), name="x").to_dataset()) # xarray.Dataset | ||
'matrix' | ||
'vectors' | ||
>>> data_kind(data=[1, 2, 3]) # 1-D sequence | ||
'matrix' | ||
'vectors' | ||
>>> data_kind(data=[[1, 2, 3], [4, 5, 6]]) # sequence of sequences | ||
'matrix' | ||
'vectors' | ||
>>> data_kind(data={"x": [1, 2, 3], "y": [4, 5, 6]}) # dictionary | ||
'matrix' | ||
'vectors' | ||
>>> data_kind(data=pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})) # pd.DataFrame | ||
'matrix' | ||
'vectors' | ||
>>> data_kind(data=pd.Series([1, 2, 3], name="x")) # pd.Series | ||
'matrix' | ||
|
||
The "vectors" kind: | ||
|
||
'vectors' | ||
>>> data_kind(data=None) | ||
'vectors' | ||
""" | ||
|
@@ -312,7 +315,7 @@ def data_kind( | |
# geopandas.GeoDataFrame or shapely.geometry). | ||
# Reference: https://gist.github.com/sgillies/2217756 | ||
kind = "geojson" | ||
case x if x is not None: # Any not-None is considered as a matrix. | ||
case np.ndarray() if data.ndim == 2: # A 2-D numpy.ndarray object. | ||
kind = "matrix" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Two things:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
It's unclear if we can pass a 3-D numpy array yet. Even if we can, it means more work, since in
Yes, I was thinking about checking There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done in 4a4f192, although other array-like objects are not tested. |
||
case _: # Fall back to "vectors" for any unrecognized data, or if data is None and required=True. | ||
kind = "vectors" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -195,7 +195,7 @@ def x2sys_cross( | |
match data_kind(track): | ||
case "file": | ||
file_contexts.append(contextlib.nullcontext(track)) | ||
case "matrix": | ||
case "vectors": | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. pandas.DataFrame is now the "vectors" kind. |
||
# find suffix (-E) of trackfiles used (e.g. xyz, csv, etc) from | ||
# $X2SYS_HOME/TAGNAME/TAGNAME.tag file | ||
tagfile = Path( | ||
|
Uh oh!
There was an error while loading. Please reload this page.