From 0c82b3cad9c3afffd4f389653eff67f797a9f12c Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 3 Oct 2024 14:42:07 +0800 Subject: [PATCH 01/12] data_kind: Refactor the if-else statements into if-return statements --- pygmt/helpers/utils.py | 67 ++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index 24a938df518..59928482091 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -187,7 +187,7 @@ def _check_encoding( return "ISOLatin1+" -def data_kind( +def data_kind( # noqa: PLR0911 data: Any = None, required: bool = True ) -> Literal[ "arg", "file", "geojson", "grid", "image", "matrix", "stringio", "vectors" @@ -195,15 +195,18 @@ def data_kind( r""" Check the kind of data that is provided to a module. - The ``data`` argument can be in any type, but only following types are supported: + The ``data`` argument can be in any type. Following data kinds are recognized: - - a string or a :class:`pathlib.PurePath` object or a sequence of them, representing - a file name or a list of file names - - a 2-D or 3-D :class:`xarray.DataArray` object - - a 2-D matrix - - None, bool, int or float type representing an optional arguments - - a geo-like Python object that implements ``__geo_interface__`` (e.g., - geopandas.GeoDataFrame or shapely.geometry) + - ``"arg"``: data is ``None`` and ``required=False``, or bool, int, float, + representing an optional argument, used for dealing with optional virtual files + - ``"file"``: a string or a :class:`pathlib.PurePath` object or a sequence of them, + representing one or more file names + - ``"geojson"``: a geo-like Python object that implements ``__geo_interface__`` + (e.g., geopandas.GeoDataFrame or shapely.geometry) + - ``"grid"``: a :class:`xarray.DataArray` object that is not 3-D + - ``"image"``: a 3-D :class:`xarray.DataArray` object + - ``"matrix"``: anything that is not None + - ``"vectors"``: data is 
``None`` and ``required=True`` Parameters ---------- @@ -287,30 +290,36 @@ def data_kind( >>> data_kind(data=None) 'vectors' """ - kind: Literal[ - "arg", "file", "geojson", "grid", "image", "matrix", "stringio", "vectors" - ] + # One file or a list/tuple of files. if isinstance(data, str | pathlib.PurePath) or ( isinstance(data, list | tuple) and all(isinstance(_file, str | pathlib.PurePath) for _file in data) ): - # One or more files - kind = "file" - elif isinstance(data, bool | int | float) or (data is None and not required): - kind = "arg" - elif isinstance(data, io.StringIO): - kind = "stringio" - elif isinstance(data, xr.DataArray): - kind = "image" if len(data.dims) == 3 else "grid" - elif hasattr(data, "__geo_interface__"): - # geo-like Python object that implements ``__geo_interface__`` - # (geopandas.GeoDataFrame or shapely.geometry) - kind = "geojson" - elif data is not None: - kind = "matrix" - else: - kind = "vectors" - return kind + return "file" + + # A StringIO object. + if isinstance(data, io.StringIO): + return "stringio" + + # An option argument, mainly for dealing optional virtual files. + if isinstance(data, bool | int | float) or (data is None and not required): + return "arg" + + # An xarray.DataArray object, representing a grid or an image. + if isinstance(data, xr.DataArray): + return "image" if len(data.dims) == 3 else "grid" + + # Geo-like Python object that implements ``__geo_interface__`` (e.g., + # geopandas.GeoDataFrame or shapely.geometry). + # Reference: https://gist.github.com/sgillies/2217756 + if hasattr(data, "__geo_interface__"): + return "geojson" + + # Any not-None is considered as a matrix. 
+ if data is not None: + return "matrix" + + return "vectors" def non_ascii_to_octal( From 808755d7747dcbcd93b8a0d495b11990d10deb6f Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 3 Oct 2024 17:21:14 +0800 Subject: [PATCH 02/12] data_kind: Now 'matrix' represents a 2-D numpy array and unrecognized data types fall back to 'vectors' --- pygmt/clib/session.py | 36 ++++++++++++++++++------------------ pygmt/helpers/utils.py | 35 +++++++++++++++++++---------------- 2 files changed, 37 insertions(+), 34 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 376d441746a..42edfe44bc8 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1790,10 +1790,7 @@ def virtualfile_in( # noqa: PLR0912 "grid": self.virtualfile_from_grid, "image": tempfile_from_image, "stringio": self.virtualfile_from_stringio, - # Note: virtualfile_from_matrix is not used because a matrix can be - # converted to vectors instead, and using vectors allows for better - # handling of string type inputs (e.g. for datetime data types) - "matrix": self.virtualfile_from_vectors, + "matrix": self.virtualfile_from_matrix, "vectors": self.virtualfile_from_vectors, }[kind] @@ -1810,29 +1807,32 @@ def virtualfile_in( # noqa: PLR0912 warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2) _data = (data,) if not isinstance(data, pathlib.PurePath) else (str(data),) elif kind == "vectors": - _data = [np.atleast_1d(x), np.atleast_1d(y)] - if z is not None: - _data.append(np.atleast_1d(z)) - if extra_arrays: - _data.extend(extra_arrays) - elif kind == "matrix": # turn 2-D arrays into list of vectors - if hasattr(data, "items") and not hasattr(data, "to_frame"): + if data is None: + # data is None, so data must be given via x/y/z. 
+ _data = [np.atleast_1d(x), np.atleast_1d(y)] + if z is not None: + _data.append(np.atleast_1d(z)) + if extra_arrays: + _data.extend(extra_arrays) + elif hasattr(data, "items") and not hasattr(data, "to_frame"): # pandas.DataFrame or xarray.Dataset types. # pandas.Series will be handled below like a 1-D numpy.ndarray. _data = [array for _, array in data.items()] - elif hasattr(data, "ndim") and data.ndim == 2 and data.dtype.kind in "iuf": - # Just use virtualfile_from_matrix for 2-D numpy.ndarray - # which are signed integer (i), unsigned integer (u) or - # floating point (f) types - _virtualfile_from = self.virtualfile_from_matrix - _data = (data,) else: # Python list, tuple, numpy.ndarray, and pandas.Series types _data = np.atleast_2d(np.asanyarray(data).T) + elif kind == "matrix": + # GMT can only accept a 2-D matrix which are signed integer (i), unsigned + # integer (u) or floating point (f) types. For other data types, we need to + # use virtualfile_from_vectors instead, which turns the matrix into list of + # vectors and allows for better handling of string type inputs (e.g. for + # datetime data types). 
+ _data = (data,) + if data.dtype.kind not in "iuf": + _virtualfile_from = self.virtualfile_from_vectors # Finally create the virtualfile from the data, to be passed into GMT file_context = _virtualfile_from(*_data) - return file_context def virtualfile_from_data( diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index 59928482091..8b5aa12dcc2 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -14,6 +14,7 @@ from collections.abc import Iterable, Sequence from typing import Any, Literal +import numpy as np import xarray as xr from pygmt.encodings import charset from pygmt.exceptions import GMTInvalidInput @@ -205,8 +206,10 @@ def data_kind( # noqa: PLR0911 (e.g., geopandas.GeoDataFrame or shapely.geometry) - ``"grid"``: a :class:`xarray.DataArray` object that is not 3-D - ``"image"``: a 3-D :class:`xarray.DataArray` object - - ``"matrix"``: anything that is not None - - ``"vectors"``: data is ``None`` and ``required=True`` + - ``"matrix"``: a 2-D :class:`numpy.ndarray` object + - ``"vectors"``: fallback to ``"vectors"`` for any unrecognized data. Common data + types include, a :class:`pandas.DataFrame` object, a dictionary with array-like + values, a 1-D/3-D :class:`numpy.ndarray` object, or array-like objects. 
Parameters ---------- @@ -266,27 +269,27 @@ def data_kind( # noqa: PLR0911 The "matrix"`` kind: - >>> data_kind(data=np.arange(10)) # 1-D numpy.ndarray - 'matrix' >>> data_kind(data=np.arange(10).reshape((5, 2))) # 2-D numpy.ndarray 'matrix' + + The "vectors" kind: + + >>> data_kind(data=np.arange(10)) # 1-D numpy.ndarray + 'vectors' >>> data_kind(data=np.arange(60).reshape((3, 4, 5))) # 3-D numpy.ndarray - 'matrix' + 'vectors' >>> data_kind(xr.DataArray(np.arange(12), name="x").to_dataset()) # xarray.Dataset - 'matrix' + 'vectors' >>> data_kind(data=[1, 2, 3]) # 1-D sequence - 'matrix' + 'vectors' >>> data_kind(data=[[1, 2, 3], [4, 5, 6]]) # sequence of sequences - 'matrix' + 'vectors' >>> data_kind(data={"x": [1, 2, 3], "y": [4, 5, 6]}) # dictionary - 'matrix' + 'vectors' >>> data_kind(data=pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})) # pd.DataFrame - 'matrix' + 'vectors' >>> data_kind(data=pd.Series([1, 2, 3], name="x")) # pd.Series - 'matrix' - - The "vectors" kind: - + 'vectors' >>> data_kind(data=None) 'vectors' """ @@ -315,8 +318,8 @@ def data_kind( # noqa: PLR0911 if hasattr(data, "__geo_interface__"): return "geojson" - # Any not-None is considered as a matrix. - if data is not None: + # A 2-D numpy.ndarray. 
+ if isinstance(data, np.ndarray) and data.ndim == 2: return "matrix" return "vectors" From 0eb4f8fcc3b7e29d33ee4d9dad75d8edc5e5a8a8 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 3 Oct 2024 17:27:16 +0800 Subject: [PATCH 03/12] Make 'data' a required parameter --- pygmt/helpers/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index 59928482091..0fe90a886b3 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -188,7 +188,7 @@ def _check_encoding( def data_kind( # noqa: PLR0911 - data: Any = None, required: bool = True + data: Any, required: bool = True ) -> Literal[ "arg", "file", "geojson", "grid", "image", "matrix", "stringio", "vectors" ]: From 9891b2c687c0ff2f12bfe6b5c357a3f99bb97082 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 3 Oct 2024 17:37:03 +0800 Subject: [PATCH 04/12] Fix x2sys_cross as pd.DataFrame is 'vectors' kind now --- pygmt/src/x2sys_cross.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index 79daf523fec..382f560f6f7 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -195,7 +195,7 @@ def x2sys_cross( match data_kind(track): case "file": file_contexts.append(contextlib.nullcontext(track)) - case "matrix": + case "vectors": # find suffix (-E) of trackfiles used (e.g. 
xyz, csv, etc) from # $X2SYS_HOME/TAGNAME/TAGNAME.tag file tagfile = Path( From a9d094cfe0b777abc679701f361c31f9f1716590 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 3 Oct 2024 17:41:17 +0800 Subject: [PATCH 05/12] Fix legend as now 'vectors' doesn't mean data is None --- pygmt/src/legend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/src/legend.py b/pygmt/src/legend.py index ed34bc0d797..ddc26cbd2eb 100644 --- a/pygmt/src/legend.py +++ b/pygmt/src/legend.py @@ -91,7 +91,7 @@ def legend( kwargs["F"] = box kind = data_kind(spec) - if kind not in {"vectors", "file", "stringio"}: # kind="vectors" means spec is None + if spec is not None and kind not in {"file", "stringio"}: raise GMTInvalidInput(f"Unrecognized data type: {type(spec)}") if kind == "file" and is_nonstr_iter(spec): raise GMTInvalidInput("Only one legend specification file is allowed.") From 3d8be4d5982008a58c38cfd1aa879c9f64a524e2 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 3 Oct 2024 19:26:12 +0800 Subject: [PATCH 06/12] Add docstrings for stringio --- pygmt/helpers/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index 0fe90a886b3..4c2451762ce 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -195,7 +195,7 @@ def data_kind( # noqa: PLR0911 r""" Check the kind of data that is provided to a module. - The ``data`` argument can be in any type. Following data kinds are recognized: + The ``data`` argument can be in any types. 
Following data kinds are recognized: - ``"arg"``: data is ``None`` and ``required=False``, or bool, int, float, representing an optional argument, used for dealing with optional virtual files @@ -205,6 +205,7 @@ def data_kind( # noqa: PLR0911 (e.g., geopandas.GeoDataFrame or shapely.geometry) - ``"grid"``: a :class:`xarray.DataArray` object that is not 3-D - ``"image"``: a 3-D :class:`xarray.DataArray` object + - ``"stringio"``: a :class:`io.StringIO` object - ``"matrix"``: anything that is not None - ``"vectors"``: data is ``None`` and ``required=True`` From 6954c5d47f81852a72eaeb75913500bfd96f14c9 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 7 Oct 2024 13:43:59 +0800 Subject: [PATCH 07/12] Fix docstrings --- pygmt/helpers/utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index c505c45be81..2885ca3878a 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -209,9 +209,10 @@ def data_kind( - ``"image"``: a 3-D :class:`xarray.DataArray` object - ``"stringio"``: a :class:`io.StringIO` object - ``"matrix"``: a 2-D :class:`numpy.ndarray` object - - ``"vectors"``: fallback to ``"vectors"`` for any unrecognized data. Common data - types include, a :class:`pandas.DataFrame` object, a dictionary with array-like - values, a 1-D/3-D :class:`numpy.ndarray` object, or array-like objects. + - ``"vectors"``: ``data`` is ``None`` and ``required=True``, or any unrecognized + data. Common data types include, a :class:`pandas.DataFrame` object, a dictionary + with array-like values, a 1-D/3-D :class:`numpy.ndarray` object, or array-like + objects. 
Parameters ---------- From 51569c820e8e44ac711b3d939119bf1c157ad454 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 14 Oct 2024 07:03:52 +0800 Subject: [PATCH 08/12] Update pygmt/clib/session.py Co-authored-by: Wei Ji <23487320+weiji14@users.noreply.github.com> --- pygmt/clib/session.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 5eed8053f6c..8c72eee596c 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1815,9 +1815,9 @@ def virtualfile_in( # noqa: PLR0912 elif kind == "matrix": # GMT can only accept a 2-D matrix which are signed integer (i), unsigned # integer (u) or floating point (f) types. For other data types, we need to - # use virtualfile_from_vectors instead, which turns the matrix into list of - # vectors and allows for better handling of string type inputs (e.g. for - # datetime data types). + # use virtualfile_from_vectors instead, which turns the matrix into a list + # of vectors and allows for better handling of non-integer/float type inputs + # (e.g. for string or datetime data types). 
_data = (data,) if data.dtype.kind not in "iuf": _virtualfile_from = self.virtualfile_from_vectors From 4a4f192d71f2754306a80858b6789c93d2fefd08 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 14 Oct 2024 15:34:19 +0800 Subject: [PATCH 09/12] 2-D array-like that implements '__array_interface__' is matrix --- pygmt/helpers/utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index 792961ea265..07c965ae277 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -14,7 +14,6 @@ from collections.abc import Iterable, Mapping, Sequence from typing import Any, Literal -import numpy as np import xarray as xr from pygmt.encodings import charset from pygmt.exceptions import GMTInvalidInput @@ -208,7 +207,8 @@ def data_kind( - ``"grid"``: a :class:`xarray.DataArray` object that is not 3-D - ``"image"``: a 3-D :class:`xarray.DataArray` object - ``"stringio"``: a :class:`io.StringIO` object - - ``"matrix"``: a 2-D :class:`numpy.ndarray` object + - ``"matrix"``: a 2-D array-like object that implements ``__array_interface__`` + (e.g., :class:`numpy.ndarray`) - ``"vectors"``: ``data`` is ``None`` and ``required=True``, or any unrecognized data. Common data types include, a :class:`pandas.DataFrame` object, a dictionary with array-like values, a 1-D/3-D :class:`numpy.ndarray` object, or array-like @@ -316,7 +316,10 @@ def data_kind( # geopandas.GeoDataFrame or shapely.geometry). # Reference: https://gist.github.com/sgillies/2217756 kind = "geojson" - case np.ndarray() if data.ndim == 2: # A 2-D numpy.ndarray object. + case x if hasattr(x, "__array_interface__") and data.ndim == 2: + # 2-D Array-like objects that implements ``__array_interface__`` (e.g., + # numpy.ndarray). + # Reference: https://numpy.org/doc/stable/reference/arrays.interface.html kind = "matrix" case _: # Fall back to "vectors" if data is None and required=True. 
kind = "vectors" From cfa32ed8e5558ac3215366949b504e83ad2fa4ca Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 15 Oct 2024 14:02:16 +0800 Subject: [PATCH 10/12] Add a test for passing string dtype matrix --- pygmt/tests/test_clib_virtualfile_in.py | 28 ++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/pygmt/tests/test_clib_virtualfile_in.py b/pygmt/tests/test_clib_virtualfile_in.py index 95adcaff8ae..d854d556050 100644 --- a/pygmt/tests/test_clib_virtualfile_in.py +++ b/pygmt/tests/test_clib_virtualfile_in.py @@ -9,9 +9,11 @@ import pandas as pd import pytest import xarray as xr +from packaging.version import Version from pygmt import clib +from pygmt.clib import __gmt_version__ from pygmt.exceptions import GMTInvalidInput -from pygmt.helpers import GMTTempFile +from pygmt.helpers import GMTTempFile, data_kind POINTS_DATA = Path(__file__).parent / "data" / "points.txt" @@ -101,3 +103,27 @@ def test_virtualfile_in_fail_non_valid_data(data): z=data[:, 2], data=data, ) + + +@pytest.mark.xfail( + condition=Version(__gmt_version__) <= Version("6.5.0"), + reason="Upstream bug fixed in https://github.com/GenericMappingTools/gmt/pull/8600", +) +def test_virtualfile_in_matrix_string_dtype(): + """ + Pass a string dtype matrix should work and the matrix should be passed via a series + of vectors. 
+ """ + data = np.array([["11:30W", "30:30S"], ["12:30W", "30:00S"]]) + assert data_kind(data) == "matrix" # data is recognized as "matrix" kind + assert data.dtype.type == np.str_ + assert data.dtype.kind not in "iuf" # dtype is not in numeric dtypes + + with clib.Session() as lib: + with lib.virtualfile_in(data=data) as vintbl: + with GMTTempFile() as outfile: + lib.call_module("info", [vintbl, "-C", f"->{outfile.name}"]) + output = outfile.read(keep_tabs=False) + assert output == "347.5 348.5 -30.5 -30\n" + # Should check that lib.virtualfile_from_vectors is called once, + # not lib.virtualfile_from_matrix, but it's technically complicated. From ef6e6aa424454c43f863f573d61078d23657b443 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 15 Oct 2024 14:31:34 +0800 Subject: [PATCH 11/12] Fix a bug when passing a 2-D matrix to virtualfile_from_vectors --- pygmt/clib/session.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 892428be5b5..008d02b5402 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1831,6 +1831,7 @@ def virtualfile_in( # noqa: PLR0912 _data = (data,) if data.dtype.kind not in "iuf": _virtualfile_from = self.virtualfile_from_vectors + _data = data # Finally create the virtualfile from the data, to be passed into GMT file_context = _virtualfile_from(*_data) From 423e5dc1e653770d2a17730e284332a4c2189bfb Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 15 Oct 2024 16:34:05 +0800 Subject: [PATCH 12/12] Should transpose the 2-D matrix before passing to virtualfile_from_vectors --- pygmt/clib/session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 008d02b5402..98447c47c7a 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1831,7 +1831,7 @@ def virtualfile_in( # noqa: PLR0912 _data = (data,) if data.dtype.kind not in "iuf": _virtualfile_from = self.virtualfile_from_vectors - _data = data + 
_data = data.T # Finally create the virtualfile from the data, to be passed into GMT file_context = _virtualfile_from(*_data)