From 95fab98a5af7469f39d69c4d5b558d63d91b2ce2 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 20 Apr 2024 10:50:39 +0800 Subject: [PATCH 01/32] pygmt.x2sys_cross: Add 'output_type' parameter for output in pandas/numpy/file formats --- pygmt/src/x2sys_cross.py | 71 ++++++++++++++------------------- pygmt/tests/test_x2sys_cross.py | 14 ++++--- 2 files changed, 39 insertions(+), 46 deletions(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index eadd20dcfb2..9fec5fc4982 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -5,19 +5,19 @@ import contextlib import os from pathlib import Path +from typing import Literal import pandas as pd -from packaging.version import Version from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput from pygmt.helpers import ( - GMTTempFile, build_arg_list, data_kind, fmt_docstring, kwargs_to_strings, unique_name, use_alias, + validate_output_table_type, ) @@ -71,7 +71,12 @@ def tempfile_from_dftrack(track, suffix): Z="trackvalues", ) @kwargs_to_strings(R="sequence") -def x2sys_cross(tracks=None, outfile=None, **kwargs): +def x2sys_cross( + tracks=None, + output_type: Literal["pandas", "numpy", "file"] = "pandas", + outfile: str | None = None, + **kwargs, +): r""" Calculate crossovers between track data files. @@ -102,11 +107,8 @@ def x2sys_cross(tracks=None, outfile=None, **kwargs): set it will default to $GMT_SHAREDIR/x2sys]. (**Note**: MGD77 files will also be looked for via $MGD77_HOME/mgd77_paths.txt and .gmt files will be searched for via $GMT_SHAREDIR/mgg/gmtfile_paths). - - outfile : str - Optional. The file name for the output ASCII txt file to store the - table in. - + {output_type} + {outfile} tag : str Specify the x2sys TAG which identifies the attributes of this data type. @@ -183,15 +185,16 @@ def x2sys_cross(tracks=None, outfile=None, **kwargs): Returns ------- - crossover_errors : :class:`pandas.DataFrame` or None - Table containing crossover error information. - Return type depends on whether the ``outfile`` parameter is set: - - - :class:`pandas.DataFrame` with (x, y, ..., etc) if ``outfile`` is not - set - - None if ``outfile`` is set (track output will be stored in the set in - ``outfile``) + crossover_errors + Table containing crossover error information. Return type depends on ``outfile`` + and ``output_type``: + + - None if ``outfile`` is set (output will be stored in file set by ``outfile``) + - :class:`pandas.DataFrame` or :class:`numpy.ndarray` if ``outfile`` is not set + (depends on ``output_type``) """ + output_type = validate_output_table_type(output_type, outfile=outfile) + with Session() as lib: file_contexts = [] for track in tracks: @@ -216,35 +219,21 @@ def x2sys_cross(tracks=None, outfile=None, **kwargs): else: raise GMTInvalidInput(f"Unrecognized data type: {type(track)}") - with GMTTempFile(suffix=".txt") as tmpfile: + with lib.virtualfile_out(kind="dataset", fname=outfile) as vouttbl: with contextlib.ExitStack() as stack: fnames = [stack.enter_context(c) for c in file_contexts] - if outfile is None: - outfile = tmpfile.name lib.call_module( module="x2sys_cross", - args=build_arg_list(kwargs, infile=fnames, outfile=outfile), - ) - - # Read temporary csv output to a pandas table - if outfile == tmpfile.name: # if outfile isn't set, return pd.DataFrame - # Read the tab-separated ASCII table - date_format_kwarg = ( - {"date_format": "ISO8601"} - if Version(pd.__version__) >= Version("2.0.0") - else {} + args=build_arg_list(kwargs, infile=fnames, outfile=vouttbl), ) - table = pd.read_csv( - tmpfile.name, - sep="\t", - header=2, # Column names are on 2nd row - comment=">", # Skip the 3rd row with a ">" - parse_dates=[2, 3], # Datetimes on 3rd and 4th column - **date_format_kwarg, # Parse dates in ISO8601 format on pandas>=2 + result = lib.virtualfile_to_dataset( + vfname=vouttbl, output_type=output_type, header=2 ) - # Remove the "# " from "# x" in the first column - table = table.rename(columns={table.columns[0]: table.columns[0][2:]}) - elif outfile != tmpfile.name: # if outfile is set, output in outfile only - table = None - return table + # Convert 3rd and 4th columns to datetimes. + # These two columns have names "t_1"/"t_2" or "i_1"/"i_2". + # "t_1"/"t_2" means they are datetimes and should be converted. + # "i_1"/"i_2" means they are dummy times (i.e., floating-point values). + if output_type == "pandas" and result.columns[2] == "t_1": + result.iloc[:, 2:4] = result.iloc[:, 2:4].apply(pd.to_datetime) + return result diff --git a/pygmt/tests/test_x2sys_cross.py b/pygmt/tests/test_x2sys_cross.py index c9209bd254a..3c2a8509edf 100644 --- a/pygmt/tests/test_x2sys_cross.py +++ b/pygmt/tests/test_x2sys_cross.py @@ -49,7 +49,11 @@ def test_x2sys_cross_input_file_output_file(): x2sys_init(tag=tag, fmtfile="xyz", force=True) outfile = tmpdir_p / "tmp_coe.txt" output = x2sys_cross( - tracks=["@tut_ship.xyz"], tag=tag, coe="i", outfile=outfile + tracks=["@tut_ship.xyz"], + tag=tag, + coe="i", + outfile=outfile, + output_type="file", ) assert output is None # check that output is None since outfile is set @@ -97,8 +101,8 @@ def test_x2sys_cross_input_dataframe_output_dataframe(tracks): columns = list(output.columns) assert columns[:6] == ["x", "y", "i_1", "i_2", "dist_1", "dist_2"] assert columns[6:] == ["head_1", "head_2", "vel_1", "vel_2", "z_X", "z_M"] - assert output.dtypes["i_1"].type == np.object_ - assert output.dtypes["i_2"].type == np.object_ + assert output.dtypes["i_1"].type == np.float64 + assert output.dtypes["i_2"].type == np.float64 @pytest.mark.usefixtures("mock_x2sys_home") @@ -158,8 +162,8 @@ def test_x2sys_cross_input_dataframe_with_nan(tracks): columns = list(output.columns) assert columns[:6] == ["x", "y", "i_1", "i_2", "dist_1", "dist_2"] assert columns[6:] == ["head_1", "head_2", "vel_1", "vel_2", "z_X", "z_M"] - assert output.dtypes["i_1"].type == np.object_ - assert output.dtypes["i_2"].type == np.object_ + assert output.dtypes["i_1"].type == np.float64 + assert output.dtypes["i_2"].type == np.float64 @pytest.mark.usefixtures("mock_x2sys_home") From ce926a0222bcff496fae8b0b98de814f5e3388cc Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 20 Apr 2024 10:51:55 +0800 Subject: [PATCH 02/32] Move session-unrelated code block outside the session block --- pygmt/src/x2sys_cross.py | 46 ++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index 9fec5fc4982..e481e06d36d 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -195,30 +195,30 @@ def x2sys_cross( """ output_type = validate_output_table_type(output_type, outfile=outfile) - with Session() as lib: - file_contexts = [] - for track in tracks: - kind = data_kind(track) - if kind == "file": - file_contexts.append(contextlib.nullcontext(track)) - elif kind == "matrix": - # find suffix (-E) of trackfiles used (e.g. xyz, csv, etc) from - # $X2SYS_HOME/TAGNAME/TAGNAME.tag file - lastline = ( - Path(os.environ["X2SYS_HOME"], kwargs["T"], f"{kwargs['T']}.tag") - .read_text(encoding="utf8") - .strip() - .split("\n")[-1] - ) # e.g. "-Dxyz -Etsv -I1/1" - for item in sorted(lastline.split()): # sort list alphabetically - if item.startswith(("-E", "-D")): # prefer -Etsv over -Dxyz - suffix = item[2:] # e.g. tsv (1st choice) or xyz (2nd choice) - - # Save pandas.DataFrame track data to temporary file - file_contexts.append(tempfile_from_dftrack(track=track, suffix=suffix)) - else: - raise GMTInvalidInput(f"Unrecognized data type: {type(track)}") + file_contexts = [] + for track in tracks: + kind = data_kind(track) + if kind == "file": + file_contexts.append(contextlib.nullcontext(track)) + elif kind == "matrix": + # find suffix (-E) of trackfiles used (e.g. xyz, csv, etc) from + # $X2SYS_HOME/TAGNAME/TAGNAME.tag file + lastline = ( + Path(os.environ["X2SYS_HOME"], kwargs["T"], f"{kwargs['T']}.tag") + .read_text(encoding="utf8") + .strip() + .split("\n")[-1] + ) # e.g. "-Dxyz -Etsv -I1/1" + for item in sorted(lastline.split()): # sort list alphabetically + if item.startswith(("-E", "-D")): # prefer -Etsv over -Dxyz + suffix = item[2:] # e.g. tsv (1st choice) or xyz (2nd choice) + + # Save pandas.DataFrame track data to temporary file + file_contexts.append(tempfile_from_dftrack(track=track, suffix=suffix)) + else: + raise GMTInvalidInput(f"Unrecognized data type: {type(track)}") + with Session() as lib: with lib.virtualfile_out(kind="dataset", fname=outfile) as vouttbl: with contextlib.ExitStack() as stack: fnames = [stack.enter_context(c) for c in file_contexts] From 86278cb369b4a8c493d8f24ae341a24dd3f3b8e7 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 20 Apr 2024 10:54:25 +0800 Subject: [PATCH 03/32] Refactor if-else using match statements --- pygmt/src/x2sys_cross.py | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index e481e06d36d..85ecbeab57d 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -197,26 +197,25 @@ def x2sys_cross( file_contexts = [] for track in tracks: - kind = data_kind(track) - if kind == "file": - file_contexts.append(contextlib.nullcontext(track)) - elif kind == "matrix": - # find suffix (-E) of trackfiles used (e.g. xyz, csv, etc) from - # $X2SYS_HOME/TAGNAME/TAGNAME.tag file - lastline = ( - Path(os.environ["X2SYS_HOME"], kwargs["T"], f"{kwargs['T']}.tag") - .read_text(encoding="utf8") - .strip() - .split("\n")[-1] - ) # e.g. "-Dxyz -Etsv -I1/1" - for item in sorted(lastline.split()): # sort list alphabetically - if item.startswith(("-E", "-D")): # prefer -Etsv over -Dxyz - suffix = item[2:] # e.g. tsv (1st choice) or xyz (2nd choice) - - # Save pandas.DataFrame track data to temporary file - file_contexts.append(tempfile_from_dftrack(track=track, suffix=suffix)) - else: - raise GMTInvalidInput(f"Unrecognized data type: {type(track)}") + match data_kind(track): + case "file": + file_contexts.append(contextlib.nullcontext(track)) + case "matrix": + # find suffix (-E) of trackfiles used (e.g. xyz, csv, etc) from + # $X2SYS_HOME/TAGNAME/TAGNAME.tag file + tagfile = Path( + os.environ["X2SYS_HOME"], kwargs["T"], f"{kwargs['T']}.tag" + ) + # Last line is like "-Dxyz -Etsv -I1/1" + lastline = tagfile.read_text().splitlines()[-1] + for item in sorted(lastline.split()): # sort list alphabetically + if item.startswith(("-E", "-D")): # prefer -Etsv over -Dxyz + suffix = item[2:] # e.g. tsv (1st choice) or xyz (2nd choice) + + # Save pandas.DataFrame track data to temporary file + file_contexts.append(tempfile_from_dftrack(track=track, suffix=suffix)) + case _: + raise GMTInvalidInput(f"Unrecognized data type: {type(track)}") with Session() as lib: with lib.virtualfile_out(kind="dataset", fname=outfile) as vouttbl: From 58c6ea48b62a4aafdc0b8e11abc46977358f6483 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 20 Apr 2024 10:54:57 +0800 Subject: [PATCH 04/32] Fix static typing issue --- pygmt/src/x2sys_cross.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index 85ecbeab57d..ed15c6c8960 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -5,7 +5,7 @@ import contextlib import os from pathlib import Path -from typing import Literal +from typing import Any, Literal import pandas as pd from pygmt.clib import Session @@ -195,7 +195,7 @@ def x2sys_cross( """ output_type = validate_output_table_type(output_type, outfile=outfile) - file_contexts = [] + file_contexts: list[contextlib.AbstractContextManager[Any]] = [] for track in tracks: match data_kind(track): case "file": From d6eeade003f763644a3f35371620d9ad135ea1a9 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 20 Apr 2024 11:29:00 +0800 Subject: [PATCH 05/32] Fix warnings --- pygmt/src/x2sys_cross.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index ed15c6c8960..6814a5dda55 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -234,5 +234,7 @@ def x2sys_cross( # "t_1"/"t_2" means they are datetimes and should be converted. # "i_1"/"i_2" means they are dummy times (i.e., floating-point values). if output_type == "pandas" and result.columns[2] == "t_1": - result.iloc[:, 2:4] = result.iloc[:, 2:4].apply(pd.to_datetime) + result[result.columns[2:4]] = result[result.columns[2:4]].apply( + pd.to_datetime, unit="s" + ) return result From 9d12ae17741c92186583a27a42f43d971c1de4c9 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 22 Apr 2024 09:36:04 +0800 Subject: [PATCH 06/32] Convert dummpy times to timedelta --- pygmt/src/x2sys_cross.py | 12 +++++++----- pygmt/tests/test_x2sys_cross.py | 8 ++++---- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index 6814a5dda55..2f83a7dd08f 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -229,12 +229,14 @@ def x2sys_cross( vfname=vouttbl, output_type=output_type, header=2 ) - # Convert 3rd and 4th columns to datetimes. + # Convert 3rd and 4th columns to datetime or timedelta. # These two columns have names "t_1"/"t_2" or "i_1"/"i_2". - # "t_1"/"t_2" means they are datetimes and should be converted. - # "i_1"/"i_2" means they are dummy times (i.e., floating-point values). - if output_type == "pandas" and result.columns[2] == "t_1": + # "t_1"/"t_2" means they are absolute datetimes. + # "i_1"/"i_2" means they are dummy times relative to unix epoch. + if output_type == "pandas": + t_or_i = result.columns[2][0] + to_func = {"t": pd.to_datetime, "i": pd.to_timedelta}[t_or_i] result[result.columns[2:4]] = result[result.columns[2:4]].apply( - pd.to_datetime, unit="s" + to_func, unit="s" ) return result diff --git a/pygmt/tests/test_x2sys_cross.py b/pygmt/tests/test_x2sys_cross.py index 3c2a8509edf..e3852f3a158 100644 --- a/pygmt/tests/test_x2sys_cross.py +++ b/pygmt/tests/test_x2sys_cross.py @@ -101,8 +101,8 @@ def test_x2sys_cross_input_dataframe_output_dataframe(tracks): columns = list(output.columns) assert columns[:6] == ["x", "y", "i_1", "i_2", "dist_1", "dist_2"] assert columns[6:] == ["head_1", "head_2", "vel_1", "vel_2", "z_X", "z_M"] - assert output.dtypes["i_1"].type == np.float64 - assert output.dtypes["i_2"].type == np.float64 + assert output.dtypes["i_1"].type == np.timedelta64 + assert output.dtypes["i_2"].type == np.timedelta64 @pytest.mark.usefixtures("mock_x2sys_home") @@ -162,8 +162,8 @@ def test_x2sys_cross_input_dataframe_with_nan(tracks): columns = list(output.columns) assert columns[:6] == ["x", "y", "i_1", "i_2", "dist_1", "dist_2"] assert columns[6:] == ["head_1", "head_2", "vel_1", "vel_2", "z_X", "z_M"] - assert output.dtypes["i_1"].type == np.float64 - assert output.dtypes["i_2"].type == np.float64 + assert output.dtypes["i_1"].type == np.timedelta64 + assert output.dtypes["i_2"].type == np.timedelta64 @pytest.mark.usefixtures("mock_x2sys_home") From 28eb1df81273c018c1be56d768543e80aa8a493e Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 22 Apr 2024 13:14:47 +0800 Subject: [PATCH 07/32] Let validate_output_table_type specify the supported output types --- pygmt/helpers/validators.py | 26 ++++++++++++++++++-------- pygmt/src/triangulate.py | 2 +- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/pygmt/helpers/validators.py b/pygmt/helpers/validators.py index 94916eac1f5..74922261bb4 100644 --- a/pygmt/helpers/validators.py +++ b/pygmt/helpers/validators.py @@ -3,13 +3,16 @@ """ import warnings +from collections.abc import Sequence from typing import Literal from pygmt.exceptions import GMTInvalidInput def validate_output_table_type( - output_type: Literal["pandas", "numpy", "file"], outfile: str | None = None + output_type: Literal["pandas", "numpy", "file"], + valid_types: Sequence[str] = ("pandas", "numpy", "file"), + outfile: str | None = None, ) -> Literal["pandas", "numpy", "file"]: """ Check if the ``output_type`` and ``outfile`` parameters are valid. @@ -17,8 +20,10 @@ def validate_output_table_type( Parameters ---------- output_type - Desired output type of tabular data. Valid values are ``"pandas"``, - ``"numpy"`` and ``"file"``. + Desired output type of tabular data. Valid values are ``"pandas"``, ``"numpy"`` + and ``"file"``. + valid_types + Valid desired output types. outfile File name for saving the result data. Required if ``output_type`` is ``"file"``. If specified, ``output_type`` will be forced to be ``"file"``. @@ -36,23 +41,28 @@ def validate_output_table_type( 'numpy' >>> validate_output_table_type(output_type="file", outfile="output-fname.txt") 'file' + >>> validate_output_table_type(output_type="pandas", valid_types=("pandas", "file")) + 'pandas' >>> validate_output_table_type(output_type="invalid-type") Traceback (most recent call last): ... - pygmt.exceptions.GMTInvalidInput: Must specify 'output_type' either as 'file', ... + pygmt.exceptions.GMTInvalidInput: Must specify 'output_type' as 'pandas' or ... >>> validate_output_table_type("file", outfile=None) Traceback (most recent call last): ... pygmt.exceptions.GMTInvalidInput: Must specify 'outfile' for output_type='file'. + >>> validate_output_table_type(output_type="numpy", valid_types=("pandas", "file")) + Traceback (most recent call last): + ... + pygmt.exceptions.GMTInvalidInput: Must specify 'output_type' as 'pandas' or 'file'. >>> with warnings.catch_warnings(record=True) as w: ... validate_output_table_type("pandas", outfile="not-none.txt") ... assert len(w) == 1 'file' """ - if output_type not in ["file", "numpy", "pandas"]: - raise GMTInvalidInput( - "Must specify 'output_type' either as 'file', 'numpy', or 'pandas'." - ) + if output_type not in valid_types: + msg = f"Must specify 'output_type' as '{"' or '".join(valid_types)}'." + raise GMTInvalidInput(msg) if output_type == "file" and outfile is None: raise GMTInvalidInput("Must specify 'outfile' for output_type='file'.") if output_type != "file" and outfile is not None: diff --git a/pygmt/src/triangulate.py b/pygmt/src/triangulate.py index f1b64db38ec..1765bd1d28e 100644 --- a/pygmt/src/triangulate.py +++ b/pygmt/src/triangulate.py @@ -233,7 +233,7 @@ def delaunay_triples( ``triangulate`` is a Cartesian or small-geographic area operator and is unaware of periodic or polar boundary conditions. """ - output_type = validate_output_table_type(output_type, outfile) + output_type = validate_output_table_type(output_type, outfile=outfile) with Session() as lib: with ( From 5e926e85ba5ac97b0e33029630d68cbd7b3b02e7 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 22 Apr 2024 15:45:14 +0800 Subject: [PATCH 08/32] Fix --- pygmt/helpers/validators.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pygmt/helpers/validators.py b/pygmt/helpers/validators.py index 74922261bb4..916c7341588 100644 --- a/pygmt/helpers/validators.py +++ b/pygmt/helpers/validators.py @@ -46,7 +46,7 @@ def validate_output_table_type( >>> validate_output_table_type(output_type="invalid-type") Traceback (most recent call last): ... - pygmt.exceptions.GMTInvalidInput: Must specify 'output_type' as 'pandas' or ... + pygmt.exceptions.GMTInvalidInput: Must specify 'output_type' as 'pandas', ... >>> validate_output_table_type("file", outfile=None) Traceback (most recent call last): ... @@ -54,14 +54,18 @@ def validate_output_table_type( >>> validate_output_table_type(output_type="numpy", valid_types=("pandas", "file")) Traceback (most recent call last): ... - pygmt.exceptions.GMTInvalidInput: Must specify 'output_type' as 'pandas' or 'file'. + pygmt.exceptions.GMTInvalidInput: Must specify 'output_type' as 'pandas', or 'file'. >>> with warnings.catch_warnings(record=True) as w: ... validate_output_table_type("pandas", outfile="not-none.txt") ... assert len(w) == 1 'file' """ if output_type not in valid_types: - msg = f"Must specify 'output_type' as '{"' or '".join(valid_types)}'." + msg = ( + "Must specify 'output_type' as " + + ", ".join(f"'{v}'" for v in valid_types[:-1]) + + f", or '{valid_types[-1]}'." + ) raise GMTInvalidInput(msg) if output_type == "file" and outfile is None: raise GMTInvalidInput("Must specify 'outfile' for output_type='file'.") From 3a3df0a6d67edc085575602e4ed160c7d485472d Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 6 May 2024 09:48:08 +0800 Subject: [PATCH 09/32] Update docstrings --- pygmt/helpers/validators.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pygmt/helpers/validators.py b/pygmt/helpers/validators.py index 916c7341588..eb040d189ec 100644 --- a/pygmt/helpers/validators.py +++ b/pygmt/helpers/validators.py @@ -20,10 +20,10 @@ def validate_output_table_type( Parameters ---------- output_type - Desired output type of tabular data. Valid values are ``"pandas"``, ``"numpy"`` - and ``"file"``. + Desired output type of tabular data. Default valid values are ``"pandas"``, + ``"numpy"`` and ``"file"``, but can be configured by parameter ``valid_types``. valid_types - Valid desired output types. + Tuple of valid desired output types. outfile File name for saving the result data. Required if ``output_type`` is ``"file"``. If specified, ``output_type`` will be forced to be ``"file"``. From b46d21dfda4348eb9637133124596657f8591602 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 7 May 2024 09:07:13 +0800 Subject: [PATCH 10/32] Remove support of numpy output type --- pygmt/src/x2sys_cross.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index 2f83a7dd08f..54b19eeca30 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -73,7 +73,7 @@ def tempfile_from_dftrack(track, suffix): @kwargs_to_strings(R="sequence") def x2sys_cross( tracks=None, - output_type: Literal["pandas", "numpy", "file"] = "pandas", + output_type: Literal["pandas", "file"] = "pandas", outfile: str | None = None, **kwargs, ): @@ -107,7 +107,12 @@ def x2sys_cross( set it will default to $GMT_SHAREDIR/x2sys]. (**Note**: MGD77 files will also be looked for via $MGD77_HOME/mgd77_paths.txt and .gmt files will be searched for via $GMT_SHAREDIR/mgg/gmtfile_paths). - {output_type} + output_type + Desired output type of the result data. + + - ``pandas`` will return a :class:`pandas.DataFrame` object. + - ``file`` will save the result to the file specified by the ``outfile`` + parameter. {outfile} tag : str Specify the x2sys TAG which identifies the attributes of this data @@ -190,10 +195,11 @@ def x2sys_cross( and ``output_type``: - None if ``outfile`` is set (output will be stored in file set by ``outfile``) - - :class:`pandas.DataFrame` or :class:`numpy.ndarray` if ``outfile`` is not set - (depends on ``output_type``) + - :class:`pandas.DataFrame` if ``output_type`` is set to ``"pandas"`` """ - output_type = validate_output_table_type(output_type, outfile=outfile) + output_type = validate_output_table_type( + output_type, valid_types=("pandas", "file"), outfile=outfile + ) file_contexts: list[contextlib.AbstractContextManager[Any]] = [] for track in tracks: From 5f045069c76fa316d550bcf7e0cea75149045e5b Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 7 May 2024 09:50:03 +0800 Subject: [PATCH 11/32] Remove the output_type parameter --- pygmt/src/x2sys_cross.py | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index 54b19eeca30..8fcb572a5cc 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -5,7 +5,7 @@ import contextlib import os from pathlib import Path -from typing import Any, Literal +from typing import Any import pandas as pd from pygmt.clib import Session @@ -17,7 +17,6 @@ kwargs_to_strings, unique_name, use_alias, - validate_output_table_type, ) @@ -73,7 +72,6 @@ def tempfile_from_dftrack(track, suffix): @kwargs_to_strings(R="sequence") def x2sys_cross( tracks=None, - output_type: Literal["pandas", "file"] = "pandas", outfile: str | None = None, **kwargs, ): @@ -107,12 +105,6 @@ def x2sys_cross( set it will default to $GMT_SHAREDIR/x2sys]. (**Note**: MGD77 files will also be looked for via $MGD77_HOME/mgd77_paths.txt and .gmt files will be searched for via $GMT_SHAREDIR/mgg/gmtfile_paths). - output_type - Desired output type of the result data. - - - ``pandas`` will return a :class:`pandas.DataFrame` object. - - ``file`` will save the result to the file specified by the ``outfile`` - parameter. {outfile} tag : str Specify the x2sys TAG which identifies the attributes of this data @@ -191,15 +183,12 @@ def x2sys_cross( Returns ------- crossover_errors - Table containing crossover error information. Return type depends on ``outfile`` - and ``output_type``: - - - None if ``outfile`` is set (output will be stored in file set by ``outfile``) - - :class:`pandas.DataFrame` if ``output_type`` is set to ``"pandas"`` + Table containing crossover error information. A :class:`pandas.DataFrame` object + is returned if ``outfile`` is not set, otherwise ``None`` is returned and output + will be stored in file set by ``outfile``. """ - output_type = validate_output_table_type( - output_type, valid_types=("pandas", "file"), outfile=outfile - ) + # Determine output type based on 'outfile' parameter + output_type = "pandas" if outfile is None else "file" file_contexts: list[contextlib.AbstractContextManager[Any]] = [] for track in tracks: From 84765e47b387294fde6cd84be3afed2ca05c9095 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 7 May 2024 09:55:09 +0800 Subject: [PATCH 12/32] Remove the output_type parameter from tests --- pygmt/tests/test_x2sys_cross.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pygmt/tests/test_x2sys_cross.py b/pygmt/tests/test_x2sys_cross.py index c5ba60016ce..d263d099d97 100644 --- a/pygmt/tests/test_x2sys_cross.py +++ b/pygmt/tests/test_x2sys_cross.py @@ -54,7 +54,6 @@ def test_x2sys_cross_input_file_output_file(): tag=tag, coe="i", outfile=outfile, - output_type="file", ) assert output is None # check that output is None since outfile is set From b9b4098e94a970fc00387ef83609b5a7df62114e Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 7 May 2024 11:12:47 +0800 Subject: [PATCH 13/32] Improve tests --- pygmt/tests/test_x2sys_cross.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pygmt/tests/test_x2sys_cross.py b/pygmt/tests/test_x2sys_cross.py index d263d099d97..b1c4a45fb64 100644 --- a/pygmt/tests/test_x2sys_cross.py +++ b/pygmt/tests/test_x2sys_cross.py @@ -50,15 +50,17 @@ def test_x2sys_cross_input_file_output_file(): x2sys_init(tag=tag, fmtfile="xyz", force=True) outfile = tmpdir_p / "tmp_coe.txt" output = x2sys_cross( - tracks=["@tut_ship.xyz"], - tag=tag, - coe="i", - outfile=outfile, + tracks=["@tut_ship.xyz"], tag=tag, coe="i", outfile=outfile ) - assert output is None # check that output is None since outfile is set assert outfile.stat().st_size > 0 # check that outfile exists at path - _ = pd.read_csv(outfile, sep="\t", header=2) # ensure ASCII text file loads ok + result = pd.read_csv(outfile, sep="\t", comment=">", header=2) + assert result.shape == (14338, 12) + columns = list(result.columns) + assert columns[:6] == ["# x", "y", "i_1", "i_2", "dist_1", "dist_2"] + assert columns[6:] == ["head_1", "head_2", "vel_1", "vel_2", "z_X", "z_M"] + npt.assert_allclose(result["i_1"].min(), 45.2099, rtol=1.0e-4) + npt.assert_allclose(result["i_1"].max(), 82945.93699, rtol=1.0e-4) @pytest.mark.usefixtures("mock_x2sys_home") From 9bc063a390db4f6eb111d7eccb2f37f03bc001cb Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 25 May 2024 18:39:25 +0800 Subject: [PATCH 14/32] Deal with TIME_EPOCH --- pygmt/src/x2sys_cross.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index 8fcb572a5cc..ce939eb254f 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -229,9 +229,20 @@ def x2sys_cross( # "t_1"/"t_2" means they are absolute datetimes. # "i_1"/"i_2" means they are dummy times relative to unix epoch. if output_type == "pandas": - t_or_i = result.columns[2][0] - to_func = {"t": pd.to_datetime, "i": pd.to_timedelta}[t_or_i] + if (time_unit := lib.get_default("TIME_UNIT")) != "s": + msg = ( + f"Configuration TIME_UNIT must be 's' but '{time_unit}' " + "is given." + ) + raise GMTInvalidInput(msg) + to_args = {"unit": "s"} + match result.columns[2][0]: # "t" or "i". + case "t": + to_func = pd.to_datetime + to_args["origin"] = lib.get_default("TIME_EPOCH") + case "i": + to_func = pd.to_timeldelta result[result.columns[2:4]] = result[result.columns[2:4]].apply( - to_func, unit="s" + to_func, **to_args ) return result From b0b5099613a11c5493a715cf95562f5d0bd49e59 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sun, 26 May 2024 23:08:22 +0800 Subject: [PATCH 15/32] Support TIME_UNIT for 'd' and 's' --- pygmt/src/x2sys_cross.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index ce939eb254f..37bba2e3aee 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -228,14 +228,19 @@ def x2sys_cross( # These two columns have names "t_1"/"t_2" or "i_1"/"i_2". # "t_1"/"t_2" means they are absolute datetimes. # "i_1"/"i_2" means they are dummy times relative to unix epoch. + # Internally, they are all represented as double-precision numbers in + # GMT, relative to TIME_EPOCH with the unit defined by TIME_UNIT. if output_type == "pandas": - if (time_unit := lib.get_default("TIME_UNIT")) != "s": + # TIME_UNIT can be 'y'/'o'/'w'/'d'/'h'/'m'/'s', but pd.to_datetime() + # only supports unit of 'D'/'s'/'ms'/'us'/'ns'. + if (time_unit := lib.get_default("TIME_UNIT")) not in "ds": msg = ( - f"Configuration TIME_UNIT must be 's' but '{time_unit}' " - "is given." + "Value of configuration TIME_UNIT must be 'd' (day) or " + "'s' (second) but '{time_unit}' is given." ) raise GMTInvalidInput(msg) - to_args = {"unit": "s"} + time_unit = {"d": "D", "s": "s"}[time_unit] + to_args = {"unit": time_unit} match result.columns[2][0]: # "t" or "i". case "t": to_func = pd.to_datetime From 1396ee84b9c60f8976b1a54d7725f1c583476dc4 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sun, 26 May 2024 23:16:33 +0800 Subject: [PATCH 16/32] Fix a typo --- pygmt/src/x2sys_cross.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index 37bba2e3aee..a8201e5fadb 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -246,7 +246,7 @@ def x2sys_cross( to_func = pd.to_datetime to_args["origin"] = lib.get_default("TIME_EPOCH") case "i": - to_func = pd.to_timeldelta + to_func = pd.to_timedelta result[result.columns[2:4]] = result[result.columns[2:4]].apply( to_func, **to_args ) From 6f2671aa1a387c927bc770e807e6b489696ec096 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 27 May 2024 00:12:46 +0800 Subject: [PATCH 17/32] Update --- pygmt/src/x2sys_cross.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index a8201e5fadb..dfae05987f5 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -239,14 +239,13 @@ def x2sys_cross( "'s' (second) but '{time_unit}' is given." ) raise GMTInvalidInput(msg) - time_unit = {"d": "D", "s": "s"}[time_unit] - to_args = {"unit": time_unit} - match result.columns[2][0]: # "t" or "i". - case "t": - to_func = pd.to_datetime - to_args["origin"] = lib.get_default("TIME_EPOCH") - case "i": - to_func = pd.to_timedelta + + t_or_i = result.columns[2][0] # "t" or "i". + to_func = {"t": pd.to_datetime, "i": pd.to_timedelta} + to_args = {"unit": {"d": "D", "s": "s"}[time_unit]} + if t_or_i == "t": + to_args["origin"] = lib.get_default("TIME_EPOCH") + result[result.columns[2:4]] = result[result.columns[2:4]].apply( to_func, **to_args ) From a9a4179d4de36387bc69c826bfb7ddb84829ada9 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 27 May 2024 07:14:40 +0800 Subject: [PATCH 18/32] Fix --- pygmt/src/x2sys_cross.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index dfae05987f5..9809c9fc603 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -243,7 +243,7 @@ def x2sys_cross( t_or_i = result.columns[2][0] # "t" or "i". to_func = {"t": pd.to_datetime, "i": pd.to_timedelta} to_args = {"unit": {"d": "D", "s": "s"}[time_unit]} - if t_or_i == "t": + if t_or_i == "i": to_args["origin"] = lib.get_default("TIME_EPOCH") result[result.columns[2:4]] = result[result.columns[2:4]].apply( From 04a1986b23ea184152004889a7bde832686f18ce Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 27 May 2024 07:27:01 +0800 Subject: [PATCH 19/32] Typo --- pygmt/src/x2sys_cross.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index 9809c9fc603..31b22e26370 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -241,7 +241,7 @@ def x2sys_cross( raise GMTInvalidInput(msg) t_or_i = result.columns[2][0] # "t" or "i". - to_func = {"t": pd.to_datetime, "i": pd.to_timedelta} + to_func = {"t": pd.to_datetime, "i": pd.to_timedelta}[t_or_i] to_args = {"unit": {"d": "D", "s": "s"}[time_unit]} if t_or_i == "i": to_args["origin"] = lib.get_default("TIME_EPOCH") From b27212beae7c271d60f8b5a64e029f9e18a902e4 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 27 May 2024 09:43:17 +0800 Subject: [PATCH 20/32] Fix --- pygmt/src/x2sys_cross.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index 31b22e26370..67c26117863 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -243,7 +243,7 @@ def x2sys_cross( t_or_i = result.columns[2][0] # "t" or "i". to_func = {"t": pd.to_datetime, "i": pd.to_timedelta}[t_or_i] to_args = {"unit": {"d": "D", "s": "s"}[time_unit]} - if t_or_i == "i": + if t_or_i == "t": to_args["origin"] = lib.get_default("TIME_EPOCH") result[result.columns[2:4]] = result[result.columns[2:4]].apply( From 97312fba6a69659c478630f0e362c942d5cc2f84 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 27 May 2024 13:59:08 +0800 Subject: [PATCH 21/32] Improve the handling of TIME_UNIT --- pygmt/src/x2sys_cross.py | 75 +++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 32 deletions(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index 67c26117863..bc2492b5c65 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -70,11 +70,7 @@ def tempfile_from_dftrack(track, suffix): Z="trackvalues", ) @kwargs_to_strings(R="sequence") -def x2sys_cross( - tracks=None, - outfile: str | None = None, - **kwargs, -): +def x2sys_cross(tracks=None, outfile: str | None = None, **kwargs): r""" Calculate crossovers between track data files. @@ -202,7 +198,7 @@ def x2sys_cross( os.environ["X2SYS_HOME"], kwargs["T"], f"{kwargs['T']}.tag" ) # Last line is like "-Dxyz -Etsv -I1/1" - lastline = tagfile.read_text().splitlines()[-1] + lastline = tagfile.read_text(encoding="utf8").splitlines()[-1] for item in sorted(lastline.split()): # sort list alphabetically if item.startswith(("-E", "-D")): # prefer -Etsv over -Dxyz suffix = item[2:] # e.g. tsv (1st choice) or xyz (2nd choice) @@ -224,29 +220,44 @@ def x2sys_cross( vfname=vouttbl, output_type=output_type, header=2 ) - # Convert 3rd and 4th columns to datetime or timedelta. - # These two columns have names "t_1"/"t_2" or "i_1"/"i_2". - # "t_1"/"t_2" means they are absolute datetimes. - # "i_1"/"i_2" means they are dummy times relative to unix epoch. - # Internally, they are all represented as double-precision numbers in - # GMT, relative to TIME_EPOCH with the unit defined by TIME_UNIT. - if output_type == "pandas": - # TIME_UNIT can be 'y'/'o'/'w'/'d'/'h'/'m'/'s', but pd.to_datetime() - # only supports unit of 'D'/'s'/'ms'/'us'/'ns'. - if (time_unit := lib.get_default("TIME_UNIT")) not in "ds": - msg = ( - "Value of configuration TIME_UNIT must be 'd' (day) or " - "'s' (second) but '{time_unit}' is given." - ) - raise GMTInvalidInput(msg) - - t_or_i = result.columns[2][0] # "t" or "i". - to_func = {"t": pd.to_datetime, "i": pd.to_timedelta}[t_or_i] - to_args = {"unit": {"d": "D", "s": "s"}[time_unit]} - if t_or_i == "t": - to_args["origin"] = lib.get_default("TIME_EPOCH") - - result[result.columns[2:4]] = result[result.columns[2:4]].apply( - to_func, **to_args - ) - return result + # Convert 3rd and 4th columns to datetime or timedelta. + # + # These two columns have names "t_1"/"t_2" or "i_1"/"i_2". + # "t_1"/"t_2" means they are absolute datetimes. + # "i_1"/"i_2" means they are dummy times relative to unix epoch. + # Internally, they are all represented as double-precision numbers in GMT, + # relative to TIME_EPOCH with the unit defined by TIME_UNIT. + if output_type == "pandas": + # TIME_UNIT can be 'y'/'o'/'w'/'d'/'h'/'m'/'s'. + # pd.to_datetime() supports unit of 'D'/'s'/'ms'/'us'/'ns' + # pd.to_timedelta() supports unit of 'W'/'D'/'h'/'m'/'s'/'ms'/'us'/'ns'. + time_unit = lib.get_default("TIME_UNIT") + match result.columns[2][0]: # "t" or "i" + case "t": # Absolute time + if time_unit not in "ds": + msg = ( + "Value of configuration TIME_UNIT must be 'd' (day) or " + "'s' (second) but '{time_unit}' is given." + ) + raise GMTInvalidInput(msg) + to_func = pd.to_datetime + to_args = { + "unit": {"d": "D", "s": "s"}[time_unit], + "origin": lib.get_default("TIME_EPOCH"), + } + case "i": # Relative time + to_func = pd.to_timedelta + if time_unit not in "wdhms": + msg = ( + "Value of configuration TIME_UNIT must be 'w' (week), " + "'d' (day), 'h' (hour), 'm' (minute) or 's' (second) " + "but '{time_unit}' is given." + ) + raise GMTInvalidInput(msg) + unit = time_unit.upper() if time_unit in "wd" else time_unit + to_args = {"unit": unit} + + result[result.columns[2:4]] = result[result.columns[2:4]].apply( + to_func, **to_args + ) + return result From 6450ba0488dba0e3337d554e86d1ac66caa33d1e Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 27 May 2024 14:10:19 +0800 Subject: [PATCH 22/32] Call the apply function separately --- pygmt/src/x2sys_cross.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index bc2492b5c65..eb4457200bf 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -237,27 +237,24 @@ def x2sys_cross(tracks=None, outfile: str | None = None, **kwargs): if time_unit not in "ds": msg = ( "Value of configuration TIME_UNIT must be 'd' (day) or " - "'s' (second) but '{time_unit}' is given." + f"'s' (second) but '{time_unit}' is given." ) raise GMTInvalidInput(msg) - to_func = pd.to_datetime - to_args = { - "unit": {"d": "D", "s": "s"}[time_unit], - "origin": lib.get_default("TIME_EPOCH"), - } + result[result.columns[2:4]] = result[result.columns[2:4]].apply( + pd.to_datetime, + unit={"d": "D", "s": "s"}[time_unit], + origin=lib.get_default("TIME_EPOCH"), + ) case "i": # Relative time - to_func = pd.to_timedelta if time_unit not in "wdhms": msg = ( "Value of configuration TIME_UNIT must be 'w' (week), " "'d' (day), 'h' (hour), 'm' (minute) or 's' (second) " - "but '{time_unit}' is given." + f"but '{time_unit}' is given." ) raise GMTInvalidInput(msg) - unit = time_unit.upper() if time_unit in "wd" else time_unit - to_args = {"unit": unit} - - result[result.columns[2:4]] = result[result.columns[2:4]].apply( - to_func, **to_args - ) + result[result.columns[2:4]] = result[result.columns[2:4]].apply( + pd.to_timedelta, + unit=time_unit.upper() if time_unit in "wd" else time_unit, + ) return result From d5294a414b84eb3ec9d505f13845c62f4cb72db6 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 28 May 2024 09:00:12 +0800 Subject: [PATCH 23/32] Further simplify the logic --- pygmt/src/x2sys_cross.py | 48 +++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index eb4457200bf..7dab2469db3 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -228,33 +228,25 @@ def x2sys_cross(tracks=None, outfile: str | None = None, **kwargs): # Internally, they are all represented as double-precision numbers in GMT, # relative to TIME_EPOCH with the unit defined by TIME_UNIT. if output_type == "pandas": - # TIME_UNIT can be 'y'/'o'/'w'/'d'/'h'/'m'/'s'. - # pd.to_datetime() supports unit of 'D'/'s'/'ms'/'us'/'ns' + # In GMT, TIME_UNIT can be 'y' (year), 'o' (month), 'w' (week), + # 'd' (day), 'h' (hour), 'm' (minute), 's' (second). + # Years are 365.2425 days and months are of equal length. # pd.to_timedelta() supports unit of 'W'/'D'/'h'/'m'/'s'/'ms'/'us'/'ns'. - time_unit = lib.get_default("TIME_UNIT") - match result.columns[2][0]: # "t" or "i" - case "t": # Absolute time - if time_unit not in "ds": - msg = ( - "Value of configuration TIME_UNIT must be 'd' (day) or " - f"'s' (second) but '{time_unit}' is given." - ) - raise GMTInvalidInput(msg) - result[result.columns[2:4]] = result[result.columns[2:4]].apply( - pd.to_datetime, - unit={"d": "D", "s": "s"}[time_unit], - origin=lib.get_default("TIME_EPOCH"), - ) - case "i": # Relative time - if time_unit not in "wdhms": - msg = ( - "Value of configuration TIME_UNIT must be 'w' (week), " - "'d' (day), 'h' (hour), 'm' (minute) or 's' (second) " - f"but '{time_unit}' is given." - ) - raise GMTInvalidInput(msg) - result[result.columns[2:4]] = result[result.columns[2:4]].apply( - pd.to_timedelta, - unit=time_unit.upper() if time_unit in "wd" else time_unit, - ) + match time_unit := lib.get_default("TIME_UNIT"): + case "y": + unit = "s" + scale = 365.2425 * 86400.0 + case "o": + unit = "s" + scale = 365.2425 / 12.0 * 86400.0 + case _: + unit = time_unit.upper() if time_unit in "wd" else time_unit + scale = 1.0 + + columns = result.columns[2:4] + result[columns] = result[columns].apply( + lambda x: pd.to_timedelta(x * scale, unit=unit) + ) + if result.columns[2][0] == "t": # "t" or "i": + result[columns] += pd.Timestamp(lib.get_default("TIME_EPOCH")) return result From 55f7c300d40489bd8d8cd4a0831b0497d0a34b34 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 28 May 2024 10:30:53 +0800 Subject: [PATCH 24/32] Fix --- pygmt/src/x2sys_cross.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index 7dab2469db3..57d60687625 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -244,9 +244,8 @@ def x2sys_cross(tracks=None, outfile: str | None = None, **kwargs): scale = 1.0 columns = result.columns[2:4] - result[columns] = result[columns].apply( - lambda x: pd.to_timedelta(x * scale, unit=unit) - ) + result[columns] *= scale + result[columns] = result[columns].apply(pd.to_timedelta, unit=unit) if result.columns[2][0] == "t": # "t" or "i": result[columns] += pd.Timestamp(lib.get_default("TIME_EPOCH")) return result From a44390dce3ec31284305fdb2aa1678ed4b912d1c Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 28 May 2024 12:35:55 +0800 Subject: [PATCH 25/32] Further simplification --- pygmt/src/x2sys_cross.py | 51 +++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index 57d60687625..455590c2da9 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -70,7 +70,9 @@ def tempfile_from_dftrack(track, suffix): Z="trackvalues", ) @kwargs_to_strings(R="sequence") -def x2sys_cross(tracks=None, outfile: str | None = None, **kwargs): +def x2sys_cross( + tracks=None, outfile: str | None = None, **kwargs +) -> pd.DataFrame | None: r""" Calculate crossovers between track data files. @@ -220,32 +222,33 @@ def x2sys_cross(tracks=None, outfile: str | None = None, **kwargs): vfname=vouttbl, output_type=output_type, header=2 ) - # Convert 3rd and 4th columns to datetime or timedelta. - # + if output_type == "file": + return result + + # Convert 3rd and 4th columns to datetime/timedelta for pandas output. # These two columns have names "t_1"/"t_2" or "i_1"/"i_2". # "t_1"/"t_2" means they are absolute datetimes. # "i_1"/"i_2" means they are dummy times relative to unix epoch. # Internally, they are all represented as double-precision numbers in GMT, # relative to TIME_EPOCH with the unit defined by TIME_UNIT. - if output_type == "pandas": - # In GMT, TIME_UNIT can be 'y' (year), 'o' (month), 'w' (week), - # 'd' (day), 'h' (hour), 'm' (minute), 's' (second). - # Years are 365.2425 days and months are of equal length. - # pd.to_timedelta() supports unit of 'W'/'D'/'h'/'m'/'s'/'ms'/'us'/'ns'. - match time_unit := lib.get_default("TIME_UNIT"): - case "y": - unit = "s" - scale = 365.2425 * 86400.0 - case "o": - unit = "s" - scale = 365.2425 / 12.0 * 86400.0 - case _: - unit = time_unit.upper() if time_unit in "wd" else time_unit - scale = 1.0 - - columns = result.columns[2:4] - result[columns] *= scale - result[columns] = result[columns].apply(pd.to_timedelta, unit=unit) - if result.columns[2][0] == "t": # "t" or "i": - result[columns] += pd.Timestamp(lib.get_default("TIME_EPOCH")) + # In GMT, TIME_UNIT can be 'y' (year), 'o' (month), 'w' (week), 'd' (day), + # 'h' (hour), 'm' (minute), 's' (second). Years are 365.2425 days and months + # are of equal length. + # pd.to_timedelta() supports unit of 'W'/'D'/'h'/'m'/'s'/'ms'/'us'/'ns'. + match time_unit := lib.get_default("TIME_UNIT"): + case "y": + unit = "s" + scale = 365.2425 * 86400.0 + case "o": + unit = "s" + scale = 365.2425 / 12.0 * 86400.0 + case _: + unit = time_unit.upper() if time_unit in "wd" else time_unit + scale = 1.0 + + columns = result.columns[2:4] + result[columns] *= scale + result[columns] = result[columns].apply(pd.to_timedelta, unit=unit) + if result.columns[2][0] == "t": # "t" or "i": + result[columns] += pd.Timestamp(lib.get_default("TIME_EPOCH")) return result From b81e2928a21fe1900e4f6c4168afe2384faba675 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 28 May 2024 13:36:24 +0800 Subject: [PATCH 26/32] Improve tests --- pygmt/tests/test_x2sys_cross.py | 57 +++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/pygmt/tests/test_x2sys_cross.py b/pygmt/tests/test_x2sys_cross.py index b1c4a45fb64..c66b8d580fa 100644 --- a/pygmt/tests/test_x2sys_cross.py +++ b/pygmt/tests/test_x2sys_cross.py @@ -12,7 +12,7 @@ import pandas as pd import pytest from packaging.version import Version -from pygmt import x2sys_cross, x2sys_init +from pygmt import config, x2sys_cross, x2sys_init from pygmt.clib import __gmt_version__ from pygmt.datasets import load_sample_data from pygmt.exceptions import GMTInvalidInput @@ -60,7 +60,7 @@ def test_x2sys_cross_input_file_output_file(): assert columns[:6] == ["# x", "y", "i_1", "i_2", "dist_1", "dist_2"] assert columns[6:] == ["head_1", "head_2", "vel_1", "vel_2", "z_X", "z_M"] npt.assert_allclose(result["i_1"].min(), 45.2099, rtol=1.0e-4) - npt.assert_allclose(result["i_1"].max(), 82945.93699, rtol=1.0e-4) + npt.assert_allclose(result["i_1"].max(), 82945.9370, rtol=1.0e-4) @pytest.mark.usefixtures("mock_x2sys_home") @@ -83,11 +83,16 @@ def test_x2sys_cross_input_file_output_dataframe(): columns = list(output.columns) assert columns[:6] == ["x", "y", "i_1", "i_2", "dist_1", "dist_2"] assert columns[6:] == ["head_1", "head_2", "vel_1", "vel_2", "z_X", "z_M"] + assert output["i_1"].dtype.type == np.timedelta64 + assert output["i_2"].dtype.type == np.timedelta64 + npt.assert_allclose(output["i_1"].min().total_seconds(), 45.2099, rtol=1.0e-4) + npt.assert_allclose(output["i_1"].max().total_seconds(), 82945.937, rtol=1.0e-4) @pytest.mark.benchmark @pytest.mark.usefixtures("mock_x2sys_home") -def test_x2sys_cross_input_dataframe_output_dataframe(tracks): +@pytest.mark.parametrize("unit", ["s", "o", "y"]) +def test_x2sys_cross_input_dataframe_output_dataframe(tracks, unit): """ Run x2sys_cross by passing in one dataframe, and output internal crossovers to a pandas.DataFrame. @@ -96,19 +101,44 @@ def test_x2sys_cross_input_dataframe_output_dataframe(tracks): tag = Path(tmpdir).name x2sys_init(tag=tag, fmtfile="xyz", force=True) - output = x2sys_cross(tracks=tracks, tag=tag, coe="i") + with config(TIME_UNIT=unit): + output = x2sys_cross(tracks=tracks, tag=tag, coe="i") assert isinstance(output, pd.DataFrame) assert output.shape == (14, 12) columns = list(output.columns) assert columns[:6] == ["x", "y", "i_1", "i_2", "dist_1", "dist_2"] assert columns[6:] == ["head_1", "head_2", "vel_1", "vel_2", "z_X", "z_M"] - assert output.dtypes["i_1"].type == np.timedelta64 - assert output.dtypes["i_2"].type == np.timedelta64 + assert output["i_1"].dtype.type == np.timedelta64 + assert output["i_2"].dtype.type == np.timedelta64 + + # Scale to convert a value to second + match unit: + case "y": + scale = 365.2425 * 86400.0 + case "o": + scale = 365.2425 / 12.0 * 86400.0 + case _: + scale = 1.0 + npt.assert_allclose( + output["i_1"].min().total_seconds(), 0.9175 * scale, rtol=1.0e-4 + ) + npt.assert_allclose( + output["i_1"].max().total_seconds(), 23.9996 * scale, rtol=1.0e-4 + ) @pytest.mark.usefixtures("mock_x2sys_home") -def test_x2sys_cross_input_two_dataframes(): +@pytest.mark.parametrize( + ("unit", "epoch"), + [ + ("s", "1970-01-01T00:00:00"), + ("o", "1970-01-01T00:00:00"), + ("y", "1970-01-01T00:00:00"), + ("s", "2012-03-04T05:06:07"), + ], +) +def test_x2sys_cross_input_two_dataframes(unit, epoch): """ Run x2sys_cross by passing in two pandas.DataFrame tables with a time column, and output external crossovers to a pandas.DataFrame. @@ -132,15 +162,22 @@ def test_x2sys_cross_input_two_dataframes(): track["time"] = pd.date_range(start=f"2020-{i}1-01", periods=10, freq="min") tracks.append(track) - output = x2sys_cross(tracks=tracks, tag=tag, coe="e") + with config(TIME_UNIT=unit, TIME_EPOCH=epoch): + output = x2sys_cross(tracks=tracks, tag=tag, coe="e") assert isinstance(output, pd.DataFrame) assert output.shape == (26, 12) columns = list(output.columns) assert columns[:6] == ["x", "y", "t_1", "t_2", "dist_1", "dist_2"] assert columns[6:] == ["head_1", "head_2", "vel_1", "vel_2", "z_X", "z_M"] - assert output.dtypes["t_1"].type == np.datetime64 - assert output.dtypes["t_2"].type == np.datetime64 + assert output["t_1"].dtype.type == np.datetime64 + assert output["t_2"].dtype.type == np.datetime64 + + tolerance = pd.Timedelta("1ms") + t1_min = pd.Timestamp("2020-01-01 00:00:10.6677") + t1_max = pd.Timestamp("2020-01-01 00:08:29.8067") + assert abs(output["t_1"].min() - t1_min) < tolerance + assert abs(output["t_1"].max() - t1_max) < tolerance @pytest.mark.usefixtures("mock_x2sys_home") From 870d9c75e5a0936362615ab25bfd9db6bdf7639e Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 28 May 2024 16:28:01 +0800 Subject: [PATCH 27/32] Update pygmt/src/x2sys_cross.py --- pygmt/src/x2sys_cross.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index 455590c2da9..ca99c8a2567 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -228,7 +228,7 @@ def x2sys_cross( # Convert 3rd and 4th columns to datetime/timedelta for pandas output. # These two columns have names "t_1"/"t_2" or "i_1"/"i_2". # "t_1"/"t_2" means they are absolute datetimes. - # "i_1"/"i_2" means they are dummy times relative to unix epoch. + # "i_1"/"i_2" means they are dummy times. # Internally, they are all represented as double-precision numbers in GMT, # relative to TIME_EPOCH with the unit defined by TIME_UNIT. # In GMT, TIME_UNIT can be 'y' (year), 'o' (month), 'w' (week), 'd' (day), From db94b91a5250e307b29d099eced3d1b4dee19c35 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 28 May 2024 17:56:11 +0800 Subject: [PATCH 28/32] Fix x2sys_cross tests on macOS M --- pygmt/tests/test_x2sys_cross.py | 37 +++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/pygmt/tests/test_x2sys_cross.py b/pygmt/tests/test_x2sys_cross.py index c66b8d580fa..935e1b995cc 100644 --- a/pygmt/tests/test_x2sys_cross.py +++ b/pygmt/tests/test_x2sys_cross.py @@ -55,7 +55,7 @@ def test_x2sys_cross_input_file_output_file(): assert output is None # check that output is None since outfile is set assert outfile.stat().st_size > 0 # check that outfile exists at path result = pd.read_csv(outfile, sep="\t", comment=">", header=2) - assert result.shape == (14338, 12) + assert result.shape == (14374, 12) if sys.platform == "darwin" else (14338, 12) columns = list(result.columns) assert columns[:6] == ["# x", "y", "i_1", "i_2", "dist_1", "dist_2"] assert columns[6:] == ["head_1", "head_2", "vel_1", "vel_2", "z_X", "z_M"] @@ -65,7 +65,7 @@ def test_x2sys_cross_input_file_output_file(): @pytest.mark.usefixtures("mock_x2sys_home") @pytest.mark.xfail( - condition=Version(__gmt_version__) < Version("6.5.0") or sys.platform == "darwin", + condition=Version(__gmt_version__) < Version("6.5.0"), reason="Upstream bug fixed in https://github.com/GenericMappingTools/gmt/pull/8188", ) def test_x2sys_cross_input_file_output_dataframe(): @@ -79,7 +79,7 @@ def test_x2sys_cross_input_file_output_dataframe(): output = x2sys_cross(tracks=["@tut_ship.xyz"], tag=tag, coe="i") assert isinstance(output, pd.DataFrame) - assert output.shape == (14338, 12) + assert output.shape == (14374, 12) if sys.platform == "darwin" else (14338, 12) columns = list(output.columns) assert columns[:6] == ["x", "y", "i_1", "i_2", "dist_1", "dist_2"] assert columns[6:] == ["head_1", "head_2", "vel_1", "vel_2", "z_X", "z_M"] @@ -243,7 +243,7 @@ def test_x2sys_cross_invalid_tracks_input_type(tracks): @pytest.mark.usefixtures("mock_x2sys_home") @pytest.mark.xfail( - condition=Version(__gmt_version__) < Version("6.5.0") or sys.platform == "darwin", + condition=Version(__gmt_version__) < Version("6.5.0"), reason="Upstream bug fixed in https://github.com/GenericMappingTools/gmt/pull/8188", ) def test_x2sys_cross_region_interpolation_numpoints(): @@ -264,15 +264,21 @@ def test_x2sys_cross_region_interpolation_numpoints(): ) assert isinstance(output, pd.DataFrame) - assert output.shape == (3882, 12) - # Check crossover errors (z_X) and mean value of observables (z_M) - npt.assert_allclose(output.z_X.mean(), -138.66, rtol=1e-4) - npt.assert_allclose(output.z_M.mean(), -2896.875915) + if sys.platform == "darwin": + assert output.shape == (3894, 12) + # Check crossover errors (z_X) and mean value of observables (z_M) + npt.assert_allclose(output.z_X.mean(), -138.23215, rtol=1e-4) + npt.assert_allclose(output.z_M.mean(), -2897.187545, rtol=1e-4) + else: + assert output.shape == (3882, 12) + # Check crossover errors (z_X) and mean value of observables (z_M) + npt.assert_allclose(output.z_X.mean(), -138.66, rtol=1e-4) + npt.assert_allclose(output.z_M.mean(), -2896.875915, rtol=1e-4) @pytest.mark.usefixtures("mock_x2sys_home") @pytest.mark.xfail( - condition=Version(__gmt_version__) < Version("6.5.0") or sys.platform == "darwin", + condition=Version(__gmt_version__) < Version("6.5.0"), reason="Upstream bug fixed in https://github.com/GenericMappingTools/gmt/pull/8188", ) def test_x2sys_cross_trackvalues(): @@ -285,7 +291,12 @@ def test_x2sys_cross_trackvalues(): output = x2sys_cross(tracks=["@tut_ship.xyz"], tag=tag, trackvalues=True) assert isinstance(output, pd.DataFrame) - assert output.shape == (14338, 12) - # Check mean of track 1 values (z_1) and track 2 values (z_2) - npt.assert_allclose(output.z_1.mean(), -2422.418556, rtol=1e-4) - npt.assert_allclose(output.z_2.mean(), -2402.268364, rtol=1e-4) + if sys.platform == "darwin": + assert output.shape == (14374, 12) + # Check mean of track 1 values (z_1) and track 2 values (z_2) + npt.assert_allclose(output.z_1.mean(), -2422.973372, rtol=1e-4) + npt.assert_allclose(output.z_2.mean(), -2402.87476, rtol=1e-4) + else: + assert output.shape == (14338, 12) + npt.assert_allclose(output.z_1.mean(), -2422.418556, rtol=1e-4) + npt.assert_allclose(output.z_2.mean(), -2402.268364, rtol=1e-4) From de17d5ee4eb8d01671f95d15804fb3bf4e633ca7 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 28 May 2024 21:40:08 +0800 Subject: [PATCH 29/32] Update pygmt/src/x2sys_cross.py --- pygmt/src/x2sys_cross.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index ca99c8a2567..5be61ac4509 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -227,8 +227,7 @@ def x2sys_cross( # Convert 3rd and 4th columns to datetime/timedelta for pandas output. # These two columns have names "t_1"/"t_2" or "i_1"/"i_2". - # "t_1"/"t_2" means they are absolute datetimes. - # "i_1"/"i_2" means they are dummy times. + # "t_" means absolute datetimes and "i_" means dummy times. # Internally, they are all represented as double-precision numbers in GMT, # relative to TIME_EPOCH with the unit defined by TIME_UNIT. # In GMT, TIME_UNIT can be 'y' (year), 'o' (month), 'w' (week), 'd' (day), From ebce56eb7393f78c55da60191e5de2494d6fd245 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 28 May 2024 21:40:39 +0800 Subject: [PATCH 30/32] Update pygmt/src/x2sys_cross.py --- pygmt/src/x2sys_cross.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index 5be61ac4509..c4990d19677 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -248,6 +248,6 @@ def x2sys_cross( columns = result.columns[2:4] result[columns] *= scale result[columns] = result[columns].apply(pd.to_timedelta, unit=unit) - if result.columns[2][0] == "t": # "t" or "i": + if columns[0][0] == "t": # "t" or "i": result[columns] += pd.Timestamp(lib.get_default("TIME_EPOCH")) return result From 9fd35ce97248af4889da409382f1034ef4231979 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sun, 9 Jun 2024 15:48:56 +0800 Subject: [PATCH 31/32] Apply suggestions from code review Co-authored-by: Wei Ji <23487320+weiji14@users.noreply.github.com> --- pygmt/src/x2sys_cross.py | 6 ++++-- pygmt/tests/test_x2sys_cross.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pygmt/src/x2sys_cross.py b/pygmt/src/x2sys_cross.py index c4990d19677..af79cfee852 100644 --- a/pygmt/src/x2sys_cross.py +++ b/pygmt/src/x2sys_cross.py @@ -103,7 +103,9 @@ def x2sys_cross( set it will default to $GMT_SHAREDIR/x2sys]. (**Note**: MGD77 files will also be looked for via $MGD77_HOME/mgd77_paths.txt and .gmt files will be searched for via $GMT_SHAREDIR/mgg/gmtfile_paths). - {outfile} + + outfile + The file name for the output ASCII txt file to store the table in. tag : str Specify the x2sys TAG which identifies the attributes of this data type. @@ -241,7 +243,7 @@ def x2sys_cross( case "o": unit = "s" scale = 365.2425 / 12.0 * 86400.0 - case _: + case "w" | "d" | "h" | "m" | "s": unit = time_unit.upper() if time_unit in "wd" else time_unit scale = 1.0 diff --git a/pygmt/tests/test_x2sys_cross.py b/pygmt/tests/test_x2sys_cross.py index 935e1b995cc..546a2887ea2 100644 --- a/pygmt/tests/test_x2sys_cross.py +++ b/pygmt/tests/test_x2sys_cross.py @@ -95,7 +95,7 @@ def test_x2sys_cross_input_file_output_dataframe(): def test_x2sys_cross_input_dataframe_output_dataframe(tracks, unit): """ Run x2sys_cross by passing in one dataframe, and output internal crossovers to a - pandas.DataFrame. + pandas.DataFrame, checking TIME_UNIT s (second), o (month), and y (year). """ with TemporaryDirectory(prefix="X2SYS", dir=Path.cwd()) as tmpdir: tag = Path(tmpdir).name From 2b3474b638ef5a1bd5e1e930dcf58fa84cec0fd1 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sun, 9 Jun 2024 16:17:11 +0800 Subject: [PATCH 32/32] Update pygmt/tests/test_x2sys_cross.py Co-authored-by: Wei Ji <23487320+weiji14@users.noreply.github.com> --- pygmt/tests/test_x2sys_cross.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pygmt/tests/test_x2sys_cross.py b/pygmt/tests/test_x2sys_cross.py index 546a2887ea2..09f424d1a42 100644 --- a/pygmt/tests/test_x2sys_cross.py +++ b/pygmt/tests/test_x2sys_cross.py @@ -141,7 +141,8 @@ def test_x2sys_cross_input_dataframe_output_dataframe(tracks, unit): def test_x2sys_cross_input_two_dataframes(unit, epoch): """ Run x2sys_cross by passing in two pandas.DataFrame tables with a time column, and - output external crossovers to a pandas.DataFrame. + output external crossovers to a pandas.DataFrame, checking TIME_UNIT s (second), + o (month), and y (year), and TIME_EPOCH 1970 and 2012. """ with TemporaryDirectory(prefix="X2SYS", dir=Path.cwd()) as tmpdir: tmpdir_p = Path(tmpdir)