-
Notifications
You must be signed in to change notification settings - Fork 228
pygmt.x2sys_cross: Refactor to use virtualfiles for output tables [BREAKING CHANGE: Dummy times in 3rd and 4th columns now have np.timedelta64 type] #3182
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 37 commits
95fab98
ce926a0
86278cb
58c6ea4
d6eeade
9d12ae1
28eb1df
5e926e8
c1c756d
3a3df0a
3aea9a6
d869a32
b46d21d
81a1ec0
07fe53e
5f04506
84765e4
b9b4098
bf59e61
9bc063a
b0b5099
1396ee8
6f2671a
a9a4179
04a1986
b27212b
aa3e9af
97312fb
6450ba0
29a7f9e
d5294a4
55f7c30
a44390d
b81e292
870d9c7
db94b91
de17d5e
ebce56e
71af717
cf2cfc7
3a62fc1
9fd35ce
2b3474b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -5,13 +5,12 @@ | |||
import contextlib | ||||
import os | ||||
from pathlib import Path | ||||
from typing import Any | ||||
|
||||
import pandas as pd | ||||
from packaging.version import Version | ||||
from pygmt.clib import Session | ||||
from pygmt.exceptions import GMTInvalidInput | ||||
from pygmt.helpers import ( | ||||
GMTTempFile, | ||||
build_arg_list, | ||||
data_kind, | ||||
fmt_docstring, | ||||
|
@@ -71,7 +70,9 @@ def tempfile_from_dftrack(track, suffix): | |||
Z="trackvalues", | ||||
) | ||||
@kwargs_to_strings(R="sequence") | ||||
def x2sys_cross(tracks=None, outfile=None, **kwargs): | ||||
def x2sys_cross( | ||||
tracks=None, outfile: str | None = None, **kwargs | ||||
) -> pd.DataFrame | None: | ||||
r""" | ||||
Calculate crossovers between track data files. | ||||
|
||||
|
@@ -102,11 +103,7 @@ def x2sys_cross(tracks=None, outfile=None, **kwargs): | |||
set it will default to $GMT_SHAREDIR/x2sys]. (**Note**: MGD77 files | ||||
will also be looked for via $MGD77_HOME/mgd77_paths.txt and .gmt | ||||
files will be searched for via $GMT_SHAREDIR/mgg/gmtfile_paths). | ||||
|
||||
outfile : str | ||||
Optional. The file name for the output ASCII txt file to store the | ||||
table in. | ||||
|
||||
{outfile} | ||||
tag : str | ||||
Specify the x2sys TAG which identifies the attributes of this data | ||||
type. | ||||
|
@@ -183,68 +180,74 @@ def x2sys_cross(tracks=None, outfile=None, **kwargs): | |||
|
||||
Returns | ||||
------- | ||||
crossover_errors : :class:`pandas.DataFrame` or None | ||||
Table containing crossover error information. | ||||
Return type depends on whether the ``outfile`` parameter is set: | ||||
|
||||
- :class:`pandas.DataFrame` with (x, y, ..., etc) if ``outfile`` is not | ||||
set | ||||
- None if ``outfile`` is set (track output will be stored in the set in | ||||
``outfile``) | ||||
crossover_errors | ||||
Table containing crossover error information. A :class:`pandas.DataFrame` object | ||||
is returned if ``outfile`` is not set, otherwise ``None`` is returned and output | ||||
will be stored in file set by ``outfile``. | ||||
""" | ||||
with Session() as lib: | ||||
file_contexts = [] | ||||
for track in tracks: | ||||
kind = data_kind(track) | ||||
if kind == "file": | ||||
# Determine output type based on 'outfile' parameter | ||||
output_type = "pandas" if outfile is None else "file" | ||||
|
||||
file_contexts: list[contextlib.AbstractContextManager[Any]] = [] | ||||
for track in tracks: | ||||
match data_kind(track): | ||||
case "file": | ||||
file_contexts.append(contextlib.nullcontext(track)) | ||||
elif kind == "matrix": | ||||
case "matrix": | ||||
# find suffix (-E) of trackfiles used (e.g. xyz, csv, etc) from | ||||
# $X2SYS_HOME/TAGNAME/TAGNAME.tag file | ||||
lastline = ( | ||||
Path(os.environ["X2SYS_HOME"], kwargs["T"], f"{kwargs['T']}.tag") | ||||
.read_text(encoding="utf8") | ||||
.strip() | ||||
.split("\n")[-1] | ||||
) # e.g. "-Dxyz -Etsv -I1/1" | ||||
tagfile = Path( | ||||
os.environ["X2SYS_HOME"], kwargs["T"], f"{kwargs['T']}.tag" | ||||
) | ||||
# Last line is like "-Dxyz -Etsv -I1/1" | ||||
lastline = tagfile.read_text(encoding="utf8").splitlines()[-1] | ||||
for item in sorted(lastline.split()): # sort list alphabetically | ||||
if item.startswith(("-E", "-D")): # prefer -Etsv over -Dxyz | ||||
suffix = item[2:] # e.g. tsv (1st choice) or xyz (2nd choice) | ||||
|
||||
# Save pandas.DataFrame track data to temporary file | ||||
file_contexts.append(tempfile_from_dftrack(track=track, suffix=suffix)) | ||||
else: | ||||
case _: | ||||
raise GMTInvalidInput(f"Unrecognized data type: {type(track)}") | ||||
|
||||
with GMTTempFile(suffix=".txt") as tmpfile: | ||||
with Session() as lib: | ||||
with lib.virtualfile_out(kind="dataset", fname=outfile) as vouttbl: | ||||
with contextlib.ExitStack() as stack: | ||||
fnames = [stack.enter_context(c) for c in file_contexts] | ||||
if outfile is None: | ||||
outfile = tmpfile.name | ||||
lib.call_module( | ||||
module="x2sys_cross", | ||||
args=build_arg_list(kwargs, infile=fnames, outfile=outfile), | ||||
) | ||||
|
||||
# Read temporary csv output to a pandas table | ||||
if outfile == tmpfile.name: # if outfile isn't set, return pd.DataFrame | ||||
# Read the tab-separated ASCII table | ||||
date_format_kwarg = ( | ||||
{"date_format": "ISO8601"} | ||||
if Version(pd.__version__) >= Version("2.0.0") | ||||
else {} | ||||
args=build_arg_list(kwargs, infile=fnames, outfile=vouttbl), | ||||
) | ||||
table = pd.read_csv( | ||||
tmpfile.name, | ||||
sep="\t", | ||||
header=2, # Column names are on 2nd row | ||||
comment=">", # Skip the 3rd row with a ">" | ||||
parse_dates=[2, 3], # Datetimes on 3rd and 4th column | ||||
**date_format_kwarg, # Parse dates in ISO8601 format on pandas>=2 | ||||
result = lib.virtualfile_to_dataset( | ||||
vfname=vouttbl, output_type=output_type, header=2 | ||||
) | ||||
Comment on lines
-241
to
225
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes. The main problem is that, as far as I know, there is no equivalent way to represent a multi-segment file in pandas. The multi-segment support was also mentioned in #2729 (comment). If we can have a general way to represent multi-segment in pandas, then it should be straightforward to output multi-segments from There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
It's already tested in the pygmt/pygmt/datatypes/dataset.py Line 215 in 466c8b6
For |
||||
# Remove the "# " from "# x" in the first column | ||||
table = table.rename(columns={table.columns[0]: table.columns[0][2:]}) | ||||
elif outfile != tmpfile.name: # if outfile is set, output in outfile only | ||||
table = None | ||||
|
||||
return table | ||||
if output_type == "file": | ||||
return result | ||||
|
||||
# Convert 3rd and 4th columns to datetime/timedelta for pandas output. | ||||
# These two columns have names "t_1"/"t_2" or "i_1"/"i_2". | ||||
# "t_" means absolute datetimes and "i_" means dummy times. | ||||
# Internally, they are all represented as double-precision numbers in GMT, | ||||
# relative to TIME_EPOCH with the unit defined by TIME_UNIT. | ||||
# In GMT, TIME_UNIT can be 'y' (year), 'o' (month), 'w' (week), 'd' (day), | ||||
# 'h' (hour), 'm' (minute), 's' (second). Years are 365.2425 days and months | ||||
# are of equal length. | ||||
# pd.to_timedelta() supports unit of 'W'/'D'/'h'/'m'/'s'/'ms'/'us'/'ns'. | ||||
match time_unit := lib.get_default("TIME_UNIT"): | ||||
case "y": | ||||
unit = "s" | ||||
scale = 365.2425 * 86400.0 | ||||
case "o": | ||||
unit = "s" | ||||
scale = 365.2425 / 12.0 * 86400.0 | ||||
case _: | ||||
unit = time_unit.upper() if time_unit in "wd" else time_unit | ||||
scale = 1.0 | ||||
seisman marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
|
||||
columns = result.columns[2:4] | ||||
result[columns] *= scale | ||||
result[columns] = result[columns].apply(pd.to_timedelta, unit=unit) | ||||
if result.columns[2][0] == "t": # "t" or "i": | ||||
seisman marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
result[columns] += pd.Timestamp(lib.get_default("TIME_EPOCH")) | ||||
return result |
Uh oh!
There was an error while loading. Please reload this page.