@@ -1775,7 +1775,7 @@ def read_virtualfile(
1775
1775
def virtualfile_to_dataset (
1776
1776
self ,
1777
1777
vfname : str ,
1778
- output_type : Literal ["pandas" , "numpy" , "file" ] = "pandas" ,
1778
+ output_type : Literal ["pandas" , "numpy" , "file" , "strings" ] = "pandas" ,
1779
1779
column_names : list [str ] | None = None ,
1780
1780
dtype : type | dict [str , type ] | None = None ,
1781
1781
index_col : str | int | None = None ,
@@ -1796,6 +1796,7 @@ def virtualfile_to_dataset(
1796
1796
- ``"pandas"`` will return a :class:`pandas.DataFrame` object.
1797
1797
- ``"numpy"`` will return a :class:`numpy.ndarray` object.
1798
1798
- ``"file"`` means the result was saved to a file and will return ``None``.
1799
+ - ``"strings"`` will return only the trailing text, as a :class:`numpy.ndarray` of strings.
1799
1800
column_names
1800
1801
The column names for the :class:`pandas.DataFrame` output.
1801
1802
dtype
@@ -1841,6 +1842,16 @@ def virtualfile_to_dataset(
1841
1842
... assert result is None
1842
1843
... assert Path(outtmp.name).stat().st_size > 0
1843
1844
...
1845
+ ... # strings output
1846
+ ... with Session() as lib:
1847
+ ... with lib.virtualfile_out(kind="dataset") as vouttbl:
1848
+ ... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
1849
+ ... outstr = lib.virtualfile_to_dataset(
1850
+ ... vfname=vouttbl, output_type="strings"
1851
+ ... )
1852
+ ... assert isinstance(outstr, np.ndarray)
1853
+ ... assert outstr.dtype.kind in ("S", "U")
1854
+ ...
1844
1855
... # numpy output
1845
1856
... with Session() as lib:
1846
1857
... with lib.virtualfile_out(kind="dataset") as vouttbl:
@@ -1869,6 +1880,9 @@ def virtualfile_to_dataset(
1869
1880
... column_names=["col1", "col2", "col3", "coltext"],
1870
1881
... )
1871
1882
... assert isinstance(outpd2, pd.DataFrame)
1883
+ >>> outstr
1884
+ array(['TEXT1 TEXT23', 'TEXT4 TEXT567', 'TEXT8 TEXT90',
1885
+ 'TEXT123 TEXT456789'], dtype='<U18')
1872
1886
>>> outnp
1873
1887
array([[1.0, 2.0, 3.0, 'TEXT1 TEXT23'],
1874
1888
[4.0, 5.0, 6.0, 'TEXT4 TEXT567'],
@@ -1890,11 +1904,14 @@ def virtualfile_to_dataset(
1890
1904
if output_type == "file" : # Already written to file, so return None
1891
1905
return None
1892
1906
1893
- # Read the virtual file as a GMT dataset and convert to pandas.DataFrame
1894
- result = self .read_virtualfile (vfname , kind = "dataset" ).contents .to_dataframe (
1895
- column_names = column_names ,
1896
- dtype = dtype ,
1897
- index_col = index_col ,
1907
+ # Read the virtual file as a _GMT_DATASET object
1908
+ result = self .read_virtualfile (vfname , kind = "dataset" ).contents
1909
+
1910
+ if output_type == "strings" : # strings output
1911
+ return result .to_strings ()
1912
+
1913
+ result = result .to_dataframe (
1914
+ column_names = column_names , dtype = dtype , index_col = index_col
1898
1915
)
1899
1916
if output_type == "numpy" : # numpy.ndarray output
1900
1917
return result .to_numpy ()
0 commit comments