@@ -26,6 +26,7 @@ class _GMT_DATASET(ctp.Structure): # noqa: N801
26
26
>>> with GMTTempFile(suffix=".txt") as tmpfile:
27
27
... # Prepare the sample data file
28
28
... with Path(tmpfile.name).open(mode="w") as fp:
29
+ ... print("# x y z name", file=fp)
29
30
... print(">", file=fp)
30
31
... print("1.0 2.0 3.0 TEXT1 TEXT23", file=fp)
31
32
... print("4.0 5.0 6.0 TEXT4 TEXT567", file=fp)
@@ -42,7 +43,8 @@ class _GMT_DATASET(ctp.Structure): # noqa: N801
42
43
... print(ds.min[: ds.n_columns], ds.max[: ds.n_columns])
43
44
... # The table
44
45
... tbl = ds.table[0].contents
45
- ... print(tbl.n_columns, tbl.n_segments, tbl.n_records)
46
+ ... print(tbl.n_columns, tbl.n_segments, tbl.n_records, tbl.n_headers)
47
+ ... print(tbl.header[: tbl.n_headers])
46
48
... print(tbl.min[: tbl.n_columns], ds.max[: tbl.n_columns])
47
49
... for i in range(tbl.n_segments):
48
50
... seg = tbl.segment[i].contents
@@ -51,7 +53,8 @@ class _GMT_DATASET(ctp.Structure): # noqa: N801
51
53
... print(seg.text[: seg.n_rows])
52
54
1 3 2
53
55
[1.0, 2.0, 3.0] [10.0, 11.0, 12.0]
54
- 3 2 4
56
+ 3 2 4 1
57
+ [b'x y z name']
55
58
[1.0, 2.0, 3.0] [10.0, 11.0, 12.0]
56
59
[1.0, 4.0]
57
60
[2.0, 5.0]
@@ -144,8 +147,9 @@ class _GMT_DATASEGMENT(ctp.Structure): # noqa: N801
144
147
("hidden" , ctp .c_void_p ),
145
148
]
146
149
147
- def to_dataframe (
150
+ def to_dataframe ( # noqa: PLR0912
148
151
self ,
152
+ header : int | None = None ,
149
153
column_names : pd .Index | None = None ,
150
154
dtype : type | Mapping [Any , type ] | None = None ,
151
155
index_col : str | int | None = None ,
@@ -164,6 +168,10 @@ def to_dataframe(
164
168
----------
165
169
column_names
166
170
A list of column names.
171
+ header
172
+ Row number (0-based) containing column names. ``header=None`` means not to parse
173
+ the column names from the table header. Ignored if the row number is greater than
174
+ or equal to the number of headers in the table.
167
175
dtype
168
176
Data type. Can be a single type for all columns or a dictionary mapping
169
177
column names to types.
@@ -184,6 +192,7 @@ def to_dataframe(
184
192
>>> with GMTTempFile(suffix=".txt") as tmpfile:
185
193
... # prepare the sample data file
186
194
... with Path(tmpfile.name).open(mode="w") as fp:
195
+ ... print("# col1 col2 col3 colstr", file=fp)
187
196
... print(">", file=fp)
188
197
... print("1.0 2.0 3.0 TEXT1 TEXT23", file=fp)
189
198
... print("4.0 5.0 6.0 TEXT4 TEXT567", file=fp)
@@ -194,9 +203,9 @@ def to_dataframe(
194
203
... with lib.virtualfile_out(kind="dataset") as vouttbl:
195
204
... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
196
205
... ds = lib.read_virtualfile(vouttbl, kind="dataset")
197
- ... df = ds.contents.to_dataframe()
206
+ ... df = ds.contents.to_dataframe(header=0 )
198
207
>>> df
199
- 0 1 2 3
208
+ col1 col2 col3 colstr
200
209
0 1.0 2.0 3.0 TEXT1 TEXT23
201
210
1 4.0 5.0 6.0 TEXT4 TEXT567
202
211
2 7.0 8.0 9.0 TEXT8 TEXT90
@@ -230,14 +239,19 @@ def to_dataframe(
230
239
pd .Series (data = np .char .decode (textvector ), dtype = pd .StringDtype ())
231
240
)
232
241
242
+ if header is not None :
243
+ tbl = self .table [0 ].contents # Use the first table!
244
+ if header < tbl .n_headers :
245
+ column_names = tbl .header [header ].decode ().split ()
246
+
233
247
if len (vectors ) == 0 :
234
248
# Return an empty DataFrame if no columns are found.
235
249
df = pd .DataFrame (columns = column_names )
236
250
else :
237
251
# Create a DataFrame object by concatenating multiple columns
238
252
df = pd .concat (objs = vectors , axis = "columns" )
239
253
if column_names is not None : # Assign column names
240
- df .columns = column_names
254
+ df .columns = column_names [: df . shape [ 1 ]]
241
255
if dtype is not None : # Set dtype for the whole dataset or individual columns
242
256
df = df .astype (dtype )
243
257
if index_col is not None : # Use a specific column as index
0 commit comments