Skip to content

Commit 60775db

Browse files
authored
Render API docstrings in .py files as html (#98)
This does the following: - Ensures the package in `spec/API_specification/dataframe_api/` is importable - Ensures all docstrings for objects/methods are rendered in the HTML docs via `autodoc` - Adds a description for operators supported by the dataframe object (and a few TODOs) - Adds Sphinx templates for styling the HTML-rendered versions of methods and attributes properly (taken over from the array-api repo). The Sphinx templates help with better styling of methods/attrs - Adds some infrastructure for static typing that we may need. So far only the `Scalar` class needed a `TypeVar`, but there should be more to do there.
1 parent 17ddbbd commit 60775db

16 files changed

+387
-54
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
*.swp
22
_build
3+
__pycache__
4+
spec/API_specification/generated/
+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
.. _column-object:
2+
3+
Column object
4+
=============
5+
6+
A conforming implementation of the dataframe API standard must provide and
7+
support a column object having the following attributes and methods.
8+
9+
-------------------------------------------------
10+
11+
Methods
12+
-------
13+
TODO
14+
15+
..
16+
NOTE: please keep the methods in alphabetical order
17+
18+
.. currentmodule:: dataframe_api
19+
20+
.. autosummary::
21+
:toctree: generated
22+
:template: property.rst
23+

spec/API_specification/column_selection.md

-1
This file was deleted.

spec/API_specification/dataframe_api/__init__.py

+5
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22
Function stubs and API documentation for the DataFrame API standard.
33
"""
44

5+
from .column_object import *
6+
from .dataframe_object import *
7+
from .groupby_object import *
8+
9+
510
__dataframe_api_version__: str = "YYYY.MM"
611
"""
712
String representing the version of the DataFrame API specification to which the
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
"""
2+
Types for type annotations used in the dataframe API standard.
3+
4+
The type variables should be replaced with the actual types for a given
5+
library, e.g., for Pandas TypeVar('DataFrame') would be replaced with pd.DataFrame.
6+
"""
7+
from __future__ import annotations
8+
9+
from dataclasses import dataclass
10+
from typing import (
11+
Any,
12+
List,
13+
Literal,
14+
Optional,
15+
Sequence,
16+
Tuple,
17+
TypeVar,
18+
Union,
19+
Protocol,
20+
)
21+
from enum import Enum
22+
23+
array = TypeVar("array")
24+
Scalar = TypeVar("Scalar")
25+
device = TypeVar("device")
26+
dtype = TypeVar("dtype")
27+
SupportsDLPack = TypeVar("SupportsDLPack")
28+
SupportsBufferProtocol = TypeVar("SupportsBufferProtocol")
29+
PyCapsule = TypeVar("PyCapsule")
30+
# ellipsis cannot actually be imported from anywhere, so include a dummy here
31+
# to keep pyflakes happy. https://github.com/python/typeshed/issues/3556
32+
ellipsis = TypeVar("ellipsis")
33+
34+
_T_co = TypeVar("_T_co", covariant=True)
35+
36+
37+
class NestedSequence(Protocol[_T_co]):
38+
def __getitem__(self, key: int, /) -> Union[_T_co, NestedSequence[_T_co]]:
39+
...
40+
41+
def __len__(self, /) -> int:
42+
...
43+
44+
45+
__all__ = [
46+
"Any",
47+
"DataFrame",
48+
"List",
49+
"Literal",
50+
"NestedSequence",
51+
"Optional",
52+
"PyCapsule",
53+
"SupportsBufferProtocol",
54+
"SupportsDLPack",
55+
"Tuple",
56+
"Union",
57+
"Sequence",
58+
"array",
59+
"device",
60+
"dtype",
61+
"ellipsis",
62+
"Enum",
63+
]

spec/API_specification/dataframe_api/dataframe_object.py

+56-26
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1-
__all__ = ["DataFrame"]
2-
3-
from typing import Sequence, TYPE_CHECKING
1+
from __future__ import annotations
2+
from typing import Sequence, Union, TYPE_CHECKING
43

54
if TYPE_CHECKING:
65
from .column_object import Column
76
from .groupby_object import GroupBy
7+
from ._types import Scalar
8+
9+
10+
__all__ = ["DataFrame"]
811

912

1013
class DataFrame:
@@ -33,7 +36,7 @@ def get_column_by_name(self, name: str, /) -> Column:
3336
"""
3437
...
3538

36-
def get_columns_by_name(self, names: Sequence[str], /) -> "DataFrame":
39+
def get_columns_by_name(self, names: Sequence[str], /) -> DataFrame:
3740
"""
3841
Select multiple columns by name.
3942
@@ -52,7 +55,7 @@ def get_columns_by_name(self, names: Sequence[str], /) -> "DataFrame":
5255
"""
5356
...
5457

55-
def get_rows(self, indices: Sequence[int]) -> "DataFrame":
58+
def get_rows(self, indices: Sequence[int]) -> DataFrame:
5659
"""
5760
Select a subset of rows, similar to `ndarray.take`.
5861
@@ -75,7 +78,7 @@ def get_rows(self, indices: Sequence[int]) -> "DataFrame":
7578

7679
def slice_rows(
7780
self, start: int | None, stop: int | None, step: int | None
78-
) -> "DataFrame":
81+
) -> DataFrame:
7982
"""
8083
Select a subset of rows corresponding to a slice.
8184
@@ -91,7 +94,7 @@ def slice_rows(
9194
"""
9295
...
9396

94-
def get_rows_by_mask(self, mask: Column[bool]) -> "DataFrame":
97+
def get_rows_by_mask(self, mask: "Column[bool]") -> DataFrame:
9598
"""
9699
Select a subset of rows corresponding to a mask.
97100
@@ -110,7 +113,7 @@ def get_rows_by_mask(self, mask: Column[bool]) -> "DataFrame":
110113
"""
111114
...
112115

113-
def insert(self, loc: int, label: str, value: Column) -> "DataFrame":
116+
def insert(self, loc: int, label: str, value: Column) -> DataFrame:
114117
"""
115118
Insert column into DataFrame at specified location.
116119
@@ -124,7 +127,7 @@ def insert(self, loc: int, label: str, value: Column) -> "DataFrame":
124127
"""
125128
...
126129

127-
def drop_column(self, label: str) -> "DataFrame":
130+
def drop_column(self, label: str) -> DataFrame:
128131
"""
129132
Drop the specified column.
130133
@@ -143,7 +146,7 @@ def drop_column(self, label: str) -> "DataFrame":
143146
"""
144147
...
145148

146-
def set_column(self, label: str, value: Column) -> "DataFrame":
149+
def set_column(self, label: str, value: Column) -> DataFrame:
147150
"""
148151
Add or replace a column.
149152
@@ -158,8 +161,10 @@ def set_column(self, label: str, value: Column) -> "DataFrame":
158161
"""
159162
...
160163

161-
def __eq__(self, other: DataFrame | "Scalar") -> "DataFrame":
164+
def __eq__(self, other: DataFrame | Scalar) -> DataFrame:
162165
"""
166+
Compare for equality.
167+
163168
Parameters
164169
----------
165170
other : DataFrame or Scalar
@@ -173,8 +178,10 @@ def __eq__(self, other: DataFrame | "Scalar") -> "DataFrame":
173178
"""
174179
...
175180

176-
def __ne__(self, other: DataFrame | "Scalar") -> "DataFrame":
181+
def __ne__(self, other: DataFrame | Scalar) -> DataFrame:
177182
"""
183+
Compare for non-equality.
184+
178185
Parameters
179186
----------
180187
other : DataFrame or Scalar
@@ -188,8 +195,10 @@ def __ne__(self, other: DataFrame | "Scalar") -> "DataFrame":
188195
"""
189196
...
190197

191-
def __ge__(self, other: DataFrame | "Scalar") -> "DataFrame":
198+
def __ge__(self, other: DataFrame | Scalar) -> DataFrame:
192199
"""
200+
Compare for "greater than or equal to" `other`.
201+
193202
Parameters
194203
----------
195204
other : DataFrame or Scalar
@@ -203,8 +212,10 @@ def __ge__(self, other: DataFrame | "Scalar") -> "DataFrame":
203212
"""
204213
...
205214

206-
def __gt__(self, other: DataFrame | "Scalar") -> "DataFrame":
215+
def __gt__(self, other: DataFrame | Scalar) -> DataFrame:
207216
"""
217+
Compare for "greater than" `other`.
218+
208219
Parameters
209220
----------
210221
other : DataFrame or Scalar
@@ -218,8 +229,10 @@ def __gt__(self, other: DataFrame | "Scalar") -> "DataFrame":
218229
"""
219230
...
220231

221-
def __le__(self, other: DataFrame | "Scalar") -> "DataFrame":
232+
def __le__(self, other: DataFrame | Scalar) -> DataFrame:
222233
"""
234+
Compare for "less than or equal to" `other`.
235+
223236
Parameters
224237
----------
225238
other : DataFrame or Scalar
@@ -233,8 +246,10 @@ def __le__(self, other: DataFrame | "Scalar") -> "DataFrame":
233246
"""
234247
...
235248

236-
def __lt__(self, other: DataFrame | "Scalar") -> "DataFrame":
249+
def __lt__(self, other: DataFrame | Scalar) -> DataFrame:
237250
"""
251+
Compare for "less than" `other`.
252+
238253
Parameters
239254
----------
240255
other : DataFrame or Scalar
@@ -248,8 +263,10 @@ def __lt__(self, other: DataFrame | "Scalar") -> "DataFrame":
248263
"""
249264
...
250265

251-
def __add__(self, other: DataFrame | "Scalar") -> "DataFrame":
266+
def __add__(self, other: DataFrame | Scalar) -> DataFrame:
252267
"""
268+
Add `other` dataframe or scalar to this dataframe.
269+
253270
Parameters
254271
----------
255272
other : DataFrame or Scalar
@@ -263,8 +280,10 @@ def __add__(self, other: DataFrame | "Scalar") -> "DataFrame":
263280
"""
264281
...
265282

266-
def __sub__(self, other: DataFrame | "Scalar") -> "DataFrame":
283+
def __sub__(self, other: DataFrame | Scalar) -> DataFrame:
267284
"""
285+
Subtract `other` dataframe or scalar from this dataframe.
286+
268287
Parameters
269288
----------
270289
other : DataFrame or Scalar
@@ -278,8 +297,10 @@ def __sub__(self, other: DataFrame | "Scalar") -> "DataFrame":
278297
"""
279298
...
280299

281-
def __mul__(self, other: DataFrame | "Scalar") -> "DataFrame":
300+
def __mul__(self, other: DataFrame | Scalar) -> DataFrame:
282301
"""
302+
Multiply `other` dataframe or scalar with this dataframe.
303+
283304
Parameters
284305
----------
285306
other : DataFrame or Scalar
@@ -293,8 +314,10 @@ def __mul__(self, other: DataFrame | "Scalar") -> "DataFrame":
293314
"""
294315
...
295316

296-
def __truediv__(self, other: DataFrame | "Scalar") -> "DataFrame":
317+
def __truediv__(self, other: DataFrame | Scalar) -> DataFrame:
297318
"""
319+
Divide this dataframe by `other` dataframe or scalar (true division, returns floats).
320+
298321
Parameters
299322
----------
300323
other : DataFrame or Scalar
@@ -308,8 +331,10 @@ def __truediv__(self, other: DataFrame | "Scalar") -> "DataFrame":
308331
"""
309332
...
310333

311-
def __floordiv__(self, other: DataFrame | "Scalar") -> "DataFrame":
334+
def __floordiv__(self, other: DataFrame | Scalar) -> DataFrame:
312335
"""
336+
Floor-divide this dataframe by `other` dataframe or scalar (returns integers).
337+
313338
Parameters
314339
----------
315340
other : DataFrame or Scalar
@@ -323,8 +348,10 @@ def __floordiv__(self, other: DataFrame | "Scalar") -> "DataFrame":
323348
"""
324349
...
325350

326-
def __pow__(self, other: DataFrame | "Scalar") -> "DataFrame":
351+
def __pow__(self, other: DataFrame | Scalar) -> DataFrame:
327352
"""
353+
Raise this dataframe to the power of `other`.
354+
328355
Parameters
329356
----------
330357
other : DataFrame or Scalar
@@ -338,8 +365,10 @@ def __pow__(self, other: DataFrame | "Scalar") -> "DataFrame":
338365
"""
339366
...
340367

341-
def __mod__(self, other: DataFrame | "Scalar") -> "DataFrame":
368+
def __mod__(self, other: DataFrame | Scalar) -> DataFrame:
342369
"""
370+
Return modulus of this dataframe by `other` (`%` operator).
371+
343372
Parameters
344373
----------
345374
other : DataFrame or Scalar
@@ -353,8 +382,10 @@ def __mod__(self, other: DataFrame | "Scalar") -> "DataFrame":
353382
"""
354383
...
355384

356-
def __divmod__(self, other: DataFrame | "Scalar") -> tuple["DataFrame", "DataFrame"]:
385+
def __divmod__(self, other: DataFrame | Scalar) -> tuple[DataFrame, DataFrame]:
357386
"""
387+
Return quotient and remainder of integer division. See `divmod` builtin function.
388+
358389
Parameters
359390
----------
360391
other : DataFrame or Scalar
@@ -364,8 +395,7 @@ def __divmod__(self, other: DataFrame | "Scalar") -> tuple["DataFrame", "DataFra
364395
365396
Returns
366397
-------
367-
DataFrame
368-
DataFrame
398+
A tuple of two DataFrames
369399
"""
370400
...
371401

0 commit comments

Comments
 (0)