data-apis · rgommers · Jan 18, 2023 · Jan 17, 2023 · Jan 17, 2023 · Jan 17, 2023
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,4 @@
 *.swp
 _build
+__pycache__
+spec/API_specification/generated/
diff --git a/spec/API_specification/column_object.rst b/spec/API_specification/column_object.rst
@@ -0,0 +1,23 @@
+.. _column-object:
+
+Column object
+=============
+
+A conforming implementation of the dataframe API standard must provide and
+support a column object having the following attributes and methods.
+
+-------------------------------------------------
+
+Methods
+-------
+TODO
+
+..
+  NOTE: please keep the methods in alphabetical order
+
+    .. currentmodule:: dataframe_api
+
+    .. autosummary::
+       :toctree: generated
+       :template: property.rst
+
diff --git a/spec/API_specification/column_selection.md b/spec/API_specification/column_selection.md
diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py
@@ -2,6 +2,11 @@
 Function stubs and API documentation for the DataFrame API standard.
 """
 
+from .column_object import *
+from .dataframe_object import *
+from .groupby_object import *
+
+
 __dataframe_api_version__: str = "YYYY.MM"
 """
 String representing the version of the DataFrame API specification to which the

diff --git a/spec/API_specification/dataframe_api/_types.py b/spec/API_specification/dataframe_api/_types.py
@@ -0,0 +1,70 @@
+"""
+Types for type annotations used in the dataframe API standard.
+
+The type variables should be replaced with the actual types for a given
+library, e.g., for Pandas TypeVar('DataFrame') would be replaced with pd.DataFrame.
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import (
+    Any,
+    List,
+    Literal,
+    Optional,
+    Sequence,
+    Tuple,
+    TypeVar,
+    Union,
+    Protocol,
+)
+from enum import Enum
+
+array = TypeVar("array")
+Scalar = TypeVar("Scalar")
+device = TypeVar("device")
+dtype = TypeVar("dtype")
+SupportsDLPack = TypeVar("SupportsDLPack")
+SupportsBufferProtocol = TypeVar("SupportsBufferProtocol")
+PyCapsule = TypeVar("PyCapsule")
+# ellipsis cannot actually be imported from anywhere, so include a dummy here
+# to keep pyflakes happy. https://github.com/python/typeshed/issues/3556
+ellipsis = TypeVar("ellipsis")
+
+_T_co = TypeVar("_T_co", covariant=True)
+
+
+class NestedSequence(Protocol[_T_co]):
+    """
+    Structural type for a sequence whose items are either values of type
+    ``_T_co`` or further nested sequences of the same kind.
+    """
+
+    def __getitem__(self, key: int, /) -> Union[_T_co, NestedSequence[_T_co]]:
+        """Return the item at integer position `key`: a value or a nested sequence."""
+        ...
+
+    def __len__(self, /) -> int:
+        """Return the length of this sequence as an integer."""
+        ...
+
+
+__all__ = [
+    "Any",
+    "List",
+    "Literal",
+    "NestedSequence",
+    "Optional",
+    "PyCapsule",
+    "Scalar",
+    "SupportsBufferProtocol",
+    "SupportsDLPack",
+    "Tuple",
+    "Union",
+    "Sequence",
+    "array",
+    "device",
+    "dtype",
+    "ellipsis",
+    "Enum",
+]
diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py
@@ -1,10 +1,13 @@
-__all__ = ["DataFrame"]
+from __future__ import annotations
+from typing import Sequence, Union, TYPE_CHECKING
+
+from .column_object import Column
+from .groupby_object import GroupBy
 
-from typing import Sequence, TYPE_CHECKING
+from ._types import Scalar
 
-if TYPE_CHECKING:
-    from .column_object import Column
-    from .groupby_object import GroupBy
+
+__all__ = ["DataFrame"]
 
 
 class DataFrame:
@@ -33,7 +36,7 @@ def get_column_by_name(self, name: str, /) -> Column:
         """
         ...
 
-    def get_columns_by_name(self, names: Sequence[str], /) -> "DataFrame":
+    def get_columns_by_name(self, names: Sequence[str], /) -> DataFrame:
         """
         Select multiple columns by name.
 
@@ -52,7 +55,7 @@ def get_columns_by_name(self, names: Sequence[str], /) -> "DataFrame":
         """
         ...
 
-    def get_rows(self, indices: Sequence[int]) -> "DataFrame":
+    def get_rows(self, indices: Sequence[int]) -> DataFrame:
         """
         Select a subset of rows, similar to `ndarray.take`.
 
@@ -75,7 +78,7 @@ def get_rows(self, indices: Sequence[int]) -> "DataFrame":
 
     def slice_rows(
         self, start: int | None, stop: int | None, step: int | None
-    ) -> "DataFrame":
+    ) -> DataFrame:
         """
         Select a subset of rows corresponding to a slice.
 
@@ -91,7 +94,7 @@ def slice_rows(
         """
         ...
 
-    def get_rows_by_mask(self, mask: Column[bool]) -> "DataFrame":
+    def get_rows_by_mask(self, mask: "Column[bool]") -> DataFrame:
         """
         Select a subset of rows corresponding to a mask.
 
@@ -110,7 +113,7 @@ def get_rows_by_mask(self, mask: Column[bool]) -> "DataFrame":
         """
         ...
 
-    def insert(self, loc: int, label: str, value: Column) -> "DataFrame":
+    def insert(self, loc: int, label: str, value: Column) -> DataFrame:
         """
         Insert column into DataFrame at specified location.
 
@@ -124,7 +127,7 @@ def insert(self, loc: int, label: str, value: Column) -> "DataFrame":
         """
         ...
 
-    def drop_column(self, label: str) -> "DataFrame":
+    def drop_column(self, label: str) -> DataFrame:
         """
         Drop the specified column.
 
@@ -143,7 +146,7 @@ def drop_column(self, label: str) -> "DataFrame":
         """
         ...
 
-    def set_column(self, label: str, value: Column) -> "DataFrame":
+    def set_column(self, label: str, value: Column) -> DataFrame:
         """
         Add or replace a column.
 
@@ -158,8 +161,10 @@ def set_column(self, label: str, value: Column) -> "DataFrame":
         """
         ...
 
-    def __eq__(self, other: DataFrame | "Scalar") -> "DataFrame":
+    def __eq__(self, other: DataFrame | "Scalar") -> DataFrame:
         """
+        Compare for equality.
+
         Parameters
         ----------
         other : DataFrame or Scalar
@@ -173,8 +178,10 @@ def __eq__(self, other: DataFrame | "Scalar") -> "DataFrame":
         """
         ...
 
-    def __ne__(self, other: DataFrame | "Scalar") -> "DataFrame":
+    def __ne__(self, other: DataFrame | "Scalar") -> DataFrame:
         """
+        Compare for non-equality.
+
         Parameters
         ----------
         other : DataFrame or Scalar
@@ -188,8 +195,10 @@ def __ne__(self, other: DataFrame | "Scalar") -> "DataFrame":
         """
         ...
 
-    def __ge__(self, other: DataFrame | "Scalar") -> "DataFrame":
+    def __ge__(self, other: DataFrame | "Scalar") -> DataFrame:
         """
+        Compare for "greater than or equal to" `other`.
+
         Parameters
         ----------
         other : DataFrame or Scalar
@@ -203,8 +212,10 @@ def __ge__(self, other: DataFrame | "Scalar") -> "DataFrame":
         """
         ...
 
-    def __gt__(self, other: DataFrame | "Scalar") -> "DataFrame":
+    def __gt__(self, other: DataFrame | "Scalar") -> DataFrame:
         """
+        Compare for "greater than" `other`.
+
         Parameters
         ----------
         other : DataFrame or Scalar
@@ -218,8 +229,10 @@ def __gt__(self, other: DataFrame | "Scalar") -> "DataFrame":
         """
         ...
 
-    def __le__(self, other: DataFrame | "Scalar") -> "DataFrame":
+    def __le__(self, other: DataFrame | "Scalar") -> DataFrame:
         """
+        Compare for "less than or equal to" `other`.
+
         Parameters
         ----------
         other : DataFrame or Scalar
@@ -233,8 +246,10 @@ def __le__(self, other: DataFrame | "Scalar") -> "DataFrame":
         """
         ...
 
-    def __lt__(self, other: DataFrame | "Scalar") -> "DataFrame":
+    def __lt__(self, other: DataFrame | "Scalar") -> DataFrame:
         """
+        Compare for "less than" `other`.
+
         Parameters
         ----------
         other : DataFrame or Scalar
@@ -248,8 +263,10 @@ def __lt__(self, other: DataFrame | "Scalar") -> "DataFrame":
         """
         ...
 
-    def __add__(self, other: DataFrame | "Scalar") -> "DataFrame":
+    def __add__(self, other: DataFrame | "Scalar") -> DataFrame:
         """
+        Add `other` dataframe or scalar to this dataframe.
+
         Parameters
         ----------
         other : DataFrame or Scalar
@@ -263,8 +280,10 @@ def __add__(self, other: DataFrame | "Scalar") -> "DataFrame":
         """
         ...
 
-    def __sub__(self, other: DataFrame | "Scalar") -> "DataFrame":
+    def __sub__(self, other: DataFrame | "Scalar") -> DataFrame:
         """
+        Subtract `other` dataframe or scalar from this dataframe.
+
         Parameters
         ----------
         other : DataFrame or Scalar
@@ -278,8 +297,10 @@ def __sub__(self, other: DataFrame | "Scalar") -> "DataFrame":
         """
         ...
 
-    def __mul__(self, other: DataFrame | "Scalar") -> "DataFrame":
+    def __mul__(self, other: DataFrame | "Scalar") -> DataFrame:
         """
+        Multiply this dataframe by `other` dataframe or scalar.
+
         Parameters
         ----------
         other : DataFrame or Scalar
@@ -293,8 +314,10 @@ def __mul__(self, other: DataFrame | "Scalar") -> "DataFrame":
         """
         ...
 
-    def __truediv__(self, other: DataFrame | "Scalar") -> "DataFrame":
+    def __truediv__(self, other: DataFrame | "Scalar") -> DataFrame:
         """
+        Divide this dataframe by `other` dataframe or scalar. True division, returns floats.
+
         Parameters
         ----------
         other : DataFrame or Scalar
@@ -308,8 +331,10 @@ def __truediv__(self, other: DataFrame | "Scalar") -> "DataFrame":
         """
         ...
 
-    def __floordiv__(self, other: DataFrame | "Scalar") -> "DataFrame":
+    def __floordiv__(self, other: DataFrame | "Scalar") -> DataFrame:
         """
+        Floor-divide (returns integers) this dataframe by `other` dataframe or scalar.
+
         Parameters
         ----------
         other : DataFrame or Scalar
@@ -323,8 +348,10 @@ def __floordiv__(self, other: DataFrame | "Scalar") -> "DataFrame":
         """
         ...
 
-    def __pow__(self, other: DataFrame | "Scalar") -> "DataFrame":
+    def __pow__(self, other: DataFrame | "Scalar") -> DataFrame:
         """
+        Raise this dataframe to the power of `other`.
+
         Parameters
         ----------
         other : DataFrame or Scalar
@@ -338,8 +365,10 @@ def __pow__(self, other: DataFrame | "Scalar") -> "DataFrame":
         """
         ...
 
-    def __mod__(self, other: DataFrame | "Scalar") -> "DataFrame":
+    def __mod__(self, other: DataFrame | "Scalar") -> DataFrame:
         """
+        Return modulus of this dataframe by `other` (`%` operator).
+
         Parameters
         ----------
         other : DataFrame or Scalar
@@ -353,8 +382,10 @@ def __mod__(self, other: DataFrame | "Scalar") -> "DataFrame":
         """
         ...
 
-    def __divmod__(self, other: DataFrame | "Scalar") -> tuple["DataFrame", "DataFrame"]:
+    def __divmod__(self, other: DataFrame | "Scalar") -> tuple[DataFrame, DataFrame]:
         """
+        Return quotient and remainder of integer division. See `divmod` builtin function.
+
         Parameters
         ----------
         other : DataFrame or Scalar
@@ -364,8 +395,7 @@ def __divmod__(self, other: DataFrame | "Scalar") -> tuple["DataFrame", "DataFra
 
         Returns
         -------
-        DataFrame
-        DataFrame
+        A tuple of two DataFrames.
         """
         ...