Skip to content

Commit 3111499

Browse files
authored
Add DataFrame.unique_indices (#194)
* add DataFrame.unique_indices
* typo + minor reword
1 parent d10e22a commit 3111499

File tree

2 files changed

+22
-1
lines changed

2 files changed

+22
-1
lines changed

Diff for: spec/API_specification/dataframe_api/column_object.py

+1 -1
Original file line numberDiff line numberDiff line change
@@ -624,7 +624,7 @@ def unique_indices(self, *, skip_nulls: bool = True) -> Column[Any]:
624624
indices corresponding to the same unique value, there is no guarantee
625625
about which one will appear in the result.
626626
If the original Column contains multiple `'NaN'` values, then
627-
only a single index corresponding to those values should be returned.
627+
only a single index corresponding to those values will be returned.
628628
Likewise for null values (if ``skip_nulls=False``).
629629
To get the unique values, you can do ``col.get_rows(col.unique_indices())``.
630630
"""

Diff for: spec/API_specification/dataframe_api/dataframe_object.py

+21
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,27 @@ def is_nan(self) -> DataFrame:
759759
"""
760760
...
761761

762+
def unique_indices(self, keys: Sequence[str], *, skip_nulls: bool = True) -> Column[int]:
763+
"""
764+
Return indices corresponding to unique values across selected columns.
765+
766+
Returns
767+
-------
768+
Column[int]
769+
Indices corresponding to unique values.
770+
771+
Notes
772+
-----
773+
There are no ordering guarantees. In particular, if there are multiple
774+
indices corresponding to the same unique value(s), there is no guarantee
775+
about which one will appear in the result.
776+
If the original column(s) contain multiple `'NaN'` values, then
777+
only a single index corresponding to those values will be returned.
778+
Likewise for null values (if ``skip_nulls=False``).
779+
To get the unique values, you can do ``df.get_rows(df.unique_indices(keys))``.
780+
"""
781+
...
782+
762783
def fill_nan(self, value: float | 'null', /) -> DataFrame:
763784
"""
764785
Fill ``nan`` values with the given fill value.

0 commit comments

Comments (0)