Skip to content

Commit eeffe68

Browse files
committed
ENH: Add set_index to Series
1 parent 8a1c8ad commit eeffe68

File tree

5 files changed

+378
-94
lines changed

5 files changed

+378
-94
lines changed

Diff for: doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ Other Enhancements
181181
The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`).
182182
- :func:`to_timedelta` now supports ISO-formatted timedelta strings (:issue:`21877`)
183183
- :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`)
184+
- :class:`Series` has gained the method :meth:`Series.set_index`, which works like its :class:`DataFrame` counterpart :meth:`DataFrame.set_index` (:issue:`21684`)
184185
- :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`)
185186
- :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`).
186187
- :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`).

Diff for: pandas/core/frame.py

+32-80
Original file line numberDiff line numberDiff line change
@@ -3826,43 +3826,54 @@ def shift(self, periods=1, freq=None, axis=0):
38263826
def set_index(self, keys, drop=True, append=False, inplace=False,
38273827
verify_integrity=False):
38283828
"""
3829-
Set the DataFrame index (row labels) using one or more existing
3830-
columns. By default yields a new object.
3829+
Set the DataFrame index (row labels) using one or more columns.
38313830
38323831
Parameters
38333832
----------
38343833
keys : column label or list of column labels / arrays
3834+
Either a column label, Series, Index, MultiIndex, list,
3835+
np.ndarray or a list containing only column labels, Series, Index,
3836+
MultiIndex, list, np.ndarray.
38353837
drop : boolean, default True
3836-
Delete columns to be used as the new index
3838+
Delete columns to be used as the new index.
38373839
append : boolean, default False
3838-
Whether to append columns to existing index
3840+
Whether to append columns to existing index.
38393841
inplace : boolean, default False
3840-
Modify the DataFrame in place (do not create a new object)
3842+
Modify the DataFrame in place (do not create a new object).
38413843
verify_integrity : boolean, default False
38423844
Check the new index for duplicates. Otherwise defer the check until
38433845
necessary. Setting to False will improve the performance of this
3844-
method
3846+
method.
3847+
3848+
Returns
3849+
-------
3850+
reindexed : DataFrame if inplace is False, else None
3851+
3852+
See Also
3853+
--------
3854+
Series.set_index: Corresponding method for Series
38453855
38463856
Examples
38473857
--------
38483858
>>> df = pd.DataFrame({'month': [1, 4, 7, 10],
38493859
... 'year': [2012, 2014, 2013, 2014],
3850-
... 'sale':[55, 40, 84, 31]})
3851-
month sale year
3852-
0 1 55 2012
3853-
1 4 40 2014
3854-
2 7 84 2013
3855-
3 10 31 2014
3860+
... 'sale': [55, 40, 84, 31]})
3861+
>>> df
3862+
month year sale
3863+
0 1 2012 55
3864+
1 4 2014 40
3865+
2 7 2013 84
3866+
3 10 2014 31
38563867
38573868
Set the index to become the 'month' column:
38583869
38593870
>>> df.set_index('month')
3860-
sale year
3871+
year sale
38613872
month
3862-
1 55 2012
3863-
4 40 2014
3864-
7 84 2013
3865-
10 31 2014
3873+
1 2012 55
3874+
4 2014 40
3875+
7 2013 84
3876+
10 2014 31
38663877
38673878
Create a multi-index using columns 'year' and 'month':
38683879
@@ -3883,73 +3894,14 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
38833894
2 2014 4 40
38843895
3 2013 7 84
38853896
4 2014 10 31
3886-
3887-
Returns
3888-
-------
3889-
dataframe : DataFrame
38903897
"""
3891-
inplace = validate_bool_kwarg(inplace, 'inplace')
38923898
if not isinstance(keys, list):
38933899
keys = [keys]
38943900

3895-
if inplace:
3896-
frame = self
3897-
else:
3898-
frame = self.copy()
3899-
3900-
arrays = []
3901-
names = []
3902-
if append:
3903-
names = [x for x in self.index.names]
3904-
if isinstance(self.index, MultiIndex):
3905-
for i in range(self.index.nlevels):
3906-
arrays.append(self.index._get_level_values(i))
3907-
else:
3908-
arrays.append(self.index)
3909-
3910-
to_remove = []
3911-
for col in keys:
3912-
if isinstance(col, MultiIndex):
3913-
# append all but the last column so we don't have to modify
3914-
# the end of this loop
3915-
for n in range(col.nlevels - 1):
3916-
arrays.append(col._get_level_values(n))
3917-
3918-
level = col._get_level_values(col.nlevels - 1)
3919-
names.extend(col.names)
3920-
elif isinstance(col, Series):
3921-
level = col._values
3922-
names.append(col.name)
3923-
elif isinstance(col, Index):
3924-
level = col
3925-
names.append(col.name)
3926-
elif isinstance(col, (list, np.ndarray, Index)):
3927-
level = col
3928-
names.append(None)
3929-
else:
3930-
level = frame[col]._values
3931-
names.append(col)
3932-
if drop:
3933-
to_remove.append(col)
3934-
arrays.append(level)
3935-
3936-
index = ensure_index_from_sequences(arrays, names)
3937-
3938-
if verify_integrity and not index.is_unique:
3939-
duplicates = index[index.duplicated()].unique()
3940-
raise ValueError('Index has duplicate keys: {dup}'.format(
3941-
dup=duplicates))
3942-
3943-
for c in to_remove:
3944-
del frame[c]
3945-
3946-
# clear up memory usage
3947-
index._cleanup()
3948-
3949-
frame.index = index
3950-
3951-
if not inplace:
3952-
return frame
3901+
vi = verify_integrity
3902+
return super(DataFrame, self).set_index(keys=keys, drop=drop,
3903+
append=append, inplace=inplace,
3904+
verify_integrity=vi)
39533905

39543906
def reset_index(self, level=None, drop=False, inplace=False, col_level=0,
39553907
col_fill=''):

Diff for: pandas/core/generic.py

+136-3
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,13 @@
3232
from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
3333
from pandas.core.dtypes.inference import is_hashable
3434
from pandas.core.dtypes.missing import isna, notna
35-
from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame
35+
from pandas.core.dtypes.generic import (ABCIndexClass, ABCMultiIndex, ABCPanel,
36+
ABCSeries, ABCDataFrame)
3637

3738
from pandas.core.base import PandasObject, SelectionMixin
38-
from pandas.core.index import (Index, MultiIndex, ensure_index,
39-
InvalidIndexError, RangeIndex)
39+
from pandas.core.index import (Index, MultiIndex,
40+
InvalidIndexError, RangeIndex,
41+
ensure_index, ensure_index_from_sequences)
4042
import pandas.core.indexing as indexing
4143
from pandas.core.indexes.datetimes import DatetimeIndex
4244
from pandas.core.indexes.period import PeriodIndex, Period
@@ -663,6 +665,137 @@ def _set_axis(self, axis, labels):
663665
y : same as input
664666
"""
665667

668+
def set_index(self, keys, drop=True, append=False, inplace=False,
669+
verify_integrity=False):
670+
"""
671+
Set the Series/DataFrame index (row labels) using one or more given
672+
arrays (or column labels in case of DataFrame).
673+
By default yields a new object.
674+
675+
Parameters
676+
----------
677+
keys : column label or list of column labels / arrays. For Series case,
678+
only array or list of arrays is allowed.
679+
drop : boolean, default True
680+
Delete columns to be used as the new index (only for DataFrame).
681+
append : boolean, default False
682+
Whether to append columns to existing index.
683+
inplace : boolean, default False
684+
Modify the Series/DataFrame in place (do not create a new object).
685+
verify_integrity : boolean, default False
686+
Check the new index for duplicates. Otherwise defer the check until
687+
necessary. Setting to False will improve the performance of this
688+
method.
689+
690+
Returns
691+
-------
692+
reindexed : Series/DataFrame if inplace is False, else None
693+
694+
See Also
695+
--------
696+
DataFrame.set_index: method adapted for DataFrame
697+
Series.set_index: method adapted for Series
698+
699+
Examples
700+
--------
701+
>>> df = pd.DataFrame({'month': [1, 4, 7, 10],
702+
... 'year': [2012, 2014, 2013, 2014],
703+
... 'sale': [55, 40, 84, 31]})
704+
>>> df
705+
month year sale
706+
0 1 2012 55
707+
1 4 2014 40
708+
2 7 2013 84
709+
3 10 2014 31
710+
711+
Set the index to become the 'month' column:
712+
713+
>>> df.set_index('month')
714+
year sale
715+
month
716+
1 2012 55
717+
4 2014 40
718+
7 2013 84
719+
10 2014 31
720+
721+
Create a multi-index using columns 'year' and 'month':
722+
723+
>>> df.set_index(['year', 'month'])
724+
sale
725+
year month
726+
2012 1 55
727+
2014 4 40
728+
2013 7 84
729+
2014 10 31
730+
731+
Create a multi-index using a set of values and a column:
732+
733+
>>> df.set_index([[1, 2, 3, 4], 'year'])
734+
month sale
735+
year
736+
1 2012 1 55
737+
2 2014 4 40
738+
3 2013 7 84
739+
4 2014 10 31
740+
"""
741+
inplace = validate_bool_kwarg(inplace, 'inplace')
742+
if inplace:
743+
obj = self
744+
else:
745+
obj = self.copy()
746+
747+
arrays = []
748+
names = []
749+
if append:
750+
names = [x for x in self.index.names]
751+
if isinstance(self.index, ABCMultiIndex):
752+
for i in range(self.index.nlevels):
753+
arrays.append(self.index._get_level_values(i))
754+
else:
755+
arrays.append(self.index)
756+
757+
to_remove = []
758+
for col in keys:
759+
if isinstance(col, ABCMultiIndex):
760+
for n in range(col.nlevels):
761+
arrays.append(col._get_level_values(n))
762+
names.extend(col.names)
763+
elif isinstance(col, ABCIndexClass):
764+
# Index but not MultiIndex (treated above)
765+
arrays.append(col)
766+
names.append(col.name)
767+
elif isinstance(col, ABCSeries):
768+
arrays.append(col._values)
769+
names.append(col.name)
770+
elif isinstance(col, (list, np.ndarray)):
771+
arrays.append(col)
772+
names.append(None)
773+
# from here, col can only be a column label (and obj a DataFrame);
774+
# see checks in Series.set_index and DataFrame.set_index
775+
else:
776+
arrays.append(obj[col]._values)
777+
names.append(col)
778+
if drop:
779+
to_remove.append(col)
780+
781+
index = ensure_index_from_sequences(arrays, names)
782+
783+
if verify_integrity and not index.is_unique:
784+
duplicates = list(index[index.duplicated()])
785+
raise ValueError('Index has duplicate keys: {dup}'.format(
786+
dup=duplicates))
787+
788+
for c in to_remove:
789+
del obj[c]
790+
791+
# clear up memory usage
792+
index._cleanup()
793+
794+
obj.index = index
795+
796+
if not inplace:
797+
return obj
798+
666799
@Appender(_shared_docs['transpose'] % _shared_doc_kwargs)
667800
def transpose(self, *args, **kwargs):
668801

0 commit comments

Comments
 (0)