@@ -5501,7 +5501,22 @@ def corr(self, method='pearson', min_periods=1):
5501
5501
5502
5502
def cov (self , min_periods = None ):
5503
5503
"""
5504
- Compute pairwise covariance of columns, excluding NA/null values
5504
+ Compute pairwise covariance of columns, excluding NA/null values.
5505
+
5506
+ Compute the pairwise covariance among the series of a DataFrame.
5507
+ The returned data frame is the `covariance matrix
5508
+ <https://en.wikipedia.org/wiki/Covariance_matrix>`__ of the columns
5509
+ of the DataFrame.
5510
+
5511
+ Both NA and null values are automatically excluded from the
5512
+ calculation. (See the note below about bias from missing values.)
5513
+ A threshold can be set for the minimum number of
5514
+ observations for each value created. Comparisons with observations
5515
+ below this threshold will be returned as ``NaN``.
5516
+
5517
+ This method is generally used for the analysis of time series data to
5518
+ understand the relationship between different measures
5519
+ across time.
5505
5520
5506
5521
Parameters
5507
5522
----------
@@ -5511,12 +5526,71 @@ def cov(self, min_periods=None):
5511
5526
5512
5527
Returns
5513
5528
-------
5514
- y : DataFrame
5529
+ DataFrame
5530
+ The covariance matrix of the series of the DataFrame.
5531
+
5532
+ See Also
5533
+ --------
5534
+ pandas.Series.cov : compute covariance with another Series
5535
+ pandas.core.window.EWM.cov : exponential weighted sample covariance
5536
+ pandas.core.window.Expanding.cov : expanding sample covariance
5537
+ pandas.core.window.Rolling.cov : rolling sample covariance
5515
5538
5516
5539
Notes
5517
5540
-----
5518
- `y` contains the covariance matrix of the DataFrame's time series.
5519
- The covariance is normalized by N-1 (unbiased estimator).
5541
+ Returns the covariance matrix of the DataFrame's time series.
5542
+ The covariance is normalized by N-1.
5543
+
5544
+ For DataFrames that have Series that are missing data (assuming that
5545
+ data is `missing at random
5546
+ <https://en.wikipedia.org/wiki/Missing_data#Missing_at_random>`__)
5547
+ the returned covariance matrix will be an unbiased estimate
5548
+ of the variance and covariance between the member Series.
5549
+
5550
+ However, for many applications this estimate may not be acceptable
5551
+ because the estimated covariance matrix is not guaranteed to be positive
5552
+ semi-definite. This could lead to estimated correlations having
5553
+ absolute values which are greater than one, and/or a non-invertible
5554
+ covariance matrix. See `Estimation of covariance matrices
5555
+ <https://en.wikipedia.org/w/index.php?title=Estimation_of_covariance_
5556
+ matrices>`__ for more details.
5557
+
5558
+ Examples
5559
+ --------
5560
+ >>> df = pd.DataFrame([(1, 2), (0, 3), (2, 0), (1, 1)],
5561
+ ... columns=['dogs', 'cats'])
5562
+ >>> df.cov()
5563
+ dogs cats
5564
+ dogs 0.666667 -1.000000
5565
+ cats -1.000000 1.666667
5566
+
5567
+ >>> np.random.seed(42)
5568
+ >>> df = pd.DataFrame(np.random.randn(1000, 5),
5569
+ ... columns=['a', 'b', 'c', 'd', 'e'])
5570
+ >>> df.cov()
5571
+ a b c d e
5572
+ a 0.998438 -0.020161 0.059277 -0.008943 0.014144
5573
+ b -0.020161 1.059352 -0.008543 -0.024738 0.009826
5574
+ c 0.059277 -0.008543 1.010670 -0.001486 -0.000271
5575
+ d -0.008943 -0.024738 -0.001486 0.921297 -0.013692
5576
+ e 0.014144 0.009826 -0.000271 -0.013692 0.977795
5577
+
5578
+ **Minimum number of periods**
5579
+
5580
+ This method also supports an optional ``min_periods`` keyword
5581
+ that specifies the required minimum number of non-NA observations for
5582
+ each column pair in order to have a valid result:
5583
+
5584
+ >>> np.random.seed(42)
5585
+ >>> df = pd.DataFrame(np.random.randn(20, 3),
5586
+ ... columns=['a', 'b', 'c'])
5587
+ >>> df.loc[df.index[:5], 'a'] = np.nan
5588
+ >>> df.loc[df.index[5:10], 'b'] = np.nan
5589
+ >>> df.cov(min_periods=12)
5590
+ a b c
5591
+ a 0.316741 NaN -0.150812
5592
+ b NaN 1.248003 0.191417
5593
+ c -0.150812 0.191417 0.895202
5520
5594
"""
5521
5595
numeric_df = self ._get_numeric_data ()
5522
5596
cols = numeric_df .columns
0 commit comments