@@ -2618,13 +2618,16 @@ def hist(self, bins=10, **kwds):
2618
2618
2619
2619
def kde (self , bw_method = None , ind = None , ** kwds ):
2620
2620
"""
2621
- Kernel Density Estimate plot using Gaussian kernels.
2621
+ Generate Kernel Density Estimate plot using Gaussian kernels.
2622
2622
2623
- In statistics, kernel density estimation (KDE) is a non-parametric way
2624
- to estimate the probability density function (PDF) of a random
2623
+ In statistics, `kernel density estimation`_ (KDE) is a non-parametric
2624
+ way to estimate the probability density function (PDF) of a random
2625
2625
variable. This function uses Gaussian kernels and includes automatic
2626
2626
bandwidth determination.
2627
2627
2628
+ .. _kernel density estimation:
2629
+ https://en.wikipedia.org/wiki/Kernel_density_estimation
2630
+
2628
2631
Parameters
2629
2632
----------
2630
2633
bw_method : str, scalar or callable, optional
@@ -2635,26 +2638,27 @@ def kde(self, bw_method=None, ind=None, **kwds):
2635
2638
ind : NumPy array or integer, optional
2636
2639
Evaluation points for the estimated PDF. If None (default),
2637
2640
1000 equally spaced points are used. If `ind` is a NumPy array, the
2638
- kde is evaluated at the points passed. If `ind` is an integer,
2641
+ KDE is evaluated at the points passed. If `ind` is an integer,
2639
2642
`ind` number of equally spaced points are used.
2640
- kwds : optional
2643
+ **kwds : optional
2641
2644
Additional keyword arguments are documented in
2642
2645
:meth:`pandas.Series.plot`.
2643
2646
2644
2647
Returns
2645
2648
-------
2646
2649
axes : matplotlib.AxesSubplot or np.array of them
2647
2650
2648
- See also
2651
+ See Also
2649
2652
--------
2650
2653
scipy.stats.gaussian_kde : Representation of a kernel-density
2651
2654
estimate using Gaussian kernels. This is the function used
2652
2655
internally to estimate the PDF.
2656
+ DataFrame.plot.kde : Generate a KDE plot for a DataFrame.
2653
2657
2654
2658
Examples
2655
2659
--------
2656
2660
Given a Series of points randomly sampled from an unknown
2657
- distribution, estimate this distribution using KDE with automatic
2661
+ distribution, estimate its distribution using KDE with automatic
2658
2662
bandwidth determination and plot the results, evaluating them at
2659
2663
1000 equally spaced points (default):
2660
2664
@@ -2664,10 +2668,9 @@ def kde(self, bw_method=None, ind=None, **kwds):
2664
2668
>>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])
2665
2669
>>> ax = s.plot.kde()
2666
2670
2667
-
2668
- An scalar fixed bandwidth can be specified. Using a too small bandwidth
2669
- can lead to overfitting, while a too large bandwidth can result in
2670
- underfitting:
2671
+ A scalar bandwidth can be specified. Using a small bandwidth value can
2672
+ lead to overfitting, while using a large bandwidth value may result
2673
+ in underfitting:
2671
2674
2672
2675
.. plot::
2673
2676
:context: close-figs
@@ -2851,27 +2854,80 @@ def hist(self, by=None, bins=10, **kwds):
2851
2854
2852
2855
def kde (self , bw_method = None , ind = None , ** kwds ):
2853
2856
"""
2854
- Kernel Density Estimate plot
2857
+ Generate Kernel Density Estimate plot using Gaussian kernels.
2858
+
2859
+ In statistics, `kernel density estimation`_ (KDE) is a non-parametric
2860
+ way to estimate the probability density function (PDF) of a random
2861
+ variable. This function uses Gaussian kernels and includes automatic
2862
+ bandwidth determination.
2863
+
2864
+ .. _kernel density estimation:
2865
+ https://en.wikipedia.org/wiki/Kernel_density_estimation
2855
2866
2856
2867
Parameters
2857
2868
----------
2858
- bw_method: str, scalar or callable, optional
2859
- The method used to calculate the estimator bandwidth. This can be
2869
+ bw_method : str, scalar or callable, optional
2870
+ The method used to calculate the estimator bandwidth. This can be
2860
2871
'scott', 'silverman', a scalar constant or a callable.
2861
2872
If None (default), 'scott' is used.
2862
2873
See :class:`scipy.stats.gaussian_kde` for more information.
2863
2874
ind : NumPy array or integer, optional
2864
- Evaluation points. If None (default), 1000 equally spaced points
2865
- are used. If `ind` is a NumPy array, the kde is evaluated at the
2866
- points passed. If `ind` is an integer, `ind` number of equally
2867
- spaced points are used.
2868
- ` **kwds` : optional
2875
+ Evaluation points for the estimated PDF. If None (default),
2876
+ 1000 equally spaced points are used. If `ind` is a NumPy array, the
2877
+ KDE is evaluated at the points passed. If `ind` is an integer,
2878
+ `ind` number of equally spaced points are used.
2879
+ **kwds : optional
2869
2880
Additional keyword arguments are documented in
2870
2881
:meth:`pandas.DataFrame.plot`.
2871
2882
2872
2883
Returns
2873
2884
-------
2874
2885
axes : matplotlib.AxesSubplot or np.array of them
2886
+
2887
+ See Also
2888
+ --------
2889
+ scipy.stats.gaussian_kde : Representation of a kernel-density
2890
+ estimate using Gaussian kernels. This is the function used
2891
+ internally to estimate the PDF.
2892
+ Series.plot.kde : Generate a KDE plot for a Series.
2893
+
2894
+ Examples
2895
+ --------
2896
+ Given several Series of points randomly sampled from unknown
2897
+ distributions, estimate their distribution using KDE with automatic
2898
+ bandwidth determination and plot the results, evaluating them at
2899
+ 1000 equally spaced points (default):
2900
+
2901
+ .. plot::
2902
+ :context: close-figs
2903
+
2904
+ >>> df = pd.DataFrame({
2905
+ ... 'x': [1, 2, 2.5, 3, 3.5, 4, 5],
2906
+ ... 'y': [4, 4, 4.5, 5, 5.5, 6, 6],
2907
+ ... })
2908
+ >>> ax = df.plot.kde()
2909
+
2910
+ A scalar bandwidth can be specified. Using a small bandwidth value can
2911
+ lead to overfitting, while using a large bandwidth value may result
2912
+ in underfitting:
2913
+
2914
+ .. plot::
2915
+ :context: close-figs
2916
+
2917
+ >>> ax = df.plot.kde(bw_method=0.3)
2918
+
2919
+ .. plot::
2920
+ :context: close-figs
2921
+
2922
+ >>> ax = df.plot.kde(bw_method=3)
2923
+
2924
+ Finally, the `ind` parameter determines the evaluation points for the
2925
+ plot of the estimated PDF:
2926
+
2927
+ .. plot::
2928
+ :context: close-figs
2929
+
2930
+ >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
2875
2931
"""
2876
2932
return self (kind = 'kde' , bw_method = bw_method , ind = ind , ** kwds )
2877
2933
0 commit comments