|
from __future__ import annotations |
|
|
|
from contextlib import contextmanager |
|
from typing import ( |
|
TYPE_CHECKING, |
|
Any, |
|
) |
|
|
|
from pandas.plotting._core import _get_plot_backend |
|
|
|
if TYPE_CHECKING: |
|
from collections.abc import ( |
|
Generator, |
|
Mapping, |
|
) |
|
|
|
from matplotlib.axes import Axes |
|
from matplotlib.colors import Colormap |
|
from matplotlib.figure import Figure |
|
from matplotlib.table import Table |
|
import numpy as np |
|
|
|
from pandas import ( |
|
DataFrame, |
|
Series, |
|
) |
|
|
|
|
|
def table(ax: Axes, data: DataFrame | Series, **kwargs) -> Table:
    """
    Helper function to convert DataFrame and Series to matplotlib.table.

    Parameters
    ----------
    ax : Matplotlib axes object
        Axes handed to the matplotlib backend's ``table`` function.
    data : DataFrame or Series
        Data for table contents.
    **kwargs
        Keyword arguments to be passed to matplotlib.table.table.
        If `rowLabels` or `colLabels` is not specified, data index or column
        name will be used.

    Returns
    -------
    matplotlib table object

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> import matplotlib.pyplot as plt
        >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
        >>> fig, ax = plt.subplots()
        >>> ax.axis('off')
        (0.0, 1.0, 0.0, 1.0)
        >>> table = pd.plotting.table(ax, df, loc='center',
        ...                           cellLoc='center', colWidths=list([.2, .2]))
    """
    # Only fill in ``None`` when the caller did not supply the label
    # arguments. Passing ``rowLabels=None`` explicitly next to ``**kwargs``
    # would raise ``TypeError`` (duplicate keyword) whenever a caller provides
    # ``rowLabels`` or ``colLabels``, contradicting the documented behaviour.
    kwargs.setdefault("rowLabels", None)
    kwargs.setdefault("colLabels", None)

    plot_backend = _get_plot_backend("matplotlib")
    return plot_backend.table(ax=ax, data=data, **kwargs)
|
|
|
|
|
def register() -> None:
    """
    Register pandas formatters and converters with matplotlib.

    Calling this function changes the global ``matplotlib.units.registry``
    dictionary. pandas installs custom converters for the following types:

    * pd.Timestamp
    * pd.Period
    * np.datetime64
    * datetime.datetime
    * datetime.date
    * datetime.time

    See Also
    --------
    deregister_matplotlib_converters : Remove pandas formatters and converters.

    Examples
    --------
    .. plot::
       :context: close-figs

        The following line is done automatically by pandas so
        the plot can be rendered:

        >>> pd.plotting.register_matplotlib_converters()

        >>> df = pd.DataFrame({'ts': pd.period_range('2020', periods=2, freq='M'),
        ...                    'y': [1, 2]
        ...                    })
        >>> plot = df.plot.line(x='ts', y='y')

    If the converters are unset manually, an error will be raised:

    >>> pd.set_option("plotting.matplotlib.register_converters",
    ...               False)  # doctest: +SKIP
    >>> df.plot.line(x='ts', y='y')  # doctest: +SKIP
    Traceback (most recent call last):
    TypeError: float() argument must be a string or a real number, not 'Period'
    """
    # Delegate straight to the matplotlib backend module.
    _get_plot_backend("matplotlib").register()
|
|
|
|
|
def deregister() -> None:
    """
    Remove pandas formatters and converters.

    Undoes the work of :func:`register` by removing the custom converters
    it added. The goal is to put the registry back into the state it had
    before pandas registered its own units. Converters for pandas' own
    types, such as Timestamp and Period, are removed completely. Converters
    for types that pandas overwrites, such as ``datetime.datetime``, are
    restored to their original value.

    See Also
    --------
    register_matplotlib_converters : Register pandas formatters and converters
        with matplotlib.

    Examples
    --------
    .. plot::
       :context: close-figs

        The following line is done automatically by pandas so
        the plot can be rendered:

        >>> pd.plotting.register_matplotlib_converters()

        >>> df = pd.DataFrame({'ts': pd.period_range('2020', periods=2, freq='M'),
        ...                    'y': [1, 2]
        ...                    })
        >>> plot = df.plot.line(x='ts', y='y')

    If the converters are unset manually, an error will be raised:

    >>> pd.set_option("plotting.matplotlib.register_converters",
    ...               False)  # doctest: +SKIP
    >>> df.plot.line(x='ts', y='y')  # doctest: +SKIP
    Traceback (most recent call last):
    TypeError: float() argument must be a string or a real number, not 'Period'
    """
    # Delegate straight to the matplotlib backend module.
    _get_plot_backend("matplotlib").deregister()
|
|
|
|
|
def scatter_matrix(
    frame: DataFrame,
    alpha: float = 0.5,
    figsize: tuple[float, float] | None = None,
    ax: Axes | None = None,
    grid: bool = False,
    diagonal: str = "hist",
    marker: str = ".",
    density_kwds: Mapping[str, Any] | None = None,
    hist_kwds: Mapping[str, Any] | None = None,
    range_padding: float = 0.05,
    **kwargs,
) -> np.ndarray:
    """
    Draw a matrix of scatter plots.

    Parameters
    ----------
    frame : DataFrame
    alpha : float, optional
        Amount of transparency applied.
    figsize : (float,float), optional
        Figure size as a (width, height) tuple, in inches.
    ax : Matplotlib axis object, optional
    grid : bool, optional
        Show the grid when set to True.
    diagonal : {'hist', 'kde'}
        Kind of plot drawn on the diagonal: 'kde' for Kernel Density
        Estimation or 'hist' for a histogram.
    marker : str, optional
        Matplotlib marker type, default '.'.
    density_kwds : keywords
        Keyword arguments to be passed to kernel density estimate plot.
    hist_kwds : keywords
        Keyword arguments to be passed to hist function.
    range_padding : float, default 0.05
        Relative extension of axis range in x and y with respect to
        (x_max - x_min) or (y_max - y_min).
    **kwargs
        Keyword arguments to be passed to scatter function.

    Returns
    -------
    numpy.ndarray
        A matrix of scatter plots.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
        >>> pd.plotting.scatter_matrix(df, alpha=0.2)
        array([[<Axes: xlabel='A', ylabel='A'>, <Axes: xlabel='B', ylabel='A'>,
                <Axes: xlabel='C', ylabel='A'>, <Axes: xlabel='D', ylabel='A'>],
               [<Axes: xlabel='A', ylabel='B'>, <Axes: xlabel='B', ylabel='B'>,
                <Axes: xlabel='C', ylabel='B'>, <Axes: xlabel='D', ylabel='B'>],
               [<Axes: xlabel='A', ylabel='C'>, <Axes: xlabel='B', ylabel='C'>,
                <Axes: xlabel='C', ylabel='C'>, <Axes: xlabel='D', ylabel='C'>],
               [<Axes: xlabel='A', ylabel='D'>, <Axes: xlabel='B', ylabel='D'>,
                <Axes: xlabel='C', ylabel='D'>, <Axes: xlabel='D', ylabel='D'>]],
              dtype=object)
    """
    backend = _get_plot_backend("matplotlib")
    # Every named argument is forwarded unchanged; extra keywords ride along.
    axes_matrix = backend.scatter_matrix(
        frame=frame,
        alpha=alpha,
        figsize=figsize,
        ax=ax,
        grid=grid,
        diagonal=diagonal,
        marker=marker,
        density_kwds=density_kwds,
        hist_kwds=hist_kwds,
        range_padding=range_padding,
        **kwargs,
    )
    return axes_matrix
|
|
|
|
|
def radviz(
    frame: DataFrame,
    class_column: str,
    ax: Axes | None = None,
    color: list[str] | tuple[str, ...] | None = None,
    colormap: Colormap | str | None = None,
    **kwds,
) -> Axes:
    """
    Plot a multidimensional dataset in 2D.

    Each Series in the DataFrame is represented as an evenly distributed
    slice on a circle. Each data point is rendered in the circle according to
    the value on each Series. Highly correlated `Series` in the `DataFrame`
    are placed closer on the unit circle.

    RadViz projects an N-dimensional data set into a 2D space where the
    influence of each dimension can be interpreted as a balance between the
    influence of all dimensions.

    More info available at the `original article
    <https://doi.org/10.1145/331770.331775>`_
    describing RadViz.

    Parameters
    ----------
    frame : `DataFrame`
        Object holding the data.
    class_column : str
        Column name containing the name of the data point category.
    ax : :class:`matplotlib.axes.Axes`, optional
        A plot instance to which to add the information.
    color : list[str] or tuple[str], optional
        Assign a color to each category. Example: ['blue', 'green'].
    colormap : str or :class:`matplotlib.colors.Colormap`, default None
        Colormap to select colors from. If string, load colormap with that
        name from matplotlib.
    **kwds
        Options to pass to matplotlib scatter plotting method.

    Returns
    -------
    :class:`matplotlib.axes.Axes`

    See Also
    --------
    pandas.plotting.andrews_curves : Plot clustering visualization.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame(
        ...     {
        ...         'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6, 6.7, 4.6],
        ...         'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2, 3.3, 3.6],
        ...         'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4, 5.7, 1.0],
        ...         'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2, 2.1, 0.2],
        ...         'Category': [
        ...             'virginica',
        ...             'virginica',
        ...             'setosa',
        ...             'virginica',
        ...             'virginica',
        ...             'versicolor',
        ...             'versicolor',
        ...             'setosa',
        ...             'virginica',
        ...             'setosa'
        ...         ]
        ...     }
        ... )
        >>> pd.plotting.radviz(df, 'Category')  # doctest: +SKIP
    """
    # Thin public wrapper: the matplotlib backend does the drawing.
    return _get_plot_backend("matplotlib").radviz(
        frame=frame,
        class_column=class_column,
        ax=ax,
        color=color,
        colormap=colormap,
        **kwds,
    )
|
|
|
|
|
def andrews_curves(
    frame: DataFrame,
    class_column: str,
    ax: Axes | None = None,
    samples: int = 200,
    color: list[str] | tuple[str, ...] | None = None,
    colormap: Colormap | str | None = None,
    **kwargs,
) -> Axes:
    """
    Generate a matplotlib plot for visualizing clusters of multivariate data.

    Andrews curves have the functional form:

    .. math::
        f(t) = \\frac{x_1}{\\sqrt{2}} + x_2 \\sin(t) + x_3 \\cos(t) +
        x_4 \\sin(2t) + x_5 \\cos(2t) + \\cdots

    Where :math:`x` coefficients correspond to the values of each dimension
    and :math:`t` is linearly spaced between :math:`-\\pi` and :math:`+\\pi`.
    Each row of frame then corresponds to a single curve.

    Parameters
    ----------
    frame : DataFrame
        Data to be plotted, preferably normalized to (0.0, 1.0).
    class_column : label
        Name of the column containing class names.
    ax : axes object, default None
        Axes to use.
    samples : int
        Number of points to plot in each curve.
    color : str, list[str] or tuple[str], optional
        Colors to use for the different classes. Colors can be strings
        or 3-element floating point RGB values.
    colormap : str or matplotlib colormap object, default None
        Colormap to select colors from. If a string, load colormap with that
        name from matplotlib.
    **kwargs
        Options to pass to matplotlib plotting method.

    Returns
    -------
    :class:`matplotlib.axes.Axes`

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.read_csv(
        ...     'https://raw.githubusercontent.com/pandas-dev/'
        ...     'pandas/main/pandas/tests/io/data/csv/iris.csv'
        ... )
        >>> pd.plotting.andrews_curves(df, 'Name')  # doctest: +SKIP
    """
    backend = _get_plot_backend("matplotlib")
    # All arguments are forwarded by keyword, unchanged.
    curves_ax = backend.andrews_curves(
        frame=frame,
        class_column=class_column,
        ax=ax,
        samples=samples,
        color=color,
        colormap=colormap,
        **kwargs,
    )
    return curves_ax
|
|
|
|
|
def bootstrap_plot(
    series: Series,
    fig: Figure | None = None,
    size: int = 50,
    samples: int = 500,
    **kwds,
) -> Figure:
    """
    Bootstrap plot on mean, median and mid-range statistics.

    A bootstrap plot estimates the uncertainty of a statistic by relying on
    random sampling with replacement [1]_. This function generates
    bootstrapping plots for mean, median and mid-range statistics for the
    given number of samples of the given size.

    .. [1] "Bootstrapping (statistics)" in \
            https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29

    Parameters
    ----------
    series : pandas.Series
        Series from where to get the samplings for the bootstrapping.
    fig : matplotlib.figure.Figure, default None
        If given, it will use the `fig` reference for plotting instead of
        creating a new one with default parameters.
    size : int, default 50
        Number of data points to consider during each sampling. It must be
        less than or equal to the length of the `series`.
    samples : int, default 500
        Number of times the bootstrap procedure is performed.
    **kwds
        Options to pass to matplotlib plotting method.

    Returns
    -------
    matplotlib.figure.Figure
        Matplotlib figure.

    See Also
    --------
    pandas.DataFrame.plot : Basic plotting for DataFrame objects.
    pandas.Series.plot : Basic plotting for Series objects.

    Examples
    --------
    This example draws a basic bootstrap plot for a Series.

    .. plot::
        :context: close-figs

        >>> s = pd.Series(np.random.uniform(size=100))
        >>> pd.plotting.bootstrap_plot(s)  # doctest: +SKIP
        <Figure size 640x480 with 6 Axes>
    """
    # Thin public wrapper: the matplotlib backend does the drawing.
    return _get_plot_backend("matplotlib").bootstrap_plot(
        series=series,
        fig=fig,
        size=size,
        samples=samples,
        **kwds,
    )
|
|
|
|
|
def parallel_coordinates(
    frame: DataFrame,
    class_column: str,
    cols: list[str] | None = None,
    ax: Axes | None = None,
    color: list[str] | tuple[str, ...] | None = None,
    use_columns: bool = False,
    xticks: list | tuple | None = None,
    colormap: Colormap | str | None = None,
    axvlines: bool = True,
    axvlines_kwds: Mapping[str, Any] | None = None,
    sort_labels: bool = False,
    **kwargs,
) -> Axes:
    """
    Parallel coordinates plotting.

    Parameters
    ----------
    frame : DataFrame
    class_column : str
        Column name containing class names.
    cols : list, optional
        A list of column names to use.
    ax : matplotlib.axis, optional
        Matplotlib axis object.
    color : list or tuple, optional
        Colors to use for the different classes.
    use_columns : bool, optional
        If true, columns will be used as xticks.
    xticks : list or tuple, optional
        A list of values to use for xticks.
    colormap : str or matplotlib colormap, default None
        Colormap to use for line colors.
    axvlines : bool, optional
        If true, vertical lines will be added at each xtick.
    axvlines_kwds : keywords, optional
        Options to be passed to axvline method for vertical lines.
    sort_labels : bool, default False
        Sort class_column labels, useful when assigning colors.
    **kwargs
        Options to pass to matplotlib plotting method.

    Returns
    -------
    matplotlib.axes.Axes

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.read_csv(
        ...     'https://raw.githubusercontent.com/pandas-dev/'
        ...     'pandas/main/pandas/tests/io/data/csv/iris.csv'
        ... )
        >>> pd.plotting.parallel_coordinates(
        ...     df, 'Name', color=('#556270', '#4ECDC4', '#C7F464')
        ... )  # doctest: +SKIP
    """
    backend = _get_plot_backend("matplotlib")
    # All arguments are forwarded by keyword, unchanged.
    coords_ax = backend.parallel_coordinates(
        frame=frame,
        class_column=class_column,
        cols=cols,
        ax=ax,
        color=color,
        use_columns=use_columns,
        xticks=xticks,
        colormap=colormap,
        axvlines=axvlines,
        axvlines_kwds=axvlines_kwds,
        sort_labels=sort_labels,
        **kwargs,
    )
    return coords_ax
|
|
|
|
|
def lag_plot(series: Series, lag: int = 1, ax: Axes | None = None, **kwds) -> Axes:
    """
    Lag plot for time series.

    Parameters
    ----------
    series : Series
        The time series to visualize.
    lag : int, default 1
        Lag length of the scatter plot.
    ax : Matplotlib axis object, optional
        The matplotlib axis object to use.
    **kwds
        Matplotlib scatter method keyword arguments.

    Returns
    -------
    matplotlib.axes.Axes

    Examples
    --------
    Lag plots are most commonly used to look for patterns in time series data.

    Given the following time series

    .. plot::
        :context: close-figs

        >>> np.random.seed(5)
        >>> x = np.cumsum(np.random.normal(loc=1, scale=5, size=50))
        >>> s = pd.Series(x)
        >>> s.plot()  # doctest: +SKIP

    A lag plot with ``lag=1`` returns

    .. plot::
        :context: close-figs

        >>> pd.plotting.lag_plot(s, lag=1)
        <Axes: xlabel='y(t)', ylabel='y(t + 1)'>
    """
    # Thin public wrapper: the matplotlib backend does the drawing.
    return _get_plot_backend("matplotlib").lag_plot(
        series=series,
        lag=lag,
        ax=ax,
        **kwds,
    )
|
|
|
|
|
def autocorrelation_plot(series: Series, ax: Axes | None = None, **kwargs) -> Axes:
    """
    Autocorrelation plot for time series.

    Parameters
    ----------
    series : Series
        The time series to visualize.
    ax : Matplotlib axis object, optional
        The matplotlib axis object to use.
    **kwargs
        Options to pass to matplotlib plotting method.

    Returns
    -------
    matplotlib.axes.Axes

    Examples
    --------
    The horizontal lines in the plot correspond to 95% and 99% confidence bands.

    The dashed line is 99% confidence band.

    .. plot::
        :context: close-figs

        >>> spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000)
        >>> s = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing))
        >>> pd.plotting.autocorrelation_plot(s)  # doctest: +SKIP
    """
    backend = _get_plot_backend("matplotlib")
    # All arguments are forwarded by keyword, unchanged.
    acf_ax = backend.autocorrelation_plot(series=series, ax=ax, **kwargs)
    return acf_ax
|
|
|
|
|
class _Options(dict):
    """
    Stores pandas plotting options.

    Allows for parameter aliasing so you can just use parameter names that are
    the same as the plot function parameters, but is stored in a canonical
    format that makes it easy to breakdown into groups later.

    Item access (``[]``, ``del``, ``in``) translates aliases to their
    canonical key before touching the underlying dict.

    Examples
    --------

    .. plot::
            :context: close-figs

             >>> np.random.seed(42)
             >>> df = pd.DataFrame({'A': np.random.randn(10),
             ...                   'B': np.random.randn(10)},
             ...                   index=pd.date_range("1/1/2000",
             ...                   freq='4MS', periods=10))
             >>> with pd.plotting.plot_params.use("x_compat", True):
             ...     _ = df["A"].plot(color="r")
             ...     _ = df["B"].plot(color="g")
    """

    # Maps user-facing alias -> canonical key stored in the dict.
    _ALIASES = {"x_compat": "xaxis.compat"}
    # Canonical keys that must always be present and cannot be deleted.
    _DEFAULT_KEYS = ["xaxis.compat"]

    def __init__(self, deprecated: bool = False) -> None:
        self._deprecated = deprecated
        # Bypass our own __setitem__; the key is already canonical.
        super().__setitem__("xaxis.compat", False)

    def __getitem__(self, key):
        """
        Return the value for ``key`` (alias or canonical name).

        Raises
        ------
        ValueError
            If ``key`` is not a known plotting option.
        """
        key = self._get_canonical_key(key)
        if key not in self:
            raise ValueError(f"{key} is not a valid pandas plotting option")
        return super().__getitem__(key)

    def __setitem__(self, key, value) -> None:
        """Store ``value`` under the canonical form of ``key``."""
        key = self._get_canonical_key(key)
        super().__setitem__(key, value)

    def __delitem__(self, key) -> None:
        """
        Remove ``key`` (alias or canonical name) from the store.

        Raises
        ------
        ValueError
            If ``key`` refers to one of the default parameters.
        """
        key = self._get_canonical_key(key)
        if key in self._DEFAULT_KEYS:
            raise ValueError(f"Cannot remove default parameter {key}")
        super().__delitem__(key)

    def __contains__(self, key) -> bool:
        """Return True if ``key`` (alias or canonical name) is stored."""
        key = self._get_canonical_key(key)
        return super().__contains__(key)

    def reset(self) -> None:
        """
        Reset the option store to its initial state.

        Options added after construction are dropped, the default options are
        restored to their default values, and the ``deprecated`` flag given at
        construction time is kept.

        Returns
        -------
        None
        """
        deprecated = getattr(self, "_deprecated", False)
        # ``dict.clear`` does not go through ``__delitem__``, so the default
        # keys can be wiped here; ``__init__`` re-seeds them right after.
        # Without the clear, non-default keys would survive the reset.
        self.clear()
        # error: Cannot access "__init__" directly
        self.__init__(deprecated)  # type: ignore[misc]

    def _get_canonical_key(self, key):
        """Translate an alias to its canonical key; other keys pass through."""
        return self._ALIASES.get(key, key)

    @contextmanager
    def use(self, key, value) -> Generator[_Options, None, None]:
        """
        Temporarily set a parameter value using the with statement.
        Aliasing allowed.

        The previous value is restored on exit, including when the body
        raises. An unknown ``key`` raises ``ValueError`` before the body runs.
        """
        old_value = self[key]
        try:
            self[key] = value
            yield self
        finally:
            self[key] = old_value


plot_params = _Options()
|
|