|
| 1 | +""" |
| 2 | +Color points by categories |
| 3 | +--------------------------- |
| 4 | +The :meth:`pygmt.Figure.plot` method can be used to plot symbols which are |
| 5 | +color-coded by categories. In the example below, we show how the |
| 6 | +`Palmer Penguins dataset <https://github.com/allisonhorst/palmerpenguins>`__ |
| 7 | +can be visualized. Here, we can pass the individual categories included in |
| 8 | +the "species" column directly to the ``color`` parameter via |
| 9 | +``color=df.species.cat.codes.astype(int)``. Additionally, we have to set |
| 10 | +``cmap=True``. A desired colormap can be selected via the :func:`pygmt.makecpt` |
| 11 | +function. |
| 12 | +""" |
| 13 | + |
| 14 | +import pandas as pd |
| 15 | +import pygmt |
| 16 | + |
| 17 | +# Load sample penguins data and convert 'species' column to categorical dtype |
| 18 | +df = pd.read_csv("https://github.com/mwaskom/seaborn-data/raw/master/penguins.csv") |
| 19 | +df.species = df.species.astype(dtype="category") |
| 20 | + |
| 21 | +# Use pygmt.info to get region bounds (xmin, xmax, ymin, ymax) |
| 22 | +# The below example will return a numpy array like [30.0, 60.0, 12.0, 22.0] |
| 23 | +region = pygmt.info( |
| 24 | + table=df[["bill_length_mm", "bill_depth_mm"]], # x and y columns |
| 25 | + per_column=True, # report the min/max values per column as a numpy array |
| 26 | + # round the min/max values of the first two columns to the nearest multiple |
| 27 | + # of 3 and 2, respectively |
| 28 | + spacing=(3, 2), |
| 29 | +) |
| 30 | + |
| 31 | +# Make a 2D categorical scatter plot, coloring each of the 3 species differently |
| 32 | +fig = pygmt.Figure() |
| 33 | + |
| 34 | +# Generate a basemap of 10 cm x 10 cm size |
| 35 | +fig.basemap( |
| 36 | + region=region, |
| 37 | + projection="X10c/10c", |
| 38 | + frame=[ |
| 39 | + 'xafg+l"Bill length (mm)"', |
| 40 | + 'yafg+l"Bill depth (mm)"', |
| 41 | + 'WSen+t"Penguin size at Palmer Station"', |
| 42 | + ], |
| 43 | +) |
| 44 | + |
| 45 | +# Define a colormap to be used for three categories, define the range of the |
| 46 | +# new discrete CPT using series=(lowest_value, highest_value, interval), |
| 47 | +# use color_model="+c" to write the discrete color palette "inferno" in |
| 48 | +# categorical format |
| 49 | +pygmt.makecpt(cmap="inferno", series=(0, 3, 1), color_model="+c") |
| 50 | + |
| 51 | +fig.plot( |
| 52 | + # Use bill length and bill depth as x and y data input, respectively |
| 53 | + x=df.bill_length_mm, |
| 54 | + y=df.bill_depth_mm, |
| 55 | + # Vary each symbol size according to another feature (body mass, scaled by 7.5e-5) |
| 56 | + sizes=df.body_mass_g * 7.5e-5, |
| 57 | + # Points colored by categorical number code |
| 58 | + color=df.species.cat.codes.astype(int), |
| 59 | + # Use colormap created by makecpt |
| 60 | + cmap=True, |
| 61 | + # Do not clip symbols that fall close to the map bounds |
| 62 | + no_clip=True, |
| 63 | + # Use circles as symbols with size in centimeter units |
| 64 | + style="cc", |
| 65 | + # Set transparency level for all symbols to deal with overplotting |
| 66 | + transparency=40, |
| 67 | +) |
| 68 | + |
| 69 | +# A colorbar displaying the different penguin species types will be added |
| 70 | +# once GMT 6.2.0 is released. |
| 71 | + |
| 72 | +fig.show() |
0 commit comments