matplotlib bar plot add legend from categories dataframe column - dictionary

I try to add the legend which should, according to my example, output:
a red square with the word fruit and
a green square with the word
veggie.
I tried several things (the example below is just one of many attempts), but I can't get it to work.
Can someone tell me how to solve this problem?
import pandas as pd
from matplotlib import pyplot as plt
# Minimal reproduction: one bar per row, colored by the row's 'Type'.
data = [['apple', 'fruit', 10], ['nanaba', 'fruit', 15], ['salat','veggie', 144]]
data = pd.DataFrame(data, columns = ['Object', 'Type', 'Value'])
# Category -> bar color.
colors = {'fruit':'red', 'veggie':'green'}
# Per-row color looked up from the row's category.
c = data['Type'].apply(lambda x: colors[x])
# NOTE: the whole `colors` dict is passed as the single `label` of the bar call;
# this is the attempt that does not yield one legend entry per category.
bars = plt.bar(data['Object'], data['Value'], color=c, label=colors)
plt.legend()

The usual way to create a legend for objects which are not in the axes would be to create proxy artists as shown in the legend guide
Here,
# Build the legend from proxy artists: one filled rectangle per category.
# The rectangles are only handed to the legend call below.
colors = {'fruit': 'red', 'veggie': 'green'}
labels = list(colors)
handles = [
    plt.Rectangle((0, 0), 1, 1, color=shade)
    for shade in colors.values()
]
plt.legend(handles, labels)

So this is a hacky solution and I'm sure there are probably better ways to do this. What you can do is plot individual bar plots that are invisible using width=0 with the original plot colors and specify the labels. You will have to do this in a subplot though.
import pandas as pd
from matplotlib import pyplot as plt
# Sample data: one row per bar, with the category stored in 'Type'.
data = [['apple', 'fruit', 10], ['nanaba', 'fruit', 15], ['salat', 'veggie', 144]]
data = pd.DataFrame(data, columns=['Object', 'Type', 'Value'])
colors = {'fruit': 'red', 'veggie': 'green'}
# Per-row color looked up from the row's category.
c = data['Type'].apply(lambda x: colors[x])

ax = plt.subplot(111)  # specify a subplot
bars = ax.bar(data['Object'], data['Value'], color=c)  # plot data on subplot axis

# For every category, draw the bars again with zero width. These carry the
# category color and label, so they provide the legend entries.
for category, shade in colors.items():
    ax.bar(data['Object'], data['Value'], width=0, color=shade, label=category)

ax.legend()
plt.show()

Related

Legend for gridded Holoviews visualization with categorical data

I'm using the holoviz xarray extension (holoviews.xarray) to visualize a gridded dataset with landcover classes. Plotting the data is straightforward with da.hvplot(). This results however in a continuous colormap with standard tick labels, whereas I need the categories plotted using a specific colormap and their labels included in a legend.
So how can I plot gridded categorical data using Holoviews? My plot needs to:
Have the categories plotted according to a specific colormap (hex color codes).
Include a legend with labels ["water", "cirrus", ...].
Handle situations where the data do not contain all classes. Explanation: when using da.hvplot(cmap=tuple(color_key.values())) while da does not contain all classes, this typically results in a plot where the colorbar ticks do not match the color classes.
Have a legend outside the plotted data.
The best I got so far is the example provided below. But how can I move that legend out of the plot? Or is there a more straightforward solution?
import holoviews as hv
import hvplot.xarray
import numpy as np
import xarray as xr
# Class label -> hex color. The sample data below holds integers in
# [0, len(color_key)), and the colormap is built from these values in order,
# so presumably entry i is the label/color for class value i.
color_key = {
"No Data": "#000000",
"Saturated / Defective": "#ff0000",
"Dark Area Pixels": "#2f2f2f",
"Cloud Shadows": "#643200",
"Vegetation": "#00a000",
"Bare Soils": "#ffe65a",
"water": "#0000ff",
"Clouds low probability / Unclassified": "#808080",
"Clouds medium probability": "#c0c0c0",
"Clouds high probability": "#ffffff",
"Cirrus": "#64c8ff",
"Snow / Ice": "#ff96ff",
}
# Generate sample data: an nx-by-ny grid of random class values on regularly
# spaced x/y coordinates.
nx = 40
ny = 70
xcoords = [37 + 0.1 * i for i in range(nx)]
ycoords = [5 + 0.2 * i for i in range(ny)]
data = np.random.randint(low=0, high=len(color_key), size=nx * ny).reshape(nx, ny)
da = xr.DataArray(
data,
dims=["x", "y"],
coords={"x": xcoords, "y": ycoords},
)
# Visualization
# One Points element per class, labelled with the class name and given the
# class color. Drawn with size=0 and apply_ranges=False; presumably this is so
# that they only contribute legend entries.
legend = hv.NdOverlay(
{
k: hv.Points([0, 0], label=f"{k}").opts(color=v, size=0, apply_ranges=False)
for k, v in color_key.items()
},
"Classification",
)
# Overlay the gridded plot (colormap = the class colors, in order) with the
# legend-only elements.
da.hvplot().opts(cmap=tuple(color_key.values())) * legend
You could either set .opts(legend_location='right') OR you can override the actual ticks on the colorbar using the colorbar_opts option and by providing a fixed ticker along with major_label_overrides like this:
from bokeh.models import FixedTicker  # used below but missing from the original snippet

# One colorbar tick per class index (the small offset is kept from the
# original answer), each relabelled with the matching class name.
ticks = np.arange(len(color_key), dtype='float') + 0.0001
ticker = FixedTicker(ticks=ticks)
labels = dict(zip(ticks, color_key))
da.hvplot(height=600).opts(
    # Half a class of padding on each side; the upper limit is derived from
    # the number of classes instead of the hard-coded 11.5.
    clim=(-0.5, len(color_key) - 0.5),
    cmap=tuple(color_key.values()),
    colorbar_opts={'ticker': ticker, 'major_label_overrides': labels},
)

Adding values to grouped bar chart in hvplot

I'm trying to add labels to a grouped hvplot barchart.
My example dataframe has the following structure:
import pandas as pd
import numpy as np
import holoviews as hv
import hvplot.pandas
hv.extension('bokeh')
# Example frame: group label in 'A', x position in 'B', bar height in 'C'.
df = pd.DataFrame(
    {
        'A': ['A', 'B'] * 3,
        'B': [1, 1, 2, 2, 3, 3],
        'C': list(range(20, 26)),
    }
)
The bar chart is created with the following code:
bar = df.hvplot.bar(x='B', y='C', by='A')
bar
hvplot bar chart
I tried to add labels according to this and this SO questions:
labels = hv.Labels(data=df, kdims=['B','A'],vdims='C')
labels
But an overlay of both plots
bar * labels
results in an error, though the dimensions seem to be the same for me.
ValueError: all the input arrays must have same number of dimensions
:Overlay
.Bars.I :Bars [B,A] (C)
.Labels.I :Labels [B,A] (C)
Any hint to the solution is appreciated. Thank you!
This is possible for normal bar charts, but unfortunately this is not possible yet for grouped barcharts: https://github.com/holoviz/holoviews/pull/3385
You could create separate bar charts per category in col A and then add labels, but you won't have a grouped bar chart then:
def barplot_and_labels_category(category):
    """Return a bar chart with value labels for one category of column 'A'.

    Reads the module-level ``df``, keeps the rows where ``df.A == category``,
    plots 'C' against 'B' as bars and overlays an ``hv.Labels`` element that
    prints each 'C' value at position (B, C).

    Args:
        category: Value of column 'A' selecting the rows to plot.

    Returns:
        The overlay ``plot * labels``.
    """
    df_subset = df[df.A == category]
    plot = df_subset.hvplot.bar(x='B', y='C', ylim=(0, 30))
    # kdims give the label position (x='B', y='C'); vdims is the text shown.
    labels = hv.Labels(
        df_subset,
        kdims=['B', 'C'],
        vdims='C',
    ).opts(text_color='black', text_font_size='20pt')
    return plot * labels
(barplot_and_labels_category('A') + barplot_and_labels_category('B')).cols(1)

How can i specify the color of a bar in a bar chart by its value?

Well, I created a bar chart and now I want to specify the color of a bar depending on its value on the y-axis. Simplified: if the value is positive the bar should be red, and if the value is negative the bar should be blue.
For me it's only possible to change the color along the x-axis but not the y-axis.
from bokeh.palettes import plasma
# Column data for the bars. 'color' is filled straight from a 256-entry
# palette here, i.e. it is not derived from the sign of 'values'.
source = ColumnDataSource(data={'date' : pd.to_datetime(df_data['date'], format='%Y-%m'), 'values' : df_data['values'], 'color' : plasma(256)})
# Each keyword argument needs a trailing comma; the original snippet was
# missing them after `tools` and `toolbar_location`.
p = figure(x_axis_label='time',
           x_axis_type='datetime',
           y_axis_label='diff',
           tools=[hover],
           toolbar_location=None,
           title="title")
p.vbar(x = 'date',top = 'values', source=source, width=timedelta(days=20), color = 'color')
I've found an example on:
https://docs.bokeh.org/en/latest/docs/user_guide/categorical.html
But I need to differentiate or color the bars by their values, not by their number. I know my example makes no sense, but I only want to demonstrate what my expectations are.
Ok, I found a solution myself by using the cut function of pandas.
import pandas as pd
import numpy as np
# Plain array of the bar heights.
values = np.asarray(df_data['values'])
# Two half-open bins, [-inf, 0) and [0, inf): negative values get code 0,
# zero and positive values get code 1. `-np.inf` replaces `np.NINF`, which
# was removed in NumPy 2.0.
bins = [-np.inf, 0, np.inf]
categories = pd.cut(values, bins, right=False)
palette = ['blue', 'red']
# NOTE(review): a missing value gets code -1 and would therefore pick
# palette[-1] ('red'); handle NaNs beforehand if they can occur.
colors = [palette[i] for i in categories.codes]
# Now i can add this column to my ColumnDataSource:
source = ColumnDataSource(data={'date' : pd.to_datetime(df_data['date'], format='%Y-%m'), 'values' : df_data['values'], 'color' : colors})
# `color` must name the data-source column, which is 'color' (not 'colors').
p.vbar(x = 'date',top = 'values', source=source, width=timedelta(days=20), color = 'color')
Of course this is just a "quick and dirty" solution and there is enough room for optimization.

Bokeh: How to fix x-axis in time

I have produced a time series scatter plot in bokeh, which updates when a user interactively selects a new time series. However, I want to fix the x-axis between 0000 to 2359 hours for comparison (Bokeh tries to guess the appropriate x-range).
Below is a random snippet of data. In this code, how do I fix the x_range without it changing the scale to microseconds?
import pandas as pd
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
from bokeh.models import Range1d
output_notebook()
# Sample data: timestamp string -> [activity, objectID].
data = {'2015-08-20 13:39:46': [-0.02813796, 0],
'2015-08-28 12:6:5': [ 1.32426938, 1],
'2015-08-28 13:42:59': [-0.16289655, 1],
'2015-12-14 16:19:44': [ 2.30476287, 1],
'2016-02-01 17:8:32': [ 0.41165004, 0],
'2016-02-09 11:26:33': [-0.65023149, 0],
'2016-04-08 17:57:47': [ 0.09335096, 1],
'2016-04-27 19:2:15': [ 1.43917208, 0]}
# Transpose so that each timestamp becomes a row, then name the two columns.
test = pd.DataFrame(data=data).T
test.columns = ["activity","objectID"]
# Convert the string index to datetimes.
test.index = pd.to_datetime(test.index)
# The x_range argument is left out here; what to pass is the open question.
p = figure(plot_width=500, plot_height=250, x_axis_label='X', y_axis_label='Y', x_axis_type="datetime")# x_range = Range1d(# dont know what to put here))
# Only the time-of-day part of each timestamp is used as the x value.
r = p.circle(x=test.index.time, y=test["activity"])
show(p, notebook_handle=True);
I've found a (scrappy) solution for this but it doesn't fix the axes sizes entirely since the size of the axes seems to be dependent on other axes properties such as the length of the y-tick labels.
# a method for setting constant x-axis in hours for bokeh:
# two dummy rows, at 23:59 and 00:01, holding activity 0. The second
# timestamp no longer contains the stray space ('2015-08- 28').
day_x_axis = pd.DataFrame(
    data=[0, 0],
    index=['2015-07-28 23:59:00', '2015-08-28 00:01:00'],
    columns=["activity"],
)
day_x_axis.index = pd.to_datetime(day_x_axis.index)
# Append the dummy rows to the existing data. `old_time_series` is not defined
# in this snippet; presumably it is the DataFrame being plotted (confirm).
new_time_series = pd.concat((old_time_series, day_x_axis), axis=0) # this will set all other columns you had to NaN.
I fixed my axes entirely by also setting the y_range property when instantiating the figure object.

Create a plot with twin axes, but with ranges not linked together and with auto-scaling

I've attached two images illustrating my problem:
The graph with only one axis and one data series;
The graph with both axes (the line at the bottom is the same as the one in the first graph).
I'm using widgets and a Bokeh server in order to have users play with different options showing different data series.
As you can see in the code below, I've used DataRange1d for both ranges, but even though both axes get auto-scaled when I change scenarios using the widgets, the axes stay linked together, covering an identical range no matter what.
I've searched in the docs and the only solution I found to my problem is to pass a specific range to Range1d or DataRange1d. I can't do this since I have many data series to show, so one range wouldn't fit for all.
Thanks!
The code:
# Create plots: one figure per series, plus a wide figure meant to show both.
p_balance = figure(width=500, height=300, title='Balance', tools='save')
p_total_debt = figure(width=500, height=300, title='Total debt', tools='save')
p_both = figure(width=1000, height=300, title='Both', tools='save')
# Add the second axis: the default y-range and the extra range named
# 'total debt' are both DataRange1d objects created without arguments; the
# extra range is shown by a LinearAxis on the right.
p_both.y_range = DataRange1d()
p_both.extra_y_ranges = {'total debt': DataRange1d()}
p_both.add_layout(LinearAxis(y_range_name='total debt'), 'right')
# Add glyphs: each series on its own figure.
line_balance = p_balance.line(x=list(range(0,26)), y='y', source=source_balance, color='black', line_width=2)
line_total_debt = p_total_debt.line(x=list(range(0,26)), y='y', source=source_total_debt, color='black', line_width=2)
# For the combined plot: balance on the default y-range, total debt on the
# extra 'total debt' range.
line_balance2 = p_both.line(x=list(range(0,26)), y='y', source=source_balance, color='black', line_width=2)
# NOTE: this rebinds `line_total_debt`, which was already assigned above.
line_total_debt = p_both.line(x=list(range(0,26)), y='y', source=source_total_debt, color='black', line_width=2, y_range_name='total debt')
Image 1
Image 2
It works as soon as you provide some starting values to the Datarange objects...
It seems the data ranges do not initialize correctly, so it has to be done "manually".
I faked the data since you did not provide any data. Instead of setting the start and end values explicitly you can use min(your_data) max(your_data).
from bokeh.models import DataRange1d, LinearAxis, Range1d
from bokeh.plotting import figure, show
# Figure that carries both series, each against its own y-axis.
p_both = figure(
    width=1000,
    height=300,
    title='Both',
    tools='save',
    toolbar_sticky=False,
)

# Explicit start/end values for the default (left) range and for the named
# extra range used by the second series.
p_both.y_range = Range1d(0, 26)
p_both.extra_y_ranges = {'total_debt': Range1d(start=1000, end=1050)}

# Fake data: the first series uses the default range, the second one is
# bound to the 'total_debt' range.
line_balance2 = p_both.line(
    x=range(0, 26),
    y=range(0, 26),
    color='black',
    line_width=2,
)
line_total_debt = p_both.line(
    x=range(0, 26),
    y=range(1000 + 0, 1000 + 26),
    color='red',
    line_width=2,
    y_range_name='total_debt',
)

# Right-hand axis showing the 'total_debt' range.
p_both.add_layout(LinearAxis(y_range_name='total_debt'), 'right')
show(p_both)

Resources