scale bokeh heatmap plot size with cells - bokeh

I am trying to generate a rectangular plot using Bokeh. I want the plot to set the the dimensions such that as I increase elements on either x or y axis, the cells cover the entire plot.
Here is what I am trying to do:
from bokeh.plotting import figure, show, output_file
from bokeh.palettes import Reds
data_dict={'state':['New York','New Mexico','New York','New Mexico'],'feature':['poverty','poverty','unemployment','unemployment'],'color':=[Reds[9][0],Reds[9][1],Reds[9][2],Reds[9][3]}
p = figure(title="testing",tools="hover", toolbar_location=None,x_range=data_dict['feature'], y_range=data_dict['state'])
p.rect('feature','state',source=data_dict,
color='color',width=1,height=1)
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "10pt"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = np.pi/3
show(p)```
Eliminate all the whitespace in the plot.

If I understand your question correctly, you're looking for a heatmap similar to this one in Bokeh's examples:
https://docs.bokeh.org/en/latest/docs/gallery/categorical.html?highlight=heatmap
I fiddled with your code a little bit and came up with something similar. The code didn't run exactly as posted, so I'm not sure what you saw, but you would have needed separate lists for range labels (since there's repetition in the data_dict lists).
from bokeh.plotting import figure, show
from bokeh.palettes import Reds
import numpy as np
data_dict = {
'state': ['New York', 'New York', 'New Mexico', 'New Mexico'],
'feature': ['poverty', 'unemployment', 'poverty', 'unemployment'],
'colors': [Reds[9][0], Reds[9][1],
Reds[9][2], Reds[9][3]]
}
x_labels = ['poverty', 'unemployment']
y_labels = ['New York', 'New Mexico']
p = figure(title="testing",tools="hover", toolbar_location=None, x_range=x_labels, y_range=y_labels)
p.rect('feature', 'state', source=data_dict,
color='colors', width=1, height=1)
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "10pt"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = np.pi/3
show(p)

Related

How to plot a vertical line on a bar plot in Bokeh?

Based on the first example of the user-guide of Bokeh,
from bokeh.io import show, output_file
from bokeh.plotting import figure
from bokeh.models import Span
output_file("bars.html")
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 6]
p = figure(x_range=fruits, plot_height=250, title="Fruit Counts",
toolbar_location=None, tools="")
p.vbar(x=fruits, top=counts, width=0.9)
# these two lines
vline = Span(location='Apples', dimension='height', line_color='blue', line_width=4)
p.renderers.extend([vline])
p.xgrid.grid_line_color = None
p.y_range.start = 0
show(p)
I am trying to add a vertical line to a bar plot whose x-range are categories. However, this does not seem to be possible, as this raises an error "ValueError: expected a value of type Real, got Apples of type str".
location='Apples' does not work as intended as it expected a number.
One solution is to convert the categorical value to the corresponding numeric value on the plot:
index = p.x_range.factors.index("Apples")
delta = (p.x_range.end - p.x_range.start)/p.x_range.factors.length;
location = delta/2 + index;
If the plot is dynamic (e.g. values are not known when the plot is built), then use an auxiliary JS function to do the conversion:
function _value_to_location(x_range, value) {
var index = x_range.factors.findIndex(x => x == value)
var delta = (x_range.end - x_range.start)/x_range.factors.length;
return delta/2 + index;
};
...
vline.location = _value_to_location(figure.x_range, "Apples");

Bokeh: How to show both color and marker type in legend

I want to plot legend that shows both line color and marker type when using boekh package in jupyter notebook.
I have many lines in one plot. To distinguish them, I tried my best to distinguish them by evenly distributing their color in the color space. However, when the number of lines reaches e.g. 9, the color of some lines are quite similar. So, I want to add different marker types on top of different colors so that when two lines are similar in color, they have different marker type.
It was straight-forward with matplotlib, but not straight-forward with bokeh. Below is the code I have now that can only plot legend with color.
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
%matplotlib inline
from bokeh.plotting import figure, show, ColumnDataSource, save, output_notebook, output_file, reset_output
from bokeh.io import export_svgs,export_png
from bokeh.models import HoverTool, Legend
from bokeh.layouts import gridplot
import colorsys # needed for generating N equally extinguishable colors
from operator import add # needed for add lists
d = {'Sex': ['male', 'male','male','male', 'male','male','female','female','female','female','female','female'], 'age': [20, 20,20, 25,25,25,20, 20,20,25,25,25], 'working_hours': [20,30,40,20,30,40,20,30,40,20,30,40],'income': [1000, 2000,3000,1500, 2500,3500,1100, 2100,3100,1300, 2300,3300] }
values = pd.DataFrame(data=d)
x_var = 'working_hours'
x_var_dimension = 'H'
y_var = 'income'
y_var_dimension = 'Dollars'
hover = HoverTool(tooltips=[("data (x,y)", "(#x, #y)")])
TOOLS=[hover]
p= figure(width=1200, height=600,tools=TOOLS, x_axis_type='linear', x_axis_label='%s [%s]'%(x_var, x_var_dimension),y_axis_label='%s [%s]'%(y_var, y_var_dimension))
nr_expressions_row_col=9
figs_array_row_col = []
figs_row_row_col=[]
legend_its_row_col = []
legend_its_row_col_renderer = []
loop_count = 0;
for key, group in values.groupby(['Sex']):
for key_sub1, group_sub1 in group.groupby(['age']):
loop_count+=1
#print type(key)
#print group_sub1
#print count
#hover = HoverTool(tooltips=[("data (x,y)", "($x, $y)")])
x_data = group_sub1[x_var].values;
y_data = group_sub1[y_var].values
(color_r,color_g,color_b) = colorsys.hsv_to_rgb(loop_count*1.0/nr_expressions_row_col, 1, 1)
plot_row_col_line = p.line(x_data, y_data,line_color=(int(255*color_r),int(255*color_g),int(255*color_b)))
legend_its_row_col.append(("%s %s"%(key,key_sub1), [plot_row_col_line]))
legend_row_col = Legend(items = legend_its_row_col, location=(0,0))
legend_row_col.click_policy = 'hide'
legend_row_col.background_fill_alpha = 0
p.add_layout(legend_row_col, 'left')
figs_row_row_col.append(p)
figs_array_row_col.append(figs_row_row_col)
grid_row_col = gridplot(figs_array_row_col)
reset_output()
output_notebook()
show(grid_row_col)
What I can get with my code is:
What I want is:
This should give the result you want, I've added a cyclic list with all the markers that can be used with p.scatter and it takes another marker every iteration. After this I add it with the line glyph to the legend dictionary.
#!/usr/bin/python3
import pandas as pd
import numpy as np
import math
from bokeh.plotting import figure, show, ColumnDataSource, save, output_file, reset_output
from bokeh.models import HoverTool, Legend
from bokeh.layouts import gridplot
import colorsys # needed for generating N equally extinguishable colors
from itertools import cycle
d = {'Sex': ['male', 'male','male','male', 'male','male','female','female','female','female','female','female'], 'age': [20, 20,20, 25,25,25,20, 20,20,25,25,25], 'working_hours': [20,30,40,20,30,40,20,30,40,20,30,40],'income': [1000, 2000,3000,1500, 2500,3500,1100, 2100,3100,1300, 2300,3300] }
values = pd.DataFrame(data=d)
x_var = 'working_hours'
x_var_dimension = 'H'
y_var = 'income'
y_var_dimension = 'Dollars'
hover = HoverTool(tooltips=[("data (x,y)", "(#x, #y)")])
TOOLS=[hover]
p= figure(width=1200, height=600,tools=TOOLS, x_axis_type='linear', x_axis_label='%s [%s]'%(x_var, x_var_dimension),y_axis_label='%s [%s]'%(y_var, y_var_dimension))
nr_expressions_row_col=9
figs_array_row_col = []
figs_row_row_col=[]
legend_its_row_col = []
legend_its_row_col_renderer = []
loop_count = 0;
markers = ['circle', 'square', 'triangle', 'asterisk', 'circle_x', 'square_x', 'inverted_triangle', 'x', 'circle_cross', 'square_cross', 'diamond', 'cross']
pool = cycle(markers)
for key, group in values.groupby(['Sex']):
for key_sub1, group_sub1 in group.groupby(['age']):
loop_count+=1
x_data = group_sub1[x_var].values;
y_data = group_sub1[y_var].values
(color_r,color_g,color_b) = colorsys.hsv_to_rgb(loop_count*1.0/nr_expressions_row_col, 1, 1)
plot_row_col_line = p.line(x_data, y_data,line_color=(int(255*color_r),int(255*color_g),int(255*color_b)))
plot_row_col_glyph = p.scatter(x_data, y_data, color=(int(255*color_r),int(255*color_g),int(255*color_b)), size=10, marker=next(pool))
legend_its_row_col.append(("%s %s"%(key,key_sub1), [plot_row_col_line, plot_row_col_glyph]))
legend_row_col = Legend(items = legend_its_row_col, location=(0,0))
legend_row_col.click_policy = 'hide'
legend_row_col.background_fill_alpha = 0
p.add_layout(legend_row_col, 'left')
figs_row_row_col.append(p)
figs_array_row_col.append(figs_row_row_col)
grid_row_col = gridplot(figs_array_row_col)
reset_output()
show(grid_row_col)

Adding interaction to heat map to show another bokeh plot based on the selection

I'm trying to add interaction to heatmap(using rect) using CustomJS to show another bokeh plot based on the selected value.
This is what I've tried
heat_map_df_stack = pd.DataFrame(heat_map_df.stack(), columns=['rate']).reset_index()
....
issue_heat_map = figure(title="",
x_range=issues, y_range=list(reversed(products)),
x_axis_location="above", plot_width=400, plot_height=400,
tools=TOOLS, toolbar_location='below',
tooltips=[('Product & Issue Id', '#Product #Issue'), ('Issue Count', '#rate')],
name='issue_heat_map')
....
issue_heat_map.rect(x="Issue", y="Product", width=1, height=1,
source=heat_map_df_stack,
fill_color={'field': 'rate', 'transform': mapper},
line_color=None)
....
taptool = issue_heat_map.select(type=TapTool)
taptool.callback = CustomJS(args = dict(source = ""), code =
"""
console.log('test')
console.log(cb_obj)
var inds = cb_obj.selected;
window.alert(inds);
""")
On click of the rect or selection, nothing is happening now.
[Edit] : I updated the above code. Now I'm able to see console log and alert, but have no clue on how to get selected value from the heat map.
Here is a version using the bokeh server. The code is a adaption of the heatmap example from the bokeh gallery.
from math import pi
import pandas as pd
import numpy as np
from bokeh.io import curdoc
from bokeh.models import LinearColorMapper, BasicTicker, PrintfTickFormatter, ColorBar
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.layouts import gridplot
from bokeh.sampledata.unemployment1948 import data
data['Year'] = data['Year'].astype(str)
data = data.set_index('Year')
data.drop('Annual', axis=1, inplace=True)
data.columns.name = 'Month'
years = list(data.index)
months = list(data.columns)
# reshape to 1D array or rates with a month and year for each row.
df = pd.DataFrame(data.stack(), columns=['rate']).reset_index()
source = ColumnDataSource(df)
# this is the colormap from the original NYTimes plot
colors = ["#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce", "#ddb7b1", "#cc7878", "#933b41", "#550b1d"]
mapper = LinearColorMapper(palette=colors, low=df.rate.min(), high=df.rate.max())
TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom, tap"
p = figure(title="US Unemployment ({0} - {1})".format(years[0], years[-1]),
x_range=years, y_range=list(reversed(months)),
x_axis_location="above", plot_width=900, plot_height=400,
tools=TOOLS, toolbar_location='below',
tooltips=[('date', '#Month #Year'), ('rate', '#rate%')])
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "5pt"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = pi / 3
heatmap = p.rect(x="Year", y="Month", width=1, height=1,
source=source,
fill_color={'field': 'rate', 'transform': mapper},
line_color=None)
color_bar = ColorBar(color_mapper=mapper, major_label_text_font_size="5pt",
ticker=BasicTicker(desired_num_ticks=len(colors)),
formatter=PrintfTickFormatter(format="%d%%"),
label_standoff=6, border_line_color=None, location=(0, 0))
p.add_layout(color_bar, 'right')
# Adding the tap interaction + plot
other_source = ColumnDataSource({'x': range(10), 'y': range(10)})
other_plot = figure(title="Other Plot")
other_line = other_plot.line(x='x', y='y', source=other_source)
def update(attr, old, new):
if not old:
old = [1]
if new:
other_source.data.update(y=np.array(other_source.data['y'])/old[0]*new[0])
source.selected.on_change('indices', update)
curdoc().add_root(gridplot([[p, other_plot]]))
The important part are the last few lines, where I set up the second plot and add the update function to change the slope of the line in the second plot according to the selected rect from the heatmap.

Bokeh Colorbar Vertical title to right of colorbar?

I'm trying to do something that I'd normally consider trivial but seems to be very difficult in bokeh: Adding a vertical colorbar to a plot and then having the title of the colorbar (a.k.a. the variable behind the colormapping) appear to one side of the colorbar but rotated 90 degrees clockwise from horizontal.
From what I can tell of the bokeh ColorBar() interface (looking at both documentation and using the python interpreter's help() function for this element), this is not possible. In desperation I have added my own Label()-based annotation. This works but is klunky and displays odd behavior when deployed in a bokeh serve situation--that the width of the data window on the plot varies inversely with the length of the title colorbar's title string.
Below I've included a modified version of the bokeh server mpg example. Apologies for its complexity, but I felt this was the best way to illustrate the problem using infrastructure/data that ships with bokeh. For those unfamiliar with bokeh serve, this code snippet needs to saved to a file named main.py that resides in a directory--for the sake of argument let's say CrossFilter2--and in the parent directory of CrossFilter2 one needs to invoke the command
bokeh serve --show CrossFilter2
this will then display in a browser window (localhost:5006/CrossFilter2) and if you play with the color selection widget you will see what I mean, namely that short variable names such as 'hp' or 'mpg' result in a wider data display windows than longer variable names such as 'accel' or 'weight'. I suspect that there may be a bug in how label elements are sized--that their x and y dimensions are swapped--and that bokeh has not understood that the label element has been rotated.
My questions are:
Must I really have to go to this kind of trouble to get a simple colorbar label feature that I can get with little-to-no trouble in matplotlib/plotly?
If I must go through the hassle you can see in my sample code, is there some other way I can do this that avoids the data window width problem?
import numpy as np
import pandas as pd
from bokeh.layouts import row, widgetbox
from bokeh.models import Select
from bokeh.models import HoverTool, ColorBar, LinearColorMapper, Label
from bokeh.palettes import Spectral5
from bokeh.plotting import curdoc, figure, ColumnDataSource
from bokeh.sampledata.autompg import autompg_clean as df
df = df.copy()
SIZES = list(range(6, 22, 3))
COLORS = Spectral5
# data cleanup
df.cyl = df.cyl.astype(str)
df.yr = df.yr.astype(str)
columns = sorted(df.columns)
discrete = [x for x in columns if df[x].dtype == object]
continuous = [x for x in columns if x not in discrete]
quantileable = [x for x in continuous if len(df[x].unique()) > 20]
def create_figure():
xs = df[x.value].tolist()
ys = df[y.value].tolist()
x_title = x.value.title()
y_title = y.value.title()
name = df['name'].tolist()
kw = dict()
if x.value in discrete:
kw['x_range'] = sorted(set(xs))
if y.value in discrete:
kw['y_range'] = sorted(set(ys))
kw['title'] = "%s vs %s" % (y_title, x_title)
p = figure(plot_height=600, plot_width=800,
tools='pan,box_zoom,wheel_zoom,lasso_select,reset,save',
toolbar_location='above', **kw)
p.xaxis.axis_label = x_title
p.yaxis.axis_label = y_title
if x.value in discrete:
p.xaxis.major_label_orientation = pd.np.pi / 4
if size.value != 'None':
groups = pd.qcut(df[size.value].values, len(SIZES))
sz = [SIZES[xx] for xx in groups.codes]
else:
sz = [9] * len(xs)
if color.value != 'None':
coloring = df[color.value].tolist()
cv_95 = np.percentile(np.asarray(coloring), 95)
mapper = LinearColorMapper(palette=Spectral5,
low=cv_min, high=cv_95)
mapper.low_color = 'blue'
mapper.high_color = 'red'
add_color_bar = True
ninety_degrees = pd.np.pi / 2.
color_bar = ColorBar(color_mapper=mapper, title='',
#title=color.value.title(),
title_text_font_style='bold',
title_text_font_size='20px',
title_text_align='center',
orientation='vertical',
major_label_text_font_size='16px',
major_label_text_font_style='bold',
label_standoff=8,
major_tick_line_color='black',
major_tick_line_width=3,
major_tick_in=12,
location=(0,0))
else:
c = ['#31AADE'] * len(xs)
add_color_bar = False
if add_color_bar:
source = ColumnDataSource(data=dict(x=xs, y=ys,
c=coloring, size=sz, name=name))
else:
source = ColumnDataSource(data=dict(x=xs, y=ys, color=c,
size=sz, name=name))
if add_color_bar:
p.circle('x', 'y', fill_color={'field': 'c',
'transform': mapper},
line_color=None, size='size', source=source)
else:
p.circle('x', 'y', color='color', size='size', source=source)
p.add_tools(HoverTool(tooltips=[('x', '#x'), ('y', '#y'),
('desc', '#name')]))
if add_color_bar:
color_bar_label = Label(text=color.value.title(),
angle=ninety_degrees,
text_color='black',
text_font_style='bold',
text_font_size='20px',
x=25, y=300,
x_units='screen', y_units='screen')
p.add_layout(color_bar, 'right')
p.add_layout(color_bar_label, 'right')
return p
def update(attr, old, new):
layout.children[1] = create_figure()
x = Select(title='X-Axis', value='mpg', options=columns)
x.on_change('value', update)
y = Select(title='Y-Axis', value='hp', options=columns)
y.on_change('value', update)
size = Select(title='Size', value='None',
options=['None'] + quantileable)
size.on_change('value', update)
color = Select(title='Color', value='None',
options=['None'] + quantileable)
color.on_change('value', update)
controls = widgetbox([x, y, color, size], width=200)
layout = row(controls, create_figure())
curdoc().add_root(layout)
curdoc().title = "Crossfilter"
You can add a vertical label to the Colorbar by plotting it on a separate axis and adding a title to this axis. To illustrate this, here's a modified version of Bokeh's standard Colorbar example (found here):
import numpy as np
from bokeh.plotting import figure, output_file, show
from bokeh.models import LogColorMapper, LogTicker, ColorBar
from bokeh.layouts import row
plot_height = 500
plot_width = 500
color_bar_height = plot_height + 11
color_bar_width = 180
output_file('color_bar.html')
def normal2d(X, Y, sigx=1.0, sigy=1.0, mux=0.0, muy=0.0):
z = (X-mux)**2 / sigx**2 + (Y-muy)**2 / sigy**2
return np.exp(-z/2) / (2 * np.pi * sigx * sigy)
X, Y = np.mgrid[-3:3:100j, -2:2:100j]
Z = normal2d(X, Y, 0.1, 0.2, 1.0, 1.0) + 0.1*normal2d(X, Y, 1.0, 1.0)
image = Z * 1e6
color_mapper = LogColorMapper(palette="Viridis256", low=1, high=1e7)
plot = figure(x_range=(0,1), y_range=(0,1), toolbar_location=None,
width=plot_width, height=plot_height)
plot.image(image=[image], color_mapper=color_mapper,
dh=[1.0], dw=[1.0], x=[0], y=[0])
Now, to make the Colorbar, create a separate dummy plot, add the Colorbar to the dummy plot and place it next to the main plot. Add the Colorbar label as the title of the dummy plot and center it appropriately.
color_bar = ColorBar(color_mapper=color_mapper, ticker=LogTicker(),
label_standoff=12, border_line_color=None, location=(0,0))
color_bar_plot = figure(title="My color bar title", title_location="right",
height=color_bar_height, width=color_bar_width,
toolbar_location=None, min_border=0,
outline_line_color=None)
color_bar_plot.add_layout(color_bar, 'right')
color_bar_plot.title.align="center"
color_bar_plot.title.text_font_size = '12pt'
layout = row(plot, color_bar_plot)
show(layout)
This gives the following output image:
One thing to look out for is that color_bar_width is set wide enough to incorporate both the Colorbar, its axes labels and the Colorbar label. If the width is set too small, you will get an error and the plot won't render.
As of Bokeh 0.12.10 there is no built in label available for colorbars. In addition to your approach or something like it, another possibility would be a custom extension, though that is similarly not trivial.
Offhand, a colobar label certainly seems like a reasonable thing to consider. Regarding the notion that it ought to be trivially available, if you polled all users about what they consider should be trivially available, there will be thousands of different suggestions for what to prioritize. As is very often the case in the OSS world, there are far more possible things to do, than there are people to do them (less than 3 in this case). So, would first suggest a GitHub Issue to request the feature, and second, if you have the ability, volunteering to help implement it. Your contribution would be valuable and appreciated by many.

How do I use custom labels for ticks in Bokeh?

I understand how you specify specific ticks to show in Bokeh, but my question is if there is a way to assign a specific label to show versus the position. So for example
plot.xaxis[0].ticker=FixedTicker(ticks=[0,1])
will only show the x-axis labels at 0 and 1, but what if instead of showing 0 and 1 I wanted to show Apple and Orange. Something like
plot.xaxis[0].ticker=FixedTicker(ticks=[0,1], labels=['Apple', 'Orange'])
A histogram won't work for the data I am plotting. Is there anyway to use custom labels in Bokeh like this?
Fixed ticks can just be passed directly as the "ticker" value, and major label overrides can be provided to explicitly supply custom labels for specific values:
from bokeh.plotting import figure, output_file, show
p = figure()
p.circle(x=[1,2,3], y=[4,6,5], size=20)
p.xaxis.ticker = [1, 2, 3]
p.xaxis.major_label_overrides = {1: 'A', 2: 'B', 3: 'C'}
output_file("test.html")
show(p)
EDIT: Updated for Bokeh 0.12.5 but also see simpler method in the other answer.
This worked for me:
import pandas as pd
from bokeh.charts import Bar, output_file, show
from bokeh.models import TickFormatter
from bokeh.core.properties import Dict, Int, String
class FixedTickFormatter(TickFormatter):
"""
Class used to allow custom axis tick labels on a bokeh chart
Extends bokeh.model.formatters.TickFormatte
"""
JS_CODE = """
import {Model} from "model"
import * as p from "core/properties"
export class FixedTickFormatter extends Model
type: 'FixedTickFormatter'
doFormat: (ticks) ->
labels = #get("labels")
return (labels[tick] ? "" for tick in ticks)
#define {
labels: [ p.Any ]
}
"""
labels = Dict(Int, String, help="""
A mapping of integer ticks values to their labels.
""")
__implementation__ = JS_CODE
skills_list = ['cheese making', 'squanching', 'leaving harsh criticisms']
pct_counts = [25, 40, 1]
df = pd.DataFrame({'skill':skills_list, 'pct jobs with skill':pct_counts})
p = Bar(df, 'index', values='pct jobs with skill', title="Top skills for ___ jobs", legend=False)
label_dict = {}
for i, s in enumerate(skills_list):
label_dict[i] = s
p.xaxis[0].formatter = FixedTickFormatter(labels=label_dict)
output_file("bar.html")
show(p)
This can be dealt with as categorical data, see bokeh documentation.
from bokeh.plotting import figure, show
categories = ['A', 'B','C' ]
p = figure(x_range=categories)
p.circle(x=categories, y=[4, 6, 5], size=20)
show(p)

Resources