Bokeh LabelSet x axis being datetime - bokeh

I am new to Bokeh and am looking for a way to label each data point. Following the examples in the documentation, I could not find a solution that works when the x axis is a datetime axis.
# Reproducer from the question: plot sales against dates and try to label every point.
import pandas as mypd
# NOTE(review): `ColumnarDataSource` looks like a typo for `ColumnDataSource` — confirm;
# as written this import would presumably fail before the ValueError is reached.
from bokeh.models import LabelSet , ColumnarDataSource
from bokeh.plotting import figure, output_file, show
date_1 = ['2020-01-01', '2020-01-02','2020-01-03','2020-01-04','2020-01-05']
sal = mypd.DataFrame(date_1)
sal.columns = ["Date_1"]
sal['Sales'] = [15,25,36,17,4]
# Convert the date strings to datetime values for the datetime x axis.
sal['Date_1'] = mypd.to_datetime(sal['Date_1'])
p= figure(x_axis_type = "datetime")
p.line(x =sal['Date_1'] ,y = sal['Sales'])
# x, y and text are passed as pandas Series here, with no `source`;
# this is presumably the call that raises the reported ValueError.
lab = LabelSet(x = sal['Date_1'], y = sal['Sales'], text = sal['Sales'])
p.add_layout(lab)
show(p)
It is throwing the error
ValueError: expected an element of either String, Dict(Enum('expr', 'field', 'value', 'transform'), Either(String, Instance(Transform), Instance(Expression), Float)) or Float, got 0 2020-01-01
My understanding is that the error occurs because LabelSet only accepts numerical data for the x axis.
Is my understanding correct ?
If yes what is the workaround ?
I tried with similar queries but could not find a solution for myself.
Similar Query
And this

The simplest solution is to just use a common data source. It also prevents you from embedding the data twice.
import pandas as pd
from bokeh.models import LabelSet, ColumnDataSource
from bokeh.plotting import figure, show
# One shared ColumnDataSource drives both the line and the labels, so the
# data is embedded once and every glyph refers to its columns by name.
dates = pd.to_datetime(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04', '2020-01-05'])
sal = pd.DataFrame({'Date_1': dates, 'Sales': [15, 25, 36, 17, 4]})
sal = sal.set_index('Date_1')
ds = ColumnDataSource(sal)
p = figure(x_axis_type="datetime")
p.line(x='Date_1', y='Sales', source=ds)
# Labels use the same column names and the same source as the line.
lab = LabelSet(x='Date_1', y='Sales', text='Sales', source=ds)
p.add_layout(lab)
show(p)

Related

Changing Bokeh table row count

Below is a working example of a Bokeh table that is populated with selections from a scatter plot.
Once the table is first initialized, it always shows that many rows (too many or too few).
Is there a way to make the number of rows dynamic to fit the number of records selected?
Thank you
import numpy as np
import pandas as pd
from bokeh.layouts import row
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, curdoc, show
from bokeh.models.widgets import DataTable, DateFormatter, TableColumn
#Plotting points on chart.
# 500 random integer (X, Y) points, indexed by the strings '1'..'500'.
initial_df = pd.DataFrame(np.random.randint(0,100,size=(500, 2)),
columns=["X","Y"],
index=[str(i) for i in range(1,500+1)])
pointchart=figure(plot_width=800, plot_height=700,
tools=['lasso_select','box_select'],
title="Points for selection")
pointchart_source= ColumnDataSource(initial_df )
pointchart_glyph= pointchart.circle("X","Y",source=pointchart_source,size=3.5)
#Source for table
# source_df is the same object as initial_df (no copy), so the 'ID' column added
# below also appears on initial_df; pointchart_source was built before it existed.
source_df=initial_df
source_df['ID']=source_df.index
#Making initial table source from dataframe. The table will always have this number of rows.
initial_source_for_table = ColumnDataSource(source_df)
columns = [TableColumn(field='ID', title="Col1"),
TableColumn(field="X", title="Col2"),
TableColumn(field="Y", title="Col3")]
# NOTE(review): a `global` statement at module level has no effect;
# data_table is a module-level name either way.
global data_table #lets you access it in the callback.
data_table = DataTable(source=initial_source_for_table, columns=columns, width=800, height=400)
def on_selection_change(attr, old, new):
    """Rebuild the table's data source from the currently selected points.

    Used as the ``on_change`` callback for the plot source's selected
    ``indices``.

    Args:
        attr: Name of the changed property (unused).
        old: Previous selection (unused).
        new: Row positions of the points that are now selected.
    """
    # Look the selected rows up by position in the plot's data.
    newdataframe = pd.DataFrame(pointchart_source.data).loc[new]
    newdataframe['ID'] = newdataframe.index
    newsource = ColumnDataSource(newdataframe[['ID', "X", "Y"]].dropna(how='all'))
    # The whole source object is swapped out, and the table is given a fixed
    # size regardless of how many rows were selected.
    data_table.source = newsource
    data_table.width = 500
    data_table.height = 500
# Call on_selection_change whenever the selected point indices change.
pointchart_glyph.data_source.selected.on_change('indices',on_selection_change)
#Show
# Plot and table side by side, attached to the current Bokeh document.
layout=row(pointchart,data_table)
curdoc().add_root(layout)
!powershell -command {'bokeh serve --show Test_Table.ipynb'}
I don't have Jupyter Notebook but this example should help you on your way.
Just count the number of selected points and alter the number of table rows with table.height = number_points * 25. Run the code with: bokeh serve --show app.py
from bokeh.io import curdoc, show
from bokeh.layouts import widgetbox
from bokeh.models import ColumnDataSource, Slider, DataTable, TableColumn
# Upper end of the slider range.
max_i = 200
# Slider value, and number of table rows, at start-up.
init_i = 6
def get_square(n):
    """Return table data for the integers ``0 .. n-1`` and their squares.

    Args:
        n: Number of rows to generate.

    Returns:
        A dict with two equally long lists: ``x`` holds ``0 .. n-1`` and
        ``y`` holds the square of each ``x``. Both are empty when ``n <= 0``.
    """
    values = list(range(n))
    return dict(x=values, y=[value ** 2 for value in values])
# The table starts with init_i rows of (x, x**2) data.
source = ColumnDataSource(get_square(init_i))
columns = [
TableColumn(field = "x", title = "x"),
TableColumn(field = "y", title = "x**2"),
]
# No height is given here; update_data sets it once the slider moves.
table = DataTable(source = source, columns = columns, width = 320)
# The slider picks how many rows the table shows (1 .. max_i).
slider = Slider(start = 1, end = max_i, value = init_i, step = 1, title = "i", width = 300)
def update_data(attrname, old, new):
    """Refill the table for the slider's current value and resize it to fit.

    Used as the slider's ``on_change`` callback. The arguments are unused;
    the row count is read from ``slider.value`` directly.
    """
    i = slider.value
    # Assign new data to the existing source rather than replacing the source.
    table.source.data = get_square(i)
    # 25 px per data row plus 25 px extra (presumably for the header row).
    table.height = i * 25 + 25
# Call update_data whenever the slider value changes.
slider.on_change('value', update_data)
# Slider above the table, attached to the current Bokeh document.
layout = widgetbox(slider, table)
curdoc().add_root(layout)
BTW: you should not replace the entire ColumnDataSource in your callback but just assign a new data to it like in my example, that is use:
table.source.data = new_data
instead of:
table.source = new_source

Inserting Labels in Bokeh

I am trying to insert labels in Bokeh and it is not working.
My code is:
from bokeh.io import show, output_file
from bokeh.plotting import figure
from bokeh.io import output_notebook
from bokeh.models import NumeralTickFormatter
# df_resumo_1 is not defined in this snippet. The assignment below is an alias,
# not a copy, so the new 'mes_status' column is added to df_resumo_1 as well.
df_carteira_grafico = df_resumo_1
# Category label per bar: "<mes_juncao> - <Atraso>".
df_carteira_grafico['mes_status'] = (df_carteira_grafico['mes_juncao'].astype(dtype=str))+' - '+df_carteira_grafico['Atraso']
output_notebook()
# This figure is replaced by the second figure(...) call below before it is used.
p=figure()
# x categories (carteira) and bar heights (tamanho).
carteira = df_carteira_grafico['mes_status']
tamanho = df_resumo_1['Valor a Entregar']
p = figure(x_range=carteira, plot_height=300, title="Status_Carteira")
# Only the bars are drawn; nothing in this snippet adds value labels.
p.vbar(x=carteira, top=tamanho, width=0.9)
p.xgrid.grid_line_color = None
p.y_range.start = 0
p.yaxis[0].formatter = NumeralTickFormatter(format="0.0")
show(p)
I am getting this:
I want to get this:
Tks for the help.
If you put your data in a ColumnDataSource yourself, then that source can be used to drive both the vbar and a LabelSet as demonstrated in the documentation. Something like:
# A single ColumnDataSource feeds both the bars and their labels.
# (A CDS can also be created directly from a data frame, but it is not clear
# whether that fits your case.)
source = ColumnDataSource(data={
    'carteira': carteira,
    'tamanho': tamanho,
    # Text column for the LabelSet: each bar height rendered as a string.
    'labels': [str(x) for x in tamanho],
})
p.vbar(x='carteira', top='tamanho', width=0.9, source=source)
# Each label sits at its bar's top, shifted by y_offset=5.
labels = LabelSet(x='carteira', y='tamanho', text='labels', y_offset=5, source=source)
p.add_layout(labels)
However please note that I could not actually test this directly, because the example code in your question was not self-contained and complete. Hopefully it points the way, though.
See Providing Data for Plots and Tables for more information about Bokeh data sources.
Got it. For those who may need it in the future, here is the code:
from bokeh.io import show, output_file
from bokeh.plotting import figure
from bokeh.io import output_notebook
from bokeh.models import NumeralTickFormatter
from numpy import pi
from bokeh.models import ColumnDataSource
from bokeh.models import LabelSet

# df_resumo_1 is the asker's own data frame; it is not defined in this snippet.
# The assignment is an alias, so 'mes_status' is added to df_resumo_1 as well.
df_carteira_grafico = df_resumo_1
# Category label per bar: "<mes_juncao> - <Atraso>".
df_carteira_grafico['mes_status'] = (df_carteira_grafico['mes_juncao'].astype(dtype=str))+' - '+df_carteira_grafico['Atraso']
output_notebook()

# x categories (carteira) and bar heights (tamanho).
carteira = df_carteira_grafico['mes_status']
tamanho = df_resumo_1['Valor a Entregar']

# One shared source drives both the bars and their value labels.
source = ColumnDataSource(data=dict(carteira=carteira, tamanho=tamanho, labels=[str(x) for x in tamanho]))
p = figure(x_range=carteira, plot_height=400, title="Status_Carteira")
p.vbar(x='carteira', top='tamanho', width=0.9, source=source)
labels = LabelSet(x='carteira', y='tamanho', text='labels', y_offset=5, source=source)
p.add_layout(labels)
p.xgrid.grid_line_color = None
p.y_range.start = 0
p.yaxis[0].formatter = NumeralTickFormatter(format="0.0")
show(p)

bokeh 0.12.10 not rendering Segments on GMapPlot

I am trying to display line segments on a map using GMapPlot. In a Jupyter notebook, the line flashes in red and then disappears. This is my code (some decimals left out):
map_options = GMapOptions(lat=37.88, lng=-122.23, map_type="roadmap", zoom=10)
plot = GMapPlot(
    x_range=DataRange1d(), y_range=DataRange1d(), map_options=map_options
)
# A single segment from (x, y) to (xm01, ym01); the values look like
# longitude/latitude pairs near the map centre.
source = ColumnDataSource(data=dict(
    y=[37.762260],
    x=[-121.96226],
    ym01=[37.762290],
    xm01=[-121.96189],
))
segment = Segment(x0="x", y0="y", x1="xm01", y1="ym01", line_color="green", line_width=100)
plot.add_glyph(source, segment)
plot.add_tools(PanTool(), WheelZoomTool(), BoxSelectTool())
output_notebook()
show(plot)
UPDATE This issue is resolved in https://github.com/bokeh/bokeh/pull/8240 which will be part of Bokeh 1.0
I've tried to reproduce with updated code:
from bokeh.io import show
from bokeh.models import GMapOptions, ColumnDataSource
from bokeh.plotting import figure, gmap
map_options = GMapOptions(lat=37.88, lng=-122.23, map_type="roadmap", zoom=10)
# API_KEY is not defined in this snippet; supply your own Google Maps API key.
plot = gmap(google_api_key=API_KEY, map_options=map_options)
# End points of the single segment to draw.
source = ColumnDataSource(data={
    'y': [37.762260],
    'x': [-121.96226],
    'ym01': [37.762290],
    'xm01': [-121.96189],
})
plot.segment(
    x0="x", y0="y", x1="xm01", y1="ym01",
    line_color="green", line_width=10,
    source=source,
)
show(plot)
And can confirm that the segment does not show up. Slightly changing to show circles does work, so I have to conclude that this is a bug of some sort. Please file a detailed GitHub issue to report this bug.

Replacing figure and table in layout when using global ColumnDataSource

I am using bokeh 0.12.9. I have a table and a figure which I replace in the global layout on callback. I usually build the ColumnDataSource right before I build the new figure/table. Now I wanted to try and see if I can have a global ColumnDataSource so that I can adjust the data via a CDSView (no need to replace table/figure then).
Unfortunately even keeping a separate CDS and view for table and plot fails. When clicking the radio button a couple of times I receive the following javascript error:
Uncaught TypeError: Cannot read property 'data' of undefined
from datetime import date
from random import randint
from bokeh.models import Line
import numpy as np
import pandas as pd
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource
from bokeh.models.widgets import DataTable, DateFormatter, TableColumn
import bokeh.layouts as layouts
import bokeh.models.widgets as widgets
from bokeh.io import curdoc
from bokeh.models import CustomJS, Slider
from bokeh import palettes
from bokeh.layouts import layout
from bokeh.models import ColumnDataSource, CDSView, IndexFilter
from bokeh.models import widgets
def gen_plot(source=None, view=None):
    """Build a datetime line plot with one line per non-index column.

    Args:
        source: ColumnDataSource whose columns are plotted against its
            ``index`` column. Despite the ``None`` default it is required,
            because ``source.column_names`` is read unconditionally.
        view: CDSView passed through to every line glyph.

    Returns:
        The newly created figure.
    """
    p = figure(title='test',
               x_axis_type="datetime",
               plot_width=600, plot_height=400)
    colors = palettes.Category10[10]
    cols = [str(col) for col in source.column_names]
    for ix, col in enumerate(cols):
        # 'index' is the x axis, not a series to draw.
        if col == 'index':
            continue
        # NOTE(review): the '_' prefix presumably keeps the legend text from
        # being read as a column name — confirm.
        p.line(x='index', y=col, source=source, view=view,
               legend='_' + col,
               color=colors[ix])
    p.legend.location = "bottom_left"
    return p
def gen_table(source=None, view=None):
    """Build a read-only DataTable with one column per column of ``source``.

    Args:
        source: ColumnDataSource backing the table. Despite the ``None``
            default it is required, because ``source.column_names`` is read
            unconditionally.
        view: CDSView passed through to the table.

    Returns:
        The newly created DataTable.
    """
    # Every source column becomes a table column titled with its own name.
    columns = [TableColumn(field=ele, title=ele) for ele
               in source.column_names]
    tab = widgets.DataTable(source=source, view=view, columns=columns,
                            selectable=False,
                            reorderable=False,
                            width=600, height=400)
    return tab
def update(attr, old, new):
    """Regenerate the plot and the table and swap them into the global layout.

    Used as the radio button group's ``on_change`` callback; the arguments
    are unused. The new widgets reuse the global data sources and views.
    """
    p = gen_plot(source=cdss[0], view=vs[0])
    t = gen_table(source=cdss[1], view=vs[1])
    # Call form of print: valid on Python 3, and prints the same single
    # value on Python 2.
    print(l.children)
    l.children[1] = p
    l.children[2].children[0] = t
# set up data
cols = ['col1', 'col2', 'col3', 'col4']
df1 = pd.DataFrame(pd.util.testing.getTimeSeriesData())
df1.columns = cols
df2 = pd.DataFrame(pd.util.testing.getTimeSeriesData())
df2.columns = cols
dfs = [df1, df2]
# One global source per widget: cds1 feeds the plot, cds2 feeds the table.
cds1 = ColumnDataSource(df1)
cds2 = ColumnDataSource(df2)
cdss = [cds1, cds2]
# The IndexFilter list is overwritten on the next line, so both views are
# created with an empty filter list (and share that same list object).
filters = [IndexFilter([0, 1, 2, 4])]
filters = []
v1 = CDSView(source=cds1, filters=filters)
v2 = CDSView(source=cds2, filters=filters)
vs = [v1, v2]
# initialize items to replace
p = gen_plot(source=cdss[0], view=vs[0])
t = gen_table(source=cdss[1], view=vs[1])
# initialize controls
radio_wghting = widgets.RadioButtonGroup(labels=["Equal", "Exponential"],
active=0)
# update() rebuilds and swaps the plot and table each time the selection changes.
radio_wghting.on_change('active', update)
# set up layout
sizing_mode = 'fixed'
l = layout([radio_wghting, p, t], sizing_mode=sizing_mode)
curdoc().add_root(l)
curdoc().title = 'blub'
# call callback initially
update('value', 0, 0)
Any hints are much appreciated!
Now I wanted to try and see if I can have a global ColumnDataSource so
that I can adjust the data via a CDSView (no need to replace
table/figure then).
The code you are showing is the one in which you are trying to replace the figure and table.
When you replace the child of a layout object in that way, you are not actually removing the previous figures from curdoc, and other elements in the document still have the old figures and tables in their references.
You could try something like that to update the sources directly.
# Push the new data into every renderer of the existing figure that has a
# data source, instead of building a new figure.
for rend in p.renderers:
    try:
        rend.data_source
    except AttributeError:
        # Renderers without a data_source attribute are skipped.
        pass
    else:
        rend.data_source.data.update(new_data_dictionary)
and
# Same idea for the table: update the data of its existing source in place.
t.source.data.update(new_data_dictionary)
EDIT to answer the comment
from bokeh.io import curdoc
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, Button
from bokeh.layouts import gridplot, widgetbox
from random import random, choice
import numpy as np
# All plotted data, keyed by consecutive integers starting at 1; each value is
# the column data for one scatter glyph. Starts with a single empty data set.
my_data = {1:{'x':[],'y':[],'colo':[],'size':[]}}
# Colour palette that rand_dict() picks marker colours from.
kelly_colors = [ '#F3C300','#875692', '#F38400', '#A1CAF1','#BE0032', '#C2B280', '#848482','#008856', '#E68FAC', '#0067A5',
'#F99379', '#604E97', '#F6A600','#B3446C', '#DCD300', '#882D17','#8DB600', '#654522', '#E25822','#2B3D26', ]
# Candidate x positions (0 up to, but excluding, 50 in steps of 0.1) that rand_dict() samples from.
x = np.arange(0,50,0.1)
def rand_dict():
    """Return random column data for seven scatter points.

    Returns:
        A dict with the keys ``x`` (list of values drawn from the
        module-level ``x`` grid), ``y`` (array of floats in [0, 100)),
        ``colo`` (array of colours drawn from ``kelly_colors``) and ``size``
        (array of ints from 5 to 54).
    """
    rand_x = [choice(x) for i in range(7)]
    return {
        'x': rand_x,
        'y': np.array([random() * 100 for i in rand_x]),
        'colo': np.array([choice(kelly_colors) for i in rand_x]),
        'size': np.array([(5 + int(random() * 50)) for i in rand_x]),
    }
def add_stuff():
    """Store one more random data set under the next free key and rebuild the document."""
    global my_data
    my_data[max(my_data.keys()) + 1] = rand_dict()
    make_doc()
def change_stuff():
    """Give every glyph of the 'myfig' figure fresh random data, in place.

    The figure is kept; only the data of each renderer's existing data
    source is updated, and ``my_data`` is kept in step with it.
    """
    global my_data
    myfig = curdoc().select_one({"name": "myfig"})
    for i, rend in enumerate(myfig.renderers):
        try:
            rend.data_source
        except AttributeError:
            # Renderers without a data_source attribute are skipped.
            pass
        else:
            # Renderer i is paired with my_data key i + 1 (keys start at 1).
            my_data[i + 1] = rand_dict()
            rend.data_source.data.update(my_data[i + 1])
def clear_stuff():
    """Reset ``my_data`` to a single empty data set and rebuild the document."""
    global my_data
    my_data = {1: {'x': [], 'y': [], 'colo': [], 'size': []}}
    make_doc()
def make_doc():
    """Clear the current document and rebuild the figure, buttons and glyphs."""
    curdoc().clear()
    myfig = figure(plot_width=1000, plot_height=800, outline_line_alpha=0, name='myfig')
    # Fixed axis ranges.
    myfig.x_range.start = -5
    myfig.x_range.end = 55
    myfig.y_range.start = -10
    myfig.y_range.end = 110
    # Start from an empty renderer list; change_stuff() pairs renderers with
    # my_data entries by position.
    myfig.renderers = []
    add_button = Button(label='add stuff', width=100)
    change_button = Button(label='change stuff', width=100)
    clear_button = Button(label='clear stuff', width=100)
    add_button.on_click(add_stuff)
    change_button.on_click(change_stuff)
    clear_button.on_click(clear_stuff)
    grid = gridplot([[myfig, widgetbox(add_button, change_button, clear_button)]], toolbar_location=None)
    curdoc().add_root(grid)
    # Draw one glyph per stored data set on the fresh figure.
    update_doc()
def update_doc():
    """Add one scatter glyph per entry of ``my_data`` to the 'myfig' figure."""
    myfig = curdoc().select_one({"name": "myfig"})
    for key in my_data:
        # Each data set gets its own ColumnDataSource, hence its own renderer.
        myfig.scatter(x='x', y='y', color='colo', size='size', source=ColumnDataSource(data=my_data[key]))
    curdoc().title = 'mytitle'
# Build the initial document when the script is loaded.
make_doc()
What I like about this approach is that you can save the my_data dictionary with numpy, load it again later, and keep changing your plots from there.
def load_data():
    """Replace ``my_data`` with a dictionary saved earlier via numpy and rebuild the document."""
    global my_data
    # A dict saved with np.save is stored as a pickled 0-d object array, so
    # loading it needs allow_pickle=True; .item() unwraps the dict.
    # SECURITY: unpickling can execute arbitrary code — only load files you
    # created yourself.
    my_data = np.load(path_to_saved_data, allow_pickle=True).item()
    make_doc()
You can probably do something similar using pandas dataframes, I am just more comfortable with plain dictionaries.

How to update holoviews Bars using an ipywidgets SelectionRangeSlider?

I want to select data from some pandas DataFrame in a Jupyter-notebook through a SelectionRangeSlider and plot the filtered data using holoviews bar chart.
Consider the following example:
import numpy as np
import pandas as pd
import datetime
import holoviews as hv
hv.extension('bokeh')
import ipywidgets as widgets
# Epoch seconds (local time) bounding the year 2017. datetime.timestamp() is
# used instead of strftime("%s"), which is a platform-specific extension that
# is not available everywhere.
start = int(datetime.datetime(2017, 1, 1).timestamp())
end = int(datetime.datetime(2017, 12, 31).timestamp())
size = 100
# `size` random timestamps within 2017, one row each.
rints = np.random.randint(start, end + 1, size = size)
df = pd.DataFrame(rints, columns = ['zeit'])
df["bytes"] = np.random.randint(5,20,size=size)
df['who']= np.random.choice(['John', 'Paul', 'George', 'Ringo'], len(df))
# Reduce the timestamps to plain dates, then order the rows chronologically.
df["zeit"] = pd.to_datetime(df["zeit"], unit='s')
df.zeit = df.zeit.dt.date
df.sort_values('zeit', inplace = True)
df = df.reset_index(drop=True)
df.head(2)
This gives the test DataFrame df:
Let's group the data:
# Total bytes per person as a two-column frame ('who', 'bytes'),
# ordered by ascending total.
data = df.groupby('who')['bytes'].sum().to_frame().reset_index()
data = data.sort_values(by="bytes")
data.head(2)
Now, create the SelectionRangeSlider that is to be used to filter and update the barchart.
%%opts Bars [width=800 height=400 tools=['hover']]
def view2(v):
    """Display a bar chart of total bytes per person for the selected date range.

    Args:
        v: Slider value supplied by ``widgets.interactive`` (unused; the
            range is read from ``r2.value`` instead).
    """
    # Both ends of the selected range are exclusive.
    x = df[(df.zeit > r2.value[0].date()) & (df.zeit < r2.value[1].date())]
    data = pd.DataFrame(x.groupby('who')['bytes'].sum())
    data.sort_values(by="bytes", inplace=True)
    data.reset_index(inplace=True)
    display(hv.Bars(data, kdims=['who'], vdims=['bytes']))
# NOTE(review): `options` and `index` are not defined in the code shown — confirm where they come from.
r2 = widgets.SelectionRangeSlider(options = options, index = index, description = 'Test')
# Tie view2 to the slider so the chart is redrawn when the selection changes.
widgets.interactive(view2, v=r2)
(I have already created an issue on github for the slider not displaying the label correctly, https://github.com/jupyter-widgets/ipywidgets/issues/1759)
Problems that persist:
the image width and size collapse to default after first update (is there a way to give %%opts as argument to hv.Bars?)
the y-Scale should remain constant (i.e. from 0 to 150 for all updates)
is there any optimization possible concerning speed of updates?
Thanks for any help.
Figured out how to do it using bokeh: https://github.com/bokeh/bokeh/issues/7082

Resources