I'm trying to add jitter to a plot so that duplicate values don't overlap each other. The code runs fine, but the generated HTML file shows an error when I open it.
The code:
from bokeh.plotting import figure
from bokeh.io import output_file, show
from bokeh.models import ColumnDataSource, Jitter
# Sample data containing repeated points: (3, 2) and (3, 3) each occur twice.
x = [1,2,3,4,5,3,3,3]
y = [1,2,2,4,5,2,3,3]
data = ColumnDataSource(dict(x=x, y=y))
# Send the output to a standalone HTML file.
output_file("iris.html")
f=figure()
f.plot_width = 800
f.plot_height = 800
f.sizing_mode="stretch_both"
# The x spec is written as {'value': "x"}, i.e. the literal string "x", with a
# Jitter transform attached; this is the call behind the "invalid value" error.
f.circle(x={'value': "x", 'transform': Jitter(width=0.4)}, y="y", source=data)
show(f)
The error I get when I open the HTML file is:
Bokeh Error
Number property 'x' given invalid value: "x"
That's not a great error message, but the problem is that you're trying to transform the value "x" and not the field "x" of your data source. It should work if you write:
# Use 'field' so that x refers to the "x" column of the data source, with the Jitter transform attached to it.
f.circle(x={'field': "x", 'transform': Jitter(width=0.4)}, y="y", source=data)
Related
I want to place a label at the top left corner of each streaming plot, be it one plot, or two plots, etc. The plots are stretched in both directions. For now, I have to manually specify a y position depending on how many plots are shown (y=200 for two plots, and y=440 for one plot). One could resolve this by recording the total range of y values shown in the plot, but that feels too hacky. I'm wondering if there is a simple way to do this. Thanks for any help.
from bokeh.server.server import Server
from bokeh.models import ColumnDataSource, Label
from bokeh.plotting import figure
from bokeh.layouts import column
import numpy as np
import datetime as dt
from functools import partial
import time
def f_random():
    """Return one timestamped sample of random data.

    Returns:
        A ``(timestamp, value)`` tuple, where ``timestamp`` is
        ``datetime.now()`` and ``value`` is drawn with ``np.random.rand()``.
    """
    # Draw the value first, then stamp it, matching the original call order.
    value = np.random.rand()
    return dt.datetime.now(), value
def f_sinewave():
    """Return one timestamped sample of a sine wave driven by wall-clock time.

    Returns:
        A ``(timestamp, value)`` tuple, where ``timestamp`` is
        ``datetime.now()`` and ``value`` is ``sin(time.time())``.
    """
    # The original divided by 1., which is a no-op, so it is dropped here.
    value = np.sin(time.time())
    return dt.datetime.now(), value
def make_document(doc, functions, labels):
    """Fill a Bokeh document with one streaming line plot per data function.

    Args:
        doc: Bokeh document that receives the plots and the periodic callback.
        functions: Callables, each returning a ``(timestamp, value)`` sample.
        labels: Y-axis labels, indexed in parallel with ``functions``.
    """
    # One data source per function; each figure draws from its own source.
    sources = [ColumnDataSource(dict(time=[], data=[])) for _ in functions]
    figs = []
    annotations = []
    for index, source in enumerate(sources):
        fig = figure(x_axis_type='datetime', plot_width=800, plot_height=400,
                     y_axis_label=labels[index])
        fig.line(x='time', y='data', source=source)
        # The label is placed in screen units and starts out empty; update()
        # fills in its text.
        annotation = Label(x=10, y=200, text='', text_font_size='20px', text_color='black',
                           x_units='screen', y_units='screen', background_fill_color='white')
        fig.add_layout(annotation)
        figs.append(fig)
        annotations.append(annotation)

    def update():
        """Take one sample from every function, stream it, and show it in the label."""
        for source, annotation, func in zip(sources, annotations, functions):
            sample = func()
            stamp, value = sample[0], sample[1]
            # rollover caps each source at the most recent 1000 samples.
            source.stream(new_data=dict(time=[stamp], data=[value]), rollover=1000)
            annotation.text = f'{value: .3f}'

    doc.add_root(column(figs, sizing_mode='stretch_both'))
    doc.add_periodic_callback(callback=update, period_milliseconds=100)
if __name__ == '__main__':
    # list of functions and labels to feed into the scope
    functions = [f_random, f_sinewave]
    labels = ['random', 'sinewave']

    # Map the root path to make_document with the functions and labels bound.
    server = Server({'/': partial(make_document, functions=functions, labels=labels)})
    server.start()
    # Schedule server.show("/") on the IO loop, which is started just below.
    server.io_loop.add_callback(server.show, "/")
    try:
        server.io_loop.start()
    except KeyboardInterrupt:
        print('keyboard interruption')
For now you could do:
Label(x=10, y=figs[i].plot_height-30, ...)
It seems like allowing negative values to implicitly position against the "opposite" side would be a nice feature (and a good first task for new contributors), so I would encourage you to file a GitHub issue about it.
I am trying to update the data source for a Bokeh scatter plot using a function.
But instead of plotting only the new data, the plot shows all of it.
I think I'm passing a new data source to the plot, but the old plotted points persist.
How would you update the scatterplot with just new data?
Also, is there any way of retrieving the current selection in the dropdown menu without interacting with it? (i.e. without a callback that uses on_change)
import numpy as np
import pandas as pd
from bokeh.models import ColumnDataSource
from bokeh.models.widgets import Tabs, Select
from bokeh.layouts import column, row, Spacer
from bokeh.io import curdoc
from bokeh.plotting import figure, curdoc, show
#Plotting points on initial chart.
# 500 rows of random integers in columns "A" and "B", indexed by the strings "1".."500".
df_AB = pd.DataFrame(np.random.randint(0,100,size=(500, 2)), columns=list('AB'), index=[str(i) for i in range(1,500+1)])
pointchart=figure(plot_width=800, plot_height=700, tools=['lasso_select','box_select'],title="Point scatter")
pointchart_source= ColumnDataSource(df_AB[["A","B"]])
# Initial glyph, drawn from pointchart_source.
pointchart_glyph= pointchart.circle("A","B",source=pointchart_source)
#Dropdown
# The empty string is both the first option and the initial value.
selectoroptions=['','new selection', 'other selection']
Xselector = Select(title="Dropdown:", value="", options=selectoroptions)
#Callback to update data source
def Xdropdownchange(attrname, old, new):
    """Handle a change of the dropdown's ``value``.

    Each call draws an additional circle glyph from a freshly built source.
    The glyph and source already on the plot are not modified, which is the
    behaviour the question asks about.

    Args:
        attrname: Name of the changed property (unused).
        old: Previous dropdown value (unused).
        new: New dropdown value (unused).
    """
    # This assignment is local to the function; the module-level
    # pointchart_glyph is not rebound.
    pointchart_glyph = pointchart.circle("X", "Y", source=make_updated_source())


Xselector.on_change("value", Xdropdownchange)
#Making new/updated data source based on dropdowns.
df_XY = pd.DataFrame(np.random.randint(0,100,size=(500, 2)), columns=list('XY'), index=[str(i) for i in range(1,500+1)])


def make_updated_source():
    """Build a new ColumnDataSource from the first 100 rows of ``df_XY``.

    Returns:
        A ColumnDataSource with columns "X" and "Y".
    """
    # Going through list(...) drops df_XY's string index, so the rebuilt
    # frame gets a fresh default index.
    new_x = pd.Series(list(df_XY.iloc[0:100]["X"]), name="X")
    new_y = pd.Series(list(df_XY.iloc[0:100]["Y"]), name="Y")
    sourcedf = pd.DataFrame([new_x, new_y]).T
    # Local name only: the module-level pointchart_source is left untouched.
    pointchart_source = ColumnDataSource(sourcedf)
    return pointchart_source
#Show
# Dropdown (with a spacer beneath it) on the left, scatter plot on the right.
layout=row(column(Xselector, Spacer(width=400, height=500)),pointchart)
curdoc().add_root(layout)
# Notebook shell escape (not Python): runs `bokeh serve --show` on this notebook through PowerShell.
!powershell -command {'bokeh serve --show Dropdown_sourcechange.ipynb'}
I changed some things in your code and it now shows your original data if you select the empty value in your dropdown or a randomly generated dataset when you select one of the other values in the dropdown. Retrieving the current selection in the dropdown without using a callback is also possible with print(Xselector.value)
import numpy as np
import pandas as pd
from bokeh.models import ColumnDataSource
from bokeh.models.widgets import Tabs, Select
from bokeh.layouts import column, row, Spacer
from bokeh.io import curdoc
from bokeh.plotting import figure, curdoc, show
#Plotting points on initial chart.
# Despite its name, df_AB uses the column names "X" and "Y" here, matching the columns the glyph reads.
df_AB = pd.DataFrame(np.random.randint(0,100,size=(500, 2)), columns=list('XY'), index=[str(i) for i in range(1,500+1)])
pointchart=figure(plot_width=800, plot_height=700, tools=['lasso_select','box_select','wheel_zoom'],title="Point scatter")
# Single data source for the one circle glyph on the plot.
source= ColumnDataSource(df_AB[["X","Y"]])
pointchart.circle("X","Y",source=source)
#Dropdown
# The empty string is both the first option and the initial value.
selectoroptions=['','new selection', 'other selection']
Xselector = Select(title="Dropdown:", value="", options=selectoroptions)
def make_updated_source(attr, old, new):
    """Replace the plotted data in place when the dropdown value changes.

    The empty selection restores the original ``df_AB`` points; any other
    selection shows the first 100 rows of a newly generated random frame.
    Only ``source.data`` is reassigned, so the existing glyph is reused.

    Args:
        attr: Name of the changed property (unused).
        old: Previous dropdown value (unused).
        new: New dropdown value.
    """
    if new == '':
        frame = df_AB[["X", "Y"]]
    else:
        df_XY = pd.DataFrame(np.random.randint(0, 100, size=(500, 2)), columns=list('XY'),
                             index=[str(i) for i in range(1, 500 + 1)])
        # Keep the first 100 rows and drop the string index.
        frame = df_XY.iloc[0:100].reset_index(drop=True)
    # from_df returns a plain dict of columns, so .data is not handed another
    # source's internal data container.
    source.data = ColumnDataSource.from_df(frame)


Xselector.on_change("value", make_updated_source)
#Retrieve selection in dropdown without on_change
# Reads the widget's current value directly; no callback is involved.
print(Xselector.value)
#Show
# Dropdown (with a spacer beneath it) on the left, scatter plot on the right.
layout=row(column(Xselector, Spacer(width=400, height=500)),pointchart)
curdoc().add_root(layout)
# Notebook shell escape (not Python): runs `bokeh serve --show` on this notebook through PowerShell.
!powershell -command {'bokeh serve --show Dropdown_sourcechange.ipynb'}
I am trying to add permanent labels to the nodes of a networkx graph using spring_layout and the Bokeh library. I would like these labels to be re-positioned when the graph is scaled or refreshed, just as spring_layout re-positions the nodes when the graph is scaled or refreshed.
I tried to create the graph and the layout, and then got pos from spring_layout. However, when I call pos=nx.spring_layout(G), it generates a set of positions for the nodes in graph G, whose coordinates I can put into the LabelSet. However, I have to call graph = from_networkx(G, spring_layout, scale=2, center=(0,0)) to draw the network graph. This creates a new set of positions for the nodes. Therefore, the positions of the nodes and the labels will not be the same.
How can I fix this issue?
Thanks for asking this question. Working through it, I've realized that it is currently more work than it should be. I'd very strongly encourage you to open a GitHub issue so that we can discuss what improvements can best make this kind of thing easier for users.
Here is a complete example:
import networkx as nx
from bokeh.io import output_file, show
from bokeh.models import CustomJSTransform, LabelSet
from bokeh.models.graphs import from_networkx
from bokeh.plotting import figure
# Example graph shipped with networkx.
G = nx.karate_club_graph()

# Plot with fixed ranges and no grid lines.
p = figure(x_range=(-3, 3), y_range=(-3, 3))
p.grid.grid_line_color = None

# Graph renderer laid out with networkx's spring layout, centred on the origin.
r = from_networkx(G, nx.spring_layout, scale=3, center=(0, 0))
r.edge_renderer.glyph.line_alpha = 0.2
r.node_renderer.glyph.size = 15

p.renderers.append(r)
So far this is all fairly normal Bokeh graph layout code. Here is the additional part you need to add permanent labels for each node:
from bokeh.transform import transform
# add the labels to the node renderer data source
source = r.node_renderer.data_source
# Label text for each node: its index multiplied by 10, as a string.
source.data['names'] = [str(x*10) for x in source.data['index']]
# create a transform that can extract the actual x,y positions
# JavaScript body for v_func: for every entry of xs it looks up
# provider.graph_layout[xs[i]] and takes component %s, which is filled in
# below with "0" (x) or "1" (y).
code = """
var result = new Float64Array(xs.length)
for (var i = 0; i < xs.length; i++) {
result[i] = provider.graph_layout[xs[i]][%s]
}
return result
"""
# The graph's layout provider is passed to the JS code under the name `provider`.
xcoord = CustomJSTransform(v_func=code % "0", args=dict(provider=r.layout_provider))
ycoord = CustomJSTransform(v_func=code % "1", args=dict(provider=r.layout_provider))
# Use the transforms to supply coords to a LabelSet
# Both coordinates are computed from the 'index' column through the transforms above.
labels = LabelSet(x=transform('index', xcoord),
y=transform('index', ycoord),
text='names', text_font_size="12px",
x_offset=5, y_offset=5,
source=source, render_mode='canvas')
p.add_layout(labels)
show(p)
Basically, since Bokeh (potentially) computes layouts in the browser, the actual node locations are only available via the "layout provider" which is currently a bit tedious to access. As I said, please open a GitHub issue to suggest making this better for users. There are probably some very quick and easy things we can do to make this much simpler for users.
The code above results in:
A solution similar to @bigreddot's:
#Libraries for this solution
# NOTE(review): nx, pd, G and from_networkx are used below but not imported in
# this snippet; presumably they come from earlier code - confirm.
from bokeh.plotting import figure, ColumnDataSource
from bokeh.models import LabelSet
#Remove randomness
import numpy as np
np.random.seed(1337)
#Load positions
# Compute the layout once so the same positions feed both the graph and the labels.
pos = nx.spring_layout(G)
#Dict to df
# pos maps each node to its coordinates; after transposing there is one row per node.
labels_df = pd.DataFrame.from_dict(pos).T
#Reset index + column names
labels_df = labels_df.reset_index()
labels_df.columns = ["names", "x", "y"]
# Pass the precomputed positions instead of a layout function.
graph_renderer = from_networkx(G, pos, center=(0,0))
.
.
.
# NOTE(review): `plot` is not defined in the visible snippet; presumably it is created in the elided part above - confirm.
plot.renderers.append(graph_renderer)
#Set labels
# Labels read their text and coordinates from the labels_df columns "names", "x" and "y".
labels = LabelSet(x='x', y='y', text='names', source=ColumnDataSource(labels_df))
#Add labels
plot.add_layout(labels)
Fixed node positions
From the networkx.spring_layout() documentation: you can add a list of nodes with a fixed position as a parameter.
import networkx as nx
import matplotlib.pyplot as plt
# Small example graph: a triangle 0-1-2 plus node 3 attached to node 1.
g = nx.Graph()
g.add_edges_from(
    [
        (0, 1),
        (1, 2),
        (0, 2),
        (1, 3),
    ]
)

# Compute a spring layout and draw the graph with it.
pos = nx.spring_layout(g)
nx.draw(g, pos)
plt.show()
Then you can plot the nodes at a fixed position:
# Re-run the layout starting from the previous positions, with nodes 0-3 listed as fixed.
pos = nx.spring_layout(g, pos=pos, fixed=[0,1,2,3])
nx.draw(g,pos)
plt.show()
I have produced a time series scatter plot in bokeh, which updates when a user interactively selects a new time series. However, I want to fix the x-axis between 0000 and 2359 hours for comparison (Bokeh tries to guess the appropriate x-range).
Below is a random snippet of data. In this code, how do I fix the x_range without it changing the scale to microseconds?
import pandas as pd
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
from bokeh.models import Range1d
output_notebook()
# Sample data: timestamp string -> [activity, objectID].
data = {'2015-08-20 13:39:46': [-0.02813796, 0],
'2015-08-28 12:6:5': [ 1.32426938, 1],
'2015-08-28 13:42:59': [-0.16289655, 1],
'2015-12-14 16:19:44': [ 2.30476287, 1],
'2016-02-01 17:8:32': [ 0.41165004, 0],
'2016-02-09 11:26:33': [-0.65023149, 0],
'2016-04-08 17:57:47': [ 0.09335096, 1],
'2016-04-27 19:2:15': [ 1.43917208, 0]}
# Transpose so that each timestamp becomes a row, then parse the index as datetimes.
test = pd.DataFrame(data=data).T
test.columns = ["activity","objectID"]
test.index = pd.to_datetime(test.index)
p = figure(plot_width=500, plot_height=250, x_axis_label='X', y_axis_label='Y', x_axis_type="datetime")# x_range = Range1d(# dont know what to put here))
# Only the time-of-day part of each timestamp is used for x.
r = p.circle(x=test.index.time, y=test["activity"])
show(p, notebook_handle=True);
I've found a (scrappy) solution for this but it doesn't fix the axes sizes entirely since the size of the axes seems to be dependent on other axes properties such as the length of the y-tick labels.
# a method for setting constant x-axis in hours for bokeh:
# Two dummy rows timed at 23:59 and 00:01, appended to the series being plotted.
# The stray space inside the second date string was removed.
day_x_axis = pd.DataFrame(data=[0,0], index=['2015-07-28 23:59:00', '2015-08-28 00:01:00'], columns=["activity"])
day_x_axis.index = pd.to_datetime(day_x_axis.index)
# NOTE(review): old_time_series is not defined in this snippet; presumably it is the frame being plotted - confirm.
new_time_series = pd.concat((old_time_series, day_x_axis), axis=0) # this will set all other columns you had to NaN.
I fixed my axes entirely by also setting the y_range property when instantiating the figure object.
Maintainers note: this question is obsolete. Calling multiple glyph methods on a figure automatically combines (and has for many years). For information on modern Bokeh, see:
https://docs.bokeh.org/en/latest/docs/user_guide/plotting.html
OBSOLETE:
I am running the Bokeh tutorial in the IPython notebook. It only displays the scatter plot and not the line plot. From the command-line it renders both plots separately.
How do I get both graphs in the same chart, on top of each other?
import numpy as np
import bokeh.plotting as bplt
# NOTE: the surrounding text marks this module-level plotting API as obsolete.
bplt.output_file("bokehtest.html")
#bplt.output_notebook(url=None)
# 100 evenly spaced samples of cos(x) on [-2*pi, 2*pi].
x = np.linspace(-2*np.pi, 2*np.pi, 100)
y = np.cos(x)
# Two separate plotting calls; the question is how to get both onto one chart.
bplt.line(x, y, color="red")
bplt.scatter(x, y, marker="square", color="blue")
bplt.show()
OBSOLETE ANSWER: see https://docs.bokeh.org/en/latest/docs/user_guide/plotting.html for modern Bokeh
You just need to call bplt.hold() before any of the plotting commands, to toggle the "hold state". The following code works for me:
import numpy as np
import bokeh.plotting as bplt
# NOTE: the surrounding text marks this module-level plotting API as obsolete.
bplt.output_file("bokehtest.html")
#bplt.output_notebook(url=None)
# 100 evenly spaced samples of cos(x) on [-2*pi, 2*pi].
x = np.linspace(-2*np.pi, 2*np.pi, 100)
y = np.cos(x)
# Must come before the plotting calls: it toggles the "hold state".
bplt.hold() # <--- The important line!!
bplt.line(x, y, color="red")
bplt.scatter(x, y, marker="square", color="blue")
bplt.show()
OBSOLETE ANSWER: see https://docs.bokeh.org/en/latest/docs/user_guide/plotting.html for modern Bokeh
Try using the figure command like in this example:
import numpy as np
import bokeh.plotting as bplt
# NOTE: the surrounding text marks this module-level plotting API as obsolete.
bplt.output_file("bokehtest.html")
# 100 evenly spaced samples of cos(x) on [-2*pi, 2*pi].
x = np.linspace(-2*np.pi, 2*np.pi, 100)
y = np.cos(x)
# The answer's suggestion: call figure() before the plotting commands.
bplt.figure()
bplt.line(x, y, color="red")
bplt.scatter(x, y, marker="square", color="blue")
bplt.show()