How to read and set selected nodes in hv.Graph? - holoviews

Can I read the selected nodes (using “tap” tool) from hv.Graph object?
Can I update the list of selected nodes programmatically, either at initialization or afterwards?
# ---- cell #1 ----
# Build a small demo graph and draw it with hvPlot's NetworkX interface.
import hvplot.networkx as hvnx
import numpy as np
import networkx as nx
import holoviews as hv
np.random.seed(1)  # seed NumPy's global RNG so reruns of the cell are repeatable
G = nx.bull_graph()  # NetworkX's built-in "bull" example graph
fig = hvnx.draw(G)
fig  # bare expression at the end of the cell, so the notebook renders the plot
# ---- cell #2 ----
# (1) Can I read the nodes of `fig` that a user selected with the tap tool?
# * e.g. something like print(fig.tap_tool.selected) returning [1, 4]
# (2) Can I update the list of selected nodes programmatically?
# * e.g. something like fig.tap_tool.selected = [1, 2, 4]

Related

Warp10 and streamlit integration?

Two simple questions:
Does Warp10 integrate into streamlit to feed visualisations?
If so, please would you specify how this can be accomplished?
Thanking you in advance.
Best wishes,
There's no direct integration of Warp 10 in streamlit.
Although streamlit can handle any kind of data, it's mainly focused on pandas DataFrame. DataFrames are tables whereas Warp 10 Geo Time Series are time series. So even if Warp 10 was integrated in streamlit, it would require some code to properly format the data for streamlit to give its full potential.
That being said, here is a small example of how to display data stored in Warp 10 with streamlit:
import json
from datetime import datetime, timedelta
import requests
import streamlit as st
from bokeh.palettes import Category10_10 as palette
from bokeh.plotting import figure
# Should be put in a configuration file.
fetch_endpoint = 'http://localhost:8080/api/v0/fetch'
token = 'READ' # Change that to your actual token
def load_data_as_json(selector, start, end, timeout=30):
    """Fetch Geo Time Series from the Warp 10 fetch endpoint as raw JSON text.

    Args:
        selector: Warp 10 selector string, e.g. "~streamlit.*{}".
        start: Start of the requested range, passed through as the 'start'
            query parameter.
        end: End of the requested range, passed through as the 'end' query
            parameter.
        timeout: Seconds to wait for the server before giving up, so a dead
            endpoint cannot hang the app forever.

    Returns:
        The response body as a string (JSON, because 'format' is 'json').

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (e.g. a bad token), instead of returning the error page as data.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    headers = {'X-Warp10-Token': token}
    params = {'selector': selector, 'start': start, 'end': end, 'format': 'json'}
    r = requests.get(fetch_endpoint, params=params, headers=headers, timeout=timeout)
    # Fail here with a clear HTTP error rather than later in json.loads().
    r.raise_for_status()
    return r.text
st.title('Warp 10 Test')

# Input parameters
selector = st.text_input('Selector', value="~streamlit.*{}")
start_date = st.date_input('Start date', value=datetime.now() - timedelta(days=10))
start_time = st.time_input('Start time')
end_date = st.date_input('End date')
end_time = st.time_input('End time')

# Convert datetime.dates and datetime.times to microseconds (default time unit in Warp 10)
start = int(datetime.combine(start_date, start_time).timestamp()) * 1000000
end = int(datetime.combine(end_date, end_time).timestamp()) * 1000000

# Make the query to Warp 10 and get back a json.
json_data = load_data_as_json(selector, start, end)
gtss = json.loads(json_data)

# Iterate through the json and populate a Bokeh graph: one line per GTS.
p = figure(title='GTSs', x_axis_label='time', y_axis_label='value')
for gts_index, gts in enumerate(gtss):
    # Each point is a list whose first item is the timestamp and whose last
    # item is the value.
    tss = [point[0] for point in gts['v']]
    vals = [point[-1] for point in gts['v']]
    p.line(
        x=tss,
        y=vals,
        legend_label=gts['c'] + json.dumps(gts['l']),
        # Cycle through the palette when there are more series than colors.
        color=palette[gts_index % len(palette)],
    )
st.bokeh_chart(p, use_container_width=True)

# Also display the json.
st.json(json_data)

How to sum values by key and get the top 10 using PySpark

I have a csv file with two fields, a key and a value:
{1Y4dZ123eAMGooBmVzBLUWEZ2JfCCUY91},8.530366
{1YdZ123433MGooBmVzBLUWEZ1234CUY91},8.530366
{1YdZ2344AMGooBmVzBLUWE123JfCCUY91},8.530366
{1YdECDNthiMGooBmVzBLUWEZ2JfCCUY91},8.530366
{1YdZDNHqeAMGooBmVzBLUWEZ2JfCCUY91},8.530366
{1YdZDNHqeAMGooBDJTdBLUWEZ2JfCCUY91},8.530366
{1YdZDNHqeAMGooBmVzBLUWEZ2JfCCUY91},8.530366
{1YdZ123qeAMGooBmVzBLUWEZ2JfCCUY91},8.530366
{1YdZDNHqeAMGooBmVzBLUWEZ2JfCCUY91},8.530366
{1YdZDNHqeAMGooBm123LUWEZ2JfCCUY91},8.530366
{17RJgv5ujkFerSd48Akdd2GneUAW47nphQ},20.0
{17RJgv5ujkFerSd48Akdd2GneUAW47nphQ},20.0
{17RJgv5ujkFerSd48Akdd2GneUAW47nphQ},20.0
{13uZ6tSr5oh1ui9Hd1tEqJKo2AHhJ6JdFS},0.03895804
What I'm trying to do is sum up the second column and group by the first column, then derive the top 10 keys with the highest values.
Below is the code I've tried using but I get a 'tuple index out of range' error:
# Question snippet, kept exactly as posted: it is the code that raises the
# 'tuple index out of range' error discussed in the text.
# NOTE(review): `pyspark` is used below but no `import pyspark` is visible here.
import re
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.session import SparkSession
sc = pyspark.SparkContext()
spark = SparkSession(sc)
voutFile = sc.textFile("input/voutfiltered.csv")
# Each line becomes a 2-tuple: (text before the first comma, float of the next field).
features=voutFile.map(lambda l:
(l.split(',')[0],float(l.split(',')[1])))
# The tuples only have indices 0 and 1, so x[2] (and record[2] below) is out of
# range. Nothing in this snippet sums the values per key before the top 10 is taken.
top10 = features.takeOrdered(10, key = lambda x: -x[2])
for record in top10:
print("{}: {};{}".format(record[0],record[1],record[2]))```
Any particular reason why you're not using the DataFrame API? It's much more flexible, convenient and faster than the RDD API.
import pyspark.sql.functions as f

# The sample file has no header row, so read it headerless and name the two
# columns here; otherwise the first data row would be consumed as the header.
df = (spark.read.format("csv")
      .option("header", "false")
      .load("/path/to/your/file.csv/")
      .toDF("key_col", "value_col"))

# Sum the values per key (the question asks for a sum, not a row count), then
# keep the 10 keys with the largest totals.
(df.groupBy(f.col("key_col"))
   # CSV columns are read as strings; cast to a number before summing.
   .agg(f.sum(f.col("value_col").cast("double")).alias("sum_value_col"))
   # Use f.col: a bare `col` is never imported in this snippet.
   .sort(f.col("sum_value_col").desc())
   .limit(10)
   .show())

Changing Bokeh table row count

Below is a working example of a Bokeh table that is populated with selections from a scatter plot.
Once the table is first initialized, it always shows that many rows (too many or too few).
Is there a way to make the number of rows dynamic to fit the number of records selected?
Thank you
import numpy as np
import pandas as pd
from bokeh.layouts import row
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, curdoc, show
from bokeh.models.widgets import DataTable, DateFormatter, TableColumn
#Plotting points on chart.
initial_df = pd.DataFrame(np.random.randint(0,100,size=(500, 2)),
columns=["X","Y"],
index=[str(i) for i in range(1,500+1)])
pointchart=figure(plot_width=800, plot_height=700,
tools=['lasso_select','box_select'],
title="Points for selection")
pointchart_source= ColumnDataSource(initial_df )
pointchart_glyph= pointchart.circle("X","Y",source=pointchart_source,size=3.5)
#Source for table
source_df=initial_df
source_df['ID']=source_df.index
#Making initial table source from dataframe. The table will always have this number of rows.
initial_source_for_table = ColumnDataSource(source_df)
columns = [TableColumn(field='ID', title="Col1"),
TableColumn(field="X", title="Col2"),
TableColumn(field="Y", title="Col3")]
global data_table #lets you access it in the callback.
data_table = DataTable(source=initial_source_for_table, columns=columns, width=800, height=400)
def on_selection_change(attr, old, new):
    """Show the currently selected scatter points in the data table.

    Bokeh ``on_change`` callback for the glyph's selected 'indices'.

    Args:
        attr: Name of the changed property (unused).
        old: Previous list of selected indices (unused).
        new: New list of selected row indices.
    """
    # Pick the selected rows out of the scatter plot's data.
    selected_rows = pd.DataFrame(pointchart_source.data).loc[new]
    selected_rows['ID'] = selected_rows.index
    table_source = ColumnDataSource(selected_rows[['ID', "X", "Y"]].dropna(how='all'))
    data_table.source = table_source
    data_table.width = 500
    data_table.height = 500
pointchart_glyph.data_source.selected.on_change('indices',on_selection_change)
#Show
layout=row(pointchart,data_table)
curdoc().add_root(layout)
!powershell -command {'bokeh serve --show Test_Table.ipynb'}
I don't have Jupyter Notebook but this example should help you on your way.
Just count the number of selected points and alter the number of table rows with table.height = number_points * 25. Run the code with: bokeh serve --show app.py
from bokeh.io import curdoc, show
from bokeh.layouts import widgetbox
from bokeh.models import ColumnDataSource, Slider, DataTable, TableColumn
max_i = 200
init_i = 6
def get_square(n):
    """Return column data for the integers 0..n-1 and their squares.

    Args:
        n: Number of rows to generate.

    Returns:
        A dict with key 'x' (the list 0..n-1) and key 'y' (each x squared),
        in the shape a ColumnDataSource accepts.
    """
    xs = list(range(n))
    return dict(x=xs, y=[x ** 2 for x in xs])
source = ColumnDataSource(get_square(init_i))
columns = [
TableColumn(field = "x", title = "x"),
TableColumn(field = "y", title = "x**2"),
]
table = DataTable(source = source, columns = columns, width = 320)
slider = Slider(start = 1, end = max_i, value = init_i, step = 1, title = "i", width = 300)
def update_data(attrname, old, new):
    """Regenerate the table rows for the slider value and resize the table.

    Bokeh ``on_change`` callback for the slider's 'value'.

    Args:
        attrname: Name of the changed property (unused).
        old: Previous slider value (unused).
        new: New slider value (unused; the slider is read directly).
    """
    i = slider.value
    # Assign new data to the existing source rather than replacing the source.
    table.source.data = get_square(i)
    # 25 units per row, plus 25 more (presumably for the header row).
    table.height = i * 25 + 25
slider.on_change('value', update_data)
layout = widgetbox(slider, table)
curdoc().add_root(layout)
BTW: you should not replace the entire ColumnDataSource in your callback; just assign new data to the existing source, as in my example. That is, use:
table.source.data = new_data
instead of:
table.source = new_source

Replacing figure and table in layout when using global ColumnDataSource

I am using bokeh 0.12.9. I have a table and a figure which I replace in the global layout on callback. I usually build the ColumnDataSource right before I build the new figure/table. Now I wanted to try and see if I can have a global ColumnDataSource so that I can adjust the data via a CDSView (no need to replace table/figure then).
Unfortunately even keeping a separate CDS and view for table and plot fails. When clicking the radio button a couple of times I receive the following javascript error:
Uncaught TypeError: Cannot read property 'data' of undefined
from datetime import date
from random import randint
from bokeh.models import Line
import numpy as np
import pandas as pd
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource
from bokeh.models.widgets import DataTable, DateFormatter, TableColumn
import bokeh.layouts as layouts
import bokeh.models.widgets as widgets
from bokeh.io import curdoc
from bokeh.models import CustomJS, Slider
from bokeh import palettes
from bokeh.layouts import layout
from bokeh.models import ColumnDataSource, CDSView, IndexFilter
from bokeh.models import widgets
def gen_plot(source=None, view=None):
    """Build a datetime line plot with one line per data column of ``source``.

    Args:
        source: ColumnDataSource to plot; its 'index' column is the x axis.
        view: CDSView applied to every line renderer.

    Returns:
        The configured figure.
    """
    p = figure(title='test',
               x_axis_type="datetime",
               plot_width=600, plot_height=400)
    colors = palettes.Category10[10]
    cols = [str(col) for col in source.column_names]
    for ix, col in enumerate(cols):
        # 'index' is the x axis, not a series to draw.
        if col == 'index':
            continue
        p.line(x='index', y=col, source=source, view=view,
               legend='_' + col,
               color=colors[ix])
    p.legend.location = "bottom_left"
    return p
def gen_table(source=None, view=None):
    """Build a read-only DataTable with one column per column of ``source``.

    Args:
        source: ColumnDataSource backing the table.
        view: CDSView applied to the table.

    Returns:
        The configured DataTable widget.
    """
    columns = [TableColumn(field=name, title=name)
               for name in source.column_names]
    return widgets.DataTable(source=source, view=view, columns=columns,
                             selectable=False,
                             reorderable=False,
                             width=600, height=400)
def update(attr, old, new):
    """Rebuild the plot and table and swap them into the global layout.

    Bokeh ``on_change`` callback for the radio button group's 'active'.

    Args:
        attr: Name of the changed property (unused).
        old: Previous value (unused).
        new: New value (unused).
    """
    p = gen_plot(source=cdss[0], view=vs[0])
    t = gen_table(source=cdss[1], view=vs[1])
    # Function-call form of print works on both Python 2 and Python 3.
    print(l.children)
    l.children[1] = p
    l.children[2].children[0] = t
# set up data
cols = ['col1', 'col2', 'col3', 'col4']
df1 = pd.DataFrame(pd.util.testing.getTimeSeriesData())
df1.columns = cols
df2 = pd.DataFrame(pd.util.testing.getTimeSeriesData())
df2.columns = cols
dfs = [df1, df2]
cds1 = ColumnDataSource(df1)
cds2 = ColumnDataSource(df2)
cdss = [cds1, cds2]
filters = [IndexFilter([0, 1, 2, 4])]
filters = []
v1 = CDSView(source=cds1, filters=filters)
v2 = CDSView(source=cds2, filters=filters)
vs = [v1, v2]
# initialize items to replace
p = gen_plot(source=cdss[0], view=vs[0])
t = gen_table(source=cdss[1], view=vs[1])
# initialize controls
radio_wghting = widgets.RadioButtonGroup(labels=["Equal", "Exponential"],
active=0)
radio_wghting.on_change('active', update)
# set up layout
sizing_mode = 'fixed'
l = layout([radio_wghting, p, t], sizing_mode=sizing_mode)
curdoc().add_root(l)
curdoc().title = 'blub'
# call callback initially
update('value', 0, 0)
Any hints are much appreciated!
Now I wanted to try and see if I can have a global ColumnDataSource so
that I can adjust the data via a CDSView (no need to replace
table/figure then).
The code you are showing is the one in which you are trying to replace the figure and table.
When you replace the child of a layout object in that way, you are not actually removing the previous figures from curdoc, and other elements in the document still have the old figures and tables in their references.
You could try something like this to update the sources directly:
# Push the new data into every renderer of the figure that has a data source.
for rend in p.renderers:
    try:
        rend.data_source
    except AttributeError:
        # This renderer has no data_source attribute; nothing to update.
        pass
    else:
        rend.data_source.data.update(new_data_dictionary)
and
t.source.data.update(new_data_dictionary)
EDIT to answer the comment
from bokeh.io import curdoc
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, Button
from bokeh.layouts import gridplot, widgetbox
from random import random, choice
import numpy as np
my_data = {1:{'x':[],'y':[],'colo':[],'size':[]}}
kelly_colors = [ '#F3C300','#875692', '#F38400', '#A1CAF1','#BE0032', '#C2B280', '#848482','#008856', '#E68FAC', '#0067A5',
'#F99379', '#604E97', '#F6A600','#B3446C', '#DCD300', '#882D17','#8DB600', '#654522', '#E25822','#2B3D26', ]
x = np.arange(0,50,0.1)
def rand_dict():
    """Return random scatter data for 7 points.

    Returns:
        A dict with keys 'x' (list drawn from the global ``x`` grid), 'y'
        (array of values in [0, 100)), 'colo' (array of colors drawn from
        ``kelly_colors``) and 'size' (array of ints in [5, 55)).
    """
    rand_x = [choice(x) for i in range(7)]
    return {
        'x': rand_x,
        'y': np.array([random() * 100 for i in rand_x]),
        'colo': np.array([choice(kelly_colors) for i in rand_x]),
        'size': np.array([(5 + int(random() * 50)) for i in rand_x]),
    }
def add_stuff():
    """Add one more random data set under the next integer key and redraw."""
    global my_data
    my_data[max(my_data) + 1] = rand_dict()
    make_doc()
def change_stuff():
    """Replace the data of every existing renderer with fresh random data.

    The figure is looked up by name and its renderers are updated in place,
    so the document is not rebuilt.
    """
    global my_data
    myfig = curdoc().select_one({"name": "myfig"})
    for i, rend in enumerate(myfig.renderers):
        # Skip renderers that have no data source.
        if not hasattr(rend, 'data_source'):
            continue
        # my_data keys start at 1, renderer positions at 0.
        my_data[i + 1] = rand_dict()
        rend.data_source.data.update(my_data[i + 1])
def clear_stuff():
    """Reset ``my_data`` to a single empty data set and redraw."""
    global my_data
    my_data = {1: {'x': [], 'y': [], 'colo': [], 'size': []}}
    make_doc()
def make_doc():
    """Clear the current document and rebuild the figure, buttons and layout.

    After the layout is added as a root, ``update_doc()`` draws one scatter
    renderer per entry of ``my_data``.
    """
    curdoc().clear()

    myfig = figure(plot_width=1000, plot_height=800, outline_line_alpha=0, name='myfig')
    myfig.x_range.start = -5
    myfig.x_range.end = 55
    myfig.y_range.start = -10
    myfig.y_range.end = 110
    # Start with an empty renderer list; update_doc() adds the scatter glyphs.
    myfig.renderers = []

    add_button = Button(label='add stuff', width=100)
    change_button = Button(label='change stuff', width=100)
    clear_button = Button(label='clear stuff', width=100)
    add_button.on_click(add_stuff)
    change_button.on_click(change_stuff)
    clear_button.on_click(clear_stuff)

    grid = gridplot([[myfig, widgetbox(add_button, change_button, clear_button)]],
                    toolbar_location=None)
    curdoc().add_root(grid)
    update_doc()
def update_doc():
    """Add one scatter renderer per entry of ``my_data`` to the named figure."""
    myfig = curdoc().select_one({"name": "myfig"})
    for key in my_data:
        myfig.scatter(x='x', y='y', color='colo', size='size',
                      source=ColumnDataSource(data=my_data[key]))
    curdoc().title = 'mytitle'
make_doc()
What I like about doing it this way is that you can save the my_data dictionary with numpy, load it later, and keep changing your plots from there.
def load_data():
    """Load a previously saved ``my_data`` dictionary and redraw the document.

    The dictionary is read from ``path_to_saved_data`` (to be supplied by the
    user of this snippet).
    """
    global my_data
    # A dict saved with numpy is stored as a pickled 0-d object array, so it
    # needs allow_pickle=True (the default became False in NumPy 1.16.3).
    # SECURITY: unpickling can execute arbitrary code; only load files you
    # created yourself.
    my_data = np.load(path_to_saved_data, allow_pickle=True).item()
    make_doc()
You can probably do something similar using pandas dataframes, I am just more comfortable with plain dictionaries.

How to update holoviews Bars using an ipywidgets SelectionRangeSlider?

I want to select data from some pandas DataFrame in a Jupyter-notebook through a SelectionRangeSlider and plot the filtered data using holoviews bar chart.
Consider the following example:
import numpy as np
import pandas as pd
import datetime
import holoviews as hv
hv.extension('bokeh')
import ipywidgets as widgets
# Build a test DataFrame of 100 random (date, bytes, who) records for 2017.
# .timestamp() replaces strftime("%s"), which is an undocumented,
# platform-specific directive (it is rejected on Windows, for example).
start = int(datetime.datetime(2017, 1, 1).timestamp())
end = int(datetime.datetime(2017, 12, 31).timestamp())
size = 100
# Random epoch seconds between start and end (inclusive).
rints = np.random.randint(start, end + 1, size=size)
df = pd.DataFrame(rints, columns=['zeit'])
df["bytes"] = np.random.randint(5, 20, size=size)
df['who'] = np.random.choice(['John', 'Paul', 'George', 'Ringo'], len(df))
# Epoch seconds -> timestamps -> plain dates.
df["zeit"] = pd.to_datetime(df["zeit"], unit='s')
df.zeit = df.zeit.dt.date
df.sort_values('zeit', inplace=True)
df = df.reset_index(drop=True)
df.head(2)
This gives the test DataFrame df:
Let's group the data:
data = pd.DataFrame(df.groupby('who')['bytes'].sum())
data.reset_index(level=0, inplace=True)
data.sort_values(by="bytes", inplace=True)
data.head(2)
Now, create the SelectionRangeSlider that is to be used to filter and update the barchart.
%%opts Bars [width=800 height=400 tools=['hover']]
def view2(v):
    """Display a bar chart of total bytes per person for the slider's range.

    Args:
        v: Value supplied by ``widgets.interactive`` (unused; the range is
            read from the slider ``r2`` directly).
    """
    # Keep rows strictly between the two selected dates.
    lower, upper = r2.value[0].date(), r2.value[1].date()
    x = df[(df.zeit > lower) & (df.zeit < upper)]
    data = pd.DataFrame(x.groupby('who')['bytes'].sum())
    data.sort_values(by="bytes", inplace=True)
    data.reset_index(inplace=True)
    display(hv.Bars(data, kdims=['who'], vdims=['bytes']))
r2 = widgets.SelectionRangeSlider(options = options, index = index, description = 'Test')
widgets.interactive(view2, v=r2)
(I have already created an issue on github for the slider not displaying the label correctly, https://github.com/jupyter-widgets/ipywidgets/issues/1759)
Problems that persist:
the image width and size collapse to default after first update (is there a way to give %%opts as argument to hv.Bars?)
the y-Scale should remain constant (i.e. from 0 to 150 for all updates)
is there any optimization possible concerning speed of updates?
Thanks for any help.
Figured out how to do it using bokeh: https://github.com/bokeh/bokeh/issues/7082

Resources