Bokeh initializing error - plot

I'm trying to do clustering and plot the result with Bokeh. I'm new to Bokeh, and I get a warning message. This is my code:
#Clustering
import numpy as np
np.random.seed(0)
from bokeh.io import curdoc
from bokeh.layouts import widgetbox, row, column
from bokeh.models import ColumnDataSource, Select, Slider
from bokeh.plotting import figure
from bokeh.palettes import Spectral6
from sklearn import cluster, datasets
from sklearn.neighbors import kneighbors_graph
from sklearn.preprocessing import StandardScaler
from bokeh.models.widgets import Panel, Tabs
# define some helper functions
# set up initial data
n_samples = len(data)
n_clusters = 2
algorithm = 'MiniBatchKMeans'
X=data
#clustering with k-means
def clustering(X, n_clusters):
    """Standardize ``X`` and cluster its rows with k-means.

    Args:
        X: 2-D array-like of samples (one row per sample).
        n_clusters: number of clusters passed to ``KMeans``.

    Returns:
        A tuple ``(X_scaled, y_pred)``: the standardized data and the
        cluster label assigned to each row.
    """
    # normalize dataset for easier parameter selection
    X = StandardScaler().fit_transform(X)
    # KMeans is reached through the imported ``cluster`` module; the bare
    # name ``KMeans`` is never imported in this script.
    model = cluster.KMeans(n_clusters=n_clusters)
    model.fit(X)
    if hasattr(model, 'labels_'):
        # ``np.int`` was removed from NumPy; the builtin ``int`` is the
        # equivalent dtype.
        y_pred = model.labels_.astype(int)
    else:
        y_pred = model.predict(X)
    return X, y_pred
# Set up the initial clustering result and the colours used for plotting.
y=n_clusters
X, y_pred = clustering(X,n_clusters)
# Repeat the Spectral6 palette 20 times so larger indices still map to a colour.
spectral=np.hstack([Spectral6]*20)
# NOTE(review): this builds only `y` (= n_clusters) colours, while the `x` and
# `y` columns below have one entry per row of X. Unless X has exactly
# n_clusters rows, the columns differ in length, which is what the
# "ColumnDataSource's columns must be of the same length" warning reports.
# One colour per sample would be `[spectral[i] for i in y_pred]`.
colors=[spectral[i] for i in range(y)]
# Build the figure and a data source holding the scaled coordinates and colours.
plot=figure(toolbar_location=None,title=algorithm)
source=ColumnDataSource(data=dict(x=X[:,0],y=X[:,1],colors=colors))
plot.circle('x','y',fill_color='colors',line_color=None,source=source)
# set up widgets
Class=[
'Comm',
'Cr',
'Ey',
'F ',
'Int ',
]
a_select = Select(value='Comm',
title='Select Class:',
width=200,
options=Class)
# min_Not = Slider(title="min_not ", start=0.000000e+00, end=1.876726e+13, value=0.000000e+00, step=1)
# max_Not = Slider(title="max_not", start=0.000000e+00, end=1.876726e+13, value=1.876726e+13, step=1)
Not_slider = Slider(title="Number of samples",
value=1500.0,
start=0.000000e+00,
end=1.876726e+13,
step=1,
width=400)
clusters_slider = Slider(title="Number of clusters",
value=2.0,
start=2.0,
end=10.0,
step=1,
width=400)
# set up callbacks
# def update_algorithm_or_clusters(attrname, old, new):
# global X
# algorithm = a_select.value
# n_clusters = int(clusters_slider.value)
# X, y_pred = clustering(X, n_clusters)
# colors = [spectral[i] for i in range(y_pred)
# source.data= dict(colors=colors, x=X[:, 0], y=X[:, 1])
# plot.title.text = algorithm
# set up layout
selects = row(a_select, width=200)
inputs = column(selects, widgetbox(Not_slider, clusters_slider))
# add to document
curdoc().add_root(row(inputs, plot))
curdoc().title = "Clustering"
This is the warning message. How can I solve the problem?
BokehUserWarning: ColumnDataSource's columns must be of the same length
lambda: warnings.warn("ColumnDataSource's columns must be of the same length", BokehUserWarning))

Related

Pydeck HexLayer min and log scale

Let's consider this HexLayer example using PyDeck in StreamLit:
import numpy as np
import pandas as pd
import pydeck as pdk
import streamlit as st
lat0=40.7
lon0=-74.1201062
n_points = 1000
lat = np.random.normal(loc=lat0, scale=0.02, size=n_points)
lon = np.random.normal(loc=lon0, scale=0.02, size=n_points)
data = pd.DataFrame({'lat': lat, 'lon': lon})
st.pydeck_chart(pdk.Deck(
map_provider="mapbox",
initial_view_state=pdk.ViewState(
latitude=lat0,
longitude=lon0,
zoom=10,
),
layers=[
pdk.Layer(
'HexagonLayer',
data=data,
get_position='[lon, lat]',
radius=1000,
coverage=0.6,
),
],
))
Here's the output:
Is there a way to display only the hexagonal bins with a count above a given threshold, say counts > 5?
Similarly, is it possible to set a logarithmic scale for the color/height of the hexagons?

How to plot a 3D line using plotly.graph_objects?

I want to draw a line between two points in a 3D plot. However, for various reasons I cannot use plotly.express as suggested in the documentation: https://plotly.com/python/3d-line-plots/
Is it possible to draw a line in a 3D plot using the plotly.graph_objects module ? If so, how ?
I found this solution:
plotly.express solution:
import plotly.express as px
import numpy as np
import pandas as pd  # required for pd.DataFrame below

# Two points sharing y and z, so the line runs along the x-axis.
x = np.array([0,1])
y = np.array([1,1])
z = np.array([1,1])
# plotly.express takes a data frame plus the column names to plot.
df = pd.DataFrame({"x": x, "y":y, "z":z})
fig = px.line_3d(df, x="x", y="y", z="z")
fig.show()
output :
plotly.graph_objects version :
import plotly.graph_objs as go
import numpy as np
x = np.array([0,1])
y = np.array([1,1])
z = np.array([1,1])
fig = go.Figure(data=go.Scatter3d(x=x, y=y,z=z, mode='lines'))
fig.show()
output 2:

plotting interactive Dendrogram with an unequal Heatmap using plotly python in jupyter notebook

I'm trying to plot an interactive dendrogram with an unequal heatmap on jupyter notebook using Plotly package that would look like this one.
My example data called dataHeat_arr is numpy.ndarray, which has 75 rows (samples called S0 till S74) and 100 columns (Metabolites called M0 till M99) and available in the link.
In the link there are 3 csv files.
dataHeat_arr.csv - the numpy.ndarray 75*100
name_molec.csv - list of metbolite named M0, M1,... till M99
Samplenum.csv - list of samples named S0, S1,... till S74
I based my code on the example from here and made some changes, since my heatmap is unequal (not square).
I also tried the question "Plotly clustered heatmap (with dendrogram)/Python".
I do not know what I'm doing wrong, but the final figure is missing the heatmap.
Only when plotting with an equal heatmap do I manage to plot the heatmap together with the dendrogram.
Here is my code.
Import packages:
import plotly.figure_factory as ff
import numpy as np
np.random.seed(1)
import pandas as pd
import numpy as np
import string
from itables import init_notebook_mode
from itables import show
import cimcb_lite as cb
import plotly.graph_objects as go
init_notebook_mode(all_interactive=True)
Initialize figure by creating upper dendrogram:
# name the samples S0 till S74
# Samplenum & name_molec are csv files in the link of Github and should be treated as lists
labels = Samplenum
dataHeat_arr_t= np.transpose(dataHeat_arr)
# Initialize figure by creating upper dendrogram
fig = ff.create_dendrogram(dataHeat_arr, orientation='bottom', labels=labels)
# fig = ff.create_dendrogram(dataHeat_arr_t, orientation='bottom', labels=name_molec[:100] ) ,labels=name_molec[:100]
# Move every upper-dendrogram trace onto the secondary y-axis.
for trace in fig['data']:
    trace['yaxis'] = 'y2'
Then Create Side Dendrogram:
# Create Side Dendrogram
dendro_side = ff.create_dendrogram(dataHeat_arr_t, orientation='right' ,labels=name_molec[:100])
# dendro_side = ff.create_dendrogram(dataHeat_arr, orientation='right', labels=labels)
# Move every side-dendrogram trace onto the secondary x-axis.
for trace in dendro_side['data']:
    trace['xaxis'] = 'x2'
# Add Side Dendrogram Data to Figure
for trace in dendro_side['data']:
    fig.add_trace(trace)
Create Heatmap:
heatmap = [
go.Heatmap(
x = name_molec[:100],
y =labels ,
z = dataHeat_arr,
colorscale = 'Cividis'
)
]
Add Heatmap Data to Figure:
# Add each heatmap trace to the figure that already holds the dendrograms.
for heat_trace in heatmap:
    fig.add_trace(heat_trace)
Now layout:
# Edit Layout
fig.update_layout({'width':1500, 'height':750,
'showlegend':False, 'hovermode': 'closest',
})
# Edit xaxis
fig.update_layout(xaxis={'domain': [.15, 1],
'mirror': False,
'showgrid': False,
'showline': False,
'zeroline': False,
'ticks':""})
# Edit xaxis2
fig.update_layout(xaxis2={'domain': [0, .15],
'mirror': False,
'showgrid': False,
'showline': False,
'zeroline': False,
'showticklabels': False,
'ticks':""})
# Edit yaxis
fig.update_layout(yaxis={'domain': [0, .85],
'mirror': False,
'showgrid': False,
'showline': False,
'zeroline': False,
'showticklabels': False,
'ticks': ""
})
# Edit yaxis2
fig.update_layout(yaxis2={'domain':[0.852, 0.975],
'mirror': False,
'showgrid': False,
'showline': False,
'zeroline': False,
'showticklabels': False,
'ticks':""})
fig.show()
For some reason the plot looks like this:
Any idea or hint would be useful and appreciated!
The correct code to plot a heatmap with dendrograms on both the y-axis and the x-axis:
Initialize figure by creating upper dendrogram:
Samplenum = ["S" + str(x) for x in idx]
labels = Samplenum
dataHeat_arr_t= np.transpose(dataHeat_arr)
# Initialize figure by creating upper dendrogram
# The line below was changed comparing to my question
fig = ff.create_dendrogram(dataHeat_arr_t, orientation='bottom', labels=name_molec[:99] )
# Move every upper-dendrogram trace onto the secondary y-axis.
for trace in fig['data']:
    trace['yaxis'] = 'y2'
Create Side Dendrogram
# The line below was changed as well comparing to my question
dendro_side = ff.create_dendrogram(dataHeat_arr, orientation='right', labels=["S" + str(x) for x in idx])
# Move every side-dendrogram trace onto the secondary x-axis.
for trace in dendro_side['data']:
    trace['xaxis'] = 'x2'
# Add Side Dendrogram Data to Figure
for trace in dendro_side['data']:
    fig.add_trace(trace)
Create Heatmap:
heatmap = [
go.Heatmap(
x = name_molec[:99],
y =labels ,
z = dataHeat_arr,
colorscale = 'Cividis'
)
]
These 4 lines are new ( were not in my question):
heatmap[0]['x'] = fig['layout']['xaxis']['tickvals']
heatmap[0]['y'] = dendro_side['layout']['yaxis']['tickvals']
# to tickes text on y-axis as well
fig['layout']['yaxis']['ticktext'] = np.asarray(labels)
fig['layout']['yaxis']['tickvals'] = np.asarray(dendro_side['layout']['yaxis']['tickvals'])
Most of the layout code stays the same, except for 'showticklabels': True:
# Edit yaxis
fig.update_layout(yaxis={'domain': [0, .7],
'mirror': False,
'showgrid': False,
'showline': False,
'zeroline': False,
'showticklabels': True,
'ticks': ""
})
The output looks more like the picture at beginning of my question.

R equivalent python code gives different output

I am trying to write R equivalent code in python but not getting the same result. The R code is as follows:
# Set parameters
max.people = 50
max.trials = 500
plot.step = 1
# load libraries
library(tidyverse)
#Set up an initial data frame
df<-data.frame("trial"=NA,"people"=NA, "val"=NA)
# Set up a common theme for plots
ztheme<-function(){
theme_classic()+
theme(panel.background=element_rect(fill="#F0F0F0", color="#F0F0F0"))+
theme(plot.background=element_rect(fill="#F0F0F0", color="#F0F0F0"))}
#Run main loop
for(trial in 1:max.trials){
# set up a buffer. Makes the program run a lot faster.
buff<-data.frame("trial"=NA,"people"=NA, "val"=NA)
for(people in 1:max.people){
buff<-rbind(buff,data.frame("trial"=trial,"people"=people, "val"=NA))
samp<-sample(1:365, people, replace=T)
if(length(unique(samp))==length(samp)){
buff$val[nrow(buff)]<-0
}else{
buff$val[nrow(buff)]<-1
}; rm(samp)}
df<-rbind(df, buff); rm(buff)
print(paste(round(trial/(max.trials)*100, 2), "% Complete", sep=""))
}
df<-subset(df, !is.na(df$trial))
rm(max.people); rm(people); rm(trial)
# Generate multiple plots of result
for(n in seq(plot.step,max.trials,plot.step)){
print(
ggplot(summarise(group_by(subset(df, trial<=n), people), prob=mean(val)), aes(people, prob))+
geom_bar(stat="identity", fill="steelblue1")+
geom_smooth(se=F, color="black", method="loess")+
scale_y_continuous(labels=scales::percent, limits=c(0,1))+
labs(title="Birthday Paradox",
subtitle=paste("Based on",n,"simulations."),
x="Number of People in Room",
y="One or More Matching Birthdays (True/False Ratio)",
caption="created by /u/zonination")+
ztheme())
ggsave(paste("bday_", formatC(n,width=5,flag = "0"), ".png", sep=""), height=4.5, width=7, dpi=120, type="cairo-png")
}; rm(n)
I have written equivalent code in python as follows:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random

plt.style.use('ggplot')
maxTrials = 500
maxPeople = 50
plotStep = 1

# Simulate the birthday paradox: for every trial and every room size, draw
# one birthday per person and record whether any two birthdays coincide
# (val = 1) or all are distinct (val = 0).
# Rows are collected in a plain list and turned into a DataFrame once;
# DataFrame.append was removed in pandas 2.0 and copied the frame each call.
records = []
for trial in range(1, maxTrials + 1):
    for people in range(1, maxPeople + 1):
        # random.randint includes both endpoints, so the upper bound must be
        # 365 to match R's sample(1:365, people, replace=T).
        samp = [random.randint(1, 365) for _ in range(people)]
        val = 0 if len(set(samp)) == len(samp) else 1
        records.append({'trial': trial, 'people': people, 'val': val})
    print(str(round(trial/(maxTrials)*100, 2)) + "% Complete")
df = pd.DataFrame(records, columns=['trial', 'people', 'val'])
df = df.dropna(axis=0, how='any')
del maxPeople
del people
del trial

# Plot the estimated probability per room size using the first n trials.
# NOTE: the axes are not cleared between iterations, so each saved figure
# also contains the bars drawn in earlier iterations.
for n in range(plotStep, 5):
    dfCopy = df.loc[df.trial <= n]
    dfCopy = dfCopy.groupby(['people'])['val'].mean().to_frame(name='prob').reset_index()
    print(dfCopy)
    plt.bar(dfCopy['people'],
            dfCopy['prob'],
            color='blue',
            edgecolor='none',
            width=0.5,
            align='center')
    plt.suptitle("Birthday Paradox\n")
    plt.title("Based on "+str(n)+" simulations.")
    plt.yticks([0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0])
    plt.xlabel("Number of people in room")
    plt.ylabel("Probability of one or more matching birthdays")
    plt.savefig("bday_"+str(n)+".png", dpi=110, bbox_inches='tight')
A few of the initial plots saved from R look like this, but the Python output looks like this. I want to know if this is because of a rounding error of some sort.
The code is just fine, but you don't clear your axes, so every run is drawn on top of the previous one.
Adding plt.cla() after plt.savefig(...) will make it look much like the R output.

How to make `Heatmaps` in `Bokeh` with a continuous color map, using Python 3?

I was trying to replicate this style of HeatMap that maps continuous values to a LinearColorMapper instance: http://docs.bokeh.org/en/latest/docs/gallery/unemployment.html
I wanted to make a HeatMap (w/ either charts or rect) and then add a single selection widget to select the obsv_id and then a slider widget to go through the dates.
However, I was having trouble in the beginning with the HeatMap itself with a single obsv_id/date pair. What am I doing wrong in creating this HeatMap? This would essentially be a 3x3 rectangle plot of the size variable and the loc variable.
Bonus: Can you help me/give some advice on how to wire the output of these widgets to control the plot?
I saw these posts but all of the examples use actual hex colors as a list instead of mapping using a continuous measure:
python bokeh, how to make a correlation plot? http://docs.bokeh.org/en/latest/docs/gallery/categorical.html
# Init
import numpy as np
import pandas as pd
from bokeh.plotting import figure, output_notebook, output_file, reset_output, show, ColumnDataSource
from bokeh.models import LinearColorMapper
reset_output()
output_notebook()
np.random.seed(0)
# Coords
dates = ["07-3","07-11","08-6","08-28"]
#locs = ["air","water","earth"]
locs = [0,1,2]
size = [3.0, 0.2, 0.025]
observations = ["obsv_%d"%_ for _ in range(10)]
# Data
# One row per (date, loc, size, observation) combination, five columns each.
Ar_tmp = np.zeros(( len(dates)*len(locs)*len(size)*len(observations), 5 ), dtype=object)
i = 0
for date in dates:
    for loc in locs:
        for s in size:
            for obsv_id in observations:
                # np.array over mixed strings and numbers yields a string
                # array, so the numeric columns are stored as text here and
                # converted back with astype(float) once the frame is built.
                Ar_tmp[i,:] = np.array([obsv_id, date, loc, s, np.random.random()])
                i += 1
DF_tmp = pd.DataFrame(Ar_tmp, columns=["obsv_id", "date", "loc", "size", "value"])
DF_tmp["value"] = DF_tmp["value"].astype(float)
DF_tmp["size"] = DF_tmp["size"].astype(float)
DF_tmp["loc"] = DF_tmp["loc"].astype(float)
# obsv_id date loc size value
# 0 obsv_0 07-3 air 3.0 0.548814
# 1 obsv_1 07-3 air 3.0 0.715189
# 2 obsv_2 07-3 air 3.0 0.602763
# 3 obsv_3 07-3 air 3.0 0.544883
# 4 obsv_4 07-3 air 3.0 0.423655
mapper = LinearColorMapper(low = DF_tmp["value"].min(), high = DF_tmp["value"].max())
# # Create Heatmap of a single observation and date pair
query_idx = set(DF_tmp.index[DF_tmp["obsv_id"] == "obsv_0"]) & set(DF_tmp.index[DF_tmp["date"] == "08-28"])
# p = HeatMap(data=DF_tmp.loc[query_idx,:], x="loc", y="size", values="value")
p = figure()
p.rect(x="loc", y="size",
source=ColumnDataSource(DF_tmp.loc[query_idx,:]),
fill_color={'field': 'value', 'transform': mapper},
line_color=None)
show(p)
My Error:
# Javascript error adding output!
# TypeError: Cannot read property 'length' of null
# See your browser Javascript console for more details.
You have to provide a palette to LinearColorMapper. For example:
mapper = LinearColorMapper(
palette='Magma256',
low=DF_tmp["value"].min(),
high=DF_tmp["value"].max()
)
From the LinearColorMapper doc:
class LinearColorMapper(palette=None, **kwargs)
Map numbers in a range [low, high] linearly into a sequence of colors (a palette).
Not related to your exception, but you'll also need to pass width and height parameters to p.rect().

Resources