Pydeck HexLayer min and log scale - streamlit

Let's consider this HexLayer example using PyDeck in StreamLit:
import numpy as np
import pandas as pd
import pydeck as pdk
import streamlit as st

# Centre of the synthetic point cloud; also used as the initial map centre.
lat0 = 40.7
lon0 = -74.1201062
n_points = 1000

# Latitudes are drawn before longitudes so the random stream is consumed in
# the same order on every run.
lat = np.random.normal(loc=lat0, scale=0.02, size=n_points)
lon = np.random.normal(loc=lon0, scale=0.02, size=n_points)
data = pd.DataFrame({'lat': lat, 'lon': lon})

# Camera position for the deck.
view_state = pdk.ViewState(latitude=lat0, longitude=lon0, zoom=10)

# Hexagonal binning layer reading positions from the 'lon'/'lat' columns.
hexagon_layer = pdk.Layer(
    'HexagonLayer',
    data=data,
    get_position='[lon, lat]',
    radius=1000,
    coverage=0.6,
)

deck = pdk.Deck(
    map_provider="mapbox",
    initial_view_state=view_state,
    layers=[hexagon_layer],
)
st.pydeck_chart(deck)
Here's the output:
Is there a way to only display the hexagonal bins with a count above a given threshold, say counts > 5?
Similarly, is it possible to set a logarithmic scale for the color/height of the hexagons?

Related

Plotly choropleth map in jupyter notebooks not showing color

I'm trying to make a choropleth map in Plotly using some data I have in a CSV file. This is what I get as a result (my map):
Below is the code I have written so far:
import json
import pandas as pd
import plotly.express as px

# Load the ASEAN boundaries; the context manager closes the file handle.
with open("aseancovidmap.geojson", "r") as geojson_file:
    asean_country = json.load(geojson_file)
df = pd.read_csv("covidcases.csv")

# Build the country-name -> feature-id lookup BEFORE it is used below
# (using it first raises NameError). Each feature's top-level 'id' is taken
# from its 'sform' property.
id_map = {}
for feature in asean_country['features']:
    feature['id'] = feature['properties']['sform']
    id_map[feature['properties']['name']] = feature['id']

# Raises KeyError if a 'Country' value has no feature with the same name.
df["iso-2"] = df['Country'].apply(lambda x: id_map[x])

# NOTE(review): `locations` holds the GeoJSON feature ids built above, but
# locationmode='country names' asks Plotly to match by country name instead;
# this looks like the reason no colours are drawn -- confirm.
figure = px.choropleth(
    df,
    locations='iso-2',
    locationmode='country names',
    geojson=asean_country,
    color='Ttlcases',
    scope='asia',
    title='Total COVID 19 cases in ASEAN Countries as on 10/1/2022',
)
figure.show()
clearly I don't have access to your files, so have sourced geometry and COVID data. For reference this is at end of answer.
The key change I have made: don't loop over the GeoJSON. Instead, define locations as a column in the dataframe and pass featureidkey.
clearly this is coloring countries
solution
import json
import pandas as pd
import plotly.express as px

# asean_country = json.load(open("aseancovidmap.geojson","r"))
# GeoJSON-like mapping of the ASEAN geometry, with 'adm0_a3' exposed as 'iso_a2'.
renamed_geometry = gdf_asean.rename(columns={"adm0_a3": "iso_a2"})
asean_country = renamed_geometry.__geo_interface__

# df= pd.read_csv("covidcases.csv")
# Case data restricted to the columns of interest and renamed to match the
# names used by the plotting call.
case_columns = ["iso_code", "adm0_a3", "total_cases", "date"]
df = gdf_asean_cases.loc[:, case_columns].rename(
    columns={"iso_code": "iso_a2", "total_cases": "Ttlcases"}
)

# Rows are matched to features through properties.iso_a2 instead of a
# hand-built id lookup.
figure = px.choropleth(
    df,
    locations="iso_a2",
    featureidkey="properties.iso_a2",
    geojson=asean_country,
    color="Ttlcases",
    title="Total COVID 19 cases in ASEAN Countries as on 10/1/2022",
)
# Zoom to the plotted locations and trim the margins.
figure = figure.update_geos(fitbounds="locations", visible=True)
figure = figure.update_layout(margin={"t": 40, "b": 0, "l": 0, "r": 0})
figure.show()
data sourcing
import requests, io
import geopandas as gpd
import pandas as pd

# get asia geometry
gdf = gpd.read_file(
    "https://gist.githubusercontent.com/hrbrmstr/94bdd47705d05a50f9cf/raw/0ccc6b926e1aa64448e239ac024f04e518d63954/asia.geojson"
)

# get countries that make up ASEAN: second table on the page, from label 1 on
wiki_tables = pd.read_html("https://en.wikipedia.org/wiki/List_of_ASEAN_countries_by_GDP")
df = wiki_tables[1].loc[1:]

# no geometry for singapore.... just ASEAN geometry
# (right join: every listed country is kept, with or without a geometry)
asia_geometry = gdf.loc[:, ["admin", "adm0_a3", "geometry"]]
asean_members = df.loc[:, ["Country", "Rank"]]
gdf_asean = asia_geometry.merge(
    asean_members, left_on="admin", right_on="Country", how="right"
)

# get COVID data
dfall = pd.read_csv("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv")

# keep the last row per iso_code (assumes the file is ordered by date -- confirm)
dfall["date"] = pd.to_datetime(dfall["date"])
dflatest = dfall.groupby(["iso_code"], as_index=False).last()

# merge geometry and COVID data
latest_cases = dflatest.loc[:, ["iso_code", "total_cases", "date"]]
gdf_asean_cases = gdf_asean.merge(
    latest_cases, left_on="adm0_a3", right_on="iso_code"
)

How to plot a 3D line using plotly.graph_objects?

I want to draw a line between two points in a 3D plot. But, I can not use plotly.express for some reasons as suggested in the documentation. https://plotly.com/python/3d-line-plots/
Is it possible to draw a line in a 3D plot using the plotly.graph_objects module ? If so, how ?
I found this solution:
plotly.express solution:
import plotly.express as px
import numpy as np
import pandas as pd  # needed for pd.DataFrame below; missing from the original snippet

# End points of the segment: (0, 1, 1) -> (1, 1, 1).
x = np.array([0, 1])
y = np.array([1, 1])
z = np.array([1, 1])

# plotly.express takes its data as columns of a dataframe.
df = pd.DataFrame({"x": x, "y": y, "z": z})
fig = px.line_3d(df, x="x", y="y", z="z")
fig.show()
output :
plotly.graph_objects version :
import plotly.graph_objs as go
import numpy as np

# End points of the segment: (0, 1, 1) -> (1, 1, 1).
x = np.array([0, 1])
y = np.array([1, 1])
z = np.array([1, 1])

# A Scatter3d trace in 'lines' mode joins the points with a line.
line_trace = go.Scatter3d(x=x, y=y, z=z, mode='lines')
fig = go.Figure(data=line_trace)
fig.show()
output 2:

How to replicate the Column with data set in table

I want to convert the question columns into rows using Python pandas, like below:
import pandas as pd
from openpyxl import load_workbook
# Read the 'results' sheet of the workbook.
df = pd.read_excel (r'file' ,sheet_name='results')
# Maps the suffixed column names back to the plain 'Score' / 'Duration' names.
d = {'Score.1':'Score','Score.2':'Score','Duration.1':'Duration','Duration.2':'Duration'}
# Melt each question column separately, carrying that question's own
# score/duration columns along as identifier columns.
melted=pd.melt(df, id_vars=['userid','Candidate','Score','Duration'], value_vars=['Question 1'],var_name='myVarname', value_name='myValname')
melted1=pd.melt(df, id_vars=['userid','Candidate','Score.1','Duration.1'], value_vars=['Question 2'],var_name='myVarname', value_name='myValname').rename(columns=d)
melted2=pd.melt(df, id_vars=['userid','Candidate','Score.2','Duration.2'], value_vars=['Question 3 '],var_name='myVarname', value_name='myValname').rename(columns=d)
# (the steps for the questions in between are elided in the original post)
......
# NOTE(review): this rebinds `melted2`, so the 'Question 3 ' frame built above
# is dropped before the concat; `d` also has no entries for 'Score.25' /
# 'Duration.25', so the rename leaves those two columns unchanged -- confirm intent.
melted2=pd.melt(df, id_vars=['userid','Candidate','Score.25','Duration.25'], value_vars=['Question 25 '],var_name='myVarname', value_name='myValname').rename(columns=d)
meltedfinal=[melted,melted1,melted2]
# Stack the melted frames and write the result out.
result = pd.concat(meltedfinal)
result.to_excel(r'file') # doctest: +SKIP

Bokeh initializing error

I'm trying to do clustering and plot the result with Bokeh. I'm new to Bokeh, and I get a warning message. This is my code:
#Clustering
import numpy as np
np.random.seed(0)
from bokeh.io import curdoc
from bokeh.layouts import widgetbox, row, column
from bokeh.models import ColumnDataSource, Select, Slider
from bokeh.plotting import figure
from bokeh.palettes import Spectral6
from sklearn import cluster, datasets
from sklearn.neighbors import kneighbors_graph
from sklearn.preprocessing import StandardScaler
from bokeh.models.widgets import Panel, Tabs
# define some helper functions
# set up initial data
# NOTE(review): `data` is not defined anywhere in this snippet; presumably a
# 2-D numeric table loaded earlier -- confirm.
n_samples = len(data)
n_clusters = 2
# Only used as the plot title further down.
algorithm = 'MiniBatchKMeans'
X=data
#clustering with k-means
def clustering(X, n_clusters):
    """Standardise ``X`` and assign each row to a k-means cluster.

    Args:
        X: 2-D array-like of samples (rows) by features (columns).
        n_clusters: Number of clusters to fit.

    Returns:
        A ``(X_scaled, y_pred)`` tuple: the standardised data and one
        cluster label per row.
    """
    # normalize dataset for easier parameter selection
    X = StandardScaler().fit_transform(X)
    # `KMeans` itself is never imported in this file; reach it through the
    # `cluster` module that is.
    model = cluster.KMeans(n_clusters=n_clusters)
    model.fit(X)
    if hasattr(model, 'labels_'):
        # `np.int` was only an alias of the builtin and was removed in
        # NumPy 1.24; `int` gives the same dtype.
        y_pred = model.labels_.astype(int)
    else:
        y_pred = model.predict(X)
    return X, y_pred
#setup initials
y=n_clusters
# Standardise the data and get one cluster label per sample.
X, y_pred = clustering(X,n_clusters)
# Repeat the 6-colour palette 20 times so indices above 5 still resolve.
spectral=np.hstack([Spectral6]*20)
# NOTE(review): this builds one colour per cluster (length == n_clusters),
# while the x / y columns below hold one entry per sample; that mismatch looks
# like the source of the "columns must be of the same length" warning.
# Indexing `spectral` by `y_pred` would give one colour per sample -- confirm.
colors=[spectral[i] for i in range(y)]
#plotting
plot=figure(toolbar_location=None,title=algorithm)
# All three columns feed the circle glyph below.
source=ColumnDataSource(data=dict(x=X[:,0],y=X[:,1],colors=colors))
plot.circle('x','y',fill_color='colors',line_color=None,source=source)
# set up widgets
# Options offered by the class drop-down.
Class=[
'Comm',
'Cr',
'Ey',
'F ',
'Int ',
]
a_select = Select(value='Comm',
title='Select Class:',
width=200,
options=Class)
# min_Not = Slider(title="min_not ", start=0.000000e+00, end=1.876726e+13, value=0.000000e+00, step=1)
# max_Not = Slider(title="max_not", start=0.000000e+00, end=1.876726e+13, value=1.876726e+13, step=1)
Not_slider = Slider(title="Number of samples",
value=1500.0,
start=0.000000e+00,
end=1.876726e+13,
step=1,
width=400)
clusters_slider = Slider(title="Number of clusters",
value=2.0,
start=2.0,
end=10.0,
step=1,
width=400)
# set up callbacks
# (the callback is commented out, so the widgets currently do not update the plot)
# def update_algorithm_or_clusters(attrname, old, new):
# global X
# algorithm = a_select.value
# n_clusters = int(clusters_slider.value)
# X, y_pred = clustering(X, n_clusters)
# colors = [spectral[i] for i in range(y_pred)
# source.data= dict(colors=colors, x=X[:, 0], y=X[:, 1])
# plot.title.text = algorithm
# set up layout
# NOTE(review): `widgetbox` (and the `Panel` / `Tabs` import from
# bokeh.models.widgets at the top) appear to be gone from recent Bokeh
# releases -- confirm against the target Bokeh version.
selects = row(a_select, width=200)
inputs = column(selects, widgetbox(Not_slider, clusters_slider))
# add to document
# Controls on the left, plot on the right.
curdoc().add_root(row(inputs, plot))
curdoc().title = "Clustering"
And this is the warning message. How can I solve the problem?
BokehUserWarning: ColumnDataSource's columns must be of the same length
lambda: warnings.warn("ColumnDataSource's columns must be of the same length", BokehUserWarning))

How to make `Heatmaps` in `Bokeh` with a continuous color map, using Python 3?

I was trying to replicate this style of HeatMap that maps continuous values to a LinearColorMapper instance: http://docs.bokeh.org/en/latest/docs/gallery/unemployment.html
I wanted to make a HeatMap (w/ either charts or rect) and then add a single selection widget to select the obsv_id and then a slider widget to go through the dates.
However, I was having trouble in the beginning with the HeatMap itself with a single obsv_id/date pair. What am I doing wrong in creating this HeatMap? This would essentially be a 3x3 rectangle plot of the size variable and the loc variable.
Bonus: Can you help me/give some advice on how to wire the output of these widgets to control the plot?
I saw these posts but all of the examples use actual hex colors as a list instead of mapping using a continuous measure:
python bokeh, how to make a correlation plot? http://docs.bokeh.org/en/latest/docs/gallery/categorical.html
# Init
import numpy as np
import pandas as pd
from bokeh.plotting import figure, output_notebook, output_file, reset_output, show, ColumnDataSource
from bokeh.models import LinearColorMapper
reset_output()
output_notebook()
# Fixed seed so the random 'value' column is reproducible.
np.random.seed(0)
# Coords
dates = ["07-3", "07-11", "08-6", "08-28"]
#locs = ["air","water","earth"]
locs = [0, 1, 2]
size = [3.0, 0.2, 0.025]
observations = ["obsv_%d" % n for n in range(10)]
# Data
# One row per (date, loc, size, observation) combination, five fields each.
Ar_tmp = np.zeros((len(dates) * len(locs) * len(size) * len(observations), 5), dtype=object)
i = 0
for date in dates:
    for loc in locs:
        for s in size:
            for obsv_id in observations:
                # np.array() on mixed str/number input stores every field as
                # text; the numeric columns are cast back to float below.
                Ar_tmp[i, :] = np.array([obsv_id, date, loc, s, np.random.random()])
                i += 1
DF_tmp = pd.DataFrame(Ar_tmp, columns=["obsv_id", "date", "loc", "size", "value"])
DF_tmp["value"] = DF_tmp["value"].astype(float)
DF_tmp["size"] = DF_tmp["size"].astype(float)
DF_tmp["loc"] = DF_tmp["loc"].astype(float)
# obsv_id date loc size value
# 0 obsv_0 07-3 air 3.0 0.548814
# 1 obsv_1 07-3 air 3.0 0.715189
# 2 obsv_2 07-3 air 3.0 0.602763
# 3 obsv_3 07-3 air 3.0 0.544883
# 4 obsv_4 07-3 air 3.0 0.423655
# Colour mapper spanning the min..max of the 'value' column.
# NOTE(review): no palette is passed here.
mapper = LinearColorMapper(low = DF_tmp["value"].min(), high = DF_tmp["value"].max())
# # Create Heatmap of a single observation and date pair
# Index labels of the rows that are both obsv_0 and dated 08-28.
query_idx = set(DF_tmp.index[DF_tmp["obsv_id"] == "obsv_0"]) & set(DF_tmp.index[DF_tmp["date"] == "08-28"])
# p = HeatMap(data=DF_tmp.loc[query_idx,:], x="loc", y="size", values="value")
p = figure()
# Rectangles placed at (loc, size), fill colour driven by 'value' through `mapper`.
# NOTE(review): no width / height is passed to rect().
p.rect(x="loc", y="size",
source=ColumnDataSource(DF_tmp.loc[query_idx,:]),
fill_color={'field': 'value', 'transform': mapper},
line_color=None)
show(p)
My Error:
# Javascript error adding output!
# TypeError: Cannot read property 'length' of null
# See your browser Javascript console for more details.
You have to provide a palette to LinearColorMapper. For example:
# Same value range as before, now with an explicit palette to map into.
mapper = LinearColorMapper(
palette='Magma256',
low=DF_tmp["value"].min(),
high=DF_tmp["value"].max()
)
From the LinearColorMapper doc:
class LinearColorMapper(palette=None, **kwargs)
Map numbers in a range [low, high] linearly into a sequence of colors (a palette).
Not related to your exception, but you'll also need to pass width and height parameters to p.rect().

Resources