Plotly choropleth map in jupyter notebooks not showing color - jupyter-notebook

I am trying to make a choropleth map in Plotly using some data I have in a CSV file. I have created the map, but the result does not show any color.
Below is the code I used:
import json
import pandas as pd
import plotly.express as px

asean_country = json.load(open("aseancovidmap.geojson", "r"))
df = pd.read_csv("covidcases.csv")

# build a lookup from country name to the id stored in the geojson
id_map = {}
for feature in asean_country['features']:
    feature['id'] = feature['properties']['sform']
    id_map[feature['properties']['name']] = feature['id']

df["iso-2"] = df['Country'].apply(lambda x: id_map[x])

figure = px.choropleth(df, locations='iso-2', locationmode='country names',
                       geojson=asean_country, color='Ttlcases', scope='asia',
                       title='Total COVID 19 cases in ASEAN Countries as on 10/1/2022')
figure.show()

Clearly I don't have access to your files, so I have sourced equivalent geometry and COVID data; for reference, the sourcing code is at the end of this answer.
The key changes I have made:
* don't loop over the geojson
* define locations as a column in the dataframe and point featureidkey at the matching geojson property
Clearly this version does color the countries.
solution
import json
import pandas as pd
import plotly.express as px

# asean_country = json.load(open("aseancovidmap.geojson", "r"))
asean_country = gdf_asean.rename(columns={"adm0_a3": "iso_a2"}).__geo_interface__
# df = pd.read_csv("covidcases.csv")
df = gdf_asean_cases.loc[:, ["iso_code", "adm0_a3", "total_cases", "date"]].rename(
    columns={"iso_code": "iso_a2", "total_cases": "Ttlcases"}
)

figure = (
    px.choropleth(
        df,
        locations="iso_a2",
        featureidkey="properties.iso_a2",
        geojson=asean_country,
        color="Ttlcases",
        title="Total COVID 19 cases in ASEAN Countries as on 10/1/2022",
    )
    .update_geos(fitbounds="locations", visible=True)
    .update_layout(margin={"t": 40, "b": 0, "l": 0, "r": 0})
)
figure.show()
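A quick sanity check (my addition, not part of the original answer) using the df and asean_country objects defined above: every value in the locations column needs a matching value at the featureidkey path in the geojson, otherwise those rows tend to render with no color, which is usually why a choropleth comes out uncoloured.

# values used for matching on the dataframe side ...
df_keys = set(df["iso_a2"])
# ... and on the geojson side (the path given to featureidkey)
geo_keys = {f["properties"]["iso_a2"] for f in asean_country["features"]}

print("in dataframe but not in geojson:", df_keys - geo_keys)
print("in geojson but not in dataframe:", geo_keys - df_keys)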
data sourcing
import requests, io
import geopandas as gpd
import pandas as pd

# get asia geometry
gdf = gpd.read_file(
    "https://gist.githubusercontent.com/hrbrmstr/94bdd47705d05a50f9cf/raw/0ccc6b926e1aa64448e239ac024f04e518d63954/asia.geojson"
)
# get countries that make up ASEAN
df = pd.read_html("https://en.wikipedia.org/wiki/List_of_ASEAN_countries_by_GDP")[1].loc[1:]

# no geometry for Singapore.... just ASEAN geometry
gdf_asean = gdf.loc[:, ["admin", "adm0_a3", "geometry"]].merge(
    df.loc[:, ["Country", "Rank"]], left_on="admin", right_on="Country", how="right"
)

# get COVID data
dfall = pd.read_csv("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv")
# filter to last date in data
dfall["date"] = pd.to_datetime(dfall["date"])
dflatest = dfall.groupby(["iso_code"], as_index=False).last()

# merge geometry and COVID data
gdf_asean_cases = gdf_asean.merge(
    dflatest.loc[:, ["iso_code", "total_cases", "date"]], left_on="adm0_a3", right_on="iso_code"
)

Related

Pydeck HexLayer min and log scale

Let's consider this HexLayer example using PyDeck in Streamlit:
import numpy as np
import pandas as pd
import pydeck as pdk
import streamlit as st

lat0 = 40.7
lon0 = -74.1201062
n_points = 1000
lat = np.random.normal(loc=lat0, scale=0.02, size=n_points)
lon = np.random.normal(loc=lon0, scale=0.02, size=n_points)
data = pd.DataFrame({'lat': lat, 'lon': lon})

st.pydeck_chart(pdk.Deck(
    map_provider="mapbox",
    initial_view_state=pdk.ViewState(
        latitude=lat0,
        longitude=lon0,
        zoom=10,
    ),
    layers=[
        pdk.Layer(
            'HexagonLayer',
            data=data,
            get_position='[lon, lat]',
            radius=1000,
            coverage=0.6,
        ),
    ],
))
Is there a way to only display the hexagonal bins with a count above a given threshold, say counts > 5?
Similarly, is it possible to set a logarithmic scale for the color/height of the hexagons?
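No answer is recorded here, but one possible direction as a sketch: deck.gl's HexagonLayer documents lowerPercentile/upperPercentile properties that hide the sparsest or densest bins, and pydeck forwards extra snake_case keyword arguments to the layer, so the lower_percentile argument below is an assumption based on the deck.gl documentation rather than a verified pydeck parameter. It filters by percentile of the aggregated count rather than by an absolute count > 5, which is the closest built-in I'm aware of. For a genuinely logarithmic colour/height scale you would normally override the layer's getColorValue/getElevationValue accessors, which are JavaScript functions and not straightforward to supply from pydeck.

import numpy as np
import pandas as pd
import pydeck as pdk

# same synthetic point cloud as in the question
lat0, lon0, n_points = 40.7, -74.1201062, 1000
data = pd.DataFrame({
    'lat': np.random.normal(loc=lat0, scale=0.02, size=n_points),
    'lon': np.random.normal(loc=lon0, scale=0.02, size=n_points),
})

deck = pdk.Deck(
    map_provider="mapbox",
    initial_view_state=pdk.ViewState(latitude=lat0, longitude=lon0, zoom=10),
    layers=[
        pdk.Layer(
            'HexagonLayer',
            data=data,
            get_position='[lon, lat]',
            radius=1000,
            coverage=0.6,
            # assumed deck.gl prop: drop the sparsest 25% of bins
            lower_percentile=25,
        ),
    ],
)
deck.to_html("hexagons.html")   # or st.pydeck_chart(deck) inside Streamlit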

rpy2 does not convert back to pandas

I have an R object that will not convert to Pandas, and the strange part is that it doesn't throw an error.
Updated with the code I'm using; sorry not to supply that up front, and for missing the request for two weeks!
Python code that calls an R script
import pandas as pd
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
import datetime
from rpy2.robjects.conversion import localconverter

def serial_date_to_string(srl_no):
    new_date = datetime.datetime(1970, 1, 1, 0, 0) + datetime.timedelta(srl_no - 1)
    return new_date.strftime("%Y-%m-%d")

jurisdiction = 'TX'
r = ro.r
r_df = r['source']('farrington.R')

with localconverter(ro.default_converter + pandas2ri.converter):
    pd_from_r_df = ro.conversion.rpy2py(r_df)
The issue is that pd_from_r_df comes back as an R object rather than a Pandas dataframe:
>>> pd_from_r_df
R object with classes: ('list',) mapped to:
[ListSexpVector, BoolSexpVector]
value: <class 'rpy2.rinterface.ListSexpVector'>
<rpy2.rinterface.ListSexpVector object at 0x7faa4c4eff08> [RTYPES.VECSXP]
visible: <class 'rpy2.rinterface.BoolSexpVector'>
<rpy2.rinterface.BoolSexpVector object at 0x7faa4c4e7948> [RTYPES.LGLSXP]
Here's the R script "farrington.R". It returns a surveillance time series, which ro.conversion.rpy2py (as used above) isn't converting to a pandas dataframe:
library('surveillance')
library(readr)
library(tidyr)
library(dplyr)
w<-1
b<-3
nfreq<-52
steps_back<- 28
alpha<-0.05
counts <- read_csv("Weekly_counts_of_death_by_jurisdiction_and_cause_of_death.csv")
counts<-counts[,!colnames(counts) %in% c('Cause Subgroup','Time Period','Suppress','Note','Average Number of Deaths in Time Period','Difference from 2015-2019 to 2020','Percent Difference from 2015-2019 to 2020')]
wide_counts_by_cause<-pivot_wider(counts,names_from='Cause Group',values_from='Number of Deaths',values_fn=(`Cause Group`=sum))
wide_state <- filter(wide_counts_by_cause,`State Abbreviation`==jurisdiction)
wide_state <- filter(wide_state,Type=='Unweighted')
wide_state[is.na(wide_state)] <-0
important_columns=c('Alzheimer disease and dementia','Cerebrovascular diseases','Heart failure','Hypertensive dieases','Ischemic heart disease','Other diseases of the circulatory system','Malignant neoplasms','Diabetes','Renal failure','Sepsis','Chronic lower respiratory disease','Influenza and pneumonia','Other diseases of the respiratory system','Residual (all other natural causes)')
all_columns <- append(c('Year','Week'),important_columns)
selected_wide_state<-wide_state[, names(wide_state) %in% all_columns]
start<-c(as.numeric(min(selected_wide_state[,'Year'])),as.numeric(min(selected_wide_state[,'Week'])))
freq<-as.numeric(max(selected_wide_state[,'Week']))
sts <- new("sts",epoch=1:nrow(numeric_wide_state),start=start,freq=freq,observed=numeric_wide_state)
sts_4 <- aggregate(sts[,important_columns],nfreq=nfreq)
start_idx=end_idx-steps_back
cntrlFar <- list(range=start_idx:end_idx, w=w, b=b, alpha=alpha)
surveil_ts_4_far <- farrington(sts_4,control=cntrlFar)
far_df<-tidy.sts(surveil_ts_4_far)
far_df
(using the NCHS data here [from a couple months back] https://data.cdc.gov/NCHS/Weekly-counts-of-death-by-jurisdiction-and-cause-o/u6jv-9ijr/ )
In R, when calling source() on a script without named functions, the returned object is by default a list of two named components, $value and $visible, where:
$value is the last displayed or defined object, which in your case is the far_df data frame (in R a data.frame is a class object extending the list type);
$visible is a boolean vector indicating whether the last object was displayed, which in your case is TRUE. This would be FALSE had you ended the script at far_df <- tidy.sts(surveil_ts_4_far).
In fact, your Python output confirms this, indicating a list of [ListSexpVector, BoolSexpVector].
Therefore, since you only want the first item, index into the result accordingly, by number or by name.
r_raw = ro.r['source']('farrington.R')            # IN R: r_raw <- source('farrington.R')

# either by position ...
r_df = r_raw[0]                                   # IN R: r_df <- r_raw[[1]]
# ... or by name
r_df = r_raw[r_raw.names.index('value')]          # IN R: r_df <- r_raw$value

with localconverter(ro.default_converter + pandas2ri.converter):
    pd_from_r_df = ro.conversion.rpy2py(r_df)

How to replicate the column with the data set in a table

I want to convert the question columns into rows using Python Pandas, like below:
import pandas as pd
from openpyxl import load_workbook
df = pd.read_excel(r'file', sheet_name='results')
d = {'Score.1':'Score','Score.2':'Score','Duration.1':'Duration','Duration.2':'Duration'}
melted=pd.melt(df, id_vars=['userid','Candidate','Score','Duration'], value_vars=['Question 1'],var_name='myVarname', value_name='myValname')
melted1=pd.melt(df, id_vars=['userid','Candidate','Score.1','Duration.1'], value_vars=['Question 2'],var_name='myVarname', value_name='myValname').rename(columns=d)
melted2=pd.melt(df, id_vars=['userid','Candidate','Score.2','Duration.2'], value_vars=['Question 3 '],var_name='myVarname', value_name='myValname').rename(columns=d)
......
melted2=pd.melt(df, id_vars=['userid','Candidate','Score.25','Duration.25'], value_vars=['Question 25 '],var_name='myVarname', value_name='myValname').rename(columns=d)
meltedfinal=[melted,melted1,melted2]
result = pd.concat(meltedfinal)
result.to_excel(r'file') # doctest: +SKIP
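No answer was posted for this one, but the repeated copy-and-paste suggests a loop. A minimal sketch, assuming the sheet really does name its columns 'Question N' with 'Score'/'Score.N-1' and 'Duration'/'Duration.N-1' pairs as in the first three melts above (adjust the suffix rule if the last question actually pairs with Score.25):

import pandas as pd

df = pd.read_excel(r'file', sheet_name='results')    # same placeholder path as above

frames = []
for n in range(1, 26):
    suffix = '' if n == 1 else f'.{n - 1}'           # 'Score', 'Score.1', ..., 'Score.24'
    part = pd.melt(
        df,
        id_vars=['userid', 'Candidate', f'Score{suffix}', f'Duration{suffix}'],
        value_vars=[f'Question {n}'],
        var_name='myVarname',
        value_name='myValname',
    ).rename(columns={f'Score{suffix}': 'Score', f'Duration{suffix}': 'Duration'})
    frames.append(part)

result = pd.concat(frames, ignore_index=True)
result.to_excel(r'file')  # doctest: +SKIP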

IndexError while plotting netCDF data using Basemap, matplotlib and contour command

I am trying to plot variables of pressure and wind velocities from the netcdf file of MERRA re-analysis data at a single level. I'm using the basemap module for the plotting and other necessary packages to get the data. Unfortunately, I end up with an error.
Here is my code:
from netCDF4 import Dataset as NetCDFFile
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.basemap import Basemap
import os
import netCDF4
from netCDF4 import Dataset
from osgeo import gdal
os.chdir(r'F:\Atmospheric rivers\MERRA')
dirrec = str(r'F:\Atmospheric rivers\MERRA')
ncfile='MERRA2_400.inst6_3d_ana_Np.20180801.SUB.nc'
file=Dataset(ncfile,'r',format='NETCDF4')
lon=file.variables['lon'][:]
lat=file.variables['lat'][:]
time=file.variables['time'][:]
u=file.variables['U'][:]
v=file.variables['V'][:]
p=file.variables['PS'][:]
q=file.variables['QV'][:]
map = Basemap(projection='merc',llcrnrlon=70.,llcrnrlat=8.,urcrnrlon=90.,urcrnrlat=20.,resolution='i')
map.drawcoastlines()
map.drawstates()
map.drawcountries()
#map.drawlsmask(land_color='Linen', ocean_color='#CCFFFF', resolution ='i') # can use HTML names or codes for colors
#map.drawcounties()
parallels = np.arange(0,50,5.) # latitude lines every 5 degrees
meridians = np.arange(-95,-70,5.) # longitude lines every 5 degrees
map.drawparallels(parallels,labels=[1,0,0,0],fontsize=10)
map.drawmeridians(meridians,labels=[0,0,0,1],fontsize=10)
x,y= np.meshgrid(lon-180,lat) # for this dataset, longitude is 0 through 360, so you need to subtract 180 to properly display on map
#x,y = map(lons,lats)
clevs = np.arange(960,1040,4)
cs = map.contour(x,y,p[0,:,:]/100,clevs,colors='blue',linewidths=1.)
plt.show()
plt.savefig('Pressure.png')
Error
%run "F:/Atmospheric rivers/MERRA/aosc.py"
C:\Users\Pavilion\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\mpl_toolkits\basemap\__init__.py:3505: MatplotlibDeprecationWarning: The ishold function was deprecated in version 2.0.
b = ax.ishold()
C:\Users\Pavilion\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\mpl_toolkits\basemap\__init__.py:3570: MatplotlibDeprecationWarning: axes.hold is deprecated.
See the API Changes document (http://matplotlib.org/api/api_changes.html)
for more details.
ax.hold(b)
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
F:\Atmospheric rivers\MERRA\aosc.py in <module>()
36 #x,y = map(lons,lats)
37 clevs = np.arange(960,1040,4)
---> 38 cs = map.contour(x,y,p[0,:,:]/100,clevs,colors='blue',linewidths=1.)
39
40
C:\Users\Pavilion\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\mpl_toolkits\basemap\__init__.py in with_transform(self, x, y, data, *args, **kwargs)
519 # convert lat/lon coords to map projection coords.
520 x, y = self(x,y)
--> 521 return plotfunc(self,x,y,data,*args,**kwargs)
522 return with_transform
523
C:\Users\Pavilion\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\mpl_toolkits\basemap\__init__.py in contour(self, x, y, data, *args, **kwargs)
3540 # only do this check for global projections.
3541 if self.projection in _cylproj + _pseudocyl:
-> 3542 xx = x[x.shape[0]/2,:]
3543 condition = (xx >= self.xmin) & (xx <= self.xmax)
3544 xl = xx.compress(condition).tolist()
IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices
So, in summary the error turns out to be
IndexError: only integers, slices (:), ellipsis (...), numpy.newaxis (None) and integer or boolean arrays are valid indices
How can I fix this error?
I guess you are not using correct values for x and y to make the plot. Basemap's contour expects x, y and z all to be 2D arrays. In your case, x and y are most likely vectors of longitude and latitude values. In addition, you should convert the longitude and latitude to map projection coordinates for the selected projection.
Can you try:
lons,lats = np.meshgrid(lon-180.,lat);
x,y = map(lons,lats)
cs = map.contour(x,y,p[0,:,:]/100,clevs,colors='blue',linewidths=1.)
The second line converts your longitude and latitude values to the correct x and y values for your selected projection.
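As an aside, and as an assumption about the Basemap version rather than something stated in the original answer, recent Basemap releases also accept latlon=True on contour, which projects longitude/latitude degrees to map coordinates for you:

lons, lats = np.meshgrid(lon - 180., lat)
# latlon=True tells Basemap the inputs are degrees and projects them itself
cs = map.contour(lons, lats, p[0, :, :] / 100, clevs,
                 colors='blue', linewidths=1., latlon=True)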

How to make `Heatmaps` in `Bokeh` with a continuous color map, using Python 3?

I was trying to replicate this style of HeatMap that maps continuous values to a LinearColorMapper instance: http://docs.bokeh.org/en/latest/docs/gallery/unemployment.html
I wanted to make a HeatMap (w/ either charts or rect) and then add a single selection widget to select the obsv_id and then a slider widget to go through the dates.
However, I was having trouble in the beginning with the HeatMap itself with a single obsv_id/date pair. What am I doing wrong in creating this HeatMap? This would essentially be a 3x3 rectangle plot of the size variable and the loc variable.
Bonus: Can you help me/give some advice on how to wire the output of these widgets to control the plot?
I saw these posts but all of the examples use actual hex colors as a list instead of mapping using a continuous measure:
python bokeh, how to make a correlation plot? http://docs.bokeh.org/en/latest/docs/gallery/categorical.html
# Init
import numpy as np
import pandas as pd
from bokeh.plotting import figure, output_notebook, output_file, reset_output, show, ColumnDataSource
from bokeh.models import LinearColorMapper
reset_output()
output_notebook()
np.random.seed(0)

# Coords
dates = ["07-3","07-11","08-6","08-28"]
#locs = ["air","water","earth"]
locs = [0,1,2]
size = [3.0, 0.2, 0.025]
observations = ["obsv_%d"%_ for _ in range(10)]

# Data
Ar_tmp = np.zeros(( len(dates)*len(locs)*len(size)*len(observations), 5 ), dtype=object)
i = 0
for date in dates:
    for loc in locs:
        for s in size:
            for obsv_id in observations:
                Ar_tmp[i,:] = np.array([obsv_id, date, loc, s, np.random.random()])
                i += 1
DF_tmp = pd.DataFrame(Ar_tmp, columns=["obsv_id", "date", "loc", "size", "value"])
DF_tmp["value"] = DF_tmp["value"].astype(float)
DF_tmp["size"] = DF_tmp["size"].astype(float)
DF_tmp["loc"] = DF_tmp["loc"].astype(float)
# obsv_id date loc size value
# 0 obsv_0 07-3 air 3.0 0.548814
# 1 obsv_1 07-3 air 3.0 0.715189
# 2 obsv_2 07-3 air 3.0 0.602763
# 3 obsv_3 07-3 air 3.0 0.544883
# 4 obsv_4 07-3 air 3.0 0.423655

mapper = LinearColorMapper(low = DF_tmp["value"].min(), high = DF_tmp["value"].max())

# Create Heatmap of a single observation and date pair
query_idx = set(DF_tmp.index[DF_tmp["obsv_id"] == "obsv_0"]) & set(DF_tmp.index[DF_tmp["date"] == "08-28"])
# p = HeatMap(data=DF_tmp.loc[query_idx,:], x="loc", y="size", values="value")
p = figure()
p.rect(x="loc", y="size",
       source=ColumnDataSource(DF_tmp.loc[query_idx,:]),
       fill_color={'field': 'value', 'transform': mapper},
       line_color=None)
show(p)
My Error:
# Javascript error adding output!
# TypeError: Cannot read property 'length' of null
# See your browser Javascript console for more details.
You have to provide a palette to LinearColorMapper. For example:
mapper = LinearColorMapper(
    palette='Magma256',
    low=DF_tmp["value"].min(),
    high=DF_tmp["value"].max()
)
From the LinearColorMapper doc:
class LinearColorMapper(palette=None, **kwargs)
Map numbers in a range [low, high] linearly into a sequence of colors (a palette).
Not related to your exception, but you'll also need to pass width and height parameters to p.rect().
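Putting both fixes together, a minimal sketch reusing DF_tmp, query_idx, figure, ColumnDataSource and show from the snippet above; the imported Magma256 palette list and the width/height values are my assumptions, not something from the original answer:

from bokeh.palettes import Magma256

mapper = LinearColorMapper(
    palette=Magma256,
    low=DF_tmp["value"].min(),
    high=DF_tmp["value"].max(),
)

p = figure()
p.rect(
    x="loc", y="size",
    width=1, height=1,   # assumed cell size in data units; tune to your grid spacing
    source=ColumnDataSource(DF_tmp.loc[query_idx, :]),
    fill_color={"field": "value", "transform": mapper},
    line_color=None,
)
show(p)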
