Changing axis order during frame animation in plotly - r

I want the y-axis in the plot to order by pop every year when the frames move forward. So when Africa overtakes Americas the axis should change accordingly.
library(plotly)
library(tidyverse)
library(gapminder)
pop_df <- gapminder %>%
group_by(continent, year) %>%
summarise(pop = sum(pop, na.rm = TRUE), .groups = "drop")
pop_df %>%
mutate(continent = reorder(continent, pop)) %>%
plot_ly(y=~continent, x=~pop, frame = ~year) %>%
add_bars(color = ~ continent, text = ~formatC(pop/1000000, format = "f", digits = 1), textposition = "outside", showlegend =FALSE)

Related

How to reorder the plot by factors in ggplot?

I am trying to reorder the geom_col plot by one of the factors pct_female_vacc used below in plot of the variable pct_vacc_GenderType.
df
library(tidyverse)
library(lubridate)
library(scales)
library(gganimate)
file_url1 <- "https://raw.githubusercontent.com/johnsnow09/covid19-df_stack-code/main/cowin_vaccine_data_statewise.csv"
df_vaccination <- read.csv(url(file_url1))
df_vaccination <- df_vaccination %>%
mutate(Updated.On = as.Date(Updated.On))
plot
df_vaccination %>%
filter(State != "India",
Updated.On == max(Updated.On)) %>%
# arrange(desc(Updated.On)) %>%
mutate(pct_female_vacc = Female.Individuals.Vaccinated./Total.Individuals.Vaccinated,
pct_male_vacc = Male.Individuals.Vaccinated./Total.Individuals.Vaccinated,
State = as.factor(State)
) %>%
pivot_longer(cols = c(pct_female_vacc:pct_male_vacc),
names_to = "pct_vacc_GenderType",
values_to = "pct_vacc") %>%
mutate(pct_vacc_GenderType = as.factor(pct_vacc_GenderType)) %>%
na.omit() %>%
ggplot(aes(x = pct_vacc, y = State ,
fill = pct_vacc_GenderType)) +
geom_col()
I am looking to get above plot to be reordered by red color i.e pct_female_vacc factor.
Unable to use reorder_within as I have not used facet_wrap here. Also tried fct_reorder but may be I am not doing it right or even that doesn't work in this case.
What you want to do is simple with forcats::fct_reorder. The only thing you have to be cautious about is that you need to set the factor before pivot_longer. Here you go:
df_vaccination %>%
filter(State != "India",
Updated.On == max(Updated.On) - 1) %>% # the newest date contains only NAs, so I use the second oldest
# arrange(desc(Updated.On)) %>%
mutate(pct_female_vacc = Female.Individuals.Vaccinated./Total.Individuals.Vaccinated,
pct_male_vacc = Male.Individuals.Vaccinated./Total.Individuals.Vaccinated,
State = as.factor(State)
) %>%
mutate(State = forcats::fct_reorder(State, pct_female_vacc)) %>% # since you pivot longer in the next step you have to order your factors here
pivot_longer(cols = c(pct_female_vacc:pct_male_vacc),
names_to = "pct_vacc_GenderType",
values_to = "pct_vacc") %>%
mutate(pct_vacc_GenderType = as.factor(pct_vacc_GenderType)) %>%
filter(!is.na(pct_vacc)) %>% # use this instead of na.omit() to remove NAs
ggplot(aes(x = pct_vacc, y = State ,
fill = pct_vacc_GenderType)) +
geom_col() +
theme(legend.position = "bottom") # I moved the legend to the bottom so it looks better on for stackoverflow
Created on 2021-05-16 by the reprex package (v2.0.0)
arrange the data by pct_female_vacc and change the State to factor based on appearance.
library(tidyverse)
df_vaccination %>%
filter(State != "India",
Updated.On == max(Updated.On)) %>%
mutate(pct_female_vacc = `Female.Individuals.Vaccinated.`/Total.Individuals.Vaccinated,
pct_male_vacc = Male.Individuals.Vaccinated./Total.Individuals.Vaccinated) %>%
arrange(pct_female_vacc) %>%
mutate(State = factor(State, unique(State))) %>%
pivot_longer(cols = c(pct_female_vacc:pct_male_vacc),
names_to = "pct_vacc_GenderType",
values_to = "pct_vacc") %>%
na.omit() %>%
ggplot(aes(x = pct_vacc, y = State ,
fill = pct_vacc_GenderType)) +
geom_col()

How can we data wrangling to obtain shown ratio/proportion chart shown

Goal is to produce a visualization indicating ratio.
Please help us how can we produce such ratio chart (high lighted) in R ?
library(tidyverse)
# Dataset creation
df <- data.frame(cls = c(rep("A",4),rep("B",4)),
grd = c("A1",rep("A2",3),rep(c("B1","B2"), 2)),
typ = c(rep("m",2),rep("o",2),"m","n",rep("p",2)),
pnts = c(rep(1:4,2)))
df
#### Data wrangling
df1 <- df %>%
group_by(cls) %>%
summarise(cls_pct = sum(pnts))
df1
df2 <- df %>%
group_by(cls,grd) %>%
summarize(grd_pct = sum(pnts))
df2
df3 <- df %>%
group_by(cls,grd,typ) %>%
summarise(typ_pct = sum(pnts))
df3
#### Attempt to combine all df1,df2,df3
# but mutate and summarise are mixing up leading to wrong results
df3 %>%
group_by(cls,grd) %>%
mutate(grd_pct = sum(typ_pct)) %>%
group_by(cls) %>%
mutate(cls_pct = sum(grd_pct))
Attempt to visualize all the ratios in 1 chart
data %>%
pivot_longer(cols = -c(cls:pnts),
names_to = "per_cat",
values_to = "percent") %>%
ggplot(aes(cls,percent, col = typ, fill = grd)) +
geom_bar(stat = "identity") +
coord_flip() +
theme_bw()
plot of the same.
EDIT -- added formula version with more useful output for visualization.
ORIG: At this point it may be worth making a function to reduce copying and pasting, but this may get you what you need:
library(tidyverse)
df %>%
group_by(cls) %>%
mutate(per1 = sum(pnts),
per1_pct = per1 / sum(per1)) %>%
group_by(cls, grd) %>%
mutate(per2 = sum(pnts),
per2_pct = per2 / sum(per2)) %>%
group_by(cls, grd, typ) %>%
mutate(per3 = sum(pnts),
per3_pct = per3 / sum(per3)) %>%
ungroup()
EDIT: Here's a general function to calculate the stats for a given grouping, making it easier to combine a few groupings together in long format better suited for visualization.
df_sum <- function(df, level, ...) {
df %>%
group_by(...) %>%
summarize(grp_ttl = sum(pnts)) %>%
mutate(ttl = sum(grp_ttl),
pct = grp_ttl / ttl) %>%
ungroup() %>%
mutate(level = {{ level }} )
}
df_sum(df, level = 1, cls) %>%
bind_rows(df_sum(df, level = 2, cls, grd)) %>%
bind_rows(df_sum(df, level = 3, cls, grd, typ)) %>%
mutate(label = coalesce(as.character(typ), # This grabs the first non-NA
as.character(grd),
as.character(cls))) -> df_summed
df_summed %>%
ggplot(aes(level, grp_ttl)) +
geom_col(color = "white") +
geom_text(aes(label = paste0(label, "\n", grp_ttl, "/", ttl)),
color = "white",
position = position_stack(vjust = 0.5)) +
scale_x_reverse() + # To make level 1 at the top
coord_flip() # To switch from vertical to horizontal orientation

Plotly: handling missing values with animated chart

I found out that when using animated plotly chart you need to have the same number of observations for each of your factors. Meaning -> one missing observation results in whole trace being discarded for entire duration of the animated chart. That is especially a problem when you use time-series data and some of your traces start later, or end sooner than others. Is there any workaround beside of imputing null values for the missings? Thanks!
Crossposting from rstudio community
Example:
library(gapminder)
library(plotly)
library(dplyr)
#working example with no missings
gapminder %>%
group_by(year, continent) %>%
summarise(pop = mean(pop), gdpPercap = mean(gdpPercap), lifeExp = mean(lifeExp)) %>%
plot_ly( x = ~gdpPercap,
y = ~lifeExp,
size = ~pop,
color = ~continent,
frame = ~year,
text = ~continent,
hoverinfo = "text",
type = 'scatter',
mode = 'markers')
#filtering one row results in missing Africa trace for entirety of the plot
gapminder %>%
group_by(year, continent) %>%
summarise(pop = mean(pop), gdpPercap = mean(gdpPercap), lifeExp = mean(lifeExp)) %>%
filter(gdpPercap > 1253) %>%
plot_ly( x = ~gdpPercap,
y = ~lifeExp,
size = ~pop,
color = ~continent,
frame = ~year,
text = ~continent,
hoverinfo = "text",
type = 'scatter',
mode = 'markers')
There seems to be no direct way to solve this problem. Indirectly, the problem with NAs in dataframe can be solved by using ggplot + ggplotly instead of plotly (see this answer). Moreover, when there is an incomplete dataset as per my example, instead of NAs in some rows, it can be solved by using complete function from the tidyverse package.
See the solution:
p <-
gapminder %>%
group_by(year, continent) %>%
summarise(pop =
mean(pop), gdpPercap = mean(gdpPercap), lifeExp = mean(lifeExp)) %>%
filter(gdpPercap > 1253) %>%
complete(continent,year) %>%
ggplot(aes(gdpPercap, lifeExp, color = continent)) +
geom_point(aes(frame = year)) + theme_bw()
ggplotly(p)
That being said, I am not a fan of workarounds when used in production, so feel free to inform me about the development in plotly animate function.

Make a boxplot in highchart with a date object in the x axis

I am trying to make a boxplot in highchart to include it in a shiny app, along with another graph I already have.
The problem is that boxplot, as far as I can tell, do not behave like other plots and when you map a date to the x-axis, it is treated as a character string, this mean: the plot display the entire date ex: "2018-04-01" an not Apr'18 like it does in other plots.
Here I put a little reprex of what I have done
# Packages
library(tidyverse)
library(lubridate)
library(highcharter)
library(magrittr)
library(plotly)
# Data
stocks <- data.frame(
time = rep(as.Date('2009-01-01') + month(1:12), times = 10),
stock_price = rnorm(120, 0, 1)
)
# line plot
stocks %>%
group_by(time) %>%
summarise(mean_price = mean(stock_price)) %>%
hchart(.,
type = "line",
hcaes(x = "time",
y = "mean_price"))
# Box plot first try
# hchart boxplot
stocks %$%
hcboxplot(x = stock_price, time) %>%
hc_chart(type = "column")
After doing this first try, I try to create an abbreviated date and map it to the x-axis as follows, but the boxes are shown ordered alphabetically not chronologically
# hchart boxplot
stocks %>%
mutate(month = month(time, label = T),
year = str_extract(as.character(year(time)), "..$"),
time2 = paste(month, year, sep = "'")) %$%
hcboxplot(x = stock_price, time2) %>%
hc_chart(type = "column")
My desired output is a plot with x-axis like the line plot or like plotly's output
stocks %>%
group_by(time) %>%
plot_ly(x = ~time, y = ~stock_price, type = "box")
With the help of arrange() and fct_inorder(), I believe I've achieved your desired outcome:
stocks %>%
arrange(time) %>%
mutate(
month = month(time, label = T),
year = str_extract(as.character(year(time)), "..$"),
time2 = fct_inorder(paste(month, year, sep = "'"))
) %$%
hcboxplot(x = stock_price, time2) %>%
hc_chart(type = "column")

How to plot arrest rate (%) for the top 20 crime types (crimes of chicago dataset)?

I am working with R in RStudio and would like to plot via highchart package a graphic that includes on the x-Axis the crime type, and on the y-Axis the arrest rate in %. So to see on which crime type the highest arrest was made. I am working with following code in shiny, which is working but not ploting what exactly I want:
output$top20arrestCrime <- renderHighchart({
arrestCrimeAnalysis <- cc %>%
group_by(Primary.Type, Arrest == TRUE) %>%
summarise(Total = n()) %>%
arrange(desc(Total))
hchart(arrestCrimeAnalysis, "column", hcaes(x = Primary.Type, y = Total, color = Total)) %>%
hc_exporting(enabled = TRUE, filename = "Top_20_Locations") %>%
hc_title(text = "Top 20 Crime Types") %>%
hc_subtitle(text = "(2001 - 2016)") %>%
hc_xAxis(title = list(text = "Crime Type"), labels = list(rotation = -90)) %>%
hc_yAxis(title = list(text = "Arrest Rate %")) %>%
hc_colorAxis(stops = color_stops(n = 10, colors = c("#d98880", "#85c1e9", "#82e0aa"))) %>%
hc_add_theme(hc_theme_smpl()) %>%
hc_legend(enabled = FALSE)
})
I am working with this dataset: https://www.kaggle.com/currie32/crimes-in-chicago.
when I run the code, it just show me on the x-Axis the crime type (e.g. THEFT, ROBERRY) etc, which is correct and on the y-Axis the sum of thefts for example from 2001-2016. But I want on the y-Axis the Arrest Rate in percentage, so how many arrests happened. and this in a highcharter with the top 20 arrests crime types.
Example screenshot of Shiny app
Your problem is you haven't told highcharter to put Arrest Rate on the y axis. You've told it to put Total on the y axis:
arrestCrimeAnalysis <- cc %>%
group_by(Primary.Type, Arrest == TRUE) %>%
summarise(Total = n()) %>%
arrange(desc(Total))
hchart(arrestCrimeAnalysis, "column", hcaes(x = Primary.Type, y = Total, color = Total))
Change y = Total to y = ArrestRate or whatever your rate column name is.

Resources