Second Y Axis In Facet Wrap with Line and Histogram (Tidyverse) - r

I am trying to plot total COVID-19 cases at the country level together with a histogram of daily new cases, to show that a sustained drop in new cases leads to a 'flattening of the curve' (assuming that is the case).
library(tidyverse)
# Clean the raw data source: one row per country and day, indexed by the
# number of days since that country's first confirmed case.

# Populations of the countries being compared; also serves as the country
# list so the names are written only once.
country_pop <- data.frame(
  Country = c("US", "Korea, South", "Sweden"),
  Pop = c(328000000, 51245707, 10230000),
  stringsAsFactors = FALSE
)

c19 <- read_csv("https://raw.githubusercontent.com/datasets/covid-19/master/data/time-series-19-covid-combined.csv") %>%
  select(Date, Country = `Country/Region`, Cases = Confirmed, Deaths) %>%
  # Filter early: every later step works per country, so the result is the
  # same and far fewer rows are processed.
  filter(Country %in% country_pop$Country) %>%
  # Collapse multiple rows per country and day into one total.
  group_by(Date, Country) %>%
  summarise(
    Cases = sum(Cases),
    Deaths = sum(Deaths)
  ) %>%
  ungroup() %>%
  group_by(Country) %>%
  mutate(
    # Previous row's total within the country; missing values become 0.
    Lagged_Cases = coalesce(lag(Cases), 0),
    NewCases = Cases - Lagged_Cases
  ) %>%
  # Keep the first day with cases and every day after it.
  filter(Cases > 0 | Lagged_Cases > 0) %>%
  mutate(Index = row_number()) %>%
  ungroup() %>%
  # Name the join key explicitly instead of relying on the natural join.
  inner_join(country_pop, by = "Country")

c19 %>%
  ggplot() +
  geom_line(aes(x = Index, y = Cases / 1000, color = Country), size = 2) +
  # Bar heights come straight from the data, so geom_col() is the matching
  # geom; geom_histogram(stat = "identity") drew the same bars but warned
  # about ignored binning parameters.
  geom_col(aes(x = Index, y = NewCases / 75, group = Country), alpha = .4) +
  # scale_y_continuous(sec.axis = sec_axis(~./data$Cases)) +
  facet_wrap(vars(Country), scales = "free_y") +
  ggtitle("Flattening The Curve?") +
  xlab("Days Since First Case") +
  ylab("Total Cases (thousands) - Daily New Cases (not to scale)")

Related

Plot time-series smooth confidence interval

I am working with this time series and I plot the smooth mean but for some reason, I cannot get the confidence area to appear. I tried using level=0.95 on the geom_smooth command but still, nothing happens.
data=https://github.com/gonzalodqa/timeseries
# Axis breaks: July-December of dummy year 3 followed by January-June of
# dummy year 4, so every July-June season can share one x axis.
months_order <- c(7:12, 1:6)
dates <- make_datetime(c(rep(3, 6), rep(4, 6)), months_order)

t %>%
  mutate(datetime = make_datetime(year, month, day, hour, minute, second)) %>%
  filter(
    datetime >= make_datetime(2018, 7),
    datetime < make_datetime(2020, 7)
  ) %>%
  # Flag the first timestamp of each July; the running sum of those flags
  # numbers the July-June seasons.
  group_by(year, month) %>%
  mutate(dummy = month(datetime) == 7 & datetime == min(datetime)) %>%
  ungroup() %>%
  mutate(dummy = cumsum(dummy)) %>%
  group_by(dummy) %>%
  mutate(
    # Move every timestamp into dummy years 3 (Jul-Dec) and 4 (Jan-Jun).
    datetime = datetime - years(year - 4) - years(month >= 7),
    # Legend label such as "2018 / 2019" for each season.
    years = paste(unique(year), collapse = " / ")
  ) %>%
  ungroup() %>%
  ggplot() +
  geom_line(aes(x = datetime, y = T42, colour = years)) +
  scale_x_datetime(breaks = dates, labels = month.abb[months_order]) +
  labs(colour = "Year") +
  # The stray backtick that preceded `level` made this line unparseable.
  geom_smooth(aes(x = datetime, y = T42), level = 0.95, color = "black") +
  theme_light() +
  xlab("Time (Months)") +
  ylab("Temperature (°C)") +
  geom_hline(yintercept = 5, linetype = "dashed", color = "black", lwd = 1) +
  scale_color_manual(values = c("grey", "grey", "red"))
I have not specified any formula in geom_smooth(). I tried to google the answer and also searched here, but I cannot seem to find a solution.
Thank you for any input.
I think it's because of its color, and because its CI is extremely narrow. After adding lwd = 0.5, fill = "steelblue", I can barely make out the CI. Look very carefully and you may see something blue.
# Same plot as in the question, but the smoother is drawn with a thin line
# and a steel-blue ribbon so the (very narrow) confidence band is visible.
t %>%
  mutate(datetime = make_datetime(year, month, day, hour, minute, second)) %>%
  filter(
    datetime >= make_datetime(2018, 7),
    datetime < make_datetime(2020, 7)
  ) %>%
  # Mark the first timestamp of each July, then number the seasons.
  group_by(year, month) %>%
  mutate(dummy = month(datetime) == 7 & datetime == min(datetime)) %>%
  ungroup() %>%
  mutate(dummy = cumsum(dummy)) %>%
  group_by(dummy) %>%
  mutate(
    # Overlay all seasons on dummy years 3 (Jul-Dec) and 4 (Jan-Jun).
    datetime = datetime - years(year - 4) - years(month >= 7),
    years = paste(unique(year), collapse = " / ")
  ) %>%
  ungroup() %>%
  ggplot() +
  geom_line(aes(x = datetime, y = T42, colour = years)) +
  scale_x_datetime(breaks = dates, labels = month.abb[months_order]) +
  labs(colour = "Year") +
  geom_smooth(
    aes(x = datetime, y = T42),
    level = 0.95,
    color = "black",
    lwd = 0.5,
    fill = "steelblue"
  ) +
  theme_light() +
  xlab("Time (Months)") +
  ylab("Temperature (°C)") +
  geom_hline(
    yintercept = 5,
    linetype = "dashed",
    color = "black",
    lwd = 1
  ) +
  scale_color_manual(values = c("grey", "grey", "red"))

Creating a geographic file for use with tmap and coming up with error when coding shapefile

I am trying to reproduce a map I found here: http://zevross.com/blog/2018/10/02/creating-beautiful-demographic-maps-in-r-with-the-tidycensus-and-tmap-packages/
I am using RStudio and am running the following code:
library(ggplot2) # For plotting
library(tidycensus) # For downloading Census data
library(tmap) # For creating tmap
library(tmaptools) # For reading and processing spatial data related to tmap
library(dplyr) # For data wrangling
library(sf) # For reading, writing and working with spatial objects
# Register the Census API key (replace the placeholder with a real key).
census_api_key("enter your API key here", overwrite = TRUE)

# 2012 county estimates for table B27001, without geometry; the estimate
# column is renamed after its year so both years can sit side by side.
dat12 <- get_acs(
  "county",
  table = "B27001", year = 2012,
  output = "tidy", state = NULL, geometry = FALSE
) %>%
  select(-NAME, -moe) %>%
  rename(`2012` = estimate)

# 2016 county estimates for the same table, this time with geometry.
dat16 <- get_acs(
  "county",
  table = "B27001", year = 2016,
  output = "tidy", state = NULL, geometry = TRUE, shift_geo = TRUE
) %>%
  select(-moe) %>%
  rename(`2016` = estimate)

# Combine both years on county and variable.
dat <- dat16 %>%
  left_join(dat12, by = c("GEOID", "variable"))
st_geometry(dat) <- NULL # This drops the geometry and leaves a table
head(dat)
# Tag the B27001 rows used below as "pop1834" or "pop1834ni" (presumably
# the 18-34 population and its uninsured part -- their ratio is plotted
# later as "Uninsured adults ages 18-34"). All other rows are dropped.
dat <- dat %>%
  mutate(
    cat = case_when(
      variable %in% paste0("B27001_0", c("09", "12", "37", "40")) ~ "pop1834",
      variable %in% paste0("B27001_0", c("11", "14", "39", "42")) ~ "pop1834ni"
    )
  ) %>%
  filter(!is.na(cat))

# Create long version: one row per county, variable and year.
# pivot_longer() replaces the superseded gather().
dat <- tidyr::pivot_longer(
  dat,
  cols = c(`2012`, `2016`),
  names_to = "year",
  values_to = "estimate"
)

# Group the data by our new categories and sum, then place the two
# categories side by side (pivot_wider() replaces the superseded spread()).
dat <- dat %>%
  group_by(GEOID, NAME, year, cat) %>%
  summarize(estimate = sum(estimate)) %>%
  ungroup() %>%
  tidyr::pivot_wider(names_from = cat, values_from = estimate)

# Percentage per county and year, one column per year, plus the
# 2016 - 2012 difference.
dat <- dat %>%
  mutate(est = (pop1834ni / pop1834) * 100) %>%
  select(-c(pop1834, pop1834ni)) %>%
  tidyr::pivot_wider(names_from = year, values_from = est) %>%
  mutate(diff = `2016` - `2012`)
head(dat)
# Long table (county x year) with each year's median repeated on every
# row so the facets can draw and label it.
# pivot_longer() replaces the superseded gather(); only the row order
# differs, which does not affect the histogram or the medians.
datlong <- dat %>%
  select(-diff) %>%
  tidyr::pivot_longer(
    cols = c(`2012`, `2016`),
    names_to = "year",
    values_to = "estimate"
  ) %>%
  group_by(year) %>%
  mutate(med = round(median(estimate, na.rm = TRUE), 1))

# One histogram per year with a dashed line and a text label at the median.
ggplot(datlong, aes(estimate)) +
  geom_histogram(fill = "firebrick2", color = "white", bins = 60) +
  xlab("Uninsured adults ages 18-34 by county (%)") +
  theme(plot.title = element_text(hjust = 0.5)) +
  facet_wrap(~year, ncol = 1) +
  geom_vline(aes(xintercept = med, group = year), lty = "dashed") +
  geom_text(aes(label = paste("Median = ", med), x = med, y = 55))
# Ten counties with the largest positive change in the uninsured rate.
d10 <- dat %>%
  top_n(10, diff) %>%
  mutate(
    type = "Insured population decreased",
    difftemp = diff
  )

# Ten counties with the largest negative change; the magnitude is kept in
# difftemp so both groups can share one axis.
i10 <- dat %>%
  top_n(-10, diff) %>%
  mutate(
    type = "Insured population increased",
    difftemp = abs(diff)
  )

# Both groups together, largest change first.
id10 <- bind_rows(i10, d10) %>%
  arrange(desc(difftemp))

# Horizontal bars ordered by size of change, coloured by direction.
ggplot(id10) +
  geom_col(
    aes(
      x = forcats::fct_reorder(NAME, difftemp),
      y = difftemp,
      fill = type
    )
  ) +
  coord_flip() +
  scale_fill_manual(values = c("firebrick2", "cyan4")) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom",
    legend.title = element_blank()
  ) +
  ggtitle("Counties with the greatest change (+/-) in\ninsured population, ages 18-34, 2012-2016") +
  ylab("Difference in % insured (2016 - 2012)") +
  xlab("")
# Build the table for mapping: the "B27001_001" rows of dat16 (presumably
# one geometry row per county) joined to the uninsured-rate table `dat`.
# NOTE(review): the filter() line below does not end with %>%, so the chain
# stops there and select() is evaluated as a separate call with no data,
# which raises "object 'GEOID' not found". Left as posted because this is
# the error the question is about.
shp <- dat16 %>%
filter(variable == "B27001_001") # much faster than using distinct()
select(GEOID, NAME) %>%
left_join(dat, by = c("GEOID", "NAME")) %>%
arrange(GEOID) %>%
rename(uninsured_2012 = `2012`,
uninsured_2016 = `2016`,
uninsured_diff = diff)
Up until the last bit of code, the one that begins with shp, everything runs perfect. Once,
# Failing snippet as run by the asker.
# NOTE(review): filter() is not followed by %>%, so select() starts a new
# statement without any data argument.
shp <- dat16 %>%
filter(variable == "B27001_001") # much faster than using distinct()
select(GEOID, NAME) %>%
left_join(dat, by = c("GEOID", "NAME")) %>%
arrange(GEOID) %>%
rename(uninsured_2012 = `2012`,
uninsured_2016 = `2016`,
uninsured_diff = diff)
is run, I get the following error:
Error in select(GEOID, NAME) : object 'GEOID' not found
I have checked dat16 and dat. GEOID and NAME are present there. I am not sure what is wrong with the SELECT function as I have not loaded another library which may interfere with it. Any help would be appreciated.
I see now what was missing, a %>% (pipe) following the 'filter':
# Corrected chain: filter() is piped into select(), so the whole sequence
# runs on dat16. The result keeps GEOID and NAME from dat16, adds the
# columns of `dat`, and gives the year/diff columns descriptive names.
shp <- dat16 %>%
  filter(variable == "B27001_001") %>% # much faster than using distinct()
  select(GEOID, NAME) %>%
  left_join(dat, by = c("GEOID", "NAME")) %>%
  arrange(GEOID) %>%
  rename(uninsured_2012 = `2012`) %>%
  rename(uninsured_2016 = `2016`) %>%
  rename(uninsured_diff = diff)

How to reorder the plot by factors in ggplot?

I am trying to reorder the geom_col plot by one of the factors pct_female_vacc used below in plot of the variable pct_vacc_GenderType.
df
library(tidyverse)
library(lubridate)
library(scales)
library(gganimate)
# Download the state-wise CoWIN vaccination table and convert the
# Updated.On column to Date in the same step.
file_url1 <- "https://raw.githubusercontent.com/johnsnow09/covid19-df_stack-code/main/cowin_vaccine_data_statewise.csv"
df_vaccination <- read.csv(url(file_url1)) %>%
  mutate(Updated.On = as.Date(Updated.On))
plot
# Stacked bars of the female and male share of vaccinated individuals per
# state, using the rows of the most recent update date.
df_vaccination %>%
  filter(
    State != "India",
    Updated.On == max(Updated.On)
  ) %>%
  # arrange(desc(Updated.On)) %>%
  mutate(
    pct_female_vacc =
      `Female.Individuals.Vaccinated.` / Total.Individuals.Vaccinated,
    pct_male_vacc =
      `Male.Individuals.Vaccinated.` / Total.Individuals.Vaccinated,
    State = as.factor(State)
  ) %>%
  # The two share columns are adjacent, so naming them selects the same
  # columns as the range pct_female_vacc:pct_male_vacc.
  pivot_longer(
    cols = c(pct_female_vacc, pct_male_vacc),
    names_to = "pct_vacc_GenderType",
    values_to = "pct_vacc"
  ) %>%
  mutate(pct_vacc_GenderType = as.factor(pct_vacc_GenderType)) %>%
  na.omit() %>%
  ggplot(aes(x = pct_vacc, y = State, fill = pct_vacc_GenderType)) +
  geom_col()
I am looking to get above plot to be reordered by red color i.e pct_female_vacc factor.
I am unable to use reorder_within as I have not used facet_wrap here. I also tried fct_reorder, but maybe I am not doing it right, or maybe it doesn't work in this case.
What you want to do is simple with forcats::fct_reorder. The only thing you have to be cautious about is that you need to set the factor before pivot_longer. Here you go:
# Same plot, with the states ordered by their female share.
df_vaccination %>%
  filter(
    State != "India",
    # the newest date contains only NAs, so the second newest is used
    Updated.On == max(Updated.On) - 1
  ) %>%
  # arrange(desc(Updated.On)) %>%
  mutate(
    pct_female_vacc =
      `Female.Individuals.Vaccinated.` / Total.Individuals.Vaccinated,
    pct_male_vacc =
      `Male.Individuals.Vaccinated.` / Total.Individuals.Vaccinated,
    # The factor order must be set here, before pivoting: afterwards each
    # state has two rows and the female share is no longer its own column.
    State = forcats::fct_reorder(as.factor(State), pct_female_vacc)
  ) %>%
  pivot_longer(
    cols = c(pct_female_vacc, pct_male_vacc),
    names_to = "pct_vacc_GenderType",
    values_to = "pct_vacc"
  ) %>%
  mutate(pct_vacc_GenderType = as.factor(pct_vacc_GenderType)) %>%
  # used instead of na.omit(): only rows with a missing share are removed
  filter(!is.na(pct_vacc)) %>%
  ggplot(aes(x = pct_vacc, y = State, fill = pct_vacc_GenderType)) +
  geom_col() +
  # legend at the bottom so the plot reads better when posted
  theme(legend.position = "bottom")
Created on 2021-05-16 by the reprex package (v2.0.0)
Arrange the data by pct_female_vacc and convert State to a factor whose levels follow the order of appearance.
library(tidyverse)
# Alternative ordering: sort the rows by female share first, then freeze
# that row order into the factor levels of State.
df_vaccination %>%
  filter(
    State != "India",
    Updated.On == max(Updated.On)
  ) %>%
  mutate(
    pct_female_vacc =
      `Female.Individuals.Vaccinated.` / Total.Individuals.Vaccinated,
    pct_male_vacc =
      `Male.Individuals.Vaccinated.` / Total.Individuals.Vaccinated
  ) %>%
  arrange(pct_female_vacc) %>%
  mutate(State = factor(State, levels = unique(State))) %>%
  pivot_longer(
    cols = c(pct_female_vacc, pct_male_vacc),
    names_to = "pct_vacc_GenderType",
    values_to = "pct_vacc"
  ) %>%
  na.omit() %>%
  ggplot(aes(x = pct_vacc, y = State, fill = pct_vacc_GenderType)) +
  geom_col()

How can we wrangle data to obtain the ratio/proportion chart shown?

Goal is to produce a visualization indicating ratio.
Please help us: how can we produce such a ratio chart (highlighted) in R?
library(tidyverse)
# Dataset creation
# Eight rows: class (cls), grade within class (grd), type (typ) and the
# points (pnts) scored on that row.
df <- data.frame(
  cls = rep(c("A", "B"), each = 4),
  grd = c("A1", "A2", "A2", "A2", "B1", "B2", "B1", "B2"),
  typ = c("m", "m", "o", "o", "m", "n", "p", "p"),
  pnts = rep(1:4, times = 2)
)
df
#### Data wrangling
# Point totals at each level of the hierarchy, one table per level.

# Total per class.
df1 <- summarise(group_by(df, cls), cls_pct = sum(pnts))
df1

# Total per grade within class.
df2 <- summarise(group_by(df, cls, grd), grd_pct = sum(pnts))
df2

# Total per type within grade and class.
df3 <- summarise(group_by(df, cls, grd, typ), typ_pct = sum(pnts))
df3
#### Attempt to combine all df1, df2, df3
# mutate() keeps every row, so cls_pct below adds grd_pct once per row of
# the class; a grade total is therefore counted once for each of its rows,
# which gives the wrong class totals mentioned in the question.
df3 %>%
  group_by(cls, grd) %>%
  mutate(grd_pct = sum(typ_pct)) %>%
  group_by(cls) %>%
  mutate(cls_pct = sum(grd_pct))
Attempt to visualize all the ratios in 1 chart
# Horizontal bars of every *_pct value per class.
# NOTE(review): `data` is not defined in the visible snippet; presumably it
# is the combined table from the previous step -- confirm.
data %>%
  pivot_longer(
    cols = -c(cls:pnts),
    names_to = "per_cat",
    values_to = "percent"
  ) %>%
  ggplot(aes(x = cls, y = percent, col = typ, fill = grd)) +
  geom_col() +
  coord_flip() +
  theme_bw()
plot of the same.
EDIT -- added formula version with more useful output for visualization.
ORIG: At this point it may be worth making a function to reduce copying and pasting, but this may get you what you need:
library(tidyverse)
# Add, for each row, the point total of its class (per1), grade (per2) and
# type (per3), plus each total's share of its parent level.
# The original divided each total by the sum of that same repeated total
# within its own group, which always yields 1 / (rows in the group). Each
# share is now taken against the parent level: class / grand total,
# grade / class, type / grade.
df %>%
  mutate(ttl = sum(pnts)) %>% # grand total, dropped again at the end
  group_by(cls) %>%
  mutate(
    per1 = sum(pnts),
    per1_pct = per1 / ttl
  ) %>%
  group_by(cls, grd) %>%
  mutate(
    per2 = sum(pnts),
    per2_pct = per2 / per1
  ) %>%
  group_by(cls, grd, typ) %>%
  mutate(
    per3 = sum(pnts),
    per3_pct = per3 / per2
  ) %>%
  ungroup() %>%
  select(-ttl)
EDIT: Here's a general function to calculate the stats for a given grouping, making it easier to combine a few groupings together in long format better suited for visualization.
#' Summarise point totals for one grouping level
#'
#' Sums `pnts` within the groups given in `...`, then adds a total (`ttl`)
#' and a share (`pct`). Both are computed by the `mutate()` that directly
#' follows `summarize()`, i.e. over whatever grouping `summarize()` leaves
#' in place.
#'
#' @param df A data frame with a `pnts` column and the grouping columns.
#' @param level Value stored in the `level` column of the result (the
#'   calls in this script pass 1, 2 or 3).
#' @param ... Grouping columns, forwarded to `group_by()`.
#' @return An ungrouped table with the grouping columns plus `grp_ttl`,
#'   `ttl`, `pct` and `level`.
df_sum <- function(df, level, ...) {
  grouped <- group_by(df, ...)
  totals <- summarize(grouped, grp_ttl = sum(pnts))
  shares <- mutate(
    totals,
    ttl = sum(grp_ttl),
    pct = grp_ttl / ttl
  )
  mutate(ungroup(shares), level = {{ level }})
}
# Stack the three grouping levels into one long table and label each row
# with its most specific grouping value.
df_summed <- bind_rows(
  df_sum(df, level = 1, cls),
  df_sum(df, level = 2, cls, grd),
  df_sum(df, level = 3, cls, grd, typ)
) %>%
  mutate(
    # coalesce() takes the first non-NA of typ, grd, cls for each row
    label = coalesce(
      as.character(typ),
      as.character(grd),
      as.character(cls)
    )
  )
# One stacked bar per level; every segment is labelled with its name and
# its "group total / parent total" fraction.
ggplot(df_summed, aes(x = level, y = grp_ttl)) +
  geom_col(color = "white") +
  geom_text(
    aes(label = paste0(label, "\n", grp_ttl, "/", ttl)),
    color = "white",
    position = position_stack(vjust = 0.5)
  ) +
  scale_x_reverse() + # To make level 1 at the top
  coord_flip() # To switch from vertical to horizontal orientation

how to plot lines matching data using ggplot2

I want to plot increase_rate for the countries where abs(increase_rate) > 0.05.
However, the code below discards the data points that lie between -0.05 and 0.05.
I also want to plot those countries' data points within the -0.05 to 0.05 range.
library(tidyverse)
# Year-over-year population change per country, plotted only for the rows
# where the change exceeds 5% in either direction.
data(population, package = "tidyr")
population %>%
  arrange(country, year) %>%
  group_by(country) %>%
  mutate(
    population_nextY = lead(population),
    increase_rate = (population_nextY - population) / population_nextY
  ) %>%
  # Row-wise filter: drops individual years, not whole countries.
  filter(abs(increase_rate) > 0.05) %>%
  ungroup() %>%
  ggplot() +
  geom_line(aes(x = year, y = increase_rate, color = country))
I want to get final plot like this.
# Rates for every country and year, kept unfiltered.
d <- population %>%
  arrange(country, year) %>%
  group_by(country) %>%
  mutate(
    population_nextY = lead(population),
    increase_rate = (population_nextY - population) / population_nextY
  ) %>%
  ungroup()

# Countries with at least one rate outside the range -0.05 to 0.05.
select_country <- d %>%
  filter(!between(increase_rate, -0.05, 0.05)) %>%
  distinct(country) %>%
  unlist()

# Plot the complete series of those countries.
d %>%
  filter(country %in% select_country) %>%
  ggplot() +
  geom_line(aes(x = year, y = increase_rate, color = country))
use between:
# Pipeline step: keeps only the rows whose increase_rate falls outside the
# range -0.05 to 0.05.
filter(!between(increase_rate, -0.05, 0.05))
add column using mutate() function
# Keep the full series of every country whose largest absolute
# year-over-year change exceeds 5%.
population %>%
  arrange(country, year) %>%
  group_by(country) %>%
  mutate(population_nextY = lead(population)) %>%
  mutate(increase_rate = (population_nextY - population) / population_nextY) %>%
  # judge is a per-country value repeated on every row, so the filter
  # keeps or drops whole countries. TRUE is spelled out because T is an
  # ordinary variable that can be reassigned.
  mutate(judge = max(abs(increase_rate), na.rm = TRUE)) %>%
  filter(judge > 0.05) %>%
  ungroup() %>%
  ggplot() +
  geom_line(aes(x = year, y = increase_rate, color = country))

Resources