Second Y Axis In Facet Wrap with Line and Histogram (Tidyverse)

Second Y Axis In Facet Wrap with Line and Histogram (Tidyverse) - r

I am trying to plot total COVID-19 cases at the country level together with a histogram of daily new cases, to show that a sustained drop in new cases leads to a 'flattening of the curve' (assuming that is the case).
library(tidyverse)
# Clean the raw data source: per-country cumulative totals, daily new cases,
# and a day index that starts once a country has reported cases.
c19 <- read_csv("https://raw.githubusercontent.com/datasets/covid-19/master/data/time-series-19-covid-combined.csv") %>%
  mutate(Cases = Confirmed, Country = `Country/Region`) %>%
  select(Date, Country, Cases, Deaths) %>%
  group_by(Date, Country) %>%
  summarise(Cases = sum(Cases), Deaths = sum(Deaths)) %>%
  ungroup() %>%
  group_by(Country) %>%
  mutate(
    # Previous row's cumulative count within the country; 0 where there is none
    Lagged_Cases = coalesce(lag(Cases), 0),
    NewCases = Cases - Lagged_Cases
  ) %>%
  # Keep a row only if cases are reported on it or on the previous row
  filter((Lagged_Cases == 0 & Cases > 0) | Lagged_Cases > 0) %>%
  mutate(Index = row_number()) %>%
  ungroup() %>%
  filter(Country %in% c("US", "Korea, South", "Sweden")) %>%
  inner_join(
    tibble(
      Country = c("US", "Korea, South", "Sweden"),
      Pop = c(328000000, 51245707, 10230000)
    ),
    by = "Country"
  )

# Bars are drawn at NewCases / new_case_scale on the primary (thousands) axis,
# so the secondary axis multiplies by the same factor to label them in
# daily new cases.
new_case_scale <- 75

c19 %>%
  ggplot() +
  geom_line(aes(x = Index, y = Cases / 1000, color = Country), size = 2) +
  # geom_col() is the identity-stat bar geom; geom_histogram() is for binning
  geom_col(aes(x = Index, y = NewCases / new_case_scale, group = Country), alpha = .4) +
  scale_y_continuous(
    name = "Total Cases (thousands)",
    sec.axis = sec_axis(~ . * new_case_scale, name = "Daily New Cases")
  ) +
  facet_wrap(vars(Country), scales = "free_y") +
  ggtitle("Flattening The Curve?") +
  xlab("Days Since First Case")

Related

Plot time-series smooth confident interval

I am working with this time series and I plot the smooth mean but for some reason, I cannot get the confidence area to appear. I tried using level=0.95 on the geom_smooth command but still, nothing happens.
Data: https://github.com/gonzalodqa/timeseries
# Axis breaks for a July-to-June year: the pipeline below shifts every
# timestamp into dummy years 3 (Jul-Dec) and 4 (Jan-Jun).
months_order <- c(7:12, 1:6)
dates <- make_datetime(c(rep(3, 6), rep(4, 6)), months_order)

# NOTE(review): `t` is the data frame loaded from the linked data; it is not
# defined in this snippet.
t %>%
  mutate(datetime = make_datetime(year, month, day, hour, minute, second)) %>%
  filter(datetime >= make_datetime(2018, 7), datetime < make_datetime(2020, 7)) %>%
  group_by(year, month) %>%
  # TRUE only on the first timestamp of each July
  mutate(dummy = month(datetime) == 7 & datetime == min(datetime)) %>%
  ungroup() %>%
  # Running count of July starts, used as the grouping id below
  mutate(dummy = cumsum(dummy)) %>%
  group_by(dummy) %>%
  mutate(
    datetime = datetime - years(year - 4) - years(month >= 7),
    years = paste(unique(year), collapse = " / ")
  ) %>%
  ungroup() %>%
  ggplot() +
  geom_line(aes(x = datetime, y = T42, colour = years)) +
  scale_x_datetime(breaks = dates, labels = month.abb[months_order]) +
  labs(colour = "Year") +
  geom_smooth(aes(x = datetime, y = T42), level = 0.95, color = "black") +
  theme_light() +
  xlab("Time (Months)") +
  ylab("Temperature (°C)") +
  geom_hline(yintercept = 5, linetype = "dashed", color = "black", lwd = 1) +
  scale_color_manual(values = c("grey", "grey", "red"))
I have not specified any formula in geom_smooth(). I tried to google the answer and also searched here, but I cannot seem to find a solution.
Thank you for any input.

I think it's because of its color, and because its CI is extremely narrow. After adding lwd = 0.5, fill = "steelblue", I can barely make out the CI. Look very carefully and you may see something blue.
# Same pipeline as in the question, with a thinner smooth line and a
# steel-blue ribbon fill passed to geom_smooth().
t %>%
  mutate(datetime = make_datetime(year, month, day, hour, minute, second)) %>%
  filter(
    datetime >= make_datetime(2018, 7),
    datetime < make_datetime(2020, 7)
  ) %>%
  group_by(year, month) %>%
  mutate(dummy = month(datetime) == 7 & datetime == min(datetime)) %>%
  ungroup() %>%
  mutate(dummy = cumsum(dummy)) %>%
  group_by(dummy) %>%
  mutate(
    datetime = datetime - years(year - 4) - years(month >= 7),
    years = paste(unique(year), collapse = " / ")
  ) %>%
  ungroup() %>%
  ggplot() +
  geom_line(aes(x = datetime, y = T42, colour = years)) +
  scale_x_datetime(breaks = dates, labels = month.abb[months_order]) +
  labs(colour = "Year") +
  geom_smooth(
    aes(x = datetime, y = T42),
    level = 0.95,
    color = "black",
    lwd = 0.5,
    fill = "steelblue"
  ) +
  theme_light() +
  xlab("Time (Months)") +
  ylab("Temperature (°C)") +
  geom_hline(yintercept = 5, linetype = "dashed", color = "black", lwd = 1) +
  scale_color_manual(values = c("grey", "grey", "red"))

Creating a geographic file for use with tmap and coming up with error when coding shapefile

I am trying to reproduce a map I found here: http://zevross.com/blog/2018/10/02/creating-beautiful-demographic-maps-in-r-with-the-tidycensus-and-tmap-packages/
I am using RStudio and am running the following code:
library(ggplot2) # For plotting
library(tidycensus) # For downloading Census data
library(tmap) # For creating tmap
library(tmaptools) # For reading and processing spatial data related to tmap
library(dplyr) # For data wrangling
library(sf) # For reading, writing and working with spatial objects
census_api_key("enter your API key here", overwrite = TRUE)

# Table B27001 by county for 2012, without geometry
dat12 <- get_acs(
  geography = "county",
  table = "B27001",
  year = 2012,
  output = "tidy",
  state = NULL,
  geometry = FALSE
) %>%
  rename(`2012` = estimate) %>%
  select(-NAME, -moe)

# Table B27001 by county for 2016, with geometry attached
dat16 <- get_acs(
  geography = "county",
  table = "B27001",
  year = 2016,
  output = "tidy",
  state = NULL,
  geometry = TRUE,
  shift_geo = TRUE
) %>%
  rename(`2016` = estimate) %>%
  select(-moe)

# Put both years side by side, one row per county and variable
dat <- left_join(dat16, dat12, by = c("GEOID", "variable"))

# Remove the geometry column so only a plain table remains
st_geometry(dat) <- NULL
head(dat)
# Label the B27001 variables of interest as "pop1834" / "pop1834ni" and drop
# every other variable.
dat <- mutate(
  dat,
  cat = case_when(
    variable %in% paste0("B27001_0", c("09", "12", "37", "40")) ~ "pop1834",
    variable %in% paste0("B27001_0", c("11", "14", "39", "42")) ~ "pop1834ni"
  )
) %>%
  filter(!is.na(cat))

# Create long version (pivot_longer() replaces the superseded gather())
dat <- tidyr::pivot_longer(
  dat,
  cols = c(`2012`, `2016`),
  names_to = "year",
  values_to = "estimate"
)

# Group the data by our new categories and sum, then put the two categories
# in their own columns (pivot_wider() replaces the superseded spread())
dat <- group_by(dat, GEOID, NAME, year, cat) %>%
  summarize(estimate = sum(estimate)) %>%
  ungroup() %>%
  tidyr::pivot_wider(names_from = cat, values_from = estimate)

# Percentage pop1834ni / pop1834 per county and year, one column per year,
# plus the 2016 - 2012 difference
dat <- mutate(dat, est = (pop1834ni / pop1834) * 100) %>%
  select(-c(pop1834, pop1834ni)) %>%
  tidyr::pivot_wider(names_from = year, values_from = est) %>%
  mutate(diff = `2016` - `2012`)
head(dat)

# Long version for plotting, with the per-year median attached to every row
datlong <- select(dat, -diff) %>%
  tidyr::pivot_longer(
    cols = c(`2012`, `2016`),
    names_to = "year",
    values_to = "estimate"
  ) %>%
  group_by(year) %>%
  mutate(med = round(median(estimate, na.rm = TRUE), 1))
# Histogram of the county percentages, one panel per year, with a dashed
# line and a text label at each year's median
ggplot(datlong, aes(estimate)) +
  geom_histogram(fill = "firebrick2", color = "white", bins = 60) +
  xlab("Uninsured adults ages 18-34 by county (%)") +
  theme(plot.title = element_text(hjust = 0.5)) +
  facet_wrap(~year, ncol = 1) +
  geom_vline(aes(xintercept = med, group = year), linetype = "dashed") +
  geom_text(aes(label = paste("Median = ", med), x = med, y = 55))

# Ten largest values of diff
d10 <- dat %>%
  top_n(10, diff) %>%
  mutate(type = "Insured population decreased", difftemp = diff)

# Ten smallest values of diff, ranked by their absolute size
i10 <- dat %>%
  top_n(-10, diff) %>%
  mutate(type = "Insured population increased", difftemp = abs(diff))

id10 <- bind_rows(i10, d10) %>%
  arrange(desc(difftemp))

# Horizontal bar chart of both groups, ordered by the size of the change
ggplot(id10) +
  geom_col(
    aes(
      x = forcats::fct_reorder(NAME, difftemp),
      y = difftemp,
      fill = type
    )
  ) +
  coord_flip() +
  scale_fill_manual(values = c("firebrick2", "cyan4")) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom",
    legend.title = element_blank()
  ) +
  ggtitle("Counties with the greatest change (+/-) in\ninsured population, ages 18-34, 2012-2016") +
  ylab("Difference in % insured (2016 - 2012)") +
  xlab("")
# Builds `shp` from the 2016 geometries joined to the computed percentages.
# NOTE(review): the filter() line below does not end in %>%, so the pipeline
# stops there and select() is then evaluated as a separate call with no data
# argument.
shp <- dat16 %>%
filter(variable == "B27001_001") # much faster than using distinct()
select(GEOID, NAME) %>%
left_join(dat, by = c("GEOID", "NAME")) %>%
arrange(GEOID) %>%
rename(uninsured_2012 = `2012`,
uninsured_2016 = `2016`,
uninsured_diff = diff)
Up until the last bit of code, the part that begins with shp, everything runs perfectly. Once
# NOTE(review): there is no %>% after filter(), so select() runs as a
# separate call without a data argument.
shp <- dat16 %>%
filter(variable == "B27001_001") # much faster than using distinct()
select(GEOID, NAME) %>%
left_join(dat, by = c("GEOID", "NAME")) %>%
arrange(GEOID) %>%
rename(uninsured_2012 = `2012`,
uninsured_2016 = `2016`,
uninsured_diff = diff)
is run, I get the following error:
Error in select(GEOID, NAME) : object 'GEOID' not found
I have checked dat16 and dat; GEOID and NAME are present in both. I am not sure what is wrong with the select() function, as I have not loaded another library that might interfere with it. Any help would be appreciated.

I see now what was missing, a %>% (pipe) following the 'filter':
# One row per county (a single variable is kept), joined to the percentage
# table and with the year columns given descriptive names.
shp <- dat16 %>%
  # much faster than using distinct()
  filter(variable == "B27001_001") %>%
  select(GEOID, NAME) %>%
  left_join(dat, by = c("GEOID", "NAME")) %>%
  arrange(GEOID) %>%
  rename(uninsured_2012 = `2012`, uninsured_2016 = `2016`, uninsured_diff = diff)

How to reorder the plot by factors in ggplot?

I am trying to reorder the bars of a geom_col plot by pct_female_vacc, one of the levels of the pct_vacc_GenderType variable used in the plot below.
df
library(tidyverse)
library(lubridate)
library(scales)
library(gganimate)
# Download the vaccination CSV and convert the update column to Date
file_url1 <- "https://raw.githubusercontent.com/johnsnow09/covid19-df_stack-code/main/cowin_vaccine_data_statewise.csv"
df_vaccination <- file_url1 %>%
  url() %>%
  read.csv() %>%
  mutate(Updated.On = as.Date(Updated.On))
plot
# Stacked bars of the female / male share of vaccinated individuals per
# state, using the rows of the latest update date.
df_vaccination %>%
  filter(State != "India", Updated.On == max(Updated.On)) %>%
  mutate(
    pct_female_vacc = Female.Individuals.Vaccinated. / Total.Individuals.Vaccinated,
    pct_male_vacc = Male.Individuals.Vaccinated. / Total.Individuals.Vaccinated,
    State = as.factor(State)
  ) %>%
  pivot_longer(
    cols = c(pct_female_vacc, pct_male_vacc),
    names_to = "pct_vacc_GenderType",
    values_to = "pct_vacc"
  ) %>%
  mutate(pct_vacc_GenderType = as.factor(pct_vacc_GenderType)) %>%
  na.omit() %>%
  ggplot(aes(x = pct_vacc, y = State, fill = pct_vacc_GenderType)) +
  geom_col()
I would like the plot above to be reordered by the red bars, i.e. by the pct_female_vacc level.
I cannot use reorder_within because I have not used facet_wrap here. I also tried fct_reorder, but maybe I am not using it correctly, or it does not work in this case.

What you want to do is simple with forcats::fct_reorder. The only thing you have to be cautious about is that you need to set the factor before pivot_longer. Here you go:
df_vaccination %>%
  filter(
    State != "India",
    # the newest date contains only NAs, so the day before it is used
    Updated.On == max(Updated.On) - 1
  ) %>%
  mutate(
    pct_female_vacc = Female.Individuals.Vaccinated. / Total.Individuals.Vaccinated,
    pct_male_vacc = Male.Individuals.Vaccinated. / Total.Individuals.Vaccinated,
    State = as.factor(State)
  ) %>%
  # The factor order must be set here, while pct_female_vacc is still a
  # column of its own (the next step pivots it away)
  mutate(State = forcats::fct_reorder(State, pct_female_vacc)) %>%
  pivot_longer(
    cols = c(pct_female_vacc, pct_male_vacc),
    names_to = "pct_vacc_GenderType",
    values_to = "pct_vacc"
  ) %>%
  mutate(pct_vacc_GenderType = as.factor(pct_vacc_GenderType)) %>%
  # Used instead of na.omit(): only rows with a missing pct_vacc are removed
  filter(!is.na(pct_vacc)) %>%
  ggplot(aes(x = pct_vacc, y = State, fill = pct_vacc_GenderType)) +
  geom_col() +
  # Legend placed below the plot
  theme(legend.position = "bottom")
Created on 2021-05-16 by the reprex package (v2.0.0)

Arrange the data by pct_female_vacc and convert State to a factor whose levels follow the order of appearance.
library(tidyverse)
df_vaccination %>%
  filter(State != "India", Updated.On == max(Updated.On)) %>%
  mutate(
    pct_female_vacc = `Female.Individuals.Vaccinated.` / Total.Individuals.Vaccinated,
    pct_male_vacc = Male.Individuals.Vaccinated. / Total.Individuals.Vaccinated
  ) %>%
  # Sort by the female share, then freeze that row order as the factor levels
  arrange(pct_female_vacc) %>%
  mutate(State = factor(State, unique(State))) %>%
  pivot_longer(
    cols = c(pct_female_vacc, pct_male_vacc),
    names_to = "pct_vacc_GenderType",
    values_to = "pct_vacc"
  ) %>%
  na.omit() %>%
  ggplot(aes(x = pct_vacc, y = State, fill = pct_vacc_GenderType)) +
  geom_col()

How can we data wrangling to obtain shown ratio/proportion chart shown

The goal is to produce a visualization that shows ratios.
Please help us: how can we produce such a ratio chart (highlighted) in R?
library(tidyverse)
# Dataset creation
# Example data: two classes, their grades, types and points
df <- data.frame(
  cls = rep(c("A", "B"), each = 4),
  grd = c("A1", rep("A2", 3), rep(c("B1", "B2"), 2)),
  typ = rep(c("m", "o", "m", "n", "p"), times = c(2, 2, 1, 1, 2)),
  pnts = rep(1:4, times = 2)
)
df

#### Data wrangling
# Points per class
df1 <- df %>%
  group_by(cls) %>%
  summarise(cls_pct = sum(pnts))
df1

# Points per class and grade
df2 <- df %>%
  group_by(cls, grd) %>%
  summarise(grd_pct = sum(pnts))
df2

# Points per class, grade and type
df3 <- df %>%
  group_by(cls, grd, typ) %>%
  summarise(typ_pct = sum(pnts))
df3

#### Attempt to combine all df1,df2,df3
# but mutate and summarise are mixing up leading to wrong results
df3 %>%
  group_by(cls, grd) %>%
  mutate(grd_pct = sum(typ_pct)) %>%
  group_by(cls) %>%
  mutate(cls_pct = sum(grd_pct))
Attempt to visualize all the ratios in 1 chart
# NOTE(review): `data` is not defined in this snippet - presumably the
# combined result of the previous step; confirm before running.
data %>%
  pivot_longer(
    cols = -c(cls:pnts),
    names_to = "per_cat",
    values_to = "percent"
  ) %>%
  ggplot(aes(cls, percent, col = typ, fill = grd)) +
  geom_col() +
  coord_flip() +
  theme_bw()
plot of the same.

EDIT -- added formula version with more useful output for visualization.
ORIG: At this point it may be worth making a function to reduce copying and pasting, but this may get you what you need:
library(tidyverse)
# Attach the point totals (and a ratio column) for each of the three
# grouping levels to every row of df
df %>%
  group_by(cls) %>%
  mutate(
    per1 = sum(pnts),
    per1_pct = per1 / sum(per1)
  ) %>%
  group_by(cls, grd) %>%
  mutate(
    per2 = sum(pnts),
    per2_pct = per2 / sum(per2)
  ) %>%
  group_by(cls, grd, typ) %>%
  mutate(
    per3 = sum(pnts),
    per3_pct = per3 / sum(per3)
  ) %>%
  ungroup()
EDIT: Here's a general function to calculate the stats for a given grouping, making it easier to combine a few groupings together in long format better suited for visualization.
#' Summarise `pnts` for one grouping level
#'
#' @param df Data frame containing a `pnts` column and the grouping columns.
#' @param level Value stored in the `level` column of the result.
#' @param ... Grouping columns, passed on to `group_by()`.
#' @return One row per group with `grp_ttl` (sum of `pnts`), `ttl` (sum of
#'   `grp_ttl` over the groups that share all but the last grouping column),
#'   `pct` (`grp_ttl / ttl`) and `level`.
df_sum <- function(df, level, ...) {
  df %>%
    group_by(...) %>%
    # Peel off only the last grouping column, so that `ttl` below is computed
    # within the parent group (or overall when there is a single grouping
    # column). This is summarize()'s default, stated explicitly.
    summarize(grp_ttl = sum(pnts), .groups = "drop_last") %>%
    mutate(
      ttl = sum(grp_ttl),
      pct = grp_ttl / ttl
    ) %>%
    ungroup() %>%
    mutate(level = {{ level }})
}
# Stack the three grouping levels into one long table; grouping columns a
# level does not use are filled with NA by bind_rows()
df_summed <- bind_rows(
  df_sum(df, level = 1, cls),
  df_sum(df, level = 2, cls, grd),
  df_sum(df, level = 3, cls, grd, typ)
) %>%
  mutate(
    # coalesce() takes the first non-NA value
    label = coalesce(
      as.character(typ),
      as.character(grd),
      as.character(cls)
    )
  )

df_summed %>%
  ggplot(aes(level, grp_ttl)) +
  geom_col(color = "white") +
  geom_text(
    aes(label = paste0(label, "\n", grp_ttl, "/", ttl)),
    color = "white",
    position = position_stack(vjust = 0.5)
  ) +
  # Puts level 1 at the top
  scale_x_reverse() +
  # Switches from vertical to horizontal bars
  coord_flip()

how to plot lines matching data using ggplot2

I want to plot increase_rate for the countries that have abs(increase_rate) > 0.05.
However, the code below discards the data points between -0.05 and 0.05.
I also want to plot the data in the -0.05 to 0.05 range for those countries.
library(tidyverse)
data(population, package = "tidyr")

# Year-over-year increase rate per country; only the rows whose rate exceeds
# 0.05 in absolute value are kept for the plot
population %>%
  arrange(country, year) %>%
  group_by(country) %>%
  mutate(
    population_nextY = lead(population),
    increase_rate = (population_nextY - population) / population_nextY
  ) %>%
  filter(abs(increase_rate) > 0.05) %>%
  ungroup() %>%
  ggplot() +
  geom_line(aes(x = year, y = increase_rate, color = country))
I want to get final plot like this.
# Increase rate for every country and year
d <- population %>%
  arrange(country, year) %>%
  group_by(country) %>%
  mutate(
    population_nextY = lead(population),
    increase_rate = (population_nextY - population) / population_nextY
  ) %>%
  ungroup()

# Countries with at least one rate outside [-0.05, 0.05]
select_country <- d %>%
  filter(!between(increase_rate, -0.05, 0.05)) %>%
  distinct(country) %>%
  unlist()

# Plot the full series of just those countries
d %>%
  filter(country %in% select_country) %>%
  ggplot() +
  geom_line(aes(x = year, y = increase_rate, color = country))

use between:
# Pipeline step: keeps the rows whose increase_rate is outside [-0.05, 0.05]
filter(!between(increase_rate, -0.05, 0.05))

Add a column using the mutate() function:
# Plot the full increase-rate series of every country whose largest absolute
# rate exceeds 0.05.
population %>%
  arrange(country, year) %>%
  group_by(country) %>%
  mutate(
    population_nextY = lead(population),
    increase_rate = (population_nextY - population) / population_nextY,
    # Per-country maximum, repeated on every row of that country so the
    # filter below keeps or drops whole countries
    judge = max(abs(increase_rate), na.rm = TRUE)
  ) %>%
  filter(judge > 0.05) %>%
  ungroup() %>%
  ggplot() +
  geom_line(aes(x = year, y = increase_rate, color = country))

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Second Y Axis In Facet Wrap with Line and Histogram (Tidyverse) - r

Related

Plot time-series smooth confident interval

Creating a geographic file for use with tmap and coming up with error when coding shapefile

How to reorder the plot by factors in ggplot?

How can we data wrangling to obtain shown ratio/proportion chart shown

how to plot lines matching data using ggplot2

Categories

Resources