I am trying to reorder the bars of the geom_col plot below by one level of the variable pct_vacc_GenderType, namely pct_female_vacc.
df
library(tidyverse)
library(lubridate)
library(scales)
library(gganimate)
# Location of the state-wise CoWIN vaccination CSV.
file_url1 <- "https://raw.githubusercontent.com/johnsnow09/covid19-df_stack-code/main/cowin_vaccine_data_statewise.csv"

# Read the CSV and convert `Updated.On` to Date in one pipeline.
# NOTE(review): as.Date() is called without a format, so the column is
# presumably already in a format it recognises by default -- confirm.
df_vaccination <- file_url1 %>%
  url() %>%
  read.csv() %>%
  mutate(Updated.On = as.Date(Updated.On))
plot
# Stacked horizontal bars of the female / male share of vaccinated
# individuals per state, for the most recent date in the data.
df_vaccination %>%
  filter(State != "India", Updated.On == max(Updated.On)) %>%
  # arrange(desc(Updated.On)) %>%
  mutate(
    pct_female_vacc =
      Female.Individuals.Vaccinated. / Total.Individuals.Vaccinated,
    pct_male_vacc =
      Male.Individuals.Vaccinated. / Total.Individuals.Vaccinated,
    State = as.factor(State)
  ) %>%
  # Long format: one row per state and gender share.
  pivot_longer(
    cols = pct_female_vacc:pct_male_vacc,
    names_to = "pct_vacc_GenderType",
    values_to = "pct_vacc"
  ) %>%
  mutate(pct_vacc_GenderType = as.factor(pct_vacc_GenderType)) %>%
  na.omit() %>%
  ggplot(aes(x = pct_vacc, y = State, fill = pct_vacc_GenderType)) +
  geom_col()
I would like the above plot to be reordered by the red bars, i.e. by the pct_female_vacc level.
I cannot use reorder_within because I am not using facet_wrap here. I also tried fct_reorder, but maybe I am not using it correctly, or it doesn't work in this case.
What you want to do is simple with forcats::fct_reorder. The only thing you have to be cautious about is that you need to set the factor before pivot_longer. Here you go:
# Same plot, with the states ordered by their female vaccination share.
df_vaccination %>%
  # The newest date contains only NAs, so use the day before it instead.
  filter(State != "India", Updated.On == max(Updated.On) - 1) %>%
  # arrange(desc(Updated.On)) %>%
  mutate(
    pct_female_vacc =
      Female.Individuals.Vaccinated. / Total.Individuals.Vaccinated,
    pct_male_vacc =
      Male.Individuals.Vaccinated. / Total.Individuals.Vaccinated,
    # The factor levels have to be ordered here, before pivot_longer()
    # moves pct_female_vacc out of its own column.
    State = forcats::fct_reorder(as.factor(State), pct_female_vacc)
  ) %>%
  pivot_longer(
    cols = pct_female_vacc:pct_male_vacc,
    names_to = "pct_vacc_GenderType",
    values_to = "pct_vacc"
  ) %>%
  mutate(pct_vacc_GenderType = as.factor(pct_vacc_GenderType)) %>%
  # Drop only rows with a missing share (used instead of na.omit()).
  filter(!is.na(pct_vacc)) %>%
  ggplot(aes(x = pct_vacc, y = State, fill = pct_vacc_GenderType)) +
  geom_col() +
  # Legend at the bottom so the plot reads better when posted.
  theme(legend.position = "bottom")
Created on 2021-05-16 by the reprex package (v2.0.0)
Arrange the data by pct_female_vacc and convert State to a factor whose levels follow the order of appearance.
library(tidyverse)
# Order the states by sorting the rows first and then freezing that row
# order into the factor levels of State.
df_vaccination %>%
  filter(State != "India", Updated.On == max(Updated.On)) %>%
  mutate(
    pct_female_vacc =
      Female.Individuals.Vaccinated. / Total.Individuals.Vaccinated,
    pct_male_vacc =
      Male.Individuals.Vaccinated. / Total.Individuals.Vaccinated
  ) %>%
  arrange(pct_female_vacc) %>%
  # Levels in order of appearance, i.e. by ascending pct_female_vacc.
  mutate(State = factor(State, levels = unique(State))) %>%
  pivot_longer(
    cols = pct_female_vacc:pct_male_vacc,
    names_to = "pct_vacc_GenderType",
    values_to = "pct_vacc"
  ) %>%
  na.omit() %>%
  ggplot(aes(x = pct_vacc, y = State, fill = pct_vacc_GenderType)) +
  geom_col()
Related
I am creating a grouped bar chart like so:
library(tidyverse)
library(echarts4r)
data("starwars")
# Mean height per sex and eye colour, drawn as a bar chart with one
# timeline step per sex.
starwars %>%
  group_by(sex, eye_color) %>%
  summarise(height = mean(height, na.rm = TRUE)) %>%
  group_by(sex) %>%
  e_charts(x = eye_color, timeline = TRUE) %>%
  e_bar(serie = height, legend = FALSE)
How do I set the range of the y axis (height) to be the same across groups (sex)?
You could set maximum value for the y axis using e_y_axis(max = XXX), e.g. in the code below I set the max value based on the maximum of height.
library(tidyverse)
library(echarts4r)
data("starwars")
# Aggregate once so that the axis limit and the chart use the same data.
# (The stray `ymax <- max()` was removed: max() without arguments returns
# -Inf with a warning, and the value was overwritten below anyway.)
dat <- starwars %>%
  group_by(sex, eye_color) %>%
  summarise(height = mean(height, na.rm = TRUE), .groups = "drop")

# Shared y-axis maximum: the largest mean height, rounded up to the next
# multiple of 50.
ymax <- 50 * ceiling(max(dat$height, na.rm = TRUE) / 50)

dat %>%
  group_by(sex) %>%
  e_charts(x = eye_color, timeline = TRUE) %>%
  e_bar(height, legend = FALSE) %>%
  # The same maximum is applied to every timeline step.
  e_y_axis(max = ymax)
Here is one of the datasets I have been practicing with on R-cloud, and this is my approach:
# Rows for the "Emissions" indicator only.
data_long %>%
  filter(Indicator == "Emissions")

glimpse(data_long)

## This is supposed to return 2 columns, Year & Emissions, but it ended up
## aggregating everything into a single value:
## Emissions
## 1417688795
## NOTE(review): a single total despite group_by() is presumably caused by
## another attached package masking summarize() -- confirm with conflicts().
## Calling dplyr::summarize() explicitly avoids any such masking.
data_long %>%
  filter(Indicator == "Emissions") %>%
  group_by(Year) %>%
  dplyr::summarize(Emissions = sum(Value))

## The plot failed with -- Error in `check_aesthetics()`:
## ! Aesthetics must be either length 1 or the same as the data (1): x
## The "(1)" shows the summary had collapsed to a single row, presumably
## for the same reason, so the same explicit dplyr:: call is used here.
data_long %>%
  filter(Indicator == "Emissions") %>%
  group_by(Year) %>%
  dplyr::summarize(Emissions = sum(Value)) %>%
  ggplot(aes(x = Year, y = Emissions)) +
  geom_line(size = 1.5)
I am assuming there is an issue with one of the packages (gradethis or learnr), as I am using R-cloud. Are there any known limitations with it? Any suggestions?
Thanks,
Vikram
The goal is to produce a visualization indicating ratios.
How can we produce such a ratio chart (highlighted) in R?
library(tidyverse)
# Dataset creation
# Toy data: eight rows with a class (cls), grade (grd), type (typ) and
# points (pnts) column.
df <- data.frame(
  cls = rep(c("A", "B"), each = 4),
  grd = c("A1", "A2", "A2", "A2", "B1", "B2", "B1", "B2"),
  typ = c("m", "m", "o", "o", "m", "n", "p", "p"),
  pnts = rep(1:4, times = 2)
)
df
#### Data wrangling
# Points summed per class.
df1 <- df %>%
  group_by(cls) %>%
  summarise(cls_pct = sum(pnts))
df1

# Points summed per class and grade.
df2 <- df %>%
  group_by(cls, grd) %>%
  summarise(grd_pct = sum(pnts))
df2

# Points summed per class, grade and type.
df3 <- df %>%
  group_by(cls, grd, typ) %>%
  summarise(typ_pct = sum(pnts))
df3
#### Combine the class, grade and type totals in one frame
# Both totals are summed from typ_pct, the finest level. Summing grd_pct
# per cls instead would count each grade total once for every typ row of
# that grade (e.g. 1 + 9 + 9 = 19 for class A instead of 10).
df3 %>%
  group_by(cls, grd) %>%
  mutate(grd_pct = sum(typ_pct)) %>%
  group_by(cls) %>%
  mutate(cls_pct = sum(typ_pct))
Attempt to visualize all the ratios in 1 chart
# NOTE(review): `data` is not defined in the visible code; presumably it is
# the combined frame with the extra *_pct columns after `pnts` -- confirm.
data %>%
  # Every column outside the cls..pnts range becomes a (per_cat, percent) pair.
  pivot_longer(
    cols = -c(cls:pnts),
    names_to = "per_cat",
    values_to = "percent"
  ) %>%
  ggplot(aes(x = cls, y = percent, colour = typ, fill = grd)) +
  geom_col() +
  coord_flip() +
  theme_bw()
plot of the same.
EDIT -- added formula version with more useful output for visualization.
ORIG: At this point it may be worth making a function to reduce copying and pasting, but this may get you what you need:
library(tidyverse)
# Totals per class (per1), per grade (per2) and per type (per3), each with
# its share of the level above.
# The shares divide by the parent total: dividing perN by sum(perN) inside
# its own group only yields 1 / (number of rows in the group), because perN
# is constant within that group.
df %>%
  # Grand total, needed as the denominator for the class share.
  mutate(ttl = sum(pnts)) %>%
  group_by(cls) %>%
  mutate(
    per1 = sum(pnts),
    per1_pct = per1 / ttl
  ) %>%
  group_by(cls, grd) %>%
  mutate(
    per2 = sum(pnts),
    per2_pct = per2 / per1
  ) %>%
  group_by(cls, grd, typ) %>%
  mutate(
    per3 = sum(pnts),
    per3_pct = per3 / per2
  ) %>%
  ungroup() %>%
  # Helper column only; drop it so the output columns stay as before.
  select(-ttl)
EDIT: Here's a general function to calculate the stats for a given grouping, making it easier to combine a few groupings together in long format better suited for visualization.
#' Summarise `pnts` for one grouping level
#'
#' @param df A data frame with a `pnts` column and the grouping columns.
#' @param level Value stored in the `level` column of the result.
#' @param ... Grouping columns, passed on to `group_by()`.
#' @return An ungrouped data frame with one row per group, holding the
#'   grouping columns, `grp_ttl` (sum of `pnts` in the group), `ttl` (sum of
#'   `grp_ttl` over the groups that share all but the last grouping column),
#'   `pct` (`grp_ttl / ttl`) and `level`.
df_sum <- function(df, level, ...) {
  df %>%
    group_by(...) %>%
    # Keep all but the last grouping column, so that `ttl` below is the
    # total of the parent group. Stating this explicitly also silences the
    # regrouping message of summarize().
    summarize(grp_ttl = sum(pnts), .groups = "drop_last") %>%
    mutate(
      ttl = sum(grp_ttl),
      pct = grp_ttl / ttl
    ) %>%
    ungroup() %>%
    mutate(level = {{ level }})
}
# Stack the three grouping levels in long format and label every row.
df_summed <- bind_rows(
  df_sum(df, level = 1, cls),
  df_sum(df, level = 2, cls, grd),
  df_sum(df, level = 3, cls, grd, typ)
) %>%
  mutate(
    # coalesce() takes the first non-NA value among typ, grd and cls.
    label = coalesce(
      as.character(typ),
      as.character(grd),
      as.character(cls)
    )
  )
# One stacked bar per level, each segment labelled "label\ngrp_ttl/ttl".
ggplot(df_summed, aes(x = level, y = grp_ttl)) +
  geom_col(colour = "white") +
  geom_text(
    aes(label = paste0(label, "\n", grp_ttl, "/", ttl)),
    colour = "white",
    position = position_stack(vjust = 0.5)
  ) +
  # Reverse the axis so that level 1 ends up at the top.
  scale_x_reverse() +
  # Switch from vertical to horizontal bars.
  coord_flip()
I'm trying to add labels and percentages to each layer within a sunburst chart using R - so it looks like this Sunburst.
I can create a sunburst chart (using this guide) but I can't figure out how to add the labels or percentages. I also want to be able to print the chart with all labels and percentages.
Here's my code so far.
# libraries
library(dplyr)
library(treemap)
library(sunburstR)
library(readxl)
library(vcd)
## Load Arthritis as example
# Count the rows per Treatment / Sex / Improved / Age group and build the
# "-"-separated path for each combination.
Data <- data.frame(Arthritis) %>%
  select(-ID) %>%
  mutate(Age = ifelse(Age < 50, "Young", "Old")) %>%
  group_by(Treatment, Sex, Improved, Age) %>%
  summarise(Count = n()) %>%
  mutate(Path = paste(Treatment, Sex, Improved, Age, sep = "-")) %>%
  ungroup() %>%
  select(Path, Count)

sunburst(Data)
Any help would be great.
Thanks.
I suggest the ggsunburst package https://github.com/didacs/ggsunburst
library(ggsunburst)
library(dplyr)
library(vcd) # just for the Arthritis dataset
Data <- data.frame(Arthritis)

# Percentage of all rows per Treatment / Sex / Improved / Age group, counted
# with tally().
# The `leaf` column has the format "name->attribute:value": ggsunburst treats
# everything after "->" as attributes, and the "size" attribute is used as
# the size of the arc.
df <- Data %>%
  mutate(Age = ifelse(Age < 50, "Young", "Old")) %>%
  group_by(Treatment, Sex, Improved, Age) %>%
  tally() %>%
  mutate(
    percentage = n / nrow(Data) * 100,
    size = paste0("->size:", round(percentage, 2)),
    leaf = paste0(Improved, size)
  ) %>%
  ungroup() %>%
  select(Treatment, Sex, Age, leaf)
# sunburst_data() reads from a file, so write the lineage table to disk
# first. TRUE/FALSE are spelled out because T and F are ordinary variables
# that can be reassigned.
write.table(
  df,
  file = "data.csv",
  row.names = FALSE,
  col.names = FALSE,
  sep = ","
)
# node_attributes = "size" adds labels with percentages to the terminal nodes.
sb <- sunburst_data(
  "data.csv",
  type = "lineage",
  sep = ",",
  node_attributes = "size"
)
# Percentages for the internal nodes, one table per level of the hierarchy.
# Each table ends up with the columns `name` and `percent`.

# Level 1: Treatment.
tre <- Data %>%
  group_by(Treatment) %>%
  tally() %>%
  mutate(
    percent = n / nrow(Data) * 100,
    name = Treatment
  ) %>%
  ungroup() %>%
  select(name, percent)

# Level 2: Sex within Treatment.
sex <- Data %>%
  group_by(Treatment, Sex) %>%
  tally() %>%
  mutate(
    percent = n / nrow(Data) * 100,
    name = Sex
  ) %>%
  ungroup() %>%
  select(name, percent)

# Level 3: Age group within Treatment and Sex.
age <- Data %>%
  mutate(Age = ifelse(Age < 50, "Young", "Old")) %>%
  group_by(Treatment, Sex, Age) %>%
  tally() %>%
  mutate(
    percent = n / nrow(Data) * 100,
    name = Age
  ) %>%
  ungroup() %>%
  select(name, percent)
x <- rbind(tre, sex, age)
# cbind() only works here because the rows of x are in the same order as
# sb$node_labels.
x <- cbind(sb$node_labels, round(x[, "percent"], 2))
percent <- mutate(x, name_percent = paste(label, percent, "%"))

sunburst(sb, node_labels.min = 0) +
  # Percentages on the terminal nodes.
  geom_text(
    data = sb$leaf_labels,
    aes(
      x = x,
      y = 0.1,
      label = paste(size, "%"),
      angle = angle,
      hjust = hjust
    ),
    size = 2
  ) +
  # Name and percentage on the internal nodes.
  geom_text(
    data = percent,
    aes(x = x, y = y, label = name_percent, angle = pangle),
    size = 2
  )
How do I duplicate my x-axis (which becomes the y-axis with type='bar'?) so that it appears at both the top and the bottom?
Example:
library(dplyr)
library(ggplot2)
library(highcharter)
# Number of rows per class and manufacturer for three mpg classes, plus one
# extra "loner" / "newClass" row.
mpgg <- mpg %>%
  filter(class %in% c("suv", "compact", "midsize")) %>%
  add_row(manufacturer = "loner", class = "newClass") %>%
  group_by(class, manufacturer) %>%
  summarize(count = n())

# One entry per class, carrying the manufacturers of that class as its
# categories.
categories_grouped <- mpgg %>%
  group_by(name = class) %>%
  do(categories = .$manufacturer) %>%
  list_parse()

# Bar series of the counts, with the grouped categories on the x-axis.
highchart() %>%
  hc_xAxis(categories = categories_grouped) %>%
  hc_add_series(
    data = mpgg,
    type = "bar",
    hcaes(y = count, color = manufacturer),
    showInLegend = FALSE
  )