I am trying to create an "ordered" stacked bar chart in which each stack is colored by one variable and ordered by another. Please find my example below:
library(ggplot2)
library(dplyr)
data(iris)
# Share of each rounded sepal length within a species, plus a per-species
# rank label "<Species>-<k>" where k = 1 is the most frequent length.
chart.df.st00 <- as_tibble(iris) %>%
  mutate(Sepal.Length = round(Sepal.Length)) %>%
  count(Species, Sepal.Length) %>%
  mutate(Sepal.Length = as.character(Sepal.Length)) %>%
  group_by(Species) %>%
  mutate(percent = n / sum(n) * 100) %>%
  # arrange() ignores the grouping here and sorts the whole table by count.
  arrange(desc(n)) %>%
  mutate(rank = paste(Species, row_number(), sep = "-")) %>%
  ungroup()
# Attach one fill colour to every row, keyed by Sepal.Length.
# The lookup table has one row per distinct Sepal.Length, so n() inside its
# mutate() is the number of colours required. The join key is spelled out
# instead of relying on a natural join over whatever columns happen to match.
chart.df.st01 <- chart.df.st00 %>%
  left_join(
    chart.df.st00 %>%
      distinct(Sepal.Length) %>%
      mutate(
        color = colorRampPalette(RColorBrewer::brewer.pal(n(), "Set1"))(n())
      ),
    by = "Sepal.Length"
  )
# Palette for chart1: one entry per rank label, named by that label so that
# scale_fill_manual() can match colours to fill values.
chart.color1.st00 <- arrange(distinct(chart.df.st01, rank, color), rank)
chart.color1.st01 <- setNames(chart.color1.st00$color, chart.color1.st00$rank)

# chart1: one horizontal stacked bar per species, filled by rank label.
chart1 <- ggplot(chart.df.st01, aes(x = 1, y = percent)) +
  geom_bar(aes(fill = rank), stat = "identity") +
  scale_fill_manual(values = chart.color1.st01) +
  facet_wrap(. ~ Species, ncol = 1) +
  scale_y_reverse(
    breaks = c(0, 25, 50, 75, 100),
    labels = c(100, 75, 50, 25, 0)
  ) +
  coord_flip()
# Palette for chart2: one colour per Sepal.Length value, named by that value
# so that scale_fill_manual() can match colours to fill values.
chart.color2.st00 <- chart.df.st01 %>%
  distinct(color, Sepal.Length) %>%
  arrange(Sepal.Length)
chart.color2.st01 <- setNames(
  chart.color2.st00$color,
  chart.color2.st00$Sepal.Length
)

# chart2: one horizontal stacked bar per species, filled by Sepal.Length.
# The data argument must be chart.df.st01 (the frame built above); the
# original referred to `chart.df`, which is never defined.
chart2 <- ggplot(data = chart.df.st01, aes(x = 1, y = percent)) +
  geom_bar(aes(fill = Sepal.Length), stat = "identity") +
  scale_fill_manual(values = chart.color2.st01) +
  facet_wrap(. ~ Species, ncol = 1) +
  coord_flip()
In my example, each stack is filled by Sepal.Length and ordered by rank. chart1 has the stack ordering I want but not the legend, while chart2 has the legend I want but not the stack ordering.
Is there a way to have a single chart with the stacked bar of chart1 and the legend of chart2?
Thanks!
Using the code for your second chart this could be achieved by additionally mapping rank on the group aes:
library(ggplot2)
library(dplyr)
data(iris)
# Per-species percentage of each rounded sepal length, with a rank label
# "<Species>-<k>" numbering the lengths from most to least frequent.
chart.df.st00 <- as_tibble(iris) %>%
  mutate(Sepal.Length = round(Sepal.Length)) %>%
  count(Species, Sepal.Length) %>%
  mutate(Sepal.Length = as.character(Sepal.Length)) %>%
  group_by(Species) %>%
  mutate(percent = n / sum(n) * 100) %>%
  # Sorting is global (arrange() does not sort within groups by default).
  arrange(desc(n)) %>%
  mutate(rank = paste(Species, row_number(), sep = "-")) %>%
  ungroup()
# Attach one fill colour per distinct Sepal.Length. The lookup table has one
# row per distinct value, so n() is the number of colours needed.
chart.df.st01 <- left_join(
  chart.df.st00,
  chart.df.st00 %>%
    distinct(Sepal.Length) %>%
    mutate(
      color = colorRampPalette(RColorBrewer::brewer.pal(n(), "Set1"))(n())
    )
)
#> Joining, by = "Sepal.Length"
# Palette keyed by Sepal.Length so the legend shows one entry per length.
chart.color2.st00 <- arrange(
  distinct(chart.df.st01, color, Sepal.Length),
  Sepal.Length
)
chart.color2.st01 <- setNames(
  chart.color2.st00$color,
  chart.color2.st00$Sepal.Length
)

# fill drives the colours and legend; group = rank drives the stacking order.
ggplot(chart.df.st01, aes(x = 1, y = percent)) +
  geom_bar(aes(fill = Sepal.Length, group = rank), stat = "identity") +
  scale_fill_manual(values = chart.color2.st01) +
  facet_wrap(. ~ Species, ncol = 1) +
  scale_y_reverse(
    breaks = c(0, 25, 50, 75, 100),
    labels = c(100, 75, 50, 25, 0)
  ) +
  coord_flip()
Related
I'm trying to get the variable labels and value labels to be displayed on a stacked bar chart.
library(tidyverse)
# Read the SPSS questionnaire file directly from its URL.
data <- haven::read_spss("http://staff.bath.ac.uk/pssiw/stats2/SAQ.sav")
# Stacked percentage bar per question (Q01-Q04), one segment per answer code.
data %>%
select(Q01:Q04) %>%
# gather() with no arguments stacks every selected column into key/value pairs.
gather %>%
group_by(key, value) %>%
# tally() counts rows per (key, value) and drops the last grouping level,
# so the result is still grouped by key.
tally %>%
# n becomes a percentage within each key. `round = 1` only adds a constant
# column named `round`; the rounding itself happens on the next line.
mutate(n = n/sum(n)*100, round = 1) %>%
mutate(n = round(n, 2)) %>%
ggplot(aes(x=key, y=n, fill=factor(value))) +
geom_col() +
# Label each segment with its percentage, centred within the segment.
geom_text(aes(label=as_factor(n)), position=position_stack(.5)) +
coord_flip() +
theme(aspect.ratio = 1/3) + scale_fill_brewer(palette = "Set2")
Instead of Q01, Q02, Q03, Q04, I would like to use the variable labels.
library(labelled)
var_label(data$Q01)
Statistics makes me cry
var_label(data$Q02)
My friends will think Im stupid for not being able to cope with SPSS
var_label(data$Q03)
Standard deviations excite me
var_label(data$Q04)
I dream that . . .
along with associated value labels
val_labels(data$Q01)
Strongly agree Agree Neither Disagree Strongly disagree Not answered
1 2 3 4 5 9
I tried using label = as_factor(n) but that didn't work.
We may extract the labels and then do a join
library(forcats)
library(haven)
library(dplyr)
library(tidyr)
library(labelled)
# Restrict to the four questionnaire items being plotted.
subdat <- data %>%
  select(Q01:Q04)

# Lookup table with one row per item: `name` is the column name and `value`
# is that column's variable label.
d1 <- subdat %>%
  summarise(across(everything(), var_label)) %>%
  pivot_longer(everything())

subdat %>%
  pivot_longer(everything(), values_to = "val") %>%
  left_join(d1, by = "name") %>%
  # Replace the short column name (Q01, ...) with its variable label.
  mutate(name = value, value = NULL) %>%
  count(name, val) %>%
  # count() returns ungrouped data, so group by item before computing the
  # percentages; otherwise each share is taken over all items combined and
  # the bars no longer sum to 100.
  group_by(name) %>%
  mutate(n = round(n / sum(n) * 100, 2)) %>%
  ungroup() %>%
  # NOTE(review): this indexes the label vector by position, so a code with
  # no label at that position (e.g. 9) would yield NA -- confirm against the
  # data before relying on it.
  mutate(labels = names(val_labels(val)[val])) %>%
  ggplot(aes(x = name, y = n, fill = labels)) +
  geom_col() +
  # Print each segment's percentage, centred within the segment.
  geom_text(aes(label = as_factor(n)),
            position = position_stack(.5)) +
  coord_flip() +
  theme(aspect.ratio = 1/3) +
  scale_fill_brewer(palette = "Set2")
-output
I have roughly 16,000 missing-persons records that I am trying to order by Count and then plot on a graph. This is the code I am using. I want to plot only the top ten.
# Count records per State / City / Sex combination.
# NOTE: summarise() drops only the last grouping level (Sex) by default, so
# mp.city is still grouped by State and City afterwards.
mp.city <-mp.All %>%
group_by(State, City, Sex) %>%
summarise(Count = n())
mp.city %>%
arrange(desc(Count)) %>%
# NOTE: on grouped data slice(1:10) keeps rows 1-10 of every group, not the
# ten largest rows overall.
slice(1:10) %>%
# NOTE: only City is mapped; geom_bar() with its default stat counts rows
# per City, so the Count column is never plotted.
ggplot(aes(y = City)) +
geom_bar()
the code will run but the plot is garbage. Any help would be amazing thank you!
I think you can manage it with head():
library(ggplot2)
library(dplyr)

# Load the records from the shared CSV.
url <- "https://raw.githubusercontent.com/kitapplegate/fall2020/master/mpAll.csv"
mp.All <- read.csv(url)

# Number of records per State / City / Sex combination.
mp.city <- summarise(group_by(mp.All, State, City, Sex), Count = n())

# Horizontal bar chart of the ten largest rows, bars ordered by Count.
# head() is not group-aware, so it takes the first ten rows of the sorted
# table as a whole.
mp.city %>%
  arrange(desc(Count)) %>%
  head(10) %>%
  ggplot(aes(x = reorder(City, Count), y = Count)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  # x is drawn vertically after the flip, so this labels the city axis.
  xlab("City")
P.S.
Next time, try to share your data by running dput(head(yourdata)) and posting the result; it makes the problem much easier to reproduce.
I would like to sort my ggplot facet_wrap panels by color.
For example, in this demo code, the color corresponds to groups A, B, C. I am looking to have all the red plots next to each other, and same for the blue and green plots.
I tried sorting my data by group but ggplot seems to switch the order when plotting.
library(tidyverse)
set.seed(42)

# Example data: 15 series of 10 random values each, with every series
# assigned at random to one of the groups "a", "b" or "c".
id <- 1:15
data <- map(id, function(i) rnorm(10))
date <- map(id, function(i) 1:10)
group <- map_chr(id, function(i) sample(c("a", "b", "c"), size = 1))
df <- unnest(
  tibble(id = id, data = data, date = date, group = group),
  cols = c(data, date)
)

# One panel per id; lines and points coloured by group.
df %>%
  arrange(group) %>%
  ggplot(mapping = aes(x = date, y = data, color = group)) +
  geom_line() +
  geom_point() +
  facet_wrap(~ id)
This could help:
library(tidyverse)
set.seed(42)

# Generate example data frame
id <- 1:15
data <- map(id, ~rnorm(10))
date <- map(id, ~1:10)
group <- map_chr(id, ~sample(c('a','b','c'), size=1))
df <- tibble(id=id, data=data, date=date, group=group) %>%
  unnest(cols = c(data, date))

# Make id a factor whose levels follow the group (then id) order, so that
# facet_wrap() places panels of the same group next to each other.
# Ordering the levels by group size instead would interleave the panels of
# any two groups that happen to contain the same number of rows.
df2 <- df %>%
  arrange(group, id) %>%
  mutate(id = factor(id, levels = unique(id)))

# Generate plot
df2 %>%
  ggplot(mapping = aes(x=date, y=data, color=group)) +
  geom_line() +
  geom_point() +
  facet_wrap(~ id)
This would be a way (would have to get rid of the double title though):
# Facet on group first and id second: panels are laid out group by group,
# and each strip shows both values.
ggplot(arrange(df, group), aes(x = date, y = data, color = group)) +
  geom_line() +
  geom_point() +
  facet_wrap(~ group + id)
I have a regular boxplot in ggplot2:
# working example
library(ggplot2)
library(dplyr) # supplies %>% and mutate(); ggplot2 alone does not export them

# Boxplots of mpg for each cyl, split (dodged) and coloured by vs.
mtcars %>%
  mutate(cyl = as.factor(cyl), vs = as.factor(vs)) %>%
  ggplot(aes(y = mpg, x = cyl)) +
  geom_boxplot(aes(colour = vs))
It looks like this:
However, when I create an object and pass it to plotly, I lose the dodge position:
library(plotly)

# Boxplots of mpg for each cyl, coloured by vs, kept as an object so it can
# be converted with ggplotly().
mtcars_boxplot <- mtcars %>%
  mutate(cyl = as.factor(cyl), vs = as.factor(vs)) %>%
  ggplot(aes(y = mpg, x = cyl)) +
  geom_boxplot(aes(colour = vs))

ggplotly(mtcars_boxplot)
It looks like this:
I tried to add position=position_dodge() & position=position_dodge2() but none of them worked:
library(plotly)

# Same boxplot specification, with the dodge position requested explicitly.
mtcars_boxplot <- mtcars %>%
  mutate(cyl = as.factor(cyl), vs = as.factor(vs)) %>%
  ggplot(aes(y = mpg, x = cyl)) +
  geom_boxplot(aes(colour = vs), position = position_dodge2())

ggplotly(mtcars_boxplot)
What should I do to keep the dodge position like the first plot?
As suggested here, add layout(boxmode = "group")
library(plotly)

# Convert the ggplot object, then set plotly's boxmode to "group" so boxes
# that share an x value are drawn side by side.
layout(ggplotly(mtcars_boxplot), boxmode = "group")
I would like to create one separate plot per group in a data frame and include the group in the title.
With the iris dataset I can in base R and ggplot do this
# One scatter plot per species, each titled with its species.
plots1 <- lapply(
  split(iris, iris$Species),
  function(species_df) {
    ggplot(species_df, aes(x = Petal.Width, y = Petal.Length)) +
      geom_point() +
      ggtitle(species_df$Species[1])
  }
)
Is there an equivalent using dplyr?
Here's an attempt using facets instead of title.
# Template plot; its data is swapped out per species below.
p <- ggplot(data = iris, aes(x = Petal.Width, y = Petal.Length)) +
  geom_point()

# For each species, replace the template's data with that subset (%+%) and
# add a facet strip that displays the species.
plots2 <- iris %>%
  group_by(Species) %>%
  do(plots = p %+% . + facet_wrap(~Species))
where I use %+% to replace the dataset in p with the subset for each call.
or (working but complex) with ggtitle
# Build one scatter plot per species inside do(), storing them in the
# list-column `plots`.
plots3 = iris %>%
group_by(Species) %>%
do(
plots = ggplot(data=.) +
geom_point(aes(x=Petal.Width, y=Petal.Length)) +
# The title expression is meant to reduce the group's Species column to a
# single character value.
# NOTE(review): in magrittr a pipeline whose left-hand side is the bare `.`
# placeholder defines a function rather than piping the data -- confirm that
# ggtitle() actually receives the intended string here.
ggtitle(. %>% select(Species) %>% mutate(Species=as.character(Species)) %>% head(1) %>% as.character()))
The problem is that I can't seem to set the title per group with ggtitle in a very simple way.
Thanks!
Use .$Species to pull the species data into ggtitle:
# One plot per species; the title is taken from the group's own Species
# column via `.$Species`.
iris %>%
  group_by(Species) %>%
  do(
    plots = ggplot(data = .) +
      aes(x = Petal.Width, y = Petal.Length) +
      geom_point() +
      ggtitle(unique(.$Species))
  )
library(dplyr, warn.conflicts = FALSE)
library(ggplot2)
# group_map() calls the function once per species with the group's rows and
# a one-row key table; the first key column supplies the title.
plots3 <- iris %>%
  group_by(Species) %>%
  group_map(function(species_df, key) {
    ggplot(species_df) +
      aes(x = Petal.Width, y = Petal.Length) +
      geom_point() +
      ggtitle(key[[1]])
  })
length(plots3)
#> [1] 3
# for example, the second plot :
plots3[[2]]
Created on 2021-11-19 by the reprex package (v2.0.1)
This is another option using rowwise:
# First do(): swap each species' subset into the template plot `p`.
# Second do(), row by row: add that row's species as the plot title.
plots2 <- iris %>%
  group_by(Species) %>%
  do(plots = p %+% .) %>%
  rowwise() %>%
  do(x = .$plots + ggtitle(.$Species))