specific fill order in geom_area - r

Happy new year! Consider this simple example:
> df <- tibble(type = c('0_10','0_9','0_8','0_10','0_9','0_8','1_10','1_9','1_8','1_10','1_9','1_8'),
+ time = c(1,1,1,2,2,2,1,1,1,2,2,2),
+ value = c(2,3,4,2,3,6,-2,-3,-4,-2,-3,-5))
> df
# A tibble: 12 x 3
type time value
<chr> <dbl> <dbl>
1 0_10 1 2
2 0_9 1 3
3 0_8 1 4
4 0_10 2 2
5 0_9 2 3
6 0_8 2 6
7 1_10 1 -2
8 1_9 1 -3
9 1_8 1 -4
10 1_10 2 -2
11 1_9 2 -3
12 1_8 2 -5
I am creating a plot that stacks the values of value over time, by type. I would like to obtain the same color for the type 0_10 and 1_10, another color for 0_9 and 1_9 and another color for 0_8 and 1_8.
Unfortunately, ggplot does not seem to use the factor ordering I am asking for. You can see below that 0_10 is purple while 1_10 is green... They should have the same color.
mylevels = c('0_10','0_9','0_8','1_10','1_9','1_8')
df %>%
ggplot(aes(x = time)) +
geom_area(inheris.aes = FALSE,
data = . %>% dplyr::filter(str_detect(type, '0_')),
aes(y = value,
fill = factor(type, levels = mylevels)),
position = 'stack', color = 'black')+
scale_fill_viridis_d() +
geom_area(inheris.aes = FALSE,
data = . %>% dplyr::filter(str_detect(type, '1_')),
aes(y = value, fill = factor(type, levels = mylevels)),
position = 'stack', color = 'black')
Any idea?
Thanks!

I'd create a new column for the fill from the trailing number in your type column, and group by type.
library(tidyverse)
df <- tibble(type = c('0_10','0_9','0_8','0_10','0_9','0_8','1_10','1_9','1_8','1_10','1_9','1_8'), time = c(1,1,1,2,2,2,1,1,1,2,2,2), value = c(2,3,4,2,3,6,-2,-3,-4,-2,-3,-5))
# Fill by the number after the underscore so that e.g. 0_10 and 1_10 share a
# colour; `group = type` still gives every type its own stacked area.
df %>%
  mutate(colvar = sub("^.*_", "", type)) %>%
  ggplot(aes(x = time)) +
  geom_area(
    aes(y = value, fill = colvar, group = type),
    position = "stack", color = "black"
  ) +
  scale_fill_viridis_d()
If you want to visualise your information of your first "type number", you need to make it another aesthetic, for example alpha:
update new colvar variable as factor with ordered levels
# Same plot, but colvar is a factor whose levels are in numeric order
# (8, 9, 10) and the leading digit of `type` is mapped to alpha.
df %>%
  mutate(
    colvar = factor(sub("^.*_", "", type), levels = 8:10),
    alphavar = as.integer(substr(type, 1, 1))
  ) %>%
  ggplot(aes(x = time)) +
  geom_area(
    aes(y = value, fill = colvar, group = type, alpha = alphavar),
    position = "stack", color = "black"
  ) +
  scale_fill_viridis_d() +
  scale_alpha_continuous(breaks = 0:1, range = c(0.7, 1))

Related

barplot with different factor order for each x-axis tick

I was answering this question where @Léo wanted a barplot with stat = "identity" and position = "identity". This causes the bars (one for every value of the fill aesthetic) to be drawn on top of each other, so some of them get hidden:
His solution was to set alpha = 0.5, but he didn't like the result, as the colors mixed in different ways at each x-axis tick. Thus, I figured that the solution would be to have a different color ordering for each x-axis tick, but I don't know how to do it in ggplot.
What I've tried:
Dummy data:
library(tidyverse)
set.seed(7)
df = tibble(
categories = rep(c("a", "b", "c"), each = 3) %>% factor(),
xaxis = rep(1:3, 3) %>% factor(),
yaxis = runif(9))
What plotted the "original" graph, shown above:
ggplot() +
geom_bar(aes(xaxis, yaxis, fill = categories), df,
stat = "identity", position = "identity")
My attempt: changing the categories levels order and creating a different geom_bar for each x-axis value with a for loop:
g = ggplot()
for(x in unique(df$xaxis)){
df.x = df %>% filter(xaxis == x) %>% mutate(categories = fct_reorder(categories, yaxis))
g = g + geom_bar(aes(xaxis, yaxis, fill = categories), df.x,
stat = "identity", position = "identity")}
plot(g)
The levels on df.x actually change to the correct order for every iteration, but the same graph as before gets produced.
I draw a traditional overlapping plot and (if i understood correctly) your desired plot below to compare results:
library(tidyverse)
set.seed(7)
df = tibble(
categories = rep(c("a", "b", "c"), each = 3) %>% factor(),
xaxis = rep(1:3, 3) %>% factor(),
yaxis = runif(9))
ggplot() +
geom_bar(aes(xaxis, yaxis, fill = categories, group=categories), df, alpha=0.8,
stat = "identity", position = position_dodge(width=0.3,preserve = "single"))
# Rank the bars within each x value (rank 1 = largest yaxis) and spread yaxis
# into one column per rank; rank columns with no value for a row are set to 0.
df <- df %>%
  group_by(xaxis) %>%
  mutate(rank = rank(-yaxis)) %>%
  pivot_wider(
    values_from = yaxis, names_from = rank, values_fill = 0,
    names_sort = TRUE, names_prefix = "rank"
  )
print(df)
#> # A tibble: 9 × 5
#> # Groups: xaxis [3]
#> categories xaxis rank1 rank2 rank3
#> <fct> <fct> <dbl> <dbl> <dbl>
#> 1 a 1 0.989 0 0
#> 2 a 2 0 0.398 0
#> 3 a 3 0 0 0.116
#> 4 b 1 0 0 0.0697
#> 5 b 2 0 0 0.244
#> 6 b 3 0.792 0 0
#> 7 c 1 0 0.340 0
#> 8 c 2 0.972 0 0
#> 9 c 3 0 0.166 0
# Build one geom_bar layer per rank column (rank1, rank2, rank3), then add
# them to the base plot in that order.
rank_layers <- map(
  paste0("rank", 1:3),
  function(.x) {
    geom_bar(
      aes(xaxis, .data[[.x]], fill = categories),
      stat = "identity", position = "identity"
    )
  }
)
g <- reduce(rank_layers, `+`, .init = ggplot(df))
g
Created on 2022-11-02 with reprex v2.0.2
EDIT
It is easier, thanks to Park and this post
set.seed(7)
df = tibble(
categories = rep(c("a", "b", "c"), each = 3) %>% factor(),
xaxis = rep(1:3, 3) %>% factor(),
yaxis = runif(9))
df %>% group_by(xaxis) %>% arrange(rank(-yaxis)) %>%
ggplot() + geom_bar(aes(xaxis, yaxis, fill=categories), stat="identity", position="identity")
How about this?
df %>%
arrange(xaxis, yaxis) %>%
group_by(xaxis) %>%
mutate(yaxis = yaxis - lag(yaxis, default = 0)) %>%
ggplot() +
geom_bar(aes(xaxis, yaxis, fill = categories),
stat = "identity", position = "stack")

Plot multiple variable in the same bar plot

With my dataframe that looks like this (I have in total 1322 rows) :
I'd like to make a bar plot with the percentage of rating of the CFS score. It should look similar to this :
With this code, I can make a single bar plot for the column cfs_triage :
# Bar height is each cfs_triage value's share of all counted rows.
ggplot(data = df) +
  geom_bar(mapping = aes(x = cfs_triage, y = after_stat(count / sum(count))))
But I can't figure out how to make one with the three variables next to one another.
Thank you in advance to all of you that will help me with making this barplot with the percentage of rating for this three variable !(I'm not sure that my explanations are very clear, but I hope that it's the case :))
Your best bet here is to pivot your data into long format. We don't have your data, but we can reproduce a similar data set like this:
set.seed(1)
df <- data.frame(cfs_triage = sample(10, 1322, TRUE, prob = 1:10),
cfs_silver = sample(10, 1322, TRUE),
cfs_student = sample(10, 1322, TRUE, prob = 10:1))
df[] <- lapply(df, function(x) { x[sample(1322, 300)] <- NA; x})
Now the dummy data set looks a lot like yours:
head(df)
#> cfs_triage cfs_silver cfs_student
#> 1 9 NA 1
#> 2 8 4 2
#> 3 NA 8 NA
#> 4 NA 10 9
#> 5 9 5 NA
#> 6 3 1 NA
If we pivot into long format, then we will end up with two columns: one containing the values, and one containing the column name that the value belonged to in the original data frame:
library(tidyverse)
df_long <- df %>%
pivot_longer(everything())
head(df_long)
#> # A tibble: 6 x 2
#> name value
#> <chr> <int>
#> 1 cfs_triage 9
#> 2 cfs_silver NA
#> 3 cfs_student 1
#> 4 cfs_triage 8
#> 5 cfs_silver 4
#> 6 cfs_student 2
This then allows us to plot with value on the x axis, and we can use name as a grouping / fill variable:
ggplot(df_long, aes(value, fill = name)) +
geom_bar(position = 'dodge') +
scale_fill_grey(name = NULL) +
theme_bw(base_size = 16) +
scale_x_continuous(breaks = 1:10)
#> Warning: Removed 900 rows containing non-finite values (`stat_count()`).
Created on 2022-11-25 with reprex v2.0.2
Maybe you need something like this. The formatting was taken from @Allan Cameron (many thanks!):
library(tidyverse)
library(scales)
df %>%
mutate(id = row_number()) %>%
pivot_longer(-id) %>%
group_by(id) %>%
mutate(percent = value/sum(value, na.rm = TRUE)) %>%
mutate(percent = ifelse(is.na(percent), 0, percent)) %>%
mutate(my_label = str_trim(paste0(format(100 * percent, digits = 1), "%"))) %>%
ggplot(aes(x = factor(name), y = percent, fill = factor(name), label = my_label))+
geom_col(position = position_dodge())+
geom_text(aes(label = my_label), vjust=-1) +
facet_wrap(. ~ id, nrow=1, strip.position = "bottom")+
scale_fill_grey(name = NULL) +
scale_y_continuous(labels = scales::percent)+
theme_bw(base_size = 16)+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

label mean lines in ggplot that are mapped in a group

I have density plots for each shift and year. The means are plotted by grouping in a df called mu. I also add vertical reference lines which I can label without issue but I cannot seem to get the labels on the grouped vertical lines. You will see my latest attempt which throws an error "Aesthetics must be either length 1 or the same as the data (134): x"
My code
library(ggplot2)
library(dplyr)
df <- read.csv("f4_bna_no_cup.csv")
head(df)
ï..n yr s ys x
1 1 2021 1 2021-1 116.83
2 2 2021 1 2021-1 114.83
3 3 2021 1 2021-1 115.50
4 4 2021 1 2021-1 115.42
5 5 2021 1 2021-1 115.58
6 6 2021 1 2021-1 115.58
#summarize means by ys (year-shift)
mu <- df %>%
group_by(ys,s) %>%
summarise(grp.mean = mean(x))
mu
ys s grp.mean
<chr> <int> <dbl>
1 2021-1 1 116.
2 2021-2 2 117.
3 2022-1 1 114.
4 2022-2 2 115.
llab<-mu
shift <- c("Shift 1", "Shift 2")
#density charts on df
ggplot(data=df, aes(x=x,group =ys, fill = yr, color = yr)) +
geom_density(alpha = 0.4) +
scale_x_continuous(limits=c(112,120))+
geom_vline(aes(xintercept = grp.mean), data = mu, linetype = "dashed", size = 0.5) +
geom_text(aes(x=llab$grp.mean, y=.6), label = llab$ys) + #this throws the error
geom_vline(aes(xintercept=114.8), linetype="dashed", size=0.5, color = 'green3') +
geom_text(aes(x=114.8, y=.6), label = "Target", angle = 90, color="black",size=3) +
geom_vline(aes(xintercept=114.1), linetype="solid", size=0.5, color = 'limegreen') +
geom_text(aes(x=114.1, y=.55), label = "Potential", angle = 90, color="black",size=3 ) +
geom_vline(aes(xintercept=113.4), linetype="solid", size=0.5, color = 'firebrick3') +
geom_text(aes(x=113.4, y=.62), label = "Label wt", angle = 90,
color="black",size=3, family = "Times New Roman", vjust=0) +
facet_grid(
.~s,
labeller = labeller(
s = c(`1` = "Shift 1", `2` = "Shift 2")
))+
theme_light()+
theme(legend.position = "none")
Output so far...I'm so close.
Persistence pays off. I figured it out and thought I would share it in case someone else has a similar problem:
All code remains the same as in my question except a slight change to grouping for the mu df, AND replace the line that I noted as throwing the error as follows:
# Small change to group_by, retaining yr so it can be used as the label.
# .groups = "drop" returns an ungrouped summary.
mu <- df %>%
  group_by(yr, s, ys) %>%
  summarise(grp.mean = mean(x), .groups = "drop")
Replace: geom_text(aes(x=llab$grp.mean, y=.6), label = llab$ys), with
# Map x from mu inside aes() so each label is positioned from its own row.
geom_text(data = mu, aes(x = grp.mean, label = yr), y = .60, color = "black", angle = 90, vjust = 0)

R - (ggplot2 library) - Legends not showing on graphs

What I'm doing
I'm using a library for R called ggplot2, which allows for a lot of different options for creating graphics and other things. I'm using that to display two different data sets on one graph with different colours for each set of data I want to display.
The Problem
I'm also trying to get a legend to show up in my graph that will tell the user which set of data corresponds to which colour. So far, I've not been able to get it to show.
What I've tried
I've set it to have a position at the top/bottom/left/right to make sure nothing was setting its position to none by default, which would've hidden it.
The Code
# PDF/Plot generation
pdf("activity-plot.pdf")
ggplot(data.frame("Time"=times), aes(x=Time)) +
#Data Set 1
geom_density(fill = "#1A3552", colour = "#4271AE", alpha = 0.8) +
geom_text(x=mean(times)-1, y=max(density(times)$y/2), label="Mean {1} Activity", angle=90, size = 4) +
geom_vline(aes(xintercept=mean(times)), color="cyan", linetype="dashed", size=1, alpha = 0.5) +
# Data Set 2
geom_density(data=data.frame("Time"=timesSec), fill = "gray", colour = "orange", alpha = 0.8) +
geom_text(x=mean(timesSec)-1, y=max(density(timesSec)$y/2), label="Mean {2} Activity", angle=90, size = 4) +
geom_vline(aes(xintercept=mean(timesSec)), color="orange", linetype="dashed", size=1, alpha = 0.5) +
# Main Graph Info
labs(title="Activity in the past 48 hours", subtitle="From {DATE 1} to {DATE 2}", caption="{LOCATION}") +
scale_x_continuous(name = "Time of Day", breaks=seq(c(0:23))) +
scale_y_continuous(name = "Activity") +
theme(legend.position="top")
dev.off()
Result
As pointed out by @Ben, you need to map the color inside aes() for the legend to be displayed.
However, a better way to get a ggplot is to merge your two values "Time" and "Timesec" into a single dataframe and reshape your dataframe into a longer format. Here, to illustrate this, I created this dummy dataframe:
Time = sample(1:24, 200, replace = TRUE)
Timesec = sample(1:24, 200, replace = TRUE)
df <- data.frame(Time, Timesec)
Time Timesec
1 22 23
2 21 9
3 19 9
4 10 6
5 7 24
6 15 9
... ... ...
So, the first step is to reshape your dataframe into a longer format. Here, I'm using pivot_longer function from tidyr package:
library(tidyr)
library(dplyr)
df %>% pivot_longer(everything(), names_to = "var",values_to = "val")
# A tibble: 400 x 2
var val
<chr> <int>
1 Time 22
2 Timesec 23
3 Time 21
4 Timesec 9
5 Time 19
6 Timesec 9
7 Time 10
8 Timesec 6
9 Time 7
10 Timesec 24
# … with 390 more rows
To add geom_vline and geom_text based on the mean of your values, an easy way is to create a second dataframe holding the mean and the maximal density values that need to be plotted:
library(tidyr)
library(dplyr)
df_lab <- df %>% pivot_longer(everything(), names_to = "var",values_to = "val") %>%
group_by(var) %>%
summarise(Mean = mean(val),
Density = max(density(val)$y))
# A tibble: 2 x 3
var Mean Density
<chr> <dbl> <dbl>
1 Time 11.6 0.0555
2 Timesec 12.1 0.0517
So, using df and df_lab, you can generate your entire plot. Here, we passed color and fill arguments into the aes and use scale_color_manual and scale_fill_manual to set appropriate colors:
library(dplyr)
library(tidyr)
library(ggplot2)
# Density of each variable, with colour and fill mapped to `var` so that a
# legend is drawn; the mean lines and labels come from df_lab.
df %>%
  pivot_longer(everything(), names_to = "var", values_to = "val") %>%
  ggplot(aes(x = val, fill = var, colour = var)) +
  geom_density(alpha = 0.8) +
  scale_color_manual(values = c("#4271AE", "orange")) +
  scale_fill_manual(values = c("#1A3552", "gray")) +
  geom_vline(
    inherit.aes = FALSE, data = df_lab,
    aes(xintercept = Mean, color = var),
    linetype = "dashed", size = 1, show.legend = FALSE
  ) +
  geom_text(
    inherit.aes = FALSE, data = df_lab,
    aes(x = Mean - 0.5, y = Density / 2, label = var, color = var),
    angle = 90, show.legend = FALSE
  ) +
  labs(title="Activity in the past 48 hours", subtitle="From {DATE 1} to {DATE 2}", caption="{LOCATION}") +
  # One break per hour value. Written explicitly as 1:24, which is what the
  # previous seq(c(0:23)) evaluated to (seq() on a vector acts like seq_along).
  scale_x_continuous(name = "Time of Day", breaks = 1:24) +
  scale_y_continuous(name = "Activity") +
  theme(legend.position = "top")
Does it answer your question ?

ggplot sf_geom create custom border between multiple area

I want to draw borders between multiple sub-parts of my map, as in this answer:
https://stackoverflow.com/a/49523256/9829458
However, It does not work in my R. Only external border of this map was drawn. And when I use this code on my dataset, I have the same problem...
My data :
city_code Name Long Lat Groups
<chr> <chr> <dbl> <dbl> <dbl>
1 34001 ABEI… 724751. 6262333. 9
2 34002 ADIS… 734961. 6270688. 10
3 34003 AGDE 739245. 6245728. 7
4 34004 AGEL 688135. 6249905. 4
5 34005 AGON… 758530. 6311345. 20
6 34006 AIGNE 683215. 6247000. 4
7 34007 AIGU… 685638. 6249976. 4
8 34008 LES … 705573. 6274482. 6
9 34009 ALIG… 727555. 6263258. 9
10 34010 ANIA… 747789. 6287511. 18
My map :
read_sf("Map.shp") %>%
mutate(Groups = as.factor(Groups)) %>%
mutate(Groups = factor(Groups, levels = c(paste0(1:23)))) %>%
ggplot() +
geom_sf(aes(fill = Groups)) +
theme_bw()
So, in my case, I want to draw the "Groups" borders on my map while still seeing the city borders (and keep the fill = Groups colors).
Solution was edited in the original post :
# Draw the cities filled by group, then overlay a thicker outline per group.
# The second layer's data is the plot data collapsed with group_by(Groups) +
# summarise(), i.e. one feature per group.
read_sf("Map.shp") %>%
  # Single conversion to a factor with levels "1".."23" in numeric order.
  mutate(Groups = factor(Groups, levels = as.character(1:23))) %>%
  ggplot() +
  geom_sf(aes(fill = Groups), size = 0.4) +
  geom_sf(
    data = . %>% group_by(Groups) %>% summarise(),
    fill = "transparent", color = "Black", size = 1
  ) +
  theme_bw()

Resources