ggplot2: draw lines connecting observations in each group in faceted boxplot - r

I have the following dataframe:
# Reproducible toy data with 180 rows (3 methods x 60 rows each).
# Pattern supplies 90 values, which data.frame() recycles to 180 rows.
set.seed(20210714)
dd <- data.frame(
  Method = rep(c("A", "B", "C"), each = 60),
  Pattern = as.factor(rep(c("X", "Y", "Z"), times = 30)),
  X1 = runif(180),
  Complexity = rep(c("High", "Low"), times = 90),
  nsim = rep(rep(1:10, times = 9), each = 2)
)
I want to get boxplots of X1 for each method and across the three patterns and within each complexity. I use the following plot:
# Boxplots of X1: one box per Method within each Pattern, and one panel
# per Complexity level. The fill legend sits below the plot on one row.
ggplot(data = dd, mapping = aes(x = Pattern, y = X1, fill = Method)) +
  geom_boxplot() +
  facet_grid(~Complexity) +
  guides(fill = guide_legend(nrow = 1)) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )
which gives me the attached figure:
Fabulous. However, the observations for methods A, B and C come from the same dataset (identified by nsim) within 'X' (and likewise within 'Y' and within 'Z'), so I would like to link the observations (values of X1) across the three methods within each of the three patterns (but not link across the three patterns, because that would be meaningless).
Specifically, I want a plot as follows (imagine hand-drawn lines connecting the different simulation IDs here):
So I tried the following; however, the boxplots are no longer bunched together for each x (and the x axis is now also messed up).
library(ggplot2)
# Attempt from the question: use one x position per Method/Pattern
# combination so that lines can be drawn between the methods.
ggplot(dd, aes(x = interaction(Method,Pattern), y = X1, fill = Method)) +
# Boxes are placed at their own x position (no dodging).
geom_boxplot(aes(fill = Method), position = "identity") +
# Thin grey lines joining rows that share the same Pattern and nsim.
geom_line(aes(x = interaction(Method,Pattern), y = X1,
group=interaction(Pattern,nsim)),
size = 0.15, alpha = 0.5, colour = I("#525252")) +
facet_grid(~Complexity) +
theme_light() +
theme(legend.position = 'bottom') +
guides(fill = guide_legend(nrow=1)) +
# Second line layer with the same grouping, drawn thicker (0.35 vs 0.15).
geom_line(aes(x = interaction(Method,Pattern),
group=interaction(Pattern,nsim)),
size = 0.35, alpha = 0.5, colour = I("#525252")) +
# Small grey points on the individual observations.
geom_point(aes(x = interaction(Method,Pattern),
group=interaction(Pattern,nsim)),
size = 0.35, alpha = 0.5, colour = I("#525252")) +
# Nine labels are supplied; only the middle one of each triple shows the
# Pattern name, the rest are blank.
scale_x_discrete(labels = c("","X", "", "", "Y", "", "", "Z","")) + xlab("Pattern")
Which gives the following:
but the boxplots for each setting of Pattern get separated (I would like them bunched together) and the x axis also gets messed up (for which I have a somewhat inelegant fix). So the most important thing I still need resolved is the spacing: the space between the boxplots inside each Pattern should be smaller than the space between boxplots of different Patterns.
How do I fix this? Many thanks for your suggestions.

Is this what you're looking for?
# Boxplots of X1 per Method within each Pattern (one panel per Complexity),
# with thin dodged grey lines drawn underneath for rows that share the same
# Method and nsim.
ggplot(dd, aes(x = Pattern, y = X1, fill = Method)) +
  geom_line(
    aes(group = interaction(Method, nsim)),
    position = position_dodge(width = 0.8),
    size = 0.1, alpha = 0.5, colour = I("#525252")
  ) +
  facet_grid(~Complexity) +
  geom_boxplot() +
  # The complete theme must come first: adding theme_light() after theme()
  # would replace the legend-position and axis-text settings below.
  theme_light() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  guides(fill = guide_legend(nrow = 1))

I am not sure if there are better methods of doing this, but I did the following:
# Same simulated data as in the question, except that Pattern is not
# wrapped in as.factor() here.
set.seed(20210714)
dd <- data.frame(Method = rep(c("A", "B", "C"), each = 60), Pattern = rep(c("X", "Y", "Z"), times = 30), X1 = runif(180), Complexity = rep(c("High", "Low"), times = 90), nsim = rep(rep(1:10, times = 9), each = 2))
library(ggplot2)
# Create a dummy data frame: a copy of dd with two extra rows appended.
# The extra rows get Method "ZZZ", the first two unique Pattern values and
# one Complexity level each; no other columns are assigned for them.
# NOTE(review): presumably these rows exist only to add a spacer position
# between Pattern groups on the x axis -- confirm.
dummy.df <- dd
dummy.df[nrow(dd) + 1:2,"Pattern"] <- unique(dd$Pattern)[-3]
dummy.df[nrow(dd) + 1:2,"Method"] <- "ZZZ"
dummy.df[nrow(dd) + 1:2,"Complexity"] <- c("High","Low")
# One combined Method/Pattern factor used as the x variable.
dummy.df$dummy <- interaction(dummy.df$Method,dummy.df$Pattern)
ggplot(dummy.df, aes(x = dummy, y = X1, fill = Method)) +
geom_boxplot(aes(fill = Method)) +
# Thin grey lines joining rows that share the same Pattern and nsim.
geom_line(aes(x = dummy, y = X1,
group=interaction(Pattern,nsim)),
size = 0.15, alpha = 0.5, colour = I("#525252")) +
facet_grid(~Complexity) +
theme_light() +
theme(legend.position = 'bottom') +
guides(fill = guide_legend(nrow=1)) +
# Second line layer with the same grouping, drawn thicker (0.35 vs 0.15).
geom_line(aes(x = dummy,
group=interaction(Pattern,nsim)),
size = 0.35, alpha = 0.5, colour = I("#525252")) +
geom_point(aes(x = dummy,
group=interaction(Pattern,nsim)),
size = 0.35, alpha = 0.5, colour = I("#525252")) +
# Blank labels everywhere except one position per Pattern.
scale_x_discrete(labels = c("","X", "", "", "", "Y", "", "", "", "Z","","")) +
xlab("Pattern") +
# Only A, B and C appear in the fill legend; "ZZZ" is left out of breaks.
scale_fill_brewer(breaks=c("A", "B", "C"), type="qual", palette="Dark2")
which yields the following:
I would like the boxplots to be closer to each other, and would like some advice on how to do this, if anyone has any ideas. Perhaps the next step will be to write this up as a general function.

Related

Customize the position of `geom_rug`

Below is a working example
library(ggplot2)
# Simulated data: 900 expression values, with time drawn from a different
# normal distribution for each of the three membership groups (300 rows each).
set.seed(926)
df <- data.frame(
  expression = rnorm(900),
  time = c(
    rnorm(300),
    rnorm(300, mean = 1, sd = 2),
    rnorm(300, mean = 2, sd = 0.5)
  ),
  membership = factor(rep(1:3, each = 300))
)
# Scatter plot with three bottom rugs, one per membership group. Each rug is
# drawn from its own subset with its own colour; groups 3 and 2 are given
# explicit lengths (1.5 cm and 1 cm).
ggplot(df, aes(x = time, y = expression, fill = membership)) +
  geom_point(shape = 21, size = 3) +
  geom_rug(
    data = df[df$membership == 3, ],
    sides = "b", color = "green", length = unit(1.5, "cm")
  ) +
  geom_rug(
    data = df[df$membership == 2, ],
    sides = "b", color = "blue", length = unit(1, "cm")
  ) +
  geom_rug(
    data = df[df$membership == 1, ],
    sides = "b", color = "red"
  ) +
  scale_y_continuous(expand = c(0.3, 0))
My hope is something like
.
Note that I know the options outside = TRUE and sides = "tb" exist. But placing all rug plots at the bottom is what I am really hoping for.
geom_rug is designed to be drawn at the margins of a plot. It's probably best to use geom_point with a custom symbol in this case:
# Emulate stacked rugs with "|" point symbols: each membership group gets
# its own row below the data, at y = -as.numeric(membership) - 2.5, and a
# horizontal line is drawn at y = -3.
ggplot(df, aes(x = time, y = expression, fill = membership)) +
  geom_point(shape = 21, size = 3) +
  geom_point(
    aes(y = -as.numeric(membership) - 2.5, color = membership),
    shape = "|",
    size = 8
  ) +
  geom_hline(yintercept = -3) +
  scale_y_continuous(breaks = c(-2, 0, 2)) +
  theme_classic(base_size = 20)
I don't think the position of geom_rug() can be easily customised. I'd recommend to use geom_segment() instead to draw the rugs like you'd want them.
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.2.2
# Simulated data: 900 expression values, with time drawn from a different
# normal distribution for each of the three membership groups.
set.seed(926)
df <- data.frame(
  expression = rnorm(900),
  time = c(
    rnorm(300),
    rnorm(300, mean = 1, sd = 2),
    rnorm(300, mean = 2, sd = 0.5)
  ),
  membership = factor(rep(1:3, each = 300))
)

# Helper variables for placing the rug segments:
#   limits - observed range of expression
#   step   - vertical spacing between rugs (10% of that range)
#   size   - half-height of each rug segment (45% of step)
limits <- range(df$expression)
step <- 0.1 * diff(limits)
size <- step * 0.45
# Draw the rugs as short vertical segments. Each membership group is centred
# `step` further below the smallest expression value, and each segment
# extends `size` above and below that centre.
ggplot(df, aes(x = time, y = expression, fill = membership)) +
  geom_point(shape = 21, size = 3) +
  geom_segment(
    aes(
      colour = membership,
      xend = time,
      y = limits[1] - as.numeric(membership) * step + size,
      yend = limits[1] - as.numeric(membership) * step - size
    )
  )
Created on 2022-12-12 by the reprex package (v2.0.1)

ggplot trying to make a Cleveland plot but I cannot get a legend

library(ggplot2)
library(ggthemes)
# Load the data. The post describes the CSV as: names, starting number,
# final number.
data <- read.csv('/Users/zbhay/Documents/r-data.csv', header = 1)
# One black segment per x from value1 to value2, with a point at each end.
# The point colours are set as constants outside aes().
zb <- ggplot(data) +
geom_segment( aes(x=x, xend=x, y=value1, yend=value2), color="black")+
geom_point( aes(x=x, y=value1), color=rgb(0.2,0.7,0.1,1), size=4 )+
geom_point( aes(x=x, y=value2), color=rgb(0.7,0.2,0.1,1), size=4 )+
coord_flip() +
theme_solarized() +
scale_y_continuous(breaks = seq(0, 10000, by = 500)
)
# Print the plot with a title, subtitle and axis labels added.
zb + labs(title = "Title",
subtitle = "subtitle") +
xlab("Business Functions") +
ylab("# of hours")
# NOTE(review): legend() is called as a separate top-level statement; it is
# not added to the ggplot object `zb`.
legend("left", c("Starting", "Ending"),
box.col = "darkgreen"
)
Hello, here is the code. The CSV file is structured as follows; column A = names, column b = starting number, column c = final number.
I am trying to set up a legend that calls out the final number vs starting number. I have tried and tried but cannot seem to be able to crack it. If anyone knows a fix, I would appreciate it if you could let me know.
As a general rule when using ggplot2 you have to map on aesthetics if you want to get a legend, i.e. instead of setting the colors for your points as arguments map a value on the color aes, e.g. in my code below I map the constant value or category start on the color aes inside aes() for the first geom_point. Afterwards you could use scale_color_manual to assign your desired colors and labels to these "categories" or "values". Finally, the color of the legend box could be set via the theme option legend.background. However, the legend keys themselves have a background color too, which I set to NA via legend.key.
Using some fake random example data:
library(ggplot2)
library(ggthemes)
# Fake example data: five categories with random start and end values.
set.seed(123)
data <- data.frame(
  x = letters[1:5],
  value1 = runif(5, min = 0, max = 10000),
  value2 = runif(5, min = 0, max = 10000)
)

# Cleveland-style dot plot. The constants "start" and "end" are mapped to
# the colour aesthetic inside aes() so that ggplot2 builds a legend;
# scale_color_manual() then assigns the actual colours and legend labels.
ggplot(data) +
  geom_segment(
    aes(x = x, xend = x, y = value1, yend = value2),
    color = "black"
  ) +
  geom_point(aes(x = x, y = value1, color = "start"), size = 4) +
  geom_point(aes(x = x, y = value2, color = "end"), size = 4) +
  coord_flip() +
  theme_solarized() +
  scale_y_continuous(breaks = seq(0, 10000, by = 500)) +
  scale_color_manual(
    values = c(start = rgb(0.2, 0.7, 0.1, 1), end = rgb(0.7, 0.2, 0.1, 1)),
    labels = c(start = "Starting", end = "Ending")
  ) +
  labs(
    title = "Title",
    subtitle = "subtitle",
    x = "Business Functions",
    y = "# of hours",
    color = NULL
  ) +
  # Dark green legend box; the key backgrounds are made transparent.
  theme(
    legend.background = element_rect(fill = "darkgreen"),
    legend.key = element_rect(fill = NA)
  )

How to make log10 ONLY first y-axis (not secondary y-axis) in ggplot in R

I would like to plot ONLY y-axis1 DATA (left axis, Var1, dotted line) as a log10 scale. The dotted line would therefore look higher on the y-axis and differences between 1 and 2 would be noticeable.
I have tried several things, but they do not work (I believe I am using them in the wrong order/place), such as:
+coord_trans(y='log10')--> empty plot
scale_y_continuous(trans = log10_trans(),... --> makes both Var1 and Var 2 log10
scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),labels = trans_format("log10", math_format(10^.x)))--> makes both y axis log10 and removes y-axis2 (Var2)
# Toy data: three days for each of the series a, b and c.
data <- data.frame(
  Day = rep(c(1, 2, 3), times = 3),
  Name = rep(c("a", "b", "c"), each = 3),
  Var1 = rep(c(1090, 484, 64010), times = 3),
  Var2 = c(4, 16, 39, 2, 22, 39, 41, 10, 3)
)

# Stacked bars of Var2 (multiplied by 1000 to share the Var1 axis) with Var1
# drawn as a dotted line plus shape-8 markers. The secondary axis divides by
# 1000 to show Var2 on its original scale.
ggplot(data) +
  geom_bar(
    aes(x = Day, y = Var2 * 1000, fill = Name),
    stat = "identity",
    colour = "black",
    position = position_stack(reverse = TRUE)
  ) +
  geom_line(
    aes(x = Day, y = Var1),
    stat = "identity", color = "black", linetype = "dotted", size = 0.8
  ) +
  geom_point(aes(x = Day, y = Var1), shape = 8) +
  labs(title = "", x = "", y = expression('Var1')) +
  scale_y_continuous(
    sec.axis = sec_axis(~ . / 1000, name = expression(paste("Var2")))
  ) +
  theme_classic() +
  scale_fill_grey(start = 1, end = 0.1, name = "", labels = c("a", "b", "c"))
I think the easiest way is to have the primary axis be the linear one, but put it on the right side of the plot. Then, you can have the secondary one be your log-transformed axis.
library(ggplot2)
# Toy data: three days for each of the series a, b and c.
data <- data.frame(
  Day = rep(c(1, 2, 3), times = 3),
  Name = rep(c("a", "b", "c"), each = 3),
  Var1 = rep(c(1090, 484, 64010), times = 3),
  Var2 = c(4, 16, 39, 2, 22, 39, 41, 10, 3)
)

# Scaling factor between the two axes: chosen maximum of the secondary
# (log10) axis, log10(3e6), divided by the chosen maximum of the primary
# axis, 80.
upper <- log10(3e6) / 80
# Break function for the log axis: compute extended breaks on the log10 of
# the input and return them as powers of ten.
breakfun <- function(x) {
  log_breaks <- scales::extended_breaks()
  10^log_breaks(log10(x))
}
# Stacked bars of Var2 on a linear primary axis drawn on the right.
# Var1 is plotted as log10(Var1) / upper so it shares that axis; the
# secondary axis converts back with 10^(. * upper) and takes its breaks
# from breakfun.
ggplot(data) +
  geom_bar(
    aes(x = Day, y = Var2, fill = Name),
    stat = "identity",
    colour = "black",
    position = position_stack(reverse = TRUE)
  ) +
  geom_line(
    aes(x = Day, y = log10(Var1) / upper),
    stat = "identity", color = "black", linetype = "dotted", size = 0.8
  ) +
  geom_point(aes(x = Day, y = log10(Var1) / upper), shape = 8) +
  labs(title = "", x = "", y = expression('Var1')) +
  scale_y_continuous(
    position = "right",
    name = "Var2",
    sec.axis = sec_axis(
      ~ 10^(. * upper),
      name = expression(paste("Var1")),
      breaks = breakfun
    )
  ) +
  theme_classic() +
  scale_fill_grey(start = 1, end = 0.1, name = "", labels = c("a", "b", "c"))
Created on 2022-02-09 by the reprex package (v2.0.1)
Here is a custom breaks function:
#' Power-of-ten axis breaks covering a pair of limits
#'
#' @param limits Numeric vector whose first two elements are the lower and
#'   upper axis limits.
#' @return Numeric vector of powers of ten, from the truncated log10 of the
#'   lower limit (or 10^0 when the lower limit is not positive) up to the
#'   truncated log10 of the upper limit.
br <- function(limits) {
  # A non-positive lower limit has no usable log10, so start at 10^0.
  # Plain if/else is used because the condition is a single value.
  lower_exp <- if (limits[1] <= 0) 0 else trunc(log10(limits[1]))
  upper_exp <- trunc(log10(limits[2]))
  10^seq(lower_exp, upper_exp, by = 1)
}
# Same plot as in the question (Var2 * 1000 as stacked bars, Var1 as a
# dotted line with shape-8 markers), but with the primary-axis breaks
# supplied by br.
ggplot(data) +
  geom_bar(
    aes(x = Day, y = Var2 * 1000, fill = Name),
    stat = "identity", colour = "black",
    position = position_stack(reverse = TRUE)
  ) +
  geom_line(
    aes(x = Day, y = Var1),
    stat = "identity", color = "black", linetype = "dotted", size = 0.8
  ) +
  geom_point(aes(x = Day, y = Var1), shape = 8) +
  labs(title = "", x = "", y = expression('Var1')) +
  scale_y_continuous(
    breaks = br,
    sec.axis = sec_axis(~ . / 1000, name = expression(paste("Var2")))
  ) +
  theme_classic() +
  scale_fill_grey(start = 1, end = 0.1, name = "", labels = c("a", "b", "c"))
Results aren't so pretty but you can customize the breaks as you wish.
You absolutely should read the answer @teunbrand linked to in the comment to your question. But for the matter of displaying log values on the left and original values on the right, you can use:
# Bars show log10(Val1) on the primary (left) axis; the secondary axis maps
# those values back to the original scale with 10^.
# Val1 is already log-transformed inside aes(), so a plain continuous scale
# is used here: a log10 scale would apply the logarithm a second time.
tibble(
  Day = 1:10,
  Val1 = 10 * Day
) %>%
  ggplot(aes(x = Day, y = log10(Val1))) +
  geom_col() +
  scale_y_continuous(
    name = "log(Val1)",
    sec.axis = sec_axis(~ 10^., name = "Val1")
  )

ggplot2: add line and points showing means (stat_summary)

So I am using this data frame:
# Survey-style data: five questions (Var1) answered by four groups (Var2),
# stored group by group, with one value per question/group pair.
xym <- data.frame(
  Var1 = rep(
    c("vloga", "odločitve", "dolgoročno", "krizno", "uživa v"),
    times = 4
  ),
  Var2 = rep(c("Nad", "Pod", "Enak", "Sam."), each = 5),
  value = c(
    4, 3, 4, 4, 3,
    3, 3, 2, 3, 3,
    3, 2.5, 2.5, 2, 3.5,
    5, 6, 6, 5, 6
  )
)
And with this code:
# Horizontal dodged bar chart of value per question (Var1), filled by group
# (Var2), with each bar's value printed at y = max(value).
p <- ggplot(xym, aes(x = Var1, y = value, fill = Var2)) +
  coord_flip() +
  theme_bw() +
  scale_fill_manual(
    values = c("yellow", "deepskyblue1", "yellowgreen", "orchid4")
  ) +
  # levels() returns NULL when Var1 is a plain character column (the
  # data.frame() default since R 4.0), which makes xlim() fail. Converting
  # with factor() first gives the same levels for factor and character input.
  xlim(rev(levels(factor(xym$Var1)))) +
  theme(
    axis.title = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "bottom",
    axis.text.x = element_text(angle = 0, vjust = 0.4)
  ) +
  geom_bar(
    stat = "identity", width = 0.7,
    position = position_dodge(width = 0.7)
  ) +
  geom_text(
    aes(x = Var1, y = max(value), label = round(value, 2), fill = Var2),
    angle = 0, position = position_dodge(width = 0.7), size = 4.2
  )
# Print the plot with the fill legend title blanked.
p + labs(fill = "")
# Print the plot with a red line through the mean value of each question.
p + stat_summary(fun.y = mean, colour = "red", geom = "line", aes(group = 1))
I produce output:
But besides the red line, which marks the overall average per question (i.e. "dolgoročno", "krizno", etc.), I would like to add points next to the bars, as well as labels showing the mean of each individual question.
My output should look something like the picture below, (I did it in paint), where the black dots represent my desired points and the value 3.6 of the first dot is the average of (6,2,4,2.5) and represents my desired value labels.
I've also looked at:
Plot average line in a facet_wrap
ggplot2: line connecting the means of grouped data
How to label graph with the mean of the values using ggplot2
One option would be the following. I followed your code and added a few lines.
# Your code
# Horizontal dodged bar chart of value per question (Var1), filled by group
# (Var2), with each bar's value printed at y = max(value).
p <- ggplot(xym, aes(x = Var1, y = value, fill = Var2)) +
  coord_flip() +
  theme_bw() +
  scale_fill_manual(
    values = c("yellow", "deepskyblue1", "yellowgreen", "orchid4")
  ) +
  # levels() returns NULL when Var1 is a plain character column (the
  # data.frame() default since R 4.0), which makes xlim() fail. Converting
  # with factor() first gives the same levels for factor and character input.
  xlim(rev(levels(factor(xym$Var1)))) +
  theme(
    axis.title = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "bottom",
    axis.text.x = element_text(angle = 0, vjust = 0.4)
  ) +
  geom_bar(
    stat = "identity", width = 0.7,
    position = position_dodge(width = 0.7)
  ) +
  geom_text(
    aes(x = Var1, y = max(value), label = round(value, 2), fill = Var2),
    angle = 0, position = position_dodge(width = 0.7), size = 4.2
  )
# Print the plot with the fill legend title blanked.
p + labs(fill = "")
Then, I added the following code. You can add dots by changing geom to point in stat_summary. For labels, I chose to get data from ggplot_build() and created a data frame called foo. (I think there are other ways to do the same job.) Using foo, I added the annotation at the end.
# Add two summary layers to p: a red line through the mean value of each
# question, and a large black point at each of those means.
p2 <- p +
  stat_summary(
    aes(group = 1),
    fun.y = mean, geom = "line", color = "red"
  ) +
  stat_summary(
    aes(group = 1),
    fun.y = mean, geom = "point", color = "black",
    size = 5, show.legend = FALSE
  )

# Computed data behind the fourth layer of p2; its x and y columns are used
# below to position and label the annotations.
foo <- as.data.frame(ggplot_build(p2)$data[[4]])

# Label each mean, offset by 0.5 along the value axis.
p2 +
  annotate(
    "text",
    x = foo$x, y = foo$y + 0.5,
    color = "black", label = foo$y
  )

ggplot2 legend with two different geom_point

I have the following ggplot graph with circles representing the observed data and the crosses the mean for each treatment :
# Twelve simulated observations (three per treatment) ...
d <- data.frame(
  Number = rnorm(n = 12, mean = 100, sd = 20),
  Treatment = rep(c("A", "B", "C", "D"), each = 3)
)
# ... and the mean of Number for each treatment.
av <- aggregate(x = d["Number"], by = d["Treatment"], FUN = mean)
# Observed values as large grey open circles (shape 1); the per-treatment
# means from `av` are overlaid as crosses (shape 4).
ggplot(data = d, mapping = aes(x = Treatment, y = Number)) +
  geom_point(color = "grey50", shape = 1, size = 6) +
  geom_point(data = av, shape = 4) +
  theme_bw()
I would like to add a legend with the exact same symbols on top of the graph, but I'm a bit lost... I use aes to force the creation of a legend and then try to modify it with manual scales, but the result is not convincing. I would like to have one grey circle of size 6. That also sounds quite complicated for such a basic thing... There is probably an easier solution.
# Attempt from the question: quoted constants are mapped inside aes() to
# force legends, then relabelled with manual scales.
ggplot(data = d, aes(y = Number, x = Treatment)) +
# Observed values: shape, size and colour are all mapped to constants.
geom_point(aes(shape = "1", size = "6", color = "grey50")) +
# Means: only shape is mapped here.
geom_point(data=av, aes(shape = "4")) +
theme_bw() +
# All three manual scales share the same empty name and the same labels.
scale_shape_manual(name = "", values = c(1,4), labels = c("observed values", "mean")) +
scale_size_manual(name = "", values = c(6,1), labels = c("observed values", "mean")) +
scale_color_manual(name = "", values = c("grey50","black"),
labels = c("observed values", "mean")) +
# Legend on top, with no border around the keys.
theme(legend.position = "top",
legend.key = element_rect(color = NA))
http://imagizer.imageshack.us/v2/320x240q90/842/4pgj.png
The ggplot2 way would be combining everything into a single data.frame like this:
# Tag each row with its kind, then stack observations and means into one
# data frame so a single geom_point() layer can map shape, size and colour
# to that tag. Note that `d` is overwritten with the combined data.
d$Aggregated <- "observed value"
av$Aggregated <- "mean"
d <- rbind(d, av)

ggplot(
  data = d,
  aes(
    x = Treatment, y = Number,
    shape = Aggregated, size = Aggregated, colour = Aggregated
  )
) +
  geom_point()
And then customize using manual scales and themes.

Resources