I have similar data like this :
# Example data: one row per observation, holding a numeric class code and the
# value measured for it.
Dat <- read.table(
  header = TRUE,
  text = " Class Value
0 8
1 10
0 9
2 12
2 13
1 10
3 15
2 12
3 12
4 17
4 19
1 13
2 12
4 10
"
)
A= 0, B=1, C=2, D=3, E=4
I have tried to sketch my plot as follows, but I failed to plot it using geom_line.
The sketch is hypothetical and may not reflect the hypothetical data.
The X-axis shows the range of values for each group, i.e., A, B, C, D, E. For example, in the data, E ranges from 10 to 19.
Another option using geom_linerange like this:
library(dplyr)
library(ggplot2)
# Reduce every class to its smallest and largest value, then draw one
# horizontal range per class.
Dat %>%
  group_by(Class = as.factor(Class)) %>%
  summarise(
    x_min = min(Value),
    x_max = max(Value)
  ) %>%
  ggplot(aes(xmin = x_min, xmax = x_max, y = Class, colour = Class, group = Class)) +
  geom_linerange() +
  scale_y_discrete(labels = c("A", "B", "C", "D", "E")) +
  labs(x = "Value", y = "Class", color = "") +
  theme_classic() +
  # Must follow theme_classic(), which would otherwise reset the legend setting.
  theme(legend.position = "none")
Created on 2023-01-01 with reprex v2.0.2
Just wanted to throw a base R plot() approach in there for posterity though I see this is tagged with ggplot:
# Base-graphics version: an empty canvas, then one horizontal segment per class
# spanning the range of that class's values.
plot(Dat[, 2:1], type = "n", xlim = c(5, 20), axes = FALSE, ylab = "")
# segments() is called only for its side effect, so a plain loop is used;
# a top-level sapply() would additionally print a list of NULLs.
for (cls in unique(Dat$Class)) {
  value_range <- range(Dat$Value[Dat$Class == cls])
  segments(
    x0 = value_range[1],
    x1 = value_range[2],
    y0 = cls,      # the segment sits at the class code on the y axis
    col = cls + 1  # colour index is the class code shifted by one
  )
}
axis(1)
# Class codes 0, 1, 2, ... are shown as A, B, C, ...
axis(2, at = unique(Dat$Class), labels = LETTERS[unique(Dat$Class) + 1], las = 1)
mtext("Class", side = 2, las = 1, adj = 2)
A bit tongue-in-cheek, but this replicates your plot style with minimal code, and uses geom_line, as requested.
library(ggplot2)
# Recode the numeric class to a letter, then let geom_line() join all points
# that share a class; on a common y value this draws a thick horizontal bar.
ggplot(
  transform(Dat, Class = LETTERS[Class + 1]),
  aes(x = Value, y = Class, color = Class)
) +
  geom_line(lineend = "round", linewidth = 4) +
  scale_x_continuous(limits = c(5, 20), breaks = c(5, 10, 15, 20)) +
  scale_color_manual(
    guide = "none",
    values = c("#ed2028", "#22b14c", "#ffaec9", "#a349a4", "#c4c4c4")
  ) +
  theme_classic(base_size = 30)
library(tidyverse)
library(ggalt)
# Per-class minimum and maximum, drawn as a dumbbell from min to max.
Dat %>%
  group_by(Class) %>%
  summarise(min = min(Value), max = max(Value)) %>%
  ggplot(aes(x = min, xend = max, y = Class, color = factor(Class))) +
  geom_dumbbell(size = 1) +
  labs(x = "Value") +
  theme_light()
Related
I have the data frame below and want to draw a ggplot with a stacked bar showing the lower and upper limits for each data point.
library(ggplot2)
# Two rows per name: `val` is the point, `val0` and `val2` its lower and upper
# limits. Group B is group A shifted up by 2.
dat <- rbind(
  data.frame(val = c(3, 4), val0 = c(1, 2), val2 = c(5, 6), name = c("A", "A")),
  data.frame(val = c(3, 4) + 2, val0 = c(1, 2) + 2, val2 = c(5, 6) + 2, name = c("B", "B"))
)
dat
> dat
val val0 val2 name
1 3 1 5 A
2 4 2 6 A
3 5 3 7 B
4 6 4 8 B
I am able to obtain below ggplot
# One large translucent point per row, coloured by name.
ggplot(dat, aes(x = name, y = val, color = name)) +
  geom_point(alpha = 0.3, size = 10)
However I want to get below plot (hand-drawn - just for illustration)
Is there any way to draw above plot with my data?
Any pointer will be very helpful
To get the rectangles, the easiest way is probably to use a modified box plot:
# Draw the limit rectangles as pre-computed box plots: with stat = "identity"
# the box runs from `lower` to `upper`, and both whiskers and the median line
# are pinned to the box edges so that only the rectangle is visible.
ggplot(
  transform(dat, group = c(1, 2, 1, 2)),
  aes(x = name, y = val, group = group)
) +
  geom_boxplot(
    mapping = aes(
      group = interaction(name, group), fill = name,
      ymin = val0, lower = val0, middle = val0,
      upper = val2, ymax = val2
    ),
    stat = "identity", width = 0.5,
    alpha = 0.5, color = "#00000030"
  ) +
  # Same dodge width as the boxes so that each point sits inside its rectangle.
  geom_point(
    mapping = aes(fill = name),
    shape = 21, size = 4,
    position = position_dodge(width = 0.5)
  ) +
  scale_fill_brewer(palette = "Set1") +
  theme_minimal(base_size = 20)
Maybe you want to use a dodged geom_pointrange like this:
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.1.2
# Two rows per name: `val` is the point, `val0` and `val2` its lower and upper
# limits.
dat <- rbind(
  data.frame(val = c(3, 4), val0 = c(1, 2), val2 = c(5, 6), name = c("A", "A")),
  data.frame(val = c(3, 4) + 2, val0 = c(1, 2) + 2, val2 = c(5, 6) + 2, name = c("B", "B"))
)
# Running index of each row within its `name` group (1, 2, ... per name), used
# as the dodge group. ave() writes the index back in row order, so it stays
# correct even if the rows are not sorted by name; building it from
# table() + sapply() relied on sorted rows and on sapply()'s return shape.
dat$fac <- factor(ave(seq_along(dat$name), dat$name, FUN = seq_along))
# One point with its range per row; rows sharing a name are dodged sideways by
# their within-name index `fac`.
ggplot(
  dat,
  aes(x = name, y = val, ymin = val0, ymax = val2, colour = name, group = fac)
) +
  geom_pointrange(position = position_dodge(width = 0.3))
Created on 2022-07-20 by the reprex package (v2.0.1)
I am trying to create boxplots that compare Ob vs. A and B at multiple locations (i.e., Start, Mid, End) for the Top and Low values (10% in this case). I am trying to use gather, facet_wrap, grid.arrange, and ggplot in R, but cannot put things together. Here is my code so far; I would appreciate help moving forward.
library(tidyverse)
library(gridExtra)
# Simulated data: 100 rows per location with columns Ob, A and B.
DF_1 <- data.frame(Ob = runif(100, 10, 80), A = runif(100, 5, 90), B = runif(100, 3, 85), loc = rep("Start", 100))
DF_2 <- data.frame(Ob = runif(100, 10, 80), A = runif(100, 5, 90), B = runif(100, 3, 85), loc = rep("Mid", 100))
DF_3 <- data.frame(Ob = runif(100, 10, 80), A = runif(100, 5, 90), B = runif(100, 3, 85), loc = rep("End", 100))

# Top / Low 10%: the ten rows with the largest / smallest Ob at each location.
DF_1_Top <- DF_1[order(DF_1$Ob, decreasing = TRUE), ][1:10, ]
DF_1_Low <- DF_1[order(DF_1$Ob, decreasing = FALSE), ][1:10, ]
DF_2_Top <- DF_2[order(DF_2$Ob, decreasing = TRUE), ][1:10, ]
DF_2_Low <- DF_2[order(DF_2$Ob, decreasing = FALSE), ][1:10, ]
# These two must subset DF_3 itself: indexing DF_1 with DF_3's ordering returned
# unrelated "Start" rows, so the "End" location never reached the plots.
DF_3_Top <- DF_3[order(DF_3$Ob, decreasing = TRUE), ][1:10, ]
DF_3_Low <- DF_3[order(DF_3$Ob, decreasing = FALSE), ][1:10, ]

# Stack the three locations.
DF_Top <- rbind(DF_1_Top, DF_2_Top, DF_3_Top)
DF_Low <- rbind(DF_1_Low, DF_2_Low, DF_3_Low)
# Long format: every column except loc becomes a (Variable, Value) pair.
DF_T <- DF_Top %>% gather(key = "Variable", value = "Value", -"loc")
DF_L <- DF_Low %>% gather(key = "Variable", value = "Value", -"loc")

# One row of per-location panels for the top values ...
P1 <- ggplot(DF_T, aes(x = Variable, y = Value)) +
  geom_boxplot() +
  facet_wrap(~loc, nrow = 1)
# ... and one for the low values.
P2 <- ggplot(DF_L, aes(x = Variable, y = Value)) +
  geom_boxplot() +
  facet_wrap(~loc, nrow = 1)

# Stack the two plots vertically.
grid.arrange(P1, P2, nrow = 2)
Here is a manually drawn figure that I would like to achieve:
You could stack all of your data into a single data frame and create a single graph. For example:
# Stack the top and low subsets; `source` records which one a row came from,
# with levels ordered so that "High" is the upper facet row.
d <- bind_rows(High = DF_Top, Low = DF_Low, .id = "source") %>%
  mutate(source = factor(source, levels = c("High", "Low")))

# One panel per source (rows) and location (columns), with Ob drawn first.
d %>%
  gather(key, value, Ob:B) %>%
  mutate(key = fct_relevel(key, "Ob")) %>%
  ggplot(aes(key, value)) +
  geom_hline(yintercept = 0) +
  geom_boxplot() +
  facet_grid(source ~ loc, switch = "x") +
  labs(x = "", y = "") +
  # No padding below the data, 2% above. expansion() is the current name of
  # the deprecated expand_scale().
  scale_y_continuous(expand = expansion(mult = c(0.0, 0.02))) +
  theme_classic() +
  # Put the location strips under the axis and strip their box, so that they
  # read as a second line of x-axis labels.
  theme(
    strip.placement = "outside",
    strip.background.x = element_rect(colour = NA, fill = NA),
    strip.text.x = element_text(size = 11, face = "bold")
  )
Responding to your comment, I'm not wild about moving the key labels to a legend, but...
# Variant with location on the x axis and the variable (Ob / A / B) moved to a
# colour legend; one facet row per source.
d %>%
  gather(key, value, Ob:B) %>%
  mutate(key = fct_relevel(key, "Ob")) %>%
  ggplot(aes(loc, value, colour = key)) +
  geom_hline(yintercept = 0) +
  geom_boxplot() +
  facet_grid(source ~ ., switch = "x") +
  labs(x = "", y = "", colour = "") +
  # No padding below the data, 2% above. expansion() is the current name of
  # the deprecated expand_scale().
  scale_y_continuous(expand = expansion(mult = c(0.0, 0.02))) +
  theme_classic() +
  # Legend under the plot, pulled upwards by 20 units to sit close to the axis.
  theme(
    legend.position = "bottom",
    legend.box.margin = margin(t = -20)
  )
This is my df :
# Three species (a, b, c) per year, with nb = 1, 2, 3 recycled over both years.
df <- data.frame(
  annee = rep(c(2003, 2004), each = 3),
  sps = c("a", "b", "c"),
  nb = 1:3
)
I create a column containing my labels :
# One label per row, e.g. "nb a = 1".
df$labels <- with(df, paste("nb", sps, "=", nb))
Then I do my plot :
# Density of nb per species, one panel per year, with every row's label
# written at the same fixed position (8, 2.5).
ggplot(df, aes(x = nb)) +
  geom_density(aes(colour = sps, fill = sps), alpha = 0.1) +
  geom_text(
    data = df,
    mapping = aes(x = 8, y = 2.5, label = labels),
    inherit.aes = FALSE, parse = FALSE, colour = "black"
  ) +
  facet_wrap(~annee)
But I have a problem with my text in each facet : I would like to have 3 lines (one for each sps).
I tried with the symbol "\n" but I failed in trying to obtain :
"nb a = 1 \n nb b = 2 \n nb c = 3" for each year
Thanks for help
You will have to concatenate what you want broken into several lines into one single string.
# One row per year whose label is that year's labels joined by line breaks, so
# that each facet receives a single multi-line text.
newdf <- aggregate(
  labels ~ annee,
  data = df,
  FUN = function(lbl) paste(lbl, collapse = "\n")
)

ggplot(df, aes(x = nb)) +
  geom_density(aes(colour = sps, fill = sps), alpha = 0.1) +
  geom_text(data = newdf, mapping = aes(x = 8, y = 2, label = labels), color = "black") +
  facet_wrap(~annee) +
  # Widen both axes so that the text at (8, 2) falls inside the panel.
  scale_x_continuous(limits = c(0, 11)) +
  scale_y_continuous(limits = c(0, 2.25))
You can achieve what you want by creating a separate data.frame for your labels:
library(tidyverse)
df <- data.frame(
  annee = rep(c(2003, 2004), times = 1, each = 3),
  sps = c("a", "b", "c"),
  nb = 1:3
)

# Labels live in their own data frame: one row per year, holding the
# multi-line text together with the position at which to draw it.
label_df <- df %>%
  group_by(annee) %>%
  summarise(labels = paste(paste("nb", sps, "=", nb), collapse = "\n")) %>%
  mutate(x = 6.5, y = 2.2)

ggplot(df, aes(x = nb)) +
  geom_density(aes(colour = sps, fill = sps), alpha = 0.1) +
  geom_text(data = label_df, mapping = aes(x = x, y = y, label = labels)) +
  facet_wrap(~annee) +
  # Zoom the view rather than setting scale limits.
  coord_cartesian(xlim = c(1, 8), ylim = c(0, 2.4))
I have a changing df and I am grouping different values c.
With ggplot2 I plot them with the following code to get a scatter plot with multiple linear regression lines (geom_smooth):
# Scatter plot of b against a with one linear fit per value of c.
# NOTE(review): this snippet passes no data to ggplot() and expects an `alpha`
# object to exist already; both have to be supplied before it can run.
ggplot(aes(x = a, y = b, group = c)) +
  geom_point(shape = 1, aes(color = c), alpha = alpha) +
  # FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
  geom_smooth(method = "lm", aes(group = c, color = c), se = FALSE)
Now I want to display on each geom_smooth line in the plot a label with the value of the group c.
This has to be dynamic, because I can not write new code when my df changes.
Example: my df looks like this
a b c
----------------
1.6 24 100
-1.4 43 50
1 28 100
4.3 11 50
-3.45 5.2 50
So in this case I would get 3 geom_smooth lines in the plot with different colors.
Now I simply want to add a text label to the plot with "100" next to the geom_smooth line for the group c = 100, a text label with "50" next to the line for the group c = 50, and so on. As new groups are introduced in the df, new geom_smooth lines are plotted and need to be labeled.
the whole code for the plot:
# Full plot: points coloured by GG and sized by c, one linear fit per GG, and a
# repelled label per fit taken from labelInfo.
# NOTE(review): df, GG and labelInfo are defined outside this snippet.
# The na.rm = TRUE argument was dropped from ggplot(): ggplot() does not use it.
ggplot(aes(x = a, y = b, group = c), data = df) +
  geom_point(aes(color = GG, size = factor(c)), alpha = 0.3) +
  scale_x_continuous(limits = c(-200, 2300)) +
  scale_y_continuous(limits = c(-1.8, 1.5)) +
  # Line thickness is set with `linewidth`; `size` is deprecated for lines.
  geom_hline(yintercept = 0, linewidth = 0.4, color = "black") +
  scale_color_distiller(palette = "YlGnBu", na.value = "white") +
  # FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
  geom_smooth(method = "lm", aes(group = factor(GG), color = GG), se = FALSE) +
  geom_label_repel(data = labelInfo, aes(x = max, y = predAtMax, label = label, color = label))
You can probably do it if you pick the location you want the lines labelled. Below, I set them to label at the far right end of each line, and used ggrepel to avoid overlapping labels:
library(ggplot2)
library(ggrepel)
library(dplyr)
set.seed(12345)

# Simulated data: 100 points spread over three groups of c.
df <- data.frame(
  a = rnorm(100, 2, 0.5),
  b = rnorm(100, 20, 5),
  c = factor(sample(c(50, 100, 150), 100, TRUE))
)

# For each level of c, fit b ~ a and record the fitted value at the largest
# observed a. That point, the right-hand end of each line, carries the label.
labelInfo <- split(df, df$c) %>%
  lapply(function(x) {
    data.frame(
      predAtMax = lm(b ~ a, data = x) %>%
        predict(newdata = data.frame(a = max(x$a))),
      max = max(x$a)
    )
  }) %>%
  # The label comes from the names of the split list, so a label can never be
  # paired with the wrong row (a separate positional assignment could be).
  bind_rows(.id = "label")

ggplot(df, aes(x = a, y = b, color = c)) +
  geom_point(shape = 1) +
  # FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
  geom_smooth(method = "lm", se = FALSE) +
  # ggrepel nudges the labels apart where the line ends are close together.
  geom_label_repel(
    data = labelInfo,
    aes(x = max, y = predAtMax, label = label, color = label)
  )
This method might work for you. It uses ggplot_build to access the rightmost point in the actual geom_smooth lines to add a label by it. Below is an adaptation that uses Mark Peterson's example.
library(ggplot2)
library(ggrepel)
library(dplyr)
set.seed(12345)

# Simulated data: 100 points spread over three groups of c.
df <- data.frame(
  a = rnorm(100, 2, 0.5),
  b = rnorm(100, 20, 5),
  c = factor(sample(c(50, 100, 150), 100, TRUE))
)

p <- ggplot(df, aes(x = a, y = b, color = c)) +
  geom_point(shape = 1) +
  # FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
  geom_smooth(method = "lm", se = FALSE)

# Layer 2 of the built plot is geom_smooth (layer 1 is geom_point). Keep the
# right-most computed point of every smoothed line.
p.smoothedmaxes <- ggplot_build(p)$data[[2]] %>%
  group_by(group) %>%
  filter(x == max(x)) %>%
  ungroup() # drop the grouping so later steps do not silently work per group

# Label each line end with its fitted y value, in the colour ggplot assigned to
# that line.
p +
  geom_text_repel(
    data = p.smoothedmaxes,
    mapping = aes(x = x, y = y, label = round(y, 2)),
    colour = p.smoothedmaxes$colour,
    inherit.aes = FALSE
  )
This came up for me today and I landed on this solution with data = ~fn()
library(tidyverse)
library(broom)
mpg |>
  ggplot(aes(x = displ, y = hwy, colour = class, label = class)) +
  geom_count(alpha = 0.1) +
  stat_smooth(geom = "line", method = lm, se = FALSE, alpha = 0.6) +
  geom_text(
    # The layer data is computed from the plot data: fit hwy ~ displ within
    # each class and keep the augmented row(s) with the largest displ.
    data = function(plot_data) {
      plot_data |>
        nest_by(class) |>
        summarize(broom::augment(lm(hwy ~ displ, data = data))) |>
        slice_max(order_by = displ, n = 1)
    },
    mapping = aes(y = .fitted),
    size = 3, hjust = 0, nudge_x = 0.1
  ) +
  # Extra room on the right of the x axis for the labels.
  scale_x_continuous(expand = expansion(add = c(0, 1))) +
  theme_minimal()
Or do it with a function
#' Last fitted point of a per-group linear model
#'
#' Fits `lm(formula)` separately within each level of `group` and keeps, for
#' every group, the `broom::augment()` row(s) with the largest value of the
#' predictor, i.e. the right-hand end of each fitted line.
#'
#' @param df A data frame containing the variables in `formula` and `group`.
#' @param formula A two-sided model formula such as `hwy ~ displ`. The first
#'   variable on the right-hand side is the one used to find the last point.
#' @param group Unquoted name of the grouping column.
#' @return The `broom::augment()` output (including `.fitted`) restricted to
#'   the row(s) with the largest predictor value in each group; ties at the
#'   maximum are all kept.
#' @examples
#' last_lm_points(df = mpg, formula = hwy ~ displ, group = class)
last_lm_points <- function(df, formula, group) {
  stopifnot(inherits(formula, "formula"), length(formula) == 3L)
  # Name of the first right-hand-side variable. Unlike formula[[3]], this is a
  # plain column name even when the right-hand side has several terms.
  x_name <- all.vars(formula[[3]])[1]
  df |>
    nest_by({{ group }}) |>
    summarize(broom::augment(lm(formula, data = data))) |>
    # .data[[ ]] looks the column up in the data only, never in the calling
    # environment, which get() would also search.
    slice_max(order_by = .data[[x_name]], n = 1)
}
mpg |>
  ggplot(aes(x = displ, y = hwy, colour = class, label = class)) +
  geom_count(alpha = 0.1) +
  stat_smooth(geom = "line", method = lm, se = FALSE, alpha = 0.6) +
  geom_text(
    # The layer data is derived from the plot data by the helper.
    data = function(plot_data) last_lm_points(plot_data, hwy~displ, class),
    mapping = aes(y = .fitted),
    size = 3, hjust = 0, nudge_x = 0.1
  ) +
  # Extra room on the right of the x axis for the labels.
  scale_x_continuous(expand = expansion(add = c(0, 1))) +
  theme_minimal()
In the violin plot below, I want to add total number of rows used to draw each plot excluding NA values.
Input:
# Three columns of unequal length: 8000 treatment labels, 7500 random values
# and 8000 marks.
# NOTE(review): cbindX() is not defined here; presumably gdata::cbindX, which
# pads the shorter column with NA. Confirm.
df <- cbindX(
  as.data.frame(rep(rep(c("trt", "trt2"), each = 4 * 500), times = 2)),
  as.data.frame(rnorm(15 * 500, 2)),
  as.data.frame(rep(c("A", "B"), each = 8 * 500))
)
names(df) <- c("variable", "value", "mark")
code:
# Violin per variable, with nrow(df) (the row count of the whole data frame)
# written in red at y = -2 under each violin.
ggplot(df, aes(x = variable, y = value)) +
  # TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
  geom_violin(trim = TRUE) +
  geom_text(aes(x = variable, y = -2, label = nrow(df)), color = "red")
Output:
Expected output:
This should help you:
library(dplyr)
# Number of non-missing values per variable: one row per violin.
count <- df %>%
  filter(!is.na(value)) %>%
  group_by(variable) %>%
  summarise(n = n()) %>%
  as.data.frame()
#   variable    n
# 1      trt 4000
# 2     trt2 3500

# The text layer uses the per-variable counts as its own data, so each violin
# is labelled with its own n.
ggplot(df, aes(x = variable, y = value)) +
  # TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
  geom_violin(trim = TRUE) +
  geom_text(data = count, aes(x = variable, y = -2, label = n), color = "red")
Would this work for you?
# Same violin plot with the two counts written as fixed annotations under the
# first and second violin.
ggplot(df, aes(x = variable, y = value)) +
  # TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
  geom_violin(trim = TRUE) +
  annotate("text", label = "4000", x = 1, y = -3, size = 10, colour = "black") +
  annotate("text", label = "3500", x = 2, y = -3, size = 10, colour = "black")