Related
I have below ggplot:
library(ggplot2)
# Two groups (gr1, gr2), each holding one value per component A, B and C.
data <- rbind(
  data.frame(val = c(10, 30, 15), name = c("A", "B", "C"), group = "gr1"),
  data.frame(val = c(30, 40, 12), name = c("A", "B", "C"), group = "gr2")
)

# Draw barplot with grouping & stacking: one narrow bar per group,
# filled by component.
ggplot(data, aes(x = group, y = val, fill = name)) +
  geom_bar(stat = "identity", position = "stack", width = .1)
With this, I am getting below plot
However, I want to connect these bars with a curved area where the area would be equal to the value of the corresponding bar-component. A close example could be like,
Is there any way to achieve this with ggplot?
Any pointer will be very helpful.
This is something like an alluvial plot. There are various extension packages that could help you create such a plot, but it is possible to do it in ggplot directly using a bit of data manipulation.
library(tidyverse)
# Build the ribbons ("alluvia") that join the two bars. For every component
# `name`, interpolate from its value in the first row to its value in the
# second row along a normal-CDF S-curve centred on x = 1.5.
# Assumes exactly two rows per `name` (diff(val) must be a single number).
# reframe() (dplyr >= 1.1.0) is used because summarize() is deprecated for
# results with more than one row per group.
alluvia <- data %>%
  group_by(name) %>%
  reframe(
    x = seq(1, 2, 0.01),
    val = pnorm(x, 1.5, 0.15) * diff(val) + first(val)
  )

# The groups are placed at numeric positions 1 and 2 so the ribbons can use a
# continuous x axis; the axis labels are mapped back to the group names.
ggplot(data, aes(x = as.numeric(factor(group)), y = val, fill = name)) +
  geom_bar(stat = "identity", position = "stack", width = .1) +
  geom_area(data = alluvia, aes(x = x), position = "stack", alpha = 0.5) +
  scale_x_continuous(
    breaks = 1:2, labels = levels(factor(data$group)),
    name = "Group", expand = c(0.25, 0.25)
  ) +
  scale_fill_brewer(palette = "Set2") +
  theme_light(base_size = 20)
EDIT
A more generalized solution for more than 2 groups would be
library(tidyverse)
# Build one ribbon segment per component and per pair of neighbouring groups.
# Groups are converted to their numeric position, each row is paired with the
# same component's row in the next group via lead(), and the last group
# (which has no successor) is dropped. Each segment then follows a normal-CDF
# S-curve from `val` to `next_val`, centred halfway between the two bars.
# reframe() (dplyr >= 1.1.0) is used because summarise() is deprecated for
# results with more than one row per group.
alluvia <- data %>%
  mutate(
    group = as.numeric(factor(group)),
    name = factor(name)
  ) %>%
  arrange(group) %>%
  group_by(name) %>%
  mutate(
    next_group = lead(group),
    next_val = lead(val)
  ) %>%
  filter(!is.na(next_val)) %>%
  group_by(name, group) %>%
  reframe(
    x = seq(group + 0.01, next_group - 0.01, 0.01),
    val = (next_val - val) * pnorm(x, group + 0.5, 0.15) + val
  )

# Bars sit at integer positions 1..k; the axis labels are mapped back to the
# group names.
ggplot(data, aes(x = as.numeric(factor(group)), y = val, fill = name)) +
  geom_bar(stat = "identity", position = "stack", width = .1) +
  geom_area(data = alluvia, aes(x = x), position = "stack", alpha = 0.5) +
  scale_x_continuous(
    breaks = seq(length(unique(data$group))),
    labels = levels(factor(data$group)),
    name = "Group", expand = c(0.25, 0.25)
  ) +
  scale_fill_brewer(palette = "Set2") +
  theme_light(base_size = 20)
I would like to place at the top of the largest column the x value (goals). So Team A would have the label "3" and Team B the label "2" on top of those respective columns.
Code:
# Probability of scoring 1..5 goals for each of two teams.
df <- tibble(
  team = rep(c("A", "B"), each = 5),
  goals = rep(1:5, 2),
  prob = c(.10, .15, .25, .20, .15, .20, .30, .20, .10, .05)
)

# One panel per team, one column per goal count.
df %>%
  ggplot(aes(x = goals, y = prob)) +
  geom_col() +
  facet_wrap(~team)
Another option if you want to do it all in a single pipe would be:
# Within each team, keep the goal count as the label only on the row(s) with
# the highest probability; every other row gets an empty label.
df %>%
  group_by(team) %>%
  mutate(label = ifelse(prob == max(prob), goals, "")) %>%
  ggplot(aes(x = goals, y = prob)) +
  geom_col() +
  facet_wrap(~team) +
  # vjust = -0.5 lifts the text just above the top of the column.
  geom_text(aes(label = label), vjust = -0.5)
One option would be to make a separate data frame containing the "top" observations per team using e.g. group_by + slice_max. Afterwards you could pass this dataset to geom_text to add the labels for just the top observations:
# Probability of scoring 1..5 goals for each of two teams.
df <- data.frame(
  team = rep(c("A", "B"), each = 5),
  goals = rep(1:5, 2),
  prob = c(.10, .15, .25, .20, .15, .20, .30, .20, .10, .05)
)

library(ggplot2)
# `warn.conflicts` is spelled out in full; the abbreviated `warn` only worked
# through partial argument matching.
library(dplyr, warn.conflicts = FALSE)

# The highest-probability row(s) of each team; used only for labelling.
df_lab <- df |>
  group_by(team) |>
  slice_max(prob, n = 1) |>
  ungroup()

# Draw all columns from `df`, then add labels from `df_lab` only, nudged
# slightly above the column tops.
ggplot(df, aes(x = goals, y = prob)) +
  geom_col() +
  geom_text(data = df_lab, aes(label = goals), vjust = 0, nudge_y = .005) +
  facet_wrap(~team)
How can I show the direction (heading) of observations using ggplot2? Is there a way to adjust shape=17(triangle) so that it "points" to the next time observations?
Example Code
library(ggplot2)
# Three ids, each observed at time 1 and time 2.
dat <- data.frame(
  id = rep(c(1, 2, 3), each = 2),
  time = rep(c(1, 2), times = 3),
  x = c(.1, .2, .3, .4, .5, .6),
  y = c(.6, .25, .4, .33, .2, .51)
)

# Triangles coloured by id. The trailing NULL lets layers above it be
# commented out without leaving a dangling `+`.
ggplot(dat, aes(x, y, color = factor(id))) +
  geom_point(shape = 17) +
  # geom_line() +
  NULL
We can use ggplot2::geom_segment after we reshape the data using
dplyr and tidyr::pivot_wider:
# Three ids, each observed at time 1 and time 2.
dat <- data.frame(
  id = rep(c(1, 2, 3), each = 2),
  time = rep(c(1, 2), times = 3),
  x = c(.1, .2, .3, .4, .5, .6),
  y = c(.6, .25, .4, .33, .2, .51)
)

library(dplyr)
library(tidyr)
library(ggplot2)

# Widen to one row per id with columns x_1, x_2, y_1, y_2, then draw an arrow
# from the time-1 position to the time-2 position.
dat %>%
  pivot_wider(names_from = time, values_from = c(x, y)) %>%
  ggplot(aes(x = x_1, y = y_1, color = factor(id))) +
  geom_segment(
    aes(xend = x_2, yend = y_2),
    arrow = arrow(length = unit(.3, "cm"))
  ) +
  labs(x = "x", y = "y", color = "id")
Edit:
but I just want the arrow pointing without lines.
I'm not sure how we should handle the second point for each id (since it has no direction), but if we want to omit those points from the plot we can do:
library(dplyr)
library(tidyr)
library(ggplot2)
# For every observation, build a very short segment (0.01 % of the way
# towards the next observation of the same id) so that effectively only the
# arrowhead is visible. The last observation of each id has no successor,
# gets NA end points and is filtered out.
dat %>%
  group_by(id) %>%
  arrange(id, time) %>%
  mutate(
    x_2 = x + 0.0001 * (lead(x) - x),
    y_2 = y + 0.0001 * (lead(y) - y)
  ) %>%
  filter(!is.na(x_2)) %>%
  ggplot(aes(x = x, y = y, color = factor(id))) +
  geom_segment(
    aes(xend = x_2, yend = y_2),
    arrow = arrow(length = unit(.3, "cm"))
  ) +
  labs(x = "x", y = "y", color = "id")
Or if we want the arrows to point to the next measurement, independently of the color we can use the code below (now there is only the last point missing because of no direction):
library(dplyr)
library(tidyr)
library(ggplot2)
# Same short-segment trick, but without grouping: every observation points to
# the next row in (id, time) order, regardless of id. Only the very last row
# has no successor and is filtered out.
dat %>%
  arrange(id, time) %>%
  mutate(
    x_2 = x + 0.0001 * (lead(x) - x),
    y_2 = y + 0.0001 * (lead(y) - y)
  ) %>%
  filter(!is.na(x_2)) %>%
  ggplot(aes(x = x, y = y, color = factor(id))) +
  geom_segment(
    aes(xend = x_2, yend = y_2),
    arrow = arrow(length = unit(.3, "cm"))
  ) +
  labs(x = "x", y = "y", color = "id")
If we want to keep the 'last' measures we could add them in another geom_point layer...
Combining ideas from dario's answer, "How to scale a 2D vector and keep direction", and "Arranging arrows between points nicely in ggplot2":
library(dplyr)
library(tidyr)
library(ggplot2)
# Draw a closed arrowhead at each id's time-1 position, pointing towards its
# time-2 position, and mark the time-2 positions with squares.
dat %>%
  pivot_wider(names_from = time, values_from = c(x, y)) %>%
  group_by(id) %>%
  # Direction vector from the time-1 to the time-2 position.
  mutate(x_v = x_2 - x_1, y_v = y_2 - y_1) %>%
  # Scale the direction to length 1/1000 so that only the arrowhead shows.
  # across() replaces the superseded mutate_at() and yields the same
  # x_v_units / y_v_units columns.
  mutate(across(
    c(x_v, y_v),
    ~ (.x / sqrt(x_v^2 + y_v^2)) / 1000,
    .names = "{.col}_units"
  )) %>%
  ggplot(aes(x = x_1, y = y_1, colour = factor(id))) +
  geom_segment(
    aes(xend = x_1 + x_v_units, yend = y_1 + y_v_units),
    show.legend = FALSE,
    arrow = arrow(length = unit(.3, "cm"), type = "closed", angle = 20)
  ) +
  geom_point(
    data = (dat %>% filter(time == 2)),
    aes(x, y), shape = 15, size = 2
  ) +
  labs(x = "x", y = "y", colour = "id") +
  theme_bw()
Data:
# Three ids, each observed at time 1 and time 2.
dat <- data.frame(
  id = rep(c(1, 2, 3), each = 2),
  time = rep(c(1, 2), times = 3),
  x = c(.1, .2, .3, .4, .5, .6),
  y = c(.6, .25, .4, .33, .2, .51)
)
I am trying to create a boxplot that compares Ob vs A and B at multiple locations (i.e., Start, Mid, End) for the Top and Low values (10% in this case). I am trying to use gather, facet_wrap, grid.arrange and ggplot in R but cannot put things together. Here is my code so far; I would appreciate help moving forward.
library(tidyverse)
library(gridExtra)
# Simulated observations (Ob) and two model outputs (A, B) at three locations.
DF_1 <- data.frame(Ob = runif(100, 10, 80), A = runif(100, 5, 90), B = runif(100, 3, 85), loc = rep("Start", 100))
DF_2 <- data.frame(Ob = runif(100, 10, 80), A = runif(100, 5, 90), B = runif(100, 3, 85), loc = rep("Mid", 100))
DF_3 <- data.frame(Ob = runif(100, 10, 80), A = runif(100, 5, 90), B = runif(100, 3, 85), loc = rep("End", 100))

# Rows of `df` holding the `n` largest (or, with decreasing = FALSE, the `n`
# smallest) values of `Ob`.
extreme_rows <- function(df, n = 10, decreasing = TRUE) {
  df[order(df$Ob, decreasing = decreasing), ][seq_len(n), ]
}

# Top and bottom 10 % (10 of 100 rows) per location. Each subset is taken
# from its own data frame; the original indexed DF_1 with DF_3's ordering, so
# the "End" subsets actually contained "Start" rows.
DF_1_Top <- extreme_rows(DF_1, decreasing = TRUE)
DF_1_Low <- extreme_rows(DF_1, decreasing = FALSE)
DF_2_Top <- extreme_rows(DF_2, decreasing = TRUE)
DF_2_Low <- extreme_rows(DF_2, decreasing = FALSE)
DF_3_Top <- extreme_rows(DF_3, decreasing = TRUE)
DF_3_Low <- extreme_rows(DF_3, decreasing = FALSE)

DF_Top <- rbind(DF_1_Top, DF_2_Top, DF_3_Top)
DF_Low <- rbind(DF_1_Low, DF_2_Low, DF_3_Low)
# Reshape to long form: one row per (loc, Variable, Value), where Variable is
# one of the non-`loc` columns.
DF_T <- gather(DF_Top, key = "Variable", value = "Value", -"loc")
DF_L <- gather(DF_Low, key = "Variable", value = "Value", -"loc")

# One row of location panels for the top values...
P1 <- ggplot(DF_T, aes(x = Variable, y = Value)) +
  geom_boxplot() +
  facet_wrap(~loc, nrow = 1)

# ...and one for the low values.
P2 <- ggplot(DF_L, aes(x = Variable, y = Value)) +
  geom_boxplot() +
  facet_wrap(~loc, nrow = 1)

# Stack the two plots vertically.
grid.arrange(P1, P2, nrow = 2)
Here is a manually drawn figure that I would like to achieve:
You could stack all of your data into a single data frame and create a single graph. For example:
# Stack the top and low subsets into one data frame; `source` records which
# subset a row came from, with "High" ordered before "Low".
d <- bind_rows(High = DF_Top, Low = DF_Low, .id = "source") %>%
  mutate(source = factor(source, levels = c("High", "Low")))

# One grid of boxplots: rows = High/Low, columns = location, x = variable
# (with "Ob" moved to the front).
d %>%
  gather(key, value, Ob:B) %>%
  mutate(key = fct_relevel(key, "Ob")) %>%
  ggplot(aes(key, value)) +
  geom_hline(yintercept = 0) +
  geom_boxplot() +
  facet_grid(source ~ loc, switch = "x") +
  labs(x = "", y = "") +
  # expansion() replaces expand_scale(), deprecated since ggplot2 3.3.0:
  # no padding below the data, 2 % above.
  scale_y_continuous(expand = expansion(mult = c(0.0, 0.02))) +
  theme_classic() +
  # Put the location strips (switched to the bottom) outside the axis and
  # style them like axis titles.
  theme(
    strip.placement = "outside",
    strip.background.x = element_rect(colour = NA, fill = NA),
    strip.text.x = element_text(size = 11, face = "bold")
  )
Responding to your comment, I'm not wild about moving the key labels to a legend, but...
# Variant with location on the x axis and the variable shown by colour, with
# the legend below the plot. Rows of the grid are still High/Low.
d %>%
  gather(key, value, Ob:B) %>%
  mutate(key = fct_relevel(key, "Ob")) %>%
  ggplot(aes(loc, value, colour = key)) +
  geom_hline(yintercept = 0) +
  geom_boxplot() +
  facet_grid(source ~ ., switch = "x") +
  labs(x = "", y = "", colour = "") +
  # expansion() replaces expand_scale(), deprecated since ggplot2 3.3.0:
  # no padding below the data, 2 % above.
  scale_y_continuous(expand = expansion(mult = c(0.0, 0.02))) +
  theme_classic() +
  # The negative top margin pulls the legend up towards the axis.
  theme(
    legend.position = "bottom",
    legend.box.margin = margin(t = -20)
  )
I'm plotting a discrete CDF. I have a few questions regarding geom_step which I'm not finding by using Google.
Is it possible to make the line segment representing the jump dashed
rather than solid, to better show what's going on?
Is it possible to add geom_point more efficiently than I do (less
copy-pasting)?
Below is my current solution:
library(tidyverse)
library(ggthemes)
# Make theme_few() the default theme for the plots that follow.
theme_set(theme_few())
# One x grid per constant piece of the CDF. Each sequence has 50 values, so
# they can be stored side by side as columns of a single tibble.
x0 <- seq(-0.5, -0.01, by = 0.01)
x1 <- seq(0, 0.99, by = 0.02)
x2 <- seq(1, 1.99, by = 0.02)
x3 <- seq(2, 2.99, by = 0.02)
x35 <- seq(3, 3.49, by = 0.01)
x4 <- seq(3.5, 3.99, by = 0.01)
tibble_ex <- tibble(
x0 = x0,
x1 = x1,
x2 = x2,
x3 = x3,
x35 = x35,
x4 = x4
)
# Reshape to long form (x = piece name, xax = x value), attach the CDF level
# of each piece, and draw the step function.
tibble_ex %>%
gather(x, xax, x0:x4) %>%
mutate(cdf = case_when(x == 'x0' ~ 0,
x == 'x1' ~ 1/2,
x == 'x2' ~ 3/5,
x == 'x3' ~ 4/5,
x == 'x35' ~ 9/10,
x == 'x4' ~ 1)) %>%
ggplot(aes(x = xax, y = cdf)) +
geom_step() +
# White-filled points: the lower end of each jump.
geom_point(aes(x = 0, y = 0), size = 3, shape = 21, fill = 'white') +
geom_point(aes(x = 1, y = 0.5), size = 3, shape = 21, fill = 'white') +
geom_point(aes(x = 2, y = 3/5), size = 3, shape = 21, fill = 'white') +
geom_point(aes(x = 3, y = 4/5), size = 3, shape = 21, fill = 'white') +
geom_point(aes(x = 3.5, y = 9/10), size = 3, shape = 21, fill = 'white') +
# Black-filled points: the upper end of each jump.
geom_point(aes(x = 0, y = 0.5), size = 3, shape = 21, fill = 'black') +
geom_point(aes(x = 1, y = 3/5), size = 3, shape = 21, fill = 'black') +
geom_point(aes(x = 2, y = 4/5), size = 3, shape = 21, fill = 'black') +
geom_point(aes(x = 3, y = 9/10), size = 3, shape = 21, fill = 'black') +
geom_point(aes(x = 3.5, y = 1), size = 3, shape = 21, fill = 'black') +
labs(x = 'x', y = 'F(x)')
ggplot will be more powerful to use if you can put your data into a data frame and structure it so that the characteristics of your data can be mapped directly.
Here's a way to take your data and augment it with additional rows that represent the connecting points, by matching each x with the prior cdf value. I added a column, type, to keep track of which is which. I also arrange df so that geom_segment plots the points in the right order.
# Jump locations and the CDF value reached at each of them.
new_steps <- tibble(
  x = c(0:3, 3.5, 4),
  cdf = c(0, .5, .6, .8, .9, 1)
)

# Two rows per jump: the value reached ("cdf") and the value held just before
# it ("prior", taken from the previous row). The first "prior" row has no
# predecessor and is dropped. Within each x, "prior" sorts before "cdf".
df <- bind_rows(
  mutate(new_steps, type = "cdf"),
  mutate(new_steps, type = "prior", cdf = lag(cdf))
) %>%
  drop_na() %>%
  arrange(x, desc(type))
Then we can map the points' fill and the geom_segments' linetype to type.
# Points are filled by row type; every segment joins the previous row to the
# current one and takes its line type from the current row's type, so the
# two kinds of segment (flat runs vs. vertical jumps) are drawn differently.
ggplot(df) +
  geom_point(
    aes(x, cdf, fill = type),
    shape = 21
  ) +
  scale_fill_manual(values = c("black", "white")) +
  geom_segment(
    aes(
      x = lag(x), y = lag(cdf),
      xend = x, yend = cdf,
      linetype = type
    )
  ) +
  scale_linetype_manual(values = c("dashed", "solid"))
(1) No, there is not a built-in way to make the geom_step half-dashed. But if you post this as a separate question, perhaps someone will help create a new geom for this.
(2) The answer is to put the points you want plotted in a data frame, like anything else you might want to plot:
# All jump end points in one data frame: group "a" holds the lower ends,
# group "b" the upper ends.
point_data <- data.frame(
  x = rep(c(0, 1, 2, 3, 3.5), 2),
  y = c(0, rep(c(.5, .6, .8, .9), 2), 1),
  z = rep(c("a", "b"), each = 5)
)

# calling your gathered/mutated version of tibble_ex df
ggplot(df, aes(x = xax, y = cdf)) +
  geom_step() +
  geom_point(data = point_data, aes(x = x, y = y, fill = z), shape = 21) +
  # guide = "none" hides the fill legend; guide = FALSE is deprecated.
  scale_fill_manual(values = c("white", "black"), guide = "none") +
  labs(x = 'x', y = 'F(x)')
For the second part of your question, you can put all the coordinates in a separate data frame and call geom_point only once:
# All jump end points in one data frame, carrying their fill colour directly:
# the first five rows are the white points, the last five the black ones.
ddf <- data.frame(
  xax = rep(c(0:3, 3.5), 2),
  cdf = c(0, .5, .6, .8, .9, .5, .6, .8, .9, 1),
  col = rep(c("white", "black"), each = 5)
)

# Open a new graphics device for the plot.
dev.new()

# Same step plot as in the question, but with a single geom_point() layer fed
# by `ddf`; I(col) uses the colour names as given rather than through a scale.
tibble_ex %>%
  gather(x, xax, x0:x4) %>%
  mutate(
    cdf = case_when(
      x == 'x0' ~ 0,
      x == 'x1' ~ 1/2,
      x == 'x2' ~ 3/5,
      x == 'x3' ~ 4/5,
      x == 'x35' ~ 9/10,
      x == 'x4' ~ 1
    )
  ) %>%
  ggplot(aes(x = xax, y = cdf)) +
  geom_step() +
  geom_point(data = ddf, aes(fill = I(col)), size = 3, shape = 21) +
  labs(x = 'x', y = 'F(x)')