How can I show the direction (heading) of observations using ggplot2? Is there a way to adjust shape=17(triangle) so that it "points" to the next time observations?
Example Code
library(ggplot2)
dat <- data.frame(id = c(1, 1, 2, 2, 3, 3),
time = c(1, 2, 1, 2, 1, 2),
x = c(.1, .2, .3, .4, .5, .6),
y = c(.6, .25, .4, .33, .2, .51))
ggplot(dat, aes(x, y, color=factor(id))) +
geom_point(shape=17) +
# geom_line() +
NULL
We can use ggplot2::geom_segment after we reshape the data using
dplyr and tidyr::pivot_wider:
dat <- data.frame(id = c(1, 1, 2, 2, 3, 3),
time = c(1, 2, 1, 2, 1, 2),
x = c(.1, .2, .3, .4, .5, .6),
y = c(.6, .25, .4, .33, .2, .51))
library(dplyr)
library(tidyr)
library(ggplot2)
dat %>%
pivot_wider(names_from = time, values_from = c(x, y)) %>%
ggplot(aes(x=x_1, y=y_1, color=factor(id))) +
geom_segment(aes(xend = x_2, yend = y_2),
arrow = arrow(length = unit(.3,"cm"))) +
labs(x="x", y="y", color="id")
Edit:
but I just want the arrow pointing without lines.
I'm not sure how we should handle the second point for each id (since it has not direction) but if we want to omit them from the plot we can do:
library(dplyr)
library(tidyr)
library(ggplot2)
dat %>%
group_by(id) %>%
arrange(id, time) %>%
mutate(x_2 = x + 0.0001 * (lead(x) - x),
y_2 = y + 0.0001 * (lead(y) - y)) %>%
filter(!is.na(x_2)) %>%
ggplot(aes(x=x, y=y, color=factor(id))) +
geom_segment(aes(xend = x_2, yend = y_2),
arrow = arrow(length = unit(.3,"cm"))) +
labs(x="x", y="y", color="id")
Or if we want the arrows to point to the next measurement, independently of the color we can use the code below (now there is only the last point missing because of no direction):
library(dplyr)
library(tidyr)
library(ggplot2)
dat %>%
arrange(id, time) %>%
mutate(x_2 = x + 0.0001 * (lead(x) - x),
y_2 = y + 0.0001 * (lead(y) - y)) %>%
filter(!is.na(x_2)) %>%
ggplot(aes(x=x, y=y, color=factor(id))) +
geom_segment(aes(xend = x_2, yend = y_2),
arrow = arrow(length = unit(.3,"cm"))) +
labs(x="x", y="y", color="id")
If we want to keep the 'last' measures we could add them in another geom_point layer...
Combining ideas from dario's answer, How to scale a 2D vector and keep direction and Arranging arrows between points nicely in ggplot2
library(dplyr)
library(tidyr)
library(ggplot2)
dat %>%
pivot_wider(names_from = time, values_from = c(x, y)) %>%
group_by(id) %>%
mutate(x_v = x_2 - x_1, y_v = y_2 - y_1) %>%
mutate_at(vars("x_v", "y_v"),
list(units =~ (. / sqrt((x_v)^2 + (y_v)^2))/1000)) %>%
ggplot(aes(x=x_1, y=y_1, colour = factor(id))) +
geom_segment(aes(xend = x_1 + x_v_units, yend = y_1 + y_v_units),
show.legend = F,
arrow = arrow(length = unit(.3,"cm"), type="closed", angle = 20)) +
geom_point(data = (dat %>% filter(time == 2)), aes(x, y), shape=15, size=2) +
labs(x="x", y="y", colour="id") +
theme_bw()
Data:
dat <- data.frame(id = c(1, 1, 2, 2, 3, 3),
time = c(1, 2, 1, 2, 1, 2),
x = c(.1, .2, .3, .4, .5, .6),
y = c(.6, .25, .4, .33, .2, .51))
Related
I would like to place at the top of the largest column the x value (goals). So Team A would have the label "3" and Team B the label "2" on top of those respective columns.
Code:
df <- tibble ( team = rep(c('A', 'B'), each = 5),
goals = rep(1:5,2),
prob = c(.10, .15, .25, .20, .15, .20, .30, .20, .10, .05))
df %>%
ggplot(aes(x = goals, y = prob)) +
geom_col() +
facet_wrap(~team)
Another option if you want to do it all in a single pipe would be:
df %>%
group_by(team) %>%
mutate(label = ifelse(prob == max(prob), goals, "")) %>%
ggplot(aes(x = goals, y = prob)) +
geom_col() +
facet_wrap(~team) +
geom_text(aes(label = label), vjust = -0.5)
One option would be to make a separate data frame containing the "top" observations per team using e.g. group_by + slice_max. Afterwards you could pass this dataset to geom_text to add the labels for just the top observations:
df <- data.frame( team = rep(c('A', 'B'), each = 5),
goals = rep(1:5,2),
prob = c(.10, .15, .25, .20, .15, .20, .30, .20, .10, .05))
library(ggplot2)
library(dplyr, warn = FALSE)
df_lab <- df |>
group_by(team) |>
slice_max(prob, n = 1) |>
ungroup()
ggplot(df, aes(x = goals, y = prob)) +
geom_col() +
geom_text(data = df_lab, aes(label = goals), vjust = 0, nudge_y = .005) +
facet_wrap(~team)
Is there a straightforward way to use alpha on only one variable using ggplot2?
I would have imagined that scale_alpha_manual(values = c(0, 1)) would work like scale_color_manual(). Ultimately, I am interested in doing an animation where a colour appears gradually.
df = data.frame(time = 1:100, x1 = rnorm(100, 1, 5), x2 = rnorm(100, 1, 5)) %>%
melt(id.vars = 'time')
df %>%
ggplot(aes(time, value, colour = variable)) +
geom_line() +
scale_color_manual(values = c('black', 'blue')) +
scale_alpha_manual(values = c(0, 1))
I am trying to get something like this but with an alpha
You could use the alpha as an aesthetic:
df = data.frame(time = 1:100, x1 = rnorm(100, 1, 5), x2 = rnorm(100, 1, 5)) %>%
melt(id.vars = 'time')
df %>%
ggplot(aes(time, value, colour = variable, alpha=variable)) +
geom_line() +
scale_color_manual(values = c('black', 'blue')) +
scale_alpha_manual(values = c(0.3, 1))
I would like to plot an angle between two lines using ggplot2, meaning something similar to the bold red line in the plot below. Is there an easy solution to this?
Data and code to make the plot without the red line:
library(tidyverse)
df <- tibble(
line = c("A", "A", "B", "B"),
x = c(1, 5, 1, 3),
y = c(1, 3, 1, 5))
ggplot(
df, aes(x, y, group = line))+
geom_path()
have a look at geom_curve, e.g. :
ggplot( df, aes(x, y, group = line))+
geom_path() +
geom_curve(aes(x = 1.5, y = 2, xend = 2, yend = 1.5), curvature = -0.5, color = "red", size = 3)
You will have to tweak it a bit to use it in a more robust, automatic way, for example:
red_curve <- df %>%
group_by(line) %>%
summarise( avg_x = mean(x),
avg_y = mean(y))
ggplot( df, aes(x, y, group = line))+
geom_path() +
geom_curve( data = red_curve, aes(x = avg_x[1], y = avg_y[1], xend = avg_x[2], yend = avg_y[2]), curvature = 0.5, color = "red", size = 3)
Here is a solution with geom_arc of the ggforce package.
library(ggplot2)
library(ggforce)
angle <- function(p, c){
M <- p - c
Arg(complex(real = M[1], imaginary = M[2]))
}
O <- c(1,1)
P1 <- c(5,3)
P2 <- c(3,5)
a1 <- angle(P1, O)
a2 <- angle(P2, O)
df <- data.frame(
line = c("A", "A", "B", "B"),
x = c(1, 5, 1, 3),
y = c(1, 3, 1, 5)
)
ggplot(df, aes(x, y, group = line)) +
geom_path() +
geom_arc(aes(x0 = 1, y0 = 1, r = 1, start = a1, end = a2),
color="red", size = 2, inherit.aes = FALSE)
The arc does not look like a true arc circle. That's because the aspect ratio is not set to 1. To set the aspect ratio to 1:
ggplot(df, aes(x, y, group = line)) +
geom_path() +
geom_arc(aes(x0 = 1, y0 = 1, r = 1, start = a1, end = a2),
color="red", size = 2, inherit.aes = FALSE) +
coord_fixed()
I'm plotting a discrete CDF. I have a few questions regarding geom_step which I'm not finding by using Google.
Is it possible to make the line segment representing the jump dashed
rather than solid to better show whats going on?
Is it possible to add geom_point more efficiently than I do? (less
c/p).
Below is my current solution:
library(tidyverse)
library(ggthemes)
theme_set(theme_few())
x0 <- seq(-0.5, -0.01, by = 0.01)
x1 <- seq(0, 0.99, by = 0.02)
x2 <- seq(1, 1.99, by = 0.02)
x3 <- seq(2, 2.99, by = 0.02)
x35 <- seq(3, 3.49, by = 0.01)
x4 <- seq(3.5, 3.99, by = 0.01)
tibble_ex <- tibble(
x0 = x0,
x1 = x1,
x2 = x2,
x3 = x3,
x35 = x35,
x4 = x4
)
tibble_ex %>%
gather(x, xax, x0:x4) %>%
mutate(cdf = case_when(x == 'x0' ~ 0,
x == 'x1' ~ 1/2,
x == 'x2' ~ 3/5,
x == 'x3' ~ 4/5,
x == 'x35' ~ 9/10,
x == 'x4' ~ 1)) %>%
ggplot(aes(x = xax, y = cdf)) +
geom_step() +
geom_point(aes(x = 0, y = 0), size = 3, shape = 21, fill = 'white') +
geom_point(aes(x = 1, y = 0.5), size = 3, shape = 21, fill = 'white') +
geom_point(aes(x = 2, y = 3/5), size = 3, shape = 21, fill = 'white') +
geom_point(aes(x = 3, y = 4/5), size = 3, shape = 21, fill = 'white') +
geom_point(aes(x = 3.5, y = 9/10), size = 3, shape = 21, fill = 'white') +
geom_point(aes(x = 0, y = 0.5), size = 3, shape = 21, fill = 'black') +
geom_point(aes(x = 1, y = 3/5), size = 3, shape = 21, fill = 'black') +
geom_point(aes(x = 2, y = 4/5), size = 3, shape = 21, fill = 'black') +
geom_point(aes(x = 3, y = 9/10), size = 3, shape = 21, fill = 'black') +
geom_point(aes(x = 3.5, y = 1), size = 3, shape = 21, fill = 'black') +
labs(x = 'x', y = 'F(x)')
ggplot will be more powerful to use if you can put your data into a data frame and structure it so that the characteristics of your data can be mapped directly.
Here's a way to take your data and augment it with additional rows that represent the connecting points, by matching each x with the prior cdf value. I added a column, type, to keep track of which is which. I also arrange df so that geom_segment plots the points in the right order.
new_steps <-
tibble(x = c(0:3, 3.5, 4),
cdf = c(0, .5, .6, .8, .9, 1))
df <- new_steps %>%
mutate(type = "cdf") %>%
bind_rows(new_steps %>%
mutate(type = "prior",
cdf = lag(cdf))) %>%
drop_na() %>%
arrange(x, desc(type))
Then we can map the points' fill and the geom_segments' linetype to type.
ggplot(df) +
geom_point(aes(x, cdf, fill = type),
shape = 21) +
scale_fill_manual(values = c("black", "white")) +
geom_segment(aes(x = lag(x), y = lag(cdf),
xend = x, yend = cdf,
lty = type)) +
scale_linetype_manual(values = c("dashed", "solid"))
(1) No, there is not a built-in way to make the geom_step half-dashed. But if you post this as a separate question, perhaps someone will help create a new geom for this.
(2) The answer is to put the points you want plotted in a data frame, like anything else you might want to plot:
point_data = data.frame(x = rep(c(0, 1, 2, 3, 3.5), 2),
y = c(0, rep(c(.5, .6, .8, .9), 2), 1),
z = rep(c("a", "b"), each = 5))
# calling your gathered/mutated version of tibble_ex df
ggplot(df, aes(x = xax, y = cdf)) +
geom_step() +
geom_point(data = point_data, aes(x = x, y = y, fill = z), shape = 21) +
scale_fill_manual(values = c("white", "black"), guide = FALSE) +
labs(x = 'x', y = 'F(x)')
For the second part of your question, you can put all the coordinates in a separate data frame and call geom_point only once:
ddf <- data.frame(xax = rep(c(0:3, 3.5), 2),
cdf = c(0, .5, .6, .8, .9, .5, .6, .8, .9, 1),
col = rep(c("white", "black"), each = 5))
dev.new()
tibble_ex %>%
gather(x, xax, x0:x4) %>%
mutate(cdf = case_when(x == 'x0' ~ 0,
x == 'x1' ~ 1/2,
x == 'x2' ~ 3/5,
x == 'x3' ~ 4/5,
x == 'x35' ~ 9/10,
x == 'x4' ~ 1)) %>%
ggplot(aes(x = xax, y = cdf)) +
geom_step() +
geom_point(data = ddf, aes(fill = I(col)), size = 3, shape = 21) +
labs(x = 'x', y = 'F(x)')
I saw this great plot from fivethirty that has a slight overlap of density plots for different colleges. Check out this link at fivethirtyeight.com
How would you replicate this plot with ggplot2?
Specifically how would you get that slight overlap, facet_wrap isn't going to work.
TestFrame <-
data.frame(
Score =
c(rnorm(100, 0, 1)
,rnorm(100, 0, 2)
,rnorm(100, 0, 3)
,rnorm(100, 0, 4)
,rnorm(100, 0, 5))
,Group =
c(rep('Ones', 100)
,rep('Twos', 100)
,rep('Threes', 100)
,rep('Fours', 100)
,rep('Fives', 100))
)
ggplot(TestFrame, aes(x = Score, group = Group)) +
geom_density(alpha = .75, fill = 'black')
As always with ggplot, the key is getting the data in the right format, and then the plotting is pretty straightforward. I'm sure there would be another way to do this, but my approach was to do the density estimation with density() and then to make a sort of manual geom_density() with geom_ribbon(), which takes a ymin and ymax, necessary for moving the shape off the x axis.
The rest of the challenge was in getting the order of the printing correct, since it seems that ggplot will print the widest ribbon first. In the end, the part that requires the bulkiest code is the production of the quartiles.
I also produced some data that is a bit more consistent with the original figure.
library(ggplot2)
library(dplyr)
library(broom)
rawdata <- data.frame(Score = rnorm(1000, seq(1, 0, length.out = 10), sd = 1),
Group = rep(LETTERS[1:10], 10000))
df <- rawdata %>%
mutate(GroupNum = rev(as.numeric(Group))) %>% #rev() means the ordering will be from top to bottom
group_by(Group, GroupNum) %>%
do(tidy(density(.$Score, bw = diff(range(.$Score))/20))) %>% #The original has quite a large bandwidth
group_by() %>%
mutate(ymin = GroupNum * (max(y) / 1.5), #This constant controls how much overlap between groups there is
ymax = y + ymin,
ylabel = ymin + min(ymin)/2,
xlabel = min(x) - mean(range(x))/2) #This constant controls how far to the left the labels are
#Get quartiles
labels <- rawdata %>%
mutate(GroupNum = rev(as.numeric(Group))) %>%
group_by(Group, GroupNum) %>%
mutate(q1 = quantile(Score)[2],
median = quantile(Score)[3],
q3 = quantile(Score)[4]) %>%
filter(row_number() == 1) %>%
select(-Score) %>%
left_join(df) %>%
mutate(xmed = x[which.min(abs(x - median))],
yminmed = ymin[which.min(abs(x - median))],
ymaxmed = ymax[which.min(abs(x - median))]) %>%
filter(row_number() == 1)
p <- ggplot(df, aes(x, ymin = ymin, ymax = ymax)) + geom_text(data = labels, aes(xlabel, ylabel, label = Group)) +
geom_vline(xintercept = 0, size = 1.5, alpha = 0.5, colour = "#626262") +
geom_vline(xintercept = c(-2.5, -1.25, 1.25, 2.5), size = 0.75, alpha = 0.25, colour = "#626262") +
theme(panel.grid = element_blank(),
panel.background = element_rect(fill = "#F0F0F0"),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank())
for (i in unique(df$GroupNum)) {
p <- p + geom_ribbon(data = df[df$GroupNum == i,], aes(group = GroupNum), colour = "#F0F0F0", fill = "black") +
geom_segment(data = labels[labels$GroupNum == i,], aes(x = xmed, xend = xmed, y = yminmed, yend = ymaxmed), colour = "#F0F0F0", linetype = "dashed") +
geom_segment(data = labels[labels$GroupNum == i,], x = min(df$x), xend = max(df$x), aes(y = ymin, yend = ymin), size = 1.5, lineend = "round")
}
p <- p + geom_text(data = labels[labels$Group == "A",], aes(xmed - xlabel/50, ylabel),
label = "Median", colour = "#F0F0F0", hjust = 0, fontface = "italic", size = 4)
Edit
I noticed the original actually does a bit of fudging by stretching out each distribution with a horizontal line (you can see a join if you look closely...). I added something similar with the second geom_segment() in the loop.
Although there is a great & accepted answer available already - I finished my contribution as an alternative avenue without data reformatting.
TestFrame <-
data.frame(
Score =
c(rnorm(50, 3, 2)+rnorm(50, -1, 3)
,rnorm(50, 3, 2)+rnorm(50, -2, 3)
,rnorm(50, 3, 2)+rnorm(50, -3, 3)
,rnorm(50, 3, 2)+rnorm(50, -4, 3)
,rnorm(50, 3, 2)+rnorm(50, -5, 3))
,Group =
c(rep('Ones', 50)
,rep('Twos', 50)
,rep('Threes', 50)
,rep('Fours', 50)
,rep('Fives', 50))
)
require(ggplot2)
require(grid)
spacing=0.05
tm <- theme(legend.position="none", axis.line=element_blank(),axis.text.x=element_blank(),
axis.text.y=element_blank(),axis.ticks=element_blank(),
axis.title.x=element_blank(),axis.title.y=element_blank(),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(),
plot.background = element_rect(fill = "transparent",colour = NA),
plot.margin = unit(c(0,0,0,0),"mm"))
firstQuintile = quantile(TestFrame$Score,0.2)
secondQuintile = quantile(TestFrame$Score,0.4)
median = quantile(TestFrame$Score,0.5)
thirdQuintile = quantile(TestFrame$Score,0.6)
fourthQuintile = quantile(TestFrame$Score,0.8)
ymax <- 1.5*max(density(TestFrame[TestFrame$Group=="Ones",]$Score)$y)
xmax <- 1.2*max(TestFrame$Score)
xmin <- 1.2*min(TestFrame$Score)
p0 <- ggplot(TestFrame[TestFrame$Group=="Ones",], aes(x = Score, group = Group)) + geom_density(fill = "transparent",colour = NA)+ylim(0-5*spacing,ymax)+xlim(xmin,xmax)+tm
p0 <- p0 + geom_vline(aes(xintercept=firstQuintile),color="gray",size=1.2)
p0 <- p0 + geom_vline(aes(xintercept=secondQuintile),color="gray",size=1.2)
p0 <- p0 + geom_vline(aes(xintercept=thirdQuintile),color="gray",size=1.2)
p0 <- p0 + geom_vline(aes(xintercept=fourthQuintile),color="gray",size=1.2)
p0 <- p0 + geom_vline(aes(xintercept=median),color="darkgray",size=2)
#previous line is a little hack for creating a working empty grid with proper sizing
p1 <- ggplot(TestFrame[TestFrame$Group=="Ones",], aes(x = Score, group = Group)) + geom_density(alpha = .85, fill = 'black', color="white",size=1)+tm+ylim(0,ymax)+xlim(xmin,xmax)+ geom_segment(aes(y=0,x=median(Score),yend=max(density(Score)$y),xend=median(Score)), color="white", linetype=2)
p2 <- ggplot(TestFrame[TestFrame$Group=="Twos",], aes(x = Score, group = Group)) + geom_density(alpha = .85, fill = 'black', color="white",size=1)+tm+ylim(0,ymax)+xlim(xmin,xmax)+ geom_segment(aes(y=0,x=median(Score),yend=max(density(Score)$y),xend=median(Score)), color="white", linetype=2)
p3 <- ggplot(TestFrame[TestFrame$Group=="Threes",], aes(x = Score, group = Group)) + geom_density(alpha = .85, fill = 'black', color="white",size=1)+tm+ylim(0,ymax)+xlim(xmin,xmax)+ geom_segment(aes(y=0,x=median(Score),yend=max(density(Score)$y),xend=median(Score)), color="white", linetype=2)
p4 <- ggplot(TestFrame[TestFrame$Group=="Fours",], aes(x = Score, group = Group)) + geom_density(alpha = .85, fill = 'black', color="white",size=1)+tm+ylim(0,ymax)+xlim(xmin,xmax)+ geom_segment(aes(y=0,x=median(Score),yend=max(density(Score)$y),xend=median(Score)), color="white", linetype=2)
p5 <- ggplot(TestFrame[TestFrame$Group=="Fives",], aes(x = Score, group = Group)) + geom_density(alpha = .85, fill = 'black', color="white",size=1)+tm+ylim(0,ymax)+xlim(xmin,xmax)+ geom_segment(aes(y=0,x=median(Score),yend=max(density(Score)$y),xend=median(Score)), color="white", linetype=2)
f <- grobTree(ggplotGrob(p1))
g <- grobTree(ggplotGrob(p2))
h <- grobTree(ggplotGrob(p3))
i <- grobTree(ggplotGrob(p4))
j <- grobTree(ggplotGrob(p5))
a1 <- annotation_custom(grob = f, xmin = xmin, xmax = xmax,ymin = -spacing, ymax = ymax)
a2 <- annotation_custom(grob = g, xmin = xmin, xmax = xmax,ymin = -spacing*2, ymax = ymax-spacing)
a3 <- annotation_custom(grob = h, xmin = xmin, xmax = xmax,ymin = -spacing*3, ymax = ymax-spacing*2)
a4 <- annotation_custom(grob = i, xmin = xmin, xmax = xmax,ymin = -spacing*4, ymax = ymax-spacing*3)
a5 <- annotation_custom(grob = j, xmin = xmin, xmax = xmax,ymin = -spacing*5, ymax = ymax-spacing*4)
pfinal <- p0 + a1 + a2 + a3 + a4 + a5
pfinal
Using dedicated geom_joy() from ggjoy package:
library(ggjoy)
ggplot(TestFrame, aes(Score, Group)) +
geom_joy()
# dummy data
set.seed(1)
TestFrame <-
data.frame(
Score =
c(rnorm(100, 0, 1)
,rnorm(100, 0, 2)
,rnorm(100, 0, 3)
,rnorm(100, 0, 4)
,rnorm(100, 0, 5))
,Group =
c(rep('Ones', 100)
,rep('Twos', 100)
,rep('Threes', 100)
,rep('Fours', 100)
,rep('Fives', 100))
)
head(TestFrame)
# Score Group
# 1 -0.6264538 Ones
# 2 0.1836433 Ones
# 3 -0.8356286 Ones
# 4 1.5952808 Ones
# 5 0.3295078 Ones
# 6 -0.8204684 Ones