Add count as label to points in geom_count - r

I used geom_count to visualise overlaying points as sized groups, but I also want to add the actual count as a label to the plotted points, like this:
However, to achieve this, I had to create a new data frame containing the counts and use these data in geom_text as shown here:
# Raw observations: several rows share the same (x, y) position.
data <- data.frame(
  x = rep(c(2, 3, 4), times = c(4, 5, 1)),
  y = rep(c(1, 2, 3), times = c(1, 5, 4)),
  id = rep(c("a", "b", "c", "d", "e"), times = c(1, 3, 2, 3, 1))
)

# One row per id: its mean position and the number of raw rows behind it.
data2 <- data %>%
  group_by(id) %>%
  summarise(x = mean(x), y = mean(y), count = n())

# geom_count() sizes the points; the labels come from the summary table.
ggplot(data = data, aes(x = x, y = y)) +
  geom_count() +
  scale_size_continuous(range = c(10, 15)) +
  geom_text(
    data = data2,
    aes(x = x, y = y, label = count),
    color = "#ffffff"
  )
Is there any way to achieve this in a more elegant way (i.e. without the need for the second data frame)? I know that you can access the count in geom_count using ..n.., yet if I try to access this in geom_text, this is not working.

Are you expecting this:
# Summarise to one row per id, then size and label each point by its count.
data %>%
  group_by(id) %>%
  summarise(x = mean(x), y = mean(y), count = n()) %>%
  ggplot(aes(x = x, y = y)) +
  geom_point(aes(size = count)) +
  scale_size_continuous(range = c(10, 15)) +
  geom_text(
    aes(label = count),
    color = "#ffffff"
  )
update:
If using geom_count is a must, then the expected output can be achieved using:
# Base plot: geom_count() is the first (and only) layer.
p <- ggplot(data = data, aes(x = x, y = y)) +
  geom_count() +
  scale_size_continuous(range = c(10, 15))

# Reuse the computed data of that first layer (it carries the count `n`)
# as the data source for the labels.
p +
  geom_text(
    data = layer_data(p, 1),
    aes(x = x, y = y, label = n),
    color = "#ffffff"
  )

here would be a solution for a code with discrete values
# Both axes show the same set of answer categories.
opinion_labels <- c(
  "strong decrease", "decrease", "no change",
  "increase", "strong increase", "no opinion"
)

# Count plot on discrete axes.
f <- ggplot(data = STest, aes(x = x, y = y)) +
  geom_count() +
  scale_x_discrete(labels = opinion_labels) +
  scale_y_discrete(labels = opinion_labels)

# Build `f` itself so the counts used for the labels come from this plot;
# the plot is stored in `f`, not in `p`.
f +
  geom_text(
    data = ggplot_build(f)$data[[1]],
    aes(x, y, label = n),
    vjust = -2
  )
Thank you so much!

A much easier way to change this is to use the labs() function so in this case it would be ...labs(size = "Count") + ....
That should be all you need.

Related

How to add individual hlines for each bar in a plot?

Given a data frame and a plot as follows:
library(dplyr)
library(ggplot2)
# One bar per group; `avg` holds the reference value wanted on each bar.
dat <- data.frame(
  grp = c("a", "b", "c"),
  val = c(30, 20, 10),
  avg = c(25, 15, 5)
)

# geom_col() draws bars whose heights are the values in the data.
ggplot(dat, aes(x = grp, y = val)) +
  geom_col()
How do I amend the code above to place a unique horizontal reference line (avg) on each bar as shown below:
This could be achieved via geom_segment like so: I first convert grp to a numeric and, since the default width of a bar is .9, put x at .45 to the left and xend at .45 to the right of each bar's position:
library(ggplot2)
dat <- data.frame(
  grp = c("a", "b", "c"),
  val = c(30, 20, 10),
  avg = c(25, 15, 5)
)

ggplot(dat, aes(x = grp, y = val)) +
  geom_bar(stat = "identity") +
  # One horizontal segment per bar at height `avg`, reaching .45 to either
  # side of the bar's numeric position.
  geom_segment(
    aes(
      x = as.numeric(factor(grp)) - .45,
      xend = as.numeric(factor(grp)) + .45,
      y = avg,
      yend = avg
    ),
    color = "red"
  )
EDIT Thanks to a comment by @tjebo: As hard-coding is rarely a good idea, one could set the width via a variable:
# Width used for the reference segments, kept in one variable instead of
# hard-coding half of it twice.
w <- .9
...
# Each segment reaches w/2 to either side of the bar's numeric position.
geom_segment(aes(y = avg, yend = avg,
x = as.numeric(factor(grp)) - w/2,
xend = as.numeric(factor(grp)) + w/2), color = "red")

gganimate: two layers with different geometries and timepoints

The problem is similar to this question but here the two layers use different geometries, geom_tile and geom_point. The idea is to have tiles show up at different locations only in frames 2, 5, 8, and the point move along the diagonal in every frame.
When trying to run the following example, I get the error:
Error: time data must be the same class in all layers
Example
# library() fails loudly if a package is missing; require() only returns FALSE.
library(data.table)
library(ggplot2)
library(gganimate)

# 3 tiles along x = 10 - y; present at time points 2, 5, 8.
# Note: `t` is a double vector here.
dtP1 <- data.table(
  x = c(1, 5, 9),
  y = c(9, 5, 1),
  t = c(2, 5, 8)
)

# 9 points along x = y; present at every time point.
# Note: `t` is an integer vector here (1:9), unlike the double `t` in dtP1.
dtP2 <- data.table(
  x = 1:9,
  y = 1:9,
  t = 1:9
)

# Two layers with separate data sets, animated over the shared column `t`.
# This is the example that reproduces the reported error; it is kept as is.
p <- ggplot() +
  geom_tile(
    data = dtP1,
    aes(x = x, y = y),
    color = "#000000"
  ) +
  geom_point(
    data = dtP2,
    aes(x = x, y = y),
    color = "#FF0000"
  ) +
  gganimate::transition_time(t) +
  gganimate::ease_aes('linear')

# Render 9 frames at 1 frame per second into an mp4 file.
pAnim <- gganimate::animate(
  p,
  renderer = av_renderer("~/test.mp4"),
  fps = 1,
  nframes = 9,
  height = 400,
  width = 400
)
Does the following work for you?
library(dplyr)
# Stack both tables, tagging each row with the layer it belongs to, then
# make every combination of `t` and `group` an explicit row.
p <- rbind(
  dtP1 %>% mutate(group = "group1"),
  dtP2 %>% mutate(group = "group2")
) %>%
  tidyr::complete(t, group) %>%
  ggplot(aes(x = x, y = y)) +
  # Each layer picks its own rows out of the shared plot data.
  geom_tile(
    data = function(d) filter(d, group == "group1"),
    color = "black"
  ) +
  geom_point(
    data = function(d) filter(d, group == "group2"),
    color = "red"
  ) +
  ggtitle("{frame_time}") + # optional: shows the current frame time as title
  transition_time(t) +
  ease_aes('linear')

animate(p, nframes = 9, fps = 1)

How to extend line across entire violin plot

Dataframe as example:
library(tidyverse)
set.seed(123)

# Three uniform samples, drawn in the order b, c, d.
df <- data.frame(
  b = runif(1000, min = 2, max = 10),
  c = runif(1000, min = 2, max = 10),
  d = runif(1000, min = 2, max = 10)
)

# One cutoff value per id.
df_2 <- data.frame(
  id = c("b", "c", "d"),
  cutoff = c(5, 3, 5),
  stringsAsFactors = FALSE
)

# Long format (one row per id/value pair) with the matching cutoff attached.
df <- df %>%
  pivot_longer(
    cols = c("b", "c", "d"),
    names_to = "id",
    values_to = "value"
  ) %>%
  left_join(df_2, by = "id")
I can now make a violin plot (or a boxplot, same issue) with a line overlaid:
# Violin per id with the per-id cutoff overlaid as a single connected line.
df %>%
  ggplot(aes(x = id)) +
  geom_violin(aes(y = value)) +
  # The colour must be the string "red"; a bare `red` is looked up as an
  # R object and is not defined.
  geom_line(aes(x = id, y = cutoff, group = 1), color = "red")
What I'd like though is three lines (don't need to be connected) each of which extend across the entire width of a single violin, at the cutoff value specified in df_2.
I can do this manually with geom_segment, but is there a better, more programmatic way?
# Manual version: one hard-coded segment per violin, at that violin's cutoff.
ggplot(df, aes(x = id)) +
  geom_violin(aes(y = value)) +
  geom_segment(
    aes(x = 0.55, xend = 1.45, y = 5, yend = 5),
    color = "blue"
  ) +
  geom_segment(
    aes(x = 1.55, xend = 2.45, y = 3, yend = 3),
    color = "blue"
  ) +
  geom_segment(
    aes(x = 2.55, xend = 3.45, y = 5, yend = 5),
    color = "blue"
  )
I understand that at some fundamental level the x-axis is ordered by factor level, with b = 1, c = 2 etc., so asking for a line intersecting x = 0.9 would require specifying corresponding y value. In another sense though, ggplot2 clearly knows (in some sense) that the region above x = 0.9 (that is, y values intersected by a vertical line at x = 0.9) is associated with factor level b because the corresponding violin for b overlaps that region. Is there a way to get at that information?
You can use geom_errorbar(). So change your second block to:
# An error bar with ymin == ymax has no height, so it shows up as a
# horizontal line at the cutoff of each id; x is inherited from ggplot().
ggplot(df, aes(x = id)) +
  geom_violin(aes(y = value)) +
  geom_errorbar(
    aes(ymin = cutoff, ymax = cutoff),
    color = "red"
  )

ggplot2 plot an angle between two lines

I would like to plot an angle between two lines using ggplot2, meaning something similar to the bold red line in the plot below. Is there an easy solution to this?
Data and code to make the plot without the red line:
library(tidyverse)
# Two lines, A and B, each given by its two end points.
df <- tibble(
  line = rep(c("A", "B"), each = 2),
  x = c(1, 5, 1, 3),
  y = c(1, 3, 1, 5)
)

ggplot(df, aes(x = x, y = y, group = line)) +
  geom_path()
have a look at geom_curve, e.g. :
# Hand-placed curve between one point on each line.
ggplot(df, aes(x = x, y = y, group = line)) +
  geom_path() +
  geom_curve(
    aes(x = 1.5, y = 2, xend = 2, yend = 1.5),
    curvature = -0.5,
    color = "red",
    size = 3
  )
You will have to tweak it a bit to use it in a more robust, automatic way, for example:
# Mean point of each line; the curve is drawn between these two points.
red_curve <- df %>%
  group_by(line) %>%
  summarise(
    avg_x = mean(x),
    avg_y = mean(y)
  )

ggplot(df, aes(x = x, y = y, group = line)) +
  geom_path() +
  geom_curve(
    data = red_curve,
    aes(
      x = avg_x[1],
      y = avg_y[1],
      xend = avg_x[2],
      yend = avg_y[2]
    ),
    curvature = 0.5,
    color = "red",
    size = 3
  )
Here is a solution with geom_arc of the ggforce package.
library(ggplot2)
library(ggforce)
# Direction of point `p` as seen from centre `c`, in radians.
#
# `p` and `c` are numeric vectors whose first two elements are the x and y
# coordinates. The result is the argument of the complex number built from
# the offset p - c, i.e. the angle measured from the positive x axis.
angle <- function(p, c) {
  offset <- p - c
  z <- complex(real = offset[1], imaginary = offset[2])
  Arg(z)
}
# Common origin of the two lines and the far end point of each line.
O <- c(1, 1)
P1 <- c(5, 3)
P2 <- c(3, 5)

# Directions of the two lines, counter-clockwise from the positive x axis.
a1 <- angle(P1, O)
a2 <- angle(P2, O)

df <- data.frame(
  line = c("A", "A", "B", "B"),
  x = c(1, 5, 1, 3),
  y = c(1, 3, 1, 5)
)

# ggforce measures arc angles clockwise from the positive y axis
# (12 o'clock), so the counter-clockwise-from-x angles are converted with
# pi / 2 - angle. Passing a1 and a2 directly only looks right for these
# data because the two lines are mirror images about y = x.
ggplot(df, aes(x, y, group = line)) +
  geom_path() +
  geom_arc(
    aes(
      x0 = O[1], y0 = O[2], r = 1,
      start = pi / 2 - a1, end = pi / 2 - a2
    ),
    color = "red", size = 2, inherit.aes = FALSE
  )
The arc does not look like a true circular arc. That's because the aspect ratio is not set to 1. To set the aspect ratio to 1:
# Same plot with a 1:1 aspect ratio so the arc is drawn as part of a circle.
# ggforce measures arc angles clockwise from the positive y axis
# (12 o'clock), so the counter-clockwise-from-x angles a1 and a2 are
# converted with pi / 2 - angle.
ggplot(df, aes(x, y, group = line)) +
  geom_path() +
  geom_arc(
    aes(
      x0 = O[1], y0 = O[2], r = 1,
      start = pi / 2 - a1, end = pi / 2 - a2
    ),
    color = "red", size = 2, inherit.aes = FALSE
  ) +
  coord_fixed()

Line graph customization (add circles, colors)

With data
# One value per name; 15 observations in total.
value <- c(9, 4, 10, 7, 10, 10, 10, 4, 10, 4, 10, 2, 5, 5, 4)
names <- c(letters[1:14], "p")

df <- data.frame(value, names)
df$names <- as.character(df$names)

# Red diamonds joined by a single red line, drawn with flipped axes.
p <- ggplot(data = df, aes(x = names, y = value, group = 1)) +
  geom_point(
    color = I("red"),
    shape = 23,
    lwd = 3,
    fill = "red"
  ) +
  geom_line(
    group = I(1),
    color = I("red")
  ) +
  theme_bw() +
  coord_flip()

# Blank out both axis titles.
p + labs(x = "", y = "")
I produce this
But now I would like to create a plot similar to the picture below, where "a", "b", "c" and "d" would be the x-axis labels belonging to PART 1, and the names "p", "n", "m", "i", "k" would belong to PART 2 (and so on). The key part here is how to add the circles inside the plot.
I've also looked here
How can I add freehand red circles to a ggplot2 graph?
but no luck.
If what is shown in the upper picture is not possible, then I would like my output to look like the picture below.
In order to achieve the facetting by part as in your last plot you can create a new column that groups your values, e.g.:
# Assign the rows to three parts in blocks of five consecutive rows.
df[["part"]] <- rep(c("part3", "part2", "part1"), each = 5)
In order to plot the open circles you can add another geom_point() layer. I created a new data frame that consists of all combinations of names and value for each part:
library(dplyr)
library(tidyr)
# For every (part, names) pair, one row per integer step between the
# smallest and largest value in df: the grid of open circles.
df2 <- df %>%
  group_by(part, names) %>%
  expand(
    value = min(df$value):max(df$value)
  )
Then you plot a facetted plot with circles:
ggplot() +
  # Background grid of open circles (shape 1).
  geom_point(
    data = df2,
    aes(x = value, y = names),
    shape = 1
  ) +
  # Observed values as filled red diamonds.
  geom_point(
    data = df,
    aes(x = value, y = names, group = 1),
    colour = I("red"),
    shape = 23,
    lwd = 3,
    fill = "red"
  ) +
  # Red line through the observed values.
  geom_line(
    data = df,
    aes(x = value, y = names, group = 1),
    group = I(1),
    color = I("red")
  ) +
  theme_bw() +
  # One panel per part, stacked in one column, each with its own y axis.
  facet_wrap(~part, ncol = 1, scales = "free_y")
Note that I swapped x and y values as coord_flip() cannot be used with scales = "free_y" which is however necessary if you want only those names which have values in the respective facet.

Resources