Prevent geom_points and their corresponding labels from overlapping - r

Thanks for the suggested duplicate, this is however not only about the labels, but is also about adjusting the points themselves so they do not overlap.
Have a quick look at the plot below.
I need the coloured points, and their corresponding labels, to never overlap. They should be clustered together and all visible, perhaps with some indication that they are spaced and not 100% accurate, perhaps some sort of call out? Open to suggestions on that.
I've tried adding position = 'jitter' to both geom_point and geom_text, but that doesn't seem to be working (I assume it only helps with small overlaps?).
Ideas?
# TEST DATA
# Twenty keyed points; Z is the absolute gap between X and Y and drives the
# point colour. No seed is set, so the data (and the plot) differ on every run.
srvc_data <- data.frame(
  Key = 1:20,
  X = sample(40:80, 20, replace = TRUE),
  Y = sample(30:65, 20, replace = TRUE)
)
srvc_data$Z <- with(srvc_data, abs(X - Y))

# Theme tweaks: no grid, border or backgrounds, thin axis lines.
t1 <- theme(
  plot.background = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  axis.line = element_line(size = .4)
)

# The 0-100 axis limits are set by the scale_*_continuous() calls below;
# ggplot() itself does not use xlim/ylim arguments, so none are passed.
main_plot <- ggplot(srvc_data, aes(x = X, y = Y)) +
  # theme_bw() is a complete theme, so it must come before t1: added after t1
  # it would replace every setting made there.
  theme_bw() +
  t1 +
  labs(x = "X", y = "Y") +
  scale_x_continuous(limits = c(0, 100)) +
  scale_y_continuous(limits = c(0, 100)) +
  # Wide translucent band plus a dashed line along the diagonal Y = X.
  geom_abline(intercept = 0, slope = 1, colour = "blue", size = 34, alpha = .1) +
  geom_abline(intercept = 0, slope = 1, colour = "black", size = .2, alpha = .5,
              linetype = "dashed") +
  geom_point(size = 7, aes(color = Z), alpha = .7) +
  scale_color_gradient("Gap %\n", low = "green", high = "red") +
  coord_fixed() +
  # size is a constant, so it is set outside aes() and taken literally;
  # show.legend replaces the old show_guide argument.
  geom_text(aes(label = Key), size = 6, show.legend = FALSE)
main_plot
Produces this plot (of course with your random data it will vary)
Thanks in advance.

Here's your plot with ggrepel geom_text_repel:
library(ggplot2)
library(ggrepel)

# TEST DATA
# Fixed seed so the example plot is reproducible.
set.seed(42)
srvc_data <- data.frame(
  Key = 1:20,
  X = sample(40:80, 20, replace = TRUE),
  Y = sample(30:65, 20, replace = TRUE)
)
# Z is the absolute gap between X and Y; it drives the point colour.
srvc_data$Z <- with(srvc_data, abs(X - Y))

# Theme tweaks: no grid, border or backgrounds, thin axis lines.
t1 <- theme(
  plot.background = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  axis.line = element_line(size = .4)
)

# The 0-100 axis limits are set by the scale_*_continuous() calls below;
# ggplot() itself does not use xlim/ylim arguments, so none are passed.
ggplot(srvc_data, aes(x = X, y = Y)) +
  # theme_bw() is a complete theme, so it must come before t1: added after t1
  # it would replace every setting made there.
  theme_bw() +
  t1 +
  labs(x = "X", y = "Y") +
  scale_x_continuous(limits = c(0, 100)) +
  scale_y_continuous(limits = c(0, 100)) +
  # Wide translucent band plus a dashed line along the diagonal Y = X.
  geom_abline(intercept = 0, slope = 1, colour = "blue", size = 34, alpha = .1) +
  geom_abline(intercept = 0, slope = 1, colour = "black", size = .2, alpha = .5,
              linetype = "dashed") +
  geom_point(size = 7, aes(color = Z), alpha = .7) +
  scale_color_gradient("Gap %\n", low = "green", high = "red") +
  coord_fixed() +
  # geom_text_repel() moves the labels apart (the points themselves stay put).
  # size is a constant, so it is set outside aes() and taken literally;
  # show.legend replaces the old show_guide argument.
  geom_text_repel(aes(label = Key), size = 6, show.legend = FALSE)

Related

How to separate aesthetics of two different geom_lines?

I'm trying to plot a line on the x axis which is basically a bunch of zeros and ones. Ones are green and zeros are red. When I try to do that, the scale_colour_gradient of the ggplot basically colors on top of the line.
It looks like this
Where the line should be colored as follows:
colorbar is a vector of zeros and ones.
# Main series coloured along a red-to-green gradient by its own y value, plus
# a second, thicker line taken from colorFrame. Both layers map `colour`, so
# they end up on the single gradient scale defined here.
p <- ggplot(data1, aes(x = newx, y = newy, group = 1, colour = newy)) +
  geom_line(size = 1.5, show.legend = FALSE) +
  scale_colour_gradient(low = "red2", high = "green3") +
  geom_line(
    data = colorFrame,
    mapping = aes(
      x = as.numeric(x) - 5,
      y = as.numeric(ys),
      colour = colorbar
    ),
    size = 3,
    show.legend = FALSE
  ) +
  xlim(0, 1300) +
  # Strip the x axis decorations and keep a plain black y axis.
  theme(
    panel.background = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.line.y = element_line(colour = "black"),
    axis.ticks.y.left = element_line(colour = "black")
  ) +
  scale_y_continuous(
    breaks = seq(0, 12, 1),
    limits = c(-1, 12),
    expand = c(0, 0)
  )
One solution would be to create two subplots and stitch them together. I use cowplot and theme_void here, but really the second plot below could look however you want it to.
# Top panel: the data line, coloured along the gradient by its y value.
p1 <- ggplot(df, aes(x = x, y = y, group = 1, colour = y)) +
  geom_line(size = 1.5, show.legend = FALSE) +
  scale_colour_gradient(low = "red2", high = "green3") +
  theme(
    panel.background = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.line.y = element_line(colour = "black"),
    axis.ticks.y.left = element_line(colour = "black")
  ) +
  scale_y_continuous(
    breaks = seq(0, 12, 1),
    limits = c(-1, 12),
    expand = c(0, 0)
  ) +
  labs(x = NULL)

# Bottom strip: a flat line at y = 0 coloured by z, with its own colour scale
# because it lives in a separate plot.
p2 <- ggplot(df, aes(x = x, y = 0, colour = z)) +
  geom_line(size = 1.5, show.legend = FALSE) +
  scale_colour_gradient(low = "red2", high = "green3") +
  theme_void()

# Stack the two plots in one column with relative heights 1 : 0.05,
# vertically aligned.
cowplot::plot_grid(
  p1, p2,
  ncol = 1,
  rel_heights = c(1, .05),
  align = "v"
)
Data
# Example data: 50 x positions, a uniform random y in [0, 12] and a random
# 0/1 flag z. No seed is set, so the values differ between runs.
df <- data.frame(
  x = seq_len(50),
  y = runif(n = 50, min = 0, max = 12),
  z = sample(c(0, 1), size = 50, replace = TRUE)
)

R ggplot2, display the number of subjects at the bottom (outside) of the plot [duplicate]

Is there a quick way to add a table to my ggplot2 graph? I would like this table to have the value of each line at the same breakpoints as specified in scale_x_continuous(), but with the percentage (%) symbol next to them. My end goal is to create something like the image below. However, I don't know how to add the table.
The following block of code just makes two lines in ggplot2 and should be adequate to provide me with an example:
# library() stops with an error when ggplot2 is missing; require() would only
# return FALSE and let the script fail later with a less helpful message.
library(ggplot2)

# Two straight lines drawn against a common x sequence, with an x-axis break
# every 10 units.
df <- data.frame(a = seq(0, 90, 10), b = seq(10, 100, 10))
df.plot <- ggplot(data = df, aes(x = seq(1, 100, 10))) +
  geom_line(aes(y = a), colour = "red") +
  geom_line(aes(y = b), colour = "blue") +
  scale_x_continuous(breaks = seq(0, 100, 10))
df.plot
A similar question was asked here, but the given answer is more of a workaround and wouldn't look good for a table with 2 rows. I am going to mess around with the clues provided by Brian Diggs, but I figured I would post this in case anyone has already done something like this. Any help would be greatly appreciated!
Edit: Thanks to @baptiste for helping me figure this out. I posted my own response below that finished what he started.
Here's a basic example of the strategy used by learnr:
# library() fails loudly when a package is missing (require() only returns
# FALSE). grid is attached explicitly because unit(), grid.newpage() and
# grid.draw() are grid functions.
library(ggplot2)
library(grid)
library(gridExtra)

df <- data.frame(a = seq(0, 90, 10), b = seq(10, 100, 10))
df.plot <- ggplot(data = df, aes(x = seq(1, 100, 10))) +
  geom_line(aes(y = a), colour = "red") +
  geom_line(aes(y = b), colour = "blue") +
  scale_x_continuous(breaks = seq(0, 100, 10))

# make dummy labels for the table content
df$lab <- month.abb[ceiling((df$a + 1) / 10)]

# The "table" is itself a ggplot: one text label per x position on a single
# row (y = 0), with every axis and grid decoration removed.
df.table <- ggplot(df, aes(x = a, y = 0, label = lab, colour = b)) +
  geom_text(size = 3.5) +
  theme_minimal() +
  scale_y_continuous(breaks = NULL) +
  theme(
    panel.grid.major = element_blank(),
    legend.position = "none",
    panel.border = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )

gA <- ggplotGrob(df.plot)

# Keep only the gtable row that holds the panel. The row is looked up by
# name rather than hard-coded, because the position of the panel row in the
# layout differs between ggplot2 versions.
table_grob <- ggplotGrob(df.table)
panel_row <- unique(
  table_grob$layout$t[grepl("^panel", table_grob$layout$name)]
)
gB <- table_grob[panel_row, ]
gB$heights <- unit(1, "line")

# Stack the label row underneath the main plot and draw the result.
gAB <- rbind(gA, gB)
grid.newpage()
grid.draw(gAB)
Here is a script that creates the general table that I set out to make. Notice that I included table titles by changing the names under scale_y_continuous for each row.
# library() fails loudly when a package is missing; require() only returns
# FALSE.
library(ggplot2)
library(gridExtra)

df <- data.frame(a = seq(0, 90, 10), b = seq(10, 100, 10))
df.plot <- ggplot(data = df, aes(x = seq(1, 100, 10))) +
  geom_line(aes(y = a), colour = "red") +
  geom_line(aes(y = b), colour = "blue") +
  scale_x_continuous(breaks = seq(0, 100, 10))

# make dummy labels for the table content
lab.df <- data.frame(
  lab1 = letters[11:20],
  lab2 = letters[1:10]
)

# Each table row is its own ggplot: text labels on a single line (y = 0) with
# all decorations removed. The y-axis title doubles as the row title.
df.table1 <- ggplot(lab.df, aes(x = lab1, y = 0, label = lab1)) +
  geom_text(size = 5, colour = "red") +
  theme_minimal() +
  scale_y_continuous(breaks = NULL, name = "Model Lift") +
  theme(
    panel.grid.major = element_blank(),
    legend.position = "none",
    panel.border = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_text(angle = 0, hjust = 5)
  )

df.table2 <- ggplot(lab.df, aes(x = lab2, y = 0, label = lab2)) +
  geom_text(size = 5, colour = "blue") +
  theme_minimal() +
  scale_y_continuous(breaks = NULL, name = "Random") +
  theme(
    panel.grid.major = element_blank(),
    legend.position = "none",
    panel.border = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_text(angle = 0, hjust = 3.84)
  )

# silly business to align the two plot panels
# The three gtables get the same widths in columns 2:3 (the element-wise
# maximum), so their panels start at the same horizontal position.
# NOTE(review): columns 2:3 are presumably the y-axis title/label columns;
# the gtable layout depends on the ggplot2 version - confirm.
# NOTE(review): as.list() looks like a workaround for assigning units in
# older versions of grid; on current R assigning maxWidth directly may be
# what is needed - TODO confirm on the R version in use.
gA <- ggplotGrob(df.plot)
gB <- ggplotGrob(df.table1)
gC <- ggplotGrob(df.table2)
maxWidth <- grid::unit.pmax(gA$widths[2:3], gB$widths[2:3], gC$widths[2:3])
gA$widths[2:3] <- as.list(maxWidth)
gB$widths[2:3] <- as.list(maxWidth)
gC$widths[2:3] <- as.list(maxWidth)

# Main plot on top, followed by the two thin label rows.
grid.arrange(gA, gB, gC, ncol = 1, heights = c(10, .3, .3))

How to put plots without any space using plot_grid?

I'm doing an arrangement of 2x2 plots. The plots share the same axis, so I want to put them together, e.g.
This code:
library(ggplot2)
library(cowplot)

# Eleven evenly spaced values from 0 to 1000 against their index 0..10.
Value <- seq(0, 1000, by = 100)
Index <- 0:10
DF <- data.frame(Index = Index, Value = Value)

# One dashed line plot with a fixed 2:1 panel shape, repeated four times in a
# 2 x 2 grid aligned both horizontally and vertically.
plot <- ggplot(DF, aes(x = Index, y = Value)) +
  geom_line(linetype = 2) +
  theme(aspect.ratio = 0.5)

plot_grid(
  plot, plot, plot, plot,
  align = "hv",
  ncol = 2
)
produces
But I'd like something like:
How can I achieve a similar result?
I think this is a case for the ggarrange() function from the egg package. Doing this with plot_grid() would require endless fiddling and isn't worth it.
(The technical reason is that plot_grid() keeps the total area for each plot in the grid constant, but if some plots have an x axis and others don’t then they take up different areas. One could try to circumvent this by using the rel_heights argument but there’s no good way to calculate the correct values for rel_heights, so it would be trial and error. By contrast, ggarrange() separately looks at the plot panel and the surrounding elements and makes sure the plot panels have the same size.)
Here is the code using ggarrange():
# Shared data and base plot for all four panels.
Value <- seq(0, 1000, by = 100)
Index <- 0:10
DF <- data.frame(Index = Index, Value = Value)

pbase <- ggplot(DF, aes(x = Index, y = Value)) +
  geom_line(linetype = 2) +
  theme_bw()

# Each panel keeps the default 5.5pt margin only on its outer sides and uses
# 0 on the sides that touch a neighbouring panel.

# Top row: x axis moved to the top and then hidden entirely.
ptopleft <- pbase +
  scale_x_continuous(position = "top") +
  theme(
    plot.margin = margin(t = 5.5, r = 0, b = 0, l = 5.5),
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

ptopright <- pbase +
  scale_y_continuous(position = "right") +
  scale_x_continuous(position = "top") +
  theme(
    plot.margin = margin(t = 5.5, r = 5.5, b = 0, l = 0),
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Bottom row: x axis stays visible; right-hand panel gets its y axis on the
# right.
pbottomleft <- pbase +
  theme(plot.margin = margin(t = 0, r = 0, b = 5.5, l = 5.5))

pbottomright <- pbase +
  scale_y_continuous(position = "right") +
  theme(plot.margin = margin(t = 0, r = 5.5, b = 5.5, l = 0))

library(egg)
ggarrange(
  ptopleft, ptopright,
  pbottomleft, pbottomright,
  ncol = 2
)
Two comments:
To remove every last bit of space below the plot panel on the top plots, we need to move the x axis to the top, even though we're not showing it. This is a strange limitation of the theming mechanism. We can't fully get rid of just one axis.
I'm not a big fan of shared axis titles, as in your example. I think each axis should have a title. If you want shared axis titles, why not use the faceting mechanism?
You can set a slightly adjusted plot.margin for each plot, then combine the plots with grid.arrange and add the shared axis labels there.
library(ggplot2)
library(grid)
library(gridExtra)

Value <- seq(0, 1000, by = 1000 / 10)
Index <- 0:10
DF <- data.frame(Index, Value)

# Everything the four panels have in common is defined once; each panel then
# only adds what is specific to it (margin, hidden x labels, y-axis side).
base_plot <- ggplot(DF, aes(x = Index, y = Value)) +
  geom_line(linetype = 2) +
  theme_minimal() +
  theme(
    aspect.ratio = 0.5,
    panel.border = element_rect(fill = NA),
    axis.title = element_blank(),
    axis.ticks = element_blank()
  )

# Top row only: hide the x-axis labels.
no_x_text <- theme(axis.text.x = element_blank())

# plot.margin is c(top, right, bottom, left) in points; the negative values
# pull vertically neighbouring panels together.
plot1 <- base_plot +
  no_x_text +
  theme(plot.margin = unit(c(5.5, 5.8, -50, 5.5), "pt"))

plot2 <- base_plot +
  no_x_text +
  theme(plot.margin = unit(c(5.5, 5.5, -50, 5.5), "pt")) +
  scale_y_continuous(position = "right")

plot3 <- base_plot +
  theme(plot.margin = unit(c(-50, 5.8, -50, 5.5), "pt"))

plot4 <- base_plot +
  theme(plot.margin = unit(c(-50, 5.5, -50, 5.5), "pt")) +
  scale_y_continuous(position = "right")

# 2 x 2 arrangement with shared axis titles supplied by grid.arrange().
grid.arrange(
  grobs = list(plot1, plot2, plot3, plot4),
  ncol = 2,
  bottom = "Index",
  left = "Value",
  right = "Value"
)
final plot

Mean per group on a bubble plot with ggplot

I have a dataset with a lot of overlapping points and used ggplot to create a bubble plot to show that data. I need to add bars on my plot for the means of each group on the x axis (values can be 0, 1, or 2). I have tried to use geom_errorbar but haven't been able to get it to work with my data. Any help/suggestions would be greatly appreciated.
The following is my code and a script to generate fake data that is similar:
# Fake data: 100 random (x, y) pairs drawn from a small grid of values, so
# many observations coincide.
y <- seq(from = 0, to = 3.5, by = 0.5)
x <- seq(from = 0, to = 2, by = 1)
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
xnew <- sample(x, 100, replace = TRUE)
ynew <- sample(y, 100, replace = TRUE)
data <- data.frame(xnew, ynew)

# One row per distinct (x, y) combination with the number of observations
# that fall on it.
data2 <- aggregate(data$xnew, by = list(x = data$xnew, y = data$ynew), length)
names(data2)[3] <- "Count"

# Bubble plot: point size encodes how many observations share a position.
ggplot(data2, aes(x = x, y = y)) +
  geom_point(aes(size = Count)) +
  labs(x = "Copies", y = "Score") +
  # Presumably left over from the geom_errorbar() attempt; geom_point() does
  # not use ymin/ymax.
  aes(ymax = ..y.., ymin = ..y..) +
  scale_x_continuous(breaks = seq(0, 2, 1)) +
  scale_y_continuous(breaks = seq(0, 3, 0.5)) +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    axis.line = element_line(size = 1, colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.text.x = element_text(colour = "black", size = 10),
    axis.text.y = element_text(colour = "black", size = 10)
  )
I am not entirely sure that I understand your question correctly. It seems to me that in addition to the bubbles, you want to visualise the mean value of y for each value of x as a bar of some kind. (You mention error bars, but it seems that this is not a requirement, but just what you have tried. I will use geom_col() instead.)
I assume that you want to weigh the mean over y by the counts, i.e., sum(y * Count) / sum(Count). You can create a data frame that contains these values by using dplyr:
library(dplyr)

# Count-weighted mean of y for every x: sum(y * Count) / sum(Count).
# data2 has one row per (x, y) combination, so each y value has to be
# weighted by how many observations it stands for.
data2_mean <- data2 %>%
  group_by(x) %>%
  summarise(y = sum(y * Count) / sum(Count))
data2_mean
## # A tibble: 3 × 2
## x y
## <dbl> <dbl>
## 1 0 1.833333
## 2 1 1.750000
## 3 2 2.200000
When creating the plot, I use data2 as the data set for geom_point() and data2_mean as the data set for geom_col(). It is important to put the bars first, since the bubbles should be on top of the bars.
# Two layers with their own data sets: grey bars for the per-x means
# (data2_mean) and bubbles for the counts (data2). The bars are added first
# so that the bubbles are drawn on top of them.
ggplot() +
  geom_col(
    mapping = aes(x = x, y = y),
    data = data2_mean,
    fill = "gray60",
    width = 0.7
  ) +
  geom_point(
    mapping = aes(x = x, y = y, size = Count),
    data = data2
  ) +
  labs(x = "Copies", y = "Score") +
  scale_x_continuous(breaks = seq(0, 2, 1)) +
  scale_y_continuous(breaks = seq(0, 3, 0.5)) +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    axis.line = element_line(size = 1, colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.text.x = element_text(colour = "black", size = 10),
    axis.text.y = element_text(colour = "black", size = 10)
  )
Everything that I changed compared to your code comes before scale_x_continuous(). This produces the following plot:
Is this what you're after? I first calculated the group-level means using the dplyr package and then added line segments to your plot using geom_segment:
library(ggplot2)
library(dplyr)

# data2 holds one row per (x, y) combination together with its Count, so the
# group mean has to be weighted by Count; a plain mean(y) would treat every
# combination as a single observation. ungroup() keeps the grouping from
# leaking into later operations on data2.
data2 <- data2 %>%
  group_by(x) %>%
  mutate(mean.y = weighted.mean(y, Count)) %>%
  ungroup()

# Bubble plot plus a short horizontal segment at the mean of each x group.
# The plot-level ymin/ymax mapping was dropped: neither geom_point() nor
# geom_segment() uses those aesthetics.
ggplot(data2, aes(x = x, y = y)) +
  geom_point(aes(size = Count)) +
  labs(x = "Copies", y = "Score") +
  scale_x_continuous(breaks = seq(0, 2, 1)) +
  scale_y_continuous(breaks = seq(0, 3, 0.5)) +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    axis.line = element_line(size = 1, colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.text.x = element_text(colour = "black", size = 10),
    axis.text.y = element_text(colour = "black", size = 10)
  ) +
  # Segment spans 0.25 either side of the group's x position.
  geom_segment(
    aes(y = mean.y, yend = mean.y, x = x - 0.25, xend = x + 0.25)
  )

How can I add a table to my ggplot2 output?

Is there a quick way to add a table to my ggplot2 graph? I would like this table to have the value of each line at the same breakpoints as specified in scale_x_continuous(), but with the percentage (%) symbol next to them. My end goal is to create something like the image below. However, I don't know how to add the table.
The following block of code just makes two lines in ggplot2 and should be adequate to provide me with an example:
# library() stops with an error when ggplot2 is missing; require() would only
# return FALSE and let the script fail later with a less helpful message.
library(ggplot2)

# Two straight lines drawn against a common x sequence, with an x-axis break
# every 10 units.
df <- data.frame(a = seq(0, 90, 10), b = seq(10, 100, 10))
df.plot <- ggplot(data = df, aes(x = seq(1, 100, 10))) +
  geom_line(aes(y = a), colour = "red") +
  geom_line(aes(y = b), colour = "blue") +
  scale_x_continuous(breaks = seq(0, 100, 10))
df.plot
A similar question was asked here, but the given answer is more of a workaround and wouldn't look good for a table with 2 rows. I am going to mess around with the clues provided by Brian Diggs, but I figured I would post this in case anyone has already done something like this. Any help would be greatly appreciated!
Edit: Thanks to @baptiste for helping me figure this out. I posted my own response below that finished what he started.
Here's a basic example of the strategy used by learnr:
# library() fails loudly when a package is missing (require() only returns
# FALSE). grid is attached explicitly because unit(), grid.newpage() and
# grid.draw() are grid functions.
library(ggplot2)
library(grid)
library(gridExtra)

df <- data.frame(a = seq(0, 90, 10), b = seq(10, 100, 10))
df.plot <- ggplot(data = df, aes(x = seq(1, 100, 10))) +
  geom_line(aes(y = a), colour = "red") +
  geom_line(aes(y = b), colour = "blue") +
  scale_x_continuous(breaks = seq(0, 100, 10))

# make dummy labels for the table content
df$lab <- month.abb[ceiling((df$a + 1) / 10)]

# The "table" is itself a ggplot: one text label per x position on a single
# row (y = 0), with every axis and grid decoration removed.
df.table <- ggplot(df, aes(x = a, y = 0, label = lab, colour = b)) +
  geom_text(size = 3.5) +
  theme_minimal() +
  scale_y_continuous(breaks = NULL) +
  theme(
    panel.grid.major = element_blank(),
    legend.position = "none",
    panel.border = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )

gA <- ggplotGrob(df.plot)

# Keep only the gtable row that holds the panel. The row is looked up by
# name rather than hard-coded, because the position of the panel row in the
# layout differs between ggplot2 versions.
table_grob <- ggplotGrob(df.table)
panel_row <- unique(
  table_grob$layout$t[grepl("^panel", table_grob$layout$name)]
)
gB <- table_grob[panel_row, ]
gB$heights <- unit(1, "line")

# Stack the label row underneath the main plot and draw the result.
gAB <- rbind(gA, gB)
grid.newpage()
grid.draw(gAB)
Here is a script that creates the general table that I set out to make. Notice that I included table titles by changing the names under scale_y_continuous for each row.
# library() fails loudly when a package is missing; require() only returns
# FALSE.
library(ggplot2)
library(gridExtra)

df <- data.frame(a = seq(0, 90, 10), b = seq(10, 100, 10))
df.plot <- ggplot(data = df, aes(x = seq(1, 100, 10))) +
  geom_line(aes(y = a), colour = "red") +
  geom_line(aes(y = b), colour = "blue") +
  scale_x_continuous(breaks = seq(0, 100, 10))

# make dummy labels for the table content
lab.df <- data.frame(
  lab1 = letters[11:20],
  lab2 = letters[1:10]
)

# Each table row is its own ggplot: text labels on a single line (y = 0) with
# all decorations removed. The y-axis title doubles as the row title.
df.table1 <- ggplot(lab.df, aes(x = lab1, y = 0, label = lab1)) +
  geom_text(size = 5, colour = "red") +
  theme_minimal() +
  scale_y_continuous(breaks = NULL, name = "Model Lift") +
  theme(
    panel.grid.major = element_blank(),
    legend.position = "none",
    panel.border = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_text(angle = 0, hjust = 5)
  )

df.table2 <- ggplot(lab.df, aes(x = lab2, y = 0, label = lab2)) +
  geom_text(size = 5, colour = "blue") +
  theme_minimal() +
  scale_y_continuous(breaks = NULL, name = "Random") +
  theme(
    panel.grid.major = element_blank(),
    legend.position = "none",
    panel.border = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_text(angle = 0, hjust = 3.84)
  )

# silly business to align the two plot panels
# The three gtables get the same widths in columns 2:3 (the element-wise
# maximum), so their panels start at the same horizontal position.
# NOTE(review): columns 2:3 are presumably the y-axis title/label columns;
# the gtable layout depends on the ggplot2 version - confirm.
# NOTE(review): as.list() looks like a workaround for assigning units in
# older versions of grid; on current R assigning maxWidth directly may be
# what is needed - TODO confirm on the R version in use.
gA <- ggplotGrob(df.plot)
gB <- ggplotGrob(df.table1)
gC <- ggplotGrob(df.table2)
maxWidth <- grid::unit.pmax(gA$widths[2:3], gB$widths[2:3], gC$widths[2:3])
gA$widths[2:3] <- as.list(maxWidth)
gB$widths[2:3] <- as.list(maxWidth)
gC$widths[2:3] <- as.list(maxWidth)

# Main plot on top, followed by the two thin label rows.
grid.arrange(gA, gB, gC, ncol = 1, heights = c(10, .3, .3))

Resources