Add a horizontal line to plot and legend in ggplot2 - r

This code creates a nice plot but I would like to add a horizontal black line at y=50 AND have the legend show a black line with the text "cutoff" in the legend, but leave points in the legend for the sources. I can add the line with geom_line but cannot get the line in the legend.
# Attach ggplot2 for the plotting calls that follow.
library(ggplot2)
# Build the example data frame from inline CSV text. The header row names
# three columns: source, year and value.
the.data <- read.table( header=TRUE, sep=",",
text="source,year,value
S1,1976,56.98
S1,1977,55.26
S1,1978,68.83
S1,1979,59.70
S1,1980,57.58
S1,1981,61.54
S1,1982,48.65
S1,1983,53.45
S1,1984,45.95
S1,1985,51.95
S1,1986,51.85
S1,1987,54.55
S1,1988,51.61
S1,1989,52.24
S1,1990,49.28
S1,1991,57.33
S1,1992,51.28
S1,1993,55.07
S1,1994,50.88
S2,1993,54.90
S2,1994,51.20
S2,1995,52.10
S2,1996,51.40
S3,2002,57.95
S3,2003,47.95
S3,2004,48.15
S3,2005,37.80
S3,2006,56.96
S3,2007,48.91
S3,2008,44.00
S3,2009,45.35
S3,2010,49.40
S3,2011,51.19")
# Scatter of value against year, coloured by source, with a single smoother
# fitted across all sources (group = 1).
ggplot(data = the.data, mapping = aes(x = year, y = value)) +
  geom_point(mapping = aes(colour = source)) +
  geom_smooth(mapping = aes(group = 1))

(1) Try this:
# Two-point data frame running from -Inf to Inf on x at y = 50. The factor
# column is mapped to linetype below.
cutoff <- data.frame(
  x = c(-Inf, Inf),
  y = 50,
  cutoff = factor(50)
)

ggplot(data = the.data, mapping = aes(x = year, y = value)) +
  geom_point(mapping = aes(colour = source)) +
  geom_smooth(mapping = aes(group = 1)) +
  geom_line(
    mapping = aes(x = x, y = y, linetype = cutoff),
    data = cutoff
  )
(2) Regarding your comment, if you don't want the cutoff listed as a separate legend it would be easier to just label the cutoff line right on the plot:
# Draw the cutoff with geom_hline() and put a "Cutoff" text label at
# (earliest year, 50) on the panel instead of adding a legend entry.
ggplot(data = the.data, mapping = aes(x = year, y = value)) +
  geom_point(mapping = aes(colour = source)) +
  geom_smooth(mapping = aes(group = 1)) +
  geom_hline(yintercept = 50) +
  annotate(
    geom = "text",
    x = min(the.data$year),
    y = 50,
    vjust = -1,
    label = "Cutoff"
  )
Update
This seems even better and generalizes to multiple lines, as shown:
# One row per horizontal line: its y position and the label mapped to linetype.
line.data <- data.frame(
  yintercept = c(50, 60),
  Lines = c("lower", "upper")
)

ggplot(data = the.data, mapping = aes(x = year, y = value)) +
  geom_point(mapping = aes(colour = source)) +
  geom_smooth(mapping = aes(group = 1)) +
  geom_hline(
    mapping = aes(yintercept = yintercept, linetype = Lines),
    data = line.data
  )

Another solution :
# Base plot: points coloured by source plus one overall smoother.
gg <- ggplot(the.data, aes(x = year, y = value)) +
  geom_point(aes(colour = source)) +
  geom_smooth(aes(group = 1))

# One-row data frame describing the cutoff line; its factor column is mapped
# to linetype below.
cutoff <- data.frame(yintercept = 50, cutoff = factor(50))

# `show.legend` replaces the deprecated `show_guide` argument.
gg +
  geom_hline(
    aes(yintercept = yintercept, linetype = cutoff),
    data = cutoff,
    show.legend = TRUE
  )
This code generates exactly the same graphic as the one in point (1) of @G. Grothendieck's answer, but it is easier to adapt to graphics with several layers.

Related

Is there an equivalent to points() on ggplot2

I'm working with stock prices and trying to plot the price difference.
I created a plot using autoplot.zoo(). My question is: how can I change the point shapes to triangles when they are above the upper threshold and to circles when they are below the lower threshold? I understand that with the basic plot() function you can do this by calling points(); I am wondering how to do the same with ggplot2.
Here is the code for the plot:
# Line plot of the series with a red horizontal line at +threshold and one
# at -threshold.
p <- autoplot.zoo(data, geom = "line") +
  geom_hline(yintercept = threshold, color = "red") +
  geom_hline(yintercept = -threshold, color = "red") +
  ggtitle("AAPL vs. SPY out of sample")

# Overlay the points on the stored plot.
p + geom_point()
We can't fully replicate without your data, but here's an attempt with some sample generated data that should be similar enough that you can adapt for your purposes.
# Sample data: one row per year from 2001 to 2020, with a spread drawn
# uniformly from [-10, 10]
data <- data.frame(
  date = 2001:2020,
  spread = runif(n = 20, min = -10, max = 10)
)

# Upper and lower threshold (used as +thresh and -thresh)
thresh <- 4
You can create an additional variable that determines the shape, based on the relationship in the data itself, and pass that as an argument into ggplot.
library(ggplot2)

# Create conditional data: label every row relative to the +/- thresh band
data$outlier <- "In Range"
data$outlier[data$spread > thresh] <- "Above"
data$outlier[data$spread < -thresh] <- "Below"

# Map the label to point shape; group = 1 keeps the line as a single path
ggplot(
  data = data,
  mapping = aes(x = date, y = spread, shape = outlier, group = 1)
) +
  geom_line() +
  geom_point() +
  geom_hline(yintercept = c(thresh, -thresh), color = "red") +
  scale_shape_manual(values = c(17, 16, 15))
# If you want points just above and below
# Sample data
data <- data.frame(
  date = 2001:2020,
  spread = runif(n = 20, min = -10, max = 10)
)
thresh <- 4

# Only rows outside the band get a label; rows inside it are left as NA
data$outlier <- NA_character_
data$outlier[data$spread > thresh] <- "Above"
data$outlier[data$spread < -thresh] <- "Below"

ggplot(
  data = data,
  mapping = aes(x = date, y = spread, shape = outlier, group = 1)
) +
  geom_line() +
  geom_point() +
  geom_hline(yintercept = c(thresh, -thresh), color = "red") +
  scale_shape_manual(values = c(17, 16))
Alternatively, you can add the points above and below the threshold as individual layers with manually specified shapes, like this. The pch argument sets the point shape.
# Another way of doing this
data <- data.frame(
  date = 2001:2020,
  spread = runif(20, -10, 10)
)
# Upper and lower threshold
thresh <- 4

# Each subset is drawn as its own point layer with a fixed shape (pch).
# No shape aesthetic is mapped, so no shape scale is needed.
ggplot(data, aes(x = date, y = spread, group = 1)) +
  geom_line() +
  geom_point(data = data[data$spread > thresh, ], pch = 17) +
  geom_point(data = data[data$spread < -thresh, ], pch = 16) +
  geom_hline(yintercept = c(thresh, -thresh), color = "red")

ggplot plot - reorder variable & alter line thickness/colour

This code creates a basic plot but I can't work out how to order the values in order of value (fct_reorder is included but I must have done something wrong). I also wanted to colour the lines and make them thicker.
# Attach the tidyverse before tibble() and fct_reorder() are used below.
library(tidyverse)
# Example data: six ICDDx labels and a Percentage for each.
dat2 <- tibble(Percentage = c(12.5,58.9,9.1,3.6,7.3,7.3),
ICDDx = c("Dx1","Dx2","Dx3","Dx4","Dx5","Dx6"))
library(ggplot2)
# NOTE(review): fct_reorder(Percentage) is passed as a third, unnamed argument
# to aes() and is never applied to ICDDx -- presumably why the axis order does
# not change. Confirm.
ggplot(dat2, aes(Percentage,ICDDx, fct_reorder(Percentage))) +
# Grey segment from x = 0 to each Percentage, at the height of its ICDDx.
geom_segment(aes(x = 0, y = ICDDx, xend = Percentage,
yend = ICDDx), color = "grey50") +
# Point of size 6 at (Percentage, ICDDx).
geom_point(size=6)
I tried to specify geom_line(size = 3), but received this error:
Error: `data` must be a data frame, or other object coercible by
`fortify()`, not an S3 object with class LayerInstance/Layer/ggproto/gg
Just use geom_lollipop():
library(tidyverse)

dat2 <- tibble(
  Percentage = c(12.5, 58.9, 9.1, 3.6, 7.3, 7.3),
  ICDDx = c("Dx1", "Dx2", "Dx3", "Dx4", "Dx5", "Dx6")
)

# Reorder ICDDx by Percentage first, then rescale Percentage to a proportion.
dat2 %>%
  mutate(
    ICDDx = fct_reorder(ICDDx, Percentage),
    Percentage = Percentage / 100
  ) %>%
  ggplot() +
  ggalt::geom_lollipop(
    mapping = aes(x = Percentage, y = ICDDx),
    horizontal = TRUE,
    colour = "#6a3d9a",
    size = 2,
    point.colour = "#ff7f00",
    point.size = 4
  ) +
  hrbrthemes::scale_x_percent(
    expand = c(0, 0.01),
    position = "top",
    limits = c(0, 0.6)
  ) +
  labs(x = NULL, y = NULL) +
  hrbrthemes::theme_ipsum_rc(grid = "X")
Here is my answer based on my interpretation of your question.
# Reorder the ICDDx factor by Percentage so the y-axis follows the values.
# arrange() only sorts rows; a character column is still drawn in
# alphabetical order.
dat2 %>%
  mutate(ICDDx = fct_reorder(ICDDx, Percentage)) %>%
  ggplot(aes(Percentage, ICDDx, col = ICDDx)) +
  # Constant thickness is set outside aes() so it does not create a legend.
  geom_segment(
    aes(x = 0, y = ICDDx, xend = Percentage, yend = ICDDx),
    size = 4
  ) +
  geom_point(size = 6)
That gives the following plot:
You could do a ranking first.
# Sort rows by Percentage and record each row's position as its rank.
dat2 <- dat2[order(dat2$Percentage), ] # order by percentage
dat2$rank <- seq_len(nrow(dat2)) # add ranking variable

ggplot(dat2, aes(x = Percentage, y = rank, group = rank, color = ICDDx)) +
  geom_segment(
    aes(x = 0, y = rank, xend = Percentage, yend = rank),
    col = "grey50", size = 2
  ) +
  geom_point(size = 6) +
  # Optional: show the ICDDx names instead of the numeric ranks on the y-axis.
  scale_y_continuous(breaks = dat2$rank, labels = dat2$ICDDx) +
  # List the legend keys in rank order. Using `breaks` keeps every key paired
  # with its own level; `labels = dat2$ICDDx` would instead relabel the
  # alphabetically ordered keys with the rank-ordered names.
  scale_color_discrete(breaks = dat2$ICDDx)
Yielding

how to write conditions that define what color each point should be

Here is some example data, as @Gregor suggested:
# Ten standard-normal draws for each of the two columns.
df <- data.frame(
  x = rnorm(n = 10),
  y = rnorm(n = 10)
)
I want to plot the left part of the plot (x below zero) in one color and the right part in another color.
I tried to plot it with ggplot but I am stuck. Any suggestions?
# Map the columns by name instead of indexing df inside aes(); the axis
# titles are set explicitly below, so the labels are unchanged.
g <- ggplot(data = df, aes(x = x, y = y)) +
  geom_point() +
  xlim(c(-2, 2)) + ylim(c(0, 8)) +
  xlab("my x axis") + ylab(" my y axis") +
  guides(colour = guide_legend(override.aes = list(shape = 16)))
g
what I want is to be able to plot it like this using ggplot
Okay. Let's be clear that this isn't a ggplot question at all, but a question of how to write conditions that define what color each point should be.
Starting with your original color scheme - I think I've correctly interpreted it as this
# Label each row by how many of the two criteria it meets:
# 'both', 'one' or 'neither'.
res$my_grouping <- with(res, {
  low_p <- pvalue < 0.5
  big_change <- abs(log2FoldChange) > 1
  ifelse(
    low_p | big_change,
    ifelse(low_p & big_change, 'both', 'one'),
    'neither'
  )
})
Now let's paste on whether it is positive or negative:
# Prefix each label with 'neg' or 'pos' according to the sign of
# log2FoldChange.
res$my_grouping <- with(
  res,
  paste(ifelse(log2FoldChange < 0, 'neg', 'pos'), my_grouping, sep = ', ')
)
Then plot:
# Plot -log10(pvalue) against log2FoldChange, coloured by my_grouping.
ggplot(
  data = res,
  mapping = aes(x = log2FoldChange, y = -log10(pvalue), color = my_grouping)
) +
  geom_point() +
  xlim(c(-2, 2)) +
  ylim(c(0, 8)) +
  labs(x = "log2 fold change", y = "-log10 p-value", color = 'key') +
  guides(colour = guide_legend(override.aes = list(shape = 16)))
Yielding this:
You can customize the color values using scale_color_manual.

Saving ggplot to a list then applying to grid.arrange geom_line from last plot populates all previous plots

I am very new to R and ggplot2. I am trying to create a grid of plots of correlations, together with their trailing max and min values, using a for loop. The plots are then saved as PDFs to a directory. When they are saved, the blue lines (min/max) are plotted correctly. However, when I then use do.call(grid.arrange, t), or any other call to the plots in the list, I do not get the correct blue lines; instead, the last plot's blue lines populate all of the plots.
I don't understand how this can plot and save the PDF correctly but not store the ggplot object correctly in the list t, or whether there is some confusion in the rendering when using do.call(grid.arrange, t). How can the original (black) line plot correctly while the geom_line additions do not? I am really confused.
If someone could kindly help me check this code and find out how to plot all lines correctly then place them in a grid that would be great.
Reproducible code below, using random data:
# Attach the packages used below.
require(TTR)
require(ggplot2)
library(gridExtra)
# Fix the random seed so the simulated values are repeatable.
set.seed(12345)
# Directory passed to ggsave() as `path`.
filelocation = "c:/"
# 500 x 5 data frame of normal draws (mean 0, sd 3); one plot per column.
values <- as.data.frame(matrix( rnorm(5*500,mean=0,sd=3), 500, 5))
# Collects one ggplot object per column for grid.arrange() at the end.
t <- list()
# Window length passed to runMax() and runMin().
rollLength = 25
for( i in 1:(ncol(values)))
{
# Base plot with index(values) on the x-axis.
p <- ggplot(data=values, aes(x = index(values)) )
# Series for column i, referenced by its column name.
p <- p + geom_line(data=values, aes_string(y = colnames(values)[i]))
# NOTE(review): the next two aes() calls reach the column through the loop
# variable (`values[,i]`). If ggplot2 evaluates them only when a plot is
# drawn, every stored plot would use the final value of `i`, which would match
# the symptom described in the question -- confirm.
p <- p + geom_line(data = values, aes(x = index(values), y = runMax(values[,i], n = rollLength) ), colour = "blue", linetype = "longdash" )
p <- p + geom_line(data = values, aes(x = index(values), y = runMin(values[,i], n = rollLength) ), colour = "blue", linetype = "longdash" )
# Title is the column name; axis labels are fixed strings.
p <- p + ggtitle(colnames(values)[i]) + xlab("Date") + ylab("Pearson Correlation")
# Draw the plot now, inside the loop.
print(p)
# File name is "<column name>.pdf". No plot object is passed to ggsave();
# presumably it saves the plot that was just printed.
ggsave( file = paste(colnames(values)[i],".pdf",sep = "") , path = filelocation)
# Keep the plot both as a variable named p<i> and as element i of the list.
assign(paste("p", i, sep = ""), p)
t[[i]] <- p
}
# Pass all stored plots to grid.arrange() in one call.
do.call(grid.arrange,t)
Hmm, this isn't exactly what you want I think, but close, and less code
library(TTR)
library(ggplot2)
library(reshape2)

set.seed(12345)
values <- as.data.frame(matrix(rnorm(5 * 500, mean = 0, sd = 3), 500, 5))
rollLength <- 25

# Long format: the columns of `values` stacked into `variable` / `value`.
dfmelt <- melt(values)

# Compute the rolling extremes within each variable so that a window never
# spans two different series.
dfmelt$max <- ave(
  dfmelt$value, dfmelt$variable,
  FUN = function(v) runMax(v, n = rollLength)
)
dfmelt$min <- ave(
  dfmelt$value, dfmelt$variable,
  FUN = function(v) runMin(v, n = rollLength)
)
# Row position restarts at 1 for every variable, so each facet shares the
# same x-axis range.
dfmelt$row <- ave(seq_along(dfmelt$value), dfmelt$variable, FUN = seq_along)

# One facet per original column: the series in black, rolling max/min in blue.
ggplot(dfmelt, aes(x = row, y = value)) +
  geom_line() +
  geom_line(aes(y = max), colour = "blue", linetype = "longdash") +
  geom_line(aes(y = min), colour = "blue", linetype = "longdash") +
  facet_wrap(~ variable, scales = "free")

Reuse ggplot layers in multiple plots

I am plotting tons of graphs which essentially use the same type of formatting. Just wondering if it possible to store these layers in a variable and reuse them.
Approach 1 (does not work)
# Add two layers together, store the result, then add it to a plot.
# layer1() and layer2() are not defined here -- presumably placeholders for
# real ggplot2 layers.
t <- layer1() + layer2()
ggplot(df,aes(x,y)) + t
Approach 2 (works but not very elegant)
# Wrap the shared layers in a function that appends them to the plot it is
# given. layer1() and layer2() are not defined here -- presumably placeholders
# for real ggplot2 layers.
t <- function(x) x + layer1() + layer2()
# The call needs three closing parentheses: aes(), ggplot() and t().
t(ggplot(df, aes(x, y)))
Any suggestion along the lines of approach 1?
Thanks!
While I wait for some clarification, here are a few examples that demonstrate how to add previously created layers to an existing plot:
# Base plot of mpg against cyl.
p <- ggplot(data = mtcars, mapping = aes(x = cyl, y = mpg)) +
  geom_point()

# Point layers built on their own, each with its own y mapping and colour.
new_layer <- geom_point(
  mapping = aes(x = cyl, y = hp),
  data = mtcars,
  colour = "red"
)
new_layer1 <- geom_point(
  mapping = aes(x = cyl, y = wt),
  data = mtcars,
  colour = "blue"
)

# Add one stored layer, or a list of stored layers, with `+`.
p + new_layer
p + list(new_layer, new_layer1)
Based on Joran's answer, I now put my layers into a list and add it to my plots. Works like a charm:
# Example data: mean and sem at four time points, with two alternative
# grouping columns.
r <- data.frame(
  time = c(5, 10, 15, 20),
  mean = c(10, 20, 30, 40),
  sem = c(2, 3, 1, 4),
  param1 = c("A", "A", "B", "B"),
  param2 = c("X", "Y", "X", "Y")
)

# Layers, x scale and axis labels shared by both plots below.
gglayers <- list(
  geom_point(size = 3),
  geom_errorbar(
    mapping = aes(ymin = mean - sem, ymax = mean + sem),
    width = 0.3
  ),
  scale_x_continuous(breaks = c(0, 30, 60, 90, 120, 180, 240)),
  labs(x = "Time(minutes)", y = "Concentration")
)

# Same layers, grouped by param1.
ggplot(
  data = r,
  mapping = aes(x = time, y = mean, colour = param1, shape = param1)
) +
  gglayers +
  labs(color = "My param1\n", shape = "My param1\n")

# Same layers, grouped by param2.
ggplot(
  data = r,
  mapping = aes(x = time, y = mean, colour = param2, shape = param2)
) +
  gglayers +
  labs(color = "My param2\n", shape = "My param2\n")
I know this is old, but here is one that avoids the clunky t(ggplot(...)))
# Forward all arguments to ggplot() and append the shared layers to the
# resulting plot.
t <- function(...) {
  ggplot(...) + layer1() + layer2()
}

t(df, aes(x, y))

Resources