Custom manhattan plot multi x-axis - r
I have the following data set, gwas_data. Running head -n 23 gwas_data gives me the following table:
# Example GWAS table: 22 rows = 11 sample ids (udi "A".."K") for each of two
# SNPs on chromosome 1. The first 11 rows belong to rs71628639_A, the last 11
# to rs12726330_A.
gwas_data <- data.frame(
  # Sample ids A..K, repeated once per SNP.
  udi = rep(LETTERS[1:11], times = 2),
  # One SNP name per block of 11 rows.
  snp = rep(c("rs71628639_A", "rs12726330_A"), each = 11),
  # Every row is on chromosome 1.
  chr = rep(1L, 22),
  # Base-pair position of each SNP.
  bp = rep(c(154988255L, 155108167L), each = 11),
  # Association p-values, one per (udi, snp) row.
  p = c(
    0.580621191, 0.356577427, 0.494774059, 0.984005886, 0.492034614,
    0.581479389, 0.24820214, 0.202720896, 0.295462221, 0.845848783,
    0.954714162,
    0.343101621, 0.740942238, 0.929127071, 0.717965027, 0.335111376,
    0.857154424, 0.480087195, 0.980307843, 0.521114038, 0.583150471,
    0.925783695
  ),
  # Effect sizes, one per (udi, snp) row.
  beta = c(
    0.000852277, 0.003943912, 0.001091986, -3.18e-05, 0.000564413,
    0.000120028, 0.026156467, 0.000303135, 0.069146449, -2.96e-07,
    -2.11e-05,
    0.001274261, -0.001232397, 0.000123948, -0.000498507, -0.000689988,
    -3.41e-50, -0.013934416, 5.12e-06, -0.03696031, -7.28e-07,
    -3.01e-05
  ),
  # Reproduced exactly as supplied.
  # NOTE(review): these values equal chr + bp / 1e9, which does not look like
  # a cumulative base-pair position -- confirm how bp_cum was derived.
  bp_cum = rep(c(1.154988255, 1.155108167), each = 11),
  stringsAsFactors = FALSE
)
I would like to make a Manhattan plot. The x-axis should be labelled with the chromosome numbers 1 to 22, and each entry should be placed along the x-axis according to its base-pair (bp) position. The ID (the udi column) should determine the colour, and the y-axis should show -log10(p).
I have rewritten the r command as follows, but my graph doesn't look correct.
library(plyr)
library(dplyr)
library(purrr)
library(tidyverse)
library(ggtext)
library(stringr)
# Manhattan plot of the GWAS table: x = cumulative genomic position,
# y = -log10(p), colour = sample id (udi).

# Read the GWAS summary table. Columns used below: udi, snp, chr, bp, p.
gwas_data <- read.table("gwas_data", header = TRUE, stringsAsFactors = FALSE)

# Genome-wide significance threshold.
sig <- 5e-8

# x offset of every chromosome = summed extent (largest bp) of all
# chromosomes sorted before it, so chromosomes are laid out side by side.
# bp is converted to double first: summing integer positions across
# chromosomes can exceed the 32-bit integer range.
chr_offsets <- gwas_data %>%
  dplyr::group_by(chr) %>%
  dplyr::summarise(max_bp = max(as.numeric(bp)), .groups = "drop") %>%
  dplyr::arrange(chr) %>%
  dplyr::mutate(bp_add = dplyr::lag(cumsum(max_bp), default = 0))

# Cumulative position of each row; overwrites any bp_cum column that was
# read from the file so the x coordinate is always derived from chr and bp.
gwas_data$bp_cum <- gwas_data$bp +
  chr_offsets$bp_add[match(gwas_data$chr, chr_offsets$chr)]

# One axis tick per chromosome, placed at the centre of its points.
axis_set <- gwas_data %>%
  dplyr::group_by(chr) %>%
  dplyr::summarise(center = mean(bp_cum), .groups = "drop")

# Upper y limit: high enough for both the strongest finite signal and the
# significance line, plus one unit of headroom for the labels.
neg_log_p <- -log10(gwas_data$p)
y_max <- ceiling(max(neg_log_p[is.finite(neg_log_p)], -log10(sig))) + 1

manhplot <- ggplot(
  gwas_data,
  aes(x = bp_cum, y = -log10(p), color = as.factor(udi))
) +
  geom_hline(yintercept = -log10(sig), color = "grey40", linetype = "dashed") +
  geom_point(alpha = 0.8, size = 2) +
  # Label only the hits above the significance threshold with id and SNP.
  # subset() also drops rows whose p is NA.
  geom_text(
    data = subset(gwas_data, p < sig),
    aes(label = paste(udi, snp)),
    vjust = -0.8,
    size = 3,
    show.legend = FALSE
  ) +
  scale_x_continuous(labels = axis_set$chr, breaks = axis_set$center) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, y_max)) +
  labs(x = "Chromosome", y = "-log10(p)", color = "udi") +
  theme_minimal()

print(manhplot)
I would also like to add the name of the ID and SNP if they are above the significant threshold.
My axis_set looks as follows with test data which goes from chromosome 1:4
chr center
1 179641307
2 354697451
3 553030055
4 558565909
My final graph looks as follows:
Related
How Insert an expression in legend in ggplot2?:: correct color + multiple lines and point
I am new to R and have not been able to correct the following graph. Xb_exp, it should have blue dots. Xb_dw, solid red line. Xb_f, dotted line. Xb_s, longdash line. The legend expression should be as shown with the subscript. I have not been able to correct it. Is there a way to do this? enter image description here my data CA <- c(3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30) Xb_exp <- c(0.0231,0.0519,0.0839,0.1197,0.1595,0.1996,0.2384,0.2772,0.3153,0.3520,0.3887,0.4254,0.4615,0.4978,0.5339,0.5685,0.6000,0.6279,0.6528,0.6762,0.6974,0.7166,0.7346,0.7516,0.7669,0.7810,0.7940,0.8059) Xb_dw <- c(0.0160,0.0516,0.0886,0.1259,0.1633,0.2006,0.2377,0.2749,0.3122,0.3496,0.3870,0.4245,0.4617,0.4984,0.5339,0.5678,0.5996,0.6288,0.6551,0.6786,0.6994,0.7179,0.7346,0.7499,0.7641,0.7774,0.7899,0.8018) Xb_f <- c(0.0021,0.0031,0.0046,0.0067,0.0095,0.0131,0.0177,0.0234,0,0387,0.0483,0.0591,0.0709,0.0832,0.0955,0.1073,0.1181,0.1272,0.1345,0.1398,0.1443,0.1456,0.1468,0.1474,0.1476,0.1477,0.1477,0.1477,0.1477) Xb_s <- c(0.0139,0.0484,0.0839,0.1192,0.1538,0.1874,0.2200,0.2515,0.2818,0.3108,0.3387,0.3653,0.3908,0.4151,0.4383,0.4604,0.4815,0.5015,0.5206,0.5387,0.5559,0.5722,0.5877,0.6024,0.6164,0.6264,0.6421,0.6040) dat <- c(CA, Xb_exp, Xb_dw, Xb_f, Xb_s) my code labels = c(expression(X[b_exp]),expression(X[b_dw]),expression(X[b_f]),expression(X[b_s])) color4 <- c("Xb_exp"="#3C5488FF", "Xb_dw"="#DC0000FF", "Xb_f"="#00A087FF", "Xb_s"="#4DBBD5FF") Xb_D1 <- ggplot(data = dat) + theme_bw() + labs(x="Crank position (ºCA)", y= bquote('Burn fraction ('~X[b]~')')) + geom_point(aes(x=CA, y=Xb_exp, colour="Xb_exp"), size=3) + geom_line(aes(x=CA, y=Xb_dw,colour="Xb_dw"), size=1,linetype="solid") + geom_line(aes(x=CA, y=Xb_f,colour="Xb_f"), size=1,linetype="dotted") + geom_line(aes(x=CA, y=Xb_s,colour="Xb_s"), size=1,linetype="longdash") + scale_colour_manual(values=color4, labels=labels) + theme(legend.title = element_blank(),legend.position = c(0.8, 0.5), legend.text 
= element_text(size = 12)) + scale_x_continuous(limits = c(2,80)) plot(Xb_D1)
ggplot() requires a data frame, not a vector. If you modify your code with: dat <- data.frame(CA, Xb_exp, Xb_dw, Xb_f, Xb_s) and fix the typo in your Xb_f vector: Xb_f <- c(0.0021,0.0031,0.0046,0.0067,0.0095,0.0131,0.0177,0.0234,0.0387,0.0483,0.0591,0.0709,0.0832,0.0955,0.1073,0.1181,0.1272,0.1345,0.1398,0.1443,0.1456,0.1468,0.1474,0.1476,0.1477,0.1477,0.1477,0.1477) your remaining code will work as is, but the same result can be achieved more simply using the tidyverse approach below. Use pivot_longer to stack the y variables against your x variable: dat %>% pivot_longer(Xb_exp:Xb_s) %>% ggplot(aes(x = CA, y = value, colour = name)) + geom_point() + geom_line() + scale_colour_manual(values=color4, labels=labels) + theme_bw() + theme(legend.title = element_blank(),legend.position = c(0.8, 0.5), legend.text = element_text(size = 12)) + scale_x_continuous(limits = c(2,80)) + labs(x="Crank position (ºCA)", y= bquote('Burn fraction ('~X[b]~')'))
Ironically, setting this up with conventional plotting is rather simple. Given all the data above: linetypes4 <- c( Xb_exp=NA, Xb_dw="solid", Xb_f="dotted", Xb_s="longdash" ) plot( NA, type="n", xlim=c(0,30), ylim=c(0,0.8), xlab = "Crank position (ºCA)", ylab = bquote('Burn fraction ('~X[b]~')'), panel.first = grid() ) with( dat, { points( x=CA, y=Xb_exp, pch=19, col=color4["Xb_exp"], size=3 ) for( n in c("Xb_dw", "Xb_f", "Xb_s")) { lines( x=CA, y=get(n), lty=linetypes[n], col=color4[n], lwd=2 ) } }) legend( x = "right", legend = labels, col = color4, lty = linetypes4, pch = c(19,NA,NA,NA), box.lwd = 0, inset = .02 )
There are some errors in your code suggesting you didn't try what you pasted. 0,0387, in your data should likely be 0.0387, otherwise nothing is right (no data measures several hundreds in there) c(CA, ... ) should likely be data.frame( CA, ... ) Now, the first problem is you are doing all the heavy lifting yourself, while ggplot sits there with nothing left to do. It was designed to set up colors and line types by group. You however need to transform the data first to take full advantage of that: library(tidyr) CA <- c(3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30) Xb_exp <- c(0.0231,0.0519,0.0839,0.1197,0.1595,0.1996,0.2384,0.2772,0.3153,0.3520,0.3887,0.4254,0.4615,0.4978,0.5339,0.5685,0.6000,0.6279,0.6528,0.6762,0.6974,0.7166,0.7346,0.7516,0.7669,0.7810,0.7940,0.8059) Xb_dw <- c(0.0160,0.0516,0.0886,0.1259,0.1633,0.2006,0.2377,0.2749,0.3122,0.3496,0.3870,0.4245,0.4617,0.4984,0.5339,0.5678,0.5996,0.6288,0.6551,0.6786,0.6994,0.7179,0.7346,0.7499,0.7641,0.7774,0.7899,0.8018) Xb_f <- c(0.0021,0.0031,0.0046,0.0067,0.0095,0.0131,0.0177,0.0234,0.0387,0.0483,0.0591,0.0709,0.0832,0.0955,0.1073,0.1181,0.1272,0.1345,0.1398,0.1443,0.1456,0.1468,0.1474,0.1476,0.1477,0.1477,0.1477,0.1477) Xb_s <- c(0.0139,0.0484,0.0839,0.1192,0.1538,0.1874,0.2200,0.2515,0.2818,0.3108,0.3387,0.3653,0.3908,0.4151,0.4383,0.4604,0.4815,0.5015,0.5206,0.5387,0.5559,0.5722,0.5877,0.6024,0.6164,0.6264,0.6421,0.6040) dat <- data.frame(CA, Xb_exp, Xb_dw, Xb_f, Xb_s) color4 <- c("Xb_exp"="#3C5488FF", "Xb_dw"="#DC0000FF", "Xb_f"="#00A087FF", "Xb_s"="#4DBBD5FF") linetypes <- c( Xb_dw="solid", Xb_f="dotted", Xb_s="longdash" ) dat2 <- pivot_longer( dat, cols=starts_with("Xb_") ) dat2.line <- dat2 %>% filter( name != "Xb_exp" ) dat2.point <- dat2 %>% filter( name == "Xb_exp" ) dat2 is now a long data set, with data category as a variable, not with a separate column for each data series. 
This is how ggplot likes it: dat2 # A tibble: 112 x 3 CA name value <dbl> <fct> <dbl> 1 3 Xb_exp 0.0231 2 3 Xb_dw 0.016 3 3 Xb_f 0.0021 4 3 Xb_s 0.0139 5 4 Xb_exp 0.0519 6 4 Xb_dw 0.0516 7 4 Xb_f 0.0031 8 4 Xb_s 0.0484 9 5 Xb_exp 0.0839 10 5 Xb_dw 0.0886 # … with 102 more rows I then split the data into what later goes to points and what goes to lines, just so the plot code is no uglier than it has to be: Xb_D1 <- ggplot(data = dat2.line, aes(x=CA,y=value,color=name)) + theme_bw() + labs(x="Crank position (ºCA)", y= bquote('Burn fraction ('~X[b]~')')) + geom_point( data = dat2.point, size=3) + geom_line( aes(col=name,lty=name), size=1 ) + scale_colour_manual(values=color4) + scale_linetype_manual( values=linetypes, guide=FALSE ) + guides( color = guide_legend( override.aes=list( shape=c(NA,19,NA,NA), linetype=c("solid","solid","dashed","dotted") ) ) ) + theme(legend.title = element_blank(),legend.position = c(0.8, 0.5), legend.text.align = 0, legend.text = element_text(size = 12)) + scale_x_continuous(limits = c(2,30)) print(Xb_D1) Notes: (1) there is no need to supply labels; (2) use line type as you would use color with ggplot, it's just one more channel that can carry information (or "aesthetic", as they like to call it over there); (3) align the legend text left, it looks nicer that way; (4) more sophisticated is the use of override.aes to take the points away from the legend categories that shouldn't have them. Now, I was unable to change the order of the data series in the labels; that can be a hassle. Is the order they are in still OK for you?
Add an additional X axis to the plot and some lines/annotations to show the percentage of data under it
I was trying to recreate this plot: using the following code - library(tidyverse) set.seed(0); r <- rnorm(10000); df <- as.data.frame(r) avg <- round(mean(r),2) SD <- round(sd(r),2) x.scale <- seq(from = avg - 3*SD, to = avg + 3*SD, by = SD) x.lab <- c("-3SD", "-2SD", "-1SD", "Mean", "1SD", "2SD", "3SD") df %>% ggplot(aes(r)) + geom_histogram(aes(y=..density..), bins = 20, colour="black", fill="lightblue") + geom_density(alpha=.2, fill="darkblue") + scale_x_continuous(breaks = x.scale, labels = x.lab) + labs(x = "") Using the code I plotted this: , but this isn't near to the plot that I am trying to create. How do I make an additional axis with the X axis? How do I add the lines to automatically show the percentage of observations? Is there any way, that I can create the plot as nearly identical as possible using ggplot2?
Welcome to SO. Excellent first question! It's actually quite tricky. You'd need to create a second plot (the second x axis) but it's not the most straight forward to align both perfectly. I will be using Z.lin's amazing modification of the cowplot package. I am not using the reprex package, because I think I'd need to define every single function (and I don't know how to use trace within reprex.) library(tidyverse) library(cowplot) set.seed(0); r <- rnorm(10000); foodf <- as.data.frame(r) avg <- round(mean(r),2) SD <- round(sd(r),2) x.scale <- round(seq(from = avg - 3*SD, to = avg + 3*SD, by = SD), 1) x.lab <- c("-3SD", "-2SD", "-1SD", "Mean", "1SD", "2SD", "3SD") x2lab <- -3:3 # calculate the density manually dens_r <- density(r) # for each x value, calculate the closest x value in the density object and get the respective y values y_dens <- dens_r$y[sapply(x.scale, function(x) which.min(abs(dens_r$x - x)))] # added annotation for segments and labels. # Arrow segments can be added in a similar way. p1 <- ggplot(foodf, aes(r)) + geom_histogram(aes(y=..density..), bins = 20, colour="black", fill="lightblue") + geom_density(alpha=.2, fill="darkblue") + scale_x_continuous(breaks = x.scale, labels = x.lab) + labs(x = NULL) +# use NULL here annotate(geom = "segment", x = x.scale, xend = x.scale, yend = 1.1 * max(dens_r$y), y = y_dens, lty = 2 ) + annotate(geom = "text", label = x.lab, x = x.scale, y = 1.2 * max(dens_r$y)) p2 <- ggplot(foodf, aes(r)) + scale_x_continuous(breaks = x.scale, labels = x2lab) + labs(x = NULL) + theme_classic() + theme(axis.line.y = element_blank()) # This is with the modified plot_grid() / align_plot() function!!! plot_grid(p1, p2, ncol = 1, align = "v", rel_heights = c(1, 0.1))
How do I combine multiple data sources in ggplot using split and sapply?
this question is linked to a previous one answered by #Rui Barradas and #Duck, but i need more help. Previous link here: how do i vectorise (automate) plot creation in R Basically, I need to combine 3 datasets into one plot with a secondary y axis. All datasets need to be split by SITENAME and will facet wrap by Sampling.Year. I am using split and sapply. Being facet wrap the plots look something like this: However, i'm now trying to add the two other data sources into the plots, to look something like this: But i am struggling to add the two other data sources and get them to split by SITENAME. Her is my code so far... Record plot format as a function to be applied to a split list df (ideally 'df' would be added as geom_line with a secondary y axis, and 'FF_start_dates' will be added as a vertical dashed line): SITENAME_plot <- function(AllDates_TPAF){ ggplot(AllDates_TPAF, aes(DATE, Daily.Ave.PAF)) + geom_point(aes(colour = Risk), size = 3) + scale_colour_manual(values=c("Very Low" = "dark green","Low" = "light green", "Moderate" = "yellow", "High" = "orange", "Very High" = "red"), drop = FALSE) + labs(x = "Month", y = "Total PAF (% affected)") + scale_x_date(breaks = "1 month", labels = scales::date_format("%B")) + facet_wrap(~Sampling.Year, ncol = 1, scales = "free")+ scale_y_continuous(limits = c(0, 100), sec.axis = sec_axis(~., name = "Water level (m)")) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + theme(legend.text=element_text(size=15)) + theme(axis.text=element_text(size=15), axis.title=element_text(size=15,face="bold")) + guides(color = guide_legend(reverse = TRUE))+ theme_bw() + ggtitle(unique(AllDates_TPAF$SITENAME)) } plot write function: SITENAME_plot_write <- function(name, g, dir = "N:/abc/"){ flname <- file.path(dir, name) flname <- paste0(flname, ".jpg") png(filename = flname, width = 1500, height = 1000) print(g) dev.off() flname } Apply function to list split by SITENAME: sp1 <- split(AllDates_TPAF, 
AllDates_TPAF$SITENAME) gg_list <- sapply(sp1, SITENAME_plot, simplify = FALSE) mapply(SITENAME_plot_write, names(gg_list), gg_list, MoreArgs = list(dir = getwd())) dev.off() I have uploaded samples of all 3 datasets here: Sample Data Apologies for not using gsub but there was too much data and I couldn't get it to work properly thanks in advance for any help you can give, even if it is just to point me towards a web tutorial of some kind.
You can try next code. I used the data you shared. Just be careful with names of all datasets. Ideally, the key columns as DATE and Sampling.Year should be present in all dataframes before making the split. Also some variables as Risk was absent so I added an example var with same name. Here the code, I added a function for the plot you want: library(tidyverse) library(readxl) #Data df1 <- read_excel('Sample data.xlsx',1) #Create var df1$Risk <- c(rep(c("Very Low","Low","Moderate","High","Very High"),67),"Very High") #Other data df2 <- read_excel('Sample data.xlsx',2) df3 <- read_excel('Sample data.xlsx',3) #Split 1 L1 <- split(df1,df1$SITENAME) L2 <- split(df2,df2$SITENAME) L3 <- split(df3,df3$`Site Name`) #Function to create plots myplot <- function(x,y,z) { #Merge x and y #Check for duplicates and avoid column y <- y[!duplicated(paste(y$DATE,y$Sampling.Year)),] y$SITENAME <- NULL xy <- merge(x,y,by.x = c('Sampling.Year','DATE'),by.y = c('Sampling.Year','DATE'),all.x=T) #Format to dates xy$DATE <- as.Date(xy$DATE) #Scale factor scaleFactor <- max(xy$Daily.Ave.PAF) / max(xy$Height) #Rename for consistency in names names(z)[4] <- 'DATE' #Format date z$DATE <- as.Date(z$DATE) #Plot #Plot G <- ggplot(xy, aes(DATE, Daily.Ave.PAF)) + geom_point(aes(colour = Risk), size = 3) + scale_colour_manual(values=c("Very Low" = "dark green","Low" = "light green", "Moderate" = "yellow", "High" = "orange", "Very High" = "red"), drop = FALSE) + scale_x_date(breaks = "1 month", labels = scales::date_format("%b %Y")) + geom_line(aes(x=DATE,y=Height*scaleFactor))+ scale_y_continuous(name="Total PAF (% affected)", sec.axis=sec_axis(~./scaleFactor, name="Water level (m)"))+ labs(x = "Month") + geom_vline(data = z,aes(xintercept = DATE),linetype="dashed")+ facet_wrap(~Sampling.Year, ncol = 1, scales = "free")+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + theme(legend.text=element_text(size=15)) + theme(axis.text=element_text(size=15), 
axis.title=element_text(size=15,face="bold")) + guides(color = guide_legend(reverse = TRUE))+ theme_bw() + ggtitle(unique(xy$SITENAME)) return(G) } #Create a list of plots Lplots <- mapply(FUN = myplot,x=L1,y=L2,z=L3,SIMPLIFY = FALSE) #Now format names vnames <- paste0(names(Lplots),'.png') mapply(ggsave, Lplots,filename = vnames,width = 30,units = 'cm') You will end up with plots like these saved in your dir: Some dashed lines do not appear in plots because they were not present in the data you provided.
ggplot2 won't add legend to boxplot with multiple strata
I am attempting to add a legend to my boxplot with this example data: BM math loginc 1 2 1.4523 0 3 2.3415 1 1 0.6524 1 3 2.4562 0 1 3.5231 0 2 2.4532 Essentially, I have two groups, BM = 0 and BM = 1, 3 categories in each group (math=1, 2 or 3), and a value of loginc. boxcolors=c('gray70','orange','red','gray70','orange','red') bothboxplot=ggplot(both, aes(x=math,y=loginc))+ geom_boxplot(fill=boxcolors)+ stat_summary(fun.y=mean,color=line,geom = "point",shape=3,size=2)+ scale_x_discrete(name='Site Category')+ scale_y_continuous(name='Log(Incidence/100,000)')+ facet_grid(.~BM) bothboxplot This yields the following plot: This plot is entirely correct except for the lack of a legend. I have played around with the placement of the aes() and it won't work. When aes() is placed within ggplot() rather than geom_boxplot(), my fill statement gives the error "Error: Aesthetics must be either length 1 or the same as the data (187): fill". Ideally the legend I would like would have the names of the 1, 2, 3 math categories, their corresponding colors, and the (+) symbol in each box labelled "Mean".
You need to pass a column for fill into the aesthetic: df <- tibble( loginc = rnorm(n = 12, mean = 0, sd = 1), BM = rep(c(0, 1), each = 6), math = rep(1:3, 4) ) %>% mutate(math = factor(math)) df %>% ggplot(aes(x = math, y = loginc, group = math, fill = math)) + geom_boxplot() + stat_summary(fun.y = mean, geom = "point", shape=3, size=2) + facet_grid(~ BM)
The problem is that you do not map a variable to the fill aesthetic. Map math to fill and set the fill colors manually with scale_fill_manual: library(ggplot2) both <- data.frame( BM = sample(0:1, 100, replace = TRUE), math = sample(1:3, 100, replace = TRUE), loginc = runif(100) ) bothboxplot <- ggplot(both, aes(factor(math), loginc, fill = factor(math))) + geom_boxplot() + stat_summary(fun = mean, geom = "point", shape = 3, size = 2) + scale_fill_manual(values = c("gray70", "orange", "red")) + scale_x_discrete(name = "Site Category") + scale_y_continuous(name = "Log(Incidence/100,000)") + facet_grid(. ~ BM) bothboxplot
Color one point and add an annotation in ggplot2?
I have a dataframe a with three columns : GeneName, Index1, Index2 I draw a scatterplot like this ggplot(a, aes(log10(Index1+1), Index2)) +geom_point(alpha=1/5) Then I want to color a point whose GeneName is "G1" and add a text box near that point, what might be the easiest way to do it?
You could create a subset containing just that point and then add it to the plot: # create the subset g1 <- subset(a, GeneName == "G1") # plot the data ggplot(a, aes(log10(Index1+1), Index2)) + geom_point(alpha=1/5) + # this is the base plot geom_point(data=g1, colour="red") + # this adds a red point geom_text(data=g1, label="G1", vjust=1) # this adds a label for the red point NOTE: Since everyone keeps up-voting this question, I thought I would make it easier to read.
Something like this should work. You may need to mess around with the x and y arguments to geom_text(). library(ggplot2) highlight.gene <- "G1" set.seed(23456) a <- data.frame(GeneName = paste("G", 1:10, sep = ""), Index1 = runif(10, 100, 200), Index2 = runif(10, 100, 150)) a$highlight <- ifelse(a$GeneName == highlight.gene, "highlight", "normal") textdf <- a[a$GeneName == highlight.gene, ] mycolours <- c("highlight" = "red", "normal" = "grey50") a textdf ggplot(data = a, aes(x = Index1, y = Index2)) + geom_point(size = 3, aes(colour = highlight)) + scale_color_manual("Status", values = mycolours) + geom_text(data = textdf, aes(x = Index1 * 1.05, y = Index2, label = "my label")) + theme(legend.position = "none") + theme()