Custom manhattan plot multi x-axis - r

I have the following data set gwas_data
Running
head -n 23 gwas_data gives me the following table.
# Example GWAS table: 11 individuals (udi "A".."K") measured at two SNPs on
# chromosome 1, giving 22 rows. Columns that only repeat a value per SNP are
# built with rep() instead of being spelled out element by element.
gwas_data <- data.frame(
  udi = rep(LETTERS[1:11], times = 2),
  snp = rep(c("rs71628639_A", "rs12726330_A"), each = 11),
  chr = rep(1L, times = 22),
  bp = rep(c(154988255L, 155108167L), each = 11),
  p = c(
    0.580621191, 0.356577427, 0.494774059, 0.984005886, 0.492034614,
    0.581479389, 0.24820214, 0.202720896, 0.295462221, 0.845848783,
    0.954714162, 0.343101621, 0.740942238, 0.929127071, 0.717965027,
    0.335111376, 0.857154424, 0.480087195, 0.980307843, 0.521114038,
    0.583150471, 0.925783695
  ),
  beta = c(
    0.000852277, 0.003943912, 0.001091986, -3.18e-05, 0.000564413,
    0.000120028, 0.026156467, 0.000303135, 0.069146449, -2.96e-07,
    -2.11e-05, 0.001274261, -0.001232397, 0.000123948, -0.000498507,
    -0.000689988, -3.41e-50, -0.013934416, 5.12e-06, -0.03696031,
    -7.28e-07, -3.01e-05
  ),
  bp_cum = rep(c(1.154988255, 1.155108167), each = 11),
  stringsAsFactors = FALSE
)
I would like to make a manhattan plot, the X-axis should have chromosomal numbers from 1:22, I want each entry to be on the x-axis according to the BP position. The id should act as colour and the y-axis would be -log10(p).
I have rewritten the r command as follows, but my graph doesn't look correct.
library(plyr)
library(dplyr)
library(purrr)
library(tidyverse)
library(ggtext)
library(stringr)
# Manhattan-style plot: -log10(p) against cumulative base-pair position,
# one colour per individual (udi), with a dashed line at the significance
# threshold.
# NOTE(review): `axis_set` (columns chr, center) and `ylim` (upper y limit)
# are not created in this snippet; both must already exist in the session,
# otherwise the scale calls below fail.
gwas_data <- read.table("gwas_data", header = TRUE)
sig <- 5e-8  # significance threshold drawn as the horizontal reference line
manhplot <- ggplot(gwas_data, aes(x = bp_cum, y = -log10(p), color = udi)) +
  geom_hline(yintercept = -log10(sig), color = "grey40", linetype = "dashed") +
  # colour is inherited from the plot-level mapping; no need to map it twice
  geom_point(alpha = 0.8, size = 2) +
  # `labels` is spelled out in full instead of relying on partial matching
  scale_x_continuous(labels = axis_set$chr, breaks = axis_set$center) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, ylim)) +
  #scale_color_manual(values = rep(c("#276FBF", "#183059"), (length(axis_set$chr)))) +
  # (the former scale_size_continuous() was dropped: no size aesthetic is mapped)
  theme_minimal()
print(manhplot)
I would also like to add the name of the ID and SNP if they are above the significant threshold.
My axis_set looks as follows with test data which goes from chromosome 1:4
chr center
1 179641307
2 354697451
3 553030055
4 558565909
My final graph looks as follows:

Related

How Insert an expression in legend in ggplot2?:: correct color + multiple lines and point

I am new to R and have not been able to correct the following graph.
Xb_exp, it should have blue dots.
Xb_dw, solid red line.
Xb_f, dotted line.
Xb_s, longdash line.
The legend expression should be as shown with the subscript.
I have not been able to correct it.
Is there a way to do this?
enter image description here
my data
# Crank-angle positions; used as the x variable of every series plotted below.
CA <- c(3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30)
# Series plotted against CA: Xb_exp is drawn as points, Xb_dw / Xb_f / Xb_s as lines.
Xb_exp <- c(0.0231,0.0519,0.0839,0.1197,0.1595,0.1996,0.2384,0.2772,0.3153,0.3520,0.3887,0.4254,0.4615,0.4978,0.5339,0.5685,0.6000,0.6279,0.6528,0.6762,0.6974,0.7166,0.7346,0.7516,0.7669,0.7810,0.7940,0.8059)
Xb_dw <- c(0.0160,0.0516,0.0886,0.1259,0.1633,0.2006,0.2377,0.2749,0.3122,0.3496,0.3870,0.4245,0.4617,0.4984,0.5339,0.5678,0.5996,0.6288,0.6551,0.6786,0.6994,0.7179,0.7346,0.7499,0.7641,0.7774,0.7899,0.8018)
# NOTE(review): "0,0387" below has a comma where a decimal point was presumably
# intended; as written it contributes two elements (0 and 387), so Xb_f has 29
# values while CA has 28.
Xb_f <- c(0.0021,0.0031,0.0046,0.0067,0.0095,0.0131,0.0177,0.0234,0,0387,0.0483,0.0591,0.0709,0.0832,0.0955,0.1073,0.1181,0.1272,0.1345,0.1398,0.1443,0.1456,0.1468,0.1474,0.1476,0.1477,0.1477,0.1477,0.1477)
Xb_s <- c(0.0139,0.0484,0.0839,0.1192,0.1538,0.1874,0.2200,0.2515,0.2818,0.3108,0.3387,0.3653,0.3908,0.4151,0.4383,0.4604,0.4815,0.5015,0.5206,0.5387,0.5559,0.5722,0.5877,0.6024,0.6164,0.6264,0.6421,0.6040)
# NOTE(review): c() concatenates all five vectors into one long numeric vector,
# not a table with one column per series; ggplot(data = dat) needs a data frame.
dat <- c(CA, Xb_exp, Xb_dw, Xb_f, Xb_s)
my code
# Legend labels as plotmath expressions (subscripted), listed in the same
# order as the entries of `color4`.
labels <- c(expression(X[b_exp]), expression(X[b_dw]), expression(X[b_f]), expression(X[b_s]))
color4 <- c("Xb_exp"="#3C5488FF", "Xb_dw"="#DC0000FF", "Xb_f"="#00A087FF", "Xb_s"="#4DBBD5FF")
# One layer per series: experimental values as points, the three model
# curves as lines with distinct line types. Colour is mapped to a constant
# string per layer so that a single colour legend is produced.
Xb_D1 <- ggplot(data = dat) +
  theme_bw() +
  labs(x="Crank position (ºCA)", y= bquote('Burn fraction ('~X[b]~')')) +
  geom_point(aes(x=CA, y=Xb_exp, colour="Xb_exp"), size=3) +
  geom_line(aes(x=CA, y=Xb_dw,colour="Xb_dw"), size=1,linetype="solid") +
  geom_line(aes(x=CA, y=Xb_f,colour="Xb_f"), size=1,linetype="dotted") +
  geom_line(aes(x=CA, y=Xb_s,colour="Xb_s"), size=1,linetype="longdash") +
  # `labels` are matched to legend entries by position. Without explicit
  # breaks the entries are sorted alphabetically (Xb_dw, Xb_exp, ...), which
  # would attach the wrong expression to each colour.
  scale_colour_manual(values=color4, breaks=names(color4), labels=labels) +
  theme(legend.title = element_blank(),legend.position = c(0.8, 0.5),
        legend.text = element_text(size = 12)) +
  scale_x_continuous(limits = c(2,80))
plot(Xb_D1)
ggplot() requires a dataframe not a vector. If you modify your code with:
dat <- data.frame(CA, Xb_exp, Xb_dw, Xb_f, Xb_s)
and fix the typo in your Xb_f vector
Xb_f <- c(0.0021,0.0031,0.0046,0.0067,0.0095,0.0131,0.0177,0.0234,0.0387,0.0483,0.0591,0.0709,0.0832,0.0955,0.1073,0.1181,0.1272,0.1345,0.1398,0.1443,0.1456,0.1468,0.1474,0.1476,0.1477,0.1477,0.1477,0.1477)
Your remaining code will then work as is, but the same plot can be built more simply with the tidyverse approach below. Use pivot_longer to stack the y variables against your x variable.
# Long-format version: stack the four Xb_* columns into name/value pairs so
# that one colour mapping drives every series.
dat %>%
  pivot_longer(Xb_exp:Xb_s) %>%
  ggplot(aes(x = CA, y = value, colour = name)) +
  geom_point() +
  geom_line() +
  # explicit breaks keep the legend in the order of `color4`, which is the
  # order `labels` was written in; the default alphabetical order would pair
  # labels with the wrong series
  scale_colour_manual(values=color4, breaks=names(color4), labels=labels) +
  theme_bw() +
  theme(legend.title = element_blank(),legend.position = c(0.8, 0.5),
        legend.text = element_text(size = 12)) +
  scale_x_continuous(limits = c(2,80)) +
  labs(x="Crank position (ºCA)", y= bquote('Burn fraction ('~X[b]~')'))
Ironically, setting this up with conventional (base R) plotting is rather simple:
Given all the data above:
# Base-graphics version of the plot. Line type per series; NA for the series
# that is drawn as points only, so the legend shows no line for it.
linetypes4 <- c(Xb_exp = NA, Xb_dw = "solid", Xb_f = "dotted", Xb_s = "longdash")
# Empty canvas with fixed axis ranges and a background grid.
plot(
  NA, type = "n", xlim = c(0, 30), ylim = c(0, 0.8),
  xlab = "Crank position (ºCA)", ylab = bquote('Burn fraction ('~X[b]~')'),
  panel.first = grid()
)
# Experimental series as filled points. The former `size = 3` was removed:
# `size` is not a base graphics parameter and only triggered a warning.
points(x = dat$CA, y = dat$Xb_exp, pch = 19, col = color4["Xb_exp"])
# Remaining series as lines; line type and colour are looked up by series
# name (the vector is `linetypes4`, not `linetypes`).
for (n in c("Xb_dw", "Xb_f", "Xb_s")) {
  lines(x = dat$CA, y = dat[[n]], lty = linetypes4[n], col = color4[n], lwd = 2)
}
# `labels`, `color4`, `linetypes4` and `pch` are all in the same series order.
legend(
  x = "right",
  legend = labels,
  col = color4,
  lty = linetypes4,
  pch = c(19, NA, NA, NA),
  box.lwd = 0,
  inset = .02
)
There are some errors in your code suggesting you didn't try what you pasted.
0,0387, in your data should likely be 0.0387, otherwise nothing is right (no data measures several hundreds in there)
c(CA, ... ) should likely be data.frame( CA, ... )
Now, the first problem is you are doing all the heavy lifting yourself, while ggplot sits there with nothing left to do. It was designed to set up colors and line types by group. You however need to transform the data first to take full advantage of that:
library(tidyr)
CA <- c(3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30)
Xb_exp <- c(0.0231,0.0519,0.0839,0.1197,0.1595,0.1996,0.2384,0.2772,0.3153,0.3520,0.3887,0.4254,0.4615,0.4978,0.5339,0.5685,0.6000,0.6279,0.6528,0.6762,0.6974,0.7166,0.7346,0.7516,0.7669,0.7810,0.7940,0.8059)
Xb_dw <- c(0.0160,0.0516,0.0886,0.1259,0.1633,0.2006,0.2377,0.2749,0.3122,0.3496,0.3870,0.4245,0.4617,0.4984,0.5339,0.5678,0.5996,0.6288,0.6551,0.6786,0.6994,0.7179,0.7346,0.7499,0.7641,0.7774,0.7899,0.8018)
Xb_f <- c(0.0021,0.0031,0.0046,0.0067,0.0095,0.0131,0.0177,0.0234,0.0387,0.0483,0.0591,0.0709,0.0832,0.0955,0.1073,0.1181,0.1272,0.1345,0.1398,0.1443,0.1456,0.1468,0.1474,0.1476,0.1477,0.1477,0.1477,0.1477)
Xb_s <- c(0.0139,0.0484,0.0839,0.1192,0.1538,0.1874,0.2200,0.2515,0.2818,0.3108,0.3387,0.3653,0.3908,0.4151,0.4383,0.4604,0.4815,0.5015,0.5206,0.5387,0.5559,0.5722,0.5877,0.6024,0.6164,0.6264,0.6421,0.6040)
dat <- data.frame(CA, Xb_exp, Xb_dw, Xb_f, Xb_s)
color4 <- c("Xb_exp"="#3C5488FF", "Xb_dw"="#DC0000FF", "Xb_f"="#00A087FF", "Xb_s"="#4DBBD5FF")
linetypes <- c( Xb_dw="solid", Xb_f="dotted", Xb_s="longdash" )
# Reshape to long format: one row per (CA, series) pair, with the series
# name in `name` and its value in `value`.
dat2 <- pivot_longer(dat, cols = starts_with("Xb_"))
# Split by how each series is drawn. Plain `[` subsetting is used because
# only tidyr is attached here; a bare filter() would resolve to
# stats::filter() and fail.
dat2.line <- dat2[dat2$name != "Xb_exp", ]
dat2.point <- dat2[dat2$name == "Xb_exp", ]
dat2 is now a long data set, with data category as a variable, not with a separate column for each data series. This is how ggplot likes it:
dat2
# A tibble: 112 x 3
CA name value
<dbl> <fct> <dbl>
1 3 Xb_exp 0.0231
2 3 Xb_dw 0.016
3 3 Xb_f 0.0021
4 3 Xb_s 0.0139
5 4 Xb_exp 0.0519
6 4 Xb_dw 0.0516
7 4 Xb_f 0.0031
8 4 Xb_s 0.0484
9 5 Xb_exp 0.0839
10 5 Xb_dw 0.0886
# … with 102 more rows
I then split the data into what later goes to points and what goes to lines, just so the plot code is no uglier than it has to be:
# Legend order is fixed to the order of `color4`; the per-entry legend glyphs
# are derived from it so that each entry shows exactly what the plot draws:
# a point for Xb_exp, and the series' own line type for the others.
legend_order <- names(color4)
legend_shape <- ifelse(legend_order == "Xb_exp", 19, NA)
legend_lty <- ifelse(legend_order == "Xb_exp", "blank", unname(linetypes[legend_order]))
Xb_D1 <- ggplot(data = dat2.line, aes(x=CA,y=value,color=name)) +
  theme_bw() +
  labs(x="Crank position (ºCA)", y= bquote('Burn fraction ('~X[b]~')')) +
  geom_point( data = dat2.point, size=3) +
  geom_line( aes(col=name,lty=name), size=1 ) +
  scale_colour_manual(values=color4, breaks=legend_order) +
  # the line-type legend is suppressed; the colour legend carries that info
  scale_linetype_manual( values=linetypes, guide="none" ) +
  guides(
    color = guide_legend( override.aes=list( shape=legend_shape, linetype=legend_lty ) )
  ) +
  theme(legend.title = element_blank(),legend.position = c(0.8, 0.5),
        legend.text.align = 0,
        legend.text = element_text(size = 12)) +
  scale_x_continuous(limits = c(2,30))
print(Xb_D1)
no need to supply labels
use line type as you would use color with ggplot; it's just one more channel that can carry information (or "aesthetic", as ggplot calls it)
align the legends left, looks nicer that way
more sophisticated is the use of override.aes to take away the points from the legend categories who shouldn't have them.
Now, I was unable to change the order of the data series in the legend, which can be a hassle. Is the order they are currently in still OK for you?

Add an additional X axis to the plot and some lines/annotations to show the percentage of data under it

I was trying to recreate this plot:
using the following code -
library(tidyverse)
# Simulated sample and the seven axis positions mean - 3 SD ... mean + 3 SD.
set.seed(0)
r <- rnorm(10000)
df <- as.data.frame(r)
avg <- round(mean(r), 2)
SD <- round(sd(r), 2)
x.scale <- seq(from = avg - 3 * SD, to = avg + 3 * SD, by = SD)
x.lab <- c(paste0(-3:-1, "SD"), "Mean", paste0(1:3, "SD"))
# Density-scaled histogram with a kernel density overlay; the x axis is
# labelled in SD units and has no title.
ggplot(df, aes(r)) +
  geom_histogram(
    aes(y = ..density..),
    bins = 20, colour = "black", fill = "lightblue"
  ) +
  geom_density(alpha = .2, fill = "darkblue") +
  scale_x_continuous(breaks = x.scale, labels = x.lab) +
  labs(x = "")
Using the code I plotted this:
,
but this isn't near to the plot that I am trying to create. How do I make an additional axis with the X axis? How do I add the lines to automatically show the percentage of observations? Is there any way, that I can create the plot as nearly identical as possible using ggplot2?
Welcome to SO. Excellent first question!
It's actually quite tricky. You'd need to create a second plot (for the second x axis), and it's not entirely straightforward to align the two perfectly.
I will be using Z.lin's amazing modification of the cowplot package.
I am not using the reprex package, because I think I'd need to define every single function (and I don't know how to use trace within reprex.)
library(tidyverse)
library(cowplot)
# Simulated sample, wrapped in a data frame for ggplot.
set.seed(0)
r <- rnorm(10000)
foodf <- as.data.frame(r)
# Rounded mean / SD and the axis positions mean - 3 SD ... mean + 3 SD.
avg <- round(mean(r), 2)
SD <- round(sd(r), 2)
x.scale <- round(seq(from = avg - 3 * SD, to = avg + 3 * SD, by = SD), 1)
x.lab <- c(paste0(-3:-1, "SD"), "Mean", paste0(1:3, "SD"))
x2lab <- seq(-3L, 3L)
# Kernel density estimate of the sample, computed outside ggplot.
dens_r <- density(r)
# For each axis position, take the density value at the nearest grid point
# of the density estimate.
nearest_idx <- vapply(
  x.scale,
  function(pos) which.min(abs(dens_r$x - pos)),
  integer(1)
)
y_dens <- dens_r$y[nearest_idx]
# added annotation for segments and labels.
# Arrow segments can be added in a similar way.
# Both panels start from the same data/aesthetic base.
base_plot <- ggplot(foodf, aes(r))
# Highest point of the density curve; the annotations are placed relative to it.
dens_peak <- max(dens_r$y)
# Main panel: histogram + density, with a dashed vertical segment from the
# curve up to above the peak at every SD position, and a text label on top.
p1 <- base_plot +
  geom_histogram(
    aes(y = ..density..),
    bins = 20, colour = "black", fill = "lightblue"
  ) +
  geom_density(alpha = .2, fill = "darkblue") +
  scale_x_continuous(breaks = x.scale, labels = x.lab) +
  labs(x = NULL) + # NULL (not "") so that no axis title is drawn
  annotate(
    "segment",
    x = x.scale, xend = x.scale,
    y = y_dens, yend = 1.1 * dens_peak,
    lty = 2
  ) +
  annotate("text", x = x.scale, y = 1.2 * dens_peak, label = x.lab)
# Second panel: no layers, only an x axis with the alternative labels.
p2 <- base_plot +
  scale_x_continuous(breaks = x.scale, labels = x2lab) +
  labs(x = NULL) +
  theme_classic() +
  theme(axis.line.y = element_blank())
# This is with the modified plot_grid() / align_plot() function!!!
plot_grid(p1, p2, ncol = 1, align = "v", rel_heights = c(1, 0.1))

how do i combine multiple data sources in ggplot using split and sapply?

This question is linked to a previous one answered by @Rui Barradas and @Duck, but I need more help. Previous link here:
how do i vectorise (automate) plot creation in R
Basically, I need to combine 3 datasets into one plot with a secondary y axis. All datasets need to be split by SITENAME and will facet wrap by Sampling.Year. I am using split and sapply. Being facet wrap the plots look something like this:
However, i'm now trying to add the two other data sources into the plots, to look something like this:
But I am struggling to add the two other data sources and get them to split by SITENAME. Here is my code so far...
Record plot format as a function to be applied to a split list df (ideally 'df' would be added as geom_line with a secondary y axis, and 'FF_start_dates' will be added as a vertical dashed line):
# Build the plot for one site.
#
# AllDates_TPAF: data frame for a single SITENAME; the columns DATE,
#   Daily.Ave.PAF, Risk, Sampling.Year and SITENAME are used.
# Returns a ggplot object: points coloured by Risk, one facet row per
#   Sampling.Year, titled with the site name.
SITENAME_plot <- function(AllDates_TPAF) {
  ggplot(AllDates_TPAF, aes(DATE, Daily.Ave.PAF)) +
    geom_point(aes(colour = Risk), size = 3) +
    scale_colour_manual(values=c("Very Low" = "dark green","Low" = "light green",
                                 "Moderate" = "yellow", "High" = "orange", "Very High" = "red"), drop = FALSE) +
    labs(x = "Month", y = "Total PAF (% affected)") +
    scale_x_date(breaks = "1 month", labels = scales::date_format("%B")) +
    facet_wrap(~Sampling.Year, ncol = 1, scales = "free") +
    scale_y_continuous(limits = c(0, 100), sec.axis = sec_axis(~., name = "Water level (m)")) +
    guides(color = guide_legend(reverse = TRUE)) +
    # theme_bw() is a complete theme: it must come BEFORE the theme() tweaks,
    # otherwise it resets the text sizes and the rotated x labels.
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
      legend.text = element_text(size = 15),
      axis.text = element_text(size = 15),
      axis.title = element_text(size = 15, face = "bold")
    ) +
    ggtitle(unique(AllDates_TPAF$SITENAME))
}
plot write function:
# Render one plot to "<dir>/<name>.jpg" and return the file path.
#
# name: file name without extension.
# g:    object whose print() method draws the plot (e.g. a ggplot).
# dir:  output directory.
# Returns the path of the written file (character scalar).
SITENAME_plot_write <- function(name, g, dir = "N:/abc/") {
  flname <- paste0(file.path(dir, name), ".jpg")
  # jpeg() matches the ".jpg" extension; png() would write PNG data into a
  # file that claims to be a JPEG.
  jpeg(filename = flname, width = 1500, height = 1000)
  # close the device even if print() fails, so no device is left open
  on.exit(dev.off(), add = TRUE)
  print(g)
  flname
}
Apply function to list split by SITENAME:
# One data frame per site, one plot per data frame, one file per plot.
sp1 <- split(AllDates_TPAF, AllDates_TPAF$SITENAME)
gg_list <- lapply(sp1, SITENAME_plot)
# SITENAME_plot_write() opens and closes its own graphics device for every
# file, so no extra dev.off() is needed afterwards (calling it with no open
# device raises an error).
mapply(SITENAME_plot_write, names(gg_list), gg_list, MoreArgs = list(dir = getwd()))
I have uploaded samples of all 3 datasets here: Sample Data
Apologies for not using gsub but there was too much data and I couldn't get it to work properly
thanks in advance for any help you can give, even if it is just to point me towards a web tutorial of some kind.
You can try next code. I used the data you shared. Just be careful with names of all datasets. Ideally, the key columns as DATE and Sampling.Year should be present in all dataframes before making the split. Also some variables as Risk was absent so I added an example var with same name. Here the code, I added a function for the plot you want:
library(tidyverse)
library(readxl)
#Data
# All three tables come from sheets 1-3 of the same workbook.
xlsx_path <- 'Sample data.xlsx'
df1 <- read_excel(xlsx_path, sheet = 1)
# Risk is missing from the shared sample, so an example column is built:
# the five levels cycled 67 times plus one trailing "Very High".
risk_levels <- c("Very Low", "Low", "Moderate", "High", "Very High")
df1$Risk <- c(rep(risk_levels, times = 67), "Very High")
df2 <- read_excel(xlsx_path, sheet = 2)
df3 <- read_excel(xlsx_path, sheet = 3)
# One list element per site for each table (the third sheet names its site
# column "Site Name").
L1 <- split(df1, df1[["SITENAME"]])
L2 <- split(df2, df2[["SITENAME"]])
L3 <- split(df3, df3[["Site Name"]])
#Function to create plots
# Build the combined plot for one site.
#
# x: data with Sampling.Year, DATE, Daily.Ave.PAF, Risk and SITENAME.
# y: data with Sampling.Year, DATE and Height (drawn on the secondary axis).
# z: data whose 4th column is renamed to DATE and drawn as dashed vertical
#    lines -- assumes that column holds the dates; TODO confirm against the
#    real sheet layout.
# Returns a ggplot object.
myplot <- function(x, y, z) {
  # Keep one row of y per (DATE, Sampling.Year) and drop its SITENAME so the
  # merge does not produce a duplicated column.
  y <- y[!duplicated(paste(y$DATE, y$Sampling.Year)), ]
  y$SITENAME <- NULL
  # Left join: every row of x is kept; Height is NA where y has no match.
  xy <- merge(x, y, by = c('Sampling.Year', 'DATE'), all.x = TRUE)
  xy$DATE <- as.Date(xy$DATE)
  # Factor that maps Height onto the range of the primary axis. na.rm = TRUE
  # is required because unmatched rows (or missing readings) would otherwise
  # turn the factor, and with it the whole line and secondary axis, into NA.
  scaleFactor <- max(xy$Daily.Ave.PAF, na.rm = TRUE) / max(xy$Height, na.rm = TRUE)
  # Rename for consistency in names, then convert to Date.
  names(z)[4] <- 'DATE'
  z$DATE <- as.Date(z$DATE)
  G <- ggplot(xy, aes(DATE, Daily.Ave.PAF)) +
    geom_point(aes(colour = Risk), size = 3) +
    scale_colour_manual(values=c("Very Low" = "dark green","Low" = "light green",
                                 "Moderate" = "yellow", "High" = "orange", "Very High" = "red"), drop = FALSE) +
    scale_x_date(breaks = "1 month", labels = scales::date_format("%b %Y")) +
    geom_line(aes(x = DATE, y = Height * scaleFactor)) +
    # the secondary axis undoes the scaling so it reads in original units
    scale_y_continuous(name="Total PAF (% affected)", sec.axis=sec_axis(~./scaleFactor, name="Water level (m)")) +
    labs(x = "Month") +
    geom_vline(data = z, aes(xintercept = DATE), linetype = "dashed") +
    facet_wrap(~Sampling.Year, ncol = 1, scales = "free") +
    guides(color = guide_legend(reverse = TRUE)) +
    # theme_bw() is a complete theme: it must come BEFORE the theme() tweaks,
    # otherwise it resets the text sizes and the rotated x labels.
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
      legend.text = element_text(size = 15),
      axis.text = element_text(size = 15),
      axis.title = element_text(size = 15, face = "bold")
    ) +
    ggtitle(unique(xy$SITENAME))
  G
}
#Create a list of plots
# One plot per site: the three split lists are walked in parallel.
Lplots <- Map(myplot, x = L1, y = L2, z = L3)
# File name per plot, derived from the list names.
vnames <- paste0(names(Lplots), '.png')
# Save every plot; width and units are the same for all files.
mapply(ggsave, Lplots, filename = vnames, MoreArgs = list(width = 30, units = 'cm'))
You will end up with plots like these saved in your dir:
Some dashed lines do not appear in plots because they were not present in the data you provided.

ggplot2 won't add legend to boxplot with multiple strata

I am attempting to add a legend to my boxplot with this example data
BM math loginc
1 2 1.4523
0 3 2.3415
1 1 0.6524
1 3 2.4562
0 1 3.5231
0 2 2.4532
Essentially, I have two groups BM = 0 and BM = 1, 3 categories in each group (math=1, 2 or 3), and a value of loginc.
# Fill colours given as a fixed vector -- presumably one entry per drawn box
# (3 math categories in each of the 2 BM facets).
boxcolors=c('gray70','orange','red','gray70','orange','red')
bothboxplot=ggplot(both, aes(x=math,y=loginc))+
# fill is passed as a constant outside aes(), i.e. it is not mapped to a
# variable, so no fill scale and therefore no legend is created
geom_boxplot(fill=boxcolors)+
# NOTE(review): `line` is not defined in this snippet; presumably a colour
# value defined elsewhere -- confirm
stat_summary(fun.y=mean,color=line,geom = "point",shape=3,size=2)+
scale_x_discrete(name='Site Category')+
scale_y_continuous(name='Log(Incidence/100,000)')+
# one panel per BM group, side by side
facet_grid(.~BM)
bothboxplot
This yields the following plot:
This plot is entirely correct except for the lack of a legend. I have played around with the placement of the aes() and it won't work. When aes() is placed within ggplot() rather than within geom_boxplot(), my fill statement gives the error "Error: Aesthetics must be either length 1 or the same as the data (187): fill".
Ideally the legend I would like would have names of the 1,2,3 math categories, their corresponding colors, and the (+) symbol in each box to be labelled "Mean".
You need to pass a column for fill into the aesthetic:
# Small random example: 12 observations, two BM groups, math as a factor
# with three levels.
df <-
  tibble(
    loginc = rnorm(n = 12, mean = 0, sd = 1),
    BM = rep(c(0, 1), each = 6),
    math = rep(1:3, 4)
  ) %>%
  mutate(math = factor(math))
# Mapping math to fill inside aes() is what creates the fill scale and the
# legend.
df %>%
  ggplot(aes(x = math, y = loginc, group = math, fill = math)) +
  geom_boxplot() +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary()
  stat_summary(fun = mean, geom = "point", shape = 3, size = 2) +
  facet_grid(~ BM)
The point is that you did not map a variable to the fill aesthetic. Map math to fill inside aes() and set the fill colours manually with scale_fill_manual:
library(ggplot2)
# Random stand-in for the asker's data: 100 rows, two BM groups, three math
# categories.
n_obs <- 100
both <- data.frame(
  BM = sample(0:1, n_obs, replace = TRUE),
  math = sample(1:3, n_obs, replace = TRUE),
  loginc = runif(n_obs)
)
# One fill colour per math category; mapping fill inside aes() produces the
# legend, scale_fill_manual() picks the colours.
math_fills <- c("gray70", "orange", "red")
bothboxplot <- ggplot(
  both,
  aes(x = factor(math), y = loginc, fill = factor(math))
) +
  geom_boxplot() +
  stat_summary(fun = mean, geom = "point", shape = 3, size = 2) +
  scale_fill_manual(values = math_fills) +
  scale_x_discrete(name = "Site Category") +
  scale_y_continuous(name = "Log(Incidence/100,000)") +
  facet_grid(. ~ BM)
bothboxplot

Color one point and add an annotation in ggplot2?

I have a dataframe a with three columns :
GeneName, Index1, Index2
I draw a scatterplot like this
ggplot(a, aes(log10(Index1+1), Index2)) +geom_point(alpha=1/5)
Then I want to color a point whose GeneName is "G1" and add a text box near that point, what might be the easiest way to do it?
You could create a subset containing just that point and then add it to the plot:
# rows belonging to the gene that should stand out
g1 <- a[which(a$GeneName == "G1"), ]
# base scatterplot of all genes, semi-transparent
base_scatter <- ggplot(a, aes(log10(Index1+1), Index2)) +
  geom_point(alpha=1/5)
# overlay: the selected gene in red, with its label just below the point
base_scatter +
  geom_point(data=g1, colour="red") +
  geom_text(data=g1, label="G1", vjust=1)
NOTE: Since everyone keeps up-voting this question, I thought I would make it easier to read.
Something like this should work. You may need to mess around with the x and y arguments to geom_text().
library(ggplot2)
# Gene to emphasise in the plot.
highlight.gene <- "G1"
# Reproducible example data: ten genes with two random indices each.
set.seed(23456)
a <- data.frame(GeneName = paste0("G", 1:10),
                Index1 = runif(10, 100, 200),
                Index2 = runif(10, 100, 150))
# Flag each row so colour can be mapped to the flag.
a$highlight <- ifelse(a$GeneName == highlight.gene, "highlight", "normal")
# Row(s) that receive a text label.
textdf <- a[a$GeneName == highlight.gene, ]
mycolours <- c("highlight" = "red", "normal" = "grey50")
a
textdf
ggplot(data = a, aes(x = Index1, y = Index2)) +
  geom_point(size = 3, aes(colour = highlight)) +
  scale_color_manual("Status", values = mycolours) +
  # the label is a constant, so it is set outside aes(); x is nudged 5 % to
  # the right of the point
  geom_text(data = textdf, aes(x = Index1 * 1.05, y = Index2), label = "my label") +
  theme(legend.position = "none")

Resources