I have an input file file1.txt:
V1 V2 Score
rs4939134 SIFT 1
rs4939134 Polyphen2 0
rs4939134 MutationAssessor -1.75
rs151252290 SIFT 0.101
rs151252290 Polyphen2 0.128
rs151252290 MutationAssessor 1.735
rs12364724 SIFT 0
rs12364724 Polyphen2 0.926
rs12364724 MutationAssessor 1.75
rs34448143 SIFT 0.005
rs34448143 Polyphen2 0.194
rs34448143 MutationAssessor 0.205
rs115694714 SIFT 0.007
rs115694714 Polyphen2 1
rs115694714 MutationAssessor 0.895
And this is my R code to plot this table as a heatmap:
library(ggplot2)
mydata <- read.table("file7.txt", header = FALSE, sep = "\t")
names(mydata) <- c("V1", "V2", "Score")
ggplot(data = mydata, aes(x = V1, y = V2, fill = Score)) +
geom_tile() +
geom_text(aes(V1, V2, label = Score), color = "black", size = 3) +
scale_fill_continuous(type = "viridis", limits = c(-5.76, 5.37)) +
labs(x = "pic1", y = "") +
theme_bw()
theme(panel.border = element_rect(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.text = element_text(size = 4))
And this the plot I got:
what I need is for each row (each type in V2) I need to put a legend that represented, so at the end there will be 3 legends, each represent (one for SIFT, second for Polyphen and the third for MutationAssessor) with different range that I can specify.
for example: SIFT from (0,1)
and Polyphen from (0,1)
and MutationAssessor from (-6,6)
I tried different thing of previous asked questions but nothing work with me.
I appreciate any help.
You can loop over three given variables and plot different plots for each of them. In the end you have to combine them.
Create dataset with wanted limits:
myLimits <- list(
list("SIFT", 0, 1),
list("Polyphen2", 0, 1),
list("MutationAssessor", -6, 6)
)
Function to plot heatmap only for one variable at a time:
plotHeat <- function(type, MIN, MAX) {
library(ggplot2)
p <- ggplot(subset(mydata, V2 == type),
aes(V1, V2, fill = Score, label = Score)) +
geom_tile() +
geom_text(color = "black", size = 3) +
scale_fill_continuous(type = "viridis", limits = c(MIN, MAX)) +
labs(x = "SNP",
y = NULL,
fill = type) +
theme_bw()
# Output x-axis only for the last plot
if (type != myLimits[[length(myLimits)]][[1]]) {
p <- p + theme(axis.text.x = element_blank(),
axis.title.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank())
}
return(p)
}
Plot and combine plots using egg package:
res <- lapply(myLimits, function(x) {plotHeat(x[[1]], x[[2]], x[[3]])})
egg::ggarrange(plots = res)
This is maybe related to this.
xs <- split(mydata, f = mydata$V2)
p1 <- ggplot(data = xs$MutationAssessor, aes(x = V1, y = 0, fill = Score)) +
geom_tile() +
geom_text(aes(label = Score), color = "black", size = 3) +
scale_fill_continuous(type = "viridis", limits = c(-5.76, 5.37)) +
labs(x = "pic1", y = "") +
facet_grid(V2 ~ .) +
theme_bw() +
theme(panel.border = element_rect(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.text = element_text(size = 4))
p2 <- p1 %+% xs$Polyphen2
p3 <- p1 %+% xs$SIFT
library(gridExtra)
grid.arrange(p1, p2, p3)
And the result is:
EDIT:
In case you want different range for facets but you want values to be comparable (e.g. value around 5 should be yellow in all plots), there is a possible solution
First discretize your fill variable
mydata$colour <- cut(mydata$Score,
quantile(mydata$Score, c(0, 0.25, 0.5, 0.75, 1)),
include.lowest = T)
Then create plots:
xs <- split(mydata, f = mydata$V2)
p1 <- ggplot(data = xs$MutationAssessor, aes(x = V1, y = 0, fill = colour)) +
geom_tile() +
geom_text(aes(label = Score), color = "black", size = 3) +
labs(x = "pic1", y = "") +
facet_grid(V2 ~ .) +
theme_bw() +
theme(panel.border = element_rect(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.text = element_text(size = 4))
p2 <- p1 %+% xs$Polyphen2
p3 <- p1 %+% xs$SIFT
And finally change palette:
mypalette <- c("#FFFFCC", "#A1DAB4", "#41B6C4", "#2C7FB8", "#253494")
names(mypalette) <- levels(mydata$colour)
p1 <- p1 + scale_fill_manual(values = mypalette[levels(xs$MutationAssessor$colour)])
p2 <- p2 + scale_fill_manual(values = mypalette[levels(xs$Polyphen2$colour)])
p3 <- p3 + scale_fill_manual(values = mypalette[levels(xs$SIFT$colour)])
And the result is:
grid.arrange(p1, p2, p3)
Related
I am drawing multiple maps based on a list with the following ggplot2 custom function:
plot_list = list()
for (i in 1:length(input_map[[1]])) {
p = ggplot() +
geom_polygon(data = df_new, aes_string(x = "long.x", y = "lat.x", fill=input_map[[3]][i],
group = "commune"), size=0, colour = "lightgrey") +
geom_polygon(data = cantons_shp, aes(x = long, y = lat, group = group),
size= .1, colour = "lightgrey", fill = NA) +
coord_fixed() +
scale_fill_gradient(low = "white", high = input_map[[2]][i], name = input_map[[1]][i], na.value="white", limits=0:1) +
labs(title = input_map[[1]][i], x = "", y = "") +
theme_void() +
theme(plot.title = element_text(size=10, hjust = 0.5),
legend.position = "bottom", # c(0.85, 0.75)
legend.text = element_text(size=7),
legend.title = element_blank(),
plot.background = element_rect("white"),
plot.margin=unit(c(0,0,0,0), "cm"))
plot_list[[i]] = as.grob(p)
}
When arranging 6 maps with grid.arrange in two rows, the maps are getting too small and there is lots of white space between the plots.
# grid.arrange without do.call does not work
do.call("grid.arrange",
c(plot_list, nrow=2,
top=title,
bottom=paste0(n_recordings, " Opnamen aus ", n_communes, " Gemengen", "\nSchnëssen-Projet | (c) Universitéit Lëtzebuerg, 2019")
)
)
In the theme options, I already reduced plot.margin to 0, without avail.
# Create the data
library(tidyverse)
dat <- read.table(text = "A B C
1 23 234 324
2 34 534 12
3 56 324 124
4 34 234 124
5 123 534 654",
sep = "",
header = TRUE) %>%
gather(key = "variable", value = "value") %>%
group_by(variable) %>%
mutate(ind = as.factor(rep(1:5)),
perc = value / sum(value)) %>%
arrange(variable, -perc) %>%
mutate(ordering = row_number())
# Plot the data
ggplot(dat, aes(variable, perc, fill = interaction(
-ordering, variable)) # line #20
) +
geom_col(color = "white", size = 1.5, alpha = 0.25) +
facet_grid(~ variable, scales = "free_x") +
scale_fill_manual("ind", values = rep("black", length(dat$variable))) +
geom_col(data = dat %>% filter(ordering == 1),
color = "white",
size = 1.5,
fill = "red",
alpha = 0.5) +
theme_minimal() +
theme(panel.grid.major.x = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_blank(),
legend.position = "none") +
scale_y_continuous(labels = scales::percent_format())
I've got my highlighted, faceted, stacked bar graph above. I want to reverse the order of everything so I change -ordering on line #20 to ordering. That gives me this graph below.
You can see that my line #20 change did indeed reverse the order of the grey sections of this stacked bar graph. But the red highlights remain on the bottom of the graph when I want them to flip to the top of the graph.
How do I achieve this? I tried the answer in many similar SO questions of adding position = position_fill(reverse = TRUE)) to both of my geom_col() and to each separately, but these three new attempts didn't work either. I got the same plot as shown directly above.
Here is a similar but differently coded approach which includes a scale for alpha as well. The idea is to leave dat untouched but to set-up manual scales for fill and alpha. ordering is used directly; no need to call interaction().
red <- 1L
n_ord <- length(unique(dat$ordering))
fill_scale <- c("red", rep("black", n_ord - 1L)) %>%
setNames(red * seq(n_ord))
alpha_scale <- c(0.5, rep(0.25, n_ord - 1L)) %>%
setNames(red * seq(n_ord))
# Plot the data
ggplot(dat, aes(variable, perc, fill = factor(red * ordering), alpha = factor(red * ordering))) +
# ggplot(dat, aes(variable, perc, fill = interaction(
# -ordering, variable)) # line #20
# ) +
geom_col(color = "white", size = 1.5) +
scale_fill_manual(guide = "none", values = fill_scale) +
scale_alpha_manual(guide = "none", values = alpha_scale) +
facet_grid(~ variable, scales = "free_x") +
theme_minimal() +
theme(panel.grid.major.x = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_blank(),
legend.position = "none") +
scale_y_continuous(labels = scales::percent_format())
The manual scales look as follows
fill_scale
1 2 3 4 5
"red" "black" "black" "black" "black"
alpha_scale
1 2 3 4 5
0.50 0.25 0.25 0.25 0.25
If the variable red is toggled, i.e., red <- -1L, we can reproduce OP's original plot:
You can use the interaction also in defining a variable and solve the problem in the following way:
library(tidyverse)
dat <- dat %>%
mutate(fill_breaks = as.character(interaction(-ordering, variable)),
fill_values = if_else(ordering == 1, "red", "black"))
fill_values <- dat$fill_values
names(fill_values) <- dat$fill_breaks
ggplot(dat, aes(variable, perc, fill = fill_breaks)) +
geom_col(color = "white", size = 1.5, alpha = 0.25) +
facet_grid( ~ variable, scales = "free_x") +
scale_fill_manual(values = fill_values) +
theme_minimal() +
theme(panel.grid.major.x = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_blank(),
legend.position = "none") +
scale_y_continuous(labels = scales::percent_format())
I'm trying to create a simple boxplot with connected lines similar to the one described in this question: Connect ggplot boxplots using lines and multiple factor.
However, the interaction term in that example produces an error:
geom_path: Each group consists of only one observation. Do you need to
adjust the group aesthetic?
I would like to connect each point using the index variable. Here is the code:
group <- c("A","A","A","A","A","A","A","A","A","A","B","B","B","B","B","B","B","B","B","B")
session <- c("one","two","one","two","one","two","one","two","one","two","one","two","one","two","one","two","one","two","one","two")
value <- c(1.02375,1.01425,1.00505,0.98105,1.09345,1.09495,0.98255,0.90240,0.99185,0.99855,0.88135,0.72685,0.94275,0.84775,1.01010,0.96825,0.85215,0.84175,0.89145,0.86985)
index <- c(1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10)
df <- data.frame(group,session,value,index)
# Graph plots
p <- ggplot(df, aes(x=group, y=value, fill=session))
p <- p + geom_boxplot(color="grey40", outlier.alpha=0.0) #alpha=0.6
p <- p + stat_summary(fun.y=mean,geom="point",pch="-",color="white",size=8, position = position_dodge(width=0.75)) # size=2 color="black"
p <- p + geom_point(size=2, alpha=0.6, aes(group=session), data=df, position = position_dodge(width=0.75))
p <- p + geom_line(aes(group = index), alpha = 0.6, colour = "black", position = position_dodge(width=0.75), data=df) #
p <- p + scale_fill_manual(values=c("#969696","#74c476"))
p <- p + theme(
axis.text.x = element_text(colour = "black"), #angle = 60, hjust = 1
axis.text.y = element_text(colour = "black"),
axis.title.x = element_blank(), #element_text(colour = "black"),
axis.title.y = element_text(colour = "black"),
legend.position = "none"
#panel.background = element_blank(), #element_rect(fill="white", colour="black", size=2),
#panel.grid.major = element_blank(),
#panel.grid.minor = element_blank(),
#panel.border = element_blank(),
#axis.line = element_line(size=1.5, colour = "black")
#panel.grid.major = element_line(size = .5, colour = "grey")
)
ggsave("~/Desktop/test.pdf", width=4, height=6, units=c("in"), plot=p)
However, that produces only vertical lines as in this image:
Some changes analogous as in my other answer:
df <- data.frame(group, session, value, index, U = interaction(session, group))
p <- ggplot(df, aes(x = U, y = value, fill = session)) +
scale_x_discrete(labels = rep(unique(group), each = 2))
p <- p + geom_line(aes(group = index), alpha = 0.6, colour = "black", data = df)
# no need for dodge
The rest is the same as in your code.
(The remaining vertical lines are from the boxplot.)
I have a dataset with a lot of overlapping points and used ggplot to create a bubble plot to show that data. I need to add bars on my plot for the means of each group on the x axis (values can be 0, 1, or 2). I have tried to use geom_errorbar but haven't been able to get it to work with my data. Any help/suggestions would be greatly appreciated.
The following is my code and a script to generate fake data that is similar:
y <- seq(from=0, to=3.5, by=0.5)
x <- seq(from=0, to=2, by=1)
xnew <- sample(x, 100, replace=T)
ynew <- sample(y, 100, replace=T)
data <- data.frame(xnew,ynew)
data2 <- aggregate(data$xnew, by=list(x=data$xnew, y=data$ynew), length)
names(data2)[3] <- "Count"
ggplot(data2, aes(x = x, y = y)) +
geom_point(aes(size=Count)) +
labs(x = "Copies", y = "Score") +
aes(ymax=..y.., ymin=..y..) +
scale_x_continuous(breaks = seq(0, 2, 1)) +
scale_y_continuous(breaks = seq(0, 3, 0.5)) +
theme(legend.position = "bottom", legend.direction = "horizontal",
axis.line = element_line(size=1, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(colour="black", size = 10),
axis.text.y = element_text(colour="black", size = 10))
I am not entirely sure that I understand your question correctly. It seems to me that in addition to the bubbles, you want to visualise the mean value of y for each value of x as a bar of some kind. (You mention error bars, but it seems that this is not a requirement, but just what you have tried. I will use geom_col() instead.)
I assume that you want to weigh the mean over y by the counts, i.e., sum(y * Count) / sum(Count). You can create a data frame that contains these values by using dplyr:
data2_mean
## # A tibble: 3 × 2
## x y
## <dbl> <dbl>
## 1 0 1.833333
## 2 1 1.750000
## 3 2 2.200000
When creating the plot, I use data2 as the data set for geom_point() and data2_mean as the data set for geom_col(). It is important to put the bars first, since the bubbles should be on top of the bars.
ggplot() +
geom_col(aes(x = x, y = y), data2_mean, fill = "gray60", width = 0.7) +
geom_point(aes(x = x, y = y, size = Count), data2) +
labs(x = "Copies", y = "Score") +
scale_x_continuous(breaks = seq(0, 2, 1)) +
scale_y_continuous(breaks = seq(0, 3, 0.5)) +
theme(legend.position = "bottom", legend.direction = "horizontal",
axis.line = element_line(size=1, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(colour="black", size = 10),
axis.text.y = element_text(colour="black", size = 10))
Everything that I changed compared to your code comes before scale_x_continuous(). This produces the following plot:
Is this what you're after? I first calculated the group-level means using the dplyr package and then added line segments to your plot using geom_segment:
library(ggplot2)
library(dplyr)
data2 <- data2 %>% group_by(x) %>% mutate(mean.y = mean(y))
ggplot(data2, aes(x = x, y = y)) +
geom_point(aes(size=Count)) +
labs(x = "Copies", y = "Score") +
aes(ymax=..y.., ymin=..y..) +
scale_x_continuous(breaks = seq(0, 2, 1)) +
scale_y_continuous(breaks = seq(0, 3, 0.5)) +
theme(legend.position = "bottom", legend.direction = "horizontal",
axis.line = element_line(size=1, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(colour="black", size = 10),
axis.text.y = element_text(colour="black", size = 10)) +
geom_segment(aes(y = mean.y, yend = mean.y, x = x -0.25, xend = x + 0.25))
I'm trying to plot a stacked bar graph in R using ggplot. I also want to include percentage in each piece of bars for that piece. I tried to follow the posts 1, 2, 3 but the values are not exactly in their respective blocks. My data is a file in dropbox.
My code is as follows:
f<-read.table("Input.txt", sep="\t", header=TRUE)
ggplot(data=f, aes(x=Form, y=Percentage, fill=Position)) +
geom_bar(stat="identity", colour="black") +
geom_text(position="stack", aes(x=Form, y=Percentage, ymax=Percentage, label=Percentage, hjust=0.5)) +
facet_grid(Sample_name ~ Sample_type, scales="free", space="free") +
opts(title = "Input_profile",
axis.text.x = theme_text(angle = 90, hjust = 1, size = 8, colour = "grey50"),
plot.title = theme_text(face="bold", size=11),
axis.title.x = theme_text(face="bold", size=9),
axis.title.y = theme_text(face="bold", size=9, angle=90),
panel.grid.major = theme_blank(),
panel.grid.minor = theme_blank()) +
scale_fill_hue(c=45, l=80)
ggsave("Output.pdf")
The output is-
Any help is greatly appreciated.
Thank you for your help and time!
I think you're using an older version of ggplot2. Because with your code modified for ggplot2 v 0.9.3, I get this:
p <- ggplot(data = df, aes(x = Form, y = Percentage, fill = Position))
p <- p + geom_bar(stat = "identity", colour = "black")
p <- p + geom_text(position = "stack", aes(x = Form, y = Percentage, ymax = Percentage, label = Percentage, hjust = 0.5))
p <- p + facet_grid(Sample_name ~ Sample_type, scales="free", space="free")
p <- p + theme(plot.title = element_text("Input_profile"),
axis.text.x = element_text(angle = 90, hjust = 1, size = 8, colour = "grey50"),
plot.title = element_text(face="bold", size=11),
axis.title.x = element_text(face="bold", size=9),
axis.title.y = element_text(face="bold", size=9, angle=90),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p <- p + scale_fill_hue(c=45, l=80)
p
You see that the text objects are normally placed properly. There are places where the bars are just too short so that the numbers overlap. You can also play with the size parameter.
To rectify that, you could do something like this to add up the numbers by yourself.
df <- ddply(df, .(Form, Sample_type, Sample_name), transform,
cum.perc = Reduce('+', list(Percentage/2,cumsum(c(0,head(Percentage,-1))))))
p <- ggplot(data = df, aes(x = Form, y = Percentage, fill = Position))
p <- p + geom_bar(stat = "identity", colour = "black")
p <- p + geom_text(aes(x = Form, y = cum.perc, ymax = cum.perc, label = Percentage, hjust = 0.5), size=2.7)
p <- p + facet_grid(Sample_name ~ Sample_type, scales="free", space="free")
p <- p + theme(plot.title = element_text("Input_profile"),
axis.text.x = element_text(angle = 90, hjust = 1, size = 8, colour = "grey50"),
plot.title = element_text(face="bold", size=11),
axis.title.x = element_text(face="bold", size=9),
axis.title.y = element_text(face="bold", size=9, angle=90),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p <- p + scale_fill_hue(c=45, l=80)
p
This gives:
Here a solution using barchart from lattice.
library(latticeExtra)
barchart(Percentage~Form|Sample_type*Sample_name,data=dat,
groups =Position,stack=T,
panel=function(...){
panel.barchart(...)
ll <- list(...)
keep <- !is.na(ll$groups[ll$subscripts])
x <- as.numeric(ll$x[keep])
y <- as.numeric(ll$y[keep])
groups <- as.numeric(factor(ll$groups)[ll$subscripts[keep]])
for (i in unique(x)) {
ok <- x == i
ord <- sort.list(groups[ok])
pos <- y[ok][ord] > 0
nok <- sum(pos, na.rm = TRUE)
h <- y[ok][ord][pos]
panel.text(x = rep(i, nok),y = cumsum(h)-0.5*h,
label = h,cex=1.5)
}
},
auto.key = list(columns = 5),
par.settings = ggplot2like(n = 5),
lattice.options = ggplot2like.opts())