Connected points in ggplot boxplot - r

I'm trying to create a simple boxplot with connected lines similar to the one described in this question: Connect ggplot boxplots using lines and multiple factor.
However, the interaction term in that example produces an error:
geom_path: Each group consists of only one observation. Do you need to
adjust the group aesthetic?
I would like to connect each point using the index variable. Here is the code:
group <- c("A","A","A","A","A","A","A","A","A","A","B","B","B","B","B","B","B","B","B","B")
session <- c("one","two","one","two","one","two","one","two","one","two","one","two","one","two","one","two","one","two","one","two")
value <- c(1.02375,1.01425,1.00505,0.98105,1.09345,1.09495,0.98255,0.90240,0.99185,0.99855,0.88135,0.72685,0.94275,0.84775,1.01010,0.96825,0.85215,0.84175,0.89145,0.86985)
index <- c(1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10)
df <- data.frame(group,session,value,index)
# Graph plots
p <- ggplot(df, aes(x=group, y=value, fill=session))
p <- p + geom_boxplot(color="grey40", outlier.alpha=0.0) #alpha=0.6
p <- p + stat_summary(fun.y=mean,geom="point",pch="-",color="white",size=8, position = position_dodge(width=0.75)) # size=2 color="black"
p <- p + geom_point(size=2, alpha=0.6, aes(group=session), data=df, position = position_dodge(width=0.75))
p <- p + geom_line(aes(group = index), alpha = 0.6, colour = "black", position = position_dodge(width=0.75), data=df) #
p <- p + scale_fill_manual(values=c("#969696","#74c476"))
p <- p + theme(
axis.text.x = element_text(colour = "black"), #angle = 60, hjust = 1
axis.text.y = element_text(colour = "black"),
axis.title.x = element_blank(), #element_text(colour = "black"),
axis.title.y = element_text(colour = "black"),
legend.position = "none"
#panel.background = element_blank(), #element_rect(fill="white", colour="black", size=2),
#panel.grid.major = element_blank(),
#panel.grid.minor = element_blank(),
#panel.border = element_blank(),
#axis.line = element_line(size=1.5, colour = "black")
#panel.grid.major = element_line(size = .5, colour = "grey")
)
ggsave("~/Desktop/test.pdf", width=4, height=6, units=c("in"), plot=p)
However, that produces only vertical lines as in this image:

Some changes analogous as in my other answer:
df <- data.frame(group, session, value, index, U = interaction(session, group))
p <- ggplot(df, aes(x = U, y = value, fill = session)) +
scale_x_discrete(labels = rep(unique(group), each = 2))
p <- p + geom_line(aes(group = index), alpha = 0.6, colour = "black", data = df)
# no need for dodge
The rest is the same as in your code.
(The remaining vertical lines are from the boxplot.)

Related

How to set a conditional size scale based on name in ggplot?

Below is a simple bubble plot for three character traits (Lg_chr, Mid_chr, and Sm_chr) across three locations.
All good, except that because the range of Lg_chr is several orders of magnitude larger than the ranges for the other two traits, it swamps out the area differences between the smaller states, making the differences very difficult to see - for example, the area of the points for for Location_3's Mid_chr (70) and Sm_chr (5), look almost the same.
Is there a way to set a conditional size scale based on name in ggplot2 without having to facit wrap them? Maybe a conditional statement for scale_size_continuous(range = c(<>, <>)) separately for Lg_chr, Mid_chr, and Sm_chr?
test_df = data.frame(lg_chr = c(100000, 150000, 190000),
mid_chr = c(50, 90, 70),
sm_chr = c(15, 10, 5),
names = c("location_1", "location_2", "location_3"))
#reformat for graphing
test_df_long<- test_df %>% pivot_longer(!names,
names_to = c("category"),
values_to = "value")
#plot
ggplot(test_df_long,
aes(x = str_to_title(category),
y = str_to_title(names),
colour = str_to_title(names),
size = value)) +
geom_point() +
geom_text(aes(label = value),
colour = "white",
size = 3) +
scale_x_discrete(position = "top") +
scale_size_continuous(range = c(10, 50)) +
scale_color_manual(values = c("blue", "red",
"orange")) +
labs(x = NULL, y = NULL) +
theme(legend.position = "none",
panel.background = element_blank(),
panel.grid = element_blank(),
axis.ticks = element_blank()) ```
Edit:
You could use ggplot_build to manually modify the point layer [[1]] to specify the sizes of your points like this:
#plot
p <- ggplot(test_df_long,
aes(x = str_to_title(category),
y = str_to_title(names),
colour = str_to_title(names),
size = value)) +
geom_point() +
geom_text(aes(label = value),
colour = "white",
size = 3) +
scale_x_discrete(position = "top") +
scale_color_manual(values = c("blue", "red",
"orange")) +
labs(x = NULL, y = NULL) +
theme(legend.position = "none",
panel.background = element_blank(),
panel.grid = element_blank(),
axis.ticks = element_blank())
q <- ggplot_build(p)
q$data[[1]]$size <- c(7,4,1,8,5,2,9,6,3)*5
q <- ggplot_gtable(q)
plot(q)
Output:
You could use scale_size with a log10 scale to make the difference more visuable like this:
#plot
ggplot(test_df_long,
aes(x = str_to_title(category),
y = str_to_title(names),
colour = str_to_title(names),
size = value)) +
geom_point() +
geom_text(aes(label = value),
colour = "white",
size = 3) +
scale_size(trans="log10", range = c(10, 50)) +
scale_x_discrete(position = "top") +
scale_color_manual(values = c("blue", "red",
"orange")) +
labs(x = NULL, y = NULL) +
theme(legend.position = "none",
panel.background = element_blank(),
panel.grid = element_blank(),
axis.ticks = element_blank())
Output:

Remove standard deviation from legend ggplot in R

I would like to remove sd bars and mean from legend while keeping them on the main figure. In my case I have this:
And I want something like this:
This is my code:
data_summary <- function(x) {
m <- mean(x)
ymin <- m-std.error(x)
ymax <- m+std.error(x)
return(c(y=m,ymin=ymin,ymax=ymax))
}
a<-ggplot(esto,aes(x= Group, y=value, colour = Group, fill=fluency_test),
pattern_fill = "black",
colour = 'black') +
geom_boxplot(outlier.shape = NA,lwd=1.5) +
guides(colour = "none")+
geom_point(position=position_jitterdodge(),alpha=0.5)+
xlab("Group")+
labs(y = names(features)[[i_feature]])+
theme(axis.text.x = element_text(angle = 45, hjust = 1))+
stat_summary(fun.data=data_summary, color="black",position=position_dodge(width=0.75), size = 1.3)+
scale_shape_manual("Summary Statistics", values=c("Mean"="+"))+
scale_color_manual(values=c("#7CAE00","#F8766D","#00BFC4","#C77CFF"))+
scale_fill_manual(values=c("white","azure3"))+
theme_gray(base_size = 18)+
theme(legend.key.size = unit(2, "cm"),
legend.key.width = unit(1,"cm"),legend.title=element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"))
You can prevent the point range from being displayed in the legend by adding show.legend=FALSE to stat_summary.
Using a minimal reprex based on mtcars:
library(ggplot2)
ggplot(mtcars, aes(x = cyl, y = mpg, color = factor(cyl))) +
geom_boxplot() +
stat_summary(color = "black", show.legend = FALSE)
#> No summary function supplied, defaulting to `mean_se()`

Ho to add the count values calculated in the geom_histogram on top of the bars in R

I'd like to add the count values calculated in the geom_histogram function on ggplot2. I've put the ggplot2 into a loop so I can produce multiple plots, in my case 30 but for ease, here is a dummy set for only four plots. Facet wrap didn't work as the geom density was pooling the data across all factors before calculating proportions, rather than within a factor/variable. To produce this plot, I've essentially mixed a whole bunch of code from various sources so credit to them.
library(dplyr)
library(ggplot2)
library(ggridges)
library(reshape2)
library(gridExtra)
#Make the data#
df.fact <- data.frame("A"=rnorm(400, mean = 350, sd=160),"B"=rnorm(400, mean = 300, sd=100), "C"=rnorm(400, mean = 200, sd=80), names=rep(factor(LETTERS[23:26]), 100))
df.test<-melt(df.fact, id.vars = "names", value.name = "Length2")
names(df.test)[names(df.test) =="variable"] <- "TSM.FACT"
#Create the plotlist##
myplots <- list()
#Loop for plots##
for(i in 1:(length(unique(df.test$names)))){
p1 <- eval(substitute(
ggplot(data=df.test[df.test$names == levels(df.test$names)[i],], aes(x=Length2, group=TSM.FACT, colour = TSM.FACT, fill=TSM.FACT)) +
geom_histogram(aes( y = stat(width*density)), position = "dodge", binwidth = 50, alpha =0.4, show.legend=T)+
ggtitle(paste0(levels(df.test$names)[i]))+
geom_density_line(stat="density", aes(y=(..count..)/sum(..count..)*50), alpha=0.3, size=0.5, show.legend=F) +
geom_vline(data=ddply(df.test[df.test$names == levels(df.test$names)[i],], ~ TSM.FACT, numcolwise(mean)), mapping=aes(xintercept = Length2, group=TSM.FACT, colour=TSM.FACT), linetype=2, size=1, show.legend=F) +
scale_y_continuous(labels = percent_format()) +
ylab("relative frequency") +
scale_color_manual(values= c("#00B2EE", "#1E90FF", "#104E8B")) +
scale_fill_manual(values= c("#00B2EE", "#1E90FF", "#104E8B")) +
theme_bw() + theme(
plot.title = element_text(lineheight=0.5, hjust= 0.5, size=10),
strip.text.y = element_text(hjust = 1, angle = 0),
strip.text.x = element_text(size=10, vjust = 0.9),
strip.text=element_text(margin = margin(t=0.3,r=1,b=0.3,l=1), size=8, debug = F, vjust=0.2),
strip.background = element_blank(),
axis.text.x = element_text(size=8, angle=0, vjust=0.2, margin = margin(t=0.3,r=0.1,b=0.3,l=0.1)),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
axis.line.x=element_line(colour="black"),
axis.line.y=element_line(colour="black"),
panel.grid.minor = element_blank(),
panel.border=element_blank(),
panel.background=element_blank(),
legend.position=(c(0.9,0.9)),
legend.title = element_blank(),
legend.key = element_blank()),
list(i = i)))
print(i)
print(p1)
myplots[[i]] <- p1
plot(p1)
}
#Join the plots
panelplot=grid.arrange(plotlist = myplots, grobs = myplots, shared.legend=T)
Unfortunately I am unable to reproduce your example. I can recommend adding a column that includes the sum of each bar (let's name it "Bar")
The required addition to the ggplot code then involves:
geom_text(aes(label = Bar), position = position_stack(vjust = 1)) +
The text height above the bar can be adjusted with vjust

Multiple fill legends for each variable in heat map

I have an input file file1.txt:
V1 V2 Score
rs4939134 SIFT 1
rs4939134 Polyphen2 0
rs4939134 MutationAssessor -1.75
rs151252290 SIFT 0.101
rs151252290 Polyphen2 0.128
rs151252290 MutationAssessor 1.735
rs12364724 SIFT 0
rs12364724 Polyphen2 0.926
rs12364724 MutationAssessor 1.75
rs34448143 SIFT 0.005
rs34448143 Polyphen2 0.194
rs34448143 MutationAssessor 0.205
rs115694714 SIFT 0.007
rs115694714 Polyphen2 1
rs115694714 MutationAssessor 0.895
And this is my R code to plot this table as a heatmap:
library(ggplot2)
mydata <- read.table("file7.txt", header = FALSE, sep = "\t")
names(mydata) <- c("V1", "V2", "Score")
ggplot(data = mydata, aes(x = V1, y = V2, fill = Score)) +
geom_tile() +
geom_text(aes(V1, V2, label = Score), color = "black", size = 3) +
scale_fill_continuous(type = "viridis", limits = c(-5.76, 5.37)) +
labs(x = "pic1", y = "") +
theme_bw()
theme(panel.border = element_rect(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.text = element_text(size = 4))
And this the plot I got:
what I need is for each row (each type in V2) I need to put a legend that represented, so at the end there will be 3 legends, each represent (one for SIFT, second for Polyphen and the third for MutationAssessor) with different range that I can specify.
for example: SIFT from (0,1)
and Polyphen from (0,1)
and MutationAssessor from (-6,6)
I tried different thing of previous asked questions but nothing work with me.
I appreciate any help.
You can loop over three given variables and plot different plots for each of them. In the end you have to combine them.
Create dataset with wanted limits:
myLimits <- list(
list("SIFT", 0, 1),
list("Polyphen2", 0, 1),
list("MutationAssessor", -6, 6)
)
Function to plot heatmap only for one variable at a time:
plotHeat <- function(type, MIN, MAX) {
library(ggplot2)
p <- ggplot(subset(mydata, V2 == type),
aes(V1, V2, fill = Score, label = Score)) +
geom_tile() +
geom_text(color = "black", size = 3) +
scale_fill_continuous(type = "viridis", limits = c(MIN, MAX)) +
labs(x = "SNP",
y = NULL,
fill = type) +
theme_bw()
# Output x-axis only for the last plot
if (type != myLimits[[length(myLimits)]][[1]]) {
p <- p + theme(axis.text.x = element_blank(),
axis.title.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank())
}
return(p)
}
Plot and combine plots using egg package:
res <- lapply(myLimits, function(x) {plotHeat(x[[1]], x[[2]], x[[3]])})
egg::ggarrange(plots = res)
This is maybe related to this.
xs <- split(mydata, f = mydata$V2)
p1 <- ggplot(data = xs$MutationAssessor, aes(x = V1, y = 0, fill = Score)) +
geom_tile() +
geom_text(aes(label = Score), color = "black", size = 3) +
scale_fill_continuous(type = "viridis", limits = c(-5.76, 5.37)) +
labs(x = "pic1", y = "") +
facet_grid(V2 ~ .) +
theme_bw() +
theme(panel.border = element_rect(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.text = element_text(size = 4))
p2 <- p1 %+% xs$Polyphen2
p3 <- p1 %+% xs$SIFT
library(gridExtra)
grid.arrange(p1, p2, p3)
And the result is:
EDIT:
In case you want different range for facets but you want values to be comparable (e.g. value around 5 should be yellow in all plots), there is a possible solution
First discretize your fill variable
mydata$colour <- cut(mydata$Score,
quantile(mydata$Score, c(0, 0.25, 0.5, 0.75, 1)),
include.lowest = T)
Then create plots:
xs <- split(mydata, f = mydata$V2)
p1 <- ggplot(data = xs$MutationAssessor, aes(x = V1, y = 0, fill = colour)) +
geom_tile() +
geom_text(aes(label = Score), color = "black", size = 3) +
labs(x = "pic1", y = "") +
facet_grid(V2 ~ .) +
theme_bw() +
theme(panel.border = element_rect(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.text = element_text(size = 4))
p2 <- p1 %+% xs$Polyphen2
p3 <- p1 %+% xs$SIFT
And finally change palette:
mypalette <- c("#FFFFCC", "#A1DAB4", "#41B6C4", "#2C7FB8", "#253494")
names(mypalette) <- levels(mydata$colour)
p1 <- p1 + scale_fill_manual(values = mypalette[levels(xs$MutationAssessor$colour)])
p2 <- p2 + scale_fill_manual(values = mypalette[levels(xs$Polyphen2$colour)])
p3 <- p3 + scale_fill_manual(values = mypalette[levels(xs$SIFT$colour)])
And the result is:
grid.arrange(p1, p2, p3)

R stacked bar graph plotting geom_text

I'm trying to plot a stacked bar graph in R using ggplot. I also want to include percentage in each piece of bars for that piece. I tried to follow the posts 1, 2, 3 but the values are not exactly in their respective blocks. My data is a file in dropbox.
My code is as follows:
f<-read.table("Input.txt", sep="\t", header=TRUE)
ggplot(data=f, aes(x=Form, y=Percentage, fill=Position)) +
geom_bar(stat="identity", colour="black") +
geom_text(position="stack", aes(x=Form, y=Percentage, ymax=Percentage, label=Percentage, hjust=0.5)) +
facet_grid(Sample_name ~ Sample_type, scales="free", space="free") +
opts(title = "Input_profile",
axis.text.x = theme_text(angle = 90, hjust = 1, size = 8, colour = "grey50"),
plot.title = theme_text(face="bold", size=11),
axis.title.x = theme_text(face="bold", size=9),
axis.title.y = theme_text(face="bold", size=9, angle=90),
panel.grid.major = theme_blank(),
panel.grid.minor = theme_blank()) +
scale_fill_hue(c=45, l=80)
ggsave("Output.pdf")
The output is-
Any help is greatly appreciated.
Thank you for your help and time!
I think you're using an older version of ggplot2. Because with your code modified for ggplot2 v 0.9.3, I get this:
p <- ggplot(data = df, aes(x = Form, y = Percentage, fill = Position))
p <- p + geom_bar(stat = "identity", colour = "black")
p <- p + geom_text(position = "stack", aes(x = Form, y = Percentage, ymax = Percentage, label = Percentage, hjust = 0.5))
p <- p + facet_grid(Sample_name ~ Sample_type, scales="free", space="free")
p <- p + theme(plot.title = element_text("Input_profile"),
axis.text.x = element_text(angle = 90, hjust = 1, size = 8, colour = "grey50"),
plot.title = element_text(face="bold", size=11),
axis.title.x = element_text(face="bold", size=9),
axis.title.y = element_text(face="bold", size=9, angle=90),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p <- p + scale_fill_hue(c=45, l=80)
p
You see that the text objects are normally placed properly. There are places where the bars are just too short so that the numbers overlap. You can also play with the size parameter.
To rectify that, you could do something like this to add up the numbers by yourself.
df <- ddply(df, .(Form, Sample_type, Sample_name), transform,
cum.perc = Reduce('+', list(Percentage/2,cumsum(c(0,head(Percentage,-1))))))
p <- ggplot(data = df, aes(x = Form, y = Percentage, fill = Position))
p <- p + geom_bar(stat = "identity", colour = "black")
p <- p + geom_text(aes(x = Form, y = cum.perc, ymax = cum.perc, label = Percentage, hjust = 0.5), size=2.7)
p <- p + facet_grid(Sample_name ~ Sample_type, scales="free", space="free")
p <- p + theme(plot.title = element_text("Input_profile"),
axis.text.x = element_text(angle = 90, hjust = 1, size = 8, colour = "grey50"),
plot.title = element_text(face="bold", size=11),
axis.title.x = element_text(face="bold", size=9),
axis.title.y = element_text(face="bold", size=9, angle=90),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p <- p + scale_fill_hue(c=45, l=80)
p
This gives:
Here a solution using barchart from lattice.
library(latticeExtra)
barchart(Percentage~Form|Sample_type*Sample_name,data=dat,
groups =Position,stack=T,
panel=function(...){
panel.barchart(...)
ll <- list(...)
keep <- !is.na(ll$groups[ll$subscripts])
x <- as.numeric(ll$x[keep])
y <- as.numeric(ll$y[keep])
groups <- as.numeric(factor(ll$groups)[ll$subscripts[keep]])
for (i in unique(x)) {
ok <- x == i
ord <- sort.list(groups[ok])
pos <- y[ok][ord] > 0
nok <- sum(pos, na.rm = TRUE)
h <- y[ok][ord][pos]
panel.text(x = rep(i, nok),y = cumsum(h)-0.5*h,
label = h,cex=1.5)
}
},
auto.key = list(columns = 5),
par.settings = ggplot2like(n = 5),
lattice.options = ggplot2like.opts())

Resources