Change color median line ggplot geom_boxplot() - r

I would like to change the color of the median line in geom_boxplot(). I have looked and can't find a way to do it. I have posted the R code here that I am using but I just really need a reference to how to change the color.
ggplot(invitro2) +
geom_boxplot(aes(x = reorder(CANCER_TYPE,tmedian), y = GeoMedian_IC50)) +
xlab("") +
geom_point(aes(x = reorder(CANCER_TYPE,tmedian), y = GeoMedian_IC50)) +
theme_bw() +
scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
annotation_logticks(sides="l") +
theme(axis.text.x=element_text(angle=45,size=10,hjust=1),
panel.grid.major = element_blank())

You can use the details of the plot, to derive the coordinates of where the median line is, and then add colour to it using geom_segment.
library(ggplot2)
p <- ggplot(mtcars, aes(factor(am), mpg)) + geom_boxplot()
dat <- ggplot_build(p)$data[[1]]
p + geom_segment(data=dat, aes(x=xmin, xend=xmax,
y=middle, yend=middle), colour="red", size=2)
Also had to increase the size of the line so that it covers the original black median
line

Related

Shift text in ggplot up

Using the this code gives the plot printed below. As you can see the percentages are printed on the border of the bars. I would like to have them above the bars. Is there a way to achieve this?
p <- ggplot(data=iris, aes(x=factor(Species), fill=factor(Species)))
p + geom_bar() + scale_fill_discrete(name="Species") + labs(x="") +geom_text(aes(y = (..count..),label = scales::percent((..count..)/sum(..count..))), stat="bin",colour="darkgreen") + theme(legend.position="none")
Just add an arbitrary value to y.
p <- ggplot(data=iris, aes(x=factor(Species), fill=factor(Species)))
p + geom_bar() + scale_fill_discrete(name="Species") + labs(x="") +geom_text(aes(y = (..count..) + 10,label = scales::percent((..count..)/sum(..count..))), stat="bin",colour="darkgreen") + theme(legend.position="none")
Or, as per Heroka's comment, use vjust, which is a better solution
p <- ggplot(data=iris, aes(x=factor(Species), fill=factor(Species)))
p + geom_bar() + scale_fill_discrete(name="Species") + labs(x="") +
geom_text(aes(y = (..count..),
label = scales::percent((..count..)/sum(..count..))),
stat="bin",
colour="darkgreen", vjust = -0.5) +
theme(legend.position="none")
But as this makes things quite cramped at the top you might want to add + expand_limits(y = c(0, 60)) to give you a bit more space for the labels.

Draw lines between two facets in ggplot2

How can I draw several lines between two facets?
I attempted this by plotting points at the min value of the top graph but they are not between the two facets. See picture below.
This is my code so far:
t <- seq(1:1000)
y1 <- rexp(1000)
y2 <- cumsum(y1)
z <- rep(NA, length(t))
z[100:200] <- 1
df <- data.frame(t=t, values=c(y2,y1), type=rep(c("Bytes","Changes"), each=1000))
points <- data.frame(x=c(10:200,300:350), y=min(y2), type=rep("Bytes",242))
vline.data <- data.frame(type = c("Bytes","Bytes","Changes","Changes"), vl=c(1,5,20,5))
g <- ggplot(data=df, aes(x=t, y=values)) +
geom_line(colour=I("black")) +
facet_grid(type ~ ., scales="free") +
scale_y_continuous(trans="log10") +
ylab("Log values") +
theme(axis.text.x = element_text(angle = 90, hjust = 1), panel.margin = unit(0, "lines"))+
geom_point(data=points, aes(x = x, y = y), colour="green")
g
In order to achieve that, you have to set the margins inside the plot to zero. You can do that with expand=c(0,0). The changes I made to your code:
When you use scale_y_continuous, you can define the axis label inside that part and you don't need a seperarate ylab.
Changed colour=I("black") to colour="black" inside geom_line.
Added expand=c(0,0) to scale_x_continuous and scale_y_continuous.
The complete code:
ggplot(data=df, aes(x=t, y=values)) +
geom_line(colour="black") +
geom_point(data=points, aes(x = x, y = y), colour="green") +
facet_grid(type ~ ., scales="free") +
scale_x_continuous("t", expand=c(0,0)) +
scale_y_continuous("Log values", trans="log10", expand=c(0,0)) +
theme(axis.text.x=element_text(angle=90, vjust=0.5), panel.margin=unit(0, "lines"))
which gives:
Adding lines can also be done with geom_segment. Normally the lines (segments) will appear in both facets. If you want them to appear between the two facets, you will have to restrict that in data parameter:
ggplot(data=df, aes(x=t, y=values)) +
geom_line(colour="black") +
geom_segment(data=df[df$type=="Bytes",], aes(x=10, y=0, xend=200, yend=0), colour="green", size=2) +
geom_segment(data=df[df$type=="Bytes",], aes(x=300, y=0, xend=350, yend=0), colour="green", size=1) +
facet_grid(type ~ ., scales="free") +
scale_x_continuous("t", expand=c(0,0)) +
scale_y_continuous("Log values", trans="log10", expand=c(0,0)) +
theme(axis.text.x=element_text(angle=90, vjust=0.5), panel.margin=unit(0, "lines"))
which gives:

How to control aspect ratio with both continuous and discrete axes with geom_tile in ggplot2?

I want to produce a plot with ggplot2 and geom_tile(). I want the tiles to have same height and width, which seems to work fine if both x and y axis are discrete (as was discussed here: adjust ggplot2 geom tile height and width).
If I have values on the x axis that are interpreted as continuous, however, I either get a lot of grey space in the plot and no aspect ratio of 1 when I use a discrete scale for the x axis (MWE1), or I do get a plot without grey space with a continuous scale but still no aspect ratio of 1 (MWE2). (I would insert images but it seems it is not allowed because my reputation is not high enough.)
MWE1:
my <- data.frame(x=c(rep(c(0.1),3),rep(c(0.3),3),rep(c(0.5),3)),y=rep(c("frac(SP1)=0.5", "frac(SP1)=0.7", "frac(SP2)=0,3"),3),z=sample(seq(1:50),9))
p <- ggplot(my, aes(x, y)) + geom_tile(aes(fill = z)) +
scale_fill_gradient(low = "white",high = "steelblue") +
theme_grey() +
labs(x = "", y= "") +
scale_x_discrete(expand = c(0,0)) +
scale_y_discrete(expand = c(0,0)) +
coord_fixed(ratio=1) +
theme(axis.ticks = element_blank())
MWE2:
my <- data.frame(x=c(rep(c(0.1),3),rep(c(0.3),3),rep(c(0.5),3)),y=rep(c("frac(SP1)=0.5", "frac(SP1)=0.7", "frac(SP2)=0,3"),3),z=sample(seq(1:50),9))
p <- ggplot(my, aes(x, y)) + geom_tile(aes(fill = z)) +
scale_fill_gradient(low = "white",high = "steelblue") +
theme_grey() +
labs(x = "", y= "") +
scale_x_continuous(breaks=c(0.1,0.3,0.5),expand = c(0,0)) +
scale_y_discrete(expand = c(0,0)) +
coord_fixed(ratio=1) +
theme(axis.ticks = element_blank())
Is there a way that I can still control aspect ratio when dealing with discrete and continuous scale? Or a way to tell ggplot2 to interpret the x values as discrete?
I'm not sure I quite understand your question, but I think this is what you want:
p <- ggplot(my, aes(factor(x), y)) + geom_tile(aes(fill = z)) +
scale_fill_gradient(low = "white",high = "steelblue") +
theme_grey() +
labs(x = "", y= "") +
coord_fixed() +
theme(axis.ticks = element_blank())
print(p)
I converted your x variable to a factor since it's not really continuous anyways. Now you get a 3x3 heatmap:

Vertical line in histogram in r

I'm struggeling a bit with a peace of code in R. I am trying to create 6 different histograms in the same figure. It works fine, but I need to place 1 vertical line in each of the 6 histograms. The code I am working with could look something like this:
require(ggplot2)
require(reshape2)
require(gdata)
MC_beta=rbind(rnorm(1000,-2,0.1),rnorm(1000,-1,0.1),rnorm(1000,0,0.1),rnorm(1000,0.5,0.1),rnorm(1000,1,0.1),rnorm(1000,2,0.1))
df <- data.frame(MC_beta[1,], MC_beta[2,], MC_beta[3,], MC_beta[4,],MC_beta[5,],MC_beta[6,])
names(df)[1:6]<-c("1", "2", "3", "4","5","6")
df2 = melt(df)
z=c(-2,-1,0,0.5,1,2)
ggplot(df2, aes(x=value, fill = variable)) + geom_vline(xintercept = z, colour="black") +geom_histogram(binwidth=0.03,colour = "black") + scale_fill_manual(name = "",values = c('red','blue',"red","blue","red","blue")) +
facet_wrap(~variable,nrow=6, ncol=1) + scale_x_continuous(breaks=seq(-2.5,2.5,0.5)) + guides(fill=FALSE) +
theme_bw() + theme(strip.background = element_blank(),axis.text=element_text(size=14.5),strip.text.x = element_text(size = 14.5)) + stat_function(fun = dnorm)
The problem is with the statement geom_vline(xintercept = z, colour = "black"). Apparently instead of placing one vertical line in each histogram, it places all 6 lines in each histogram. So instead, I want the first element in z to make a vertical line in the first histogram, the second element in z to make a vertical line in the second histogram and so fourth.
Thanks
Your z needs to be a data.frame with the corresponding xintercept for every value of the variable that defines the facet. Try these changes:
z <- data.frame(variable=levels(df2$variable),
mean=c(-2,-1,0,0.5,1,2))
ggplot(df2, aes(x=value, fill = variable))+
geom_vline(data=z, aes(xintercept = mean), colour="black") +
geom_histogram(binwidth=0.03,colour = "black") +
scale_fill_manual(name = "",values = c('red','blue',"red","blue","red","blue")) +
facet_wrap(~variable,nrow=6, ncol=1) +
scale_x_continuous(breaks=seq(-2.5,2.5,0.5))+ guides(fill=FALSE) +
theme_bw() +
theme(strip.background = element_blank(), axis.text=element_text(size=14.5), strip.text.x = element_text(size = 14.5)) +
stat_function(fun = dnorm)
I hope that helps.
You have z outside the data, so you will draw a vertical line in each facet. Use
df2 <- (merge(df2, cbind.data.frame(variable=names(df), z)))
and then
geom_vline(aes(xintercept = z), colour="black")

can one offset jitter points in ggplot boxplot

In a ggplot boxplot, it is easy to use jitter to add the raw data points with varying degrees of jitter. With zero jitter the following code
dat <- data.frame(group=c('a', 'b', 'c'), values = runif(90))
ggplot(dat, aes(group, values)) +
geom_boxplot(outlier.size = 0) +
geom_jitter(position=position_jitter(width=0), aes(colour=group), alpha=0.7) +
ylim(0, 1) + stat_summary(fun.y=mean, shape=3, col='red', geom='point') +
opts(legend.position = "right") + ylab("values") + xlab("group")
produces the plot below.
Is it possible to use zero jitter but add an offset such that the points are in a line but shifted left by 25% of the box width? I tried geom_point with dodge but this generated a jitter.
If we convert group to numeric and then add an offset, you seem to get your desired output. There is probably a more effective / efficient way, but give this a whirl:
ggplot(dat, aes(group, values)) +
geom_boxplot(outlier.size = 0) +
geom_point(aes(x = as.numeric(group) + .25, colour=group), alpha=0.7) +
ylim(0, 1) + stat_summary(fun.y=mean, shape=3, col='red', geom='point') +
opts(legend.position = "right") + ylab("values") + xlab("group")

Resources