ggplot. Adding regression lines by group - r

If I plot this
dodge <- position_dodge(.35)
ggplot(mediat, aes(x=t, y=Value, colour=factor(act),group=id )) +
geom_point(position=dodge) + geom_errorbar(aes(ymin=Value-sdt, ymax=Value+sdt),
width=0, position=dodge) + theme_bw() + geom_smooth(method="lm",se=FALSE,
fullrange=TRUE)
I get this
As you can see the regression line is not plotted.
with +stat_smooth(method=lm, fullrange=TRUE, se = FALSE) the result is the same.
I've found that removing the "group=id" I can get the regression lines but
then
ggplot(mediat, aes(x=t, y=Value, colour=factor(act) ))+ geom_point(position=dodge) +
geom_errorbar(aes(ymin=Value-sdt, ymax=Value+sdt), width=0, position=dodge) +
theme_bw() + geom_smooth(method="lm",se=FALSE, fullrange=TRUE)
As you can see, it now plots the lines, but I lose the dodge function by groups.
How can I get both things at once? I mean, regression lines by "id" on the first uncluttered plot?
Any other solution with base plot, lattice or any other common package would also be welcome.
Regards

Related

Create line and curve line user defined values

Hello, I have a smooth scatter plot and I want to reproduce the same plot with ggplot. I have created the plot using ggplot, but I am not able to create the curved line and the diagonal line as in the smooth scatter plot. Can anyone help me?
data
A B cat
0.8803 0.0342 data1
0.9174 0.0331 data1
0.9083 0.05 data1
0.7542 0.161 data2
0.8983 0.0593 data2
0.8182 0.1074 data2
0.3525 0.3525 data3
0.5339 0.2288 data3
0.7295 0.082 data3
smooth scatter plot
df=read.table("test.txt", sep='\t', header=TRUE)
smoothScatter(df$B,df$A,,nrpoints=Inf,xlim=c(0,1),ylim=c(0,1), pch=20,cex=1, col=df$cat)
points(c(0,1),c(1,0),type='l',col='green',lty=2,lwd=2)
p=0:1000/1000
points((1-p)^2,p^2,type='l',col='red',lty=2,lwd=2)
ggplot script
ggplot(df, aes(x=B, y=A))+
geom_point()
If you're trying to draw lines based on an equation, you can define the equation and then use geom_line to draw that line with stat="function". Here's how you can draw the lines in ggplot and simulate the same look:
library(ggplot2)
# Red curve from the base-graphics call points((1-p)^2, p^2), rewritten as
# y = f(x): with x = (1-p)^2 we get p = 1 - sqrt(x), hence y = p^2.
# Valid for x in [0, 1], which matches the plotted range.
curvy <- function(x) { (1 - sqrt(x))^2 }
straight <- function(x) 1-x
ggplot(df, aes(x=B, y=A))+
geom_point(size=3) +
geom_line(stat='function', fun=straight, color='green', linetype='dashed', size=1) +
geom_line(stat='function', fun=curvy, color='red', linetype='dashed', size=1) +
xlim(0,1) + ylim(0,1) +
theme_classic()
As for the fuzzy points, you can give ggblur a try; here's the GitHub. It wasn't available for the version I'm using.
The other way to draw lines on a plot via regression would be to use geom_smooth(). You'll want to specify the method there - for linear you can use "lm" - "loess" is used by default.
ggplot(df, aes(x=B, y=A))+
geom_point(size=3) +
geom_line(stat='function', fun=straight, color='green', linetype='dashed', size=1) +
geom_line(stat='function', fun=curvy, color='red', linetype='dashed', size=1) +
geom_smooth(method='lm', alpha=0.2, color='blue', fill='skyblue', linetype='dotted') +
xlim(0,1) + ylim(0,1) +
theme_classic()

ggplot2: smooth and fill

I'd like to smooth the geom_lines and fill the area between. I've tried stat_smooth() to smooth the lines, and both geom_ribbon() and geom_polygon() but without success.
Apologies for the double barrel question.
bell <- data.frame(
month = c("Launch","1st","2nd","3rd","4th","5th","6th","7th","8th","9th","10th","11th","12th"),
rate = c(0,.05,.12,.18,.34,.42,.57,.68,.75,.81,.83,.85,.87))
bell$month <- factor(bell$month, levels = rev(c("Launch","1st","2nd","3rd","4th","5th","6th","7th","8th","9th","10th","11th","12th")))
ggplot() +
theme_minimal() +
coord_flip() +
scale_fill_manual(values=cols) +
geom_line(data=bell, aes(x=month, y=.5-(rate/2), group=1), color='pink', size=1) +
geom_line(data=bell, aes(x=month, y=.5+(rate/2), group=1), color='pink', size=1) +
theme(legend.position='none', axis.ticks=element_blank(), axis.text.x=element_blank(),axis.title.x=element_blank())
One option is to calculate the points of the loess regression outside of ggplot and then plot them using geom_line (for a line) or geom_area for a filled area (geom_area is geom_ribbon, but with ymin fixed at zero).
Also, you don't need coord_flip. Instead, just switch your x and y mappings. This is necessary anyway if you want to fill underneath the curve.
In the example below I've created a numeric month variable for the regression. I've also commented out the scale_fill_manual line because your example doesn't provide a cols vector and the plot code doesn't produce a legend anyway. I've also commented out the legend.position='none' line as it's superfluous.
bell$month.num = 0:12
m1 = loess(rate ~ month.num, data=bell)
bell$loess.mod = predict(m1)
ggplot(bell, aes(y=month, group=1)) +
theme_minimal() +
#scale_fill_manual(values=cols) +
geom_area(aes(x=.5-(loess.mod/2)), fill='pink', size=1) +
geom_area(aes(x=.5+(loess.mod/2)), fill='pink', size=1) +
theme(#legend.position='none',
axis.ticks=element_blank(),
axis.text.x=element_blank(),
axis.title.x=element_blank())

How to place multiple boxplots in the same column with ggplot(geom_boxplot)

I would like to build a boxplot in which the 4 factors (N1:N4) are overlaid in the same column. For example with the following data:
# Define the input vectors first: data.frame() evaluates its arguments
# immediately, so N and Value must already exist when df is built.
Q<-c("C1","C1","C2","C3","C3","C1","C1","C2","C2","C3","C3","Q1","Q1","Q1","Q1","Q3","Q3","Q4","Q4","Q1","Q1","Q1","Q1","Q3","Q3","Q4","Q4")
N<-c("N2","N3","N3","N2","N3","N2","N3","N2","N3","N2","N3","N0","N1","N2","N3","N1","N3","N0","N1","N0","N1","N2","N3","N1","N3","N0","N1")
Value<-c(4.7,8.61,8.34,5.89,8.36,1.76,2.4,5.01,2.12,1.88,3.01,2.4,7.28,4.34,5.39,11.61,10.14,3.02,9.45,8.8,7.4,6.93,8.44,7.37,7.81,6.74,8.5)
# Only N and Value become columns of df; Q is not included.
df<-data.frame(N=N,Value=Value)
with the following (usual) code, the output is 4 box-plots displayed in 4 columns for the 4 variables
ggplot(df, aes(x=N, y=Value,color=N)) + theme_bw(base_size = 20)+ geom_boxplot()
many thanks
Updated Answer
Based on your comment, here's a way to add marginal boxplots. We'll use the built-in mtcars data frame.
First, some set-up:
library(cowplot)
# Common theme elements
thm = list(theme_bw(),
guides(colour=FALSE, fill=FALSE),
theme(plot.margin=unit(rep(0,4),"lines")))
Now, create the three plots:
# Main plot
p1 = ggplot(mtcars, aes(wt, mpg, colour=factor(cyl), fill=factor(cyl))) +
geom_smooth(method="lm") + labs(colour="Cyl", fill="Cyl") +
scale_y_continuous(limits=c(10,35)) +
thm[-2] +
theme(legend.position = c(0.85,0.8))
# Top margin plot
p2 = ggplot(mtcars, aes(factor(cyl), wt, colour=factor(cyl))) +
geom_boxplot() + thm + coord_flip() + labs(x="Cyl", y="")
# Right margin plot
p3 = ggplot(mtcars, aes(factor(cyl), mpg, colour=factor(cyl))) +
geom_boxplot() + thm + labs(x="Cyl", y="") +
scale_y_continuous(limits=c(10,35))
Lay out the plots and add the legend:
plot_grid(plotlist=list(p2, ggplot(), p1, p3), ncol=2,
rel_widths=c(5,1), rel_heights=c(1,5), align="hv")
Original Answer
You can overlay all four boxplots in a single column, but the plot will be unreadable. The first example below removes N as the x coordinate, but keeps N as the colour aesthetic. This results in the four levels of N being plotted at a single tick mark (which I've removed by setting breaks to NULL). However, the plots are still dodged. To plot them one on top of the other, set the dodge width to zero, as I've done in the second example. However, the plots are not readable when they are overlaid.
ggplot(df, aes(x="", y=Value,color=N)) +
theme_bw(base_size = 20) +
geom_boxplot() +
scale_x_discrete(breaks=NULL) +
labs(x="")
ggplot(df, aes(x="", y=Value,color=N)) +
theme_bw(base_size = 20) +
geom_boxplot(position=position_dodge(0)) +
scale_x_discrete(breaks=NULL) +
labs(x="")

Draw mean and outlier points for box plots using ggplot2

I am trying to plot the outliers and mean point for the box plots in below using the data available here. The dataset has 3 different factors and 1 value column for 3600 rows.
When I run the code below, it shows the mean points but doesn't draw the outliers properly
ggplot(df, aes(x=Representations, y=Values, fill=Methods)) +
geom_boxplot() +
facet_wrap(~Metrics) +
stat_summary(fun.y=mean, colour="black", geom="point", position=position_dodge(width=0.75)) +
geom_point() +
theme_bw()
Again, when I modify the code as shown below, the mean points disappear!
ggplot(df, aes(x=Representations, y=Values, colour=Methods)) +
geom_boxplot() +
facet_wrap(~Metrics) +
stat_summary(fun.y=mean, colour="black", geom="point", position=position_dodge(width=0.75)) +
geom_point() +
theme_bw()
In both of the cases I am getting the message: "ymax not defined: adjusting position using y instead" 3 times.
Any kind suggestions how to fix it? I would like to draw the mean points within individual box plots and show outliers in the same colour as the plots.
EDIT:
The original data set does not have any outliers and that was reason for my confusion. Thanks to MrFlick's answer with randomly generated data which clarifies it properly.
Rather than downloading the data, I just made a random sample.
set.seed(18)
gg <- expand.grid (
Methods=c("BC","FD","FDFND","NC"),
Metrics=c("DM","DTI","LB"),
Representations=c("CHG","QR","HQR")
)
df <- data.frame(
gg,
Values=rnorm(nrow(gg)*50)
)
Then you should be able to create the plot you want with
library(ggplot2)
ggplot(df, aes(x=Representations, y=Values, fill=Methods)) +
geom_boxplot() +
stat_summary(fun.y="mean", geom="point",
position=position_dodge(width=0.75), color="white") +
facet_wrap(~Metrics)
which gave me
I was using ggplot2 version 0.9.3.1

Plot two regression lines (calculated on subset of the same data frame) on the same graph with ggplot

I have this kind of data frame:
df<-data.frame(x=c(1,2,3,4,5,6,7,8,9,10),y=c(2,11,24,30,45,65,90,110,126,145), a=c(0.2,0.2,0.3,0.4,0.1,0.8,0.7,0.6,0.8,0.9))
Using ggplot, I would like to plot on the same figure two regression lines, calculated for a subset of my data frame under condition (a > or < 0.5).
Visually, I would like both of these regression lines:
df_a<-subset(df, df$a<0.5)
ggplot(df_a,aes(x,y))+
geom_point(aes(color = a), size=3.5) +
geom_smooth(method="lm", size=1, color="black") +
ylim(-5,155) +
xlim(0,11)
df_b<-subset(df, df$a>0.5)
ggplot(df_b,aes(x,y)) +
geom_point(aes(color = a), size=3.5) +
geom_smooth(method="lm", size=1, color="black") +
ylim(-5,155) +
xlim(0,11)
Appear on this figure:
ggplot(df,aes(x,y))+ geom_point(aes(color = a), size=3.5)
I've tried with par(new=TRUE) without success.
Make a flag variable, and use group:
df$small=df$a<0.5
ggplot(df,aes(x,y,group=small))+geom_point() + stat_smooth(method="lm")
and have yourself pretty colours and a legend if you want:
ggplot(df,aes(x,y,group=small,colour=small))+geom_point() + stat_smooth(method="lm")
Or maybe you want to colour the dots:
ggplot(df,aes(x,y,group=small)) +
stat_smooth(method="lm")+geom_point(aes(colour=a))

Resources