Plotting confidence intervals data with R - r

I would like to plot mean CPUE by year and add in CIs that are already calculated.
The CIs are calculated following an approach for trawl survey data so I do not think I can use any of the CI plot functions available in R. I would really appreciate any help.
I have been trying to figure this out following examples I found online, but the CIs are not being plotted on my data points. I have R version 3.1.0 on windows 8. This is my code.
dput(fall)
structure(list(Year = structure(1:7, .Label = c("2007", "2008",
"2009", "2010", "2011", "2012", "2013", "2014"), class = "factor"),
Season = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Fall",
"Spring"), class = "factor"), CPUE = c(2.67597320766895,
1.13720423803133, 3.33880765324431, 0.806172684858967, 1.4489759307485,
10.5492990950043, 4.52479039663784), Variance = c(6.80824504958873,
0.320707030421567, 11.5769406857122, 1.05791053306542, 0.187046436602381,
15.8421823978692, 2.68384838783695), SD = c(2.60926139924476,
0.566310012644635, 3.40249036526369, 1.0285477786984, 0.432488654882855,
3.98022391303168, 1.63824552123207), Number = c(75, 91, 87,
85, 115, 157, 208), CV = c(0.975070076100538, 0.497984437364567,
1.01907348929115, 1.27584052153583, 0.298478839920718, 0.377297475139038,
0.362059980159386), lower = c(2.07563668109912, 1.01926446969017,
2.61363856286983, 0.584320068914576, 1.36908295705723, 9.92183627713643,
4.30084507885747), upper = c(3.27630973423878, 1.2551440063725,
4.06397674361879, 1.02802530080336, 1.52886890443977, 11.1767619128722,
4.7487357144182)), .Names = c("Year", "Season", "CPUE", "Variance",
"SD", "Number", "CV", "lower", "upper"), row.names = c(NA, 7L
), class = "data.frame")
My plot attempt was:
plot(fall$CPUE, type='n', xlab="Year", ylab='Mean CPUE', axes=F)
axis(1, at=1:8, labels=levels(fall$Year))
axis(2)
box()
lines(fall$Year, fall$CPUE, col=1)
points(fall$Year, fall$CPUE, col=1, pch=16)
arrows(y0 = fall$lower, y1 = fall$upper, x0 = fall$CPUE, x1 = fall$CPUE,
length=0.1, code = 3, col = 4, angle = 90)

Andre's answer here helped me to find a solution.
Data for plot
structure(list(CPUE = c(2.67597320766895, 1.13720423803133, 3.33880765324431, 0.806172684858967, 1.4489759307485, 10.5492990950043, 4.52479039663784 ), lower = c(2.25499288520513, 1.04583532734779, 2.80755247046762, 0.640225963050555, 1.37919786502951, 9.90712658332295, 4.26047455308042 ), upper = c(3.09695353013276, 1.22857314871488, 3.870062836021, 0.972119406667378, 1.51875399646749, 11.1914716066857, 4.78910624019525 )), .Names = c("CPUE", "lower", "upper"), class = "data.frame", row.names = c(NA, 7L))
Plot
# plotrix supplies plotCI(). library() stops with an error when the package
# is missing; require() would only return FALSE and let the script carry on.
library(plotrix)

# Line plot of mean CPUE against observation index. The default axes are
# suppressed so the x axis can be labelled with the survey years.
plot(fall$CPUE, type = "l", xlab = "Year", ylab = "Mean CPUE",
     axes = FALSE, ylim = c(0, 12))
axis(1, at = 1:8, labels = levels(fall$Year))
axis(2)
box()

# Overlay the pre-computed confidence limits as vertical error bars.
# uiw/liw are distances from the mean to each limit; both are given so the
# bars follow the stored `upper` and `lower` columns even when the interval
# is not symmetric around the mean.
plotCI(fall$CPUE, y = NULL,
       uiw = fall$upper - fall$CPUE,
       liw = fall$CPUE - fall$lower,
       err = "y", sfrac = 0.01, gap = 0, slty = par("lty"),
       add = TRUE, scol = "black", pch = 18, pt.bg = par("bg"))

Related

How to put R2 and regression equation from different regression in one graph?

I have example data set below since I couldn't put all data set here:
structure(list(start_date = structure(c(1488153600, 1488153600,
1488758400, 1488758400, 1489363200, 1489363200, 1489968000, 1489968000,
1490572800, 1490572800), tzone = "UTC", class = c("POSIXct",
"POSIXt")), end_date = structure(c(1488758400, 1488758400, 1489363200,
1489363200, 1489968000, 1489968000, 1490572800, 1490572800, 1491177600,
1491177600), tzone = "UTC", class = c("POSIXct", "POSIXt")),
yyyy = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("2017", "2018"), class = "factor"), mmm = c("Feb",
"Feb", "Mar", "Mar", "Mar", "Mar", "Mar", "Mar", "Mar", "Mar"
), dd = c(27, 27, 6, 6, 13, 13, 20, 20, 27, 27), season = c("Spring",
"Spring", "Spring", "Spring", "Spring", "Spring", "Spring",
"Spring", "Spring", "Spring"), parameter = c("Cl", "Na",
"Cl", "Na", "Cl", "Na", "Cl", "Na", "Cl", "Na"), Gruvebadet = c(301.983272818835,
212.350184538299, 568.590207629836, 383.734431751691, 1052.32022102137,
678.971053100071, 224.264161262609, 165.3726548191, 429.556015888673,
410.051446157094), Nordpolhotellet = c(490, 330, 610, 570,
350, 250, 400, 460, 600, 490), Zeppelin = c(294.285714285714,
202.142857142857, 311.428571428571, 235.714285714286, 772,
474, 132.857142857143, 145.714285714286, 231.428571428571,
216.428571428571)), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame"))
I would like to draw linear regressions with and without the intercept forced to zero on one scatter plot. I use this code to do that:
my.formula <- y ~ x # linear equation without intercept zero
my.formula2 <- y ~ x - 1 #linear equation with intercept zero
library(ggplot2)
p <- ggplot(data=nacl2, aes(y=Nordpolhotellet, x=Gruvebadet)) +
geom_point() + geom_smooth(method="lm", formula=my.formula, se=F, col="red") +
geom_point() + geom_smooth(method="lm", formula=my.formula2, se=F, col="blue") +
theme_bw()
p2 <- p + facet_wrap(~parameter, ncol=2, scales="free",
labeller=as_labeller(c(Na="Na+", Cl="Cl-", NH4="NH4+",
K="K+", Mg="Mg++", Ca="Ca++", NO3="NO3-",
SO4="SO4--"))) +
theme(strip.background=element_blank(), strip.placement="outside") +
labs(y="Nordpolhotellet, Concentration (ng/m3)", x="Gruvebadet, Concentration (ng/m3)")
Then I want to add regression equation and R2 of each regression in one graph as well. I add this code:
library("ggpmisc")
p3 <- p2 + stat_poly_eq(aes(label=paste(stat(eq.label), stat(rr.label), sep="*\", \"*")),
formula=my.formula, coef.digits=4, rr.digits=3, parse=TRUE) +
stat_poly_eq(aes(label=paste(stat(eq.label), stat(rr.label), sep="*\", \"*")),
formula=my.formula2, coef.digits=4, rr.digits=3, parse=TRUE)
p3
However, in the result the two regression equations and their R2 values overlap each other, like this:
I would like each equation and its R2 to appear on a separate line. How can I do that?
Luckily, I found the solution. The second equation has to sit below the first one, so its position needs adjusting. I set label.x.npc and label.y.npc by trial and error until the label was where I wanted it. Here is the complete code:
library(ggplot2)
library(ggpmisc)

# Two model formulas for the same x/y pair.
my.formula <- y ~ x       # intercept estimated from the data
my.formula2 <- y ~ x - 1  # intercept fixed at zero (line through the origin)

# Scatter plot with both regression lines:
# red = free intercept, blue = through the origin.
p <- ggplot(data = df3, aes(y = Nordpolhotellet, x = Gruvebadet)) +
  geom_point() +
  geom_smooth(method = "lm", formula = my.formula, se = FALSE, col = "red") +
  geom_smooth(method = "lm", formula = my.formula2, se = FALSE, col = "blue") +
  theme_bw()

# One panel per parameter, with the ion names shown in the strips.
p2 <- p +
  facet_wrap(~parameter, ncol = 2, scales = "free",
             labeller = as_labeller(c(Na = "Na+", Cl = "Cl-"))) +
  theme(strip.background = element_blank(), strip.placement = "outside") +
  labs(y = "Nordpolhotellet, Concentration (ng/m3)",
       x = "Gruvebadet, Concentration (ng/m3)")

# Label shared by both fits: the equation and R2 joined by a comma.
eq_label <- aes(
  label = paste(stat(eq.label), stat(rr.label), sep = "*\", \"*")
)

# Add one label per fit, coloured like its line. The second label is given an
# explicit npc position so that it is drawn below the first instead of on top
# of it.
p3 <- p2 +
  stat_poly_eq(eq_label, formula = my.formula,
               coef.digits = 4, rr.digits = 3, parse = TRUE, col = "red") +
  stat_poly_eq(eq_label, formula = my.formula2,
               coef.digits = 4, rr.digits = 3, parse = TRUE, col = "blue",
               label.x.npc = 0.05, label.y.npc = 0.88)

# Display final graph
p3
Here is scatter plot that I desire:

Putting x-axis labels directly under tick marks in barplots in R

I have a table (below) showing the percentage of tree species (categorical variable) present in a group experiment. My objective is to plot the percentage of tree species on the y-axis and 'Species' on the x-axis within a barplot.
Issue
My problem is that I am having trouble formatting the x-axis correctly. My objective is to ensure that the x-axis labels for **'Species'** are:
Positioned directly underneath their bar at the tick mark
Do not overlap onto the plotting area
If anyone can help solve this issue, I would be incredibly grateful.
R code
df <- leaf.percent[order(leaf.percent$Leaf.Percentge, decreasing = TRUE),]
Tree.labels<-c("Quercus robar", "Quercus Patraea",
"Deciduous", "Oak",
"Plant", "Shrub")
par(mar=c(6, 6, 3, 3))
Tree<-barplot(df$Leaf.Percentge, names.arg = df$Species,
xaxt = "n",
ylab="Percentage %",
xlab="Tree Species",
col="lightblue",
ylim = c(0, 60))
axis(1, at=Tree, labels=FALSE)
text(seq(1, 6, by=1), par("usr")[3] - 0.2,
labels=unique(Tree.labels),
srt = 25, pos = 1,
xpd = TRUE, cex=0.7)
DATA
structure(list(Species = structure(1:6, .Label = c("Deciduous",
"Oak", "Plant", "Quercus_petraea", "Quercus_robur", "Shrub"), class = "factor"),
Frequency = c(48L, 29L, 6L, 70L, 206L, 4L), Leaf.Percentge = c(13.2231404958678,
7.98898071625344, 1.65289256198347, 19.2837465564738, 56.7493112947658,
1.10192837465565)), .Names = c("Species", "Frequency", "Leaf.Percentge"
), row.names = c(NA, -6L), class = "data.frame")

Add median trend line and p-value for one-sided repeated measures test in 2-y axis scatter plot [R]

Load sample data frame
df <- structure(list(ID = c(1,1,1,2,2,2,3,3,3),
time = c(0L,1L,2L,0L,1L,2L,0L,1L,2L),
M1a = c(0, 0.2, 0.3, 0, 1.5, 2.9,0, 2.4, 3.9),
M2a = c(0, 0.4, 0.6,0,0.9, 0.9,0,0.5, 0.7),
M3a = c(0,0.3, 0.4, 0, 0.6, 0.9,0, 0.5, 0.8),
M4a = c(0,0.6, 0.6,0, 0.4, 0.6,0, 0.2, 0.9),
M1b = c(0L, 200L, 300L,0L, 300L, 900L,0L, 900L, 1000L),
M2b = c(0L, 400L, 600L,0L, 600L, 900L,0L, 600L, 1000L),
M3b = c(0L, 300L, 400L,0L, 200L, 800L,0L, 200L, 900L),
M4b = c(0L, 600L, 600L,0L, 800L, 1000L,0L, 400L, 1100L)),
.Names = c("ID", "time", "M1a", "M2a", "M3a", "M4a","M1b", "M2b","M3b", "M4b"), class = "data.frame", row.names = c(NA, -9L))
Now plot two y-axis scatter plot
par(mar=c(5,4,4,5)+.1)
plot(df$time,df$M1a,type="p",col="red", main="M1", cex=0.5, cex.main=2, cex.lab=1.0, cex.axis=0.7)
par(new = TRUE)
plot(df$time,df$M1b,type="p",col="blue",xaxt="n",yaxt="n",xlab="",ylab="")
mtext("Relative change (%)",side=4,line=3)
axis(4)
legend("topleft",col=c("red","blue"),lty=1,legend=c("Absolute Change","Relative Change"))
What I am stuck with?
1.Median trend line
I was able to add regression line, but I want to have a median trend line connecting M1a and M1b medians for three time points.
2.Adding p-values to the plot, repeated one-way anova test
fit1=aov(df$M1a~df$time + Error(ID/time),na.action=na.exclude,data=df);
sig1= summary(fit1)$"Error: Within"$"Pr(>F)"
if (sig<0.001) star='**' else if (sig>=0.001&sig<0.05) star='*' else star='';
if (sig1<0.001) star='**' else star='';
I was planning to use the above code to add the p-value to my 2-y axis plot. However, I get sig1 as NULL, whereas sig1 should print out 0.153.
The final results should include * mark on the main title of plot (M1), if results are significant.
Any tips? Thanks in advance!
To answer #2 first, one needs to look at the inner structures of a summary.aov object:
dput(summary(fit1))
structure(list(`Error: ID` = structure(list(structure(list(Df = 1,
`Sum Sq` = 5.60666666666667, `Mean Sq` = 5.60666666666667,
`F value` = NA_real_, `Pr(>F)` = NA_real_), .Names = c("Df",
"Sum Sq", "Mean Sq", "F value", "Pr(>F)"), class = c("anova",
"data.frame"), row.names = "Residuals")), class = c("summary.aov",
"listof")), `Error: ID:time` = structure(list(structure(list(
Df = 1, `Sum Sq` = 11.3157142857143, `Mean Sq` = 11.3157142857143), .Names = c("Df",
"Sum Sq", "Mean Sq"), class = c("anova", "data.frame"), row.names = "df$time")), class = c("summary.aov",
"listof")), `Error: Within` = structure(list(structure(list(Df = c(1,
5), `Sum Sq` = c(0.325952380952381, 0.573888888888889), `Mean Sq` = c(0.325952380952381,
0.114777777777778), `F value` = c(2.83985617480293, NA), `Pr(>F)` = c(0.152766396924706,
NA)), .Names = c("Df", "Sum Sq", "Mean Sq", "F value", "Pr(>F)"
), class = c("anova", "data.frame"), row.names = c("df$time ",
"Residuals"))), class = c("summary.aov", "listof"))), .Names = c("Error: ID",
"Error: ID:time", "Error: Within"), class = "summary.aovlist")
And note that the values within summary(fit1)$"Error: Within" are actually buried one level deeper (and don't have names, so they need a numeric index). Do this:
summary(fit1)$"Error: Within"[[1]]$`Pr(>F)`
[1] 0.1527664 NA
Now to see if I can figure out the two-ordinate plot issue. Pretty sure one would need to do any median plotting before the par(new=TRUE) operation because that changes the user coordinate system based on the new data.
Adding a title with the extracted value to your plot, augmented by the helpful comment by @VincentBonhomme:
# Median of each measure at every time point. tapply() returns the medians
# ordered by the sorted time values and named after them, so the x
# coordinates are read from those names; unique(df$time) would follow the
# order of appearance in the data instead.
med_abs <- tapply(df$M1a, df$time, median)
med_rel <- tapply(df$M1b, df$time, median)
time_pts <- as.numeric(names(med_abs))

# p-value of the time effect. The "Error: Within" stratum holds an unnamed
# list whose first element is the ANOVA table; its `Pr(>F)` column has one
# entry for the time term followed by NA for the Residuals row, so only the
# first entry is kept.
p_time <- summary(fit1)$"Error: Within"[[1]]$`Pr(>F)`[1]

# Left axis: absolute change, with its median trend line.
plot(df$time, df$M1a, type = "p", col = "red",
     cex = 0.5, cex.main = 2, cex.lab = 1.0, cex.axis = 0.7)
lines(time_pts, med_abs, col = "red")

# par(new = TRUE) lets the next plot() draw over the current one with its own
# y scale, so each median line is drawn right after the plot it belongs to.
par(new = TRUE)
plot(df$time, df$M1b, type = "p", col = "blue",
     xaxt = "n", yaxt = "n", xlab = "", ylab = "")
lines(time_pts, med_rel, col = "blue")
mtext("Relative change (%)", side = 4, line = 3)
axis(4)

# Line colours match the legend keys.
legend("topleft", col = c("red", "blue"), lty = 1,
       legend = c("Absolute Change", "Relative Change"))
title(main = bquote("P-value for M1 (absolute scale)" == .(round(p_time, 3))))

How to wrap long titles in lattice graphics in R?

I need to add a long title to a graphic created with the likert function from the HH package, that uses lattice, but it (lattice) doesn't have this facility. Is there a way to do this?
My code:
library(HH)
ppi <- 150
jpeg("ssb_%02d.jpg", width=7*ppi, height=4*ppi, res=ppi)
for(i in 1:2){
plot_obj <- likert(Grup ~ . | Grup, data = ssb, as.percent = TRUE, positive.order = TRUE,
main=list(label = items[i,], cex=1.2), xlab=list(label="Percent", cex=1.1),
ylab="", ylab.right = list("Subjects per group", cex=1.1),
scales = list(y = list(relation = "free", labels=""), cex=1.1),
layout = c(1, 2), auto.key=list(space="bottom", columns=3, title="", cex=1.1))
print(plot_obj)
}
dev.off()
My data:
ssb <- structure(list(`Strongly Disagree` = c(2L, 1L), `Moderate Disagree` = 1:2,
`Slightly Disagree` = c(3L, 1L), `Slightly Agree` = c(1L,
5L), `Moderate Agree` = 4:5, `Strongly Agree` = c(9L, 6L),
Grup = c("Experimental grup", "Control grup")), .Names = c("Strongly Disagree",
"Moderate Disagree", "Slightly Disagree", "Slightly Agree", "Moderate Agree",
"Strongly Agree", "Grup"), row.names = c("1", "2"), class = "data.frame")
Title items:
items <- structure(list(V1 = structure(1:2, .Label = c("1. În cele mai multe privinţe, viaţa mea corespunde idealului meu.",
"2. Până în prezent am primit cele mai importante lucruri pe care le doresc în viață."
), class = "factor")), .Names = "V1", class = "data.frame", row.names = c(NA,
-2L))
UPDATE
Graphic titles are not added directly, but dynamically, from a data frame, and the data frame is loaded from a .csv file. If, as was suggested in comments, I add a \n to the long title in the .csv file, this doesn't work.
I solved my problem, thanks to @josh-obrien. Now, when the graphic title is longer than 70 characters, it is wrapped at a width of 65 characters.
library(HH)

ppi <- 150
# "%02d" in the file name makes the device write one numbered file per page.
jpeg("ssb_%02d.jpg", width = 7 * ppi, height = 4 * ppi, res = ppi)

# One plot per title row in `items`.
for (i in seq_len(nrow(items))) {
  # The title column may be a factor; convert it once so that nchar() accepts
  # it and the plot receives plain text.
  graphic.title <- as.character(items[i, ])

  # Titles longer than 70 characters are re-flowed at a width of 65 and the
  # pieces joined with newlines. nchar() is base R, so no extra package (such
  # as stringi for stri_length()) is needed.
  if (nchar(graphic.title) > 70) {
    graphic.title <- paste(strwrap(graphic.title, width = 65), collapse = "\n")
  }

  plot_obj <- likert(
    Grup ~ . | Grup, data = ssb, as.percent = TRUE, positive.order = TRUE,
    main = list(label = graphic.title, cex = 1.2),
    xlab = list(label = "Percent", cex = 1.1),
    ylab = "", ylab.right = list("Subjects per group", cex = 1.1),
    scales = list(y = list(relation = "free", labels = ""), cex = 1.1),
    layout = c(1, 2),
    auto.key = list(space = "bottom", columns = 3, title = "", cex = 1.1)
  )
  # Lattice objects are only drawn when printed explicitly inside a loop.
  print(plot_obj)
}
dev.off()

error : ggplot2 doesn't know how to deal with data of class uneval

I have this code
ggplot() +
stat_density(kernel = "biweight",aes(x=fd, colour=id), data=foo1,position="identity",geom="line")+
coord_cartesian(xlim = c(0, 200))+
xlab("Flood Duration")+
ylab("Density")+
ggtitle("PDFs of Flood Duration")+
ggsave("pdf_fd_conus.png")
And I wrote this function
pdf.plot<-function(data,x,xl,yl,title,save){
ggplot() +
stat_density(data, kernel = "biweight",aes_string(x=x, colour='id'),
position="identity",geom="line")+
coord_cartesian(xlim = c(0, 200))+
xlab(xl)+
ylab(yl)+
ggtitle(title)+
ggsave(save)
}
Calling using this:
pdf.plot(data=foo1,x='fd',xl='b',
yl='a',title='a',save='y.png')
But I am getting this error:
Error: ggplot2 doesn't know how to deal with data of class uneval
Called from: eval(expr, envir, enclos)
This is dput(head(foo1,4))
structure(list(id = structure(c(1L, 1L, 1L, 1L), .Label = c("dfa",
"dfb", "cfa", "csb", "bsk"), class = "factor"), lon = c(-70.978611,
-70.978611, -70.945278, -70.945278), lat = c(42.220833, 42.220833,
42.190278, 42.190278), peakq = c(14.7531, 17.3865, 3.3414, 2.7751
), area = c(74.3327, 74.3327, 11.6549, 11.6549), fd = c(29, 54.75,
23, 1), tp = c(14.25, 19.75, 13.5, 0.5), rt = c(14.75, 35, 9.5,
0.5), bl = c(15485.3, 15485.3, 8242.64, 8242.64), el = c(0.643551,
0.643551, 0.474219, 0.474219), k = c(0.325279, 0.325279, 0.176624,
0.176624), r = c(81.947, 81.947, 38.7003, 38.7003), si = c(0.0037157,
0.0037157, -9999, -9999), rr = c(0.00529193, 0.00529193, 0.00469513,
0.00469513)), .Names = c("id", "lon", "lat", "peakq", "area",
"fd", "tp", "rt", "bl", "el", "k", "r", "si", "rr"), row.names = c(NA,
4L), class = "data.frame")
Could you please help?
Your problem is that you didn't specify what argument data is in stat_density. If you look at ?stat_density you'll see the first implied argument is actually mapping=. You need to change pdf.plot to:
#' Plot kernel density curves of one column, one line per `id`, and save it
#'
#' @param data Data frame containing the column named by `x` and an `id`
#'   column used for the line colour.
#' @param x Name of the column to plot, as a character string.
#' @param xl Label for the x axis.
#' @param yl Label for the y axis.
#' @param title Plot title.
#' @param save File name handed to `ggsave()`.
#' @return The ggplot object that was saved.
pdf.plot <- function(data, x, xl, yl, title, save) {
  # `data` and `mapping` are passed by name: the first positional argument of
  # stat_density() is `mapping`, so an unnamed data frame would be taken for
  # the aesthetic mapping.
  p <- ggplot() +
    stat_density(data = data, mapping = aes_string(x = x, colour = "id"),
                 kernel = "biweight", position = "identity", geom = "line") +
    coord_cartesian(xlim = c(0, 200)) +
    xlab(xl) +
    ylab(yl) +
    ggtitle(title)

  # Save after the plot is complete and pass it explicitly. A ggsave() call
  # chained with `+` runs before the plot exists, so it would write whatever
  # last_plot() happens to be rather than this plot.
  ggsave(save, plot = p)

  p
}

Resources