How to plot asymptote of a curve in R? - r

I have this data called mydf where I have hybrid sample comparison for efficiency. There are seven different efficiency columns for the intermixing of sampleA and sampleB. I want to see the plot for these seven efficiencies to see at which efficiency level will they significantly drop compared to the first few columns.
mydf<-structure(list(sample_A = structure(c(1L, 2L, 2L, 2L, 3L, 4L), .Label = c("2568",
"2669", "2670", "2671", "2946", "LPH-001-10_AK1", "LPH-001-12_AK2",
"LPH-001-9"), class = "factor"), sample_B = structure(c(1L, 2L,
3L, 4L, 3L, 4L), .Label = c("2568", "2669", "2670", "2671", "2946",
"LPH-001-10_AK1", "LPH-001-12_AK2", "LPH-001-9"), class = "factor"),
efficiency = c(1.02, 0.964, 0.415, 0.422, 0.98, 0.986), efficiency2 = c(1,
0.944, 0.395, 0.402, 0.96, 0.966), efficiency3 = c(0.9, 0.844,
0.295, 0.302, 0.86, 0.866), efficiency4 = c(0.32, 0.264,
-0.285, -0.278, 0.28, 0.286), efficiency5 = c(0.02, -0.0360000000000001,
-0.585, -0.578, -0.0200000000000001, -0.0140000000000001),
efficiency6 = c(0.12, 0.0639999999999999, -0.485, -0.478,
0.08, 0.086), efficiency7 = c(0.02, -0.036, -0.585, -0.578,
-0.02, -0.014)), .Names = c("sample_A", "sample_B", "efficiency",
"efficiency2", "efficiency3", "efficiency4", "efficiency5", "efficiency6",
"efficiency7"), row.names = c(NA, 6L), class = "data.frame")

Here's one way to plot your data:
mydf <- structure(list(sample_A=structure(c(1L,2L,2L,2L,3L,4L),.Label=c('2568','2669','2670','2671','2946','LPH-001-10_AK1','LPH-001-12_AK2','LPH-001-9'),class='factor'),sample_B=structure(c(1L,2L,3L,4L,3L,4L),.Label=c('2568','2669','2670','2671','2946','LPH-001-10_AK1','LPH-001-12_AK2','LPH-001-9'),class='factor'),efficiency=c(1.02,0.964,0.415,0.422,0.98,0.986),efficiency2=c(1,0.944,0.395,0.402,0.96,0.966),efficiency3=c(0.9,0.844,0.295,0.302,0.86,0.866),efficiency4=c(0.32,0.264,-0.285,-0.278,0.28,0.286),efficiency5=c(0.02,-0.0360000000000001,-0.585,-0.578,-0.0200000000000001,-0.0140000000000001),efficiency6=c(0.12,0.0639999999999999,-0.485,-0.478,0.08,0.086),efficiency7=c(0.02,-0.036,-0.585,-0.578,-0.02,-0.014)),.Names=c('sample_A','sample_B','efficiency','efficiency2','efficiency3','efficiency4','efficiency5','efficiency6','efficiency7'),row.names=c(NA,6L),class='data.frame');
effCis <- grep('^efficiency',names(mydf));
xlim <- c(1,length(effCis));
ylim <- range(mydf[,effCis],na.rm=T);
ylim[1L] <- floor(ylim[1L]/0.1)*0.1;
ylim[2L] <- ceiling(ylim[2L]/0.1)*0.1;
xticks <- seq_along(effCis);
yticks <- seq(ylim[1L],ylim[2L],0.1);
plot(NA,xlim=xlim,ylim=ylim,xlab='measurement',ylab='efficiency',xaxs='i',yaxs='i',axes=F);
abline(v=xticks,col='lightgrey');
abline(h=yticks,col='lightgrey');
abline(h=0,lwd=2);
axis(1L,xticks,xticks,font=2L,cex.axis=0.7);
axis(2L,yticks,sprintf('%.1f',yticks),las=1L,font=2L,cex.axis=0.7);
hybrid.col <- data.frame(hybrid=seq_len(nrow(mydf)),col=c('red','green','blue','gold','cyan','magenta'),stringsAsFactors=F);
splineN <- 200L;
for (ri in seq_len(nrow(hybrid.col))) {
hybrid <- hybrid.col$hybrid[ri];
col <- hybrid.col$col[ri];
x <- xticks;
y <- c(as.matrix(mydf[hybrid,effCis]));
points(x,y,pch=16L,col=col,xpd=NA);
with(spline(x,y,splineN),{
lines(x,y,col=col,lwd=2,xpd=NA);
localwin <- which(x>2 & x<3);
tp <- which.min(abs(diff(y[localwin])));
if (length(tp)>0L) points(x[localwin[tp]],y[localwin[tp]],col=col,pch=4L);
localwin <- which(x>2 & x<5);
tp <- which.min(diff(y[localwin]));
if (length(tp)>0L) {
m <- diff(y[localwin[seq(tp,len=2L)]])/diff(x[localwin[seq(tp,len=2L)]]);
if (is.finite(m)) abline(y[localwin[tp]]-m*x[localwin[tp]],m,col=col,lty=2L);
};
});
};
legend(5.5,0.95,paste0(mydf$sample_A,' / ',mydf$sample_B),fill=hybrid.col$col,cex=0.7,title='hybrid');
I wasn't 100% sure what you meant by the asymptote. I initially thought maybe you wanted the local maxima of the curves just prior to where they begin to drop, which is why I marked the local maxima with points (symbol X, i.e. pch=4L). But then I realized maybe you meant the tangent line along the drop, so I added lines tangent to the points of steepest slope.
This is the definition of asymptote:
a straight line approached by a given curve as one of the variables in the equation of the curve approaches infinity.
I don't think that's applicable here; plotting this data does not involve taking anything to infinity. I think you want either the local maxima or tangent lines.

Related

Interaction effect plot with CIs and emmeans contrast

I'm having trouble creating an interaction effect plot. There is probably something fairly simple I don't yet know how to do. I'm pretty new to R and ggplot. My reprex is below. Your insight is greatly appreciated!
The data is from UCLA and I'm also adapting their example for my purposes here.
library(here)
library(emmeans)
library(tidyverse)
dat <- read.csv("https://stats.idre.ucla.edu/wp-content/uploads/2019/03/exercise.csv")
Convert prog into factor variable
dat$prog <- factor(dat$prog, labels = c("jog","swim","read"))
The model
contcat <- lm(loss ~ hours * prog, data=dat)
summary(contcat)
I create mylist with certain points on hours and the two categories in prog that I want to contrast.
(mylist <- list(hours = seq(0, 4, .5), prog=c("jog","read")))
I then pass the object contcat into the emmeans. I request that predicted values of every combination of hours and prog be specified in at=mylist and store the output into an object called emcontcat.
emcontcat <- emmeans(contcat, ~ hours * prog, at=mylist)
I use emmip to output a set of values using plotit=FALSE.
contcatdat <- emmip(contcat, prog ~ hours, at = mylist, CIs=TRUE, plotit=FALSE)
The output object is fed to ggplot. The interaction effect is plotted along with CI bands.
ggplot(data=contcatdat, aes(x=hours, y=yvar, color=prog)) +
geom_line() +
geom_ribbon(aes(ymax=UCL, aymin=LCL, fill=prog), alpha=0.4)
The plot looks like this:
But overlapping CIs do not always correspond to the portions of the lines where there is no significant differences in predicted values. I want to add hashed lines for the portions of the lines where there is no significant difference in predicted values. This figure below
shows the kind of figure I'm trying to create. (The figure is from a paper by Trenton Mize (2019) found here at Fig. 14.)
To get the simple effect (i.e., difference of two predicted values), I pass emcontcat into a function called contrast where we can request "pairwise" differences (or simple effects). P-values are given for jog - read at each level of hours that was specified in mylist.
contrast(emcontcat, "pairwise", by="hours")
The output:
Where I am having trouble is how to incorporate the simple effect (i.e., the parts of hours where jog - read are significantly different or not) into ggplot as hashed or solid portions of the lines like the Mize 2019 figure.
We want to know if the intervals overlap, and if so, we want dashed lines. Actually that's easy by writing a respective function itvl_is_l(). However, on the LHS of the plot, there is just one point, but to draw a line we need a minimum of two. So we have to interpolate with "approximate", which is also done internally in the plot functions. Since we want to do everything for the two progs, we use by.
Preprocessing
## merge interpolations by prog
aux <- by(contcatdat, contcatdat$prog, \(x) {
x <- merge(x, data.frame(hours=with(x, seq.int(min(hours), max(hours),
length.out=1e3))), all=TRUE)
x$prog <- unique(na.omit(x$prog))
u <- c('yvar', 'LCL', 'UCL')
x[u] <- lapply(x[u], \(x) approx(x, xout=seq_along(x))$y)
x
})
## logical interval intersect function
itvl_is_l <- \(a, b) {unname(as.vector(ifelse(b[, 1] > a[, 2] | a[, 1] > b[2], TRUE, FALSE)))}
## check if intersecting CIs
its <- itvl_is_l(aux$jog[c('LCL', 'UCL')], aux$read[c('LCL', 'UCL')])
aux <- lapply(aux, `[<-`, 'its', val=its) ## add as variable
aux <- lapply(aux, \(x) transform(x, itsn=cumsum(c(0, diff(x$its)) != 0) + 1)) ## making a sequence out of it
contcatdat <- do.call(rbind, aux) ## combine back as contcatdat
Plot
clr <- c('#FF0000', '#0000FF', '#0000001A') ## some colors
png('foo.png', 600, 400) ## open .png device
plot(yvar ~ hours, contcatdat, type='n')
grid()
## lines left
lines(yvar ~ hours, contcatdat, subset=prog == 'jog' & itsn > 2, lwd=2, col=clr[1])
lines(yvar ~ hours, contcatdat, subset=prog == 'read' & itsn > 2, lwd=2, col=clr[2])
## lines middle, dashed
lines(yvar ~ hours, contcatdat, subset=prog == 'jog' & itsn == 2, lwd=2, col=clr[1], lty=2)
lines(yvar ~ hours, contcatdat, subset=prog == 'read' & itsn == 2, lwd=2, col=clr[2], lty=2)
## lines right
lines(yvar ~ hours, contcatdat, subset=prog == 'jog' & itsn < 2, lwd=2, col=clr[1])
lines(yvar ~ hours, contcatdat, subset=prog == 'read' & itsn < 2, lwd=2, col=clr[2])
## CIs
with(subset(contcatdat, prog == 'jog'),
polygon(c(hours, rev(hours)), c(UCL, rev(LCL)), border=NA, col=clr[3]))
with(subset(contcatdat, prog == 'read'),
polygon(c(hours, rev(hours)), c(UCL, rev(LCL)), border=NA, col=clr[3]))
## legend
legend('topleft', legend=unique(contcatdat$prog), title='Group', col=clr[1:2], lty=1, lwd=2)
dev.off() ## close .png device
You could also try to plot the polygons first and opaque with a border, if that might look better.
Data:
contcatdat <- structure(list(prog = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), levels = c("jog",
"read"), class = "factor"), hours = c(0, 0, 0.5, 0.5, 1, 1, 1.5,
1.5, 2, 2, 2.5, 2.5, 3, 3, 3.5, 3.5, 4, 4), yvar = c(-6.78065983345649,
2.21637209230689, -3.05428518360714, 0.738291278604121, 0.672089466242214,
-0.739789535098646, 4.39846411609157, -2.21787034880141, 8.12483876594092,
-3.69595116250418, 11.8512134157903, -5.17403197620695, 15.5775880656396,
-6.65211278990971, 19.303962715489, -8.13019360361248, 23.0303373653383,
-9.60827441731525), SE = c(1.64384530410457, 1.48612021916972,
1.25520349531108, 1.14711211184156, 0.87926401607137, 0.820840725755632,
0.543079708493216, 0.531312719216624, 0.375535476484592, 0.376041650300328,
0.558013604603198, 0.501120592808483, 0.89777081499028, 0.781944232621328,
1.27470257475094, 1.1056003463909, 1.66373129934114, 1.44356083265185
), df = c(894, 894, 894, 894, 894, 894, 894, 894, 894, 894, 894,
894, 894, 894, 894, 894, 894, 894), LCL = c(-10.0069052579393,
-0.700318757711651, -5.51777400669205, -1.51305511813823, -1.05357261502514,
-2.35078883599747, 3.33260443922245, -3.26063588462286, 7.38780492844162,
-4.43397842739773, 10.7560441598055, -6.15754180868669, 13.815604150934,
-8.18677301395645, 16.8022045883112, -10.3000681349591, 19.7650632676689,
-12.4414373187615), UCL = c(-3.55441440897366, 5.13306294232543,
-0.590796360522233, 2.98963767534648, 2.39775154750957, 0.871209765800175,
5.46432379296068, -1.17510481297997, 8.86187260344022, -2.95792389761063,
12.946382671775, -4.19052214372721, 17.3395719803452, -5.11745256586298,
21.8057208426668, -5.96031907226584, 26.2956114630078, -6.77511151586902
), tvar = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), levels = c("jog", "read"), class = "factor"),
xvar = c(0, 0, 0.5, 0.5, 1, 1, 1.5, 1.5, 2, 2, 2.5, 2.5,
3, 3, 3.5, 3.5, 4, 4)), estName = "yvar", clNames = c("lower.CL",
"upper.CL"), pri.vars = c("prog", "hours"), adjust = "none", side = 0, delta = 0, type = "link", mesg = "Confidence level used: 0.95", row.names = c(NA,
18L), class = c("summary_emm", "data.frame"), labs = list(xlab = "hours",
ylab = "Linear prediction", tlab = "prog"), vars = list(byvars = character(0),
tvars = "prog"))

How to plot dynamically progressive plot properly in R

I have this dataframe called mydf. My code below plots the hybrid combination of plot for the efficiency in Y axis. What I want to do is replace the measurement in X axis for each sample combination (each line) to be represented by the measurement columns. So for efficiency1 I want it to be represented precisely by the values in measurement1 column instead of general 1 to 7 measurement I have in the code and I want to do this for all efficiency levels with their respective measurement columns. Can someone please help me achieve this goal.
mydf<-structure(list(sample_A = structure(c(1L, 2L, 2L, 2L, 3L, 4L), .Label = c("2568",
"2669", "2670", "2671", "2946", "LPH-001-10_AK1", "LPH-001-12_AK2",
"LPH-001-9"), class = "factor"), sample_B = structure(c(1L, 2L,
3L, 4L, 3L, 4L), .Label = c("2568", "2669", "2670", "2671", "2946",
"LPH-001-10_AK1", "LPH-001-12_AK2", "LPH-001-9"), class = "factor"),
efficiency1 = c(1.02, 0.964, 0.415, 0.422, 0.98, 0.986),
efficiency2 = c(1, 0.944, 0.395, 0.402, 0.96, 0.966), efficiency3 = c(0.9,
0.844, 0.295, 0.302, 0.86, 0.866), efficiency4 = c(0.32,
0.264, -0.285, -0.278, 0.28, 0.286), efficiency5 = c(0.02,
-0.0360000000000001, -0.585, -0.578, -0.0200000000000001,
-0.0140000000000001), efficiency6 = c(0.12, 0.0639999999999999,
-0.485, -0.478, 0.08, 0.086), efficiency7 = c(0.02, -0.036,
-0.585, -0.578, -0.02, -0.014), measurement1 = c(1, 1.2,
1, 1.3, 1.3, 1), measurement2 = c(2, 2.1, 2, 2.2, 2.3, 2),
measurement3 = c(3, 3.1, 3, 3.2, 3.3, 3), measurement4 = c(4,
4.1, 4, 4.2, 4.3, 4.1), measurement5 = c(5.1, 5.1, 4, 4.2,
4.3, 4.1), measurement6 = c(5.1, 6.1, 6, 6.2, 6.3, 6.1),
measurement7 = c(7.1, 7.1, 7, 7.2, 6.3, 7.1)), .Names = c("sample_A",
"sample_B", "efficiency1", "efficiency2", "efficiency3", "efficiency4",
"efficiency5", "efficiency6", "efficiency7", "measurement1",
"measurement2", "measurement3", "measurement4", "measurement5",
"measurement6", "measurement7"), row.names = c(NA, 6L), class = "data.frame")
Code I have:
effCis <- grep('^efficiency',names(mydf));
xlim <- c(1,length(effCis));
ylim <- range(mydf[,effCis],na.rm=T);
ylim[1L] <- floor(ylim[1L]/0.1)*0.1;
ylim[2L] <- ceiling(ylim[2L]/0.1)*0.1;
xticks <- seq_along(effCis);
yticks <- seq(ylim[1L],ylim[2L],0.1);
plot(NA,xlim=xlim,ylim=ylim,xlab='measurement',ylab='efficiency',xaxs='i',yaxs='i',axes=F);
abline(v=xticks,col='lightgrey');
abline(h=yticks,col='lightgrey');
abline(h=0,lwd=2);
axis(1L,xticks,xticks,font=2L,cex.axis=0.7);
axis(2L,yticks,sprintf('%.1f',yticks),las=1L,font=2L,cex.axis=0.7);
hybrid.col <- data.frame(hybrid=seq_len(nrow(mydf)),col=c('red','green','blue','gold','cyan','magenta'),stringsAsFactors=F);
splineN <- 200L;
for (ri in seq_len(nrow(hybrid.col))) {
hybrid <- hybrid.col$hybrid[ri];
col <- hybrid.col$col[ri];
x <- xticks;
y <- c(as.matrix(mydf[hybrid,effCis]));
points(x,y,pch=16L,col=col,xpd=NA);
with(spline(x,y,splineN),{
lines(x,y,col=col,lwd=2,xpd=NA);
localwin <- which(x>2 & x<3);
tp <- which.min(abs(diff(y[localwin])));
if (length(tp)>0L) points(x[localwin[tp]],y[localwin[tp]],col=col,pch=4L);
localwin <- which(x>2 & x<5);
tp <- which.min(diff(y[localwin]));
if (length(tp)>0L) {
m <- diff(y[localwin[seq(tp,len=2L)]])/diff(x[localwin[seq(tp,len=2L)]]);
if (is.finite(m)) abline(y[localwin[tp]]-m*x[localwin[tp]],m,col=col,lty=2L);
};
});
};
Here's how I would do it, you can play around with pretty labels (see function ?pretty). The parts I changed have spaces around them. Consider that this is not C so ; are not necessary. Putting some spaces and naming arguments makes the code perhaps more readable.
effCis <- grep('^efficiency',names(mydf));
find.measurements <- grep("^measurement", names(mydf))
xlim <- c(1,length(effCis));
ylim <- range(mydf[,effCis],na.rm=T);
ylim[1L] <- floor(ylim[1L]/0.1)*0.1;
ylim[2L] <- ceiling(ylim[2L]/0.1)*0.1;
yticks <- seq(ylim[1L],ylim[2L],0.1);
xticks <- seq(from = min(mydf[, find.measurements]), to = max(mydf[, find.measurements]), length.out = 7)
plot(NA,xlim=c(min(xticks), max(xticks)), ylim=ylim,xlab='measurement',ylab='efficiency',xaxs='i',yaxs='i',axes=F)
abline(v=xticks,col='lightgrey');
abline(h=yticks,col='lightgrey');
abline(h=0,lwd=2);
axis(side = 1, at = xticks)
axis(2L,yticks,sprintf('%.1f',yticks),las=1L,font=2L,cex.axis=0.7);
hybrid.col <- data.frame(hybrid=seq_len(nrow(mydf)),col=c('red','green','blue','gold','cyan','magenta'),stringsAsFactors=F);
splineN <- 200L;
for (ri in seq_len(nrow(hybrid.col))) {
hybrid <- hybrid.col$hybrid[ri];
col <- hybrid.col$col[ri];
x <- xticks;
y <- c(as.matrix(mydf[hybrid,effCis]));
points(x,y,pch=16L,col=col,xpd=NA);
with(spline(x,y,splineN),{
lines(x,y,col=col,lwd=2,xpd=NA);
localwin <- which(x>2 & x<3);
tp <- which.min(abs(diff(y[localwin])));
if (length(tp)>0L) points(x[localwin[tp]],y[localwin[tp]],col=col,pch=4L);
localwin <- which(x>2 & x<5);
tp <- which.min(diff(y[localwin]));
if (length(tp)>0L) {
m <- diff(y[localwin[seq(tp,len=2L)]])/diff(x[localwin[seq(tp,len=2L)]]);
if (is.finite(m)) abline(y[localwin[tp]]-m*x[localwin[tp]],m,col=col,lty=2L);
};
});
};

Create multiple lattice plots from a data table using lapply

I am trying to generate mean values by group for each of numerous variables (species) and then plot each of these separately. I have tried list and data table formats. The base plot function works in a for loop:
library(data.table)
for (i in 3:5) {
# generate a list of mean value for the species in column number i
temp <- v2[, lapply(.SD, mean), by="Season", .SDcols=i]
# plot each col of the list as a scatterplot with main title = header of 2nd col
plot(temp[[2]]~temp[[1]], main = colnames(temp)[[2]])
}
But when I try to create lattice plots only a single plot is generated for the last variable:
library(data.table)
library(lattice)
for (i in 3:5) {
# generate a list of mean value by season for the species in column number i
temp <- v2[, lapply(.SD, mean), by=c("Season", "Location"), .SDcols=i]
# Each group in a separate mini plot
xyplot(temp[[3]]~temp[[1]] | temp[[2]], main = colnames(temp)[3])
}
Have tried saving or printing each lattice plot, is that the right idea? Perhaps I am going about this the wrong way altogether?
Here is a small sample of my data:
structure(list(Location = structure(c(1L, 1L, 1L, 1L, 4L, 4L,
4L, 6L, 6L, 1L), .Label = c("BN", "BS", "GN", "GS", "SB", "SL"
), class = "factor"), Season = c(1981L, 1981L, 1981L, 1981L,
1995L, 1995L, 1995L, 1997L, 1997L, 2000L), Agrostis.magellanica = c(0.3,
0.3, 0.3, 0.01, 0.6, 0.3, 0.3, 0.3, 0.6, 0.05), Festuca.contracta = c(0.6,
0.05, 0.3, 0.01, 0.01, 0, 0, 0, 0.01, 0.05), Poa.annua = c(0.01,
0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.05, 0.01, 0.01)), .Names = c("Location",
"Season", "Agrostis.magellanica", "Festuca.contracta", "Poa.annua"
), class = c("data.table", "data.frame"), row.names = c(NA, -10L
)
This is in the R-FAQ. Need a print statement around grid graphics (lattice or ggplot) when used inside a function, and the for loop is a function:
# Needed
require(data.table) # before defining the object.
pdf() # pdf is a multipage device.
for (i in 3:5) {
# generate a list of mean value by season for the species in column number i
temp <- v2[, lapply(.SD, mean), by=c("Season", "Location"), .SDcols=i]
# Each group in a separate mini plot
print( xyplot(temp[[3]]~temp[[1]] | temp[[2]], main = colnames(temp)[3]) )
}
dev.off() # Failing to properly close a file graphics device is common error.

How to scale y-axis to intuitively detect small differences in data

I have a data set from a literature survey, where we looked at effects of pH to certain parameters (Metrics) in a group of animals. Because experiments are done on different time scales, I divided the response ratio by time.
This leads to very small differences around 1 (less than 1, there is a negative effect, greater than 1 a positive effect), which are still interesting and important (because the real values are divided by time). The problem is that some of the values are either very low or very high and the differences close to 1 are not visible.
Since values are close to 1, log transformation of y-axis scale does not help. How can I transform the y-axis scale in ggplot2 so that differences close to 1 are visible and yet intuitive? (that the reader can detect differences without thinking too much; I could standardize the values to minimum value, multiply by 10000 and take a log10 scale, but this would not lead to understandable differences.)
df <- structure(list(Study = c(1, 1, 2, 2, 3), pH_control = c(8.06,
8.06, 8.01, 8.01, 7.99), pH_treatment = c(7.86, 7.75, 7.8, 7.8,
7.45), time = c(120, 120, 60, 150, 140), Metrics = structure(c(3L,
1L, 2L, 3L, 1L), .Label = c("Growth", "Metabolism", "Survival"
), class = "factor"), RR_per_time_unit = c(0.9998, 1.001, 1.002,
0.98, 0.9), CI.max = c(1, 1.003, 1.00003, 0.9999, 0.92), CI.min = c(0.9996,
0.9999, 1.004, 0.9789, 0.89), pH_diff = c(0.2, 0.31, 0.21, 0.21,
0.54)), .Names = c("Study", "pH_control", "pH_treatment", "time",
"Metrics", "RR_per_time_unit", "CI.max", "CI.min", "pH_diff"), row.names = c(NA,
-5L), class = "data.frame")
df$pH_diff <- df$pH_control - df$pH_treatment
library(ggplot2)
ggplot(df, aes(y = RR_per_time_unit, x = pH_diff, ymin = CI.min, ymax = CI.max)) +
geom_pointrange(aes(color = Metrics)) + geom_hline(aes(yintercept = 1)) + coord_trans(y = "log10")

barchart with multiple overlaying errorbars

I am trying to create a barplot with multiple errorbars. Something like this: http://flyordie.sin.khk.be/r/histogram%20error%20bars.PNG
I have the following dataset:
http://flyordie.sin.khk.be/r/output.csv
I have tried using ggplot2 and lattice graphics, but haven't found anything that suits my needs.
My current code for showing the barchart is this:
data <- read.csv("c:/output.csv")
data
par(las=3)
barplot(data$PlateId,
height=data$HC.Maximum,
names.arg=data$PlateId,
col="lightblue")
And to show the highest errorbars i use this code
library(ggplot2)
limits <- aes(ymax = qc$HC.Maximum, ymin = qc$HC.Minimum)
p <- ggplot(qc, aes(colour=HC.Median,x=PlateId))
p + geom_bar(position="dodge")+ geom_errorbar(limits,position="dodge")
But I have no clue on how to put them on the same graphic (like in my example)
The data:
qc <- structure(list(row = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Row", class = "factor"),
ID = 1:14, PlateId = c(35276L, 35279L, 35280L, 35281L, 35282L,
35290L, 35291L, 35292L, 35293L, 35294L, 35295L, 35296L, 35297L,
35298L), LC.Median = c(439688.495, 509376.055, 475218.99,
497368.215, 481801.9, 468603.43, 494713.175, 459047.385,
482819.47, 495162.31, 449592.51, 460564.95, 478715.915, 452293.465
), LC.Stdev = c(52290.12229, 49648.49436, 55743.10306, 62002.53552,
46908.66149, 52489.615, 48016.94019, 52082.23899, 47934.37133,
58977.84845, 45827.62648, 53514.21095, 49638.98286, 139686.144
), LC.Minimum = c(279610.16, 423651.45, 356422.31, 411639.77,
397362.84, 345178.07, 406073.72, 352834.86, 339035.77, 369554.11,
348688.39, 357341.56, 370463.11, 210367.91), LC.Maximum = c(498195.9,
630648.53, 614625.78, 686737.35, 621372.36, 576491.41, 579708.95,
580633.28, 580125.9, 622108.73, 530234.87, 563616.65, 614936.33,
730272.63), HC.Median = c(507356.465, 553226.525, 447067.77,
452223.76, 453439.37, 422491.755, 447438.8, 435034.635, 446148.105,
438089.69, 466748.63, 440005.81, 454927.74, 483599.71), HC.Stdev = c(65355.46121,
72762.07338, 80118.37641, 43653.99318, 73389.12355, 62590.47601,
46421.36678, 62822.88532, 61175.4241, 64418.56174, 63101.2232,
68166.51814, 61256.74139, 87354.9441), HC.Minimum = c(381552.05,
391124.94, 280614.72, 395454.12, 291433.84, 252579.15, 331661.03,
296223.64, 240262.37, 299431.98, 375224.27, 278780.87, 310275.66,
213170.04), HC.Maximum = c(626483.6, 635111.41, 555357.3,
528822.8, 534172.42, 514927.42, 538385.26, 533024.74, 524973.99,
544335.94, 564954.87, 572206.98, 547489.1, 565338.09), zPrime = c(-3.96,
-23.73, -7.88, -5.81, -5.32, -5.54, -4.48, -7.98, -6.99,
-5.63, -22.54, -33.83, -11.92, -17.44), Sb = c(1.17, 1.03,
0.91, 0.91, 0.89, 0.89, 0.9, 0.92, 0.92, 0.89, 1.04, 0.98,
0.95, 1.09), Sn = c(1.37, 0.3, -0.83, -0.76, -1.22, -1.01,
-1.08, -0.74, -0.86, -0.95, 0.31, -0.2, -0.52, 0.27)), .Names = c("row",
"ID", "PlateId", "LC.Median", "LC.Stdev", "LC.Minimum", "LC.Maximum",
"HC.Median", "HC.Stdev", "HC.Minimum", "HC.Maximum", "zPrime",
"Sb", "Sn"), class = "data.frame", row.names = c(NA, -14L))
When creating plots with multiple layers, I approach it as follows:
Define common aesthetics in the initial call to ggplot
Define additional aesthetics in each additional layer
Note that I have modified your code, since I couldn't get your example to work:
Provide an explicit binwidth=1 to geom_bar to remove the warnings
Remove the position=dodge since this is the default and redundant
Supply an explicit stat=identity to geom_bar
The code:
ggplot(qc, aes(x=PlateId)) +
geom_bar(aes(y=HC.Median), binwidth=1, stat="identity", fill="cyan") +
geom_errorbar(aes(ymin=HC.Minimum, ymax=LC.Minimum), colour="red") +
geom_errorbar(aes(ymin=LC.Maximum, ymax=HC.Maximum), colour="purple")
Just add another geom_errorbar.
#Rename limits to limits_hi
limits_hi <- aes(ymax = qc$HC.Maximum, ymin = qc$HC.Minimum)
#Define the other error bar
limits_lo <- aes(ymax = qc$LC.Maximum, ymin = LC.Minimum)
#I'm not quite sure what you want in the bars; see if this looks right
p <- ggplot(qc, aes(factor(PlateId), HC.Median))
p +
geom_bar(position="dodge") +
geom_errorbar(limits_hi, position="dodge", colour = "red") +
geom_errorbar(limits_lo, position="dodge", colour = "blue") +
opts(axis.text.x = theme_text(angle = 30))

Resources