barchart with multiple overlaying errorbars - r

I am trying to create a barplot with multiple errorbars. Something like this: http://flyordie.sin.khk.be/r/histogram%20error%20bars.PNG
I have the following dataset:
http://flyordie.sin.khk.be/r/output.csv
I have tried using ggplot2 and lattice graphics, but haven't found anything that suits my needs.
My current code for showing the barchart is this:
data <- read.csv("c:/output.csv")
data
par(las=3)
barplot(data$PlateId,
height=data$HC.Maximum,
names.arg=data$PlateId,
col="lightblue")
And to show the highest errorbars i use this code
library(ggplot2)
limits <- aes(ymax = qc$HC.Maximum, ymin = qc$HC.Minimum)
p <- ggplot(qc, aes(colour=HC.Median,x=PlateId))
p + geom_bar(position="dodge")+ geom_errorbar(limits,position="dodge")
But I have no clue on how to put them on the same graphic (like in my example)
The data:
qc <- structure(list(row = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Row", class = "factor"),
ID = 1:14, PlateId = c(35276L, 35279L, 35280L, 35281L, 35282L,
35290L, 35291L, 35292L, 35293L, 35294L, 35295L, 35296L, 35297L,
35298L), LC.Median = c(439688.495, 509376.055, 475218.99,
497368.215, 481801.9, 468603.43, 494713.175, 459047.385,
482819.47, 495162.31, 449592.51, 460564.95, 478715.915, 452293.465
), LC.Stdev = c(52290.12229, 49648.49436, 55743.10306, 62002.53552,
46908.66149, 52489.615, 48016.94019, 52082.23899, 47934.37133,
58977.84845, 45827.62648, 53514.21095, 49638.98286, 139686.144
), LC.Minimum = c(279610.16, 423651.45, 356422.31, 411639.77,
397362.84, 345178.07, 406073.72, 352834.86, 339035.77, 369554.11,
348688.39, 357341.56, 370463.11, 210367.91), LC.Maximum = c(498195.9,
630648.53, 614625.78, 686737.35, 621372.36, 576491.41, 579708.95,
580633.28, 580125.9, 622108.73, 530234.87, 563616.65, 614936.33,
730272.63), HC.Median = c(507356.465, 553226.525, 447067.77,
452223.76, 453439.37, 422491.755, 447438.8, 435034.635, 446148.105,
438089.69, 466748.63, 440005.81, 454927.74, 483599.71), HC.Stdev = c(65355.46121,
72762.07338, 80118.37641, 43653.99318, 73389.12355, 62590.47601,
46421.36678, 62822.88532, 61175.4241, 64418.56174, 63101.2232,
68166.51814, 61256.74139, 87354.9441), HC.Minimum = c(381552.05,
391124.94, 280614.72, 395454.12, 291433.84, 252579.15, 331661.03,
296223.64, 240262.37, 299431.98, 375224.27, 278780.87, 310275.66,
213170.04), HC.Maximum = c(626483.6, 635111.41, 555357.3,
528822.8, 534172.42, 514927.42, 538385.26, 533024.74, 524973.99,
544335.94, 564954.87, 572206.98, 547489.1, 565338.09), zPrime = c(-3.96,
-23.73, -7.88, -5.81, -5.32, -5.54, -4.48, -7.98, -6.99,
-5.63, -22.54, -33.83, -11.92, -17.44), Sb = c(1.17, 1.03,
0.91, 0.91, 0.89, 0.89, 0.9, 0.92, 0.92, 0.89, 1.04, 0.98,
0.95, 1.09), Sn = c(1.37, 0.3, -0.83, -0.76, -1.22, -1.01,
-1.08, -0.74, -0.86, -0.95, 0.31, -0.2, -0.52, 0.27)), .Names = c("row",
"ID", "PlateId", "LC.Median", "LC.Stdev", "LC.Minimum", "LC.Maximum",
"HC.Median", "HC.Stdev", "HC.Minimum", "HC.Maximum", "zPrime",
"Sb", "Sn"), class = "data.frame", row.names = c(NA, -14L))

When creating plots with multiple layers, I approach it as follows:
Define common aesthetics in the initial call to ggplot
Define additional aesthetics in each additional layer
Note that I have modified your code, since I couldn't get your example to work:
Provide an explicit binwidth=1 to geom_bar to remove the warnings
Remove the position=dodge since this is the default and redundant
Supply an explicit stat=identity to geom_bar
The code:
ggplot(qc, aes(x=PlateId)) +
geom_bar(aes(y=HC.Median), binwidth=1, stat="identity", fill="cyan") +
geom_errorbar(aes(ymin=HC.Minimum, ymax=LC.Minimum), colour="red") +
geom_errorbar(aes(ymin=LC.Maximum, ymax=HC.Maximum), colour="purple")

Just add another geom_errorbar.
#Rename limits to limits_hi
limits_hi <- aes(ymax = qc$HC.Maximum, ymin = qc$HC.Minimum)
#Define the other error bar
limits_lo <- aes(ymax = qc$LC.Maximum, ymin = LC.Minimum)
#I'm not quite sure what you want in the bars; see if this looks right
p <- ggplot(qc, aes(factor(PlateId), HC.Median))
p +
geom_bar(position="dodge") +
geom_errorbar(limits_hi, position="dodge", colour = "red") +
geom_errorbar(limits_lo, position="dodge", colour = "blue") +
opts(axis.text.x = theme_text(angle = 30))

Related

problem with placement of a legend on a pie chart still remains

I have a problem with the legend on a pie chart, I already received help from here but it still doesn't work as I hoped (i don't have the entire legend on there, the frame of the legend is partly outside it) and I don't know why. Here is what I get when running the following code :
Here is my code :
pie1_PGS <- pie1_PGS[!(pie1_PGS$nb_sejours_2021 == 0),]
lab <- paste0(round(pie1_PGS$nb_sejours_2021/sum(pie1_PGS$nb_sejours_2021) * 100, 2),
"%")
library(plotrix)
pie3D(pie1_PGS$nb_sejours_2021,
radius = 0.75,
height = 0.1,
theta = 0.7,
border = "white",
col = rainbow(length(lab)),
shade = 0.5,
labels = lab,
labelcol = "red",
labelcex = 0.75,
explode = 0.2,
main = "Répartition des séjours 2021 par type")
legend(0.5, 1.1, pie1_PGS$type_de_sejour, cex = 0.3, fill = rainbow(length(lab)))
Here is my dataset :
structure(list(type_de_sejour = structure(c(1L, 2L, 4L), levels = c("Ambulatoires",
"Externes", "Fictifs", "Hospitalisé"), class = "factor"), nb_sejours_2021 = c(20794,
365, 7866)), row.names = c(1L, 2L, 4L), class = "data.frame")
Could anyone help ?
You could use "topright" and the argument inset to inset distance from the margin like this:
pie1_PGS <- structure(list(type_de_sejour = structure(c(1L, 2L, 4L), levels = c("Ambulatoires",
"Externes", "Fictifs", "Hospitalisé"), class = "factor"), nb_sejours_2021 = c(20794,
365, 7866)), row.names = c(1L, 2L, 4L), class = "data.frame")
pie1_PGS <- pie1_PGS[!(pie1_PGS$nb_sejours_2021 == 0),]
lab <- paste0(round(pie1_PGS$nb_sejours_2021/sum(pie1_PGS$nb_sejours_2021) * 100, 2),
"%")
library(plotrix)
pie3D(pie1_PGS$nb_sejours_2021,
radius = 0.75,
height = 0.1,
theta = 0.7,
border = "white",
col = rainbow(length(lab)),
shade = 0.5,
labels = lab,
labelcol = "red",
labelcex = 0.75,
explode = 0.2,
main = "Répartition des séjours 2021 par type")
legend("topright", legend = pie1_PGS$type_de_sejour, cex = 0.3, fill = rainbow(length(lab)), inset = 0.05)
Created on 2022-08-12 by the reprex package (v2.0.1)

How to plot many probability density functions (pdfs) without sharp edges?

I have an issue with plotting continuous distributions without sharp edges in ggplot2. I need to show two of them on one plot. Also, it does not have to be ggplot2 to achieve this result.
I am aware, that the number of data points directly influences the smoothness of the line, but it was not the case here. Below you can see some sample data (from dput)
sample.data<-list(beta.data = structure(list(cluster = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), beta.density = c(0, 3.42273368363934e-43, 8.42987148403021e-29,
2.04764468657484e-20, 1.69485562831516e-14, 6.07999638837842e-10, 2.88180370232676e-06, 0.00314064636750876, 0.954118897015866, 0, 0, 3.80101893822358e-36, 6.43342582657081e-22, 6.82956252277493e-14, 1.75291058931833e-08, 0.000131874335695378, 0.0750918340641428, 3.72532418759802, 5.05242078519544, 0), pr = c(0, 0.111111111111111, 0.222222222222222, 0.333333333333333, 0.444444444444444, 0.555555555555556, 0.666666666666667, 0.777777777777778, 0.888888888888889, 1, 0, 0.111111111111111, 0.222222222222222, 0.333333333333333, 0.444444444444444, 0.555555555555556, 0.666666666666667, 0.777777777777778, 0.888888888888889, 1)), row.names = c(NA, -20L), class = "data.frame"), beta.params = structure(list(cluster = 1:2, a = c(49, 50), b = c(2, 10), ni.beta = c(0.961,0.833), sd.beta = c(0.00072, 0.00228)), row.names = c(NA,-2L), class = "data.frame"))
Before I was using geom_col, but it discretizes values. I went with geom_area:
ggplot(sample.data$beta.data, aes(x = pr, y = beta.density)) +
geom_area(stat = "function",
fun = dbeta,
args = list(shape1 = sample.data$beta.params[1,"a"], shape2 = sample.data$beta.params[1,"b"]),
fill = "#F1C40F",
colour = "black",
alpha = 0.7) +
geom_area(stat = "function",
fun=dbeta,
args = list(shape1 = sample.data$beta.params[2,"a"], shape2 = sample.data$beta.params[2,"b"]),
fill = "#3498DB",
colour = "black",
alpha = 0.7)
I presented you the data with 10 points, but 1000 points look almost the same. It is not the case here, where even 100 points looks ok:
p = seq(0,1, length=100)
plot(p, dbeta(p, 50, 10), ylab="Density", type ="l", col=4, , lwd = 2)
Here I am attaching code to simulate the data. Oh, and these troublesome beta parameters were a = 49 and b = 2.
len <- 100
p <- seq(0,1, length.out = len)
df <- data.frame(rbind(cbind("cl" = rep(1, times = length(p)), "beta" = dbeta(p, 50, 10),"p"= p),
cbind("cl" = rep(1, times = length(p)), "beta" = dbeta(p, 40, 2),"p"= p)))
Do you have any ideas?
EDIT: The pdfs stands here for probability density functions. That is why I have not put "pdf" as a tag. My apologies for the confusion!
Anyway, when I tried to print graphic to PDF file, the result was poor as well (sharp edges). But it the end, it shouldn't matter. I want to see smooth lines whatever I do (reasonably).
EDIT2 It is possible to achieve because:
library(mosaic)
theme_set(theme_bw())
xpbeta(c(0.7, 0.90), shape1 = 49, shape2 = 2)
It produces nice, smoothed beta dist with parameters (49, 2). But then again, I need to show two dists in one chart.
I have found the answer. It still needs some editing (like transparency/alpha which I couldn't figure out), but in general, this is what I meant. Code:
library(mosaic)
plotDist('beta', params=list(49,2), kind='density', type = "h", col = "#3498DB", xlim = c(0,1))
plotDist('beta', params=list(50, 10), kind='density', , type = "h", col = "#F1C40F", add = TRUE)
plotDist('beta', params=list(49,2), kind='density', add = TRUE, col = "black")
plotDist('beta', params=list(50, 10), kind='density', add = TRUE, col = "black")
Result:
We can add as many distributions as we want, using "add" parameter.
Parameter type = "h", is used to draw filled distribution. Without it, the only line is visible. In my answer, I draw the two lines and two filled dists. I would be really happy if someone could show a better answer, though.
EDIT:
I think I found my perfect answer!
Here is the code:
library(ggformula)
theme_set(theme_bw())
gf_dist("beta", shape1 = 49, shape2 = 2, geom = "area", alpha = 0.5, fill = "#F1C40F") %>%
gf_dist("beta", shape1 = 49, shape2 = 2) %>%
gf_dist("beta", shape1 = 50, shape2 = 10, geom = "area", alpha = 0.5, fill = "#3498DB") %>%
gf_dist("beta", shape1 = 50, shape2 = 10)
It is much faster than the previous code, parameter alpha is obvious and it is relatively easy to combine many plots! Because of transparency, you can nicely see the overlap of both distributions.

Forest plot with table ggplot coding

I am trying to get a table side by side with my forest plot but I am having a lot of trouble doing so.
I am able to make a forest plot with the following code:
###dataframe
###dataframe
library(ggplot2)
library(tidyr)
library(grid)
library(gridExtra)
library(forcats)
forestdf <- structure(list(labels = structure(1:36, .Label = c("Age*", "Sex – male vs. female",
"Body-mass index*,1 ", "Systolic blood pressure*", "Race - vs. white",
"Asian", "Black", "Townsend deprivation index", "Social habit",
"Smoking - vs. never", "Previous", "Current", "Alcohol use - vs. never",
"Once or twice a week", "Three or four times a week", "Daily or almost daily",
"Comorbidity", "Cancer", "Diabetes", "Chronic obstructive pulmonary disease2",
"Asthma", "Ischemic heart disease3", "Hypothyroidism", "Hypercholesterolemia",
"Allergic rhinitis", "Depression", "Serology", "White blood cell count",
"Red blood cell count", "Hemoglobin concentration", "Mean corpuscular volume",
"Mean corpuscular hemoglobin concentration", "Platelet count",
"Lymphocyte count", "Monocyte count", "Neutrophil count"), class = "factor"),
rr = c(1.18, 1.45, 1.76, 0.98, NA, 2.16, 2.65, 1.09, NA,
NA, 1.35, 1.15, NA, 0.73, 0.63, 0.63, NA, 1.23, 1.34, 1.51,
1.12, 1.46, 0.96, 1.1, 1.18, 1.38, NA, 1.03, 0.87, 0.93,
1, 0.94, 1, 1.03, 1.17, 1.06), rrhigh = c(1.08, 1.28, 1.57,
0.95, NA, 1.63, 2.03, 1.07, NA, NA, 1.18, 0.94, NA, 0.58,
0.49, 0.5, NA, 0.99, 1.08, 1.09, 0.93, 1.15, 0.71, 0.92,
0.91, 1.1, NA, 1.02, 0.73, 0.87, 0.99, 0.88, 1, 1.01, 1.03,
1.01), rrlow = c(1.28, 1.64, 1.97, 1.02, NA, 2.86, 3.44,
1.11, NA, NA, 1.55, 1.42, NA, 0.9, 0.79, 0.81, NA, 1.53,
1.66, 2.09, 1.34, 1.85, 1.3, 1.31, 1.52, 1.74, NA, 1.04,
1.03, 0.98, 1.01, 1.01, 1, 1.05, 1.32, 1.1)), class = "data.frame", row.names = c(NA,
-36L))
forestdf$labels <- factor(forestdf$labels,levels = forestdf$labels)
levels(forestdf$labels) 1.52, 1.74, NA, 1.04, 1.03, 0.98, 1.01, 1.01, 1, 1.05, 1.32,
#forestplot
p <- ggplot(forestdf, aes(x=rr, y=labels, xmin=rrlow, xmax=rrhigh))+
geom_pointrange(shape=22, fill="black")+
geom_vline(xintercept = 1, linetype=3)+
xlab("Variable")+ylab("Adjusted Relative Risk with 95% Confidence Interval")+theme_classic()+scale_y_discrete(limits = rev(labels))+
scale_x_log10(limits = c(0.25, 4), breaks = c(0.25, 0.5, 1, 2, 4), labels=c("0.25", "0.5", "1", "2", "4"), expand = c(0,0))
p
However, I cannot get the left panel with labels to work:
#dataframe for table
fplottable <- structure(list(labels = structure(c(1L, 30L, 7L, 33L, 27L, 4L,
6L, 35L, 32L, 31L, 26L, 11L, 2L, 24L, 34L, 12L, 10L, 8L, 14L,
9L, 5L, 18L, 17L, 16L, 3L, 13L, 29L, 36L, 28L, 15L, 21L, 20L,
25L, 19L, 22L, 23L), .Label = c("Age*", "Alcohol use - vs. never",
"Allergic rhinitis", "Asian", "Asthma", "Black", "Body-mass index*,1 ",
"Cancer", "Chronic obstructive pulmonary disease2", "Comorbidity",
"Current", "Daily or almost daily", "Depression", "Diabetes",
"Hemoglobin concentration", "Hypercholesterolemia", "Hypothyroidism",
"Ischemic heart disease3", "Lymphocyte count", "Mean corpuscular hemoglobin concentration",
"Mean corpuscular volume", "Monocyte count", "Neutrophil count",
"Once or twice a week", "Platelet count", "Previous", "Race - vs. white",
"Red blood cell count", "Serology", "Sex – male vs. female",
"Smoking - vs. never", "Social habit", "Systolic blood pressure*",
"Three or four times a week", "Townsend deprivation index", "White blood cell count"
), class = "factor"), No..of.Events = c(1073L, 581L, 1061L, 1031L,
NA, 57L, 68L, 1072L, NA, NA, 442L, 117L, NA, 262L, 191L, 172L,
NA, 96L, 107L, 41L, 146L, 86L, 52L, 170L, 66L, 84L, NA, 1009L,
1009L, 1009L, 1009L, 1009L, 1009L, 1005L, 1005L, 1005L), ARR..95..CI. = c("1.18 (1.08-1.28)",
"1.45 (1.28-1.64)", "1.76 (1.57-1.97)", "0.98 (0.95-1.02)", "",
"2.16 (1.63-2.86)", "2.65 (2.03-3.44)", "1.09 (1.07-1.11)", "",
"", "1.35 (1.18-1.55)", "1.15 (0.94-1.42)", "", "0.73 (0.58-0.90)",
"0.63 (0.49-0.79)", "0.63 (0.50-0.81)", "", "1.23 (0.99-1.53)",
"1.34 (1.08-1.66)", "1.51 (1.09-2.09)", "1.12 (0.93-1.34)", "1.46 (1.15-1.85)",
"0.96 (0.71-1.30)", "1.10 (0.92-1.31)", "1.18 (0.91-1.52)", "1.38 (1.10-1.74)",
"", "1.03 (1.02-1.04)", "0.87 (0.73-1.03)", "0.93 (0.87-0.98)",
"1.00 (0.99-1.01)", "0.94 (0.88-1.01)", "1.00 (1.00-1.00)", "1.03 (1.01-1.05)",
"1.17 (1.03-1.32)", "1.06 (1.01-1.10)")), class = "data.frame", row.names = c(NA,
-36L))
###NOT WORKING CODE THAT TRIES TO MAKE TABLE LEFT OF FOREST PLOT
data_table <- geom_text(data=fplottable,aes(y=labels)) +
geom_text(label=eventnum) +
geom_text(label=arr)
data_table
grid.arrange(data_table,p, ncol=2)
I am drawing inspiration from:
Reproduce table and plot from journal and trying to get something similar to what is shown in the forest plot with the pink boxes
There were a few issues as #efz pointed out. In addition, you need to refactor the labels in your second column to allow them to match up with those in your first. It's probably going to look messy with the y axis labels and title alongside the table, so these could be removed too.
That leaves you something like:
forestdf$colour <- rep(c("white", "gray95"), 18)
p <- ggplot(forestdf, aes(x = rr, y = labels, xmin = rrlow, xmax = rrhigh)) +
geom_hline(aes(yintercept = labels, colour = colour), size = 7) +
geom_pointrange(shape = 22, fill = "black") +
geom_vline(xintercept = 1, linetype = 3) +
xlab("Variable") +
ylab("Adjusted Relative Risk with 95% Confidence Interval") +
theme_classic() +
scale_colour_identity() +
scale_y_discrete(limits = rev(forestdf$labels)) +
scale_x_log10(limits = c(0.25, 4),
breaks = c(0.25, 0.5, 1, 2, 4),
labels = c("0.25", "0.5", "1", "2", "4"), expand = c(0,0)) +
theme(axis.text.y = element_blank(), axis.title.y = element_blank())
names(fplottable) <- c("labels", "eventnum", "arr")
fplottable$labels <- factor(fplottable$labels, rev(levels(forestdf$labels)))
fplottable$colour <- rep(c("white", "gray95"), 18)
data_table <- ggplot(data = fplottable, aes(y = labels)) +
geom_hline(aes(yintercept = labels, colour = colour), size = 7) +
geom_text(aes(x = 0, label = labels), hjust = 0) +
geom_text(aes(x = 5, label = eventnum)) +
geom_text(aes(x = 7, label = arr), hjust = 1) +
scale_colour_identity() +
theme_void() +
theme(plot.margin = margin(5, 0, 35, 0))
grid.arrange(data_table,p, ncol = 2)
You can simplify further by merging the two dataframes as fdf <- full_join(forestdf, fplottable, by = "labels") and running your p on fdf. Then p + geom_text(aes(x=22, label=paste(" ", arr," ",eventum, sep=' '))) will give the following output: output
Obviously, limits need to be expanded to 100 to include the table, and the full code is below:
p <- ggplot(fdf, aes(x=rr, y=labels, xmin=rrlow, xmax=rrhigh))+
geom_pointrange(shape=22, fill="black") +
geom_vline(xintercept = 1, linetype=3) +
xlab("Variable")+ylab("Adjusted Relative Risk with 95% Confidence Interval") +
theme_bw() +
#scale_y_discrete(limits = rev(labels))+
scale_x_log10(limits = c(0.25, 100),
breaks = c(0.25, 0.5, 1, 2, 4, 100),
labels=c("0.25", "0.5", "1", "2", "4", ""),
expand = c(0,0)
)+
geom_text(aes(x=22, label=paste(" ", arr," ",eventum, sep=' ')))
p
supposing
names(fplottable)<-c('labels','eventum','arr')
then there are a few issues with the code for data_table. If I understood correctly you meant something like:
data_table <- ggplot(data=fplottable)+geom_text(aes(x= 1, y=labels, label=arr))+geom_text(aes(x= 1.5, y=labels, label=eventum)).
You can play with the value of x and have only one geom_text where label=paste(arr, eventum, sep=' ')
in this case the command grid.arrange(data_table,p, ncol=2) seems to work fine. You can define the space of each panel with width.

How to plot asymptote of a curve in R?

I have this data called mydf where I have hybrid sample comparison for efficiency. There are seven different efficiency columns for the intermixing of sampleA and sampleB. I want to see the plot for these seven efficiencies to see at which efficiency level will they significantly drop compared to the first few columns.
mydf<-structure(list(sample_A = structure(c(1L, 2L, 2L, 2L, 3L, 4L), .Label = c("2568",
"2669", "2670", "2671", "2946", "LPH-001-10_AK1", "LPH-001-12_AK2",
"LPH-001-9"), class = "factor"), sample_B = structure(c(1L, 2L,
3L, 4L, 3L, 4L), .Label = c("2568", "2669", "2670", "2671", "2946",
"LPH-001-10_AK1", "LPH-001-12_AK2", "LPH-001-9"), class = "factor"),
efficiency = c(1.02, 0.964, 0.415, 0.422, 0.98, 0.986), efficiency2 = c(1,
0.944, 0.395, 0.402, 0.96, 0.966), efficiency3 = c(0.9, 0.844,
0.295, 0.302, 0.86, 0.866), efficiency4 = c(0.32, 0.264,
-0.285, -0.278, 0.28, 0.286), efficiency5 = c(0.02, -0.0360000000000001,
-0.585, -0.578, -0.0200000000000001, -0.0140000000000001),
efficiency6 = c(0.12, 0.0639999999999999, -0.485, -0.478,
0.08, 0.086), efficiency7 = c(0.02, -0.036, -0.585, -0.578,
-0.02, -0.014)), .Names = c("sample_A", "sample_B", "efficiency",
"efficiency2", "efficiency3", "efficiency4", "efficiency5", "efficiency6",
"efficiency7"), row.names = c(NA, 6L), class = "data.frame")
Here's one way to plot your data:
mydf <- structure(list(sample_A=structure(c(1L,2L,2L,2L,3L,4L),.Label=c('2568','2669','2670','2671','2946','LPH-001-10_AK1','LPH-001-12_AK2','LPH-001-9'),class='factor'),sample_B=structure(c(1L,2L,3L,4L,3L,4L),.Label=c('2568','2669','2670','2671','2946','LPH-001-10_AK1','LPH-001-12_AK2','LPH-001-9'),class='factor'),efficiency=c(1.02,0.964,0.415,0.422,0.98,0.986),efficiency2=c(1,0.944,0.395,0.402,0.96,0.966),efficiency3=c(0.9,0.844,0.295,0.302,0.86,0.866),efficiency4=c(0.32,0.264,-0.285,-0.278,0.28,0.286),efficiency5=c(0.02,-0.0360000000000001,-0.585,-0.578,-0.0200000000000001,-0.0140000000000001),efficiency6=c(0.12,0.0639999999999999,-0.485,-0.478,0.08,0.086),efficiency7=c(0.02,-0.036,-0.585,-0.578,-0.02,-0.014)),.Names=c('sample_A','sample_B','efficiency','efficiency2','efficiency3','efficiency4','efficiency5','efficiency6','efficiency7'),row.names=c(NA,6L),class='data.frame');
effCis <- grep('^efficiency',names(mydf));
xlim <- c(1,length(effCis));
ylim <- range(mydf[,effCis],na.rm=T);
ylim[1L] <- floor(ylim[1L]/0.1)*0.1;
ylim[2L] <- ceiling(ylim[2L]/0.1)*0.1;
xticks <- seq_along(effCis);
yticks <- seq(ylim[1L],ylim[2L],0.1);
plot(NA,xlim=xlim,ylim=ylim,xlab='measurement',ylab='efficiency',xaxs='i',yaxs='i',axes=F);
abline(v=xticks,col='lightgrey');
abline(h=yticks,col='lightgrey');
abline(h=0,lwd=2);
axis(1L,xticks,xticks,font=2L,cex.axis=0.7);
axis(2L,yticks,sprintf('%.1f',yticks),las=1L,font=2L,cex.axis=0.7);
hybrid.col <- data.frame(hybrid=seq_len(nrow(mydf)),col=c('red','green','blue','gold','cyan','magenta'),stringsAsFactors=F);
splineN <- 200L;
for (ri in seq_len(nrow(hybrid.col))) {
hybrid <- hybrid.col$hybrid[ri];
col <- hybrid.col$col[ri];
x <- xticks;
y <- c(as.matrix(mydf[hybrid,effCis]));
points(x,y,pch=16L,col=col,xpd=NA);
with(spline(x,y,splineN),{
lines(x,y,col=col,lwd=2,xpd=NA);
localwin <- which(x>2 & x<3);
tp <- which.min(abs(diff(y[localwin])));
if (length(tp)>0L) points(x[localwin[tp]],y[localwin[tp]],col=col,pch=4L);
localwin <- which(x>2 & x<5);
tp <- which.min(diff(y[localwin]));
if (length(tp)>0L) {
m <- diff(y[localwin[seq(tp,len=2L)]])/diff(x[localwin[seq(tp,len=2L)]]);
if (is.finite(m)) abline(y[localwin[tp]]-m*x[localwin[tp]],m,col=col,lty=2L);
};
});
};
legend(5.5,0.95,paste0(mydf$sample_A,' / ',mydf$sample_B),fill=hybrid.col$col,cex=0.7,title='hybrid');
I wasn't 100% sure what you meant by the asymptote. I initially thought maybe you wanted the local maxima of the curves just prior to where they begin to drop, which is why I marked the local maxima with points (symbol X, i.e. pch=4L). But then I realized maybe you meant the tangent line along the drop, so I added lines tangent to the points of steepest slope.
This is the definition of asymptote:
a straight line approached by a given curve as one of the variables in the equation of the curve approaches infinity.
I don't think that's applicable here; plotting this data does not involve taking anything to infinity. I think you want either the local maxima or tangent lines.

How to scale y-axis to intuitively detect small differences in data

I have a data set from a literature survey, where we looked at effects of pH to certain parameters (Metrics) in a group of animals. Because experiments are done on different time scales, I divided the response ratio by time.
This leads to very small differences around 1 (less than 1, there is a negative effect, greater than 1 a positive effect), which are still interesting and important (because the real values are divided by time). The problem is that some of the values are either very low or very high and the differences close to 1 are not visible.
Since values are close to 1, log transformation of y-axis scale does not help. How can I transform the y-axis scale in ggplot2 so that differences close to 1 are visible and yet intuitive? (that the reader can detect differences without thinking too much; I could standardize the values to minimum value, multiply by 10000 and take a log10 scale, but this would not lead to understandable differences.)
df <- structure(list(Study = c(1, 1, 2, 2, 3), pH_control = c(8.06,
8.06, 8.01, 8.01, 7.99), pH_treatment = c(7.86, 7.75, 7.8, 7.8,
7.45), time = c(120, 120, 60, 150, 140), Metrics = structure(c(3L,
1L, 2L, 3L, 1L), .Label = c("Growth", "Metabolism", "Survival"
), class = "factor"), RR_per_time_unit = c(0.9998, 1.001, 1.002,
0.98, 0.9), CI.max = c(1, 1.003, 1.00003, 0.9999, 0.92), CI.min = c(0.9996,
0.9999, 1.004, 0.9789, 0.89), pH_diff = c(0.2, 0.31, 0.21, 0.21,
0.54)), .Names = c("Study", "pH_control", "pH_treatment", "time",
"Metrics", "RR_per_time_unit", "CI.max", "CI.min", "pH_diff"), row.names = c(NA,
-5L), class = "data.frame")
df$pH_diff <- df$pH_control - df$pH_treatment
library(ggplot2)
ggplot(df, aes(y = RR_per_time_unit, x = pH_diff, ymin = CI.min, ymax = CI.max)) +
geom_pointrange(aes(color = Metrics)) + geom_hline(aes(yintercept = 1)) + coord_trans(y = "log10")

Resources