I want to reproduce the following drc::plot.drc graphs with ggplot2.
df1 <-
structure(list(TempV = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L), .Label = c("22.46FH-142", "27.59FH-142", "26.41FH-142",
"29.71FH-142", "31.66FH-142", "34.11FH-142", "33.22FH-142", "22.46FH-942",
"27.59FH-942", "26.41FH-942", "29.71FH-942", "31.66FH-942", "34.11FH-942",
"33.22FH-942"), class = "factor"), Start = c(0L, 24L, 48L, 72L,
96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L,
144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L,
192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L,
0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L,
48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L,
96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L,
144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L,
192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L,
0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L,
48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L,
96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L,
144L, 168L, 192L, 216L), End = c(24, 48, 72, 96, 120, 144, 168,
192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf,
24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96,
120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168,
192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf,
24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96,
120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168,
192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf,
24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96,
120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168,
192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf),
Germinated = c(0L, 0L, 0L, 0L, 3L, 67L, 46L, 12L, 101L, 221L,
0L, 0L, 0L, 0L, 57L, 50L, 44L, 31L, 32L, 236L, 0L, 0L, 0L,
31L, 68L, 50L, 31L, 34L, 29L, 207L, 0L, 0L, 8L, 30L, 31L,
55L, 27L, 22L, 4L, 273L, 0L, 0L, 46L, 64L, 16L, 8L, 15L,
15L, 20L, 266L, 0L, 0L, 0L, 0L, 4L, 13L, 63L, 51L, 147L,
172L, 0L, 0L, 4L, 26L, 92L, 31L, 91L, 14L, 7L, 185L, 0L,
0L, 0L, 0L, 0L, 32L, 59L, 36L, 50L, 273L, 0L, 0L, 0L, 4L,
13L, 32L, 42L, 52L, 42L, 265L, 0L, 0L, 0L, 6L, 22L, 40L,
57L, 44L, 73L, 208L, 0L, 1L, 2L, 24L, 55L, 41L, 68L, 24L,
33L, 202L, 0L, 0L, 18L, 31L, 26L, 30L, 61L, 25L, 58L, 201L,
0L, 0L, 36L, 54L, 33L, 55L, 12L, 27L, 55L, 178L, 0L, 0L,
6L, 28L, 26L, 31L, 53L, 48L, 33L, 225L)), .Names = c("TempV",
"Start", "End", "Germinated"), row.names = c(NA, -140L), class = "data.frame")
library(data.table)
dt1 <- data.table(df1)
library(drc)
dt1fm1 <-
drm(
formula = Germinated ~ Start + End
, curveid = TempV
# , pmodels =
# , weights =
, data = dt1
# , subset =
, fct = LL.2()
, type = "event"
, bcVal = NULL
, bcAdd = 0
# , start =
, na.action = na.fail
, robust = "mean"
, logDose = NULL
, control = drmc(
constr = FALSE
, errorm = TRUE
, maxIt = 1500
, method = "BFGS"
, noMessage = FALSE
, relTol = 1e-07
, rmNA = FALSE
, useD = FALSE
, trace = FALSE
, otrace = FALSE
, warnVal = -1
, dscaleThres = 1e-15
, rscaleThres = 1e-15
)
, lowerl = NULL
, upperl = NULL
, separate = FALSE
, pshifts = NULL
)
## ----dt1fm1Plot1----
plot(
x = dt1fm1
, xlab = "Time (Hours)"
, ylab = "Proportion Germinated (\\%)"
# , ylab = "Proportion Germinated (%)"
, add = FALSE
, level = NULL
, type = "average" # c("average", "all", "bars", "none", "obs", "confidence")
, broken = FALSE
# , bp
, bcontrol = NULL
, conName = NULL
, axes = TRUE
, gridsize = 100
, log = ""
# , xtsty
, xttrim = TRUE
, xt = NULL
, xtField = NULL
, xField = "Time (Hours)"
, xlim = c(0, 200)
, yt = NULL
, ytField = NULL
, yField = "Proportion Germinated"
, ylim = c(0, 1.05)
, lwd = 1
, cex = 1.2
, cex.axis = 1
, col = TRUE
# , lty
# , pch
, legend = TRUE
# , legendText
, legendPos = c(40, 1.1)
, cex.legend = 0.6
, normal = FALSE
, normRef = 1
, confidence.level = 0.95
)
## ----dt1fm1Plot2----
dt1fm1Means1 <- dt1[, .(Germinated=mean(Germinated)/450), by=.(TempV, Start, End)]
dt1fm1Means2 <- dt1fm1Means1[, .(Start=Start, End=End, Cum_Germinated=cumsum(Germinated)), by=.(TempV)]
dt1fm1Means <- data.table(dt1fm1Means2[End!=Inf], Pred=predict(object=dt1fm1))
dt1fm1Plot2 <-
ggplot(data= dt1fm1Means, mapping=aes(x=End, y=Cum_Germinated, group=TempV, color=TempV, shape=TempV)) +
geom_point() +
geom_line(aes(y = Pred)) +
scale_shape_manual(values=seq(0, 13)) +
labs(x = "Time (Hours)", y = "Proportion Germinated", shape="Temp", color="Temp") +
theme_bw() +
scale_x_continuous(expand = c(0, 0), breaks = c(0, unique(dt1fm1Means$End))) +
scale_y_continuous(expand = c(0, 0), labels = function(x) paste0(100*x,"\\%")) +
# scale_y_continuous(expand = c(0, 0), labels = percent) +
expand_limits(x = c(0, max(dt1fm1Means$End)+20), y = c(0, max(dt1fm1Means$Pred)+0.1)) +
theme(axis.title.x = element_text(size = 12, hjust = 0.54, vjust = 0),
axis.title.y = element_text(size = 12, angle = 90, vjust = 0.25))
print(dt1fm1Plot2)
Question
There are few discrepancies in ggplot2 output. These discrepancies occur because the predict function gives output in different pattern than the given levels in the data.
Edited
Actually drm function changed the order of levels of TempV and this is clear from summary(dt1fm1) output and the graph of drc::plot.drc output.
As noted in the question, there is an issue related to drm shuffling the order of factor levels. Un-shuffling this mess proved more tricky than I expected.
In the end I approached this by calling the drm function once per factor level to build up a table of results one factor level at a time.
Doing it this long-winded way uncovered the fact that your 1st plot from plot.drc and the ggplot version are both incorrect.
Let's start by wrapping your function call to drm() inside another wrapper function, to facilitate calling it repeatedly for each trace:
drcmod <- function(dt1){
drm(formula = Germinated ~ Start + End
, curveid = TempV
, data = dt1
, fct = LL.2()
, type = "event"
, bcVal = NULL
, bcAdd = 0
, na.action = na.fail
, robust = "mean"
, logDose = NULL
, control = drmc(
constr = FALSE
, errorm = TRUE
, maxIt = 1500
, method = "BFGS"
, noMessage = FALSE
, relTol = 1e-07
, rmNA = FALSE
, useD = FALSE
, trace = FALSE
, otrace = FALSE
, warnVal = -1
, dscaleThres = 1e-15
, rscaleThres = 1e-15
)
, lowerl = NULL
, upperl = NULL
, separate = FALSE
, pshifts = NULL
)
}
Now we can use this wrapper to fit the drc model to each factor level in turn:
dt2 <- data.table()
for (i in 1:nlevels(dt1$TempV)) {
dt <- dt1[TempV==levels(TempV)[i]]
dt[, TempV:=as.character(TempV)]
dt[, Germ_frac := mean(Germinated)/450, by=.(Start)]
dt[, cum_Germinated := cumsum(Germ_frac)]
dt[, Pred := c(predict(object=drcmod(dt)), NA)]
dt2 <- rbind(dt2, dt)
}
and plot:
ggplot(dt2[End != Inf], aes(x=End, y=cum_Germinated, group=TempV, color=TempV, shape=TempV)) +
geom_point() +
geom_line(aes(y = Pred)) +
scale_shape_manual(values=seq(0, 13)) +
labs(x = "Time (Hours)", y = "Proportion Germinated", shape="Temp", color="Temp") +
theme_bw()
Edit
If we run the original code in the question using a subset of the data with fewer factor levels, for example using
dt1 <- dt1[TempV %in% levels(TempV)[1:5],]
dt1 <- droplevels(dt1)
all the plots (the 2 versions in OP, and the version in this answer) give the same result. The discrepancies only seem to arise when a large number of factor levels are used. The fact that both the ggplot and the plot.drc in OP give incorrect matching of traces to factor levels indicates that the problem is most likely to be in the drm() function, rather than in plot.drc.
Related
I have been battling with this for a while now. As part of a large for-loop, want to take out some data points to be able to create concave hull around the resulting points (needs a minimum of 4 points). For this I have a line which makes sure that clusters where x or y values are ALL the same value are removed, as well as clusters with less than 4 lines. However, it can also happen that some points (not all) within a cluster are duplicates, causing the cluster to have >=4 lines, but the actual points are not >=4. To take out these duplicates I use distinct(), but sometimes this fails to take out the duplicates, as with the example data frame below. Any idea how to effectively take out these duplicates?
Example data
SP_occ <- structure(list(x = c(-28.212197, -130.758, -15, 47.549999, -29.346937,
-27.794644, -124.8, 47.416698, 47.75, -15.566667, 178.73, -29.344852,
175.432999, 47.75, 87, -10, 55.666668, 46.533, 47, 114.75, -29.356563,
87, 46, -128.296, -9, 154.21667, 47.549999, 47.549999, 87, -72.133301,
-157.89167, -23.055, 87, 46.366665, 55.45, 122.932999, -28.991,
153.216995, -29.35066, -29.122, 47.75, 123.967003, 121.5, 27.4167,
-27.96666, 47.266701, 87, 87, 47.583302, 114.75, -26.610647,
-26.589459, -10, 87, 122.949997, 47.583302, 125.400002, -15.533334,
-25.239904, 45.533, -28.295, 47.416698, 46, 52.0833, 87, 172.932999,
47.75, 5.4629, 121.667, 27.4167, -29.344852, -29.346937, -29.356563,
-9.387, -28.212197, -27.794644, 154.216667, -28.991, -28.991,
-29.35066, -25.239904, -26.610647, -26.589459, -27.96666, -15,
87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 52.0833, 45.533,
46.533, 114.75, -10, -15.533333, -15.566667, 178.73, -9.5, -9.466667,
-9.466667, -9.466667, -9.466667, -9.466667, -9.466667, -8.916667,
-8.916667, -9.083333, 152.756836, 138.74492, -9.321667, 5.4629,
139.416667, 55.666668), y = c(38.659904, -23.931, 55, -38.366699,
38.681605, 39.000465, -24.68, -38.349998, -38.650002, 28.183332,
-38.65, 38.68313, -28.1833, -38.650002, -27, 46, -4.582778, -39.033,
-9, -35, 38.671144, -27, -12, -24.328, 56, -20.85, -38.366699,
-38.9333, -27, 40.966702, 21.391684, 16.5667, -27, -9.416667,
-4.766666, 24.5, 42.497, -20.85, 37.997214, 42.432, -38.583302,
24.0667, -11, -33.3167, 38.962846, -38.950001, -27, -27, -38.966702,
-35, 40.341647, 40.357008, 46, -27, 24.299999, -38.966702, 24.5833,
28.266666, 37.900563, -40.416, 29.891666, -38.349998, -9, -36.5833,
-27, -28.5667, -38.583302, -26.1297, -11, -33.3167, 38.68313,
38.681605, 38.671144, 57.245, 38.659904, 39.000465, -20.85, 42.497,
42.497, 37.997214, 37.900563, 40.341647, 40.357008, 38.962846,
55, -27, -27, -27, -27, -27, -27, -27, -27, -27, -27, -27, -36.5833,
-40.416, -39.033, -35, 46, 28.266667, 28.183333, -38.65, 55.733333,
55.666667, 55.666667, 55.666667, 55.666667, 55.666667, 55.666667,
58.583333, 58.583333, 56.691667, -33.054223, 34.908889, 38.285,
-26.1297, 35.25, -4.582778), cluster = c(1L, 2L, 3L, 4L, 5L,
1L, 6L, 4L, 4L, 7L, 8L, 5L, 9L, 4L, 10L, 11L, 12L, 13L, 14L,
15L, 5L, 10L, 16L, 17L, 18L, 19L, 4L, 4L, 10L, 20L, 21L, 22L,
10L, 23L, 12L, 24L, 25L, 26L, 27L, 25L, 4L, 28L, 29L, 30L, 1L,
4L, 10L, 10L, 4L, 15L, 31L, 31L, 11L, 10L, 24L, 4L, 32L, 7L,
33L, 34L, 35L, 4L, 36L, 37L, 10L, 38L, 4L, 39L, 29L, 30L, 5L,
5L, 5L, 40L, 1L, 1L, 19L, 25L, 25L, 27L, 33L, 31L, 31L, 1L, 3L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 37L, 34L,
13L, 15L, 11L, 7L, 7L, 8L, 41L, 41L, 41L, 41L, 41L, 41L, 41L,
42L, 42L, 43L, 44L, 45L, 46L, 39L, 47L, 12L)), row.names = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L,
55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L,
68L, 69L, 70L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L,
84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L,
97L, 98L, 99L, 100L, 101L, 103L, 105L, 106L, 107L, 108L, 109L,
111L, 112L, 113L, 114L, 115L, 116L, 117L, 118L, 119L, 120L, 123L,
125L, 126L, 135L, 136L, 141L), class = "data.frame")
Code
SP_occ <- SP_occ %>% distinct()
SP_occ <- SP_occ %>% group_by(cluster) %>% filter(!(n_distinct(round(x, 6)) == 1 || n_distinct(round(y, 6)) == 1) && n() >= 4)
SP_occ <- SP_occ[SP_occ$cluster != 0,]
SP_occ$Cluster <- SP_occ %>% group_indices(cluster)
SP_occ <- SP_occ[, c(1,2,4)]
Could you explain which records in your example are the problem you are referring to? After using distinct() there are no remaining exact duplicates in your data. If you want to remove records that are 'almost' identical (very small numerical differences) you could consider doing
SP_occ <- SP_occ %>%
mutate(x = round(x,5),
y = round(y,5)) %>%
distinct()
The result that I get is the DF below. Cluster 2 is made up by 4 points, of which 2 are actually unique.
x y Cluster
1 47.55000 -38.36670 1
2 47.41670 -38.35000 1
3 47.75000 -38.65000 1
4 -15.56667 28.18333 2
5 47.55000 -38.93330 1
6 47.75000 -38.58330 1
7 47.26670 -38.95000 1
8 47.58330 -38.96670 1
9 -15.53333 28.26667 2
10 -15.53333 28.26667 2
11 -15.56667 28.18333 2
I am trying to make square shaped plots with the same x and y tick-marks (i.e. aspect-ratio =1).
Originally I wanted to use facet_wrap with ggplot, but reading from a number of questions here on stackoverflow I realized this is not possible. So now I want to plot them one by one and use grid.arrange to organize the plots in the end.
BUT it is still not working for me. I can get the axis to be correct, but now the confidence interval from geom_smooth() is no longer correctly plotted.
dat <- structure(list(analyte = structure(c(2L, 8L, 9L, 5L, 6L, 4L,
1L, 7L, 10L, 3L, 9L, 10L, 7L, 7L, 10L, 10L, 10L, 10L, 6L, 6L,
10L, 6L, 4L, 6L, 7L, 4L, 2L, 10L, 10L, 4L, 2L, 6L, 6L, 8L, 10L,
1L, 1L, 3L, 8L, 2L, 1L, 10L, 7L, 6L, 3L, 3L, 7L, 7L, 6L, 6L,
9L, 5L, 9L, 7L, 6L, 7L, 8L, 7L, 5L, 7L, 5L), .Label = c("Alanine",
"Glutamic acid", "Glutamine", "Glycine", "Histidine", "Isoleucine",
"Leucine", "Phenylalanine", "Tyrosine", "Valine"), class = "factor"),
x = c(23.8, 51.5, 68.8, 83.5, 165.8, 178.6, 201.1, 387.4,
417.7, 550.1, 101.4, 103.1, 115.5, 119.9, 131.4, 156.9, 157.2,
169.9, 170.1, 174.6, 204.3, 21.8, 218.7, 22.2, 220.3, 226,
24.3, 259.3, 263.1, 301, 38.7, 39.8, 41.5, 42.4, 428.9, 431.7,
437.2, 440.1, 46.7, 47, 462.6, 470.1, 474.5, 51.3, 512.3,
516.4, 527.2, 547.3, 57.3, 58.5, 60.6, 63.9, 65.9, 69.9,
71.8, 771.9, 81.2, 82.4, 82.6, 823.5, 83.8), y = c(100L,
50L, 50L, 80L, 160L, 210L, 240L, 390L, 340L, 620L, 70L, 90L,
70L, 90L, 130L, 130L, 160L, 130L, 160L, 150L, 180L, 30L,
140L, 30L, 230L, 210L, 60L, 230L, 270L, 250L, 60L, 30L, 50L,
50L, 390L, 480L, 460L, 410L, 50L, 290L, 410L, 420L, 440L,
50L, 530L, 730L, 530L, 400L, 50L, 40L, 40L, 100L, 50L, 70L,
70L, 750L, 50L, 70L, 110L, 800L, 160L)), class = "data.frame", row.names = c(NA,
-61L))
and the plot:
my.formula <- y ~ x
p1 <- ggplot(dat[which(dat$analyte== 'Alanine'),], aes(x = x, y = y))+ geom_point()+
scale_x_continuous(limits=c(min(dat[which(dat$analyte== 'Alanine'),]$x, dat[which(dat$analyte== 'Alanine'),]$y), max(dat[which(dat$analyte== 'Alanine'),]$x,dat[which(dat$analyte== 'Alanine'),]$y))) +
scale_y_continuous(limits=c(min(dat[which(dat$analyte== 'Alanine'),]$x, dat[which(dat$analyte== 'Alanine'),]$y), max(dat[which(dat$analyte== 'Alanine'),]$x,dat[which(dat$analyte== 'Alanine'),]$y))) +
geom_smooth(method='lm') + stat_poly_eq(formula = my.formula, aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
parse = T, size=3)
p1
UPDATE:
So I try to combine the suggested code and some of my own settings and I am getting closer. But it is driving me crazy, why the confidence intervals are not plotted in some of the plots and plotted wrong in one plot (Alanine) (see the last picture)?
The updated code:
dat_split <- split(dat, dat$analyte)
plots <-
lapply(dat_split, function(df)
ggplot(df, aes(x = x, y = y)) +
geom_point() +
scale_x_continuous(expand= c(0,0), limits=c(min(as.numeric(min(df$x)-as.numeric(1/8*min(df$x))), as.numeric(min(df$y)-as.numeric(1/8*min(df$y)))), max(as.numeric(max(df$x)+as.numeric(1/8*max(df$x))), as.numeric(max(df$y)+as.numeric(1/8*max(df$y)))))) +
scale_y_continuous(expand= c(0,0), limits=c(min(as.numeric(min(df$x)-as.numeric(1/8*min(df$x))), as.numeric(min(df$y)-as.numeric(1/8*min(df$y)))), max(as.numeric(max(df$x)+as.numeric(1/8*max(df$x))), as.numeric(max(df$y)+as.numeric(1/8*max(df$y)))))) +
theme(aspect.ratio = 1) +
geom_smooth(method = 'lm', inherit.aes = T, se=T) +
ggtitle(df$analyte[1]) +
ggpmisc::stat_poly_eq(formula = my.formula,
aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
parse = TRUE, size=3))
gridExtra::grid.arrange(grobs = plots)
This seems to do roughly what you're looking for. For some of the analyte factors, the x and y ranges are considerably different, so I'm not sure you really want to show them all with identical axes.
dat_split <- split(dat, dat$analyte)
plots <-
lapply(dat_split, function(df)
ggplot(df, aes(x = x, y = y)) +
geom_point() +
coord_equal() +
geom_smooth(method = 'lm', inherit.aes = T) +
ggtitle(df$analyte[1]) +
ggpmisc::stat_poly_eq(formula = my.formula,
aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
parse = T, size=3))
gridExtra::grid.arrange(grobs = plots)
I want to fit log-logistic regression for the following data in drc R package. However, my code throws the following error.
df1 <-
structure(list(Temp = c(15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 20L, 20L, 20L, 20L, 20L, 20L, 25L, 25L, 25L, 25L, 30L, 30L,
30L, 30L, 35L, 35L, 35L, 35L, 40L, 40L, 40L, 40L), Start = c(0L,
24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 0L, 24L, 48L, 72L,
96L, 120L, 0L, 24L, 48L, 72L, 0L, 24L, 48L, 72L, 0L, 24L, 48L,
72L, 0L, 24L, 48L, 72L), End = c(24, 48, 72, 96, 120, 144, 168,
192, Inf, 24, 48, 72, 96, 120, Inf, 24, 48, 72, 96, 24, 48, 72,
Inf, 24, 48, 72, Inf, 24, 48, 72, Inf), Germinated = c(0L, 0L,
1L, 3L, 3L, 12L, 14L, 12L, 15L, 0L, 11L, 27L, 15L, 3L, 4L, 2L,
30L, 15L, 13L, 6L, 43L, 7L, 4L, 5L, 48L, 3L, 4L, 0L, 31L, 21L,
8L)), .Names = c("Temp", "Start", "End", "Germinated"), row.names = c(NA,
-31L), class = "data.frame")
library(drc)
fm1 <-
drm(
formula = Germinated ~ Start + End
, data = df1
, fct = LL.2()
, type = "event"
, control = drmc(
constr = FALSE
, errorm = TRUE
, maxIt = 1500
, method = "BFGS"
, noMessage = FALSE
, relTol = 1e-07
, rmNA = FALSE
, useD = FALSE
, trace = FALSE
, otrace = FALSE
, warnVal = -1
, dscaleThres = 1e-15
, rscaleThres = 1e-15
)
)
summary(fm1)
You need to group the data by Temp, because the time periods are repeated for each value of Temp. curveid = Temp does the trick:
fm1 <-drm(data = df1, curveid = Temp,
formula = Germinated ~ Start + End, fct = LL.2(), type = "event",
control = drmc(constr = FALSE, errorm = TRUE, maxIt = 1500, method = "BFGS",
noMessage = FALSE, relTol = 1e-07, rmNA = FALSE, useD = FALSE,
trace = FALSE, otrace = FALSE, warnVal = -1, dscaleThres = 1e-15, rscaleThres = 1e-15))
summary(fm1)
Model fitted: Log-logistic (ED50 as parameter) with lower limit at 0 and upper limit at 1 (2 parms)
Parameter estimates:
Estimate Std. Error t-value p-value
b:15 -6.03055 0.78915 -7.64179 0
b:20 -4.96450 0.60740 -8.17338 0
b:25 -4.43973 0.54904 -8.08639 0
b:30 -4.80876 0.60792 -7.91025 0
b:35 -5.45991 0.69159 -7.89467 0
b:40 -5.43892 0.79772 -6.81811 0
e:15 162.33568 6.10473 26.59177 0
e:20 64.71588 3.08660 20.96674 0
e:25 48.23883 2.68278 17.98090 0
e:30 36.38415 2.04252 17.81337 0
e:35 35.07398 1.85537 18.90405 0
e:40 48.44494 2.21375 21.88366 0
So I have R program, and am struggling with getting all points in map
library(ggmap)
library(ggplot2)
setwd("d:/GIS/")
sep <- read.csv("SEP_assets_csv.csv")
Sub1 <- sep[grep("SEP.12", names(sep))]
sep$newCol <- 100*rowSums(Sub1)/rowSums(sep[4:7])
# create a new grouping variable
Percent_SEP12_Assets <- ifelse(sep[,8] >= 50, "Over 50", "Under 50")
# get the map
map <- get_map("Kissena Park, Queens", zoom = 13, maptype = 'roadmap')
# plot the map and use the grouping variable for the fill inside the aes
ggmap(map) +
geom_point(data=sep, aes(x = Longitude, y = Latitude, color=Percent_SEP12_Assets ), size=9, alpha=0.6) +
scale_color_manual(breaks=c("Over 50", "Under 50"), values=c("green","red"))
And here is output map
I wish to zoom in enough without cutting out data points, but no matter location I pick on map, the data keeps getting cut, i.e. Removed 2 rows containing missing values (geom_point).
Is there a way to set boundaries based on the extremities of latitude and longitude? The csv I import at
sep <- read.csv("SEP_assets_csv.csv")
Has list of latitude and longitude.
Help!
Coordinates
Latitude Longitude
40.758365 -73.824407
40.774168 -73.818543
40.761748 -73.811379
40.765602 -73.828293
40.751762 -73.81778
40.764834 -73.789712
40.777951 -73.842932
40.76501 -73.794319
40.785959 -73.817349
40.755764 -73.799256
40.745593 -73.829283
40.789929 -73.839501
40.760072 -73.783908
40.726437 -73.807592
40.741093 -73.808757
40.720926 -73.823358
40.729642 -73.81781
40.724191 -73.80937
40.782346 -73.77844
40.778164 -73.799841
40.775122 -73.8185
40.760344 -73.817909
40.792326 -73.809516
40.78322 -73.806977
40.73106 -73.805449
40.736521 -73.813001
40.783714 -73.795027
40.770194 -73.82762
40.735855 -73.823583
40.74943 -73.82141
40.769753 -73.832001
40.754465 -73.826204
40.738775 -73.823892
40.764868 -73.826819
40.738332 -73.82028
40.735017 -73.821339
40.72535 -73.811325
40.721466 -73.820401
dput
> dput(sep)
structure(list(School = structure(1:38, .Label = c("Queens\\25Q020",
"Queens\\25Q021", "Queens\\25Q022", "Queens\\25Q023", "Queens\\25Q024",
"Queens\\25Q025", "Queens\\25Q029", "Queens\\25Q032", "Queens\\25Q079",
"Queens\\25Q107", "Queens\\25Q120", "Queens\\25Q129", "Queens\\25Q130",
"Queens\\25Q154", "Queens\\25Q163", "Queens\\25Q164", "Queens\\25Q165",
"Queens\\25Q168", "Queens\\25Q169", "Queens\\25Q184", "Queens\\25Q185",
"Queens\\25Q189", "Queens\\25Q193", "Queens\\25Q194", "Queens\\25Q200",
"Queens\\25Q201", "Queens\\25Q209", "Queens\\25Q214", "Queens\\25Q219",
"Queens\\25Q237", "Queens\\25Q242", "Queens\\25Q244", "Queens\\25Q425",
"Queens\\25Q460", "Queens\\25Q499", "Queens\\25Q515", "Queens\\25Q707",
"Queens\\25Q792"), class = "factor"), Latitude = c(40.758365,
40.774168, 40.761748, 40.765602, 40.751762, 40.764834, 40.777951,
40.76501, 40.785959, 40.755764, 40.745593, 40.789929, 40.760072,
40.726437, 40.741093, 40.720926, 40.729642, 40.724191, 40.782346,
40.778164, 40.775122, 40.760344, 40.792326, 40.78322, 40.73106,
40.736521, 40.783714, 40.770194, 40.735855, 40.74943, 40.769753,
40.754465, 40.738775, 40.764868, 40.738332, 40.735017, 40.72535,
40.721466), Longitude = c(-73.824407, -73.818543, -73.811379,
-73.828293, -73.81778, -73.789712, -73.842932, -73.794319, -73.817349,
-73.799256, -73.829283, -73.839501, -73.783908, -73.807592, -73.808757,
-73.823358, -73.81781, -73.80937, -73.77844, -73.799841, -73.8185,
-73.817909, -73.809516, -73.806977, -73.805449, -73.813001, -73.795027,
-73.82762, -73.823583, -73.82141, -73.832001, -73.826204, -73.823892,
-73.826819, -73.82028, -73.821339, -73.811325, -73.820401), Windows.SEP.11 = c(48L,
154L, 11L, 62L, 20L, 72L, 9L, 37L, 8L, 22L, 9L, 47L, 44L, 99L,
78L, 91L, 42L, 122L, 55L, 14L, 162L, 108L, 89L, 87L, 23L, 14L,
75L, 74L, 141L, 73L, 43L, 14L, 534L, 189L, 128L, 10L, 79L, 38L
), Mac.SEP.11 = c(49L, 0L, 180L, 2L, 202L, 116L, 41L, 1L, 17L,
22L, 33L, 43L, 1L, 28L, 2L, 0L, 238L, 13L, 76L, 55L, 76L, 42L,
0L, 1L, 12L, 0L, 16L, 10L, 1L, 7L, 0L, 1L, 1L, 67L, 16L, 7L,
31L, 24L), Windows.SEP.12 = c(52L, 252L, 1L, 2L, 12L, 45L, 108L,
15L, 14L, 4L, 19L, 21L, 46L, 90L, 10L, 86L, 15L, 76L, 122L, 2L,
9L, 52L, 39L, 120L, 43L, 17L, 9L, 54L, 19L, 199L, 40L, 25L, 64L,
164L, 14L, 27L, 45L, 2L), Mac.SEP.12 = c(73L, 2L, 91L, 53L, 288L,
6L, 2L, 107L, 109L, 97L, 41L, 18L, 12L, 16L, 2L, 2L, 270L, 32L,
45L, 92L, 54L, 190L, 1L, 4L, 19L, 53L, 1L, 10L, 0L, 61L, 50L,
27L, 27L, 25L, 3L, 1L, 43L, 0L), newCol = c(56.3063063063063,
62.2549019607843, 32.5088339222615, 46.218487394958, 57.4712643678161,
21.3389121338912, 68.75, 76.25, 83.1081081081081, 69.6551724137931,
58.8235294117647, 30.2325581395349, 56.3106796116505, 45.4935622317597,
13.0434782608696, 49.1620111731844, 50.4424778761062, 44.4444444444444,
56.0402684563758, 57.6687116564417, 20.9302325581395, 61.734693877551,
31.0077519379845, 58.4905660377358, 63.9175257731959, 83.3333333333333,
9.9009900990099, 43.2432432432432, 11.8012422360248, 76.4705882352941,
67.6691729323308, 77.6119402985075, 14.5367412140575, 42.4719101123596,
10.5590062111801, 62.2222222222222, 44.4444444444444, 3.125)), .Names = c("School",
"Latitude", "Longitude", "Windows.SEP.11", "Mac.SEP.11", "Windows.SEP.12",
"Mac.SEP.12", "newCol"), row.names = c(NA, -38L), class = "data.frame")
You haven't provided us with any of the data, so I'm going to give an example using a dataset in the historydata package. Instead of getting a map based on a location and a zoom, you can get a map based on the bounding box of the latitudes and longitudes in your dataset.
library(historydata)
library(ggmap)
data("catholic_dioceses")
bbox <- make_bbox(catholic_dioceses$long, catholic_dioceses$lat, f = 0.01)
map <- get_map(bbox)
ggmap(map) +
geom_point(data=catholic_dioceses, aes(x = long, y = lat))
Note that the f = argument to make_bbox() lets you control how much padding there is around your map.
In your case, I think this will work:
library(ggmap)
bbox <- make_bbox(sep$Longitude, sep$Latitude, f = 0.01)
map <- get_map(bbox)
ggmap(map) +
geom_point(data=sep, aes(x = Longitude, y = Latitude,
color = Percent_SEP12_Assets),
size = 9, alpha = 0.6) +
scale_color_manual(breaks=c("Over 50", "Under 50"), values=c("green","red"))
I have the following data frame
structure(list(Lightbox = c(84L, 67L, 80L, 63L, 76L, 66L, 79L,
81L, 77L, 82L, 84L, 67L, 80L, 63L, 76L, 66L, 79L, 81L, 77L, 82L,
84L, 67L, 80L, 63L, 76L, 66L, 79L, 81L, 77L, 82L, 84L, 67L, 80L,
63L, 76L, 66L, 79L, 81L, 77L, 82L, 84L, 67L, 80L, 63L, 76L, 66L,
79L, 81L, 77L, 82L), variable = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("S1",
"S2", "S3", "S4", "S5"), class = "factor"), value = c(82L, 65L,
73L, 50L, 50L, 50L, 72L, 56L, 76L, 78L, 88L, 66L, 71L, 60L, 54L,
55L, 63L, 68L, 73L, 75L, 73L, 65L, 76L, 57L, 51L, 57L, 75L, 65L,
69L, 66L, 77L, 67L, 79L, 58L, 55L, 56L, 77L, 66L, 73L, 80L, 78L,
62L, 78L, 52L, 63L, 59L, 71L, 64L, 69L, 89L), mean = c(83, 66,
76.5, 56.5, 63, 58, 75.5, 68.5, 76.5, 80, 86, 66.5, 75.5, 61.5,
65, 60.5, 71, 74.5, 75, 78.5, 78.5, 66, 78, 60, 63.5, 61.5, 77,
73, 73, 74, 80.5, 67, 79.5, 60.5, 65.5, 61, 78, 73.5, 75, 81,
81, 64.5, 79, 57.5, 69.5, 62.5, 75, 72.5, 73, 85.5), diff = c(2L,
2L, 7L, 13L, 26L, 16L, 7L, 25L, 1L, 4L, -4L, 1L, 9L, 3L, 22L,
11L, 16L, 13L, 4L, 7L, 11L, 2L, 4L, 6L, 25L, 9L, 4L, 16L, 8L,
16L, 7L, 0L, 1L, 5L, 21L, 10L, 2L, 15L, 4L, 2L, 6L, 5L, 2L, 11L,
13L, 7L, 8L, 17L, 8L, -7L)), .Names = c("Lightbox", "variable",
"value", "mean", "diff"), row.names = c(NA, -50L), class = "data.frame")
I wish to plot a bland altman graph, difference against mean for 5 facet groups S1->S5 which is easy enough
p <- ggplot(df_melt, aes(mean, diff))+ geom_point(na.rm=TRUE)+ facet_wrap(~variable)
However, I would also like to add some geom_hline to each facet showing the mean for each group and the standard deviations. If I had only one group I would do the following:
yintercepts_mean <- c(mean(df_melt$diff, na.rm = TRUE))
yintercepts_mean_r <- round(yintercepts_mean,3)
yintercepts_sd_p <- c(mean(df_melt$diff, na.rm = TRUE) + c(2) * sd(df_melt$diff, na.rm = TRUE))
yintercepts_sd_n <- c(mean(df_melt$diff, na.rm = TRUE) + c(-2) * sd(df_melt$diff, na.rm = TRUE))
yintercepts_sd_p_r <- round(yintercepts_sd_p,3)
yintercepts_sd_n_r <- round(yintercepts_sd_n,3)
#ylabels <- c("- 2SD", "+ 2SD", "Mean")
ylabels <- c("mean")
ylabels2 <- c("+ 2SD")
ylabels3 <- c("- 2SD")
p + geom_hline(yintercept = yintercepts_mean_r, linetype=1, color='blue') +
geom_hline(yintercept = yintercepts_sd_p_r, linetype=2, color='blue') +
geom_hline(yintercept = yintercepts_sd_n_r, linetype=2, color='blue')
How can I incorporate the above when facetting my data?
library(plyr)
df2 <- ddply(df_melt,.(variable),summarise,mean=mean(diff, na.rm = TRUE),
sd=sd(diff, na.rm = TRUE))
library(ggplot2)
p <- ggplot(df_melt, aes(mean, diff)) +
geom_point(na.rm=TRUE) +
geom_hline(data=df2,aes(yintercept=c(round(mean,3),
round(mean+2*sd,3),
round(mean-2*sd,3))),
linetype=c(1,2,2), color='blue') +
facet_wrap(~variable)
print(p)