Related
I'm solving an equation with OLS which returns a timing point on an x/y/z surface mesh. I can solve for speed and directionality per the method outlined below. The problem is, my vectors are not parallel to the surface mesh and I'm uncertain of the correct transforms needed to get it in the correct direction.
The literature states:
"For the visualization of the vectors in the 3D geometry ... If only surface data were used in the calculation of the vectors, the conduction velocity vectors were geometrically projected into the plane orthogonal to the surface normals. This constraint corrected vectors pointing inside or outside the surface"
I have no idea how to do that.
Reprex Data:
library(plotly)
library(dplyr)
tri_list <- list(surf_tri = structure(c(1L, 1L, 4L, 8L, 11L, 12L, 17L, 6L,
19L, 13L, 3L, 5L, 22L, 1L, 26L, 18L, 18L, 8L, 21L, 14L, 27L,
4L, 14L, 7L, 6L, 29L, 22L, 16L, 23L, 26L, 3L, 28L, 2L, 13L, 12L,
31L, 33L, 35L, 32L, 28L, 33L, 31L, 20L, 9L, 36L, 12L, 29L, 20L,
31L, 27L, 17L, 26L, 8L, 15L, 9L, 29L, 9L, 35L, 15L, 11L, 16L,
16L, 38L, 31L, 37L, 32L, 24L, 35L, 23L, 23L, 23L, 37L, 10L, 35L,
2L, 4L, 5L, 7L, 10L, 13L, 16L, 18L, 20L, 2L, 2L, 7L, 3L, 3L,
25L, 17L, 27L, 17L, 7L, 2L, 18L, 27L, 28L, 5L, 7L, 1L, 30L, 29L,
30L, 31L, 13L, 5L, 5L, 12L, 14L, 33L, 10L, 25L, 25L, 20L, 12L,
12L, 28L, 36L, 15L, 19L, 4L, 7L, 37L, 4L, 27L, 13L, 36L, 36L,
7L, 16L, 10L, 32L, 16L, 15L, 23L, 30L, 16L, 34L, 34L, 37L, 35L,
24L, 13L, 24L, 26L, 11L, 11L, 38L, 3L, 2L, 2L, 6L, 9L, 14L, 15L,
8L, 9L, 14L, 13L, 21L, 23L, 22L, 24L, 8L, 17L, 15L, 20L, 28L,
6L, 6L, 19L, 4L, 4L, 22L, 16L, 22L, 22L, 32L, 23L, 21L, 28L,
31L, 19L, 34L, 34L, 32L, 26L, 19L, 9L, 33L, 21L, 11L, 11L, 9L,
1L, 9L, 32L, 29L, 29L, 31L, 9L, 8L, 8L, 17L, 33L, 38L, 38L, 38L,
39L, 23L, 39L, 37L, 10L, 38L, 39L, 25L, 26L, 39L, 24L, 38L, 37L,
39L), dim = c(74L, 3L), dimnames = list(NULL, c("V1", "V2", "V3"
))), coordinates = structure(c(1.9194540977478, 11.1435489654541,
5.42491292953491, -1.66259050369263, 7.44240760803223, -6.69524192810059,
-0.0333123430609703, -4.23227882385254, 21.1540908813477, 28.65553855896,
26.0386810302734, 24.6995525360107, 38.495361328125, 22.120174407959,
-2.17373514175415, 0.292593091726303, -11.4042282104492, -10.3343534469604,
17.5129470825195, 11.7786016464233, 9.42895698547363, -1.22890889644623,
23.1779632568359, 31.624174118042, 38.5983734130859, 45.1263847351074,
-11.1577320098877, 12.0059661865234, -2.91624927520752, 0.309472501277924,
30.9889793395996, 42.6134490966797, 23.7120399475098, 26.9033088684082,
30.3514823913574, 7.96694564819336, 31.1637763977051, 31.1527690887451,
25.4455451965332, -61.4243927001953, -56.5476036071777, -45.9739837646484,
-66.2752914428711, -66.6171951293945, -84.5177154541016, -78.7427749633789,
-92.5044403076172, -86.8224487304688, -90.4450073242188, -100.261276245117,
-64.494514465332, -54.8481636047363, -57.9494781494141, -109.206504821777,
-76.7904586791992, -95.8042831420898, -88.341796875, -64.2513122558594,
-69.0683898925781, -68.2359237670898, -69.7075653076172, -76.6936798095703,
-76.4758529663086, -78.4939727783203, -64.2876815795898, -86.8634643554688,
-61.3102798461914, -68.8540496826172, -74.4825897216797, -79.063835144043,
-81.4954528808594, -86.3758010864258, -87.7453536987305, -83.0354080200195,
-109.458961486816, -90.4418487548828, -90.170280456543, -80.8295745849609,
189.259994506836, 207.376663208008, 187.388595581055, 200.771896362305,
215.514175415039, 214.221862792969, 217.531005859375, 215.624313354492,
211.211242675781, 205.350524902344, 203.870376586914, 210.061508178711,
195.598449707031, 210.308349609375, 195.291015625, 177.159469604492,
194.172241210938, 210.24040222168, 213.330307006836, 216.293930053711,
216.293441772461, 180.884536743164, 163.47248840332, 163.422592163086,
168.801071166992, 186.711502075195, 207.275924682617, 212.505798339844,
190.655349731445, 176.77978515625, 204.541122436523, 191.065948486328,
209.147216796875, 206.617050170898, 172.961532592773, 211.9609375,
203.223968505859, 179.055145263672, 170.081588745117), dim = c(39L,
3L)), activation_timing_ms = c(-25.9116878216266, -35, 17.8156572141554,
-32.3085435482249, -30.425790486056, -22.7921350518616, -19.7547757364543,
-17.1422476398632, -5.21201605679312, 6.529324221754, 6.37415402817896,
-18.6397688908753, 6.16765189118496, -23.9073664920415, -6.74190321661786,
-17, -15.3633306040704, -20.0486637091315, -23.098251364298,
-23.5797990311573, -23.6085020691451, -27.8322123439857, 11.4184099588406,
12.5422915301849, 9.07414737038971, 5.42139612786991, -21.8214591939045,
-29.3013020135356, -33.236757970705, -19.3552220222891, -5.4698807256143,
-2.27373675443232e-13, -3.44126922278861, -0.401211436169433,
17.9550207172763, 1, 5.84672565064739, -2.04852001824679, 26.5347286690835
))
# Polynomial Surface Fitting
lm_dat <- cbind(tri_list$coordinates, activation_timing_ms = tri_list$activation_timing_ms) |>
as.data.frame() |>
rename(x = V1, y = V2, z = V3)
lm <- lm(activation_timing_ms~polym(x,y,z, degree=2, raw = TRUE), lm_dat)
lm_coef <- coef(lm)
a <- lm_coef[["polym(x, y, z, degree = 2, raw = TRUE)2.0.0"]]
b <- lm_coef[["polym(x, y, z, degree = 2, raw = TRUE)0.2.0"]]
c <- lm_coef[["polym(x, y, z, degree = 2, raw = TRUE)0.0.2"]]
d <- lm_coef[["polym(x, y, z, degree = 2, raw = TRUE)1.1.0"]]
e <- lm_coef[["polym(x, y, z, degree = 2, raw = TRUE)1.0.1"]]
f <- lm_coef[["polym(x, y, z, degree = 2, raw = TRUE)0.1.1"]]
g <- lm_coef[["polym(x, y, z, degree = 2, raw = TRUE)1.0.0"]]
h <- lm_coef[["polym(x, y, z, degree = 2, raw = TRUE)0.1.0"]]
i <- lm_coef[["polym(x, y, z, degree = 2, raw = TRUE)0.0.1"]]
j <- lm_coef[["(Intercept)"]]
# Differentiation
t_x_fun <- function(x,y,z){
2*a*x + d*y + e*z + g
}
t_y_fun <- function(x,y,z){
2*b*y + d*x + f*z + h
}
t_z_fun <- function(x,y,z){
2*c*z + e*x + f*y + i
}
CV_x_fun <- function(t_x,t_y,t_z){
t_x/(t_x^2 + t_y^2 + t_z^2)
}
CV_y_fun <- function(t_x,t_y,t_z){
t_y/(t_x^2 + t_y^2 + t_z^2)
}
CV_z_fun <- function(t_x,t_y,t_z){
t_z/(t_x^2 + t_y^2 + t_z^2)
}
fitted_activation_timing <- fitted(lm)
# Loop through the V1 coordinates
node_indices_tmp <- vector(length=nrow(tri_list$surf_tri))
v1_vertex_coord <- vector("list",
length = nrow(tri_list$coordinates))
for(i in seq_along(1:nrow(tri_list$coordinates))) {
v3_index_tmp <- tri_list$surf_tri[i,3]
v2_index_tmp <- tri_list$surf_tri[i,2]
v1_index_tmp <- tri_list$surf_tri[i,1]
timing_sorted <- matrix(c(fitted_activation_timing[v1_index_tmp], v1_index_tmp,
fitted_activation_timing[v2_index_tmp], v2_index_tmp,
fitted_activation_timing[v3_index_tmp], v3_index_tmp),
nrow = 3,
ncol = 2,
byrow=T)
timing_sorted <- timing_sorted[order(timing_sorted[, 1]), ]
v3_index <- timing_sorted[3,2]
v2_index <- timing_sorted[2,2]
v1_index <- timing_sorted[1,2]
v1_vertex_coord[[i]] <- tri_list$coordinates[v1_index,]
}
v1_vertex_coord <- matrix(unlist(v1_vertex_coord),
nrow = length(v1_vertex_coord),
ncol = 3,
byrow=T)
CV_x <- vector(length=nrow(v1_vertex_coord))
CV_y <- vector(length=nrow(v1_vertex_coord))
CV_z <- vector(length=nrow(v1_vertex_coord))
for(i in seq_along(1:nrow(v1_vertex_coord))) {
x <- v1_vertex_coord[i,1]
y <- v1_vertex_coord[i,2]
z <- v1_vertex_coord[i,3]
t_x <- t_x_fun(x,y,z)
t_y <- t_y_fun(x,y,z)
t_z <- t_z_fun(x,y,z)
CV_x[i] <- CV_x_fun(t_x,t_y,t_z)
CV_y[i] <- CV_y_fun(t_x,t_y,t_z)
CV_z[i] <- CV_z_fun(t_x,t_y,t_z)
}
CV <- cbind(lm_dat, CV_x, CV_y, CV_z, fitted_activation_timing) |> as.data.frame()
plot_ly() |>
add_trace(
name = "Faces",
type = "mesh3d",
x = tri_list$coordinates[,1], y = tri_list$coordinates[,2], z = tri_list$coordinates[,3],
i = tri_list$surf_tri[,1]-1, j = tri_list$surf_tri[,2]-1, k = tri_list$surf_tri[,3]-1,
opacity = 0.8,
flatshading = TRUE # we don't want smoothing
) |>
add_trace(
type = "scatter3d",
mode = "markers",
x = tri_list$coordinates[,1],
y = tri_list$coordinates[,2],
z = tri_list$coordinates[,3]) |>
add_trace(
type="cone",
name = "CV Vectors",
x = CV[,"x"],
y = CV[,"y"],
z = CV[,"z"],
u = CV[,"CV_x"],
v = CV[,"CV_y"],
w = CV[,"CV_z"],
opacity=0.8,
sizemode= 'absolute',
sizeref= 1
)
Relevant Sections of the literature:
Calculating the weights is beyond the scope of MRE and I left it out.
I would like to create a collection of plots like this:
So far I have managed to create a single such plot:
ggplot (df, aes (x, y)) + geom_point (col = 'yellow') + xlab ("A") + ylab ("B") + ggtitle ("Dose0.1") + geom_abline (slope = 1, linetype = 2, color = "red")
With calibrated measures on the axes
Preferably in the form of a function
Example data:
structure(list(Np. = 1:32, A = c(62L, 62L, 54L, 54L, 10L, 10L,
14L, 14L, 50L, 50L, 43L, 43L, 22L, 22L, 48L, 48L, 34L, 34L, 57L,
57L, 60L, 60L, 1L, 1L, 63L, 63L, 2L, 2L, 44L, 44L, 34L, 57L),
B = c(1L, 7L, 2L, 29L, 1L, 15L, 27L, 28L, 14L, 15L, 48L,
49L, 1L, 2L, 1L, 21L, 18L, 19L, 23L, 24L, 14L, 15L, 29L,
31L, 1L, 2L, 1L, 36L, 2L, 19L, 19L, 23L), Dose = c(0.1, 1,
10, 1, 1, 0.1, 1, 1, 1, 1, 1, 1, 1, 0.1, 0.1, 0.1, 0.1, 0.1,
0.1, 0.1, 0.1, 0.1, 0.1, 10, 10, 10, 10, 10, 10, 10, 1, 1
)), class = "data.frame", row.names = c(NA, -32L))
If you want to write it in your own function, you can combine the {{}} from rlang to tell R that it should look for your x and y arguments inside the data like this:
df <- structure(list(Np. = 1:32, A = c(62L, 62L, 54L, 54L, 10L, 10L,
14L, 14L, 50L, 50L, 43L, 43L, 22L, 22L, 48L, 48L, 34L, 34L, 57L,
57L, 60L, 60L, 1L, 1L, 63L, 63L, 2L, 2L, 44L, 44L, 34L, 57L),
B = c(1L, 7L, 2L, 29L, 1L, 15L, 27L, 28L, 14L, 15L, 48L,
49L, 1L, 2L, 1L, 21L, 18L, 19L, 23L, 24L, 14L, 15L, 29L,
31L, 1L, 2L, 1L, 36L, 2L, 19L, 19L, 23L), Dose = c(0.1, 1,
10, 1, 1, 0.1, 1, 1, 1, 1, 1, 1, 1, 0.1, 0.1, 0.1, 0.1, 0.1,
0.1, 0.1, 0.1, 0.1, 0.1, 10, 10, 10, 10, 10, 10, 10, 1, 1
)), class = "data.frame", row.names = c(NA, -32L))
library(ggplot2)
plot <- function(dataframe, xvalue, yvalue, grid){
ggplot(dataframe, aes({{xvalue}}, {{yvalue}})) +
geom_point(col = 'yellow') +
xlab ("A") + ylab ("B") +
geom_abline(slope = 1, linetype = 2, color = "red") +
theme_bw() +
facet_grid(~Dose)
}
plot(df, A, B, "Dose")
Created on 2022-07-04 by the reprex package (v2.0.1)
You may want to change formatting a little but essentially:
df$dose <- factor(df$Dose)
ggplot (df, aes (A, B)) +
geom_point(aes(color = dose, shape = dose)) +
xlab ("A") +
ylab ("B") +
geom_abline(slope = 1, linetype = 2, color = "black") +
facet_wrap(vars(dose)) +
theme_bw()
I have been battling with this for a while now. As part of a large for-loop, want to take out some data points to be able to create concave hull around the resulting points (needs a minimum of 4 points). For this I have a line which makes sure that clusters where x or y values are ALL the same value are removed, as well as clusters with less than 4 lines. However, it can also happen that some points (not all) within a cluster are duplicates, causing the cluster to have >=4 lines, but the actual points are not >=4. To take out these duplicates I use distinct(), but sometimes this fails to take out the duplicates, as with the example data frame below. Any idea how to effectively take out these duplicates?
Example data
SP_occ <- structure(list(x = c(-28.212197, -130.758, -15, 47.549999, -29.346937,
-27.794644, -124.8, 47.416698, 47.75, -15.566667, 178.73, -29.344852,
175.432999, 47.75, 87, -10, 55.666668, 46.533, 47, 114.75, -29.356563,
87, 46, -128.296, -9, 154.21667, 47.549999, 47.549999, 87, -72.133301,
-157.89167, -23.055, 87, 46.366665, 55.45, 122.932999, -28.991,
153.216995, -29.35066, -29.122, 47.75, 123.967003, 121.5, 27.4167,
-27.96666, 47.266701, 87, 87, 47.583302, 114.75, -26.610647,
-26.589459, -10, 87, 122.949997, 47.583302, 125.400002, -15.533334,
-25.239904, 45.533, -28.295, 47.416698, 46, 52.0833, 87, 172.932999,
47.75, 5.4629, 121.667, 27.4167, -29.344852, -29.346937, -29.356563,
-9.387, -28.212197, -27.794644, 154.216667, -28.991, -28.991,
-29.35066, -25.239904, -26.610647, -26.589459, -27.96666, -15,
87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 52.0833, 45.533,
46.533, 114.75, -10, -15.533333, -15.566667, 178.73, -9.5, -9.466667,
-9.466667, -9.466667, -9.466667, -9.466667, -9.466667, -8.916667,
-8.916667, -9.083333, 152.756836, 138.74492, -9.321667, 5.4629,
139.416667, 55.666668), y = c(38.659904, -23.931, 55, -38.366699,
38.681605, 39.000465, -24.68, -38.349998, -38.650002, 28.183332,
-38.65, 38.68313, -28.1833, -38.650002, -27, 46, -4.582778, -39.033,
-9, -35, 38.671144, -27, -12, -24.328, 56, -20.85, -38.366699,
-38.9333, -27, 40.966702, 21.391684, 16.5667, -27, -9.416667,
-4.766666, 24.5, 42.497, -20.85, 37.997214, 42.432, -38.583302,
24.0667, -11, -33.3167, 38.962846, -38.950001, -27, -27, -38.966702,
-35, 40.341647, 40.357008, 46, -27, 24.299999, -38.966702, 24.5833,
28.266666, 37.900563, -40.416, 29.891666, -38.349998, -9, -36.5833,
-27, -28.5667, -38.583302, -26.1297, -11, -33.3167, 38.68313,
38.681605, 38.671144, 57.245, 38.659904, 39.000465, -20.85, 42.497,
42.497, 37.997214, 37.900563, 40.341647, 40.357008, 38.962846,
55, -27, -27, -27, -27, -27, -27, -27, -27, -27, -27, -27, -36.5833,
-40.416, -39.033, -35, 46, 28.266667, 28.183333, -38.65, 55.733333,
55.666667, 55.666667, 55.666667, 55.666667, 55.666667, 55.666667,
58.583333, 58.583333, 56.691667, -33.054223, 34.908889, 38.285,
-26.1297, 35.25, -4.582778), cluster = c(1L, 2L, 3L, 4L, 5L,
1L, 6L, 4L, 4L, 7L, 8L, 5L, 9L, 4L, 10L, 11L, 12L, 13L, 14L,
15L, 5L, 10L, 16L, 17L, 18L, 19L, 4L, 4L, 10L, 20L, 21L, 22L,
10L, 23L, 12L, 24L, 25L, 26L, 27L, 25L, 4L, 28L, 29L, 30L, 1L,
4L, 10L, 10L, 4L, 15L, 31L, 31L, 11L, 10L, 24L, 4L, 32L, 7L,
33L, 34L, 35L, 4L, 36L, 37L, 10L, 38L, 4L, 39L, 29L, 30L, 5L,
5L, 5L, 40L, 1L, 1L, 19L, 25L, 25L, 27L, 33L, 31L, 31L, 1L, 3L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 37L, 34L,
13L, 15L, 11L, 7L, 7L, 8L, 41L, 41L, 41L, 41L, 41L, 41L, 41L,
42L, 42L, 43L, 44L, 45L, 46L, 39L, 47L, 12L)), row.names = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L,
55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L,
68L, 69L, 70L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L,
84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L,
97L, 98L, 99L, 100L, 101L, 103L, 105L, 106L, 107L, 108L, 109L,
111L, 112L, 113L, 114L, 115L, 116L, 117L, 118L, 119L, 120L, 123L,
125L, 126L, 135L, 136L, 141L), class = "data.frame")
Code
SP_occ <- SP_occ %>% distinct()
SP_occ <- SP_occ %>% group_by(cluster) %>% filter(!(n_distinct(round(x, 6)) == 1 || n_distinct(round(y, 6)) == 1) && n() >= 4)
SP_occ <- SP_occ[SP_occ$cluster != 0,]
SP_occ$Cluster <- SP_occ %>% group_indices(cluster)
SP_occ <- SP_occ[, c(1,2,4)]
Could you explain which records in your example are the problem you are referring to? After using distinct() there are no remaining exact duplicates in your data. If you want to remove records that are 'almost' identical (very small numerical differences) you could consider doing
SP_occ <- SP_occ %>%
mutate(x = round(x,5),
y = round(y,5)) %>%
distinct()
The result that I get is the DF below. Cluster 2 is made up by 4 points, of which 2 are actually unique.
x y Cluster
1 47.55000 -38.36670 1
2 47.41670 -38.35000 1
3 47.75000 -38.65000 1
4 -15.56667 28.18333 2
5 47.55000 -38.93330 1
6 47.75000 -38.58330 1
7 47.26670 -38.95000 1
8 47.58330 -38.96670 1
9 -15.53333 28.26667 2
10 -15.53333 28.26667 2
11 -15.56667 28.18333 2
I want to reproduce the following drc::plot.drc graphs with ggplot2.
df1 <-
structure(list(TempV = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L), .Label = c("22.46FH-142", "27.59FH-142", "26.41FH-142",
"29.71FH-142", "31.66FH-142", "34.11FH-142", "33.22FH-142", "22.46FH-942",
"27.59FH-942", "26.41FH-942", "29.71FH-942", "31.66FH-942", "34.11FH-942",
"33.22FH-942"), class = "factor"), Start = c(0L, 24L, 48L, 72L,
96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L,
144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L,
192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L,
0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L,
48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L,
96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L,
144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L,
192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L,
0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L,
48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L,
96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L,
144L, 168L, 192L, 216L), End = c(24, 48, 72, 96, 120, 144, 168,
192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf,
24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96,
120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168,
192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf,
24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96,
120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168,
192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf,
24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96,
120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168,
192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf),
Germinated = c(0L, 0L, 0L, 0L, 3L, 67L, 46L, 12L, 101L, 221L,
0L, 0L, 0L, 0L, 57L, 50L, 44L, 31L, 32L, 236L, 0L, 0L, 0L,
31L, 68L, 50L, 31L, 34L, 29L, 207L, 0L, 0L, 8L, 30L, 31L,
55L, 27L, 22L, 4L, 273L, 0L, 0L, 46L, 64L, 16L, 8L, 15L,
15L, 20L, 266L, 0L, 0L, 0L, 0L, 4L, 13L, 63L, 51L, 147L,
172L, 0L, 0L, 4L, 26L, 92L, 31L, 91L, 14L, 7L, 185L, 0L,
0L, 0L, 0L, 0L, 32L, 59L, 36L, 50L, 273L, 0L, 0L, 0L, 4L,
13L, 32L, 42L, 52L, 42L, 265L, 0L, 0L, 0L, 6L, 22L, 40L,
57L, 44L, 73L, 208L, 0L, 1L, 2L, 24L, 55L, 41L, 68L, 24L,
33L, 202L, 0L, 0L, 18L, 31L, 26L, 30L, 61L, 25L, 58L, 201L,
0L, 0L, 36L, 54L, 33L, 55L, 12L, 27L, 55L, 178L, 0L, 0L,
6L, 28L, 26L, 31L, 53L, 48L, 33L, 225L)), .Names = c("TempV",
"Start", "End", "Germinated"), row.names = c(NA, -140L), class = "data.frame")
library(data.table)
dt1 <- data.table(df1)
library(drc)
dt1fm1 <-
drm(
formula = Germinated ~ Start + End
, curveid = TempV
# , pmodels =
# , weights =
, data = dt1
# , subset =
, fct = LL.2()
, type = "event"
, bcVal = NULL
, bcAdd = 0
# , start =
, na.action = na.fail
, robust = "mean"
, logDose = NULL
, control = drmc(
constr = FALSE
, errorm = TRUE
, maxIt = 1500
, method = "BFGS"
, noMessage = FALSE
, relTol = 1e-07
, rmNA = FALSE
, useD = FALSE
, trace = FALSE
, otrace = FALSE
, warnVal = -1
, dscaleThres = 1e-15
, rscaleThres = 1e-15
)
, lowerl = NULL
, upperl = NULL
, separate = FALSE
, pshifts = NULL
)
## ----dt1fm1Plot1----
plot(
x = dt1fm1
, xlab = "Time (Hours)"
, ylab = "Proportion Germinated (\\%)"
# , ylab = "Proportion Germinated (%)"
, add = FALSE
, level = NULL
, type = "average" # c("average", "all", "bars", "none", "obs", "confidence")
, broken = FALSE
# , bp
, bcontrol = NULL
, conName = NULL
, axes = TRUE
, gridsize = 100
, log = ""
# , xtsty
, xttrim = TRUE
, xt = NULL
, xtField = NULL
, xField = "Time (Hours)"
, xlim = c(0, 200)
, yt = NULL
, ytField = NULL
, yField = "Proportion Germinated"
, ylim = c(0, 1.05)
, lwd = 1
, cex = 1.2
, cex.axis = 1
, col = TRUE
# , lty
# , pch
, legend = TRUE
# , legendText
, legendPos = c(40, 1.1)
, cex.legend = 0.6
, normal = FALSE
, normRef = 1
, confidence.level = 0.95
)
## ----dt1fm1Plot2----
dt1fm1Means1 <- dt1[, .(Germinated=mean(Germinated)/450), by=.(TempV, Start, End)]
dt1fm1Means2 <- dt1fm1Means1[, .(Start=Start, End=End, Cum_Germinated=cumsum(Germinated)), by=.(TempV)]
dt1fm1Means <- data.table(dt1fm1Means2[End!=Inf], Pred=predict(object=dt1fm1))
dt1fm1Plot2 <-
ggplot(data= dt1fm1Means, mapping=aes(x=End, y=Cum_Germinated, group=TempV, color=TempV, shape=TempV)) +
geom_point() +
geom_line(aes(y = Pred)) +
scale_shape_manual(values=seq(0, 13)) +
labs(x = "Time (Hours)", y = "Proportion Germinated", shape="Temp", color="Temp") +
theme_bw() +
scale_x_continuous(expand = c(0, 0), breaks = c(0, unique(dt1fm1Means$End))) +
scale_y_continuous(expand = c(0, 0), labels = function(x) paste0(100*x,"\\%")) +
# scale_y_continuous(expand = c(0, 0), labels = percent) +
expand_limits(x = c(0, max(dt1fm1Means$End)+20), y = c(0, max(dt1fm1Means$Pred)+0.1)) +
theme(axis.title.x = element_text(size = 12, hjust = 0.54, vjust = 0),
axis.title.y = element_text(size = 12, angle = 90, vjust = 0.25))
print(dt1fm1Plot2)
Question
There are few discrepancies in ggplot2 output. These discrepancies occur because the predict function gives output in different pattern than the given levels in the data.
Edited
Actually drm function changed the order of levels of TempV and this is clear from summary(dt1fm1) output and the graph of drc::plot.drc output.
As noted in the question, there is an issue related to drm shuffling the order of factor levels. Un-shuffling this mess proved more tricky than I expected.
In the end I approached this by calling the drm function once per factor level to build up a table of results one factor level at a time.
Doing it this long-winded way uncovered the fact that your 1st plot from plot.drc and the ggplot version are both incorrect.
Let's start by wrapping your function call to drm() inside another wrapper function, to facilitate calling it repeatedly for each trace:
drcmod <- function(dt1){
drm(formula = Germinated ~ Start + End
, curveid = TempV
, data = dt1
, fct = LL.2()
, type = "event"
, bcVal = NULL
, bcAdd = 0
, na.action = na.fail
, robust = "mean"
, logDose = NULL
, control = drmc(
constr = FALSE
, errorm = TRUE
, maxIt = 1500
, method = "BFGS"
, noMessage = FALSE
, relTol = 1e-07
, rmNA = FALSE
, useD = FALSE
, trace = FALSE
, otrace = FALSE
, warnVal = -1
, dscaleThres = 1e-15
, rscaleThres = 1e-15
)
, lowerl = NULL
, upperl = NULL
, separate = FALSE
, pshifts = NULL
)
}
Now we can use this wrapper to fit the drc model to each factor level in turn:
dt2 <- data.table()
for (i in 1:nlevels(dt1$TempV)) {
dt <- dt1[TempV==levels(TempV)[i]]
dt[, TempV:=as.character(TempV)]
dt[, Germ_frac := mean(Germinated)/450, by=.(Start)]
dt[, cum_Germinated := cumsum(Germ_frac)]
dt[, Pred := c(predict(object=drcmod(dt)), NA)]
dt2 <- rbind(dt2, dt)
}
and plot:
ggplot(dt2[End != Inf], aes(x=End, y=cum_Germinated, group=TempV, color=TempV, shape=TempV)) +
geom_point() +
geom_line(aes(y = Pred)) +
scale_shape_manual(values=seq(0, 13)) +
labs(x = "Time (Hours)", y = "Proportion Germinated", shape="Temp", color="Temp") +
theme_bw()
Edit
If we run the original code in the question using a subset of the data with fewer factor levels, for example using
dt1 <- dt1[TempV %in% levels(TempV)[1:5],]
dt1 <- droplevels(dt1)
all the plots (the 2 versions in OP, and the version in this answer) give the same result. The discrepancies only seem to arise when a large number of factor levels are used. The fact that both the ggplot and the plot.drc in OP give incorrect matching of traces to factor levels indicates that the problem is most likely to be in the drm() function, rather than in plot.drc.
So I have R program, and am struggling with getting all points in map
library(ggmap)
library(ggplot2)
setwd("d:/GIS/")
sep <- read.csv("SEP_assets_csv.csv")
Sub1 <- sep[grep("SEP.12", names(sep))]
sep$newCol <- 100*rowSums(Sub1)/rowSums(sep[4:7])
# create a new grouping variable
Percent_SEP12_Assets <- ifelse(sep[,8] >= 50, "Over 50", "Under 50")
# get the map
map <- get_map("Kissena Park, Queens", zoom = 13, maptype = 'roadmap')
# plot the map and use the grouping variable for the fill inside the aes
ggmap(map) +
geom_point(data=sep, aes(x = Longitude, y = Latitude, color=Percent_SEP12_Assets ), size=9, alpha=0.6) +
scale_color_manual(breaks=c("Over 50", "Under 50"), values=c("green","red"))
And here is output map
I wish to zoom in enough without cutting out data points, but no matter location I pick on map, the data keeps getting cut, i.e. Removed 2 rows containing missing values (geom_point).
Is there a way to set boundaries based on the extremities of latitude and longitude? The csv I import at
sep <- read.csv("SEP_assets_csv.csv")
Has list of latitude and longitude.
Help!
Coordinates
Latitude Longitude
40.758365 -73.824407
40.774168 -73.818543
40.761748 -73.811379
40.765602 -73.828293
40.751762 -73.81778
40.764834 -73.789712
40.777951 -73.842932
40.76501 -73.794319
40.785959 -73.817349
40.755764 -73.799256
40.745593 -73.829283
40.789929 -73.839501
40.760072 -73.783908
40.726437 -73.807592
40.741093 -73.808757
40.720926 -73.823358
40.729642 -73.81781
40.724191 -73.80937
40.782346 -73.77844
40.778164 -73.799841
40.775122 -73.8185
40.760344 -73.817909
40.792326 -73.809516
40.78322 -73.806977
40.73106 -73.805449
40.736521 -73.813001
40.783714 -73.795027
40.770194 -73.82762
40.735855 -73.823583
40.74943 -73.82141
40.769753 -73.832001
40.754465 -73.826204
40.738775 -73.823892
40.764868 -73.826819
40.738332 -73.82028
40.735017 -73.821339
40.72535 -73.811325
40.721466 -73.820401
dput
> dput(sep)
structure(list(School = structure(1:38, .Label = c("Queens\\25Q020",
"Queens\\25Q021", "Queens\\25Q022", "Queens\\25Q023", "Queens\\25Q024",
"Queens\\25Q025", "Queens\\25Q029", "Queens\\25Q032", "Queens\\25Q079",
"Queens\\25Q107", "Queens\\25Q120", "Queens\\25Q129", "Queens\\25Q130",
"Queens\\25Q154", "Queens\\25Q163", "Queens\\25Q164", "Queens\\25Q165",
"Queens\\25Q168", "Queens\\25Q169", "Queens\\25Q184", "Queens\\25Q185",
"Queens\\25Q189", "Queens\\25Q193", "Queens\\25Q194", "Queens\\25Q200",
"Queens\\25Q201", "Queens\\25Q209", "Queens\\25Q214", "Queens\\25Q219",
"Queens\\25Q237", "Queens\\25Q242", "Queens\\25Q244", "Queens\\25Q425",
"Queens\\25Q460", "Queens\\25Q499", "Queens\\25Q515", "Queens\\25Q707",
"Queens\\25Q792"), class = "factor"), Latitude = c(40.758365,
40.774168, 40.761748, 40.765602, 40.751762, 40.764834, 40.777951,
40.76501, 40.785959, 40.755764, 40.745593, 40.789929, 40.760072,
40.726437, 40.741093, 40.720926, 40.729642, 40.724191, 40.782346,
40.778164, 40.775122, 40.760344, 40.792326, 40.78322, 40.73106,
40.736521, 40.783714, 40.770194, 40.735855, 40.74943, 40.769753,
40.754465, 40.738775, 40.764868, 40.738332, 40.735017, 40.72535,
40.721466), Longitude = c(-73.824407, -73.818543, -73.811379,
-73.828293, -73.81778, -73.789712, -73.842932, -73.794319, -73.817349,
-73.799256, -73.829283, -73.839501, -73.783908, -73.807592, -73.808757,
-73.823358, -73.81781, -73.80937, -73.77844, -73.799841, -73.8185,
-73.817909, -73.809516, -73.806977, -73.805449, -73.813001, -73.795027,
-73.82762, -73.823583, -73.82141, -73.832001, -73.826204, -73.823892,
-73.826819, -73.82028, -73.821339, -73.811325, -73.820401), Windows.SEP.11 = c(48L,
154L, 11L, 62L, 20L, 72L, 9L, 37L, 8L, 22L, 9L, 47L, 44L, 99L,
78L, 91L, 42L, 122L, 55L, 14L, 162L, 108L, 89L, 87L, 23L, 14L,
75L, 74L, 141L, 73L, 43L, 14L, 534L, 189L, 128L, 10L, 79L, 38L
), Mac.SEP.11 = c(49L, 0L, 180L, 2L, 202L, 116L, 41L, 1L, 17L,
22L, 33L, 43L, 1L, 28L, 2L, 0L, 238L, 13L, 76L, 55L, 76L, 42L,
0L, 1L, 12L, 0L, 16L, 10L, 1L, 7L, 0L, 1L, 1L, 67L, 16L, 7L,
31L, 24L), Windows.SEP.12 = c(52L, 252L, 1L, 2L, 12L, 45L, 108L,
15L, 14L, 4L, 19L, 21L, 46L, 90L, 10L, 86L, 15L, 76L, 122L, 2L,
9L, 52L, 39L, 120L, 43L, 17L, 9L, 54L, 19L, 199L, 40L, 25L, 64L,
164L, 14L, 27L, 45L, 2L), Mac.SEP.12 = c(73L, 2L, 91L, 53L, 288L,
6L, 2L, 107L, 109L, 97L, 41L, 18L, 12L, 16L, 2L, 2L, 270L, 32L,
45L, 92L, 54L, 190L, 1L, 4L, 19L, 53L, 1L, 10L, 0L, 61L, 50L,
27L, 27L, 25L, 3L, 1L, 43L, 0L), newCol = c(56.3063063063063,
62.2549019607843, 32.5088339222615, 46.218487394958, 57.4712643678161,
21.3389121338912, 68.75, 76.25, 83.1081081081081, 69.6551724137931,
58.8235294117647, 30.2325581395349, 56.3106796116505, 45.4935622317597,
13.0434782608696, 49.1620111731844, 50.4424778761062, 44.4444444444444,
56.0402684563758, 57.6687116564417, 20.9302325581395, 61.734693877551,
31.0077519379845, 58.4905660377358, 63.9175257731959, 83.3333333333333,
9.9009900990099, 43.2432432432432, 11.8012422360248, 76.4705882352941,
67.6691729323308, 77.6119402985075, 14.5367412140575, 42.4719101123596,
10.5590062111801, 62.2222222222222, 44.4444444444444, 3.125)), .Names = c("School",
"Latitude", "Longitude", "Windows.SEP.11", "Mac.SEP.11", "Windows.SEP.12",
"Mac.SEP.12", "newCol"), row.names = c(NA, -38L), class = "data.frame")
You haven't provided us with any of the data, so I'm going to give an example using a dataset in the historydata package. Instead of getting a map based on a location and a zoom, you can get a map based on the bounding box of the latitudes and longitudes in your dataset.
library(historydata)
library(ggmap)
data("catholic_dioceses")
bbox <- make_bbox(catholic_dioceses$long, catholic_dioceses$lat, f = 0.01)
map <- get_map(bbox)
ggmap(map) +
geom_point(data=catholic_dioceses, aes(x = long, y = lat))
Note that the f = argument to make_bbox() lets you control how much padding there is around your map.
In your case, I think this will work:
library(ggmap)
bbox <- make_bbox(sep$Longitude, sep$Latitude, f = 0.01)
map <- get_map(bbox)
ggmap(map) +
geom_point(data=sep, aes(x = Longitude, y = Latitude,
color = Percent_SEP12_Assets),
size = 9, alpha = 0.6) +
scale_color_manual(breaks=c("Over 50", "Under 50"), values=c("green","red"))