R ggplot - Can't allocate big vector - r

I'm trying to plot a relatively small data set, and I can't get it to show me the plot. It keeps giving the error Error: cannot allocate vector of size 9.7 Gb. This doesn't make much sense to me as the data set is rather small.
> nrow(locs)
[1] 130
> head(locs)
STATION AVGTRANGE LAT LONG
1: USC00286979 22.13333 40.6971 -75.2042
2: USC00360022 21.33333 40.5361 -79.8152
3: USC00360132 24.37037 40.5227 -78.3694
4: USC00360140 19.80000 40.4949 -78.4667
5: USC00360147 22.36667 41.3585 -77.9262
6: USC00360457 20.68000 40.8209 -76.4983
How I'm plotting it.
# Raster layer over the station coordinates, with fill taken from AVGTRANGE.
gg <- ggplot(locs, mapping = aes(LONG, LAT)) +
  geom_raster(mapping = aes(fill = AVGTRANGE), interpolate = TRUE)
gg # can't allocate here
Here is the dput of my data.
> dput(locs)
structure(list(STATION = structure(1:130, .Label = c("USC00286979",
"USC00360022", "USC00360132", "USC00360140", "USC00360147", "USC00360457",
"USC00360560", "USC00360656", "USC00360754", "USC00360785", "USC00360861",
"USC00360868", "USC00361139", "USC00361212", "USC00361301", "USC00361350",
"USC00361354", "USC00361362", "USC00361377", "USC00361480", "USC00361485",
"USC00361705", "USC00361726", "USC00361751", "USC00361802", "USC00361810",
"USC00361838", "USC00361920", "USC00362071", "USC00362183", "USC00362323",
"USC00362470", "USC00362574", "USC00362721", "USC00362942", "USC00363018",
"USC00363028", "USC00363226", "USC00363311", "USC00363321", "USC00363343",
"USC00363417", "USC00363437", "USC00363451", "USC00363632", "USC00363665",
"USC00363698", "USC00364214", "USC00364325", "USC00364432", "USC00364763",
"USC00364778", "USC00364815", "USC00364839", "USC00364896", "USC00364934",
"USC00364976", "USC00364992", "USC00365050", "USC00365109", "USC00365344",
"USC00365573", "USC00365686", "USC00365738", "USC00365902", "USC00365918",
"USC00366111", "USC00366151", "USC00366194", "USC00366238", "USC00366508",
"USC00366649", "USC00366886", "USC00366921", "USC00366927", "USC00367029",
"USC00367073", "USC00367103", "USC00367167", "USC00367186", "USC00367229",
"USC00367409", "USC00367477", "USC00367732", "USC00367782", "USC00367863",
"USC00367931", "USC00367938", "USC00368073", "USC00368184", "USC00368308",
"USC00368361", "USC00368400", "USC00368449", "USC00368469", "USC00368596",
"USC00368668", "USC00368868", "USC00368873", "USC00368888", "USC00368905",
"USC00369298", "USC00369367", "USC00369408", "USC00369823", "USR0000PALL",
"USW00003761", "USW00004726", "USW00004751", "USW00004787", "USW00004843",
"USW00013739", "USW00014711", "USW00014712", "USW00014736", "USW00014737",
"USW00014751", "USW00014762", "USW00014770", "USW00014777", "USW00014778",
"USW00014860", "USW00054737", "USW00054782", "USW00054786", "USW00054789",
"USW00054792", "USW00093778", "USW00094732", "USW00094823"), class = "factor"),
AVGTRANGE = c(22.1333333333333, 21.3333333333333, 24.3703703703704,
19.8, 22.3666666666667, 20.68, 23.35, 21.4333333333333, 25.75,
23.4333333333333, 23.6428571428571, 26.4333333333333, 27.551724137931,
25.3448275862069, 25.0666666666667, 26.6842105263158, 23.4444444444444,
29.6, 23.3, 30.2631578947368, 27.0454545454545, 25.9333333333333,
24.2083333333333, 27.448275862069, 28.2333333333333, 21.4666666666667,
24.1111111111111, 25.7333333333333, 23.8571428571429, 21.6,
26.08, 26.2916666666667, 27.1034482758621, 28.3666666666667,
27.9259259259259, 23.6, 25.7, 26.3666666666667, 26.0344827586207,
20.2666666666667, 23.0909090909091, 27.2727272727273, 25.9666666666667,
24.8214285714286, 20.2413793103448, 24.0333333333333, 20.6333333333333,
26.0344827586207, 22.6, 29.0333333333333, NA, 25.625, 19.0333333333333,
18.7666666666667, 21.0689655172414, 22, 24.1333333333333,
25.0333333333333, 24.0666666666667, 24.3666666666667, 20.7333333333333,
32.5, 26.6666666666667, NA, 22.2666666666667, 25.1333333333333,
27.1481481481481, 22.7, 24.4827586206897, 21.6071428571429,
20.8461538461538, 29.9333333333333, 17.3928571428571, 26.2666666666667,
23.84, 23.1481481481481, 23.8275862068966, 26.9, 26.7931034482759,
25.3636363636364, NA, 23.5333333333333, 27.3571428571429,
17.2, 24.5, 22.0666666666667, NA, 23.8333333333333, 26.5172413793103,
27.6551724137931, 21.2307692307692, 26.5384615384615, 19.5,
20.8, 25.3, 18.6666666666667, 25.2758620689655, 23.8333333333333,
24.3461538461538, 27.6551724137931, 25.7666666666667, 24,
26.0344827586207, 24.6, 28.7333333333333, 27.7, 20.1034482758621,
18.6071428571429, 26.1785714285714, 22.5714285714286, 22.6071428571429,
17.1785714285714, 19.3571428571429, 21.6071428571429, 24.4285714285714,
23.6071428571429, 21.6785714285714, 19.9642857142857, 25.2142857142857,
22.7241379310345, 23.0357142857143, 17.8928571428571, 22.2962962962963,
21.2857142857143, 21.8571428571429, 21, 25.6428571428571,
25.6071428571429, 19.4444444444444, 22.6785714285714), LAT = c(40.6971,
40.5361, 40.5227, 40.4949, 41.3585, 40.8209, 40.8619, 39.9355,
41.0072, 40.3803, 40.3916, 41.8975, 40.8415, 41.6516, 41.5217,
39.848, 39.9353, 41.9301, 40.1468, 41.0489, 41.1922, 39.7994,
39.9969, 41.3575, 41.775, 41.7391, 41.9903, 40.2258, 40.46,
40.1275, 41.5216, 40.4681, 40.50194, 40.0136, 40.71306, 41.1184,
41.4004, 39.8815, 41.5631, 40.0962, 40.5513, 40.9666, 40.2305,
39.78333, 40.5511, 39.77056, 40.2817, 40.5972, 41.4992, 41.6767,
40.0499, 40.1167, 41.4234, 40.1692, 40.3333, 40.8223, 40.9474,
40.5864, 41.64583, 41.131, 40.8344, 40.3391, 39.7808, 41.6725,
40.6475, 40.5319, 40.412, 40.61417, 40.1482, 40.075, 39.8,
41.9245, 39.9587, 40.8729, 40.12, 41.7394, 39.7275, 41.8157,
40.6515, 41.589, 40.9248, 41.3299, 41.4196, 39.8958, 40.5101,
40.683, 40.7831, 40.335, 40.05889, 41.05583, 39.8582, 41.8162,
40.5711, 40.7933, 41.40389, 41.008, 40.8532, 41.8975, 41.4792,
41.63, 41.7511, 41.84667, 39.89861, 41.7004, 40.0417, 41.4864,
39.8593, 40.31611, 41.8, 41.17833, 41.62639, 39.87327, 40.1962,
40.36667, 40.29639, 40.64985, 40.21722, 40.35472, 40.82056,
41.3336, 41.2433, 42.0803, 40.12028, 40.23833, 40.33, 41.13889,
41.04667, 39.91806, 40.08194, 40.4846), LONG = c(-75.2042,
-79.8152, -78.3694, -78.4667, -77.9262, -76.4983, -75.6428,
-77.2577, -76.4482, -76.0274, -79.8594, -78.7144, -79.9163,
-76.8463, -77.4478, -79.5898, -77.6394, -79.297, -79.8986,
-77.9411, -79.4361, -79.3665, -79.5963, -79.2172, -78.0417,
-77.971, -77.1567, -77.1894, -76.8703, -79.4058, -76.4043,
-78.7289, -80.0833, -78.3653, -79.5144, -75.7277, -79.8305,
-77.3506, -78.6014, -75.7513, -80.2167, -78.5871, -75.4354,
-79.9166, -75.9913, -77.0325, -76.8703, -79.1186, -80.4681,
-78.8036, -76.2742, -76.4333, -76.4933, -79.1411, -76.4667,
-75.6962, -76.8786, -77.5692, -80.425, -77.4336, -76.1352,
-79.8604, -79.041, -75.0641, -80.3861, -80.2172, -79.7245,
-79.7191, -74.953, -76.0717, -76.05, -78.0072, -75.1728,
-78.2161, -75.5011, -75.4465, -79.913, -78.2873, -78.5551,
-75.3303, -79.2825, -77.7381, -78.7493, -76.3948, -79.5459,
-79.6684, -76.8617, -75.313, -77.5213, -80.06, -77.4774,
-80.4249, -75.2781, -77.8672, -78.0183, -75.1876, -76.7891,
-77.1419, -79.4432, -79.693, -76.443, -79.1494, -80.1655,
-77.3871, -78.5278, -79.1025, -75.7861, -78.8338, -78.6333,
-78.8988, -80.215, -75.2267, -76.7724, -75.9666, -78.3202,
-75.4477, -76.8513, -79.9216, -76.8641, -75.7269, -76.9217,
-80.1824, -76.2944, -75.5572, -75.1225, -75.3794, -78.4116,
-76.8741, -75.0111, -80.2144)), .Names = c("STATION", "AVGTRANGE",
"LAT", "LONG"), class = c("data.table", "data.frame"), row.names = c(NA,
-130L), .internal.selfref = <pointer: 0x2a40128>, sorted = "STATION")

I am not sure what you are trying to achieve with geom_raster, as your data does not seem to fit the purpose.
Consider the outputs of dot plot:
# Station data as a scatter plot, coloured by AVGTRANGE.
gg <- ggplot(locs, mapping = aes(LONG, LAT, colour = AVGTRANGE)) +
  geom_point()
# geom_raster(aes(fill = AVGTRANGE), interpolate = TRUE)
gg

# The same kind of point layer on the faithfuld example data, for comparison.
data(faithfuld)
gg <- ggplot(data = faithfuld, mapping = aes(x = waiting, y = eruptions, colour = density)) +
  geom_point()
# geom_raster(aes(fill = density), interpolate = TRUE)
gg
I have also tried geom_contour on your data and it does not work:
# Contour attempt: AVGTRANGE is passed as the z aesthetic.
gg <- ggplot(locs, mapping = aes(LONG, LAT, z = AVGTRANGE)) +
  geom_contour()
gg
UPDATE
I have checked the code of geom_raster: it tries to create such a giant plot because the resolution of the raster is based on the minimal distance between points. Some of the points in your data are very close to each other, so the resulting matrix is enormous.
If you round LAT and LONG to 2 digits code works.
# Round both coordinates to 2 decimal places, as stated in the text above, so
# the smallest gap between distinct coordinate values (which sets the raster
# resolution) is no finer than 0.01.
locs$LAT <- round(locs$LAT, digits = 2)
locs$LONG <- round(locs$LONG, digits = 2)

Related

How to overlay a 2d density plot on top of a map

I need some help overlaying a 2d density plot on top of a ggmap plot. I don't really know how to proceed. Any help is welcome.
This is a subset of my data, including start and end coordinates for each individual ride of a fictional bike sharing company:
df <- structure(list(start_lat = c(41.94018, 41.890762, 41.845695,
41.857813, 41.9287386666667, 42.0044803333333, 41.879255, 41.886835,
41.874734, 41.95469, 41.95, 41.8809518333333, 41.96590013976,
41.909668, 41.931248, 41.96167, 41.912133, 41.87947235235, 41.936266,
41.922695, 41.9101756666667, 41.86, 41.91468, 41.892278, 42.03,
41.911386, 41.9716, 41.93, 41.940195, 41.9560855, 41.915784,
41.93314, 41.943739, 41.8671848333333, 41.87464, 41.882242, 41.926277,
41.96167, 41.76, 41.883668, 41.967096, 41.8, 41.9024035, 41.939743,
41.9093960065, 41.915983, 41.87772613, 41.8984238333333, 41.8836331666667,
41.925905, 41.967096, 41.92, 41.884576228, 41.838499, 41.9028846666667,
41.89993001, 41.8, 41.866095, 41.97, 41.9093960065, 41.88, 41.8922376666667,
41.81, 41.9438251666667, 41.883668, 41.9207793333333, 41.954383,
41.9434726666667, 41.8945555, 41.911386, 41.88917683258, 41.86722595682,
41.8531223333333, 41.92, 41.919936, 41.90096, 41.894722, 41.872187,
41.881892, 41.920082, 41.897448, 41.88917683258, 41.9, 41.925858,
41.89, 41.8908470406238, 41.85, 41.890173, 41.92556258, 41.885637,
41.9030376666667, 41.93314, 41.838198, 41.892278, 41.93, 41.894722,
41.90345, 41.6922943333333, 41.9080621666667, 42.025784), start_lng = c(-87.65304,
-87.631697, -87.6225141666667, -87.62455, -87.6538015, -87.6615086666667,
-87.639904, -87.62232, -87.6498425, -87.67393, -87.71, -87.6167566666667,
-87.6936384935, -87.648128, -87.644336, -87.65464, -87.634656,
-87.6256886059, -87.652662, -87.697153, -87.6823075, -87.62,
-87.64332, -87.612043, -87.71, -87.638677, -87.650154, -87.74,
-87.6529666666667, -87.668857, -87.634581, -87.64776, -87.66402,
-87.6260033333333, -87.65703, -87.641066, -87.630834, -87.65464,
-87.58, -87.64867, -87.667429, -87.58, -87.6277486666667, -87.658865,
-87.6776919292, -87.677335, -87.65478743, -87.6223878333333,
-87.629143, -87.64926, -87.667429, -87.7, -87.63188991, -87.6080766666667,
-87.6874035, -87.63443007, -87.59, -87.607267, -87.71, -87.6776919292,
-87.63, -87.6119485, -87.61, -87.671138, -87.64867, -87.6637163333333,
-87.648043, -87.6796343333333, -87.6534645, -87.638677, -87.6385057718,
-87.6153553902, -87.6318963333333, -87.74, -87.64883, -87.623777,
-87.634362, -87.661501, -87.648789, -87.677855, -87.628722, -87.6385057718,
-87.62, -87.638973, -87.66, -87.6186168193817, -87.72, -87.626185,
-87.65840426, -87.641823, -87.631299, -87.64776, -87.645143,
-87.612043, -87.71, -87.634362, -87.667747, -87.6426485, -87.6315093333333,
-87.684107), end_lat = c(41.918306, 41.886875, 41.8456825, 41.8530845574128,
41.890831, 41.99, 41.885637, 41.881319815, 41.88, 41.961068,
41.93, 41.880958, 41.966399801841, 41.89766, 41.9267559875, 41.9578665241517,
41.911386, 41.867888, 41.95078, 41.932588, 41.9245285, 41.8776751666667,
41.9105780349, 41.9239313113662, 42.0192226666667, 41.904613,
41.9947796884, 41.96, 41.9296915, 41.94, 41.94, 41.907066, 41.923931,
41.8707831666667, 41.87772613, 41.872187, 41.892278, 41.961004,
41.7689161666667, 41.8793563587, 41.95078, 41.79, 41.882242,
41.932225, 41.912133, 41.9093960065, 41.8810317, 41.9, 41.89,
41.912133, 41.926277, 41.93190196886, 41.874053, 41.8368228333333,
41.882754, 41.894666, 41.76, 41.882134, 41.96, 41.89637337, 41.87,
41.8787191666667, 41.8, 41.91, 41.917805, 41.88, 41.926277, 41.93,
41.8990156666667, 41.890762, 41.8854833079, 41.874754, 41.85,
41.94, 41.920771, 41.894345, 41.94334, 41.871737, 41.88338, 41.92154,
41.882134, 41.902997, 41.876243, 41.892278, 41.89, 41.886024,
41.86, 41.8918473721099, 41.9093960065, 41.8854833079, 41.89,
41.92883, 41.834734, 41.891466, 41.9296816666667, 41.902973,
41.918491153687, 41.75, 41.9218326666667, 41.9840446107), end_lng = c(-87.636282,
-87.62603, -87.6224476666667, -87.6319313049316, -87.6313945,
-87.66, -87.641823, -87.6295209193, -87.65, -87.695439, -87.71,
-87.616743, -87.6887042820454, -87.62351, -87.6344287848, -87.6495051383972,
-87.638677, -87.623041, -87.659172, -87.636427, -87.658447, -87.6240391666667,
-87.6494219288, -87.6358245313168, -87.6736431666667, -87.640552,
-87.6602845349, -87.69, -87.7080808333333, -87.67, -87.68, -87.667252,
-87.635825, -87.6257745, -87.65478743, -87.661501, -87.612043,
-87.649603, -87.634775, -87.6297910363, -87.659172, -87.6, -87.641066,
-87.658617, -87.634656, -87.6776919292, -87.62408432, -87.62,
-87.63, -87.634656, -87.630834, -87.7011951301, -87.627716, -87.6133453333333,
-87.6259215, -87.638437, -87.55, -87.625125, -87.69, -87.66098386,
-87.62, -87.6355345, -87.59, -87.66, -87.682437, -87.63, -87.630834,
-87.71, -87.6299358333333, -87.631697, -87.6523048564, -87.649807,
-87.64, -87.73, -87.663712, -87.622798, -87.67097, -87.65103,
-87.64117, -87.653818, -87.625125, -87.683825, -87.624426, -87.612043,
-87.65, -87.624117, -87.72, -87.6205801963806, -87.6776919292,
-87.6523048564, -87.63, -87.668507, -87.625813, -87.626761, -87.7081071666667,
-87.63128, -87.6974228024483, -87.64, -87.6439593333333, -87.6602738295
)), row.names = c(NA, -100L), class = "data.frame")
Next, I extracted the min and max values for latitude and longitude and then used those values as limits for my map plot:
library(ggmap)
library(ggplot2)
# Extremes of all start and end coordinates, kept as a one-row data frame.
map_lim <- with(df, data.frame(
  min_lat = min(start_lat, end_lat),
  max_lat = max(start_lat, end_lat),
  min_lng = min(start_lng, end_lng),
  max_lng = max(start_lng, end_lng)
))
map_lim
## min_lat max_lat min_lng max_lng
## 1 41.69229 42.03 -87.74 -87.55

# Fetch the base map for that bounding box and draw it.
map <- get_stamenmap(
  bbox = c(
    left = map_lim$min_lng,
    right = map_lim$max_lng,
    bottom = map_lim$min_lat,
    top = map_lim$max_lat
  )
)
ggmap(map)
This is the resulting map:
Now I want to add a 2d density plot based on the same set of coordinates on top of this map, but I don't know how to do it. This is my 2d density plot code.
# Filled 2d density contours of the ride start points, with the view limited
# to the same extent as the map.
density2d <- ggplot(data = df, mapping = aes(start_lng, start_lat)) +
  coord_equal(
    xlim = c(map_lim$min_lng, map_lim$max_lng),
    ylim = c(map_lim$min_lat, map_lim$max_lat)
  ) +
  labs(x = "Longitude", y = "Latitude") +
  stat_density2d(
    mapping = aes(fill = ..level..),
    geom = "polygon",
    alpha = 0.5
  ) +
  scale_fill_viridis_c()
density2d
You simply add the density layer to your map:
# Draw the base map first, then the density polygons from df on top of it.
ggmap(map) +
  stat_density2d(
    data = df,
    mapping = aes(x = start_lng, y = start_lat, fill = ..level..),
    geom = "polygon",
    alpha = 0.5
  ) +
  scale_fill_viridis_c()

boxplot displays incorrectly when converting from factor to numeric

My graph displays correctly without using a scale. I want it to look better, so I convert the factor to numeric and then use scale_x_continuous. However, the graph looks incorrect after I convert from factor to numeric (How to convert a factor to an integer\numeric without a loss of information?). I can't use the scale without converting to numeric. Please run the sample code below with and without these lines ( main$U <- as.numeric(as.character(main$U)), and + scale_x_continuous(name="Temperature", limits=c(0, 160)) ). Thank you.
library("ggplot2")
library("plyr")
# Three sample data sets, one per type code (2, 5 and 7).
df <- data.frame(
  U = rep(c(25, 85, 125), times = c(5, 3, 2)),
  V = c(1.03, 1.06, 1.1, 1.08, 1.87, 1.56, 1.75, 1.82, 1.85, 1.90),
  type = rep(2, 10)
)
df1 <- data.frame(
  U = rep(c(25, 85, 125), times = c(3, 4, 3)),
  V = c(1.13, 1.24, 1.3, 1.17, 1.66, 1.76, 1.89, 1.90, 1.95, 1.97),
  type = rep(5, 10)
)
df2 <- data.frame(
  U = rep(c(25, 85, 125), times = c(3, 3, 3)),
  V = c(1.03, 1.06, 1.56, 1.75, 1.68, 1.71, 1.82, 1.85, 1.88),
  type = rep(7, 9)
)

# Stack the sets and give the type codes readable labels.
main <- rbind(df, df1, df2)
main$type <- revalue(
  as.factor(main$type),
  c("2" = "type2", "5" = "type5", "7" = "type7")
)

# Factor -> character -> numeric round trip on U, as described in the question.
main$U <- as.factor(main$U)
main$U <- as.numeric(as.character(main$U))

ggplot(main, mapping = aes(x = U, y = V, colour = type)) +
  geom_boxplot(
    position = "identity",
    size = 0.3,
    width = 0.5 / length(unique(main$type))
  ) +
  scale_x_continuous(name = "Temperature", limits = c(0, 160))
You have to specify the group in your call to geom_boxplot, and to keep the legend you can use color=factor(U) (i.e, converting U back). To not lose information on the groups that have the same x-values, I think it is best to create a new grouping column first. You take all unique pairs of U and type and create a new variable based on which row falls into which of these pairs.
# Work on character copies so that (U, type) pairs compare as plain strings.
main$U <- as.character(main$U)
main$type <- as.character(main$type)
# One row per distinct (U, type) pair, in order of first appearance.
grp_keys <- unique(as.matrix(main[, c("U", "type")]))
grp_inds <- seq_len(nrow(grp_keys))
# Look up each row's pair among the distinct pairs. match() is vectorised, so
# this avoids apply(), which coerces the whole data frame to a character
# matrix and returns a list when a row matches zero or several keys.
main$grps <- grp_inds[match(
  paste(main$U, main$type),
  paste(grp_keys[, "U"], grp_keys[, "type"])
)]
Then, plotting (width adjusted because it looks very small with higher range),
# U goes back to numeric so it can be placed on a continuous x axis.
main$U <- as.numeric(as.character(main$U))
# One box per grps value; colour is inherited from the plot-level mapping.
ggplot(data = main, mapping = aes(x = U, y = V, colour = type)) +
  geom_boxplot(
    mapping = aes(group = grps),
    position = "identity",
    size = 0.3,
    width = 20 / length(unique(main$type))
  ) +
  scale_x_continuous(name = "Temperature", limits = c(0, 160))

Color points by date in ggplot2

Hi all: I am struggling to color points by date in ggplot2. There are two outcomes that would work for me here. 1) colour the points by the variable recent_elections and just add straight lines denoting the date of the most recent election for each point. The current code does that. 2) preferably, but harder, just add the lines, coloured differently for each election, showing a legend that printed the date of the most recent federal election.
My current data and attempt is below.
library(dplyr)
library(tidyr)
library(ggplot2)
members <- structure(list(date = structure(c(6209, 6574, 7305, 14984, 15339,
15341, 17169, 17174), class = "Date"), members = c(180835, 193225,
200010, 86545, 95000, 128351, 41000, 124000), population = c(26449000,
26798000, 27512000, 33476688, 33476688, 33476688, 35151728, 35151728
), votes_previous_election = c(2359915, 2685263, 2685263, 4508474,
4508474, 4508474, 3470350, 3470350), vote_percent = c(18.8, 20.4,
20.4, 30.6, 30.6, 30.6, 19.7, 19.7), seats_previous_election = c(32,
43, 43, 103, 103, 103, 44, 44), recent_election = structure(c(5360,
6899, 6899, 15096, 15096, 15096, 16727, 16727), class = "Date")), .Names =
c("date",
"members", "population", "votes_previous_election", "vote_percent",
"seats_previous_election", "recent_election"), class = "data.frame",
row.names = c(NA,
-8L))
# Derive three membership ratios, reshape them to long form, and plot each
# ratio in its own facet with a vertical line at every recent_election date.
members %>%
  select(
    population, votes_previous_election, seats_previous_election, members,
    date, recent_election
  ) %>%
  mutate(
    members_per_capita = members / population,
    members_votes = members / votes_previous_election,
    members_seats = members / seats_previous_election
  ) %>%
  gather(
    Variable, Value,
    c(members_per_capita, members_votes, members_seats)
  ) %>%
  ggplot(mapping = aes(x = date, y = Value, group = recent_election)) +
  geom_point(mapping = aes(fill = recent_election)) +
  facet_wrap(~Variable, scales = "free") +
  geom_vline(
    data = members,
    mapping = aes(xintercept = as.numeric(recent_election), col = "red"),
    show.legend = FALSE
  )
# Same ratios and reshaping as in the question, but the vertical lines are
# coloured by election date (as a factor) and shown in the legend.
members %>%
  select(
    population, votes_previous_election, seats_previous_election, members,
    date, recent_election
  ) %>%
  mutate(
    members_per_capita = members / population,
    members_votes = members / votes_previous_election,
    members_seats = members / seats_previous_election
  ) %>%
  gather(
    Variable, Value,
    c(members_per_capita, members_votes, members_seats)
  ) %>%
  ggplot(mapping = aes(x = date, y = Value, group = recent_election)) +
  geom_point() +
  geom_vline(
    data = members,
    mapping = aes(
      xintercept = as.numeric(recent_election),
      col = factor(recent_election)
    ),
    show.legend = TRUE
  ) +
  facet_wrap(~Variable, scales = "free") +
  scale_color_discrete(name = "Recent Election") +
  # Lower limit set before the earliest election date; NA leaves the upper
  # limit automatic.
  xlim(as.Date("1984-01-01"), NA)
I changed the col="red" in geom_vline to col=factor(recent_election) so that the vertical lines are colored by recent_election. The factor() makes sure that recent_election is treated as discrete instead of continuous. scale_color_discrete sets the legend title. Note that the election date "1984-09-04" is going out of the x range of your points, so I added a xlim(as.Date("1984-01-01"), NA) to also include that election date. NA sets the upper limit automatically.

Synchronise and plot two timeseries data sets in R

I have two data sets from an experiment on a person during different ambient temperatures. P1 represents a patient's physiological response data and P1IAQ represent the environmental monitoring data during the experiment. P1 recorded data 32 times per second but P1IAQ recorded data every 10 seconds.
head(P1IAQ)
Time RH Temp CO2
1 12:04:07 44.2 19.89 664
2 12:04:17 44.2 19.89 664
3 12:04:27 44.2 19.89 665
4 12:04:37 44.2 19.89 665
5 12:04:47 44.2 19.89 666
6 12:04:57 44.2 19.89 668
head(P1)
Time SkinTemp HeartRate RespirationRate
1 00:00:00 27.781 70 10
2 00:00:00 27.780 70 10
3 00:00:00 27.779 70 10
4 00:00:00 27.779 70 10
5 00:00:00 27.778 70 10
6 00:00:00 27.777 70 10
The problem I have is that the time stamp on P1 is wrong. How can I plot them together on the same graph to see if the SkinTemp has a time-lag after the environmental temp is decreased?
EDIT: dput for P1IAQ
I've added the first twenty values for the environmental data. I think the best thing would be to subtract 12:04:07 from all values to make the starting time 00:00:00. I've tried looking at lubridate.
library(lubridate)
# Parse the Time column with hms() and store the result back in place.
P1IAQ[["Time"]] <- hms(P1IAQ[["Time"]])
This datetime post looks interesting, but it's for plotting dates rather than actually altering them.
Plotting data against time in R
dput(P1IAQ)
structure(list(Time = structure(1:19, .Label = c("12:04:07",
"12:04:17", "12:04:27", "12:04:37", "12:04:47", "12:04:57", "12:05:07",
"12:05:17", "12:05:27", "12:05:37", "12:05:47", "12:05:57", "12:06:07",
"12:06:17", "12:06:27", "12:06:37", "12:06:47", "12:06:57", "12:07:07"
), class = "factor"), RH = c(44.2, 44.2, 44.2, 44.2, 44.2, 44.2,
44.2, 44.2, 44.1, 44.1, 44.2, 44.2, 44.2, 44.3, 44.2, 44.2, 44.2,
44.3, 44.3), Temp = c(19.89, 19.89, 19.89, 19.89, 19.89, 19.89,
19.89, 19.89, 19.89, 19.89, 19.94, 19.89, 19.94, 19.94, 19.94,
19.94, 19.94, 19.94, 19.94), CO2 = c(664L, 664L, 665L, 665L,
666L, 668L, 668L, 669L, 667L, 670L, 670L, 672L, 675L, 677L, 682L,
684L, 685L, 686L, 687L)), .Names = c("Time", "RH", "Temp", "CO2"
), class = "data.frame", row.names = c(NA, -19L))
EDIT: I've synchronised the times using lubridate:
# Environmental data: seconds elapsed since the first reading at 12:04:07.
P1IAQ$Time <- period_to_seconds(
  hms(as.character(P1IAQ$Time)) - hms("12:04:07")
)
# Physiological data: the recorded clock time converted to seconds, unshifted.
P1$Time <- period_to_seconds(
  hms(as.character(P1$Time))
)
But now plotting them together is tricky. I've tried ggplot2 but I can't get two vertical axes. Any thoughts
# Overlay ambient and skin temperature on a shared elapsed-time axis.
# A constant string inside aes() is a legend label, not a colour, so the
# series are named there and the colours are set by scale_colour_manual().
ggplot() +
  geom_line(data = P1IAQ, aes(x = Time, y = Temp, color = "Ambient")) +
  # P1 stores its temperature in SkinTemp; it has no Temp column.
  geom_line(data = P1, aes(x = Time, y = SkinTemp, color = "Skin")) +
  scale_colour_manual(name = NULL, values = c(Ambient = "red", Skin = "blue")) +
  xlab("Time (s)") +
  ylab("Temperature ºC")
If you only need the times in each data frame to be on a common scale, you can convert both of them to numeric seconds elapsed since the start of the experiment and not worry about date or time classes. Then you can join the two data frames based on the common time scale.
I used your P1IAQ data sample and created fake P1 data to go with it. Time in my P1 is probably not in the same format as your actual data. If you post a sample of your P1, I can adjust the example below to fit your actual data.
library(dplyr)
library(reshape2)
library(hms)
library(zoo)
library(ggplot2)
theme_set(theme_light())
# Fake P1 data frame
set.seed(10)
# 32 samples per second over 3 minutes, plus the closing endpoint.
n <- 32 * 60 * 3 + 1
P1 <- data.frame(
  Time = as.POSIXct(
    seq(from = 0, to = 180, length.out = n),
    origin = as.Date("2016-05-01"),
    tz = "GMT"
  ),
  # Random walk around 27.78; drawn before RespirationRate, so the order of
  # these two columns fixes which random numbers each one receives.
  SkinTemp = round(cumsum(rnorm(n, mean = 0, sd = 0.01)) + 27.78, digits = 2),
  RespirationRate = round(rnorm(n, mean = 10, sd = 0.5))
)
Convert P1$Time and P1IAQ$Time to numeric values equal to the number of seconds elapsed since the start of the experiment. (Note that P1IAQ$Time in the data you posted is a factor, so I converted to character before further processing.):
# Time of day in seconds for each P1 sample. as_hms() is used because
# as.hms() is deprecated in the hms package.
P1$nTime <- as.numeric(hms::as_hms(P1$Time))
# P1IAQ$Time is a factor, so convert to character before parsing.
P1IAQ$nTime <- as.numeric(hms::as_hms(as.character(P1IAQ$Time)))
# Shift so the first environmental reading sits at 0 seconds.
P1IAQ$nTime <- P1IAQ$nTime - min(P1IAQ$nTime)
Join P1 and P1IAQ by nTime:
# Combine both series on the shared elapsed-seconds column. Rows present in
# only one data frame are kept, with NA in the other frame's columns.
P1j <- full_join(P1, P1IAQ, by = "nTime", suffix = c("_P1", "_P1IAQ")) %>%
  # Make sure joined data frame is sorted by nTime
  arrange(nTime) %>%
  # Fill missing values with Last One Carried Forward. na.rm = FALSE keeps
  # leading NAs in place; the default drops them, which shortens the column
  # and makes mutate fail when P1 has samples before the first P1IAQ reading.
  mutate_at(vars(Time_P1IAQ, RH, Temp, CO2), na.locf, na.rm = FALSE)
Plot after converting data from wide to long format:
# Skin and ambient temperature against elapsed time, one panel per series.
P1j %>%
  select(Time_P1IAQ, nTime, Skin = SkinTemp, Ambient = Temp) %>%
  # Convert from wide to long format for plotting
  melt(id.vars = c("Time_P1IAQ", "nTime")) %>%
  ggplot(mapping = aes(x = nTime, y = value, group = Time_P1IAQ)) +
  geom_line() +
  facet_grid(variable ~ ., scales = "free_y") +
  scale_y_continuous(expand = c(0.5, 0)) +
  labs(
    x = "Elapsed Time (sec)",
    y = expression(Temperature ~ "(" * degree * C * ")")
  )
Another option is to plot temperature changes relative to the start of the experiment. That way, you can have both lines on the same panel without having to deal with them being in different locations:
# Both series on one panel, each shown as its change from the value at the
# earliest nTime.
P1j %>%
  select(Time_P1IAQ, nTime, Skin = SkinTemp, Ambient = Temp) %>%
  # Convert from wide to long format for plotting
  melt(id.vars = c("Time_P1IAQ", "nTime")) %>%
  # Convert temperatures to difference from starting values
  group_by(variable) %>%
  mutate(value = value - value[nTime == min(nTime)]) %>%
  ggplot(mapping = aes(x = nTime, y = value, colour = variable)) +
  geom_line() +
  labs(
    x = "Elapsed Time (sec)",
    y = expression(Temperature ~ Change ~ "(" * degree * C * ")"),
    colour = ""
  )
I don't have your data, but I will prepare something similar. In this case the length of P1 is different from that of P1IAQ:
library(ggplot2)
# Sample stand-ins for the two data sets, with different lengths.
P1 <- data.frame(Time = 1:10, Temp = 51:60)
P1IAQ <- data.frame(Time = 1:8, Temp = 1:8)
# Stack both series into one long data frame and tag each row with its source.
df <- data.frame(
  Time = c(P1$Time, P1IAQ$Time),
  values = c(P1$Temp, P1IAQ$Temp),
  type = rep(c("P1", "P1IAQ"), times = c(length(P1$Time), length(P1IAQ$Time)))
)
# One line per source, each in its own row panel with free scales.
ggplot(df, mapping = aes(Time, values, colour = type)) +
  geom_line() +
  facet_grid(type ~ ., scales = "free") +
  labs(x = "Time (s)", y = "Temperature ºC")

Colour lattice wireframe from class formula by z data heights

Edit: I've discovered one solution, which is to transform the 2d Temp matrix into a 1D array, re-running the code with just this matrix gave me the output I wanted i.e. plot coloured by height of z data. The code is below:
# Palette function running from dodgerblue to firebrick.
mycols <- colorRampPalette(c("dodgerblue", "firebrick"), space = "rgb")
# Wireframe of the Temp matrix alone, draped with 200 palette colours.
wireframe(
  Temp,
  zlim = c(10, 18),
  xlab = list(label = "Distance downstream (m)", rot = 35),
  ylab = list(label = "Time", rot = -35),
  zlab = list(label = expression(paste("Tw (", degree, "C)")), rot = 94),
  scales = list(arrows = FALSE),
  drape = TRUE,
  shade = FALSE,
  colorkey = TRUE,
  aspect = c(1, 1),
  col.regions = mycols(200),
  col = "black"
)
I'm using Windows 7, R version 2.15.0 and lattice_0.20-6.
I'm plotting observed data in lattice using wireframe, I have 3 matrices of observed values (Temp,Dist,Time) so I'm using the formula method (Temp~Dist*Time). How do I instruct wireframe to colour the wireframe based on the heights/ values of my z (Temp) data. At present my code produces a wireframe that colours the plot based on the values of my y (Time) data.
I attach below my code.
# Palette function running from dodgerblue to firebrick.
mycols <- colorRampPalette(c("dodgerblue", "firebrick"), space = "rgb")
# Formula method: Temp against Dist and Time, using rows 1:13 and columns
# 97:193 of each matrix.
wireframe(
  Temp[1:13, 97:193] ~ Dist[1:13, 97:193] * Time[1:13, 97:193],
  zlim = c(10, 18),
  xlab = list(label = "Distance downstream (m)", rot = 35),
  ylab = list(label = "Time", rot = -35),
  zlab = list(label = expression(paste("Tw (", degree, "C)")), rot = 94),
  scales = list(arrows = FALSE),
  drape = TRUE,
  shade = FALSE,
  colorkey = TRUE,
  aspect = c(1, 1),
  col.regions = mycols(200),
  col = "black"
)
Some sample data. Please excuse the naming of matrix columns:
structure(list(Tw1.1. = c(12.15, 11.18526437, 10.51390093, 10.134,
9.711, 9.597, 9.59, 9.557, 9.602, 9.673, 9.753, 10.017, 10.32
), Tw1.1..1 = c(11.97, 11.05071394, 10.39239194, 10.011, 9.63,
9.546, 9.59, 9.571, 9.648, 9.745, 9.837, 10.171, 10.49), Tw1.1..2 = c(11.79,
10.90182264, 10.26796411, 9.893, 9.563, 9.52, 9.6, 9.619, 9.713,
9.808, 9.956, 10.321, 10.6), Tw1.1..3 = c(11.64, 10.74647418,
10.14505213, 9.788, 9.526, 9.525, 9.62, 9.682, 9.787, 9.914,
10.105, 10.42, 10.7), Tw1.1..4 = c(11.52, 10.58632287, 10.01657543,
9.699, 9.514, 9.543, 9.67, 9.743, 9.885, 10.049, 10.249, 10.528,
10.79), Tw1.1..5 = c(11.39, 10.46294559, 9.879615153, 9.619,
9.529, 9.577, 9.74, 9.823, 10.017, 10.21, 10.361, 10.58, 10.84
), Tw1.1..6 = c(11.26, 10.3417786, 9.765186747, 9.576, 9.557,
9.645, 9.81, 9.933, 10.186, 10.344, 10.474, 10.664, 10.94), Tw1.1..7 = c(11.1,
10.22494533, 9.674806064, 9.546, 9.605, 9.717, 9.9, 10.072, 10.338,
10.453, 10.557, 10.76, 11.03), Tw1.1..8 = c(10.93, 10.1003152,
9.604424236, 9.549, 9.676, 9.8, 10, 10.244, 10.436, 10.561, 10.668,
10.848, 11.12), Tw1.1..9 = c(10.76, 9.970098496, 9.545171854,
9.577, 9.757, 9.891, 10.15, 10.399, 10.558, 10.667, 10.778, 10.941,
11.22), Tw1.1..10 = c(10.63, 9.858851801, 9.501869458, 9.611,
9.851, 10.014, 10.31, 10.503, 10.683, 10.789, 10.868, 11.059,
11.34), Tw1.1..11 = c(10.51, 9.770908839, 9.48413064, 9.676,
9.946, 10.164, 10.48, 10.632, 10.8, 10.89, 10.945, 11.152, 11.41
), Tw1.1..12 = c(10.4, 9.702806469, 9.508922546, 9.756, 10.074,
10.332, 10.59, 10.774, 10.905, 10.96, 11.031, 11.213, 11.48)), .Names = c("Tw1.1.",
"Tw1.1..1", "Tw1.1..2", "Tw1.1..3", "Tw1.1..4", "Tw1.1..5", "Tw1.1..6",
"Tw1.1..7", "Tw1.1..8", "Tw1.1..9", "Tw1.1..10", "Tw1.1..11",
"Tw1.1..12"), class = "data.frame", row.names = c("Open", "B8",
"B12", "B25", "B26", "B13", "UsAWS", "B19", "B5", "B3", "B27",
"B17", "DSAWS"))

Resources