Related
I have a data frame with 4000 columns and daily observations sorted by time. I want to create new columns that lag all existing columns 50 times in the past. So for a column Y create 50 additional columns that are Y-1day,Y-2days,Y-3days...Y-50days.
So far I've written the following loop, which does what I need.
The issue is that it's not very fast. Is there a more efficient way I can try?
# Add 50 shifted copies (slideBy = -1 ... -50) of every column except the first.
# The column range 2:ncol(Data) is evaluated once, before the loop starts, so
# the columns added along the way are not themselves processed.
for (col_idx in 2:ncol(Data)) {
  for (lag_days in 1:50) {
    Data <- slide(Data, Var = names(Data)[col_idx], slideBy = -lag_days)
  }
}
I'm attaching a snapshot of my data frame for reproducible example:
structure(list(time = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92,
93, 94, 95, 96, 97, 98, 99, 100), A = c(17.081545, 16.630901,
16.623749, 16.258942, 16.244635, 16.165951, 15.886981, 15.865522,
15.529327, 15.772532, 16.04435, 15.779685, 15.915594, 15.593705,
15.336195, 15.593705, 15.736767, 15.736767, 15.457797, 15.815451,
16.108727, 16.237482, 15.808297, 16.058655, 16.53791, 16.988556,
16.516453, 16.480686, 16.967096, 17.181688, 17.446352, 17.11731,
16.952789, 16.8598, 16.795422, 16.437769, 16.587982, 16.845493,
17.167381, 17.510729, 17.410587, 17.474964, 17.246065, 17.703863,
17.424892, 17.174536, 17.103004, 16.695278, 16.93133, 16.638054,
16.115879, 16.20887, 15.987124, 16.151646, 16.151646, 16.115879,
16.173105, 16.101574, 16.080114, 15.9299, 15.879828, 15.786839,
15.314735, 15.27897, 15.493563, 15.436337, 15.286123, 15.121602,
15.27897, 14.88555, 14.785408, 14.592275, 14.785408, 14.856938,
14.670959, 15.243204, 15.09299, 15.250358, 15.264664, 15.18598,
14.771102, 14.842632, 15, 15.150214, 15.200286, 15.078684, 15.379113,
15.658083, 15.636623, 15.879828, 15.715307, 15.729613, 15.422031,
16.080114, 16.39485, 16.502146, 16.74535, 16.902718, 17.088697,
16.831188), AAP = c(29.033333, 28.84, 28.893333, 28.866667, 28.700001,
28.799999, 28.973333, 28.866667, 28.806667, 28.973333, 29.713333,
29.033333, 28.626667, 28.546667, 28.173334, 28.166666, 28.24,
28.553333, 28.366667, 28.733334, 28.833334, 28.9, 29.166666,
29.846666, 30.08, 30.093334, 29.673334, 29.860001, 30.053333,
30.186666, 29.833334, 29.673334, 34.533333, 33.82, 33.373333,
33.633335, 33.593334, 33.833332, 33.586666, 33.946667, 34.66,
34.599998, 34.84, 34.779999, 34.093334, 33.713333, 33.560001,
33.933334, 33.086666, 33.139999, 33.279999, 33.200001, 33.259998,
32.466667, 32.713333, 32.686668, 33.053333, 33.806667, 33.333332,
33.613335, 33.633335, 33.799999, 34.206665, 34.5, 34.166668,
34.206665, 33.933334, 34, 34.373333, 33.700001, 33.173332, 32.633335,
32.639999, 34.013332, 33.566666, 34.053333, 34.053333, 34.826668,
35.106667, 35.68, 35.653332, 35.566666, 35.380001, 35.419998,
35.966667, 36.573334, 36.673332, 36.486668, 36.286667, 36.099998,
35.433334, 35.419998, 35.84, 36.533333, 36.779999, 38.98, 39.633335,
39.646667, 39.486668, 39.433334), AAPL = c(4.520714, 4.567143,
4.607143, 4.610714, 4.946429, 4.925714, 4.611429, 4.675714, 4.985714,
5.014286, 5.046429, 4.991428, 5.032857, 5.035, 5.054286, 5.146429,
5.160714, 5.188571, 5.284286, 5.492857, 5.537857, 5.687857, 5.557857,
5.631429, 5.638571, 5.778572, 5.624286, 5.597143, 5.800714, 6.045,
6.315, 6.437857, 6.272143, 6.200714, 6.092143, 6.302143, 6.352143,
6.356429, 6.408571, 6.357143, 6.302857, 5.97, 6.115714, 6.107143,
5.79, 5.621428, 5.69, 5.752857, 5.76, 5.851429, 5.882857, 6.035714,
6.137143, 6.242857, 6.118571, 6.078571, 6.071429, 6.075714, 5.964286,
6.114286, 5.952857, 5.841429, 5.87, 5.984286, 6.047143, 6.222857,
6.248571, 5.988572, 6.094285, 5.862857, 5.322857, 5.05, 5.088572,
5.298572, 5.072857, 5.311429, 5.071429, 5.282857, 5.17, 5.135714,
5.077143, 5.151429, 5.204286, 5.172857, 5.307143, 5.24, 5.32,
5.281428, 5.202857, 5.087143, 4.875714, 4.967143, 5.078571, 5.051429,
5.12, 5.364286, 5.364286, 5.68, 5.671429, 5.682857), ABC = c(14.5375,
14.4225, 14.395, 14.5175, 14.475, 14.475, 14.51, 14.515, 14.275,
14.3175, 14.4875, 14.375, 14.5025, 14.2525, 14.3925, 14.13, 14.47,
14.365, 14.5925, 14.57, 14.74, 14.71, 14.995, 14.9, 14.8625,
15.0325, 14.78, 14.875, 15.085, 15.0525, 15.4275, 15.3075, 14.9225,
15, 14.7025, 14.7975, 15, 15, 14.975, 15.3775, 15.435, 15.5325,
15.6625, 15.6575, 15.695, 15.1275, 15.1025, 15.0775, 15.265,
15.0325, 14.905, 15.1975, 15.215, 15.2025, 15.1025, 15.3775,
15.2775, 13.5075, 13.5275, 13.95, 14.3225, 14.09, 14.4275, 14.735,
14.6475, 14.8, 14.4575, 14.62, 14.7525, 14.7, 14.9, 15.125, 14.83,
14.9525, 14.825, 14.9625, 15, 14.975, 14.9675, 15.0975, 15.0875,
15.32, 15.5125, 15.38, 15.51, 15.575, 15.7475, 15.9975, 15.9175,
15.895, 15.955, 15.98, 16.209999, 16.459999, 16.5725, 16.514999,
16.4925, 16.5, 16.495001, 16.4825), ABMD = c(15.01, 14.98, 14.69,
14.52, 14.29, 14.42, 14.31, 14.17, 12.45, 12.05, 11.87, 11.97,
11.41, 11.16, 11.06, 11.2, 11.1, 11.57, 11.43, 11.88, 11.58,
11.12, 11.16, 11.32, 10.97, 10.88, 10.72, 10.3, 10.75, 10.25,
10.29, 10.41, 10.02, 10.05, 10.08, 10, 10.24, 10.89, 10.7, 10.8,
10.66, 10.71, 11.12, 11.18, 11.2, 10.95, 11.07, 11.12, 11.3,
11.19, 10.83, 10.56, 10.37, 10.47, 10.33, 10.17, 10.51, 10.4,
10.56, 10.74, 10.58, 10.6, 10.57, 10.71, 11.23, 11.28, 11.51,
11.15, 10.98, 10.98, 11.05, 10.76, 10.96, 11.1, 10.62, 11.1,
10.53, 10.69, 10.65, 10.73, 10.15, 10.15, 9.52, 9.6, 9.6, 9.52,
9.47, 9.44, 9.35, 9.27, 9.13, 8.92, 9.26, 9.45, 9.97, 10.25,
10.28, 9.99, 10.16, 10.17), ABT = c(22.392265, 22.166759, 21.912466,
22.40666, 22.790501, 23.011208, 22.588984, 22.517014, 22.085194,
22.19075, 22.089993, 22.09479, 21.95085, 22.061205, 22.037214,
22.027618, 22.018023, 21.811708, 21.720547, 21.600595, 21.854891,
21.898071, 21.907667, 21.840496, 21.874083, 21.725344, 21.667768,
21.581404, 22.166759, 22.305902, 22.488226, 22.469034, 22.339487,
22.26272, 21.802113, 21.946053, 22.243528, 22.200346, 22.066002,
22.051607, 22.099588, 22.075598, 22.267517, 22.382669, 22.310699,
22.02282, 22.209942, 22.070801, 22.128376, 21.907667, 21.792517,
21.365494, 21.336706, 21.048826, 20.996048, 21.39908, 21.562212,
21.677364, 21.95085, 22.430651, 22.368277, 22.161963, 22.157164,
22.646561, 22.843279, 23.19833, 22.963228, 22.91045, 22.98242,
23.049591, 23.169542, 23.927626, 23.500605, 23.111965, 22.69454,
23.078381, 22.824085, 22.920046, 23.001612, 23.255905, 23.073582,
23.586967, 23.692524, 23.634949, 23.850859, 23.601362, 23.519796,
23.543785, 23.438231, 23.634949, 23.567776, 23.395048, 23.735706,
23.706919, 23.678129, 23.529392, 23.452623, 23.366261, 23.351866,
23.145552), ACN = c(26.370001, 25.75, 25.65, 25.42, 26.610001,
26.959999, 26.5, 26.389999, 26.18, 26.290001, 26.1, 26, 25.67,
25.16, 24.9, 25.200001, 25.4, 25.68, 25.6, 26.049999, 25.99,
25.83, 25.48, 25.73, 25.77, 25.85, 25.51, 25.42, 25.200001, 24.639999,
24.9, 25.049999, 24.51, 24.9, 24.799999, 24.709999, 24.48, 25.15,
25.549999, 25.59, 25.42, 25.110001, 25.370001, 25.49, 25.32,
25.17, 24.950001, 24.459999, 24.48, 23.98, 24.030001, 23.950001,
23.66, 24.01, 24.280001, 24.299999, 24.4, 24.57, 24.16, 24.559999,
24.15, 24.440001, 24.35, 24.860001, 24.969999, 24.889999, 23.700001,
23.34, 23.440001, 23.120001, 22.860001, 22.5, 22.57, 22.440001,
21.9, 21.959999, 21.75, 21.85, 21.549999, 21.469999, 21.620001,
21.700001, 21.969999, 22.1, 22.1, 21.82, 22, 22.08, 21.860001,
21.92, 21.99, 22.049999, 22.01, 22.049999, 22.5, 22.790001, 22.719999,
22.76, 22.67, 22.34), ADBE = c(30.844999, 30.030001, 29.865,
29.370001, 29.389999, 29.41, 29.059999, 29.49, 29.110001, 29.115,
29.190001, 28.940001, 29.035, 28.535, 27.695, 27.790001, 28.004999,
28.084999, 27.74, 28.450001, 28.950001, 31.145, 31.709999, 31.995001,
31.76, 31.85, 31.295, 31.34, 31.85, 31.735001, 32.455002, 32.299999,
31.535, 31.415001, 30.754999, 30.875, 30.695, 30.715, 30.875,
31.17, 31.174999, 31.174999, 31.885, 32.535, 32.474998, 32.255001,
32.654999, 32.209999, 32.669998, 32.27, 31.594999, 31.945, 33.904999,
33.349998, 33.18, 33.134998, 33.27, 33.555, 33.110001, 33.865002,
33.584999, 33.380001, 33.290001, 33.424999, 34.049999, 34.195,
33.630001, 33.400002, 33.450001, 32.535, 31.74, 30.33, 27.385,
29.049999, 28.625, 29.77, 30.145, 30.02, 29.559999, 29.225, 29.235001,
29.735001, 28.575001, 28.645, 28.775, 28.459999, 28.85, 29.334999,
28.76, 28.965, 28.889999, 29.049999, 29.955, 29.889999, 30.549999,
31.059999, 31.115, 31.360001, 32.419998, 32.759998), ADI = c(36.389999,
35.400002, 35.560001, 35.5, 35.549999, 35.41, 35.080002, 35.560001,
35.099998, 35.639999, 36.07, 35.139999, 34.650002, 34.470001,
34.049999, 34.299999, 34.880001, 34.830002, 34.740002, 35.889999,
35.990002, 36.009998, 35.240002, 37.52, 37.52, 38.02, 37.18,
36.830002, 38.049999, 37.599998, 37.32, 37.130001, 36.700001,
36.299999, 36.5, 36.59, 37.32, 37.5, 36.720001, 38, 37.709999,
36.93, 37.119999, 37.049999, 36.950001, 36.919998, 37.849998,
37.130001, 37.209999, 36.57, 35.919998, 36.02, 35.830002, 35.709999,
35.830002, 36.23, 35.799999, 35.66, 35.119999, 36.330002, 36.139999,
35.709999, 35.599998, 35.310001, 35.41, 36.09, 35.669998, 35.34,
34.93, 34.099998, 33.650002, 32.84, 33.360001, 33.849998, 33.419998,
34.349998, 33.799999, 33.700001, 33.52, 33.360001, 33.52, 34.110001,
33.849998, 33.669998, 34.560001, 34.619999, 34.619999, 34.549999,
34.130001, 34.060001, 34.310001, 35.490002, 36.419998, 36.700001,
36.860001, 36.889999, 37.080002, 36.529999, 36.849998, 36.290001
)), row.names = c(NA, 100L), class = "data.frame")
We can use shift from data.table which can take a vector of values for n
library(data.table)
# Lags (in days) to generate for every column. Defined once so that the shift
# and the generated column names cannot get out of sync.
lags <- 1:50

setDT(Data)                    # convert to a data.table
value_cols <- names(Data)[-1]  # every column except the first (time)

# shift() accepts a vector for n and returns one lagged column per (column, lag)
# pair. The names below assume the result is ordered column by column, with all
# lags of one column together.
out <- Data[, shift(.SD, n = lags), .SDcols = value_cols]
names(out) <- paste0(rep(value_cols, each = length(lags)), "_", lags, "days")

# Add the lagged columns to Data by reference; the trailing [] prints the result.
Data[, names(out) := out][]
df<-structure(list(BBAS3 = c(22.85, 22.78, 22.8, 22.22, 22.51, 21.11,
20.84, 20.79, 20.67, 20.9, 20.95, 20.7, 21.03, 21.96, 21.9, 21.8,
21.9, 22.49, 22.65, 22.9, 22.19, 22.44, 21.66, 22.5, 22.96, 23.36,
23.64, 23.46, 23.85, 23.74, 23.9, 23.97, 23.95, 23.85, 23.66,
23.52, 23.5, 23.57, 23.28, 23.09, 23.74, 24.09, 23.96, 23.93,
23.07, 23.54, 24.04, 24.82, 24.58, 24.51, 23.88, 23.16, 23.79,
24.61, 25.12, 26, 25.87, 25.44, 25.6, 27.04, 26.8, 27.52, 27.65,
28.36, 28.77, 28.59, 28.63, 28.16, 27.58, 27.49, 27.6, 27.28,
26.95, 27.09, 27.05, 27.29, 26.74, 26.61, 26.04, 26.05, 25.68,
25.8, 25.49, 25.05, 25.33, 25.24, 25.17, 25.16, 25.11, 24.88,
25.44, 24.74, 24.78, 24.97, 25.25, 24.54, 24.64, 24.4, 23.61,
23.24, 23.1, 22.89, 23.29, 23.42, 23.03, 22.57, 22.4, 22.05,
21.88, 22.09, 21.9, 21.47, 21, 20.71, 20.71, 21.13, 20.86, 20.17,
20.46, 20.67, 21.24, 21.29, 21.54, 21.42, 21.06, 20.74, 20.29,
19.56, 19.66, 20.38, 20.93, 21.24, 20.8, 21.05, 20.82, 20.64,
20.73, 20.98, 21.14, 20.89, 20.46, 20.73, 20.56, 20.37, 19.81,
19.27, 19.41, 18.85, 18.73, 18.76, 18.91, 18.63, 19.18, 20.33,
20.31, 19.75, 19.55, 19.5, 19.34, 19.38, 19.41, 20.09, 20.74,
21.09, 21.03, 20.93, 20.5, 21.17, 21.7, 21.49, 22.08, 22.17,
22.7, 23.15, 23.03, 23.45, 24.29, 24.31, 24.2, 24.16, 24.19,
23.8, 24, 23.41, 22.73, 22.29, 22.82, 22.7, 22.96, 22.83, 23,
22.95, 22.01, 22.39, 22.54, 22.7, 22.55, 22.75, 23.25, 23.66,
24.19, 23.99, 24.03, 24.23, 24.93, 25.43, 25.15, 25.61, 25.58,
25.45, 24.92, 24.4, 24.01, 23.88, 24.43, 24.97, 24.96, 24.79,
24.3, 24.07, 23.88, 24.03, 24.21, 24.22, 23.82, 23.47, 23.8,
23.86, 23.77, 24.02, 24.3, 24.39, 23.95, 23.84, 23.59, 23.72,
23.75, 23.33, 23.16, 22.49, 22.5, 22.78, 22.4, 21.52, 21.72,
21.33, 21.61, 21.08, 21.13, 21.09, 20.94, 21.19, 21.31, 21.12,
21.52, 21.61, 21.96, 22.16, 21.88, 22.49, 22.58, 22.71, 22.96,
23.2, 23.74, 23.79, 23.65, 23.85, 23.01, 23.29, 23.65, 23.42,
23.16, 23.03, 23.3, 22.85, 22.67, 22.19, 22.14, 22.32, 22.14,
21.43, 20.35, 19.73, 19.82, 19.98, 20.28, 20.11, 20.11, 19.97,
19.55, 19.63, 19.28, 19.29, 19.15, 19.49, 19.15, 19.15, 19.13,
18.8, 18.28, 18.36, 18.31, 18.28, 18.41, 18.43, 18.07, 18.22,
18.66, 19.53, 20.01, 19.52, 20.08, 18.85, 19.06, 19.04, 18.94,
19.31, 18.94, 19.14, 19.74, 19.74, 19.92, 20.04, 19.83, 20.09,
20.1, 19.52, 19.81, 19.83, 20.44, 21.33, 21.6, 21.83, 21.75,
21.57, 22.44, 22.41, 22.27, 22.98, 23.07, 23.22, 23.01, 22.56,
22.91, 23.05, 22.65, 22.19, 21.71, 21.55, 21.34, 20.82, 20.11,
20.21, 19.93, 20.11, 20.35, 20.69, 20.24, 19.8, 19.87, 20.17,
19.93, 19.99, 20.19, 20.14, 19.8, 19.22, 20.04, 20.68, 20.01,
20.43, 20.14, 20.38, 19.78, 18.89, 18.99, 18.65, 18.82, 18.5,
17.61, 16.47, 16.85, 16.72, 17.04, 17.02, 16.65, 15.57, 15.69,
16.08, 15.93, 15.87, 16.15, 16.63, 17.02, 17.06, 17.06, 16.81,
16.7, 16.05, 16.08, 15.95, 16.34, 17, 17.27, 17.43, 17.5, 16.81,
16.17, 16.01, 16.31, 16.35, 16.35, 16.2, 16.18, 15.85, 16.25,
16.82, 16.86, 16.79, 17.15, 17.37, 17.53, 17.38, 17.29, 18.05,
18.01, 16.98, 16.35, 17.19, 17.3, 17.95, 18.76, 18.77, 18.78,
19.26, 19.22, 19.05, 19, 19.28, 19.82, 19.79, 19.54, 19.36, 19.84,
19.99, 20.26, 20.03, 19.97, 19.65, 20.1, 20.59, 21.16, 20.67,
20.36, 20.24, 20.28, 20.34, 21.62, 21.73)), row.names = c(NA,
-460L), class = c("tbl_df", "tbl", "data.frame"))
With this data frame I create a list of data frames with the code below:
samples_size <- c(9, 7, 5, 3)
# One data frame per sample size, each holding the first n_rows rows of df.
list_of_df <- lapply(samples_size, function(n_rows) {
  df %>% slice(1:n_rows)
})
list_of_df
Now I have the vector below, which will be the new column to be included in each data frame of the list list_of_df, respecting the sample size:
time=seq(samples_size[i],1)
The Final result should be a list of dataframes like this:
# Desired result: bind the countdown column (n, n-1, ..., 1) onto each list
# element and store it back at the SAME index it was read from.
list_of_df[[1]] <- cbind(list_of_df[[1]], seq(samples_size[1], 1))
list_of_df[[2]] <- cbind(list_of_df[[2]], seq(samples_size[2], 1))
list_of_df[[3]] <- cbind(list_of_df[[3]], seq(samples_size[3], 1))
list_of_df[[4]] <- cbind(list_of_df[[4]], seq(samples_size[4], 1))
And then, after this I would like to put the new column in the first position.
Is it possible to do all of these steps by using the first code above with lapply function :
list_of_df <- lapply(samples_size,function(i)df %>% slice(1:i))
and using dplyr Package??
Yes, and there is no need for dplyr:
# For each sample size i: take the first i rows of df and put a countdown
# column `time` (i, i - 1, ..., 1) in front of them. Returns a list with one
# data frame per element of samples_size.
lapply(samples_size, function(i) cbind(time = i:1, df[1:i, ]))
For instance, the first element will be
# [[1]]
# time BBAS3
# 1 9 22.85
# 2 8 22.78
# 3 7 22.80
# 4 6 22.22
# 5 5 22.51
# 6 4 21.11
# 7 3 20.84
# 8 2 20.79
# 9 1 20.67
As I was saying in the comment, lapply takes the elements of samples_size one by one (here each is called i), and the manually defined anonymous function constructs some kind of result. Then all the results are returned as a list (contrary to sapply, which would try to merge the results into, say, a vector). So the time column is simply i:1, which is a short way to write seq(i, 1); cbind stands for column bind, which is what you are after, and by putting time = i:1 first we make it the first column in every intermediate result. Lastly, df[1:i, ] gives the first i rows of df and, in this case, is a simple base R analogue of slice.
This is my code:
My dataframe is:
df<-structure(list(BBAS3 = c(22.85, 22.78, 22.8, 22.22, 22.51, 21.11,
20.84, 20.79, 20.67, 20.9, 20.95, 20.7, 21.03, 21.96, 21.9, 21.8,
21.9, 22.49, 22.65, 22.9, 22.19, 22.44, 21.66, 22.5, 22.96, 23.36,
23.64, 23.46, 23.85, 23.74, 23.9, 23.97, 23.95, 23.85, 23.66,
23.52, 23.5, 23.57, 23.28, 23.09, 23.74, 24.09, 23.96, 23.93,
23.07, 23.54, 24.04, 24.82, 24.58, 24.51, 23.88, 23.16, 23.79,
24.61, 25.12, 26, 25.87, 25.44, 25.6, 27.04, 26.8, 27.52, 27.65,
28.36, 28.77, 28.59, 28.63, 28.16, 27.58, 27.49, 27.6, 27.28,
26.95, 27.09, 27.05, 27.29, 26.74, 26.61, 26.04, 26.05, 25.68,
25.8, 25.49, 25.05, 25.33, 25.24, 25.17, 25.16, 25.11, 24.88,
25.44, 24.74, 24.78, 24.97, 25.25, 24.54, 24.64, 24.4, 23.61,
23.24, 23.1, 22.89, 23.29, 23.42, 23.03, 22.57, 22.4, 22.05,
21.88, 22.09, 21.9, 21.47, 21, 20.71, 20.71, 21.13, 20.86, 20.17,
20.46, 20.67, 21.24, 21.29, 21.54, 21.42, 21.06, 20.74, 20.29,
19.56, 19.66, 20.38, 20.93, 21.24, 20.8, 21.05, 20.82, 20.64,
20.73, 20.98, 21.14, 20.89, 20.46, 20.73, 20.56, 20.37, 19.81,
19.27, 19.41, 18.85, 18.73, 18.76, 18.91, 18.63, 19.18, 20.33,
20.31, 19.75, 19.55, 19.5, 19.34, 19.38, 19.41, 20.09, 20.74,
21.09, 21.03, 20.93, 20.5, 21.17, 21.7, 21.49, 22.08, 22.17,
22.7, 23.15, 23.03, 23.45, 24.29, 24.31, 24.2, 24.16, 24.19,
23.8, 24, 23.41, 22.73, 22.29, 22.82, 22.7, 22.96, 22.83, 23,
22.95, 22.01, 22.39, 22.54, 22.7, 22.55, 22.75, 23.25, 23.66,
24.19, 23.99, 24.03, 24.23, 24.93, 25.43, 25.15, 25.61, 25.58,
25.45, 24.92, 24.4, 24.01, 23.88, 24.43, 24.97, 24.96, 24.79,
24.3, 24.07, 23.88, 24.03, 24.21, 24.22, 23.82, 23.47, 23.8,
23.86, 23.77, 24.02, 24.3, 24.39, 23.95, 23.84, 23.59, 23.72,
23.75, 23.33, 23.16, 22.49, 22.5, 22.78, 22.4, 21.52, 21.72,
21.33, 21.61, 21.08, 21.13, 21.09, 20.94, 21.19, 21.31, 21.12,
21.52, 21.61, 21.96, 22.16, 21.88, 22.49, 22.58, 22.71, 22.96,
23.2, 23.74, 23.79, 23.65, 23.85, 23.01, 23.29, 23.65, 23.42,
23.16, 23.03, 23.3, 22.85, 22.67, 22.19, 22.14, 22.32, 22.14,
21.43, 20.35, 19.73, 19.82, 19.98, 20.28, 20.11, 20.11, 19.97,
19.55, 19.63, 19.28, 19.29, 19.15, 19.49, 19.15, 19.15, 19.13,
18.8, 18.28, 18.36, 18.31, 18.28, 18.41, 18.43, 18.07, 18.22,
18.66, 19.53, 20.01, 19.52, 20.08, 18.85, 19.06, 19.04, 18.94,
19.31, 18.94, 19.14, 19.74, 19.74, 19.92, 20.04, 19.83, 20.09,
20.1, 19.52, 19.81, 19.83, 20.44, 21.33, 21.6, 21.83, 21.75,
21.57, 22.44, 22.41, 22.27, 22.98, 23.07, 23.22, 23.01, 22.56,
22.91, 23.05, 22.65, 22.19, 21.71, 21.55, 21.34, 20.82, 20.11,
20.21, 19.93, 20.11, 20.35, 20.69, 20.24, 19.8, 19.87, 20.17,
19.93, 19.99, 20.19, 20.14, 19.8, 19.22, 20.04, 20.68, 20.01,
20.43, 20.14, 20.38, 19.78, 18.89, 18.99, 18.65, 18.82, 18.5,
17.61, 16.47, 16.85, 16.72, 17.04, 17.02, 16.65, 15.57, 15.69,
16.08, 15.93, 15.87, 16.15, 16.63, 17.02, 17.06, 17.06, 16.81,
16.7, 16.05, 16.08, 15.95, 16.34, 17, 17.27, 17.43, 17.5, 16.81,
16.17, 16.01, 16.31, 16.35, 16.35, 16.2, 16.18, 15.85, 16.25,
16.82, 16.86, 16.79, 17.15, 17.37, 17.53, 17.38, 17.29, 18.05,
18.01, 16.98, 16.35, 17.19, 17.3, 17.95, 18.76, 18.77, 18.78,
19.26, 19.22, 19.05, 19, 19.28, 19.82, 19.79, 19.54, 19.36, 19.84,
19.99, 20.26, 20.03, 19.97, 19.65, 20.1, 20.59, 21.16, 20.67,
20.36, 20.24, 20.28, 20.34, 21.62, 21.73)), row.names = c(NA,
-460L), class = c("tbl_df", "tbl", "data.frame"))
I want to create dataframes and put them in a list. The condition to create these dataframes will respect the size of the sample:
I would like to do this with dplyr package.
I did this with a for loop:
samples_size <- c(9, 7, 5, 3)

# Preallocate one list slot per requested sample size.
my_samples <- vector(mode = "list", length = length(samples_size))

# seq_along()/seq_len() stay empty for zero-length or zero-valued input,
# whereas 1:length(x) and 1:n would count down (c(1, 0)) and index wrongly.
for (i in seq_along(samples_size)) {
  # Keep the first samples_size[i] rows of df.
  my_samples[[i]] <- df[seq_len(samples_size[i]), ]
}
my_samples
How can I do this using dplyr package?
Any help?
Thanks
Using lapply and dplyr
samples_size <- c(9, 7, 5, 3)
# Keep the FIRST i rows for each sample size, matching df[1:samples_size[i], ]
# in the for-loop version. sample_n(i) would instead draw i rows at random,
# giving different rows on every run.
list_of_df <- lapply(samples_size, function(i) df %>% slice(seq_len(i)))
list_of_df
I have a large data set that consists of thousands of measurements of length and weight. I have provided a subset of 500 observations here:
df <- structure(list(length_cm = c(24.7, 23.8, 21.9, 23.2, 23.5, 22.2,
20.5, 22.6, 24, 21.6, 22.4, 21.2, 20.6, 23.1, 21.4, 23.1, 23.5,
23, 21.8, 22.4, 23, 23.8, 24, 21, 23.4, 23.2, 21.6, 25.9, 22.1,
30.6, 22.1, 21.7, 23.2, 21.1, 23.8, 23.2, 27.2, 23.8, 21.6, 21.1,
21.7, 22.9, 23.3, 24.1, 22.7, 20.4, 22.5, 21.7, 23.2, 22.7, 20.6,
23.7, 24.6, 23.5, 26.3, 23.6, 22.2, 23.6, 21.4, 23.3, 24.7, 24.4,
21.8, 24.9, 22.2, 23.1, 25, 23.5, 22.5, 20.4, 23.9, 23.7, 24,
24.2, 22.9, 36.4, 30, 26, 28.5, 27, 35.7, 24.3, 28.6, 29.8, 18.7,
25.7, 34.7, 31.4, 23.4, 37.7, 26.7, 28.3, 30.8, 29.2, 27.2, 25.6,
39, 35.1, 41.2, 35.7, 29.9, 25.7, 24.6, 24, 24.9, 31, 29.9, 29.4,
25.4, 20.2, 27.8, 32.7, 23.4, 29.1, 26.3, 25.7, 26, 24.9, 26.3,
31.5, 30.1, 25.9, 28.8, 37.9, 38.4, 21.5, 20.5, 21.3, 21.3, 20.9,
20.8, 22.5, 22.4, 21.4, 16.8, 17.3, 22.7, 19.7, 21.2, 18.1, 23.5,
18.1, 22, 18.5, 18.4, 19.2, 19.4, 19.9, 20.5, 18.6, 22.6, 20.9,
20.7, 20.6, 20.6, 21.6, 23.7, 22.8, 22.9, 20.8, 21.3, 23.5, 21.1,
21.6, 24, 21, 23.3, 20.3, 22.4, 23.7, 24.6, 20.7, 23.1, 22.6,
22.7, 19.5, 23, 19.8, 21, 19.8, 19.8, 17.2, 21.8, 25.3, 21.3,
19.2, 22.1, 24.5, 23.2, 22.6, 19, 22, 17.5, 19.9, 24.4, 23.7,
19.9, 23, 20.5, 18.3, 23.2, 21.1, 20.4, 22.2, 19.7, 19.2, 24,
23.3, 23.3, 19, 21.5, 22, 19.1, 23.7, 19.9, 21.2, 23, 27.3, 20.7,
22, 19.3, 24.9, 18.2, 20, 19.3, 25, 18, 21.8, 23.4, 23.9, 25.2,
18.5, 22.2, 24.6, 22, 20.4, 20.7, 21.7, 19.1, 23.1, 21.5, 21.2,
20.6, 22.3, 22.8, 21.3, 21.6, 22, 23, 24.2, 21.3, 19.7, 18.8,
20.9, 20.3, 22.3, 18.9, 19.9, 20.2, 23.9, 19.7, 19.5, 17.6, 23.1,
20.4, 20, 19.7, 20.3, 21.2, 23.9, 24, 25.6, 23.9, 23.5, 20.5,
30.8, 32.8, 28.4, 28.7, 28, 28.9, 29.8, 31, 31.7, 28.6, 28.7,
28.7, 26.7, 24.6, 30, 36.5, 26.5, 32, 29.6, 30.7, 27.7, 24.1,
29.8, 28.8, 26, 22.4, 24, 24.8, 22.7, 22.7, 23.8, 25.3, 32.3,
26.8, 22.1, 24.2, 23.8, 25.3, 24.1, 22.6, 22.9, 24.4, 26.7, 24.4,
24.7, 25, 23.7, 24.3, 22.3, 22.7, 20, 22.5, 24.5, 25.1, 24, 22,
20, 21.9, 18.3, 19.9, 19.4, 23.5, 20.2, 20, 17.8, 20.5, 23.2,
18.5, 21.2, 18.2, 19.1, 22.1, 18.3, 21.6, 19.5, 22.7, 23.6, 24.6,
23.2, 24.4, 19.1, 22.8, 23, 18.8, 22.6, 19, 21.7, 20.8, 23.7,
20.8, 20, 23.2, 22, 21.4, 20.6, 22.6, 23.8, 21, 26.4, 24.5, 32.6,
36.1, 36, 31, 33.1, 31.3, 34.2, 41.9, 35.4, 33.9, 31.9, 29.3,
34.2, 29.9, 36.4, 38.5, 30.7, 40.2, 34.1, 29.7, 37.8, 37.8, 35.3,
39, 39.5, 34.1, 30.5, 33.3, 33.2, 36, 31.6, 35, 34.2, 33.1, 31.5,
33.5, 33.7, 39, 33.2, 35, 34.1, 32.6, 36.2, 34.4, 31.7, 32, 37.5,
31.5, 32.7, 31.7, 35.7, 32.4, 28.5, 33.7, 33.9, 33.6, 34, 32,
29.8, 35, 36, 31.7, 32.5, 32, 31, 29.5, 33.4, 32.5, 26.5, 28,
35.3, 26, 26.5, 38.9, 32.7, 36.4, 35.7, 27.7, 25.8, 25.3, 30.1,
36, 33.4, 37, 33.6, 31.7, 29.7, 35.9, 28.5, 33.1, 33.9, 29, 36.5,
35.5, 29.2, 37.3, 40.3, 35.7, 32.6, 38.8, 40, 38.9, 39, 33.3,
33.5, 34.3, 38.8, 34.4, 36, 35.9, 35.1, 30.7, 38.1, 31.3, 35,
36.3, 32.4, 32.3, 35.5, 36.4, 36, 40.8, 34.2, 30.1, 35.6), wt_kg = c(0.165,
0.1412, 0.1043, 0.1225, 0.1247, 0.1099, 0.087, 0.1176, 0.1431,
0.1041, 0.1213, 0.0937, 0.0856, 0.1255, 0.1099, 0.124, 0.1361,
0.1384, 0.1021, 0.1113, 0.12, 0.1513, 0.1448, 0.0978, 0.138,
0.1232, 0.0942, 0.1881, 0.1038, 0.3498, 0.1122, 0.094, 0.1268,
0.1009, 0.1358, 0.12, 0.2388, 0.1456, 0.0982, 0.0903, 0.1005,
0.1252, 0.1138, 0.1476, 0.1326, 0.0849, 0.108, 0.0996, 0.1229,
0.1279, 0.0874, 0.1492, 0.1416, 0.1187, 0.193, 0.1383, 0.1125,
0.1449, 0.0941, 0.1265, 0.1823, 0.1455, 0.0948, 0.1603, 0.1119,
0.1124, 0.1641, 0.1259, 0.116, 0.086, 0.1361, 0.1284, 0.1403,
0.1461, 0.1195, 0.5985, 0.3099, 0.1829, 0.2688, 0.2244, 0.6214,
0.1554, 0.2475, 0.2976, 0.0683, 0.1731, 0.4751, 0.356, 0.1388,
0.5939, 0.2122, 0.2784, 0.3689, 0.3127, 0.2284, 0.1775, 0.6697,
0.5998, 0.8374, 0.5647, 0.3187, 0.1704, 0.1619, 0.1413, 0.1621,
0.3577, 0.319, 0.2846, 0.1815, 0.0776, 0.2567, 0.4483, 0.1337,
0.2798, 0.202, 0.1847, 0.1758, 0.1659, 0.1828, 0.3669, 0.3211,
0.1863, 0.2559, 0.6901, 0.6483, 0.0922, 0.088, 0.099, 0.0836,
0.094, 0.099, 0.1157, 0.1138, 0.1046, 0.0495, 0.0513, 0.119,
0.0761, 0.0936, 0.0564, 0.1438, 0.0636, 0.1134, 0.0641, 0.0594,
0.0713, 0.0733, 0.0804, 0.0853, 0.0689, 0.118, 0.0892, 0.0875,
0.0837, 0.0807, 0.1065, 0.1385, 0.1163, 0.1305, 0.0923, 0.0974,
0.1176, 0.0848, 0.1059, 0.157, 0.0932, 0.1127, 0.0779, 0.1048,
0.1327, 0.1688, 0.1096, 0.1304, 0.1173, 0.115, 0.0742, 0.129,
0.0629, 0.0992, 0.0758, 0.0722, 0.0535, 0.0958, 0.1721, 0.1017,
0.0766, 0.1099, 0.152, 0.128, 0.1185, 0.065, 0.1176, 0.0565,
0.0866, 0.163, 0.12, 0.0825, 0.1149, 0.0839, 0.0587, 0.1335,
0.0968, 0.0901, 0.1073, 0.0802, 0.0744, 0.1493, 0.1384, 0.1128,
0.0738, 0.1146, 0.1108, 0.08, 0.1285, 0.0829, 0.1116, 0.1368,
0.2348, 0.0995, 0.0989, 0.0748, 0.1484, 0.0629, 0.0823, 0.075,
0.1768, 0.0607, 0.1142, 0.1289, 0.1506, 0.1742, 0.0626, 0.1187,
0.1509, 0.1144, 0.0928, 0.0946, 0.099, 0.0717, 0.1318, 0.1025,
0.093, 0.0972, 0.1325, 0.1209, 0.0943, 0.1006, 0.1073, 0.1336,
0.1439, 0.1066, 0.0765, 0.0673, 0.1082, 0.0923, 0.1139, 0.068,
0.0758, 0.0868, 0.1499, 0.0779, 0.0794, 0.0575, 0.1392, 0.0915,
0.0845, 0.086, 0.084, 0.1049, 0.1486, 0.1573, 0.177, 0.1319,
0.13, 0.0872, 0.388, 0.4751, 0.2898, 0.2931, 0.2663, 0.2838,
0.3494, 0.3675, 0.4342, 0.2907, 0.3072, 0.2815, 0.2761, 0.1945,
0.3512, 0.615, 0.2195, 0.4818, 0.3684, 0.4056, 0.2841, 0.1617,
0.3425, 0.288, 0.1962, 0.1285, 0.1553, 0.1708, 0.1332, 0.1167,
0.1491, 0.2028, 0.1267, 0.2406, 0.1257, 0.1499, 0.1559, 0.1895,
0.1508, 0.1111, 0.1274, 0.1675, 0.2324, 0.1732, 0.1491, 0.1568,
0.1465, 0.1548, 0.1245, 0.1399, 0.0855, 0.1151, 0.1612, 0.1693,
0.1493, 0.1208, 0.088, 0.1106, 0.0654, 0.0827, 0.0794, 0.1331,
0.0834, 0.0837, 0.0619, 0.092, 0.1397, 0.071, 0.1035, 0.0676,
0.0729, 0.0906, 0.064, 0.0985, 0.0823, 0.1206, 0.155, 0.1438,
0.1357, 0.1695, 0.0834, 0.1359, 0.1289, 0.0764, 0.1249, 0.0775,
0.1139, 0.104, 0.1566, 0.1069, 0.0869, 0.1376, 0.1223, 0.105,
0.0996, 0.1356, 0.1335, 0.0951, 0.2162, 0.1744, 0.4547, 0.5789,
0.5555, 0.3899, 0.5037, 0.4281, 0.486, 1.0209, 0.5855, 0.5312,
0.488, 0.3133, 0.5054, 0.3724, 0.59, 0.8119, 0.3811, 0.797, 0.5139,
0.348, 0.7722, 0.743, 0.548, 0.8791, 0.9054, 0.5392, 0.4333,
0.5314, 0.4976, 0.5953, 0.4288, 0.5179, 0.5634, 0.5331, 0.4371,
0.5709, 0.5065, 0.8047, 0.5368, 0.5657, 0.5816, 0.4763, 0.5907,
0.533, 0.4384, 0.4949, 0.7277, 0.4445, 0.4894, 0.4655, 0.5384,
0.5106, 0.3343, 0.5186, 0.5262, 0.5311, 0.495, 0.4691, 0.3465,
0.5558, 0.5975, 0.4768, 0.4802, 0.4573, 0.4037, 0.3316, 0.5152,
0.4673, 0.2356, 0.2905, 0.5672, 0.2097, 0.2216, 0.7384, 0.4089,
0.6159, 0.5219, 0.2866, 0.2443, 0.2071, 0.3658, 0.5861, 0.5021,
0.6953, 0.5053, 0.3978, 0.3853, 0.6207, 0.2944, 0.507, 0.4412,
0.3424, 0.6597, 0.5892, 0.3295, 0.6505, 0.9334, 0.6674, 0.4919,
0.8392, 0.9123, 0.813, 0.8223, 0.5801, 0.5745, 0.5148, 0.8514,
0.5563, 0.6417, 0.6445, 0.5701, 0.4186, 0.8303, 0.46, 0.6041,
0.6537, 0.5221, 0.4782, 0.5657, 0.6499, 0.6667, 0.9074, 0.555,
0.6696, 0.6083)), .Names = c("length_cm", "wt_kg"), row.names = c(NA,
500L), class = "data.frame")
The relationship between length and weight is not linear. Unfortunately I could not include the whole data set here but when the whole data set is used a gam provides the best fit, unlike in this subset where loess is suggested.
I would like to focus on gam since an answer that works for the whole data set is what I am after.
It is obvious, even in the subset provided, that my data has some outliers, in the example data set (df) there are at least two obvious outliers.
library(ggplot2)
# Scatter of length against weight with a GAM smooth (y ~ s(x)) drawn on top.
ggplot(data = df, mapping = aes(x = wt_kg, y = length_cm)) +
  geom_point() +
  stat_smooth(method = "gam", formula = y ~ s(x), size = 1)
Moving forward with a gam approach I would like to generate the prediction interval so that I can identify which points fall in and out of say the 95% prediction interval.
This is extremely simple to do with a linear regression using predict:
# Fit a straight-line model of weight on length, then bind the prediction
# interval (including the lwr and upr bounds) onto df as extra columns.
l_model <- lm(formula = wt_kg ~ length_cm, data = df)
df <- cbind(df, predict(object = l_model, interval = "prediction"))
Then simply plotting the upper and lower bounds of the interval
# Observations drawn over a shaded band running from the lower (lwr) to the
# upper (upr) prediction bound.
ggplot(data = df, mapping = aes(x = length_cm, y = wt_kg)) +
  geom_ribbon(
    mapping = aes(ymin = lwr, ymax = upr),
    fill = "blue",
    alpha = 0.2
  ) +
  geom_point()
But I can't seem to find a similar approach that works when using gam instead of lm. I have tried predict.gam from the mgcv package with no success.
library(mgcv)
# Fit weight against length with gam() and bind the predictions onto df.
# NOTE(review): the formula has no s() smooth term, unlike the y ~ s(x) smooth
# used in the plot, so presumably this fits a straight line -- confirm intent.
df_model <- gam(wt_kg ~ length_cm, data=df)
# predict.gam is called with default arguments only; presumably that yields
# just the fitted values with no interval bounds -- check the se.fit argument.
gam_pred <- cbind(df, mgcv::predict.gam(df_model))
I don't get any errors when running this; however, what I get back is a single column of data, which I am unsure how to interpret. Any help would be much appreciated.
I think that part of your code is:
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a less clear message.
library(broom)
library(gam)

# Fit weight on length.
# NOTE(review): attaching gam after mgcv would mask mgcv::gam -- confirm which
# implementation is intended.
mod <- gam(wt_kg ~ length_cm, data = df)

# augment() returns per-observation model output; presumably the model data
# plus columns such as .fitted and .resid -- verify against the broom docs.
pred <- augment(mod)
But I don't understand the second ggplot2 call. `pred` has the fitted values and other features of your regression, mainly .resid.
I have a data frame with Lat Lon mean_wind and wind_dir in each grid cells.
I am trying to make a spatial plot with mean wind in background and wind direction as arrow on each grid cells.
I have tried the following on the sample data frame wind.dt:
# Attempt at a wind map: mean wind as coloured tiles, wind direction as arrows.
win.plt<- ggplot(wind.dt,aes(x=Lon,y=Lat))+
# Mean wind plot: OK
geom_tile(aes(fill=mean_wind),alpha=1)+
geom_tile(aes(color=mean_wind), fill=NA) +
# NOTE(review): rev() is applied to the one-element palette name, which leaves
# it unchanged; reversing the colours would need rev() around brewer.pal() --
# confirm intent.
scale_fill_gradientn(colours=(brewer.pal(9,rev("RdYlGn"))))+
scale_color_gradientn(colours=(brewer.pal(9,rev("RdYlGn"))),guide=F)
# Wind direction: does not work
# NOTE(review): the line above has no trailing `+`, so the chain ends there and
# the geom_segment() call below is a separate expression that is never added to
# win.plt. It also builds yend from Lon and xend from Lat, although x is Lon
# and y is Lat.
geom_segment(arrow = arrow(),aes(yend = Lon + wind_dir, xend = Lat + wind_dir))
win.plt
wind.dt<-structure(list(Lon = c(170.25, 171, 171.75, 172.5, 173.25, 174,
174.75, 175.5, 176.25, 177, 177.75, 178.5, 179.25, 180, 180.75,
181.5, 182.25, 183, 183.75, 184.5, 185.25, 186, 186.75, 187.5,
188.25, 189, 189.75, 190.5, 191.25, 192, 192.75, 193.5, 194.25,
170.25, 171, 171.75, 172.5, 173.25, 174, 174.75, 175.5, 176.25,
177, 177.75, 178.5, 179.25, 180, 180.75, 181.5, 182.25, 183,
183.75, 184.5, 185.25, 186, 186.75, 187.5, 188.25, 189, 189.75,
190.5, 191.25, 192, 192.75, 193.5, 194.25, 170.25, 171, 171.75,
172.5, 173.25, 174, 174.75, 175.5, 176.25, 177, 177.75, 178.5,
179.25, 180, 180.75, 181.5, 182.25, 183, 183.75, 184.5, 185.25,
186, 186.75, 187.5, 188.25, 189, 189.75, 190.5, 191.25, 192,
192.75, 193.5, 194.25, 170.25, 171, 171.75, 172.5, 173.25, 174,
174.75, 175.5, 176.25, 177, 177.75, 178.5, 179.25, 180, 180.75,
181.5, 182.25, 183, 183.75, 184.5, 185.25, 186, 186.75, 187.5,
188.25, 189, 189.75, 190.5, 191.25, 192, 192.75, 193.5, 194.25,
170.25, 171, 171.75, 172.5, 173.25, 174, 174.75, 175.5, 176.25,
177, 177.75, 178.5, 179.25, 180, 180.75, 181.5, 182.25, 183,
183.75, 184.5, 185.25, 186, 186.75, 187.5, 188.25, 189, 189.75,
190.5, 191.25, 192, 192.75, 193.5, 194.25, 170.25, 171, 171.75,
172.5, 173.25, 174, 174.75, 175.5, 176.25, 177, 177.75, 178.5,
179.25, 180, 180.75, 181.5, 182.25, 183, 183.75, 184.5, 185.25,
186, 186.75, 187.5, 188.25, 189, 189.75, 190.5, 191.25, 192,
192.75, 193.5, 194.25), Lat = c(14.25, 14.25, 14.25, 14.25, 14.25,
14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25,
14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25,
14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25,
14.25, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5,
13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5,
13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5,
13.5, 13.5, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75,
12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75,
12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75,
12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11.25,
11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25,
11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25,
11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25,
11.25, 11.25, 11.25, 11.25, 11.25, 10.5, 10.5, 10.5, 10.5, 10.5,
10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5,
10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5,
10.5, 10.5, 10.5, 10.5, 10.5, 10.5), mean_wind = c(8.34, 8.33,
8.31, 8.29, 8.27, 8.24, 8.22, 8.2, 8.19, 8.16, 8.14, 8.13, 8.1,
8.08, 8.06, 8.02, 7.99, 7.96, 7.93, 7.89, 7.85, 7.81, 7.78, 7.73,
7.7, 7.67, 7.63, 7.62, 7.6, 7.58, 7.56, 7.53, 7.54, 8.65, 8.64,
8.61, 8.59, 8.56, 8.53, 8.51, 8.48, 8.46, 8.43, 8.41, 8.39, 8.38,
8.37, 8.33, 8.31, 8.28, 8.24, 8.2, 8.15, 8.12, 8.07, 8.03, 8.01,
7.97, 7.94, 7.92, 7.89, 7.87, 7.85, 7.85, 7.83, 7.8, 8.85, 8.84,
8.81, 8.8, 8.77, 8.74, 8.72, 8.69, 8.67, 8.65, 8.63, 8.61, 8.59,
8.58, 8.55, 8.54, 8.5, 8.46, 8.44, 8.4, 8.37, 8.33, 8.29, 8.26,
8.21, 8.18, 8.16, 8.13, 8.12, 8.09, 8.06, 8.06, 8.03, 9.01, 8.99,
8.96, 8.94, 8.91, 8.89, 8.86, 8.83, 8.82, 8.79, 8.78, 8.77, 8.75,
8.75, 8.73, 8.7, 8.68, 8.66, 8.63, 8.59, 8.55, 8.52, 8.47, 8.43,
8.4, 8.38, 8.35, 8.32, 8.31, 8.29, 8.26, 8.25, 8.23, 9.07, 9.06,
9.04, 9.01, 8.99, 8.97, 8.94, 8.92, 8.91, 8.9, 8.89, 8.88, 8.88,
8.87, 8.86, 8.84, 8.83, 8.8, 8.75, 8.74, 8.7, 8.67, 8.63, 8.59,
8.57, 8.53, 8.52, 8.51, 8.47, 8.47, 8.45, 8.42, 8.41, 9.1, 9.08,
9.06, 9.04, 9.02, 9, 8.98, 8.97, 8.96, 8.96, 8.95, 8.95, 8.97,
8.96, 8.96, 8.94, 8.91, 8.89, 8.86, 8.84, 8.8, 8.76, 8.73, 8.69,
8.67, 8.64, 8.63, 8.63, 8.61, 8.59, 8.57, 8.54, 8.53), wind_dir = c(81.27,
81.34, 81.38, 81.44, 81.47, 81.34, 81.31, 81.51, 81.56, 81.46,
81.54, 81.53, 81.42, 81.53, 81.66, 81.76, 81.86, 81.96, 82.02,
82.28, 82.65, 82.77, 83.07, 83.46, 83.78, 84.15, 84.52, 84.92,
85.39, 85.87, 86.15, 86.38, 86.53, 81.34, 81.34, 81.38, 81.31,
81.2, 81.25, 81.39, 81.36, 81.31, 81.4, 81.47, 81.48, 81.59,
81.64, 81.58, 81.62, 81.75, 81.98, 82.13, 82.26, 82.52, 82.77,
82.97, 83.15, 83.49, 83.74, 84.23, 84.78, 85.04, 85.49, 85.73,
86.05, 86.35, 81.5, 81.41, 81.32, 81.28, 81.32, 81.31, 81.24,
81.17, 81.28, 81.33, 81.24, 81.3, 81.44, 81.46, 81.55, 81.76,
81.8, 81.88, 82.11, 82.31, 82.4, 82.61, 82.88, 82.95, 83.29,
83.59, 83.93, 84.46, 84.8, 85.26, 85.47, 85.78, 86.11, 81.3,
81.29, 81.29, 81.28, 81.32, 81.22, 81.24, 81.32, 81.31, 81.23,
81.34, 81.47, 81.37, 81.42, 81.5, 81.6, 81.78, 81.98, 82.06,
82.26, 82.49, 82.52, 82.7, 82.79, 83.05, 83.46, 83.79, 84.18,
84.5, 84.91, 85.23, 85.49, 85.7, 81.31, 81.33, 81.28, 81.19,
81.26, 81.29, 81.36, 81.24, 81.16, 81.18, 81.23, 81.23, 81.23,
81.47, 81.5, 81.55, 81.73, 81.99, 82.14, 82.18, 82.41, 82.46,
82.63, 82.83, 82.97, 83.27, 83.62, 84.01, 84.34, 84.64, 85.01,
85.38, 85.55, 81.14, 81.14, 81.1, 81.15, 81.2, 81.1, 81.14, 81.06,
81.21, 81.26, 81.13, 81.16, 81.17, 81.22, 81.28, 81.63, 81.71,
81.77, 82.13, 82.22, 82.37, 82.48, 82.56, 82.7, 82.92, 83.19,
83.43, 83.74, 84.15, 84.59, 84.89, 85.22, 85.39)), row.names = c(NA,
-198L), .Names = c("Lon", "Lat", "mean_wind", "wind_dir"), class = c("tbl_df",
"tbl", "data.frame"))
geom_spoke was made for this particular sort of plot. Cleaned up a little,
library(ggplot2)
# Mean wind as a raster background with one direction arrow per grid cell.
ggplot(wind.dt,
       aes(x = Lon,
           y = Lat,
           fill = mean_wind,
           # geom_spoke() expects `angle` in radians, but wind_dir holds values
           # of roughly 81-87, i.e. degrees, so convert before mapping.
           # NOTE(review): assumes degrees counter-clockwise from east; compass
           # bearings would need a further transform -- confirm.
           angle = wind_dir * pi / 180,
           # Arrow length scaled to mean wind, kept within 0.2-0.8 units.
           radius = scales::rescale(mean_wind, c(.2, .8)))) +
    geom_raster() +
    geom_spoke(arrow = arrow(length = unit(.05, 'inches'))) +
    scale_fill_distiller(palette = "RdYlGn") +
    coord_equal(expand = 0) +
    theme(legend.position = 'bottom',
          legend.direction = 'horizontal')
Adjust scaling and sizes as desired.
Edit: Controlling the number of arrows
To adjust the number of arrows, a quick-and-dirty route is to subset one of the aesthetics passed to geom_spoke with a recycling vector that will cause some rows to be dropped, e.g.
library(ggplot2)
# Same plot, but with only every fifth arrow drawn.
ggplot(wind.dt,
       aes(x = Lon,
           y = Lat,
           fill = mean_wind,
           # The recycled logical index turns four of every five directions
           # into NA, which causes those arrows not to plot.
           # geom_spoke() expects `angle` in radians, but wind_dir holds values
           # of roughly 81-87, i.e. degrees, so convert before mapping.
           # NOTE(review): assumes degrees counter-clockwise from east; compass
           # bearings would need a further transform -- confirm.
           angle = wind_dir[c(TRUE, NA, NA, NA, NA)] * pi / 180,
           # Arrow length scaled to mean wind, kept within 0.2-0.8 units.
           radius = scales::rescale(mean_wind, c(.2, .8)))) +
    geom_raster() +
    geom_spoke(arrow = arrow(length = unit(.05, 'inches'))) +
    scale_fill_distiller(palette = "RdYlGn") +
    coord_equal(expand = 0) +
    theme(legend.position = 'bottom',
          legend.direction = 'horizontal')
#> Warning: Removed 158 rows containing missing values (geom_spoke).
This depends on your data frame being in order and is not infinitely flexible, but if it gets you a nice plot with minimal effort, it can be useful nonetheless.
A more robust approach is to make a subsetted data frame for use by geom_spoke, say, selecting every other value of Lon and Lat, here using recycling subsetting on a vector of distinct values:
library(dplyr)
# Arrow subset: rows whose longitude AND latitude are both among every other
# distinct value, which gives a regular grid of arrows.
wind.arrows <- wind.dt %>%
    filter(Lon %in% sort(unique(Lon))[c(TRUE, FALSE)],
           Lat %in% sort(unique(Lat))[c(TRUE, FALSE)])

# Full raster background from wind.dt; arrows drawn only for wind.arrows.
ggplot(wind.dt,
       aes(x = Lon,
           y = Lat,
           fill = mean_wind,
           # geom_spoke() expects `angle` in radians, but wind_dir holds values
           # of roughly 81-87, i.e. degrees, so convert before mapping.
           # NOTE(review): assumes degrees counter-clockwise from east; compass
           # bearings would need a further transform -- confirm.
           angle = wind_dir * pi / 180,
           # Arrow length scaled to mean wind, kept within 0.2-0.8 units.
           radius = scales::rescale(mean_wind, c(.2, .8)))) +
    geom_raster() +
    geom_spoke(data = wind.arrows,    # this is the only difference in the plotting code
               arrow = arrow(length = unit(.05, 'inches'))) +
    scale_fill_distiller(palette = "RdYlGn") +
    coord_equal(expand = 0) +
    theme(legend.position = 'bottom',
          legend.direction = 'horizontal')
This approach makes getting (and scaling) a grid fairly easy, but getting a diamond pattern will take a bit more logic:
# Diamond pattern: keep a cell when its longitude and latitude are both at
# even positions, or both at odd positions, in their sorted distinct values.
wind.arrows <- filter(
  wind.dt,
  (Lon %in% sort(unique(Lon))[c(FALSE, TRUE)] &
     Lat %in% sort(unique(Lat))[c(FALSE, TRUE)]) |
    (Lon %in% sort(unique(Lon))[c(TRUE, FALSE)] &
       Lat %in% sort(unique(Lat))[c(TRUE, FALSE)])
)