Subsetting data through a pairwise condition (coordinates) given through another matrix - r

I have the following problem. I want to subset data from data.frame A through the coordinates in matrix B. The matching must be pairwise between the coordinates of data.frame A and matrix B, for instance:
lon lat
315 10.47 52.26
342 10.47 52.37
314 10.36 52.26
341 10.36 52.37
316 10.58 52.26
288 10.47 52.15
343 10.58 52.37
287 10.36 52.15
369 10.47 52.48
These are the coordinates I want to use to select the rows of data.frame A that have the same coordinates.
Subset of data.frame A:
structure(list(V13138 = c(-15.0545539855957, -15.0118608474731,
-14.8698711395264, -14.7185792922974, -14.5449771881104, -14.3298683166504,
-14.093412399292, -13.8265686035156, -13.5609474182129, -13.2979116439819,
-12.9872589111328, -16.1379947662354, -16.0786437988281, -16.0000076293945,
-15.9013233184814, -15.8254327774048, -15.7928791046143, -15.7776985168457,
-15.7719392776489, -15.6555442810059, -15.5040102005005, -15.299674987793,
-15.1203699111938, -14.9610414505005, -14.9183511734009, -14.8809566497803,
-14.8522186279297, -14.7452983856201, -14.6159820556641, -14.4459781646729,
-14.2792196273804, -14.1135988235474, -13.919620513916, -13.7100219726562,
-13.4647169113159, -13.2096605300903, -12.9506988525391, -12.6352672576904,
-16.0543613433838, -15.9694194793701, -15.8757200241089, -15.7634477615356,
-15.6610631942749, -15.5705652236938, -15.4900894165039, -15.4035835266113,
-15.2854766845703, -15.1539916992188, -15.0065212249756, -14.8876695632935,
-14.7782440185547, -14.7028961181641, -14.6101722717285, -14.512882232666,
-14.3619556427002, -14.2041110992432, -14.0312938690186, -13.8672027587891,
-13.7057323455811, -13.5090990066528, -13.2931470870972, -13.0507898330688,
-12.7776670455933, -12.495795249939, -12.1937398910522, -15.8203887939453,
-15.7082033157349, -15.5987091064453, -15.4917774200439, -15.390435218811,
-15.2902202606201, -15.1883487701416, -15.0730466842651, -14.9352264404297,
-14.8101224899292, -14.6887359619141, -14.5857553482056, -14.479567527771,
-14.3734302520752, -14.2455368041992, -14.1091232299805, -13.9528331756592,
-13.7913122177124, -13.6249303817749), V13139 = c(-7.07704830169678,
-7.36577892303467, -7.60899782180786, -7.85753965377808, -8.1030740737915,
-8.29149341583252, -8.45194625854492, -8.52234935760498, -8.58086585998535,
-8.62695598602295, -8.63118934631348, -3.46103024482727, -3.60997128486633,
-3.75486493110657, -3.90544772148132, -4.07107162475586, -4.2713623046875,
-4.53771257400513, -4.8378758430481, -5.18377256393433, -5.52214574813843,
-5.85517549514771, -6.14670562744141, -6.42839002609253, -6.70926380157471,
-6.97871208190918, -7.23635053634644, -7.43477869033813, -7.61982440948486,
-7.77778148651123, -7.9507007598877, -8.140061378479, -8.27810287475586,
-8.39971256256104, -8.4821662902832, -8.54337215423584, -8.59362697601318,
-8.6127290725708, -3.92732691764832, -4.10400390625, -4.28167676925659,
-4.4712872505188, -4.68312835693359, -4.91524791717529, -5.20708131790161,
-5.51510334014893, -5.85246753692627, -6.17613887786865, -6.49274349212646,
-6.75846433639526, -7.00491952896118, -7.21479940414429, -7.39973735809326,
-7.57557010650635, -7.69487333297729, -7.81140756607056, -7.91438579559326,
-8.01808547973633, -8.12346649169922, -8.21732807159424, -8.30572509765625,
-8.38486099243164, -8.45881938934326, -8.52587699890137, -8.57262134552002,
-4.34183073043823, -4.54555749893188, -4.76105737686157, -4.99724388122559,
-5.26278305053711, -5.53893136978149, -5.85093879699707, -6.16052055358887,
-6.46362257003784, -6.73704147338867, -6.99943161010742, -7.20827674865723,
-7.38761377334595, -7.53203630447388, -7.64012908935547, -7.74198341369629,
-7.82141494750977, -7.89632749557495, -7.96634721755981), V13140 = c(2.38613152503967,
2.37324142456055, 2.38662815093994, 2.38441777229309, 2.35186982154846,
2.31384658813477, 2.2728853225708, 2.23825240135193, 2.20144987106323,
2.15977454185486, 2.13386940956116, 2.9677951335907, 2.92966151237488,
2.8759753704071, 2.8214259147644, 2.76297402381897, 2.70412373542786,
2.6543300151825, 2.61105895042419, 2.60811114311218, 2.60745763778687,
2.60840320587158, 2.60350298881531, 2.59271574020386, 2.54244041442871,
2.47917294502258, 2.40393853187561, 2.35462546348572, 2.31852698326111,
2.31014728546143, 2.29225921630859, 2.26293158531189, 2.23773765563965,
2.21094441413879, 2.18223357200623, 2.15001082420349, 2.11311554908752,
2.08600211143494, 2.89936757087708, 2.89069938659668, 2.87734007835388,
2.85613536834717, 2.8233802318573, 2.78204131126404, 2.73941993713379,
2.70192885398865, 2.68653988838196, 2.66766142845154, 2.64770603179932,
2.62153196334839, 2.5882031917572, 2.53414297103882, 2.46355938911438,
2.38226866722107, 2.31638383865356, 2.26739454269409, 2.24680852890015,
2.23866200447083, 2.23337078094482, 2.22489714622498, 2.21137762069702,
2.19127559661865, 2.16362285614014, 2.13074207305908, 2.08486270904541,
2.85043382644653, 2.87193655967712, 2.88829565048218, 2.89718008041382,
2.88119888305664, 2.85316681861877, 2.81990385055542, 2.7852942943573,
2.75193023681641, 2.72042202949524, 2.6889750957489, 2.650550365448,
2.60602164268494, 2.55256152153015, 2.48241400718689, 2.40686845779419,
2.32716631889343, 2.27034878730774, 2.23567771911621), lon = structure(c(10.36,
10.47, 10.58, 10.69, 10.8, 10.91, 11.02, 11.13, 11.24, 11.35,
11.46, 8.6, 8.71, 8.82, 8.93, 9.04, 9.15, 9.26, 9.37, 9.48, 9.59,
9.7, 9.81, 9.92, 10.03, 10.14, 10.25, 10.36, 10.47, 10.58, 10.69,
10.8, 10.91, 11.02, 11.13, 11.24, 11.35, 11.46, 8.6, 8.71, 8.82,
8.93, 9.04, 9.15, 9.26, 9.37, 9.48, 9.59, 9.7, 9.81, 9.92, 10.03,
10.14, 10.25, 10.36, 10.47, 10.58, 10.69, 10.8, 10.91, 11.02,
11.13, 11.24, 11.35, 11.46, 8.6, 8.71, 8.82, 8.93, 9.04, 9.15,
9.26, 9.37, 9.48, 9.59, 9.7, 9.81, 9.92, 10.03, 10.14, 10.25,
10.36, 10.47, 10.58), .Dim = 84L), lat = structure(c(52.15, 52.15,
52.15, 52.15, 52.15, 52.15, 52.15, 52.15, 52.15, 52.15, 52.15,
52.26, 52.26, 52.26, 52.26, 52.26, 52.26, 52.26, 52.26, 52.26,
52.26, 52.26, 52.26, 52.26, 52.26, 52.26, 52.26, 52.26, 52.26,
52.26, 52.26, 52.26, 52.26, 52.26, 52.26, 52.26, 52.26, 52.26,
52.37, 52.37, 52.37, 52.37, 52.37, 52.37, 52.37, 52.37, 52.37,
52.37, 52.37, 52.37, 52.37, 52.37, 52.37, 52.37, 52.37, 52.37,
52.37, 52.37, 52.37, 52.37, 52.37, 52.37, 52.37, 52.37, 52.37,
52.48, 52.48, 52.48, 52.48, 52.48, 52.48, 52.48, 52.48, 52.48,
52.48, 52.48, 52.48, 52.48, 52.48, 52.48, 52.48, 52.48, 52.48,
52.48), .Dim = 84L)), .Names = c("V13138", "V13139", "V13140",
"lon", "lat"), row.names = 287:370, class = "data.frame")
Matrix B with coordinates:
structure(list(lon = structure(c(10.47, 10.47, 10.36, 10.36,
10.58, 10.47, 10.58, 10.36, 10.47), .Dim = 9L), lat = structure(c(52.26,
52.37, 52.26, 52.37, 52.26, 52.15, 52.37, 52.15, 52.48), .Dim = 9L)), .Names = c("lon",
"lat"), out.attrs = structure(list(dim = c(27L, 23L), dimnames = structure(list(
Var1 = c("Var1= 8.60", "Var1= 8.71", "Var1= 8.82", "Var1= 8.93",
"Var1= 9.04", "Var1= 9.15", "Var1= 9.26", "Var1= 9.37", "Var1= 9.48",
"Var1= 9.59", "Var1= 9.70", "Var1= 9.81", "Var1= 9.92", "Var1=10.03",
"Var1=10.14", "Var1=10.25", "Var1=10.36", "Var1=10.47", "Var1=10.58",
"Var1=10.69", "Var1=10.80", "Var1=10.91", "Var1=11.02", "Var1=11.13",
"Var1=11.24", "Var1=11.35", "Var1=11.46"), Var2 = c("Var2=51.05",
"Var2=51.16", "Var2=51.27", "Var2=51.38", "Var2=51.49", "Var2=51.60",
"Var2=51.71", "Var2=51.82", "Var2=51.93", "Var2=52.04", "Var2=52.15",
"Var2=52.26", "Var2=52.37", "Var2=52.48", "Var2=52.59", "Var2=52.70",
"Var2=52.81", "Var2=52.92", "Var2=53.03", "Var2=53.14", "Var2=53.25",
"Var2=53.36", "Var2=53.47")), .Names = c("Var1", "Var2"))), .Names = c("dim",
"dimnames")), row.names = c(315L, 342L, 314L, 341L, 316L, 288L,
343L, 287L, 369L), class = "data.frame")

Matrix B has to have the same column names for the columns with the lon and lat coordinates as data.frame A. In the given data, the column names are lon and lat in both objects:
# Show the first six rows; call the generic and let R dispatch on the class
# instead of invoking the matrix method directly.
head(data.frame.A)
V13138 V13139 V13140 lon lat
287 -15.05455 -7.077048 2.386132 10.36 52.15
288 -15.01186 -7.365779 2.373241 10.47 52.15
289 -14.86987 -7.608998 2.386628 10.58 52.15
290 -14.71858 -7.857540 2.384418 10.69 52.15
291 -14.54498 -8.103074 2.351870 10.80 52.15
292 -14.32987 -8.291493 2.313847 10.91 52.15
matrix.B
lon lat
315 10.47 52.26
342 10.47 52.37
314 10.36 52.26
341 10.36 52.37
316 10.58 52.26
288 10.47 52.15
343 10.58 52.37
287 10.36 52.15
369 10.47 52.48
To subset data from data.frame A through the coordinates in matrix B, just use the following code:
# Inner join on the coordinate pair: keeps only the rows of data.frame.A whose
# (lon, lat) combination also occurs in matrix.B. The key columns are named
# explicitly so that any other column name the two objects might share is
# never used for matching.
subset.A <- merge(data.frame.A, matrix.B, by = c("lon", "lat"))
lon lat V13138 V13139 V13140
1 10.36 52.15 -15.05455 -7.077048 2.386132
2 10.36 52.26 -14.74530 -7.434779 2.354625
3 10.36 52.37 -14.36196 -7.694873 2.316384
4 10.47 52.15 -15.01186 -7.365779 2.373241
5 10.47 52.26 -14.61598 -7.619824 2.318527
6 10.47 52.37 -14.20411 -7.811408 2.267395
7 10.47 52.48 -13.79131 -7.896327 2.270349
8 10.58 52.26 -14.44598 -7.777781 2.310147
9 10.58 52.37 -14.03129 -7.914386 2.246809
It's a really simple solution. The columns with the coordinates will be placed in the first 2 columns of the subset.

Related

Systematically filling in variable columns

I need code to systematically label and fill in variables.
For example, current dataset looks like this:
# Toy data set: 30 one-second time stamps with a value proportional to time.
time_steps <- seq_len(30)
data <- data.frame(
  Time = time_steps,
  Value = time_steps * 2.3
)
Time Value
1 2.3
2 4.6
3 6.9
4 9.2
5 11.5
6 13.8
7 16.1
8 18.4
9 20.7
10 23.0
11 25.3
12 27.6
13 29.9
14 32.2
15 34.5
16 36.8
17 39.1
18 41.4
19 43.7
20 46.0
21 48.3
22 50.6
23 52.9
24 55.2
25 57.5
26 59.8
27 62.1
28 64.4
29 66.7
30 69.0
I want to create two new variables Condition and Trial. There are 3 levels in the Condition variable (1~3) and 2 levels in the Trial variable (A or B). Condition level changes every 5 seconds in a specific pattern (1, 3, 2), and the Trial level alternates (A/B) for the first 4 seconds and disappears on the 5th second. Like this:
Time Condition Trial Value
1 1 A 2.3
2 1 B 4.6
3 1 A 6.9
4 1 B 9.2
5 1 <NA> 11.5
6 3 A 13.8
7 3 B 16.1
8 3 A 18.4
9 3 B 20.7
10 3 <NA> 23.0
11 2 A 25.3
12 2 B 27.6
13 2 A 29.9
14 2 B 32.2
15 2 <NA> 34.5
16 1 A 36.8
17 1 B 39.1
18 1 A 41.4
19 1 B 43.7
20 1 <NA> 46.0
21 3 A 48.3
22 3 B 50.6
23 3 A 52.9
24 3 B 55.2
25 3 <NA> 57.5
26 2 A 59.8
27 2 B 62.1
28 2 A 64.4
29 2 B 66.7
30 2 <NA> 69.0
How can I accomplish this by relying on Time? The code I'm imagining looks something like this:
for(every 5 seconds in Time){
data$Condition <- label as 1, 2, or 3
data$Trial <- label A or B in an alternating manner, skipping out on the last second}
#EDIT: I should specify that my actual dataset differs from the example I provide above. In reality, I am working with a massive dataset, with a varying number of rows for a given time range. I need code that will use a specific range (e.g. every 40 seconds) in Time to fill the Condition and Trial values. For example, Condition has 6 levels, which will change every 40 seconds based on a given pattern (let's say, 1, 6, 4, 5, 2, 3). For instance, the Condition variable is labelled as 1 when Time = 0~40 seconds, 6 when Time = 40~80, 4 (80~120), 5 (120~160), 2 (160~200), 3 (200~240), 1 (240~280), and so on until the end of the dataset. For each level in the Condition variable, the Trial variable alternates as A or B every 5 seconds (always starting from A). For example, for Condition 1 (Time = 0~40), Trial is labelled as A when Time = 0~5, B when Time = 5~10, A (10~15), ..., B (35~40).
Snippet of actual dataset:
data <- structure(list(Time = c(1.71, 3.2, 4.73, 5.65, 6.65,
6.75, 7.98, 8.29, 11.39, 13.31, 13.61, 14.28, 16.61, 19.39, 21.57,
22.77, 23.87, 24.05, 24.32, 24.68, 24.72, 24.79, 25.98, 26.43,
27.37, 27.67, 28.04, 29.27, 31.29, 31.42, 32.05, 33.45, 33.56,
34.11, 35.25, 35.84, 37.72, 38.09, 38.59, 39.03, 40.19, 40.64,
41.44, 42.78, 42.81, 43.15, 43.58, 44.43, 44.69, 44.9, 45.16,
45.63, 46.86, 48.91, 50.96, 52.03, 52.46, 53.13, 54.28, 55.51,
55.91, 57.36, 58, 58.17, 58.2, 58.53, 59.3, 59.83, 61.22, 61.75,
62.28, 63.58, 63.91, 65.04, 66.54, 67.1, 69.45, 71.67, 71.81,
74.04, 77.19, 78.04, 78.47, 80, 80.11, 81.36, 81.89, 83.09, 83.63,
83.66, 83.69, 84.26, 84.85, 85.71, 89.29, 90.23, 91.51, 91.78,
91.95, 96.3, 98.61, 99.08, 99.95, 101.14, 101.44, 102.5, 102.77,
103.57, 103.8, 105.15, 105.28, 105.48, 105.72, 107.38, 107.77,
107.93, 108.97, 109.13, 109.23, 109.6, 111.29, 113.12, 113.15,
113.18, 116.17, 116.37, 117.75, 120.44, 120.91, 121, 122.54,
123.17, 123.99, 124.39, 125.49, 127.71, 129.11, 130.4, 130.93,
132.16, 132.73, 133.04, 133.57, 134.15, 134.45, 136.46, 137.43,
138.43, 139.43, 140.25, 140.61, 143.3, 143.5, 143.56, 145.57,
146.65, 147.49, 147.61, 147.85, 148.02, 148.8, 151.07, 151.62,
151.75, 152.16, 153.79, 154.94, 155.04, 155.2, 156.64, 156.7,
156.77, 157.07, 158.95, 159.15, 160.36, 161.4, 162.07, 162.24,
162.44, 162.48, 162.67, 162.81, 163.07, 164.89, 165.39, 165.82,
166.09, 166.72, 166.83, 167.27, 168.61, 170.14, 171.52, 172.26,
173.13, 173.73, 174.04, 174.18, 174.21, 174.48, 175.21, 175.31,
175.48, 176.98, 177.56, 178.93, 179.03, 182.21, 184.03, 184.76,
185.06, 185.77, 186.39, 186.6, 186.95, 187.02, 187.58, 187.91,
188.08, 189.15, 189.88, 190.47, 191, 191.8, 193.5, 194.69, 195.29,
195.59, 197.07, 199.4, 200.35, 201.75, 202.28, 202.36, 202.92,
203.45, 203.62, 204.14, 204.57, 204.78, 204.87, 205.84, 206.47,
206.58, 207, 208.66, 208.99, 209.22, 212.51, 215.13, 216.02,
218.51, 218.61, 220.01, 220.04, 220.38, 221.53, 221.96, 222.63,
223.03, 223.17, 224.28, 225.64, 226.34, 226.38, 226.78, 226.81,
227.7, 227.76, 227.87, 228.2, 229.73, 230.36, 231.15, 231.58,
234.83, 235.66, 236.2, 236.46, 237.58, 237.85, 237.88, 238.32,
238.42, 239.21, 239.38, 240.05, 243.24, 243.87, 243.93, 245.45,
245.56, 245.75, 247.03, 247.12, 249.97, 250.78, 251.89, 253.99,
254.57, 257.68, 258.69, 258.85, 259.52, 259.99, 262.81, 263.28,
263.98, 265.93, 266.06, 268.1, 268.34, 270.18, 274.3, 276.99,
278.77, 279.54, 279.87, 280.43, 282.29, 282.35, 283.15, 283.35,
284.59, 285.2, 285.37, 290.75, 290.89, 291.12, 291.29, 293.53,
294.61, 296.86, 298.64, 299.64, 301.24, 303.29, 307.01, 307.18,
307.95, 309.66, 309.83, 309.86, 310.13, 310.69, 310.73, 312.01,
315.36, 316.1, 316.27, 316.56, 316.93, 317, 317.27, 317.9, 318.1,
319.25, 319.72, 319.99, 320.22, 322.3, 324.96, 326.42, 326.76,
327.62, 328.35, 328.47, 328.84, 329.27, 329.57, 330.43, 331,
332.22, 332.75, 334.05, 334.72, 334.86, 335.74, 338.75, 340.86,
341.84, 341.94, 343.14, 344.61, 344.71, 344.81, 345.85, 349.48,
349.68, 349.85, 350.61, 353.46, 353.53, 353.76, 354.36, 357.58,
360.8, 362.11, 362.15, 362.21, 362.35, 362.68, 364.18, 368.26,
369.02, 369.12, 369.35, 369.49, 369.85, 370.51, 371.68, 371.98,
372.01, 372.17, 372.47, 374.17, 376.28, 376.75, 377.32, 378.66,
379.37, 380.97, 381.3, 381.44, 381.54, 381.64, 381.87, 382.79,
383.13, 385.09, 385.59, 386.74, 387.68, 387.71, 390.29, 390.82,
391.23, 393.14, 393.21, 393.81, 395.08, 395.11, 395.21, 395.66,
395.83, 396.16, 396.29, 397.06, 397.23, 398.19, 398.66, 398.83,
402.77, 404.23, 404.36, 404.64, 405.03, 405.23, 405.27, 405.53,
406.41, 406.71, 407.18, 408.02, 408.08, 408.65, 409.66, 411.26,
411.54, 411.76, 412.3, 412.67, 412.95, 413.18, 413.21, 414.51,
415.09, 415.15, 415.22, 418.1, 418.64, 420.86, 421.55, 423.28,
424.08, 426.49, 427.42, 429.29, 429.54, 429.68, 429.94, 430.27,
430.47, 430.91, 431.64, 431.87, 432.34, 434.29, 434.66, 434.9,
436.21, 438.01, 438.75, 439.08, 439.08, 439.46, 442.56, 443.68,
444.11, 445, 445.5, 446.36, 446.56, 447.33, 447.36, 448.41, 449.25,
450.42, 451.2, 452.54, 454.25, 455.62, 455.75, 456.65, 457.43,
458.5, 460.54, 460.95, 461.02, 461.82, 463.32, 463.48, 464.31,
465.17, 466.99, 467.12, 467.59, 469.69, 470.64, 472.1, 473.49,
474.43, 475.16, 477.78, 478.28, 479.61, 480.56, 482.83, 483.89,
483.96, 484.86, 485.51, 486.76, 487.03, 487.09, 488.8, 489.23,
489.39, 489.64, 489.68, 489.94, 491.24, 491.31, 491.52, 492.65,
493.77, 494.77, 494.99, 495.63, 498.45, 500.6, 501.13, 503.42,
505.42, 505.78, 507.94, 510.02, 511.79, 516.21, 517.26, 517.46,
519.65, 520.98, 522.11, 523.23, 524.46, 526.09, 526.65, 528.64,
528.84, 529.08, 529.25, 529.83, 531.6, 532.39, 533.61, 534.71,
535.25, 535.68, 536.15, 537.53, 537.63, 539.8, 541.28, 542.29,
542.45, 543.12, 543.8, 544.34, 545.3, 545.64, 548.22, 548.28,
548.42, 549.06, 549.19, 549.78, 551.61, 552.97, 554.3, 554.71,
557.79, 558.05, 558.16, 560.54, 562.19, 563.56, 563.59, 563.65,
563.82, 564.09, 564.49, 565.68, 567.24, 567.48, 567.65, 567.68,
568.86, 568.92, 570.23, 571.31, 572.26, 572.76, 573.16, 574.09,
577.21, 579.71, 583.7, 584.1, 585.82, 585.88, 585.95, 586.45,
586.51, 586.65, 588.26, 588.42, 588.64, 588.87, 589.3, 589.47,
589.8, 590.84, 591.27, 591.54, 591.6, 592.52, 594.19, 594.65,
594.82, 595.12, 595.32, 595.64, 596.37, 596.5, 596.57, 596.67,
596.94, 596.97, 597.33, 597.44, 597.97, 598.44, 598.91, 598.96,
600.52, 602.71, 603.18, 603.57, 604.74, 607.12, 607.46, 608.12,
608.26, 608.76, 610.54, 611.08, 611.41, 612.2, 612.73, 615.19,
616.61, 617.68, 617.81, 619.2, 619.67, 620.97, 621.13, 621.63,
622.48, 623.01, 623.15, 624.15, 624.21, 624.55, 625.62, 626.07,
629.98, 630.65, 630.92, 632.57, 632.6, 633.5, 634, 634.77, 635.5,
635.86, 636.12, 638.79, 639.07, 639.41, 640.37, 642.58, 643.79,
644.72, 644.76, 645.05, 645.83, 645.85, 647.01, 647.37, 650.86,
651.09, 651.95, 655.01, 655.61, 656.36, 657.86, 658.83, 660.41,
660.61, 660.85, 662.35, 662.55, 662.64, 663.3, 664.56, 665.1,
665.49, 665.99, 666.13, 667.61, 667.75, 667.88, 667.95, 669.15,
670, 670.37, 670.67, 670.7, 670.9, 671.33, 671.54, 674.18, 677.27,
677.37, 678, 678.44, 679.14, 679.37, 679.69, 680.28, 681.38,
682.69, 682.95, 683.41, 685.67, 685.91, 685.97, 687.02, 687.39,
688.19, 688.29, 690.54, 690.68, 691.31, 692.14, 693.01, 693.24,
695.12, 696.23, 698.51, 699.98, 700.93, 701.23, 703.94, 707.06,
711.78, 712.9, 713, 713.13, 715.54, 718.03, 718.07, 719.39, 719.65,
720.28, 721.02, 721.39, 722.23, 722.77, 724.3, 726.09, 726.66,
727.16, 727.39, 729.1, 729.24, 729.57, 730.17, 730.97, 732.52,
733.93, 734.63, 735.64, 735.67, 735.84, 736.57, 736.91, 736.94,
737.11, 737.67, 738.89, 740.2, 740.7, 741.16, 742.08, 744.41,
744.5, 745.06, 745.86, 747.03, 747.85, 748.81, 749.18, 751.33,
751.63, 753.6, 753.9, 754.03, 754.49, 757.12, 758.67, 758.93,
761.48, 765.27, 767.94, 768.19, 769.12, 769.55, 769.95, 770.16,
771.77, 771.8, 772.74, 773.13, 773.5, 774.3, 774.77, 775.29,
775.96, 776.19, 776.52, 777.35, 777.72, 778.27, 778.61, 779.07,
780.61, 781.28, 781.36, 782.23, 782.7, 783.53, 785.04, 787.58,
788.92, 789.3, 789.8, 790.26, 790.86, 790.99, 791.5, 792.44,
793.78, 793.88, 794.68, 794.85, 795.16, 795.19, 795.96, 796.83,
799.01, 799.05, 799.32, 800.62, 801.48, 803.53, 803.84, 804.17,
806.18, 806.72, 807.06, 807.45, 808.02, 808.64, 809.64, 811.44,
812.28, 813.95, 815.67, 816.1, 818.24, 818.69, 819.42, 819.55,
819.66, 819.82, 821.63, 821.79, 821.87, 822.34, 824.87, 825.07,
825.39, 825.53, 825.96, 827.79, 827.92, 828.26, 828.41, 829.34,
829.64, 832.06, 832.83, 833.06, 833.53, 834.56, 836.91, 837.18,
837.54, 837.65, 839.1, 841.33, 841.4, 842.21, 842.38, 842.58,
842.82, 843.98, 844.52, 844.82, 845.17, 845.6, 846.8, 847.43,
849.78, 849.81, 850.18, 850.95, 851.48, 851.8, 852.37, 852.67,
852.87, 853.84, 855.19, 856.55, 858.05, 858.54, 859.5, 860.57,
860.88, 860.9, 862.19, 862.42, 862.85, 862.96, 863.69), Value = c(35.54,
28.32, 28.39, 27.83, 29.44, 29.94, 30.98, 32.92, 28.17, 29.62,
28.92, 29.91, 29.6, 31.72, 30.77, 30.67, 31.31, 31.04, 30.56,
31.2, 31.12, 31.12, 29.61, 31.43, 32.09, 32.29, 33.03, 34.83,
31.1, 31.73, 32.01, 32.98, 33.12, 32.38, 32.21, 32.92, 29.35,
31.12, 32, 32.08, 32.71, 33.73, 38.35, 38.42, 38.4, 38.77, 36.68,
38.61, 39.67, 40.4, 40.72, 40.54, 41.92, 40.41, 41.51, 39.74,
40.22, 42.03, 41.79, 42.13, 41.32, 41.98, 41.4, 41.01, 40.98,
41.09, 42.13, 41.88, 41.63, 42.42, 43.31, 42.09, 43.61, 44.24,
43.87, 45.36, 48.3, 48.66, 48.78, 32.48, 26.62, 26.02, 26.37,
27.24, 27.56, 29.06, 30.21, 30.16, 28.09, 27.32, 27.04, 27.08,
26.47, 26.18, 30.75, 28.65, 30.16, 30.37, 29.66, 25.69, 25.16,
24.91, 23.46, 25.76, 25.75, 24.21, 24.12, 25.98, 23.75, 22.23,
21.9, 21.85, 21.73, 24.61, 25.73, 25.84, 24.59, 24.3, 24.05,
24.69, 24.8, 27.17, 27.28, 27.26, 39.1, 39.76, 43.77, 45.35,
46.13, 46.03, 44.84, 45.13, 43.99, 43.5, 44.26, 44.79, 44.48,
44.77, 45.11, 45.24, 44.35, 43.7, 43.59, 44.54, 44.74, 44.18,
44.05, 41.75, 43.9, 45.22, 45.35, 45.45, 45.87, 45.79, 46.85,
48.39, 33.07, 32.45, 30.5, 29.41, 28.08, 24.81, 25.36, 25.41,
23.61, 24.48, 23.75, 23.38, 23.06, 25.85, 25.67, 25.35, 25.89,
27.49, 27.25, 26.85, 28.95, 22.96, 22.77, 22.67, 22.68, 23.35,
24.06, 25.23, 27.63, 28.12, 28.22, 28.37, 29.96, 30.35, 31.43,
32.05, 31.5, 32.77, 26.65, 27.91, 28.39, 28.17, 28.34, 28.25,
28.82, 29.06, 28.61, 28.99, 28, 28.6, 29.8, 29.87, 23.96, 23.85,
24.31, 24.14, 24.02, 23.79, 23.79, 24.23, 24.68, 28.65, 30.15,
31.06, 32.87, 34.21, 34.12, 34.12, 37.13, 39.15, 37.07, 37.99,
39.24, 42.75, 46.47, 45.9, 47.55, 47.35, 47.61, 46.34, 47.44,
47.19, 46.81, 47.15, 47.15, 47.4, 46.31, 46.6, 46.47, 46.42,
43.86, 45.1, 45.54, 43.95, 44.76, 45.27, 44.42, 44.58, 38.01,
36.84, 29.47, 27.04, 26.71, 24.72, 24.66, 24.64, 24.26, 23.69,
27.18, 27.15, 27.61, 27.75, 26.89, 26.77, 26.2, 25.65, 27.26,
21.86, 21.36, 21.32, 26.9, 28.57, 29.82, 30.53, 28.63, 27.27,
27.44, 27.06, 27.07, 30.38, 30.53, 25.36, 24.64, 23.12, 23.22,
26.04, 26.4, 27.51, 28.19, 28.05, 25.01, 18.68, 20.67, 23.42,
22.53, 28.56, 26.07, 26.04, 28.38, 26.85, 33.58, 34.9, 35.27,
33.2, 33.18, 32.88, 33.01, 35.34, 31.81, 32.89, 36.26, 36.04,
35.57, 35.25, 35.16, 35.33, 36.51, 36.82, 37.76, 37.67, 37.69,
42.1, 42.17, 42.04, 41.33, 30.25, 26.01, 27.93, 25.78, 28.27,
29.22, 28.64, 23.71, 23.46, 24.2, 23.42, 23.89, 23.88, 23.34,
22.91, 23.11, 24.58, 24.98, 24.25, 24.39, 24.03, 24.14, 24.14,
24.15, 24.69, 25.31, 23.35, 22.55, 22.71, 23.07, 24.62, 24.22,
23.7, 23.17, 23.39, 23.52, 23.05, 20.54, 20.37, 20.49, 20.62,
22.82, 24.33, 24.05, 28.24, 29.71, 30.06, 32.57, 35.14, 36.04,
35.25, 35.41, 38.18, 36.75, 36.65, 36.58, 39.1, 40.92, 41.23,
41.48, 38.61, 40.14, 40.14, 39.76, 40.31, 42.69, 41.24, 40.99,
40.87, 40.79, 40.38, 40.46, 42.82, 29.03, 30.32, 30.05, 29.86,
29.55, 29.05, 28.02, 28.68, 24.92, 24.77, 24.28, 25.34, 27.04,
27.84, 27.91, 28.63, 31.68, 30.74, 30.8, 30.34, 30.22, 30.31,
29.49, 25.3, 26.12, 26.94, 29.79, 29.16, 27.01, 28.54, 28.68,
28.01, 27.35, 27.63, 27.58, 27.42, 27.31, 23.24, 23.4, 23.32,
23.82, 23.12, 23.92, 24.14, 24.98, 25.17, 25.86, 25.71, 25.33,
23.64, 25.76, 25.52, 24.7, 24.15, 24.34, 24.4, 24.87, 25.75,
26.03, 28.34, 29.46, 29.38, 29.02, 30.2, 31.34, 31.06, 31.65,
31.66, 32.37, 33.28, 34.38, 34.41, 36.18, 35.25, 35.48, 35.9,
37.12, 36.49, 35.38, 35.92, 36.32, 36.85, 37.47, 37.9, 37.5,
37.2, 37.43, 37.64, 37.56, 37.39, 37.5, 36.7, 36.81, 36.05, 40.22,
39.11, 38.5, 38.97, 39.23, 40.3, 39.91, 39.62, 38.43, 22.1, 21.16,
21.51, 22.14, 23.15, 25.9, 25.29, 26.81, 26.87, 27.95, 25.05,
21.3, 21.28, 22.25, 24.42, 26.44, 27.01, 27.83, 26.74, 24.39,
21.13, 21.75, 21.78, 22.76, 24.01, 24.1, 24.61, 24.62, 25.13,
25.5, 26.6, 27.37, 23.47, 24.67, 24.28, 23.98, 23.33, 24.57,
25.34, 22.1, 25.41, 27.3, 30.81, 31.03, 35.26, 36.44, 36.46,
36.28, 36.68, 36.5, 36.77, 37.05, 37.69, 37.69, 38.26, 37.72,
38.02, 37.86, 38.6, 40, 40.5, 40.52, 42.02, 40.48, 36.9, 38.67,
38.12, 41.4, 41.87, 42.19, 39.6, 38.18, 22.66, 23.31, 24.07,
28.23, 28.73, 26.96, 25.21, 22.78, 23.07, 22.75, 21.77, 21.18,
21.72, 22.79, 24.25, 25.52, 24.09, 19.38, 20.42, 22.06, 21.88,
22.13, 21.74, 22.46, 23.42, 23.3, 23.7, 24.06, 25.72, 22.35,
24.7, 26.49, 25.8, 24.26, 24.49, 24.48, 25.63, 26.05, 25.9, 24.68,
23.99, 27.54, 26.73, 30.1, 30.17, 30.61, 33.7, 35.43, 39.35,
39.3, 39.43, 39.56, 40.18, 40.45, 41.19, 41.75, 41.58, 41.42,
41.63, 40.56, 40.6, 42.25, 41.04, 41.18, 41.56, 38.42, 37.57,
33.8, 38.25, 39.56, 41.87, 46.15, 46.23, 46.24, 39.31, 38, 35.89,
31.62, 30.74, 30.11, 30.44, 30.69, 30.64, 29.5, 27.87, 27.79,
23.97, 23.71, 22.41, 23.02, 24.78, 24.94, 24.52, 25.06, 24.95,
26.42, 26.09, 25.82, 25.13, 24.64, 24.67, 26.61, 27.55, 28.27,
28.1, 29.09, 29.14, 30.58, 27.81, 27.76, 29.08, 28.83, 29.98,
29.8, 29.31, 29.04, 27.59, 30.26, 30.69, 26.8, 21.32, 21.89,
25.36, 26.36, 26.15, 26.18, 27.75, 27.85, 26.3, 26.31, 21.29,
21.25, 20.7, 20.64, 21.66, 21.69, 21.06, 21.9, 20.57, 31.85,
32.71, 33.74, 37.93, 37.99, 37.47, 37.35, 39.15, 41.59, 42.64,
43.03, 43.12, 43.06, 43.59, 42.12, 36.73, 37.13, 38.57, 38.44,
38.23, 36.87, 36.71, 33.52, 35.4, 37.74, 38.44, 40.39, 39.12,
37.85, 35.71, 34.55, 32.94, 19.84, 19.52, 19.18, 20.23, 20.19,
20.08, 20.68, 21.35, 26.09, 27.68, 29.22, 29.2, 28.82, 28.32,
27.69, 27.7, 33.02, 21.7, 23.97, 24.85, 25.08, 25.45, 25.98,
24.65, 25.38, 32.03, 31.75, 31.32, 31.59, 30.15, 28.8, 22.79,
22.09, 23.24, 25.04, 25.51, 25.98, 27.46, 27.71, 27.69, 27.56,
26.96, 25.82, 25.3, 20.97, 21.08, 22.18, 22.95, 24.39, 23.71,
26.47, 30.37, 33.35, 27.92, 32.17, 33.73, 42.17, 46.03, 46.36,
46.49, 46.53, 46.25, 42.34, 41.32, 41.48, 40.65, 39.84, 39.87,
37.17, 37.34, 37.63, 37.93, 39.1, 42.72, 42.14, 42.01, 42.44,
41.78, 41.87, 42.63, 41.21, 41.86, 45.11, 33.58, 35.21, 35.98,
36.03, 35.03, 33.5, 32.57, 32.49, 31.72, 31.39, 30.1, 29.55,
29, 28.6, 26.68, 26.82, 26.81, 27.16, 30.05, 30.39, 28.92, 27.95,
27.66, 27.67, 28.15, 27.51, 28.21, 28.34, 28.78, 27.03, 24.3,
24.62, 26.67, 26.03, 24.02, 22.97, 25.12, 25.81, 25.61, 25.55,
26.67, 26.89, 27.75, 29.21, 30.68, 33.93, 36.45, 38.18, 38.85,
38.85, 36.66, 35.16, 35.77, 37.94, 39.01, 39.28, 41.23, 43.02,
43.33, 44.4, 43.69, 44.51, 45.45, 43.49, 41.61, 40.32, 40.81,
40.51, 41.82, 42.14, 42.39, 42.32, 41.96, 41.99, 41.64, 41.71,
41.63, 41.6, 41.66, 40.55, 40.51, 40.59, 41.31, 43.52, 42.96,
41.95, 42.12, 41.77, 32.63, 28.05, 29.48, 30.68, 31.49, 30.03,
30.22, 24.67, 28.49, 27.23, 26.41, 26.52, 29.27, 28.79, 28.65,
29.42, 29.6, 29.71, 24.26, 24.34, 24.37, 24.6, 24.24, 23.72,
23.69, 23.89, 24.73, 25.76, 25.77, 26.02, 26.55, 26.5, 26.94,
22.51, 24.7, 24.11, 24.83, 23.39, 24.2, 23.39, 23.16, 23.37,
24.85, 23.16, 23.1, 24.34, 24.6, 24.58, 24.56, 26.69, 27.8, 27.91,
27.22, 26.6, 31.89, 35.08, 38.79, 38.8, 40.26, 40.81, 40.71,
39.31, 38.55, 38.27, 38.45, 37.41, 38.27, 39.23, 37.43, 36.85,
35.66, 37.19, 36.85, 36.78, 35.91, 36.03, 36.87, 37.03, 37.28
)), row.names = c(NA, -1000L), class = c("tbl_df", "tbl", "data.frame"
))
I am offering a simple and transparent solution. Get the length of Time, which is 30 in your example. Create a vector for Condition with the "rep" function, repeating the pattern of levels until it covers that full length (30).
# Each condition level must hold for 5 consecutive seconds before the pattern
# 1, 3, 2 advances, so repeat every level 5 times (`each`) and recycle the
# resulting 15-element pattern up to the 30 rows of the example (`length.out`).
Condition <- rep(c(1, 3, 2), each = 5, length.out = 30)
Follow the same idea with Trial,
# A/B alternate for the first four seconds of every 5-second block; the fifth
# second has no trial, which must be a real missing value (NA), not the
# character string "NA".
Trial <- rep(c("A", "B", "A", "B", NA), 30 / 5)
Add the columns to the original data set.
# Attach the two label vectors to the data set as new columns.
data[["Condition"]] <- Condition
data[["Trial"]] <- Trial
You should be able to achieve this by using %/% and %% operations
# Example data: 30 one-second samples.
data <- data.frame(
  Time = 1:30,
  Value = (1:30) * 2.3
)

# Labels are looked up by position: one condition per block of seconds, one
# trial label per second within a block (NA on the last second of a block).
conditionlabel <- c(1, 3, 2)
triallabel <- c("A", "B", "A", "B", NA)

# Block length and pattern length are taken from the label vectors so the same
# code keeps working when the patterns change.
block_length <- length(triallabel)
elapsed <- data$Time - 1                  # 0-based seconds since the start
block_index <- elapsed %/% block_length   # 0-based block number

# Plain base R indexing, so no package has to be attached for `%>%`/`mutate()`.
data2 <- data
data2$condition <- conditionlabel[block_index %% length(conditionlabel) + 1]
data2$trial <- triallabel[elapsed %% block_length + 1]
> data2
Time Value condition trial
1 1 2.3 1 A
2 2 4.6 1 B
3 3 6.9 1 A
4 4 9.2 1 B
5 5 11.5 1 <NA>
6 6 13.8 3 A
7 7 16.1 3 B
8 8 18.4 3 A
9 9 20.7 3 B
10 10 23.0 3 <NA>
11 11 25.3 2 A
12 12 27.6 2 B
13 13 29.9 2 A
14 14 32.2 2 B
15 15 34.5 2 <NA>
16 16 36.8 1 A
17 17 39.1 1 B
18 18 41.4 1 A
19 19 43.7 1 B
20 20 46.0 1 <NA>
21 21 48.3 3 A
22 22 50.6 3 B
23 23 52.9 3 A
24 24 55.2 3 B
25 25 57.5 3 <NA>
26 26 59.8 2 A
27 27 62.1 2 B
28 28 64.4 2 A
29 29 66.7 2 B
30 30 69.0 2 <NA>

Get position indices after min / max aggregation on matrix

Sequel to my first problem here (How to aggregate hourly values into 24h-average means without timestamp).
Now I want to calculate the max (and min) from my timeseries of each 12-hour interval.
I have got my hourly data measurements (data_measure). Now I changed it into a time series of half-days.
# Wrap the measurements in a time series with 12 observations per period, so
# that one period corresponds to one block of 12 consecutive rows.
t_measure <- ts(data = data_measure, frequency = 12)
then I used the aggregate function from {stats}
# Aggregate to a new frequency of 1, i.e. reduce every 12-observation period of
# t_measure to its maximum (per column).
# NOTE(review): despite the "daily" in the name, the surrounding text describes
# these periods as 12-hour intervals.
data_measure_daily_max <- aggregate(t_measure, 1, max)
data_measure <- structure(c(8.29, 7.96, 8.14, 7.27, 7.37, 7.3, 7.23, 7.53,
7.98, 10.2, 12.39, 14.34, 14.87, 14.39, 12.54, 11.84, 10.3, 10.62,
10.65, 10.56, 10.43, 10.35, 9.85, 9.12, 8.95, 8.82, 8.92, 9.33,
9.44, 9.3, 9.15, 9.37, 9.54, 10.24, 12.13, 12.43, 12.65, 13,
13.18, 13.58, 13.64, 13.75, 13.85, 13.94, 13.79, 13.84, 13.94,
14.26, 24.93, 24.64, 23.67, 21.46, 21.33, 20.83, 21.12, 21.1,
23.75, 25.39, 30.72, 30.71, 30.81, 30.92, 32.61, 32.37, 32.49,
30.68, 30.23, 30.45, 28.1, 26.9, 25.09, 25.07, 24.59, 24.22,
23.05, 22.21, 22.07, 21.6, 21.24, 21.22, 21.85, 24.87, 28.85,
29.42, 30.82, 30.97, 31.32, 30.81, 30.83, 29.9, 30.01, 30.31,
30, 27.91, 25.78, 25.88, 8.78, 8.47, 8.49, 7.65, 8.63, 9.02,
9.02, 8.11, 7.63, 9.19, 11.25, 12.24, 13.62, 12.09, 10.6, 11.1,
10.16, 10.44, 9.58, 10.04, 10.01, 10.23, 9.51, 9.2, 9.34, 9.6,
9.4, 9.45, 9.36, 9.26, 9.3, 9.46, 9.58, 9.89, 10.6, 11.04, 12.1,
12.61, 13.12, 13.47, 13.55, 13.51, 13.63, 13.84, 13.93, 14.17,
13.97, 13.86), .Dim = c(48L, 3L), .Dimnames = list(NULL, c("station1",
"station2", "station3")))
So actually I need an index/vector which tells me where my max and min of these time intervals are, so later on I can extract exactly these for an other data sets to make a comparison.
My first trial:
max_index <- which(aggregate(t_measure, 1, max)) # argument to 'which' is not logical
Use which.max and which.min with aggregate
# Position (1-based, within each period of t_measure) of the smallest and of
# the largest value, computed separately for every column.
a1 <- aggregate(t_measure, nfrequency = 1, FUN = which.min)
a2 <- aggregate(t_measure, nfrequency = 1, FUN = which.max)
a1
#Time Series:
#Start = 1
#End = 4
#Frequency = 1
# station1 station2 station3
#1 7 6 9
#2 12 12 12
#3 2 8 6
#4 1 11 1
a2
#Time Series:
#Start = 1
#End = 4
#Frequency = 1
# station1 station2 station3
#1 12 11 12
#2 1 3 1
#3 12 12 12
#4 12 3 10
If you want the index of the min with reference to the rows of the original data_measure matrix, we can do
# a1 holds positions relative to the start of each period. To turn them into
# row numbers of the original data, add the number of rows that precede each
# period: 0 for the first period, one period length for the second, and so on.
# The period length is read from the series instead of being hard-coded, and
# the number of periods is taken from the aggregated result itself.
period_length <- frequency(t_measure)
vals <- nrow(a1)
index_min <- a1 + (period_length * (seq_len(vals) - 1))
index_min
#Time Series:
#Start = 1
#End = 4
#Frequency = 1
# station1 station2 station3
#1 7 6 9
#2 24 24 24
#3 26 32 30
#4 37 47 37
This can be read as follows: for station1, the min value of the 1st 12-hour interval is in the 7th row of data_measure, the min value of the next 12-hour interval is in the 24th row, and likewise for the other stations.

Impute missing values with average of previous 13 values

I have a dataset with a few missing observations. My objective is to impute each missing value in each variable with the average of the previous 13 values. If a missing value occurs before the 13th observation, the average of whatever values are available before it should be used instead. I am not sure how to do this.
Please use the below to replicate my dataset. Your help is much appreciated.
df1 <- structure(list(V1 = c(276.12, 53.4, 20.64, 181.8, 216.96, 10.44,
69, 144.24, 10.32, 239.76, 79.32, 257.64, 28.56, 117, 244.92,
234.48, NA, 337.68, 83.04, 176.76, 262.08, 284.88, 15.84, NA,
74.76, 315.48, 171.48, 288.12, 298.56, 84.72, 351.48, 135.48,
NA, 318.72, 114.84, 348.84, 320.28, 89.64, 51.72, 273.6, 243,
212.4, 352.32, 248.28, NA, 210.12, 107.64, 287.88, 272.64, 80.28,
239.76, 120.48, 259.68, 219.12, 315.24, 238.68, 8.76, 163.44,
252.96), V2 = c(45.36, 47.16, 55.08, 49.56, 12.96, 58.68, 39.36,
NA, 2.52, 3.12, 6.96, 28.8, NA, 9.12, 39.48, 57.24, 43.92, 47.52,
24.6, 28.68, 33.24, 6.12, 19.08, 20.28, 15.12, 4.2, 35.16, NA,
32.52, 19.2, 33.96, 20.88, 1.8, 24, 1.68, NA, 52.56, 59.28, 32.04,
45.24, 26.76, 40.08, 33.24, 10.08, 30.84, 27, 11.88, 49.8, 18.96,
14.04, 3.72, 11.52, 50.04, 55.44, 34.56, NA, 33.72, 23.04, 59.52
)), class = "data.frame", row.names = c(NA, -59L))
You can use zoo::rollapply to compute the mean over the 13 values:
# Rolling mean over a right-aligned window of up to 13 values ending at each
# row, ignoring NAs. partial = TRUE lets the first rows use however many
# values are available.
mean13 <- zoo::rollapply(
  df1$V1,
  width = 13,
  FUN = function(window_vals) mean(window_vals, na.rm = TRUE),
  partial = TRUE,
  fill = NA,
  align = "right"
)
# Shift by one row so each entry is the mean of the values *before* that row;
# row 1 has no predecessor and falls back to its own value.
df1$V1_prev_mean <- c(df1$V1[1], head(mean13, -1))
# Replace only the missing entries of V1 with the preceding-window mean.
df1$V1[is.na(df1$V1)] <- df1$V1_prev_mean[is.na(df1$V1)]
Output:
V1 V2 V1_prev_mean
1 276.1200 45.36 276.1200
2 53.4000 47.16 276.1200
3 20.6400 55.08 164.7600
4 181.8000 49.56 116.7200
5 216.9600 12.96 132.9900
6 10.4400 58.68 149.7840
7 69.0000 39.36 126.5600
8 144.2400 NA 118.3371
9 10.3200 2.52 121.5750
10 239.7600 3.12 109.2133
11 79.3200 6.96 122.2680
12 257.6400 28.80 118.3636
13 28.5600 NA 129.9700
14 117.0000 9.12 122.1692
15 244.9200 39.48 109.9292
16 234.4800 57.24 124.6615
17 141.1108 43.92 141.1108 # <- this row filled
18 337.6800 47.52 137.7200
19 83.0400 24.60 147.7800
20 176.7600 28.68 153.8300

Splitting two messy vectors in a data frame into one common column

Sample of dataset:
library(dplyr)
sample <- structure(list(Rank = c(15, 17, 20, 2, 16, 8, 21, 5, 13, 31, 22, 18, 2, 19, 11, 11, 8, 7, 12, 9, 5, 23, 17, 16, 15, 14, 4, 20, 13, 2), Athlete = c("François Gourmet(BEL)", "Agustín Félix(ESP)", "Keisuke Ushiro", "Michael Schrader", "Pieter Braun", "Laurent Hernu(FRA)", "Dmitriy Karpov", "Laurent Hernu(FRA)", "Thomas van der Plaetsen", "Attila Szabó", "Nadir El Fassi", "Eduard Mikhan", "Leonel Suárez", "Janek Õiglane", "Hans van Alphen(BEL)", "Roman Šebrle", "André Niklaus(GER)", "Pascal Behrenbruch", "Pieter Braun", "Oleksandr Yurkov(UKR)", "Eelco Sintnicolaas", "Brent Newdick", "Kim Kun-woo", "Akihiko Nakamura", "Bastien Auzeil", "Frédéric Xhonneux", "Janek Õiglane", "Keisuke Ushiro", "Roman Šebrle", "Rico Freimuth"), Total = c(7974, 7749, 7498, 8670, 7890, 8280, 7550, 8218, 8069, 7610, 7922, 7968, 8640, 7581, 8034, 8266, 8020, 8211, 8114, 8264, 8298, 7915, 7860, 7745, 7922, 7616, 8371, 7532, 8069, 8564), `100m` = c(10.67, 11.17, 11.53, 10.73, 11.22, 10.97, 11.24, 11.2, 11.2, 11.15, 11.12, 10.97, 11.13, 11.51, 11.11, 11.16, 11.19, 11.08, 11.11, 10.93, 10.76, 11.11, 11.11, 10.86, 11.35, 11.28, 11.08, 11.51, 11.25, 10.53), LJ = c(7.15, 7.12, 6.64, 7.85, 7.17, 7.31, 6.86, 7.22, 7.79, 7.09, 7.26, 7.42, 7.24, 6.78, 7.35, 7.8, 7.21, 6.8, 7.29, 7.37, 7.29, 7.42, 7.24, 7.26, 6.87, 7.21, 7.33, 6.73, 7.3, 7.48), SP = c(13.74, 13.29, 13.43, 14.56, 14.48, 14.43, 15.69, 13.99, 12.76, 13.92, 13.62, 14.15, 15.2, 14.43, 14.67, 14.98, 13.87, 16.01, 13.9, 15.15, 14.13, 14.35, 12.96, 11.67, 15.23, 12.92, 15.13, 14.93, 15.2, 14.85), HJ = c(1.85, 2.03, 1.96, 1.99, 1.93, 2.03, 1.93, 2.03, 2.17, 1.84, 1.99, 1.96, 2.11, 1.92, 1.88, 2.11, 1.97, 1.93, 2.04, 1.97, 1.93, 1.99, 1.96, 1.95, 1.96, 2.03, 2.05, 1.89, 2.05, 1.99), `400m` = c(47.98, 52.08, 51.43, 47.66, 48.54, 49.31, 52.01, 48.95, 49.46, 49.79, 51.35, 48.8, 48, 50.95, 48.52, 50.42, 49.95, 49.9, 48.24, 49.45, 48.35, 50.1, 49.24, 47.81, 50.36, 49.04, 49.58, 50.85, 51.18, 48.41), `110mh` = c(15.02, 14.75, 15.35, 14.29, 
14.67, 14.01, 14.64, 14.15, 14.79, 14.65, 14.9, 14.82, 14.45, 15.33, 14.77, 14.44, 14.5, 14.33, 14.37, 14.41, 14.42, 14.82, 14.95, 14.72, 14.59, 15.75, 14.56, 15.43, 14.75, 13.68), DT = c(39.87, 43.67, 47.64, 46.44, 42.59, 43.93, 47.1, 46.13, 37.2, 43.75, 42.25, 48, 44.71, 40.94, 44.3, 46.3, 42.68, 48.56, 42.09, 48.1, 42.23, 43.6, 39.53, 33.48, 46.86, 38.62, 42.11, 46.85, 46.93, 51.17), PV = c(5, 5, 4.6, 5, 4.7, 5.1, 4.8, 4.9, 5.1, 4.4, 4.8, 4.6, 5, 4.6, 4.3, 4.6, 5.1, 4.9, 4.9, 5, 5.2, 4.8, 4.9, 4.7, 4.8, 4.7, 5.1, 4.7, 4.8, 4.8), JT = c(57.73, 56.69, 63.28, 65.67, 59.26, 59.9, 46.91, 59.63, 58.91, 59.56, 57.65, 50.74, 75.19, 68.51, 65.71, 65.61, 57.55, 66.5, 56.95, 58.63, 61.07, 51.52, 53.33, 53.57, 60.8, 50.18, 71.73, 56.52, 67.28, 62.34), `1500m` = c(265.51, 288.27, 291.9, 265.38, 278.4, 277.41, 298.41, 268.4, 285.86, 285.64, 256.51, 273.71, 267.25, 283.06, 262.5, 290.33, 268.8, 276.64, 272.46, 278.43, 265.4, 270.57, 255.63, 256.36, 279.8, 262.71, 279.24, 283.51, 296.5, 281.57), Year = structure(c(4L, 4L, 9L, 7L, 9L, 1L, 6L, 2L, 6L, 5L, 5L, 7L, 5L, 8L, 4L, 5L, 2L, 6L, 8L, 1L, 6L, 5L, 6L, 8L, 9L, 3L, 9L, 8L, 6L, 9L), .Label = c("2001", "2003", "2005", "2007", "2009", "2011", "2013", "2015", "2017"), class = "factor"), Nationality = c(NA, NA, "Japan(JPN)", "Germany(GER)", "Netherlands(NED)", NA, "Kazakhstan(KAZ)", NA, "Belgium(BEL)", "Hungary", "France", "Belarus(BLR)", "Cuba", "Estonia(EST)", NA, "Czech Republic", NA, "Germany(GER)", "Netherlands(NED)", NA, "Netherlands(NED)", "New Zealand", "South Korea(KOR)", "Japan(JPN)", "France(FRA)", NA, "Estonia(EST)", "Japan(JPN)", "Czech Republic(CZE)", "Germany(GER)"), Notes = c(NA, NA, NA, "PB", NA, NA, NA, NA, NA, NA, "SB", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "PB", "NR", NA, "SB", NA, "PB", NA, NA, NA)), .Names = c("Rank", "Athlete", "Total", "100m", "LJ", "SP", "HJ", "400m", "110mh", "DT", "PV", "JT", "1500m", "Year", "Nationality", "Notes"), row.names = c(NA, -30L), class = c("tbl_df", "tbl", "data.frame"))
# A tibble: 30 x 16
Rank Athlete Total `100m` LJ SP HJ `400m` `110mh` DT PV JT `1500m` Year Nationality Notes
<dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fctr> <chr> <chr>
1 15 François Gourmet(BEL) 7974 10.67 7.15 13.74 1.85 47.98 15.02 39.87 5.0 57.73 265.51 2007 <NA> <NA>
2 17 Agustín Félix(ESP) 7749 11.17 7.12 13.29 2.03 52.08 14.75 43.67 5.0 56.69 288.27 2007 <NA> <NA>
3 20 Keisuke Ushiro 7498 11.53 6.64 13.43 1.96 51.43 15.35 47.64 4.6 63.28 291.90 2017 Japan(JPN) <NA>
4 2 Michael Schrader 8670 10.73 7.85 14.56 1.99 47.66 14.29 46.44 5.0 65.67 265.38 2013 Germany(GER) PB
5 16 Pieter Braun 7890 11.22 7.17 14.48 1.93 48.54 14.67 42.59 4.7 59.26 278.40 2017 Netherlands(NED) <NA>
6 8 Laurent Hernu(FRA) 8280 10.97 7.31 14.43 2.03 49.31 14.01 43.93 5.1 59.90 277.41 2001 <NA> <NA>
7 21 Dmitriy Karpov 7550 11.24 6.86 15.69 1.93 52.01 14.64 47.10 4.8 46.91 298.41 2011 Kazakhstan(KAZ) <NA>
8 5 Laurent Hernu(FRA) 8218 11.20 7.22 13.99 2.03 48.95 14.15 46.13 4.9 59.63 268.40 2003 <NA> <NA>
9 13 Thomas van der Plaetsen 8069 11.20 7.79 12.76 2.17 49.46 14.79 37.20 5.1 58.91 285.86 2011 Belgium(BEL) <NA>
10 31 Attila Szabó 7610 11.15 7.09 13.92 1.84 49.79 14.65 43.75 4.4 59.56 285.64 2009 Hungary <NA>
# ... with 20 more rows
I have two character vectors, "Athlete" and "Nationality", in my dataset where some entries have country codes in brackets attached at the end. I want to split only the country codes from these two vectors into a new variable, say "countrycode", while getting rid of the brackets at the same time. I'm not sure what the best way or syntax to go about the splitting would be, though - tidyr::separate possibly? I'm also uncertain how to handle the different combinations of characters in the country codes within the brackets during the split, and the fact that some entries don't need splitting.
I would then do something like this after to remove the brackets from the new variable.
# Strip the literal opening bracket, then the literal closing bracket.
sample$countrycode <- gsub("\\(", "", sample$countrycode)
sample$countrycode <- gsub("\\)", "", sample$countrycode)
Thanks
Hope this works for you:
library(dplyr)
# Add `countrycode`, taken from whichever column carries a bracketed code.
res <- mutate(
  sample,
  countrycode = case_when(
    # Nationality is missing: use the code attached to the athlete's name.
    is.na(Nationality) & grepl("\\(", Athlete) ~
      gsub(".*?\\((.*)\\)", "\\1", Athlete),
    # Nationality carries a code, e.g. "Japan(JPN)".
    grepl("\\(", Nationality) ~
      gsub(".*?\\((.*)\\)", "\\1", Nationality),
    # No bracketed code found: keep the Nationality value as is (may be NA).
    TRUE ~ Nationality
  )
)
sample output:
res %>% select(Athlete, Nationality, countrycode)
# # A tibble: 30 x 3
# Athlete Nationality countrycode
# <chr> <chr> <chr>
# 1 François Gourmet(BEL) NA BEL
# 2 Agustín Félix(ESP) NA ESP
# 3 Keisuke Ushiro Japan(JPN) JPN
# 4 Michael Schrader Germany(GER) GER
# 5 Pieter Braun Netherlands(NED) NED
# 6 Laurent Hernu(FRA) NA FRA
# 7 Dmitriy Karpov Kazakhstan(KAZ) KAZ
# 8 Laurent Hernu(FRA) NA FRA
# 9 Thomas van der Plaetsen Belgium(BEL) BEL
# 10 Attila Szabó Hungary Hungary
# # ... with 20 more rows
Remove the TRUE ~ Nationality to extract only country code as commented by Frank:
# Same extraction without a fallback branch: rows with no bracketed code in
# either column get NA in `countrycode`.
mutate(
  sample,
  countrycode = case_when(
    is.na(Nationality) & grepl("\\(", Athlete) ~
      gsub(".*?\\((.*)\\)", "\\1", Athlete),
    grepl("\\(", Nationality) ~
      gsub(".*?\\((.*)\\)", "\\1", Nationality)
  )
)
An ugly approach would be to use sub:
library(data.table)
DT <- data.table(sample)
# Pattern: any text, then exactly three characters inside parentheses (the
# captured group), then any text. The replacement keeps only the capture.
patt <- "^.*\\((.{3})\\).*$"
rp <- "\\1"
# Set `cc` by reference for rows whose Athlete matches, then for rows whose
# Nationality matches (a Nationality match overwrites an Athlete match).
# Rows matching neither are left as NA.
DT[Athlete %like% patt, cc := sub(patt, rp, Athlete)]
DT[Nationality %like% patt, cc := sub(patt, rp, Nationality)]
Something like str_extract from the stringr package would probably be cleaner if you're already working with tidyverse packages. Also, for the dplyr analogue to the code above, maybe look at the case_when function. (I am not familiar enough with these tools to know the exact syntax.)
The result looks like...
> DT[, .(Athlete, Nationality, cc)]
Athlete Nationality cc
1: François Gourmet(BEL) NA BEL
2: Agustín Félix(ESP) NA ESP
3: Keisuke Ushiro Japan(JPN) JPN
4: Michael Schrader Germany(GER) GER
5: Pieter Braun Netherlands(NED) NED
6: Laurent Hernu(FRA) NA FRA
7: Dmitriy Karpov Kazakhstan(KAZ) KAZ
8: Laurent Hernu(FRA) NA FRA
9: Thomas van der Plaetsen Belgium(BEL) BEL
10: Attila Szabó Hungary NA
11: Nadir El Fassi France NA
12: Eduard Mikhan Belarus(BLR) BLR
13: Leonel Suárez Cuba NA
14: Janek Õiglane Estonia(EST) EST
15: Hans van Alphen(BEL) NA BEL
16: Roman Šebrle Czech Republic NA
17: André Niklaus(GER) NA GER
18: Pascal Behrenbruch Germany(GER) GER
19: Pieter Braun Netherlands(NED) NED
20: Oleksandr Yurkov(UKR) NA UKR
21: Eelco Sintnicolaas Netherlands(NED) NED
22: Brent Newdick New Zealand NA
23: Kim Kun-woo South Korea(KOR) KOR
24: Akihiko Nakamura Japan(JPN) JPN
25: Bastien Auzeil France(FRA) FRA
26: Frédéric Xhonneux NA NA
27: Janek Õiglane Estonia(EST) EST
28: Keisuke Ushiro Japan(JPN) JPN
29: Roman Šebrle Czech Republic(CZE) CZE
30: Rico Freimuth Germany(GER) GER
Athlete Nationality cc
This simple solution works too.
library(stringr)
# Extract the upper-case code found inside parentheses, e.g.
# "Japan(JPN)" -> "JPN". str_match() returns a matrix whose 2nd column is the
# first capture group; entries without a bracketed code (or NA) give NA.
# Anchoring on the parentheses, instead of picking the second run of capital
# letters, keeps multi-word names correct: "Czech Republic(CZE)" -> "CZE"
# (not "R") and "New Zealand" -> NA (not "Z").
# NOTE(review): `data1` is assumed to be the data frame holding the
# `Nationality` column (the `sample` tibble of the question) - confirm.
data1$country_code <- str_match(data1$Nationality, "\\(([A-Z]+)\\)")[, 2]
Nationality country_code
1: NA NA
2: NA NA
3: Japan(JPN) JPN
4: Germany(GER) GER
5: Netherlands(NED) NED
6: NA NA

Inserting value in NAs repetitively in order [duplicate]

This question already has answers here:
Replacing NAs with latest non-NA value
(21 answers)
Closed 5 years ago.
I have a problem of making my data complete. Below is my data
> head(DF1)
# A tibble: 6 x 4
Date Coalprice Gasprice Co2emissionprice
<date> <dbl> <dbl> <dbl>
1 2015-12-31 47.45 14.40 8.22
2 2015-12-30 47.45 14.30 8.22
3 2015-12-29 47.40 15.40 8.27
4 2015-12-28 47.00 14.42 8.32
5 2015-12-25 47.00 14.20 8.22
6 2015-12-24 47.00 14.20 8.22
So data goes down all the way down to 2011-01-01 from 2015-12-31. But now, if you look carefully, my data has regular missing values. Every weekend's value is missing. So I want to put the prices for weekends as well to fill up NA. What I want to do is fill up every weekend (Sat and Sun) with the same prices on a day before every weekend, so Friday.
So in this example, the prices for Friday 2015-12-25 (47, 14.20, 8.22) will also be used for the following Saturday and Sunday. Likewise, every other weekend's prices will be the same as those of the Friday of that week.
Can you guys help me out with syntax?
Thank you very much for your advice.
dput info is below:
> dput(head(DF1, 30))
structure(list(Date = structure(c(16800, 16799, 16798, 16797,
16794, 16793, 16792, 16791, 16790, 16787, 16786, 16785, 16784,
16783, 16780, 16779, 16778, 16777, 16776, 16773, 16772, 16771,
16770, 16769, 16766, 16765, 16764, 16763, 16762, 16759), class = "Date"),
Coalprice = c(47.45, 47.45, 47.4, 47, 47, 47, 47, 47.6, 47.6,
47.8, 47.75, 47.75, 47.7, 47.65, 47.35, 47.4, 47.45, 47.4,
47.75, 48.55, 48.95, 49.1, 49.7, 49.95, 50.3, 53.85, 53.95,
53.95, 54, 54.35), Gasprice = c(14.4, 14.3, 15.4, 14.42,
14.2, 14.2, 13.93, 13.85, 14.35, 14.9, 15.5, 15.25, 15.95,
16.08, 16.23, 16.5, 16.65, 16.75, 16.78, 17.15, 17.15, 17.85,
17.95, 18.2, 17.7, 17.7, 17.88, 17.7, 17.6, 17.5), Co2emissionprice = c(8.22,
8.22, 8.27, 8.32, 8.22, 8.22, 8.22, 8.25, 8.18, 8.07, 8.07,
8.12, 8.19, 8.09, 8.07, 8.36, 8.4, 8.42, 8.42, 8.52, 8.58,
8.49, 8.55, 8.58, 8.56, 8.58, 8.62, 8.65, 8.56, 8.51)), .Names = c("Date",
"Coalprice", "Gasprice", "Co2emissionprice"), row.names = c(NA,
-30L), class = c("tbl_df", "tbl", "data.frame"))
You can use the tidyr package to do this:
library(tidyr)
# DF1 (the table from the question) has no rows at all for Saturdays and
# Sundays, so first add one row per missing calendar day; the price columns
# are NA on the newly created rows.
df <- complete(DF1, Date = seq(min(Date), max(Date), by = "day"))
# Sort oldest-first so that filling "down" carries the last available prices
# (Friday's) forward into the weekend. DF1 itself is sorted newest-first,
# where "down" would copy Monday's prices instead.
df <- df[order(df$Date), ]
df <- fill(df, contains("price"), .direction = "down")

Resources