I want to combine columns of a list into a single dataframe, however, some lists are of different lengths. The maximum length is 17, and I've thought of a way around this and that's by creating a new row to match the maximum length number for column concatenation.
If row layers do not match in length, then fill the missing value between 1 and 17, and replace the values column enc_ with the number 0.
Here's a sample of the dataset:
[[1]]
layer pland_01_evergreen_needleleaf
1 1 0.016832782
2 2 0.024552628
3 3 0.024377985
4 4 0.009584417
5 5 0.013569500
6 6 0.021745836
7 7 0.024301743
8 8 0.028323187
9 9 0.029710995
10 10 0.020706332
11 11 0.025760934
12 12 0.025148797
13 13 0.028520806
14 14 0.021327549
15 15 0.024794668
16 16 0.027986949
17 17 0.022970945
[[2]]
layer pland_02_evergreen_broadleaf
1 7 0.02329869
2 11 0.02910651
3 12 0.04234851
4 13 0.02788104
5 14 0.01899742
6 15 0.02639924
7 16 0.02601143
8 17 0.03166427
My expected output:
[[1]]
layer pland_01_evergreen_needleleaf pland_02_evergreen_broadleaf
1 1 0.016832782 0
2 2 0.024552628 0
3 3 0.024377985 0
4 4 0.009584417 0
5 5 0.013569500 0
6 6 0.021745836 0
7 7 0.024301743 0.02329869
8 8 0.028323187 0
9 9 0.029710995 0
10 10 0.020706332 0
11 11 0.025760934 0.02910651
12 12 0.025148797 0.04234851
13 13 0.028520806 0.02788104
14 14 0.021327549 0.01899742
15 15 0.024794668 0.02639924
16 16 0.027986949 0.02601143
17 17 0.022970945 0.03166427
I have tried:
do.call(plyr::rbind.fill, test.enc)
Though, it does not replace the rows and just fills values in columns with NA's.
Reproducible code:
test.enc <- list(structure(list(layer = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17), pland_01_evergreen_needleleaf = c(0.0168327818172984,
0.0245526278078456, 0.0243779845525292, 0.00958441728108318,
0.0135694997972973, 0.0217458355, 0.0243017425347303, 0.0283231869863014,
0.0297109945836134, 0.0207063315181945, 0.0257609335769293, 0.0251487967356828,
0.0285208063526021, 0.0213275492944468, 0.0247946677520666, 0.0279869491599538,
0.0229709450323356)), row.names = c(NA, -17L), class = "data.frame"),
structure(list(layer = c(7, 11, 12, 13, 14, 15, 16, 17),
pland_02_evergreen_broadleaf = c(0.0232986892474108,
0.029106514197793, 0.0423485148880614, 0.0278810399372792,
0.0189974225113402, 0.0263992402670516, 0.0260114284210526,
0.0316642657775499)), row.names = c(NA, -8L), class = "data.frame"),
structure(list(layer = c(1, 2, 3, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16, 17), pland_03_deciduous_needleleaf = c(0.0224730632077946,
0.0272254714759945, 0.0179234332099727, 0.0233360434693878,
0.0289772211061947, 0.0279319832599034, 0.0240684032409326,
0.0193554670384615, 0.0279649463078261, 0.0269396070886525,
0.0185719102763596, 0.018542528637931, 0.012709947072028,
0.04239139)), row.names = c(NA, -14L), class = "data.frame"),
structure(list(layer = c(1, 2, 3, 4, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17), pland_04_deciduous_broadleaf = c(0.0237555990295715,
0.0250673634976813, 0.0215182227341075, 0.00714736670909091,
0.0290969429050279, 0.0267860332636672, 0.0270534621613419,
0.026721714630264, 0.0238709596184027, 0.0249074332489268,
0.0304618992970835, 0.0260209517100003, 0.015865886959611,
0.0243338004003074, 0.0201179804026253, 0.0332228978795843
)), row.names = c(NA, -16L), class = "data.frame"), structure(list(
layer = c(1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17), pland_05_mixed_forest = c(0.0205357761652226,
0.0241299700965417, 0.0225027270827694, 0.00985684546268657,
0.0311072087096774, 0.0252826755994332, 0.0271736973582555,
0.0283303792425047, 0.0229465085587453, 0.0262387189000513,
0.0349808141373789, 0.0269785067137574, 0.0178032039611502,
0.0251414066142756, 0.0237955553523809, 0.0349799640745083
)), row.names = c(NA, -16L), class = "data.frame"), structure(list(
layer = c(3, 5, 6, 7, 10, 11, 13, 14, 15, 16, 17), pland_06_closed_shrubland = c(0.005861055,
0.0247702364814815, 0.0217156349945235, 0.0266147094731707,
0.0273557187764706, 0.02247895109375, 0.0314803993053339,
0.0199688156521739, 0.0250040668072976, 0.024064520016,
0.0289086554672578)), row.names = c(NA, -11L), class = "data.frame"),
structure(list(layer = c(1, 2, 5, 6, 7, 10, 13, 15, 16, 17
), pland_07_open_shrubland = c(0.0239835098420742, 0.0196024526993901,
0.0275470745648515, 0.0205289891038188, 0.0252871031854839,
0.0225145242857143, 0.0277447744846797, 0.0273150363541667,
0.0372795540909091, 0.0258269711946903)), row.names = c(NA,
-10L), class = "data.frame"), structure(list(layer = c(1,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17),
pland_08_woody_savanna = c(0.0234895073226773, 0.0254242177795502,
0.0222844341348828, 0.010322404308595, 0.0115202866290984,
0.022858064298995, 0.0261324981159272, 0.0269339113300467,
0.0272905667936239, 0.0243445938197004, 0.0263085547098274,
0.031577225982848, 0.027366790080755, 0.0170917603078201,
0.0245166202483043, 0.0230437328068511, 0.0302480713824274
)), row.names = c(NA, -17L), class = "data.frame"), structure(list(
layer = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16, 17), pland_09_savanna = c(0.024511496338631,
0.0263438531740197, 0.0230784856467449, 0.0103841481938194,
0.0112631119225057, 0.0218656878147517, 0.0263293450194207,
0.0272377655722272, 0.0277590005710358, 0.0248185191981168,
0.0264710300465011, 0.0311785029047626, 0.027764701873438,
0.018296641767007, 0.0243240673465086, 0.0269793925823536,
0.0261431798468939)), row.names = c(NA, -17L), class = "data.frame"),
structure(list(layer = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17), pland_10_grassland = c(0.0241048000322165,
0.0257675668336232, 0.0223383845545, 0.0189068612261722,
0.0261390898788855, 0.0261454176785369, 0.0262590636755884,
0.0273476886308152, 0.0282016510452861, 0.0249749584240885,
0.0269017127896855, 0.0309276372122874, 0.0280081024050942,
0.0171571967814629, 0.024706397187938, 0.0229732030207295,
0.0271717635000233)), row.names = c(NA, -17L), class = "data.frame"),
structure(list(layer = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17), pland_11_wetland = c(0.0261045398315745,
0.0270077896857178, 0.0228967718773374, 0.0199122837701645,
0.0227976864969644, 0.0275306004374101, 0.0271334525693991,
0.0285065610334257, 0.0281986960454696, 0.0235630515843985,
0.0235566291662858, 0.0272662707441063, 0.0242547847851237,
0.020220947639907, 0.0229653844016148, 0.0189523223219292,
0.016330738598504)), row.names = c(NA, -17L), class = "data.frame"),
structure(list(layer = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17), pland_12_cropland = c(0.0247481645364914,
0.0269929124824351, 0.0233212451104437, 0.0209935752243073,
0.027662987546265, 0.0267526016850953, 0.0264659030703554,
0.0276911097027454, 0.027704723980107, 0.0258298011360007,
0.0293761963259958, 0.0304401704151498, 0.0297272977127787,
0.0191320152910558, 0.022300483848187, 0.0310418860633282,
0.0194552407910497)), row.names = c(NA, -17L), class = "data.frame"),
structure(list(layer = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17), pland_13_urban = c(0.0250541999489398,
0.0249789151674128, 0.0219325183761915, 0.0174050192638298,
0.0198481538465096, 0.0273040101927991, 0.0261348274108392,
0.0274315478205557, 0.0284040130969821, 0.0255357946798584,
0.0276680704963855, 0.0283009734389356, 0.0273947664869961,
0.0191846595896345, 0.0225736950645381, 0.0185572109335283,
0.0266912368721673)), row.names = c(NA, -17L), class = "data.frame"),
structure(list(layer = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17), pland_14_mosiac = c(0.0244553100335083,
0.0265608905797148, 0.0230754220937747, 0.0126737591788462,
0.0208868797777778, 0.0264543431506849, 0.0271490616452074,
0.0275509256793189, 0.0274870231454383, 0.0260302106124036,
0.0294514198552019, 0.0317358807321971, 0.0303629153539886,
0.0191054718841496, 0.0221332367959672, 0.0332987653767865,
0.0153846531471452)), row.names = c(NA, -17L), class = "data.frame"),
structure(list(layer = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17), pland_15_barren = c(0.0254695416164035,
0.0260217783555025, 0.0278294141356033, 0.022098210265976,
0.0232223153248193, 0.0277460892260692, 0.0280945051729643,
0.0308188510180505, 0.0283990843854084, 0.0282966180792079,
0.0292701060708535, 0.02484902225, 0.0202313840629426, 0.02730348265625,
0.0252544010927835, 0.012387523087037, 0.0243783162068618
)), row.names = c(NA, -17L), class = "data.frame"))
You can use mergeand as you have many columns to be added you can call it using Reduce.
Reduce(function(x,y) merge(x,y, all=TRUE), test.enc)
# layer pland_01_evergreen_needleleaf pland_02_evergreen_broadleaf pland_03_deciduous_needleleaf pland_04_deciduous_broadleaf pland_05_mixed_forest pland_06_closed_shrubland pland_07_open_shrubland pland_08_woody_savanna pland_09_savanna pland_10_grassland pland_11_wetland pland_12_cropland pland_13_urban pland_14_mosiac pland_15_barren
#1 1 0.016832782 NA 0.02247306 0.023755599 0.020535776 NA 0.02398351 0.02348951 0.02451150 0.02410480 0.02610454 0.02474816 0.02505420 0.02445531 0.02546954
#2 2 0.024552628 NA 0.02722547 0.025067363 0.024129970 NA 0.01960245 0.02542422 0.02634385 0.02576757 0.02700779 0.02699291 0.02497892 0.02656089 0.02602178
#3 3 0.024377985 NA 0.01792343 0.021518223 0.022502727 0.005861055 NA 0.02228443 0.02307849 0.02233838 0.02289677 0.02332125 0.02193252 0.02307542 0.02782941
#4 4 0.009584417 NA NA 0.007147367 0.009856845 NA NA 0.01032240 0.01038415 0.01890686 0.01991228 0.02099358 0.01740502 0.01267376 0.02209821
#5 5 0.013569500 NA NA NA NA 0.024770236 0.02754707 0.01152029 0.01126311 0.02613909 0.02279769 0.02766299 0.01984815 0.02088688 0.02322232
#6 6 0.021745836 NA NA 0.029096943 0.031107209 0.021715635 0.02052899 0.02285806 0.02186569 0.02614542 0.02753060 0.02675260 0.02730401 0.02645434 0.02774609
#7 7 0.024301743 0.02329869 0.02333604 0.026786033 0.025282676 0.026614709 0.02528710 0.02613250 0.02632935 0.02625906 0.02713345 0.02646590 0.02613483 0.02714906 0.02809451
#8 8 0.028323187 NA 0.02897722 0.027053462 0.027173697 NA NA 0.02693391 0.02723777 0.02734769 0.02850656 0.02769111 0.02743155 0.02755093 0.03081885
#9 9 0.029710995 NA 0.02793198 0.026721715 0.028330379 NA NA 0.02729057 0.02775900 0.02820165 0.02819870 0.02770472 0.02840401 0.02748702 0.02839908
#10 10 0.020706332 NA 0.02406840 0.023870960 0.022946509 0.027355719 0.02251452 0.02434459 0.02481852 0.02497496 0.02356305 0.02582980 0.02553579 0.02603021 0.02829662
#11 11 0.025760934 0.02910651 0.01935547 0.024907433 0.026238719 0.022478951 NA 0.02630855 0.02647103 0.02690171 0.02355663 0.02937620 0.02766807 0.02945142 0.02927011
#12 12 0.025148797 0.04234851 0.02796495 0.030461899 0.034980814 NA NA 0.03157723 0.03117850 0.03092764 0.02726627 0.03044017 0.02830097 0.03173588 0.02484902
#13 13 0.028520806 0.02788104 0.02693961 0.026020952 0.026978507 0.031480399 0.02774477 0.02736679 0.02776470 0.02800810 0.02425478 0.02972730 0.02739477 0.03036292 0.02023138
#14 14 0.021327549 0.01899742 0.01857191 0.015865887 0.017803204 0.019968816 NA 0.01709176 0.01829664 0.01715720 0.02022095 0.01913202 0.01918466 0.01910547 0.02730348
#15 15 0.024794668 0.02639924 0.01854253 0.024333800 0.025141407 0.025004067 0.02731504 0.02451662 0.02432407 0.02470640 0.02296538 0.02230048 0.02257370 0.02213324 0.02525440
#16 16 0.027986949 0.02601143 0.01270995 0.020117980 0.023795555 0.024064520 0.03727955 0.02304373 0.02697939 0.02297320 0.01895232 0.03104189 0.01855721 0.03329877 0.01238752
#17 17 0.022970945 0.03166427 0.04239139 0.033222898 0.034979964 0.028908655 0.02582697 0.03024807 0.02614318 0.02717176 0.01633074 0.01945524 0.02669124 0.01538465 0.02437832
Related
I have a dataframe df that looks like this:
indx adj_coords
1 1 2, 3, 4, 5, 6, 7
2 2 1, 3, 7, 8, 9, 10
3 3 1, 2, 4, 10, 11, 12
4 4 1, 3, 5, 12, 13, 14
5 5 1, 4, 6, 14, 15, 16
6 6 1, 5, 7, 16, 17, 18
I also have a vector vec that looks like this:
vec<-c(1,4,5,3,1)
I would like to get a dataframe of length 5 where each row has the adj_coords of the indx given in vec. It should look something like:
vec adj_coords
1 2, 3, 4, 5, 6, 7
4 1, 3, 5, 12, 13, 14
5 1, 4, 6, 14, 15, 16
3 1, 2, 4, 10, 11, 12
1 2, 3, 4, 5, 6, 7
After that I would like to sample adj_coords so that I have something like:
vec adj_coords sampled_adj_coords
1 2, 3, 4, 5, 6, 7 3
4 1, 3, 5, 12, 13, 14 5
5 1, 4, 6, 14, 15, 16 14
3 1, 2, 4, 10, 11, 12 11
1 2, 3, 4, 5, 6, 7 6
tried something for you... see if something similar you are looking for...
vec <- c(1,4,5,3,1)
vec <- data.frame("vec"=vec, indx=vec)
df <- structure(list(indx = 1:6, adj_coords = list(2:7, c(1L, 3L, 7L, 8L, 9L, 10L), c(1L, 2L, 4L, 10L, 11L, 12L), c(1L, 3L, 5L, 12L, 13L, 14L), c(1L, 4L, 6L, 14L, 15L, 16L), c(1L, 5L, 7L, 16L, 17L, 18L))), row.names = c(NA, 6L), class = "data.frame")
library(dplyr)
inner_join(vec, df, by = 'indx')
results:
vec indx adj_coords
1 1 1 2, 3, 4, 5, 6, 7
2 4 4 1, 3, 5, 12, 13, 14
3 5 5 1, 4, 6, 14, 15, 16
4 3 3 1, 2, 4, 10, 11, 12
5 1 1 2, 3, 4, 5, 6, 7
Just drop the column that is not needed...
Another option:
df <- df[vec,]
Output:
indx adj_coords
1 1 2, 3, 4, 5, 6, 7
4 4 1, 3, 5, 12, 13, 14
5 5 1, 4, 6, 14, 15, 16
3 3 1, 2, 4, 10, 11, 12
1.1 1 2, 3, 4, 5, 6, 7
For the random sample you can use this:
df$sampled_adj_coords <- apply(df[-1], 1, function(x) {sample(unlist(x), 1)})
Output:
indx adj_coords sampled_adj_coords
1 1 2, 3, 4, 5, 6, 7 2
4 4 1, 3, 5, 12, 13, 14 12
5 5 1, 4, 6, 14, 15, 16 4
3 3 1, 2, 4, 10, 11, 12 2
1.1 1 2, 3, 4, 5, 6, 7 3
I am working with time-use data and want to calculate the duration of a started measurement at each time step (per column) and select the longest duration for each measurement. The measurement are numbered from 1 to 27. The length is weighted with 1 (e.g increment is set to 1). I am not sure how to handle if a measurement is fragmented and has multiple durations times.
Data format:
Desired output (example for the measurement number 1):
Time Measurement Duration
04:00 1 1
04:10 1 1
04:20 1 2
04:20 1 2
04:20 1 2
Longest duration
Time Measurement Duration
04:20 1 2
Sample data:
df<-structure(list(id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14), `04:00` = c(1, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11), `04:10` = c(1, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11), `04:20` = c(1, 11, 1, 1, 11, 11, 11, 11, 11,
1, 1, 11, 11, 11), `04:30` = c(1, 11, 1, 1, 3, 11, 11, 11, 11,
1, 1, 13, 11, 11), `04:40` = c(1, 11, 1, 1, 3, 12, 11, 11, 4,
1, 1, 13, 4, 11), `04:50` = c(4, 11, 11, 11, 3, 12, 11, 11, 4,
11, 11, 13, 4, 11), `05:00` = c(4, 11, 11, 11, 3, 12, 11, 11,
4, 13, 11, 13, 4, 11), `05:10` = c(4, 11, 11, 11, 3, 12, 11,
11, 4, 13, 11, 13, 4, 11), `05:20` = c(4, 11, 11, 11, 11, 13,
4, 11, 4, 13, 11, 13, 4, 11), `05:30` = c(4, 11, 11, 11, 11,
13, 4, 13, 4, 13, 11, 1, 4, 13), `05:40` = c(4, 11, 3, 11, 11,
13, 4, 13, 11, 13, 11, 1, 1, 13), `05:50` = c(11, 11, 3, 11,
11, 13, 4, 13, 11, 13, 11, 1, 11, 13), `06:00` = c(11, 1, 3,
11, 11, 13, 4, 13, 1, 11, 11, 11, 11, 13), `06:10` = c(11, 1,
3, 11, 11, 13, 4, 13, 1, 11, 11, 11, 11, 13), `06:20` = c(11,
1, 3, 11, 11, 11, 11, 13, 1, 11, 11, 11, 11, 13)), row.names = c(NA,
-14L), spec = structure(list(cols = list(id = structure(list(), class = c("collector_double",
"collector")), `04:00` = structure(list(), class = c("collector_double",
"collector")), `04:10` = structure(list(), class = c("collector_double",
"collector")), `04:20` = structure(list(), class = c("collector_double",
"collector")), `04:30` = structure(list(), class = c("collector_double",
"collector")), `04:40` = structure(list(), class = c("collector_double",
"collector")), `04:50` = structure(list(), class = c("collector_double",
"collector")), `05:00` = structure(list(), class = c("collector_double",
"collector")), `05:10` = structure(list(), class = c("collector_double",
"collector")), `05:20` = structure(list(), class = c("collector_double",
"collector")), `05:30` = structure(list(), class = c("collector_double",
"collector")), `05:40` = structure(list(), class = c("collector_double",
"collector")), `05:50` = structure(list(), class = c("collector_double",
"collector")), `06:00` = structure(list(), class = c("collector_double",
"collector")), `06:10` = structure(list(), class = c("collector_double",
"collector")), `06:20` = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = ","), class = "col_spec"), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))
Here's a function, mainly using rle, that will get you the desired output for a specific measurement:
f <- function(n){
l <- lapply(df[-1], \(x) with(rle(x), lengths[values == n]))
enframe(l, name = "Time", value = "Duration") %>%
unnest("Duration") %>%
mutate(Measurement = n, .before = "Duration")
}
output
> f(1)
# A tibble: 20 × 3
Time Measurement Duration
<chr> <dbl> <int>
1 04:00 1 1
2 04:10 1 1
3 04:20 1 1
4 04:20 1 2
5 04:20 1 2
6 04:30 1 1
7 04:30 1 2
8 04:30 1 2
9 04:40 1 1
10 04:40 1 2
11 04:40 1 2
12 05:30 1 1
13 05:40 1 2
14 05:50 1 1
15 06:00 1 1
16 06:00 1 1
17 06:10 1 1
18 06:10 1 1
19 06:20 1 1
20 06:20 1 1
Get the maximum with slice_max:
f(1) %>%
slice_max(Duration, n = 1, with_ties = F)
# A tibble: 1 × 3
Time Measurement Duration
<chr> <dbl> <int>
1 04:20 1 2
library(tidyverse)
library(lubridate)
df %>%
pivot_longer(-id, names_to = "timepoint", values_to = "Measurement") %>%
arrange(id, Measurement) %>%
type_convert() %>%
group_by(id) %>%
# Duration to first time point for each id
mutate(Duration = timepoint - min(timepoint)) %>%
# get the longest duration
filter(Duration == max(Duration))
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> timepoint = col_time(format = "")
#> )
#> # A tibble: 14 × 4
#> # Groups: id [14]
#> id timepoint Measurement Duration
#> <dbl> <time> <dbl> <drtn>
#> 1 1 06:20 11 8400 secs
#> 2 2 06:20 1 8400 secs
#> 3 3 06:20 3 8400 secs
#> 4 4 06:20 11 8400 secs
#> 5 5 06:20 11 8400 secs
#> 6 6 06:20 11 8400 secs
#> 7 7 06:20 11 8400 secs
#> 8 8 06:20 13 8400 secs
#> 9 9 06:20 1 8400 secs
#> 10 10 06:20 11 8400 secs
#> 11 11 06:20 11 8400 secs
#> 12 12 06:20 11 8400 secs
#> 13 13 06:20 11 8400 secs
#> 14 14 06:20 13 8400 secs
Created on 2022-05-16 by the reprex package (v2.0.0)
I have two dataframes and one function. The function is supposed to take the variables start_month & end_month, select for each row the values in the second dataframe in the month-column, calculate the rate_of_change between each start_month and end_month variable in a given year. Finally calculate the mean(rate_of_change) and place it into the first dataframe as a new variable in the vector average_ratio.
So far I've created a code that calculates the average ratio, but I can't manage to put it into a for loop or an apply function so that the loop runs through the whole first data frame. I have two ideas, but they don't work so far.
structure(Total) # Df containing total combinations of all existing month starting in September
.
i | start_month | end_month | average_ratio (expected output)
1 | 9 | 10 | -23
2 | 9 | 11 | 13
3 | 9 | 12 | -4
4 | 9 | 1 |
5 | 9 | 2 | # ... with 61 more rows
and
structure(Cologne)
# A tibble: 3,000 x 4
year month price town (rate of change)
<dbl> <dbl> <dbl> <chr>
1 1531 7 7575 Cologne
2 1531 8 588 Cologne
3 1531 9 615 Cologne
4 1531 10 69 Cologne -88%
5 1531 11 712 Cologne
6 1531 12 590 Cologne
7 1532 1 72 Cologne
8 1532 2 675 Cologne
9 1532 3 6933 Cologne
10 1532 4 54 Cologne
11 1532 5 425 Cologne
12 1532 6 12 Cologne
13 1532 7 323 Cologne
14 1532 8 32 Cologne
15 1532 9 58 Cologne
16 1532 10 84 Cologne 42%
# ... with 2,990 more rows
# rate of change function
rateofchange <- function(x,y) {
((x-y)/y)*100
}
# avg_ratio function
avg_ratio <- function(x,y,z) {
dt.frame <- filter(x, month==y | month==z)
pre_p <- lag(dt.frame$price, 1)
dt.frame <- cbind(dt.frame, pre_p)
for (i in 1:nrow(dt.frame)) {
dt.frame$roc <- rateofchange(dt.frame$price,dt.frame$pre_p)
}
result <- mean(dt.frame$roc,na.rm=TRUE)
return(result)
}
May_Aug <- avg_ratio(Cologne, 5,7)
################ works until here ################
# Now, Idea 1
Total <- Total %>%
mutate(Total, ratio = avg_ratio(Cologne,Total$start_mth,Total$end_mth)
)
Warning messages:
1: In month == y :
longer object length is not a multiple of shorter object length
2: In month == z :
longer object length is not a multiple of shorter object length
# and Idea 2
ratio <- c()
Total_new <- for(i in 1:nrow(Total)) {
ratio [i] <- c(ratio, avg_ratio(Cologne,Total$start_mth[i],Total$end_mth[i]))
return(cbind(Total,ratio))
}
> dput(Cologne[1:20,])
structure(list(year = c(1531, 1531, 1531, 1531, 1531, 1531, 1532,
1532, 1532, 1532, 1532, 1532, 1532, 1532, 1532, 1532, 1532, 1532,
1533, 1533), month = c(7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6,
7, 8, 9, 10, 11, 12, 1, 2), price = c(7575, 588, 615, 69, 712,
72, 72, 675, 6933, 70, 656, 66, 62, 48, 48, 462, 45, 45, 456,
46), town = c("Cologne", "Cologne", "Cologne", "Cologne", "Cologne",
"Cologne", "Cologne", "Cologne", "Cologne", "Cologne", "Cologne",
"Cologne", "Cologne", "Cologne", "Cologne", "Cologne", "Cologne",
"Cologne", "Cologne", "Cologne")), spec = structure(list(cols = list(
Jahr = structure(list(), class = c("collector_double", "collector"
)), Monat = structure(list(), class = c("collector_double",
"collector")), cologne_wheat_monthly = structure(list(), class = c("collector_number",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1), class = "col_spec"), row.names = c(NA,
20L), class = c("tbl_df", "tbl", "data.frame"))
> dput(Total) structure(list(start_mth = c(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 6, 6, 7), end_mth = c(10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 2, 3, 4, 5, 6, 7, 8, 3, 4, 5, 6, 7, 8, 4, 5, 6, 7, 8, 5, 6, 7, 8, 6, 7, 8, 7, 8, 8)), class = "data.frame", row.names = c(NA, -66L))
You can do:
Total$average_ratio <- mapply(avg_ratio, y = Total$start_mth, z = Total$end_mth, MoreArgs = list(x = cologne))
Your function is not vectorized, that's why this doesn't work:
Total <- Total %>%
mutate(ratio = avg_ratio(cologne, start_mth, end_mth))
The mapply() function iterates (or vectorizes) through the arguments provided, you don't want to iterate over cologne however, that's why you pass it inside MoreArgs = , so it gets taken as it is.
I have the following data frame:
structure(list(C1 = c(1, 2, 2, 3, 4, 5, 5, 6), C2 = c(3.5, 3,
2.5, 2, 3, 2, 3, 5), C3 = c(6.5, 8, 9, 5, 7, 4, 3, 6)), row.names = c(NA,
-8L), class = c("tbl_df", "tbl", "data.frame"))
The first column is an index. The first observation is characterised by 1 point, the second by 2 points.
I need to make the intersection of all combinations of observations, one way. The result creates a new dataframe with a new index, with again some observations that are characterised by 2 rows/points: 1-2, 1-3, 1-4, 1-5, 1-6, 2-3, 2-4, 2-5, 2-6, 3-4, 3-5, 3-6, 4-5, 4-6, 5-6:
df2 = structure(list(C1 = c(1, 2, 3, 4, 4, 5, 6,7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14, 15, 15), C2 = c(3,2,3,3,2,3.5,2,3,2,3,3,2,3,2,2,2,3,3,2,3), C3 = c(6.5,5,6.5,3,4,6,5,7,4,3,6,5,3,4,5,4,3,6,4,3)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
where 3 in the first column is the new observation created by intersecting the 2 former.
I though I could use pmin in each row but it does not work. Can somenone tackle this?
I am not sure if the code is the thing you want, where cummin() is used
df2 <- cbind(df[1],cummin(df[-1]))
> df2
C1 C2 C3
1 1 3.5 6.5
2 2 3.0 6.5
3 2 2.5 6.5
4 3 2.0 5.0
5 4 2.0 5.0
6 5 2.0 4.0
7 5 2.0 3.0
8 6 2.0 3.0
DATA
df <- structure(list(C1 = c(1, 2, 2, 3, 4, 5, 5, 6), C2 = c(3.5, 3,
2.5, 2, 3, 2, 3, 5), C3 = c(6.5, 8, 9, 5, 7, 4, 3, 6)), row.names = c(NA,
-8L), class = c("tbl_df", "tbl", "data.frame"))
I have a data.table xSet with multiple columns. I need a new table with a moving 4 row average for each column individually.
We could use rollapplyr from zoo
library(zoo)
library(dplyr)
df1 %>%
mutate_all(funs(New = rollapplyr(., FUN = mean, width = 4, partial = TRUE)))
Or similar option with data.table
library(data.table)
setDT(df1)[, paste0("New", names(df1)) := lapply(.SD,
function(x) rollapplyr(x, FUN = mean, width = 4, partial = TRUE))]
data
set.seed(24)
df1 <- as.data.frame(matrix(sample(0:9, 3 * 15, replace = TRUE),
ncol = 3, dimnames = list(NULL, paste0("Col", 1:3))))
The answers by akrun and G. Grothendieck call the rollapplr() function which uses a right aligned window by default.
But this is in contrast to the definition the OP has shown in the image.
This can be visualised by creating some suitable input data and by using toString() instead of mean() as aggregation function:
library(data.table)
# create suitable input data
DT <- data.table(col1 = 1:15, col2 = 21:35, col3 = 41:55)
DT[, cbind(.SD, New = zoo::rollapplyr(.SD, 4, toString, partial = TRUE))]
col1 col2 col3 New.col1 New.col2 New.col3
1: 1 21 41 1 21 41
2: 2 22 42 1, 2 21, 22 41, 42
3: 3 23 43 1, 2, 3 21, 22, 23 41, 42, 43
4: 4 24 44 1, 2, 3, 4 21, 22, 23, 24 41, 42, 43, 44
5: 5 25 45 2, 3, 4, 5 22, 23, 24, 25 42, 43, 44, 45
6: 6 26 46 3, 4, 5, 6 23, 24, 25, 26 43, 44, 45, 46
7: 7 27 47 4, 5, 6, 7 24, 25, 26, 27 44, 45, 46, 47
8: 8 28 48 5, 6, 7, 8 25, 26, 27, 28 45, 46, 47, 48
9: 9 29 49 6, 7, 8, 9 26, 27, 28, 29 46, 47, 48, 49
10: 10 30 50 7, 8, 9, 10 27, 28, 29, 30 47, 48, 49, 50
11: 11 31 51 8, 9, 10, 11 28, 29, 30, 31 48, 49, 50, 51
12: 12 32 52 9, 10, 11, 12 29, 30, 31, 32 49, 50, 51, 52
13: 13 33 53 10, 11, 12, 13 30, 31, 32, 33 50, 51, 52, 53
14: 14 34 54 11, 12, 13, 14 31, 32, 33, 34 51, 52, 53, 54
15: 15 35 55 12, 13, 14, 15 32, 33, 34, 35 52, 53, 54, 55
col1 is equal to the row numbers, New.col1 shows the row indices which are being involved in computing rollapplyr().
Compared to OP's image, only rows 1 and 2 do match. Apparently, a right aligned window does not meet OP's definition.
We can compare OP's requirement with the other alignment options for rolling windows:
DT <- data.table(col1 = 1:15, col2 = 21:35, col3 = 41:55)
align_window <- c("center", "left", "right")
DT[, (align_window) := lapply(align_window,
function(x) zoo::rollapply(
col1, 4, toString, partial = TRUE, align = x))]
# add OP's definition from image
DT[1:2, OP := right][3, OP := toString(2:4)][4:15, OP := center][]
col1 col2 col3 center left right OP
1: 1 21 41 1, 2, 3 1, 2, 3, 4 1 1
2: 2 22 42 1, 2, 3, 4 2, 3, 4, 5 1, 2 1, 2
3: 3 23 43 2, 3, 4, 5 3, 4, 5, 6 1, 2, 3 2, 3, 4
4: 4 24 44 3, 4, 5, 6 4, 5, 6, 7 1, 2, 3, 4 3, 4, 5, 6
5: 5 25 45 4, 5, 6, 7 5, 6, 7, 8 2, 3, 4, 5 4, 5, 6, 7
6: 6 26 46 5, 6, 7, 8 6, 7, 8, 9 3, 4, 5, 6 5, 6, 7, 8
7: 7 27 47 6, 7, 8, 9 7, 8, 9, 10 4, 5, 6, 7 6, 7, 8, 9
8: 8 28 48 7, 8, 9, 10 8, 9, 10, 11 5, 6, 7, 8 7, 8, 9, 10
9: 9 29 49 8, 9, 10, 11 9, 10, 11, 12 6, 7, 8, 9 8, 9, 10, 11
10: 10 30 50 9, 10, 11, 12 10, 11, 12, 13 7, 8, 9, 10 9, 10, 11, 12
11: 11 31 51 10, 11, 12, 13 11, 12, 13, 14 8, 9, 10, 11 10, 11, 12, 13
12: 12 32 52 11, 12, 13, 14 12, 13, 14, 15 9, 10, 11, 12 11, 12, 13, 14
13: 13 33 53 12, 13, 14, 15 13, 14, 15 10, 11, 12, 13 12, 13, 14, 15
14: 14 34 54 13, 14, 15 14, 15 11, 12, 13, 14 13, 14, 15
15: 15 35 55 14, 15 15 12, 13, 14, 15 14, 15
None of the alignment options does completely meet OP's definition. "center" is the best match except for the first 3 rows.