Related
I want to combine columns of a list into a single dataframe, however, some lists are of different lengths. The maximum length is 17, and I've thought of a way around this and that's by creating a new row to match the maximum length number for column concatenation.
If row layers do not match in length, then fill the missing value between 1 and 17, and replace the values column enc_ with the number 0.
Here's a sample of the dataset:
[[1]]
layer pland_01_evergreen_needleleaf
1 1 0.016832782
2 2 0.024552628
3 3 0.024377985
4 4 0.009584417
5 5 0.013569500
6 6 0.021745836
7 7 0.024301743
8 8 0.028323187
9 9 0.029710995
10 10 0.020706332
11 11 0.025760934
12 12 0.025148797
13 13 0.028520806
14 14 0.021327549
15 15 0.024794668
16 16 0.027986949
17 17 0.022970945
[[2]]
layer pland_02_evergreen_broadleaf
1 7 0.02329869
2 11 0.02910651
3 12 0.04234851
4 13 0.02788104
5 14 0.01899742
6 15 0.02639924
7 16 0.02601143
8 17 0.03166427
My expected output:
[[1]]
layer pland_01_evergreen_needleleaf pland_02_evergreen_broadleaf
1 1 0.016832782 0
2 2 0.024552628 0
3 3 0.024377985 0
4 4 0.009584417 0
5 5 0.013569500 0
6 6 0.021745836 0
7 7 0.024301743 0.02329869
8 8 0.028323187 0
9 9 0.029710995 0
10 10 0.020706332 0
11 11 0.025760934 0.02910651
12 12 0.025148797 0.04234851
13 13 0.028520806 0.02788104
14 14 0.021327549 0.01899742
15 15 0.024794668 0.02639924
16 16 0.027986949 0.02601143
17 17 0.022970945 0.03166427
I have tried:
# Stacks all data frames in the list row-wise; a column that a data frame
# lacks is filled with NA, so rows are appended rather than aligned by `layer`.
do.call(plyr::rbind.fill, test.enc)
Though, it does not replace the rows and just fills values in columns with NA's.
Reproducible code:
test.enc <- list(structure(list(layer = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17), pland_01_evergreen_needleleaf = c(0.0168327818172984,
0.0245526278078456, 0.0243779845525292, 0.00958441728108318,
0.0135694997972973, 0.0217458355, 0.0243017425347303, 0.0283231869863014,
0.0297109945836134, 0.0207063315181945, 0.0257609335769293, 0.0251487967356828,
0.0285208063526021, 0.0213275492944468, 0.0247946677520666, 0.0279869491599538,
0.0229709450323356)), row.names = c(NA, -17L), class = "data.frame"),
structure(list(layer = c(7, 11, 12, 13, 14, 15, 16, 17),
pland_02_evergreen_broadleaf = c(0.0232986892474108,
0.029106514197793, 0.0423485148880614, 0.0278810399372792,
0.0189974225113402, 0.0263992402670516, 0.0260114284210526,
0.0316642657775499)), row.names = c(NA, -8L), class = "data.frame"),
structure(list(layer = c(1, 2, 3, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16, 17), pland_03_deciduous_needleleaf = c(0.0224730632077946,
0.0272254714759945, 0.0179234332099727, 0.0233360434693878,
0.0289772211061947, 0.0279319832599034, 0.0240684032409326,
0.0193554670384615, 0.0279649463078261, 0.0269396070886525,
0.0185719102763596, 0.018542528637931, 0.012709947072028,
0.04239139)), row.names = c(NA, -14L), class = "data.frame"),
structure(list(layer = c(1, 2, 3, 4, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17), pland_04_deciduous_broadleaf = c(0.0237555990295715,
0.0250673634976813, 0.0215182227341075, 0.00714736670909091,
0.0290969429050279, 0.0267860332636672, 0.0270534621613419,
0.026721714630264, 0.0238709596184027, 0.0249074332489268,
0.0304618992970835, 0.0260209517100003, 0.015865886959611,
0.0243338004003074, 0.0201179804026253, 0.0332228978795843
)), row.names = c(NA, -16L), class = "data.frame"), structure(list(
layer = c(1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17), pland_05_mixed_forest = c(0.0205357761652226,
0.0241299700965417, 0.0225027270827694, 0.00985684546268657,
0.0311072087096774, 0.0252826755994332, 0.0271736973582555,
0.0283303792425047, 0.0229465085587453, 0.0262387189000513,
0.0349808141373789, 0.0269785067137574, 0.0178032039611502,
0.0251414066142756, 0.0237955553523809, 0.0349799640745083
)), row.names = c(NA, -16L), class = "data.frame"), structure(list(
layer = c(3, 5, 6, 7, 10, 11, 13, 14, 15, 16, 17), pland_06_closed_shrubland = c(0.005861055,
0.0247702364814815, 0.0217156349945235, 0.0266147094731707,
0.0273557187764706, 0.02247895109375, 0.0314803993053339,
0.0199688156521739, 0.0250040668072976, 0.024064520016,
0.0289086554672578)), row.names = c(NA, -11L), class = "data.frame"),
structure(list(layer = c(1, 2, 5, 6, 7, 10, 13, 15, 16, 17
), pland_07_open_shrubland = c(0.0239835098420742, 0.0196024526993901,
0.0275470745648515, 0.0205289891038188, 0.0252871031854839,
0.0225145242857143, 0.0277447744846797, 0.0273150363541667,
0.0372795540909091, 0.0258269711946903)), row.names = c(NA,
-10L), class = "data.frame"), structure(list(layer = c(1,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17),
pland_08_woody_savanna = c(0.0234895073226773, 0.0254242177795502,
0.0222844341348828, 0.010322404308595, 0.0115202866290984,
0.022858064298995, 0.0261324981159272, 0.0269339113300467,
0.0272905667936239, 0.0243445938197004, 0.0263085547098274,
0.031577225982848, 0.027366790080755, 0.0170917603078201,
0.0245166202483043, 0.0230437328068511, 0.0302480713824274
)), row.names = c(NA, -17L), class = "data.frame"), structure(list(
layer = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16, 17), pland_09_savanna = c(0.024511496338631,
0.0263438531740197, 0.0230784856467449, 0.0103841481938194,
0.0112631119225057, 0.0218656878147517, 0.0263293450194207,
0.0272377655722272, 0.0277590005710358, 0.0248185191981168,
0.0264710300465011, 0.0311785029047626, 0.027764701873438,
0.018296641767007, 0.0243240673465086, 0.0269793925823536,
0.0261431798468939)), row.names = c(NA, -17L), class = "data.frame"),
structure(list(layer = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17), pland_10_grassland = c(0.0241048000322165,
0.0257675668336232, 0.0223383845545, 0.0189068612261722,
0.0261390898788855, 0.0261454176785369, 0.0262590636755884,
0.0273476886308152, 0.0282016510452861, 0.0249749584240885,
0.0269017127896855, 0.0309276372122874, 0.0280081024050942,
0.0171571967814629, 0.024706397187938, 0.0229732030207295,
0.0271717635000233)), row.names = c(NA, -17L), class = "data.frame"),
structure(list(layer = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17), pland_11_wetland = c(0.0261045398315745,
0.0270077896857178, 0.0228967718773374, 0.0199122837701645,
0.0227976864969644, 0.0275306004374101, 0.0271334525693991,
0.0285065610334257, 0.0281986960454696, 0.0235630515843985,
0.0235566291662858, 0.0272662707441063, 0.0242547847851237,
0.020220947639907, 0.0229653844016148, 0.0189523223219292,
0.016330738598504)), row.names = c(NA, -17L), class = "data.frame"),
structure(list(layer = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17), pland_12_cropland = c(0.0247481645364914,
0.0269929124824351, 0.0233212451104437, 0.0209935752243073,
0.027662987546265, 0.0267526016850953, 0.0264659030703554,
0.0276911097027454, 0.027704723980107, 0.0258298011360007,
0.0293761963259958, 0.0304401704151498, 0.0297272977127787,
0.0191320152910558, 0.022300483848187, 0.0310418860633282,
0.0194552407910497)), row.names = c(NA, -17L), class = "data.frame"),
structure(list(layer = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17), pland_13_urban = c(0.0250541999489398,
0.0249789151674128, 0.0219325183761915, 0.0174050192638298,
0.0198481538465096, 0.0273040101927991, 0.0261348274108392,
0.0274315478205557, 0.0284040130969821, 0.0255357946798584,
0.0276680704963855, 0.0283009734389356, 0.0273947664869961,
0.0191846595896345, 0.0225736950645381, 0.0185572109335283,
0.0266912368721673)), row.names = c(NA, -17L), class = "data.frame"),
structure(list(layer = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17), pland_14_mosiac = c(0.0244553100335083,
0.0265608905797148, 0.0230754220937747, 0.0126737591788462,
0.0208868797777778, 0.0264543431506849, 0.0271490616452074,
0.0275509256793189, 0.0274870231454383, 0.0260302106124036,
0.0294514198552019, 0.0317358807321971, 0.0303629153539886,
0.0191054718841496, 0.0221332367959672, 0.0332987653767865,
0.0153846531471452)), row.names = c(NA, -17L), class = "data.frame"),
structure(list(layer = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17), pland_15_barren = c(0.0254695416164035,
0.0260217783555025, 0.0278294141356033, 0.022098210265976,
0.0232223153248193, 0.0277460892260692, 0.0280945051729643,
0.0308188510180505, 0.0283990843854084, 0.0282966180792079,
0.0292701060708535, 0.02484902225, 0.0202313840629426, 0.02730348265625,
0.0252544010927835, 0.012387523087037, 0.0243783162068618
)), row.names = c(NA, -17L), class = "data.frame"))
You can use merge, and as you have many columns to add, you can call it repeatedly using Reduce.
# Full outer join of every data frame in the list, folded pairwise by Reduce().
# `by = "layer"` pins the join key explicitly instead of relying on merge()'s
# default of joining on every column name the two inputs happen to share.
# `all = TRUE` keeps layers missing from either side; their values become NA.
# To get 0 instead of NA, assign the result and run `res[is.na(res)] <- 0`.
Reduce(function(x, y) merge(x, y, by = "layer", all = TRUE), test.enc)
# layer pland_01_evergreen_needleleaf pland_02_evergreen_broadleaf pland_03_deciduous_needleleaf pland_04_deciduous_broadleaf pland_05_mixed_forest pland_06_closed_shrubland pland_07_open_shrubland pland_08_woody_savanna pland_09_savanna pland_10_grassland pland_11_wetland pland_12_cropland pland_13_urban pland_14_mosiac pland_15_barren
#1 1 0.016832782 NA 0.02247306 0.023755599 0.020535776 NA 0.02398351 0.02348951 0.02451150 0.02410480 0.02610454 0.02474816 0.02505420 0.02445531 0.02546954
#2 2 0.024552628 NA 0.02722547 0.025067363 0.024129970 NA 0.01960245 0.02542422 0.02634385 0.02576757 0.02700779 0.02699291 0.02497892 0.02656089 0.02602178
#3 3 0.024377985 NA 0.01792343 0.021518223 0.022502727 0.005861055 NA 0.02228443 0.02307849 0.02233838 0.02289677 0.02332125 0.02193252 0.02307542 0.02782941
#4 4 0.009584417 NA NA 0.007147367 0.009856845 NA NA 0.01032240 0.01038415 0.01890686 0.01991228 0.02099358 0.01740502 0.01267376 0.02209821
#5 5 0.013569500 NA NA NA NA 0.024770236 0.02754707 0.01152029 0.01126311 0.02613909 0.02279769 0.02766299 0.01984815 0.02088688 0.02322232
#6 6 0.021745836 NA NA 0.029096943 0.031107209 0.021715635 0.02052899 0.02285806 0.02186569 0.02614542 0.02753060 0.02675260 0.02730401 0.02645434 0.02774609
#7 7 0.024301743 0.02329869 0.02333604 0.026786033 0.025282676 0.026614709 0.02528710 0.02613250 0.02632935 0.02625906 0.02713345 0.02646590 0.02613483 0.02714906 0.02809451
#8 8 0.028323187 NA 0.02897722 0.027053462 0.027173697 NA NA 0.02693391 0.02723777 0.02734769 0.02850656 0.02769111 0.02743155 0.02755093 0.03081885
#9 9 0.029710995 NA 0.02793198 0.026721715 0.028330379 NA NA 0.02729057 0.02775900 0.02820165 0.02819870 0.02770472 0.02840401 0.02748702 0.02839908
#10 10 0.020706332 NA 0.02406840 0.023870960 0.022946509 0.027355719 0.02251452 0.02434459 0.02481852 0.02497496 0.02356305 0.02582980 0.02553579 0.02603021 0.02829662
#11 11 0.025760934 0.02910651 0.01935547 0.024907433 0.026238719 0.022478951 NA 0.02630855 0.02647103 0.02690171 0.02355663 0.02937620 0.02766807 0.02945142 0.02927011
#12 12 0.025148797 0.04234851 0.02796495 0.030461899 0.034980814 NA NA 0.03157723 0.03117850 0.03092764 0.02726627 0.03044017 0.02830097 0.03173588 0.02484902
#13 13 0.028520806 0.02788104 0.02693961 0.026020952 0.026978507 0.031480399 0.02774477 0.02736679 0.02776470 0.02800810 0.02425478 0.02972730 0.02739477 0.03036292 0.02023138
#14 14 0.021327549 0.01899742 0.01857191 0.015865887 0.017803204 0.019968816 NA 0.01709176 0.01829664 0.01715720 0.02022095 0.01913202 0.01918466 0.01910547 0.02730348
#15 15 0.024794668 0.02639924 0.01854253 0.024333800 0.025141407 0.025004067 0.02731504 0.02451662 0.02432407 0.02470640 0.02296538 0.02230048 0.02257370 0.02213324 0.02525440
#16 16 0.027986949 0.02601143 0.01270995 0.020117980 0.023795555 0.024064520 0.03727955 0.02304373 0.02697939 0.02297320 0.01895232 0.03104189 0.01855721 0.03329877 0.01238752
#17 17 0.022970945 0.03166427 0.04239139 0.033222898 0.034979964 0.028908655 0.02582697 0.03024807 0.02614318 0.02717176 0.01633074 0.01945524 0.02669124 0.01538465 0.02437832
I have a dataframe that looks like this
> head(printing_id_map_unique_frames)
# A tibble: 6 x 5
# Groups: frame_number [6]
X1 X2 X3 row_in_frame frame_number
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2 3 15 1
2 1 2 3 15 2
3 1 2 3 15 3
4 1 2 3 15 4
5 1 2 3 15 5
6 1 2 3 15 6
As you can see, X1, X2, X3 and row_in_frame are identical across these rows.
However, eventually you get to a row where one of these values changes (marked with ** below):
X1 X2 X3 row_in_frame frame_number
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2 3 15 32
2 1 2 3 15 33
3 1 2 3 5 34**
4 1 4 5 15 35
5 1 4 5 15 36
What I would like to do is essentially compute a dataframe that looks like:
X1 X2 X3 row_in_frame num_duplicates
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2 3 15 33
2 1 2 3 5 1
...
Essentially, what I want is to "collapse" over identical first 4 columns and count how many rows of that type there are in the "num_duplicates" column.
Is there a nice way to do this in dplyr without a messy for loop that tracks a count and checks whether there is a change?
Below please find a full data structure via dput:
> dput(printing_id_map_unique_frames)
structure(list(X1 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), X2 = c(2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
), X3 = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5), row_in_frame = c(15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 5, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 5
), frame_number = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68)), row.names = c(NA, -68L), class = c("tbl_df",
"tbl", "data.frame"))
Here is one option with count
library(dplyr) # 1.0.0
# Count the rows sharing each combination of the first four columns.
# The column names are converted to symbols and spliced into count(), so the
# grouping columns do not have to be typed out by hand.
group_syms <- rlang::syms(names(df1)[1:4])
count(df1, !!!group_syms)
Or specify the unquoted column names
# Same count, with the four grouping columns spelled out explicitly.
count(df1, X1, X2, X3, row_in_frame)
If we don't want to change the order, an option is to convert the first 4 columns to factor with levels specified as the unique values (which is the same as the order of occurrence of values) and then apply the count
# Preserve the order of first appearance in the counted result.
df1 %>%
# Turn each of the first four columns into a factor whose levels follow the
# order in which the values occur.
mutate(across(1:4, ~ factor(.x, levels = unique(.x)))) %>%
# Count every combination of those four columns (names spliced in as symbols).
count(!!! rlang::syms(names(.)[1:4])) %>%
# Convert the factor columns back to plain values.
type.convert(as.is = TRUE)
# A tibble: 4 x 5
# X1 X2 X3 row_in_frame n
# <int> <int> <int> <int> <int>
#1 1 2 3 15 33
#2 1 2 3 5 1
#3 1 4 5 15 33
#4 1 4 5 5 1
This question already has answers here:
How collect additional row data on binned data in R
(1 answer)
Group value in range r
(3 answers)
Closed 3 years ago.
I am doing a statistical analysis on a big data frame (more than 48.000.000 rows) in R. Here is an example of the data:
structure(list(herd = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3), cows = c(1, 2,
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6,
7, 8, 9, 10, 11, 12, 13, 14, 15, 16), `date` = c("11/03/2013",
"12/03/2013", "13/03/2013", "14/03/2013", "15/03/2013", "16/03/2013",
"13/05/2012", "14/05/2012", "15/05/2012", "16/05/2012", "17/05/2012",
"18/05/2012", "10/07/2016", "11/07/2016", "12/07/2016", "13/07/2016",
"11/03/2013", "12/03/2013", "13/03/2013", "14/03/2013", "15/03/2013",
"16/03/2013", "13/05/2012", "14/05/2012", "15/05/2012", "16/05/2012",
"17/05/2012", "18/05/2012", "10/07/2016", "11/07/2016", "12/07/2016",
"13/07/2016", "11/03/2013", "12/03/2013", "13/03/2013", "14/03/2013",
"15/03/2013", "16/03/2013", "13/05/2012", "14/05/2012", "15/05/2012",
"16/05/2012", "17/05/2012", "18/05/2012", "10/07/2016", "11/07/2016",
"12/07/2016", "13/07/2016"), glicose = c(240666, 23457789, 45688688,
679, 76564, 6574553, 78654, 546432, 76455643, 6876, 7645432,
876875, 98654, 453437, 98676, 9887554, 76543, 9775643, 986545,
240666, 23457789, 45688688, 679, 76564, 6574553, 78654, 546432,
76455643, 6876, 7645432, 876875, 98654, 453437, 98676, 9887554,
76543, 9775643, 986545, 240666, 23457789, 45688688, 679, 76564,
6574553, 78654, 546432, 76455643, 6876)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -48L))
I need to identify how many cows are in the following category of glicose by herd and by date:
<=100000
100000 and <=150000
150000 and <=200000
200000 and <=250000
250000 and <=400000
>400000
I tried to use the functions filter() and select() but could not categorize the variable like that.
I also tried to make a vector for each category, but it did not work:
# Groups by herd and date, then keeps only the rows whose glicose is below
# 100000; this filters rows but neither assigns categories nor counts cows.
ht <- df %>% group_by(herd, date) %>%
filter(glicose < 100000)
Actually I do not have a clue of how I could do this. Please help!
I expect to get the number of cows in each category of each herd based on each date in a table like this:
Calling your data df,
# Bin glicose into the requested ranges, parse the day/month/year strings into
# Date values, and count the rows per herd, date and glicose range.
# cut() uses right-closed intervals, e.g. (1e+05,1.5e+05].
df %>%
  mutate(
    date = as.Date(date, format = "%d/%m/%Y"),
    glicose_group = cut(
      glicose,
      breaks = c(0, 1e5, 1.5e5, 2e5, 2.5e5, 4e5, Inf)
    )
  ) %>%
  group_by(herd, date, glicose_group) %>%
  count()
# # A tibble: 48 x 4
# # Groups: herd, date, glicose_group [48]
# herd date glicose_group n
# <dbl> <date> <fct> <int>
# 1 1 2012-05-13 (0,1e+05] 1
# 2 1 2012-05-14 (4e+05,Inf] 1
# 3 1 2012-05-15 (4e+05,Inf] 1
# 4 1 2012-05-16 (0,1e+05] 1
# 5 1 2012-05-17 (4e+05,Inf] 1
# 6 1 2012-05-18 (4e+05,Inf] 1
# 7 1 2013-03-11 (2e+05,2.5e+05] 1
# 8 1 2013-03-12 (4e+05,Inf] 1
# 9 1 2013-03-13 (4e+05,Inf] 1
# 10 1 2013-03-14 (0,1e+05] 1
# # ... with 38 more rows
I also threw in a conversion to Date class, which is probably a good idea.
This is my list:
mylist=list(list(a = c(2, 3, 4, 5), b = c(3, 4, 5, 5), c = c(3, 7, 5,
5), d = c(3, 4, 9, 5), e = c(3, 4, 5, 9), f = c(3, 4, 1, 9),
g = c(3, 1, 5, 9), h = c(3, 3, 5, 9), i = c(3, 17, 3, 9),
j = c(3, 17, 3, 9)), list(a = c(2, 5, 48, 4), b = c(7, 4,
5, 5), c = c(3, 7, 35, 5), d = c(3, 843, 9, 5), e = c(3, 43,
5, 9), f = c(3, 4, 31, 39), g = c(3, 1, 5, 9), h = c(3, 3, 5,
9), i = c(3, 17, 3, 9), j = c(3, 17, 3, 9)), list(a = c(2, 3,
4, 35), b = c(3, 34, 5, 5), c = c(3, 37, 5, 5), d = c(38, 4,
39, 5), e = c(3, 34, 5, 9), f = c(33, 4, 1, 9), g = c(3, 1, 5,
9), h = c(3, 3, 35, 9), i = c(3, 17, 33, 9), j = c(3, 137, 3,
9)), list(a = c(23, 3, 4, 85), b = c(3, 4, 53, 5), c = c(3, 7,
5, 5), d = c(3, 4, 9, 5), e = c(3, 4, 5, 9), f = c(3, 34, 1,
9), g = c(38, 1, 5, 9), h = c(3, 3, 5, 9), i = c(3, 137, 3, 9
), j = c(3, 17, 3, 9)), list(a = c(2, 3, 48, 5), b = c(3, 4,
5, 53), c = c(3, 73, 53, 5), d = c(3, 43, 9, 5), e = c(33, 4,
5, 9), f = c(33, 4, 13, 9), g = c(3, 81, 5, 9), h = c(3, 3, 5,
9), i = c(3, 137, 3, 9), j = c(3, 173, 3, 9)))
As you can see, my list has 5 entries. Each entry contains 10 sub-entries, each holding 4 elements.
> mylist[[4]][[1]]
[1] 23 3 4 85
I want to create another list with only one entry.
I want to put all entries of type mylist[[i]][[1]] in the first position of a new list: mynewlist[[1]][[1]] will be filled by the mylist[[1]][[1]],mylist[[2]][[1]],mylist[[3]][[1]],mylist[[4]][[1]],mylist[[5]][[1]] elements.
The second position of mynewlist (mynewlist[[2]][[1]]) will be: mylist[[1]][[2]],mylist[[2]][[2]],mylist[[3]][[2]],mylist[[4]][[2]],mylist[[5]][[2]] elements.
Until
The fifth position of mynewlist (mynewlist[[5]][[1]]) will be: mylist[[1]][[5]],mylist[[2]][[5]],mylist[[3]][[5]],mylist[[4]][[5]],mylist[[5]][[5]] elements.
In other words, I want to put every mylist[[i]][[1]]$a in the mynewlist[[1]][[1]] position; the mylist[[i]][[1]]$b in the mynewlist[[1]][[2]] position and so on until mylist[[i]][[1]]$j in the mynewlist[[1]][[10]]
This should be my output for the first position of mynewlist:
#[[1]]
#[1] 2 3 4 5
2 5 48 4
2 3 4 35
23 3 4 85
2 3 48 5
Any help?
We can use transpose
library(dplyr)
# Flip the two nesting levels: element k of the result collects the k-th
# entry of every inner list of mylist.
out <- purrr::transpose(mylist)
out[[1]]
#[[1]]
#[1] 2 3 4 5
#[[2]]
#[1] 2 5 48 4
#[[3]]
#[1] 2 3 4 35
#[[4]]
#[1] 23 3 4 85
#[[5]]
#[1] 2 3 48 5
I have a data.table xSet with multiple columns. I need a new table with a moving 4 row average for each column individually.
We could use rollapplyr from zoo
library(zoo)
library(dplyr)
# Append a right-aligned 4-row moving average for every column of df1.
# `partial = TRUE` averages over the rows available so far at the start of the
# series instead of dropping those positions.
# The new columns are named <column>_New, the same names the former
# mutate_all(funs(New = ...)) call produced; across() replaces the deprecated
# funs() helper and the superseded scoped verb.
df1 %>%
  mutate(across(
    everything(),
    ~ rollapplyr(.x, width = 4, FUN = mean, partial = TRUE),
    .names = "{.col}_New"
  ))
Or similar option with data.table
library(data.table)
# Right-aligned 4-row moving mean; shorter windows are allowed at the start.
roll_mean4 <- function(v) rollapplyr(v, FUN = mean, width = 4, partial = TRUE)
# Convert df1 to a data.table by reference, then add one "New<column>" column
# per existing column, also by reference.
setDT(df1)
df1[, paste0("New", names(df1)) := lapply(.SD, roll_mean4)]
data
# Reproducible example data: 15 rows by 3 columns (Col1..Col3) of digits 0-9.
set.seed(24)
df1 <- as.data.frame(
  matrix(
    sample(0:9, size = 45, replace = TRUE),
    nrow = 15,
    ncol = 3,
    dimnames = list(NULL, paste0("Col", seq_len(3)))
  )
)
The answers by akrun and G. Grothendieck call the rollapplyr() function, which uses a right-aligned window by default.
But this is in contrast to the definition the OP has shown in the image.
This can be visualised by creating some suitable input data and by using toString() instead of mean() as aggregation function:
library(data.table)
# create suitable input data
# col1 equals the row number, so the window contents printed for col1 read
# directly as row indices
DT <- data.table(col1 = 1:15, col2 = 21:35, col3 = 41:55)
# Show, per column, which values fall into each right-aligned window of width 4:
# toString() lists the window contents instead of averaging them, and
# partial = TRUE allows shorter windows at the start. The results are bound to
# the original columns with a "New." name prefix.
DT[, cbind(.SD, New = zoo::rollapplyr(.SD, 4, toString, partial = TRUE))]
col1 col2 col3 New.col1 New.col2 New.col3
1: 1 21 41 1 21 41
2: 2 22 42 1, 2 21, 22 41, 42
3: 3 23 43 1, 2, 3 21, 22, 23 41, 42, 43
4: 4 24 44 1, 2, 3, 4 21, 22, 23, 24 41, 42, 43, 44
5: 5 25 45 2, 3, 4, 5 22, 23, 24, 25 42, 43, 44, 45
6: 6 26 46 3, 4, 5, 6 23, 24, 25, 26 43, 44, 45, 46
7: 7 27 47 4, 5, 6, 7 24, 25, 26, 27 44, 45, 46, 47
8: 8 28 48 5, 6, 7, 8 25, 26, 27, 28 45, 46, 47, 48
9: 9 29 49 6, 7, 8, 9 26, 27, 28, 29 46, 47, 48, 49
10: 10 30 50 7, 8, 9, 10 27, 28, 29, 30 47, 48, 49, 50
11: 11 31 51 8, 9, 10, 11 28, 29, 30, 31 48, 49, 50, 51
12: 12 32 52 9, 10, 11, 12 29, 30, 31, 32 49, 50, 51, 52
13: 13 33 53 10, 11, 12, 13 30, 31, 32, 33 50, 51, 52, 53
14: 14 34 54 11, 12, 13, 14 31, 32, 33, 34 51, 52, 53, 54
15: 15 35 55 12, 13, 14, 15 32, 33, 34, 35 52, 53, 54, 55
col1 is equal to the row numbers, New.col1 shows the row indices which are being involved in computing rollapplyr().
Compared to OP's image, only rows 1 and 2 do match. Apparently, a right aligned window does not meet OP's definition.
We can compare OP's requirement with the other alignment options for rolling windows:
# Rebuild the input so the comparison starts from a clean table.
DT <- data.table(col1 = 1:15, col2 = 21:35, col3 = 41:55)
# The three alignment options to compare; the same strings are used as the
# names of the new columns.
align_window <- c("center", "left", "right")
# For each alignment, add a column listing which col1 values (= row numbers)
# fall into the width-4 window; partial = TRUE allows shorter windows at the
# edges.
DT[, (align_window) := lapply(align_window,
function(x) zoo::rollapply(
col1, 4, toString, partial = TRUE, align = x))]
# add OP's definition from image
# rows 1-2 reuse the "right" windows, row 3 is set to "2, 3, 4", and
# rows 4-15 reuse the "center" windows; the trailing [] prints the result
DT[1:2, OP := right][3, OP := toString(2:4)][4:15, OP := center][]
col1 col2 col3 center left right OP
1: 1 21 41 1, 2, 3 1, 2, 3, 4 1 1
2: 2 22 42 1, 2, 3, 4 2, 3, 4, 5 1, 2 1, 2
3: 3 23 43 2, 3, 4, 5 3, 4, 5, 6 1, 2, 3 2, 3, 4
4: 4 24 44 3, 4, 5, 6 4, 5, 6, 7 1, 2, 3, 4 3, 4, 5, 6
5: 5 25 45 4, 5, 6, 7 5, 6, 7, 8 2, 3, 4, 5 4, 5, 6, 7
6: 6 26 46 5, 6, 7, 8 6, 7, 8, 9 3, 4, 5, 6 5, 6, 7, 8
7: 7 27 47 6, 7, 8, 9 7, 8, 9, 10 4, 5, 6, 7 6, 7, 8, 9
8: 8 28 48 7, 8, 9, 10 8, 9, 10, 11 5, 6, 7, 8 7, 8, 9, 10
9: 9 29 49 8, 9, 10, 11 9, 10, 11, 12 6, 7, 8, 9 8, 9, 10, 11
10: 10 30 50 9, 10, 11, 12 10, 11, 12, 13 7, 8, 9, 10 9, 10, 11, 12
11: 11 31 51 10, 11, 12, 13 11, 12, 13, 14 8, 9, 10, 11 10, 11, 12, 13
12: 12 32 52 11, 12, 13, 14 12, 13, 14, 15 9, 10, 11, 12 11, 12, 13, 14
13: 13 33 53 12, 13, 14, 15 13, 14, 15 10, 11, 12, 13 12, 13, 14, 15
14: 14 34 54 13, 14, 15 14, 15 11, 12, 13, 14 13, 14, 15
15: 15 35 55 14, 15 15 12, 13, 14, 15 14, 15
None of the alignment options does completely meet OP's definition. "center" is the best match except for the first 3 rows.