Filter days based on group_by - R

I have a data frame and I want to filter rows based on a grouping. For each combination of (cc, odd, tree1, tree2): if the group contains a day > 4, keep the whole group, otherwise drop it.
library(dplyr)

# data_frame() is deprecated in newer dplyr; tibble() is the modern equivalent
df <- data_frame(
  cc = c('BB', 'BB', 'BB', 'BB', 'BB', 'BB', 'BB', 'BB',
         'DD', 'DD', 'DD', 'DD', 'DD', 'DD', 'DD', 'DD',
         'ZZ', 'ZZ', 'ZZ', 'ZZ', 'ZZ', 'ZZ', 'ZZ', 'ZZ'),
  odd = c(3434, 3434, 3434, 3434, 3435, 3435, 3435, 3435,
          3434, 3434, 3434, 3434, 3435, 3435, 3435, 3435,
          3434, 3434, 3434, 3434, 3435, 3435, 3435, 3435),
  tree1 = c('ASP', 'ASP', 'ASP', 'ASP', 'SAP', 'SAP', 'SAP', 'SAP',
            'ASP', 'ASP', 'ASP', 'ASP', 'SAP', 'SAP', 'SAP', 'SAP',
            'ASP', 'ASP', 'ASP', 'ASP', 'SAP', 'SAP', 'SAP', 'SAP'),
  tree2 = c('ATK', 'ATK', 'ATK', 'ATK', 'ATK', 'ATK', 'ATK', 'ATK',
            'ATK', 'ATK', 'ATK', 'ATK', 'ATK', 'ATK', 'ATK', 'ATK',
            'ATK', 'ATK', 'ATK', 'ATK', 'ATK', 'ATK', 'ATK', 'ATK'),
  day = c(1, 2, 3, 4, 3, 4, 5, 6, 2, 3, 4, 5, 1, 3, 5, 7, 1, 2, 6, 8, 2, 4, 6, 8)
)
I tried this, but it drops every individual row with a day value of 4 or less:
df1 <- df %>%
  arrange(cc, odd, tree1, tree2, day) %>%
  group_by(cc, odd, tree1, tree2) %>%
  filter(day > 4)
I would like to get a df as below.
df2 <- data_frame(
  cc = c('BB', 'BB', 'BB', 'BB',
         'DD', 'DD', 'DD', 'DD', 'DD', 'DD', 'DD', 'DD',
         'ZZ', 'ZZ', 'ZZ', 'ZZ', 'ZZ', 'ZZ', 'ZZ', 'ZZ'),
  odd = c(3435, 3435, 3435, 3435,
          3434, 3434, 3434, 3434, 3435, 3435, 3435, 3435,
          3434, 3434, 3434, 3434, 3435, 3435, 3435, 3435),
  tree1 = c('SAP', 'SAP', 'SAP', 'SAP',
            'ASP', 'ASP', 'ASP', 'ASP', 'SAP', 'SAP', 'SAP', 'SAP',
            'ASP', 'ASP', 'ASP', 'ASP', 'SAP', 'SAP', 'SAP', 'SAP'),
  tree2 = c('ATK', 'ATK', 'ATK', 'ATK',
            'ATK', 'ATK', 'ATK', 'ATK', 'ATK', 'ATK', 'ATK', 'ATK',
            'ATK', 'ATK', 'ATK', 'ATK', 'ATK', 'ATK', 'ATK', 'ATK'),
  day = c(3, 4, 5, 6, 2, 3, 4, 5, 1, 3, 5, 7, 1, 2, 6, 8, 2, 4, 6, 8)
)

You can try:
df %>%
  group_by(cc, odd, tree1, tree2) %>%
  filter(any(day > 4))
# A tibble: 20 x 5
cc odd tree1 tree2 day
<chr> <dbl> <chr> <chr> <dbl>
1 BB 3435 SAP ATK 3
2 BB 3435 SAP ATK 4
3 BB 3435 SAP ATK 5
4 BB 3435 SAP ATK 6
5 DD 3434 ASP ATK 2
6 DD 3434 ASP ATK 3
7 DD 3434 ASP ATK 4
8 DD 3434 ASP ATK 5
9 DD 3435 SAP ATK 1
10 DD 3435 SAP ATK 3
11 DD 3435 SAP ATK 5
12 DD 3435 SAP ATK 7
13 ZZ 3434 ASP ATK 1
14 ZZ 3434 ASP ATK 2
15 ZZ 3434 ASP ATK 6
16 ZZ 3434 ASP ATK 8
17 ZZ 3435 SAP ATK 2
18 ZZ 3435 SAP ATK 4
19 ZZ 3435 SAP ATK 6
20 ZZ 3435 SAP ATK 8
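
Note that inside a grouped filter, any(day > 4) is evaluated once per group and recycled across all of that group's rows, so each group is kept or dropped as a whole. An equivalent formulation (a stylistic alternative, not from the answer above) uses max():

library(dplyr)

df %>%
  group_by(cc, odd, tree1, tree2) %>%
  filter(max(day) > 4) %>%  # TRUE for every row of a group whose largest day exceeds 4
  ungroup()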

Related

How to solve a barplot with multiple error lines using ggplot?

I am having some issues plotting this data. I tried to replicate the code from a linked example to process the data.
Treatment TIME N len sd se
1 M1 4 44,025 2,35990819 1,179954095
1 M1 4 43,45 2,653927907 1,326963953
1 M1 4 39,825 2,681262141 1,34063107
1 M1 4 43,975 5,341894171 2,670947085
1 M1 4 41,375 3,096637962 1,548318981
1 M1 4 43,425 2,547384279 1,27369214
1 M1 4 39,45 2,598076211 1,299038106
1 M1 4 41,05 3,511409973 1,755704987
1 M1 4 40,925 1,77270979 0,886354895
1 M1 4 40,075 3,237668915 1,618834457
1 M1 4 42,375 2,758471799 1,3792359
2 M1 4 40,975 4,560975773 2,280487886
2 M1 4 40,55 3,660145717 1,830072858
2 M1 4 36,975 3,358943286 1,679471643
2 M1 4 43,175 1,64797856 0,82398928
2 M1 4 36,45 5,453744891 2,726872445
2 M1 4 41,2 2,246478726 1,123239363
2 M1 4 42,7 4,48924641 2,244623205
2 M1 4 39,5 2,759226945 1,379613472
2 M1 4 44,375 6,335810919 3,167905459
2 M1 4 42,75 1,721433511 0,860716756
2 M1 4 40,85 1,707825128 0,853912564
3 M1 4 45,975 0,699404509 0,349702254
3 M1 4 44,2 3,03644529 1,518222645
3 M1 4 42,6 4,429446918 2,214723459
3 M1 4 45,55 5,269092268 2,634546134
3 M1 4 46,525 2,022168803 1,011084401
3 M1 4 45,675 3,597568623 1,798784312
3 M1 4 47,075 2,46221445 1,231107225
3 M1 4 47,3 0,783156008 0,391578004
3 M1 4 42,025 2,639917928 1,319958964
3 M1 4 49,05 5,382997926 2,691498963
3 M1 4 48,25 4,591659105 2,295829552
1 M2 4 216,5 5,066228051 2,533114026
1 M2 4 205,75 4,991659711 2,495829855
1 M2 4 210,75 11,8988795 5,94943975
1 M2 4 204,75 23,41473895 11,70736947
1 M2 4 198,75 6,396613687 3,198306844
1 M2 4 219,75 8,732124598 4,366062299
1 M2 4 195,75 16,56049516 8,280247581
1 M2 4 219,75 7,719024118 3,859512059
1 M2 4 197,5 5,259911279 2,62995564
1 M2 4 216,25 8,995369179 4,49768459
1 M2 4 212 12,4365054 6,218252702
2 M2 4 210,25 7,041543391 3,520771696
2 M2 4 214,25 16,31716887 8,158584436
2 M2 4 208,75 9,708243919 4,85412196
2 M2 4 220,75 16,17353806 8,086769029
2 M2 4 218 30,62678566 15,31339283
2 M2 4 234 40,02499219 20,0124961
2 M2 4 217,5 5,567764363 2,783882181
2 M2 4 214,25 12,28481447 6,142407237
2 M2 4 207 13,6381817 6,819090848
2 M2 4 210,25 8,578072822 4,289036411
2 M2 4 202,75 11,52894907 5,764474535
3 M2 4 98,75 19,92276755 9,961383773
3 M2 4 101,25 10,04572878 5,022864389
3 M2 4 96,75 14,43086969 7,215434845
3 M2 4 110,5 18,06469854 9,03234927
3 M2 4 102,25 4,031128874 2,015564437
3 M2 4 109 20,54263858 10,27131929
3 M2 4 114 14,49137675 7,245688373
3 M2 4 116,25 12,71154331 6,355771655
3 M2 4 90,75 61,74881915 30,87440958
3 M2 4 123,5 26,78930135 13,39465067
3 M2 4 132,75 27,54844218 13,77422109
1 M3 4 249,75 26,06881918 13,03440959
1 M3 4 268,75 21,8384218 10,9192109
1 M3 4 241,25 27,80137886 13,90068943
1 M3 4 232,25 26,107151 13,0535755
1 M3 4 271,5 20,63169083 10,31584542
1 M3 4 277,25 26,77529956 13,38764978
1 M3 4 242 12,75408431 6,377042157
1 M3 4 260 19,4422221 9,721111048
1 M3 4 256,25 23,8100119 11,90500595
1 M3 4 254 4,898979486 2,449489743
1 M3 4 250,25 13,72042273 6,860211367
2 M3 4 256,75 16,58061117 8,290305583
2 M3 4 264,5 26,71454036 13,35727018
2 M3 4 246,5 14,10673598 7,05336799
2 M3 4 266 17,64464036 8,822320179
2 M3 4 266,25 24,87803583 12,43901791
2 M3 4 266,75 16,17353806 8,086769029
2 M3 4 247,25 51,93184636 25,96592318
2 M3 4 258,25 37,93305507 18,96652753
2 M3 4 238,5 65,26612189 32,63306095
2 M3 4 260 19,8158186 9,907909298
2 M3 4 248,75 27,80137886 13,90068943
3 M3 4 108,75 26,65051594 13,32525797
3 M3 4 106,25 17,05627939 8,528139696
3 M3 4 109,25 17,93274472 8,966372362
3 M3 4 120,5 20,48576742 10,24288371
3 M3 4 107,25 2,5 1,25
3 M3 4 129 23,98610709 11,99305355
3 M3 4 131,5 15,75859554 7,879297769
3 M3 4 143,75 19,87251033 9,936255163
3 M3 4 117,75 80,267781 40,1338905
3 M3 4 139,5 36,24453982 18,12226991
3 M3 4 154,75 25,61737691 12,80868846
Here the example to be reproduced:
alt2 <- structure(list(Treatment = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3), measurement = c("D", "D", "D", "D", "D",
"D", "D", "D", "D", "D", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "D", "D", "D", "D",
"D", "D", "D", "D", "D", "D", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F"), B1 = c(20.56, 19.7, 22.9, 21.1, 20.11, 22.98,
19.17, 21.67, 21.56, 20.56, 28.91, 28.01, 28.45, 29.23, 28.34,
28.1, 29.03, 28.22, 29.36, 29.87, 22.56, 21.48, 17.63, 20.78,
24.79, 25, 24.67, 23.51, 19.47, 22.85, 27.98, 28.1, 28.2, 28.22,
28.15, 28.97, 29.43, 29.05, 29.37, 29.39, 25.3, 24.56, 22.76,
23.47, 22.73, 24.98, 20.56, 27.1, 25.87, 23.46, 29.03, 29.67,
29.56, 28.69, 28.93, 29.01, 29.73, 29.77, 28.79, 28.83), B2 = c(19.78,
20.98, 22.27, 21.68, 21.56, 24.86, 23.45, 24.61, 23.56, 21.46,
28.56, 28.74, 28.37, 29.04, 29.85, 28.15, 27.99, 29.88, 28.74,
28.57, 21.47, 20.48, 25.12, 21.13, 22.76, 18.48, 22.76, 23.91,
17.27, 24.26, 28.64, 28.73, 28.47, 28.38, 28.26, 28.88, 29.06,
29.28, 29.59, 29.64, 21.45, 22.56, 27.45, 23.11, 20.03, 20.9,
21.1, 25.02, 24.16, 22.71, 28.54, 27.09, 29.03, 29.47, 29.58,
29.38, 28.05, 29.74, 28.5, 27.3), B3 = c(20.24, 16.42, 23.51,
22.41, 21.63, 24.61, 24.11, 23.57, 18.31, 19.61, 28.27, 29.07,
26.98, 29.33, 28.19, 28.54, 29.08, 29.7, 29.59, 29.58, 17.9,
21.45, 20.56, 22.74, 23.59, 20.01, 21.17, 22.11, 24.14, 23.35,
28.16, 28.38, 28.47, 28.94, 28.46, 27.47, 26.45, 28.49, 29.05,
29.79, 20.98, 26.93, 20.75, 19.63, 24.72, 24.07, 17.26, 25.66,
21.23, 21.78, 29.79, 29.64, 29.57, 29.32, 29.48, 29.77, 29.05,
29.11, 28.97, 29.59), B4 = c(25.61, 20.12, 19.42, 22.67, 24.31,
23.12, 18.24, 17.24, 21.58, 22.48, 29.01, 29.7, 28.77, 28.59,
28.74, 28.49, 28.08, 28.39, 28.4, 28.67, 19.02, 18.65, 20.72,
21.61, 20.41, 22.01, 23.71, 20.05, 22.13, 20.1, 28.46, 28.47,
28.38, 29.06, 28.48, 28.73, 27.9, 29.59, 29.4, 28.38, 24.31,
19.09, 24.89, 24.64, 21.47, 25.04, 22.51, 21.1, 20.27, 23.64,
28.57, 28.08, 29.19, 29.61, 29.84, 28.07, 29.18, 29.59, 29.58,
28.22), N = c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-60L))
The code used is this:
library(ggplot2)
ggplot(alt2, aes(x = Treatment)) +
  geom_bar(aes(y = len, fill = TIME),
           stat = "identity", alpha = 0.5,
           position = position_dodge()) +
  scale_fill_manual(values = c("grey", "black", "blue")) +
  geom_errorbar(aes(ymin = len - se, ymax = len + se, group = TIME),
                width = 0.2, colour = "black",
                position = position_dodge(0.9)) +
  theme_light() +
  xlab("doses") +
  ylab("len")
I don't understand why I got that figure. I am working with 3 doses (1, 2, 3) at different times (M1, M2, M3), but I am getting a figure with multiple error lines per bar. How can I solve this?
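
No answer is included above; a likely cause, offered as an assumption rather than a confirmed diagnosis, is that the summary table has eleven rows per Treatment x TIME combination, so stat = "identity" stacks eleven bars and draws eleven error bars at every dodged position. A sketch that first collapses the data to one row per combination (dat stands for the posted summary table with decimal commas converted to dots; averaging the per-group se values is one of several defensible choices):

library(dplyr)
library(ggplot2)

# Hypothetical aggregation: one bar and one error bar per Treatment x TIME
dat_sum <- dat %>%
  group_by(Treatment, TIME) %>%
  summarise(len_mean = mean(len), se_mean = mean(se), .groups = "drop")

ggplot(dat_sum, aes(x = factor(Treatment), y = len_mean, fill = TIME)) +
  geom_bar(stat = "identity", alpha = 0.5, position = position_dodge()) +
  geom_errorbar(aes(ymin = len_mean - se_mean, ymax = len_mean + se_mean),
                width = 0.2, colour = "black", position = position_dodge(0.9)) +
  scale_fill_manual(values = c("grey", "black", "blue")) +
  theme_light() +
  xlab("doses") +
  ylab("len")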

The difference of multiple columns in R based on DATE [duplicate]

This question already has answers here:
subtract value from previous row by group
(3 answers)
Closed 1 year ago.
I have this data:
structure(list(new_col = c(1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5,
6, 7, 1, 2, 3, 4, 5, 6, 7), DATE = structure(c(1608249600, 1608249600,
1608249600, 1608249600, 1608249600, 1608249600, 1608249600, 1608336000,
1608336000, 1608336000, 1608336000, 1608336000, 1608336000, 1608336000,
1608422400, 1608422400, 1608422400, 1608422400, 1608422400, 1608422400,
1608422400), tzone = "UTC", class = c("POSIXct", "POSIXt")),
HOSP_COUNT = c(582, 931, 1472, 2175, 2791, 3024, 2310, 588,
932, 1477, 2186, 2810, 3051, 2330, 590, 932, 1479, 2188,
2817, 3060, 2335)), row.names = c(NA, -21L), class = c("tbl_df",
"tbl", "data.frame"))
HOSP_COUNT is a cumulative variable, while I need a daily measure. Something like this:
structure(list(X1 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21), new_col = c(1, 2, 3, 4,
5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7), DATE = c("12/18/2020",
"12/18/2020", "12/18/2020", "12/18/2020", "12/18/2020", "12/18/2020",
"12/18/2020", "12/19/2020", "12/19/2020", "12/19/2020", "12/19/2020",
"12/19/2020", "12/19/2020", "12/19/2020", "12/20/2020", "12/20/2020",
"12/20/2020", "12/20/2020", "12/20/2020", "12/20/2020", "12/20/2020"
), HOSP_COUNT = c(582, 931, 1472, 2175, 2791, 3024, 2310, 6,
1, 5, 11, 19, 27, 20, 2, 1, 2, 2, 7, 9, 15)), row.names = c(NA,
-21L), class = c("tbl_df", "tbl", "data.frame"))
So I need a new column; the formula is:
new value for new_col 1 on the second date (12/19/2020) = (HOSP_COUNT of new_col 1 on 12/19/2020) - (HOSP_COUNT of new_col 1 on the first date, 12/18/2020), and likewise within each new_col group.
This should work for you:
library(dplyr)

df %>%
  group_by(new_col) %>%
  mutate(new_count = HOSP_COUNT - lag(HOSP_COUNT),
         # the first row of each group has no previous value, so keep the original count
         new_count = ifelse(is.na(new_count), HOSP_COUNT, new_count))
Here new_count is your corrected daily count:
# A tibble: 21 x 4
# Groups: new_col [7]
new_col DATE HOSP_COUNT new_count
<dbl> <dttm> <dbl> <dbl>
1 1 2020-12-18 00:00:00 582 582
2 2 2020-12-18 00:00:00 931 931
3 3 2020-12-18 00:00:00 1472 1472
4 4 2020-12-18 00:00:00 2175 2175
5 5 2020-12-18 00:00:00 2791 2791
6 6 2020-12-18 00:00:00 3024 3024
7 7 2020-12-18 00:00:00 2310 2310
8 1 2020-12-19 00:00:00 588 6
9 2 2020-12-19 00:00:00 932 1
10 3 2020-12-19 00:00:00 1477 5
11 4 2020-12-19 00:00:00 2186 11
12 5 2020-12-19 00:00:00 2810 19
13 6 2020-12-19 00:00:00 3051 27
14 7 2020-12-19 00:00:00 2330 20
15 1 2020-12-20 00:00:00 590 2
16 2 2020-12-20 00:00:00 932 0
17 3 2020-12-20 00:00:00 1479 2
18 4 2020-12-20 00:00:00 2188 2
19 5 2020-12-20 00:00:00 2817 7
20 6 2020-12-20 00:00:00 3060 9
21 7 2020-12-20 00:00:00 2335 5
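
A slightly more compact variant (an equivalent rewrite, not part of the original answer) uses coalesce() to handle the NA in each group's first row and sorts by date first for safety:

library(dplyr)

df %>%
  group_by(new_col) %>%
  arrange(DATE, .by_group = TRUE) %>%  # make sure each group is in date order
  mutate(new_count = coalesce(HOSP_COUNT - lag(HOSP_COUNT), HOSP_COUNT)) %>%
  ungroup()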

Why can't I plot my model selection plot correctly?

I am working with multiple regression models. After running the dredge function, I got approximately 54 000 different combinations. I selected the first 300 models and ran this code:
par(mar = c(1, 4, 10, 3))
plot(fitt, labels = c("Intercept", "YOFE", "'RW Closeness'", "'LW Closeness'",
                      "Age", "SES", "'GAD-7 Score'", "Fantasy",
                      "'Personal Distress'", "'Empathic Concern'",
                      "'Perspective Taking'", "'PHQ-9 Score'",
                      "'Religious Affinity'", "'Agreement with IH'",
                      "'Moral Judgement of IH'", "'Harm Assessment of IH'",
                      "'Agreement with IB'", "'Moral Judgement of IB'",
                      "RMET", "Sex"),
     ylab = expression("Cumulative" ~ italic(w[i] * (AICc))),
     col = colfunc(1), border = "gray30", labAsExpr = TRUE)
10 minutes later, I got this error:
Error in (function (text, side = 3, line = 0, outer = FALSE, at = NA, :
zero-length 'text' specified
In addition: Warning message:
In max(strwidth(arg[["text"]], cex = arg$cex, units = "in")) :
no non-missing arguments to max; returning -Inf
And this is the output plot:
I've tried plotting only the first model and the same error appears:
This also happens when using the whole model selection table (54 000 combinations).
What is a solution to this?
I'm running the latest version of R and RStudio on my 2016 12-inch MacBook.
Note: I've tried increasing the plot-window size manually by dragging the edges without any improvement.
This is what I'd like my plot to look like:
EDIT: Here is the data file and the code.
modeloglobal <- lm(PROMEDIO_CREENCIA_NFALSA_CORONAVIRUS ~ Edad + Sex + AnEdu +
                     Estrato_1 + GAD_TOTAL + PHQ_TOTAL + PracticRel_2 +
                     CercanPolDer_1 + CercanPolIz_1 + RMET_TOTAL +
                     IRI_PREOCUPACIÓN_EMPATICA + IRI_FANTASÍA +
                     IRI_MALESTAR_PERSONAL + IRI_TOMA_DE_PERSPECTIVA +
                     PROMEDIO_DILEMAS_BI_ACTUARIGUAL_CORONAVIRUS +
                     PROMEDIO_DILEMAS_BI_BIENOMAL_CORONAVIRUS +
                     PROMEDIO_DI_SINPOL_ACTUARIGUAL +
                     PROMEDIO_DI_SINPOL_BIENOMAL +
                     PROMEDIO_DI_SINPOL_DANO,
                   data = fake_news, na.action = "na.fail")

library(MuMIn)
fitt <- dredge(modeloglobal, trace = 2)
m.sel <- model.sel(fitt)
m.sel2 <- m.sel[1:300, ]

library(binovisualfields)
And the code that triggers the error (using a subset of the first 300 rows):
par(mar = c(1, 4, 10, 3))
plot(m.sel2, labels = c("Intercept", "YOFE", "'RW Closeness'", "'LW Closeness'",
                        "Age", "SES", "'GAD-7 Score'", "Fantasy",
                        "'Personal Distress'", "'Empathic Concern'",
                        "'Perspective Taking'", "'PHQ-9 Score'",
                        "'Religious Affinity'", "'Agreement with IH'",
                        "'Moral Judgement of IH'", "'Harm Assessment of IH'",
                        "'Agreement with IB'", "'Moral Judgement of IB'",
                        "RMET", "Sex"),
     ylab = expression("Cumulative" ~ italic(w[i] * (AICc))),
     col = colfunc(1), border = "gray30", labAsExpr = TRUE)
EDIT 2: Here are the first six rows of the data frame:
ResponseId Edad Sex Genero Nacion Resid Estrato_1 Gastos salud
1 R_25GEak825Ohmb9G 18 Female Femenino Colombia Colombia 7 Seguro privado
2 R_1kT7u0PALDHV8H6 20 Female Femenino Colombia Colombia 5 Seguro privado
3 R_2cpBb5Ifzj7lVGs 21 Female Femenino Colombia Colombia 6 Seguro privado
4 R_sGqNUMTXTJzwC09 20 Male Masculino Colombia Colombia 5 Seguro del Estado
5 R_2Cpixt9Z5FJkhg1 36 Male Masculino Colombia Colombia 6 Otro (especifique)
6 R_3QFq50SZNs6CePA 18 Female Femenino Colombia Colombia 7 Seguro privado
Relig PracticRel_2 AnEdu Q161 Ecron Epsiq Q183 Eneu Q184
1 Ninguna 0 15 Estudiante 1 0 <NA> 0 <NA>
2 Cristianismo (Catolicismo) 2 15 Estudiante 0 0 <NA> 0 <NA>
3 Cristianismo (Catolicismo) 2 19 Estudiante 0 0 <NA> 0 <NA>
4 Cristianismo (Catolicismo) 2 15 Estudiante 0 0 <NA> 0 <NA>
5 Cristianismo (Catolicismo) 1 17 Empleado de tiempo completo 0 0 <NA> 0 <NA>
6 Cristianismo (Catolicismo) 4 15 Estudiante 0 0 <NA> 0 <NA>
NPviven Sustancias Pviven AdhAS LevantarAS_1 CumplimAS_1 HorasFuera
1 1 1 Padres 1 5 6 Menos de una hora
2 3 0 Padres,Hermanos 1 1 6 Menos de una hora
3 4 0 Padres,Hermanos 1 2 6 Menos de una hora
4 4 0 Padres,Hermanos 1 2 6 Menos de una hora
5 3 0 Pareja,Hijos 1 2 3 Entre cuatro y seis horas
6 3 0 Padres,Hermanos 1 2 6 Entre una y tres horas
Apoyo CV19_1 ContagUd ContagEC Prob_1_contagio Prob_2_familiar_contagio
1 1 No 0 81 100
2 4 No 0 81 35
3 6 No 0 60 80
4 4 No 0 4 15
5 5 No 0 40 40
6 6 No 0 79 86
Prob_3_contagio_poblaciongeneral Caract_1 Caract_2 Inv_3 Caract_3 Caract_4 Caract_5 Caract_6 Caract_8
1 87 4 2 1 6 4 5 4 5
2 81 5 4 3 4 4 5 2 3
3 80 4 4 1 6 6 6 1 2
4 20 6 5 5 2 1 5 1 5
5 60 2 1 2 5 4 3 2 3
6 70 5 4 2 5 6 2 5 6
Caract_9 Caract_11 Caract_14 INV_15 Caract_15 Caract_16 Caract_17 CompPan_1 CompPan_2 CompPan_3
1 5 3 2 4 3 5 5 1 6 1
2 4 5 4 5 2 3 3 4 5 8
3 6 1 6 6 1 6 6 1 1 1
4 5 5 2 6 1 3 1 1 3 2
5 4 1 1 5 2 2 2 2 2 2
6 6 2 3 5 2 6 5 2 7 3
CompPan_4 CompPan_5 CompPan_6 CercanPolDer_1 CercanPolIz_1 IDpol_1 PHQ_TOTAL GAD_TOTAL
1 5 5 7 8 2 5 8 6
2 8 8 8 7 3 5 4 3
3 3 2 4 6 3 4 2 3
4 4 3 3 5 5 4 3 3
5 3 3 2 5 5 4 2 2
6 6 2 7 3 8 3 7 7
INTEROCEPCION_TOTAL BIS BAS_FUN_SEEKING BAS_REWARD_RESPONSIVENESS BAS_DRIVE BAS_TOTAL
1 45 19 14 19 11 44
2 44 20 10 17 14 41
3 24 17 10 19 13 42
4 17 17 9 14 8 31
5 36 21 10 17 11 38
6 41 25 6 17 13 36
IRI_TOMA_DE_PERSPECTIVA IRI_MALESTAR_PERSONAL IRI_FANTASÍA IRI_PREOCUPACIÓN_EMPATICA RMET_TOTAL
1 14 13 14 19 7
2 18 11 14 20 4
3 17 4 10 20 10
4 16 9 11 12 7
5 10 11 7 10 10
6 16 11 16 18 8
PROMEDIO_TIEMPO_REACCION_RMET PROMEDIO_CREENCIA_NFALSA_TODAS PROMEDIO_CREENCIA_NFALSA_CORONAVIRUS
1 2.411750 2.8 2.666667
2 3.348500 2.8 2.333333
3 3.261083 2.4 2.000000
4 6.390500 2.2 1.666667
5 13.212667 1.8 1.333333
6 4.218583 3.6 2.666667
PROMEDIO_CREENCIA_NFALSA_OTRO PROMEDIO_TIEMPOREACCION_NFALSA PROMEDIO_CREENCIA_NVERDADERA_TODAS
1 3.0 4.3438 3.333333
2 3.5 9.4222 3.000000
3 3.0 5.9734 3.666667
4 3.0 10.1448 2.666667
5 2.5 16.3196 1.333333
6 5.0 7.1954 3.333333
PROMEDIO_CREENCIA_NVERDADERA_CORONAVIRUS PROMEDIO_CREENCIA_NVERDADERA_OTRO
1 5 5
2 4 5
3 6 5
4 5 3
5 1 3
6 6 4
PROMEDIO_TIEMPOREACCION_NVERDADERA PROMEDIO_CREENCIA_NMISLEADING_TODAS
1 5.6440 2.666667
2 7.0430 2.666667
3 8.0265 3.666667
4 4.0495 3.000000
5 32.2400 1.666667
6 9.5830 4.333333
PROMEDIO_TIEMPOREACCION_NMISLEADING PROMEDIO_DILEMAS_BI_BIENOMAL_CORONAVIRUS
1 5.726667 1.000000
2 12.012333 4.000000
3 5.753000 4.333333
4 4.969667 1.333333
5 15.233000 0.000000
6 30.045667 3.666667
PROMEDIO_DILEMAS_BI_ACTUARIGUAL_CORONAVIRUS DILEMA_BI_CONTROL_BIENOMAL DILEMA_BI_CONTROL_ACTUARIGUAL
1 5.666667 4 7
2 7.666667 5 4
3 9.666667 2 6
4 4.333333 0 2
5 3.666667 -3 2
6 9.333333 4 10
PROMEDIO_DILEMAS_BI_BIENOMAL_JUNTOS PROMEDIO_DILEMAS_BI_ACTUARIGUAL_JUNTOS
1 1.75 6.00
2 4.25 6.75
3 3.75 8.75
4 1.00 3.75
5 -0.75 3.25
6 3.75 9.50
PROMEDIO_DILEMAS_DI_BIENOMAL PROMEDIO_DILEMAS_DI_ACTUARIGUAL PROMEDIO_DILEMAS_DI_DANO
1 0.5000000 6.666667 5.666667
2 1.8333333 7.666667 6.166667
3 0.5000000 5.666667 5.333333
4 1.6666667 5.000000 5.500000
5 0.8333333 4.833333 5.666667
6 0.1666667 5.166667 7.000000
TIEMPOREACCION_DILEMAS_DI TIEMPOREACCION_DILEMAS_BI PROMEDIO_DI_SINPOL_BIENOMAL
1 12.140500 7.89900 0.2
2 9.130667 9.99550 1.2
3 6.998333 9.25175 -1.0
4 1.857833 2.84125 0.4
5 19.014333 32.82850 0.8
6 11.633667 16.92000 0.2
PROMEDIO_DI_SINPOL_ACTUARIGUAL PROMEDIO_DI_SINPOL_DANO COMPRAS_COVID19 PERCEPCION_RIESGO_TOTAL
1 7.00 7.25 4.166667 39
2 8.00 6.75 6.833333 37
3 4.25 7.25 2.000000 42
4 4.50 7.00 2.666667 38
5 5.00 7.75 2.333333 26
6 5.50 7.75 4.500000 46
PERCEPCION_RIESGO_INDICE PROB_CONTAGIO_TOTAL PROMEDIO_DILEMASPOLITICOS_BIENOMAL
1 3.9 89.33333 1.0
2 3.7 65.66667 2.5
3 4.2 73.33333 4.0
4 3.8 13.00000 4.0
5 2.6 46.66667 0.5
6 4.6 78.33333 0.0
PROMEDIO_DILEMASPOLITICOS_ACTUARIGUAL PROMEDIO_DILEMASPOLITICOS_DANO D31_1_DI D32_2_DI D33_3_DI
1 6.0 2.5 -2 4 9
2 7.0 5.0 3 9 7
3 8.5 1.5 -3 3 8
4 6.0 2.5 0 3 8
5 4.5 1.5 -2 4 8
6 4.5 5.5 4 9 7
D41_1_DI D42_2_DI D43_3_DI D51_1_DI D52_2_DI D53_3_DI D61_1_DI D62_2_DI D63_3_DI D71_1_DIP D72_2_DIP
1 -1 7 7 5 10 4 -1 7 9 0 4
2 1 8 9 0 7 4 2 8 7 3 7
3 0 6 7 1 5 6 -3 3 8 3 7
4 0 5 8 4 7 3 -2 3 9 4 3
5 3 7 9 1 3 7 2 6 7 -2 2
6 1 8 6 0 4 9 -4 1 9 -4 1
D73_3_DIP D81_1_DIP D82_2_DIP D83_3_DIP D91_1_BI D92_2_BI D101_1_BI D102_2_BI D111_1_BI D112_2_BI
1 3 2 8 2 -3 4 3 9 3 4
2 6 2 7 4 3 8 5 8 4 7
3 2 5 10 1 5 10 5 10 3 9
4 2 4 9 3 4 9 0 2 0 2
5 2 3 7 1 -1 3 3 6 -2 2
6 8 4 8 3 4 9 5 10 2 9
D121_1_BI D122_2_BI total_iri promedio_falsaymisleading prediccioncompraspercprob
1 4 7 60 2.750 4.249759
2 5 4 63 2.750 4.404450
3 2 6 51 2.875 4.431635
4 0 2 48 2.500 5.143974
5 -3 2 38 1.750 3.765907
6 4 10 61 3.875 4.893797
prediccioncomprasperc
1 4.474456
2 4.439994
3 4.521980
4 4.689385
5 3.762449
6 4.967286
Here is the raw dput() output:
structure(list(ResponseId = c("R_25GEak825Ohmb9G", "R_1kT7u0PALDHV8H6",
"R_2cpBb5Ifzj7lVGs", "R_sGqNUMTXTJzwC09", "R_2Cpixt9Z5FJkhg1",
"R_3QFq50SZNs6CePA"), Edad = c(18, 20, 21, 20, 36, 18), Sex = structure(c(2L,
2L, 2L, 1L, 1L, 2L), .Label = c("Male", "Female"), class = "factor"),
Genero = c("Femenino", "Femenino", "Femenino", "Masculino",
"Masculino", "Femenino"), Nacion = c("Colombia", "Colombia",
"Colombia", "Colombia", "Colombia", "Colombia"), Resid = c("Colombia",
"Colombia", "Colombia", "Colombia", "Colombia", "Colombia"
), Estrato_1 = c(7, 5, 6, 5, 6, 7), `Gastos salud` = c("Seguro privado",
"Seguro privado", "Seguro privado", "Seguro del Estado",
"Otro (especifique)", "Seguro privado"), Relig = c("Ninguna",
"Cristianismo (Catolicismo)", "Cristianismo (Catolicismo)",
"Cristianismo (Catolicismo)", "Cristianismo (Catolicismo)",
"Cristianismo (Catolicismo)"), PracticRel_2 = c(0, 2, 2,
2, 1, 4), AnEdu = c(15, 15, 19, 15, 17, 15), Q161 = c("Estudiante",
"Estudiante", "Estudiante", "Estudiante", "Empleado de tiempo completo",
"Estudiante"), Ecron = c(1, 0, 0, 0, 0, 0), Epsiq = c(0,
0, 0, 0, 0, 0), Q183 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), Eneu = c(0,
0, 0, 0, 0, 0), Q184 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), NPviven = c("1",
"3", "4", "4", "3", "3"), Sustancias = c(1, 0, 0, 0, 0, 0
), Pviven = c("Padres", "Padres,Hermanos", "Padres,Hermanos",
"Padres,Hermanos", "Pareja,Hijos", "Padres,Hermanos"), AdhAS = c(1,
1, 1, 1, 1, 1), LevantarAS_1 = c(5, 1, 2, 2, 2, 2), CumplimAS_1 = c(6,
6, 6, 6, 3, 6), HorasFuera = c("Menos de una hora", "Menos de una hora",
"Menos de una hora", "Menos de una hora", "Entre cuatro y seis horas",
"Entre una y tres horas"), `Apoyo CV19_1` = c(1, 4, 6, 4,
5, 6), ContagUd = c("No", "No", "No", "No", "No", "No"),
ContagEC = c(0, 0, 0, 0, 0, 0), Prob_1_contagio = c(81, 81,
60, 4, 40, 79), Prob_2_familiar_contagio = c(100, 35, 80,
15, 40, 86), Prob_3_contagio_poblaciongeneral = c(87, 81,
80, 20, 60, 70), Caract_1 = c(4, 5, 4, 6, 2, 5), Caract_2 = c(2,
4, 4, 5, 1, 4), Inv_3 = c(1, 3, 1, 5, 2, 2), Caract_3 = c(6,
4, 6, 2, 5, 5), Caract_4 = c(4, 4, 6, 1, 4, 6), Caract_5 = c(5,
5, 6, 5, 3, 2), Caract_6 = c(4, 2, 1, 1, 2, 5), Caract_8 = c(5,
3, 2, 5, 3, 6), Caract_9 = c(5, 4, 6, 5, 4, 6), Caract_11 = c(3,
5, 1, 5, 1, 2), Caract_14 = c(2, 4, 6, 2, 1, 3), INV_15 = c(4,
5, 6, 6, 5, 5), Caract_15 = c(3, 2, 1, 1, 2, 2), Caract_16 = c(5,
3, 6, 3, 2, 6), Caract_17 = c(5, 3, 6, 1, 2, 5), CompPan_1 = c(1,
4, 1, 1, 2, 2), CompPan_2 = c(6, 5, 1, 3, 2, 7), CompPan_3 = c(1,
8, 1, 2, 2, 3), CompPan_4 = c(5, 8, 3, 4, 3, 6), CompPan_5 = c(5,
8, 2, 3, 3, 2), CompPan_6 = c(7, 8, 4, 3, 2, 7), CercanPolDer_1 = c(8,
7, 6, 5, 5, 3), CercanPolIz_1 = c(2, 3, 3, 5, 5, 8), IDpol_1 = c(5,
5, 4, 4, 4, 3), PHQ_TOTAL = c(8, 4, 2, 3, 2, 7), GAD_TOTAL = c(6,
3, 3, 3, 2, 7), INTEROCEPCION_TOTAL = c(45, 44, 24, 17, 36,
41), BIS = c(19, 20, 17, 17, 21, 25), BAS_FUN_SEEKING = c(14,
10, 10, 9, 10, 6), BAS_REWARD_RESPONSIVENESS = c(19, 17,
19, 14, 17, 17), BAS_DRIVE = c(11, 14, 13, 8, 11, 13), BAS_TOTAL = c(44,
41, 42, 31, 38, 36), IRI_TOMA_DE_PERSPECTIVA = c(14, 18,
17, 16, 10, 16), IRI_MALESTAR_PERSONAL = c(13, 11, 4, 9,
11, 11), IRI_FANTASÍA = c(14, 14, 10, 11, 7, 16), IRI_PREOCUPACIÓN_EMPATICA = c(19,
20, 20, 12, 10, 18), RMET_TOTAL = c(7, 4, 10, 7, 10, 8),
PROMEDIO_TIEMPO_REACCION_RMET = c(2.41175, 3.3485, 3.26108333333333,
6.3905, 13.2126666666667, 4.21858333333333), PROMEDIO_CREENCIA_NFALSA_TODAS = c(2.8,
2.8, 2.4, 2.2, 1.8, 3.6), PROMEDIO_CREENCIA_NFALSA_CORONAVIRUS = c(2.66666666666667,
2.33333333333333, 2, 1.66666666666667, 1.33333333333333,
2.66666666666667), PROMEDIO_CREENCIA_NFALSA_OTRO = c(3, 3.5,
3, 3, 2.5, 5), PROMEDIO_TIEMPOREACCION_NFALSA = c(4.3438,
9.4222, 5.9734, 10.1448, 16.3196, 7.1954), PROMEDIO_CREENCIA_NVERDADERA_TODAS = c(3.33333333333333,
3, 3.66666666666667, 2.66666666666667, 1.33333333333333,
3.33333333333333), PROMEDIO_CREENCIA_NVERDADERA_CORONAVIRUS = c(5,
4, 6, 5, 1, 6), PROMEDIO_CREENCIA_NVERDADERA_OTRO = c(5,
5, 5, 3, 3, 4), PROMEDIO_TIEMPOREACCION_NVERDADERA = c(5.644,
7.043, 8.0265, 4.0495, 32.24, 9.583), PROMEDIO_CREENCIA_NMISLEADING_TODAS = c(2.66666666666667,
2.66666666666667, 3.66666666666667, 3, 1.66666666666667,
4.33333333333333), PROMEDIO_TIEMPOREACCION_NMISLEADING = c(5.72666666666667,
12.0123333333333, 5.753, 4.96966666666667, 15.233, 30.0456666666667
), PROMEDIO_DILEMAS_BI_BIENOMAL_CORONAVIRUS = c(1, 4, 4.33333333333333,
1.33333333333333, 0, 3.66666666666667), PROMEDIO_DILEMAS_BI_ACTUARIGUAL_CORONAVIRUS = c(5.66666666666667,
7.66666666666667, 9.66666666666667, 4.33333333333333, 3.66666666666667,
9.33333333333333), DILEMA_BI_CONTROL_BIENOMAL = c(4, 5, 2,
0, -3, 4), DILEMA_BI_CONTROL_ACTUARIGUAL = c(7, 4, 6, 2,
2, 10), PROMEDIO_DILEMAS_BI_BIENOMAL_JUNTOS = c(1.75, 4.25,
3.75, 1, -0.75, 3.75), PROMEDIO_DILEMAS_BI_ACTUARIGUAL_JUNTOS = c(6,
6.75, 8.75, 3.75, 3.25, 9.5), PROMEDIO_DILEMAS_DI_BIENOMAL = c(0.5,
1.83333333333333, 0.5, 1.66666666666667, 0.833333333333333,
0.166666666666667), PROMEDIO_DILEMAS_DI_ACTUARIGUAL = c(6.66666666666667,
7.66666666666667, 5.66666666666667, 5, 4.83333333333333,
5.16666666666667), PROMEDIO_DILEMAS_DI_DANO = c(5.66666666666667,
6.16666666666667, 5.33333333333333, 5.5, 5.66666666666667,
7), TIEMPOREACCION_DILEMAS_DI = c(12.1405, 9.13066666666666,
6.99833333333333, 1.85783333333333, 19.0143333333333, 11.6336666666667
), TIEMPOREACCION_DILEMAS_BI = c(7.899, 9.9955, 9.25175,
2.84125, 32.8285, 16.92), PROMEDIO_DI_SINPOL_BIENOMAL = c(0.2,
1.2, -1, 0.4, 0.8, 0.2), PROMEDIO_DI_SINPOL_ACTUARIGUAL = c(7,
8, 4.25, 4.5, 5, 5.5), PROMEDIO_DI_SINPOL_DANO = c(7.25,
6.75, 7.25, 7, 7.75, 7.75), COMPRAS_COVID19 = c(4.16666666666667,
6.83333333333333, 2, 2.66666666666667, 2.33333333333333,
4.5), PERCEPCION_RIESGO_TOTAL = c(39, 37, 42, 38, 26, 46),
PERCEPCION_RIESGO_INDICE = c(3.9, 3.7, 4.2, 3.8, 2.6, 4.6
), PROB_CONTAGIO_TOTAL = c(89.3333333333333, 65.6666666666667,
73.3333333333333, 13, 46.6666666666667, 78.3333333333333),
PROMEDIO_DILEMASPOLITICOS_BIENOMAL = c(1, 2.5, 4, 4, 0.5,
0), PROMEDIO_DILEMASPOLITICOS_ACTUARIGUAL = c(6, 7, 8.5,
6, 4.5, 4.5), PROMEDIO_DILEMASPOLITICOS_DANO = c(2.5, 5,
1.5, 2.5, 1.5, 5.5), D31_1_DI = c(-2, 3, -3, 0, -2, 4), D32_2_DI = c(4,
9, 3, 3, 4, 9), D33_3_DI = c(9, 7, 8, 8, 8, 7), D41_1_DI = c(-1,
1, 0, 0, 3, 1), D42_2_DI = c(7, 8, 6, 5, 7, 8), D43_3_DI = c(7,
9, 7, 8, 9, 6), D51_1_DI = c(5, 0, 1, 4, 1, 0), D52_2_DI = c(10,
7, 5, 7, 3, 4), D53_3_DI = c(4, 4, 6, 3, 7, 9), D61_1_DI = c(-1,
2, -3, -2, 2, -4), D62_2_DI = c(7, 8, 3, 3, 6, 1), D63_3_DI = c(9,
7, 8, 9, 7, 9), D71_1_DIP = c(0, 3, 3, 4, -2, -4), D72_2_DIP = c(4,
7, 7, 3, 2, 1), D73_3_DIP = c(3, 6, 2, 2, 2, 8), D81_1_DIP = c(2,
2, 5, 4, 3, 4), D82_2_DIP = c(8, 7, 10, 9, 7, 8), D83_3_DIP = c(2,
4, 1, 3, 1, 3), D91_1_BI = c(-3, 3, 5, 4, -1, 4), D92_2_BI = c(4,
8, 10, 9, 3, 9), D101_1_BI = c(3, 5, 5, 0, 3, 5), D102_2_BI = c(9,
8, 10, 2, 6, 10), D111_1_BI = c(3, 4, 3, 0, -2, 2), D112_2_BI = c(4,
7, 9, 2, 2, 9), D121_1_BI = c(4, 5, 2, 0, -3, 4), D122_2_BI = c(7,
4, 6, 2, 2, 10), total_iri = c(60, 63, 51, 48, 38, 61), promedio_falsaymisleading = c(2.75,
2.75, 2.875, 2.5, 1.75, 3.875), prediccioncompraspercprob = c(`1` = 4.24975892576113,
`2` = 4.40445037029013, `3` = 4.43163539588384, `4` = 5.14397435590305,
`5` = 3.76590707825915, `6` = 4.8937968160894), prediccioncomprasperc = c(`1` = 4.47445595202732,
`2` = 4.4399943212902, `3` = 4.52198006754018, `4` = 4.68938453833302,
`5` = 3.7624488758014, `6` = 4.96728571465517)), row.names = c(NA,
6L), class = c("tbl_df", "tbl", "data.frame"))
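
No answer is included above; as a hedged first diagnostic (an assumption, not a confirmed fix), the mtext() error suggests an empty label string reaches the plot, so it is worth checking the custom labels vector against the selection table before anything else. Here labs stands for the 20-element vector passed as labels in the calls above:

# Does the plot work with its default labels? If so, the custom vector is the culprit.
plot(m.sel2, border = "gray30")

# A model.selection table ends in statistic columns (df, logLik, AICc, delta,
# weight), so the number of term columns is ncol() minus those five.
n_terms <- ncol(m.sel2) - 5
length(labs) == n_terms  # should be TRUE before passing labels = labs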

Full Join in dplyr

I have a data frame looking like:
library(tidyverse)
df <- tibble::tribble(
  ~sub_date, ~period,
  "2019-01", 1,
  "2019-01", 2,
  "2019-01", 3,
  "2019-02", 1,
  "2019-02", 2,
  "2019-03", 1,
  "2019-03", 2,
  "2019-03", 3,
  "2019-03", 4
)
sub_date period
<chr> <dbl>
1 2019-01 1
2 2019-01 2
3 2019-01 3
4 2019-02 1
5 2019-02 2
6 2019-03 1
7 2019-03 2
8 2019-03 3
9 2019-03 4
and another:
period <- tibble::tribble(
  ~period, ~forecast,
  1, 10,
  2, 20,
  3, 30,
  4, 40,
  5, 50,
  6, 60,
  7, 70
)
period forecast
<dbl> <dbl>
1 1 10
2 2 20
3 3 30
4 4 40
5 5 50
6 6 60
7 7 70
I am struggling to join them so that the missing periods from the period table are filled in for every sub_date in df; that is, the result should have (number of rows in period) x (number of distinct sub_date values in df) rows, as follows:
df_output <- tibble::tribble(
  ~sub_date, ~period, ~forecast,
  "2019-01", 1, 10,
  "2019-01", 2, 20,
  "2019-01", 3, 30,
  "2019-01", 4, 40,
  "2019-01", 5, 50,
  "2019-01", 6, 60,
  "2019-01", 7, 70,
  "2019-02", 1, 10,
  "2019-02", 2, 20,
  "2019-02", 3, 30,
  "2019-02", 4, 40,
  "2019-02", 5, 50,
  "2019-02", 6, 60,
  "2019-02", 7, 70,
  "2019-03", 1, 10,
  "2019-03", 2, 20,
  "2019-03", 3, 30,
  "2019-03", 4, 40,
  "2019-03", 5, 50,
  "2019-03", 6, 60,
  "2019-03", 7, 70
)
# A tibble: 21 x 3
sub_date period forecast
<chr> <dbl> <dbl>
1 2019-01 1 10
2 2019-01 2 20
3 2019-01 3 30
4 2019-01 4 40
5 2019-01 5 50
6 2019-01 6 60
7 2019-01 7 70
8 2019-02 1 10
9 2019-02 2 20
10 2019-02 3 30
# … with 11 more rows
I assumed it was a full join but I don't get the desired result.
Any help?
You can use tidyr::crossing to obtain your desired result:
crossing(select(df, sub_date), period)
Note that you are not looking for a join, since you want every distinct sub_date combined (or crossed) with every (period, forecast) pair.
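
On dplyr 1.1.0 or later, cross_join() expresses the same Cartesian product directly (a version-dependent alternative to crossing()):

library(dplyr)  # cross_join() requires dplyr >= 1.1.0

df %>%
  distinct(sub_date) %>%
  cross_join(period) %>%
  arrange(sub_date, period)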
You can also try merging the tables, to see if that gives you what you need:
df <- df %>% distinct(sub_date)
answer <- merge(period, df, all = TRUE)
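This works because the two tables share no column names, so merge() falls back to a full Cartesian product, which is exactly the crossing behaviour wanted here.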

map over and arrange a list based on the list names

I have a list which looks like:
List of 8
$ 9 :Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 40 obs. of 2 variables:
..$ date: Date[1:40], format: "2014-03-22" "2019-03-18" "2018-04-28" ...
..$ .id : num [1:40] 9 9 9 9 9 9 9 9 9 9 ...
$ c(1, 7) :Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 40 obs. of 2 variables:
..$ date: Date[1:40], format: "2004-08-26" "2012-10-21" "2007-03-10" ...
..$ .id : num [1:40] 7 7 1 7 7 7 7 1 7 7 ...
$ c(13, 18) :Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 40 obs. of 2 variables:
..$ date: Date[1:40], format: "2016-01-31" "2016-03-24" "2018-10-17" ...
..$ .id : num [1:40] 13 13 13 18 13 18 13 13 13 13 ...
$ c(18, 2, 7, 13):Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 40 obs. of 2 variables:
..$ date: Date[1:40], format: "2013-04-05" "2019-04-23" "2005-03-05" ...
..$ .id : num [1:40] 13 2 7 2 2 13 13 7 13 7 ...
$ c(19, 5) :Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 40 obs. of 2 variables:
..$ date: Date[1:40], format: "2018-04-10" "2016-08-03" "2012-05-18" ...
..$ .id : num [1:40] 5 19 5 5 5 5 5 5 19 5 ...
$ c(2, 7, 18) :Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 40 obs. of 2 variables:
..$ date: Date[1:40], format: "2018-02-01" "2011-03-08" "2009-09-29" ...
..$ .id : num [1:40] 7 7 2 18 2 18 2 2 7 2 ...
$ c(5, 19) :Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 40 obs. of 2 variables:
..$ date: Date[1:40], format: "2011-05-14" "2005-08-31" "2015-07-06" ...
..$ .id : num [1:40] 19 5 5 5 5 19 5 5 5 5 ...
$ c(7, 1, 2, 18) :Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 40 obs. of 2 variables:
..$ date: Date[1:40], format: "2003-04-12" "2014-12-03" "2001-02-21" ...
..$ .id : num [1:40] 7 1 1 7 2 1 1 18 2 1 ...
The names of the list are the following:
9
c(1, 7)
c(13, 18)
c(18, 2, 7, 13)
c(19, 5)
c(2, 7, 18)
c(5, 19)
c(7, 1, 2, 18)
Two of the list elements look like:
$`c(19, 5)`
# A tibble: 40 x 2
date .id
<date> <dbl>
1 2018-04-10 5
2 2016-08-03 19
3 2012-05-18 5
4 2007-09-11 5
5 2011-11-03 5
6 2007-04-09 5
7 2001-07-12 5
8 2018-07-30 5
9 2013-07-30 19
10 2001-08-13 5
# ... with 30 more rows
$`c(2, 7, 18)`
# A tibble: 40 x 2
date .id
<date> <dbl>
1 2018-02-01 7
2 2011-03-08 7
3 2009-09-29 2
4 2014-07-30 18
5 2004-04-17 2
6 2016-11-21 18
7 2007-10-27 2
8 2009-02-08 2
9 2016-01-18 7
10 2010-09-27 2
# ... with 30 more rows
What I would like to do is arrange each list element by the .id and date columns, with .id ordered by the order in which it appears in that element's name. So for the c(19, 5) element, the 19 rows would come first (ordered by date) and the 5 rows second (also ordered by date); for the c(5, 19) element, the 5 rows would come first (ordered by date) and the 19 rows second (also ordered by date).
Any advice on how to do this would be great.
Data:
lst <- list(`9` = structure(list(date = structure(c(16151, 17973, 17649,
17738, 17388, 13927, 11594, 13095, 15312, 12030, 13805, 13240,
15660, 15926, 11645, 12139, 17853, 15328, 12561, 13595, 14147,
12142, 14112, 14083, 16057, 13074, 11458, 14735, 12892, 16139,
11935, 17666, 14789, 12231, 12343, 17012, 13099, 17682, 15150,
14195), class = "Date"), .id = c(9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9)), row.names = c(NA, -40L), class = c("tbl_df",
"tbl", "data.frame")), `c(1, 7)` = structure(list(date = structure(c(12656,
15634, 13582, 17498, 15079, 12265, 18031, 17399, 11603, 13886,
16876, 16022, 16303, 17776, 12717, 15154, 12950, 13693, 17561,
16963, 15690, 12581, 14883, 18010, 14280, 12672, 16108, 14347,
14326, 14628, 17913, 13771, 15369, 14765, 12067, 16397, 11555,
14855, 16308, 12824), class = "Date"), .id = c(7, 7, 1, 7, 7,
7, 7, 1, 7, 7, 1, 1, 7, 1, 7, 1, 1, 1, 7, 1, 7, 1, 1, 1, 1, 1,
1, 7, 7, 1, 7, 7, 7, 7, 1, 1, 7, 7, 1, 1)), row.names = c(NA,
-40L), class = c("tbl_df", "tbl", "data.frame")), `c(13, 18)` = structure(list(
date = structure(c(16831, 16884, 17821, 15686, 14680, 16428,
17462, 15693, 14707, 16889, 17534, 17556, 15243, 17308, 16886,
17212, 15199, 15669, 17761, 17103, 16992, 17396, 17584, 15904,
15643, 16748, 17554, 16822, 17184, 16264, 15425, 16715, 15268,
15205, 14772, 17285, 17184, 16112, 15327, 17100), class = "Date"),
.id = c(13, 13, 13, 18, 13, 18, 13, 13, 13, 13, 13, 13, 18,
13, 18, 13, 13, 13, 18, 18, 13, 13, 13, 13, 18, 18, 13, 13,
13, 18, 13, 13, 13, 13, 13, 13, 18, 18, 18, 13)), row.names = c(NA,
-40L), class = c("tbl_df", "tbl", "data.frame")), `c(18, 2, 7, 13)` = structure(list(
date = structure(c(15800, 18009, 12847, 12378, 12365, 14864,
14961, 14562, 15723, 15856, 11545, 11755, 15080, 13149, 12655,
14898, 13067, 14375, 15499, 16681, 15682, 18030, 15732, 14452,
17624, 15741, 17894, 12768, 17295, 12015, 16533, 13589, 17072,
14678, 14067, 14348, 16846, 18125, 17826, 16874), class = "Date"),
.id = c(13, 2, 7, 2, 2, 13, 13, 7, 13, 7, 7, 7, 7, 2, 7,
7, 7, 7, 7, 18, 13, 13, 18, 7, 2, 7, 7, 7, 13, 2, 2, 2, 7,
18, 7, 2, 2, 18, 13, 18)), row.names = c(NA, -40L), class = c("tbl_df",
"tbl", "data.frame")), `c(19, 5)` = structure(list(date = structure(c(17631,
17016, 15478, 13767, 15281, 13612, 11515, 17742, 15916, 11547,
12959, 16713, 12521, 12457, 12174, 18054, 16407, 13462, 14704,
16642, 12551, 16289, 12034, 17676, 16486, 15009, 17220, 16753,
13335, 12498, 12697, 17725, 17833, 16329, 17182, 16435, 11475,
14732, 15210, 17823), class = "Date"), .id = c(5, 19, 5, 5, 5,
5, 5, 5, 19, 5, 5, 19, 5, 5, 5, 19, 5, 5, 5, 5, 5, 5, 5, 5, 19,
5, 5, 5, 5, 5, 5, 19, 19, 19, 19, 5, 5, 19, 5, 5)), row.names = c(NA,
-40L), class = c("tbl_df", "tbl", "data.frame")), `c(2, 7, 18)` = structure(list(
date = structure(c(17563, 15041, 14516, 16281, 12525, 17126,
13813, 14283, 16818, 14879, 15860, 16616, 17303, 15356, 14899,
14306, 15254, 17836, 12555, 15367, 17721, 16216, 16787, 16603,
14723, 13608, 13276, 17852, 16922, 17774, 14676, 16696, 17059,
15518, 13829, 14623, 17787, 14534, 17579, 15137), class = "Date"),
.id = c(7, 7, 2, 18, 2, 18, 2, 2, 7, 2, 7, 7, 18, 7, 7, 7,
7, 18, 7, 2, 7, 2, 7, 2, 2, 7, 2, 18, 18, 2, 18, 18, 2, 2,
7, 2, 7, 2, 2, 7)), row.names = c(NA, -40L), class = c("tbl_df",
"tbl", "data.frame")), `c(5, 19)` = structure(list(date = structure(c(15108,
13026, 16622, 12813, 11591, 15364, 16033, 16594, 15353, 14652,
14697, 17160, 17084, 16686, 13560, 11401, 16433, 11722, 17606,
15924, 16235, 17817, 16172, 14612, 12021, 17276, 18080, 16222,
16849, 14746, 14036, 17850, 11350, 15036, 15577, 14833, 16464,
15322, 15988, 17023), class = "Date"), .id = c(19, 5, 5, 5, 5,
19, 5, 5, 5, 5, 19, 19, 19, 19, 5, 5, 19, 5, 19, 5, 19, 19, 5,
19, 5, 19, 5, 19, 19, 19, 5, 19, 5, 19, 5, 19, 5, 5, 19, 19)), row.names = c(NA,
-40L), class = c("tbl_df", "tbl", "data.frame")), `c(7, 1, 2, 18)` = structure(list(
date = structure(c(12154, 16407, 11374, 12594, 13229, 13812,
12462, 16255, 16181, 15333, 15337, 16019, 14551, 16383, 13281,
15422, 12951, 17836, 16740, 12130, 18142, 16458, 18148, 15173,
12506, 15581, 15244, 16519, 15785, 17916, 17575, 15128, 15274,
15808, 12137, 16425, 15927, 14696, 12771, 12894), class = "Date"),
.id = c(7, 1, 1, 7, 2, 1, 1, 18, 2, 1, 2, 2, 1, 7, 7, 1,
1, 18, 2, 2, 2, 1, 18, 2, 1, 1, 7, 18, 7, 18, 2, 18, 1, 7,
2, 1, 7, 2, 2, 2)), row.names = c(NA, -40L), class = c("tbl_df",
"tbl", "data.frame")))
You could do something like the following:
# Loop over the names of the list
newlist <- lapply(names(lst), function(i) {
  # Subset the list by name
  thislist <- lst[[i]]
  # Evaluate the list name to recover the id vector, e.g. "c(19, 5)" -> c(19, 5)
  i <- eval(parse(text = i))
  # Order the rows by .id, with factor levels in the order given by the name
  thislist[order(factor(thislist$.id, levels = as.character(i))), ]
})
We can use imap from purrr together with match and order to sort each data frame:
purrr::imap(lst, ~ .x[order(match(.x$.id, eval(parse(text = .y)))), ])
#$`9`
# A tibble: 40 x 2
# date .id
# <date> <dbl>
# 1 2014-03-22 9
# 2 2019-03-18 9
# 3 2018-04-28 9
# 4 2018-07-26 9
# 5 2017-08-10 9
# 6 2008-02-18 9
# 7 2001-09-29 9
# 8 2005-11-08 9
# 9 2011-12-04 9
#10 2002-12-09 9
# … with 30 more rows
#$`c(1, 7)`
# A tibble: 40 x 2
# date .id
# <date> <dbl>
# 1 2007-03-10 1
# 2 2017-08-21 1
# 3 2016-03-16 1
# 4 2013-11-13 1
# 5 2018-09-02 1
# 6 2011-06-29 1
# 7 2005-06-16 1
# 8 2007-06-29 1
# 9 2016-06-11 1
#10 2004-06-12 1
# … with 30 more rows
#....
#.....
In base R, the same can be achieved using Map:
Map(function(x, y) x[order(match(x$.id, y)), ], lst,
    lapply(names(lst), function(x) eval(parse(text = x))))
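
Both answers order by .id only; if dates should also be ascending within each .id, as the question requests, date can be supplied to order() as a secondary sort key (a small extension of the answers above):

purrr::imap(lst, ~ .x[order(match(.x$.id, eval(parse(text = .y))), .x$date), ])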
