I have data as follows:
dat <- structure(list(rn = c("type_A", "type_B", "type_C"
), freq = list(c(0, 0, 0, 5, 7, 16, 28), c(2, 1, 0, 5, 0, 8),
c(0, 0, 3, 5, 12, 53, 73)), colspan = list(c(`25` = 1, `100` = 2,
`250` = 1, `500` = 1, `1000` = 1, Infinity = 3, SUM = 1), c(`25` = 1,
`100` = 2, `250` = 1, `500` = 1, Infinity = 4, SUM = 1), c(`25` = 1,
`50` = 1, `100` = 1, `250` = 1, `500` = 1, Infinity = 4, SUM = 1
))), row.names = c(NA, 3L), class = "data.frame")
total_colspan = c(0, 25, 50, 100, 250, 500, 1000, 1500, 3000, "Infinity", "SUM")
rn freq colspan
1 type_A 0, 0, 0, 5, 7, 16, 28 1, 2, 1, 1, 1, 3, 1
2 type_B 2, 1, 0, 5, 0, 8 1, 2, 1, 1, 4, 1
3 type_C 0, 0, 3, 5, 12, 53, 73 1, 1, 1, 1, 1, 4, 1
I would like to create a table with varying column spans (but they all add up to 10), in an R-markdown Word document, like the table below:
I was advised to try flextable for this (link). I am trying to use the header options to create these varying colspan. I thought about doing something like:
dat_table <- flextable(dat)
dat_table <- lapply(dat_table, add_header_row, values = unlist(freq), colwidths = unlist(colspan))
But this is not working.
EDIT:
My second attempt:
dat <- structure(list(rn = c("type_A", "type_B", "type_C"
), freq = list(c(0, 0, 0, 5, 7, 16, 28), c(2, 1, 0, 5, 0, 8),
c(0, 0, 3, 5, 12, 53, 73)), colspan = list(c(1, 2, 1, 1, 1, 3, 1), c(1, 2, 1, 1, 4, 1), c(1, 1, 1, 1, 1, 4, 1
))), row.names = c(NA, 3L), class = "data.frame")
# The thresholds as in the picture
thresholds <- data.frame(c("Lower threshold","Upper threshold"), c(0,25), c(25,50), c(50,100), c(100,250), c(250,500),c(500,1000),c(1000,1500),c(1500,3000),c(3000, "Infinity"), c("", "SUM"))
names(thresholds) <- c("One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten", "Eleven")
thresholds <- flextable(thresholds)
# There was one column to few in the example
dat <- transform(dat, colspan=Map('c', 1, dat[["colspan"]] ))
dat <- transform(dat, freq=Map('c', "", dat[["freq"]] ))
# for loop to stick to the syntax
for (i in nrow(dat)) {
thresholds <- add_header_row(thresholds, values = dat[[2]][[i]], colwidths = dat[[3]][[i]])
}
For some reason it only adds one row (while it allows for more headers to be added).
Here's a solution that is perhaps way too overkill, but seems to do what you're looking for:
library(tidyverse)
library(flextable)
dat <- structure(list(rn = c("type_A", "type_B", "type_C"
), freq = list(c(0, 0, 0, 5, 7, 16, 28), c(2, 1, 0, 5, 0, 8),
c(0, 0, 3, 5, 12, 53, 73)), colspan = list(c(1, 2, 1, 1, 1, 3, 1), c(1, 2, 1, 1, 4, 1), c(1, 1, 1, 1, 1, 4, 1
))), row.names = c(NA, 3L), class = "data.frame")
# The thresholds as in the picture
thresholds <- data.frame(c("Lower threshold","Upper threshold"), c(0,25), c(25,50), c(50,100), c(100,250), c(250,500),c(500,1000),c(1000,1500),c(1500,3000),c(3000, "Infinity"), c("", "SUM"))
names(thresholds) <- c("One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten", "Eleven")
out <- map(1:nrow(dat), function(index){
out <- data.frame("freq" = dat$freq[[index]],
"span" = dat$colspan[[index]]) %>%
tidyr::uncount(span, .id = 'span') %>%
mutate(freq = ifelse(span>1, NA, freq)) %>%
t %>%
as.data.frame() %>%
mutate(rn = dat$rn[[index]],
across(everything(), ~as.character(.))) %>%
select(rn, everything()) %>%
set_names(nm = names(thresholds)) %>%
slice(1)
return(out)
})
combined <- thresholds %>%
mutate(across(everything(), ~as.character(.))) %>%
bind_rows(out)
spans <- map(1:length(dat$colspan), function(index){
spans <- dat$colspan[[index]] %>%
as_tibble() %>%
mutate(idx = row_number()) %>%
tidyr::uncount(value, .remove = F) %>%
group_by(idx) %>%
mutate(pos = 1:n(),
value = ifelse(pos != 1, 0, value)) %>%
ungroup() %>%
select(value) %>%
t
return(append(1, spans))
})
myft <- flextable(combined) %>%
theme_box()
myft$body$spans$rows[3:nrow(myft$body$spans$rows),] <- matrix(unlist(spans), ncol = ncol(combined), byrow = TRUE)
myft
Created on 2022-04-29 by the reprex package (v2.0.1)
This makes the table:
I don't think you can pass colspan options here without quite a bit of hacking. If at all possible, I would suggest adding the information which cells need to be combined manually. This is the only option, as far as I know, in flextable:
library(flextable)
library(tidyverse)
# clean up the object
dat_clean <- dat %>%
mutate(freq = map2(freq, colspan, ~rep(.x, .y))) %>%
select(-colspan) %>%
unnest(freq) %>%
group_by(rn) %>%
mutate(col = paste0("col_", row_number())) %>%
pivot_wider(names_from = col, values_from = freq)
flextable(dat_clean) %>%
merge_at(i = 1, j = 3:4, part = "body") %>%
merge_at(i = 1, j = 7:9, part = "body") %>%
border_inner(part="all", border = fp_border_default()) %>%
align(align = "center", part = "all")
Created on 2022-04-25 by the reprex package (v2.0.1)
It is a bit tricky to merge those two tables. This is the closest I came to reproduce your desired table. First I created your data in a suitable way:
thresholds <- data.frame(c("Lower threshold", "Upper threshold", "type_A", "type_B", "type_C"),
c(0,25, 0, 2, 0),
c(25,50, 0, 1, 0),
c(50,100, NA, NA,3),
c(100,250,0,0,5),
c(250,5005,5,5,12),
c(500,1000,7,0,53),
c(1000,1500,16,NA,NA),
c(1500,3000,NA,NA,NA),
c(3000, "Infinity",NA,NA,NA),
c("SUM", "SUM", 28,8,73))
names(thresholds) <- c("One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten", "Eleven")
Using the officer package you can give the horizontal and vertical lines different colors you want. Using the merge_at function you can merge certain cells. With the border_inner function you get borders in the table. You can use the following code:
library(officer)
std_border = fp_border(color="gray")
library(flextable)
library(dplyr)
thresholds %>%
flextable() %>%
merge_at(i = 3, j = 3:4, part = "body") %>%
merge_at(i = 4, j = 3:4, part = "body") %>%
merge_at(i = 3, j = 8:10, part = "body") %>%
merge_at(i = 4, j = 7:10, part = "body") %>%
merge_at(i = 5, j = 7:10, part = "body") %>%
border_inner(border = std_border) %>%
align(align = "left", part = "all")
Output:
Related
I am trying to create a bar chart or column chart plot to compare pre and post scores between participants. I managed to do this in a line graph, however, I am struggling to visualise this within a bar chart, can anyone help me with this?
Here is the data I am using:
structure(list(Participant = c(2, 3, 5, 7), PRE_QUIP_RS = c(24,
24, 20, 20), POST_QUIP_RS = c(10, 23, 24, 14), PRE_PDQ8 = c(11,
8, 10, 4), POST_PDQ8 = c(7, 7, 9, 4), PRE_GDS = c(1, 7, 1, 0),
POST_GDS = c(1, 4, 2, 0), PRE_PERSISTENT = c(9, 13, 6, 2),
POST_PERSISTENT = c(9, 13, 11, 3), PRE_EPISODIC = c(3, 4,
2, 0), POST_EPISODIC = c(2, 5, 6, 2), PRE_AVOIDANCE = c(6,
3, 0, 2), POST_AVOIDANCE = c(3, 3, 4, 1), PRE_IPQ = c(39,
48, 40, 37), POST_IPQ = c(16, 44, 30, 17), PRE_GSE = c(28,
31, 36, 29), POST_GSE = c(29, 30, 30, 29), PRE_BCI = c(11,
9, 5, 3), POST_BCI = c(3, 15, 0, 0)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -4L))
In terms of how I roughly want it to look, I want the bars to be placed together for pre and post for each participant, kind of like this:
You may try
library(tidyverse)
df %>%
select(Participant, PRE_QUIP_RS, POST_QUIP_RS) %>%
pivot_longer(cols = c(PRE_QUIP_RS, POST_QUIP_RS), names_to = "group") %>%
mutate(group = str_split(group, "_", simplify = T)[,1],
Participant = as.factor(Participant)) %>%
ggplot(aes(x = Participant, y = value, group = group, fill = group)) +
geom_col(position = "dodge")
PRE POST order
dummy %>%
select(Participant, PRE_QUIP_RS, POST_QUIP_RS) %>%
pivot_longer(cols = c(PRE_QUIP_RS, POST_QUIP_RS), names_to = "group") %>%
mutate(group = str_split(group, "_", simplify = T)[,1] %>%
factor(., levels = c("PRE", "POST")), # HERE
Participant = as.factor(Participant)) %>%
ggplot(aes(x = Participant, y = value, group = group, fill = group)) +
geom_col(position = "dodge")
I have a dataframe as follows
library(tidyverse)
library(tidymodels)
#df <- read_csv("C:\\Users\\omarl\\OneDrive\\Escritorio\\games.csv")
df <- structure(list(gameId = 3326086514, creationTime = 1504279457970,
gameDuration = 1949, seasonId = 9, winner = 1, firstBlood = 2,
firstTower = 1, firstInhibitor = 1, firstBaron = 1, firstDragon = 1,
firstRiftHerald = 2, t1_champ1id = 8, t1_champ1_sum1 = 12,
t1_champ1_sum2 = 4, t1_champ2id = 432, t1_champ2_sum1 = 3,
t1_champ2_sum2 = 4, t1_champ3id = 96, t1_champ3_sum1 = 4,
t1_champ3_sum2 = 7, t1_champ4id = 11, t1_champ4_sum1 = 11,
t1_champ4_sum2 = 6, t1_champ5id = 112, t1_champ5_sum1 = 4,
t1_champ5_sum2 = 14, t1_towerKills = 11, t1_inhibitorKills = 1,
t1_baronKills = 2, t1_dragonKills = 3, t1_riftHeraldKills = 0,
t1_ban1 = 92, t1_ban2 = 40, t1_ban3 = 69, t1_ban4 = 119,
t1_ban5 = 141, t2_champ1id = 104, t2_champ1_sum1 = 11, t2_champ1_sum2 = 4,
t2_champ2id = 498, t2_champ2_sum1 = 4, t2_champ2_sum2 = 7,
t2_champ3id = 122, t2_champ3_sum1 = 6, t2_champ3_sum2 = 4,
t2_champ4id = 238, t2_champ4_sum1 = 14, t2_champ4_sum2 = 4,
t2_champ5id = 412, t2_champ5_sum1 = 4, t2_champ5_sum2 = 3,
t2_towerKills = 5, t2_inhibitorKills = 0, t2_baronKills = 0,
t2_dragonKills = 1, t2_riftHeraldKills = 1, t2_ban1 = 114,
t2_ban2 = 67, t2_ban3 = 43, t2_ban4 = 16, t2_ban5 = 51), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame"))
df <- df %>%
mutate(winner = ifelse(winner == 1, "team1", "team2")) %>%
mutate(firstBlood = ifelse(firstBlood == 1, "team1", "team2")) %>%
mutate(firstTower = ifelse(firstTower == 1, "team1", "team2")) %>%
mutate(firstInhibitor = ifelse(firstInhibitor == 1, "team1", "team2")) %>%
mutate(firstBaron = ifelse(firstBaron == 1, "team1", "team2")) %>%
mutate(firstDragon = ifelse(firstDragon == 1, "team1", "team2")) %>%
mutate(firstRiftHerald = ifelse(firstRiftHerald == 1, "team1", "team2")) %>%
select(-gameId, -creationTime) %>%
filter(seasonId == 9) %>%
select(gameDuration, winner, firstBlood, firstTower, firstInhibitor, firstBaron, firstDragon,
firstRiftHerald)
As you can see, mutate is really redundant here, because I'm copying the code for every variable. Is there any way to apply the ifelse to columns that start with first, t1, etc. programatically?
You may try
library(dplyr)
df %>%
mutate(across(starts_with("t1")|starts_with("first"), ~ifelse(.x == 1, "team1", "team2")))
Park gave a best (one liner) solution. But if you want to look at some other options, here is how we can do it via using some other functions in dplyr:
df %>%
gather(key, value, firstBlood:t1_ban5) %>%
mutate(value = ifelse(value == 1, "team1", "team2")) %>%
spread(key, value) %>%
select(-gameId, -creationTime) %>%
filter(seasonId == 9) %>%
select(gameDuration, winner, firstBlood, firstTower, firstInhibitor, firstBaron, firstDragon,
firstRiftHerald)
I want to recode several variables together. All these variables will undergo same recoding change.
For this, I followed the thread below. The thread below describes two ways of doing it.
1). Using column number
2). using variable names
I tried both but I get an error message.
Error message for 1) and 2).
Error in (function (var, recodes, as.factor, as.numeric = TRUE, levels) :
unused arguments (2 = "1", 3 = "1", 1 = "0", 4 = "0", na.rm = TRUE)
recode variable in loop R
#Uploading libraries
library(dplyr)
library(magrittr)
library(plyr)
library(readxl)
library(tidyverse)
#Importing file
mydata <- read_excel("CCorr_Data.xlsx")
df <- data.frame(mydata)
attach(df)
#replacing codes for variables
df %>%
mutate_at(c(1:7), recode, '2'='1', '3'='1', '1'='0', '4'='0', na.rm = TRUE) %>%
mutate_at(c(15:24), recode, '2'='0', na.rm = TRUE)
df %>%
mutate_at(vars(E301, E302, E303), recode,'2'='1', '3'='1', '1'='0', '4'='0', na.rm = TRUE) %>%
mutate_at(vars(B201, B202, B203), recode, '2'='0', na.rm = TRUE)
Can someone tell me where am I going wrong?
In my dataset there are missing values that's why I have included na.rm = T. I even tried without including the missing value command, the error message was the same even then.
Please see below for sample data.
structure(list(Country = c(1, 1, 1, 1, 1, 1), HHID = c("12ae5148e245079f-122042",
"12ae5148e245079f-123032", "12ae5148e245079f-123027", "12ae5148e245079f-123028",
"12ae5148e245079f-N123001", "12ae5148e245079f-123041"), HHCode = c("122042",
"123032", "123027", "123028", "N123001", "123041"), A103 = c(2,
2, 2, 2, 2, 2), A104 = c("22", "23", "23", "23", "23", "23"),
Community = c("Mehmada", "Dhobgama", "Dhobgama", "Dhobgama",
"Dhobgama", "Dhobgama"), E301 = c(3, 3, 3, 3, 3, 3), E302 = c(3,
2, 4, 4, 3, 3), E303 = c(3, 2, 3, 3, 3, 3), E304 = c(3, 4,
4, 4, 3, 3), E305 = c(3, 2, 3, 3, 3, 3), E306 = c(3, 3, 3,
3, 3, 3), E307 = c(3, 3, 3, 3, 3, 3), E308 = c(3, 1, 3, 3,
3, 3), B201.1 = c(NA, 1, 1, 1, 1, 1), B202.1 = c(NA, 1, 1,
1, 1, 1), B203.1 = c(NA, 1, 1, 2, 2, 1), B204.1 = c(NA, 2,
1, 2, 1, 1), B205.1 = c(NA, 2, 1, 2, 2, 2), B206.1 = c(NA,
1, 1, 1, 2, 1), B207.1 = c(NA, 2, 1, 2, 2, 1), B208.1 = c(NA,
2, 2, 2, 2, 2), B209.1 = c(NA, 2, 1, 1, 1, 1), B210.1 = c(NA,
1, 1, 1, 1, 1)), row.names = c(NA, 6L), class = "data.frame")
```
The issue is with in the na.rm = TRUE, recode doesn't have that argument
library(dplyr)
df %>%
mutate_at(vars(E301, E302, E303), recode,'2'='1', '3'='1', '1'='0', '4'='0') %>%
mutate_at(vars(B201, B202, B203), recode, '2'='0')
Try using :
library(dplyr)
df %>%
mutate_at(1:7, recode, '2'='1', '3'='1', '1'='0', '4'='0') %>%
mutate_at(15:24, recode, '2'='0')
Here is my initial data set:
data_x <- tribble(
~price, ~id, ~cost, ~revenue,
1, 10, 0.20, 0,
2, 20, 0.30, 60,
3, 20, 0.30, 0,
4, 10, 0.20, 100,
5, 30, 0.10, 40,
6, 10, 0.20, 0,
1, 20, 0.30, 80,
2 , 10, 0.20, 0,
3, 30, 0.10, 20,
3, 20, 0.30, 40,
)
Then, I have a new variable that is zet:
data_y <- data_x %>%
mutate(zet = cost/revenue) %>%
mutate_if(is.numeric, list(~na_if(., Inf))) %>%
mutate_all(funs(replace_na(.,0)))
Now, I plot the price distribution while showing the zet distribution, as well. Here is my desired plot:
To do this, I first wanted to see price and zet distribution even they are not percentage now.
price_dist <- data_y %>%
group_by(priceseg = cut(as.numeric(price), c(0, 1, 3, 5, 6))) %>%
summarise(price_n = n_distinct(price)) %>%
pivot_wider(names_from = priceseg, values_from = price_n)
zet_dist <- data_y %>%
group_by(priceseg = cut(as.numeric(price), c(0, 1, 3, 5, 6))) %>%
summarise(zet_n = n_distinct(zet)) %>%
pivot_wider(names_from = priceseg, values_from = zet_n)
I would be grateful if you could help me to plot my desired chart.
d <- data_y %>%
group_by(priceseg = cut(as.numeric(price), c(0, 1, 3, 5, 6))) %>%
summarise(price_n = n_distinct(price),
zet_n = n_distinct(zet)) %>%
mutate(price_n = 100 * prop.table(price_n),
zet_n2 = 100 * prop.table(zet_n))
ggplot(d) +
geom_col(aes(x = priceseg, y = price_n)) +
geom_line(data = d, mapping = aes(x = priceseg, y = zet_n2, group = 1)) +
geom_label(data = d, mapping = aes(x = priceseg, y = zet_n2, label = zet_n), nudge_y = 5)
I would appreciate any help to efficiently subset a data frame into several chunks to be passed to a list of lists based on imput and Weights_x, where x = {1, 2, ..., 10}.
This question and my attempt are based on this and this. The difference is that now I want to create a list of lists, where each of the lists has only one value of imput and one column of the variables Weights_x.
My code below, expanded from #DarrenTsai, works for a few columns of Weights_x, but I have 100 imput * 1000 weights subsets to create. Therefore, I need a more efficient approach which accomplishes the same outcome without too much code.
my data:
dat <- structure(list(id = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3,
3, 3, 4, 4, 4, 4, 4), imput = c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
1, 2, 3, 4, 5, 1, 2, 3, 4, 5), A = c(1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), B = c(1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0), Pass = c(278, 278,
278, 278, 278, 100, 100, 100, 100, 100, 153, 153, 153, 153, 153,
79, 79, 79, 79, 79), Fail = c(740, 743, 742, 743, 740, 7581,
7581, 7581, 7581, 7581, 1231, 1232, 1235, 1235, 1232, 1731, 1732,
1731, 1731, 1731), Weights_1 = c(4, 3, 4, 3, 3, 1, 2, 1, 2, 1,
12, 12, 11, 12, 12, 3, 5, 3, 3, 3), Weights_2 = c(3, 3, 3, 3,
3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3, 3), Weights_3 = c(4,
3, 3, 3, 3, 1, 2, 1, 1, 1, 12, 12, 11, 12, 12, 3, 3, 3, 3, 3),
Weights_4 = c(3, 3, 4, 3, 3, 1, 1, 1, 2, 1, 12, 12, 13, 12,
12, 3, 2, 3, 3, 3), Weights_5 = c(3, 3, 3, 3, 3, 1, 0, 1,
1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3, 3), Weights_6 = c(4,
3, 3, 3, 3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3,
3), Weights_7 = c(3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 12, 12, 12,
12, 12, 3, 3, 3, 3, 3), Weights_8 = c(3, 3, 3, 3, 3, 1, 1,
1, 1, 1, 15, 12, 12, 12, 12, 3, 3, 3, 3, 3), Weights_9 = c(3,
3, 3, 4, 3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 2, 3, 3, 3,
3), Weights_10 = c(3, 3, 4, 3, 3, 1, 1, 1, 1, 1, 12, 10,
12, 12, 12, 3, 3, 3, 3, 3)), class = "data.frame", row.names = c(NA,
-20L))
my approach:
##Weights = `Weights_1`
myvars_1 <- c("id", "imput", "A", "B", "Pass", "Fail", "Weights_1")
dat_1 <- dat[myvars_1]
mylist_1 <- by(dat_1, dat$imput, function(x){
nn <- x$Fail + x$Pass
weights <- x$Weights_1
return(list(N = nrow(x), ncases = x$Pass, A = x$A, B = x$B,
id = x$id, P = x$imput, nn = nn, weights = weights))
})
##Weights = `Weights_2`
myvars_2 <- c("id", "imput", "A", "B", "Pass", "Fail", "Weights_2")
dat_2 <- dat[myvars_2]
mylist_2 <- by(dat_2, dat$imput, function(x){
nn <- x$Fail + x$Pass
weights <- x$Weights_2
return(list(N = nrow(x), ncases = x$Pass, A = x$A, B = x$B,
id = x$id, P = x$imput, nn = nn, weights = weights))
})
##Weights = `Weights_3`
myvars_3 <- c("id", "imput", "A", "B", "Pass", "Fail", "Weights_3")
dat_3 <- dat[myvars_3]
mylist_3 <- by(dat_3, dat$imput, function(x){
nn <- x$Fail + x$Pass
weights <- x$Weights_3
return(list(N = nrow(x), ncases = x$Pass, A = x$A, B = x$B,
id = x$id, P = x$imput, nn = nn, weights = weights))
})
##Weights = `Weights_4`
myvars_4 <- c("id", "imput", "A", "B", "Pass", "Fail", "Weights_4")
dat_4 <- dat[myvars_4]
mylist_4 <- by(dat_4, dat$imput, function(x){
nn <- x$Fail + x$Pass
weights <- x$Weights_4
return(list(N = nrow(x), ncases = x$Pass, A = x$A, B = x$B,
id = x$id, P = x$imput, nn = nn, weights = weights))
})
##Weights = `Weights_5`
myvars_5 <- c("id", "imput", "A", "B", "Pass", "Fail", "Weights_5")
dat_5 <- dat[myvars_5]
mylist_5 <- by(dat_5, dat$imput, function(x){
nn <- x$Fail + x$Pass
weights <- x$Weights_5
return(list(N = nrow(x), ncases = x$Pass, A = x$A, B = x$B,
id = x$id, P = x$imput, nn = nn, weights = weights))
})
##Weights = `Weights_6`
myvars_6 <- c("id", "imput", "A", "B", "Pass", "Fail", "Weights_6")
dat_6 <- dat[myvars_6]
mylist_6 <- by(dat_6, dat$imput, function(x){
nn <- x$Fail + x$Pass
weights <- x$Weights_6
return(list(N = nrow(x), ncases = x$Pass, A = x$A, B = x$B,
id = x$id, P = x$imput, nn = nn, weights = weights))
})
##Weights = `Weights_7`
myvars_7 <- c("id", "imput", "A", "B", "Pass", "Fail", "Weights_7")
dat_7 <- dat[myvars_7]
mylist_7 <- by(dat_7, dat$imput, function(x){
nn <- x$Fail + x$Pass
weights <- x$Weights_7
return(list(N = nrow(x), ncases = x$Pass, A = x$A, B = x$B,
id = x$id, P = x$imput, nn = nn, weights = weights))
})
##Weights = `Weights_8`
myvars_8 <- c("id", "imput", "A", "B", "Pass", "Fail", "Weights_8")
dat_8 <- dat[myvars_8]
mylist_8 <- by(dat_8, dat$imput, function(x){
nn <- x$Fail + x$Pass
weights <- x$Weights_8
return(list(N = nrow(x), ncases = x$Pass, A = x$A, B = x$B,
id = x$id, P = x$imput, nn = nn, weights = weights))
})
##Weights = `Weights_9`
myvars_9 <- c("id", "imput", "A", "B", "Pass", "Fail", "Weights_9")
dat_9 <- dat[myvars_9]
mylist_9 <- by(dat_9, dat$imput, function(x){
nn <- x$Fail + x$Pass
weights <- x$Weights_9
return(list(N = nrow(x), ncases = x$Pass, A = x$A, B = x$B,
id = x$id, P = x$imput, nn = nn, weights = weights))
})
##Weights = `Weights_10`
myvars_10 <- c("id", "imput", "A", "B", "Pass", "Fail", "Weights_10")
dat_10 <- dat[myvars_10]
mylist_10 <- by(dat_10, dat$imput, function(x){
nn <- x$Fail + x$Pass
weights <- x$Weights_10
return(list(N = nrow(x), ncases = x$Pass, A = x$A, B = x$B,
id = x$id, P = x$imput, nn = nn, weights = weights))
})
##create list of lists
mylistslist <- list(mylist_1, mylist_2, mylist_3, mylist_4, mylist_5,
mylist_6, mylist_7, mylist_8, mylist_9, mylist_10)
Thanks in advance for any help.
Using lapply one solution might be
myvars <- c("id", "imput", "A", "B", "Pass", "Fail")
cols <- which(names(dat) %in% myvars)
ind <- grep('^Weights_\\d+$', names(dat))
out <- lapply(ind, function (k) {
dat_1 <- dat[c(cols,k)]
by(dat_1, dat$imput, function(x){
nn <- x$Fail + x$Pass
weights <- .subset2(x, ncol(x))
return(list(N = nrow(x), ncases = x$Pass, A = x$A, B = x$B,
id = x$id, P = x$imput, nn = nn, weights = weights))
})
})