I have a dataframe "df" like this:
col1 col2 col3 col4 col5 col6
1 2 2 3 5 7
2 4 6 4 8 2
5 9 7 3 2 5
3 4 5 6 8 1
and I would like to create a new dataframe "new_df" in which there is 1 blank column (called "empty") every 2 columns, like this:
empty col1 col2 empty col3 col4 empty col5 col6
NA 1 2 NA 2 3 NA 5 7
NA 2 4 NA 6 4 NA 8 2
NA 5 9 NA 7 3 NA 2 5
NA 3 4 NA 5 6 NA 8 1
How can I add the blank column in this way?
I have tried using:
n = length(df)
empty <- NA
for (i in seq(1,n-2,2))
{
new_df <- add_column(df, empty, .before=i)
}
but it memorizes only the last step, giving me this result:
col1 col2 col3 col4 empty col5 col6
1 2 2 3 NA 5 7
2 4 6 4 NA 8 2
5 9 7 3 NA 2 5
3 4 5 6 NA 8 1
Another base R solution
# Width of the result: every original column plus one blank column per pair.
n_new <- ncol(df) + ceiling(ncol(df) / 2)
# Positions of the blank columns in the result (1, 4, 7, ...). These must be
# computed from the result width, not from ncol(df), otherwise the last blank
# column is never named.
tmp1 <- seq(1, n_new, 3)
# Logical mask of the positions that receive the original columns.
tmp2 <- !(seq_len(n_new) %in% tmp1)
# Start from an all-NA frame of the final shape and copy the data into it.
df2 <- data.frame(matrix(NA, nrow(df), n_new))
df2[tmp2] <- df
colnames(df2)[tmp1] <- paste0("empty", seq_along(tmp1))
colnames(df2)[tmp2] <- colnames(df)
df2
empty1 col1 col2 empty2 col3 col4 empty3 col5 col6
1 NA 1 2 NA 2 3 NA 5 7
2 NA 2 4 NA 6 4 NA 8 2
3 NA 5 9 NA 7 3 NA 2 5
4 NA 3 4 NA 5 6 NA 8 1
The base R solution would be:
do.call(cbind, lapply(seq(1, ncol(df), by = 2), function(i) cbind(empty = rep(NA, nrow(df)), df[, seq(i, i+1)])))
# empty col1 col2 empty col3 col4 empty col5 col6
#1 NA 1 2 NA 2 3 NA 5 7
#2 NA 2 4 NA 6 4 NA 8 2
#3 NA 5 9 NA 7 3 NA 2 5
#4 NA 3 4 NA 5 6 NA 8 1
A "tidy" solution could be:
library(tidyverse)
map_dfc(seq(from = 1, to = ncol(df), by = 2),
~df %>%
mutate(empty = NA) %>%
select(empty, .x, .x+1))
#New names:
#* empty -> empty...1
#* empty -> empty...4
#* empty -> empty...7
#empty...1 col1 col2 empty...4 col3 col4 empty...7 col5 col6
#1 NA 1 2 NA 2 3 NA 5 7
#2 NA 2 4 NA 6 4 NA 8 2
#3 NA 5 9 NA 7 3 NA 2 5
#4 NA 3 4 NA 5 6 NA 8 1
Using append().
# Insert one NA column in front of every pair of columns.
# After k insertions the next pair starts 3 * k positions in, so the insertion
# points are 0, 3, 6, ...; one per (possibly incomplete) pair. The sequence is
# evaluated once, before `dat` starts growing.
for (i in seq(0, by = 3, length.out = ceiling(ncol(dat) / 2))) {
  # append() on a data frame returns a plain list; as.data.frame() rebuilds
  # the frame and de-duplicates the repeated "emp" name (emp, emp.1, ...).
  dat <- as.data.frame(append(dat, list(emp = NA), i))
}
dat
# emp col1 col2 emp.1 col3 col4 emp.2 col5 col6
# 1 NA 1 2 NA 2 3 NA 5 7
# 2 NA 2 4 NA 6 4 NA 8 2
# 3 NA 5 9 NA 7 3 NA 2 5
# 4 NA 3 4 NA 5 6 NA 8 1
Data:
dat <- structure(list(col1 = c(1L, 2L, 5L, 3L), col2 = c(2L, 4L, 9L,
4L), col3 = c(2L, 6L, 7L, 5L), col4 = c(3L, 4L, 3L, 6L), col5 = c(5L,
8L, 2L, 8L), col6 = c(7L, 2L, 5L, 1L)), class = "data.frame", row.names = c(NA,
-4L))
And here comes the ...
Microbenchmark
# Unit: microseconds
# expr min lq mean median uq max neval cld
# ronak() 969.707 990.9945 1001.4807 1012.282 1017.368 1022.453 3 d
# user() 349.937 358.0145 364.3877 366.092 371.613 377.134 3 a
# jay() 2098.003 2100.8540 2115.7640 2103.705 2124.644 2145.584 3 e
# groth1() 2164.896 2262.5745 2363.6133 2360.253 2462.972 2565.691 3 f
# groth2() 424.546 438.0185 455.0820 451.491 470.350 489.209 3 ab
# groth3() 722.551 728.0910 733.1910 733.631 738.511 743.391 3 c
# r.user() 612.432 619.6570 636.9573 626.882 649.220 671.558 3 bc
## and with the usual expanded data frame:
# Fix the RNG so the resampled benchmark data is reproducible.
set.seed(42)
# Blow the data up to one million rows by sampling rows with replacement.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
dat <- dat[sample(nrow(dat), 1e6, replace = TRUE), ]
microbenchmark::microbenchmark(
  ronak(), user(), jay(), groth1(), groth2(), groth3(), r.user(),
  times = 3L
)
# Unit: milliseconds
# expr min lq mean median uq max neval cld
# ronak() 1375.139030 1456.858743 1564.509886 1538.578457 1659.19531 1779.81217 3 c
# user() 89.017416 200.845539 251.548652 312.673662 332.81427 352.95488 3 a
# jay() 7.655812 8.382333 9.941684 9.108855 11.08462 13.06039 3 a
# groth1() 501.263785 514.097103 621.755474 526.930421 682.00132 837.07222 3 b
# groth2() 143.438836 147.783741 189.033391 152.128645 211.83067 271.53269 3 a
# groth3() 1387.314877 1406.898863 1469.493158 1426.482849 1510.58230 1594.68175 3 c
# r.user() 1469.543881 1472.770464 1483.834022 1475.997046 1490.97909 1505.96114 3 c
Code:
ronak <- \() {
  # Benchmark wrapper: split `dat` (taken from the calling environment) into
  # consecutive pairs of columns and put a numbered NA column before each pair.
  pair_id <- rep(seq_along(dat), each = 2, length.out = ncol(dat))
  split_data <- split.default(dat, pair_id)
  # Build a one-column NA frame called empty<k> and bind it before the k-th pair.
  prepend_empty <- function(x, y) {
    na_col <- setNames(data.frame(NA), paste0('empty', x))
    cbind(na_col, y)
  }
  do.call(cbind, Map(prepend_empty, seq_along(split_data), split_data))
}
user <- \() {
  # Benchmark wrapper: build an all-NA frame of the final shape and copy the
  # columns of `dat` (taken from the calling environment) into the non-blank
  # positions. Returns the widened data frame.
  #
  # The result width is derived from `dat` instead of being hard-coded to 9,
  # so the function also works for inputs that do not have six columns.
  n_out <- ncol(dat) + ceiling(ncol(dat) / 2)
  # Positions of the blank columns: 1, 4, 7, ...
  tmp1 <- seq(1, n_out, 3)
  # Mask of the positions that receive the original columns.
  tmp2 <- !(seq_len(n_out) %in% tmp1)
  dat2 <- data.frame(matrix(NA, nrow(dat), n_out))
  dat2[tmp2] <- dat
  colnames(dat2)[tmp1] <- paste0("empty", seq_along(tmp1))
  colnames(dat2)[tmp2] <- colnames(dat)
  dat2
}
jay <- \() {
  # Benchmark wrapper: insert an NA column named "emp" in front of every pair
  # of columns of `dat` using append(). The assignment inside the loop creates
  # a function-local `dat`; the caller's object is left untouched.
  #
  # Insertion points are 0, 3, 6, ... (one per pair) instead of the former
  # `0:2*ncol(dat)/2`, which was only correct for exactly six columns. The
  # sequence is evaluated once, before the local `dat` starts growing.
  for (i in seq(0, by = 3, length.out = ceiling(ncol(dat) / 2))) {
    # as.data.frame() rebuilds the frame from the list returned by append()
    # and makes the repeated "emp" names unique (emp, emp.1, ...).
    dat <- as.data.frame(append(dat, list(emp = NA), i))
  }
  dat
}
groth1 <- \() suppressMessages({
  # Benchmark wrapper for the dplyr/purrr answer: split `dat` (taken from the
  # calling environment) into pairs of columns, bind an NA column named
  # "empty" before each pair and bind the pieces back together.
  # suppressMessages() silences package start-up and name-repair messages.
  #
  # The two packages are attached with separate library() calls. The former
  # `require(dplyr):require(purrr)` used `:` where `;` was intended and only
  # worked because both calls were evaluated as operands of `:`.
  library(dplyr)
  library(purrr)
  dat %>%
    split.default(as.numeric(gl(ncol(.), 2, ncol(.)))) %>%
    map(~ bind_cols(empty = NA, .)) %>%
    bind_cols
})
groth2 <- \() {
  # Benchmark wrapper: compute where each column of `dat` (taken from the
  # calling environment) lands in the result, then fill an all-NA frame.
  # For six columns the targets are 2 3 5 6 8 9.
  ix <- cumsum(seq_along(dat) %% 2 + 1)
  blank <- data.frame(matrix(NA, nrow(dat), max(ix)))
  dat2 <- replace(blank, ix, dat)
  # Every column is called "empty" unless it holds an original column.
  new_names <- rep("empty", ncol(dat2))
  new_names[ix] <- names(dat)
  names(dat2) <- new_names
  dat2
}
groth3 <- \() {
  # Benchmark wrapper: split `dat` (taken from the calling environment) into
  # pairs of columns, bind an NA column named "empty" before each pair and
  # bind the pieces back together.
  n <- ncol(dat)
  ix <- as.numeric(gl(n, 2, n)) # 1 1 2 2 3 3
  pieces <- split.default(dat, ix)
  # The list is left unnamed so the final cbind() does not prefix the column
  # names with the group labels.
  with_empty <- unname(lapply(pieces, function(piece) cbind(empty = NA, piece)))
  do.call("cbind", with_empty)
}
r.user <- \() {
  # Benchmark wrapper: walk over the first column index of each pair in `dat`
  # (taken from the calling environment), bind an NA column named "empty"
  # before that pair and bind all the pieces together.
  starts <- seq(1, ncol(dat), by = 2)
  chunks <- lapply(starts, function(i) {
    na_col <- rep(NA, nrow(dat))
    cbind(empty = na_col, dat[, seq(i, i + 1)])
  })
  do.call(cbind, chunks)
}
1) dplyr/purrr Split the data frame, DF, into groups of two columns, bind an NA column before each group and bind the resulting components back together. Using the same column name for multiple columns, as in the sample output in the question, makes it impossible to identify those columns by name, so this uses unique names.
library(dplyr)
library(purrr)
DF %>%
split.default(as.numeric(gl(ncol(.), 2, ncol(.)))) %>%
map(~ bind_cols(empty = NA, .)) %>%
bind_cols
giving:
empty...1 col1 col2 empty...4 col3 col4 empty...7 col5 col6
1 NA 1 2 NA 2 3 NA 5 7
2 NA 2 4 NA 6 4 NA 8 2
3 NA 5 9 NA 7 3 NA 2 5
4 NA 3 4 NA 5 6 NA 8 1
2) Base R Create a vector ix which gives the indexes of the original data frame in the result data frame and then create an empty result and copy DF and its names into it.
ix <- cumsum(seq_along(DF) %% 2 + 1) # 2 3 5 6 8 9
DF2 <- replace(data.frame(matrix(NA, nrow(DF), max(ix))), ix, DF)
names(DF2) <- replace(rep("empty", ncol(DF2)), ix, names(DF))
DF2
giving:
empty col1 col2 empty col3 col4 empty col5 col6
1 NA 1 2 NA 2 3 NA 5 7
2 NA 2 4 NA 6 4 NA 8 2
3 NA 5 9 NA 7 3 NA 2 5
4 NA 3 4 NA 5 6 NA 8 1
3) Base R This is another Base R solution. It roughly translates (1) into Base R. It gives the same result as (2).
ix <- as.numeric(gl(ncol(DF), 2, ncol(DF))) # 1 1 2 2 3 3
do.call("cbind", Map(cbind, empty = NA, split.default(DF, ix)))
4) eList The eList package can be used for a particularly short solution.
library(eList)
DF(for(i in seq(1, ncol(DF), 2)) list(empty = NA, DF[seq(i, len = 2)]))
giving:
empty col1 col2 empty.1 col3 col4 empty.2 col5 col6
1 NA 1 2 NA 2 3 NA 5 7
2 NA 2 4 NA 6 4 NA 8 2
3 NA 5 9 NA 7 3 NA 2 5
4 NA 3 4 NA 5 6 NA 8 1
Note
The input in reproducible form.
Lines <- "col1 col2 col3 col4 col5 col6
1 2 2 3 5 7
2 4 6 4 8 2
5 9 7 3 2 5
3 4 5 6 8 1"
DF <- read.table(text = Lines, header = TRUE)
Here's a base R option -
We can split the data every 2 columns into list of dataframe and use Map to add a new column with NA in each dataframe.
# Split the columns of df into consecutive pairs (group ids 1 1 2 2 3 3 ...).
split_data <- split.default(
  df,
  rep(seq_along(df), each = 2, length.out = ncol(df))
)
# Put a one-column NA frame named empty<k> in front of the k-th pair, then
# bind all the pieces side by side.
result <- do.call(
  cbind,
  Map(
    function(idx, piece) {
      empty_col <- setNames(data.frame(NA), paste0('empty', idx))
      cbind(empty_col, piece)
    },
    seq_along(split_data),
    split_data
  )
)
result
# empty1 col1 col2 empty2 col3 col4 empty3 col5 col6
#1 NA 1 2 NA 2 3 NA 5 7
#2 NA 2 4 NA 6 4 NA 8 2
#3 NA 5 9 NA 7 3 NA 2 5
#4 NA 3 4 NA 5 6 NA 8 1
It is not a good practice to have duplicate column names in a dataframe hence I name them as empty1, empty2 etc.
data
df <- structure(list(col1 = c(1L, 2L, 5L, 3L), col2 = c(2L, 4L, 9L,
4L), col3 = c(2L, 6L, 7L, 5L), col4 = c(3L, 4L, 3L, 6L), col5 = c(5L,
8L, 2L, 8L), col6 = c(7L, 2L, 5L, 1L)),
class = "data.frame", row.names = c(NA, -4L))
Related
I am counting the NA values in each row and then summing those counts by the col1 variable in the data set. I want to add a condition to this query:
When calculating the number of NA,
for col2 = a and b, also look at the col4 column; for col2 = c, do not look at the col4 column.
# creating a dataframe
data_frame <- data.frame(col1 = sample(6:9, 9 , replace = TRUE),
col2 = letters[1:3],
col3 = c(1,NA,NA,1,NA,NA,2,NA,2),
col4 = c(1,4,NA,1,NA,NA,NA,1,2))
data_frame = data_frame %>%
rowwise() %>%
mutate(Count_NA = sum(is.na(cur_data()))) %>%
ungroup
#print (data_frame)
data_frame %>% group_by(col1) %>%
summarize(Sum_Count_NA=sum(Count_NA))
The output I want is;
col1
col2
col3
col4
Count_NA
8
a
1
1
0
6
b
NA
4
1
8
c
NA
NA
2
7
a
1
1
0
8
b
NA
NA
2
8
c
NA
NA
2
8
a
2
NA
1
8
b
NA
1
1
9
c
2
2
0
After adding the condition, the output I want is;
Not counting NA in col4 for col2 = c
col1
col2
col3
col4
Count_NA
8
a
1
1
0
6
b
NA
4
1
8
c
NA
NA
1
7
a
1
1
0
8
b
NA
NA
2
8
c
NA
NA
1
8
a
2
NA
1
8
b
NA
1
1
9
c
2
2
0
An option is also to replace the NA elements in the 'col4' with non-NA when 'col2' is 'c' and then do the rowSums on the logical matrix
library(dplyr)
data_frame %>%
mutate(Count_Na = rowSums(is.na(cbind(col3, replace(col4, col2 == 'c', 999)))))
-output
col1 col2 col3 col4 Count_Na
1 7 a 1 1 0
2 9 b NA 4 1
3 9 c NA NA 1
4 7 a 1 1 0
5 7 b NA NA 2
6 7 c NA NA 1
7 7 a 2 NA 1
8 9 b NA 1 1
9 7 c 2 2 0
You can do this:
library(dplyr)
data_frame %>%
  # NA in col3 always counts; NA in col4 counts only when col2 is not "c"
  # (the question asks to ignore col4 for col2 = c).
  mutate(sum = rowSums(is.na(select(., contains("col3")))) + (col2 != "c" & is.na(col4)))
col1 col2 col3 col4 sum
1 8 a 1 1 0
2 6 b NA 4 1
3 9 c NA NA 1
4 8 a 1 1 0
5 7 b NA NA 2
6 7 c NA NA 1
7 7 a 2 NA 1
8 9 b NA 1 1
9 7 c 2 2 0
data
data_frame <- structure(list(col1 = c(8L, 6L, 9L, 8L, 7L, 7L, 7L, 9L, 7L),
col2 = c("a", "b", "c", "a", "b", "c", "a", "b", "c"), col3 = c(1,
NA, NA, 1, NA, NA, 2, NA, 2), col4 = c(1, 4, NA, 1, NA, NA,
NA, 1, 2)), class = "data.frame", row.names = c(NA, -9L))
This must be easy but my brain is blocked!
I have this dataframe:
col1
<chr>
1 A
2 B
3 NA
4 C
5 D
6 NA
7 NA
8 E
9 NA
10 F
df <- structure(list(col1 = c("A", "B", NA, "C", "D", NA, NA, "E",
NA, "F")), row.names = c(NA, -10L), class = c("tbl_df", "tbl",
"data.frame"))
I want to add a column with uniqueID only for values that are not NA with tidyverse.
Expected output:
col1 uniqueID
<chr> <dbl>
1 A 1
2 B 2
3 NA NA
4 C 3
5 D 4
6 NA NA
7 NA NA
8 E 5
9 NA NA
10 F 6
I have tried: n(), row_number(), cur_group_id ....
We could do this easily in data.table. Specify the condition in i i.e. non-NA elements in 'col1', create the column 'uniqueID' with the sequence of elements by assignment (:=)
library(data.table)
setDT(df)[!is.na(col1), uniqueID := seq_len(.N)]
-output
df
col1 uniqueID
1: A 1
2: B 2
3: <NA> NA
4: C 3
5: D 4
6: <NA> NA
7: <NA> NA
8: E 5
9: <NA> NA
10: F 6
In dplyr, we can use replace
library(dplyr)
df %>%
  # Fill the IDs into an integer NA vector rather than into `col1` itself:
  # replacing into the character column would coerce the IDs to character.
  mutate(uniqueID = replace(rep(NA_integer_, n()), !is.na(col1),
                            seq_len(sum(!is.na(col1)))))
-output
# A tibble: 10 x 2
col1 uniqueID
<chr> <int>
1 A 1
2 B 2
3 <NA> NA
4 C 3
5 D 4
6 <NA> NA
7 <NA> NA
8 E 5
9 <NA> NA
10 F 6
Another approach:
library(dplyr)
df %>%
mutate(UniqueID = cumsum(!is.na(col1)),
UniqueID = if_else(is.na(col1), NA_integer_, UniqueID))
# A tibble: 10 x 2
col1 UniqueID
<chr> <int>
1 A 1
2 B 2
3 NA NA
4 C 3
5 D 4
6 NA NA
7 NA NA
8 E 5
9 NA NA
10 F 6
A base R option using match + na.omit + unique
transform(
df,
uniqueID = match(col1, na.omit(unique(col1)))
)
gives
col1 uniqueID
1 A 1
2 B 2
3 <NA> NA
4 C 3
5 D 4
6 <NA> NA
7 <NA> NA
8 E 5
9 <NA> NA
10 F 6
A weird tidyverse solution:
library(dplyr)
df %>%
mutate(id = ifelse(is.na(col1), 0, 1),
id = cumsum(id == 1),
id = ifelse(is.na(col1), NA, id))
# A tibble: 10 x 2
col1 id
<chr> <int>
1 A 1
2 B 2
3 NA NA
4 C 3
5 D 4
6 NA NA
7 NA NA
8 E 5
9 NA NA
10 F 6
I would like to sum a column (by ID) depending on another variable (group). If we take for instance:
ID t group
1 12 1
1 14 1
1 2 6
2 0.5 7
2 12 1
3 3 1
4 2 4
I'd like to sum values of column t separately for each ID only if group==1, and obtain:
ID t group sum
1 12 1 26
1 14 1 26
1 2 6 NA
2 0.5 7 NA
2 12 1 12
3 3 1 3
4 2 4 NA
Using dplyr,
df %>%
group_by(ID) %>%
mutate(new = sum(t[group == 1]),
new = replace(new, group != 1, NA))
which gives,
# A tibble: 7 x 4
# Groups: ID [4]
ID t group new
<int> <dbl> <int> <dbl>
1 1 12 1 26
2 1 14 1 26
3 1 2 6 NA
4 2 0.5 7 NA
5 2 12 1 12
6 3 3 1 3
7 4 2 4 NA
Consider base R with ifelse and ave() for conditional inline aggregation.
df$sum <- with(df, ifelse(group == 1, ave(t, ID, group, FUN=sum), NA))
df
# ID t group sum
# 1 1 12.0 1 26
# 2 1 14.0 1 26
# 3 1 2.0 6 NA
# 4 2 0.5 7 NA
# 5 2 12.0 1 12
# 6 3 3.0 1 3
# 7 4 2.0 4 NA
Rextester demo
We can use data.table methods. Convert the 'data.frame' to 'data.table' (setDT(df)), grouped by 'ID', specify the i with the logical expression group == 1, get the sum of 't' and assign (:=) it to 'new'. By default, the other rows are assigned NA.
library(data.table)
setDT(df)[group == 1, new := sum(t), ID]
df
# ID t group new
#1: 1 12.0 1 26
#2: 1 14.0 1 26
#3: 1 2.0 6 NA
#4: 2 0.5 7 NA
#5: 2 12.0 1 12
#6: 3 3.0 1 3
#7: 4 2.0 4 NA
data
df <- structure(list(ID = c(1L, 1L, 1L, 2L, 2L, 3L, 4L), t = c(12,
14, 2, 0.5, 12, 3, 2), group = c(1L, 1L, 6L, 7L, 1L, 1L, 4L)),
class = "data.frame", row.names = c(NA,
-7L))
I'm fairly used to adding in missing cases for data but this use case escapes me.
I have a number of dataframes (which differ slightly), an example would be:
> t1
3 4 5
2 1 0 0
3 0 2 2
4 2 6 4
5 1 2 1
structure(list(`3` = c(1L, 0L, 2L, 1L), `4` = c(0L, 2L, 6L, 2L
), `5` = c(0L, 2L, 4L, 1L)), .Names = c("3", "4", "5"), row.names = c("2",
"3", "4", "5"), class = "data.frame")
Row names & Column names should be from 1:5 and, obviously, where these were missing the cell value set to NA. For the example above this would give:
> t1
1 2 3 4 5
1 NA NA NA NA NA
2 NA NA 1 0 0
3 NA NA 0 2 2
4 NA NA 2 6 4
5 NA NA 1 2 1
In each case ANY one or more rows AND/OR columns might be missing.
I can readily get the missing columns using the method described by Josh O'Brien here but am missing the row method.
Can anyone help?
We can do this in a much easier way with base R by creating a matrix of NAs of the required dimensions and then assign the values of 't1' based on the row names and column names of 't1'
m1 <- matrix(NA, ncol=5, nrow=5, dimnames = list(1:5, 1:5))
m1[row.names(t1), colnames(t1)] <- unlist(t1)
m1
# 1 2 3 4 5
#1 NA NA NA NA NA
#2 NA NA 1 0 0
#3 NA NA 0 2 2
#4 NA NA 2 6 4
#5 NA NA 1 2 1
Or using tidyverse
library(tidyverse)
rownames_to_column(t1, "rn") %>%
gather(Var, Val, -rn) %>%
mutate_at(vars(rn, Var), as.integer) %>%
complete(rn = seq_len(max(rn)), Var = seq_len(max(Var))) %>%
spread(Var, Val)
# A tibble: 5 × 6
# rn `1` `2` `3` `4` `5`
#* <int> <int> <int> <int> <int> <int>
#1 1 NA NA NA NA NA
#2 2 NA NA 1 0 0
#3 3 NA NA 0 2 2
#4 4 NA NA 2 6 4
#5 5 NA NA 1 2 1
Based on the solution you mentioned by Josh O'Brien, you can do the same but use rownames instead of names. Take a look at the code below..
df <- data.frame(a=1:4, e=4:1)
colnms <- c("a", "b", "d", "e")
rownms <- c("1", "2", "3", "4", "5")
rownames(df) <- c("1", "3", "4", "5")
## find missing columns and replace with zero, and order them
Missing <- setdiff(colnms, names(df))
df[Missing] <- 0
df <- df[colnms]
df
## do the same for rows
MissingR <- setdiff(rownms, rownames(df))
df[MissingR,] <- 0
df <- df[rownms,]
df
# > df
# a b d e
#1 1 0 0 4
#2 0 0 0 0
#3 2 0 0 3
#4 3 0 0 2
#5 4 0 0 1
This question already has answers here:
Error in <my code> : target of assignment expands to non-language object
(1 answer)
sum two columns in R
(7 answers)
Closed 7 years ago.
Let's say I have sample dataframe (df) :
id col1 col2 col3 col4 col5 col6
1 2 3 2 6 2 8
2 3 2 4 1 3 2
3 4 2 9 7 8 7
4 7 6 3 2 9 2
Now I am trying to add 2 columns at a time and create new column i.e col1+col2, col3+col4, col5+col6
Desired output:
id col1 col2 col3 col4 col5 col6 t_1 t_3 t_5
1 2 3 2 6 2 8 5 8 10
2 3 2 4 1 3 2 5 5 5
3 4 2 9 7 8 7 6 16 15
4 7 6 3 2 9 2 13 5 11
I wrote following code:
for(i in c(1, 3, 5)){
paste('df$t', i, sep= '_') <- as.numeric(df[, i]) + as.numeric(df[, i+1])
}
but i get following error:
Error in paste("df$t", i, sep = "_") <- as.numeric(df[, :
target of assignment expands to non-language object`
Am I doing something wrong here ?
Based on the expected output, we can subset the alternating columns of 'df1' without the first 'id' column and we + those datasets with similar dimensions, and create new columns in the original dataset based on that output.
df1[paste('t', c(1,3,5), sep="_")] <- df1[-1][c(TRUE, FALSE)]+
df1[-1][c(FALSE, TRUE)]
df1
# id col1 col2 col3 col4 col5 col6 t_1 t_3 t_5
#1 1 2 3 2 6 2 8 5 8 10
#2 2 3 2 4 1 3 2 5 5 5
#3 3 4 2 9 7 8 7 6 16 15
#4 4 7 6 3 2 9 2 13 5 11
Just for clarity, the first step is removing the first column (df1[-1]) and then we subset every alternating column using the logical vector c(TRUE, FALSE). This vector will be recycled across the columns of the dataset.
df1[-1][c(TRUE, FALSE)]
# col1 col3 col5
#1 2 2 2
#2 3 4 3
#3 4 9 8
#4 7 3 9
Similarly, we subset the next alternating pair of columns.
df1[-1][c(FALSE, TRUE)]
# col2 col4 col6
#1 3 6 8
#2 2 1 2
#3 2 7 7
#4 6 2 2
Both the subset datasets have the same dimensions, so we can just + them; the addition is done elementwise, which gives the sums of the corresponding columns.
df1[-1][c(TRUE, FALSE)]+df1[-1][c(FALSE, TRUE)]
# col1 col3 col5
#1 5 8 10
#2 5 5 5
#3 6 16 15
#4 13 5 11
data
df1 <- structure(list(id = 1:4, col1 = c(2L, 3L, 4L, 7L), col2 = c(3L,
2L, 2L, 6L), col3 = c(2L, 4L, 9L, 3L), col4 = c(6L, 1L, 7L, 2L
), col5 = c(2L, 3L, 8L, 9L), col6 = c(8L, 2L, 7L, 2L)), .Names = c("id",
"col1", "col2", "col3", "col4", "col5", "col6"), class = "data.frame",
row.names = c(NA, -4L))
This will do...
df$t_1 <- df$col1 + df$col2
df$t_3 <- df$col3 + df$col4
df$t_5 <- df$col5 + df$col6
You don't need to run a loop.
I think it is worth mentioning another approach, by Tyler Rinker in this post, adapted to this problem. We create a list of pairs of column indices to pass later to lapply. Finally, we combine the original data frame (df1) and the matrix (df2).
n <- ncol(df1)
# Group the column indices 2..n (everything except `id`) into consecutive
# pairs; the groups are labelled 2, 3, 4, ...
ind <- split(2:n, rep(2:n, each = 2, length.out = n - 1))
# Row-wise sum of each pair, bound column-wise into a matrix.
df2 <- do.call(cbind, lapply(ind, function(i) rowSums(df1[, i])))
cbind(df1, df2)
Output:
id col1 col2 col3 col4 col5 col6 2 3 4
1 1 2 3 2 6 2 8 5 8 10
2 2 3 2 4 1 3 2 5 5 5
3 3 4 2 9 7 8 7 6 16 15
4 4 7 6 3 2 9 2 13 5 11