Expand rows with lists as observations - r

I have the following frame:
# Example input: `returns` is a list column holding one numeric vector per row.
df <- structure(
  list(
    returns = list(c(1, 2, 3, 4, 5, 6), c(7, 8, 9, 10, 11, 12)),
    indexId = c("a", "b")
  ),
  class = "data.frame",
  row.names = 1:2
)
Is there an easy way to convert this into a normal data.frame so it appears as:
Choice ppl
1 a
2 a
3 a
4 a
5 a
6 a
7 b
8 b
9 b
10 b
11 b
12 b
I have a solution using a for loop, but I am looking for something simpler.
All help is much appreciated!

# Example data: `returns` is a list column with one numeric vector per row.
df <- structure(
  list(
    returns = list(c(1, 2, 3, 4, 5, 6), c(7, 8, 9, 10, 11, 12)),
    indexId = c("a", "b")
  ),
  class = "data.frame",
  row.names = 1:2
)
library(tidyverse)
# unnest() expands the list column into one row per element and repeats
# indexId alongside each value. separate_rows() is meant for splitting
# delimited strings, not for list columns.
df %>% unnest(returns)
# returns indexId
# 1 1 a
# 2 2 a
# 3 3 a
# 4 4 a
# 5 5 a
# 6 6 a
# 7 7 b
# 8 8 b
# 9 9 b
# 10 10 b
# 11 11 b
# 12 12 b

Or :
# lengths() returns the size of each list element as an integer vector, so
# every indexId is repeated once per value in its `returns` entry.
data.frame(
  choice = unlist(df$returns),
  ppl = rep(df$indexId, times = lengths(df$returns))
)

Related

How can we use complete for alphabetic letters?

I have this dataframe:
# Example tibble: x has gaps (2, 3, 4 are missing) and y holds matching letters.
df <- structure(
  list(
    x = c(1, 5, 6, 7, 8),
    y = c("a", "e", "f", "g", "h")
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -5L)
)
x y
<dbl> <chr>
1 1 a
2 5 e
3 6 f
4 7 g
5 8 h
With complete from tidyr package:
I can do:
# full_seq(..., 1) yields every value from min(x) to max(x) in steps of 1;
# complete() then adds the missing x values as new rows (y is NA there).
df %>%
complete(x = full_seq(min(x):max(x), 1))
x y
<dbl> <chr>
1 1 a
2 2 NA
3 3 NA
4 4 NA
5 5 e
6 6 f
7 7 g
8 8 h
Now I would like to do the same with the y column:
# y is a character column, so min(y):max(y) is not a valid numeric range.
df %>%
complete(y = full_seq(min(y):max(y), 1))
This obviously will not work.
How can I use complete from tidyr package for alphabetical order?
I don't think that's possible in general: a sequence is only well defined for single letters, so strings with more than one letter could not be completed. For single letters you can still use the built-in letters vector:
# Index the built-in `letters` vector by the completed range of x. This works
# here because each x equals the alphabet position of its y.
df %>%
  complete(y = letters[full_seq(range(x), 1)])
or, to be entirely relying on y:
# Look up the alphabet positions of the smallest and largest y, then complete
# y with every letter in between.
df %>%
  complete(y = letters[seq(match(min(y), letters), match(max(y), letters))])
y x
1 a 1
2 b NA
3 c NA
4 d NA
5 e 5
6 f 6
7 g 7
8 h 8

How to add a row to data frame based on a condition

I have a data frame to which I want to add rows based on the following condition: column a is equal to C and column b is equal to 3 or 5.
Here is my dataframe
# Example data; `a` is stored as a factor because of stringsAsFactors = TRUE.
df <- data.frame(
  a = c("A", "B", "C", "D", "C", "A", "C", "E"),
  b = seq_len(8),
  stringsAsFactors = TRUE
)
Whenever the condition is TRUE, I want to add a row directly below the row where the condition is met, with a set to "add" and b set to 3. I have tried the following:
# rbind() appends the new row after the last row of df, not below each match.
rbind(df, data.frame(a="add", b = "3"))
# a b
# 1 A 1
# 2 B 2
# 3 C 3
# 4 D 4
# 5 C 5
# 6 A 6
# 7 C 7
# 8 E 8
# 9 add 3
This is not the output I want. The output I want is
# a b
# 1 A 1
# 2 B 2
# 3 C 3
# 4 add 3
# 5 D 4
# 6 C 5
# 7 add 3
# 8 A 6
# 9 C 7
# 10 E 8
How can I do that? I am new to R and thank you for your help.
# 2 for rows that need an "add" row inserted after them, 1 for all others.
lens <- ifelse(df$b %in% c(3, 5) & df$a == "C", 2, 1)
# seq_len() (rather than 1:NROW(df)) keeps this correct for a zero-row df.
ind <- rep(seq_len(nrow(df)), lens)
df2 <- df[ind, ]
# `a` may be a factor; convert it so that "add" can be assigned.
df2$a <- as.character(df2$a)
# cumsum(lens) is the position in df2 of the last copy of each original row,
# which for a matching row is the duplicate that becomes the "add" row.
added <- cumsum(lens)[lens == 2]
df2$a[added] <- "add"
df2$b[added] <- 3
df2
# a b
#1 A 1
#2 B 2
#3 C 3
#3.1 add 3
#4 D 4
#5 C 5
#5.1 add 3
#6 A 6
#7 C 7
#8 E 8
A solution using the tidyverse package.
library(tidyverse)
df2 <- df %>%
  # Group counts the matches seen before each row, so every matching row is
  # the last row of its group.
  mutate(Group = lag(cumsum(a == "C" & b %in% c(3, 5)), default = FALSE)) %>%
  group_split(Group) %>%
  # Append an "add" row to the end of every group ...
  map_dfr(function(piece) bind_rows(piece, tibble(a = "add", b = 3))) %>%
  # ... then drop the one that was appended after the final group.
  slice(-n()) %>%
  select(-Group)
df2
# # A tibble: 10 x 2
# a b
# <chr> <dbl>
# 1 A 1
# 2 B 2
# 3 C 3
# 4 add 3
# 5 D 4
# 6 C 5
# 7 add 3
# 8 A 6
# 9 C 7
# 10 E 8
In base R, we can find the positions where a is "C" and b is 3 or 5, repeat those rows in the data frame, and replace the repeated rows with the required values.
# Row numbers that need an "add" row inserted directly below them.
pos <- which(df$a == "C" & df$b %in% c(3, 5))
# "add" is not an existing level when `a` is a factor, so store it as character.
df$a <- as.character(df$a)
# Duplicate every matching row; seq_len() stays correct for a zero-row df.
df <- df[sort(c(seq_len(nrow(df)), pos)), ]
# The k-th duplicate sits at pos[k] + k because k rows have been inserted up
# to that point. Skip the assignment when nothing matched.
if (length(pos) > 0) {
  df[seq_along(pos) + pos, ] <- list("add", 3)
}
row.names(df) <- NULL
df
# a b
#1 A 1
#2 B 2
#3 C 3
#4 add 3
#5 D 4
#6 C 5
#7 add 3
#8 A 6
#9 C 7
#10 E 8
data
# Same example data with `a` kept as a character column.
df <- data.frame(
  a = c("A", "B", "C", "D", "C", "A", "C", "E"),
  b = seq_len(8),
  stringsAsFactors = FALSE
)

Modify, extract, and concatenate list sub-elements into a data.frame in R with tidyverse

I'm trying to find an elegant way to work with list structures in R. In particular, in this case, I'd like to extract sub-elements from a list, modify them based on their associated data in that list, and concatenate them into a data frame. Perhaps easier with an example:
# Named list of datasets; each holds a data frame (data1) and a character
# vector (data2).
mystruct <- list(
  dataset1 = list(
    data1 = data.frame(a = c(1, 2, 3), b = c(4, 5, 6)),
    data2 = c("a", "b", "c", "d", "e")
  ),
  dataset2 = list(
    data1 = data.frame(a = c(7, 8, 9), b = c(10, 11, 12)),
    data2 = c("f", "g", "h", "i", "j")
  )
)
I can concatenate data1 elements like this:
> mystruct %>% map_dfr(~.x$data1)
a b
1 1 4
2 2 5
3 3 6
4 7 10
5 8 11
6 9 12
But I would like to add a "dataset" column, which is populated by the name of the list element from whence the data was taken:
dataset a b
1 dataset1 1 4
2 dataset1 2 5
3 dataset1 3 6
4 dataset2 7 10
5 dataset2 8 11
6 dataset2 9 12
Is there a way to do this nicely with the tidyverse? I'd also be open to data.table solutions.
Thanks,
Allie
Provide an .id parameter to map_df, which will create a column giving the name of the list:
# The string "data1" extracts that element from each dataset; .id stores the
# list names in a `dataset` column.
mystruct %>% map_df("data1", .id = "dataset")
# dataset a b
#1 dataset1 1 4
#2 dataset1 2 5
#3 dataset1 3 6
#4 dataset2 7 10
#5 dataset2 8 11
#6 dataset2 9 12
Or map_dfr should work as well:
# Same extraction with map_dfr(): row-bind every data1 and label the rows by
# list name.
mystruct %>% map_dfr("data1", .id = "dataset")
map_dfr has an .id argument:
# Row-bind the data1 element of every dataset; the list names go into `id`.
map_dfr(mystruct, function(entry) entry$data1, .id = "id")
giving:
id a b
1 dataset1 1 4
2 dataset1 2 5
3 dataset1 3 6
4 dataset2 7 10
5 dataset2 8 11
6 dataset2 9 12
Restructure as a "tidy" table with list columns...
library(data.table)
# Wrap every sub-element in a list so each dataset becomes one row with list
# columns. `idcol` is spelled out instead of relying on partial matching of
# `id`.
tabstruct <- rbindlist(lapply(mystruct, lapply, list), idcol = TRUE)
# .id data1 data2
# 1: dataset1 <data.frame> a,b,c,d,e
# 2: dataset2 <data.frame> f,g,h,i,j
Then "unnest" data1:
# Name each data1 frame by its dataset id, then stack them. `idcol` is spelled
# out instead of relying on partial matching of `id`.
tabstruct[, rbindlist(setNames(data1, .id), idcol = TRUE)]
# .id a b
# 1: dataset1 1 4
# 2: dataset1 2 5
# 3: dataset1 3 6
# 4: dataset2 7 10
# 5: dataset2 8 11
# 6: dataset2 9 12
Or unnest data2:
# Flatten the data2 list column into one value per row within each dataset.
tabstruct[, list(val = unlist(data2)), by = ".id"]
# .id val
# 1: dataset1 a
# 2: dataset1 b
# 3: dataset1 c
# 4: dataset1 d
# 5: dataset1 e
# 6: dataset2 f
# 7: dataset2 g
# 8: dataset2 h
# 9: dataset2 i
# 10: dataset2 j
Here is an option to do this on multiple datasets in the list
# For each field name, pull that field out of every dataset and bind the
# pieces, labelling them by list name in `id`.
map(c("data1", "data2"), function(field) {
  map2_df(mystruct, field, function(entry, key) entry[[key]], .id = "id")
})
#[[1]]
# id a b
#1 dataset1 1 4
#2 dataset1 2 5
#3 dataset1 3 6
#4 dataset2 7 10
#5 dataset2 8 11
#6 dataset2 9 12
#[[2]]
# A tibble: 5 x 3
# id dataset1 dataset2
# <chr> <chr> <chr>
#1 1 a f
#2 1 b g
#3 1 c h
#4 1 d i
#5 1 e j

Creating a new data frame using existing data

I would like to create a new data frame from my existing data frame "ab". The new data frame should look like "Newdf".
# Existing data frame
a <- 1:5
b <- 11:15
ab <- data.frame(C1 = a, c2 = b)
ab
# Desired result: values interleaved row by row, with the source column number
df <- c(1, 11, 2, 12, 3, 13, 4, 14, 5, 15)
CMT <- 1:2
CMT1 <- rep(CMT, times = 5)
Newdf <- data.frame(DV = df, Comp = CMT1)
Newdf
Can we use dplyr package? If yes, how?
More importantly than dplyr, you'd need tidyr:
library(tidyr)
library(dplyr)
# Stack both columns into key/value pairs, then turn the column names into
# the numeric codes 1 and 2.
ab %>%
  gather(key = Comp, value = DV) %>%
  mutate(Comp = recode(Comp, C1 = 1, c2 = 2))
# Comp DV
# 1 1 1
# 2 1 2
# 3 1 3
# 4 1 4
# 5 1 5
# 6 2 11
# 7 2 12
# 8 2 13
# 9 2 14
# 10 2 15
Using dplyr and tidyr gives you something close...
library(tidyr)
library(dplyr)
df2 <- ab %>%
  # Remember the original row position so the stacked values can be
  # interleaved again after gathering.
  mutate(Order = row_number()) %>%
  gather(key = Comp, value = DV, C1, c2) %>%
  arrange(Order) %>%
  mutate(Comp = recode(Comp, C1 = 1, c2 = 2)) %>%
  select(DV, Comp)
df2
DV Comp
1 1 1
2 11 2
3 2 1
4 12 2
5 3 1
6 13 2
7 4 1
8 14 2
9 5 1
10 15 2
Although the OP has asked for a dplyr solution, I felt challenged to look for a data.table solution. So, FWIW, here is an alternative approach using melt().
Note that this solution does not depend on specific column names in ab as the two other dplyr solutions do. In addition, it should be working for more than two columns in ab as well (untested).
library(data.table)
# `rn` is overwritten with the integer row index so that order(rn) sorts
# numerically; character row names would sort "10" before "2" once ab has
# ten or more rows. Note that setDT() and := modify `ab` by reference.
melt(setDT(ab, keep.rownames = TRUE)[, rn := .I], id.vars = "rn",
     value.name = "DV"
)[, Comp := rleid(variable)
][order(rn)][, c("rn", "variable") := NULL][]
# DV Comp
# 1: 1 1
# 2: 11 2
# 3: 2 1
# 4: 12 2
# 5: 3 1
# 6: 13 2
# 7: 4 1
# 8: 14 2
# 9: 5 1
#10: 15 2
Data
# Example data: two integer columns with five rows each.
ab <- data.frame(C1 = 1:5, c2 = 11:15)
ab
# C1 c2
#1 1 11
#2 2 12
#3 3 13
#4 4 14
#5 5 15

What is the equivalent of the SumIf function in R

I am new to R and this site, but I searched and didn't find the answer I was looking for.
If I have the following data set "total":
# Example data: each name appears twice; x1 is constant, x2 runs from 3 to 10.
names <- rep(c("a", "b", "c", "d"), times = 2)
x <- cbind(x1 = 3, x2 = 3:10)
total <- data.frame(names, x)
total
names x1 x2
1 a 3 3
2 b 3 4
3 c 3 5
4 d 3 6
5 a 3 7
6 b 3 8
7 c 3 9
8 d 3 10
How can I create a new data set that works like the SumIf Excel function with just unique rows?
The answer should be a new data set "summary" that is 4 x 3.
names <- unique(names)
summary <- data.frame(names)
summary$Sumx1 <- ?????
summary$Sumx2 <- ?????
summary
names Sumx1 Sumx2
1 a 6 10
2 b 6 12
3 c 6 14
4 d 6 16
In base R:
# Sum every remaining column within each value of `names`.
aggregate(. ~ names, data = total, FUN = sum)
You can use ddply from the plyr package:
library(plyr)
# Split by `names`, then compute the two named sums for each piece.
ddply(
  total,
  "names",
  summarise,
  Sumx1 = sum(x1),
  Sumx2 = sum(x2)
)
names Sumx1 Sumx2
1 a 6 10
2 b 6 12
3 c 6 14
4 d 6 16
You can also use data.table:
library(data.table)
DT <- as.data.table(total)
# .SD holds the non-grouping columns (x1 and x2); sum each within `names`.
DT[, lapply(.SD, sum), by = "names", .SDcols = c("x1", "x2")]
names x1 x2
1: a 6 10
2: b 6 12
3: c 6 14
4: d 6 16
With the new dplyr package, you can do:
library(dplyr)
# Group by `names` and compute one sum per numeric column.
summarise(
  group_by(total, names),
  Sumx1 = sum(x1),
  Sumx2 = sum(x2)
)
names Sumx1 Sumx2
1 d 6 16
2 c 6 14
3 b 6 12
4 a 6 10

Resources