Spread data table by id - r

I have the following data.table:
> df
month student A B C D
1: 1 Amy 9 6 1 11
2: 1 Bob 8 5 5 2
3: 2 Amy 7 7 2 4
4: 2 Bob 6 6 6 6
5: 3 Amy 6 8 10 7
6: 3 Bob 9 7 11 3
I want to transform this data.table to this format:
> df1
month cols Amy Bob
1: 1 A 9 8
2: 1 B 6 5
3: 1 C 1 5
4: 1 D 11 2
5: 2 A 7 6
6: 2 B 7 6
7: 2 C 2 6
8: 2 D 4 6
9: 3 A 6 9
10: 3 B 8 7
11: 3 C 10 11
12: 3 D 7 3
I tried multiple ways using dcast etc. but I couldn't transform the data. Help please!

You have to melt the dataframe and then dcast -
# Reshape in two steps: melt() stacks the measure columns (A-D) into long
# format, keeping month and student as identifiers; dcast() then spreads the
# students back out so each one becomes its own column.
# `id.vars` is named in full so the call does not rely on partial argument
# matching.
tmp <- melt(df, id.vars = c("month", "student"), variable.name = "cols")
df1 <- dcast(tmp, month + cols ~ student, value.var = "value")
Both are from the data.table library

A tidyr approach.
> library(tidyr)
> df %>%
gather(cols, values, A:D) %>%
spread(student, values)
month cols Amy Bob
1 1 A 9 8
2 1 B 6 5
3 1 C 1 5
4 1 D 11 2
5 2 A 7 6
6 2 B 7 6
7 2 C 2 6
8 2 D 4 6
9 3 A 6 9
10 3 B 8 7
11 3 C 10 11
12 3 D 7 3

Related

Create a function to Impute values from one data frame into another

The NA values in column A should be filled by the A value from the dat data frame and so on for the other variables.
id <- factor(rep(letters[1:2], each=5))
A <- c(1,2,NA,6,8,9,0,6,7,9)
B <- c(5,6,1,9,8,1,NA,9,7,4)
C <- c(2,3,5,NA,NA,2,7,6,4,6)
D <- c(6,5,8,3,2,9,NA,2,6,8)
df <- data.frame(id, A, B,C,D)
df
id A B C D
1 a 1 5 2 6
2 a 2 6 3 5
3 a NA 1 5 8
4 a 6 9 NA 3
5 a 8 8 NA 2
6 b 9 1 2 9
7 b 0 NA 7 NA
8 b 6 9 6 2
9 b 7 7 4 6
10 b 9 4 6 8
dat <- data.frame(col=c("A","B","C","D"), value=c(23,45,26,89))
dat
dat
col value
1 A 23
2 B 45
3 C 26
4 D 89
It should look like:
id A B C D
1 a 1 5 2 6
2 a 2 6 3 5
3 a 23 1 5 8
4 a 6 9 26 3
5 a 8 8 26 2
6 b 9 1 2 9
7 b 0 45 7 89
8 b 6 9 6 2
9 b 7 7 4 6
10 b 9 4 6 8
I was thinking of something like this, but I don't know how to connect those data frames in a function...
# Attempted helper (does not give the desired result): `i` selects a column
# of `df`. The `<-` assignment only changes a copy of `df` that is local to
# the function, and the whole of `dat$value` is used as the replacement
# instead of the single value that belongs to column `i`.
test <- function(i){
df[,i][is.na(df[,i])] <- dat$value
}
test(2)
If you want it in your format
# Fill the missing entries of one column of `df` with the value recorded for
# that column in the lookup table `dat`.
#
# i: name of the column to fill; it is looked up in `dat$col`.
# Side effect: `df` is modified outside the function through `<<-`.
test <- function(i) {
  fill_value <- dat$value[dat$col == i]
  df[, i][is.na(df[, i])] <<- fill_value
}
test("A")
id A B C D
1 a 1 5 2 6
2 a 2 6 3 5
3 a 23 1 5 8
4 a 6 9 NA 3
5 a 8 8 NA 2
6 b 9 1 2 9
7 b 0 NA 7 NA
8 b 6 9 6 2
9 b 7 7 4 6
10 b 9 4 6 8
One approach is to iterate over the columns and values and use coalesce():
library(dplyr)
library(purrr)
# Pair every column with its fill value by name (taken from `dat$col`) rather
# than by position, so the result does not depend on the row order of `dat`
# or on `df` having exactly one non-id column per row of `dat`.
# coalesce() keeps existing values and substitutes the fill value for NA.
fill_cols <- as.character(dat$col)
df[fill_cols] <- map2(df[fill_cols], dat$value, coalesce)
df
id A B C D
1 a 1 5 2 6
2 a 2 6 3 5
3 a 23 1 5 8
4 a 6 9 26 3
5 a 8 8 26 2
6 b 9 1 2 9
7 b 0 45 7 89
8 b 6 9 6 2
9 b 7 7 4 6
10 b 9 4 6 8
Or same using replace():
map2_df(df[-1], dat$value, ~ replace(.x, is.na(.x), .y))

R DataTable Solution Fast Reshape

data1=data.frame("StudentID"=c(1,2,3,4,5),
"a1cat"=c(9,10,2,0,10),
"a2cat"=c(0,2,8,6,7),
"a3cat"=c(4,2,1,6,5),
"a1dog"=c(8,4,4,5,8),
"a2dog"=c(1,9,10,5,7),
"a3dog"=c(9,3,2,7,7),
"q20fox"=c(2,8,6,1,9),
"q22fox"=c(8,10,9,6,6),
"q24fox"=c(5,0,2,9,7))
# Target long-format table: one row per student and time point (three time
# points for each of the five students).
data2 <- data.frame(
  StudentID = rep(1:5, each = 3),
  timeX = rep(c(1, 2, 3), times = 5),
  meow = c(9, 0, 4, 10, 2, 2, 2, 8, 1, 0, 6, 6, 10, 7, 5),
  bark = c(8, 1, 9, 4, 9, 3, 4, 10, 2, 5, 5, 7, 8, 7, 7),
  woof = c(2, 8, 5, 8, 10, 0, 6, 9, 2, 1, 6, 9, 9, 6, 7)
)
I have 'data1' and wish to get 'data2' using data.table to reshape the data and give new names for each column.
data1x=data.frame("StudentID"=c(1,2,3,4,5),
"a1cat"=c(9,10,2,0,10),
"a2cat"=c(0,2,8,6,7),
"a3cat"=c(4,2,1,6,5),
"a1dog"=c(8,4,4,5,8),
"a2dog"=c(1,9,10,5,7),
"a3dog"=c(9,3,2,7,7),
"fox20"=c(2,8,6,1,9),
"fox22"=c(8,10,9,6,6),
"fox24"=c(5,0,2,9,7))
We can use melt with measure patterns
library(data.table)
# setDT() converts data1 to a data.table; each regular expression passed to
# patterns() selects one group of measure columns:
#   "cat$"     -> a1cat, a2cat, a3cat    (stacked into "meow")
#   "dog$"     -> a1dog, a2dog, a3dog    (stacked into "bark")
#   "fox\\d*$" -> q20fox, q22fox, q24fox (stacked into "woof"); the optional
#                 trailing digits also match names such as fox20 in data1x
# timeX numbers the position of a column within its group, and the trailing
# [order(StudentID)] sorts the long table by student.
melt(setDT(data1), measure = patterns("cat$", "dog$", "fox\\d*$"),
value.name = c("meow", "bark", "woof"),
variable.name = 'timeX')[order(StudentID)]
# StudentID timeX meow bark woof
# 1: 1 1 9 8 2
# 2: 1 2 0 1 8
# 3: 1 3 4 9 5
# 4: 2 1 10 4 8
# 5: 2 2 2 9 10
# 6: 2 3 2 3 0
# 7: 3 1 2 4 6
# 8: 3 2 8 10 9
# 9: 3 3 1 2 2
#10: 4 1 0 5 1
#11: 4 2 6 5 6
#12: 4 3 6 7 9
#13: 5 1 10 8 9
#14: 5 2 7 7 6
#15: 5 3 5 7 7

data.table manipulation and merging

I have data
dat1 <- data.table(id=1:8,
group=c(1,1,2,2,2,3,3,3),
value=c(5,6,10,11,12,20,21,22))
dat2 <- data.table(group=c(1,2,3),
value=c(3,6,13))
and I would like to subtract dat2$value from each of the dat1$value, based on group.
Is this possible using data.table or does it require additional packages?
With data.table, you could do:
library(data.table)
# Join dat2 onto dat1 by group (dat2's value arrives as `i.value`), then add
# the difference as a new column. The subtraction is vectorised over all
# rows, so no grouping is needed; the trailing [] prints the result.
dat1[dat2, on = "group"][, new.value := value - i.value][]
Which returns:
id group value i.value new.value
1: 1 1 5 3 2
2: 2 1 6 3 3
3: 3 2 10 6 4
4: 4 2 11 6 5
5: 5 2 12 6 6
6: 6 3 20 13 7
7: 7 3 21 13 8
8: 8 3 22 13 9
Alternatively, you can do this in one step as akrun mentions:
dat1[dat2, newvalue := value - i.value, on = .(group)]
id group value newvalue
1: 1 1 5 2
2: 2 1 6 3
3: 3 2 10 4
4: 4 2 11 5
5: 5 2 12 6
6: 6 3 20 7
7: 7 3 21 8
8: 8 3 22 9

Subset data.frame by column

I have this data.frame:
# Example data: five groups labelled "1" to "5" in column a (three rows each)
# and the values 1 to 15 in column b.
a <- rep(c("1", "2", "3", "4", "5"), each = 3)
b <- as.numeric(1:15)
df <- data.frame(a, b)
a b
1 1 1
2 1 2
3 1 3
4 2 4
5 2 5
6 2 6
7 3 7
8 3 8
9 3 9
10 4 10
11 4 11
12 4 12
13 5 13
14 5 14
15 5 15
I want to have something like this:
a <- c(rep("2", 3), rep("3", 3))
b <- c(4,5,6,7,8,9)
dffinal<-data.frame(a,b)
a b
1 2 4
2 2 5
3 2 6
4 3 7
5 3 8
6 3 9
I could use the "subset" function, but it's not working:
sub <- subset(df,c(2,3) == a )
a b
5 2 5
8 3 8
This command only takes one row of "2" and "3" in column "a".
Any Help?
You're confusing == with %in%:
subset(df, a %in% c(2,3))
# a b
# 4 2 4
# 5 2 5
# 6 2 6
# 7 3 7
# 8 3 8
# 9 3 9
what about this?
library(dplyr)
df %>% filter(a == 2 | a==3)
a b
1 2 4
2 2 5
3 2 6
4 3 7
5 3 8
6 3 9
We can use data.table. We convert the 'data.frame' to 'data.table' (setDT(df)), and set the 'key' as column 'a', then we subset the rows.
library(data.table)
setDT(df, key= 'a')[c('2','3')]
# a b
#1: 2 4
#2: 2 5
#3: 2 6
#4: 3 7
#5: 3 8
#6: 3 9

subset function in R with more than one conditions [duplicate]

I have this data.frame:
# Example data: five groups labelled "1" to "5" in column a (three rows each)
# and the values 1 to 15 in column b.
a <- rep(c("1", "2", "3", "4", "5"), each = 3)
b <- as.numeric(1:15)
df <- data.frame(a, b)
a b
1 1 1
2 1 2
3 1 3
4 2 4
5 2 5
6 2 6
7 3 7
8 3 8
9 3 9
10 4 10
11 4 11
12 4 12
13 5 13
14 5 14
15 5 15
I want to have something like this:
a <- c(rep("2", 3), rep("3", 3))
b <- c(4,5,6,7,8,9)
dffinal<-data.frame(a,b)
a b
1 2 4
2 2 5
3 2 6
4 3 7
5 3 8
6 3 9
I could use the "subset" function, but it's not working:
sub <- subset(df,c(2,3) == a )
a b
5 2 5
8 3 8
This command only takes one row of "2" and "3" in column "a".
Any Help?
You're confusing == with %in%:
subset(df, a %in% c(2,3))
# a b
# 4 2 4
# 5 2 5
# 6 2 6
# 7 3 7
# 8 3 8
# 9 3 9
what about this?
library(dplyr)
df %>% filter(a == 2 | a==3)
a b
1 2 4
2 2 5
3 2 6
4 3 7
5 3 8
6 3 9
We can use data.table. We convert the 'data.frame' to 'data.table' (setDT(df)), and set the 'key' as column 'a', then we subset the rows.
library(data.table)
setDT(df, key= 'a')[c('2','3')]
# a b
#1: 2 4
#2: 2 5
#3: 2 6
#4: 3 7
#5: 3 8
#6: 3 9

Resources