Reshape R dataframe - r

I have created the following dataframe object in R
# Build the example data frame. Columns are named with `=` inside
# data.frame(), and the first column is called "Attributes" directly, so no
# separate names(df) <- ... step is needed.
df <- data.frame(
  Attributes = c("A", "B", "C", "D"),
  Name1 = c(1, 2, 3, 4),
  Name2 = c(2, 1, 2, 1),
  Name3 = c(1, 3, 2, 4)
)
df
I would like the following output.
names attributes
1 A D B
2 B A C
3 C B
4 D
I am unable to get a solution for this. Request your help in this regard

Here is a base R solution using stack and by:
# Example input: one label column followed by three numeric columns.
df <- data.frame(
  Attribute = c("A", "B", "C", "D"),
  Name1 = c(1, 2, 3, 4),
  Name2 = c(2, 1, 2, 1),
  Name3 = c(1, 3, 2, 4)
)

# stack() puts the Name* columns into long form (columns `values` and `ind`);
# the four Attribute labels are recycled once per stacked column.
df.stacked <- data.frame(stack(df[, -1]), Attribute = df$Attribute)

# For each distinct value, list the attributes that carry it, in the order
# they are first seen.
by(df.stacked, df.stacked$values, function(grp) list(unique(grp$Attribute)))
#[1] A B D
#Levels: A B C D
#------------------------------------------------------------
#[1] B A C
#Levels: A B C D
#------------------------------------------------------------
#[1] C B
#Levels: A B C D
#------------------------------------------------------------
#[1] D
#Levels: A B C D

Try this
# Reshape to long form with data.table, then collapse the attributes per value
# with dplyr.
library(dplyr)
library(data.table)
# setDT() converts df to a data.table in place (no copy is assigned back).
setDT(df)
# Column 1 is the id column; every column matching "Name" is melted, and the
# melted values are stored in a column called "names".
df2 <- melt(df, id = 1, measure = patterns("Name"), value.name = "names")
df2 %>%
# Drop the second column (the melted column-name indicator).
select(-2) %>%
group_by(names) %>%
# Keep each (Attributes, names) pair only once.
distinct() %>%
# One row per value, with its attributes pasted into a space-separated string.
summarise(attributes = paste(Attributes, collapse = " "))
# output
# A tibble: 4 x 2
names attributes
<dbl> <chr>
1 1 A B D
2 2 B A C
3 3 C B
4 4 D

Here is a solution with base R:
# Example data: an Attribute label plus three numeric Name* columns.
df <- data.frame(
  Attribute = c("A", "B", "C", "D"),
  Name1 = c(1, 2, 3, 4),
  Name2 = c(2, 1, 2, 1),
  Name3 = c(1, 3, 2, 4)
)
df

# Keep the labels aside, then reduce df to a numeric matrix of the Name*
# columns.
A <- df$Attribute
df <- as.matrix(df[-1])

# For every value k from 1 up to the largest entry, pick the attributes whose
# row contains k in any column (labels come out in row order).
lapply(1:max(df), function(k) {
  row_has_k <- apply(df == k, 1, any)
  A[row_has_k]
})
# > lapply(1:max(df), function(x) A[apply(df==x, 1, any)])
# [[1]]
# [1] A B D
# Levels: A B C D
#
# [[2]]
# [1] A B C
# Levels: A B C D
#
# [[3]]
# [1] B C
# Levels: A B C D
#
# [[4]]
# [1] D
# Levels: A B C D
Here is a solution with data.table:
# data.table variant: scan each Name* column for the value and collect the
# matching attribute labels.
library("data.table")
df <- data.frame(Attribute=c("A", "B", "C", "D"), Name1=c(1,2,3,4), Name2=c(2,1,2,1), Name3=c(1,3,2,4))
df
# Keep the labels aside; df is then replaced by a data.table holding only the
# Name* columns.
A <- df$Attribute
df <- setDT(df[-1])
# For each value a in 1..max: take, column by column, the labels whose entry
# equals a, flatten them, and drop repeats (so labels appear in column order,
# not row order).
lapply(1:max(as.matrix(df)), function(a) unique(unlist(sapply(df, function(x) A[x==a]))))
# > lapply(1:max(as.matrix(df)), function(a) unique(unlist(sapply(df, function(x) A[x==a]))))
# [[1]]
# [1] A B D
# Levels: A B C D
#
# [[2]]
# [1] B A C
# Levels: A B C D
#
# [[3]]
# [1] C B
# Levels: A B C D
#
# [[4]]
# [1] D
# Levels: A B C D

Related

Combination with conditions in R

how to make a combination of letters
label=c("A","B","C","D","E")
into a dataframe with 4 group (G1, G2, G3, G4) as follows
k2=data.frame(G1=c("AB","AC","AD","AE","BC","BD","BE","CD","CE","DE"),
G2=c("C","B","B","B","A","A","A","A","A","A"),
G3=c("D","D","C","C","D","C","C","B","B","B"),
G4=c("E","E","E","D","E","E","D","E","D","C"))
And if I want only 3 groups (G1, G2, G3), with the condition that "B" and "C" are never separated, as in the data frame below, how can I do that?
k3=data.frame(G1=c("BCD","BCE","BCA","AE","AD","DE"),
G2=c("A","A","D","BC","BC","BC"),
G3=c("E","D","E","D","E","A"))
Thank you very much for the help
Here is one way to do what you want to do:
# All letters to distribute over the groups.
labels <- c("A", "B", "C", "D", "E")

# Every unordered pair of letters, one pair per row.
pairs <- t(combn(labels, 2))

# The remaining letters for each pair. This has to be computed from the
# two-column matrix, before the pair is collapsed into a single string.
b <- t(apply(pairs, 1, function(x) setdiff(labels, x)))

# Collapse each pair into one string ("AB", "AC", ...), which becomes G1.
a <- paste0(pairs[, 1], pairs[, 2])

k2 <- data.frame(a, b)
colnames(k2) <- paste0("G", 1:4)
k2
# G1 G2 G3 G4
# 1 AB C D E
# 2 AC B D E
# 3 AD B C E
# 4 AE B C D
# 5 BC A D E
# 6 BD A C E
# 7 BE A C D
# 8 CD A B E
# 9 CE A B D
# 10 DE A B C
The simplest way to do the second version is to exclude "C" and add it at the end:
# Work with "B" standing in for the inseparable "BC" unit; "C" is added back
# at the end.
labels <- c("A", "B", "D", "E")

# Every unordered pair of the reduced letter set, one pair per row.
pairs <- t(combn(labels, 2))

# The two letters left over for each pair, computed from the two-column matrix
# before the pair is collapsed into a single string.
e <- t(apply(pairs, 1, function(x) setdiff(labels, x)))

# Collapse each pair into one string, which becomes G1.
d <- paste0(pairs[, 1], pairs[, 2])

k3 <- data.frame(d, e)
colnames(k3) <- paste0("G", 1:3)

# Expand every "B" into "BC" in all columns, keeping the column names.
k3[] <- lapply(k3, function(col) gsub("B", "BC", col, fixed = TRUE))
k3
# G1 G2 G3
# 1 ABC D E
# 2 AD BC E
# 3 AE BC D
# 4 BCD A E
# 5 BCE A D
# 6 DE A BC
This does not match your k3 exactly, but it is more consistent with k2.

Error: replacement has 0 rows, data has 22 in for loop [duplicate]

I have a data frame containing (in random places) a character value (say "foo") that I want to replace with a NA.
What's the best way to do so across the whole data frame?
This:
df[df == "foo"] <- NA
One way to nip this in the bud is to convert that character to NA when you read the data in in the first place.
df <- read.csv("file.csv", na.strings = c("foo", "bar"))
Using dplyr::na_if, you can replace specific values with NA. In this case, that would be "foo".
library(dplyr)
# Fix the RNG seed so the sampled example is reproducible.
set.seed(1234)
# Toy data: an id column plus three character columns that may contain "foo".
df <- data.frame(
  id = 1:6,
  x = sample(c("a", "b", "foo"), size = 6, replace = TRUE),
  y = sample(c("c", "d", "foo"), size = 6, replace = TRUE),
  z = sample(c("e", "f", "foo"), size = 6, replace = TRUE),
  stringsAsFactors = FALSE
)
df
#> id x y z
#> 1 1 a c e
#> 2 2 b c foo
#> 3 3 b d e
#> 4 4 b d foo
#> 5 5 foo foo e
#> 6 6 b d e
na_if(df$x, "foo")
#> [1] "a" "b" "b" "b" NA "b"
If you need to do this for multiple columns, you can pass "foo" through from mutate with across (updated for dplyr v1.0.0+).
# Replace "foo" with NA in x, y and z. The comparison value is supplied inside
# a lambda because passing extra arguments through across()'s `...` is
# deprecated as of dplyr 1.1.0.
df %>%
  mutate(across(c(x, y, z), ~ na_if(.x, "foo")))
#> id x y z
#> 1 1 a c e
#> 2 2 b c <NA>
#> 3 3 b d e
#> 4 4 b d <NA>
#> 5 5 <NA> <NA> e
#> 6 6 b d e
Another option is is.na<-:
is.na(df) <- df == "foo"
Note that its use may seem a bit counter-intuitive, but it actually assigns NA values to df at the index on the right hand side.
This could be done with dplyr::mutate_all() and replace:
library(dplyr)
# tibble() replaces the deprecated data_frame(). Mixing "foo" with numbers
# makes columns a, b and c character; d stays numeric.
df <- tibble(a = c("foo", 2, 3), b = c(1, "foo", 3), c = c(1, 2, "foobar"), d = c(1, 2, 3))
> df
# A tibble: 3 x 4
a b c d
<chr> <chr> <chr> <dbl>
1 foo 1 1 1
2 2 foo 2 2
3 3 3 foobar 3
# In every column, set the entries equal to "foo" to NA. A formula lambda is
# used instead of the deprecated funs() helper; only exact matches are
# replaced, so "foobar" is left alone.
df <- mutate_all(df, ~ replace(., . == "foo", NA))
> df
# A tibble: 3 x 4
a b c d
<chr> <chr> <chr> <dbl>
1 <NA> 1 1 1
2 2 <NA> 2 2
3 3 3 foobar 3
Another dplyr option is:
# Apply na_if() column by column: since dplyr 1.1.0, na_if() no longer accepts
# a whole data frame together with a scalar value. Only character columns are
# touched, because "foo" can only occur there.
df <- mutate(df, across(where(is.character), ~ na_if(.x, "foo")))
Assuming you do not know the column names or have large number of columns to select, is.character() might be of use.
df <- data.frame(
id = 1:6,
x = sample(c("a", "b", "foo"), 6, replace = T),
y = sample(c("c", "d", "foo"), 6, replace = T),
z = sample(c("e", "f", "foo"), 6, replace = T),
stringsAsFactors = F
)
df
# id x y z
# 1 1 b d e
# 2 2 a foo foo
# 3 3 a d foo
# 4 4 b foo foo
# 5 5 foo foo e
# 6 6 foo foo f
df %>%
mutate_if(is.character, list(~na_if(., "foo")))
# id x y z
# 1 1 b d e
# 2 2 a <NA> <NA>
# 3 3 a d <NA>
# 4 4 b <NA> <NA>
# 5 5 <NA> <NA> e
# 6 6 <NA> <NA> f
One alternate way to solve is below:
# Replace empty strings (in DF) and NAs (in FinalData) with "ALL", one column
# at a time. The loop variable is the same `columnIndex` that the body indexes
# with, and seq_len() keeps the loop from running when DF has no columns.
# NOTE(review): the two statements target DF and FinalData respectively;
# confirm whether both are meant to be the same data frame.
for (columnIndex in seq_len(ncol(DF))) {
  DF[which(DF[, columnIndex] == ""), columnIndex] <- "ALL"
  FinalData[which(is.na(FinalData[, columnIndex])), columnIndex] <- "ALL"
}

r create new data frame that matches in rows elements grouped by another column

I want to create a new data frame from the df one below. In the new data frame (df2), each element in df$name is placed in the first column and matched in its row with other element of df$name grouped by df$group.
df <- data.frame(group = rep(letters[1:2], each=3),
name = LETTERS[1:6])
> df
group name
1 a A
2 a B
3 a C
4 b D
5 b E
6 b F
In this example, "A", "B", and "C" in df$name belong to "a" in df$group, and I want to put them in the same row in a new data frame. The desired output looks like this:
> df2
V1 V2
1 A B
2 A C
3 B A
4 B C
5 C A
6 C B
7 D E
8 D F
9 E D
10 E F
11 F D
12 F E
We could do this in base R with merge
# Self-merge on group: every name is paired with every name of its own group.
out <- merge(df, df, by = "group")
# Drop the pairs where a name is matched with itself and keep only the two
# name columns.
out <- out[which(out$name.x != out$name.y), c("name.x", "name.y")]
names(out) <- c("V1", "V2")
# Renumber the rows 1..n after the filtering.
row.names(out) <- NULL
out
# V1 V2
#1 A B
#2 A C
#3 B A
#4 B C
#5 C A
#6 C B
#7 D E
#8 D F
#9 E D
#10 E F
#11 F D
#12 F E
In my opinion, this is a case for a self-join. Using dplyr, a solution could be:
library(dplyr)
# Self-join on group: each row is paired with every row of the same group, and
# the duplicated name column comes back as name.x / name.y.
# NOTE(review): recent dplyr versions warn about many-to-many matches here;
# the result is the intended one — confirm whether the warning should be silenced.
inner_join(df, df, by="group") %>%
# Drop the pairs where a name is matched with itself.
filter(name.x != name.y) %>%
# Keep only the two name columns, renamed to V1 and V2.
select(V1 = name.x, V2 = name.y)
# V1 V2
# 1 A B
# 2 A C
# 3 B A
# 4 B C
# 5 C A
# 6 C B
# 7 D E
# 8 D F
# 9 E D
# 10 E F
# 11 F D
# 12 F E
df <- data.frame(group = rep(letters[1:2], each=3),
name = LETTERS[1:6])
library(tidyverse)
df %>%
group_by(group) %>% # for every group
summarise(v = list(expand.grid(V1=name, V2=name))) %>% # create all combinations of names
select(v) %>% # keep only the combinations
unnest(v) %>% # unnest combinations
filter(V1 != V2) # exclude rows with same names
# # A tibble: 12 x 2
# V1 V2
# <fct> <fct>
# 1 B A
# 2 C A
# 3 A B
# 4 C B
# 5 A C
# 6 B C
# 7 E D
# 8 F D
# 9 D E
# 10 F E
# 11 D F
# 12 E F

rstudio dplyr group _by multiple column

In Rstudio, I have a dataframe which contains 4 columns and I need to get the list of every different triplet of the 3 first columns sorted decreasingly by the sum on the 4th column. For example, with:
A B C 2
D E F 5
A B C 4
G H I 5
D E F 3
I need as a result:
D E F 8
A B C 6
G H I 5
I've tried the following approaches, but I can't manage to get exactly the result I need:
df_list<-df_raw_data %>%
group_by(param1, param2, param3) %>%
summarise_all(total = sum(param4))
arrange(df_list, desc(total))
and:
df_list<-unique(df_raw_data[, c('param1', 'param2', 'param3')])
cbind(df_list, total)
for(i in 1:nrow(df_raw_data))
{
filter ???????????
}
I would prefer to use the dplyr package since it's a more elegant solution.
EDIT: Okay, thanks for your working answers. I think that I've lost some time figuring out that the plyr package shouldn't be loaded after dplyr...
We can use group_by_at to select the columns to group.
library(dplyr)
dat2 <- dat %>%
group_by_at(vars(-V4)) %>%
summarise(V4 = sum(V4)) %>%
ungroup()
dat2
# # A tibble: 3 x 4
# V1 V2 V3 V4
# <chr> <chr> <chr> <int>
# 1 A B C 6
# 2 D E F 8
# 3 G H I 5
Or use group_by_if to select columns to group based on column types.
dat2 <- dat %>%
group_by_if(is.character) %>%
summarise(V4 = sum(V4)) %>%
ungroup()
dat2
# # A tibble: 3 x 4
# V1 V2 V3 V4
# <chr> <chr> <chr> <int>
# 1 A B C 6
# 2 D E F 8
# 3 G H I 5
DATA
dat <- read.table(text = "A B C 2
D E F 5
A B C 4
G H I 5
D E F 3",
header = FALSE, stringsAsFactors = FALSE)
Would this be what you are looking for?
# Example data mirroring the question: three label columns and one numeric
# column. tibble() replaces the deprecated data_frame().
df <- tibble(
  var1 = c("A", "D", "A", "G", "D"),
  var2 = c("B", "E", "B", "H", "E"),
  var3 = c("C", "F", "C", "I", "F"),
  var4 = c(2, 5, 4, 5, 3)
)

# Total var4 per (var1, var2, var3) triplet, largest total first.
df %>%
  group_by(var1, var2, var3) %>%
  summarise(sum = sum(var4)) %>%
  arrange(desc(sum))

Replacing character values with NA in a data frame

I have a data frame containing (in random places) a character value (say "foo") that I want to replace with a NA.
What's the best way to do so across the whole data frame?
This:
df[df == "foo"] <- NA
One way to nip this in the bud is to convert that character to NA when you read the data in in the first place.
df <- read.csv("file.csv", na.strings = c("foo", "bar"))
Using dplyr::na_if, you can replace specific values with NA. In this case, that would be "foo".
library(dplyr)
# Fix the RNG seed so the sampled example is reproducible.
set.seed(1234)
# Toy data: an id column plus three character columns that may contain "foo".
df <- data.frame(
  id = 1:6,
  x = sample(c("a", "b", "foo"), size = 6, replace = TRUE),
  y = sample(c("c", "d", "foo"), size = 6, replace = TRUE),
  z = sample(c("e", "f", "foo"), size = 6, replace = TRUE),
  stringsAsFactors = FALSE
)
df
#> id x y z
#> 1 1 a c e
#> 2 2 b c foo
#> 3 3 b d e
#> 4 4 b d foo
#> 5 5 foo foo e
#> 6 6 b d e
na_if(df$x, "foo")
#> [1] "a" "b" "b" "b" NA "b"
If you need to do this for multiple columns, you can pass "foo" through from mutate with across (updated for dplyr v1.0.0+).
# Replace "foo" with NA in x, y and z. The comparison value is supplied inside
# a lambda because passing extra arguments through across()'s `...` is
# deprecated as of dplyr 1.1.0.
df %>%
  mutate(across(c(x, y, z), ~ na_if(.x, "foo")))
#> id x y z
#> 1 1 a c e
#> 2 2 b c <NA>
#> 3 3 b d e
#> 4 4 b d <NA>
#> 5 5 <NA> <NA> e
#> 6 6 b d e
Another option is is.na<-:
is.na(df) <- df == "foo"
Note that its use may seem a bit counter-intuitive, but it actually assigns NA values to df at the index on the right hand side.
This could be done with dplyr::mutate_all() and replace:
library(dplyr)
# tibble() replaces the deprecated data_frame(). Mixing "foo" with numbers
# makes columns a, b and c character; d stays numeric.
df <- tibble(a = c("foo", 2, 3), b = c(1, "foo", 3), c = c(1, 2, "foobar"), d = c(1, 2, 3))
> df
# A tibble: 3 x 4
a b c d
<chr> <chr> <chr> <dbl>
1 foo 1 1 1
2 2 foo 2 2
3 3 3 foobar 3
# In every column, set the entries equal to "foo" to NA. A formula lambda is
# used instead of the deprecated funs() helper; only exact matches are
# replaced, so "foobar" is left alone.
df <- mutate_all(df, ~ replace(., . == "foo", NA))
> df
# A tibble: 3 x 4
a b c d
<chr> <chr> <chr> <dbl>
1 <NA> 1 1 1
2 2 <NA> 2 2
3 3 3 foobar 3
Another dplyr option is:
# Apply na_if() column by column: since dplyr 1.1.0, na_if() no longer accepts
# a whole data frame together with a scalar value. Only character columns are
# touched, because "foo" can only occur there.
df <- mutate(df, across(where(is.character), ~ na_if(.x, "foo")))
Assuming you do not know the column names or have large number of columns to select, is.character() might be of use.
df <- data.frame(
id = 1:6,
x = sample(c("a", "b", "foo"), 6, replace = T),
y = sample(c("c", "d", "foo"), 6, replace = T),
z = sample(c("e", "f", "foo"), 6, replace = T),
stringsAsFactors = F
)
df
# id x y z
# 1 1 b d e
# 2 2 a foo foo
# 3 3 a d foo
# 4 4 b foo foo
# 5 5 foo foo e
# 6 6 foo foo f
df %>%
mutate_if(is.character, list(~na_if(., "foo")))
# id x y z
# 1 1 b d e
# 2 2 a <NA> <NA>
# 3 3 a d <NA>
# 4 4 b <NA> <NA>
# 5 5 <NA> <NA> e
# 6 6 <NA> <NA> f
One alternate way to solve is below:
# Replace empty strings (in DF) and NAs (in FinalData) with "ALL", one column
# at a time. The loop variable is the same `columnIndex` that the body indexes
# with, and seq_len() keeps the loop from running when DF has no columns.
# NOTE(review): the two statements target DF and FinalData respectively;
# confirm whether both are meant to be the same data frame.
for (columnIndex in seq_len(ncol(DF))) {
  DF[which(DF[, columnIndex] == ""), columnIndex] <- "ALL"
  FinalData[which(is.na(FinalData[, columnIndex])), columnIndex] <- "ALL"
}

Resources