Related
Can we transform this data as shown in expected output?
ColA ColB ColC
A - 0
B X 3
C Y 4
D X 51
D Y 32
Expected Output
ColA X Y
A 0 0
B 3 0
C 0 4
D 51 32
A short solution based on data.table (most recommended option if you have voluminous data)
library(data.table)

# Long-format input; the row without a category carries NA in colB.
colA <- c("A", "B", "C", "D", "D")
colB <- c(NA, "X", "Y", "X", "Y")
colC <- c(0, 3, 4, 51, 32)
dt <- data.table(colA = colA, colB = colB, colC = colC)

# Long -> wide: one row per colA, one column per distinct colB value.
# Spelling out the column variable and value.var avoids relying on dcast()
# guessing the value column. Combinations with no row are filled with 0.
dt <- dcast(dt, colA ~ colB, value.var = "colC", fill = 0)

# The NA category lands in a column literally named "NA"; drop it by reference.
dt[, "NA" := NULL]
dt
colA X Y
1: A 0 0
2: B 3 0
3: C 0 4
4: D 51 32
dcast makes your data from long to wide format.
Probably, something like this :
library(dplyr)
library(tidyr)

# Recode the "-" placeholder in the key column only. Applying the replacement
# to ColC as well would coerce the integer values to character, which no
# longer matches the numeric fill used for missing combinations below.
df %>%
  mutate(ColB = replace(ColB, ColB == "-", "X")) %>%
  pivot_wider(names_from = ColB, values_from = ColC, values_fill = list(ColC = 0L))
#  ColA      X     Y
#  <fct> <int> <int>
#1 A         0     0
#2 B         3     0
#3 C         0     4
#4 D        51    32
data
# Example input: ColA and ColB are factors, ColC is integer.
# "-" marks a row that has no category.
df <- data.frame(
  ColA = factor(c("A", "B", "C", "D", "D"), levels = c("A", "B", "C", "D")),
  ColB = factor(c("-", "X", "Y", "X", "Y"), levels = c("-", "X", "Y")),
  ColC = c(0L, 3L, 4L, 51L, 32L)
)
Yes we can.
Using xtabs:
# Sum ColC for every ColA x ColB combination, then drop the "-" placeholder
# column by name. Selecting by name rather than by position keeps the result
# correct even when "-" is not the first ColB level.
res <- xtabs(ColC ~ ColA + ColB, dat)
res <- res[, colnames(res) != "-", drop = FALSE]
# ColB
# ColA X Y
# A 0 0
# B 3 0
# C 0 4
# D 51 32
To get a data frame, do:
# Strip the table classes before converting so the wide layout
# (one column per ColB level, ColA as row names) is kept.
res <- as.data.frame(x = unclass(x = res))
res
# X Y
# A 0 0
# B 3 0
# C 0 4
# D 51 32
Or, all in one:
# Cross-tabulate, drop the first ("-") column, strip the table classes and
# convert to a data frame in a single expression.
res <- as.data.frame(
  unclass(
    xtabs(formula = ColC ~ ColA + ColB, data = dat)[, -1]
  )
)
Data
# Example input with character key columns; "-" marks a row without category.
dat <- data.frame(
  ColA = c("A", "B", "C", "D", "D"),
  ColB = c("-", "X", "Y", "X", "Y"),
  ColC = c(0L, 3L, 4L, 51L, 32L),
  stringsAsFactors = FALSE
)
I would like to join repeatedly between two tables. Here is the table.
# dput() form of the table shown as <joined>: a single factor column `key`
# with the values A, B, C, D.
structure(list(key = structure(1:4, .Label = c("A", "B", "C", "D"),
class = "factor")), class = "data.frame", row.names = c(NA,
-4L))
# dput() form of the table shown as <joining>: factor columns `key` and
# `source` plus an integer `value` column, four rows.
structure(list(key = structure(c(1L, 2L, 2L, 3L), .Label = c("A",
"B", "C"), class = "factor"), source = structure(c(1L, 1L, 2L, 2L), .Label = c("a", "b"), class = "factor"), value = c(1L, 1L, 2L, 2L)), class = "data.frame", row.names = c(NA, -4L))
<joined>
key
A
B
C
D
<joining>
key source value
A a 1
B a 1
B b 2
C b 2
If I use the left_join function, e.g. left_join(joined, joining, by = "key"), the result is:
key source value
1 A a 1
2 B a 1
3 B b 2
4 C b 2
5 D <NA> NA
However, I want to join grouping by "source". My expected results are here.
# Split the lookup table by source, left-join each part onto `joined`
# separately, then stack the two results.
joining_a <- filter(joining, source == "a")
joining_b <- filter(joining, source == "b")
left_join(joined, joining_a, by = "key")
left_join(joined, joining_b, by = "key")
bind_rows(
  left_join(joined, joining_a, by = "key"),
  left_join(joined, joining_b, by = "key")
)
key source value
1 A a 1
2 B a 1
3 C <NA> NA
4 D <NA> NA
5 A <NA> NA
6 B b 2
7 C b 2
8 D <NA> NA
How can I join the tables this way without manually splitting `joining` into one table per source first?
We can group_split(or split from base R) the 'joining' into a list and then do the left_join with 'joined' using map
library(tidyverse)
# One left join per `source` group; map_dfr() row-binds the per-group results.
joining %>%
  group_split(source) %>%
  map_dfr(function(part) left_join(joined, part, by = "key"))
# key source value
#1 A a 1
#2 B a 1
#3 C <NA> NA
#4 D <NA> NA
#5 A <NA> NA
#6 B b 2
#7 C b 2
#8 D <NA> NA
Or without a lambda function
# `x` and `by` are fixed by name, so each group is passed to left_join()
# as its remaining argument `y`.
joining %>%
  group_split(source) %>%
  map_dfr(.f = left_join, x = joined, by = "key")
data
# Table to join onto: one factor key per row.
joined <- data.frame(
  key = factor(c("A", "B", "C", "D"), levels = c("A", "B", "C", "D"))
)

# Lookup table: key B appears once per source; key D is absent.
joining <- data.frame(
  key = factor(c("A", "B", "B", "C"), levels = c("A", "B", "C")),
  source = factor(c("a", "a", "b", "b"), levels = c("a", "b")),
  value = c(1L, 1L, 2L, 2L)
)
This is my dataframe:
# List of three 6-row data frames, each with factor columns Col1 and Col2.
# Levels are given explicitly to keep the original (non-alphabetical) order.
df <- list(
  data.frame(
    Col1 = factor(c("A", "B", "C", "D", "E", "F"),
                  levels = c("A", "B", "C", "D", "E", "F")),
    Col2 = factor(c("B", "C", "D", "C", "F", "G"),
                  levels = c("B", "C", "D", "F", "G"))
  ),
  data.frame(
    Col1 = factor(c("A", "M", "N", "P", "E", "H"),
                  levels = c("A", "E", "H", "M", "N", "P")),
    Col2 = factor(c("B", "C", "D", "C", "F", "G"),
                  levels = c("B", "C", "D", "F", "G"))
  ),
  data.frame(
    Col1 = factor(c("A", "M", "U", "T", "W", "H"),
                  levels = c("A", "W", "H", "M", "T", "U")),
    Col2 = factor(c("B", "C", "D", "C", "S", "G"),
                  levels = c("B", "C", "D", "S", "G"))
  )
)
I want to extract col1=df[[1]][1] as a dataframe. Then col1 of the second position of this list I want to merge to the df[[1]][1], then I will have a dataframe with 2 columns.
After this I want to merge the column 1 of the third position of the list to the dataframe with two columns, then I will have a dataframe with 3 columns.
In other words my dataframe should have 3 columns, all the first columns of each entry of my list.
Can the dplyr package help me do this?
Any help?
You can use lapply to extract the three columns named "Col1" in one go. Then set the names of the result.
# Pull Col1 out of every data frame in the list, bind the vectors as columns
# and label them a, b, c, ... in list order.
col1 <- setNames(
  as.data.frame(lapply(df, function(d) d[["Col1"]])),
  letters[seq_along(df)]
)
col1
# a b c
#1 A A A
#2 B M M
#3 C N U
#4 D P T
#5 E E W
#6 F H H
Choose any other column names that you might find better.
A dplyr way could be
# Flatten the list of data frames into one list of columns, turn that into
# a single data frame and keep the columns whose name starts with "Col1".
df %>%
  unlist(recursive = FALSE) %>%
  as.data.frame() %>%
  select(starts_with("Col1"))
# Col1 Col1.1 Col1.2
#1 A A A
#2 B M M
#3 C N U
#4 D P T
#5 E E W
#6 F H H
With map_dfc from purrr:
library(purrr)
# `[` keeps each first column as a one-column data frame (name retained);
# map_dfc() binds the pieces column-wise.
map_dfc(.x = df, .f = `[`, 1)
Output:
Col1 Col11 Col12
1 A A A
2 B M M
3 C N U
4 D P T
5 E E W
6 F H H
Alternative use of map_dfc making use of purrr's concise element extraction syntax that allows specifying elements of elements by name or position. The first is, for example, equivalent to
# `[[` extracts each first column as a bare vector before column-binding.
map_dfc(.x = df, .f = `[[`, 1)
which differs from the use of [ in that the columns will not be named variations of Col1 and just get V names instead, which may be desirable since names like Col11 and Col12 may be confusing.
# List of three 6-row data frames, each with factor columns Col1 and Col2.
# Levels are given explicitly to keep the original (non-alphabetical) order.
df <- list(
  data.frame(
    Col1 = factor(c("A", "B", "C", "D", "E", "F"),
                  levels = c("A", "B", "C", "D", "E", "F")),
    Col2 = factor(c("B", "C", "D", "C", "F", "G"),
                  levels = c("B", "C", "D", "F", "G"))
  ),
  data.frame(
    Col1 = factor(c("A", "M", "N", "P", "E", "H"),
                  levels = c("A", "E", "H", "M", "N", "P")),
    Col2 = factor(c("B", "C", "D", "C", "F", "G"),
                  levels = c("B", "C", "D", "F", "G"))
  ),
  data.frame(
    Col1 = factor(c("A", "M", "U", "T", "W", "H"),
                  levels = c("A", "W", "H", "M", "T", "U")),
    Col2 = factor(c("B", "C", "D", "C", "S", "G"),
                  levels = c("B", "C", "D", "S", "G"))
  )
)
library(purrr)
# A bare integer as the mapper extracts the element at that position
# from every list entry.
map_dfc(.x = df, .f = 1)
#> # A tibble: 6 x 3
#> V1 V2 V3
#> <fct> <fct> <fct>
#> 1 A A A
#> 2 B M M
#> 3 C N U
#> 4 D P T
#> 5 E E W
#> 6 F H H
# A bare string as the mapper extracts the element with that name
# from every list entry.
map_dfc(.x = df, .f = "Col1")
#> # A tibble: 6 x 3
#> V1 V2 V3
#> <fct> <fct> <fct>
#> 1 A A A
#> 2 B M M
#> 3 C N U
#> 4 D P T
#> 5 E E W
#> 6 F H H
Created on 2018-09-19 by the reprex package (v0.2.0).
# Take the first column of every data frame in `df` (kept as a one-column
# data frame so its name survives) and bind all of them side by side in one
# call. This replaces growing `res` with cbind() inside a `1:length(df)`
# loop, the dummy seed column that had to be removed afterwards, and the
# leftover debugging print() of every column.
res <- do.call(cbind, lapply(df, function(d) d[1]))
So, the output is:
Col1 Col1 Col1
1 A A A
2 B M M
3 C N U
4 D P T
5 E E W
6 F H H
Using dplyr
library(dplyr)
# Extract the first column of every list element, let sapply() simplify the
# result to a matrix, then convert that matrix to a data frame.
df %>%
  sapply(function(d) d[[1]]) %>%
  as.data.frame()
#returns
V1 V2 V3
1 A A A
2 B M M
3 C N U
4 D P T
5 E E W
6 F H H
File 1:
Ele A B C D
Es 1 2 3 4
Ep 2 4 3 4
Ek 1 9 3 8
File 2:
A 1
B 2
C 3
D 5
I need each element under column A (file 1) to be multiplied by the value assigned to A in file 2, and likewise for the other columns. I know matrix multiplication in R, but I don't think this is a case of matrix multiplication. Help would be greatly appreciated. Thanks
You could try
# Column names to scale, in the order of the multiplier table.
indx <- df2[["Col1"]]
# col() gives each cell its column index, which picks the matching
# multiplier from Col2 for an element-wise product.
df1[indx] * df2[["Col2"]][col(df1[indx])]
# A B C D
#1 1 4 9 20
#2 2 8 9 20
#3 1 18 9 40
Or you could use sweep
# Multiply every selected column (MARGIN = 2) by its entry in Col2.
sweep(df1[indx], MARGIN = 2, STATS = df2$Col2, FUN = "*")
# A B C D
#1 1 4 9 20
#2 2 8 9 20
#3 1 18 9 40
data
# File 1: one row per element, one integer column per label A-D.
df1 <- data.frame(
  Ele = c("Es", "Ep", "Ek"),
  A = c(1L, 2L, 1L),
  B = c(2L, 4L, 9L),
  C = c(3L, 3L, 3L),
  D = c(4L, 4L, 8L),
  stringsAsFactors = FALSE
)

# File 2: the multiplier for each of the columns A-D.
df2 <- data.frame(
  Col1 = c("A", "B", "C", "D"),
  Col2 = c(1L, 2L, 3L, 5L),
  stringsAsFactors = FALSE
)
I have a sales report table (DF1)and I need to replace only a few product codes by their associated group codes
Model SOLD
A 5
B 4
C 4
D 3
F 11
I have another table (DF2) where I have the Model# and the associated group codes
Model Group
A 1
B 1
C 2
D 2
I would like to replace the model# in DF1 by the group number if the model exist in DF2.
The wanted end result:
Model SOLD
1 5
1 4
2 4
2 3
F 11
Thank you!
You can do this with qdapTools's lookup family, specifically the binary operator %lc+% (a wrapper for the data.table package). The l stands for lookup, the c forces the terms to character, and the + replaces only those elements that are found in the lookup table:
library(qdap)
# Replace each model code that appears in df2 with its group;
# codes without a match are left as they are.
df1[["Model"]] <- df1[["Model"]] %lc+% df2
Here it is more explicitly:
# Sales report: one row per model code.
df1 <- data.frame(
  Model = factor(c("A", "B", "C", "D", "F"),
                 levels = c("A", "B", "C", "D", "F")),
  SOLD = c(5L, 4L, 4L, 3L, 11L)
)

# Lookup table: the group code for the models that have one.
df2 <- data.frame(
  Model = factor(c("A", "B", "C", "D"), levels = c("A", "B", "C", "D")),
  Group = c(1L, 1L, 2L, 2L)
)
library(qdap)
# Replace each model code that appears in df2 with its group;
# codes without a match are left as they are.
df1[["Model"]] <- df1[["Model"]] %lc+% df2
df1
## Model SOLD
## 1 1 5
## 2 1 4
## 3 2 4
## 4 2 3
## 5 F 11