Here is a small sample of my data
AB AN AQ AP AA
1 O1 N 12 13
2 K1 B 22 16
I want to generate this table
AB AN AQ New AP
1 O1 N 1 12
1 O1 N 2 13
2 K1 B 1 22
2 K1 B 2 16
The logic is to stack the same data in AB, AN and AQ, then generate a new column which gets 1, followed by AP.
Under this row, the same data is repeated, but the New column gets 2 and is followed by AA. So the New column takes the values 1 and 2.
An option with reshape from base R
# Give both value columns the shared stem "AP" plus a numeric suffix so that
# reshape() can split each name into a variable name and a time index.
names(df)[4:5] <- paste0("AP", seq_len(2))
reshape(
  df,
  varying = 4:5,
  timevar = "New",
  sep = "",
  direction = "long"
)
# AB AN AQ New AP id
#1.1 1 O1 N 1 12 1
#2.1 2 K1 B 1 22 2
#1.2 1 O1 N 2 13 1
#2.2 2 K1 B 2 16 2
data
df <- structure(list(AB = 1:2, AN = c("O1", "K1"), AQ = c("N", "B"),
AP = c(12L, 22L), AA = c(13L, 16L)),
class = "data.frame", row.names = c(NA, -2L))
You can get the data in long format and then generate a new column based on unique column values.
library(dplyr)
library(tidyr)
# Stack AP and AA into a single value column, then recode the source column
# name as its order of first appearance (AP -> 1, AA -> 2).
df %>%
  pivot_longer(
    cols = c(AP, AA),
    names_to = "New",
    values_to = "AP"
  ) %>%
  mutate(New = match(New, unique(New)))
# AB AN AQ New AP
# <int> <chr> <chr> <int> <int>
#1 1 O1 N 1 12
#2 1 O1 N 2 13
#3 2 K1 B 1 22
#4 2 K1 B 2 16
data
df <- structure(list(AB = 1:2, AN = c("O1", "K1"), AQ = c("N", "B"),
AP = c(12L, 22L), AA = c(13L, 16L)),
class = "data.frame", row.names = c(NA, -2L))
Related
this is the dataframe
name team stat1 stat2
a aa 1 4
b aa 2 3
c bb 3 2
d bb 4 1
I want to calculate a new variable, which for each player is calculated as
-> (( stat1 of player 'a' / sum of stat1 for that team ) + ( stat2 of player 'a' / sum of stat2 for that team ))
-> ((1/(1+2)) + (4/(4+3)))
any idea on how to do this?
We can group by 'team', and then do the calculation to create a new column
library(dplyr)
# Within each team, express a player's stat1 and stat2 as shares of the team
# totals and add the two shares; drop the grouping afterwards.
df1 <- df1 %>%
  group_by(team) %>%
  mutate(new = stat1 / sum(stat1) + stat2 / sum(stat2)) %>%
  ungroup()
-output
df1
# A tibble: 4 × 5
name team stat1 stat2 new
<chr> <chr> <int> <int> <dbl>
1 a aa 1 4 0.905
2 b aa 2 3 1.10
3 c bb 3 2 1.10
4 d bb 4 1 0.905
data
df1 <- structure(list(name = c("a", "b", "c", "d"), team = c("aa", "aa",
"bb", "bb"), stat1 = 1:4, stat2 = 4:1), class = "data.frame",
row.names = c(NA,
-4L))
For each row in df1 I would like to execute mult 10 times, once for each year in df2.
One option I can think of is to repeat df1 multiple times and join it to df2. But my actual data are much larger (~20k sections, 15 areas and 100 years), so I am looking for a more efficient way to do this.
# df1
section area a b c
1 1 1 0.1208916 0.7235306 0.7652636
2 2 1 0.8265642 0.2939602 0.6491496
3 1 2 0.9101611 0.7363248 0.1509295
4 2 2 0.8807047 0.5473221 0.6748055
5 1 3 0.2343558 0.2044689 0.9647333
6 2 3 0.4112479 0.9523639 0.1533197
----------
# df2
year d
1 1 0.7357432
2 2 0.4591575
3 3 0.3654561
4 4 0.1996439
5 5 0.2086226
6 6 0.5628826
7 7 0.4772953
8 8 0.8474007
9 9 0.8861693
10 10 0.6694851
# Element-wise product of the four arguments, multiplied left to right.
mult <- function(a, b, c, d) {
  abc <- a * b * c
  abc * d
}
The desired output would look something like this
section area year e
1 1 1 1 results of mult()
2 2 1 1 results of mult()
3 1 2 1 results of mult()
4 2 2 1 results of mult()
5 1 3 1 results of mult()
6 2 3 1 results of mult()
7 1 1 2 results of mult()
8 2 1 2 results of mult()
...
dput(df1)
structure(list(section = c(1L, 2L, 1L, 2L, 1L, 2L), area = c(1L,
1L, 2L, 2L, 3L, 3L), a = c(0.12089157756418, 0.826564211165532,
0.91016107192263, 0.880704707000405, 0.234355789143592, 0.411247851792723
), b = c(0.72353063733317, 0.293960151728243, 0.736324765253812,
0.547322086291388, 0.204468948533759, 0.952363904565573), c = c(0.765263637062162,
0.649149592733011, 0.150929539464414, 0.674805536167696, 0.964733332861215,
0.15331974090077)), out.attrs = list(dim = structure(2:3, .Names = c("section",
"area")), dimnames = list(section = c("section=1", "section=2"
), area = c("area=1", "area=2", "area=3"))), class = "data.frame", row.names = c(NA,
-6L))
dput(df2)
structure(list(year = 1:10, d = c(0.735743158031255, 0.459157506935298,
0.365456136409193, 0.199643932981417, 0.208622586680576, 0.562882597092539,
0.477295308141038, 0.847400720929727, 0.886169332079589, 0.669485098216683
)), class = "data.frame", row.names = c(NA, -10L))
Edit: full sized toy dataset
library(dplyr)
# Full-size toy data: 20,000 sections x 15 areas (300,000 rows), each with
# three uniform random coefficients, plus 100 years with one coefficient each.
df1 <- expand.grid(section = seq_len(20000), area = seq_len(15)) %>%
  mutate(
    a = runif(n()),
    b = runif(n()),
    c = runif(n())
  )
df2 <- data.frame(year = seq_len(100), d = runif(100))
You can use crossing to create combinations of df1 and df2 and apply mult to them.
# Pair every row of df1 with every row of df2, then evaluate mult() once per
# combination.
df1 %>%
  tidyr::crossing(df2) %>%
  dplyr::mutate(e = mult(a, b, c, d))
I have a list of data frames and my goal is to transpose them and bind them into one. How could I do this? Below is my list:
$pri
$pri$x
a b
1 1 3
2 2 4
$pri$y
a b c
1 1 3 5
2 2 4 6
$sec
$sec$w
a b
1 7 9
2 8 10
$sec$z
a b c d
1 11 13 15 17
2 12 14 16 18
I am aiming for output like this:
"col1" "col2"
a ; 1 ; 2
b ; 3 ; 4
a ; 1 ; 2
b ; 3 ; 4
c ; 5 ; 6
a ; 7 ; 8
b ; 9 ; 10
a ; 11 ; 12
b ; 13 ; 14
c ; 15 ; 16
d ; 17 ; 18
library(purrr)
# Two named groups of data frames with differing numbers of columns.
pri <- list(
  x = data.frame(a = 1:2, b = 3:4),
  y = data.frame(a = 1:2, b = 3:4, c = 5:6)
)
sec <- list(
  w = data.frame(a = 7:8, b = 9:10),
  z = data.frame(a = 11:12, b = 13:14, c = 15:16, d = 17:18)
)
# Remove one level of nesting, transpose each data frame, then stack the
# resulting matrices row-wise.
list(pri = pri, sec = sec) %>%
  flatten() %>%
  map(t) %>%
  reduce(rbind)
#> [,1] [,2]
#> a 1 2
#> b 3 4
#> a 1 2
#> b 3 4
#> c 5 6
#> a 7 8
#> b 9 10
#> a 11 12
#> b 13 14
#> c 15 16
#> d 17 18
Created on 2020-03-12 by the reprex package (v0.3.0)
Assuming your data given like this (According to your question):
frame_list <- list(pri = list(x = structure(list(a = 1:2, b = 3:4), class = "data.frame", row.names = c(NA,
-2L)), y = structure(list(a = 1:2, b = 3:4, c = 5:6), class = "data.frame", row.names = c(NA,
-2L))), sec = list(w = structure(list(a = 7:8, b = 9:10), class = "data.frame", row.names = c(NA,
-2L)), z = structure(list(a = 11:12, b = 13:14, c = 15:16, d = 17:18), class = "data.frame", row.names = c(NA,
-2L))))
then you can do:
# Flatten one level to get the individual data frames, then drop the list
# names before cbind(): named arguments make cbind() prefix every column
# (e.g. "pri.x.a"), and stripping that prefix with a regex only worked when
# the inner list names were a single character. Unnamed arguments keep the
# original column names, so the transposed matrix already has the wanted
# row names.
df <- t(do.call(cbind, unname(unlist(frame_list, recursive = FALSE))))
Note: The output will be a matrix. In case you need to convert it to a data frame, you can use data.frame, but this will change the row names by appending a number to each of them to make them unique.
Output:
[,1] [,2]
a 1 2
b 3 4
a 1 2
b 3 4
c 5 6
a 7 8
b 9 10
a 11 12
b 13 14
c 15 16
d 17 18
In case you want it into a dataframe, then you can do:
# Build the transposed matrix from the unnamed list of data frames so that
# its row names are the original column names (no "pri.x." style prefix that
# would need a fragile regex to remove).
df <- t(do.call(cbind, unname(unlist(frame_list, recursive = FALSE))))
# Convert to a data frame: the dimnames are dropped so the value columns are
# named X1, X2 and the row names are automatic, while the original column
# names are kept in `newcol`.
df <- data.frame(unname(df), newcol = rownames(df), stringsAsFactors = FALSE)
Output:
X1 X2 newcol
1 1 2 a
2 3 4 b
3 1 2 a
4 3 4 b
5 5 6 c
6 7 8 a
7 9 10 b
8 11 12 a
9 13 14 b
10 15 16 c
11 17 18 d
you could also solve your problem using base R functions as follows:
# Nested list: two groups, each holding two data frames of differing width.
dfs <- list(
  pri = list(
    x = data.frame(a = 1:2, b = 3:4),
    y = data.frame(a = 1:2, b = 3:4, c = 5:6)
  ),
  sec = list(
    w = data.frame(a = 7:8, b = 9:10),
    z = data.frame(a = 11:12, b = 13:14, c = 15:16, d = 17:18)
  )
)
# Flatten one level, column-bind the data frames pairwise, then transpose so
# that each original column becomes a row.
t(Reduce(cbind, unlist(dfs, recursive = FALSE)))
# [,1] [,2]
# a 1 2
# b 3 4
# a 1 2
# b 3 4
# c 5 6
# a 7 8
# b 9 10
# a 11 12
# b 13 14
# c 15 16
# d 17 18
This question already has answers here:
Frequency count of two column in R
(8 answers)
Closed 6 years ago.
I have a data frame like this:
ID Cont
1 a
1 a
1 b
2 a
2 c
2 d
I need to report the frequency of "Cont" by ID. The output should be
ID Cont Freq
1 a 2
1 b 1
2 a 1
2 c 1
2 d 1
Using dplyr, you can group_by both ID and Cont and summarise using n() to get Freq:
library(dplyr)
# Count the rows in every (ID, Cont) combination.
res <- df %>%
  group_by(ID, Cont) %>%
  summarise(Freq = n())
##Source: local data frame [5 x 3]
##Groups: ID [?]
##
## ID Cont Freq
## <int> <fctr> <int>
##1 1 a 2
##2 1 b 1
##3 2 a 1
##4 2 c 1
##5 2 d 1
Data:
df <- structure(list(ID = c(1L, 1L, 1L, 2L, 2L, 2L), Cont = structure(c(1L,
1L, 2L, 1L, 3L, 4L), .Label = c("a", "b", "c", "d"), class = "factor")), .Names = c("ID",
"Cont"), class = "data.frame", row.names = c(NA, -6L))
## ID Cont
##1 1 a
##2 1 a
##3 1 b
##4 2 a
##5 2 c
##6 2 d
library(data.table)
# NOTE(review): `x` is presumably the question's data frame (named `df` in
# the other answers) -- confirm before running.
# .N is the number of rows in each (ID, Cont) group.
setDT(x)[, list(Freq = .N), by = list(ID, Cont)]
# ID Cont Freq
# 1: 1 a 2
# 2: 1 b 1
# 3: 2 a 1
# 4: 2 c 1
# 5: 2 d 1
With base R:
# Cross-tabulate ID by Cont in long form, then keep only the combinations
# that actually occur.
df1 <- as.data.frame(table(df))
df1 <- df1[df1$Freq != 0, ]
if you want to order by ID, add this line:
# The trailing comma makes this a row selection; without it the indices are
# treated as column positions and the call fails with
# "undefined columns selected".
df1[order(df1$ID), ]
ID Cont Freq
1 1 a 2
3 1 b 1
2 2 a 1
6 2 c 1
8 2 d 1
I would like to change this list to a data.frame:
[[1]]
AA AB
21 1
[[2]]
AA AB
19 4
[[3]]
AA AB
23 1
[[4]]
AA AB BB
15 3 6
I tried "as.data.frame(r)", but I got the following error:
Error in data.frame(c(21L, 1L), c(19L, 4L), c(23L, 1L), c(15L, 3L, 6L), :
arguments imply differing number of rows: 2, 3
How can I get something like:
AA AB BB
V1 21 1
V2 19 4
V3 23 1
V4 15 3 6
If the list elements are named vectors,
library(stringi)
# stri_list2matrix() puts each vector in its own column, padding the shorter
# ones with NA; transposing gives one row per list element.
res <- as.data.frame(t(stri_list2matrix(r)))
# lapply() always returns a list. sapply() would simplify to a matrix when
# every element has the same number of names, and unique() on a matrix
# de-duplicates rows rather than individual names.
# NOTE(review): values are aligned by position, so this assumes every vector
# lists its names in the same order -- confirm for real data.
colnames(res) <- unique(unlist(lapply(r, names)))
res
# AA AB BB
#1 21 1 <NA>
#2 19 4 <NA>
#3 23 1 <NA>
#4 15 3 6
Or if the list elements are 'data.frame'
library(data.table)
rbindlist(r1, fill=TRUE)
data
r <- list(structure(c(21, 1), .Names = c("AA", "AB")), structure(c(19,
4), .Names = c("AA", "AB")), structure(c(23, 1), .Names = c("AA",
"AB")), structure(c(15, 3, 6), .Names = c("AA", "AB", "BB")))
r1 <- lapply(r, as.data.frame.list)