I have data like the following:
# Example data: four columns (A-D), each holding 100 random draws from 1:2.
class.df <- data.frame(
A = sample(1:2, 100, replace=TRUE),
B = sample(1:2, 100, replace=TRUE),
C = sample(1:2, 100, replace=TRUE),
D = sample(1:2, 100, replace=TRUE)
)
# Every unordered pair of column names, one pair per row.
ids_df <- t(combn(names(class.df), 2))
# Fisher's exact test on the contingency table of each column pair;
# NA_real_ is returned in place of the result if the test raises an error.
fisher_tests <- apply(ids_df, 1, function(i) tryCatch(fisher.test(table(class.df[,i])), error = function(e) NA_real_))
# Bind the pair names to the p.value/estimate elements pulled from each result.
# NOTE: `[` on a test result returns a list, so these cells are list elements
# rather than plain numbers -- the whole object becomes a list-mode matrix.
edge_table <- cbind(ids_df, t(sapply(fisher_tests, "[", c("p.value", "estimate"))))
edge_table
p.value estimate
[1,] "A" "B" 0.6826874 0.7919741
[2,] "A" "C" 0.6873498 1.219358
[3,] "A" "D" 0.5473441 0.7356341
[4,] "B" "C" 0.6828843 0.8164863
[5,] "B" "D" 1 1.033625
[6,] "C" "D" 0.2257244 0.5789776
# Write the table (still in the form built above) to a CSV file at a fixed absolute path.
write.csv(edge_table,"/Users/Results/EE2.csv")
But when I write the table with write.csv and open the CSV file, the last column (estimate) shows weird matrix-like values instead of numbers. I just need the numerical values as they were shown in R. How can I resolve this issue?
Your edge_table object is an array that contains list columns due to the way you have created it. If you want a standard data frame, you can try:
# Convert the list-mode matrix to a data frame, then flatten every list column
# into a plain atomic vector so the values are written as ordinary cells.
edge_table <- as.data.frame(edge_table)
edge_table[] <- Map(unlist, edge_table)

# Round-trip through CSV (without row names) to check the file reads back cleanly.
write.csv(edge_table, file = "test.csv", row.names = FALSE)
read.csv(file = "test.csv")
#> V1 V2 p.value estimate
#> 1 A B 0.2372953 0.6160489
#> 2 A C 0.3164033 1.6070638
#> 3 A D 0.3238934 1.5017413
#> 4 B C 0.8411522 1.1562830
#> 5 B D 0.6882391 0.8017990
#> 6 C D 0.5514280 1.2965724
Created on 2022-12-06 with reprex v2.0.2
Related
I have the following data.frame:
# Pairs of values: each row records two values that occur together.
df <- data.frame(
  V1 = c("A", "X", "A", "Z", "B", "Y"),
  V2 = c("B", "Y", "C", "Y", "C", "W"),
  stringsAsFactors = FALSE
)
df
# V1 V2
# 1 A B
# 2 X Y
# 3 A C
# 4 Z Y
# 5 B C
# 6 Y W
I want to group all the values that occur together at some point and get the following:
list(c("A","B","C"), c("X","Y","Z","W"))
# [[1]]
# [1] "A" "B" "C"
#
# [[2]]
# [1] "X" "Y" "Z" "W"
Network analyses can help.
library(igraph)

# Pairs of values: each row records two values that occur together.
df <- data.frame(
  V1 = c("A", "X", "A", "Z", "B", "Y"),
  V2 = c("B", "Y", "C", "Y", "C", "W"),
  stringsAsFactors = FALSE
)

# Treat every row as an undirected edge, so values linked directly or through
# intermediate values fall into the same connected component.
g <- graph_from_data_frame(df, directed = FALSE)

# components() replaces the deprecated clusters(); the result still carries a
# `membership` element giving one component id per (named) vertex.
clust <- components(g)

# One row per value together with the id of the component it belongs to.
clusters <- data.frame(
  name = names(clust$membership),
  cluster = clust$membership,
  row.names = NULL,
  stringsAsFactors = FALSE
)

# Group the value names by component id.
split(clusters$name, clusters$cluster)
$`1`
[1] "A" "B" "C"
$`2`
[1] "X" "Z" "Y" "W"
You can of course leave everything in the cluster data.frame for further analyses.
I have a three-dimensional Excel table which I would like to convert into a two-dimensional data frame that I can use in R. I think the best way is to read it into R and then transform it directly within R, but I cannot work out how. Here is an example. I have a df1-like data frame that I want to transform into df2:
# Source layout: a single row (w1) whose cells each pack several values
# separated by ";".
a1 <- paste("a", "b", "c", sep = ";")
a2 <- paste("e", "f", "g", sep = ";")
df1 <- data.frame(v1 = a1, v2 = a2, row.names = "w1")

# Target layout: one row per unpacked value, labelled with the df1 column it
# came from. The values are spelled out so they agree with df1's contents
# (letters[1:6] would give "d", "e", "f" for v2 instead of "e", "f", "g").
df2 <- data.frame(
  w1 = rep(c("v1", "v2"), each = 3),
  value = c("a", "b", "c", "e", "f", "g")
)
You can achieve this by using reshape2
# melt() comes from reshape2, so the package has to be attached first.
library(reshape2)

# Split every cell of df1 on ";". The result is a list with one entry per
# column, each holding the pieces found in row w1.
sub_df1 <- apply(df1, 2, FUN = strsplit, split = ";")
# $v1
# $v1$w1
# [1] "a" "b" "c"
# $v2
# $v2$w1
# [1] "e" "f" "g"

# Flatten each column's pieces; sapply() simplifies the equal-length results
# into a character matrix with one column per df1 column. This reuses sub_df1
# instead of splitting df1 a second time.
sub_df2 <- sapply(sub_df1, FUN = unlist, use.names = TRUE, recursive = FALSE)
# v1 v2
# w11 "a" "e"
# w12 "b" "f"
# w13 "c" "g"

# Reshape the matrix to long form and drop the first (row-label) column.
melt(sub_df2)[-1]
# Var2 value
# 1 v1 a
# 2 v1 b
# 3 v1 c
# 4 v2 e
# 5 v2 f
# 6 v2 g
You can then delete the first column by adding the [-1]
I have 2 tables as below:
# Long table: one row per (a, b) pair.
# `header = TRUE` is spelled out in full: `head = T` relied on partial argument
# matching and on `T`, which is an ordinary variable that can be reassigned.
a <- read.table(text = " a b
1 c
1 d
2 c
2 a
2 b
3 a
", header = TRUE)

# Lookup table: the header has one fewer field than the data rows, so
# read.table() takes the first field of every row as the row name.
b <- read.table(text = " a c
1 x i
2 y j
3 z k
", header = TRUE)
And I want result to be like this:
1 x i c d
2 y j c a b
3 z k a
Originally I thought to use tapply to transform them to lists (eg. aa = tapply(a[,2], a[,1], function(x) paste(x,collapse=","))), then append it back to table b, but I got stuck...
Any suggestion to do this?
Thanks a million.
One way to do it:
# Pair up, group by group, the values from each row of `b` (split by row name)
# with the `a$b` values sharing the same `a$a` key, and concatenate each pair.
Map(
  c,
  lapply(
    split(b, row.names(b)),
    function(row) as.character(unlist(row, use.names = FALSE))
  ),
  split(as.character(a$b), a$a)
)
# $`1`
# [1] "x" "i" "c" "d"
#
# $`2`
# [1] "y" "j" "c" "a" "b"
#
# $`3`
# [1] "z" "k" "a"
I have a list like so:
head(myList)
[[1]]
[1] "a" "b"
[[2]]
[1] "c" "d"
[[3]]
[1] "e" "f"
My desired output is a dataframe:
col1 col2
a b
c d
e f
How can I do this? Thanks in advance.
This should do the trick:
# Example input: a list of equal-length character vectors.
a <- list(
  c("a", "b"),
  c("c", "d"),
  c("e", "f")
)
# Each list element becomes a data frame column; transposing turns the
# elements into rows.
t(data.frame(a))
Cheerio.
UPDATE:
# Label the two positions col1/col2 while building the data frame, transpose so
# each list element becomes a row, and number the rows.
# seq_along(a) is the safe spelling of 1:length(a): for an empty list it gives
# integer(0) rather than c(1, 0).
data.frame(
  t(data.frame(a, row.names = c("col1", "col2"))),
  row.names = seq_along(a)
)
# col1 col2
# 1 a b
# 2 c d
# 3 e f
UPDATE 2:
Here's another version.
# Fill a 2 x length(a) matrix column by column (one column per list element),
# label its rows col1/col2, then transpose so each element becomes a row.
# The column count comes from length(a) instead of a hard-coded 3, so the
# snippet works for a list holding any number of pairs.
data.frame(t(matrix(
  unlist(a),
  nrow = 2,
  ncol = length(a),
  dimnames = list(c("col1", "col2"), seq_along(a))
)))
which fixes the formatting and the ugliness.
The typical approach is do.call(rbind, a), as mentioned by @DavidArenburg. But, as with many things in R, there are alternatives.
Here are two:
In base R, you can use simplify2array (but it's been known to be slow).
# simplify2array() binds the list elements together as columns; t() then turns
# each element into a row.
t(simplify2array(a))
You can also use stri_list2matrix from the stringi package, which is used for similar purposes as simplify2array but is much faster and doesn't require transposing:
library(stringi)
# byrow = TRUE places each list element in its own row (see the output below).
stri_list2matrix(a, byrow = TRUE)
# [,1] [,2]
# [1,] "a" "b"
# [2,] "c" "d"
# [3,] "e" "f"
Combining that with some dimnames<- trickery, you can get to your desired data.frame quite easily.
# Build the matrix (one row per list element), attach the column names, and
# convert the result to a data frame. local() keeps the intermediate matrix
# out of the calling environment.
local({
  pairs <- stri_list2matrix(a, byrow = TRUE)
  dimnames(pairs) <- list(NULL, c("col1", "col2"))
  as.data.frame(pairs)
})
# col1 col2
# 1 a b
# 2 c d
# 3 e f
I have the following matrix 'x'
a b
a 1 3
b 2 4
It is a really large matrix (trimmed down for this question)
I would like to print out this matrix by each row name and column name combination along with the value in that cell. So the expected output would be
a,a,1
a,b,3
b,a,2
b,b,4
I could loop through them, but I'm pretty sure this can be avoided (apply?). Any pointers much appreciated.
One way is to use the melt function from the reshape2 package.
# 2 x 2 example matrix filled column by column; both dimensions are named
# (dim1 for rows, dim2 for columns) and use the labels "a" and "b".
x <- matrix(seq_len(4), nrow = 2, ncol = 2)
dimnames(x) <- list(dim1 = c("a", "b"), dim2 = c("a", "b"))
library(reshape2)
# melt() on a matrix returns one row per cell: the two dimension labels plus
# the cell value (see the output below).
melt(x)
# dim1 dim2 value
# 1 a a 1
# 2 b a 2
# 3 a b 3
# 4 b b 4
Edit
If your data is so big that speed is an issue, I would also suggest:
# Column-major walk over the cells: the row labels are cycled once per column,
# each column label is repeated for a whole column, and the values are the
# matrix flattened to a vector.
data.frame(
  dim1 = rep(rownames(x), times = ncol(x)),
  dim2 = rep(colnames(x), each = nrow(x)),
  value = as.vector(x)
)
Edit2
After testing with relatively big data, I would not rule out melt:
# Benchmark input: a 3000 x 3000 matrix of random values with randomly
# generated character labels on both dimensions.
x <- matrix(runif(9e6), nrow = 3000, ncol = 3000,
dimnames = list(dim1 = paste0("x", runif(3000)),
dim2 = paste0("y", runif(3000))))
# Time the melt() approach.
system.time(y1 <- melt(x))
# user system elapsed
# 1.17 0.44 1.61
# Time the rep()/c() data.frame construction on the same matrix.
system.time(y2 <- data.frame(dim1 = rep(rownames(x), ncol(x)),
dim2 = rep(colnames(x), each = nrow(x)),
value = c(x)))
# user system elapsed
# 1.98 0.37 2.36
You can also use the base R function row and col
If you want to reference the row names and column names, then use as.factor = TRUE. Using as.character and as.numeric flattens the matrix.
# row()/col() with as.factor = TRUE return the dimnames labels (as factors)
# instead of integer indices; as.character() flattens them to plain vectors.
# TRUE is spelled out and the argument is named because T is an ordinary
# variable that can be reassigned.
do.call(
  data.frame,
  list(
    lapply(
      list(row = row(x, as.factor = TRUE), col = col(x, as.factor = TRUE)),
      as.character
    ),
    value = as.numeric(x)
  )
)
## row col value
## 1 a a 1
## 2 b a 2
## 3 a b 3
## 4 b b 4
If you want a matrix, you will need to have all the columns as the same class (character or numeric). You could then use
# Integer row index, column index and cell value, each flattened to a numeric
# vector and bound side by side into a numeric matrix.
cbind(
  row = as.numeric(row(x)),
  col = as.numeric(col(x)),
  value = as.numeric(x)
)
## row col value
## [1,] 1 1 1
## [2,] 2 1 2
## [3,] 1 2 3
## [4,] 2 2 4
Or as character
# Same layout as a character matrix: as.factor = TRUE makes row()/col() return
# the dimnames labels, and as.character() flattens labels and values alike.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
do.call(
  cbind,
  lapply(
    list(
      row = row(x, as.factor = TRUE),
      col = col(x, as.factor = TRUE),
      value = x
    ),
    as.character
  )
)
## row col value
## [1,] "a" "a" "1"
## [2,] "b" "a" "2"
## [3,] "a" "b" "3"
## [4,] "b" "b" "4"