Using sapply inside sapply - r

I need to execute sapply inside another sapply.
This is the working code I have.
animal <- c("Dog", "Cat", "Bird", "Fish", "Monkey", "Lion", "Dolphin", "Panda")
a <- as.data.frame(sapply(1:7, function(y) rbinom(30, 1, sample(seq(.4, .9, by=.1), 1, prob = NULL))))
colnames(a) <- (animal)
I would like to build this data frame 10 time without doing this.
animal <- c("Dog", "Cat", "Bird", "Fish", "Monkey", "Lion", "Dolphin", "Panda")
a <- as.data.frame(sapply(1:7, function(y) rbinom(30, 1, sample(seq(.4, .9, by=.1), 1, prob = NULL))))
colnames(a) <- (animal)
b <- as.data.frame(sapply(1:7, function(y) rbinom(30, 1, sample(seq(.4, .9, by=.1), 1, prob = NULL))))
colnames(b) <- (animal)
...
j <- as.data.frame(sapply(1:7, function(y) rbinom(30, 1, sample(seq(.4, .9, by=.1), 1, prob = NULL))))
colnames(j) <- (animal)
I have tried this without success
sapply(letters[1:10], function(z) as.data.frame(sapply(1:7, function(y) rbinom(300, 1, sample(seq(.4, .9, by=.1), 1, prob = NULL)))), colnames(letters[1:10]) <- (animal))
Thanks

If you need to do this with two apply type functions, you can do something like this:
Also you have Eight animals in animal and only making 7 columns. So I have shortened animal.
Using lapply on the outer loop will always return a list, which makes it a bit neater than sapply from what I understand you are trying to do.
animal <- c("Dog", "Cat", "Bird", "Fish", "Monkey", "Lion", "Dolphin")
lapply(1:10, function(x){
a <- as.data.frame(
sapply(1:7, function(y) rbinom(30, 1, sample(seq(.4, .9, by=.1), 1, prob = NULL)))
)
names(a) <- (animal)
a
})

You are using sapply, so it is not clear if you want the end result to be a matrix or a list. If you want a matrix as output, then a straightforward approach would be to use your existing code, but start with an expanded vector (animals x replicates).
animal.reps = sapply(c("Dog", "Cat", "Bird", "Fish", "Monkey", "Lion", "Dolphin", "Panda"), paste, letters[1:10], sep = ".")
a = sapply(animal.reps, function(y) rbinom(30, 1, sample(seq(.4, .9, by=.1), 1, prob = NULL)) )
This gives a 30x80 matrix:
> dim(a)
[1] 30 80
> a[1:10, 1:10]
Dog.a Dog.b Dog.c Dog.d Dog.e Dog.f Dog.g Dog.h Dog.i Dog.j
[1,] 1 1 1 1 1 0 1 1 1 0
[2,] 1 1 0 0 1 0 0 1 1 0
[3,] 1 0 1 1 1 0 1 1 1 0
[4,] 1 1 0 1 1 1 1 1 1 0
[5,] 1 1 1 0 1 1 0 1 1 0
[6,] 0 1 0 1 1 0 0 1 1 1
[7,] 1 1 0 1 1 1 1 1 1 1
[8,] 1 1 1 1 1 0 1 1 1 1
[9,] 1 1 0 1 1 0 1 1 1 0
[10,] 0 1 1 1 1 1 1 1 1 1

Related

Adjacency Matrix from a dataframe

I am trying to convert an edgelist to an adjacent matrix.
Below is the sample data
#Sample Data
User<-c("1","1","2","3","4")
v1 <- c("b", "b", "a", "d", "c")
v2 <- c("c", "d", "c", "a", "a")
v3 <- c(0, 0, "d", 0, "b")
v4 <- c(0, 0, 0, 0, 0)
v5 <- c(0, 0, 0, 0, 0)
my_data<-data.frame(User, v1, v2, v3, v4, v5)
my_data
If you run this code you will get the below as output,
User v1 v2 v3 v4 v5
1 b c 0 0 0
1 b d 0 0 0
2 a c d 0 0
3 d a 0 0 0
4 c a b 0 0
Using the data, I want to create an adjacent matrix that looks like follows:
a b c d
a 0 0 2 2
b 0 0 1 1
c 2 1 0 1
d 2 1 1 0
Basically, the desired output diplays the count how many times each pair appeared in column v1~v5 in the sample data frame.
I have tried to use AdjacencyFromEdgelist function from dils library, also tried to create a matrix shell with NAs and fill out the matrix by looping through the dataframe.
However, I could not get neither way to work.
I think this may be close to what you have in mind. In the rows where there are more than 2 vertices, I considered every existing pairs:
library(igraph)
do.call(rbind, my_data[-1] |>
apply(1, \(x) x[x != 0]) |>
lapply(\(x) t(combn(x, m = 2)))) |>
graph_from_edgelist(directed = FALSE) %>%
as_adjacency_matrix()
4 x 4 sparse Matrix of class "dgCMatrix"
b c d a
b . 2 1 1
c 2 . 1 2
d 1 1 . 2
a 1 2 2 .
Or without the pip operator in base R:
tmp <- apply(my_data[-1], 1, function(x) x[x != 0])
tmp <- do.call(rbind, lapply(tmp, function(x) t(combn(x, m = 2))))
my_graph <- graph_from_edgelist(tmp, directed = FALSE)
adj_mat <- as_adjacency_matrix(my_graph)
adj_mat
Another attempt, minus the need to calculate all the combinations with combn
sel <- my_data[-1] != 0
dat <- data.frame(row=row(my_data[-1])[sel], value = my_data[-1][sel])
out <- crossprod(table(dat))
diag(out) <- 0
out
# value
#value a b c d
# a 0 1 2 2
# b 1 0 2 1
# c 2 2 0 1
# d 2 1 1 0
Matches the result from #AnoushiravanR:
adj_mat[c("a","b","c","d"), c("a","b","c","d")]
#4 x 4 sparse Matrix of class "dgCMatrix"
# a b c d
#a . 1 2 2
#b 1 . 2 1
#c 2 2 . 1
#d 2 1 1 .
Another igraph option
do.call(
rbind,
combn(df, 2, setNames, nm = c("from", "to"), simplify = FALSE)
) %>%
filter(from > 0 & to > 0) %>%
arrange(from) %>%
graph_from_data_frame(directed = FALSE) %>%
get.adjacency(sparse = FALSE)
gives
a b c d
a 0 1 2 2
b 1 0 2 1
c 2 2 0 1
d 2 1 1 0

How to loop a function over all elements of a vector except one and store the result in separate columns of a data frame

I have a data frame with several columns. I want to run a function [pmax() in this case] over all columns whose name is stored in a vector except one, and store the result in new separate columns. At the end, I would also like to store the names of all new columns in a separate vector. A minimal example would be:
Name <- c("Case 1", "Case 2", "Case 3", "Case 4", "Case 5")
C1 <- c(1, 0, 1, 1, 0)
C2 <- c(0, 1, 1, 1, 0)
C3 <- c(0, 1, 0, 0, 0)
C4 <- c(1, 1, 0, 1, 0)
Data <- data.frame(Name, C1, C2, C3, C4)
var.min <- function(data, col.names){
new.df <- data
# This is how I would do it outside a function and without loop:
new.df$max.def.col.exc.1 <- pmax(new.df$C2, new.df$C3)
new.df$max.def.col.exc.2 <- pmax(new.df$C1, new.df$C3)
new.df$max.def.col.exc.3 <- pmax(new.df$C1, new.df$C2)
new.columns <- c("max.def.col.exc.1", "max.def.col.exc.2", "max.def.col.exc.3")
return(new.df)
}
new.df <- var.min(Data,
col.names= c("C1", "C2", "C3"))
The result should look like:
Name C1 C2 C3 C4 max.def.col.exc.1 max.def.col.exc.2 max.def.col.exc.3
1 Case 1 1 0 0 1 0 1 1
2 Case 2 0 1 1 1 1 1 1
3 Case 3 1 1 0 0 1 1 1
4 Case 4 1 1 0 1 1 1 1
5 Case 5 0 0 0 0 0 0 0
Anyone with an idea? Many thanks in advance!
Here is a base R solution with combn. It gets all pairwise combinations of the column names and calls a function computing pmax.
Note that the order of the expected output columns is the same as the one output by the code below. If the columns vector is c("C1", "C2", "C3"), the order will be different.
Note also that the function is now a one-liner and accepts combinations of any number of columns, 2, 3 or more.
var.min <- function(cols, data) Reduce(pmax, data[cols])
cols <- c("C3", "C2", "C1")
combn(cols, 2, var.min, data = Data)
# [,1] [,2] [,3]
#[1,] 0 1 1
#[2,] 1 1 1
#[3,] 1 1 1
#[4,] 1 1 1
#[5,] 0 0 0
Now it's just a matter of assigning column names and cbinding with the input data.
tmp <- combn(cols, 2, var.min, data = Data)
colnames(tmp) <- paste0("max.def.col.exc.", seq_along(cols))
Data <- cbind(Data, tmp)
rm(tmp) # final clean-up

Converting counts to individual observations in r

I have a data set that looks as follows
df <- data.frame( name = c("a", "b", "c"),
judgement1= c(5, 0, NA),
judgement2= c(1, 1, NA),
judgement3= c(2, 1, NA))
I want to reshape the dataframe to look like this
# name judgement1 judgement2 judgement3
# a 1 0 0
# a 1 0 0
# a 1 0 0
# a 1 0 0
# a 1 0 0
# b 1 0 0
# b 0 1 0
# b 0 0 1
And so on. I have seen that untable is recommended on some other threads, but it does not appear to work with the current version of r. Is there a package that can convert summarised counts into individual observations?
You could try something like this:
df <- data.frame( name = c("a", "b", "c"),
judgement1= c(5, 0, NA),
judgement2= c(1, 1, NA),
judgement3= c(2, 1, NA))
rep.vec <- colSums(df[colnames(df) %in% paste0("judgement", (1:nrow(df)), sep="")], na.rm = TRUE)
want <- data.frame(name=df$name, cbind(diag(nrow(df))))
colnames(want)[-1] <- paste0("judgement", (1:nrow(df)), sep="")
(want <- want[rep(1:nrow(want), rep.vec), ])
I wrote a function that works to give you your desired output:
untabl <- function(df, id.col, count.cols) {
df[is.na(df)] <- 0 # replace NAs
out <- lapply(count.cols, function(x) { # for each column with counts
z <- df[rep(1:nrow(df), df[,x]), ] # replicate rows
z[, -c(id.col)] <- 0 # set all other columns to zero
z[, x] <- 1 # replace the count values with 1
z
})
out <- do.call(rbind, out) # combine the list
out <- out[order(out[,c(id.col)]),] # reorder (you can change this)
rownames(out) <- NULL # return to simple row numbers
out
}
untabl(df = df, id.col = 1, count.cols = c(2,3,4))
# name judgement1 judgement2 judgement3
#1 a 1 0 0
#2 a 1 0 0
#3 a 1 0 0
#4 a 1 0 0
#5 a 1 0 0
#6 a 0 1 0
#7 b 0 1 0
#8 a 0 0 1
#9 a 0 0 1
#10 b 0 0 1
And for your reference, reshape::untable consists of the following code:
function (df, num)
{
df[rep(1:nrow(df), num), ]
}

R: print certain values of a matrix to a csv-file

I have a matrix with 1 and 0 in it. Now I want to create a csv-file with the following syntax where only the values=1 were printed:
j1.i1, 1
j1.i2, 1
j2.i2, 1
...
j1 should be the name of the row 1
i1 should be the name of column 1
and so on...
Edit:
M1 = matrix(c(1, 0, 1, 0, 1, 0), nrow=2, ncol=3, byrow = TRUE)
row.names(M1) <- c(100, 101)
colnames(M1) <- c("A", "B", "C")
M1
A B C
100 1 0 1
101 0 1 0
If we take this easy example the solution i'm looking for is:
100.A, 1
100.C, 1
101.B, 1

how to create 3 by 3 Contingency table with two variables in R

Example:
x <- c( 1, NA, 0, 1)
y <- c(NA, NA, 0, 1)
table(x,y, useNA="always") # --->
# y
# x 0 1 <NA>
# 0 1 0 0
# 1 0 1 1
# <NA> 0 0 1
My question is:
a <- c(NA, NA, NA, NA)
b <- c(1, 1, 1, 1)
table(a, b, useNA="always") ## --> It is 1X2 matrix.
# b
# a 1 <NA>
# <NA> 4 0
I want to get a 3X3 table with the same colnames, rownames and dimensions as the example above.. Then I will apply chisq.test for the table.
Thank you very much for your answers!
You can achieve this by converting both a and b into factors with the same levels. This works because factor vectors keep track of all possible values (aka levels) that their elements might take, even when they in fact contain just a subset of those.
a <- c(NA, NA, NA, NA)
b <- c(1, 1, 1, 1)
levs <- c(0, 1)
table(a = factor(a, levels = levs),
b = factor(b, levels = levs),
useNA = "always")
# b
# a 0 1 <NA>
# 0 0 0 0
# 1 0 0 0
# <NA> 0 4 0

Resources