This question already has an answer here:
What is the difference between [ ] and [[ ]] in R? [duplicate]
(1 answer)
Closed 1 year ago.
I have a list of matrices constructed by the following loops:
# Set up Row and Column Names for prediction coefficients.
rows = c("Intercept", "actsBreaks0", "actsBreaks1","actsBreaks2","actsBreaks3","actsBreaks4","actsBreaks5","actsBreaks6",
"actsBreaks7","actsBreaks8","actsBreaks9","tBreaks0","tBreaks1","tBreaks2","tBreaks3", "unitBreaks0", "unitBreaks1",
"unitBreaks2","unitBreaks3", "covgBreaks0","covgBreaks1","covgBreaks2","covgBreaks3","covgBreaks4","covgBreaks5",
"covgBreaks6","yearBreaks2016","yearBreaks2015","yearBreaks2014","yearBreaks2013","yearBreaks2011",
"yearBreaks2010","yearBreaks2009","yearBreaks2008","yearBreaks2007","yearBreaks2006","yearBreaks2005",
"yearBreaks2004","yearBreaks2003","yearBreaks2002","yearBreaks2001","yearBreaks2000","yearBreaks1999",
"yearBreaks1998","plugBump0","plugBump1","plugBump2","plugBump3")
cols = c("Value")
# Build Matrix for dummy coefficient values.
matrix1 <- matrix(c(1:48), nrow = 48, ncol = 1, byrow = TRUE, dimnames = list(rows,cols))
matrix1
# Extract each variable type into own matrix (i.e. all "actsBreaks{x}")
#
Beta_names <- list()
betabreaks <- unique(gsub("[0-9]*", "", rows))
for (bc in betabreaks)
{
Breaks <- grep(paste0(bc, "[0-9]*"), rows)
Beta_names[[bc]] <- matrix1[Breaks, ,drop = FALSE]
Beta_names[[bc]] <- data.matrix(unlist(Beta_names[[bc]])) #, byrow = TRUE)
}
# Set up matrices for excluded/test data
one_column <-c(1,1,2,3,3,3,4,4,4,4,4,5,6,9,9,8,7,5,4,7,7,8,2,0,10)
two_column <-c(1,1,2,3,3,3,4,4,4,4,4,5,6,9,9,8,7,5,4,7,7,8,3,0,10)
three_column <-c(1,1,2,3,3,3,4,4,4,4,4,5,6,9,9,8,7,5,4,7,7,8,4,10,0)
four_column <-c(1,1,2,3,3,3,4,4,4,4,4,5,6,9,9,8,7,5,4,7,7,8,5,0,10)
five_column <-c(1,1,2,3,3,3,4,4,4,4,4,5,6,9,9,8,7,5,4,7,7,8,6,0,10)
six_column <-c(1,1,2,3,3,3,4,4,4,4,4,5,6,9,9,8,7,5,4,7,7,8,7,0,10)
seven_column <-c(1,1,2,3,3,3,4,4,4,4,4,5,6,9,9,8,7,5,4,7,7,8,8,0,10)
eight_column <-c(1,1,2,3,3,3,4,4,4,4,4,5,6,9,9,8,7,5,4,7,7,8,9,0,10)
nine_column <-c(1,1,2,3,3,3,4,4,4,4,4,5,6,9,9,8,7,5,4,7,7,8,1,0,10)
ten_column <-c(1,1,2,3,3,3,4,4,4,4,4,5,6,9,9,8,7,5,4,7,7,8,0,0,10)
DF1 <- data.frame (one_column ,two_column ,three_column ,
four_column ,five_column ,six_column ,
seven_column ,eight_column ,nine_column ,
ten_column )
paralength <- 5
Xnames <- list()
datindc <- 1
while ( datindc <= paralength )
{
Xbreaks <- factor(DF1[[datindc]],levels=sort(unique.default(DF1[[datindc]]),decreasing=FALSE))
Xnames[[datindc]] <- data.frame(model.matrix(~Xbreaks -1), stringsAsFactors = FALSE)
datindc <- datindc + 1
}
#
Xlngth <- length(Xnames)
BtaXind <- 1
BetaiXi <- list()
while ( BtaXind <= Xlngth )
{
BetaiXi[[BtaXind]] <- (Beta_names[[BtaXind + 1]] * Xnames[[BtaXind]])
BtaXind <- (BtaXind + 1)
}
I need to add each of those matrices' rows to each other, which I am trying to do by turning each matrix into a vector
BiXilngth <- length(BetaiXi)
BetaiXiTr <- list()
BtaiXiTrd <- 1
while (BtaiXiTrd <= BiXilngth)
{
Var1 <- c(t(BetaiXi[[BtaiXiTrd]]))
BetaiXiTr[BtaiXiTrd] <- Var1
BtaiXiTrd <- BtaiXiTrd + 1
}
and adding the vectors, effectively transposing the matrices. However, when I tried to convert the first matrix BetaiXi[[1]] to a vector and add it to the list with this command BetaiXiTr[BtaiXiTrd] <- c(t(BetaiXi[[BtaiXiTrd]])) I got the following message:
Warning message:
In BetaiXiTr[BtaiXiTrd] <- c(t(BetaiXi[[BtaiXiTrd]])) :
number of items to replace is not a multiple of replacement length
I then tried using unlist():
> BetaiXiTr[BtaiXiTrd] <-unlist(c(t(BetaiXi[[1]])))
Warning message:
In BetaiXiTr[BtaiXiTrd] <- unlist(c(t(BetaiXi[[1]]))) :
number of items to replace is not a multiple of replacement length
with the same result. Finally, I tried assigning the first vector to a variable > Var1 <- c(t(BetaiXi[[BtaiXiTrd]])) and assigning that vector to the list > BetaiXiTr[BtaiXiTrd] <- Var1 with, yet again, the same warning:
Warning message:
In BetaiXiTr[BtaiXiTrd] <- Var1 :
number of items to replace is not a multiple of replacement length
I searched for the warning message to determine what exactly I was being warned of but ended being more confused. Most reproduce or encountered the error message by trying to replace a vector of so many elements with a vector of fewer, while (to my understanding) I am simply trying to add a vector to a list. Am I going about this the incorrect way?
I was using [ ] and [ [ ] ] incorrectly in BetaiXiTr[BtaiXiTrd]. It needs to be BetaiXiTr[[BtaiXiTrd]]and that allows the vectors to be added.
If the last value of each sublist in the list ListResiduals (e.g: OptionAOptionD) is > than the value with the corresponding name in the ListSigma (e.g: OptionAOptionD), it adds the name (e.g: OptionAOptionD) to the Watchlist list.
In the last line of the code I put "> 5" just for the example work, it's the "> 5" that I want to replace in the condition that I mentioned in the previous paragraph.
DF <- data.frame("OptionA" = sample(1:100, 50),
"OptionB" = sample(1:100, 50),
"OptionC" = sample(1:100, 50),
"OptionD" = sample(1:100, 50))
#Unfolding options and creating DF
UnFolding <- data.frame(
First = as.vector(sapply(names(DF[]), function(x)
sapply(names(DF[]), function(y)
paste0(x)))),
Second = as.vector(sapply(names(DF[]), function(x)
sapply(names(DF[]), function(y)
paste0(y)))))
#Deleting lines with the same names
UnFolding <-
UnFolding[UnFolding$First != UnFolding$Second, ]
#Creating list with dependent and independent variables
LMList <- apply(UnFolding, 1, function(x)
as.formula(paste(x[1], "~", x[2])))
#Change list data to variable names
names(LMList) <- substring(lapply(LMList, paste, collapse = ""), 2)
#Linear regression - lm()
LMListRegression <- lapply(LMList, function(x) {
eval(call("lm", formula = x, data = DF))
})
#Residuals
ListResiduals <- lapply(LMListRegression, residuals)
#Sigma
ListSigma <- lapply(LMListRegression, function(x) {
sigma(x)*2
})
#Watchlist
Watchlist <- as.list(unlist(lapply(ListResiduals,
function(x) names(x)[1][tail(x, 1) > 5])))
I would gravitate towards converting your Simga and Residual values to a vector and compare the vectors. You could also use a data.frame approach to be sure the order of your lists/vectors doesn't change.
# create a vector with the last value from the Residuals list.
last_residual <- sapply(ListResiduals, `[`, 50)
names(last_residual) <- substr(names(last_residual), 1, stop = -4)
# Using sapply() rather than lapply, will return a named vector
sigma_vector <- sapply(LMListRegression, function(x) {
sigma(x)*2
})
Watchlist <- sigma_vector[last_residual > sigma_vector]
Watchlist
# named numeric(0)
In your example, it returns an empty named vector because no values meet your condition
max(last_residual)
# [1] 31.70949
min(sigma_vector)
# [1] 52.93234
# To demonstrate that it works, let's devide sigma by 2 so that at least some values will pass
half_sigma <- sigma_vector/2
Watchlist2 <- sigma_vector[last_residual > half_sigma]
Watchlist2
# OptionDOptionA OptionDOptionB OptionDOptionC
# 54.52411 57.09503 56.79341
I am trying to write a code that would automatically calculate Wilcoxon test p-value for several comparisons.
Data used: 2 data sets with the same information representing two groups of participants completed the same 5 tasks which means that the each table contains 5 columns (tasks) and X rows with tasks scores.
data_17_18_G2 # first data set (in data.table format)
data_18_20_G2 # second data set (in data.table format)
Both data sets have identical names of column which are to be used in the W-test the next way:
wilcox.test(Group1Task1, Group2Task1, paired = F)
wilcox.test(Group1Task2, Group2Task2, paired = F)
and so on.
The inputs (e.g., Grou1Task1) are two vectors of task scores (the first one will be from data_17_18_G2 and the other one from data_18_20_G2
Desired output: a data table with a column of p-values
The problem I faced is that no matter how I manipulated the val1 and val2 empty objects, in the second and the third lines the right size "as.numeric(unlist(data_17_18_G2[, ..i]))" gives a correct output (a numeric vector) but it's left size "val1[i]" always returns only one value from the vector. That gave me the idea that the main problem appeared on the step of creating an empty vector, however, I wasn't able to solve it.
Empty objects:
result <- data.table(matrix(ncol=2))
val1 <- as.numeric() # here I also tried functions "numeric" and "vector"
val2 <- as.numeric()
res <- vector(mode = "list", length = 7)
For loop
for (i in 1:5) {
val1[i] <- as.numeric(unlist(data_17_18_G2[ , ..i]))
val2[i] <- as.numeric(unlist(data_18_20_G2[ , ..i]))
res[i] <- wilcox.test(val1[i], val2[i], paired = F)
result[i, 1] <- i
result[i, 2] <- res$p.value
}
Output:
Error in `[<-.data.table`(`*tmp*`, i, 2, value = NULL) :
When deleting columns, i should not be provided
1: В val1[i] <- as.numeric(unlist(data_17_18_G2[, ..i])) :
number of items to replace is not a multiple of replacement length
2: В val2[i] <- as.numeric(unlist(data_18_20_G2[, ..i])) :
number of items to replace is not a multiple of replacement length
3: В res[i] <- wilcox.test(val1[i], val2[i], paired = F) :
number of items to replace is not a multiple of replacement length
Alternative:
I changed the second and the third lines
for (i in 1:5) {
val1[i] <- as.numeric(data_17_18_G2[ , ..i])
val2[i] <- as.numeric(data_18_20_G2[ , ..i])
res[i] <- wilcox.test(val1[i], val2[i], paired = F)
result[i, 1] <- i
result[i, 2] <- res$p.value
}
And got this
Error in as.numeric(data_17_18_G2[, ..i]) :
(list) object cannot be coerced to type 'double'
which means that the function wilcox.test cannot interpret this type of input.
How can I improve the code so that I get a data table of p-values?
There would appear to be some bugs in the code. I have rewritten the code using the cars dataset as a example.
## use the cars dataset as a example (change with appropriate data)
data(cars)
data_17_18_G2 <- as.data.table(cars)
data_18_20_G2 <- data_17_18_G2[,2:1]
## Fixed code
result <- data.table(matrix(as.numeric(), nrow=ncol(data_17_18_G2), ncol=2))
val1 <- as.numeric()
val2 <- as.numeric()
res <- vector(mode = "list", length = 7)
for (i in 1:ncol(data_17_18_G2)) {
val1 <- as.numeric(unlist(data_17_18_G2[ , ..i]))
val2 <- as.numeric(unlist(data_18_20_G2[ , ..i]))
res[[i]] <- wilcox.test(val1, val2, paired = F)
result[i, 1] <- as.numeric(i)
result[i, 2] <- as.numeric(res[[i]]$p.value)
}
Hope this gives you the output you are after.
fname = file.choose()
two = read.csv(fname.header=T)
rec = two$Receipt
del = two$Delivery
date = two$Date
net = rec-del
yrec = matrix(rec,nrow=365,ncol=4,byrow=F)
ydel = matrix(del,nrow=365,ncol=4,byrow=F)
ynet = matrix(net,nrow=365,ncol=4,byrow=F)
yrecsum = 0
yrecavg = 0
for(i in 1:4)
{
for(j in 1:365)
{
yrecsum[i] = yrecsum[i]+yrec[j,i]
}
yrecavg[i] = yrecsum[i]/365
}
So what I have are three matrices of the same size with days in integers (from 1 to 365) on the rows and years integers (from 1 to 4) on the columns. Each matrix is filled in with the data that I'm working with.
I'm trying to find the average of each column for all three matrices and I would like to put those averages in a vector for each matrix.
I've looked around and found some information about the zoo library and chron library and such but I can't get those to work.
lapply(list(yrec, ydel, ynet), colMeans)
[[1]]
[1] 732.9370 731.9836 705.3808 751.6986
[[2]]
[1] 704.7178 714.2877 735.4822 767.5123
[[3]]
[1] 749.1041 715.4164 711.1425 746.3370
#Data
yrec <- matrix(sample(365*4), ncol=4)
ydel <- matrix(sample(365*4), ncol=4)
ynet <- matrix(sample(365*4), ncol=4)
this should get you started (even though I would convert the matrices to data.frames):
#some sample data
m <- matrix(sample(10000, 365*4),365,4)
# get the mean of all the columns of your matrix
colMeans(m)
if you have 3 matrices and you want to combine the results I would do:
# some sample data:
m1 <- matrix(sample(10000, 365*4),365,4)
m2 <- matrix(sample(10000, 365*4),365,4)
m3 <- matrix(sample(10000, 365*4),365,4)
do.call("cbind", lapply(list(m1,m2,m3), colMeans))