I am looking to save the values from "roc_full_resolution" into a vector. Any ideas?
for(i in 1:10) repeat {
CarefulR<- merge(data,reliable,by.x ="Response_ID" )
CarefullRespondents <- CarefulR %>% select(Response_ID,Category,Q17_1,Q17_2,Q17_3,Q17_4,Q17_5,Q17_6,Q17_7,Q17_8,Q17_9,Q17_10,Q17_11,Q17_12,Q17_13,Q17_14,Q17_15,Q17_16,Q17_17,Q17_18,Q17_19,Q17_20,Q17_21,Q17_22,Q17_23,Q17_24)
CarelessRespondents<- unreliable %>% select(Response_ID,Category,Q17_1,Q17_2,Q17_3,Q17_4,Q17_5,Q17_6,Q17_7,Q17_8,Q17_9,Q17_10,Q17_11,Q17_12,Q17_13,Q17_14,Q17_15,Q17_16,Q17_17,Q17_18,Q17_19,Q17_20,Q17_21,Q17_22,Q17_23,Q17_24)
CR2<- sample_n(CarelessRespondents, 146,replace = TRUE)
df <- rbind(CarefullRespondents,CR2)[-1]
simulation1_mahad <- mahad_raw <- mahad(df )
rounded_scores <- round(simulation1_mahad, digits=1)
roc_rounded <- roc(df$Category, rounded_scores)
roc_full_resolution <- roc(df$Category,rounded_scores)
print(roc_full_resolution)
break}
I would create the roc_full_resolution variable before the for loop, then append the results on each iteration. I would also move the setup outside the loop, and I don't think the repeat and break are needed in this situation.
CarefulR <- merge(data, reliable, by.x = "Response_ID")
CarefullRespondents <- CarefulR %>% select(Response_ID,Category,Q17_1,Q17_2,Q17_3,Q17_4,Q17_5,Q17_6,Q17_7,Q17_8,Q17_9,Q17_10,Q17_11,Q17_12,Q17_13,Q17_14,Q17_15,Q17_16,Q17_17,Q17_18,Q17_19,Q17_20,Q17_21,Q17_22,Q17_23,Q17_24)
CarelessRespondents <- unreliable %>% select(Response_ID,Category,Q17_1,Q17_2,Q17_3,Q17_4,Q17_5,Q17_6,Q17_7,Q17_8,Q17_9,Q17_10,Q17_11,Q17_12,Q17_13,Q17_14,Q17_15,Q17_16,Q17_17,Q17_18,Q17_19,Q17_20,Q17_21,Q17_22,Q17_23,Q17_24)
roc_full_resolution<-NULL
for(i in 1:10) {
CR2 <- sample_n(CarelessRespondents, 146, replace = TRUE)
df <- rbind(CarefullRespondents, CR2)[-1]
simulation1_mahad <- mahad_raw <- mahad(df)
rounded_scores <- round(simulation1_mahad, digits = 1)
roc_rounded <- roc(df$Category, rounded_scores)
roc_full_resolution <- append(roc_full_resolution, roc_rounded)
}
print(roc_full_resolution)
Related
I am trying to make multiple data frames (df_1,,, df_N) with the same structure.
For now, I have made them all individually, but I imagine there should be more efficient way of writing the codes.
Below are the matrices I created (only three for now, but can be more than 100 later on)
quantileMatrix_1 <- matrix(NA,nrow=ncol(outDf_1), ncol = 3)
for(jj in 1:ncol(outDf_1)){
quantiles <- outDf_1[,jj] %>% quantile(probs=c(.5,.025,.975))
quantileMatrix_1[jj,] <- quantiles
}
quantileMatrix_2 <- matrix(NA,nrow=ncol(outDf_2), ncol = 3)
for(jj in 1:ncol(outDf_2)){
quantiles <- outDf_2[,jj] %>% quantile(probs=c(.5,.025,.975))
quantileMatrix_2[jj,] <- quantiles
}
quantileMatrix_3 <- matrix(NA,nrow=ncol(outDf_3), ncol = 3)
for(jj in 1:ncol(outDf_3)){
quantiles <- outDf_3[,jj] %>% quantile(probs=c(.5,.025,.975))
quantileMatrix_3[jj,] <- quantiles
}
I would use another for loop, to put every df in a list.
my_matrix <- list()
for (d in 1:100) {
quantileMatrix_d <- matrix(NA,nrow=ncol(outDf_1), ncol = 3)
for(jj in 1:ncol(outDf_1)){
quantiles <- outDf_1[,jj] %>% quantile(probs=c(.5,.025,.975))
quantileMatrix_d[jj,] <- quantiles
}
my_matrix[[d]] <- quantileMatrix_d
}
I have a table with samples of data named Sample_1, Sample_2, etc. I take user input as a string for which samples are wanted (Sample_1,Sample_3,Sample_5). Then after parsing the string, I have a for-loop which I pass each sample name to and the program filters the original dataset for the name and creates a DF with calculations. I then append the DF to a list after each iteration of the loop and at the end, I rbind the list for a complete DF.
sampleloop <- function(samplenames) {
data <- unlist(strsplit(samplenames, ","))
temp = list()
for(inc in 1:length(data)) {
df <- CT[CT[["Sample_Name"]] == data[inc],]
........
tempdf = goitemp
temp[inc] <- tempdf
}
newdf <- do.call(rbind.data.frame, temp)
}
The inner function on its own produces the correct wanted output. However, with the loop the function produces the following wrong DF if the input is "Sample_3,Sample_9":
I'm wondering if it has something to do with the rbind?
The issue seems to be using [ instead of [[ to access and assign to the list element`
sampleloop <- function(samplenames) {
data <- unlist(strsplit(samplenames, ","))
temp <- vector('list', length(data))
for(inc in seq_along(data)) {
df <- CT[CT[["Sample_Name"]] == data[inc],]
........
tempdf <- goitemp
temp[[inc]] <- tempdf
}
newdf <- do.call(rbind.data.frame, temp)
return(newdf)
}
The difference can be noted with the reproducible example below
lst1 <- vector('list', 5)
lst2 <- vector('list', 5)
for(i in 1:5) {
lst1[i] <- data.frame(col1 = 1:5, col2 = 6:10)
lst2[[i]] <- data.frame(col1 = 1:5, col2 = 6:10)
}
I have a large data frame.
As you can see, a pattern exists code below:
data_1<-data_1
data_2<-data_2 %>% filter(rowSums(data_2[,1:1])==0)
data_3<-data_3 %>% filter(rowSums(data_3[,1:2])==0)
data_4<-data_4 %>% filter(rowSums(data_4[,1:3])==0)
data_5<-data_5 %>% filter(rowSums(data_5[,1:4])==0)
data_6<-data_6 %>% filter(rowSums(data_6[,1:5])==0)
data_7<-data_7 %>% filter(rowSums(data_7[,1:6])==0)
data_8<-data_8 %>% filter(rowSums(data_8[,1:7])==0)
data_9<-data_9 %>% filter(rowSums(data_9[,1:8])==0)
data_10<-data_10 %>% filter(rowSums(data_10[,1:9])==0)
data_11<-data_11 %>% filter(rowSums(data_11[,1:10])==0)
data_12<-data_12 %>% filter(rowSums(data_12[,1:11])==0)
data_13<-data_13 %>% filter(rowSums(data_13[,1:12])==0)
data_14<-data_14 %>% filter(rowSums(data_14[,1:13])==0)
data_15<-data_15 %>% filter(rowSums(data_15[,1:14])==0)
data_16<-data_16 %>% filter(rowSums(data_16[,1:15])==0)
data_17<-data_17 %>% filter(rowSums(data_17[,1:16])==0)
data_18<-data_18 %>% filter(rowSums(data_18[,1:17])==0)
data_19<-data_19 %>% filter(rowSums(data_19[,1:18])==0)
data_20<-data_20 %>% filter(rowSums(data_20[,1:19])==0)
data_21<-data_21 %>% filter(rowSums(data_21[,1:20])==0)
I tried to make loop like this
for(i in 1:21){
data_i <- data_i %>% filter(rowSums(data_i[,1:i-1])==0)
but, data_i is far away from my intention.
how do I solve this problem?
1) for We use the test data in the Note at the end based on the built in anscombe data frame that comes with R. It is best to keep related data frames in a list so we first create such a list L and then iterate over it producing a new list L2 so that we don't overwrite the original list. Keeping the input and output separate makes it easier to debug.
We could alternately write seq_along(L)[-1] as seq(2, length(L)) and we could alternately write seq_len(i-1) as seq(1, i-1). Note that if DF is a data frame then DF[, 1] is the first column as a column vector but DF[, 1, drop = FALSE] is a one column data frame.
No packages are used.
L <- mget(ls(pattern = "^data_\\d+$"))
L2 <- L
for(i in seq_along(L)[-1]) {
Li <- L[[i]]
Sum <- rowSums(Li[, seq_len(i-1), drop = FALSE])
L2[[i]] <- Li[Sum == 0, ]
}
2) lapply Alternately we could use lapply:
L <- mget(ls(pattern = "^data_\\d+$"))
L2 <- L
L2[-1] <- lapply(seq_along(L)[-1], function(i) {
Li <- L[[i]]
Sum <- rowSums(Li[, seq_len(i-1), drop = FALSE])
Li[Sum == 0, ]
})
3) Map or use Map
L3 <- L
f3 <- function(d, i) {
Sum <- rowSums(d[, seq_len(i-1), drop = FALSE])
d[Sum == 0, ]
}
L3[-1] <- Map(f3, L[-1], seq_along(L)[-1])
or special case the first element like this. Note that it will take the component names from the first argument to Map after the function so it is important that f4 be defined so that that argument is L.
f4 <- function(d, i) {
if (i == 1) d
else {
Sum <- rowSums(d[, seq_len(i-1), drop = FALSE])
d[Sum == 0, ]
}
}
L4 <- Map(f4, L, seq_along(L))
Note
# create test data
data_1 <- anscombe
data_1[1, 1] <- 0
data_2 <- 10 * anscombe
data_2[2, 1:2] <- 0
data_3 <- 100 * anscombe
data_3[3, 1:3] <- 0
library('dplyr')
num_ens <- 10
I have a piece of code that takes num_ens files of data from a directory, reads them in, takes the average across them, and saves them as 1 object
A_tree <- lapply(1:num_ens, function(i) {
# importing data on each index i
r <- read.csv(
paste0("/Users/sethparker/vox_LA_max/top_down_individ_sd1/ens_",i,"_0_tree_from_data.txt"),
header = FALSE
)
# creating add columns
colnames(r) <- c("GPP","NPP","LA")
r$month <- seq.int(nrow(r))
r$run <- i
return(r)
})
A_tree <- bind_rows(A_tree)
A_tree <- A_tree %>% group_by(month) %>% summarize(across(c(GPP,NPP,LA), mean))
I would like to auto automate this same process to go across 7 directories:
/top_down_individ_sd1/ through /top_down_individ_sd7/
and to generate a series of objects:
A_tree through G_tree
I have unsuccessfully tried to achieve this with a few variations of the following for loop, which have yielded errors
letters <- LETTERS[seq(from = 1, to = 7)]
sd <- c("sd1","sd2","sd3","sd4","sd5","sd6","sd7")
for (j in 1:7) {
paste0(letters[j],"_tree") <- lapply(1:num_ens, function(i) {
# importing data on each index i
r <- read.csv(paste0(paste0("/Users/sethparker/vox_LA_max/top_down_individ_",sd[j]),"/ens_",i,"_0_tree_from_data.txt"),
header = FALSE)
# creating add columns
colnames(r) <- c("GPP","NPP","LA")
r$month <- seq.int(nrow(r))
r$run <- i
return(r)
})
paste0(letters[j],"_tree") <- bind_rows(paste0(letters[j],"_tree"))
paste0(letters[j],"_tree") <- paste0(letters[j],"_tree") %>% group_by(month) %>% summarize(across(c(GPP,NPP,LA), mean))
}
How can I achieve this goal without errors
tree <- list()
for (j in 1:7) {
tree[[j]] <- lapply(1:num_ens, function(i) {
# importing data on each index i
r <- read.csv(paste0(paste0("/Users/sethparker/vox_LA_max/top_down_individ_",sd[j]),"/ens_",i,"_0_tree_from_data.txt"),
header = FALSE)
# creating add columns
colnames(r) <- c("GPP","NPP","LA")
r$month <- seq.int(nrow(r))
r$run <- i
return(r)
})
tree[[j]] <- bind_rows(tree[[j]])
tree[[j]] <- tree[[j]] %>% group_by(month) %>% summarize(across(c(GPP,NPP,LA), mean))
}
I want to visualize a mosaic plot in form of a tree. For example
mosaicplot(~ Sex + Age + Survived, data = Titanic, color = TRUE)
Now what I want is to represent this in a tree form where the first node
for example be sex the second node be age and at the terminal node be number of people survived. May be it should something like http://addictedtor.free.fr/graphiques/RGraphGallery.php?graph=84 where instead of p giving the number of counts.
Is there an function in R to do this or should I write it on my own by taking at a look
at the party:::plot.BinaryTree function
Here is how I managed to get what I wanted with the lovely igraph package. The code is an ugly hack. It will be great to have you suggestions
library(igraph)
rm(list=ls())
req.data <- as.data.frame(Titanic)
lookup <- c("M","F","C","A","N","Y")
names(lookup) <- c("Male","Female","Child","Adult","Yes","No")
req.data$board <- "board"
req.data$Class.m <- paste(req.data$board,req.data$Class,sep="_")
req.data$Sex.m <- paste(req.data$board,req.data$Class,req.data$Sex,
sep="_")
req.data$Age.m <- paste(req.data$board,req.data$Class,req.data$Sex,
req.data$Age,sep="_")
req.data$Survived.m <- paste(req.data$board,req.data$Class,req.data$Sex,
req.data$Age,req.data$Survived,sep="_")
tmp <- data.frame(from=
do.call("c",lapply(req.data[,c("board",
"Class.m",
"Sex.m",
"Age.m")],as.character)),
to=do.call("c",lapply(req.data[,c("Class.m",
"Sex.m",
"Age.m",
"Survived.m")],as.character)),
stringsAsFactors=FALSE)
tmp <- tmp [!duplicated(tmp ),];rownames(tmp) <- NULL
tmp$num <- unlist(lapply(strsplit(tmp$to,"_"),
FUN=function(x){
check1 <- req.data$Class==x[2]
check2 <- req.data$Sex == x[3]
check3 <- req.data$Age == x[4]
check4 <- req.data$Survived == x[5]
sum(req.data$Freq[ifelse(is.na(check1),TRUE,check1) &
ifelse(is.na(check2),TRUE,check2) &
ifelse(is.na(check3),TRUE,check3) &
ifelse(is.na(check4),TRUE,check4)])}))
g <- graph.data.frame(tmp, directed=TRUE)
V(g)$label <- unlist(lapply(strsplit(V(g)$name,"_"),
FUN=function(y){ifelse(y[length(y)] %in% names(lookup),
lookup[y[length(y)]],y[length(y)])}))
E(g)$label <- tmp$num
plot(g,layout=layout.reingold.tilford,ylim=c(1,-1),edge.arrow.size=0.5,vertex.size=7)
legend("topleft", paste(lookup ,names(lookup),sep=" : "),ncol=2,bty="n",cex=0.7)
### To find the case for crew members
tmp1 <- tmp [grepl("Crew",tmp$from),];rownames(tmp1) <- NULL
g <- graph.data.frame(tmp1, directed=TRUE)
V(g)$label <- unlist(lapply(strsplit(V(g)$name,"_"),
FUN=function(y){ifelse(y[length(y)] %in% names(lookup),
lookup[y[length(y)]],y[length(y)])}))
E(g)$label <- tmp1$num
plot(g,layout=layout.reingold.tilford,ylim=c(1,-1),edge.arrow.size=0.5)
legend("topleft", paste(lookup ,names(lookup),sep=" : "),ncol=2,bty="n",cex=0.7)
Here is the plot I generate. You can modify the vertex/edge colors/size as you want
This is pretty close and looks a lot easier to me.. I post it here in case it may be of use. First I convert the ftable to a more traditional long data frame using expand.dft https://stat.ethz.ch/pipermail/r-help/2009-January/185561.html Then I just use the plot.dendrite function from the plotrix package.
expand.dft <- function(x, var.names = NULL, freq = "Freq", ...)
{
# allow: a table object, or a data frame in frequency form
if(inherits(x, "table"))
x <- as.data.frame.table(x, responseName = freq)
freq.col <- which(colnames(x) == freq)
if (length(freq.col) == 0)
stop(paste(sQuote("freq"), "not found in column names"))
DF <- sapply(1:nrow(x),
function(i) x[rep(i, each = x[i, freq.col]), ],
simplify = FALSE)
DF <- do.call("rbind", DF)[, -freq.col]
for (i in 1:ncol(DF))
{
DF[[i]] <- type.convert(as.character(DF[[i]]), ...)
}
rownames(DF) <- NULL
if (!is.null(var.names))
{
if (length(var.names) < dim(DF)[2])
{
stop(paste("Too few", sQuote("var.names"), "given."))
} else if (length(var.names) > dim(DF)[2]) {
stop(paste("Too many", sQuote("var.names"), "given."))
} else {
names(DF) <- var.names
}
}
DF
}
library(plotrix)
r = ftable(Titanic)
plot.dendrite(makeDendrite(expand.dft(data.frame(r))))