I only can write some simple R code. For example:
data <- subset(PCB,PCB1.cat3 == "Low" | PCB1.cat3 == "High")
data <- data[order(data$PCB1.cat3),] ;table(data$PCB1.cat3)
mydata <- data.frame(data[,c(19:134)]);mydata <- t(mydata)
library(limma)
design <- cbind(Grp1=1,Grp2vs1=rep(c(0,1), times = c(27,26)))
fit <- lmFit(mydata,design)
fit <- eBayes(fit)
results <- topTable(fit,adjust = "fdr",coef=2, sort.by="P", number=100)
It works well for variable PCB1.cat3. However, I have 11 variables: PCB2.cat3, PCB3.cat3 ... How to make a loop and the summary of the results?
I figured out by myself. Hope it is helpful for someone who is interested.
library(limma)
for (i in 1:8)
{
data <- PCB[,c(135:142,3:10,19:134)]
data <- subset(data,data[,i] == "Low" | data[,i] == "High")
data <- data[order(data[,i]),] ;table(data[,i])
mydata <- data.frame(data[,c(17:132)]);mydata <- t(mydata)
design <- cbind(Grp1=1,Grp2vs1=rep(c(0,1), times = c(27,26)))
fit <- lmFit(mydata,design)
fit <- eBayes(fit)
results <- topTable(fit,adjust = "fdr",coef=2, sort.by="P", number=20)
assign(paste("res.quartiles",colnames(data[i]),sep="."),results)
}
Related
I want to use mixture copula for reliability analysis, now ,with the help of a friend ,I've already finished it ‘RVMs_fitted’ 。now i want to perform the probability integral transformation (PIT),but the function of RVINEPIT can’t use,because RVINEPIT(data,RVM),this RVM not RVINEMATRIX Here is my code:
library(vineclust)
data1 <- read.csv("D:/ASTUDY/Rlanguage/Mix copula/data.csv", header = FALSE)
fit <- vcmm(data = data1, total_comp=3,is_cvine = 0)
print(fit)
summary(fit)
RVMs_fitted <- list()
RVMs_fitted[[1]] <- VineCopula::RVineMatrix(Matrix=fit$output$vine_structure[,,1],
family=fit$output$bicop_familyset[,,1],
par=fit$output$bicop_param[,,1],
par2=fit$output$bicop_param2[,,1])
RVMs_fitted[[2]] <- VineCopula::RVineMatrix(Matrix=fit$output$vine_structure[,,2],
family=fit$output$bicop_familyset[,,2],
par=fit$output$bicop_param[,,2],
par2=fit$output$bicop_param2[,,2])
RVMs_fitted[[3]] <- VineCopula::RVineMatrix(Matrix=fit$output$vine_structure[,,3],
family=fit$output$bicop_familyset[,,3],
par=fit$output$bicop_param[,,3],
par2=fit$output$bicop_param2[,,3])
RVM<-RVMs_fitted
meanx <- c(0.47,0.508,0.45,0.52,0.48)
sigmax <- c(0.318,0.322,0.296,0.29,0.279)
ux1<-pnorm(x[1],meanx[1],sigmax[1])
ux2<-pnorm(x[2],meanx[2],sigmax[2])
ux3<-pnorm(x[3],meanx[3],sigmax[3])
ux4<-pnorm(x[4],meanx[4],sigmax[4])
ux5<-pnorm(x[5],meanx[5],sigmax[5])
data <- c(ux1,ux2,ux3,ux4,ux5)
du=RVinePIT(data, RVM)
y=t(qnorm(t(du)))
Error:
In RVinePIT: RVM has to be an RVineMatrix object.
You have multiple problems here:
RVM is a list. However, you tried to fit RVinePIT to a list, while it works for one data at a time.
The same holds for the y.
I do not have your data, but try it with other data.
Here is the code (it should work):
library(vineclust)
library(VineCopula)
data1 <- read.csv("D:/ASTUDY/Rlanguage/Mix copula/data.csv", header = FALSE)
fit <- vcmm(data = data, total_comp=3,is_cvine = 0)
print(fit)
summary(fit)
RVMs_fitted <- list()
RVMs_fitted[[1]] <- RVineMatrix(Matrix=fit$output$vine_structure[,,1],
family=fit$output$bicop_familyset[,,1],
par=fit$output$bicop_param[,,1],
par2=fit$output$bicop_param2[,,1])
RVMs_fitted[[2]] <- RVineMatrix(Matrix=fit$output$vine_structure[,,2],
family=fit$output$bicop_familyset[,,2],
par=fit$output$bicop_param[,,2],
par2=fit$output$bicop_param2[,,2])
RVMs_fitted[[3]] <- RVineMatrix(Matrix=fit$output$vine_structure[,,3],
family=fit$output$bicop_familyset[,,3],
par=fit$output$bicop_param[,,3],
par2=fit$output$bicop_param2[,,3])
RVM<-RVMs_fitted
meanx <- c(0.47,0.508,0.45,0.52,0.48)
sigmax <- c(0.318,0.322,0.296,0.29,0.279)
ux1<-pnorm(x[1],meanx[1],sigmax[1])
ux2<-pnorm(x[2],meanx[2],sigmax[2])
ux3<-pnorm(x[3],meanx[3],sigmax[3])
ux4<-pnorm(x[4],meanx[4],sigmax[4])
ux5<-pnorm(x[5],meanx[5],sigmax[5])
data <- c(ux1,ux2,ux3,ux4,ux5)### This must be a matrix to work with RVinePIT
du=lapply(1:3, function(i) RVinePIT(data, RVM[[i]]))
y <-lapply(1:3, function(i) t(qnorm(t(du[[i]]))))
I have a set of lists which I would like to convert into the nested list of a certain structure. My initial data look like list_1_1 ... list_2_2. I would like them to be like final_desired_output.
I can do this step by step by extracting desired variable and appending to the output list one by one. However, this dummy example contains only 2 data subsets (first_lists and list second_lists), while the real life data are far >1 GB. Thus, I would like to do it with a function, which I unfortunatly do not know how to do, as nested lists are not well covered in tutorials. Any assistance?
# some dummy data
one_1 <- c(1:10)
one_2 <- c(2:15)
one_3 <- c(3:20)
starting_one_1 <- 1
starting_one_2 <- 2
starting_one_3 <- 3
ending_one_1 <- c(11)
ending_one_2 <- c(16)
ending_one_3 <- c(21)
two_1 <- c(1:100)
two_2 <- c(1:15)
starting_two_1 <- 5
starting_two_2 <- 10
ending_two_1 <- c(101)
ending_two_2 <- c(16)
# lists mimicking output I currently have
list_1_1 <- list(one_1, one_2, one_3)
list_1_2 <- list(starting_one_1, starting_one_2, starting_one_3)
list_1_3 <- list(ending_one_1, ending_one_2, ending_one_3)
list_2_1 <- list(two_1, two_2)
list_2_2 <- list(starting_two_1, starting_two_2)
list_2_3 <- list(ending_two_1, ending_two_2)
# producing desired otput
list_1_1_desired <- list()
list_1_1_desired[["sequence"]] <- one_1
list_1_1_desired[["starting"]] <- starting_one_1
list_1_1_desired[["ending"]] <- ending_one_1
list_1_2_desired <- list()
list_1_2_desired[["sequence"]] <- one_2
list_1_2_desired[["starting"]] <- starting_one_2
list_1_2_desired[["ending"]] <- ending_one_2
list_1_3_desired <- list()
list_1_3_desired[["sequence"]] <- one_3
list_1_3_desired[["starting"]] <- starting_one_3
list_1_3_desired[["ending"]] <- ending_one_3
list_2_1_desired <- list()
list_2_1_desired[["sequence"]] <- two_1
list_2_1_desired[["starting"]] <- starting_two_1
list_2_1_desired[["ending"]] <- ending_two_1
list_2_2_desired <- list()
list_2_2_desired[["sequence"]] <- two_2
list_2_2_desired[["starting"]] <- starting_two_2
list_2_2_desired[["ending"]] <- ending_two_2
first_lists <- list(list_1_1_desired, list_1_2_desired, list_1_3_desired)
names(first_lists) <- c("one_1", "one_2", "one_3")
second_lists <- list(list_2_1_desired, list_2_2_desired)
names(second_lists) <- c("two_1", "two_2")
# this is what I would like to obtain
final_desired_output <- list()
final_desired_output[["one"]] <- first_lists
final_desired_output[["two"]] <- second_lists
You could use purrr::transpose:
out <- mget(ls(pattern = '^list.*\\d$')) %>%
split(sub("_\\d+$", '', names(.))) %>%
map(~transpose(set_names(.,c('sequence', 'starting', 'ending'))))
all.equal(out, final_desired_output, check.attributes = FALSE)
[1] TRUE
I'm sure there is an elegant way to extract the best alpha and lambda after running cva.glmnet but somehow I cannot find it.
Here is the code I am using in the meantime.
Thank you
library(data.table);library(glmnetUtils);library(useful)
# make some dummy data
data(iris)
x <- useful::build.x(data = iris,formula = Sepal.Length ~ .)
y <- iris$Sepal.Length
# run cv for alpha in c(0,0.5,1)
output.of.cva.glmnet <- cva.glmnet(x=x,y=y,alpha = c(0,0.5,1))
# extract the best parameters
number.of.alphas.tested <- length(output.of.cva.glmnet$alpha)
cv.glmnet.dt <- data.table()
for (i in 1:number.of.alphas.tested){
glmnet.model <- output.of.cva.glmnet$modlist[[i]]
min.mse <- min(glmnet.model$cvm)
min.lambda <- glmnet.model$lambda.min
alpha.value <- output.of.cva.glmnet$alpha[i]
new.cv.glmnet.dt <- data.table(alpha=alpha.value,min_mse=min.mse,min_lambda=min.lambda)
cv.glmnet.dt <- rbind(cv.glmnet.dt,new.cv.glmnet.dt)
}
best.params <- cv.glmnet.dt[which.min(cv.glmnet.dt$min_mse)]
Based on a thread I read on GitHub the author wants people to use plot(fit) instead of just outputting the best parameters. However, that isn't always possible, especially when cross validation is involved. These helper functions can be a good workaround.
# Train model.
fit <- cva.glmnet(X, y)
# Get alpha.
get_alpha <- function(fit) {
alpha <- fit$alpha
error <- sapply(fit$modlist, function(mod) {min(mod$cvm)})
alpha[which.min(error)]
}
# Get all parameters.
get_model_params <- function(fit) {
alpha <- fit$alpha
lambdaMin <- sapply(fit$modlist, `[[`, "lambda.min")
lambdaSE <- sapply(fit$modlist, `[[`, "lambda.1se")
error <- sapply(fit$modlist, function(mod) {min(mod$cvm)})
best <- which.min(error)
data.frame(alpha = alpha[best], lambdaMin = lambdaMin[best],
lambdaSE = lambdaSE[best], eror = error[best])
}
I am doing systematic calculations for my created dataframe. I have the code for the calculations but I would like to:
1) Wite it as a function and calling it for the dataframe I created.
2) reset the calculations for next ID in the dataframe.
I would appreciate your help and advice on this.
The dataframe is created in R using the following code:
#Create a dataframe
dosetimes <- c(0,6,12,18)
df <- data.frame("ID"=1,"TIME"=sort(unique(c(seq(0,30,1),dosetimes))),"AMT"=0,"A1"=NA,"WT"=NA)
doserows <- subset(df, TIME%in%dosetimes)
doserows$AMT[doserows$TIME==dosetimes[1]] <- 100
doserows$AMT[doserows$TIME==dosetimes[2]] <- 100
doserows$AMT[doserows$TIME==dosetimes[3]] <- 100
doserows$AMT[doserows$TIME==dosetimes[4]] <- 100
#Add back dose information
df <- rbind(df,doserows)
df <- df[order(df$TIME,-df$AMT),]
df <- subset(df, (TIME==0 & AMT==0)==F)
df$A1[(df$TIME==0)] <- df$AMT[(df$TIME ==0)]
#Time-dependent covariate
df$WT <- 70
df$WT[df$TIME >= 12] <- 120
#The calculations are done in a for-loop. Here is the code for it:
#values needed for the calculation
C <- 2
V <- 10
k <- C/V
#I would like this part to be written as a function
for(i in 2:nrow(df))
{
t <- df$TIME[i]-df$TIME[i-1]
A1last <- df$A1[i-1]
df$A1[i] = df$AMT[i]+ A1last*exp(-t*k)
}
head(df)
plot(A1~TIME, data=df, type="b", col="blue", ylim=c(0,150))
The other thing is that the previous code assumes the subject ID=1 for all time points. If subject ID=2 when the WT (weight) changes to 120. How can I reset the calculations and make it automated for all subject IDs in the dataframe? In this case the original dataframe would be like this:
#code:
rm(list=ls(all=TRUE))
dosetimes <- c(0,6,12,18)
df <- data.frame("ID"=1,"TIME"=sort(unique(c(seq(0,30,1),dosetimes))),"AMT"=0,"A1"=NA,"WT"=NA)
doserows <- subset(df, TIME%in%dosetimes)
doserows$AMT[doserows$TIME==dosetimes[1]] <- 100
doserows$AMT[doserows$TIME==dosetimes[2]] <- 100
doserows$AMT[doserows$TIME==dosetimes[3]] <- 100
doserows$AMT[doserows$TIME==dosetimes[4]] <- 100
df <- rbind(df,doserows)
df <- df[order(df$TIME,-df$AMT),]
df <- subset(df, (TIME==0 & AMT==0)==F)
df$A1[(df$TIME==0)] <- df$AMT[(df$TIME ==0)]
df$WT <- 70
df$WT[df$TIME >= 12] <- 120
df$ID[(df$WT>=120)==T] <- 2
df$TIME[df$ID==2] <- c(seq(0,20,1))
Thank you in advance!
In general, when doing calculations on different subject's data, I like to split the dataframe by ID, pass the vector of individual subject data into a for loop, do all the calculations, build a vector containing all the newly calculated data and then collapse the resultant and return the dataframe with all the numbers you want. This allows for a lot of control over what you do for each subject
subjects = split(df, df$ID)
forResults = vector("list", length=length(subjects))
# initialize these constants
C <- 2
V <- 10
k <- C/V
myFunc = function(data, resultsArray){
for(k in seq_along(subjects)){
df = subjects[[k]]
df$A1 = 100 # I assume this should be 100 for t=0 for each subject?
# you could vectorize this nested for loop..
for(i in 2:nrow(df)) {
t <- df$TIME[i]-df$TIME[i-1]
A1last <- df$A1[i-1]
df$A1[i] = df$AMT[i]+ A1last*exp(-t*k)
}
head(df)
# you can add all sorts of other calculations you want to do on each subject's data
# when you're done doing calculations, put the resultant into
# the resultsArray and we'll rebuild the dataframe with all the new variables
resultsArray[[k]] = df
# if you're not using RStudio, then you want to use dev.new() to instantiate a new plot canvas
# dev.new() # dont need this if you're using RStudio (which doesnt allow multiple plots open)
plot(A1~TIME, data=df, type="b", col="blue", ylim=c(0,150))
}
# collapse the results vector into a dataframe
resultsDF = do.call(rbind, resultsArray)
return(resultsDF)
}
results = myFunc(subjects, forResults)
Do you want this:
ddf <- data.frame("ID"=1,"TIME"=sort(unique(c(seq(0,30,1),dosetimes))),"AMT"=0,"A1"=NA,"WT"=NA)
myfn = function(df){
dosetimes <- c(0,6,12,18)
doserows <- subset(df, TIME%in%dosetimes)
doserows$AMT[doserows$TIME==dosetimes[1]] <- 100
doserows$AMT[doserows$TIME==dosetimes[2]] <- 100
doserows$AMT[doserows$TIME==dosetimes[3]] <- 100
doserows$AMT[doserows$TIME==dosetimes[4]] <- 100
#Add back dose information
df <- rbind(df,doserows)
df <- df[order(df$TIME,-df$AMT),]
df <- subset(df, (TIME==0 & AMT==0)==F)
df$A1[(df$TIME==0)] <- df$AMT[(df$TIME ==0)]
#Time-dependent covariate
df$WT <- 70
df$WT[df$TIME >= 12] <- 120
#The calculations are done in a for-loop. Here is the code for it:
#values needed for the calculation
C <- 2
V <- 10
k <- C/V
#I would like this part to be written as a function
for(i in 2:nrow(df))
{
t <- df$TIME[i]-df$TIME[i-1]
A1last <- df$A1[i-1]
df$A1[i] = df$AMT[i]+ A1last*exp(-t*k)
}
head(df)
plot(A1~TIME, data=df, type="b", col="blue", ylim=c(0,150))
}
myfn(ddf)
For multiple calls:
for(i in 1:N) {
myfn(ddf[ddf$ID==i,])
readline(prompt="Press <Enter> to continue...")
}
I want to visualize a mosaic plot in form of a tree. For example
mosaicplot(~ Sex + Age + Survived, data = Titanic, color = TRUE)
Now what I want is to represent this in a tree form where the first node
for example be sex the second node be age and at the terminal node be number of people survived. May be it should something like http://addictedtor.free.fr/graphiques/RGraphGallery.php?graph=84 where instead of p giving the number of counts.
Is there an function in R to do this or should I write it on my own by taking at a look
at the party:::plot.BinaryTree function
Here is how I managed to get what I wanted with the lovely igraph package. The code is an ugly hack. It will be great to have you suggestions
library(igraph)
rm(list=ls())
req.data <- as.data.frame(Titanic)
lookup <- c("M","F","C","A","N","Y")
names(lookup) <- c("Male","Female","Child","Adult","Yes","No")
req.data$board <- "board"
req.data$Class.m <- paste(req.data$board,req.data$Class,sep="_")
req.data$Sex.m <- paste(req.data$board,req.data$Class,req.data$Sex,
sep="_")
req.data$Age.m <- paste(req.data$board,req.data$Class,req.data$Sex,
req.data$Age,sep="_")
req.data$Survived.m <- paste(req.data$board,req.data$Class,req.data$Sex,
req.data$Age,req.data$Survived,sep="_")
tmp <- data.frame(from=
do.call("c",lapply(req.data[,c("board",
"Class.m",
"Sex.m",
"Age.m")],as.character)),
to=do.call("c",lapply(req.data[,c("Class.m",
"Sex.m",
"Age.m",
"Survived.m")],as.character)),
stringsAsFactors=FALSE)
tmp <- tmp [!duplicated(tmp ),];rownames(tmp) <- NULL
tmp$num <- unlist(lapply(strsplit(tmp$to,"_"),
FUN=function(x){
check1 <- req.data$Class==x[2]
check2 <- req.data$Sex == x[3]
check3 <- req.data$Age == x[4]
check4 <- req.data$Survived == x[5]
sum(req.data$Freq[ifelse(is.na(check1),TRUE,check1) &
ifelse(is.na(check2),TRUE,check2) &
ifelse(is.na(check3),TRUE,check3) &
ifelse(is.na(check4),TRUE,check4)])}))
g <- graph.data.frame(tmp, directed=TRUE)
V(g)$label <- unlist(lapply(strsplit(V(g)$name,"_"),
FUN=function(y){ifelse(y[length(y)] %in% names(lookup),
lookup[y[length(y)]],y[length(y)])}))
E(g)$label <- tmp$num
plot(g,layout=layout.reingold.tilford,ylim=c(1,-1),edge.arrow.size=0.5,vertex.size=7)
legend("topleft", paste(lookup ,names(lookup),sep=" : "),ncol=2,bty="n",cex=0.7)
### To find the case for crew members
tmp1 <- tmp [grepl("Crew",tmp$from),];rownames(tmp1) <- NULL
g <- graph.data.frame(tmp1, directed=TRUE)
V(g)$label <- unlist(lapply(strsplit(V(g)$name,"_"),
FUN=function(y){ifelse(y[length(y)] %in% names(lookup),
lookup[y[length(y)]],y[length(y)])}))
E(g)$label <- tmp1$num
plot(g,layout=layout.reingold.tilford,ylim=c(1,-1),edge.arrow.size=0.5)
legend("topleft", paste(lookup ,names(lookup),sep=" : "),ncol=2,bty="n",cex=0.7)
Here is the plot I generate. You can modify the vertex/edge colors/size as you want
This is pretty close and looks a lot easier to me.. I post it here in case it may be of use. First I convert the ftable to a more traditional long data frame using expand.dft https://stat.ethz.ch/pipermail/r-help/2009-January/185561.html Then I just use the plot.dendrite function from the plotrix package.
expand.dft <- function(x, var.names = NULL, freq = "Freq", ...)
{
# allow: a table object, or a data frame in frequency form
if(inherits(x, "table"))
x <- as.data.frame.table(x, responseName = freq)
freq.col <- which(colnames(x) == freq)
if (length(freq.col) == 0)
stop(paste(sQuote("freq"), "not found in column names"))
DF <- sapply(1:nrow(x),
function(i) x[rep(i, each = x[i, freq.col]), ],
simplify = FALSE)
DF <- do.call("rbind", DF)[, -freq.col]
for (i in 1:ncol(DF))
{
DF[[i]] <- type.convert(as.character(DF[[i]]), ...)
}
rownames(DF) <- NULL
if (!is.null(var.names))
{
if (length(var.names) < dim(DF)[2])
{
stop(paste("Too few", sQuote("var.names"), "given."))
} else if (length(var.names) > dim(DF)[2]) {
stop(paste("Too many", sQuote("var.names"), "given."))
} else {
names(DF) <- var.names
}
}
DF
}
library(plotrix)
r = ftable(Titanic)
plot.dendrite(makeDendrite(expand.dft(data.frame(r))))