I want to visualize a mosaic plot in form of a tree. For example
mosaicplot(~ Sex + Age + Survived, data = Titanic, color = TRUE)
Now what I want is to represent this in a tree form where the first node
for example be sex the second node be age and at the terminal node be number of people survived. May be it should something like http://addictedtor.free.fr/graphiques/RGraphGallery.php?graph=84 where instead of p giving the number of counts.
Is there an function in R to do this or should I write it on my own by taking at a look
at the party:::plot.BinaryTree function
Here is how I managed to get what I wanted with the lovely igraph package. The code is an ugly hack. It will be great to have you suggestions
library(igraph)
rm(list=ls())
req.data <- as.data.frame(Titanic)
lookup <- c("M","F","C","A","N","Y")
names(lookup) <- c("Male","Female","Child","Adult","Yes","No")
req.data$board <- "board"
req.data$Class.m <- paste(req.data$board,req.data$Class,sep="_")
req.data$Sex.m <- paste(req.data$board,req.data$Class,req.data$Sex,
sep="_")
req.data$Age.m <- paste(req.data$board,req.data$Class,req.data$Sex,
req.data$Age,sep="_")
req.data$Survived.m <- paste(req.data$board,req.data$Class,req.data$Sex,
req.data$Age,req.data$Survived,sep="_")
tmp <- data.frame(from=
do.call("c",lapply(req.data[,c("board",
"Class.m",
"Sex.m",
"Age.m")],as.character)),
to=do.call("c",lapply(req.data[,c("Class.m",
"Sex.m",
"Age.m",
"Survived.m")],as.character)),
stringsAsFactors=FALSE)
tmp <- tmp [!duplicated(tmp ),];rownames(tmp) <- NULL
tmp$num <- unlist(lapply(strsplit(tmp$to,"_"),
FUN=function(x){
check1 <- req.data$Class==x[2]
check2 <- req.data$Sex == x[3]
check3 <- req.data$Age == x[4]
check4 <- req.data$Survived == x[5]
sum(req.data$Freq[ifelse(is.na(check1),TRUE,check1) &
ifelse(is.na(check2),TRUE,check2) &
ifelse(is.na(check3),TRUE,check3) &
ifelse(is.na(check4),TRUE,check4)])}))
g <- graph.data.frame(tmp, directed=TRUE)
V(g)$label <- unlist(lapply(strsplit(V(g)$name,"_"),
FUN=function(y){ifelse(y[length(y)] %in% names(lookup),
lookup[y[length(y)]],y[length(y)])}))
E(g)$label <- tmp$num
plot(g,layout=layout.reingold.tilford,ylim=c(1,-1),edge.arrow.size=0.5,vertex.size=7)
legend("topleft", paste(lookup ,names(lookup),sep=" : "),ncol=2,bty="n",cex=0.7)
### To find the case for crew members
tmp1 <- tmp [grepl("Crew",tmp$from),];rownames(tmp1) <- NULL
g <- graph.data.frame(tmp1, directed=TRUE)
V(g)$label <- unlist(lapply(strsplit(V(g)$name,"_"),
FUN=function(y){ifelse(y[length(y)] %in% names(lookup),
lookup[y[length(y)]],y[length(y)])}))
E(g)$label <- tmp1$num
plot(g,layout=layout.reingold.tilford,ylim=c(1,-1),edge.arrow.size=0.5)
legend("topleft", paste(lookup ,names(lookup),sep=" : "),ncol=2,bty="n",cex=0.7)
Here is the plot I generate. You can modify the vertex/edge colors/size as you want
This is pretty close and looks a lot easier to me.. I post it here in case it may be of use. First I convert the ftable to a more traditional long data frame using expand.dft https://stat.ethz.ch/pipermail/r-help/2009-January/185561.html Then I just use the plot.dendrite function from the plotrix package.
expand.dft <- function(x, var.names = NULL, freq = "Freq", ...)
{
# allow: a table object, or a data frame in frequency form
if(inherits(x, "table"))
x <- as.data.frame.table(x, responseName = freq)
freq.col <- which(colnames(x) == freq)
if (length(freq.col) == 0)
stop(paste(sQuote("freq"), "not found in column names"))
DF <- sapply(1:nrow(x),
function(i) x[rep(i, each = x[i, freq.col]), ],
simplify = FALSE)
DF <- do.call("rbind", DF)[, -freq.col]
for (i in 1:ncol(DF))
{
DF[[i]] <- type.convert(as.character(DF[[i]]), ...)
}
rownames(DF) <- NULL
if (!is.null(var.names))
{
if (length(var.names) < dim(DF)[2])
{
stop(paste("Too few", sQuote("var.names"), "given."))
} else if (length(var.names) > dim(DF)[2]) {
stop(paste("Too many", sQuote("var.names"), "given."))
} else {
names(DF) <- var.names
}
}
DF
}
library(plotrix)
r = ftable(Titanic)
plot.dendrite(makeDendrite(expand.dft(data.frame(r))))
Related
I am looking to save the values from "roc_full_resolution" into a vector. Any ideas?
for(i in 1:10) repeat {
CarefulR<- merge(data,reliable,by.x ="Response_ID" )
CarefullRespondents <- CarefulR %>% select(Response_ID,Category,Q17_1,Q17_2,Q17_3,Q17_4,Q17_5,Q17_6,Q17_7,Q17_8,Q17_9,Q17_10,Q17_11,Q17_12,Q17_13,Q17_14,Q17_15,Q17_16,Q17_17,Q17_18,Q17_19,Q17_20,Q17_21,Q17_22,Q17_23,Q17_24)
CarelessRespondents<- unreliable %>% select(Response_ID,Category,Q17_1,Q17_2,Q17_3,Q17_4,Q17_5,Q17_6,Q17_7,Q17_8,Q17_9,Q17_10,Q17_11,Q17_12,Q17_13,Q17_14,Q17_15,Q17_16,Q17_17,Q17_18,Q17_19,Q17_20,Q17_21,Q17_22,Q17_23,Q17_24)
CR2<- sample_n(CarelessRespondents, 146,replace = TRUE)
df <- rbind(CarefullRespondents,CR2)[-1]
simulation1_mahad <- mahad_raw <- mahad(df )
rounded_scores <- round(simulation1_mahad, digits=1)
roc_rounded <- roc(df$Category, rounded_scores)
roc_full_resolution <- roc(df$Category,rounded_scores)
print(roc_full_resolution)
break}
I would create the roc_full_resolution variable before the for loop, then append the results on each iteration. I would also move the setup outside the loop, and I don't think the repeat and break are needed in this situation.
CarefulR <- merge(data, reliable, by.x = "Response_ID")
CarefullRespondents <- CarefulR %>% select(Response_ID,Category,Q17_1,Q17_2,Q17_3,Q17_4,Q17_5,Q17_6,Q17_7,Q17_8,Q17_9,Q17_10,Q17_11,Q17_12,Q17_13,Q17_14,Q17_15,Q17_16,Q17_17,Q17_18,Q17_19,Q17_20,Q17_21,Q17_22,Q17_23,Q17_24)
CarelessRespondents <- unreliable %>% select(Response_ID,Category,Q17_1,Q17_2,Q17_3,Q17_4,Q17_5,Q17_6,Q17_7,Q17_8,Q17_9,Q17_10,Q17_11,Q17_12,Q17_13,Q17_14,Q17_15,Q17_16,Q17_17,Q17_18,Q17_19,Q17_20,Q17_21,Q17_22,Q17_23,Q17_24)
roc_full_resolution<-NULL
for(i in 1:10) {
CR2 <- sample_n(CarelessRespondents, 146, replace = TRUE)
df <- rbind(CarefullRespondents, CR2)[-1]
simulation1_mahad <- mahad_raw <- mahad(df)
rounded_scores <- round(simulation1_mahad, digits = 1)
roc_rounded <- roc(df$Category, rounded_scores)
roc_full_resolution <- append(roc_full_resolution, roc_rounded)
}
print(roc_full_resolution)
library('dplyr')
num_ens <- 10
I have a piece of code that takes num_ens files of data from a directory, reads them in, takes the average across them, and saves them as 1 object
A_tree <- lapply(1:num_ens, function(i) {
# importing data on each index i
r <- read.csv(
paste0("/Users/sethparker/vox_LA_max/top_down_individ_sd1/ens_",i,"_0_tree_from_data.txt"),
header = FALSE
)
# creating add columns
colnames(r) <- c("GPP","NPP","LA")
r$month <- seq.int(nrow(r))
r$run <- i
return(r)
})
A_tree <- bind_rows(A_tree)
A_tree <- A_tree %>% group_by(month) %>% summarize(across(c(GPP,NPP,LA), mean))
I would like to auto automate this same process to go across 7 directories:
/top_down_individ_sd1/ through /top_down_individ_sd7/
and to generate a series of objects:
A_tree through G_tree
I have unsuccessfully tried to achieve this with a few variations of the following for loop, which have yielded errors
letters <- LETTERS[seq(from = 1, to = 7)]
sd <- c("sd1","sd2","sd3","sd4","sd5","sd6","sd7")
for (j in 1:7) {
paste0(letters[j],"_tree") <- lapply(1:num_ens, function(i) {
# importing data on each index i
r <- read.csv(paste0(paste0("/Users/sethparker/vox_LA_max/top_down_individ_",sd[j]),"/ens_",i,"_0_tree_from_data.txt"),
header = FALSE)
# creating add columns
colnames(r) <- c("GPP","NPP","LA")
r$month <- seq.int(nrow(r))
r$run <- i
return(r)
})
paste0(letters[j],"_tree") <- bind_rows(paste0(letters[j],"_tree"))
paste0(letters[j],"_tree") <- paste0(letters[j],"_tree") %>% group_by(month) %>% summarize(across(c(GPP,NPP,LA), mean))
}
How can I achieve this goal without errors
tree <- list()
for (j in 1:7) {
tree[[j]] <- lapply(1:num_ens, function(i) {
# importing data on each index i
r <- read.csv(paste0(paste0("/Users/sethparker/vox_LA_max/top_down_individ_",sd[j]),"/ens_",i,"_0_tree_from_data.txt"),
header = FALSE)
# creating add columns
colnames(r) <- c("GPP","NPP","LA")
r$month <- seq.int(nrow(r))
r$run <- i
return(r)
})
tree[[j]] <- bind_rows(tree[[j]])
tree[[j]] <- tree[[j]] %>% group_by(month) %>% summarize(across(c(GPP,NPP,LA), mean))
}
I'm making world maps with the rworldmap package. I'm using a function to access trade data from UN Comtrade.
I edited my original question so I can show a real example of what I'm doing. Here is a map that I could make:
Function
library(rjson)
library(rworldmap)
get.Comtrade <- function(url="http://comtrade.un.org/api/get?", maxrec=50000,
type="C", freq="A", px="HS", ps="now", r, p, rg="all",
cc="TOTAL", fmt="json") {
string <- paste(url
, "max=", maxrec,"&" # maximum no. of records returned
, "type=", type, "&" # type of trade (c=commodities)
, "freq=", freq, "&" # frequency
, "px=", px, "&" # classification
, "ps=", ps, "&" # time period
, "r=", r, "&" # reporting area
, "p=", p, "&" # partner country
, "rg=", rg, "&" # trade flow
, "cc=", cc, "&" # classification code
, "fmt=", fmt # Format
, sep="")
if (fmt == "csv") {
raw.data <- read.csv(string,header=TRUE)
return(list(validation=NULL, data=raw.data))
} else {
if (fmt == "json" ) {
raw.data <- fromJSON(file=string)
data <- raw.data$dataset
validation <- unlist(raw.data$validation, recursive=TRUE)
ndata <- NULL
if (length(data) > 0) {
var.names <- names(data[[1]])
data <- as.data.frame(t(sapply(data,rbind)))
ndata <- NULL
for (i in 1:ncol(data)) {
data[sapply(data[, i], is.null), i] <- NA
ndata <- cbind(ndata, unlist(data[, i]))
}
ndata <- as.data.frame(ndata)
colnames(ndata) <- var.names
}
return(list(validation=validation, data=ndata))
}
}
}
Usage
dt2 <- get.Comtrade(r=32, p="all", rg=1, fmt="csv")
dt2df <- as.data.frame(do.call(rbind, dt2))
total <- sum(dt2df$Trade.Value..US..)
dt2df$p <- 100*dt2df$Trade.Value..US../total
dt2df <- dt2df[order(-dt2df[, "p"]), ]
top3 <- dt2df[4, "p"]
top10 <- dt2df[11, "p"]
q3 <- dt2df[as.integer(1*nrow(dt2df)/4), "p"]
q2 <- dt2df[as.integer(2*nrow(dt2df)/4), "p"]
q1 <- dt2df[as.integer(3*nrow(dt2df)/4), "p"]
mapped_data <- joinCountryData2Map(dt2df, joinCode="ISO3",
nameJoinColumn="Partner.ISO")
mapCountryData(mapped_data, nameColumnToPlot="p", numCats=6,
catMethod=c(0, q1, q2, q3, top10, top3, 100),
colourPalette=c('cornsilk', 'cornsilk2', 'palegreen1',
'palegreen2', 'palegreen4', 'darkgreen'),
mapTitle="", addLegend=FALSE)
The result is the map that I'm looking for, except that I don't need to see the Antarctica. How can I remove it?
I tried with xlim & ylim, but it didn't work.
Try
new_world <- subset(mapped_data, continent != "Antarctica")
after
mapped_data <- joinCountryData2Map(dt2df, joinCode = "ISO3", nameJoinColumn = "Partner.ISO")
then continue
mapCountryData(new_world, nameColumnToPlot = "p", numCats=6, catMethod =
c(0,q1,q2,q3,top10,top3,100), colourPalette = c('cornsilk','cornsilk2','palegreen1','palegreen2','palegreen4','darkgreen'), mapTitle="", addLegend=FALSE)
I have a data frame "ref.df" that has info about 12 networks. I want to create 100 random networks for each subject according their node and edge numbers.
I've tried this code but it didn't work well:
library(igraph)
random.networks <- list()
for(i in ref.df$subject){
cat("...")
for( j in 1:100){
random.networks[[j]] <- sample_gnm(n=ref.df$node,m=ref.df$edge, directed = TRUE, loops = FALSE)
}
cat(i,"\n")
}
This code generate 100 random networks only for the first subject.
Thanks for your time and advice in advance.
You can reproduce my data frame:
ref.df <- data.frame(subject=c("Civil.Liberties","Foreign.Policy","Women.s.Rights","Workers..Rights",
"Political.Polarisation","Kurdish.Peace.Process","Parallel.State",
"HDP.Election.Slogans","Related.With.Election","CHP.Election.Slogans",
"AKP.Election.Slogans","MHP.Election.Slogans"),
group=c(298,1150,474,2522,0,2570,718,2736,0,1661,2175,1460),
mod=c(0.77,0.73,0.84,0.78,0,0.72,0.66,0.62,0,0.68,0.76,0.66),
node=c(13524,68792,21925,87094,195678,98008,28499,93024,201342,61539,91640,63035),
edge=c(18694,183932,27120,143032,710044,249267,108352,255615,579919,17590,3313147,213367))
If the problem is that you want 12 x 100 networks and you are only getting a list of 100, with a minimal modification to your code, you can do as follows:
random.networks <- list()
for (subj in ref.df$subject){
cat("...")
for (i in 1:100) {
tmp <- sample_gnm(n=ref.df$node[ref.df$subject == subj],
m=ref.df$edge[ref.df$subject == subj],
directed = TRUE, loops = FALSE)
random.networks[[(length(random.networks) + 1)]] <- tmp
names(random.networks)[length(random.networks)] <- paste(as.vector(subj), i, sep = "_")
}
cat(as.vector(subj),"\n")
}
random.networks
If you want to make sure that the random networks you generate are different, you may want trying the following approach, but because of the cross comparisons, this will be very slow.
random.networks <- list()
look.up <- list()
for (subj in ref.df$subject){
cat("...")
for (i in 1:100) {
tmp <- NA
# enforce uniqueness
while(is.na(tmp)|
as.character(tmp)[4] %in% look.up) {
tmp <- sample_gnm(n=ref.df$node[ref.df$subject == subj],
m=ref.df$edge[ref.df$subject == subj],
directed = TRUE, loops = FALSE)
}
random.networks[[(length(random.networks) + 1)]] <- tmp
look.up[[(length(look.up) + 1)]] <- as.character(tmp)[4]
names(random.networks)[length(random.networks)] <- paste(as.vector(subj), i, sep = "_")
}
cat(as.vector(subj),"\n")
}
I only can write some simple R code. For example:
data <- subset(PCB,PCB1.cat3 == "Low" | PCB1.cat3 == "High")
data <- data[order(data$PCB1.cat3),] ;table(data$PCB1.cat3)
mydata <- data.frame(data[,c(19:134)]);mydata <- t(mydata)
library(limma)
design <- cbind(Grp1=1,Grp2vs1=rep(c(0,1), times = c(27,26)))
fit <- lmFit(mydata,design)
fit <- eBayes(fit)
results <- topTable(fit,adjust = "fdr",coef=2, sort.by="P", number=100)
It works well for variable PCB1.cat3. However, I have 11 variables: PCB2.cat3, PCB3.cat3 ... How to make a loop and the summary of the results?
I figured out by myself. Hope it is helpful for someone who is interested.
library(limma)
for (i in 1:8)
{
data <- PCB[,c(135:142,3:10,19:134)]
data <- subset(data,data[,i] == "Low" | data[,i] == "High")
data <- data[order(data[,i]),] ;table(data[,i])
mydata <- data.frame(data[,c(17:132)]);mydata <- t(mydata)
design <- cbind(Grp1=1,Grp2vs1=rep(c(0,1), times = c(27,26)))
fit <- lmFit(mydata,design)
fit <- eBayes(fit)
results <- topTable(fit,adjust = "fdr",coef=2, sort.by="P", number=20)
assign(paste("res.quartiles",colnames(data[i]),sep="."),results)
}