How do I create venn-diagram with item labels inside - r

I have these three clusters below. I want to create venn diagram with three clusters and showing their labels inside the diagram with proper size and spread so it looks beautiful. Below is the code I tried, but doesn't give what I wanted.
Clusters:
ClusterI<- c("HanXRQChr10g0293411T", "HanXRQChr09g0239551T", "HanXRQChr15g0489401R", "HanXRQChr02g0052061T", "HanXRQChr14g0430311N", "HanXRQChr15g0482661N", "HanXRQChr02g0046611R", "HanXRQChr02g0048181R", "HanXRQChr09g0260361N", "HanXRQChr08g0224171C", "HanXRQChr15g0489421R", "HanXRQChr03g0065841N", "HanXRQChr05g0129181R")
ClusterII<- c("HanXRQChr03g0082411N", "HanXRQChr13g0421521N", "HanXRQChr09g0240011N", "HanXRQChr11g0348661N", "HanXRQChr16g0505221N", "HanXRQChr15g0468571C", "HanXRQChr16g0522521T", "HanXRQChr10g0317141T", "HanXRQChr16g0520121T", "HanXRQChr13g0421611N", "HanXRQChr03g0077151T", "HanXRQChr15g0477941C", "HanXRQChr04g0103931T", "HanXRQChr04g0098561T", "HanXRQChr06g0183851T", "HanXRQChr09g0267021N", "HanXRQChr10g0279361N", "HanXRQChr06g0184181T", "HanXRQChr09g0240261N", "HanXRQChr03g0077061T", "HanXRQChr10g0279351N", "HanXRQChr02g0050681T", "HanXRQChr01g0016951T", "HanXRQChr13g0423781N", "HanXRQChr15g0478941C", "HanXRQChr09g0239991T", "HanXRQChr11g0320701N", "HanXRQChr04g0098511T", "HanXRQChr02g0037011N", "HanXRQChr13g0426201C", "HanXRQChr04g0117551T", "HanXRQChr09g0243851N", "HanXRQChr03g0079391N", "HanXRQChr09g0239281T", "HanXRQChr09g0241811T", "HanXRQChr04g0101181T", "HanXRQChr01g0029301C", "HanXRQChr08g0209681T", "HanXRQChr14g0453551N", "HanXRQChr05g0149501T", "HanXRQChr13g0397101N", "HanXRQChr13g0417981C", "HanXRQChr10g0316961N")
ClusterIII <- c("HanXRQChr03g0065091T", "HanXRQChr01g0016931T", "HanXRQChr17g0550881C", "HanXRQChr03g0064011T", "HanXRQChr09g0239211T", "HanXRQChr06g0183841T", "HanXRQChr04g0095771T", "HanXRQChr09g0240621T", "HanXRQChr12g0374601C", "HanXRQChr14g0430731R", "HanXRQChr10g0298171T", "HanXRQChr08g0211081T", "HanXRQChr02g0050711T", "HanXRQChr12g0361091T", "HanXRQChr06g0175651N")
code:
v2 <- venn.diagram(list(ClusterI=ClusterI, ClusterII=ClusterII,ClusterIII=ClusterIII),
fill = c("red", "blue","green"),
alpha = c(0.5, 0.5, 0.5), cat.cex = 1.5, cex=0.25,
filename=NULL)
# have a look at the default plot
grid.newpage()
grid.draw(v2)
v2[[7]]$label <- paste(setdiff(ClusterI, ClusterII), collapse="\n")
# in ClusterII only
v2[[8]]$label <- paste(setdiff(ClusterII, ClusterI) , collapse="\n")
# intesection ClusterI and ClusterII
v2[[9]]$label <- paste(intersect(ClusterI, ClusterII), collapse="\n")
# fora: out
v2[[10]]$label <- paste(ClusterIII, collapse="\n")
grid.newpage()
grid.draw(v2)

Try this. It appears there is no overlap among three clusters.
library(VennDiagram)
ClusterI<- c("HanXRQChr10g0293411T", "HanXRQChr09g0239551T", "HanXRQChr15g0489401R", "HanXRQChr02g0052061T", "HanXRQChr14g0430311N", "HanXRQChr15g0482661N", "HanXRQChr02g0046611R", "HanXRQChr02g0048181R", "HanXRQChr09g0260361N", "HanXRQChr08g0224171C", "HanXRQChr15g0489421R", "HanXRQChr03g0065841N", "HanXRQChr05g0129181R")
ClusterII<- c("HanXRQChr03g0082411N", "HanXRQChr13g0421521N", "HanXRQChr09g0240011N", "HanXRQChr11g0348661N", "HanXRQChr16g0505221N", "HanXRQChr15g0468571C", "HanXRQChr16g0522521T", "HanXRQChr10g0317141T", "HanXRQChr16g0520121T", "HanXRQChr13g0421611N", "HanXRQChr03g0077151T", "HanXRQChr15g0477941C", "HanXRQChr04g0103931T", "HanXRQChr04g0098561T", "HanXRQChr06g0183851T", "HanXRQChr09g0267021N", "HanXRQChr10g0279361N", "HanXRQChr06g0184181T", "HanXRQChr09g0240261N", "HanXRQChr03g0077061T", "HanXRQChr10g0279351N", "HanXRQChr02g0050681T", "HanXRQChr01g0016951T", "HanXRQChr13g0423781N", "HanXRQChr15g0478941C", "HanXRQChr09g0239991T", "HanXRQChr11g0320701N", "HanXRQChr04g0098511T", "HanXRQChr02g0037011N", "HanXRQChr13g0426201C", "HanXRQChr04g0117551T", "HanXRQChr09g0243851N", "HanXRQChr03g0079391N", "HanXRQChr09g0239281T", "HanXRQChr09g0241811T", "HanXRQChr04g0101181T", "HanXRQChr01g0029301C", "HanXRQChr08g0209681T", "HanXRQChr14g0453551N", "HanXRQChr05g0149501T", "HanXRQChr13g0397101N", "HanXRQChr13g0417981C", "HanXRQChr10g0316961N")
ClusterIII <- c("HanXRQChr03g0065091T", "HanXRQChr01g0016931T", "HanXRQChr17g0550881C", "HanXRQChr03g0064011T", "HanXRQChr09g0239211T", "HanXRQChr06g0183841T", "HanXRQChr04g0095771T", "HanXRQChr09g0240621T", "HanXRQChr12g0374601C", "HanXRQChr14g0430731R", "HanXRQChr10g0298171T", "HanXRQChr08g0211081T", "HanXRQChr02g0050711T", "HanXRQChr12g0361091T", "HanXRQChr06g0175651N")
v2 <- draw.triple.venn(
area1 = 60,
area2 = 60,
area3 = 60,
n12 = 20,
n23 = 10,
n13 = 15,
n123 = 5,
cex = 0.25,
cat.cex = 1.5,
alpha = c(0.5, 0.5, 0.5),
category = c("ClusterI", "ClusterII", "ClusterIII"),
fill = c("red", "blue","green")
)
overlaps <- calculate.overlap(list("ClusterI"=ClusterI, "ClusterII"=ClusterII, "ClusterIII"=ClusterIII))
overlaps
indx <- as.numeric(substr(names(overlaps),2,2))
for (i in 1:length(overlaps)){
v2[[6+indx[i] ]]$label <- paste(overlaps[[i]], collapse = "\n")
}
grid.newpage()
grid.draw(v2)
Good luck!

Related

How to adjust column width in dataframe of foretsplotter function

I am trying to create a forestplot, using forestplotter function, am able to get a beautiful graph, but am not able to see the entire graph, the column widths in few of the columns are so big, even if the string size is less, making the width of the entire graph, so big to see, can someone help me with this and also is it possible to align the datahrame contents uniformly centre aligned......Please help me with this
The code and relevant data are
###Required packages###
library(grid)
library(forestploter)
library(rmeta)
library(gridExtra)
#Data entered#
df <- data.frame(Study=c("A","B","C","D","Summary"),
nA = c(24,187,36,26,273),
median_A = c(4.9,5.69,8.866995074,8.5,NA),
Q1A =c(3,2.86,4.495073892,2,NA),
Q3A =c(8.5,9.78,14.96305419,32,NA),
nP = c(23,193,36,26,278),
median_P = c(7.2,6.79,8.990147783,12.5,NA),
Q1P =c(3.4,3.59,4.002463054,2,NA),
Q3P =c(10.9,10.12,12.06896552,43,NA),
W = c("10.6%","80.8%","8.0%","0.70%",NA),
E=c(-2.3,-1.1,-0.123152709,-4,-1.16881587),
UL=c(1.161473203,0.156288294,3.881699516,10.02689306,-0.039791047),
LL=c(-5.761473203,-2.356288294,-4.128004935,-18.02689306,-2.297840692))
#Calculate SE for box size#
df$SE <- (df$UL-df$E)/1.96
#Column for Confidence intervals for Drug A and Placebo, with 2 significant digit#
df$IQRA <- sprintf("%.2f (%.2f to %.2f)",df$median_A,df$Q1A, df$Q3A)
df$IQRP <- sprintf("%.2f (%.2f to %.2f)",df$median_P,df$Q1P, df$Q3P)
#Column for Confidence intervals for NET EFFECT, with 2 significant digit#
df$MD <- sprintf("%.2f (%.2f to %.2f)", df$E, df$LL, df$UL)
#Create a column with space for forest plot#
df$" "<- paste(rep(" ", 16), collapse = " ")
##Forest plot theme##
#To be modified as needed#
ftn <-forest_theme(
base_size = 16,
base_family = "serif",
ci_pch = 15,
ci_col = "black",
ci_lty = 1,
ci_lwd = 1,
ci_Theight = 0.25,
legend_name = " ",
legend_position = "right",legend_value = "",
xaxis_lwd = 1,
xaxis_cex = 0.7,
refline_lwd = 1,
refline_lty = "dashed",
refline_col = "red",
summary_fill = "blue",
summary_col = "blue",
footnote_cex = 0.4,
footnote_fontface = "plain",
footnote_col = "black",
title_just = c("center"),
title_cex = 1.1,
title_fontface = "bold",
title_col = "black",
show.rownames = FALSE)
##Table in Order for Forest plot##
#First get Column names#
colnames(df)
df2 <-df[,c(1,2,15,6,16,18,17)]
#Make NA cells empty
df2[5,3] <-c(" ")
df2[5,5] <-c(" ")
##Forestplot##
plot<-forest(df2,
est = df$E,
lower = df$LL,
upper = df$UL,
sizes = (df$SE/10),
ci_column = 6,
ref_line = 0,
arrow_lab = c("Drug A Better", "Placebo Better"),
xlim = c(-7, 6),
is_summary = c(FALSE,FALSE,FALSE,FALSE,TRUE),
xlog = FALSE,
ticks_digits = 0,ticks_at = c(-6,0,6),
theme = ftn)
##Show plot
print(plot, autofit = FALSE)

How can I code a self arrow in my node diagram in R?

The code I found creates a population matrix node diagram. All I want to do is add a self arrow on the final node i.e. one that points to itself. Hope you can help.
library(diagram)
Numgenerations <- 6
DiffMat <- matrix(data = 0, nrow = Numgenerations, ncol = Numgenerations)
AA <- as.data.frame(DiffMat)
AA[[1,5]] <- "f[4]"
AA[[1,6]] <- "f[5]"
#
AA[[2,1]] <- "s[list(0,1)]"
AA[[3,2]] <- "s[list(1,2)]"
AA[[4,3]] <- "s[list(2,3)]"
AA[[5,4]] <- "s[list(3,4)]"
AA[[6,5]] <- "s[list(4,5)]"
#
name <- c(expression(Age[0]), expression(Age[1]), expression(Age[2]),
expression(Age[3]), expression(Age[4]), expression(Age[5]))
#
plotmat(A = AA, pos = 6, curve = 0.7, name = name, lwd = 2,
arr.len = 0.6, arr.width = 0.25, my = -0.2,
box.size = 0.05, arr.type = "triangle", dtext = 0.95,
main = "Age-structured population model")
You can add the arrow this way:
AA[[6,6]] <- "s[list(5,5)]"
Or, if you want it labeled as a self arrow,
AA[[6,6]] <- "self"
I needed to set relsize to slightly less than 1 to prevent the self arrow from being clipped at the right edge.
plotmat(A = AA, pos = 6, curve = 0.7, name = name, lwd = 2,
arr.len = 0.6, arr.width = 0.25, my = -0.2,
box.size = 0.05, arr.type = "triangle", dtext = 0.95,
main = "Age-structured population model",
relsize=0.97)

How to set the y range in boxplot graph?

I'm using boxplot() in R. My code is:
#rm(list=ls())
#B2
fps_error <- c(0.058404273, 0.028957446, 0.026276044, 0.07084294, 0.078438563, 0.024000178, 0.120678965, 0.081774358, 0.025644741, 0.02931614)
fps_error = fps_error *100
fps_qp_error <-c(1.833333333, 1.69047619, 1.666666667, 3.095238095, 2.738095238, 1.714285714, 3.634146341, 5.142857143, 1.238095238, 2.30952381)
bit_error <- c(0.141691737, 0.136173785, 0.073808209, 0.025057931, 0.165722097, 0.004276999, 0.365353752, 0.164757488, 0.003362543, 0.022423845)
bit_error = bit_error *100
bit_qp_error <-c(0.666666667, 0.785714286, 0.428571429, 0.142857143, 0.785714286, 0.023809524, 1.523809524, 0.976190476, 0.023809524, 0.142857143)
ssim_error <-c(0.01193773, 0.015151569, 0.003144532, 0.003182908, 0.008125274, 0.013796366, 0.00359078, 0.019002591, 0.005031524, 0.004370175)
ssim_error = ssim_error * 100
ssim_qp_error <-c(3.833333333, 1.80952381, 0.69047619, 0.571428571, 2, 1.904761905, 0.761904762, 2.119047619, 0.857142857, 0.976190476)
all_errors = cbind(fps_error, bit_error, ssim_error)
all_qp_errors = cbind(fps_qp_error, bit_qp_error, ssim_qp_error)
modes = cbind(rep("FPS error",10), rep("Bitrate error",10), rep("SSIM error",10))
journal_linear_data <-data.frame(fps_error, fps_qp_error,bit_error,bit_qp_error,ssim_error,ssim_qp_error )
yvars <- c("fps_error","bit_error","ssim_error")
yvars_qp <-c("fps_qp_error","bit_qp_error","ssim_qp_error")
xvars <- c("FPS", "Bitrate", "SSIM")
graphics.off()
bmp(filename="boxplot_B2_error.bmp")
op <- par(mfrow = c(1, 3), #matrix of plots
oma = c(0,0,2,0),mar=c(5.1, 7.1, 2.1, 2.1),mgp=c(4,1,0)) #outer margins
par(cex.lab=3)
par(cex.axis=3)
for (i in 1:3) {boxplot(journal_linear_data[,yvars[i]], xlab=xvars[i], ylab="Percentage error", outcex = 2)}
par(op)
mtext(text="Percentage error per mode for B2",side=3, line=1.5, font=2, cex=2,adj=0.95, col='black')
dev.off()
The image output is shown below. As you can see the y-axis does not have the same range for all graphs. How can I fix this? For example starting in 0.5 or 0.
You can simply put an ylim = c(0, 5) in all your boxplot() call. This sets y-axis range (roughly) between 0 and 5.
Perhaps you did not see ylim argument in ?boxplot; the "Arguments" section also does not mention it. But ylim is just a trivial graphical parameter passed via "...". You can also find such example in the "Examples" session of ?boxplot:
boxplot(len ~ dose, data = ToothGrowth,
boxwex = 0.25, at = 1:3 - 0.2,
subset = supp == "VC", col = "yellow",
main = "Guinea Pigs' Tooth Growth",
xlab = "Vitamin C dose mg",
ylab = "tooth length",
xlim = c(0.5, 3.5), ylim = c(0, 35), yaxs = "i")

R: How to add highlighted angle lines in polar plots?

Please consider the following sample polar plot:
library(plotrix)
testlen <- c(rnorm(36)*2 + 5)
testpos <- seq(0, 350, by = 10)
polar.plot(testlen, testpos, main = "Test Polar Plot",
lwd = 3, line.col = 4, rp.type = "s")
I would like to add lines at angles 30 and 330 as well as 150 and 210 (from the center to the outside). I experimented with the line function but could not get it to work.
The calculations for exact placement are a bit goofy but using your test data
set.seed(15)
testlen<-c(rnorm(36)*2+5)
testpos<-seq(0,350,by=10)
polar.plot(testlen,testpos,main="Test Polar Plot",
lwd=3,line.col=4,rp.type="s")
You can add lines at 20,150,210,300 with
add.line <- c(30,330, 150,210)/360*2*pi
maxlength <- max(pretty(range(testlen)))-min(testlen)
segments(0, 0, cos(add.line) * maxlength, sin(add.line) * maxlength,
col = "red")
And that makes the following plot
You can just use the rp.type = "r" argument and add = TRUE. So, something like
library(plotrix)
set.seed(1)
testlen <- c(rnorm(36)*2 + 5)
testpos <- seq(0,350, by = 10)
polar.plot(testlen, testpos, main = "Test Polar Plot",
lwd = 3, line.col = 4, rp.type = "s")
followed by
pos <- c(30, 330, 150, 210)
len <- c(10, 10, 10, 10)
polar.plot(lengths = len, polar.pos = pos,
radial.lim = c(0, 15),
lwd = 2, line.col = 2, rp.type = "r", add = TRUE)
yields your desired output.

plot an item map (based on difficulties)

I have a data set of item difficulties that correspond to items on a questionnaire that looks like this:
## item difficulty
## 1 ITEM_01_A 2.31179818
## 2 ITEM_02_B 1.95215238
## 3 ITEM_03_C 1.93479536
## 4 ITEM_04_D 1.62610855
## 5 ITEM_05_E 1.62188759
## 6 ITEM_06_F 1.45137544
## 7 ITEM_07_G 0.94255210
## 8 ITEM_08_H 0.89941812
## 9 ITEM_09_I 0.72752197
## 10 ITEM_10_J 0.61792597
## 11 ITEM_11_K 0.61288399
## 12 ITEM_12_L 0.39947791
## 13 ITEM_13_M 0.32209970
## 14 ITEM_14_N 0.31707701
## 15 ITEM_15_O 0.20902108
## 16 ITEM_16_P 0.19923607
## 17 ITEM_17_Q 0.06023317
## 18 ITEM_18_R -0.31155481
## 19 ITEM_19_S -0.67777282
## 20 ITEM_20_T -1.15013758
I want to make an item map of these items that looks similar (not exactly) to this (I created this in word but it lacks true scaling as I just eyeballed the scale). It's not really a traditional statistical graphic and so I don't really know how to approach this. I don't care what graphics system this is done in but I am more familiar with ggplot2 and base.
I would greatly appreciate a method of plotting this sort of unusual plot.
Here's the data set (I'm including it as I was having difficulty using read.table on the dataframe above):
DF <- structure(list(item = c("ITEM_01_A", "ITEM_02_B", "ITEM_03_C",
"ITEM_04_D", "ITEM_05_E", "ITEM_06_F", "ITEM_07_G", "ITEM_08_H",
"ITEM_09_I", "ITEM_10_J", "ITEM_11_K", "ITEM_12_L", "ITEM_13_M",
"ITEM_14_N", "ITEM_15_O", "ITEM_16_P", "ITEM_17_Q", "ITEM_18_R",
"ITEM_19_S", "ITEM_20_T"), difficulty = c(2.31179818110545, 1.95215237740899,
1.93479536058926, 1.62610855327073, 1.62188759115818, 1.45137543733965,
0.942552101641177, 0.899418119889782, 0.7275219669431, 0.617925967008653,
0.612883990709181, 0.399477905189577, 0.322099696946661, 0.31707700560997,
0.209021078266059, 0.199236065264793, 0.0602331732900628, -0.311554806052955,
-0.677772822413495, -1.15013757942119)), .Names = c("item", "difficulty"
), row.names = c(NA, -20L), class = "data.frame")
Thank you in advance.
Here is a quick example:
ggplot(DF, aes(x=1, y=difficulty, label = item)) +
geom_text(size = 3) +
scale_y_continuous(breaks = DF$difficulty, minor_breaks = NULL, labels = sprintf("%.02f", DF$difficulty)) +
scale_x_continuous(breaks = NULL) +
opts(panel.grid.major = theme_blank())
but sometimes two items are too narrow so overlapped. You may do like this:
m <- 0.1
nd <- diff(rev(DF$difficulty))
nd <- c(0, cumsum(ifelse(nd < m, m, nd)))
DF$nd <- rev(rev(DF$difficulty)[1] + nd)
ggplot(DF, aes(x=1, y=nd, label = item)) +
geom_text(size = 3) +
scale_y_continuous(breaks = DF$nd, labels = sprintf("%.02f", DF$difficulty), DF$difficulty, minor_breaks = NULL) +
scale_x_continuous(breaks = NULL) +
opts(panel.grid.major = theme_blank())
Here is a solution with base graphics.
# Compute the position of the labels to limit overlaps:
# move them as little as possible, but keep them
# at least .1 units apart.
library(quadprog)
spread <- function(b, eps=.1) {
stopifnot(b == sort(b))
n <- length(b)
Dmat <- diag(n)
dvec <- b
Amat <- matrix(0,nr=n,nc=n-1)
Amat[cbind(1:(n-1), 1:(n-1))] <- -1
Amat[cbind(2:n, 1:(n-1))] <- 1
bvec <- rep(eps,n-1)
r <- solve.QP(Dmat, dvec, Amat, bvec)
r$solution
}
DF <- DF[ order(DF$difficulty), ]
DF$position <- spread(DF$difficulty, .1)
ylim <- range(DF$difficulty)
plot( NA,
xlim = c(.5,2),
ylim = ylim + .1*c(-1,1)*diff(ylim),
axes=FALSE, xlab="", ylab=""
)
text(.9, DF$position, labels=round(DF$difficulty,3), adj=c(1,0))
text(1.1, DF$position, labels=DF$item, adj=c(0,0))
arrows(1,min(DF$position),1,max(DF$position),code=3)
text(1,min(DF$position),labels="Easier",adj=c(.5,2))
text(1,max(DF$position),labels="More difficult",adj=c(.5,-1))
text(.9, max(DF$position),labels="Difficulty",adj=c(1,-2))
text(1.1,max(DF$position),labels="Item", adj=c(0,-2))
My own attempt but I think I'm going to like Vincent's solution much better as it looks like my original specification.
DF <- DF[order(DF$difficulty), ]
par(mar=c(1, 1, 3, 0)+.4)
plot(rep(1:2, each=10), DF$difficulty, main = "Item Map ",
ylim = c(max(DF$difficulty)+1, min(DF$difficulty)-.2),
type = "n", xlab="", ylab="", axes=F, xaxs="i")
text(rep(1.55, 20), rev(DF$difficulty[c(T, F)]),
DF$item[c(F, T)], cex=.5, pos = 4)
text(rep(1, 20), rev(DF$difficulty[c(F, T)]),
DF$item[c(T, F)], cex=.5, pos = 4)
par(mar=c(0, 0, 0,0))
arrows(1.45, 2.45, 1.45, -1.29, .1, code=3)
text(rep(1.52, 20), DF$difficulty[c(T, F)],
rev(round(DF$difficulty, 2))[c(T, F)], cex=.5, pos = 2)
text(rep(1.44, 20), DF$difficulty[c(F, T)],
rev(round(DF$difficulty, 2))[c(F, T)], cex=.5, pos = 2)
text(1.455, .5, "DIFFICULTY", cex=1, srt = -90)
text(1.45, -1.375, "More Difficult", cex=.6)
text(1.45, 2.5, "Easier", cex=.6)
par(mar=c(0, 0, 0,0))

Resources