Related
I am trying to create a forestplot, using forestplotter function, am able to get a beautiful graph, but am not able to see the entire graph, the column widths in few of the columns are so big, even if the string size is less, making the width of the entire graph, so big to see, can someone help me with this and also is it possible to align the datahrame contents uniformly centre aligned......Please help me with this
The code and relevant data are
###Required packages###
library(grid)
library(forestploter)
library(rmeta)
library(gridExtra)
#Data entered#
df <- data.frame(Study=c("A","B","C","D","Summary"),
nA = c(24,187,36,26,273),
median_A = c(4.9,5.69,8.866995074,8.5,NA),
Q1A =c(3,2.86,4.495073892,2,NA),
Q3A =c(8.5,9.78,14.96305419,32,NA),
nP = c(23,193,36,26,278),
median_P = c(7.2,6.79,8.990147783,12.5,NA),
Q1P =c(3.4,3.59,4.002463054,2,NA),
Q3P =c(10.9,10.12,12.06896552,43,NA),
W = c("10.6%","80.8%","8.0%","0.70%",NA),
E=c(-2.3,-1.1,-0.123152709,-4,-1.16881587),
UL=c(1.161473203,0.156288294,3.881699516,10.02689306,-0.039791047),
LL=c(-5.761473203,-2.356288294,-4.128004935,-18.02689306,-2.297840692))
#Calculate SE for box size#
df$SE <- (df$UL-df$E)/1.96
#Column for Confidence intervals for Drug A and Placebo, with 2 significant digit#
df$IQRA <- sprintf("%.2f (%.2f to %.2f)",df$median_A,df$Q1A, df$Q3A)
df$IQRP <- sprintf("%.2f (%.2f to %.2f)",df$median_P,df$Q1P, df$Q3P)
#Column for Confidence intervals for NET EFFECT, with 2 significant digit#
df$MD <- sprintf("%.2f (%.2f to %.2f)", df$E, df$LL, df$UL)
#Create a column with space for forest plot#
df$" "<- paste(rep(" ", 16), collapse = " ")
##Forest plot theme##
#To be modified as needed#
ftn <-forest_theme(
base_size = 16,
base_family = "serif",
ci_pch = 15,
ci_col = "black",
ci_lty = 1,
ci_lwd = 1,
ci_Theight = 0.25,
legend_name = " ",
legend_position = "right",legend_value = "",
xaxis_lwd = 1,
xaxis_cex = 0.7,
refline_lwd = 1,
refline_lty = "dashed",
refline_col = "red",
summary_fill = "blue",
summary_col = "blue",
footnote_cex = 0.4,
footnote_fontface = "plain",
footnote_col = "black",
title_just = c("center"),
title_cex = 1.1,
title_fontface = "bold",
title_col = "black",
show.rownames = FALSE)
##Table in Order for Forest plot##
#First get Column names#
colnames(df)
df2 <-df[,c(1,2,15,6,16,18,17)]
#Make NA cells empty
df2[5,3] <-c(" ")
df2[5,5] <-c(" ")
##Forestplot##
plot<-forest(df2,
est = df$E,
lower = df$LL,
upper = df$UL,
sizes = (df$SE/10),
ci_column = 6,
ref_line = 0,
arrow_lab = c("Drug A Better", "Placebo Better"),
xlim = c(-7, 6),
is_summary = c(FALSE,FALSE,FALSE,FALSE,TRUE),
xlog = FALSE,
ticks_digits = 0,ticks_at = c(-6,0,6),
theme = ftn)
##Show plot
print(plot, autofit = FALSE)
I have been trying to create a risk radar using plotly and now plotrix, I have encountered limitiations with both (based on my need and also my skillset with R).
With plotly I had pretty much most of the things i wanted except the deal breaker was not being able to label the radial axis (Team A, Team B etc.)
My version of this using plotrix is almost where i need to be and just need some guidance to get me over the line?
I have 4 issues:
Can the title be moved to the left or right?
The radial? labels are bleeding into the chart circle making it hard
to read, can they be adjusted somehow?
Is it possible to change the font (size and/or colour) of the labels 0/30/60/90/180?
Is there anyway to add text to the plotted point, in my case i
wanted to have the RiskID as the labels
My chart looks like this:
library(plotrix)
# Build sample dataset
aRiskID <- c(1, 15, 23, 28, 35)
bRiskDays <- as.numeric(c(28, 15, 85, 153, 100))
cTheta <- as.integer(c(20, 80, 130, 240, 320))
dConsequence <- c("Major", "Major", "Minor", "Moderate", "Minor")
myRisks <- data.frame(RiskID = aRiskID, RiskDays = bRiskDays, Theta = cTheta, CurrentConsequence = dConsequence)
myLabels <- c("Team A", "Team B", "Team C", "Team D", "Team E", "Team F", "Team G", "Team H")
# Test different point colours
# initializing vector of colors as NA
colors_plot <- rep(NA,length(myRisks))
# set of conditions listed in the plot
colors_plot[myRisks$CurrentConsequence == "Major"] <- "black"
colors_plot[myRisks$CurrentConsequence == "Moderate"] <- "red"
colors_plot[myRisks$CurrentConsequence == "Minor"] <- "green"
# add more conditions as needed
# par(mar=c(2,5,5,5))
# plot the chart
radial.plot(myRisks$RiskDays,
myRisks$Theta,
start = pi/2,
clockwise = FALSE,
# start=pi/2,clockwise=TRUE,
show.grid.labels=1,
rp.type="s",
main="Risk Radar",
radial.lim=c(0,30,60,90,180),
radlab = TRUE,
point.symbols=17,
point.col=colors_plot,
cex = 2,
cex.axis = 0.25,
cex.lab = 0.25,
lwd=par("lwd"),mar=c(2,2,3,2),
# show.centroid=TRUE,
labels=myLabels)
I don't know where else to go with this and so any tips using plotrix or another charting package to achieve the end result would be great.
You should look at the functions radial.plot.labels and radial.grid
# plot the chart
radial.plot(myRisks$RiskDays,
myRisks$Theta,
start = pi/2,
clockwise = FALSE,
# start=pi/2,clockwise=TRUE,
show.grid.labels=1,
rp.type="s",
# main="Risk Radar",
radial.lim=c(0,30,60,90,180),
radial.labels = '',
radlab = TRUE,
point.symbols=17,
point.col=colors_plot,
cex = 2,
cex.axis = 0.25,
cex.lab = 0.25,
lwd=par("lwd"),mar=c(2,2,3,2),
# show.centroid=TRUE,
labels=NULL, label.pos = pi / 4 * 2:9)
# 1
mtext("Risk Radar", at = par('usr')[1], font = 2)
# 2
at <- c(0,30,60,90,180)
radial.plot.labels(max(at) + 35, pi / 4 * 2:9, labels = myLabels, radial.lim = at)
# 3
radial.plot.labels(at, pi / 2 * 3, labels = at, col = 1:5, cex = 1.5)
# 4
radial.plot.labels(myRisks$RiskDays, myRisks$Theta, start = pi/2,
clockwise = FALSE, labels = myRisks$RiskID)
If you really need perpendicular labels, you can use the radial.grid function or loop over the labels with separate rotations (srt). It's a real shame that srt isn't vectorized in text, it would make this a lot easier
th <- pi / 4 * 2:9
sapply(seq_along(th), function(ii) {
i <- ifelse((th[ii] > pi / 2) & (th[ii] < pi / 2 * 3), pi, 0)
radial.plot.labels(max(at) + 35, th[ii], labels = myLabels[ii],
radial.lim = at, srt = (th[ii] - i) * 180 / pi)
})
I accidentally made this lovely snowflake #accidental__aRt:
th <- pi / 4 * 2:9
sapply(th, function(x)
radial.plot.labels(max(at) + 35, pi / 4 * 2:9, labels = myLabels,
radial.lim = at, srt = x * 180 / pi))
I'm trying to use the legend function in R. I want the label to read $\alpha = 1, \beta = 2$, so I tried using
legend("topleft", c(expression(alpha = 1, beta = 2)))
But that did not do the trick. Any advice?
What if I wanted my label to read $Gamma(\alpha = 1, \beta = 2)$? I tried
legend("topleft", c(paste("Gamma( ", expression(alpha = 1, beta = 2))))
We can place everything within the expression itself
plot(1)
legend("topleft", expression(alpha~"= 1, "~beta~" = 2"))
If we need Gamma(
legend("topleft", expression(Gamma*"("*alpha~"= 1, "~beta~" = 2)"))
If we need the word Gamma
legend("topleft", expression("Gamma("*alpha~"= 1, "~beta~" = 2)"))
Another option is str2lang, which allows using variables
{
one<-1
two<-2
greek<-"Gamma"
greek1<-"alpha"
greek2<-"beta"
note <- paste0(greek,"*'('*",greek1,"*' = ",one,", '*",greek2,"*' = ",two,")' " )
plot(1)
legend("topleft", legend=str2lang(paste0("paste(",note,")") ) )
}
This question already has answers here:
Shading a kernel density plot between two points.
(5 answers)
Closed 7 years ago.
I've written code to plot density data for variations of an A/B test. I'd like to improve the visual by shading (with the fill being slightly transparent) the area below each curve. I'm currently using matplot, but understand ggplot might be a better option.
Any ideas? Thanks.
# Setup data frame - these are results from an A/B experiment
conv_data = data.frame(
VarNames = c("Variation 1", "Variation 2", "Variation 3") # Set variation names
,NumSuccess = c(1,90,899) # Set number of successes / conversions
,NumTrials = c(10,100,1070) # Set number of trials
)
conv_data$NumFailures = conv_data$NumTrials - conv_data$NumSuccess # Set number of failures [no conversions]
num_var = NROW(conv_data) # Set total number of variations
plot_col = rainbow(num_var) # Set plot colors
get_density_data <- function(n_var, s, f) {
x = seq(0,1,length.out=100) # 0.01,0.02,0.03...1
dens_data = matrix(data = NA, nrow=length(x), ncol=(n_var+1))
dens_data[,1] = x
# set density data
for(j in 1:n_var) {
# +1 to s[], f[] to ensure uniform prior
dens_data[,j+1] = dbeta(x, s[j]+1, f[j]+1)
}
return(dens_data)
}
density_data = get_density_data(num_var, conv_data$NumSuccess, conv_data$NumFailures)
matplot(density_data[,1]*100, density_data[,-1], type = "l", lty = 1, col = plot_col, ylab = "Probability Density", xlab = "Conversion Rate %", yaxt = "n")
legend("topleft", col=plot_col, legend = conv_data$VarNames, lwd = 1)
This produces the following plot:
# Setup data frame - these are results from an A/B experiment
conv_data = data.frame(
VarNames = c("Variation 1", "Variation 2", "Variation 3") # Set variation names
,NumSuccess = c(1,90,899) # Set number of successes / conversions
,NumTrials = c(10,100,1070) # Set number of trials
)
conv_data$NumFailures = conv_data$NumTrials - conv_data$NumSuccess # Set number of failures [no conversions]
num_var = NROW(conv_data) # Set total number of variations
plot_col = rainbow(num_var) # Set plot colors
get_density_data <- function(n_var, s, f) {
x = seq(0,1,length.out=100) # 0.01,0.02,0.03...1
dens_data = matrix(data = NA, nrow=length(x), ncol=(n_var+1))
dens_data[,1] = x
# set density data
for(j in 1:n_var) {
# +1 to s[], f[] to ensure uniform prior
dens_data[,j+1] = dbeta(x, s[j]+1, f[j]+1)
}
return(dens_data)
}
density_data = get_density_data(num_var, conv_data$NumSuccess, conv_data$NumFailures)
matplot(density_data[,1]*100, density_data[,-1], type = "l",
lty = 1, col = plot_col, ylab = "Probability Density",
xlab = "Conversion Rate %", yaxt = "n")
legend("topleft", col=plot_col, legend = conv_data$VarNames, lwd = 1)
## and add this part
for (ii in seq_along(plot_col))
polygon(c(density_data[, 1] * 100, rev(density_data[, 1] * 100)),
c(density_data[, ii + 1], rep(0, nrow(density_data))),
col = adjustcolor(plot_col[ii], alpha.f = .25))
Was able to answer own question with:
df = as.data.frame(t(conversion_data))
dfs = stack(df)
ggplot(dfs, aes(x=values)) + geom_density(aes(group=ind, colour=ind, fill=ind), alpha=0.3)
I have a data set of item difficulties that correspond to items on a questionnaire that looks like this:
## item difficulty
## 1 ITEM_01_A 2.31179818
## 2 ITEM_02_B 1.95215238
## 3 ITEM_03_C 1.93479536
## 4 ITEM_04_D 1.62610855
## 5 ITEM_05_E 1.62188759
## 6 ITEM_06_F 1.45137544
## 7 ITEM_07_G 0.94255210
## 8 ITEM_08_H 0.89941812
## 9 ITEM_09_I 0.72752197
## 10 ITEM_10_J 0.61792597
## 11 ITEM_11_K 0.61288399
## 12 ITEM_12_L 0.39947791
## 13 ITEM_13_M 0.32209970
## 14 ITEM_14_N 0.31707701
## 15 ITEM_15_O 0.20902108
## 16 ITEM_16_P 0.19923607
## 17 ITEM_17_Q 0.06023317
## 18 ITEM_18_R -0.31155481
## 19 ITEM_19_S -0.67777282
## 20 ITEM_20_T -1.15013758
I want to make an item map of these items that looks similar (not exactly) to this (I created this in word but it lacks true scaling as I just eyeballed the scale). It's not really a traditional statistical graphic and so I don't really know how to approach this. I don't care what graphics system this is done in but I am more familiar with ggplot2 and base.
I would greatly appreciate a method of plotting this sort of unusual plot.
Here's the data set (I'm including it as I was having difficulty using read.table on the dataframe above):
DF <- structure(list(item = c("ITEM_01_A", "ITEM_02_B", "ITEM_03_C",
"ITEM_04_D", "ITEM_05_E", "ITEM_06_F", "ITEM_07_G", "ITEM_08_H",
"ITEM_09_I", "ITEM_10_J", "ITEM_11_K", "ITEM_12_L", "ITEM_13_M",
"ITEM_14_N", "ITEM_15_O", "ITEM_16_P", "ITEM_17_Q", "ITEM_18_R",
"ITEM_19_S", "ITEM_20_T"), difficulty = c(2.31179818110545, 1.95215237740899,
1.93479536058926, 1.62610855327073, 1.62188759115818, 1.45137543733965,
0.942552101641177, 0.899418119889782, 0.7275219669431, 0.617925967008653,
0.612883990709181, 0.399477905189577, 0.322099696946661, 0.31707700560997,
0.209021078266059, 0.199236065264793, 0.0602331732900628, -0.311554806052955,
-0.677772822413495, -1.15013757942119)), .Names = c("item", "difficulty"
), row.names = c(NA, -20L), class = "data.frame")
Thank you in advance.
Here is a quick example:
ggplot(DF, aes(x=1, y=difficulty, label = item)) +
geom_text(size = 3) +
scale_y_continuous(breaks = DF$difficulty, minor_breaks = NULL, labels = sprintf("%.02f", DF$difficulty)) +
scale_x_continuous(breaks = NULL) +
opts(panel.grid.major = theme_blank())
but sometimes two items are too narrow so overlapped. You may do like this:
m <- 0.1
nd <- diff(rev(DF$difficulty))
nd <- c(0, cumsum(ifelse(nd < m, m, nd)))
DF$nd <- rev(rev(DF$difficulty)[1] + nd)
ggplot(DF, aes(x=1, y=nd, label = item)) +
geom_text(size = 3) +
scale_y_continuous(breaks = DF$nd, labels = sprintf("%.02f", DF$difficulty), DF$difficulty, minor_breaks = NULL) +
scale_x_continuous(breaks = NULL) +
opts(panel.grid.major = theme_blank())
Here is a solution with base graphics.
# Compute the position of the labels to limit overlaps:
# move them as little as possible, but keep them
# at least .1 units apart.
library(quadprog)
spread <- function(b, eps=.1) {
stopifnot(b == sort(b))
n <- length(b)
Dmat <- diag(n)
dvec <- b
Amat <- matrix(0,nr=n,nc=n-1)
Amat[cbind(1:(n-1), 1:(n-1))] <- -1
Amat[cbind(2:n, 1:(n-1))] <- 1
bvec <- rep(eps,n-1)
r <- solve.QP(Dmat, dvec, Amat, bvec)
r$solution
}
DF <- DF[ order(DF$difficulty), ]
DF$position <- spread(DF$difficulty, .1)
ylim <- range(DF$difficulty)
plot( NA,
xlim = c(.5,2),
ylim = ylim + .1*c(-1,1)*diff(ylim),
axes=FALSE, xlab="", ylab=""
)
text(.9, DF$position, labels=round(DF$difficulty,3), adj=c(1,0))
text(1.1, DF$position, labels=DF$item, adj=c(0,0))
arrows(1,min(DF$position),1,max(DF$position),code=3)
text(1,min(DF$position),labels="Easier",adj=c(.5,2))
text(1,max(DF$position),labels="More difficult",adj=c(.5,-1))
text(.9, max(DF$position),labels="Difficulty",adj=c(1,-2))
text(1.1,max(DF$position),labels="Item", adj=c(0,-2))
My own attempt but I think I'm going to like Vincent's solution much better as it looks like my original specification.
DF <- DF[order(DF$difficulty), ]
par(mar=c(1, 1, 3, 0)+.4)
plot(rep(1:2, each=10), DF$difficulty, main = "Item Map ",
ylim = c(max(DF$difficulty)+1, min(DF$difficulty)-.2),
type = "n", xlab="", ylab="", axes=F, xaxs="i")
text(rep(1.55, 20), rev(DF$difficulty[c(T, F)]),
DF$item[c(F, T)], cex=.5, pos = 4)
text(rep(1, 20), rev(DF$difficulty[c(F, T)]),
DF$item[c(T, F)], cex=.5, pos = 4)
par(mar=c(0, 0, 0,0))
arrows(1.45, 2.45, 1.45, -1.29, .1, code=3)
text(rep(1.52, 20), DF$difficulty[c(T, F)],
rev(round(DF$difficulty, 2))[c(T, F)], cex=.5, pos = 2)
text(rep(1.44, 20), DF$difficulty[c(F, T)],
rev(round(DF$difficulty, 2))[c(F, T)], cex=.5, pos = 2)
text(1.455, .5, "DIFFICULTY", cex=1, srt = -90)
text(1.45, -1.375, "More Difficult", cex=.6)
text(1.45, 2.5, "Easier", cex=.6)
par(mar=c(0, 0, 0,0))