Forestplot R - expanding plot, edit a variable name - r

I prepared the following code to visualize my data:
library(forestplot)
# Point estimates with lower/upper limits; every box is drawn at the same size.
test_data <- data.frame(
  coef    = c(1.14, 0.31, 10.70),
  low     = c(1.01, 0.12, 1.14),
  high    = c(1.30, 0.83, 100.16),
  boxsize = c(0.2, 0.2, 0.2)
)

# Text table printed left of the graph: one header row, then one row per variable.
row_names <- cbind(
  c("Variable", "Variable 1", "Variable 2",
    "So looooooong and nasty name of the variable"),
  c("OR", test_data$coef),
  c("CI -95%", test_data$low),
  c("CI +95%", test_data$high)
)

# Prepend an all-NA row so the numeric columns have as many rows as row_names
# (the header row has no estimate to draw).
test_data <- rbind(rep(NA, 4), test_data)

forestplot(
  labeltext   = row_names,
  mean        = test_data$coef,
  lower       = test_data$low,
  upper       = test_data$high,
  is.summary  = c(TRUE, FALSE, FALSE, FALSE),  # only the header row is flagged
  boxsize     = test_data$boxsize,
  zero        = 1,
  xlog        = TRUE,
  xticks      = c(0.1, 1, 10, 100),
  xlab        = "OR (95% CI)",
  title       = "My Happy Happy Title \n o happy happy title...\n",
  col         = fpColors(lines = "black", box = "black"),
  ci.vertices = TRUE
)
It gives the following forest plot:
I would like to:
1) expand the plot and diminish font of the plot details on the left for better visualization
2) edit "So looooooong and nasty name of the variable" to move part "name..." below the row like:
"
So looooooong and nasty
name of the variable
"
However, when I write it as "/nSo.../n", it adds another row of numbers to the "OR" and "CI" columns.
How can I correct this?

Three possibilities (one more than you asked for):
1) change text of row labels with txt_gp.
2) cut the column spacing from the 6 mm default to half that value by passing colgap a grid unit object, i.e. colgap = unit(3, "mm"). Fully understanding the options for forestplot requires understanding the grid plotting system.
3) add a "\n" to the loooong label. (I'm puzzled you didn't see that possibility, since you already had a "\n" in the title.)
# Self-contained version of the plot. The label matrix must be built from the
# three data rows BEFORE the NA header row is prepended to test_data; building
# it afterwards would give the labels one row more than the numeric vectors.
test_data <- data.frame(
  coef    = c(1.14, 0.31, 10.70),
  low     = c(1.01, 0.12, 1.14),
  high    = c(1.30, 0.83, 100.16),
  boxsize = c(0.2, 0.2, 0.2)
)

# The "\n" inside the last label splits it over two lines within a single row.
row_names <- cbind(
  c("Variable", "Variable 1", "Variable 2",
    "So looooooong and \nnasty name of the variable"),
  c("OR", test_data$coef),
  c("CI -95%", test_data$low),
  c("CI +95%", test_data$high)
)

# Header row carries no estimate, so the numeric data gets a leading NA row.
test_data <- rbind(rep(NA, 4), test_data)

forestplot(
  labeltext   = row_names,
  mean        = test_data$coef,
  upper       = test_data$high,
  lower       = test_data$low,
  is.summary  = c(TRUE, FALSE, FALSE, FALSE),
  boxsize     = test_data$boxsize,
  zero        = 1,
  # Gap between the text columns, given as a grid unit.
  colgap      = unit(3, "mm"),
  # Shrink the row labels; the title cex is set back to 1 explicitly because
  # changing only the label cex was observed to shrink the title as well.
  txt_gp      = fpTxtGp(label = gpar(cex = 0.7),
                        title = gpar(cex = 1)),
  xlog        = TRUE,
  xlab        = "OR (95% CI)",
  col         = fpColors(lines = "black", box = "black"),
  title       = "My Happy Happy Title \n o happy happy title...\n",
  ci.vertices = TRUE,
  xticks      = c(0.1, 1, 10, 100)
)
If I only used a cex of 0.7 in the call to gpar passed to 'label', it also affected the size of the title, so I needed to "reset" the 'cex' of the 'title' back to 1.

Related

How to adjust column width in the data frame passed to the forestploter forest() function

I am trying to create a forest plot using the forest() function from the forestploter package. I get a beautiful graph, but I cannot see all of it: some of the columns are very wide even though the strings in them are short, which makes the whole graph too wide to view. Can someone help me with this? Also, is it possible to center-align the data frame contents uniformly? Please help me with this.
The code and relevant data are
###Required packages###
library(grid)
library(forestploter)
library(rmeta)
library(gridExtra)
# Input data: four studies (A-D) plus a "Summary" row ----
df <- data.frame(
  Study    = c("A", "B", "C", "D", "Summary"),
  nA       = c(24, 187, 36, 26, 273),
  median_A = c(4.9, 5.69, 8.866995074, 8.5, NA),
  Q1A      = c(3, 2.86, 4.495073892, 2, NA),
  Q3A      = c(8.5, 9.78, 14.96305419, 32, NA),
  nP       = c(23, 193, 36, 26, 278),
  median_P = c(7.2, 6.79, 8.990147783, 12.5, NA),
  Q1P      = c(3.4, 3.59, 4.002463054, 2, NA),
  Q3P      = c(10.9, 10.12, 12.06896552, 43, NA),
  W        = c("10.6%", "80.8%", "8.0%", "0.70%", NA),
  E        = c(-2.3, -1.1, -0.123152709, -4, -1.16881587),
  UL       = c(1.161473203, 0.156288294, 3.881699516, 10.02689306, -0.039791047),
  LL       = c(-5.761473203, -2.356288294, -4.128004935, -18.02689306, -2.297840692)
)

# Standard error recovered from the upper limit (intended for the box size).
df$SE <- (df$UL - df$E) / 1.96

# Display strings "median (Q1 to Q3)" for both arms, two decimal places each.
# Rows whose inputs are NA come out as "NA (NA to NA)".
df$IQRA <- with(df, sprintf("%.2f (%.2f to %.2f)", median_A, Q1A, Q3A))
df$IQRP <- with(df, sprintf("%.2f (%.2f to %.2f)", median_P, Q1P, Q3P))

# Display string "estimate (lower to upper)" for the effect, two decimal places.
df$MD <- with(df, sprintf("%.2f (%.2f to %.2f)", E, LL, UL))

# Blank, space-only column that reserves room for the forest plot itself.
df[[" "]] <- paste(rep(" ", 16), collapse = " ")
# Forest plot theme (adjust as needed) ----
ftn <- forest_theme(
  # Base text
  base_size         = 16,
  base_family       = "serif",
  # Confidence-interval glyphs and lines
  ci_pch            = 15,
  ci_col            = "black",
  ci_lty            = 1,
  ci_lwd            = 1,
  ci_Theight        = 0.25,
  # Legend
  legend_name       = " ",
  legend_position   = "right",
  legend_value      = "",
  # X axis
  xaxis_lwd         = 1,
  xaxis_cex         = 0.7,
  # Reference line
  refline_lwd       = 1,
  refline_lty       = "dashed",
  refline_col       = "red",
  # Summary row
  summary_fill      = "blue",
  summary_col       = "blue",
  # Footnote
  footnote_cex      = 0.4,
  footnote_fontface = "plain",
  footnote_col      = "black",
  # Title
  title_just        = "center",
  title_cex         = 1.1,
  title_fontface    = "bold",
  title_col         = "black",
  # Extra argument forwarded by name
  show.rownames     = FALSE
)
# Table laid out in display order ----
# Print the column names so the positions used below can be checked.
colnames(df)

# Positions pick: Study, nA, IQRA, nP, IQRP, the blank plot column, MD.
df2 <- df[, c(1, 2, 15, 6, 16, 18, 17)]

# The summary row (row 5) has no median/quartiles; blank out its two
# "NA (NA to NA)" cells (columns 3 and 5).
df2[5, c(3, 5)] <- " "

# Forest plot ----
plot <- forest(
  df2,
  est          = df$E,
  lower        = df$LL,
  upper        = df$UL,
  sizes        = df$SE / 10,
  ci_column    = 6,                    # the blank column selected above
  is_summary   = c(FALSE, FALSE, FALSE, FALSE, TRUE),
  ref_line     = 0,
  xlim         = c(-7, 6),
  xlog         = FALSE,
  ticks_at     = c(-6, 0, 6),
  ticks_digits = 0,
  arrow_lab    = c("Drug A Better", "Placebo Better"),
  theme        = ftn
)

# Show the plot
print(plot, autofit = FALSE)

svyboxplot results change when using different categories versus subsets

I am new to the survey package and have a mystery problem. I have made data weights using anesrake package and then created a survey design.
I have a problem when using svyboxplot and a grouping variable:
It draws similar boxplots for each grouping category, which is not true
When I studied the problem subsetting each of my category (15 of them)
the values are different for each area / different boxplots for each area.
Can anyone help me? I am desperate!
Here's a sample to test:
library(tidyverse)
# Example data: 12 rows in three riistakeskus groups. The weight is constant
# within each group.
col <- tibble(
  name = c(
    "seura 1", "seura 2", "seura 3", "seura 4", "seura 5", "seura 6",
    "seura 7", "seura 8", "seura 9", "seura 10", "seura 11", "seura 12"
  ),
  riistakeskus = c(
    "Keski-Suomi", "Keski-Suomi", "Keski-Suomi", "Keski-Suomi", "Keski-Suomi",
    "Satakunta", "Satakunta", "Satakunta",
    "Uusimaa", "Uusimaa", "Uusimaa", "Uusimaa"
  ),
  hirvi_sarvisuositus = c(1, 4, 5, 3, 7, 5, 3, 4, 6, 5, 8, 9),
  weights = c(
    1.1461438, 1.1461438, 1.1461438, 1.1461438, 1.1461438,
    0.5107815, 0.5107815, 0.5107815,
    2.0461937, 2.0461937, 2.0461937, 2.0461937
  )
)
library(survey)

# Survey design using the weights column; ids = ~1 declares no clusters.
my_des1 <- svydesign(data = col, weights = ~weights, ids = ~1)

# Grouped boxplot: one box per riistakeskus. FALSE is spelled out because the
# shorthand F is an ordinary variable that can be reassigned.
b <- svyboxplot(hirvi_sarvisuositus ~ factor(riistakeskus), my_des1,
                all.outliers = FALSE, ylim = c(0, 10))

# The same variable plotted separately for each group via subset(), for
# comparison with the grouped plot above.
svyboxplot(hirvi_sarvisuositus ~ 1,
           subset(my_des1, riistakeskus == "Keski-Suomi"), ylim = c(0, 10))
svyboxplot(hirvi_sarvisuositus ~ 1,
           subset(my_des1, riistakeskus == "Satakunta"), ylim = c(0, 10))
svyboxplot(hirvi_sarvisuositus ~ 1,
           subset(my_des1, riistakeskus == "Uusimaa"), ylim = c(0, 10))
I had the same problem and would like to add to Anthony's answer, but I cannot comment yet.
There is an error in survey:::svyboxplot.default as Anthony indicates but it does not seem to have anything to do with data points. If you use keep.var = FALSE with FUN=svyquantile it does return the overall quantiles instead of the group specific quantiles.
Compare
# Per-group quantiles requested without variances (keep.var = FALSE).
svyby(
  ~hirvi_sarvisuositus, ~riistakeskus, my_des1, svyquantile,
  quantiles = c(0, 0.25, 0.5, 0.75, 1),
  ci        = FALSE,
  keep.var  = FALSE,
  na.rm     = TRUE
)
with
# Quantiles for the whole design, ignoring the grouping variable.
svyquantile(
  ~hirvi_sarvisuositus,
  my_des1,
  quantiles = c(0, 0.25, 0.5, 0.75, 1),
  na.rm     = TRUE
)
Note that svyquantile can not compute the SE for some quantiles.
If you use keep.var=TRUE instead and try to extract the CIs, you get quantiles by group.
# Per-group quantiles with variances kept and confidence intervals requested.
svyby(
  ~hirvi_sarvisuositus, ~riistakeskus, my_des1, svyquantile,
  quantiles = c(0, 0.25, 0.5, 0.75, 1),
  na.rm     = TRUE,
  ci        = TRUE,
  keep.var  = TRUE,
  vartype   = "ci"
)
However, you can't change the svyquantile function options when calling svyboxplot. This needs to be fixed in the package. You could build your boxplots yourself instead. A simple base R solution:
# Group-wise weighted quantiles (0, 25, 50, 75, 100 %) for each riistakeskus.
q <- svyby(~hirvi_sarvisuositus, ~riistakeskus, my_des1, svyquantile,
           quantiles = c(0, 0.25, 0.5, 0.75, 1), na.rm = TRUE, ci = TRUE,
           keep.var = TRUE,
           vartype = "ci")

# Columns 2 to 6 are taken as the five quantile estimates per group.
# NOTE(review): column positions depend on the svyby output layout - confirm
# against the installed survey version.
boxstats <- q[, 2:6]

# bxp() draws boxes from precomputed statistics: one column per box.
bxp(list(
  stats = t(as.matrix(boxstats)),
  # One placeholder sample size per box, sized from the data so the call
  # still works when the number of groups is not three.
  n     = rep(100, nrow(boxstats)),
  names = rownames(boxstats)
))
To prevent whiskers inside the box, you can change qrule to use a different way to calculate quantiles (e.g. qrule="hf7" for the quantile() default).
An alternative solution would be to use a weighted boxplot from ggplot2:
library(ggplot2)
# Weighted boxplot: the weight aesthetic passes the weights column to
# geom_boxplot().
ggplot(
  data    = col,
  mapping = aes(
    x      = factor(riistakeskus),
    y      = hirvi_sarvisuositus,
    weight = weights
  )
) +
  geom_boxplot()
Please note that ggplot2 uses a slightly different estimation of the hinges, see help(geom_boxplot), which influences the results for low N.
great reproducible example, thank you! this result especially looks silly
# Grouped survey boxplot with the y axis fixed to 0-10.
svyboxplot(
  hirvi_sarvisuositus ~ riistakeskus,
  my_des1,
  ylim = c(0, 10)
)
i think this largely happens because svyquantile just needs more data points to get reasonable estimates..
if you look at the code inside survey:::svyboxplot.default you can find the line that produces all of the same quantile results
# The quantile call quoted from survey:::svyboxplot.default.
svyby(
  ~hirvi_sarvisuositus, ~riistakeskus, my_des1, svyquantile,
  ci        = FALSE,
  keep.var  = FALSE,
  na.rm     = TRUE,
  quantiles = c(0, 0.25, 0.5, 0.75, 1)
)
not sure if this is really a bug that the survey package author would want to fix.. perhaps consider using the ?bxp function if your use case has that small of a dataset?

Add Space Between Panels of Openair windRose Plots

I created some windrose plots using the openair package and I'm pretty happy with how they turned out but aesthetically it would be nice to have some space between panels. Here's an example:
# windrose plot----
library(openair)
data("mydata")
# Wind rose for the first 144 rows of mydata, one panel per weekday laid out
# in a single row of six panels. TRUE/FALSE are spelled out because T and F
# are ordinary variables that can be reassigned.
windRose(
  mydata[1:144, ],
  ws = "ws",
  wd = "wd",
  paddle = FALSE,
  type = "weekday",
  key.header = "Wind Speed (m/s)",
  key.footer = "",
  annotate = FALSE,
  angle = 30, # angle of "spokes"...sort of bins for wind direction
  cols = "jet",
  key.position = "right",
  dig.lab = 2,
  statistic = "prop.count", # "prop.count" sizes bins according to the
  # proportion of the frequency of measurements
  fontsize = 20,
  grid.line = 100,
  max.freq = 105, # maximum value for the radial limits
  key = list(
    header = "Wind Speed (m/s)",
    footer = "",
    labels = c("0 to 2", "2 to 4", "4 to 6", "6 or more"),
    breaks = c(0, 2, 4, 6)
  ),
  layout = c(6, 1)
)
Anyone have any ideas of how to add space between the panels?
After some digging I found that this plot function utilizes trellis plots, here is a good rundown on them: https://www.stat.auckland.ac.nz/~ihaka/787/lectures-trellis.pdf
Specifically the xyplot function is used to create the trellis plot. The help documentation for ?xyplot shows that you can adjust the argument between to achieve spacing between panels. The between argument is a list containing x and y values that represent space between panels. Therefore we can adjust the above code simply by adding the argument between = list(x=0.25, y = 0.25) and can adjust x and y to our preference like this:
library(openair)
data("mydata")
# Same wind rose as in the question, with space added between the panels.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
windRose(
  mydata[1:144, ],
  ws = "ws",
  wd = "wd",
  paddle = FALSE,
  type = "weekday",
  key.header = "Wind Speed (m/s)",
  key.footer = "",
  annotate = FALSE,
  angle = 30, # angle of "spokes"...sort of bins for wind direction
  cols = "jet",
  key.position = "right",
  dig.lab = 2,
  statistic = "prop.count", # "prop.count" sizes bins according to the
  # proportion of the frequency of measurements
  fontsize = 20,
  grid.line = 100,
  max.freq = 105, # maximum value for the radial limits
  key = list(
    header = "Wind Speed (m/s)",
    footer = "",
    labels = c("0 to 2", "2 to 4", "4 to 6", "6 or more"),
    breaks = c(0, 2, 4, 6)
  ),
  layout = c(6, 1),
  # Horizontal (x) and vertical (y) gaps between neighbouring panels;
  # increase the values for more separation.
  between = list(x = 0.25, y = 0.25)
)

Why Forest plot is not showing the confidence interval bars?

Hi, I am generating a forest plot with the following code, but the graph does not show the confidence intervals on the boxes. How can I improve this graphical representation?
# Table to plot: a header row (NA estimates) followed by three hazard ratios
# with their lower/upper limits and p-values.
mydf <- data.frame(
  Variables   = c("Variables", "Neuroticism_2", "Neuroticism_3", "Neuroticism_4"),
  HazardRatio = c(NA, 1.109, 1.296, 1.363),
  HazardLower = c(NA, 1.041, 1.206, 1.274),
  HazardUpper = c(NA, 1.182, 1.393, 1.458),
  Pvalue      = c(NA, "0.001", "<0.001", "<0.001"),
  stringsAsFactors = FALSE
)
#png('temp.png', width=8, height=4, units='in', res=400)

# y position of each row, counting down so the first row is drawn on top.
rowseq <- seq(nrow(mydf), 1)

# Bottom margin of 1 inch, no margin on the other three sides.
par(mai = c(1, 0, 0, 0))

# Point estimates as filled squares on an otherwise empty canvas. The x range
# is 22 units wide, whereas every interval below is narrower than 0.2 units.
plot(
  mydf$HazardRatio, rowseq,
  pch = 15,
  xlim = c(-10, 12), ylim = c(0, 7),
  xlab = "", ylab = "",
  xaxt = "n", yaxt = "n",
  bty = "n"
)
axis(1, seq(0, 5, by = .5), cex.axis = .5)

# Dotted vertical reference line at 1, then one horizontal segment per row
# running from the lower to the upper limit.
segments(1, -1, 1, 6.25, lty = 3)
segments(mydf$HazardLower, rowseq, mydf$HazardUpper, rowseq)

# Column 1: variable names (bold heading, italic entries).
text(-8, 6.5, "Variables", cex = .75, font = 2, pos = 4)
t1h <- ifelse(!is.na(mydf$Variables), mydf$Variables, "")
text(-8, rowseq, t1h, cex = .75, pos = 4, font = 3)

# Column 2: "HR (lower-upper)" strings; empty where the ratio is NA.
text(-1, 6.5, "Hazard Ratio (95%)", cex = .75, font = 2, pos = 4)
t3 <- ifelse(
  !is.na(mydf$HazardRatio),
  with(mydf, paste0(HazardRatio, " (", HazardLower, "-", HazardUpper, ")")),
  ""
)
text(3, rowseq, t3, cex = .75, pos = 4)

# Column 3: p-values; empty where missing.
text(7.5, 6.5, "P Value", cex = .75, font = 2, pos = 4)
t4 <- ifelse(!is.na(mydf$Pvalue), mydf$Pvalue, "")
text(7.5, rowseq, t4, cex = .75, pos = 4)
#dev.off()
Edit
I even tried to do this with the forestplot package, but I am still not getting the confidence intervals on the graph, and I would like the presentation to look like the graph above.
# Hazard ratios with lower/upper limits; all boxes share one size.
test_data <- data.frame(
  coef    = c(1.109, 1.296, 1.363),
  low     = c(1.041, 1.206, 1.274),
  high    = c(1.182, 1.393, 1.458),
  boxsize = c(0.1, 0.1, 0.1)
)

# Text table: header row plus one row per quartile.
row_names <- cbind(
  c("Variable", "N_Quartile 1", "N_Quartile 2", "N_Quartile 3"),
  c("HR", test_data$coef),
  c("CI -95%", test_data$low),
  c("CI +95%", test_data$high)
)

# Leading NA row so the numeric data has one entry per label row.
test_data <- rbind(NA, test_data)

forestplot(
  labeltext   = row_names,
  mean        = test_data$coef,
  lower       = test_data$low,
  upper       = test_data$high,
  clip        = c(0.1, 25),
  is.summary  = c(TRUE, FALSE, FALSE, FALSE),
  boxsize     = test_data$boxsize,
  zero        = 1,
  colgap      = unit(3, "mm"),
  txt_gp      = fpTxtGp(
    label = gpar(cex = 0.7),
    title = gpar(cex = 1)
  ),
  xlog        = TRUE,
  # The ticks span 0.1 to 7.5, while every interval lies between 1.04 and 1.46.
  xticks      = c(0.1, 1, 2.5, 5, 7.5),
  xlab        = "HR (95% CI)",
  col         = fpColors(lines = "black", box = "black"),
  ci.vertices = TRUE
)
Your intervals are quite small, so if you do it manually on plot it will take a while to refine the correct settings, and putting text together with it is not trivial. Right now your first code is not even 50% there.
My suggestion is to build up the plot slowly using forestplot, and identify the problem, for example if you just plot your data.frame, you see it works, that is the c.i is there, just that it's very narrow, and that's your problem at hand, adjusting the size using lwd.ci so that it is visible:
# Minimal plot of the first three columns only, with thicker interval lines.
forestplot(
  test_data[, 1:3],
  lwd.ci = 3
)
Now if we add in the text:
# Same plot with the label table added and interval lines kept at width 3.
forestplot(
  labeltext  = row_names,
  mean       = test_data$coef,
  lower      = test_data$low,
  upper      = test_data$high,
  is.summary = c(TRUE, FALSE, FALSE, FALSE),
  boxsize    = test_data$boxsize,
  txt_gp     = fpTxtGp(cex = 0.8),
  lwd.ci     = 3
)
So the text is taking up a bit too much space, i think one way is to use the conventional est[ll - ul] way of representing estimate and confidence interval, you can see examples here. One way I can try below is to wrap the values for the CI into 1 string, and have just two columns for text:
library(stringr)
# Fresh copy of the data (without the NA header row) so the text columns can
# be built from exactly three data rows.
test_data <- data.frame(
  coef    = c(1.109, 1.296, 1.363),
  low     = c(1.041, 1.206, 1.274),
  high    = c(1.182, 1.393, 1.458),
  boxsize = c(0.1, 0.1, 0.1)
)

# First text column: the row labels.
column1 <- c("Variable", "N_Quartile 1", "N_Quartile 2", "N_Quartile 3")

# Estimate and both limits, header included, as a character matrix.
column2 <- cbind(
  c("HR", test_data$coef),
  c("CI -95%", test_data$low),
  c("CI +95%", test_data$high)
)

# Pad every cell to the width of the longest one, then join the three cells
# of each row into one string so they occupy a single text column.
L <- max(nchar(column2))
padded_text <- apply(
  column2, 1,
  function(cells) paste(str_pad(cells, L), collapse = " ")
)

# Leading NA row so the numeric data has one entry per label row.
test_data <- rbind(NA, test_data)

# Write the plot to an 8 x 4 inch PDF.
pdf("test.pdf", width = 8, height = 4)
forestplot(
  labeltext  = cbind(column1, padded_text),
  mean       = test_data$coef,
  lower      = test_data$low,
  upper      = test_data$high,
  is.summary = c(TRUE, FALSE, FALSE, FALSE),
  boxsize    = test_data$boxsize,
  txt_gp     = fpTxtGp(cex = 0.8),
  align      = "c",
  lwd.ci     = 3,
  graphwidth = unit(100, "mm")
)
dev.off()

Heatmap do not display row and column names

I am stuck trying to display row and column names using this code. Rather than showing names, it shows numbers.
# Read the CSV and turn every column except the first into a numeric matrix.
data <- read.csv("/Users/apple/Documents/42_heatmap_new.csv", sep = ",")
# NOTE(review): column 1 is dropped here and never attached as row names -
# presumably why the row labels come out as numbers; confirm against the file.
mat_data <- data.matrix(data[, 2:ncol(data)])

# Interactive heatmap written to an HTML file, with no dendrograms.
# NOTE(review): `RowV` differs in case from `Colv` - check the intended
# argument spelling in the heatmaply documentation.
heatmaply(
  mat_data,
  file        = "heatmaply_preformed.html",
  main        = "Heatmap for drug resistance profile of 42 isolates",
  xlab        = "Antibiotics",
  ylab        = "Clinical isolates",
  dendrogram  = "none",
  RowV        = NA,
  Colv        = NA,
  plot_method = "plotly",
  margins     = c(150, 250, 30, 150),
  grid_gap    = 0.1,
  show_grid   = TRUE,
  cexCol      = 0.9,
  cexRow      = 1.0,
  lwid        = c(4, 1),
  lhei        = c(2, 2)
)

Resources