How to merge plots from metafor and ggplot? - r

Please, find My Data q and p below
I want to merge a Forest Plot computed with library(metafor) with two ggplots
I have tried multiple approaches with grid.arrange() and par(), but without any success. I know there are several threads on this topic, but none of them addresses the combination of ggplot and metafor.
The merged plot should be arranged like this:
PLOT 1 and PLOT 2 are based on the following data, w:
w <- structure(list(WHO = c(1L, 3L, 2L, 2L, 2L, 3L, 2L, 3L, 1L, 2L,
3L, 3L, 3L, 1L, 2L, 1L, 2L, 3L, 3L, 1L, 2L, 1L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 4L, 4L, 1L, 4L, 1L, 2L, 1L, 4L, 1L, 4L, 4L, 4L,
4L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 2L, 1L, 2L, 2L, 4L, 4L,
4L, 2L, 4L, 1L, 4L, 4L, 2L, 4L, 4L, 3L, 4L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L,
3L, 4L, 3L, 4L, 3L), response = c(0L, 1L, 0L, 0L, 0L, 1L, 1L,
1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 1L, NA, 1L), Death = c(0L, 1L, 1L, 0L, 0L,
1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, NA, 1L)), class = "data.frame", row.names = c(NA,
-111L))
PLOT 1
Is based on this script.
# PLOT 1: number of progressions per WHO grade as bars (left axis), with the
# progression rate per 100 person-years overlaid as red diamonds (right axis).
yaks <- seq(0, 18, by = 2)

# Rows with a recorded progression. `%in%` never yields NA, so patients whose
# response is NA are excluded here; `w$response == 1` would instead return NA
# for them and inject NA values into the x aesthetic.
progressed <- w[w$response %in% 1, ]

j <- ggplot() +
  geom_bar(
    data = progressed, mapping = aes(x = WHO), stat = "count",
    alpha = 0.2, colour = "#1C73C2", fill = "#ECF0F9"
  ) +
  scale_x_discrete(
    name = "", drop = FALSE,
    labels = c("WHO-I\nn=32", "WHO-II\nn=23", "WHO-III\nn=16", "Unknown\nn=5")
  ) +
  theme(axis.text.x = element_text(color = "grey20", size = 11))

# The secondary axis uses the identity transform (`~ . * 1`) and only relabels
# the shared breaks, so the red rate markers below are given in count units.
j <- j +
  scale_y_continuous(
    name = "Progression", breaks = yaks,
    sec.axis = sec_axis(
      ~ . * 1,
      name = "Progression rate per 100 person-yrs", breaks = yaks,
      labels = c("0", "40", "80", "120", "160", "200", "240", "280", "320", "360")
    )
  ) +
  coord_cartesian(ylim = c(0, 18)) +
  theme(
    axis.text.y.right = element_text(color = "red", size = 11),
    axis.title.y = element_text(size = 14, face = "bold", margin = margin(t = 0, r = 20, b = 0, l = 0))
  ) +
  theme(
    axis.text.y = element_text(color = "#1C73C2", size = 11),
    axis.title.y.right = element_text(size = 14, face = "bold", margin = margin(t = 0, r = 0, b = 0, l = 20))
  )

# Print the plot with the rate markers and their labels; there is no marker
# for the 4th group, which is labelled "0 events" instead.
j +
  annotate(
    "point",
    x = c(1, 2, 3), y = c(0.2677376, 3.2870709, 16.98),
    size = 10, alpha = 0.7, shape = 18, colour = "red"
  ) +
  annotate("text", x = c(1, 2, 3, 4), y = c(-0.5, 2.2870709, 15.98, 0.5), label = c("5.4", "65.7", "339.6", "0 events"), col = "red", fontface = 2, cex = 4)
And looks like this:
Whereas PLOT 2 is based on this script
# PLOT 2: number of deaths per WHO grade as bars (left axis), with the
# mortality rate per 100 person-years overlaid as red diamonds (right axis).
yaks <- seq(0, 18, by = 2)

# Rows with a recorded death. `%in%` never yields NA, so the patient whose
# Death status is NA is excluded here; `w$Death == 1` would instead return NA
# for that row and inject an NA value into the x aesthetic.
died <- w[w$Death %in% 1, ]

j <- ggplot() +
  geom_bar(
    data = died, mapping = aes(x = WHO), stat = "count",
    alpha = 0.2, colour = "#1C73C2", fill = "#ECF0F9"
  )

# The secondary axis uses the identity transform (`~ . * 1`) and only relabels
# the shared breaks, so the red rate markers below are given in count units.
j <- j +
  scale_y_continuous(
    name = "Deaths", breaks = yaks,
    sec.axis = sec_axis(
      ~ . * 1,
      name = "Mortality rate per 100 person-yrs", breaks = yaks,
      labels = c("0", "5", "10", "15", "20", "25", "30", "35", "40", "45")
    )
  ) +
  coord_cartesian(ylim = c(0, 18)) +
  theme(
    axis.text.y.right = element_text(color = "red", size = 11),
    axis.title.y = element_text(size = 14, face = "bold", margin = margin(t = 0, r = 20, b = 0, l = 0))
  ) +
  theme(
    axis.text.y = element_text(color = "#1C73C2", size = 11),
    axis.title.y.right = element_text(size = 14, face = "bold", margin = margin(t = 0, r = 0, b = 0, l = 20))
  )

# Rate markers (one per WHO group) and their printed values just below them.
j <- j +
  annotate(
    "point",
    x = c(1, 2, 3, 4), y = c(3.329993, 12.424504, 17.23519, 4.549763),
    size = 10, alpha = 0.7, shape = 18, colour = "red"
  ) +
  annotate("text", x = c(1, 2, 3, 4), y = c(2.329993, 11.424504, 16.23519, 3.549763), label = c("8.3", "31.1", "43.1", "11.4"), col = "red", fontface = 2, cex = 4)

# Print the plot with the group labels on the x axis.
j +
  scale_x_continuous(
    name = "", breaks = c(1, 2, 3, 4),
    labels = c("WHO-I\nn=37", "WHO-II\nn=29", "WHO-III\nn=19", "Unknown\nn=25")
  ) +
  theme(axis.text.x = element_text(color = "grey20", size = 11))
And looks like this:
Finally, PLOT 3 is a Forest Plot from metafor
The Forest Plot looks like this:
With the following data
q <- structure(list(study = structure(c(2L, 4L, 7L, 3L, 5L, 1L, 8L,
6L, 9L), .Label = c("WHO-I versus Unknown ", "WHO-I versus WHO-II",
"WHO-I versus WHO-II ", "WHO-I versus WHO-III", "WHO-I versus WHO-III ",
"WHO-II versus Unknown", "WHO-II versus WHO-III", "WHO-II versus WHO-III ",
"WHO-III versus Unknown"), class = "factor"), order = 9:1, x1i = c(4L,
4L, 15L, 9L, 9L, 9L, 15L, 15L, 12L), n1i = c(32L, 32L, 23L, 37L,
37L, 37L, 29L, 29L, 19L), t1i = c(74.7, 74.7, 22.8, 108.1, 108.1,
108.1, 48.3, 48.3, 27.9), x2i = c(15L, 15L, 15L, 15L, 12L, 9L,
12L, 9L, 9L), n2i = c(23L, 16L, 16L, 29L, 19L, 25L, 19L, 25L,
25L), t2i = c(22.8, 4.4, 4.4, 48.3, 27.9, 79.1, 27.9, 79.1, 79.1
), ir1 = c(5.4, 5.4, 65.7, 8.3, 8.3, 8.3, 31.1, 31.1, 43.1),
ir2 = c(65.7, 339.6, 339.6, 31.1, 43.1, 11.4, 43.1, 11.4,
11.4)), class = "data.frame", row.names = c(NA, -9L))
And script
# Compute the effect sizes for each comparison from the event counts (x1i, x2i)
# and person-time (t1i, t2i); the result overwrites q and supplies the yi / vi
# columns consumed by rma() below. "IRR" is presumably metafor's log incidence
# rate ratio, which matches the exp back-transform used in forest().
q <- escalc(measure="IRR", x1i=x1i, t1i=t1i, x2i=x2i, t2i=t2i, data=q)
# Fit the model with REML; slab labels each row with its study name
# (paste() on a single vector just converts it to character).
q1 <- rma(yi, vi, data=q, slab=paste(study, sep=", "), method = "REML")
## Forest
# atransf=exp labels the x axis on the back-transformed (ratio) scale and
# refline=log(1) puts the reference line at 0 on the model scale.
# ilab / ilab.xpos add six extra columns (events, person-years and IR for each
# of the two groups) at fixed x positions.
# rows places the nine comparisons in rows 2-7 and 11-13, leaving gaps that the
# text() calls below fill with section titles and cohort notes.
# col / border = "white" presumably hide the pooled-estimate polygon, and
# mlab="" blanks its label -- NOTE(review): confirm against metafor's forest docs.
forest(q1, xlim=c(-27,8), atransf=exp, showweights = FALSE, psize = 1.2, refline=log(1),
cex=0.95, ylim=c(0.1, 17), font=1, col="white", border="white", order=order(q$order),
ilab=cbind(q$x1i, q$t1i, q$ir1, q$x2i, q$t2i,q$ir2),
ilab.xpos=c(-19.3,-17,-15,-12.3,-10,-8),
rows=c(2:7,11:13),xlab="Rate ratios", mlab="")
# Headlines
# Column headers for the six ilab columns, drawn at y = 15.7.
text(c(-19,-16.8,-15,-12,-9.8,-8) ,15.7,font=1, cex=0.9, c("Events\n per total", "Person-\nyrs", "IR", "Events\n per total", "Person-\nyrs","IR"))
# Hand-placed "/ n" denominators next to the first events column
# (rows 13-11, 7-5 and 4-2).
text(c(-18.75,-18.75,-18.65) ,c(13,12,11),font=1, cex=0.94, c("/ 32", "/ 32", " / 23"))
text(c(-18.75,-18.75,-18.75) ,c(7,6,5),font=1, cex=0.94, c("/ 37", "/ 37", "/ 37"))
text(c(-18.65,-18.65,-18.65) ,c(4,3,2),font=1, cex=0.94, c(" / 29", " / 29", " / 19"))
# Hand-placed "/ n" denominators next to the second events column (same rows).
text(c(-11.65,-11.65,-11.65) ,c(13,12,11),font=1, cex=0.94, c(" /23", " /16", " /16"))
text(c(-11.65,-11.65,-11.75) ,c(7,6,5),font=1, cex=0.94, c(" /29", "/19", " /25"))
text(c(-11.65,-11.75,-11.75) ,c(4,3,2),font=1, cex=0.94, c("/19", " / 25", " / 25"))
# Header for the estimate column, right-aligned at the plot's right edge (x = 8).
text(8 ,15.7,font=1, "Rate ratio [95% CI]", pos=2, cex=1)
# Bold section titles at y = 14 and y = 8, left-aligned at x = -27.
text(-27 ,c(14,8),font=2, c("Progression rates","Mortality rates"), pos=4, cex=0.9)
# Cohort-size notes at y = 1 and y = 10.
text(-27 ,c(1,10),font=1, c("Cohort: 110 patients included","Cohort: 76 patients included"), pos=4, cex=0.8)

Related

ggsurvplot can't get cumulative events table font size to change

I started with this answer as a base for my ggsurvplot graph, and I'm using their reproducible code for my question as well.
My problem is I can't figure out how to change the font size for the cumulative event table. Based on this documentation, it looks like I should be able to specify cumevents.fontsize = x, but no matter what value I put in for that, the font size does not change. I'm trying to set it to the same size as the risk.table.fontsize, but you can see that they are different in the graph below.
The graph was computed with this
library(survival)
library(survminer)
library(ggplot2)
# Survival fit of time to recurrence, stratified by Simpson grade.
# The variables are referenced by bare name so they are looked up in
# `data = p`; writing `p$...` inside the formula bypasses the `data` argument
# and bakes the "p$" prefix into the strata names.
fit <- survfit(
  Surv(time.recur.months, recurrence) ~ simpson.grade,
  conf.type = "log",
  data = p
)

# Survival curves with the number-at-risk table and the cumulative-events
# table underneath.
ggsurvplot(
  fit,
  data = p,
  risk.table = TRUE,
  risk.table.fontsize = 2,
  cumevents = TRUE,
  # This is the argument the question is about: it is reported to have no
  # effect on the cumulative-events table.
  cumevents.fontsize = 2,
  pval = TRUE,
  pval.coord = c(0, 0.25),
  conf.int = FALSE,
  legend.labs = c(
    "Simpson Grade 1", "Simpson Grade 2", "Simpson Grade 3",
    "Simpson Grade 4"
  ),
  size = c(0.7, 0.7, 0.7, 0.7),
  xlim = c(0, 100),
  alpha = c(0.7),
  break.time.by = 10,
  xlab = "Time in months",
  # ylab = "Survival probability",
  ggtheme = theme_gray(),
  risk.table.y.text.col = TRUE,
  risk.table.y.text = TRUE,
  ylim = c(0, 0.5),
  palette = "Set1"
)
My Data
p <- structure(list(recurrence = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, NA, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L), time.recur.months = c(NA, NA,
NA, NA, NA, NA, 92L, NA, NA, NA, 74L, NA, NA, NA, 2L, 8L, NA,
NA, NA, NA, 58L, NA, NA, NA, NA, NA, 3L, NA, 4L, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 39L, NA, NA, NA, NA, 15L, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 12L, 56L, 57L, NA, NA, 49L, 17L,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5L,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 9L, NA,
89L, NA, NA, NA, 8L, 6L, 8L, 4L, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 60L, NA, NA, 38L, NA, NA, NA, NA, NA, 90L,
NA, 58L, 54L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 53L, NA, NA, 124L, NA, NA,
NA, NA, NA, NA, 7L, NA), simpson.grade = c(3L, 1L, 1L, 2L, 4L,
1L, 1L, 1L, 2L, 1L, 4L, 1L, 1L, 2L, 1L, 2L, 1L, 4L, 2L, 3L, 2L,
1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 3L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 4L, 3L, 1L, 1L, 4L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 1L,
3L, 4L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 4L, 1L, 1L, 1L, 4L, 1L, 1L,
1L, 2L, 1L, 2L, 4L, 4L, 1L, 4L, 4L, 1L, 2L, 1L, 1L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 2L, 1L, 4L, 1L, 1L, 4L,
4L, 1L, 3L, 1L, 1L, 1L, 3L, 2L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 4L, 1L, 4L, 4L, 1L, 4L, 4L, 1L, 4L, 4L, 3L, 1L, 1L,
1L, 4L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 2L, 2L, 2L, 4L, 1L, 4L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 3L, 1L)), .Names = c("recurrence",
"time.recur.months", "simpson.grade"), class = "data.frame", row.names
= c(NA,
-176L))
I can't see the documentation for a cumevents.fontsize argument, but a straightforward way to change it is to store the plot and change it directly:
mygg <- ggsurvplot(
fit,
data = p,
risk.table = TRUE,
risk.table.fontsize = 2,
cumevents = TRUE,
cumevents.fontsize =2,
pval = TRUE,
pval.coord = c(0, 0.25),
conf.int = F,
legend.labs=c("Simpson Grade 1" ,"Simpson Grade 2", "Simpson Grade 3",
"Simpson Grade 4"),
size=c(0.7,0.7,0.7,0.7),
xlim = c(0,100),
alpha=c(0.7),
break.time.by = 10,
xlab="Time in months",
#ylab="Survival probability",
ggtheme = theme_gray(),
risk.table.y.text.col = T,
risk.table.y.text = TRUE,
ylim=c(0,0.5),
palette="Set1"
)
So now you can do:
mygg$cumevents$layers[[1]]$aes_params$size <- 2
mygg
or
mygg$cumevents$layers[[1]]$aes_params$size <- 8
mygg
Here is a suggestion how to get a clean table: See here https://github.com/kassambara/survminer/issues/117
# Survival plot with a risk table and a cumulative-events table, stored so
# that the two table sub-plots can be restyled afterwards.
# Logical arguments are spelled TRUE/FALSE: T and F are ordinary variables
# that can be reassigned.
p1 <- ggsurvplot(
  fit,
  data = p,
  risk.table = TRUE,
  cumevents = TRUE,
  risk.table.title = "No. at Risk",
  cumevents.title = "Cumulative No. of Events",
  risk.table.height = 0.15,
  cumevents.height = 0.15,
  risk.table.y.text = TRUE,
  cumevents.y.text = TRUE,
  pval = TRUE,
  pval.coord = c(0, 0.25),
  conf.int = FALSE,
  legend.labs = c("Simpson Grade 1", "Simpson Grade 2", "Simpson Grade 3", "Simpson Grade 4"),
  size = c(0.7, 0.7, 0.7, 0.7),
  xlim = c(0, 100),
  alpha = c(0.7),
  break.time.by = 10,
  xlab = "Time in months",
  ylab = "Survival probability",
  ggtheme = theme_gray(),
  risk.table.y.text.col = TRUE,
  # risk.table.y.text = TRUE,
  ylim = c(0, 0.5),
  palette = "Set1"
)
# Apply survminer's clean-table theme to both tables, then print the plot.
p1$table <- p1$table + theme_cleantable()
p1$cumevents <- p1$cumevents + theme_cleantable()
p1

R code of scatter plot for four variables

I tried plotting ASB vs YOI for each Child grouped by Race
I got something like:
library(tidyverse)
Antisocial <- structure(list(Child = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L), ASB = c(1L, 1L, 1L, 0L, 0L, 0L, 5L, 5L, 5L, 2L), Race = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Y92 = c(0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L), Y94 = c(0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L), YOI = c(90L, 92L, 94L, 90L, 92L, 94L, 90L, 92L, 94L, 90L)), row.names = c(NA, 10L), class = "data.frame")
ggplot(data = Antisocial, aes(x = YOI, y = ASB)) +
geom_point( colour = "Black", size = 2) +
geom_line(data = Antisocial, aes(x= Child), size = 1) +
facet_grid(.~ Race)
Plot Image I generated: https://drive.google.com/file/d/1sZVsRFiGC0dIGg0GWhHhNDCaiW2iB-ky/view?usp=sharing
Full dataset- https://drive.google.com/file/d/1UeVTJ1M_eKQDNtvyUHRB77VDpSF1ASli/view?usp=sharing
I want to use 2 charts side by side Race=0, Race= 1 to plot ASB vs YOI for each Child grouped by Race. The line, however, should only connect to dots of the same child. As it is right now, all the dots are connected. Furthermore the scale of YOI should be (90,94).
Can you suggest what change should I do?
Thanks!
Thanks for providing the data. I changed 4 observations to race 0 to have some variation:
library(tidyverse)
Antisocial <- structure(list(Child = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L), ASB = c(1L, 1L, 1L, 0L, 0L, 0L, 5L, 5L, 5L, 2L), Race = c(1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L), Y92 = c(0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L), Y94 = c(0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L), YOI = c(90L, 92L, 94L, 90L, 92L, 94L, 90L, 92L, 94L, 90L)), row.names = c(NA, 10L), class = "data.frame")
# ASB versus YOI, one panel per Race. `group = Child` makes geom_line()
# connect only the observations that belong to the same child.
# (The stray empty argument in the original aes() call has been removed.)
ggplot(data = Antisocial, aes(x = YOI, y = ASB, group = Child)) +
  geom_point(colour = "Black", size = 2) +
  geom_line() +
  # Restrict the YOI axis to the 90-94 range requested in the question.
  scale_x_continuous(limits = c(90, 94), breaks = c(90, 92, 94)) +
  facet_grid(. ~ Race)
To connect the dots for each child, you need to include group = Child in the code. I think this is what you want? Let me know if this solved your problem :)

How can I synchronize the bar width between two histograms when merging with ggarrange()?

Please, find my data w below.
I have produced these two histograms, Plot A and Plot B, which I subsequently merged using ggarrange()
As you can see, the widths of the bars do not match between the two individual histograms. It's probably because of the 0 events in plot A.
How can I synchronize the width between the bars in plot A and plot B, respectively? Like this:
I used these scripts
# Plot A
io <- ggplot() +
geom_bar(mapping = aes(x = w$WHO[w$response==1]), stat = "count", alpha=0.2, colour="#1C73C2", fill="#ECF0F9") +
scale_y_continuous(name = "Number of progressions", breaks=yaks, sec.axis = sec_axis(~ . * 1 , name = "Progression rate per 100 person-yrs", breaks=yaks, labels=c("0","40","80","120","160","200","240","280","320","360"))) +
scale_x_continuous(name="", breaks = c(1,2,3,4), labels =c("\nWHO-I\nn=32","\nWHO-II\nn=23","\nWHO-III\nn=16","\nUnknown\nn=5")) +
coord_cartesian(ylim=c(0, 18)) +
geom_point(mapping = aes(x = 1, y = 0.2677376), size=5, alpha=0.7, shape=18, colour="red") +
geom_point(mapping = aes(x = 2, y = 3.2870709), size=5, shape=18,alpha=0.7, colour="red") +
geom_point(mapping = aes(x = 3, y = 16.98), size=5, shape=18,alpha=0.7, colour="red") +
annotate("text", x = c(1,2,3, 4) , y = c(0.2677376+1.3, 3.2870709+1.3, 16.98+1.3, 0.5), label = c("5.4","65.7","339.6", "0 events"), col="red", fontface=2, cex=4)+
theme(axis.text.y.right = element_text(color = "red", size = 11),
axis.title.y=element_text(color="darkgrey", size=11,face="bold", margin = margin(t = 0, r = 15, b = 0, l = 0)),
axis.text.y = element_text(color = "#1C73C2", size = 11),
axis.title.y.right=element_text(color="darkgrey", size=11,face="bold", margin = margin(t = 0, r = 0, b = 0, l = 15)),
axis.text.x = element_text(color = "grey20", size = 11))
And
# Plot B
yt <- ggplot() +
geom_bar(mapping = aes(x = w$WHO[w$Death==1]), stat = "count", alpha=0.2, colour="#1C73C2", fill="#ECF0F9") +
scale_y_continuous(name = "Number of deaths", breaks=yaks, sec.axis = sec_axis(~ . * 1 , name = "Mortality rate per 100 person-yrs", breaks=yaks, labels=c("0","5","10","15","20","25","30","35","40","45"))) +
scale_x_continuous(name="", breaks = c(1,2,3,4), labels =c("\nWHO-I\nn=37","\nWHO-II\nn=29","\nWHO-III\nn=19","\nUnknown\nn=25")) +
coord_cartesian(ylim=c(0, 18)) +
geom_point(mapping = aes(x = 1, y = 3.329993), size=5,alpha=0.7, shape=18, colour="red") +
geom_point(mapping = aes(x = 2, y = 12.424504), size=5,alpha=0.7, shape=18, colour="red") +
geom_point(mapping = aes(x = 3, y = 17.23519), size=5, alpha=0.7,shape=18, colour="red") +
geom_point(mapping = aes(x = 4, y = 4.549763), size=5, alpha=0.7, shape=18, colour="red") +
annotate("text", x = c(1,2,3,4) , y = c(3.329993+1.3, 12.424504+1.3, 17.23519+1.3,4.549763+1.3 ), label = c("8.3","31.1","43.1","11.4"), col="red", fontface=2, cex=4) +
theme(axis.text.y.right = element_text(color = "red", size = 11),
axis.title.y=element_text(color="darkgrey", size=11,face="bold", margin = margin(t = 0, r = 15, b = 0, l = 0)),
axis.text.y = element_text(color = "#1C73C2", size = 11),
axis.title.y.right=element_text(color="darkgrey", size=11,face="bold", margin = margin(t = 0, r = 0, b = 0, l = 15)),
axis.text.x = element_text(color = "grey20", size = 11))
And lastly
ggarrange(io, yt,labels = c("A", "B"), ncol = 1, nrow = 2, heights = 1.2)
My data w
w <- structure(list(WHO = c(1L, 3L, 2L, 2L, 2L, 3L, 2L, 3L, 1L, 2L,
3L, 3L, 3L, 1L, 2L, 1L, 2L, 3L, 3L, 1L, 2L, 1L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 4L, 4L, 1L, 4L, 1L, 2L, 1L, 4L, 1L, 4L, 4L, 4L,
4L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 2L, 1L, 2L, 2L, 4L, 4L,
4L, 2L, 4L, 1L, 4L, 4L, 2L, 4L, 4L, 3L, 4L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L,
3L, 4L, 3L, 4L, 3L), response = c(0L, 1L, 0L, 0L, 0L, 1L, 1L,
1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 1L, NA, 1L), Death = c(0L, 1L, 1L, 0L, 0L,
1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, NA, 1L)), class = "data.frame", row.names = c(NA,
-111L))
You can just create a dummy data frame to obtain a 4th column in your plot A, that you will turn fully transparent :
w2=data.frame(WHO=4,response=1,Death=NA)
and then correct the two first rows of plot A as :
# Plot A with a 4th x position reserved by an invisible dummy bar (from `w2`),
# so that its bars get the same width as those in plot B.
io <- ggplot() +
  # The data are attached to the bar layers only. With plot-level data, every
  # constant-position point layer would be drawn once per data row, stacking
  # the red markers on top of each other and defeating `alpha`.
  # `%in%` also drops rows whose response is NA, which `== 1` would keep as
  # all-NA rows.
  geom_bar(
    data = w[w$response %in% 1, ], mapping = aes(x = WHO), stat = "count",
    alpha = 0.2, colour = "#1C73C2", fill = "#ECF0F9"
  ) +
  # Fully transparent bar at WHO = 4: it only widens the x range.
  geom_bar(data = w2, mapping = aes(x = WHO), alpha = 0, stat = "count") +
  scale_y_continuous(
    name = "Number of progressions", breaks = yaks,
    sec.axis = sec_axis(
      ~ . * 1,
      name = "Progression rate per 100 person-yrs", breaks = yaks,
      labels = c("0", "40", "80", "120", "160", "200", "240", "280", "320", "360")
    )
  ) +
  scale_x_continuous(
    name = "", breaks = c(1, 2, 3, 4),
    labels = c("\nWHO-I\nn=32", "\nWHO-II\nn=23", "\nWHO-III\nn=16", "\nUnknown\nn=5")
  ) +
  coord_cartesian(ylim = c(0, 18)) +
  # Rate markers: annotate() draws each one exactly once.
  annotate(
    "point",
    x = c(1, 2, 3), y = c(0.2677376, 3.2870709, 16.98),
    size = 5, alpha = 0.7, shape = 18, colour = "red"
  ) +
  annotate("text", x = c(1, 2, 3, 4), y = c(0.2677376 + 1.3, 3.2870709 + 1.3, 16.98 + 1.3, 0.5), label = c("5.4", "65.7", "339.6", "0 events"), col = "red", fontface = 2, cex = 4) +
  theme(
    axis.text.y.right = element_text(color = "red", size = 11),
    axis.title.y = element_text(color = "darkgrey", size = 11, face = "bold", margin = margin(t = 0, r = 15, b = 0, l = 0)),
    axis.text.y = element_text(color = "#1C73C2", size = 11),
    axis.title.y.right = element_text(color = "darkgrey", size = 11, face = "bold", margin = margin(t = 0, r = 0, b = 0, l = 15)),
    axis.text.x = element_text(color = "grey20", size = 11)
  )
finally, just use ggarrange as before :
ggarrange(io, yt,labels = c("A", "B"), ncol = 1, nrow = 2, heights = 1.2)

Graph with two y axes in r

I was searching for code to create a nice graph with two y axes. However, I couldn't find a solution that helped me. (I do not use R often.)
I have time as the x variable, and I want to plot value1 and value2 on the left and right y axes, respectively.
My data are in one dataset, so I want to plot data$time on the x axis, data$value1 on the left y axis and data$value2 on the right y axis. Each series (data$value1 and data$value2) should be drawn as a line in its own colour. In addition, there should be a legend for the lines.
Can someone help me with it?
Please, find my data w below.
Here is a script showing how a double y-axis may be integrated using ggplot:
ggplot() +
geom_bar(mapping = aes(x = w$WHO[w$Death==1]),
stat = "count", alpha=0.2, colour="#1C73C2",
fill="#ECF0F9") +
scale_y_continuous(name = "Number of deaths", breaks=yaks, sec.axis = sec_axis(~ . * 1 , name = "Mortality rate per 100 person-yrs", breaks=yaks, labels=c("0","5","10","15","20","25","30","35","40","45"))) +
scale_x_continuous(name="", breaks = c(1,2,3,4), labels =c("\nWHO-I\nn=37","\nWHO-II\nn=29","\nWHO-III\nn=19","\nUnknown\nn=25")) +
coord_cartesian(ylim=c(0, 18)) +
geom_point(mapping = aes(x = 1, y = 3.329993), size=5,alpha=0.7, shape=18, colour="red") +
geom_point(mapping = aes(x = 2, y = 12.424504), size=5,alpha=0.7, shape=18, colour="red") +
geom_point(mapping = aes(x = 3, y = 17.23519), size=5, alpha=0.7,shape=18, colour="red") +
geom_point(mapping = aes(x = 4, y = 4.549763), size=5, alpha=0.7, shape=18, colour="red") +
annotate("text", x = c(1,2,3,4) , y = c(3.329993+1.3, 12.424504+1.3, 17.23519+1.3,4.549763+1.3 ), label = c("8.3","31.1","43.1","11.4"), col="red", fontface=2, cex=4) +
theme(axis.text.y.right = element_text(color = "red", size = 11),
axis.title.y=element_text(color="darkgrey", size=11,face="bold", margin = margin(t = 0, r = 15, b = 0, l = 0)),
axis.text.y = element_text(color = "#1C73C2", size = 11),
axis.title.y.right=element_text(color="darkgrey", size=11,face="bold", margin = margin(t = 0, r = 0, b = 0, l = 15)),
axis.text.x = element_text(color = "grey20", size = 11))
My data w
w <- structure(list(WHO = c(1L, 3L, 2L, 2L, 2L, 3L, 2L, 3L, 1L, 2L,
3L, 3L, 3L, 1L, 2L, 1L, 2L, 3L, 3L, 1L, 2L, 1L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 4L, 4L, 1L, 4L, 1L, 2L, 1L, 4L, 1L, 4L, 4L, 4L,
4L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 2L, 1L, 2L, 2L, 4L, 4L,
4L, 2L, 4L, 1L, 4L, 4L, 2L, 4L, 4L, 3L, 4L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L,
3L, 4L, 3L, 4L, 3L), response = c(0L, 1L, 0L, 0L, 0L, 1L, 1L,
1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 1L, NA, 1L), Death = c(0L, 1L, 1L, 0L, 0L,
1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, NA, 1L)), class = "data.frame", row.names = c(NA,
-111L))

R: Recoding multiple dummy variables into a single variable and replacing the corresponding dummy value with the variable name

I have a dataset with 14 mutually exclusive categories of call type all coded as dummy variables. Here is a small sample:
dput(df)
structure(list(MON1_12 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), WEEK1_53 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), AGENT_ID = structure(c(3L,
4L, 7L, 8L, 1L, 6L, 5L, 9L, 2L, 10L), .Label = c("A129", "A360",
"A407", "B891", "D197", "L145", "L722", "O518", "T443", "W764"
), class = "factor"), CallsHandled = c(1L, 4L, 2L, 14L, 1L, 2L,
5L, 1L, 1L, 3L), CONTENT = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), CLAIMS = c(1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
CREDIT_CARD = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
DEDUCT_BILL = c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L),
HCREFORM = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("MON1_12",
"WEEK1_53", "AGENT_ID", "CallsHandled", "CONTENT", "CLAIMS",
"CREDIT_CARD", "DEDUCT_BILL", "HCREFORM"), class = "data.frame", row.names = c(NA,
-10L))
I want to combine the dummy variables into a single new variable called "QUEUE", in which each value of "1" is replaced by the name of its corresponding dummy variable. Here is an example of what this would look like:
dput(df2)
structure(list(MON1_12 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), WEEK1_53 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), AGENT_ID = structure(c(3L,
4L, 7L, 8L, 1L, 6L, 5L, 9L, 2L, 10L), .Label = c("A129", "A360",
"A407", "B891", "D197", "L145", "L722", "O518", "T443", "W764"
), class = "factor"), CallsHandled = c(1L, 4L, 2L, 14L, 1L, 2L,
5L, 1L, 1L, 3L), QUEUE = structure(c(1L, 4L, 2L, 4L, 1L, 3L,
3L, 5L, 5L, 4L), .Label = c("CLAIMS", "CONTENT", "CREDIT_CARD",
"DEDUCT_BILL", "HCREFORM"), class = "factor")), .Names = c("MON1_12",
"WEEK1_53", "AGENT_ID", "CallsHandled", "QUEUE"), class = "data.frame", row.names = c(NA,
-10L))
Edit in response to having question marked down: This is what I had tried this afternoon on recommendation with a slightly different sample dataframe:
df$Queue <- as.factor(df$CONTENT + df$CLAIMS*2 + df$CREDIT_CARD*3 + df$DEDUCT_BILL*4 + df$HCREFORM*5)
levels(df$Queue) <- c("CONTENT", "CLAIMS", "CREDIT_CARD","DEDUCT_BILL","HCREFORM")
View(df)
But I received a column of NA's in the Queue column. So, I recreated another sample dataset here. This dataframe is adequately representative of what I'll receive in reality, except I'll have about 40 variables and 2 million rows. When I run what I tried above on "df" above I get the following incorrect result:
dput(df)
structure(list(MON1_12 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), WEEK1_53 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), AGENT_ID = structure(c(3L,
4L, 7L, 8L, 1L, 6L, 5L, 9L, 2L, 10L), .Label = c("A129", "A360",
"A407", "B891", "D197", "L145", "L722", "O518", "T443", "W764"
), class = "factor"), CallsHandled = c(1L, 4L, 2L, 14L, 1L, 2L,
5L, 1L, 1L, 3L), CONTENT = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), CLAIMS = c(1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
CREDIT_CARD = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
DEDUCT_BILL = c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L),
HCREFORM = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Queue = structure(c(2L,
1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("CONTENT",
"CLAIMS", "CREDIT_CARD", "DEDUCT_BILL", "HCREFORM"), class = "factor")), .Names = c("MON1_12",
"WEEK1_53", "AGENT_ID", "CallsHandled", "CONTENT", "CLAIMS",
"CREDIT_CARD", "DEDUCT_BILL", "HCREFORM", "Queue"), row.names = c(NA,
-10L), class = "data.frame")
I also tried:
df3 <- cbind(df[1:4], QUEUE = apply(df[5:9], 1, function(N) names(N)[as.logical(N)]))
but received the following error: "Error in data.frame("CLAIMS", character(0), character(0), "DEDUCT_BILL", :
arguments imply differing number of rows: 1, 0"
You could use max.col to get the column index that have a value of '1' in each row for columns 5 to 9. (The 'df' example is not correct as most of the rows were all 0s. The corrected one is below).
# For each row, take the name of the dummy column (columns 5 onward) holding
# the largest value, i.e. the one set to 1.
# ties.method = "first" keeps the result deterministic: the default, "random",
# picks an arbitrary column whenever a row has no unique maximum (e.g. all 0s).
df$QUEUE <- names(df)[-(1:4)][max.col(df[-(1:4)], ties.method = "first")]
Or you can do
df$QUEUE <- names(df)[-(1:4)][(as.matrix(df[-(1:4)]) %*%
seq_along(df[-(1:4)]))[,1]]
Update
Based on the edited dataset 'df', some rows are all 0s in columns 5:9, and the expected result shows 'QUEUE' as 'CONTENT' for those rows. In that case, we can first set the 'CONTENT' column to 1 wherever a row is all 0s and then apply either of the code snippets above:
# Rows whose dummy columns (5:9) are all 0 are assigned to CONTENT first, so
# that every row has a column set to 1.
df$CONTENT[rowSums(df[5:9]) == 0] <- 1
# Name of the column holding the row maximum; ties.method = "first" makes the
# choice deterministic instead of random should a row ever contain a tie.
df$QUEUE1 <- names(df)[5:9][max.col(df[5:9], ties.method = "first")]
df$QUEUE1
#[1] "CLAIMS" "CONTENT" "CONTENT" "DEDUCT_BILL" "CONTENT"
#[6] "CONTENT" "CONTENT" "CONTENT" "CONTENT" "CONTENT"
data
df <- structure(list(MON1_12 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), WEEK1_53 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
AGENT_ID = structure(c(3L,
4L, 7L, 8L, 1L, 6L, 5L, 9L, 2L, 10L), .Label = c("A129", "A360",
"A407", "B891", "D197", "L145", "L722", "O518", "T443", "W764"
), class = "factor"), CallsHandled = c(1L, 4L, 2L, 14L, 1L, 2L,
5L, 1L, 1L, 3L), CONTENT = c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0), CLAIMS = c(1,
0, 0, 0, 1, 0, 0, 0, 0, 0), CREDIT_CARD = c(0, 0, 0, 0, 0, 1,
1, 0, 0, 0), DEDUCT_BILL = c(0, 1, 0, 1, 0, 0, 0, 0, 0, 1),
HCREFORM = c(0,
0, 0, 0, 0, 0, 0, 1, 1, 0)), .Names = c("MON1_12", "WEEK1_53",
"AGENT_ID", "CallsHandled", "CONTENT", "CLAIMS", "CREDIT_CARD",
"DEDUCT_BILL", "HCREFORM"), row.names = c(NA, -10L), class = "data.frame")
This should produce the desired result:
# Flag which dummy variables (columns 5:9) are set in each row.
is_set <- as.matrix(df[5:9]) != 0
# The recoding is only defined when every row has exactly one dummy set.
# Fail with a clear message instead of the cryptic "arguments imply differing
# number of rows" error that a ragged apply() result would trigger in cbind().
if (!isTRUE(all(rowSums(is_set) == 1))) {
  stop("each row must have exactly one dummy variable set", call. = FALSE)
}
# Keep columns 1:4 and add QUEUE, the name of the dummy column set in each row
# (is_set * 1 turns the logical matrix into 0/1 for max.col()).
df2 <- cbind(
  df[1:4],
  QUEUE = names(df)[5:9][max.col(is_set * 1, ties.method = "first")]
)
provided that exactly one of the dummy variables is 1 in every row (which is not true in your original sample of df).
Explanation: df[1:4] selects the columns one through four to be preserved in the output. It is then column bound to QUEUE using cbind function. QUEUE is obtained by iterating through the dummy variables (columns five through nine), row-wise over the data set df and selecting the column-name that contains the value one.

Resources