Related
I am trying to reproduce the plot from this question, but code is deprecated and I cant seem to figure out why it always gives the error.
Error: Discrete value supplied to continuous scale.
I thought I had omitted each row for being discrete to figure uot what was going on, but anyway I do it it alyways breaks because of that. There are some minor errors due to axis.ticks.margin and panel.margin as well as vjust but I don think they are the main issue. Although not 100%.
Find the reproducible dataset here:
groupData <- dput(structure(list(ID = 1:12, Group = c("Renal Failure", "Renal Failure",
"Diabetes", "Diabetes", "PA Disease", "PA Disease", "CV Disease",
"CV Disease", "Sex", "Sex", "Age", "Age"), Subgroup = c("No",
"Yes", "No", "Yes", "No", "Yes", "No", "Yes", "Female", "Male",
">70 yr", "<70 yr"), NoP = c(4594L, 66L, 2523L, 2228L, 4366L,
385L, 4296L, 456L, 908L, 3843L, 1935L, 2815L), P_S = c(0.2, 0.37,
0.84, 0.06, 0.37, 0.33, 0.18, 0.69, 0.21, 0.47, 0.17, 0.77),
P_G = c(0.51, 0.51, 0.13, 0.13, 0.54, 0.54, 0.41, 0.41, 0.46,
0.46, 0.46, 0.46)), class = "data.frame", row.names = c(NA, -12L)))
Code
## REQUIRED PACKAGES
require(grid)
require(ggplot2)
require(plyr)
############################################
### CUSTOMIZE APPEARANCE WITH THESE ####
############################################
blankRows<-2 # blank rows under boxplot
titleSize<-4
dataSize<-4
boxColor<-"pink"
############################################
############################################
## BASIC THEMES (SO TO PLOT BLANK GRID)
theme_grid <- theme(
axis.line = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.ticks.length = unit(0.0001, "mm"),
axis.ticks.margin = unit(c(0,0,0,0), "lines"),
legend.position = "none",
panel.background = element_rect(fill = "transparent"),
panel.border = element_blank(),
panel.grid.major = element_line(colour="grey"),
panel.grid.minor = element_line(colour="grey"),
panel.margin = unit(c(-0.1,-0.1,-0.1,-0.1), "mm"),
plot.margin = unit(c(5,0,5,0.01), "mm")
)
theme_bare <- theme_grid +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
)
## LOAD GROUP DATA AND P values from csv file
groupData
## SYNTHESIZE SOME PLOT DATA - you can load csv instead
## EXPECTS 2 columns - integer for 'ID' matching groupdatacsv
## AND 'HR' Hazard Rate
hazardData<-expand.grid(ID=1:nrow(groupData),HR=1:6)
hazardData$HR<-1.3-runif(nrow(hazardData))*0.7
hazardData<-rbind(hazardData,ddply(groupData,.(Group),summarize,ID=max(ID)+0.1,HR=NA)[,2:3])
hazardData<-rbind(hazardData,data.frame(ID=c(0,-1:(-2-blankRows),max(groupData$ID)+1,max(groupData$ID)+2),HR=NA))
## Make the min/max mean labels
hrlabels<-ddply(hazardData[!is.na(hazardData$HR),],.(ID),summarize,lab=paste(round(mean(HR),2)," (",round(min(HR),2),"-",round(max(HR),2),")",sep=""))
## Points to plot on the log scale
scaledata<-data.frame(ID=0,HR=c(0.2,0.6,0.8,1.2,1.8))
## Pull out the Groups & P values
group_p<-ddply(groupData,.(Group),summarize,P=mean(P_G),y=max(ID)+0.1)
## identify the rows to be highlighted, and
## build a function to add the layers
hl_rows<-data.frame(ID=(1:floor(length(unique(hazardData$ID[which(hazardData$ID>0)]))/2))*2,col="lightgrey")
hl_rows$ID<-hl_rows$ID+blankRows+1
hl_rect<-function(col="white",alpha=0.5){
rectGrob( x = 0, y = 0, width = 1, height = 1, just = c("left","bottom"), gp=gpar(alpha=alpha, fill=col))
}
## DATA FOR TEXT LABELS
RtLabels<-data.frame(x=c(rep(length(unique(hazardData$ID))-0.2,times=3)),
y=c(0.6,6,10),
lab=c("Hazard Ratio\n(95% CI)","P Value","P Value for\nInteraction"))
LfLabels<-data.frame(x=c(rep(length(unique(hazardData$ID))-0.2,times=2)),
y=c(0.5,4),
lab=c("Subgroup","No. of\nPatients"))
LegendLabels<-data.frame(x=c(rep(1,times=2)),
y=c(0.5,1.8),
lab=c("Off-Pump CABG Better","On-Pump CABG Better"))
## BASIC PLOT
haz<-ggplot(hazardData,aes(factor(ID),HR))+ labs(x=NULL, y=NULL)
## RIGHT PANEL WITH LOG SCALE
rightPanel<-haz +
apply(hl_rows,1,function(x)annotation_custom(hl_rect(x["col"],alpha=0.4),as.numeric(x["ID"])-0.5,as.numeric(x["ID"])+0.5,-20,20)) +
geom_segment(aes(x = 2, y = 1, xend = 1.5, yend = 1)) +
geom_hline(aes(yintercept=1),linetype=2, linewidth=0.5)+
geom_boxplot(fill=boxColor,size=0.5, alpha=0.8)+
scale_y_log10() + coord_flip() +
geom_text(data=scaledata,aes(3,HR,label=HR), vjust=0.5, size=dataSize) +
geom_text(data=RtLabels,aes(x,y,label=lab, fontface="bold"), vjust=0.5, size=titleSize) +
geom_text(data=hrlabels,aes(factor(ID),4,label=lab),vjust=0.5, hjust=1, size=dataSize) +
geom_text(data=group_p,aes(factor(y),11,label=P, fontface="bold"),vjust=0.5, hjust=1, size=dataSize) +
geom_text(data=groupData,aes(factor(ID),6.5,label=P_S),vjust=0.5, hjust=1, size=dataSize) +
geom_text(data=LegendLabels,aes(x,y,label=lab, fontface="bold"),hjust=0.5, vjust=1, size=titleSize) +
geom_point(data=scaledata,aes(2.5,HR),shape=3,size=3) +
geom_point(aes(2,12),shape=3,alpha=0,vjust=0) +
geom_segment(aes(x = 2.5, y = 0, xend = 2.5, yend = 13)) +
geom_segment(aes(x = 2, y = 1, xend = 2, yend = 1.8),arrow=arrow(),linetype=1,size=1) +
geom_segment(aes(x = 2, y = 1, xend = 2, yend = 0.2),arrow=arrow(),linetype=1,size=1) +
theme_bare
## LEFT PANEL WITH NORMAL SCALE
leftPanel<-haz +
apply(hl_rows,1,function(x)annotation_custom(hl_rect(x["col"],alpha=0.4),as.numeric(x["ID"])-0.5,as.numeric(x["ID"])+0.5,-20,20)) +
coord_flip(ylim=c(0,5.5)) +
geom_point(aes(x=factor(ID),y=1),shape=3,alpha=0,vjust=0) +
geom_text(data=group_p,aes(factor(y),0.5,label=Group, fontface="bold"),vjust=0.5, hjust=0, size=dataSize) +
geom_text(data=groupData,aes(factor(ID),1,label=Subgroup),vjust=0.5, hjust=0, size=dataSize) +
geom_text(data=groupData,aes(factor(ID),5,label=NoP),vjust=0.5, hjust=1, size=dataSize) +
geom_text(data=LfLabels,aes(x,y,label=lab, fontface="bold"), vjust=0.5, hjust=0, size=4, size=titleSize) +
geom_segment(aes(x = 2.5, y = 0, xend = 2.5, yend = 5.5)) +
theme_bare
## PLOT THEM BOTH IN A GRID SO THEY MATCH UP
grid.arrange(leftPanel,rightPanel, widths=c(1,3), ncol=2, nrow=1)
The issue is that in your right panel plot you first map a continuous value on x via geom_segment and afterwards a discrete value via geom_boxplot. To fix that you could add a scale_x_discrete at the start of your code. Additionally I fixed the minor issues related to vjust in geom_point and a duplicated size argument in one of your geom_text layers.
## BASIC PLOT
haz <- ggplot(hazardData, aes(factor(ID), HR)) +
labs(x = NULL, y = NULL)
## RIGHT PANEL WITH LOG SCALE
rightPanel <- haz +
### Init the discrete x scale
scale_x_discrete() +
###
apply(hl_rows, 1, function(x) annotation_custom(hl_rect(x["col"], alpha = 0.4), as.numeric(x["ID"]) - 0.5, as.numeric(x["ID"]) + 0.5, -20, 20)) +
geom_segment(aes(x = 2, y = 1, xend = 1.5, yend = 1)) +
geom_hline(aes(yintercept = 1), linetype = 2, linewidth = 0.5) +
geom_boxplot(fill = boxColor, size = 0.5, alpha = 0.8) +
scale_y_log10() +
coord_flip() +
geom_text(data = scaledata, aes(3, HR, label = HR), vjust = 0.5, size = dataSize) +
geom_text(data = RtLabels, aes(x, y, label = lab, fontface = "bold"), vjust = 0.5, size = titleSize) +
geom_text(data = hrlabels, aes(factor(ID), 4, label = lab), vjust = 0.5, hjust = 1, size = dataSize) +
geom_text(data = group_p, aes(factor(y), 11, label = P, fontface = "bold"), vjust = 0.5, hjust = 1, size = dataSize) +
geom_text(data = groupData, aes(factor(ID), 6.5, label = P_S), vjust = 0.5, hjust = 1, size = dataSize) +
geom_text(data = LegendLabels, aes(x, y, label = lab, fontface = "bold"), hjust = 0.5, vjust = 1, size = titleSize) +
geom_point(data = scaledata, aes(2.5, HR), shape = 3, size = 3) +
geom_point(aes(2, 12), shape = 3, alpha = 0) +
geom_segment(aes(x = 2.5, y = 0, xend = 2.5, yend = 13)) +
geom_segment(aes(x = 2, y = 1, xend = 2, yend = 1.8), arrow = arrow(), linetype = 1, size = 1) +
geom_segment(aes(x = 2, y = 1, xend = 2, yend = 0.2), arrow = arrow(), linetype = 1, size = 1) +
theme_bare
## LEFT PANEL WITH NORMAL SCALE
leftPanel <- haz +
apply(hl_rows, 1, function(x) annotation_custom(hl_rect(x["col"], alpha = 0.4), as.numeric(x["ID"]) - 0.5, as.numeric(x["ID"]) + 0.5, -20, 20)) +
coord_flip(ylim = c(0, 5.5)) +
geom_point(aes(x = factor(ID), y = 1), shape = 3, alpha = 0) +
geom_text(data = group_p, aes(factor(y), 0.5, label = Group, fontface = "bold"), vjust = 0.5, hjust = 0, size = dataSize) +
geom_text(data = groupData, aes(factor(ID), 1, label = Subgroup), vjust = 0.5, hjust = 0, size = dataSize) +
geom_text(data = groupData, aes(factor(ID), 5, label = NoP), vjust = 0.5, hjust = 1, size = dataSize) +
geom_text(data = LfLabels, aes(x, y, label = lab, fontface = "bold"), vjust = 0.5, hjust = 0, size = titleSize) +
geom_segment(aes(x = 2.5, y = 0, xend = 2.5, yend = 5.5)) +
theme_bare
## PLOT THEM BOTH IN A GRID SO THEY MATCH UP
grid.arrange(leftPanel, rightPanel, widths = c(1, 3), ncol = 2, nrow = 1)
EDIT To get rid of the gap in your horizontal line and/or to extend the lines on the left and the right set yend=Inf and/or y=-Inf in the geom_segmentwhich draws the line.
library(gridExtra)
library(ggplot2)
## BASIC PLOT
haz <- ggplot(hazardData, aes(factor(ID), HR)) +
labs(x = NULL, y = NULL)
## RIGHT PANEL WITH LOG SCALE
rightPanel <- haz +
### Init the discrete x scale
scale_x_discrete() +
###
apply(hl_rows, 1, function(x) annotation_custom(hl_rect(x["col"], alpha = 0.4), as.numeric(x["ID"]) - 0.5, as.numeric(x["ID"]) + 0.5, -20, 20)) +
geom_segment(aes(x = 2, y = 1, xend = 1.5, yend = 1)) +
geom_hline(aes(yintercept = 1), linetype = 2, linewidth = 0.5) +
geom_boxplot(fill = boxColor, size = 0.5, alpha = 0.8) +
scale_y_log10() +
coord_flip() +
geom_text(data = scaledata, aes(3, HR, label = HR), vjust = 0.5, size = dataSize) +
geom_text(data = RtLabels, aes(x, y, label = lab, fontface = "bold"), vjust = 0.5, size = titleSize) +
geom_text(data = hrlabels, aes(factor(ID), 4, label = lab), vjust = 0.5, hjust = 1, size = dataSize) +
geom_text(data = group_p, aes(factor(y), 11, label = P, fontface = "bold"), vjust = 0.5, hjust = 1, size = dataSize) +
geom_text(data = groupData, aes(factor(ID), 6.5, label = P_S), vjust = 0.5, hjust = 1, size = dataSize) +
geom_text(data = LegendLabels, aes(x, y, label = lab, fontface = "bold"), hjust = 0.5, vjust = 1, size = titleSize) +
geom_point(data = scaledata, aes(2.5, HR), shape = 3, size = 3) +
geom_point(aes(2, 12), shape = 3, alpha = 0) +
geom_segment(aes(x = 2.5, y = 0, xend = 2.5, yend = Inf)) +
geom_segment(aes(x = 2, y = 1, xend = 2, yend = 1.8), arrow = arrow(), linetype = 1, size = 1) +
geom_segment(aes(x = 2, y = 1, xend = 2, yend = 0.2), arrow = arrow(), linetype = 1, size = 1) +
theme_bare
## LEFT PANEL WITH NORMAL SCALE
leftPanel <- haz +
apply(hl_rows, 1, function(x) annotation_custom(hl_rect(x["col"], alpha = 0.4), as.numeric(x["ID"]) - 0.5, as.numeric(x["ID"]) + 0.5, -20, 20)) +
coord_flip(ylim = c(0, 5.5)) +
geom_point(aes(x = factor(ID), y = 1), shape = 3, alpha = 0) +
geom_text(data = group_p, aes(factor(y), 0.5, label = Group, fontface = "bold"), vjust = 0.5, hjust = 0, size = dataSize) +
geom_text(data = groupData, aes(factor(ID), 1, label = Subgroup), vjust = 0.5, hjust = 0, size = dataSize) +
geom_text(data = groupData, aes(factor(ID), 5, label = NoP), vjust = 0.5, hjust = 1, size = dataSize) +
geom_text(data = LfLabels, aes(x, y, label = lab, fontface = "bold"), vjust = 0.5, hjust = 0, size = titleSize) +
geom_segment(aes(x = 2.5, y = -Inf, xend = 2.5, yend = Inf)) +
theme_bare
## PLOT THEM BOTH IN A GRID SO THEY MATCH UP
grid.arrange(leftPanel, rightPanel, widths = c(1, 3), ncol = 2, nrow = 1)
How can I add an arrow next to the x and y axis titles, such as the picture below?
quadrant <- ggplot(quadrants, aes(x=Ampolla, y=Energia, label=Branca_percIPI))+
coord_fixed() +
coord_cartesian(clip = 'off') +
scale_x_continuous(expand = c(0, 0), limits = c(-5, 50)) +
scale_y_continuous(expand = c(0, 0), limits = c(0,50))+
geom_vline(xintercept = 11.3, color = "grey50", size=1.2) +
geom_hline(yintercept = 17.7, color = "grey50", size=1.2) +
geom_text_repel(size = 7,
colour = "#2896BA",
min.segment.length = Inf, hjust="right", nudge_x=0.9, nudge_y=1.5, force=1,
arrow = arrow(length=unit(0.5,"cm"), ends="first"), lineheight = 1)+
geom_point(colour="#2896BA", size=3.5)+
labs(title = "Incidència dels colls d'ampolla i dependència energètica total",
subtitle="(% de variació interanual de l'IPI*, % d'empreses afectades pels colls d'ampolla i % de dependència energètica)",
x = "Colls d'ampolla (% d'empreses afectades)**",
y = "Dependència energètica total (%)")+
annotation_custom(segmentsGrob(c(0.3, -0.1), c(-0.085, 0.28),
c(1, -0.1), c(-0.085, 0.28), gp = gpar(lwd = 2),
arrow = arrow(length = unit(2.5, 'mm'))))
You can turn clipping off inside coord_cartesian and add custom annotations for the arrows using segmentsGrob from the grid package inside annotation_custom:
library(ggplot2)
library(grid)
ggplot(iris, aes(Sepal.Length, Petal.Width)) +
geom_point(color = '#2896ba') +
geom_vline(xintercept = 5.5, color = 'gray50') +
geom_hline(yintercept = 0.8, color = 'gray50') +
coord_cartesian(clip = 'off') +
theme_minimal(base_size = 16) +
theme(axis.title = element_text(hjust = 0),
plot.caption = element_text(hjust = 0),
panel.grid = element_blank()) +
annotation_custom(segmentsGrob(c(0.3, -0.1), c(-0.085, 0.28),
c(1, -0.1), c(-0.085, 1), gp = gpar(lwd = 2),
arrow = arrow(length = unit(2.5, 'mm')))) +
labs(caption = paste0("Here is a very long caption to demonstrate that ",
"it is possible\nto add a very long caption ",
'underneath the x axis, thereby\n',
'emulating the caption in the plot in the question.'),
title = 'Another iris plot', subtitle = 'Just in case you need one')
I'm trying to avoid the bottom annotation being clipped. It's the descender on the "p" that gets chopped off. I've used the "inward" option on vjust.
df <- data.frame(x=c(as.Date("2020-01-01"),as.Date("2022-01-01"))
,y=c(0,1))
df
ggplot(df) +
geom_point(mapping=aes(x=x,y=y)) +
annotate("text",x=mean(df$x),y=-Inf,label="Clipped",hjust=0.5,vjust="inward",size=12,colour="red") +
annotate("text",x=mean(df$x),y=Inf,label="Not Clipped",hjust=0.5,vjust="inward",size=12,colour="blue")
A possible approach would be to use the min and max y values:
library(tidyverse)
df <- data.frame(
x = c(as.Date("2020-01-01"), as.Date("2022-01-01")),
y = c(0, 1)
)
ggplot(df) +
geom_point(aes(x, y)) +
annotate("text", x = mean(df$x), y = min(df$y), label = "Clipped", hjust = 0.5, vjust = "inward", size = 12, colour = "red") +
annotate("text", x = mean(df$x), y = max(df$y), label = "Not Clipped", hjust = 0.5, vjust = "inward", size = 12, colour = "blue")
Created on 2022-07-02 by the reprex package (v2.0.1)
Interesting. Looks like this issue is related to what is chosen as the base line to align the text labels. This could be seen clearly when switching to geom_label where we see that for the clipped label the base line chosen for the alignment is not the end of the "p". Hence the "p"s get clipped off:
ggplot(df) +
geom_point(mapping = aes(x = x, y = y)) +
annotate("label", x = mean(df$x), y = -Inf, label = "Clipped",
hjust = 0.5, vjust = "inward", size = 12, colour = "red", label.padding = unit(0, "lines")) +
annotate("label", x = mean(df$x), y = Inf, label = "Not Clipped",
hjust = 0.5, vjust = "inward", size = 12, colour = "blue", label.padding = unit(0, "lines"))
One possible fix would be to switch to ggtext::GeomRichtext:
library(ggplot2)
library(ggtext)
ggplot(df) +
geom_point(mapping = aes(x = x, y = y)) +
annotate(ggtext::GeomRichtext, x = mean(df$x), y = -Inf, label = "Clipped",
hjust = 0.5, vjust = "inward", size = 12, colour = "red",
label.size = 0, fill = NA, label.padding = unit(0, "lines")) +
annotate(ggtext::GeomRichtext, x = mean(df$x), y = Inf, label = "Not Clipped",
hjust = 0.5, vjust = "inward", size = 12, colour = "blue",
label.size = 0, fill = NA, label.padding = unit(0, "lines"))
If you don't want it to be clipped on the same position, you can use coord_cartesian(clip = "off"):
df <- data.frame(x=c(as.Date("2020-01-01"),as.Date("2022-01-01"))
,y=c(0,1))
library(ggplot2)
ggplot(df) +
geom_point(mapping=aes(x=x,y=y)) +
annotate("text",x=mean(df$x),y=-Inf,label="Clipped",hjust=0.5,vjust="inward",size=12,colour="red") +
annotate("text",x=mean(df$x),y=Inf,label="Not Clipped",hjust=0.5,vjust="inward",size=12,colour="blue") +
coord_cartesian(clip = 'off')
Created on 2022-07-02 by the reprex package (v2.0.1)
I have This ggplot2 I made up to satisfy these conditions:
Remove default background that is hash colour to be plain.
Make (a) to be the plot title located within the plot area that is not close to the line (automatically).
Make $\phi = .8$ to be automatically at the head of the line (still within the plot area).
And sd = 1 to be automatically at the tail of the line.
The four(4) Borderlines to be present.
Gridlines to be a grey colour.
.
## simulate ARIMA(1, 0, 0)
set.seed(799837)
ts <- arima.sim(n = 10, model = list(ar = 0.95, order = c(1, 0, 0)), sd = 10)
gplot <- ggplot(NULL, aes(y = ts, x = seq_along(ts))) +
geom_line(color = "#F2AA4CFF") +
geom_point(color = "#101820FF") +
annotate("text", x = mean(seq_along(ts)), y = max(ts) * 1.1, label = "(a)")+
annotate("text", x = min(seq_along(ts)), y = max(ts) * 1.1, label = 'paste(~phi~"=.8")', parse = TRUE )+
annotate("text", x= max(seq_along(ts)), y = ts[[max(seq_along(ts))]] * 1.1, label = "sd=1") +
xlab('Time') +
ylab('Series') +
theme_bw() +
theme(axis.text = element_text(size = 40, angle = 0, vjust = 0.0, hjust = 0.0), #y-axis label size
axis.title = element_text(size = 40), #x-axis label size
axis.title.x = element_text(angle = 0, hjust = 0.5, vjust = 0.5, size = 40), # x-axis title
axis.title.y = element_text(angle = 90, hjust = 0.5, vjust = 0.5, size = 40), # y-axis title
plot.title = element_text(size = 40, margin = margin(t = 25, b = -20, l = 0, r = 0)),
panel.background = element_blank()) +
scale_x_continuous(breaks = seq(1,10,2)) +
scale_y_continuous(expand = c(0.0, 0.00))
gplot
I want the font of the plot title to increase. As you can see that despite setting the font of the plot title to 40 the font title refuse to increase. This question is a follow-up question from Remove Default Background Color and Make Title in Plot Zone
Daniel
If (a) is the 'title' it's not really the title, it's an annotation.
So to change it's size do it when you add the annotation.
annotate("text", x = mean(seq_along(ts)), y = max(ts) * 1.5, label = "(a)", size = 40)
You might also want to resize the other annotations.
annotate("text", x = mean(seq_along(ts)), y = max(ts) * 1.5, label = "(a)", size = 40) +
annotate("text", x = min(seq_along(ts)), y = max(ts) * 1.5, label = 'paste(~phi~"=.8")', parse = TRUE, size = 10)+
annotate("text", x= max(seq_along(ts)), y = ts[[max(seq_along(ts))]] * 1.5, label = "sd=1", size = 10)
What is the function for generating data for plotting an exponential curve between two points? Here's a logarithmically spaced sequence. I want to create more of a hockey stick between the start and end point, and the real end goal is the vector of values not the plot.
My use case is that I have a parameter for a plotting function that needs to ramp up slowly between the given values as I try to plot more data. This log sequence is better than a linear sequence, but it still rises too rapidly. I need to keep the values lower and then increase exponentially.
library(emdbook)
plot(lseq(.08, .25, 10000))
Update
Here is the full challenge for context. I'm plotting every 400th index value of s. The geom_dotplot in the final plot, p_diff, is wacky and needs certain binwidth values to correctly size the plot. I tried creating a log sequence called binsize and passing it to the parameter. It looks fine at low values of s, but increases to 0.25 too quickly (0.25 works for the final version with 10000 dots).
library(tidyverse)
library(ggtext)
library(patchwork)
library(truncnorm)
library(ggtext)
library(emdbook)
# simulate hypothetical population at control group mean/sd
set.seed(1)
pop <- data.frame(bdi3 = rtruncnorm(10000, a=0, b=63, mean=24.5, sd=10.7),
id = seq(1:10000))
# create plots
diff <- data.frame(NULL)
binsize = lseq(0.08695510, .25, 10000)
for (s in 1:10000) {
set.seed(s)
samp <-
pop %>%
sample_n(332, replace = FALSE)
ctr <-
samp %>%
sample_n(166, replace = FALSE) %>%
mutate(trt = 0)
trt <-
samp %>%
left_join(dplyr::select(ctr, id, trt), by="id") %>%
mutate(trt = ifelse(is.na(trt), 1, trt)) %>%
filter(trt==1)
diff[s,1] <- s
diff[s,2] <- (mean(trt$bdi3)-mean(ctr$bdi3))
names(diff) <- c("id", "diff")
dat <-
ctr %>%
bind_rows(trt)
if (s %in% seq(1, 10000, by=400)) {
# population
p_pop <-
pop %>%
left_join(dplyr::select(dat, id, trt), by="id") %>%
# mutate(trt = ifelse(is.na(trt), 3, trt),
# trt = factor(trt)) %>%
mutate(selected = ifelse(!is.na(trt), 1, 0),
selected = factor(selected)) %>%
ggplot(., aes(x=bdi3, fill=selected, group=id, alpha=selected)) +
geom_dotplot(method = 'dotdensity', binwidth = 0.25, dotsize = 1,
color="white",
binpositions="all", stackgroups=TRUE,
stackdir = "up") +
scale_fill_manual(values=c("grey", "#e69138")) +
scale_alpha_discrete(range = c(0.5, 1)) +
scale_y_continuous(NULL, breaks = NULL) +
theme_minimal() +
scale_x_continuous(limits=c(-0, 63)) +
xlab("\nDepression Severity as measured by BDI-II") +
theme(legend.position = "none",
axis.title = element_text(size=30, color = "#696865"),
axis.text = element_text(size=24, color = "#696865"),
plot.title = element_text(size=36, color = "#696865",
face="bold"),
plot.subtitle = element_markdown(size=27),
plot.margin = margin(0, 0, 1.5, 0, "cm")) +
geom_vline(xintercept = mean(pop$bdi3), linetype="dashed",
color = "#696865", size=1) +
annotate("text", x = mean(pop$bdi3)+1, y = 25,
label = paste0("Population mean = ",
format(round(mean(pop$bdi3), 1), nsmall = 1)),
hjust = 0, color = "#696865", size=10) +
annotate("text", x = 0, y = 20,
label = paste0("Sample #", s),
hjust = 0, color = "#e69138", size=10) +
ggtitle("Imaginary population of 10,000 patients who meet study criteria",
subtitle="<span style='color:#e69138'>**Orange**</span> dots represent 332 selected patients")
p_samp <-
ggplot(dat, aes(x=bdi3)) + # group=id, fill=factor(trt)
geom_dotplot(method = 'dotdensity', binwidth = 1.2,
fill="#e69138", alpha=.8, color="white",
binpositions="all", stackgroups=TRUE,
stackdir = "up", stroke=1) +
#scale_fill_manual(values=c("#f7f265", "#1f9ac9")) +
scale_y_continuous(NULL, breaks = NULL) +
theme_minimal() +
scale_x_continuous(limits=c(-0, 63)) +
xlab("\nDepression Severity as measured by BDI-II") +
theme(legend.position = "none",
axis.title = element_text(size=30, color = "#696865"),
axis.text = element_text(size=24, color = "#696865"),
plot.title = element_markdown(size=36, color = "#696865",
face="bold"),
plot.subtitle = element_markdown(size=27),
plot.margin = margin(0, 0, 1.5, 0, "cm")) +
geom_vline(xintercept = mean(dat$bdi3), linetype="dashed",
color = "#696865", size=1) +
annotate("text", x = mean(dat$bdi3)+2, y = 1,
label = paste0("Sample mean = ",
format(round(mean(dat$bdi3), 1), nsmall = 1)),
hjust = 0, color = "#696865", size=10) +
annotate("text", x = 0, y = .75,
label = paste0("Sample #", s),
hjust = 0, color = "#e69138", size=10) +
ggtitle("One possible sample of these patients (N=332)",
subtitle="Each dot is a patient sampled from the population who gets randomly assigned to a study arm") +
annotate("text", x = 50, y = .3,
label = "randomize to study arms",
size = 10, color="#696865") +
geom_curve(aes(x = 35, y = .6, xend = 50, yend = .35),
color = "#696865", arrow = arrow(type = "open",
length = unit(0.15, "inches")),
curvature = -.5, angle = 100, ncp =15)
p_ctr <-
ggplot(ctr, aes(x=bdi3)) +
geom_dotplot(method = 'dotdensity', binwidth = 1.6,
color="white", fill="#f7f265", alpha=1,
binpositions="all", stackgroups=TRUE,
stackdir = "up") +
scale_y_continuous(NULL, breaks = NULL) +
theme_minimal() +
scale_x_continuous(limits=c(-0, 63)) +
xlab("\nDepression Severity as measured by BDI-II") +
theme(legend.position = "none",
axis.title = element_text(size=30, color = "#696865"),
axis.text = element_text(size=24, color = "#696865"),
plot.title = element_markdown(size=36, color = "#696865",
face="bold"),
plot.subtitle = element_markdown(size=27),
plot.margin = margin(0, 0, 1.5, 0, "cm")) +
geom_vline(xintercept = mean(pop$bdi3), linetype="dashed",
color = "#696865", size=1) +
annotate("text", x = mean(ctr$bdi3)+2, y = 1,
label = paste0("Control mean = ",
format(round(mean(ctr$bdi3), 1), nsmall = 1)),
hjust = 0, color = "#696865", size=10) +
annotate("text", x = 0, y = .75,
label = paste0("Sample #", s),
hjust = 0, color = "#e69138", size=10) +
ggtitle("50% patients randomly assigned<br>to the <span style='color:#f7f265'>**control**</span> group",
subtitle="166 of the <span style='color:#e69138'>**orange**</span> dots turn <span style='color:#f7f265'>**yellow**</span>")
p_trt <-
ggplot(trt, aes(x=bdi3)) +
geom_dotplot(method = 'dotdensity', binwidth = 1.6,
color="white", fill="#1f9ac9", alpha=1,
binpositions="all", stackgroups=TRUE,
stackdir = "up") +
scale_y_continuous(NULL, breaks = NULL) +
theme_minimal() +
scale_x_continuous(limits=c(-0, 63)) +
xlab("\nDepression Severity as measured by BDI-II") +
theme(legend.position = "none",
axis.title = element_text(size=30, color = "#696865"),
axis.text = element_text(size=24, color = "#696865"),
plot.title = element_markdown(size=36, color = "#696865",
face="bold"),
plot.subtitle = element_markdown(size=27),
plot.margin = margin(0, 0, 1.5, 0, "cm")) +
geom_vline(xintercept = mean(trt$bdi3), linetype="dashed",
color = "#696865", size=1) +
annotate("text", x = mean(trt$bdi3)+2, y = 1,
label = paste0("Treatment mean = ",
format(round(trt$bdi3, 1), nsmall = 1)),
hjust = 0, color = "#696865", size=10) +
annotate("text", x = 0, y = .75,
label = paste0("Sample #", s),
hjust = 0, color = "#e69138", size=10) +
ggtitle("50% patients randomly assigned<br>to the <span style='color:#1f9ac9'>**treatment**</span> group",
subtitle="166 of the <span style='color:#e69138'>**orange**</span> dots turn <span style='color:#1f9ac9'>**blue**</span>")
p_diff <-
diff %>%
mutate(color=ifelse(diff < -2.3 | diff > 2.3, 1, 0)) %>%
mutate(color=factor(color)) %>%
ggplot(., aes(x=diff, fill=color, group=id)) +
geom_dotplot(method = 'dotdensity', binwidth = binsize[s], dotsize = 1,
color="white",
binpositions="all", stackgroups=TRUE,
stackdir = "up") +
scale_fill_manual(values=c("grey", "red")) +
scale_y_continuous(NULL, breaks = NULL) +
theme_minimal() +
scale_x_continuous(breaks=c(-5:5), limits=c(-5, 5)) +
xlab("\nAverage Treatment Effect (Treatment Mean - Control Mean)") +
theme(legend.position = "none",
axis.title = element_text(size=30, color = "#696865"),
axis.text = element_text(size=24, color = "#696865"),
plot.title = element_text(size=36, color = "#696865",
face="bold"),
plot.subtitle = element_markdown(size=27)) +
geom_vline(xintercept = 0, linetype="dashed",
color = "#696865", size=1) +
annotate("text", x = 0.2, y = 25, label = "No effect",
hjust = 0, color = "#696865", size=10) +
ggtitle("Simulation based null distribution",
subtitle="Plausible estimates of the treatment effect if the hypothesis of no effect is true") +
geom_vline(xintercept = 2.3, linetype="dotted",
color = "red", size=1) +
geom_vline(xintercept = -2.3, linetype="dotted",
color = "red", size=1) +
annotate("text", x = 2.5, y = 25, label = "Reject null",
hjust = 0, color = "red", size=10) +
annotate("text", x = -2.5, y = 25, label = "Reject null",
hjust = 1, color = "red", size=10) +
annotate("text", x = -5, y = 20,
label = paste0("Sample #", s),
hjust = 0, color = "#e69138", size=10)
p_all <- p_pop / p_samp / (p_trt + p_ctr) / p_diff +
plot_layout(heights = c(2, 2, 1, 2))
ggsave(paste0("animate/", s, ".png"),
height = 40, width = 18.5, units = "in",
dpi = 300)
}
}
The second plot to generate, s==401, looks fine. binsize[401] works for this many dots. But by the 5th plot, s==1601, the dots to not fit. binsize[1601] is too high.
I'm thinking that if I could create a better vector of values for binsize that rises more slowly to 0.25 this will work.
This is more of a maths question rather than a programming question, but there's a fairly simple programming solution.
Here's a simple function you can try. It allows you to produce a sequence of numbers between a starting and ending number just like the lseq function, but includes a shape parameter that controls how "exponential" the numbers appear.
seq_exp <- function(start, stop, n, shape)
{
(stop - start) * exp(seq(0, shape, length.out = n))/exp(shape) + start
}
So you're probably looking for something like this:
plot(seq_exp(0.08, 0.25, 10000, shape = 10))
If you set the shape parameter to 1 it is just a normal exponential curve like in lseq:
plot(seq_exp(0.08, 0.25, 10000, shape = 1))
And of course you can play around with different values:
plot(seq_exp(0.08, 0.25, 10000, shape = 5))
plot(seq_exp(0.08, 0.25, 10000, shape = 30))
Created on 2020-04-01 by the reprex package (v0.3.0)