Related
I'm trying to add significance annotations to an errorbar plot with a factor x-axis and dodged groups within each level of the x-axis. It is a similar but NOT identical use case to this
My base errorbar plot is:
library(ggplot2)
library(dplyr)
# Example data for the plots below, written in dput() form.
# Nine rows (row.names = c(NA, -9L)): three `group` levels (certain, neutral,
# uncertain) within each of three `language` levels (english, dutch, german).
# Columns: x, predicted, std.error, conf.low, conf.high, group, group_col,
# language, top. Within each language, `top` repeats the largest conf.high of
# that language's three rows.
# The object carries class "grouped_df" with `language` as the grouping column.
pres_prob_pd = structure(list(x = structure(c(1, 1, 1, 2, 2, 2, 3, 3, 3), labels = c(`1` = 1,
`2` = 2, `3` = 3)), predicted = c(0.571584427222816, 0.712630712634987,
0.156061969566517, 0.0162388386564817, 0.0371877245103279, 0.0165022541901018,
0.131528946944238, 0.35927812866896, 0.0708662221985375), std.error = c(0.355802875027348,
0.471253661425626, 0.457109887762665, 0.352871728451576, 0.442646879181155,
0.425913568532558, 0.376552208691762, 0.48178172708116, 0.451758041335245
), conf.low = c(0.399141779923204, 0.496138837620712, 0.0701919316506831,
0.00819832576725402, 0.0159620304815404, 0.00722904089045731,
0.0675129352870401, 0.17905347369819, 0.030504893442457), conf.high = c(0.728233665534388,
0.861980236164486, 0.311759350126477, 0.031911364587827, 0.0842227723261319,
0.0372248587668487, 0.240584344249407, 0.590437963881823, 0.156035177669385
), group = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("certain",
"neutral", "uncertain"), class = "factor"), group_col = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("certain", "neutral",
"uncertain"), class = "factor"), language = structure(c(2L, 2L,
2L, 1L, 1L, 1L, 3L, 3L, 3L), .Label = c("english", "dutch", "german"
), class = "factor"), top = c(0.861980236164486, 0.861980236164486,
0.861980236164486, 0.0842227723261319, 0.0842227723261319, 0.0842227723261319,
0.590437963881823, 0.590437963881823, 0.590437963881823)), row.names = c(NA,
-9L), groups = structure(list(language = structure(1:3, .Label = c("english",
"dutch", "german"), class = "factor"), .rows = structure(list(
4:6, 1:3, 7:9), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 3L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
# Shared dodge so the points and error bars of the three groups sit side by
# side within each language.
pd <- position_dodge(width = 0.75)

# Base plot: predicted value per language, one colour/shape per group, with
# conf.low..conf.high drawn as error bars.
p <- ggplot(
  data = pres_prob_pd,
  mapping = aes(x = language, y = predicted, color = group, shape = group)
) +
  geom_point(size = 2, position = pd) +
  geom_errorbar(
    mapping = aes(ymin = conf.low, ymax = conf.high),
    position = pd,
    width = 0.125
  )
p
What I want to do is annotate the plot such that the contrasts between group within each level of language are annotated for significance. I've plotted points representing the relevant contrasts and (toy) sig. annotations as follows:
#' Bump function: lift successive values by a growing offset
#'
#' Element k of `x` (1-based) is raised by `start + (k - 1) * step`, so that
#' annotations sharing the same baseline end up stacked above one another.
#'
#' @param x Numeric vector of baseline heights.
#' @param start Offset added to the first element (default 0.025).
#' @param step Extra offset added for each further element (default 0.075).
#' @return An unnamed numeric vector the same length as `x`; `numeric(0)` when
#'   `x` is empty.
f <- function(x, start = 0.025, step = 0.075) {
  n <- length(x)
  # Running offset built by repeated addition (start, start + step, ...),
  # computed in one pass instead of growing a vector inside a loop.
  # Indexing with seq_len(n) also covers the empty-input case.
  offsets <- cumsum(c(start, rep(step, n)))[seq_len(n)]
  # as.vector() drops names/attributes, matching an element-by-element build.
  as.vector(x) + offsets
}
# Create contrasts: each pair of groups (columns X1 and X2), labelled
# cont_1..cont_3.
combs <- data.frame(
  gtools::combinations(
    3, 2,
    v = c("certain", "neutral", "uncertain"),
    set = FALSE,
    repeats.allowed = FALSE
  )
) %>%
  mutate(contrast = c("cont_1", "cont_2", "cont_3"))

# Repeat the contrasts for every language and attach that language's `top`
# value. Grouping by language makes the later f(top) call restart its bump
# within each language.
combs <- rbind(
  combs %>% mutate(language = "english"),
  combs %>% mutate(language = "dutch"),
  combs %>% mutate(language = "german")
) %>%
  left_join(
    select(pres_prob_pd, language:top) %>% distinct(),
    by = "language"
  ) %>%
  group_by(language)

# Long transform and y_pos: one row per contrast endpoint.
# gather()/spread() live in tidyr, which the library() calls of this script do
# not attach, so they are called with an explicit namespace.
combs_long <- mutate(combs, y_pos = f(top)) %>%
  tidyr::gather(long, probability, X1:X2, factor_key = TRUE) %>%
  mutate(language = factor(language, levels = c("english", "dutch", "german"))) %>%
  arrange(language, contrast)

# Back to wide: one row per contrast, plus the (toy) significance labels.
combs_wide <- combs_long %>% tidyr::spread(long, probability)
combs_wide$p <- rep(c("***", "*", "ns"), 3)
# Overlay the contrast markers and their significance labels on the base plot.
p +
  # One marker per contrast endpoint, coloured and shaped by the group it
  # names, drawn at the bumped height y_pos.
  geom_point(
    data = combs_long,
    mapping = aes(
      x = language,
      y = y_pos,
      color = probability,
      shape = probability
    ),
    position = pd,
    size = 2,
    inherit.aes = TRUE
  ) +
  # Significance label slightly above each contrast's markers.
  geom_text(
    data = combs_wide,
    mapping = aes(x = language, y = y_pos + .025, label = p, group = X1),
    position = position_dodge(.75),
    color = "black",
    inherit.aes = FALSE
  )
What I am failing to achieve is plotting a line connecting each of the contrasts of group within each level of language, as is standard when annotating significant group-wise differences. Any help much appreciated!
The labels for the mosaic plot don't fit on the screen (they're partially cut off), so I'd like to shift the plot to the right so that the labels fit fully. I tried using the `par` function, but to no avail. Any ideas?
# Data sample in dput() form: six rows (row.names = c(NA, 6L)) with two factor
# columns, Road_Type (five levels) and Accident_Severity_combined (two levels).
# The literal is not assigned to a name.
# NOTE(review): presumably the first rows of the `uk1` data frame used in the
# mosaic() call -- confirm.
structure(list(Road_Type = structure(c(4L, 4L, 4L, 4L, 4L, 4L
), .Label = c("Roundabout", "One way Street", "Dual Carriageway",
"Single carriageway", "Slip Road"), class = "factor"), Accident_Severity_combined = structure(c(2L,
2L, 2L, 2L, 1L, 2L), .Label = c("Serious", "Slight"), class = "factor")), .Names = c("Road_Type",
"Accident_Severity_combined"), row.names = c(NA, 6L), class = "data.frame")
>
# Shaded mosaic plot of road type against accident severity.
# All label options go through `labeling_args`, which mosaic() hands to the
# labeling generator (labeling_border); `highlighting_fill` is an argument of
# mosaic() itself.
# NOTE(review): the display names "Gender"/"survival" look carried over from a
# different example -- confirm they are the intended axis titles.
mos <- mosaic(
  ~ Road_Type + Accident_Severity_combined,
  data = uk1,
  shade = TRUE,
  legend = TRUE,
  highlighting_fill = c("darkblue", "red"),
  labeling = labeling_border,
  labeling_args = list(
    set_varnames = c(Accident_Severity_combined = "Gender", Road_Type = "survival"),
    # One entry per plot side for rotation and justification of the labels.
    rot_labels = c(90, 0, 90, 0),
    just_labels = c("left", "left", "right", "right"),
    tl_varnames = FALSE,
    gp_labels = gpar(fontsize = 9)
  )
)
I have two data sets like below
# First data set (the plotted values), in dput() form.
# Nine rows: keys boy1..boy3 at each of the three `time` levels ("24", "48",
# "72"); `place` changes together with `time`. `x` stores the positions as the
# strings "1", "2", "3", and `y` holds the same numbers as `value`.
df1<- structure(list(time = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L,
3L, 3L), .Label = c("24", "48", "72"), class = "factor"), place = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L), .Label = c("B,C", "D,E", "F,G"
), class = "factor"), key = c("boy1", "boy2", "boy3", "boy1",
"boy2", "boy3", "boy1", "boy2", "boy3"), value = c(177.72258835,
0, 74.438539625, 134.3410045, 48915.1, 38.302204425, 97.32286187,
25865.25, 28.67291878), x = c("1", "2", "3", "1", "2", "3", "1",
"2", "3"), y = c(177.72258835, 0, 74.438539625, 134.3410045,
48915.1, 38.302204425, 97.32286187, 25865.25, 28.67291878)), .Names = c("time",
"place", "key", "value", "x", "y"), row.names = c(NA, -9L), class = "data.frame")
# Second data set, same layout and row order as df1 (time, place, key, value,
# x, y; nine rows). `y` again holds the same numbers as `value`.
df2<- structure(list(time = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L,
3L, 3L), .Label = c("24", "48", "72"), class = "factor"), place = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L), .Label = c("B,C", "D,E", "F,G"
), class = "factor"), key = c("boy1", "boy2", "boy3", "boy1",
"boy2", "boy3", "boy1", "boy2", "boy3"), value = c(58.852340736,
0, 21.291893740908, 42.92051958201, 72521.52726, 16.309811239722,
32.403556124268, 38347.81965, 10.342042262244), x = c("1", "2",
"3", "1", "2", "3", "1", "2", "3"), y = c(58.852340736, 0, 21.291893740908,
42.92051958201, 72521.52726, 16.309811239722, 32.403556124268,
38347.81965, 10.342042262244)), .Names = c("time", "place", "key",
"value", "x", "y"), row.names = c(NA, -9L), class = "data.frame")
I want to plot them together, with df2 as the standard deviation for df1.
When I plot df1, I do the following:
library(ggplot2)
# Scatter of y against the discrete x positions, coloured by key, with one
# facet row per time level.
ggplot(df1, aes(x, y, col = key)) +
  geom_point() +
  # x holds the strings "1", "2", "3", so the discrete limits are given as
  # strings too; newer ggplot2 releases flag numeric limits here as continuous
  # limits supplied to a discrete scale.
  scale_x_discrete(
    labels = c("first", "second", "third"),
    limits = c("1", "2", "3")
  ) +
  facet_grid(time ~ .)
But now I want to use the second df as the standard deviation (i.e., the first y-value in df1 is 177.72259, so its standard deviation is the corresponding y-value in df2, which is 58.85234).
If I understand your question correctly, it sounds like you want to include error bars in your plot. This can be accomplished using only a single data frame, if you just add the standard error as an additional variable like so:
# Single combined data frame: `y` carries the df1 measurements and `sd` the
# matching df2 numbers, row for row. The `value` column here also holds the
# df2 numbers. Nine rows, same time/place/key/x layout as df1 and df2.
df <- structure(list(time = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L),
.Label = c("24", "48", "72"), class = "factor"), place = structure(c(1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L), .Label = c("B,C", "D,E", "F,G"), class = "factor"),
key = c("boy1", "boy2", "boy3", "boy1", "boy2", "boy3", "boy1", "boy2", "boy3"),
value = c(58.852340736, 0, 21.291893740908, 42.92051958201, 72521.52726,
16.309811239722, 32.403556124268, 38347.81965, 10.342042262244),
x = c("1", "2", "3", "1", "2", "3", "1", "2", "3"), y = c(177.72258835, 0,
74.438539625, 134.3410045, 48915.1, 38.302204425, 97.32286187, 25865.25, 28.67291878),
sd = c(58.852340736, 0, 21.291893740908, 42.92051958201, 72521.52726, 16.309811239722,
32.403556124268,38347.81965, 10.342042262244)), .Names = c("time", "place", "key",
"value", "x", "y", "sd"), row.names = c(NA, -9L), class = "data.frame")
Then you can add error bars to the plot using geom_errorbar(), as follows (I am borrowing the "free-y" scale trick from @jazzurro's answer above):
# Points with +/- sd error bars, one facet row per time level.
ggplot(df, aes(x, y, col = key)) +
  geom_point() +
  geom_errorbar(aes(ymin = y - sd, ymax = y + sd)) +
  # x holds the strings "1", "2", "3", so the discrete limits are strings too.
  scale_x_discrete(
    labels = c("first", "second", "third"),
    limits = c("1", "2", "3")
  ) +
  # A single facet specification (a second facet_grid() would simply replace
  # the first). `scales` is spelled out rather than relying on partial
  # matching of `scale`; each row gets its own y range.
  facet_grid(time ~ ., scales = "free_y")
Unfortunately your data is a little skewed, in that some measurements are way larger in magnitude than others (especially at time=48 and time=72); you may want to consider a log transformation so that the error bars for the smaller observations do not appear so negligible.
Here is one way for you. I changed the shape of the sd in the second geom_point(). Since the y-scale has a wide range for two of the plots, you see points overlapping.
# df1 values as regular points, df2 values as semi-transparent squares
# (shape 22), one facet row per time level with its own y range.
ggplot() +
  geom_point(data = df1, aes(x, y, col = key)) +
  geom_point(data = df2, aes(x, y, col = key), shape = 22, alpha = 0.3) +
  # x holds the strings "1", "2", "3", so the discrete limits are strings too.
  scale_x_discrete(
    labels = c("first", "second", "third"),
    limits = c("1", "2", "3")
  ) +
  # `scales` spelled out instead of the partially matched `scale`.
  facet_grid(time ~ ., scales = "free_y")
this is my dataset:
> dput(dfw)
# dput() output for `dfw`: eleven rows (row.names = c(NA, -11L)) with columns
# SITE, SPECIES, FRr, Nupr, myc and SITE_Sps (a factor of display labels).
# FRr contains one NA. The object has class "grouped_df" with grouping
# attributes (vars, indices, group_sizes, labels).
# NOTE(review): the `vars = list(SITE, SPECIES, myc)` attributes contain bare
# names, so evaluating this literal as-is appears to need objects with those
# names to exist -- confirm before pasting it back into a session.
structure(list(SITE = c("ASPEN", "ASPEN", "BioCON", "DUKE", "Lancaster",
"Merrit Island", "Nevada FACE", "NZ", "ORNL", "PHACE", "BioCON"
), SPECIES = c("A", "AB", "Legume", "PITA", "mixed", "Oak", "desert",
"grassland", "SG", "grassland", "C3forb"), FRr = c(0.197028535345918,
0.296799297050907, 0.195436310641759, 0.152972526753089, 0.0313948973476966,
0.139533057346518, 0.188221278921143, NA, 0.70542764380006, 0.119320766735777,
0.135665667633474), Nupr = c(0.122177669046786, 0.305573297532757,
0.131181914007488, 0.217519050530067, -0.0436788294371676, 0.153632658941404,
-0.00803217169726427, 0.168440046857285, 0.145172439177718, -0.108563178158001,
0.00546006390438276), myc = c("ECM", "ECM", "N-fixing", "ECM",
"ECM", "ECM", "AM", "AM", "AM", "AM", "AM"), SITE_Sps = structure(c(1L,
2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 3L), .Label = c("Aspen FACE-A",
"Aspen FACE-AB", "BioCON", "BioCON-legumes", "Duke FACE", "Lascaster",
"Florida OTC", "Nevada FACE", "NZ FACE", "ORNL FACE", "PHACE"
), class = "factor")), row.names = c(NA, -11L), vars = list(SITE,
SPECIES, myc), indices = list(0L, 1L, 10L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L), group_sizes = c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
SITE = c("ASPEN", "ASPEN", "BioCON", "BioCON", "DUKE", "Lancaster",
"Merrit Island", "Nevada FACE", "NZ", "ORNL", "PHACE"), SPECIES = c("A",
"AB", "C3forb", "Legume", "PITA", "mixed", "Oak", "desert",
"grassland", "SG", "grassland"), myc = structure(c(2L, 2L,
1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L), .Label = c("am", "ecm",
"ecm+am"), class = "factor")), row.names = c(NA, -11L), class = "data.frame", vars = list(
SITE, SPECIES, myc), .Names = c("SITE", "SPECIES", "myc")), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), .Names = c("SITE", "SPECIES",
"FRr", "Nupr", "myc", "SITE_Sps"))
I want to draw the same background as in the attached figure, added to my current ggplot code:
# Nupr against FRr: filled circles (shape 21) coloured by myc, each labelled
# with its SITE_Sps, plus both zero axes and the dashed 1:1 line.
ggplot(
  data = dfw,
  mapping = aes(x = FRr, y = Nupr, group = myc, label = SITE_Sps)
) +
  geom_point(mapping = aes(fill = myc), shape = 21, size = 4) +
  geom_text() +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  geom_abline(slope = 1, intercept = 0, linetype = "longdash")
I guess I should use the function geom_polygon, but I don't really know how to create a dataset to draw all the required segments, including the colour gradient from dark grey to light grey and white.
Perhaps this could be a start?
# Number of rays. The snippet originally left the right-hand side of this
# assignment blank, which made it chain into the next line and read `nlines`
# before it existed. 25 is a placeholder.
# NOTE(review): set this to the number of lines the figure needs.
nlines <- 25
# Ray angles, evenly spaced from 0 to 2*pi (both ends included).
phis <- seq(0, 2 * pi, by = 2 * pi / nlines)
# Large radius; presumably chosen so the rays reach past the plotted region.
rad <- 999
# End point of each ray, measured from the origin.
xs <- rad * cos(phis)
ys <- rad * sin(phis)
Here is a way using geom_polygon:
# Number of wedges and the angle each one spans; start angles run upward
# from -pi/2.
nlines <- 25
inc <- pi / nlines
phis <- seq(from = -pi / 2, by = inc, length.out = nlines)
rad <- 1

# One data frame per start angle: a triangle from the origin spanning
# [phi, phi + inc], followed by the same triangle with x negated -- six
# vertices that share one group id.
points <- lapply(phis, function(phi) {
  x_near <- rad * cos(phi)
  x_far <- rad * cos(phi + inc)
  y_near <- rad * sin(phi)
  y_far <- rad * sin(phi + inc)
  data.frame(
    x = c(0, x_near, x_far, 0, -x_near, -x_far),
    y = c(0, y_near, y_far, 0, y_near, y_far),
    g = rep(phi, 6) # the start angle doubles as the grouping value
  )
})

# Stack every wedge into the single data frame handed to ggplot.
bckg <- do.call(rbind, points)
# Two data sets are involved (bckg for the wedges, dfw for the observations),
# so every layer names its own data.
ggplot(mapping = aes(FRr, Nupr, group = myc)) +
  # Background wedges; transparency follows the wedge's group value g.
  geom_polygon(
    data = bckg,
    mapping = aes(x = x, y = y, group = g, alpha = g),
    fill = "gray50"
  ) +
  geom_point(
    data = dfw,
    mapping = aes(FRr, Nupr, group = myc, fill = myc),
    shape = 21,
    size = 4
  ) +
  geom_text(
    data = dfw,
    mapping = aes(FRr, Nupr, group = myc, label = SITE_Sps),
    nudge_y = -0.02
  ) +
  geom_hline(data = dfw, yintercept = 0) +
  geom_vline(data = dfw, xintercept = 0) +
  geom_abline(data = dfw, intercept = 0, slope = 1, linetype = "longdash") +
  # The wedges extend beyond the axis limits; the oob function returns values
  # unchanged so out-of-range vertices are kept instead of being dropped.
  scale_x_continuous(limits = c(-0.2, 0.4), oob = function(x, rg) x) +
  scale_y_continuous(limits = c(-0.2, 0.4), oob = function(x, rg) x) +
  scale_alpha_continuous(range = c(1.0, 0), guide = "none") +
  theme(panel.background = element_blank())
Here is the plot:
It is difficult to describe my problem without attaching an image of the plot. I have two groups of data, one with two observations having mean around 1 and error around 1.5; the other has two observations with mean around 30 and error around 2.
But in the plot the bars overlap and the y-axis tick marks are out of order:
0; 0.1; 1; 1.7; 2; 27.8; 29.3; 29.8; 3.2; 31.3; 31.8; 33.3
Data and code (dataframe my.data):
# Example data: two oils (factor1) crossed with two products (factor2), each
# row giving a value, its error, and the min/max bounds used for the crossbars.
my.data <- structure(
  list(
    factor1 = factor(
      c("oil1", "oil1", "oil2", "oil2"),
      levels = c("oil1", "oil2")
    ),
    factor2 = factor(
      c("prod1", "prod2", "prod1", "prod2"),
      levels = c("prod1", "prod2")
    ),
    value = c(1.7, 1, 29.8, 31.3),
    err = c(1.5, 1, 2, 2),
    min = c(0.2, 0, 27.8, 29.3),
    max = c(3.2, 2, 31.8, 33.3)
  ),
  class = "data.frame",
  row.names = c("1", "2", "3", "4")
)
# Plots: products on the x axis, filled by oil.
p1 <- ggplot(
  data = my.data,
  mapping = aes(x = factor2, fill = factor1)
)
# Crossbars from min to max with the centre line at value, dodged so the two
# oils sit side by side within each product.
p2 <- p1 +
  geom_crossbar(
    mapping = aes(y = value, ymin = min, ymax = max),
    width = 0.6,
    position = position_dodge(width = 0.66)
  )
p2
I would greatly appreciate help on this; I have been stuck on it for two days now. Thanks in advance.
Using your data and your script I get: