Maybe it's already too late for me to be working on my code.
I have to redo my figure in ggplot2, and I am unable to do so.
My errorbars are not showing and I cannot understand why.
# Question code exactly as posted; this is the version that raises the
# geom_errorbarh error quoted right after it. Kept unchanged on purpose.
dev.new()
ggplot() +
# Centroids: mean_LIN on x, mean_ALA on y. Note that size and alpha sit inside
# aes(), so they are treated as mapped aesthetics rather than fixed settings.
geom_point(data = conf_intervals, aes(y = mean_ALA, x = mean_LIN, shape=trophic,
color=feeding_type_2,
size = 3,
alpha = 0.5)) + scale_shape_manual("Trophic level", values = c(15:19)) +
# The first unnamed aes() argument is x, so mean_ALA is used as x here even
# though the points above use mean_LIN as x.
geom_errorbar(data = conf_intervals,aes(mean_ALA, ymin = mean_ALA - se_ALA,
ymax = mean_ALA + se_ALA,)) +
# Supplies x (positionally), ymin and ymax; no y, xmin or xmax is given, which
# matches the "missing aesthetics: y" error reported below.
geom_errorbarh(data = conf_intervals,aes(mean_LIN, ymin = mean_LIN - se_LIN,
ymax = mean_LIN + se_LIN)) +
# Raw observations. This line has no trailing "+", so the plot expression ends
# here and the labs()/theme_classic() lines below form a separate expression.
geom_point(data = CSIA_inverts_basal, aes(y=ALA.d13C, x=LIN.d13C, color=feeding_type_2))
labs (title="Biplot of compound stable isotopes- Centroids with 95 % CI", subtitle="LIN VS. ALA",
y=expression({delta}^13*C[ALA]~'\211'~VPDB),
x=expression({delta}^13*C[LIN]~'\211'~VPDB)) +
# guides(color = FALSE, shape = FALSE) +
theme_classic()
Error: geom_errorbarh requires the following missing aesthetics: y
Run rlang::last_error() to see where the error occurred.
In addition: Warning message:
Ignoring unknown aesthetics: x
Here is my example data:
dput(head(CSIA_inverts_basal))
structure(list(d13C.VPDB = c(-35.10487341, -34.85465717, -34.67216423,
-34.06032315, -33.68548439, -33.4811546), d15.NAIR = c(-6.321847159,
-5.384989361, -2.638749276, -4.986045928, -5.946279778, -6.648526348
), ALA.d13C = c(-43.2375195, -44.77813854, -42.1921855, -41.58363894,
-39.156857, -40.33135344), LIN.d13C = c(-40.864145, -42.32043061,
-41.4247005, -36.08156681, -39.45744387, -37.76516617), combi = structure(c(1L,
1L, 1L, 1L, 1L, 1L), levels = c("epilithon", "fresh.leaves",
"gammarus", "grazing.ephemeroptera", "predatory.plecoptera",
"salmonid.eyes", "shreddering.plecoptera", "submerged.leaves"
), class = "factor"), feeding_type = c("epilithon", "epilithon",
"epilithon", "epilithon", "epilithon", "epilithon"), sampling.time = c("summer",
"fall", "summer", "fall", "fall", "fall"), year = c(2018L, 2016L,
2018L, 2016L, 2016L, 2016L), split = structure(c(2L, 2L, 2L,
2L, 2L, 2L), levels = c("consumer", "resource"), class = "factor"),
split_2 = c("epilithon", "epilithon", "epilithon", "epilithon",
"epilithon", "epilithon"), split_3 = c("epilithon", "epilithon",
"epilithon", "epilithon", "epilithon", "epilithon"), feeding_type_2 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), levels = c("Epilithon", "Fresh leaves",
"Grazer", "Salmonid (Eyes)", "Predator", "Submerged leaves",
"Shredder"), class = "factor"), trophic = structure(c(1L,
1L, 1L, 1L, 1L, 1L), levels = c("Base", "Non-predatory invertebrate",
"Predatory invertebrate", "Predator"), class = "factor")), row.names = 2:7, class = "data.frame")
dput(conf_intervals)
structure(list(trophic = structure(c(1L, 1L, 1L, 2L, 2L, 3L,
4L), levels = c("Base", "Non-predatory invertebrate", "Predatory invertebrate",
"Predator"), class = "factor"), feeding_type_2 = structure(c(1L,
2L, 6L, 3L, 7L, 5L, 4L), levels = c("Epilithon", "Fresh leaves",
"Grazer", "Salmonid (Eyes)", "Predator", "Submerged leaves",
"Shredder"), class = "factor"), mean_ALA = c(-42.1, -39.7, -38.7,
-45.7, -40.3, -42.8, -42.7), mean_LIN = c(-39.2, -40, -37.2,
-40.8, -35.9, -36.7, -37.9), se_ALA = c(1.1, 1.1, 1.1, 2.2, 1.2,
1.9, 0.4), se_LIN = c(1.1, 1.1, 0.6, 1.8, 0.9, 1.3, 0.6), N_ALA = c(12L,
14L, 10L, 9L, 14L, 7L, 17L), LIN_N = c(12L, 14L, 10L, 9L, 14L,
7L, 17L)), class = "data.frame", row.names = c(NA, -7L))
Can someone help me?
geom_errorbarh doesn't have an aesthetic called x. It has a y, an xmin and an xmax. I suspect you are mixing up the x and y variables in your errorbar calls too, so check these carefully. Also, you should move alpha and size outside of aes so they don't appear in the legend.
It's also a good idea to make sure your code is formatted in such a way that it is easier to read and debug. Limiting your line length and using inheritance of the data passed to your initial ggplot call helps to simplify things a bit too.
# Biplot of the group centroids in conf_intervals with vertical and horizontal
# error bars, overlaid with the raw observations from CSIA_inverts_basal.
ggplot(conf_intervals) +
  # Centroids. size and alpha are fixed settings, so they stay outside aes()
  # and do not create legend entries.
  geom_point(
    aes(x = mean_LIN, y = mean_ALA, shape = trophic, color = feeding_type_2),
    size = 3, alpha = 0.5
  ) +
  scale_shape_manual("Trophic level", values = 15:19) +
  # Vertical bars: x is the LIN centroid, the extent is mean_ALA -/+ se_ALA.
  # NOTE(review): the title says "95 % CI" but the bars span +/- one se_* --
  # confirm which of the two is intended.
  geom_errorbar(
    aes(x = mean_LIN, ymin = mean_ALA - se_ALA, ymax = mean_ALA + se_ALA)
  ) +
  # Horizontal bars need y, xmin and xmax (not x, ymin, ymax).
  geom_errorbarh(
    aes(y = mean_ALA, xmin = mean_LIN - se_LIN, xmax = mean_LIN + se_LIN)
  ) +
  # Raw data points use their own data frame.
  geom_point(
    data = CSIA_inverts_basal,
    aes(x = LIN.d13C, y = ALA.d13C, color = feeding_type_2)
  ) +
  # "\u2030" is the per-mille sign. The original byte escape '\211' only
  # renders as that sign in a Windows-1252 session and is an invalid string
  # in a UTF-8 session.
  labs(
    title = "Biplot of compound stable isotopes- Centroids with 95 % CI",
    subtitle = "LIN VS. ALA",
    y = expression({delta}^13*C[ALA]~"\u2030"~VPDB),
    x = expression({delta}^13*C[LIN]~"\u2030"~VPDB)
  ) +
  theme_classic()
Related
Hello everyone I would need help in order to get a nice geom_segment plot with ggplot2.
Here are the data
structure(list(molecule = structure(c(1L, 1L, 2L, 2L, 3L, 4L,
4L, 5L, 6L), .Label = c("scaffold1", "scaffold2", "scaffold3",
"scaffold4", "scaffold5", "scaffold6"), class = "factor"), gene = structure(1:9, .Label = c("Gene1",
"Gene2", "Gene3", "Gene4", "Gene5", "Gene6", "Gene7", "Gene8",
"Gene9"), class = "factor"), start_gene = c(64000L, 80000L, 60000L,
20000L, 22000L, 20000L, 35000L, 17000L, 2000L), end_gene = c(68000L,
83000L, 68000L, 28000L, 29000L, 33000L, 38000L, 19000L, 2500L
), start_scaff = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), end_scaff = c(125000L,
125000L, 80000L, 80000L, 60000L, 40000L, 40000L, 20000L, 5000L
), strand = structure(c(1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("forward",
"reverse"), class = "factor"), direction = c(1L, 1L, 1L, -1L,
1L, -1L, 1L, -1L, 1L)), class = "data.frame", row.names = c(NA,
-9L))
With these data and this code:
library(ggplot2)
# Scaffold/gene map: one grey bar per molecule from start_scaff to end_scaff,
# with each gene drawn on top as an arrow and labelled by name.
ggplot(
  data = tab,
  mapping = aes(
    x = start_scaff, xend = end_scaff,
    y = molecule, yend = molecule
  )
) +
  # Background bar for the whole scaffold.
  geom_segment(size = 3, col = "grey80") +
  # Gene arrows: the segment runs start -> end when direction == 1 and
  # end -> start otherwise, so the arrow head follows the direction column.
  geom_segment(
    mapping = aes(
      x = ifelse(direction == 1, start_gene, end_gene),
      xend = ifelse(direction == 1, end_gene, start_gene)
    ),
    arrow = arrow(length = unit(0.1, "inches")),
    size = 2
  ) +
  # Gene names, nudged slightly above each row.
  geom_text(
    mapping = aes(x = start_gene, y = molecule, label = gene),
    nudge_y = 0.2
  ) +
  # Reverse the factor levels of molecule along the y axis.
  scale_y_discrete(limits = rev(levels(tab$molecule))) +
  theme_minimal()
I can manage to create this plot:
And I'm looking for settings in order to get the segment more like rectangles with a small arrow such as :
I think you may want to look at the gggenes package - may help you not only for the arrows :)
From the example in the vignette, I used the settings to get the head to the same width as the segment.
Another advantage of using this geom, you can use alpha
library(gggenes)
library(ggplot2)
# Same scaffold/gene map, but genes are drawn with gggenes::geom_gene_arrow
# so they appear as rectangles with a small arrow head.
ggplot(
  data = mydat,
  mapping = aes(
    x = start_scaff, xend = end_scaff,
    y = molecule, yend = molecule
  )
) +
  # Background bar for the whole scaffold.
  geom_segment(size = 3, col = "grey80") +
  # xmin/xmax are swapped when direction != 1 so the arrow head follows the
  # direction column; head height/width are set explicitly via unit().
  geom_gene_arrow(
    mapping = aes(
      xmin = ifelse(direction == 1, start_gene, end_gene),
      xmax = ifelse(direction == 1, end_gene, start_gene)
    ),
    arrowhead_height = unit(3, "mm"),
    arrowhead_width = unit(1, "mm")
  ) +
  # Gene names, nudged slightly above each row.
  geom_text(
    mapping = aes(x = start_gene, y = molecule, label = gene),
    nudge_y = 0.2
  ) +
  # Reverse the factor levels of molecule along the y axis.
  scale_y_discrete(limits = rev(levels(mydat$molecule))) +
  theme_minimal()
I am plotting the proportion of deep sleep (y axis) vs days (x axis). I would like to add vertical shaded area for a better understanding (e.g. grey for week-ends, orange for sick period...).
I have tried using geom_ribbon (I created a variable that takes the value 30, which is the top of my y axis, if the date falls on a weekend - information given in another column), but instead of getting rectangles, I get trapezoids.
In another post, someone proposed using "geom_rect", or "annotate" if one knows the x and y coordinates, but I don't see how to adapt it to my case, where I want the colored area repeated for every weekend (it is not exactly every 7 days because some data are missing).
Do you have any idea ?
Many thanks in advance !
# Question code as posted: P.DEEP against DATEID, one facet row per MONTH.
ggplot(Sleep.data, aes(x = DATEID)) +
# Line and points for the deep-sleep proportion; points are coloured by Sign.deep.
geom_line(aes(y = P.DEEP, group = 1), col = "deepskyblue3") +
geom_point(aes(y = P.DEEP, group = 1, col = Sign.deep)) +
# Hide the colour legend.
guides(col=FALSE) +
# Weekend shading attempt: a band between the columns min and max.WE. A ribbon
# connects the values at consecutive x positions, so where max.WE switches
# between 30 and 0 the edge is slanted -- presumably the trapezoids described
# in the question.
geom_ribbon(aes(ymin = min, ymax = max.WE), fill = '#6495ED80') +
facet_grid(MONTH~.) +
# Horizontal reference lines at 15 (solid) and 20 (dashed).
geom_hline(yintercept = 15, col = "forestgreen") +
geom_hline(yintercept = 20, col = "forestgreen", linetype = "dashed") +
# Vertical guide lines every 7 days.
geom_vline(xintercept = c(7,14,21,28), col = "grey") +
scale_x_continuous(breaks=seq(0,28,7)) +
scale_y_continuous(breaks=seq(0,30,5)) +
labs(x = "Days",y="Proportion of deep sleep stage", title = "Deep sleep")
Proportion of deep sleep vs time
Head(Sleep.data)
> dput(head(Sleep.data))
structure(list(DATE = structure(c(1L, 4L, 7L, 10L, 13L, 16L), .Label = c("01-Dec-17",
"01-Feb-18", "01-Jan-18", "02-Dec-17", "02-Feb-18", "02-Jan-18",
"03-Dec-17", "03-Feb-18", "03-Jan-18", "04-Dec-17", "04-Feb-18",
"04-Jan-18", "05-Dec-17", "05-Feb-18", "05-Jan-18", "06-Dec-17",
"06-Feb-18", "06-Jan-18", "07-Dec-17", "07-Feb-18", "07-Jan-18",
"08-Dec-17", "08-Jan-18", "09-Dec-17", "09-Feb-18", "09-Jan-18",
"10-Dec-17", "10-Jan-18", "11-Dec-17", "11-Feb-18", "11-Jan-18",
"12-Dec-17", "12-Jan-18", "13-Dec-17", "13-Feb-18", "13-Jan-18",
"14-Dec-17", "14-Feb-18", "14-Jan-18", "15-Dec-17", "15-Jan-18",
"16-Dec-17", "16-Jan-18", "17-Dec-17", "17-Jan-18", "18-Dec-17",
"18-Jan-18", "19-Dec-17", "19-Jan-18", "20-Dec-17", "21-Dec-17",
"21-Jan-18", "22-Dec-17", "22-Jan-18", "23-Dec-17", "23-Jan-18",
"24-Dec-17", "24-Jan-18", "25-Dec-17", "25-Jan-18", "26-Dec-17",
"26-Jan-18", "27-Dec-17", "27-Jan-18", "28-Dec-17", "28-Jan-18",
"29-Dec-17", "29-Jan-18", "30-Dec-17", "30-Jan-18", "31-Dec-17",
"31-Jan-18"), class = "factor"), DATEID = 1:6, MONTH = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("Decembre", "Janvier", "FĂ©vrier"
), class = "factor"), DURATION = c(8.08, 7.43, 6.85, 6.23, 7.27,
6.62), D.DEEP = c(1.67, 1.37, 1.62, 1.75, 1.95, 0.9), P.DEEP = c(17L,
17L, 21L, 24L, 25L, 12L), STIMS = c(0L, 0L, 0L, 0L, 390L, 147L
), D.REM = c(1.7, 0.95, 0.95, 1.43, 1.47, 0.72), P.REM = c(17L,
11L, 12L, 20L, 19L, 9L), D.LIGHT = c(4.7, 5.12, 4.27, 3.05, 3.83,
4.98), P.LIGHT = c(49L, 63L, 55L, 43L, 49L, 66L), D.AWAKE = c(1.45,
0.58, 0.47, 0.87, 0.37, 0.85), P.AWAKE = c(15L, 7L, 6L, 12L,
4L, 11L), WAKE.UP = c(-2L, 0L, 2L, -1L, 3L, 1L), AGITATION = c(-1L,
-3L, -1L, -2L, 2L, -1L), FRAGMENTATION = c(1L, -2L, 2L, 1L, 0L,
-1L), PERIOD = structure(c(3L, 3L, 4L, 4L, 4L, 4L), .Label = c("HOLIDAYS",
"SICK", "WE", "WORK"), class = "factor"), SPORT = structure(c(2L,
1L, 2L, 2L, 2L, 1L), .Label = c("", "Day", "Evening"), class = "factor"),
ACTIVITY = structure(c(6L, 1L, 3L, 4L, 5L, 1L), .Label = c("",
"Bkool", "eBike", "Gym", "Natation", "Run"), class = "factor"),
TABLETS = c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5), Ratio = c(1.15,
2.36, 3.45, 2.01, 5.27, 1.06), Sign = structure(c(2L, 2L,
2L, 2L, 2L, 2L), .Label = c("0", "1"), class = "factor"),
Sign.ratio = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0",
"1"), class = "factor"), Sign.deep = structure(c(2L, 2L,
2L, 2L, 2L, 1L), .Label = c("0", "1"), class = "factor"),
Sign.awake = structure(c(1L, 2L, 2L, 1L, 2L, 1L), .Label = c("0",
"1"), class = "factor"), Sign.light = structure(c(2L, 1L,
1L, 2L, 2L, 1L), .Label = c("0", "1"), class = "factor"),
index = structure(c(1L, 1L, 1L, 1L, 2L, 1L), .Label = c("0",
"1"), class = "factor"), min = c(0, 0, 0, 0, 0, 0), max.WE = c(30,
30, 0, 0, 0, 0)), .Names = c("DATE", "DATEID", "MONTH", "DURATION",
"D.DEEP", "P.DEEP", "STIMS", "D.REM", "P.REM", "D.LIGHT", "P.LIGHT",
"D.AWAKE", "P.AWAKE", "WAKE.UP", "AGITATION", "FRAGMENTATION",
"PERIOD", "SPORT", "ACTIVITY", "TABLETS", "Ratio", "Sign", "Sign.ratio",
"Sign.deep", "Sign.awake", "Sign.light", "index", "min", "max.WE"
), row.names = c(NA, 6L), class = "data.frame")
Thanks for adding the data, that makes it easier to understand exactly what you're working with and to confirm that an answer actually addresses your question.
I thought it would be helpful to make a separate table with just the start and end of each contiguous set of rows with the same PERIOD. I did this using dplyr::case_when, assuming we should mark dates as a "start" if they are the first row in the table (row_number() == 1), or they have a different PERIOD value than the prior row. I mark dates as an "end" if they are the last row of the table, or have a different PERIOD than the next row. I only keep the starts and ends, and spread these into new columns called start and end.
library(tidyverse)
# Build one row per contiguous run of identical PERIOD values, with the
# DATEID of its first row in `start` and of its last row in `end`.
Period_ranges <- Sleep.data %>%
  # Tag each row: "start" if it is the first row or its PERIOD differs from
  # the previous row, "end" if it is the last row or its PERIOD differs from
  # the next row. case_when() takes the first match, so "start" wins.
  mutate(period_status = case_when(row_number() == 1 ~ "start",
                                   PERIOD != lag(PERIOD) ~ "start",
                                   row_number() == n() ~ "end",
                                   PERIOD != lead(PERIOD) ~ "end",
                                   TRUE ~ "other")) %>%
  # Keep only the boundary rows of each run.
  filter(period_status %in% c("start", "end")) %>%
  select(DATEID, PERIOD, period_status) %>%
  # Number the runs so that repeated PERIOD values stay in separate rows.
  mutate(PERIOD_NUM = cumsum(PERIOD != lag(PERIOD) | row_number() == 1)) %>%
  # One column per status: `end` and `start`.
  spread(period_status, DATEID) %>%
  # A run consisting of a single row is only ever tagged "start", which
  # leaves its `end` as NA; such a run ends on the day it starts.
  mutate(end = coalesce(end, start))
# Output based on sample data only. If there's a problem with the full data, please add more. To share full data, use `dput(Sleep.data)` or to share 20 rows use `dput(head(Sleep.data, 20))`.
>Period_ranges
PERIOD PERIOD_NUM end start
1 WE 1 2 1
2 WORK 2 6 3
We can now use that in the plot. If you want to toggle the inclusion of individual PERIOD types, or adjust their appearance separately, you could modify the code below with Period_ranges %>% filter(PERIOD == "WE").
ggplot(Sleep.data, aes(x = DATEID)) +
  # This geom uses its own data (Period_ranges) and must not inherit the
  # plot-level aesthetics, hence inherit.aes = FALSE (spelled out, since F
  # can be reassigned).
  # The rectangles start half a day before and end half a day after each
  # range so that adjacent ranges fill the space between days.
  geom_rect(data = Period_ranges, inherit.aes = FALSE,
            aes(xmin = start - 0.5, xmax = end + 0.5,
                ymin = 0, ymax = 30,
                fill = PERIOD), alpha = 0.5) +
  # Shading colour for each type of PERIOD.
  scale_fill_manual(values = c(
    "WE" = '#6495ED80',
    "WORK" = "gray60"
  )) +
  # rest of your code
Chart based on data sample:
I am trying to add individual bar data labels ONLY if the value is negative. I was able to do it fine for a variable that comprised simple integers, but for a variable that needs to be formatted as dollars with the thousands separator, I can't seem to get rid of the "NA" label.
# Question code as posted: stacked, flipped bar chart of dollars per technician
# (Group.1), filled by shop (Group.2), with labels intended for negative values.
DolSumPlot <- ggplot(data = DolSums, aes(x = Group.1, fill = Group.2)) +
# Bar heights come from the x column via the weight aesthetic.
geom_bar(aes(weight = x), position = position_stack(reverse = TRUE)) +
coord_flip() +
labs(title = "Dollars Billed by Technician and Shop, Between 02/01/2018 and 05/31/2018",
y = "Dollars Billed", x = "Technician", fill = "Shop") +
# Dollar-formatted axis with major breaks every 10000 and minor every 5000.
scale_y_continuous(limits= c(NA,NA),
labels = scales::dollar,
breaks = seq(0, 50000 + 10000, 5000*2),
minor_breaks = seq(0,50000 + 10000, by = 5000)) +
scale_fill_brewer(palette = "Set1") +
# Every row still gets a label here: non-negative values are turned into NA
# before formatting, which is presumably where the "NA" labels described in
# the question come from. Columns are referenced as DolSums$x rather than x.
geom_label(aes(label=scales::dollar(ifelse(DolSums$x < 0, DolSums$x,NA)),
y = DolSums$x),
show.legend = FALSE, size = 2.6, colour = "white", fontface = "bold")
Data:
DolSums = structure(list(Group.1 = c((names)), Group.2 = structure(c(4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Radio",
"Video", "Engineering", "800Mhz", "PSSRP", "Other"), class = "factor"),
x = c(4646, 16008.5, 48793.1, 4040, 14468.25, 13332, 1565.5,
6060, 6549.85, 2929, 4444, 3257.25, 5904, 2029.5, 3321, 6767,
8105.25, 8105.25, 8130.5, 3131, 5075.25, 3383.5, 4418.75,
23381.5, 1363.5, -2323, 29133.45, 2550.25, 505, 26042.85,
35203.55, 35940.85, 1641.25, 45066.2, 37541.7, 606, 45439.9
)), .Names = c("Group.1", "Group.2", "x"), row.names = c(NA,
-37L), class = "data.frame")
You can do this by using the data argument in geom_label and subsetting only rows with negative x. Also note that since you already have DolSums as input, there is no need to write DolSums$x. Instead, use column name to refer to a specific column directly:
library(ggplot2)
# Stacked, flipped bar chart of dollars billed per technician and shop, with
# a dollar-formatted label drawn only on rows whose x is negative.
ggplot(
  data = DolSums,
  mapping = aes(x = Group.1, fill = Group.2)
) +
  # Bar heights come from the x column via the weight aesthetic.
  geom_bar(
    mapping = aes(weight = x),
    position = position_stack(reverse = TRUE)
  ) +
  coord_flip() +
  labs(
    title = "Dollars Billed by Technician and Shop, Between 02/01/2018 and 05/31/2018",
    y = "Dollars Billed",
    x = "Technician",
    fill = "Shop"
  ) +
  # Dollar-formatted axis: major breaks every 10000, minor breaks every 5000.
  scale_y_continuous(
    limits = c(NA, NA),
    labels = scales::dollar,
    breaks = seq(0, 50000 + 10000, 5000 * 2),
    minor_breaks = seq(0, 50000 + 10000, by = 5000)
  ) +
  scale_fill_brewer(palette = "Set1") +
  # The label layer gets its own data: only the rows with negative x, so no
  # label is created for the other rows at all.
  geom_label(
    data = DolSums[DolSums$x < 0, ],
    mapping = aes(label = scales::dollar(x), y = x),
    show.legend = FALSE,
    size = 2.6,
    colour = "white",
    fontface = "bold"
  )
I want to add empty lines between groups to make the graph look less dense.
The first 10 rows of data is attached here:
> dput(droplevels(head(hosp, 10)))
structure(list(mzipid = c("FL-3", "FL-3", "FL-3", "FL-4", "FL-4",
"FL-4", "FL-4", "FL-4", "FL-4", "FL-4"), region_oe = c(1.6459234,
1.6459234, 1.6459234, 1.3399296, 1.3399296, 1.3399296, 1.3399296,
1.3399296, 1.3399296, 1.3399296), region_low_ci = c(0.53041852,
0.53041852, 0.53041852, 0.9696098, 0.9696098, 0.9696098, 0.9696098,
0.9696098, 0.9696098, 0.9696098), region_high_ci = c(3.8410261,
3.8410261, 3.8410261, 1.8049299, 1.8049299, 1.8049299, 1.8049299,
1.8049299, 1.8049299, 1.8049299), hosp_id = structure(c(5L, 9L,
4L, 7L, 8L, 6L, 2L, 1L, 9L, 3L), .Label = c("100025", "100054",
"100093", "100106", "100113", "100122", "100124", "100223", "non_local_hosp"
), class = "factor"), outc_n = c(2L, 2L, 1L, 1L, 6L, 3L, 1L,
19L, 2L, 9L), pre_outc_n = c(0.339173126965761, 0.577540323603898,
0.838447939604521, 0.463049655780196, 2.99622658733279, 1.86155441217125,
0.626489417627454, 12.4346171403304, 1.37301584333181, 6.90619195345789
), oe = c(NA, NA, NA, NA, 2.0025189, 1.6115564, NA, 1.5279924,
NA, 1.3031784), low_ci = c(NA, NA, NA, NA, 0.73123336, 0.32389662,
NA, 0.91951913, NA, 0.59465784), high_ci = c(NA, NA, NA, NA,
4.3587809, 4.7086916, NA, 2.3862863, NA, 2.4740036), state = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "FL", class = "factor"),
id = 1:10, outlier = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = "black", class = "factor"), cv = c(" 66.88",
" 66.88", " 66.88", " 38.37", " 38.37", " 38.37", " 38.37",
" 38.37", " 38.37", " 38.37"), start_id = c(1L, 1L, 1L, 4L,
4L, 4L, 4L, 4L, 4L, 4L), end_id = c(3L, 3L, 3L, 12L, 12L,
12L, 12L, 12L, 12L, 12L)), .Names = c("mzipid", "region_oe",
"region_low_ci", "region_high_ci", "hosp_id", "outc_n", "pre_outc_n",
"oe", "low_ci", "high_ci", "state", "id", "outlier", "cv", "start_id",
"end_id"), row.names = 105:114, class = "data.frame")
The current code is like this:
## draw graph
# O:E ratio per hospital (id on x), grouped by zip region (mzipid), with
# per-hospital confidence ranges and a per-region segment and band.
ggplot(hosp, aes(x = id, y = oe, group = mzipid)) +
  theme(panel.background = element_rect(fill = "white", colour = "grey50")) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  # Hospital-level points and confidence ranges, coloured by the outlier column.
  geom_point(size = 1, show.legend = FALSE, colour = hosp$outlier) +
  # The posted version had one closing parenthesis too many on this line,
  # which stopped the whole expression from parsing.
  geom_linerange(aes(ymin = low_ci, ymax = high_ci), alpha = 0.4,
                 na.rm = TRUE, colour = hosp$outlier) +
  # NOTE(review): `linetype` is not defined in the posted snippet; it is
  # assumed to be a vector defined elsewhere -- confirm.
  scale_linetype_manual(values = linetype, guide = FALSE) +
  # Region-level O:E value drawn as a segment from start_id to end_id.
  geom_segment(aes(x = start_id, xend = end_id, y = region_oe,
                   yend = region_oe, linetype = "1", size = 1),
               na.rm = TRUE) +
  # Region-level confidence band.
  geom_ribbon(aes(ymin = region_low_ci, ymax = region_high_ci), alpha = 0.2,
              linetype = "blank", show.legend = FALSE) +
  # Reference line at O:E = 1.
  geom_hline(aes(yintercept = 1, alpha = 0.2, colour = "red", size = 1),
             show.legend = FALSE) +
  scale_size_identity() +
  scale_x_continuous(name = "hospital id", breaks = seq(0, 350, by = 20)) +
  scale_y_continuous(name = "O:E ratio", breaks = seq(0, 6, by = 0.5),
                     sec.axis = sec_axis(trans = ~., breaks = c(0.1),
                                         labels = "coefficient of variation")) +
  # Text labels placed at the mean x of each group: region name at y = 5.3
  # and the cv column at y = 0.1.
  # NOTE(review): stat_summaryh is not part of ggplot2; presumably it comes
  # from the ggstance package -- confirm it is loaded.
  stat_summaryh(fun.x = mean, aes(label = mzipid, y = 5.3), geom = "text",
                size = 2, srt = 35, check_overlap = FALSE) +
  stat_summaryh(fun.x = mean, aes(label = cv, y = 0.1), geom = "text",
                size = 2, srt = 35, check_overlap = FALSE) +
  guides(colour = FALSE) +
  ggtitle("O:E ratio:hospital level\nmedian clusters") +
  theme(plot.title = element_text(hjust = 0.5))
And the current graph looks like this:
How can I add empty lines/space between each zip group (mzipid)? Given the data provided, it would be great if anyone could show how to add space between the FL-3 group and the FL-4 group.
Thanks!!
This might seem a really stupid mistake on my part but whenever I specify geom_point depending on a factor and choose a hollow point and a solid point (shapes 1 and 19), and plot error bars, it crosses the point.
Here are my data frames:
> dput(head(allbins.sum))
structure(list(T = c(0L, 0L, 10L, 10L, 20L, 20L), treatment = structure(c(1L,
2L, 1L, 2L, 1L, 2L), .Label = c("control bead", "dP bead"), class = "factor"),
N = c(3, 3, 3, 3, 3, 3), cellsBase = c(0, 0, 0.013028995209506,
0.135599858885737, -0.0130289952095061, 0.759359209760127
), sd = c(0, 0, 0.0597063567767786, 0.0469731690178533, 0.0983667566897066,
0.183436089048999), se = c(0, 0, 0.034471481157405, 0.0271199717771474,
0.0567920734541125, 0.105906875391532), ci = c(0, 0, 0.148318812500416,
0.116687820597672, 0.244356569875469, 0.455680506502609),
bin = c("BinA", "BinA", "BinA", "BinA", "BinA", "BinA")), .Names = c("T",
"treatment", "N", "cellsBase", "sd", "se", "ci", "bin"), row.names = c(NA,
6L), class = "data.frame")
> dput(head(allbins.fitdata))
structure(list(wellvidbin = structure(c(1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("A1-002-BinA", "A1-002-BinB", "A1-002-BinC", "A1-031-BinA",
"A1-031-BinB", "A1-031-BinC", "A3-004-BinA", "A3-004-BinB", "A3-004-BinC",
"B1-032-BinA", "B1-032-BinB", "B1-032-BinC", "B4-026-BinA", "B4-026-BinB",
"B4-026-BinC", "C4-027-BinA", "C4-027-BinB", "C4-027-BinC"), class = "factor"),
treatment = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("control bead",
"dP bead"), class = "factor"), wellvid = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("A1-002", "A1-031", "A3-004",
"B1-032", "B4-026", "C4-027"), class = "factor"), bin = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("BinA", "BinB", "BinC"), class = "factor"),
T = c(0L, 10L, 20L, 30L, 40L, 50L), T.factor = structure(1:6, .Label = c("0",
"10", "20", "30", "40", "50", "60"), class = "factor"), cells = c(7L,
11L, 26L, 27L, 28L, 36L), cellsS = c(-1.36568429306349, -1.20296446240061,
-0.592765097414793, -0.552085139749072, -0.511405182083351,
-0.185965520757582), cellsBase = c(0, 0.162719830662884,
0.772919195648701, 0.813599153314422, 0.854279110980143,
1.17971877230591), treatT = structure(c(2L, 4L, 6L, 8L, 10L,
12L), .Label = c("control bead.0", "P bead.0", "control bead.10",
"P bead.10", "control bead.20", "P bead.20", "control bead.30",
"P bead.30", "control bead.40", "P bead.40", "control bead.50",
"P bead.50", "control bead.60", "P bead.60"), class = "factor"),
fit = c(0.0285939715820639, 0.304399288764407, 0.58020460594675,
0.856009923129092, 1.13181524031144, 1.40762055749378), se.fit = c(0.157415367032567,
0.132348142293459, 0.114707848741265, 0.108190467052118,
0.114707848741265, 0.132348142293459), upr = c(0.337128090965895,
0.563801647659587, 0.805031989479629, 1.06806323855124, 1.35664262384431,
1.66702291638896), lwr = c(-0.279940147801767, 0.0449969298692267,
0.35537722241387, 0.643956607706942, 0.906987856778556, 1.1482181985986
)), .Names = c("wellvidbin", "treatment", "wellvid", "bin",
"T", "T.factor", "cells", "cellsS", "cellsBase", "treatT", "fit",
"se.fit", "upr", "lwr"), class = c("data.table", "data.frame"
), row.names = c(NA, -6L), .internal.selfref = <pointer: 0x0000000000100788>)
And the code:
# Question code as posted. The points are added before the error bars, so the
# error bars are drawn on top of the points; with the hollow shape 1 the bar
# stays visible through the point.
ggplot(data=allbins.sum, aes(x=T, y=cellsBase, shape=treatment)) + geom_point(size=5, aes(shape=treatment))+
# Error bars of cellsBase -/+ se.
geom_errorbar(aes(ymin=cellsBase-se, ymax=cellsBase+se), width=2, size=1) +
# Fitted line and band taken from allbins.fitdata (fit, lwr, upr).
# NOTE(review): method = "lm" is combined with stat = "identity" here; the
# answer below reports that this part of the code has an error.
geom_smooth(data=allbins.fitdata, size=1, aes(y=fit, ymin=lwr, ymax=upr),
color="black", method="lm", stat="identity", alpha=0.2)+
# One facet row per bin.
facet_grid(bin~.) +
# Shape 1 (hollow) for the first treatment level, 19 (solid) for the second.
scale_shape_manual(values=c(1, 19))
This gives me this plot:
Any hints on how to have the hollow circles to be hollowed?
I also tried specifying geom_shape (aes(fill=treatment) and then scale_fill_manual but then it is also applied to my geom_smooth
Thanks for the help!
If you mean that you don't want the line of the error bar to be visible through the 'hollow' points, then plot geom_errorbar first, then plot geom_point second, with solid fill, so it will overlay the error bar.
# Mean cellsBase per time point with SE bars; points are drawn after the bars
# so that they cover the bar where the two overlap.
ggplot(data = allbins.sum, aes(x = T, y = cellsBase)) +
  # Plotted first, so it ends up underneath the points.
  geom_errorbar(aes(ymin = cellsBase - se, ymax = cellsBase + se),
                width = 2, size = 1) +
  # Plotted second, with a fillable shape (21) and a black outline; the fill
  # hides the part of the error bar behind the point.
  geom_point(size = 5, shape = 21, color = "black",
             aes(fill = treatment)) +
  # Named values keep the question's mapping: "control bead" was the hollow
  # shape (1) and "dP bead" the solid one (19). An unnamed c('black', 'white')
  # would assign the colours by level order and swap them.
  scale_fill_manual(values = c("control bead" = "white",
                               "dP bead" = "black")) +
  theme_bw()
(The data you posted only has these points for allbins.sum, and the code for allbins.fitdata has an error, so no error bars on this plot)