Related
I have a bar chart which I want also to include some lines that show the percentage difference between them as in the following figure:
The lines in the figure are drawn just to make my point of what I ideally want.
Can someone help me with this?
Here is the dataframe to replicate the figure:
structure(list(shares = c(0.39, 3.04, 9.32, 22.29, 64.97, 0.01,
0.11, 5.83, 21.4, 72.64), quantile = structure(c(4L, 1L, 2L,
3L, 5L, 4L, 1L, 2L, 3L, 5L), .Label = c("2nd Quantile", "3rd Quantile",
"4nd Quantile", "Poorest 20%", "Richest 20%"), class = "factor"),
case = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L
), .Label = c("No Debt", "With Debt"), class = "factor")), row.names = c(NA,
-10L), class = "data.frame")
And here is my code used to make the bar plot:
ggplot(df_cum, aes(fill = case , quantile, shares)) + geom_bar(position =
"dodge", stat = "identity") +
scale_x_discrete(limits = c(
"Poorest 20%",
"2nd Quantile",
"3rd Quantile",
"4nd Quantile",
"Richest 20%"
)) +
theme_minimal()
Your data unchanged:
library(tidyverse)
df_cum<-structure(list(shares = c(0.39, 3.04, 9.32, 22.29, 64.97, 0.01,0.11, 5.83, 21.4, 72.64),
quantile = structure(c(4L, 1L, 2L, 3L, 5L, 4L, 1L, 2L, 3L, 5L),
.Label = c("2nd Quantile", "3rd Quantile", "4nd Quantile", "Poorest 20%", "Richest 20%"), class = "factor"),
case = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), .Label = c("No Debt", "With Debt"), class = "factor")), row.names = c(NA, -10L), class = "data.frame")
Your graph unchanged:
p <- ggplot(df_cum, aes(fill = case , quantile, shares)) +
geom_bar(position = "dodge", stat = "identity") +
scale_x_discrete(limits = c("Poorest 20%", "2nd Quantile", "3rd Quantile", "4nd Quantile", "Richest 20%")) +
theme_minimal()
I used the horizontal error bar to do the trick. Here is my solution:
y = rep(c(3, 5, 13, 25, 75),2)
x = rep(c(1:5), 2)
label = rep(c("-3%", "-5%", "-2%", "-1%", "10%"), 2)
p1 <- p + geom_text(x=x, y=y+2, label=label)
p1 + geom_errorbarh(aes(xmax = (x + 0.3), xmin = (x - 0.3), y = y), height = 0.5)
Now, you get:
You can also adjust both height and width if you like.
I have 2 datasets that I want to plot together. If the data from the second dataset matches a certain condition, it should be plotted with a semi-transparent red color, otherwise it should not be visible.
Furthermore, I would like to include a sub-legend for that second plot but only with 1 element (red color and label = FALSE)
In the example below, only the first bar from the variable "id_1" matches that criteria and therefore has an overlayed red bar. But I am still missing the correct legend.
Test Data:
df <- structure(list(id = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("id_1", "id_2"), class = "factor"),
day = structure(c(17897, 17903, 17910, 17917, 17897, 17903,
17910, 17917, 17897, 17903, 17910, 17917, 17897, 17903, 17910,
17917, 17897, 17903, 17910, 17917, 17897, 17903, 17910, 17917,
17897, 17903, 17910, 17917, 17897, 17903, 17910, 17917), class = "Date"),
variable = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("reas_1", "reas_12",
"reas_123", "reas_1234"), class = "factor"), value = c(0,
0, 3, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 2, 1, 0, 1, 0, 0, 5,
0, 0, 0, 2, 0, 1, 6, 10, 0, 1, 9, 6)), row.names = c(NA,
-32L), class = "data.frame")
overlay <- structure(list(id = structure(1:2, .Label = c("id_1", "id_2"), class = "factor"),
day = structure(c(17897, 17897), class = "Date"), variable = structure(c(1L,
1L), .Label = "plausible_sum", class = "factor"), value = c(10,
0), fill = c("red", "transparent")), row.names = c(NA, -2L
), class = "data.frame")
cbPalette <- c('#3652a3', '#60a0df', '#b7dbff', '#dd0000', '#fd7676')
legendLabels <- c("Var1", "Var2", "Var3", "Var4")
Plot Code:
library(ggplot2)
ggplot() +
geom_bar(data = df, aes(x = day, y = value, fill = variable), stat = "identity", width = 3) +
geom_bar(data = overlay, aes(x = day, y = 16), fill = overlay$fill,
alpha = c(0.3, 0), stat="identity", width = 3) +
facet_wrap(~id, nrow = 1) +
scale_fill_manual(name = "Vars: ", values = c(cbPalette), labels = c(legendLabels))
Result vs. Expected:
The right image has a different legend.
Assign a value in aes() in the geom_bar(data=overlay,...) portion. That allows you to manipulate the legend box for this element in the scale_*_manual() function. In this example, I've assigned aes(..., colour="FALSE") and edited the legend in scale_colour_manual() using the guide argument. override.aes() allows you to manipulate aesthetic parameters of the legend.
library(ggplot2)
ggplot() +
geom_bar(data = df, aes(x = day, y = value, fill = variable), stat = "identity", width = 3) +
geom_bar(data = overlay, aes(x = day, y = 16, colour="FALSE"), fill = overlay$fill,
alpha = c(0.3, 0), stat="identity", width = 3) +
facet_wrap(~id, nrow = 1) +
scale_fill_manual(name = "Vars: ", values = c(cbPalette), labels = c(legendLabels)) +
scale_colour_manual(values = c("FALSE" = "transparent"),
name=c("Overlay"),
guide=guide_legend(override.aes = list(fill=overlay$fill[[1]],
alpha=0.3)))
Plot:
I am plotting the proportion of deep sleep (y axis) vs days (x axis). I would like to add vertical shaded area for a better understanding (e.g. grey for week-ends, orange for sick period...).
I have tried using geom_ribbon (I created a variable taking the value of 30, with is the top of my y axis if the data is during the WE - information given in another column), but instead of getting rectangles, I get trapezes.
In another post, someone proposed the use of "geom_rect", or "annotate" if one's know the x and y coordinates, but I don't see how to adapt it in my case, when I want to have the colored area repeated to all week-end (it is not exactly every 7 days because some data are missing).
Do you have any idea ?
Many thanks in advance !
ggplot(Sleep.data, aes(x = DATEID)) +
geom_line(aes(y = P.DEEP, group = 1), col = "deepskyblue3") +
geom_point(aes(y = P.DEEP, group = 1, col = Sign.deep)) +
guides(col=FALSE) +
geom_ribbon(aes(ymin = min, ymax = max.WE), fill = '#6495ED80') +
facet_grid(MONTH~.) +
geom_hline(yintercept = 15, col = "forestgreen") +
geom_hline(yintercept = 20, col = "forestgreen", linetype = "dashed") +
geom_vline(xintercept = c(7,14,21,28), col = "grey") +
scale_x_continuous(breaks=seq(0,28,7)) +
scale_y_continuous(breaks=seq(0,30,5)) +
labs(x = "Days",y="Proportion of deep sleep stage", title = "Deep sleep")
Proportion of deep sleep vs time
Head(Sleep.data)
> dput(head(Sleep.data))
structure(list(DATE = structure(c(1L, 4L, 7L, 10L, 13L, 16L), .Label = c("01-Dec-17",
"01-Feb-18", "01-Jan-18", "02-Dec-17", "02-Feb-18", "02-Jan-18",
"03-Dec-17", "03-Feb-18", "03-Jan-18", "04-Dec-17", "04-Feb-18",
"04-Jan-18", "05-Dec-17", "05-Feb-18", "05-Jan-18", "06-Dec-17",
"06-Feb-18", "06-Jan-18", "07-Dec-17", "07-Feb-18", "07-Jan-18",
"08-Dec-17", "08-Jan-18", "09-Dec-17", "09-Feb-18", "09-Jan-18",
"10-Dec-17", "10-Jan-18", "11-Dec-17", "11-Feb-18", "11-Jan-18",
"12-Dec-17", "12-Jan-18", "13-Dec-17", "13-Feb-18", "13-Jan-18",
"14-Dec-17", "14-Feb-18", "14-Jan-18", "15-Dec-17", "15-Jan-18",
"16-Dec-17", "16-Jan-18", "17-Dec-17", "17-Jan-18", "18-Dec-17",
"18-Jan-18", "19-Dec-17", "19-Jan-18", "20-Dec-17", "21-Dec-17",
"21-Jan-18", "22-Dec-17", "22-Jan-18", "23-Dec-17", "23-Jan-18",
"24-Dec-17", "24-Jan-18", "25-Dec-17", "25-Jan-18", "26-Dec-17",
"26-Jan-18", "27-Dec-17", "27-Jan-18", "28-Dec-17", "28-Jan-18",
"29-Dec-17", "29-Jan-18", "30-Dec-17", "30-Jan-18", "31-Dec-17",
"31-Jan-18"), class = "factor"), DATEID = 1:6, MONTH = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("Decembre", "Janvier", "FĂ©vrier"
), class = "factor"), DURATION = c(8.08, 7.43, 6.85, 6.23, 7.27,
6.62), D.DEEP = c(1.67, 1.37, 1.62, 1.75, 1.95, 0.9), P.DEEP = c(17L,
17L, 21L, 24L, 25L, 12L), STIMS = c(0L, 0L, 0L, 0L, 390L, 147L
), D.REM = c(1.7, 0.95, 0.95, 1.43, 1.47, 0.72), P.REM = c(17L,
11L, 12L, 20L, 19L, 9L), D.LIGHT = c(4.7, 5.12, 4.27, 3.05, 3.83,
4.98), P.LIGHT = c(49L, 63L, 55L, 43L, 49L, 66L), D.AWAKE = c(1.45,
0.58, 0.47, 0.87, 0.37, 0.85), P.AWAKE = c(15L, 7L, 6L, 12L,
4L, 11L), WAKE.UP = c(-2L, 0L, 2L, -1L, 3L, 1L), AGITATION = c(-1L,
-3L, -1L, -2L, 2L, -1L), FRAGMENTATION = c(1L, -2L, 2L, 1L, 0L,
-1L), PERIOD = structure(c(3L, 3L, 4L, 4L, 4L, 4L), .Label = c("HOLIDAYS",
"SICK", "WE", "WORK"), class = "factor"), SPORT = structure(c(2L,
1L, 2L, 2L, 2L, 1L), .Label = c("", "Day", "Evening"), class = "factor"),
ACTIVITY = structure(c(6L, 1L, 3L, 4L, 5L, 1L), .Label = c("",
"Bkool", "eBike", "Gym", "Natation", "Run"), class = "factor"),
TABLETS = c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5), Ratio = c(1.15,
2.36, 3.45, 2.01, 5.27, 1.06), Sign = structure(c(2L, 2L,
2L, 2L, 2L, 2L), .Label = c("0", "1"), class = "factor"),
Sign.ratio = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0",
"1"), class = "factor"), Sign.deep = structure(c(2L, 2L,
2L, 2L, 2L, 1L), .Label = c("0", "1"), class = "factor"),
Sign.awake = structure(c(1L, 2L, 2L, 1L, 2L, 1L), .Label = c("0",
"1"), class = "factor"), Sign.light = structure(c(2L, 1L,
1L, 2L, 2L, 1L), .Label = c("0", "1"), class = "factor"),
index = structure(c(1L, 1L, 1L, 1L, 2L, 1L), .Label = c("0",
"1"), class = "factor"), min = c(0, 0, 0, 0, 0, 0), max.WE = c(30,
30, 0, 0, 0, 0)), .Names = c("DATE", "DATEID", "MONTH", "DURATION",
"D.DEEP", "P.DEEP", "STIMS", "D.REM", "P.REM", "D.LIGHT", "P.LIGHT",
"D.AWAKE", "P.AWAKE", "WAKE.UP", "AGITATION", "FRAGMENTATION",
"PERIOD", "SPORT", "ACTIVITY", "TABLETS", "Ratio", "Sign", "Sign.ratio",
"Sign.deep", "Sign.awake", "Sign.light", "index", "min", "max.WE"
), row.names = c(NA, 6L), class = "data.frame")
Thanks for adding the data, that makes it easier to understand exactly what you're working with and to confirm that an answer actually addresses your question.
I thought it would be helpful to make a separate table with just the start and end of each contiguous set of rows with the same PERIOD. I did this using dplyr::case_when, assuming we should mark dates as a "start" if they are the first row in the table (row_number() == 1), or they have a different PERIOD value than the prior row. I mark dates as an "end" if they are the last row of the table, or have a different PERIOD than the next row. I only keep the starts and ends, and spread these into new columns called start and end.
library(tidyverse)
Period_ranges <- Sleep.data %>%
mutate(period_status = case_when(row_number() == 1 ~ "start",
PERIOD != lag(PERIOD) ~ "start",
row_number() == n() ~ "end",
PERIOD != lead(PERIOD) ~ "end",
TRUE ~ "other")) %>%
filter(period_status %in% c("start", "end")) %>%
select(DATEID, PERIOD, period_status) %>%
mutate(PERIOD_NUM = cumsum(PERIOD != lag(PERIOD) | row_number() == 1)) %>%
spread(period_status, DATEID)
# Output based on sample data only. If there's a problem with the full data, please add more. To share full data, use `dput(Sleep.data)` or to share 20 rows use `dput(head(Sleep.data, 20))`.
>Period_ranges
PERIOD PERIOD_NUM end start
1 WE 1 2 1
2 WORK 2 6 3
We can now use that in the plot. If you want to toggle the inclusion or fiddle with the appearance separately of different PERIOD types, you could modify the code below with Period_ranges %>% filter(PERIOD == "WE"),
ggplot(Sleep.data, aes(x = DATEID)) +
# Here I specify that this geom should use its own data.
# I start the rectangles half a day before and end half a day after to fill the space.
geom_rect(data = Period_ranges, inherit.aes = F,
aes(xmin = start - 0.5, xmax = end + 0.5,
ymin = 0, ymax = 30,
fill = PERIOD), alpha = 0.5) +
# Here we can specify the shading color for each type of PERIOD
scale_fill_manual(values = c(
"WE" = '#6495ED80',
"WORK" = "gray60"
)) +
# rest of your code
Chart based on data sample:
I am plotting an dbRDA ordination with ggplot.
data:
df1 <- structure(list(dbRDA1 = c(-0.277603544609864, -0.360978808436824,
-0.233134911000267, -0.310476800969782, -0.0906489101103515,
-0.145001415653946, -0.0332934057167847, -0.0275012617442552,
1.09301352917172, 1.06620520659396, -0.706049468959193, -0.322962763102308,
0.0621994470608108, 0.286233107477088), dbRDA2 = c(-0.0544365381093796,
-0.396097172755407, -0.179335397101395, -0.181894682898319, -0.0332360598060943,
-0.133901893994658, -0.231316948737708, -0.104267201073184, -0.446406664141594,
-0.324349265794935, 0.218701171555884, -0.0300275899654245, 0.595616680358468,
1.30095156246375), place = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("N", "O"), class = "factor"),
placecol = c("hollow", "hollow", "hollow", "hollow", "hollow",
"hollow", "hollow", "hollow", "non-hollow", "non-hollow",
"non-hollow", "non-hollow", "non-hollow", "non-hollow"),
treat = structure(c(2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L), .Label = c("10", "A"), class = "factor"),
OTU = c(1.358, 5.94, 1.119, 2.113, 1.12, 0.604, 1.525, 0.712,
1.841, 3.203, 2.512, 1.087, 1.087, 1.506)), .Names = c("dbRDA1",
"dbRDA2", "place", "placecol", "treat", "OTU"), row.names = c("60759",
"60773", "60774", "60775", "60776", "60777", "60778", "60779",
"60780", "60781", "60782", "60783", "60784", "60785"), class = "data.frame")
And plot code:
rda.plot <- ggplot(df1, aes(x=dbRDA1, y=dbRDA2, color=factor(treat),shape=factor(place))) +
geom_point(size=sqrt(df1$OTU)/sqrt(max(df1$OTU))*10,
aes(fill=factor(treat), alpha = as.factor(place)))+
geom_point(size=sqrt(df1$OTU)/sqrt(max(df1$OTU))*10) +
geom_text(aes(label=row.names(df1)),size=3, position = position_nudge(y = -0.1)) +
geom_hline(yintercept=0, linetype="dotted") +
geom_vline(xintercept=0, linetype="dotted") +
scale_shape_manual(values=c(21,24)) +
scale_alpha_manual(values=c(1,0)) +
scale_colour_manual(values = c("indianred3","lightskyblue4"))+
scale_fill_manual(values =alpha(c("indianred3","lightskyblue4")))+
coord_fixed()
rda.plot
As it is seen, I have a lot of scaling and color codes, which is working, but (at least for myself) is quite confusing. It has arrived from following a bunch of different guidelines (I imagine it could be trimmed).
I want to add a final layer, corresponding to environmental vectors.
Environmental data frame:
vf_biplot <- structure(list(dbRDA1 = c(0.703799726303485, 0.108418920195407,
0.10268149348934, 0.393538121176574, -0.692749765298912), dbRDA2 = c(-0.507323360489587,
0.843331735032726, -0.549310027554416, -0.36248490838257, -0.291382798048848
), Envir = c("AA", "N",
"DOP", "Pm", "P")), .Names = c("dbRDA1", "dbRDA2",
"Envir"), row.names = c("AA", "N",
"DOP", "Pm", "P"), class = "data.frame")
My attempt to add the layer (inspired by stackoverflow):
rda.plot + geom_segment(data = vf_biplot,
aes(x = 0, xend = dbRDA1, y = 0, yend = dbRDA2),
arrow = arrow(length = unit(0.25, "cm")), colour = "grey")+
geom_text(data = vf_biplot, aes(x = dbRDA1, y = dbRDA2, label = Envir),
size = 3)
I get this error:
Error in sort.list(y) : 'x' must be atomic for 'sort.list'
Have you called 'sort' on a list?
I checked with str() but neither vf_biplot or df1 is a list (or containing lists).
How do I remove the the box, ribbon color, and points in the legend? I would just like a straight line representing each color of the color. I've tried using guides(), but it's not changing.
Sample data:
pdat1 <- structure(list(type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("10-year",
"20-year", "30-year"), class = "factor"), effect = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("A", "B"), class = "factor"), value = c(0,
-21.89, -27.36, -33.75, -40.57, -47.32, 0, -23, -28.31, -34.96,
-42.6, -50.81, 0, -16.9, -22.25, -28.87, -36.4, -44.52, 0, -10.24,
-16.8, -24.74, -33.52, -42.55, 0, -10.24, -16.8, -24.74, -33.52,
-42.55, 0, -10.24, -16.8, -24.74, -33.52, -42.55), temp = c(0,
1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5), value_max = c(2.91,
-19.02, -24.42, -30.88, -37.63, -44.35, 2.9, -20.09, -25.36,
-32.05, -39.67, -47.87, 2.97, -14.02, -19.27, -25.89, -33.49,
-41.58, 2.42, -7.74, -14.34, -22.27, -31.06, -40.02, 2.45, -7.8,
-14.36, -22.26, -31.07, -40.07, 2.46, -7.71, -14.23, -22.23,
-31.02, -40.05), value_min = c(-2.91, -24.76, -30.3, -36.63,
-43.5, -50.3, -2.9, -25.91, -31.27, -37.87, -45.52, -53.75, -2.97,
-19.77, -25.24, -31.85, -39.32, -47.46, -2.42, -12.74, -19.26,
-27.21, -35.98, -45.08, -2.45, -12.68, -19.24, -27.22, -35.96,
-45.02, -2.46, -12.77, -19.37, -27.25, -36.02, -45.05)), class = "data.frame", row.names = c(NA,
-36L), .Names = c("type", "effect", "value", "temp", "value_max",
"value_min"))
Plot Code
library(ggplot2)
ggplot(pdat1) +
geom_ribbon(aes(ymax = value_max, ymin = value_min, x = temp, linetype = NA, color = effect, fill = effect), fill = "#C0CCD9", alpha = 0.5 ) +
geom_line(aes(x = temp, y = value, color = effect, group = effect)) +
geom_point(aes(x = temp, y = value, color = effect), size = 0.5) +
ylab("Y") +
xlab("X") +
guides(color = guide_legend(keywidth = 2,
keyheight = 1,
override.aes = list(linetype = c(1, 1),
size = 1,
shape = c(0, 0)))) +
facet_wrap(~type)
Your ggplot code is a little bit messy, particularly for the ribbon. For example the fill aestetic is both mapped to the effect variable and set to a color value (#C0CCD9).
To remove the boxes in the legend key you need to use legend.key in theme but it works only after cleaning your ggplot code.
To avoid unnecessary repetitions I have moved severeal aestetics to the first ggplot call so that ggplot use them as default for the subsequent geom_XX calls.
ggplot(pdat1, aes(x = temp, y = value, group = effect)) +
geom_ribbon(aes(ymax = value_max, ymin = value_min), fill = "#C0CCD9", alpha = 0.5 ) +
geom_line(aes(color = effect)) +
geom_point(aes(color = effect), size = 0.5) +
ylab("Y") + xlab("X") +
guides(color = guide_legend(keywidth = 2, keyheight = 1,
override.aes = list(size = 1, shape = NA))) +
facet_wrap(~type) +
theme_bw() +
theme(legend.key = element_rect(fill = NA, color = NA))