I have a dataset containing y variable as Year and x variables as (A, B, C(%)). I have attached the dataset here.
dput(result)
structure(list(Year = 2008:2021, A = c(4L, 22L, 31L, 48L, 54L,
61L, 49L, 56L, 59L, 85L, 72L, 58L, 92L, 89L), B = c(1L, 2L, 6L,
7L, 14L, 21L, 15L, 27L, 27L, 46L, 41L, 26L, 51L, 62L), C... = c(25,
9.09, 19.35, 14.58, 25.93, 34.43, 30.61, 48.21, 45.76, 54.12,
56.94, 44.83, 55.43, 69.66)), class = "data.frame", row.names = c(NA,
-14L))
The variables A and B will be plotted as stacked bar graph and the C will be plotted as line chart in the same plot. I have generated the plot using excel like below:
How can I create the same plot in R?
You first need to reshape longer, for example with pivot_longer() from tidyr, and then you can use ggplot2 to plot the bars and the line in two separate layers. The fill = argument in the geom_bar(aes()) lets you stratify each bar according to a categorical variable - name is created automatically by pivot_longer().
library(ggplot2)
library(tidyr)
# Stack A and B as bars and draw C(%) as a line in the same panel.
# pivot_longer() gathers A and B into the `name`/`value` columns; `C(%)` is
# left as its own column so the line layer can map it directly.
# NOTE(review): `dat` is assumed to be the question's data frame with the
# percentage column named `C(%)` -- confirm against how the data was read in.
dat_long <- pivot_longer(dat, A:B)

ggplot(dat_long, aes(x = Year)) +
  geom_col(aes(y = value, fill = name)) +
  geom_line(aes(y = `C(%)`), size = 2)
Created on 2022-06-09 by the reprex package (v2.0.1)
You're asking for overlaid bars, in which case there's no need to pivot, and you can add separate layers. However, I would argue that this could confuse or mislead many people - usually in stacked plots the bars are stacked, not overlaid, so tread with caution!
library(ggplot2)
library(tidyr)
# Overlay (rather than stack) the A and B bars, then draw C(%) as a line.
# NOTE(review): `dat` is assumed to be the question's data frame with a
# `C(%)` column -- confirm against how the data was read in.
ggplot(dat, aes(x = Year)) +
  geom_col(aes(y = A), fill = "lightgreen") +
  # B is drawn semi-transparent on top of A so both bars stay visible.
  geom_col(aes(y = B), fill = "red", alpha = 0.5) +
  geom_line(aes(y = `C(%)`), size = 2) +
  labs(y = "", caption = "NB: bars are overlaid, not stacked!")
Created on 2022-06-09 by the reprex package (v2.0.1)
I propose this:
library(data.table)
library(ggplot2)
library(ggthemes)
# Read the data and reshape it to long form (one row per Year/variable).
dt <- fread("dataset.csv")
dt.long <- melt(dt, id.vars = "Year")

# Bars use A and B. The line uses C(%) multiplied by 3/2 so it can be drawn
# against the primary axis; the secondary axis below applies the inverse
# transformation (2/3) so its labels read in the original C(%) units.
# Subsetting with `[` already returns a new data.table, so no copy() is needed.
dt.AB <- dt.long[variable %in% c("A", "B")]
dt.C <- dt.long[variable == "C(%)", .(Year, variable, value = value * 3 / 2)]

# One x-axis break per distinct year.
year_breaks <- pretty(dt.AB$Year, n = length(unique(dt.AB$Year)))

# The original call had a stray trailing comma in ggplot(), which passed an
# empty argument; it is removed here.
ggplot(dt.AB, aes(x = Year, y = value, fill = variable)) +
  geom_bar(stat = "identity") +
  geom_line(data = dt.C, aes(x = Year, y = value), colour = "red") +
  scale_x_continuous(breaks = year_breaks) +
  scale_y_continuous(
    name = "A&B",
    breaks = seq(0, 150, 10),
    sec.axis = sec_axis(~ . * 2 / 3, name = "C(%)", breaks = seq(0, 100, 10))
  ) +
  theme_hc() +
  scale_fill_manual(values = c("grey70", "grey50", "grey30")) +
  theme(
    axis.line.y = element_line(colour = "black", size = 0.5, linetype = "solid")
  )
Related
I am trying to overlay a line plot onto a barplot. I can plot both separately :
## plot sites
# The `+` must end the line: a line that starts with `+` is parsed as a new
# statement, so the geom_col() layer would never be added to the plot.
ggplot(graph, aes(x = Month, y = Anopheles_pos)) +
  geom_col(size = 1, color = "darkblue", fill = "white")
## plot line
# `group = 1` puts every observation in a single group so that one line is
# drawn across all months.
ggplot(data = graph, mapping = aes(x = Month, y = Mean_EVI)) +
  geom_line(group = 1, color = "blue", size = 1.5)
However when I try to plot the line onto the barplot, it is a flat line at the bottom. I tried to deal with the issue by fixing the second y axis (on the right) to be the same scale as the line, but this has not fixed how the line plots.
## plot together
# Bars and line share the same month factor on the x axis.
# Mean_EVI is mapped to y unchanged, so it is drawn on the primary
# (Anopheles_pos) scale; sec_axis(~ . / 50) only relabels the right-hand axis.
ggplot(graph, aes(x = factor(Month, levels = month.name))) +
  geom_col(
    aes(y = Anopheles_pos),
    size = 1, color = "darkblue", fill = "white"
  ) +
  geom_line(
    aes(y = Mean_EVI),
    size = 1.5, color = "red", group = 1
  ) +
  scale_y_continuous(sec.axis = sec_axis(~ . / 50, name = "Mean_EVI"))
One other small issue: I can't figure out how to make the y axis run from 0 to 100, since the Anopheles_pos values are percentages.
Thanks in advance!!
DATA:
Mean_EVI : c(0.5687068, 0.5663895, 0.5653846, 0.6504931, 0.584727, 0.5799395,
0.617363, 0.581645, 0.6190386, 0.5208025, 0.6097692, 0.5689)
Anopheles_pos : c(33L, 42L, 38L, 31L, 54L, 47L, 22L, 15L, 2L, 15L, 12L, 19L)
You need to scale up your Mean_EVI values by 50 to match the ./50 part of your sec.axis call.
# Example data: one value per month, with Month stored as 1..12.
Mean_EVI <- c(
  0.6190386, 0.5208025, 0.6097692, 0.5689, 0.5687068, 0.5663895,
  0.5653846, 0.6504931, 0.584727, 0.5799395, 0.617363, 0.581645
)
Anopheles_pos <- c(2L, 15L, 12L, 19L, 33L, 42L, 38L, 31L, 54L, 47L, 22L, 15L)
graph <- data.frame(Mean_EVI, Anopheles_pos, Month = 1:12)

# Mean_EVI is multiplied by 50 so it is drawn on the same scale as the bars;
# the secondary axis divides by 50 again so its labels show Mean_EVI units.
ggplot(graph, aes(x = factor(Month, labels = month.name))) +
  geom_col(
    aes(y = Anopheles_pos),
    size = 1, color = "darkblue", fill = "white"
  ) +
  geom_line(
    aes(y = Mean_EVI * 50),
    size = 1.5, color = "red", group = 1
  ) +
  scale_y_continuous(sec.axis = sec_axis(~ . / 50, name = "Mean_EVI")) +
  # Show the primary axis from 0 to 100.
  coord_cartesian(ylim = c(0, 100))
Given a dataframe as follows:
# Example data: 36 rows (see row.names) with three columns:
#   date   - factor whose labels are "YYYY/M/D" strings (2010 through 2012)
#   pct    - numeric series
#   values - numeric series
df <- structure(list(date = structure(c(1L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 2L, 3L, 4L, 13L, 17L, 18L, 19L, 20L, 21L, 22L, 23L,
24L, 14L, 15L, 16L, 25L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L,
26L, 27L, 28L), .Label = c("2010/1/31", "2010/10/31", "2010/11/30",
"2010/12/31", "2010/2/28", "2010/3/31", "2010/4/30", "2010/5/31",
"2010/6/30", "2010/7/31", "2010/8/31", "2010/9/30", "2011/1/31",
"2011/10/31", "2011/11/30", "2011/12/31", "2011/2/28", "2011/3/31",
"2011/4/30", "2011/5/31", "2011/6/30", "2011/7/31", "2011/8/31",
"2011/9/30", "2012/1/31", "2012/10/31", "2012/11/30", "2012/12/31",
"2012/2/29", "2012/3/31", "2012/4/30", "2012/5/31", "2012/6/30",
"2012/7/31", "2012/8/31", "2012/9/30"), class = "factor"), pct = c(14,
17.9, 17.9, 18.1, 18.2, 18.2, 18.2, 18.2, 18.3, 18.3, 18.4, 18.8,
19.9, 15.8, 16.34, 16.5, 16.6, 16.8, 16.8, 16.9, 17, 17, 17,
18.5, 13.1, 14.7, 14.8, 14.7, 14.5, 14.4, 14.2, 14.1, 14.1, 14.1,
14.2, 14.5), values = c(12718.1, 25052.3, 36374, 47884.4, 60339.5,
72669.4, 84922.2, 97492, 111028.5, 125313.3, 139224.2, 154553.7,
15249, 29018.1, 42921.8, 56570.8, 71267.6, 85832.7, 100240.7,
114945.7, 130810.8, 147357.2, 163486.1, 181225.8, 17222.1, 33668.6,
49318.8, 64921.9, 81636.7, 98221.6, 114536.5, 131195.4, 149422,
168355.8, 186832.5, 207166.7)), class = "data.frame", row.names = c(NA,
-36L))
I have plotted it with the following code:
# Parse the "YYYY/M/D" strings into Date values, then reshape to long form.
df$date <- as.Date(df$date, format = "%Y/%m/%d")
df_m <- melt(df, id.vars = "date")

# Bars: the `values` series.
df_m_x <- filter(df_m, variable %in% c("values"))

# Points: the `pct` series, multiplied by 10000 so it can be drawn on the
# same axis as `values`; `coeff` is the inverse factor used by sec_axis().
df_m_ratio_x <- df_m %>%
  filter(variable %in% c("pct")) %>%
  mutate(value = value * 10000)
coeff <- 1 / 10000

ggplot() +
  geom_col(
    data = df_m_x,
    aes(x = date, y = value, fill = variable, group = 1),
    alpha = 0.5
  ) +
  geom_point(
    data = df_m_ratio_x,
    aes(x = date, y = value, col = variable),
    size = 3
  ) +
  scale_y_continuous(name = "$", sec.axis = sec_axis(~ . * coeff, name = "%")) +
  # The x limits are pinned to the first and last observed dates.
  scale_x_date(
    limits = range(df$date),
    breaks = date_breaks("6 months"),
    date_labels = "%Y-%m"
  ) +
  geom_smooth(method = "lm")
Out:
But as you may notice, the dates on the x axis are misaligned by one month in the figure.
How could I solve this problem? Thanks.
The issue appears to be differences in how binning occurs between geom_bar and geom_point when you set the limits manually in scale_x_date. Perhaps omitting that would be acceptable:
library(ggplot2)
library(scales)
# `coeff` converts primary-axis units back to percent for the secondary axis;
# pct is divided by it so the points are drawn on the primary scale.
coeff <- 1 / 10000

ggplot(df, aes(x = as.Date(date, format = "%Y/%m/%d"))) +
  geom_col(aes(y = values), alpha = 0.5, fill = "#F8766D") +
  geom_point(aes(y = pct / coeff), size = 3, color = "#F8766D") +
  scale_y_continuous(name = "$", sec.axis = sec_axis(~ . * coeff, name = "%")) +
  # No manual `limits` here: only the break spacing and label format are set.
  scale_x_date(name = "date", date_breaks = "6 months", date_labels = "%Y-%m")
The reason that the bars appear to be "off" is because the bars are actually plotted slightly before the breaks. Here is a blown up version:
An alternative might be to use the yearmon format from the zoo package:
library(zoo)
# Same plot, but with the x values converted to zoo's year-month class so the
# axis is handled by scale_x_yearmon().
coeff <- 1 / 10000

ggplot(df, aes(x = as.yearmon(date, format = "%Y/%m/%d"))) +
  geom_col(aes(y = values), alpha = 0.5, fill = "#F8766D") +
  geom_point(aes(y = pct / coeff), size = 3, color = "#F8766D") +
  scale_y_continuous(name = "$", sec.axis = sec_axis(~ . * coeff, name = "%")) +
  scale_x_yearmon(name = "date", format = "%Y-%m")
I am not sure if you have noticed. In OP, limits = c(min(df$date), max(df$date)) might have removed two observations, the first month and the last month on your bar chart.
I generated a marker for month from 1 to 36 over 3 years to show the problem:
# Number the 36 bar observations so each bar can be identified in the plot.
df_m_x$month <- 1:36

ggplot() +
  geom_col(
    data = df_m_x,
    aes(x = date, y = value, fill = variable, group = 1),
    alpha = 0.5
  ) +
  geom_point(
    data = df_m_ratio_x,
    aes(x = date, y = value, col = variable),
    size = 3
  ) +
  scale_y_continuous(name = "$", sec.axis = sec_axis(~ . * coeff, name = "%")) +
  # Manual limits at the first and last dates, as in the question's code.
  scale_x_date(
    limits = range(df$date),
    breaks = date_breaks("6 months"),
    date_labels = "%Y-%m"
  ) +
  geom_smooth(method = "lm") +
  # Print the month counter at the top of each bar.
  geom_text(data = df_m_x, aes(x = date, y = value, label = month))
Remove limits...,
# Same plot as above but without manual x limits.
ggplot() +
  geom_col(
    data = df_m_x,
    aes(x = date, y = value, fill = variable, group = 1),
    alpha = 0.5
  ) +
  geom_point(
    data = df_m_ratio_x,
    aes(x = date, y = value, col = variable),
    size = 3
  ) +
  scale_y_continuous(name = "$", sec.axis = sec_axis(~ . * coeff, name = "%")) +
  scale_x_date(
    # limits = c(min(df$date), max(df$date)),
    breaks = date_breaks("6 months"),
    date_labels = "%Y-%m"
  ) +
  geom_smooth(method = "lm") +
  # Print the month counter at the top of each bar.
  geom_text(data = df_m_x, aes(x = date, y = value, label = month))
I'm trying to build a line chart with ggplot2 in which I would like to have 2 lines, each adapted to a different axis. I'm trying the following code (where df4 is my data frame):
# Two lines (coverage and depth) over taxon, with a secondary y axis.
# Both lines are mapped to y unchanged, so both are drawn on the primary
# scale; sec_axis(~ . / 4) only adds relabelled ticks on the right.
# NOTE(review): xlab("Cover") is passed inside aes() as an unnamed argument;
# this looks unintended, and labs(x = "Taxon") sets the x label below.
p1 <- ggplot(
  df4,
  mapping = aes(
    x = taxon, y = cov, group = 1, colour = "Coverage", xlab("Cover")
  )
) +
  geom_line() +
  geom_line(aes(y = depth, colour = "Depth")) +
  labs(x = "Taxon", y = "Coverage") +
  scale_colour_manual(values = c("navyblue", "green4")) +
  scale_y_continuous(sec.axis = sec_axis(~ . / 4, name = "Depth")) +
  theme(axis.text.x = element_text(angle = 75, hjust = 1, vjust = 1))
With this, I am able to build a chart with 2 y-axis and 2 lines, but both lines are adapted to the primary y-axis (the secondary axis is there, but it's useless). Is there maybe a parameter with which I can ask my data to follow this axis?
Blue line values only go until 1, so they should be adapted to the secondary axis
This is an example of my data:
structure(list(taxon = structure(c(80L, 57L, 74L, 32L, 1L, 3L,
41L, 9L, 70L, 12L), .Label = c("c__Tremellomycetes", "f__Listeriaceae",
"f__Saccharomycetaceae", "g__Escherichia", "g__Klebsiella", "g__Pseudomonas",
"g__Saccharomyces", "g__Salmonella", "g__Staphylococcus", "s__Bacillus_amyloliquefaciens",
"s__Bacillus_phage_phi105", "s__Bacillus_siamensis", "s__Bacillus_sp_JS",
"s__Bacillus_subtilis", "s__Bacillus_vallismortis", "s__Citrobacter_sp_30_2",
"s__Cronobacter_phage_ENT47670", "s__Enterobacter_cancerogenus",
"s__Enterobacteria_phage_BP_4795", "s__Enterobacteria_phage_cdtI",
"s__Enterobacteria_phage_ES18", "s__Enterobacteria_phage_fiAA91_ss",
"s__Enterobacteria_phage_HK629", "s__Enterobacteria_phage_IME10",
"s__Enterobacteria_phage_lambda", "s__Enterobacteria_phage_mEp237",
"s__Enterobacteria_phage_mEp460", "s__Enterobacteria_phage_Min27",
"s__Enterobacteria_phage_P22", "s__Enterobacteria_phage_YYZ_2008",
"s__Enterococcus_faecalis", "s__Enterococcus_gilvus", "s__Enterococcus_phage_phiEf11",
"s__Enterococcus_phage_phiFL1A", "s__Enterococcus_phage_phiFL3A",
"s__Escherichia_coli", "s__Escherichia_phage_HK639", "s__Escherichia_phage_P13374",
"s__Lactobacillus_fermentum", "s__Listeria_innocua", "s__Listeria_ivanovii",
"s__Listeria_marthii", "s__Listeria_monocytogenes", "s__Listeria_phage_2389",
"s__Listeria_phage_A118", "s__Listeria_phage_A500", "s__Paenibacillus_sp_ICGEB2008",
"s__Phage_Gifsy_1", "s__Phage_Gifsy_2", "s__Pseudomonas_aeruginosa",
"s__Pseudomonas_mendocina", "s__Pseudomonas_phage_B3", "s__Pseudomonas_phage_D3",
"s__Pseudomonas_phage_DMS3", "s__Pseudomonas_phage_F10", "s__Pseudomonas_phage_F116",
"s__Pseudomonas_phage_PAJU2", "s__Pseudomonas_phage_Pf1", "s__Pseudomonas_phage_phi297",
"s__Pseudomonas_sp_2_1_26", "s__Pseudomonas_sp_P179", "s__Salmonella_enterica",
"s__Salmonella_phage_Fels_1", "s__Salmonella_phage_Fels_2", "s__Salmonella_phage_SETP13",
"s__Salmonella_phage_ST64B", "s__Shigella_phage_Sf6", "s__Staphylococcus_aureus",
"s__Staphylococcus_phage_42E", "s__Staphylococcus_phage_55",
"s__Staphylococcus_phage_80alpha", "s__Staphylococcus_phage_P954",
"s__Staphylococcus_phage_phi2958PVL", "s__Staphylococcus_phage_phiMR25",
"s__Staphylococcus_phage_phiN315", "s__Staphylococcus_phage_phiNM3",
"s__Staphylococcus_phage_phiPVL_CN125", "s__Staphylococcus_phage_phiPVL108",
"s__Staphylococcus_phage_PT1028", "s__Staphylococcus_phage_StauST398_1",
"s__Staphylococcus_phage_StauST398_3", "s__Staphylococcus_prophage_phiPV83",
"s__Stx2_converting_phage_1717", "s__Stx2_converting_phage_86"
), class = "factor"), cov = c(0.987654320987654, 0.99685534591195,
0.994535519125683, 0.147003745318352, 0.390923694779116, 0.92831541218638,
0.99079754601227, 0.993055555555556, 0.497512437810945, 0.58144695960941
), depth = c(1.68148148148148, 0.99685534591195, 0.994535519125683,
0.147003745318352, 0.390923694779116, 0.92831541218638, 0.99079754601227,
1.34722222222222, 0.497512437810945, 0.58144695960941)), .Names = c("taxon",
"cov", "depth"), row.names = c(40L, 10L, 58L, 44L, 7L, 55L, 29L,
13L, 2L, 53L), class = "data.frame")
You just need to multiply the 'depth' geom_line with 4 :
# Coverage is drawn on the primary axis. Depth is multiplied by 4 so that the
# secondary axis, which divides by 4, labels it in its original units.
# The stray xlab("Cover") that was passed inside aes() as an unnamed
# aesthetic has been removed; labs() below sets the axis titles.
ggplot(df4, mapping = aes(x = taxon, y = cov, group = 1, colour = "Coverage")) +
  geom_line() +
  labs(x = "Taxon", y = "Coverage") +
  geom_line(aes(y = depth * 4, colour = "Depth")) +
  theme(axis.text.x = element_text(angle = 75, hjust = 1, vjust = 1)) +
  scale_colour_manual(values = c("navyblue", "green4")) +
  scale_y_continuous(sec.axis = sec_axis(~ . / 4, name = "Depth"))
I am plotting yearly demand using ggplot (my code below) but I am not able to put color legend for the plot. My data.frame has "Zone" and "TotalDemand" (only 2 columns) and I have three data.frames for three years ("sales12", "sales13" and "sales14").
# One point layer per year, each reading its own data frame.
# Colour, size and shape are given as fixed values outside aes(), so no
# layer maps a variable to colour for scale_colour_manual() to act on.
zone_demand <- aes(x = factor(Zone), y = TotalDemand / 1000)

ggplot() +
  geom_point(
    data = sales12, mapping = zone_demand,
    color = "green", size = 6, shape = 17
  ) +
  geom_point(
    data = sales13, mapping = zone_demand,
    color = "red", size = 6, shape = 18
  ) +
  geom_point(
    data = sales14, mapping = zone_demand,
    color = "black", size = 4, shape = 19
  ) +
  labs(y = "Demand (in 1000s)", x = "Zones") +
  scale_colour_manual(
    name = "the colour",
    values = c(green = "green", black = "black", red = "red"),
    labels = c("12", "13", "14")
  )
Please help me to identify my mistake.
With a very small example data frame, df, I melted it to format it for ggplot.
dput(df)
structure(list(Zone = structure(1:4, .Label = c("Alpha", "Baker",
"Charlie", "Delta"), class = "factor"), TotalDemand = c(90L,
180L, 57L, 159L), sales12 = c(25L, 40L, 13L, 50L), sales13 = c(30L,
60L, 16L, 55L), sales14 = c(35L, 80L, 28L, 54L)), .Names = c("Zone",
"TotalDemand", "sales12", "sales13", "sales14"), class = "data.frame", row.names = c(NA,
-4L))
# Reshape to long form: one row per Zone and sales column, with the column
# name in `variable` and its number in `value`.
df.m <- melt(
  df,
  id.vars = "Zone",
  measure.vars = c("sales12", "sales13", "sales14")
)

# Mapping `variable` to colour inside aes() is what produces the legend.
# The colours are named so each series gets a fixed colour regardless of
# level order, matching the question's layers (12 green, 13 red, 14 black).
ggplot(df.m, aes(x = factor(Zone), y = value, color = variable)) +
  geom_point(size = 6, shape = 17) +
  labs(y = "Demand (in 1000s)", x = "Zones") +
  scale_colour_manual(
    values = c(sales12 = "green", sales13 = "red", sales14 = "black")
  )
You can adjust size and shape and colors of your points, add a title, etc.. Your legend can also be positioned on the bottom, for example.
I'm trying to make a plot with arrows in ggplot2 looking something like this, which was made using base R graphics. (colors are not important)
Using ggplot2:
library(ggplot2)
library(scales)
library(grid)
# Example data: 10 rows (see row.names), one per group (A-E) and time
# ("1999", "2004"). value1 and value2 are integer columns, and y_position
# gives each group an integer position 1-5.
df3 <- structure(list(value1 = c(51L, 57L, 59L, 57L, 56L, 56L, 60L,
66L, 61L, 61L), value2 = c(56L, 60L, 66L, 61L, 61L, 59L, 61L,
66L, 63L, 63L), group = c("A", "B", "C", "D", "E", "A", "B",
"C", "D", "E"), time = c("1999", "1999", "1999", "1999", "1999",
"2004", "2004", "2004", "2004", "2004"), y_position = c(1L, 2L,
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L)), .Names = c("value1", "value2",
"group", "time", "y_position"), row.names = c(NA, -10L), class = "data.frame")
# Overall span of both value columns, used for the grey background bars.
value_span <- range(df3$value1, df3$value2)

ggplot(df3, aes(x = value1, y = y_position, group = time, color = time)) +
  # Background: one wide light-grey bar per row across the full value span.
  geom_segment(
    aes(yend = y_position),
    x = value_span[1], xend = value_span[2],
    color = "lightgrey", size = 19
  ) +
  # Foreground: a thick arrow from value1 to value2, coloured by time.
  geom_segment(
    aes(xend = value2, yend = y_position, color = time, group = time),
    size = 19, alpha = 0.9,
    arrow = arrow(length = unit(40, "points"), type = "closed", angle = 40)
  ) +
  # Label the numeric y positions with the group names.
  scale_y_continuous(breaks = df3$y_position, labels = df3$group) +
  theme_classic() +
  theme(axis.line = element_blank(), axis.title = element_blank())
I get this:
The problem is that the arrows look wrong (in that they don't look like the first plot). Using geom_segment() is not important.
This question may give the answer but I was hoping for something less hacky:
Specifying gpar settings for grid arrows in R
update: ggplot2 v2.1.0.9001
If the plot is in your current window you can edit the shape of the arrow directly with
# Graphical parameter shared by both edits: mitred line joins.
mitre_gp <- gpar(linejoin = "mitre")

# grid.force() is called first so the grobs can be edited by name.
grid.force()
# change shape of arrows
grid.gedit("segments", gp = mitre_gp)
# change the shape in legend also
grid.gedit("layout", gp = mitre_gp)
If the plot is in your current window you can edit the shape of the arrow directly with
# Apply mitred line joins to the grobs matching "segments".
grid.gedit(gPath = "segments", gp = gpar(linejoin = "mitre"))
ggplot now seems to have changed the legend key to an arrow shape, so if you want to change the shape of these as well, you can do this across the full plot with
# Apply mitred line joins via the "gTableParent" grob (plot and legend keys).
grid.gedit(gPath = "gTableParent", gp = gpar(linejoin = "mitre"))
original answer
Not less hacky, but perhaps easier?? You can edit the grobs returned by ggplotGrob.
If p is your plot:
# Convert the plot to a gtable so that individual grobs can be edited.
g <- ggplotGrob(p)

# Position of the panel grob in the gtable layout.
# NOTE(review): assumes a single panel (no facets); `[[idx]]` below needs
# exactly one match.
idx <- grep("panel", g$layout$name)

# NOTE(review): children[[3]] is presumably the layer that holds the arrow
# segments; its position depends on the plot's layers and ggplot2 version.
layer <- g$grobs[[idx]]$children[[3]]

# Names of the grobs inside that layer. vapply() always returns a character
# vector, whereas sapply() can change its return type with the input.
nms <- vapply(layer$children, function(child) child$name, character(1))

# Give each child grob mitred line joins. The loop variable is the grob name
# itself, so it is passed to editGrob() directly. The original used it to
# index `nms`, which only worked when the vector's names matched its values.
for (nm in nms) {
  layer <- editGrob(layer, nm, gp = gpar(linejoin = "mitre"), grep = TRUE)
}
g$grobs[[idx]]$children[[3]] <- layer

grid.newpage()
grid.draw(g)
The challenge seems to be that the arrow constructor from the grid package gets messed up if size is invoked in the geom_segment block.
so
# Base plot with flipped coordinates, so groups run along the vertical axis.
p <- ggplot(df3) +
  coord_flip()

# Faint background bar per group, 10% longer than the largest value.
p1 <- p +
  geom_col(
    aes(x = group, y = max(c(value1, value2)) * 1.1),
    width = 0.2, position = "identity", alpha = 0.2
  )

# Only the 1999 rows are drawn as arrows.
df1 <- filter(df3, time == "1999")

# Thick segment with the arrow head on the same layer.
p1 +
  geom_segment(
    data = df1,
    aes(x = group, xend = group, y = value1, yend = value2),
    color = "blue", size = 8,
    arrow = arrow(
      angle = 20, type = "closed", ends = "last", length = unit(1, "cm")
    )
  )
looks ridiculous, as you show. I tried the workaround of separating the segment into a fat segment and an arrow on a skinny segment (two layers), like so:
# Both layers share the same start and end coordinates.
segment_aes <- aes(x = group, xend = group, y = value1, yend = value2)

# Layer 1: thin segment (default size) carrying the arrow head.
p2 <- p1 +
  geom_segment(
    data = df1, mapping = segment_aes, color = "blue",
    arrow = arrow(
      angle = 20, type = "closed", ends = "last", length = unit(1, "cm")
    )
  )

# Layer 2: thick segment without an arrow, drawn on top of layer 1.
p2 +
  geom_segment(data = df1, mapping = segment_aes, color = "blue", size = 8)
but now the fat segment end is not mitred and so obscures the arrow.
Fixing the arrow parameter seems to be needed.