Look of arrows in ggplot2 geom_segment() - r

I'm trying to make a plot with arrows in ggplot2 looking something like this, which was made using base R grapics. (colors are not important)
Using ggplot2:
library(ggplot2)
library(scales)
library(grid)
df3 <- structure(list(value1 = c(51L, 57L, 59L, 57L, 56L, 56L, 60L,
66L, 61L, 61L), value2 = c(56L, 60L, 66L, 61L, 61L, 59L, 61L,
66L, 63L, 63L), group = c("A", "B", "C", "D", "E", "A", "B",
"C", "D", "E"), time = c("1999", "1999", "1999", "1999", "1999",
"2004", "2004", "2004", "2004", "2004"), y_position = c(1L, 2L,
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L)), .Names = c("value1", "value2",
"group", "time", "y_position"), row.names = c(NA, -10L), class = "data.frame")
ggplot( df3, aes( x = value1, y = y_position, group = time, color = time)) +
geom_segment( x = min(df3$value1, df3$value2), xend = max( df3$value1, df3$value2 ),
aes( yend = y_position), color = "lightgrey", size = 19) +
scale_y_continuous( labels = df3$group, breaks = df3$y_position) +
theme_classic() + theme( axis.line = element_blank(), axis.title = element_blank() ) +
geom_segment( aes( yend = y_position, xend = value2, color = time, group = time), size = 19, alpha = 0.9,
arrow = arrow(length = unit(40, "points"),type = "closed", angle = 40) )
I get this:
The problem is that the arrows look wrong (in that they don't look like the first plot). Using geom_segment() is not important.
This question may give the answer but I was hoping for something less hacky:
Specifying gpar settings for grid arrows in R

update: ggplot2 v2.1.0.9001
If the plot is in your current window you can edit the shape of the arrow directly with
grid.force()
# change shape of arrows
grid.gedit("segments", gp=gpar(linejoin ='mitre'))
# change the shape in legend also
grid.gedit("layout", gp=gpar(linejoin ='mitre'))
If the plot is in your current window you can edit the shape of the arrow directly with
grid.gedit("segments", gp=gpar(linejoin ='mitre'))
ggplot now seems to have changed the legend key to an arrow shape, so if you want to change the shape of these as well, you can do this across the full plot with
grid.gedit("gTableParent", gp=gpar(linejoin ='mitre'))
original answer
Not less hacky, but perhaps easier?? You can edit the grobs returned by ggplotGrob.
If p is your plot:
g <- ggplotGrob(p)
idx <- grep("panel", g$layout$name)
nms <- sapply(g$grobs[[idx]]$children[[3]]$children , '[[', "name")
for(i in nms) {
g$grobs[[idx]]$children[[3]] <-
editGrob(g$grobs[[idx]]$children[[3]], nms[i],
gp=gpar(linejoin ='mitre'), grep=TRUE)
}
grid.newpage()
grid.draw(g)

The challenge seems to be that the arrow constructor from the grid package gets messed up if size is invoked in the geom_segment block.
so
p <- ggplot(df3) + coord_flip()
p1 <- p + geom_bar(aes(x=group,y=max(c(value1,value2))*1.1),width=0.2, stat="identity",position="identity",alpha=0.2)
df1<-filter(df3,time=="1999")
p1 + geom_segment(data=df1,aes(x=group,xend=group,y=value1,yend=value2),color="blue",size=8,arrow=arrow(angle=20,type="closed",ends="last",length=unit(1,"cm")))
looks ridiculous as you show. I tried the workaround of of separating the segment into just a fat segment and an arrow on a skinny segment (two layers) like so:
p2<-p1 + geom_segment(data=df1,aes(x=group,xend=group,y=value1,yend=value2), color="blue",arrow=arrow(angle=20,type="closed",ends="last",length=unit(1,"cm")))
p2 + geom_segment(data=df1,aes(x=group,xend=group,y=value1,yend=value2), color="blue",size=8)
but now the fat segment end is not mitred and so obscures the arrow.
Fixing the arrow parameter seems to be needed.

Related

How to implement stacked bar graph with a line chart in R

I have a dataset containing y variable as Year and x variables as (A, B, C(%)). I have attached the dataset here.
dput(result)
structure(list(Year = 2008:2021, A = c(4L, 22L, 31L, 48L, 54L,
61L, 49L, 56L, 59L, 85L, 72L, 58L, 92L, 89L), B = c(1L, 2L, 6L,
7L, 14L, 21L, 15L, 27L, 27L, 46L, 41L, 26L, 51L, 62L), C... = c(25,
9.09, 19.35, 14.58, 25.93, 34.43, 30.61, 48.21, 45.76, 54.12,
56.94, 44.83, 55.43, 69.66)), class = "data.frame", row.names = c(NA,
-14L))
The variables A and B will be plotted as stacked bar graph and the C will be plotted as line chart in the same plot. I have generated the plot using excel like below:
How can I create the same plot in R?
You first need to reshape longer, for example with pivot_longer() from tidyr, and then you can use ggplot2 to plot the bars and the line in two separate layers. The fill = argument in the geom_bar(aes()) lets you stratify each bar according to a categorical variable - name is created automatically by pivot_longer().
library(ggplot2)
library(tidyr)
dat |>
pivot_longer(A:B) |>
ggplot(aes(x = Year)) +
geom_bar(stat = "identity", aes(y = value, fill = name)) +
geom_line(aes(y = `C(%)`), size = 2)
Created on 2022-06-09 by the reprex package (v2.0.1)
You're asking for overlaid bars, in which case there's no need to pivot, and you can add separate layers. However I would argue that this could confuse or mislead many people - usually in stacked plots bars are stacked, not overlaid, so thread with caution!
library(ggplot2)
library(tidyr)
dat |>
ggplot(aes(x = Year)) +
geom_bar(stat = "identity", aes(y = A), fill = "lightgreen") +
geom_bar(stat = "identity", aes(y = B), fill = "red", alpha = 0.5) +
geom_line(aes(y = `C(%)`), size = 2) +
labs(y = "", caption = "NB: bars are overlaid, not stacked!")
Created on 2022-06-09 by the reprex package (v2.0.1)
I propose this:
library(data.table)
library(ggplot2)
library(ggthemes)
dt <- fread("dataset.csv")
dt.long <- melt(dt, id.vars = c("Year"))
dt.AB <- dt.long[variable %in% c("A", "B"), ]
dt.C <- copy(dt.long[variable == "C(%)", .(Year, variable, value = value * 3/2)])
ggplot(dt.AB, aes(x = Year, y = value, fill = variable), ) +
geom_bar(stat = "identity") +
geom_line(data=dt.C, colour='red', aes(x = Year, y = value)) +
scale_x_continuous(breaks = pretty(dt.AB$Year,
n = length(unique(dt.AB$Year)))) +
scale_y_continuous(
name = "A&B",
breaks = seq (0, 150, 10),
sec.axis = sec_axis(~.*2/3, name="C(%)", breaks = seq (0, 100, 10))
) + theme_hc() +
scale_fill_manual(values=c("grey70", "grey50", "grey30")) +
theme(
axis.line.y = element_line(colour = 'black', size=0.5,
linetype='solid'))

How to automatically adjust the width of each facet for facet_wrap?

I want to plot a boxplot using ggplot2, and i have more than one facet, each facet has different terms, as follows:
library(ggplot2)
p <- ggplot(
data=Data,
aes(x=trait,y=mean)
)
p <- p+facet_wrap(~SP,scales="free",nrow=1)
p <- p+geom_boxplot(aes(fill = Ref,
lower = mean - sd,
upper = mean + sd,
middle = mean,
ymin = min,
ymax = max,
width=c(rep(0.8/3,3),rep(0.8,9))),
lwd=0.5,
stat="identity")
as showed, the width of box in different facet is not the same, is there any way to adjust all the box at a same scale? I had tried to use facet_grid, it can automatically change the width of facets, but all facets share the same y axis.
Data
Data <- structure(list(SP = structure(c(3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L), .Label = c("Human", "Cattle", "Horse", "Maize"
), class = "factor"), Ref = structure(c(3L, 2L, 1L, 3L, 3L, 3L,
2L, 2L, 2L, 1L, 1L, 1L), .Label = c("LMM", "Half", "Adoptive"
), class = "factor"), trait = structure(c(11L, 11L, 11L, 14L,
13L, 12L, 14L, 13L, 12L, 14L, 13L, 12L), .Label = c("cad", "ht",
"t2d", "bd", "cd", "ra", "t1d", "fpro", "mkg", "scs", "coat colour",
"ywk", "ssk", "gdd"), class = "factor"), min = c(0.324122039,
0.336486555, 0.073152049, 0.895455441, 0.849944623, 0.825248005,
0.890413591, 0.852385351, 0.826470308, 0.889139116, 0.838256672,
0.723753592), max = c(0.665536838, 0.678764774, 0.34033228, 0.919794865,
0.955018001, 0.899903826, 0.913350912, 0.957305688, 0.89843716,
0.911257005, 0.955312678, 0.817489555), mean = c(0.4919168555,
0.5360103372, 0.24320509565, 0.907436221, 0.9057516121, 0.8552899502,
0.9035394117, 0.9068819173, 0.8572309823, 0.90125638965, 0.90217769835,
0.7667208778), sd = c(0.0790133656517775, 0.09704320004497, 0.0767552215753863,
0.00611921020505611, 0.0339614482273291, 0.0199389195311925,
0.00598633573504195, 0.0332634006653858, 0.0196465508521771,
0.00592476494699222, 0.0348144156099722, 0.0271827880539459)), .Names = c("SP",
"Ref", "trait", "min", "max", "mean", "sd"), class = "data.frame", row.names = c(10L,
11L, 12L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L))
While u/z-lin's answer works, there is a far simpler solution. Switch from facet_wrap(...) to use facet_grid(...). With facet_grid, you don't need to specify rows and columns. You are still able to specify scales= (which allows automatic adjustment of axis scales for each facet if wanted), but you can also specify space=, which does the same thing, but with the scaling of the overall facet width. This is what you want. Your function call is now something like this:
ggplot(Data, aes(x = trait, y = mean)) +
geom_boxplot(aes(
fill = Ref, lower = mean-sd, upper = mean+sd, middle = mean,
ymin = min, ymax = max),
lwd = 0.5, stat = "identity") +
facet_grid(. ~ SP, scales = "free", space='free') +
scale_x_discrete(expand = c(0, 0.5)) +
theme_bw()
Some more description of layout of facets can be found here.
As #cdtip mentioned, this does not allow for independent y scales for each facet, which is what the OP asked for initially. Luckily, there is also a simple solution for this, which utilizes facet_row() from the ggforce package:
library(ggforce)
# same as above without facet_grid call..
p <- ggplot(Data, aes(x = trait, y = mean)) +
geom_boxplot(aes(
fill = Ref, lower = mean-sd, upper = mean+sd, middle = mean,
ymin = min, ymax = max),
lwd = 0.5, stat = "identity") +
scale_x_discrete(expand = c(0, 0.5)) +
theme_bw()
p + ggforce::facet_row(vars(SP), scales = 'free', space = 'free')
You can adjust facet widths after converting the ggplot object to a grob:
# create ggplot object (no need to manipulate boxplot width here.
# we'll adjust the facet width directly later)
p <- ggplot(Data,
aes(x = trait, y = mean)) +
geom_boxplot(aes(fill = Ref,
lower = mean - sd,
upper = mean + sd,
middle = mean,
ymin = min,
ymax = max),
lwd = 0.5,
stat = "identity") +
facet_wrap(~ SP, scales = "free", nrow = 1) +
scale_x_discrete(expand = c(0, 0.5)) + # change additive expansion from default 0.6 to 0.5
theme_bw()
# convert ggplot object to grob object
gp <- ggplotGrob(p)
# optional: take a look at the grob object's layout
gtable::gtable_show_layout(gp)
# get gtable columns corresponding to the facets (5 & 9, in this case)
facet.columns <- gp$layout$l[grepl("panel", gp$layout$name)]
# get the number of unique x-axis values per facet (1 & 3, in this case)
x.var <- sapply(ggplot_build(p)$layout$panel_scales_x,
function(l) length(l$range$range))
# change the relative widths of the facet columns based on
# how many unique x-axis values are in each facet
gp$widths[facet.columns] <- gp$widths[facet.columns] * x.var
# plot result
grid::grid.draw(gp)
In general, you can determine the width of a box plot in ggplot like so:
ggplot(data= df, aes(x = `some x`, y = `some y`)) + geom_boxplot(width = `some witdth`)
In your case, you might consider setting the width of all the box plots to the range of x divided by the maximum number of elements (in the leftmost figure).

Scaling data to secondary axis in R (ggplot2)

I'm trying to build a line chart with ggplot2 in which I would like to have 2 lines, each adapted to a different axis. I'm trying the following code (where df4 is my data frame):
p1 = ggplot(df4, mapping = aes(x=taxon, y=cov, group = 1, colour = "Coverage", xlab("Cover"))) +
geom_line() +
labs (x = "Taxon", y = "Coverage") +
geom_line(aes(y=depth, colour = "Depth")) +
theme(axis.text.x = element_text(angle = 75, hjust= 1, vjust = 1)) +
scale_colour_manual(values = c("navyblue", "green4")) +
scale_y_continuous(sec.axis = sec_axis(~./4, name = "Depth"))
With this, I am able to build a chart with 2 y-axis and 2 lines, but both lines are adapted to the primary y-axis (the secondary axis is there, but it's useless). Is there maybe a parameter with which I can ask my data to follow this axis?
Blue line values only go until 1, so they should be adapted to the secondary axis
This is an example of my data:
structure(list(taxon = structure(c(80L, 57L, 74L, 32L, 1L, 3L,
41L, 9L, 70L, 12L), .Label = c("c__Tremellomycetes", "f__Listeriaceae",
"f__Saccharomycetaceae", "g__Escherichia", "g__Klebsiella", "g__Pseudomonas",
"g__Saccharomyces", "g__Salmonella", "g__Staphylococcus", "s__Bacillus_amyloliquefaciens",
"s__Bacillus_phage_phi105", "s__Bacillus_siamensis", "s__Bacillus_sp_JS",
"s__Bacillus_subtilis", "s__Bacillus_vallismortis", "s__Citrobacter_sp_30_2",
"s__Cronobacter_phage_ENT47670", "s__Enterobacter_cancerogenus",
"s__Enterobacteria_phage_BP_4795", "s__Enterobacteria_phage_cdtI",
"s__Enterobacteria_phage_ES18", "s__Enterobacteria_phage_fiAA91_ss",
"s__Enterobacteria_phage_HK629", "s__Enterobacteria_phage_IME10",
"s__Enterobacteria_phage_lambda", "s__Enterobacteria_phage_mEp237",
"s__Enterobacteria_phage_mEp460", "s__Enterobacteria_phage_Min27",
"s__Enterobacteria_phage_P22", "s__Enterobacteria_phage_YYZ_2008",
"s__Enterococcus_faecalis", "s__Enterococcus_gilvus", "s__Enterococcus_phage_phiEf11",
"s__Enterococcus_phage_phiFL1A", "s__Enterococcus_phage_phiFL3A",
"s__Escherichia_coli", "s__Escherichia_phage_HK639", "s__Escherichia_phage_P13374",
"s__Lactobacillus_fermentum", "s__Listeria_innocua", "s__Listeria_ivanovii",
"s__Listeria_marthii", "s__Listeria_monocytogenes", "s__Listeria_phage_2389",
"s__Listeria_phage_A118", "s__Listeria_phage_A500", "s__Paenibacillus_sp_ICGEB2008",
"s__Phage_Gifsy_1", "s__Phage_Gifsy_2", "s__Pseudomonas_aeruginosa",
"s__Pseudomonas_mendocina", "s__Pseudomonas_phage_B3", "s__Pseudomonas_phage_D3",
"s__Pseudomonas_phage_DMS3", "s__Pseudomonas_phage_F10", "s__Pseudomonas_phage_F116",
"s__Pseudomonas_phage_PAJU2", "s__Pseudomonas_phage_Pf1", "s__Pseudomonas_phage_phi297",
"s__Pseudomonas_sp_2_1_26", "s__Pseudomonas_sp_P179", "s__Salmonella_enterica",
"s__Salmonella_phage_Fels_1", "s__Salmonella_phage_Fels_2", "s__Salmonella_phage_SETP13",
"s__Salmonella_phage_ST64B", "s__Shigella_phage_Sf6", "s__Staphylococcus_aureus",
"s__Staphylococcus_phage_42E", "s__Staphylococcus_phage_55",
"s__Staphylococcus_phage_80alpha", "s__Staphylococcus_phage_P954",
"s__Staphylococcus_phage_phi2958PVL", "s__Staphylococcus_phage_phiMR25",
"s__Staphylococcus_phage_phiN315", "s__Staphylococcus_phage_phiNM3",
"s__Staphylococcus_phage_phiPVL_CN125", "s__Staphylococcus_phage_phiPVL108",
"s__Staphylococcus_phage_PT1028", "s__Staphylococcus_phage_StauST398_1",
"s__Staphylococcus_phage_StauST398_3", "s__Staphylococcus_prophage_phiPV83",
"s__Stx2_converting_phage_1717", "s__Stx2_converting_phage_86"
), class = "factor"), cov = c(0.987654320987654, 0.99685534591195,
0.994535519125683, 0.147003745318352, 0.390923694779116, 0.92831541218638,
0.99079754601227, 0.993055555555556, 0.497512437810945, 0.58144695960941
), depth = c(1.68148148148148, 0.99685534591195, 0.994535519125683,
0.147003745318352, 0.390923694779116, 0.92831541218638, 0.99079754601227,
1.34722222222222, 0.497512437810945, 0.58144695960941)), .Names = c("taxon",
"cov", "depth"), row.names = c(40L, 10L, 58L, 44L, 7L, 55L, 29L,
13L, 2L, 53L), class = "data.frame")
You just need to multiply the 'depth' geom_line with 4 :
ggplot(df4, mapping = aes(x=taxon, y=cov, group = 1, colour = "Coverage", xlab("Cover"))) +
geom_line() +
labs (x = "Taxon", y = "Coverage") +
geom_line(aes(y=depth * 4, colour = "Depth")) +
theme(axis.text.x = element_text(angle = 75, hjust= 1, vjust = 1)) +
scale_colour_manual(values = c("navyblue", "green4")) +
scale_y_continuous(sec.axis = sec_axis(~./4, name = "Depth"))

Is it possible to put space between stacks in ggplot2 stacked bar?

I took this example from here:
DF <- read.table(text="Rank F1 F2 F3
1 500 250 50
2 400 100 30
3 300 155 100
4 200 90 10", header=TRUE)
library(reshape2)
DF1 <- melt(DF, id.var="Rank")
library(ggplot2)
ggplot(DF1, aes(x = Rank, y = value, fill = variable)) +
geom_bar(stat = "identity")
Is it possible to create a stacked bar such as the following graph using ggplot2? I do not want to differentiate stacks by different colors.
EDIT: Based on Pascal's comments,
ggplot(DF1, aes(x = Rank, y = value)) +
geom_bar(stat = "identity",lwd=2, color="white")
I still have the white borders for the bars.
This is the closest I could get to your example figure. It is not much of an improvement beyond what you've already sorted but puts less of an emphasis on the white bar borders on the grey background.
library(ggplot2)
p <- ggplot(DF1, aes(x = Rank, y = value, group = variable))
p <- p + geom_bar(stat = "identity", position = "stack", lwd = 1.5,
width = 0.5, colour = "white", fill = "black")
p <- p + theme_classic()
p <- p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
p
That produces:
If you want to keep the grey background you can find out exactly what shade of grey it is and use that colour for the line while removing the background grids (this is not the right shade).
p <- ggplot(DF1, aes(x = Rank, y = value))
p <- p + geom_bar(stat = "identity", position = "stack", lwd = 1.5,
width = 0.5, colour = "grey", fill = "black")
p <- p + theme(panel.grid = element_blank())
p
An issue with this solution is that very small groups will not be seen (e.g., when Rank = 4 variable F3 = 10; this small value is completely covered by the white bar outline).
Your sample data:
DF1 <- structure(list(Rank = c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L), variable = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L), .Label = c("F1", "F2", "F3"), class = "factor"),
value = c(500L, 400L, 300L, 200L, 250L, 100L, 155L, 90L,
50L, 30L, 100L, 10L)), row.names = c(NA, -12L), .Names = c("Rank",
"variable", "value"), class = "data.frame")

Unable to customize legend in ggplot

I am plotting yearly demand using ggplot (my code below) but I am not able to put color legend for the plot. My data.frame has "Zone" and "TotalDemand" (only 2 columns) and I have three data.frames for three years ("sales12", "sales13" and "sales14").
ggplot() +
geom_point(data=sales12, aes(x=factor(Zone), y=TotalDemand/1000),
color='green',size=6, shape=17) +
geom_point(data=sales13, aes(x=factor(Zone), y=TotalDemand/1000),
color='red',size=6, shape=18)+
geom_point(data=sales14, aes(x=factor(Zone), y=TotalDemand/1000),
color='black',size=4, shape=19) +
labs(y='Demand (in 1000s)',x='Zones') +
scale_colour_manual(name = 'the colour',
values = c('green'='green', 'black'='black', 'red'='red'),
labels = c('12','13','14'))
Please help me to identify my mistake.
With a very small example data frame, df, I melted it to format it for ggplot.
dput(df)
structure(list(Zone = structure(1:4, .Label = c("Alpha", "Baker",
"Charlie", "Delta"), class = "factor"), TotalDemand = c(90L,
180L, 57L, 159L), sales12 = c(25L, 40L, 13L, 50L), sales13 = c(30L,
60L, 16L, 55L), sales14 = c(35L, 80L, 28L, 54L)), .Names = c("Zone",
"TotalDemand", "sales12", "sales13", "sales14"), class = "data.frame", row.names = c(NA,
-4L))
df.m <- melt(df, id.vars = "Zone", measure.vars = c("sales12", "sales13", "sales14"))
ggplot(df.m, aes(x=factor(Zone), y=value, color = variable )) +
geom_point(size=6, shape=17) +
labs(y='Demand (in 1000s)',x='Zones') +
scale_colour_manual(values = c('green', 'black', 'red'))
You can adjust size and shape and colors of your points, add a title, etc.. Your legend can also be positioned on the bottom, for example.

Resources