Unable to customize legend in ggplot - r

I am plotting yearly demand using ggplot (my code below) but I am not able to put color legend for the plot. My data.frame has "Zone" and "TotalDemand" (only 2 columns) and I have three data.frames for three years ("sales12", "sales13" and "sales14").
ggplot() +
geom_point(data=sales12, aes(x=factor(Zone), y=TotalDemand/1000),
color='green',size=6, shape=17) +
geom_point(data=sales13, aes(x=factor(Zone), y=TotalDemand/1000),
color='red',size=6, shape=18)+
geom_point(data=sales14, aes(x=factor(Zone), y=TotalDemand/1000),
color='black',size=4, shape=19) +
labs(y='Demand (in 1000s)',x='Zones') +
scale_colour_manual(name = 'the colour',
values = c('green'='green', 'black'='black', 'red'='red'),
labels = c('12','13','14'))
Please help me to identify my mistake.

With a very small example data frame, df, I melted it to format it for ggplot.
dput(df)
structure(list(Zone = structure(1:4, .Label = c("Alpha", "Baker",
"Charlie", "Delta"), class = "factor"), TotalDemand = c(90L,
180L, 57L, 159L), sales12 = c(25L, 40L, 13L, 50L), sales13 = c(30L,
60L, 16L, 55L), sales14 = c(35L, 80L, 28L, 54L)), .Names = c("Zone",
"TotalDemand", "sales12", "sales13", "sales14"), class = "data.frame", row.names = c(NA,
-4L))
df.m <- melt(df, id.vars = "Zone", measure.vars = c("sales12", "sales13", "sales14"))
ggplot(df.m, aes(x=factor(Zone), y=value, color = variable )) +
geom_point(size=6, shape=17) +
labs(y='Demand (in 1000s)',x='Zones') +
scale_colour_manual(values = c('green', 'black', 'red'))
You can adjust size and shape and colors of your points, add a title, etc.. Your legend can also be positioned on the bottom, for example.

Related

How to implement stacked bar graph with a line chart in R

I have a dataset containing y variable as Year and x variables as (A, B, C(%)). I have attached the dataset here.
dput(result)
structure(list(Year = 2008:2021, A = c(4L, 22L, 31L, 48L, 54L,
61L, 49L, 56L, 59L, 85L, 72L, 58L, 92L, 89L), B = c(1L, 2L, 6L,
7L, 14L, 21L, 15L, 27L, 27L, 46L, 41L, 26L, 51L, 62L), C... = c(25,
9.09, 19.35, 14.58, 25.93, 34.43, 30.61, 48.21, 45.76, 54.12,
56.94, 44.83, 55.43, 69.66)), class = "data.frame", row.names = c(NA,
-14L))
The variables A and B will be plotted as stacked bar graph and the C will be plotted as line chart in the same plot. I have generated the plot using excel like below:
How can I create the same plot in R?
You first need to reshape longer, for example with pivot_longer() from tidyr, and then you can use ggplot2 to plot the bars and the line in two separate layers. The fill = argument in the geom_bar(aes()) lets you stratify each bar according to a categorical variable - name is created automatically by pivot_longer().
library(ggplot2)
library(tidyr)
dat |>
pivot_longer(A:B) |>
ggplot(aes(x = Year)) +
geom_bar(stat = "identity", aes(y = value, fill = name)) +
geom_line(aes(y = `C(%)`), size = 2)
Created on 2022-06-09 by the reprex package (v2.0.1)
You're asking for overlaid bars, in which case there's no need to pivot, and you can add separate layers. However I would argue that this could confuse or mislead many people - usually in stacked plots bars are stacked, not overlaid, so thread with caution!
library(ggplot2)
library(tidyr)
dat |>
ggplot(aes(x = Year)) +
geom_bar(stat = "identity", aes(y = A), fill = "lightgreen") +
geom_bar(stat = "identity", aes(y = B), fill = "red", alpha = 0.5) +
geom_line(aes(y = `C(%)`), size = 2) +
labs(y = "", caption = "NB: bars are overlaid, not stacked!")
Created on 2022-06-09 by the reprex package (v2.0.1)
I propose this:
library(data.table)
library(ggplot2)
library(ggthemes)
dt <- fread("dataset.csv")
dt.long <- melt(dt, id.vars = c("Year"))
dt.AB <- dt.long[variable %in% c("A", "B"), ]
dt.C <- copy(dt.long[variable == "C(%)", .(Year, variable, value = value * 3/2)])
ggplot(dt.AB, aes(x = Year, y = value, fill = variable), ) +
geom_bar(stat = "identity") +
geom_line(data=dt.C, colour='red', aes(x = Year, y = value)) +
scale_x_continuous(breaks = pretty(dt.AB$Year,
n = length(unique(dt.AB$Year)))) +
scale_y_continuous(
name = "A&B",
breaks = seq (0, 150, 10),
sec.axis = sec_axis(~.*2/3, name="C(%)", breaks = seq (0, 100, 10))
) + theme_hc() +
scale_fill_manual(values=c("grey70", "grey50", "grey30")) +
theme(
axis.line.y = element_line(colour = 'black', size=0.5,
linetype='solid'))

Looking to solve problem with facet_wrap/facet_grid that isn't resolved with space = "free"

I am making a horizontal bar graph with multiple different facets (y axis is categorical variable, x-axis is percentages). I am running into the problem that if I use facet_wrap() the bar widths are different in each of the facets (with weird formatting if I add space="free"). I learned this problem can be solved with facet_grid() but then I have the problem of not having numbered x-axises under each facet.
See below:
library(tidyverse)
df <- structure(list(Race = c("Asian", "Black"),
Symptom1 = c(60L, 40L),
Symptom2 = c(50L, 20L),
Symptom3 = c(20L, 50L),
Symptom4 = c(70L, 50L),
Symptom5 = c(90L, 85L),
Symptom6 = c(100L, 30L),
Symptom7 = c(10L, 30L),
Symptom8 = c(30L, 20L),
Symptom9 = c(40L, 80L),
Symptom10 = c(60L, 40L)),
row.names = c(NA, -2L), class = "data.frame")
b <- df %>% pivot_longer(!Race, names_to = "Symptom", values_to ="Percentage")
b$Symptom <- fct_rev(factor(b$Symptom, levels = unique(b$Symptom)))
b$Group <- "Benign"
b[c(3:5,13:15),]$Group <- "Warning"
b[c(6:10,16:20),]$Group <- "Fatal"
This provides a graph that has same bar widths but only displays the x axis at the very bottom (as opposed to also under the benign and fatal facets as well)
ggplot(data=b, aes(x=Percentage, y= Symptom, fill = Race)) +
geom_bar(stat="identity", position=position_dodge()) +
facet_grid(Group~., scales = "free", space = "free")
This provides a graph with a numbered x-axis under each facet but uneven bar widths
ggplot(data=b, aes(x=Percentage, y= Symptom, fill = Race)) +
geom_bar(stat="identity", position=position_dodge()) +
facet_wrap(~Group, scales = "free")
This provides even bar widths and a numbered x-axis but also then creates empty "space" in each facet.
ggplot(data=b, aes(x=Percentage, y= Symptom, fill = Race)) +
geom_bar(stat="identity", position=position_dodge()) +
facet_wrap(~Group)
Is there a solution to this? I would love a graph that has facets, has equal bar widths in each facet, and has repeated numbered x-axises under each of the facets.
Thanks!
You can get this easily with lemon::facet_rep_grid(repeat.tick.labels = T) instead of ggplot2::facet_grid().
library(tidyverse)
library(lemon)
b <- structure(list(Race = c("Asian", "Asian", "Asian", "Asian", "Asian", "Asian", "Asian", "Asian", "Asian", "Asian", "Black", "Black", "Black", "Black", "Black", "Black", "Black", "Black", "Black", "Black"), Symptom = structure(c(10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L, 10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L), .Label = c("Symptom10", "Symptom9", "Symptom8", "Symptom7", "Symptom6", "Symptom5", "Symptom4", "Symptom3", "Symptom2", "Symptom1"), class = "factor"), Percentage = c(60L, 50L, 20L, 70L, 90L, 100L, 10L, 30L, 40L, 60L, 40L, 20L, 50L, 50L, 85L, 30L, 30L, 20L, 80L, 40L), Group = c("Benign", "Benign", "Warning", "Warning", "Warning", "Fatal", "Fatal", "Fatal", "Fatal", "Fatal", "Benign", "Benign", "Warning", "Warning", "Warning", "Fatal", "Fatal", "Fatal", "Fatal", "Fatal")), row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame"))
ggplot(data=b, aes(x=Percentage, y= Symptom, fill = Race)) +
geom_bar(stat="identity", position=position_dodge()) +
facet_rep_grid(Group~., scales = "free", space = "free", repeat.tick.labels = T)
Created on 2022-01-18 by the reprex package (v2.0.1)

Adding a second y axis in R

I am envisioning to use the following dataset to create a plot that combines a clustered bar chart and line chart with the following data:
structure(list(X = 1:14, ORIGIN = c("AUS", "AUS", "DAL", "DAL",
"DFW", "DFW", "IAH", "IAH", "OKC", "OKC", "SAT", "SAT", "SHV",
"SHV"), DEST = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L), .Label = c("ATL", "SEA"), class = "factor"),
flight19.x = c(293L, 93L, 284L, 93L, 558L, 284L, 441L, 175L,
171L, 31L, 262L, 31L, 175L, 0L), flight19.y = c(5526L, 5526L,
6106L, 6106L, 23808L, 23808L, 15550L, 15550L, 2055L, 2055L,
3621L, 3621L, 558L, 558L)), row.names = c(NA, -14L), class = "data.frame")
In Excel, the chart I am envisioning looks something like this:
I have already tried to used the sec.axis function to generate a second axis. However, the outcome looks like the line plot still uses the first y-axis instead of the second axis:
p1 <- ggplot()+
geom_bar(data = flight19, aes(ORIGIN, flight19.x, fill = DEST),stat = "identity", position = "dodge" )+
scale_fill_viridis(name = "Destinations", discrete = TRUE)+
labs(y= "Operation Counts", x = "Airports")
p2 <- p1 + geom_line(data = flight19, aes(as.character(ORIGIN), flight19.y, group = 1))+
geom_point(data = flight19, aes(as.character(ORIGIN), flight19.y, group = 1))+
scale_y_continuous(limit = c(0,600),sec.axis = sec_axis(~.*75/10, name = "Total Monthly Operations"))
The plot shows the warning below:
Warning messages:
1: Removed 12 row(s) containing missing values (geom_path).
2: Removed 12 rows containing missing values (geom_point).
And the codes produce the plot below:
Could someone teach me how to let the line plot corresponds to the second axis?
Thanks so much in advance.
Find a suitable transformation factor, here I used 50 just to get nice y-axis labels
#create x-axis
flight19$x_axis <- paste0(flight19$ORIGIN,'\n',flight19$DEST)
# The transformation factor
#transf_fact <- max(flight19$flight19.y)/max(flight19$flight19.x)
transf_fact <- 50
ggplot(flight19, aes(x = x_axis)) +
geom_bar(aes(y = flight19.x),stat = "identity", fill = "blue") +
geom_line(aes(y = flight19.y/transf_fact,group=1), color = "orange") +
scale_y_continuous(name = "Operation Counts",
limit = c(0,600),
breaks = seq(0,600,100),
sec.axis = sec_axis(~ (.*transf_fact),
breaks = function(limit)seq(0,limit[2],5000),
labels = scales::dollar_format(prefix = "$",suffix = " k",scale = .001),
name = "Total Monthly Operations")) +
xlab("Airports") +
theme_bw()

Scaling data to secondary axis in R (ggplot2)

I'm trying to build a line chart with ggplot2 in which I would like to have 2 lines, each adapted to a different axis. I'm trying the following code (where df4 is my data frame):
p1 = ggplot(df4, mapping = aes(x=taxon, y=cov, group = 1, colour = "Coverage", xlab("Cover"))) +
geom_line() +
labs (x = "Taxon", y = "Coverage") +
geom_line(aes(y=depth, colour = "Depth")) +
theme(axis.text.x = element_text(angle = 75, hjust= 1, vjust = 1)) +
scale_colour_manual(values = c("navyblue", "green4")) +
scale_y_continuous(sec.axis = sec_axis(~./4, name = "Depth"))
With this, I am able to build a chart with 2 y-axis and 2 lines, but both lines are adapted to the primary y-axis (the secondary axis is there, but it's useless). Is there maybe a parameter with which I can ask my data to follow this axis?
Blue line values only go until 1, so they should be adapted to the secondary axis
This is an example of my data:
structure(list(taxon = structure(c(80L, 57L, 74L, 32L, 1L, 3L,
41L, 9L, 70L, 12L), .Label = c("c__Tremellomycetes", "f__Listeriaceae",
"f__Saccharomycetaceae", "g__Escherichia", "g__Klebsiella", "g__Pseudomonas",
"g__Saccharomyces", "g__Salmonella", "g__Staphylococcus", "s__Bacillus_amyloliquefaciens",
"s__Bacillus_phage_phi105", "s__Bacillus_siamensis", "s__Bacillus_sp_JS",
"s__Bacillus_subtilis", "s__Bacillus_vallismortis", "s__Citrobacter_sp_30_2",
"s__Cronobacter_phage_ENT47670", "s__Enterobacter_cancerogenus",
"s__Enterobacteria_phage_BP_4795", "s__Enterobacteria_phage_cdtI",
"s__Enterobacteria_phage_ES18", "s__Enterobacteria_phage_fiAA91_ss",
"s__Enterobacteria_phage_HK629", "s__Enterobacteria_phage_IME10",
"s__Enterobacteria_phage_lambda", "s__Enterobacteria_phage_mEp237",
"s__Enterobacteria_phage_mEp460", "s__Enterobacteria_phage_Min27",
"s__Enterobacteria_phage_P22", "s__Enterobacteria_phage_YYZ_2008",
"s__Enterococcus_faecalis", "s__Enterococcus_gilvus", "s__Enterococcus_phage_phiEf11",
"s__Enterococcus_phage_phiFL1A", "s__Enterococcus_phage_phiFL3A",
"s__Escherichia_coli", "s__Escherichia_phage_HK639", "s__Escherichia_phage_P13374",
"s__Lactobacillus_fermentum", "s__Listeria_innocua", "s__Listeria_ivanovii",
"s__Listeria_marthii", "s__Listeria_monocytogenes", "s__Listeria_phage_2389",
"s__Listeria_phage_A118", "s__Listeria_phage_A500", "s__Paenibacillus_sp_ICGEB2008",
"s__Phage_Gifsy_1", "s__Phage_Gifsy_2", "s__Pseudomonas_aeruginosa",
"s__Pseudomonas_mendocina", "s__Pseudomonas_phage_B3", "s__Pseudomonas_phage_D3",
"s__Pseudomonas_phage_DMS3", "s__Pseudomonas_phage_F10", "s__Pseudomonas_phage_F116",
"s__Pseudomonas_phage_PAJU2", "s__Pseudomonas_phage_Pf1", "s__Pseudomonas_phage_phi297",
"s__Pseudomonas_sp_2_1_26", "s__Pseudomonas_sp_P179", "s__Salmonella_enterica",
"s__Salmonella_phage_Fels_1", "s__Salmonella_phage_Fels_2", "s__Salmonella_phage_SETP13",
"s__Salmonella_phage_ST64B", "s__Shigella_phage_Sf6", "s__Staphylococcus_aureus",
"s__Staphylococcus_phage_42E", "s__Staphylococcus_phage_55",
"s__Staphylococcus_phage_80alpha", "s__Staphylococcus_phage_P954",
"s__Staphylococcus_phage_phi2958PVL", "s__Staphylococcus_phage_phiMR25",
"s__Staphylococcus_phage_phiN315", "s__Staphylococcus_phage_phiNM3",
"s__Staphylococcus_phage_phiPVL_CN125", "s__Staphylococcus_phage_phiPVL108",
"s__Staphylococcus_phage_PT1028", "s__Staphylococcus_phage_StauST398_1",
"s__Staphylococcus_phage_StauST398_3", "s__Staphylococcus_prophage_phiPV83",
"s__Stx2_converting_phage_1717", "s__Stx2_converting_phage_86"
), class = "factor"), cov = c(0.987654320987654, 0.99685534591195,
0.994535519125683, 0.147003745318352, 0.390923694779116, 0.92831541218638,
0.99079754601227, 0.993055555555556, 0.497512437810945, 0.58144695960941
), depth = c(1.68148148148148, 0.99685534591195, 0.994535519125683,
0.147003745318352, 0.390923694779116, 0.92831541218638, 0.99079754601227,
1.34722222222222, 0.497512437810945, 0.58144695960941)), .Names = c("taxon",
"cov", "depth"), row.names = c(40L, 10L, 58L, 44L, 7L, 55L, 29L,
13L, 2L, 53L), class = "data.frame")
You just need to multiply the 'depth' geom_line with 4 :
ggplot(df4, mapping = aes(x=taxon, y=cov, group = 1, colour = "Coverage", xlab("Cover"))) +
geom_line() +
labs (x = "Taxon", y = "Coverage") +
geom_line(aes(y=depth * 4, colour = "Depth")) +
theme(axis.text.x = element_text(angle = 75, hjust= 1, vjust = 1)) +
scale_colour_manual(values = c("navyblue", "green4")) +
scale_y_continuous(sec.axis = sec_axis(~./4, name = "Depth"))

Look of arrows in ggplot2 geom_segment()

I'm trying to make a plot with arrows in ggplot2 looking something like this, which was made using base R grapics. (colors are not important)
Using ggplot2:
library(ggplot2)
library(scales)
library(grid)
df3 <- structure(list(value1 = c(51L, 57L, 59L, 57L, 56L, 56L, 60L,
66L, 61L, 61L), value2 = c(56L, 60L, 66L, 61L, 61L, 59L, 61L,
66L, 63L, 63L), group = c("A", "B", "C", "D", "E", "A", "B",
"C", "D", "E"), time = c("1999", "1999", "1999", "1999", "1999",
"2004", "2004", "2004", "2004", "2004"), y_position = c(1L, 2L,
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L)), .Names = c("value1", "value2",
"group", "time", "y_position"), row.names = c(NA, -10L), class = "data.frame")
ggplot( df3, aes( x = value1, y = y_position, group = time, color = time)) +
geom_segment( x = min(df3$value1, df3$value2), xend = max( df3$value1, df3$value2 ),
aes( yend = y_position), color = "lightgrey", size = 19) +
scale_y_continuous( labels = df3$group, breaks = df3$y_position) +
theme_classic() + theme( axis.line = element_blank(), axis.title = element_blank() ) +
geom_segment( aes( yend = y_position, xend = value2, color = time, group = time), size = 19, alpha = 0.9,
arrow = arrow(length = unit(40, "points"),type = "closed", angle = 40) )
I get this:
The problem is that the arrows look wrong (in that they don't look like the first plot). Using geom_segment() is not important.
This question may give the answer but I was hoping for something less hacky:
Specifying gpar settings for grid arrows in R
update: ggplot2 v2.1.0.9001
If the plot is in your current window you can edit the shape of the arrow directly with
grid.force()
# change shape of arrows
grid.gedit("segments", gp=gpar(linejoin ='mitre'))
# change the shape in legend also
grid.gedit("layout", gp=gpar(linejoin ='mitre'))
If the plot is in your current window you can edit the shape of the arrow directly with
grid.gedit("segments", gp=gpar(linejoin ='mitre'))
ggplot now seems to have changed the legend key to an arrow shape, so if you want to change the shape of these as well, you can do this across the full plot with
grid.gedit("gTableParent", gp=gpar(linejoin ='mitre'))
original answer
Not less hacky, but perhaps easier?? You can edit the grobs returned by ggplotGrob.
If p is your plot:
g <- ggplotGrob(p)
idx <- grep("panel", g$layout$name)
nms <- sapply(g$grobs[[idx]]$children[[3]]$children , '[[', "name")
for(i in nms) {
g$grobs[[idx]]$children[[3]] <-
editGrob(g$grobs[[idx]]$children[[3]], nms[i],
gp=gpar(linejoin ='mitre'), grep=TRUE)
}
grid.newpage()
grid.draw(g)
The challenge seems to be that the arrow constructor from the grid package gets messed up if size is invoked in the geom_segment block.
so
p <- ggplot(df3) + coord_flip()
p1 <- p + geom_bar(aes(x=group,y=max(c(value1,value2))*1.1),width=0.2, stat="identity",position="identity",alpha=0.2)
df1<-filter(df3,time=="1999")
p1 + geom_segment(data=df1,aes(x=group,xend=group,y=value1,yend=value2),color="blue",size=8,arrow=arrow(angle=20,type="closed",ends="last",length=unit(1,"cm")))
looks ridiculous as you show. I tried the workaround of of separating the segment into just a fat segment and an arrow on a skinny segment (two layers) like so:
p2<-p1 + geom_segment(data=df1,aes(x=group,xend=group,y=value1,yend=value2), color="blue",arrow=arrow(angle=20,type="closed",ends="last",length=unit(1,"cm")))
p2 + geom_segment(data=df1,aes(x=group,xend=group,y=value1,yend=value2), color="blue",size=8)
but now the fat segment end is not mitred and so obscures the arrow.
Fixing the arrow parameter seems to be needed.

Resources