Related
I'd like to draw bar plot like this but in dual Y axis
(https://i.stack.imgur.com/ldMx0.jpg)
The first three indices range from 0 to 1,
so I want the left y-axis (corresponding to NSE, KGE, VE) to range from 0 to 1,
and the right y-axis (corresponding to PBIAS) to range from -15 to 5.
the following is my data and code:
library("ggplot2")
## data: one row per (period, index) combination, holding the bar height
## (`value`) and the half-length of its error bar (`sd`)
data <- data.frame(
  value = c(
    0.82, 0.87, 0.65, -3.39, # period "all"
    0.75, 0.82, 0.63, 1.14,  # period "calibration"
    0.85, 0.87, 0.67, -7.03  # period "validation"
  ),
  sd = c(
    0.003, 0.047, 0.006, 4.8,
    0.003, 0.028, 0.006, 4.77,
    0.004, 0.057, 0.014, 4.85
  ),
  ## fix index sequence: gl() creates the factors with their levels already in
  ## the desired order; the four indices cycle within each period ...
  index = gl(4, 1, length = 12, labels = c("NSE", "KGE", "VE", "PBIAS")),
  ## ... and every period covers four consecutive rows
  period = gl(3, 4, labels = c("all", "calibration", "validation"))
)
## bar plot: dodged bars per index, one fill colour per period, +/- sd whiskers
ggplot(data, aes(x = index, y = value, fill = period)) +
  ## geom_col() plots the values as given, like geom_bar(stat = "identity")
  geom_col(position = "dodge") +
  geom_errorbar(
    aes(ymin = value - sd, ymax = value + sd),
    position = position_dodge(0.9),
    width = 0.2, alpha = 0.5, size = 1
  ) +
  theme_bw()
I tried to scale and shift the second y-axis,
but the PBIAS bars were removed because they fall outside the scale limits, as shown below:
(https://i.stack.imgur.com/n6Jfm.jpg)
the following is my code with dual y axis:
## bar plot (scale and shift the second y-axis with slope/intercept in 20/-15)
## NOTE: sec_axis() only relabels the right-hand axis; the PBIAS values are
## still drawn against the primary scale, so with limits = c(0, 1) they fall
## out of range and their bars are removed.
ggplot(data, aes(x=index, y=value, fill=period))+
geom_bar(position="dodge", stat="identity")+
geom_errorbar(aes(ymin=value-sd, ymax=value+sd),
position = position_dodge(0.9), width=0.2 ,alpha=0.5, size=1)+
theme_bw()+
scale_y_continuous(limits = c(0,1), name = "value", sec.axis = sec_axis(~ 20*.- 15, name="value"))
Any advice on how to move the bars, or any other solution?
Taking a different approach, instead of using a dual axis one option would be to make two separate plots and glue them together using patchwork. IMHO that is much easier than fiddling around with rescaling the data (that's the step you missed, i.e. if you want to have a secondary axis you also have to rescale the data) and makes it clearer that the indices are measured on a different scale:
library(ggplot2)
library(patchwork)
# Flag the PBIAS rows: they are measured on a different scale and get their
# own panel.
data$facet <- data$index %in% "PBIAS"

# Draw the dodged bar chart (bars plus +/- sd error bars) for one subset.
#
# .data: data frame with the columns `index`, `value`, `sd` and `period`.
# Returns a ggplot object; the caller adds the y scale that suits the subset.
plot_fun <- function(.data) {
  ggplot(.data, aes(x = index, y = value, fill = period)) +
    geom_bar(position = "dodge", stat = "identity") +
    geom_errorbar(aes(ymin = value - sd, ymax = value + sd),
      position = position_dodge(0.9), width = 0.2, alpha = 0.5, size = 1
    ) +
    theme_bw()
}

# Left panel: NSE, KGE and VE on a 0-1 axis.
p1 <- subset(data, !facet) |> plot_fun() + scale_y_continuous(limits = c(0, 1))
# Right panel: PBIAS alone, with its axis drawn on the right-hand side.
p2 <- subset(data, facet) |> plot_fun() + scale_y_continuous(limits = c(-15, 15), position = "right")

# Glue the panels together with a single shared legend. The argument is
# spelled `widths` in full instead of relying on partial matching of `width`;
# c(3, 1) makes the three-index panel three times as wide as the PBIAS panel.
p1 + p2 +
  plot_layout(guides = "collect", widths = c(3, 1))
A second but similar option would be to use ggh4x, which via ggh4x::facetted_pos_scales allows setting the limits for facet panels individually. One drawback: the panels have the same width. (I failed in making this approach work with facet_grid and space="free".)
library(ggplot2)
library(ggh4x)
# Split the rows into two facet panels: PBIAS (TRUE) versus the other indices.
data$facet <- data$index %in% "PBIAS"

ggplot(data, aes(x = index, y = value, fill = period)) +
  geom_col(position = "dodge") +
  geom_errorbar(
    aes(ymin = value - sd, ymax = value + sd),
    position = position_dodge(0.9),
    width = 0.2, alpha = 0.5, size = 1
  ) +
  facet_wrap(~facet, scales = "free") +
  # Per-panel y scales: the left-hand side of each formula selects the panel.
  facetted_pos_scales(
    y = list(
      facet ~ scale_y_continuous(limits = c(-15, 15), position = "right"),
      !facet ~ scale_y_continuous(limits = c(0, 1), position = "left")
    )
  ) +
  theme_bw() +
  # Hide the strip labels of the helper facet variable.
  theme(strip.text.x = element_blank())
I guess my question is very simple for someone who knows ggplot very well but I spent a lot of time trying different ways. I want to draw a horizontal line that pass-through y axis to separate each band of the bars produced using geom_col. For example, I want to draw a horizontal line that separate bars of meat from maize. Here is my code, example data and the design of the figure I want to produce.
library(tidyverse)
library(ggplot2)
# sample data: one mean value per food item and population subgroup
Food <- rep(c("meat", "wheat", "maize"), each = 4)
Subgroup <- rep(c("Male", "Female", "Urban", "Rural"), times = 3)
mean <- c(
  8.66, 10.45, 9.88, 7.32,    # meat
  21.04, 19.65, 20.26, 20.87, # wheat
  51.06, 44.51, 47.60, 48.40  # maize
)
df <- data.frame(Food, Subgroup, mean)

# Color code: one fill colour per subgroup
colorPanel <- c("#083c5d", "#2d004b", "#106d8e", "#7d103d")
# Plot: horizontal dodged bars, one group of bars per food item
Plot_FBGDS <- ggplot(df, aes(x = Food, y = mean, fill = Subgroup)) +
  # geom_col() always plots the values as given and has no `stat` argument,
  # so none is passed (passing one only triggers an "unknown parameters"
  # warning).
  # NOTE(review): the negative dodge width presumably reverses the order of
  # the bars within each group under coord_flip() - confirm.
  geom_col(position = position_dodge(-0.9), width = 0.82) +
  scale_y_continuous(
    breaks = c(0, 20, 40, 60, 80), expand = c(0, 0),
    limits = c(0, 100),
    # show the tick labels as percentages, e.g. 20 -> "20%"
    labels = function(x) paste0(x, "%")
  ) +
  coord_flip() +
  scale_fill_manual(values = colorPanel) +
  # blank axis titles
  labs(x = " ", y = " ")
Try this, using geom_vline with manually specified xintercept.
# Plot with separator lines between the food groups
ggplot(df, aes(x = Food, y = mean, fill = Subgroup)) +
  # geom_col() has no `stat` argument (it always plots the values as given),
  # so none is passed.
  geom_col(position = position_dodge(-0.9), width = 0.82) +
  scale_y_continuous(
    breaks = c(0, 20, 40, 60, 80), expand = c(0, 0),
    limits = c(0, 100),
    labels = function(x) paste0(x, "%")
  ) +
  # The three food groups sit at x = 1, 2, 3, so the half-integers fall
  # between and on either side of them. coord_flip() turns these "vertical"
  # lines into horizontal ones.
  geom_vline(xintercept = c(0.5, 1.5, 2.5, 3.5)) +
  coord_flip() +
  scale_fill_manual(values = colorPanel) +
  # blank axis titles
  labs(x = " ", y = " ")
Note that geom_vline typically produces a vertical line, but since you have coord_flip it becomes horizontal. Without the coord_flip, you would use geom_hline and set the yintercept parameter instead.
Also, if I may suggest an alternative way to visualize this using facet_wrap instead, I would say the option below looks a lot better and you can style the facets using the strip_ properties in plot_theme
# Plot: one facet per food item, with the subgroups as bars inside each facet
ggplot(df, aes(x = Subgroup, y = mean, fill = Subgroup)) +
  # geom_col() has no `stat` argument (it always plots the values as given),
  # so none is passed.
  geom_col(position = position_dodge(-0.9), width = 0.82) +
  scale_y_continuous(
    breaks = c(0, 20, 40, 60, 80), expand = c(0, 0),
    limits = c(0, 100),
    labels = function(x) paste0(x, "%")
  ) +
  coord_flip() +
  scale_fill_manual(values = colorPanel) +
  # The trailing `+` matters: without it the plot expression ends here and
  # the labs()/theme() calls below are never applied to the plot.
  facet_wrap(~Food, ncol = 1) +
  # blank axis titles
  labs(x = " ", y = " ") +
  theme(legend.position = "none")
I'm going to build off the answer already posted here from @geoff. OP requested lines that extend beyond the plot area into the axis label. It's true that normally geoms are restricted to the panel area alone, but that's only by default. For any of the coord_*() functions, you can change the default clipping from "on" to "off" via clip="off".
For geom_vline() and geom_hline(), these seem to be automatically clipped to the panel area (more on that below), but for just about all other geoms, you can extend beyond the plot area. We can use this to our advantage in using geom_segment() and specifying the lines.
In this example, I'm going to need to create a linesdata data frame outside of the plot to make drawing the lines a bit easier (and reference that dataset in geom_segment()). We also need to change clip="off" within coord_flip(), and finally, I had to adjust the value for the starting value of the line (y here because we flip the axis) to be some negative value to get below the axis.
# Positions of the separator lines (between and around the food groups).
linesdata <- data.frame(
  xvals = c(0.5, 1.5, 2.5, 3.5),
  # required because it complains for fill, which I cannot specify again for
  # geom_segment: the global `fill = Subgroup` mapping is inherited by that
  # layer and must resolve against this data frame too.
  Subgroup = NA
)

ggplot(df, aes(x = Food, y = mean, fill = Subgroup)) +
  geom_col(position = position_dodge(-0.9), width = 0.82) +
  scale_y_continuous(
    breaks = c(0, 20, 40, 60, 80),
    limits = c(0, 100),
    expand = c(0, 0),
    labels = function(x) paste0(x, "%")
  ) +
  # clip = "off" lets layers draw outside the panel area.
  coord_flip(clip = "off") +
  # Start every line at y = -5, i.e. below the axis, so that it reaches into
  # the axis-label area.
  geom_segment(
    data = linesdata,
    aes(x = xvals, xend = xvals),
    y = -5, yend = Inf
  ) +
  scale_fill_manual(values = colorPanel) +
  labs(x = " ", y = " ")
Note that I had to include Subgroup as a column in the linesdata data frame. The way to avoid having to do this would be to specify the fill= aesthetic inside geom_col instead of globally... but it works this way too.
The reason why geom_hline and vline always clip
Interestingly, you'll note that even though yend=Inf, the line does not extend beyond the panel area in the positive direction! I had no idea it worked this way, but it seems Inf is specially designed to clip to the panel area no matter what. I'm pretty sure that geom_vline() and geom_hline() are using values of Inf and -Inf under the hood. If I change the value to y=-Inf inside geom_segment() you can see it doesn't extend the same way as specifying a number:
# Same plot as before, but with the lines starting at -Inf instead of -5, to
# show that infinite endpoints stay inside the panel even with clip = "off".
ggplot(df, aes(x = Food, y = mean, fill = Subgroup)) +
  geom_col(position = position_dodge(-0.9), width = 0.82) +
  scale_y_continuous(
    breaks = c(0, 20, 40, 60, 80),
    limits = c(0, 100),
    expand = c(0, 0),
    labels = function(x) paste0(x, "%")
  ) +
  coord_flip(clip = "off") +
  geom_segment(
    data = linesdata,
    aes(x = xvals, xend = xvals),
    y = -Inf, yend = Inf
  ) +
  scale_fill_manual(values = colorPanel) +
  labs(x = " ", y = " ")
The only problem here is that you have to play around a bit to find the right value for the starting y value via trial and error. I found -5 did the trick pretty well.
I am making a set of scorecards where I am generating a set of graphs that show the distribution of responses from a survey and also where the response for a specific company falls. I need to modify the formatting of a graph, a stacked barchart, and add a few features I’ve outlined below. I’ve already spent a few hours getting my chart to where it is now and would appreciate your help with the features I outline below.
Data is
# Survey responses: the review frequency reported by each company for Q1.
Data <- data.frame(
  Reviewed = c(
    "Annually", "Annually", "Hourly", "Monthly", "Weekly", "Monthly",
    "Weekly", "Other", "Other", "Monthly", "Weekly"
  ),
  Company = letters[1:11], # companies "a" to "k"
  Question = "Q1"          # recycled to every row
)
So far I’ve developed this
# One horizontal stacked bar: position = "fill" stacks the response counts and
# rescales them so that the bar spans 0 to 1.
ggplot(data = Data, mapping = aes(x = "Question", fill = Reviewed)) +
  geom_bar(position = "fill") +
  coord_flip()
I would like to do the following:
Order the variables so they are arranged on plot as follows: Annually,Monthly,Weekly,Hourly,Other
Express the y axis in terms of percent. I.e. 0.25 turns into 25%
Move y-axis directly underneath the bar.
Remove the legend but move the terms underneath the respective part of the graph on a diagonal slant.
Add a black line that cuts down the 50% mark
Add a dot in at the midpoint of the stack for the value of company “e”.
Remove gray background
This is what I'm hoping the finished graph will look like.
There's a lot to unpack here, so I'll break it down bit by bit:
Order the variables so they are arranged on plot as follows: Annually,Monthly,Weekly,Hourly,Other
Assign "Reviewed" as an ordered factor. I'm reversing the order here since it wants to plot the "lowest" factor first (to the left).
# Turn `Reviewed` into an ordered factor. The levels are listed in reverse so
# that the segments appear in the requested order (Annually ... Other) along
# the bar.
Data$Reviewed <- factor(
  Data$Reviewed,
  levels = rev(c("Annually", "Monthly", "Weekly", "Hourly", "Other")),
  # TRUE is spelled out: `T` is an ordinary variable that can be reassigned
  ordered = TRUE
)

ggplot(Data, aes(x = "Question", fill = Reviewed)) +
  geom_bar(position = "fill") +
  coord_flip()
Express the y axis in terms of percent. I.e. 0.25 turns into 25%
Use scale_y_continuous(labels = scales::percent) to adjust the labels. I believe the scales package was pulled in when you installed ggplot2.
ggplot(data = Data, mapping = aes(x = "Question", fill = Reviewed)) +
  geom_bar(position = "fill") +
  # Format the proportion axis as percentages (0.25 -> 25%).
  scale_y_continuous(labels = scales::percent) +
  coord_flip()
Move y-axis directly underneath the bar.
Remove gray background
These are done all at once by adding expand = F to coord_flip.
ggplot(Data, aes(x = "Question", fill = Reviewed)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  # expand = FALSE removes the padding around the bar, so the axis sits
  # directly underneath it and no panel background remains visible.
  # (FALSE is spelled out because `F` is a reassignable variable.)
  coord_flip(expand = FALSE)
Remove the legend...
Add theme(legend.position = 'none').
ggplot(Data, aes(x = "Question", fill = Reviewed)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  # FALSE is spelled out because `F` is a reassignable variable.
  coord_flip(expand = FALSE) +
  # Drop the legend.
  theme(legend.position = "none")
but move the terms underneath the respective part of the graph on a diagonal slant.
This is tougher and takes a good amount of fiddling.
Use geom_text to make the labels
Calculate the position along the bar using the 'count' stat
Move the labels to the bottom of the plot by providing a fake x coordinate
Align the labels in the center of the bars using position_stack, and make them abut the x axis using hjust.
Add angle.
Use clip = 'off' in coord_flip to make sure that these values are not cut out since they're outside the plotting area.
Fiddle with the x limits to crop out empty plotting area.
Adjust the plot margin in theme to make sure everything can be seen.
ggplot(Data, aes(x = "Question", fill = Reviewed)) +
  geom_bar(position = "fill") +
  # Slanted category labels underneath the bar:
  # - x = 0.45 is a fake x coordinate that places them below the bar;
  # - y is each category's share of all responses, computed by the count
  #   stat (after_stat() replaces the deprecated stat(..count..) notation);
  # - position_stack(0.5) centres each label on its segment and hjust = 0
  #   makes the labels abut the axis.
  geom_text(
    aes(
      label = Reviewed, x = 0.45,
      y = after_stat(count / sum(count))
    ),
    stat = "count",
    position = position_stack(0.5),
    hjust = 0,
    angle = 45
  ) +
  scale_y_continuous(labels = scales::percent) +
  # xlim crops the empty plotting area; clip = "off" keeps the labels, which
  # lie outside the panel, from being cut away.
  coord_flip(xlim = c(0.555, 1.4), clip = "off", expand = FALSE) +
  # Extra bottom margin so that the labels remain visible.
  theme(
    plot.margin = margin(0, 0, 35, 10),
    legend.position = "none"
  )
Add a black line that cuts down the 50% mark
Use geom_hline(yintercept = 0.5); remember that it's a "horizontal" line since the coordinates are flipped.
ggplot(Data, aes(x = "Question", fill = Reviewed)) +
  geom_bar(position = "fill") +
  # Slanted category labels underneath the bar; y is each category's share of
  # all responses (after_stat() replaces the deprecated stat(..count..)
  # notation).
  geom_text(
    aes(
      label = Reviewed, x = 0.45,
      y = after_stat(count / sum(count))
    ),
    stat = "count",
    position = position_stack(0.5),
    hjust = 0,
    angle = 45
  ) +
  # 50% marker: a "horizontal" line because the coordinates are flipped.
  geom_hline(yintercept = 0.5) +
  scale_y_continuous(labels = scales::percent) +
  coord_flip(xlim = c(0.555, 1.4), clip = "off", expand = FALSE) +
  theme(
    plot.margin = margin(0, 0, 20, 10),
    legend.position = "none"
  )
Add a dot in at the midpoint of the stack for the value of company “e”.
This is pretty hack-y. Using the same y values as in geom_text, use geom_point to plot a point for every value of Reviewed, then use position_stack(0.5) to nudge them to the center of the bar. Then use scale_color_manual to only color "Weekly" values (which is the corresponding value of Reviewed for Company "e"). I'm sure there's a way to do this more programmatically.
ggplot(Data, aes(x = "Question", fill = Reviewed)) +
  geom_bar(position = "fill") +
  # Slanted category labels underneath the bar; y is each category's share of
  # all responses (after_stat() replaces the deprecated stat(..count..)
  # notation).
  geom_text(
    aes(
      label = Reviewed, x = 0.45,
      y = after_stat(count / sum(count))
    ),
    stat = "count",
    position = position_stack(0.5),
    hjust = 0,
    angle = 45
  ) +
  geom_hline(yintercept = 0.5) +
  # One candidate point per category, nudged to the middle of its segment ...
  geom_point(
    aes(
      y = after_stat(count / sum(count)),
      color = Reviewed
    ),
    stat = "count",
    position = position_stack(0.5), size = 5
  ) +
  # ... of which only "Weekly" (the answer given by company "e") is coloured.
  scale_color_manual(values = "black", limits = "Weekly") +
  scale_y_continuous(labels = scales::percent) +
  coord_flip(xlim = c(0.555, 1.4), clip = "off", expand = FALSE) +
  theme(
    plot.margin = margin(0, 0, 20, 10),
    legend.position = "none"
  )
This is what I'm hoping the finished graph will look like.
Prettying things up:
ggplot(Data, aes(x = "Question", fill = Reviewed)) +
  geom_bar(position = "fill") +
  # Slanted category labels underneath the bar; y is each category's share of
  # all responses (after_stat() replaces the deprecated stat(..count..)
  # notation).
  geom_text(
    aes(
      label = Reviewed, x = 0.45,
      y = after_stat(count / sum(count))
    ),
    stat = "count",
    position = position_stack(0.5),
    hjust = 0,
    angle = 45
  ) +
  # 50% marker: a "horizontal" line because the coordinates are flipped.
  geom_hline(yintercept = 0.5) +
  # One candidate point per category at the middle of its segment; only
  # "Weekly" (the answer given by company "e") is coloured.
  geom_point(
    aes(
      y = after_stat(count / sum(count)),
      color = Reviewed
    ),
    stat = "count",
    position = position_stack(0.5), size = 5
  ) +
  scale_color_manual(values = "black", limits = "Weekly") +
  scale_y_continuous(labels = scales::percent) +
  coord_flip(xlim = c(0.555, 1.4), clip = "off", expand = FALSE) +
  # Drop both axis titles.
  labs(x = NULL, y = NULL) +
  theme_minimal() +
  # theme() comes after theme_minimal() so that these settings are kept.
  theme(
    plot.margin = margin(0, 0, 35, 10),
    legend.position = "none"
  )
I am using the windrose function posted here: Wind rose with ggplot (R)?
I need to have the percents on the figure showing on the individual lines (rather than on the left side), but so far I have not been able to figure out how. (see figure below for depiction of goal)
Here is the code that makes the figure:
# Wind rose: stacked bars of the share of observations per direction bin,
# filled by speed bin and drawn in polar coordinates.
# NOTE(review): `data`, `ybreaks.prct`, `dirres`, `spd.colors` and `percent`
# are defined outside this snippet - confirm they are in scope.
p.windrose <- ggplot(
  data = data,
  aes(
    x = dir.binned,
    # after_stat() replaces the deprecated `..count..` notation
    y = after_stat(count / sum(count)),
    fill = spd.binned
  )
) +
  geom_bar() +
  scale_y_continuous(breaks = ybreaks.prct, labels = percent) +
  ylab("") +
  # drop = FALSE keeps direction bins that contain no observations
  scale_x_discrete(
    drop = FALSE,
    labels = waiver()
  ) +
  xlab("") +
  # NOTE(review): presumably rotates the plot by half a direction bin so the
  # first bin is centred at the top - assumes `dirres` is in degrees.
  coord_polar(start = -((dirres / 2) / 360) * 2 * pi) +
  scale_fill_manual(
    name = "Wind Speed (m/s)",
    values = spd.colors,
    drop = FALSE
  ) +
  theme_bw(base_size = 12, base_family = "Helvetica")
I marked up the figure I have so far with what I am trying to do! It'd be neat if the labels either auto-picked the location with the least wind in that direction, or if it had a tag for the placement so that it could be changed.
I tried using geom_text, but I get an error saying that "aesthetics must be valid data columns".
Thanks for your help!
One of the things you could do is to make an extra data.frame that you use for the labels. Since the data isn't available from your question, I'll illustrate with mock data below:
library(ggplot2)
# Mock data: one random value between 0 and 0.2 for each of 360 directions
df <- data.frame(
  x = seq_len(360),
  y = runif(n = 360, min = 0, max = 0.20)
)

# Label positions: "nice" breaks across the range of y, all placed at x = 90
labels <- data.frame(
  x = 90,
  y = scales::extended_breaks()(range(df$y))
)

ggplot(df, aes(x = as.factor(x), y = y)) +
  geom_point() +
  # The labels come from their own data frame and are formatted as percentages
  geom_text(
    mapping = aes(label = scales::percent(y, 1)),
    data = labels
  ) +
  # Label the x axis every 45 degrees
  scale_x_discrete(breaks = seq(0, 360, by = 45)) +
  coord_polar() +
  # Disables default y-axis
  theme(
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )
@teunbrand's answer got me very close! I wanted to add the code I used to get everything just right in case anyone in the future has a similar problem.
# Create the labels:
x_location <- pi # x location of the labels

# Get the percentage: share of all observations falling in each direction bin
T_data <- data %>%
  dplyr::group_by(dir.binned) %>%
  # n() is namespaced like the verbs around it
  dplyr::summarise(count = dplyr::n()) %>%
  dplyr::mutate(y = count / sum(count))

# One label per "nice" break across the range of those shares
labels <- data.frame(
  x = x_location,
  y = scales::extended_breaks()(range(T_data$y))
)

# Create figure
# NOTE(review): `data`, `dirres` and `spd.colors` are defined outside this
# snippet - confirm they are in scope.
p.windrose <- ggplot() +
  geom_bar(
    data = data,
    aes(
      x = dir.binned,
      # after_stat() replaces the deprecated `..count..` notation
      y = after_stat(count / sum(count)),
      fill = spd.binned
    )
  ) +
  # Percent labels drawn inside the plot, at the break positions
  geom_text(
    data = labels,
    aes(x = x, y = y, label = scales::percent(y, 1))
  ) +
  # Keep the default breaks but suppress their axis labels
  scale_y_continuous(breaks = waiver(), labels = NULL) +
  scale_x_discrete(
    drop = FALSE,
    labels = waiver()
  ) +
  ylab("") +
  xlab("") +
  coord_polar(start = -((dirres / 2) / 360) * 2 * pi) +
  scale_fill_manual(
    name = "Wind Speed (m/s)",
    values = spd.colors,
    drop = FALSE
  ) +
  theme_bw(base_size = 12, base_family = "Helvetica") +
  theme(
    axis.ticks.y = element_blank(), # Disables default y-axis
    axis.text.y = element_blank()
  )
My goal is to make a simple column chart in ggplot2 that looks like the following chart (made in Excel):
What I'm finding is that, with example data such as this (where one percentage value is very close to 100%), my options for plotting this data in ggplot2 leave something to be desired. In particular, I haven't found a way to make the following two simple things happen together:
1) Make the y-axis line end at 100%
and
2) Make the percentage labels over each bar visible
To address this issue, I've tried experimenting with different arguments to scale_y_continuous() but haven't found a way to meet both of the goals above at the same time. You can see this in the example plots and code below.
My question is: how do I expand the y scale so that my percentage labels over each data point are visible, but the y-axis line ends at 100%?
library(dplyr)
library(ggplot2)
library(scales)
# Example data: two categories, one of them very close to 100%.
# tibble() replaces the deprecated data_frame(); it is re-exported by dplyr.
example_df <- tibble(
  Label = c("A", "B"),
  Percent = c(0.5, 0.99)
)

# Base column chart with a percentage label above each bar; the y scale is
# added separately in the variants below.
example_plot <- example_df %>%
  ggplot(aes(x = Label, y = Percent)) +
  # geom_col() plots the values as given, like geom_bar(stat = "identity")
  geom_col(fill = "dodgerblue4", width = .6) +
  # vjust = -0.5 lifts each label above the top of its bar
  geom_text(aes(label = percent(Percent)),
    size = 3, vjust = -0.5
  ) +
  scale_x_discrete(NULL, expand = c(0, .5)) +
  theme_classic()
Plot with desired y-axis line, but non-visible label over bar
Here is what happens when I set the limit on scale_y_continuous() to c(0,1):
# Cap the y scale at exactly 100%: the axis line ends where it should, but the
# label above the 99% bar is no longer visible.
example_plot +
  scale_y_continuous(
    name = NULL,
    breaks = seq(0, 1, by = 0.2),
    labels = scales::percent,
    limits = c(0, 1.0),
    expand = c(0, 0)
  ) +
  labs(title = "Y axis line looks perfect, but the label over the bar is off")
Plot with y-axis line too long, but visible label over bar
And here is what happens when I set the limit on scale_y_continuous() to c(0,1.05):
# Extend the y scale to 105%: the label above the 99% bar fits, but the axis
# line now runs past 100%.
example_plot +
  scale_y_continuous(
    name = NULL,
    breaks = seq(0, 1, by = 0.2),
    labels = scales::percent,
    limits = c(0, 1.05),
    expand = c(0, 0)
  ) +
  labs(title = "Y axis line is too long, but the label over the bar is visible")
You could remove the regular axis line and then use geom_segment to create a new one:
# Keep the 105% limit so the labels fit, hide the built-in y-axis line and
# draw a replacement that stops near 100%.
example_df %>%
  ggplot(aes(x = Label, y = Percent)) +
  geom_col(fill = "dodgerblue4", width = .6) +
  geom_text(aes(label = percent(Percent)), size = 3, vjust = -0.5) +
  scale_x_discrete(name = "", expand = c(0, .5)) +
  scale_y_continuous(
    name = "",
    breaks = seq(0, 1, by = .2),
    labels = percent,
    limits = c(0, 1.05),
    expand = c(0, 0)
  ) +
  theme_classic() +
  theme(axis.line.y = element_blank()) +
  # Hand-drawn axis line at the left edge of the panel
  geom_segment(x = .5025, xend = 0.5025, y = 0, yend = 1.002)
To respond to your comment: Even when it's outside the plot area, the 99% label is still being drawn, but it's "clipped", meaning that plot elements outside the plot area are masked. So, another option, still hacky, but less hacky than my original answer, is to turn off clipping so that the label appears:
library(grid)
# Build the plot with the y scale ending at 100%; the 10pt top margin leaves
# room for the label that sits above the panel.
p <- example_df %>%
  ggplot(aes(x = Label, y = Percent)) +
  geom_col(fill = "dodgerblue4", width = .6) +
  geom_text(aes(label = percent(Percent)), size = 3, vjust = -0.5) +
  scale_x_discrete(name = "", expand = c(0, .5)) +
  scale_y_continuous(
    name = "",
    breaks = seq(0, 1, by = .2),
    labels = percent,
    limits = c(0, 1),
    expand = c(0, 0)
  ) +
  theme_classic() +
  theme(plot.margin = unit(c(10, 0, 0, 0), "pt"))

# Turn off clipping: convert the plot to a gtable, switch off clipping for the
# panel entry of its layout, then draw the table with grid.
pg <- ggplot_gtable(ggplot_build(p))
pg$layout$clip[pg$layout$name == "panel"] <- "off"
grid.draw(pg)