modify horizontal barplots for combination (tight design) - r

I have the following sample data:
library(tidyverse)
df <- data.frame(col=rep(c("A_B", "A_C", "A_D",
"B_A", "C_A", "D_A",
"B_C", "B_D",
"C_B", "D_B",
"C_D", "D_C"), 2),
level=c(rep("lower_level", 12), rep("higher_level", 12)),
value=abs(rnorm(24, mean=5, sd=2)))%>% tibble()
df[c('origin', 'target')] <- str_split_fixed(df$col, '_', 2)
df <- df %>% select(c(origin, target, level, value))
I now want to create horizontal stacked barplots for each target (df %>% filter(target=="A")). I do this using the following code:
# plot
p1 <- ggplot(data = df %>% filter(target=="A"),
aes(x = factor(level), y = value, fill = factor(origin)))+
geom_bar(stat="identity", position="fill", width = .1) +
scale_fill_manual(values = c("A"="yellow", "B" = "green", "C"="red", "D"="blue")) +
coord_flip()
Since I want to combine multiple such plots later (s. below), I would like to
remove the empty space between y-axis and the bars (or manipulate it to value X)
have the fill label displayed on the right side
have one value on the left, saying "target: A"
and have fill legend and y axis shared between all plots.
See annotated plot:
For reference, I create additional plots with this code:
p2 <- ggplot(data = df %>% filter(target=="B"),
aes(x = factor(level), y = value, fill = factor(origin)))+
geom_bar(stat="identity", position="fill", width = .1) +
scale_fill_manual(values = c("A"="yellow", "B" = "green", "C"="red", "D"="blue")) +
coord_flip()
p3 <- ggplot(data = df %>% filter(target=="C"),
aes(x = factor(level), y = value, fill = factor(origin)))+
geom_bar(stat="identity", position="fill", width = .1) +
scale_fill_manual(values = c("A"="yellow", "B" = "green", "C"="red", "D"="blue")) +
coord_flip()
p4 <- ggplot(data = df %>% filter(target=="D"),
aes(x = factor(level), y = value, fill = factor(origin)))+
geom_bar(stat="identity", position="fill", width = .1) +
scale_fill_manual(values = c("A"="yellow", "B" = "green", "C"="red", "D"="blue")) +
coord_flip()
And combine them with this code (but happy to use other ways of combining them if needed).
library("gridExtra")
grid.arrange(p1, p2, p3, p4, ncol = 1, nrow = 4)

It sounds very much as though you simply want to facet by target. No need for stitching multiple plots here.
ggplot(data = df %>% mutate(target = paste('Target:', target)),
aes(x = factor(level), y = value, fill = factor(origin)))+
geom_col(position = "fill", width = 0.9) +
scale_fill_manual(values = c("A"="yellow", "B" = "green",
"C"="red", "D"="blue"), name = 'origin') +
facet_grid(target~., switch = 'y') +
coord_flip() +
theme(strip.placement = 'outside',
strip.background = element_blank(),
axis.title.y = element_blank())

two suggestions_
to remove the offset between axis and bar, set the axis expansion to zero
scale_x_continuous(..., expand = c(0,0))
instead of tediously subsetting the data frame, use the facet_wrap or facet_grid option of ggplot:
ggplot(data = df,
aes(x = factor(level), y = value, fill = factor(origin))) +
## other plot instructions
facet_wrap( ~target)
see ?facet_wrap for various layout options like number of plot columns
3. the vertical spacing between bars will be adjusted to the output dimensions (here: figure height) anyway

Related

How to create an legend for ggplot for just one series of data and then add legend for additional horizontal lines?

I'm wanting to prepare a simple plot with some points and horizontal lines with a legend. The code below generates the desired plot and a legend but the legend symbols are combinations of the shape and line, when I would just like a shape for the shape and a line for the line.
dat <- iris %>% select(Sepal.Length)
dat$Type <- "Sepal.Length"
ggplot() +
geom_point(data = dat, aes(x = as.numeric(row.names(dat)), y = Sepal.Length, colour = Type), shape = 10, size = 2) +
geom_hline(aes(yintercept = 6, colour = "Some line"), linetype = "dashed")
Custom linetypes and shapes are assigned using scale_*_manual, like so:
dat %>%
ggplot() +
geom_point(aes(x = as.numeric(row.names(dat)), y = Sepal.Length, shape = Type), size = 2) +
geom_hline(aes(yintercept = 6, linetype = 'Some line')) +
scale_linetype_manual(values = c('Some line' = 'dashed')) +
scale_shape_manual(values = c('Sepal.Length' = 10))

How to use geom_bar to connect stacked-bar proportions if name categorial for bar is character

This is an extension to a previous answer of a question found here
Briefly #Jon Spring uses the following example code to produce a stacked bar plot with lines connecting each bar proportion between the two groups:
library(ggplot2)
set.seed(0)
data_bar <- data.frame(
stringsAsFactors = F,
Sample = rep(c("A", "B"), each = 10),
Percentage = runif(20),
Taxon = rep(1:10, by = 2)
)
library(tidyr)
ggplot() +
geom_bar(data = data_bar,
aes(x = Sample, y =Percentage, fill = Taxon),
colour = 'white', width = 0.3, stat="identity") +
geom_segment(data = tidyr::spread(data_bar, Sample, Percentage),
colour = "white",
aes(x = 1 + 0.3/2,
xend = 2 - 0.3/2,
y = cumsum(A),
yend = cumsum(B))) +
theme(panel.background = element_rect(fill = "black"), # to make connecting points
panel.grid = element_blank())
geom_seg example
While this is an elegant piece of code to address the issue of connecting the bar proportions, I am somehow not able to reproduce it once the bar proportion names are character strings instead on integer as above. Here is my code:
test.matrix<-matrix(c(70,120,65,140,13,68,46,294,52,410),ncol=2,byrow=TRUE)
rownames(test.matrix)<-c("BC.1","BC.2","GC","MO","EB")
colnames(test.matrix)<-c("12m","3m")
test.matrix <- data.frame(test.matrix)
ggplot() +
geom_bar(data = test.matrix,
aes(x = Var2, y =Freq, fill = Var1),
colour = 'black', width = 0.3, stat="identity") +
geom_segment(data = tidyr::spread(test.matrix, Var2, Freq),
colour = "black",
aes(x = 1 + 0.3/2,
xend = 2 - 0.3/2,
y = cumsum(`12m`),
yend = cumsum(`3m`))) +
scale_fill_manual(values=c('BC.1'="gold",'BC.2'="yellowgreen",'GC'="navy",'MO'="royalblue",'EB'="orangered")) +
theme(panel.background = element_rect(fill = "white"), panel.grid = element_blank())
geom_seg char
The result does not match the geom_segment lines to the bar proportions. Maybe it has sth to do with cumsum() using an alphabetic order of the strings, but I cannot figure out how to solve this - or its sth completely different...
So I have two questions:
How can the bar proportions be connected if the proportions order has to be fixed? (a string vector or factor as 'names' for each value group or row)
How can an additional geom_segment at the very bottom of each bar be generated connecting both lower ends of each bar with another?
The issue is that you cumsummed in the wrong "direction" or order, i.e. you start cumsumming at BC.1 while in the bar chart it's on the top. This could simply be fixed by rearranging the dataset before cumulating. Therefore in my opinion it's best to do this outside of the plotting code so that you can easily check the data.
To get another geom_segment at the bottom you can simply add a row to your data.
library(tidyverse)
test.matrix<-matrix(c(70,120,65,140,13,68,46,294,52,410),ncol=2,byrow=TRUE)
rownames(test.matrix)<-c("BC.1","BC.2","GC","MO","EB")
colnames(test.matrix)<-c("12m","3m")
test.matrix <- data.frame(test.matrix)
test.matrix <- test.matrix %>%
setNames(c("12m", "3m")) %>%
rownames_to_column(var = "Var1") %>%
pivot_longer(-Var1, names_to = "Var2", values_to = "Freq")
test.matrix.wide <- tidyr::spread(test.matrix, Var2, Freq) %>%
arrange(desc(Var1)) %>%
mutate(y = cumsum(`12m`),
yend = cumsum(`3m`)) %>%
add_row(y = 0, yend = 0)
ggplot() +
geom_bar(data = test.matrix,
aes(x = Var2, y =Freq, fill = Var1),
colour = 'black', width = 0.3, stat="identity") +
geom_segment(data = test.matrix.wide,
colour = "black",
aes(x = 1 + 0.3/2,
xend = 2 - 0.3/2,
y = y,
yend = yend)) +
scale_fill_manual(values=c('BC.1'="gold",'BC.2'="yellowgreen",'GC'="navy",'MO'="royalblue",'EB'="orangered")) +
theme(panel.background = element_rect(fill = "white"), panel.grid = element_blank())

adding a label in geom_line in R

I have two very similar plots, which have two y-axis - a bar plot and a line plot:
code:
sec_plot <- ggplot(data, aes_string (x = year, group = 1)) +
geom_col(aes_string(y = frequency), fill = "orange", alpha = 0.5) +
geom_line(aes(y = severity))
However, there are no labels. I want to get a label for the barplot as well as a label for the line plot, something like:
How can I add the labels to the plot, if there is only pone single group? is there a way to specify this manually? Until know I have only found option where the labels can be added by specifying them in the aes
EXTENSION (added a posterior):
getSecPlot <- function(data, xvar, yvar, yvarsec, groupvar){
if ("agegroup" %in% xvar) xvar <- get("agegroup")
# data <- data[, startYear:= as.numeric(startYear)]
data <- data[!claims == 0][, ':=' (scaled = get(yvarsec) * max(get(yvar))/max(get(yvarsec)),
param = max(get(yvar))/max(get(yvarsec)))]
param <- data[1, param] # important, otherwise not found in ggplot
sec_plot <- ggplot(data, aes_string (x = xvar, group = groupvar)) +
geom_col(aes_string(y = yvar, fill = groupvar, alpha = 0.5), position = "dodge") +
geom_line(aes(y = scaled, color = gender)) +
scale_y_continuous(sec.axis = sec_axis(~./(param), name = paste0("average ", yvarsec),labels = function(x) format(x, big.mark = " ", scientific = FALSE))) +
labs(y = paste0("total ", yvar)) +
scale_alpha(guide = 'none') +
theme_pubclean() +
theme(legend.title=element_blank(), legend.background = element_rect(fill = "white"))
}
plot.ExposureYearly <- getSecPlot(freqSevDataAge, xvar = "agegroup", yvar = "exposure", yvarsec = "frequency", groupvar = "gender")
plot.ExposureYearly
How can the same be done on a plot where both the line plot as well as the bar plot are separated by gender?
Here is a possible solution. The method I used was to move the color and fill inside the aes and then use scale_*_identity to create and format the legends.
Also, I needed to add a scaling factor for severity axis since ggplot does not handle the secondary axis well.
data<-data.frame(year= 2000:2005, frequency=3:8, severity=as.integer(runif(6, 4000, 8000)))
library(ggplot2)
library(scales)
sec_plot <- ggplot(data, aes(x = year)) +
geom_col(aes(y = frequency, fill = "orange"), alpha = 0.6) +
geom_line(aes(y = severity/1000, color = "black")) +
scale_fill_identity(guide = "legend", label="Claim frequency (Number of paid claims per 100 Insured exposure)", name=NULL) +
scale_color_identity(guide = "legend", label="Claim Severity (Average insurance payment per claim)", name=NULL) +
theme(legend.position = "bottom") +
scale_y_continuous(sec.axis =sec_axis( ~ . *1, labels = label_dollar(scale=1000), name="Severity") ) + #formats the 2nd axis
guides(fill = guide_legend(order = 1), color = guide_legend(order = 2)) #control which scale plots first
sec_plot

Adjusting rugplot in ggplot2

Below is the code for a graph I am making for an article I am working on. The plot showed the predicted probabilities along a range of values in my data set. Along the x-axis is a rug plot that shows the distribution of trade share values (I provided the code and an image of the graph):
sitc8 <- ggplot() + geom_line(data=plotdat8, aes(x = lagsitc8100, y = PredictedProbabilityMean), size = 2, color="blue") +
geom_ribbon(data=plotdat8, aes(x = lagsitc8100, ymin = lowersd, ymax = uppersd),
fill = "grey50", alpha=.5) +
ylim(c(-0.75, 1.5)) +
geom_hline(yintercept=0) +
geom_rug(data=multi.sanctions.bust8.full#frame, aes(x=lagsitc8100), col="black", size=1.0, sides="b") +
xlab("SITC 8 Trade Share") +
ylab("Probability of Sanctions Busting") +
theme(panel.grid.major = element_line(colour = "gray", linetype = "dotted"), panel.grid.minor =
element_blank(), panel.background = element_blank())
My question is: is it possible to change the color of the lines of the rugplot of trade share in which the event I am modeling occurs? In other words, I would like to add red lines or red dots along those values of trade share when my event = 1.
Is this possible?
Sure. You'd just have to add a color argument within an aes() function call within geom_rug().
Here's some code to create a dummy data frame.
library(tidyverse)
set.seed(42)
dummy_data <- tibble(x_var = rnorm(100),
y_var = abs(rnorm(100)) * x_var) %>%
rownames_to_column(var = "temp_row") %>%
mutate(color_id = if_else(as.numeric(temp_row) <= 50,
"Type A",
"Type B"))
And here's a ggplot call where the color for geom_rug is mapped to a character column named color_id
ggplot(data = dummy_data, mapping = aes(x = x_var, y = y_var)) +
geom_smooth(method = "lm") +
geom_rug(mapping = aes(color = color_id), sides = "b")
Update:
Following OP's comment, here's an updated version. If it's a numeric vector of 0s and 1s, you have to tell ggplot to treat it as a dichotomous variable. You can do that by wrapping it in a call to factor() for instance.
For the color we can set that manually using scale_color_manual(). So the changes to the code are the following.
color_id is now a vector og 0s and 1s.
the color is now mapped to factor(color_id)
the color scale is determined using scale_color_manual
library(tidyverse)
set.seed(42)
dummy_data <- tibble(x_var = rnorm(100),
y_var = abs(rnorm(100)) * x_var) %>%
rownames_to_column(var = "temp_row") %>%
mutate(color_id = if_else(as.numeric(temp_row) <= 50,
0,
1))
ggplot(data = dummy_data, mapping = aes(x = x_var, y = y_var)) +
geom_smooth(method = "lm") +
geom_rug(mapping = aes(color = factor(color_id)), sides = "b") +
scale_color_manual(values = c("black", "red")) +
labs(color = "This takes two values")
Definitely possible. Here's an example using iris, and a dynamic condition in the rug. You could also do two rugs, if you chose.
library(tidyverse)
iris %>%
ggplot(aes(x = Sepal.Length, y = Sepal.Width)) +
geom_point() +
geom_rug(aes(color = Petal.Length >3), sides = "b")
# Second example, output not shown
iris %>%
ggplot(aes(x = Sepal.Length, y = Sepal.Width)) +
geom_point() +
geom_rug(data = subset(iris, Petal.Length > 3), color = "black", sides = "b") +
geom_rug(data = subset(iris, Petal.Length <= 3), color = "red", sides = "b")

Aesthetics must be either length 1 or the same as the data (1): x, y, label

I'm working on some data on party polarization (something like this) and used geom_dumbbell from ggalt and ggplot2. I keep getting the same aes error and other solutions in the forum did not address this as effectively. This is my sample data.
df <- data_frame(policy=c("Not enough restrictions on gun ownership", "Climate change is an immediate threat", "Abortion should be illegal"),
Democrats=c(0.54, 0.82, 0.30),
Republicans=c(0.23, 0.38, 0.40),
diff=sprintf("+%d", as.integer((Democrats-Republicans)*100)))
I wanted to keep order of the plot, so converted policy to factor and wanted % to be shown only on the first line.
df <- arrange(df, desc(diff))
df$policy <- factor(df$policy, levels=rev(df$policy))
percent_first <- function(x) {
x <- sprintf("%d%%", round(x*100))
x[2:length(x)] <- sub("%$", "", x[2:length(x)])
x
}
Then I used ggplot that rendered something close to what I wanted.
gg2 <- ggplot()
gg2 <- gg + geom_segment(data = df, aes(y=country, yend=country, x=0, xend=1), color = "#b2b2b2", size = 0.15)
# making the dumbbell
gg2 <- gg + geom_dumbbell(data=df, aes(y=country, x=Democrats, xend=Republicans),
size=1.5, color = "#B2B2B2", point.size.l=3, point.size.r=3,
point.color.l = "#9FB059", point.color.r = "#EDAE52")
I then wanted the dumbbell to read Democrat and Republican on top to label the two points (like this). This is where I get the error.
gg2 <- gg + geom_text(data=filter(df, country=="Government will not control gun violence"),
aes(x=Democrats, y=country, label="Democrats"),
color="#9fb059", size=3, vjust=-2, fontface="bold", family="Calibri")
gg2 <- gg + geom_text(data=filter(df, country=="Government will not control gun violence"),
aes(x=Republicans, y=country, label="Republicans"),
color="#edae52", size=3, vjust=-2, fontface="bold", family="Calibri")
Any thoughts on what I might be doing wrong?
I think it would be easier to build your own "dumbbells" with geom_segment() and geom_point(). Working with your df and changing the variable refences "country" to "policy":
library(tidyverse)
# gather data into long form to make ggplot happy
df2 <- gather(df,"party", "value", Democrats:Republicans)
ggplot(data = df2, aes(y = policy, x = value, color = party)) +
# our dumbell
geom_path(aes(group = policy), color = "#b2b2b2", size = 2) +
geom_point(size = 7, show.legend = FALSE) +
# the text labels
geom_text(aes(label = party), vjust = -1.5) + # use vjust to shift text up to no overlap
scale_color_manual(values = c("Democrats" = "blue", "Republicans" = "red")) + # named vector to map colors to values in df2
scale_x_continuous(limits = c(0,1), labels = scales::percent) # use library(scales) nice math instead of pasting
Produces this plot:
Which has some overlapping labels. I think you could avoid that if you use just the first letter of party like this:
ggplot(data = df2, aes(y = policy, x = value, color = party)) +
geom_path(aes(group = policy), color = "#b2b2b2", size = 2) +
geom_point(size = 7, show.legend = FALSE) +
geom_text(aes(label = gsub("^(\\D).*", "\\1", party)), vjust = -1.5) + # just the first letter instead
scale_color_manual(values = c("Democrats" = "blue", "Republicans" = "red"),
guide = "none") +
scale_x_continuous(limits = c(0,1), labels = scales::percent)
Only label the top issue with names:
ggplot(data = df2, aes(y = policy, x = value, color = party)) +
geom_path(aes(group = policy), color = "#b2b2b2", size = 2) +
geom_point(size = 7, show.legend = FALSE) +
geom_text(data = filter(df2, policy == "Not enough restrictions on gun ownership"),
aes(label = party), vjust = -1.5) +
scale_color_manual(values = c("Democrats" = "blue", "Republicans" = "red")) +
scale_x_continuous(limits = c(0,1), labels = scales::percent)

Resources