Stacked Diverging Bar Chart Plot by groups in ggplot - r

I am trying to do a chart like this one:
The idea is to plot 3 amounts, in this mixed stacked bar chart we have a dataframe which has one row for a negative value and two rows for the positive value, however i need to stack the negative with the first positive bar, i also need 3 colors. The code I have so far is as follows: (the dataframe already has the desired shape):
df3 <- read.table(
text =
"region group metric somevalue
blue T1 epsilon 63
blue T2 epsilon -40
red T1 epsilon 100
blue T1 kappa 19
blue T2 kappa -30
red T1 kappa 75
blue T1 zulu 50
blue T2 zulu -18
red T1 zulu 68", header=TRUE)
p2 <- ggplot(df3, aes(x = metric, y = somevalue, fill=region))+
geom_col(aes(fill = group), width = 0.7) + geom_bar(position = 'dodge', stat='identity')
p2
please help me out, if you think the dataframe has to be modified please let me know. thanks

Stacking and dodging is always a bit tricky. In your case this could be achieved like so:
Convert region to a factor. (This makes sure that step 3 works)
Split your dataset in two for negative and positive values.
Fill up the datasets using tidy::complete so that each dataset contains "all" combinations of metric, region and group. (This makes sure that the dodging works
Use two geom_col layers to plot the positive and negative values using position="dodge". I added na.rm = TRUE to remove the missing values we added via complete.
library(ggplot2)
library(dplyr)
library(tidyr)
df3$region <- factor(df3$region)
df3_neg <- filter(df3, somevalue < 0) %>%
tidyr::complete(region, group, metric)
df3_pos<- filter(df3, somevalue > 0) %>%
tidyr::complete(region, group, metric)
p2 <- ggplot(df3, aes(somevalue, metric)) +
geom_col(aes(alpha = group, fill=region), data = df3_pos, position = "dodge", na.rm = TRUE) +
geom_col(aes(alpha = group, fill=region), data = df3_neg, position = "dodge", na.rm = TRUE) +
scale_fill_identity() +
scale_alpha_manual(values = c(T2 = .6, T1 = 1)) +
guides(alpha = FALSE)
p2
EDIT Adding annotations could be achieved the same way, e.g. my code below uses two geom_text to add the values next to the bar where I make use of position_dodge2(.9) so that the labels align nicely with the bars:
p2 <- ggplot(df3, aes(somevalue, metric)) +
geom_col(aes(alpha = group, fill=region), data = df3_pos, position = "dodge", na.rm = TRUE) +
geom_col(aes(alpha = group, fill=region), data = df3_neg, position = "dodge", na.rm = TRUE) +
geom_text(aes(x = somevalue + 1, label = somevalue), data = df3_pos, position = position_dodge2(width = .9), hjust = 0, na.rm = TRUE) +
geom_text(aes(x = somevalue - 1, label = somevalue), data = df3_neg, , position = position_dodge2(width = .9), hjust = 1, na.rm = TRUE) +
scale_fill_identity() +
scale_alpha_manual(values = c(T2 = .6, T1 = 1)) +
guides(alpha = FALSE)
p2
EDIT2 Adding a table is indeed a different thing. In that case I would go for patchwork which means making plots to mimic the table layout. To make the dodging work or to make sure that the table rows align with the bars you have make a plot for each table column. The basic approach may look like so:
library(patchwork)
# 1. Make a dataframe with all combinations of region and metric using expand_grid
d_table <- expand_grid(region = unique(df3$region), metric = unique(df3$metric))
# 2. Add columns with the table content
d_table$column1 <- LETTERS[1:6]
d_table$column2 <- letters[1:6]
# 3. Make a plot for each column of the table
p_column1 <- ggplot(d_table, aes(y = metric, x = 1, label = column1)) +
geom_text(aes(group = region), position = position_dodge2(width = .9), na.rm = TRUE) +
scale_x_continuous(position = "top", breaks = 1, labels = "column1") +
labs(y = NULL, x = "") +
theme(axis.text.y = element_blank(),
axis.text.x.bottom = element_blank(),
axis.ticks = element_blank(),
plot.margin = unit(rep(0, 4), "pt"),
panel.background = element_rect(fill = NA))
p_column2 <- ggplot(d_table, aes(y = metric, x = 1, label = column2)) +
geom_text(aes(group = region), position = position_dodge2(width = .9), na.rm = TRUE) +
scale_x_continuous(position = "top", breaks = 1, labels = "column2") +
labs(y = NULL, x = "") +
theme(axis.text.y = element_blank(),
axis.text.x.bottom = element_blank(),
axis.ticks = element_blank(),
plot.margin = unit(rep(0, 4), "pt"),
panel.background = element_rect(fill = NA))
# 4. Add the table columns via patchwork
p2 + p_column1 + p_column2 + plot_layout(widths = c(1, .1, .1))

Related

Ordering y axis by another variable in a ggolot bar plot

I have a swimlane plot which I want to order by a group variable. I was also wondering if it is possible to label the groups on the ggplot.
Here is the code to create the data set and plot the data
dataset <- data.frame(subject = c("1002", "1002", "1002", "1002", "10034","10034","10034","10034","10054","10054","10054","1003","1003","1003","1003"),
exdose = c(5,10,20,5,5,10,20,20,5,10,20,5,20,10,5),
p= c(1,2,3,4,1,2,3,4,1,2,3,1,2,3,4),
diff = c(3,3,9,7,3,3,4,5,3,5,6,3,5,6,7),
group =c("grp1","grp1","grp1","grp1","grp2","grp2","grp2","grp2","grp1","grp1","grp1","grp2","grp2","grp2","grp2")
)
ggplot(dataset, aes(x = diff + 1, y = subject, group = p)) +
geom_col(aes(fill = as.factor(exdose)), position = position_stack(reverse = TRUE))
I want the y axis order by group and I want a label on the side to label the groups if possible
you can see from the plot it is ordered by subject number but I want it ordered by group and some indicator of group.
I tried reorder but I was unsuccessful in getting the desired plot.
As Stefan points out, facets are probably the way to go here, but you can use them with subtle theme tweaks to make it look as though you have just added a grouping variable on the y axis:
library(tidyverse)
dataset %>%
mutate(group = factor(group),
subject = reorder(subject, as.numeric(group)),
exdose = factor(exdose)) %>%
ggplot(aes(x = diff + 1, y = subject, group = p)) +
geom_col(aes(fill = exdose), color = "gray50",
position = position_stack(reverse = TRUE)) +
scale_y_discrete(expand = c(0.1, 0.4)) +
scale_fill_brewer(palette = "Set2") +
facet_grid(group ~ ., scales = "free_y", switch = "y") +
theme_minimal(base_size = 16) +
theme(strip.background = element_rect(color = "gray"),
strip.text = element_text(face = 2),
panel.spacing.y = unit(0, "mm"),
panel.background = element_rect(fill = "#f9f8f6", color = NA))

Create a split violin plot with paired points and proper orientation

With ggplot2, I can create a violin plot with overlapping points, and paired points can be connected using geom_line().
library(datasets)
library(ggplot2)
library(dplyr)
iris_edit <- iris %>% group_by(Species) %>%
mutate(paired = seq(1:length(Species))) %>%
filter(Species %in% c("setosa","versicolor"))
ggplot(data = iris_edit,
mapping = aes(x = Species, y = Sepal.Length, fill = Species)) +
geom_violin() +
geom_line(mapping = aes(group = paired),
position = position_dodge(0.1),
alpha = 0.3) +
geom_point(mapping = aes(fill = Species, group = paired),
size = 1.5, shape = 21,
position = position_dodge(0.1)) +
theme_classic() +
theme(legend.position = "none",
axis.text.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
axis.title.x = element_blank(),
axis.text.y = element_text(size = 10))
The see package includes the geom_violindot() function to plot a halved violin plot alongside its constituent points. I've found this function helpful when plotting a large number of points so that the violin is not obscured.
library(see)
ggplot(data = iris_edit,
mapping = aes(x = Species, y = Sepal.Length, fill = Species)) +
geom_violindot(dots_size = 0.8,
position_dots = position_dodge(0.1)) +
theme_classic() +
theme(legend.position = "none",
axis.text.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
axis.title.x = element_blank(),
axis.text.y = element_text(size = 10))
Now, I would like to add geom_line() to geom_violindot() in order to connect paired points, as in the first image. Ideally, I would like the points to be inside and the violins to be outside so that the lines do not intersect the violins. geom_violindot() includes the flip argument, which takes a numeric vector specifying the geoms to be flipped.
ggplot(data = iris_edit,
mapping = aes(x = Species, y = Sepal.Length, fill = Species)) +
geom_violindot(dots_size = 0.8,
position_dots = position_dodge(0.1),
flip = c(1)) +
geom_line(mapping = aes(group = paired),
alpha = 0.3,
position = position_dodge(0.1)) +
theme_classic() +
theme(legend.position = "none",
axis.text.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
axis.title.x = element_blank(),
axis.text.y = element_text(size = 10))
As you can see, invoking flip inverts the violin half, but not the corresponding points. The see documentation does not seem to address this.
Questions
How can you create a geom_violindot() plot with paired points, such that the points and the lines connecting them are "sandwiched" in between the violin halves? I suspect there is a solution that uses David Robinson's GeomFlatViolin function, though I haven't been able to figure it out.
In the last figure, note that the lines are askew relative to the points they connect. What position adjustment function should be supplied to the position_dots and position arguments so that the points and lines are properly aligned?
Not sure about using geom_violindot with see package. But you could use a combo of geom_half_violon and geom_half_dotplot with gghalves package and subsetting the data to specify the orientation:
library(gghalves)
ggplot(data = iris_edit[iris_edit$Species == "setosa",],
mapping = aes(x = Species, y = Sepal.Length, fill = Species)) +
geom_half_violin(side = "l") +
geom_half_dotplot(stackdir = "up") +
geom_half_violin(data = iris_edit[iris_edit$Species == "versicolor",],
aes(x = Species, y = Sepal.Length, fill = Species), side = "r")+
geom_half_dotplot(data = iris_edit[iris_edit$Species == "versicolor",],
aes(x = Species, y = Sepal.Length, fill = Species),stackdir = "down") +
geom_line(data = iris_edit, mapping = aes(group = paired),
alpha = 0.3)
As a note, the lines in the pairing won't properly align because the dotplot is binning each observation then lengthing out the dotline-- the paired lines only correspond to x-value as defined in aes, not where the dot is in the line.
As per comment - this is not a direct answer to your question, but I believe that you might not get the most convincing visualisation when using the "slope graph" optic. This becomes quickly convoluted (so many dots/ lines overlapping) and the message gets lost.
To show change between paired observations (treatment 1 versus treatment 2), you can also (and I think: better) use a scatter plot. You can show each observation and the change becomes immediately clear. To make it more intuitive, you can add a line of equality.
I don't think you need to show the estimated distribution (left plot), but if you want to show this, you could make use of a two-dimensional density estimation, with geom_density2d (right plot)
library(tidyverse)
## patchwork only for demo purpose
library(patchwork)
iris_edit <- iris %>% group_by(Species) %>%
## use seq_along instead
mutate(paired = seq_along(Species)) %>%
filter(Species %in% c("setosa","versicolor")) %>%
## some more modificiations
select(paired, Species, Sepal.Length) %>%
pivot_wider(names_from = Species, values_from = Sepal.Length)
lims <- c(0, 10)
p1 <-
ggplot(data = iris_edit, aes(setosa, versicolor)) +
geom_abline(intercept = 0, slope = 1, lty = 2) +
geom_point(alpha = .7, stroke = 0, size = 2) +
cowplot::theme_minimal_grid() +
coord_equal(xlim = lims, ylim = lims) +
labs(x = "Treatment 1", y = "Treatment 2")
p2 <-
ggplot(data = iris_edit, aes(setosa, versicolor)) +
geom_abline(intercept = 0, slope = 1, lty = 2) +
geom_density2d(color = "Grey") +
geom_point(alpha = .7, stroke = 0, size = 2) +
cowplot::theme_minimal_grid() +
coord_equal(xlim = lims, ylim = lims) +
labs(x = "Treatment 1", y = "Treatment 2")
p1+ p2
Created on 2021-12-18 by the reprex package (v2.0.1)

How to choose the right parameters for dotplot in r ggplot

I intend to make a dot plot somewhat like this:
But there's some issue with the code:
df = data.frame(x=runif(100))
df %>%
ggplot(aes(x )) +
geom_dotplot(binwidth =0.01, aes(fill = ..count..), stackdir = "centerwhole",dotsize=2, stackgroups = T, binpositions = "all")
how to choose bin width to avoid dots overlapping, bins wrapping itself in 2 columns or dots get truncated at the top and bottom?
And why is the y axis showing decimal points instead of count? And how to color the dots by x value? I tried fill = x and no color is shown.
The overlap is caused by the dotsize > 1; as #Jimbuo said, the decimal values on the y axis is due to the internals of this geom; for the fill and color you can use the ..x.. computed variable:
Computed variables
x center of each bin, if binaxis is "x"
df = data.frame(x=runif(1000))
library(dplyr)
library(ggplot2)
df %>%
ggplot(aes(x, fill = ..x.., color = ..x..)) +
geom_dotplot(method = 'histodot',
binwidth = 0.01,
stackdir = "down",
stackgroups = T,
binpositions = "all") +
scale_fill_gradientn('', colours = c('#5185FB', '#9BCFFD', '#DFDFDF', '#FF0000'), labels = c(0, 1), breaks = c(0,1), guide = guide_legend('')) +
scale_color_gradientn(colours = c('#5185FB', '#9BCFFD', '#DFDFDF', '#FF0000'), labels = c(0, 1), breaks = c(0,1), guide = guide_legend('')) +
scale_y_continuous() +
scale_x_continuous('', position = 'top') +
# coord_equal(ratio = .25) +
theme_classic() +
theme(axis.line = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
aspect.ratio = .25,
legend.position = 'bottom',
legend.direction = 'vertical'
)
Created on 2018-05-18 by the reprex package (v0.2.0).
First from the help of ?geom_dotplot
When binning along the x axis and stacking along the y axis, the
numbers on y axis are not meaningful, due to technical limitations of
ggplot2. You can hide the y axis, as in one of the examples, or
manually scale it to match the number of dots.
Thus you can try following. Note, the coloring is not completly fitting the x axis.
library(tidyverse)
df %>%
ggplot(aes(x)) +
geom_dotplot(stackdir = "down",dotsize=0.8,
fill = colorRampPalette(c("blue", "white", "red"))(100)) +
scale_y_continuous(labels = c(0,10), breaks = c(0,-0.4)) +
scale_x_continuous(position = "top") +
theme_classic()
For the correct coloring, you have to calculate the bins by yourself using e.g. .bincode:
df %>%
mutate(gr=with(.,.bincode(x ,breaks = seq(0,1,1/30)))) %>%
mutate(gr2=factor(gr,levels = 1:30, labels = colorRampPalette(c("blue", "white", "red"))(30))) %>%
arrange(x) %>%
{ggplot(data=.,aes(x)) +
geom_dotplot(stackdir = "down",dotsize=0.8,
fill = .$gr2) +
scale_y_continuous(labels = c(0,10), breaks = c(0,-0.4)) +
scale_x_continuous(position = "top") +
theme_classic()}

How to separately label and scale double y-axis in ggplot2?

I have a test dataset like this:
df_test <- data.frame(
proj_manager = c('Emma','Emma','Emma','Emma','Emma','Alice','Alice'),
proj_ID = c(1, 2, 3, 4, 5, 6, 7),
stage = c('B','B','B','A','C','A','C'),
value = c(15,15,20,20,20,70,5)
)
Preparation for viz:
input <- select(df_test, proj_manager, proj_ID, stage, value) %>%
filter(proj_manager=='Emma') %>%
do({
proj_value_by_manager = sum(distinct(., proj_ID, value)$value);
mutate(., proj_value_by_manager = proj_value_by_manager)
}) %>%
group_by(stage) %>%
do({
sum_value_byStage = sum(distinct(.,proj_ID,value)$value);
mutate(.,sum_value_byStage= sum_value_byStage)
}) %>%
mutate(count_proj = length(unique(proj_ID)))
commapos <- function(x, ...) {
format(abs(x), big.mark = ",", trim = TRUE,
scientific = FALSE, ...) }
Visualization:
ggplot (input, aes(x=stage, y = count_proj)) +
geom_bar(stat = 'identity')+
geom_bar(aes(y=-proj_value_by_manager),
stat = "identity", fill = "Blue") +
scale_y_continuous(labels = commapos)+
coord_flip() +
ylab('') +
geom_text(aes(label= sum_value_byStage), hjust = 5) +
geom_text(aes(label= count_proj), hjust = -1) +
labs(title = "Emma: 4 projects| $90M Values \n \n Commitment|Projects") +
theme(plot.title = element_text(hjust = 0.5)) +
geom_hline(yintercept = 0, linetype =1)
My questions are:
Why is the y-values not showing up right? e.g. C is labeled 20, but nearing hitting 100 on the scale.
How to adjust the position of labels so that it sits on the top of its bar?
How to re-scale the y axis so that both the very short bar of 'count of project' and long bar of 'Project value' can be well displayed?
Thank you all for the help!
I think your issues are coming from the fact that:
(1) Your dataset has duplicated values. This causes geom_bar to add all of them together. For example there are 3 obs for B where proj_value_by_manager = 90 which is why the blue bar extends to 270 for that group (they all get added).
(2) in your second geom_bar you use y = -proj_value_by_manager but in the geom_text to label this you use sum_value_byStage. That's why the blue bar for A is extending to 90 (since proj_value_by_manager is 90) but the label reads 20.
To get you what I believe the chart you want is you could do:
#Q1: No dupe dataset so it doesnt erroneous add columns
input2 <- input[!duplicated(input[,-c(2,4)]),]
ggplot (input2, aes(x=stage, y = count_proj)) +
geom_bar(stat = 'identity')+
geom_bar(aes(y=-sum_value_byStage), #Q1: changed so this y-value matches your label
stat = "identity", fill = "Blue") +
scale_y_continuous(labels = commapos)+
coord_flip() +
ylab('') +
geom_text(aes(label= sum_value_byStage, y = -sum_value_byStage), hjust = 1) + #Q2: Added in y-value for label and hjust so it will be on top
geom_text(aes(label= count_proj), hjust = -1) +
labs(title = "Emma: 4 projects| $90M Values \n \n Commitment|Projects") +
theme(plot.title = element_text(hjust = 0.5)) +
geom_hline(yintercept = 0, linetype =1)
For your last question, there is no good way to display both of these. One option would be to rescale the small data and still label it with a 1 or 3. However, I didn't do this because once you scale down the blue bars the other bars look OK to me.

Combined frequency histogram using two attributes

I'm using ggplot2 to create histograms for two different parameters. My current approach is attached at the end of my question (including a dataset, which can be used and loaded right from pasetbin.com), which creates
a histrogram visualizing the frequency for the spatial distribution of logged user data based on the "location"-attribute (either "WITHIN" or "NOT_WITHIN").
a histogram visualizing the frequency for the distribution of logged user data based on the "context"-attribute (either "Clicked A" or "Clicked B").
This looks like the follwoing:
# Load my example dataset from pastebin
RawDataSet <- read.csv("http://pastebin.com/raw/uKybDy03", sep=";")
# Load packages
library(plyr)
library(dplyr)
library(reshape2)
library(ggplot2)
###### Create Frequency Table for Location-Information
LocationFrequency <- ddply(RawDataSet, .(UserEmail), summarize,
All = length(UserEmail),
Within_area = sum(location=="WITHIN"),
Not_within_area = sum(location=="NOT_WITHIN"))
# Create a column for unique identifiers
LocationFrequency <- mutate(LocationFrequency, id = rownames(LocationFrequency))
# Reorder columns
LocationFrequency <- LocationFrequency[,c(5,1:4)]
# Format id-column as numbers (not as string)
LocationFrequency[,c(1)] <- sapply(LocationFrequency[, c(1)], as.numeric)
# Melt data
LocationFrequency.m = melt(LocationFrequency, id.var=c("UserEmail","All","id"))
# Plot data
p <- ggplot(LocationFrequency.m, aes(x=id, y=value, fill=variable)) +
geom_bar(stat="identity") +
theme_grey(base_size = 16)+
labs(title="Histogram showing the distribution of all spatial information per user.") +
labs(x="User", y="Number of notifications interaction within/not within the area") +
# using IDs instead of UserEmail
scale_x_continuous(breaks=c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30), labels=c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","23","24","25","26","27","28","29","30"))
# Change legend Title
p + labs(fill = "Type of location")
##### Create Frequency Table for Interaction-Information
InterationFrequency <- ddply(RawDataSet, .(UserEmail), summarize,
All = length(UserEmail),
Clicked_A = sum(context=="Clicked A"),
Clicked_B = sum(context=="Clicked B"))
# Create a column for unique identifiers
InterationFrequency <- mutate(InterationFrequency, id = rownames(InterationFrequency))
# Reorder columns
InterationFrequency <- InterationFrequency[,c(5,1:4)]
# Format id-column as numbers (not as string)
InterationFrequency[,c(1)] <- sapply(InterationFrequency[, c(1)], as.numeric)
# Melt data
InterationFrequency.m = melt(InterationFrequency, id.var=c("UserEmail","All","id"))
# Plot data
p <- ggplot(InterationFrequency.m, aes(x=id, y=value, fill=variable)) +
geom_bar(stat="identity") +
theme_grey(base_size = 16)+
labs(title="Histogram showing the distribution of all interaction types per user.") +
labs(x="User", y="Number of interaction") +
# using IDs instead of UserEmail
scale_x_continuous(breaks=c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30), labels=c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","23","24","25","26","27","28","29","30"))
# Change legend Title
p + labs(fill = "Type of interaction")
But what I'm trying to realize: How can I combine both histograms in only one plot? Would it be somehow possible to place the corressponding percentage for each part? Somethink like the following sketch, which represents the total number of observations per user (the complete height of the bar) and using the different segmentation to visualize the corresponding data. Each bar would be divided into to parts (within and not_within) where each part would be then divided into two subparts showing the percentage of the interaction types (*Clicked A' or Clicked B).
With the update description, I would make a combined barplot with two parts: a negative and a positve one. In order to achieve that, you have to get your data into the correct format:
# load needed libraries
library(dplyr)
library(tidyr)
library(ggplot2)
# summarise your data
new.df <- RawDataSet %>%
group_by(UserEmail,location,context) %>%
tally() %>%
mutate(n2 = n * c(1,-1)[(location=="NOT_WITHIN")+1L]) %>%
group_by(UserEmail,location) %>%
mutate(p = c(1,-1)[(location=="NOT_WITHIN")+1L] * n/sum(n))
The new.df dataframe looks like:
> new.df
Source: local data frame [90 x 6]
Groups: UserEmail, location [54]
UserEmail location context n n2 p
(fctr) (fctr) (fctr) (int) (dbl) (dbl)
1 andre NOT_WITHIN Clicked A 3 -3 -1.0000000
2 bibi NOT_WITHIN Clicked A 4 -4 -0.5000000
3 bibi NOT_WITHIN Clicked B 4 -4 -0.5000000
4 bibi WITHIN Clicked A 9 9 0.6000000
5 bibi WITHIN Clicked B 6 6 0.4000000
6 corinn NOT_WITHIN Clicked A 10 -10 -0.5882353
7 corinn NOT_WITHIN Clicked B 7 -7 -0.4117647
8 corinn WITHIN Clicked A 9 9 0.7500000
9 corinn WITHIN Clicked B 3 3 0.2500000
10 dpfeifer NOT_WITHIN Clicked A 7 -7 -1.0000000
.. ... ... ... ... ... ...
Next you can create a plot with:
ggplot() +
geom_bar(data = new.df[new.df$location == "NOT_WITHIN",],
aes(x = UserEmail, y = n2, color = "darkgreen", fill = context),
size = 1, stat = "identity", width = 0.7) +
geom_bar(data = new.df[new.df$location == "WITHIN",],
aes(x = UserEmail, y = n2, color = "darkred", fill = context),
size = 1, stat = "identity", width = 0.7) +
scale_y_continuous(breaks = seq(-20,20,5),
labels = c(20,15,10,5,0,5,10,15,20)) +
scale_color_manual("Location of interaction",
values = c("darkgreen","darkred"),
labels = c("NOT_WITHIN","WITHIN")) +
scale_fill_manual("Type of interaction",
values = c("lightyellow","lightblue"),
labels = c("Clicked A","Clicked B")) +
guides(color = guide_legend(override.aes = list(color = c("darkred","darkgreen"),
fill = NA, size = 2), reverse = TRUE),
fill = guide_legend(override.aes = list(fill = c("lightyellow","lightblue"),
color = "black", size = 0.5))) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 14),
axis.title = element_blank(),
legend.title = element_text(face = "italic", size = 14),
legend.key.size = unit(1, "lines"),
legend.text = element_text(size = 11))
which results in:
If you want to use percentage values, you can use the p-column to make a plot:
ggplot() +
geom_bar(data = new.df[new.df$location == "NOT_WITHIN",],
aes(x = UserEmail, y = p, color = "darkgreen", fill = context),
size = 1, stat = "identity", width = 0.7) +
geom_bar(data = new.df[new.df$location == "WITHIN",],
aes(x = UserEmail, y = p, color = "darkred", fill = context),
size = 1, stat = "identity", width = 0.7) +
scale_y_continuous(breaks = c(-1,-0.75,-0.5,-0.25,0,0.25,0.5,0.75,1),
labels = scales::percent(c(1,0.75,0.5,0.25,0,0.25,0.5,0.75,1))) +
scale_color_manual("Location of interaction",
values = c("darkgreen","darkred"),
labels = c("NOT_WITHIN","WITHIN")) +
scale_fill_manual("Type of interaction",
values = c("lightyellow","lightblue"),
labels = c("Clicked A","Clicked B")) +
coord_flip() +
guides(color = guide_legend(override.aes = list(color = c("darkred","darkgreen"),
fill = NA, size = 2), reverse = TRUE),
fill = guide_legend(override.aes = list(fill = c("lightyellow","lightblue"),
color = "black", size = 0.5))) +
theme_minimal(base_size = 14) +
theme(axis.title = element_blank(),
legend.title = element_text(face = "italic", size = 14),
legend.key.size = unit(1, "lines"),
legend.text = element_text(size = 11))
which results in:
In response to the comment
If you want to place the text-labels inside the bars, you will have to calculate a position variable too:
new.df <- RawDataSet %>%
group_by(UserEmail,location,context) %>%
tally() %>%
mutate(n2 = n * c(1,-1)[(location=="NOT_WITHIN")+1L]) %>%
group_by(UserEmail,location) %>%
mutate(p = c(1,-1)[(location=="NOT_WITHIN")+1L] * n/sum(n),
pos = (context=="Clicked A")*p/2 + (context=="Clicked B")*(c(1,-1)[(location=="NOT_WITHIN")+1L] * (1 - abs(p)/2)))
Then add the following line to your ggplot code after the geom_bar's:
geom_text(data = new.df, aes(x = UserEmail, y = pos, label = n))
which results in:
Instead of label = n you can also use label = scales::percent(abs(p)) to display the percentages.

Resources