Related
I am using gender pay gap data from Glassdoor, which is accessible from here.
I am trying to include labels for the mean of the response variable, totalSalary, on a stacked barplot, split by the 5 different performance ratings.
This is the code below so far:
# NOTE(review): the opening ggplot(...) call with the data and the aesthetic
# mapping is not part of this snippet; the layers below are added to it.
# Bars show the mean of the y aesthetic per group (stat = "summary", fun = "mean").
geom_bar(stat = "summary", fun = "mean", width = 0.9, color = "black") +
theme_bw() +
labs(x = "Job Title", y = "Mean Total Salary", fill = "Gender") +
theme(axis.title = element_text(size = 10, color = "blue"),
axis.text = element_text(size = 8),
legend.position = "top") +
# Earlier labelling attempt, left disabled:
# geom_col() +
# geom_text(aes(label = totalSalary), position = position_stack(vjust = 0.5), color = "white") +
scale_fill_manual(values = c("#FF66CC", "blue")) +
# `comma` is presumably scales::comma -- needs the scales package attached
scale_y_continuous(labels = comma) +
# Swap the x and y axes so the bars run horizontally
coord_flip() +
# One panel per value of perfEval
facet_wrap( ~ perfEval)
This is the plot that I get.
What I want to show is to label the mean total salary for male and female employees and for each job title separately on the pink and blue colored bars.
Any help would be appreciated
In terms of comparing salaries by gender, a side-by-side comparison seems more practical (as already pointed out in the comments).
Nevertheless - regarding the technical question of positioning the labels, here is one way of doing it. The tricky part is finding the center positions of the stacked bars.
library(tidyverse)

df <- readr::read_csv("~/data.csv")

# Mean total compensation (base pay + bonus) per gender, job title and
# performance rating, plus a short "118k"-style text label for the plot.
df_summary <- df %>%
  group_by(gender, jobTitle, perfEval) %>%
  summarize(totalcomp = mean(basePay + bonus),
            totalcomp_label = paste0(round(totalcomp * 1e-3, 0), "k")) %>%
  ungroup()

# Label position = centre of each stacked segment. Within one jobTitle/perfEval
# bar the Male segment starts at 0 and the Female segment sits on top of it,
# so the Female centre is offset by the Male total (0 when there is no Male row).
df_plot <- df_summary %>%
  group_by(jobTitle, perfEval) %>%
  mutate(
    # %in% instead of == so an NA gender cannot turn the sum into NA
    male_total = sum(totalcomp[gender %in% "Male"]),
    labelpos = case_when(gender == "Male" ~ totalcomp / 2,
                         gender == "Female" ~ male_total + totalcomp / 2,
                         TRUE ~ NA_real_)
  ) %>%
  ungroup() %>%
  select(-male_total)
# A tibble: 98 x 6
# gender jobTitle perfEval totalcomp totalcomp_label labelpos
# <chr> <chr> <dbl> <dbl> <chr> <dbl>
# 1 Female Data Scientist 1 118479. 118k 164089.
# 2 Female Data Scientist 2 105040. 105k 140556.
# 3 Female Data Scientist 3 100275. 100k 149580.
# 4 Female Data Scientist 4 87633. 88k 127996.
# 5 Female Data Scientist 5 101449. 101k 142046.
# Horizontal stacked bars of mean total compensation per job title, one panel
# per performance rating, with the pre-computed label on each segment.
df_plot %>%
  ggplot() +
  geom_col(aes(y = jobTitle, x = totalcomp, fill = gender), width = 0.9, color = "black") +
  theme_bw() +
  # There is no coord_flip() here: x carries the salary and y the job title,
  # so the axis titles have to follow that mapping.
  labs(x = "Mean Total Salary", y = "Job Title", fill = "Gender") +
  theme(axis.title = element_text(size = 10, color = "blue"),
        axis.text = element_text(size = 8),
        legend.position = "top") +
  scale_fill_manual(values = c("#FF66CC", "blue")) +
  scale_x_continuous(labels = scales::comma) +
  facet_wrap( ~ perfEval) +
  # positioning the labels at the segment centres computed in df_plot
  geom_text(aes(x = labelpos, y = jobTitle, label = totalcomp_label),
            color = "white")
I would like to align the country names for the graph below as follows:
country1 50% bargraph
country loooooooong name 100% bargraph
country2 50% bargraph
country middle name 50% bargraph
country3 5% bargraph
I have been trying for hours now but still no success. I tried to fix it with the "scale_x_discrete" function:
cat(df$info_country, "\t", df$indicator, "%")
Does anyone have any ideas?
# Packages used below (ggplot layers and the %>% / arrange() verbs).
# The workspace is deliberately not wiped: rm(list = ls()) in a shared snippet
# deletes the reader's own objects without resetting loaded packages or options.
library(ggplot2)
library(dplyr)

### data frame for bar graph
df <- data.frame(
  info_country = c("country1", "country loooooooong name", "country2", "country middle name", "country3"),
  indicator = c(50,100,50,50,5))

### change factor level for ggplot order (levels sorted by indicator)
df$info_country <- factor(df$info_country, levels = df$info_country[order(df$indicator)])
factor(df$info_country)

### change order for naming: rows sorted like the levels, because the axis
### labels below are matched to the levels by position
df <- df %>%
  arrange(indicator)

bar_graph <- df %>%
  ggplot( aes(x = info_country, y = indicator)) +
  geom_bar(stat = "identity", width = 0.8, fill = "#EE5859") +
  xlab("") +
  ylab("") +
  scale_y_continuous(labels = NULL, limits = c(0, 100)) +
  scale_x_discrete(labels = paste(df$info_country, " ", df$indicator, "%", sep=" ")) +
  coord_flip() +
  theme(
    panel.background = element_rect(fill = "white", colour = NA),
    axis.ticks.x = element_line(color="white"),
    axis.ticks.y = element_line(color="white"),
    axis.text.y = element_text(hjust=0),
    axis.text.x = element_text(hjust=0)
  )
bar_graph
EDIT:
I also tried to align the bars at a fixed distance, so that the length of the country names does not matter and the bars always start at the same position. I tried adding spaces to the country names so that they all have the same number of characters. But in ggplot spaces seem to be narrower than actual letters (see the difference between the two datasets below). Any ideas how to fix this?
library(ggplot2)
library(dplyr)
library(stringr)  # str_pad() below comes from stringr

df <- data.frame(
  info_country = c("country1", "country loooooong", "country2", "country midd", "country3"),
  indicator = c(50,100,50,50,5))
## second dataframe with short names
# df <- data.frame(
# info_country = c("c", "a", "b", "d", "e"),
# indicator = c(50,100,50,50,5))
#
### change factor level for ggplot order (levels sorted by indicator)
df$info_country <- factor(df$info_country, levels = df$info_country[order(df$indicator)])
factor(df$info_country)
df$info_country

## add spaces
## Pad the level labels rather than the column itself: str_pad() returns a
## plain character vector, so assigning it to the column would drop the factor
## and with it the indicator-based bar order set above.
levels(df$info_country) <- str_pad(levels(df$info_country), width = 18, pad = " ", side = "right")
df$info_country

### change order for naming
df <- df %>%
  arrange(indicator)

bar_graph <- df %>%
  ggplot( aes(x = info_country, y = indicator)) +
  geom_bar(stat = "identity", width = 0.8, fill = "#EE5859") +
  # percentage text, right-aligned just left of the bar origin
  geom_text(aes(y = -2, label = paste(indicator, "%", sep=" ")),
            hjust = 1, size = 11 * 0.8 / ggplot2::.pt, color = "grey30") +
  xlab("") +
  ylab("") +
  scale_y_continuous(labels = NULL, limits = c(-5, 100)) +
  coord_flip() +
  theme(
    panel.background = element_rect(fill = "white", colour = NA),
    axis.ticks.x = element_line(color="white"),
    axis.ticks.y = element_line(color="white"),
    axis.text.y = element_text(hjust=0),
    axis.text.x = element_text(hjust=0)
  )
bar_graph
One approach to achieve this is via a geom_text layer to add the right aligned values to the plot. For the size and the color I've used the theme_grey defaults for axis.text:
library(ggplot2)
library(dplyr)

df <- data.frame(
  info_country = c("country1", "country loooooooong name", "country2", "country middle name", "country3"),
  indicator = c(50, 100, 50, 50, 5)
)

# Sort the factor levels by indicator so ggplot draws the bars in that order
df$info_country <- factor(df$info_country, levels = df$info_country[order(df$indicator)])
factor(df$info_country)
#> [1] country1 country loooooooong name country2
#> [4] country middle name country3
#> 5 Levels: country3 country1 country2 ... country loooooooong name

# Sort the rows the same way
df <- arrange(df, indicator)

bar_graph <- ggplot(df, aes(x = info_country, y = indicator)) +
  geom_col(width = 0.8, fill = "#EE5859") +
  # Right-aligned percentage just left of the bar origin; size and colour
  # follow the theme_grey defaults for axis.text
  geom_text(
    aes(y = -2, label = paste(indicator, "%", sep = " ")),
    hjust = 1,
    size = 11 * 0.8 / ggplot2::.pt,
    color = "grey30"
  ) +
  labs(x = "", y = "") +
  # The lower limit is negative to leave room for the percentage text
  scale_y_continuous(labels = NULL, limits = c(-5, 100)) +
  coord_flip() +
  theme(
    panel.background = element_rect(fill = "white", colour = NA),
    axis.ticks.x = element_line(color = "white"),
    axis.ticks.y = element_line(color = "white"),
    axis.text.y = element_text(hjust = 0),
    axis.text.x = element_text(hjust = 0)
  )

bar_graph
EDIT To increase the distance between the labels and the percentages you could increase the margin of the labels via e.g. theme(axis.text.y = element_text(hjust=0, margin = margin(r = 6, unit = "cm"))). However, in that case you should add the option clip = "off" to coord_flip() to prevent the labels from being clipped off when they hit the panel boundaries, and also set the color of the ticks to NA, which means no color at all:
bar_graph <- ggplot(df, aes(x = info_country, y = indicator)) +
  geom_col(width = 0.8, fill = "#EE5859") +
  # Right-aligned percentage text anchored at y = -2
  geom_text(
    aes(y = -2, label = paste(indicator, "%", sep = " ")),
    hjust = 1,
    size = 11 * 0.8 / ggplot2::.pt,
    color = "grey30"
  ) +
  labs(x = "", y = "") +
  scale_y_continuous(labels = NULL, limits = c(-2, 100)) +
  # clip = "off" keeps the percentage labels visible outside the panel area
  coord_flip(clip = "off") +
  theme(
    panel.background = element_rect(fill = "white", colour = NA),
    # NA colour = ticks are not drawn at all
    axis.ticks.x = element_line(color = NA),
    axis.ticks.y = element_line(color = NA),
    # Wider right margin pushes the country names away from the percentages
    axis.text.y = element_text(hjust = 0, margin = margin(r = 6, unit = "cm")),
    axis.text.x = element_text(hjust = 0)
  )

bar_graph
You can pad the columns first then join, this should work for most label sizes, first you work out the width for the 1st and 2nd part:
# Width (in characters) of the longest country name and of the longest value
w1 <- with(df, max(nchar(as.character(info_country))))
w2 <- with(df, max(nchar(as.character(indicator))))
And we pad them accordingly, you can see the result:
# Names padded on the right, values padded on the left, then glued together
l1 <- str_pad(df$info_country, w1, side = "right")
l2 <- str_pad(df$indicator, w2, side = "left")
df$lab <- paste0(l1, " ", l2, "% bargraph")
df
info_country indicator lab
1 country3 5 country3 5% bargraph
2 country1 50 country1 50% bargraph
3 country2 50 country2 50% bargraph
4 country middle name 50 country middle name 50% bargraph
5 country loooooooong name 100 country loooooooong name 100% bargraph
Unfortunately, to ensure the characters and " " have equal space, we might need to force the label text to monospace:
# Bars per country with the padded "name  value%" strings as axis labels.
ggplot(df, aes(x = info_country, y = indicator)) +
  geom_col() +
  # Name each label by its country so it is matched to the right bar even if
  # the row order of df differs from the order of the factor levels.
  scale_x_discrete(labels = setNames(df$lab, as.character(df$info_country))) +
  coord_flip() +
  # A monospace family makes letters and spaces equally wide, so the padding lines up
  theme(axis.text.y = element_text(hjust = 0, family = "mono"))
Maybe there's a better way to use the fonts but I am honestly not good with it
I have two dataframes df1 and df2 as follows:
> df1
dateTime value
1 1 6
2 2 2
3 3 3
4 4 1
> df2
dateTime value
1 1 3
2 2 8
3 3 4
4 4 5
I want to plot these dataframes in just one diagram, split them to two different plots with same x axis, shift df1 by 1 to the right, and connect each value of df1 to the corresponding value of df2. Here is my code:
#Shift df1 by 1 to the right
# NOTE(review): presumably dplyr::lag -- it moves every value one row down and
# puts NA in the first row, while dateTime itself is left unchanged.
df1$value <- lag(df1$value, 1)
# Upper panel: points + green line, with a dashed segment from every point down
# to the bottom edge of the panel (yend = -Inf). x axis decorations are hidden.
plot1 <- df1 %>%
select(dateTime, value) %>%
ggplot(aes(dateTime, value)) +
geom_point() +
geom_line(color = "green") +
geom_segment(aes(xend = dateTime, yend = -Inf), linetype = "dashed") +
theme(axis.text=element_text(size = 14), axis.title=element_text(size = 14),
axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())
# Lower panel: points + red line, with a dashed segment from every point up to
# the top edge of the panel (yend = Inf).
plot2 <- df2 %>%
select(dateTime, value) %>%
ggplot(aes(dateTime, value)) +
geom_point() +
geom_line(color = "red") +
geom_segment(aes(xend = dateTime, yend = Inf), linetype = "dashed") +
xlab("dateTime") +
theme(axis.text=element_text(size = 14), axis.title=element_text(size = 14))
# Stack both plots vertically as one gtable
gt <- rbind(ggplotGrob(plot1), ggplotGrob(plot2), size = "last")
# Panel positioning
# Layout entries named "panel": one per plot; l = left column, t = top row
is_panel <- which(gt$layout$name == "panel")
panel_x <- unique(gt$layout$l[is_panel])
panel_y <- gt$layout$t[is_panel]
# Coordinates and graphical parameters for segments
# NOTE(review): child 5 of the first panel is assumed to be the geom_segment
# grob; the index depends on the layers in plot1 -- verify if layers change.
x_coords <- gt$grobs[[is_panel[1]]]$children[[5]]$x0
gpar <- gt$grobs[[is_panel[1]]]$children[[5]]$gp
# Vertical segments spanning the full height (0 to 1) of the cell they are placed in
linkgrob <- segmentsGrob(x0 = x_coords, y0 = 0, x1 = x_coords, y1 = 1, gp = gpar)
# Place the linking segments in the rows between the two panels
gt <- gtable_add_grob(gt, linkgrob,
t = panel_y[1] + 1, l = panel_x, b = panel_y[2] - 1)
grid.newpage()
grid.draw(gt)
Here is the result. However, there is an additional line which I want to remove, and there is no point for the last value of df1, which I also want to show:
Lag
I guess that lag is maybe the wrong function:
# dplyr's lag() shifts the values and pads with NA. Namespaced explicitly
# because a bare lag() resolves to stats::lag() when dplyr is not attached,
# and that one returns a time series with a shifted time base instead.
dplyr::lag(1:3)
# [1] NA 1 2
If I understand you correctly, you want to shift your data and this depends on your real data, but for this dummy example something like
# Move every df1 observation one step to the right on the x axis
df1 <- mutate(df1, dateTime = dateTime + 1)
should do the trick.
Lines
You need to adapt your base plots a bit:
# Upper panel. The helper column `lty` marks the segment at the largest
# dateTime as "none" so that it can be drawn with a blank line type.
plot1_data <- df1 %>%
  select(dateTime, value) %>%
  mutate(lty = ifelse(dateTime == max(dateTime), "none", "dashed"))

plot1 <- ggplot(plot1_data, aes(dateTime, value)) +
  geom_point() +
  geom_line(color = "green") +
  # line type is mapped to the helper column
  geom_segment(aes(xend = dateTime, yend = -Inf, linetype = lty)) +
  # manual scale: "dashed" stays dashed, "none" becomes invisible; no legend
  scale_linetype_manual(
    values = c(dashed = "dashed", none = "blank"),
    guide = "none"
  ) +
  # identical x limits in both plots keep the panels aligned
  xlim(1, 5) +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14),
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Lower panel. Here the segment at the smallest dateTime is the one hidden.
plot2_data <- df2 %>%
  select(dateTime, value) %>%
  mutate(lty = ifelse(dateTime == min(dateTime), "none", "dashed"))

plot2 <- ggplot(plot2_data, aes(dateTime, value)) +
  geom_point() +
  geom_line(color = "red") +
  geom_segment(aes(xend = dateTime, yend = Inf, linetype = lty)) +
  scale_linetype_manual(
    values = c(dashed = "dashed", none = "blank"),
    guide = "none"
  ) +
  labs(x = "dateTime") +
  xlim(1, 5) +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14)
  )
This gives you this plot:
I am having this strange error regarding displaying the actual bars in a geom_col() plot.
Suppose I have a data set (called user_data) that contains a count of the total number of changes ('adjustments') done for a particular user (and a plethora of other columns). Let's say it looks like this:
User_ID total_adjustments additional column_1 additional column_2 ...
1 'Blah_17' 21 random_data random_data
2 'Blah_1' 47 random_data random_data
3 'foobar' 2 random_data random_data
4 'acbd1' 17 random_data random_data
5 'user27' 9 random_data random_data
I am using the following code to reduce it into a dataframe with only the two columns I care about:
# Keep the two columns of interest, sorted by descending adjustment count.
# User_ID becomes a factor whose levels follow that sorted order.
total_adj_count <- user_data %>%
  arrange(desc(total_adjustments)) %>%
  select(User_ID, total_adjustments) %>%
  mutate(User_ID = factor(User_ID, levels = User_ID))
This results in my dataframe (total_adj_count) looking like so:
User_ID total_adjustments
1 'Blah_1' 47
2 'Blah_17' 21
3 'acbd1' 17
4 'user27' 9
5 'foobar' 2
Moving along, here is the code I used to attempt to create a geom_col() plot of that data:
# Bar chart of total_adjustments per User_ID with the value printed above
# each bar; the y axis text and ticks are hidden.
g = ggplot(data=total_adj_count, aes(x = User_ID, y = total_adjustments)) +
geom_bar(width=.5, alpha=1, show.legend = FALSE, fill="#000066", stat="identity") +
labs(x="", y="Adjustment Count", caption="(based on sample data)") +
theme_few(base_size = 10) + scale_color_few() +
theme(axis.text.x=element_text(angle = 45, hjust = 1)) +
# nudge_y is in data units: each label is drawn 2000 above its bar value
geom_text(aes(label=round(total_adjustments, digits = 2)), size=3, nudge_y = 2000) +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank())
# Convert the ggplot object to an interactive plotly widget
p = ggplotly(g)
# NOTE(review): `m` (the margin specification) is not defined in this snippet;
# presumably it is created elsewhere -- confirm.
p = p %>%
layout(margin = m,
showlegend = FALSE,
title = "Number of Adjustments per User"
)
p
And for some strange reason when I try to view plot p it displays all parts of the plot as intended, but does not show the actual bars (or columns).
In fact I get this strange plot and am sort of stuck where to fix it:
Change nudge_y argument to a smaller number. Right now you have it set to 2000 which offsets the labels by 2000 on the y-axis. Below I've changed it to nudge_y = 2 and it looks like so:
# Bars per user with the count printed slightly (2 units) above each bar
g <- ggplot(total_adj_count, aes(x = User_ID, y = total_adjustments)) +
  geom_col(fill = "#000066", width = .5, alpha = 1, show.legend = FALSE) +
  geom_text(
    aes(label = round(total_adjustments, digits = 2)),
    size = 3,
    nudge_y = 2
  ) +
  labs(x = "", y = "Adjustment Count", caption = "(based on sample data)") +
  scale_color_few() +
  theme_few(base_size = 10) +
  # tilted user names; y axis text and ticks removed
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
Full copy/paste:
library(ggplot2)
library(ggthemes)
library(plotly)
library(dplyr)

# Sample data, already sorted by descending adjustment count
text <- " User_ID total_adjustments
1 'Blah_1' 47
2 'Blah_17' 21
3 'acbd1' 17
4 'user27' 9
5 'foobar' 2"
total_adj_count <- read.table(text = text, header = TRUE, stringsAsFactors = FALSE)

# User_ID is read as character, which ggplot would order alphabetically.
# Turn it into a factor whose levels follow the (sorted) row order so the
# bars keep the descending order of the data.
total_adj_count$User_ID <- factor(total_adj_count$User_ID,
                                  levels = total_adj_count$User_ID)

g <-
  ggplot(total_adj_count, aes(User_ID, total_adjustments)) +
  geom_col(width = .5, alpha = 1, show.legend = FALSE, fill = "#000066") +
  labs(x = NULL, y = "Adjustment Count", caption = "(based on sample data)", title = "Number of Adjustments per User") +
  theme_few(base_size = 10) +
  scale_color_few() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # nudge_y is in data units: labels sit 2 above the top of each bar
  geom_text(aes(label = round(total_adjustments, digits = 2)), size = 3, nudge_y = 2) +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

# Interactive version
p <- ggplotly(g)
p <- layout(p, showlegend = FALSE)
p
I'm using ggplot2 to create histograms for two different parameters. My current approach is attached at the end of my question (including a dataset, which can be loaded directly from pastebin.com), which creates
a histogram visualizing the frequency for the spatial distribution of logged user data based on the "location"-attribute (either "WITHIN" or "NOT_WITHIN").
a histogram visualizing the frequency for the distribution of logged user data based on the "context"-attribute (either "Clicked A" or "Clicked B").
This looks like the following:
# Load my example dataset from pastebin
RawDataSet <- read.csv("http://pastebin.com/raw/uKybDy03", sep=";")
# Load packages
library(plyr)
library(dplyr)
library(reshape2)
library(ggplot2)
###### Create Frequency Table for Location-Information
# One row per user: total number of logged rows plus the counts per location
LocationFrequency <- ddply(RawDataSet, .(UserEmail), summarize,
                           All = length(UserEmail),
                           Within_area = sum(location == "WITHIN"),
                           Not_within_area = sum(location == "NOT_WITHIN"))
# Numeric identifier per user (the row number), used on the x axis instead of
# the e-mail address
LocationFrequency <- mutate(LocationFrequency,
                            id = as.numeric(rownames(LocationFrequency)))
# Reorder columns so that id comes first
LocationFrequency <- LocationFrequency[, c(5, 1:4)]
# Melt data: one row per user and location type
LocationFrequency.m <- melt(LocationFrequency,
                            id.vars = c("UserEmail", "All", "id"))
# Plot data
p <- ggplot(LocationFrequency.m, aes(x = id, y = value, fill = variable)) +
  geom_bar(stat = "identity") +
  theme_grey(base_size = 16) +
  labs(title = "Histogram showing the distribution of all spatial information per user.",
       x = "User",
       y = "Number of notifications interaction within/not within the area") +
  # using IDs instead of UserEmail: one tick per user, however many there are
  scale_x_continuous(breaks = LocationFrequency$id)
# Change legend Title
p + labs(fill = "Type of location")
##### Create Frequency Table for Interaction-Information
# One row per user: total number of logged rows plus the counts per context
InterationFrequency <- ddply(RawDataSet, .(UserEmail), summarize,
                             All = length(UserEmail),
                             Clicked_A = sum(context == "Clicked A"),
                             Clicked_B = sum(context == "Clicked B"))
# Numeric identifier per user (the row number), used on the x axis instead of
# the e-mail address
InterationFrequency <- mutate(InterationFrequency,
                              id = as.numeric(rownames(InterationFrequency)))
# Reorder columns so that id comes first
InterationFrequency <- InterationFrequency[, c(5, 1:4)]
# Melt data: one row per user and interaction type
InterationFrequency.m <- melt(InterationFrequency,
                              id.vars = c("UserEmail", "All", "id"))
# Plot data
p <- ggplot(InterationFrequency.m, aes(x = id, y = value, fill = variable)) +
  geom_bar(stat = "identity") +
  theme_grey(base_size = 16) +
  labs(title = "Histogram showing the distribution of all interaction types per user.",
       x = "User",
       y = "Number of interaction") +
  # using IDs instead of UserEmail: one tick per user, however many there are
  scale_x_continuous(breaks = InterationFrequency$id)
# Change legend Title
p + labs(fill = "Type of interaction")
But what I'm trying to achieve is this: how can I combine both histograms in a single plot? Would it somehow be possible to show the corresponding percentage for each part? Something like the following sketch, which represents the total number of observations per user (the complete height of the bar) and uses the different segments to visualize the corresponding data. Each bar would be divided into two parts (within and not_within), and each part would then be divided into two subparts showing the percentage of the interaction types ('Clicked A' or 'Clicked B').
With the updated description, I would make a combined barplot with two parts: a negative and a positive one. In order to achieve that, you have to get your data into the correct format:
# load needed libraries
library(dplyr)
library(tidyr)
library(ggplot2)
# Count rows per user, location and context, then derive signed versions:
#   n2 - the count, negative for NOT_WITHIN and positive otherwise
#   p  - the share of the count within its user/location group, same sign rule
new.df <- RawDataSet %>%
  count(UserEmail, location, context) %>%
  group_by(UserEmail, location) %>%
  mutate(
    n2 = n * ifelse(location == "NOT_WITHIN", -1, 1),
    p = ifelse(location == "NOT_WITHIN", -1, 1) * n / sum(n)
  )
The new.df dataframe looks like:
> new.df
Source: local data frame [90 x 6]
Groups: UserEmail, location [54]
UserEmail location context n n2 p
(fctr) (fctr) (fctr) (int) (dbl) (dbl)
1 andre NOT_WITHIN Clicked A 3 -3 -1.0000000
2 bibi NOT_WITHIN Clicked A 4 -4 -0.5000000
3 bibi NOT_WITHIN Clicked B 4 -4 -0.5000000
4 bibi WITHIN Clicked A 9 9 0.6000000
5 bibi WITHIN Clicked B 6 6 0.4000000
6 corinn NOT_WITHIN Clicked A 10 -10 -0.5882353
7 corinn NOT_WITHIN Clicked B 7 -7 -0.4117647
8 corinn WITHIN Clicked A 9 9 0.7500000
9 corinn WITHIN Clicked B 3 3 0.2500000
10 dpfeifer NOT_WITHIN Clicked A 7 -7 -1.0000000
.. ... ... ... ... ... ...
Next you can create a plot with:
# Diverging stacked bar chart of the signed counts (n2): the NOT_WITHIN rows
# and the WITHIN rows are drawn as two separate layers, filled by context.
ggplot() +
# "darkgreen" / "darkred" inside aes() are constant keys that are mapped to
# colours by scale_color_manual() below; they give each layer its outline
# colour and create the location legend.
geom_bar(data = new.df[new.df$location == "NOT_WITHIN",],
aes(x = UserEmail, y = n2, color = "darkgreen", fill = context),
size = 1, stat = "identity", width = 0.7) +
geom_bar(data = new.df[new.df$location == "WITHIN",],
aes(x = UserEmail, y = n2, color = "darkred", fill = context),
size = 1, stat = "identity", width = 0.7) +
# Breaks from -20 to 20 are labelled with their absolute values
scale_y_continuous(breaks = seq(-20,20,5),
labels = c(20,15,10,5,0,5,10,15,20)) +
scale_color_manual("Location of interaction",
values = c("darkgreen","darkred"),
labels = c("NOT_WITHIN","WITHIN")) +
scale_fill_manual("Type of interaction",
values = c("lightyellow","lightblue"),
labels = c("Clicked A","Clicked B")) +
# Legend keys are restyled by hand: unfilled outline keys for the location
# legend (shown in reversed order), thin black outlines for the fill legend.
# NOTE(review): the key colours depend on the interplay of override.aes and
# reverse = TRUE -- check the rendered legend after any change here.
guides(color = guide_legend(override.aes = list(color = c("darkred","darkgreen"),
fill = NA, size = 2), reverse = TRUE),
fill = guide_legend(override.aes = list(fill = c("lightyellow","lightblue"),
color = "black", size = 0.5))) +
theme_minimal() +
# Vertical user names, no axis titles
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 14),
axis.title = element_blank(),
legend.title = element_text(face = "italic", size = 14),
legend.key.size = unit(1, "lines"),
legend.text = element_text(size = 11))
which results in:
If you want to use percentage values, you can use the p-column to make a plot:
# Same diverging chart as a horizontal percentage plot: y is the signed share
# p instead of the signed count.
ggplot() +
# "darkgreen" / "darkred" inside aes() are constant keys that are mapped to
# colours by scale_color_manual() below.
geom_bar(data = new.df[new.df$location == "NOT_WITHIN",],
aes(x = UserEmail, y = p, color = "darkgreen", fill = context),
size = 1, stat = "identity", width = 0.7) +
geom_bar(data = new.df[new.df$location == "WITHIN",],
aes(x = UserEmail, y = p, color = "darkred", fill = context),
size = 1, stat = "identity", width = 0.7) +
# Breaks from -1 to 1 are labelled as unsigned percentages
scale_y_continuous(breaks = c(-1,-0.75,-0.5,-0.25,0,0.25,0.5,0.75,1),
labels = scales::percent(c(1,0.75,0.5,0.25,0,0.25,0.5,0.75,1))) +
scale_color_manual("Location of interaction",
values = c("darkgreen","darkred"),
labels = c("NOT_WITHIN","WITHIN")) +
scale_fill_manual("Type of interaction",
values = c("lightyellow","lightblue"),
labels = c("Clicked A","Clicked B")) +
# Swap the axes so the users are listed vertically
coord_flip() +
# Legend keys are restyled by hand.
# NOTE(review): the key colours depend on the interplay of override.aes and
# reverse = TRUE -- check the rendered legend after any change here.
guides(color = guide_legend(override.aes = list(color = c("darkred","darkgreen"),
fill = NA, size = 2), reverse = TRUE),
fill = guide_legend(override.aes = list(fill = c("lightyellow","lightblue"),
color = "black", size = 0.5))) +
theme_minimal(base_size = 14) +
theme(axis.title = element_blank(),
legend.title = element_text(face = "italic", size = 14),
legend.key.size = unit(1, "lines"),
legend.text = element_text(size = 11))
which results in:
In response to the comment
If you want to place the text-labels inside the bars, you will have to calculate a position variable too:
# Counts per user, location and context with signed count (n2), signed share
# (p) and a label position (pos) on the share scale.
# pos puts the "Clicked A" label at half of its own share and the
# "Clicked B" label half a share short of +/-1; any other context gets 0.
# NOTE(review): this assumes the "Clicked A" segment is stacked next to the
# axis -- confirm against the stacking order of the ggplot2 version in use.
new.df <- RawDataSet %>%
  count(UserEmail, location, context) %>%
  group_by(UserEmail, location) %>%
  mutate(
    n2 = n * ifelse(location == "NOT_WITHIN", -1, 1),
    p = ifelse(location == "NOT_WITHIN", -1, 1) * n / sum(n),
    pos = ifelse(
      context == "Clicked A",
      p / 2,
      ifelse(
        context == "Clicked B",
        ifelse(location == "NOT_WITHIN", -1, 1) * (1 - abs(p) / 2),
        0
      )
    )
  )
Then add the following line to your ggplot code after the geom_bar's:
# Centre each count label inside its own bar segment. position_stack() applies
# the same stacking as the bars (positive and negative values separately), so
# the labels stay on the right segment whatever the stacking order is.
# This is for the percentage plot; use y = n2 for the count-based plot.
geom_text(data = new.df, aes(x = UserEmail, y = p, label = n, group = context),
          position = position_stack(vjust = 0.5))
which results in:
Instead of label = n you can also use label = scales::percent(abs(p)) to display the percentages.