Removing "False"-condition scales::dollar labels on ifelse within geom_label - r

trying to establish individual bar data labels ONLY if the value is negative. I was able to do it fine for a variable that comprised simple integers, but for a variable that needs to be formatted as dollar with the thousands separator, I can't seem to get rid of the "NA" label.
DolSumPlot <- ggplot(data = DolSums, aes(x = Group.1, fill = Group.2)) +
geom_bar(aes(weight = x), position = position_stack(reverse = TRUE)) +
coord_flip() +
labs(title = "Dollars Billed by Technician and Shop, Between 02/01/2018 and 05/31/2018",
y = "Dollars Billed", x = "Technician", fill = "Shop") +
scale_y_continuous(limits= c(NA,NA),
labels = scales::dollar,
breaks = seq(0, 50000 + 10000, 5000*2),
minor_breaks = seq(0,50000 + 10000, by = 5000)) +
scale_fill_brewer(palette = "Set1") +
geom_label(aes(label=scales::dollar(ifelse(DolSums$x < 0, DolSums$x,NA)),
y = DolSums$x),
show.legend = FALSE, size = 2.6, colour = "white", fontface = "bold")
Data:
DolSums = structure(list(Group.1 = c((names)), Group.2 = structure(c(4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Radio",
"Video", "Engineering", "800Mhz", "PSSRP", "Other"), class = "factor"),
x = c(4646, 16008.5, 48793.1, 4040, 14468.25, 13332, 1565.5,
6060, 6549.85, 2929, 4444, 3257.25, 5904, 2029.5, 3321, 6767,
8105.25, 8105.25, 8130.5, 3131, 5075.25, 3383.5, 4418.75,
23381.5, 1363.5, -2323, 29133.45, 2550.25, 505, 26042.85,
35203.55, 35940.85, 1641.25, 45066.2, 37541.7, 606, 45439.9
)), .Names = c("Group.1", "Group.2", "x"), row.names = c(NA,
-37L), class = "data.frame")

You can do this by using the data argument in geom_label and subsetting only rows with negative x. Also note that since you already have DolSums as input, there is no need to write DolSums$x. Instead, use column name to refer to a specific column directly:
library(ggplot2)
ggplot(data = DolSums, aes(x = Group.1, fill = Group.2)) +
geom_bar(aes(weight = x), position = position_stack(reverse = TRUE)) +
coord_flip() +
labs(title = "Dollars Billed by Technician and Shop, Between 02/01/2018 and 05/31/2018",
y = "Dollars Billed", x = "Technician", fill = "Shop") +
scale_y_continuous(limits= c(NA,NA),
labels = scales::dollar,
breaks = seq(0, 50000 + 10000, 5000*2),
minor_breaks = seq(0,50000 + 10000, by = 5000)) +
scale_fill_brewer(palette = "Set1") +
geom_label(data = DolSums[DolSums$x < 0,],
aes(label=scales::dollar(x),
y = x),
show.legend = FALSE, size = 2.6, colour = "white", fontface = "bold")

Related

Order of the legend in ggplot

I am plotting a three series graph for the data
> dput(test)
structure(list(Industry = structure(c(2L, 3L, 1L, 4L, 2L, 3L,
1L, 4L, 2L, 3L, 1L, 4L), .Label = c("Overall", "High Tech", "Other Non-Manufacturing",
"Services (Non-Financial)"), class = "factor"), variable = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L), .Label = c("2018 % (Actual)",
"2019 % (Actual/Budgeted)", "2020 % (Forecast)"), class = "factor"),
value = c(3.8, 3.7, 3.9, 3.4, 3.8, 3.7, 3.8, 3.9, 3.5, 3.3,
3.7, 3.8)), row.names = c(3L, 7L, 8L, 10L, 14L, 18L, 19L,
21L, 25L, 29L, 30L, 32L), class = "data.frame")
The three series are: 2018 % (Actual), 2019 % (Actual/Budgeted), 2020 % (Forecast)
The problem is I want the legend in the same sequence but the graph is showing the legend sequence as: 2020 % (Forecast),2018 % (Actual), 2019 % (Actual/Budgeted)
ggplot(subset(test,variable!="2020 % (Forecast)")) +
aes(x=Industry,y=value,fill=factor(variable, levels = c("2018 % (Actual)","2019 % (Actual/Budgeted)"))) +
geom_bar(,width = 0.9,stat="identity",position = position_dodge2(width=NULL,preserve = "single"))+
scale_fill_manual(values = c("#006D9E","#00A8C8","#FDFEFE"))+
geom_text(aes(label=paste0(value, "%")), colour="black", size=3,
position=position_dodge(width = 0.9), vjust=0.2,angle=90, hjust=2) +
geom_point(data=subset(test,variable=="2020 % (Forecast)"),aes(x=Industry,y=value,colour=variable),
position = position_dodge(0.5),show.legend = F)+
geom_line(data=subset(test,variable=="2020 % (Forecast)"),aes(x=Industry,y=value,colour=variable,group=1),position = position_dodge(1)) +
geom_text(data=subset(test,variable=="2020 % (Forecast)"), aes(x=Industry,y=value,label=paste0(value, "%")),size=3,
position=position_dodge(width =1), vjust= -0.25)+
labs(x=NULL, y=NULL)+
theme(axis.text.x = element_text(face="bold",color = "#626366",size = 10,angle=90, vjust=0.6))+
scale_y_continuous(expand = c(0,0),labels = number_format(suffix = "%"),
limits=c(0,max(test$value, na.rm = TRUE)+
max(test$value, na.rm = TRUE)/5))+
scale_x_discrete(limits=levels(test$Industry),labels = function(x) str_wrap(x, width = 5))
Is there any way in ggplot to change the sequence of legends?
You can use guides(). First we start with your plot:
library(tidyverse)
p = ggplot(subset(test,variable!="2020 % (Forecast)")) +
aes(x=Industry,y=value,fill=factor(variable, levels = c("2018 % (Actual)","2019 % (Actual/Budgeted)"))) +
geom_bar(,width = 0.9,stat="identity",position = position_dodge2(width=NULL,preserve = "single"))+
scale_fill_manual(values = c("#006D9E","#00A8C8","#FDFEFE"))+
geom_text(aes(label=paste0(value, "%")), colour="black", size=3,
position=position_dodge(width = 0.9), vjust=0.2,angle=90, hjust=2) +
geom_point(data=subset(test,variable=="2020 % (Forecast)"),aes(x=Industry,y=value,colour=variable),
position = position_dodge(0.5),show.legend = F)+
geom_line(data=subset(test,variable=="2020 % (Forecast)"),aes(x=Industry,y=value,colour=variable,group=1),position = position_dodge(1)) +
geom_text(data=subset(test,variable=="2020 % (Forecast)"), aes(x=Industry,y=value,label=paste0(value, "%")),size=3,
position=position_dodge(width =1), vjust= -0.25)+
labs(x=NULL, y=NULL)+
theme(axis.text.x = element_text(face="bold",color = "#626366",size = 10,angle=90, vjust=0.6))+
scale_y_continuous(expand = c(0,0),labels = number_format(suffix = "%"),
limits=c(0,max(test$value, na.rm = TRUE)+
max(test$value, na.rm = TRUE)/5))+
scale_x_discrete(limits=levels(test$Industry),labels = function(x) str_wrap(x, width = 5))
Now we specify guides, your line is a color legend and your bars have a fill, and you just specify the title and order insider guide_legend():
p + guides(color=guide_legend(title="Forecast",order = 2),fill=guide_legend(title="Actual",order = 1))
Gives you:

What is the "N = 1" box in my R geom_bar legend, and how do I remove?

These are the data:
structure(list(Group.1 = c((name list)
), Group.2 = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 3L, 3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Radio", "Video", "Engineering",
"800Mhz", "PSSRP", "Other"), class = "factor"), x = c(93.5, 208.75,
214, 48, 66.33, 71.5, 19.5, 64.75, 17, 39, 30.75, 96.75, 30,
19, 32.5, 12.75, 47.25, 14, 22.25, 12, 3, 128.5, 9.5, 303.2,
290.35, 364.05, 333.25, 11.75, 553.25, 423, 6, 496)), .Names = c("Group.1",
"Group.2", "x"), row.names = c(NA, -32L), class = "data.frame")
running this plot:
ggplot(data = HrSums, aes(x = Group.1, y = x, fill = Group.2)) +
geom_bar(stat = "sum", position = position_stack(reverse = TRUE)) +
coord_flip() +
labs(title = "Hours Billed, by Technician and Shop", y = "Hours Billed",
x = "Technician", fill = "Shop")
I get this bar chart:
What is the "n" box, and how do I remove it (only) from the legend?
I believe the n box is because geom_bar expects to count the number of times each combination of Group.1 and Group.2 occurs, but instead you're giving a y value in your aes. geom_bar can use a different stat instead of counting, but if you want the sums of values, it expects a weight aesthetic. Here are two ways to do this, one using weight = x in geom_bar, and one that uses dplyr functions to calculate sums beforehand, then supplies this to y.
library(tidyverse)
ggplot(df, aes(x = Group.1, fill = Group.2)) +
geom_bar(aes(weight = x), position = position_stack(reverse = T)) +
coord_flip()
df_sums <- df %>%
group_by(Group.1, Group.2) %>%
summarise(x = sum(x))
ggplot(df_sums, aes(x = Group.1, y = x, fill = Group.2)) +
geom_col(position = position_stack(reverse = T)) +
coord_flip()
if you include the following then you'll only see the aesthetics you're expecting:
show.legend = c(
"x" = TRUE,
"y" = TRUE,
"alpha" = FALSE,
"color" = FALSE,
"fill" = TRUE,
"linetype" = FALSE,
"size" = FALSE,
"weight" = FALSE
)
See show.legend argument on ?geom_bar:
show.legend logical. Should this layer be included in the legends?
NA, the default, includes if any aesthetics are mapped. FALSE never
includes, and TRUE always includes. It can also be a named logical
vector to finely select the aesthetics to display.

add empty lines/space between groups in ggplot

I want to add empty lines between groups to make the graph look less dense.
The first 10 rows of data is attached here:
> dput(droplevels(head(hosp, 10)))
structure(list(mzipid = c("FL-3", "FL-3", "FL-3", "FL-4", "FL-4",
"FL-4", "FL-4", "FL-4", "FL-4", "FL-4"), region_oe = c(1.6459234,
1.6459234, 1.6459234, 1.3399296, 1.3399296, 1.3399296, 1.3399296,
1.3399296, 1.3399296, 1.3399296), region_low_ci = c(0.53041852,
0.53041852, 0.53041852, 0.9696098, 0.9696098, 0.9696098, 0.9696098,
0.9696098, 0.9696098, 0.9696098), region_high_ci = c(3.8410261,
3.8410261, 3.8410261, 1.8049299, 1.8049299, 1.8049299, 1.8049299,
1.8049299, 1.8049299, 1.8049299), hosp_id = structure(c(5L, 9L,
4L, 7L, 8L, 6L, 2L, 1L, 9L, 3L), .Label = c("100025", "100054",
"100093", "100106", "100113", "100122", "100124", "100223", "non_local_hosp"
), class = "factor"), outc_n = c(2L, 2L, 1L, 1L, 6L, 3L, 1L,
19L, 2L, 9L), pre_outc_n = c(0.339173126965761, 0.577540323603898,
0.838447939604521, 0.463049655780196, 2.99622658733279, 1.86155441217125,
0.626489417627454, 12.4346171403304, 1.37301584333181, 6.90619195345789
), oe = c(NA, NA, NA, NA, 2.0025189, 1.6115564, NA, 1.5279924,
NA, 1.3031784), low_ci = c(NA, NA, NA, NA, 0.73123336, 0.32389662,
NA, 0.91951913, NA, 0.59465784), high_ci = c(NA, NA, NA, NA,
4.3587809, 4.7086916, NA, 2.3862863, NA, 2.4740036), state = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "FL", class = "factor"),
id = 1:10, outlier = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = "black", class = "factor"), cv = c(" 66.88",
" 66.88", " 66.88", " 38.37", " 38.37", " 38.37", " 38.37",
" 38.37", " 38.37", " 38.37"), start_id = c(1L, 1L, 1L, 4L,
4L, 4L, 4L, 4L, 4L, 4L), end_id = c(3L, 3L, 3L, 12L, 12L,
12L, 12L, 12L, 12L, 12L)), .Names = c("mzipid", "region_oe",
"region_low_ci", "region_high_ci", "hosp_id", "outc_n", "pre_outc_n",
"oe", "low_ci", "high_ci", "state", "id", "outlier", "cv", "start_id",
"end_id"), row.names = 105:114, class = "data.frame")
The current code is like this:
## draw graph
ggplot(hosp, aes(x = id, y = oe, group = mzipid)) +
theme(panel.background = element_rect(fill = "white", colour = "grey50")) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
geom_point(size=1, show.legend = FALSE, colour = hosp$outlier) +
geom_linerange(aes(ymin = low_ci, ymax = high_ci), alpha = 0.4, na.rm = TRUE, colour = hosp$outlier)) +
scale_linetype_manual(values = linetype, guide = FALSE) +
geom_segment(aes(x = start_id, xend = end_id, y = region_oe, yend = region_oe, linetype = "1",
size = 1), na.rm = TRUE) +
geom_ribbon(aes(ymin = region_low_ci, ymax = region_high_ci), alpha=0.2, linetype = "blank", show.legend = FALSE) +
geom_hline(aes(yintercept = 1, alpha = 0.2, colour = "red", size = 1), show.legend = FALSE) +
scale_size_identity() +
scale_x_continuous(name = "hospital id", breaks = seq(0,350, by = 20)) +
scale_y_continuous(name = "O:E ratio", breaks = seq(0,6, by = 0.5),
sec.axis = sec_axis(trans = ~., breaks = c(0.1),
labels = "coefficient of variation")) +
stat_summaryh(fun.x = mean, aes(label = mzipid, y = 5.3), geom = "text", size = 2, srt = 35,
check_overlap = F) +
stat_summaryh(fun.x = mean, aes(label = cv, y = 0.1), geom = "text", size = 2, srt = 35,
check_overlap = F) +
guides(colour = FALSE) +
ggtitle("O:E ratio:hospital level\nmedian clusters") +
theme(plot.title = element_text(hjust = 0.5))
And the current graph looks like this:
How can I add empty lines/space between each zip group (mzipid)? Given the data provided, if anyone can show how to add space between FL-3 group and FL-4 group would be great.
Thanks!!

ggrepel label fill color questions

I'm working with ggplot2 for the first time, and I'm having trouble making the colors of the labels I created with ggrepel change dynamically. Currently, my code looks like this:
ggplot(tstat) +
geom_point(aes(Mu, Sigma),size = 5, color = 'black') +
geom_label_repel(aes(Mu, Sigma, label = VarNames, fill = factor(Hemisphere)), fontface = 'bold', color = 'white',
box.padding = unit(0.25, 'lines'),point.padding = unit(0.5, 'lines')) +
geom_rangeframe() +
theme_tufte() +
xlab(expression(paste(mu, "*"))) +
ylab(expression(sigma)) +
theme(axis.title.x = element_text(vjust=-0.5), axis.title.y = element_text(vjust=1.5)) +
ggtitle("Model Sensitivity by Hemisphere")
In general, this works pretty well, except I strongly dislike the toothpaste green color it gives me for one of the two factors plotted. I want to dictate the specific colors of that fill = factor(Hemisphere)) line, but I don't know how.
I have already tried using the scale_colours_manual function, but when I include it within the geom_label_repel(.....) paratheses in line 3, the program complains that "ggplot2 doesn't know how to deal with data of class ScaleDiscrete/Scale/ggproto", and when I place the scale_colours_manual line outside of line 3, it has no effect at all, as in this example, which produced an identical plot to the one above:
ggplot(tstat) +
geom_point(aes(Mu, Sigma),size = 5, color = 'black') +
scale_colour_manual(values = c('blue', 'red')) +
geom_label_repel(aes(Mu, Sigma, label = VarNames, fill = factor(Hemisphere)), fontface = 'bold', color = 'white',
box.padding = unit(0.25, 'lines'),point.padding = unit(0.5, 'lines')) +
geom_rangeframe() +
theme_tufte() +
xlab(expression(paste(mu, "*"))) +
ylab(expression(sigma)) +
theme(axis.title.x = element_text(vjust=-0.5), axis.title.y = element_text(vjust=1.5)) +
ggtitle("Model Sensitivity by Hemisphere")
I know there has to be a way to do this, but I'm at a loss. Thanks for any help you've got!
EDIT: At request, I've attached a dput() of tstat. Not a big data frame.
structure(list(VarNames = structure(c(4L, 1L, 3L, 2L, 5L, 6L,
4L, 1L, 3L, 2L, 5L, 6L), .Label = c("Dry Deposition", "MEGAN Acetone",
"MEGAN Terpenes", "Monoterpene Yield", "Ocean", "Photolysis"), class = "factor"),
Mu = c(2703.09, 8066.01, 6566.6, 19741.7, 5809.6, 14231.8, 1493.56, 3067.54, 3631.32, 9951.06, 8748.95, 7967.93),
Sigma = c(3478.28, 8883.23, 7276.49, 18454.4, 6218.8, 14989.7, 1925.14, 3410.27, 4017.64, 9289.57, 9354.64, 8403.1),
Hemisphere = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L),
.Label = c("Northern", "Southern"), class = "factor")),
.Names = c("VarNames", "Mu", "Sigma", "Hemisphere"),
class = "data.frame", row.names = c(NA, -12L))
You can use scale_fill_manual:
tstat <- structure(list(VarNames = structure(c(4L, 1L, 3L, 2L, 5L, 6L,
4L, 1L, 3L, 2L, 5L, 6L), .Label = c("Dry Deposition", "MEGAN Acetone",
"MEGAN Terpenes", "Monoterpene Yield", "Ocean", "Photolysis"), class = "factor"),
Mu = c(2703.09, 8066.01, 6566.6, 19741.7, 5809.6, 14231.8, 1493.56, 3067.54, 3631.32, 9951.06, 8748.95, 7967.93),
Sigma = c(3478.28, 8883.23, 7276.49, 18454.4, 6218.8, 14989.7, 1925.14, 3410.27, 4017.64, 9289.57, 9354.64, 8403.1),
Hemisphere = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L),
.Label = c("Northern", "Southern"), class = "factor")),
.Names = c("VarNames", "Mu", "Sigma", "Hemisphere"),
class = "data.frame", row.names = c(NA, -12L))
library(ggplot2)
library(ggrepel)
library(ggthemes)
ggplot(tstat) +
geom_point(aes(Mu, Sigma),size = 5, color = 'black') +
geom_label_repel(aes(Mu, Sigma, label = VarNames, fill = factor(Hemisphere)), fontface = 'bold', color = 'white',
box.padding = unit(0.25, 'lines'),point.padding = unit(0.5, 'lines')) +
geom_rangeframe() +
theme_tufte() +
xlab(expression(paste(mu, "*"))) +
ylab(expression(sigma)) +
theme(axis.title.x = element_text(vjust=-0.5), axis.title.y = element_text(vjust=1.5)) +
ggtitle("Model Sensitivity by Hemisphere") +
scale_fill_manual(values = setNames(c("lightblue", "darkgreen"), levels(tstat$Hemisphere)))

ggplot2: axis line at 0 with negative datapoints

I am doing boxplot with positive and negative data and would like to have the axis at y=0.
adding a line afterwards is not elegant since the line would be on to of the boxes and not behind them.
(the goal is to have the line at y=0 black, while the lines at 1 and -1 should be gray)
In addition I would like to have only the axis lines. I therefore used
axis.line=element_line()
,panel.border = element_blank()
in the theme. however the vertical line goes above 1, which does not look good (my data is by definition between -1 and 1).
here is the code:
require (ggplot2)
theme_jack <- function (base_size = 10, base_family = "") {
theme_bw(base_size = base_size, base_family = base_family) %+replace%
theme(
title = element_text(size = 12)
,axis.title.x = element_text(colour = "white", size=0)
,axis.title.y = element_text(size=12, angle=90)
,axis.text.x=element_text(size=8)
,panel.grid.major = element_line(colour = "grey")#,
,axis.line=element_line()
,panel.border = element_blank()
,panel.grid.minor = element_blank()
,panel.grid.major.x = element_blank()
,legend.position = "none"
)
}
theme_set(theme_jack())
datatest2=structure(list(datatest2.genotype = structure(c(1L, 1L, 5L, 5L,
1L, 5L, 5L, 5L, 1L, 5L, 1L, 5L, 1L, 5L, 5L, 1L, 1L, 1L, 5L, 5L,
1L, 1L, 1L, 5L, 1L, 1L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L), .Label = c("CS_d42-chG80", "CS_G_c380cha", "CS_Gd42-chG80",
"PKC_CS", "PKC_d42-chG80", "PKC_G_c380cha", "PKC_Gd42-chG80"), class = "factor"),
datatest2.score = c(0.8882, -0.3775, -0.4053, 0.1962, 0.9982,
0.5627, -0.4865, 0.7267, 0.3276, 0.5017, 0.9731, 0.1525,
0.7857, 0.6121, 0.8508, 0.1311, -0.2457, 0.8848, -0.1254,
0.1047, -0.2715, 0.7189, 0.4115, 0.9704, -0.8328, -0.1301,
0.9756, 0.2317, 0.4297, 0.9967, 0.6423, 0.8516, 0.3386, 0.5208,
0.9148, 0.2539, 0.8581, 0.5621, 0.5969, 0.7435)), .Names = c("genotype",
"score"), row.names = c(NA, 40L), class = "data.frame")
p=ggplot(datatest2, aes(x=factor(genotype),y= score))
plot=p+ geom_boxplot()+ labs(x="genotype",y="PI during final test")+
scale_fill_grey(start = 0.9, end = 0.9)+ ##allow good bw prints
scale_y_continuous(minor_breaks=NULL,breaks = seq(-1 , 1, 1) )
plot
Try the expand argument to `scale_y_continuous. Replace the final section with this and you should get something closer to what you want. You may have to experiment with the values below.
p = ggplot(datatest2, aes(x = factor(genotype), y = score))
plot = p+ geom_boxplot()+ labs(x = "genotype",
y = "PI during final test")+
scale_fill_grey(start = 0.9, end = 0.9)+ ##allow good bw prints
scale_y_continuous(expand = c(0,0.005),
minor_breaks = NULL,
breaks = seq(-1 , 1, 1) ) +
theme()
plot

Resources