adding annotation_custom with rasterGrob after function call - r

Apologies for the title, I know it sucks.
I am trying to create a waterfall chart function. So, I am trying to create a basic plot, which people can configure however they wish. I ran into a problem, though, adding a gradient to the plot. For example:
I have this df:
> wfDF
category value sign id end start labels
1 Basic Materials 0.0024 pos 1 0.0024 0.0000 0.0024
2 Communications 0.0492 pos 2 0.0516 0.0024 0.0516
3 Consumer, Cyclical 0.0268 pos 3 0.0784 0.0516 0.0784
4 Consumer, Non-cyclical 0.0245 pos 4 0.1029 0.0784 0.1029
5 Diversified -0.0037 neg 5 0.0992 0.1029 0.1029
6 Energy -0.0040 neg 6 0.0952 0.0992 0.0992
7 Financial 0.0445 pos 7 0.1397 0.0952 0.1397
8 Industrial 0.0006 pos 8 0.1403 0.1397 0.1403
9 Technology -0.0059 neg 9 0.1344 0.1403 0.1403
10 Total 0.1345 pos 10 0.0000 0.1344 0.1344
With this code:
ggplot(wfDF, aes(category, fill = sign, color = sign)) + guides(fill = FALSE, color=FALSE) +
ggtitle("Risk by Industry") +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
theme(plot.title = element_text(vjust=1.5, face="bold", size = 20),
axis.title.x = element_blank(), axis.title.y = element_blank()) +
geom_rect(aes(x = category, xmin = id - 0.475, xmax = id + 0.475, ymin = end, ymax = start)) +
scale_fill_manual(values=c("red", "forestgreen")) +
scale_color_manual(values=c("black", "black")) +
scale_y_continuous(labels = percent) +
scale_x_discrete("", breaks = levels(wfDF$category), labels = gsub(" ", "\n", levels(wfDF$category))) +
geom_text(data = wfDF, aes(id, labels, label = paste0(value*100, "%")), vjust = -.5, size = 5, fontface = 4)
Which produces this graph:
Which looks great. I am trying to write a function which will do all this with any set of categories and values, and allows for any colors or customization to be added or used. I have this function:
waterfall <- function(categories, values, has.total = FALSE, offset = .475, labelType = c("decimal", "percent")) {
library(scales)
library(grid)
library(ggplot2)
library(dplyr)
theData <- data.frame("category" = as.character(categories), "value" = as.numeric(values))
if (labelType == "percent") theData$value = theData$value/100
if (!has.total) theData <- theData %>% rbind(.,list("Total", sum(.$val)))
theData$sign <- ifelse(theData$val >= 0, "pos","neg")
theData <- data.frame(category = factor(theData$category, levels = unique(theData$category)),
value = round(theData$value,4),
sign = factor(theData$sign, levels = unique(theData$sign)))
theData$id <- seq_along(theData$value)
theData$end <- cumsum(theData$value)
theData$end <- c(head(theData$end, -1), 0)
theData$start <- c(0, head(theData$end, -1))
theData$labels <- paste0(theData$value*100, "%")
theData$labellocs <- pmax(theData$end,theData$start)
theGG <- ggplot(theData, aes(category, fill = sign, color = sign)) +
geom_rect(aes(x = category, xmin = id - offset, xmax = id + offset, ymin = end, ymax = start)) +
scale_x_discrete("", breaks = levels(theData$category), labels = gsub(" ", "\n", levels(theData$category))) +
geom_text(data = theData, aes(id, labellocs, label = labels), vjust = -.5, size = 5, fontface = 4)
return(theGG)
}
waterfall(categories = riskDecomp$ID, values = riskDecomp$val, labelType = "percent")
Which produces a pretty ugly basic thing:
However, if I try to run something like the following:
test <- waterfall(categories = riskDecomp$ID, values = riskDecomp$val, labelType = "percent")
g <- rasterGrob(blues9, width=unit(1,"npc"), height = unit(1,"npc"), interpolate = TRUE)
test + guides(fill = FALSE, color=FALSE) +
ggtitle("Risk Decomposition") +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
theme(plot.title = element_text(vjust=1.5, face="bold", size = 20),
axis.title.x = element_blank(), axis.title.y = element_blank()) +
scale_fill_manual(values=c("red", "forestgreen")) +
scale_color_manual(values=c("black", "black")) +
scale_y_continuous(labels = percent)
I get this nonsense:
The rasterGrob thing seems to overlay the entire rest of the plot. The only workaround I can find is to add the gradient to the inside of the function. Which kind of removes the... customization of the function. Is there a way to fix this? To fix the order of the grobs? If that makes sense?

you can change the order of the layers manually,
library(grid)
library(ggplot2)
g <- rasterGrob(matrix(blues9, ,1), interpolate=TRUE,
width=unit(1,"npc"), height=unit(1,"npc"))
p <- qplot(rnorm(10), rnorm(10)) +
annotation_custom(g)
nl <- length(p$layers)
p$layers <- c(p$layers[[nl]], p$layers[-nl])
p

Related

Adding text to multiple column graph (faceted) in ggplot2

I am trying to add p-values to each boxplot pair in the graph shown below. I would like the p-values to be placed under each soil horizon label ('O', 'A' and 'B').
My data looks like this:
> head(kiwi_l)
# A tibble: 6 x 6
type horizon root_name length diameter n_child
<chr> <chr> <chr> <dbl> <dbl> <int>
1 Elevated CO2 A R1_A_L_S4G 0.0752 0.0342 0
2 Elevated CO2 A R1_A_L_S4F 0.0987 0.0319 0
3 Elevated CO2 A R1_A_L_S4E 0.105 0.0209 0
4 Elevated CO2 A R1_A_L_S4D 0.0476 0.0127 0
5 Elevated CO2 A R1_A_L_S4C 0.110 0.0282 0
6 Elevated CO2 A R1_A_L_S4B 0.244 0.0168 0
While the code I used to generate the graph is:
l_horizon<-ggplot(kiwi, aes(x=type, y=length, fill=type, palette='jco'))
+
geom_boxplot() +
facet_grid(. ~ factor(horizon, level=level_order)) +
theme_pubr() +
scale_y_continuous(name='Primary root length (cm)') +
scale_x_discrete(name='Treatment') +
ggtitle('Soil horizon') + theme(plot.title = element_text(hjust = 0.5)) +
theme(legend.position="none") +
theme(plot.title = element_text(size = 10, face = "bold"),
text = element_text(size = 10),
axis.title = element_text(face="bold"),
axis.text.x=element_text(size = 10),
axis.text.y=element_text(size=10),
axis.title.x = element_blank(),
axis.title.y=element_text(size=10))
l_horizon<-l_horizon+scale_fill_locuszoom()
l_horizon
Please help!
Since there is no data to play around with, I'll make up some:
set.seed(0)
df <- data.frame(f1 = rep(c("O","A","B"), each = 30),
f2 = rep(c("M","N"), 45),
y = rnorm(90))
Next we do a test on that data and format it's output:
tests <- split(df, df$f1) %>% sapply(function(x){
pval <- t.test(x[x$f2 == "M", "y"], x[x$f2 == "N", "y"])$p.value
paste0("p-value = ", format(pval, digits = 2, nsmall = 2))
})
Now if you want it to be part of the facet strip, you can adjust the levels of df$f1 to include the p-value:
levels(df$f1) <- paste0(levels(df$f1), "\n", tests)
ggplot(df, aes(x = f2, y = y)) +
geom_boxplot() +
facet_grid(~ f1)
If you wanted the p-values inside the panel instead of in the strip, you can use the annotate() function to place them in the panel. y = Inf ensures they are placed at the top.
ggplot(df, aes(x = f2, y = y)) +
geom_boxplot() +
facet_grid(~ f1) +
annotate("text", x = 1.5, y = Inf, label = tests, vjust = 1)
If you know where the on the y-axes to put the text, maybe annotate like this?
p_values <- c(1.1,2.2,3.3)
ggplot(data = d2,mapping = aes(x=range,y=p_area)) +
geom_boxplot() +
annotate("text", x=c(1,2,3), y=0.5, label= p_values)

R geom_col does not show the 'bars'

I am having this strange error regarding displaying the actual bars in a geom_col() plot.
Suppose I have a data set (called user_data) that contains a count of the total number of changes ('adjustments') done for a particular user (and a plethora of other columns). Let's say it looks like this:
User_ID total_adjustments additional column_1 additional column_2 ...
1 'Blah_17' 21 random_data random_data
2 'Blah_1' 47 random_data random_data
3 'foobar' 2 random_data random_data
4 'acbd1' 17 random_data random_data
5 'user27' 9 random_data random_data
I am using the following code to reduce it into a dataframe with only the two columns I care about:
total_adj_count = user_data %>%
select(User_ID, total_adjustments) %>%
arrange(desc(total_adjustments)) %>%
mutate(User_ID = factor(User_ID, User_ID))
This results in my dataframe (total_adj_count) looking like so:
User_ID total_adjustments
1 'Blah_1' 47
2 'Blah_17' 21
3 'acbd1' 17
4 'user27' 9
5 'foobar' 2
Moving along, here is the code I used to attempt to create a geom_col() plot of that data:
g = ggplot(data=total_adj_count, aes(x = User_ID, y = total_adjustments)) +
geom_bar(width=.5, alpha=1, show.legend = FALSE, fill="#000066", stat="identity") +
labs(x="", y="Adjustment Count", caption="(based on sample data)") +
theme_few(base_size = 10) + scale_color_few() +
theme(axis.text.x=element_text(angle = 45, hjust = 1)) +
geom_text(aes(label=round(total_adjustments, digits = 2)), size=3, nudge_y = 2000) +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank())
p = ggplotly(g)
p = p %>%
layout(margin = m,
showlegend = FALSE,
title = "Number of Adjustments per User"
)
p
And for some strange reason when I try to view plot p it displays all parts of the plot as intended, but does not show the actual bars (or columns).
In fact I get this strange plot and am sort of stuck where to fix it:
Change nudge_y argument to a smaller number. Right now you have it set to 2000 which offsets the labels by 2000 on the y-axis. Below I've changed it to nudge_y = 2 and it looks like so:
g <-
ggplot(total_adj_count, aes(User_ID, total_adjustments)) +
geom_col(width = .5, alpha = 1, show.legend = FALSE, fill = "#000066") +
labs(x = "", y = "Adjustment Count", caption = "(based on sample data)") +
theme_few(base_size = 10) +
scale_color_few() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
geom_text(aes(label = round(total_adjustments, digits = 2)), size = 3, nudge_y = 2) +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
)
Full copy/paste:
library(ggplot2)
library(ggthemes)
library(plotly)
library(dplyr)
text <- " User_ID total_adjustments
1 'Blah_1' 47
2 'Blah_17' 21
3 'acbd1' 17
4 'user27' 9
5 'foobar' 2"
total_adj_count <- read.table(text = text, header = TRUE, stringsAsFactors = FALSE)
g <-
ggplot(total_adj_count, aes(User_ID, total_adjustments)) +
geom_col(width = .5, alpha = 1, show.legend = FALSE, fill = "#000066") +
labs(x = NULL, y = "Adjustment Count", caption = "(based on sample data)", title = "Number of Adjustments per User") +
theme_few(base_size = 10) +
scale_color_few() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
geom_text(aes(label = round(total_adjustments, digits = 2)), size = 3, nudge_y = 2) +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
)
p <- ggplotly(g)
p <- layout(p, showlegend = FALSE)
p

ggplot - multiple boxplots

I'm trying to create boxplots with this dataset faceting by factor mix (3 boxplots combined):
daf <- read.table("http://pastebin.com/raw.php?i=xxYjmdgD", header=T, sep="\t")
This is what the sample looks like:
ia mix Rs
1 Fluazinam 1 0.62
2 Fluazinam 1 0.76
3 Fluazinam 1 0.76
4 Fluazinam 1 0.52
5 Fluazinam 1 0.56
6 Fluazinam 1 0.20
7 Fluazinam 1 0.98
235 Carbendazim+Cresoxim-Metílico+Tebuconazole 3 0.65
236 Carbendazim+Cresoxim-Metílico+Tebuconazole 3 0.28
237 Carbendazim+Cresoxim-Metílico+Tebuconazole 3 0.41
These are my failed attempts!
library(ggplot2)
qplot( Rs, ia, data=daf) +
facet_grid(mix ~ ., scales = "free", space = "free", labeller = label_both)
» When I add the qplot( Rs, ia, data=daf, geom="boxplot")
It simply appear a line, not the box.
ggplot(data=daf, aes(x=ia, y=Rs))+
geom_boxplot(outlier.colour = "black", outlier.size = 2) +
coord_flip() + theme_bw() +
scale_y_continuous(breaks=seq(0,1,by=0.25))+
stat_summary(fun.y = mean, geom="point", shape = 4, size = 3, colour = "blue") +
facet_grid(mix ~. , scales = "free", space="free", labeller = label_both)
» It repeats every "ia" level into each "mix" level
ggplot(data=daf, aes(x=ia, y=Rs))+
geom_boxplot(outlier.colour = "black", outlier.size = 2) +
layer(data = a, mapping = aes(x = ia, y= 0, label=a$Rs.median),
geom = "text", color="NavyBlue", size=3.5) +
coord_flip() + theme_bw() +
scale_y_continuous(breaks=seq(0,1,by=0.25))+
stat_summary(fun.y = mean, geom="point", shape = 4, size = 3, colour = "blue")
Finally I'd like a combination of the three plots:
from the first plot, the facet.grid(without repeating "ia" variables), from the second one, the boxes, and from the third one the median values in the left inside margin, and if it could be possible, into each level of factor "mix", reordering the "ia" by median values...
Could someone help me with this??
Thanks in advance!
geom_boxplot assumes the categorical variables are on the x-axis. coord_flip doesn't work in combination with facet_grid + geom_boxplot. One workaround is to rotate the text. You can export and rotate the image in another program (or figure out how to pull out the grid object and rotate it).
a = ddply(daf, .(ia,mix), function(x) c(Rs=median(x$Rs, na.rm=TRUE)))
ggplot( data=daf, aes(x=ia, y=Rs) ) +
geom_boxplot() +
facet_wrap(~mix, scales="free_x") +
stat_summary(fun.y = mean, geom="point", shape = 4, size = 3, colour = "blue") +
theme(axis.text.x=element_text(angle = 90, hjust = 1, vjust=0.5)) +
theme(axis.title.x=element_text(angle = 90, vjust=0.5)) +
theme(axis.text.y=element_text(angle = 90, hjust=0.5)) +
theme(strip.text=element_text(angle = 90, hjust=0.5)) +
geom_text(data = a, mapping = aes(x = ia, y= 0.02, label=round(Rs,2)),
color="NavyBlue", size=3.5, angle=90, hjust=1) +
ylim(-0.03,1)
I found https://github.com/lionel-/ggstance and thought I'd make an alternative answer.
library(devtools)
devtools::install_github("lionel-/ggstance")
library(ggplot2)
library(ggstance)
daf <- read.table("http://pastebin.com/raw.php?i=xxYjmdgD", header=T, sep="\t")
library(dplyr)
a = daf %>%
group_by(ia, mix) %>%
summarize(Rs=mean(Rs))
ggplot(daf, aes(x=Rs, y=ia)) +
geom_boxploth() +
geom_point(data=a, shape = 4, size = 3, colour = "blue") +
geom_text(data = a, mapping = aes(y = ia, x=0, label=round(Rs,2)),
color="NavyBlue", size=3.5, hjust=0) +
facet_grid(mix~., scales="free_y")

How to separate geom_vline() and geom_hline() legends from other legends in ggplot2

I'm trying to create a plot in R using ggplot2 that shows horizontal lines for groundwater protection standards as well as a vertical line that shows the start of construction project. I have legends created already for units of measure and whether the sample was below the detection limit. All of these legends show up as I want them, but the legends from the horizontal and vertical line are overlain on the other legends. I've tried multiple ways using show_guide, using different data frames for the data, and override.aes = list() but nothing seems to get the desired results.
Here is what the data look like:
head(dmr_data)
location_id sample_date analysis_result lt_measure default_unit param_name
154 MWH1 2004-06-02 0.0150 mg/L Arsenic, dissolved
155 MWH1 2004-06-02 0.0850 mg/L Barium, dissolved
156 MWH1 2004-06-02 0.0002 < mg/L Beryllium, dissolved
158 MWH1 2004-06-02 0.0005 < mg/L Cadmium, dissolved
162 MWH1 2004-06-02 0.0020 < mg/L Lead, dissolved
164 MWH1 2004-06-02 0.0010 < mg/L Thallium, dissolved
DMR_limit GWPS non_detect
154 0.01000 0.010 0
155 0.17340 2.000 0
156 0.00005 0.004 1
158 0.00100 0.005 1
162 0.00500 0.015 1
164 0.00060 0.002 1
And here is the code for the plot:
combo_plot <- function(df){
limits = df
shaded_dates <- data.frame(xmin = c(as.POSIXct("2004-06-01", format = "%Y-%m-%d"),
as.POSIXct("2013-10-01", format = "%Y-%m-%d")),
xmax = c(as.POSIXct("2013-10-01", format="%Y-%m-%d"),
max(df$sample_date)),
ymin = c(-Inf, -Inf),
ymax = c(Inf, Inf),
years = c("background", "compliance"))
ggplot(data = df, aes(x = sample_date, y = analysis_result)) +
geom_point(data = df, aes(colour = default_unit, shape = factor(non_detect)), size = 4) +
geom_line(data = df, aes(colour = default_unit), size = 1) +
facet_wrap(~ param_name, scale="free") +
# Plot legends, labels, and titles
ggtitle(paste("Time Series Plots for Monitoring Well",
df$location_id[1], "\n", sep=" ")) +
ylab("Analysis Result") +
xlab("Sample Date") + scale_x_datetime(labels = date_format("%Y")) +
theme(axis.text.x = element_text(angle = 90)) +
theme(plot.margin = unit(c(0.75, 0.75, 0.75, 0.75), "in")) +
theme_bw() +
scale_colour_discrete(name = "Units", guide = "legend") +
# add rectangles for date ranges
geom_rect(data = shaded_dates, aes(xmin = xmin, ymin = ymin, xmax = xmax,
ymax = ymax, fill = years),
alpha = 0.2, inherit.aes = FALSE) +
scale_fill_manual(values=c("blue","green")) +
# add horizontal lines for EPA MCL and Upper Prediction Limit
geom_hline(data = limits, aes(yintercept = GWPS, linetype = "GWPS"), show_guide = TRUE, size = 0.75) +
geom_hline(data = limits, aes(yintercept = DMR_limit, linetype = "DMR Limit"), show_guide = TRUE, size = 0.75) +
# create custom legend using guide
theme(axis.title.x = element_text(size = 15, vjust=-.2)) +
theme(axis.title.y = element_text(size = 15, vjust=0.3)) +
guides(colour = guide_legend("Units"), fill = guide_legend("Dates"),
linetype = guide_legend("Limits")) +
scale_shape_manual(name = "Measure", labels = c("Non-Detect", "Detected"),
values = c("1" = 21, "0" = 4)) +
# add vertical line to show start of "East Pushout" construction
geom_vline(xintercept = as.numeric(as.POSIXct("2008-08-01", format="%Y-%m-%d")),
linetype="dotted", show_guide = T)
}
I then use plyr to create faceted plots for all the wells
d_ply(dmr_data, .(location_id), .progress = "text", failwith(NA, combo_plot), .print = TRUE)
Here is what the ouput looks like.
Any help would be appreciated!
You can get the desired effect by using override.aes = list(linetype = 0) in guides(), and by adding a new scale for linetype (so as to exclude the vertical construction line from showing up in the legend).
Replace your hline() section with:
## add horizontal lines for EPA MCL and Upper Prediction Limit
geom_hline(data = limits, aes(yintercept = GWPS, linetype = "GWPS"), colour = "black", size = 0.75, show_guide = T) +
geom_hline(data = limits, aes(yintercept = DMR_limit, linetype = "DMR Limit"), size = 0.75, show_guide = T) +
scale_linetype_manual(name = "Limits", labels = c("GWPS", "DMR Limit"), values = c("GWPS" = 1, "DMR Limit" = 2)) +
Replace your guides() line with:
guides(colour = guide_legend(override.aes = list(linetype = 0 )),
fill = guide_legend(override.aes = list(linetype = 0 )),
shape = guide_legend(override.aes = list(linetype = 0 )),
linetype = guide_legend()) +
If you do want the dotted vertical line to show up in the legend, add the appropriate arguments to geom_vline(aes()) and to scale_linetype_manual().

Error in Data Frame when creating a plot with ggplot2 and geom_point() and stat_function

I've got a problem when creating a plot with ggplot2
> setwd("c:/tesis/emisiones")
e<-read.csv("fh3pco1.csv",header=T)
attach(e)
names(e)
1 "VehCat" "Component" "TrafficSit" "Subsegment" "SizeClasse" "V" "EFA"
[8] "norma" "cat" "cat1"
library(ggplot2)
length(e)
1 10
vpge0 <- function(x) {(281* x^-0.63)}
No Errors in Plot
ggplot(e, aes(x=V,y=EFA, colour=norma)) + geom_point(size=4)
Errors in Plot
ggplot(e, aes(x=V,y=EFA, colour=norma)) +
geom_point(size=4)+
stat_function(data = data.frame(x = 1:100, FE = factor(1)),fun = vpge0, size=1)
Using some fake data since none was provided, you can do this without stat_function.
library(ggplot2)
e <- data.frame(V=1:10, EFA=1:10, norma=c('a', 'b'))
g <- ggplot(e, aes(x=V, y=EFA, colour=norma)) + geom_point(size=4)
Then just add another geom_point that contains the output of your function:
g + geom_point(aes(x=1:10, y=vpge0(1:10), colour='vpge0'))
I find this cleaner and easier syntax than monkeying with stat_function.
NOx
library(scales)
ggplot(f1, aes(x=V,y=EFAnoxE3, colour=NormaGEuro3))+
geom_point(size=4)+
stat_function(fun = m5noxe3, size=1, colour="green")+
stat_function(fun = c4noxe3, size=1, colour="green")+
stat_function(fun = unoxe3, size=1, colour="red")+
scale_colour_manual(values = c("green", "blue", "green","red"),
name = "",
labels = c("MODEM 5 Euro 3","HBEFA 3 Euro 3", "COPERT 4 Euro 3","UNTEC Euro 3"))+
theme(plot.title = element_text(lineheight=.8, face="bold"),
axis.text.x=element_text(size=12, face="bold"),
axis.text.y=element_text(size=12, face="bold"),
legend.position=c(1,0.82),legend.justification=c(1,1),
legend.background = element_rect(fill=alpha("grey", 0.2)))+
ggtitle("Factores de Emisión NOx Vehiculos \n Particulares Gasolineros (g/km)")+
ylab("(g/km)") +
xlab("Velocidad Promedio (km/h)")+
annotate("rect", xmin = 25, xmax = 35, ymin = 0, ymax = 0.6, alpha = .2)
See more at: My Blog

Resources