Write function for plot in R (ggplot2) [duplicate] - r

This question already has an answer here:
passing unquoted variables to curly curly {{}} ggplot function
(1 answer)
Closed 2 years ago.
I wrote a code to create a plot
#Create a plot mean Calories in each Category
p<-mean_table %>%
ggplot(aes(mean.Calories,reorder(Category, mean.Calories))) +
geom_col(aes(fill = mean.Calories)) +
scale_fill_gradient2(low = "forestgreen",
high = 'firebrick1',
mid = "gold", midpoint = 283.8947)+
scale_x_continuous(breaks = seq(0, 600, length.out = 7),
limits = c(0, 600),
labels = seq(0, 600, length.out = 7))+
xlab("Calories")+
ylab("Category")+
ggtitle("Mean of Calories in each Category")+
theme_minimal()
#Theme for plots
theme<-theme(axis.title.x=element_text(size=16),
axis.title.y = element_text(size = 16),
axis.text.x = element_text(size = 12, color = "black"),
axis.text.y = element_text(size = 12, color = "black"),
panel.background = element_rect(color = "black"),
plot.title = element_text(size = 20,hjust = 0.5),
legend.position = "none")
#Calories+theme
p+theme
It works well. We can see this plot
But I'd like to write a function because I need several similar graphs.
It's my variant:
# Asker's first attempt at a reusable version of the Calories plot: a bar
# chart of `x_data` per Category with a three-colour fill gradient.
#   data    data frame that is piped into ggplot()
#   x_data  values used for the x axis, the Category ordering and the fill
#   breaks  upper x-axis limit; seven rounded ticks are placed from 0 up to it
#   xlab    x-axis label
#   title   plot title
# Returns the ggplot object.
p3<-function(data, x_data, breaks, xlab, title){
my_plot<-data %>%
ggplot(aes(x_data,reorder(Category, x_data))) +
geom_col(aes(fill = x_data)) +
scale_fill_gradient2(low = "forestgreen",
high = 'firebrick1',
# NOTE: `data$x_data` asks for a column literally named "x_data", not for the
# column the caller supplied; the reported warning
# "Unknown or uninitialised column: x_data" points at this expression.
mid = "gold", midpoint = median(data$x_data))+
# The same rounded sequence is used for tick positions and tick labels.
scale_x_continuous(breaks = round(seq(0, breaks, length.out = 7)),
limits = c(0, breaks),
labels = round(seq(0, breaks, length.out = 7)))+
xlab(xlab)+
ylab("Category")+
ggtitle(title)+
theme_minimal()
return(my_plot)
}
p3(mean_table, mean_table$mean.Cholesterol, 155, "Gram", "Mean of Fat in each Cholesterol")
I use data.frame like this(it's small version):
mean_table<-data.frame(Category=c("Beef & Pork","Beverages","Breakfast"), mean.Cholesterol=c(87.3333333, 0.5555556, 152.8571429))
Error:Elements must equal the number of rows or 1
Run rlang::last_error() to see where the error occurred.
1: Unknown or uninitialised column: x_data.
What is wrong with my function?

You need to use non-standard evaluation (NSE) while passing the column names to the function.
There are two ways to pass column names to a function: unquoted or quoted. In this answer we pass them unquoted and forward them with the curly-curly (`{{ }}`) operator.
library(ggplot2)
library(rlang)
# Horizontal bar chart of one numeric column per `Category`, filled on a
# green-gold-red gradient whose midpoint is that column's median.
#
#   data    data frame with a `Category` column and the column named by `x_data`
#   x_data  unquoted column name (e.g. mean.Cholesterol); forwarded with {{ }}
#   breaks  upper x-axis limit; seven rounded ticks are placed from 0 up to it
#   xlab    x-axis label
#   title   plot title
#
# Returns the ggplot object.
p3 <- function(data, x_data, breaks, xlab, title) {
  # Rounded tick positions, shared by the axis breaks and their labels.
  ticks <- round(seq(0, breaks, length.out = 7))

  # Median of the selected column. pull() is namespaced and no pipe is used,
  # so the function works even when dplyr/magrittr are not attached.
  # na.rm = TRUE keeps the gradient midpoint defined when the column has NAs.
  midpoint <- median(dplyr::pull(data, {{ x_data }}), na.rm = TRUE)

  ggplot(data, aes({{ x_data }}, reorder(Category, {{ x_data }}))) +
    geom_col(aes(fill = {{ x_data }})) +
    scale_fill_gradient2(
      low = "forestgreen",
      high = "firebrick1",
      mid = "gold",
      midpoint = midpoint
    ) +
    scale_x_continuous(breaks = ticks, limits = c(0, breaks), labels = ticks) +
    xlab(xlab) +
    ylab("Category") +
    ggtitle(title) +
    theme_minimal()
}
and call p3 as :
p3(mean_table, mean.Cholesterol, 155, "Gram", "Mean of Fat in each Cholesterol")
If we want to pass the column name as a quoted string such as "mean.Cholesterol", we can convert it to a symbol with sym() and unquote it with !! so that it is evaluated as a column.

Related

Bioconductor Heatmap Error: 'x' must be a numeric matrix

Here is the script I am using
#Make data long for ggplot2 using tidyr (PL is first column name)
Lipidslong <- pivot_longer(data = Lipids,
cols = -c(PL),
names_to = "Comparison",
values_to = "FC")
#Turn your 'Comparison' column into a character vector
Lipidslong$Comparison <- as.matrix(Lipidslong$Comparison)
#Then turn it back into a factor with the levels in the correct order
Lipidslong$Comparison <- factor(Lipidslong$Comparison, levels=unique(Lipidslong$Comparison))
Lipidsheatmap <- ggplot(data = Lipidslong, mapping = aes(x = Comparison,
y = PL,
fill = FC)) +
geom_tile(colour="black") +
labs(x="", y="") +
coord_fixed(0.25, expand = TRUE) +
scale_y_discrete(limits = rev(levels(as.factor(Lipidslong$PL)))) +
scale_x_discrete(labels = str_wrap(Lipidslong$Comparison, width = 0.25)) +
scale_fill_gradient2(limits=c(-3,3), #Set scale limit
labels=c("-3", "0", "3"), #Set scale numbers
breaks=c(-3, 0, 3), #Need to have break and scale numbers match
oob=squish, #makes values outside of limits "squish" into the scale
low = ("blue"),
mid = "white",
high = ("red"),
midpoint = 0,
space = "Lab",
na.value = "grey50",
name = "Log2\nFC") + #Name the scale bar
theme(axis.ticks.y = element_blank(),
axis.text.y = element_text(size = 10),
axis.text.x = element_text(size = 10),
panel.background = element_blank())
heatmap(df, name = "Lipidsheatmap", split = Lipidsheatmap$PL,
row_names_gp = gpar(fontsize = 7))
I am trying to cluster my heatmap by row using Bioconductor's code, but I keep getting
Error in heatmap(df, name = "Lipidsheatmap", split = Lipidsheatmap$PL, : 'x' must be a numeric matrix
I have tried as.numeric() and lapply and unlist, but it doesn't seem to work.

add text to plot or legend for second axis only R

I very nearly have the plot that I want so I think this will be an easy fix for someone more R savvy than myself
Sample_ID <- c("P1014B", "P1014F", "P1036A", "P1036B", "P1036C", "P1036D", "P1036E", "P1036F")
`CONTAMINATION_SCORE (NA)` <- c(2677, 1021, 870, 6831, 1324, 4175, 1370, 875)
`CONTAMINATION_P_VALUE (NA)` <- c(0.101, 1.000, 1.000, 0.000, 1.000, 0.036, 1.000, 1.000)
contam_reads <- data.frame(Sample_ID, `CONTAMINATION_SCORE (NA)`, `CONTAMINATION_P_VALUE (NA)`, check.names = FALSE)
I have a plot that is very near to what I want:
cols = c("P-value upper Limit" = "black","Contam. score upper limit" = "red")
ggplot(contam_reads[-c(1,2),], aes(x=Sample_ID, y=`CONTAMINATION_SCORE (NA)`)) +
geom_bar(stat="identity", fill="cyan4") +
geom_hline(aes(yintercept=contam_reads$`CONTAMINATION_SCORE (NA)`[1]), col = cols[2], size = 1.5, linetype=2) +
geom_hline(aes(yintercept=contam_reads$`CONTAMINATION_P_VALUE (NA)`[1]*10000), col = cols[1], size = 1, linetype=2) +
geom_point(aes(x=Sample_ID, y=`CONTAMINATION_P_VALUE (NA)`* 10000),stat="identity",color="red", size = 1.5, show.legend = T) +
labs(title= "DNA Library QC Metrics for Contamination",
x="Sample ID",y="Contamination Score") +
scale_y_continuous(sec.axis=sec_axis(~./10000,name="P-value", breaks = seq(0,1,0.1))) +
theme(
axis.title.y = element_text(color = "cyan4",size=15),
axis.text.y = element_text(color = "cyan4"),
axis.title.y.right = element_text(color = "red"),
axis.text.y.right = element_text(color = "red")
) +
scale_colour_manual(values=cols)
I can't get the legend to be a line and not dots, and I can't get the first dot in the legend to turn black.
You could fix your legend by mapping on aesthetics, i.e. instead of setting colors for the hlines as arguments map on the color aes and set show.legend=FALSE for geom_point to show just the lines. To this end I also use the data argument to pass just the first line of the dataset to the hlines.
Note 1: I added a named vector with labels for your scores and values. Also as names I use abbreviations for both the labels and color vector which makes it much easier to refer to a specific value in the ggplot code and makes the code easier to read.
Note 2: In my plot the colors for the lines are swapped which is right if p-values should be "red" and scores "black". Perhaps you confused that which could easily happen if one uses e.g. cols[1] to refer to colors.
cols <- c("pval" = "red", "score" = "black")
labels <- c("pval" = "P-value upper Limit", "score" = "Contam. score upper limit")
library(ggplot2)
ggplot(contam_reads[-c(1, 2), ], aes(x = Sample_ID, y = `CONTAMINATION_SCORE (NA)`)) +
geom_bar(stat = "identity", fill = "cyan4") +
geom_hline(
data = contam_reads[1, ],
aes(yintercept = `CONTAMINATION_SCORE (NA)`, color = "score"),
size = 1.5, linetype = 2
) +
geom_hline(
data = contam_reads[1, ],
aes(yintercept = `CONTAMINATION_P_VALUE (NA)` * 10000, color = "pval"),
size = 1, linetype = 2
) +
geom_point(aes(x = Sample_ID, y = `CONTAMINATION_P_VALUE (NA)` * 10000, color = "pval"),
stat = "identity", size = 1.5, show.legend = FALSE
) +
labs(
title = "DNA Library QC Metrics for Contamination",
x = "Sample ID", y = "Contamination Score",
color = NULL
) +
scale_y_continuous(sec.axis = sec_axis(~ . / 10000, name = "P-value", breaks = seq(0, 1, 0.1))) +
theme(
axis.title.y = element_text(color = "cyan4", size = 15),
axis.text.y = element_text(color = "cyan4"),
axis.title.y.right = element_text(color = "red"),
axis.text.y.right = element_text(color = "red")
) +
scale_colour_manual(values = cols, labels = labels)

Error in `combine_vars()`: ! At least one layer must contain all faceting variables: in function with facet_wrap() & facet_grid()

I am trying to create a function in R that creates a dotplot with facets and saves it in my working directory. Code for the function below:
# Asker's attempt: build a faceted dot plot with error bars, print it and save
# it as "<figure_title>.png".
#   df                         data frame handed to ggplot()
#   xvalue, avgvalue, sdvalue  x values, point heights and the amount
#                              added/subtracted for the error bars
#   svalue                     point shape and facet variable
#   gvalue                     colour variable
#   main, xaxis, yaxis, glegend  title, axis labels and colour-legend title
#   figure_title               output file name without the ".png" extension
get_dotplot <- function(df, xvalue, avgvalue, sdvalue, svalue, gvalue, main, xaxis, yaxis, glegend, figure_title)
{
dp <- ggplot(df, aes(x = xvalue, y = avgvalue, color = gvalue)) +
geom_point(stat = 'identity', aes(shape=svalue, color=gvalue))+
geom_errorbar(aes(ymin=avgvalue-sdvalue, ymax=avgvalue+sdvalue))+
# NOTE: the error reported for this function ("At least one layer must contain
# all faceting variables: `svalue`") is raised for this faceting term.
facet_grid(cols = vars(svalue), scales = "fixed")+
labs(x = xaxis, y = yaxis, title = main, color=glegend)+
theme(axis.title.x.bottom = element_text(hjust = 0.5, vjust = 1),
axis.title.y = element_text(hjust = 0.5, vjust = 1),
axis.ticks.x = element_line(),
axis.text.x = element_text(angle = 0, hjust = 1, vjust = 0.5, size = 7),
axis.ticks.x.bottom = element_line(colour = "grey", size = (0.5)),
axis.ticks.y.left = element_line(colour = "black", size = (0.4)),
panel.background=element_rect(colour = "black", size = 0.5, fill=NA),
panel.grid = element_blank())
# Show the plot on the active device, then write it to disk.
print(dp)
ggsave(paste(figure_title, "png", sep = "."), plot = dp, scale = 1, dpi = 600)
}
get_dotplot(df, xvalue, avgvalue, sdvalue, svalue, gvalue, main, xaxis, yaxis, glegend, figure_title)
However, I always get this error message:
Error in `combine_vars()`:
! At least one layer must contain all faceting variables: `svalue`.
* Plot is missing `svalue`
* Layer 1 is missing `svalue`
* Layer 2 is missing `svalue`
Backtrace:
1. global get_dotplot_errorbar_yaxis(...)
3. ggplot2:::print.ggplot(dp)
5. ggplot2:::ggplot_build.ggplot(x)
6. layout$setup(data, plot$data, plot$plot_env)
7. ggplot2 f(..., self = self)
8. self$facet$compute_layout(data, self$facet_params)
9. ggplot2 f(...)
10. ggplot2::combine_vars(data, params$plot_env, cols, drop = params$drop)
I suspect it's because of the facetting so I played around between facet_wrap() and facet_grid() with no result. Could someone please help me with that ?
I checked and I have the svalue variable in my dataframe, and it is spelled correctly. I also consulted previous questions about the topic but they were not helpful.
the dataset looks something like this, but with a larger number of individuals and numbers of days:
set.seed(108)
n <- 1:12
treatment <- factor(paste("trt", 1:2))
individuals <- sample(LETTERS, 2)
days <- c("12", "20", "25")
avg_var1 <- sample(1:100, 12)
sd_var1 <- sample(1:50, 12)
avg_var2 <- sample(1:100, 12)
sd_var2 <- sample(1:50, 12)
avg_var3 <- sample(1:100, 12)
sd_var3 <- sample(1:50, 12)
test <- data.frame(n, treatment, individuals, days,avg_var1, sd_var1, avg_var2, sd_var2, avg_var3, sd_var3)
I define the variables for the function as follows on R:
df=test
xvalue=test$days
avgvalue=test$avg_var1
sdvalue = test$sd_var1
svalue=test$treatment
gvalue=test$individuals
main= "var1 in function of days"
xaxis="days"
yaxis="var1"
glegend="individuals"
figure_title ="var1_days"
As written, your code passes columns into the function repeating the data in the dataframe. This doesn't seem to "play nicely" with the non-standard evaluation used in ggplot. Essentially ggplot is looking for a column in df called "svalue" to use for faceting (it doesn't find it). Once this has been fixed, the same sort of problem occurs with the error bars.
One way round this is to just pass in the column names, and use aes_string for the variables. This doesn't work for the faceting or the calculated values, so those are calculated at the start of the function. This would give:
# Build a faceted dot plot with error bars, print it and save it as a PNG.
#
#   df            data frame holding every plotted column
#   xvalue        name (string) of the x-axis column
#   avgvalue      name (string) of the column giving the point heights
#   sdvalue       name (string) of the column added to / subtracted from
#                 `avgvalue` to obtain the error-bar limits
#   svalue        name (string) of the column used for point shape and facets
#   gvalue        name (string) of the column used for colour
#   main, xaxis, yaxis, glegend
#                 plot title, axis labels and colour-legend title
#   figure_title  output file name without extension; ggsave() writes
#                 "<figure_title>.png"
get_dotplot <- function(df, xvalue, avgvalue, sdvalue, svalue, gvalue, main, xaxis, yaxis, glegend, figure_title)
{
  # Error-bar limits and the facet variable are stored as real columns so the
  # layers below can refer to them by a fixed name.
  df$ymin <- df[[avgvalue]] - df[[sdvalue]]
  df$ymax <- df[[avgvalue]] + df[[sdvalue]]
  df$facets <- df[[svalue]]

  # `.data[[name]]` looks a column up by its string name inside aes(); it is
  # the supported replacement for the deprecated aes_string().
  dp <- ggplot(df, aes(x = .data[[xvalue]], y = .data[[avgvalue]], color = .data[[gvalue]])) +
    geom_point(aes(shape = .data[[svalue]])) +
    geom_errorbar(aes(ymin = ymin, ymax = ymax)) +
    facet_grid(cols = vars(facets), scales = "fixed") +
    # The shape legend is titled explicitly with the column name.
    labs(x = xaxis, y = yaxis, title = main, color = glegend, shape = svalue) +
    theme(
      axis.title.x.bottom = element_text(hjust = 0.5, vjust = 1),
      axis.title.y = element_text(hjust = 0.5, vjust = 1),
      axis.ticks.x = element_line(),
      axis.text.x = element_text(angle = 0, hjust = 1, vjust = 0.5, size = 7),
      axis.ticks.x.bottom = element_line(colour = "grey", size = (0.5)),
      axis.ticks.y.left = element_line(colour = "black", size = (0.4)),
      panel.background = element_rect(colour = "black", size = 0.5, fill = NA),
      panel.grid = element_blank()
    )

  # Show the plot on the active device, then write it to disk.
  print(dp)
  ggsave(paste0(figure_title, ".png"), plot = dp, scale = 1, dpi = 600)
}
get_dotplot(df=test,
xvalue="days",
avgvalue="avg_var1",
sdvalue = "sd_var1",
svalue="treatment",
gvalue="individuals",
main= "var1 in function of days",
xaxis="days",
yaxis="var1",
glegend="individuals",
figure_title ="var1_days")

How to plot two variables side by side in the same ggplot using geom_col?

I have the following data
structure(list(id = 1:7, date = c(2019L, 2019L, 2019L, 2019L,
2019L, 2019L, 2019L), station = structure(1:7, .Label = c("41B004",
"41B011", "41MEU1", "41N043", "41R001", "41R012", "41WOL1"), class = "factor"),
days = c(6L, 21L, 5L, 9L, 13L, 14L, 3L), mean3y = c(8.33,
21.3, NA, 10, 11.3, 16.3, 3.67), environ = structure(c(3L,
4L, 2L, 1L, 3L, 4L, 3L), .Label = c("Industriel avec influence modérée du trafic",
"Urbain avec faible influence du trafic", "Urbain avec influence modérée du trafic",
"Urbain avec très faible influence du trafic"), class = "factor")), class = "data.frame", row.names = c(NA,
-7L))
which is plotted with the following ggplot code
ggplot(data, aes(x = reorder(station, -days),
y = days, fill = environ)) +
geom_col(width = 0.5, colour = "black", size = 0.5) +
guides(fill = guide_legend(ncol = 2)) +
geom_text(aes(label = days),
vjust=-0.3, color="black", size = 3.5) +
geom_hline(aes(yintercept = 25),
linetype = 'dashed', colour = 'red', size = 1) +
labs(x = '', y = bquote("Nombre de jours de dépassement de NET60" ~ O[3] ~ "en 2019")) +
theme_minimal() +
theme(legend.position="bottom", legend.title = element_blank(),
legend.margin=margin(l = -2, unit='line'),
legend.text = element_text(size = 11),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 11),
axis.text.x = element_text(size = 11),
panel.grid.major.x = element_blank()) +
geom_hline(yintercept = 0)
generating this figure.
I would like to also add in this figure the variable mean3y besides days for each x value using another geom_col, such as
p <- ggplot(data, aes(x = reorder(station, -days),
y = days, fill = environ)) +
geom_col(width = 0.5, colour = "black", size = 0.5) +
guides(fill = guide_legend(ncol = 2)) +
geom_text(aes(label = days),
vjust=-0.3, color="black", size = 3.5) +
geom_col(aes(x = reorder(station, -days),
y = mean3y, fill = environ),
inherit.aes = FALSE,
width = 0.5, colour = "black", size = 0.5) +
geom_hline(aes(yintercept = 25),
linetype = 'dashed', colour = 'red', size = 1) +
labs(x = '', y = bquote("Nombre de jours de dépassement de NET60" ~ O[3] ~ "en 2019")) +
theme_minimal() +
theme(legend.position="bottom",
legend.title = element_blank(),
legend.margin=margin(l = -2, unit='line'),
legend.text = element_text(size = 11),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 11),
axis.text.x = element_text(size = 11),
panel.grid.major.x = element_blank()) +
geom_hline(yintercept = 0)
However, I was not able to achieve the desired result, despite the use of position = "dodge", as illustrated by this figure where both variables are overlapping.
Is there a way to achieve this, please ?
Many thanks.
Position dodges only work in a single layer and not between multiple layers. You could either solve the problem by manually nudging them or by formatting the data in such a way that it can be dodged. Examples of both in code below.
Your data was hard to copy into my R session and your code was more elaborate than necessary to demonstrate the problem, so I've kept both to a minimum.
library(ggplot2)
df <- data.frame(
x = c("A", "B"),
y = c(10, 15),
z = c(12, 9)
)
# Example of nudging
# Choose width and nudge values manually to fit your data
ggplot(df, aes(x, y)) +
geom_col(aes(fill = "first col"),
width = 0.45,
position = position_nudge(x = -0.225)) +
geom_col(aes(y = z, fill = "second_col"),
width = 0.45,
position = position_nudge(x = 0.225))
library(dplyr)
#> Warning: package 'dplyr' was built under R version 3.6.3
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Example of dodging + data formatting
ggplot(mapping = aes(x, y)) +
geom_col(data = rbind(mutate(df, a = "first_col"),
mutate(df, y = z, a = "second_col")),
aes(fill = a),
position = "dodge")
Created on 2020-04-16 by the reprex package (v0.3.0)
Consider this possible solution for your dataset - although you may want to play around with the aesthetics. I attempted to keep the aesthetics as similar as possible and set the bars to be the same color (based on df$environ), but make the difference between "days" and "mean3y" clear with text labels.
Data Preparation
First, we need to take the information from two columns and combine them: "days" and "mean3y". In your original data frame, these two columns can (and should) be combined to show type of value and the value itself. What we want to do is convert this type of data:
day.type.1 day.type.2
1 4 1
2 5 3
3 6 4
4 7 5
To this type of data:
day.type day.value
1 day.type.1 4
2 day.type.1 5
3 day.type.1 6
4 day.type.1 7
5 day.type.2 1
6 day.type.2 3
7 day.type.2 4
8 day.type.2 5
In the above example, you can use the gather() function from tidyr (gather() belongs to tidyr, not dplyr, so load it with library(tidyr)):
t %>% gather('day.type', 'day.value')
If we apply that to your data frame, we have to specify to do that to the data frame, but ignore the other columns:
# Gather only the two measured columns. Naming them explicitly keeps every
# other column (id, date, station, environ) as an identifier; excluding just
# date/station/environ would also stack `id` into `value`.
df1 <- df %>% gather("variable", "value", days, mean3y)
This converts your "days" and "mean3y" columns into two new columns called "variable" (which is either "days" or "mean3y") and "value" (which is the actual number)
I also had to convert the new column "value" into numeric... but that could have been due to how I had to import your data, which was... difficult. Please note, it is recommended that you include your dataset in future questions via the output of dput(your.data.frame)... believe me it makes all the difference. ;)
Plotting the new Dataset
Here the idea is to keep your same x axis, but we are now setting "value" as the y aesthetic. In addition, you want to make sure to include a group= aesthetic of "variable" so that dodging works appropriately for text and columns. If you are not familiar, "dodging" is the term for when a geom is kind of "split" across an axis aesthetic: like "subsetting" of discrete axis values.
The geom_col call is set for position='dodge'... not much else changes there. You need this because the default position is "stack" (which is why your attempt resulted in columns "stacked" on top of one another).
The geom_text call has a few things going on:
The dodge is set here with position=position_dodge(), which allows you to specify how far apart the "dodge" will be. It allowed me to "push apart" the labels to be a bit wider so that the text looks okay and doesn't run into the adjacent column. A larger width= argument in position_dodge() results in "pushing" the labels further apart. A value of 0 would be putting the labels in the center of the x axis aesthetic... 0.5 is default.
The label aesthetic is actually using both "variable" and "value" columns as a way to differentiate your columns from one another. I used paste0 and stuck a '\n' in-between so that you had two lines and could fit them. Had to adjust the size a bit too.
By default, the labels would be positioned right at y (value), which would mean they would overlap with your columns. You need to "nudge" them up, but cannot use nudge_y to push them up because you cannot combine nudge_y with position. What to do? Well, we can just overwrite the default y aesthetic by setting it equal to y + "a number" to nudge them up. Much better to do it this way.
Here's the final code:
ggplot(df1, aes(x = reorder(station, -value),
y = value, fill = environ,
group=variable)) +
geom_col(width = 0.5, colour = "black", size = 0.5, position='dodge') +
guides(fill = guide_legend(ncol = 2)) +
geom_text(aes(label = paste0(variable,'\n', value), y=value+1.5),
color="black", size = 3,
position=position_dodge(0.7)) +
geom_hline(aes(yintercept = 25),
linetype = 'dashed', colour = 'red', size = 1) +
labs(x = '', y = bquote("Nombre de jours de dépassement de NET60" ~ O[3] ~ "en 2019")) +
theme_minimal() +
theme(legend.position="bottom", legend.title = element_blank(),
legend.margin=margin(l = -2, unit='line'),
legend.text = element_text(size = 11),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 11),
axis.text.x = element_text(size = 11),
panel.grid.major.x = element_blank()) +
geom_hline(yintercept = 0)
One way to achieve this is to convert the data to long format via e.g. tidyr::pivot_longer, so that the variables we want to plot are categories of one variable. To get the order of the stations right I reorder station according to days before converting to long. To get the bars side-by-side I use position_dodge2 both in geom_col and geom_text. To show which bar corresponds to which var I put the names of the vars in the labels above the bars.
library(ggplot2)
library(dplyr)
library(tidyr)
data1 <- data %>%
mutate(station = forcats::fct_reorder(station,-days)) %>%
pivot_longer(c(days, mean3y), names_to = "var", values_to = "value")
# Axis-label helper: removes a literal "days." or "mean3y." fragment from each
# element of `x` and returns the cleaned character vector.
my_labels <- function(x) {
  # The dots are escaped so they match a literal "." only; an unescaped "."
  # is a regex wildcard and would also strip e.g. "daysX".
  gsub("(days\\.|mean3y\\.)", "", x)
}
p <- ggplot(data1, aes(x = station, y = value, fill = environ)) +
geom_col(position = position_dodge2(preserve = "single"), colour = "black") +
guides(fill = guide_legend(ncol = 2)) +
geom_text(aes(label = paste(var, "\n", value)), position = position_dodge2(width = .9, preserve = "single"), vjust=-0.3, color="black", size = 3.5) +
scale_x_discrete(labels = my_labels) +
geom_hline(aes(yintercept = 25), linetype = 'dashed', colour = 'red', size = 1) +
labs(x = '', y = bquote("Nombre de jours de dépassement de NET60" ~ O[3] ~ "en 2019")) +
theme_minimal() + theme(legend.position="bottom", legend.title = element_blank(), legend.margin=margin(l = -2, unit='line'),
legend.text = element_text(size = 11),
axis.text.y = element_text(size = 12), axis.title.y = element_text(size = 11),
axis.text.x = element_text(size = 11),
panel.grid.major.x = element_blank()) + geom_hline(yintercept = 0)

How to create a timeseries plot using facet_wrap of ggplot2

This is a follow-up to my question "How to format the x-axis of the hard coded plotting function of SPEI package in R?". In my previous question I had a dataset for a single location that needed to be plotted; in my current situation I have datasets for multiple locations (11 in total) that need to be plotted in a single figure. I tried to reuse the same code with minor adjustments, but it does not produce the right plot, and I also do not see the date breaks on the x-axis. Any help would be appreciated.
library(SPEI)
library(tidyverse)
library(zoo)
data("balance")
SPEI_12=spei(balance,12)
SpeiData=SPEI_12$fitted
myDate=as.data.frame(seq(as.Date("1901-01-01"), to=as.Date("2008-12-31"),by="months"))
names(myDate)= "Dates"
myDate$year=as.numeric(format(myDate$Dates, "%Y"))
myDate$month=as.numeric(format(myDate$Dates, "%m"))
myDate=myDate[,-1]
newDates = as.character(paste(month.abb[myDate$month], myDate$year, sep = "_" ))
DataWithDate = data.frame(newDates,SpeiData)
df_spei12 = melt(DataWithDate, id.vars = "newDates" )
SPEI12 = df_spei12 %>%
na.omit() %>%
mutate(sign = ifelse(value >= 0, "pos", "neg"))
SPEI12 = SPEI12%>%
spread(sign,value) %>%
replace(is.na(.), 0)
ggplot(SPEI12) +
geom_area(aes(x = newDates, y = pos), col = "blue") +
geom_area(aes(x = newDates, y = neg), col = "red") +
facet_wrap(~variable)+
scale_y_continuous(limits = c(-2.5, 2.5), breaks = -2.5:2.5) +
scale_x_discrete(breaks=c(1901,1925,1950,1975,2000,2008))+
ylab("SPEI") + ggtitle("12-Month SPEI") +
theme_bw() + theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"))+
theme(axis.text = element_text(size=12, colour = "black"), axis.title = element_text(size = 12,face = "bold"))
Here is what the code produces- instead of area plot it is producing bar plots.
With geom_area I was getting an incorrect fill in the plot (the areas were superimposed), so I used geom_bar instead.
library(SPEI)
library(tidyverse)
library(zoo)
library(reshape2)
library(scales)
data("balance")
SPEI_12=spei(balance,12)
SpeiData=SPEI_12$fitted
myDate=as.data.frame(seq(as.Date("1901-01-01"), to=as.Date("2008-12-31"),by="months"))
names(myDate)= "Dates"
myDate$year=as.numeric(format(myDate$Dates, "%Y"))
myDate$month=as.numeric(format(myDate$Dates, "%m"))
myDate=myDate[,-1]
newDates = as.character(paste(month.abb[myDate$month], myDate$year, sep = "_" ))
DataWithDate = data.frame(newDates,SpeiData)
df_spei12 = melt(DataWithDate, id.vars = "newDates" )
SPEI12 = df_spei12 %>%
na.omit() %>%
mutate(sign = ifelse(value >= 0, "pos", "neg"))
###
SPEI12_md <- SPEI12 %>%
dplyr::mutate(Date = lubridate::parse_date_time(newDates, "m_y"),
Date = lubridate::ymd(Date),
variable = as.factor(variable))
levels(SPEI12_md$variable) <- c("Indore", "Kimberley", "Albuquerque", "Valencia",
"Viena", " Abashiri", "Tampa", "São Paulo",
"Lahore", "Punta Arenas", "Helsinki")
v <- 0.1 # 0.1 it is a gap
v1 <- min(SPEI12_md$value) - v
v2 <- max(SPEI12_md$value) + v
vv <- signif(max(abs(v1), abs(v2)), 2)
ggplot2::ggplot(SPEI12_md) +
geom_bar(aes(x = Date, y = value, col = sign, fill = sign),
show.legend = F, stat = "identity") +
scale_color_manual(values = c("pos" = "darkblue", "neg" = "red")) +
scale_fill_manual(values = c("pos" = "darkblue", "neg" = "red")) +
facet_wrap(~variable) +
scale_x_date(date_breaks = "10 years",
labels = scales::date_format("%Y-%m")) + #
scale_y_continuous(limits = c(-vv, vv), breaks = c(seq(-vv-v, 0, length.out = 3),
seq(0, vv+v, length.out = 3))) +
ylab("SPEI") + ggtitle("12-Month SPEI") +
theme_bw() + theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
axis.text = element_text(size=12, colour = "black"),
axis.title = element_text(size = 12,face = "bold"),
axis.text.x = element_text(angle = 90, size = 10))
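You use scale_x_discrete() with numeric breaks (1901, 1925, ...), but your x-axis variable, newDates, is a character vector with values such as "Jan_1901", so none of the breaks match a value on the axis. That would explain why nothing is printed on the x-axis.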
If you transform newDates as numeric (as you proposed in comments)
SPEI12$newDates= as.numeric(as.character(gsub(".*_","",SPEI12$newDates)))
and use scale_x_continuous() instead of discrete, you obtain this:

Resources