Fill area under time series based on factor value - r

I am trying to fill the area under a time series line based on a factor value of 0 and 1. The area should only be filled if the value is equal to 1.
I have managed to colour code the time series line based on the factor value with the following code:
install.packages("scales")
library("scales")
library("ggplot2")
ggplot(plot.timeseries) +
geom_line(aes(x = Date, y = Price, color = Index, group = 1)) +
scale_x_date(labels = date_format("%Y"), breaks = date_breaks("years")) +
scale_colour_manual(values = c("red3", "green3"))
This provides the following graph:
I have also tried this:
ggplot(plot.timeseries, aes(x=Date, y = Price, fill=Index)) +
geom_area(alpha=0.6) +
theme_classic() +
scale_fill_manual(values=c("#999999", "#32CD32"))
which comes out as a complete mess:
Ideally the final result should look like plot1 where the parts of the line in green are filled.
The time series data can be accessed here:
https://drive.google.com/file/d/1qWsuJk41_fJZktLCAZSgfGvoDLqTt-jk/view?usp=sharing
Any help would be greatly appreciated!

Okay, here is what I did to get the graph shown below if that is what you want.
# -------------------------------------------------------------------------
# load required packages #
library(scales)
library("ggplot2")
library(dplyr)
# -------------------------------------------------------------------------
# load the data to a df #
plot.timeseries <- get(load("TimeSeries_Data.RData"))
# -------------------------------------------------------------------------
# transform the data (my_fill_color will have green and NA values)
my_object <- plot.timeseries %>%
select(Price, Index, Date) %>%
mutate(Index_ord_factor = factor(Index, levels = unique(Index), ordered=TRUE),
my_fill_color = case_when(
Index_ord_factor > 0 ~ "green" # ordered factor enables the '>' operation
))
# -------------------------------------------------------------------------
# Plot your graph using the transformed data
ggplot(my_object, mapping = aes(x=Date, y=Price)) +
geom_line(aes(color = Index, group = 1))+
geom_col(fill =my_object$my_fill_color, width = 1)
# -------------------------------------------------------------------------
Let me know if you need elaboration to understand the script. Attached is the output in my end.

For those that are interested I also received this alternative solution from Erik Chacon.
You can view his tutorial here for a better understanding of the ggplot2 extension he designed, which is used in this solution.
# Installing and loading necessary packages
install.packages("remotes")
remotes::install_github("ErickChacon/mbsi")
library(mbsi)
library(ggplot2)
load("timeseries.RData")
#converting factor to numeric
plot.timeseries$Index <- as.numeric(levels(plot.timeseries$Index))[plot.timeseries$Index]
ggplot(plot.timeseries, aes(Date, Price)) +
geom_line() +
stat_events(aes(event = I(1 * (Index > 0)), fill = "Index"),
threshold = min(plot.timeseries$Price),
fill = "green", alpha = 0.3)

Related

ggplot: No legend when using scale_fill_brewer with geom_contour_filled

I've plotted a specific set of meteorological data using ggplot as described in the R code below. However, when I use scale_fill_brewer to specific the fill color, a legend does not appear.
What changes are necessary for the legend to appear?
library(tidyverse)
library(lubridate)
library(ggplot2)
library(RColorBrewer)
qurl <- "https://www.geo.fu-berlin.de/met/ag/strat/produkte/qbo/singapore.dat"
sing <- read_table(qurl, skip=4)
# the data file adds a 100mb data row starting in 1997 increasing the number of rows per year from
# 14 to 15. So, one calcuation must be applied to rnum <140 and a different to rnum >140.
sing2 <- sing %>% separate(1,into=c('hpa','JAN'),sep='\\s+') %>% drop_na() %>%
subset(hpa != 'hPa') %>%
mutate(rnum = row_number(),
hpa=as.integer(hpa)) %>%
mutate(year = case_when(rnum <=140 ~ 1987 + floor(rnum/14), # the last year with 14 rows of data
rnum >=141 ~ 1987 + floor(rnum+10/15))) %>% # the first year with 15 rows of data
relocate(year, .before='hpa') %>% arrange(year,hpa) %>%
pivot_longer(cols=3:14, names_to='month',values_to='qbo') %>%
mutate(date=ymd(paste0(year,'-',month,'-15')),
hpa=as.integer(hpa),
qbo=as.numeric(qbo))
sing2 <- sing %>% separate(1,into=c('hpa','JAN'),sep='\\s+') %>% drop_na() %>%
subset(hpa != 'hPa') %>%
mutate(year=1987+floor(row_number()/15),
hpa=as.integer(hpa)) %>%
relocate(year, .before='hpa') %>% arrange(year,hpa) %>%
pivot_longer(cols=2:13, names_to='month',values_to='qbo') %>%
mutate(date=ymd(paste0(year,'-',month,'-15')),
hpa=as.integer(hpa),
qbo=as.numeric(qbo))
# End Data Massaging. It's ready to be graphed
# A simple call to ggplot with geom_contour_filled generates a legend
sing2 %>%
ggplot(aes(x=date,y=hpa)) +
geom_contour_filled(aes(z=qbo*0.1)) +
scale_y_reverse()
# Adding scale_fill_brewer removes the legend.
# Adding show.legend = TRUE to the geom_countour_filled options has no effect.
limits = c(-1,1)*max(abs(sing2$qbo),na.rm=TRUE)
zCuts <- round(seq(limits[1], limits[2], length.out = 11), digits=0)
sing2 %>%
ggplot() +
geom_contour_filled(aes(x=date,y=hpa, z = qbo*0.1),breaks=zCuts*0.1) +
scale_y_reverse(expand=c(0,0)) +
scale_x_date(expand=c(0,0), date_breaks = '1 year', date_labels = '%Y') +
scale_fill_brewer(palette = 5,type='div',breaks=zCuts) +
theme_bw() +
theme(legend.position = 'right',
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
OP, I don't have a direct answer for you, given that your example is not able to be replicated (unable to access the data you gave). In place, I can give you a bit of advice on debugging, since it seems the issue is related to the breaks= argument of scale_fill_brewer(). As you mention, you get a legend when using geom_contour_filled(), but not when you add the scale_fill_brewer() part.
Let me use the example from the documentation for geom_contour_filled() to illustrate this behavior, which utilizes the built-in dataset, fathfuld.
I'll add in your own palette and type choice, leaving out the breaks argument for example:
v <- ggplot(faithfuld, aes(waiting, eruptions, z = density))
v + geom_contour_filled() +
scale_fill_brewer(palette = 5, type='div')
If you do the same thing, but add in a "nonsensical" breaks argument, you get the same plot, but without a legend (like you are seeing):
v + geom_contour_filled() +
scale_fill_brewer(palette = 5, type='div', breaks=1:4)
For me, this is good evidence that the issue in your code relates to the value for breaks= not being within the range expected. Is this just a typo? Note that breaks=zCuts in scale_fill_brewer(), yet breaks=zCuts*0.1 in geom_contour_filled(). This would put each value for your color scale to be 10 times outside the range of the breaks for the contours themselves. I'd be willing to bet that this change to that scale_fill_brewer() line will do the trick:
# earlier plot code
... +
scale_fill_brewer(palette = 5,type='div',breaks=zCuts*0.1) +
...
# remaining plot code

generating a manhattan plot with ggplot

I've been trying to generate a Manhattan plot using ggplot, which I finally got to work. However, I cannot get the points to be colored by chromosome, despite having tried several different examples I've seen online. I'm attaching my code and the resulting plot below. Can anyone see why the code is failing to color points by chromosome?
library(tidyverse)
library(vroom)
# threshold to drop really small -log10 p values so I don't have to plot millions of uninformative points. Just setting to 0 since I'm running for a small subset
min_p <- 0.0
# reading in data to brassica_df2, converting to data frame, removing characters from AvsDD p value column, converting to numeric, filtering by AvsDD (p value)
brassica_df2 <- vroom("manhattan_practice_data.txt", col_names = c("chromosome", "position", "num_SNPs", "prop_SNPs_coverage", "min_coverage", "AvsDD", "AvsWD", "DDvsWD"))
brassica_df2 <- as.data.frame(brassica_df2)
brassica_df2$AvsDD <- gsub("1:2=","",as.character(brassica_df2$AvsDD))
brassica_df2$AvsDD <- as.numeric(brassica_df2$AvsDD)
brassica_df2 <- filter(brassica_df2, AvsDD > min_p)
# setting significance threshhold
sig_cut <- -log10(1)
# settin ylim for graph
ylim <- (max(brassica_df2$AvsDD) + 2)
# setting up labels for x axis
axisdf <- as.data.frame(brassica_df2 %>% group_by(chromosome) %>% summarize(center=( max(position) + min(position) ) / 2 ))
# making manhattan plot of statistically significant SNP shifts
manhplot <- ggplot(data = filter(brassica_df2, AvsDD > sig_cut), aes(x=position, y=AvsDD), color=as.factor(chromosome)) +
geom_point(alpha = 0.8) +
scale_x_continuous(label = axisdf$chromosome, breaks= axisdf$center) +
scale_color_manual(values = rep(c("#276FBF", "#183059"), unique(length(axisdf$chromosome)))) +
geom_hline(yintercept = sig_cut, lty = 2) +
ylab("-log10 p value") +
ylim(c(0,ylim)) +
theme_classic() +
theme(legend.position = "n")
print(manhplot)
I think you just need to move your color=... argument inside the call to aes():
ggplot(
data = filter(brassica_df2, AvsDD > sig_cut),
aes(x=position, y=AvsDD),
color=as.factor(chromosome))
becomes...
ggplot(
data = filter(brassica_df2, AvsDD > sig_cut),
aes(x=position, y=AvsDD, color=as.factor(chromosome)))

Plot multiple distributions by year using ggplot Boxplot

I'm trying to evaluate the above data in a boxplot similar to this: https://www.r-graph-gallery.com/89-box-and-scatter-plot-with-ggplot2.html
I want the x axis to reflect my "Year" variable and each boxplot to evaluate the 8 methods as a distribution. Eventually I'd like to pinpoint the "Selected" variable in relation to that distribution but currently I just want this thing to render!
I figure out how to code my y variable and I get various errors no matter what I try. I think the PY needs to be as.factor but I've tried some code that way and I just get other errors.
anyway here is my code (Send Help):
# Libraries
library(tidyverse)
library(hrbrthemes)
library(viridis)
library(ggplot2)
library(readxl) # For reading in Excel files
library(lubridate) # For handling dates
library(dplyr) # for mutate and pipe functions
# Path to current and prior data folders
DataPath_Current <- "C:/R Projects/Box Plot Test"
Ult_sum <- read_excel(path = paste0(DataPath_Current, "/estimate.XLSX"),
sheet = "Sheet1",
range = "A2:J12",
guess_max = 100)
# just want to see what my table looks like
Ult_sum
# create a dataset - the below is code I commented out
# data <- data.frame(
# name=c(Ult_sum[,1]),
# value=c(Ult_sum[1:11,2:8])
#)
value <- Ult_sum[2,]
# Plot
Ult_sum %>%
ggplot( aes(x= Year, y= value, fill=Year)) +
geom_boxplot() +
scale_fill_viridis(discrete = TRUE, alpha=0.6) +
geom_jitter(color="black", size=0.4, alpha=0.9) +
theme_ipsum() +
theme(
legend.position="none",
plot.title = element_text(size=11)
) +
ggtitle("A boxplot with jitter") +
xlab("")
I do not see how your code matches the screenshot of your dataset. However, just a general hint: ggplot likes data in long format. I suggest you reshape your data using tidyr::reshape_long oder data.table::melt. This way you get 3 columns: year, method, value, of which the first two should be a factor. The resulting dataset can then be neatly used in aes() as aes(x=year, y=value, fill=method).
Edit: Added an example. Does this do what you want?
library(data.table)
library(magrittr)
library(ggplot2)
DT <- data.table(year = factor(rep(2010:2014, 10)),
method1 = rnorm(50),
method2 = rnorm(50),
method3 = rnorm(50))
DT_long <- DT %>% melt(id.vars = "year")
ggplot(DT_long, aes(x = year, y = value, fill = variable)) +
geom_boxplot()

How to get the plots side by side and that too sorted according to Fill in R Language [duplicate]

I am making a dodged barplot in ggplot2 and one grouping has a zero count that I want to display. I remembered seeing this on HERE a while back and figured the scale_x_discrete(drop=F) would work. It does not appear to work with dodged bars. How can I make the zero counts show?
For instance, (code below) in the plot below, type8~group4 has no examples. I would still like the plot to display the empty space for the zero count instead of eliminating the bar. How can I do this?
mtcars2 <- data.frame(type=factor(mtcars$cyl),
group=factor(mtcars$gear))
m2 <- ggplot(mtcars2, aes(x=type , fill=group))
p2 <- m2 + geom_bar(colour="black", position="dodge") +
scale_x_discrete(drop=F)
p2
Here's how you can do it without making summary tables first.
It did not work in my CRAN versioin (2.2.1) but in the latest development version of ggplot (2.2.1.900) I had no issues.
ggplot(mtcars, aes(factor(cyl), fill = factor(vs))) +
geom_bar(position = position_dodge(preserve = "single"))
http://ggplot2.tidyverse.org/reference/position_dodge.html
Updated geom_bar() needs stat = "identity"
For what it's worth: The table of counts, dat, above contains NA. Sometimes, it is useful to have an explicit 0 instead; for instance, if the next step is to put counts above the bars. The following code does just that, although it's probably no simpler than Joran's. It involves two steps: get a crosstabulation of counts using dcast, then melt the table using melt, followed by ggplot() as usual.
library(ggplot2)
library(reshape2)
mtcars2 = data.frame(type=factor(mtcars$cyl), group=factor(mtcars$gear))
dat = dcast(mtcars2, type ~ group, fun.aggregate = length)
dat.melt = melt(dat, id.vars = "type", measure.vars = c("3", "4", "5"))
dat.melt
ggplot(dat.melt, aes(x = type,y = value, fill = variable)) +
geom_bar(stat = "identity", colour = "black", position = position_dodge(width = .8), width = 0.7) +
ylim(0, 14) +
geom_text(aes(label = value), position = position_dodge(width = .8), vjust = -0.5)
The only way I know of is to pre-compute the counts and add a dummy row:
dat <- rbind(ddply(mtcars2,.(type,group),summarise,count = length(group)),c(8,4,NA))
ggplot(dat,aes(x = type,y = count,fill = group)) +
geom_bar(colour = "black",position = "dodge",stat = "identity")
I thought that using stat_bin(drop = FALSE,geom = "bar",...) instead would work, but apparently it does not.
I asked this same question, but I only wanted to use data.table, as it's a faster solution for much larger data sets. I included notes on the data so that those that are less experienced and want to understand why I did what I did can do so easily. Here is how I manipulated the mtcars data set:
library(data.table)
library(scales)
library(ggplot2)
mtcars <- data.table(mtcars)
mtcars$Cylinders <- as.factor(mtcars$cyl) # Creates new column with data from cyl called Cylinders as a factor. This allows ggplot2 to automatically use the name "Cylinders" and recognize that it's a factor
mtcars$Gears <- as.factor(mtcars$gear) # Just like above, but with gears to Gears
setkey(mtcars, Cylinders, Gears) # Set key for 2 different columns
mtcars <- mtcars[CJ(unique(Cylinders), unique(Gears)), .N, allow.cartesian = TRUE] # Uses CJ to create a completed list of all unique combinations of Cylinders and Gears. Then counts how many of each combination there are and reports it in a column called "N"
And here is the call that produced the graph
ggplot(mtcars, aes(x=Cylinders, y = N, fill = Gears)) +
geom_bar(position="dodge", stat="identity") +
ylab("Count") + theme(legend.position="top") +
scale_x_discrete(drop = FALSE)
And it produces this graph:
Furthermore, if there is continuous data, like that in the diamonds data set (thanks to mnel):
library(data.table)
library(scales)
library(ggplot2)
diamonds <- data.table(diamonds) # I modified the diamonds data set in order to create gaps for illustrative purposes
setkey(diamonds, color, cut)
diamonds[J("E",c("Fair","Good")), carat := 0]
diamonds[J("G",c("Premium","Good","Fair")), carat := 0]
diamonds[J("J",c("Very Good","Fair")), carat := 0]
diamonds <- diamonds[carat != 0]
Then using CJ would work as well.
data <- data.table(diamonds)[,list(mean_carat = mean(carat)), keyby = c('cut', 'color')] # This step defines our data set as the combinations of cut and color that exist and their means. However, the problem with this is that it doesn't have all combinations possible
data <- data[CJ(unique(cut),unique(color))] # This functions exactly the same way as it did in the discrete example. It creates a complete list of all possible unique combinations of cut and color
ggplot(data, aes(color, mean_carat, fill=cut)) +
geom_bar(stat = "identity", position = "dodge") +
ylab("Mean Carat") + xlab("Color")
Giving us this graph:
Use count and complete from dplyr to do this.
library(tidyverse)
mtcars %>%
mutate(
type = as.factor(cyl),
group = as.factor(gear)
) %>%
count(type, group) %>%
complete(type, group, fill = list(n = 0)) %>%
ggplot(aes(x = type, y = n, fill = group)) +
geom_bar(colour = "black", position = "dodge", stat = "identity")
You can exploit the feature of the table() function, which computes the number of occurrences of a factor for all its levels
# load plyr package to use ddply
library(plyr)
# compute the counts using ddply, including zero occurrences for some factor levels
df <- ddply(mtcars2, .(group), summarise,
types = as.numeric(names(table(type))),
counts = as.numeric(table(type)))
# plot the results
ggplot(df, aes(x = types, y = counts, fill = group)) +
geom_bar(stat='identity',colour="black", position="dodge")

Don't drop zero count: dodged barplot

I am making a dodged barplot in ggplot2 and one grouping has a zero count that I want to display. I remembered seeing this on HERE a while back and figured the scale_x_discrete(drop=F) would work. It does not appear to work with dodged bars. How can I make the zero counts show?
For instance, (code below) in the plot below, type8~group4 has no examples. I would still like the plot to display the empty space for the zero count instead of eliminating the bar. How can I do this?
mtcars2 <- data.frame(type=factor(mtcars$cyl),
group=factor(mtcars$gear))
m2 <- ggplot(mtcars2, aes(x=type , fill=group))
p2 <- m2 + geom_bar(colour="black", position="dodge") +
scale_x_discrete(drop=F)
p2
Here's how you can do it without making summary tables first.
It did not work in my CRAN versioin (2.2.1) but in the latest development version of ggplot (2.2.1.900) I had no issues.
ggplot(mtcars, aes(factor(cyl), fill = factor(vs))) +
geom_bar(position = position_dodge(preserve = "single"))
http://ggplot2.tidyverse.org/reference/position_dodge.html
Updated geom_bar() needs stat = "identity"
For what it's worth: The table of counts, dat, above contains NA. Sometimes, it is useful to have an explicit 0 instead; for instance, if the next step is to put counts above the bars. The following code does just that, although it's probably no simpler than Joran's. It involves two steps: get a crosstabulation of counts using dcast, then melt the table using melt, followed by ggplot() as usual.
library(ggplot2)
library(reshape2)
mtcars2 = data.frame(type=factor(mtcars$cyl), group=factor(mtcars$gear))
dat = dcast(mtcars2, type ~ group, fun.aggregate = length)
dat.melt = melt(dat, id.vars = "type", measure.vars = c("3", "4", "5"))
dat.melt
ggplot(dat.melt, aes(x = type,y = value, fill = variable)) +
geom_bar(stat = "identity", colour = "black", position = position_dodge(width = .8), width = 0.7) +
ylim(0, 14) +
geom_text(aes(label = value), position = position_dodge(width = .8), vjust = -0.5)
The only way I know of is to pre-compute the counts and add a dummy row:
dat <- rbind(ddply(mtcars2,.(type,group),summarise,count = length(group)),c(8,4,NA))
ggplot(dat,aes(x = type,y = count,fill = group)) +
geom_bar(colour = "black",position = "dodge",stat = "identity")
I thought that using stat_bin(drop = FALSE,geom = "bar",...) instead would work, but apparently it does not.
I asked this same question, but I only wanted to use data.table, as it's a faster solution for much larger data sets. I included notes on the data so that those that are less experienced and want to understand why I did what I did can do so easily. Here is how I manipulated the mtcars data set:
library(data.table)
library(scales)
library(ggplot2)
mtcars <- data.table(mtcars)
mtcars$Cylinders <- as.factor(mtcars$cyl) # Creates new column with data from cyl called Cylinders as a factor. This allows ggplot2 to automatically use the name "Cylinders" and recognize that it's a factor
mtcars$Gears <- as.factor(mtcars$gear) # Just like above, but with gears to Gears
setkey(mtcars, Cylinders, Gears) # Set key for 2 different columns
mtcars <- mtcars[CJ(unique(Cylinders), unique(Gears)), .N, allow.cartesian = TRUE] # Uses CJ to create a completed list of all unique combinations of Cylinders and Gears. Then counts how many of each combination there are and reports it in a column called "N"
And here is the call that produced the graph
ggplot(mtcars, aes(x=Cylinders, y = N, fill = Gears)) +
geom_bar(position="dodge", stat="identity") +
ylab("Count") + theme(legend.position="top") +
scale_x_discrete(drop = FALSE)
And it produces this graph:
Furthermore, if there is continuous data, like that in the diamonds data set (thanks to mnel):
library(data.table)
library(scales)
library(ggplot2)
diamonds <- data.table(diamonds) # I modified the diamonds data set in order to create gaps for illustrative purposes
setkey(diamonds, color, cut)
diamonds[J("E",c("Fair","Good")), carat := 0]
diamonds[J("G",c("Premium","Good","Fair")), carat := 0]
diamonds[J("J",c("Very Good","Fair")), carat := 0]
diamonds <- diamonds[carat != 0]
Then using CJ would work as well.
data <- data.table(diamonds)[,list(mean_carat = mean(carat)), keyby = c('cut', 'color')] # This step defines our data set as the combinations of cut and color that exist and their means. However, the problem with this is that it doesn't have all combinations possible
data <- data[CJ(unique(cut),unique(color))] # This functions exactly the same way as it did in the discrete example. It creates a complete list of all possible unique combinations of cut and color
ggplot(data, aes(color, mean_carat, fill=cut)) +
geom_bar(stat = "identity", position = "dodge") +
ylab("Mean Carat") + xlab("Color")
Giving us this graph:
Use count and complete from dplyr to do this.
library(tidyverse)
mtcars %>%
mutate(
type = as.factor(cyl),
group = as.factor(gear)
) %>%
count(type, group) %>%
complete(type, group, fill = list(n = 0)) %>%
ggplot(aes(x = type, y = n, fill = group)) +
geom_bar(colour = "black", position = "dodge", stat = "identity")
You can exploit the feature of the table() function, which computes the number of occurrences of a factor for all its levels
# load plyr package to use ddply
library(plyr)
# compute the counts using ddply, including zero occurrences for some factor levels
df <- ddply(mtcars2, .(group), summarise,
types = as.numeric(names(table(type))),
counts = as.numeric(table(type)))
# plot the results
ggplot(df, aes(x = types, y = counts, fill = group)) +
geom_bar(stat='identity',colour="black", position="dodge")

Resources