Keep missing data in ggplot2 stacked barplot - r

This is my sample data. ID 144 contains 6 positions, while ID AB01 contains only 3. In a stacked plot, I still want to show all 6 positions for AB01, with the missing positions shown in a specific color.
ID YEAR POS
144 2017 10
144 2017 12
144 2017 18
144 2017 15
144 2017 163
144 2017 200
AB01 2018 10
AB01 2018 15
AB01 2018 18
This is what I tried.
# Stacked bar chart: every row contributes one unit-height segment to its ID's
# bar, coloured by position, with one facet per year.
ggplot(data1, aes(x = ID, y = 1, fill = as.factor(POS))) +
  # geom_bar() has no `exclude` argument, so it is not passed here.
  geom_bar(stat = "identity", position = "stack") +
  # Facet on the column name itself (the column is called YEAR); `data$col`
  # inside a facet formula bypasses the plot data.
  facet_wrap(~ YEAR, ncol = 1, scales = "free") +
  # labs() keys are case-sensitive: the plot title is `title`, not `Title`.
  labs(x = "Year", y = "Number ", fill = "Position", title = "Pos plot") +
  theme(text = element_text(size = 15, color = "Black"))
data
data <- structure(list(ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("144", "AB01"), class = "factor"), YEAR = c(2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2018L, 2018L, 2018L), POS = c(10L, 12L, 18L, 15L, 163L, 200L, 10L, 15L, 18L)), class = "data.frame", row.names = c(NA, -9L))

Can you use geom_tile instead?
data <- structure(list(ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("144", "AB01"), class = "factor"), YEAR = c(2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2018L, 2018L, 2018L), POS = c(10L, 12L, 18L, 15L, 163L, 200L, 10L, 15L, 18L)), class = "data.frame", row.names = c(NA, -9L))
# One outlined tile per (ID, POS) row present in `data`; combinations that do
# not occur in the data are simply left blank.
ggplot(data, aes(x = ID, y = as.factor(POS), fill = as.factor(POS))) +
  geom_tile(color = "black") +
  coord_cartesian(expand = FALSE) + # get rid of space around tiles
  theme_classic() # make background white

# Same tile plot, split into one panel per year with the facet strips hidden.
ggplot(data, aes(x = ID, y = as.factor(POS), fill = as.factor(POS))) +
  geom_tile(color = "black") +
  # Facet on the YEAR column of `data`; the object `data1` is never defined in
  # this snippet and the column is not called `Year`.
  facet_wrap(~ YEAR, ncol = 2, scales = "free_x") +
  coord_cartesian(expand = FALSE) +
  theme(strip.background = element_blank(), strip.text.x = element_blank())

How about this:
data <- structure(list(ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("144", "AB01"), class = "factor"), YEAR = c(2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2018L, 2018L, 2018L), POS = c(10L, 12L, 18L, 15L, 163L, 200L, 10L, 15L, 18L)), class = "data.frame", row.names = c(NA, -9L))
library(ggplot2)
library(forcats)
library(tidyr)
library(dplyr)
# Expand the data to every ID x POS combination. `temp` is a character copy of
# POS taken before the expansion, so the rows added by complete() carry
# temp = NA; those NAs are then turned into an explicit "Missing" level.
data_1 <-
  data %>%
  mutate(temp = as.character(POS)) %>%
  complete(ID, POS) %>%
  mutate(temp = fct_explicit_na(fct_inseq(temp), na_level = "Missing"))

# Fill colour for each position, plus the colour used for absent positions.
col_map <- c("10" = "powderblue",
             "12" = "red",
             "18" = "orange",
             "15" = "yellow",
             "163" = "green",
             "200" = "blue",
             "Missing" = "White")

# One tile per ID x POS cell; fct_rev() reverses the order of the POS levels
# on the y axis.
ggplot(data_1, aes(x = ID, y = fct_rev(factor(POS)), fill = temp)) +
  geom_tile(color = "black", width = 0.5, height = 0.8) +
  scale_fill_manual(values = col_map) +
  coord_cartesian(expand = FALSE) +
  labs(x = NULL,
       y = NULL,
       fill = NULL) +
  theme_classic() +
  theme(axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        axis.text.x = element_text(size = 14),
        axis.line = element_blank())
Created on 2020-07-08 by the reprex package (v0.3.0)

Related

R Markdown output of bar plot different than output in the console

I am trying to make an R Markdown document where a bar plot of urchin abundance and the associated analyses are printed.
I have written code to do this and running the code in the console results in the following figure which is what I want:
However, when I create a code chunk with this code and run it as an R Markdown with an html output, I get a figure that looks like this:
Note that the code for the first figure is in the R Markdown code chunk under the "# combined plot #" section
Also note that the beginning of this chunk is as follows: {r, results='hide', echo = FALSE, message=FALSE, warning=FALSE}
Urchins R Markdown Code
### Packages
library(tidyverse)
library(ggplot2)
library(lme4)
library(lmerTest)
library(emmeans)
library(MuMIn)
library(effects)
library(rcompanion)
library(png)
## Urchin Database
# database with starting urchins #
mean_urchin_totals2 <- structure(list(Year = c(2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L,
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L,
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L
), Date = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L,
11L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L,
1L, 1L, 1L, 1L, 1L, 1L, 4L, 5L, 5L, 6L, 6L, 7L, 8L, 8L, 8L, 8L,
8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L), .Label = c("1/30/18", "11/28/17",
"12/3/17", "2/24/18", "3/10/18", "3/13/18", "3/23/18", "5/15/18",
"5/20/18", "5/25/17", "6/6/17", "9/10/17", "9/5/17"), class = "factor"),
Reef.. = c(211L, 212L, 213L, 214L, 215L, 216L, 111L, 112L,
113L, 114L, 115L, 116L, 111L, 112L, 113L, 114L, 115L, 116L,
211L, 212L, 213L, 214L, 215L, 216L, 111L, 112L, 113L, 114L,
115L, 116L, 211L, 212L, 213L, 214L, 215L, 216L, 211L, 212L,
213L, 214L, 215L, 216L, 116L, 113L, 114L, 111L, 112L, 115L,
211L, 212L, 213L, 214L, 215L, 216L, 111L, 112L, 113L, 114L,
115L, 116L), Site = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("HAN", "WAI"), class = "factor"),
Treatment = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L), .Label = c("CLO", "OPE"), class = "factor"),
TimeStep = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L), Site_long = c("Hanauma Bay", "Hanauma Bay",
"Hanauma Bay", "Hanauma Bay", "Hanauma Bay", "Hanauma Bay",
"Waikiki", "Waikiki", "Waikiki", "Waikiki", "Waikiki", "Waikiki",
"Waikiki", "Waikiki", "Waikiki", "Waikiki", "Waikiki", "Waikiki",
"Hanauma Bay", "Hanauma Bay", "Hanauma Bay", "Hanauma Bay",
"Hanauma Bay", "Hanauma Bay", "Waikiki", "Waikiki", "Waikiki",
"Waikiki", "Waikiki", "Waikiki", "Hanauma Bay", "Hanauma Bay",
"Hanauma Bay", "Hanauma Bay", "Hanauma Bay", "Hanauma Bay",
"Hanauma Bay", "Hanauma Bay", "Hanauma Bay", "Hanauma Bay",
"Hanauma Bay", "Hanauma Bay", "Waikiki", "Waikiki", "Waikiki",
"Waikiki", "Waikiki", "Waikiki", "Hanauma Bay", "Hanauma Bay",
"Hanauma Bay", "Hanauma Bay", "Hanauma Bay", "Hanauma Bay",
"Waikiki", "Waikiki", "Waikiki", "Waikiki", "Waikiki", "Waikiki"
), Treatment_long = c("Open", "Closed", "Open", "Closed",
"Open", "Closed", "Open", "Closed", "Open", "Closed", "Open",
"Closed", "Open", "Closed", "Open", "Closed", "Open", "Closed",
"Open", "Closed", "Open", "Closed", "Open", "Closed", "Open",
"Closed", "Open", "Closed", "Open", "Closed", "Open", "Closed",
"Open", "Closed", "Open", "Closed", "Open", "Closed", "Open",
"Closed", "Open", "Closed", "Closed", "Open", "Closed", "Open",
"Closed", "Open", "Open", "Closed", "Open", "Closed", "Open",
"Closed", "Open", "Closed", "Open", "Closed", "Open", "Closed"
), Shelter = c("High", "Low", "High", "Low", "High", "Low",
"High", "Low", "High", "Low", "High", "Low", "High", "Low",
"High", "Low", "High", "Low", "High", "Low", "High", "Low",
"High", "Low", "High", "Low", "High", "Low", "High", "Low",
"High", "Low", "High", "Low", "High", "Low", "High", "Low",
"High", "Low", "High", "Low", "Low", "High", "Low", "High",
"Low", "High", "High", "Low", "High", "Low", "High", "Low",
"High", "Low", "High", "Low", "High", "Low"), mean_urchin_abundance = c(25L,
2L, 3L, 6L, 44L, 5L, 2L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 47L, 15L, 47L, 7L, 67L, 3L, 0L, 0L, 1L, 0L, 0L,
0L, 30L, 6L, 10L, 6L, 48L, 2L, 12L, 1L, 2L, 1L, 10L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 1L, 4L, 0L, 1L, 16L, 0L, 0L, 0L,
0L, 0L, 0L, 0L)), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -60L), groups = structure(list(
Year = c(2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L,
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L,
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L),
Date = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
13L, 13L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 5L, 5L, 6L, 6L, 7L,
8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L), .Label = c("1/30/18",
"11/28/17", "12/3/17", "2/24/18", "3/10/18", "3/13/18", "3/23/18",
"5/15/18", "5/20/18", "5/25/17", "6/6/17", "9/10/17", "9/5/17"
), class = "factor"), Reef.. = c(211L, 212L, 213L, 214L,
215L, 216L, 111L, 112L, 113L, 114L, 115L, 116L, 111L, 112L,
113L, 114L, 115L, 116L, 211L, 212L, 213L, 214L, 215L, 216L,
111L, 112L, 113L, 114L, 115L, 116L, 211L, 212L, 213L, 214L,
215L, 216L, 211L, 212L, 213L, 214L, 215L, 216L, 116L, 113L,
114L, 111L, 112L, 115L, 211L, 212L, 213L, 214L, 215L, 216L,
111L, 112L, 113L, 114L, 115L, 116L), Site = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("HAN",
"WAI"), class = "factor"), Treatment = structure(c(2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L,
2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("CLO",
"OPE"), class = "factor"), TimeStep = c(5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), Site_long = c("Hanauma Bay",
"Hanauma Bay", "Hanauma Bay", "Hanauma Bay", "Hanauma Bay",
"Hanauma Bay", "Waikiki", "Waikiki", "Waikiki", "Waikiki",
"Waikiki", "Waikiki", "Waikiki", "Waikiki", "Waikiki", "Waikiki",
"Waikiki", "Waikiki", "Hanauma Bay", "Hanauma Bay", "Hanauma Bay",
"Hanauma Bay", "Hanauma Bay", "Hanauma Bay", "Waikiki", "Waikiki",
"Waikiki", "Waikiki", "Waikiki", "Waikiki", "Hanauma Bay",
"Hanauma Bay", "Hanauma Bay", "Hanauma Bay", "Hanauma Bay",
"Hanauma Bay", "Hanauma Bay", "Hanauma Bay", "Hanauma Bay",
"Hanauma Bay", "Hanauma Bay", "Hanauma Bay", "Waikiki", "Waikiki",
"Waikiki", "Waikiki", "Waikiki", "Waikiki", "Hanauma Bay",
"Hanauma Bay", "Hanauma Bay", "Hanauma Bay", "Hanauma Bay",
"Hanauma Bay", "Waikiki", "Waikiki", "Waikiki", "Waikiki",
"Waikiki", "Waikiki"), Treatment_long = c("Open", "Closed",
"Open", "Closed", "Open", "Closed", "Open", "Closed", "Open",
"Closed", "Open", "Closed", "Open", "Closed", "Open", "Closed",
"Open", "Closed", "Open", "Closed", "Open", "Closed", "Open",
"Closed", "Open", "Closed", "Open", "Closed", "Open", "Closed",
"Open", "Closed", "Open", "Closed", "Open", "Closed", "Open",
"Closed", "Open", "Closed", "Open", "Closed", "Closed", "Open",
"Closed", "Open", "Closed", "Open", "Open", "Closed", "Open",
"Closed", "Open", "Closed", "Open", "Closed", "Open", "Closed",
"Open", "Closed"), .rows = list(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L,
41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L,
52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L)), row.names = c(NA,
-60L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))
# Parse the m/d/yy date labels straight to Date, without first storing a
# POSIXlt value as a data-frame column.
mean_urchin_totals2$new_date <- as.Date(as.character(mean_urchin_totals2$Date), format = "%m/%d/%y")
# Shelter is derived from the treatment: "Closed" -> "Low", anything else -> "High".
mean_urchin_totals2$Shelter <- factor(
  ifelse(mean_urchin_totals2$Treatment_long == "Closed", "Low", "High"),
  levels = c("Low", "High")
)
mean_urchin_totals2$Site_long <- as.factor(mean_urchin_totals2$Site_long)
mean_urchin_totals2$Year <- as.factor(mean_urchin_totals2$Year)
## Urchin Analyses
### Distribution
# variables
# Reef identifier as a free-standing vector (used as the grouping factor of the
# random effect in the glmer() calls).
module_urchin2 <- mean_urchin_totals2$Reef..
plotNormalHistogram(mean_urchin_totals2$mean_urchin_abundance, main = "Urchins")
### glmer with Poisson Distribution
# Make "High" the first (reference) level. Only the level order is changed:
# the column must NOT be passed through sort(), because sorting one column on
# its own detaches each Shelter value from the row it belongs to and corrupts
# every model and summary computed afterwards.
mean_urchin_totals2$Shelter <- factor(mean_urchin_totals2$Shelter, levels = c("High", "Low"))
# with EUME, ECMA #
# Poisson mixed model of urchin abundance with Site_long, Shelter and their
# interaction as fixed effects, and a random intercept per module_urchin2 value.
# na.action = "na.fail" makes the fit stop on missing values.
# NOTE(review): module_urchin2 is a separate vector rather than a column of
# mean_urchin_totals2, so it has to stay aligned row-for-row with the data -- confirm.
urchin_glmer <- glmer(mean_urchin_abundance ~ Site_long + Shelter + Site_long*Shelter + (1|module_urchin2), data = mean_urchin_totals2, family = poisson, na.action = "na.fail")
summary(urchin_glmer)
# Effects plots #
plot(allEffects(urchin_glmer))
# Residuals glmer #
# Normal Q-Q plot of the model residuals, with a reference line added.
qqnorm(resid(urchin_glmer), main = "Urchin Residual Plot")
qqline(resid(urchin_glmer))
# Pairwise multiple comparisons #
# Estimated marginal means for each Site_long x Shelter cell, then pairwise
# contrasts requested with simple = "each".
emm_urchin <- emmeans(urchin_glmer, ~Site_long*Shelter)
pairs(emm_urchin, simple = "each")
### Best Model
# Best model #
# Model selection table for sub-models of urchin_glmer, with R^2 requested as
# an extra column.
dredge(urchin_glmer, extra = "R^2")
# Additive Poisson mixed model: Site_long, Shelter and Year as fixed effects
# (no interaction term), random intercept per module_urchin2 value.
urchin_glmer_1 <- glmer(mean_urchin_abundance ~ Site_long + Shelter + Year + (1|module_urchin2), data = mean_urchin_totals2, family = poisson, na.action = "na.fail")
summary(urchin_glmer_1)
# Effects plots #
plot(allEffects(urchin_glmer_1))
# Residuals glmer #
# Normal Q-Q plot of the model residuals, with a reference line added.
qqnorm(resid(urchin_glmer_1), main = "Urchin Residual Plot")
qqline(resid(urchin_glmer_1))
# emmeans package #
# Marginal means over the Site_long x Shelter grid and all pairwise contrasts.
# NOTE(review): the grid is specified as Site_long*Shelter although
# urchin_glmer_1 itself has no Site_long:Shelter term -- confirm this is intended.
emm <- emmeans(urchin_glmer_1, ~ Site_long*Shelter)
pairs(emm)
## Urchin Plots
### Summary Database
#barplot CI
# Population standard deviation of x: root of the summed squared deviations
# from the mean, divided by n (not n - 1).
std.dev.pop <- function(x) {
  deviations <- x - mean(x)
  sqrt(sum(deviations^2) / length(x))
}
# Standard error of the mean based on the population standard deviation.
std.error.pop <- function(x) {
  n_obs <- length(x)
  std.dev.pop(x) / sqrt(n_obs)
}
# Per Site x Treatment x Shelter cell: mean abundance, population SD and a
# 95% interval computed as mean +/- 1.96 * standard error.
plot_data_Site4 <- mean_urchin_totals2 %>%
  group_by(Site_long, Treatment_long, Shelter) %>%
  summarise(mean = mean(mean_urchin_abundance),
            sd = std.dev.pop(mean_urchin_abundance),
            lower = mean(mean_urchin_abundance) - 1.96 * std.error.pop(mean_urchin_abundance),
            upper = mean(mean_urchin_abundance) + 1.96 * std.error.pop(mean_urchin_abundance)) %>%
  # drop the grouping left over from summarise()
  ungroup() %>%
  # reorder summary dataframe for plotting #
  # Set the plotting order through factor levels and sort on them, instead of
  # picking rows by position: positional indexing silently selects the wrong
  # rows whenever the grouping produces a different number or order of rows.
  mutate(Shelter = factor(Shelter, levels = c("Low", "High")),
         Site_long = factor(Site_long, levels = c("Waikiki", "Hanauma Bay"))) %>%
  arrange(Site_long, Shelter)
# ggplot2 barplot final #
# Order of the sites along the x axis.
position <- c("Waikiki", "Hanauma Bay")
### Plots
# plot #
# Dodged bars of mean abundance per site, filled by shelter level, with
# lower/upper error bars and the legend placed inside the panel.
urchin_plot <- ggplot(plot_data_Site4, aes(x = Site_long, y = mean, fill = Shelter)) +
  geom_col(position = "dodge", width = 0.8) +
  geom_errorbar(aes(ymin = lower, ymax = upper),
                position = position_dodge(0.8), width = 0.1) +
  scale_x_discrete(limits = position) +
  scale_y_continuous(breaks = seq(0, 30, 5)) +
  scale_fill_grey(name = "Shelter", start = 0.8, end = 0.2) +
  labs(x = "Site", y = "Mean urchin abundance ± 95% CI") +
  theme_classic(base_size = 18) +
  theme(axis.title.x = element_blank(),
        legend.position = c(0.1, 0.9))
# plot without legend #
# Same dodged bar chart as above, re-assigned to urchin_plot with the legend
# switched off.
urchin_plot <- ggplot(plot_data_Site4, aes(x = Site_long, y = mean, fill = Shelter)) +
  geom_col(position = "dodge", width = 0.8) +
  geom_errorbar(aes(ymin = lower, ymax = upper),
                position = position_dodge(0.8), width = 0.1) +
  scale_x_discrete(limits = position) +
  scale_y_continuous(breaks = seq(0, 30, 5)) +
  scale_fill_grey(name = "Shelter", start = 0.8, end = 0.2) +
  labs(x = "Site", y = "Mean urchin abundance ± 95% CI") +
  theme_classic(base_size = 18) +
  theme(axis.title.x = element_blank(),
        legend.position = "none")
# combined plot #
# Letters from the multiple comparisons, one per row of plot_data_Site4 and in
# the same row order.
mult_compare4 <- c( "A", "A","B", "C")
# The labels are matched to the bars purely by position, so fail loudly if the
# summary table does not have exactly one row per label.
stopifnot(length(mult_compare4) == nrow(plot_data_Site4))
urchin_plot_final <- ggplot(data = plot_data_Site4, aes(fill = Shelter, y = mean, x = Site_long)) +
  geom_bar(position = "dodge", stat = "identity", width = .8) +
  scale_x_discrete(limits = position) +
  scale_y_continuous(breaks = seq(0, 30, 5)) +
  geom_errorbar(aes(ymin = lower, ymax = upper), position = position_dodge(.8), width = .1) +
  # Refer to `upper` as a column of the plot data rather than through
  # plot_data_Site4$upper, so the label height follows the same rows as the bars.
  geom_text(aes(label = mult_compare4, y = upper + 0.05), vjust = -.5, position = position_dodge(width = 0.8)) +
  scale_fill_grey(name = "Shelter", start = .8, end = .2) +
  labs(x = "Site", y = "Mean urchin abundance ± 95% CI") +
  theme_classic(base_size = 13) +
  theme(axis.title.x = element_blank(),
        legend.position = "none",
        axis.text.y = element_text(angle = 90))
urchin_plot_final
## Urchin Time Series Plots
### Summary Databases
## Time Series Plots ###
## Subsetting for time series figures ##
# One subset per site x treatment combination.
HAN_OPE_urchin <- filter(mean_urchin_totals2, Site_long == "Hanauma Bay" & Treatment_long == "Open")
HAN_CLO_urchin <- filter(mean_urchin_totals2, Site_long == "Hanauma Bay" & Treatment_long == "Closed")
WAI_OPE_urchin <- filter(mean_urchin_totals2, Site_long == "Waikiki" & Treatment_long == "Open")
WAI_CLO_urchin <- filter(mean_urchin_totals2, Site_long == "Waikiki" & Treatment_long == "Closed")
### Population SE ###
# Population standard deviation of x (divides by n, not n - 1).
std.dev.pop <- function(x) {
  centred <- x - mean(x)
  sqrt(sum(centred^2) / length(x))
}
# Standard error of the mean derived from the population standard deviation.
std.error.pop <- function(x) {
  sample_size <- length(x)
  std.dev.pop(x) / sqrt(sample_size)
}
# Per time step x shelter x treatment x site: mean abundance, population SD,
# mean +/- one standard error, and the mean sampling date.
plot_data_urchin <- mean_urchin_totals2 %>%
  group_by(TimeStep, Shelter, Treatment_long, Site_long) %>%
  summarise(mean = mean(mean_urchin_abundance),
            sd = std.dev.pop(mean_urchin_abundance),
            lower = mean(mean_urchin_abundance) - std.error.pop(mean_urchin_abundance),
            upper = mean(mean_urchin_abundance) + std.error.pop(mean_urchin_abundance),
            Date = mean(new_date)) %>%
  # Drop the leftover grouping before Shelter (a grouping column) is rewritten.
  ungroup() %>%
  # add shelter column #
  mutate(Shelter = factor(ifelse(Treatment_long == "Closed", "Low", "High"),
                          levels = c("Low", "High")))
### Time Series Plots
## full plot ##
# Mean abundance over time, one line per site x treatment combination, with
# lower/upper error bars.
urchin_time_series_plot <- ggplot(data = plot_data_urchin, aes(x = Date, y = mean, group = interaction(Site_long, Treatment_long))) +
  geom_line(aes(linetype = Treatment_long)) +
  geom_point(aes(shape = Site_long, size = 3, color = Treatment_long)) +
  # size is mapped inside aes(), so its legend is suppressed explicitly;
  # "none" replaces the deprecated `FALSE`.
  guides(size = "none") +
  theme(text = element_text(size = 15)) +
  geom_errorbar(aes(ymin = lower, ymax = upper)) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b%y") +
  scale_y_continuous(breaks = seq(0, 10, .5)) +
  labs(x = "Date", y = "Mean urchin abundance ± SEM")
## cropped plot ##
# Time series with point shape by site and fill / line type by shelter,
# cropped to y in [0, 18].
urchin_time_series_plot <- ggplot(data = plot_data_urchin, aes(x = Date, y = mean, fill = Shelter, shape = Site_long)) +
  geom_point(aes(size = 3)) +
  geom_line(aes(linetype = Shelter)) +
  scale_shape_manual(values = c(21, 24)) +
  scale_fill_manual(values = c(NA, "black"), guide = guide_legend(override.aes = list(shape = 21))) +
  # "none" replaces the deprecated `FALSE`.
  guides(size = "none") +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  scale_y_continuous(breaks = seq(0, 20, 2)) +
  coord_cartesian(ylim = c(0, 18)) +
  labs(x = "Date", y = "Mean urchin abundance ± SEM") +
  # theme_bw() is a complete theme and replaces any theme() settings added
  # before it, so the earlier theme(text = element_text(size = 15)) call had no
  # effect and is omitted; all text sizing is done in the theme() call below.
  theme_bw() +
  theme(panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        axis.title.x = element_blank(),
        axis.title = element_text(size = rel(1.5)),
        axis.text = element_text(size = rel(1.5)),
        legend.text = element_text(size = rel(1.5)),
        legend.title = element_text(size = rel(1.5)))
## cropped plot for combined figure ##
# Legend image and a bounding box (date range on x, 40-51 on y).
# NOTE(review): none of these five objects is used by the plot below --
# presumably they were meant for placing the image on the plot; confirm.
legend_image <- readPNG('Legend_final2.png')
xmin <- as.Date("2018/01/01")
xmax <- as.Date("2018/03/21")
ymin <- 40
ymax <- 51
urchin_time_series_plot <- ggplot(data = plot_data_urchin, aes(x = Date, y = mean, fill = Shelter, shape = Site_long)) +
  geom_point(aes(size = 3)) +
  geom_line(aes(linetype = Shelter)) +
  scale_linetype_manual(values = c("dashed", "solid")) +
  scale_shape_manual(values = c(21, 24)) +
  scale_fill_manual(values = c(NA, "black"), guide = guide_legend(override.aes = list(shape = 21))) +
  # "none" replaces the deprecated `FALSE`.
  guides(size = "none") +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  scale_y_continuous(breaks = seq(0, 55, 10)) +
  coord_cartesian(ylim = c(0, 55)) +
  labs(x = "Date", y = "Mean urchin abundance ± SEM") +
  # theme_bw() is a complete theme and replaces any theme() settings added
  # before it, so the earlier theme(text = element_text(size = 1)) call had no
  # effect and is omitted; all text sizing is done in the theme() call below.
  theme_bw() +
  theme(panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        axis.title.x = element_blank(),
        axis.title = element_text(size = rel(1)),
        axis.text = element_text(size = rel(1)),
        legend.text = element_text(size = rel(1.5)),
        axis.text.y = element_text(angle = 90),
        legend.title = element_text(size = rel(1.5)),
        legend.position = "none")
urchin_time_series_plot
I am looking to make an output where the html created has the urchin figure the same as the first figure from above. Thank you very much for your time!

How to remove zero frequency for frequency plot and fix time?

When I produce a frequency plot:
Data <- structure(list(Venue = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("Conference", "Journal"), class = "factor"), Year = c(2008L,
2009L, 2010L, 2011L, 2012L, 2013L, 2014L, 2015L, 2016L, 2017L,
2018L, 2019L, 2008L, 2009L, 2010L, 2011L, 2012L, 2013L, 2014L,
2015L, 2016L, 2017L, 2018L), Frequency = c(0L, 0L, 0L, 0L, 1L,
1L, 2L, 1L, 4L, 4L, 11L, 3L, 2L, 1L, 0L, 0L, 3L, 5L, 3L, 7L,
8L, 19L, 10L)), class = "data.frame", row.names = c(NA, -23L))
library(ggplot2)
# Stacked bars of Frequency per Year, filled by Venue, with every segment
# labelled by its value at mid-height.
ggplot(Data, aes(x = Year, y = Frequency, fill = Venue, label = Frequency)) +
  geom_col() +
  geom_text(position = position_stack(vjust = 0.5), size = 3)
In the plot I get labels for the zero values, and the years on the x axis are not shown the way they appear in the data frame.
How can I remove the zero-frequency labels from the plot (while keeping the non-zero records, e.g. for 2012) and show every year on the x axis, one per bar?
Is this what you want?
The code to get it is:
# Treat Year as discrete so every year gets its own tick, and blank out the
# label of zero-height segments by setting it to NA explicitly.
ggplot(Data, aes(x = factor(Year), y = Frequency, fill = Venue,
                 label = ifelse(Frequency > 0, Frequency, NA))) +
  geom_col() +
  # na.rm = TRUE drops the NA labels without a "removed rows" warning.
  geom_text(size = 3, position = position_stack(vjust = 0.5), na.rm = TRUE) +
  scale_x_discrete(name = "Year")

ggplot2 graphing and plotting average and minimum

here is my code:
library(dplyr); library(tidyr)
# For every company, build the T-3 ... T+3 window around its T0 year and look
# up the price for each of those years.
T0.modified <- T0data %>%
  # create year range based on each company's T0 year
  mutate(Year.M1 = Year - 1,
         Year.M2 = Year - 2,
         Year.M3 = Year - 3,
         Year.P1 = Year + 1,
         Year.P2 = Year + 2,
         Year.P3 = Year + 3) %>%
  # convert to long format (pivot_longer() is the current replacement for the
  # superseded gather()), match with Alldata based on both company & year
  pivot_longer(cols = -c(Company, Price),
               names_to = "reference.year",
               values_to = "actual.year") %>%
  left_join(Alldata, by = c("Company" = "Company", "actual.year" = "Year")) %>%
  # keep T0 price for year T0, & use matched prices for all other years
  mutate(Price = ifelse(reference.year == "Year", Price.x, Price.y)) %>%
  # take maximum of all matched prices for each company each year
  # (a year with no match in Alldata has Price = NA and stays NA)
  group_by(Company, reference.year) %>%
  summarise(Price = max(Price)) %>%
  ungroup() %>%
  # order reference.year for correct sequence in ggplot's x-axis
  mutate(reference.year = factor(reference.year,
                                 levels = c("Year.M3", "Year.M2", "Year.M1", "Year",
                                            "Year.P1", "Year.P2", "Year.P3"),
                                 labels = c("T-3", "T-2", "T-1", "T0", "T+1", "T+2", "T+3")))
# One price line per company plus a dashed grey line with the per-year mean
# across companies.
ggplot(T0.modified,
       aes(x = reference.year, y = Price, group = Company, color = Company)) +
  geom_line() +
  xlab("Year") + theme_bw() +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary();
  # group = 1 pools all companies into a single summary line.
  stat_summary(fun = mean, geom = "line", group = 1,
               linetype = 2, size = 1.5, colour = "grey") +
  annotate("label", x = 7, y = 200, label = "Average",
           fill = "grey", alpha = 0.5, hjust = 1)
And here is my data:
T0data:
structure(list(Company = structure(1:3, .Label = c("Amazon",
"Cisco", "McDonald's"), class = "factor"), Year = c(2011L, 2008L,
2013L), Price = c(182, 21.82, 95.15)), .Names = c("Company",
"Year", "Price"), row.names = c(NA, 3L), class = "data.frame")
All Data:
structure(list(Company = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("Amazon", "Cisco", "McDonald's"), class = "factor"),
Year = c(2008L, 2008L, 2008L, 2008L, 2009L, 2009L, 2010L,
2010L, 2010L, 2011L, 2011L, 2012L, 2012L, 2013L, 2013L, 2014L,
2014L, 2014L, 2008L, 2010L, 2010L, 2010L, 2011L, 2011L, 2012L,
2012L, 2013L, 2013L, 2014L, 2014L, 2014L, 2015L, 2015L, 2016L,
2016L, 2016L, 2005L, 2005L, 2005L, 2006L, 2006L, 2007L, 2007L,
2007L, 2008L, 2008L, 2009L, 2009L, 2009L, 2010L, 2010L, 2011L,
2011L, 2011L), Price = c(91L, 77L, 81L, 87L, 63L, 88L, 110L,
75L, 117L, 170L, 190L, 215L, 245L, 316L, 275L, 330L, 378L,
390L, 55L, 62L, 66L, 65L, 72L, 98L, 93L, 88L, 99L, 101L,
94L, 103L, 96L, 99L, 116L, 112L, 123L, 113L, 19L, 17L, 18L,
20L, 19L, 26L, 31L, 27L, 24L, 21L, 14L, 22L, 18L, 26L, 22L,
14L, 16L, 15L)), .Names = c("Company", "Year", "Price"), class = "data.frame", row.names = c(NA,
-54L))
Here's my question:
How can I make the line graph show only 2 values, the average, and the minimum for all values?
And How can I plot a random company to represent the third line in the graph too to compare it to the minimum and the average?
Something like this? It plots the average, the minimum and a random company (see subset).
# Dashed grey line: per-year mean over all companies; dashed red line: per-year
# minimum; solid blue line: one selected company (change the subset() filter to
# show a different one).
p <- ggplot(T0.modified) + xlab("Year") + theme_bw() +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(aes(x = reference.year, y = Price), fun = mean, geom = "line",
               group = 1, linetype = 2, size = 1.5, colour = "grey") +
  stat_summary(aes(x = reference.year, y = Price), fun = min, geom = "line",
               group = 1, linetype = 2, size = 1.5, colour = "red") +
  annotate("label", x = 7, y = 200, label = "Average", fill = "grey", alpha = 0.5, hjust = 1) +
  annotate("label", x = 7, y = 30, label = "Min", fill = "grey", alpha = 0.5, hjust = 1) +
  geom_line(data = subset(T0.modified, Company == "Amazon"),
            aes(x = reference.year, y = Price, group = Company), color = "blue")

Defining T0 in my program

Here's a small program I'm making to eventually get a final graph. I have 2 separate data sets. One is called T0 and the second one contains all the data I have. I want this program to get the T0 values from the first data frame, and then search for the maximum price in the 3 years before and the 3 years after the T0 year.
In essence, my program is going to assign T0 values that I chose arbitrarily. Then it will search automatically in my database for the maximum price in each year except the t0 year.
The problem I'm facing, is with the implementation of T0 values in the schedule. It just does not come out right when I run my code.
The problem apparently has to do with the way I'm defining T0. Should I use a for loop? or is there a small tweak I'm missing?
Final result wanted:
Data Base Example:
T0data:
structure(list(Company = structure(1:3, .Label = c("Amazon",
"Cisco", "McDonald's"), class = "factor"), Year = c(2011L, 2008L,
2013L), Price = c(182, 21.82, 95.15)), .Names = c("Company",
"Year", "Price"), row.names = c(NA, 3L), class = "data.frame")
All Data:
structure(list(Company = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("Amazon", "Cisco", "McDonald's"), class = "factor"),
Year = c(2008L, 2008L, 2008L, 2008L, 2009L, 2009L, 2010L,
2010L, 2010L, 2011L, 2011L, 2012L, 2012L, 2013L, 2013L, 2014L,
2014L, 2014L, 2008L, 2010L, 2010L, 2010L, 2011L, 2011L, 2012L,
2012L, 2013L, 2013L, 2014L, 2014L, 2014L, 2015L, 2015L, 2016L,
2016L, 2016L, 2005L, 2005L, 2005L, 2006L, 2006L, 2007L, 2007L,
2007L, 2008L, 2008L, 2009L, 2009L, 2009L, 2010L, 2010L, 2011L,
2011L, 2011L), Price = c(91L, 77L, 81L, 87L, 63L, 88L, 110L,
75L, 117L, 170L, 190L, 215L, 245L, 316L, 275L, 330L, 378L,
390L, 55L, 62L, 66L, 65L, 72L, 98L, 93L, 88L, 99L, 101L,
94L, 103L, 96L, 99L, 116L, 112L, 123L, 113L, 19L, 17L, 18L,
20L, 19L, 26L, 31L, 27L, 24L, 21L, 14L, 22L, 18L, 26L, 22L,
14L, 16L, 15L)), .Names = c("Company", "Year", "Price"), class = "data.frame", row.names = c(NA,
-54L))
My code:
library(data.table)
T0data <- read.csv(file = "C:/Users/My first file.csv", header = TRUE)
Alldata <- read.csv(file = "C:/Users/My second file.csv", header = TRUE)
d <- as.data.table(Alldata)
# Each company has its own T0 year, so attach it to that company's rows.
# Comparing `Year` against the bare vector T0data$Year would recycle the three
# T0 years across all rows regardless of company.
t0_lookup <- as.data.table(T0data)[, .(Company, year_zero = Year)]
d <- merge(d, t0_lookup, by = "Company")
# Filter to include year_zero +/- 3 years and get Best result per company per year
d <- d[Year >= year_zero - 3 & Year <= year_zero + 3,
       .(Best_Result = max(Price, na.rm = TRUE)), by = .(Company, Year, year_zero)]
# Add T as interval to year_zero (and convert to factor in order to get all
# values from -3 to 3)
d[, "T" := factor(Year - year_zero, levels = seq(-3, 3), ordered = TRUE)]
# Cast to wide format (fill missing values with NA)
dcast(d, Company ~ T, value.var = "Best_Result", drop = FALSE)
# Cast to wide format (fill missing values with "")
dcast(d, Company ~ T, value.var = "Best_Result", drop = FALSE, fun.aggregate = paste0,
      fill = "")
Here's a solution that uses dplyr / tidyr packages from the tidyverse, rather than data.table, but it should do the job:
library(dplyr); library(tidyr)
# For every company, build the T-3 ... T+3 window around its T0 year and look
# up the price for each of those years.
T0.modified <- T0data %>%
  # create year range based on each company's T0 year
  mutate(Year.M1 = Year - 1,
         Year.M2 = Year - 2,
         Year.M3 = Year - 3,
         Year.P1 = Year + 1,
         Year.P2 = Year + 2,
         Year.P3 = Year + 3) %>%
  # convert to long format (pivot_longer() is the current replacement for the
  # superseded gather()), match with Alldata based on both company & year
  pivot_longer(cols = -c(Company, Price),
               names_to = "reference.year",
               values_to = "actual.year") %>%
  left_join(Alldata, by = c("Company" = "Company", "actual.year" = "Year")) %>%
  # keep T0 price for year T0, & use matched prices for all other years
  mutate(Price = ifelse(reference.year == "Year", Price.x, Price.y)) %>%
  # take maximum of all matched prices for each company each year
  # (a year with no match in Alldata has Price = NA and stays NA)
  group_by(Company, reference.year) %>%
  summarise(Price = max(Price)) %>%
  ungroup() %>%
  # order reference.year for correct sequence in ggplot's x-axis
  mutate(reference.year = factor(reference.year,
                                 levels = c("Year.M3", "Year.M2", "Year.M1", "Year",
                                            "Year.P1", "Year.P2", "Year.P3"),
                                 labels = c("T-3", "T-2", "T-1", "T0", "T+1", "T+2", "T+3")))
Resulting plot:
library(ggplot2)
# One price line per company across the T-3 ... T+3 reference years.
ggplot(T0.modified,
       aes(x = reference.year, y = Price, color = Company, group = Company)) +
  geom_line() +
  labs(x = "Year") +
  theme_bw()
Edit adding average for each year using stat_summary:
# One price line per company plus a dashed grey line with the per-year mean
# across companies.
ggplot(T0.modified,
       aes(x = reference.year, y = Price, group = Company, color = Company)) +
  geom_line() +
  xlab("Year") + theme_bw() +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary();
  # group = 1 pools all companies into a single summary line.
  stat_summary(fun = mean, geom = "line", group = 1,
               linetype = 2, size = 1.5, colour = "grey") +
  annotate("label", x = 7, y = 200, label = "Average",
           fill = "grey", alpha = 0.5, hjust = 1)

Remove space between bars within a grid

I want to move the bars that are within one grid cell closer together to save space. Is there a way to do that? The code I used only changes the bar width and does not change the spacing. I am aware that binwidth no longer works in geom_bar, as I found in these questions:
Adding space between bars in ggplot2
Increase space between bars in ggplot
This is the data
# Console output of dput(grp1): the structure(...) call below is the deparsed
# data frame (18 rows; 12 columns listed in .Names). Assigning the
# structure(...) expression to `grp1` recreates the data for the plots.
dput(grp1)
structure(list(Rot = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("2-year",
"3-year", "4-year"), class = "factor"), Rot.Herb = structure(c(3L,
4L, 13L, 14L, 5L, 6L, 9L, 10L, 15L, 16L, 1L, 2L, 7L, 8L, 11L,
12L, 17L, 18L), .Label = c("A4-conv", "A4-low", "C2-conv", "C2-low",
"C3-conv", "C3-low", "C4-conv", "C4-low", "O3-conv", "O3-low",
"O4-conv", "O4-low", "S2-conv", "S2-low", "S3-conv", "S3-low",
"S4-conv", "S4-low"), class = "factor"), Rot.trt = structure(c(2L,
2L, 7L, 7L, 3L, 3L, 5L, 5L, 8L, 8L, 1L, 1L, 4L, 4L, 6L, 6L, 9L,
9L), .Label = c("A4", "C2", "C3", "C4", "O3", "O4", "S2", "S3",
"S4"), class = "factor"), Crop = structure(c(2L, 2L, 4L, 4L,
2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L), .Label = c("alfalfa",
"corn", "oat", "soybean"), class = "factor"), Year = c(2014L,
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L,
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L), Herb.trt = structure(c(1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L), .Label = c("conv", "low"), class = "factor"), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = "kg.ha", class = "factor"), N = c(4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4), value = c(0.43168573275,
16.22463846275, 0.554446363, 25.89844593075, 13.882743364, 84.1922080725,
84.2625, 105.7375, 0.8350420895, 106.7464386085, 127.8875, 65.3875,
1.964170084, 29.46524929925, 249.7625, 333.125, 0.535560112,
28.757014893), sd = c(0.528135488290589, 20.3216237207314, 0.8810357408252,
40.4576156325903, 27.3195114207629, 71.6424882235524, 22.6948736869511,
40.9587266851237, 0.931926987921512, 108.45555847826, 89.1035387905553,
25.141843442092, 2.20017281317418, 24.1072626208021, 93.2983509589889,
85.0439543216722, 0.48339034731648, 33.2628789566726), se = c(0.264067744145294,
10.1608118603657, 0.4405178704126, 20.2288078162951, 13.6597557103815,
35.8212441117762, 11.3474368434756, 20.4793633425619, 0.465963493960756,
54.2277792391299, 44.5517693952777, 12.570921721046, 1.10008640658709,
12.0536313104011, 46.6491754794945, 42.5219771608361, 0.24169517365824,
16.6314394783363), ci = c(0.840381416699796, 32.3362381637038,
1.40192446910603, 64.3770946952626, 43.4714390915815, 113.999185974188,
36.112608456959, 65.1744742040986, 1.48290379975249, 172.576995683309,
141.783613905853, 40.0062833851536, 3.50096592013592, 38.3600344290379,
148.458496149248, 135.323909108861, 0.76918191241357, 52.928663119381
)), .Names = c("Rot", "Rot.Herb", "Rot.trt", "Crop", "Year",
"Herb.trt", "variable", "N", "value", "sd", "se", "ci"), row.names = c(NA,
-18L), class = "data.frame")
This is the code
# Bar chart of `value` per Rot.Herb treatment, filled by herbicide treatment
# and faceted by crop, with error bars spanning value - se to value + se.
ggplot(grp1, aes(x = Rot.Herb, y = value, fill = factor(Herb.trt))) +
  # position_dodge() takes a numeric dodge width; 0.5 is the same width the
  # error-bar layer below uses, so both layers are dodged identically.
  geom_bar(stat = "identity", position = position_dodge(0.5), width = 0.5) +
  # `linewidth` replaces the deprecated `size` for line geoms
  # (ggplot2 >= 3.4.0).
  geom_errorbar(aes(ymin = value - se, ymax = value + se),
                linewidth = 0.75, width = 0.25,
                position = position_dodge(0.5)) +
  scale_fill_brewer(palette = "Dark2") +
  # free_x scales with free_x space: each crop panel shows only its own
  # treatments and panel width follows the number of treatments.
  facet_grid(~Crop, scales = "free_x", space = "free_x") +
  xlab("Treatment") +
  ylab("2014 total weed biomass (Kg/ha)\n") +
  # theme_bw() must precede the theme() tweaks, otherwise it resets them.
  theme_bw() +
  theme(panel.grid.major = element_blank()) +
  theme(legend.title = element_blank(),
        legend.text = element_text(size = 20),
        legend.position = "top") +
  theme(axis.title = element_text(size = 24, face = "bold", vjust = 2),
        axis.text.y = element_text(size = 20, color = "black")) +
  # Treatment names are hidden on the x axis; the facet strips carry the crop.
  theme(strip.text.x = element_text(colour = "black", size = 15),
        strip.background = element_rect(fill = "white"),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank())
And graph
What I want is a graph with bar width of 0.5 and no spacing between bars in the same grid. Thank you for all the input.
I think you want width = 0.5 while keeping the bars touching because you don't want the absolute width to change (width = 0.5 changes the relative width). This might be closer to what you are asking for.
# Same bar chart, but with touching bars: width = 1 makes each bar fill its
# whole x slot, and the x-scale expansion adds padding at the panel edges.
ggplot(grp1, aes(x = Rot.Herb, y = value, fill = factor(Herb.trt))) +
  geom_bar(stat = "identity", position = "dodge", width = 1) +
  # `linewidth` replaces the deprecated `size` for line geoms
  # (ggplot2 >= 3.4.0).
  geom_errorbar(aes(ymin = value - se, ymax = value + se),
                linewidth = 0.75, width = 0.25,
                position = position_dodge(0.5)) +
  # expand = c(multiplicative, additive): pad each side of the discrete
  # x range by 0.2 times the range and by 0 additional units.
  scale_x_discrete(expand = c(0.2, 0)) +
  scale_fill_brewer(palette = "Dark2") +
  facet_grid(~Crop, scales = "free_x", space = "free_x") +
  xlab("Treatment") +
  ylab("2014 total weed biomass (Kg/ha)\n") +
  # theme_bw() must precede the theme() tweaks, otherwise it resets them.
  theme_bw() +
  theme(panel.grid.major = element_blank()) +
  theme(legend.title = element_blank(),
        legend.text = element_text(size = 20),
        legend.position = "top") +
  theme(axis.title = element_text(size = 24, face = "bold", vjust = 2),
        axis.text.y = element_text(size = 20, color = "black")) +
  theme(strip.text.x = element_text(colour = "black", size = 15),
        strip.background = element_rect(fill = "white"),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank())
Basically I changed the width to 1 as suggested by @Gregor, which causes the bars to touch, but added an extra option, scale_x_discrete(expand=c(0.2,0)), to add space between the bars and the sides of the plot. This in effect adjusts the absolute width of the bars while keeping them touching. The first number gives the multiplicative constant used to expand the range, while the second gives the additive constant.
You can experiment with the two values in expand = to get what you want.
Also see this related question: ggplot geom_boxplot: reduce space between x-axis categories

Resources