How to remove zero frequency for frequency plot and fix time? - r

When I produce a frequency plot:
Data <- structure(list(Venue = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("Conference", "Journal"), class = "factor"), Year = c(2008L,
2009L, 2010L, 2011L, 2012L, 2013L, 2014L, 2015L, 2016L, 2017L,
2018L, 2019L, 2008L, 2009L, 2010L, 2011L, 2012L, 2013L, 2014L,
2015L, 2016L, 2017L, 2018L), Frequency = c(0L, 0L, 0L, 0L, 1L,
1L, 2L, 1L, 4L, 4L, 11L, 3L, 2L, 1L, 0L, 0L, 3L, 5L, 3L, 7L,
8L, 19L, 10L)), class = "data.frame", row.names = c(NA, -23L))
library(ggplot2)
ggplot(Data, aes(x = Year, y = Frequency, fill = Venue, label = Frequency)) +
geom_bar(stat = "identity") +
geom_text(size = 3, position = position_stack(vjust = 0.5))
The plot shows the zero values, and the years on the x-axis do not appear as they do in the data frame.
How is it possible to remove zero frequency from plot (but keep from year i.e. 2012 the record in the plot) and show in x axis all years for every bar?

Is this what you want?
The code to get it is:
# Treat Year as a discrete variable so every year gets its own tick, and
# map the label of zero-frequency segments to NA so no "0" is drawn.
ggplot(Data, aes(x = as.character(Year), y = Frequency, fill = Venue,
                 label = ifelse(Frequency > 0, Frequency, NA))) +
  geom_col() +
  # na.rm = TRUE drops the NA labels silently instead of emitting a warning
  geom_text(size = 3, position = position_stack(vjust = 0.5), na.rm = TRUE) +
  scale_x_discrete(name = "Year")

Related

fix error caused by error in `setup_params()` with y-axis for stacked barplot ggplot2

I am trying to build a stacked barplot that looks like so...
My dataframe looks like this:
df = structure(list(Year_Publication = c(1989L, 1994L, 2001L, 2002L,
2002L, 2004L, 2006L, 2007L, 2009L, 2011L, 2012L, 2013L, 2014L,
2015L, 2016L, 2017L, 2017L, 2017L, 2018L, 2018L, 2018L, 2019L,
2019L, 2019L, 2020L, 2020L, 2020L, 2020L, 2021L, 2021L, 2022L,
2022L, 2022L, 2022L), Taxa = c("Cervidae", "Teleostei", "Chondrichtyes",
"Chondrichtyes", "Gastropoda", "Teleostei", "Malacostraca", "Teleostei",
"Teleostei", "Teleostei", "Teleostei", "Teleostei", "Teleostei",
"Teleostei", "Teleostei", "Chondrichtyes", "Teleostei", "Chondrostei",
"Teleostei", "Chondrichtyes", "Decapoda", "Teleostei", "Gastropoda",
"Chondrichtyes", "Chondrostei", "Teleostei", "Bivalvia", "Tetrapoda",
"Teleostei", "Orthoptera", "Chondrichtyes", "Teleostei", "Reptilia",
"Bovidae"), Total_Species_Per_Taxa = c(1L, 2L, 1L, 1L, 3L, 1L,
1L, 1L, 2L, 4L, 2L, 1L, 1L, 2L, 4L, 2L, 4L, 1L, 7L, 3L, 1L, 6L,
1L, 3L, 1L, 7L, 2L, 1L, 1L, 1L, 4L, 4L, 1L, 1L), Total_Species_Per_Pub = c(1L,
2L, 1L, 4L, 4L, 1L, 1L, 1L, 2L, 4L, 2L, 1L, 1L, 2L, 4L, 7L, 7L,
7L, 11L, 11L, 11L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 2L, 2L,
10L, 10L, 10L, 10L)), class = "data.frame", row.names = c(NA,
-34L))
and I have tried this code:
ggplot(data = taxa_diversity, aes(x = `Year_Publication`, fill = Taxa)) +
geom_bar(aes(y = `Total_Species_Per_Pub`), stat = "count")
AND
ggplot(data = taxa_diversity, aes(x = `Year_Publication`, y = `Total_Species_Per_Pub`, fill = Taxa)) +
geom_bar(stat = "count")
But I keep getting this error
Error in `geom_bar()`:
! Problem while computing stat.
ℹ Error occurred in the 1st layer.
Caused by error in `setup_params()`:
! `stat_count()` must only have an x or y aesthetic.
Run `rlang::last_error()` to see where the error occurred.
I know it's an error with the y-axis, because I can get it to work when I don't include a command for the y-axis... but then it gives me a graph with the wrong values on the y-axis.
Any idea how to fix this?
Update (removed basic answer):
Using geom_col() we could set y in aes:
# One fill colour per taxon (12 entries; the last three repeat the first three).
my_color <- c("#41859f", "#404040", "#ea9f91", "#bfd2d9",
              "#981f26", "#575a7b", "#ce2a0a", "#eddca3",
              "#2c6049", "#41859f", "#404040", "#ea9f91")
library(ggplot2)
# Stack the per-taxon counts: within each year they add up to
# Total_Species_Per_Pub, so the bar height is the publication total.
# Stacking Total_Species_Per_Pub itself would count that total once per taxon.
ggplot(data = df,
       aes(x = Year_Publication, y = Total_Species_Per_Taxa, fill = Taxa)) +
  geom_col() +
  # one tick per distinct publication year (the column has repeated years)
  scale_x_continuous("Year_Publication",
                     breaks = unique(df$Year_Publication)) +
  scale_fill_manual(values = my_color) +
  theme_classic()

x-axis labels not showing in ggplot

The x-axis labels aren't showing in my ggplot and I can't figure out what the issue is. I tried changing the scale_x_continuous to scale_x_discrete but that wasn't the issue. Here's the data and the code:
dput(df)
structure(list(variable = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "X..i..", class = "factor"),
value = c(0.86535786015671, 0.270518550067837, 0.942648772841964,
3.99444934081099, 1.11759146288817, 1.54510976425154, 2.44547105239855,
2.2564822479637, 0.806268193902794, 0.334684787222841, 0.279275582280181,
0.506202944652795, 0.00974858004556866, 0.274742461635902,
0.22071873199716, 0.289511637643534, 0.352185038116792, 0.834072418861261,
1.34338149120735, 1.74931508000265, 1.49348843361896, 4.07991249877895,
1.37225152308336, 0.812438174787708, 0.870119514197706, 1.12552827647611,
0.981401242191818, 0.811544940639505, 0.270314252804909,
0.00129424269740973, 0.138397649461267, 0.320412520877311,
0.200638317328505, 0.311317976283425, 2.27515845904203, 0.701130150695764,
1.19347381779438, 1.74260582346705, 2.04812451743241, 3.30525861365071,
1.09525257544341, 2.6941909849432, 1.24879308689346, 2.32559594481724,
0.489685734592222, 0.401412018111572, 0.209957274618462,
0.715330877881211, 0.844512982038313, 0.220417574806829,
0.440151738500053, 1.32486291268667, 0.771676730656983, 1.295145890213,
2.410181199299, 2.41520949303317, 2.07420663366187, 1.45105393420989,
1.94026424903487, 1.06019651909079, 1.21389399141063, 0.526835419170636,
0.392643071856425, 0.07366669912048, 0.376156996326127, 0.461881411637594,
0.236855843259622, 0.367884917633423), year = c(2005L, 2006L,
2007L, 2008L, 2009L, 2010L, 2011L, 2012L, 2013L, 2014L, 2015L,
2016L, 2017L, 2018L, 2019L, 2020L, 2021L, 2005L, 2006L, 2007L,
2008L, 2009L, 2010L, 2011L, 2012L, 2013L, 2014L, 2015L, 2016L,
2017L, 2018L, 2019L, 2020L, 2021L, 2005L, 2006L, 2007L, 2008L,
2009L, 2010L, 2011L, 2012L, 2013L, 2014L, 2015L, 2016L, 2017L,
2018L, 2019L, 2020L, 2021L, 2005L, 2006L, 2007L, 2008L, 2009L,
2010L, 2011L, 2012L, 2013L, 2014L, 2015L, 2016L, 2017L, 2018L,
2019L, 2020L, 2021L), tenor = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L), .Label = c("1", "5", "10", "average"), class = "factor")), row.names = c(NA,
-68L), class = "data.frame")
ggplot(df, aes(year, value, color = tenor)) +
geom_line(size=0.5) + scale_x_continuous(breaks = seq(1:17),labels = seq(2005,2021)) +
geom_point() +
xlab("year")
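The labels do not show because the breaks `seq(1:17)` (i.e. 1 to 17) fall outside the range of `year` (2005–2021), so no tick lands on the axis. If you wanted to force ggplot to plot every x axis label, you could use scale_x_continuous(breaks = 2005:2021) or breaks = df$year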
# Line + point plot of value by year, one colour per tenor.
ggplot(df, aes(year, value, color = tenor)) +
  geom_line(size = 0.5) +
  geom_point() +
  # one break per distinct year; df$year repeats each year once per tenor
  scale_x_continuous(breaks = unique(df$year)) +
  xlab("year")

ggplot2 graphing and plotting average and minimum

here is my code:
library(dplyr); library(tidyr)
T0.modified <- T0data %>%
# create year range based on each company's T0 year
mutate(Year.M1 = Year - 1,
Year.M2 = Year - 2,
Year.M3 = Year - 3,
Year.P1 = Year + 1,
Year.P2 = Year + 2,
Year.P3 = Year + 3) %>%
# convert to long format, match with Alldata based on both company & year
gather(reference.year, actual.year, -Company, -Price) %>%
left_join(Alldata, by = c("Company" = "Company", "actual.year" = "Year")) %>%
# keep T0 price for year T0, & use matched prices for all other years
mutate(Price = ifelse(reference.year == "Year", Price.x, Price.y)) %>%
# take maximum of all matched prices for each company each year
group_by(Company, reference.year) %>%
summarise(Price = max(Price)) %>%
ungroup() %>%
# order reference.year for correct sequence in ggplot's x-axis
mutate(reference.year = factor(reference.year,
levels = c("Year.M3", "Year.M2", "Year.M1", "Year",
"Year.P1", "Year.P2", "Year.P3"),
labels = c("T-3", "T-2", "T-1", "T0", "T+1", "T+2", "T+3")))
ggplot(T0.modified,
aes(x = reference.year, y = Price, group = Company, color = Company)) +
geom_line(aes()) +
xlab("Year") + theme_bw() +
stat_summary(fun.y = mean, geom = "line", group = 1,
linetype = 2, size = 1.5, colour = "grey") +
annotate("label", x = 7, y = 200, label = "Average",
fill = "grey", alpha = 0.5, hjust = 1)
And here is my data:
T0data:
structure(list(Company = structure(1:3, .Label = c("Amazon",
"Cisco", "McDonald's"), class = "factor"), Year = c(2011L, 2008L,
2013L), Price = c(182, 21.82, 95.15)), .Names = c("Company",
"Year", "Price"), row.names = c(NA, 3L), class = "data.frame")
All Data:
structure(list(Company = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("Amazon", "Cisco", "McDonald's"), class = "factor"),
Year = c(2008L, 2008L, 2008L, 2008L, 2009L, 2009L, 2010L,
2010L, 2010L, 2011L, 2011L, 2012L, 2012L, 2013L, 2013L, 2014L,
2014L, 2014L, 2008L, 2010L, 2010L, 2010L, 2011L, 2011L, 2012L,
2012L, 2013L, 2013L, 2014L, 2014L, 2014L, 2015L, 2015L, 2016L,
2016L, 2016L, 2005L, 2005L, 2005L, 2006L, 2006L, 2007L, 2007L,
2007L, 2008L, 2008L, 2009L, 2009L, 2009L, 2010L, 2010L, 2011L,
2011L, 2011L), Price = c(91L, 77L, 81L, 87L, 63L, 88L, 110L,
75L, 117L, 170L, 190L, 215L, 245L, 316L, 275L, 330L, 378L,
390L, 55L, 62L, 66L, 65L, 72L, 98L, 93L, 88L, 99L, 101L,
94L, 103L, 96L, 99L, 116L, 112L, 123L, 113L, 19L, 17L, 18L,
20L, 19L, 26L, 31L, 27L, 24L, 21L, 14L, 22L, 18L, 26L, 22L,
14L, 16L, 15L)), .Names = c("Company", "Year", "Price"), class = "data.frame", row.names = c(NA,
-54L))
Here's my question:
How can I make the line graph show only 2 values, the average, and the minimum for all values?
And how can I also plot a random company as a third line in the graph, to compare it to the minimum and the average?
Something like this? It plots the average, the minimum and a random company (see subset).
# Draw three lines over the reference years: the mean price across companies
# (grey, dashed), the minimum price across companies (red, dashed) and a single
# company, Amazon, for comparison (blue).
# `fun` replaces the deprecated `fun.y` argument of stat_summary().
p <- ggplot(T0.modified) + xlab("Year") + theme_bw() +
  # group = 1 joins the summaries across the discrete x values into one line
  stat_summary(aes(x = reference.year, y = Price), fun = mean, geom = "line",
               group = 1, linetype = 2, size = 1.5, colour = "grey") +
  stat_summary(aes(x = reference.year, y = Price), fun = min, geom = "line",
               group = 1, linetype = 2, size = 1.5, colour = "red") +
  annotate("label", x = 7, y = 200, label = "Average",
           fill = "grey", alpha = 0.5, hjust = 1) +
  annotate("label", x = 7, y = 30, label = "Min",
           fill = "grey", alpha = 0.5, hjust = 1) +
  geom_line(data = subset(T0.modified, Company == "Amazon"),
            aes(x = reference.year, y = Price, group = Company),
            color = "blue")

ggplot2 legends: Adding items not in plot code to the legend & changing shapes in legend

I have a complex plot that shows dots of different colors for patient grades in different years, and lines of different colors connecting repeated measures (the same patients) measured in the two years whose grading has changed. As you can see the legend simply lists the dots with lines for the two colors. What I need, however, is a legend that has a red dot for 2009 and a blue dot for 2016 (without lines!), as well as a red line that I can label "Upgraded" and a blue line I can label "Downgraded". So I need four legend items: 2 dots, 2 lines, 4 labels. I've done extensive searching on this, and cannot find the answer.
Here's the plot code I used to build all the aesthetics I wanted:
sampledata <- structure(list(patient = c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L,
5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L,
12L, 13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L, 17L, 17L, 18L, 18L,
19L, 19L), grade = structure(c(5L, 5L, 5L, 5L, 5L, 1L, 5L, 1L,
1L, 5L, 1L, 5L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 5L, 2L, 5L, 3L, 3L,
3L, 3L, 2L, 4L, 2L, 4L, 4L, 1L, 4L, 1L, 4L, 2L, 4L, 2L), .Label = c("grade_I",
"grade_II", "grade_III", "indeterminate", "normal"), class = "factor"),
year = c(2009L, 2016L, 2009L, 2016L, 2009L, 2016L, 2009L,
2016L, 2009L, 2016L, 2009L, 2016L, 2009L, 2016L, 2009L, 2016L,
2009L, 2016L, 2009L, 2016L, 2009L, 2016L, 2009L, 2016L, 2009L,
2016L, 2009L, 2016L, 2009L, 2016L, 2009L, 2016L, 2009L, 2016L,
2009L, 2016L, 2009L, 2016L)), .Names = c("patient", "grade",
"year"), class = "data.frame", row.names = c(NA, -38L))
# Build the plotting factors from the columns of `sampledata` so the example
# runs against the data posted above.
sampledata$yearf <- factor(sampledata$year)
# Explicit level order puts "normal" first and "indeterminate" last on the axis.
sampledata$gradef <- factor(sampledata$grade,
                            levels = c("normal", "grade_I", "grade_II",
                                       "grade_III", "indeterminate"))
# One point per patient-year, with a line joining each patient's two records.
p <- ggplot(data = sampledata,
            aes(x = gradef, y = patient, group = patient, color = yearf)) +
  geom_point() + geom_line()
p + scale_colour_brewer(palette = "Set1") +
  labs(x = "ASE Grade", y = "Patient", color = "ASE Guidelines")
# Human-readable tick labels for the grade levels.
pticks <- p + scale_x_discrete(labels = c("grade_I" = "Grade I",
                                          "grade_II" = "Grade II",
                                          "grade_III" = "Grade III",
                                          "indeterminate" = "Indeterminate",
                                          "normal" = "Normal"))
# Strip grid lines and panel background, keep axis lines.
ptheme <- pticks + theme(panel.grid.minor = element_blank(),
                         panel.grid.major = element_blank(),
                         panel.background = element_blank(),
                         legend.key = element_rect(fill = "white"),
                         axis.line = element_line(),
                         axis.title.x = element_text(vjust = 0.0001))
paxes <- ptheme + scale_colour_brewer(palette = "Set1") +
  labs(x = "ASE Grade", y = "Patient", color = "ASE Guidelines")

Error (ymax not found) while adding a geom_line (different data) to a multi-layered ggplot

I'm trying to generate a plot that summarizes a dataset by first plotting the median & quantiles in an area / black line, after which I want to outline a specific 'firm' with a red line.
I'd also like to do so while facetting on a variable, thus plotting multiple variables at once.
An example code of what I'd plot is as follows:
require(dplyr)
require(ggplot2)
dt <- structure(list(Firm = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L), .Label = c("a", "b", "c", "d"), class = "factor"), Year = c(2008L,
2009L, 2008L, 2009L, 2008L, 2009L, 2008L, 2009L, 2008L, 2009L,
2008L, 2009L, 2008L, 2009L, 2008L, 2009L, 2008L, 2009L, 2008L,
2009L, 2008L, 2009L, 2008L, 2009L), variable = structure(c(1L,
1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 3L,
3L, 1L, 1L, 2L, 2L, 3L, 3L), .Label = c("var1", "var2", "var3"
), class = "factor"), value = c(0.991894223, 2.216322113, 3.189415462,
0.663732077, 0.444826423, 2.674568191, 1.272077011, 7.691464914,
4.263339855, 0.214415839, 3.995328653, 6.028747322, 8.191459456,
0.16205906, 4.056495056, 5.17994524, 0.42435417, 0.678655669,
6.246411921, 0.505532339, 4.65045746, 8.85141854, 5.850616048,
2.028583225)), .Names = c("Firm", "Year", "variable", "value"
), class = "data.frame", row.names = c(NA, -24L))
head(dt)
Firm Year variable value
1 a 2008 var1 0.9918942
2 a 2009 var1 2.2163221
3 a 2008 var2 3.1894155
4 a 2009 var2 0.6637321
5 a 2008 var3 0.4448264
6 a 2009 var3 2.6745682
I now manually calculate the ymin, ymax, and y for the ribbon / line plots. They're plotting just fine.
dt_aggregates <- dt %>%
group_by(variable, Year) %>%
arrange(variable, Year) %>%
summarize(y=median(value))
dt_aggregates$ymin <- 0.9*dt_aggregates$y
dt_aggregates$ymax <- 1.1*dt_aggregates$y
This will be the firm I want to highlight:
dt_focus <- filter(dt, Firm=="a")
The following plots just fine, and is almost what I want.
g <- ggplot(data=dt_aggregates,
aes(x=Year,
y=y,
ymax=ymax,
ymin=ymin,
group=variable)) +
facet_grid(variable~., scales="free") + geom_line() + geom_ribbon(alpha=0.3) +
theme_bw()
However, I want to add another line (for the one firm) onto this (in red).
Once I try to add a new line with a separate dataframe, I get the following error. Any help on getting this to work is greatly appreciated
# Error in eval(expr, envir, enclos) : object 'ymax' not found
g + geom_line(data=dt_focus,
aes(x=Year, y=value, group=variable),
col="red", size=2)

Resources