I am trying to plot one column by Date (different color points for each animal category) and on the same graph, plot a second column by Date as well. The second column has entries for the days but only for certain categories, Large Dog. There is no adoption_with_discount for small or medium dogs (please see the reproducible example data set, example_data). When I plot them separately they visualize fine but not when plotted together. I thought I would just overlay a separate geom but that is not working.
I want to combine the two plots into one. My goal is for the points plot to have the line graph on top of it. I am trying to visualize the adoption as points colored by animal and put a line on the same graph of adoption_with_discount.
Thank you for your help!
# Make example -----------------------------------------------------------
# Here is an example data set
# You can see in the `adoption_with_discount` the values I want to add as a line.
library(lubridate)
library(tidyverse)
example_days <- data.frame(Date = c(seq.Date(from = as.Date('2022-03-01'), to = as.Date('2022-04-30'), by = 'days')))
example_small <-
example_days %>%
mutate(animal = "Small Dog")
a <-sample(100:150, nrow(example_small), rep = TRUE)
example_small <-
example_small %>%
mutate(adoption = a,
adoption_with_discount = NA)
example_med <-
example_days %>%
mutate(animal = "Medium Dog")
b <-sample(150:180, nrow(example_med), rep = TRUE)
example_med <-
example_med %>%
mutate(adoption = b,
adoption_with_discount = NA)
example_large <-
example_days %>%
mutate(animal = "Large Dog")
c <-sample(150:200, nrow(example_large), rep = TRUE)
example_large <-
example_large %>%
mutate(adoption = c)
example_large <-
example_large %>%
mutate(adoption_with_discount = adoption - 15)
example_data <- rbind(example_small, example_med, example_large)
# Plot --------------------------------------------------------------------
ggplot(data = example_data) +
geom_point(mapping = aes(x = Date,
y = adoption,
color = animal)) +
ggtitle("Dog Adoption by Size") +
labs(x = "Date", y = "Adoption Fee") +
scale_y_continuous(labels = scales::dollar) +
theme(axis.text.x = element_text(angle = 45))
# Plot with Fee -----------------------------------------------------------
# This is where the problem is occurring
# When I want to add a line that plots the adoption with discount by day
# on top of the points, it does not populate.
ggplot(data = example_data) +
geom_point(mapping = aes(x = Date,
y = adoption,
color = animal)) +
geom_line(mapping = aes(x = Date,
y = adoption_with_discount),
color = "black") +
ggtitle("Dog Adoption by Size with Discount Included") +
labs(x = "Date", y = "Adoption Fee") +
scale_y_continuous(labels = scales::dollar) +
theme(axis.text.x = element_text(angle = 45))
# See if just Discount will Plot -----------------------------------------
#This plots separately
ggplot(data = example_large) +
geom_line(mapping = aes(x = Date,
y = adoption_with_discount),
color = "black") +
ggtitle("Discount") +
labs(x = "Date", y = "Adoption Fee") +
scale_y_continuous(labels = scales::dollar) +
theme(axis.text.x = element_text(angle = 45))
While subsetting is an option to fix the issue, the reason why no line is plotted is simply the missing grouping, i.e. in geom_line you are trying to plot observations for all three dog types as one group or line. However, because of the NAs no line will show up. An easy option to solve that would be to explicitly map animal on the group aes. Additionally I added na.rm=TRUE to silent the warning about removed NAs. Finally I right aligned your axis labels by adding hjust=1:
library(ggplot2)
ggplot(data = example_data) +
geom_point(mapping = aes(
x = Date,
y = adoption,
color = animal
)) +
geom_line(
mapping = aes(
x = Date,
y = adoption_with_discount,
group = animal
),
color = "black",
na.rm = TRUE
) +
ggtitle("Dog Adoption by Size with Discount Included") +
labs(x = "Date", y = "Adoption Fee") +
scale_y_continuous(labels = scales::dollar) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Based on discussion here I found that you can use subset argument in the aes of geom_line to select values that are not NAs in adoption_with_discount column.
ggplot(data = example_data) +
geom_point(mapping = aes(x = Date,
y = adoption,
color = animal)) +
geom_line(mapping = aes(x = Date,
y = adoption_with_discount),
color = "black") +
ggtitle("Dog Adoption by Size with Discount Included") +
labs(x = "Date", y = "Adoption Fee") +
scale_y_continuous(labels = scales::dollar) +
theme(axis.text.x = element_text(angle = 45)) +
geom_line(mapping = aes(x = Date,
y = adoption_with_discount,
subset = !is.na(adoption_with_discount)),
color = "black") +
ggtitle("Discount") +
labs(x = "Date", y = "Adoption Fee") +
scale_y_continuous(labels = scales::dollar) +
theme(axis.text.x = element_text(angle = 45))
The result:
It looks like it is the NA that are included in the geom_line portion that is creating the issue so you can filter those out before plotting the line:
geom_point(mapping = aes(x = Date,
y = adoption,
color = animal)) +
geom_line(data=example_data %>% filter(!is.na(adoption_with_discount)),
mapping = aes(x = Date,
y = adoption_with_discount),
color = "black") +
ggtitle("Dog Adoption by Size with Discount Included") +
labs(x = "Date", y = "Adoption Fee") +
scale_y_continuous(labels = scales::dollar) +
theme(axis.text.x = element_text(angle = 45))
Related
I have a dataframe which is something like this:
#sample dataframe
set.seed(42)
dates <- seq.Date(as.Date("2020-01-01"), as.Date("2021-12-30"), "day")
n <- length(dates)
dat <- data.frame(created_at = dates,
category = rep(LETTERS[1:4], n/2),
With_Outlier = sample(18:100, n, replace=TRUE),
count = sample(10:40, n, replace=TRUE))
I am using the following code to plot the data in a way I desire:
ggplot(data = dat, aes(x = created_at)) +
geom_line(aes(y = With_Outlier, colour = paste0(category," With_Outliers")))+
geom_line(aes(y = count, colour = paste0(category," Value Counts")))+
facet_wrap(~ category, ncol = 1, scales = "free_x") +
labs(title = 'Data Visualization',
x = 'Month',
y = 'Count') +
theme_bw() +
scale_x_date(date_breaks = '1 month', date_labels = "%b-%y", expand = expansion(0,0))+
scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x)))+
theme(text=element_text(size=13),panel.spacing.x=unit(0.6, "lines"),
panel.spacing.y=unit(1, "lines"), legend.position="bottom")+
scale_linetype_manual(name = NULL, values = 2)+ labs(color = NULL)
And this is the output I am getting:
Now I do some kind of operation on the data and for category B, the values are updated (I'd call it Clean_Values and Clean_Count, for ease, I save them in another dataframe). For the rest of the categories, everything remains same. This way, I need to plot two lines for two columns just for Category B.
Any guidance please?
I did some work and came up with this solution, posting here so it might help someone else.
Creating sample new dataframe with Clean_Values and Clean_Count.
dat1 <- data.frame(created_at = dates,
category = rep(LETTERS[2:2], n/2),
Clean_Values = sample(200:400, n, replace=TRUE),
Clean_Count = sample(500:600, n, replace=TRUE))
)
Now to plot the data, I've added following two lines:
geom_line(data = dat1, aes(x = created_at, y = Clean_Values, colour = paste0(" Clean Values")))+
geom_line(data = dat1, aes(x = created_at, y = Clean_Count, colour = paste0(" Clean Count")))+
Here's the complete code:
ggplot(data = dat, aes(x = created_at)) +
geom_line(aes(y = With_Outlier, colour = paste0(category," With_Outliers")))+
geom_line(aes(y = count, colour = paste0(category," Value Counts")))+
facet_wrap(~ category, ncol = 1, scales = "free_x") +
geom_line(data = dat1, aes(x = created_at, y = Clean_Values, colour = paste0(" Clean Values")))+
geom_line(data = dat1, aes(x = created_at, y = Clean_Count, colour = paste0(" Clean Count")))+
labs(title = 'Data Visualization',
x = 'Month',
y = 'Count') +
theme_bw() +
scale_x_date(date_breaks = '1 month', date_labels = "%b-%y", expand = expansion(0,0))+
scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x)))+
theme(text=element_text(size=13),panel.spacing.x=unit(0.6, "lines"),
panel.spacing.y=unit(1, "lines"), legend.position="bottom")+
scale_linetype_manual(name = NULL, values = 2)+ labs(color = NULL)
And below is the output with two new lines as desired also in the legend too:
In order to highlight the moving average in my ggplot visualization, I want to give it a different color (in this case grey or black for both MA lines). When it comes to to a graph representing two time series, I struggle to find the best solution. Maybe I need to take a different approach.
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(tidyquant))
V = 365
data <- data.frame (var1 = c(rnorm(V)),
var2 = c(rnorm(V)+12),
date = c(dates <- ymd("2013-01-01")+ days(0:364))
)
data_melted <- reshape2::melt(data, id.var='date')
data_melted %>%
ggplot() +
geom_line(mapping = aes(x= date, y=value, col=variable)) +
scale_color_manual(values=c("#CC6666", "steelblue")) +
geom_ma(ma_fun = SMA, n = 30, mapping = aes(x= date, y=value, col=variable)) +
theme(axis.text.x = element_text(angle = 50, vjust = 0.5)) +
scale_x_date(date_breaks = "1 month")
I think you can get what you want by not mapping variable to color in aes() for the MA part. I instead include group = variable to tell ggplot2 that the two MA's should be separate series, but no difference in their color based on that.
data_melted %>%
ggplot() +
geom_line(mapping = aes(x= date, y=value, col=variable)) +
scale_color_manual(values=c("#CC6666", "steelblue")) +
tidyquant::geom_ma(ma_fun = SMA, n = 30, mapping = aes(x= date, y=value, group = variable), color = "black") +
theme(axis.text.x = element_text(angle = 50, vjust = 0.5)) +
scale_x_date(date_breaks = "1 month")
If you want different colors, the natural way to do this in ggplot would be to give the moving averages their own values to be mapped to color.
...
scale_color_manual(values=c("#CC6666", "#996666", "steelblue", "slateblue")) +
tidyquant::geom_ma(ma_fun = SMA, n = 30, mapping = aes(x= date, y=value, col=paste(variable, "MA"))) +
...
I would consider looking at the tsibble library for time series data.
library(tsibble)
data_melted <-as_tsibble(data_melted, key = 'variable', index = 'date')
data_melted <- data_melted %>%
mutate(
`5-MA` = slider::slide_dbl(value, mean,
.before = 2, .after = 2, .complete = TRUE)
)
data_melted %>%
filter(variable == "var1") %>%
autoplot(value) +
geom_line(aes(y = `5-MA`), colour = "#D55E00") +
labs(y = "y",
title = "title") +
guides(colour = guide_legend(title = "series"))
I have this code which creates the plot below
sec_axis_data <- mpg %>%
group_by(manufacturer) %>%
summarise(entries = n())
p <- ggplot(mpg, aes(x = manufacturer, fill = class == "compact")) +
geom_bar(position = "fill") +
scale_fill_manual(values = c('blue', 'red')) +
scale_y_continuous(sec.axis = sec_axis(~. * 50))
p
However, I'm not sure how to get the secondary axis data to display properly as a line across the plot? When, for example, I try:
p <- ggplot(mpg, aes(x = manufacturer, fill = class == "compact")) +
geom_bar(position = "fill") +
scale_fill_manual(values = c('blue', 'red')) +
scale_y_continuous(sec.axis = sec_axis(~. * 50)) +
geom_line(data = sec_axis_data, aes(x = manufacturer, y = entries))
p
... I get an error. I think the issue is linked to the different lengths of the data for mpg and sec_axis_data, but I'm not sure how to resolve this.
You were quite close of the solution.
You need to add inherit.aes = FALSE because of the fill argument not find in your second dataframe.
Also, to set the appropiate value, you need to divide your "entries" values by the same ratio you used for building the second axis in sec.axis function:
library(ggplot2)
ggplot(mpg, aes(x = manufacturer, fill = class == "compact")) +
geom_bar(position = "fill", alpha = 0.5) +
scale_fill_manual(values = c('blue', 'red')) +
scale_y_continuous(sec.axis = sec_axis(~. * 50, name = "Second axis")) +
geom_line(inherit.aes = FALSE, data = sec_axis_data,
aes(x = manufacturer, y = entries/50, group = 1), size = 2)
Does it answer your question ?
I have the following tibble format and i want to create a chart with two y-axis.
sample <- climate <- tibble(
Month = c("1/1/2019","2/1/2019","3/1/2019","4/1/2019","5/1/2019","6/1/2019","7/1/2019","8/1/2019","9/1/2019","10/1/2019","11/1/2019","12/1/2019","1/1/2020","2/1/2020","3/1/2020"),
Reactions = c(52111,37324,212695,152331,24973,10878,7413,8077,13066,50486,8087,12600,31625,25578,20069),
Ratio = c(1371,1866,6445,4914,925,363,218,245,335,1530,352,525,1506,1112,873)
)
Here's what i tried so far.
ggplot() +
geom_bar(mapping = aes(x = sample$Month, y = sample$Reactions), stat = 'identity') +
geom_line(mapping = aes(x = sample$Month , y = sample$Ratio), size = 2, color = "red") +
scale_y_continuous(name = "Reactions per Month", sec.axis = sec_axis(trans = ~./20, name = "Reactions/ post"))
Any help will be appreciated
you have to recode Month column as date, and multiply Ratio times 20 (since you devided second axis by 20):
library(lubridate)
sample$Month <- mdy(sample$Month)
ggplot() +
geom_bar(mapping = aes(x = sample$Month, y = sample$Reactions), stat = 'identity') +
geom_line(mapping = aes(x = sample$Month , y = sample$Ratio*20), size = 2, color = "red") +
scale_y_continuous(name = "Reactions per Month", sec.axis = sec_axis(trans = ~./20, name = "Reactions/ post"))
you can also improve your code with use of data variable inside ggplot()
ggplot(sample, aes(x = Month)) +
geom_bar(aes(y = Reactions), stat = 'identity') +
geom_line(aes(y = Ratio*20), size = 2, color = "red") +
scale_y_continuous(name = "Reactions per Month", sec.axis = sec_axis(trans = ~./20, name = "Reactions/ post"))
Plot:
I need to make a bar plot based on two variables (points and type) with fill.
Below is a minimal example, I would like to see the points ranking by guard points and ranking by points as guard or forward.
I tried ~reorder(names, -c(type, points)) but it doesn't work.
name <- c("James Harden","James Harden","Lebron James","Lebron James","Lebron James","Kawhi Leonerd","Kawhi Leonerd","Klay Thompson","Steph Curry","Kevin Durant","Kevin Durant","Chris Paul","Chris Paul")
team <- c("HOU","OKC","LAL","MIA","CLE","SAS","TOR","GSW","GSW","GSW","OKC","HOU","LAC")
points <- c(2000,12000,2000,10000,20000,7000,2000,14000,20000,6000,18000,4000,14000)
type <- c("G","G","F","G","F","G","G","G","G","F","F","G","G")
nba <- data.frame(name,team,points,type)
nba <- nba %>% arrange(desc(type))
ggplot(nba, aes(x = type, y = points, fill = team)) +
geom_bar(stat = 'identity', position = 'stack', color = 'black') +
facet_wrap(~reorder(name,-points), ncol = 1, strip.position = "top") +
coord_flip() + theme_minimal() +
labs(x = "players", y = "points", title = "Rank by points as Guard")
If it's ranked by points as guard, I would like to see Steph Curry ranks top, Chris Paul at second, James Harden and Klay tied at third, Lebron at fifth, Kawhi at sixth, and KD at the bottom.
If it's ranked by points as either guard or forward, I'd like to see Lebron at top, KD second, so on and so forth.
You can sort it for points as guard by adding a helper column. Look below;
library(ggplot2)
library(dplyr)
nba %>%
mutate(guardpoints = points * (type=="G")) %>%
ggplot(aes(x = type, y = points, fill = team)) +
geom_bar(stat = 'identity', position = 'stack', color = 'black') +
facet_wrap(~reorder(name, -guardpoints, sum), ncol = 1, strip.position = "top") +
coord_flip() + theme_minimal() +
labs(x = "players", y = "points", title = "Rank by points as Guard")
nba %>%
ggplot(aes(x = type, y = points, fill = team)) +
geom_bar(stat = 'identity', position = 'stack', color = 'black') +
facet_wrap(~reorder(name, -points, sum), ncol = 1, strip.position = "top") +
coord_flip() + theme_minimal() +
labs(x = "players", y = "points", title = "Rank by points")
Created on 2019-06-04 by the reprex package (v0.3.0)