ggplot2 places the tick marks at the centre of each bar when I use this code:
# Random yearly counts; each year is converted to a Date on 1 January
# so that it can be drawn on a date axis.
tibble(year = 2010:2020, count = sample(1:100, 11)) %>%
  mutate(year = dmy(paste0("01/01/", year))) %>%
  ggplot(mapping = aes(x = year, y = count)) +
  # One bar per year; the default axis tick sits at the centre of each bar.
  geom_bar(
    stat = "identity",
    width = 240,
    position = position_dodge(width = 0.1)
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%y") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 100)) +
  theme_classic()
But I need tick marks on either side of each year. I have photoshopped the tick marks into the plot to show where I need them. What code can I use to add tick marks on either side of each year?
You can draw the tick marks yourself with geom_segment(), turn off the clipping and hide the default ones.
library(ggplot2)
library(dplyr)
library(lubridate)
# Example data: one random count per year, dated 1 January of that year.
tbl <- tibble(year = 2010:2020, count = sample(1:100, 11)) %>%
  mutate(year = dmy(paste0("01/01/", year)))

# Width of each bar. The hand-drawn ticks are nudged by half of this value,
# so they stay on the bar edges if the width is ever changed.
bar_width <- 240
# Length of the hand-drawn ticks in y-axis units; they extend below y = 0.
tick_length <- 1

ggplot(tbl) +
  geom_col(
    aes(year, count),
    width = bar_width,
    position = position_dodge(width = 0.1)
  ) +
  # Tick at the right-hand edge of every bar.
  geom_segment(
    aes(x = year, xend = year),
    y = 0, yend = -tick_length,
    colour = "black",
    position = position_nudge(x = bar_width / 2)
  ) +
  # Tick at the left-hand edge of every bar.
  geom_segment(
    aes(x = year, xend = year),
    y = 0, yend = -tick_length,
    colour = "black",
    position = position_nudge(x = -bar_width / 2)
  ) +
  scale_x_date(date_labels = "%y", date_breaks = "1 year") +
  # oob_keep retains the segments even though they end below the lower limit.
  scale_y_continuous(
    limits = c(0, 100),
    expand = c(0, 0),
    oob = scales::oob_keep
  ) +
  # Clipping is switched off so the ticks can be drawn outside the panel.
  coord_cartesian(clip = "off") +
  theme_classic() +
  # Hide only the default x-axis ticks; the y-axis ticks are left in place.
  theme(axis.ticks.x = element_blank())
Created on 2021-04-10 by the reprex package (v1.0.0)
Related
I want to combine multiple ggplots into one plot with same x and y axis. This is my data. I have one Time column and 6 trend columns (A_Trnd, B_Trnd, C_Trnd etc). I have generated plot for Time vs A_Trnd.
library(ggplot2)
library(scales)
# Read the trend data and convert the date column to Date.
result <- read.csv("Downloads/Questions Trend - Questions Trend.csv")
result$Time_Formatted <- as.Date(result$Time_Formatted)

# Dates at which x-axis labels are drawn.
date_breaks <- as.Date(
  c("9/1/08", "5/12/14", "7/1/17", "2/2/19", "6/3/20"),
  format = "%m/%d/%y"
)

# Columns are referenced by bare name inside aes(): using `result$...` there
# bypasses the data passed to ggplot() and breaks as soon as the plot is
# faceted or given different data.
p1 <- ggplot(result, aes(Time_Formatted, A_Trnd)) +
  geom_point(size = 0.1) +
  labs(x = "Month", y = "Temporal Trend") +
  scale_x_date(
    breaks = date_breaks,
    date_labels = "%Y-%m",
    limits = as.Date(c("2008-08-01", "2021-08-01"))
  ) +
  theme(axis.text.x = element_text(angle = 70, vjust = 0.9, hjust = 1))

# Overlay a loess smoother on the scatter plot.
p1 + geom_smooth(method = "loess", color = "red")
Now, I want to plot the same for Time vs B_Trnd, Time vs C_Trnd, and so on, and have a combined plot like the one below.
How can I achieve this?
library(tidyverse)
library(scales)
# Import the data, parse Time as a Date, then stack every trend column into
# `group` (the column name) and `value` (its contents).
result <- read.csv("Downloads/Questions Trend - Questions Trend.csv") %>%
  mutate(Time = as.Date(Time, format = "%m/%d/%y")) %>%
  pivot_longer(-Time, names_to = "group", values_to = "value")

# Dates at which x-axis labels are drawn.
date_breaks <- as.Date(
  c("9/1/08", "5/12/14", "7/1/17", "2/2/19", "6/3/20"),
  "%m/%d/%y"
)

# One panel per trend column, all in a single row; the smoother is coloured
# by group and the legend is hidden.
p1 <- ggplot(data = result, mapping = aes(x = Time, y = value)) +
  geom_point(size = 0.1) +
  geom_smooth(aes(color = group), method = "loess") +
  facet_wrap(~ group, nrow = 1) +
  scale_x_date(
    limits = as.Date(c("2008-08-01", "2021-08-01")),
    breaks = date_breaks,
    date_labels = "%Y-%m"
  ) +
  xlab("Month") +
  ylab("Temporal Trend") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 70, vjust = 0.9, hjust = 1)
  )

p1
This post describes a method to create a two-line x-axis (year below months) on a time series plot. Unfortunately, the method that I use from this post (option 2) is not compatible with ggsave().
library(tidyverse)
library(lubridate)
# Example data: 1001 consecutive daily dates with a random value for each
# (the first 500 values are centred on 5, the remaining 501 on 10).
df <- tibble(
date = as.Date(41000:42000, origin = "1899-12-30"),
value = c(rnorm(500, 5), rnorm(501, 10))
)
# Line plot with monthly x-axis labels and a grey vertical line on each
# 1 January present in the data.
p <- ggplot(df, aes(date, value)) +
geom_line() +
geom_vline(
xintercept = as.numeric(df$date[yday(df$date) == 1]), color = "grey60"
) +
scale_x_date(date_labels = "%b", date_breaks = "month", expand = c(0, 0)) +
theme_bw() +
theme(panel.grid.minor.x = element_blank()) +
labs(x = "")
# Convert the plot into a grob so that its axis labels can be edited
g <- ggplotGrob(p)
# Get the x axis (the bottom axis grob, "axis-b")
index <- which(g$layout$name == "axis-b") # which grob
xaxis <- g$grobs[[index]]
# Get the ticks (labels and marks)
# NOTE(review): the positions [[2]] / [[1]] used below depend on how ggplot2
# lays out the axis grob internally - confirm against the installed version.
ticks <- xaxis$children[[2]]
# Get the labels
ticksB <- ticks$grobs[[2]]
# Edit x-axis label grob
# Find the index of every "Jun" label and append a year on a second line
# NOTE(review): unique(year(df$date)) is matched to the "Jun" labels by
# position, so this assumes one "Jun" label per year in the data - confirm.
junes <- grep("Jun", ticksB$children[[1]]$label)
ticksB$children[[1]]$label[junes] <-
paste0(
ticksB$children[[1]]$label[junes],
"\n ", # adjust the amount of spaces to center the year
unique(year(df$date))
)
# Center the month labels between ticks by left-padding them with spaces
ticksB$children[[1]]$label <-
paste0(
paste(rep(" ", 12), collapse = ""), # adjust the integer to center month
ticksB$children[[1]]$label
)
# Put the edited labels back into the plot
ticks$grobs[[2]] <- ticksB
xaxis$children[[2]] <- ticks
g$grobs[[index]] <- xaxis
# Draw the plot
# NOTE(review): grid.newpage() and grid.draw() come from the grid package,
# which this snippet does not attach - confirm library(grid) is loaded.
grid.newpage()
grid.draw(g)
# Save the plot
# NOTE: no `plot` argument is given, so ggsave() saves the last ggplot (p)
# rather than the edited grob g; pass plot = g to save the edited version.
ggsave("plot.png", width = 11, height = 8.5, units = "in")
A plot is saved, but without the years. How do I ggsave() the final plot from grid.draw(g)? This grid.draw(g) plot is shown below, but the actual plot.png file is slightly different, with the three years 2012, 2013 and 2014 omitted.
library(tidyverse)
library(lubridate)
library(scales)
# Fix the RNG seed so the example is reproducible.
set.seed(123)

# 1001 consecutive daily dates with a random value for each.
df <- tibble(
  date = as.Date(41000:42000, origin = "1899-12-30"),
  value = c(rnorm(500, 5), rnorm(501, 10))
)

# Year of each observation, as a factor; used as the facetting variable.
df <- mutate(df, year = as.factor(year(date)))

# One panel per year, placed side by side with no gap. The facet strips are
# moved below the axis, so they act as a second line of x-axis labels.
p <- ggplot(data = df, mapping = aes(x = date, y = value)) +
  geom_line() +
  geom_vline(
    xintercept = as.numeric(df$date[yday(df$date) == 1]),
    color = "grey60"
  ) +
  scale_x_date(
    breaks = pretty_breaks(),
    date_labels = "%b",
    expand = c(0, 0)
  ) +
  facet_grid(
    cols = vars(year),
    scales = "free_x",
    space = "free_x",
    switch = "x"
  ) +
  xlab("") +
  theme_bw(base_size = 14, base_family = "mono") +
  theme(
    panel.grid.minor.x = element_blank(),
    # no horizontal gap between the year panels
    panel.spacing.x = unit(0, "line"),
    # strips go outside the axis and lose their background
    strip.placement = "outside",
    strip.background.x = element_blank()
  )

p

# Write the plot to disk; `plot = p` names the object to save explicitly.
ggsave(
  filename = "plot.png",
  plot = p,
  width = 11,
  height = 8.5,
  units = "in",
  dpi = 150,
  type = "cairo"
)
Using theme_classic()
# Same faceted plot as before, built on theme_classic() instead of theme_bw().
p <- ggplot(data = df, mapping = aes(x = date, y = value)) +
  geom_line() +
  geom_vline(
    xintercept = as.numeric(df$date[yday(df$date) == 1]),
    color = "grey60"
  ) +
  scale_x_date(
    breaks = pretty_breaks(),
    date_labels = "%b",
    expand = c(0, 0)
  ) +
  # year strips are switched to the bottom of the panels
  facet_grid(
    cols = vars(year),
    scales = "free_x",
    space = "free_x",
    switch = "x"
  ) +
  xlab("") +
  theme_classic(base_size = 14, base_family = "mono") +
  theme(
    panel.grid.minor.x = element_blank(),
    # no horizontal gap between the year panels
    panel.spacing.x = unit(0, "line"),
    # strips go outside the axis and lose their background
    strip.placement = "outside",
    strip.background.x = element_blank()
  )

p
Add the top and right most borders
# Upper y limit (110% of the largest value, rounded up) and the last date.
# Lines drawn at these positions act as the top and right borders.
ymax <- ceiling(1.1 * max(df$value, na.rm = TRUE))
xmax <- max(df$date, na.rm = TRUE)

p <- ggplot(data = df, mapping = aes(x = date, y = value)) +
  geom_line() +
  geom_vline(
    xintercept = as.numeric(df$date[yday(df$date) == 1]),
    color = "grey60"
  ) +
  # top border
  geom_hline(yintercept = ymax) +
  # right-most border
  geom_vline(xintercept = as.numeric(df$date[df$date == xmax])) +
  scale_x_date(
    breaks = pretty_breaks(),
    date_labels = "%b",
    expand = c(0, 0)
  ) +
  scale_y_continuous(limits = c(0, ymax), expand = c(0, 0)) +
  # year strips are switched to the bottom of the panels
  facet_grid(
    cols = vars(year),
    scales = "free_x",
    space = "free_x",
    switch = "x"
  ) +
  xlab("") +
  theme_classic(base_size = 14, base_family = "mono") +
  theme(
    panel.grid.minor.x = element_blank(),
    # no horizontal gap between the year panels
    panel.spacing.x = unit(0, "line"),
    # strips go outside the axis and lose their background
    strip.placement = "outside",
    strip.background.x = element_blank(),
    # show the major grid lines again (theme_classic() hides them)
    panel.grid.major = element_line()
  )

p
Created on 2018-10-01 by the reprex package (v0.2.1.9000)
Taken from Tung's comments above: add the following at the end of the code chunk in the OP's question.
# Save the edited grob `g` explicitly, using the cairo PNG device.
ggsave(
  filename = "plot.png",
  plot = g,
  width = 11, height = 8.5, units = "in",
  dpi = 150,
  type = "cairo"
)
I need to essentially do a graph of x-axis (the date) and y-axis (volume sold) and have each day in a calendar year be represented.
Issues - first is the white space between dates. I tried to use factor(Date), but ran into issues when I wanted additional changes to the graph. The other issue, is the x-axis is currently by month. This looks fine. However, when I try to do it by day, I get ...
In short, it looks like a mess. Probably because I'm trying to put every date in at once. Below is my code as is.
# One bar per date. geom_col() always uses the y values as bar heights, so it
# takes no `stat` argument (passing one only triggers an "unknown parameters"
# warning).
ggplot(MyDataCSV, aes(x = Date, y = Volume)) +
  geom_col(
    width = 0.9,
    fill = "coral",
    alpha = 0.5,
    colour = "black",
    position = "dodge"
  ) +
  # date_labels formats the breaks directly, with no need for scales helpers.
  scale_x_date(date_breaks = "1 day", date_labels = "%m/%d") +
  ggtitle("Volume Sold by Date") +
  theme(
    # rotate the date labels so they are read vertically
    axis.text.x = element_text(angle = 90, hjust = 1),
    # centre the title
    plot.title = element_text(hjust = 0.5)
  )
I'm a beginner to R, so I know this is beginner level, but I'm very confused. I have ggplot, tidyverse, dplyr, lubridate, and scales installed. Essentially, I want my labels on the x-axis to look like the first picture, except with every date in my data set (about a year)
One solution is to specify the dimensions of the saved figure, e.g.
# Load libraries
library(tidyverse)
library(lubridate)
# Generate a fake dataset (minimal reproducible example): one row per day of
# 2021. The number of random volumes is derived from the date sequence, so
# the two columns cannot get out of step if the date range is changed.
df <- data.frame(
  Date = seq.Date(
    from = ymd("2021-01-01"),
    to = ymd("2021-12-31"),
    by = "1 day"
  )
)
df$Volume <- runif(nrow(df), 0, 4e+08)

# Plot the fake data. geom_col() always uses the y values as bar heights, so
# it takes no `stat` argument.
ggplot(df, aes(x = Date, y = Volume)) +
  geom_col(
    width = 0.9,
    fill = "coral",
    alpha = 0.5,
    colour = "black",
    position = "dodge"
  ) +
  scale_x_date(date_breaks = "1 day", date_labels = "%m/%d") +
  ggtitle("Volume Sold by Date") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )

# Save the plot. The figure is made very wide so the daily labels do not
# overlap; limitsize = FALSE lifts ggsave()'s guard against huge images.
ggsave(
  filename = "example_1.png",
  width = 60,
  height = 10,
  units = "in",
  limitsize = FALSE
)
Then, if you zoom in, you can see the dates don't overlap:
Otherwise, you could change the date_breaks to "1 month", or "1 week" to stop the dates overlapping whilst keeping a 'normal' figure size:
# Plot the fake data with one x-axis label per week instead of per day.
# geom_col() always uses the y values as bar heights, so it takes no `stat`
# argument.
ggplot(df, aes(x = Date, y = Volume)) +
  geom_col(
    width = 0.9,
    fill = "coral",
    alpha = 0.5,
    colour = "black",
    position = "dodge"
  ) +
  scale_x_date(date_breaks = "1 week", date_labels = "%m/%d") +
  ggtitle("Volume Sold by Date") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
Or, the option that I would probably recommend, you could create facets by e.g. every 3 months:
# Load libraries
library(tidyverse)
library(lubridate)
# Generate a fake dataset (minimal reproducible example): one row per day of
# 2021. The number of random volumes is derived from the date sequence, so
# the two columns cannot get out of step if the date range is changed.
df <- data.frame(
  Date = seq.Date(
    from = ymd("2021-01-01"),
    to = ymd("2021-12-31"),
    by = "1 day"
  )
)
df$Volume <- runif(nrow(df), 0, 4e+08)

# Facet titles, keyed by the integer quarter code produced by cut() below.
plot_labels <- c(
  "1" = "First Quarter, 2021",
  "2" = "Second Quarter, 2021",
  "3" = "Third Quarter, 2021",
  "4" = "Fourth Quarter, 2021"
)

# Plot the fake data, one panel per quarter, each with its own x axis.
df %>%
  # cut() dispatches to the Date method; labels = FALSE returns integer codes
  mutate(quarter = cut(Date, breaks = "quarter", labels = FALSE)) %>%
  ggplot(aes(x = Date, y = Volume)) +
  # geom_col() always uses the y values as bar heights (no `stat` argument)
  geom_col(
    width = 0.9,
    fill = "coral",
    alpha = 0.5,
    colour = "black",
    position = "dodge"
  ) +
  scale_x_date(date_breaks = "1 day", date_labels = "%m/%d") +
  ggtitle("Volume Sold by Date") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  ) +
  facet_wrap(
    ~ quarter,
    ncol = 1,
    scales = "free_x",
    labeller = labeller(quarter = plot_labels)
  )
Here's some sample data for a company's Net revenue split by two cohorts:
# Sample revenue data: 48 consecutive months starting January 2000, with two
# rows per month (one per cohort). Revenue is a rising baseline multiplied by
# random noise centred on 1.
data <- data.frame(
  dates = rep(
    seq(as.Date("2000/1/1"), by = "month", length.out = 48),
    each = 2
  ),
  revenue = rnorm(96, mean = 1, sd = 0.1) *
    rep(seq(from = 10000, by = 1000, length.out = 48), each = 2),
  # the two labels are recycled down the 96 rows, alternating row by row
  cohort = c("Group 1", "Group 2")
)
I can show one year's worth of data and it returns what I would expect:
# Limits of the x axis (first and last month shown).
start <- "2000-01-01"
end <- "2000-12-01"

# Dodged bars of monthly revenue per cohort, with the rounded revenue printed
# above each bar.
ggplot(data = data, mapping = aes(x = dates, y = revenue, fill = cohort)) +
  geom_bar(stat = "identity", position = position_dodge(width = NULL)) +
  geom_text(
    mapping = aes(label = round(revenue, 0)),
    position = position_dodge(width = 25),
    size = 3,
    vjust = -0.5
  ) +
  scale_x_date(limits = as.Date(c(start, end)), date_breaks = "1 month") +
  scale_fill_manual(values = c("#00BFC4", "#F8766D")) +
  labs(
    x = "Month",
    y = "Net Revenue",
    title = "Monthly Revenue by Group"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 60, hjust = 1)
  )
But if I expand the date range to two years or more and rerun the graph, it shows additional months on both sides of the x-axis despite not displaying any information on the y-axis.
# Widen the x-axis limits to two years.
start <- "2000-01-01"
end <- "2001-12-01"
# rerun the ggplot code from above
Note the non-existent data points for 1999-12-01 and 2002-01-01. Why do these appear and how can I remove them?
Many (all?) of the scale_* functions take expand= as an argument. It's common in R plots (both base and ggplot2) to expand the axes just a little bit (4% on each end, I believe), I think so that none of the lines/points are scrunched up against the "box" boundary.
If you include expand=c(0,0), you get what you want.
(BTW: you have mismatched parens. Fixed here.)
# Same plot as in the question, with the x-axis padding removed via
# expand = c(0, 0).
ggplot(data = data, mapping = aes(x = dates, y = revenue, fill = cohort)) +
  geom_bar(stat = "identity", position = position_dodge(width = NULL)) +
  geom_text(
    mapping = aes(label = round(revenue, 0)),
    position = position_dodge(width = 25),
    size = 3,
    vjust = -0.5
  ) +
  scale_x_date(
    limits = as.Date(c(start, end)),
    date_breaks = "1 month",
    expand = c(0, 0)
  ) +
  scale_fill_manual(values = c("#00BFC4", "#F8766D")) +
  labs(
    x = "Month",
    y = "Net Revenue",
    title = "Monthly Revenue by Group"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 60, hjust = 1)
  )
I am not sure what exactly the issue is, but if you change the x-axis from the "Date" class to any other class, it seems to work as expected. It also helps to filter the data to the specific range before passing it to ggplot.
For example in this case changing dates to month-year format,
library(dplyr)
library(ggplot2)
# Plotting window, as Date objects.
start <- as.Date("2000-01-01")
end <- as.Date("2001-12-01")
# Every "Mon-YYYY" label for 2000 and 2001, in chronological order
# (Jan-2000, ..., Dec-2000, Jan-2001, ..., Dec-2001).
all_fac <- paste(
  rep(month.abb, times = 2),
  rep(2000:2001, each = 12),
  sep = "-"
)
# Keep only the rows inside the window, turn the dates into an ordered
# "Mon-YYYY" factor (levels taken from all_fac), and plot on that discrete
# axis instead of a date axis.
data %>%
  filter(between(dates, start, end)) %>%
  mutate(dates = factor(format(dates, "%b-%Y"), levels = all_fac)) %>%
  ggplot(mapping = aes(x = dates, y = revenue, fill = cohort)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(mapping = aes(label = round(revenue, 0))) +
  scale_fill_manual(values = c("#00BFC4", "#F8766D")) +
  labs(
    x = "Month",
    y = "Net Revenue",
    title = "Monthly Revenue by Group"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 60, hjust = 1)
  )
Please beautify/change the labels on the bars.
I'm plotting a stacked bar graph and use geom_text to insert the value and name of each stack. The problem is some stacks are very small/narrow, so that the text of two stacks overlap each other and hence is not very readable. How can I modify the code to solve this issue.
# Example data: 15 observations, each with a Type, a Category and a count.
# The three vectors are parallel (element i of each describes row i).
Type <- c(
  "ddddddddddd", "ddddddddddd", "bbbbbbbbbbbbb",
  "ddddddddddd", "eeeeeeeeeeeeee", "bbbbbbbbbbbbb",
  "ddddddddddd", "bbbbbbbbbbbbb", "ddddddddddd",
  "eeeeeeeeeeeeee", "mmmmmmmmmmmmmmmmmmm", "bbbbbbbbbbbbb",
  "ddddddddddd", "bbbbbbbbbbbbb", "eeeeeeeeeeeeee"
)
Category <- c(
  "mmmmm", "mmmmm", "gggggggggggggggggg",
  "ffffffffffff", "ffffffffffff", "ffffffffffff",
  "sanddddddddd", "sanddddddddd", "yyyyyyyyyyy",
  "yyyyyyyyyyy", "yyyyyyyyyyy", "sssssssssssssss",
  "sssssssssssssss", "sssssssssssssss", "ttttttttttttt"
)
Frequency <- c(
  4, 1, 30,
  7, 127, 11,
  1, 1, 6,
  9, 1, 200,
  3, 4, 5
)
Data <- data.frame(Type = Type, Category = Category, Frequency = Frequency)
# Stacked bars of Frequency per Type, filled by Category, with the frequency
# and the category name both printed as text layers.
p <- ggplot(data = Data, mapping = aes(x = Type, y = Frequency)) +
  geom_bar(
    mapping = aes(fill = Category),
    stat = "identity",
    show.legend = FALSE
  ) +
  geom_text(mapping = aes(label = Frequency), size = 3) +
  geom_text(mapping = aes(label = Category), size = 3)
Considering your data, a facetted plot might be a better approach:
# summarise your data
library(dplyr)
# Shorten Type and Category to their first two characters, then total the
# frequencies for each Type/Category pair.
d1 <- Data %>%
  # across() replaces the deprecated mutate_each(funs(...)) idiom
  mutate(across(c(Type, Category), ~ substr(.x, 1, 2))) %>%
  group_by(Type, Category) %>%
  # .groups = "drop" returns an ungrouped result
  summarise(Freq = sum(Frequency), .groups = "drop") %>%
  # create a label by pasting the 'Category' and the 'Freq' variables together
  mutate(lbl = paste(Category, Freq))

# plot: one facet per Type, one bar per Category, label written above the bar
ggplot(d1, aes(x = Category, y = Freq, fill = Category)) +
  geom_bar(stat = "identity", width = 0.7, position = position_dodge(0.8)) +
  geom_text(
    aes(label = lbl),
    angle = 90,
    size = 5,
    hjust = -0.1,
    position = position_dodge(0.8)
  ) +
  # headroom above the tallest bar so its label fits inside the panel
  scale_y_continuous(limits = c(0, 240)) +
  # "none" replaces the deprecated guides(fill = FALSE)
  guides(fill = "none") +
  facet_grid(. ~ Type, scales = "free", space = "free") +
  theme_bw(base_size = 14)
which gives:
In the above plot I shortened the labels on purpose. If you don't want to do that, you could consider this:
# Total the frequencies for each Type/Category pair, keeping the full names.
d2 <- Data %>%
  group_by(Type, Category) %>%
  # .groups = "drop" returns an ungrouped result
  summarise(Freq = sum(Frequency), .groups = "drop") %>%
  # label = category name followed by its total frequency
  mutate(lbl = paste(Category, Freq))

# One facet per Type, one bar per Category. The labels start at y = 5 and run
# up the bars, so the x-axis text and ticks are hidden as redundant.
ggplot(d2, aes(x = Category, y = Freq, fill = Category)) +
  geom_bar(stat = "identity", width = 0.7, position = position_dodge(0.8)) +
  geom_text(
    aes(y = 5, label = lbl),
    alpha = 0.6,
    angle = 90,
    size = 5,
    hjust = 0,
    position = position_dodge(0.8)
  ) +
  scale_y_continuous(limits = c(0, 240)) +
  # "none" replaces the deprecated guides(fill = FALSE)
  guides(fill = "none") +
  facet_grid(. ~ Type, scales = "free", space = "free") +
  theme_bw(base_size = 14) +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
which gives: