I need to essentially do a graph of x-axis (the date) and y-axis (volume sold) and have each day in a calendar year be represented.
There are two issues. The first is the white space between dates: I tried to use factor(Date), but ran into problems when I wanted to make additional changes to the graph. The second is that the x-axis is currently labelled by month, which looks fine; however, when I try to label it by day, I get ...
In short, it looks like a mess. Probably because I'm trying to put every date in at once. Below is my code as is.
# Bar chart of daily sales volume with one x-axis label per calendar day.
ggplot(data = MyDataCSV, mapping = aes(x = Date, y = Volume)) +
  geom_col(
    stat = "identity",
    position = "dodge",
    width = 0.9,
    colour = "black",
    fill = "coral",
    alpha = 0.5
  ) +
  # One break per day, labelled as month/day.
  scale_x_date(date_breaks = "1 day", labels = date_format("%m/%d")) +
  labs(title = "Volume Sold by Date") +
  # Rotate the date labels and centre the title.
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
I'm a beginner to R, so I know this is beginner level, but I'm very confused. I have ggplot, tidyverse, dplyr, lubridate, and scales installed. Essentially, I want my labels on the x-axis to look like the first picture, except with every date in my data set (about a year)
One solution is to specify the dimensions of the saved figure, e.g.
# Load libraries
library(tidyverse)
library(lubridate)

# Generate a fake dataset (minimal reproducible example).
# Seeding the RNG makes the random volumes identical on every run.
set.seed(1)
df <- data.frame(
  Date = seq(ymd("2021-01-01"), ymd("2021-12-31"), by = "1 day")
)
# One value per date, so the date range can change (e.g. to a leap year)
# without the column lengths going out of sync.
df$Volume <- runif(nrow(df), min = 0, max = 4e+08)

# Plot the fake data.
# geom_col() always uses the identity stat; passing `stat` only produces an
# "Ignoring unknown parameters" warning, so it is omitted here.
p <- ggplot(df, aes(x = Date, y = Volume)) +
  geom_col(
    width = 0.9,
    fill = "coral",
    alpha = 0.5,
    colour = "black",
    position = "dodge"
  ) +
  # One break per day, labelled as month/day.
  scale_x_date(date_breaks = "1 day", labels = scales::date_format("%m/%d")) +
  ggtitle("Volume Sold by Date") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
p

# Save the plot.
# The plot is passed explicitly so the saved file does not depend on which
# plot happened to be displayed last. limitsize = FALSE allows the very wide
# canvas (60 x 10 inches) needed to fit 365 labels without overlap.
ggsave(
  filename = "example_1.png",
  plot = p,
  width = 60,
  height = 10,
  limitsize = FALSE
)
Then, if you zoom in, you can see the dates don't overlap:
Otherwise, you could change the date_breaks to "1 month", or "1 week" to stop the dates overlapping whilst keeping a 'normal' figure size:
# Plot the fake data with one x-axis label per week instead of per day.
# geom_col() always uses the identity stat; passing `stat` only produces an
# "Ignoring unknown parameters" warning, so it is omitted here.
ggplot(df, aes(x = Date, y = Volume)) +
  geom_col(
    width = 0.9,
    fill = "coral",
    alpha = 0.5,
    colour = "black",
    position = "dodge"
  ) +
  # Weekly breaks keep the labels readable at a normal figure size.
  scale_x_date(date_breaks = "1 week", labels = scales::date_format("%m/%d")) +
  ggtitle("Volume Sold by Date") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
Or, the option that I would probably recommend, you could create facets by e.g. every 3 months:
# Load libraries
library(tidyverse)
library(lubridate)

# Generate a fake dataset (minimal reproducible example).
# Seeding the RNG makes the random volumes identical on every run.
set.seed(1)
df <- data.frame(
  Date = seq(ymd("2021-01-01"), ymd("2021-12-31"), by = "1 day")
)
# One value per date, so the date range can change without the column
# lengths going out of sync.
df$Volume <- runif(nrow(df), min = 0, max = 4e+08)

# Facet strip titles, keyed by the integer quarter code computed below.
plot_labels <- c(
  "1" = "First Quarter, 2021",
  "2" = "Second Quarter, 2021",
  "3" = "Third Quarter, 2021",
  "4" = "Fourth Quarter, 2021"
)

# Plot the fake data, one facet row per quarter.
df %>%
  # cut() dispatches to the Date method; labels = FALSE returns the integer
  # codes 1..4 that plot_labels is keyed by.
  mutate(quarter = cut(Date, breaks = "quarter", labels = FALSE)) %>%
  ggplot(aes(x = Date, y = Volume)) +
  # geom_col() always uses the identity stat; passing `stat` only produces an
  # "Ignoring unknown parameters" warning, so it is omitted here.
  geom_col(
    width = 0.9,
    fill = "coral",
    alpha = 0.5,
    colour = "black",
    position = "dodge"
  ) +
  scale_x_date(date_breaks = "1 day", labels = scales::date_format("%m/%d")) +
  ggtitle("Volume Sold by Date") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  ) +
  # scales = "free_x" lets each row show only its own three months.
  facet_wrap(
    ~ quarter,
    ncol = 1,
    scales = "free_x",
    labeller = labeller(quarter = plot_labels)
  )
Related
ggplot2 places the tick marks centrally under each bar when I use this code:
# Yearly counts drawn as bars, each year positioned at 1 January.
tibble(year = 2010:2020, count = sample(1:100, 11)) %>%
  # Build a "01/01/<year>" string and parse it as day/month/year.
  mutate(year = dmy(paste0("01/01/", year))) %>%
  ggplot() +
  geom_bar(
    mapping = aes(x = year, y = count),
    stat = "identity",
    width = 240,
    position = position_dodge(width = 0.1)
  ) +
  scale_y_continuous(limits = c(0, 100), expand = c(0, 0)) +
  scale_x_date(date_breaks = "1 year", date_labels = "%y") +
  theme_classic()
But I need tick marks on either side of each year's bar. I have photoshopped tick marks into the plot to show where I need them. What code can I use to add tick marks on either side of each year?
You can draw the tick marks yourself with geom_segment(), turn off the clipping and hide the default ones.
library(ggplot2)
library(dplyr)
library(lubridate)

# Eleven yearly counts, each dated 1 January of its year.
tbl <- tibble(year = 2010:2020, count = sample(1:100, 11)) %>%
  mutate(year = dmy(paste0("01/01/", year)))

ggplot(tbl) +
  geom_bar(
    mapping = aes(x = year, y = count),
    stat = "identity",
    width = 240,
    position = position_dodge(width = 0.1)
  ) +
  # Hand-drawn ticks: one short segment below the axis at each bar edge,
  # i.e. half the 240-day bar width to the right and to the left of centre.
  lapply(c(240 / 2, -240 / 2), function(offset) {
    geom_segment(
      mapping = aes(x = year, xend = year),
      y = 0,
      yend = -1,
      colour = "black",
      position = position_nudge(x = offset)
    )
  }) +
  scale_x_date(date_labels = "%y", date_breaks = "1 year") +
  # oob_keep stops the scale from discarding the segments that end at y = -1,
  # outside the 0-100 limits.
  scale_y_continuous(
    limits = c(0, 100),
    expand = c(0, 0),
    oob = scales::oob_keep
  ) +
  # Clipping is turned off so the segments can be drawn outside the panel.
  coord_cartesian(clip = "off") +
  theme_classic() +
  # Hide the default, centred tick marks.
  theme(axis.ticks = element_blank())
Created on 2021-04-10 by the reprex package (v1.0.0)
I'm trying to find out why I'm not able to change the color of my bars. Hope you can help, I'm kinda new to this.
# Original attempt from the question, reproduced exactly as posted.
# NOTE(review): `ggplot2` is the package name; the plotting function is ggplot().
ggplot2(data, aes(x = data$Crop, y = data$"2018"))+
# NOTE(review): geom_bar() counts rows by default and does not accept a y
# aesthetic with that stat; geom_col() draws the supplied y values.
geom_bar(color="black", fill="red") +
theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
# NOTE(review): `main=` is a base-graphics argument, not something that can be
# added to a ggplot; ggtitle() or labs(title = ) sets the title.
main="Production value per crop in 2018" +
ylab("Production value in 2018")+
# NOTE(review): the trailing `+` below leaves the expression unfinished.
xlab("Crop")+
Hope to hear from you.
A little tweak of your code:
library(tidyverse)

# Small stand-in dataset: one production value per crop.
data <- data.frame(
  Crop = c("East", "West", "North", "South"),
  Y2018 = c(1000, 2000, 3000, 400),
  stringsAsFactors = TRUE
)

# Columns are referenced by bare name inside aes(); ggplot2 looks them up in
# `data`, so `data$` is unnecessary (and breaks with facets or filtered data).
ggplot(data, aes(x = Crop, y = Y2018)) +
  # geom_col() draws the supplied y values; colour is the outline, fill the bar.
  geom_col(color = "black", fill = "red") +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
  # labs() takes labels keyed by aesthetic name (`x`, `y`). `xlab`/`ylab` are
  # not aesthetics, so passing them to labs() leaves the axis titles unchanged.
  labs(
    title = "Production value per crop in 2018",
    x = "Crop",
    y = "Production value in 2018"
  )
Hope it helps
This is the code, which worked, though xlab gave an error referring to unexpected symbol
# Attach packages before any of their functions are called.
library(readxl) # provides read_xlsx(); library(tidyverse) does not attach it
library(ggplot2)
library(tidyverse)

setwd("C:/Users/####/OneDrive/Documenten") # include the path to your data
data <- read_xlsx("datasetR.xlsx")
str(data) # get an overview of the data

# Map the columns directly instead of copying them into global vectors.
# `2018` is not a syntactic R name, so it needs backticks inside aes().
ggplot(data, aes(x = Crop, y = `2018`)) +
  geom_col(width = 1, fill = "red") +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
  ggtitle("Production value per crop in 2018") +
  ylab("Production value in 2018") +
  xlab("Crop")
enter image description here
I generate a plot with code below:
# Line-and-point chart of sentiment values over time, one colour per sentiment.
ggplot(data = reshaped_median, mapping = aes(x = Month_Yr, y = value)) +
  geom_line(mapping = aes(color = Sentiments)) +
  geom_point(mapping = aes(color = Sentiments)) +
  labs(
    title = 'Change in Sentiments (in median)',
    x = 'Month_Yr',
    y = 'Proportion of Sentiments %'
  ) +
  # Tilt the x-axis labels so neighbouring dates do not collide.
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
But as you can see, the date labels on the x-axis are too dense, so I would like to display the dates quarterly or half-yearly (every 3 or 6 months) instead.
The values from Month_Yr are with format %Y-%m.
How could I do that? Thanks.
First convert the column to the Date class (as.yearmon() comes from the zoo package): df$Month_Yr <- as.Date(as.yearmon(df$Month_Yr))
Then the following solves the issue:
# Same plot as in the question, with the x-axis thinned to one label per
# quarter. Month_Yr must already be of class Date for scale_x_date() to apply.
ggplot(reshaped_median, aes(x = Month_Yr, y = value)) +
  geom_line(aes(color = Sentiments)) +
  geom_point(aes(color = Sentiments)) +
  # Set date_breaks to "3 months", "6 months", etc. to control label spacing.
  # Labels use year-month: the source values are year-month only, so a day
  # component carries no information, and omitting the year would make labels
  # from different years indistinguishable.
  scale_x_date(date_labels = "%Y-%m", date_breaks = "3 months") +
  labs(
    title = 'Change in Sentiments (in median)',
    x = 'Month_Yr',
    y = 'Proportion of Sentiments %'
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
Here's another way. With scale_x_date you can manipulate the breaks on your x-axis easily.
library(ggplot2)
library(tibble)

# Monthly example series: January 2010 through January 2020 (121 months),
# each with a uniform random value between 0 and 150.
data <- tibble(
  Month_Yr = seq(as.Date("2010/01/01"), as.Date("2020/01/31"), by = "month"),
  Value = runif(length(Month_Yr), min = 0, max = 150)
)

# Scatter plot with an x-axis label every six months, tilted for legibility.
p <- ggplot(data, mapping = aes(x = Month_Yr, y = Value)) +
  geom_point() +
  scale_x_date(date_breaks = "6 months") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

p
Here's some sample data for a company's Net revenue split by two cohorts:
# Sample data: 48 consecutive months starting January 2000, two rows per month
# (one per cohort). Revenue rises by 1000 per month with ~10% random noise.
data <- data.frame(
  dates = rep(
    seq(as.Date("2000/1/1"), by = "month", length.out = 48),
    each = 2
  ),
  revenue = rep(seq(10000, by = 1000, length.out = 48), each = 2) *
    rnorm(96, mean = 1, sd = 0.1),
  # Alternates Group 1 / Group 2 down the 96 rows.
  cohort = rep(c("Group 1", "Group 2"), times = 48)
)
I can show one year's worth of data and it returns what I would expect:
# Date window shown on the x-axis.
start <- "2000-01-01"
end <- "2000-12-01"

# Dodged monthly revenue bars per cohort, with the rounded value above each bar.
ggplot(data, mapping = aes(x = dates, y = revenue, fill = cohort)) +
  geom_bar(stat = "identity", position = position_dodge(width = NULL)) +
  geom_text(
    mapping = aes(label = round(revenue, 0)),
    position = position_dodge(width = 25),
    vjust = -0.5,
    size = 3
  ) +
  scale_x_date(date_breaks = "1 month", limits = as.Date(c(start, end))) +
  scale_fill_manual(values = c("#00BFC4", "#F8766D")) +
  labs(title = "Monthly Revenue by Group", x = "Month", y = "Net Revenue") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
But if I expand the date range to two years or more and rerun the graph, it shows additional months on both sides of the x-axis despite not displaying any information on the y-axis.
# Widen the window to two years, then rerun the ggplot code from above.
start <- "2000-01-01"
end <- "2001-12-01"
Note the non-existent data points for 1999-12-01 and 2002-01-01. Why do these appear and how can I remove them?
Many (all?) of the scale_* functions take expand= as an argument. It's common in R plots (both base and ggplot2) to expand the axes just a little bit (4% on each end, I believe), I think so that none of the lines/points are scrunched up against the "box" boundary.
If you include expand=c(0,0), you get what you want.
(BTW: you have mismatched parens. Fixed here.)
# Same plot as in the question, with the x-axis padding switched off.
ggplot(data, mapping = aes(x = dates, y = revenue, fill = cohort)) +
  geom_bar(stat = "identity", position = position_dodge(width = NULL)) +
  geom_text(
    mapping = aes(label = round(revenue, 0)),
    position = position_dodge(width = 25),
    vjust = -0.5,
    size = 3
  ) +
  # expand = c(0, 0) removes the extra space ggplot2 adds beyond the limits.
  scale_x_date(
    date_breaks = "1 month",
    limits = as.Date(c(start, end)),
    expand = c(0, 0)
  ) +
  scale_fill_manual(values = c("#00BFC4", "#F8766D")) +
  labs(title = "Monthly Revenue by Group", x = "Month", y = "Net Revenue") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
I am not sure what exactly the issue is, but if you change the x-axis from the "Date" class to any other class, it seems to work as expected. It also helps to filter the data to the specific range before passing it to ggplot.
For example in this case changing dates to month-year format,
library(dplyr)
library(ggplot2)

# Date window to plot.
start <- as.Date("2000-01-01")
end <- as.Date("2001-12-01")

# Factor levels in calendar order, one per month in the window. They are built
# with the same format() call that is applied to the data below, so the levels
# always match the labels (format's "%b" is locale-dependent, month.abb is
# not) and they follow `start`/`end` instead of hard-coded years.
all_fac <- format(seq(start, end, by = "month"), "%b-%Y")

data %>%
  filter(between(dates, start, end)) %>%
  # A discrete (factor) x-axis shows exactly the months present in all_fac.
  mutate(dates = factor(format(dates, "%b-%Y"), levels = all_fac)) %>%
  ggplot(aes(fill = cohort, x = dates, y = revenue)) +
  geom_bar(stat = "identity", position = "dodge") +
  xlab("Month") +
  ylab("Net Revenue") +
  # Dodge the labels by the same width as the bars (0.9 is the default bar
  # width on a discrete axis) so each value sits above its own bar instead of
  # both cohorts' labels overlapping at the month centre.
  geom_text(
    aes(label = round(revenue, 0)),
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 3
  ) +
  ggtitle("Monthly Revenue by Group") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  ) +
  scale_fill_manual(values = c("#00BFC4", "#F8766D"))
Please beautify/change the labels on the bars.
I have the following plot but I want to add additional labels on the x axis.
I've already tried scale_x_continuous but it doesn't work since my values are not numeric values, but dates.
how can I solve this?
If by "more x values" you mean that you would like to have more labels on your x-axis, then you can adjust the frequency using the date_breaks argument of scale_x_date(), like so:
# One axis break per month, labelled as abbreviated month name and two-digit year.
scale_x_date(date_breaks = "1 month", date_labels = "%b-%y")
Here is my working example. Please post your own if I misunderstood your question:
library(ggplot2)

# make the results reproducible
set.seed(5117)

start_date <- as.Date("2015-01-01")
end_date <- as.Date("2017-06-10")

# a step of one week gives one observation per week (adjust as needed)
dates <- seq(start_date, end_date, by = "week")
val1 <- rnorm(length(dates), mean = 12.5, sd = 3)

# 5th, 25th, 75th and 95th percentiles, used as reference lines below
qnt <- quantile(val1, probs = c(0.05, 0.25, 0.75, 0.95))
mock <- data.frame(myDate = dates, val1 = val1)

ggplot(mock, aes(x = myDate, y = val1)) +
  geom_line() +
  geom_point() +
  # outer percentiles (5% and 95%) in red
  geom_hline(yintercept = qnt[c(1, 4)], colour = "red") +
  # inner percentiles (25% and 75%) in light green
  geom_hline(yintercept = qnt[c(2, 3)], colour = "lightgreen") +
  scale_x_date(date_breaks = "1 month", date_labels = "%b-%y") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))