Starting with the following dataset:
$ Orders,Year,Date
1608052.2,2019,2019-08-02
1385858.4,2018,2018-07-27
1223593.3,2019,2019-07-25
1200356.5,2018,2018-01-20
1198226.3,2019,2019-07-15
837866.1,2019,2019-07-02
Trying to make a similar format as:
with the criteria: X-axis will be days or months, y-axis will be sum of Orders, grouping / colors will be by year.
Attempts:
1) No overlay
# Attempt 1: a single time axis, so the years follow one another instead of
# being overlaid. Column names follow the dataset shown above
# (`$ Orders`, `Year`, `Date`).
dataset %>%
  ggplot(aes(
    x = as.Date(Date),
    y = `$ Orders`,
    group = Year,
    color = Year
  )) +
  geom_line()
2) ggplot month grouping
# Attempt 2: derive Year and abbreviated Month from the date and use Month as
# the x-axis so that the years share one axis.
dataset2 <- dataset %>%
  mutate(Date = as.Date(Date)) %>%   # the closing parenthesis was missing here
  mutate(Year = format(Date, "%Y")) %>%
  mutate(Month = format(Date, "%b"))

ggplot(
  data = dataset2,
  aes(x = Month, y = `$ Orders`, group = Year, color = factor(Year))
) +
  geom_line(size = .75) +
  ylab("Volume")
The lubridate package is your answer. Extract month from the Date field and turn it into a variable. This code worked for me:
library(tidyverse)
library(lubridate)
dataset <- read_delim("OrderValue,Year,Date\n1608052.2,2019,2019-08-02\n1385858.4,2018,2018-07-27\n1223593.3,2019,2019-07-25\n1200356.5,2018,2018-01-20\n1198226.3,2019,2019-07-15\n837866.1,2019,2019-07-02", delim = ",")
# Tag every order with its calendar month number (1-12).
dataset <- dataset %>%
  mutate(theMonth = month(Date))

# Sum the orders per year and month so each year's line has exactly one point
# per month; plotting the raw rows would put several points on the same x.
monthly_orders <- dataset %>%
  group_by(Year, theMonth) %>%
  summarise(OrderValue = sum(OrderValue), .groups = "drop")

ggplot(
  monthly_orders,
  aes(
    x = as.factor(theMonth),
    y = OrderValue,
    group = as.factor(Year),
    color = as.factor(Year)
  )
) +
  geom_line()
Related
Similar to this question: "Split up time series per year for plotting", which was done in Python, I want to display a daily time series as multiple lines, one per year. How can I achieve this in R?
library(ggplot2)
library(dplyr)
# Dummy data: 365 consecutive days counting back from 2017-06-14, with a
# noisy quadratic trend as the value
df <- data.frame(
  day = as.Date("2017-06-14") - seq(0, 364),
  value = runif(365) + seq(-140, 224)^2 / 10000
)

# Basic line plot of the whole series, x-axis title blanked out
p <- ggplot(df, aes(x = day, y = value)) + geom_line() + labs(x = "")
p
Out:
One solution is using ggplot2, but date_labels are displayed incorrectly:
library(tidyverse)
library(lubridate)
# Overlay the years: keep the real year as a factor for the colour and move
# every date into one constant year so all curves share the same x-axis.
p <- df %>%
  mutate(
    # `df` stores its dates in the `day` column; there is no `date` column
    date = as.Date(day),
    year = factor(year(date)),      # use year to define separate curves
    # NOTE(review): year 1 is not a leap year, so a Feb 29 would presumably
    # become NA here; a leap year such as 2000 may be safer - confirm.
    date = update(date, year = 1)   # use a constant year for the x-axis
  ) %>%
  ggplot(aes(date, value, color = year)) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b")

# Raw daily data
p + geom_line()
Out:
An alternative solution is to use gg_season() from the feasts package:
library(feasts)
library(tsibble)
library(dplyr)
# Seasonal plot of turnover for one state/industry series of aus_retail
tsibbledata::aus_retail |>
  filter(
    State == "Victoria" &
      Industry == "Cafes, restaurants and catering services"
  ) |>
  gg_season(Turnover)
Out:
References:
Split up time series per year for plotting
R - How to create a seasonal plot - Different lines for years
If you want your x axis to represent the months from January to December, then perhaps getting the yday (day of the year) of each date and adding it to the first of January of an arbitrary reference year would be simplest:
library(tidyverse)
library(lubridate)
# Dummy data: 365 consecutive days counting back from 2017-06-14
df <- data.frame(
  day = as.Date("2017-06-14") - 0:364,
  value = runif(365) + seq(-140, 224)^2 / 10000
)

# Overlay the years by projecting every date onto the reference year 2017.
df %>%
  mutate(
    year = factor(year(day)),
    # yday() is 1-based (Jan 1 -> 1), so subtract 1; otherwise Jan 1 lands on
    # Jan 2 and Dec 31 spills over into the following year.
    date = as.Date("2017-01-01") + yday(day) - 1
  ) %>%
  ggplot(aes(date, value, color = year)) +
  geom_line() +
  scale_x_date(
    # one break at the first of each month of the reference year
    breaks = seq(as.Date("2017-01-01"), by = "month", length.out = 12),
    date_labels = "%b"
  )
Created on 2023-02-07 with reprex v2.0.2
I tend to think simple is better:
# Add the year as a text column, then draw one coloured line per year
df |>
  transform(year = format(day, "%Y")) |>
  ggplot(aes(x = day, y = value, colour = year, group = year)) +
  geom_line() +
  labs(x = NULL)
optionally removing the year legend with + guides(colour = "none").
If I would like to aggregate the data by month, an approach is the following:
library(dplyr)
library(lubridate)
# Reproducible example data: one gamma-distributed amount per day of 2016.
set.seed(2017)
options(digits = 4)
# tibble() replaces the deprecated data_frame(); columns are built in order,
# so `length(date)` refers to the date column defined just above it.
(expenses <- tibble(
  date = seq(as.Date("2016-01-01"), as.Date("2016-12-31"), by = 1),
  amount = rgamma(length(date), shape = 2, scale = 20)
))
Then I summarized them by month like this:
# Total amount per calendar month (month = first day of that month)
expenses %>%
  mutate(month = floor_date(date, unit = "month")) %>%
  group_by(month) %>%
  summarize(amount = sum(amount))
I would like to plot a histogram of the variable amount for each month. How could I do it?
Extract the month from the date and use facets to show the histogram for each month in a separate panel.
library(dplyr)
library(ggplot2)
# One histogram panel per month, panels kept in chronological order
expenses |>
  arrange(date) |>
  mutate(month = format(date, "%b %Y")) |>
  # levels follow first appearance, i.e. date order after arrange()
  mutate(month = factor(month, levels = unique(month))) |>
  ggplot(aes(x = amount)) +
  geom_histogram(bins = 10) +
  facet_wrap(vars(month))
I am trying to do a faceted plot of a grouped dataframe with ggplot2, using geom_line(). My dataframe has a Date column and I would like to have dates on the horizontal axis. If I just use Date in aes(x=Date, ...) I get nice labels on the horizontal axis. However, the line has an almost horizontal section where the date jumps from the end of one group to the beginning of the next group. This code and chart shows that:
# Two years of daily dates, each tagged with its calendar month number
dts <- seq(as.Date("2020-01-01"), as.Date("2021-12-31"), by = "day")
mos <- month(dts)
df <- data.frame(Date = dts, Month = mos)
nr <- nrow(df)
df$X <- rep(1, nr)
dfgrp <- group_by(df, Month)

# Running count per month, one facet per month with the real dates on x.
# Each month group holds days from both years, hence the long connecting
# segment between the two years inside every facet.
dfgrp |>
  group_by(Month) |>
  mutate(Time = Date[seq_len(n())], Z = cumsum(X)) |>
  ggplot(aes(x = Date, y = Z)) +
  geom_line(color = "darkgreen", size = 0.5) +
  facet_grid(. ~ Month, scales = "free_x") +
  theme(axis.text.x = element_text(angle = 45, size = 7))
I would not like my chart to have those almost-horizontal lines when the date changes by a large amount. I was able to generate a chart without those lines using integers on aes() as follows:
# Second attempt: plot against an integer position instead of the date.
# dfgrp is grouped by Month, so Time and Z are computed within each month.
dfgrp %>%
# `%>%` binds tighter than `:`, so this parses as 1:(n() %>% as.integer())
mutate(Time = 1:n() %>% as.integer(),
Z = cumsum(X)) %>%
ggplot(aes(x=Time, y=Z)) +
geom_line(color="darkgreen", size=0.5) +
facet_grid(. ~ Month, scale="free_x") +
# The label function indexes dfgrp$Date with the break value x. That is a
# position in the whole data frame, not in the facet's month group, so every
# facet is labelled with the earliest dates of the full table.
scale_x_continuous(breaks = seq(from=1, to=nr, by=10) %>% as.integer(),
labels = function(x) as.character(dfgrp$Date[x])) +
theme(axis.text.x = element_text(angle=45, size=7))
The line on the chart looks like I want it but the dates on the horizontal axis are not correct: they end in February 2020 in every facet while the dates in the dataframe end in December 2021 and the dates in the first chart begin and end on different months in different facets.
I tried many things but nothing worked. Any suggestions on how to have a chart with dates like in the first chart above and lines like in the second chart above?
Help will be much appreciated.
You may want to adjust the dates to be in the same year, but noting the original year as a variable:
library(lubridate)
# Move every date into the same reference year (2020) for the x-axis and keep
# the original year as a discrete variable, giving one line per year in each
# month facet.
dfgrp %>%
  group_by(Month) %>%
  mutate(
    # factor, not numeric: a numeric year would get a continuous colour scale
    year = factor(year(Date)),
    # 2020 was a leap year, so Feb 29 is not lost
    adj_date = ymd(paste(2020, month(Date), day(Date))),
    # running count within the Month group (it continues across the years)
    Z = cumsum(X)
  ) %>%
  ggplot(aes(x = adj_date, y = Z, color = year, group = year)) +
  geom_line(size = 0.5) +
  facet_grid(. ~ Month, scales = "free_x") +
  theme(axis.text.x = element_text(angle = 45, size = 7))
I have a dataframe in R where:
Date MeanVal
2002-01 37.70722
2002-02 43.50683
2002-03 45.31268
2002-04 14.96000
2002-05 29.95932
2002-09 52.95333
2002-10 12.15917
2002-12 53.55144
2003-03 41.15083
2003-04 21.26365
2003-05 33.14714
2003-07 66.55667
.
.
2011-12 40.00518
And when I plot a time series using ggplot with:
# Line plot of MeanVal over the "YYYY-MM" Date column. The `+` must end each
# line: a line that starts with `+` is parsed as a separate expression and
# ylab() would never be added to the plot.
ggplot(mean_data, aes(Date, MeanVal, group = 1)) +
  geom_line() +
  xlab("") +
  ylab("Mean Value")
I am getting:
but as you can see, the x axis scale is not very neat at all. Is there any way I could just scale it by year (2002,2003,2004..2011)?
Let's use lubridate's parse_date_time() to convert your Date to a date class:
library(tidyverse)
library(lubridate)
# Parse the "YYYY-MM" strings into date-times, then draw the series
mean_data |>
  mutate(Date = parse_date_time(as.character(Date), orders = "Y-m")) |>
  ggplot(mapping = aes(x = Date, y = MeanVal)) +
  geom_line()
Similarly, we can convert to an xts and use autoplot():
library(timetk)
# Parse the "YYYY-MM" strings, convert the table to an xts object and let
# autoplot() draw it.
mean_data %>%
  mutate(Date = parse_date_time(as.character(Date), "Y-m")) %>%
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  tk_xts(silent = TRUE) %>%
  autoplot()
This achieves the plot above as well.
library(dplyr)
# Use mutate() to strip the month ("-MM" suffix) from Date and cast the
# remaining year to an integer, so the x-axis is scaled by year.
# NOTE(review): all months of a year then share one x value - confirm whether
# a yearly aggregate was intended.
mean_data %>%
  mutate(Date = as.integer(gsub("-.*", "", Date))) %>%
  ggplot(aes(Date, MeanVal, group = 1)) +
  geom_line() +
  xlab("") +
  ylab("Mean Value")
I am trying to plot my data as a stacked bar chart using the ggplot2 package. I want to:
get the dataframe's row names on the x axis;
sum up the values by month and show the split by each column as well;
order the values in decreasing order for every month.
My data:
# Monthly amounts per category; one row per month, months as row names
neg.trans <- data.frame(
  Fraud           = c(1.686069964, 2.95565648, 1.170119649, 0.429596978),
  DeviceDeposit   = c(0.86629, 0.61366, 0.97226, 0.42835),
  Usagefees       = c(2.2937235, 2.294725, 2.587091, 1.841178),
  SecurityDeposit = c(1.616816492, 3.036161258, 5.820125209, 2.62082681)
)
rownames(neg.trans) <- c("2018-Oct", "2018-Nov", "2018-Dec", "2019-Jan")
I'd like to generate a chart that looks like below:
Is this possible to do this with R?
Here is an improved handling of the dates and a more base R (well still using ggplot2...) solution:
library(tidyverse)
# Long-format copy of neg.trans with a real Date column:
#   1. move the row names into a "Date" column so it can serve as the x-axis
#   2. parse it as a date (%Y = year, %b = abbreviated month name)
#   3. reshape from wide to long: one row per (Date, type) pair
my.df <- neg.trans |>
  rownames_to_column(var = "Date") |>
  mutate(Date = parse_date(Date, format = "%Y-%b")) |>
  gather(key = "type", value = "value", -Date)

# Stacked bars per month; the x-axis labels keep the "YYYY-Mon" look
ggplot(data = my.df, mapping = aes(x = Date, y = value, fill = type)) +
  geom_col() +
  scale_x_date(date_labels = "%Y-%b")
library(ggplot2)
# Copy the row names into a regular column so they can become the x-axis
neg.trans[["Date"]] <- rownames(neg.trans)

# The value columns that get stacked into a single column
ids <- c("Fraud", "DeviceDeposit", "Usagefees", "SecurityDeposit")

# Wide -> long with base reshape(); the source column name ends up in `time`
my.df <- reshape(
  neg.trans,
  direction = "long",
  idvar = "Date",
  varying = list(ids),
  v.names = "value",
  times = ids
)
rownames(my.df) <- NULL

# Append a day ("-01") so the text can be parsed as a full date
# (%Y = year, %b = abbreviated month name, %d = day of month)
my.df$Date <- as.Date(paste0(my.df$Date, "-01"), format = "%Y-%b-%d")

ggplot(data = my.df, mapping = aes(x = Date, y = value, fill = time)) +
  geom_col() +
  scale_x_date(date_labels = "%Y-%b")
Descending ordering
If you want to order your columns individually you can do the following (adapted from https://stackoverflow.com/a/53598064/5892059)
# Order the segments of each bar by decreasing value, separately per month.
# Sort by Date and type, turn type into a factor, then re-sort so that within
# each Date the largest value comes first.
my.df <- my.df %>%
arrange(Date, type) %>%
mutate(type = factor(type)) %>%
arrange(Date, -value)
# Row index of the first occurrence of each type (match() returns the first
# hit); used below to show one legend entry per type.
aux <- with(my.df, match(sort(unique(type)), type))
# Fill by the (-value, Date) interaction so that each bar segment is its own
# fill level.
ggplot(my.df, aes(Date, value, fill = interaction(-value, Date))) +
geom_col() +
# Colours: a 4-colour hue palette indexed by each row's type factor.
# NOTE(review): presumably relies on the row order of my.df lining up with the
# level order of the interaction - confirm before reusing with other data.
scale_fill_manual(values = scales::hue_pal()(4)[my.df$type],
labels = with(my.df, type[aux]),
breaks = with(my.df, interaction(-value, Date)[aux])) +
scale_x_date(date_labels = "%Y-%b")
In my opinion that looks confusing.
This? Hopefully someone suggests an edit. The way I've handled the date is really not the best.
library(tidyverse)
# Build "YYYY-Mon-01" labels from the row names and store them as a factor
# whose levels follow the row order (chronological in neg.trans).
# Assigning `levels(x) <- ...` to an existing factor only RENAMES its
# alphabetically sorted levels (Dec, Nov, Oct, Jan), which attached the
# October figures to December and vice versa; setting `levels =` when the
# factor is created orders the months without relabelling any data.
df <- neg.trans %>%
  mutate(
    Date = paste(row.names(.), 1, sep = "-0"),
    Day = rep(1, nrow(.)),
    Date = factor(Date, levels = unique(Date))
  )

# Long format (one row per Date/ID pair), then stacked bars per month
df %>%
  gather("ID", "Value", -Date, -Day) %>%
  select(-Day) %>%
  ggplot(aes(Date, Value, fill = ID)) +
  geom_col()
NOTE:
# Month abbreviation = second "-"-separated field of each date label
Months <- vapply(strsplit(as.character(df$Date), "-"), `[[`, character(1), 2)
# Translate the abbreviations into month numbers
Months <- recode(Months, "Jan" = 1, "Oct" = 10, "Nov" = 11, "Dec" = 12)

# Rebuild Date from year, month number and Day, then plot stacked bars
df %>%
  mutate(Months = Months, Date = str_remove_all(df$Date, "-.*")) %>%
  mutate(Date = as.factor(make_date(Date, Months, Day))) %>%
  gather("ID", "Value", -Date, -Day, -Months) %>%
  arrange(Date) %>%
  select(-Day, -Months) %>%
  ggplot(aes(Date, Value, fill = ID)) +
  geom_col()