If I would like to aggregate the data by month, an approach is the following:
library(dplyr)
library(lubridate)
set.seed(2017)
options(digits=4)
# Simulate one year of daily expenses: one row per calendar day of 2016 with a
# gamma-distributed amount. The outer parentheses print the result on creation.
# tibble() replaces data_frame(), which is deprecated.
(expenses <- tibble(
  date = seq(as.Date("2016-01-01"), as.Date("2016-12-31"), by = "day"),
  # `date` is defined on the line above, so its length sets the number of draws
  amount = rgamma(length(date), shape = 2, scale = 20)
))
Then I summarized them by month like this:
# Monthly totals: bucket each date to the first day of its month, then sum
expenses %>%
  mutate(month = floor_date(date, unit = "month")) %>%
  group_by(month) %>%
  summarize(amount = sum(amount))
I would like to plot a histogram of the variable amount for each month. How can I do it?
Extract the month value from date; then, using facets, you can show the histogram for every month in a separate panel.
library(dplyr)
library(ggplot2)
# One histogram panel per month, panels in chronological order
expenses %>%
  arrange(date) %>%
  mutate(
    month = format(date, "%b %Y"),
    # unique() keeps first-appearance order, so after sorting by date the
    # factor levels are chronological rather than alphabetical
    month = factor(month, levels = unique(month))
  ) %>%
  ggplot(mapping = aes(x = amount)) +
  geom_histogram(bins = 10) +
  facet_wrap(~month)
Related
Similar to this question: "Split up time series per year for plotting", which was done in Python, I want to display a daily time series as multiple lines, one per year. How can I achieve this in R?
library(ggplot2)
library(dplyr)
# Dummy data: 365 consecutive days ending on 2017-06-14, with uniform noise
# added to a parabolic trend
df <- data.frame(
  day = as.Date("2017-06-14") - 0:364,
  value = runif(365) + (-140:224)^2 / 10000
)

# Basic line chart of the daily series, with the x-axis title blanked out
p <- ggplot(df, aes(x = day, y = value)) +
  geom_line() +
  labs(x = "")
p
Out:
One solution is using ggplot2, but date_labels are displayed incorrectly:
library(tidyverse)
library(lubridate)
# Overlay the years as separate curves by moving every date onto one common
# year, then label the x-axis with abbreviated month names.
# NOTE(review): this reads a `date` column, but the dummy `df` built elsewhere
# in this file names its date column `day` - confirm which data is intended.
p <- df %>%
# mutate(date = ymd(date)) %>%
mutate(date=as.Date(date)) %>%
mutate(
year = factor(year(date)), # use year to define separate curves
# NOTE(review): year = 1 moves the dates to year 1; this may be related to the
# incorrectly displayed date labels mentioned in the text - confirm.
date = update(date, year = 1) # use a constant year for the x-axis
) %>%
ggplot(aes(date, value, color = year)) +
scale_x_date(date_breaks = "1 month", date_labels = "%b")
# Raw daily data
p + geom_line()
Out:
An alternative solution is to use gg_season from the feasts package:
library(feasts)
library(tsibble)
library(dplyr)
# Season plot of Turnover for Victorian cafes, restaurants and catering services
tsibbledata::aus_retail %>%
  filter(State == "Victoria" & Industry == "Cafes, restaurants and catering services") %>%
  gg_season(y = Turnover)
Out:
References:
Split up time series per year for plotting
R - How to create a seasonal plot - Different lines for years
If you want your x axis to represent the months from January to December, then perhaps getting the yday of the date and adding it to the first of January of an arbitrary year would be simplest:
library(tidyverse)
library(lubridate)
# Dummy data: 365 consecutive days ending on 2017-06-14
df <- data.frame(
  day = as.Date("2017-06-14") - 0:364,
  value = runif(365) + seq(-140, 224)^2 / 10000
)

# Overlay the years: colour by calendar year and place every observation on a
# shared reference year (2017) so the x-axis runs from January to December.
df %>%
  mutate(
    year = factor(year(day)),
    # yday() is 1-based (1 January is day 1), so subtract 1 before adding it to
    # 1 January; otherwise every point lands one day late.
    date = as.Date("2017-01-01") + yday(day) - 1
  ) %>%
  ggplot(aes(date, value, color = year)) +
  geom_line() +
  scale_x_date(
    # One break at the start of each month; `length.out` is spelled out rather
    # than relying on partial argument matching.
    breaks = seq(as.Date("2017-01-01"), by = "month", length.out = 12),
    date_labels = "%b"
  )
Created on 2023-02-07 with reprex v2.0.2
I tend to think simple is better:
# Colour and group the daily series by calendar year, keeping the real dates
# on the x-axis and dropping the x-axis title
ggplot(
  transform(df, year = format(day, "%Y")),
  aes(x = day, y = value, group = year, colour = year)
) +
  geom_line() +
  labs(x = NULL)
optionally removing the year legend with + guides(colour = "none").
I want to plot a cumulative-sum (cumsum) line for each year of a series, but each year has a different number of observations. I have tried using Chart_series, but it doesn't work.
My goal is to plot one line per year showing the cumulative evolution.
In the example I put only two years.
library(PerformanceAnalytics)
library(quantmod)
library(tidyverse)
library(tidyquant)
library(xts)
# Daily returns for each of the two years.
# NOTE(review): `TSLA` is not created in this snippet - presumably it was
# loaded beforehand (e.g. with getSymbols("TSLA")); confirm.
a<-dailyReturn(TSLA,subset='2020')
a2019<-dailyReturn(TSLA,subset='2019')
# Running (cumulative) sum of the daily returns, multiplied by 100
b<-cumsum(a)*100
b2019<-cumsum(a2019)*100
# Draw the 2019 series, then try to add the 2020 series to the same plot
plot(b2019)
lines(b)
We could get a single dataset and then do a group by cumsum before plotting
library(dplyr)
library(tibble)
library(lubridate)
library(PerformanceAnalytics)
library(quantmod)
library(ggplot2)
# Load the TSLA series into the workspace as `TSLA`
getSymbols("TSLA")

# Cumulative daily return (in percent) restarted at the beginning of each year
dailyReturn(TSLA, subset = c("2019", "2020")) %>%
  as.data.frame() %>%
  # The dates are carried as row names; move them into a real column
  rownames_to_column("Date") %>%
  mutate(Date = as.Date(Date)) %>%
  # Year is a factor, not a number: a numeric colour aesthetic yields a
  # continuous gradient and a single joined path instead of one line per year
  group_by(Year = factor(year(Date))) %>%
  mutate(CumDaily.returns = cumsum(daily.returns) * 100) %>%
  # Drop the grouping once the per-year cumulative sum has been computed
  ungroup() %>%
  ggplot(aes(x = Date, y = CumDaily.returns, color = Year)) +
  geom_line() +
  theme_bw()
-output
Starting with the following dataset:
$ Orders,Year,Date
1608052.2,2019,2019-08-02
1385858.4,2018,2018-07-27
1223593.3,2019,2019-07-25
1200356.5,2018,2018-01-20
1198226.3,2019,2019-07-15
837866.1,2019,2019-07-02
Trying to make a similar format as:
with the criteria: X-axis will be days or months, y-axis will be sum of Orders, grouping / colors will be by year.
Attempts:
1) No overlay
# Attempt 1: one line per merge year, drawn against the raw merge date
ggplot(
  dataset,
  aes(
    x = `Merge Date`,
    y = `$ Orders`,
    group = `Merge Date (Year)`,
    colour = `Merge Date (Year)`
  )
) +
  geom_line()
2) ggplot month grouping
# Attempt 2: derive year and abbreviated-month labels from the order date.
# The first mutate() was missing its closing parenthesis, which made the whole
# pipeline a syntax error; the result is now assigned with `<-` instead of `->`.
dataset2 <- dataset %>%
  mutate(
    Date = as.Date(Date),
    Year = format(Date, "%Y"),
    Month = format(Date, "%b")
  )

# One line per year across the month labels
ggplot(dataset2, aes(x = Month, y = `$ Orders`, group = Year, color = factor(Year))) +
  geom_line(size = .75) +
  ylab("Volume")
The lubridate package is your answer. Extract month from the Date field and turn it into a variable. This code worked for me:
library(tidyverse)
library(lubridate)
# Sample orders supplied inline as text, with the month number of each order
# date added as `theMonth`
dataset <- read_delim("OrderValue,Year,Date\n1608052.2,2019,2019-08-02\n1385858.4,2018,2018-07-27\n1223593.3,2019,2019-07-25\n1200356.5,2018,2018-01-20\n1198226.3,2019,2019-07-15\n837866.1,2019,2019-07-02", delim = ",") %>%
  mutate(theMonth = month(Date))

# Month on the x-axis (as a discrete factor), one coloured line per year
dataset %>%
  ggplot(aes(
    x = as.factor(theMonth),
    y = OrderValue,
    group = as.factor(Year),
    colour = as.factor(Year)
  )) +
  geom_line()
I have a dataframe in R where:
Date MeanVal
2002-01 37.70722
2002-02 43.50683
2002-03 45.31268
2002-04 14.96000
2002-05 29.95932
2002-09 52.95333
2002-10 12.15917
2002-12 53.55144
2003-03 41.15083
2003-04 21.26365
2003-05 33.14714
2003-07 66.55667
.
.
2011-12 40.00518
And when I plot a time series using ggplot with:
# Line plot of the monthly mean values.
# Each `+` must end its line: a line that starts with `+` is parsed as a new
# statement, so the y-axis label would never be added to the plot.
ggplot(mean_data, aes(Date, MeanVal, group = 1)) +
  geom_line() +
  xlab("") +
  ylab("Mean Value")
I am getting:
but as you can see, the x axis scale is not very neat at all. Is there any way I could just scale it by year (2002,2003,2004..2011)?
Let's use lubridate's parse_date_time() to convert your Date to a date class:
library(tidyverse)
library(lubridate)
# Parse the "YYYY-MM" labels into date-times so ggplot uses a continuous
# time axis instead of one tick per label
mean_data %>%
  mutate(Date = parse_date_time(as.character(Date), orders = "Y-m")) %>%
  ggplot(mapping = aes(x = Date, y = MeanVal)) +
  geom_line()
Similarly, we can convert to an xts and use autoplot():
library(timetk)
# Same parsing step, then convert to an xts object and let autoplot() draw it
mean_data %>%
  mutate(Date = parse_date_time(as.character(Date), "Y-m")) %>%
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned
  tk_xts(silent = TRUE) %>%
  autoplot()
This achieves the plot above as well.
library(dplyr)
mean_data %>%
  # Strip the "-MM" suffix and cast the remaining year to an integer so the
  # x-axis is scaled by year. (The closing parenthesis of mutate() was missing.)
  mutate(Date = as.integer(gsub("-.*", "", Date))) %>%
  ggplot(aes(Date, MeanVal, group = 1)) +
  geom_line() +
  xlab("") +
  # `+` ends the previous line; a line starting with `+` would not be added
  ylab("Mean Value")
I am trying to plot my data as a stacked bar chart using the ggplot2 package. I want to:
get the dataframe's row names on the x axis;
sum up the values by month and show the split by each column as well;
order the values in decreasing order for every month.
My data:
# Example data: four value columns, one row per month; the month label
# ("YYYY-Mon") is stored as the row name
neg.trans <- data.frame(
  Fraud           = c(1.686069964, 2.95565648, 1.170119649, 0.429596978),
  DeviceDeposit   = c(0.86629, 0.61366, 0.97226, 0.42835),
  Usagefees       = c(2.2937235, 2.294725, 2.587091, 1.841178),
  SecurityDeposit = c(1.616816492, 3.036161258, 5.820125209, 2.62082681),
  row.names       = c("2018-Oct", "2018-Nov", "2018-Dec", "2019-Jan")
)
I'd like to generate a chart that looks like below:
Is this possible to do this with R?
Here is an improved handling of the dates and a more base R (well still using ggplot2...) solution:
library(tidyverse)
my.df <- neg.trans %>%
  # Promote the row names to a regular column so they can be mapped to the x-axis
  rownames_to_column("Date") %>%
  # Parse the "2018-Oct"-style labels: %Y is the year, %b the abbreviated month
  mutate(Date = parse_date(Date, format = "%Y-%b")) %>%
  # Wide to long: one row per (Date, type) pair.
  # pivot_longer() is the current replacement for the superseded gather().
  pivot_longer(-Date, names_to = "type", values_to = "value")

# Stacked bars per month, split by type
ggplot(my.df, aes(Date, value, fill = type)) +
  geom_col() +
  scale_x_date(date_labels = "%Y-%b") # label the axis in the same year-month format
library(ggplot2)
# Copy the row names into a Date column so they can serve as the x-axis
neg.trans$Date <- rownames(neg.trans)

# Value columns to stack into a single long column
ids <- c("Fraud", "DeviceDeposit", "Usagefees", "SecurityDeposit")

# Wide -> long: one row per (Date, column) pair; the source column name is
# stored in `time` and the number in `value`
my.df <- reshape(
  neg.trans,
  direction = "long",
  idvar = "Date",
  varying = list(ids),
  times = ids,
  v.names = "value"
)
rownames(my.df) <- NULL

# Append a day-of-month so the labels parse as full dates
# (%Y = year, %b = abbreviated month name, %d = day)
my.df$Date <- as.Date(paste0(my.df$Date, "-01"), format = "%Y-%b-%d")

# Stacked bars per month, split by the original column name
ggplot(my.df, aes(x = Date, y = value, fill = time)) +
  geom_col() +
  scale_x_date(date_labels = "%Y-%b")
Descending ordering
If you want to order your columns individually you can do the following (adapted from https://stackoverflow.com/a/53598064/5892059)
# Stack the bars of each month in decreasing order of value.
# NOTE(review): this uses a `type` column, i.e. the my.df built by the
# tidyverse snippet; the base-R reshape version names that column `time`.
# Sort by date and type, turn `type` into a factor, then re-sort so that within
# each date the rows run from the largest value to the smallest.
my.df <- my.df %>%
arrange(Date, type) %>%
mutate(type = factor(type)) %>%
arrange(Date, -value)
# Row index of the first occurrence of each distinct `type`; used below to pick
# one legend entry per type.
aux <- with(my.df, match(sort(unique(type)), type))
# Fill by the interaction of -value and Date instead of by type, so the fill
# groups follow the value ordering within each date.
ggplot(my.df, aes(Date, value, fill = interaction(-value, Date))) +
geom_col() +
# values: four hue-palette colours indexed by each row's `type` factor code.
# labels/breaks: restricted to the rows in `aux`, labelled with their type.
scale_fill_manual(values = scales::hue_pal()(4)[my.df$type],
labels = with(my.df, type[aux]),
breaks = with(my.df, interaction(-value, Date)[aux])) +
scale_x_date(date_labels = "%Y-%b")
In my opinion that looks confusing.
This? Hopefully someone suggests an edit. The way I've handled the date is really not the best.
library(tidyverse)
# Build "2018-Oct-01"-style labels from the row names and keep them as a factor
# whose levels follow the original (chronological) row order.
df <- neg.trans %>%
  mutate(
    Date = row.names(.),
    Day = 1,
    Date = paste(Date, Day, sep = "-0"),
    # Set the level ORDER through factor(levels = ). Assigning to levels()
    # after as.factor() would rename the alphabetically sorted levels in place
    # and attach each month's values to the wrong label.
    Date = factor(Date, levels = unique(Date))
  )

# Stacked bars per month: drop the helper Day column, reshape wide -> long
# (pivot_longer() replaces the superseded gather()), then plot
df %>%
  select(-Day) %>%
  pivot_longer(-Date, names_to = "ID", values_to = "Value") %>%
  ggplot(aes(Date, Value, fill = ID)) +
  geom_col()
NOTE:
# Month abbreviation = second "-"-separated piece of each label.
# vapply() guarantees a character vector, unlike sapply().
Months <- vapply(strsplit(as.character(df$Date), "-"), `[[`, character(1), 2)
# Map the abbreviations to month numbers
Months <- recode(Months, "Dec" = 12, "Nov" = 11, "Oct" = 10, "Jan" = 1)

df %>%
  mutate(
    Months = Months,
    # Keep only the year part of the label (everything before the first "-");
    # refer to the column directly rather than through df$ inside mutate()
    Date = str_remove_all(as.character(Date), "-.*"),
    # Rebuild a real date from year, month number and day, then make it
    # discrete for the x-axis
    Date = make_date(Date, Months, Day),
    Date = as.factor(Date)
  ) %>%
  select(-Day, -Months) %>%
  # pivot_longer() replaces the superseded gather()
  pivot_longer(-Date, names_to = "ID", values_to = "Value") %>%
  arrange(Date) %>%
  ggplot(aes(Date, Value, fill = ID)) +
  geom_col()