I have been trying to plot time series data in R. I consulted several different sources online, but I am still having problems creating these plots. I have simulated some data below that represents the daily information a fictitious company received from 2014 to 2016:
# Simulate daily claim records from 2014-01-01 through 2016-01-01 (731 days).
# The dates are stored as "%Y/%m/%d" character strings, and every column
# passes through cbind()'s character matrix before the damage columns are
# converted back to numeric.
date_decision_made <- format(
  seq(as.Date("2014/1/1"), as.Date("2016/1/1"), by = "day"),
  "%Y/%m/%d"
)

# Damage amounts: one normal draw per day for each damage type.
property_damages_in_dollars <- rnorm(731, mean = 100, sd = 10)
car_damages_in_dollars <- rnorm(731, mean = 105, sd = 8)
other_damages_in_dollars <- rnorm(731, mean = 104, sd = 9)

# Categorical attributes, sampled with fixed probabilities.
location <- sample(
  c("canada", "usa"), 731,
  replace = TRUE, prob = c(0.3, 0.7)
)
type_of_house <- sample(
  c("single", "townhome", "rental"), 731,
  replace = TRUE, prob = c(0.5, 0.3, 0.2)
)
response_variable <- sample(
  c("claim_approved", "claim_rejected"), 731,
  replace = TRUE, prob = c(0.4, 0.6)
)

# Assemble the table; cbind() yields a matrix, as.data.frame() a data frame.
final_dataset <- as.data.frame(
  cbind(
    date_decision_made,
    property_damages_in_dollars,
    car_damages_in_dollars,
    other_damages_in_dollars,
    location,
    type_of_house,
    response_variable
  )
)

# Turn the three damage columns back into numbers.
final_dataset[["other_damages_in_dollars"]] <-
  as.numeric(final_dataset[["other_damages_in_dollars"]])
final_dataset[["property_damages_in_dollars"]] <-
  as.numeric(final_dataset[["property_damages_in_dollars"]])
final_dataset[["car_damages_in_dollars"]] <-
  as.numeric(final_dataset[["car_damages_in_dollars"]])

# Per-series subsets (date + one damage column) and the combined subset.
prop_damage <- final_dataset[
  c("date_decision_made", "property_damages_in_dollars")
]
car_damage <- final_dataset[
  c("date_decision_made", "car_damages_in_dollars")
]
other_damage <- final_dataset[
  c("date_decision_made", "other_damages_in_dollars")
]
new <- final_dataset[
  c(
    "date_decision_made",
    "property_damages_in_dollars",
    "car_damages_in_dollars",
    "other_damages_in_dollars"
  )
]
Based on this data, I have tried to plot this data as a time series in R. I tried several methods and all of them are producing errors. I have tried to resolve these problems but I just can't seem to figure them out. Could someone please help me?
#first way (error)
# Attempt: overlay the three damage series, each taken from its own data
# frame, as red, blue and green lines on one plot.
# NOTE(review): the two library() calls on the third line of this snippet
# have neither a newline nor a `;` between them, which R cannot parse.
# NOTE(review): several lines below begin with `+`. The preceding line is
# already a complete expression, so the `+` has to sit at the END of the
# previous line for the plot expression to continue.
# NOTE(review): date_decision_made was turned into character strings with
# format() when the data was created, while scale_x_date() is meant for Date
# values - presumably the column must be converted with as.Date() first.
library(ggplot2)
library(reshape2) library(dplyr)
ggplot() + geom_line(data = prop_damage, aes(x = date_decision_made, y = property_damages_in_dollars, group = 1), color = "red")
+ scale_x_date(date_breaks = "days" , date_labels = "%b %d %a")+
geom_line(data = car_damage, aes(x = date_decision_made, y = car_damages_in_dollars, group =1 ), color = "blue")
+ geom_line(data = other_damage, aes(x = date_decision_made, y = other_damages_in_dollars, group =1), color = "green")
+ xlab('data_date') + ylab('percent.change')
#second way (error)
# Attempt: one geom_line() per damage column of `new`, with a constant string
# mapped to colour so that scale_colour_manual() can build a legend.
# NOTE(review): date_decision_made is a character column here (it was
# formatted to "%Y/%m/%d" strings when the data was created) and no `group`
# aesthetic is given - presumably each date is treated as its own group, so
# no lines can be drawn; confirm by converting the column with as.Date().
# NOTE(review): the y limits are c(0, 10000) while the simulated damages are
# drawn around 100-105, so the series would sit at the bottom of the panel.
ggplot(data = new, aes(x = date_decision_made)) +
geom_line(aes(y = property_damages_in_dollars, colour = "property_damages_in_dollars")) +
geom_line(aes(y = car_damages_in_dollars, colour = "car_damages_in_dollars")) +
geom_line(aes(y =other_damages_in_dollars, colour = "other_damages_in_dollars")) +
scale_colour_manual("",
breaks = c("property_damages_in_dollars", "car_damages_in_dollars", "other_damages_in_dollars"),
values = c("red", "green", "blue")) +
xlab(" ") +
scale_y_continuous("Dollars", limits = c(0,10000)) +
labs(title="demo graph")
#3rd way error
# Attempt: reshape the four columns of `new` to long format with melt() and
# draw one coloured line per damage type.
# NOTE(review): date_decision_made is still a character column at this point
# (it was formatted to strings when the data was created), whereas
# scale_x_date() is meant for Date values - presumably this is the error.
# NOTE(review): `group=1` puts every row in a single group even though colour
# is mapped to `variable`; `group=variable` is presumably what was intended.
##Subset the necessary columns
dd_sub = new[,c(1,2,3,4)]
##Then rearrange your data frame
library(reshape2)
dd = melt(dd_sub, id=c("date_decision_made"))
ggplot(dd) + geom_line(aes(x=date_decision_made, y=value, colour=variable, group=1)) + scale_x_date(date_breaks = "days" , date_labels = "%b %d %a")+ scale_colour_manual(values=c("red","green","blue"))
#4th error
# Attempt: convert the whole `new` data frame to a ts object and plot it with
# autoplot().
# NOTE(review): `new` still contains the character date column, so it is
# passed to ts() together with the numeric series - presumably only the three
# damage columns should be included.
# NOTE(review): frequency = 1 with start = c(2014, 1) does not describe daily
# data; confirm the intended time index.
# NOTE(review): no package providing an autoplot() method for ts objects is
# attached in this snippet - presumably forecast or ggfortify is needed.
mymts = ts(new,
frequency = 1,
start = c(2014, 1))
autoplot(mymts) +
ggtitle("Time Series Plot") +
theme(plot.title = element_text(hjust = 0.5))
#5th Method error
# Attempt: build ts objects for two of the series and draw them together with
# ts.plot().
# NOTE(review): the series are stored as x1 and x2, but the first ts.plot()
# call refers to `x` and `y`, which are not defined in this snippet.
# NOTE(review): prop_damage and other_damage each still contain the character
# date column, and the second ts.plot() call is given date_decision_made
# itself, which is a character vector rather than a numeric series.
x1 = ts(prop_damage, frequency = 1, start = c(2014,1))
x2 = ts(other_damage, frequency = 1, start = c(2014,1))
ts.plot(x, y, gpars = list(col = c("black", "red")))
ts.plot(date_decision_made,gpars= list(col=rainbow(10)))
#6th method error
# Attempt: same long-format reshape as the 3rd way, plotted with qplot().
# NOTE(review): date_decision_made is a character column (formatted to
# strings when the data was created), so the x axis is not a date axis -
# presumably it needs as.Date() before plotting.
##Subset the necessary columns
dd_sub = new[,c(1,2,3,4)]
##Then rearrange your data frame
library(reshape2)
dd = melt(dd_sub, id=c("date_decision_made"))
qplot(date_decision_made,value,data=dd,geom='line',color=variable)
#7th way error
# Attempt: bind two ts objects side by side and draw them in a single panel
# with plot.ts().
# NOTE(review): prop_damage and other_damage are two-column data frames whose
# first column is the character date, so each ts object carries that column
# as well - presumably only the numeric damage column should be passed.
x1 = ts(prop_damage, frequency = 1, start = c(2014,1))
x2 = ts(other_damage, frequency = 1, start = c(2014,1))
comb_ts <- cbind(x1, x2)
plot.ts(comb_ts, plot.type = "single")
Could someone please show me what I am doing wrong in this code? Thanks
Try this approach and be careful with the dates. There are plenty of dates because of years:
# Data
# Reshape dd_sub (the date column plus the three damage columns from the
# question) to long format: one row per date and damage type.
library(reshape2)
dd <- melt(dd_sub, id = c("date_decision_made"))

# The dates were stored as "%Y/%m/%d" strings; ggplot2's date scale needs
# real Date values.
dd$date_decision_made <- as.Date(
  as.character(dd$date_decision_made),
  format = "%Y/%m/%d"
)

# Plot
# group = variable draws one line per damage type. With group = 1 all three
# series were joined into a single path that jumps between the series at
# every date.
# Only date_breaks is passed: when both `breaks` and `date_breaks` are
# supplied, date_breaks takes precedence, so the former
# `breaks = '12 months'` had no effect.
ggplot(dd) +
  geom_line(
    aes(x = date_decision_made, y = value, colour = variable, group = variable)
  ) +
  scale_x_date(date_breaks = "months", date_labels = "%b %d %a") +
  scale_colour_manual(values = c("red", "green", "blue")) +
  theme(axis.text.x = element_text(angle = 90))
Output:
Another option is to use pivot_longer from tidyr
library(dplyr)
library(tidyr)
library(ggplot2)
library(lubridate)
# Same plot via tidyr: pivot every column except the date into name/value
# pairs, parse the date strings with lubridate::ymd(), then draw one line per
# damage type.
# group = name keeps the three series as separate lines; with group = 1 they
# were joined into a single path.
# Only date_breaks is passed to scale_x_date(): it takes precedence over
# `breaks`, so the former `breaks = '12 months'` had no effect.
dd_sub %>%
  pivot_longer(cols = -date_decision_made) %>%
  mutate(date_decision_made = ymd(date_decision_made)) %>%
  ggplot() +
  geom_line(
    aes(x = date_decision_made, y = value, colour = name, group = name)
  ) +
  scale_x_date(date_breaks = "months", date_labels = "%b %d %a") +
  scale_colour_manual(values = c("red", "green", "blue")) +
  theme(axis.text.x = element_text(angle = 90))
Related
I have the following data:
library(ggplot2)
library(gganimate)
library(tidyverse)
# Build one simulated series.
#
# vintage: value stored unchanged in the `vintage` column of every row.
# id:      value stored unchanged in the `id` column of every row.
#
# Returns a data frame with columns Dates, RLG, vintage and id: one row per
# quarter from 2010-01-01 through 2025-12-31, where RLG is the running sum of
# 64 random steps of -1 or +1.
createData <- function(vintage, id) {
  quarter_starts <- seq(
    from = as.Date("2010-01-01"),
    to = as.Date("2025-12-31"),
    by = "quarter"
  )
  steps <- sample(c(-1, 1), 64, TRUE)
  data.frame(
    Dates = quarter_starts,
    RLG = cumsum(steps),
    vintage = vintage,
    id = id
  )
}
# Stack eight simulated series: vintages 2018-01-01 ... 2025-01-01 paired
# with ids 1 ... 8, generated in that order and row-bound into one frame.
df <- do.call(
  rbind,
  lapply(
    1:8,
    function(k) createData(sprintf("%d-01-01", 2017L + k), as.numeric(k))
  )
)
Which I use to make the following chart:
# Animation settings: eight frames per distinct vintage, resolution 30.
options(
  gganimate.nframes = 8 * length(unique(df$vintage)),
  gganimate.res = 30
)

# One line per vintage; frames advance with `id`, earlier lines stay visible
# as faded marks.
p <- ggplot(
  df,
  aes(x = Dates, y = RLG, group = as.Date(vintage), colour = "RLG")
) +
  geom_line() +
  scale_y_continuous(labels = function(x) paste0(x, "%")) +
  theme(axis.title = element_blank(), legend.position = "none") +
  transition_time(id) +
  exit_fade(alpha = 0.5) +
  shadow_mark(alpha = 0.2)

animate(p, end_pause = 30)
I would like to add a geom_rect which goes from vintage to max(Dates). At each frame, vintage will increase, so the geom_rect will shrink slightly. How can I do this without interfering with the shadow_mark and exit_fades which I am applying to the lines?
If you mean something like a progress bar you could do it like so:
First, create a data frame for the geom that is a subset of the original (the last row of each `id`):
# Data for the segment layer: the last row of each id, with vintage parsed
# to Date so it can share the x axis with Dates.
df_geom <- df |>
  mutate(vintage = as.Date(vintage)) |>
  group_by(id) |>
  slice_tail(n = 1)
Use geom_segment with the DF from above.
If you want to leave shadow_mark in you can do shadow_mark(exclude_layer = 2).
# Layer 1: the animated lines. Layer 2: a horizontal bar at y = 18 running
# from each frame's vintage to the last date in df_geom.
p <- ggplot(
  df,
  aes(x = Dates, y = RLG, group = as.Date(vintage), colour = RLG)
) +
  geom_line() +
  scale_y_continuous(labels = function(x) paste0(x, "%")) +
  theme(axis.title = element_blank(), legend.position = "none") +
  geom_segment(
    data = df_geom,
    mapping = aes(x = vintage, xend = Dates, y = 18, yend = 18),
    size = 10,
    alpha = .4,
    color = "lightblue"
  ) +
  transition_time(id) +
  exit_fade(alpha = 0.5)
# shadow_mark(alpha = 0.2)

animate(p)
I am trying to annotate individual plots of a facet plot. I have set the order to 2008, 1999 using factor levels.
But when I add the geom_text to the ggplot, the order of the plots changes. See the examples below. What am I doing wrong? How can I solve this?
library(tidyverse)
# Main data: `year` becomes a factor with levels ordered 2008, 1999 so that
# the facets appear in that order.
df <- mpg %>% mutate(year = factor(year, levels = c(2008,1999)))
# Annotation data: one label per facet.
# NOTE(review): `year` is a plain character vector here, not a factor with
# the same levels as df$year - presumably this mismatch is what changes the
# facet order once the geom_text() layer is added.
anno <- data.frame(xstar = c(5, 2), ystar = c(100, 70),
lab = c("text1","text2"),
year = c("2008","1999"))
# Stacked columns per class, faceted by year, with the labels from `anno`
# added as a separate text layer.
df %>% ggplot(aes(class, displ)) +
geom_col(aes(fill=drv)) +
facet_grid(~year) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
geom_text(data = anno, aes(x = xstar, y = ystar, label = lab))
Convert the year column in your annotation dataframe also to a factor with the same levels and order as in your main df:
library(ggplot2)
# Main data: order the year facets as 2008, 1999.
df <- mpg
df[["year"]] <- factor(df[["year"]], levels = c(2008, 1999))

# Annotation data: `year` is a factor with the same levels, in the same
# order, as df$year.
anno <- data.frame(
  xstar = c(5, 2),
  ystar = c(100, 70),
  lab = c("text1", "text2"),
  year = factor(c("2008", "1999"), levels = c(2008, 1999))
)

# Stacked columns per class with one text label per facet.
ggplot(data = df, mapping = aes(x = class, y = displ)) +
  geom_col(mapping = aes(fill = drv)) +
  geom_text(
    data = anno,
    mapping = aes(x = xstar, y = ystar, label = lab)
  ) +
  facet_grid(~year) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )
I am trying to plot some line graphs from a dataset, with the y-axis variable taken in turn from the list var. For some reason, all the plots come out empty, but the same code without the for loop works. So my question is: can I use a for loop to plot multiple plots at once instead of plotting them individually? Am I missing something while plotting this?
# Column names (as strings) of the series to plot, one plot per name.
var = list("CAH", 'CTLT',"CI","DVA","HSIC","HOLX","HUM","IDXX","INCY","MRK","REGN","VTRS")
# NOTE(review): inside aes(), `y = v` uses the string held in `v` as the y
# value itself, not the column with that name, so every plot shows a constant
# instead of the prices.
# NOTE(review): glue() is not loaded in this snippet - presumably
# library(glue) is attached elsewhere; confirm.
for(v in var){
p<-closing_price %>%
mutate(date = as.Date(Date,"%m/%d/%y")) %>%
ggplot(aes(x = date, y = v, group=1)) +
geom_line() +
labs(x = "Date", y = "Stock Price ($)", title = glue("{v} price over time")) +
scale_x_date(date_minor_breaks = "2 day")
print(p)
}
This is the outcome I am looking for:
# Single-series version: parse the date strings, keep only date and CAH, and
# draw the CAH price as a line.
closing_price %>%
  transmute(date = as.Date(Date, "%m/%d/%y"), CAH) %>%
  ggplot(mapping = aes(x = date, y = CAH, group = 1)) +
  geom_line() +
  labs(
    x = "Date",
    y = "Stock Price ($)",
    title = "CAH price over time"
  ) +
  scale_x_date(date_minor_breaks = "2 day")
As noted by stefan, ggplot thinks you're trying to plot the character which is why you're getting "CAH" on the y-axis. You can refer to the variable with .data[[v]].
# Column names (as strings) of the series to plot.
var <- list(
  "CAH", "CTLT", "CI", "DVA", "HSIC", "HOLX",
  "HUM", "IDXX", "INCY", "MRK", "REGN", "VTRS"
)

# One plot per name; .data[[v]] looks the column up by the string in `v`.
for (v in var) {
  p <- ggplot(
    mutate(closing_price, date = as.Date(Date, "%m/%d/%y")),
    aes(x = date, y = .data[[v]], group = 1)
  ) +
    geom_line() +
    labs(
      x = "Date",
      y = "Stock Price ($)",
      title = glue("{v} price over time")
    ) +
    scale_x_date(date_minor_breaks = "2 day")
  print(p)
}
Since I don't have access to your data, here's a reproducible example:
library(tidyverse)
library(zoo)
# Toy data: twelve month-end dates in 2020 and three simulated price series.
closing_price <- tibble(
  dt = as.Date(
    seq(as.yearmon("2020-01-31"), as.yearmon("2020-12-31"), by = 1 / 12),
    frac = 1
  ),
  CAH = rnorm(12, mean = 10, sd = 2),
  CTLT = rnorm(12, mean = 50, sd = 5),
  CI = rnorm(12, mean = 25, sd = 2)
)

vars <- c("CAH", "CTLT", "CI")

# Draw one line chart per column name in `vars`.
for (v in vars) {
  p <- ggplot(closing_price, aes(x = dt, y = .data[[v]])) +
    geom_line() +
    labs(
      x = "Date",
      y = "Stock Price ($)",
      title = paste(v, "price over time")
    ) +
    scale_x_date(date_minor_breaks = "2 day")
  print(p)
}
You could also use lapply for this by wrapping your plot in a function instead. The function may be useful depending on how often you make these plots or to make it easier to fix and share.
# Line chart of one price column against `dt`.
#
# data: data frame containing a `dt` column and the column named by `var`.
# var:  name of the column to plot, given as a string.
#
# Returns the ggplot object (it is not printed here).
closing_price_plot <- function(data, var) {
  ggplot(data, aes(x = dt, y = .data[[var]], group = 1)) +
    geom_line() +
    labs(
      x = "Date",
      y = "Stock Price ($)",
      title = paste(var, "price over time")
    ) +
    scale_x_date(date_minor_breaks = "2 day")
}
## At top level the returned list is auto-printed, which shows every plot
lapply(vars, function(v) closing_price_plot(closing_price, v))

## Keep the plots in a list instead of printing them
p <- lapply(vars, function(v) closing_price_plot(closing_price, v))

## Show the first plot
p[[1]]
I'm trying to make a nice graph with ggplot, but I'm still facing a barrier.
When I use facet_grid at the end of my code, something goes wrong. A helping hand would be great!
This is my code :
# Package
library(ggplot2)
# Function
# Lower-case every string in `x`, then upper-case its first character.
#
# x: vector of strings (non-character input is converted by tolower()).
#
# Returns the converted character vector.
firstup <- function(x) {
  lowered <- tolower(x)
  initial <- toupper(substr(lowered, 1, 1))
  substr(lowered, 1, 1) <- initial
  lowered
}
Create data
# 46 consecutive days (2020-01-21 through 2020-03-06), each with a random
# count between 1 and 20.
data_F <- data.frame(
  DATE = seq(
    from = as.Date("2020-01-21"),
    to = as.Date("2020-03-06"),
    by = "days"
  ),
  NB = sample(1:20, 46, replace = TRUE)
)
Manage the data
# Rebuild the table with the date split into year, month name and day of
# month, keeping DATE and NB as they were.
data_F <- with(
  data_F,
  data.frame(
    DATE = DATE,
    year = as.numeric(format(DATE, format = "%Y")),
    month = factor(format(DATE, format = "%B")),
    days = as.numeric(format(DATE, format = "%d")),
    NB = NB
  )
)
Relevel month with the right order
# Capitalise the month names, then make `month` a factor whose levels follow
# the order in which the months first appear in the data.
data_F$month <- firstup(data_F$month)
data_F$month <- factor(data_F$month, levels = unique(data_F$month))

# Stand-alone factor holding each month once, with the same level order.
month <- unique(data_F$month)
The main plot
# Daily bar chart of NB: one tick per date labelled with the day of month,
# y axis fixed to 0-65 with major breaks every 5 and minor breaks every 1.
plot1 <- ggplot(data = data_F, mapping = aes(x = DATE, y = NB)) +
  geom_bar(
    stat = "identity",
    colour = "black",
    fill = "dodgerblue3",
    width = 0.5
  ) +
  scale_x_date(
    breaks = data_F$DATE,
    labels = data_F$days,
    minor_breaks = NULL,
    expand = expansion(add = 0.3)
  ) +
  scale_y_continuous(
    limits = c(0, 65),
    breaks = seq(0, 65, by = 5),
    minor_breaks = seq(0, 65, by = 1)
  )

plot1
Creating the segment data
# One full-height vertical segment per date (y from Inf down to -Inf),
# tagged with the month it belongs to.
data.segm <- data.frame(
  x = data_F$DATE,
  y = Inf,
  xend = data_F$DATE,
  yend = -Inf,
  month = data_F$month
)
Show two days, for example at the row 6 and 35
# Highlight two days (rows 6 and 35 of data.segm) with a wide, light-grey
# vertical band each.
# NOTE(review): each layer receives the WHOLE data.segm table as `data`,
# while the aesthetics are single values taken from row i. Every row of
# data.segm carries a `month`, so presumably the same band is repeated in
# every facet once facet_grid(. ~ month) is added - verify by passing only
# data.segm[i, ] as the layer data.
i = 6
plot1 = plot1 + geom_segment(data = data.segm, aes_string(x=data.segm$x[[i]],y=data.segm$y[[i]],
xend=data.segm$xend[[i]],yend=data.segm$yend[[i]]),
colour = alpha("gray90",0.5),size=8,inherit.aes = F)
i = 35
plot1 = plot1 + geom_segment(data = data.segm, aes_string(x=data.segm$x[[i]],y=data.segm$y[[i]],
xend=data.segm$xend[[i]],yend=data.segm$yend[[i]]),
colour = alpha("gray90",0.5),size=8,inherit.aes = F)
plot1
And now my problem with facet_grid:
# Split the plot into one panel per month; panel widths and x ranges follow
# the data, and the strip labels are switched to the x-axis side.
plot2 <- plot1 +
  facet_grid(
    . ~ month,
    space = "free_x",
    scales = "free_x",
    switch = "x"
  )

plot2
Jonas
Using an idea from a previous question I have created a gantt-like chart using ggplot2. Here is the example code:
# Task names and their (possibly repeated) schedule intervals.
tasks <- c("Review literature", "Mung data", "Stats analysis", "Write Report")
dfr <- data.frame(
  name = c(tasks, tasks[2:3]),
  start.date = c(
    "24/08/2010", "01/10/2010", "01/11/2010",
    "14/02/2011", "15/12/2010", "1/9/2010"
  ),
  end.date = c(
    "31/10/2010", "14/12/2010", "28/02/2011",
    "30/04/2011", "05/02/2011", "1/11/2010"
  ),
  type = c(TRUE, FALSE, TRUE, TRUE, TRUE, FALSE)
)

# Long format: one row per interval endpoint, so each bar is a thick line
# between its start and end date.
mdfr <- melt(dfr, measure.vars = c("start.date", "end.date"))

ggplot(
  data = mdfr,
  mapping = aes(as.Date(value, "%d/%m/%Y"), name, colour = type)
) +
  geom_line(size = 6) +
  xlab("") +
  ylab("") +
  theme_bw()
Now, I need to indicate one specific critical date for each task (or maybe more, some other day), using a bullet, a star or anything similar, which may be inside or outside the bar, together with a textual annotation of that date. Can this be achieved using the above procedure? If not, is there another (non-ggplot) way of doing this?
Thank you!
Here you go:
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE. reshape2 is loaded as well because melt() comes from it.
library(ggplot2)
library(reshape2)

# Task names and their (possibly repeated) schedule intervals.
tasks <- c("Review literature", "Mung data", "Stats analysis", "Write Report")
dfr <- data.frame(
  name = tasks[c(1, 2, 3, 4, 2, 3)],
  start.date = c(
    "24/08/2010", "01/10/2010", "01/11/2010",
    "14/02/2011", "15/12/2010", "1/9/2010"
  ),
  end.date = c(
    "31/10/2010", "14/12/2010", "28/02/2011",
    "30/04/2011", "05/02/2011", "1/11/2010"
  ),
  type = c(TRUE, FALSE, TRUE, TRUE, TRUE, FALSE)
)

# One critical date and label per task.
dfrLabels <- data.frame(
  name = tasks[c(1, 2, 3, 4)],
  date = c("16/10/2010", "07/12/2010", "14/02/2011", "15/04/2011"),
  event = c("Something", "Other", "Whatever", "Deadline")
)

# Long format: one row per interval endpoint, so each bar is a thick line
# between its start and end date.
mdfr <- melt(dfr, measure.vars = c("start.date", "end.date"))

# Bars from mdfr; the critical dates from dfrLabels are added as a black
# point plus a red text label. Both extra layers take y (name) from the plot
# mapping and set a fixed colour.
ggplot(mdfr, aes(as.Date(value, "%d/%m/%Y"), name, colour = type)) +
  geom_line(size = 6) +
  xlab("") +
  ylab("") +
  theme_bw() +
  geom_text(
    data = dfrLabels,
    aes(x = as.Date(date, "%d/%m/%Y"), label = event),
    hjust = 0, vjust = 1, colour = "red", size = 5.0
  ) +
  geom_point(
    data = dfrLabels,
    aes(x = as.Date(date, "%d/%m/%Y")),
    size = 3.0, colour = "black"
  )