Time intervals in ggplot - r

I tried to plot time intervals for my data. Basically I want to compare recording of 2 systems for different individuals. The code might be a bit inefficient.
require(reshape2)
require(ggplot2)
df.system1.person1<-data.frame(
time.start.person1=c("12:12:30","12:13:10","12:13:50"),
time.end.person1=c("12:12:35","12:13:20","12:13:55")
)
df.system2.person1=data.frame(
time.start.person1=c("12:12:30","12:13:50"),
time.end.person1=c("12:13:25","12:14:00")
)
df.system1.person2=data.frame(
time.start.person2=c("12:12:30","12:13:10","12:13:50"),
time.end.person2=c("12:12:35","12:13:20","12:13:55")
)
df.system2.person2=data.frame(
time.start.person2=c("12:12:30","12:13:50"),
time.end.person2=c("12:13:25","12:14:00")
)
ndf.system1.person1 <- melt(df.system1.person1, measure.vars = c("time.start.person1", "time.end.person1"))
ndf.system2.person1 <- melt(df.system2.person1, measure.vars = c("time.start.person1", "time.end.person1"))
ndf.system1.person2 <- melt(df.system1.person2, measure.vars = c("time.start.person2", "time.end.person2"))
ndf.system2.person2 <- melt(df.system2.person2, measure.vars = c("time.start.person2", "time.end.person2"))
ndf.system1.person1$value2<-as.POSIXct(strptime(ndf.system1.person1$value, "%H:%M:%S"))
ndf.system2.person1$value2<-as.POSIXct(strptime(ndf.system2.person1$value, "%H:%M:%S"))
ndf.system1.person2$value2<-as.POSIXct(strptime(ndf.system1.person2$value, "%H:%M:%S"))
ndf.system2.person2$value2<-as.POSIXct(strptime(ndf.system2.person2$value, "%H:%M:%S"))
data=rbind(ndf.system1.person1,ndf.system2.person1,ndf.system1.person2,ndf.system2.person2)
data$Arg[1:6]="System1"
data$Arg[7:10]="System2"
data$Arg[11:16]="System1"
data$Arg[17:20]="System2"
data$Ind[1:10]="Person 1"
data$Ind[11:20]="Person 2"
ggplot(data,aes(x=value2,y=Arg))+geom_line(size=1)+ facet_grid(.~Ind,scales="free_x",space="free_x")+xlab("") + ylab("") +theme_bw()+theme(legend.position="none",axis.text=element_text(size=14),panel.grid.major = element_blank(),axis.title.y = element_text(vjust=+1),panel.grid.minor = element_blank(), axis.title=element_text(size=14))
However, instead of time intervals I get one solid line. The time intervals should be start.time-end.time. Also, on the x axis I would like to get HH:MM format. Thank you.

The way you combine your data does not seem to be suitable for the plot you want to make. Below you find a different approach to combine the data that you originally created. Then you can use geom_linerange to make your plot.
df <- do.call(rbind, lapply(list(cbind(df.system1.person1, Arg = "System1", Ind = "Person 1"),
cbind(df.system2.person1, Arg = "System2", Ind = "Person 1"),
cbind(df.system1.person2, Arg = "System1", Ind = "Person 2"),
cbind(df.system2.person2, Arg = "System2", Ind = "Person 2")),
'colnames<-', c('start.time', 'end.time', 'Arg', 'Ind')))
#
ggplot(df, aes(x = Arg, ymin = start.time, ymax=end.time)) +
geom_linerange(size = 1) +
coord_flip() +
facet_grid(.~Ind,scales="free_y",space="free_y") +
xlab("") + ylab("") +
theme_bw() +
theme(axis.text=element_text(size=14),
axis.text.x = element_text(angle = 20, vjust=0.5),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
EDIT: In order to change the time format, you need to first convert the times to POSIXct format and then use scale_y_datetime.
# change formats
df$start.time <- as.POSIXct(df$start.time, format='%H:%M:%S')
df$end.time <- as.POSIXct(df$end.time, format='%H:%M:%S')
# load additional package
require(scales)
# added scale_y_datetime.
# The example data you provide span less than 2 minutes,
# so I used the date format with seconds and 30 second breaks
# You need to change this to get 15 minute breaks and no seconds.
ggplot(df, aes(x = Arg, ymin = start.time, ymax=end.time)) +
geom_linerange(size = 1) +
scale_y_datetime(breaks = date_breaks("30 secs"), # change to "15 mins"
labels = date_format("%H:%M:%S")) + # change to "%H:%M"
coord_flip() +
facet_grid(.~Ind,scales="free_y",space="free_y") +
xlab("") + ylab("") +
theme_bw() +
theme(axis.text=element_text(size=14),
axis.text.x = element_text(angle = 20, vjust=0.5),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())

Related

In R, how can I extend a posixct timeseries axis beyond the end of the data

The issue I am trying to fix is that one of my temperature loggers stopped early. This causes the x axis to be a different length to the others (See image Axis). I want to extend the axis to be the same for plotting next to each other with an end date/time of 2022-06-06 00:00:00.
Any suggestions on how to adjust this? I've pasted the main 3 chunks of code used to transform data and plot below.
Thanks
SD1b <- SaxDeep1b
SD1b$datetime <- as.POSIXct(strptime(SD1b$datetime,format="%d/%m/%Y %H:%M",tz="Australia/Brisbane"))
head(SD1b)
SD1comb$datehour <- cut(as.POSIXct(SD1comb$datetime, format="%Y-%m-%d %H:%M:%S"),breaks="hour")
SD1hourlyT <- aggregate(temp ~ datehour,SD1comb,mean)
head(SD1hourlyT)
SD1hourlyT$datehour <- as.POSIXct(strptime(SD1hourlyT$datehour,format = "%Y-%m-%d %H",
tz="Australia/Brisbane"))
str(SD1hourlyT)
jpeg(file='SD1_temp.jpeg',width=19,height=10,res=1200,units="cm")
SD1temp <- ggplot(SD1hourlyT, aes(x = datehour, y = temp)) +
geom_line(colour="black") +
labs(x=element_blank(),y=expression("Temperature " ( degree*C)) ) +
scale_x_datetime(date_breaks="1 month",date_labels = "%b") + #see ?strptime for labs
scale_y_continuous(limits=c(23,33),breaks=c(23,25,27,29,31,33)) +
theme_linedraw() +
theme_minimal()+
theme(axis.text.x = element_text(colour="black",size=10),
axis.title.x = element_text(color = "black", size=12),
panel.grid.major = element_line(colour = "#d3d3d3"),
panel.grid.minor = element_blank(),
axis.text.y = element_text(colour="black",size=10),
axis.title.y = element_text(color = "black", size=12)) +
ggtitle("Saxon Deep 1")
You can use the limits argument within scale_x_datetime to expand the scale.
library(dplyr)
library(ggplot2)
library(scales)
library(lubridate)
# Example data
df <- data.frame(date = as.POSIXct(
c(
"2016-12-05-16.25.54.875000",
"2016-12-06-15.25.54.875000",
"2016-12-08-14.25.54.875000",
"2016-12-09-13.25.54.875000",
"2016-12-09-12.25.54.875000",
"2016-12-10-11.25.54.875000",
"2016-12-10-10.25.54.875000",
"2016-12-11-07.25.54.875000"
),
format = "%Y-%m-%d-%H.%M.%S"
) ,
y = 1:8)
Default axis limits
The minimum and maximum values of date are the default limits, even if there is no label or tick mark at the spot.
ggplot(df, aes(x = date, y = y)) +
geom_point() +
scale_x_datetime(labels = date_format("%D"),
date_breaks = "2 day")
Expanded axis limits
We can expand the axis limits even to values not observed in our data. Once again, you'll need to adjust labels and tick marks if you want to scale them the same as well.
ggplot(df, aes(x = date, y = y)) +
geom_point() +
scale_x_datetime(
labels = date_format("%D"),
date_breaks = "2 day",
limits = ymd_h("2016-12-05 00", "2016-12-20 23")
)

Multiple plots in one figure in R

I have three plots and I want to show them in a figure like below
link
I made a few attempts but I was not successful. My codes are given below:
dat <- read.table(text="
dates PS.230 PS.286 PS.389
3.01.2018 20.75103 16.69312 -6.503637
15.01.2018 15.00284 16.03211 16.1058
8.02.2018 11.0789 7.438522 -2.970704
20.02.2018 15.10865 12.8969 3.935687
4.03.2018 24.74799 19.25148 9.186779
28.03.2018 -1.299456 7.028817 -8.126284
9.04.2018 4.778902 8.309322 -3.450085
21.04.2018 7.131915 9.484932 -4.326919
", header=T, stringsAsFactors=FALSE)
dat$dates <- as.Date(dat$dates, "%d.%m.%Y")
library(ggplot2)
library(tidyverse)
a <- ggplot(dat, aes(x=dates, y=PS.230)) +
geom_point() +
geom_line() +
geom_smooth(se = FALSE, method = lm, size = 0.15, color = "#da0018") + #cizgi eklemek icin
scale_x_date(date_breaks = "1 months",date_labels = "%Y-%m",
limits = as.Date.character(c("01/12/2017","31/12/2018"),
format = "%d/%m/%Y")) +
ylim(-20,40) +
ylab("[mm/year]") +
xlab("") +
theme_linedraw() #theme_light
a + theme(
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
)
b <- ggplot(dat, aes(x=dates, y=PS.286)) +
geom_point() +
geom_line() +
geom_smooth(se = FALSE, method = lm, size = 0.15, color = "#da0018") + #cizgi eklemek icin
scale_x_date(date_breaks = "1 months",date_labels = "%Y-%m",
limits = as.Date.character(c("01/12/2017","31/12/2018"),
format = "%d/%m/%Y")) +
ylim(-20,40) +
ylab("[mm/year]") +
xlab("") +
theme_linedraw() #theme_light
b + theme(
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
)
c <- ggplot(dat, aes(x=dates, y=PS.389)) +
geom_point() +
geom_line() +
geom_smooth(se = FALSE, method = lm, size = 0.15, color = "#da0018") + #cizgi eklemek icin
scale_x_date(date_breaks = "1 months",date_labels = "%Y-%m",
limits = as.Date.character(c("01/12/2017","31/12/2018"),
format = "%d/%m/%Y")) +
ylim(-20,40) +
ylab("[mm/year]") +
xlab("") +
theme_linedraw() #theme_light
c + theme(
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
)
in the link I provided, much better graphics were drawn with fewer lines. my codes seem a little more complicated and frankly i couldn't get out. a, b and c plots in one image and only one date axes. How can I modify the codes to achieve sample result? Thank you.
Thank you for posting your data. As mentioned, the first step is to arrange your dataset so that it is in Tidy Data format. The information in dat$PS.230, dat$PS.286 and dat$PS.389 should be better represented in two columns:
First Column: name of data type - We'll call this column dat$value_type and it will have values that indicate if dat$results comes from PS.230, PS.286, or PS.389.
Second Column: value of data - We'll call this column dat$result and it just shows the value. This will be the y= aesthetic for all plots.
Pre-Processing: Gather into TidyData format
Use the gather() function to gather all columns in to a key ("value_type") and a "value" ("result"). We'll gather all columns except for "dates", so we just note to exclude that column via -dates:
dat <- dat %>% gather(key='value_type', value='result', -dates)
Plot
For the plot, you apply x and y aesthetics to "date" and "result". You can use "value_type" to differentiate based on color and create your legend for points and lines. You also use "value_type" as the column for creating the facets (the three separate plots) via use of facet_grid() function. Note that value_type ~ . arranges by "value_type" vertically, whereas . ~ value_type would arrange horizontally:
ggplot(dat, aes(x=dates, y=result)) +
geom_line(aes(color=value_type)) +
geom_point(aes(color=value_type)) +
scale_x_date(date_breaks = '1 months', date_labels = '%Y-%m') +
facet_grid(value_type ~ .) +
theme_bw()

How to use scale_x_date properly

I am a new user in R and I hope you can help me.
setwd("C:/Users/USER/Desktop/Jorge")
agua <- read_excel("agua.xlsx")
pbi <- read_excel("PBIagro.xlsx")
str(agua);
names(agua)[2] <- "Variación";
agua[,1] <- as.Date(agua$Trimestre)
lagpbi <- lag(pbi$PBIAgropecuario, k=1)
pbi[,3]<- lagpbi; pbi <- pbi[-c(1),];
names(pbi)[3] <- "PBIlag"
growth <- ((pbi$PBIAgropecuario-pbi$PBIlag)/pbi$PBIlag)*100
Anual_growth <- data.frame(growth); Anual_growth[,2] <- pbi$Año; names(Anual_growth)[2] <- "Año"
# Plot
Agro <- ggplot(Anual_growth, aes(x=Año, y=growth)) +
geom_line(color="steelblue") +
geom_point() +
geom_text(aes(label = round(Anual_growth$growth, 1)),
vjust = "inward", hjust = "inward", size=2.5, show.legend = FALSE) +
xlab("") +
theme_ipsum() +
theme(axis.text.x=element_text(angle=60, hjust=1)) +
ylim(-9.9,13.4) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.line.x = element_blank(), plot.margin = unit(c(1,1,0.5,1),"cm"),
axis.line.y = element_blank(), axis.text.x=element_text(face = "bold", size=8,
angle=1,hjust=0.95,vjust=0.2),
axis.text.y = element_blank(), axis.title.y=element_blank())+
scale_x_continuous("Año", labels = as.character(Anual_growth$Año), breaks = Anual_growth$Año)
print(Agro)
The problem is that it shows all the years, but I only want pair years (in X-axis) or years with step equal to 2.
I hope you can really help me.
Thank you.
Notice that the X-axis variable is a numeric string.
You can add something like
scale_x_date(date_breaks = "2 years", date_labels = "%Y") to your ggplot.
This is how it looks with my data, since you haven't posted yours. I am plotting a type date on x axis.
1.
ggplot(mydata) +
aes(x = date, y = number, color = somevar) +
geom_line()
ggplot(mydata) +
aes(x = date, y = number, color = somevar) +
geom_line() +
scale_x_date(date_breaks = "1 year", date_labels = "%Y")
3.
ggplot(mydata) +
aes(x = date, y = number, color = somevar) +
geom_line() +
scale_x_date(date_breaks = "2 years", date_labels = "%Y")
If you want pair years and because your x-axis variable is numeric, you can specify in scale_x_continous that breaks argument should take only even numbers.
Here how you can do it using this small example:
year = 1998:2020
value = rnorm(23,mean = 3)
df = data.frame(year,value)
library(ggplot2)
ggplot(df, aes(x = year, y = value))+
geom_point()+
geom_line()+
scale_x_continuous(breaks = year[year %%2 ==0])
Reciprocally, if you want odd years, you just have to specify scale_x_continuous(breaks = year[year %%2 != 0])
So, in your code, you should write:
scale_x_continuous(breaks = Anual_growth$Año[Anual_growth$Año %%2 ==0])
Does it answer your question ?

How to shade the area between two time series only if value A is greater than value B in ggplot?

I am trying to chart 2 time series (indexed stock prices) on ggplot, AAPL and MSFT. I want to shade the the area between these two lines but only when the AAPL indexed price is higher than that of MSFT. How do I accomplish this?
I have been reading about using geom_ribbon() but saw that some people said it is problematic and doesn't work when the two lines do not cross. I also have not been able to get the code to work. How do I set my ymin and ymax values for geom_ribbon()? I tried geom_area() as well but then all I created was a stacked area graph.
Here is my code so far:
install.packages("tidyquant")
install.packages("ggplot2")
library(tidyquant)
library(ggplot2)
symbols <- c("AAPL", "MSFT")
data <- tq_get(symbols, get = "stock.prices", from = "2016-01-01")
S1_index <-data$adjusted[which(data$symbol == "AAPL" & data$date == min(data$date))]
S2_index <-data$adjusted[which(data$symbol == "MSFT" & data$date == min(data$date))]
data$adjusted <- ifelse(data$symbol == "AAPL", data$adjusted/S1_index,
ifelse(data$symbol == "MSFT", data$adjusted/S2_index,NA))
ggplot(data,aes(x=date, y=adjusted,colour= symbol)) +
geom_line() +
scale_colour_manual(values = c(AAPL = "darkblue", MSFT = "red")) +
ggtitle("Title Here") + xlab("X Axis Label Here") + ylab("Y Axis Label Here") +
theme(plot.title = element_text(hjust = 0.5)) +
scale_x_date(date_labels = "%b %y", date_breaks = "6 months") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"))+
labs(color = "Company") +
theme(legend.title = element_blank())
I would like the area between two time series to be shaded when AAPL is higher than MSFT, but currently my code doesn't accomplish that. I'm not very proficient in using ggplot, so I would appreciate any advice you might have.
You can use a ribbon to show the area between the two lines, but it'll require a bit of tweaking to only show the area when AAPL is higher than MSFT. Assume data is the link to the .csv file you've posted and the dates were formatted. First, we're going to build a seperate data.frame in a typical ribbon-like format:
ribbondata <- data.frame(
# We'll keep the x-values for one of the lines
x = data$date[data$symbol == "AAPL"],
# Next we are going to take the pairwise minima and maxima along the lines
ymin = pmin(data$adjusted[data$symbol == "AAPL"], data$adjusted[data$symbol == "MSFT"]),
ymax = pmax(data$adjusted[data$symbol == "AAPL"], data$adjusted[data$symbol == "MSFT"]),
# Then, we'll save a variable for which observations to keep
keep = data$adjusted[data$symbol == "AAPL"] > data$adjusted[data$symbol == "MSFT"]
)
Then here is how I would filter out regions we do not want to shade, and also attach some id variable to the data that keeps track of stretches of data that we do indeed want to shade. We'll use run length encoding for this:
keep_rle <- rle(ribbondata$keep)
# Now we'll replace every TRUE with a counter integer
keep_rle$values[keep_rle$values] <- seq_len(sum(keep_rle$values))
Next, we'll attach the inverse of this run length encoded id to the ribbondata dataframe and remove the bits where ribbondata$KEEP == FALSE.
ribbondata$id <- inverse.rle(keep_rle)
ribbondata <- ribbondata[ribbondata$keep,]
Then, we'll use the plotting code you provided:
g <- ggplot(data,aes(x=date, y=adjusted,colour= symbol)) +
geom_line() +
scale_colour_manual(values = c(AAPL = "darkblue", MSFT = "red")) +
ggtitle("Title Here") + xlab("X Axis Label Here") + ylab("Y Axis Label Here") +
theme(plot.title = element_text(hjust = 0.5)) +
scale_x_date(date_labels = "%b %y", date_breaks = "6 months") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"))+
labs(color = "Company") +
theme(legend.title = element_blank())
And attach our ribbondata to it:
g <- g + geom_ribbon(data = ribbondata,
aes(x = x, ymin = ymin, ymax = ymax, group = id),
inherit.aes = FALSE)
Now the trick here is to attach our calculated id variable to the group in the aes() call, so that ggplot doesn't interpret the ribbon as a continuous object and draw weird lines at x-values where ribbondata y-values are undefined. Also I've set inherit.aes = FALSE because the ribbondata has different names for x and ymin/ymax variables than the main data.
I ended up with the following plot:
Of course, you can give the ribbon any fill colour or alpha that you want. Good luck!
First reshape your data.
data <- data %>%
# Select down to the necessary columns
select(date, symbol, adjusted) %>%
# Pivot to create columns for both symbols
pivot_wider(names_from = symbol, values_from = adjusted) %>%
# Create new variables for ribbon
mutate(max1 = ifelse(AAPL >= MSFT, AAPL, MSFT)) %>%
mutate(max2 = ifelse(MSFT >= AAPL, MSFT, AAPL))
Next, create your ggplot object
g1 <- data %>%
# Set PlotAesthetics
ggplot(aes(x=date, y=AAPL)) +
# First ribbon creates the color above MSFT and below AAPL
geom_ribbon(aes(ymin=MSFT, ymax=AAPL), fill="grey", alpha=0.9) +
# Second ribbon removes anything below MSFT
geom_ribbon(aes(ymin=0, ymax=MSFT), fill="white", alpha=0.9) +
# Add lines for AAPL and MSFT
geom_line(aes(y=AAPL), color = "blue") +
geom_line(aes(y=MSFT), color = "red") +
# Create Labels
labs(x = "X Axis Label Here", y = "Y Axis Label Here",
title = "Title Here") +
# Set Theme to match your original plot
theme_classic() +
# Need to create custom legend
annotate(geom = "text", x = ymd('2020-06-01'), y = .25, label = "AAPL", hjust = "left") +
annotate(geom = "segment", x = ymd('2020-03-01'), xend = ymd('2020-05-01'), y = .25, yend = .25, colour = "blue", size = 1) +
annotate(geom = "text", x = ymd('2020-06-01'), y = .05, label = "MSFT", hjust = "left") +
annotate(geom = "segment", x = ymd('2020-03-01'), xend = ymd('2020-05-01'), y = .05, yend = .05, colour = "red", size = 1)
I realise this is a bit late, but is an alternative approach to achieving what #bgm was after.
Here is the associated plot

Time and date issues in R

I have a set of data with date and time. I want to plot time on the y-axis and date on the x-axis, and plot the number of observations at a given time during each day. However, my observations start at around 20.00 on the first day, and end around 06.00 the next day. So, when I plot this, R plots all observations on each date:
Here is a link to a picture of my current graph:
Does anyone know how to define the y-axis to start at one date and end at the next date?
Here is my script:
library(ggplot2)
library(scales)
library(dplyr)
library(lubridate)
test$DATE <- as.Date(test$DATE, format = "%d.%m.%Y", tz = "UTC")
test$hms <- format(test$TIME, format = "%H:%M:%S")
test$hms <- as.POSIXct(test$hms, format = "%H:%M:%S")
ggplot(test, aes(test$DATE,test$hms)) +
geom_point() +
geom_smooth() +
theme_light() + theme(legend.position="top") +
scale_color_brewer(palette = 1, type = "qual") +
ggtitle("emergence/return time") +
scale_x_date("d",date_breaks="7 days",date_labels="%d-%m")+
scale_y_datetime("Time",breaks=date_breaks("60 min"),labels =
date_format("%H:%M"))
g <- ggplot(test, aes(test$DATE,test$hms)) +
geom_point() +
geom_smooth() +
theme_light() + theme(legend.position="top") +
scale_color_brewer(palette = 1, type = "qual") +
ggtitle("emergence/return time") +
scale_x_date("d",date_breaks="7 days",date_labels="%d-%m")+
scale_y_datetime("Time",breaks=date_breaks("60 min"),labels =
date_format("%H:%M"))
g + scale_colour_grey()+theme(
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank())

Resources