Compute daily, monthly and annual averages of several data sets - R

I have a data frame:
MS_NR SS_NR DATE HOUR VALUE
1 13095010 68 1/01/2014 0:00:00 9,8
2 13095010 68 1/01/2014 1:00:00 8,0
3 13095010 68 1/01/2014 2:00:00 NA
4 13095010 68 1/01/2014 3:00:00 7,5
5 13095010 68 1/01/2014 4:00:00 7,0
6 13095010 68 1/01/2014 5:00:00 8,5
These are temperature observations from a weather station, taken every hour. I want to calculate the daily, weekly, monthly and annual averages for several data frames from different weather stations. How can I do this within a loop, so that the process is not repetitive?

When working with hydro-meteorological data, I usually use the xts and hydroTSM packages, as they have many functions for data aggregation.
You didn't provide any data, so I created some for demonstration purposes:
library(xts)
library(hydroTSM)
# Generate random data
set.seed(2018)
date = seq(from = as.Date("2016-01-01"), to = as.Date("2018-12-31"),
by = "days")
temperature = runif(length(date), -15, 35)
dat <- data.frame(date, temperature)
# Convert to xts object for xts & hydroTSM functions
dat_xts <- xts(dat[, -1], order.by = dat$date)
# All daily, monthly & annual series in one plot
hydroplot(dat_xts, pfreq = "dma", var.type = "Temperature")
# Weekly average
dat_weekly <- apply.weekly(dat_xts, FUN = mean)
plot(dat_weekly)
# Monthly average
dat_monthly <- daily2monthly(dat_xts, FUN = mean, na.rm = TRUE)
plot.zoo(dat_monthly, xaxt = "n", xlab = "")
axis.Date(1, at = pretty(index(dat_monthly)),
labels = format(pretty(index(dat_monthly)), format = "%b-%Y"),
las = 1, cex.axis = 1.1)
# Seasonal average: need to specify the months
dat_seasonal <- dm2seasonal(dat_xts, season = "DJF", FUN = mean, na.rm = TRUE)
plot(dat_seasonal)
# Annual average
dat_annual <- daily2annual(dat_xts, FUN = mean, na.rm = TRUE)
plot(dat_annual)
Edit: using OP's data
df <- readr::read_csv2("Temp_2014_Hour.csv")
str(df)
# Convert DATE to Date object & put in a new column
df$date <- as.Date(df$DATE, format = "%d/%m/%Y")
dat <- df[, c("date", "VALUE")]
str(dat)
dat_xts <- xts(dat[, -1], order.by = dat$date)
Created on 2018-02-28 by the reprex package (v0.2.0).
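To average the hourly values directly (the Edit above keeps only the calendar date as the index), the full timestamp can be used instead and aggregated with xts's period-apply helpers. A minimal sketch, not part of the original answer, assuming the DATE and HOUR columns look as in the question:
# Sketch: build a full timestamp and aggregate the hourly series with xts helpers
df$datetime <- as.POSIXct(paste(df$DATE, df$HOUR),
                          format = "%d/%m/%Y %H:%M:%S", tz = "UTC")
hourly_xts <- xts(df$VALUE, order.by = df$datetime)
daily_mean   <- apply.daily(hourly_xts,   mean, na.rm = TRUE)
weekly_mean  <- apply.weekly(hourly_xts,  mean, na.rm = TRUE)
monthly_mean <- apply.monthly(hourly_xts, mean, na.rm = TRUE)
annual_mean  <- apply.yearly(hourly_xts,  mean, na.rm = TRUE)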

I tried this.
First, load the file using read.table:
library(openair)
Temp <- read.table(file, header = TRUE, sep = ";",
                   stringsAsFactors = FALSE, dec = ",", na.strings = "NA")
tiempos <- Temp$HOUR
timestamps <- as.POSIXlt(as.POSIXct("1900-1-1", tz = "UTC") +
                           as.difftime(as.character(tiempos), format = "%H:%M:%S"))
time <- format(timestamps, format = "%H:%M:%S")
date <- paste(Temp[, 3], time, sep = " ")
date
Temp_met <- cbind(date, Temp[-c(3, 4)])
Temp_met$date <- as.POSIXct(strptime(Temp_met$date,
                                     format = "%d/%m/%Y %H:%M:%S", "GMT"))
## daily mean
Temp_daily <- timeAverage(Temp_met, avg.time = "day")
## weekly mean
Temp_week <- timeAverage(Temp_met, avg.time = "week")
## monthly mean
Temp_month <- timeAverage(Temp_met, avg.time = "month")
## annual mean
Temp_annual <- timeAverage(Temp_met, avg.time = "year")
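Since the question is about doing this for several stations without repeating the code, the steps above can be wrapped in a function and looped over a vector of file names with lapply. A sketch, assuming every file has the same layout as the sample data (the file pattern is a placeholder):
# Sketch: one function per station file, looped with lapply
library(openair)
average_station <- function(file) {
  Temp <- read.table(file, header = TRUE, sep = ";",
                     stringsAsFactors = FALSE, dec = ",", na.strings = "NA")
  # timeAverage() expects a POSIXct column named 'date'
  Temp$date <- as.POSIXct(paste(Temp$DATE, Temp$HOUR),
                          format = "%d/%m/%Y %H:%M:%S", tz = "GMT")
  Temp <- Temp[, c("date", "VALUE")]
  list(daily   = timeAverage(Temp, avg.time = "day"),
       weekly  = timeAverage(Temp, avg.time = "week"),
       monthly = timeAverage(Temp, avg.time = "month"),
       annual  = timeAverage(Temp, avg.time = "year"))
}
files <- list.files(pattern = "\\.csv$")  # placeholder: adjust to your station files
all_stations <- setNames(lapply(files, average_station), files)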

Related

From the hourly data create a time series of daily data

How can I convert hourly data into a daily data time series?
here's my code for the hourly data:
rainfall$V1 <- as.POSIXct(paste0(rainfall$V1), format = "%d%b%y:%H:%M")
rainfall <- tidyr::complete(rainfall, V1 = seq(as.POSIXct("1992-01-01 00:00:00"), as.POSIXct("1992-12-31 23:00:00"), by = 'hour'), fill = list(V2 = 0))
rainfall_ts <- ts(rainfall$V2, start= c(1992,01,01), frequency = 24*366)
head(rainfall)
V1 is the date (hourly); V2 is the rainfall.
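A sketch of one way to collapse that hourly series to daily values (assuming V1 is already POSIXct after the conversion above, and that daily totals, not means, are wanted for rainfall):
# Sketch: daily rainfall totals from the hourly series
rainfall_daily <- aggregate(V2 ~ as.Date(V1), data = rainfall, FUN = sum)
names(rainfall_daily) <- c("date", "daily_rainfall")
# daily time series for 1992 (a leap year, hence frequency 366)
rainfall_daily_ts <- ts(rainfall_daily$daily_rainfall, start = c(1992, 1), frequency = 366)
head(rainfall_daily)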

10 Day intervals with a overlapping date

I have an annual data set that I would like to break into 10-day intervals. For example, I would like to subset 2010-12-26 to 2011-01-04 and create a home range using the x and y values for those dates, then take the next 9 days plus a date that overlaps with the previous subset; in this case that would be 2011-01-04 (2011-01-04 to 2011-01-13). Is there a good way to do this?
library(lubridate)
date <- rep_len(seq(dmy("26-12-2010"), dmy("20-01-2011"), by = "days"), 500)
df <- data.frame(date = date,
x = runif(length(date), min = 60000, max = 80000),
y = runif(length(date), min = 800000, max = 900000))
I separated them into groups of 10 observations, but I am unsure how to make it more specific so that I get 10 days instead of just 10 observations.
interval_10 <- lapply(
seq(0, nrow(df), by = 10),
function(k) df[max(k+1, 1):min(k + 10, nrow(df)), ]
)
Thank you
lapply over the unique date vector will do the work:
library(dplyr)
t <- unique(date)[seq(from = 1, to = length(unique(date)), by = 9)]
interval_10 <- lapply(
  1:(length(t) - 1),
  function(k) df %>% filter(date <= t[k + 1], date >= t[k])
)
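With the example data above, a quick check (a sketch) shows that each element spans ten calendar days and that consecutive elements share the boundary date:
# Expected with the example data: intervals share the overlapping date 2011-01-04
range(interval_10[[1]]$date)  # "2010-12-26" "2011-01-04"
range(interval_10[[2]]$date)  # "2011-01-04" "2011-01-13"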

Select specific date / hour range from list elements and create dataframe

I have a list with approximately 150 elements (data frames) of weather data (ID, date, time, temperature). I want to select a specific date range and time from each list element (data frame) and create a data frame (or several) from the selected rows. Since I can't provide the real data, I've created a reproducible example:
library(lubridate)
library(dplyr)
library(tidyr)
library(purrr)
z1 <- seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date()+780), by = "10 min")
z2 <- seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date()+780), by = "10 min")
z3 <- seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date()+780), by = "10 min")
z4 <- seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date()+780), by = "10 min")
temperature1 <- runif(112321, min = -5, max = 45)
temperature2 <- runif(112321, min = -5, max = 45)
temperature3 <- runif(112321, min = -5, max = 45)
temperature4 <- runif(112321, min = -5, max = 45)
station1 <- data.frame(date = z1, temp = temperature1)
station2 <- data.frame(date = z2, temp = temperature2)
station3 <- data.frame(date = z3, temp = temperature3)
station4 <- data.frame(date = z4, temp = temperature4)
##isolate date from time
station1 <- separate(station1, date, c("date", "time"), sep = " ")
station2 <- separate(station2, date, c("date", "time"), sep = " ")
station3 <- separate(station3, date, c("date", "time"), sep = " ")
station4 <- separate(station4, date, c("date", "time"), sep = " ")
## list of all stations
stations_list <- list(station1,station2,station3,station4)
#create a column with station ID (name) ##
ID_names <- c("station1","station2","station3","station4")
stations_list <- mapply(cbind,stations_list, "ID" = ID_names, SIMPLIFY = F)
Now, from this list, I want to select a specific date and time range, so I used the following script:
selected_date_time <- map_dfr(stations_list,
~ filter(.x, date >= "2021-06-01" &
date <= "2021-10-15" & time >= "18:00" & time <= "10:00" |
date > "2022-08-18" & date <= "2022-10-05" & time >= "09:00"
& time <= "17:00"))
In this case, I got a data frame with only the year 2022 and no selection for 2021. I changed the code slightly and selected a different hour range:
selected_date_time <- map_dfr(stations_list,
~ filter(.x, date >= "2021-06-01" &
date <= "2021-10-15" & time >= "18:00" & time <= "10:00" |
date > "2022-08-18" & date <= "2022-10-05" & time <= "09:00"
& time >= "17:00"))
In the last case I got a data frame with zero observations. What am I doing wrong?
As mentioned by @AntoniosK, your filter logic was off, so I made a few amendments. Most importantly, for this filter to work, we need to make sure that date and time are actual "Date" and "hms" (time) classes respectively, not character strings.
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
library(tidyverse)
library(hms)
#>
#> Attaching package: 'hms'
#> The following object is masked from 'package:lubridate':
#>
#> hms
z1 <-seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 780), by = "10 min")
z2 <-seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 780), by = "10 min")
z3 <-seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 780), by = "10 min")
z4 <-seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 780), by = "10 min")
temperature1 <- runif(112321, min = -5, max = 45)
temperature2 <- runif(112321, min = -5, max = 45)
temperature3 <- runif(112321, min = -5, max = 45)
temperature4 <- runif(112321, min = -5, max = 45)
station1 <- tibble(date = z1, temp = temperature1)
station2 <- tibble(date = z2, temp = temperature2)
station3 <- tibble(date = z3, temp = temperature3)
station4 <- tibble(date = z4, temp = temperature4)
station1 <- station1 %>%
mutate(time = hms::as_hms(date),
date = as_date(date)) %>%
relocate(date, time)
station2 <- station2 %>%
mutate(time = hms::as_hms(date),
date = as_date(date)) %>%
relocate(date, time)
station3 <- station3 %>%
mutate(time = hms::as_hms(date),
date = as_date(date)) %>%
relocate(date, time)
station4 <- station4 %>%
mutate(time = hms::as_hms(date),
date = as_date(date)) %>%
relocate(date, time)
## list of all stations
stations_list <- list(station1, station2, station3, station4)
#create a column with station ID (name) ##
ID_names <- c("station1", "station2", "station3", "station4")
stations_list <-
mapply(cbind, stations_list, "ID" = ID_names, SIMPLIFY = F)
stations_list %>%
map_dfr(~ filter(
.x,
(
between(date, as.Date("2021-06-01"), as.Date("2021-10-15")) &
(time >= as_hms("18:00:00") | time <= as_hms("10:00:00"))
) |
(date > as.Date("2022-08-18") &
date <= as.Date("2022-10-05")) &
(time <= as_hms("09:00:00") | time >= as_hms("17:00:00"))
)) %>%
arrange(date) %>%
head()
#> date time temp ID
#> 1 2021-06-01 00:00:00 20.259581 station1
#> 2 2021-06-01 00:10:00 37.558833 station1
#> 3 2021-06-01 00:20:00 18.729679 station1
#> 4 2021-06-01 00:30:00 5.880394 station1
#> 5 2021-06-01 00:40:00 2.393515 station1
#> 6 2021-06-01 00:50:00 36.030296 station1
Created on 2021-05-26 by the reprex package (v2.0.0)
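As a side note, the four copies of the station preparation above can be collapsed into a single mapped step, which is closer to the spirit of the original question about avoiding repetitive code. A sketch using the same packages (prep_station is a helper name introduced here):
# Sketch: prepare all stations with one helper instead of four copies
prep_station <- function(datetimes, temps) {
  tibble(date = datetimes, temp = temps) %>%
    mutate(time = hms::as_hms(date),
           date = as_date(date)) %>%
    relocate(date, time)
}
stations_list <- map2(list(z1, z2, z3, z4),
                      list(temperature1, temperature2, temperature3, temperature4),
                      prep_station) %>%
  set_names(c("station1", "station2", "station3", "station4")) %>%
  imap(~ mutate(.x, ID = .y))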

My original data is weekly data; how do I plot it as weekly data in R?

My data are originally weekly (example below). I find it difficult to do time series analysis since dates are usually expected in the form dd-mm-yy.
WEEK SALES
1: 29.2010 60.48
2: 30.2010 95.76
3: 31.2010 51.66
4: 32.2010 73.71
5: 33.2010 22.05
Thanks in advance!
We can convert the week number to a date using functions from the lubridate package, and then plot the date on the x-axis and SALES on the y-axis.
library(tidyverse)
library(lubridate)
dat2 <- dat %>%
separate(WEEK, into = c("WEEK", "YEAR"), convert = TRUE) %>%
mutate(Date = ymd("2010-01-01") + weeks(WEEK - 1))
ggplot(dat2, aes(x = Date, y = SALES)) +
geom_line()
DATA
dat <- read.table(text = " WEEK SALES
1 '29.2010' 60.48
2 '30.2010' 95.76
3 '31.2010' 51.66
4 '32.2010' 73.71
5 '33.2010' 22.05",
header = TRUE, stringsAsFactors = FALSE,
colClasses = c("character", "character", "numeric"))
UPDATE
If the data are from different years, we can use the following code.
dat2 <- dat %>%
separate(WEEK, into = c("WEEK", "YEAR"), convert = TRUE) %>%
mutate(Date = ymd(paste(YEAR, "01", "01", sep = "-")) + weeks(WEEK - 1))
DATA
dat <- read.table(text = " WEEK SALES
1 '29.2010' 60.48
2 '30.2010' 95.76
3 '31.2010' 51.66
4 '32.2010' 73.71
5 '33.2010' 22.05
6 '1.2011' 37.5
7 '2.2011' 45.2
8 '3.2011' 62.9",
header = TRUE, stringsAsFactors = FALSE,
colClasses = c("character", "character", "numeric"))

Precipitation values for every 5 minutes to hourly summaries in R

I am trying to get the total precipitation values for every hour from a personal weather station I have, using the weatherData package. The problem is that the data are collected every five minutes and the values repeat themselves until there is a change in the precipitation value. I have tried the duplicated function, but a large amount of data gets removed when there is no precipitation, which makes it hard for me to get a summary of the hourly precipitation.
Please see the code below.
## Load required libraries
library(weatherData)
library(ggplot2)
library(scales)
library(plyr)
library(reshape2)
library(gridExtra)
library(lubridate)
library(weathermetrics)
library(zoo)
# Get data for PWS using weatherData package
pws <- getWeatherForDate("IPENANGB2", "2014-09-01","2014-09-30", station_type = "id",opt_detailed=T, opt_custom_columns=T, custom_columns=c(1,2,6,7,10))
# Rename columns
colnames(pws)<-c("time","time1","tempc","wdd","wspd","prcp")
## Adding date columns
pws$time<-as.POSIXct(pws$time1,format="%Y-%m-%d %H:%M:%S",tz="Australia/Perth")
pws$year <- as.numeric(format(pws$time,"%Y"))
pws$date <-as.Date(pws$time,format="%Y-%m-%d",tz="Australia/Perth")
pws$year <- as.numeric(as.POSIXlt(pws$date)$year+1900)
pws$month <- as.numeric(as.POSIXlt(pws$date)$mon+1)
pws$monthf <- factor(pws$month,levels=as.character(1:12),labels=c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"),ordered=TRUE)
pws$weekday <- as.POSIXlt(pws$date)$wday
pws$weekdayf <- factor(pws$weekday,levels=rev(0:6),labels=rev(c("Mon","Tue","Wed","Thu","Fri","Sat","Sun")),ordered=TRUE)
pws$yearmonth <- as.yearmon(pws$date)
pws$yearmonthf <- factor(pws$yearmonth)
pws$week <- as.numeric(format(as.Date(pws$date),"%W"))
pws$weekf<- factor(pws$week)
pws$jday<-yday(pws$date)
pws$hour <- as.numeric(format(strptime(pws$time, format = "%Y-%m-%d %H:%M"),format = "%H"))
pws$min <- as.numeric(format(strptime(pws$time, format = "%Y-%m-%d %H:%M"),format = "%M"))
# Remove duplicate values
pws.df <- pws[!duplicated(pws$prcp),]
Assuming you want to get hourly averages of tempc, wdd, wspd, prcp:
# used packages
library(weatherData)
library(lubridate)
library(dplyr)
library(stringr)
# read data
pws <- getWeatherForDate("IPENANGB2",
"2014-09-01",
"2014-09-30",
station_type = "id",
opt_detailed = T,
opt_custom_columns = T,
custom_columns = c(1, 2, 6, 7, 10))
# rename columns
colnames(pws) <- c("time", "time1", "tempc", "wdd", "wspd", "prcp")
# cleaning dataset and adding some columns
useful_pws <-
pws %>%
select(2:6) %>%
filter(!str_detect(time1, "<br>")) %>%
mutate(time1 = ymd_hms(time1),
year = year(time1),
month = month(time1),
day = day(time1),
hour = hour(time1)) %>%
tbl_df()
# summarising dataset
useful_pws %>%
select(-time1) %>%
group_by(year, month, day, hour) %>%
summarise(tempc = mean(tempc, na.rm = TRUE),
wdd = mean(wdd, na.rm = TRUE),
wspd = mean(wspd, na.rm = TRUE),
prcp = mean(prcp, na.rm = TRUE))
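If a single timestamp per hour is preferred over separate year/month/day/hour columns, the grouping can also be done on a truncated timestamp; a sketch (swap mean for sum on prcp if hourly totals rather than averages are wanted):
# Sketch: group on the hour-truncated timestamp instead of separate date parts
useful_pws %>%
  group_by(hour_start = floor_date(time1, unit = "hour")) %>%
  summarise(tempc = mean(tempc, na.rm = TRUE),
            wdd   = mean(wdd,   na.rm = TRUE),
            wspd  = mean(wspd,  na.rm = TRUE),
            prcp  = mean(prcp,  na.rm = TRUE))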
