Related
I have to read many Excel files (weather data from different stations), each with multiple sheets (yearly weather data for 2008:2021), convert them into a new format, and save them again. I have done this for one station (one Excel file): I read its sheets, reshaped them into my desired format by stacking the yearly data from the different sheets into a single data frame, year after year, and finally saved the result (the code is below).
I would like to replicate the same for more than 50 other stations (Excel files) as well, but being very new to R, I have not succeeded. I have read many posts about importing multiple Excel files with multiple sheets, but since my code involves some formatting after reading, I could not find a directly relevant answer. Any help will be greatly appreciated.
# Reshape the Lashkargah air-temperature workbook into one long daily table
# and save it as CSV.
#
# The workbook is read one sheet per year (sheets named "2008" ... "2022").
# Range A7:AK38 of each sheet is read as a day column followed by twelve
# three-column groups, which are stored as Tmin / Tmax / Tmean for months
# 1 to 12.
#
# Packages are installed once, outside the script, instead of on every run:
#   install.packages(c("readxl", "dplyr"))
library(readxl)

# Read one yearly sheet of a station workbook and return it in long format.
#
# path: path to the station's .xlsx file.
# year: year to read; the sheet with that name is used.
#
# Returns a data.frame with the columns Date, Year, Month, Day, Tmin, Tmax,
# Tmean and Prec (Prec is all NA). Day/month combinations that do not form a
# valid calendar date (for example 30 February) are dropped.
read_station_year <- function(path, year) {
  sheet <- read_excel(
    path,
    range = "A7:AK38",
    .name_repair = tolower,
    sheet = as.character(year)
  )

  # Columns are taken by position with `[[`, so the result does not depend on
  # what the header cells of the sheet are called.
  months <- lapply(seq_len(12), function(m) {
    first <- 3 * m - 1  # month 1 starts in column 2, month 2 in column 5, ...
    data.frame(
      Month = m,
      Day = sheet[[1]],
      Tmin = sheet[[first]],
      Tmax = sheet[[first + 1]],
      Tmean = sheet[[first + 2]]
    )
  })
  long <- do.call(rbind, months)

  long$Year <- year
  # An explicit format makes impossible dates parse to NA instead of relying
  # on as.Date() guessing the format from the first element.
  long$Date <- as.Date(
    paste(long$Year, long$Month, long$Day, sep = "-"),
    format = "%Y-%m-%d"
  )
  long <- dplyr::filter(long, !is.na(Date))
  long$Prec <- NA

  # Re-order the columns by name rather than by position.
  long[c("Date", "Year", "Month", "Day", "Tmin", "Tmax", "Tmean", "Prec")]
}

station_file <- "D:/Chill R Tutorial/ChillR-basic/AllBasin/Lashkargah_AT .xlsx"
years <- 2008:2022

# One data frame per year, stacked year after year.
Lashkargah <- do.call(
  rbind,
  lapply(years, function(year) read_station_year(station_file, year))
)
rm(station_file, years)

#dir.create("D:/Chill R Tutorial/ChillR-basic/AllBasin/Lashkargah_AT")
write.csv(Lashkargah,"D:/Chill R Tutorial/ChillR-basic/AllBasin/Lashkargah_AT_Obs.csv")
I have a data frame stored with daily data within a year and I want to compute monthly averages as well as day of the week averages and add those values as additional columns.
Here is a MWE of my data frame
# Minimal example: one row per day of 2020 with simulated daily sales, plus
# calendar helper columns used for grouping.
library(dplyr)  # provides tibble(), mutate() and %>%

# 2020 is a leap year, so the dates are built from explicit end points; a
# fixed length of 365 would stop at 2020-12-30.
df <- tibble(
  Date = seq(as.Date("2020-01-01"), as.Date("2020-12-31"), by = "1 day"),
  Daily_sales = rnorm(length(Date), 2, 1)
)
df <- df %>%
  mutate(
    month = lubridate::month(Date),               # Month
    dow = lubridate::wday(Date, week_start = 1),  # Day of the week
    dom = lubridate::day(Date)                    # Day of the month
  )
My problem is as follows: I know how to compute the monthly averages, e.g.
# Mean of Daily_sales per month: one output row for each month.
summarize(group_by(df, month), Monthly_avg = mean(Daily_sales))
but I don't know how to add this as an additional column in which every row in January holds the January average, every row in February holds the February average, and so on. E.g. if the average for January is 2.22, then the new column should contain 2.22 for all dates in January. I have the same problem for the day-of-the-week average.
Instead of summarize()ing an entire group into one row, we can mutate() all rows to add the group mean:
# mutate() on grouped data keeps every row and fills in the group's mean, so
# each date ends up carrying the mean of its month, its weekday and its day
# of the month. Each group_by() replaces the previous grouping.
result <- mutate(group_by(df, month), monthly_avg = mean(Daily_sales))
result <- mutate(group_by(result, dow), dow_avg = mean(Daily_sales))
result <- mutate(group_by(result, dom), dom_avg = mean(Daily_sales))
# Drop the last grouping so later verbs work on the whole table again.
result <- ungroup(result)
I have an annual data set that I would like to break into 10-day intervals. For example, I would like to subset 2010-12-26 to 2011-01-04 and create a home range using the x and y values for those dates, then take the next 9 days plus one date that overlaps with the previous subset — in this case 2011-01-04, giving 2011-01-04 to 2011-01-13. Is there a good way to do this?
library(lubridate)
# Example data: 500 observations that cycle through the days from
# 2010-12-26 to 2011-01-20, each with random x / y coordinates.
first_day <- dmy("26-12-2010")
last_day <- dmy("20-01-2011")
date <- rep_len(seq(first_day, last_day, by = "days"), 500)
n_obs <- length(date)
df <- data.frame(
  date = date,
  x = runif(n_obs, min = 60000, max = 80000),
  y = runif(n_obs, min = 800000, max = 900000)
)
I separated them into groups of 10 observations, but I am unsure how to make it more specific so that I get 10 days instead of just 10 observations.
# Split df into consecutive chunks of at most 10 rows (an unnamed list of
# data frames). Chunk membership is derived from the row position, so a row
# count that is an exact multiple of 10 (or zero) does not produce a trailing
# chunk made of an all-NA row followed by the last row.
interval_10 <- unname(
  split(df, ceiling(seq_len(nrow(df)) / 10))
)
Thank you
lapply through the unique date vector will do the work:
# Break points: every 9th distinct date, taken in chronological order.
# Neighbouring windows share their boundary date, so each window covers
# 10 calendar days.
t <- sort(unique(date))
t <- t[(seq_along(t) - 1) %% 9 == 0]

# One data frame per pair of neighbouring break points. seq_len() gives an
# empty list when there are fewer than two break points, where 1:(n - 1)
# would iterate over c(1, 0). Dates after the last break point do not form a
# full window and are not returned.
interval_10 <- lapply(
  seq_len(max(length(t) - 1, 0)),
  function(k) dplyr::filter(df, date >= t[k], date <= t[k + 1])
)
I have a data frame:
MS_NR SS_NR DATE HOUR VALUE
1 13095010 68 1/01/2014 0:00:00 9,8
2 13095010 68 1/01/2014 1:00:00 8,0
3 13095010 68 1/01/2014 2:00:00 NA
4 13095010 68 1/01/2014 3:00:00 7,5
5 13095010 68 1/01/2014 4:00:00 7,0
6 13095010 68 1/01/2014 5:00:00 8,5
These are temperature observations from a weather station, taken every hour. I want to calculate the daily, weekly, monthly and annual averages for several data frames from different weather stations. How can I do this within a loop, so that the process is not repetitive?
When working with hydro-meteorological data, I usually use xts and hydroTSM packages as they have many functions for data aggregation.
You didn't provide any data so I created one for demonstration purpose
library(xts)
library(hydroTSM)
# Generate random data: one uniformly distributed temperature per day from
# 2016-01-01 to 2018-12-31 (seeded so the example is reproducible)
set.seed(2018)
date <- seq(
  from = as.Date("2016-01-01"),
  to = as.Date("2018-12-31"),
  by = "days"
)
temperature <- runif(length(date), -15, 35)
dat <- data.frame(date, temperature)

# Convert to xts object for xts & hydroTSM functions:
# everything except the first (date) column is the data, the dates are the index
dat_xts <- xts(dat[, -1], order.by = dat$date)
# Daily, monthly and annual series drawn together in one figure
hydroplot(dat_xts, pfreq = "dma", var.type = "Temperature")

# Mean per week
dat_weekly <- apply.weekly(dat_xts, FUN = mean)
plot(dat_weekly)

# Mean per month; the default x axis is suppressed and replaced by
# month-year labels at "pretty" positions along the index
dat_monthly <- daily2monthly(dat_xts, FUN = mean, na.rm = TRUE)
plot.zoo(dat_monthly, xaxt = "n", xlab = "")
tick_dates <- pretty(index(dat_monthly))
axis.Date(
  1,
  at = tick_dates,
  labels = format(tick_dates, format = "%b-%Y"),
  las = 1,
  cex.axis = 1.1
)

# Mean per season: the season has to be named explicitly (here "DJF")
dat_seasonal <- dm2seasonal(dat_xts, season = "DJF", FUN = mean, na.rm = TRUE)
plot(dat_seasonal)

# Mean per year
dat_annual <- daily2annual(dat_xts, FUN = mean, na.rm = TRUE)
plot(dat_annual)
Edit: using OP's data
# Read the OP's file with readr's read_csv2() and inspect its structure
df <- readr::read_csv2("Temp_2014_Hour.csv")
str(df)

# Parse the day/month/year text in DATE into a Date, stored in a new column
df[["date"]] <- as.Date(df[["DATE"]], format = "%d/%m/%Y")

# Keep only the parsed date and the measured value
dat <- df[c("date", "VALUE")]
str(dat)

# xts object: the VALUE column as data, the dates as index
dat_xts <- xts(dat[-1], order.by = dat[["date"]])
Created on 2018-02-28 by the reprex package (v0.2.0).
I tried this:
First, load the file using read.table.
library(openair)
# `file` must hold the path of the station file (semicolon-separated, decimal
# commas); it is not defined in this snippet.
Temp <- read.table(
  file,
  header = TRUE,
  sep = ";",
  stringsAsFactors = FALSE,
  dec = ",",
  na.strings = "NA"
)

# Build one time stamp per row from the DATE (day/month/year) and HOUR
# (h:mm:ss) text columns.
# NOTE(review): assumes the columns are named DATE and HOUR and sit in
# positions 3 and 4, as in the sample shown in the question - confirm.
date <- as.POSIXct(
  paste(Temp$DATE, Temp$HOUR),
  format = "%d/%m/%Y %H:%M:%S",
  tz = "GMT"
)
head(date)  # quick visual check of the parsed time stamps

# Put the time stamp first, in a column called `date`, and drop the original
# DATE and HOUR text columns.
Temp_met <- cbind(date, Temp[-c(3, 4)])

## daily mean
Temp_daily <- timeAverage(Temp_met, avg.time = "day")
## weekly mean
Temp_week <- timeAverage(Temp_met, avg.time = "week")
## monthly mean
Temp_month <- timeAverage(Temp_met, avg.time = "month")
## annual mean
Temp_annual <- timeAverage(Temp_met, avg.time = "year")
I am trying to get the total precipitation values for every hour from a personal weather station I have using the weatherData package. The problem I have is that the data is collected every five minutes and the values repeat themselves until there is a change in precipitation value. I have tried the 'duplicated' function but I get a large number of data removed when there is no precipitation which makes it hard for me to get a summary of the hourly precipitation.
Please see code below
## Load required libraries
library(weatherData)
library(ggplot2)
library(scales)
library(plyr)
library(reshape2)
library(gridExtra)
library(lubridate)
library(weathermetrics)
library(zoo)
# Get data for PWS using weatherData package
# (station "IPENANGB2", 2014-09-01 to 2014-09-30, detailed records with a
# custom column selection). TRUE is spelled out because T can be reassigned.
pws <- getWeatherForDate(
  "IPENANGB2",
  "2014-09-01",
  "2014-09-30",
  station_type = "id",
  opt_detailed = TRUE,
  opt_custom_columns = TRUE,
  custom_columns = c(1, 2, 6, 7, 10)
)
# Rename columns
colnames(pws) <- c("time", "time1", "tempc", "wdd", "wspd", "prcp")
## Adding date columns
# Parse the text time stamp in time1 and derive calendar fields from it.
pws$time <- as.POSIXct(
  pws$time1,
  format = "%Y-%m-%d %H:%M:%S",
  tz = "Australia/Perth"
)
pws$year <- as.numeric(format(pws$time, "%Y"))
pws$date <- as.Date(pws$time, tz = "Australia/Perth")
pws$month <- as.numeric(as.POSIXlt(pws$date)$mon + 1)
pws$monthf <- factor(
  pws$month,
  levels = as.character(1:12),
  labels = month.abb,
  ordered = TRUE
)

# POSIXlt counts weekdays 0-6 starting on Sunday, so the levels are listed as
# 1..6, 0 to line up with the Monday-first labels (both reversed, keeping the
# Sun < ... < Mon factor order).
pws$weekday <- as.POSIXlt(pws$date)$wday
pws$weekdayf <- factor(
  pws$weekday,
  levels = rev(c(1:6, 0)),
  labels = rev(c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")),
  ordered = TRUE
)

pws$yearmonth <- as.yearmon(pws$date)
pws$yearmonthf <- factor(pws$yearmonth)
pws$week <- as.numeric(format(pws$date, "%W"))
pws$weekf <- factor(pws$week)
pws$jday <- yday(pws$date)

# Format the POSIXct directly: a round trip through as.character() and
# strptime() gives NA when the time part is omitted from the text (midnight).
pws$hour <- as.numeric(format(pws$time, "%H"))
pws$min <- as.numeric(format(pws$time, "%M"))
# Remove repeated readings: keep a row only when prcp differs from the row
# before it (the first row is always kept, as are rows next to a missing
# value). duplicated() would also drop every later return to a value seen
# earlier, e.g. all dry spells after the first one.
# NOTE(review): assumes pws is in chronological order - confirm.
prcp_changed <- c(TRUE, diff(pws$prcp) != 0)
pws.df <- pws[is.na(prcp_changed) | prcp_changed, ]
Assuming you want to get hourly averages of tempc, wdd, wspd, prcp:
# used packages
library(weatherData)
library(lubridate)
library(dplyr)
library(stringr)
# read data
# (station "IPENANGB2", 2014-09-01 to 2014-09-30, detailed records with a
# custom column selection). TRUE is spelled out because T can be reassigned.
pws <- getWeatherForDate(
  "IPENANGB2",
  "2014-09-01",
  "2014-09-30",
  station_type = "id",
  opt_detailed = TRUE,
  opt_custom_columns = TRUE,
  custom_columns = c(1, 2, 6, 7, 10)
)
# rename columns
colnames(pws) <- c("time", "time1", "tempc", "wdd", "wspd", "prcp")
# cleaning dataset and adding some columns
useful_pws <-
  pws %>%
  # keep columns 2 to 6 (time1, tempc, wdd, wspd, prcp after the rename)
  select(2:6) %>%
  # drop rows whose time stamp contains a stray "<br>"
  filter(!str_detect(time1, "<br>")) %>%
  # parse the time stamp and split it into the parts used for grouping
  mutate(
    time1 = ymd_hms(time1),
    year = year(time1),
    month = month(time1),
    day = day(time1),
    hour = hour(time1)
  ) %>%
  # as_tibble() replaces the deprecated tbl_df()
  as_tibble()
# summarising dataset: one row per year / month / day / hour holding the mean
# of each measurement, ignoring missing values
# NOTE(review): wdd looks like a wind direction; if it is in degrees, an
# arithmetic mean is not meaningful across the 0/360 wrap - confirm.
useful_pws %>%
  select(-time1) %>%
  group_by(year, month, day, hour) %>%
  summarise(
    tempc = mean(tempc, na.rm = TRUE),
    wdd = mean(wdd, na.rm = TRUE),
    wspd = mean(wspd, na.rm = TRUE),
    prcp = mean(prcp, na.rm = TRUE)
  ) %>%
  # summarise() only peels off the last grouping level; drop the rest so the
  # result is not silently still grouped by year, month and day
  ungroup()