How to extract time series from a raster stack in R

I have created a RasterStack from NDVI layers of the MODIS data. Now I want to extract time series data at different locations in this stack so that I can use the bfast/greenbrown packages to estimate trends and breakpoints.
Here is how I created the stack:
# runGdal(job = "testJob", "MYD13Q1", begin = "2018.01.09", end = "2018.12.27",
#         tileH = 26:29, tileV = 4:7,
#         SDSstring = "1000000000000000000000")
### NDVI files path
NDVI_files_path <- "/media/MyData/Data/MODIS/PROCESSED/MYD13Q1.006_20190527193158"
all_NDVI_files <- list.files(NDVI_files_path,
                             full.names = TRUE,
                             pattern = "\\.tif$")
all_NDVI_files
### Raster stack
NDVI_stack <- stack(all_NDVI_files)
How can I extract time series data for a specific area of the raster stack?

You can use lubridate and rts to create a RasterStackTS object as follows:
#Load libraries
library(rts)
library(lubridate)
#Reproducible example (use your files here)
all_NDVI_files = c('MOD13Q1.006__250m_16_days_EVI_doy2000049_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000065_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000081_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000097_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000113_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000129_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000145_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000161_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000177_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000193_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000209_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000225_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000241_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000257_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000273_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000289_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000305_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000321_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000337_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000353_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001001_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001017_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001033_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001049_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001065_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001081_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001097_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001113_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001129_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001145_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001161_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001177_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001193_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001209_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001225_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001241_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001257_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001273_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001289_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001305_aid0001.tif')
# Depending on the file format, extract the dates
# (this example uses the MODIS 16-day composite naming: ...doyYYYYJJJ...)
ndvi.time <- data.frame(year = substr(basename(all_NDVI_files), 34, 37),
                        julD = substr(basename(all_NDVI_files), 38, 40))
ndvi.time$dateJ <- paste(ndvi.time$year, ndvi.time$julD, sep = '-')
ndvi.time$julD <- parse_date_time(ndvi.time$dateJ, 'y-j')
# Create your RasterStackTS object
# (use your actual RasterStack here, i.e. this step is not reproducible
# with the file names above alone)
ndvi.rts <- rts(NDVI_stack, ndvi.time$julD)
ndvi.rts
plot(ndvi.rts)
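To then extract the series at specific locations (the question's actual goal), raster::extract() works on the underlying stack. A minimal sketch, assuming hypothetical coordinates that are in the same CRS as the stack:
library(sp)
# Hypothetical sample locations (replace with your own; must match the stack's CRS)
pts <- SpatialPoints(cbind(c(77.5, 78.1), c(28.6, 29.0)),
                     proj4string = crs(NDVI_stack))
vals <- raster::extract(NDVI_stack, pts) # one row per point, one column per layer
# Turn the first point's values into a regular ts for bfast/greenbrown
# (16-day composites give ~23 observations per year; the start period is an assumption)
ndvi1 <- ts(vals[1, ], start = c(2000, 4), frequency = 23)
From there, ndvi1 can be passed to bfast() or greenbrown's Trend().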
Hope this helps!

Related

Error in do.ply(i) : task 1 failed - "could not find function "%>%"" in R parallel programming

Every time I run the script it gives me an error: Error in { : task 1 failed - "could not find function "%>%""
I have already checked every post on this forum about this and tried to apply the suggestions, but none of them work.
Please advise any solution.
Please note: I have only 2 cores on my PC.
My code is as follows:
library(dplyr) # For basic data manipulation
library(ncdf4) # For creating NetCDF files
library(tidync) # For easily dealing with NetCDF data
library(ggplot2) # For visualising data
library(doParallel) # For parallel processing
MHW_res_grid <- readRDS("C:/Users/SUDHANSHU KUMAR/Desktop/MTech Project/R/MHW_result.Rds")
# Function for creating arrays from data.frames
df_acast <- function(df, lon_lat){
  # Force grid
  res <- df %>%
    right_join(lon_lat, by = c("lon", "lat")) %>%
    arrange(lon, lat)
  # Convert date values to integers if they are present
  if(lubridate::is.Date(res[1,4])) res[,4] <- as.integer(res[,4])
  # Create array
  res_array <- base::array(res[,4],
                           dim = c(length(unique(lon_lat$lon)),
                                   length(unique(lon_lat$lat))))
  dimnames(res_array) <- list(lon = unique(lon_lat$lon),
                              lat = unique(lon_lat$lat))
  return(res_array)
}
# Wrapper function for the last step before data are entered into NetCDF files
df_proc <- function(df, col_choice){
  # Determine the correct array dimensions
  lon_step <- mean(diff(sort(unique(df$lon))))
  lat_step <- mean(diff(sort(unique(df$lat))))
  lon <- seq(min(df$lon), max(df$lon), by = lon_step)
  lat <- seq(min(df$lat), max(df$lat), by = lat_step)
  # Create full lon/lat grid
  lon_lat <- expand.grid(lon = lon, lat = lat) %>%
    data.frame()
  # Acast only the desired column
  dfa <- plyr::daply(df[c("lon", "lat", "event_no", col_choice)],
                     c("event_no"), df_acast, .parallel = T, lon_lat = lon_lat)
  return(dfa)
}
# We must now run this function on each column of data we want to add to the NetCDF file
doParallel::registerDoParallel(cores = 2)
prep_dur <- df_proc(MHW_res_grid, "duration")
prep_max_int <- df_proc(MHW_res_grid, "intensity_max")
prep_cum_int <- df_proc(MHW_res_grid, "intensity_cumulative")
prep_peak <- df_proc(MHW_res_grid, "date_peak")
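This question is unanswered above, so the following is an assumption rather than a confirmed fix: parallel workers started by doParallel do not inherit packages attached in the main session, so %>% is undefined on them. A minimal sketch of the usual remedy, shipping dplyr to the workers through plyr's .paropts argument:
# Hedged fix: load dplyr on each foreach worker so %>% is visible there
dfa <- plyr::daply(df[c("lon", "lat", "event_no", col_choice)],
                   c("event_no"), df_acast, .parallel = TRUE,
                   .paropts = list(.packages = "dplyr"),
                   lon_lat = lon_lat)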

Using R to process Google Earth Engine data

I want to download the daily tmax from NASA for a given lat/lon (https://developers.google.com/earth-engine/datasets/catalog/NASA_NEX-DCP30_ENSEMBLE_STATS)
using the following tutorial: https://jesjehle.github.io/earthEngineGrabR/index.html
library(devtools)
install_github('JesJehle/earthEngineGrabR')
library(earthEngineGrabR)
ee_grab_install() # had to install Anaconda before doing this step
test_data <- ee_grab(data = ee_data_collection(datasetID = "NASA/NEX-DCP30_ENSEMBLE_STATS",
                                               timeStart = "1980-01-01",
                                               timeEnd = "1980-01-02",
                                               bandSelection = "tasmax"),
                     targetArea = system.file("data/territories.shp",
                                              package = "earthEngineGrabR"))
Error: With the given product argument no valid data could be requested.
In addition: Warning message:
Error on Earth Engine servers for data product: NASA-NEX-DCP30_ENSEMBLE_STATS_s-mean_t-mean_1980-01-01to2005-12-31
Error in py_call_impl(callable, dots$args, dots$keywords): EEException: Collection.first: Error in map(ID=historical_195001):
Image.select: Pattern 'tasmax' did not match any bands.
I would like to know how to specify the band so that I don't get this error, and, instead of using a shapefile as the target area, how do I download tmax data for a single lat/lon (9.55, 78.59)?
You might use rgee to accomplish this. Currently, rgee has a function called rgee::ee_extract that works similarly to raster::extract().
library(rgee)
library(sf)
ee_Initialize() # initialize the Earth Engine session before any ee$ calls
# 1. Load a geometry
y <- st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE) %>%
  st_transform(4326)
## Move that geometry from local to Earth Engine
ee_y <- sf_as_ee(y)
# 2. Load your ImageCollection
x <- ee$ImageCollection("NASA/NEX-DCP30_ENSEMBLE_STATS")$
  filterDate("1980-01-01", "1980-01-02")$
  map(function(img) img$select("tasmax_mean"))
## Calculate the nominal scale
scale <- x$first()$projection()$nominalScale()$getInfo()
# 3. Extract values
tasmax_mean_data <- ee_extract(x = x,
                               y = y,
                               fun = ee$Reducer$mean(),
                               scale = scale,
                               id = "FIPS")
# 4. Merge results with the sf object
ee_nc_tasmax <- merge(y, tasmax_mean_data, by = "FIPS")
plot(ee_nc_tasmax['historical_198001'])
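For the single lat/lon asked about (9.55, 78.59), the same ee_extract() call accepts a point geometry; a minimal sketch building on the session above (the point construction is an addition, not part of the original answer):
# Single point at lon 78.59, lat 9.55 (note the lon/lat order in st_point)
pt <- st_sf(geometry = st_sfc(st_point(c(78.59, 9.55)), crs = 4326))
tasmax_pt <- ee_extract(x = x, y = pt,
                        fun = ee$Reducer$mean(),
                        scale = scale)
tasmax_pt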

How to visualize the convolution layers and feature maps in mxnet after the CNN has finished training?

I want to plot or visualize the output of each layer of a trained CNN built with mxnet in R, like those abstract-art images of what each layer of a neural network can "see".
But I don't know how. One way I can think of is to put the weights and biases back into each step and plot the outputs. But when I try to put model$arg.params$convolution0_weight back into mx.symbol.Convolution(), I get
Error in mx.varg.symbol.Convolution(list(...)) :
./base.h:291: Unsupported parameter type object type for argument weight, expect integer, logical, or string.
Can anyone help me?
I thought of one way, but ran into a difficulty at one step. Here is what I did.
I found all the trained CNN's parameters in model$arg.params, and to compute with these parameters we can use the mx.nd.* functions, as below:
# convolution 1 result
conv1_result <- mxnet::mx.nd.Convolution(data = mx.nd.array(train_array),
                                         weight = model$arg.params$convolution0_weight,
                                         bias = model$arg.params$convolution0_bias,
                                         kernel = c(8,8), num_filter = 50)
str(conv1_result)
tanh1_result <- mx.nd.Activation(data = conv1_result, act_type = "sigmoid")
pool1_result <- mx.nd.Pooling(data = tanh1_result, pool_type = "avg",
                              kernel = c(4,4), stride = c(4,4))
# convolution 2 result
conv2_result <- mxnet::mx.nd.Convolution(data = pool1_result,
                                         weight = model$arg.params$convolution1_weight,
                                         bias = model$arg.params$convolution1_bias,
                                         kernel = c(5,5), num_filter = 50)
tanh2_result <- mx.nd.Activation(data = conv2_result, act_type = "sigmoid")
pool2_result <- mx.nd.Pooling(data = tanh2_result, pool_type = "avg",
                              kernel = c(4,4), stride = c(4,4))
# 1st fully connected layer result
flat_result <- mx.nd.flatten(data = pool2_result)
fcl_1_result <- mx.nd.FullyConnected(data = flat_result,
                                     weight = model$arg.params$fullyconnected0_weight,
                                     bias = model$arg.params$fullyconnected0_bias,
                                     num_hidden = 500)
tanh_3_result <- mx.nd.Activation(data = fcl_1_result, act_type = "tanh")
# 2nd fully connected layer result
fcl_2_result <- mx.nd.FullyConnected(data = tanh_3_result,
                                     weight = model$arg.params$fullyconnected1_weight,
                                     bias = model$arg.params$fullyconnected1_bias,
                                     num_hidden = 100)
but when I came to the mx.nd.FullyConnected() step, I ran out of memory (I have 16 GB of RAM) and R crashed.
So, does anyone know how to batch the input data for mx.nd.FullyConnected(), or any other method to make mx.nd.FullyConnected() run successfully, the way mx.model.FeedForward.create() does?
Here is code that can help you achieve what you want. The code below displays the activations of the 2 convolution layers of LeNet. It takes the MNIST dataset as input (28x28 grayscale images, downloaded automatically) and produces images of the activations.
You can grab outputs from the executor. To see the list of available outputs, use names(executor$ref.outputs).
The result of each output is available as a matrix with values in the [-1; 1] range; the dimensions of the matrix depend on the parameters of the layer. The code uses these matrices to display grayscale images where -1 is a white pixel and 1 a black pixel. (Most of the code is taken from https://github.com/apache/incubator-mxnet/issues/1152 and massaged a little bit.)
The code is self-sufficient to run, but I have noticed that if I build the model a second time in the same R session, the output names get different indices, and the code later fails because the expected output names are hard-coded. So if you decide to create the model more than once, you will need to restart the R session.
Hope it helps and you can adjust this example to your case.
library(mxnet)
download.file('https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/data/mnist_csv.zip', destfile = 'mnist_csv.zip')
unzip('mnist_csv.zip', exdir = '.')
train <- read.csv('train.csv', header=TRUE)
data.x <- train[,-1]
data.x <- data.x/255
data.y <- train[,1]
val_ind = 1:100
train.x <- data.x[-val_ind,]
train.x <- t(data.matrix(train.x))
train.y <- data.y[-val_ind]
val.x <- data.x[val_ind,]
val.x <- t(data.matrix(val.x))
val.y <- data.y[val_ind]
train.array <- train.x
dim(train.array) <- c(28, 28, 1, ncol(train.x))
val.array <- val.x
dim(val.array) <- c(28, 28, 1, ncol(val.x))
# input layer
data <- mx.symbol.Variable('data')
# first convolutional layer
convLayer1 <- mx.symbol.Convolution(data=data, kernel=c(5,5), num_filter=30)
convAct1 <- mx.symbol.Activation(data=convLayer1, act_type="tanh")
poolLayer1 <- mx.symbol.Pooling(data=convAct1, pool_type="max", kernel=c(2,2), stride=c(2,2))
# second convolutional layer
convLayer2 <- mx.symbol.Convolution(data=poolLayer1, kernel=c(5,5), num_filter=60)
convAct2 <- mx.symbol.Activation(data=convLayer2, act_type="tanh")
poolLayer2 <- mx.symbol.Pooling(data=convAct2, pool_type="max",
                                kernel=c(2,2), stride=c(2,2))
# big hidden layer
flattenData <- mx.symbol.Flatten(data=poolLayer2)
hiddenLayer <- mx.symbol.FullyConnected(flattenData, num_hidden=500)
hiddenAct <- mx.symbol.Activation(hiddenLayer, act_type="tanh")
# softmax output layer
outLayer <- mx.symbol.FullyConnected(hiddenAct, num_hidden=10)
LeNet1 <- mx.symbol.SoftmaxOutput(outLayer)
# Group some output layers for visual analysis
out <- mx.symbol.Group(c(convAct1, poolLayer1, convAct2, poolLayer2, LeNet1))
# Create an executor
executor <- mx.simple.bind(symbol=out, data=dim(val.array), ctx=mx.cpu())
# Prepare for training the model
mx.set.seed(0)
# Set a logger to keep track of callback data
logger <- mx.metric.logger$new()
# Using cpu by default, but set gpu if your machine has a supported one
devices=mx.cpu(0)
# Train model
model <- mx.model.FeedForward.create(LeNet1, X=train.array, y=train.y,
                                     eval.data=list(data=val.array, label=val.y),
                                     ctx=devices,
                                     num.round=1,
                                     array.batch.size=100,
                                     learning.rate=0.05,
                                     momentum=0.9,
                                     wd=0.00001,
                                     eval.metric=mx.metric.accuracy,
                                     epoch.end.callback=mx.callback.log.train.metric(100, logger))
# Update parameters
mx.exec.update.arg.arrays(executor, model$arg.params, match.name=TRUE)
mx.exec.update.aux.arrays(executor, model$aux.params, match.name=TRUE)
# Select data to use
mx.exec.update.arg.arrays(executor, list(data=mx.nd.array(val.array)), match.name=TRUE)
# Do a forward pass with the current parameters and data
mx.exec.forward(executor, is.train=FALSE)
# List of outputs available.
names(executor$ref.outputs)
# Plot the filters of a sample from validation set
sample_index <- 99 # sample number in the validation set; change it to see other samples
activation0_filter_count <- 30 # number of filters of the "convLayer1" layer
par(mfrow=c(6,5), mar=c(0.1,0.1,0.1,0.1)) # number of rows x columns in output
dim(executor$ref.outputs$activation0_output)
for (i in 1:activation0_filter_count) {
outputData <- as.array(executor$ref.outputs$activation0_output)[,,i,sample_index]
image(outputData,
xaxt='n', yaxt='n',
col=gray(seq(1,0,-0.1)))
}
activation1_filter_count <- 60 # number of filters of the "convLayer2" layer
dim(executor$ref.outputs$activation1_output)
par(mfrow=c(6,10), mar=c(0.1,0.1,0.1,0.1)) # number of rows x columns in output
for (i in 1:activation1_filter_count) {
outputData <- as.array(executor$ref.outputs$activation1_output)[,,i,sample_index]
image(outputData,
xaxt='n', yaxt='n',
col=gray(seq(1,0,-0.1)))
}
As a result you should see the activation images for the selected validation sample (use the RStudio left and right arrows to navigate between them).
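On the hard-coded output names caveat mentioned earlier: a small hedged addition (not part of the original answer) is to look the activation outputs up by name pattern instead, so the plotting code survives a rebuilt model:
# Find activation outputs by pattern rather than hard-coded names
act_names <- grep("^activation", names(executor$ref.outputs), value = TRUE)
act_names # e.g. "activation0_output" "activation1_output" in a fresh session
dim(as.array(executor$ref.outputs[[act_names[1]]]))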

getSymbols downloading multiple currency pairs from oanda in a single data frame

I am currently downloading currency pairs from Oanda using getSymbols and here is my code:
Currency.Pairs <- as_tibble(c("EUR/USD", "USD/JPY", "GBP/USD", "EUR/GBP"))
colnames(Currency.Pairs) <- "Pairs"
getSymbols(Currency.Pairs$Pairs, src = "oanda", from = "2012-01-01", to = "2016-12-20")
output:
[1] "EURUSD" "USDJPY" "GBPUSD" "EURGBP"
Is there a way that I can download all of the currency prices into one single list or data frame, instead of a separate data frame for each currency pair? Thanks in advance!
The env (environment) parameter of getSymbols allows you to create a dedicated environment in which to store and manipulate the price data, as below:
#tibble library is not required (tibble::as_tibble)
Currency.Pairs <- c("EUR/USD", "USD/JPY", "GBP/USD", "EUR/GBP")
#create new environment to house all price data
newEnv = new.env()
#source data
getSymbols(Currency.Pairs, src = "oanda",env=newEnv, from = "2012-01-01", to = "2016-12-20")
#merge all price series
allTimeSeries = do.call(merge.xts,as.list(newEnv))
#convert column names to original form EUR.USD => EUR/USD
colnames(allTimeSeries) = gsub("[.]","\\/",colnames(allTimeSeries))
#reorder columns as per original order
allTimeSeries = allTimeSeries[,Currency.Pairs]
head(allTimeSeries)
# EUR/USD USD/JPY GBP/USD EUR/GBP
#2012-01-01 1.29590 76.8892 1.55384 0.833490
#2012-01-02 1.29375 76.9584 1.55049 0.834425
#2012-01-03 1.30038 76.7861 1.55779 0.834755
#2012-01-04 1.30036 76.6969 1.56310 0.831920
#2012-01-05 1.28717 76.8753 1.55586 0.827300
#2012-01-06 1.27698 77.1381 1.54737 0.825255
tail(allTimeSeries)
# EUR/USD USD/JPY GBP/USD EUR/GBP
#2016-12-15 1.04682 117.772 1.25015 0.837350
#2016-12-16 1.04349 118.106 1.24366 0.839055
#2016-12-17 1.04510 117.915 1.24884 0.836935
#2016-12-18 1.04512 117.913 1.24887 0.836930
#2016-12-19 1.04458 117.339 1.24500 0.839030
#2016-12-20 1.03906 117.695 1.23718 0.839870
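If you would rather have a plain data frame than an xts object, a short hedged addition (not part of the original answer):
# Convert the merged xts series to a data.frame with an explicit date column
prices_df <- data.frame(date = zoo::index(allTimeSeries),
                        zoo::coredata(allTimeSeries),
                        check.names = FALSE)
head(prices_df)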

R HTS package: combinef and aggts not working with gts object

I'm trying to apply the combinef and aggts functions from the R hts package to a time series matrix in order to obtain an optimized set of forecasts across a hierarchy. I've run the same code every month without issue, and am now seeing errors after upgrading to hts package v4.5.
Reproducible example (I can share the data file offline if needed):
#Read in forecast data for all levels of hierarchy#
fcast <- read.csv("SampleHierarchyForecast.csv", header = TRUE, check.names = FALSE)
#Convert to time series#
fcast <- ts(fcast, start = as.numeric(2010.25) + (64)/12,
            end = as.numeric(2010.25) + (75)/12, f = 12)
#Create time series of only the bottom level of the hierarchy#
index <- c()
fcastBottom <- fcast
for (i in 1:length(fcastBottom[1,]))
{
  if(nchar(colnames(fcastBottom)[i]) != 28)
    index[i] <- i
  else
    index[i] <- 0
}
fcastBottom <- fcastBottom[,-index]
#Create grouped time series from the bottom level forecast#
GtsForecast <- gts(fcastBottom, characters = list(c(12,12), c(4)),
                   gnames = c("Category", "Item", "Customer", "Category-Customer"))
#Use combinef function to optimally combine the full hierarchy forecast using the groups from the full hierarchy gts#
combo <- combinef(fcast, groups = GtsForecast$groups)
Warning message:
In mapply(rep, as.list(gnames), times, SIMPLIFY = FALSE) :
  longer argument not a multiple of length of shorter
traceback()
2: stop("Argument fcasts requires all the forecasts.")
1: combinef(fcast, groups = GtsForecast$groups)
There's a little bug when the combinef() function calls gts(). I've now fixed it on GitHub, so you can run your code above without any trouble after updating to the development version.
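The update step isn't shown here; installing a development version from GitHub is typically done with devtools, and the repository name below is an assumption to verify first:
# Assumed repository for the hts development version - verify before running
devtools::install_github("earowang/hts")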
Alternatively, you need to tweak your code a bit if you don't want to install the newest version.
combo <- combinef(fcast, groups = GtsForecast$groups, keep = "bottom")
combo <- ts(combo, start = as.numeric(2010.25) + (64)/12,
end = as.numeric(2010.25) + (75)/12, f = 12)
colnames(combo) <- colnames(fcastBottom)
newGtsForecast <- gts(combo, characters = list(c(12,12), c(4)),
gnames = c("Category", "Item", "Customer",
"Category-Customer"))
Aggregate <- aggts(newGtsForecast)
Hope it helps.
