solaR timestamp for radiation on a tilted surface - r

I am trying to use the R package solaR to calculate irradiance on a tilted plane given measured irradiance on the horizontal plane. I can get the code to work, but the final output timestamp does not make sense.
Data for this code can be found here. It is one day's worth of measured irradiance (global horizontal -- ghz, direct normal -- dir, diffuse horizontal -- dhz, and outdoor temperature -- ta) for Austin, TX. The timestamp is local 'CST6CDT' time. The data is for a clear day, so the maximum value of global horizontal (ghz) should roughly correspond to solar noon (the time the sun crosses the local meridian).
My code is as follows:
library(solaR)
sol_data <- read.csv(file)
# The data must be named a certain way.
names(sol_data) <- c('time', 'G0', 'B', 'D0', 'Ta')
# The negatives are an artifact of the sensor and are set to 0.
sol_data$G0 <- ifelse(sol_data$G0 < 0, 0, sol_data$G0)
sol_data$B <- ifelse(sol_data$B < 0, 0, sol_data$B)
sol_data$D0 <- ifelse(sol_data$D0 < 0, 0, sol_data$D0)
# This calculates the beam incidence on the horizontal plane.
sol_data$B0 <- sol_data$G0 - sol_data$D0
sol_data$B0 <- ifelse(sol_data$B0 < 0, 0, sol_data$B0)
# This takes the data and assigns the timestamp to a certain format and timezone
idxLocal <- with(sol_data, as.POSIXct(time, format='%Y-%m-%d %H:%M:%S', tz = 'CST6CDT'))
# This converts the timestamp to solar time
idx <- local2Solar(idxLocal, lon = -97.7428)
# Creates a zoo object needed to make the Meteo file for input
z <- zoo(sol_data[,c('G0', 'D0', 'B0', 'Ta')], idx)
# local latitude
lat = 30.2669
# Creates a Meteo file
My_Meteo <- zoo2Meteo(z, lat=lat)
# Finds the start and end date of the input file
start <- idx[1]
end <- idx[length(idx)]
# Returns a base time for the calculations
BTd <- fBTd(mode = 'serie', year = '2013', start = start, end = end, format = '%Y-%m-%d %H:%M:%S')
# Computes the movement of the sun/earth
sol <- calcSol(lat = 30.2669, BTd, sample = 'min')
# Creates a G0 file for solar rad on horizontal surface
compI <- calcG0(30.2669, modeRad = 'bdI', dataRad = My_Meteo, corr = 'none')
# creates the angles for calculation of the rad on a tilted surface
angGen <- fTheta(sol = sol, beta = 0, alfa = 0)
# Calculates the irradiance on a tilted surface
irad_tilt <- fInclin(compI, angGen)
When I use beta = 0, alfa = 0 (a flat plane) I should get roughly the same output as my input. However, when I search for the max value of global horizontal irradiance:
x <- which.max(irad_tilt$G)
irad_tilt[x,]
It returns a maximum at 2013-05-05 10:43:01, and I cannot figure out what this time represents. It is not local time; that should be around 13:24. Local solar time should be around 12:00, UTC should be around 18:24, and UTC solar time (if there is such a thing) should be around 17:00...
I know this is obscure, but any thoughts?

I have tested the code and data on my computer with correct results. Let's reproduce the main steps with some graphical output:
library(solaR)
sol_data <- read.csv('/tmp/one_day_WSL_8.csv')
## The data must be named a certain way.
names(sol_data) <- c('time', 'G0', 'B', 'D0', 'Ta')
## The negatives are an artifact of the sensor and are set to 0.
sol_data$G0 <- ifelse(sol_data$G0 < 0, 0, sol_data$G0)
sol_data$B <- ifelse(sol_data$B < 0, 0, sol_data$B)
sol_data$D0 <- ifelse(sol_data$D0 < 0, 0, sol_data$D0)
## This calculates the beam incidence on the horizontal plane.
sol_data$B0 <- sol_data$G0 - sol_data$D0
sol_data$B0 <- ifelse(sol_data$B0 < 0, 0, sol_data$B0)
## This takes the data and assigns the timestamp to a certain format and timezone
idxLocal <- with(sol_data, as.POSIXct(time, format='%Y-%m-%d %H:%M:%S', tz = 'CST6CDT'))
The function local2Solar converts the time zone of a POSIXct object to mean solar time, setting its time zone to UTC as a synonym of mean solar time. It applies two corrections: the difference in longitude between the location and the time zone meridian, and daylight saving time.
idx <- local2Solar(idxLocal, lon = -97.7428)
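As a rough sanity check of the longitude part of that correction (a back-of-the-envelope sketch, not solaR internals): mean solar time shifts by about 4 minutes per degree of longitude, and Austin lies about 7.74 degrees west of the CST meridian at -90:
## Austin's mean solar time runs roughly 31 minutes behind CST (plus a further hour behind CDT)
(-90 - (-97.7428)) * 4   ## ~31 minutes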
## Creates a zoo object needed to make the Meteo file for input
z <- zoo(sol_data[,c('G0', 'D0', 'B0', 'Ta')], idx)
Because your data corresponds to a clear day and this time series uses mean solar time, the maximum should be located around noon.
xyplot(z, type=c('l', 'g'))
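A numeric version of the same check (a sketch): the timestamp of the maximum G0 in this mean-solar-time series should be close to 12:00.
## index of the maximum measured G0, which should be near solar noon
index(z)[which.max(coredata(z$G0))]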
Now we compute the sun geometry with calcSol. Here I am using different code from yours.
## local latitude
lat = 30.2669
## Computes the movement of the sun/earth
sol <- calcSol(lat, BTi=idx)
xyplot(as.zooI(sol), type=c('l', 'g'))
Next we calculate radiation on the horizontal surface.
g0 <- calcG0(lat, modeRad = 'bdI', dataRad = z, corr = 'none')
xyplot(as.zooI(g0), type=c('l', 'g'))
Finally, with calcGef we obtain irradiance on a tilted surface:
gef <- calcGef(lat=lat, modeRad='bdI', dataRad=z)
xyplot(as.zooI(gef), type=c('l', 'g'))
I suspect that your problem is related to the time zones defined on your computer. Could you check these results?
lonHH('America/Chicago')
## [1] -1.570796
lonHH('CST6CDT')
## [1] -1.570796
idxLocal1 <- as.POSIXct(sol_data$time, format='%Y-%m-%d %H:%M:%S', tz = 'CST6CDT')
idxLocal2 <- as.POSIXct(sol_data$time, format='%Y-%m-%d %H:%M:%S', tz = 'America/Chicago')
idxUTC1 <- as.POSIXct(format(idxLocal1, tz='UTC'), tz='UTC')
idxUTC2 <- as.POSIXct(format(idxLocal2, tz='UTC'), tz='UTC')
all.equal(idxUTC1, idxUTC2)
## [1] TRUE
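As a further check of the time zone configuration on your machine (a sketch using base R only):
Sys.timezone()                                       ## the time zone R gets from your system
c('America/Chicago', 'CST6CDT') %in% OlsonNames()    ## both names should be in the Olson database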
Maybe these technical notes are useful for additional information on
this topic:
Ripley, B. D. and Hornik, K. (2001) Date-time classes. R News, 1(2), 8–11.
Grothendieck, G. and Petzoldt, T. (2004) Date and time classes in R. R News, 4(1), 29–32.
In addition, you should take a look at the information and examples in help(timezone).

Thank you greatly for responding directly and for the great package. It turns out we had a wildly wrong interpretation of solar time. I am seeing a different possible issue that would not fit in the comments section.
When I run:
local2Solar(as.POSIXct("2013-07-07 13:36:00",tz="America/Chicago"),lon=-97.7428)
I get "2013-07-07 12:05:01 UTC". According to NOAA, "2013-07-07 13:36:00" is solar noon for that day.
Just to confuse matters, when I run:
local2Solar(as.POSIXct("2013-06-07 13:30:00",tz="America/Chicago"),lon=-97.7428)
I get "2013-06-07 11:59:01 UTC", so it appears to be very close. According to NOAA, "2013-06-07 13:30:00" is solar noon for that day.
If you were to run:
local2Solar(as.POSIXct("2013-01-07 12:37:27",tz="America/Chicago"),lon=-97.7428)
You would get "2013-01-07 12:06:28 UTC". According to NOAA, "2013-01-07 12:37:27" is solar noon for that day.
I ran G. Masters' equations separately from solaR and got "2013-06-07 13:29:30 CDT" (this version's precision is one minute) as the time of maximum incident power for the "2013-06-07" case above.


Displace GPS coordinates of sf point features by vector of offsets in R

Okay, so I'm building on a previous question. I've got an original set of data, and I'm following a specific protocol (that I need to follow for IRB reasons) to offset these randomly.
Each point has its own randomly generated offset. For reference, urban points are offset by 0-2 km, rural by 0-5 km, with a randomly selected 1% of rural points offset by 0-10 km.
Here is my code so far; I'm using a projected coordinate reference system.
displaceData <- function(sfDataset) {
  dataset.sf <- sfDataset
  # 2. Generate a random direction by generating angle between 0 and 360, and converting the angle from degrees to radians.
  dataset.sf$angle_rad <- deg2rad(runif(nrow(dataset.sf), 0, 360))
  # 3. Generate a random distance in meters of 0-5,000 meters for Rural points with 1% of rural points being given 0-10,000 meter distance.
  # get number of rural clusters
  nRuralClusters <- sum(dataset.sf$URBAN_RURA == "R")
  # Split urban and rural
  dataset_R.sf <- subset(dataset.sf, URBAN_RURA == "R")
  dataset_U.sf <- subset(dataset.sf, URBAN_RURA == "U")
  # For rural, assign 5000 meter displacement with 1% randomly assigned up to 10000 meter displacement
  dataset_R.sf$m_displaced <- ifelse({runif(nRuralClusters) |> rank(ties.method = "random") <= floor(nRuralClusters*0.01)}, 10000, 5000)
  # For urban, assign 2000 meter displacement
  dataset_U.sf$m_displaced <- 2000
  # Combine them back together
  dataset_C.sf <- rbind(dataset_R.sf, dataset_U.sf)
  dataset_C.sf$random_meters <- runif(nrow(dataset_C.sf), min = 0, max = dataset_C.sf$m_displaced)
  # 4. Generate the offset by applying trigonometry formulas (law of cosines) using the distance as the hypotenuse and the radians calculated in step 2.
  dataset_C.sf$xOffset <- sin(dataset_C.sf$angle_rad)*dataset_C.sf$random_meters
  dataset_C.sf$yOffset <- cos(dataset_C.sf$angle_rad)*dataset_C.sf$random_meters
  # 5. Add the offset to the original coordinate (in meters) to return the displaced coordinates.
  st_geometry(dataset_C.sf) <- st_geometry(dataset_C.sf) + cbind(dataset_C.sf$xOffset, dataset_C.sf$yOffset)
  #st_geometry(dataset_C.sf) <- st_sfc(st_point(c(st_coordinates(dataset_C.sf$geometry)[,1] + dataset_C.sf$xOffset, st_coordinates(dataset_C.sf$geometry)[,2] + dataset_C.sf$yOffset)))
  return(dataset_C.sf)
}
Step 5 is where I'm getting hung up. I'm having trouble shifting the coordinates by the displacements.
I answered my own question after a while.
displaceData <- function(sfDataset) {
  dataset.sf <- sfDataset
  # 2. Generate a random direction by generating angle between 0 and 360, and converting the angle from degrees to radians.
  dataset.sf$angle_rad <- deg2rad(runif(nrow(dataset.sf), 0, 360))
  # 3. Generate a random distance in meters of 0-5,000 meters for Rural points with 1% of rural points being given 0-10,000 meter distance.
  # get number of rural clusters
  nRuralClusters <- sum(dataset.sf$URBAN_RURA == "R")
  # Split urban and rural
  dataset_R.sf <- subset(dataset.sf, URBAN_RURA == "R")
  dataset_U.sf <- subset(dataset.sf, URBAN_RURA == "U")
  # For rural, assign 5000 meter displacement with 1% randomly assigned up to 10000 meter displacement
  dataset_R.sf$m_displaced <- ifelse({runif(nRuralClusters) |> rank(ties.method = "random") <= floor(nRuralClusters*0.01)}, 10000, 5000)
  # For urban, assign 2000 meter displacement
  dataset_U.sf$m_displaced <- 2000
  # Combine them back together
  dataset_C.sf <- rbind(dataset_R.sf, dataset_U.sf)
  dataset_C.sf$random_meters <- runif(nrow(dataset_C.sf), min = 0, max = dataset_C.sf$m_displaced)
  # 4. Generate the offset by applying trigonometry formulas (law of cosines) using the distance as the hypotenuse and the radians calculated in step 2.
  dataset_C.sf$xOffset <- sin(dataset_C.sf$angle_rad)*dataset_C.sf$random_meters
  dataset_C.sf$yOffset <- cos(dataset_C.sf$angle_rad)*dataset_C.sf$random_meters
  # 5. Add the offset to the original coordinate (in meters) to return the displaced coordinates.
  dataset_C.sf$newX <- st_coordinates(dataset_C.sf)[,1] + dataset_C.sf$xOffset
  dataset_C.sf$newY <- st_coordinates(dataset_C.sf)[,2] + dataset_C.sf$yOffset
  # Remove geometry
  dataset_C <- st_set_geometry(dataset_C.sf, NULL)
  # Rebuild the geometry from the shifted coordinates, keeping the original CRS (the post had EPSG:32618 hard-coded)
  dataset_f.sf <- st_as_sf(dataset_C, coords = c("newX", "newY"), crs = st_crs(sfDataset))
  return(dataset_f.sf)
}
Basically, I removed the geometry, and created a new geometry object using the updated points. Make sure to use the original CRS!
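For completeness, the arithmetic approach from the original attempt can also be made to work; the catch is that arithmetic on sf geometries returns objects without a CRS, so it has to be reassigned. A minimal sketch (my own, assuming the dataset_C.sf object built above and a projected CRS):
# shift each point by its own offset, then rebuild the sfc with the original CRS
new_geom <- st_sfc(
  mapply(function(g, dx, dy) g + c(dx, dy),
         st_geometry(dataset_C.sf), dataset_C.sf$xOffset, dataset_C.sf$yOffset,
         SIMPLIFY = FALSE),
  crs = st_crs(dataset_C.sf)
)
st_geometry(dataset_C.sf) <- new_geom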

Calculate average of a subset of a vector only when subset values meet a condition in R?

I have a daily curve x and I am trying to approximate the average peak and offpeak values of x:
https://ibb.co/Fq1Byzk
I have defined a delta threshold such that when delta is below the threshold value, x is in an off-peak or peak period. I want the average peak value computed only from the values of x where delta < threshold; right now the average includes the outliers as well.
delta <- matrix(0,24,ncol=1)
for (i in 2:24){
  # i-th element is the i-th hour per day
  delta[i] <- x[i, 2] - x[i-1, 2]
}
# Find hour at which max and min daily values occur
max_threshold = 0.15*max(delta)
min_threshold = 0.15*min(delta)
c <- abs(delta) < max_threshold
t1 <- which(delta>max_threshold)[1]-1 # t1: time index at end of off-peak
t2 <- which.max(delta) + 1 # t2 is time of initial peak
t3 <- which.min(delta)-2 # t3 is time of end peak
t4 <- which.min(delta) # t4 time index of evening off-peak
am <- mean(x[1:t1,2]) # average morning off-peak value
peak <- mean(x[t2:t3,2]) #average peak value
pm <- mean(x[t4:24,2]) # average evening off-peak value
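What I'm after for the peak window is something like a mean restricted to the hours where the delta condition holds (a sketch using the objects above, not part of my current code):
# average only the peak hours whose |delta| stays below the threshold
peak_idx <- t2:t3
peak <- mean(x[peak_idx, 2][abs(delta[peak_idx]) < max_threshold])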
> dput(x)
structure(list(time = structure(c(1451952000, 1451955600, 1451959200,
1451962800, 1451966400, 1451970000, 1451973600, 1451977200, 1451980800,
1451984400, 1451988000, 1451991600, 1451995200, 1451998800, 1452002400,
1452006000, 1452009600, 1452013200, 1452016800, 1452020400, 1452024000,
1452027600, 1452031200, 1452034800, 1452038400, 1452042000, 1452045600,
1452049200, 1452052800, 1452056400, 1452060000, 1452063600, 1452067200,
1452070800, 1452074400, 1452078000, 1452081600, 1452085200, 1452088800,
1452092400, 1452096000, 1452099600, 1452103200, 1452106800, 1452110400,
1452114000, 1452117600, 1452121200), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), Crow_education_Omer = c(0.019186330898848,
0.0192706664192825, 0.0182164724138513, 0.018174304653634, 0.019355001939717,
0.0197345117816722, 0.023951287803397, 0.0323848398468467, 0.0343245568168401,
0.0378244809148717, 0.0393003525224754, 0.0403545465279066, 0.0405232175687756,
0.0393425202826927, 0.0398907011655169, 0.0377401453944372, 0.0344932278577091,
0.0317101556833707, 0.0304872906370705, 0.0297282709531601, 0.0287584124681633,
0.0252584883701317, 0.0196080085010205, 0.0197345117816722, 0.0194815052203687,
0.0196080085010205, 0.0184273112149375, 0.0184694789751548, 0.0191441631386307,
0.019692344021455, 0.025469327171218, 0.0352522475416196, 0.0376136421137855,
0.0403967142881239, 0.0435592963044175, 0.0433484575033313, 0.0430532831818105,
0.042968947661376, 0.043306289743114, 0.044655658070066, 0.0424207667785518,
0.0416195793344241, 0.0382883262772615, 0.03769797763422, 0.0330173562501054,
0.0281680638251219, 0.0234452746807901, 0.0225597517162278)), row.names = 97:144, class = "data.frame")
Also, how would I be able to ggplot both the new simplified curve and the original curve x on the same graph? I can't seem to melt or rbind() the new curve, which has fewer data points, with x, since my time column is POSIXct.
Thanks.
This is just a partial solution, since it breaks down for the second day. I named the data.frame df instead of x.
library(ggplot2)
library(dplyr)
library(lubridate)
df_obj <- df %>%
  group_by(day = day(time)) %>% # group by days
  filter(day == 5) %>% # filter for day 5
  mutate(
    delta_rev = Crow_education_Omer - lag(
      Crow_education_Omer,
      default = first(Crow_education_Omer)
    ), # delta between data point n and n-1
    delta_for = lead(
      Crow_education_Omer,
      default = last(Crow_education_Omer)
    ) - Crow_education_Omer, # delta between data point n+1 and n
    max_tresh = 0.15 * max(delta_rev)
  ) %>%
  group_by(
    grp = 1 - (abs(delta_rev) < 0.15 * max(delta_rev) | abs(delta_for) < 0.15 * max(delta_for)),
    grp2 = cumsum(grp != lag(grp, default = 0))
  ) %>%
  mutate(
    average = mean(Crow_education_Omer) *
      (1 - grp) *
      (abs(first(Crow_education_Omer) - last(Crow_education_Omer)) < max_tresh)
  )
First we need to modify your existing data.frame to build up your averages. Based on this calculation, we use ggplot2 for plotting:
df_obj %>%
ggplot(aes(x = time, y = Crow_education_Omer)) +
geom_point() +
geom_line(aes(color = "sample")) +
geom_line(data = df_obj[df_obj$average != 0, ], aes(x = time, y = average, color = "average")) +
xlab("Time") +
ylab("Value")
returns a plot of the original curve with the averaged segments overlaid (figure omitted).
But for day 6 this doesn't work as expected: changing to filter(day == 6) and plotting again returns a plot which isn't the expected result (figure omitted). Changing the threshold value to 0.33 * max(delta) and plotting again gives a different picture (figure omitted). So perhaps you can build on this code to create a correct and working solution. Good luck!
A few explanations:
We build up delta_rev and delta_for. delta_rev equals your delta, so for a given row/data point i we calculate df[i, 2] - df[i-1, 2].
delta_for changes this: now we calculate df[i+1, 2] - df[i, 2] for a given i. My idea here is that using both delta_rev and delta_for allows us to look at the preceding and succeeding points. This gives us more information about the neighbours of a given point and is useful to determine whether the point belongs to a group (am, peak, pm).
The group_by function tries to build up the groups based on the threshold: grp checks whether a data point is < 0.15 * max(delta), and grp2 creates a unique grouping number.
There are a few issues:
Based on this algorithm, there can be more than three groups.
The group_by finds another group between 15:00 and 20:00; we filter it out (that's the abs(first(Crow_education_Omer) - last(Crow_education_Omer)) < max_tresh part). I'm not sure if this is a good solution.
As stated above, this doesn't return a reasonable plot for day 6. Perhaps the df_obj[df_obj$average != 0, ] subset in the second geom_line causes this.

Calculating distance of multiple coordinates in R

As written in the title, I want to calculate the distance from all gas stations in my hometown to the two motorway access points around here, using the package osrm.
stations_ms contains the latitude and longitude of the gas stations, and highway_ms.df contains the lat/long of the motorway access points.
Calculating the distance for just one row of my dataset is no problem, but I am not able to create a loop/function that does it for every row.
Here is my code:
route4 <- osrmRoute(src = c(stations_ms$longitude[1], stations_ms$latitude[1]),
dst = highway_ms.df[1,],
overview = "FALSE")
for (i in 1:nrow(stations_ms)) {
route[i] <- osrmRoute(src = c(stations_ms$longitude[i], stations_ms$latitude[i]),
dst = highway_ms.df[1,],
overwiew = "FALSE")
}
Maybe someone can help me :)
Here is a workable example that might be helpful.
The overview in osrmRoute has the following options:
"full", "simplified" or FALSE. Use "full" to return the detailed
geometry, use "simplified" to return a simplified geometry, use FALSE
to return only time and distance.
If you only want time and distance, using FALSE should work fine. My comment was in regard to spelling (had a "w" instead of a "v").
I made up some example data:
my_points <- data.frame(
  id = 1:3,
  longitude = c(13.4, 13.5, 13.3),
  latitude = c(52.4, 52.5, 52.3)
)
And wanted to find distances to a pharmacy in Berlin (using apotheke.df that comes with the osrm package). You could do:
library(osrm)
route <- list()
for (i in 1:nrow(my_points)) {
  route[[i]] <- osrmRoute(src = c(my_points$longitude[i], my_points$latitude[i]),
                          dst = apotheke.df[1, ],
                          overview = FALSE)
}
This starts with an empty list called route. Then we fill in each list element with both the duration and the distance. The end result is the following list:
R> route
[[1]]
duration distance
20.56 11.77
[[2]]
duration distance
17.38 7.63
[[3]]
duration distance
33.12 27.45
Which can be converted to a matrix or data frame (in this case, I made a matrix):
R> do.call(rbind, route)
duration distance
[1,] 20.56 11.77
[2,] 17.38 7.63
[3,] 33.12 27.45
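If a data frame keyed by point id is more convenient, a small follow-up sketch using the objects above:
res <- as.data.frame(do.call(rbind, route))   # columns: duration, distance
res$id <- my_points$id                        # keep track of which origin point each row came from
res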

Mapping slope of an area and returning percent above and below a threshold in R

I am trying to figure out the proportion of an area that has a slope within 0 +/- 5 degrees. Another way of saying it: anything above +5 degrees or below -5 degrees is bad. I am trying to find the actual number, and a graphic.
To achieve this I turned to R, using the raster package.
Let's use a generic country, in this case the Philippines.
list.of.packages <- c("sp", "raster", "rasterVis", "maptools", "rgeos")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]
if (length(new.packages)) install.packages(new.packages)
library(sp) # classes for spatial data
library(raster) # grids, rasters
library(rasterVis) # raster visualisation
library(maptools)
library(rgeos)
Now let's get the altitude information and plot the slopes.
elevation <- getData("alt", country = "PHL")
x <- terrain(elevation, opt = c("slope", "aspect"), unit = "degrees")
plot(x$slope)
Not very helpful due to the scale, so let's simply look at the Island of Palawan
e <- drawExtent(show=TRUE) #to crop out Palawan (it's the long skinny island that is roughly midway on the left and is oriented between 2 and 8 O'clock)
gewataSub <- crop(x,e)
plot(gewataSub, 1)## Now visualize the new cropped object
A little bit better to visualize. I get a sense of the magnitude of the slopes and that with a 5 degree restriction, I am mostly confined to the coast. But I need a little bit more for analysis.
I would like the results to be in two parts:
1. "35% (made up) of the selected area has a slope exceeding +/- 5 degrees" or "65% of the selected area is within +/- 5 degrees" (with the code to get it).
2. A picture where everything within +/- 5 degrees is one color, call it good or green, and everything else is in another color, call it bad or red.
Thanks
There are no negative slopes, so I assume you want those that are less than 5 degrees
library(raster)
elevation <- getData('alt', country='CHE')
x <- terrain(elevation, opt='slope', unit='degrees')
z <- x <= 5
Now you can count cells with freq
f <- freq(z)
If you have a planar coordinate reference system (that is, with units in meters or similar) you can do
f <- cbind(f, area=f[,2] * prod(res(z)))
to get areas. But for lon/lat data, you would need to correct for different sized cells and do
a <- area(z)
zonal(a, z, fun=sum)
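For a quick percentage by cell count (ignoring differences in cell area), a one-line sketch: since z is a 0/1 raster, the mean of its cells is the fraction of cells at or below 5 degrees.
cellStats(z, mean) * 100   # percent of non-NA cells with slope <= 5 degrees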
And there are different ways to plot, but the most basic one
plot(z)
You can use reclassify from the raster package to achieve that. The function assigns each cell value that lies within a defined interval a certain value. For example, you can assign cell values within interval (0,5] to value 0 and cell values within the interval (5, maxSlope] to value 1.
library(raster)
library(rasterVis)
elevation <- getData("alt", country = "PHL")
x <- terrain(elevation, opt = c("slope", "aspect"), unit = "degrees")
plot(x$slope)
e <- drawExtent(show = TRUE)
gewataSub <- crop(x, e)
plot(gewataSub$slope, 1)
m <- c(0, 5, 0, 5, maxValue(gewataSub$slope), 1)
rclmat <- matrix(m, ncol = 3, byrow = TRUE)
rc <- reclassify(gewataSub$slope, rclmat)
levelplot(
  rc,
  margin = FALSE,
  col.regions = c("wheat", "gray"),
  colorkey = list(at = c(0, 1, 2), labels = list(at = c(0.5, 1.5), labels = c("<= 5", "> 5")))
)
After the reclassification you can calculate the percentages:
length(rc[rc == 0]) / (length(rc[rc == 0]) + length(rc[rc == 1])) # <= 5 degrees
[1] 0.6628788
length(rc[rc == 1]) / (length(rc[rc == 0]) + length(rc[rc == 1])) # > 5 degrees
[1] 0.3371212
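Equivalently (a sketch), the same proportions can be read from a frequency table instead of pulling all the cell values into memory:
f <- freq(rc, useNA = "no")          # counts of cells with value 0 (<= 5 deg) and 1 (> 5 deg)
f[, "count"] / sum(f[, "count"])     # the two proportions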

Trying to program trading signals in R

I am new to R and am trying to program a pair trading strategy in R.
I have already written the code for downloading the data, created additional columns, and prepared the data. Now I need to calculate the trading signals.
My signal rules are as follows.
- If the Z-score is greater than 2.25, sell the pair; buy back when the Z-score is less than 0.25.
- If the Z-score is less than -2.25, buy the pair; sell (exit) when the Z-score is above -0.25.
- Close any open position if there is a change in signal.
When we sell a pair, we sell the first stock and buy the second stock. In this case, we sell ACC and buy Ambujacem.
When we buy a pair, we buy the first stock and sell the second stock. In this case, we buy ACC and sell Ambujacem.
Could anyone help me with the code for the trading signals?
Enclosing the code.
Regards,
Subash
# Trading Code
library(quantmod)
getSymbols("ACC.NS", from=as.Date('2007-01-01'), to=as.Date('2015-07-24'))
getSymbols("AMBUJACEM.NS", from=as.Date('2007-01-01'), to=as.Date('2015-07-24'))
acc=ACC.NS[,6]
amb=AMBUJACEM.NS[,6]
t.zoo <- merge(acc, amb, all=TRUE)
t.zoo=as.data.frame(t.zoo)
typeof(t.zoo)
t.zoo=na.omit(t.zoo)
#adding columns
t.zoo$spread <- 0
t.zoo$adfTest <- 0
t.zoo$mean <- 0
t.zoo$stdev <- 0
t.zoo$zScore <- 0
t.zoo$signal <- 0
t.zoo$BuyPrice <- 0
t.zoo$SellPrice <- 0
t.zoo$LongReturn <- 0
t.zoo$ShortReturn <- 0
t.zoo$Slippage <- 0
t.zoo$TotalReturn <- 0
#preparing the data
#Calculating the pair ratio
t.zoo$pairRatio <- t.zoo$ACC.NS.Adjusted/t.zoo$AMBUJACEM.NS.Adjusted
#Calculate the log prices of the two time series
t.zoo$LogA <- log10(t.zoo$ACC.NS.Adjusted)
t.zoo$LogB <- log10(t.zoo$AMBUJACEM.NS.Adjusted)
#Calculating the spread
t.zoo$spread <- t.zoo$ACC.NS.Adjusted/t.zoo$AMBUJACEM.NS.Adjusted
#Calculating the mean
# Computes the mean using the SMA function
# choose the number of days for calculating the mean
SMAdays = 20
t.zoo$mean <- SMA(t.zoo$spread,SMAdays)
#Calculating the Std Deviation
t.zoo$stdev <- rollapply(t.zoo$spread,20,sd, fill=NA, align='right')
#Calculating the Z Score
t.zoo$zScore <- (t.zoo$pairRatio - t.zoo$mean)/t.zoo$spread
View(t.zoo)
#Calculation of trading signals and trading prices
#Trigger sell or buy signal if Z Score moves above 2.25 or below -2.25.
# Close position if Z Score reaches 0.2 or -0.2.
# close any open position if there is a change in signal.
I think the main issue was to come up with trading signals for a strategy that depends not only on the current level of the indicator but also on the direction from which that level is crossed.
There were a number of problems with the code posted in comments, including the use of a single = for comparisons, so I've reworked it afresh.
Here's my attempt at solving this. It seems to be fine. I've added some plotting code to eyeball the results; I suggest you check the result over different periods.
This code comes after the one in the original question. The only difference is that I have kept t.zoo as an xts/zoo object and not converted it to a data.frame. Also, I've multiplied the zScore by 100.
It generates trigger dates and also a column depicting the state of the strategy. Calculating returns would be easy from there.
colnames(t.zoo)
#t.zoo must be an xts object
#working on a separate xts object
sigs<- t.zoo[, c("ACC.NS.Adjusted", "AMBUJACEM.NS.Adjusted" , "zScore")]
# creating my own triggers as there are not enough good values
# buyTrig<- mean(t.zoo$zScore ,na.rm = T) - 1*sd(t.zoo$zScore ,na.rm = T)
# sellTrig<- (-1) * buyTrig
# sqOffTrig<- mean(t.zoo$zScore ,na.rm = T) - 0.5*sd(t.zoo$zScore ,na.rm = T)
# Another approach: scaling t.zoo's zScore to fit your criterion
sigs$zScore<- sigs$zScore*100
buyTrig<- (-2.25)
sellTrig<- (-1) * buyTrig
sqOffTrig<- 0.25
cat ( buyTrig, sellTrig , sqOffTrig)
hist(sigs$zScore, breaks = 40)
abline(v=c(buyTrig,sellTrig), col="red")
abline(v=c(-sqOffTrig, sqOffTrig), col="green")
sum(sigs$zScore >= -sqOffTrig & sigs$zScore<= sqOffTrig , na.rm = T) # 139
sigs$action<- 0
sigs$mode <- NA
sigs$zLag<- lag.xts(sigs$zScore,1)
sigs[19:22,]
#these are not the real trigger dates, but they will serve our purpose
# along with na.locf
buyTrigDays<- time(sigs[sigs$zScore<= buyTrig & sigs$zLag > buyTrig, ])
sellTrigDays<- time(sigs[sigs$zScore>= sellTrig & sigs$zLag < sellTrig, ])
#square offs
buySqOffDays<- time( sigs[sigs$zScore>= (-1*sqOffTrig) & sigs$zLag < (-1*sqOffTrig), ] )
buySqOffDays
sellSqOffDays<- time( sigs[sigs$zScore<= (sqOffTrig) & sigs$zLag > (sqOffTrig), ] )
sellSqOffDays
sigs$mode[buyTrigDays]=1 ; sigs$mode[sellTrigDays]= -1;
sigs$mode[buySqOffDays]=0 ; sigs$mode[sellSqOffDays]= 0;
sigs$mode
# use local fill to repeat these triggered position into future
# till you meet another non NA value
sigs$mode<- na.locf(sigs$mode, fromLast = F)
plot((sigs$zScore["2015"] ))
points(sigs$zScore[sigs$mode==1], col="red", on=1, pch = 19)
points(sigs$zScore[sigs$mode==-1], col="green", on=1 , pch = 19)
points(sigs$zScore[sigs$mode==0], col="blue", on=1)
sum(is.na(sigs$mode))
#now to get the real dates when square off is triggered
trigdays<- time( sigs[diff(sigs$mode,1) != 0, ] ) #when the value changes
squareOffTrigger_real<- time(sigs[sigs$mode==0][trigdays])
buyTrigger_real<- time(sigs[sigs$mode==1] [trigdays])
sellTrigger_real<- time(sigs[sigs$mode==-1][trigdays])
#check
length(sellTrigger_real) + length(buyTrigger_real) == length(squareOffTrigger_real)
plot(sigs$zScore["2015"])
points(sigs$zScore[buyTrigger_real] , col="blue", pch = 19, on=1)
points(sigs$zScore[sellTrigger_real] , col="red", pch = 19, on=1)
points(sigs$zScore[squareOffTrigger_real] , col="green", pch = 19, on=1)
abline(h=c(-sqOffTrig, sqOffTrig) , col= "green" )
# further calculations can be easily made using either the mode
# column or the trigger dates computed at the end
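For example, a rough sketch (my own addition, not part of the strategy above) of computing returns from the mode column, treating mode 1 as long ACC / short Ambuja and mode -1 as the reverse:
# daily log-return of the pair spread and of the strategy, using the previous day's position
spreadRet <- diff(log(sigs$ACC.NS.Adjusted)) - diff(log(sigs$AMBUJACEM.NS.Adjusted))
stratRet  <- lag.xts(sigs$mode, 1) * spreadRet
sum(stratRet, na.rm = TRUE)   # cumulative log-return, ignoring costs and slippage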
