Related
I have a large dataframe (t2m.all) with longitude (lon), latitude (lat) and hourly temperatures since 1958 (X1958.01.01.00.00.00). A subset of the data is given in the code below:
dput(t2m.all[1:300,1:3])
structure(list(Lon = c(-102, -101.9, -101.8, -101.7, -101.6,
-101.5, -101.4, -101.3, -101.2, -101.1, -101, -100.9, -100.8,
-100.7, -100.6, -100.5, -100.4, -100.3, -100.2, -100.1, -100,
-99.9, -99.8, -99.7, -99.6, -99.5, -99.4, -99.3, -99.2, -99.1,
-99, -98.9, -98.8, -98.7, -98.6, -98.5, -98.4, -98.3, -98.2,
-98.1, -98, -97.9, -97.8, -97.7, -97.6, -97.5, -97.4, -97.3,
-97.2, -97.1, -97, -96.9, -96.8, -96.7, -96.6, -96.5, -96.4,
-96.3, -96.2, -96.1, -96, -95.9, -95.8, -95.7, -95.6, -95.5,
-95.4, -95.3, -95.2, -95.1, -95, -102, -101.9, -101.8, -101.7,
-101.6, -101.5, -101.4, -101.3, -101.2, -101.1, -101, -100.9,
-100.8, -100.7, -100.6, -100.5, -100.4, -100.3, -100.2, -100.1,
-100, -99.9, -99.8, -99.7, -99.6, -99.5, -99.4, -99.3, -99.2,
-99.1, -99, -98.9, -98.8, -98.7, -98.6, -98.5, -98.4, -98.3,
-98.2, -98.1, -98, -97.9, -97.8, -97.7, -97.6, -97.5, -97.4,
-97.3, -97.2, -97.1, -97, -96.9, -96.8, -96.7, -96.6, -96.5,
-96.4, -96.3, -96.2, -96.1, -96, -95.9, -95.8, -95.7, -95.6,
-95.5, -95.4, -95.3, -95.2, -95.1, -95, -102, -101.9, -101.8,
-101.7, -101.6, -101.5, -101.4, -101.3, -101.2, -101.1, -101,
-100.9, -100.8, -100.7, -100.6, -100.5, -100.4, -100.3, -100.2,
-100.1, -100, -99.9, -99.8, -99.7, -99.6, -99.5, -99.4, -99.3,
-99.2, -99.1, -99, -98.9, -98.8, -98.7, -98.6, -98.5, -98.4,
-98.3, -98.2, -98.1, -98, -97.9, -97.8, -97.7, -97.6, -97.5,
-97.4, -97.3, -97.2, -97.1, -97, -96.9, -96.8, -96.7, -96.6,
-96.5, -96.4, -96.3, -96.2, -96.1, -96, -95.9, -95.8, -95.7,
-95.6, -95.5, -95.4, -95.3, -95.2, -95.1, -95, -94.9, -102, -101.9,
-101.8, -101.7, -101.6, -101.5, -101.4, -101.3, -101.2, -101.1,
-101, -100.9, -100.8, -100.7, -100.6, -100.5, -100.4, -100.3,
-100.2, -100.1, -100, -99.9, -99.8, -99.7, -99.6, -99.5, -99.4,
-99.3, -99.2, -99.1, -99, -98.9, -98.8, -98.7, -98.6, -98.5,
-98.4, -98.3, -98.2, -98.1, -98, -97.9, -97.8, -97.7, -97.6,
-97.5, -97.4, -97.3, -97.2, -97.1, -97, -96.9, -96.8, -96.7,
-96.6, -96.5, -96.4, -96.3, -96.2, -96.1, -96, -95.9, -95.8,
-95.7, -95.6, -95.5, -95.4, -95.3, -95.2, -95.1, -95, -94.9,
-102, -101.9, -101.8, -101.7, -101.6, -101.5, -101.4, -101.3,
-101.2, -101.1, -101, -100.9, -100.8, -100.7), Lat = c(60, 60,
60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
60, 60, 60, 60, 60, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9,
59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9,
59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9,
59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9,
59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9,
59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9,
59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.9, 59.8, 59.8,
59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8,
59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8,
59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8,
59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8,
59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8,
59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8, 59.8,
59.8, 59.8, 59.8, 59.8, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7,
59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7,
59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7,
59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7,
59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7,
59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7,
59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.7, 59.6,
59.6, 59.6, 59.6, 59.6, 59.6, 59.6, 59.6, 59.6, 59.6, 59.6, 59.6,
59.6, 59.6), X1958.01.01.00.00.00 = c(-37.2, -37.2, -36.6, -35.9,
-36, -36.1, -35.8, -35.5, -35.1, -34.7, -34.2, -33.6, -33.6,
-34, -33.8, -33.2, -32.8, -32.6, -32.5, -32.5, -32.4, -32.3,
-32.3, -32.7, -33.1, -33.2, -33.3, -33.3, -33.3, -33.1, -32.9,
-32.6, -32.4, -32.1, -31.8, -31.5, -31.2, -30.9, -30.6, -30.3,
-30, -29.7, -29.4, -29.1, -28.8, -28.5, -28.2, -27.9, -27.7,
-27.4, -27.1, -26.8, -26.6, -26.3, -26.1, -25.9, -25.6, -25.4,
-25.2, -24.9, -24.7, -24.4, -24.1, -23.8, -23.6, -23.3, -23,
-22.7, -22.4, -22.1, -21.9, -36.6, -37.1, -36.6, -36.1, -36,
-36.1, -35.7, -35, -34.6, -34.8, -34.7, -34, -33.8, -33.7, -33.6,
-33.2, -32.4, -32.3, -32.3, -32.3, -32.3, -32.8, -32.9, -32.7,
-33, -33.1, -33.1, -33.1, -33.1, -33, -32.8, -32.5, -32.3, -32.1,
-31.8, -31.5, -31.2, -30.9, -30.6, -30.3, -30.1, -29.8, -29.4,
-29.1, -28.8, -28.5, -28.3, -28, -27.7, -27.4, -27.1, -26.9,
-26.6, -26.4, -26.1, -25.9, -25.6, -25.4, -25.2, -24.9, -24.7,
-24.4, -24.1, -23.9, -23.6, -23.3, -23, -22.8, -22.5, -22.2,
-21.9, -36.7, -36.8, -36.8, -36.4, -36.2, -36, -35.9, -35.2,
-34.6, -34.6, -34.4, -34.2, -34.2, -33.8, -33.3, -32.7, -32.3,
-32.2, -32.3, -32.6, -33.3, -33.5, -33.2, -33.1, -32.5, -32.2,
-32.7, -33, -32.9, -32.8, -32.6, -32.5, -32.2, -32, -31.7, -31.5,
-31.2, -30.9, -30.6, -30.4, -30.1, -29.8, -29.5, -29.2, -28.9,
-28.6, -28.3, -28, -27.7, -27.4, -27.2, -26.9, -26.7, -26.4,
-26.2, -25.9, -25.7, -25.4, -25.2, -24.9, -24.7, -24.4, -24.2,
-23.9, -23.6, -23.4, -23.1, -22.8, -22.5, -22.3, -22, -21.9,
-36.6, -36.6, -36.5, -36.3, -36.2, -35.7, -35.1, -34.5, -34.1,
-34.5, -34.7, -34, -33.3, -33, -32.6, -32.6, -32.8, -33, -33.3,
-33.2, -32.9, -32.9, -33, -32.9, -33, -33.1, -33, -33, -32.9,
-32.8, -32.6, -32.4, -32.2, -32, -31.7, -31.4, -31.2, -31, -30.7,
-30.4, -30.1, -29.8, -29.4, -29.1, -28.8, -28.5, -28.3, -28,
-27.8, -27.5, -27.2, -26.9, -26.7, -26.4, -26.1, -25.9, -25.6,
-25.4, -25.2, -24.9, -24.7, -24.4, -24.2, -23.9, -23.6, -23.4,
-23.1, -22.9, -22.6, -22.3, -22, -21.9, -36.4, -36.3, -36, -35.7,
-35.3, -34.5, -34.3, -34.5, -34.5, -34.3, -33.9, -33.5, -33.2,
-32.9)), row.names = c(NA, 300L), class = "data.frame")
I wish to subset t2m.all to isolate a single row returning the temperature value for a specified latitude and longitude pairing. This works for some coordinates but strangely not for others. For example, the below code works for these coordinates:
res = subset(t2m.all, Lon == -100.7 & Lat == 59.8)
but does not work for these coordinates:
res = subset(t2m.all, Lon == -100.7 & Lat == 59.6)
Both sets of coordinates are clearly in the t2m.all dataframe, so why can R subset for some sets of coordinates but not for others? The logic seems sound to me, so I'm not sure what can be wrong. I have spent a long time going through all possibilities and troubleshooting on Stack Overflow, but I have yet to find a solution. Any suggestions? Many thanks in advance.
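The issue results from floating-point round-off errors: most decimal fractions (such as 59.6) cannot be stored exactly as binary doubles, so a value in the data frame can differ from the literal you type by a tiny amount, and == then returns FALSE even though both print as 59.6. Instead of testing for exact equality, you can set a tolerance when comparing Lon and Lat with a given value. In base R you could use abs(x - y) < 1e-5 to achieve it: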
# Match each coordinate to within a small tolerance (1e-5) instead of using ==,
# so round-off in the stored doubles cannot hide the requested row.
t2m.all |>
subset(abs(Lon - -100.7) < 1e-5 & abs(Lat - 59.6) < 1e-5)
# Lon Lat X1958.01.01.00.00.00
# 1 -100.7 59.6 -32.9
The dplyr equivalent is near():
library(dplyr)
t2m.all %>%
filter(near(Lon, -100.7) & near(Lat, 59.6))
# Lon Lat X1958.01.01.00.00.00
# 1 -100.7 59.6 -32.9
This might be a bit of a dumb question, but roaming around SO and other websites I can't find a straightforward answer: I've got data on the relationship between age and a continuous outcome:
library(dplyr)
library(tidyverse)
library(magrittr)
mydata <-
structure(list(ID = c(104, 157, 52, 152, 114, 221, 320, 125,
75, 171, 80, 76, 258, 82, 142, 203, 37, 92, 202, 58, 194, 38,
4, 137, 25, 87, 40, 117, 21, 255, 277, 315, 96, 134, 185, 94,
3, 153, 172, 65, 279, 209, 60, 13, 154, 160, 24, 29, 159, 213,
127, 74, 48, 126, 184, 132, 61, 141, 27, 49, 8, 39, 164, 162,
34, 205, 179, 119, 77, 135, 138, 165, 103, 253, 14, 20, 310,
84, 30, 273, 22, 105, 262, 116, 86, 83, 145, 31, 95, 51, 81,
271, 36, 50, 189, 2, 115, 7, 197, 54), age = c(67.1, 70.7, 53,
61.7, 66.1, 57.7, 54.1, 67.2, 60.9, 55.8, 40.7, 57.6, 64.1, 70.7,
47.5, 46.3, 66.7, 55, 63.3, 68.2, 61.2, 60.5, 52, 65.3, 48.9,
56.9, 62.7, 75.2, 61.4, 57.9, 53.6, 58.1, 51, 67.3, 63.9, 57,
43.2, 64.7, 62.8, 56.3, 51.7, 39.4, 45.2, 57.8, 55.7, 69.6, 61.5,
50.1, 73.7, 55.5, 65.2, 54.6, 49, 35.2, 52.9, 46.3, 55, 52.5,
54.2, 61, 57.4, 56.5, 53.6, 47.7, 64.2, 53.4, 60.9, 58.2, 60.7,
50.3, 48.3, 74.7, 52.1, 59.9, 52.4, 70.8, 61.2, 66.5, 55.4, 57.5,
59.2, 60.1, 52.3, 60.2, 54.8, 36.3, 61.5, 48.6, 56, 62, 64.8,
40.4, 68.3, 60, 69.1, 56.6, 45.3, 58.5, 52.3, 52), continuous_outcome = c(3636.6,
1128.2, 2007.5, 802.9, 332.3, 2636.1, 169.5, 67.9, 3261.8, 1920.3,
155.2, 1677.2, 198.2, 11189.7, 560.9, 633.1, 196.1, 13.9, 100.7,
7594.5, 1039.8, 83.9, 2646.8, 284.6, 306, 1135.6, 1883.1, 5681.4,
1706.2, 2241.1, 97.7, 1106.8, 1107.1, 290.8, 2123.4, 267, 115.3,
138.5, 152.7, 1338.9, 6709.8, 561.7, 1931.7, 3112.4, 1876.3,
3795.9, 5706.7, 7.4, 1324.9, 4095.4, 205.4, 1886, 177.3, 304.4,
1319.1, 415.9, 537.2, 3141.1, 740, 1976.7, 624.8, 983.1, 1163.5,
1432.6, 3730.4, 2023.4, 498.2, 652.5, 982.7, 1345.3, 138.4, 1505.1,
3528.1, 11.9, 884.5, 10661.6, 1911.4, 2800.8, 81.5, 396.4, 409.1,
417.3, 186, 1892.4, 1689.7, 0, 210.1, 210.5, 3484.5, 3196.8,
57.2, 20.2, 947, 540, 1603.1, 1571.8, 9.1, 149.2, 122, 63.2)), row.names = c(NA,
-100L), class = c("tbl_df", "tbl", "data.frame"))
As you can see in the tibble, age is a continuous variable measured to precision of 1 decimal place:
head(mydata)
# A tibble: 6 x 3
ID age continuous_outcome
<dbl> <dbl> <dbl>
1 104 67.1 3637.
2 157 70.7 1128.
3 52 53 2008.
4 152 61.7 803.
5 114 66.1 332.
6 221 57.7 2636.
When I fit a simple linear regression (for now assuming all assumptions are not-violated) I get the following beta-coefficient:
fit <-
lm(formula=continuous_outcome ~ age,
data=mydata)
fit
Call:
lm(formula = continuous_outcome ~ age, data = mydata)
Coefficients:
(Intercept) age
-3400.12 86.06
The beta-coefficient for age is 86.06. Does this mean that, as age is measured to 1 decimal place, that for every 0.1 years increase my outcome increases by 86.06? If so, how do I rescale age so that I am measuring the effect of age per, for example, 5 years or 10 years?
Thanks in advance!
The beta coefficient shows the amount that the dependent variable (DV, in this case continuous_outcome) will increase for every one unit increase in your independent variable (IV, in this case age in years).
If you want to show the relationship per 1/10th of a year, multiply your age column by 10 before fitting the model (so that one unit of the predictor is 0.1 years), or equivalently divide the beta coefficient by 10 (86.06 / 10 ≈ 8.61).
For your specific requests, since the beta coefficient is 86.06, you can multiply this by the number of years to get the increase of the continuous variable. So:
1 year increase = +86.06
5 year increase = +430.3 (86.06 * 5)
10 year increase = +860.6 (86.06 * 10)
To answer the last question (the estimate for the effect of age per 5 years), that would be 430.3, which is 86.06 * 5. So for every 5 years that a person's age increases, the continuous_outcome increases by 430.3 on average.
I'm reviewing factors related to cancer and hoping to find how they're related to one another. After I import my .xlsx file into R, I'm at a loss as to how to use that data to find the z-scores for each measurement and then use that matrix Z of z-scores of X to compute the covariance matrix A using cov(Z). How do I plot and analyze the entries of A (which tell me how correlated the measurements in X are)?
This is an example of the plot I'm trying to achieve with my factors (UrbanPop, Rape, Assault, and Murder are factors of that plot -- you can ignore the states plotted).
Thank you!
The factors I'm reviewing are Age, BMI, Glucose, Insulin, HOMA, Leptin, Adiponectin, Resistin, MCP.1
This is my data -- I used dput(mydata)
structure(list(Age = c(48, 83, 82, 68, 86, 49, 89, 76, 73, 75,
34, 29, 25, 24, 38, 44, 47, 61, 64, 32, 36, 34, 29, 35, 54, 45,
50, 66, 35, 36, 66, 53, 28, 43, 51, 67, 66, 69, 60, 77, 76, 76,
75, 69, 71, 66, 75, 78, 69, 85, 76, 77, 45, 45, 49, 34, 42, 68,
51, 62, 38, 69, 49, 51, 59, 45, 54, 64, 46, 44, 45, 44, 51, 72,
46, 43, 55, 43, 86, 41, 59, 81, 48, 71, 42, 65, 48, 85, 48, 58,
40, 82, 52, 49, 60, 49, 44, 40, 71, 69, 74, 66, 65, 72, 57, 73,
45, 46, 68, 75, 54, 45, 62, 65, 72, 86), BMI = c(23.5, 20.69049,
23.12467, 21.36752, 21.11111, 22.85446, 22.7, 23.8, 22, 23, 21.47,
23.01, 22.86, 18.67, 23.34, 20.76, 22.03, 32.03896, 34.52972,
36.51264, 28.57668, 31.97501, 32.27079, 30.27682, 30.48316, 37.03561,
38.57876, 31.44654, 35.25076, 34.17489, 36.21228, 36.79017, 35.85581,
34.42217, 27.68878, 29.60677, 31.23859, 35.0927, 26.34929, 35.58793,
29.21841, 27.2, 27.3, 32.5, 30.3, 27.7, 25.7, 25.3, 29.4, 26.6,
27.1, 25.9, 21.30395, 20.83, 20.95661, 24.24242, 21.35991, 21.08281,
19.13265, 22.65625, 22.49964, 21.51386, 21.36752, 22.89282, 22.83288,
23.1405, 24.21875, 22.22222, 20.83, 19.56, 20.26, 24.74, 18.37,
23.62, 22.21, 26.5625, 31.97501, 31.25, 26.66667, 26.67276, 28.67263,
31.64037, 32.46191, 25.5102, 29.29688, 29.66655, 28.125, 27.68878,
31.25, 29.15452, 30.83653, 31.21748, 30.80125, 32.46191, 31.23141,
29.77778, 27.88762, 27.63605, 27.91552, 28.44444, 28.65014, 26.5625,
30.91558, 29.13632, 34.83815, 37.10938, 29.38476, 33.18, 35.56,
30.48, 36.05, 26.85, 26.84, 32.05, 25.59, 27.18), Glucose = c(70,
92, 91, 77, 92, 92, 77, 118, 97, 83, 78, 82, 82, 88, 75, 86,
84, 85, 95, 87, 86, 87, 84, 84, 90, 83, 106, 90, 90, 80, 101,
101, 87, 89, 77, 79, 82, 101, 103, 76, 83, 94, 85, 93, 102, 90,
94, 60, 89, 96, 110, 85, 102, 74, 94, 92, 93, 102, 93, 92, 95,
112, 78, 103, 98, 116, 86, 98, 88, 114, 92, 106, 105, 105, 86,
101, 92, 103, 201, 97, 77, 100, 99, 112, 98, 85, 90, 196, 199,
139, 128, 100, 87, 134, 131, 70, 99, 103, 104, 108, 88, 89, 97,
83, 95, 134, 90, 92, 131, 152, 119, 92, 100, 97, 82, 138), Insulin = c(2.707,
3.115, 4.498, 3.226, 3.549, 3.226, 4.69, 6.47, 3.35, 4.952, 3.469,
5.663, 4.09, 6.107, 5.782, 7.553, 2.869, 18.077, 4.427, 14.026,
4.345, 4.53, 5.81, 4.376, 5.537, 6.76, 6.703, 9.245, 6.817, 6.59,
15.533, 10.175, 8.576, 23.194, 3.855, 5.819, 4.181, 5.646, 5.138,
3.881, 5.376, 14.07, 5.197, 5.43, 8.34, 6.042, 8.079, 3.508,
10.704, 4.462, 26.211, 4.58, 13.852, 4.56, 12.305, 21.699, 2.999,
6.2, 4.364, 3.482, 5.261, 6.683, 2.64, 2.74, 6.862, 4.902, 3.73,
5.7, 3.42, 15.89, 3.44, 58.46, 6.03, 4.42, 36.94, 10.555, 16.635,
4.328, 41.611, 22.033, 3.188, 9.669, 28.677, 10.395, 4.172, 14.649,
2.54, 51.814, 12.162, 16.582, 41.894, 18.077, 30.212, 24.887,
30.13, 8.396, 9.208, 2.432, 18.2, 8.808, 3.012, 6.524, 10.491,
10.949, 12.548, 5.636, 4.713, 5.75, 8.15, 7.01, 11.91, 3.33,
4.53, 5.73, 2.82, 19.91), HOMA = c(0.467409, 0.706897, 1.009651,
0.612725, 0.805386, 0.732087, 0.890787, 1.883201, 0.801543, 1.013839,
0.667436, 1.145436, 0.827271, 1.33, 1.06967, 1.6, 0.59, 3.790144,
1.037394, 3.00998, 0.921719, 0.972138, 1.203832, 0.906707, 1.229214,
1.383997, 1.752611, 2.05239, 1.513374, 1.300427, 3.869788, 2.534932,
1.84041, 5.091856, 0.732193, 1.133929, 0.845677, 1.406607, 1.305395,
0.727558, 1.100646, 3.262364, 1.089638, 1.245642, 2.098344, 1.341324,
1.873251, 0.519184, 2.349885, 1.056602, 7.111918, 0.960273, 3.485163,
0.832352, 2.853119, 4.924226, 0.687971, 1.55992, 1.001102, 0.790182,
1.232828, 1.84629, 0.507936, 0.696143, 1.658774, 1.402626, 0.791257,
1.37788, 0.742368, 4.468268, 0.780651, 15.28534, 1.56177, 1.14478,
7.836205, 2.629602, 3.775036, 1.099601, 20.63073, 5.271762, 0.605507,
2.38502, 7.002923, 2.871792, 1.008511, 3.071407, 0.56388, 25.05034,
5.96992, 5.685415, 13.22733, 4.458993, 6.483495, 8.225983, 9.736007,
1.449709, 2.248594, 0.61789, 4.668907, 2.346451, 0.653805, 1.432235,
2.510147, 2.241625, 2.940415, 1.862886, 1.046286, 1.304867, 2.633537,
2.628283, 3.495982, 0.755688, 1.1174, 1.370998, 0.570392, 6.777364
), Leptin = c(8.8071, 8.8438, 17.9393, 9.8827, 6.6994, 6.8317,
6.964, 4.311, 4.47, 17.127, 14.57, 35.59, 20.45, 8.88, 15.26,
14.09, 26.65, 30.7729, 21.2117, 49.3727, 15.1248, 28.7502, 45.6196,
39.2134, 12.331, 39.9802, 46.6401, 45.9624, 50.6094, 10.2809,
74.7069, 27.1841, 68.5102, 31.2128, 20.092, 21.9033, 16.2247,
83.4821, 24.2998, 21.7863, 28.562, 35.891, 10.39, 15.145, 56.502,
24.846, 65.926, 6.633, 45.272, 7.85, 21.778, 13.74, 7.6476, 7.7529,
11.2406, 16.7353, 19.0826, 9.6994, 11.0816, 9.8648, 8.438, 32.58,
6.3339, 8.0163, 14.9037, 17.9973, 8.6874, 12.1905, 12.87, 13.08,
7.65, 18.16, 9.62, 21.78, 10.16, 9.8, 37.2234, 25.7816, 47.647,
44.7059, 17.022, 38.8066, 46.076, 19.0653, 12.2617, 26.5166,
15.5325, 70.8824, 18.1314, 22.8884, 31.0385, 31.6453, 29.2739,
42.3914, 37.843, 51.3387, 12.6757, 14.3224, 53.4997, 14.7485,
31.1233, 14.9084, 44.0217, 26.8081, 33.1612, 41.4064, 23.8479,
18.69, 17.87, 50.53, 89.27, 54.68, 12.45, 61.48, 24.96, 90.28
), Adiponectin = c(9.7024, 5.429285, 22.43204, 7.16956, 4.81924,
13.67975, 5.589865, 13.25132, 10.35873, 11.57899, 13.11, 26.72,
23.67, 36.06, 17.95, 20.32, 38.04, 7.780255, 5.46262, 5.1, 8.6,
7.64276, 6.209635, 9.048185, 9.73138, 4.617125, 4.667645, 10.35526,
6.966895, 5.065915, 7.53955, 20.03, 4.7942, 8.300955, 3.19209,
2.19428, 4.267105, 6.796985, 2.19428, 8.12555, 7.36996, 9.34663,
9.000805, 11.78796, 8.13, 7.652055, 3.74122, 10.5673, 8.2863,
7.9317, 4.935635, 9.75326, 21.05663, 8.237405, 8.412175, 21.82375,
8.462915, 8.574655, 5.80762, 11.23624, 4.77192, 4.138025, 3.886145,
9.349775, 4.230105, 4.294705, 3.70523, 4.783985, 18.55, 20.37,
16.67, 16.1, 12.76, 17.86, 9.76, 6.420295, 11.01846, 12.71896,
5.357135, 13.49487, 16.44048, 10.63653, 21.57, 5.4861, 6.695585,
7.28287, 10.22231, 7.901685, 4.104105, 10.26266, 6.160995, 9.92365,
6.26854, 10.79394, 8.40443, 10.73174, 5.47817, 6.78387, 1.65602,
5.288025, 7.65222, 8.42996, 3.71009, 2.78491, 2.36495, 3.335665,
6.644245, 9.16, 11.9, 10.06, 8.01, 12.1, 21.42, 22.54, 33.75,
14.11), Resistin = c(7.99585, 4.06405, 9.27715, 12.766, 10.57635,
10.3176, 12.9361, 5.1042, 6.28445, 7.0913, 6.92, 4.58, 5.14,
6.85, 9.35, 7.64, 3.32, 13.68392, 6.70188, 17.10223, 9.1539,
5.62592, 24.6033, 16.43706, 10.19299, 8.70448, 11.78388, 23.3819,
22.03703, 15.72187, 22.32024, 10.26309, 21.44366, 6.71026, 10.37518,
4.2075, 3.29175, 82.1, 20.2535, 17.2615, 8.04375, 8.4156, 7.5767,
11.78796, 4.2989, 6.7052, 4.49685, 4.6638, 4.53, 9.6135, 8.49395,
11.774, 23.03408, 28.0323, 23.1177, 12.06534, 17.37615, 13.74244,
5.57055, 10.69548, 15.73606, 15.69876, 22.94254, 11.55492, 8.2049,
5.2633, 10.34455, 13.91245, 13.56, 4.62, 7.84, 5.31, 3.21, 4.82,
5.68, 16.1, 7.16514, 38.6531, 24.3701, 27.8325, 31.6904, 29.5583,
10.15726, 42.7447, 53.6717, 19.46324, 16.11032, 55.2153, 53.6308,
13.97399, 17.55503, 19.94687, 24.24591, 5.768, 11.50005, 20.76801,
23.03306, 26.0136, 49.24184, 16.48508, 18.35574, 14.91922, 20.4685,
14.76966, 9.9542, 6.89235, 15.55625, 8.89, 4.19, 11.73, 5.06,
10.96, 7.32, 10.33, 3.27, 4.35), MCP.1 = c(417.114, 468.786,
554.697, 928.22, 773.92, 530.41, 1256.083, 280.694, 136.855,
318.302, 354.6, 174.8, 313.73, 632.22, 165.02, 63.61, 191.72,
444.395, 252.449, 588.46, 534.224, 572.783, 904.981, 733.797,
1227.91, 586.173, 887.16, 1102.11, 667.928, 581.313, 864.968,
695.754, 358.624, 960.246, 473.859, 585.307, 634.602, 263.499,
378.996, 618.272, 698.789, 377.227, 335.393, 270.142, 200.976,
225.88, 206.802, 209.749, 215.769, 232.006, 45.843, 488.829,
552.444, 382.955, 573.63, 481.949, 321.919, 448.799, 90.6, 703.973,
199.055, 713.239, 737.672, 359.232, 355.31, 518.586, 635.049,
395.976, 301.21, 220.66, 193.87, 244.75, 513.66, 195.94, 312,
806.724, 483.377, 775.322, 1698.44, 783.796, 910.489, 426.175,
738.034, 799.898, 1041.843, 1698.44, 1698.44, 1078.359, 1698.44,
923.886, 638.261, 994.316, 764.667, 656.393, 396.021, 602.486,
407.206, 293.123, 256.001, 353.568, 572.401, 269.487, 396.648,
232.018, 655.834, 788.902, 621.273, 209.19, 198.4, 99.45, 218.28,
268.23, 330.16, 314.05, 392.46, 90.09)), class = "data.frame", row.names = c(NA,
-116L))
To get the z-scores you can use scale() and to get the covariance matrix you can use cov(). But there is no reason to produce the covariance matrix from the z-scores instead of the raw data: the covariance matrix of the z-scores, cov(scale(X)), is simply the correlation matrix of the raw data, cor(X). You can also visualize a correlation matrix using the corrplot function in the corrplot package. The corrplot function will also take the raw data as an input, not the covariance matrix.
I am trying to perform 6 months forecasting over production data for three power plants, I built my data as an hts object that has 3 levels. However, when I am performing the forecast function and then try to see the accuracy using test data I get the following error: "Error in x - fcasts: non-conformable arrays"
Furthermore, when I try to apply the "arima" as a forecasting method on the hts object I get the following (the warning message is repeated 9 times, as I have 9 time series in the hts object):
forecasts <- forecast(data,h = 6 , method = "bu" , fmethod = "arima")
I used the following instructions to get the hts object:
and the data has the following structure:
I am not sure where I am going wrong. Can anyone help with some thoughts?
Thank you!
The data:
structure(list(LarGroup1 = c(188.3, 187.2, 94.7, 109.2, 202.7,
146.6, 121.9, 151.3, 111.1, 103.4, 188.1, 168.1, 233.9, 230.7,
187.1, 0, 98.9, 173.5, 149.4, 168.6, 4.7, 14.8, 91.8, 166.5,
170.5, 123.6, 85.2, 64.4), LarGroup2 = c(159.1, 127.7, 210.3,
199.8, 113, 143.4, 144.5, 83.8, 41.6, 35.1, 95.2, 178.2, 241.1,
236.4, 181.9, 194.3, 196.1, 92.4, 154.6, 78.9, 35.7, 0, 74.5,
75.1, 140, 142.5, 3.8, 17.5), RibGroup1 = c(49.4, 102.4, 50.8,
118.8, 108.4, 139.5, 121.7, 69.6, 53.4, 28, 113.3, 96.3, 70.8,
124.4, 54.4, 128.7, 63.3, 2.1, 41.3, 0.4, 0.6, 0, 5.4, 57.9,
9.9, 30, 221, 167.2), RibGroup2 = c(32.7, 32, 98.1, 6.3, 85.5,
96.6, 41.1, 44.9, 50.4, 27.3, 0, 45.4, 199.1, 179.2, 86.1, 0,
58.4, 43.3, 41.8, 42.1, 22.1, 11.8, 71.8, 112, 204.1, 40.9, 24.5,
210.9), RibGroup3 = c(90.8, 15.4, 10.5, 124.4, 33.9, 8.4, 38.3,
56.9, 13.5, 0, 32.6, 132.8, 160.7, 168.7, 60.7, 131.9, 110.8,
29.2, 131.3, 62.1, 6.1, 0, 0, 3.4, 23.9, 192.7, 165.5, 0), SinGroup1 = c(235.2,
225.4, 226.1, 234.4, 222.1, 232.3, 233.4, 201.9, 195.3, 209.4,
233.6, 223.6, 222.2, 232, 224, 149.8, 201.6, 220.2, 203.1, 212.1,
71.9, 82.3, 183.2, 210.6, 198.6, 230.8, 218, 163.2), SinGroup2 = c(233.4,
225.6, 227, 51.6, 76, 230.7, 233.1, 202.7, 200.2, 207.2, 228.4,
226.2, 183.9, 230.4, 222.3, 227.7, 177.9, 152, 218.6, 210.6,
80.9, 63.2, 188.1, 209.5, 233.2, 210.1, 226.5, 200.5), SinGroup3 = c(233.2,
188.5, 226.9, 234.7, 222.8, 234.6, 220.6, 156.4, 209.2, 218.7,
232.9, 226.1, 215.4, 231, 222.7, 222.7, 183.7, 203.8, 216.8,
112, 0, 39.6, 180.8, 203.6, 221.1, 228.9, 202.8, 186.7), SinGroup4 = c(218,
215.5, 226.8, 235.6, 223.6, 234.8, 234.9, 69.3, 192, 207.8, 235.2,
217.2, 235.1, 231.8, 223.5, 230.5, 225.6, 220.1, 220, 211.9,
114.8, 44.5, 158.5, 206.3, 231.8, 179, 225.3, 198.6)), class = "data.frame", row.names = c(NA,
-28L))
In the accuracy function, you need to include test data, not training data. You ask for 6 steps ahead, but your test data only consists of 4 time periods.
The seasonal differencing error suggests you are using an old version of the forecast package. Please update your packages.
The following code works using current CRAN packages (forecast v8.4 and the current hts release):
library(hts)
Production_data <- data.frame(
LarGroup1 = c(
188.3, 187.2, 94.7, 109.2, 202.7,
146.6, 121.9, 151.3, 111.1, 103.4, 188.1, 168.1, 233.9, 230.7,
187.1, 0, 98.9, 173.5, 149.4, 168.6, 4.7, 14.8, 91.8, 166.5,
170.5, 123.6, 85.2, 64.4
), LarGroup2 = c(
159.1, 127.7, 210.3,
199.8, 113, 143.4, 144.5, 83.8, 41.6, 35.1, 95.2, 178.2, 241.1,
236.4, 181.9, 194.3, 196.1, 92.4, 154.6, 78.9, 35.7, 0, 74.5,
75.1, 140, 142.5, 3.8, 17.5
), RibGroup1 = c(
49.4, 102.4, 50.8,
118.8, 108.4, 139.5, 121.7, 69.6, 53.4, 28, 113.3, 96.3, 70.8,
124.4, 54.4, 128.7, 63.3, 2.1, 41.3, 0.4, 0.6, 0, 5.4, 57.9,
9.9, 30, 221, 167.2
), RibGroup2 = c(
32.7, 32, 98.1, 6.3, 85.5,
96.6, 41.1, 44.9, 50.4, 27.3, 0, 45.4, 199.1, 179.2, 86.1, 0,
58.4, 43.3, 41.8, 42.1, 22.1, 11.8, 71.8, 112, 204.1, 40.9, 24.5,
210.9
), RibGroup3 = c(
90.8, 15.4, 10.5, 124.4, 33.9, 8.4, 38.3,
56.9, 13.5, 0, 32.6, 132.8, 160.7, 168.7, 60.7, 131.9, 110.8,
29.2, 131.3, 62.1, 6.1, 0, 0, 3.4, 23.9, 192.7, 165.5, 0
), SinGroup1 = c(
235.2,
225.4, 226.1, 234.4, 222.1, 232.3, 233.4, 201.9, 195.3, 209.4,
233.6, 223.6, 222.2, 232, 224, 149.8, 201.6, 220.2, 203.1, 212.1,
71.9, 82.3, 183.2, 210.6, 198.6, 230.8, 218, 163.2
), SinGroup2 = c(
233.4,
225.6, 227, 51.6, 76, 230.7, 233.1, 202.7, 200.2, 207.2, 228.4,
226.2, 183.9, 230.4, 222.3, 227.7, 177.9, 152, 218.6, 210.6,
80.9, 63.2, 188.1, 209.5, 233.2, 210.1, 226.5, 200.5
), SinGroup3 = c(
233.2,
188.5, 226.9, 234.7, 222.8, 234.6, 220.6, 156.4, 209.2, 218.7,
232.9, 226.1, 215.4, 231, 222.7, 222.7, 183.7, 203.8, 216.8,
112, 0, 39.6, 180.8, 203.6, 221.1, 228.9, 202.8, 186.7
), SinGroup4 = c(
218,
215.5, 226.8, 235.6, 223.6, 234.8, 234.9, 69.3, 192, 207.8, 235.2,
217.2, 235.1, 231.8, 223.5, 230.5, 225.6, 220.1, 220, 211.9,
114.8, 44.5, 158.5, 206.3, 231.8, 179, 225.3, 198.6
)
)
# Monthly series (frequency = 12) whose first observation is July 2016.
Production_data_ts <- ts(Production_data, frequency = 12, start = c(2016, 7))
# characters = c(3, 6) tells hts() how to cut the column names into hierarchy
# levels -- presumably the 3-character plant prefix ("Lar", "Rib", "Sin")
# followed by the 6-character "GroupN" suffix; confirm against the hts() docs.
Production_data_hts <- hts(Production_data_ts, characters = c(3, 6))
# Training window: July 2016 through June 2018.
data <- window(Production_data_hts, start = c(2016, 7), end = c(2018, 6))
# Hold-out window: July 2018 through October 2018, i.e. only 4 periods.
test <- window(Production_data_hts, start = c(2018, 7), end = c(2018, 10))
# The horizon h is set to 4 so the forecasts line up with the 4 hold-out
# periods; method = "bu" is the bottom-up approach used in the question.
forecasts <- forecast(data, h = 4, method = "bu")
# Score the forecasts against the hold-out data, not the training data.
accuracy(forecasts, test)
I have some stock data together with some returns that are presented below.
Now I would like to coerce both the daily price changes (open, high, low, close, volume, adj. close) and the returns given to weekly or monthly values.
I know that the weekly prices can be obtained by xts::to.weekly(), but this drops the return. I don't know the exact mechanism behind the to.weekly function, but the returns need to be summarized with the sum function (I'm thinking of using xts::apply.weekly()), but then this would not be consistent with the stock price data....
How can this transition to weekly or monthly timescale be achieved efficiently?
Data <-
structure(c(64.5, 67, 72, 76, 75.75, 72, 75.5, 76, 76, 78, 78,
77.5, 79.25, 80, 76.25, 84, 89.75, 90.75, 92.25, 95.75, 94.5,
95, 92, 95.75, 100, 98, 104.25, 101.25, 100.25, 96.5, 94.75,
89, 94, 91.25, 99.25, 100.25, 100.25, 98, 96.5, 94.75, 97.5,
96.25, 99.25, 97, 98, 98.75, 97.25, 98.75, 100.25, 100.25, 103.25,
105.75, 108.5, 108.25, 103.75, 101.5, 99.75, 100, 99, 94.5, 99,
101.5, 105, 64.75, 73.25, 76.5, 76.75, 76, 75.75, 76, 76, 76,
80.75, 79, 79.5, 83, 80.5, 76.75, 91.5, 92.75, 94.75, 100.25,
96, 97.5, 96.75, 92, 100, 101.75, 104, 105, 103.25, 100.75, 99,
95.5, 92.75, 94, 97.75, 103.75, 101, 100.25, 99.5, 97.75, 96.75,
98, 99, 100, 100.75, 99.25, 98.75, 98, 102.25, 101, 103, 105,
109, 110.5, 108.25, 105, 102.25, 100.75, 100, 99.75, 98.75, 102.5,
103, 107.25, 60.75, 66.75, 71.25, 74.25, 72.25, 71.5, 74.25,
76, 76, 77, 75.75, 76, 79, 75.75, 73.25, 82, 88.75, 89.5, 91.75,
92.25, 92.75, 92, 92, 94, 97.75, 96.5, 98.5, 99.25, 93.5, 94,
88.25, 87.5, 91, 91, 98.75, 98.5, 100.25, 95.75, 95.5, 90.75,
96.75, 96, 95.25, 97, 95.75, 95.75, 96.75, 98.5, 97.25, 100.25,
102.5, 105.5, 107.5, 103.5, 102.75, 97.5, 98.25, 100, 94, 94.25,
98, 100, 103.75, 64.25, 72.75, 75.75, 75, 73.75, 75.5, 76, 76,
76, 79, 76, 79.25, 81.25, 76.75, 75.75, 88, 90.25, 93.75, 97.5,
95, 95, 92, 92, 100, 97.75, 102.75, 99.75, 100.25, 97.25, 94.5,
89, 91.75, 92.5, 97, 99.5, 100.25, 100.25, 96.75, 96, 96, 97.5,
98, 95.5, 100.75, 98, 96.75, 98, 100.25, 99.5, 102.25, 103.5,
108, 109.25, 104, 102.75, 99, 100, 100, 94.5, 97.5, 102, 103,
104.5, 6808900, 8180500, 5628500, 3238900, 3765800, 3177100,
887600, 0, 0, 3923200, 2425700, 3331200, 4058600, 3682800, 3293500,
10525000, 5664200, 3982600, 4702300, 6479800, 2565300, 2480500,
0, 3653000, 3400, 4010500, 5145800, 2782200, 3925100, 2770700,
4618500, 2712300, 1675400, 3331500, 5343000, 1169700, 0, 2095400,
1016600, 3642600, 3729400, 3575300, 3396900, 1963400, 3547300,
1865100, 2496300, 3226800, 2333200, 4285900, 3933000, 7400700,
3325200, 1848400, 21000, 4273700, 1910400, 0, 3168000, 2356000,
2184800, 1950300, 2649900, 51.5865, 58.4112, 60.8199, 60.2178,
59.2141, 60.6192, 61.0207, 61.0207, 61.0207, 63.4294, 61.0207,
63.6301, 65.2359, 61.6228, 60.8199, 70.6555, 72.462, 75.2722,
78.2831, 76.2758, 76.2758, 73.8671, 73.8671, 80.2903, 78.4838,
82.4983, 80.0896, 80.4911, 78.0824, 75.8744, 71.4584, 73.6664,
74.2686, 77.8816, 79.8889, 80.4911, 80.4911, 77.6809, 77.0787,
77.0787, 78.2831, 78.6845, 76.6773, 80.8925, 78.6845, 77.6809,
78.6845, 80.4911, 79.8889, 82.0969, 83.1005, 86.7136, 87.7172,
83.5019, 82.4983, 79.4874, 80.2903, 80.2903, 75.8744, 78.2831,
81.8961, 82.699, 83.9034, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.086956169, -0.022499605, 0.051150683,
-0.029196965, 0.005013135, -0.029925048, -0.028277819, -0.058201449,
0.030899097, 0.00817469, 0.048647746, 0.025773739, 0.007537968,
0, -0.034913177, -0.007752227, 0, 0.015625588, 0.005127543, -0.025509471,
0.054973245, -0.027295485, -0.012754736, 0.012919521, 0.022960049,
-0.007481572, 0.027638383, 0.012224579, 0.043478679, 0.011573732,
-0.048055569, -0.012018888, -0.03649651, 0.010100972, 0, 0, 0,
0.046153001, 0.009803886, 0.014563659), .Dim = c(63L, 7L), .Dimnames = list(
NULL, c("Open", "High", "Low", "Close", "Volume", "Adj.Close",
"Return")), index = structure(c(1238544000, 1238630400, 1238716800,
1238976000, 1239062400, 1239148800, 1239235200, 1239321600, 1239580800,
1239667200, 1239753600, 1239840000, 1239926400, 1240185600, 1240272000,
1240358400, 1240444800, 1240531200, 1240790400, 1240876800, 1240963200,
1241049600, 1241136000, 1241395200, 1241481600, 1241568000, 1241654400,
1241740800, 1.242e+09, 1242086400, 1242172800, 1242259200, 1242345600,
1242604800, 1242691200, 1242777600, 1242864000, 1242950400, 1243209600,
1243296000, 1243382400, 1243468800, 1243555200, 1243814400, 1243900800,
1243987200, 1244073600, 1244160000, 1244419200, 1244505600, 1244592000,
1244678400, 1244764800, 1245024000, 1245110400, 1245196800, 1245283200,
1245369600, 1245628800, 1245715200, 1245801600, 1245888000, 1245974400),
tzone = "UTC", tclass = "Date"), class = c("xts", "zoo"), .indexCLASS = "Date",
tclass = "Date", .indexTZ = "UTC", tzone = "UTC")
Use period.apply (or apply.daily, apply.weekly, etc.) with your own custom function. Something like:
library(quantmod) # for Op, Hi, Lo, Cl, and Vo functions
myFun <- function(x) {
  # Work on the bare data matrix: once the xts wrapper is stripped, the
  # final c() call is the base one and c.xts is not dispatched.
  m <- coredata(x)

  # These three columns are picked by exact column name.
  closing <- m[, "Close"]
  adjusted <- m[, "Adj.Close"]
  returns <- m[, "Return"]

  # One summary value per column for the period handed in by the caller.
  period_open <- first(Op(m))
  period_high <- max(Hi(m))
  period_low <- min(Lo(m))
  period_close <- last(closing)
  period_volume <- sum(Vo(m))
  period_adj_close <- last(adjusted)
  period_return <- sum(returns)

  c(
    Open = period_open,
    Hi = period_high,
    Low = period_low,
    Close = period_close,
    Volume = period_volume,
    Adj.Close = period_adj_close,
    Return = period_return
  )
}
out <- period.apply(Data, endpoints(Data, "months"), myFun)
# Open Hi Low Close Volume Adj.Close Return
# 2009-04-30 64.5 100.25 60.75 92.0 88802000 73.8671 0.0000000
# 2009-05-29 92.0 105.00 87.50 95.5 58597300 76.6773 0.0486306
# 2009-06-26 97.0 110.50 94.00 104.5 54739400 83.9034 0.1222869