I am constructing a facet_grid using stat_hexbin however I would like the alpha value to be independent for each of the facet plots.
I am currently using the following code:
ggplot (data, aes (x, y , fill = z)) +
stat_binhex(bins=20, aes(alpha = ..count..)) +
facet_grid(. ~ z) +
guides(alpha = F) +
coord_equal() +
theme_bw()
which produces the following plot:
However, the alpha value, which is defined by ..count.. doesn't work when applied outside of the aes within stat_binhex.
I would like to show that there is some clustering in the 90 grouping on the right, around the (100,0) region, but the hexes are very pale, since there is such heavy clustering around (0,0) in the 10 grouping (leftmost plot) which skews the alpha.
Main question: How can I make the alpha independent for each facet, but still connected to count/density to better show the clustering in '70' and '90' groups?
Many thanks!
Data:
# rounded x and y, from 2 days of 365
structure(list(x = c(-24, 41, 43, 14, 9, 30, 8, -14, -45, 42,
65, 39, 43, 49, 39, 61, -53, -16, 29, 27, 9, 6, -61, 20, 5, -30,
-10, 75, 94, 28, 70, 44, -11, 26, 29, 33, 26, -35, 20, 40, 7,
4, 14, 4, -41, -7, -21, 95, 20, 50, 63, 31, 47, 19, 20, 19, 23,
-25, 29, -8, -73, 13, -82, 4, -29, 3, 9, 3, 35, 45, 64, -14,
-4, 34, 13, 12, 20, 13, 15, -17, 12, 19, -55, -49, 95, -19, 45,
94, 23, 29, 22, -91, -39, -35, -3, 63, 2, 5, 30, 62, 1, 4, -61,
-6, -2, 5, -26, -23, 5, 6, 8, 45, 104, -7, 8, 44, -43, -8, 9,
12, 29, 30, 69, 90, 12, -28, -10, -9, 49, 60, 32, 43, -11, 12,
28, 91, 11, 13, 43, 61, 11, 12, 28, 31, 47, 12, 13, 30, 46, 66,
98, 11, 12, 29, 31, 44, 64, -11, 14, 48, 62, 96, 10, 11, 12,
29, 67, 30, 93, -10, -9, 44, 101, -28, 34, 46, 10, 27, 30, 61,
8, 24, -7, -2, 52, 65, 5, -43, 41, 45, 91, -24, -23, 37, 73,
97, -61, 63, 57, 52, -37, -35, 19, 24, 110, -91, -5, -17, 95,
13, 85, -52, -50, 78, 30, 37, -8, -27, 19, -78, -75, 52, 42,
-11, -37, 27, 62, 78, -16, -56, 41), y = c(-100, -95, -95, -92,
-88, -86, -84, -82, -81, -78, -73, -72, -71, -70, -69, -68, -67,
-67, -64, -63, -62, -59, -58, -57, -56, -54, -54, -54, -54, -52,
-52, -49, -48, -48, -48, -47, -46, -45, -45, -45, -44, -42, -41,
-40, -39, -39, -38, -38, -37, -36, -36, -35, -35, -34, -34, -33,
-33, -32, -32, -31, -30, -30, -29, -29, -28, -27, -27, -26, -26,
-26, -26, -25, -25, -25, -24, -23, -23, -22, -22, -21, -21, -21,
-20, -20, -19, -18, -18, -18, -17, -17, -16, -14, -14, -14, -13,
-13, -12, -12, -12, -12, -11, -11, -10, -10, -10, -10, -9, -9,
-9, -9, -9, -9, -9, -8, -8, -8, -7, -7, -7, -7, -6, -6, -6, -6,
-5, -4, -4, -4, -4, -4, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1,
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
3, 4, 4, 4, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 8, 9, 9, 9, 9, 10,
10, 11, 11, 11, 11, 12, 13, 14, 14, 14, 15, 15, 15, 16, 16, 18,
19, 20, 21, 23, 23, 24, 24, 24, 26, 27, 28, 28, 29, 30, 32, 32,
32, 36, 36, 41, 42, 44, 48, 48, 50, 51, 57, 60, 62, 76, 76, 85,
89, 93), z = c(90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90,
90, 90, 90, 90, 90, 70, 70, 70, 70, 70, 90, 70, 70, 70, 70, 90,
90, 70, 90, 70, 50, 70, 70, 70, 70, 70, 50, 70, 50, 50, 50, 50,
70, 50, 50, 90, 50, 70, 70, 50, 70, 50, 50, 50, 50, 50, 50, 30,
90, 30, 90, 30, 50, 30, 30, 30, 50, 50, 70, 30, 30, 50, 30, 30,
30, 30, 30, 30, 30, 30, 70, 70, 90, 30, 50, 90, 30, 30, 30, 90,
50, 50, 10, 70, 10, 10, 30, 70, 10, 10, 70, 10, 10, 10, 30, 30,
10, 10, 10, 50, 90, 10, 10, 50, 50, 10, 10, 10, 30, 30, 70, 90,
10, 30, 10, 10, 50, 70, 30, 50, 10, 10, 30, 90, 10, 10, 50, 70,
10, 10, 30, 30, 50, 10, 10, 30, 50, 70, 90, 10, 10, 30, 30, 50,
70, 10, 10, 50, 70, 90, 10, 10, 10, 30, 70, 30, 90, 10, 10, 50,
90, 30, 30, 50, 10, 30, 30, 70, 10, 30, 10, 10, 50, 70, 10, 50,
50, 50, 90, 30, 30, 50, 70, 90, 70, 70, 70, 70, 50, 50, 30, 30,
90, 90, 30, 30, 90, 30, 90, 70, 70, 90, 50, 50, 50, 50, 50, 90,
90, 70, 70, 70, 70, 70, 90, 90, 90, 90, 90)), .Names = c("x",
"y", "z"), row.names = c(NA, -231L), class = c("data.table",
"data.frame"), .internal.selfref = <pointer: 0x0000000000330788>)
Related
here is a reprex
data<- structure(list(lanmark_id = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
59, 60, 61, 62, 63, 64, 65, 66, 67), V1 = c(0.00291280916742007,
0.00738863171211713, 0.0226678081211574, 0.0475105228945172,
0.0932285720818941, 0.167467706279089, 0.257162845610094, 0.365202733889021,
0.49347857580521, 0.623654594804239, 0.738846221030799, 0.838001377618909,
0.911583795022151, 0.954620025430512, 0.976736039833402, 0.99275439380643,
1.00100526672829, 0.0751484964183746, 0.136267471453466, 0.223219796351563,
0.312829176190895, 0.396253287447153, 0.589077347394549, 0.682150866526948,
0.771279538477539, 0.856242644022999, 0.915433541338973, 0.493665602840245,
0.491283285973581, 0.488913167946858, 0.486968906096063, 0.384707082576335,
0.43516446651127, 0.48730704698643, 0.541730425616146, 0.590794609520034,
0.176234316360877, 0.230353437655898, 0.295908510434122, 0.350673723300921,
0.2927721757992, 0.228392965512228, 0.634474821310078, 0.692554938010577,
0.757884656518485, 0.809961553290539, 0.760324208523177, 0.696892501347341,
0.299062528225204, 0.371899560139738, 0.440183530232855, 0.488448817156316,
0.542120710507391, 0.613931454931259, 0.683122622479693, 0.614367295821043,
0.544516611213321, 0.487065702940653, 0.43466839036949, 0.367662837035504,
0.329392110306872, 0.439192556373207, 0.488617118648197, 0.543288506065858,
0.652131615571443, 0.541622182786469, 0.486664920417254, 0.437126878794749
), V2 = c(0.201088019764115, 0.335422141956174, 0.468591127485112,
0.597955245417373, 0.719502795031081, 0.826191980419368, 0.912263437847338,
0.978932088608654, 0.996572250349122, 0.975164350943783, 0.906204543800476,
0.817791059656974, 0.711167374856116, 0.587462637963028, 0.457981280500493,
0.327526817895531, 0.19652402489511, 0.0832018969548692, 0.0247526745448235,
0.00543973063471442, 0.0169853862992864, 0.0463565705952832,
0.0442986445765913, 0.0151651597693172, 0.00747493463745755,
0.0263496825405166, 0.0805712600069456, 0.160307477500307, 0.24640401358039,
0.332244740019727, 0.420995916418539, 0.486383354389177, 0.505514985155285,
0.521022030162301, 0.5059272511442, 0.48818970795347, 0.184054088286897,
0.153658218058329, 0.153359749238857, 0.186997311695192, 0.20294291755153,
0.204166125257439, 0.186997311695192, 0.153386090373069, 0.155932705636629,
0.184603717976376, 0.203900583330345, 0.202836636618411, 0.670663080116174,
0.635972857244521, 0.619932598923225, 0.632625553953685, 0.620132318139554,
0.637530241507316, 0.668109937001625, 0.718821664744205, 0.73956412947459,
0.744898219300658, 0.74046882628352, 0.720755964662638, 0.672731384920681,
0.666152981987244, 0.670464844757437, 0.664772611108765, 0.671145517468628,
0.673968618595099, 0.67986363963374, 0.675352028351748), coef2 = c(0,
0, 0, 0, 0, 0, 0, 0, 0.565178003460693, 0, 0, 0, 0, 0, 0, 0,
0, 0.0433232019717308, 0.0433232019717308, 0.442833876807268,
0.574211955093656, 0.574211955093656, 0.574211955093656, 0.574211955093656,
0.442833876807268, 0.0433232019717308, 0.0433232019717308, 0.0612451242746323,
0.0612451242746323, 0, 0, 0, 0, 0, 0, 0, 0.343056259557492, 0.701076795777046,
0.674029769391816, 0, 0.538117834886036, 0.990039002564078, 0.451921167678043,
0.701076795777046, 0.701076795777046, 0.316009233172263, 0.990039002564078,
0.990039002564078, 0.878350036859346, 0.343364662128988, 0.282119537854356,
0.282119537854356, 0.282119537854356, 0.343364662128988, 0.384793696241895,
0.608382647917744, 0.608382647917744, 1, 0.608382647917744, 0.608382647917744,
0.384793696241895, 0.501936678206125, 0.501936678206125, 0, 0.878350036859346,
0, 0.501936678206125, 0.501936678206125)), row.names = c(NA,
-68L), class = c("tbl_df", "tbl", "data.frame"))
I used this data to create a deulanay plot in R
library(tidyverse)
library(ggforce)
data%>%
mutate(coef2 = coef2/max(coef2))%>%
ggplot(aes(V1, V2))+
geom_delaunay_tile(aes(colour = coef2, fill = coef2), alpha = .5)+
geom_delaunay_segment2(aes(colour = coef2, fill = coef2))+
geom_point(aes(colour = coef2))+
ylim(1,0)+
scale_color_viridis_c(option = "magma")+
scale_fill_viridis_c(option = "magma")+
theme_minimal()
which gives this
I want to fill all triangles with a blend of colors that match the color of each point, just as the lines are colored.
as you can see I have tried using fill = coef2 within de geom_delaunay but this doesn't really achieve what I want.
is there a way to do this in R.
Many thanks!
I'm currently running principal component analysis. For the interpretation I want to create a profile (pattern) plot to visualize the correlation between each principal component and the original variables. Is anyone familiar with a package or code to create this in R? I'm using the prcomp() function in R.
See examples:
https://canadianaudiologist.ca/predicting-speech-perception-from-the-audiogram-and-vice-versa/
https://blogs.sas.com/content/iml/2019/11/04/interpret-graphs-principal-components.html
This is similar data to my db:
db <- structure(list(T025 = c(20, 60, 20, 10, 85, 5, 15, 10, 10, 25,
15, 5, 15, 30, 15, 15, 10, 25, 45, 25, 55, 20, 65, 20, 10, 10,
15, 15, 30, 35, 10, 50, 20, 15, 30, 15, 20, 35, 30, 20, 10, 20,
30, 15, 40, 15, 10, 10, 20, 25, -5, 10, 40, 0, 15, 5, 15, 30,
15, 80, 15, 35, 10, 50, 25, 10, 15, 20, 20, 20, 25, 20, 30, 10,
20, 50, 25, 25, 55, 30, 20, 30, 15, 10, 15, 15, 35, 20, 30, 15,
40, 20, 25, 15, 20, 35, 15, 25, 20, 40, 0, 20, 10, 10, 15, 10,
20, 10, 35, 35, 25, 30, 20, 25, 15, 30, 35, 25, 30, 5, 20, 30,
15, 25, 10), T05 = c(0, 25, 0, 5, 25, 5, 0, 0, 5, 5, 5, -5, 5,
15, 15, 5, 0, 15, 25, 15, 50, 20, 45, 5, 5, 5, 0, 10, 10, 10,
5, 20, 15, 10, 20, 10, -5, 10, 30, -5, 0, 10, 35, 5, 40, 0, 0,
-5, 15, 25, 0, 5, 35, -5, 5, 0, 5, 5, 10, 70, 0, 20, 5, 30, 10,
10, 5, 5, 25, 10, 20, 5, 25, 5, 10, 35, 15, 10, 45, 15, 15, 25,
10, 5, 10, 5, 20, 15, 15, 5, 10, 10, 20, 5, 15, 25, 5, 20, 10,
35, -10, 5, 0, -5, 0, 5, 15, 5, 15, 35, 20, 25, 10, 15, 15, 25,
45, 0, 25, 0, 5, 25, 0, 20, 5), T1 = c(25, 20, 25, 20, 50, 10,
15, 20, 25, 25, 25, 25, 15, 45, 25, 25, 20, 35, 40, 35, 65, 45,
45, 30, 25, 20, 5, 20, 30, 25, 20, 35, 25, 25, 35, 15, 15, 25,
45, 20, 25, 35, 40, 25, 60, 15, 15, 15, 25, 45, 20, 20, 60, 15,
20, 25, 45, 45, 25, 75, 10, 45, 15, 50, 20, 25, 20, 15, 40, 30,
50, 20, 40, 20, 35, 50, 35, 15, 50, 30, 20, 45, 25, 25, 20, 45,
30, 35, 30, 30, 15, 15, 30, 25, 25, 25, 15, 40, 25, 55, 20, 30,
10, 15, 50, 15, 40, 20, 20, 55, 35, 45, 20, 50, 35, 20, 65, 10,
35, 15, 30, 55, 25, 15, 25), T2 = c(20, 20, 15, 25, 70, 10, 15,
45, 50, 30, 20, 25, 10, 40, 20, 40, 30, 40, 25, 30, 45, 25, 50,
20, 20, 20, 10, 10, 45, 10, 5, 40, 20, 15, 50, 25, 15, 20, 25,
30, 20, 30, 35, 15, 65, 20, 25, 10, 10, 60, 25, 20, 70, 5, 15,
15, 15, 25, 15, 60, 25, 55, 5, 50, 30, 35, 5, 10, 30, 10, 55,
25, 40, 35, 40, 45, 25, 20, 35, 40, 5, 40, 10, 25, 10, 40, 30,
20, 25, 25, 10, 25, 30, 45, 20, 25, 10, 55, 40, 60, 5, 10, 10,
5, 20, 0, 40, 20, 35, 80, 25, 40, 15, 55, 25, 15, 65, 5, 25,
5, 35, 45, 10, 5, 10), T4 = c(10, 25, 35, 35, 70, 20, 15, 70,
55, 30, 50, 35, 40, 40, 35, 45, 60, 50, 15, 25, 70, 10, 60, 40,
30, 15, 15, 15, 50, 5, 20, 70, 5, 35, 65, 40, 20, 65, 50, 30,
45, 55, 65, 35, 45, 35, 40, 20, 5, 65, 20, 25, 75, 10, 25, 25,
10, 25, 20, 55, 20, 65, 5, 60, 70, 45, 15, 25, 35, 5, 70, 55,
65, 40, 35, 55, 35, 45, 45, 45, 20, 40, 25, 50, 15, 55, 55, 40,
30, 60, 10, 60, 40, 35, 30, 65, 5, 75, 55, 80, 15, 30, 55, 15,
50, 25, 45, 30, 45, 90, 20, 45, 20, 40, 35, 20, 70, 20, 30, 45,
50, 55, 45, 5, 45), T8 = c(5, 55, 55, 40, 75, 40, 5, 70, 25,
10, 50, 55, 5, 35, 10, 30, 40, 55, 20, 20, 65, -5, 55, 50, -10,
45, 5, 50, 65, 20, 0, 75, 15, 30, 50, 50, 30, 70, 45, 25, 35,
40, 85, 30, 60, 50, 55, 15, 10, 75, 60, 20, 90, 0, 20, 55, -10,
20, 10, 45, 20, 65, 0, 70, 85, 0, -5, 30, 35, 5, 80, 45, 60,
25, 35, 55, 30, 45, 65, 45, -5, 35, 35, 40, 50, 55, 50, 70, 45,
40, 0, 55, 45, 30, 0, 56, 0, 45, 50, 70, 15, 20, 45, -10, 45,
55, 45, 20, 50, 85, 5, 50, 10, 20, 25, 0, 70, 0, 25, 5, 45, 35,
40, -5, 25)), row.names = c("1", "2", "3", "4", "5", "6",
"7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20", "21", "22", "23", "24", "25", "26",
"177", "191", "200", "205", "208", "212", "231", "236", "240",
"246", "250", "259", "263", "264", "275", "276", "282", "293",
"303", "304", "307", "309", "315", "316", "320", "322", "324",
"327", "333", "338", "343", "356", "365", "377", "379", "393",
"395", "399", "405", "411", "426", "428", "439", "448", "451",
"459", "490", "495", "498", "513", "515", "521", "524", "528",
"532", "550", "552", "559", "566", "570", "577", "583", "587",
"595", "624", "638", "641", "645", "647", "650", "660", "668",
"677", "683", "688", "691", "702", "704", "710", "719", "730",
"732", "748", "752", "758", "766", "772", "780", "782", "790",
"810", "828", "830", "836", "853", "862", "880", "889", "896"
), class = "data.frame")
db.pca <- prcomp(db, center= TRUE, scale.=TRUE)
summary(db.pca)
str(db.pca)
ggbiplot(db.pca)
screeplot(db.pca, type="line")
Here is a way with package FactoMineR to get the correlations. The plot is a base R plot.
library(FactoMineR)
res.pca <- PCA(iris[-5], graph = FALSE)
cos2 <- res.pca$var$cos2
old_par <- par(xpd = TRUE)
matplot(
cos2,
type = "l",
xlab = "variable",
ylab = "correlation",
main = "Component Pattern Profiles",
xaxt = "n"
)
axis(1, at = 1:nrow(cos2), labels = rownames(cos2))
legend(
x = "bottom",
inset = c(0, -0.2),
legend = colnames(cos2),
col = 1:ncol(cos2),
lty = 1:ncol(cos2),
bty = "n",
horiz = TRUE
)
par(old_par)
using your data I did this:
comp = prcomp(db, center=T, scale.=T)
b =matrix(ncol = 3)[-1,]
for(i in 1:ncol(comp$x)){
for(j in colnames(db)){
b = rbind(b, c(i,j,cor.test(comp$x[,i], db[,j])$estimate))
}
}
b= as.data.frame(b)
b$cor= as.numeric(b$cor)
ggplot(b,aes(x=V2,y=cor, group = V1, col= V1))+
geom_line()+
theme_classic()
And I obtained this :
did it help?
I'm trying to extract slope values from a number of linear regression models. I plotting acetone emission against water content on different days.
I have these graphs and models
I have tried to extract the slope values using this code:
Library(broom)
Library(tidyverse)
lm_table <- df %>%
nest_by(days) %>%
summarise(mdl = list(lm(water_content ~ acetone, data)), .groups = "drop") %>%
mutate(adjrsquared = map_dbl(mdl, ~summary(.)$adj.r.squared ),
mdl = map(mdl, broom::tidy)) %>%
unnest(mdl)%>%
filter(term=="acetone")
and also this code:
lm_table2 <- df %>%
nest_by(days) %>%
mutate(model = list(lm(water_content ~ acetone, data)),
coefficients2 = list(tidy(model)))
coefficients2 = lm_table2 %>%
unnest(coefficients2)
Both codes however give different slope values than what I get from the graphs. Any ideas as to why that is?
Here's the data
df <- structure(list(i.x45.03 = c(22, 17, 11, 1782, 1767, 250, 3568,
79, 219, 855, 12009, 395, 1552, 705, 2282, 84, 3396, 252, 2058,
1480, 5, 745, 2573, 1005, 946, 3320, 5406, 2192, 20, 1207, 9519,
66, 463, 250, 1095, 16556, 88, 2695, 275, 16, 1577, 29, 3221,
25, 6295, 2, 63, 123, 8, 1, 37, 5308, 4546, 994, 4567, 421, 0,
1938, 19480, 1027, 3474, 1982, 2819, 69, 27733, 2152, 15429,
996, 8, 3435, 8748, 17062, 269, 26188, 35823, 2572, 67, 761,
13493, 1, 1, 1, 16, 9, 29, 89, 20, 11, 21644, 3, 37, 13, 0, 0,
0, 0, 3, 30, 19, 0, 0, 242, 7246, 1, 20081, 77, 0, 0, 0, 5878,
0, 0, 22, 2, 4, 1, 93, 12, 2, 73, 0, 19, 0, 0, 2, 48, 3, 0, 0,
0, 0, 22, 4, 0, 0, 0, 0, 0, 0, 1, 87, 0, 0, 3, 0, 0, 4, 1, 0,
82, 7, 0, 0, 0, 7, 22, 34, 17, 0, 0, 0, 0, 0, 2, 19, 3, 0, 990,
0, 0, 0, 0, 84, 9, 0, 5, 1246, 1944, 633, 23640, 262, 5399, 83,
19, 4417, 125, 7801, 69, 6755, 6, 39, 262), i.water_content_percent_es = c(98,
39, 85, 14, 21, 28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 23, 8,
17, 10, 75, 52, 13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 31, 35, 19,
32, 40, 79, 22, 49, 91, 15, 90, 63, 90, 60, 53, 29, 91, 98, 39,
85, 14, 21, 28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 23, 8, 17,
10, 75, 52, 13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 39, 85, 14, 21,
28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 98, 23, 8, 17, 10, 75,
52, 13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 31, 35, 19, 32, 40, 79,
22, 49, 91, 15, 90, 63, 90, 60, 53, 29, 91, 98, 39, 85, 14, 21,
28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 23, 8, 17, 10, 75, 52,
13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 31, 35, 19, 32, 40, 79, 22,
49, 91, 15, 90, 63, 90, 60, 53, 29, 91, 31, 35, 19, 32, 40, 79,
22, 49, 91, 15, 90, 63, 90, 60, 53, 29, 91), daysincubated4 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4), days = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4), water_content = c(98,
39, 85, 14, 21, 28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 23, 8,
17, 10, 75, 52, 13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 31, 35, 19,
32, 40, 79, 22, 49, 91, 15, 90, 63, 90, 60, 53, 29, 91, 98, 39,
85, 14, 21, 28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 23, 8, 17,
10, 75, 52, 13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 39, 85, 14, 21,
28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 98, 23, 8, 17, 10, 75,
52, 13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 31, 35, 19, 32, 40, 79,
22, 49, 91, 15, 90, 63, 90, 60, 53, 29, 91, 98, 39, 85, 14, 21,
28, 50, 83, 21, 59, 20, 66, 61, 70, 46, 23, 8, 17, 10, 75, 52,
13, 9, 8, 47, 8, 8, 46, 86, 24, 17, 31, 35, 19, 32, 40, 79, 22,
49, 91, 15, 90, 63, 90, 60, 53, 29, 91, 31, 35, 19, 32, 40, 79,
22, 49, 91, 15, 90, 63, 90, 60, 53, 29, 91), acetone = c(22,
17, 11, 1782, 1767, 250, 3568, 79, 219, 855, 12009, 395, 1552,
705, 2282, 84, 3396, 252, 2058, 1480, 5, 745, 2573, 1005, 946,
3320, 5406, 2192, 20, 1207, 9519, 66, 463, 250, 1095, 16556,
88, 2695, 275, 16, 1577, 29, 3221, 25, 6295, 2, 63, 123, 8, 1,
37, 5308, 4546, 994, 4567, 421, 0, 1938, 19480, 1027, 3474, 1982,
2819, 69, 27733, 2152, 15429, 996, 8, 3435, 8748, 17062, 269,
26188, 35823, 2572, 67, 761, 13493, 1, 1, 1, 16, 9, 29, 89, 20,
11, 21644, 3, 37, 13, 0, 0, 0, 0, 3, 30, 19, 0, 0, 242, 7246,
1, 20081, 77, 0, 0, 0, 5878, 0, 0, 22, 2, 4, 1, 93, 12, 2, 73,
0, 19, 0, 0, 2, 48, 3, 0, 0, 0, 0, 22, 4, 0, 0, 0, 0, 0, 0, 1,
87, 0, 0, 3, 0, 0, 4, 1, 0, 82, 7, 0, 0, 0, 7, 22, 34, 17, 0,
0, 0, 0, 0, 2, 19, 3, 0, 990, 0, 0, 0, 0, 84, 9, 0, 5, 1246,
1944, 633, 23640, 262, 5399, 83, 19, 4417, 125, 7801, 69, 6755,
6, 39, 262)), row.names = c(NA, -192L), class = "data.frame")
and the code for the graph I've made is:
library(ggpmisc)
library(tidyverse)
formula <- y~x
ggplot(df, aes(water_content, acetone)) +
geom_point() +
geom_smooth(method = "lm",formula = y~x) +
theme_bw()+
facet_wrap(~days, scales = "free")+
stat_poly_eq(
aes(label = paste(stat(adj.rr.label), stat(eq.label), stat(p.value.label), sep = "*\", \"*")),
formula = formula, parse = TRUE, size=3)
Any ideas why I don't get the same slope values?
All help is much appreciated!
You swapped x and y. Possibly because of using complex 'tidyverse' coding this was not obvious.
library(nlme)
lmList(acetone ~ water_content | days, data = df)
gives
Call:
Model: acetone ~ water_content | days
Data: df
Coefficients:
(Intercept) water_content
0 3314.26811 -31.663431
4 12046.87296 -154.277916
24 3103.13075 -44.368527
116 63.82385 -0.792739
Degrees of freedom: 192 total; 184 residual
Residual standard error: 4538.636
I have the code below which plots two time-series. I'd like to add a vertical line every say 10 units on the x-axis to basically divide the plot up into like 5 squares. Any tips are very much appreciated.
Code:
## Plot Forecast & Actual
ts.plot(ts(CompareDf$stuff1),ts(CompareDf$stuff2),col=1:2,xlab="Hour",ylab="Minu tes",main='testVar')
legend("topleft", legend = c("Actual","Forecast"), col = 1:2, lty = 1)
Data:
dput(CompareDf)
structure(list(stuff1 = c(6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55), stuff2 = c(8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57)), .Names = c("stuff1",
"stuff2"), row.names = c(NA, -50L), class = "data.frame")
After plotting timeseries data, use abline to draw vertical lines.
abline(v = seq(10, 50, 10))
Can anybody help be decipher the output of ucm. My main objective is to check if the ts data is seasonal or not. But i cannot plot and look and every time. I need to automate the entire process and provide an indicator for the seasonality.
I want to understand the following output
ucmxmodel$s.season
# Time Series:
# Start = c(1, 1)
# End = c(4, 20)
# Frequency = 52
# [1] -2.391635076 -2.127871717 -0.864021134 0.149851212 -0.586660213 -0.697838635 -0.933982269 0.954491859 -1.531715424 -1.267769820 -0.504165631
# [12] -1.990792301 1.273673437 1.786860414 0.050859315 -0.685677002 -0.921831488 -1.283081922 -1.144376739 -0.964042949 -1.510837956 1.391991657
# [23] -0.261175626 5.419494363 0.543898305 0.002548125 1.126895943 1.474427901 2.154721023 2.501352782 0.515453691 -0.470886132 1.209419689
ucmxmodel$vs.season
# [1] 1.375832 1.373459 1.371358 1.369520 1.367945 1.366632 1.365582 1.364795 1.364270 1.364007 1.364007 1.364270 1.364795 1.365582 1.366632 1.367945
# [17] 1.369520 1.371358 1.373459 1.375816 1.784574 1.784910 1.785223 1.785514 1.785784 1.786032 1.786258 1.786461 1.786643 1.786802 1.786938 1.787052
# [33] 1.787143 1.787212 1.787257 1.787280 1.787280 1.787257 1.787212 1.787143 1.787052 1.786938 1.786802 1.786643 1.786461 1.786258 1.786032 1.785784
# [49] 1.785514 1.785223 1.784910 1.784578 1.375641 1.373276 1.371175 1.369337 1.367762 1.366449 1.365399 1.364612 1.364087 1.363824 1.363824 1.364087
# [65] 1.364612 1.365399 1.366449 1.367762 1.369337 1.371175 1.373276 1.375636 1.784453 1.784788 1.785101 1.785392 1.785662 1.785910 1.786136 1.786339
ucmxmodel$est.var.season
# Season_Variance
# 0.0001831373
How can i use the above info without looking at the plots to determine the seasonality and at what level ( weekly, monthly, quarterly or yearly)?
In addition, i am getting NULL in est
ucmxmodel$est
# NULL
Data
The data for a test is:
structure(c(44, 81, 99, 25, 69, 42, 6, 25, 75, 90, 73, 65, 55,
9, 53, 43, 19, 28, 48, 71, 36, 1, 66, 46, 55, 56, 100, 89, 29,
93, 55, 56, 35, 87, 77, 88, 18, 32, 6, 2, 15, 36, 48, 80, 48,
2, 22, 2, 97, 14, 31, 54, 98, 43, 62, 94, 53, 17, 45, 92, 98,
7, 19, 84, 74, 28, 11, 65, 26, 97, 67, 4, 25, 62, 9, 5, 76, 96,
2, 55, 46, 84, 11, 62, 54, 99, 84, 7, 13, 26, 18, 42, 72, 1,
83, 10, 6, 32, 3, 21, 100, 100, 98, 91, 89, 18, 88, 90, 54, 49,
5, 95, 22), .Tsp = c(1, 3.15384615384615, 52), class = "ts")
and
structure(c(40, 68, 50, 64, 26, 44, 108, 90, 62, 60, 90, 64, 120, 82, 68, 60,
26, 32, 60, 74, 34, 16, 22, 44, 50, 16, 34, 26, 42, 14, 36, 24, 14, 16, 6, 6,
12, 20, 10, 34, 12, 24, 46, 30, 30, 46, 54, 42, 44, 42, 12, 52, 42, 66, 40,
60, 42, 44, 64, 96, 70, 52, 66, 44, 64, 62, 42, 86, 40, 56, 50, 50, 62, 22,
24, 14, 14, 18, 18, 10, 20, 10, 4, 18, 10, 10, 14, 20, 10, 32, 12, 22, 20, 20,
26, 30, 36, 28, 56, 34, 14, 54, 40, 30, 42, 36, 52, 30, 32, 52, 42, 62, 46,
64, 70, 48, 40, 64, 40, 120, 58, 36, 40, 34, 36, 26, 18, 28, 16, 32, 18, 12,
20), .Tsp = c(1, 4.36, 52), class = "ts")
I think the most straightforward approach would be to follow Rob Hyndman's approach (he is the author of many time series packages in R). For your data it would work as follows,
require(fma)
# Create a model with multiplicative errors (see https://www.otexts.org/fpp/7/7).
fit1 <- stlf(test2)
# Create a model with additive errors.
fit2 <- stlf(data, etsmodel = "ANN")
deviance <- 2 * c(logLik(fit1$model) - logLik(fit2$model))
df <- attributes(logLik(fit1$model))$df - attributes(logLik(fit2$model))$df
# P-value
1 - pchisq(deviance, df)
# [1] 1
Based on this analysis we find the p-value of 1 which would lead us to conclude there is no seasonality.
I quite like the stl() function provided in R. Try this minimal example:
# some random data
x <- rnorm(200)
# as a time series object
xt <- ts(x, frequency = 10)
# do the decomposition
xts <- stl(xt, s.window = "periodic")
# plot the results
plot(xts)
Now you can get an estimate of the 'seasonality' by comparing the variances.
vars <- apply(xts$time.series, 2, var)
vars['seasonal'] / sum(vars)
You now have the seasonal variance as a proportion of sum of variances after decomposition.
I highly recommend reading the original paper so that you understand whats happening under the hood here. Its very accessible and I like this method as it is quite intuitive.