Related
I'm applying the example here:
https://quantdev.ssri.psu.edu/sites/qdev/files/09_EnsembleMethods_2017_1127.html
to my data, to build a model for classification using the caret package.
I got to the point:
# Resampling scheme: 10-fold cross-validation repeated 3 times.
cvcontrol <- trainControl(method = "repeatedcv", number = 10, repeats = 3,
                          allowParallel = TRUE)
# Random forest classifier for `variate`, using all other columns as predictors.
train.rf <- train(as.factor(variate) ~ .,
                  data = train.n.inp,
                  method = "rf",
                  trControl = cvcontrol,
                  importance = TRUE)
# No `newdata` is supplied, so these are predictions for the training rows
# themselves (resubstitution), not out-of-bag or held-out predictions.
rf.classTrain <- predict(train.rf, type = "raw")
#computing confusion matrix
# confusionMatrix() takes the predicted classes as `data` and the observed
# classes as `reference`; naming both keeps the "Prediction"/"Reference" axes
# of the printed table attached to the right vectors.
cM <- confusionMatrix(data = rf.classTrain, reference = train.n.inp$variate)
I don't understand the need to use the predict function to calculate the confusion matrix, or, in other words, what is the difference between cM and train.rf$finalModel:
train.rf$finalModel
OOB estimate of error rate: 43.08%
Confusion matrix:
MV UV class.error
MV 25 12 0.3243243
UV 16 12 0.5714286
> cM
Confusion Matrix and Statistics
Reference
Prediction MV UV
MV 37 0
UV 0 28
Accuracy : 1
I am confused by the (large) difference between the two confusion matrices and unsure which one reflects the accuracy of the model. Any help appreciated.
the data:
dput(train.n.inp)
structure(list(variate = structure(c(1L, 1L, 2L, 1L, 1L, 2L,
1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L), .Label = c("MV",
"UV"), class = "factor"), AMB = c(0.148918043959789, 0.137429106929874,
0.13522219247215, 0.152139165429334, 0.193551266136034, 0.1418753904697,
0.132098434875739, 0.256245486778797, 0.136593400352133, 0.0183612037420183,
0.0235701709547339, 0.030539801539972, 0.0532418112925866, 0.0506048730618504,
0.0443005622763673, 0.172991261592386, 0.135717125493919, 0.139092406429261,
0.1225892299329, 0.13579014839877, 0.183709401293317, 0.122207888096455,
0.00542803592726925, 0.0192455922563268, 0.0731446096925737,
0.0150264910871489, 0.0487793004405717, 0.0433918327937752, 0.0122597343588996,
0.0211847560629296, 0.114451232870044, 0.113712890165437, 0.00788647372392488,
-0.03807738805183, 0.00735097242168299, -0.00173226557619129,
0.000279921135262793, 0.0487306185040041, 0.00901021509302318,
0.164378615647997, 0.081505732298031, 0.0337690366656119, 0.0520247628784008,
0.0318461001711981, 0.0467265454486446, 0.0503046677863513, 0.026150313592808,
0.102418680881792, 0.145640126897581, 0.158703113209843, 0.166192017785134,
0.145234444092853, 0.189096868940113, 0.142573164893833, 0.157794383727251,
0.312043099741174, 0.136009217113324, 0.115213916542934, 0.119757563955894,
0.120065882887488, 0.141891617781889, 0.177956819122265, 0.13731551574455,
0.328513821613157, 0.110426859447136), MB = c(-0.73416, -0.67752,
-0.66664, -0.75004, -0.9542, -0.69944, -0.65124, -1.26328, -0.6734,
-0.09052, -0.1162, -0.15056, -0.26248, -0.24948, -0.2184, -0.85284,
-0.66908, -0.68572, -0.60436, -0.66944, -0.90568, -0.60248, -0.02676,
-0.09488, -0.3606, -0.07408, -0.24048, -0.21392, -0.06044, -0.10444,
-0.56424, -0.5606, -0.0388800000000001, 0.18772, -0.0362400000000001,
0.00854000000000001, -0.00138, -0.24024, -0.04442, -0.81038,
-0.40182, -0.16648, -0.25648, -0.157, -0.23036, -0.248, -0.12892,
-0.50492, -0.718, -0.7824, -0.81932, -0.716, -0.93224, -0.70288,
-0.77792, -1.53836, -0.67052, -0.568, -0.5904, -0.59192, -0.69952,
-0.87732, -0.67696, -1.61956, -0.5444), MGE = c(1.58768, 1.6152,
1.53288, 1.52972, 1.12908, 1.50552, 1.48988, 1.67552, 1.55052,
1.23556, 1.27284, 1.21336, 0.84592, 1.30172, 1.14048, 1.26828,
1.20884, 1.21764, 1.22876, 1.22168, 1.27944, 1.22528, 1.26932,
1.25408, 1.183, 1.38032, 1.33416, 0.95584, 1.31188, 1.39796,
1.33848, 1.4458, 1.18416, 1.23868, 1.22968, 1.17838, 1.17278,
1.13368, 1.11374, 1.31642, 1.14034, 1.21984, 1.17128, 1.16364,
1.15036, 1.12984, 1.22484, 1.17244, 1.2768, 1.55744, 1.66964,
1.54848, 1.17416, 1.56424, 1.48928, 1.9326, 1.54588, 1.228, 1.29096,
1.39296, 1.38432, 1.275, 1.32704, 1.9442, 1.35128)), row.names = c(NA,
-65L), class = "data.frame")
I am trying to make a PCA plot using ggplot and geom_point.
I would like to illustrate 3 factors (Diet, Time, Antibiotics).
I thought I could outline the points in black for one factor.
However this isn't showing the third factor (Time) for the Fill color.
Here is a subset of my data:
> dput(dat.pcx.annot.test)
structure(list(PC1 = c(25.296379160162, 1.4703101394886, 11.4138097811008,
1.41798772574591, 23.7253675969881, 15.5683516005535, -34.6012195481675,
-25.7129281491955, -2.97230018393742, 4.83421092719293, -0.0274189140249825,
23.227939504077, 15.2002258785889, -35.2243685702227, -34.2537374460037,
-7.6380794043063), PC2 = c(27.2678813936857, -9.88577494210313,
-6.19394322321806, -8.88953660465497, 33.6791127012231, -13.2912233546802,
7.77877968081575, 2.7371646557436, -8.41929538502921, -11.5151849519265,
-9.40733576034963, 32.3549860618533, -11.2170071727855, 10.0455709347794,
3.05679707335492, -6.66218028060621), Diet = structure(c(1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L), .Label = c("RC",
"WD"), class = "factor"), Time = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), .Label = c("ZT14",
"ZT2"), class = "factor"), Antibiotics = structure(c(2L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L), .Label = c("Antibiotics ",
"None"), class = "factor")), row.names = c(1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 18L, 19L, 20L, 21L, 22L), class = "data.frame")
Here is the plotting command :
# PCA scatter of PC1 vs PC2: Diet is mapped to colour and Antibiotics to shape.
# NOTE(review): Time is mapped to `Fill` (capital F) here; the corrected call
# further down in this file uses lower-case `fill` together with
# scale_shape_manual(values = c(21, 22)).
ggplot(dat.pcx.annot.test,aes(x=PC1,y=PC2,color=Diet,shape=Antibiotics,Fill=Time))+
geom_point(size=3,alpha=0.5)+
scale_color_manual(values = c("black","white") )
And the plot it produces:
I thought if I had both color and fill specified then they would both show.
I would like black outlines for Antibiotics, and Fill color for Time.
Right now Time is not represented.
Any help on how to simultaneously view the 3 factors.
Thanks
Yes I had a fill typo. And I finally figured out how to get the legends to correspond. Here is my final answer.
# Final PCA plot: Diet -> outline colour, Antibiotics -> shape, Time -> fill.
ggplot(dat.pcx.annot,aes(x=PC1,y=PC2,color=Diet,shape=Antibiotics,fill=Time))+
geom_point(size=3)+
# Shapes 21 and 22 are point symbols that have both an outline colour and a fill.
scale_shape_manual(values = c(21, 22) )+
scale_color_manual(values = c("black","white") )+
scale_fill_manual(values=c("#EC9DAE","#AEDE94"))+
# PC1var / PC2var are not defined in this snippet; presumably axis-label
# strings prepared earlier -- TODO confirm.
xlab(PC1var)+
ylab(PC2var)+
# Draw the fill and colour legend keys with shape 21 so the keys can show them.
guides(fill=guide_legend(override.aes=list(shape=21)))+
guides(color=guide_legend(override.aes=list(shape=21)))
# NOTE(review): the line above does not end in `+`, so the guides() call below
# is a separate top-level expression and is NOT added to the plot. Confirm
# whether it was meant to replace the two guides() calls above.
guides(fill=guide_legend(override.aes=list(shape=21,fill=c("#EC9DAE","#AEDE94"),color=c("black","white"))))
# No `plot` argument is given, so ggsave() falls back to the last plot.
ggsave("cohort2_pca.pdf")
I have some data structured in the same way as the following:
structure(list(respectfromsuperior = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, NA, 2L, 1L, 1L, 1L, 1L, 2L), .Label = c("agree",
"disagree"), class = "factor"), respectideserve = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L), .Label = c("agree",
"disagree"), class = "factor"), undesirablechange = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, NA, 2L, 2L,
2L, 2L, 2L, 1L, 1L, NA, 1L, 2L, 1L, 2L, 2L, 2L), .Label = c("agree",
"disagree"), class = "factor"), jobsecuritypoor = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("agree",
"disagree"), class = "factor"), promotionprospectsadequate = structure(c(2L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L,
2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L), .Label = c("agree",
"disagree"), class = "factor"), salaryadequate = structure(c(2L,
1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("agree",
"disagree"), class = "factor"), branch = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Edinburgh",
"Head Office", "Manchester"), class = "factor")), .Names = c("respectfromsuperior",
"respectideserve", "undesirablechange", "jobsecuritypoor", "promotionprospectsadequate",
"salaryadequate", "branch"), class = "data.frame", row.names = c(1L,
2L, 4L, 6L, 10L, 11L, 13L, 15L, 16L, 17L, 19L, 20L, 22L, 23L,
25L, 27L, 29L, 30L, 32L, 33L, 34L, 35L, 39L, 40L, 41L, 42L, 43L,
44L, 45L))
I would like to use ggplot2 to plot a bar graph with the following features:
the bars representing percentage of respondents who agree with
statements in columns 2:6 of the data (disagree not plotted). Percentage calculated as a
percentage of branch members (not as percentage of total
respondents)
bars grouped by branch on the x axis
the questions (columns 2:6) are used as the 'Fill' argument
I've tried playing around with the code below but not able to work it out:
# Long format: one row per response, keyed by branch.
data.r <- melt(rewitemsbr, id.vars = "branch")
# Dodged bars counting each answer value, filled by question.
ggplot(data.r, aes(x = value, fill = variable)) +
  geom_bar(stat = "count", position = position_dodge())
this is the best I've come up with:
Any help very much appreciated thank you.
You can try the following.
# get the stats using aggregate
# Per branch and per question: the share of respondents answering "agree".
# NA answers are not counted as "agree" but remain in the denominator
# (length(x)), so each value is a fraction of all members of the branch.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
res <- aggregate(d[, 1:6], list(d$branch),
                 function(x) sum(x == "agree", na.rm = TRUE) / length(x))
res
Group.1 respectfromsuperior respectideserve undesirablechange jobsecuritypoor promotionprospectsadequate salaryadequate
1 Edinburgh 1.0 0.8888889 0.1111111 0.0 0.6666667 0.4444444
2 Head Office 0.7 0.3000000 0.4000000 0.2 0.2000000 0.0000000
3 Manchester 0.8 0.8000000 0.2000000 0.1 0.6000000 0.2000000
# to long format
library(reshape2)
res_long <- melt(res, id.vars = "Group.1")
# plot
# One group of bars per branch, one bar per question, height = precomputed share.
ggplot(res_long, aes(x = Group.1, y = value, fill = variable)) +
  geom_bar(stat = "identity", position = "dodge")
A factorial combination of 16 treatments (4*2*2) was replicated three times and laid out in a strip-split block. Treatments consisted of eight site preparations (4*2) applied as whole plot treatments and two levels of weeding (weeding/no-weeding) were applied randomly to subplots. The analysis was run in Genstat giving the following results:
Variate: result
Source of variation d.f. s.s. m.s. v.r. F pr.
Rep stratum 2 35.735 17.868
Rep.Burning stratum
Burning 1 0.003 0.003 0.00 0.972
Residual 2 3.933 1.966 1.53
Rep.Site_prep stratum
Site_prep 3 7.981 2.660 0.45 0.727
Residual 6 35.477 5.913 4.61
Rep.Burning.Site_prep stratum
Burning.Site_prep 3 2.395 0.798 0.62 0.626
Residual 6 7.691 1.282 0.60
Rep.Burning.Site_prep.*Units* stratum
Weeding 1 13.113 13.113 6.13 0.025
Burning.Weeding 1 0.486 0.486 0.23 0.640
Site_prep.Weeding 3 17.703 5.901 2.76 0.076
Burning.Site_prep.Weed.3 3.425 1.142 0.53 0.666
Residual 16 34.248 2.141
Total 47 162.190
I want to repeat these results in R. I used both the base::aov function and the lmerTest::lmer function. I managed to get the correct results with aov using function
result ~ Burning * Weeding * Site.prep + Error(Rep/Burning*Site.prep). With lmer I used the function
result ~ Burning*Site.prep*Weeding+(1|Rep/(Burning:Site.prep)) giving me only partially correct results. The SS values and the F-values for Burning, Site.prep and Burning:Site.prep deviated (although not by much) from the Genstat results, but the Weeding and Weeding interactions gave the same SS and F-values as the Genstat output.
I would like to know how I should specify the lmer model to reproduce the Genstat and aov results.
Data and code below:
x <- structure(list(
Rep = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("1", "2", "3"
), class = "factor"),Burning = structure(c(1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L), .Label = c("Burn",
"No-burn"), class = "factor"), Site.prep = structure(c(4L, 4L,4L, 4L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L),
.Label = c("Chop_Pit", "Chop_Rip", "Pit", "Rip"), class = "factor"), Weeding = structure(c(1L,
2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L),
.Label = c("Weedfree", "Weedy"), class = "factor"),
Dbh14 = c(27.4, 28.4083333333333, 27.7066666666667, 27.3461538461538, 28.6, 28.3333333333333, 27.0909090909091,
27.8076923076923, 27.1833333333333, 27.5461538461538, 24.3076923076923,
29.3461538461538, 27.4, 25.1, 26.61, 28.0461538461538, 27.71,
25.2533333333333, 25.3833333333333, 24.2307692307692, 24.2533333333333,
24.95, 24.34375, 26.9909090909091, 24.775, 25.9076923076923,
25.1666666666667, 25.9933333333333, 27.0466666666667, 30.5625,
27.36, 25.2636363636364, 29.6846153846154, 27.7, 28.3071428571429,
29.4857142857143, 27.025, 30.1, 31.2454545454545, 24.2888888888889,
28.4875, 29.23, 30, 28.5, 29.3615384615385, 27.45, 28.8153846153846,
29.1866666666667)), .Names = c("Rep", "Burning", "Site.prep",
"Weeding", "result"), class = "data.frame", row.names = c(NA, -48L))
# aov fit with an Error() term built from Rep, Burning and Site.prep.
model1 <- aov(
  result ~ Burning * Weeding * Site.prep + Error(Rep / Burning * Site.prep),
  data = x
)
summary(model1)
# lmer fit with a single random-intercept term over Rep/(Burning:Site.prep).
model2 <- lmer(
  result ~ Burning * Site.prep * Weeding + (1 | Rep / (Burning:Site.prep)),
  data = x
)
anova(model2)
Applying the three-way split-plot-factorial ANOVA example from the site mentioned by #cuttlefish44, leads to:
library(lme4)
library(nlme)
# aov with an Error() term built from Rep, Burning and Site.prep.
m1 <- aov(
  result ~ Weeding * Burning * Site.prep + Error(Rep / Burning * Site.prep),
  data = x
)
# lmer with separate random intercepts for Rep, Burning:Rep and Site.prep:Rep.
m2 <- lmer(
  result ~ Weeding * Burning * Site.prep +
    (1 | Rep) + (1 | Burning:Rep) + (1 | Site.prep:Rep),
  data = x
)
# lme fitted by ML, with the random part given as a pdBlocked structure
# grouped by Rep; only the anova table of the fit is kept.
m3 <- anova(
  lme(
    result ~ Weeding * Burning * Site.prep,
    random = list(
      Rep = pdBlocked(list(~1, pdIdent(~ Burning - 1), pdIdent(~ Site.prep - 1)))
    ),
    method = "ML",
    data = x
  )
)
summary(m1)
anova(m2)
m3
Except for Site.prep, the results match. Moreover, the results between lmer() and lme() are pretty similar (also for Site.prep). I'm not sure whether this is the result of differences in modelling approaches: the multi-level approach takes both within and between effects into account.
I'm new to R and have the following challenge;
I want to create a visualization that basically combines 2 kinds of 'heatmaps' in order to visualize at what times there are truly dark skies (for astronomy). For this I want to have a heatmap that visualizes the brightness of the moon based on the moonrise and moonset times and the phase of the moon. On top of this we can then plot a band-like heatmap, with some transparency, for the time the sun is up.
I'm not sure if this is going to work visually or if I need to find some other solution, however this seems like a good challenge to get into R some more.
But I could use some pointers as I'm stuck already loading the matrix of size 24 (hours) x 30 (days) with all the 720 values. When trying to create a basic data.frame from the vectors I get the error that the number of rows are inconsistent.
Furthermore I have some heatmap examples working already, but I'm not sure how to combine 2 of them in the same plot like I described.
As an illustration the current 'heatmap' as it is in excel
And some data:
MOON
moon <- structure(list(X1.9.12 = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L), .Label = c("0%", "100%"), class = "factor"), X2.9.12 = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("0%", "98%"), class = "factor"),
X3.9.12 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L
), .Label = c("0%", "94%"), class = "factor"), X4.9.12 = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L), .Label = c("0%", "89%"), class = "factor"),
X5.9.12 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L
), .Label = c("0%", "82%"), class = "factor"), X6.9.12 = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L), .Label = c("0%", "74%"), class = "factor"),
X7.9.12 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("0%", "65%"), class = "factor"), X8.9.12 = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("0%", "56%"), class = "factor"),
X9.9.12 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("0%", "47%"), class = "factor"), X10.9.12 = structure(c(2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0%", "37%"), class = "factor"),
X11.9.12 = structure(c(2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("0%", "28%"), class = "factor"), X12.9.12 = structure(c(2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0%", "20%"), class = "factor"),
X13.9.12 = structure(c(2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L
), .Label = c("0%", "12%"), class = "factor"), X14.9.12 = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), .Label = c("0%", "6%"), class = "factor"),
X15.9.12 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L
), .Label = c("0%", "2%"), class = "factor"), X16.9.12 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "0%", class = "factor"),
X17.9.12 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L
), .Label = c("0%", "1%"), class = "factor")), .Names = c("X1.9.12",
"X2.9.12", "X3.9.12", "X4.9.12", "X5.9.12", "X6.9.12", "X7.9.12",
"X8.9.12", "X9.9.12", "X10.9.12", "X11.9.12", "X12.9.12", "X13.9.12",
"X14.9.12", "X15.9.12", "X16.9.12", "X17.9.12"), class = "data.frame", row.names = c("0:00:00",
"1:00:00", "2:00:00", "3:00:00", "4:00:00", "5:00:00", "6:00:00",
"7:00:00", "8:00:00", "9:00:00", "10:00:00", "11:00:00", "12:00:00",
"13:00:00", "14:00:00", "15:00:00", "16:00:00", "17:00:00", "18:00:00",
"19:00:00", "20:00:00", "21:00:00", "22:00:00", "23:00:00"))
SUN
September
Day Sunrise Sunset
1 6:52 20:26
2 6:54 20:24
3 6:56 20:22
4 6:57 20:20
5 6:59 20:17
6 7:00 20:15
7 7:02 20:13
8 7:04 20:10
9 7:05 20:08
10 7:07 20:06
11 7:08 20:05
12 7:09 20:02
13 7:11 20:00
14 7:13 19:58
15 7:14 19:55
16 7:16 19:53
17 7:17 19:51
18 7:19 19:48
19 7:21 19:46
20 7:22 19:44
21 7:25 19:40
22 7:26 19:38
23 7:28 19:35
24 7:30 19:33
25 7:31 19:31
26 7:33 19:28
27 7:35 19:26
28 7:36 19:24
29 7:38 19:21
30 7:40 19:19
So from what I understood, there are basically two questions:
Data organization
The easiest would be, if you'd have all data in one data.frame in long format. I.e. for each combination of time and date you have one row, with additional columns for the moon and sun intensity.
So we start with melting and fixing the moon data:
library(reshape2)
# Keep the hour labels (the row names) as a regular column.
moon[["time"]] <- rownames(moon)
# One row per (time, date) pair; the cell value lands in column `moon`.
moon <- melt(moon, id.vars = "time", variable.name = "date", value.name = "moon")
# Remove the "X" that prefixes the former column names (e.g. "X1.9.12").
moon[["date"]] <- sub("X(.*)", "\\1", moon[["date"]])
# Strip the percent sign and store one minus the resulting fraction.
moon[["moon"]] <- 1 - as.numeric(sub("%", "", moon[["moon"]])) / 100
Now we bring the sun data into a comparable form, by at least giving them the same identifier for the date:
# Append ".9.12" to the day number so it has the same form as the moon dates.
sun$Day <- paste0(sun$Day, ".", "9.12")
Next step is to merge the data by the date resp. Day and to set a comparable column for the sun intensity as is given already for the moon intensity. This can be done by casting the times to a time format and compare Sunrise and Sunset with the actual time:
# Attach each day's sunrise and sunset to every hourly moon row of that day.
mdf <- merge(moon, sun, by.x = "date", by.y = "Day")
# Parse the clock strings; sunrise and sunset are rounded to the full hour.
mdf$time.tmp <- strptime(mdf$time, format = "%H:%M")
mdf$Sunrise <- round(strptime(mdf$Sunrise, format = "%H:%M"), units = "hours")
mdf$Sunset <- round(strptime(mdf$Sunset, format = "%H:%M"), units = "hours")
# 1 when the hour lies between rounded sunrise and sunset (inclusive), else 0.
mdf$sun <- as.numeric(mdf$time.tmp >= mdf$Sunrise & mdf$time.tmp <= mdf$Sunset)
# Keep only the columns needed for plotting, then show a few rows.
mdf <- mdf[, c("date", "time", "moon", "sun")]
mdf[5:10, ]
date time moon sun
1.9.12 4:00:00 0 0
1.9.12 5:00:00 0 0
1.9.12 6:00:00 0 0
1.9.12 7:00:00 0 1
1.9.12 8:00:00 1 1
1.9.12 9:00:00 1 1
Plotting
Adding multiple layers with different transparencies begs literally for ggplot2. In order to use this in a proper way, there is one more data manipulation necessary, which ensures the proper order on the axes: date and time have to be converted to factors with factor levels ordered not lexically, but by time:
# Convert date and time to factors whose levels are in chronological order
# instead of lexical order.
# No temporaries are created inside within(): any variable assigned there
# would be added to the data frame as a column.
mdf <- within(mdf, {
  # Distinct dates, ordered by the calendar date they encode (day.month.year).
  date <- factor(
    date,
    levels = unique(date)[order(as.Date(unique(date), format = "%d.%m.%y"))]
  )
  # Order the *distinct* times, as is done for date above. Ordering the full
  # column would produce indices beyond length(unique(time)), i.e. NA levels,
  # and would only give the right result for a lucky row order.
  # Levels run from the latest to the earliest time.
  time <- factor(
    time,
    levels = unique(time)[
      order(strptime(unique(time), format = "%H:%M:%S"), decreasing = TRUE)
    ]
  )
})
This can be plotted now:
library(ggplot2)
# Two tile layers over the same date x time grid; alpha carries the sun and
# moon values, each layer with its own fixed fill colour.
ggplot(mdf, aes(x = date, y = time)) +
  geom_tile(aes(alpha = sun), fill = "goldenrod1") +
  geom_tile(aes(alpha = moon), fill = "dodgerblue3") +
  scale_alpha_continuous("moon", range = c(0, 0.5)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Which gives you the following result