how to use ggplot conditional on data - r

I asked this question and it seams ggplot2 currently has a bug with empty data.frames.
Therefore I am trying to check if the dataframe is empty, before I make the plot. But what ever I come up with, it gets really ugly, and doesn't work. So I am asking for your help.
example data:
SOdata <- structure(list(id = 10:55, one = c(7L, 8L, 7L, NA, 7L, 8L, 5L,
7L, 7L, 8L, NA, 10L, 8L, NA, NA, NA, NA, 6L, 5L, 6L, 8L, 4L,
7L, 6L, 9L, 7L, 5L, 6L, 7L, 6L, 5L, 8L, 8L, 7L, 7L, 6L, 6L, 8L,
6L, 8L, 8L, 7L, 7L, 5L, 5L, 8L), two = c(7L, NA, 8L, NA, 10L,
10L, 8L, 9L, 4L, 10L, NA, 10L, 9L, NA, NA, NA, NA, 7L, 8L, 9L,
10L, 9L, 8L, 8L, 8L, 8L, 8L, 9L, 10L, 8L, 8L, 8L, 10L, 9L, 10L,
8L, 9L, 10L, 8L, 8L, 7L, 10L, 8L, 9L, 7L, 9L), three = c(7L,
10L, 7L, NA, 10L, 10L, NA, 10L, NA, NA, NA, NA, 10L, NA, NA,
4L, NA, 7L, 7L, 4L, 10L, 10L, 7L, 4L, 7L, NA, 10L, 4L, 7L, 7L,
7L, 10L, 10L, 7L, 10L, 4L, 10L, 10L, 10L, 4L, 10L, 10L, 10L,
10L, 7L, 10L), four = c(7L, 10L, 4L, NA, 10L, 7L, NA, 7L, NA,
NA, NA, NA, 10L, NA, NA, 4L, NA, 10L, 10L, 7L, 10L, 10L, 7L,
7L, 7L, NA, 10L, 7L, 4L, 10L, 4L, 7L, 10L, 2L, 10L, 4L, 12L,
4L, 7L, 10L, 10L, 12L, 12L, 4L, 7L, 10L), five = c(7L, NA, 6L,
NA, 8L, 8L, 7L, NA, 9L, NA, NA, NA, 9L, NA, NA, NA, NA, 7L, 8L,
NA, NA, 7L, 7L, 4L, NA, NA, NA, NA, 5L, 6L, 5L, 7L, 7L, 6L, 9L,
NA, 10L, 7L, 8L, 5L, 7L, 10L, 7L, 4L, 5L, 10L), six = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("2010-05-25",
"2010-05-27", "2010-06-07"), class = "factor"), seven = c(0.777777777777778,
0.833333333333333, 0.333333333333333, 0.888888888888889, 0.5,
0.888888888888889, 0.777777777777778, 0.722222222222222, 0.277777777777778,
0.611111111111111, 0.722222222222222, 1, 0.888888888888889, 0.722222222222222,
0.555555555555556, NA, 0, 0.666666666666667, 0.666666666666667,
0.833333333333333, 0.833333333333333, 0.833333333333333, 0.833333333333333,
0.722222222222222, 0.833333333333333, 0.888888888888889, 0.666666666666667,
1, 0.777777777777778, 0.722222222222222, 0.5, 0.833333333333333,
0.722222222222222, 0.388888888888889, 0.722222222222222, 1, 0.611111111111111,
0.777777777777778, 0.722222222222222, 0.944444444444444, 0.555555555555556,
0.666666666666667, 0.722222222222222, 0.444444444444444, 0.333333333333333,
0.777777777777778), eight = c(0.666666666666667, 0.333333333333333,
0.833333333333333, 0.666666666666667, 1, 1, 0.833333333333333,
0.166666666666667, 0.833333333333333, 0.833333333333333, 1, 1,
0.666666666666667, 0.666666666666667, 0.333333333333333, 0.5,
0, 0.666666666666667, 0.5, 1, 0.666666666666667, 0.5, 0.666666666666667,
0.666666666666667, 0.666666666666667, 0.333333333333333, 0.333333333333333,
1, 0.666666666666667, 0.833333333333333, 0.666666666666667, 0.666666666666667,
0.5, 0, 0.833333333333333, 1, 0.666666666666667, 0.5, 0.666666666666667,
0.666666666666667, 0.5, 1, 0.833333333333333, 0.666666666666667,
0.833333333333333, 0.666666666666667), nine = c(0.307692307692308,
NA, 0.461538461538462, 0.538461538461538, 1, 0.769230769230769,
0.538461538461538, 0.692307692307692, 0, 0.153846153846154, 0.769230769230769,
NA, 0.461538461538462, NA, NA, NA, NA, 0, 0.615384615384615,
0.615384615384615, 0.769230769230769, 0.384615384615385, 0.846153846153846,
0.923076923076923, 0.615384615384615, 0.692307692307692, 0.0769230769230769,
0.846153846153846, 0.384615384615385, 0.384615384615385, 0.461538461538462,
0.384615384615385, 0.461538461538462, NA, 0.923076923076923,
0.692307692307692, 0.615384615384615, 0.615384615384615, 0.769230769230769,
0.0769230769230769, 0.230769230769231, 0.692307692307692, 0.769230769230769,
0.230769230769231, 0.769230769230769, 0.615384615384615), ten = c(0.875,
0.625, 0.375, 0.75, 0.75, 0.75, 0.625, 0.875, 1, 0.125, 1, NA,
0.625, 0.75, 0.75, 0.375, NA, 0.625, 0.5, 0.75, 0.875, 0.625,
0.875, 0.75, 0.625, 0.875, 0.5, 0.75, 0, 0.5, 0.875, 1, 0.75,
0.125, 0.5, 0.5, 0.5, 0.625, 0.375, 0.625, 0.625, 0.75, 0.875,
0.375, 0, 0.875), elleven = c(1, 0.8, 0.7, 0.9, 0, 1, 0.9, 0.5,
0, 0.8, 0.8, NA, 0.8, NA, NA, 0.8, NA, 0.4, 0.8, 0.5, 1, 0.4,
0.5, 0.9, 0.8, 1, 0.8, 0.5, 0.3, 0.9, 0.2, 1, 0.8, 0.1, 1, 0.8,
0.5, 0.2, 0.7, 0.8, 1, 0.9, 0.6, 0.8, 0.2, 1), twelve = c(0.666666666666667,
NA, 0.133333333333333, 1, 1, 0.8, 0.4, 0.733333333333333, NA,
0.933333333333333, NA, NA, 0.6, 0.533333333333333, NA, 0.533333333333333,
NA, 0, 0.6, 0.533333333333333, 0.733333333333333, 0.6, 0.733333333333333,
0.666666666666667, 0.533333333333333, 0.733333333333333, 0.466666666666667,
0.733333333333333, 1, 0.733333333333333, 0.666666666666667, 0.533333333333333,
NA, 0.533333333333333, 0.6, 0.866666666666667, 0.466666666666667,
0.533333333333333, 0.333333333333333, 0.6, 0.6, 0.866666666666667,
0.666666666666667, 0.6, 0.6, 0.533333333333333)), .Names = c("id",
"one", "two", "three", "four", "five", "six", "seven", "eight",
"nine", "ten", "elleven", "twelve"), class = "data.frame", row.names = c(NA,
-46L))
And the plot
iqr <- function(x, ...) {
qs <- quantile(as.numeric(x), c(0.25, 0.5, 0.75), na.rm = T)
names(qs) <- c("ymin", "y", "ymax")
qs
}
magic <- function(y, ...) {
high <- median(SOdata[[y]], na.rm=T)+1.5*sd(SOdata[[y]],na.rm=T)
low <- median(SOdata[[y]], na.rm=T)-1.5*sd(SOdata[[y]],na.rm=T)
ggplot(SOdata, aes_string(x="six", y=y))+
stat_summary(fun.data="iqr", geom="crossbar", fill="grey", alpha=0.3)+
geom_point(data = SOdata[SOdata[[y]] > high,], position=position_jitter(w=0.1, h=0),col="green", alpha=0.5)+
geom_point(data = SOdata[SOdata[[y]] < low,], position=position_jitter(w=0.1, h=0),col="red", alpha=0.5)+
stat_summary(fun.y=median, geom="point",shape=18 ,size=4, col="orange")
}
for (i in names(SOdata)[-c(1,7)]) {
p<- magic(i)
ggsave(paste("magig_plot_",i,".png",sep=""), plot=p, height=3.5, width=5.5)
}
The problem is that sometimes in the call to geom_point the subset returns an empty dataframe, which sometimes (!) causes ggplot2 to plot all the data instead of none of the data.
geom_point(data = SOdata[SOdata[[y]] > high,], position=position_jitter(w=0.1, h=0),col="green", alpha=0.5)+
This is kindda of important to me, and I am really stuck trying to find a solution. Any help that will get me started is much appreciated.
Thanks in advance.

I guess you could replace this
magic <- function(y, ...) {
high <- median(SOdata[[y]], na.rm=T)+1.5*sd(SOdata[[y]],na.rm=T)
low <- median(SOdata[[y]], na.rm=T)-1.5*sd(SOdata[[y]],na.rm=T)
ggplot(SOdata, aes_string(x="six", y=y))+
stat_summary(fun.data="iqr", geom="crossbar", fill="grey", alpha=0.3)+
geom_point(data = SOdata[SOdata[[y]] > high,], position=position_jitter(w=0.1, h=0),col="green", alpha=0.5)+
geom_point(data = SOdata[SOdata[[y]] < low,], position=position_jitter(w=0.1, h=0),col="red", alpha=0.5)+
stat_summary(fun.y=median, geom="point",shape=18 ,size=4, col="orange")
}
with something like
magic <- function(y, ...) {
high <- median(SOdata[[y]], na.rm=T)+1.5*sd(SOdata[[y]],na.rm=T)
low <- median(SOdata[[y]], na.rm=T)-1.5*sd(SOdata[[y]],na.rm=T)
k <- SOdata[[y]] > high
z <- SOdata[[y]] < low
k[is.na(k)]<- FALSE
z[is.na(z)]<- FALSE
p <- ggplot(SOdata, aes_string(x="six", y=y))+
stat_summary(fun.data="iqr", geom="crossbar", fill="grey", alpha=0.3)
if (sum(k) > 0) {p <- p + geom_point(data = SOdata[k,], position=position_jitter(w=0.1, h=0),col="green", alpha=0.5)}
if (sum(z) > 0) {p <- p + geom_point(data = SOdata[z,], position=position_jitter(w=0.1, h=0),col="red", alpha=0.5)}
p + stat_summary(fun.y=median, geom="point",shape=18 ,size=4, col="orange")
}

Related

Different colors for every column in heat map using geom_tile()

I am trying to get a heat map where every column has a different color.
I have a heatmap like this:
# install.packages("reshape")
library(reshape)
library(ggplot2)
# Data
set.seed(8)
m <- matrix(round(rnorm(200), 2), 5, 5)
colnames(m) <- paste("Row", 1:5)
rownames(m) <- paste("col", 1:5)
# long format
df <- melt(m)
colnames(df) <- c("x", "y", "value")
ggplot(df, aes(x = x, y = y, fill = value)) +
geom_tile()
I would like to get for each columun col1,col2,col3,col4, and col5 a different color.
For example:
For col1 blue, col2 2 green, violet for col3, yellow for col4 and orange in col5.
I need to catch these ideas because I am doing the next plot with the next dataset:
dput(bdd)
structure(list(var = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L), .Label = c("var_1", "var_2",
"var_3", "var_4", "var_5", "var_6", "var_7", "var_8", "var_9",
"var_10", "var_11", "var_12", "var_13", "var_14"), class = "factor"),
value = c(4.93, 2.85, 2.075, 1.91, 1.73, 1.34, 0.615, 0.145,
0.14, 0.11, 0.09, 0.06, 0.06, 0.015, 4.13, 1.65, 1.985, 0.51,
5.805, 0.84, 1.28, 0.03, 0.235, 0.145, 0.145, 0.205, 0.03,
0.2, 1.135, 2.175, 2.735, 1.69, 0.86, 0.715, 1.905, 0.17,
0.86, 0.055, 0.03, 0.075, 0.14, 0.005, 3.55, 4.225, 5.985,
0.185, 1.17, 0.91, 0.49, 1.34, 0.485, 0.1, 0.145, 1.145,
0.53, 0.11, 12.06, 1.995, 2.205, 0.48, 1.875, 2.03, 0.335,
0.26, 1.25, 0.225, 0.245, 0.52, 0.075, 0.04), country = structure(c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), .Label = character(0)),
country1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L), .Label = c("C1", "C2", "C3", "C4", "C5"), class = "factor")), row.names = c(NA,
-70L), class = c("tbl_df", "tbl", "data.frame"))
ggplot(data=bdd,aes(x=country1,y=var,fill=value))+
geom_tile(aes(alpha=value,fill=country),color="white")+
geom_text(aes(label = sprintf("%0.3f", round(value, digits = 3))))+
scale_fill_gradient(low="white", high="blue")+
scale_alpha(range = c(0, 1))+
theme_classic()+theme(axis.title.x=element_blank(), axis.text.x=element_text(angle=0,hjust=0.5,vjust=0.5), legend.position = "none")+
labs( fill="% ",y = "y ")
But what I need is every column with a different color as in the first example.
Best.
ggplot(data=bdd,aes(x=country1,y=var,fill=country1))+
geom_tile(aes(alpha=value),color="white")+
geom_text(aes(label = sprintf("%0.3f", round(value, digits = 3))))+
scale_alpha(range = c(0, 1))+
theme_classic()+theme(axis.title.x=element_blank(), axis.text.x=element_text(angle=0,hjust=0.5,vjust=0.5), legend.position = "none")+
labs( fill="% ",y = "y ")
To specify the colors for each column to be different than the default spectrum, you could use one of the discrete fill options like scale_fill_discrete, scale_fill_manual, or a custom palette like ggthemes::scale_fill_tableau(palette = "Nuriel Stone")

How to calculate regression residuals in R for each individual in a longitudinal analysis?

I am working on a longitudinal/repeated measures multilevel model (MLM). Usually, for time-varying covariates (in my case "weekly gross income/1000"), you would calculate a person-mean centered version of the variable (i.e. deducting the person-year income response from the average of the person's weekly income across all of said person's time points). However, this can lead to bias (see here) and hence a better (more generalisable) approach is to center around a regression line for each individual (as it happens, the residuals from the regression serve this purpose).
Therefore, I need to calculate the following regression, but for each individual (roughly 10,000 individuals with 25,000 observations):
lm(Weekly_Gross_Pay_Main_Job~nYear, data=df)
Then, the really critical part is that I need to extract the residuals to a separate column in my main dataset, matched up with each person. These residuals will take the place of my group-mean centered variable (which will in turn be used in my MLM).
Here is a possible starting point using the function that I have for the group-mean centering. If this could be updated to fit a regression with the residuals output for each person, then that would be ideal (if not, then I am open to other approaches):
#Group mean-centering a variable. Relevant for L1 variables only.
gmc = function(variable, group){
return(ave(variable, group, FUN = function(x){x - mean(x)}))
}
df$Weekly_Gross_Pay_Main_Jobgmc <- gmc(df$Weekly_Gross_Pay_Main_Job, df$Person_ID)
Data extract in long format (where Person_ID is the person, nYear is time, Weekly_Gross_Pay_Main_Job is weekly income/1000 and Weekly_Gross_Pay_Main_Jobgmc is the group-mean centered version):
structure(list(Person_ID = c(100003L, 100003L, 100003L, 100006L,
100006L, 100006L, 100006L, 100010L, 100010L, 100010L, 100010L,
100010L, 100010L, 100011L, 100014L, 100014L, 100014L, 100014L,
100014L, 100016L, 100018L, 100018L, 100018L, 100018L, 100018L,
100018L, 100018L, 100018L, 100018L, 100020L, 100020L, 100020L,
100020L, 100020L, 100020L, 100020L, 100020L, 100020L, 100021L,
100021L, 100024L, 100024L, 100024L, 100024L, 100024L, 100024L,
100024L, 100024L, 100024L, 100024L, 100025L, 100025L, 100025L,
100025L, 100025L, 100025L, 100025L, 100025L, 100027L, 100027L,
100027L, 100027L, 100029L, 100029L, 100029L, 100029L, 100029L,
100031L, 100031L, 100031L, 100032L, 100032L, 100032L, 100033L,
100033L, 100033L, 100033L, 100033L, 100033L, 100034L, 100034L,
100034L, 100037L, 100037L, 100037L, 100037L, 100037L, 100037L,
100037L, 100044L, 100044L, 100044L, 100044L, 100044L, 100044L,
100044L, 100045L, 100045L, 100045L, 100045L), nYear = c(5L, 6L,
7L, 2L, 3L, 4L, 6L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 5L, 6L, 7L,
8L, 9L, 5L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 1L, 2L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 4L, 5L, 6L, 1L, 2L, 3L, 3L, 4L, 5L, 6L, 7L, 8L,
2L, 3L, 5L, 5L, 6L, 7L, 8L, 9L, 11L, 13L, 2L, 3L, 4L, 6L, 7L,
8L, 9L, 4L, 5L, 6L, 7L), Weekly_Gross_Pay_Main_Job = c(0, 0.58,
0.35, 0.035, 0.65, 0.195, 0.43, 0, 0, 0, 0, 0, 0, 0.12, 1.653,
0.967, 1.742, 1.323, 0, 0.709, 0.155, 0.431, 0.235, 0.17, 0.285,
0.357, 0.28, 0.335, 0.375, 0.111, 0.333, 0.582, 0.882, 0.85,
0.944, 1.615, 1.615, 1.35, 0.168, 0.08, 0, 0, 0, 0, 0, 0, 0,
0.134, 0.737, 0, 0.02, 0.372, 0.1, 0.014, 0.307, 0.39, 0.671,
0.5, 0.278, 0.32, 0.425, 0.4, 0.57, 0.917, 0.75, 0.402, 0.437,
0.211, 0.537, 0.54, 0.135, 0.15, 0.65, 0.324, 0.399, 0.497, 0.67,
0.825, 0.825, 0.25, 0.319, 0.35, 0.885, 0.941, 0.975, 0.975,
1.02, 1.096, 1.148, 0.1, 0.11, 0.413, 0.477, 0.578, 0.686, 0.686,
0.511, 0.578, 0.8, 0.75), Weekly_Gross_Pay_Main_Jobgmc = c(-0.31,
0.27, 0.04, -0.2925, 0.3225, -0.1325, 0.1025, 0, 0, 0, 0, 0,
0, 0, 0.516, -0.17, 0.605, 0.186, -1.137, 0, -0.136444444444444,
0.139555555555556, -0.0564444444444445, -0.121444444444444, -0.00644444444444447,
0.0655555555555555, -0.0114444444444444, 0.0435555555555556,
0.0835555555555555, -0.809222222222222, -0.587222222222222, -0.338222222222222,
-0.0382222222222223, -0.0702222222222223, 0.0237777777777777,
0.694777777777778, 0.694777777777778, 0.429777777777778, 0.044,
-0.044, -0.0871, -0.0871, -0.0871, -0.0871, -0.0871, -0.0871,
-0.0871, 0.0469, 0.6499, -0.0871, -0.27675, 0.07525, -0.19675,
-0.28275, 0.01025, 0.09325, 0.37425, 0.20325, -0.07775, -0.03575,
0.06925, 0.04425, -0.0452, 0.3018, 0.1348, -0.2132, -0.1782,
-0.218333333333333, 0.107666666666667, 0.110666666666667, -0.176666666666667,
-0.161666666666667, 0.338333333333333, -0.266, -0.191, -0.093,
0.0800000000000001, 0.235, 0.235, -0.0563333333333333, 0.0126666666666667,
0.0436666666666666, -0.120714285714286, -0.0647142857142858,
-0.0307142857142858, -0.0307142857142858, 0.0142857142857142,
0.0902857142857143, 0.142285714285714, -0.335714285714286, -0.325714285714286,
-0.0227142857142857, 0.0412857142857143, 0.142285714285714, 0.250285714285714,
0.250285714285714, -0.1368, -0.0698000000000001, 0.1522, 0.1022
)), row.names = c(NA, 100L), class = "data.frame")
not sure if I'm reading you right, this might be a very naive answer missing the point, but doesn't "residuals" just work.
Here's a linear mixed effects model with some data i had lying around
some.model<-lme(DV~IV, random=~1|Id, data=df)
head(residuals(some.model))
7 7 24 24 32 32
-0.054135825 -0.054135825 0.064271638 0.064271638 -0.001975424 -0.001975424
If you really want to put it into a column with the idnumber next to it it takes a few more steps. It probably can be done in a single step but i'm really bad.
extra.column<-residuals(some.model)
extra.column.id<-names(residuals(some.model))
extra.column<-residuals(some.model)
cbind(extra.column,extra.column.id)
extra.column extra.column.id
7 "-0.0541358252373243" "7"
7 "-0.0541358252373243" "7"
24 "0.0642716380035857" "24"
24 "0.0642716380035857" "24"
32 "-0.0019754241828096" "32"
32 "-0.0019754241828096" "32"
Sorry if this is not what you're looking for, but check out the residuals command.
Here is how I ended up doing it:
#Before you begin, time needs to be grand-mean centered.
df$nYearmc <- df$nYear-mean(df$nYear, na.rm=TRUE)
#Now to regress the time-varying covariate onto grand-mean centered time and complete the process.
#First, create a group called `by_person`.
df <- tidyr::unite(df, Person_Year, c(Person_ID, nYearmc), remove=FALSE)
by_Person <- dplyr::group_by(df, Person_ID)
#Second, regress the time-varying covariate onto the newly created grand-mean centered time variable and merge with the main data frame.
df.Weekly_Gross_Pay_Main_Job <- dplyr::do(by_Person, augment(lm(Weekly_Gross_Pay_Main_Job~nYearmc, data=.)))
df.Weekly_Gross_Pay_Main_Job <- tidyr::unite(df.Weekly_Gross_Pay_Main_Job, Person_Year, c(Person_ID, nYearmc), remove=FALSE)
df <- merge(df, df.Weekly_Gross_Pay_Main_Job, by="Person_Year")
#Third, copy over the required columns (renaming them would be more efficient, but either way).
df$RegResGrossPay <- df$.resid
#Fourth, do an optional tidy up.
colnames(df)[colnames(df)=="Person_ID.x"] <- "Person_ID"
colnames(df)[colnames(df)=="nYearmc.x"] <- "nYearmc"
colnames(df)[colnames(df)=="Weekly_Gross_Pay_Main_Job.x"] <- "Weekly_Gross_Pay_Main_Job"
df$Person_ID.y <- NULL
df$nYearmc.y <- NULL
df$Weekly_Gross_Pay_Main_Job.y <- NULL
df$.fitted <- NULL
df$.se.fit <- NULL
df$.resid <- NULL
df$.hat <- NULL
df$.sigma <- NULL
df$.cooksd <- NULL
df$.std.resid <- NULL
df.Weekly_Gross_Pay_Main_Job <- NULL
#Fifth, generate plots of the variables you need.
ggplot(df, aes(nYearmc, RegResGrossPay))+geom_line(aes(group=Person_ID), alpha =1/3)+geom_smooth(method="lm",se=FALSE)

Adding x-axis lables on a faceted plot-ggplot2

I have a facetted geom_col() plot with ggplot2 and I would like to have the x-axis labels on each level of the plots. So after each row, I would have the labels. My graph currently looks like this:
dput(res2)
structure(list(X = structure(c(8L, 1L, 7L, 9L, 6L, 4L, 5L, 3L,
2L, 1L, 7L, 9L, 6L, 4L, 5L, 3L, 2L, 8L, 7L, 9L, 6L, 4L, 5L, 3L,
2L, 8L, 1L, 9L, 6L, 4L, 5L, 3L, 2L, 8L, 1L, 7L, 6L, 4L, 5L, 3L,
2L, 8L, 1L, 7L, 9L, 4L, 5L, 3L, 2L, 8L, 1L, 7L, 9L, 6L, 5L, 3L,
2L, 8L, 1L, 7L, 9L, 6L, 4L, 3L, 2L, 8L, 1L, 7L, 9L, 6L, 4L, 5L
), .Label = c("Blue", "Green", "Magenta", "Maroon", "Orange",
"Pink", "Purple", "Red", "Yellow"), class = "factor"), Phenotype = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Blue", "Green", "Magenta",
"Maroon", "Orange", "Pink", "Purple", "Red", "Yellow"), class = "factor"),
heritability = c(0.12, 0.14, 0.34, 0.21, 0.33, 0.35, 0.25,
0.49, 0.19, 0.42, -0.12, 0.4, 0.13, 0.42, 0.47, 0.2, 0.17,
0.14, -0.1, 0.14, 0.45, 0.24, 0.47, -0.28, 0.34, 0.18, 0.15,
0.37, -0.47, 0.12, 0.17, -0.11, 0.53, 0.41, -0.2, 0.14, 0.26,
0.45, 0.41, 0.48, 0.15, -0.35, 0.22, 0.32, 0.29, 0.47, 0.17,
-0.25, 0.27, 0.38, 0.52, -0.11, 0.5, 0.28, 0.34, 0.31, 0.52,
0.14, -0.23, 0.21, 0.11, -0.42, 0.39, 0.32, 0.51, 0.39, 0.15,
0.46, 0.5, 0.42, 0.46, 0.18), pvalue = c(0.05, 0.09, 0.05,
0.05, 0.09, 0.02, 0.01, 0.1, 0.05, 0.04, 0.08, 0.01, 0.08,
0.05, 0.07, 0.06, 0.01, 0.04, 0.04, 0.01, 0.06, 0.1, 0.07,
0.01, 0.05, 0.02, 0.08, 0.1, 0.03, 0.06, 0.02, 0.08, 0.09,
0.01, 0.06, 0.04, 0.07, 0.03, 0.03, 0.07, 0.01, 0.01, 0.06,
0.05, 0.04, 0.06, 0.04, 0.03, 0.04, 0.04, 0.09, 0.1, 0.07,
0.01, 0.08, 0.06, 0.01, 0.07, 0.06, 0.08, 0.09, 0.1, 0.09,
0.01, 0.07, 0.05, 0.07, 0.06, 0.1, 0.1, 0.08, 0.09)), class = "data.frame",
row.names = c(NA, -72L))
And here is my plot code:
A <- ggplot(res2, aes(Phenotype, heritability))
# uses a bar chart, geom_col represents hereditity values as the hights of the bars.
A + geom_col(position = 'stack', fill = "#0000ff") +
# Facets the data according to the Phenotypes in the X column of the data
facet_wrap(.~ X) +
# Theme info: tilts the x-axis labels 90 degrees and pushes labels to be centered below the bars
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .4), plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5))+
labs(title ="Heritability of Phenotype Permutations", subtitle = "P-values indicated")+
# adds the pvalues above the bars, sets their position to be above or below the bar.
geom_text(aes(y = heritability + .06 * sign(heritability), label = pvalue), position = position_dodge(width = 0.9), size = 3.3)
In facet_wrap you can specify scales = 'free_x', which will allow each subplot to have its own x-axis including its own axis labels.
(Similar for free_y or both)
In your example using facet_wrap(.~X, scales= 'free_x') will produce the following plot:

ggplot2 - customize two-factor legend

I am using ggplot2 to plot monthly vertical profiles of soil moisture in two sites, for both observed and modeled data.
I am using interaction to add colours to both factors (month and type). I am also creating two different manual color palettes with the colors I need. This is how to to reproduce the plot:
library(ggplot2)
df1<- structure(list(site = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L), .Label = c("IL_Shabbona_5_NNE", "ME_Limestone_4_NNW",
"ME_Old_Town_2_W", "MI_Chatham_1_SE", "MI_Gaylord_9_SSW", "MN_Goodridge_12_NNW",
"MN_Sandstone_6_W", "NY_Ithaca_13_E", "NY_Millbrook_3_W", "WI_Necedah_5_WNW"
), class = "factor"), month = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L), depth = c(5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 50, 50, 50,
50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
50, 50, 50, 50, 50, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100), value = c(0.38,
0.4, 0.37, 0.32, 0.29, 0.3, 0.24, 0.28, 0.24, 0.26, 0.32, 0.39,
0.13, NaN, 0.13, 0.12, 0.1, 0.1, 0.06, 0.07, 0.09, 0.1, 0.12,
0.13, 0.39, 0.39, 0.37, 0.35, 0.33, 0.31, 0.27, 0.29, 0.27, 0.28,
0.34, 0.38, 0.1, NaN, 0.12, 0.11, 0.09, 0.09, 0.05, 0.06, 0.09,
0.09, 0.11, 0.11, 0.39, 0.41, 0.38, 0.35, 0.34, 0.32, 0.29, 0.33,
0.31, 0.3, 0.34, 0.36, 0.1, NaN, 0.1, 0.1, 0.09, 0.08, 0.05,
0.05, 0.08, 0.08, 0.1, 0.1, 0.32, 0.31, 0.33, 0.34, 0.36, 0.34,
0.29, 0.33, 0.32, 0.31, 0.32, 0.33, 0.06, 0.06, 0.07, 0.06, 0.06,
0.05, 0.03, 0.03, 0.04, 0.05, 0.06, 0.06, 0.4, 0.4, 0.41, 0.41,
0.45, 0.47, 0.43, 0.4, 0.39, 0.38, 0.38, 0.4, 0.05, 0.05, 0.05,
0.06, 0.05, 0.05, 0.04, 0.04, 0.05, 0.05, 0.06, 0.05, 0.35, 0.35,
0.36, 0.33, 0.29, 0.28, 0.27, 0.26, 0.26, 0.28, 0.3, 0.36, 0.35,
0.35, 0.36, 0.33, 0.29, 0.28, 0.27, 0.27, 0.27, 0.28, 0.3, 0.35,
0.34, 0.35, 0.35, 0.34, 0.3, 0.29, 0.28, 0.28, 0.28, 0.29, 0.3,
0.34, 0.28, 0.29, 0.3, 0.32, 0.31, 0.3, 0.29, 0.29, 0.29, 0.3,
0.3, 0.29, 0.26, 0.27, 0.27, 0.29, 0.29, 0.29, 0.28, 0.28, 0.28,
0.29, 0.29, 0.28, 0.38, 0.38, 0.39, 0.38, 0.31, 0.3, 0.29, 0.29,
0.3, 0.31, 0.35, 0.39, 0.36, 0.36, 0.37, 0.37, 0.31, 0.31, 0.29,
0.3, 0.3, 0.31, 0.33, 0.37, 0.37, 0.37, 0.37, 0.38, 0.32, 0.32,
0.31, 0.31, 0.31, 0.32, 0.33, 0.37, 0.31, 0.32, 0.32, 0.34, 0.33,
0.32, 0.31, 0.31, 0.32, 0.32, 0.31, 0.3, 0.27, 0.28, 0.28, 0.29,
0.31, 0.3, 0.3, 0.29, 0.3, 0.3, 0.3, 0.28), type = rep(c("observed","modeled"), each=120)), class = "data.frame", row.names = c(NA,
-240L))
# Create blue and red palettes
mypal.blue <- colorRampPalette(RColorBrewer::brewer.pal(6,"PuBu"))
mypal.red <- colorRampPalette(RColorBrewer::brewer.pal(6,"YlOrRd"))
# Plot
ggplot(df1, aes(x=value, y=-depth, colour=interaction(as.factor(month),type))) +
geom_path(size=1) + geom_point(size=0.7) +
facet_wrap(~ site, nrow=3) +
theme_bw(base_size=20) +
scale_colour_manual(values=c(mypal.blue(12),mypal.red(12))) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(legend.title=element_blank()) + theme(legend.position = c(0.75, 0.13))
However, the legend is a complete mess.
I would like to create two separate legends, loosely based on this example.
one showing orange for observed and blue for modeled
the other one showing the actual color gradients and the months (ideally with the first letter instead of numbers)
How to create such legends?
Updated Answer
It just hit me that there is a relatively straightforward way to hack the legend to get pretty close to what you want. We relabel the legend labels and add a title. The hacky part is that you have to fiddle with the legend title spacing, legend key width, and text size to get the titles lined up over the legend keys.
With all those lines and colors and the complicated legend, the plot seems very busy and difficult to interpret beyond showing that the model doesn't fit the data very well, so maybe it would still be better to consider one of the other options in my or #neilfws's answer. In addition, because the legend title is manually hardcoded, it's not linked to the aesthetic mapping and you therefore have to be careful that "Modeled" and "Observed" are in the right order above the legend keys.
ggplot(df1, aes(x=value, y=-depth, colour=interaction(as.factor(month),type))) +
geom_path(size=1) + geom_point(size=0.7) +
facet_wrap(~ site, nrow=3) +
theme_bw(base_size=20) +
scale_colour_manual(values=c(mypal.blue(12),mypal.red(12)),
labels=rep(month.abb, 2)) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.title=element_text(size=rel(0.6)),
legend.text=element_text(size=rel(0.5)),
legend.key.width=unit(1.1,"cm")) +
labs(colour="Modeled Observed")
Original Answer
AFAIK, there's no way to generate two separate legends for a single aesthetic within the normal ggplot workflow. In this case, that means you can have only a single color legend. Probably you could hack two different color legends by manipulating the underlying ggplot grob structure.
Another option would be to use two different aesthetics. The example below uses linetype to distinguish modeled and observed, but it doesn't provide as much constrast as the two different color sets.
library(tidyverse)
ggplot(df1 %>%
mutate(month=factor(month.abb[month], levels=month.abb)),
aes(x=value, y=-depth, linetype=type, colour=month)) +
geom_path(size=1) + geom_point(size=0.7) +
facet_wrap(~ site, nrow=3) +
theme_bw(base_size=20) +
scale_colour_manual(values=mypal.red(12)) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(legend.title=element_blank())
For reference, here's what your original code produces (minus the change in legend position):
Another option would be to facet by month in addition to type. This takes up more space, but makes it easier to see both the month trend and the difference between modeled and observed.
ggplot(df1 %>%
mutate(month=factor(month.abb[month], levels=month.abb)),
aes(x=value, y=-depth, colour=type)) +
geom_path(size=1) + geom_point(size=0.7) +
facet_grid(month ~ site) +
theme_classic() +
theme(panel.background=element_rect(colour="grey50", fill=NA))
Looking at your data, it seems to me that what you want to visualize can be expressed something like this:
"How do observed values compare to modelled values at different depths, for each site, through time?"
So I would approach the chart differently: plot value versus month, color by type and use facets for site and depth.
library(tidyverse)
df1 %>%
mutate(Month = factor(month.abb[month],
levels = month.abb)) %>%
ggplot(aes(Month, value)) +
geom_point(aes(color = type)) +
facet_grid(depth~site) +
theme_bw()
It's now immediately apparent that the modeled values for site IL_Shabbona_5_NNE are closer to the observed, and more so at shallower depth.

JAGS code for estimating group means with Beta distribution

I'd like to estimate the means and sd's of percent canopy cover for 13 sites (9 are birds and 4 are potential habitats) using JAGS. I'm using a beta distribution to account for the fact that the data are bound by 0 and 1.
I have code for the model statement that works perfectly for other distributions (Poisson and log-normal) and I was attempting to adapt that code but I failed miserably.
Below are the R code, the model statement, and the data. I'm using R 3.1.1 in Windows Vista. If you could look at the model statement and straighten me out I would be very thankful.
Thanks,
Jeff
######## MODEL ##############
model{
for (i in 1:227) {
log(mean[i]) <- a[site[i]]
cover20p[i] ~ dbeta(1, 0.5)
}
for (i in 1:13){
a[i] ~ dnorm(0, tau)
median[i] <- exp(a[i])
}
sd ~ dunif(0, 10)
tau <- 1 / (sd*sd) # precision
}
######### R code ##########
frag <- read.csv("f:\\brazil\\TIandFRAG.csv", header=T)
library(R2jags)
library(rjags)
setwd("f://brazil")
site <- frag$site
cover20p <- frag$cover20p/100
N <- length(frag$site)
jags.data <- list("site", "cover20p")
jags.params <- c("median", "test100MF","test100MT","test100fc","test100fa",
"test100gv","test100hm","test100mc", "test100ca","test100ct", "test10MF",
"test10MT", "test10fc","test10fa", "test10gv", "test10hm", "test10mc", "test10ca", "test10ct",
"test1MF", "test1MT", "test1fc", "test1fa", "test1gv", "test1hm",
"test1mc", "test1ca", "test1ct", "t1est1_con","t2est10_con","t3est100_con",
"t4est1_100","t5est1_10","t6est10_100")
#inits1 <- list(a=0, sd=0)
#inits2 <- list(a=100, sd=50)
#jags.inits <- list(inits1, inits2)
jags.inits <- function() {
list(a=c(0,0,0,0,0,0,0,0,0,0,0,0,0), sd=1)}
jagsfit <- jags(data=jags.data, inits=jags.inits, jags.params,
n.iter=1000000, n.burnin=20000, model.file="fragmodelbeta.txt")
my.coda <- as.mcmc(jagsfit)
summary(my.coda, quantiles=c(0.05, 0.25,0.5,0.75, 0.95))
print(jagsfit, digits=3)
##### DATA ###################
structure(list(site = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L,
10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L
), canopy = c(0.95, 0.8, 0.85, 0.9, 0.35, 0.999, 0.999, 0.999,
0.95, 0.55, 0.9, 0.85, 0.7, 0.65, 0.05, 0.6, 0.999, 0.999, 0.85,
0.9, 1e-04, 0.45, 0.999, 0.7, 0.95, 0.5, 0.95, 0.6, 0.65, 0.7,
0.4, 0.85, 0.6, 0.95, 0.75, 0.9, 0.85, 0.75, 0.7, 0.85, 0.3,
0.7, 0.8, 0.7, 0.75, 0.8, 0.75, 0.95, 0.9, 0.05, 0.85, 0.6, 0.65,
0.5, 0.85, 0.95, 0.85, 0.25, 0.75, 0.999, 0.65, 0.95, 0.8, 0.9,
0.6, 0.8, 0.999, 0.2, 0.8, 0.4, 0.999, 0.95, 0.4, 0.999, 0.999,
0.95, 0.45, 0.2, 0.7, 0.95, 0.7, 0.8, 0.5, 0.85, 0.55, 1e-04,
0.25, 0.45, 0.999, 0.95, 0.999, 0.9, 0.6, 0.35, 0.95, 0.3, 0.999,
0.999, 0.5, 0.4, 0.9, 0.999, 0.7, 0.999, 0.9, 0.999, 0.4, 0.55,
0.8, 0.7, 0.999, 1e-04, 0.8, 1e-04, 0.7, 0.5, 0.8, 0.75, 1e-04,
0.45, 0.1, 1e-04, 0.4, 0.55, 0.4, 0.999, 0.9, 0.9, 0.15, 0.55,
0.35, 0.9, 0.65, 0.25, 0.999, 0.85, 0.999, 0.95, 0.7, 0.5, 0.7,
0.2, 0.95, 0.999, 0.999, 0.25, 0.85, 0.5, 0.8, 0.75, 0.85, 0.7,
0.95, 0.05, 0.65, 0.65, 0.999, 0.999, 0.999, 0.65, 0.4, 0.6,
0.9, 0.85, 0.75, 0.5, 0.65, 0.999, 0.65, 0.55, 0.75, 0.4, 0.9,
0.35, 0.999, 0.999, 0.4, 0.5, 0.8, 0.95, 0.95, 0.55, 0.7, 0.85,
0.8, 0.8, 0.65, 0.999, 0.6, 0.5, 0.999, 0.8, 0.999, 0.45, 0.999,
0.999, 0.8, 0.85, 0.999, 0.999, 0.999, 0.999, 0.5, 0.6, 0.15,
0.75, 0.6, 0.1, 0.05, 1e-04, 0.999, 0.6, 0.1, 0.35, 0.9, 0.9,
0.95, 0.95, 0.9, 0.55, 0.65, 0.9, 0.4, 0.999, 0.65, 0.5, 0.8)), .Names = c("site",
"canopy"), class = "data.frame", row.names = c(NA, -227L))
In your model, you have cover20p as one of the variables, but have the data for canopy in the frag data.frame. I suspect you want canopy[i] ~ dbeta(1,0.5) in your model specification, and canopy <- frag$canopy and jags.params = "median" in your r code.
I think you could use a logit model for your probabilities. Maybe something like the following.
First, I convert your canopy observations back to the format that I suspect they began in, i.e. the number of canopy hits out of 20 samples at each site. I set 0.0001 to 0 and 0.999 to 1, and multiply the other canopy values by 20.
d$hits <- ifelse(d$canopy < 0.05, 0, ifelse(d$canopy > 0.95, 20, d$canopy * 20))
M <- function() {
for (i in 1:n) {
hits[i] ~ dbin(p[site[i]], 20)
}
for (j in 1:nsites) {
logit.p[j] ~ dnorm(mu, sigma^-2)
logit(p[j]) <- logit.p[j]
}
mu ~ dnorm(0, 0.0001) # uninformative prior for grand mean of logit(p)
sigma ~ dunif(0, 10) # uninformative prior for sd of logit(p)
}
j <- jags(list(site=d$site, hits=d$hits, n=nrow(d), nsites=length(unique(d$site))),
NULL, 'p', M)
plot(j$BUGSoutput$summary[-1, '50%'], pch=20, xlab='site', xaxt='n', las=1,
ylim=c(0, 1), ylab=expression("p (median" %+-% "95% credible interval)"))
segments(1:13, j$BUGSoutput$summary[-1, '2.5%'],
y1=j$BUGSoutput$summary[-1, '97.5%'])
axis(1, 1:13, 1:13)

Resources