Pairwise analyse at once in r - r

I have data as follows. For each site I have a certain number of different measurements (value1, value2, value3). My goal is to perform, e.g., a Bartlett test for all possible pairs of sites and for all variables (site id=1 vs site id=2 for each of the values, site id=1 vs site id=3, and so on).
Could you please teach me how to do this in an automated way? Choosing each pair by hand with subset or %in% is quite time-consuming and seems to be the wrong approach.
pair1 = subset(mydata,site id==1|site id==2),
pair2 = subset(mydata,site id==1|site id==3).
etc...
DATA
dput(el)
structure(list(nr = 1:62, site_id = c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), value1 = c(0.135956723, 0.244470396,
0.986831591, 0.272748803, 0.089672362, 0.087918874, 0.29432428,
0.281550906, 0.491512301, 0.202822283, 0.636965524, 0.439072133,
0.512626669, 0.076218623, 0.537676093, 0.410301432, 0.704414491,
0.028086268, 0.934842257, 0.319693894, 0.038503085, 0.724755387,
0.933940599, 0.293119698, 0.206668204, 0.931947832, 0.570267962,
0.153459278, 0.761549617, 0.168553595, 0.125666771, 0.072239583,
0.585168488, 0.434769948, 0.693265848, 0.507971072, 0.784221012,
0.625158967, 0.734257194, 0.745229936, 0.40953356, 0.070758169,
0.468803818, 0.482476343, 0.329618097, 0.690907203, 0.043867132,
0.335846451, 0.910523185, 0.337186798, 0.94565722, 0.468518602,
0.269354849, 0.357422627, 0.660574954, 0.636926103, 0.558315665,
0.489907305, 0.47082103, 0.808036842, 0.80682936, 0.486316865
), value2 = c(0.072786841, 0.53838031, 0.41372062, 0.927891345,
0.681514932, 0.099571511, 0.356290822, 0.22791718, 0.222255425,
0.274876628, 0.215780917, 0.679079775, 0.557144492, 0.768317182,
0.209794907, 0.756651704, 0.950439091, 0.394732921, 0.477008544,
0.248762115, 0.452692267, 0.479918885, 0.617401621, 0.107246095,
0.968902896, 0.581772822, 0.654269288, 0.2403724, 0.309798716,
0.305768959, 0.184387495, 0.035095852, 0.513505392, 0.976717695,
0.713275402, 0.948746684, 0.44320735, 0.222039163, 0.440820346,
0.914348945, 0.824638633, 0.392305879, 0.711367921, 0.013197053,
0.990004958, 0.46783633, 0.368384378, 0.105245106, 0.01894147,
0.351691108, 0.689240176, 0.281890828, 0.643299941, 0.295450072,
0.929042677, 0.451298968, 0.087512416, 0.367461399, 0.101109718,
0.388519279, 0.886552629, 0.371934921), value3 = c(0.862942279,
0.306199206, 0.815403468, 0.120029065, 0.120468166, 0.97214058,
0.605333252, 0.381385396, 0.501217425, 0.159266606, 0.712387132,
0.532604745, 0.581300843, 0.764953483, 0.833804202, 0.576785884,
0.739833632, 0.894288301, 0.533339352, 0.454653122, 0.141139261,
0.820376994, 0.804809068, 0.097680334, 0.286965944, 0.610407569,
0.084827216, 0.428986455, 0.080766377, 0.435308821, 0.93199262,
0.453242669, 0.106639551, 0.191650525, 0.807339195, 0.53331683,
0.101494804, 0.952323476, 0.243649472, 0.903883695, 0.265602323,
0.364928386, 0.239852295, 0.388701845, 0.964790214, 0.031507745,
0.922879901, 0.419279331, 0.923975616, 0.370413352, 0.159053801,
0.450200201, 0.262717668, 0.258232936, 0.604593393, 0.625352584,
0.086596067, 0.876201214, 0.95281149, 0.728431032, 0.232121342,
0.53337486)), .Names = c("nr", "site_id", "value1", "value2",
"value3"), row.names = c(NA, -62L), class = "data.frame")

This is probably not very efficient, but it does what you need.
First we create a matrix with all possible combinations of the site_id. We then create a list with all the subsetted data frames. Finally we apply the function to the list for all value columns.
# Site ids actually present in the data. Using the observed ids (rather than
# 1:n) keeps the pairing correct when ids are not consecutive integers
# starting at 1.
site_ids <- sort(unique(el$site_id))

# combn() treats a single positive number as "1:n", so insist on two or more
# distinct sites before building the pairs.
stopifnot(length(site_ids) >= 2)

# All unordered pairs of sites, one pair per column.
m1 <- combn(site_ids, 2)

# One data frame per pair, holding only the rows of those two sites.
l2 <- lapply(seq_len(ncol(m1)), function(i) el[el$site_id %in% m1[, i], ])

# Measurement columns to test. The bookkeeping columns nr and site_id are
# excluded: a variance test on a row counter or on the grouping variable
# itself is meaningless.
value_cols <- setdiff(names(el), c("nr", "site_id"))

# For every pair, run bartlett.test() on each measurement column, grouped by
# site. sapply() is kept so that each element is, as before, a matrix with one
# column per variable and one row per "htest" component (statistic, parameter,
# p.value, ...), e.g. final.list[[1]]["p.value", "value1"].
final.list <- lapply(l2, function(pair_df) {
  sapply(pair_df[value_cols], function(v) bartlett.test(v, pair_df$site_id))
})

# Label each result with the pair it belongs to, e.g. "1_vs_2".
names(final.list) <- apply(m1, 2, paste, collapse = "_vs_")

Related

Form groups using block random assignment on two covariates

I often have groups of people who differ in their nationality and their status. They have to work in groups, and I would like to use block random assignment to create groups of a maximum of 5 individuals. Each group should have at least one person who is "foreign" and one who is "female". I have found the library randomizr which is supposedly able to do block random assignments, but my code does not work as intended.
An example dataset would be:
structure(list(Student = c("Susan", "Ciara", "Carl",
"Paula", "Emil", "Tammy", "Logan", "Anna", "Victor",
"Felix", "Federica", "Jesus", "Jens", "Samira", "Berit", "Yi",
"Lea", "Gordon", "Boris", "Silvester", "Celine", "Thomas", "Eduardo",
"RoY", "Marlene", "Amelie", "Claudius", "Herbert", "Cynthia", "Melanie",
"Leander", "Leona", "Tobias", "Leander", "Peter",
"Lilly", "Roxy", "Joachim"), Nationality = structure(c(2L, 2L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 1L, 2L, 2L), levels = c("Non-foreign", "Foreign"), class = "factor"),
Gender = structure(c(1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L), levels = c("female",
"male"), class = "factor")), class = "data.frame", row.names = c(NA,
-38L))
UPDATE: I have carefully read the vignette for the randomizr package again. I found that it is possible to create blocks with more than one covariate. I am now looking to see whether I can assign these blocks to the students to get block-randomized groups. I still need to test whether the code below works as intended.
# One blocking label per student, formed by joining the two covariates
# (for example "Foreign_female").
blocks <- paste(data$Nationality, data$Gender, sep = "_")

# Block random assignment of the students to 6 arms using those labels.
Z <- block_ra(blocks = blocks, num_arms = 6)

# Cross-tabulate each student against the arm they were assigned to.
table(data$Student, Z)

Displaying Multiple Regression Equations

I have three regressions in one plot, and I am trying to display the equation for each of them. I've been working off of this question to try and do this. However, the filtering doesn't seem to do anything, and the same equation is displayed three times.
The end goal is to compare cpue in relation to veg, while controlling for location (block), and get the slopes/r^2 values for each of the three regression lines.
Data
cpue<- structure(list(lake = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L), veg = c(254.8026498, 219.9422136, 450.9662078, 484.8605026,
407.1662151, 286.7015617, 351.6441798, 179.9959443, 340.4276843,
247.2907435, 502.4119071, 336.4259995, 349.1543197, 281.7493811,
201.8284859, 325.6380404, 288.3855723, 230.8755861, 214.8890894,
326.6376698, 214.7468224, 132.0511504, 335.2727641, 336.8727253,
143.8923225, 277.3053436, 302.7005649, 355.0332852, 307.5736711,
371.8407176, 168.7645221, 365.9156811, 349.205548, 273.8392697,
171.4513348, 197.1067049, 350.5833827, 202.9605797, 365.3415045,
413.2762633, 329.8539209, 377.1415341, 180.8524994, 217.4007852,
258.5909286, 146.7092479, 258.7440138, 393.2014549, 492.6719497,
208.5002392, 219.1466664, 182.1366352, 308.0534171, 317.6037795,
131.7534807, 324.0011761, 469.5861988, 237.4492916, 318.6897863,
47.94967582, 223.5382632, 386.2227607, 343.7657123, 493.6393726,
204.2960349, 294.4218332, 178.7555635, 454.0358039, 207.1363947,
364.6063223, 462.8508521, 292.8613255, 330.3893897, 209.1769838,
237.4264742, 427.8856667), cpue = c(32.63512612, 47.98168449,
33.26735173, 14.41435377, 30.94664495, 40.26817963, 41.26204388,
31.63227286, 36.97932408, 21.54620143, 34.27556883, 6.506644061,
32.24677471, 38.24536746, 30.95968644, 24.86408391, 31.15438304,
21.69779047, 39.86223079, 27.92263229, 23.55684281, 34.6157024,
42.06943746, 24.70597527, 28.36396188, 50.34591832, 55.06361184,
48.69468021, 26.00084784, 44.77320597, 14.56328001, 33.29291085,
21.55078237, 29.95980975, 40.61006429, 43.46931237, 26.26407484,
15.87009067, 39.47297313, 20.50811378, 35.66157343, 35.64563497,
44.47319537, 42.06574907, 40.16356125, 35.57462201, 32.10051291,
34.1254268, 34.21084448, 28.18410732, 32.11249307, 38.39890418,
31.24778375, 29.76951583, 41.52508487, 34.48914051, 28.30923803,
29.33886042, 37.57268795, 59.29849175, 28.9317113, 41.27342427,
38.44878019, 44.53768204, 44.48611219, 33.15553274, 34.48894561,
34.86722967, 31.92515626, 50.04825584, 53.67528105, 37.53150868,
33.16255301, 33.22374846, 28.28172263, 42.5795616), block = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("1",
"2", "3"), class = "factor")), row.names = c(NA, -76L), class = "data.frame")
Code
# Make lm() with blocking variable----------
# lm_eqn2(): intended to fit cpue ~ veg with a random intercept for block and
# to return a plotmath label string of the form "CPUE = a + b, r^2 = r2" for
# use with annotate(..., parse = T).
#   df2: data frame the caller wants the model fitted on.
# NOTE(review): df2 is never referenced in the body. The model below is always
# fitted to the data frame named `cpue`, so the value passed in has no effect.
lm_eqn2 <- function(df2){
m2 <- lmer(cpue ~ veg + (1|block), cpue);
# NOTE(review): the r2 entry below reads `m`, which is not defined inside this
# function (the model fitted above is `m2`); it is looked up outside instead.
eq2 <- substitute(italic(CPUE) == a + b*","~~italic(r)^2~"="~r2, # Write CPUE = a+b, r^2 = x
list(a = format(unname(coef(m2)[1]), digits = 4), # define 'a'
b = format(unname(coef(m2)[2]), digits = 2), # define 'b'
r2 = format(summary(m)$r.squared, digits = 3))) # define 'r2'
# NOTE(review): the expression built above is stored in `eq2`, but the line
# below converts `eq`, which is not defined inside this function.
as.character(as.expression(eq)); # declare expression as a character
}
# Scatter plot of cpue against veg, coloured by block, with a linear smooth
# and three text annotations carrying the fitted equations.
ggplot(cpue, aes(x=veg, y=cpue, col=block))+
geom_point()+
# Linear fit drawn without a legend entry and without a confidence band.
geom_smooth(method="lm", show.legend=F, se=F)+
# One label per block: each call passes lm_eqn2() only the rows of that block
# and asks annotate() to parse the returned string as an expression.
annotate("text", x=100, y=20, label= lm_eqn2(cpue %>% filter(block==1)), parse=T)+
annotate("text", x=200, y=30, label= lm_eqn2(cpue %>% filter(block==2)), parse=T)+
annotate("text", x=300, y=40, label= lm_eqn2(cpue %>% filter(block==3)), parse=T)
When I try to view the equation for each line with the following code:
lm_eqn2(cpue %>% filter(block==2))
it returns the same equation for each blocking number that I filter it by. This makes me think there's something wrong with the code that I made the model and the equation with? The only thing different (that I can tell) from the linked question is that my model has a blocking variable. Not sure if that would actually affect anything though.
Any help would be greatly appreciated.
You have a few problems here.
Firstly, it isn't good practice to use the same name for the dataframe and a vector within. It makes lines like lmer(cpue ~ veg + (1|block), cpue); and ggplot(cpue, aes(x=veg, y=cpue, col=block))+ confusing to many.
But also, using cpue here for the dataframe within your function, means that your function doesn't care what you are passing to it later. Such that m2 <- lmer(cpue ~ veg + (1|block), cpue); is the same every time - hence the same equation is being produced. cpue %>% filter(block==2) is ignored as an argument because df2 doesn't exist within your function. So you need something like this:
# lm_eqn2(): fit cpue ~ veg with a random intercept for block on the data
# frame passed in, and return a plotmath label string of the form
# "CPUE = a + b, r^2 = r2" as a character value.
#   df2: data frame containing the columns cpue, veg and block.
lm_eqn2 <- function(df2){
m2 <- lmer(cpue ~ veg + (1|block), df2); ## note the change to df2 here
# NOTE(review): m2 is an lmer fit, not an lm fit. Confirm that coef(m2)[1],
# coef(m2)[2] and summary(m2)$r.squared yield the intercept, slope and R^2 the
# way they would for lm(); lme4 documents fixef() for fixed-effect estimates.
eq2 <- substitute(italic(CPUE) == a + b*","~~italic(r)^2~"="~r2,
list(a = format(unname(coef(m2)[1]), digits = 4),
b = format(unname(coef(m2)[2]), digits = 2),
r2 = format(summary(m2)$r.squared, digits = 3)))
# Convert the substituted call into an expression and then into a string.
as.character(as.expression(eq2));
}
** also note that m and eq were not found (in your original code), so I changed them to m2 and eq2 respectively.
This gives the error:
Error: grouping factors must have > 1 sampled level
which makes sense, because you've fit block as a random intercept in your model code, yet you are filtering your data by the blocking factor. So there is only one "type" of blocking factor in each of the lines cpue %>% filter(block==1), cpue %>% filter(block==2), and cpue %>% filter(block==3). That means there is no information added to your regression when you use (1|block), since block is now a constant.
You might want to explain what you are hoping to do with this blocking factor. Some relevant posts: https://stats.stackexchange.com/q/4700/238878 and https://stats.stackexchange.com/q/31569/238878

Error when running poisson regression with a binary outcome

I am trying to run a poisson regression to predict a common binary outcome.
This is my first attempt at using dput - if I have used it inappropriately, please let me know so I can correct it.
Example data:
df <- structure(list(id = 1:30, sex = structure(c(1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L), .Label = c("Female", "Male"
), class = "factor"), migStat = structure(c(1L, 2L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L), .Label = c("Australian-born",
"Migrant"), class = "factor"), mhAreaBi = structure(c(1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L), .Label = c("Metropolitan",
"Regional"), class = "factor"), empStatBi = structure(c(2L, 2L,
1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L,
2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Student / employed",
"Unemployed"), class = "factor"), pensBenBi = structure(c(1L,
2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L,
1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L), .Label = c("No benefit",
"In receipt of pension benefit"), class = "factor"), maritStatBi = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L), .Label = c("Married (including de facto)",
"Not married"), class = "factor"), cto = structure(c(1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L), .Label = c("No",
"Yes"), class = "factor")), .Names = c("id", "sex", "migStat",
"mhAreaBi", "empStatBi", "pensBenBi", "maritStatBi", "cto"), row.names = c(NA,
-30L), class = "data.frame")
When running the regression using glm in R, I receive an error:
fit <- glm(cto ~ sex + migStat + mhAreaBi + empStatBi + pensBenBi + maritStatBi, df, family = poisson)
Error in if (any(y < 0)) stop("negative values not allowed for the 'Poisson' family") :
missing value where TRUE/FALSE needed
In addition: Warning message:
In Ops.factor(y, 0) : ‘<’ not meaningful for factors
The same error has been explained briefly in this thread:
Because the "<" operator is not defined for factors the result that is
passed to if is of length 0. Setting the factor variable on the RHS
and using the integer values on the LHS succeeds.
The error does not appear when I convert the outcome to an integer; however, this:
seems to defeat the purpose of predicting a binary outcome (unless a numeric variable with range 0-1 is treated the same as a factor variable with two levels); and
does not seem necessary (at least according to this post, which uses geeglm from geepack to predict a binary outcome [unfortunately, I receive the same error when I adapt the code to my own dataset])
Questions:
Could I receive further explanation of the error?
If I convert my outcome to an integer with range 0-1, will glm treat it the same as a binary variable? If not, is there an approach better suited to running a regression for a common binary outcome?
I think the best option here is:
# Recode the factor outcome as a numeric 0/1 indicator: 1 where cto is "Yes",
# 0 otherwise.
df$cto_binary <- as.numeric(df$cto == "Yes")

# Poisson regression on the numeric indicator with the same six predictors.
fit <- glm(
  cto_binary ~ sex + migStat + mhAreaBi + empStatBi + pensBenBi + maritStatBi,
  df,
  family = poisson
)
As this way you explicitly show in your code what will be a 1/success in your binary outcome and don't get tripped up by things like the ordering of factor levels. Note that in R as.numeric(c(FALSE, TRUE)) gives c(0, 1), so you always know what you're going to get from a logical comparison.

Looping through class ltraj?

Apologies if this is not the best forum to inquire this question.
Has anyone been able to loop/iterate through multiple GPS-collared individuals that are stored in an object of class ltraj (adehabitatLT)?
I've been trying to calculate Prox (https://cran.r-project.org/web/packages/wildlifeDI/vignettes/wildlifeDI-vignette.pdf) for multiple individuals but am struggling with how exactly to loop class ltraj because it's different than dataframe (which I'm accustomed to).
Thanks in advance.
install.packages('wildlifeDI', dependencies=TRUE)
library(wildlifeDI)
library(adehabitatLT)
chupacabra <- structure(list(CollarID = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L), .Label = c("A4116F", "A4117M", "A4118F"), class = "factor"),
DateTime = structure(c(1433653200, 1433667600, 1433682060,
1433682300, 1433682600, 1433682900, 1433683200, 1433683500,
1433683800, 1433684100, 1433684400, 1433684700, 1433685000,
1433685300, 1433685600, 1433685900, 1433686200, 1433686500,
1433686800, 1433687100, 1433687400, 1433687700, 1433688000,
1433688300, 1433688600, 1433688900, 1433689200, 1433689500,
1433689800, 1433690100, 1433690400, 1433690700, 1433691000,
1433691300, 1433691600, 1433691900, 1433692200, 1433692500,
1433692800, 1433693100, 1433693400, 1433693700, 1433694000,
1433694300, 1433694600, 1433694900, 1433695200, 1433695500,
1433695800, 1433696100, 1433696400, 1433710860, 1433714400,
1433714700, 1433715000, 1433715300, 1433715600, 1433715900,
1433716200, 1433716500, 1433716800, 1433717100, 1433717400,
1433717700, 1433718000, 1433718300, 1433718600, 1433718900,
1433719200, 1433719500, 1433719800, 1433720100, 1433720400,
1433720700, 1433721000, 1433721300, 1433721600, 1433721900,
1433722200, 1433722500, 1433722800, 1433723100, 1433723400,
1433723700, 1433724060, 1433724300, 1433724600, 1433724900,
1433725200, 1433653200, 1433667660, 1433682060, 1433682300,
1433682600, 1433682900, 1433683200, 1433683500, 1433683800,
1433684100, 1433684400, 1433684700, 1433685000, 1433685300,
1433685660, 1433685900, 1433686200, 1433686500, 1433686800,
1433687100, 1433687400, 1433687700, 1433688000, 1433688300,
1433688660, 1433688900, 1433689200, 1433689500, 1433689800,
1433690100, 1433690400, 1433690700, 1433691000, 1433691300,
1433691600, 1433691900, 1433692200, 1433692500, 1433692800,
1433693100, 1433693400, 1433693700, 1433694060, 1433694300,
1433694600, 1433694900, 1433695200, 1433695500, 1433695800,
1433696100, 1433696400, 1433710860, 1433714400, 1433714700,
1433715000, 1433715300, 1433715600, 1433715900, 1433716200,
1433716500, 1433716800, 1433717100, 1433717400, 1433717700,
1433718000, 1433718300, 1433718600, 1433718900, 1433719200,
1433719500, 1433719800, 1433720100, 1433720400, 1433720700,
1433721000, 1433721300, 1433721600, 1433721900, 1433722200,
1433722500, 1433722800, 1433723100, 1433723400, 1433723700,
1433724000, 1433724300, 1433724600, 1433724900, 1433725200,
1433653249, 1433667666, 1433682089, 1433682349, 1433682632,
1433682936, 1433683234, 1433683536, 1433683837, 1433684144,
1433684443, 1433684739, 1433685031, 1433685370, 1433685634,
1433685935, 1433686236, 1433686536, 1433686826, 1433687142,
1433687448, 1433687736, 1433688034, 1433688337, 1433688649,
1433688936, 1433689236, 1433689531, 1433689827, 1433690139,
1433690433, 1433690736, 1433691048, 1433691336, 1433691634,
1433691941, 1433692236, 1433692535, 1433692833, 1433693129,
1433693434, 1433693735, 1433694028, 1433694373, 1433694642,
1433694931, 1433695234, 1433695542, 1433695831, 1433696148,
1433696448, 1433710908, 1433714437, 1433714737, 1433715036,
1433715366, 1433715636, 1433715969, 1433716234, 1433716536,
1433716827, 1433717137, 1433717435, 1433717733, 1433718048,
1433718336, 1433718636, 1433718973, 1433719272, 1433719530,
1433719837, 1433720136, 1433720431, 1433720736, 1433721031,
1433721336, 1433721640, 1433721946, 1433722236, 1433722528,
1433722842, 1433723137, 1433723434, 1433723730, 1433724035,
1433724370, 1433724634, 1433724936, 1433725236), class = c("POSIXct",
"POSIXt"), tzone = ""), UTM_X = c(636979.2503, 636977.6583,
637402.4471, 637400.3063, 637402.3105, 637407.1977, 637406.3305,
637408.2991, 637407.1907, 637407.8414, 637406.7617, 637407.1614,
637409.8019, 637431.5235, 637465.9644, 637495.9583, 637525.2219,
637573.6033, 637645.3501, 637683.3844, 637691.6229, 637693.4815,
637693.4973, 637691.2483, 637691.9061, 637693.6377, 637692.1106,
637692.3169, 637690.9989, 637691.4503, 637693.6252, 637692.4915,
637694.9434, 637692.6685, 637692.8116, 637694.6787, 637694.4404,
637695.9109, 637696.8945, 637695.2403, 637695.4283, 637694.6085,
637693.4962, 637695.6229, 637734.7283, 637773.2897, 637774.9891,
637787.6573, 637792.285, 637807.0486, 637834.6231, 637497.3348,
637149.9982, 637145.0345, 637178.159, 637181.8251, 637181.1075,
637178.023, 637175.327, 637179.9138, 637180.2833, 637181.5512,
637185.8749, 637181.0011, 637177.401, 637177.4498, 637176.787,
637176.0093, 637175.5126, 637177.9578, 637178.5819, 637188.3911,
637188.7303, 637189.496, 637204.3885, 637195.2063, 637204.9823,
637201.5235, 637212.3355, 637274.4294, 637293.0009, 637296.3954,
637331.3382, 637358.4369, 637365.1677, 637357.5562, 637355.3896,
637345.4827, 637339.1054, 628920.3789, 628869.9781, 630028.6781,
630156.4557, 629878.756, 629658.9786, 629412.6432, 629257.5965,
629405.8967, 629113.4479, 628955.5124, 628852.0231, 628711.9202,
628632.7134, 628621.7724, 628622.2565, 628683.6018, 628771.1182,
628790.8437, 628867.7592, 628881.9794, 628830.9898, 628681.9202,
628575.3395, 628578.1836, 628656.4902, 628659.2271, 628656.689,
628660.4677, 628657.294, 628657.077, 628689.6585, 628727.0131,
628716.6979, 628703.8397, 628678.6953, 628679.3594, 628681.3549,
628625.6275, 628563.1372, 628488.425, 628482.5023, 628469.2209,
628417.9697, 628407.7352, 628405.374, 628393.143, 628394.0092,
628396.2344, 628395.05, 628395.7787, 627684.7989, 627704.889,
627702.5528, 627702.0422, 627708.7906, 627706.9374, 627687.0371,
627622.0573, 627605.7932, 627603.5707, 627587.8803, 627606.0471,
627603.2967, 627602.954, 627603.5844, 627604.1232, 627601.697,
627581.6104, 627599.7062, 627616.327, 627661.7402, 627889.446,
627883.5896, 627803.1167, 627792.5918, 627716.0886, 627720.8854,
627671.8217, 627666.9994, 627586.7035, 627584.4273, 627532.492,
627502.6326, 627430.6781, 627408.8845, 627357.5049, 627406.0466,
627427.1382, 636666.3215, 636629.7032, 637179.9041, 637187.7067,
637183.5281, 637193.2082, 637227.2331, 637290.2543, 637347.9311,
637373.0887, 637368.8923, 637371.0722, 637383.95, 637480.1799,
637510.543, 637558.428, 637676.2714, 637682.3564, 637680.8591,
637682.8516, 637680.8317, 637680.8341, 637681.9818, 637681.2897,
637681.3658, 637681.9234, 637681.8824, 637682.0629, 637684.8756,
637681.602, 637682.7548, 637680.8578, 637682.9887, 637680.2496,
637681.4629, 637682.3731, 637682.2223, 637684.1076, 637681.7127,
637681.1249, 637681.6758, 637681.595, 637682.5253, 637702.3094,
637728.9487, 637784.0853, 637776.5727, 637785.2538, 637786.6413,
637807.9935, 637834.8672, 637485.5191, 637148.5674, 637139.2974,
637174.9104, 637191.9371, 637179.4262, 637175.7715, 637176.3455,
637174.5459, 637174.2012, 637173.7462, 637177.3967, 637176.6907,
637177.8458, 637178.0774, 637178.4151, 637178.3272, 637178.2442,
637177.6655, 637176.734, 637186.2713, 637185.0998, 637197.4201,
637197.9147, 637204.1485, 637203.1784, 637204.4993, 637205.3515,
637279.9058, 637303.773, 637303.5724, 637330.3473, 637354.416,
637366.5627, 637340.7274, 637357.5505, 637350.709, 637349.689
), UTM_Y = c(3365828.581, 3365826.066, 3364992.673, 3364991.006,
3364989.036, 3364990.816, 3364989.486, 3364991.849, 3364991.37,
3364990.059, 3364989.58, 3364991.403, 3364991.536, 3364985.614,
3365030.733, 3365054.446, 3365091.064, 3365138.444, 3365289.033,
3365390.111, 3365398.839, 3365387.124, 3365390.427, 3365386.696,
3365387.104, 3365379.344, 3365386.131, 3365388.805, 3365385.152,
3365385.158, 3365386.394, 3365385.637, 3365385.48, 3365386.071,
3365385.397, 3365387.416, 3365387.269, 3365389.505, 3365389.971,
3365387.833, 3365389.676, 3365390.685, 3365385.96, 3365384.934,
3365352.152, 3365369.878, 3365376.795, 3365390.013, 3365382.689,
3365382.189, 3365410.939, 3365683.847, 3365620.829, 3365574.121,
3365527.084, 3365501.513, 3365502.801, 3365512.739, 3365514.733,
3365512.885, 3365511.016, 3365512.562, 3365510.255, 3365511.235,
3365509.494, 3365509.439, 3365509.431, 3365509.388, 3365510.678,
3365509.534, 3365511.083, 3365511.85, 3365514.659, 3365513.371,
3365525.476, 3365526.036, 3365529.429, 3365528.676, 3365513.172,
3365507.793, 3365514.623, 3365512.105, 3365504.477, 3365512.401,
3365495.238, 3365490.863, 3365441.075, 3365411.542, 3365403.003,
3371496.516, 3371594.382, 3370587.966, 3370380.241, 3370270.012,
3370346.817, 3370433.295, 3370488.189, 3370225.222, 3370122.896,
3370174.202, 3370232.298, 3370192.371, 3370255.722, 3370283.548,
3370283.21, 3370305.674, 3370344.002, 3370354.4, 3370348.973,
3370200.353, 3370078.071, 3370123.589, 3370194.686, 3370393.878,
3370500.265, 3370498.635, 3370498.882, 3370497.663, 3370499.687,
3370500.172, 3370633.763, 3370704.904, 3370839.426, 3370879.943,
3370950.842, 3370957.988, 3370963, 3371031.496, 3371082.487,
3371109.89, 3371112.17, 3371118.807, 3371167.072, 3371168.581,
3371170.127, 3371178.074, 3371177.097, 3371178.11, 3371176.777,
3371178.482, 3371566.662, 3371622.632, 3371621.252, 3371619.772,
3371623.975, 3371627.245, 3371636.71, 3371612.734, 3371598.776,
3371590.192, 3371636.009, 3371656.352, 3371656.719, 3371656.471,
3371656.755, 3371659.1, 3371656.401, 3371688.243, 3371717.065,
3371741.492, 3371755.505, 3371618.156, 3371595.308, 3371615.82,
3371560.55, 3371552.166, 3371572.884, 3371547.544, 3371530.616,
3371559.755, 3371591.63, 3371612.877, 3371657.663, 3371727.149,
3371739.263, 3371823.645, 3371912.149, 3371969.549, 3366104.602,
3365712.344, 3365494.627, 3365496.045, 3365484.575, 3365475.02,
3365485.304, 3365467.377, 3365477.805, 3365507.809, 3365510.682,
3365519.888, 3365527.19, 3365491.394, 3365490.37, 3365468.274,
3365393.413, 3365389.355, 3365386.964, 3365391.977, 3365389.125,
3365388.937, 3365389.35, 3365389.375, 3365387.159, 3365387.133,
3365386.578, 3365386.735, 3365386.161, 3365387.472, 3365387.487,
3365387.064, 3365387.977, 3365385.016, 3365387.836, 3365388.036,
3365387.048, 3365389.909, 3365387.074, 3365384.939, 3365387.717,
3365388.026, 3365388.16, 3365385.728, 3365344.996, 3365374.693,
3365377.679, 3365387.866, 3365389.823, 3365391.779, 3365410.631,
3365698.174, 3365622.297, 3365571.954, 3365511.957, 3365510.265,
3365505.086, 3365509.196, 3365512.44, 3365513.438, 3365508.777,
3365509.049, 3365509.838, 3365506.403, 3365507.748, 3365510.711,
3365509.075, 3365507.666, 3365508.152, 3365505.285, 3365498.401,
3365508.531, 3365508.483, 3365513.538, 3365520.783, 3365519.376,
3365523.92, 3365529.634, 3365529.866, 3365498.661, 3365512.941,
3365509.801, 3365503.056, 3365513.548, 3365502.683, 3365482.215,
3365438.852, 3365412.317, 3365406.363)), .Names = c("CollarID",
"DateTime", "UTM_X", "UTM_Y"), row.names = c(NA, -267L), class = "data.frame")
Commenting out incorrect transformation
# chupacabra$DateTime <-as.POSIXct(strptime(chupacabra$DateTime, format='%m/%d/%Y %H:%M:%S'),origin='1970-01-01')
# Build the trajectory object from the UTM coordinates, using the DateTime
# column as the timestamps and CollarID as the animal identifier.
chupacabra2 <- as.ltraj(
  chupacabra[, c("UTM_X", "UTM_Y")],
  date = chupacabra$DateTime,
  id = chupacabra$CollarID,
  typeII = TRUE
)

# Pull out the two animals to compare.
monster1 <- chupacabra2[1] # extract the first chupacabra
monster2 <- chupacabra2[2] # extract the second chupacabra

# Proximity between the two animals, with tc = 0.5 * 60 and dc = 210.
proxdf <- Prox(monster1, monster2, tc = 0.5 * 60, dc = 210, local = TRUE)
Here's a sample of chupacabras that we've been tracking. We'd like to examine how often they interact with each other. This dataset has 3 individuals (but we have many, many more chupacabras), and it is inefficient to pull out the animals one by one to calculate proximity. I'd like to use a for loop (for i in unique ID, perhaps), but I don't understand how to do this when the data is in ltraj format. Any assistance would be appreciated.
After removing the harmful code that incorrectly reformatted the DateTime value, I plotted the "trajectory" for the three 2-animal combinations. Clearly animal 1 did not interact with animal 2, since their ranges were disjoint. Animal 1 and animal 3 do appear to interact, since from about halfway through their joint sojourns they have roughly the same trajectory:
# Draw the x/y track of the first element of chupacabra2 as a line. The axis
# limits are computed from the first and third elements together so that both
# tracks fit in the same plotting region.
plot( chupacabra2[[1]]$x, chupacabra2[[1]]$y, type="l",
xlim=range( c(chupacabra2[[1]]$x, chupacabra2[[3]]$x)),
ylim= range(c(chupacabra2[[1]]$y, chupacabra2[[3]]$y)))
# Overlay the track of the third element in red.
lines( chupacabra2[[3]]$x, chupacabra2[[3]]$y, col="red")
This appears in proxdf13 as:
> monster1<-chupacabra2[1]
>
> monster3<-chupacabra2[3]
>
> proxdf13 <-Prox(monster1,monster3, tc=0.5*60,dc=210, local =TRUE)
> proxdf13
date prox
1 2015-06-07 06:01:00 549.074863
2 2015-06-07 07:20:00 104.169909
3 2015-06-07 08:10:00 6.205875
4 2015-06-07 09:05:00 14.409016
5 2015-06-07 09:20:00 11.189309
6 2015-06-07 15:40:00 6.481131
7 2015-06-07 16:25:00 4.259042
8 2015-06-07 17:15:00 10.648210
9 2015-06-07 17:35:00 4.181297
10 2015-06-07 17:41:00 7.574566
So a guess is that "interact" might mean something along the lines of "is within 15 distance units for more than 2 days in succession". A natural function to consider would therefore be rle:
> rle( proxdf13$prox < 15 )
Run Length Encoding
lengths: int [1:2] 2 8
values : logi [1:2] FALSE TRUE
> RL13 <- rle( proxdf13$prox < 15 )
> max( RL13$lengths [ RL13$values] )
[1] 8
And test whether this is greater than some value, say 2?
So that was the method to handle a single 2way animal-animal combination. The comments of the questioner below suggest I may have lost him in the material that follows. To get the 2way combinations of a sequence use combn:
> combn(1:3, 2)
[,1] [,2] [,3]
[1,] 1 1 2
[2,] 2 3 3
That is a matrix that will be used to generate the index values for pulling two single-animal "trajectory" dataframes at a time from the 'ltraj' object, using each column separately. The apply function can "loop" over either the rows or the columns of a matrix; passing 2 as its index (MARGIN) argument specifies the columns.
So putting this all together (using apply to loop over the column indices of combn-result to get the two-way combinations):
# For every two-animal combination, compute the proximity table and take the
# length of the longest run of consecutive rows with prox below 15. The outer
# parentheses print the result as well as assigning it. A pair with no row
# below 15 yields -Inf, because max() is then called on an empty vector.
(max.days.prox <- apply(
  combn(seq(length(chupacabra2)), 2),
  2, # iterate over the columns of the combinations matrix
  function(pair) {
    pair_prox <- Prox(chupacabra2[pair[1]], chupacabra2[pair[2]],
                      tc = 0.5 * 60, dc = 210, local = TRUE)
    close_runs <- rle(pair_prox$prox < 15)
    max(close_runs$lengths[close_runs$values])
  }
))
# [1] -Inf 8 -Inf
We rbind that to the result to look at the items of interest:
> rbind(combn( seq(length(chupacabra2)), 2 ) , max.days.prox)
[,1] [,2] [,3]
1 1 2
2 3 3
max.days.prox -Inf 8 -Inf
So only the pairing of animal1 and animal3 provided evidence of an interaction. This would generalize to larger instances of ltraj-objects

Boxplot with two levels and multiple data.frames

I have 4 data.frames with two factor levels in each data.frame. df1 is reproduced below. Please duplicate df1 to produce df2...df4.
How can I produce boxplots with ggplot2 such that my final figure looks very similar to the figure below? The seasons in the figure correspond to the data frame names, present and future correspond to the level names, and the legend corresponds to heavy, heavier, heaviest in the data reproduced here.
Ignore the dotted horizontal red line.
df1= structure(list(id = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("NN", "SS"), class = "factor"),
heavy = c(0.136230125, 0.136281211, 0.136038018, 0.135392862,
0.137088902, 0.136028293, 0.13640057, 0.135317058, 0.13688615,
0.136448994, 0.137089424, 0.136810847, 0.135865471, 0.136130096,
0.136361327, 0.137796714, 0.136052839, 0.135892646, 0.13544437,
0.136452363, 0.135367421, 0.135617509, 0.138202559, 0.135396942,
0.135930092, 0.135661805, 0.135666, 0.135860128, 0.137648687,
0.136057353, 0.136057731, 0.135162399, 0.136080113, 0.135285036,
0.136204839, 0.138058091, 0.137215664, 0.135696637, 0.135863902,
0.135733243, 0.138274445, 0.136632122, 0.137787919, 0.135033093,
0.136926798, 0.136766413, 0.13690947, 0.135203152, 0.138370968,
0.136862356, 0.136083112, 0.138212845, 0.135964773, 0.13583601,
0.134923731, 0.135828965, 0.136272539, 0.138127602, 0.137028323,
0.136526836, 0.136407397, 0.137025373, 0.138358757, 0.137858521,
0.135464076, 0.136302506, 0.135528362, 0.137540677, 0.136455865,
0.138470144, 0.137227895, 0.136296955, 0.136792631, 0.135875782,
0.13815733, 0.136383864, 0.136696618, 0.13857652, 0.136700903,
0.136743873, 0.136033619, 0.135970522, 0.135816385, 0.136003984,
0.136583925, 0.136768202, 0.136292002, 0.136316737, 0.136540075,
0.136051218, 0.135924119, 0.136736303, 0.136946894, 0.136266073,
0.136263692, 0.136399301, 0.13611577, 0.135857095, 0.136769488,
0.136072466, 0.135564224, 0.136496131, 0.137659507, 0.136704681,
0.136542173, 0.136777403, 0.135771538, 0.13665463, 0.136984748,
0.137717859, 0.138195237, 0.136232227, 0.135956814), heavier = c(0.227332679,
0.227200132, 0.227299118, 0.227289816, 0.22724478, 0.227082442,
0.227861315, 0.227055561, 0.227112284, 0.228651438, 0.228158412,
0.228789678, 0.227188949, 0.228850198, 0.227246991, 0.227359368,
0.227359531, 0.227310607, 0.229490445, 0.227295226, 0.227958185,
0.228104958, 0.227254823, 0.22715392, 0.228062515, 0.227509559,
0.227143662, 0.230048719, 0.227860836, 0.228467792, 0.227263728,
0.227222794, 0.227165592, 0.227140611, 0.228424335, 0.227356425,
0.227243374, 0.228936267, 0.227320467, 0.22738371, 0.227694891,
0.227270428, 0.227751798, 0.228803279, 0.227330453, 0.229679261,
0.228999206, 0.227227604, 0.227247085, 0.227198567, 0.229234921,
0.227211613, 0.23007234, 0.226793036, 0.226474338, 0.226654333,
0.229964991, 0.22880328, 0.22700099, 0.226640822, 0.227522393,
0.227463578, 0.227832692, 0.227293936, 0.230154101, 0.229813709,
0.22761097, 0.227445308, 0.228669159, 0.22660539, 0.229017398,
0.230421347, 0.227041103, 0.227583471, 0.229547568, 0.22676335,
0.226737661, 0.229922588, 0.226907188, 0.227102239, 0.226469073,
0.230680908, 0.227763879, 0.226882448, 0.226741993, 0.226693024,
0.22671415, 0.226773662, 0.227795194, 0.226983096, 0.226647946,
0.226799552, 0.226759218, 0.22692942, 0.226601519, 0.227098192,
0.226886889, 0.226959012, 0.226552119, 0.226809761, 0.226786285,
0.226709252, 0.226834015, 0.228033943, 0.226693494, 0.22748613,
0.227608804, 0.22685023, 0.226586619, 0.227718907, 0.228890098,
0.226701909, 0.230919944), heaviest = c(0.316870607, 0.316772978,
0.316851707, 0.317017543, 0.316673994, 0.317224709, 0.319234458,
0.31861305, 0.319804304, 0.318605816, 0.316930034, 0.31688398,
0.316789552, 0.320783976, 0.317094325, 0.31809319, 0.317134565,
0.318173976, 0.317213167, 0.317084404, 0.321712205, 0.317128056,
0.316866913, 0.3170489, 0.31712423, 0.31684494, 0.319497635,
0.316932301, 0.316864646, 0.317279005, 0.316887692, 0.317134437,
0.316792589, 0.320894499, 0.319883014, 0.316924639, 0.316575642,
0.31686389, 0.316985994, 0.321566256, 0.316683995, 0.320299883,
0.317308965, 0.318151948, 0.316479828, 0.319857732, 0.317171909,
0.322137849, 0.316526917, 0.316870364, 0.322205784, 0.317055758,
0.320329144, 0.318015397, 0.318719989, 0.317910658, 0.317292016,
0.321348723, 0.319915048, 0.317160762, 0.318773245, 0.319627925,
0.31869767, 0.322422407, 0.32082693, 0.318034899, 0.318760783,
0.318325502, 0.320739086, 0.317216142, 0.32284544, 0.319466593,
0.318740499, 0.317489944, 0.319064923, 0.322014928, 0.317353897,
0.318904583, 0.317931141, 0.323295254, 0.318924712, 0.318965677,
0.317700019, 0.31793468, 0.317699508, 0.317168657, 0.318903983,
0.317493401, 0.317511406, 0.317483897, 0.31748495, 0.317776804,
0.318893431, 0.317663608, 0.316978585, 0.317473467, 0.317500429,
0.317144259, 0.317330826, 0.317610353, 0.317881476, 0.31707787,
0.317728374, 0.317452137, 0.31938939, 0.317199373, 0.31898747,
0.318878952, 0.317987024, 0.318951952, 0.318419561, 0.319568088,
0.321165413)), .Names = c("id", "heavy", "heavier", "heaviest"
), class = "data.frame", row.names = c(NA, -113L))
## Build four example data.frames, stack them in long format and draw one
## boxplot panel per data.frame, with one box per (id, variable) combination.
library(reshape2)  # melt()
library(ggplot2)   # ggplot(), geom_boxplot(), facet_grid()

## create some data.frames: this results in a list of four dfs
## (the values are random; call set.seed() first if reproducibility is needed)
createDF <- quote(data.frame(id = sample(c("NN", "SS"), 100, replace = TRUE),
                             heavy = runif(100),
                             heavier = runif(100),
                             heaviest = runif(100)))
dfs <- lapply(1:4, function(i) eval(createDF))

## join and shape them
dat <- do.call(rbind, dfs)
## label every row with the data.frame it came from ("df 1" ... "df 4")
dat$dfid <- paste("df", rep(seq_along(dfs),
                            times = vapply(dfs, nrow, integer(1))))
## long format: melt() puts the measure names in 'variable', the numbers in 'value'
dat <- melt(dat, id.vars = c("id", "dfid"))

ggplot(dat, aes(id, value, group = interaction(variable, id), fill = variable)) +
  geom_boxplot() +
  facet_grid(~dfid)
Something like this?
## Tag each data.frame with a season, melt each one to long format, stack them
## and draw boxplots of weight by id, filled by weight category, one panel per
## season. Expects df1 ... df4 to exist already with columns id, heavy,
## heavier and heaviest.
library(reshape2)  # melt()
library(ggplot2)   # ggplot(), geom_boxplot(), facet_grid()

df1$season <- 'winter'
df2$season <- 'spring'
df3$season <- 'summer'
df4$season <- 'fall'

## variable.name and value.name must differ: 'weightCat' receives the original
## column name (heavy / heavier / heaviest) and is the fill aesthetic used in
## the plot below; 'weight' receives the measured value.
df1.m <- melt(df1, id.vars = c('id', 'season'), variable.name = 'weightCat', value.name = 'weight')
df2.m <- melt(df2, id.vars = c('id', 'season'), variable.name = 'weightCat', value.name = 'weight')
df3.m <- melt(df3, id.vars = c('id', 'season'), variable.name = 'weightCat', value.name = 'weight')
df4.m <- melt(df4, id.vars = c('id', 'season'), variable.name = 'weightCat', value.name = 'weight')

df.all <- rbind(df1.m, df2.m, df3.m, df4.m)
ggplot(df.all, aes(x = id, y = weight, fill = weightCat)) + geom_boxplot() + facet_grid(. ~ season)

Resources