R: Vectorizing a condition - r
I have a dataset with over a million rows and 67 columns. I create a new column that records scores according to my code below.
I am stuck at a condition I need to take care of in:
DF$change[DF[,63] == "M"] <- DF$change[DF[,63] == "M"] - pos2
When DF$change[DF[,63] == "M"] - pos2 = 0, I need to see whether this happened on the bid side or the ask side. I can determine that by checking whether the position of DF[,64] is even (ask side) or odd (bid side). There's a caveat: I can't use the value of pos2 that I have already calculated to do this, because of my function mywhich (I can provide its code if needed), which is used in the code.
So to determine even/odd I have to recalculate the position of DF[,64]. Once I know even or odd, DF$change should be either -1 or 1 depending on whether DF[,66] > DF[,64] or <.
Now, I've tried subsetting, but I don't see how that can work because I have to recalculate the positions. I tried not using mywhich for this part, but I can't seem to get my head around how to make it work.
Any pointers/suggestions? What else should I try? Should I write a separate function that handles this? Write another version of a which function? I am a little lost.
This is what I have so far:
> DF$change <- apply(DF[, 1:62] == DF[,64], 1, mywhich)
> DF$change[DF[,63] == "C"] <- apply(DF[which(DF[,63] == "C") - 1, 1:62] == DF[DF[,63] == "C",64], 1, mywhich)*(-1)
> pos2 <- apply(DF[which(DF[,63] == "M") - 1, 1:62] == DF[DF[,63] == "M",66], 1, mywhich)
> DF$change[DF[,63] == "M"] <- DF$change[DF[,63] == "M"] - pos2
This is the output:
> head(DF, 20)
DateTime Seq BP1 BQ1 BO1 AP1 AQ1 AO1 BP2 BQ2 BO2 AP2 AQ2 AO2 BP3 BQ3 BO3 AP3 AQ3 AO3 BP4 BQ4 BO4 AP4 AQ4 AO4 BP5 BQ5 BO5 AP5 AQ5 AO5 BP6 BQ6 BO6 AP6 AQ6 AO6 BP7 BQ7 BO7 AP7 AQ7 AO7 BP8 BQ8 BO8 AP8 AQ8 AO8 BP9 BQ9 BO9 AP9
1 2015-11-30 09:15:00.368 92 80830 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2 2015-11-30 09:15:00.368 108 80830 1 1 83435 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3 2015-11-30 09:15:00.375 406 81100 1 1 83435 1 1 80830 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4 2015-11-30 09:15:00.375 479 81100 1 1 82165 1 1 80830 1 1 83435 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
5 2015-11-30 09:15:00.377 643 81100 1 1 82165 1 1 80830 1 1 83200 1 1 0 0 0 83435 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
6 2015-11-30 09:15:00.378 722 81100 1 1 82165 1 1 80830 1 1 82650 1 1 0 0 0 83200 1 1 0 0 0 83435 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
7 2015-11-30 09:15:00.380 811 81100 1 1 82165 1 1 80830 1 1 82650 1 1 0 0 0 83200 1 1 0 0 0 83430 1 1 0 0 0 83435 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
8 2015-11-30 09:15:00.380 822 81100 1 1 82165 1 1 80835 1 1 82650 1 1 80830 1 1 83200 1 1 0 0 0 83430 1 1 0 0 0 83435 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
9 2015-11-30 09:15:00.380 828 81100 1 1 82345 1 1 80835 1 1 82650 1 1 80830 1 1 83200 1 1 0 0 0 83430 1 1 0 0 0 83435 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
10 2015-11-30 09:15:00.383 1046 81100 1 1 82345 1 1 80835 1 1 82650 1 1 80830 1 1 83200 1 1 0 0 0 83430 1 1 0 0 0 83435 1 1 0 0 0 83500 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
11 2015-11-30 09:15:00.384 1103 81100 1 1 82165 1 1 80835 1 1 82650 1 1 80830 1 1 83200 1 1 0 0 0 83430 1 1 0 0 0 83435 1 1 0 0 0 83500 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12 2015-11-30 09:15:00.384 1171 81100 1 1 82345 1 1 80835 1 1 82650 1 1 80830 1 1 83200 1 1 0 0 0 83430 1 1 0 0 0 83435 1 1 0 0 0 83500 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
13 2015-11-30 09:15:00.384 1186 81100 1 1 82345 1 1 80835 1 1 82650 1 1 80830 1 1 82900 1 1 0 0 0 83200 1 1 0 0 0 83430 1 1 0 0 0 83435 1 1 0 0 0 83500 1 1 0 0 0 0 0 0 0 0 0 0
14 2015-11-30 09:15:00.384 1196 81100 1 1 82165 1 1 80835 1 1 82650 1 1 80830 1 1 82900 1 1 0 0 0 83200 1 1 0 0 0 83430 1 1 0 0 0 83435 1 1 0 0 0 83500 1 1 0 0 0 0 0 0 0 0 0 0
15 2015-11-30 09:15:00.385 1238 81100 1 1 82340 1 1 80835 1 1 82650 1 1 80830 1 1 82900 1 1 0 0 0 83200 1 1 0 0 0 83430 1 1 0 0 0 83435 1 1 0 0 0 83500 1 1 0 0 0 0 0 0 0 0 0 0
16 2015-11-30 09:15:00.385 1249 81100 1 1 82340 1 1 80835 1 1 82650 1 1 80830 1 1 82900 1 1 0 0 0 83200 2 1 0 0 0 83430 1 1 0 0 0 83435 1 1 0 0 0 83500 1 1 0 0 0 0 0 0 0 0 0 0
17 2015-11-30 09:15:00.385 1254 81200 1 1 82340 1 1 81100 1 1 82650 1 1 80835 1 1 82900 1 1 80830 1 1 83200 2 1 0 0 0 83430 1 1 0 0 0 83435 1 1 0 0 0 83500 1 1 0 0 0 0 0 0 0 0 0 0
18 2015-11-30 09:15:00.387 1273 81200 1 1 82340 1 1 81100 1 1 82650 1 1 80835 1 1 82900 1 1 80830 1 1 83200 2 1 80035 1 1 83430 1 1 0 0 0 83435 1 1 0 0 0 83500 1 1 0 0 0 0 0 0 0 0 0 0
19 2015-11-30 09:15:00.388 1333 81200 1 1 82165 1 1 81100 1 1 82650 1 1 80835 1 1 82900 1 1 80830 1 1 83200 2 1 80035 1 1 83430 1 1 0 0 0 83435 1 1 0 0 0 83500 1 1 0 0 0 0 0 0 0 0 0 0
20 2015-11-30 09:15:00.388 1343 81200 1 1 82340 1 1 81100 1 1 82650 1 1 80835 1 1 82900 1 1 80830 1 1 83200 2 1 80035 1 1 83430 1 1 0 0 0 83435 1 1 0 0 0 83500 1 1 0 0 0 0 0 0 0 0 0 0
AQ9 AO9 BP10 BQ10 BO10 AP10 AQ10 AO10 C Price Qty OldPrice OldQty change
1 0 0 0 0 0 0 0 0 N 80830 1 NA NA 5
2 0 0 0 0 0 0 0 0 N 83435 1 NA NA 5
3 0 0 0 0 0 0 0 0 N 81100 1 NA NA 5
4 0 0 0 0 0 0 0 0 N 82165 1 NA NA 5
5 0 0 0 0 0 0 0 0 N 83200 1 NA NA 4
6 0 0 0 0 0 0 0 0 N 82650 1 NA NA 4
7 0 0 0 0 0 0 0 0 N 83430 1 NA NA 2
8 0 0 0 0 0 0 0 0 N 80835 1 NA NA 4
9 0 0 0 0 0 0 0 0 M 82345 1 82165 1 0
10 0 0 0 0 0 0 0 0 N 83500 1 NA NA 0
11 0 0 0 0 0 0 0 0 M 82165 1 82345 1 0
12 0 0 0 0 0 0 0 0 M 82345 1 82165 1 0
13 0 0 0 0 0 0 0 0 N 82900 1 NA NA 3
14 0 0 0 0 0 0 0 0 M 82165 1 82345 1 0
15 0 0 0 0 0 0 0 0 M 82340 1 82165 1 0
16 0 0 0 0 0 0 0 0 N 83200 1 NA NA 2
17 0 0 0 0 0 0 0 0 N 81200 1 NA NA 5
18 0 0 0 0 0 0 0 0 N 80035 1 NA NA 1
19 0 0 0 0 0 0 0 0 M 82165 1 82340 1 0
20 0 0 0 0 0 0 0 0 M 82340 1 82165 1 0
> dput(DF[1:20,])
structure(list(DateTime = structure(c(1448855100.369, 1448855100.369,
1448855100.375, 1448855100.376, 1448855100.378, 1448855100.379,
1448855100.38, 1448855100.38, 1448855100.38, 1448855100.383,
1448855100.384, 1448855100.385, 1448855100.385, 1448855100.385,
1448855100.386, 1448855100.386, 1448855100.386, 1448855100.387,
1448855100.389, 1448855100.389), class = c("POSIXct", "POSIXt"
), tzone = ""), Seq = c(92L, 108L, 406L, 479L, 643L, 722L, 811L,
822L, 828L, 1046L, 1103L, 1171L, 1186L, 1196L, 1238L, 1249L,
1254L, 1273L, 1333L, 1343L), BP1 = c(80830L, 80830L, 81100L,
81100L, 81100L, 81100L, 81100L, 81100L, 81100L, 81100L, 81100L,
81100L, 81100L, 81100L, 81100L, 81100L, 81200L, 81200L, 81200L,
81200L), BQ1 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), BO1 = c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), AP1 = c(0L, 83435L, 83435L, 82165L, 82165L, 82165L, 82165L,
82165L, 82345L, 82345L, 82165L, 82345L, 82345L, 82165L, 82340L,
82340L, 82340L, 82340L, 82165L, 82340L), AQ1 = c(0L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), AO1 = c(0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), BP2 = c(0L, 0L, 80830L, 80830L,
80830L, 80830L, 80830L, 80835L, 80835L, 80835L, 80835L, 80835L,
80835L, 80835L, 80835L, 80835L, 81100L, 81100L, 81100L, 81100L
), BQ2 = c(0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), BO2 = c(0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), AP2 = c(0L,
0L, 0L, 83435L, 83200L, 82650L, 82650L, 82650L, 82650L, 82650L,
82650L, 82650L, 82650L, 82650L, 82650L, 82650L, 82650L, 82650L,
82650L, 82650L), AQ2 = c(0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), AO2 = c(0L, 0L,
0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), BP3 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 80830L, 80830L,
80830L, 80830L, 80830L, 80830L, 80830L, 80830L, 80830L, 80835L,
80835L, 80835L, 80835L), BQ3 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), BO3 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), AP3 = c(0L, 0L, 0L, 0L, 83435L, 83200L, 83200L,
83200L, 83200L, 83200L, 83200L, 83200L, 82900L, 82900L, 82900L,
82900L, 82900L, 82900L, 82900L, 82900L), AQ3 = c(0L, 0L, 0L,
0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), AO3 = c(0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), BP4 = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 80830L, 80830L, 80830L,
80830L), BQ4 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L), BO4 = c(0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L
), AP4 = c(0L, 0L, 0L, 0L, 0L, 83435L, 83430L, 83430L, 83430L,
83430L, 83430L, 83430L, 83200L, 83200L, 83200L, 83200L, 83200L,
83200L, 83200L, 83200L), AQ4 = c(0L, 0L, 0L, 0L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), AO4 = c(0L,
0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), BP5 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 80035L, 80035L, 80035L), BQ5 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 1L, 1L), BO5 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L), AP5 = c(0L, 0L, 0L,
0L, 0L, 0L, 83435L, 83435L, 83435L, 83435L, 83435L, 83435L, 83430L,
83430L, 83430L, 83430L, 83430L, 83430L, 83430L, 83430L), AQ5 = c(0L,
0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), AO5 = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), BP6 = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), BQ6 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), BO6 = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
AP6 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 83500L, 83500L,
83500L, 83435L, 83435L, 83435L, 83435L, 83435L, 83435L, 83435L,
83435L), AQ6 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), AO6 = c(0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), BP7 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), BQ7 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), BO7 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), AP7 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 83500L, 83500L,
83500L, 83500L, 83500L, 83500L, 83500L, 83500L), AQ7 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), AO7 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), BP8 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), BQ8 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), BO8 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), AP8 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), AQ8 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), AO8 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), BP9 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), BQ9 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), BO9 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), AP9 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), AQ9 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), AO9 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), BP10 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), BQ10 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), BO10 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), AP10 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), AQ10 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), AO10 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), C = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 3L, 4L, 3L, 3L, 4L,
4L, 4L, 3L, 3L), .Label = c("", "C", "M", "N"), class = "factor"),
Price = c(80830L, 83435L, 81100L, 82165L, 83200L, 82650L,
83430L, 80835L, 82345L, 83500L, 82165L, 82345L, 82900L, 82165L,
82340L, 83200L, 81200L, 80035L, 82165L, 82340L), Qty = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), OldPrice = c(NA, NA, NA, NA, NA, NA, NA,
NA, 82165L, NA, 82345L, 82165L, NA, 82345L, 82165L, NA, NA,
NA, 82340L, 82165L), OldQty = c(NA, NA, NA, NA, NA, NA, NA,
NA, 1L, NA, 1L, 1L, NA, 1L, 1L, NA, NA, NA, 1L, 1L), change = c(5,
5, 5, 5, 4, 4, 2, 4, 0, 0, 0, 0, 3, 0, 0, 2, 5, 1, 0, 0)), .Names = c("DateTime",
"Seq", "BP1", "BQ1", "BO1", "AP1", "AQ1", "AO1", "BP2", "BQ2",
"BO2", "AP2", "AQ2", "AO2", "BP3", "BQ3", "BO3", "AP3", "AQ3",
"AO3", "BP4", "BQ4", "BO4", "AP4", "AQ4", "AO4", "BP5", "BQ5",
"BO5", "AP5", "AQ5", "AO5", "BP6", "BQ6", "BO6", "AP6", "AQ6",
"AO6", "BP7", "BQ7", "BO7", "AP7", "AQ7", "AO7", "BP8", "BQ8",
"BO8", "AP8", "AQ8", "AO8", "BP9", "BQ9", "BO9", "AP9", "AQ9",
"AO9", "BP10", "BQ10", "BO10", "AP10", "AQ10", "AO10", "C", "Price",
"Qty", "OldPrice", "OldQty", "change"), row.names = c(NA, 20L
), class = "data.frame")
Scroll down to find dput(DF)
I finally got to the bottom of this. I figured I could create as many new columns as I wanted in order to calculate what I needed, so that's what I did. I think this is the best way to do it, but I am open to suggestions if there are other or even faster ways.
Here is my code:
# Order-book event scoring script.
# Reads the raw feed, scores every event by its position in the book (column
# `change`), resolves modifications whose position did not move (columns
# `side`, `diff`, `modify`) and prints the scores as an xts series.
# mywhich() and nwhich() come from the sourced files below.
# NOTE(review): as.xts() needs the xts package attached; no library() call is
# visible in this script -- confirm it is loaded elsewhere.

# Column layout: DateTime, Seq, ten levels of BP/BQ/BO/AP/AQ/AO (columns 3-62),
# then C, Price, Qty, OldPrice, OldQty (columns 63-67).
book_names <- paste0(
  rep(c("BP", "BQ", "BO", "AP", "AQ", "AO"), times = 10),
  rep(1:10, each = 6)
)
# `file` must already hold the path of the whitespace-separated input.
DF <- read.csv(
  file = file, header = FALSE, sep = "",
  col.names = c(
    "DateTime", "Seq", book_names,
    "C", "Price", "Qty", "OldPrice", "OldQty"
  )
)
DF <- DF[which(DF$DateTime != 0), ]
options(digits.secs = 3)
# timestamp conversion: the raw value is scaled by 1e-9 and read as seconds
# since 1970-01-01 GMT
DF$DateTime <- as.POSIXct(
  DF$DateTime / (10^9),
  origin = "1970-01-01", tz = "GMT"
)
source('~/R/mywhich.R')
source('~/nwhich.R')

# Columns that are searched for a price (same 1:62 range as before, so the
# positions reported by mywhich()/nwhich() are unchanged).
search_cols <- 1:62
is_cancel <- DF$C == "C"
is_modify <- DF$C == "M"

# matching with same line for all
DF$change <- apply(DF[, search_cols] == DF$Price, 1, mywhich)

# matching "C" with previous line
# NOTE(review): if the very first row is a "C" (or "M" below), `- 1` yields
# row 0, which R drops, and the lengths no longer agree -- confirm the feed
# never starts with such an event.
prev_of_cancel <- which(is_cancel) - 1
DF$change[is_cancel] <- apply(
  DF[prev_of_cancel, search_cols] == DF$Price[is_cancel], 1, mywhich
) * (-1)

# matching old price with previous line in "M"
prev_of_modify <- which(is_modify) - 1
pos2 <- apply(
  DF[prev_of_modify, search_cols] == DF$OldPrice[is_modify], 1, mywhich
)
# subtracting the two positions in "M"
DF$change[is_modify] <- DF$change[is_modify] - pos2

# Modifications whose position did not move. The same row index is used on
# both sides of every assignment below; previously the right-hand side of the
# `side` assignment was computed over ALL "M" rows while the left-hand side
# selected only those with change == 0, so the values were recycled or
# misaligned whenever some modification had a non-zero change.
mod_same <- which(is_modify & DF$change == 0)
mod_moved <- which(is_modify & DF$change != 0)

# arbitrary number to create side; only `mod_same` rows get a real value:
# position parity, 0 = even (ask side), 1 = odd (bid side)
DF$side <- 1000
if (length(mod_same) > 0) {
  # check this -- erroneous for modifications that have happened outside
  # level 5 -- might have to add another column for this
  DF$side[mod_same] <- apply(
    DF[mod_same, search_cols] == DF$Price[mod_same], 1, nwhich
  ) %% 2
}
#DF$side[DF[,63] == "M" & DF[,68] != 0] <- DF$change[DF[,63] == "M" & DF[,68] != 0]
#DF$side[DF[,63] == "N"] <- DF$change[DF[,63] == "N"]
#DF$side[DF[,63] == "C"] <- DF$change[DF[,63] == "C"]

# price difference: diff > 0 means OldPrice > Price (the price went down),
# diff < 0 means the price went up
DF$diff <- 0
DF$diff[mod_same] <- DF$OldPrice[mod_same] - DF$Price[mod_same]

# Final score. Rows matched by none of the rules below stay NA (for example a
# modification with change == 0 and diff == 0).
DF$modify <- NA_real_
on_ask <- DF$side == 0
on_bid <- DF$side == 1
# ask side -- price decrease (diff > 0)
DF$modify[on_ask & DF$diff > 0] <- -1
# ask side -- price increase (diff < 0)
DF$modify[on_ask & DF$diff < 0] <- 1
# bid side -- price increase (diff < 0)
DF$modify[on_bid & DF$diff < 0] <- -1
# bid side -- price decrease (diff > 0)
DF$modify[on_bid & DF$diff > 0] <- 1

# copying change to modify
new_rows <- which(DF$C == "N")
cancel_rows <- which(is_cancel)
DF$modify[new_rows] <- DF$change[new_rows]
DF$modify[cancel_rows] <- DF$change[cancel_rows]
DF$modify[mod_moved] <- DF$change[mod_moved]

df <- data.frame(Time = DF$DateTime, Modify = DF$modify)
finalxts <- as.xts(x = df$Modify, order.by = df$Time)
#finalxts <- aggregatets(finalxts, FUN = "sum", on = "minutes", k = 1, dropna = TRUE)
finalxts
Thank you for the help everyone.
Related
how to transform a matrix in a **hypergraph** of an object of class network
I have a matrix like below that is a hyper graph matrix, I transformed it to the object network , but I dunno how can I transform this matrix in a hypergraph of an object of class network, can you help me? any idea? mat<-as.matrix(data) g<- as.network.matrix(mat) g E1 E2 E3 E4 E5 E6 E7 E8 E9 E10 E11 E12 E13 E14 EVELYN 1 1 1 1 1 1 0 1 1 0 0 0 0 0 LAURA 1 1 1 0 1 1 1 1 0 0 0 0 0 0 THERESA 0 1 1 1 1 1 1 1 1 0 0 0 0 0 BRENDA 1 0 1 1 1 1 1 1 0 0 0 0 0 0 CHARLOTTE 0 0 1 1 1 0 1 0 0 0 0 0 0 0 FRANCES 0 0 1 0 1 1 0 1 0 0 0 0 0 0 ELEANOR 0 0 0 0 1 1 1 1 0 0 0 0 0 0 PEARL 0 0 0 0 0 1 0 1 1 0 0 0 0 0 RUTH 0 0 0 0 1 0 1 1 1 0 0 0 0 0 VERNE 0 0 0 0 0 0 1 1 1 0 0 1 0 0 MYRA 0 0 0 0 0 0 0 1 1 1 0 1 0 0 KATHERINE 0 0 0 0 0 0 0 1 1 1 0 1 1 1 SYLVIA 0 0 0 0 0 0 1 1 1 1 0 1 1 1 NORA 0 0 0 0 0 1 1 0 1 1 1 1 1 1 HELEN 0 0 0 0 0 0 1 1 0 1 1 1 0 0 DOROTHY 0 0 0 0 0 0 0 1 1 0 0 0 0 0 OLIVIA 0 0 0 0 0 0 0 0 1 0 1 0 0 0 FLORA 0 0 0 0 0 0 0 0 1 0 1 0 0 0
I guess mat is a incidence matrix, and I am not sure if you are looking for something like below if you are using network package as.matrix(network(t(mat)),matrix.type = "incidence") Besides, the incidence matrix visualization via igraph can be achieved from the following: g <- igraph::graph_from_incidence_matrix(mat) then plot(g) gives DATA mat <- structure(c(1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L), .Dim = c(18L, 14L), .Dimnames = list(c("EVELYN", "LAURA", "THERESA", "BRENDA", "CHARLOTTE", "FRANCES", "ELEANOR", "PEARL", "RUTH", "VERNE", "MYRA", "KATHERINE", "SYLVIA", "NORA", "HELEN", "DOROTHY", "OLIVIA", "FLORA"), c("E1", "E2", "E3", "E4", "E5", "E6", "E7", "E8", "E9", "E10", "E11", "E12", "E13", "E14")))
Conditional lead variable in R
I want to create conditional lead/lag variables that would capture the pre and post years of an agreement signed by countries. More precisely, I want to create the following variables: (1) a variable that is =1 in the 4 years before the agreement, 0 otherwise; (2) a variable that is =1 in the 5 years before the agreement; and (3) a variable that is =1 only in the 4 years after the ratification. I have country-year data (please see below for a sample of the data). X1 indicates whether a country has signed the agreement (=1) or not (=0). The variables I want to create (my expected output) are manually done in the sample data below, labeled as X1_pre4, X1_pre5 and X1_post4. The first captures the 4 years (or up to 4 years) before the agreement is signed. The second captures the 5 years before the agreement is signed. And the last variable captures the 5 years after the agreement is signed (it starts the same year as the agreement is signed, but it's fine if it starts after that, too). It has been suggested that I use some sort of "split-operate-unsplit" construct. But I personally think that this can be done in dplyr, using the mutate command. Currently, I've been trying to work with this logic: data$X1_pre4[data$year<="1972" & data$X1=="0" ] <- "1" But this is not good enough (far from it), as I am not sure how to group by country here. Even if I figure this out, it won't do the job, as I have over 100 X's (agreements). I simply need code that is much smarter. 
data <- structure(list(country = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("A", "B", "C"), class = "factor"), year = c(1970L, 1971L, 1972L, 1973L, 1974L, 1975L, 1976L, 1977L, 1978L, 1979L, 1980L, 1981L, 1982L, 1983L, 1984L, 1985L, 1986L, 1987L, 1988L, 1970L, 1971L, 1972L, 1973L, 1974L, 1975L, 1976L, 1977L, 1978L, 1979L, 1980L, 1981L, 1982L, 1983L, 1984L, 1985L, 1986L, 1987L, 1988L, 1970L, 1971L, 1972L, 1973L, 1974L, 1975L, 1976L, 1977L, 1978L, 1979L, 1980L, 1981L, 1982L, 1983L, 1984L, 1985L, 1986L, 1987L, 1988L, 1989L, 1990L, 1991L), X1 = c(0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), X1_pre4 = c(1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X1_pre5 = c(1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X1_post4 = c(0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L)), class = "data.frame", row.names = c(NA, -60L))
This would correspond to your logic: check_pre <- function(idx, k) { pre_vec <- sapply(1:length(idx), function(x) +any(idx[x:(pmin(x + k, length(idx)))] %in% 1)); pre_vec[idx == 1] <- 0; return(pre_vec) } check_post <- function(idx, k) sapply(1:length(idx), function(x) +any(idx[(pmax(x - k, 1)):x] %in% 1)) df %>% group_by(country) %>% mutate( idx = +( (lag(X1) == 0 & X1 == 1) | row_number() == 1 & X1 == 1), X1_pre4 = check_pre(idx, 4), X1_pre5 = check_pre(idx, 5), X1_post4 = check_post(idx, 4), idx = NULL ) Basically we create an index of when the agreement occurred, and then check for the rows before/after this index with custom functions check_pre and check_post. This is the output: country year X1 X1_pre4 X1_pre5 X1_post4 1 A 1970 0 1 1 0 2 A 1971 0 1 1 0 3 A 1972 1 0 0 1 4 A 1973 1 0 0 1 5 A 1974 1 0 0 1 6 A 1975 1 0 0 1 7 A 1976 1 0 0 1 8 A 1977 1 0 0 0 9 A 1978 1 0 0 0 10 A 1979 1 0 0 0 11 A 1980 1 0 0 0 12 A 1981 1 0 0 0 13 A 1982 1 0 0 0 14 A 1983 1 0 0 0 15 A 1984 1 0 0 0 16 A 1985 1 0 0 0 17 A 1986 1 0 0 0 18 A 1987 1 0 0 0 19 A 1988 1 0 0 0 20 B 1970 0 0 0 0 21 B 1971 0 0 0 0 22 B 1972 0 0 0 0 23 B 1973 0 0 1 0 24 B 1974 0 1 1 0 25 B 1975 0 1 1 0 26 B 1976 0 1 1 0 27 B 1977 0 1 1 0 28 B 1978 1 0 0 1 29 B 1979 1 0 0 1 30 B 1980 1 0 0 1 31 B 1981 1 0 0 1 32 B 1982 1 0 0 1 33 B 1983 1 0 0 0 34 B 1984 1 0 0 0 35 B 1985 1 0 0 0 36 B 1986 1 0 0 0 37 B 1987 1 0 0 0 38 B 1988 1 0 0 0 39 C 1970 1 0 0 1 40 C 1971 0 0 0 1 41 C 1972 0 0 0 1 42 C 1973 0 0 0 1 43 C 1974 0 0 0 1 44 C 1975 0 0 0 0 45 C 1976 0 0 0 0 46 C 1977 0 0 0 0 47 C 1978 0 0 0 0 48 C 1979 0 0 1 0 49 C 1980 0 1 1 0 50 C 1981 0 1 1 0 51 C 1982 0 1 1 0 52 C 1983 0 1 1 0 53 C 1984 1 0 0 1 54 C 1985 1 0 0 1 55 C 1986 1 0 0 1 56 C 1987 1 0 0 1 57 C 1988 1 0 0 1 58 C 1989 1 0 0 0 59 C 1990 1 0 0 0 60 C 1991 1 0 0 0 It corresponds to your desired output in majority of cases, however from row 39 onwards you don't have it marked as post-agreement - though it occurred in 1970. 
Either a typo or you'll need to further explain the logic.
R Printing ftable() output to csv with factor names
I'm working with ftable in R to create contingency tables. I want to print an ftable object to a csv, but when I use write.csv() on the ftable object the csv no longer lists the factor names that are included in the ftable on R. This is the type of output that I get Here's an example ftable in R structure(c(1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 2L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L), .Dim = c(12L, 7L), class = "ftable", row.vars = list( ï..petal_size = c("large ", "small", "small "), stem_length = c("long", "long ", "short", "short ")), col.vars = list(flow_color = c("blue", "green", "indigo ", "orange", "red ", "violet", "yellow"))) Is there a solution to this such that I can keep the factor names?
One option would be to use the function write.ftable but you will have a lot of manual work to do, due to the fact everything (in CSV file) will be written in a single column write.ftable(ftable(df), file = "table.csv", quote = FALSE) # And the otuput NOTE: WHEN OPENING CSV EVERYTHING WILL BE IN SINGLE COLUMN flow_color blue green indigo orange red violet yellow i..petal_size stem_length large long 1 0 1 1 2 1 1 long 0 0 0 0 0 0 0 short 0 0 0 0 0 1 1 short 0 1 0 1 0 0 0 small long 1 2 0 0 1 0 0 long 0 0 1 0 0 0 0 short 0 0 1 0 0 1 0 short 1 0 0 0 0 0 1 small long 0 0 0 0 0 0 0 long 0 0 0 0 0 0 0 short 0 0 0 1 0 0 0 short 0 0 0 0 0 0 0 Or another option using stats to first format ftable and then use write.table df <- ftable(df) cont <- stats:::format.ftable(df, quote = FALSE) write.table(cont, sep = ";", file = "table.csv") And the output
how to prepare an adjacency matrix for network analysis
I am trying to convert the raw data below to an adjacent matrix by assigning the value on the column "s_chloramphenicol" in preparation for a network analysis. df <- structure(list(studyid0 = c(1L, 5L, 6L, 8L, 9L, 11L, 3052L, 3057L, 3058L, 3058L, 3060L, 3063L, 3064L, 3067L), s_chloramphenicol = c(0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L)), row.names = c(NA, -14L), class = "data.frame", .Names = c("studyid0", "s_chloramphenicol" )) The expected output is df<-structure(list(`1` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `5` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `6` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `8` = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L), `9` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `11` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `3052` = c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L), `3057` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `3058` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `3060` = c(0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L), `3063` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `3064` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `3067` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("1", "5", "6", "8", "9", "11", "3052", "3057", "3058", "3060", "3063", "3064", "3067"), class = "data.frame", row.names = c(1L, 5L, 6L, 8L, 9L, 11L, 3052L, 3057L, 3058L, 3060L, 3063L, 3064L, 3067L))
You can use the function outer: df2 <- outer(df$s_chloramphenicol, df$s_chloramphenicol) rownames(df2) <- colnames(df2) <- df$studyid0 df2 Output: 1 5 6 8 9 11 3052 3057 3058 3058 3060 3063 3064 3067 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 0 0 0 1 0 0 1 0 0 0 1 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3052 0 0 0 1 0 0 1 0 0 0 1 0 0 0 3057 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3058 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3058 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3060 0 0 0 1 0 0 1 0 0 0 1 0 0 0 3063 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3064 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3067 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Fourth Corner Algorithm in R
This is a question about the fourthcorner algorithm in R. It's designed to measure the relationship between three different tables: an n x m table (table R) of m environmental variables (columns) at n sites (rows), an n x p table (table L) of p abundances (columns) at n sites (rows), and a p x s table (table Q) of s traits (columns) for p species (rows). The fourthcorner function is in the package ade4. All three of my dataframes are binary (0s and 1s denoting the presence or absence of a variable, a species at a site, or a trait, respectively). I've tried using "yes" and "no" instead of 0s and 1s without success. Here are some example matrices in the format I'm using: tabQ Trait1 Trait2 Trait3 Trait4 Sp1 0 1 0 0 Sp2 0 1 0 0 Sp3 1 0 1 0 Sp4 1 0 1 0 Sp5 0 1 0 0 Sp6 0 1 0 0 Sp7 0 0 0 1 Sp8 0 0 0 1 tabR EnV1 EnV2 EnV3 EnV4 Site1 1 1 1 1 Site2 1 1 0 1 Site3 0 1 0 1 Site4 1 1 1 1 Site5 1 1 0 1 Site6 0 1 0 0 Site7 0 1 0 1 Site8 0 1 0 1 Site9 1 1 1 1 Site10 1 1 0 1 Site11 1 1 1 1 Site12 0 1 0 0 Site13 1 1 0 1 Site14 1 1 0 1 Site15 0 1 0 1 Site16 1 1 0 1 Site17 0 1 0 1 Site18 1 1 1 1 Site19 1 1 0 1 Site20 1 1 0 1 tabL Sp1 Sp2 Sp3 Sp4 Sp5 Sp6 Sp7 Sp8 Site1 1 1 0 0 0 0 0 0 Site2 1 1 0 0 0 0 0 0 Site3 1 1 0 0 0 0 0 0 Site4 1 0 0 0 0 0 0 1 Site5 1 1 0 0 0 0 0 0 Site6 1 0 0 0 1 0 0 0 Site7 1 0 0 0 0 0 0 0 Site8 0 0 0 0 1 0 0 0 Site9 1 0 0 0 0 0 0 0 Site10 1 1 0 0 0 0 0 0 Site11 0 0 1 1 0 0 0 0 Site12 0 0 0 0 0 1 0 0 Site13 1 0 0 0 0 0 0 0 Site14 0 0 0 0 1 0 0 0 Site15 1 1 0 0 0 0 0 0 Site16 1 1 0 0 0 0 0 0 Site17 1 0 0 0 0 0 0 0 Site18 0 0 1 0 0 0 0 0 Site19 1 0 0 0 0 0 0 0 Site20 1 1 0 0 0 0 1 0 I read these dataframes into R from text files, and I specify that the first column is row names. This is the error I get when I try to use the fourthcorner function on my matrices: fourth1=fourthcorner(tabR,tabL,tabQ,nrepet=1) Error in apply(sim, 2, function(x) length(na.omit(x))) : dim(X) must have a positive length I don't understand where the problem lies, is it a formatting issue? 
If so, should I reformat one of the matrices? Which one is causing the trouble? Or can I not use binary traits and environmental variables for this function? In other words, can I solve this problem by changing a piece of code, or is it impossible to use this function for this question? As an additional tidbit of information, I did email the author of the function, but unfortunately I did not understand his response fully, possibly because my R skills still leave much to be desired. Here is his response, in case it is helpful: Q could contain quantitative or qualitative traits. In R, qualitative traits should be coded as factors to obtain adapted statistics (i.e. chi2 or eta2). If you code qualitative variables as dummy variables, they would be considered as quantitative. Thank you very much for any and all insight.
I noted that your example fails only nrepet is equal to one, so if you can use any other positive number you should be fine. However, if you do need nrepet=1, you should contact with the author of ade4 and ask to him/her to fix the fourthcorner function code. I traced back the error and found that fourthcorner calls as.krandtest with sim = res$tabD[-1,] where res$tabD is a matrix with nrepet+1 rows. When nrepet=1 and you remove one row from a two-row matrix, R automatically converts the resulting one-row matrix into a vector, but as.krandtest function expects sim to be a matrix and thus raises the error. Here is your input data just in case somebody else would like to answer your question: tabR structure(list(EnV1 = c(1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L), EnV2 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), EnV3 = c(1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L), EnV4 = c(1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("EnV1", "EnV2", "EnV3", "EnV4"), row.names = c("Site1", "Site2", "Site3", "Site4", "Site5", "Site6", "Site7", "Site8", "Site9", "Site10", "Site11", "Site12", "Site13", "Site14", "Site15", "Site16", "Site17", "Site18", "Site19", "Site20"), class = "data.frame") tabL structure(list(Sp1 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L), Sp2 = c(1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L), Sp3 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L), Sp4 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Sp5 = c(0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L), Sp6 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L ), Sp7 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L), Sp8 
= c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("Sp1", "Sp2", "Sp3", "Sp4", "Sp5", "Sp6", "Sp7", "Sp8"), row.names = c("Site1", "Site2", "Site3", "Site4", "Site5", "Site6", "Site7", "Site8", "Site9", "Site10", "Site11", "Site12", "Site13", "Site14", "Site15", "Site16", "Site17", "Site18", "Site19", "Site20"), class = "data.frame") tabQ structure(list(Trait1 = c(0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L), Trait2 = c(1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L), Trait3 = c(0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L), Trait4 = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L)), .Names = c("Trait1", "Trait2", "Trait3", "Trait4"), row.names = c("Sp1", "Sp2", "Sp3", "Sp4", "Sp5", "Sp6", "Sp7", "Sp8"), class = "data.frame")