choices combination,order & tree - r

I have following data that represents sequence of person's choice between four values (f1,f2,c1,c2) :
df=structure(list(combi = structure(c(24L, 8L, 3L, 19L, 4L, 23L,
15L, 12L, 14L, 22L, 5L, 13L, 18L, 9L, 2L, 25L, 11L, 7L, 21L,
10L, 6L, 17L, 20L, 16L), .Label = c("", "c1-c2-f1-f2", "c1-c2-f2-f1",
"c1-f1-c2-f2", "c1-f1-f2-c2", "c1-f2-c2-f1", "c1-f2-f1-c2", "c2-c1-f1-f2",
"c2-c1-f2-f1", "c2-f1-c1-f2", "c2-f1-f2-c1", "c2-f2-c1-f1", "c2-f2-f1-c1",
"f1-c1-c2-f2", "f1-c1-f2-c2", "f1-c2-c1-f2", "f1-c2-f2-c1", "f1-f2-c1-c2",
"f1-f2-c2-c1", "f2-c1-c2-f1", "f2-c1-f1-c2", "f2-c2-c1-f1", "f2-c2-f1-c1",
"f2-f1-c1-c2", "f2-f1-c2-c1"), class = "factor"), nb = c(10L,
0L, 2L, 4L, 1L, 5L, 1L, 2L, 1L, 3L, 1L, 0L, 3L, 5L, 0L, 18L,
5L, 2L, 5L, 0L, 4L, 4L, 11L, 2L)), .Names = c("combi", "nb"), class = "data.frame", row.names = c(1L,
3L, 5L, 7L, 9L, 11L, 13L, 15L, 17L, 19L, 21L, 23L, 25L, 27L,
29L, 31L, 33L, 35L, 37L, 39L, 41L, 43L, 45L, 47L))
I'm wondering if there's a tree representation (or something else) that could quantify the number of choices at each step, taking into account the sub-chains that are common. Example:
f2 (52) -f1 (28) -c1-c2 (10)
-c2-c1 (18)
f2 (52): there are 52 chains beginning with f2. f1 (28): there are 28 chains beginning with f2-f1.
Thanks a lot.

If you read the combi values in (using as.character) you can expand those values to character columns:
df2 <- cbind(df, read.table(text=as.character(df$combi), sep="-",stringsAsFactors=FALSE) )
Then you can tabulate at whatever level you want:
xtabs(nb~V1, data=df2) # First level only
#V1
#c1 c2 f1 f2
#10 12 15 52
xtabs(nb~paste(V1,V2,sep="-"), data=df2) # first and second
#--
# paste(V1, V2, sep = "-")
#c1-c2 c1-f1 c1-f2 c2-c1 c2-f1 c2-f2 f1-c1 f1-c2 f1-f2 f2-c1 f2-c2 f2-f1
# 2 2 6 5 5 2 2 6 7 16 8 28
You can also deploy the addmargins function to compactly display the sub-totals for the two "most senior" positions:
addmargins( xtabs(nb~V1+V2, data=df2))
#=========
V2
V1 c1 c2 f1 f2 Sum
c1 0 2 2 6 10
c2 5 0 5 2 12
f1 2 6 0 7 15
f2 16 8 28 0 52
Sum 23 16 35 15 89
This could be "flattened" with ftable:
ftable( addmargins( xtabs(nb~V1+V2, data=df2)), row.vars=1:2)
V1 V2
c1 c1 0
c2 2
f1 2
f2 6
Sum 10
c2 c1 5
c2 0
f1 5
f2 2
Sum 12
f1 c1 2
c2 6
f1 0
f2 7
Sum 15
f2 c1 16
c2 8
f1 28
f2 0
Sum 52
Sum c1 23
c2 16
f1 35
f2 15
Sum 89
And the final tally would be:
xtabs(nb~paste(V1,V2,V3,V4,sep="-"), data=df2)
#-----
paste(V1, V2, V3, V4, sep = "-")
c1-c2-f1-f2 c1-c2-f2-f1 c1-f1-c2-f2 c1-f1-f2-c2 c1-f2-c2-f1 c1-f2-f1-c2 c2-c1-f1-f2 c2-c1-f2-f1
0 2 1 1 4 2 0 5
c2-f1-c1-f2 c2-f1-f2-c1 c2-f2-c1-f1 c2-f2-f1-c1 f1-c1-c2-f2 f1-c1-f2-c2 f1-c2-c1-f2 f1-c2-f2-c1
0 5 2 0 1 1 2 4
f1-f2-c1-c2 f1-f2-c2-c1 f2-c1-c2-f1 f2-c1-f1-c2 f2-c2-c1-f1 f2-c2-f1-c1 f2-f1-c1-c2 f2-f1-c2-c1
3 4 11 5 3 5 10 18
To see it all in a column:
as.matrix( xtabs(nb~paste(V1,V2,V3,V4,sep="-"), data=df2) )
#----------------
[,1]
c1-c2-f1-f2 0
c1-c2-f2-f1 2
c1-f1-c2-f2 1
c1-f1-f2-c2 1
c1-f2-c2-f1 4
c1-f2-f1-c2 2
c2-c1-f1-f2 0
c2-c1-f2-f1 5
c2-f1-c1-f2 0
c2-f1-f2-c1 5
c2-f2-c1-f1 2
c2-f2-f1-c1 0
f1-c1-c2-f2 1
f1-c1-f2-c2 1
f1-c2-c1-f2 2
f1-c2-f2-c1 4
f1-f2-c1-c2 3
f1-f2-c2-c1 4
f2-c1-c2-f1 11
f2-c1-f1-c2 5
f2-c2-c1-f1 3
f2-c2-f1-c1 5
f2-f1-c1-c2 10
f2-f1-c2-c1 18
I suppose a "final answer" with all the subtotals might be:
ftable( addmargins( xtabs(nb~V1+V2+paste(V3,V4,sep="-"), data=df2)), row.vars=1:3)
However, that has so many zero entries that I hesitate to recommend. You could strip out zero rows:
my.ftable <- ftable( addmargins( xtabs(nb~V1+V2+paste(V3,V4,sep="-"), data=df2)), row.vars=1:3)
my.df.table <- as.data.frame(my.ftable)
names(my.df.table)[3] <- "3rd_4th"
my.df.table[ my.df.table$Freq > 0, ]
#---------
V1 V2 3rd_4th Freq
14 f2 f1 c1-c2 10
15 Sum f1 c1-c2 10
18 f1 f2 c1-c2 3
20 Sum f2 c1-c2 3
23 f1 Sum c1-c2 3
24 f2 Sum c1-c2 10
25 Sum Sum c1-c2 13
34 f2 c2 c1-f1 3
35 Sum c2 c1-f1 3
42 c2 f2 c1-f1 2
45 Sum f2 c1-f1 2
47 c2 Sum c1-f1 2
49 f2 Sum c1-f1 3
50 Sum Sum c1-f1 5
# and many more rows
#... until
321 c1 Sum Sum 10
322 c2 Sum Sum 12
323 f1 Sum Sum 15
324 f2 Sum Sum 52
325 Sum Sum Sum 89

The data.tree package specialises in tree representation. It is based on splitting variables in a hierarchical order, for example world -> continent -> country -> city. In your case, you've mentioned every order for c1, c2, f1 and f2. Likely you'd need to do four tree plots e.g. c1 --> either c2, f1 or f2, each leading to the two unused values, and then plot them.
A basic example starting with c1, and then splitting off, and not including specific values:
library(data.tree)
c1 <- Node$new("c1") # 1st level chain, "c1"
c2 <- c1$AddChild("c2") # new 2nd level chain, "c2", off c1
f1 <- c2$AddChild("f1-f2") # new level off c2
f2 <- c2$AddChild("f2-f1") # new level off c2
f1 <- c1$AddChild("f1") # new 2nd level chain, "f1", off c1
c2 <- f1$AddChild("c2-f2") # new level off f1
f2 <- f1$AddChild("f2-c2") # new level off f1
f2 <- c1$AddChild("f2") # new 2nd level chain, "f2", off c1
c2 <- f2$AddChild("c2-f1") # new level off f2
f1 <- f2$AddChild("f1-c2") # new level off f2
print(c1)
levelName
1 c1
2 ¦--c2
3 ¦ ¦--f1-f2
4 ¦ °--f2-f1
5 ¦--f1
6 ¦ ¦--c2-f2
7 ¦ °--f2-c2
8 °--f2
9 ¦--c2-f1
10 °--f1-c2
plot(c1)

Maybe not exactly what you mean by a "tree structure" but this gives you the numbers
in a table using base R. It should be easy to format that as you like from this result.
# Example data: one row per ordering of the four choices ("combi") together
# with the number of times that ordering was observed ("nb").
df <- structure(list(combi = structure(c(24L, 8L, 3L, 19L, 4L, 23L,
15L, 12L, 14L, 22L, 5L, 13L, 18L, 9L, 2L, 25L, 11L, 7L, 21L,
10L, 6L, 17L, 20L, 16L), .Label = c("", "c1-c2-f1-f2", "c1-c2-f2-f1",
"c1-f1-c2-f2", "c1-f1-f2-c2", "c1-f2-c2-f1", "c1-f2-f1-c2", "c2-c1-f1-f2",
"c2-c1-f2-f1", "c2-f1-c1-f2", "c2-f1-f2-c1", "c2-f2-c1-f1", "c2-f2-f1-c1",
"f1-c1-c2-f2", "f1-c1-f2-c2", "f1-c2-c1-f2", "f1-c2-f2-c1", "f1-f2-c1-c2",
"f1-f2-c2-c1", "f2-c1-c2-f1", "f2-c1-f1-c2", "f2-c2-c1-f1", "f2-c2-f1-c1",
"f2-f1-c1-c2", "f2-f1-c2-c1"), class = "factor"), nb = c(10L,
0L, 2L, 4L, 1L, 5L, 1L, 2L, 1L, 3L, 1L, 0L, 3L, 5L, 0L, 18L,
5L, 2L, 5L, 0L, 4L, 4L, 11L, 2L)), .Names = c("combi", "nb"), class = "data.frame", row.names = c(1L,
3L, 5L, 7L, 9L, 11L, 13L, 15L, 17L, 19L, 21L, 23L, 25L, 27L,
29L, 31L, 33L, 35L, 37L, 39L, 41L, 43L, 45L, 47L))

# Split every chain into its four steps. strsplit() is already vectorised, so
# it is called directly instead of being wrapped in sapply().
steps <- strsplit(as.character(df$combi), split = "-", fixed = TRUE)
tmp <- do.call(rbind, steps)
colnames(tmp) <- paste0("str", seq_len(ncol(tmp)))
tmp <- data.frame(df, tmp, stringsAsFactors = FALSE)

# The last two steps are reported together as one "str3" label.
tmp$str3 <- paste(tmp$str3, tmp$str4, sep = "-")

# Sum the counts for every prefix of length 1, 2 and 3 (= full chain).
str1 <- aggregate(list(nb_str1 = tmp[, "nb"]), tmp["str1"], sum)
str2 <- aggregate(list(nb_str2 = tmp[, "nb"]), tmp[c("str1", "str2")], sum)
str3 <- aggregate(list(nb_str3 = tmp[, "nb"]), tmp[c("str1", "str2", "str3")], sum)

# Attach the prefix totals to each full chain; merge() joins on the shared
# strN columns.
tmp <- merge(str3, str1)
tmp <- merge(tmp, str2)
tmp <- tmp[, c("str1", "nb_str1", "str2", "nb_str2", "str3", "nb_str3")]
tmp
#> str1 nb_str1 str2 nb_str2 str3 nb_str3
#> 1 c1 10 c2 2 f1-f2 0
#> 2 c1 10 c2 2 f2-f1 2
#> 3 c1 10 f1 2 c2-f2 1
#> 4 c1 10 f1 2 f2-c2 1
#> 5 c1 10 f2 6 c2-f1 4
#> 6 c1 10 f2 6 f1-c2 2
#> 7 c2 12 c1 5 f1-f2 0
#> 8 c2 12 c1 5 f2-f1 5
#> 9 c2 12 f1 5 c1-f2 0
#> 10 c2 12 f1 5 f2-c1 5
#> 11 c2 12 f2 2 c1-f1 2
#> 12 c2 12 f2 2 f1-c1 0
#> 13 f1 15 c1 2 c2-f2 1
#> 14 f1 15 c1 2 f2-c2 1
#> 15 f1 15 c2 6 c1-f2 2
#> 16 f1 15 c2 6 f2-c1 4
#> 17 f1 15 f2 7 c1-c2 3
#> 18 f1 15 f2 7 c2-c1 4
#> 19 f2 52 c1 16 c2-f1 11
#> 20 f2 52 c1 16 f1-c2 5
#> 21 f2 52 c2 8 c1-f1 3
#> 22 f2 52 c2 8 f1-c1 5
#> 23 f2 52 f1 28 c1-c2 10
#> 24 f2 52 f1 28 c2-c1 18
Created on 2018-03-15 by the reprex package (v0.2.0).

Related

Finding columns that contain values based on another column

I have the following data frame:
Step 1 2 3
1 5 10 6
2 5 11 5
3 5 13 9
4 5 15 10
5 13 18 10
6 15 20 10
7 17 23 10
8 19 25 10
9 21 27 13
10 23 30 7
I would like to retrieve the columns that satisfy one of the following conditions: if step 1 = step 4 or step 4 = step 8. In this case, column 1 and 3 should be retrieved. Column 1 because the value at Step 1 = value at step 4 (i.e., 5), and for column 3, the value at step 4 = value at step 8 (i.e., 10).
I don't know how to do that in R. Can someone help me please?
You can get the column indices by the following code:
df[1, -1] == df[4, -1] | df[4, -1] == df[8, -1]
# X1 X2 X3
# 1 TRUE FALSE TRUE
# data
df <- structure(list(Step = 1:10, X1 = c(5L, 5L, 5L, 5L, 13L, 15L,
17L, 19L, 21L, 23L), X2 = c(10L, 11L, 13L, 15L, 18L, 20L, 23L,
25L, 27L, 30L), X3 = c(6L, 5L, 9L, 10L, 10L, 10L, 10L, 10L, 13L,
7L)), class = "data.frame", row.names = c(NA, -10L))

Converting a list of vectors and numbers (from replicate) into a data frame

After running the replicate() function [a close relative of lapply()] on some data I ended up with an output that looks like this
myList <- structure(list(c(55L, 13L, 61L, 38L, 24L), 6.6435972422341, c(37L, 1L, 57L, 8L, 40L), 5.68336098665417, c(19L, 10L, 23L, 52L, 60L ),
5.80430476680636, c(39L, 47L, 60L, 14L, 3L), 6.67554407822367,
c(57L, 8L, 53L, 6L, 2L), 5.67149520387856, c(40L, 8L, 21L,
17L, 13L), 5.88446015238962, c(52L, 21L, 22L, 55L, 54L),
6.01685181395007, c(12L, 7L, 1L, 2L, 14L), 6.66299948053721,
c(41L, 46L, 21L, 30L, 6L), 6.67239635545512, c(46L, 31L,
11L, 44L, 32L), 6.44174324641076), .Dim = c(2L, 10L), .Dimnames = list(
c("reps", "score"), NULL))
In this case the vectors of integers are indexes that went into a function that I won't get into and the scalar-floats are scores.
I'd like a data frame that looks like
Index 1 Index 2 Index 3 Index 4 Index 5 Score
55 13 61 38 24 6.64
37 1 57 8 40 5.68
19 10 23 52 60 5.80
and so on.
Alternatively, a matrix of the indexes and an array of the values would be fine too.
Things that haven't worked for me.
data.frame(t(random.out)) # just gives a data frame with a column of vectors and another of scalars
cbind(t(random.out)) # same as above
do.call(rbind, random.out) # intersperses vectors and scalars
I realize other people have similar problems,
eg. Convert list of vectors to data frame
but I can't quite find an example with this particular kind of vectors and scalars together.
myList[1,] is a list of vectors, so you can combine them into a matrix with do.call and rbind. myList[2,] is a list of single scores, so you can combine them into a vector with unlist:
cbind(as.data.frame(do.call(rbind, myList[1,])), Score=unlist(myList[2,]))
# V1 V2 V3 V4 V5 Score
# 1 55 13 61 38 24 6.643597
# 2 37 1 57 8 40 5.683361
# 3 19 10 23 52 60 5.804305
# 4 39 47 60 14 3 6.675544
# 5 57 8 53 6 2 5.671495
# 6 40 8 21 17 13 5.884460
# 7 52 21 22 55 54 6.016852
# 8 12 7 1 2 14 6.662999
# 9 41 46 21 30 6 6.672396
# 10 46 31 11 44 32 6.441743

Handling ties in finding index of n th maximum value in R

I am working on a dataframe and trying to find the index of nth maximum value (n varies by a loop), however, in the columns I have tied values and the program throws an error. Below is a sample dataset. I am basically trying to generate a similar dataframe, but with only the index values of all the values in the column vector of the dataframe.
For the output DF, column 1 in the output DF will have index values of elements of Refer_1, so Output_DF[1,1] will have the index for highest value, while Output_DF[10,1] will have the index of lowest value. Below is the input DF.
Input
1 17
2 21
3 13
4 26
5 204
6 36
7 14
8 25
9 45
10 37
Output (index values)
5
9
10
6
4
8
2
1
7
3
I am currently using which, unlist and partial together to get the indexes, however, I am unable to rectify the error. Note that the ties can occur with any nth maximum value (not necessarily the column maxima).
which(Consolidated_data_new[,i]==unlist(sort(Consolidated_data_new[,i],partial=j)[j]))
Please note that I want the code to return only one value at a time, and handle the 2nd tied value in the next loop iteration.
Please help solve this.
Regards,
library(data.table)
# dput() of a data.table prints `.internal.selfref = <pointer: ...>`, which is
# not valid R syntax, so the table is rebuilt here from the same column values.
DT <- data.table(
  Refer_1 = c(11L, 15L, 7L, 19L, 104L, 24L, 11L, 22L, 39L, 19L),
  Refer_2 = c(17L, 21L, 13L, 25L, 204L, 36L, 14L, 25L, 45L, 37L)
)
# For every column, return the row indices sorted from the largest value to
# the smallest; order() gives tied values distinct, consecutive positions.
DT[, lapply(.SD, order, decreasing = TRUE)]
Refer_1 Refer_2
1: 5 5
2: 9 9
3: 6 10
4: 8 6
5: 4 4
6: 10 8
7: 2 2
8: 1 1
9: 7 7
10: 3 3
Your comments suggest you are working with a dataframe that has more than one column and that you want an output dataframe that has the results of order with decreasing=TRUE applied to every column:
> DF[2] <- sample(1:300, 10)
> DF[3] <- sample(1:300, 10)
> DF
Input V2 V3
1 17 210 3
2 21 72 4
3 13 263 1
4 26 249 6
5 204 223 10
6 36 83 7
7 14 107 2
8 25 295 5
9 45 198 9
10 37 112 8
> ordDF <- as.data.frame(lapply(DF, order, decreasing=TRUE))
> names(ordDF) <- paste0("res", 1:length(DF) )
> ordDF
res1 res2 res3
1 5 8 4
2 9 3 9
3 10 4 2
4 6 5 7
5 4 1 10
6 8 9 8
7 2 10 1
8 1 7 6
9 7 6 3
10 3 2 5
> dput(ordDF)
structure(list(res1 = c(5L, 9L, 10L, 6L, 4L, 8L, 2L, 1L, 7L,
3L), res2 = c(8L, 3L, 4L, 5L, 1L, 9L, 10L, 7L, 6L, 2L), res3 = c(4L,
9L, 2L, 7L, 10L, 8L, 1L, 6L, 3L, 5L)), .Names = c("res1", "res2",
"res3"), row.names = c(NA, -10L), class = "data.frame")

Get number of rows with same value in one column and positive binary value in another column

(Sorry for the weird title but I just couldn't think of a short way to put this)
Since I managed to oversimplify my problem in the last question I asked, I'm providing you with the actual problem this time.
The provided dataframe contains the columns "usr" , "usrMsgCnt" and "isRefound" with usr being a name, usrMsgCnt being a number and isRefound being binary.
A new column is to be added whose value is calculated as follows:
usrMsgCnt/ number of rows where usr is equal to the usr of this line
and isRefound is equal to 1
For the example data's first row the new value would be:
9 / 5 with 5 being produced by
length(data$usr[data$usr=="Jan.Schrader" & data$isRefound==1])
Looping through this is not an option considering the size of the original dataset
Here's the dput of a tiny chunk of the data
structure(list(usr = structure(c(21L, 21L, 21L, 21L, 6L, 5L,
6L, 6L, 6L, 21L, 20L, 21L, 6L, 20L, 21L, 21L, 21L, 6L, 6L, 6L
), .Label = c("alsmith", "Amanda.Coles", "Andrew.Coles", "babsimieth",
"Bernd.Ludwig", "Bernhard.Schiemann", "bfueck", "Bram.Ridder",
"brian.tripney", "carlosgardeazabal", "christine.elsweiler",
"cmfinner", "daniel.goncalves", "david", "de56", "eko.ma", "freundlu",
"gmcphail", "ian.ferguson", "Ian.Ruthven", "Jan.Schrader", "jearmour",
"jyang", "Laura.Schnall", "Marc.Roper", "marek.maleika", "Martin.Hacker",
"martin.scholz", "maziminke", "mclanger", "Michael.Cashmore",
"morgan.harvey", "mrussell", "msherrif", "murray.wood", "Nadine.Mahrholz",
"noam.ascher", "pburns", "Peter.Gregory", "raina", "robertnm",
"ronald.teijeira", "ronaldtf", "sbenus", "starmstr", "steve.neely",
"Sven.Friedemann", "tinchen"), class = "factor"), usrMsgCnt = c(9L,
9L, 9L, 9L, 5L, 0L, 5L, 5L, 5L, 9L, 0L, 9L, 5L, 0L, 9L, 9L, 9L,
37L, 37L, 37L), isRefound = c(0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L,
1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L)), .Names = c("usr",
"usrMsgCnt", "isRefound"), row.names = c(NA, 20L), class = "data.frame")
Assuming isRefound is actually binary:
library(data.table)
DT <- data.table(DF,key="usr")
DT[,newvar:=usrMsgCnt/sum(isRefound),by=usr]
Edit: If the order is essential, you should not set the key (which orders the data.table) and create an index variable (for safety).
DT <- data.table(DF)
DT[,id:=.I]
DT[,newvar:=usrMsgCnt/sum(isRefound),by=usr]
print(DT)
# usr usrMsgCnt isRefound id newvar
# 1: Jan.Schrader 9 0 1 1.8
# 2: Jan.Schrader 9 1 2 1.8
# 3: Jan.Schrader 9 1 3 1.8
# 4: Jan.Schrader 9 1 4 1.8
# 5: Bernhard.Schiemann 5 1 5 1.0
# 6: Bernd.Ludwig 0 0 6 NaN
# 7: Bernhard.Schiemann 5 0 7 1.0
# 8: Bernhard.Schiemann 5 1 8 1.0
# 9: Bernhard.Schiemann 5 1 9 1.0
# 10: Jan.Schrader 9 1 10 1.8
# 11: Ian.Ruthven 0 0 11 NaN
# 12: Jan.Schrader 9 0 12 1.8
# 13: Bernhard.Schiemann 5 1 13 1.0
# 14: Ian.Ruthven 0 0 14 NaN
# 15: Jan.Schrader 9 0 15 1.8
# 16: Jan.Schrader 9 0 16 1.8
# 17: Jan.Schrader 9 1 17 1.8
# 18: Bernhard.Schiemann 37 0 18 7.4
# 19: Bernhard.Schiemann 37 1 19 7.4
# 20: Bernhard.Schiemann 37 0 20 7.4
The same conceptual approach can be used with the base R approach and plyr approach demonstrated at your previous question:
within(DF, {
newvar <- usrMsgCnt/ave(isRefound, usr, FUN = sum)
})
library(plyr)
ddply(DF, .(usr), transform,
newvar = usrMsgCnt/sum(isRefound))
However, performance of the data.table package will be superior for huge datasets.

Subset of data with criteria of two columns

I would like to create a subset of data that consists of Units that have a higher score in QTR 4 than QTR 1 (upward trend). Doesn't matter if QTR 2 or 3 are present.
Unit QTR Score
5 4 34
1 1 22
5 3 67
2 4 78
3 2 39
5 2 34
1 2 34
5 1 67
1 3 70
1 4 89
3 4 19
Subset would be:
Unit QTR Score
1 1 22
1 2 34
1 3 70
1 4 89
I've tried variants of something like this:
upward_subset <- subset(mydata,Unit if QTR=4~Score > QTR=1~Score)
Thank you for your time
If the dataframe is named "d", then this succeeds on your test set:
# Per Unit, test whether the QTR 4 score exceeds the QTR 1 score. isTRUE()
# turns the empty comparison (a Unit missing either quarter) into FALSE.
upward <- vapply(
  split(d, d["Unit"]),
  function(dd) isTRUE(dd[dd$QTR == 4, "Score"] > dd[dd$QTR == 1, "Score"]),
  logical(1)
)
# Match on the Unit labels of the TRUE groups. Matching d$Unit against the
# logical values themselves would coerce TRUE to 1 and select Unit 1 only by
# coincidence.
d[d$Unit %in% names(upward)[upward], ]
#-------------
Unit QTR Score
2 1 1 22
7 1 2 34
9 1 3 70
10 1 4 89
An alternative in two steps:
# Step 1: per Unit, is the QTR 4 score higher than the QTR 1 score?
# Units lacking either quarter produce logical(0), which unlist() drops.
result <- unlist(
  by(
    test,
    test$Unit,
    function(x) x$Score[x$QTR == 4] > x$Score[x$QTR == 1]
  )
)
# Step 2: keep every row belonging to a Unit flagged TRUE.
test[test$Unit %in% names(result)[result], ]
Unit QTR Score
2 1 1 22
7 1 2 34
9 1 3 70
10 1 4 89
A solution using data.table (Probably there are better versions than what I have at the moment).
Note: Assuming a QTR value for a given Unit is unique
Data:
df <- structure(list(Unit = c(5L, 1L, 5L, 2L, 3L, 5L, 1L, 5L, 1L, 1L,
3L), QTR = c(4L, 1L, 3L, 4L, 2L, 2L, 2L, 1L, 3L, 4L, 4L), Score = c(34L,
22L, 67L, 78L, 39L, 34L, 34L, 67L, 70L, 89L, 19L)), .Names = c("Unit",
"QTR", "Score"), class = "data.frame", row.names = c(NA, -11L
))
Solution:
dt <- data.table(df, key=c("Unit", "QTR"))
dt[, Score[Score[QTR == 4] > Score[QTR == 1]], by=Unit]
Unit V1
1: 1 22
2: 1 34
3: 1 70
4: 1 89

Resources