Testing for multiple identical columns in R

Is there a short way to test for identity over multiple columns?
For example, over this input
library(data.table)
data = data.table(one = c(1,2,3,4), two = c(7,8,9,10), three = c(1,2,3,4), four = c(1,2,3,4))
Is there something that would return all the columns that are identical to data$one? Something like
allcolumnsidentity(data$one, data) # compares all columns with respect to data$one
Should return (TRUE, FALSE, TRUE, TRUE) since data$three and data$four are identical to data$one.
I saw the identical() and compare() functions, but they only compare two columns at a time. Is there a generalized way to do it?
Best wishes

Here are a few more possible solutions and a benchmark on a somewhat bigger data set
n <- 1e6
data=data.table(one=rep(1:4, n),
two=rep(7:10, n),
three=rep(1:4, n),
four=rep(1:4, n))
library(microbenchmark)
microbenchmark(
apply(data, 2, identical, data$one) ,
colSums(data == data$one) == nrow(data),
colSums(as.matrix(data) == data$one) == nrow(data),
data[, lapply(.SD, function(x) sum(x == data$one) == .N)],
data[, lapply(.SD, function(x) identical(x, data$one))]
)
# Unit: milliseconds
# expr min lq mean median uq max neval
# apply(data, 2, identical, data$one) 352.58769 414.846535 457.767582 437.041789 521.895046 643.77981 100
# colSums(data == data$one) == nrow(data) 1264.95548 1315.882084 1335.827386 1326.250976 1346.501505 1466.64232 100
# colSums(as.matrix(data) == data$one) == nrow(data) 110.05474 114.618818 125.116033 121.631323 126.912647 185.69939 100
# data[, lapply(.SD, function(x) sum(x == data$one) == .N)] 75.36791 77.960613 85.599088 79.327108 89.369938 156.03422 100
# data[, lapply(.SD, function(x) identical(x, data$one))] 7.00261 7.448851 8.687903 8.776724 9.491253 15.72188 100
And here are some comparisons in case you have many columns
n <- 1e7
set.seed(123)
data <- data.table(matrix(sample(n, replace = TRUE), ncol = 400))
microbenchmark(
apply(data, 2, identical, data$V1) ,
colSums(data == data$V1) == nrow(data),
colSums(as.matrix(data) == data$V1) == nrow(data),
data[, lapply(.SD, function(x) sum(x == data$V1) == .N)],
data[, lapply(.SD, function(x) identical(x,data$V1))]
)
# Unit: milliseconds
# expr min lq mean median uq max neval
# apply(data, 2, identical, data$V1) 176.65997 185.23895 235.44088 234.60227 253.88658 331.18788 100
# colSums(data == data$V1) == nrow(data) 680.48398 759.82115 786.64634 774.86919 804.91661 987.26456 100
# colSums(as.matrix(data) == data$V1) == nrow(data) 60.62470 62.86181 70.41601 63.75478 65.16708 120.30393 100
# data[, lapply(.SD, function(x) sum(x == data$V1) == .N)] 83.95790 86.72680 90.45487 88.46165 90.04441 142.08614 100
# data[, lapply(.SD, function(x) identical(x, data$V1))] 40.86718 42.65486 45.06100 44.29602 45.49430 91.57465 100

Ok, that was easier than expected :)
Simply used apply like so:
apply(data, 2, identical, data$one)
# returned:
# one two three four
# TRUE FALSE TRUE TRUE
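If you would like this wrapped into the helper the question imagines, here is a minimal sketch built on the same idea (allcolumnsidentity is the question's own placeholder name, not a function from any package):
# hypothetical wrapper: compare every column of dat against a reference column
allcolumnsidentity <- function(col, dat) {
  vapply(dat, identical, logical(1), y = col)
}
allcolumnsidentity(data$one, data)
# one two three four
# TRUE FALSE TRUE TRUE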

Related

Checking whether a number of vectors exist in a matrix, need speed

I have two large numerical matrices and need to check which rows in one of them exist in the other one (exist as in being equal). This is my code:
myMatrix1 <- rbind(c(1,2,3), c(4,5,6), c(7,8,9))
myMatrix2 <- rbind(c(10,11,12), c(4,5,6), c(13,14,15))
innerFcn <- function(x, oneRow){
  isTRUE(all.equal(x, oneRow))
}
checkForEquality <- function(x){
  apply(myMatrix2, 1, innerFcn, oneRow = x)
}
logicalMatrix <- apply(myMatrix1, 1, checkForEquality)
result <- apply(logicalMatrix, 1, any)
result is
[1] FALSE TRUE FALSE
With two 2067*198 matrices this takes 350 seconds on my machine. With CPU-parallelization I guess I could bring it down to around 15 seconds. Unfortunately anything above 1 second is unacceptable. I need some direction. The matrices contain only 0's, 1's and 2's, if that matters.
You can use the squared euclidean distance, which can be expressed with vectorized/linear-algebra operations:
dist_xy <- outer(rowSums(x^2), rowSums(y^2), '+') - tcrossprod(x, 2 * y)
Benchmark:
nr = 1e5
nc = 200
x = t(sample(0:2, size = nc, replace = TRUE))
y = matrix(sample(0L:2L, size = nc * nr, replace = TRUE), nrow = nr)
all.equal(apply(y, 1, function(z) identical(z, x)),
drop(outer(rowSums(x^2), rowSums(y^2), '+') == tcrossprod(x, 2 * y)))
# [1] TRUE
microbenchmark::microbenchmark(
A = apply(y, 1, function(z) identical(z, x)),
B = apply(y, 1, function(z) all(z == x)),
C = drop(outer(rowSums(x^2), rowSums(y^2), '+') == tcrossprod(x, 2 * y)),
times = 3
)
Unit: milliseconds
expr min lq mean median uq max neval
A 543.1362 559.9647 585.3627 576.7931 606.4760 636.1589 3
B 609.7400 636.5922 667.3405 663.4445 696.1408 728.8370 3
C 368.2808 416.4194 441.9118 464.5580 478.7273 492.8965 3
This should benefit even more with larger matrices.
Calculate the distance matrix:
X <- rbind(c(1,2,3),c(4,5,6),c(7,8,9))
Y <- rbind(c(10,11,12),c(4,5,6),c(13,14,15))
library(pracma)
ind <- which(distmat(X, Y) == 0L, arr.ind = TRUE)
# row col
#[1,] 2 2
X[ind[, 1],]
#[1] 4 5 6
Y[ind[, 2],]
#[1] 4 5 6
If you need to allow for floating point error, compare against a small tolerance instead of exact zero:
tol <- 1e-8  # pick a tolerance suited to your data
ind <- which(abs(distmat(X, Y)) < tol, arr.ind = TRUE)
You can get a roughly 10x speedup by switching away from all.equal: all(x == y), pracma::distmat, and identical(x, y) are all about 10x faster, with identical(x, y) the fastest (note: this is for comparisons of length 200, as in your data; for longer vectors, all(x == y) may be faster!).
# sample data
## demo on single row vs matrix comparison
nr = 1e5
nc = 200
x = sample(0:2, size = nc, replace = TRUE)
y = matrix(sample(0L:2L, size = nc * nr, replace = TRUE), nrow = nr)
library(pracma)
microbenchmark::microbenchmark(
apply(y, 1, function(z) identical(z, x)),
apply(y, 1, function(z) all(z == x)),
apply(y, 1, function(z) all.equal(z, x)),
which(distmat(x, y) == 0L, arr.ind = TRUE),
times = 2
)
# Unit: milliseconds
# expr min lq mean median uq max
# apply(y, 1, function(z) identical(z, x)) 619.1912 619.1912 644.3034 644.3034 669.4156 669.4156
# apply(y, 1, function(z) all(z == x)) 759.4982 759.4982 789.4765 789.4765 819.4548 819.4548
# apply(y, 1, function(z) all.equal(z, x)) 7618.6853 7618.6853 7657.7665 7657.7665 7696.8477 7696.8477
# which(distmat(x, y) == 0L, arr.ind = TRUE) 824.8337 824.8337 899.2349 899.2349 973.6360 973.6360
You could obviously go faster with Rcpp, but even doing row-wise comparisons in a for loop will be pretty quick in R (thanks to JIT compilation).
The next spot I'd try to optimize would be early stopping: if you find a match, you don't need to run any more comparisons for that row and can go on to the next one. You can't do this well with apply, but with a for loop you have greater control.
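A minimal sketch of that early-stopping loop, using the question's myMatrix1/myMatrix2 (the function name row_in_matrix is illustrative, not from the question):
row_in_matrix <- function(a, b) {
  # for each row of a, scan the rows of b and stop at the first exact match
  res <- logical(nrow(a))
  for (i in seq_len(nrow(a))) {
    for (j in seq_len(nrow(b))) {
      if (all(a[i, ] == b[j, ])) {
        res[i] <- TRUE
        break  # early stop: no need to check the remaining rows of b
      }
    }
  }
  res
}
row_in_matrix(myMatrix2, myMatrix1)
# [1] FALSE TRUE FALSE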
Subtract b from a (the two matrices, compared row for row) and take the rowSums of the absolute values, so that positive and negative differences cannot cancel out; rows that sum to zero are identical:
diffs <- a - b
idrows <- which(rowSums(abs(diffs)) == 0)
Just try:
asplit(m2,1) %in% asplit(m1,1)
An example with some medium matrices:
dims<-c(1920,198)
set.seed(4)
m1<-matrix(sample(0:2,prod(dims),TRUE),ncol=dims[2])
m2<-matrix(sample(0:2,prod(dims),TRUE),ncol=dims[2])
#simulate some rows in m2 which are equal to some rows in m1
samind<-sample(nrow(m1),50)
samind2<-sample(nrow(m2),50)
m2[samind2,]<-m1[samind,]
system.time(res<-asplit(m2,1) %in% asplit(m1,1))
# user system elapsed
# 0.407 0.000 0.406
#Check whether the result is correct
identical(which(res),sort(samind2))
#[1] TRUE

Random rearrangement of a vector in R [duplicate]

I want to permute a vector so that an element can't be in the same place after permutation, as it was in the original. Let's say I have a list of elements like this: AABBCCADEF
A valid shuffle would be: BBAADEFCCA
But these would be invalid: BAACFEDCAB or BCABFEDCAB
The closest answer I could find was this: python shuffle such that position will never repeat. But that's not quite what I want, because there are no repeated elements in that example.
I want a fast algorithm that generalizes that answer in the case of repetitions.
MWE:
library(microbenchmark)
set.seed(1)
x <- sample(letters, size=295, replace=T)
terrible_implementation <- function(x) {
xnew <- sample(x)
while(any(x == xnew)) {
xnew <- sample(x)
}
return(xnew)
}
microbenchmark(terrible_implementation(x), times=10)
Unit: milliseconds
expr min lq mean median uq max neval
terrible_implementation(x) 479.5338 2346.002 4738.49 2993.29 4858.254 17005.05 10
Also, how do I determine whether a sequence can be permuted in such a way at all? (A quick feasibility check is sketched after the example below.)
EDIT: To make it perfectly clear what I want, the new vector should satisfy the following conditions:
1) all(table(newx) == table(x))
2) all(x != newx)
E.g.:
newx <- terrible_implementation(x)
all(table(newx) == table(x))
[1] TRUE
all(x != newx)
[1] TRUE
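As for the feasibility sub-question: such a rearrangement can only exist when no single value accounts for more than half of the elements, which is the same check that foo() in the first answer below performs up front. A minimal sketch of that test (the function name is illustrative):
can_avoid_same_positions <- function(x) {
  max(table(x)) <= length(x) / 2
}
can_avoid_same_positions(c("A", "A", "B", "C"))  # TRUE
can_avoid_same_positions(c("A", "A", "A", "B"))  # FALSE: too many A's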
#DATA
set.seed(1)
x <- sample(letters, size=295, replace=T)
foo = function(S){
  if(max(table(S)) > length(S)/2){
    stop("NOT POSSIBLE")
  }
  U = unique(S)
  done_chrs = character(0)
  inds = integer(0)
  ans = character(0)
  while(!identical(sort(done_chrs), sort(U))){
    my_chrs = U[!U %in% done_chrs]
    next_chr = my_chrs[which.min(sapply(my_chrs, function(x) length(setdiff(which(!S %in% x), inds))))]
    x_inds = which(S %in% next_chr)
    candidates = setdiff(seq_along(S), union(x_inds, inds))
    if (length(candidates) == 1){
      new_inds = candidates
    } else {
      new_inds = sample(candidates, length(x_inds))
    }
    inds = c(inds, new_inds)
    ans[new_inds] = next_chr
    done_chrs = c(done_chrs, next_chr)
  }
  return(ans)
}
ans_foo = foo(x)
identical(sort(ans_foo), sort(x)) & !any(ans_foo == x)
#[1] TRUE
library(microbenchmark)
microbenchmark(foo(x))
#Unit: milliseconds
# expr min lq mean median uq max neval
# foo(x) 19.49833 22.32517 25.65675 24.85059 27.96838 48.61194 100
I think this satisfies all your conditions. The idea is to order the elements by the frequency of their value, start with the most common element, and shift the values along by the number of times the most common element appears. This guarantees that every element ends up on a different value.
I've written it in data.table, as that helped me during debugging, without losing too much performance. It's a modest improvement performance-wise.
library(data.table)
library(magrittr)
library(microbenchmark)
permute_avoid_same_position <- function(y) {
DT <- data.table(orig = y)
DT[, orig_order := .I]
count_by_letter <-
DT[, .N, keyby = orig] %>%
.[order(N)] %>%
.[, stable_order := .I] %>%
.[order(-stable_order)] %>%
.[]
out <- copy(DT)[count_by_letter, .(orig, orig_order, N), on = "orig"]
# Dummy element
out[, new := first(y)]
origs <- out[["orig"]]
nrow_out <- nrow(out)
maxN <- count_by_letter[["N"]][1]
out[seq_len(nrow_out) > maxN, new := head(origs, nrow_out - maxN)]
out[seq_len(nrow_out) <= maxN, new := tail(origs, maxN)]
DT[out, j = .(orig_order, orig, new), on = "orig_order"] %>%
.[order(orig_order)] %>%
.[["new"]]
}
set.seed(1)
x <- sample(letters, size=295, replace=T)
testthat::expect_true(all(table(permute_avoid_same_position(x)) == table(x)))
testthat::expect_true(all(x != permute_avoid_same_position(x)))
microbenchmark(permute_avoid_same_position(x), times = 5)
# Unit: milliseconds
# expr min lq mean median uq max
# permute_avoid_same_position(x) 5.650378 5.771753 5.875116 5.788618 5.938604 6.226228
x <- sample(1:1000, replace = TRUE, size = 1e6)
testthat::expect_true(all(table(permute_avoid_same_position(x)) == table(x)))
testthat::expect_true(all(x != permute_avoid_same_position(x)))
microbenchmark(permute_avoid_same_position(x), times = 5)
# Unit: milliseconds
# expr min lq mean median uq max
# permute_avoid_same_position(x) 239.7744 385.4686 401.521 438.2999 440.9746 503.0875
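For illustration, here is the same group-and-shift idea as a minimal base-R sketch (a simplification of the answer above rather than its exact code; it groups identical values with order() and relies on the same assumption that max(table(y)) <= length(y)/2):
permute_shift <- function(y) {
  n <- length(y)
  maxN <- max(table(y))
  stopifnot(maxN <= n / 2)
  ord <- order(y)  # positions, grouped by value
  # rotate the grouped values by the size of the largest group,
  # so every position receives a value from a different group
  rotated <- y[ord][c((n - maxN + 1):n, 1:(n - maxN))]
  out <- y
  out[ord] <- rotated
  out
}
set.seed(1)
x <- sample(letters, size = 295, replace = TRUE)
all(table(permute_shift(x)) == table(x))  # TRUE
all(permute_shift(x) != x)                # TRUE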
We could extract the substrings at the boundaries of the repeating elements, then sample them and paste them back together; replicate generates several shuffles at once:
library(stringr)
sapply(replicate(10, sample(str_extract_all(str1, "([[:alpha:]])\\1*")[[1]]),
simplify = FALSE), paste, collapse="")
#[1] "BBAAEFDCCA" "AAAFBBEDCC" "BBAAAEFCCD" "DFACCBBAAE" "AAFCCBBEAD"
#[6] "DAAAECCBBF" "AAFCCDBBEA" "CCEFADBBAA" "BBAAEADCCF" "AACCBBDFAE"
data
str1 <- "AABBCCADEF"

Which lines of a matrix are equal to a certain vector

I have a piece of code that searches for the rows of a matrix boxes that are equal to a given vector x. The code uses the apply family, and I wonder whether it can be optimized further:
x = floor(runif(4)*10)/10
boxes = as.matrix(do.call(expand.grid, lapply(1:4, function(x) {
seq(0, 1 - 1/10, length = 10)
})))
# can the following line be more optimised ? :
result <- which(sapply(1:nrow(boxes),function(i){all(boxes[i,] == x)}))
I did not manage to get rid of the apply function myself but maybe you'll have better ideas than me :)
One option is which(colSums(t(boxes) == x) == ncol(boxes)).
Vectors are recycled column-wise, so we need to transpose boxes before comparing to x with ==. Then we can pick which column (transposed row) has a sum of ncol(boxes), i.e. all TRUE values.
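A tiny illustration of that recycling behaviour, on a toy matrix rather than the question's boxes:
m <- rbind(c(1, 3, 5), c(2, 4, 6))
v <- c(2, 4, 6)
m == v                                # v is recycled down the columns, not across the rows
which(colSums(t(m) == v) == ncol(m))  # compare against the transpose instead: row 2 matches
# [1] 2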
Here's a benchmark for this (possibly not representative) example
Irnv <- function() which(sapply(1:nrow(boxes),function(i){all(boxes[i,] == x)}))
ICT <- function() which(colSums(t(boxes) == x) == ncol(boxes))
RS <- function() which(rowSums(mapply(function(i, j) boxes[, i] == j, seq_len(ncol(boxes)), x)) == length(x))
RS2 <- function(){
boxes <- data.frame(boxes)
which(rowSums(mapply(`==`, boxes, x)) == length(x))
}
akrun <- function() which(rowSums((boxes == x[col(boxes)])) == ncol(boxes))
microbenchmark(Irnv(), ICT(), RS(), RS2(), akrun())
# Unit: microseconds
# expr min lq mean median uq max neval
# Irnv() 19218.470 20122.2645 24182.2337 21882.8815 24949.1385 66387.719 100
# ICT() 300.308 323.2830 466.0395 342.3595 430.1545 7878.978 100
# RS() 566.564 586.2565 742.4252 617.2315 688.2060 8420.927 100
# RS2() 698.257 772.3090 1017.0427 842.2570 988.9240 9015.799 100
# akrun() 442.667 453.9490 579.9102 473.6415 534.5645 6870.156 100
We can also use rowSums on a replicated 'x' to make the lengths match
which(rowSums((boxes == x[col(boxes)])) == ncol(boxes))
Or use the rep
which(rowSums(boxes == rep(x, each = nrow(boxes))) == ncol(boxes))
Or with sweep and rowSums
which(rowSums(sweep(boxes, 2, x, `==`)) == ncol(boxes))
which(sapply(1:nrow(boxes),function(i){all(boxes[i,] == x)}))
#[1] 5805
A variation on your answer using mapply.
which(rowSums(mapply(function(i, j) boxes[, i] == j, seq_len(ncol(boxes)), x)) == length(x))
#[1] 5805
We can simplify the above version (only reducing keystrokes; see ICT's benchmarks) if boxes is allowed to be a data frame.
boxes <- data.frame(boxes)
which(rowSums(mapply(`==`, boxes, x)) == length(x))
#[1] 5805
Benchmarks on my system for various answers on a fresh R session
Irnv <- function() which(sapply(1:nrow(boxes),function(i){all(boxes[i,] == x)}))
ICT <- function() which(colSums(t(boxes) == x) == ncol(boxes))
RS <- function() which(rowSums(mapply(function(i, j) boxes[, i] == j, seq_len(ncol(boxes)), x)) == length(x))
RS2 <- function(){
boxes <- data.frame(boxes)
which(rowSums(mapply(`==`, boxes, x)) == length(x))
}
akrun <- function() which(rowSums((boxes == x[col(boxes)])) == ncol(boxes))
akrun2 <- function() which(rowSums(boxes == rep(x, each = nrow(boxes))) == ncol(boxes))
akrun3 <- function() which(rowSums(sweep(boxes, 2, x, `==`)) == ncol(boxes))
library(microbenchmark)
microbenchmark(Irnv(), ICT(), RS(), RS2(), akrun(), akrun2(), akrun3())
#Unit: microseconds
# expr min lq mean median uq max neval
#Irnv() 16335.205 16720.8905 18545.0979 17640.7665 18691.234 49036.793 100
#ICT() 195.068 215.4225 444.9047 233.8600 329.288 4635.817 100
#RS() 527.587 577.1160 1344.3033 639.7180 1373.426 36581.216 100
#RS2() 648.996 737.6870 1810.3805 847.9865 1580.952 35263.632 100
#akrun() 384.498 402.1985 761.0542 421.5025 1176.129 4102.214 100
#akrun2() 840.324 853.9825 1415.9330 883.3730 1017.014 34662.084 100
#akrun3() 399.645 459.7685 1186.7605 488.3345 1215.601 38098.927 100
data
set.seed(3251)
x = floor(runif(4)*10)/10
boxes = as.matrix(do.call(expand.grid, lapply(1:4, function(x) {
seq(0, 1 - 1/10, length = 10)
})))

Count number of 1's from right to left, stopping at the first 0

I want to count the number of 1s that occur from RIGHT to LEFT across multiple columns, stopping at the first 0.
Example DF:
df<-data.frame(replicate(7,sample(0:1,30,rep=T)))
colnames(df)<-seq(1950,2010,10)
I've manually entered the desired result under a new column "condition"; for example, a row of 1 0 1 1 0 1 1 would get condition = 2, since two 1s appear before the first 0 when reading from the right.
Thanks in advance for your help,
Cai
Here's a fully vectorized attempt
indx <- rowSums(df) == ncol(df) # Per Jaap's comment
df$condition <- ncol(df) - max.col(-df, ties = "last")
df$condition[indx] <- ncol(df) - 1
This basically finds the first zero from the right and counts how many columns come before it (which, for binary data, is the number of trailing 1s).
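To see the mechanics on a single row (a toy 1-row matrix, not the question's df):
r <- rbind(c(1, 0, 1, 1, 0, 1, 1))
max.col(-r, ties.method = "last")            # 5: the rightmost 0 sits in column 5
ncol(r) - max.col(-r, ties.method = "last")  # 2: two 1s to the right of that 0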
EDIT
Had to add handling for the special case where a row is all ones (note that by this point df already contains the new condition column, hence ncol(df) - 1)
df$condition <- apply(df, 1, function(x) {
y <- rev(x)
sum(cumprod(y))
})
[Edit: now works]
Try this
df$condition <- apply(df,1,function(x){x<- rev(x);m <- match(0,x)[1]; if (is.na(m)) sum(x) else sum(x[1:m])})
We're matching the first 0 in the reversed row, then summing up to that element.
If there's no zero we sum the full row.
Here's a benchmark of all the solutions:
library(stringr)
microbenchmark(
Moody_Mudskipper = apply(df,1,function(x){x<- rev(x);m <- match(0,x)[1]; if (is.na(m)) sum(x) else sum(x[1:m])}),
akrun = apply(df, 1, function(x) {x1 <- rle(x)
x2 <- tail(x1$lengths, 1)[tail(x1$values, 1)==1]
if(length(x2)==0) 0 else x2}),
akrun2 = str_count(do.call(paste0, df), "[1]+$"),
roland = apply(df, 1, function(x) {y <- rev(x);sum(y * cumprod(y != 0L))}),
David_Arenburg = ncol(df) - max.col(-df, ties = "last"),
times = 10)
# Unit: microseconds
# expr min lq mean median uq max neval
# Moody_Mudskipper 1437.948 1480.417 1677.1929 1536.159 1597.209 3009.320 10
# akrun 6985.174 7121.078 7718.2696 7691.053 7856.862 9289.146 10
# akrun2 1101.731 1188.793 1290.8971 1226.486 1343.099 1790.091 10
# akrun3 693.315 791.703 830.3507 820.371 884.782 1030.240 10
# roland 1197.995 1270.901 1708.5143 1332.305 1727.802 4568.660 10
# David_Arenburg 2845.459 3060.638 3406.3747 3167.519 3495.950 5408.494 10
# David_Arenburg_corrected 3243.964 3341.644 3757.6330 3384.645 4195.635 4943.099 10
For a bigger example David's solution is indeed the fastest, as said in the chosen solution's comments:
df<-data.frame(replicate(7,sample(0:1,1000,rep=T)))
# Unit: milliseconds
# expr min lq mean median uq max neval
# Moody_Mudskipper 31.324456 32.155089 34.168533 32.827345 33.848560 44.952570 10
# akrun 225.592061 229.055097 238.307506 234.761584 241.266853 271.000470 10
# akrun2 28.779824 29.261499 33.316700 30.118144 38.026145 46.711869 10
# akrun3 14.184466 14.334879 15.528201 14.633227 17.237317 18.763742 10
# roland 27.946005 28.341680 29.328530 28.497224 29.760516 33.692485 10
# David_Arenburg 3.149823 3.282187 3.630118 3.455427 3.727762 5.240031 10
# David_Arenburg_corrected 3.464098 3.534527 4.103335 3.833937 4.187141 6.165159 10
We can loop through the rows and use rle:
df$condition <- apply(df, 1, function(x) {x1 <- rle(x)
x2 <- tail(x1$lengths, 1)[tail(x1$values, 1)==1]
if(length(x2)==0) 0 else x2})
Or another option is str_extract
library(stringr)
v1 <- str_extract(do.call(paste0, df), "1+$")
df$condition <- ifelse(is.na(v1), 0, nchar(v1))
Or with a slightly more efficient stringi
library(stringi)
v1 <- stri_count(stri_extract(do.call(paste0, df), regex = "1+$"), regex = ".")
v1[is.na(v1)] <- 0
df$condition <- v1
Or with a more compact option, counting the positions where the zero-width lookahead (?=1+$) matches (one match per trailing 1):
stri_count(do.call(paste0, df), regex = '(?=1+$)')

which.max ties method in R

which.max and which.min will return the smallest index of the max or min value if there are ties.
Is there a way around this so that the largest index is returned without affecting the efficiency of the function?
max.col has this exact functionality, but I am dealing with a vector not a matrix.
You could do it like this:
x<-c(1,2,1,4,3,4)
#identical to which.max, except returns all indices with max
which(x==max(x))
[1] 4 6
z<-which(x==max(x))
z[length(z)]
[1] 6
#or with tail
tail(which(x==max(x)),1)
[1] 6
edit:
Or, you could also use the max.col function for vectors like this:
max.col(t(x),"last")
[1] 6
#or
max.col(matrix(x,nrow=1),"last")
[1] 6
edit: Some benchmarking:
x<-sample(1:1000,size=10000,replace=TRUE)
library(microbenchmark)
microbenchmark(which.max(x),{z<-which(x==max(x));z[length(z)]},
tail(which(x==max(x)),1),max.col(matrix(x,nrow=1),"last"),
max.col(t(x),"last"),which.max(rev(x)),times=1000)
Unit: microseconds
expr min lq median uq max neval
which.max(x) 29.390 30.323 30.323 31.256 17550.276 1000
{ z <- which(x == max(x)) z[length(z)] } 40.586 42.452 42.919 44.318 631.178 1000
tail(which(x == max(x)), 1) 57.380 60.646 61.579 64.844 596.657 1000
max.col(matrix(x, nrow = 1), "last") 134.353 138.085 139.485 144.383 710.949 1000
max.col(t(x), "last") 116.159 119.425 121.291 125.956 729.610 1000
which.max(rev(x)) 89.569 91.435 92.368 96.566 746.404 1000
So all of these methods seem to be slower than the original (which gives the wrong result), but z <- which(x == max(x)); z[length(z)] seems to be the fastest of them.
You could reverse x and map the position back to the original indexing (which.max(rev(x)) on its own returns the position within the reversed vector):
length(x) + 1 - which.max(rev(x))
length(x) + 1 - which.min(rev(x))
The which function has an 'arr.ind' parameter normally set to FALSE but usefully set to TRUE in this case:
x <- sample(1:20, 50, repl=TRUE)
> which(x==max(x), arr.ind=TRUE)
[1] 11 23
> tail(which(x==max(x), arr.ind=TRUE) , 1)
[1] 23
Using the arr.ind argument is particularly useful with matrix or array structures, but it does work with atomic vectors as well.
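For completeness, a small illustration of what arr.ind adds when the input actually is a matrix (a toy example, not from the answer above):
m <- matrix(c(1, 5, 5, 2), nrow = 2)
which(m == max(m), arr.ind = TRUE)  # row/column positions of every maximum
#      row col
# [1,]   2   1
# [2,]   1   2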
To expand on Jouni's answer, you could instead use max on the result of which:
x <- c(1, 2, 1, 4, 3, 4)
which(x == max(x))
[1] 4 6
max(which(x == max(x)))
[1] 6
Benchmarking:
x <- sample(1:1000, size = 10000, replace = TRUE)
library(microbenchmark)
microbenchmark(which.max(x), {z <- which(x == max(x)); z[length(z)]},
tail(which(x == max(x)), 1), max.col(matrix(x, nrow = 1), "last"),
max.col(t(x), "last"), which.max(rev(x)), max(which(x == max(x))), times = 1000)
Unit: microseconds
expr min lq mean median uq max neval
which.max(x) 6.322 6.717 7.171838 7.112 7.112 40.297 1000
{ z <- which(x == max(x)) z[length(z)] } 27.260 28.445 37.126964 28.840 29.630 2276.346 1000
tail(which(x == max(x)), 1) 35.952 37.927 45.198484 38.718 40.298 1005.038 1000
max.col(matrix(x, nrow = 1), "last") 160.791 162.766 181.698171 163.557 169.087 1688.494 1000
max.col(t(x), "last") 84.149 86.124 100.249921 86.915 89.680 1230.618 1000
which.max(rev(x)) 53.729 55.310 69.442985 56.100 57.680 1076.149 1000
max(which(x == max(x))) 26.865 27.655 35.552256 28.050 28.841 1029.137 1000
