Changing vector elements according to if condition - r

I have pos as matrix of array indices that has 24 rows and 2 columns. In first column it contains the values 1,2,3,4.
$position
row col
[1,] 4 6
[2,] 1 6
[3,] 4 5
[4,] 2 6
[5,] 1 5
[6,] 3 6
[7,] 4 4
[8,] 2 5
[9,] 1 4
[10,] 3 5
[11,] 2 4
[12,] 4 3
[13,] 1 3
[14,] 3 4
[15,] 2 3
[16,] 4 2
[17,] 3 3
[18,] 1 2
[19,] 2 2
[20,] 3 2
[21,] 4 1
[22,] 1 1
[23,] 2 1
[24,] 3 1
I tried the code
ch<-c(5,7,10,5)
C<-150
s<-c(1,1,1,1); s
cost<-sum(ch*s)
repeat
{
for(i in 1:24)
{
for (j in 1:4)
{
if (pos[i,1]==j) s[j]<-s[j]+1 else s
}
if (cost<C)
{
break
}
}
}
s
Here s returns s=c(1,1,1,4280236) but the result should be s=c(5,6,6,5)

pos <- structure(c(4L, 1L, 4L, 2L, 1L, 3L, 4L, 2L, 1L, 3L, 2L, 4L, 1L,
3L, 2L, 4L, 3L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 6L, 6L, 5L, 6L, 5L,
6L, 4L, 5L, 4L, 5L, 4L, 3L, 3L, 4L, 3L, 2L, 3L, 2L, 2L, 2L, 1L,
1L, 1L, 1L), .Dim = c(24L, 2L), .Dimnames = list(NULL, c("row", "col")))
ch <- c(5,7,10,5)
C <- 150
s <- c(1,1,1,1)
for (i in 24:1) {
# for(j in 1:4)
# {
# if (pos[i,1]==j) s[j] <- s[j]+1
# }
j <- pos[i,1]; s[j] <- s[j]+1
cost <- sum(ch*s)
if (cost>=C) break
}
s; cost
As a variant one can run through the first column of the matrix pos
for (j in pos[24:1, "row"]) {
s[j] <- s[j]+1
cost <- sum(ch*s)
if (cost>=C) break
}
s; cost

Related

Efficient way to calculate percentage of a specific value in a specific time

I have a csv file like these: this csv filled is called df_plane in R
Situation
flight_uses
People-ID
1
1
1
2
1
1
3
0
1
1
1
2
2
1
2
3
1
2
1
1
3
2
0
3
3
1
3
1
1
4
2
1
4
3
0
4
1
1
5
2
0
5
3
0
5
1
1
6
2
1
6
3
NA
6
1
NA
7
2
1
7
3
1
7
1
1
8
2
0
8
3
0
8
1
NA
9
2
NA
9
3
1
9
1
1
10
2
1
10
3
0
10
1
0
11
2
0
11
3
0
11
I would like to find out what percentage of people uses airplane in situation 2. I would like to know if there is a more efficient way than use the code below. Because with the below code I have to calculate it manually.
table(select(df_plane,situation,flight_uses))
You can use functions from the janitor package.
library(tidyverse)
library(janitor)
#>
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#>
#> chisq.test, fisher.test
df_plane <- tibble::tribble(
~Situation, ~flight_uses, ~`People-ID`,
1L, 1L, 1L,
2L, 1L, 1L,
3L, 0L, 1L,
1L, 1L, 2L,
2L, 1L, 2L,
3L, 1L, 2L,
1L, 1L, 3L,
2L, 0L, 3L,
3L, 1L, 3L,
1L, 1L, 4L,
2L, 1L, 4L,
3L, 0L, 4L,
1L, 1L, 5L,
2L, 0L, 5L,
3L, 0L, 5L,
1L, 1L, 6L,
2L, 1L, 6L,
3L, NA, 6L,
1L, NA, 7L,
2L, 1L, 7L,
3L, 1L, 7L,
1L, 1L, 8L,
2L, 0L, 8L,
3L, 0L, 8L,
1L, NA, 9L,
2L, NA, 9L,
3L, 1L, 9L,
1L, 1L, 10L,
2L, 1L, 10L,
3L, 0L, 10L,
1L, 0L, 11L,
2L, 0L, 11L,
3L, 0L, 11L
) |>
clean_names()
df_plane |>
tabyl(situation, flight_uses) |>
adorn_percentages() |>
adorn_pct_formatting()
#> situation 0 1 NA_
#> 1 9.1% 72.7% 18.2%
#> 2 36.4% 54.5% 9.1%
#> 3 54.5% 36.4% 9.1%
Created on 2022-10-26 with reprex v2.0.2
In Situation 2, 54.5% of passengers uses airplane.
You can use mean to calculate the proportion
> with(df_plane,mean(replace(flight_uses, is.na(flight_uses), 0)[Situation==2]))
[1] 0.5454545
Are you asking, of those rows where Situation==2, what is the percent where flight_uses==1?
dplyr approach
dplyr is useful for these types of manipulations:
library(dplyr)
df_plane |>
filter(Situation == 2) |>
summarise(
percent_using_plane = sum(flight_uses==1, na.rm=T) / n() * 100
)
# percent_using_plane
# 1 54.54545
base R
If you want to stick with the base R table syntax (which seems fine in this case but can become unwieldy once calculations get more complicated), you were nearly there:
table(df_plane[df_plane$Situation==2,]$flight_uses) / nrow(df_plane[df_plane$Situation==2,])*100
# 0 1
# 36.36364 54.54545
Use with instead of dplyr::select and wrap it in proportions.
proportions(with(df_plane, table(flight_uses, Situation, useNA='ifany')), 2)
# Situation
# flight_uses 1 2 3
# 0 0.09090909 0.36363636 0.54545455
# 1 0.72727273 0.54545455 0.36363636
# <NA> 0.18181818 0.09090909 0.09090909

Finding the maximum square sub-matrix with all equal elements

Does anybody know how it could be possible to subset the maximum K such that K x K is a submatrix with all identical elements, i.e., all the elements in this submatrix must be the same from a given a N x N matrix?
I found many examples in other programming languages except R. I also prefer dplyr if you know.
There is a link to the solution with other languages:
https://www.geeksforgeeks.org/maximum-size-sub-matrix-with-all-1s-in-a-binary-matrix/
But this link provides a special case when all identical elements are next to each other. It retrieves a maximum block of the same elements, not a submatrix in general. I do not want to limit subsetting with this condition.
Here is a base R implementation to make it.
If you want to search the maximum square sub-matrix within a non-square matrix, you can try the code below:
r <- list()
for (w in rev(seq(min(dim(M))))) {
for (rs in seq(nrow(M)-w+1)) {
for (cs in seq(ncol(M)-w+1)) {
mat <- M[rs-1+(1:w),cs-1+(1:w)]
u <- unique(c(mat))
if (all(u!=0) &length(u)==1) r[[length(r)+1]] <- mat
}
}
if (length(r)>0) break
}
such that
> r
[[1]]
[,1] [,2]
[1,] 3 3
[2,] 3 3
[[2]]
[,1] [,2]
[1,] 2 2
[2,] 2 2
[[3]]
[,1] [,2]
[1,] 3 3
[2,] 3 3
[[4]]
[,1] [,2]
[1,] 2 2
[2,] 2 2
[[5]]
[,1] [,2]
[1,] 1 1
[2,] 1 1
[[6]]
[,1] [,2]
[1,] 1 1
[2,] 1 1
[[7]]
[,1] [,2]
[1,] 3 3
[2,] 3 3
[[8]]
[,1] [,2]
[1,] 3 3
[2,] 3 3
DATA
M <- structure(c(1L, 3L, 1L, 2L, 1L, 3L, 3L, 2L, 2L, 3L, 3L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L,
2L, 2L, 1L, 3L, 1L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 1L, 3L, 2L, 1L,
1L, 3L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L,
3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 3L, 2L, 3L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L,
3L, 2L, 2L, 3L, 3L, 3L, 1L, 2L, 2L, 1L, 3L, 3L, 2L, 3L, 2L, 1L,
2L, 1L, 3L, 3L, 1L, 2L, 1L, 3L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 2L,
1L, 1L, 1L, 2L, 1L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 1L, 1L, 2L, 2L,
1L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 3L), .Dim = c(15L, 10L))
> M
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 1 2 2 1 1 3 2 2 1 3
[2,] 3 2 1 3 3 1 2 3 1 3
[3,] 1 2 3 2 3 1 2 2 2 1
[4,] 2 3 1 2 2 2 3 1 2 1
[5,] 1 1 3 3 3 1 2 2 2 2
[6,] 3 3 2 3 3 1 2 1 1 2
[7,] 3 1 2 2 2 1 3 3 1 1
[8,] 2 1 2 2 3 1 3 3 1 2
[9,] 2 1 2 2 3 3 3 1 2 3
[10,] 3 1 3 2 1 2 1 2 1 3
[11,] 3 2 2 1 1 1 2 1 3 3
[12,] 1 1 1 2 1 1 2 3 2 3
[13,] 1 1 3 2 1 3 1 2 3 3
[14,] 1 2 2 2 3 3 3 3 3 1
[15,] 2 2 1 2 2 3 3 3 2 3
EDIT
The approach above is inefficient when with large matrix since all combinations checked. The method below is a R implementation of algorithm stated in https://www.geeksforgeeks.org/maximum-size-sub-matrix-with-all-1s-in-a-binary-matrix/, which is is far more efficient.
M <- unname(as.matrix(read.csv(file = "test2.csv")))
S <- matrix(0,nrow = nrow(M),ncol = ncol(M))
S[,1] <- M[,1]
for (i in 1:nrow(S)) {
for (j in 2:ncol(S)) {
if (M[i,j]==1) {
if (i==1) {
S[i,j] <- M[i,j]
} else {
S[i,j] <- min(c(S[i,j-1],S[i-1,j],S[i-1,j-1]))+1
}
}
}
}
inds <- which(S == max(S),arr.ind = TRUE)
w <- seq(max(S))-1
res <- lapply(seq(nrow(inds)), function(k) M[inds[k,"row"]-w,inds[k,"col"]-w])
I found the following answer to this question using dplyr:
M1 <- M %>% data.frame %>% mutate(sumVar = rowSums(.)) %>%
arrange(desc(sumVar)) %>% dplyr::select(-sumVar)
M2 <- M1 %>% as.matrix %>% t %>% data.frame %>%
mutate(sumVar = rowSums(.)) %>% arrange(desc(sumVar)) %>%
dplyr::select(-sumVar) %>% as.matrix %>% t %>% data.frame %>%
arrange_all(funs(desc(.)))
i <- 1
j <- 1
while(sum(M2[1:i,1:j]) == i*j){
i <- i+1
j <- j+1
M3 <- M2[1:i-1,1:j-1]
}
This is a toy data as #ThomasIsCoding proposed:
M <- structure(c(1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Dim = c(5L,
5L))
and this is the result:
> M
[,1] [,2] [,3] [,4] [,5]
[1,] 1 1 1 0 1
[2,] 1 1 1 1 1
[3,] 1 1 1 1 1
[4,] 1 1 1 1 1
[5,] 0 1 1 1 1
> M1
X1 X2 X3 X4 X5
1 1 1 1 1 1
2 1 1 1 1 1
3 1 1 1 1 1
4 1 1 1 0 1
5 0 1 1 1 1
> M2
X1 X2 X3 X4 X5
1 1 1 1 1 1
2 1 1 1 1 1
3 1 1 1 1 1
4 1 1 1 1 0
5 1 1 1 0 1
> M3
X1 X2 X3 X4
1 1 1 1 1
2 1 1 1 1
3 1 1 1 1
4 1 1 1 1
Note that some more functions should be added to keep the variable names and find them after using arrange!

Dataframe column to matrix by two other columns

I have a dataframe
df<-data.frame(i=rep(1:3,3),j=sort(rep(1:3,3)),v=sample(1:9,9))
df
i j v
1 1 1 3
2 2 1 1
3 3 1 9
4 1 2 8
5 2 2 5
6 3 2 4
7 1 3 7
8 2 3 2
9 3 3 6
that I want to transform to matrix M such that
M[i,j]<-df$v[which(df$i==i & df$j==j)]
is there an easy way to do that?
Based on your description, you can just do,
matrix(df$v, ncol = max(df$j))
# [,1] [,2] [,3]
#[1,] 2 4 7
#[2,] 3 1 5
#[3,] 8 6 9
Data Used:
dput(df)
structure(list(i = c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), j = c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L), v = c(2L, 3L, 8L, 4L, 1L, 6L,
7L, 5L, 9L)), class = "data.frame", row.names = c(NA, -9L))

How many rows in a matrix do not contain a zero?

This is my matrix in R:
[,1] [,2] [,3]
[1,] 5 0 0
[2,] 0 0 5
[3,] 0 2 3
[4,] 1 2 2
[5,] 5 0 0
[6,] 1 4 0
[7,] 4 1 0
[8,] 0 0 5
[9,] 1 2 2
[10,] 3 2 0
[11,] 4 0 1
mat <- structure(c(5L, 0L, 0L, 1L, 5L, 1L, 4L, 0L, 1L, 3L, 4L, 0L, 0L,
2L, 2L, 0L, 4L, 1L, 0L, 2L, 2L, 0L, 0L, 5L, 3L, 2L, 0L, 0L, 0L,
5L, 2L, 0L, 1L), .Dim = c(11L, 3L))
I need find how many rows do not contain a zero, in this case the answer is 2:
the 4th row (1,2,2)
the 9th row, also(1,2,2)
Is there is a command for this or should I make a routine? I tried with two for() loops, but it's bad.
quick answer:
sum( 0 < apply(mat,1,prod) )
also:
nonzerorows <- 0 < apply(mat,1,prod) # logical selector of rows
mat[ nonzerorows, ]
mat[!nonzerorows, ]
which(nonzerorows)
sum(nonzerorows)
OP's data:
mat <- structure(c(5L, 0L, 0L, 1L, 5L, 1L, 4L, 0L, 1L, 3L, 4L, 0L, 0L,
2L, 2L, 0L, 4L, 1L, 0L, 2L, 2L, 0L, 0L, 5L, 3L, 2L, 0L, 0L, 0L,
5L, 2L, 0L, 1L), .Dim = c(11L, 3L))
mat <- matrix(sample(0:4, 16, replace=T), 4, 4)
mat
# [,1] [,2] [,3] [,4]
# [1,] 4 1 2 2
# [2,] 3 3 1 1
# [3,] 1 2 4 4
# [4,] 0 4 4 4
apply(mat, 1, function(x) all(x!=0))
# [1] TRUE TRUE TRUE FALSE
which(apply(mat, 1, function(x) all(x!=0)))
# [1] 1 2 3
A more general approach, e.g. when you're concerned about other numbers or elements could be this:
sum(apply(mat,1,function(x) {0 %in% x == F}))

multi-conditional statement by group

I've got a simple dataset.
structure(list(ID = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 4L, 4L,
4L, 5L, 5L), Primrely = c(0L, 2L, 1L, 1L, 1L, 1L, 3L, 4L, 4L,
3L, 1L, 2L, 2L), Primset = c(-4L, -3L, 1L, 2L, -4L, 5L, 3L, 1L,
2L, -4L, -2L, -3L, 3L), Primvalue = c(45L, 5L, 6L, 15L, 53L,
45L, 44L, 65L, 1L, 5L, 1L, 12L, 5L), Secrely = c(5L, 7L, 2L,
1L, 2L, 0L, 4L, 5L, 1L, 1L, 1L, 0L, 2L), Secset = c(-3L, 1L,
2L, -2L, -3L, 2L, 5L, 7L, 7L, 4L, 3L, 2L, 1L), Secvalue = c(38L,
-2L, -1L, 8L, 46L, 38L, 37L, 58L, -6L, -2L, -6L, 5L, -2L), Desired = structure(c(NA,
1L, NA, NA, 2L, 2L, NA, NA, NA, NA, NA, 1L, 1L), .Label = c("Primary",
"Secondary"), class = "factor")), .Names = c("ID", "Primrely",
"Primset", "Primvalue", "Secrely", "Secset", "Secvalue", "Desired"
), class = "data.frame", row.names = c(NA, -13L))
ID Primrely Primset Primvalue Secrely Secset Secvalue Desired
1 1 0 -4 45 5 -3 38 <NA>
2 1 2 -3 5 7 1 -2 Primary
3 1 1 1 6 2 2 -1 <NA>
4 1 1 2 15 1 -2 8 <NA>
5 2 1 -4 53 2 -3 46 Secondary
6 2 1 5 45 0 2 38 Secondary
7 2 3 3 44 4 5 37 <NA>
8 3 4 1 65 5 7 58 <NA>
9 4 4 2 1 1 7 -6 <NA>
10 4 3 -4 5 1 4 -2 <NA>
11 4 1 -2 1 1 3 -6 <NA>
12 5 2 -3 12 0 2 5 Primary
13 5 2 3 5 2 1 -2 Primary
For each ID, I'd like to select rows that meet the criteria (Prim = primary, Sec = secondary): If Primrely is 0 or 2 and Primset is -3:3, select all rows for each ID. If no rows for a given ID meet the primary criteria, select rows that meet the secondary criteria (Secrely is 0 or 2 and Secset is -3:3). Ideally, I'd like to add a column (Desired) that indicate which criteria was met (primary/secondary/NA).
I've been working with ifelse and if else functions without much luck mainly because I don't know how to command R to ingore a given ID if the primary criteria was already met (eg ID #1 meets the second criteria but doesn't need it because it already met the first criteria). In other words, if a 'primary' shows up in a given ID, it trumps all the 'secondary' criteria that were met. I would appreciate any advice.
If I understand you correctly now:
(left in the steps to show you what I was doing, you can remove them and/or do this all in one step if you want)
dat <- structure(list(ID = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 4L, 4L,
4L, 5L, 5L), Primrely = c(0L, 2L, 1L, 1L, 1L, 1L, 3L, 4L, 4L,
3L, 1L, 2L, 2L), Primset = c(-4L, -3L, 1L, 2L, -4L, 5L, 3L, 1L,
2L, -4L, -2L, -3L, 3L), Primvalue = c(45L, 5L, 6L, 15L, 53L,
45L, 44L, 65L, 1L, 5L, 1L, 12L, 5L), Secrely = c(5L, 7L, 2L,
1L, 2L, 0L, 4L, 5L, 1L, 1L, 1L, 0L, 2L), Secset = c(-3L, 1L,
2L, -2L, -3L, 2L, 5L, 7L, 7L, 4L, 3L, 2L, 1L), Secvalue = c(38L,
-2L, -1L, 8L, 46L, 38L, 37L, 58L, -6L, -2L, -6L, 5L, -2L), Desired = structure(c(NA,
1L, NA, NA, 2L, 2L, NA, NA, NA, NA, NA, 1L, 1L), .Label = c("Primary",
"Secondary"), class = "factor")), .Names = c("ID", "Primrely",
"Primset", "Primvalue", "Secrely", "Secset", "Secvalue", "Desired"
), class = "data.frame", row.names = c(NA, -13L))
within(dat, {
Desired_step1 <- ifelse(Primrely %in% c(0,2) & Primset %in% -3:3,
1, ifelse(Secrely %in% c(0,2) & Secset %in% -3:3,
2, 3))
Desired_new <- factor(ave(Desired_step1, ID, FUN = function(x)
ifelse(x == min(x), x, NA)),
levels = 1:3, labels = c('Primary', 'Secondary', 'NA'))
Desired_step1 <- c('1'='Primary','2'='Secondary','3'=NA)[Desired_step1]
})
# ID Primrely Primset Primvalue Secrely Secset Secvalue Desired Desired_new Desired_step1
# 1 1 0 -4 45 5 -3 38 <NA> <NA> <NA>
# 2 1 2 -3 5 7 1 -2 Primary Primary Primary
# 3 1 1 1 6 2 2 -1 <NA> <NA> Secondary
# 4 1 1 2 15 1 -2 8 <NA> <NA> <NA>
# 5 2 1 -4 53 2 -3 46 Secondary Secondary Secondary
# 6 2 1 5 45 0 2 38 Secondary Secondary Secondary
# 7 2 3 3 44 4 5 37 <NA> <NA> <NA>
# 8 3 4 1 65 5 7 58 <NA> NA <NA>
# 9 4 4 2 1 1 7 -6 <NA> NA <NA>
# 10 4 3 -4 5 1 4 -2 <NA> NA <NA>
# 11 4 1 -2 1 1 3 -6 <NA> NA <NA>
# 12 5 2 -3 12 0 2 5 Primary Primary Primary
# 13 5 2 3 5 2 1 -2 Primary Primary Primary
Here's my quick & dirty solution assuming your data.frame is named df. You can refine it yourself I think:
df$Desired <- ifelse((df$Primrely==0 | df$Primrely==2) & (df$Primset >= -3 & df$Primset <= 3),
"Primary",
NA)
idx <- is.na(df$Desired)
df$Desired[idx] <- ifelse((df$Secrely[idx]==0 | df$Secrely[idx]==2) & (df$Secset[idx] >= -3 & df$Secset[idx] <= 3),
"Secondary",
NA)

Resources