Loop through rows and variables with same prefix - r

Suppose we have the following data:
d <- data.frame(
"V" = c("A", "B"),
"X1" = c("A", "A"),
"X2" = c("B","B"),
"X3" = c("C", "C"),
"Y1" = c(1, 4),
"Y2" = c(2, 5),
"Y3" = c(3, 6)
)
d[] <- lapply(d, as.character)
d
V X1 X2 X3 Y1 Y2 Y3
1 A A B C 1 2 3
2 B A B C 4 5 6
I want to create a variable VAL that will take the value of Y[n] if V=X[n]
I can do it with ifelse statements but I want to avoid nested ifelse because n is unknown
d$VAL_ifelse = ifelse(d$V == d$X1,d$Y1,
ifelse(d$V == d$X2,d$Y2,
ifelse(d$V == d$X3,d$Y3,NA)))
I tried to create this loop but problem is with j I think ?
# Names of every column whose name starts with "X"; its length bounds the inner loop.
d_X_var=grep("^X", names(d), value=TRUE)
# For each row i, walk over the X columns and try to copy the paired Y value into VAL_loop.
for(i in 1:nrow(d)){
for(j in 1:length(d_X_var)){
# Compare V with the j-th X column of row i.
if((d[i,c('V')] == d[i,paste0('X',j)]) == TRUE){
d$VAL_loop[i] <- as.character(d[i,paste0('Y',j)])
} else if((d[i,c('V')] != d[i,paste0('X',j)]) == TRUE){
# NOTE(review): this branch also runs for every j after a match was found, so the
# value stored above is overwritten with NA whenever a later X column does not match.
d$VAL_loop[i] <- NA
}
}
}
d
V X1 X2 X3 Y1 Y2 Y3 VAL_ifelse VAL_loop
1 A A B C 1 2 3 1 <NA>
2 B A B C 4 5 6 5 <NA>

We can use vectorized way to get VAL
# For each row, find the X column (columns 2:4) equal to V and take the value
# at the same position among the Y columns (columns 5:7).
# which(arr.ind = TRUE) lists matches column by column, not row by row, so each
# value is written back through its own row index instead of by position.
hits <- which(d[2:4] == d$V, arr.ind = TRUE)
# Keep only the first (left-most) match of every row.
hits <- hits[!duplicated(hits[, 1]), , drop = FALSE]
# Rows without any match keep NA.
d$Val <- rep(NA, nrow(d))
d$Val[hits[, 1]] <- d[5:7][hits]
d
# V X1 X2 X3 Y1 Y2 Y3 Val
#1 A A B C 1 2 3 1
#2 B A B C 4 5 6 5
The above works when you know the column numbers of the X and Y columns beforehand. If you don't, you can use grep first to get the column numbers and then subset.
# Locate the X and Y columns by name prefix instead of hard-coding positions.
# The k-th X column is paired with the k-th Y column.
X_cols <- grep("^X", names(d))
Y_cols <- grep("^Y", names(d))
# which(arr.ind = TRUE) lists matches column by column, not row by row, so each
# value is written back through its own row index instead of by position.
hits <- which(d[X_cols] == d$V, arr.ind = TRUE)
# Keep only the first (left-most) match of every row.
hits <- hits[!duplicated(hits[, 1]), , drop = FALSE]
# Rows without any match keep NA.
d$Val <- rep(NA, nrow(d))
d$Val[hits[, 1]] <- d[Y_cols][hits]

We can use max.col from base R in a vectorized way
d$Val <- d[5:7][cbind(seq_len(nrow(d)), max.col(d$V == d[2:4], 'first'))]
d
# V X1 X2 X3 Y1 Y2 Y3 Val
#1 A A B C 1 2 3 1
#2 B A B C 4 5 6 5
Update
If there are no matches we can get the output as NA with rowSums (data from the comments)
d <- data.frame( "V" = c("A", "B","C","D","C"), "X1" = c("A", "A","A","A","A"), "X2" = c("B","B","B","B","A"), "X3" = c("C", "C","C","D","A"), "Y1" = c(1, 4, 7, 10, 13), "Y2" = c(2, 5, 8, 11, 14), "Y3" = c(3, 6, 9, 12,15), "Val_expected" = c(1,5,9,12,NA) )
d[,] <- lapply(d, as.character)
d$Val <- d[5:7][cbind(seq_len(nrow(d)), max.col(d$V == d[2:4], 'first'))]
d$Val <- as.numeric(d$Val) * (NA^ !rowSums(d$V == d[2:4]))
d$Val
#[1] 1 5 9 12 NA

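Here is a slightly convoluted way using ifelse and diag. Note that diag only compares row i with the i-th X column, so this works for the example data but not when a row's match sits in a different column: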
d$Val <- ifelse(d$V == diag(as.matrix(d[,2:4])), diag(as.matrix(d[,5:7])), NA)
Output:
V X1 X2 X3 Y1 Y2 Y3 Val
1 A A B C 1 2 3 1
2 B A B C 4 5 6 5

Related

How to create a matrix of data migration?

Suppose we start with this dataframe, and R-code that generates it immediately below:
> data
ID Period Values Flags
1 1 1 5 X0
2 1 2 10 X1
3 1 3 15 X2
4 1 4 20 X3
5 2 1 0 X0
6 2 2 2 X2
7 2 3 4 XO
8 2 4 6 X1
9 3 1 3 XO
10 3 2 6 XO
11 3 3 9 X2
12 3 4 12 XO
data <-
data.frame(
ID = c(1,1,1,1,2,2,2,2,3,3,3,3),
Period = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4),
Values = c(5, 10, 15, 20, 0, 2, 4, 6, 3, 6, 9, 12),
Flags = c("X0","X1","X2","X3","X0","X2","XO", "X1", "XO","XO","X2","XO")
)
I am trying to generate code that shows the migration of the number of ID's (and Values by number of ID's) from one "Flag" category to the next, based on the 2 periods input by the user. So for example, if the user inputs period 1 as the "from" period and period 4 as the "to" period, we'd get the migration tables as shown in the image at the bottom. I also include 2/3 from/to on the right side of the image, for sake of illustration.
I've typically done this sort of analysis in Excel, a cumbersome multi-step process, and am now trying it out in R.
Any suggestions for coding this?
Here are functions to create the two required tables.
Note that you had a problem with X0 and XO in your mock data set as r2evans already suggested (I've transformed everything to X0).
data <-
data.frame(
ID = c(1,1,1,1,2,2,2,2,3,3,3,3),
Period = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4),
Values = c(5, 10, 15, 20, 0, 2, 4, 6, 3, 6, 9, 12),
Flags = c("X0","X1","X2","X3","X0","X2","X0", "X1", "X0","X0","X2","X0")
)
# Build an empty square migration table: one row and one column per distinct
# value of data$Flags (in order of first appearance), every cell set to NA.
generateTable <- function(data) {
  flag_levels <- unique(data$Flags)
  n_flags <- length(flag_levels)
  empty <- data.frame(matrix(NA, nrow = n_flags, ncol = n_flags))
  # Names are assigned after construction so data.frame() cannot alter them.
  names(empty) <- flag_levels
  row.names(empty) <- flag_levels
  empty
}
# Count how many IDs move from each flag in period `from` (columns) to each
# flag in period `to` (rows). Cells that no ID migrates into stay NA.
#
# data: data frame with the columns ID, Period and Flags.
# from, to: the two Period values to compare.
# Returns the table created by generateTable(), filled with counts.
# IDs that do not have exactly one record in each of the two periods are skipped.
numbers2migrate <- function(data, from = 1, to = 4) {
  df <- generateTable(data)
  for (i in unique(data$ID)) {
    # which() drops NA comparisons so they cannot select spurious rows.
    id_from <- as.character(data$Flags[which(data$ID == i & data$Period == from)])
    id_to <- as.character(data$Flags[which(data$ID == i & data$Period == to)])
    # A migration needs exactly one flag in each period.
    if (length(id_from) != 1L || length(id_to) != 1L) {
      next
    }
    column <- match(id_from, names(df))
    row <- match(id_to, row.names(df))
    # The cell is a scalar, so a plain if/else replaces the vectorised ifelse().
    current <- df[row, column]
    df[row, column] <- if (is.na(current)) 1 else current + 1
  }
  df
}
# Sum the `from`-period Values of the IDs that move from each flag in period
# `from` (columns) to each flag in period `to` (rows). Cells that no ID
# migrates into stay NA.
#
# data: data frame with the columns ID, Period, Values and Flags.
# from, to: the two Period values to compare.
# Returns the table created by generateTable(), filled with summed values.
# IDs that do not have exactly one record in each of the two periods are skipped.
values2migrate <- function(data, from = 1, to = 4) {
  df <- generateTable(data)
  for (i in unique(data$ID)) {
    # which() drops NA comparisons so they cannot select spurious rows.
    from_rows <- which(data$ID == i & data$Period == from)
    to_rows <- which(data$ID == i & data$Period == to)
    # A migration needs exactly one record in each period.
    if (length(from_rows) != 1L || length(to_rows) != 1L) {
      next
    }
    column <- match(as.character(data$Flags[from_rows]), names(df))
    row <- match(as.character(data$Flags[to_rows]), row.names(df))
    val <- data$Values[from_rows]
    # Accumulate rather than overwrite, so several IDs sharing the same
    # from/to pair all contribute to the cell.
    current <- df[row, column]
    df[row, column] <- if (is.na(current)) val else current + val
  }
  df
}
> numbers2migrate(data, from=1, to=4)
X0 X1 X2 X3
X0 1 NA NA NA
X1 1 NA NA NA
X2 NA NA NA NA
X3 1 NA NA NA
> values2migrate(data,1,4)
X0 X1 X2 X3
X0 3 NA NA NA
X1 0 NA NA NA
X2 NA NA NA NA
X3 5 NA NA NA

if else: else portion not returning output

I am currently trying to cycle through a dataframe of integers and characters and change one value of each row, conditionally. For all rows that do not meet the conditions I would just like to add them back into a new dataframe filled with the modified rows.
I've done this before with no trouble, but I feel as though I have been staring at this too long without any enlightenment.
# Three one-row data frames (cbind() of mixed values yields character columns
# X1..X6), stacked into the three-row data frame x.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
a <- data.frame(cbind(1, "a", 2, "c", 3, "d"), stringsAsFactors = FALSE)
b <- data.frame(cbind(1, "a", 2, "c", 3, "g"), stringsAsFactors = FALSE)
c <- data.frame(cbind(1, "f", 4, "g", 5, "h"), stringsAsFactors = FALSE)
x <- rbind(a, b, c)
# Question's attempt, reproduced as posted: walks the rows of x, sets column 3
# to "f" when the condition below holds, and appends every visited row to `fin`.
fun<-function(x){
fin<-NULL
for(i in 1:nrow(x)){
# v is the single row that follows row i (an all-NA row once i is the last row).
v<-x[i+1,]
# NOTE(review): v has only one row, so v[i,1] addresses it only when i is 1;
# v[1,1] and v[1,2] were probably intended.
if ((x[i,1]== v[i,1]) & (x[i,2]==v[i,2]) ){
x[i,3]<-"f"
fin<-rbind(fin, x[i,])
}else {fin<-rbind(fin, x[i,]) }
# NOTE(review): this return sits inside the for loop, so the function exits
# after the first iteration, which is why only one row is printed.
return(fin)
}
}
fun(x)
X1 X2 X3 X4 X5 X6
1 1 a f c 3 d
>
The result I desire:
X1 X2 X3 X4 X5 X6
1 1 a f c 3 d
1 1 a 2 c 3 g
1 1 f 4 g 5 h
Or an alternative:
library(dplyr)
library(magrittr)
> z <- x %>% mutate(match = ifelse(( (lead(X1)==X1) & (lead(X2)==X2)),"YES","NO"))
> z %>% mutate(X3 = replace(X3, match=="YES", "f"))
X1 X2 X3 X4 X5 X6 match
1 1 a f c 3 d YES
2 1 a 2 c 3 g NO
3 1 f 4 g 5 h <NA>

R replacing a column from a data frame with a row from another data frame

I want to replace the first column of A with the first row of B. For example:
A <- data.frame(matrix("a", 4, 4), stringsAsFactors = FALSE)
> A
X1 X2 X3 X4
1 a a a a
2 a a a a
3 a a a a
4 a a a a
B <- data.frame(matrix("b", 4, 4), stringsAsFactors = FALSE)
> B
X1 X2 X3 X4
1 b b b b < Take this row
2 b b b b
3 b b b b
4 b b b b
I want A to become:
> A
X1 X2 X3 X4
1 b a a a
2 b a a a
3 b a a a
4 b a a a
^
replace it with this column
I tried:
A[, 1] = B[1, ]
But I get the following warning message:
In `[<-.data.frame`(`*tmp*`, , 1, value = list(X1 = "b", X2 = "b", :
provided 4 variables to replace 1 variables
By default, R does not drop the dimension when there is just one row left (while it does when there is just one column).
From ?Extract.data.frame:
drop: logical. If TRUE the result is coerced to the lowest possible dimension. The default is to drop if only one column is left, but not to drop if only one row is left.
You can see that doing:
A[, 1]
# [1] "a" "a" "a" "a"
The result is a vector
and
B[1, ]
# X1 X2 X3 X4
#1 b b b b
the result is still a data.frame
You need to unlist the result:
A[, 1] = unlist(B[1, ])
A
# X1 X2 X3 X4
#1 b a a a
#2 b a a a
#3 b a a a
#4 b a a a
This should also work, without changing row / col names:
A[, 1] = t(B)[,1]
This should do it
A[, 1] = t(B[1, ])

Join matrices by both colnames and rownames in R

I would like to join matrices by both colnames and rownames in R:
m1 = matrix(c(1,2,3, 11,12,13), nrow = 2, ncol = 3, byrow = TRUE,
dimnames = list(c("r1", "r2"),
c("a", "b", "c")))
m2 = matrix(c(4, 5, 0, 2,3,4), nrow = 2, ncol = 3, byrow = TRUE,
dimnames = list(c("r2", "r3"),
c("d", "b", "c")))
Check m1:
> m1
a b c
r1 1 2 3
r2 11 12 13
Check m2:
> m2
d b c
r2 4 5 0
r3 2 3 4
I want to get m3 which looks like this:
> m3
a b c d
r1 1 2 3 0
r2 11 17 13 4
r3 0 3 4 2
I didn't find an elegant way to do so. Using the rbind.fill.matrix function in package plyr, I can indirectly get m3.
require(plyr)
m3 = rbind.fill.matrix(m1, m2)
rownames(m3) = c(rownames(m1), rownames(m2))
m3[is.na(m3)]=0 # replace na with zero
m3 = t(sapply(by(m3,rownames(m3),colSums),identity)) # aggregate matrix by rownames
I guess there must be some better ways to do so. What's your suggestion?
The following seems valid:
tmp = rbind(as.data.frame(as.table(m1)), as.data.frame(as.table(m2)))
#tmp = aggregate(Freq ~ Var1 + Var2, tmp, sum) #unnecessary
xtabs(Freq ~ Var1 + Var2, tmp)
# Var2
#Var1 a b c d
# r1 1 2 3 0
# r2 11 17 13 4
# r3 0 3 4 2
edit: As noted by @AnandaMahto, xtabs is a 'contingency-table' function and not a 'reshape-data' function, so it sums by default.
I used this code:
# Join m1 and m2 on both row names and column names, summing cells that occur
# in both and filling cells that occur in neither with 0.
# Cells are addressed by name, so the result does not depend on the order of
# the dimnames in either input, and m1/m2 themselves are left untouched.
nr <- union(rownames(m1), rownames(m2))
nc <- union(colnames(m1), colnames(m2))
m3 <- matrix(0, nrow = length(nr), ncol = length(nc), dimnames = list(nr, nc))
m3[rownames(m1), colnames(m1)] <- m1
m3[rownames(m2), colnames(m2)] <- m3[rownames(m2), colnames(m2)] + m2

Create a vector from repetitions of items from a matrix

I have a data frame m
A 2
B 3
C 4
and I want to create a data frame like
A 1
A 2
B 1
B 2
B 3
C 1
C 2
C 3
C 4
Any help? Thanks a lot in advance
Your original question can be answered by:
text <- LETTERS[1:3]
n <- 2:4
rep(text, times=n)
[1] "A" "A" "B" "B" "B" "C" "C" "C" "C"
Your new question is quite different:
# Columns are named with `=`; using `<-` inside data.frame() would create
# global variables and leave the columns with auto-generated names.
df <- data.frame(
  text = LETTERS[1:3],
  n = 2:4
)
# Repeat each label n times and number the repetitions 1..n within each label.
data.frame(
  text = rep(df$text, times = df$n),
  seq = sequence(df$n)
)
text seq
1 A 1
2 A 2
3 B 1
4 B 2
5 B 3
6 C 1
7 C 2
8 C 3
9 C 4
rep accepts vectors. Try this:
dat <- data.frame(V1 = letters[1:3], V2 = 2:4)
rep(dat[, 1], dat[, 2])
> rep(dat[, 1], dat[, 2])
[1] a a b b b c c c c
Assuming m is a data frame:
m <- data.frame(V1 = LETTERS[1:3], V2 = 2:4, stringsAsFactors = FALSE)
This will do what you want:
with(m, rep(V1, times = V2))
e.g.
> with(m, rep(V1, times = V2))
[1] "A" "A" "B" "B" "B" "C" "C" "C" "C"
Edit: To address the edit made by the OP, try the following:
with(m, data.frame(X1 = rep(V1, times = V2),
X2 = unlist(lapply(V2, seq_len))))
Which produces:
> with(m, data.frame(X1 = rep(V1, times = V2),
+ X2 = unlist(lapply(V2, seq_len))))
X1 X2
1 A 1
2 A 2
3 B 1
4 B 2
5 B 3
6 C 1
7 C 2
8 C 3
9 C 4
Or more succinctly via sequence() — as per @Andrie's answer (which I also keep forgetting about):
with(m, data.frame(X1 = rep(V1, times = V2), X2 = sequence(V2)))
@Andrie's answer is the only one so far that answers your new question. There may be a better way to do this but:
m <- data.frame(V1 = LETTERS[1:3], V2 = 2:4, stringsAsFactors = FALSE)
library(plyr)
ddply(m,"V1",function(x) data.frame(V2=seq(x[,2])))

Resources