subsets of different vectors R - r

I have three vectors as shown below.
q = c("a == 1", "a == 2", "a == 3")
w = c("b >= 50", "b >= 100")
t = c("c >= 40 & c <= 80", "c > 80")
I want to be able to combine all the vectors into one large vector so that every possible subset is in a larger vector. For example I want to have
("a == 1 & b >= 50", "a == 1 & b >= 100", "a ==2 & b >=50",
"a == 2 & b >= 100", "a == 3 & b >= 50", "a == 3 & b >= 100",
"a ==1 & c >= 40 & c <= 80", "a ==1 & c > 80",
"a ==2 & c >= 40 & c <= 80", "a ==2 & c > 80",
"a ==3 & c >= 40 & c <= 80", "a ==3 & c > 80",
"b >= 50 & c >= 40 & c <= 80", "b >= 50 & c > 80",
"b >= 100 & c >= 40 & c <= 80", "b >= 100 & c > 80",
"a == 1 & b >= 50 & c >= 40 & c <= 80", "a == 1 & b >=50 & c > 80",
"a == 2 & b >= 50 & c >= 40 & c <= 80", "a == 2 & b >=50 & c > 80",
"a == 3 & b >= 50 & c >= 40 & c <= 80", "a == 3 & b >=50 & c > 80")
"a == 2 & b >= 100 & c >= 40 & c <= 80", "a == 2 & b >=100 & c > 80",
"a == 3 & b >= 100 & c >= 40 & c <= 80", "a == 3 & b >=100 & c > 80")
So I need every subset to be created and joined with the "&" sign but I don't want to be comparing any element in the same vector. I also have three vectors in this example but the number of vectors should be variable. Does anyone know how to achieve this? Thanks!

We can create strings using expand.grid and combn. Create a combn of list ('lst') elements picking 2 or 3 in a list (using lapply), expand the list elements into a data.frame and paste with do.call (specifying the sep as " & ")
lst <- list(q w, t)
unlist( lapply(2:3, function(i) combn(lst, i,
FUN = function(x) do.call(paste, c(expand.grid(x), sep = " & ")),
simplify = FALSE)))

Related

Report all possible combinations of a string-separated vector

In the tidyverse I would like to mutate/expand a string vector so that all possible combinations of elements (separated by " & ") are reported, one for each line.
I tried decomposing my function using t(combn(unlist(strsplit(x, " & ")),2)), but fails when there is no " & ".
In the example:
"A" remains "A" (or becomes "A & A")
"A & B" remains "A & B"
"C & D & E" becomes "C & D", "C & E" and "D & E" in three different rows
Note (1): I cannot predict the number of combinations in advance "A & B & C & D..."
Note (2): Order is not important (i.e. "C & D" == "D & C")
Note (3): This would feed into a separate function and be used in a igraph application.
Thanks in advance.
data <- data.frame(names=c(1:3), combinations=c("A","A & B","C & D & E"))
names combinations
1 1 A
2 2 A & B
3 3 C & D & E
expected <- data.frame(projects=c(1,2,3,3,3), combinations=c("A","A & B","C & D","C & E","D & E"))
projects combinations
1 1 A
2 2 A & B
3 3 C & D
4 3 C & E
5 3 D & E
You can use combn to create combinations within each name :
library(dplyr)
library(tidyr)
data %>%
separate_rows(combinations, sep = ' & ') %>%
group_by(names) %>%
summarise(combinations = if(n() > 1)
combn(combinations, 2, paste0, collapse = ' & ') else combinations) %>%
ungroup
# names combinations
# <int> <chr>
#1 1 A
#2 2 A & B
#3 3 C & D
#4 3 C & E
#5 3 D & E
A data.table option
setnames(
setDT(data)[
,
{
s <- unlist(strsplit(combinations, " & "))
if (length(s) == 1) s else combn(s, 2, paste0, collapse = " & ")
},
names
], "V1", "combinations"
)[]
gives
names combinations
1: 1 A
2: 2 A & B
3: 3 C & D
4: 3 C & E
5: 3 D & E
Using data.table method
library(splitstackshape)
setnames(cSplit(data, 'combinations', sep=' & ', 'long', type.convert = FALSE)[,
if(.N > 1) combn(combinations, 2, FUN = paste, collapse = ' & ') else
combinations, names], 'V1', 'combinations')[]
# names combinations
#1: 1 A
#2: 2 A & B
#3: 3 C & D
#4: 3 C & E
#5: 3 D & E

how to remove these characters from a string c () \ ][ ""

I have a string like this
st <- "c(c(c(\"E\" >= \"E\", \"B\" <= \"E\" | \"D\" <= \"B\"), \"A\" >= \"C\" | \"A\" >= \"A\"), c(\"E\" >= \"C\", \"D\" <= \"C\" | \"C\" <= \"C\")) []"
I want to remove these characters from it c () \ ][ ""
i would like to get this
"E >= E, B <= E | D <= B, A >= C | A >= A, E >= C, D <= C | C <= C"
Here is one approach that could solve your problem:
gsub('[c()"]| \\[\\]', "", st)
# [1] "E >= E, B <= E | D <= B, A >= C | A >= A, E >= C, D <= C | C <= C"

Join Tables by Condition in R

I've two tables I would like to join by condition.
Data_1 <- data.frame(Section_A = c("a","a","a","a","a","b","b","b","b","b","b","c","c","c","c"),
ID1 = c(1,2,3,4,5,10,11,12,13,14,15,20,21,22,23),
Key = c("A","A","A","B","B","C","C","C","A","B","B","A","B","C","C"))
Data_2 <- data.frame(Section_B = c("d","d","d","d","d","d","e","e","e","f","f","f","f","f","f"),
ID2 = c(31,32,33,34,35,36,41,42,43,54,55,56,57,58,59),
Key = c("B","B","A","A","C","A","C","B","A","C","A","A","C","B","A"))
The following conditions must be satisfied when joining the tables.
Section_A "level a" can "receive" no more than 2 (count(ID2) for Section_A(a) <= 2)
Section_A "level b" can "receive" no more than 1 (count(ID2) for Section_A(b) <= 1)
Section_A "level c" can "receive" no more than 2 (count(ID2) for Section_A(c) <= 2)
Section_B "level d" can "give" no more than 2 (count(ID2) for Section_B(d) <= 2)
Section_B "level e" can "give" no more than 2 (count(ID2) for Section_B(e) <= 2)
Section_B "level f" can "give" no more than 1 (count(ID2) for Section_B(f) <= 1)
Any of ID1 and ID2 can't repeat itself in the joined table.
The expected outcome is:
Expected_Outcome <- data.frame(Section_A = c("a","a","b","c","c"),
ID1 = c(1,2,10,20,21),
Key = c("A","A","C","B","C"),
Section_B = c("d","d", "e","e","f"),
ID2 = c(33,34,41,42,57))

programming R ifelse conditions loop

Hello i need help with programming R. I have data.frame B with four column
x<- c(1,2,1,2,1,2,1,2,1,2,1,2,.......etc.)
y<-c(5,5,8,8,12,12,19,19,30,30,50,50,...etc.)
z<- c(2018-11-08,2018-11-08,2018-11-09,2018-11-09,2018-11-11,2018-11-11,2018-11-20,2018-11-20,2018-11-29,2018-11-29,2018-11-30,2018-11-30,.......etc.)
m<-c(0,1,1,0,1,1,0,1,0,1,0,1,...etc.)
2 milion rows and i need create next columns . Next columns should look as
t<-c(0,1,0,0,0,0,0,1,0,1,0,1,....)
code in cycle look like
B$t[1]=ifelse(B$y[i]==B$y[i+1] & B$z[i]==B$z[i+1] & B$x[i]==2 & B$m[1]==1,1,0)
for (i in 2:length(B$z))
{
B$t[i]<-ifelse(B$y[i]==B$y[i-1] & B$z[i]==B$z[i-1] & B$x[i]==2 & B$m[i]==1 & B$m[i]!=B$m[i-1],1,0)
}
I do not want to use cycle- loop.
I use basic package in R.
And i have new one question when i have data.frame E
x<- c(1,2,3,1,2,3,1,2,3,1,2,3,.......etc.)
y<-c(5,5,5,8,8,8,12,12,12,,19,19,19,30,30,30,50,50,50,...etc.)
z<- c(2018-11-08,2018-11-08,2018-11-08,2018-11-09,2018-11-09,2018-11-09,2018-11-11,2018-11-11,2018-11-11,2018-11-20,2018-11-20,2018-11-20,2018-11-29,2018-11-29,2018-11-29,2018-11-30,2018-11-30,2018-11-30,.......etc.)
m<-c(0,1,1,0,0,1,0,1,0,1,0,1,0,0,1...etc.)
2 milion rows and i need create next columns . Next columns should look as
t<-c(0,1,0,0,1,....)
code in cycle look like
E$t[1]=ifelse(E$y[i]==E$y[i+1] & E$z[i]==E$z[i+1] & E$x[1]==2 & E$m[1]==1,1,0)
E$t[2]=ifelse(E$y[i]==E$y[i+1] & E$z[i]==E$z[i+1] & E$x[2]==3 & E$m[2]==1,1,0)
for (i in 3:length(E$y))
{
E$t[i]<-ifelse(E$y[i]==E$y[i-2] & E$z[i]==E$z[i-2] & E$x[i]==3 & E$m[i]==1 &
E$m[i-1]==0 & E$m[i-2]==0,1,0)
}
I do not want to use cycle- loop.
I use basic package in R.
Here is a solution with base R:
N <- nrow(B)
B$t <- ifelse(B$y==c(NA, B$y[-N]) & B$z==c(NA, B$z[-N]) & B$x==2 & B$m==1 & B$m!=c(NA, B$m[-N]), 1, 0)
Here is a solution with data.table:
library("data.table")
B <- data.table(
x= c(1,2,1,2,1,2,1,2,1,2,1,2), y= c(5,5,8,8,12,12,19,19,30,30,50,50),
z= c("2018-11-08", "2018-11-08", "2018-11-09", "2018-11-09", "2018-11-11", "2018-11-11", "2018-11-20",
"2018-11-20", "2018-11-29", "2018-11-29", "2018-11-30", "2018-11-30"),
m= c(0,1,1,0,1,1,0,1,0,1,0,1)
)
B[, t := ifelse(y==c(NA, y[- .N]) & z==c(NA, z[- .N]) & x==2 & m==1 & m!=c(NA, m[- .N]), 1, 0)]
or (if logical is acceptable)
B[, t := (y==c(NA, y[- .N]) & z==c(NA, z[- .N]) & x==2 & m==1 & m!=c(NA, m[- .N]))]
or using shift()
B[, t := (y==shift(y) & z==shift(z) & x==2 & m==1 & m!=shift(m))]
With dplyr you can use if_else and lag:
library(dplyr)
dat %>%
mutate(t = if_else(
y == lag(y) & z == lag(z) & x == 2 & m == 1 & m != lag(m), 1, 0)
) # mutate lets you create a new variable in dat (named t here)
# x y z m t
# 1 1 5 2018-11-08 0 0
# 2 2 5 2018-11-08 1 1
# 3 1 8 2018-11-09 1 0
# 4 2 8 2018-11-09 0 0
# 5 1 12 2018-11-11 1 0
# 6 2 12 2018-11-11 1 0
# 7 1 19 2018-11-20 0 0
# 8 2 19 2018-11-20 1 1
# 9 1 30 2018-11-29 0 0
# 10 2 30 2018-11-29 1 1
# 11 1 50 2018-11-30 0 0
# 12 2 50 2018-11-30 1 1
Data:
x<- c(1,2,1,2,1,2,1,2,1,2,1,2)
y<-c(5,5,8,8,12,12,19,19,30,30,50,50)
z<- c("2018-11-08","2018-11-08","2018-11-09","2018-11-09","2018-11-11","2018-11-11","2018-11-20","2018-11-20","2018-11-29","2018-11-29","2018-11-30","2018-11-30")
m<-c(0,1,1,0,1,1,0,1,0,1,0,1)
dat <- data.frame(x, y, z, m)

Automatically replacing only particular characters in a string

To show you my problem. We have a string containing a random system of equations:
x0<-"3w+2x+y-3z=-5; 5w+x+2z=31; -2w-x+3y+4z=7; -3x-5y+z=8"
Next steps:
varnames <- sort(strapply(x0, "[a-z]", simplify = unique))
spl <- strsplit(x0, ";")[[1]]
my_string<-unlist(spl)
my_string<-trimws(my_string)
ss1 <- gsubfn("[a-z]", x ~ (match(x, varnames) == seq_along(varnames))+0,
spl)
ss2 <- gsub("(\\d)c", "\\1*c", ss1)
ss3 <- sub("=.*", "", ss2)
A <- eval(parse(text = paste("rbind(", paste(ss3, collapse = ","), ")")))
b <- as.numeric(sub(".*=", "", ss2))
z<-matrix(cbind(A,b), nrow=ncol(cbind(A,b)), ncol=nrow(cbind(A,b)),
byrow=TRUE)
x1<-toString(z)
x1<-stringr::str_replace_all(z1,","," &")
x1
The output is:
3 & 2 & 1 & -3 & -5 & 5 & 1 & 0 & 2 & 31 & -2 & -1 & 3 & 4 & 7 & 0 & -3 & -5 & 1 & 8
But I want to achieve:
3 & 2 & 1 & -3 &|& -5 \\ 5 & 1 & 0 & 2 &|& 31 \\ -2 & -1 & 3 & 4 &|& 7 \\ 0 & -3 & -5 & 1 &|& 8
It means how to replace in x1 every fourth (in this example) "&" char (which stands in the x0 string for "=") with "&|&" and every fifth "&" (which stands in the x0 for ";") with "\\" to be able to create in markdown a Latex table, like this:
Thank you in advance.
Man that took a lot out of me, lol. Feels like there must be an easier way..
library(stringr)
x2 <- str_replace_all(x1, '(-?\\d+\\s&\\s-?\\d+\\s&\\s-?\\d+\\s&\\s)(-?\\d+\\s?)&?\\s?(-?\\d+)\\s?&?\\s?', '\\1\\2&|& \\3 \\\\ ')
x2 <- substr(x2, 1, nchar(x2)-3)
x2
#[1] "3 & 2 & 1 & -3 &|& -5 \\ 5 & 1 & 0 & 2 &|& 31 \\ -2 & -1 & 3 & 4 &|& 7 \\ 0 & -3 & -5 & 1 &|& 8"

Resources