In the tidyverse I would like to mutate/expand a string vector so that all possible combinations of elements (separated by " & ") are reported, one for each line.
I tried decomposing my function using t(combn(unlist(strsplit(x, " & ")),2)), but fails when there is no " & ".
In the example:
"A" remains "A" (or becomes "A & A")
"A & B" remains "A & B"
"C & D & E" becomes "C & D", "C & E" and "D & E" in three different rows
Note (1): I cannot predict the number of combinations in advance "A & B & C & D..."
Note (2): Order is not important (i.e. "C & D" == "D & C")
Note (3): This would feed into a separate function and be used in a igraph application.
Thanks in advance.
data <- data.frame(names=c(1:3), combinations=c("A","A & B","C & D & E"))
names combinations
1 1 A
2 2 A & B
3 3 C & D & E
expected <- data.frame(projects=c(1,2,3,3,3), combinations=c("A","A & B","C & D","C & E","D & E"))
projects combinations
1 1 A
2 2 A & B
3 3 C & D
4 3 C & E
5 3 D & E
You can use combn to create combinations within each name :
library(dplyr)
library(tidyr)
data %>%
separate_rows(combinations, sep = ' & ') %>%
group_by(names) %>%
summarise(combinations = if(n() > 1)
combn(combinations, 2, paste0, collapse = ' & ') else combinations) %>%
ungroup
# names combinations
# <int> <chr>
#1 1 A
#2 2 A & B
#3 3 C & D
#4 3 C & E
#5 3 D & E
A data.table option
setnames(
setDT(data)[
,
{
s <- unlist(strsplit(combinations, " & "))
if (length(s) == 1) s else combn(s, 2, paste0, collapse = " & ")
},
names
], "V1", "combinations"
)[]
gives
names combinations
1: 1 A
2: 2 A & B
3: 3 C & D
4: 3 C & E
5: 3 D & E
Using data.table method
library(splitstackshape)
setnames(cSplit(data, 'combinations', sep=' & ', 'long', type.convert = FALSE)[,
if(.N > 1) combn(combinations, 2, FUN = paste, collapse = ' & ') else
combinations, names], 'V1', 'combinations')[]
# names combinations
#1: 1 A
#2: 2 A & B
#3: 3 C & D
#4: 3 C & E
#5: 3 D & E
I've two tables I would like to join by condition.
Data_1 <- data.frame(Section_A = c("a","a","a","a","a","b","b","b","b","b","b","c","c","c","c"),
ID1 = c(1,2,3,4,5,10,11,12,13,14,15,20,21,22,23),
Key = c("A","A","A","B","B","C","C","C","A","B","B","A","B","C","C"))
Data_2 <- data.frame(Section_B = c("d","d","d","d","d","d","e","e","e","f","f","f","f","f","f"),
ID2 = c(31,32,33,34,35,36,41,42,43,54,55,56,57,58,59),
Key = c("B","B","A","A","C","A","C","B","A","C","A","A","C","B","A"))
The following conditions must be satisfied when joining the tables.
Section_A "level a" can "receive" no more than 2 (count(ID2) for Section_A(a) <= 2)
Section_A "level b" can "receive" no more than 1 (count(ID2) for Section_A(b) <= 1)
Section_A "level c" can "receive" no more than 2 (count(ID2) for Section_A(c) <= 2)
Section_B "level d" can "give" no more than 2 (count(ID2) for Section_B(d) <= 2)
Section_B "level e" can "give" no more than 2 (count(ID2) for Section_B(e) <= 2)
Section_B "level f" can "give" no more than 1 (count(ID2) for Section_B(f) <= 1)
Any of ID1 and ID2 can't repeat itself in the joined table.
The expected outcome is:
Expected_Outcome <- data.frame(Section_A = c("a","a","b","c","c"),
ID1 = c(1,2,10,20,21),
Key = c("A","A","C","B","C"),
Section_B = c("d","d", "e","e","f"),
ID2 = c(33,34,41,42,57))
Hello i need help with programming R. I have data.frame B with four column
x<- c(1,2,1,2,1,2,1,2,1,2,1,2,.......etc.)
y<-c(5,5,8,8,12,12,19,19,30,30,50,50,...etc.)
z<- c(2018-11-08,2018-11-08,2018-11-09,2018-11-09,2018-11-11,2018-11-11,2018-11-20,2018-11-20,2018-11-29,2018-11-29,2018-11-30,2018-11-30,.......etc.)
m<-c(0,1,1,0,1,1,0,1,0,1,0,1,...etc.)
2 milion rows and i need create next columns . Next columns should look as
t<-c(0,1,0,0,0,0,0,1,0,1,0,1,....)
code in cycle look like
B$t[1]=ifelse(B$y[i]==B$y[i+1] & B$z[i]==B$z[i+1] & B$x[i]==2 & B$m[1]==1,1,0)
for (i in 2:length(B$z))
{
B$t[i]<-ifelse(B$y[i]==B$y[i-1] & B$z[i]==B$z[i-1] & B$x[i]==2 & B$m[i]==1 & B$m[i]!=B$m[i-1],1,0)
}
I do not want to use cycle- loop.
I use basic package in R.
And i have new one question when i have data.frame E
x<- c(1,2,3,1,2,3,1,2,3,1,2,3,.......etc.)
y<-c(5,5,5,8,8,8,12,12,12,,19,19,19,30,30,30,50,50,50,...etc.)
z<- c(2018-11-08,2018-11-08,2018-11-08,2018-11-09,2018-11-09,2018-11-09,2018-11-11,2018-11-11,2018-11-11,2018-11-20,2018-11-20,2018-11-20,2018-11-29,2018-11-29,2018-11-29,2018-11-30,2018-11-30,2018-11-30,.......etc.)
m<-c(0,1,1,0,0,1,0,1,0,1,0,1,0,0,1...etc.)
2 milion rows and i need create next columns . Next columns should look as
t<-c(0,1,0,0,1,....)
code in cycle look like
E$t[1]=ifelse(E$y[i]==E$y[i+1] & E$z[i]==E$z[i+1] & E$x[1]==2 & E$m[1]==1,1,0)
E$t[2]=ifelse(E$y[i]==E$y[i+1] & E$z[i]==E$z[i+1] & E$x[2]==3 & E$m[2]==1,1,0)
for (i in 3:length(E$y))
{
E$t[i]<-ifelse(E$y[i]==E$y[i-2] & E$z[i]==E$z[i-2] & E$x[i]==3 & E$m[i]==1 &
E$m[i-1]==0 & E$m[i-2]==0,1,0)
}
I do not want to use cycle- loop.
I use basic package in R.
Here is a solution with base R:
N <- nrow(B)
B$t <- ifelse(B$y==c(NA, B$y[-N]) & B$z==c(NA, B$z[-N]) & B$x==2 & B$m==1 & B$m!=c(NA, B$m[-N]), 1, 0)
Here is a solution with data.table:
library("data.table")
B <- data.table(
x= c(1,2,1,2,1,2,1,2,1,2,1,2), y= c(5,5,8,8,12,12,19,19,30,30,50,50),
z= c("2018-11-08", "2018-11-08", "2018-11-09", "2018-11-09", "2018-11-11", "2018-11-11", "2018-11-20",
"2018-11-20", "2018-11-29", "2018-11-29", "2018-11-30", "2018-11-30"),
m= c(0,1,1,0,1,1,0,1,0,1,0,1)
)
B[, t := ifelse(y==c(NA, y[- .N]) & z==c(NA, z[- .N]) & x==2 & m==1 & m!=c(NA, m[- .N]), 1, 0)]
or (if logical is acceptable)
B[, t := (y==c(NA, y[- .N]) & z==c(NA, z[- .N]) & x==2 & m==1 & m!=c(NA, m[- .N]))]
or using shift()
B[, t := (y==shift(y) & z==shift(z) & x==2 & m==1 & m!=shift(m))]
With dplyr you can use if_else and lag:
library(dplyr)
dat %>%
mutate(t = if_else(
y == lag(y) & z == lag(z) & x == 2 & m == 1 & m != lag(m), 1, 0)
) # mutate lets you create a new variable in dat (named t here)
# x y z m t
# 1 1 5 2018-11-08 0 0
# 2 2 5 2018-11-08 1 1
# 3 1 8 2018-11-09 1 0
# 4 2 8 2018-11-09 0 0
# 5 1 12 2018-11-11 1 0
# 6 2 12 2018-11-11 1 0
# 7 1 19 2018-11-20 0 0
# 8 2 19 2018-11-20 1 1
# 9 1 30 2018-11-29 0 0
# 10 2 30 2018-11-29 1 1
# 11 1 50 2018-11-30 0 0
# 12 2 50 2018-11-30 1 1
Data:
x<- c(1,2,1,2,1,2,1,2,1,2,1,2)
y<-c(5,5,8,8,12,12,19,19,30,30,50,50)
z<- c("2018-11-08","2018-11-08","2018-11-09","2018-11-09","2018-11-11","2018-11-11","2018-11-20","2018-11-20","2018-11-29","2018-11-29","2018-11-30","2018-11-30")
m<-c(0,1,1,0,1,1,0,1,0,1,0,1)
dat <- data.frame(x, y, z, m)
To show you my problem. We have a string containing a random system of equations:
x0<-"3w+2x+y-3z=-5; 5w+x+2z=31; -2w-x+3y+4z=7; -3x-5y+z=8"
Next steps:
varnames <- sort(strapply(x0, "[a-z]", simplify = unique))
spl <- strsplit(x0, ";")[[1]]
my_string<-unlist(spl)
my_string<-trimws(my_string)
ss1 <- gsubfn("[a-z]", x ~ (match(x, varnames) == seq_along(varnames))+0,
spl)
ss2 <- gsub("(\\d)c", "\\1*c", ss1)
ss3 <- sub("=.*", "", ss2)
A <- eval(parse(text = paste("rbind(", paste(ss3, collapse = ","), ")")))
b <- as.numeric(sub(".*=", "", ss2))
z<-matrix(cbind(A,b), nrow=ncol(cbind(A,b)), ncol=nrow(cbind(A,b)),
byrow=TRUE)
x1<-toString(z)
x1<-stringr::str_replace_all(z1,","," &")
x1
The output is:
3 & 2 & 1 & -3 & -5 & 5 & 1 & 0 & 2 & 31 & -2 & -1 & 3 & 4 & 7 & 0 & -3 & -5 & 1 & 8
But I want to achieve:
3 & 2 & 1 & -3 &|& -5 \\ 5 & 1 & 0 & 2 &|& 31 \\ -2 & -1 & 3 & 4 &|& 7 \\ 0 & -3 & -5 & 1 &|& 8
It means how to replace in x1 every fourth (in this example) "&" char (which stands in the x0 string for "=") with "&|&" and every fifth "&" (which stands in the x0 for ";") with "\\" to be able to create in markdown a Latex table, like this:
Thank you in advance.
Man that took a lot out of me, lol. Feels like there must be an easier way..
library(stringr)
x2 <- str_replace_all(x1, '(-?\\d+\\s&\\s-?\\d+\\s&\\s-?\\d+\\s&\\s)(-?\\d+\\s?)&?\\s?(-?\\d+)\\s?&?\\s?', '\\1\\2&|& \\3 \\\\ ')
x2 <- substr(x2, 1, nchar(x2)-3)
x2
#[1] "3 & 2 & 1 & -3 &|& -5 \\ 5 & 1 & 0 & 2 &|& 31 \\ -2 & -1 & 3 & 4 &|& 7 \\ 0 & -3 & -5 & 1 &|& 8"