Calculating the difference of all variables to all other variables - r

Suppose my Dataframe look like:
w <- sample(-10:10, size =10)
x <- sample(-10:10, size =10)
y <- sample(-10:10, size =10)
z <- sample(-10:10, size =10)
df <- data.frame(w,x,y,z)
Now I want to calculate the absolute difference (|w-x|) of each column to all the others. So I want it like the example below, but additionally also with the columns w_z, x_y, x_z, y_z, ... (my original df has some more variables)
# Absolute difference between columns w and x
df$w_x <- abs(df$w - df$x)
# Absolute difference between columns w and y
df$w_y <- abs(df$w - df$y)
Alternatively, if there is an easier solution, it's OK to have the calculation in a new df / matrix. Thanks in advance.

Here is a method with combn
# Names for every unordered pair of columns, e.g. "w_x", "w_y", ..., "y_z"
nm1 <- combn(names(df), m = 2, FUN = paste, collapse = "_")
# combn() hands each two-column subset of df to FUN; the per-pair vectors of
# absolute differences are simplified into a matrix with one column per pair
df[nm1] <- combn(
  df,
  m = 2,
  FUN = function(pair) {
    abs(pair[[1]] - pair[[2]])
  }
)
-output
> df
w x y z w_x w_y w_z x_y x_z y_z
1 7 3 -2 6 4 9 1 5 3 8
2 -6 6 7 0 12 13 6 1 6 7
3 -8 0 -7 8 8 1 16 7 8 15
4 9 -9 -4 -2 18 13 11 5 7 2
5 5 -3 5 -4 8 0 9 8 1 9
6 2 -6 -8 -10 8 10 12 2 4 2
7 -5 7 2 -9 12 7 4 5 16 11
8 3 -2 4 -6 5 1 9 6 4 10
9 -10 -4 9 9 6 19 19 13 13 0
10 8 5 10 4 3 2 4 5 1 6

Related

Specifying multiple column names inside mutate

How do I specify column names inside mutate when multiple columns are generated?
In this example:
set.seed(5)
data.frame(x2 = sample(1:10, 10),
x3 = sample(1:10, 10),
x1 = sample(1:10, 10),
y3 = sample(1:10, 10),
y2 = sample(1:10, 10),
y1 = sample(1:10, 10)) |>
mutate(z1 = x1 - y1,
z2 = x2 - y2,
z3 = x3 - y3) |>
mutate(zz = across(num_range(prefix = 'x',
range = 1:3)) - across(num_range(prefix = 'y',
range = 1:3)))
Resulting in:
x2 x3 x1 y3 y2 y1 z1 z2 z3 zz.x1 zz.x2 zz.x3
1 2 3 9 10 9 6 3 -7 -7 3 -7 -7
2 9 10 6 6 4 5 1 5 4 1 5 4
3 7 6 4 8 8 3 1 -1 -2 1 -1 -2
4 3 2 3 4 10 8 -5 -7 -2 -5 -7 -2
5 1 5 2 5 7 7 -5 -6 0 -5 -6 0
6 6 4 5 3 6 2 3 0 1 3 0 1
7 5 8 10 2 1 4 6 4 6 6 4 6
8 10 7 8 7 3 1 7 7 0 7 7 0
9 4 1 1 9 2 9 -8 2 -8 -8 2 -8
10 8 9 7 1 5 10 -3 3 8 -3 3 8
I want zz.x1 to be named zz1, ...
Here's a dplyr-way:
mutate takes its name(s) from the first across and we can change this using the .names-argument. It accepts glue-style input that we can adapt to your needs using str_replace().
library(dplyr)
library(stringr)
# Subtract the y-columns from the x-columns; the result columns take their
# names from the first across(), rewritten from x* to z* through .names
df |>
  mutate(
    across(
      num_range(prefix = 'x', range = 1:3),
      .names = "{str_replace(col, 'x', 'z')}"
    ) -
      across(num_range(prefix = 'y', range = 1:3))
  )
Output:
x2 x3 x1 y3 y2 y1 z1 z2 z3
1 2 3 9 10 9 6 3 -7 -7
2 9 10 6 6 4 5 1 5 4
3 7 6 4 8 8 3 1 -1 -2
4 3 2 3 4 10 8 -5 -7 -2
5 1 5 2 5 7 7 -5 -6 0
6 6 4 5 3 6 2 3 0 1
7 5 8 10 2 1 4 6 4 6
8 10 7 8 7 3 1 7 7 0
9 4 1 1 9 2 9 -8 2 -8
10 8 9 7 1 5 10 -3 3 8
Data:
set.seed(5)
df <- data.frame(x2 = sample(1:10, 10),
x3 = sample(1:10, 10),
x1 = sample(1:10, 10),
y3 = sample(1:10, 10),
y2 = sample(1:10, 10),
y1 = sample(1:10, 10))
Update: Or similar to OP's desired output
# Same subtraction as before, but the new columns are named zz* so that they
# are added next to the existing z1, z2, z3 columns of df2
df2 |>
  mutate(
    across(
      num_range(prefix = 'x', range = 1:3),
      .names = "{str_replace(col, 'x', 'zz')}"
    ) -
      across(num_range(prefix = 'y', range = 1:3))
  )
Output:
x2 x3 x1 y3 y2 y1 z1 z2 z3 zz1 zz2 zz3
1 2 3 9 10 9 6 3 -7 -7 3 -7 -7
2 9 10 6 6 4 5 1 5 4 1 5 4
3 7 6 4 8 8 3 1 -1 -2 1 -1 -2
4 3 2 3 4 10 8 -5 -7 -2 -5 -7 -2
5 1 5 2 5 7 7 -5 -6 0 -5 -6 0
6 6 4 5 3 6 2 3 0 1 3 0 1
7 5 8 10 2 1 4 6 4 6 6 4 6
8 10 7 8 7 3 1 7 7 0 7 7 0
9 4 1 1 9 2 9 -8 2 -8 -8 2 -8
10 8 9 7 1 5 10 -3 3 8 -3 3 8
Data
set.seed(5)
df2 <- data.frame(x2 = sample(1:10, 10),
x3 = sample(1:10, 10),
x1 = sample(1:10, 10),
y3 = sample(1:10, 10),
y2 = sample(1:10, 10),
y1 = sample(1:10, 10)) |>
mutate(z1 = x1 - y1,
z2 = x2 - y2,
z3 = x3 - y3)
I don't know how to do this with dplyr but in base R it is pretty straightforward. This might partly answer also your previous question.
# Option 1: hard-coded variable suffixes
suff <- 1:3
# Option 2 (overrides option 1 when both lines are run): derive the suffixes
# from the column names by stripping the leading lower-case letters; the
# result is a sorted character vector of the distinct suffixes
suff <- sort(unique(sub('[a-z]*', '', names(df))))
# For each suffix i, add a column zz<i> holding x<i> - y<i>
for (i in suff) {
df[[paste0('zz', i)]] <- df[[paste0('x', i)]] - df[[paste0('y', i)]]
}
df
# x2 x3 x1 y3 y2 y1 zz1 zz2 zz3
# 1 2 3 9 10 9 6 3 -7 -7
# 2 9 10 6 6 4 5 1 5 4
# 3 7 6 4 8 8 3 1 -1 -2
# 4 3 2 3 4 10 8 -5 -7 -2
# 5 1 5 2 5 7 7 -5 -6 0
# 6 6 4 5 3 6 2 3 0 1
# 7 5 8 10 2 1 4 6 4 6
# 8 10 7 8 7 3 1 7 7 0
# 9 4 1 1 9 2 9 -8 2 -8
# 10 8 9 7 1 5 10 -3 3 8
A more efficient way which avoids the loop over suffixes would be like this:
# Vectorised over all suffixes: subtract the y-columns from the x-columns in a
# single data-frame operation and name the result zz<suffix> in the same step
zz <- setNames(
  df[paste0('x', suff)] - df[paste0('y', suff)],
  paste0('zz', suff)
)
# Append the new columns to the original data frame
df <- cbind(df, zz)
Data:
set.seed(5)
df <- data.frame(x2 = sample(1:10, 10),
x3 = sample(1:10, 10),
x1 = sample(1:10, 10),
y3 = sample(1:10, 10),
y2 = sample(1:10, 10),
y1 = sample(1:10, 10))

Generating all possible outcomes in r

Given a vector with numeric values, how do I generate all possible outcomes for subtraction to find the differences and put them in a data.frame?
dataset1 <- data.frame(numbers = c(1,2,3,4,5,6,7,8,9,10))
i.e. (1 - 1, 1 - 2 , 1 - 3,...)
Ideally, I would want the output to give me a data frame with 3 columns (Number X, Number Y, Difference) using dataset1.
The expand.grid function can get you "pairings" which are different than the pairings you get with combn. Since you included 1-1 I'm assuming you didn't want combn, since it doesn't return 1-1 and only gives you 45 combinations.
> pairs=expand.grid(X=1:10, Y=1:10)
> pairs$diff <- with(pairs, X-Y)
> pairs
X Y diff
1 1 1 0
2 2 1 1
3 3 1 2
4 4 1 3
5 5 1 4
6 6 1 5
7 7 1 6
8 8 1 7
9 9 1 8
10 10 1 9
11 1 2 -1
12 2 2 0
13 3 2 1
14 4 2 2
15 5 2 3
16 6 2 4
17 7 2 5
snipped remainder (total of 100 rows)
Use outer as another way to get such a group of paired differences;
> tbl <- matrix( outer(X=1:10, Y=1:10, "-"), 10, dimnames=list(X=1:10, Y=1:10))
> tbl
Y
X 1 2 3 4 5 6 7 8 9 10
1 0 -1 -2 -3 -4 -5 -6 -7 -8 -9
2 1 0 -1 -2 -3 -4 -5 -6 -7 -8
3 2 1 0 -1 -2 -3 -4 -5 -6 -7
4 3 2 1 0 -1 -2 -3 -4 -5 -6
5 4 3 2 1 0 -1 -2 -3 -4 -5
6 5 4 3 2 1 0 -1 -2 -3 -4
7 6 5 4 3 2 1 0 -1 -2 -3
8 7 6 5 4 3 2 1 0 -1 -2
9 8 7 6 5 4 3 2 1 0 -1
10 9 8 7 6 5 4 3 2 1 0
But I didn't see a compact way to create a dataframe of the sort you specified.
The now deleted comment by @RitchieSacramento was correct:
> tbl <- matrix( outer(X=1:10, Y=1:10, "-"), 10, dimnames=list(X=1:10, Y=1:10))
> as.data.frame.table(tbl)
X Y Freq
1 1 1 0
2 2 1 1
3 3 1 2
4 4 1 3
5 5 1 4
6 6 1 5
7 7 1 6
8 8 1 7
9 9 1 8
10 10 1 9
11 1 2 -1
12 2 2 0
13 3 2 1
14 4 2 2
15 5 2 3
16 6 2 4
You can use the combn() function to generate the list of all combinations take 2 at a time.
# Input values
numbers <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
# All combinations taken 2 at a time: a 2-row matrix, one column per pair
output <- combn(numbers, 2)
# One row per pair; as.data.frame() names the unnamed columns V1 and V2
answer <- as.data.frame(t(output))
answer$Difference <- answer$V1 - answer$V2
head(answer)
V1 V2 Difference
1 1 2 -1
2 1 3 -2
3 1 4 -3
4 1 5 -4
5 1 6 -5
6 1 7 -6

R Subtracting columns within a list

I'd like to subtract specific columns within a list. I'm still learning how to properly use the apply functions. For example, given
> b <- list(data.frame(12:16, 3*2:6), data.frame(10:14, 2*1:5))
> b
[[1]]
X12.16 X3...2.6
1 12 6
2 13 9
3 14 12
4 15 15
5 16 18
[[2]]
X10.14 X2...1.5
1 10 2
2 11 4
3 12 6
4 13 8
5 14 10
I'd like some function x so that I get
> x(b)
[[1]]
X12.16 X3...2.6 <newcol>
1 12 6 6
2 13 9 4
3 14 12 2
4 15 15 0
5 16 18 -2
[[2]]
X10.14 X2...1.5 <newcol>
1 10 2 8
2 11 4 7
3 12 6 6
4 13 8 5
5 14 10 4
Thanks in advance.
If your data.frames had nice and consistent names, you could use transform with lapply
b <- list(data.frame(a=12:16, b=3*2:6), data.frame(a=10:14, b=2*1:5))
lapply(b, transform, c=a-b)
Here is a solution:
lapply(b, function(x) {
x[, 3] <- x[, 1] - x[, 2]
x
})
[[1]]
X12.16 X3...2.6 V3
1 12 6 6
2 13 9 4
3 14 12 2
4 15 15 0
5 16 18 -2
[[2]]
X10.14 X2...1.5 V3
1 10 2 8
2 11 4 7
3 12 6 6
4 13 8 5
5 14 10 4
with dplyr:
library(dplyr)
lapply(b, function(x) x %>% mutate(new_col = .[[1]]-.[[2]]))
Result:
[[1]]
X12.16 X3...2.6 new_col
1 12 6 6
2 13 9 4
3 14 12 2
4 15 15 0
5 16 18 -2
[[2]]
X10.14 X2...1.5 new_col
1 10 2 8
2 11 4 7
3 12 6 6
4 13 8 5
5 14 10 4

From one vector delete all elements of another vector in r [duplicate]

This question already has answers here:
R: Remove the number of occurrences of values in one vector from another vector, but not all
(2 answers)
Closed 6 years ago.
I have 2 vectors
vec_1
[1] 2 3 4 5 6 7 8 9 10 11 12 13 14 2 3 4 5 6 7 8 9 10 11 12 13 14 2 3 4 5 6 7 8 9
[35] 10 11 12 13 14 2 3 4 5 6 7 8 9 10 11 12 13 14
vec_2
[1] 12 3 13 3 14 4 10 8 9 5 7 5 13 11 6 10 8 8 14 12 6 11 8 5 3 6
I want to delete all elements of vec_2 from vec_1
And sure, the function setdiff does not fit this case, because, for example, in vec_2 there are two 10s. And I want to delete only two 10s (not all four values of 10).
EDITED: expected output:
vec_1
[1] 2 2 2 2 3 4 4 4 5 6 7 7 7 9 9 9 10 10 11 11 12 12 13 13 14 14
How can i do this in r?
Here is one idea via union
# For every distinct value, keep as many copies as remain in vec_1 after
# removing the occurrences found in vec_2.
# lapply() (rather than sapply()) always returns a list, so unlist() yields a
# plain vector even when every value ends up with the same count.
# max(0, ...) avoids an invalid negative repeat count when vec_2 holds more
# copies of a value than vec_1 does.
unlist(lapply(union(vec_1, vec_2), function(i) {
  n_left <- length(vec_1[vec_1 == i]) - length(vec_2[vec_2 == i])
  rep(i, times = max(0, n_left))
}))
#[1] 2 2 2 2 3 4 4 4 5 6 7 7 7 9 9 9 10 10 11 11 12 12 13 13 14 14
Definitely, not the best solution but here is one way.
I created a simplified example.
vec1 <- c(1, 2, 3, 1, 1, 5)
vec2 <- c(1, 3, 5)
#Converting the frequency table to a data frame
x1 <- data.frame(table(vec1))
x2 <- data.frame(table(vec2))
#Assuming your vec1 has all the elements present in vec2
new_df <- merge(x1, x2, by.x = "vec1", by.y = "vec2", all.x = TRUE)
new_df
# vec1 Freq.x Freq.y
#1 1 3 1
#2 2 1 NA
#3 3 1 1
#4 5 1 1
#Replacing NA's by 0
new_df[is.na(new_df)] <- 0
#Subtracting the frequencies of common elements in two vectors
final <- cbind(new_df[1], new_df[2] - new_df[3])
final
# vec1 Freq.x
#1 1 2
#2 2 1
#3 3 0
#4 5 0
#Recreating a new vector based on the final dataframe
#table() stores the values as factor levels, so convert them back to numbers;
#pmax() guards against negative counts when vec2 has more copies than vec1
rep(as.numeric(as.character(final$vec1)), times = pmax(final$Freq.x, 0))
# [1] 1 1 2
You can do this using a simple for loop:
# Remove one occurrence from vec1 for every element of vec2.
# Iterating over the values directly also copes with an empty vec2.
for (el in vec2) {
  # match() gives the first position of el in vec1, or NA if none is left
  pos <- match(el, vec1)
  # Skip values that are not present: indexing with -NA would replace vec1
  # by NA instead of leaving it untouched
  if (!is.na(pos)) {
    vec1 <- vec1[-pos]
  }
}
You just identify the first position and remove from the original vector.
You can try this too:
# Walk over the elements of vec2 that occur in vec1 and drop one occurrence of
# each from vec1 (vec2 %in% vec1 selects the same elements as testing against
# intersect(vec1, vec2))
for (el in vec2[vec2 %in% vec1]) {
  hit <- which(vec1 == el)
  # vec1 may already have run out of copies of el; indexing with -NA would
  # then replace vec1 by NA, so only remove when a position was found
  if (length(hit) > 0) {
    vec1 <- vec1[-hit[1]]
  }
}
sort(vec1)
#[1] 2 2 2 2 3 4 4 4 5 6 7 7 7 9 9 9 10 10 11 11 12 12 13 13 14 14

zoo - Round coredata to integer

I've got a list of 69 zoo objects, I used na.approx to fill few gaps, but since my time series deal with counts I need the imputed values to be integers.
This code
list_int <- lapply(list_dec, round(coredata(list_dec), digits=0))
gives me the following error message
Error in round(coredata(list_dec), digits=0) :
non-numeric argument to mathematical function
I thought it was a problem with applying the function to a list instead of a vector, but the function
coredata(list_dec)
correctly shows all 69 time series (without need for lapply).
So why can't round apply to coredata?
EDITED
As suggested here's a minimal data set
A1 <- runif(20, min=-5, max=13)
A2 <- runif(20, min=-1, max=5)
A3 <- runif(20, min=-3, max=10)
A4 <- runif(20, min=0, max=2)
ls <- list(A1, A2, A3, A4)
list_dec <- lapply(ls, as.zoo)
As discussed in the comments, you can accomplish what you want by the following:
> library(zoo)
> A1 <- runif(20, min=-5, max=13)
> A2 <- runif(20, min=-1, max=5)
> A3 <- runif(20, min=-3, max=10)
> A4 <- runif(20, min=0, max=2)
> ls <- list(A1, A2, A3, A4)
> list_dec <- lapply(ls, as.zoo)
Now list_dec looks as follows:
> list_dec
[[1]]
1 2 3 4 5 6 7 8 9 10 11 12 13
9.20889929 8.03050882 1.52621137 9.91528049 12.71637959 11.93573340 3.34967427 9.75224030 7.90654714 0.08199464 -2.84403691 11.57990103 4.74868873
14 15 16 17 18 19 20
2.94023319 10.71812525 -2.05394366 -1.07669056 7.17503613 4.84871327 4.58929978
[[2]]
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
1.0756646 0.5615212 0.5697795 0.9629726 2.5962021 3.1932062 0.6894849 1.9844943 1.3351256 4.0043998 0.4756172 0.4573920 0.6009208 4.4963877 4.4149804
16 17 18 19 20
3.7762369 2.9670795 -0.8241576 2.1796402 2.5504061
[[3]]
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
1.5765136 1.7310402 0.7273943 4.0838831 -0.9946958 -2.0222258 7.5756159 3.9105252 3.9006369 -0.9939739 4.7603811 8.5079521 3.3653795 0.8546201 3.8143874
16 17 18 19 20
5.0847501 -2.6324485 2.0860695 5.7202315 9.5304238
[[4]]
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
1.36751418 1.44009472 1.41155170 1.55018689 1.31378442 1.09746739 0.09224919 0.66425731 0.61047787 1.63552109 1.56096710 1.59775494 1.69658733 1.08939868 1.96183397
16 17 18 19 20
1.20476936 0.94640977 0.73820689 0.65899943 1.54647028
Now you can directly call lapply like this:
lapply(list_dec,round)
which gives you the desired output:
[[1]]
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
9 8 2 10 13 12 3 10 8 0 -3 12 5 3 11 -2 -1 7 5 5
[[2]]
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1 1 1 1 3 3 1 2 1 4 0 0 1 4 4 4 3 -1 2 3
[[3]]
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
2 2 1 4 -1 -2 8 4 4 -1 5 9 3 1 4 5 -3 2 6 10
[[4]]
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1 1 1 2 1 1 0 1 1 2 2 2 2 1 2 1 1 1 1 2

Resources