Average Value of Subgraph in r - r

The Data looks like this
library(igraph)
From <- c(1,2,3,4,5,6,7,8)
To <- c(NA,1,2,3,2,NA,6,7)
Value<- c(1,0,0.5,0.5,0,-1,-1,-0.5)
Data <- data.frame(From,To, Value)
Network <- graph.data.frame(Data[,c("From","To")])
Network<- Network - "NA"
plot(Network)
Now I would like to know the average Value of the partial graph (connected component) each node is in, and add it to the initial data frame.
At the end it should look like this:
From <- c(1,2,3,4,5,6,7,8)
To <- c(NA,1,2,3,2,NA,6,7)
Value<- c(1,0,0.5,0.5,0,-1,-1,-0.5)
AverageTreeValue<- c(0.4,0.4,0.4,0.4,0.4,-0.833,-0.833,-0.833)
FinalData <- data.frame(From,To, Value, AverageTreeValue)

You can use the clusters function to compute connected components in your graph, aggregate to compute the mean value for each of these clusters, and merge to combine the two together:
Data$group <- clusters(Network)$membership
(FinalData <- merge(Data, aggregate(Value~group, Data, mean), by="group"))
# group From To Value.x Value.y
# 1 1 1 NA 1.0 0.4000000
# 2 1 2 1 0.0 0.4000000
# 3 1 3 2 0.5 0.4000000
# 4 1 4 3 0.5 0.4000000
# 5 1 5 2 0.0 0.4000000
# 6 2 6 NA -1.0 -0.8333333
# 7 2 7 6 -1.0 -0.8333333
# 8 2 8 7 -0.5 -0.8333333
Alternately, you could use match to perform the merge and get some more control over the names of the generated column:
groups <- clusters(Network)$membership
means <- aggregate(Value~group, data.frame(Value=Data$Value, group=groups), mean)
Data$AverageTreeValue <- means$Value[match(groups, means$group)]
Data
# From To Value AverageTreeValue
# 1 1 NA 1.0 0.4000000
# 2 2 1 0.0 0.4000000
# 3 3 2 0.5 0.4000000
# 4 4 3 0.5 0.4000000
# 5 5 2 0.0 0.4000000
# 6 6 NA -1.0 -0.8333333
# 7 7 6 -1.0 -0.8333333
# 8 8 7 -0.5 -0.8333333

Related

Different random numbers when two conditions are met in R

I have a data frame of three columns Distance, Age, and Value where there are three repeated Value for every Distance and Age combination. I would like to generate a random number for Value for certain Distance and Age combinations. I can get a random number to generate however, it is the same random number repeated and I need three different random numbers.
Example Data
set.seed(321)
dat <- data.frame(matrix(ncol = 3, nrow = 27))
colnames(dat)[1:3] <- c("Distance", "Age", "Value")
dat$Distance <- rep(c(0.5,1.5,2.5), each = 9)
dat$Age <- rep(1:3, times = 9)
The code below creates a random number for the Distance and Age combo but the random number is the same for each of the three measurements, they should be different random numbers.
dat$Value <- ifelse(dat$Distance == '0.5' & dat$Age == '1',
rep(rnorm(3,10,2),3), NA)
Instead of getting the same repeated random number for the Distance and Age combo
head(dat)
Distance Age Value
1 0.5 1 13.40981
2 0.5 2 NA
3 0.5 3 NA
4 0.5 1 13.40981
5 0.5 2 NA
6 0.5 3 NA
I would like different random numbers for the Distance and Age combo
head(dat)
Distance Age Value
1 0.5 1 13.40981
2 0.5 2 NA
3 0.5 3 NA
4 0.5 1 11.18246
5 0.5 2 NA
6 0.5 3 NA
The numbers for Value don't really matter and are for demonstration purposes only.
Replace rep(rnorm(3,10,2),3) with rnorm(nrow(dat), 10, 2).
Something like this?
library(dplyr)
dat %>%
mutate(Value = ifelse(Distance == 0.5 & Age == 1, sample(1000,nrow(dat), replace = TRUE), NA))
Distance Age Value
1 0.5 1 478
2 0.5 2 NA
3 0.5 3 NA
4 0.5 1 707
5 0.5 2 NA
6 0.5 3 NA
7 0.5 1 653
8 0.5 2 NA
9 0.5 3 NA
10 1.5 1 NA
11 1.5 2 NA
12 1.5 3 NA
13 1.5 1 NA
14 1.5 2 NA
15 1.5 3 NA
16 1.5 1 NA
17 1.5 2 NA
18 1.5 3 NA
19 2.5 1 NA
20 2.5 2 NA
21 2.5 3 NA
22 2.5 1 NA
23 2.5 2 NA
24 2.5 3 NA
25 2.5 1 NA
26 2.5 2 NA
27 2.5 3 NA
You can eliminate the ifelse():
# Logical mask selecting the rows that should receive a random value.
# The columns are numeric, so compare against numbers rather than strings
# to avoid relying on implicit numeric-to-character coercion.
idx <- dat$Distance == 0.5 & dat$Age == 1
# Draw exactly one independent value per selected row, so no value repeats.
dat$Value[idx] <- rnorm(sum(idx), 10, 2)
head(dat, 7)
# Distance Age Value
# 1 0.5 1 10.91214
# 2 0.5 2 NA
# 3 0.5 3 NA
# 4 0.5 1 10.84067
# 5 0.5 2 NA
# 6 0.5 3 NA
# 7 0.5 1 11.15517

Filter a group of a data.frame based on multiple conditions

I am looking for an elegant way to filter the values of a specific group of big data.frame based on multiple conditions.
My data frame looks like this.
data=data.frame(group=c("A","B","C","A","B","C","A","B","C"),
time= c(rep(1,3),rep(2,3), rep(3,3)),
value=c(0.2,1,1,0.1,10,20,10,20,30))
group time value
1 A 1 0.2
2 B 1 1.0
3 C 1 1.0
4 A 2 0.1
5 B 2 10.0
6 C 2 20.0
7 A 3 10.0
8 B 3 20.0
9 C 3 30.0
For time point 1 only, I would like to keep just the values that are bigger than 0.1 but smaller than 1; all other time points should stay untouched.
I want my data.frame to look like this.
group time value
1 A 1 0.2
4 A 2 0.1
5 B 2 10.0
6 C 2 20.0
7 A 3 10.0
8 B 3 20.0
9 C 3 30.0
Any help is highly appreciated.
With dplyr you can do
library(dplyr)
data %>% filter(!(time == 1 & (value <= 0.1 | value >= 1)))
# group time value
# 1 A 1 0.2
# 2 A 2 0.1
# 3 B 2 10.0
# 4 C 2 20.0
# 5 A 3 10.0
# 6 B 3 20.0
# 7 C 3 30.0
Or if you have too much free time and you decided to avoid dplyr:
# Flag the time-1 rows whose value lies strictly between 0.1 and 1.
# The comparison already yields a logical vector, so no ifelse() is needed.
ind <- with(data, time == 1 & value > 0.1 & value < 1)
# Keep every row from the other time points plus the flagged time-1 rows.
data <- data[data$time != 1 | ind, ]
group time value
1 A 1 0.2
4 A 2 0.1
5 B 2 10.0
6 C 2 20.0
7 A 3 10.0
8 B 3 20.0
9 C 3 30.0
Another simple option would be to use subset twice and then append the results in a row wise manner.
rbind(
subset(data, time == 1 & value > 0.1 & value < 1),
subset(data, time != 1)
)
# group time value
# 1 A 1 0.2
# 4 A 2 0.1
# 5 B 2 10.0
# 6 C 2 20.0
# 7 A 3 10.0
# 8 B 3 20.0
# 9 C 3 30.0

r rbind dataframes in each list using lapply function

I want to add some data points. odtl is the original data and adtl is the data points to add. adtl is set to NA but will be interpolated by zoo::na.spline after rbind.
During this process, two lists(odtl and adtl) contain three data frames each. I want to combine the data frames in the order in which they are loaded into each list.
I succeeded in doing this with a for loop, as shown below, but my lapply attempt doesn't work. Could you rewrite this loop using lapply or another apply-family function?
Thanks.
> odtl # original dataset
[[1]]
x index
1 1 1
2 2 2
3 3 3
4 4 4
5 5 5
[[2]]
x index
1 1 1
2 2 2
3 3 3
4 4 4
[[3]]
x index
1 1 1
2 2 2
3 3 3
> adtl # dataset for add
[[1]]
x index
1 NA 1.5
[[2]]
x index
1 NA 1.5
2 NA 2.5
3 NA 3.5
[[3]]
x index
1 NA 1.5
2 NA 2.5
> wdtl <- list() # This is the goal.
> for(i in 1:length(odtl)){
+ wdtl[[i]] <- rbind(odtl[[i]], adtl[[i]])
+ }
> wdtl # This is the goal but I want complete it by lapply or something
[[1]]
x index
1 1 1.0
2 2 2.0
3 3 3.0
4 4 4.0
5 5 5.0
6 NA 1.5
[[2]]
x index
1 1 1.0
2 2 2.0
3 3 3.0
4 4 4.0
5 NA 1.5
6 NA 2.5
7 NA 3.5
[[3]]
x index
1 1 1.0
2 2 2.0
3 3 3.0
4 NA 1.5
5 NA 2.5
You may use Map(), which applies a function element-wise to the corresponding elements of each of its arguments.
Map(rbind, odtl, adtl)
# [[1]]
# x index
# 1 1 1.0
# 2 2 2.0
# 3 3 3.0
# 4 4 4.0
# 5 5 5.0
# 6 NA 1.5
# 7 NA 2.5
# 8 NA 3.5
# 9 NA 4.5
# 10 NA 5.5
#
# [[2]]
# x index
# 1 1 1.0
# 2 2 2.0
# 3 3 3.0
# 4 4 4.0
# 5 NA 1.5
# 6 NA 2.5
# 7 NA 3.5
# 8 NA 4.5
#
# [[3]]
# x index
# 1 1 1.0
# 2 2 2.0
# 3 3 3.0
# 4 NA 1.5
# 5 NA 2.5
# 6 NA 3.5
Data
odtl <- list(data.frame(x=1:5, index=1:5),
data.frame(x=1:4, index=1:4),
data.frame(x=1:3, index=1:3))
adtl <- list(data.frame(x=NA, index=seq(1.5, 5.5, 1)),
data.frame(x=NA, index=seq(1.5, 4.5, 1)),
data.frame(x=NA, index=seq(1.5, 3.5, 1)))
I think the solution in the comments by @thelatemail is the most elegant one. If you want to use lapply, then the code below should do what you want:
# lapply() always returns a list. sapply() would try to simplify here and,
# because every rbind() result is a data frame with the same number of
# columns, collapse the results into a list-matrix instead of a list of
# data frames.
wdtl <- lapply(seq_along(odtl), function(k) rbind(odtl[[k]], adtl[[k]]))
Specifically from the lapply, apply etc. family of functions, you could use mapply
# Original data sets: one data frame per list element.
odtl <- list(data.frame(x=1:5, index=1:5),
             data.frame(x=1:4, index=1:4),
             data.frame(x=1:3, index=1:3))
# Rows to append to each data set; x is NA and index holds the new positions.
adtl <- list(data.frame(x=NA, index=seq(1.5, 5.5, 1)),
             data.frame(x=NA, index=seq(1.5, 4.5, 1)),
             data.frame(x=NA, index=seq(1.5, 3.5, 1)))
# Pairwise rbind of corresponding elements; SIMPLIFY = FALSE keeps the
# result as a list of data frames.
mapply(rbind, odtl, adtl, SIMPLIFY = FALSE)
# [[1]]
# x index
# 1 1 1.0
# 2 2 2.0
# 3 3 3.0
# 4 4 4.0
# 5 5 5.0
# 6 NA 1.5
# 7 NA 2.5
# 8 NA 3.5
# 9 NA 4.5
# 10 NA 5.5
#
# [[2]]
# x index
# 1 1 1.0
# 2 2 2.0
# 3 3 3.0
# 4 4 4.0
# 5 NA 1.5
# 6 NA 2.5
# 7 NA 3.5
# 8 NA 4.5
#
# [[3]]
# x index
# 1 1 1.0
# 2 2 2.0
# 3 3 3.0
# 4 NA 1.5
# 5 NA 2.5
# 6 NA 3.5
Note that Map is a wrapper around mapply(FUN = f, ..., SIMPLIFY = FALSE).

How to assign strings instead of on-the-fly numbers as the value of the given specific dataframe?

(reproducible example given) The function causfinder::causalitycombinations below:
#' Enumerate all causer / dependent / conditioning-variable arrangements
#'
#' Variables are referred to by their positions 1..nvars. Every way of
#' choosing `ncausers` causing variables is combined with every way of
#' choosing `ndependents` dependent variables from the ones that are left.
#'
#' @param nvars Total number of variables in the system.
#' @param ncausers Number of variables in the causing set (at least 1).
#' @param ndependents Number of variables in the dependent set (at least 1);
#'   `ncausers + ndependents` must not exceed `nvars`.
#' @return An integer matrix with `nvars` columns and
#'   `choose(nvars, ncausers) * choose(nvars - ncausers, ndependents)` rows.
#'   In each row the first `ncausers` columns hold the causers, the next
#'   `ndependents` columns the dependents, and the remaining columns the
#'   variables belonging to neither set, in increasing order.
causalitycombinations <- function (nvars, ncausers, ndependents)
{
  stopifnot(
    length(nvars) == 1L, length(ncausers) == 1L, length(ndependents) == 1L,
    ncausers >= 1, ndependents >= 1, ncausers + ndependents <= nvars
  )

  variables <- seq_len(nvars)
  # One column per causing set; computed once and reused below.
  independents <- combn(nvars, ncausers)

  # Choose the dependents by *position* among the variables left over after
  # removing the causers. Calling combn() on the leftover values themselves
  # breaks when exactly one variable is left, because combn() expands a
  # single positive number k to 1:k.
  picks <- combn(nvars - ncausers, ndependents)
  swingnumber <- ncol(picks)

  rows <- lapply(seq_len(ncol(independents)), function(i) {
    causers <- independents[, i]
    remaining <- setdiff(variables, causers)
    # Each column produced here is one row of the final matrix.
    vapply(seq_len(swingnumber), function(j) {
      dependents <- remaining[picks[, j]]
      c(causers, dependents, setdiff(remaining, dependents))
    }, integer(nvars))
  })

  matrix(unlist(rows), ncol = nvars, byrow = TRUE)
}
lists all the multivariate causality combinations. For example, in a 4-variable system, conditioned on the other 2 variables of the system, they are (when variables are assigned to numbers 1,2,3,4 and this assignment is kept throughout the analysis):
causalitycombinations(4,1,1)
[,1] [,2] [,3] [,4]
[1,] 1 2 3 4
[2,] 1 3 2 4
[3,] 1 4 2 3
[4,] 2 1 3 4
[5,] 2 3 1 4
[6,] 2 4 1 3 # to check whether 2nd var Granger-causes 4th var conditioned on 1 and 3
[7,] 3 1 2 4
[8,] 3 2 1 4
[9,] 3 4 1 2
[10,] 4 1 2 3
[11,] 4 2 1 3
[12,] 4 3 1 2
Now,
data.frame(from = causalitycombinations(4,1,1)[,1], to= causalitycombinations(4,1,1)[,2],
pval = c(0.5,0.6,0.1, #I just typed random p-vals here
0.4,0.8,0.2,
0.1,0.5,0.9,
0.0,0.0,0.1)
)
produces:
from to pval
1 1 2 0.5
2 1 3 0.6
3 1 4 0.1
4 2 1 0.4
5 2 3 0.8
6 2 4 0.2
7 3 1 0.1
8 3 2 0.5
9 3 4 0.9
10 4 1 0.0
11 4 2 0.0
12 4 3 0.1
In the above "from" and "to" columns' entries, I wanna print variables' names (say: "inf", "gdp", "exc", "stock") instead of their representative numbers (i.e., 1,2,3,4). How to achieve this?
Equivalently, how to list combinations with strings instead of numbers
We can update columns with matching names by position from string vector:
# update columns with matching name
df1$from <- c("inf", "gdp", "exc", "stock")[df1$from]
df1$to <- c("inf", "gdp", "exc", "stock")[df1$to]
# result
df1
# from to pval
# 1 inf gdp 0.5
# 2 inf exc 0.6
# 3 inf stock 0.1
# 4 gdp inf 0.4
# 5 gdp exc 0.8
# 6 gdp stock 0.2
# 7 exc inf 0.1
# 8 exc gdp 0.5
# 9 exc stock 0.9
# 10 stock inf 0.0
# 11 stock gdp 0.0
# 12 stock exc 0.1
# input data
df1 <- read.table(text=" from to pval
1 1 2 0.5
2 1 3 0.6
3 1 4 0.1
4 2 1 0.4
5 2 3 0.8
6 2 4 0.2
7 3 1 0.1
8 3 2 0.5
9 3 4 0.9
10 4 1 0.0
11 4 2 0.0
12 4 3 0.1", header = TRUE)

R: create variables up to power of n

Say I have a data frame with m variables, how can I get their generated variables up to power of n? For example, df is a data frame with 2 variables a and b:
df <- data.frame(a=c(1,2), b=c(3,4))
I want to add variables up to power of 3, which means adding to df these generated columns:
a^2, a*b, b^2, a^3, a^2*b, b^2*a, b^3
How can I do this?
Use polym:
df <- data.frame(a=c(1,2), b=c(3,4))
# a b
#1 1 3
#2 2 4
res <- do.call(polym, c(df, degree=3, raw=TRUE))
# 1.0 2.0 3.0 0.1 1.1 2.1 0.2 1.2 0.3
#[1,] 1 1 1 3 3 3 9 9 27
#[2,] 2 4 8 4 8 16 16 32 64
#attr(,"degree")
#[1] 1 2 3 1 2 3 2 3 3
Edit:
Here is a possibility to create the desired column names:
colnames(res) <- apply(
do.call(rbind,
strsplit(colnames(res), ".", fixed=TRUE)),
1,
function(x) paste(rep(names(df), as.integer(x)), collapse="")
)
# a aa aaa b ab aab bb abb bbb
#[1,] 1 1 1 3 3 3 9 9 27
#[2,] 2 4 8 4 8 16 16 32 64
#attr(,"degree")
#[1] 1 2 3 1 2 3 2 3 3

Resources