calculate the modularity on each step - r

library(igraph)
g=graph.famous("Zachary")
c=edge.betweenness.community(g)
a=membership(c)
c$merges
b=community.to.membership(g,c$merges,steps=33)
[1] 1 1 2 1 3 3 3 1 4 5 3 1 1 1 4 4 3 1 4 1 4 1 4 4 2 2 4 2 2 4 4 2 4 4
[1] 9 4 0 0 0 0 5 0 3 2 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 6 0 0 0 0 7 0 0 8
I expect b is same with membership(c) because there are 33 steps. But why is b different from membership(c)?

This seems to be a bug in igraph because even the following does not work:
> library(igraph)
> g <- graph.full(3)
> c <- edge.betweenness.community(g)
> community.to.membership(g, c$merges, steps=1)$membership
This gives me a membership vector of 1 2 0, which is clearly wrong; the correct result should be 1 0 0 or 0 1 1. Actually, you can fix the bug by subtracting 1 from the merge matrix:
> g <- graph.famous("zachary")
> c <- edge.betweenness.community(g)
> membership(c)
[1] 1 1 2 1 3 3 3 1 4 5 3 1 1 1 4 4 3 1 4 1 4 1 4 4 2 2 4 2 2 4 4 2 4 4
> community.to.membership(g, c$merges-1, steps=29)$membership
[1] 0 0 2 0 3 3 3 0 1 4 3 0 0 0 1 1 3 0 1 0 1 0 1 1 2 2 1 2 2 1 1 2 1 1
These two membership vectors are essentially the same (with a bit of reindexing). Note that you only need 29 steps to reach the same membership vector because there are 34 vertices in the graph and you have 5 communities, so you need to perform 34-5=29 merges. Performing 33 steps would get you a single community only.
I will file a bug report for this in igraph's bug tracker.

Related

File entire vector by a certain appearance of another vector

I have the following data:
players<-rep(1:3,each=3)
trial<-rep(1:3)
choice<-c(1,0,0,0,0,0,0,1,0)
gamematrix<-data.frame(cbind(players,trial,choice))
players trial choice
1 1 1 1
2 1 2 0
3 1 3 0
4 2 1 0
5 2 2 0
6 2 3 0
7 3 1 0
8 3 2 1
9 3 3 0
Now I want to create a new vector:
for each participant who have at least one choice of "1", to get the value "3" and "0" otherwise:
players trial choice win
1 1 1 1 3
2 1 2 0 3
3 1 3 0 3
4 2 1 0 0
5 2 2 0 0
6 2 3 0 0
7 3 1 0 3
8 3 2 1 3
9 3 3 0 3
In the simple example above, player "1", had "1" in the first trial, while player 3 in the second trial, thus for all their choices the value is "3" in the new vector.
Any ideas how to do it? thanks!
A base R option using ave + ifelse
within(
gamematrix,
win <- ave(choice,players,FUN = function(x) ifelse(any(x==1),3,0))
)
giving
players trial choice win
1 1 1 1 3
2 1 2 0 3
3 1 3 0 3
4 2 1 0 0
5 2 2 0 0
6 2 3 0 0
7 3 1 0 3
8 3 2 1 3
9 3 3 0 3
Update
If you criteria is depending on the first two values of choice, you can try
within(
gamematrix,
win <- ave(choice,players,FUN = function(x) ifelse(all(head(x,2)==1),3,0))
)
which gives
players trial choice win
1 1 1 1 0
2 1 2 0 0
3 1 3 0 0
4 2 1 0 0
5 2 2 0 0
6 2 3 0 0
7 3 1 0 0
8 3 2 1 0
9 3 3 0 0
Try this dplyr approach:
library(dplyr)
#Code
gamematrix <- gamematrix %>% group_by(players) %>%
mutate(win=ifelse(length(choice[choice==1])>=1,3,0))
Output:
# A tibble: 9 x 4
# Groups: players [3]
players trial choice win
<dbl> <dbl> <dbl> <dbl>
1 1 1 1 3
2 1 2 0 3
3 1 3 0 3
4 2 1 0 0
5 2 2 0 0
6 2 3 0 0
7 3 1 0 3
8 3 2 1 3
9 3 3 0 3
There is no reason for this data to be a data.frame. Keep it as a numeric matrix. If you do so you can do in one line using only vectorized functions.
cbind(gamematrix, win = (rowSums(gamematrix == 1) > 0) * 3)
for your second case:
I would like it to be only for those players who had "choice=1" in the first N (e.g., first 2 trials)
cbind(gamematrix, win = (rowSums(gamematrix[,c(1,2)] == 1) > 0) * 3)
Vectorized solutions are usually more performant than solutions incorporating a buried loop (e.g. ave).
An option with rowsum from base R
gamematrix$win <- with(gamematrix, 3 * players %in%
names(which(rowsum(choice, players)[,1] > 0)))
gamematrix$win
#[1] 3 3 3 0 0 0 3 3 3

Count values in column by group R

I want to transform the following dataframe into a dataframe that adds a column of index numbers and counts the values in the rows. Like this:
A B C D E value A B C D E
1 2 3 4 4 0 2 2 0 1 1
1 4 4 2 1 => 1 3 0 0 0 2
1 2 2 2 0 2 0 2 2 2 1
0 0 2 0 1 3 0 0 1 1 0
0 0 4 3 2 4 0 1 2 1 1
I am pretty much a beginner in R and can't figure out how to do this.
Thanks in advance :)
You can do:
df <- read.table(header=TRUE, text=
"A B C D E
1 2 3 4 4
1 4 4 2 1
1 2 2 2 0
0 0 2 0 1
0 0 4 3 2")
sapply(df+1, tabulate, nbins=5)
# > sapply(df+1, tabulate, nbins=5)
# A B C D E
# [1,] 2 2 0 1 1
# [2,] 3 0 0 0 2
# [3,] 0 2 2 2 1
# [4,] 0 0 1 1 0
# [5,] 0 1 2 1 1
Eventually you want correct the rownames:
result <- sapply(df+1, tabulate, nbins=5)
rownames(result) <- (1:nrow(result))-1
result

Converting data to longitudinal data

Hi i am having difficulties trying to convert my data into longitudinal data using the Reshape package. Would be grateful if anyone could help me, thank you!
Data is as follows:
m <- matrix(sample(c(0, 0:), 100, replace = TRUE), 10)
ID<-c(1:10)
dim(ID)=c(10,1)
m<- cbind(ID,m)
d <- as.data.frame(m)
names(d)<-c('ID', 'litter1', 'litter2', 'litter3', 'litter4', 'litter5', 'litter6', 'litter7', 'litter8', 'litter9', 'litter10')
print(d)
ID litter1 litter2 litter3 litter4 litter5 litter6 litter7 litter8 litter9 litter10
1 0 0 0 3 1 0 2 0 0 3
2 0 2 1 2 0 0 0 2 0 0
3 1 0 1 2 0 3 3 3 2 0
4 2 1 2 3 0 2 3 3 1 0
5 0 1 2 0 0 0 3 3 1 0
6 2 1 2 0 3 3 0 0 0 0
7 0 1 0 3 0 0 1 2 2 0
8 0 1 3 3 2 1 3 2 3 0
9 0 2 0 2 2 3 2 0 0 3
10 2 2 2 2 1 3 0 3 0 0
I wish to convert the above data into a longitudinal data with columns 'ID', 'litter category' which tells us the category of the litter, i.e. 1-10 and 'litter number' which tells us the number of pieces for each litter category:
ID littercategory litternumber
1 4 3
1 5 1
1 7 2
1 10 3
2 2 2
2 3 1
2 4 2
2 8 2
and so on.
Would really appreciate your help thank you!
You could do that as follows:
library(reshape2)
d = melt(d, id.vars=c("ID"))
colnames(d) = c('ID','littercategory','litternumber')
# remove the text in the littercategory column, keep only the number.
d$littercategory = gsub('litter','',d$littercategory)
d = d[d$litternumber!=0]
Output:
ID littercategory litternumber
1 1 4
2 1 8
3 1 6
4 1 4
7 1 6
8 1 5
10 1 10
1 2 6
2 2 9
As you can see, only the ordering is different as the output you requested, but I'm sure you can fix that yourself. (If not, there are plenty of resources on how to do that).
Hope this helps!
To get desired output you have to melt your data and filter out values larger than 0.
library(data.table)
result <- setDT(melt(d, "ID"))[value != 0][order(ID)]
# To get exact structure modify result
result[, .(ID,
littercategory = sub("litter", "", variable),
litternumber = value)]

Transforming a two-way table() into pairwise list of counts in R

Starting with some sample two-way frequency table:
a <- c(1,2,3,4,4,3,4,2,2,2)
b <- c(1,2,3,4,1,2,4,3,2,2)
tab <- table(a,b)
> tab
b
a 1 2 3 4
1 1 0 0 0
2 0 3 1 0
3 0 1 1 0
4 1 0 0 2
I need to transform the table into the following format:
goal <- data.frame(a=c(1,2,3,4),b=c(1,2,3,4),count=c(1,3,1,2))
> goal
a b count
1 1 1 1
2 2 2 3
3 3 3 1
4 4 4 2
. . . .
How can I form all pairwise combinations from the two-way table and add the frequency counts in the third column?
Intuition tells me there should be a simple kind of 'reverse' function for table, but I could not find anything on SO or Google.
Naturally, after posting the question I found the right search query for Google...
> as.data.frame(tab)
a b Freq
1 1 1 1
2 2 1 0
3 3 1 0
4 4 1 1
5 1 2 0
6 2 2 3
7 3 2 1
8 4 2 0
9 1 3 0
10 2 3 1
11 3 3 1
12 4 3 0
13 1 4 0
14 2 4 0
15 3 4 0
16 4 4 2

In R, how to replace values in multiple columns with a vector of values equal to the same width?

I am trying to replace every row's values in 2 columns with a vector of length 2. It is easier to show you.
First here is a some data.
set.seed(1234)
x<-data.frame(x=sample(c(0:3), 10, replace=T))
x$ab<-0 #column that will be replaced
x$cd<-0 #column that will be replaced
The data looks like this:
x ab cd
1 0 0 0
2 2 0 0
3 2 0 0
4 2 0 0
5 3 0 0
6 2 0 0
7 0 0 0
8 0 0 0
9 2 0 0
10 2 0 0
Every time x=2 or x=3, I want to ab=0 and cd=1.
My attempt is this:
x[with(x, which(x==2|x==3)), c(2:3)] <- c(0,1)
Which does not have the intended results:
x ab cd
1 0 0 0
2 2 0 1
3 2 1 0
4 2 0 1
5 3 1 0
6 2 0 1
7 0 0 0
8 0 0 0
9 2 1 0
10 2 0 1
Can you help me?
The reason it doesn't work as you want is because R stores matrices and arrays in column-major layout. And when you a assign a shorter array to a longer array, R cycles through the shorter array. For example if you have
x<-rep(0,20)
x[1:10]<-c(2,3)
then you end up with
[1] 2 3 2 3 2 3 2 3 2 3 0 0 0 0 0 0 0 0 0 0
What is happening in your case is that the sub-array where x is equal to 2 or 3 is being filled in column-wise by cycling through the vector c(0,1). I don't know of any simple way to change this behavior.
Probably the easiest thing to do here is simply fill in the columns one at a time. Or, you could do something like this:
indices<-with(x, which(x==2|x==3))
x[indices,c(2,3)]<-rep(c(0,1),each=length(indices))
Another alternative: Using a data.table, this is a one-liner:
require(data.table)
DT <- data.table(x)
DT[x%in%2:3,`:=`(ab=0,cd=1)]
Original answer: You can pass a matrix of row-column pairs:
ijs <- expand.grid(with(x, which(x==2|x==3)),c(2:3))
ijs <- ijs[order(ijs$Var1),]
x[as.matrix(ijs)] <- c(0,1)
which yields
x ab cd
1 0 0 0
2 2 0 1
3 2 0 1
4 2 0 1
5 3 0 1
6 2 0 1
7 0 0 0
8 0 0 0
9 2 0 1
10 2 0 1
My original answer worked on my computer, but not a commenter's.
Generalized for multi-columns and multi-values:
mycol<-as.list(names(x)[-1])
myvalue<-as.list(c(0,1))
kk<-Map(function(y,z) list(x[x[,1] %in% c(2,3),y]<-z,x),mycol, myvalue)
myresult<-data.frame(kk[[2]][[2]])
x ab cd
1 1 0 0
2 1 0 0
3 0 0 0
4 0 0 0
5 0 0 0
6 3 0 1
7 2 0 1
8 3 0 1
9 3 0 1
10 0 0 0
You could use ifelse:
> set.seed(1234)
> dat<-data.frame(x=sample(c(0:3), 10, replace=T))
> dat$ab <- 0
> dat$cd <- ifelse(dat$x==2 | dat$x==3, 1, 0)
x ab cd
1 0 0 0
2 2 0 1
3 2 0 1
4 2 0 1
5 3 0 1
6 2 0 1
7 0 0 0
8 0 0 0
9 2 0 1
10 2 0 1
What about that?
x[x$x%in%c(2,3),c(2,3)]=matrix(rep(c(0,1),sum(x$x%in%c(2,3))),ncol=2,byrow=TRUE)
x$ab[x$x==2 | x$x==3] <- 0
x$cd[x$x==2 | x$x==3] <- 1
EDIT
Here is a general approach that would work with lots of columns. You simply create a vector of the replacement values you wish to use for each column.
set.seed(1234)
y<-data.frame(x=sample(c(0:3), 10, replace=T))
y$ab<-4 #column that will be replaced
y$cd<-2 #column that will be replaced
y$ef<-0 #column that will be replaced
y
# x ab cd ef
#1 0 4 2 0
#2 2 4 2 0
#3 2 4 2 0
#4 2 4 2 0
#5 3 4 2 0
#6 2 4 2 0
#7 0 4 2 0
#8 0 4 2 0
#9 2 4 2 0
#10 2 4 2 0
replacement.values <- c(10,20,30)
y2 <- y
y2[,2:ncol(y)] <- sapply(2:ncol(y), function(j) {
apply(y, 1, function(i) {
ifelse((i[1] %in% c(2,3)), replacement.values[j-1], i[j])
})
})
y2
# x ab cd ef
#1 0 4 2 0
#2 2 10 20 30
#3 2 10 20 30
#4 2 10 20 30
#5 3 10 20 30
#6 2 10 20 30
#7 0 4 2 0
#8 0 4 2 0
#9 2 10 20 30
#10 2 10 20 30

Resources