Contrasts between successive levels of a factor in R - r

I'm writing this post because I'm stuck in the analysis of a data file from a laboratorial experiment.
In this experiment, I counted the number of females (of a small arthropod) present in a specific environment, throughout 26 time points (TP). However, I want to understand if the number of females was different between each successive time point (e.g. if the number of females counted in TP 1 is different than TP 2; the number of females counted in TP 2 is different than TP 3; and so on...)
The data frame has the following columns:
Replicate (which contains the number of the replicate, going from 1 to 8); TimePoint (the day on which the females were counted, going from 1 to 26); Females (the number of females counted at each time point); and Block (the experiment had 2 blocks)
I've tried to do some successive contrasts, but I don't think it's the best way. This is my code:
# Mixed model: female counts across time points, with block as a random
# intercept. Note: TimePoint must be a factor for mcp() contrasts to work.
m1 <- lmer(Females ~ TimePoint + (1 | Block))

# Build the 25 successive contrasts ("t1 - t2 == 0", ..., "t25 - t26 == 0")
# programmatically instead of typing each one by hand. This removes the
# typo risk of the hand-written list (the original had inconsistent
# spacing such as "t10 - t11== 0").
successive_hyps <- paste0("t", 1:25, " - t", 2:26, " == 0")

Suc_contrasts2 <- glht(m1, linfct = mcp(TimePoint = successive_hyps))

# Unadjusted tests of the planned comparisons ...
summary(Suc_contrasts2)
# ... and the same tests with a Bonferroni multiplicity adjustment.
summary(Suc_contrasts2, test = adjusted("bonferroni"))
I've been looking on google for other ways to do planned comparisons, but all i've found was not really appropriate for my data set. I'm still new at this, so sorry for any newbie mistake.
Thus my question is, is there any better way to compare the number of females I found between each pair of successive time points?
Edit 1:
I also tried doing contrasts like this, but the results don't seem right..
# Factor levels sort alphabetically by default, so TimePoint comes out as
# t1, t10, t11, ..., t19, t2, t20, ... -- NOT in chronological order.
levels(TimePoint)
# [1] "t1" "t10" "t11" "t12" "t13" "t14" "t15" "t16" "t17" "t18" "t19" "t2" ...

# Fix the level order first; every contrast below then lines up with the
# chronological sequence t1, t2, ..., t26.
TimePoint <- factor(TimePoint, levels = paste0("t", 1:26))

# Build the 25 successive-difference contrast vectors programmatically:
# column k compares level k (+1) with level k+1 (-1).
# NOTE(review): the hand-typed version assigned c5 twice, so the 5v6
# contrast was silently overwritten by 6v7, and several of the "#NvM"
# comments were mislabelled -- generating the matrix avoids all of that.
mat <- sapply(seq_len(25), function(k) {
  v <- numeric(26)
  v[k] <- 1
  v[k + 1] <- -1
  v
})
colnames(mat) <- paste0(seq_len(25), "v", seq_len(25) + 1)

# tell R that the matrix gives the contrasts you want
contrasts(TimePoint) <- mat

model2 <- aov(Females ~ TimePoint)
summary(model2)

# Test each successive contrast; the split labels are generated so that
# they always match the contrast columns ("1v2" = column 1, etc.).
summary.aov(model2,
            split = list(TimePoint = setNames(as.list(seq_len(25)),
                                              colnames(mat))))
Thanks for your time,
André

Another option for fitting successive-differences contrasts:
m1 <- lmer(Females~TimePoint+(1|Block), contrasts=list(TimePoint=MASS::contr.sdif))
This doesn't take the multiplicity of testing into account (which you might get away with since these are pre-planned contrasts): you could use p.adjust() on the p-values.
#AndreasM's points about the ordering of your factor, choice of random vs fixed effects, etc., should definitely be taken into consideration.

I think this website may help you: backward difference coding
Following the information there, difference contrasts between subsequent factor levels could be set like this (see below). Note, that I only use a simple example with 5 factor levels.
#create dummy data
df <- expand.grid(TimePoint = c("t01", "t02", "t03", "t04", "t05"),
Replicate = 1:8, Block = 1:2)
set.seed(2)
df$Females <- runif(nrow(df), min = 0, max = 100)
#set backward difference contrasts
contrasts(df$TimePoint) <- matrix(c(-4/5, 1/5, 1/5, 1/5, 1/5,
-3/5, -3/5, 2/5, 2/5, 2/5,
-2/5, -2/5, -2/5, 3/5, 3/5,
-1/5, -1/5, -1/5, -1/5, 4/5), ncol = 4)
When fitting a simple linear model, the parameter estimates correspond to the expected values, i.e., contrast "TimePoint1" corresponds to t2 - t1, contrast "TimePoint2" to t3 - t2 and so on.
#fit linear model
m1 <- lm(Females ~ TimePoint, data = df)
coef(m1)
(Intercept) TimePoint1 TimePoint2 TimePoint3 TimePoint4
50.295659 -10.116045 7.979465 -10.182389 2.209413
#mean by time point
with(df, tapply(Females, TimePoint, mean))
t01 t02 t03 t04 t05
57.23189 47.11584 55.09531 44.91292 47.12233
I want to add that I am not sure whether what you are trying to do is sensible. But I don't feel comfortable evaluating that, and it would be a question for CrossValidated. I worry that treating 26 time points as categorical factor levels is not the best way to go. Also, in your initial code you seem to fit a model treating block as a random factor. This does not make sense if block has only 2 levels (as you write), see for example here: Link
Finally, I noticed that in your example, factor levels of your TimePoint variable are not ordered right (t1, t10, t11... instead of t1, t2, t3, ...). You could change this for instance with this line of code:
df$TimePoint <- factor(df$TimePoint, levels = paste0("t", 1:26),
labels = paste0("t", sprintf("%02d", 1:26)))

Related

RC Bray function not accepting phyloseq otu_table as argument

I am using James Stegen et al's code here to calculate an abundance-weighted raup-crick value for my 16S dataset.
I load in my phyloseq object then extract the otu_table. I then use the otu_table as the spXsite argument in the function raup_crick_abundance(). My otu_table is available as a dput() below the text.
physeq<-qza_to_phyloseq(
features="~/Documents/qiime2-analyses/CRD/fresh_run/table.qza",
tree="~/Documents/qiime2-analyses/CRD/fresh_run/rooted-tree.qza",
"~/Documents/qiime2-analyses/CRD/fresh_run/taxonomy.qza",
metadata = "crd-metadata.txt")
otu_table <- (otu_table(physeq))
raup_crick_abundance = function(spXsite=otu_table, plot_names_in_col1=TRUE,
reps=9999, set_all_species_equal=FALSE,
as.distance.matrix=TRUE, report_similarity=FALSE){
Where the remaining code is verbatim from the github link above.
I am having a hard time understanding the error I have been receiving:
Error in sample.int(x, size, replace, prob) :
incorrect number of probabilities
Called from: sample.int(x, size, replace, prob)
Browse[1]> traceback()
1: raup_crick_abundance()
I had thought perhaps the function was looking for an equivalent number of columns and rows, but that was not the case. I searched the function sample() and think it's hoping to select values from my argument otu_table but can't find the expected range?
The sample() causing problems is on line 48 of the github where I believe the function is weighing the otu occurrences and entering the number of occurrences into a column.
##two empty null communities of size gamma:
com1<-rep(0,gamma)
com2<-rep(0,gamma)
##add observed number of species to com1, weighting by species occurrence frequencies:
com1[sample(1:gamma, sum(spXsite.inc[null.one,]), replace=FALSE, prob=occur)]<-1
com1.samp.sp = sample(which(com1>0),(sum(spXsite[null.one,])-sum(com1)),replace=TRUE,prob=abundance[which(com1>0)]);
com1.samp.sp = cbind(com1.samp.sp,1); # head(com1.samp.sp);
com1.sp.counts = as.data.frame(tapply(com1.samp.sp[,2],com1.samp.sp[,1],FUN=sum)); colnames(com1.sp.counts) = 'counts'; # head(com1.sp.counts);
com1.sp.counts$sp = as.numeric(rownames(com1.sp.counts)); # head(com1.sp.counts);
com1[com1.sp.counts$sp] = com1[com1.sp.counts$sp] + com1.sp.counts$counts; # com1;
#sum(com1) - sum(spXsite[null.one,]); ## this should be zero if everything work properly
rm('com1.samp.sp','com1.sp.counts');
Any thoughts are greatly appreciated. Thank you in advance!
> dput(otu_table)
new("otu_table", .Data = structure(c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 4), .Dim = c(6L, 48L), .Dimnames = list(
c("e54924083c02bd088c69537d02406eb8", "3112899fc7a2adb7f74a081a82c7cde4",
"db5d745b02355b6fed513c4953b62326", "2faf2046aa9ab2f6598df79cd52e9c7b",
"bec8db81cc8ec453136c82ede8327a9f", "601e47b8adcbd21d159f74421710e1b5"
), c("sample-10", "sample-11", "sample-12", "sample-14",
"sample-16", "sample-18", "sample-19", "sample-20", "sample-21",
"sample-22", "sample-23", "sample-24", "sample-25", "sample-26",
"sample-27", "sample-28", "sample-29", "sample-30", "sample-31",
"sample-32", "sample-33", "sample-34", "sample-35", "sample-36",
"sample-37", "sample-40", "sample-41", "sample-43", "sample-44",
"sample-45", "sample-46", "sample-50", "sample-55", "sample-56",
"sample-57", "sample-58", "sample-59", "sample-61", "sample-62",
"sample-63", "sample-64", "sample-65", "sample-67", "sample-68",
"sample-69", "sample-70", "sample-71", "sample-8"))), taxa_are_rows = TRUE)
>

Working on bipartite networks with igraph : problem with basic measures (density, normalized degree)

I'm new to bipartite network analysis and I have some trouble with basic measures.
I'm trying to work on bipartite networks without projecting in 1-mode graphs.
My problems come from the fact that the igraph package allows to create bipartite graphs but that the measures do not seem to adapt to the specificity of these graphs.
So, my general question is how do you do when you work directly on bipartite networks ?
Here is a concrete example with density
## Working with an incidence matrix (sample) with 47 columns and 10 rows (unweighted / undirected)
# Want to compute basic global index like density with igraph
library(igraph)
g <- graph.incidence(m, directed = F )
graph.density(g) # result = 0.04636591
# Now trying to compute basic density for a bipartite graph without igraph (number of edges divided by the product of the two types of vertices)
library(Matrix)
d <- nnzero(m)/ (ncol(m)*nrow(m)) # result 0.1574468
# It seems that bipartite package does the job
library(bipartite)
networklevel(m, index=c("connectance")) # result 0.1574468
But the bipartite package is very specific to ecology fields and lot of measures are designed to food web and interaction between species (and some, like clustering coefficient, don't seem to take into account the bipartite nature of the graph : e.g compute 4-cycles).
So, are there simpler ways to work on bipartite networks with igraph ? To measure some global indexes (density, clustering coefficient with 4-cycles, I know that tnet does this but my actual networks are too large), and to normalize local indexes like degree, closeness, betweenness centralities taking into account the bipartite specificity (like in Borgatti S.P., Everett M.G., 1997, « Network analysis of 2-mode data », Social Networks) ?
Any advice will be appreciated !
Below the code to reproduce the sample of my matrix "m"
m <- structure(c(1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0), .Dim = c(10L, 47L), .Dimnames = list(
c("02723", "13963", "F3238", "02194", "15051", "04477", "02164",
"06283", "04080", "08304"), c("1185241", "170063", "10350868",
"217831", "2210247", "2262963", "1816670", "1848354", "2232593",
"146214", "1880252", "2261639", "2262581", "2158177", "1850147",
"2262912", "146412", "2262957", "1566083", "1841811", "146384",
"216281", "2220957", "1846986", "1951567", "1581130", "105343",
"1580240", "170654", "1796236", "1835553", "1835848", "146400",
"1174872", "1283240", "2253354", "1283617", "146617", "160263",
"2263115", "184745", "1809858", "1496747", "10346824", "148730",
"2262582", "146268")))
Density: you already got it
Degree
degv1 <- degree(g, V(g)[type == FALSE])
degv2 <- degree(g, V(g)[type == TRUE])
Normalized degree: divide by the vertex count of the other node category
degnormv1 <- degv1/length(V(g)[type == TRUE])
degnormv2 <- degv2/length(V(g)[type == FALSE])
No answer regarding closeness, betweenness nor clustering coefficient
For the normalized degree, here is a solution without igraph
normalizedegreeV1 <- data.frame(ND = colSums(m)/nrow(m))
normalizedegreeV2 <- data.frame(ND = rowSums(m)/ncol(m))
but that leaves the other questions about centrality measures open...

Translating SAS language to R language: Creating a new variable

I have a sas code and I want to translate into R. I am interested in creating variables based on the conditions of other variables.
data wp;
set wp;
if totalcriteria =>3 and nonecom=0 then content=1;
if totalcriteria =>3 and nonecom=1 then content=0;
if totalcriteria <3 and nonecom=0 then content=0;
if totalcriteria <3 and nonecom=1 then content=0;
run;
This is a code I have in. My conditions for "content" as changed and I would like to translate the sas code to R to hopefully replace the "mutate" line of the code below or fit in with the code below:
wpnew <- wp %>%
mutate(content = ifelse (as.numeric(totalcriteria >= 3),1,0))%>%
group_by(district) %>%
summarise(totalreports =n(),
totalcontent = sum(content),
per.content=totalcontent/totalreports*100)
Can you help me translate this SAS code to R language. Thank you in advance.
Here is the dput output
structure(list(Finances = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), Exercise = c(0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), Relationships = c(0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0), Laugh = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 1), Gratitude = c(0, 0, 0, 0, 1, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 1), Regrets = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0), Meditate = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), Clutter = c(0, 0, 1, 1, 0, 0, 0, 1, 0, 0,
0, 0, 1, 0, 0, 0), Headache = c(0, 0, 1, 1, 0, 0, 0, 1, 0, 0,
0, 0, 1, 0, 0, 0), Loss = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0), Anger = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0), Difficulty = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), nonecom = c(1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1,
1, 0, 1, 1, 0), Othercon = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), totalcriteria = c(0, 0, 2, 3, 2, 0, 0, 4, 3,
0, 0, 0, 3, 0, 0, 2)), class = "data.frame", row.names = c(NA,
-16L))
This is what I would like it to look like
V1 V2 V3...V12 nonecom Othercon totalcriteria content
1 1 1 0 1 0 3 0
0 0 1 0 0 0 8 1
1 0 0 0 0 1 2 0
1 0 1 0 1 0 1 0
I use case_when just because I find it more similar in terms of syntax. Your current approach only tests the first part of the IF condition, not the second part regarding nonecom.
# content = 1 only when totalcriteria >= 3 AND nonecom == 0, otherwise 0.
# NOTE: the original used `sum.content`, a column that does not exist in
# the data shown -- the dput() output has `totalcriteria`.
wpnew <- wp %>%
  mutate(content = case_when(totalcriteria >= 3 & nonecom == 0 ~ 1,
                             TRUE ~ 0))

How to fix 'Node inconsistent with parents' in R2jags::jags

I am working with the R-package R2jags. After running the code I attach below, R produced the error message: "Node inconsistent with parents".
I tried to solve it. However, the error message persists. The variables I am using are:
i) "Adop": a 0-1 dummy variable.
ii) "NumInfo": a counter variable whose range is {0, 1, 2,...}.
iii) "Price": 5
iv) "NRows": 326.
install.packages("R2jags")
library(R2jags)
# Data you need to run the model.
# Adop: a 0-1 dummy variable.
Adop <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
# NumInfo: a counter variable.
NumInfo <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1)
# NRows: length of both 'NumInfo' and 'Adop'.
NRows <- length(NumInfo)
# Price: 5
Price <- 5
Data <- list("NRows" = NRows, "Adop" = Adop, "NumInfo" = NumInfo, "Price" = Price)
# The Bayesian model. The parameters I would like to infer are: 'mu.m', 'tau2.m', 'r.s', 'lambda.s', 'k', 'c', and 'Sig2'.
# I would like to obtain samples from the posterior distribution of the vector of parameters.
# JAGS model string. Two fixes relative to the original (both explained in
# the answer below):
#  1. k ~ dunif(0, 1/Price) -- dunif(a, b) requires a < b; the original
#     dunif(1, 1/Price) had a = 1 > b = 1/5, which triggers
#     "Node inconsistent with parents".
#  2. PrAd is clamped into (0.01, 0.99) before dbern(), which requires
#     0 < p < 1.
Bayesian_Model <- "model {
mu.m ~ dnorm(0, 1)
tau2.m ~ dgamma(1, 1)
r.s ~ dgamma(1, 1)
lambda.s ~ dgamma(1, 1)
k ~ dunif(0, 1/Price)
c ~ dgamma(1, 1)
Sig2 ~ dgamma(1, 1)
precision.m <- 1/tau2.m
m ~ dnorm(mu.m, precision.m)
s2 ~ dgamma(r.s, lambda.s)
for(i in 1:NRows){
Media[i] <- NumInfo[i]/Sig2 * m
Var[i] <- equals(NumInfo[i], 0) * 10 + (1 - equals(NumInfo[i], 0)) * NumInfo[i]/Sig2 * s2 * (NumInfo[i]/Sig2 + 1/s2)
Prec[i] <- pow(Var[i], -1)
W[i] ~ dnorm(Media[i], Prec[i])
PrAd1[i] <- 1 - step(-m/s2 - 1/c * 1/s2 * log(1 - k * Price) + 1/2 * c)
PrAd2[i] <- 1 - step(-W[i] - m/s2 - 1/c * 1/s2 * log(1 - k * Price) + 1/2 * c - 1/c * log(1 - k * Price))
PrAd[i] <- equals(NumInfo[i], 0) * PrAd1[i] + (1 - equals(NumInfo[i], 0)) * PrAd2[i]
PrAd01[i] <- max(min(PrAd[i], 0.99), 0.01)
Adop[i] ~ dbern(PrAd01[i])
}
}"
# Save the Bayesian model in your computer with an extension '.bug'.
# Suppose that you saved the .bug file in: "C:/Users/Default/Bayesian_Model.bug".
writeLines(Bayesian_Model, "C:/Users/Default/Bayesian_Model.bug")
# Here I would like to use jags command from R-package called R2jags.
# I would like to generate 1000 iterations.
MCMC_Bayesian_Model <- R2jags::jags(
model.file = "C:/Users/Default/Bayesian_Model.bug",
data = Data,
n.chains = 1,
n.iter = 1000,
parameters.to.save = c("mu.m", "tau2.m", "r.s", "lambda.s", "k", "c", "Sig2")
)
When running the code, R produced the error message: "Node inconsistent with parents". I do not know what the mistakes are. I was wondering if you could help me with this problem, please. If you need more information, please let me know. Thank you very much.
It's a little hard to figure out the model without knowing what you're trying to do, but I suggest two fixes:
Instead of k ~ dunif(1, 1/Price), did you mean k ~ dunif(0, 1/Price)? For dunif(a, b), you must have a < b (see page 48 here: http://people.stat.sc.edu/hansont/stat740/jags_user_manual.pdf).
I inserted an additional line in the model,
PrAd01[i] <- max(min(PrAd[i], 0.99), 0.01)
and changed the last line to
Adop[i] ~ dbern(PrAd01[i])
Page 49 of the manual above states that 0 < p < 1 for dbern(p).
The model runs with the above two changes.

plot a very large data with many zeros

This is a small portion of a very big dataset
df<- structure(list(A = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0.68906, 0, 0, 0, 0, 0, 0, 0, 0, 0.13597, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), B = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0.40001, 0, 0, 0, 0, 0.69718, 0, 0, 0, 0, 0, 0, 0,
0, 0.090752, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), C = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0.84068, 0, 0, 0, 0.34713, 0, 0, 0, 0, 0.65201,
0, 0, 0.25725, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
), D = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.86419, 0, 0, 0, 0.3845,
0, 0, 0, 0, 0.67091, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), E = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1.1083, 0.8324,
0, 0, 0, 0.38499, 0, 0, 0, 0, 0.69064, 0, 0, 0.14596, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), F = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 1.0954, 0.74426, 0, 0, 0, 0.37715, 0, 0, 0, 0, 0.68884,
0, 0, 0.20826, 0, 0.38782, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0), G = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0985, 0.66651, 0, 0,
0, 0, 0, 0, 0, 0, 0.68861, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.1812,
0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("A", "B", "C", "D", "E",
"F", "G"), class = "data.frame", row.names = c(NA, -39L))
What I want is to display the nonzero values in a more emphasized way when there are a lot of zeros in the data
How I plot it is like this
# Cluster the rows on euclidean distance, then draw the heatmap with the
# row dendrogram only (columns kept in their original order).
eucl_dist <- dist(df, method = "euclidean")
hie_clust <- hclust(eucl_dist, method = "complete")
my_palette <- colorRampPalette(c("green", "yellow", "red"))(n = 1000)

# heatmap.2() needs a numeric matrix; the original passed `mydata`, which
# is never defined in this snippet -- the clustered data frame is `df`.
heatmap.2(as.matrix(df), scale = "none", Colv = FALSE,
          Rowv = as.dendrogram(hie_clust),
          xlab = "X", ylab = "Y", key = TRUE, keysize = 1.1, trace = "none",
          density.info = "none", margins = c(4, 4), col = my_palette,
          dendrogram = "row")
But as you see, in this small example, the zeros dominate my plot, and when it is very large it is impossible to see anything. Also I cannot change the position of the values
You are asking a lot of questions here, I'll try to answer those I see.
Zero dominates plot
Zeros dominate your data, but what do the zeros mean? Without some insight into what the zeros actually mean it's hard to prescribe one best way to deal with them.
Colormap
The colorful colormap that you chose is not the best way to describe quantitative data. I would suggest a simple white to blue (or color of your choice) so that your zeros are shown as white and get hidden with the nonzero data emphasized. Example (only changing my_palette <- colorRampPalette(c("white", "cornflowerblue"))(n = 1000)):
Changing the position of the values
I'm not certain what you mean here but the layout is fixed by the dendrogram you defined.

Resources