Related
I have a dataframe with papers and the parameters they reported one-hot encoded:
.
structure(list(CitationKey = c("White2021", "Tunabot2019", "Fujiwara2017",
"Dewar1994a", "Dewar1994b", "Blank2007_BF", "Blank2007_YF", "Katzschmann2018",
"Leftwich2012", "Gibouin2018", "Donley2000_KT", "Donley2000_CM",
"Sepulveda2000_KT", "Sepulveda2000_CM", "Sepulveda2000_YF", "Salumae2013",
"Jezov2012", "Anderson2000", "ShaoXu2019", "Rufo2013", "LauderFlammangAlben2012",
"Low2010", "Chong2009", "Niu2014", "LiuHu2005", "LiuHu2010",
"Tan2006", "Zhang2010", "Zhong2018a", "Zhong2018b", "ZhongDu2016",
"Xie2020", "Struebig2020", "Bainbridge1958_CD", "Bainbridge1958_T",
"Bainbridge1958_G", "Videler1991_PV", "Videler1991_TS", "Videler1991_AF",
"Videler1991_SScom", "Videler1991_GM", "Videler1991_TT", "Videler1991_OM"
), COM = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), Duty_Cycle = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `Re ` = c(1, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Flow_Speed = c(1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
), Drag_Force = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), Drag_Coefficient = c(1, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0), Head_Amplitude = c(1,
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
), Tail_Amplitude = c(1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,
1, 1, 1, 1, 0, 0, 0, 0, 0), Frequency = c(1, 1, 1, 0, 1, 1, 1,
1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), COT = c(1, 1, 1,
1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Stride_Length = c(1,
1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0
), Swimming_Speed = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1), Strouhal_Number = c(1, 1, 0, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), WPM = c(1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
), Thrust = c(0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), Energy_Consumption = c(0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Acceleration = c(0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
), Wavelength = c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), Turning_Speed = c(0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Turning_Radius = c(0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
), Lateral_Force = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), Power = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Efficiency = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
Turning_Diameter = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `Yaw Rate` = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0), Voltage = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Turning_Angle = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), Froude_Efficiency = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Speed_Efficiency = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0)), row.names = c(NA, -43L), class = c("tbl_df",
"tbl", "data.frame"))
Using this thread, I was able to make a correlation matrix
# Spearman correlation between every column except the first (presumably the
# CitationKey identifier -- confirm against how reduced_df was built), drawn
# as a labelled lower-triangle heatmap without the diagonal.
# `[, -1, drop = FALSE]` replaces `2:ncol(...)`, which counts backwards
# (c(2, 1)) when only one column exists, and keeps a data frame even when a
# single parameter column remains.
reduced_df[, -1, drop = FALSE] %>%
  cor(use = "pairwise.complete.obs", method = "spearman") %>%
  ggcorrplot(
    show.diag = FALSE, # FALSE, not F: F is an ordinary variable that can be reassigned
    type = "lower",
    lab = TRUE,
    lab_size = 2
  )
However, COM and Duty_Cycle, for example, have a correlation of 1.0 because they are reported together in exactly one paper and neither is reported in any other paper, so they correlate perfectly. Frequency and COT, however, are co-reported in 8 papers and otherwise vary, so they don't correlate well. I'd like to alter the correlation matrix so that it shows how often two parameters are co-reported in the same paper. The value for COT and Frequency should be 8x higher than the value for COM and Duty_Cycle.
However, I don't know how to do this. I'd greatly appreciate any advice or ideas.
I am using James Stegen et al.'s code here to calculate an abundance-weighted Raup-Crick value for my 16S dataset.
I load in my phyloseq object then extract the otu_table. I then use the otu_table as the spXsite argument in the function raup_crick_abundance(). My otu_table is available as a dput() below the text.
# Build a phyloseq object from the QIIME 2 artifacts and the sample metadata.
# Every argument is now named; the taxonomy path was previously passed
# positionally between named arguments, which is easy to misread.
physeq <- qza_to_phyloseq(
  features = "~/Documents/qiime2-analyses/CRD/fresh_run/table.qza",
  tree = "~/Documents/qiime2-analyses/CRD/fresh_run/rooted-tree.qza",
  taxonomy = "~/Documents/qiime2-analyses/CRD/fresh_run/taxonomy.qza",
  metadata = "crd-metadata.txt"
)
# Extract the OTU table. NOTE: the variable shares its name with the
# otu_table() accessor; calls of the form otu_table(...) still resolve to the
# function, but the name now also refers to this data object.
otu_table <- otu_table(physeq)
raup_crick_abundance = function(spXsite=otu_table, plot_names_in_col1=TRUE,
reps=9999, set_all_species_equal=FALSE,
as.distance.matrix=TRUE, report_similarity=FALSE){
Where the remaining code is verbatim from the github link above.
I am having a hard time understanding the error I have been receiving:
Error in sample.int(x, size, replace, prob) :
incorrect number of probabilities
Called from: sample.int(x, size, replace, prob)
Browse[1]> traceback()
1: raup_crick_abundance()
I had thought perhaps the function was looking for an equivalent number of columns and rows, but that was not the case. I searched the function sample() and think it's hoping to select values from my argument otu_table but can't find the expected range?
The sample() causing problems is on line 48 of the github where I believe the function is weighing the otu occurrences and entering the number of occurrences into a column.
## Excerpt from inside raup_crick_abundance(): builds one randomised ("null")
## community, com1. It relies on gamma, spXsite, spXsite.inc, null.one, occur
## and abundance, all of which are defined outside this excerpt.
##two empty null communities of size gamma:
com1<-rep(0,gamma)
com2<-rep(0,gamma)
##add observed number of species to com1, weighting by species occurrence frequencies:
## Picks sum(spXsite.inc[null.one,]) distinct positions out of 1:gamma and sets them to 1.
## sample() requires length(prob) to equal the number of items sampled from, so
## occur must have exactly gamma entries; otherwise it stops with
## "incorrect number of probabilities".
com1[sample(1:gamma, sum(spXsite.inc[null.one,]), replace=FALSE, prob=occur)]<-1
## Draw the remaining individuals (row total of spXsite minus those already
## placed) with replacement from the positions chosen above, weighted by abundance.
## NOTE(review): if which(com1>0) has length one, sample() treats that single
## number n as 1:n -- confirm this cannot happen for the input data.
com1.samp.sp = sample(which(com1>0),(sum(spXsite[null.one,])-sum(com1)),replace=TRUE,prob=abundance[which(com1>0)]);
## Attach a column of ones so the draws can be summed per position.
com1.samp.sp = cbind(com1.samp.sp,1); # head(com1.samp.sp);
## Sum the ones per drawn position; the row names of the result are the positions.
com1.sp.counts = as.data.frame(tapply(com1.samp.sp[,2],com1.samp.sp[,1],FUN=sum)); colnames(com1.sp.counts) = 'counts'; # head(com1.sp.counts);
## Recover the numeric position from the row names.
com1.sp.counts$sp = as.numeric(rownames(com1.sp.counts)); # head(com1.sp.counts);
## Add the per-position counts on top of the 1s already stored in com1.
com1[com1.sp.counts$sp] = com1[com1.sp.counts$sp] + com1.sp.counts$counts; # com1;
#sum(com1) - sum(spXsite[null.one,]); ## this should be zero if everything works properly
## Remove the temporaries.
rm('com1.samp.sp','com1.sp.counts');
Any thoughts are greatly appreciated. Thank you in advance!
> dput(otu_table)
new("otu_table", .Data = structure(c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 4), .Dim = c(6L, 48L), .Dimnames = list(
c("e54924083c02bd088c69537d02406eb8", "3112899fc7a2adb7f74a081a82c7cde4",
"db5d745b02355b6fed513c4953b62326", "2faf2046aa9ab2f6598df79cd52e9c7b",
"bec8db81cc8ec453136c82ede8327a9f", "601e47b8adcbd21d159f74421710e1b5"
), c("sample-10", "sample-11", "sample-12", "sample-14",
"sample-16", "sample-18", "sample-19", "sample-20", "sample-21",
"sample-22", "sample-23", "sample-24", "sample-25", "sample-26",
"sample-27", "sample-28", "sample-29", "sample-30", "sample-31",
"sample-32", "sample-33", "sample-34", "sample-35", "sample-36",
"sample-37", "sample-40", "sample-41", "sample-43", "sample-44",
"sample-45", "sample-46", "sample-50", "sample-55", "sample-56",
"sample-57", "sample-58", "sample-59", "sample-61", "sample-62",
"sample-63", "sample-64", "sample-65", "sample-67", "sample-68",
"sample-69", "sample-70", "sample-71", "sample-8"))), taxa_are_rows = TRUE)
>
I am trying to count how many enterprise zones were designated in two specific years out of 8 total years. I first isolated the years with the following code:
ez <- ezunem %>% select(3,7,8)
Then tried to count the enterprise zones that were in the select columns:
ez %>% count(ez, sort=TRUE)
This didn't work. What is the best way to count dummy variables equal to 1 in two columns in R?
Edit: here is the output with dput(ez) command:
structure(list(ez = structure(c(0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1,
1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), format.stata = "%9.0g"),
d84 = structure(c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0), format.stata = "%9.0g"),
d85 = structure(c(0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0), format.stata = "%9.0g")), row.names = c(NA,
-198L), class = c("tbl_df", "tbl", "data.frame"))
So, basically I want my output to count the 1's for d84 and d85.
Something like this?
library(dplyr)
# For each value of ez, add up the 1s in the two year dummies. The result has
# one row per ez value and columns count_d84 / count_d85.
year_dummies <- c("d84", "d85")
ez %>%
  group_by(ez) %>%
  summarise(
    across(all_of(year_dummies), sum, .names = "count_{.col}")
  )
ez count_d84 count_d85
<dbl> <dbl> <dbl>
1 0 16 12
2 1 6 10
My goal is to create a distance-decay curve for species data vs geographic distance. However, I am running into errors. For the betapart package, this may be due to the lack of columns relative to the number of rows. Is there a way to get past this? If not, is there another method for creating a distance-decay curve (and plotting it)? I also tried the ddecay package but ran into errors there too. Any help is much appreciated. Data is in structure form below.
# BETAPART -------------------------------------------------
# Distance-decay of community dissimilarity against distance between sites.
# Expects `spec` (sites x species abundances) and `coords` (one row per site).
library(betapart)
# Pairwise distances between the rows of coords (dist() defaults to Euclidean).
# NOTE(review): the columns are named Lat.x / Lon.x, so these distances are
# presumably in degrees rather than metres -- confirm this is intended.
spat.dist<-dist(coords)
# Keep only the beta.bray.bal component of the abundance-based pairwise
# dissimilarities (presumably the balanced-variation part of Bray-Curtis; see
# the betapart documentation).
dissim.BCI<-beta.pair.abund(spec)$beta.bray.bal
# Raw scatter of dissimilarity against distance, with the y axis fixed to [0, 1].
plot(spat.dist, dissim.BCI, ylim=c(0,1), xlim=c(0, max(spat.dist)))
# Fit the exponential decay model with 100 permutations.
# NOTE(review): this call can stop with "cannot find valid starting values"
# when dissim.BCI contains values of exactly 1.
BCI.decay.exp<-decay.model(dissim.BCI, spat.dist, y.type="dissim", model.type="exp", perm=100)
#========================================================================================================
I also tried a few other packages --------------------------
# ddecay package -------------------------------------------
devtools::install_github("chihlinwei/ddecay")
The issue with this method is that it requires a gradient. I would like to avoid that if possible, but I do not see a way around it. Also, the package does not include its example data.
# Bootstrapped distance-decay fit on presence/absence data with ddecay, then a
# scatter of similarity against distance with the mean prediction overlaid.
dd <- beta.decay(
  gradient = spat.dist,
  counts = decostand(spec, method = "pa"),
  coords = coords,
  nboots = 1000,
  dis.fun = "beta.pair",
  index.family = "sorensen",
  dis = 1,
  like.pairs = TRUE # TRUE, not T: T is an ordinary variable that can be reassigned
)
x <- vegdist(coords, method = "euclidean")
# Similarity = 1 - dissimilarity. The original called dist(), which has no
# `index.family` argument and therefore fails; beta.pair() is the function
# named in `dis.fun` above, and [[1]] takes its first component, presumably
# the one selected by `dis = 1` -- confirm against the ddecay documentation.
y <- 1 - betapart::beta.pair(
  decostand(spec, method = "pa"),
  index.family = "sorensen"
)[[1]]
plot(x, y)
lines(dd$Predictions[, "x"], dd$Predictions[, "mean"], col = "red", lwd = 2)
#========================================================================================================
# DATA -----------------------------------------------------
spec <- structure(list(Ccol = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Acol = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), NYcol = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0), Mcol = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), AAcol = c(14, 0, 14, 3, 11, 1, 0, 2, 0,
3, 0, 4, 0, 1, 8, 2, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 7),
Ncol = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1), ATBcol = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 3), CVcol = c(0, 0, 0, 0, 0, 0, 1, 20,
0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 2, 0, 0,
0, 6), AZNcol = c(0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), GBcol = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), KHAcol = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0), AFcol = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0), AFPcol = c(0,
0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1), TIAcol = c(4, 1, 0, 2, 6, 0,
1, 1, 0, 2, 0, 0, 0, 1, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0), AUcol = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), AScol = c(0,
4, 0, 2, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 5, 0, 0), NSAcol = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 7, 0, 0, 3, 0, 0, 0, 4, 0, 2, 0, 1, 0, 9, 5, 1,
0, 0, 2, 0), WZcol = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 10, 4,
0, 0, 0, 0, 0, 0, 1, 5, 0, 0, 0, 17, 4, 0, 0, 0, 0, 0), AJcol = c(0,
3, 6, 0, 0, 1, 0, 4, 0, 0, 0, 0, 39, 12, 0, 0, 0, 0, 0, 0,
0, 4, 5, 1, 12, 13, 16, 0, 5), EADcol = c(4, 1, 2, 1, 2,
0, 0, 0, 0, 4, 0, 2, 1, 1, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0,
0, 0, 0, 0, 1), CAcol = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), Pcol = c(0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 60, 0, 0,
13, 0, 8, 1, 0, 0, 0, 0, 0), ASDcol = c(3, 5, 6, 17, 3, 5,
26, 2, 0, 17, 3, 10, 6, 3, 2, 4, 0, 0, 5, 25, 0, 0, 0, 2,
2, 9, 0, 2, 8), RMAcol = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
OUcol = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), KAcol = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12,
0, 0, 0, 0, 0, 8, 1), PACcol = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 2, 0, 37, 0, 24,
1, 0, 0), LAAcol = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0), GAcol = c(1,
0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0,
0, 0, 3, 0, 0, 0, 2, 0, 0), AAcol = c(1, 0, 1, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0), EVAcol = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0), EAcol = c(0,
0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), AKcol = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0), Acol = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 0), QAcol = c(0,
0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), YAcol = c(11, 24, 21, 63, 44,
95, 12, 43, 0, 5, 26, 22, 25, 48, 86, 2, 0, 0, 13, 0, 0,
2, 0, 0, 60, 6, 7, 0, 45), BANcol = c(0, 0, 0, 3, 0, 0, 0,
0, 0, 0, 0, 0, 24, 0, 6, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0,
9, 17, 17), VCcol = c(0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Vcol = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0), Ocol = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0), AVcol = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), JXcol = c(0,
3, 3, 0, 0, 0, 0, 0, 8, 0, 0, 10, 3, 0, 0, 5, 0, 0, 0, 1,
0, 0, 0, 2, 4, 1, 0, 0, 0)), class = "data.frame", row.names = c(NA,
-29L))
coords <- structure(list(Lat.x = c(34.43363, 34.36784, 34.32587, 34.19891,
34.24217, 34.24863, 34.18137, 34.16838, 34.10961, 34.08329, 34.40571,
34.39591, 34.39292, 34.37466, 34.28948, 34.26146, 34.04687, 34.0409,
34.068339, 34.34679, 34.17161, 34.23308, 34.21544, 34.14922,
34.27539, 34.2323, 34.19057, 34.07042, 34.06289), Lon.x = c(-94.94494,
-94.92512, -94.94429, -94.84497, -94.8573, -94.85641, -94.887,
-94.91322, -94.92913, -94.93276, -95.02622, -95.04382, -94.96295,
-94.83733, -94.81071, -94.79161, -95.03968, -95.0608, -95.086986,
-95.03345, -95.23862, -95.25619, -95.1041, -95.02286, -95.02672,
-95.02626, -95.02941, -95.01746, -94.98786)), class = "data.frame", row.names = c(NA,
-29L))
You can get more answers, if you tell what was the problem. For instance, which functions failed and what was the error message. I had a look at betapart::decay.model(), where I could get this error message:
Error in eval(family$initialize) :
cannot find valid starting values: please specify some
To cut a long story short: you cannot use this function with your data because some of your dissimilarities are exactly 1. Dissimilarities are turned into similarities with 1-dissimilarity, which makes these values zero similarities (that is, these pairs of sampling units have nothing in common; they share no species). The function decay.model uses glm with a Gaussian family and a log link, and the log link requires that you supply starting values if there are zeros in the y-variate.
I think that you have four alternatives:
You do not use the method as it does not suit your data.
You modify the decay.model function so that you can specify the starting values, like the error message suggested. This means that you add mustart to the function call so that it reads, e.g., glm(y ~ x, family=gaussian(link="log"), mustart=pmax(y, 0.01)). This replaces zeros with 0.01 as starting values.
You change maximum distances from 1 to something smaller, for instance, 0.99: dissim.BCI[dissim.BCI==1] <- 0.99. However, this changes the data, and also changes the results from those you get with alternative 2 (which only changes starting values, but data are unmodified). However, the effect is not very large and any Bayesian would claim that dissimilarity 1 is just a frequentist folly (you just haven't seen the case that is in common with these sampling units).
You change the maximum distance to missing values. This will change data more than alternative 3. It removes maximum dissimilarities and these no longer influence the decay curve. The effect is the same as censoring greatest dissimilarities. The results change more than in alternative 3.
I am working with the R-package R2jags. After running the code I attach below, R produced the error message: "Node inconsistent with parents".
I tried to solve it. However, the error message persists. The variables I am using are:
i) "Adop": a 0-1 dummy variable.
ii) "NumInfo": a counter variable whose range is {0, 1, 2,...}.
iii) "Price": 5
iv) "NRows": 326.
install.packages("R2jags")
library(R2jags)
# Data you need to run the model.
# Adop: a 0-1 dummy variable.
Adop <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
# NumInfo: a counter variable.
NumInfo <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1)
# NRows: length of both 'NumInfo' and 'Adop'.
NRows <- length(NumInfo)
# Price: 5
Price <- 5
Data <- list("NRows" = NRows, "Adop" = Adop, "NumInfo" = NumInfo, "Price" = Price)
# The Bayesian model. The parameters I would like to infer are: 'mu.m', 'tau2.m', 'r.s', 'lambda.s', 'k', 'c', and 'Sig2'.
# I would like to obtain samples from the posterior distribution of the vector of parameters.
Bayesian_Model <- "model {
mu.m ~ dnorm(0, 1)
tau2.m ~ dgamma(1, 1)
r.s ~ dgamma(1, 1)
lambda.s ~ dgamma(1, 1)
k ~ dunif(1, 1/Price)
c ~ dgamma(1, 1)
Sig2 ~ dgamma(1, 1)
precision.m <- 1/tau2.m
m ~ dnorm(mu.m, precision.m)
s2 ~ dgamma(r.s, lambda.s)
for(i in 1:NRows){
Media[i] <- NumInfo[i]/Sig2 * m
Var[i] <- equals(NumInfo[i], 0) * 10 + (1 - equals(NumInfo[i], 0)) * NumInfo[i]/Sig2 * s2 * (NumInfo[i]/Sig2 + 1/s2)
Prec[i] <- pow(Var[i], -1)
W[i] ~ dnorm(Media[i], Prec[i])
PrAd1[i] <- 1 - step(-m/s2 - 1/c * 1/s2 * log(1 - k * Price) + 1/2 * c)
PrAd2[i] <- 1 - step(-W[i] - m/s2 - 1/c * 1/s2 * log(1 - k * Price) + 1/2 * c - 1/c * log(1 - k * Price))
PrAd[i] <- equals(NumInfo[i], 0) * PrAd1[i] + (1 - equals(NumInfo[i], 0)) * PrAd2[i]
Adop[i] ~ dbern(PrAd[i])
}
}"
# Save the Bayesian model in your computer with an extension '.bug'.
# Suppose that you saved the .bug file in: "C:/Users/Default/Bayesian_Model.bug".
writeLines(Bayesian_Model, "C:/Users/Default/Bayesian_Model.bug")
# Here I would like to use jags command from R-package called R2jags.
# I would like to generate 1000 iterations.
MCMC_Bayesian_Model <- R2jags::jags(
model.file = "C:/Users/Default/Bayesian_Model.bug",
data = Data,
n.chains = 1,
n.iter = 1000,
parameters.to.save = c("mu.m", "tau2.m", "r.s", "lambda.s", "k", "c", "Sig2")
)
When running the code, R produced the error message: "Node inconsistent with parents". I do not know what the mistakes are. I was wondering if you could help me with this problem, please. If you need more information, please let me know. Thank you very much.
It's a little hard to figure out the model without knowing what you're trying to do, but I suggest two fixes:
Instead of k ~ dunif(1, 1/Price), did you mean k ~ dunif(0, 1/Price)? For dunif(a, b), you must have a < b (see page 48 here: http://people.stat.sc.edu/hansont/stat740/jags_user_manual.pdf).
I inserted an additional line in the model,
PrAd01[i] <- max(min(PrAd[i], 0.99), 0.01)
and changed the last line to
Adop[i] ~ dbern(PrAd01[i])
Page 49 of the manual above states that 0 < p < 1 for dbern(p).
The model runs with the above two changes.