How to gather dataframe to make columns into a single column - r

I have this data:
structure(list(id = 1:6, arthritis = c(1L, 0L, 0L, 0L, 0L, 1L
), asthma = c(0L, 0L, 0L, 0L, 0L, 0L), cancer = c(0L, 0L, 0L,
0L, 0L, 0L), cerebvascdz = c(0L, 0L, 0L, 0L, 0L, 0L), chf = c(0L,
0L, 0L, 0L, 0L, 0L), crf = c(0L, 0L, 0L, 0L, 0L, 0L), copd = c(0L,
0L, 0L, 0L, 0L, 0L), depression = c(0L, 0L, 0L, 1L, 1L, 1L),
diabetes = c(0L, 0L, 0L, 0L, 0L, 0L), hyperlipid = c(1L,
0L, 1L, 0L, 1L, 0L), htn = c(1L, 0L, 1L, 1L, 0L, 1L), ihd = c(1L,
0L, 0L, 0L, 0L, 0L), obesity = c(0L, 0L, 0L, 0L, 0L, 0L),
osteoporosis = c(0L, 0L, 0L, 0L, 0L, 1L)), row.names = c(NA,
6L), class = "data.frame")
which contains an id for each patient. All the rest of the columns are comorbidities that the patient might have, each designated as a boolean (0/1).
I'm trying to use the gather method to flip the table around, as shown like this. Every comorbidity that the patient has is supposed to be populated on the right with the patient id on the left.
I'm pretty sure I'm supposed to be using the gather function, but I can't seem to get this working. Does anyone have any insight into what I should be doing to have the frame switch to the required format?

Here is the proper code
# Reshape the wide 0/1 comorbidity flags into one row per (patient, diagnosis).
# Assumes `pmh` holds the data frame shown in the question and that dplyr and
# tidyr are attached.
pmh %>%
# Stack columns arthritis..osteoporosis: column name -> `diagnosis`, value -> `num`.
gather(diagnosis,num, arthritis:osteoporosis) %>%
arrange(id) %>%
# Keep only the comorbidities the patient actually has (flag equal to 1).
filter(num == 1)
I'm new to R and I think I had misunderstood the first argument that gather takes.

Related

mlogit : using varying alternatives for mlogit in R

I am trying to use varying alternatives for each person, but I am not able to get it working. If I make the alternatives the same for each person, it works fine. How can I make the model work with varying alternatives?
Data :
> dput( df1 )
structure(list(Choice = c(1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L,
1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L), A = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, -1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, -1L, 0L, 0L, 1L, 0L, 0L, -1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), B = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, -1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, -1L, 0L, 1L, 0L, 0L, -1L, 0L,
0L), C = c(1L, 0L, 0L, 0L, -1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, -1L, 0L, 0L, 0L,
1L, 0L, 0L, -1L, 0L, 0L, 0L, 0L, 0L, 0L), D = c(0L, 1L, 0L, 0L,
0L, -1L, 0L, 0L, 0L, 1L, 0L, 0L, -1L, 0L, 0L, 1L, 0L, 0L, -1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), E = c(0L, 0L, 1L, 0L, 0L, 0L, -1L, 0L, 0L, 0L, 1L,
0L, 0L, -1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, -1L, 0L), F = c(0L, 0L,
0L, 1L, 0L, 0L, 0L, -1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, -1L, 0L, 0L, 1L, 0L, 0L, -1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, -1L), Alternative = c(1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L)), row.names = c(NA, -38L), class = "data.frame")
Code :
model = mlogit( Choice ~ B + C + D + E + F | 0, data = df1,
alt.levels = unique( df1$Alternative ),
shape = "long")
Error
Error in dfidx::dfidx(data = data, dfa$idx, drop.index = dfa$drop.index, :
the data must be balanced in order to use the levels argument
You need to provide mlogit with an explicit ID variable denoting which participant made the choice. It can't infer them from the data.frame you've provided.
I'm assuming in your reproducible example that the alternatives in rows running sequentially from [1 - 4] or [1 - 3] represent the choice sets presented to a unique individual. If so, then you can fit a model like so:
library(mlogit)
# Explicitly create an ID variable: the first 2 choice sets have 4 alternatives
# and the remaining 10 have 3 (2 * 4 + 10 * 3 = 38 rows, matching df1).
df1$ID <- rep(1:12, times = c(rep(4, 2), rep(3, 10)))
# Convert to dfidx data in long shape, with Choice as the response column and
# ID passed as the id variable.
dfx1 <- mlogit.data(df1,
shape = "long",
choice = "Choice",
id.var = "ID")
# Fit a model with the same formula as in the question; unlike the failing call,
# no alt.levels argument is passed here.
m0 <- mlogit(Choice ~ B + C + D + E + F | 0,
data = dfx1)

dist() function in R returning "Error in dist(x, method = "binary") : unused argument (method = "binary")?

I am trying to create a distance matrix using dist on categorical variables. I performed dummy.data.frame successfully on my categorical variables, but when I run dist() R returns this message:
Error in dist(dummyClusterdf, method = "binary") : unused argument (method = "binary")
Has anyone else had this problem? Any help would be appreciated.
#dummify categorical variables
dummyClusterdf <- dummy.data.frame(data = clusterdf, sep = ".", dummy.classes = "ALL", all = FALSE)
glimpse(dummyClusterdf)
#distance matrix
surveydist <- dist(dummyClusterdf, method = "binary")
Error in dist(dummyClusterdf, method = "binary") : unused argument (method = "binary")
here is a sample of the data:
> dput(head(dummyClusterdf))
structure(list(`Age_Range.23 - 25` = c(0L, 0L, 0L, 1L, 0L, 0L
), `Age_Range.26 - 30` = c(1L, 0L, 0L, 0L, 0L, 0L), `Age_Range.30 +` = c(0L,
1L, 1L, 0L, 1L, 1L), User_Role.Faculty = c(0L, 1L, 1L, 0L, 0L,
1L), User_Role.Staff = c(1L, 0L, 0L, 1L, 1L, 0L), User_Role.Administration = c(0L,
0L, 0L, 0L, 0L, 0L), `Sustainability_Importance.Extremely Unimportant` = c(0L,
0L, 0L, 0L, 0L, 0L), Sustainability_Importance.Neutral = c(0L,
0L, 0L, 0L, 0L, 0L), `Sustainability_Importance.Very important` = c(0L,
1L, 1L, 0L, 0L, 1L), `Sustainability_Importance.Extremely important` = c(1L,
0L, 0L, 1L, 1L, 0L), `Self_rate_Sustainable_Lifestyle.Somewhat disagree` = c(0L,
0L, 0L, 0L, 0L, 0L), `Self_rate_Sustainable_Lifestyle.Neither agree nor disagree` = c(0L,
1L, 0L, 0L, 0L, 0L), `Self_rate_Sustainable_Lifestyle.Somewhat agree` = c(1L,
0L, 1L, 1L, 1L, 1L), `Self_rate_Sustainable_Lifestyle.Strongly agree` = c(0L,
0L, 0L, 0L, 0L, 0L), `School_Prioritizes_Sustainability.A little` = c(0L,
0L, 0L, 0L, 0L, 0L), `School_Prioritizes_Sustainability.A moderate amount` = c(1L,
0L, 1L, 0L, 0L, 0L), `School_Prioritizes_Sustainability.A lot` = c(0L,
0L, 0L, 0L, 0L, 0L), `School_Prioritizes_Sustainability.A great deal` = c(0L,
1L, 0L, 1L, 1L, 1L), Lights_on_Empty_Room.Never = c(0L, 0L, 0L,
0L, 0L, 0L), Lights_on_Empty_Room.Sometimes = c(0L, 0L, 1L, 0L,
0L, 0L), `Lights_on_Empty_Room.About half the time` = c(0L, 0L,
0L, 0L, 0L, 0L), `Lights_on_Empty_Room.Most of the time` = c(1L,
0L, 0L, 0L, 0L, 1L), Lights_on_Empty_Room.Always = c(0L, 1L,
0L, 1L, 1L, 0L), Look_kWh_on_Bill.Yes = c(1L, 1L, 1L, 1L, 0L,
1L), Look_kWh_on_Bill.No = c(0L, 0L, 0L, 0L, 1L, 0L), `Real_time_Updates_Reduce_Use.Strongly disagree` = c(0L,
0L, 0L, 0L, 0L, 0L), `Real_time_Updates_Reduce_Use.Somewhat disagree` = c(0L,
0L, 0L, 0L, 0L, 0L), `Real_time_Updates_Reduce_Use.Neither agree nor disagree` = c(0L,
0L, 0L, 0L, 0L, 0L), `Real_time_Updates_Reduce_Use.Somewhat agree` = c(1L,
0L, 1L, 0L, 0L, 0L), `Real_time_Updates_Reduce_Use.Strongly agree` = c(0L,
1L, 0L, 1L, 1L, 1L), `Electric_Cost_Importance.Slightly important` = c(0L,
0L, 0L, 0L, 0L, 0L), `Electric_Cost_Importance.Moderately important` = c(0L,
0L, 1L, 0L, 0L, 0L), `Electric_Cost_Importance.Very important` = c(1L,
0L, 0L, 1L, 1L, 1L), `Electric_Cost_Importance.Extremely important` = c(0L,
1L, 0L, 0L, 0L, 0L), `Electric_Environment_Importance.Slightly important` = c(0L,
0L, 0L, 0L, 0L, 0L), `Electric_Environment_Importance.Moderately important` = c(1L,
0L, 1L, 0L, 0L, 0L), `Electric_Environment_Importance.Very important` = c(0L,
0L, 0L, 1L, 0L, 1L), `Electric_Environment_Importance.Extremely important` = c(0L,
1L, 0L, 0L, 1L, 0L), Work_Can_Change_Thermostat.Yes = c(0L, 0L,
0L, 0L, 0L, 0L), Work_Can_Change_Thermostat.No = c(1L, 1L, 1L,
1L, 1L, 1L), `Peacock_VS_Campus_Efficiency.Not efficient at all` = c(0L,
0L, 0L, 0L, 0L, 0L), `Peacock_VS_Campus_Efficiency.Slightly efficient` = c(0L,
0L, 1L, 0L, 0L, 0L), `Peacock_VS_Campus_Efficiency.Moderately efficient` = c(1L,
1L, 0L, 1L, 1L, 0L), `Peacock_VS_Campus_Efficiency.Very efficient` = c(0L,
0L, 0L, 0L, 0L, 1L), `Peacock_VS_Campus_Efficiency.Extremely efficient` = c(0L,
0L, 0L, 0L, 0L, 0L), `Display_Useful.Not at all useful` = c(0L,
0L, 1L, 0L, 0L, 0L), `Display_Useful.Slightly useful` = c(1L,
0L, 0L, 0L, 0L, 0L), `Display_Useful.Moderately useful` = c(0L,
0L, 0L, 1L, 0L, 1L), `Display_Useful.Very useful` = c(0L, 1L,
0L, 0L, 1L, 0L), `Display_Useful.Extremely useful` = c(0L, 0L,
0L, 0L, 0L, 0L), `Display_Impact.None at all` = c(0L, 0L, 1L,
0L, 0L, 0L), `Display_Impact.A little` = c(1L, 0L, 0L, 0L, 0L,
1L), `Display_Impact.A moderate amount` = c(0L, 1L, 0L, 1L, 1L,
0L), `Display_Impact.A lot` = c(0L, 0L, 0L, 0L, 0L, 0L), Gender.Male = c(0L,
1L, 0L, 0L, 0L, 1L), Gender.Female = c(1L, 0L, 1L, 1L, 1L, 0L
), `Gender.Prefer not to answer` = c(0L, 0L, 0L, 0L, 0L, 0L),
`Race/Ethnicity.I would prefer not to answer` = c(0L, 0L,
0L, 0L, 0L, 0L), `Race/Ethnicity.Other` = c(1L, 0L, 0L, 0L,
0L, 0L), `Race/Ethnicity.Asian/Asian American` = c(0L, 1L,
0L, 0L, 0L, 0L), `Race/Ethnicity.Hispanic or Latino` = c(0L,
0L, 0L, 0L, 0L, 0L), `Race/Ethnicity.White or Caucasian` = c(0L,
0L, 1L, 1L, 1L, 1L), `Prize_Participation.Yes please!` = c(1L,
1L, 1L, 1L, 1L, 1L), `Prize_Participation.No thanks, I'd like to be finished` = c(0L,
0L, 0L, 0L, 0L, 0L)), dummies = list(Age_Range = 1:3, User_Role = 4:6,
Sustainability_Importance = 7:10, Self_rate_Sustainable_Lifestyle = 11:14,
School_Prioritizes_Sustainability = 15:18, Lights_on_Empty_Room = 19:23,
Look_kWh_on_Bill = 24:25, Real_time_Updates_Reduce_Use = 26:30,
Electric_Cost_Importance = 31:34, Electric_Environment_Importance = 35:38,
Work_Can_Change_Thermostat = 39:40, Peacock_VS_Campus_Efficiency = 41:45,
Display_Useful = 46:50, Display_Impact = 51:54, Gender = 55:57,
`Race/Ethnicity` = 58:62, Prize_Participation = 63:64), row.names = c("1",
"2", "3", "4", "5", "6"), class = "data.frame")
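Mankind_008 answered the question! (Solved by using stats::dist: the "unused argument" error means the dist() being called has no method argument, i.e. another loaded package is masking the base function, so calling stats::dist(dummyClusterdf, method = "binary") explicitly fixes it.)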

Export ftable factors to html

I have a table created from ftable()
structure(c(1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 2L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L), .Dim = c(12L, 7L), class = "ftable", row.vars = list(
ï..petal_size = c("large ", "small", "small "), stem_length = c("long",
"long ", "short", "short ")), col.vars = list(flow_color = c("blue",
"green", "indigo ", "orange", "red ", "violet", "yellow")))
I would like to export it using htmlTable, but when I use htmlTable on this I get this result with no factors and just numbers, like in the picture here
How do I recover the factor names for the htmltable? Please note the final output should have the same number of rows and columns as the picture's output, but it needs to have the factor names on the rows and columns.
I will convert it first to a data.frame and then add the necessary tweaks to obtain the desired output:
tableToHtml <-structure(c(1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 2L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L), .Dim = c(12L, 7L), class = "ftable", row.vars = list(
ï..petal_size = c("large ", "small", "small "), stem_length = c("long",
"long ", "short", "short ")), col.vars = list(flow_color = c("blue",
"green", "indigo ", "orange", "red ", "violet", "yellow")))
library(htmlTable)
# Convert the ftable to a data.frame once (the original converted it twice),
# then render it without row names and with blank column headers.
# FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
tableDf <- as.data.frame(tableToHtml)
htmlTable(tableDf, rnames = FALSE, header = rep("", length(colnames(tableDf))))

Why does metaMDS() produce a horizontal distribution of our data?

We have a species presence table (so binary: 1=present, 0=absent). When using metaMDS of the vegan package, it produces a horizontal distribution of our data when plotted, instead of clusters.
We tried using different distance methods (Euclidean, Bray, Jaccard), but they all seem to produce the same plot.
myfungi.all looks like this:
structure(list(Sample = 1:12, Habitat = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Dune", "Forest"
), class = "factor"), OTU88 = c(0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L,
1L, 1L, 1L, 1L), OTU28 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), OTU165 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), OTU178 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L), OTU97 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L
), OTU39 = c(0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L),
OTU104 = c(1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L
), OTU95 = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L,
0L), OTU90 = c(1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), OTU119 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), OTU451 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L), OTU98 = c(1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), OTU45 = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L,
1L), OTU2 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L,
1L), OTU24 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), OTU169 = c(0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), OTU29 = c(1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), OTU85 = c(0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L), OTU140 = c(1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L,
0L), OTU42 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L), OTU70 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L), OTU25 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), OTU34 = c(1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
1L), OTU181 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), OTU201 = c(1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), OTU17 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), OTU1146 = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L), OTU14 = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L,
1L, 1L), OTU72 = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L,
0L, 0L), OTU13 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
1L, 1L), OTU20 = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L), OTU63 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), OTU170 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), OTU262 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), OTU48 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), OTU6 = c(0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L,
0L, 0L), OTU3 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L,
1L, 1L), OTU31 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), OTU73 = c(1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L,
0L, 0L), OTU32 = c(0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L,
0L, 0L), OTU37 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), OTU196 = c(0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L,
0L, 0L), OTU5 = c(1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L,
0L, 0L), OTU11 = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L,
0L, 1L), OTU16 = c(0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L,
0L, 0L), OTU41 = c(0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L,
0L, 0L), OTU71 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), OTU109 = c(0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L,
0L, 0L), OTU233 = c(0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L)), class = "data.frame", row.names = c(NA, -12L))
Our script looks like this:
myfungi.all = read.csv("soil_fungi.csv",header=T)
myfungi = myfungi.all[,c(3:51)]
myfungi.nmds.bc <- metaMDS(myfungi, distance = "bray", k = 2, binary = TRUE)
plot(myfungi.nmds.bc, type="t", main=paste("NMDS/Bray-Curtis -?? Stress =", round(myfungi.nmds.bc$stress,10)))
Does anyone have suggestions as what seems to be the problem?
At the moment our plot looks like this:
The solution you reported gives a perfect fit (stress nearly 0), and also gives a warning because of this dubious stress. The solution effectively puts your sampling units into two points so that you have absolutely dichotomous data. As Ben Bolker demonstrated, Principal Coordinates Analysis, PCoA (which you also can perform with stats::cmdscale, vegan::wcmdscale or vegan::dbrda) still has points in two major clusters, but spreads points within these clusters. PCoA is a linear method, but NMDS is non-linear and therefore often needs more data. It seems that in this case the treatment of weak ties (read the documentation ?monoMDS or Kruskal's papers cited in that documentation) is the stage that puts most demand on the data, and setting weakties = FALSE will prevent collapsing non-identical observations into two points:
m3 <- metaMDS(myfungi, weakties = FALSE)
m3 # stress 0.04124
stressplot(m3) # compare this to your result stressplot(myfungi.nmds.bc)
plot(m3)
The default monoMDS with weakties = TRUE (like Kruskal recommended) will consider the dichotomy of two groups as the only important non-linear difference, but with weakties = FALSE the solutions cannot proceed to zero stress. You still have a dichotomy, but with scatter.
Best guess is that you simply don't have enough data to distinguish two separate environmental axes: when I run your code I get
Warning message: In metaMDS(myfungi[, -(1:2)], distance = "bray", k = 2, binary = TRUE) : stress is (nearly) zero: you may have insufficient data
Out of your 53 species, only 35 are informative (the others appear either at none or at all of the sites):
m2 <- myfungi[,apply(myfungi,2,var)>0]
ncol(m2) ## 35
# Helper: convert x to a matrix, wrap it with Matrix() and draw it with image().
# NOTE(review): Matrix() is presumably from the Matrix package, so
# library(Matrix) must be loaded first; confirm. vv is defined here but not
# called in the visible snippet.
vv <- function(x) (image(Matrix(as.matrix(x))))
How many distinct distribution patterns are there?
nrow(unique(t(m2))) ## 27
You could try PCoA instead:
library(ape)
# PCoA on Bray-Curtis distances of the informative species (m2).
# vegdist() is from vegan, the package the question's metaMDS() call relies on.
# The original line was missing the final closing parenthesis.
biplot(pcoa(vegdist(m2, "bray")))
As Jari Oksanen points out, you could also do this with cmdscale() in base R:
# Use m2 (the informative-species table defined earlier); `mm` is never defined.
# Habitat is read from myfungi.all because the question's script builds myfungi
# from columns 3:51 only, which drops the Sample and Habitat columns.
plot(cmdscale(vegdist(m2, "bray")),
     col = as.numeric(myfungi.all$Habitat))

Barplots with percentages of different variables

Given following example:
structure(list(jdgcbrbR = c(0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L),
ctprpwrR = c(0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L), hrshsntaR = c(0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L), dbctvrdR = c(0L, 0L, 1L, 1L,
0L, 0L, 0L, 0L), lwstrobR = c(0L, 0L, 1L, 0L, 0L, 0L, 0L,
0L), rgbrklwR = c(0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L), ctinpltR = c(0L,
0L, 1L, 1L, 0L, 0L, 0L, 0L), stcbg2tR = c(0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), tmprsR = c(NA, NA, NA, 0L, 0L, NA, NA, NA
), caplcstR = c(0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L), widprsnR = c(0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L), wevdctR = c(0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L)), .Names = c("jdgcbrbR", "ctprpwrR", "hrshsntaR",
"dbctvrdR", "lwstrobR", "rgbrklwR", "ctinpltR", "stcbg2tR", "tmprsR",
"caplcstR", "widprsnR", "wevdctR"), row.names = 747:754, class = "data.frame")
How could I create a set of barplots, graphing the percentage of 1-values in each variable, thus giving a nice overview of the evolution of the percentage of 1's.
So far I tried creating the percentages (which failed because nrow is perceived as NULL):
pct_jdgcbrbR <- (sum(jdgcbrbR) / nrow(jdgcbrbR) * 100)
pct_jdgcbrbR
And I found the barplot function, which could be useful:
barplot(percentages, main="INR",
xlab="varnames")
The result should look something like this example I made in Excel:
The following works for me, assuming your data.frame is called "temp":
# Percentage of 1s per column: column sums (NAs ignored) over the total row count.
# Rows with NA still count in the denominator; colMeans(temp, na.rm = TRUE) * 100
# would instead give the percentage among the non-missing values.
barplot((colSums(temp, na.rm=TRUE)/nrow(temp))*100)

Resources