I am working with ggstatsplot to get visual representations of my statistical analyses.
I have numerous datasets, all very similar in make-up. Some work just fine, while others don't. data1 is a working example, and data2 doesn't work.
# data1: the working example. 55 rows, six treatment levels; the five
# negative_ctrl values differ from one another (1.74501 to 2.04001).
data1 <- structure(list(
treatment = structure(c(1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L),
.Label = c("negative_ctrl", "positive_ctrl", "treatmentA", "treatmentB", "treatmentC", "treatmentD"), class = "factor"),
value = c(1.74501, 2.04001, 1.89501, 1.84001,
1.89501, 9.75001, 8.50001, 8.80001, 11.50001, 10.25001, 7.90001,
9.25001, 11.45001, 7.75001, 7.75001, 7.55001, 8.70001, 8.20001,
6.95001, 6.60001, 7.40001, 7.15001, 8.25001, 9.20001, 8.95001,
6.45001, 6.05001, 5.40001, 7.95001, 6.80001, 4.65001, 6.40001,
6.40001, 6.70001, 5.40001, 3.20001, 2.70001, 4.30001, 4.10001,
3.60001, 4.00001, 3.00001, 4.70001, 3.10001, 3.50001, 6.45001,
5.45001, 4.90001, 7.25001, 4.55001, 4.70001, 6.25001, 5.65001,
6.00001, 5.10001)),
row.names = c(NA, -55L), class = c("tbl_df", "tbl", "data.frame"))
# data2: the failing example. 52 rows, same six treatment levels; all five
# negative_ctrl values are identical (1.00001), so that group has zero
# variance, and treatmentA has only 7 observations.
data2 <- structure(list(
treatment = structure(c(1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L),
.Label = c("negative_ctrl", "positive_ctrl", "treatmentA", "treatmentB", "treatmentC", "treatmentD"), class = "factor"),
value = c(1.00001, 1.00001, 1.00001, 1.00001, 1.00001, 6.77501,
5.68751, 5.99201, 8.24501, 7.01251, 4.79501, 5.99126, 8.26276,
5.35376, 5.38751, 4.60251, 5.38901, 4.85201, 4.44401, 5.20501,
6.20701, 5.77001, 4.05201, 3.65126, 3.02401, 4.68351, 3.90001,
2.56951, 3.70001, 3.61901, 3.96401, 2.93601, 1.53901, 1.40801,
2.05601, 2.08501, 1.89701, 1.79501, 1.50001, 2.09151, 1.53551,
1.57501, 3.88851, 3.09151, 2.75501, 4.40626, 2.42001, 2.60951,
3.83501, 3.37151, 3.70001, 2.92701)),
row.names = c(NA, -52L), class = c("tbl_df", "tbl", "data.frame"))
I call the most basic analysis for both datasets:
# Packages loaded by the question.
library(Rmpfr)
library(ggstatsplot)

# Default between-groups comparison on the data set that works.
ggstatsplot::ggbetweenstats(
  data     = data1,
  x        = treatment,
  y        = value,
  messages = FALSE
)

# The identical call on the data set that triggers the optim() error.
ggstatsplot::ggbetweenstats(
  data     = data2,
  x        = treatment,
  y        = value,
  messages = FALSE
)
For data1 I get this:
for data2 I get:
> Error in stats::optim(par = 1.1 * rep(lambda, 2), fn = function(x) { : non-finite value supplied by optim
At first I thought the issue might be a few zeros that I passed on in the negative control, but I first upped them by a tiny amount and then by 1 to make sure the range of the values is not an issue. The only discrepancy I can see is that I only have 7 instead of 10 measurements for treatmentA (level 3) in data2 but 10 in data1 (had to remove a few NAs due to sample failure). However, in both cases the negative control (level 1) only has 5 values, and I don't think that in this type of analysis there is an issue with different sample sizes between the groups.
In cases like this it's a good idea to try basic plots first, e.g. look at the boxplots on their own:
So comparing the two datasets:
# Plain base-R boxplots of value by treatment, one per data set,
# to compare the spread within each group.
boxplot(value ~ treatment, data = data1)
boxplot(value ~ treatment, data = data2)
data2 has a treatment with no variability ("negative_ctrl"): its SD is 0. I'm guessing this function runs some tests that require variation. You will need to read the function's documentation to see whether this is mentioned, but you can still get the plots either by removing such treatments or by forcing a very small amount of variation, e.g.:
# Option 1: leave out the zero-variance group and plot the remaining treatments.
ggstatsplot::ggbetweenstats(
  data     = data2[data2$treatment != "negative_ctrl", ],
  x        = treatment,
  y        = value,
  messages = FALSE
)
# Option 2 (a hack): nudge the first negative_ctrl observation so that the
# group is no longer constant, then plot the full data set.
data3 <- data2
data3$value[which(data3$treatment == "negative_ctrl")[1]] <- 1.0001

ggstatsplot::ggbetweenstats(
  data     = data3,
  x        = treatment,
  y        = value,
  messages = FALSE
)
Related
I have a data set that looks like this:
# dput() of the question's data set: 40 rows with columns n (integer),
# name (factor, 9 levels), change2 (numeric), Season (factor, 4 levels)
# and off (numeric). Note the expression is not assigned to a name here.
structure(list(n = c(236896L, 73258L, 75570L, 5684L, 10242L,
2037L, 74194L, 41764L, 288115L, 6728L, 18964L, 5395L, 23192L,
12575L, 39591L, 12566L, 44458L, 126957L, 47316L, 152175L, 92913L,
81229L, 29622L, 1708L, 8526L, 52117L, 95385L, 22480L, 30521L,
51660L, 74320L, 273107L, 58L, 59686L, 77454L, 51471L, 66610L,
232321L, 53435L, 45270L), name = structure(c(9L, 9L, 6L, 5L,
2L, 5L, 6L, 9L, 6L, 4L, 4L, 4L, 4L, 2L, 9L, 2L, 6L, 1L, 4L, 6L,
4L, 9L, 2L, 5L, 3L, 4L, 6L, 2L, 9L, 2L, 4L, 4L, 7L, 9L, 1L, 6L,
6L, 6L, 8L, 2L), .Label = c("Ami", "Cho", "Fal", "For", "Ric",
"Sam", "Taw", "Tex", "Tol"), class = "factor"), change2 = c(0.0753607803884176,
-0.08058465598786, -0.00410425493512865, -0.0220964428266722,
0.0629320532004209, -0.0797306134519322, 0.0660481799732004,
-0.0572995403797303, -0.00713582946272, 0.00756646981276647,
0.032732914683994, -0.00632056690250293, 0.050358229187504, 0.0265162711945312,
0.0218803226963826, -0.0508818612242459, 0.00485925918649957,
0.0315158006542641, -0.0315622434590242, -0.0602515470219345,
-0.0409479919129347, 0.111224942380013, 0.00704490808823113,
0.0236731452544392, -0.0811686305416274, -0.0274692750452023,
0.00160881330548216, -0.0211269729894635, -0.0377625466699325,
-0.0311273993307701, -0.0118001904995042, 0.0023179680499073,
0.0263453251509878, 0.0767020512037913, -0.0113771665605732,
-0.0428469659333539, 0.0714746847470087, 0.10720066191237, 0.0153144105362596,
-0.109538998188302), Season = structure(c(2L, 1L, 4L, 3L, 3L,
1L, 4L, 3L, 2L, 4L, 4L, 4L, 1L, 4L, 3L, 1L, 3L, 4L, 1L, 1L, 3L,
2L, 2L, 2L, 3L, 3L, 3L, 1L, 3L, 4L, 1L, 2L, 3L, 2L, 2L, 3L, 4L,
2L, 3L, 2L), .Label = c("fall", "spring", "summer", "winter"), class = "factor"),
off = c(230915, 57957, 85583, 10526, 35316.6, 4851, 87287,
48226, 198700, 42050.6, 46252.8, 29974, 56566, 20959, 43175,
10385, 56997, 208126, 100672, 80516, 244507, 128730, 38470,
5177, 22435.6, 121202, 114234, 26140, 24693, 53812.6, 124281,
666114, 583, 76915, 140824.7, 91912, 78828, 219171, 95419,
33783.9)), row.names = c(NA, -40L), class = "data.frame")
I am running a GAM that looks like this:
# GAM fitted by REML with family = nb:
#   - Season as a parametric term, plus one smooth of change2 per Season (k = 5)
#   - name entered as a random-effect smooth (bs = "re")
#   - log(off) supplied as an offset inside the formula
gam1 <- gam(
  n ~ Season +
    s(change2, by = Season, k = 5) +
    s(name, bs = "re") +
    offset(log(off)),
  data   = data,
  family = nb,
  method = "REML"
)
With a random effect for the name variable and a fixed effect for the Season variable.
I am able to get all the outputs using the summary() command for this model, however when I try to display the partial effect plots with the gratia::draw() command, I get the following error:
# gratia provides draw(), used here to display the partial effect plots
library(gratia)
# This call raises the "object 'off' not found" error quoted below
draw(gam1)
Error in eval(predvars, data, env) : object 'off' not found
In addition: Warning message:
In predict.gam(object, newdata = pred_data, type = "terms", terms = term, :
not all required variables have been supplied in newdata!
The partial effect plots show up with the plot.gam function, so I am wondering why this model will not work for the draw command? Also the model will run if I take the offset out, however this information is crucial to the analysis. Is there a reason why I can't run a fixed effect with an offset?
I can't reproduce this error with version 0.7.1 (on GitHub, but about to be submitted to CRAN) and I don't think anything of relevance to the reported problem changed between 0.7.0 (the current CRAN version) and this patched version.
With your data in tmp:
r$> gam1 <- gam(n ~ Season + s(change2, by = Season, k = 5)+
s(name, bs = "re") +
offset(log(off)),
data = tmp, family = nb, method = "REML")
r$> draw(gam1)
I get:
I am using R cld() function with emmeans, but the order of factor level in the output is different from what I set. Before calling cld(), the by.years output is also in the desired order (screenshot), but when I do cld(), the output is in the alphabetical order of Light - Moderate - No(screenshot). I also checked cld.years$Grazing.intensity, the levels are correct. Is there a way to specify the order of factor levels in the cld() output? Any help is appreciated.
# Sample data (dput output): 15 rows. Grazing.intensity, Grazing.intensity1,
# Years and Month are factors whose levels are stored in alphabetical order;
# Plots and Species.richness are integers.
plants <- structure(list(Grazing.intensity = structure(c(3L, 2L, 3L, 3L, 3L, 1L, 3L, 2L, 2L, 2L, 1L, 2L, 3L, 3L, 3L), .Label = c("Light-grazing", "Moderate-grazing", "No-grazing"), class = "factor"), Grazing.intensity1 = structure(c(3L, 2L, 3L, 3L, 3L, 1L, 3L, 2L, 2L, 2L, 1L, 2L, 3L, 3L, 3L), .Label = c("LG", "MG", "NG"), class = "factor"), Years = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L), .Label = c("Dry-year", "Wet-year"), class = "factor"), Month = structure(c(2L, 2L, 2L, 1L, 3L, 3L, 1L, 1L, 3L, 1L, 3L, 3L, 2L, 2L, 3L), .Label = c("Aug.", "Jul.", "Sept."), class = "factor"), Plots = c(1L, 3L, 8L, 6L, 9L, 7L, 2L, 2L, 10L, 10L, 7L, 7L, 9L, 4L, 2L), Species.richness = c(8L, 6L, 10L, 11L, 9L, 5L, 7L, 13L, 10L, 6L, 5L, 5L, 14L, 8L, 10L)), class = "data.frame", row.names = c(NA, -15L))
# Put the grazing levels in the desired display order.
plants$Grazing.intensity <- factor(
  plants$Grazing.intensity,
  levels = c("No-grazing", "Light-grazing", "Moderate-grazing")
)

# attach(plants) has been dropped: every call below receives the data
# explicitly via `data = plants`, so attaching only put a second copy of the
# columns on the search path.
lmer.mod <- lmer(
  Species.richness ~ Grazing.intensity * Years + (1 | Month),
  data = plants
)

# Estimated marginal means of each grazing level, separately within each year.
by.years <- emmeans(
  lmer.mod,
  specs = ~ Grazing.intensity:Years,
  by    = "Years",
  type  = "response"
)

# Compact letter display. With cld()'s default sorting the rows are ordered by
# the estimated mean, not by the factor-level order set above.
cld.years <- cld(by.years, Letters = letters)
This is my first time posting sample data in StackOverflow, so it may be wrong.. I used dput().
I solved the issue. The order changed because, by default, cld() displays the levels in increasing order of emmean. I set sort = FALSE, i.e. cld(by.years, Letters = letters, sort = FALSE), and the result was displayed in the original factor-level order. I should have read the documentation more thoroughly.
I've got a list of formula objects to fit Linear Quantile Mixed Models with lqmm::lqmm().
I cannot use summary() to return model coefficients with standard errors etc. from the produced models.
# Sample data (dput output): 50 rows of integer columns DID (used as the
# grouping variable in the models below; values 1 and 2), pain, wound, mobility.
d <- structure(list(DID = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), pain = c(4L, 2L, 6L, 3L, 3L,
4L, 3L, 3L, 4L, 5L, 4L, 4L, 5L, 3L, 4L, 3L, 2L, 6L, 5L, 7L, 6L,
3L, 5L, 1L, 5L, 3L, 4L, 4L, 6L, 5L, 5L, 6L, 5L, 6L, 5L, 6L, 6L,
5L, 6L, 7L, 4L, 5L, 6L, 6L, 5L, 6L, 4L, 5L, 6L, 7L), wound = c(4L,
3L, 3L, 3L, 4L, 5L, 4L, 3L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 4L, 3L,
4L, 4L, 3L, 3L, 3L, 4L, 3L, 3L, 4L, 5L, 3L, 8L, 7L, 7L, 7L, 7L,
9L, 8L, 8L, 8L, 6L, 7L, 6L, 8L, 7L, 6L, 8L, 7L, 6L, 7L, 8L, 7L,
7L), mobility = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 2L, 1L, 1L, 2L, 2L, 3L, 2L, 3L, 1L, 2L, 2L, 3L, 2L, 3L,
3L, 6L, 5L, 6L, 6L, 5L, 6L, 5L, 5L, 5L, 5L, 6L, 5L, 6L, 5L, 5L,
5L, 6L, 5L, 5L, 3L, 5L, 6L)), row.names = c(NA, 50L), class = "data.frame")
library(lqmm)

# Build the fixed-effects formula from a string and hand it to lqmm()
# through the variable x.
x <- as.formula("pain ~ wound + mobility")
m1 <- lqmm(
  x,
  random = ~ 1,
  group  = DID,
  data   = d
)

# This is the call that fails with the error shown below.
summary(m1)
Error: object of type 'symbol' is not subsettable
I tried using eval(x) as suggested here, but got a recursion error.
# Second attempt: wrap the formula variable in eval() inside the call.
m2 <- lqmm(
  eval(x),
  random = ~ 1,
  group  = DID,
  data   = d
)

# This ends in the infinite-recursion errors shown below.
summary(m2)
Error: evaluation nested too deeply: infinite recursion / options(expressions=)?
Error during wrapup: evaluation nested too deeply: infinite recursion / options(expressions=)?
Error: no more error handlers available (recursive errors?); invoking 'abort' restart
Any ideas on how to extract model parameters?
Full sample data was taken from here.
Run it as shown below; it should work:
# Fit exactly as in the question, with the formula held in x.
x <- as.formula("pain ~ wound + mobility")
m1 <- lqmm(
  x,
  random = ~ 1,
  group  = DID,
  data   = d
)

# The fix: overwrite the `fixed` entry of the stored call with the formula
# object itself, then summarise.
m1$call$fixed <- x
summary(m1)
Output:
> m1$call$fixed <- x
> summary(m1)
Call: lqmm(fixed = pain ~ wound + mobility, random = ~1, group = DID,
data = d)
Quantile 0.5
Fixed effects:
Value Std. Error lower bound
(Intercept) 2.765900 1.294809 0.163883
wound 0.052025 0.077028 -0.102770
mobility 0.469649 0.127371 0.213687
upper bound Pr(>|t|)
(Intercept) 5.3679 0.0376887 *
wound 0.2068 0.5025982
mobility 0.7256 0.0005675 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
AIC:
[1] 166.1 (df = 5)
There is a catch: after a little debugging I realised that the objects returned by the two approaches below are not identical, which is why I modified one of them as shown above:
# Approach 1: the formula is written literally inside the call.
m2 <- lqmm(
  pain ~ wound + mobility,
  random = ~ 1,
  group  = DID,
  data   = d
)

# Approach 2: the formula is passed through the variable x.
m1 <- lqmm(
  x,
  random = ~ 1,
  group  = DID,
  data   = d
)
If we look closely at m1$call and m2$call (m2, fitted with the literal formula, works with summary(); m1, fitted via x, does not), the two are different objects: m1$call$fixed holds the symbol x rather than the formula itself, which leads to the error the OP encountered. I think it's a bug, but please let me know if there is another explanation. Running all.equal(m1, m2) also confirms that there is a difference. So, after some experimenting, I reset the fixed element of the call to the original x (which is a formula), and that seems to work for now:
> all.equal(m1, m2)
[1] "Component “call”: target, current do not match when deparsed"
I would like to calculate column D based on the date column A. Column D should represent the number of observations grouped by column B.
Edit: fake data below
# Fake data from the question: 18 rows, six per Country (A, B, C), already
# ordered by Country. `date` is a factor of date strings, and
# Number.of.Observations.So.Far holds the desired running count per Country.
data <- structure(list(date = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 9L,
10L, 11L, 12L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("1/1/2015",
"1/2/2015", "1/3/2015", "1/4/2015", "1/5/2015", "1/6/2015", "5/10/2015",
"5/11/2015", "5/6/2015", "5/7/2015", "5/8/2015", "5/9/2015"), class = "factor"),
Country = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("A", "B",
"C"), class = "factor"), Value = c(215630672L, 1650864L,
124017368L, 128073224L, 97393448L, 128832128L, 14533968L,
46202296L, 214383720L, 243346080L, 85127128L, 115676688L,
79694024L, 109398680L, 235562856L, 235473648L, 158246712L,
185424928L), Number.of.Observations.So.Far = c(1L, 2L, 3L,
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L
)), class = "data.frame", row.names = c(NA, -18L))
What function in R will create a column D like so?
We can group by 'Country' and create sequence column with row_number()
library(dplyr)

# Number the rows within each Country, then drop the grouping so that later
# verbs applied to the result are not silently computed per group.
df1 %>%
  group_by(Country) %>%
  mutate(NumberOfObs = row_number()) %>%
  ungroup()
Or with base R
# Running count within each Country: ave() applies seq_along() to the row
# positions of every Country group and returns the results in row order.
df1$NumberOfObs <- ave(seq_len(nrow(df1)), df1$Country, FUN = seq_along)
Or with table
# sequence(table(...)) emits 1..n for each Country in level order, so the
# result only lines up with the rows when df1 is already sorted by Country.
# Stop with an error rather than silently attach counts to the wrong rows.
stopifnot(!is.unsorted(as.integer(factor(df1$Country))))
df1$NumberOfObs <- sequence(table(df1$Country))
Or in data.table
library(data.table)

# Convert df1 to a data.table by reference, add the per-Country row id in
# place, and print the result (the trailing [] forces printing after :=).
setDT(df1)
df1[, NumberOfObs := rowid(Country)][]
data
# Input used by the snippets above, read from a CSV file.
df1 <- read.csv("file.csv")
I have a table that has in the first column the starting node, the ending node, and the cost to move in that direction. It is one directional, you can't move backwards. These are all the combinations. Seems like I'm making an obvious mistake..
# Edge list (16 rows): V1 = starting node, V2 = ending node, V3 = cost of
# moving along that edge. The columns are renamed just below.
mygraph = structure(list(V1 = c(1L, 1L, 2L, 2L, 2L, 3L, 3L, 4L, 4L, 4L,
5L, 5L, 6L, 7L, 8L, 9L), V2 = c(3L, 4L, 3L, 4L, 5L, 7L, 6L, 6L,
8L, 9L, 7L, 8L, 10L, 10L, 10L, 10L), V3 = c(3L, 2L, 4L, 2L, 4L,
2L, 3L, 4L, 2L, 5L, 2L, 2L, 3L, 4L, 2L, 3L)), .Names = c("V1",
"V2", "V3"), class = "data.frame", row.names = c(NA, -16L))
# Label the columns: origin node, destination node, cost of the move.
names(mygraph) <- c("start", "end", "cost")

library(igraph)

# Turn the edge list into a directed graph (I think this is right?).
mygraph <- graph.data.frame(mygraph, directed = TRUE)
plot(mygraph)  # looks completely wrong???

# I'm doing something wrong here: I want to see the route and the total cost
# of going from node 1 to node 10.
help <- get.shortest.paths(mygraph, 1, 10)
help
If you change the last name, 'cost', to 'weight' in the following line, it will produce the right solution.
# Name the third column "weight" instead of "cost" so that the shortest-path
# search treats it as the edge cost.
names(mygraph) <- c('start', 'end', 'weight')
This works because get.shortest.paths() uses the edge attribute weight (not cost) as the cost of each edge.