Related
I have to do a ggplot barplot with errorbars, Tukey sig. letters for plants grown with different fertilizer concentraitions.
The data should be grouped after the dif. concentrations and the sig. letters should be added automaticaly.
I have already a code for the same problem but for Boxplot - which is working nicely. I tried several tutorials with barplots but I always get the problem; stat_count() can only have an x or y aesthetic.
So I thought, is it possible to get my boxplot code to a barplot code? I tried but I couldnt do it :) And if not - how do I automatically add tukeyHSD Test result sig. letters to a ggplot barplot?
This is my Code for the boxplot with the tukey letters:
value_max = Dünger, group_by(Duenger.g), summarize(max_value = max(Höhe.cm))
hsd=HSD.test(aov(Höhe.cm~Duenger.g, data=Dünger),
trt = "Duenger.g", group = T) sig.letters <- hsd$groups[order(row.names(hsd$groups)), ]
J <- ggplot(Dünger, aes(x = Duenger.g, y = Höhe.cm))+ geom_boxplot(aes(fill= Duenger.g))+ scale_fill_discrete(labels=c("0.5g", '1g', "2g", "3g", "4g"))+ geom_text(data = value_max, aes(x=Duenger.g, y = 0.1 + max_value, label = sig.letters$groups), vjust=0)+ stat_boxplot(geom = 'errorbar', width = 0.1)+ ggtitle("Auswirkung von Dünger auf die Höhe von Pflanzen") + xlab("Dünger in g") + ylab("Höhe in cm"); J
This is how it looks:
boxplot with tukey
Data from dput:
structure(list(Duenger.g = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4), plant = c(1, 2, 3, 4, 5, 7, 10, 11, 12, 13, 14, 18, 19,
21, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 37, 38, 39, 40,
41, 42, 43, 44, 48, 49, 50, 53, 54, 55, 56, 57, 58, 61, 62, 64,
65, 66, 67, 68, 69, 70, 71, 72, 73, 75, 79, 80, 81, 83, 85, 86,
88, 89, 91, 93, 99, 100, 102, 103, 104, 105, 106, 107, 108, 110,
111, 112, 113, 114, 115, 116, 117, 118, 120, 122, 123, 125, 126,
127, 128, 130, 131, 132, 134, 136, 138, 139, 140, 141, 143, 144,
145, 146, 147, 149), height.cm = c(5.7, 2.8, 5.5, 8, 3.5, 2.5,
4, 6, 10, 4.5, 7, 8.3, 11, 7, 8, 2.5, 7.4, 3, 14.5, 7, 12, 7.5,
30.5, 27, 6.5, 19, 10.4, 12.7, 27.3, 11, 11, 10.5, 10.5, 13,
53, 12.5, 12, 6, 12, 35, 8, 16, 56, 63, 69, 62, 98, 65, 77, 32,
85, 75, 33.7, 75, 55, 38.8, 39, 46, 35, 59, 44, 31.5, 49, 34,
52, 37, 43, 38, 28, 14, 28, 19, 20, 23, 17.5, 32, 16, 17, 24.7,
34, 50, 12, 14, 21, 33, 39.3, 41, 29, 35, 48, 40, 65, 35, 10,
26, 34, 41, 32, 38, 23.5, 22.2, 20.5, 29, 34, 45)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -105L))
Thank you
mirai
A bar chart and a boxplot are two different things. By default geom_boxplot computes the boxplot stats by default (stat="boxplot"). In contrast when you use geom_bar it will by default count the number of observations (stat="count") which are then mapped on y. That's the reason why you get an error. Hence, simply replacing geom_boxplot by geom_bar will not give your your desired result. Instead you could use e.g. stat_summary to create your bar chart with errorbars. Additionally I created a summary dataset to add the labels on the top of the error bars.
library(ggplot2)
library(dplyr)
library(agricolae)
Dünger <- Dünger |>
rename("Höhe.cm" = height.cm) |>
mutate(Duenger.g = factor(Duenger.g))
hsd <- HSD.test(aov(Höhe.cm ~ Duenger.g, data = Dünger), trt = "Duenger.g", group = T)
sig.letters <- hsd$groups %>% mutate(Duenger.g = row.names(.))
duenger_sum <- Dünger |>
group_by(Duenger.g) |>
summarize(mean_se(Höhe.cm)) |>
left_join(sig.letters, by = "Duenger.g")
ggplot(Dünger, aes(x = Duenger.g, y = Höhe.cm, fill = Duenger.g)) +
stat_summary(geom = "bar", fun = "mean") +
stat_summary(geom = "errorbar", width = .1) +
scale_fill_discrete(labels = c("0.5g", "1g", "2g", "3g", "4g")) +
geom_text(data = duenger_sum, aes(y = ymax, label = groups), vjust = 0, nudge_y = 1) +
labs(
title = "Auswirkung von Dünger auf die Höhe von Pflanzen",
x = "Dünger in g", y = "Höhe in cm"
)
#> No summary function supplied, defaulting to `mean_se()`
But as the summary dataset now already contains the mean and the values for the error bars a second option would be to do:
ggplot(duenger_sum, aes(x = Duenger.g, y = y, fill = Duenger.g)) +
geom_col() +
geom_errorbar(aes(ymin = ymin, ymax = ymax), width = .1) +
scale_fill_discrete(labels = c("0.5g", "1g", "2g", "3g", "4g")) +
geom_text(aes(y = ymax, label = groups), vjust = 0, nudge_y = 1) +
labs(
title = "Auswirkung von Dünger auf die Höhe von Pflanzen",
x = "Dünger in g", y = "Höhe in cm"
)
I have a data such that produced from special function:
where t0=1, alpha, q, gamma, C and beta are unknown parameters.
The question is how to fit the above function to following data, in R?
mydata<-structure(list(x = 1:100, y = c(0, 0, 2, 1, 3, 4, 4, 3, 7, 8,
9, 11, 12, 11, 15, 15, 17, 21, 49, 43, 117, 75, 85, 97, 113,
129, 135, 147, 149, 149, 123, 129, 127, 122, 143, 157, 144, 139,
123, 117, 141, 138, 124, 134, 158, 151, 136, 133, 121, 117, 122,
125, 117, 111, 98, 94, 92, 89, 73, 87, 91, 88, 94, 90, 93, 76,
60, 96, 71, 80, 71, 63, 65, 47, 74, 63, 78, 68, 55, 48, 51, 45,
48, 50, 71, 48, 35, 51, 69, 62, 64, 66, 51, 59, 58, 34, 57, 56,
63, 50)), class = "data.frame", row.names = c(NA, -100L))
I defined the function as follows:
t0<<-1
fyy<-function(t,cc0,alpha0,qq0,beta0,gamma0){
ret<-cc0*((t-t0)^alpha0)/(((1+(qq0-1)*beta0*(t-t0)^gamma0))^(1/(qq0-1)))
return(ret)
}
but I don't know how to continue?
as #mhovd mentioned I used "nls" function but I got an error as follows:
> fit <- nls(y~fyy(x,cc0 ,alpha0 ,beta0 ,gamma0 ,qq0 ),
data=data.frame(mydata), start=list(cc0 = .01,alpha0 =1,beta0 =.3,gamma0
= 2,qq0 = 1))
Error in numericDeriv(form[[3L]], names(ind), env) :
Missing value or an infinity produced when evaluating the model
In the comments #masoud references a paper about the specific function in the question. It suggests fixing gamma0 and qq0 and if we do that we do get a solution -- fm shown in red in the plot. We have also shown an alternate parametric curve as fm2 in blue. It also has 3 optimized parameters but has lower residual sum of squares (lower is better).
fyy <- function(t,cc0,alpha0,qq0,beta0,gamma0){
cc0 * ((t-t0)^alpha0) / (((1+(qq0-1)*beta0*(t-t0)^gamma0))^(1/(qq0-1)))
}
mydata0 <- subset(mydata, y > 0)
# fixed values
t0 <- 1
gamma0 <- 3
qq0 <- 1.2
st <- list(cc0 = 1, alpha0 = 1, beta0 = 1) # starting values
fm <- nls(y ~ fyy(x, cc0, alpha0, qq0, beta0, gamma0), mydata0,
lower = list(cc0 = 0.1, alpha0 = 0.1, beta0 = 0.00001),
start = st, algorithm = "port")
deviance(fm) # residual sum of squares
## [1] 61458.5
st2 <- list(a = 1, b = 1, c = 1)
fm2 <- nls(y ~ exp(a + b/x + c*log(x)), mydata0, start = st2)
deviance(fm2) # residual sum of squares
## [1] 16669.24
plot(mydata0, ylab = "y", xlab = "t")
lines(fitted(fm) ~ x, mydata0, col = "red")
lines(fitted(fm2) ~ x, mydata0, col = "blue")
legend("topright", legend = c("fm", "fm2"), lty = 1, col = c("red", "blue"))
I have a graph that I'm trying to make with ggplot and gridExtra, but my error bars are out of place. I want the error bars to be at the top of each bar, not where they are now. What can I do to correct them?
Also, what ggsave parameters will generate a graph with the same pixel parameters that I am using with the r png base function? ggsave seems to work more consistently than this function, so I need to use it.
Data:
###Open packages###
library(readxl)
library(readr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(gridExtra)
#Dataframes
set1 <- data.frame(type = c(1,
1,
1,
1,
1,
1,
1,
1,
1,
2,
2,
2,
2,
2,
2,
2,
2,
2,
3,
3,
3,
3,
3,
3,
3,
3,
3),
flowRate = c(24,
24,
24,
45,
45,
45,
58,
58,
58,
24,
24,
24,
45,
45,
45,
58,
58,
58,
24,
24,
24,
45,
45,
45,
58,
58,
58),
speed = c(0.563120137230256,
0.301721535875508,
0.170683367727845,
0.698874950490133,
0.158488731250147,
0.162788814307903,
0.105943103772245,
0.682354871986346,
0.17945825301837,
0.806637519498752,
0.599304186634932,
0.268788206619179,
0.518615600601962,
0.907628477211427,
0.144209408332705,
0.161586044320138,
0.946354993801663,
0.488881557759483,
0.497120443885793,
0.666120238846602,
0.264813203831783,
0.717007333314455,
0.95119232422312,
0.833669574933742,
0.450082932184122,
0.309570971522678,
0.732874401666482))
set2 <- data.frame(type = c(1,
1,
1,
1,
1,
1,
1,
1,
1,
2,
2,
2,
2,
2,
2,
2,
2,
2,
3,
3,
3,
3,
3,
3,
3,
3,
3),
flowRate = c(24,
24,
24,
45,
45,
45,
58,
58,
58,
24,
24,
24,
45,
45,
45,
58,
58,
58,
24,
24,
24,
45,
45,
45,
58,
58,
58),
speed = c(0.489966876244169,
0.535542121502899,
0.265940150225231,
0.399521957817437,
0.0831661276630631,
0.302201301891001,
0.78194419406759,
0.202331797255324,
0.192182716686147,
0.163038660094618,
0.658020173938572,
0.735633308902771,
0.480982144690572,
0.749452781972296,
0.491759702396918,
0.459610541236644,
0.397660083986082,
0.939983924945833,
0.128956722185581,
0.998492083119223,
0.440514184126494,
0.242917958355044,
0.350643319960552,
0.02613674288471,
0.71625407018877,
0.589325978787179,
0.649116781211748))
Code:
#Standard error of the mean function
sem <- function(x) sd(x)/sqrt(length(x))
#Aggregate dataframes, mean and Standard Error
mean_set1 <- aggregate(set1, by=list(set1$flowRate, set1$speed), mean)
mean_set1 <- select(mean_set1, -Group.1, -Group.2)
mean_set1 <- arrange(mean_set1, type, flowRate)
sem_set1 <- aggregate(set1, by=list(set1$flowRate, set1$speed), sem)
sem_set1 <- as.data.frame(sem_set1)
sem_set1 <- cbind(mean_set1$type, mean_set1$flowRate, sem_set1$Group.2)
sem_set1 <- as.data.frame(sem_set1)
mean_set2 <- aggregate(set2, by=list(set2$flowRate, set2$speed), mean)
mean_set2 <- select(mean_set2, -Group.1, -Group.2)
mean_set2 <- arrange(mean_set2, type, flowRate)
sem_set2 <- aggregate(set2, by=list(set2$flowRate, set2$speed), sem)
sem_set2 <- as.data.frame(sem_set2)
sem_set2 <- cbind(mean_set2$type, mean_set2$flowRate, sem_set2$Group.2)
sem_set2 <- as.data.frame(sem_set2)
#Graph sets
set1_graph <- ggplot(mean_set1, aes(x=type, y=speed, fill=factor(flowRate)))+
geom_bar(stat="identity",width=0.6, position="dodge", col="black")+
scale_fill_discrete(name="Flow Rate")+
xlab("type")+ylab("Speed")+
geom_errorbar(aes(ymin= mean_set1$speed,ymax=mean_set1$speed+sem_set1$V3), width=0.2, position = position_dodge(0.6))
set2_graph <- ggplot(mean_set2, aes(x=type, y=speed, fill=factor(flowRate)))+
geom_bar(stat="identity",width=0.6, position="dodge", col="black")+
scale_fill_discrete(name="Speed")+
xlab("type")+ylab("Flow Rate")+
geom_errorbar(aes(ymin= mean_set2$speed,ymax=mean_set2$speed+sem_set2$V3), width=0.2, position = position_dodge(0.6))
#Grid.arrange and save image
png("image.png", width = 1000, height = 700)
grid.arrange(set1_graph, set2_graph,nrow=1, ncol=2)
dev.off()
I have constructed models in glmer and would like to predict these on a rasterStack representing the fixed effects in my model. my glmer model is in the form of:
m1<-glmer(Severity ~ x1 + x2 + x3 + (1 | Year) + (1 | Ecoregion), family=binomial( logit ))
As you can see, I have random effects which I don't have as spatial layer - for example 'year'. Therefore the problem is really predicting glmer on rasterStacks when you don't have the random effects data random effects layers. If I use it out of the box without adding my random effects I get an error.
m1.predict=predict(object=all.var, model=m1, type='response', progress="text", format="GTiff")
Error in predict.averaging(model, blockvals, ...) :
Your question is very brief, and does not indicated what, if any, trouble you have encountered. This seems to work 'out of the box', but perhaps not in your case. See ?raster::predict for options.
library(raster)
# example data. See ?raster::predict
logo <- brick(system.file("external/rlogo.grd", package="raster"))
p <- matrix(c(48, 48, 48, 53, 50, 46, 54, 70, 84, 85, 74, 84, 95, 85,
66, 42, 26, 4, 19, 17, 7, 14, 26, 29, 39, 45, 51, 56, 46, 38, 31,
22, 34, 60, 70, 73, 63, 46, 43, 28), ncol=2)
a <- matrix(c(22, 33, 64, 85, 92, 94, 59, 27, 30, 64, 60, 33, 31, 9,
99, 67, 15, 5, 4, 30, 8, 37, 42, 27, 19, 69, 60, 73, 3, 5, 21,
37, 52, 70, 74, 9, 13, 4, 17, 47), ncol=2)
xy <- rbind(cbind(1, p), cbind(0, a))
v <- data.frame(cbind(pa=xy[,1], extract(logo, xy[,2:3])))
v$Year <- sample(2000:2001, nrow(v), replace=TRUE)
library(lme4)
m <- lmer(pa ~ red + blue + (1 | Year), data=v)
# here adding Year as a constant, as it is not a variable (RasterLayer) in the RasterStack object
x <- predict(logo, m, const=(data.frame(Year=2000)))
If you don't have the random effects, just use re.form=~0 in your predict call to predict at the population level:
x <- predict(logo, m, re.form=~0)
works without complaint for me with #RobertH's example (although I don't know if correctly)
I was trying to plot a metaregression for proportions using the meta package. The metaregression using metaprop works as expected. But when I run bubble, I get the error listed below the script:
library(meta)
sample <- c(74, 62,370, 72, 40, 84, 290, 244, 173, 106, 89, 139, 43, 398, 179, 31)
BLIPS <- c(23, 12, 11, 11, 1, 17, 52, 28, 6, 4, 3, 4, 1, 56, 22, 1)
covar <- c(21, 11, 14, 1, 4, 47, 2, 42, 16, 44, 3, 34, 11, 15, 21, 4)
hr <- data.frame(sample, BLIPS, covar)
meta <- metaprop(BLIPS, sample)
reg <- metareg(meta, covar)
reg
bubble(reg)
Error in [.data.frame(x$.meta$x$data, , covar.name) : undefined
columns selected
Currently your metaregression uses the variables from the global environment and not the variables from your data.frame hr. This appears to work as for the regression itself, but not for the bubble plot. If you just add data = hr to your metaprop call, then the bubble plot works as expected.
hr <- data.frame(sample, BLIPS, covar)
meta <- metaprop(BLIPS, sample, data = hr)
reg <- metareg(meta, covar)
reg
bubble(reg)