I would like to add a legend and labels to my graph, which I created using the reghelper package.
This is the code:
library(reghelper)

# Simulated example data: three uniform(1, 7) variables and a country label.
dv1 <- runif(n = 100, min = 1, max = 7)
dv2 <- runif(n = 100, min = 1, max = 7)
dv3 <- runif(n = 100, min = 1, max = 7)
# Convert to a factor BEFORE building the data frame: lm() reads `df`, so a
# conversion applied to the free-standing vector afterwards never reaches it.
# (runif() already returns numeric, so dv1-dv3 need no conversion.)
country <- factor(rep(c("India", "US", "Poland"), length.out = 100))
df <- data.frame(country, dv1, dv2, dv3)

# Linear model with a dv2 x dv3 interaction and a country main effect.
lm0 <- lm(dv1 ~ dv2 * dv3 + country, data = df, na.action = na.exclude)
summary(lm0)

# Plot the fitted model: dv2 on the x axis, one line per country, split by dv3.
graph_model(lm0, y = dv1, x = dv2, lines = country, split = dv3)
What should I add to the code to add a title, x and y labels, and legend labels?
Thank you in advance!
graph_model() produces a ggplot2 object, so you can manipulate it with the usual ggplot2 functions.
For example:
library(ggplot2)
# Build the model plot first, then add the title and axis labels with labs().
p <- graph_model(lm0, y = dv1, x = dv2, lines = country, split = dv3)
p <- p + labs(title = "Tytulik", x = "Nowy X", y = "Nowy Y")
# Rename the colour legend; the relabelled plot is printed, `p` is not reassigned.
update_labels(p, list(colour = "Legenda"))
Related
I'm doing an analysis on air pollutants using Bayesian Kernel Machine Regression, using the bkmr package in R.
https://jenfb.github.io/bkmr/overview.html
The link is to Jennifer Bobb's instructions on how to use this package. I don't think it is relevant to the issue though. What I want to do is have PM2.5, O3, and NO2 show up in my charts with the 2.5, 3, and 2 as subscripts. I'm trying to use this function and getting no luck:
# Attempted rename of the 13 columns of `dat`. The "$" and "[...]" are ordinary
# characters inside the column names, so the plots show them literally instead
# of rendering subscripts.
colnames(dat) <- c("LTE4", "$O[3]", "$PM[2.5]", "$NO[2]", "Diethyl", "Dimethyl", "age", "tmpf", "relh", "sex", "agany", "agself", "asthma")
When I do this, the labels show up in the plots with the $ and [] characters displayed literally instead of subscripted numbers. Any ideas?
This is the full code I am using:
### January BKMR Analysis ###
## Hierarchical Variable Selection ##
## Updated June 6, 2022 ##
# Reading in necessary packages
library(tidyverse)
library(bkmr)
# Load the study data and take a first look at it
trio_semipro <- readRDS("C:/Users/Matt/OneDrive/Documents/Fresno Thesis/Thesis Code/trio_semipro.rds")
trio_semipro
dim(trio_semipro)
head(trio_semipro)
# Log-transform the `Final` column into `log_lte4`
trio_semipro$log_lte4 <- log(trio_semipro$Final)
# Separating out dataframes for winter and summer to run separate models for each season
trio_semipro_w <- trio_semipro %>%
  filter(visit_month == 1)
trio_semipro_s <- trio_semipro %>%
  filter(visit_month == 2)
# Summer and Winter Dataframes
trio_semipro_w
trio_semipro_s
head(trio_semipro_w)
#view(trio_semipro_w)
# Assemble the winter analysis matrix. cbind() is kept (rather than
# data.frame()) so that every column is combined into one numeric matrix,
# which the "- 1" recodes below rely on.
dat <- cbind(
  trio_semipro_w$log_lte4, trio_semipro_w$O3,
  trio_semipro_w$PM25, trio_semipro_w$NO2,
  trio_semipro_w$diethyl, trio_semipro_w$dimethyl,
  trio_semipro_w$age, trio_semipro_w$tmpf, trio_semipro_w$relh,
  trio_semipro_w$sex, trio_semipro_w$agriculture_anyone,
  trio_semipro_w$agriculture_self, trio_semipro_w$asthma
)
head(dat)
colnames(dat) <- c("LTE4", "$O[3]", "$PM[2.5]", "$NO[2]", "Diethyl", "Dimethyl", "age", "tmpf", "relh", "sex", "agany", "agself", "asthma")
dat <- as.data.frame(dat)
dat$sex
# recode the binary variable to be 0, 1 and NA
dat$agself <- dat$agself - 1
dat$agself[which(dat$agself == 2)] <- NA
dat$agself
# recode sex variable
dat$sex <- dat$sex - 1
# recode agany variable
dat$agany <- dat$agany - 1
dat$agany[which(dat$agany == 2)] <- NA
# recode asthma variable
dat$asthma <- dat$asthma - 1
dat$asthma[which(dat$asthma == 2)] <- NA
dat$asthma
dat$sex
dat$agany
# good
head(dat)
# Keep only rows without missing values. complete.cases() replaces
# dat[-which(apply(dat, 1, anyNA)), ]: when no row contains an NA, which()
# returns integer(0) and negative indexing with it drops EVERY row.
complete_dat <- dat[complete.cases(dat), ]
dim(complete_dat)
# Fit BKMR
# Exposures (columns 2-6 of complete_dat) are standardised column by column.
zscaled <- apply(complete_dat[, 2:6], 2, scale)
# The outcome column is named "LTE4". `$` matching is case-sensitive, so
# `complete_dat$lte4` evaluates to NULL instead of the outcome.
yscaled <- scale(complete_dat$LTE4)
# Covariates: columns 7-9 are standardised, columns 10-13 are used as they are.
xscaled <- cbind(scale(complete_dat[, 7:9]), complete_dat[, 10:13])
# groups = c(1, 1, 1, 2, 2) assigns the first three exposure columns to
# group 1 and the last two to group 2.
fit_bkmr <- kmbayes(
  y = yscaled, Z = zscaled, X = xscaled,
  iter = 20000, varsel = TRUE, groups = c(1, 1, 1, 2, 2), verbose = FALSE
)
# Trace plots of selected parameters
plot(fit_bkmr$sigsq.eps, type = "l")
TracePlot(fit = fit_bkmr, par = "beta", comp = 4)
TracePlot(fit = fit_bkmr, par = "sigsq.eps")
TracePlot(fit = fit_bkmr, par = "r", comp = 1)
# Estimating posterior inclusion probabilities
ExtractPIPs(fit_bkmr)
# Estimating h
y <- yscaled
Z <- zscaled
X <- xscaled
# One-row matrix holding the median of every exposure column
med_vals <- apply(Z, 2, median)
Znew <- matrix(med_vals, nrow = 1)
# Summarize model output
pred.resp.univar <- PredictorResponseUnivar(fit = fit_bkmr)
library(ggplot2) # Using ggplot to plot cross sections of h
# Facet labeller that renders exposure names such as "O[3]" with a subscript:
# a leading "$" (not valid plotmath) is stripped first, then label_parsed()
# interprets the remaining text as a plotmath expression.
subscript_labeller <- as_labeller(
  function(x) sub("^\\$", "", x),
  default = label_parsed
)
ggplot(pred.resp.univar, aes(z, est, ymin = est - 1.96 * se, ymax = est + 1.96 * se)) +
  geom_smooth(stat = "identity") +
  geom_hline(yintercept = 0, lty = 5, col = "red2", alpha = 0.4) +
  facet_wrap(~ variable, nrow = 1, labeller = subscript_labeller) +
  ylab("h(z)")
# Visualize the bivariate exposure-response function for two predictors, where
# all of the other predictors are fixed at a particular percentile.
pred.resp.bivar <- PredictorResponseBivar(fit = fit_bkmr, min.plot.dist = 1)
ggplot(pred.resp.bivar, aes(z1, z2, fill = est)) +
  geom_raster() +
  facet_grid(variable2 ~ variable1, labeller = subscript_labeller) +
  scale_fill_gradientn(colours = c("#0000FFFF", "#FFFFFFFF", "#FF0000FF")) +
  xlab("expos1") +
  ylab("expos2") +
  ggtitle("h(expos1, expos2)")
I generated a plot using a long format table, ggplot() and facet_wrap() functions in Rstudio. I want to add values of Tukey's tests applied to different levels of data and annotated with a system of stars (or letters) for significance.
Here is the example code :
library(ggplot2)

# create a df for example
a <- paste0("Sample_", rep(1:100, 1))
b <- c(rep("Dubai", 30), rep("London", 35), rep("Bucarest", 35))
c <- c(rep("Sun", 16), rep("Rain", 16), rep("Cloud", 17), rep("Thunder", 16), rep("Star", 35))
d <- runif(n = 100, min = 0.5, max = 50)
e <- runif(n = 100, min = 0.5, max = 50)
f <- runif(n = 100, min = 0.1, max = 3)
df <- data.frame("Sample" = a, "Location" = b, "Obs" = c, "Measure1" = d, "Measure2" = e, "Measure3" = f)
# convert df to long format
long <- reshape2::melt(df, id.vars = c("Sample", "Location", "Obs"), measure.vars = c("Measure1", "Measure2", "Measure3"))
# Shape and colour per observation type. The vectors are NAMED so each value is
# bound to its own level: unnamed values/labels are matched by position to the
# alphabetically sorted levels (Cloud, Rain, Star, Sun, Thunder), which would
# label e.g. the "Cloud" data as "Sun".
obs_shapes <- c(Sun = 16, Rain = 17, Cloud = 17, Thunder = 16, Star = 16)
obs_colours <- c(
  Sun = "#00BF7D", Rain = "#5B6BF7", Cloud = "#00B0F6",
  Thunder = "#A3A500", Star = "#F8766D"
)
# make a plot
p <- ggplot(long, aes(Location, value, color = Obs)) +
  facet_wrap(~ variable, drop = TRUE, scales = "free") +
  geom_boxplot(outlier.colour = NA, alpha = 0.8, position = position_dodge2(width = 1, preserve = "single")) +
  geom_point(size = 1.2, aes(shape = Obs), position = position_dodge(width = 0.7, preserve = "total")) +
  # identical name and breaks on both scales keep the two legends merged and
  # in the intended order
  scale_shape_manual("Obs", values = obs_shapes, breaks = names(obs_shapes)) +
  scale_color_manual("Obs", values = obs_colours, breaks = names(obs_colours)) +
  labs(x = "Location", y = "Measure") +
  # left-align legend text (replaces the deprecated legend.text.align)
  theme(legend.text = element_text(hjust = 0))
p
I get this :
And I would like this :
I tried with geom_signif() and stat_compare_means() functions but without success.
Any idea please ?
Thank you for your attention.
Here is some reproducible code:
# Fixed seed so the simulated data and the simulated p-value are reproducible.
set.seed(42)
# Two columns of uniform draws on [1, 3], rounded to whole numbers.
df <- data.frame(
  x = round(runif(n = 100, min = 1, max = 3), digits = 0),
  y = round(runif(n = 100, min = 1, max = 3), digits = 0)
)
# Chi-squared test of x against y with a simulated p-value.
chisq <- chisq.test(df$x, df$y, simulate.p.value = TRUE)
chisq
# Set the repr.plot.* size options to 5 x 5
# (NOTE(review): presumably read by a Jupyter/IRkernel front end - confirm).
options(repr.plot.width = 5, repr.plot.height = 5)
# Plot the chi-squared residuals; is.cor = FALSE because they are not
# correlations. NOTE(review): corrplot() is not base R - presumably from the
# corrplot package, which is not loaded in the visible code. According to the
# accompanying text, the xlab/ylab arguments do not produce axis labels here.
corrplot(chisq$residuals, is.cor = FALSE, xlab="x", ylab="y")
I would like the corrplot to show the label x and y. I think xlab and ylab should work but it does not. Any ideas? Thanks!
I have some repeated measures, ordinal response data:
# Simulated repeated-measures data: 50 rows with a subject id (letters a-e),
# an ordered response drawn from 1-7 and two numeric covariates.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
dat <- data.frame(
  id = factor(sample(letters[1:5], 50, replace = TRUE)),
  response = factor(sample(1:7, 50, replace = TRUE), ordered = TRUE),
  x1 = runif(n = 50, min = 1, max = 10),
  x2 = runif(n = 50, min = 100, max = 1000)
)
I have built the following model:
library(ordinal)
# Cumulative link mixed model: x1 and x2 as fixed effects, random intercept per id.
model <- clmm(
  response ~ x1 + x2 + (1 | id),
  data = dat
)
I have some new data:
# New observations to predict for. `id` is given the full level set a-e
# explicitly: with only 5 draws, some letters are usually not sampled, and
# factor() would otherwise create a level set that differs from the one used
# when `dat` was built.
new_dat <- data.frame(
  id = factor(sample(letters[1:5], 5, replace = TRUE), levels = letters[1:5]),
  x1 = runif(n = 5, min = 1, max = 10),
  x2 = runif(n = 5, min = 100, max = 1000)
)
I want to be able to use the model to predict the probability of each level of dat$response occurring for new_dat, whilst still also accounting for id.
Unfortunately predict() does not work for clmm objects. predict() does work for clmm2 objects but it ignores any random effects included.
What I want to achieve is something similar to what has been done in Figure 3 of the following using this code:
library(ordinal)
# Cumulative link mixed model for the wine ratings with `judge` as the random term.
fm2 <- clmm2(
  rating ~ temp + contact,
  random = judge,
  data = wine,
  Hess = TRUE,
  nAGQ = 10
)
# Category probabilities from thresholds and a linear predictor.
#   eta      - linear predictor value(s)
#   theta    - threshold (cut-point) values
#   cat      - indices of the categories to evaluate (default: all of them)
#   inv.link - inverse link function (default: plogis)
# Returns, for each category in `cat`, inv.link(upper threshold - eta) minus
# inv.link(lower threshold - eta), combined by sapply().
pred <- function(eta, theta, cat = 1:(length(theta)+1), inv.link = plogis){
  # Pad the thresholds with -1e3 / 1e3 so the first and last category also
  # have a bound on both sides.
  cutpoints <- c(-1e3, theta, 1e3)
  category_prob <- function(k) {
    upper <- inv.link(cutpoints[k + 1] - eta)
    lower <- inv.link(cutpoints[k] - eta)
    upper - lower
  }
  sapply(cat, category_prob)
}
# Grid of linear-predictor contributions: judge effect at -/0/+ qnorm(0.95)
# times the random-effect standard deviation, crossed with contact and temp
# each being either 0 or the fitted coefficient.
mat <- expand.grid(
  judge = qnorm(0.95) * c(-1, 0, 1) * fm2$stDev,
  contact = c(0, fm2$beta[2]),
  temp = c(0, fm2$beta[1])
)
# One row of category probabilities per grid row.
pred.mat <- pred(eta = rowSums(mat), theta = fm2$Theta)
# Panel titles, one per contact/temp combination.
lab <- paste0(
  "contact=", rep(levels(wine$contact), 2), ", ",
  "temp=", rep(levels(wine$temp), each = 2)
)
par(mfrow = c(2, 2))
# Rows k, k+1, k+2 of pred.mat hold the three judge settings of one panel.
for (k in seq(1, 10, by = 3)) {
  plot(
    1:5, pred.mat[k, ],
    lty = 2, type = "l", ylim = c(0, 1),
    xlab = "Bitterness rating scale", axes = FALSE,
    ylab = "Probability", main = lab[ceiling(k / 3)], las = 1
  )
  axis(1)
  axis(2)
  lines(1:5, pred.mat[k + 1, ], lty = 1)
  lines(1:5, pred.mat[k + 2, ], lty = 3)
  legend(
    "topright",
    c("avg. judge", "5th %-tile judge", "95th %-tile judge"),
    lty = 1:3, bty = "n"
  )
}
Except, my model contains multiple continuous covariates (as opposed to binary factors).
How can I use the model data to predict the probability of each level of dat$response occurring for new_dat, whilst still also accounting for id?
Many thanks.
So far, I have managed to fit the model with the survreg function, as shown below:
# Parametric survival regression (dist = "lognormal") with main effects and
# the BM1:BM2 and BM1:mutation interactions.
model <- survreg(
  formula = Surv(TimeDeath, event) ~
    age + BM1 + BM2 + mutation + sex + BM1:BM2 + BM1:mutation,
  data = DF,
  dist = "lognormal"
)
Now, i need to predict failure time of a male patient who is 51 years old, he did not have the gene mutation, and for BM1 he had the value 3.7 mg/dL and for BM2 the value 251 mg/dL.
I continued like below:
# Prediction grid: age varies over its observed range while the remaining
# covariates are held at the patient's values (no mutation, BM1 = 3.7,
# BM2 = 251, male). The original grid varied every covariate at once and used
# "40" as a sex value.
# NOTE(review): the labels "No" and "male" are taken from the original
# snippet - confirm they match the coding of DF$mutation and DF$sex.
ND <- with(DF, data.frame(
  age = seq(min(age), max(age), length.out = 20),
  BM1 = 3.7,
  BM2 = 251,
  mutation = "No",
  sex = "male"
))
# The fitted object is called `model` (no `model_final` is defined).
# type = "lp" returns the linear predictor on the log-time scale together with
# its standard error, so the interval is built there and then exponentiated.
prs <- predict(model, ND, se.fit = TRUE, type = "lp")
ND$pred <- prs[[1]]
ND$se <- prs[[2]]
ND$lo <- exp(ND$pred - 1.96 * ND$se)
ND$up <- exp(ND$pred + 1.96 * ND$se)
ND$pred <- exp(ND$pred)
library(lattice)
# Predicted time with its interval limits plotted against age only; the other
# covariates are constant in ND.
xyplot(pred + lo + up ~ age, data = ND, type = "l",
       lty = c(1, 2, 2), col = "black", lwd = 4, xlab = "Age",
       ylab = "Survival Time")
# Prediction for the patient described in the question (51 years old).
# For the log-normal model, exp(linear predictor) is the median survival time.
patient <- data.frame(age = 51, BM1 = 3.7, BM2 = 251,
                      mutation = "No", sex = "male")
patient_lp <- predict(model, patient, se.fit = TRUE, type = "lp")
exp(patient_lp$fit +
      c(estimate = 0, lower = -1.96, upper = 1.96) * patient_lp$se.fit)
I know I have not defined the ND object correctly, but I don't know how to define it properly, or how to write the plotting call.
Some help please?
Look at ?predict.survreg. The construction of the CIs does look suspicious; I would have thought you would instead set se.fit=TRUE. There is a newdata argument, which is where you supply the covariate values needed for prediction:
# All combinations of mutation status and age for a male patient with
# BM1 = 3.7 and BM2 = 251. Fixes relative to the original snippet:
# `<-` instead of `<`, `age = seq(...)` instead of `age-seq(...)`, balanced
# parentheses, and `age` looked up in DF.
# NOTE(review): the original listed sex = c("male", "40"); "40" is not a
# plausible sex value, so only "male" is kept - add further levels if needed.
all.combos <- expand.grid(
  mutation = c("No", "Yes"),
  BM1 = 3.7,
  BM2 = 251,
  sex = "male",
  age = seq(min(DF$age), max(DF$age), length.out = 20)
)
# Predictions and their standard errors for every row of the grid.
preds.combos <- predict(model, all.combos, se.fit = TRUE)