Add boxplots in the coplot() function - r

Consider the dataset "ToothGrow" from the "datasets" package: a 60 rows dataset for three variables: "Tooth length", "Supplement lenght", "Dose in milligrams per day".
str(ToothGrowth)
## 'data.frame': 60 obs. of 3 variables:
## $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
## $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
## $ dose: num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
I use the function coplot() to see the effect of the variable dose on the variable len for each factor of supp.
with(ToothGrowth, coplot(len ~ dose | supp))
How can I create the same plot with boxplots for len ~ dose, instead of having a point for each case?
Using the coplot() function from R base graphics would be preferable.

Try this:
library(lattice)
data("ToothGrowth")
ToothGrowth[,3]<-factor(ToothGrowth[,3])
#before
xyplot(len ~ dose | supp, data=ToothGrowth, layout=c(2,1))
#after
bwplot(len ~ dose | supp, data=ToothGrowth, layout=c(2,1))
The result is the following:
Edit:
If you want to only employ the R base package you can use the following.
coplot(len ~ dose | supp, data=ToothGrowth, xlim = c(0, 4),
panel = function(x, y, ...){boxplot(y ~ x, add=TRUE)})
Which yields:

Related

Inflection point for binomial mixed GLM model

I'd like to explore some possibilities and comparison approaches for inflection point calculation for the binomial mixed GLM model. I find the inflection package that used Extremum Surface Estimator (ESE) and Extremeum Distance Estimator (EDE). I make:
library(inflection)
library(dplyr)
library(glmmTMB)
library(DHARMa)
library(ggplot2)
library(ggeffects)
# My binomial data set
binom.ds <- read.csv("https://raw.githubusercontent.com/Leprechault/trash/main/mort_binon.csv")
str(binom.ds)
# 'data.frame': 400 obs. of 4 variables:
# $ temp : num 0 0 0 0 0 0 0 0 0 0 ...
# $ days : int 5 5 5 5 5 5 5 5 5 5 ...
# $ rep : chr "r1" "r2" "r3" "r4" ...
# $ mortality: int 0 1 1 1 1 1 1 1 0 1 ...
# Fit a binomial mixed GLM model
m_F <- glmmTMB(mortality ~ temp + days +
(1 | days ), data = binom.ds,
family = "binomial")
# Check the fitted model using DHARMa
plot(s1 <- simulateResiduals(m_F))
# All look likes OK
# Find a inflection point
# for temp
ds_F <- cbind(x=binom.ds$temp,y=exp(predict(m_F)))
ds_F<-as.data.frame(ds_F)
bb=bede(ds_F$x,ds_F$y,0);bb
bb$iplast
# [1] 12.5
# $iters
# n a b EDE
# 1 400 0 25 12.5
# Vizualize the inflection point for temp
ggpredict(m_F, terms = "temp [all]") %>% plot(add.data = TRUE) + geom_vline(xintercept = bb$iplast, colour="red", linetype = "longdash")
#for days
ds_F <- cbind(x=binom.ds$days,y=exp(predict(m_F)))
ds_F<-as.data.frame(ds_F)
bb2=bede(ds_F$x,ds_F$y,0);bb2
bb2$iplast
# [1] 22.5
# $iters
# n a b EDE
# 1 400 5 30 17.5
# 2 221 5 30 17.5
# 3 181 15 5 10.0
# 4 61 15 30 22.5
# Vizualize the inflection point for days
ggpredict(m_F, terms = "days [all]") %>% plot(add.data = TRUE) + geom_vline(xintercept = bb2$iplast, colour="red", linetype = "longdash")
My question is there other approaches/packages for this calculus?

X needs to be a numerical value

I have an assignment for a statistics class where I copied code from the teacher, but I keep getting an error where x should be numerical and I don't see where the issue is
Survey=read.csv("ClassSurvey.csv", na.strings = c("", " ", "NA"))
attach(Survey)
library(FSAdata)
op = par(oma=c(0,0,1.5,0), mar=c(3,3,2,1))
hist(Height ~ Gender,
las=1,
nrow=2, ncol=1,
cex.main=0.9, cex.lab=0.8, cex.axis=0.8,
mgp=c(1.8,0.6,0),
xlab = "Height (cms)")
thank you!!
You have a couple of problems. First, you can't make a histogram on a variable that is not numeric. Try this:
data(ToothGrowth)
str(ToothGrowth)
'data.frame': 60 obs. of 3 variables:
$ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
$ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
$ dose: num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
attach(ToothGrowth)
Attempt to draw a histogram of supp, a variable of class "factor" (it's not numeric).
> hist(supp)
Error in hist.default(supp ~ dose) : 'x' must be numeric
Your second problem is that the hist function doesn't accept a formula y ~ x. So
hist(len~supp)
fails as well, even though len is numeric. This works though:
hist(len)
I wonder if you copied the teacher's code incorrectly? Or the teacher's code is incorrect? Or something else.

Error for neuralnet package in R

I am trying to implement a simple Multi-layer feed forward neural network using "neuralnet" package available in R for the "iris" dataset.
The code that I am using is as follows-
library(neuralnet)
data(iris)
D <- data.frame(iris, stringsAsFactors=TRUE)
# create formula-
f <- as.formula(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width)
# convert qualitative variables to dummy (binary) variables-
m <- model.matrix(f, data = D)
# create neural network-
iris_nn <- neuralnet(f, data = m, hidden = 4, learningrate = 0.3)
I have two questions at this point of time-
1.) How do I use the "hidden" parameter? According to the manual pages, its saying-
hidden: a vector of integers specifying the number of hidden neurons (vertices) in each layer
How should I supply the vector of integer? Say if I wanted to have 1 hidden layer of 4 neurons/perceptrons in each layer Or if I wanted to have 3 hidden layers of 5 neurons in each layer.
2.) The last line of code gives me the error-
Error in eval(predvars, data, env) : object 'Species' not found
If I remove the "hidden" parameter, this error still persists.
What am I doing wrong here?
Edit: after adding the line-
m <- model.matrix(f, data = D)
The matrix 'm' no longer contains "Species" variable/attribute which I am trying to predict.
Output of
str(D)
str(D) 'data.frame': 150 obs. of 5 variables: $ Sepal.Length: num
5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
I have coded this with "nnet" successfully. Posting my code for reference-
data(iris)
library(nnet)
# create formula-
f <- as.formula(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width)
# create a NN with hidden layer having 4 neurons/node and
# maximum number of iterations = 3
iris_nn <- nnet(f, data = iris, size = 4, maxit = 3)
# create a test data-
new_obs <- data.frame(Sepal.Length = 5.5, Sepal.Width = 3.1, Petal.Length = 1.4, Petal.Width = 0.4)
# make prediction-
predict(iris_nn, new_obs) # gives percentage of which class it may belong
predict(iris_nn, new_obs, type = "class") # gives the class instead of percentages of which 'class' this data type may belong to
# create a 'confusion matrix' to measure accuracy of model-
# rows are actual values and columns are predicted values-
# table(iris$Species, predict(iris_nn, iris[, 1:4], type = "class"))
cat("\n\nConfusion Matrix for # of iters = 3\n")
print(table(iris$Species, predict(iris_nn, iris[, 1:4], type = "class")))
cat("\n\n")
rm(iris_nn)
# setting 'maxit' to 1000, makes the model coverge-
iris_nn <- nnet(f, data = iris, size = 4, maxit = 1000)
# create a new confusion matrix to check model accuracy again-
cat("\n\nConfusion Matrix for # of iters = 1000\n")
print(table(iris$Species, predict(iris_nn, iris[, 1:4], type = "class")))
# table(iris$Species, predict(iris_nn, iris[, 1:4], type = "class"))
# to plot 'iris_nn' trained NN-
# library("NeuralNetTools")
# plotnet(iris_nn)
Thanks!!
No clue how NN runs and what's the best way to run it. Don't know much about the iris dataset as well.
Just pointing out why its not running - the column Species
str(d)
'data.frame': 150 obs. of 5 variables:
$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
Species is a factor NN doesnt take factors.
Convert to dummy varibles -
d$set <-0
d$set[d$Species == "setosa"] <- 1
d$versi <-0
d$versi[d$Species == "versicolor"] <- 1
f <- as.formula(set+versi ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width)
iris_nn <- neuralnet(f, data = d, hidden = 4, learningrate = 0.3)
EDIT:
So when you say hidden = c(5,3)
then the neural network diagram would have your input nodes, 5 side by side hidden nodes(a layer), 3 side by side hidden nodes(another layer), output node/nodes
No clue how they impact the accuracy.
The compute for neuralnet is like predict for all other machine learning models.
library(neuralnet)
library(caret) #has the confusionmatrix function in it
#for some reason compute needs to be called like that, calling normally was producing some error
nnans <- neuralnet::compute(NN, test)
confusionMatrix(nnans, test_labels))
1.) Referring to your question how to use the "hidden" parameter, here are some examples.
neuralnet(f, data = m, hidden = c(2,3,2) , learningrate = 0.3)
or
neuralnet(f, data = m, hidden = c(2,2) , learningrate = 0.3)

Shading a specific area using a density plot - ggplot2

I have a data visualization question regarding ggplot2.
I'm trying to figure out how can I shade a specificity area in my density_plot. I googled it a lot and I tried all solutions.
My code is:
original_12 <- data.frame(sum=rnorm(100,30,5), sex=c("M","F"))
cutoff_12 <- 35
ggplot(data=original_12, aes(original_12$sum)) + geom_density() +
facet_wrap(~sex) +
geom_vline(data=original_12, aes(xintercept=cutoff_12),
linetype="dashed", color="red", size=1)
So, from this:
I want this:
The question on ggplot2 shade area under density curve by group is different than mine because they use different groups and graphs.
Similar to this SO question except the facet adds an additional complexity.
You need to rename the PANEL data as "sex" and factor it correctly to match your already existing aesthetic option. Your original "sex" factor is ordered alphabetically (default data.frame option), which is a little confusing at first.
make sure you name your plot "p" to create a ggplot object:
p <- ggplot(data=original_12, aes(original_12$sum)) +
geom_density() +
facet_wrap(~sex) +
geom_vline(data=original_12, aes(xintercept=cutoff_12),
linetype="dashed", color="red", size=1)
The ggplot object data can be extracted...here is the structure of the data:
str(ggplot_build(p)$data[[1]])
'data.frame': 1024 obs. of 16 variables:
$ y : num 0.00114 0.00121 0.00129 0.00137 0.00145 ...
$ x : num 17 17 17.1 17.1 17.2 ...
$ density : num 0.00114 0.00121 0.00129 0.00137 0.00145 ...
$ scaled : num 0.0121 0.0128 0.0137 0.0145 0.0154 ...
$ count : num 0.0568 0.0604 0.0644 0.0684 0.0727 ...
$ n : int 50 50 50 50 50 50 50 50 50 50 ...
$ PANEL : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
$ group : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
$ ymin : num 0 0 0 0 0 0 0 0 0 0 ...
$ ymax : num 0.00114 0.00121 0.00129 0.00137 0.00145 ...
$ fill : logi NA NA NA NA NA NA ...
$ weight : num 1 1 1 1 1 1 1 1 1 1 ...
$ colour : chr "black" "black" "black" "black" ...
$ alpha : logi NA NA NA NA NA NA ...
$ size : num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
$ linetype: num 1 1 1 1 1 1 1 1 1 1 ...
It cannot be used directly because you need to rename the PANEL data and factor it to match your original dataset. You can extract the data from the ggplot object here:
to_fill <- data_frame(
x = ggplot_build(p)$data[[1]]$x,
y = ggplot_build(p)$data[[1]]$y,
sex = factor(ggplot_build(p)$data[[1]]$PANEL, levels = c(1,2), labels = c("F","M")))
p + geom_area(data = to_fill[to_fill$x >= 35, ],
aes(x=x, y=y), fill = "red")
#DATA
set.seed(2)
original_12 <- data.frame(sum=rnorm(100,30,5), sex=c("M","F"))
cutoff_12 <- 35
#Calculate density for each sex
temp = do.call(rbind, lapply(split(original_12, original_12$sex), function(a){
d = density(a$sum)
data.frame(sex = a$sex[1], x = d$x, y = d$y)
}))
#For each sex, seperate the data for the shaded area
temp2 = do.call(rbind, lapply(split(temp, temp$sex), function(a){
rbind(data.frame(sex = a$sex[1], x = cutoff_12, y = 0), a[a$x > cutoff_12,])
}))
#Plot
ggplot(temp) +
geom_line(aes(x = x, y = y)) +
geom_vline(xintercept = cutoff_12) +
geom_polygon(data = temp2, aes(x = x, y = y)) +
facet_wrap(~sex) +
theme_classic()

R: Split metricsgraphics histogram by factor

I have a data frame that looks sort of like the following:
'data.frame': 400 obs. of 4 variables:
$ admit: Factor w/ 2 levels "rejected","accepted": 1 2 2 2 1 2 2 1 2 1 ...
$ gpa : num 3.61 3.67 4 3.19 2.93 3 2.98 3.08 3.39 3.92 ...
Now I would like to turn this into a histogram of GPA using the metricsgraphics package, but split the data by the factor 'admit'. How is this done?
Using ggplot I can do something like the following:
ggplot(data, aes(gpa)) +
geom_histogram(aes(fill=admit, y=..density..),
position="dodge",
binwidth=0.1
)
but I'm looking at how to specifically do so using metricsgraphics.
I currently have
mjs_plot(data, x = gpa) %>%
mjs_histogram(bins = 80)
but of course this doesn't split by the factor.
I think you'll have to produce each plot and arrange it into a grid. From the package vignette:
moar_plots <- lapply(1:7, function(x) {
mjs_plot(rbeta(10000, x, x), width="250px", height="250px", linked=TRUE) %>%
mjs_histogram(bar_margin=2) %>%
mjs_labs(x_label=sprintf("Plot %d", x))
})
mjs_grid(moar_plots, nrow=4, ncol=3, widths=c(rep(0.33, 3)))

Resources