I have to make a ggplot bar plot with error bars and Tukey significance letters for plants grown with different fertilizer concentrations.
The data should be grouped by concentration, and the significance letters should be added automatically.
I already have code for the same problem as a boxplot, which works nicely. I tried several tutorials for bar plots, but I always get the error: stat_count() can only have an x or y aesthetic.
So I thought: is it possible to convert my boxplot code into bar plot code? I tried but couldn't do it :) And if not, how do I automatically add the significance letters from a TukeyHSD test to a ggplot bar plot?
This is my Code for the boxplot with the tukey letters:
# Tallest plant per fertilizer dose; used to place each letter just above
# the corresponding box.
value_max <- Dünger %>%
  group_by(Duenger.g) %>%
  summarize(max_value = max(Höhe.cm))

# Tukey HSD on the one-way ANOVA; group = TRUE requests the grouping letters.
hsd <- HSD.test(
  aov(Höhe.cm ~ Duenger.g, data = Dünger),
  trt = "Duenger.g", group = TRUE
)
# Re-order the letter table by its row names (the dose levels).
# NOTE(review): presumably this is what lines the letters up with the rows
# of value_max, since geom_text() below pairs them purely by position.
sig.letters <- hsd$groups[order(row.names(hsd$groups)), ]

J <- ggplot(Dünger, aes(x = Duenger.g, y = Höhe.cm)) +
  geom_boxplot(aes(fill = Duenger.g)) +
  # NOTE(review): these labels are matched to the fill levels by position;
  # verify they agree with the actual levels of Duenger.g.
  scale_fill_discrete(labels = c("0.5g", "1g", "2g", "3g", "4g")) +
  geom_text(
    data = value_max,
    aes(x = Duenger.g, y = 0.1 + max_value, label = sig.letters$groups),
    vjust = 0
  ) +
  stat_boxplot(geom = "errorbar", width = 0.1) +
  ggtitle("Auswirkung von Dünger auf die Höhe von Pflanzen") +
  xlab("Dünger in g") +
  ylab("Höhe in cm")
J
This is how it looks:
boxplot with tukey
Data from dput:
structure(list(Duenger.g = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4), plant = c(1, 2, 3, 4, 5, 7, 10, 11, 12, 13, 14, 18, 19,
21, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 37, 38, 39, 40,
41, 42, 43, 44, 48, 49, 50, 53, 54, 55, 56, 57, 58, 61, 62, 64,
65, 66, 67, 68, 69, 70, 71, 72, 73, 75, 79, 80, 81, 83, 85, 86,
88, 89, 91, 93, 99, 100, 102, 103, 104, 105, 106, 107, 108, 110,
111, 112, 113, 114, 115, 116, 117, 118, 120, 122, 123, 125, 126,
127, 128, 130, 131, 132, 134, 136, 138, 139, 140, 141, 143, 144,
145, 146, 147, 149), height.cm = c(5.7, 2.8, 5.5, 8, 3.5, 2.5,
4, 6, 10, 4.5, 7, 8.3, 11, 7, 8, 2.5, 7.4, 3, 14.5, 7, 12, 7.5,
30.5, 27, 6.5, 19, 10.4, 12.7, 27.3, 11, 11, 10.5, 10.5, 13,
53, 12.5, 12, 6, 12, 35, 8, 16, 56, 63, 69, 62, 98, 65, 77, 32,
85, 75, 33.7, 75, 55, 38.8, 39, 46, 35, 59, 44, 31.5, 49, 34,
52, 37, 43, 38, 28, 14, 28, 19, 20, 23, 17.5, 32, 16, 17, 24.7,
34, 50, 12, 14, 21, 33, 39.3, 41, 29, 35, 48, 40, 65, 35, 10,
26, 34, 41, 32, 38, 23.5, 22.2, 20.5, 29, 34, 45)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -105L))
Thank you
mirai
A bar chart and a boxplot are two different things. By default, geom_boxplot computes the boxplot stats (stat="boxplot"). In contrast, geom_bar by default counts the number of observations (stat="count"), which are then mapped to y. That's the reason why you get an error. Hence, simply replacing geom_boxplot with geom_bar will not give you your desired result. Instead you could use e.g. stat_summary to create your bar chart with error bars. Additionally, I created a summary dataset to add the labels on top of the error bars.
library(ggplot2)
library(dplyr)
library(agricolae)
# Rename the height column to the name used below and turn the dose into a
# factor so each concentration is treated as its own group.
Dünger <- Dünger |>
  rename("Höhe.cm" = height.cm) |>
  mutate(Duenger.g = factor(Duenger.g))

# Tukey HSD on the one-way ANOVA; group = TRUE requests the grouping letters.
hsd <- HSD.test(
  aov(Höhe.cm ~ Duenger.g, data = Dünger),
  trt = "Duenger.g", group = TRUE
)
# The dose is stored in the row names of hsd$groups; copy it into a column
# so the letters can be joined by dose instead of by row position.
sig.letters <- hsd$groups %>% mutate(Duenger.g = row.names(.))

# One row per dose: mean_se() contributes y / ymin / ymax, the join adds the
# `groups` letter column used as the text label.
duenger_sum <- Dünger |>
  group_by(Duenger.g) |>
  summarize(mean_se(Höhe.cm)) |>
  left_join(sig.letters, by = "Duenger.g")

ggplot(Dünger, aes(x = Duenger.g, y = Höhe.cm, fill = Duenger.g)) +
  stat_summary(geom = "bar", fun = "mean") +
  stat_summary(geom = "errorbar", width = .1) +
  # Build the legend labels from the factor levels themselves. A fixed label
  # vector is matched by position and mislabels the doses whenever it does
  # not list exactly the levels present in the data.
  scale_fill_discrete(labels = function(x) paste0(x, "g")) +
  geom_text(
    data = duenger_sum,
    aes(y = ymax, label = groups),
    vjust = 0, nudge_y = 1
  ) +
  labs(
    title = "Auswirkung von Dünger auf die Höhe von Pflanzen",
    x = "Dünger in g", y = "Höhe in cm"
  )
#> No summary function supplied, defaulting to `mean_se()`
But as the summary dataset now already contains the mean and the values for the error bars a second option would be to do:
# Same chart drawn directly from the summary table: y is the group mean,
# ymin / ymax are the error-bar limits, `groups` holds the Tukey letters.
ggplot(duenger_sum, aes(x = Duenger.g, y = y, fill = Duenger.g)) +
  geom_col() +
  geom_errorbar(aes(ymin = ymin, ymax = ymax), width = .1) +
  # Build the legend labels from the dose values themselves. A fixed label
  # vector is matched by position and mislabels the doses whenever it does
  # not list exactly the values present in the data.
  scale_fill_discrete(labels = function(x) paste0(x, "g")) +
  # Place each letter just above the top of its error bar.
  geom_text(aes(y = ymax, label = groups), vjust = 0, nudge_y = 1) +
  labs(
    title = "Auswirkung von Dünger auf die Höhe von Pflanzen",
    x = "Dünger in g", y = "Höhe in cm"
  )
here is a reprex
data<- structure(list(lanmark_id = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
59, 60, 61, 62, 63, 64, 65, 66, 67), V1 = c(0.00291280916742007,
0.00738863171211713, 0.0226678081211574, 0.0475105228945172,
0.0932285720818941, 0.167467706279089, 0.257162845610094, 0.365202733889021,
0.49347857580521, 0.623654594804239, 0.738846221030799, 0.838001377618909,
0.911583795022151, 0.954620025430512, 0.976736039833402, 0.99275439380643,
1.00100526672829, 0.0751484964183746, 0.136267471453466, 0.223219796351563,
0.312829176190895, 0.396253287447153, 0.589077347394549, 0.682150866526948,
0.771279538477539, 0.856242644022999, 0.915433541338973, 0.493665602840245,
0.491283285973581, 0.488913167946858, 0.486968906096063, 0.384707082576335,
0.43516446651127, 0.48730704698643, 0.541730425616146, 0.590794609520034,
0.176234316360877, 0.230353437655898, 0.295908510434122, 0.350673723300921,
0.2927721757992, 0.228392965512228, 0.634474821310078, 0.692554938010577,
0.757884656518485, 0.809961553290539, 0.760324208523177, 0.696892501347341,
0.299062528225204, 0.371899560139738, 0.440183530232855, 0.488448817156316,
0.542120710507391, 0.613931454931259, 0.683122622479693, 0.614367295821043,
0.544516611213321, 0.487065702940653, 0.43466839036949, 0.367662837035504,
0.329392110306872, 0.439192556373207, 0.488617118648197, 0.543288506065858,
0.652131615571443, 0.541622182786469, 0.486664920417254, 0.437126878794749
), V2 = c(0.201088019764115, 0.335422141956174, 0.468591127485112,
0.597955245417373, 0.719502795031081, 0.826191980419368, 0.912263437847338,
0.978932088608654, 0.996572250349122, 0.975164350943783, 0.906204543800476,
0.817791059656974, 0.711167374856116, 0.587462637963028, 0.457981280500493,
0.327526817895531, 0.19652402489511, 0.0832018969548692, 0.0247526745448235,
0.00543973063471442, 0.0169853862992864, 0.0463565705952832,
0.0442986445765913, 0.0151651597693172, 0.00747493463745755,
0.0263496825405166, 0.0805712600069456, 0.160307477500307, 0.24640401358039,
0.332244740019727, 0.420995916418539, 0.486383354389177, 0.505514985155285,
0.521022030162301, 0.5059272511442, 0.48818970795347, 0.184054088286897,
0.153658218058329, 0.153359749238857, 0.186997311695192, 0.20294291755153,
0.204166125257439, 0.186997311695192, 0.153386090373069, 0.155932705636629,
0.184603717976376, 0.203900583330345, 0.202836636618411, 0.670663080116174,
0.635972857244521, 0.619932598923225, 0.632625553953685, 0.620132318139554,
0.637530241507316, 0.668109937001625, 0.718821664744205, 0.73956412947459,
0.744898219300658, 0.74046882628352, 0.720755964662638, 0.672731384920681,
0.666152981987244, 0.670464844757437, 0.664772611108765, 0.671145517468628,
0.673968618595099, 0.67986363963374, 0.675352028351748), coef2 = c(0,
0, 0, 0, 0, 0, 0, 0, 0.565178003460693, 0, 0, 0, 0, 0, 0, 0,
0, 0.0433232019717308, 0.0433232019717308, 0.442833876807268,
0.574211955093656, 0.574211955093656, 0.574211955093656, 0.574211955093656,
0.442833876807268, 0.0433232019717308, 0.0433232019717308, 0.0612451242746323,
0.0612451242746323, 0, 0, 0, 0, 0, 0, 0, 0.343056259557492, 0.701076795777046,
0.674029769391816, 0, 0.538117834886036, 0.990039002564078, 0.451921167678043,
0.701076795777046, 0.701076795777046, 0.316009233172263, 0.990039002564078,
0.990039002564078, 0.878350036859346, 0.343364662128988, 0.282119537854356,
0.282119537854356, 0.282119537854356, 0.343364662128988, 0.384793696241895,
0.608382647917744, 0.608382647917744, 1, 0.608382647917744, 0.608382647917744,
0.384793696241895, 0.501936678206125, 0.501936678206125, 0, 0.878350036859346,
0, 0.501936678206125, 0.501936678206125)), row.names = c(NA,
-68L), class = c("tbl_df", "tbl", "data.frame"))
I used this data to create a Delaunay plot in R:
library(tidyverse)
library(ggforce)
# Delaunay triangulation of the (V1, V2) landmarks, with tiles, edges and
# points all coloured by coef2 after dividing it by its maximum.
ggplot(
  data = mutate(data, coef2 = coef2 / max(coef2)),
  mapping = aes(x = V1, y = V2)
) +
  geom_delaunay_tile(aes(colour = coef2, fill = coef2), alpha = .5) +
  geom_delaunay_segment2(aes(colour = coef2, fill = coef2)) +
  geom_point(aes(colour = coef2)) +
  # Limits given high-to-low, so the y axis runs from 1 down to 0.
  ylim(1, 0) +
  scale_color_viridis_c(option = "magma") +
  scale_fill_viridis_c(option = "magma") +
  theme_minimal()
which gives this
I want to fill each triangle with a blend of colors that matches the colors of its corner points, just as the lines are colored.
As you can see, I have tried using fill = coef2 within the geom_delaunay layers, but this doesn't really achieve what I want.
Is there a way to do this in R?
Many thanks!
I want to check the linearity assumption in logistic regression, i.e.:
Are the independent variables (independent1 and independent2) linearly related to the log-odds of dependent?
I would like to optimize these (working) calculations:
This is the code:
# Check Linearity ---------------------------------------------------------
# Print the quartile boundaries of independent1 for reference.
quantile(df$independent1, probs = seq(0, 1, by = 0.25))

# Outcome counts inside each hand-coded bin of independent1.
# NOTE(review): the cut points 52 / 60 / 73 / 91 are hard-coded, and the last
# bin uses `< 91`, whereas the medians further down use `>= q[4]` with no
# upper bound. Confirm the two groupings are meant to be the same.
table(with(df, dependent[independent1 < 52]))
table(with(df, dependent[independent1 >= 52 & independent1 < 60]))
table(with(df, dependent[independent1 >= 60 & independent1 < 73]))
table(with(df, dependent[independent1 >= 73 & independent1 < 91]))

# Proportion of dependent == 1 in each bin (mean of a 0/1 variable).
p1 <- mean(with(df, dependent[independent1 < 52]))
p2 <- mean(with(df, dependent[independent1 >= 52 & independent1 < 60]))
p3 <- mean(with(df, dependent[independent1 >= 60 & independent1 < 73]))
p4 <- mean(with(df, dependent[independent1 >= 73 & independent1 < 91]))
probs <- c(p1, p2, p3, p4)

# Convert the proportions to log-odds.
logits <- log(probs / (1 - probs))

# Quartiles of independent1, kept for the median computation below.
q <- quantile(df$independent1, probs = seq(0, 1, by = 0.25))

# Median of independent1 within each of the four quartile groups; these are
# the x positions of the plotted points.
meds <- c(
  median(with(df, independent1[independent1 < q[2]])),
  median(with(df, independent1[independent1 >= q[2] & independent1 < q[3]])),
  median(with(df, independent1[independent1 >= q[3] & independent1 < q[4]])),
  median(with(df, independent1[independent1 >= q[4]]))
)

# Log-odds against group median.
plot(
  x = meds, y = logits,
  main = "xxx",
  xlab = "independent1",
  ylab = "log-odds(dependent|independent1)",
  las = 1
)
For one variable this might be OK, but I have more independent variables. So how could I optimize this code (checking and plotting) so that it runs for each independent variable (in this example independent1 and independent2)?
My dataframe:
df <- structure(list(dependent = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), independent1 = c(84,
49, 54, 75, 49, 70, 75, 42, 60, 72, 80, 73, 51, 61, 59, 78, 45,
38, 78, 65, 91, 60, 39, 31, 42, 72, 41, 77, 73, 74, 39, 86, 71,
55, 43, 75, 80, 75, 67, 74, 46, 70, 57, 66, 57, 72, 46, 52, 53,
76, 57, 86, 67, 71, 57, 50, 76, 61, 41, 57, 62, 41, 64, 82, 53,
75, 59, 38, 54, 56, 68, 63, 73, 26, 75, 76, 81, 46, 77, 53, 59,
66, 51, 72, 80, 70, 39, 57, 62, 85, 84, 57, 73, 55, 70, 78, 66,
69, 60, 51, 72, 68, 60, 62, 64, 44, 50, 59, 45, 81, 54, 68, 75,
66, 54, 45, 52, 87, 44, 77, 49, 84, 68, 76, 82, 44, 58, 55, 69,
33, 48, 62, 60, 76, 56, 73, 55, 58, 53, 53, 60, 52, 60, 41, 39,
36, 38, 59, 54, 64), independent2 = c(23, 25, 34, 25, 31, 25,
32, 19, 25, 28, 22, 18, 30, 26, 25, 25, 25, 19, 24, 27, 23, 28,
39, 27, 30, 28, 22, 28, 25, 23, 18, 27, 27, 19, 25, 27, 26, 26,
21, 26, 23, 28, 37, 32, 24, 32, 26, 23, 24, 27, 28, 25, 24, 22,
34, 23, 35, 20, 29, 29, 21, 29, 25, 26, 23, 33, 25, 26, 29, 27,
26, 28, 19, 22, 29, 22, 26, 35, 32, 29, 26, 23, 31, 30, 27, 28,
23, 27, 34, 22, 24, 28, 21, 25, 18, 32, 21, 24, 31, 31, 24, 30,
27, 23, 16, 26, 26, 19, 38, 21, 32, 34, 28, 19, 30, 24, 26, 24,
40, 26, 15, 26, 28, 22, 25, 26, 31, 24, 26, 42, 26, 30, 28, 21,
21, 19, 22, 20, 26, 31, 22, 25, 21, 20, 27, 27, 26, 29, 22, 24
)), row.names = c(NA, -150L), class = c("tbl_df", "tbl", "data.frame"
))
I'll demonstrate a somewhat different and decidedly more efficient method of splitting a variable that is to be used in a logistic regression model:
# Bin independent1 at its quartiles. include.lowest = TRUE closes the first
# interval on the left so the minimum value falls into a bin.
df$q41 <- with(
  df,
  cut(independent1, quantile(independent1), include.lowest = TRUE)
)
# creates 4 level factor into roughly equally sized groups
table(df$q41)
#--------------------
#[26,52] (52,60] (60,73] (73,91]
# 39 37 39 35
#Examine for "eyeball" trends in the log-odds of dependent
# `+ 0` drops the intercept so every quartile gets its own coefficient.
fit1.q41 <- glm(dependent ~ q41 + 0, data = df, family = "binomial")
fit1.q41
#---------------------------
Call: glm(formula = dependent ~ q41 + 0, family = "binomial", data = df)
Coefficients:
q41[26,52] q41(52,60] q41(60,73] q41(73,91]
-3.638 -2.862 -2.918 -2.048
Degrees of Freedom: 150 Total (i.e. Null); 146 Residual
Null Deviance: 207.9
Residual Deviance: 65.52 AIC: 73.52
I chose to remove the intercept term because its presence prevented viewing the coefficient of the lowest group on the same scale as the upper 3. The coefficients are just the logits for the grouping I created. Compare:
> logits
[1] -3.555348 -2.740840 -2.970414 -2.169054
> coef(fit1.q41)
q41[26,52] q41(52,60] q41(60,73] q41(73,91]
-3.637586 -2.862201 -2.917771 -2.047693
I then tried to automate the process but ran into a bit of a problem because of the small number of events in one of the quartile groups. The ridiculously low coefficient for the lowest quartile of independent2 comes from the lack of any events ("1"s) in that category. (An estimated log-odds of -19.566069 does rather point to a proportion of 0.)
# For every predictor (all columns except the first), bin it at its
# quartiles and return the coefficient table of an intercept-free logistic
# fit, i.e. the log-odds of dependent within each bin.
lapply(df[-1], function(x) {
  # Show which predictor is being processed.
  str(x)
  IVq <- cut(x, quantile(x), include.lowest = TRUE)
  coef(summary(glm(df$dependent ~ IVq + 0, family = "binomial")))
})
num [1:150] 84 49 54 75 49 70 75 42 60 72 ...
num [1:150] 23 25 34 25 31 25 32 19 25 28 ...
$independent1
Estimate Std. Error z value Pr(>|z|)
IVq[26,52] -3.637586 1.0130639 -3.590678 3.298191e-04
IVq(52,60] -2.862201 0.7270292 -3.936845 8.256004e-05
IVq(60,73] -2.917771 0.7259663 -4.019155 5.840732e-05
IVq(73,91] -2.047693 0.5312796 -3.854266 1.160776e-04
$independent2
Estimate Std. Error z value Pr(>|z|)
IVq[15,23] -19.566069 1639.9716035 -0.01193074 9.904809e-01
IVq(23,26] -3.091042 0.7229988 -4.27530783 1.908734e-05
IVq(26,28] -2.397895 0.7385489 -3.24676555 1.167245e-03
IVq(28,42] -1.856298 0.4808846 -3.86017349 1.133066e-04
> # Cross-tabulate each predictor's quartile bin against the outcome.
> lapply(df[-1], function(x) { IVq <- cut(x, quantile(x), include.lowest = TRUE); table(IVq, df$dependent) })
$independent1
IVq 0 1
[26,52] 38 1
(52,60] 35 2
(60,73] 37 2
(73,91] 31 4
$independent2
IVq 0 1
[15,23] 43 0
(23,26] 44 2
(26,28] 22 2
(28,42] 32 5
At any rate I think I've demonstrated a more R-ish approach to calculating logits within quartiles. It also sets you up for a model comparison approach to examining departures from linearity as well as demonstrating possible pitfalls. If you had more events you might have considered looking at the change in deviance from the null model with the addition of a quartile factor on top of a simple linear model ... or even more powerfully using poly to create your comparison model.
In the past when working with datasets with adequate numbers of events I have chosen to split on the basis of quantiles calculated from an event==1 subset rather than letting the splits be based on the whole dataset.
I know that polr does not give p-values because they are not very reliable. Nevertheless, I would like to add them to my modelsummary (Vignette) output. I know how to get the values, as follows:
library(MASS)
# Ordered regression of the 1977 repair record on origin, length and mileage.
# Hess = TRUE asks polr() to return the Hessian along with the fit.
polr_res <- polr(
  formula = as.ordered(rep77) ~ foreign + length + mpg,
  data = fullauto,
  Hess = TRUE
)
summary(polr_res)
Call:
polr_res(formula = as.ordered(rep77) ~ foreign + length + mpg, data = fullauto,
Hess = TRUE)
## coefficient test
library("AER")
# Print the coefficient test table for the fitted model; its "Pr(>|z|)"
# column holds the p-values that polr itself does not report.
coeftest(polr_res)
modelsummary
Because polr has no p-values, I cannot call modelsummary(models, stars=TRUE) on my models (which includes other models which do have p-values and for which I want to show stars).
library(modelsummary)
# Named list of the fitted models to tabulate. The trailing comma after the
# last element was removed: list() treats it as an empty argument and errors.
models <- list(
  "Ordinal Probit" = polr_res
)
# model_names <- c("OLS", "")
modelsummary(models, stars = TRUE)
I tried first to simply add the p-values to the tidy object, but I cannot add that object to the list of models.
# Take the p-values from the fitted model object (polr_res), not from the
# polr() function itself, and select the column by name rather than position.
polr_pval <- coeftest(polr_res)[, "Pr(>|z|)"]
polr_pval <- as.data.frame(polr_pval)
tidy_polr <- tidy(polr_res)
# Attach the p-values under an explicit column name instead of writing to
# column position 5, which may already hold another column of the tidy output.
# NOTE(review): assumes coeftest() and tidy() list the terms in the same
# order; confirm before relying on this.
tidy_polr$p.value <- polr_pval$polr_pval
The vignette describes that I can make a custom class which adapts the polr, but I do not understand how:
https://vincentarelbundock.github.io/modelsummary/articles/modelsummary.html#customizing-existing-models-part-i-
https://vincentarelbundock.github.io/modelsummary/articles/modelsummary.html#customizing-existing-models-part-ii-
Could anyone help me figure this out?
EDIT:
I am posting an edit showing the problem I was having when using Vincent's answer, with R version 3.6.1 (2019-07-05). If you are encountering this issue, (preferably) update to R version 4.0.0 or download an update for modelsummary from Github (see also Vincent's comments below).:
library(remotes)
remotes::install_github('vincentarelbundock/modelsummary')
Output:
DATA for R
fullauto <- structure(list(make = structure(c(1, 1, 1, 2, 2, 3, 4, 4, 4,
4, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8,
9, 10, 10, 11, 11, 12, 12, 12, 13, 14, 14, 14, 14, 14, 14, 15,
15, 15, 15, 15, 15, 15, 16, 17, 17, 17, 17, 17, 18, 18, 18, 18,
18, 18, 19, 20, 21, 21, 21, 22, 22, 22, 22, 23), label = "Make", format.stata = "%8.0g", class = c("haven_labelled",
"vctrs_vctr", "double"), labels = c(AMC = 1, Audi = 2, BMW = 3,
Buick = 4, Cad. = 5, Chev. = 6, Datsun = 7, Dodge = 8, Fiat = 9,
Ford = 10, Honda = 11, Linc. = 12, Mazda = 13, Merc. = 14, Olds = 15,
Peugeot = 16, Plym. = 17, Pont. = 18, Renault = 19, Subaru = 20,
Toyota = 21, VW = 22, Volvo = 23)), model = structure(c(1, 2,
3, 4, 5000, 320, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 200, 210, 510, 810, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 98, 604, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
58, 59, 60, 61, 62, 63, 64, 65, 260), label = "Model", format.stata = "%8.0g", class = c("haven_labelled",
"vctrs_vctr", "double"), labels = c(Concord = 1, Pacer = 2, Spirit = 3,
Fox = 4, Century = 5, Electra = 6, LeSabre = 7, Opel = 8, Regal = 9,
Riviera = 10, Skylark = 11, Deville = 12, Eldrado = 13, Seville = 14,
Chevette = 15, Impala = 16, Malibu = 17, MCarlo = 18, Monza = 19,
Nova = 20, Colt = 21, Diplomat = 22, Magnum = 23, StRegis = 24,
Strada = 25, Fiesta = 26, Mustang = 27, Accord = 28, Civic = 29,
Cntntl = 30, `Mark V` = 31, Vrsills = 32, GLC = 33, Bobcat = 34,
Cougar = 35, `XR-7` = 36, Marquis = 37, Monarch = 38, Zephyr = 39,
Cutlass = 40, CutlSupr = 41, `Delta 88` = 42, Omega = 43, Starfire = 44,
Toronado = 45, Arrow = 46, Champ = 47, Horizon = 48, Sapporo = 49,
Volare = 50, Catalina = 51, Firebird = 52, GranPrix = 53, `Le Mans` = 54,
Phoenix = 55, Sunbird = 56, `Le Car` = 57, Subaru = 58, Celica = 59,
Corolla = 60, Corona = 61, Rabbit = 62, Diesel = 63, Scirocco = 64,
Dasher = 65)), price = structure(c(4099, 4749, 3799, 6295, 9690,
9735, 4816, 7827, 5788, 4453, 5189, 10372, 4082, 11385, 14500,
15906, 3299, 5705, 4504, 5104, 3667, 3955, 6229, 4589, 5079,
8129, 3984, 4010, 5886, 6342, 4296, 4389, 4187, 5799, 4499, 11497,
13594, 13466, 3995, 3829, 5379, 6303, 6165, 4516, 3291, 4733,
5172, 4890, 4181, 4195, 10371, 8814, 12990, 4647, 4425, 4482,
6486, 4060, 5798, 4934, 5222, 4723, 4424, 4172, 3895, 3798, 5899,
3748, 5719, 4697, 5397, 6850, 7140, 11995), label = "Price", format.stata = "%8.0g"),
mpg = structure(c(22, 17, 22, 23, 17, 25, 20, 15, 18, 26,
20, 16, 19, 14, 14, 21, 29, 16, 22, 22, 24, 19, 23, 35, 24,
21, 30, 18, 16, 17, 21, 28, 21, 25, 28, 12, 12, 14, 30, 22,
14, 14, 15, 18, 20, 19, 19, 18, 19, 24, 16, 21, 14, 38, 34,
25, 26, 18, 18, 18, 19, 19, 19, 24, 26, 35, 18, 31, 18, 25,
41, 25, 23, 17), label = "Mileage (mpg)", format.stata = "%8.0g"),
rep78 = structure(c(3, 3, NA, 3, 5, 4, 3, 4, 3, NA, 3, 3,
3, 3, 2, 3, 3, 4, 3, 2, 2, 3, 4, 5, 4, 4, 5, 2, 2, 2, 3,
4, 3, 5, 4, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 4, 3, 1,
3, 4, NA, 3, 5, 3, NA, 2, 4, 1, 3, 3, NA, 2, 3, 5, 5, 5,
5, 4, 5, 4, 4, 5), label = "Repair Record 1978", format.stata = "%9.0g", class = c("haven_labelled",
"vctrs_vctr", "double"), labels = c(Poor = 1, Fair = 2, Average = 3,
Good = 4, Excellent = 5)), rep77 = structure(c(2, 1, NA,
3, 2, 4, 3, 4, 4, NA, 3, 4, 3, 3, 2, 3, 3, 4, 3, 3, 2, 3,
3, 5, 4, 4, 4, 2, 2, 2, 1, NA, 3, 5, 4, 4, 4, 3, 4, 3, 3,
4, 2, NA, 3, 3, 4, 4, 3, 1, 3, 4, NA, 3, 4, NA, NA, 2, 4,
2, 3, 3, NA, 2, 3, 4, 5, 5, 5, 3, 4, 3, 3, 3), label = "Repair Record 1977", format.stata = "%9.0g", class = c("haven_labelled",
"vctrs_vctr", "double"), labels = c(Poor = 1, Fair = 2, Average = 3,
Good = 4, Excellent = 5)), hdroom = structure(c(2.5, 3, 3,
2.5, 3, 2.5, 4.5, 4, 4, 3, 2, 3.5, 3.5, 4, 3.5, 3, 2.5, 4,
3.5, 2, 2, 3.5, 1.5, 2, 2.5, 2.5, 2, 4, 4, 4.5, 2.5, 1.5,
2, 3, 2.5, 3.5, 2.5, 3.5, 3.5, 3, 3.5, 3, 3.5, 3, 3.5, 4.5,
2, 4, 4.5, 2, 3.5, 4, 3.5, 2, 2.5, 4, 1.5, 5, 4, 1.5, 2,
3.5, 3.5, 2, 3, 2.5, 2.5, 3, 2, 3, 3, 2, 2.5, 2.5), label = "Headroom (in.)", format.stata = "%6.1f"),
rseat = structure(c(27.5, 25.5, 18.5, 28, 27, 26, 29, 31.5,
30.5, 24, 28.5, 30, 27, 31.5, 30, 30, 26, 29.5, 28.5, 28.5,
25, 27, 21, 23.5, 22, 27, 24, 29, 29, 28, 26.5, 26, 23, 25.5,
23.5, 30.5, 28.5, 27, 25.5, 25.5, 29.5, 25, 30.5, 27, 29,
28, 28, 29, 27, 25.5, 30, 31.5, 30.5, 21.5, 23, 25, 22, 31,
29, 23.5, 28.5, 28, 27, 25, 23, 25.5, 22, 24.5, 23, 25.5,
25.5, 23.5, 37.5, 29.5), label = "Rear Seat (in.)", format.stata = "%6.1f"),
trunk = structure(c(11, 11, 12, 11, 15, 12, 16, 20, 21, 10,
16, 17, 13, 20, 16, 13, 9, 20, 17, 16, 7, 13, 6, 8, 8, 8,
8, 17, 17, 21, 16, 9, 10, 10, 5, 22, 18, 15, 11, 9, 16, 16,
23, 15, 17, 16, 16, 20, 14, 10, 17, 20, 14, 11, 11, 17, 8,
16, 20, 7, 16, 17, 13, 7, 10, 11, 14, 9, 11, 15, 15, 16,
12, 14), label = "Trunk space (cu. ft.)", format.stata = "%8.0g"),
weight = structure(c(2930, 3350, 2640, 2070, 2830, 2650,
3250, 4080, 3670, 2230, 3280, 3880, 3400, 4330, 3900, 4290,
2110, 3690, 3180, 3220, 2750, 3430, 2370, 2020, 2280, 2750,
2120, 3600, 3600, 3740, 2130, 1800, 2650, 2240, 1760, 4840,
4720, 3830, 1980, 2580, 4060, 4130, 3720, 3370, 2830, 3300,
3310, 3690, 3370, 2730, 4030, 4060, 3420, 3260, 1800, 2200,
2520, 3330, 3700, 3470, 3210, 3200, 3420, 2690, 1830, 2050,
2410, 2200, 2670, 1930, 2040, 1990, 2160, 3170), label = "Weight (lbs.)", format.stata = "%8.0g"),
length = structure(c(186, 173, 168, 174, 189, 177, 196, 222,
218, 170, 200, 207, 200, 221, 204, 204, 163, 212, 193, 200,
179, 197, 170, 165, 170, 184, 163, 206, 206, 220, 161, 147,
179, 172, 149, 233, 230, 201, 154, 169, 221, 217, 212, 198,
195, 198, 198, 218, 200, 180, 206, 220, 192, 170, 157, 165,
182, 201, 214, 198, 201, 199, 203, 179, 142, 164, 174, 165,
175, 155, 155, 156, 172, 193), label = "Length (in.)", format.stata = "%8.0g"),
turn = structure(c(40, 40, 35, 36, 37, 34, 40, 43, 43, 34,
42, 43, 42, 44, 43, 45, 34, 43, 31, 41, 40, 43, 35, 32, 34,
38, 35, 46, 46, 46, 36, 33, 43, 36, 34, 51, 48, 41, 33, 39,
48, 45, 44, 41, 43, 42, 42, 42, 43, 40, 43, 43, 38, 37, 37,
36, 38, 44, 42, 42, 45, 40, 43, 41, 34, 36, 36, 35, 36, 35,
35, 36, 36, 37), label = "Turn Circle (ft.) ", format.stata = "%8.0g"),
displ = structure(c(121, 258, 121, 97, 131, 121, 196, 350,
231, 304, 196, 231, 231, 425, 350, 350, 231, 250, 200, 200,
151, 250, 119, 85, 119, 146, 98, 318, 318, 225, 105, 98,
140, 107, 91, 400, 400, 302, 86, 140, 302, 302, 302, 250,
140, 231, 231, 231, 231, 151, 350, 350, 163, 156, 86, 105,
119, 225, 231, 231, 231, 231, 231, 151, 79, 97, 134, 97,
134, 89, 90, 97, 97, 163), label = "Displacement (cu. in.)", format.stata = "%8.0g"),
gratio = structure(c(3.57999992370605, 2.52999997138977,
3.07999992370605, 3.70000004768372, 3.20000004768372, 3.64000010490417,
2.9300000667572, 2.41000008583069, 2.73000001907349, 2.86999988555908,
2.9300000667572, 2.9300000667572, 3.07999992370605, 2.27999997138977,
2.19000005722046, 2.24000000953674, 2.9300000667572, 2.55999994277954,
2.73000001907349, 2.73000001907349, 2.73000001907349, 2.55999994277954,
3.89000010490417, 3.70000004768372, 3.53999996185303, 3.54999995231628,
3.53999996185303, 2.47000002861023, 2.47000002861023, 2.94000005722046,
3.36999988555908, 3.15000009536743, 3.07999992370605, 3.04999995231628,
3.29999995231628, 2.47000002861023, 2.47000002861023, 2.47000002861023,
3.73000001907349, 2.73000001907349, 2.75, 2.75, 2.25999999046326,
2.4300000667572, 3.07999992370605, 2.9300000667572, 2.9300000667572,
2.73000001907349, 3.07999992370605, 2.73000001907349, 2.41000008583069,
2.41000008583069, 3.57999992370605, 3.04999995231628, 2.97000002861023,
3.36999988555908, 3.53999996185303, 3.23000001907349, 2.73000001907349,
3.07999992370605, 2.9300000667572, 2.9300000667572, 3.07999992370605,
2.73000001907349, 3.72000002861023, 3.80999994277954, 3.05999994277954,
3.21000003814697, 3.04999995231628, 3.77999997138977, 3.77999997138977,
3.77999997138977, 3.74000000953674, 2.98000001907349), label = "Gear Ratio", format.stata = "%6.2f"),
order = structure(c(1, 2, 3, 5, 4, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
43, 44, 45, 47, 48, 49, 50, 51, 52, 46, 53, 54, 55, 56, 57,
58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
73, 74), label = "Original order", format.stata = "%8.0g"),
foreign = structure(c(0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1), label = "Foreign", format.stata = "%8.0g", class = c("haven_labelled",
"vctrs_vctr", "double"), labels = c(Domestic = 0, Foreign = 1
)), wgtd = structure(c(2930, 3350, 2640, NA, NA, NA, 3250,
4080, 3670, 2230, 3280, 3880, 3400, 4330, 3900, 4290, 2110,
3690, 3180, 3220, 2750, 3430, NA, NA, NA, NA, 2120, 3600,
3600, 3740, NA, 1800, 2650, NA, NA, 4840, 4720, 3830, NA,
2580, 4060, 4130, 3720, 3370, 2830, 3300, 3310, 3690, 3370,
2730, 4030, 4060, NA, 3260, 1800, 2200, 2520, 3330, 3700,
3470, 3210, 3200, 3420, 2690, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), format.stata = "%9.0g"), wgtf = structure(c(NA,
NA, NA, 2070, 2830, 2650, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 2370, 2020, 2280, 2750, NA,
NA, NA, NA, 2130, NA, NA, 2240, 1760, NA, NA, NA, 1980, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3420, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1830, 2050, 2410,
2200, 2670, 1930, 2040, 1990, 2160, 3170), format.stata = "%9.0g")), label = "Automobile Models", row.names = c(NA,
-74L), class = c("tbl_df", "tbl", "data.frame"))
I think the easiest way to achieve this is to define a tidy_custom.polr method, as described in the modelsummary documentation. For instance, you could do:
library(MASS)
library(AER)
library(modelsummary)
#' Supply p-values for `polr` models to modelsummary
#'
#' Runs `coeftest()` on the model and returns its "Pr(>|z|)" column keyed by
#' term name.
#'
#' @param x A fitted model object that `coeftest()` accepts.
#' @param ... Ignored; present so the method matches the generic's signature.
#' @return A data frame with one row per row of the `coeftest()` table and
#'   two columns: `term` (character) and `p.value` (numeric).
tidy_custom.polr <- function(x, ...) {
  s <- coeftest(x)
  data.frame(
    term = row.names(s),
    p.value = s[, "Pr(>|z|)"],
    # Drop the row names inherited from the named p-value vector.
    row.names = NULL,
    # Before R 4.0, data.frame() converted strings to factors by default;
    # keep `term` as character on every R version.
    stringsAsFactors = FALSE
  )
}
# Two example models on mtcars: lm already reports p-values, while the polr
# model gets them through the tidy_custom.polr method.
mod <- list(
  "LM" = lm(gear ~ hp + mpg, data = mtcars),
  # Hess = TRUE keeps the Hessian with the fit, so the variance estimate
  # needed for the p-values does not require re-fitting the model.
  "POLR" = polr(as.ordered(gear) ~ hp + mpg, data = mtcars, Hess = TRUE)
)
modelsummary(mod, stars = TRUE)