How do I use column index as x axis in R - r

I have a data frame with 7 columns and 100 observations
I divided observations into two groups
the question I'm working on is: b) Construct two time plots of the mean blood lead levels superimposed on the blood lead levels at each occasion for succimer and placebo groups.
This is my code so far:
library(tidyverse)
library(haven)
library(dplyr)
library(plyr)
library(foreign)
library(ggplot2)
# Load the Stata file with read_dta() and preview the first rows.
tlc <- read_dta(file = "tlc.dta")
head(tlc)

## a)
# Columns 3 to 6 hold the repeated measurements y0, y1, y4 and y6; keeping the
# index in one place avoids repeating the same magic range in every call.
lead_cols <- 3:6

# Split the observations by treatment arm (trt is labelled Placebo = 0,
# Succimer = 1).
placebo <- subset(tlc, trt == 0)
succimer <- subset(tlc, trt == 1)

summary(placebo[, lead_cols])
summary(succimer[, lead_cols])

# Per-occasion mean, standard deviation and variance for each arm.
# vapply() walks over the columns directly instead of coercing the data frame
# to a matrix the way apply() does, and guarantees one number per column.
placebo_mean <- colMeans(placebo[, lead_cols])
placebo_std <- vapply(placebo[, lead_cols], sd, numeric(1))
placebo_var <- placebo_std^2
succimer_mean <- colMeans(succimer[, lead_cols])
succimer_std <- vapply(succimer[, lead_cols], sd, numeric(1))
succimer_var <- succimer_std^2

## b)

## c)
# Correlation and covariance matrices of the four measurements for each arm,
# rounded to three decimals.
placebo_cor <- cor(placebo[, lead_cols]) %>% round(digits = 3)
succimer_cor <- cor(succimer[, lead_cols]) %>% round(digits = 3)
placebo_cov <- cov(placebo[, lead_cols]) %>% round(digits = 3)
succimer_cov <- cov(succimer[, lead_cols]) %>% round(digits = 3)
So the purpose is to plot all observations using the values as the y axis and the columns y0, y1, y4, y6 (representing week 0, week 1, week 4, week 6) as the x axis, and then to superimpose the mean of each group on the plot. I'm planning to use different colors to distinguish the two groups, so the final plot will have a lot of points at each x coordinate, and two short lines to indicate the means of each group at each x coordinate.
My question is how to use the column index as the x axis in R, with or without using ggplot. I know this question may be too elementary, but it has caused a lot of trouble for me as a beginner.
below is my data:
dput(tlc)
structure(list(id = structure(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
91, 92, 93, 94, 95, 96, 97, 98, 99, 100), format.stata = "%9.0g"),
trt = structure(c(0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,
1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1,
0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0,
0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1,
1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1,
1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1), format.stata = "%9.0g", class = "haven_labelled", labels = c(Placebo = 0,
Succimer = 1)), y0 = structure(c(30.7999992370605, 26.5,
25.7999992370605, 24.7000007629395, 20.3999996185303, 20.3999996185303,
28.6000003814697, 33.7000007629395, 19.7000007629395, 31.1000003814697,
19.7999992370605, 24.7999992370605, 21.3999996185303, 27.8999996185303,
21.1000003814697, 20.6000003814697, 24, 37.5999984741211,
35.2999992370605, 28.6000003814697, 31.8999996185303, 29.6000003814697,
21.5, 26.2000007629395, 21.7999992370605, 23, 22.2000007629395,
20.5, 25, 33.2999992370605, 26, 19.7000007629395, 27.8999996185303,
24.7000007629395, 28.7999992370605, 29.6000003814697, 32,
21.7999992370605, 24.3999996185303, 33.7000007629395, 24.8999996185303,
19.7999992370605, 26.7000007629395, 26.7999992370605, 20.2000007629395,
35.4000015258789, 25.2999992370605, 20.2000007629395, 24.5,
20.2999992370605, 20.3999996185303, 24.1000003814697, 27.1000003814697,
34.7000007629395, 28.5, 26.6000003814697, 24.5, 20.5, 25.2000007629395,
34.7000007629395, 30.2999992370605, 26.6000003814697, 20.7000007629395,
27.7000007629395, 24.2999992370605, 36.5999984741211, 28.8999996185303,
34, 32.5999984741211, 29.2000007629395, 26.3999996185303,
21.7999992370605, 27.2000007629395, 22.3999996185303, 32.5,
24.8999996185303, 24.6000003814697, 23.1000003814697, 21.1000003814697,
25.7999992370605, 30, 22.1000003814697, 20, 38.0999984741211,
28.8999996185303, 25.1000003814697, 19.7999992370605, 22.1000003814697,
23.5, 29.1000003814697, 30.2999992370605, 25.3999996185303,
30.6000003814697, 22.3999996185303, 31.2000007629395, 31.3999996185303,
41.0999984741211, 29.3999996185303, 21.8999996185303, 20.7000007629395
), format.stata = "%9.0g"), y1 = structure(c(26.8999996185303,
14.8000001907349, 23, 24.5, 2.79999995231628, 5.40000009536743,
20.7999992370605, 31.6000003814697, 14.8999996185303, 31.2000007629395,
17.5, 23.1000003814697, 26.2999992370605, 6.30000019073486,
20.2999992370605, 23.8999996185303, 16.7000007629395, 33.7000007629395,
25.5, 15.8000001907349, 27.8999996185303, 15.8000001907349,
6.5, 26.7999992370605, 12, 4.19999980926514, 11.5, 21.1000003814697,
3.90000009536743, 26.2000007629395, 21.3999996185303, 13.1999998092651,
21.6000003814697, 21.2000007629395, 26.3999996185303, 17.5,
30.2000007629395, 19.2999992370605, 16.3999996185303, 14.8999996185303,
20.8999996185303, 18.8999996185303, 6.40000009536743, 20.3999996185303,
10.6000003814697, 30.3999996185303, 23.8999996185303, 17.5,
10, 21, 17.2000007629395, 20.1000003814697, 14.8999996185303,
39, 32.5999984741211, 22.3999996185303, 5.09999990463257,
17.5, 25.1000003814697, 39.5, 29.3999996185303, 25.2999992370605,
19.2999992370605, 4, 24.2999992370605, 23.2999992370605,
28.8999996185303, 10.6999998092651, 19, 9.19999980926514,
15.3000001907349, 10.6000003814697, 28.5, 22, 25.1000003814697,
23.6000003814697, 25, 20.8999996185303, 5.59999990463257,
21.8999996185303, 27.6000003814697, 21, 22.7000007629395,
40.7999992370605, 12.5, 28.1000003814697, 11.6000003814697,
21.1000003814697, 7.90000009536743, 16.7999992370605, 3.5,
24.2999992370605, 28.2000007629395, 7.09999990463257, 10.8000001907349,
3.90000009536743, 15.1000003814697, 22.1000003814697, 7.59999990463257,
8.10000038146973), format.stata = "%9.0g"), y4 = structure(c(25.7999992370605,
19.5, 19.1000003814697, 22, 3.20000004768372, 4.5, 19.2000007629395,
28.5, 15.3000001907349, 29.2000007629395, 20.5, 24.6000003814697,
19.5, 18.5, 18.3999996185303, 19, 21.7000007629395, 34.4000015258789,
26.2999992370605, 22.8999996185303, 27.2999992370605, 23.7000007629395,
7.09999990463257, 25.2999992370605, 16.7999992370605, 4,
9.5, 17.3999996185303, 12.8000001907349, 34, 21, 14.6000003814697,
23.6000003814697, 22.8999996185303, 23.7999992370605, 21,
30.2000007629395, 16.3999996185303, 11.6000003814697, 14.5,
22.2000007629395, 18.8999996185303, 5.09999990463257, 19.2999992370605,
9, 26.5, 22.2000007629395, 17.3999996185303, 15.6000003814697,
16.7000007629395, 15.8999996185303, 17.8999996185303, 18.1000003814697,
28.7999992370605, 27.5, 21.7999992370605, 8.19999980926514,
19.6000003814697, 23.3999996185303, 38.5999984741211, 33.0999984741211,
25.1000003814697, 21.8999996185303, 4.19999980926514, 18.3999996185303,
40.4000015258789, 32.7999992370605, 12.6000003814697, 16.2999992370605,
8.30000019073486, 24.6000003814697, 14.3999996185303, 35,
19.1000003814697, 27.7999992370605, 21.2000007629395, 21.7000007629395,
21.7000007629395, 7.30000019073486, 23.6000003814697, 24,
8.60000038146973, 21.2000007629395, 38, 16.7000007629395,
27.5, 13, 21.5, 12.3999996185303, 15.1000003814697, 3, 22.7000007629395,
27, 17.2000007629395, 19.7999992370605, 7, 10.8999996185303,
25.2999992370605, 10.8000001907349, 25.7000007629395), format.stata = "%9.0g"),
y6 = structure(c(23.7999992370605, 21, 23.2000007629395,
22.5, 9.39999961853027, 11.8999996185303, 18.3999996185303,
25.1000003814697, 14.6999998092651, 30.1000003814697, 27.5,
30.8999996185303, 19, 16.2999992370605, 20.7999992370605,
17, 20.2999992370605, 31.3999996185303, 30.2999992370605,
25.8999996185303, 34.2000007629395, 23.3999996185303, 16,
24.7999992370605, 19.2000007629395, 16.2000007629395, 14.5,
21.1000003814697, 12.6999998092651, 28.2000007629395, 22.3999996185303,
11.6000003814697, 27.7000007629395, 21.8999996185303, 22,
24.2000007629395, 27.5, 17.6000003814697, 16.6000003814697,
63.9000015258789, 19.7999992370605, 15.5, 15.1000003814697,
23.7999992370605, 16, 28.1000003814697, 27.2000007629395,
18.6000003814697, 15.1999998092651, 13.5, 17.7000007629395,
18.7000007629395, 21.2999992370605, 34.7000007629395, 22.7999992370605,
21, 23.6000003814697, 18.3999996185303, 22.2000007629395,
43.2999992370605, 28.3999996185303, 27.8999996185303, 21.7999992370605,
11.6999998092651, 27.7999992370605, 39.2999992370605, 31.7999992370605,
21.2000007629395, 18.6000003814697, 18.3999996185303, 32.4000015258789,
18.7000007629395, 30.5, 18.7000007629395, 27.2999992370605,
21.1000003814697, 23.8999996185303, 19.8999996185303, 12.3000001907349,
24.7999992370605, 23.7000007629395, 24.6000003814697, 20.5,
32.7000007629395, 22.2000007629395, 24.7999992370605, 23.1000003814697,
20.6000003814697, 18.8999996185303, 18.7999992370605, 11.5,
20.1000003814697, 25.5, 18.7000007629395, 22.2000007629395,
17.7999992370605, 27.1000003814697, 4.09999990463257, 13,
12.3000001907349), format.stata = "%9.0g")), row.names = c(NA,
-100L), class = c("tbl_df", "tbl", "data.frame"))
also I have tried this:
# Attempted plot. As written, aes() is closed before `y=value`, so `y` is
# passed to ggplot() instead of being mapped as an aesthetic, and x is mapped
# to the vector of names returned by colnames() for columns 3 to 6.
p=ggplot(tlc, aes(x=colnames(tlc[,3:6],do.NULL=TRUE)),
y=value)
# Add a point layer to the plot object.
p=p+geom_point()
No errors found when running the code, but R did report an error (Aesthetics must be either length 1 or the same as the data (100): x) when I call 'p' to plot it.

I don't have your data, but it sounds like you want something that looks like this:
Here is how I made it:
library(tidyverse)
# Setting up some fake data: 100 observations and 7 variables
set.seed(123)
some_data <- data.frame(
  y0 = rnorm(100),
  y1 = runif(100),
  y2 = rexp(100, 2),
  y3 = rnorm(100, 2, 1),
  y4 = rexp(100),
  y5 = rnorm(100, 2, 2),
  y6 = runif(100, -5, 5)
)

# Pivoting the data to longer format: one row per observation and column,
# with the column name in `variable` and the measurement in `value`
long_data <- some_data %>%
  pivot_longer(cols = everything(), names_to = "variable")

# Building the base plot: column names on the x axis, values on the y axis
p <- ggplot(long_data, aes(x = variable, y = value))

# Adding the points - use position_jitter to give it some width if you want
p <- p + geom_point(position = position_jitter(width = 0.2))

# Adding the bars at the mean - play around with width, color, and size.
# ymin and ymax are both set to the computed mean, so each error bar collapses
# into a single horizontal line. after_stat(y) replaces the deprecated ..y..
# notation.
p <- p + stat_summary(
  geom = "errorbar",
  fun = mean,
  width = 0.4,
  aes(ymax = after_stat(y), ymin = after_stat(y)),
  color = "orange",
  size = 1.5
)

p # show plot

Related

From Boxplot to Barplot in ggplot possible?

I have to make a ggplot barplot with error bars and Tukey significance letters for plants grown with different fertilizer concentrations.
The data should be grouped by the different concentrations and the significance letters should be added automatically.
I already have code for the same problem with a boxplot, which is working nicely. I tried several tutorials for barplots, but I always get the error: stat_count() can only have an x or y aesthetic.
So I thought, is it possible to convert my boxplot code into barplot code? I tried but I couldn't do it :) And if not - how do I automatically add TukeyHSD test significance letters to a ggplot barplot?
This is my Code for the boxplot with the tukey letters:
    value_max = Dünger, group_by(Duenger.g), summarize(max_value = max(Höhe.cm))
hsd=HSD.test(aov(Höhe.cm~Duenger.g, data=Dünger),
trt = "Duenger.g", group = T) sig.letters <- hsd$groups[order(row.names(hsd$groups)), ]
J <- ggplot(Dünger, aes(x = Duenger.g, y = Höhe.cm))+ geom_boxplot(aes(fill= Duenger.g))+ scale_fill_discrete(labels=c("0.5g", '1g', "2g", "3g", "4g"))+ geom_text(data = value_max, aes(x=Duenger.g, y = 0.1 + max_value, label = sig.letters$groups), vjust=0)+ stat_boxplot(geom = 'errorbar', width = 0.1)+ ggtitle("Auswirkung von Dünger auf die Höhe von Pflanzen") + xlab("Dünger in g") + ylab("Höhe in cm"); J
This is how it looks:
boxplot with tukey
Data from dput:
structure(list(Duenger.g = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4), plant = c(1, 2, 3, 4, 5, 7, 10, 11, 12, 13, 14, 18, 19,
21, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 37, 38, 39, 40,
41, 42, 43, 44, 48, 49, 50, 53, 54, 55, 56, 57, 58, 61, 62, 64,
65, 66, 67, 68, 69, 70, 71, 72, 73, 75, 79, 80, 81, 83, 85, 86,
88, 89, 91, 93, 99, 100, 102, 103, 104, 105, 106, 107, 108, 110,
111, 112, 113, 114, 115, 116, 117, 118, 120, 122, 123, 125, 126,
127, 128, 130, 131, 132, 134, 136, 138, 139, 140, 141, 143, 144,
145, 146, 147, 149), height.cm = c(5.7, 2.8, 5.5, 8, 3.5, 2.5,
4, 6, 10, 4.5, 7, 8.3, 11, 7, 8, 2.5, 7.4, 3, 14.5, 7, 12, 7.5,
30.5, 27, 6.5, 19, 10.4, 12.7, 27.3, 11, 11, 10.5, 10.5, 13,
53, 12.5, 12, 6, 12, 35, 8, 16, 56, 63, 69, 62, 98, 65, 77, 32,
85, 75, 33.7, 75, 55, 38.8, 39, 46, 35, 59, 44, 31.5, 49, 34,
52, 37, 43, 38, 28, 14, 28, 19, 20, 23, 17.5, 32, 16, 17, 24.7,
34, 50, 12, 14, 21, 33, 39.3, 41, 29, 35, 48, 40, 65, 35, 10,
26, 34, 41, 32, 38, 23.5, 22.2, 20.5, 29, 34, 45)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -105L))
Thank you
mirai
A bar chart and a boxplot are two different things. By default, geom_boxplot computes the boxplot statistics (stat="boxplot"). In contrast, geom_bar by default counts the number of observations (stat="count"), which are then mapped to y. That's the reason why you get an error. Hence, simply replacing geom_boxplot with geom_bar will not give you your desired result. Instead you could use e.g. stat_summary to create your bar chart with error bars. Additionally, I created a summary dataset to add the labels on top of the error bars.
library(ggplot2)
library(dplyr)
library(agricolae)
# Use the column name expected by the plotting code and treat the fertilizer
# amount as a categorical variable.
Dünger <- Dünger |>
  rename("Höhe.cm" = height.cm) |>
  mutate(Duenger.g = factor(Duenger.g))

# Tukey HSD grouping letters; the treatment level is copied from the row names
# into a column so it can be joined to the summary below.
hsd <- HSD.test(
  aov(Höhe.cm ~ Duenger.g, data = Dünger),
  trt = "Duenger.g",
  group = TRUE
)
sig.letters <- hsd$groups %>% mutate(Duenger.g = row.names(.))

# Per-level summary from mean_se() (columns y, ymin, ymax) plus the letters.
duenger_sum <- Dünger |>
  group_by(Duenger.g) |>
  summarize(mean_se(Höhe.cm)) |>
  left_join(sig.letters, by = "Duenger.g")

# Bars at the group mean, error bars from the default mean_se(), and the Tukey
# letters placed just above the upper error bar.
ggplot(Dünger, aes(x = Duenger.g, y = Höhe.cm, fill = Duenger.g)) +
  stat_summary(geom = "bar", fun = "mean") +
  stat_summary(geom = "errorbar", width = .1) +
  # Build the legend labels from the factor levels themselves (0, 0.5, 1, 2, 4)
  # so they cannot drift out of sync with the data.
  scale_fill_discrete(labels = function(x) paste0(x, "g")) +
  geom_text(
    data = duenger_sum,
    aes(y = ymax, label = groups),
    vjust = 0,
    nudge_y = 1
  ) +
  labs(
    title = "Auswirkung von Dünger auf die Höhe von Pflanzen",
    x = "Dünger in g", y = "Höhe in cm"
  )
#> No summary function supplied, defaulting to `mean_se()`
But as the summary dataset now already contains the mean and the values for the error bars a second option would be to do:
# Same chart drawn directly from the summary table: y is the group mean and
# ymin / ymax are the error bar limits.
ggplot(duenger_sum, aes(x = Duenger.g, y = y, fill = Duenger.g)) +
  geom_col() +
  geom_errorbar(aes(ymin = ymin, ymax = ymax), width = .1) +
  # Build the legend labels from the levels themselves so they always match
  # the data instead of relying on a hand-written positional vector.
  scale_fill_discrete(labels = function(x) paste0(x, "g")) +
  geom_text(aes(y = ymax, label = groups), vjust = 0, nudge_y = 1) +
  labs(
    title = "Auswirkung von Dünger auf die Höhe von Pflanzen",
    x = "Dünger in g", y = "Höhe in cm"
  )

fill delaunay triangles with colors of vertex points in R

here is a reprex
data<- structure(list(lanmark_id = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
59, 60, 61, 62, 63, 64, 65, 66, 67), V1 = c(0.00291280916742007,
0.00738863171211713, 0.0226678081211574, 0.0475105228945172,
0.0932285720818941, 0.167467706279089, 0.257162845610094, 0.365202733889021,
0.49347857580521, 0.623654594804239, 0.738846221030799, 0.838001377618909,
0.911583795022151, 0.954620025430512, 0.976736039833402, 0.99275439380643,
1.00100526672829, 0.0751484964183746, 0.136267471453466, 0.223219796351563,
0.312829176190895, 0.396253287447153, 0.589077347394549, 0.682150866526948,
0.771279538477539, 0.856242644022999, 0.915433541338973, 0.493665602840245,
0.491283285973581, 0.488913167946858, 0.486968906096063, 0.384707082576335,
0.43516446651127, 0.48730704698643, 0.541730425616146, 0.590794609520034,
0.176234316360877, 0.230353437655898, 0.295908510434122, 0.350673723300921,
0.2927721757992, 0.228392965512228, 0.634474821310078, 0.692554938010577,
0.757884656518485, 0.809961553290539, 0.760324208523177, 0.696892501347341,
0.299062528225204, 0.371899560139738, 0.440183530232855, 0.488448817156316,
0.542120710507391, 0.613931454931259, 0.683122622479693, 0.614367295821043,
0.544516611213321, 0.487065702940653, 0.43466839036949, 0.367662837035504,
0.329392110306872, 0.439192556373207, 0.488617118648197, 0.543288506065858,
0.652131615571443, 0.541622182786469, 0.486664920417254, 0.437126878794749
), V2 = c(0.201088019764115, 0.335422141956174, 0.468591127485112,
0.597955245417373, 0.719502795031081, 0.826191980419368, 0.912263437847338,
0.978932088608654, 0.996572250349122, 0.975164350943783, 0.906204543800476,
0.817791059656974, 0.711167374856116, 0.587462637963028, 0.457981280500493,
0.327526817895531, 0.19652402489511, 0.0832018969548692, 0.0247526745448235,
0.00543973063471442, 0.0169853862992864, 0.0463565705952832,
0.0442986445765913, 0.0151651597693172, 0.00747493463745755,
0.0263496825405166, 0.0805712600069456, 0.160307477500307, 0.24640401358039,
0.332244740019727, 0.420995916418539, 0.486383354389177, 0.505514985155285,
0.521022030162301, 0.5059272511442, 0.48818970795347, 0.184054088286897,
0.153658218058329, 0.153359749238857, 0.186997311695192, 0.20294291755153,
0.204166125257439, 0.186997311695192, 0.153386090373069, 0.155932705636629,
0.184603717976376, 0.203900583330345, 0.202836636618411, 0.670663080116174,
0.635972857244521, 0.619932598923225, 0.632625553953685, 0.620132318139554,
0.637530241507316, 0.668109937001625, 0.718821664744205, 0.73956412947459,
0.744898219300658, 0.74046882628352, 0.720755964662638, 0.672731384920681,
0.666152981987244, 0.670464844757437, 0.664772611108765, 0.671145517468628,
0.673968618595099, 0.67986363963374, 0.675352028351748), coef2 = c(0,
0, 0, 0, 0, 0, 0, 0, 0.565178003460693, 0, 0, 0, 0, 0, 0, 0,
0, 0.0433232019717308, 0.0433232019717308, 0.442833876807268,
0.574211955093656, 0.574211955093656, 0.574211955093656, 0.574211955093656,
0.442833876807268, 0.0433232019717308, 0.0433232019717308, 0.0612451242746323,
0.0612451242746323, 0, 0, 0, 0, 0, 0, 0, 0.343056259557492, 0.701076795777046,
0.674029769391816, 0, 0.538117834886036, 0.990039002564078, 0.451921167678043,
0.701076795777046, 0.701076795777046, 0.316009233172263, 0.990039002564078,
0.990039002564078, 0.878350036859346, 0.343364662128988, 0.282119537854356,
0.282119537854356, 0.282119537854356, 0.343364662128988, 0.384793696241895,
0.608382647917744, 0.608382647917744, 1, 0.608382647917744, 0.608382647917744,
0.384793696241895, 0.501936678206125, 0.501936678206125, 0, 0.878350036859346,
0, 0.501936678206125, 0.501936678206125)), row.names = c(NA,
-68L), class = c("tbl_df", "tbl", "data.frame"))
I used this data to create a Delaunay plot in R
library(tidyverse)
library(ggforce)
# Scale coef2 by its maximum, then draw the Delaunay triangulation of the
# (V1, V2) points with tiles, edges and vertices coloured by coef2.
data %>%
  mutate(coef2 = coef2 / max(coef2)) %>%
  ggplot(aes(V1, V2)) +
  geom_delaunay_tile(aes(colour = coef2, fill = coef2), alpha = .5) +
  # Segments have no fill aesthetic, so only colour is mapped on this layer.
  geom_delaunay_segment2(aes(colour = coef2)) +
  geom_point(aes(colour = coef2)) +
  # Limits given from high to low flip the y axis.
  ylim(1, 0) +
  scale_color_viridis_c(option = "magma") +
  scale_fill_viridis_c(option = "magma") +
  theme_minimal()
which gives this
I want to fill each triangle with a blend of the colors of its vertex points, just as the lines are colored.
As you can see, I have tried using fill = coef2 within geom_delaunay_tile, but this doesn't really achieve what I want.
is there a way to do this in R.
Many thanks!

Unable to plot binary outcome and continuous predictor?

I am trying to show how age (V1) is correlated with a binary outcome (V2), however, I am not having any luck with plotting this.
Here are my data:
> dput(head(test, 100))
structure(list(V1 = c(48, 92, 36, NA, 69, NA, NA, 19, 69, 82,
NA, 39, 42, NA, 68, 72, 27, 78, 42, 15, 79, 48, 38, 46, 17, 33,
24, 41, 68, 28, 79, NA, 52, 81, 74, 58, 57, 71, 51, 51, 51, 51,
31, 96, 47, NA, 66, 66, 73, 55, 79, 60, 60, 76, 34, 53, 58, 70,
80, 33, 17, 54, 42, 64, NA, 72, 53, 55, 59, NA, 68, 71, 70, 77,
16, 74, 74, 29, 49, NA, 64, 65, 65, 65, 57, 63, 60, 78, 77, 75,
54, 55, 97, NA, NA, 74, 80, 73, 74, 67), V2 = c(1, 0, 1, NA,
1, NA, NA, 1, 1, 1, NA, 0, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 0,
1, 1, 1, 1, 0, 1, 1, 0, NA, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1,
1, 1, NA, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1,
1, NA, 1, 1, 1, 1, NA, 0, 1, 1, 1, 1, 1, 0, 1, 0, NA, 1, 1, 1,
1, 0, 0, 0, 1, 0, 1, 1, 0, 0, NA, NA, 0, 1, 0, 0, 0)), row.names = c(NA,
100L), class = "data.frame")
Here is what I attempted to do, but I am not getting any sort of smoothing curve to show how age is associated with the binary outcome:
# Scatter plot of the binary outcome V2 against V1 with semi-transparent
# points, plus a loess smoother drawn in blue.
ggplot(test, aes(x=V1, y=V2))+
geom_point(size=2, alpha=0.4)+
stat_smooth(method="loess", color="blue", size=1.5)
And this is what I am trying to create (although I am open to suggestions for better plotting methods).
This is my output (haven't changed the axis labels, but the y-axis should be the binary outcome and the x-axis is age):
If you have binary outcome data and a numeric predictor, the typical way to model this would be with logistic regression. You can show a logistic regression quite easily in ggplot by passing method = glm and method.args = list(family = binomial) to geom_smooth.
You can augment this by adding the successes and failures as a sort of "rug plot", and adding a few aesthetic tweaks:
# Logistic regression curve of V2 on V1, with the observed 0/1 outcomes drawn
# as tick marks along the bottom and top of the panel.
ggplot(test, aes(V1, V2)) +
  # One "|" glyph per observation, coloured by outcome.
  geom_point(shape = "|", size = 6, na.rm = TRUE, aes(color = factor(V2))) +
  # Binomial glm fit with its confidence band.
  geom_smooth(
    method = glm, method.args = list(family = binomial), na.rm = TRUE,
    formula = y ~ x, color = "navy", fill = "lightblue"
  ) +
  # `expand` is documented as a logical flag, so pass FALSE rather than relying
  # on 0 being coerced.
  coord_cartesian(ylim = c(0, 1), expand = FALSE) +
  labs(x = "Age", y = "Probability") +
  theme_minimal(base_size = 16) +
  theme(
    axis.line = element_line(color = "gray"),
    axis.ticks = element_line(color = "gray"),
    axis.ticks.length = unit(3, "mm"),
    legend.position = "none"
  )
Note that this is preferable to a plain loess, because a loess (or any other method that does not explicitly account for the binary nature of the data) will give inaccurate confidence intervals (your target plot has a confidence interval which goes above 100% probability, which clearly doesn't make sense).

Check linearity in logistic regression

In order to check linearity in logistic regression ->
Are independent1 and independent2 linearly related to the log-odds of dependent?
I would like to optimize these (working) calculations:
This is the code:
# Check Linearity ---------------------------------------------------------
# quartiles of independent1
quantile(df$independent1, probs=c(0, 0.25, 0.5, 0.75, 1))
table(df$dependent[df$independent1<52])
table(df$dependent[df$independent1>=52 & df$independent1 < 60])
table(df$dependent[df$independent1>=60 & df$independent1 < 73])
table(df$dependent[df$independent1>=73 & df$independent1 < 91])
p1 <- mean(df$dependent[df$independent1<52])
p2 <- mean(df$dependent[df$independent1>=52 & df$independent1 < 60])
p3 <- mean(df$dependent[df$independent1>=60 & df$independent1 < 73])
p4 <- mean(df$dependent[df$independent1>=73 & df$independent1 < 91])
probs <- c(p1, p2, p3, p4)
# calculate the log-odds
logits <- log(probs/(1-probs))
# quartiles of independent1
q <- quantile(df$independent1, probs=seq(0,1,0.25))
# calculate median independent1 for each of the 4 groups
meds <- c( median(df$independent1[ df$independent1<q[2]]),
median(df$independent1[ df$independent1>=q[2] & df$independent1<q[3]]),
median(df$independent1[ df$independent1>=q[3] & df$independent1<q[4]]),
median(df$independent1[ df$independent1>=q[4]])
)
plot(meds, logits, main="xxx",
xlab = "independent1",
ylab = "log-odds(dependent|independent1)", las=1)
For one variable this might be OK, but I have more independent variables. So how could I optimize this code (checking and plotting) so that it runs for each independent variable (in this example independent1 and independent2)?
My dataframe:
df <- structure(list(dependent = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), independent1 = c(84,
49, 54, 75, 49, 70, 75, 42, 60, 72, 80, 73, 51, 61, 59, 78, 45,
38, 78, 65, 91, 60, 39, 31, 42, 72, 41, 77, 73, 74, 39, 86, 71,
55, 43, 75, 80, 75, 67, 74, 46, 70, 57, 66, 57, 72, 46, 52, 53,
76, 57, 86, 67, 71, 57, 50, 76, 61, 41, 57, 62, 41, 64, 82, 53,
75, 59, 38, 54, 56, 68, 63, 73, 26, 75, 76, 81, 46, 77, 53, 59,
66, 51, 72, 80, 70, 39, 57, 62, 85, 84, 57, 73, 55, 70, 78, 66,
69, 60, 51, 72, 68, 60, 62, 64, 44, 50, 59, 45, 81, 54, 68, 75,
66, 54, 45, 52, 87, 44, 77, 49, 84, 68, 76, 82, 44, 58, 55, 69,
33, 48, 62, 60, 76, 56, 73, 55, 58, 53, 53, 60, 52, 60, 41, 39,
36, 38, 59, 54, 64), independent2 = c(23, 25, 34, 25, 31, 25,
32, 19, 25, 28, 22, 18, 30, 26, 25, 25, 25, 19, 24, 27, 23, 28,
39, 27, 30, 28, 22, 28, 25, 23, 18, 27, 27, 19, 25, 27, 26, 26,
21, 26, 23, 28, 37, 32, 24, 32, 26, 23, 24, 27, 28, 25, 24, 22,
34, 23, 35, 20, 29, 29, 21, 29, 25, 26, 23, 33, 25, 26, 29, 27,
26, 28, 19, 22, 29, 22, 26, 35, 32, 29, 26, 23, 31, 30, 27, 28,
23, 27, 34, 22, 24, 28, 21, 25, 18, 32, 21, 24, 31, 31, 24, 30,
27, 23, 16, 26, 26, 19, 38, 21, 32, 34, 28, 19, 30, 24, 26, 24,
40, 26, 15, 26, 28, 22, 25, 26, 31, 24, 26, 42, 26, 30, 28, 21,
21, 19, 22, 20, 26, 31, 22, 25, 21, 20, 27, 27, 26, 29, 22, 24
)), row.names = c(NA, -150L), class = c("tbl_df", "tbl", "data.frame"
))
I'll demonstrate a somewhat different and decidedly more efficient method of splitting a variable that is to be used in a logistic regression model:
df$q41 <- with(df, cut(independent1, quantile(independent1), include = TRUE))
# creates 4 level factor into roughly equally sized groups
table(df$q41)
#--------------------
#[26,52] (52,60] (60,73] (73,91]
# 39 37 39 35
#Examine for "eyeball" trends in the log-odds of dependent
# Logistic fit without an intercept (`+ 0`), so every quartile group gets its
# own coefficient on the same log-odds scale. `family` is written out in full
# instead of the partially matched `fam`.
fit1.q41 <- glm(dependent ~ q41 + 0, data = df, family = "binomial")
fit1.q41
#---------------------------
Call: glm(formula = dependent ~ q41 + 0, family = "binomial", data = df)
Coefficients:
q41[26,52] q41(52,60] q41(60,73] q41(73,91]
-3.638 -2.862 -2.918 -2.048
Degrees of Freedom: 150 Total (i.e. Null); 146 Residual
Null Deviance: 207.9
Residual Deviance: 65.52 AIC: 73.52
I chose to remove the intercept term because its presence prevented viewing the coefficient of the lowest group on the same scale as the upper 3. The coefficients are just the logits for the grouping I created. Compare:
> logits
[1] -3.555348 -2.740840 -2.970414 -2.169054
> coef(fit1.q41)
q41[26,52] q41(52,60] q41(60,73] q41(73,91]
-3.637586 -2.862201 -2.917771 -2.047693
I then tried to automate the process but ran into a bit of a problem because of the small number of events in one of the quartile groups. The ridiculously low coefficient for the lowest quartile in independent2 comes from the lack of any events (or "1"s) in that category. (An estimated log-odds of -19.566069 does rather point to a proportion of 0.)
# For every column of df except the first: print the column's structure, bin
# it at its quartiles, fit the no-intercept logistic model of df$dependent on
# the bins, and return the coefficient table from summary().
lapply(df[-1], function(x) {
  # str() prints by itself and returns NULL, so no cat() wrapper is needed.
  str(x)
  IVq <- cut(x, breaks = quantile(x), include.lowest = TRUE)
  coef(summary(glm(df$dependent ~ IVq + 0, family = "binomial")))
})
num [1:150] 84 49 54 75 49 70 75 42 60 72 ...
num [1:150] 23 25 34 25 31 25 32 19 25 28 ...
$independent1
Estimate Std. Error z value Pr(>|z|)
IVq[26,52] -3.637586 1.0130639 -3.590678 3.298191e-04
IVq(52,60] -2.862201 0.7270292 -3.936845 8.256004e-05
IVq(60,73] -2.917771 0.7259663 -4.019155 5.840732e-05
IVq(73,91] -2.047693 0.5312796 -3.854266 1.160776e-04
$independent2
Estimate Std. Error z value Pr(>|z|)
IVq[15,23] -19.566069 1639.9716035 -0.01193074 9.904809e-01
IVq(23,26] -3.091042 0.7229988 -4.27530783 1.908734e-05
IVq(26,28] -2.397895 0.7385489 -3.24676555 1.167245e-03
IVq(28,42] -1.856298 0.4808846 -3.86017349 1.133066e-04
> lapply( df[-1], function(x){ IVq <- cut(x, quantile(x), include = TRUE); table(IVq, df$dependent) })
$independent1
IVq 0 1
[26,52] 38 1
(52,60] 35 2
(60,73] 37 2
(73,91] 31 4
$independent2
IVq 0 1
[15,23] 43 0
(23,26] 44 2
(26,28] 22 2
(28,42] 32 5
At any rate I think I've demonstrated a more R-ish approach to calculating logits within quartiles. It also sets you up for a model comparison approach to examining departures from linearity as well as demonstrating possible pitfalls. If you had more events you might have considered looking at the change in deviance from the null model with the addition of a quartile factor on top of a simple linear model ... or even more powerfully using poly to create your comparison model.
In the past when working with datasets with adequate numbers of events I have chosen to split on the basis of quantiles calculated from an event==1 subset rather than letting the splits be based on the whole dataset.

Adding p-values to a polr model (for modelsummary)

I know that polr does not give p-values because they are not very reliable. Nevertheless, I would like to add them to my modelsummary (Vignette) output. I know how to get the values, as follows:
library(MASS)
# Fit the ordinal regression of rep77 on foreign, length and mpg using the
# fullauto data, then print the model summary.
polr_res <- polr(
  as.ordered(rep77) ~ foreign + length + mpg,
  Hess = TRUE,
  data = fullauto
)
summary(polr_res)
Call:
polr(formula = as.ordered(rep77) ~ foreign + length + mpg, data = fullauto,
Hess = TRUE)
## coefficient test
library("AER")
coeftest(polr_res)
modelsummary
Because polr has no p-values, I cannot call modelsummary(models, stars=TRUE) on my models (which includes other models which do have p-values and for which I want to show stars).
library(modelsummary)
# Named list of fitted models handed to modelsummary(). There is no trailing
# comma after the last element: list() rejects an empty final argument.
models <- list(
  "Ordinal Probit" = polr_res
)
# model_names <- c("OLS", "")
modelsummary(models, stars = TRUE)
I tried first to simply add the p-values to the tidy object, but I cannot add that object to the list of models.
# Take the 4th column of the coeftest table (the p-values) for the fitted
# model polr_res and write it into the 5th column of the model's tidy() output.
# Both calls must receive the fitted object polr_res, not the polr() function.
polr_pval <- coeftest(polr_res)[, 4]
polr_pval <- as.data.frame(polr_pval)
tidy_polr <- tidy(polr_res)
tidy_polr[, 5] <- polr_pval
The vignette describes that I can make a custom class which adapts the polr, but I do not understand how:
https://vincentarelbundock.github.io/modelsummary/articles/modelsummary.html#customizing-existing-models-part-i-
https://vincentarelbundock.github.io/modelsummary/articles/modelsummary.html#customizing-existing-models-part-ii-
Could anyone help me figure this out?
EDIT:
I am posting an edit showing the problem I was having when using Vincent's answer, with R version 3.6.1 (2019-07-05). If you are encountering this issue, (preferably) update to R version 4.0.0 or download an update for modelsummary from Github (see also Vincent's comments below):
library(remotes)
remotes::install_github('vincentarelbundock/modelsummary')
Output:
DATA for R
fullauto <- structure(list(make = structure(c(1, 1, 1, 2, 2, 3, 4, 4, 4,
4, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8,
9, 10, 10, 11, 11, 12, 12, 12, 13, 14, 14, 14, 14, 14, 14, 15,
15, 15, 15, 15, 15, 15, 16, 17, 17, 17, 17, 17, 18, 18, 18, 18,
18, 18, 19, 20, 21, 21, 21, 22, 22, 22, 22, 23), label = "Make", format.stata = "%8.0g", class = c("haven_labelled",
"vctrs_vctr", "double"), labels = c(AMC = 1, Audi = 2, BMW = 3,
Buick = 4, Cad. = 5, Chev. = 6, Datsun = 7, Dodge = 8, Fiat = 9,
Ford = 10, Honda = 11, Linc. = 12, Mazda = 13, Merc. = 14, Olds = 15,
Peugeot = 16, Plym. = 17, Pont. = 18, Renault = 19, Subaru = 20,
Toyota = 21, VW = 22, Volvo = 23)), model = structure(c(1, 2,
3, 4, 5000, 320, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 200, 210, 510, 810, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 98, 604, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
58, 59, 60, 61, 62, 63, 64, 65, 260), label = "Model", format.stata = "%8.0g", class = c("haven_labelled",
"vctrs_vctr", "double"), labels = c(Concord = 1, Pacer = 2, Spirit = 3,
Fox = 4, Century = 5, Electra = 6, LeSabre = 7, Opel = 8, Regal = 9,
Riviera = 10, Skylark = 11, Deville = 12, Eldrado = 13, Seville = 14,
Chevette = 15, Impala = 16, Malibu = 17, MCarlo = 18, Monza = 19,
Nova = 20, Colt = 21, Diplomat = 22, Magnum = 23, StRegis = 24,
Strada = 25, Fiesta = 26, Mustang = 27, Accord = 28, Civic = 29,
Cntntl = 30, `Mark V` = 31, Vrsills = 32, GLC = 33, Bobcat = 34,
Cougar = 35, `XR-7` = 36, Marquis = 37, Monarch = 38, Zephyr = 39,
Cutlass = 40, CutlSupr = 41, `Delta 88` = 42, Omega = 43, Starfire = 44,
Toronado = 45, Arrow = 46, Champ = 47, Horizon = 48, Sapporo = 49,
Volare = 50, Catalina = 51, Firebird = 52, GranPrix = 53, `Le Mans` = 54,
Phoenix = 55, Sunbird = 56, `Le Car` = 57, Subaru = 58, Celica = 59,
Corolla = 60, Corona = 61, Rabbit = 62, Diesel = 63, Scirocco = 64,
Dasher = 65)), price = structure(c(4099, 4749, 3799, 6295, 9690,
9735, 4816, 7827, 5788, 4453, 5189, 10372, 4082, 11385, 14500,
15906, 3299, 5705, 4504, 5104, 3667, 3955, 6229, 4589, 5079,
8129, 3984, 4010, 5886, 6342, 4296, 4389, 4187, 5799, 4499, 11497,
13594, 13466, 3995, 3829, 5379, 6303, 6165, 4516, 3291, 4733,
5172, 4890, 4181, 4195, 10371, 8814, 12990, 4647, 4425, 4482,
6486, 4060, 5798, 4934, 5222, 4723, 4424, 4172, 3895, 3798, 5899,
3748, 5719, 4697, 5397, 6850, 7140, 11995), label = "Price", format.stata = "%8.0g"),
mpg = structure(c(22, 17, 22, 23, 17, 25, 20, 15, 18, 26,
20, 16, 19, 14, 14, 21, 29, 16, 22, 22, 24, 19, 23, 35, 24,
21, 30, 18, 16, 17, 21, 28, 21, 25, 28, 12, 12, 14, 30, 22,
14, 14, 15, 18, 20, 19, 19, 18, 19, 24, 16, 21, 14, 38, 34,
25, 26, 18, 18, 18, 19, 19, 19, 24, 26, 35, 18, 31, 18, 25,
41, 25, 23, 17), label = "Mileage (mpg)", format.stata = "%8.0g"),
rep78 = structure(c(3, 3, NA, 3, 5, 4, 3, 4, 3, NA, 3, 3,
3, 3, 2, 3, 3, 4, 3, 2, 2, 3, 4, 5, 4, 4, 5, 2, 2, 2, 3,
4, 3, 5, 4, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 4, 3, 1,
3, 4, NA, 3, 5, 3, NA, 2, 4, 1, 3, 3, NA, 2, 3, 5, 5, 5,
5, 4, 5, 4, 4, 5), label = "Repair Record 1978", format.stata = "%9.0g", class = c("haven_labelled",
"vctrs_vctr", "double"), labels = c(Poor = 1, Fair = 2, Average = 3,
Good = 4, Excellent = 5)), rep77 = structure(c(2, 1, NA,
3, 2, 4, 3, 4, 4, NA, 3, 4, 3, 3, 2, 3, 3, 4, 3, 3, 2, 3,
3, 5, 4, 4, 4, 2, 2, 2, 1, NA, 3, 5, 4, 4, 4, 3, 4, 3, 3,
4, 2, NA, 3, 3, 4, 4, 3, 1, 3, 4, NA, 3, 4, NA, NA, 2, 4,
2, 3, 3, NA, 2, 3, 4, 5, 5, 5, 3, 4, 3, 3, 3), label = "Repair Record 1977", format.stata = "%9.0g", class = c("haven_labelled",
"vctrs_vctr", "double"), labels = c(Poor = 1, Fair = 2, Average = 3,
Good = 4, Excellent = 5)), hdroom = structure(c(2.5, 3, 3,
2.5, 3, 2.5, 4.5, 4, 4, 3, 2, 3.5, 3.5, 4, 3.5, 3, 2.5, 4,
3.5, 2, 2, 3.5, 1.5, 2, 2.5, 2.5, 2, 4, 4, 4.5, 2.5, 1.5,
2, 3, 2.5, 3.5, 2.5, 3.5, 3.5, 3, 3.5, 3, 3.5, 3, 3.5, 4.5,
2, 4, 4.5, 2, 3.5, 4, 3.5, 2, 2.5, 4, 1.5, 5, 4, 1.5, 2,
3.5, 3.5, 2, 3, 2.5, 2.5, 3, 2, 3, 3, 2, 2.5, 2.5), label = "Headroom (in.)", format.stata = "%6.1f"),
rseat = structure(c(27.5, 25.5, 18.5, 28, 27, 26, 29, 31.5,
30.5, 24, 28.5, 30, 27, 31.5, 30, 30, 26, 29.5, 28.5, 28.5,
25, 27, 21, 23.5, 22, 27, 24, 29, 29, 28, 26.5, 26, 23, 25.5,
23.5, 30.5, 28.5, 27, 25.5, 25.5, 29.5, 25, 30.5, 27, 29,
28, 28, 29, 27, 25.5, 30, 31.5, 30.5, 21.5, 23, 25, 22, 31,
29, 23.5, 28.5, 28, 27, 25, 23, 25.5, 22, 24.5, 23, 25.5,
25.5, 23.5, 37.5, 29.5), label = "Rear Seat (in.)", format.stata = "%6.1f"),
trunk = structure(c(11, 11, 12, 11, 15, 12, 16, 20, 21, 10,
16, 17, 13, 20, 16, 13, 9, 20, 17, 16, 7, 13, 6, 8, 8, 8,
8, 17, 17, 21, 16, 9, 10, 10, 5, 22, 18, 15, 11, 9, 16, 16,
23, 15, 17, 16, 16, 20, 14, 10, 17, 20, 14, 11, 11, 17, 8,
16, 20, 7, 16, 17, 13, 7, 10, 11, 14, 9, 11, 15, 15, 16,
12, 14), label = "Trunk space (cu. ft.)", format.stata = "%8.0g"),
weight = structure(c(2930, 3350, 2640, 2070, 2830, 2650,
3250, 4080, 3670, 2230, 3280, 3880, 3400, 4330, 3900, 4290,
2110, 3690, 3180, 3220, 2750, 3430, 2370, 2020, 2280, 2750,
2120, 3600, 3600, 3740, 2130, 1800, 2650, 2240, 1760, 4840,
4720, 3830, 1980, 2580, 4060, 4130, 3720, 3370, 2830, 3300,
3310, 3690, 3370, 2730, 4030, 4060, 3420, 3260, 1800, 2200,
2520, 3330, 3700, 3470, 3210, 3200, 3420, 2690, 1830, 2050,
2410, 2200, 2670, 1930, 2040, 1990, 2160, 3170), label = "Weight (lbs.)", format.stata = "%8.0g"),
length = structure(c(186, 173, 168, 174, 189, 177, 196, 222,
218, 170, 200, 207, 200, 221, 204, 204, 163, 212, 193, 200,
179, 197, 170, 165, 170, 184, 163, 206, 206, 220, 161, 147,
179, 172, 149, 233, 230, 201, 154, 169, 221, 217, 212, 198,
195, 198, 198, 218, 200, 180, 206, 220, 192, 170, 157, 165,
182, 201, 214, 198, 201, 199, 203, 179, 142, 164, 174, 165,
175, 155, 155, 156, 172, 193), label = "Length (in.)", format.stata = "%8.0g"),
turn = structure(c(40, 40, 35, 36, 37, 34, 40, 43, 43, 34,
42, 43, 42, 44, 43, 45, 34, 43, 31, 41, 40, 43, 35, 32, 34,
38, 35, 46, 46, 46, 36, 33, 43, 36, 34, 51, 48, 41, 33, 39,
48, 45, 44, 41, 43, 42, 42, 42, 43, 40, 43, 43, 38, 37, 37,
36, 38, 44, 42, 42, 45, 40, 43, 41, 34, 36, 36, 35, 36, 35,
35, 36, 36, 37), label = "Turn Circle (ft.) ", format.stata = "%8.0g"),
displ = structure(c(121, 258, 121, 97, 131, 121, 196, 350,
231, 304, 196, 231, 231, 425, 350, 350, 231, 250, 200, 200,
151, 250, 119, 85, 119, 146, 98, 318, 318, 225, 105, 98,
140, 107, 91, 400, 400, 302, 86, 140, 302, 302, 302, 250,
140, 231, 231, 231, 231, 151, 350, 350, 163, 156, 86, 105,
119, 225, 231, 231, 231, 231, 231, 151, 79, 97, 134, 97,
134, 89, 90, 97, 97, 163), label = "Displacement (cu. in.)", format.stata = "%8.0g"),
gratio = structure(c(3.57999992370605, 2.52999997138977,
3.07999992370605, 3.70000004768372, 3.20000004768372, 3.64000010490417,
2.9300000667572, 2.41000008583069, 2.73000001907349, 2.86999988555908,
2.9300000667572, 2.9300000667572, 3.07999992370605, 2.27999997138977,
2.19000005722046, 2.24000000953674, 2.9300000667572, 2.55999994277954,
2.73000001907349, 2.73000001907349, 2.73000001907349, 2.55999994277954,
3.89000010490417, 3.70000004768372, 3.53999996185303, 3.54999995231628,
3.53999996185303, 2.47000002861023, 2.47000002861023, 2.94000005722046,
3.36999988555908, 3.15000009536743, 3.07999992370605, 3.04999995231628,
3.29999995231628, 2.47000002861023, 2.47000002861023, 2.47000002861023,
3.73000001907349, 2.73000001907349, 2.75, 2.75, 2.25999999046326,
2.4300000667572, 3.07999992370605, 2.9300000667572, 2.9300000667572,
2.73000001907349, 3.07999992370605, 2.73000001907349, 2.41000008583069,
2.41000008583069, 3.57999992370605, 3.04999995231628, 2.97000002861023,
3.36999988555908, 3.53999996185303, 3.23000001907349, 2.73000001907349,
3.07999992370605, 2.9300000667572, 2.9300000667572, 3.07999992370605,
2.73000001907349, 3.72000002861023, 3.80999994277954, 3.05999994277954,
3.21000003814697, 3.04999995231628, 3.77999997138977, 3.77999997138977,
3.77999997138977, 3.74000000953674, 2.98000001907349), label = "Gear Ratio", format.stata = "%6.2f"),
order = structure(c(1, 2, 3, 5, 4, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
43, 44, 45, 47, 48, 49, 50, 51, 52, 46, 53, 54, 55, 56, 57,
58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
73, 74), label = "Original order", format.stata = "%8.0g"),
foreign = structure(c(0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1), label = "Foreign", format.stata = "%8.0g", class = c("haven_labelled",
"vctrs_vctr", "double"), labels = c(Domestic = 0, Foreign = 1
)), wgtd = structure(c(2930, 3350, 2640, NA, NA, NA, 3250,
4080, 3670, 2230, 3280, 3880, 3400, 4330, 3900, 4290, 2110,
3690, 3180, 3220, 2750, 3430, NA, NA, NA, NA, 2120, 3600,
3600, 3740, NA, 1800, 2650, NA, NA, 4840, 4720, 3830, NA,
2580, 4060, 4130, 3720, 3370, 2830, 3300, 3310, 3690, 3370,
2730, 4030, 4060, NA, 3260, 1800, 2200, 2520, 3330, 3700,
3470, 3210, 3200, 3420, 2690, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), format.stata = "%9.0g"), wgtf = structure(c(NA,
NA, NA, 2070, 2830, 2650, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 2370, 2020, 2280, 2750, NA,
NA, NA, NA, 2130, NA, NA, 2240, 1760, NA, NA, NA, 1980, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3420, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1830, 2050, 2410,
2200, 2670, 1930, 2040, 1990, 2160, 3170), format.stata = "%9.0g")), label = "Automobile Models", row.names = c(NA,
-74L), class = c("tbl_df", "tbl", "data.frame"))
I think the easiest way to achieve this is to define a tidy_custom.polr method as described here in the documentation. For instance, you could do:
library(MASS)
library(AER)
library(modelsummary)
# Custom tidy method supplying p-values for polr fits, so that
# modelsummary(..., stars = TRUE) has something to build stars from.
#
# x    a fitted model object; it is passed straight to coeftest(), which must
#      be available (e.g. via library(AER)).
# ...  accepted for method compatibility and ignored.
#
# Returns a data.frame with one row per row of the coeftest() table and two
# columns: `term` (character) and `p.value` (numeric).
tidy_custom.polr <- function(x, ...) {
  s <- coeftest(x)
  # The p-value column is labelled "Pr(>|z|)" or "Pr(>|t|)" depending on the
  # reference distribution coeftest() used, so accept either label instead of
  # hard-coding one of them.
  p_col <- grep("^Pr\\(>\\|[tz]\\|\\)$", colnames(s))
  if (length(p_col) != 1L) {
    stop("coeftest() output has no unique p-value column", call. = FALSE)
  }
  data.frame(
    term = row.names(s),
    p.value = s[, p_col],
    # Keep `term` as character on R < 4.0 as well, where data.frame() would
    # otherwise convert it to a factor.
    stringsAsFactors = FALSE
  )
}
# Two models on mtcars, a linear model and an ordinal regression of gear,
# summarised side by side with significance stars requested.
mod <- list(
  "LM" = lm(gear ~ hp + mpg, data = mtcars),
  "POLR" = polr(as.ordered(gear) ~ hp + mpg, data = mtcars)
)
modelsummary(mod, stars = TRUE)

Resources