Using a sample dataframe:
df <- structure(list(SITCD = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("GSO/TO", "IKOF", "JL",
"MES", "SSD", "USSD"), class = "factor"), Code = structure(c(27L,
21L, 3L, 25L, 26L, 20L, 2L, 28L, 230L, 16L, 4L, 10L, 15L, 1L), .Label = c("AAR-2107",
"AAR-643", "AAR-644", "AAR-995", "HAR-2956", "HAR-2957", "I-430",
"I-431", "I-432", "I-9490", "I-9491", "K-1461", "K-1740", "K-1915",
"K-2034", "K-2096", "K-2385", "K-2386", "K-2387", "K-3112", "K-3220",
"K-3224", "Lu-1095", "Lu-1103", "LU-3282", "LU-3283", "LU-3284",
"LU-3400", "Lu-487", "Lu-489,90", "Lu-491,92", "Lu-528", "Lu-529",
"Lu-530", "Lu-531", "Lu-585", "Lu-586", "Lu-608", "Lu-646", "Lu-647",
"Lu-648", "Lu-711", "Lu-714", "Lu-766", "Lu-768", "Lu-790", "Lu-792",
"Lu-793", "Lu-826", "Lu-827", "Lu-828", "Lu-829", "Lu-830", "Lu-831",
"Lu584", "M-1611", "M-1612", "M-1613", "M-1614", "M-1615", "M-1616",
"M-1617", "M-1618", "M-1619", "M-1620", "M-1621", "M-1622", "M-1623",
"M-1624", "OS-49305", "OS-49306", "OS-49308", "OS-49309", "OS-49311",
"OS-49312", "OS-49313", "OS-49314", "OS-49315", "OS-49384", "OS-49385",
"OS-49386", "OS-49387", "OS-49403", "OS-49414", "OS-49437", "OS-49440",
"OS-49441", "OS-49442", "OS-49493", "OS-49496", "OS-49499", "OS-49502",
"OS-49506", "OS-49515", "OS-49516", "OS-49517", "OS-49518", "OS-49519",
"OS-49520", "OS-49555", "OS-49558", "OS-49562", "OS-49565", "OS-49578",
"OS-49580", "OS-49581", "OS-49582", "OS-49583", "OS-49584", "OS-49605",
"OS-49606", "OS-49607", "OS-51568", "OS-51716", "OS-51759", "OS-51760",
"OS-51765", "OS-51766", "OS-51767", "OS-51769", "OS-51770", "OS-51774",
"OS-51775", "OS-51776", "OS-51845", "OS-51846", "OS-51847", "OS-51874",
"OS-51875", "OS-51882", "OS-51883", "OS-51884", "OS-51885", "OS-52112",
"OS-52956", "OS-52957", "OS-52962", "OS-52963", "OS-52964", "OS-52966",
"OS-52967", "OS-52968", "OS-52969", "OS-52970", "OS-54002", "OS-54004",
"OS-54005", "OS-54006", "OS-54007", "OS-54008", "OS-54009", "OS-54045",
"OS-54046", "OS-54048", "OS-54073", "OS-54074", "OS-54075", "OS-54076",
"OS-54077", "OS-54892", "OS-55609", "OS-55610", "OS-55611", "OS-55612",
"OS-55613", "OS-55614", "OS-55724", "OS-55725", "OS-55728", "OS-55729",
"OS-55730", "OS-55731", "OS-55732", "OS-55733", "OS-55734", "OS-55735",
"OS-55736", "OS-55737", "OS-58249", "OS-58250", "OS-58324", "OS-58325",
"OS-58326", "OS-58327", "OS-58509", "OS-58606", "OS-58607", "OS-58609",
"OS-58673", "OS-58674", "OS-58701", "OS-58702", "OS-58703", "OS-58704",
"OS-58705", "OS-58732", "OS-58735", "OS-59579", "OS-62849", "OS-62850",
"OS-62851", "OS-62852", "OS-62855", "OS-62985", "OS-62986", "OS-62992",
"OS-62994", "OS-64754", "OS-64755", "OS-64756", "OS-64759", "OS-64760",
"OS-64762", "OS-64764", "OS-64765", "OS-64766", "OS-64843", "OS-64844",
"OS-64845", "OS-64849", "OS-65398", "OS-65399", "OS-65401", "OS-65405",
"OS-65406", "OS-65435", "OS-65436", "OS-65437", "OS-65438", "T-10382",
"Unknown", "W-1381", "Y596", "Y599", "Y600", "Y602", "Y702",
"Y703", "Y704", "Y708", "Y711", "Y712", "Y713", "Y714", "Y716",
"Y717", "Y876", "Y878", "Y879", "Y882", "Y883", "Y884"), class = "factor"),
Type = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 1L, 1L), .Label = c("Above", "At", "Below"), class = "factor"),
RSL = c(5, 8, 17.5, 19, 27, 30, 30, 33, 35, 40, 40, 50, 53,
70), RSL_error = c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5,
2), Age = c(8183.5, 9221.5, 10424.5, 10069, 9092, 10465.5,
9204.5, 10531.5, 9844.5, 10073.5, 9905, 9907.5, 11660, 10698.5
), age_error = c(232.5, 295.5, 519.5, 371, 323, 377.5, 336.5,
324.5, 318.5, 408.5, 327, 380.5, 463, 394.5), x_min_error = c(7951L,
8926L, 9905L, 9698L, 8769L, 10088L, 8868L, 10207L, 9526L,
9665L, 9578L, 9527L, 11197L, 10304L), x_max_error = c(8416L,
9517L, 10944L, 10440L, 9415L, 10843L, 9541L, 10856L, 10163L,
10482L, 10232L, 10288L, 12123L, 11093L), y_min_error = c(3,
6, 15.5, 17, 25, 28, 28, 31, 33, 38, 38, 48, 48, 68), y_max_error = c(7,
10, 19.5, 21, 29, 32, 32, 35, 37, 42, 42, 52, 58, 72)), .Names = c("SITCD",
"Code", "Type", "RSL", "RSL_error", "Age", "age_error", "x_min_error",
"x_max_error", "y_min_error", "y_max_error"), row.names = c(NA,
14L), class = "data.frame")
I wish to draw a graph using the following code:
# Age vs. RSL scatter plot with vertical and horizontal error bars,
# using one point shape per Type.
g <- ggplot(df, aes(x = Age, y = RSL, shape = Type)) +
  geom_point() +
  # point shapes 1, 15 and 5, assigned to the levels of Type in order
  scale_shape_manual(values = c(1, 15, 5)) +
  # solid black x and y axis lines
  theme(
    axis.line = element_line(colour = "black", size = 0.5, linetype = "solid")
  ) +
  # vertical (RSL) error bars, drawn without end caps
  geom_errorbar(aes(ymin = y_min_error, ymax = y_max_error, width = 0)) +
  # horizontal (Age) error bars, drawn without end caps
  geom_errorbarh(aes(xmin = x_min_error, xmax = x_max_error, height = 0)) +
  theme_classic() +
  theme_bw() + # black outline around the graph
  xlim(0, 14000) + # set axis limits
  ylim(0, 120) +
  # scale_x_continuous(breaks = seq(0, 14000, 2000)) +
  # scale_y_continuous(breaks = seq(0, 120, 20)) +
  theme(legend.position = "bottom")
g
I was wondering why I am having difficulty setting the axes scale. I am trying to use the scale_x_continuous(breaks=seq(...) code which wasn't working. I then read elsewhere that I had to set the limits of the scales which I did with xlim/ylim but I can't use this with the scale_x_continuous code as I get the error message:
Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Does anyone have any ideas?
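Replace xlim(0, 14000) with scale_x_continuous(breaks = seq(0, 14000, 2000), limits = c(0, 14000)), and ylim(0, 120) with scale_y_continuous(breaks = seq(0, 120, 20), limits = c(0, 120)). xlim() and ylim() each add a scale of their own, so a further scale_x_continuous()/scale_y_continuous() call replaces it (hence the message); set the limits and the breaks in the same call instead.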
Tidier code:
library(ggplot2)

# Age vs. RSL with error bars. Limits and breaks of each axis are set in a
# single scale_*_continuous() call, so no scale is replaced by a later one.
ggplot(df, aes(Age, RSL, shape = Type)) +
  geom_point() +
  geom_errorbarh(aes(xmin = x_min_error, xmax = x_max_error, height = 0)) +
  geom_errorbar(aes(ymin = y_min_error, ymax = y_max_error, width = 0)) +
  scale_shape_manual(values = c(1, 15, 5)) +
  # breaks start at 0 so the ticks fall on round values (0, 2000, ..., 14000)
  scale_x_continuous(breaks = seq(0, 14000, 2000), limits = c(0, 14000)) +
  scale_y_continuous(breaks = seq(0, 120, 20), limits = c(0, 120))
Related
I am making a geom_col in ggplot2. The x-axis is a numerical vector of timepoints (0, 6, 18, 24, 32, 44). The gap between the columns corresponds to the numerical difference between the timepoints, but I want an equal distance between all the columns. I have searched for answers here, but I didn't find a similar issue.
This is my code:
# Dodged columns of relative wound healing per time point and Condition,
# with the individual observations overlaid as points.
ggplot(
  data = ny_dataframe_scratch,
  mapping = aes(x = timepoint, y = relative_wound_healing, fill = Condition)
) +
  geom_col(position = position_dodge(), width = 5) +
  # timepoint is numeric, so this is a continuous x axis
  scale_x_continuous(
    name = "Time point, hours",
    breaks = c(0, 6, 18, 24, 32, 44)
  ) +
  scale_y_continuous(name = "Relative scratch area") +
  scale_fill_manual(values = c("palevioletred4", "slategray")) +
  geom_point(
    data = ny_dataframe_scratch,
    mapping = aes(x = timepoint, y = relative_wound_healing, fill = Condition),
    position = position_dodge(width = 5)
  )
This is the output of dput():
structure(list(timepoint = c(0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6,
6, 18, 18, 18, 18, 18, 18, 24, 24, 24, 24, 24, 24, 32, 32, 32,
32, 32, 32, 44, 44, 44, 44, 44, 44), Condition = structure(c(2L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
1L, 1L, 1L), .Label = c("Control", "Knockout"), class = "factor"),
relative_wound_healing = c(1, 1, 1, 1, 1, 1, 0.819981, 0.78227,
0.811902, 0.873852, 0.893572, 0.910596, 0.39819, 0.436948,
0.559486, 0.534719, 0.591295, 0.612154, 0.222731, 0.2592,
0.453575, 0.37238, 0.477891, 0.505393, 0.05243246, 0.0809449,
0.2108063, 0.261122, 0.3750218, 0.4129873, 0, 0.0240122,
0.0778219, 0.0806758, 0.2495444, 0.3203724)), class = "data.frame", row.names = c(NA,
-36L))
Picture of how the graph looks:
The x-scale has proportional gaps because ‘ggplot2’ considers the values as continuous rather than categorical.
To make it categorical, you can for instance use factors:
aes(x = factor(timepoint, ordered = TRUE), …
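(ordered = TRUE is not strictly required here: factor() sorts the levels of a numeric vector numerically, so 5 comes before 11, and ‘ggplot2’ plots the levels in that order. Only if the time points were stored as character strings would the levels be sorted alphabetically, putting "11" before "5", which probably isn’t what you want.)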
To fix the bar heights, you need to compute and plot a summary statistic — ‘ggplot2’ allows you to do this using stat_summary (instead of geom_col):
# `fun` replaces the `fun.y` argument, which is deprecated since ggplot2 3.3.0
stat_summary(fun = mean, geom = "col", position = position_dodge())
Taken together:
# Mean relative wound healing per time point and Condition as dodged bars,
# with the individual observations overlaid as points.
ggplot(ny_dataframe_scratch) +
  aes(
    # a factor gives a discrete axis, so the bars are equally spaced
    x = factor(timepoint, ordered = TRUE),
    y = relative_wound_healing,
    fill = Condition
  ) +
  scale_fill_manual(values = c("palevioletred4", "slategray")) +
  # `fun` replaces the `fun.y` argument, deprecated since ggplot2 3.3.0
  stat_summary(fun = mean, geom = "col", position = position_dodge()) +
  # dodge by the default bar width (0.9) so the points sit centred on the bars
  geom_point(position = position_dodge(width = 0.9)) +
  labs(x = "Time point, hours", y = "Relative scratch area")
Your timepoints are "numeric". Try coercing them to factor. At that point, ggplot should plot them at equidistance from each other.
# Turn the numeric time points into a factor so they are placed on a
# discrete axis (ny_dataframe_scratch is the data frame from the question).
ny_dataframe_scratch$timepoint <- as.factor(ny_dataframe_scratch$timepoint)
I have this plot visualizing surgical procedures before and after Covid-19.
The time span I am looking at is between 2017-01-02 and 2020-12-31 (yyyy-mm-dd). However, the plot automatically includes Dec2016 and March2021. I want to omit these two breaks.
First
# Number of procedures per period (corona) and consecutive week, plus the
# calendar date on which each week starts. count() returns an ungrouped
# result with the counts in column `n`.
lin.model <- b %>%
  count(corona, cons_week) %>%
  # cons_week is 1-based in the sample data (2017-01-02 falls in week 1), so
  # week k starts (k - 1) * 7 days after 2017-01-02. Using cons_week * 7
  # would place every point one week late and push the last week into 2021.
  mutate(cons_week_dt = as.Date("2017-01-02") + (cons_week - 1) * 7)
Then
# Weekly procedure counts over time, coloured by period (corona), with a
# dashed default smoother and a black linear fit on top.
ggplot(
  lin.model,
  aes(x = cons_week_dt, y = n, color = corona, fill = corona)
) +
  geom_point(size = 5, shape = 21) +
  # TRUE/FALSE are spelled out: T and F are ordinary, reassignable variables
  geom_smooth(linetype = 2, show.legend = FALSE) +
  geom_smooth(se = FALSE, method = lm, color = "black", show.legend = FALSE) +
  scale_color_manual(
    name = "",
    values = c("#8B3A62", "#6DBCC3"),
    labels = c("COVID-19", "Normal"),
    guide = guide_legend(reverse = TRUE)
  ) +
  # same palette as the outline colours, but semi-transparent
  scale_fill_manual(
    name = "",
    values = alpha(c("#8B3A62", "#6DBCC3"), .25),
    labels = c("COVID-19", "Normal"),
    guide = guide_legend(reverse = TRUE)
  ) +
  annotate(
    "text",
    x = as.Date("2020-03-13"), y = 26.5,
    label = "Lockdown\n2020-03-11", size = 5, color = "red"
  ) +
  # expand = c(0.07, 0) pads the x axis on both sides by 7 % of the data range
  scale_x_date(
    name = "",
    date_breaks = "3 months", date_labels = "%b%Y", expand = c(0.07, 0)
  ) +
  scale_y_continuous(
    name = "",
    breaks = seq(0, 30, 5), limits = c(0, 30)
  ) +
  theme(
    axis.title.y = element_text(
      color = "grey20", size = 17, face = "bold",
      margin = ggplot2::margin(r = 10)
    ),
    axis.line = element_line(colour = "black"),
    axis.text.x = element_text(size = 15, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 15),
    panel.grid.major = element_line(colour = "grey90"),
    panel.grid.minor = element_line(colour = "grey90"),
    panel.border = element_blank(),
    panel.background = element_blank(),
    legend.position = "top",
    legend.key = element_rect(fill = "white"),
    legend.text = element_text(size = 15)
  )
Attempts
I tried
(1) ... coord_cartesian(xlim = as.Date(c('2/1/2017', '31/12/2020'), format="%d/%m/%Y")) + ...
But that did not work
(2) ... xlim(as.Date(c('2/1/2017', '31/12/2020'), format="%d/%m/%Y")) + ...
That worked, but the x-axis turned into years regardless of what specified in scale_x_date(date_break = ... )
Expected output
My aim is to omit the Dec2016 and March2021-part:
Data sample
b <- structure(list(diagnosis = c("2017-10-19", "2017-07-11", "2020-06-30",
"2020-06-27", "2017-01-04", "2017-12-07", "2017-09-18", "2020-07-27",
"2020-08-28", "2020-12-29", "2018-04-12", "2020-06-20", "2020-08-29",
"2018-02-05", "2018-01-12", "2017-07-15", "2018-03-07", "2020-02-29",
"2019-08-24", "2017-08-08", "2018-11-27", "2017-03-15", "2017-05-12",
"2020-10-22", "2019-08-31", "2017-11-17", "2019-04-17", "2018-11-15",
"2018-02-08", "2019-08-09", "2019-10-06", "2017-08-30", "2019-05-09",
"2017-06-05", "2017-10-04", "2018-01-27", "2017-06-16", "2019-03-29",
"2017-06-16", "2018-07-19", "2020-04-23", "2020-01-31", "2020-06-27",
"2019-12-11", "2019-08-13", "2017-05-07", "2020-05-08", "2020-09-05",
"2019-12-18", "2018-07-24", "2017-07-31", "2017-01-23", "2018-09-08",
"2018-12-18", "2017-08-01", "2019-04-11", "2017-05-12", "2019-03-15",
"2019-06-12", "2017-05-10", "2020-10-27", "2018-08-26", "2019-06-03",
"2020-07-31", "2017-12-02", "2018-11-07", "2018-03-23", "2019-08-18",
"2019-08-30", "2018-07-23", "2018-08-08", "2018-10-10", "2019-05-26",
"2017-11-18", "2020-07-19", "2017-02-07", "2017-08-15", "2020-01-05",
"2019-07-28", "2017-05-28", "2017-01-02", "2018-09-25", "2017-03-26",
"2017-04-24", "2018-03-26", "2020-12-01", "2018-09-27", "2019-09-26",
"2017-10-06", "2019-01-11", "2020-08-15", "2017-02-06", "2018-06-07",
"2018-03-15", "2017-12-17", "2017-02-08", "2019-11-02", "2020-12-05",
"2017-09-16", "2017-06-18"), cons_week = c(42, 28, 183, 182,
1, 49, 38, 187, 191, 209, 67, 181, 191, 58, 54, 28, 62, 165,
138, 32, 100, 11, 19, 199, 139, 46, 120, 98, 58, 136, 144, 35,
123, 23, 40, 56, 24, 117, 24, 81, 173, 161, 182, 154, 137, 18,
175, 192, 155, 82, 31, 4, 88, 103, 31, 119, 19, 115, 128, 19,
200, 86, 127, 187, 48, 97, 64, 137, 139, 82, 84, 93, 125, 46,
185, 6, 33, 157, 134, 21, 1, 91, 12, 17, 65, 205, 91, 143, 40,
106, 189, 6, 75, 63, 50, 6, 148, 205, 37, 24), corona = structure(c(2L,
2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 2L, 2L), .Label = c("C19", "Normal"), class = "factor")), row.names = c(NA,
-100L), class = c("tbl_df", "tbl", "data.frame"))
Use expand = c(0, 0).
library(ggplot2)

# Weekly procedure counts over time, coloured by period (corona), with a
# dashed default smoother and a black linear fit on top.
ggplot(
  lin.model,
  aes(x = cons_week_dt, y = n, color = corona, fill = corona)
) +
  geom_point(size = 5, shape = 21) +
  # TRUE/FALSE are spelled out: T and F are ordinary, reassignable variables
  geom_smooth(linetype = 2, show.legend = FALSE) +
  geom_smooth(se = FALSE, method = lm, color = "black", show.legend = FALSE) +
  scale_color_manual(
    name = "",
    values = c("#8B3A62", "#6DBCC3"),
    labels = c("COVID-19", "Normal"),
    guide = guide_legend(reverse = TRUE)
  ) +
  # same palette as the outline colours, but semi-transparent
  scale_fill_manual(
    name = "",
    values = alpha(c("#8B3A62", "#6DBCC3"), .25),
    labels = c("COVID-19", "Normal"),
    guide = guide_legend(reverse = TRUE)
  ) +
  annotate(
    "text",
    x = as.Date("2020-03-13"), y = 26.5,
    label = "Lockdown\n2020-03-11", size = 5, color = "red"
  ) +
  # expand = c(0, 0) removes the padding around the data range, so no
  # breaks are drawn before the first or after the last observed date
  scale_x_date(
    name = "",
    date_breaks = "3 months", date_labels = "%b%Y", expand = c(0, 0)
  ) +
  scale_y_continuous(
    name = "",
    breaks = seq(0, 30, 5), limits = c(0, 30)
  ) +
  theme(
    axis.title.y = element_text(
      color = "grey20", size = 17, face = "bold",
      margin = ggplot2::margin(r = 10)
    ),
    axis.line = element_line(colour = "black"),
    axis.text.x = element_text(size = 15, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 15),
    panel.grid.major = element_line(colour = "grey90"),
    panel.grid.minor = element_line(colour = "grey90"),
    panel.border = element_blank(),
    panel.background = element_blank(),
    legend.position = "top",
    legend.key = element_rect(fill = "white"),
    legend.text = element_text(size = 15)
  )
I would like to control the thickness of the lines in my plot, but I ran into some difficulty. It seems that adding size=0.06 or size=2 in geom_line() does not actually change the line thickness. It also adds a strange legend to the output. How should I fix that?
The code I used for plotting is:
# AVAL over ADY: one coloured line per PARAMCD plus a point per observation.
# Note that size is given inside aes() here, i.e. it is treated as a mapping.
ggplot(data = df) +
  geom_line(
    mapping = aes(
      x = ADY, y = AVAL, color = PARAMCD,
      yaxs = "d", xaxs = "d", size = 0.06
    )
  ) +
  geom_point(mapping = aes(x = ADY, y = AVAL)) +
  scale_color_discrete(
    breaks = c("SYSBP", "DIABP", "PULSE"),
    name = "Vital signs",
    labels = c("Systolic BP", "Diastolic BP", "Pulse")
  ) +
  scale_colour_manual(
    values = c(DIABP = "#512d69", SYSBP = "#007254", PULSE = "#fd9300")
  )
The output for size=0.06 and size =2 are:
Could someone give me some guidance on this? I don't want size to be shown on legend and I would like to control the thickness of the lines.Thanks.
The sample data can be built using this code:
df<- structure(list(ADY = c(-6, -6, -6, 1, 1, 1, 8, 8, 8, 15, 15,
15, 22, 22, 22, 29, 29, 29, 43, 43, 43, 57, 57, 57, 64, 87, 87,
87, 101, 101, 101), AVAL = c(66, 67, 127, 70, 58, 136, 68, 74,
140, 145, 74, 58, 75, 72, 149, 82, 66, 143, 86, 60, 159, 64,
87, 136, NA, 73, 58, 135, 141, 74, 74), PARAMCD = structure(c(3L,
1L, 2L, 1L, 3L, 2L, 3L, 1L, 2L, 2L, 1L, 3L, 1L, 3L, 2L, 1L, 3L,
2L, 1L, 3L, 2L, 3L, 1L, 2L, NA, 1L, 3L, 2L, 2L, 1L, 3L), .Label = c("DIABP",
"SYSBP", "PULSE"), class = "factor")), row.names = c(NA, -31L
), class = "data.frame")
size should be outside aes for your case :
You can see the difference between between size = 0.06 and size = 2.
library(ggplot2)

# AVAL over ADY, one coloured line per PARAMCD. `size` is a constant, so it is
# passed to geom_line() outside aes(): it then sets the line width directly
# and does not create a legend entry.
# (ggplot2 >= 3.4.0 prefers the name `linewidth` over `size` for lines.)
# yaxs/xaxs are base-graphics parameters, not ggplot2 aesthetics, so they are
# left out. Legend title, order and labels are all set in the one manual
# colour scale, because a second colour scale would replace the first.

# Thin lines: size = 0.06
ggplot(data = df) +
  geom_line(aes(x = ADY, y = AVAL, color = PARAMCD), size = 0.06) +
  geom_point(aes(x = ADY, y = AVAL)) +
  scale_colour_manual(
    name = "Vital signs",
    values = c(DIABP = "#512d69", SYSBP = "#007254", PULSE = "#fd9300"),
    breaks = c("SYSBP", "DIABP", "PULSE"),
    labels = c("Systolic BP", "Diastolic BP", "Pulse")
  )

# Thick lines: size = 2
ggplot(data = df) +
  geom_line(aes(x = ADY, y = AVAL, color = PARAMCD), size = 2) +
  geom_point(aes(x = ADY, y = AVAL)) +
  scale_colour_manual(
    name = "Vital signs",
    values = c(DIABP = "#512d69", SYSBP = "#007254", PULSE = "#fd9300"),
    breaks = c("SYSBP", "DIABP", "PULSE"),
    labels = c("Systolic BP", "Diastolic BP", "Pulse")
  )
I created a boxplot showing the dispersal distance $dist of some species $spe, and I would like the width of the boxes to be proportional to the density of regeneration of these species. I used "varwidth" and weight aesthetic as shown below, but this is still not correct, as it is still proportional to the number of observations and not only to the density of regeneration...
(for the density, I calculated the proportion for each species, so it goes from 10 to 100. It is given in the column data_dist2$prop2)
# Horizontal boxplots of dispersal distance per species, ordered by prop2,
# with variable box widths weighted by prop2.
p <- ggplot(data_dist2, aes(x = reorder(spe, prop2), y = dist)) +
  coord_flip() +
  geom_boxplot(
    # refer to the column by its bare name; aes() looks it up in the plot
    # data, so data_dist2$prop2 is unnecessary (and discouraged by ggplot2)
    aes(weight = prop2),
    varwidth = TRUE, alpha = 0.3, fill = "grey10"
  )
Would you have any idea how to make the boxplot exactly proportional to my prop2 column?
Reproducible example:
structure(list(spe = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("Abies concolor", "Picea abies", "Sequoia semp."
), class = "factor"), dist = c(0, 0, 3, 3, 4, 4, 25, 46, 59,
113, 113, 9, 12, 12, 12, 15, 22, 22, 22, 22, 35, 35, 36, 49,
85, 85, 90, 5, 5, 1, 1, 8, 13, 48, 48, 52, 52, 52, 65, 89), prop2 = c(92.17,
92.17, 92.17, 92.17, 92.17, 92.17, 92.17, 92.17, 92.17, 92.17,
92.17, 10.9, 10.9, 10.9, 10.9, 10.9, 10.9, 10.9, 10.9, 10.9,
10.9, 10.9, 10.9, 10.9, 10.9, 10.9, 10.9, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100)), row.names = c(NA,
-40L), class = "data.frame")
Weight doesn't seem to be designed exactly for this, but you can hack it a bit. First note that the weight given to each group is the sum of the weights of the observations, so if you have a different number of observation for each species then you may need to change prop2 to the current value divided by the number of observations in the group. (I can't tell from your example if this applies)
Then note that the width is proportional to the square root of the weight, so change your code to reverse that with:
# Horizontal boxplots of dispersal distance per species, with box widths
# proportional to prop2.
# With varwidth = TRUE the width of a box is proportional to the square root
# of the summed weights of its group. Giving each of the N observations of a
# species the weight prop2^2 / N makes that sum prop2^2, hence width ~ prop2,
# independent of how many observations the species has.
p <- ggplot(data_dist2, aes(x = reorder(spe, prop2), y = dist)) +
  coord_flip() +
  geom_boxplot(
    # ave(..., FUN = length) returns, for every row, the number of rows of
    # its species
    aes(weight = prop2^2 / ave(prop2, spe, FUN = length)),
    varwidth = TRUE, alpha = 0.3, fill = "grey10"
  )
Miff beats me to it, but anyway here's my answer. As Miff said, you can weight the width by your prop2.
# Horizontal boxplots of dist per species (ordered by prop2); every
# observation is weighted by prop2 and box widths vary with the weights.
ggplot(data_dist2, aes(x = reorder(spe, prop2), y = dist)) +
  geom_boxplot(
    mapping = aes(weight = prop2),
    alpha = 0.3,
    fill = "grey10",
    varwidth = TRUE
  ) +
  coord_flip()
But geom_boxplot() implicitly takes the sample size into account. So you need to divide that away in your weights. Here's how you can do it with data.table.
library(data.table)

setDT(data_dist2) # convert to data.table (by reference)

# Per-observation weight. With varwidth = TRUE the box width is proportional
# to the square root of the summed weights of a group, so:
#   - dividing by .N (rows per species) removes the sample-size effect;
#   - squaring prop2 cancels the square root.
# The weights of a species then sum to prop2^2 and its box width is
# proportional to prop2.
data_dist2[, weight := prop2^2 / .N, by = spe]

ggplot(data_dist2, aes(x = reorder(spe, prop2), y = dist)) +
  geom_boxplot(
    aes(weight = weight), # note weight = weight, not weight = prop2
    varwidth = TRUE,
    fill = "grey10", alpha = 0.3
  ) +
  coord_flip()
I have been having trouble graphing growth curve model results that have been calculated over both subjects and items (both included as random effects), while models calculated over a dataset that is averaged over items, so that subjects are the only random effect, seems to work fine. I cannot seem to figure out why this would be or how to fix it.
Graph of Subject Only Model
Graph of Subjects and Items Model
Summary DF Subj Items
Summary DF Subj Only
> dput(head(new.df.subjitems, 20))
structure(list(Item.No = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1),
Subject_No = c(330, 322, 141, 330, 330,
330, 137, 330, 330, 330, 137, 330, 137, 330, 330, 137, 137, 330,
137, 141),
Bin.No = c(35, 17, 19, 44, 42, 34, 31, 23, 36, 32,
33, 28, 23, 33, 37, 7, 4, 30, 28, 31),
TargetFix = c(1, 1, 1,
0, 0.02, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0),
Condition.E = structure(c(5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L), .Label = c("First", "Second", "Max Entropy, Unrelated",
"Third", "Fourth", "Max Entropy, Competitive"), class = "factor"),
ot1 = c(-0.00489995957550024, 0.220498180897511, -0.171498585142509,
0.142098827689507, 0.210698261746511, -0.20089834259551,
-0.0538995553305028, -0.151898746840508, -0.112699070236506,
-0.230298100048512, 0.0146998787265008, -0.161698665991508,
-0.151898746840508, 0.0146998787265008, -0.122498989387506,
0.0538995553305028, 0.181298504293509, -0.0440996361795023,
-0.161698665991508, -0.0538995553305028),
ot2 = c(-0.158018948215706,
0.226392531578272, 0.0744512352170155, 0.00151941296361255,
0.192965446378795, 0.161057774142931, -0.135227753761518,
0.024310607417801, -0.0577376926172773, 0.26133902974136,
-0.156499535252094, 0.0486212148356019, 0.024310607417801,
-0.156499535252094, -0.0395047370539266, -0.135227753761518,
0.101800668562042, -0.142824818579581, 0.0486212148356019,
-0.135227753761518),
ot3 = c(0.0112384799617412, 0.177762495548696,
0.0718614343707494, -0.14311051566666, 0.113069194230467,
-0.05612035827049, 0.113717568074414, 0.12506411034348, 0.167352493276442,
-0.250560469916257, -0.0335353360396831, 0.101434485808536,
0.12506411034348, -0.0335353360396831, 0.16389449944206,
-0.113717568074414, -0.035984748339037, 0.0957432042894495,
0.101434485808536, 0.113717568074414),
ot4 = c(0.158779933129858,
0.0903598052498265, -0.175158477576023, -0.166766788637329,
-0.00086703192354873, -0.0706803112875428, 0.0864589810947309,
-0.178854426512889, -0.0950686572656207, 0.205524262921174,
0.153690832709029, -0.182263550144233, -0.178854426512889,
0.153690832709029, -0.123325047363878, 0.0864589810947308,
-0.15558879673071, 0.109609880754701, -0.182263550144233,
0.0864589810947309)),
.Names = c("Item.No", "Subject_No",
"Bin.No", "TargetFix", "Condition.E", "ot1", "ot2", "ot3", "ot4"
), row.names = c(1L, 5L, 8L, 22L, 29L, 59L, 61L, 74L, 78L, 86L,
90L, 98L, 101L, 111L, 115L, 120L, 126L, 133L, 140L, 145L), class = "data.frame")
> dput(head(df.subjonly, 20))
structure(list(Subject_No = c(103, 103, 103, 103, 103, 103, 103,
103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103
),
Bin.No = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20),
TargetFix = c(0.16667, 0.16667, 0.16667,
0.16667, 0.32667, 0.39, 0.5, 0.5, 0.5, 0.5, 0.62667, 0.59, 0.66667,
0.66667, 0.76667, 0.76333, 0.66667, 0.40667, 0.33333, 0.48333
),
Condition.E = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("First",
"Second", "Max Entropy, Unrelated", "NaN, Unrelated", "Third",
"Fourth", "Max Entropy, Competitive", "NaN, Competitive", "Low Entropy, NaN",
"High Entropy, NaN", "Max Entropy, NaN", "NaN, NaN"), class = "factor"),
ot1 = c(-0.240098019199512, -0.230298100048512, -0.220498180897511,
-0.210698261746511, -0.20089834259551, -0.19109842344451,
-0.181298504293509, -0.171498585142509, -0.161698665991508,
-0.151898746840508, -0.142098827689507, -0.132298908538507,
-0.122498989387506, -0.112699070236506, -0.102899151085505,
-0.0930992319345048, -0.0832993127835043, -0.0734993936325038,
-0.0636994744815033, -0.0538995553305028),
ot2 = c(0.297804940868062,
0.26133902974136, 0.226392531578271, 0.192965446378795, 0.161057774142931,
0.13066951487068, 0.101800668562042, 0.0744512352170154,
0.0486212148356019, 0.0243106074178009, 0.0015194129636125,
-0.0197523685269634, -0.0395047370539266, -0.0577376926172774,
-0.0744512352170155, -0.0896453648531411, -0.103320081525654,
-0.115475385234555, -0.126111275979843, -0.135227753761518
),
ot3 = c(-0.331823325024233, -0.250560469916256, -0.177762495548696,
-0.113069194230468, -0.0561203582704902, -0.00655577997768254,
0.0359847483390369, 0.0718614343707493, 0.101434485808536,
0.12506411034348, 0.14311051566666, 0.15593390946916, 0.16389449944206,
0.167352493276442, 0.166668098663387, 0.162201523293977,
0.154312974859294, 0.143362661050417, 0.12971078955843, 0.113717568074414
),
ot4 = c(0.347265133901292, 0.205524262921176, 0.0903598052498273,
-0.000867031923547791, -0.0706803112875423, -0.121489365408538,
-0.15558879673071, -0.175158477576023, -0.182263550144233,
-0.178854426512889, -0.16676678863733, -0.147721588350685,
-0.123325047363878, -0.0950686572656211, -0.0643291795224186,
-0.0323686454785664, -0.000334356356151326, 0.0307411167449481,
0.0599399328470624, 0.0864589810947308)), .Names = c("Subject_No",
"Bin.No", "TargetFix", "Condition.E", "ot1", "ot2", "ot3", "ot4"
), row.names = 3:22, class = "data.frame")
>
# Create a 4th-order orthogonal polynomial over the distinct values of Bin.No.
# The bins are sorted first: unique() returns values in order of appearance,
# and the rows of new.df.subjitems are not ordered by Bin.No, so row i of an
# unsorted poly() result would not belong to bin i.
bins <- sort(unique(new.df.subjitems$Bin.No))
t <- poly(bins, 4)
# Create variables ot1, ot2, ot3, ot4 corresponding to the orthogonal
# polynomial time terms and populate them with the values that belong to
# each row's Bin.No. match() looks the bin up by value, so this also works
# when the bins do not run exactly from 1 to length(bins).
new.df.subjitems[, paste0("ot", 1:4)] <-
  t[match(new.df.subjitems$Bin.No, bins), 1:4]
# Growth curve model with random effects for both subjects and items
Model.subjitems.2 <- lmer(
  TargetFix ~ (ot1 + ot2 + ot3 + ot4) * Condition.E +
    (1 + ot1 + ot2 + ot3 + ot4 | Subject_No) +
    (1 + ot1 + ot2 + ot3 + ot4 | Item.No),
  control = lmerControl(optimizer = "bobyqa"),
  data = new.df.subjitems, REML = FALSE
)
# Create a 4th-order orthogonal polynomial over the distinct values of Bin.No.
# The bins are sorted so that the result does not depend on the row order of
# df.subjonly (unique() returns values in order of appearance).
bins <- sort(unique(df.subjonly$Bin.No))
t <- poly(bins, 4)
# Create variables ot1, ot2, ot3, ot4 corresponding to the orthogonal
# polynomial time terms and populate them with the values that belong to
# each row's Bin.No. match() looks the bin up by value, so this also works
# when the bins do not run exactly from 1 to length(bins).
df.subjonly[, paste0("ot", 1:4)] <-
  t[match(df.subjonly$Bin.No, bins), 1:4]
# Growth curve model with subjects as the only random effect
Model.subj.2 <- lmer(
  TargetFix ~ (ot1 + ot2 + ot3 + ot4) * Condition.E +
    (1 + ot1 + ot2 + ot3 + ot4 | Subject_No),
  control = lmerControl(optimizer = "bobyqa"),
  data = df.subjonly, REML = FALSE
)
# Graph Subject Items
# Observed TargetFix per bin and condition, summarised with mean_se (ribbon
# and points), overlaid with the per-bin mean of the model's fitted values
# (lines).
# Every continued line ends in `+`: a line that *starts* with `+` begins a
# new expression, so the layers after it would not be added to the plot.
p.subjitems <- ggplot(
  data = new.df.subjitems,
  aes(Bin.No, TargetFix,
    color = Condition.E, lty = Condition.E, shape = Condition.E
  )
) +
  stat_summary(
    fun.data = mean_se, geom = "ribbon", linetype = 0, alpha = 0.25
  ) +
  stat_summary(fun.data = mean_se, geom = "point", size = 1, alpha = 0.40) +
  # NOTE(review): assumes fitted() returns one value per row of
  # new.df.subjitems, in row order (no rows dropped during fitting) -- confirm
  # `fun` replaces the `fun.y` argument, deprecated since ggplot2 3.3.0
  stat_summary(
    aes(y = fitted(Model.subjitems.2)),
    fun = mean, geom = "line", size = 2.0, alpha = 0.9
  ) +
  theme_bw(base_size = 10) +
  labs(y = "Fixation Proportion", x = "Bins") +
  ggtitle("Subjects and Items, Target Fixations") +
  theme(text = element_text(color = "black", size = 20, family = "Georgia")) +
  theme(
    axis.text = element_text(color = "black", size = 10, family = "Georgia")
  ) +
  scale_color_viridis(begin = 0, end = .8, discrete = TRUE) +
  # NOTE(review): only four shapes are supplied -- confirm that at most four
  # levels of Condition.E occur in the data
  scale_shape_manual(values = c(3, 2, 16, 7)) + # 16,3,2
  theme(legend.key.width = unit(4, "line"))
p.subjitems
# Save as a separate call with an explicit plot: ggsave() defaults to
# last_plot(), so `+ ggsave(...)` inside the chain would not save this plot.
ggsave("Testing_SubjectItems_v3.png",
  plot = p.subjitems, width = 10, height = 5
)
# Graph Subject Only
# Observed TargetFix per bin and condition, summarised with mean_se (ribbon
# and points), overlaid with the per-bin mean of the model's fitted values
# (lines).
# Every continued line ends in `+`: a line that *starts* with `+` begins a
# new expression, so the layers after it would not be added to the plot.
p.subjonly <- ggplot(
  data = df.subjonly,
  aes(Bin.No, TargetFix,
    color = Condition.E, lty = Condition.E, shape = Condition.E
  )
) +
  stat_summary(
    fun.data = mean_se, geom = "ribbon", linetype = 0, alpha = 0.25
  ) +
  stat_summary(fun.data = mean_se, geom = "point", size = 1, alpha = 0.40) +
  # NOTE(review): assumes fitted() returns one value per row of df.subjonly,
  # in row order (no rows dropped during fitting) -- confirm
  # `fun` replaces the `fun.y` argument, deprecated since ggplot2 3.3.0
  stat_summary(
    aes(y = fitted(Model.subj.2)),
    fun = mean, geom = "line", size = 2.0, alpha = 0.9
  ) +
  theme_bw(base_size = 10) +
  labs(y = "Fixation Proportion", x = "Bins") +
  ggtitle("Subj Only, Target Fixations") +
  theme(text = element_text(color = "black", size = 20, family = "Georgia")) +
  theme(
    axis.text = element_text(color = "black", size = 10, family = "Georgia")
  ) +
  scale_color_viridis(begin = 0, end = .8, discrete = TRUE) +
  # NOTE(review): only four shapes are supplied -- confirm that at most four
  # levels of Condition.E occur in the data
  scale_shape_manual(values = c(3, 2, 16, 7)) + # 16,3,2
  theme(legend.key.width = unit(4, "line"))
p.subjonly
# Save as a separate call with an explicit plot: ggsave() defaults to
# last_plot(), so `+ ggsave(...)` inside the chain would not save this plot.
ggsave("Testing_SubjectOnly_v3.png",
  plot = p.subjonly, width = 10, height = 5
)