Change labels in facet wrap ggplot [duplicate] - r

This question already has answers here:
How to change facet labels?
(23 answers)
Closed 4 years ago.
Greetings,
I am having some difficulties with correctly naming labels in ggplot2 using facet_wrap. My graphing code is:
library(ggplot2)
# One panel per party: the observed (x, y) positions as open circles, plus the
# line whose intercept (V2) and slope (V3) are stored in multilevel.df.
mlm.plots <- ggplot(data = positions.df, mapping = aes(x = x, y = y)) +
  geom_point(shape = 1, size = 0.75, colour = "darkred", fill = "pink") +
  geom_abline(
    data = multilevel.df,
    mapping = aes(intercept = V2, slope = V3, group = party),
    colour = "red",
    size = 0.1
  ) +
  facet_wrap(~party, ncol = 2) +
  # Blank out both axis titles.
  labs(x = "", y = "")
This produces the following:
What I really want is instead of the labels ("1", "2"...) to have the names of the political parties in each case. I have a feeling there should be an elegant solution using labeller() but I can't figure one out.
I think part of the problem is that I have set up party as a factor as follows:
# Repeat the eight party names J times and store them as a factor
# (factor levels are the sorted unique names).
party <- factor(
  rep(
    c(
      "National Coalition Party", "Centre Party",
      "Social Democratic Party", "Left Alliance", "Christian Democrats",
      "True Finns", "Swedish People's Party", "Greens"
    ),
    times = J
  )
)
I also know that the plot goes very wonky if party is as.character.
The data to make the graphs is as follows:
structure(list(party = c(5, 1, 6, 4, 2, 8, 7, 3, 5, 1, 6, 4,
2, 8, 7, 3, 5, 1, 6, 4, 2, 8, 7, 3, 5, 1, 6, 4, 2, 8, 7, 3, 5,
1, 6, 4, 2, 8, 7, 3, 5, 1, 6, 4, 2, 8, 7, 3, 5, 1, 6, 4, 2, 8,
7, 3, 5, 1, 6, 4, 2, 8, 7, 3), x = c(-16.667, -36.735, 43.243,
-64.286, 37.963, -9.091, 6.593, -38.614, -30.496, -10.549, -45.455,
-46.515, 25.926, -23.81, -29.53, -38.614, -4.478, 1.266, 1.176,
-30.357, 5, 13.084, -7.692, -38.614, 14.62, 26.829, -13.725,
-14.894, 24.299, 13.084, 4.145, -13.433, 29.464, -1.049, -1.37,
-38.168, 19.444, -5.882, 14.516, -6.25, 9.756, -7.636, -24.742,
-45.946, 7.813, -5.882, -19.931, -33.523, -20.556, -15.09, -36.432,
-42.051, -15.108, -6.518, -25.472, -21.471, 13.75, -16.383, -11.384,
-44.767, -16.771, 0.472, -23.392, -27.715), y = c(-0.295492376,
0.187137648, -0.209073538, 1.026732887, -0.524148543, 0.232093035,
-1.617201837, -0.038851011, -0.351777544, 0.637192933, -0.783167803,
1.549387151, -0.742792721, -0.054633476, -2.204811412, 0.009461977,
-0.594714182, 1.172333694, -0.951553793, 1.59911439, -1.246200649,
-0.337551454, -2.631499836, 0.6051641, -0.885991535, 1.492537342,
-1.275241929, 1.658246706, -1.331133971, -0.676627085, -3.220241861,
0.82922329, -0.841711554, 1.611623219, -1.531110402, 1.469424694,
-1.979679497, -0.724442893, -3.523278033, 1.187782421, -0.842631246,
1.35252299, -1.950335, 0.859798616, -2.152810527, -0.623310324,
-3.48956421, 1.718330701, -0.809637545, 0.741273409, -1.96458669,
1.466255347, -2.675088542, -1.066556748, -3.436585287, 1.935368096,
-0.870188157, 0.477034948, -2.49292584, 1.93375064, -2.924310472,
-1.033098158, -3.250669464, 2.086336567)), .Names = c("party",
"x", "y"), row.names = c(NA, -64L), class = "data.frame")
and
structure(list(party = c(5, 1, 6, 4, 2, 8, 7, 3), V2 =
c(-0.671389852256272,
1.07302815113772, -1.26372215643281, 1.79721076947721,
-1.95951156748975,
-0.541929683566524, -2.8106689095983, 1.42500879635995), V3 =
c(-0.000574845695491941,
0.018171274525851, 0.0127869327689727, 0.00934727979573554,
0.0251920546515927,
0.00326951650086729, 0.00867962541673107, 0.0153496027643832),
V4 = c(-0.417933984027918, -0.417933984027918, -0.417933984027918,
-0.417933984027918, -0.417933984027918, -0.417933984027918,
-0.417933984027918, -0.417933984027918), V5 = c(0.0114033982479481,
0.0114033982479481, 0.0114033982479481, 0.0114033982479481,
0.0114033982479481, 0.0114033982479481, 0.0114033982479481,
0.0114033982479481)), .Names = c("party", "V2", "V3", "V4",
"V5"), row.names = c("National.Coalition.Party", "Centre.Party",
"Social.Democratic.Party", "Left.Alliance", "Christian.Democrats",
"True.Finns", "Swedish.People.s.Party", "Greens"), class =
"data.frame")

When converting the party column to factor, use the labels argument to specify the labels.
# The numeric codes stored in `party` are the alphabetical factor codes of the
# party names: the row names of multilevel.df pair code 5 with
# "National Coalition Party", code 1 with "Centre Party", and so on.
# Supplying `labels` alone would attach the names to the sorted codes 1..8 and
# mislabel most panels, so each code is paired with its own name via `levels`.
party_codes <- c(5, 1, 6, 4, 2, 8, 7, 3)
party_names <- c(
  "National Coalition Party", "Centre Party",
  "Social Democratic Party", "Left Alliance", "Christian Democrats",
  "True Finns", "Swedish People's Party", "Greens"
)

# Apply the same code -> name mapping to both data frames so that the points
# and the fitted lines land in the same facet.
positions.df$party <- factor(positions.df$party,
                             levels = party_codes, labels = party_names)
multilevel.df$party <- factor(multilevel.df$party,
                              levels = party_codes, labels = party_names)
After that, you can plot your data using your original code.
# Same plot as in the question; once `party` is a labelled factor the facet
# strips show the factor labels instead of the numeric codes.
mlm.plots <- ggplot(positions.df, aes(x = x, y = y)) +
  geom_point(colour = "darkred", fill = "pink", shape = 1, size = 0.75) +
  geom_abline(
    aes(intercept = V2, slope = V3, group = party),
    data = multilevel.df,
    color = "red",
    size = 0.1
  ) +
  facet_wrap(~party, ncol = 2) +
  # Empty axis titles.
  labs(x = "", y = "")
# Draw the plot.
mlm.plots

Related

How to use stat_function or geom_line to add curve to scatterplot

This is a revised question that I posted earlier which has been improved for clarity.
I am attempting to make a scatterplot of CO2 rate data (y) at varying temperatures (t) with two categorical variables, depth (Mineral, Organic) and substrate (Calcareous, Metapelite, Peridotite) with fitted lines following the equation y = a*exp(b*t) where y = CO2 rate, a = basal respiration (intercept), b = slope and t = temperature (time equivalent). I have already fitted all of the exponential curves so I have the values for y, a, b and t for each data point. I am struggling to figure out how to plot the exponential curves using the function exp_funct <- function(y,a,b,t){y=a*exp(b*t)}
for each category of the two groups, depth and substrate. So far, I have produced the base scatterplot using GGplot2 colouring by depth and faceting by substrate but I do not know what the best approach is to fit the curves, I have attempted using geom_line and stat_function with no success.
Here's my attempt at some reproducible code:
co2_data <- structure(list(chamber_temp = c(10, 10, 10, 10, 10, 15, 15, 15,
15, 15, 15, 19, 19, 19, 25, 25, 25, 25, 25, 25, 35, 35, 35, 35,
35, 35, 5, 5, 5, 5, 5, 5), substrate = c("Calcareous", "Metapelite",
"Metapelite", "Peridotite", "Peridotite", "Calcareous", "Calcareous",
"Metapelite", "Metapelite", "Peridotite", "Peridotite", "Calcareous",
"Calcareous", "Metapelite", "Calcareous", "Calcareous", "Metapelite",
"Metapelite", "Peridotite", "Peridotite", "Calcareous", "Calcareous",
"Metapelite", "Metapelite", "Peridotite", "Peridotite", "Calcareous",
"Calcareous", "Metapelite", "Metapelite", "Peridotite", "Peridotite"
), depth = c("Mineral", "Mineral", "Organic", "Mineral", "Organic",
"Mineral", "Organic", "Mineral", "Organic", "Mineral", "Organic",
"Mineral", "Organic", "Organic", "Mineral", "Organic", "Mineral",
"Organic", "Mineral", "Organic", "Mineral", "Organic", "Mineral",
"Organic", "Mineral", "Organic", "Mineral", "Organic", "Mineral",
"Organic", "Mineral", "Organic"), N.x = c(3, 6, 4, 5, 8, 6, 8,
7, 8, 8, 8, 3, 8, 4, 6, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 6, 8,
8, 8, 8, 8), basal_respiration = c(0.0092, 0.0124666666666667,
0.04935, 0.0101, 0.05785, 0.01315, 0.01415, 0.013, 0.0402, 0.01075,
0.05785, 0.0171, 0.01415, 0.03105, 0.01315, 0.01415, 0.013075,
0.0402, 0.01075, 0.05785, 0.01315, 0.01415, 0.013075, 0.0402,
0.01075, 0.05785, 0.01315, 0.01415, 0.013075, 0.0402, 0.01075,
0.05785), sd.x = c(0.00744781847254617, 0.00234065517893317,
0.00178978583448784, 0.00166132477258362, 0.0118691677407113,
0.00666175652512158, 0.00727284577825528, 0.00256059888828115,
0.00986798575481049, 0.00193833507349551, 0.0118691677407113,
0.00294448637286709, 0.00727284577825528, 0.000866025403784439,
0.00666175652512158, 0.00727284577825528, 0.00238012604708238,
0.00986798575481049, 0.00193833507349551, 0.0118691677407113,
0.00666175652512158, 0.00727284577825528, 0.00238012604708238,
0.00986798575481049, 0.00193833507349551, 0.0118691677407113,
0.00666175652512158, 0.00727284577825528, 0.00238012604708238,
0.00986798575481049, 0.00193833507349551, 0.0118691677407113),
se.x = c(0.0043, 0.000955568475364854, 0.000894892917243919,
0.000742967024840269, 0.0041963844982488, 0.00271965071286737,
0.00257133928416413, 0.000967815409397198, 0.00348885982193938,
0.000685304937340201, 0.0041963844982488, 0.0017, 0.00257133928416413,
0.00043301270189222, 0.00271965071286737, 0.00257133928416413,
0.000841501633985341, 0.00348885982193938, 0.000685304937340201,
0.0041963844982488, 0.00271965071286737, 0.00257133928416413,
0.000841501633985341, 0.00348885982193938, 0.000685304937340201,
0.0041963844982488, 0.00271965071286737, 0.00257133928416413,
0.000841501633985341, 0.00348885982193938, 0.000685304937340201,
0.0041963844982488), ci.x = c(0.0185014067379227, 0.00245636696547958,
0.00284794865810747, 0.00206280715944113, 0.00992287255356713,
0.00699108472177222, 0.00608025123040774, 0.00236815899497472,
0.00824984254536553, 0.00162048867457091, 0.00992287255356713,
0.00731450964057409, 0.00608025123040774, 0.00137803967327781,
0.00699108472177222, 0.00608025123040774, 0.00198983517147669,
0.00824984254536553, 0.00162048867457091, 0.00992287255356713,
0.00699108472177222, 0.00608025123040774, 0.00198983517147669,
0.00824984254536553, 0.00162048867457091, 0.00992287255356713,
0.00699108472177222, 0.00608025123040774, 0.00198983517147669,
0.00824984254536553, 0.00162048867457091, 0.00992287255356713
), N.y = c(3, 6, 4, 5, 8, 6, 8, 7, 8, 8, 8, 3, 8, 4, 6, 8,
8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8), slope = c(0.120293333333333,
0.0593333333333333, 0.07685, 0.05602, 0.067475, 0.108913333333333,
0.15655, 0.0600714285714286, 0.08535, 0.057525, 0.067475,
0.0975333333333333, 0.15655, 0.09385, 0.108913333333333,
0.15655, 0.058125, 0.08535, 0.057525, 0.067475, 0.108913333333333,
0.15655, 0.058125, 0.08535, 0.057525, 0.067475, 0.108913333333333,
0.15655, 0.058125, 0.08535, 0.057525, 0.067475), sd.y = c(0.0326433842199814,
0.00744813175680094, 0.00456106712659804, 0.00374259268422306,
0.00379877799900366, 0.0244087448810189, 0.0131734581640509,
0.00707406396298344, 0.00967780966954816, 0.00357481268240529,
0.00379877799900366, 0.00594670777265315, 0.0131734581640509,
0.00225166604983954, 0.0244087448810189, 0.0131734581640509,
0.0085558250833653, 0.00967780966954816, 0.00357481268240529,
0.00379877799900366, 0.0244087448810189, 0.0131734581640509,
0.0085558250833653, 0.00967780966954816, 0.00357481268240529,
0.00379877799900366, 0.0244087448810189, 0.0131734581640509,
0.0085558250833653, 0.00967780966954816, 0.00357481268240529,
0.00379877799900366), se.y = c(0.0188466666666667, 0.00304068705686359,
0.00228053356329902, 0.00167373833080324, 0.00134307084165888,
0.00996482837004454, 0.00465752079973885, 0.0026737448578026,
0.00342162242218512, 0.00126388714460023, 0.00134307084165888,
0.00343333333333334, 0.00465752079973885, 0.00112583302491977,
0.00996482837004454, 0.00465752079973885, 0.00302494096754678,
0.00342162242218512, 0.00126388714460023, 0.00134307084165888,
0.00996482837004454, 0.00465752079973885, 0.00302494096754678,
0.00342162242218512, 0.00126388714460023, 0.00134307084165888,
0.00996482837004454, 0.00465752079973885, 0.00302494096754678,
0.00342162242218512, 0.00126388714460023, 0.00134307084165888
), ci.y = c(0.0810906617800115, 0.007816334916228, 0.00725767561259645,
0.00464704259594057, 0.00317585788379371, 0.0256154068032699,
0.0110132866353603, 0.0065424179794951, 0.00809085135929258,
0.00298861819339805, 0.00317585788379371, 0.0147724410388065,
0.0110132866353603, 0.0035829031505223, 0.0256154068032699,
0.0110132866353603, 0.00715284877149766, 0.00809085135929258,
0.00298861819339805, 0.00317585788379371, 0.0256154068032699,
0.0110132866353603, 0.00715284877149766, 0.00809085135929258,
0.00298861819339805, 0.00317585788379371, 0.0256154068032699,
0.0110132866353603, 0.00715284877149766, 0.00809085135929258,
0.00298861819339805, 0.00317585788379371), N = c(3, 6, 4,
5, 8, 6, 8, 7, 8, 8, 8, 3, 8, 4, 6, 8, 8, 8, 8, 8, 6, 8,
8, 8, 8, 8, 6, 8, 8, 8, 8, 8), co2_rate_u_m_h_g = c(0.0303333333333333,
0.0113333333333333, 0.0645, 0.0066, 0.129375, 0.0615, 0.1325,
0.0254285714285714, 0.132, 0.021, 0.14325, 0.085, 0.208,
0.17, 0.198666666666667, 0.71025, 0.0575, 0.344, 0.05225,
0.3115, 0.5155, 3.27125, 0.10375, 0.7835, 0.079, 0.6065,
-0.00766666666666667, 0.024625, 0.02675, 0.065125, 0.012125,
0.061), sd = c(0.0161658075373095, 0.0109483636524673, 0.0137719521734091,
0.00634822809924155, 0.0181102772716804, 0.0332009036021612,
0.0291498591028205, 0.00639940473422184, 0.0225895045161622,
0.0101136400116731, 0.0263425240140077, 0.00435889894354067,
0.0731358813637816, 0.0127279220613579, 0.0471197057149837,
0.302157834819581, 0.0214941852602047, 0.0326233921333407,
0.0116833214455479, 0.0554204706893452, 0.130053450550149,
1.288200932641, 0.0353138985184506, 0.129266060068814, 0.0200997512422418,
0.0639262968470052, 0.0164032517101539, 0.0136793640203045,
0.00948306761699881, 0.0180193348537461, 0.00918753036924038,
0.0156296421675518), se = c(0.00933333333333333, 0.00446965074449646,
0.00688597608670453, 0.00283901391331568, 0.0064029499339869,
0.0135542121374378, 0.0103060315211184, 0.00241874763794291,
0.00798659591351123, 0.00357571171736681, 0.00931348868193715,
0.00251661147842358, 0.0258574388301924, 0.00636396103067893,
0.0192365393053024, 0.106828926994785, 0.00759934207678533,
0.0115341109013965, 0.00413067791046458, 0.0195940953204931,
0.0530940988560248, 0.455447807500643, 0.012485348556265,
0.0457024538259631, 0.00710633520177595, 0.0226013589983308,
0.00669659946871877, 0.00483638553053828, 0.0033527707092152,
0.00637079693377748, 0.00324828251322361, 0.00552591298209755
), ci = c(0.0401580921443283, 0.0114896030154409, 0.0219142491554048,
0.00788236628321375, 0.0151405706956398, 0.0348422115168589,
0.0243698920725162, 0.00591846226021141, 0.0188852983847605,
0.00845521464359003, 0.0220229012042435, 0.0108281052473581,
0.0611431269419499, 0.0202529642690536, 0.0494490985187144,
0.252610271543504, 0.0179695885709161, 0.0272738383580029,
0.00976750116260314, 0.0463326729828588, 0.136482726098776,
1.07696293095078, 0.0295231579857332, 0.108069130674172,
0.0168038125580669, 0.0534437216064078, 0.0172141569548202,
0.0114362345155632, 0.00792804292904021, 0.0150645409315832,
0.0076809676067933, 0.0130667078496593)), row.names = c(NA,
-32L), class = "data.frame")
#' Exponential curve `a * exp(b * t)`.
#'
#' @param y Unused. Kept only so that existing calls which pass `y` still work.
#' @param a Intercept of the curve (the basal respiration in the question).
#' @param b Slope inside the exponent.
#' @param t Value(s) at which to evaluate the curve (temperature in the
#'   question).
#' @return Numeric vector `a * exp(b * t)`, returned visibly.
exp_funct <- function(y, a, b, t) {
  # Return the value directly instead of assigning it to the `y` argument,
  # which made the result invisible and shadowed the parameter.
  a * exp(b * t)
}
# Question's original attempt, kept verbatim: it stops with the
# "object 'basal_respiration' not found" error quoted right after this snippet.
ggplot(co2_data ,
aes(x = chamber_temp, y = co2_rate_u_m_h_g, colour = substrate, linetype = substrate,
shape = substrate, fill = substrate),
# NOTE: ymax/ymin are handed to ggplot() itself here, outside the aes() call.
ymax=co2_rate_u_m_h_g+se, ymin=co2_rate_u_m_h_g-se) +
# NOTE(review): `width` here is presumably what triggers the
# "Ignoring unknown parameters: width" warning shown below.
geom_point(stat="identity", position = "dodge", width = 0.7) +
geom_errorbar(aes(ymax=co2_rate_u_m_h_g+se, ymin=co2_rate_u_m_h_g-se),
width=0.1, size=0.1, color="black") +
facet_wrap(~depth) +
stat_function(data = co2_data ,
aes(y= co2_rate_u_m_h_g, a = basal_respiration, b = slope, c = chamber_temp),
# exp_funct() is *called* here with bare column names instead of being passed
# as a function; the quoted error comes from this call.
fun = exp_funct(y =co2_rate_u_m_h_g, a = basal_respiration, b = slope, t = chamber_temp))
Error in exp_funct(y = co2_rate_u_m_h_g, a = basal_respiration, b = slope, :
object 'basal_respiration' not found
In addition: Warning message:
Ignoring unknown parameters: width
In addition to Allan's answer, a colleague figured out how to do it using nls:
library(tidyverse)
# Install the helper packages only when they are missing, so re-running the
# script does not reinstall them every time.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
if (!requireNamespace("aomisc", quietly = TRUE)) {
  devtools::install_github("onofriAndreaPG/aomisc")
}
# NLS.expoGrowth() is used unqualified in the nls() formula below and is not
# defined in this snippet, so the aomisc package has to be attached.
library(aomisc)

# One row per substrate x depth combination.
df <- expand.grid(substrate = c("Calcareous", "Metapelite", "Peridotite"),
                  depth = c("Mineral", "Organic"))

# Fit one exponential model per combination, using only the rows of co2_data
# that belong to it. seq_len() keeps the split well-defined even for 0 rows.
mods <- purrr::map(
  split(df, seq_len(nrow(df))),
  function(x) {
    nls(co2_rate_u_m_h_g ~ NLS.expoGrowth(chamber_temp, a, b),
        data = dplyr::filter(co2_data, substrate == x[[1]], depth == x[[2]]))
  }
)

# Prediction grid for every combination: chamber temperatures 0, 1, ..., 35.
pred_dfs <- purrr::map(
  split(df, seq_len(nrow(df))),
  function(x) {
    expand.grid(substrate = x[[1]], depth = x[[2]],
                chamber_temp = seq(from = 0, to = 35))
  }
)
#' Attach model predictions to a prediction grid.
#'
#' @param x A fitted model object accepted by `predict()`.
#' @param y Data frame of new observations to predict for.
#' @return `y` with the predictions stored in an extra column named `pred`.
nls_pred <- function(x, y) {
  y[["pred"]] <- predict(x, newdata = y)
  y
}
# Predict from every fitted model on its own grid and stack the results into a
# single data frame.
preds <- purrr::map2_dfr(mods, pred_dfs, nls_pred)

# Observed rates with error bars, overlaid on the fitted curves, one panel per
# depth. ymax/ymin are mapped only in geom_errorbar(): the copies that used to
# be passed to ggplot() sat outside aes() and were never used.
ggplot(co2_data,
       aes(x = chamber_temp, y = co2_rate_u_m_h_g, linetype = substrate,
           shape = substrate)) +
  geom_line(data = preds, aes(y = pred, x = chamber_temp, colour = substrate),
            linewidth = 1) +
  # Line thickness is set with `linewidth`, matching geom_line() above;
  # `size` is deprecated for lines in ggplot2 >= 3.4.0.
  geom_errorbar(aes(ymax = co2_rate_u_m_h_g + se, ymin = co2_rate_u_m_h_g - se),
                width = 0.1, linewidth = 0.1, color = "black") +
  geom_point(aes(fill = substrate), size = 3) +
  # Shapes 21, 22 and 24 are the fillable circle, square and triangle.
  scale_shape_manual(values = c(21, 22, 24)) +
  facet_wrap(~depth) +
  theme_bw() +
  theme(legend.position = "bottom")
You can't feed parameters into stat_function that way. In this case, I would probably just generate a little summary data frame inside a geom_line call:
library(tidyverse)
# Points and error bars come from the raw data; the curve layer builds its own
# data on the fly from the per-group mean parameters.
ggplot(
  co2_data,
  aes(
    x = chamber_temp, y = co2_rate_u_m_h_g,
    ymin = co2_rate_u_m_h_g - se, ymax = co2_rate_u_m_h_g + se,
    colour = substrate, fill = substrate,
    linetype = substrate, shape = substrate
  )
) +
  geom_point(position = position_dodge(width = 0.7)) +
  geom_errorbar(color = "black", width = 0.1, size = 0.1) +
  facet_wrap(~depth) +
  geom_line(
    # The layer receives the plot data and replaces it with 100 points per
    # substrate/depth group along basal_respiration * exp(slope * temperature).
    data = function(plot_data) {
      plot_data %>%
        group_by(substrate, depth) %>%
        summarize(
          chamber_temp = seq(0, 35, length.out = 100),
          basal_respiration = mean(basal_respiration),
          slope = mean(slope),
          se = mean(se),
          co2_rate_u_m_h_g = basal_respiration * exp(chamber_temp * slope)
        )
    }
  )

How do I make ggrepel move (some) labels outside US map boundaries?

I'm trying to create my first map using ggrepel, but as you can see I've instead created a dumpster fire of overlapping labels. Most of the locations I'm mapping and labelling are clustered in the northeast, so the labels overlap. How do I get some of the labels to slide over beyond the map boundaries (in the ocean, so to speak)? Here's the code I used to create this monster:
# US base map, one repelled label per college, and points sized by
# undergraduate enrollment.
plot_usmap(fill = "light blue", alpha = 0.5) +
  ggrepel::geom_label_repel(
    data = top_18_2_transformed,
    mapping = aes(x = x, y = y, label = INSTNM),
    # label appearance
    family = "Avenir Next",
    size = 3,
    fill = "gray99",
    alpha = 1.0,
    label.padding = unit(.75, "mm"),
    label.r = unit(0.2, "lines"),
    label.size = unit(.15, "mm"),
    # placement / repulsion
    nudge_x = 20,
    nudge_y = 20,
    box.padding = 0.3,
    point.padding = NA,
    max.overlaps = 30,
    seed = 1000,
    # connector segments
    min.segment.length = 0.1,
    segment.color = "black",
    segment.size = 1
  ) +
  geom_point(
    data = top_18_2_transformed,
    mapping = aes(x = x, y = y, size = UGDS),
    alpha = 0.75,
    color = "red"
  ) +
  labs(size = "Undergrad Enrollment", title = "Select Colleges") +
  theme(legend.position = "right")
And here's a picture of my problematic map:
Thanks in advance for any corrections you may be able to offer.
UPDATE 31 March 2022: here's the dput(top_18_2_transformed):
structure(list(lon = c(-74.659365, -122.167359, -78.937624, -75.19391,
-71.093226, -77.073463, -118.125878, -117.709837, -71.222839,
-79.941993, -72.926688, -76.483084, -73.961885, -71.169242, -74.025334,
-75.380236, -70.624084, -71.118313), lat = c(40.348732, 37.429434,
36.001135, 39.950929, 42.359243, 38.908809, 34.137349, 34.106515,
42.385995, 40.44357, 41.311158, 42.4472, 40.808286, 42.336213,
40.744776, 40.606822, 41.739072, 42.374471), UNITID = c(186131,
243744, 198419, 215062, 166683, 131496, 110404, 115409, 164739,
211440, 130794, 190415, 190150, 164924, 186867, 213543, 166692,
166027), OPEID = c(262700, 130500, 292000, 337800, 217800, 144500,
113100, 117100, 212400, 324200, 142600, 271100, 270700, 212800,
263900, 328900, 218100, 215500), OPEID6 = c(2627, 1305, 2920,
3378, 2178, 1445, 1131, 1171, 2124, 3242, 1426, 2711, 2707, 2128,
2639, 3289, 2181, 2155), INSTNM = c("Princeton University", "Stanford University",
"Duke University", "University of Pennsylvania", "Massachusetts Institute of Technology",
"Georgetown University", "California Institute of Technology",
"Harvey Mudd College", "Bentley University", "Carnegie Mellon University",
"Yale University", "Cornell University", "Columbia University in the City of New York",
"Boston College", "Stevens Institute of Technology", "Lehigh University",
"Massachusetts Maritime Academy", "Harvard University"), CITY = c("Princeton",
"Stanford", "Durham", "Philadelphia", "Cambridge", "Washington",
"Pasadena", "Claremont", "Waltham", "Pittsburgh", "New Haven",
"Ithaca", "New York", "Chestnut Hill", "Hoboken", "Bethlehem",
"Buzzards Bay", "Cambridge"), STABBR = c("NJ", "CA", "NC", "PA",
"MA", "DC", "CA", "CA", "MA", "PA", "CT", "NY", "NY", "MA", "NJ",
"PA", "MA", "MA"), ZIP = c("08544-0070", "94305", "27708", "19104-6303",
"02139-4307", "20057-0001", "91125", "91711", "02452-4705", "15213-3890",
"6520", "14853", "10027", "2467", "07030-5991", "18015", "02532-1803",
"2138"), ACCREDAGENCY = c("Middle States Commission on Higher Education",
"Western Association of Schools and Colleges Senior Colleges and University Commission",
"Southern Association of Colleges and Schools Commission on Colleges",
"Middle States Commission on Higher Education", "New England Commission on Higher Education",
"Middle States Commission on Higher Education", "Western Association of Schools and Colleges Senior Colleges and University Commission",
"Western Association of Schools and Colleges Senior Colleges and University Commission",
"New England Commission on Higher Education", "Middle States Commission on Higher Education",
"New England Commission on Higher Education", "Middle States Commission on Higher Education",
"Middle States Commission on Higher Education", "New England Commission on Higher Education",
"Middle States Commission on Higher Education", "Middle States Commission on Higher Education",
"New England Commission on Higher Education", "New England Commission on Higher Education"
), INSTURL = c("www.princeton.edu/", "www.stanford.edu/", "www.duke.edu/",
"www.upenn.edu/", "web.mit.edu/", "www.georgetown.edu/", "www.caltech.edu/",
"https://www.hmc.edu/", "www.bentley.edu/", "www.cmu.edu/", "https://www.yale.edu/",
"www.cornell.edu/", "www.columbia.edu/", "www.bc.edu/", "www.stevens.edu/",
"www.lehigh.edu/", "https://www.maritime.edu/", "www.harvard.edu/"
), SCH_DEG = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3), PREDDEG = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3), HIGHDEG = c(4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4), REGION = c(2, 8, 5, 2, 1, 2, 8, 8, 1, 2, 1,
2, 2, 1, 2, 2, 1, 1), CCBASIC = c(15, 15, 15, 15, 15, 15, 15,
21, 18, 15, 15, 15, 15, 15, 16, 16, 22, 15), ADM_RATE = c(0.0578,
0.0434, 0.076, 0.0766, 0.067, 0.1436, 0.0642, 0.1367, 0.4672,
0.1544, 0.0608, 0.1085, 0.0545, 0.2722, 0.3996, 0.321, 0.9146,
0.0464), ACTCM25 = c(33, 32, 33, 33, 34, 31, 35, 33, 27, 33,
33, 32, 33, 31, 31, 29, 19, 33), ACTCM75 = c(35, 35, 35, 35,
36, 35, 36, 35, 31, 35, 35, 35, 35, 34, 34, 33, 24, 35), SAT_AVG = c(1517,
1503, 1522, 1511, 1547, 1473, 1557, 1526, 1327, 1513, 1517, 1487,
1511, 1437, 1429, 1380, 1100, 1517), DISTANCEONLY = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), UGDS = c(5308,
6994, 6546, 10774, 4516, 7141, 938, 893, 4157, 6535, 6089, 14976,
8221, 9637, 3641, 5164, 1654, 7547), CURROPER = c(1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), COSTT4_A = c(70900,
71587, 75105, 75303, 70240, 73840, 72084, 76953, 68577, 72265,
73900, 73879, 76907, 73053, 68734, 68383, 27858, 73485), COSTT4_P = c("NULL",
"NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL",
"NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL",
"NULL"), TUITIONFEE_IN = c(52800, 53529, 58031, 57770, 53790,
56058, 54600, 58660, 51830, 57119, 55500, 57222, 61788, 57910,
54014, 55240, 10018, 51925), TUITIONFEE_OUT = c(52800, 53529,
58031, 57770, 53790, 56058, 54600, 58660, 51830, 57119, 55500,
57222, 61788, 57910, 54014, 55240, 25752, 51925), AVGFACSAL = c(20724,
20865, 16863, 18277, 19624, 15798, 20595, 14397, 14592, 12296,
19830, 15574, 19431, 15599, 15318, 13763, 8928, 20988), PFTFAC = c("0.835",
"0.9881", "0.9364", "0.7779", "0.9885", "0.4815", "0.9289", "0.8992",
"0.6696", "0.9161", "0.717", "0.9074", "0.4521", "0.6662", "1",
"0.8392", "0.5867", "0.862"), C150_4 = c(0.979, 0.9432, 0.9462,
0.96, 0.954, 0.9491, 0.9357, 0.9167, 0.8952, 0.9049, 0.972, 0.9453,
0.9549, 0.9404, 0.8473, 0.8981, 0.7629, 0.971), RET_FT4 = c(0.9768,
0.9876, 0.9827, 0.9808, 0.9946, 0.9679, 0.9826, 0.9744, 0.9201,
0.9732, 0.9892, 0.9748, 0.9853, 0.9467, 0.9394, 0.9349, 0.8672,
0.9722), RET_PT4 = c("NULL", "NULL", "NULL", "0.9245", "NULL",
"0.6667", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "0.95",
"NULL", "NULL", "NULL", "NULL", "NULL"), MD_EARN_WNE_P10 = c("95689",
"97798", "93115", "103246", "111222", "96375", "112166", "108988",
"107974", "99998", "88655", "91176", "89871", "93021", "98159",
"95033", "91668", "84918"), PCT25_EARN_WNE_P10 = c("52729", "61965",
"61558", "65218", "67120", "61372", "67501", "69466", "73117",
"62003", "60311", "59566", "56005", "62006", "72669", "65644",
"68187", "56301"), PCT75_EARN_WNE_P10 = c("167686", "172245",
"151838", "174907", "169465", "147685", "175675", "173725", "146079",
"159483", "146102", "147189", "141158", "147010", "127298", "134075",
"129421", "153746"), MD_EARN_WNE_P6 = c("84713", "88873", "77260",
"80445", "112623", "71107", "129420", "112059", "78514", "87824",
"72046", "78779", "79434", "70858", "82237", "79832", "79354",
"77816"), GRAD_DEBT_MDN_SUPP = c("10450", "12000", "13500", "16763",
"13418", "16500", "PrivacySuppressed", "22089", "25000", "22014",
"13142", "14500", "21500", "18000", "27000", "23000", "26000",
"12665"), GRAD_DEBT_MDN10YR_SUPP = c("104.4654099", "119.9602793",
"134.9553142", "167.5745134", "134.1355856", "164.945384", "PrivacySuppressed",
"220.8168841", "249.9172485", "220.0671323", "131.3764992", "144.9520041",
"214.9288337", "179.9404189", "269.9106283", "229.9238686", "259.9139384",
"126.6080781"), C100_4 = c(0.898, 0.7288, 0.8831, 0.8571, 0.8691,
0.9076, 0.8434, 0.8565, 0.8479, 0.7599, 0.8777, 0.8694, 0.8635,
0.9003, 0.4566, 0.8003, 0.6322, 0.8476), ICLEVEL = c(1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), OPENADMP = c(2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), GRADS = c("2997",
"10253", "10037", "14803", "6990", "12080", "1299", "NULL", "1086",
"7562", "7517", "8984", "23235", "4846", "3624", "1775", "97",
"21592"), ACCREDCODE = c("MSACHE", "WASCSR", "SACSCC", "MSACHE",
"NECHE", "MSACHE", "WASCSR", "WASCSR", "NECHE", "MSACHE", "NECHE",
"MSACHE", "MSACHE", "NECHE", "MSACHE", "MSACHE", "NECHE", "NECHE"
), RET_FT4_POOLED = c(0.9788, 0.9879, 0.9793, 0.9821, 0.9909,
0.9651, 0.9806, 0.9716, 0.9262, 0.97, 0.9892, 0.9741, 0.9825,
0.9479, 0.9423, 0.9378, 0.8633, 0.9817), C100_4_POOLED = c(0.8856,
0.739, 0.8788, 0.8546, 0.8602, 0.9009, 0.8242, 0.8551, 0.8326,
0.7546, 0.8772, 0.8766, 0.8677, 0.8918, 0.4515, 0.7621, 0.5955,
0.8573), BOOKSUPPLY = c("1050", "1245", "1434", "1358", "820",
"1200", "1428", "800", "1300", "1000", "1050", "970", "1294",
"1250", "1200", "1000", "1500", "1000"), ADMCON7 = c(1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1), MDCOMP_ALL = c(0.5845,
0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 0.5845,
0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 0.5845,
0.5845), MDCOST_ALL = c(15387.5, 15387.5, 15387.5, 15387.5, 15387.5,
15387.5, 15387.5, 15387.5, 15387.5, 15387.5, 15387.5, 15387.5,
15387.5, 15387.5, 15387.5, 15387.5, 15387.5, 15387.5), MDEARN_ALL = c(37078,
37078, 37078, 37078, 37078, 37078, 37078, 37078, 37078, 37078,
37078, 37078, 37078, 37078, 37078, 37078, 37078, 37078), PPTUG_EF = c(0,
0, 0.0031, 0.0537, 0.0064, 0.0214, 0, 0.0011, 0.0118, 0.017,
2e-04, 3e-04, 0.0633, 0.0127, 0, 0.0128, 0.023, 0.0745), INEXPFTE = c(60048,
113338, 68756, 56874, 80756, 31693, 105185, 34419, 15842, 28167,
57231, 29893, 96463, 23266, 12504, 24995, 9687, 46272), C150_4_POOLED = c(0.9712,
0.9435, 0.9512, 0.9574, 0.9477, 0.9452, 0.9278, 0.9179, 0.8917,
0.8968, 0.969, 0.9452, 0.9566, 0.9297, 0.8608, 0.886, 0.7484,
0.974), GRAD_DEBT_MDN = c("10450", "12000", "13500", "16763",
"13418", "16500", "17747", "22089", "25000", "22014", "13142",
"14500", "21500", "18000", "27000", "23000", "26000", "12665"
), x = c(2107384.76948701, -1933340.27810509, 1876178.25472949,
2077243.02501463, 2314261.77712267, 1955381.08673633, -1660141.85673732,
-1623368.30493136, 2303424.70345276, 1678023.03854027, 2211596.23078863,
1896995.53745184, 2147624.50302849, 2309370.68277906, 2144734.86774305,
2041573.64168227, 2373567.48443726, 2311783.20749272), y = c(-188894.792987744,
-582296.149881856, -762721.806918975, -245389.810253038, 123275.753360416,
-404107.357328073, -1027748.36033576, -1039201.65863312, 122405.777575308,
-300870.762534603, -39714.5927185968, -7748.73302456512, -121333.925485063,
118650.586978148, -129820.607837031, -179439.260821836, 71069.0976923304,
124173.1993115)), class = "data.frame", row.names = c(NA, -18L
))
With a little data manipulation, you could move the labels out to either side of the country and draw segments to connect the labels to the universities:
# Sort from north to south so the labels are stacked in the same vertical
# order as the points they belong to.
top_18_2_transformed <- top_18_2_transformed[order(-top_18_2_transformed$y), ]

# Split by the sign of the projected x coordinate: east (x > 0), west (x < 0).
colleges_east <- top_18_2_transformed[top_18_2_transformed$x > 0, ]
colleges_west <- top_18_2_transformed[top_18_2_transformed$x < 0, ]

# Labels sit in a column on each side of the map, 250000 units apart.
# The number of label slots follows the number of rows, so the assignment
# still works if colleges are added or removed. For the posted data
# (3 west, 15 east) the positions are the same as before.
colleges_west$lab_x <- -2300000
colleges_west$lab_y <- seq(-1000000, by = -250000,
                           length.out = nrow(colleges_west))
colleges_east$lab_x <- 2800000
colleges_east$lab_y <- seq(1000000, by = -250000,
                           length.out = nrow(colleges_east))
# Map with the labels placed in a column on each side and a straight segment
# from every college to its label.
plot_usmap(fill = "light blue", alpha = 0.5) +
  # West labels are right-aligned at lab_x, east labels left-aligned.
  geom_text(
    data = colleges_west,
    mapping = aes(x = lab_x, y = lab_y,
                  label = stringr::str_wrap(INSTNM, 25)),
    size = 3, hjust = 1, lineheight = 0.8
  ) +
  geom_text(
    data = colleges_east,
    mapping = aes(x = lab_x, y = lab_y,
                  label = stringr::str_wrap(INSTNM, 25)),
    size = 3, hjust = 0, lineheight = 0.8
  ) +
  geom_point(
    data = top_18_2_transformed,
    mapping = aes(x = x, y = y, size = UGDS),
    alpha = 0.75,
    color = "red"
  ) +
  # Segments stop 100000 units short of the label anchor on the map side.
  geom_segment(
    data = colleges_east,
    mapping = aes(x = x, y = y, xend = lab_x - 100000, yend = lab_y)
  ) +
  geom_segment(
    data = colleges_west,
    mapping = aes(x = x, y = y, xend = lab_x + 100000, yend = lab_y)
  ) +
  labs(size = "Undergrad Enrollment", title = "Select Colleges") +
  theme(
    legend.direction = "horizontal",
    legend.position = c(0.35, 0)
  ) +
  # Widen the visible area so the label columns fit beside the map.
  coord_cartesian(
    xlim = c(-3500000, 4000000),
    ylim = c(-3000000, 1500000)
  )
It looks like ggrepel::geom_*_repel() won't take xlim within aes() nor can it accept a list of vectors to split the constraints of west and east coast labels. However, you can just split them into two separate layers and then it's easier to control. Below I made a function to supply that position to avoid duplicating the code for those layers. Then you have to customize the exact values used in xlim and expand_limits() to get things to look nice depending on your graphics device etc.
Also IMHO this visualization is very hard to easily get much information out of. The points in the northeast are mostly overlapping and there are so many labels that even when spaced very nicely it is a bit tricky to follow them all. Instead it may be better to have a zoomed in plot for that region and avoid showing lots of space with no data in your plot or other ways to increase the legibility of the plot.
library(tidyverse)
library(ggrepel)
library(usmap)
#' Repelled text labels kept outside the map.
#'
#' @param d Data frame with columns `x`, `y` and `INSTNM`, expected to hold
#'   colleges from one side of the map only.
#' @return A `geom_text_repel()` layer whose labels are limited to
#'   x >= 2.5e6 when every `x` in `d` is positive, and to x <= -2e6
#'   otherwise.
college_layers <- function(d) {
  all_east <- all(d$x > 0)
  xlimz <- if (all_east) c(2.5e6, NA) else c(NA, -2e6)

  geom_text_repel(
    data = d,
    mapping = aes(x, y, label = INSTNM),
    # allowed label region
    xlim = xlimz,
    ylim = c(-Inf, Inf),
    # text appearance
    size = 3,
    alpha = 1.0,
    # repulsion settings
    force = 20,
    box.padding = 0.3,
    point.padding = NA,
    max.overlaps = 30,
    seed = 1000,
    # connector segments
    min.segment.length = 0.1,
    segment.color = "black",
    segment.size = 1
  )
}
# Base map and enrollment-sized points, then one label layer for the colleges
# with x > 0 and one for those with x < 0.
plot_usmap(fill = "light blue", alpha = 0.5) +
  geom_point(
    data = d,
    mapping = aes(x = x, y = y, size = UGDS),
    alpha = 0.75,
    color = "red"
  ) +
  college_layers(d = filter(d, x > 0)) +
  college_layers(d = filter(d, x < 0)) +
  # Extend the plotting area beyond the map so the labels have room.
  expand_limits(
    x = c(-3.9e6, 4.6e6),
    y = c(-3e6, 2e6)
  ) +
  labs(size = "Undergrad Enrollment", title = "Select Colleges") +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.direction = "horizontal",
    legend.position = c(0.35, 0)
  )
Created on 2022-04-01 by the reprex package (v2.0.1)
Data:
d <- structure(list(INSTNM = c("Princeton University", "Stanford University",
"Duke University", "University of Pennsylvania", "Massachusetts Institute of Technology",
"Georgetown University", "California Institute of Technology",
"Harvey Mudd College", "Bentley University", "Carnegie Mellon University",
"Yale University", "Cornell University", "Columbia University in the City of New York",
"Boston College", "Stevens Institute of Technology", "Lehigh University",
"Massachusetts Maritime Academy", "Harvard University"), x = c(2107384.76948701,
-1933340.27810509, 1876178.25472949, 2077243.02501463, 2314261.77712267,
1955381.08673633, -1660141.85673732, -1623368.30493136, 2303424.70345276,
1678023.03854027, 2211596.23078863, 1896995.53745184, 2147624.50302849,
2309370.68277906, 2144734.86774305, 2041573.64168227, 2373567.48443726,
2311783.20749272), y = c(-188894.792987744, -582296.149881856,
-762721.806918975, -245389.810253038, 123275.753360416, -404107.357328073,
-1027748.36033576, -1039201.65863312, 122405.777575308, -300870.762534603,
-39714.5927185968, -7748.73302456512, -121333.925485063, 118650.586978148,
-129820.607837031, -179439.260821836, 71069.0976923304, 124173.1993115
), UGDS = c(5308, 6994, 6546, 10774, 4516, 7141, 938, 893, 4157,
6535, 6089, 14976, 8221, 9637, 3641, 5164, 1654, 7547)), class = "data.frame", row.names = c(NA,
-18L))

Cluster Analysis Visualisation: Colouring the Clusters by a categorical variable

Salut folks! I'm still quite new to ggplot and trying to understand it, but I really need some help here.
Edit: Reproducible Data of my Dataset "Daten_ohne_Cluster_NA", first 25 rows
structure(list(ntaxa = c(2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 5, 8, 8, 7, 7, 6, 5, 5), mpd.obs.z = c(-1.779004391,
-1.721014957, -1.77727283, -1.774642404, -1.789386039, -1.983401439,
-0.875426386, -2.276052068, -2.340365105, -2.203126078, -2.394158227,
-2.278173635, -1.269075471, -1.176760985, -1.313045215, -1.164289676,
-1.247549961, -0.868174033, -2.057106804, -2.03154772, -1.691850922,
-1.224391713, -0.93993654, -0.39315089, -0.418380361), mntd.obs.z = c(-1.759874454,
-1.855202792, -1.866281778, -1.798439855, -1.739998395, -1.890847575,
-0.920672112, -1.381541177, -1.382847758, -1.394870597, -1.339878669,
-1.349541665, -0.516793786, -0.525476292, -0.557425575, -0.539534996,
-0.521299478, -0.638951825, -1.06467985, -1.033009266, -0.758380203,
-0.572401837, -0.166616844, 0.399510209, 0.314591018), pe = c(0.046370234,
0.046370234, 0.046370234, 0.046370234, 0.046370234, 0.046370234,
0.071665745, 0.118619482, 0.118619482, 0.118619482, 0.118619482,
0.118619482, 0.205838414, 0.205838414, 0.205838414, 0.205838414,
0.205838414, 0.179091659, 0.215719118, 0.215719118, 0.212092271,
0.315391478, 0.312205596, 0.305510773, 0.305510773), ECO_NUM = c(1,
6, 6, 1, 7, 6, 6, 6, 6, 6, 6, 7, 7, 6, 1, 6, 6, 6, 6, 6, 6, 7,
7, 7, 6)), row.names = c(NA, -25L), class = c("tbl_df", "tbl",
"data.frame"))
(1) I prepared my Dataframe like this:
Daten_Cluster <- Daten[, c("ntaxa", "mpd.obs.z", "mntd.obs.z", "pe", "ECO_NUM")]
(2) I threw out all the NAs with na.omit. It is 6 variables with 3811 objects each. The column ECO_NUM represents the different ecoregions as a categorical, numerical factor.
(3) Then I did a cluster analysis with k-means. I used 31 groups as there are 31 ecoregions in my dataset and the aim is to colour the plot by ecoregion later on.
Biomes_Clus <- kmeans(Daten_Cluster_ohne_NA, 31, iter.max = 10, nstart = 25)
(4) Then I followed the online instructions from datanovia.com on how to visualise a k-means cluster analysis (I always just follow these how-tos as I have no idea how to do it all by myself). I tried to change the arguments accordingly to colour by ecoregion.
fviz_cluster(Biomes_Clus, data = Daten_Cluster_ohne_NA,
geom = "point",
ellipse.type = "convex",
ggtheme = theme_bw(),
) +
stat_mean(aes(color = Daten_Cluster_ohne_NA$ECO_NUM), size = 4)
I get more than 50 warnings here, I guess for each object. Saying: In grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size) : unimplemented pch value '30'
I know that there are not enough pch-symbols for 31 groups, but I also don't need them - I just would like to have it coloured.
I also tried out the other function ggscatter and created my own color-palette (called P36) with more than 31 colours to have enough colours for the ecoregions.
ggscatter(
ind.coord, x = "Dim.1", y = "Dim.2",
color = "Species", palette = "P36", ellipse = TRUE, ellipse.type = "convex",
legend = "right", ggtheme = theme_bw(),
xlab = paste0("Dim 1 (", variance.percent[1], "% )" ),
ylab = paste0("Dim 2 (", variance.percent[2], "% )" )
) +
stat_mean(aes(color = cluster), size = 4)
The error here is that a discrete value was supplied to a continuous scale. The question is: how can I easily colour the outcome of my k-means (which worked), not by the newly clustered groups but by the ecoregions (to visualise whether there is a difference between the clusters and the ecoregion groups)?
I appreciate your help and me and my group partner would be very thankful!! :)
Greetings
Evelyn

Ggplot error : haven_labelled/vctrs_vctr/double

I am new here and still studying R so I am dealing with an error.
Here is what I get from console
Don't know how to automatically pick scale for object of type haven_labelled/vctrs_vctr/double. Defaulting to continuous.
I don't know what I can do to make it work. I want to get a scatterplot.
ggplot(data = diagnoza, aes(x = Plecc, y = P32.01))
Don't know how to automatically pick scale for object of type haven_labelled/vctrs_vctr/double. Defaulting to continuous.
Adding geom_point as suggested by @zx8754 gives me a scatter plot. There is still the warning you reported, which is related to some of your variables being of type haven_labelled, so I guess you imported your data from SPSS.
To get rid of this warning you could convert your variables to R factors using haven::as_factor. Probably it would be best to do that for the whole dataset after importing your data.
diagnoza <- structure(list(Plecc = c(2, 2, 2, 1, 2, 1, 1, 1, 2, 2, 1, 2,
1, 1, 1, 1, 2, 1, 1, 2), P32.01 = structure(c(3, 4, 5, 5, 5,
5, 5, 4, 3, 5, 3, 4, 3, 4, 5, 5, 5, 3, 4, 5), label = "P32.01. odpoczynek w domu (oglądanie TV)", format.spss = "F1.0", display_width = 12L, labels = c(Nigdy = 1,
Rzadko = 2, `Od czasu do czasu` = 3, Często = 4, `Bardzo często` = 5
), class = c("haven_labelled", "vctrs_vctr", "double"))), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
library(haven)
library(ggplot2)
# Turn the SPSS-labelled column into an ordinary factor so that ggplot2 can
# choose a scale for it.
diagnoza[["P32.01"]] <- haven::as_factor(diagnoza[["P32.01"]])

# Scatter plot of P32.01 against Plecc.
ggplot(diagnoza, aes(Plecc, P32.01)) +
  geom_point()

Double index/category bar plot in R? [duplicate]

For a sample dataframe:
df <- structure(list(year = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3,
3, 3, 4, 4, 4, 4, 4), imd.quintile = c(1, 2, 3, 4, 5, 1, 2, 3,
4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5), average_antibiotic = c(1.17153515458827,
1.11592565388857, 1.09288449967773, 1.07442652168281, 1.06102887394413,
1.0560582933182, 1.00678980505929, 0.992997489072538, 0.978343676071694,
0.967900478870214, 1.02854157116164, 0.98339099101476, 0.981198852494798,
0.971392872980818, 0.962289579742817, 1.00601488964457, 0.951187417739673,
0.950706064156994, 0.939174499710836, 0.934948233015044)), .Names = c("year",
"imd.quintile", "average_antibiotic"), row.names = c(NA, -20L
), vars = "year", drop = TRUE, class = c("grouped_df", "tbl_df",
"tbl", "data.frame"))
I want to produce a grouped bar chart, very similar to this post.
I want year on the x axis, and average_antibiotic on the y axis. I want five bars (one for each imd.quintile, which is the legend).
I have tried a couple of options (based on the post and elsewhere), but can't make it work.
ggplot(df, aes(x = imd.quintile, y = average_antibiotic)) +
geom_col() +
facet_wrap(~ year)
ggplot(df, aes(x = imd.quintile, y = average_antibiotic)) +
geom_bar(aes(fill = imd.quintile), position = "dodge", stat="identity")
Any ideas?
I believe you are looking for something like this:
library(ggplot2)
# Dodged bars: years along the x axis, one bar per imd.quintile within each
# year.
ggplot(
  df,
  aes(
    x = year,
    y = average_antibiotic,
    fill = imd.quintile,
    group = imd.quintile
  )
) +
  geom_col(position = "dodge")

Resources