Faceted Boxplots - r

I have a boxplot with a group on the left (Baseline.RT) and a group on the right (TBPM.RT). I want to rename Baseline.RT to 1-back and TBPM.RT to TBPM. I also want each group to have a different square frame. I would also like to try black and white options to fill in the values for Neutral, Positive, and Negative.
I have tried several options but did not get the result I wanted. Below is my code. Any help is welcome.
DataFrame:
data.frame(
stringsAsFactors = FALSE,
Participant = c(1, 2, 3, 4, 5, 6),
Sex = c("m", "m", "m", "f", "f", "m"),
Age = c(29, 21, 29, 22, 25, 31),
Stress = c(14, 26, 11, 19, 15, 15),
Dass21.total = c(6, 43, 4, 10, 12, 8),
Dass21Ansie = c(0, 12, 1, 3, 2, 2),
Dass.Depre = c(1, 11, 0, 1, 3, 0),
Dass.Stress = c(5, 20, 3, 6, 7, 6),
Valence = c(0, 1, 2, 0, 1, 2),
Baseline.RT = c(1.17657473346937,
0.656485061072056,0.617504973518475,0.552112912223171,
0.587283706967395,0.569011248952529),
TBPM.RT = c(1.16126499995575,
0.682658424923267,0.643632301167193,0.589782671563839,
0.705303832011063,0.691478784144668),
TotalClockChecks = c(44, 97, 44, 93, 32, 90),
TotalChecks5060 = c(13, 22, 17, 23, 10, 27),
TotalClockResets = c(18, 20, 19, 19, 18, 19),
Correct.Resets = c(16, 16, 18, 18, 12, 19),
Before.55.Resets = c(0, 2, 0, 1, 0, 0),
After.65.Resets = c(2, 2, 1, 0, 6, 0),
MeanResetTime = c(63.0026438647087,
58.9040712799639,60.9928466685597,60.4603108544334,
65.859630879724,60.5522703813385),
Accuracy.Baseline = c(0.987179487179487,
0.991489361702128,0.97907949790795,0.987234042553191,1,
0.987234042553191),
Accuracy.TBPM = c(0.968619246861925,
0.972746331236897,0.989626556016598,0.972515856236786,
0.974736842105263,0.991786447638604),
rau.Baseline = c(112.453264487601,
114.413187265486,109.508971532343,112.475825131896,
122.999999959683,112.475825131896),
rau.TBPM = c(106.447535249234,
107.58519024216,113.516946707831,107.519541719961,
108.163803190644,114.564811317506)
)
Plot:
my44 %>%
select(Participant, Valence, Baseline.RT,TBPM.RT) %>% #Select interest variables
gather(Task,RT, -Valence, -Participant) %>%
ggplot(., aes(factor(Valence), RT)) + #plot
geom_boxplot() + facet_wrap(~ Task) +
scale_x_discrete(name = element_blank(), labels=c("0" = "Neutral", "1" = "Positive", "2" = "Negative")) +
scale_fill_discrete(name="Valence",
breaks=c("0", "1", "2"),
labels=c("Neutral", "Positive",
"Negative"))
The obtained results:
Created on 2020-09-02 by the reprex package (v0.3.0)

It sounds like you're looking for something like this (although your question's input data doesn't produce the values displayed in your plot, and you seem to have a default theme set somewhere).
Your fill colours can be chosen by scale_fill_manual, but you need to map the Valence variable to the fill scale if you want the different boxes to have different colours.
If you want a frame around each facet, theme_bw does this by default, or you can use theme(panel.border = element_rect(colour = "black")).
To re-name facets, I would normally just re-name the faceting variables to the desired names in the input, but here I have shown an alternative method using the labeller parameter in facet_wrap.
my44 %>%
select(Participant, Valence, Baseline.RT,TBPM.RT) %>% #Select interest variables
gather(Task,RT, -Valence, -Participant) %>%
ggplot(., aes(factor(Valence), RT)) +
geom_boxplot(aes(fill = factor(Valence))) +
facet_wrap(~ Task,
labeller = function(x) data.frame(Task = c("1-back", "TBPM"))) +
scale_x_discrete(name = element_blank(),
labels=c("0" = "Neutral", "1" = "Positive", "2" = "Negative")) +
scale_fill_manual(name="Valence",
breaks=c("0", "1", "2"),
labels=c("Neutral", "Positive","Negative"),
values = c("gray50", "gray75", "gray95")) +
theme_bw() +
theme(legend.position = "none",
strip.background = element_blank())

Related

How can i add Hatched polygons on a spplot in R?

I have a map which summarizes an indicator of the saturation percentage of real estate by neighborhood in Paris (Observed Price of real estate/maximum price set by law). I would like to add hatched on neighborhoods which have less than 5 observations included in my dataset.
I searched, but I couldn't find a way to do it. Any advice in the right direction is welcomed. Thanks.
Here is my code:
library(sp)
library(sf)
library(rgdal)
library(RColorBrewer)
library(raster)
library(classInt)
library(cartography)
#Importation
setwd("path")
shp <- readOGR(dsn="path/to/file",layer="l_qu_paris")
#Breaks
q10 <- classIntervals(map$saturation2, n=7, style="fixed",
fixedBreaks=c(45,69.999999, 79.9999999, 89.9999999, 99.9999999
,109.99999999, 120))
#Colors
my.palette <- colors()[c(73,26,128,10,652,92)]
#Map
##Scale
scale.parameter = 1.1
xshift = 0
yshift = 0
original.bbox = shp#bbox
edges = original.bbox
edges[1, ] <- (edges[1, ] - mean(edges[1, ])) * scale.parameter + mean(edges[1, ]) + xshift
edges[2, ] <- (edges[2, ] - mean(edges[2, ])) * scale.parameter + mean(edges[2, ]) + yshift
#Saturation
idx <- match(shp$l_qu, map$l_qu)
is.na(idx)
concordance <- map[idx, "saturation2"]
shp$saturation2 <- concordance
spplot(shp, "saturation2",col.regions=my.palette,
col = "black", lwd= 1, at = q10$brks,
main=list(label="% de saturation des meublés 1 pièce",cex=1.2,fontfamily="serif"),
xlim = edges[1, ], ylim = edges[2, ])
grid.text("Saturation moyenne (en%)", x=unit(0.95, "npc"), y=unit(0.50, "npc"), rot=90)
Here is my map:
saturation
Here is an example of a map that i would like to have:
saturation example
Here are the polygons in shapefile format: https://www.data.gouv.fr/fr/datasets/quartiers-administratifs/
And here is my dataset:
map <- structure(list(l_qu = c("Amérique", "Archives", "Arsenal", "Arts-et-Métiers",
"Auteuil", "Batignolles", "Bel-Air", "Belleville", "Bercy", "Bonne-Nouvelle",
"Chaillot", "Champs-Elysées", "Charonne", "Chaussée-d'Antin",
"Clignancourt", "Combat", "Croulebarbe", "Ecole-Militaire", "Enfants-Rouges",
"Epinettes", "Europe", "Faubourg-du-Roule", "Faubourg-Montmartre",
"Folie-Méricourt", "Gaillon", "Gare", "Goutte-d'Or", "Grandes-Carrières",
"Grenelle", "Gros-Caillou", "Halles", "Hôpital-Saint-Louis",
"Invalides", "Jardin-des-Plantes", "Javel", "La Chapelle", "Madeleine",
"Mail", "Maison-Blanche", "Monnaie", "Montparnasse", "Muette",
"Necker", "Notre-Dame", "Notre-Dame-des-Champs", "Odéon", "Palais-Royal",
"Parc-de-Montsouris", "Père-Lachaise", "Petit-Montrouge", "Picpus",
"Place-Vendôme", "Plaine de Monceaux", "Plaisance", "Pont-de-Flandre",
"Porte-Dauphine", "Porte-Saint-Denis", "Porte-Saint-Martin",
"Quinze-Vingts", "Rochechouart", "Roquette", "Saint-Ambroise",
"Saint-Fargeau", "Saint-Germain-des-Prés", "Saint-Gervais",
"Saint-Lambert", "Saint-Merri", "Saint-Thomas-d'Aquin", "Saint-Victor",
"Saint-Vincent-de-Paul", "Sainte-Avoie", "Sainte-Marguerite",
"Saint-Georges", "Salpêtrière", "Sorbonne", "Saint-Germain-l'Auxerrois",
"Ternes", "Val-de-Grâce", "Villette", "Vivienne", "Total"),
saturation2 = c(98.188951329533, 85.4938271604938, 83.8463463463464,
90.1460755525873, 98.1726527090667, 90.2186740262059, 92.8743271072797,
72.8549079897508, 99.2356140350877, 90.1234567901235, 114.057904044022,
NA, 87.2208980972528, 91.2562612612613, 97.9518951016991,
86.2770900920801, 91.0239726151895, 92.8305400372439, 88.6514719848053,
73.876877752942, 108.693318725755, 67.3263578578579, 85.8735259484408,
89.2100224414912, 92, 90.6120989320281, 85.8446948520848,
91.4165103088783, 97.2760978594495, 93.60892313074, 102.471730530348,
95.9062868379746, 96, 92.5484278273071, 95.0066946433545,
85.8187074829932, 101.139150713213, 92.1272297297297, 93.0625144594594,
61.8074324324324, 100.173302938197, 99.720856146949, 84.8732544128823,
84.1911355800245, 85.1122672253259, 91.8422003734504, NA,
94.612349767814, 83.2363741480137, 87.0403187718064, 92.0886931496388,
77, 110.943302180685, 100.73486307088, 66.3899425287356,
96.2527514568292, 95.7430893746874, 87.9028997984617, 48,
85.5630809345015, 92.7010730078939, 82.075822827797, 83.1727736726875,
76.2162162162162, 104.534662867996, 98.3510353194912, 78.3333333333333,
103.169134078212, 80.8779605984059, 92.63515704154, 62, 90.3902768982325,
94.1391771653151, 94.8669917042241, 94.4825319797959, 95.4279279279279,
98.2238673533848, 94.0602977590835, 87.5105365473892, 102,
92.5123935729199), numobs = c(6, 4, 4, 6, 36, 15, 4, 4, 3,
2, 16, NA, 36, 3, 32, 9, 22, 13, 11, 6, 31, 5, 15, 14, 4,
22, 3, 64, 29, 58, 7, 18, 4, 13, 23, 2, 8, 4, 47, 12, 16,
49, 50, 9, 33, 26, NA, 15, 10, 10, 23, 2, 13, 15, 2, 12,
8, 31, 1, 17, 22, 42, 7, 3, 4, 74, 4, 7, 13, 6, 2, 23, 18,
16, 17, 1, 24, 44, 8, 4, 1290)), row.names = c(NA, -81L), class = c("tbl_df",
"tbl", "data.frame"))
Neither spplot, nor ggplot2 support textured fillings. Having said that, there is a package called ggpattern which provides custom ggplot2 geoms which support filled areas with geometric and image-based patterns. See developer site for more info on ggpattern: https://coolbutuseless.github.io/package/ggpattern/index.html
With ggpattern you can plot 'hatched' or textured geom fillings. Below is a working example from the developers website:
library(maps)
crimes <- data.frame(state = tolower(rownames(USArrests)), USArrests)
crimesm <- reshape2::melt(crimes, id = 1)
states_map <- map_data("state")
p <- ggplot(crimes, aes(map_id = state)) +
geom_map_pattern(
aes(
# fill = Murder,
pattern_fill = Murder,
pattern_spacing = state,
pattern_density = state,
pattern_angle = state,
pattern = state
),
fill = 'white',
colour = 'black',
pattern_aspect_ratio = 1.8,
map = states_map
) +
expand_limits(x = states_map$long, y = states_map$lat) +
coord_map() +
theme_bw(18) +
labs(title = "ggpattern::geom_map_pattern()") +
scale_pattern_density_discrete(range = c(0.01, 0.3)) +
scale_pattern_spacing_discrete(range = c(0.01, 0.03)) +
theme(legend.position = 'none')
p

ggplot formula for a bar graph

I am looking to get a bar graph of medals in R. I have 3 distinct columns (gold, silver, bronze). The columns for gold medals has a total of 8, the silver has 10, and the bronze has 13.
For the code, I started writing: ggplot(data, aes(x=?)) + geom_bar()
I am not sure how to write all 3 gold medals on the function where it shows x=?
Thanks
For plotting purposes, it is "easier" to work with long data instead of wide. Below I converted the data you mentioned in your comment to long and plotted the data as a grouped bar.
library(tidyverse)
# load data
raw_data <- structure(list(Rank = c(1, 2, 3, 4, 5, 6),
`Team/Noc` = c("United States of America", "People's Republic of China", "Japan", "Great Britain", "ROC", "Australia"),
Gold = c(39, 38, 27, 22, 20, 17),
Silver = c(41,32, 14, 21, 28, 7),
Bronze = c(33, 18, 17, 22, 23, 22),
Total = c(113, 88, 58, 65, 71, 46),
`Rank by Total` = c(1, 2, 5, 4, 3, 6)),
row.names = c(NA,-6L),
class = c("tbl_df", "tbl", "data.frame"))
# convert wide data to long
long_data <- raw_data %>%
pivot_longer(cols = -`Team/Noc`, names_to = 'Medal') %>% # convert wide data to long format
filter(Medal %in% c("Gold", "Silver", "Bronze")) # only select medal columns
# plot
ggplot(long_data) +
geom_col(aes(x = `Team/Noc`,
y = value,
fill = Medal),
position = "dodge" # grouped bars
)
Hope this gets you started!

How to automate positioning of inner labels within a stacked barplot?

I frequently have to produce stacked bar plots with labels. The way I've been coding the labels is very time intensive and I wondered if there was a way to code things more efficiently. I would like the labels to be centered on each section of the bars. I'd prefer base R solutions.
stemdata <- structure(list( #had to round some nums below for 100% bar
A = c(7, 17, 76),
B = c(14, 10, 76),
C = c( 14, 17, 69),
D = c( 4, 10, 86),
E = c( 7, 17, 76),
F = c(4, 10, 86)),
.Names = c("Food, travel, accommodations, and procedures",
"Travel itinerary and dates",
"Location of the STEM Tour stops",
"Interactions with presenters/guides",
"Duration of each STEM Tour stop",
"Overall quality of the STEM Tour"
),
class = "data.frame",
row.names = c(NA, -3L)) #4L=number of numbers in each letter vector#
# attach(stemdata)
print(stemdata)
par(mar=c(0, 19, 1, 2.1)) # this sets margins to allow long labels
barplot(as.matrix(stemdata),
beside = F, ylim = range(0, 10), xlim = range(0, 100),
horiz = T, col=colors, main="N=29",
border=F, las=1, xaxt='n', width = 1.03)
text(7, 2, "14%")
text(19, 2, "10%")
text(62, 2, "76%")
text(7, 3.2, "14%")
text(22.5, 3.2, "17%")
text(65.5, 3.2, "69%")
text(8, 4.4, "10%")
text(55, 4.4, "86%")
text(3.5, 5.6, "7%")
text(15, 5.6, "17%")
text(62, 5.6, "76%")
text(9, 6.9, "10%")
text(55, 6.9, "86%")
Staying base R as OP requested, we can easily automate the inner label positioning (i.e. x coordinates) within a small function.
xFun <- function(x) x/2 + c(0, cumsum(x)[-length(x)])
Now, it's good to know that barplot invisibly trows the y coordinates, we can catch them by assignment (here byc <- barplot(.)).
Eventually, just assemble coordinates and labels in data frame labs and "loop" through the text calls in a sapply. (Use col="white" or col=0 for white labels as wished in the other question.)
# barplot
colors <- c("gold", "orange", "red")
par(mar=c(2, 19, 4, 2) + 0.1) # expand margins
byc <- barplot(as.matrix(stemdata), horiz=TRUE, col=colors, main="N=29", # assign `byc`
border=FALSE, las=1, xaxt='n')
# labels
labs <- data.frame(x=as.vector(sapply(stemdata, xFun)), # apply `xFun` here
y=rep(byc, each=nrow(stemdata)), # use `byc` here
labels=as.vector(apply(stemdata, 1:2, paste0, "%")),
stringsAsFactors=FALSE)
invisible(sapply(seq(nrow(labs)), function(x) # `invisible` prevents unneeded console output
text(x=labs[x, 1:2], labels=labs[x, 3], cex=.9, font=2, col=0)))
# legend (set `xpd=TRUE` to plot beyond margins!)
legend(-55, 8.5, legend=c("Medium","High", "Very High"), col=colors, pch=15, xpd=TRUE)
par(mar=c(5, 4, 4, 2) + 0.1) # finally better reset par to default
Result
Data
stemdata <- structure(list(`Food, travel, accommodations, and procedures` = c(7,
17, 76), `Travel itinerary and dates` = c(14, 10, 76), `Location of the STEM Tour stops` = c(14,
17, 69), `Interactions with presenters/guides` = c(4, 10, 86),
`Duration of each STEM Tour stop` = c(7, 17, 76), `Overall quality of the STEM Tour` = c(4,
10, 86)), class = "data.frame", row.names = c(NA, -3L))
Would you consider a tidyverse solution?
library(tidyverse) # for dplyr, tidyr, tibble & ggplot2
stemdata %>%
rownames_to_column(var = "id") %>%
gather(Var, Val, -id) %>%
group_by(Var) %>%
mutate(id = factor(id, levels = 3:1)) %>%
ggplot(aes(Var, Val)) +
geom_col(aes(fill = id)) +
coord_flip() +
geom_text(aes(label = paste0(Val, "%")),
position = position_stack(0.5))
Result:

can't add labels to my graph

I have this graph:
I just need to add labels to each colored line.
I need to add to the blue one Forecast Sales and for the red one Historical Sales.
I tried to adapt these examples here but I have much error. Also, I can not plot the graph above just by using this code:
to make it reproductible :
dput(df1)
structure(list(Semaine = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31), M = c(5649.96284329564, 7400.19639744335, 6948.61488673139,
5043.28209277238, 7171.29719525351, 7151.04746494067, 5492.96601941748,
6796.1160130719, 5532.95496473142, 7371.33061889251, 5462.73861171367,
7156.01570964247, 5558.63194819212, 9329.49289405685, 5770.02903225806,
7348.68497576737, 5261.26655896607, 8536.11304909561, 7463.97630586968,
6133.49774339136, 7252.69089929995, 6258.54674403611, 8167.67766497462,
5644.66612816371, 7512.5169628433, 5407.84275713516, 7795.63220247711,
5596.75282714055, 7264.37264404954, 5516.98492191707, 8188.80776699029
> dput(df2)
structure(list(Semaine = c(32, 33.2, 34.4, 35.6, 36.8, 38), M = c(5820.32304669441,
6296.32038834951, 7313.24757281553, 7589.714214588, 8992.35922330097,
9664.95469255663)), .Names = c("Semaine", "M"), row.names = c(NA,
-6L), class = "data.frame")
ggplot() + geom_line(data=df1, aes(x = Semaine, y = M),color = "red") +
stat_smooth(data=df2, aes(x = Semaine, y = M),color = "blue")+
scale_x_continuous(breaks = seq(0,40,1))
Thank you!
cols <- c("A"="red", "B"="blue")
ggplot() + geom_line(data=df1, aes(x = Semaine, y = M,color = "A")) +
stat_smooth(data=df2, aes(x = Semaine, y = M,color = "B"), method = 'loess')+
scale_x_continuous(breaks = seq(0,40,1)) +
scale_color_manual(name="Title", values=cols)

Show point colour according to their row position in table

I want to display a scatter plot of points from a csv table with ggplot2. The trick is that I'd like each point, or cross, to have a different colour according to their row number in the csv file (using RColorBrewer's spectral colours).
The dataset (dat) looks like this:
modu mnc eff
1 0.3080473 0 0.4420544
2 0.3110355 4 0.4633741
3 0.3334024 9 0.4653061
So I'd like row 1 to be very blue, row two to be a little less, row three to be kind of green, etc.
Here's my code so far:
library(ggplot2)
library(RColorBrewer)
dat <- structure(list(modu = c(0.30947265625, 0.3094921875, 0.32958984375,
0.33974609375, 0.33767578125, 0.3243359375, 0.33513671875, 0.3076171875,
0.3203125, 0.3205078125, 0.3220703125, 0.28994140625, 0.31181640625,
0.352421875, 0.31978515625, 0.29642578125, 0.34982421875, 0.3289453125,
0.30802734375, 0.31185546875, 0.3472265625, 0.303828125, 0.32279296875,
0.3165234375, 0.311328125, 0.33640625, 0.3140234375, 0.33515625,
0.34314453125, 0.33869140625), mnc = c(15, 9, 6, 0, 10, 12, 14,
9, 5, 11, 0, 15, 0, 2, 14, 13, 14, 17, 11, 12, 13, 6, 4, 0, 13,
7, 10, 12, 7, 13), eff = c(0.492448979591836, 0.49687074829932,
0.49421768707483, 0.478571428571428, 0.493537414965986, 0.493809523809524,
0.49891156462585, 0.499319727891156, 0.495102040816327, 0.492285714285714,
0.482312925170068, 0.498911564625851, 0.479931972789116, 0.492857142857143,
0.495238095238095, 0.49891156462585, 0.49530612244898, 0.495850340136055,
0.50156462585034, 0.496, 0.492897959183673, 0.487959183673469,
0.495605442176871, 0.47795918367347, 0.501360544217687, 0.497850340136054,
0.493496598639456, 0.493741496598639, 0.496734693877551, 0.499659863945578
)), .Names = c("modu", "mnc", "eff"), row.names = c(NA, 30L), class = "data.frame")
dat2 <- structure(list(modu = c(0.26541015625, 0.282734375, 0.28541015625,
0.29216796875, 0.293671875), mnc = c(0.16, 0.28, 0.28, 0.28,
0.28), eff = c(0.503877551020408, 0.504149659863946, 0.504625850340136,
0.505714285714286, 0.508503401360544)), .Names = c("modu", "mnc",
"eff"), row.names = c(NA, 5L), class = "data.frame")
dat$modu = dat$modu
dat$mnc = dat$mnc*50
dat$eff = dat$eff
dat2$modu = dat2$modu
dat2$mnc = dat2$mnc*50
dat2$eff = dat2$eff
res <- do.call(rbind, combn(1:3, 2, function(ii)
cbind(setNames(dat[,c(ii, setdiff(1:3, ii))], c("x", "y")),
var=paste(names(dat)[ii], collapse="/")), simplify=F))
ggplot(res, aes(x=x, y=y))+ geom_point(shape=4) +
facet_wrap(~ var, scales="free")
How should I go about doing this?
Thanks!
res <- do.call(rbind, combn(1:3, 2, function(ii)
cbind(row=seq(nrow(dat)),setNames(dat[,c(ii, setdiff(1:3, ii))], c("x", "y")),
var=paste(names(dat)[ii], collapse="/")), simplify=F))
ggplot(res, aes(x=x, y=y, color=row))+ geom_point(shape=4) +
scale_color_gradientn(colours=rev(brewer.pal(10,"Spectral")))+
facet_wrap(~ var, scales="free")

Resources