Drop unused level using facet_grid and coord_flip in ggplot2 - r

I have a dataframe df
df<- structure(list(Categorie = structure(c(1L, 1L, 2L, 2L, 4L, 4L,
3L, 1L, 1L, 2L, 2L, 4L, 4L, 3L, 1L, 1L, 2L, 2L, 4L, 4L, 3L, 1L,
1L, 2L, 2L, 4L, 4L, 3L, 1L, 1L, 2L, 2L, 4L, 4L, 3L, 1L, 1L, 2L,
2L, 4L, 4L, 3L, 1L, 1L, 2L, 2L, 4L, 4L, 3L, 1L, 1L, 2L, 2L, 4L,
4L, 3L), .Label = c("Age classes", "Climate", "Nutrient availability",
"PFT"), class = "factor"), Sub_categories = structure(c(7L, 4L,
6L, 1L, 3L, 2L, 5L, 7L, 4L, 6L, 1L, 3L, 2L, 5L, 7L, 4L, 6L, 1L,
3L, 2L, 5L, 7L, 4L, 6L, 1L, 3L, 2L, 5L, 7L, 4L, 6L, 1L, 3L, 2L,
5L, 7L, 4L, 6L, 1L, 3L, 2L, 5L, 7L, 4L, 6L, 1L, 3L, 2L, 5L, 7L,
4L, 6L, 1L, 3L, 2L, 5L), .Label = c("Continental", "DBF", "ENF",
"Intermediate-Old", "Low-High", "Temperate", "Young"), class = "factor"),
Variable = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L), .Label = c("Age", "Clay content", "GPP", "GPP*Age",
"GPP*P", "GPP*P trend", "N availability", "SPI"), class = "factor"),
Importance = c(19.2, 23.7, 45.2, 45.7, 39, 66.8, 34.8, 10.9,
16.2, 9.2, 6.3, 15.7, 2.1, 10, 13.2, 7.1, 6.1, 3.8, 2.4,
7.3, 5.2, 6.4, 10, 6.6, 3.7, 2.9, 5.8, 8.4, 17.7, 0, 6.1,
5.2, 8.4, 2.8, 6.7, 11.8, 21.1, 9.8, 21.9, 20, 6.3, 13.5,
2.3, 7.6, 3.9, 1.3, 3.9, 0.4, 3.8, 10.9, 7.5, 4.5, 5.8, 0.3,
2.5, 9.4)), .Names = c("Categorie", "Sub_categories", "Variable",
"Importance"), class = "data.frame", row.names = c(NA, -56L))
I want to plot my data with facet_grid, using the Categorie and Sub_categories variables as the facets. I run the command below:
library(ggplot2)
# Horizontal stacked bars of variable importance, one panel per
# Categorie (rows) x Sub_categories (columns) combination.
# The plot reads `df`, the data frame built above; the original call
# referenced `var_Imp`, which is never defined in this snippet.
ggplot(data = df) +
  # geom_col() is the direct spelling of geom_bar(stat = "identity")
  geom_col(
    mapping = aes(x = Variable, y = Importance, fill = Variable),
    width = 1,
    position = "stack"
  ) +
  # Flip the axes so the variables are listed along the vertical axis
  coord_flip() +
  facet_grid(
    Categorie ~ Sub_categories,
    scales = "free", space = "free", shrink = TRUE, drop = TRUE
  ) +
  theme_bw(base_size = 14, base_family = "Helvetica") +
  theme(
    # A negative length draws the tick marks towards the inside of the panel
    axis.ticks.length = unit(-0.25, "cm"),
    legend.position = "none",
    legend.box = "horizontal",
    legend.key = element_blank(),
    legend.text = element_text(size = 14),
    # Extra margin keeps the labels clear of the inward-pointing ticks
    axis.text.x = element_text(margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm")),
    axis.text.y = element_text(margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm")),
    axis.ticks.y = element_blank()
  ) +
  labs(x = "", y = "Relative contribution [%]") +
  scale_fill_brewer(type = "div")
However, levels that have no data in a given facet are still plotted, although they should not be because there is nothing to show for them. I thought the scales="free" and space="free" parameters would do the job, but apparently they do not. Does anyone know how I can plot my data without the unused levels? Thanks

I don't know about a specific command in ggplot2. Two possible options:
1) Any of the options proposed in this post ggplot2: How to force the number of facets with too few plots?
2) Export the plot in .svg (or other vectorial format), open it with inkscape or any other program and delete the empty facets

Related

How to add color in emmeans graph?

I would like to have one color for each level in x axis. I have tried different ways to enter the colour via col argument, but it doesn't seem to work. So far:
library(emmeans)
library(ggplot2)
df <- structure(list(Scanner = structure(c(4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), .Label = c("a", "b", "c", "d", "e",
"f"), class = "factor"), Reta = structure(c(1L, 2L, 3L, 4L, 5L,
6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L,
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L,
2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L,
6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L,
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L,
2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L,
6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("A", "B", "C", "D", "E",
"F"), class = "factor"), erro = c(0.0120000000000005, 0.0289999999999999,
0.088000000000001, 0.00600000000000023, -0.0289999999999964,
0.106999999999999, 0.0850000000000009, 0.172999999999998, 0.183999999999997,
0.208000000000006, 0.192, 0.0869999999999997, -0.0140000000000029,
-0.0420000000000016, -0.0350000000000037, 0.00600000000000023,
0, -0.0100000000000016, 0.167000000000002, 0.276, 0.262, 0.0790000000000006,
0.426000000000002, 0.202999999999999, -0.181000000000004, 0.0560000000000009,
-0.0219999999999985, -0.264999999999993, 0.106000000000002, 0.154999999999998,
-0.0420000000000016, 0.0670000000000002, 0.176000000000002, -0.18,
0.215000000000003, 0.189, -0.036999999999999, 0.169, 0.103000000000002,
-0.622999999999998, 0.268999999999998, 0.106999999999999, -0.0140000000000029,
0.169999999999998, 0.115000000000002, -0.622, 0.276000000000003,
0.0969999999999978, -0.0320000000000036, 0.155999999999999, 0.116,
-0.290999999999997, 0.283000000000001, 0.0439999999999969, 0.0940000000000012,
-0.117000000000001, 0.0249999999999986, 0.00900000000000034,
0.0760000000000005, 0.109999999999999, 0.0549999999999997, 0.0470000000000006,
-0.027000000000001, 0.0130000000000052, 0.036999999999999, 0.0139999999999993,
0.0420000000000016, 0.0459999999999994, -0.109999999999999, 0.007000000000005,
0.0339999999999989, 0.104999999999997, -0.240000000000002, 0.0940000000000012,
-0.0570000000000022, -0.352999999999994, 0.0129999999999981,
0.113, -0.251000000000005, 0.0760000000000005, -0.00200000000000244,
NA, 0.112000000000002, 0.0839999999999996, -0.242000000000004,
0.0530000000000008, -0.134999999999998, -0.446999999999996, 0.118000000000002,
0.075999999999997, -0.0769999999999982, -0.0590000000000011,
-0.0870000000000033, -0.445999999999998, 0.158999999999999, 0.0829999999999984,
-0.270000000000003, -0.0210000000000008, -0.0840000000000032,
-0.189999999999998, 0.116999999999997, 0.0519999999999996, -0.0960000000000036,
-0.0859999999999985, -0.177, -0.271999999999998, 0.0679999999999978,
0.0439999999999969)), row.names = c(NA, -108L), class = c("tbl_df",
"tbl", "data.frame"))
# Two-way linear model of the measurement error, followed by the estimated
# marginal means of Scanner computed separately for each level of Reta.
m1 <- lm(erro ~ Scanner * Reta, data = df)
l1 <- emmeans(m1, "Scanner", "Reta")

# `cols` must exist before emmip() is called; the original snippet used it
# without defining it, which stops with "object 'cols' not found".
# NOTE(review): this reuses the six-colour palette defined later in this
# thread -- confirm it matches what the asker had in their session.
cols <- RColorBrewer::brewer.pal(6, "Paired")

# One panel per Scanner with Reta on the x axis and confidence intervals.
# `col` is the asker's attempt at colouring: one colour repeated for each of
# the 108 / 6 = 18 rows belonging to a level.
emmip(l1, ~ Reta | Scanner,
  col = rep(cols, each = 108 / 6),
  CIs = TRUE
) +
  # Dashed reference line at zero error
  geom_hline(yintercept = 0, linetype = 2) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(y = "Erro absoluto (mm)")
Right now, I think one possible solution is this:
# Six colours from the ColorBrewer "Paired" palette, one per Scanner level
cols <- RColorBrewer::brewer.pal(n = 6, name = "Paired")

# Colour the traces by Scanner and give every scanner its own panel
emmip(m1, Scanner ~ Reta, CIs = TRUE) +
  facet_wrap(~ Scanner) +
  scale_color_manual(values = cols) +
  # Dashed reference line at zero error
  geom_hline(yintercept = 0, linetype = 2) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(y = "Erro absoluto (mm)")

Specify shape for points in ggplot2

I have a CSV with a Detect column where the result is Y or N. I've got my script to change shape dependent on that column, but I need to specify that Y is a filled in circle, while N is hollow circle.
library("ggplot2")
# Load the monitoring results ("FILE_NAME" is a placeholder path)
Report213 <- read.csv("FILE_NAME")

# Result per station, one panel per chemical; point colour follows the
# station and point shape follows the Detect flag.
ggplot(
  data = Report213,
  aes(
    # `levels` is spelled out in full; the original `level =` only worked
    # through partial argument matching.
    x = factor(
      Station_ID,
      levels = c("NEB", "NWB", "LBC", "WB", "HR", "FDP", "FS", "NR", "PB")
    ),
    y = Result,
    colour = Station_ID,
    # The original also mapped `Group = Detect`; aesthetic names are
    # case-sensitive, so that mapping was never used and is dropped here.
    shape = Detect
  )
) +
  # A constant size belongs outside aes(); inside aes() it is treated as a
  # mapped variable and produces a spurious "2" legend.
  geom_point(size = 2) +
  # Only one facet specification can be active: the original added
  # facet_grid(. ~ Chemical) and then facet_wrap(), and the later one wins.
  facet_wrap(~ Chemical, scales = "free_y", ncol = 1) +
  theme(
    panel.background = element_rect(
      fill = "white", colour = "white", size = 0.5, linetype = "solid"
    ),
    panel.grid.major = element_line(
      size = 0.5, linetype = "solid", colour = "gray"
    ),
    panel.grid.minor = element_line(
      size = 0.25, linetype = "solid", colour = "white"
    ),
    strip.background = element_rect(fill = "#454545"),
    strip.text = element_text(colour = "white")
  )
Appreciate any pointers.
dput output of Report213:
structure(list(Station_ID = structure(c(4L, 4L, 4L, 4L, 4L, 9L,
3L, 9L, 3L, 3L, 9L, 3L, 3L, 5L, 7L, 2L, 6L, 7L, 5L, 7L, 8L, 1L,
5L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
6L, 7L, 7L, 7L, 8L, 9L, 9L, 9L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 7L, 7L, 7L, 8L, 9L, 9L, 9L, 1L,
2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 7L,
7L, 7L, 8L, 9L, 9L, 9L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 6L, 7L, 7L, 7L, 8L, 9L, 9L, 9L), .Label = c("FDP",
"FS", "HR", "LBC", "NEB", "NR", "NWB", "PB", "WB"), class = "factor"),
Chemical = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("4,4'-DDT", "CHLORDANE", "Total Aroclors",
"Total PAHs", "Total PCB Congeners"), class = "factor"),
Result = c(78.4176, 66.8307, 59.7295, 50.4102, 40.9341, 36.6868,
34.6394, 26.7728, 23.192, 18.091, 15.47568, 14.539, 13.8006,
4.489, 2.0159, 1.99509, 1.71768, 1.69251, 1.5165, 1.39725,
1.27822, 1.22813, 0.89586, 507.7, 135, 684, 8911, 4946, 780,
4920, 137.9, 559.5, 239.51, 902, 376, 655.4, 8299, 6500,
889, 502.8, 361.1, 17440, 555.8, 953, 5691, 1790, 0.3, 1,
14, 12, 20, 20, 21, 10, 14, 7.6, 7.3, 23, 7.7, 11, 1.5, 0.28,
8.1, 5.4, 11, 0.31, 0.62, 20, 22, 4.2, 6.8, 3.9, 6.7, 4.6,
6.4, 13, 51, 4.2, 50.8, 43.1, 41.9, 4.1, 4.4, 3.9, 4, 4.2,
4.5, 2.3, 4.3, 13, 6.8, 35, 1.1, 0.62, 0.053, 1, 7.4, 23,
3.7, 0.056, 2, 0.055, 0.054, 0.12, 0.053, 0.057, 0.13, 0.088,
0.11, 0.058, 1.1, 21, 1.5, 4.7, 1.6), Detect = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L), .Label = c("N", "Y"), class = "factor")), class = "data.frame", row.names = c(NA,
-115L))
You can specify the shape by using scale_shape_manual
P.S: Use either facet_grid or facet_wrap not both at the same time
Edit: with ggplot2 v3.0.0 released in July 2018, you can use text/string to specify the shape. Naming the values ties each shape to its level regardless of level order, e.g. scale_shape_manual(values = c(Y = "circle", N = "circle open")). See more here
library(tidyverse)
# Put the stations in the desired display order.
# `levels` is spelled out in full; `level =` only worked through partial
# argument matching.
Report213 <- Report213 %>%
  mutate(
    Station_ID = factor(
      Station_ID,
      levels = c("NEB", "NWB", "LBC", "WB", "HR", "FDP", "FS", "NR", "PB")
    )
  )

# Result per station, one panel per chemical with its own y range
ggplot(data = Report213, aes(x = Station_ID, y = Result)) +
  geom_point(aes(color = Station_ID, shape = Detect), size = 2) +
  # Named values bind each shape to its level explicitly: detects ("Y") are
  # solid circles (19) and non-detects ("N") are open circles (1).
  # An unnamed c(19, 1) is matched in level order (N, Y) and therefore gives
  # the opposite of what was asked for.
  scale_shape_manual(values = c(Y = 19, N = 1)) +
  facet_wrap(~ Chemical, scales = "free_y", ncol = 1) +
  theme(
    panel.background = element_rect(
      fill = "white", colour = "white", size = 0.5, linetype = "solid"
    ),
    panel.grid.major = element_line(
      size = 0.5, linetype = "solid", colour = "gray"
    ),
    panel.grid.minor = element_line(
      size = 0.25, linetype = "solid", colour = "white"
    ),
    strip.background = element_rect(fill = "#454545"),
    strip.text = element_text(colour = "white")
  )
Edit 2: Add string ~ integer shape table for future references
# Lookup from shape names to the numeric pch codes they stand for.
# The codes run 0..25 in order; 14 is listed twice because "square triangle"
# and "triangle square" are two names for the same shape. The codes are kept
# as doubles, exactly as when written out as literals.
pch_table <- stats::setNames(
  as.numeric(c(0:14, 14:25)),
  c(
    # 0-6: open outlines and simple strokes
    "square open", "circle open", "triangle open", "plus", "cross",
    "diamond open", "triangle down open",
    # 7-14: composite symbols (14 has two aliases)
    "square cross", "asterisk", "diamond plus", "circle plus", "star",
    "square plus", "circle cross", "square triangle", "triangle square",
    # 15-20: solid symbols
    "square", "circle small", "triangle", "diamond", "circle", "bullet",
    # 21-25: symbols with a separate fill colour
    "circle filled", "square filled", "diamond filled", "triangle filled",
    "triangle down filled"
  )
)
Data used
Report213 <- structure(list(Station_ID = structure(c(4L, 4L, 4L, 4L, 4L, 9L,
3L, 9L, 3L, 3L, 9L, 3L, 3L, 5L, 7L, 2L, 6L, 7L, 5L, 7L, 8L, 1L,
5L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
6L, 7L, 7L, 7L, 8L, 9L, 9L, 9L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 7L, 7L, 7L, 8L, 9L, 9L, 9L, 1L,
2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 7L,
7L, 7L, 8L, 9L, 9L, 9L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 6L, 7L, 7L, 7L, 8L, 9L, 9L, 9L), .Label = c("FDP",
"FS", "HR", "LBC", "NEB", "NR", "NWB", "PB", "WB"), class = "factor"),
Chemical = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("4,4'-DDT", "CHLORDANE", "Total Aroclors",
"Total PAHs", "Total PCB Congeners"), class = "factor"),
Result = c(78.4176, 66.8307, 59.7295, 50.4102, 40.9341, 36.6868,
34.6394, 26.7728, 23.192, 18.091, 15.47568, 14.539, 13.8006,
4.489, 2.0159, 1.99509, 1.71768, 1.69251, 1.5165, 1.39725,
1.27822, 1.22813, 0.89586, 507.7, 135, 684, 8911, 4946, 780,
4920, 137.9, 559.5, 239.51, 902, 376, 655.4, 8299, 6500,
889, 502.8, 361.1, 17440, 555.8, 953, 5691, 1790, 0.3, 1,
14, 12, 20, 20, 21, 10, 14, 7.6, 7.3, 23, 7.7, 11, 1.5, 0.28,
8.1, 5.4, 11, 0.31, 0.62, 20, 22, 4.2, 6.8, 3.9, 6.7, 4.6,
6.4, 13, 51, 4.2, 50.8, 43.1, 41.9, 4.1, 4.4, 3.9, 4, 4.2,
4.5, 2.3, 4.3, 13, 6.8, 35, 1.1, 0.62, 0.053, 1, 7.4, 23,
3.7, 0.056, 2, 0.055, 0.054, 0.12, 0.053, 0.057, 0.13, 0.088,
0.11, 0.058, 1.1, 21, 1.5, 4.7, 1.6), Detect = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L),
.Label = c("N", "Y"), class = "factor")),
class = "data.frame", row.names = c(NA,
-115L))
Created on 2018-06-09 by the reprex package (v0.2.0).

How to construct a matrix for a heatmap or a contour plot, but with NA events?

How can I construct a heatmap like matrix from 3 variables, 2 categorical and 1 numeric, in which certain events do not occur. My dplyr code overlooks those events and misses about 20 cavities in the surface plot that I'd like to make. For that I need an accurate matrix. But this is rather complicated.
What I consider a NA event is a maximum time for which two categorical events (Modeling and Discourse) do not occur simultaneously. So a point of null time observations (NA), not even zero.
I have the following dataframe:
df <- structure(list(`Modeling Code` = structure(c(4L, 4L, 4L, 4L,
4L, 4L, 4L, 6L, 4L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L,
2L, 6L, 6L, 6L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L,
6L, 6L, 6L, 6L, 6L, 4L, 5L, 5L, 5L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 4L, 4L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 3L, 3L, 5L, 4L, 4L, 4L,
4L, 5L, 6L, 6L, 6L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L,
4L, 5L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 6L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 5L, 2L, 2L,
2L, 5L, 4L, 4L, 2L, 2L, 5L, 2L, 2L, 3L, 5L, 5L, 5L, 4L, 4L, 1L,
1L, 4L, 4L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 6L, 5L, 5L, 2L, 5L, 5L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 5L, 5L,
5L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 6L, 6L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 5L, 5L, 5L, 5L, 3L, 2L, 2L, 2L, 2L, 2L,
5L, 5L, 5L, 3L, 3L, 3L, 3L, 6L, 6L, 3L, 3L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 5L, 5L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 2L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 3L, 3L, 3L, 6L, 6L, 6L, 2L, 2L, 2L, 2L, 6L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 6L, 2L, 6L, 2L, 6L, 6L, 6L, 6L, 2L, 2L, 2L,
2L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 5L, 5L, 5L, 3L, 3L, 3L, 3L, 3L, 4L, 5L, 3L,
3L, 3L, 3L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 6L, 6L, 6L, 6L, 6L,
6L, 1L, 1L, 1L, 1L, 1L, 6L, 6L, 1L, 1L, 1L, 3L, 3L, 1L), .Label = c("A",
"MA", "OFF", "P", "SM", "V"), class = "factor"), `Discourse Code` = structure(c(8L,
5L, 8L, 1L, 9L, 2L, 8L, 6L, 5L, 6L, 5L, 8L, 3L, 3L, 6L, 2L, 2L,
9L, 3L, 3L, 6L, 6L, 3L, 3L, 8L, 6L, 9L, 3L, 3L, 9L, 8L, 6L, 8L,
6L, 9L, 3L, 3L, 6L, 6L, 4L, 9L, 1L, 6L, 9L, 6L, 3L, 3L, 6L, 8L,
2L, 6L, 2L, 8L, 2L, 2L, 2L, 2L, 8L, 2L, 1L, 6L, 8L, 9L, 2L, 6L,
8L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 9L, 1L, 6L, 8L, 7L, 7L, 6L,
8L, 6L, 9L, 9L, 6L, 1L, 1L, 6L, 6L, 9L, 9L, 1L, 1L, 9L, 6L, 6L,
6L, 1L, 1L, 9L, 6L, 9L, 1L, 6L, 1L, 9L, 9L, 1L, 6L, 1L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 6L, 9L, 6L, 9L, 8L, 2L, 8L, 2L, 1L, 2L, 6L,
4L, 1L, 1L, 1L, 9L, 5L, 1L, 9L, 8L, 2L, 9L, 2L, 7L, 6L, 1L, 6L,
1L, 2L, 6L, 6L, 6L, 9L, 2L, 2L, 9L, 7L, 7L, 7L, 7L, 9L, 2L, 1L,
1L, 4L, 8L, 4L, 6L, 1L, 6L, 9L, 2L, 1L, 9L, 6L, 6L, 9L, 1L, 6L,
2L, 4L, 4L, 4L, 4L, 8L, 6L, 2L, 1L, 1L, 1L, 2L, 6L, 6L, 8L, 2L,
4L, 6L, 9L, 1L, 6L, 1L, 1L, 3L, 2L, 2L, 2L, 9L, 9L, 9L, 8L, 2L,
6L, 1L, 2L, 1L, 2L, 2L, 1L, 8L, 2L, 6L, 6L, 8L, 2L, 7L, 2L, 2L,
6L, 2L, 2L, 6L, 4L, 8L, 7L, 7L, 7L, 7L, 6L, 8L, 7L, 7L, 9L, 1L,
9L, 2L, 9L, 1L, 6L, 9L, 2L, 6L, 2L, 7L, 9L, 8L, 9L, 9L, 2L, 8L,
9L, 4L, 2L, 4L, 6L, 2L, 6L, 1L, 1L, 3L, 9L, 1L, 8L, 9L, 9L, 9L,
6L, 2L, 6L, 2L, 2L, 7L, 7L, 7L, 8L, 1L, 2L, 2L, 2L, 2L, 6L, 8L,
6L, 1L, 6L, 8L, 2L, 1L, 2L, 6L, 9L, 2L, 9L, 2L, 6L, 2L, 1L, 1L,
9L, 9L, 9L, 8L, 4L, 9L, 6L, 1L, 2L, 9L, 8L, 2L, 1L, 6L, 1L, 6L,
2L, 8L, 2L, 2L, 8L, 4L, 4L, 9L, 6L, 1L, 9L, 7L, 7L, 7L, 7L, 7L,
9L, 6L, 7L, 7L, 7L, 7L, 8L, 6L, 2L, 2L, 6L, 8L, 8L, 4L, 2L, 6L,
1L, 6L, 9L, 6L, 9L, 9L, 2L, 8L, 6L, 6L, 2L, 2L, 9L, 9L, 6L, 2L,
2L, 3L, 3L, 3L, 2L, 9L, 2L, 9L, 2L, 9L, 1L, 9L, 8L, 6L, 7L, 7L,
6L), .Label = c("AG", "C", "D", "DA", "G", "J", "OFF", "Q", "S"
), class = "factor"), Time_Processed = c(1.3833, 1.4333, 1.4667,
1.5333, 1.6167, 1.65, 1.6833, 1.7333, 1.8, 1.8667, 1.9833, 2.05,
2.1333, 2.1667, 2.2167, 2.3, 2.3167, 2.3667, 2.5667, 2.5833,
2.6, 2.7833, 2.8, 2.8167, 2.8667, 3.0167, 3.0333, 3.05, 3.05,
3.1, 3.1833, 3.2667, 3.3, 3.3333, 3.4167, 3.45, 3.4833, 3.5667,
3.6, 3.7, 3.7167, 3.8, 3.95, 4, 4.05, 4.15, 4.1667, 4.15, 4.2167,
4.3, 4.3833, 4.4, 4.4833, 4.5833, 4.6, 4.7, 4.8, 4.8333, 4.8833,
5, 5.05, 5.1, 5.2167, 5.4333, 5.45, 5.6, 5.7, 5.9167, 6.25, 6.2667,
6.2833, 6.4667, 6.5167, 6.5333, 6.55, 6.6667, 6.7167, 6.9, 6.95,
7.05, 7.05, 7.45, 7.6167, 7.7667, 7.7833, 7.8333, 8, 8.0167,
8.05, 8.1, 8.2833, 8.3167, 8.4333, 8.4667, 8.5, 8.55, 8.8833,
9.2667, 9.3167, 9.3333, 9.35, 9.5167, 9.6833, 9.7167, 9.7667,
9.7833, 9.8333, 9.9, 9.9667, 10.0667, 10.0833, 10.15, 10.2, 10.2667,
10.2667, 10.3, 10.35, 10.3667, 10.4, 10.7, 10.7833, 10.9, 11.1333,
11.1833, 11.2167, 11.2333, 11.25, 11.3, 11.35, 11.4167, 11.4667,
11.5333, 11.5667, 11.6667, 11.85, 11.8667, 11.8833, 12.25, 12.3167,
12.7167, 12.7333, 12.8, 12.85, 12.9333, 12.9667, 13.2667, 13.3167,
13.4, 13.4167, 13.5, 13.55, 13.6333, 13.9, 13.95, 13.9667, 14.05,
14.0833, 14.3167, 14.35, 14.3667, 14.4333, 14.4667, 14.5, 14.5333,
14.5833, 14.5833, 14.6167, 14.6667, 14.7167, 14.75, 14.7667,
15.05, 15.0833, 15.25, 15.4333, 15.4833, 15.5167, 15.6, 15.6333,
15.7167, 15.7333, 15.7667, 15.8667, 16.0167, 16.2, 16.2833, 16.3333,
16.3833, 16.45, 16.6, 16.6667, 16.9333, 16.9667, 17, 17.0333,
17.0833, 17.1167, 17.2167, 17.35, 17.4333, 17.55, 17.6, 17.6167,
17.65, 17.7, 17.7167, 17.75, 17.7833, 17.8833, 17.9333, 17.9833,
18.0167, 18.0333, 18.05, 18.0667, 18.1, 18.1667, 18.2, 18.3667,
18.45, 18.5333, 18.6333, 18.6667, 18.7333, 18.85, 18.8833, 18.9833,
19.0333, 19.0667, 19.3833, 19.5333, 19.6333, 19.6667, 19.7167,
19.9333, 19.9667, 20.05, 20.2333, 20.3667, 20.4333, 20.5, 20.5167,
20.5167, 20.55, 20.6167, 20.7167, 20.7667, 20.8167, 20.8667,
21.1333, 21.1833, 21.2, 21.2167, 21.2333, 21.2833, 21.3, 21.5,
21.5833, 21.6333, 21.6667, 21.6833, 21.6833, 21.8167, 21.8833,
22.1333, 22.1667, 22.35, 22.4333, 22.5, 22.5333, 22.5833, 22.6,
22.6, 22.65, 22.6667, 22.7167, 22.75, 22.8833, 23.0667, 23.0833,
23.1167, 23.3167, 23.35, 23.3667, 23.45, 23.5, 23.7667, 23.9833,
24.1833, 24.2167, 24.25, 24.2833, 24.5167, 24.5333, 24.6833,
24.7833, 24.7833, 24.8, 24.8, 24.8667, 25.3833, 25.4333, 25.4833,
25.5, 25.5167, 25.55, 25.5667, 25.5833, 25.6667, 25.7, 26, 26.1333,
26.1667, 26.2, 26.2333, 26.2667, 26.4, 26.4333, 26.4667, 26.5,
26.5167, 26.6667, 26.7, 26.8, 27.0833, 27.1833, 27.2, 27.2, 27.45,
27.5667, 27.6667, 27.7, 27.75, 27.7667, 27.7667, 27.8, 27.8333,
28.0333, 28.35, 28.6333, 28.6333, 28.7833, 28.8, 28.85, 29, 29.1833,
29.3333, 29.6667, 29.7333, 29.8, 29.8833, 29.9, 29.9333, 30.0667,
30.1, 30.1833, 30.2167, 30.25, 30.3, 30.3833, 30.5, 30.55, 30.7167,
31.0167, 31.45, 31.6, 31.8, 31.8333, 32.0167, 32.15, 32.15, 32.1667,
32.2167, 32.2167, 32.2333, 32.3833, 32.6167, 32.6667, 32.7, 32.7167,
32.7333, 32.75, 32.9, 33.0833, 33.1333, 33.1833)), row.names = c(NA,
-386L), class = c("tbl_df", "tbl", "data.frame"), .Names = c("Modeling Code",
"Discourse Code", "Time_Processed"))
Looks a little bit like this:
df[1:10,]
# A tibble: 10 x 3
`Modeling Code` `Discourse Code` Time_Processed
<fct> <fct> <dbl>
1 P Q 1.38
2 P G 1.43
3 P Q 1.47
4 P AG 1.53
5 P S 1.62
6 P C 1.65
7 P Q 1.68
8 V J 1.73
9 P G 1.80
10 SM J 1.87
If I construct a matrix for my heatmap from the two categorical variables Modeling Code and Discourse Code, it looks a little bit like this:
# Share of all observations that falls on each Discourse Code x Modeling Code
# pair, in long form: one row per pair, with the proportion in `Freq`.
z <- with(df, table(`Discourse Code`, `Modeling Code`)) %>%
  prop.table() %>%
  as.data.frame()

# Heatmap of those proportions, with each tile labelled by its value
ggplot(
  data = z,
  aes(x = `Modeling.Code`, y = `Discourse.Code`, fill = Freq)
) +
  theme_bw() +
  geom_tile() +
  geom_text(size = 3, aes(label = Freq))
This is a heatmap of the frequency of occurrence of each pair of categorical variables, so (C & MA) occur simultaneously about 10.6% of the time, while many pairs of categorical factors do not occur simultaneously at all. These are the ones with a value of 0. All the frequencies add up to 1, accounting for 100% of all pairs of Modeling and Discourse Codes.
If you count the number of zeroes (no occurring pairs) in this data-set you will see that there are twenty zeroes and this is important.
I was interested in the times at which these pairs occur so I decided to make a contour plot with plot_ly from my original dataset.
# Contour plot of Time_Processed over the Modeling Code x Discourse Code grid
plot_ly(
  data = df,
  x = ~ `Modeling Code`,
  y = ~ `Discourse Code`,
  z = ~ `Time_Processed`,
  type = "contour"
)
Inspection of this contour plot with an interactive mouse shows that the Time points of "Time_Processed" are the maximum values of the "Modeling Codes" and "Discourse Codes"
So I generate those points with dplyr:
# Latest time at which each observed (Modeling Code, Discourse Code) pair
# occurs. Pairs that never occur produce no row at all, which is why the
# result has fewer rows than the full 6 x 9 grid.
# The leading `+` characters of the original were console continuation
# prompts; they are removed so the pipeline can be run as written.
df %>%
  group_by(`Modeling Code`, `Discourse Code`) %>%
  summarise(max_time = max(Time_Processed))
# A tibble: 34 x 3
# Groups: Modeling Code [?]
`Modeling Code` `Discourse Code` max_time
<fct> <fct> <dbl>
1 A AG 9.97
2 A C 32.7
3 A D 4.17
4 A J 33.2
5 A Q 32.8
6 A S 32.7
7 MA AG 24.7
8 MA C 31.4
9 MA D 22.4
10 MA DA 27.2
# ... with 24 more rows
Hold up!!! There are only 34 entries, of maximum times, but the size of my heatmap is (6 x 9) = 54 cells. The 20 missing entries are the categorical pairs that yield zero. So I'm finding it very difficult to construct my matrix.
A MA OFF P SM V
S 32.733 31.800 NA 30.3000 30.250 32.700
Q 32.750 27.1833 NA 30.5000 29.800 28.85
OFF NA NA 33.133 NA NA NA
J 33.1833 26.5167 NA 30.7167 30.2167 31.8333
G NA NA NA 11.8500 NA NA
DA NA 27.2000 NA NA 29.8833 25.700
D 4.1667 22.3500 NA 6.2667 NA 32.2167
C 32.6667 31.4500 NA 30.3833 29.9000 32.1500
AG 9.967 24.6833 NA 13.2667 30.0667 32.7167
This is the matrix (assuming I didn't make any manual mistakes) that I'd like to create based on my observations. The NAs are values that for the Modeling and Discourse Code pairs that do not occur, so it's the 20 entries that my dplyr summarise function with maximum time could not capture, but my heatmap did. So if I do that then I can tediously fill out this matrix.
My question is how can I construct this matrix?
In addition, I would prefer that the missing values show up either as NA or as -1, but not as zero, because my goal is to construct this matrix and then create a 3D surface plot that complements my contour plot, so that I can accurately see the types of procedures that my subjects are implementing over an event that lasts about 30 minutes. If those missing cells are interpreted as zero, the surface plot will be wrong, because at the beginning of the event (time 0) the subjects did not use those procedures.
Complex problems sometimes have simple solutions and it wasn't clear to me until I did a lot of experimentation with all existing functions. I figured out that dcast accomplished my goal. All the word noise was me trying to explain the complexity of my problem I was hoping you would understand.
# Wide Discourse Code x Modeling Code table holding the latest time per pair.
# Pairs with no observations are filled with -1 instead of being aggregated.
# The call reads `df`, the data frame shown earlier in this thread; the
# original used `FERMI_1`, a name that is not defined anywhere in the post.
dcast(
  data = df,
  formula = `Discourse Code` ~ `Modeling Code`,
  value.var = "Time_Processed",
  fun.aggregate = max,
  fill = -1
)
Discourse Code A MA OFF P SM V
1 AG 9.9667 24.6833 -1.0000 13.2667 30.0667 32.7167
2 C 32.6667 31.4500 -1.0000 30.3833 29.9000 32.1500
3 D 4.1667 22.3500 -1.0000 6.2667 -1.0000 32.2167
4 DA -1.0000 27.2000 -1.0000 -1.0000 29.8833 25.7000
5 G -1.0000 -1.0000 -1.0000 11.8500 -1.0000 -1.0000
6 J 33.1833 26.5167 -1.0000 30.7167 30.2167 31.8333
7 OFF -1.0000 -1.0000 33.1333 -1.0000 -1.0000 -1.0000
8 Q 32.7500 27.1833 -1.0000 30.5000 29.8000 28.8500
9 S 32.7333 31.8000 -1.0000 30.3000 30.2500 32.7000
It appears my comment answered the question:
If you have an object that supports the is.na and [<- functions then reassigning a numeric value of -1 to entries that currently are NA is as simple as obj[ is.na(obj) ] <- -1. (I cannot really tell if this is the request, since I got lost in the long presentation that didn't have a definite goal.) If on the other hand, the need is to first generate such a matrix from a long format data-obj named df2 might be addressed by
# Cross-tabulate max_time by the two codes from the long-format data `df2`.
# Column names that contain a space must be back-quoted inside a formula;
# without the backticks this line does not parse.
# NOTE(review): xtabs() sums max_time within each cell, so pairs that are
# absent from df2 come out as 0 rather than NA -- confirm that is acceptable.
obj <- xtabs(max_time ~ `Modeling Code` + `Discourse Code`, data = df2)

adding geom_text from different dataset to geom_bar

I have a bar plot with a facet grid, and I would like to add the number of observations per sub-plot which are stored in a separate dataframe.
The bar plot is produced with
# Counts of score differences, coloured by incompatibility type, with one
# panel per years.since.peace (rows) x strategy.cm6.YP (columns) combination.
bar.plot <- ggplot(
  BarDiff.m.s,
  aes(x = value.change, fill = incompatibility)
) +
  # geom_bar() counts the rows at each x value. It has no `binwidth`
  # parameter in current ggplot2, so the original `binwidth = 1` is dropped.
  geom_bar() +
  labs(x = "score differences", y = "count / years since start of PSA") +
  # Dotted reference line at "no change"
  geom_vline(aes(xintercept = 0), linetype = "dotted") +
  theme(
    plot.title = element_text(face = "bold", size = 10),
    legend.position = "bottom"
  ) +
  scale_fill_brewer(palette = "Set1") +
  facet_grid(years.since.peace ~ strategy.cm6.YP, space = "free")
I tried to add the geom_text by adding after the geom_bar line
geom_text(data=num.obs, aes(label=paste("obs=",num.obs),y=4,x=min(BarDiff.m.s$value.change)))
however, I obtain the error message
Error in eval(expr, envir, enclos) : object 'incompatibility' not found
Apparently, for some reason, I have to consider the "fill" variable in geom_text; I tried to add group=BarDiff.m.s$incompatibility to geom_text, but to no avail.
I have seen How to add custom labels from a dataset on top of bars using ggplot/geom_bar in R? , but if possible I would like to keep the two data.frames separate and understand how to solve the "fill" issue. Any suggestion would be very much welcome! thx.
The pertaining data for the plot is
BarDiff.m.s <- structure(list(value.change = c(-1, -1, -2, -2, 1, NA, 0, -2,
-1, -2, NA, 2, -3, NA, NA, -3, -2, -1, -4, -1, -3, -1, 2, 2,
NA, 1, -1, 0, 0, -2, -2, -2, -1, 1, NA, -1, -1, 0, -2, NA, 0,
-4, NA, NA, NA, -3, -1, -4, -2, -3, -2, -1, 0, NA, NA, 0, -4,
NA, -2, -2, -3, -1, NA, NA, -1, -1, 0, -2, NA, 0, NA, NA, NA,
NA, -4, NA, -4, -2, -3, -2, -2, 2, NA, NA, 0, -4, -2, NA, NA,
NA, NA, NA, NA, -1, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA, NA,
-4, NA, -2, -1, -2, NA, NA, NA, NA, -3, 1), incompatibility = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L,
2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L,
1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L,
2L, 1L, 1L), .Label = c("territory", "government"), class = "factor"),
years.since.peace = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L), .Label = c("y0", "y10", "y15", "y20", "diff.y5",
"diff.y10", "diff.y15", "diff.y20"), class = "factor"), strategy.cm6.YP = structure(c(4L,
4L, 5L, 1L, 1L, 4L, 3L, 4L, 3L, 1L, 1L, 1L, 3L, 4L, 4L, 4L,
4L, 3L, 3L, 4L, 4L, 4L, 1L, 4L, 5L, 1L, 4L, 1L, 4L, 4L, 4L,
5L, 1L, 1L, 4L, 3L, 4L, 3L, 1L, 1L, 1L, 3L, 4L, 4L, 4L, 4L,
3L, 3L, 4L, 4L, 4L, 1L, 4L, 5L, 1L, 4L, 1L, 4L, 4L, 4L, 5L,
1L, 1L, 4L, 3L, 4L, 3L, 1L, 1L, 1L, 3L, 4L, 4L, 4L, 4L, 3L,
3L, 4L, 4L, 4L, 1L, 4L, 5L, 1L, 4L, 1L, 4L, 4L, 4L, 5L, 1L,
1L, 4L, 3L, 4L, 3L, 1L, 1L, 1L, 3L, 4L, 4L, 4L, 4L, 3L, 3L,
4L, 4L, 4L, 1L, 4L, 5L, 1L, 4L, 1L, 4L), .Label = c("none",
"only offered", "communication/\nfacilitation", "procedural",
"directive", "unspecified"), class = "factor")), .Names = c("value.change",
"incompatibility", "years.since.peace", "strategy.cm6.YP"), class = "data.frame", row.names = c(1298L,
1299L, 1335L, 1339L, 1340L, 1341L, 1344L, 1372L, 1379L, 1386L,
1387L, 1402L, 1415L, 1439L, 1449L, 1454L, 1455L, 1456L, 1463L,
1466L, 1470L, 1496L, 1497L, 1498L, 1525L, 1536L, 1542L, 1546L,
1563L, 1617L, 1618L, 1654L, 1658L, 1659L, 1660L, 1663L, 1691L,
1698L, 1705L, 1706L, 1721L, 1734L, 1758L, 1768L, 1773L, 1774L,
1775L, 1782L, 1785L, 1789L, 1815L, 1816L, 1817L, 1844L, 1855L,
1861L, 1865L, 1882L, 1936L, 1937L, 1973L, 1977L, 1978L, 1979L,
1982L, 2010L, 2017L, 2024L, 2025L, 2040L, 2053L, 2077L, 2087L,
2092L, 2093L, 2094L, 2101L, 2104L, 2108L, 2134L, 2135L, 2136L,
2163L, 2174L, 2180L, 2184L, 2201L, 2255L, 2256L, 2292L, 2296L,
2297L, 2298L, 2301L, 2329L, 2336L, 2343L, 2344L, 2359L, 2372L,
2396L, 2406L, 2411L, 2412L, 2413L, 2420L, 2423L, 2427L, 2453L,
2454L, 2455L, 2482L, 2493L, 2499L, 2503L, 2520L))
The data for the number of observations is:
num.obs <- structure(list(years.since.peace = structure(c(5L, 5L, 5L, 5L,
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L), .Label = c("y0",
"y10", "y15", "y20", "diff.y5", "diff.y10", "diff.y15", "diff.y20"
), class = "factor"), strategy.cm6.YP = structure(c(1L, 3L, 4L,
5L, 1L, 3L, 4L, 5L, 1L, 3L, 4L, 5L, 1L, 3L, 4L, 5L), .Label = c("none",
"only offered", "communication/\nfacilitation", "procedural",
"directive", "unspecified"), class = "factor"), num.obs = c(8L,
5L, 14L, 2L, 8L, 5L, 14L, 2L, 8L, 5L, 14L, 2L, 8L, 5L, 14L, 2L
)), .Names = c("years.since.peace", "strategy.cm6.YP", "num.obs"
), row.names = c(NA, -16L), class = "data.frame")
Moving the fill aesthetic into geom_bar and changing the y position for geom_text should get you what you want.
# Counts of score differences per facet, with the number of observations
# printed in each panel from the separate `num.obs` data frame.
bar.plot <- ggplot(BarDiff.m.s, aes(x = value.change)) +
  # `fill` is mapped here rather than in ggplot(), so layers that use other
  # data (the text layer below) do not look for an `incompatibility` column.
  # geom_bar() has no `binwidth` parameter in current ggplot2, so it is
  # dropped; the bars are the row counts at each score value.
  geom_bar(aes(fill = incompatibility)) +
  # One label per panel, matched to the facets through the shared
  # years.since.peace and strategy.cm6.YP columns; the position is fixed
  # at x = -4, y = 4.
  geom_text(
    data = num.obs,
    aes(label = paste("obs=", num.obs), y = 4, x = -4)
  ) +
  labs(x = "score differences", y = "count / years since start of PSA") +
  # Dotted reference line at "no change"
  geom_vline(aes(xintercept = 0), linetype = "dotted") +
  theme(
    plot.title = element_text(face = "bold", size = 10),
    legend.position = "bottom"
  ) +
  scale_fill_brewer(palette = "Set1") +
  facet_grid(years.since.peace ~ strategy.cm6.YP, space = "free")
bar.plot
If you want the text labels to be positioned by value.change in the first data set, probably the easiest way to get that is to merge the relevant column into the second data set.

Boxplot with multiple x variables

I'm new to R and having a few issues with using ggplot2.
This is an example of my data (subset of larger data set) :
df <-
structure(list(logpvalue = c(22.36, 6.93, 16.78, 1.78, 17.75,
20.99, 21.03, 9.19, 15.01, 22.25, 13.4, 6.47, 1.34, 13.4, 3.21,
0.37, 0.5, 0.12, 1.8, 0.71, 1.15, 6.73, 0.12, 6.97, 0.64, 9.85,
1.45, 1.67, 2.6, 1.8, 1.35, 4.69, 0.37, 1.91, 0.31, 0, 2.45,
1.68, 2.31, 1.35, 6.48, 4.68), SNP = structure(c(1L, 7L, 6L,
5L, 11L, 1L, 9L, 5L, 8L, 11L, 7L, 5L, 8L, 11L, 1L, 7L, 1L, 4L,
2L, 3L, 10L, 7L, 1L, 4L, 2L, 3L, 10L, 4L, 2L, 3L, 10L, 4L, 2L,
3L, 10L, 4L, 2L, 3L, 7L, 9L, 5L, 1L), .Label = c("rs10244", "rs10891244",
"rs10891245", "rs11213821", "rs12296076", "rs138567267", "rs45615536",
"rs6589218", "rs7103178", "rs7127721", "rs7944895"), class = "factor"),
X173 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("het", "hom"), class = "factor")), .Names = c("logpvalue",
"SNP", "X173"), class = "data.frame", row.names = c(NA, -42L))
I want to plot a boxplot of logpvalue on y axis, with SNP on the x-axis but with each SNP also categorized by whether the patient is het or hom for X173. So from this data I'd imagine 4 boxes on my boxplot.
If possible I'd also like to incorporate the individual data points (dotplot-boxplot overlay) with jitter.
This is the usual code I'd use for a boxplot of logpvalue vs SNP:
# Box plot of logpvalue (y) for each SNP (x), with the raw points jittered on
# top. `df` is the data frame defined above.
# Each `+` has to end its line: a line that begins with `+` is evaluated as a
# separate statement instead of adding layers to the plot.
qplot(SNP, logpvalue, data = df, geom = "boxplot") +
  geom_jitter(position = position_jitter(width = 0.1, height = 0.1)) +
  theme_bw()
How do I add the extra x variable into this code?
Try this:
# Base-graphics alternative: paste() joins SNP and X173 into one label per
# row (for example "rs10244 het"), and the formula draws one box of logpvalue
# for each distinct label.
boxplot(df$logpvalue~paste(df$SNP,df$X173))
Or using ggplot2:
library(ggplot2)
# Box plot of logpvalue per SNP with the individual observations overlaid,
# split into one panel per X173 genotype (het / hom).
ggplot(data = df, aes(x = SNP, y = logpvalue, colour = SNP)) +
  # Hide the box plot's own outlier points: geom_jitter() already draws every
  # observation, so outliers would otherwise appear twice.
  geom_boxplot(outlier.shape = NA) +
  # Jitter horizontally only, so the plotted y values stay exact
  geom_jitter(width = 0.2, height = 0) +
  facet_grid(. ~ X173)

Resources