Combining new lines and italics in facet labels with ggplot2 - r

I have a problem getting some words used in facet labels in italics. I use the following code to create new lines for the labels:
levels(length_subject$CONSTRUCTION) <-
c("THAT \n Extraposed", "THAT \n Post-predicate", "TO \n Extraposed \n for-subject", "TO \n Post-predicate \n for-subject", "THAT \n Extraposed \n that-omission", "THAT \n Post-predicate \n that-omission")
However, I want the words "that" and "for" to appear in italics. I've tried something like
"TO \n Extraposed \n (italics(for))-subject"
bit it doesn't work.
This is what the plots look like:
produced with the following code:
ggplot( length_subject, aes( x = SUBJECT ) ) +
geom_histogram(binwidth=.6, colour="black", fill="grey") +
ylab("Frequency") +
xlab("Subject length") +
scale_x_discrete(breaks=c(2,4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30)) + #
facet_grid( SUBJECT_TYPE~CONSTRUCTION, scales="free_x", space="free") +
theme(strip.text.x = element_text(size = 8))
Here is a reduced variant of the data:
structure(list(ID = structure(1:86, .Label = c("A05_122_01",
"A05_253_01", "A05_277_07", "A05_400_01", "A05_99_01", "A06_1076_01",
"A06_1261_01", "A06_1283_01", "A06_1283_02", "A06_1317_01", "A06_1326_01",
"A06_1389_01", "A06_1390_01", "A06_1437_01", "A06_1441_02", "A06_1441_03",
"A06_1442_03", "A06_1456_01", "A06_1461_01", "A06_830_01", "A06_868_01",
"A06_884_01", "A06_884_03", "A0K_1057_02", "A0K_1144_07", "A0K_1177_01",
"A0K_1190_03", "A0K_1214_03", "A0K_1216_01", "A0K_950_02", "A0K_986_01",
"A1A_102_02", "A1A_163_01", "A1A_199_01", "A1A_45_01", "A1A_97_01",
"A1B_1008_02", "A1B_1013_01", "A1B_1028_02", "A1B_1042_01", "A1B_1064_01",
"A1B_1126_03", "A1B_1152_01", "A1B_1174_01", "A1B_1271_01", "A1B_997_01",
"A1J_487_01", "A1J_544_02", "A1J_555_03", "A1J_569_01", "A1J_601_01",
"A1N_422_04", "A1N_70_02", "A1S_191_01", "A1S_329_01", "A1S_330_01",
"A1S_465_04", "A1Y_248_01", "A1Y_278_02", "A1Y_292_01", "A1Y_466_01",
"A1Y_521_01", "A1Y_612_01", "A1Y_634_01", "A26_139_03", "A26_142_01",
"A26_148_01", "A26_289_01", "A26_345_02", "A26_439_01", "A26_441_02",
"A26_463_01", "A28_171_01", "A28_244_01", "A28_245_01", "A28_30_01",
"A28_341_01", "A28_42_01", "A28_494_03", "A2A_301_01", "A2A_396_01",
"A2A_599_01", "A2A_637_01", "A2A_676_01", "A2E_22_01", "A2E_25_03"
), class = "factor"), SUBJECT = c(3L, 2L, 6L, 2L, 2L, 1L, 1L,
1L, 1L, 2L, 4L, 1L, 4L, 2L, 3L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 7L, 1L, 3L, 2L, 2L, 1L, 6L, 7L, 4L, 1L, 5L, 4L, 2L, 9L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 5L, 3L, 4L, 1L, 1L, 1L, 1L, 5L,
2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 5L, 2L, 1L, 2L, 2L, 1L, 7L, 1L,
4L, 1L, 2L, 1L, 1L, 3L, 1L, 13L, 2L, 1L, 1L, 1L, 3L, 1L, 1L),
CONSTRUCTION = structure(c(1L, 3L, 1L, 1L, 1L, 4L, 4L, 1L,
1L, 5L, 5L, 1L, 1L, 5L, 1L, 3L, 5L, 1L, 5L, 4L, 3L, 3L, 1L,
5L, 3L, 5L, 1L, 1L, 2L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 1L,
4L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 4L, 2L, 4L, 1L, 1L, 3L, 2L,
5L, 1L, 1L, 1L, 3L, 1L, 1L, 4L, 4L, 3L, 1L, 2L, 3L, 3L, 1L,
3L, 1L, 1L, 1L, 6L, 1L, 1L, 2L, 4L, 4L, 3L, 5L, 3L, 3L, 3L,
3L, 5L, 1L), .Label = c("THAT_EXT", "THAT_EXT_NT", "THAT_POST",
"THAT_POST_NT", "TO_EXT_FOR", "TO_POST_FOR"), class = "factor"),
SUBJECT_TYPE = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 3L, 1L, 1L,
2L, 3L, 1L, 2L, 2L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 3L, 2L, 2L, 2L, 3L, 1L, 1L, 2L, 1L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L,
1L, 3L, 3L), .Label = c("NP", "PRO", "PROPER"), class = "factor")), .Names = c("ID",
"SUBJECT", "CONSTRUCTION", "SUBJECT_TYPE"), class = "data.frame", row.names = c(NA,
-86L))

To get italics, you need the formatting described in plotmath (and then for that to be parsed as an expression). However, the plotmath syntax does not have a line break operation. You can get something similar with atop, though. With your given example, you can set the labels to
levels(length_subject$CONSTRUCTION) <-
c("atop(textstyle('THAT'),textstyle('Extraposed'))",
"atop(textstyle('THAT'),textstyle('Post-predicate'))",
"atop(atop(textstyle('TO'),textstyle('Extraposed')),italic('for')*textstyle('-subject'))",
"atop(atop(textstyle('TO'),textstyle('Post-predicate')),italic('for')*textstyle('-subject'))",
"atop(atop(textstyle('THAT'),textstyle('Extraposed')),italic('that')*textstyle('-omission'))",
"atop(atop(textstyle('THAT'),textstyle('Post-predicate')),italic('that')*textstyle('-omission'))")
and then adding labeller=label_parsed to the facet_grid call
ggplot( length_subject, aes( x = SUBJECT ) ) +
geom_histogram(binwidth=.6, colour="black", fill="grey") +
ylab("Frequency") +
xlab("Subject length") +
scale_x_discrete(breaks=c(2,4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30)) + #
facet_grid( SUBJECT_TYPE~CONSTRUCTION, scales="free_x", space="free",
labeller=label_parsed) +
theme(strip.text.x = element_text(size = 8))
gives
It's not perfect (the spacing between lines is not the same, and the disparity would only get worse the more lines there are), but that is the only way I've found to combine the two (newlines in plotmath expressions).

Edit (2016)
With the new facet labelling system, this solution does not work anymore. The trick of inheriting from element_blank to make a custom grob is now explicitly disabled. I guess the lesson is to accept that some things cannot be done in ggplot2, by design, and not waste too much energy with workarounds that may get broken at any time in the future.
Original answer
You could try to create a suitable custom element to place in the theme settings. The theme design does not make it very easy, unfortunately,
require(ggplot2)
require(gridExtra) # tableGrob
element_grob.element_custom <- function(element, label="", ...) {
mytheme <- ttheme_minimal(core = list(fg_params = list(parse=TRUE)))
disect <- strsplit(label, "\\n")[[1]]
g1 <- tableGrob(as.matrix(disect), theme=mytheme)
# wrapping into a gTree only because grobHeight.gtable would be too tight
# cf. absolute.units() squashing textGrobs
gTree(children=gList(g1), height=sum(g1$heights),
cl = "custom_strip")
}
# gTrees don't know their size and ggplot would squash it, so give it room
grobHeight.custom_strip = heightDetails.custom_axis = function(x, ...)
x$height
# silly wrapper to fool ggplot2's inheritance check...
facet_custom <- function(...){
structure(
list(...), # this ... information is not used, btw
class = c("element_custom","element_blank", "element") # inheritance test workaround
)
}
title <- c("First~line \n italic('wait, a second')",
"this~is~boring",
"integral(f(x)*dx, a, b)")
iris2 <- iris
iris2$Species <- factor(iris$Species, labels=title)
ggplot(iris2, aes(Sepal.Length, Sepal.Width)) +
geom_line() + facet_grid(.~Species) +
theme(strip.text.x = facet_custom())

As several of you were looking for how to fix the spacing, I have found a solution.
Add a line with atop(scriptscriptstyle("") before the last line from 3 lines (making this 4) or any following lines and don't forget to add ) afterwards

Related

Modify color and font of emmip plot (emmeans package) in R

Hello :) I am desperately trying to change the colors and font of my emmip plot (plot from the emmeans package in R) but none of my codes are working.
Currently my code for the plot looks like this:
emmip(Model, group ~ gend, CIs=TRUE, nuisance = c("known", "age_dup", "edu"),
xlab = "",
ylab = "Intention to use the platform")
I read in the manueal from the R-package that the code from emmip() can be combined with ggplot2 codes. But when I add the following two codes (that I successfully use in another ggplot) - nothing changes in my plot:
+ theme(text=element_text(family="serif", size=13)
+ scale_fill_brewer(palette="Blues"))
I varied them already, for example "," instead of "+"
Does anyone have an idea how I can make these two modifications work in emmip? Thank you all in advance!
Here is the dput of my data (first 30 rows):
structure(list(dv = c(1, 5, 5, 1, 3, 5, 2, 1, 5, 5, 2, 4, 6,
7, 3, 5, 5, 6, 7, 1, 7, 6, 2, 4, 7, 6, 5, 1, 6, 6), gend = structure(c(1L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, NA, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L), .Label = c("Male",
"Female"), class = "factor"), group = structure(c(5L, 3L, 5L,
3L, 2L, 1L, 3L, 4L, 2L, 1L, 3L, 2L, 3L, 3L, 4L, 4L, 2L, 4L, 5L,
5L, 1L, 4L, 1L, 4L, 2L, 1L, 2L, 3L, 1L, 4L), .Label = c("Default",
"Visual element", "Verbal content", "Visual design", "Combined",
"DesignZH"), class = "factor"), ISFregscores = c(0.984372106429775,
-0.383676865152824, -0.816194838031774, -0.408554787302724, -0.0416530380928891,
0.998088756156888, 0.216609251327447, 0.83416518546863, 1.00178246600492,
-0.496215251116934, -1.34559758838579, NA, 0.707838661016661,
1.05815783619489, -0.314855036376305, 0.617674358967702, -0.56862344822269,
0.0589354712707628, 0.31998903974822, -0.511084756816837, -0.171121724458495,
0.532699047600051, 0.196311893993997, -2.09902298349596, 1.04422334581248,
-0.132687312769232, 1.05733961165571, 0.541606480874359, 0.440296538856025,
0.895064902672922), age_dup = structure(c(2L, 1L, 1L, 2L, 1L,
2L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 3L, 1L, 2L, 3L,
1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L), .Label = c("under34", "age35_49",
"over50"), class = "factor"), edu = structure(c(5L, 4L, 5L, 2L,
5L, 5L, 5L, 5L, 5L, 4L, 5L, NA, 5L, 4L, 1L, 5L, 4L, 5L, 3L, 5L,
2L, 5L, 3L, 5L, 6L, 5L, 6L, 1L, 3L, 5L), .Label = c("oblig. Schulzeit",
"Berufsausbildung", "Berufsmatura", "Gymnasiale Matura", "BA/MA",
"Doktorat", "Andere"), class = "factor"), empl = structure(c(1L,
6L, 1L, 2L, 8L, 2L, 5L, 2L, 2L, 6L, 2L, NA, 1L, 1L, 6L, 2L, 6L,
1L, 1L, 4L, 2L, 6L, 1L, 1L, 3L, 6L, 2L, 2L, 4L, 1L), .Label = c("Privatsektor",
"öffentlicher Sektor", "Non-Profit Sektor", "selbstständig",
"Rentner/in", "Student/in", "Hausfrau/Hausmann", "arbeitssuchend"
), class = "factor"), civ_dup = structure(c(2L, 1L, 1L, 3L, 2L,
1L, 2L, 2L, 2L, 1L, 2L, NA, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 3L, 1L,
2L, 2L, 2L, 2L, 1L, 2L, 3L, 2L, 1L), .Label = c("single", "Partnerschaft",
"keine Angabe"), class = "factor"), kids = structure(c(2L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, NA, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L), .Label = c("Nein",
"Ja"), class = "factor"), known = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 1L, 1L, NA, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L), .Label = c("Nein", "Ja"
), class = "factor"), device = structure(c(1L, 2L, 1L, 2L, 2L,
1L, 3L, 1L, 2L, 2L, 1L, NA, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L), .Label = c("Smartphone / Tablet iOS (iPhone/iPad)",
"Smartphone / Tablet (Android)", "Computer / Laptop"), class = "factor")), row.names = c(NA,
-30L), class = c("tbl_df", "tbl", "data.frame"))
And this is the code for my regression that I then use for the interaction (graph):
Model <- lm(dv ~ gend * group + ISFregscores + age_dup + edu + empl + civ_dup + kids + known + device, data=)
You've got the right approach to change the font but you also have to make sure the font is actually available to the graphics device. This step can be tricky; I use the showtext package which makes this a bit easier.
To change the color palette, specify the color scale (rather than the fill scale).
library("showtext")
#> Loading required package: sysfonts
#> Loading required package: showtextdb
library("emmeans")
library("tidyverse")
showtext_auto()
# 30 data points are too few to fit the original model, so I drop `device`
model <- lm(
dv ~ gend * group + ISFregscores + age_dup + edu + empl + civ_dup + kids + known,
data = data
)
p <- emmip(
model, group ~ gend,
CIs = TRUE,
nuisance = c("known", "age_dup", "edu"),
xlab = "",
ylab = "Intention to use the platform"
)
p +
scale_color_brewer(
palette = "Blues"
) +
guides(
color = guide_legend(title = "New Legend Title")
) +
theme(
text = element_text(family = "serif", face = "bold.italic", size = 16)
)
Created on 2023-01-11 with reprex v2.0.2

R - boxplot log scale problem with 0 values

I have a dataset that consists of 0 values. I want to use log scale but because of the 0 values, it is returning an error. I tried to replace 0s with 1s and it returned something that did not seem right.
As you can see in the figure, I have very small values for the 16k case but to show it clearly, I want to use log scale. Also, I want the order to be 8k_B, 8k_S, 16k_B, 16k_S. I tried factor and levels but still it didn't change the order.
Can someone please help? I can post the data if necessary. Thank you.
Here is the code I used.
data_freq <- data.frame(name=c( rep("8K_B",24), rep("8K_S",24), rep("16_B",24), rep("16K_S",24)),sines=c(rep("B",24),rep("S",24),rep("B",24),rep("S",24)),
value_freq=c( r1B$Frequency, r1S$Frequency, r2B$Frequency, r2S$Frequency)
)
p <- ggplot(data_freq, aes(x=name, y=value_freq, fill=name)) +
geom_boxplot()
Here is the data:
data_freq <- structure(list(name = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("16K_S", "16_B",
"8K_B", "8K_S"), class = "factor"), sines = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("B",
"S"), class = "factor"), value_freq = c(6.269822e-05, 5.494403e-05,
5.84888e-05, 5.727028e-05, 7.300023e-05, 6.502448e-05, 6.568913e-05,
5.771338e-05, 5.638409e-05, 5.693796e-05, 5.527635e-05, 6.103661e-05,
5.660564e-05, 6.269822e-05, 5.594099e-05, 6.978778e-05, 5.571945e-05,
6.258745e-05, 6.779384e-05, 6.668609e-05, 6.048274e-05, 5.826725e-05,
5.671641e-05, 6.070429e-05, 9.433902e-05, 8.037108e-05, 8.203393e-05,
8.591391e-05, 9.633444e-05, 9.123503e-05, 8.946133e-05, 8.447278e-05,
7.638024e-05, 8.103622e-05, 8.15905e-05, 8.480535e-05, 7.527167e-05,
8.779847e-05, 8.192307e-05, 9.7443e-05, 7.649109e-05, 8.425106e-05,
9.134589e-05, 9.555844e-05, 8.724419e-05, 7.881908e-05, 7.771052e-05,
8.358592e-05, 1.1077e-07, 1.1077e-07, 0, 0, 1.1077e-07, 0, 0,
1.1077e-07, 1.1077e-07, 0, 0, 0, 0, 0, 3.3232e-07, 0, 2.2155e-07,
4.431e-07, 1.1077e-07, 1.1077e-07, 1.1077e-07, 0, 2.2155e-07,
0, 5.5428e-07, 5.5428e-07, 6.6514e-07, 6.6514e-07, 7.64911e-06,
6.6514e-07, 6.6514e-07, 1.1086e-07, 5.5428e-07, 6.6514e-07, 6.6514e-07,
6.6514e-07, 3.3257e-07, 6.6514e-07, 0, 6.6514e-07, 3.87998e-06,
6.6514e-06, 1.1086e-07, 1.1086e-07, 1.1086e-07, 3.3257e-07, 3.3257e-07,
1.10857e-06)), class = "data.frame", row.names = c(NA, -96L))
You could try to do log(x+n) transformation instead.
p <- data_freq %>%
mutate(value_freq = log(value_freq + 0.000001)) %>% # your numbers are really small so I am adding a small number
ggplot(aes(x=name, y=value_freq, fill=name)) +
geom_boxplot()
Alternatively, you can try square root transformation.
p <- data_freq %>%
mutate(value_freq = value_freq^(1/2)) %>%
ggplot(aes(x=name, y=value_freq, fill=name)) +
geom_boxplot()
Or do the transformation using ggplot:
p <- data_freq %>%
ggplot(aes(x=name, y=value_freq, fill=name)) +
geom_boxplot() +
scale_y_log10()

Geom Bar Plot, sum of count not visible

I have a dataframe tag, with 51X5 structure
structure(list(Tagging = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("CIRCLE CAMPIAGN",
"NATIONAL CAMPIAGN"), class = "factor"), Status = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("Negative", "Positive"), class = "factor"),
Month = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L), .Label = c("JUL",
"JUN", "MAY"), class = "factor"), Category = structure(c(1L,
4L, 6L, 1L, 2L, 4L, 6L, 1L, 2L, 4L, 5L, 6L, 1L, 2L, 4L, 5L,
6L, 1L, 2L, 4L, 5L, 6L, 1L, 2L, 4L, 6L, 1L, 4L, 6L, 2L, 3L,
4L, 6L, 1L, 2L, 3L, 4L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L,
3L, 4L, 5L, 6L, 6L), .Label = c("Data", "Other", "Roaming",
"Unlimited", "VAS", "Voice"), class = "factor"), count = c(3L,
2L, 1L, 4L, 5L, 2L, 1L, 2L, 6L, 7L, 2L, 3L, 4L, 9L, 6L, 2L,
3L, 3L, 3L, 10L, 2L, 5L, 5L, 5L, 4L, 3L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 4L, 1L, 1L, 3L, 3L, 2L,
1L, 1L, 1L, 3L, 4L, 2L)), class = "data.frame", row.names = c(NA,
-51L))
I want to create a bar plot (ggplot) to show bar graph with label on bar as sum of count of category month wise I am using below code
ggplot(data = tag, aes(x = Tagging, y = count, fill = Status)) +
geom_col() +
labs(x = "Tagging", y = "Count", title = "FlyTxt ROI", subtitle = "Statistics") +
geom_text(aes(label = count), color = "white", size = 3, position = position_stack(vjust = 0.5)) +
theme_minimal()+facet_wrap(~Month)
But I am getting split count values:
Help as I want only sum of count for each status
The problem is, that the information you show in the bar is accumulated by geom_col over all categories but the geom_text doesn't do that.
On option is to pre-summarize the data (to get rid of the category split) and then plot the graph.
library(tidyverse)
tag_sum <- tag %>%
group_by(Tagging, Status, Month) %>%
summarise(count_sm = sum(count))
ggplot(data = tag_sum, aes(x = Tagging, y = count_sm, fill = Status)) +
geom_col() +
geom_text(aes(label = count_sm), color = "white", size = 3,
position = position_stack(vjust = 0.5)) +
facet_wrap(~Month) +
labs(x = "Tagging", y = "Count", title = "FlyTxt ROI", subtitle = "Statistics") +
theme_minimal()

Removing specific strips in a double-strip plot

I'm trying to remove the redundant "pro/retro" labels on the second row of panels on my plot. However, I still want to keep the top row of panel labels intact. I've tried for the past hour to selectively remove the 1st strip on the 2nd panel row and I was wondering if anyone here knows how to do this. See below for technical details.
I have the following plot:
It was generated from the following data:
absBtwnDat <- structure(list(setSize = structure(c(1L, 2L, 3L, 4L, 5L, 6L,
7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L,
6L, 7L), .Label = c("2", "3", "4", "5", "6", "7", "8"), class = "factor"),
Measure = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("Actual", "Predicted"), class = "factor"),
Location = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("fix", "forced"), class = "factor"),
JudgementType = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("pro", "retro"), class = "factor"),
Accuracy = c(1.91388888888889, 2.95555555555556, 3.74861111111111,
4.37777777777778, 4.21527777777778, 3.0875, 2.85277777777778,
2, 2.99444444444444, 4, 4.77222222222222, 5.24444444444444,
5.18472222222222, 5.20277777777778, 1.98888888888889, 3,
3.97222222222222, 4.85972222222222, 5.70555555555556, 6.56944444444444,
7.27222222222222, 2, 3, 3.99444444444444, 4.99444444444444,
5.86944444444444, 6.75555555555556, 7.57777777777778, 1.96111111111111,
2.97777777777778, 3.78333333333333, 3.97222222222222, 4.22361111111111,
3.64722222222222, 3.68888888888889, 2, 3, 3.97222222222222,
4.67777777777778, 5.26944444444444, 5.4625, 5.8, 2, 3, 3.98333333333333,
4.87777777777778, 5.73055555555556, 6.48333333333333, 7.62916666666667,
2, 3, 3.98333333333333, 4.96666666666667, 5.96944444444444,
6.94444444444444, 7.93333333333333), LL = c(1.85, 2.87777777777778,
3.59861111111111, 4.15555555555556, 3.78888888888889, 2.73055555555556,
2.55555555555556, 2, 2.96111111111111, 4, 4.64444444444444,
5.01666666666667, 4.88333333333333, 4.88611111111111, 1.91111111111111,
3, 3.89444444444444, 4.73611111111111, 5.47777777777778,
6.20277777777778, 6.71666666666667, 2, 3, 3.96666666666667,
4.95555555555556, 5.65096686319131, 6.48333333333333, 7.17222222222222,
1.86637442123568, 2.92222222222222, 3.65, 3.61666666666667,
3.88333333333333, 3.17092476055122, 3.18888888888889, 2,
3, 3.92222222222222, 4.49444444444444, 5.0375, 5.09444444444444,
5.40555555555556, 2, 3, 3.92777777777778, 4.72222222222222,
5.52777777777778, 6.24444444444444, 7.37361111111111, 2,
3, 3.95, 4.88888888888889, 5.93333333333333, 6.88333333333333,
7.73065763697428), UL = c(1.95555555555556, 2.98333333333333,
3.84444444444444, 4.56666666666667, 4.6, 3.43611111111111,
3.17916666666667, 2, 3, 4, 4.86111111111111, 5.42777777777778,
5.48656054159421, 5.58611111111111, 2, 3, 4, 4.93888888888889,
5.83888888888889, 6.76944444444444, 7.6, 2, 3, 4, 5, 5.94166666666667,
6.88888888888889, 7.78888888888889, 1.98888888888889, 2.99444444444444,
3.87777777777778, 4.22777777777778, 4.53611111111111, 4.19722222222222,
4.20555555555556, 2, 3, 3.98888888888889, 4.78333333333333,
5.45555555555556, 5.79583333333333, 6.16666666666667, 2,
3, 3.99444444444444, 4.95, 5.85972222222222, 6.67222222222222,
7.80138888888889, 2, 3, 3.99444444444444, 4.98888888888889,
5.9875, 6.97222222222222, 7.98333333333333)), .Names = c("setSize",
"Measure", "Location", "JudgementType", "Accuracy", "LL", "UL"
), row.names = c(NA, -56L), class = "data.frame")
I visualized it using using the following code:
library(ggplot2)
p1 <- ggplot(data = absBtwnDat, aes(x = as.numeric(as.character(setSize)),
y = Accuracy, group = Measure,
colour = Measure))+
geom_point()+
geom_line(aes(linetype = Measure))+
scale_x_continuous("Trial Set Size", breaks = 2:8)+
scale_y_continuous("Accuracy (# Correct)", breaks = 0:8, limits = c(0, 8))+
geom_errorbar(aes(ymin = LL, ymax = UL), width = .1, size = .75)+
scale_colour_grey(start = .8, end = .4)+
facet_wrap(~JudgementType+Location, dir = "v")+
theme(legend.position = "top")
Just to be certain, I've highlighted unwanted strip in the following image:
With this you'll only have one row of labels per panel, but they still include both words.
p1 <- ggplot(data = absBtwnDat,
aes(x = as.numeric(as.character(setSize)), y = Accuracy,
group = Measure,
colour = Measure))+
geom_point()+
geom_line(aes(linetype = Measure))+
scale_x_continuous("Trial Set Size", breaks = 2:8)+
scale_y_continuous("Accuracy (# Correct)",
breaks = 0:8, limits = c(0, 8))+
geom_errorbar(aes(ymin = LL, ymax = UL),
width = .1, size = .75)+
scale_colour_grey(start = .8, end = .4)+
facet_wrap(~JudgementType + Location,
dir = "v",
labeller = label_wrap_gen(multi_line=FALSE)) +
theme(legend.position = "top")
p1
Here is a possible solution:
g1 <- ggplotGrob(p1)
k <- which(g1$layout$name=="strip-t-1-2")
g1$grobs[[k]]$grobs[[1]]$children[[2]]$children[[1]]$label <- ""
g1$grobs[[k]]$grobs[[1]]$children[[1]]$gp$fill <- NA
k <- which(g1$layout$name=="strip-t-2-2")
g1$grobs[[k]]$grobs[[1]]$children[[2]]$children[[1]]$label <- ""
g1$grobs[[k]]$grobs[[1]]$children[[1]]$gp$fill <- NA
library(grid)
grid.draw(g1)

ggplot add percentage labels based on x-axis variables

I've a ggplot that shows the counts of tweets for some brands as well as a label for the overall percentage. This was done with much help from this link: Show % instead of counts in charts of categorical variables
# plot ggplot of brands
ggplot(data = test, aes(x = brand, fill = brand))
+ geom_bar()
+ stat_bin(aes(label = sprintf("%.02f %%", ..count../sum(..count..)*100)), geom = 'text', vjust = -0.3)
Next, I would like to plot it based on brand and sentiment, with the labels for the bars of each brand totalling up to 100%. However, I have difficulty amending my code to do this. Would you be able to help please? Also, would it be possible to change the colours for neu to blue and pos to green?
# plot ggplot of brands and sentiment
ggplot(data = test, aes(x = brand, fill = factor(sentiment)))
+ geom_bar(position = 'dodge')
+ stat_bin(aes(label = sprintf("%.02f %%", ..count../sum(..count..)*100)), geom = 'text', position = position_dodge(width = 0.9), vjust=-0.3)
Here's a dput of 100 rows of my data's brand and sentiment column
structure(list(brand = structure(c(3L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 2L, 3L, 4L, 4L, 1L, 2L, 1L, 2L, 1L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 3L, 5L, 2L, 1L, 2L, 1L, 1L, 2L,
2L, 1L, 4L, 5L, 5L, 1L, 1L, 2L, 3L, 1L, 1L, 4L, 1L, 2L, 1L, 2L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 4L, 1L, 1L), .Label = c("apple",
"samsung", "sony", "bb", "htc", "nokia", "huawei"), class = "factor"),
sentiment = structure(c(2L, 1L, 3L, 1L, 2L, 3L, 1L, 1L, 3L,
1L, 1L, 2L, 3L, 1L, 1L, 3L, 2L, 1L, 3L, 1L, 3L, 3L, 3L, 2L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 3L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L,
3L, 1L, 1L, 1L, 3L, 3L, 2L, 1L, 1L, 2L, 3L, 3L, 1L, 3L, 2L,
1L, 3L, 1L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
3L, 1L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 2L, 1L, 1L, 1L, 1L,
3L), .Label = c("neg", "pos", "neu"), class = "factor")), .Names = c("brand",
"sentiment"), class = c("data.table", "data.frame"), row.names = c(NA,
-100L), .internal.selfref = <pointer: 0x0000000003070788>)
Posting a hack far far far from the ggplot2 idiomatic way to do this, so if someone posts a more ggplot2 way to do this, you should accept the idiomatic method.
So basically I'm creating a dummy data set which will include all the information you've calculated using ..count../sum(..count..)*100 and plotting it on top of your bar plot using geom_text
temp <- as.data.frame(table(test$brand, test$sentiment))
temp <- merge(temp, as.data.frame(table(test$brand)), by = "Var1", all.x = T)
names(temp) <- c("brand", "sentiment", "Freq", "Count")
library(ggplot2)
ggplot(data = test, aes(x = brand, fill = factor(sentiment))) +
geom_bar(position = 'dodge') +
geom_text(data = temp, aes(x = brand, y = Freq, label = sprintf("%.02f %%", Freq/Count*100)), position = position_dodge(width = 0.9), vjust=-0.3)
This is not exactly same as your plot because you only provided a subset of your data
To choose the colors you would like for sentiment, make use of
scale_fill_manual(value = [and choose your colors by RGB, name, etc.]
You will have to experiment but the three factors will be in alphabetical order (unless you change that) so the colors you pick for the scale will match that order: neg, neu, pos could be "grey", "blue", "green"

Resources