Related
I have a dataframe for which I did a two-way ANOVA.
dput(m3)
structure(list(Delta = c(-40, -40, -40, -40, -31.7, -29.3, -27.8,
-26.7, -26.2, -25.4, -24.7, -23.1, -23, -22.9, -22.4, -22.2,
-21.4, -21, -20.8, -15.1, -14.9, -14.1, -6.2, -6.2, -6, -5.3,
-4.9), Location = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 3L, 2L,
3L, 3L, 3L), .Label = c("int", "pen + int", "ter + pen"), class = "factor"),
Between = c(0L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 0L, 2L, 1L, 0L,
1L, 0L, 2L, 0L, 2L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L
), Relative = structure(c(5L, 6L, 6L, 7L, 8L, 3L, 3L, 4L,
5L, 4L, 3L, 5L, 3L, 5L, 7L, 5L, 4L, 6L, 3L, 3L, 6L, 2L, 1L,
2L, 1L, 1L, 1L), .Label = c("1&2", "2&3", "2&4", "2&5", "3&4",
"3&5", "3&6", "4&6"), class = "factor")), class = "data.frame", row.names = c(NA,
-27L))
library(agricolae)
# Two-way ANOVA of the response on Location, X.between and their interaction.
# NOTE(review): the dput above names the columns `Delta` and `Between`, not
# `Delta.` and `X.between` — confirm which names the real data frame uses.
aov.2sum <- aov(Delta. ~ Location * X.between, data = m3)
I want to analyze the data using a HSD.test as I have for another dataframe using the same features.
I am following the code format in the package manual as below.
# Collapse the two factors into a single treatment factor with one level per
# Location/X.between combination, fit a one-way model on it, and run Tukey's
# HSD on that combined factor.
tx <- interaction(m3$Location, m3$X.between)
amod <- aov(Delta ~ tx, data = m3)
test <- HSD.test(amod, "tx", group = TRUE)
Then I receive the following error
Error in .rowNamesDF<-(x, value = value) :
duplicate 'row.names' are not allowed
In addition: Warning message:
non-unique values when setting 'row.names': ‘int.0’, ‘pen + int.1’, ‘pen + int.2’, ‘te + int.0’, ‘te + int.1’
Upon further analysis I see that my duplicate row names error is related to my X.between feature. When I use the following code I get the same duplicate row names error:
# Same HSD test, requested for X.between on its own.
# NOTE(review): `amod` was fitted as Delta ~ tx, so X.between is not a term
# of that model — confirm this is the model object that was intended.
HSD.test(amod, "X.between", group=TRUE)
>> Error in data.frame(row.names = means[, 1], means[, 2:6]) :
duplicate row.names: 0, 1, 2
How are row names chosen for the HSD.test?
Then how can I change my row names? Or just avoid this duplication error?
Thank you for all and any help.
I have a factor comp_id that has 4 levels (comp1 to comp4). I want to order each level from the highest to the lowest in a geom_line plot.
I got this plot
using this script
library(data.table)
library(ggplot2)
dat <- as.data.table(df)
# Rank the rows by comp_id and then by descending value; the rank becomes the
# x position. Because the rank is stored as text, it has to be zero-padded to
# the width of the largest rank: with a fixed 2-digit pad, ranks of 100 and
# above sort before two-digit ranks such as "71" and are drawn at the start
# of their facet instead of the end.
rank_width <- nchar(nrow(dat))
dat[, ord := formatC(frank(dat, comp_id, -value, ties.method = "first"),
                     width = rank_width, flag = "0")]
ggplot(dat, aes(x = ord, y = value, group = comp_id, colour = comp_id)) +
  geom_line() +
  facet_wrap(~comp_id, ncol = 1, scales = "free_x", labeller = label_parsed, drop = TRUE) +
  theme(axis.text.x = element_text(angle = 35, vjust = 1, hjust = 1)) +
  # to replace x axis labels
  scale_x_discrete(labels = dat[, setNames(as.character(predictor), ord)])
As you can see, it worked fine for all levels except comp3 where variables ordered (100 to 105) were plotted at the start of facet where they were supposed to be plotted at the end. I wonder what went wrong. Any suggestions will be appreciated.
DATA
> dput(df)
structure(list(predictor = c("c_C2", "c_C3", "c_C4", "d_D2",
"d_D3", "d_D4", "d_D5", "h_BF", "h_BFI", "h_ER", "h_f", "h_PET",
"h_QuFl", "h_Ra", "l_Da", "l_NaCo", "l_ShBe", "m_a", "m_DrDe",
"m_ElRa", "m_MeElm", "m_MeSlPe", "Mr_Co", "Mr_GRAv", "Mr_GREy",
"Mr_Mu", "Mr_Sa", "s_SaLo", "s_SiLo", "s_sSiLo", "s_Stl", "Sr_Li",
"Sr_SaCoCoTe", "Sr_SaLoSi", "Sr_SaMubcl", "c_C2", "c_C3", "c_C4",
"d_D2", "d_D3", "d_D4", "d_D5", "h_BF", "h_BFI", "h_ER", "h_f",
"h_PET", "h_QuFl", "h_Ra", "l_Da", "l_NaCo", "l_ShBe", "m_a",
"m_DrDe", "m_ElRa", "m_MeElm", "m_MeSlPe", "Mr_Co", "Mr_GRAv",
"Mr_GREy", "Mr_Mu", "Mr_Sa", "s_SaLo", "s_SiLo", "s_sSiLo", "s_Stl",
"Sr_Li", "Sr_SaCoCoTe", "Sr_SaLoSi", "Sr_SaMubcl", "c_C2", "c_C3",
"c_C4", "d_D2", "d_D3", "d_D4", "d_D5", "h_BF", "h_BFI", "h_ER",
"h_f", "h_PET", "h_QuFl", "h_Ra", "l_Da", "l_NaCo", "l_ShBe",
"m_a", "m_DrDe", "m_ElRa", "m_MeElm", "m_MeSlPe", "Mr_Co", "Mr_GRAv",
"Mr_GREy", "Mr_Mu", "Mr_Sa", "s_SaLo", "s_SiLo", "s_sSiLo", "s_Stl",
"Sr_Li", "Sr_SaCoCoTe", "Sr_SaLoSi", "Sr_SaMubcl", "c_C2", "c_C3",
"c_C4", "d_D2", "d_D3", "d_D4", "d_D5", "h_BF", "h_BFI", "h_ER",
"h_f", "h_PET", "h_QuFl", "h_Ra", "l_Da", "l_NaCo", "l_ShBe",
"m_a", "m_DrDe", "m_ElRa", "m_MeElm", "m_MeSlPe", "Mr_Co", "Mr_GRAv",
"Mr_GREy", "Mr_Mu", "Mr_Sa", "s_SaLo", "s_SiLo", "s_sSiLo", "s_Stl",
"Sr_Li", "Sr_SaCoCoTe", "Sr_SaLoSi", "Sr_SaMubcl"), comp_id = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("comp1",
"comp2", "comp3", "comp4"), class = "factor"), value = c(0.0633325075111356,
-0.0193713154441617, 0.000785081075580719, 0.287610195287972,
-0.0913783988809322, -0.122928438782758, 0.305621459875726, 0.0356570047659489,
0.367574915852176, -0.240835821698893, 0.0035597425358522, 0.295952594554233,
-0.0439920206129066, -0.235580426938533, 0.191947159509267, -0.132931615006652,
0.065155805120025, 0.038311284807646, 0.187182963731454, 0.120969596703282,
-0.118935354491654, -0.173851183397175, 0.125870264508295, 0.158977975187947,
-0.209351605852615, -0.0231602829054583, 0.078383405846316, 0.0959455355349004,
0.238306328058919, -0.188667962455942, -0.138302814516594, -0.0586994514783439,
0.019524606432138, 0.210636138928319, -0.204454169255484, -0.149879080476447,
0.282741114373524, -0.272911905666994, 0.102508662574812, -0.35056583225677,
0.257262737814283, 0.202117594283655, 0.191773977367133, 0.298513575892895,
0.139576016330362, 0.165641757285727, -0.071542760140058, 0.116819894570386,
0.145104320521166, 0.126636637925691, 0.0810830011112734, -0.0949935353116725,
0.0785254958291791, 0.0326439188223452, 0.065833153228218, 0.155405435626813,
0.128737420120173, 0.214943178842044, -0.0210359058420932, 0.0117832135586799,
0.0762824228178598, -0.29145271973574, -0.17089908579109, -0.0992003952524557,
0.163749177828358, 0.196561728687348, 0.0951493527111932, 0.17238711709624,
0.0638301486629609, -0.0351097560634362, 0.0647994534663104,
-0.154895398844537, 0.186448424833243, 0.240881706707846, -0.241364320964797,
-0.089459273670017, 0.0491598702691844, -0.200660845431752, -0.0339722426751736,
0.131396251991635, -0.195471026941394, -0.05919918680627, -0.184160478394361,
0.129464190293723, 0.193021703469902, 0.178985522376368, -0.245966624042807,
-0.23478025602535, 0.198620462933836, -0.157573246492692, -0.00808698000885529,
0.0413693509741982, -0.121020524702316, 0.105148862728949, 0.214386790903084,
-0.204515275979768, -0.0906160054540168, -0.276985960928353,
0.0768294557774406, -0.074181085595352, 0.138680723918144, -0.119684214245213,
-0.0919678069134681, 0.322602153170851, 0.228878715511945, -0.433082572929477,
0.05754301130056, 0.130719232236558, 0.253999327778221, 0.0469683234741709,
-0.0258294537417061, -0.258318910865727, -0.00406472629347961,
-0.165003562015847, -0.0292142578447021, 0.00862320222199929,
0.0875367120866572, 0.0331716236283754, -0.0418387105725687,
-0.12523142839593, -0.200857915084298, 0.138378222132672, 0.00992811008724002,
-0.0201043482518474, -0.148894977354092, -0.323240591170999,
-0.0556713655820164, 0.379033571103569, -0.264420286734383, 0.127560649906739,
-0.00546455207923468, -0.203293330594455, -0.122085266718802,
-0.0970860819632599, -0.173818516285048, -0.0585031143296301,
0.125084378608705, 0.0655074180474436, 0.254339734692359, 0.00114212078410835
)), class = "data.frame", .Names = c("predictor", "comp_id",
"value"), row.names = c(NA, -140L))
Here is an approach using tidyverse and continuous scale
library(tidyverse)
# Sort by comp_id and descending value, then number the rows; the row number
# is the continuous x position. row_number() is used instead of 1:n() so an
# empty input yields an empty column rather than c(1, 0).
dat <- df %>%
  arrange(comp_id, desc(value)) %>%
  mutate(ord = row_number())

# One free-x facet per comp_id; every row gets its own break, labelled with
# the predictor name.
ggplot(dat, aes(x = ord, y = value, group = comp_id, colour = comp_id)) +
  geom_line() +
  facet_wrap(~comp_id, ncol = 1, scales = "free_x", drop = TRUE) +
  theme(axis.text.x = element_text(angle = 35, vjust = 1, hjust = 1)) +
  scale_x_continuous(labels = dat$predictor, breaks = dat$ord, expand = c(0.02, 0.02))
In addition to the nice answer by @missuse, there was another way that gave me what I wanted.
using as factor / as numeric / as.character with the x axis
aes(x = as.factor(as.numeric(as.character(ord)))
and using as numeric /as character while replacing the x axis labels
as.numeric(as.character(ord))
The final script is
# The text rank is converted back to a number before being turned into a
# factor, so the factor levels (and hence the discrete x axis) are in numeric
# order. The axis labels are looked up by the same numeric rank.
ggplot(
  dat,
  aes(
    x = as.factor(as.numeric(as.character(ord))),
    y = value,
    group = comp_id,
    colour = comp_id
  )
) +
  geom_line() +
  facet_wrap(~comp_id, ncol = 1, scales = "free_x", labeller = label_parsed, drop = TRUE) +
  theme(axis.text.x = element_text(angle = 35, vjust = 1, hjust = 1)) +
  scale_x_discrete(labels = dat[, setNames(as.character(predictor), as.numeric(as.character(ord)))])
For a sample dataframe:
df1 <- structure(list(area = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("a",
"b"), class = "factor"), region = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("a1",
"a2", "b1", "b2"), class = "factor"), weight = c(0, 1.2, 3.2,
2, 1.6, 5, 1, 0.5, 0.2, 0, 1.5, 2.3, 1.5, 1.8, 1.6, 2, 1.3, 1.4,
1.5, 1.6, 2, 3, 4, 2.3, 1.3, 2.1, 1.3, 1.6, 1.7, 1.8, 2, 1.3,
1, 0.5), var.1 = c(0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L,
1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 1L, 0L, 1L, 0L), var.2 = c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L,
1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L)), .Names = c("area",
"region", "weight", "var.1", "var.2"), class = c("data.table",
"data.frame"))
I want to first produce a summary table...
# Per-area summary: row count, number of rows with var.1 == 1, and the
# weight-weighted percentage of rows with var.1 == 1.
area_summary <- setDT(df1)[
  ,
  .(
    .N,
    freq.1 = sum(var.1 == 1),
    result = weighted.mean((var.1 == 1), w = weight) * 100
  ),
  by = area
]
...and then populate it by running the following code for each area (e.g. a, b). This looks for the highest and lowest 'result' in each region, and then produces a xtabs and calculates the relative difference (RD) before adding these to the summary table. Here I have developed the code for area 'a':
# Restrict the data to area "a" and summarise each of its regions.
a_cntry <- subset(df1, area == "a")
a_cntry.summary <- setDT(a_cntry)[
  ,
  .(.N, freq.1 = sum(var.1 == 1), result = weighted.mean((var.1 == 1), w = weight) * 100),
  by = region
]

# Include only the regions with the lowest and the highest percentage.
incl <- a_cntry.summary[c(which.min(result), which.max(result)), region]
a_cntry <- a_cntry[a_cntry$region %in% incl, ]
# Drop region levels that are no longer present so the cross-table below has
# columns only for the selected regions.
a_cntry$region <- factor(a_cntry$region)

# Produce the weighted xtabs table of var.1 by region.
a_cntry.var.1 <- xtabs(weight ~ var.1 + region, data = a_cntry)
a_cntry.var.1

# Test of equal proportions; RD is the negated difference of the estimates.
RD.var.1 <- prop.test(x = a_cntry.var.1[, 2], n = rowSums(a_cntry.var.1), correct = FALSE)
RD <- round(-diff(RD.var.1$estimate), 3)
RDpvalue <- round(RD.var.1$p.value, 4)
RD
RDpvalue

# Add RD and RDpvalue to the summary table.
area_summary$RD[area_summary$area == "a"] <- RD
area_summary$RDpvalue[area_summary$area == "a"] <- RDpvalue

# Clean up the intermediates created above.
rm(RD, RD.var.1, RDpvalue, a_cntry.var.1, incl, a_cntry, a_cntry.summary)
I wish to wrap this code into a function, so I can just specify the 'areas' (in the 'area' column in df1) and then the code completes all the analysis and adds the results to the summary table.
If I wanted to call my function stats, I understand it may start like this:
# Summarise each requested area and add the relative difference (RD) between
# its lowest- and highest-scoring regions.
#
# Arguments:
#   df1  data frame (or data.table) with columns area, region, weight, var.1.
#   x    vector of areas to analyse; defaults to every area present in df1.
#
# Returns a data frame with one row per area in `x` and the columns
#   area, N, freq.1, result (weighted % of rows with var.1 == 1), RD, RDpvalue.
#   RD and RDpvalue are NA when an area has fewer than two distinct regions to
#   compare, or when one of the var.1 groups carries no weight.
stats <- function(df1, x = unique(as.character(df1$area))) {
  df1 <- as.data.frame(df1)
  needed <- c("area", "region", "weight", "var.1")
  stopifnot(all(needed %in% names(df1)))

  # Weighted percentage of rows with var.1 == 1.
  weighted_pct <- function(d) {
    weighted.mean(d$var.1 == 1, w = d$weight) * 100
  }

  analyse_area <- function(a) {
    d <- df1[df1$area == a, , drop = FALSE]
    out <- data.frame(
      area = a,
      N = nrow(d),
      freq.1 = sum(d$var.1 == 1),
      result = weighted_pct(d),
      RD = NA_real_,
      RDpvalue = NA_real_,
      stringsAsFactors = FALSE
    )

    # Keep only the regions with the lowest and the highest percentage.
    pct <- vapply(split(d, as.character(d$region)), weighted_pct, numeric(1))
    incl <- unique(names(pct)[c(which.min(pct), which.max(pct))])
    if (length(incl) < 2L) {
      return(out)
    }
    d <- d[as.character(d$region) %in% incl, , drop = FALSE]

    # factor() drops unused region levels, so the table has exactly two
    # columns; var.1 is given both levels so the table always has two rows.
    d$region <- factor(d$region)
    d$var.1 <- factor(as.integer(d$var.1 == 1), levels = c(0, 1))
    tab <- xtabs(weight ~ var.1 + region, data = d)
    totals <- rowSums(tab)
    if (any(totals <= 0)) {
      return(out)
    }

    # Same test as the hand-written version: second region column against
    # the per-var.1 totals, without continuity correction.
    fit <- prop.test(x = tab[, 2], n = totals, correct = FALSE)
    out$RD <- round(-diff(unname(fit$estimate)), 3)
    out$RDpvalue <- round(fit$p.value, 4)
    out
  }

  rows <- lapply(as.character(x), analyse_area)
  if (length(rows) == 0L) {
    return(data.frame(
      area = character(0), N = integer(0), freq.1 = integer(0),
      result = numeric(0), RD = numeric(0), RDpvalue = numeric(0),
      stringsAsFactors = FALSE
    ))
  }
  do.call(rbind, rows)
}
If anyone can start me off developing my function, I should be most grateful.
I've a ggplot that shows the counts of tweets for some brands as well as a label for the overall percentage. This was done with much help from this link: Show % instead of counts in charts of categorical variables
# plot ggplot of brands
# Each `+` must end the line it continues: a line that starts with `+` is a
# separate statement, so the layer would never be added to the plot.
# NOTE(review): recent ggplot2 releases may reject stat_bin() on a discrete x
# and expect stat = "count" instead — confirm against the installed version.
ggplot(data = test, aes(x = brand, fill = brand)) +
  geom_bar() +
  stat_bin(aes(label = sprintf("%.02f %%", ..count../sum(..count..)*100)), geom = 'text', vjust = -0.3)
Next, I would like to plot it based on brand and sentiment, with the labels for the bars of each brand totalling up to 100%. However, I have difficulty amending my code to do this. Would you be able to help please? Also, would it be possible to change the colours for neu to blue and pos to green?
# plot ggplot of brands and sentiment
# Each `+` must end the line it continues: a line that starts with `+` is a
# separate statement, so the layer would never be added to the plot.
ggplot(data = test, aes(x = brand, fill = factor(sentiment))) +
  geom_bar(position = 'dodge') +
  stat_bin(aes(label = sprintf("%.02f %%", ..count../sum(..count..)*100)), geom = 'text', position = position_dodge(width = 0.9), vjust = -0.3)
Here's a dput of 100 rows of my data's brand and sentiment column
structure(list(brand = structure(c(3L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 2L, 3L, 4L, 4L, 1L, 2L, 1L, 2L, 1L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 3L, 5L, 2L, 1L, 2L, 1L, 1L, 2L,
2L, 1L, 4L, 5L, 5L, 1L, 1L, 2L, 3L, 1L, 1L, 4L, 1L, 2L, 1L, 2L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 4L, 1L, 1L), .Label = c("apple",
"samsung", "sony", "bb", "htc", "nokia", "huawei"), class = "factor"),
sentiment = structure(c(2L, 1L, 3L, 1L, 2L, 3L, 1L, 1L, 3L,
1L, 1L, 2L, 3L, 1L, 1L, 3L, 2L, 1L, 3L, 1L, 3L, 3L, 3L, 2L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 3L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L,
3L, 1L, 1L, 1L, 3L, 3L, 2L, 1L, 1L, 2L, 3L, 3L, 1L, 3L, 2L,
1L, 3L, 1L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
3L, 1L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 2L, 1L, 1L, 1L, 1L,
3L), .Label = c("neg", "pos", "neu"), class = "factor")), .Names = c("brand",
"sentiment"), class = c("data.table", "data.frame"), row.names = c(NA,
-100L), .internal.selfref = <pointer: 0x0000000003070788>)
Posting a hack far far far from the ggplot2 idiomatic way to do this, so if someone posts a more ggplot2 way to do this, you should accept the idiomatic method.
So basically I'm creating a dummy data set which will include all the information you've calculated using ..count../sum(..count..)*100 and plotting it on top of your bar plot using geom_text
# Count tweets per brand/sentiment pair, then attach the per-brand total so
# each pair's share of its brand can be computed.
temp <- as.data.frame(table(test$brand, test$sentiment))
temp <- merge(temp, as.data.frame(table(test$brand)), by = "Var1", all.x = TRUE)
# After the merge the columns are Var1, Var2, Freq.x, Freq.y.
names(temp) <- c("brand", "sentiment", "Freq", "Count")
library(ggplot2)
# Bars come from the raw data; the labels come from `temp`, placed at the bar
# height (Freq) and showing Freq as a percentage of the brand total (Count).
ggplot(data = test, aes(x = brand, fill = factor(sentiment))) +
  geom_bar(position = 'dodge') +
  geom_text(
    data = temp,
    aes(x = brand, y = Freq, label = sprintf("%.02f %%", Freq/Count*100)),
    position = position_dodge(width = 0.9),
    vjust = -0.3
  )
This is not exactly same as your plot because you only provided a subset of your data
To choose the colors you would like for sentiment, make use of
# The argument is `values`; naming the colours ties each one to its sentiment
# level regardless of the order of the factor levels.
scale_fill_manual(values = c(neg = "grey", neu = "blue", pos = "green"))
You will have to experiment: with an unnamed colour vector the colours follow the order of the factor levels (alphabetical by default, although the dput above lists them as neg, pos, neu), so either order the colours to match or name them. For example, neg, neu and pos could be "grey", "blue" and "green".
I have a problem getting some words used in facet labels in italics. I use the following code to create new lines for the labels:
# Replace the level names of CONSTRUCTION with display labels; "\n" starts a
# new line in the facet strip. The labels are assigned to the existing levels
# by position.
# NOTE(review): the dput below lists the levels as THAT_EXT, THAT_EXT_NT,
# THAT_POST, THAT_POST_NT, TO_EXT_FOR, TO_POST_FOR — confirm that the order of
# the labels here matches that level order.
levels(length_subject$CONSTRUCTION) <-
c("THAT \n Extraposed", "THAT \n Post-predicate", "TO \n Extraposed \n for-subject", "TO \n Post-predicate \n for-subject", "THAT \n Extraposed \n that-omission", "THAT \n Post-predicate \n that-omission")
However, I want the words "that" and "for" to appear in italics. I've tried something like
"TO \n Extraposed \n (italics(for))-subject"
but it doesn't work.
This is what the plots look like:
produced with the following code:
# Histogram of subject length, one panel per subject type and construction.
# SUBJECT is numeric, so the tick marks are set on a continuous x scale
# (scale_x_discrete() is meant for factor/character data).
ggplot(length_subject, aes(x = SUBJECT)) +
  geom_histogram(binwidth = .6, colour = "black", fill = "grey") +
  ylab("Frequency") +
  xlab("Subject length") +
  scale_x_continuous(breaks = seq(2, 30, by = 2)) +
  facet_grid(SUBJECT_TYPE ~ CONSTRUCTION, scales = "free_x", space = "free") +
  theme(strip.text.x = element_text(size = 8))
Here is a reduced variant of the data:
structure(list(ID = structure(1:86, .Label = c("A05_122_01",
"A05_253_01", "A05_277_07", "A05_400_01", "A05_99_01", "A06_1076_01",
"A06_1261_01", "A06_1283_01", "A06_1283_02", "A06_1317_01", "A06_1326_01",
"A06_1389_01", "A06_1390_01", "A06_1437_01", "A06_1441_02", "A06_1441_03",
"A06_1442_03", "A06_1456_01", "A06_1461_01", "A06_830_01", "A06_868_01",
"A06_884_01", "A06_884_03", "A0K_1057_02", "A0K_1144_07", "A0K_1177_01",
"A0K_1190_03", "A0K_1214_03", "A0K_1216_01", "A0K_950_02", "A0K_986_01",
"A1A_102_02", "A1A_163_01", "A1A_199_01", "A1A_45_01", "A1A_97_01",
"A1B_1008_02", "A1B_1013_01", "A1B_1028_02", "A1B_1042_01", "A1B_1064_01",
"A1B_1126_03", "A1B_1152_01", "A1B_1174_01", "A1B_1271_01", "A1B_997_01",
"A1J_487_01", "A1J_544_02", "A1J_555_03", "A1J_569_01", "A1J_601_01",
"A1N_422_04", "A1N_70_02", "A1S_191_01", "A1S_329_01", "A1S_330_01",
"A1S_465_04", "A1Y_248_01", "A1Y_278_02", "A1Y_292_01", "A1Y_466_01",
"A1Y_521_01", "A1Y_612_01", "A1Y_634_01", "A26_139_03", "A26_142_01",
"A26_148_01", "A26_289_01", "A26_345_02", "A26_439_01", "A26_441_02",
"A26_463_01", "A28_171_01", "A28_244_01", "A28_245_01", "A28_30_01",
"A28_341_01", "A28_42_01", "A28_494_03", "A2A_301_01", "A2A_396_01",
"A2A_599_01", "A2A_637_01", "A2A_676_01", "A2E_22_01", "A2E_25_03"
), class = "factor"), SUBJECT = c(3L, 2L, 6L, 2L, 2L, 1L, 1L,
1L, 1L, 2L, 4L, 1L, 4L, 2L, 3L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 7L, 1L, 3L, 2L, 2L, 1L, 6L, 7L, 4L, 1L, 5L, 4L, 2L, 9L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 5L, 3L, 4L, 1L, 1L, 1L, 1L, 5L,
2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 5L, 2L, 1L, 2L, 2L, 1L, 7L, 1L,
4L, 1L, 2L, 1L, 1L, 3L, 1L, 13L, 2L, 1L, 1L, 1L, 3L, 1L, 1L),
CONSTRUCTION = structure(c(1L, 3L, 1L, 1L, 1L, 4L, 4L, 1L,
1L, 5L, 5L, 1L, 1L, 5L, 1L, 3L, 5L, 1L, 5L, 4L, 3L, 3L, 1L,
5L, 3L, 5L, 1L, 1L, 2L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 1L,
4L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 4L, 2L, 4L, 1L, 1L, 3L, 2L,
5L, 1L, 1L, 1L, 3L, 1L, 1L, 4L, 4L, 3L, 1L, 2L, 3L, 3L, 1L,
3L, 1L, 1L, 1L, 6L, 1L, 1L, 2L, 4L, 4L, 3L, 5L, 3L, 3L, 3L,
3L, 5L, 1L), .Label = c("THAT_EXT", "THAT_EXT_NT", "THAT_POST",
"THAT_POST_NT", "TO_EXT_FOR", "TO_POST_FOR"), class = "factor"),
SUBJECT_TYPE = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 3L, 1L, 1L,
2L, 3L, 1L, 2L, 2L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 3L, 2L, 2L, 2L, 3L, 1L, 1L, 2L, 1L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L,
1L, 3L, 3L), .Label = c("NP", "PRO", "PROPER"), class = "factor")), .Names = c("ID",
"SUBJECT", "CONSTRUCTION", "SUBJECT_TYPE"), class = "data.frame", row.names = c(NA,
-86L))
To get italics, you need the formatting described in plotmath (and then for that to be parsed as an expression). However, the plotmath syntax does not have a line break operation. You can get something similar with atop, though. With your given example, you can set the labels to
# Replace the level names of CONSTRUCTION with plotmath expressions:
# atop() stacks one piece above another (nested for a third line), italic()
# sets the italic words, and `*` joins pieces on the same line.
# The labels are assigned to the existing levels by position.
# NOTE(review): confirm that the order of the labels matches the order of the
# factor's levels in the data.
levels(length_subject$CONSTRUCTION) <-
c("atop(textstyle('THAT'),textstyle('Extraposed'))",
"atop(textstyle('THAT'),textstyle('Post-predicate'))",
"atop(atop(textstyle('TO'),textstyle('Extraposed')),italic('for')*textstyle('-subject'))",
"atop(atop(textstyle('TO'),textstyle('Post-predicate')),italic('for')*textstyle('-subject'))",
"atop(atop(textstyle('THAT'),textstyle('Extraposed')),italic('that')*textstyle('-omission'))",
"atop(atop(textstyle('THAT'),textstyle('Post-predicate')),italic('that')*textstyle('-omission'))")
and then adding labeller=label_parsed to the facet_grid call
# Histogram of subject length, one panel per subject type and construction;
# label_parsed makes the strip labels be read as plotmath expressions.
# SUBJECT is numeric, so the tick marks are set on a continuous x scale
# (scale_x_discrete() is meant for factor/character data).
ggplot(length_subject, aes(x = SUBJECT)) +
  geom_histogram(binwidth = .6, colour = "black", fill = "grey") +
  ylab("Frequency") +
  xlab("Subject length") +
  scale_x_continuous(breaks = seq(2, 30, by = 2)) +
  facet_grid(SUBJECT_TYPE ~ CONSTRUCTION, scales = "free_x", space = "free",
             labeller = label_parsed) +
  theme(strip.text.x = element_text(size = 8))
gives
It's not perfect (the spacing between lines is not the same, and the disparity would only get worse the more lines there are), but that is the only way I've found to combine the two (newlines in plotmath expressions).
Edit (2016)
With the new facet labelling system, this solution does not work anymore. The trick of inheriting from element_blank to make a custom grob is now explicitly disabled. I guess the lesson is to accept that some things cannot be done in ggplot2, by design, and not waste too much energy with workarounds that may get broken at any time in the future.
Original answer
You could try to create a suitable custom element to place in the theme settings. The theme design does not make it very easy, unfortunately,
require(ggplot2)
require(gridExtra) # tableGrob
# element_grob() method for theme elements of class "element_custom": lays
# the label out as a one-column table, one row per "\n"-separated line, with
# the table theme asking for each cell to be parsed (parse = TRUE).
#
# element: the theme element (not used in the body).
# label:   the strip label text.
# ...:     accepted and ignored.
# Returns a gTree of class "custom_strip" that stores its own `height`.
element_grob.element_custom <- function(element, label = "", ...) {
  mytheme <- ttheme_minimal(core = list(fg_params = list(parse = TRUE)))
  label_lines <- strsplit(label, "\\n")[[1]]
  table_grob <- tableGrob(as.matrix(label_lines), theme = mytheme)
  # wrapping into a gTree only because grobHeight.gtable would be too tight
  # cf. absolute.units() squashing textGrobs
  # grid:: is spelled out because loading ggplot2 and gridExtra does not
  # attach grid, so a bare gTree()/gList() would not be found.
  grid::gTree(
    children = grid::gList(table_grob),
    height = sum(table_grob$heights),
    cl = "custom_strip"
  )
}
# gTrees don't know their size and ggplot would squash it, so give it room:
# both size methods return the `height` stored on the object.
# The gTree is created with class "custom_strip", so the heightDetails method
# has to be defined for that class; the "custom_axis" name is kept as an
# alias so existing references to it still resolve.
grobHeight.custom_strip <- heightDetails.custom_strip <- heightDetails.custom_axis <- function(x, ...) {
  x$height
}
# Builds the theme element handed to strip.text.x. The arguments are stored
# but not used; the class vector includes "element_blank" only to get past
# ggplot2's inheritance check.
facet_custom <- function(...) {
  element <- list(...)
  class(element) <- c("element_custom", "element_blank", "element")
  element
}
# Three demo strip labels: one with a line break and italics, one plain, and
# one plotmath integral.
title <- c(
  "First~line \n italic('wait, a second')",
  "this~is~boring",
  "integral(f(x)*dx, a, b)"
)
# Copy iris and rename the Species levels to the demo labels, keeping the
# level order.
iris2 <- iris
levels(iris2$Species) <- title
ggplot(iris2, aes(Sepal.Length, Sepal.Width)) +
  geom_line() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = facet_custom())
As several of you were looking for a way to fix the spacing, I have found a solution.
Insert an extra, empty line with atop(scriptscriptstyle(""), ...) before the last of the three lines (making four lines in total), or before any further lines, and don't forget to add the closing ) afterwards.