How to make multiple boxplots by two different groups in one graph? - r

The part of dataset is like this:
Treatment Status gene1 gene2
1 Both Deceased 3.1934860 63.8697194
2 Both Deceased 0.0000000 11.3436426
3 Chemo Deceased 7.2186817 35.0621681
4 Both Deceased 7.2186817 23.7185255
5 Chemo Deceased 0.8049256 17.7083638
6 Chemo Censored 0.8250437 0.8250437
7 Chemo Censored 3.4136505 23.895533
8 Radio Censored 0.9428735 4.7143673
9 None Censored 3.3001750 10.7255686
I want to compare each gene's expression in "deceased" vs "censored" for each treatment. So far I could only make a plot for one gene, like this:
ggboxplot(df, x="Treatment", y= "gene1", fill = "Status")
Is there any way I can combine two genes' boxplots in one graph? Or any other better way to show these genes expression level difference between deceased vs censored in each group?

We may use boxplot() in base R, where we need to use reshape() first to get a long format.
# reshape() stacks columns 4:5 of the id-augmented data (gene1, gene2) into long
# format; with sep="" the names are split into a value column `gene` and a
# `time` index (1 or 2), which the boxplot formula then uses as grouping terms.
boxplot(gene ~ Status + time + Treatment,
reshape(cbind(id=rownames(dat), dat), 4:5, sep="", direction="long"),
border=1:2)
However, this yields a quite crowded plot. We could do separate boxplots for e.g. each treatment group using sapply().
# Draw one panel per treatment group on a 2 x 2 grid.
par(mfrow = c(2, 2))
sapply(unique(dat$Treatment), function(trt) {
  # Keep only this treatment's rows, then stack gene1/gene2 into long format.
  sub <- dat[dat$Treatment == trt, ]
  long <- reshape(cbind(id = rownames(sub), sub),
                  4:5, sep = "", direction = "long",
                  v.names = "value", timevar = "gene")
  # `at` leaves a gap at position 3 so the two genes are visually separated.
  boxplot(value ~ Status + gene, long,
          at = c(1:2, 4:5),
          main = trt,
          border = 1:2)
})
Result
Data
dat <- structure(list(Treatment = structure(c(1L, 1L, 1L, 2L, 2L, 2L,
3L, 3L, 3L, 4L, 4L, 4L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L,
4L, 4L), .Label = c("Both", "Chemo", "None", "Radio"), class = "factor"),
Status = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("Censored", "Deceased"), class = "factor"),
gene1 = c(2.83185327992901, 5.21658677992433, 9.36719279899948,
1.77809421116808, 6.39453760571561, 3.08376117126782, -1.99524072673447,
0.380722587753265, -0.947148460332481, 1.73014054712629,
0.855919162512028, 0.501667581598007, 0.0638735169737497,
10.1712355237258, 5.34317645471502, -7.96626158445742, -0.0781613844302278,
5.59930916967042, -0.725717330717595, 0.492793009977729,
-0.546677404630108, 0.290301979542245, 2.83540215865274,
-1.25738031049913), gene2 = c(6.97361394841868, -6.86012827859373,
-0.193731972798249, -5.64669185350061, -20.6664537342379,
32.5477488386544, 12.6210452154023, 6.56845245925654, 13.5491140544121,
-2.9113829554538, 2.90958200298303, -6.56806056188421, 50.2577234864485,
17.0734922804668, 49.0769939658538, -2.0186433516603, 32.3823429023035,
17.7654319738005, 12.2884241568455, 21.7600566866782, 19.68978862329,
-12.6277420840716, 27.555120882401, 17.5164450232983)), row.names = c(3L,
23L, 13L, 44L, 34L, 50L, 90L, 67L, 62L, 100L, 95L, 96L, 132L,
144L, 124L, 174L, 171L, 168L, 196L, 205L, 207L, 233L, 229L, 212L
), class = "data.frame")

Using the data from jay.sf, you can try ggplot2. I'm using the tidyverse here, but it is not required.
library(tidyverse)
# Reshape to long format (one row per Treatment/Status/gene value) and draw
# boxplots of each gene by Status, with one facet per Treatment.
dat %>%
  as_tibble() %>%
  # pivot_longer() replaces the superseded gather(); every column except
  # Treatment and Status is stacked into gene/mRNA pairs.
  pivot_longer(-c(Treatment, Status), names_to = "gene", values_to = "mRNA") %>%
  ggplot(aes(Status, mRNA, fill = gene)) +
  geom_boxplot() +
  facet_wrap(~Treatment, ncol = 2, scales = "free_y")
and with facet_grid you can add significance levels automatically
# Compare gene1 vs gene2 within every Status x Treatment panel and annotate
# the comparison with a significance bracket.
dat %>%
  as_tibble() %>%
  # pivot_longer() replaces the superseded gather(); every column except
  # Treatment and Status is stacked into gene/mRNA pairs.
  pivot_longer(-c(Treatment, Status), names_to = "gene", values_to = "mRNA") %>%
  ggplot(aes(gene, mRNA, fill = gene)) +
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  geom_boxplot(show.legend = FALSE) +
  ggbeeswarm::geom_beeswarm(show.legend = FALSE) +
  ggsignif::geom_signif(comparisons = list(c("gene1", "gene2"))) +
  facet_grid(Status ~ Treatment, scales = "free_y")

Related

How to add p values into grouped charts in ggplot?

When I want to add p values into my plots:
library(tidyverse)
library(ggpubr)
library(rstatix)
stat.test3 <- MP %>%
group_by(TBI) %>%
wilcox_test(age ~ mp_1) %>%
adjust_pvalue(method = "bonferroni") %>%
add_significance("p.adj")%>%
mutate(y.position = 35)
C2<- ggplot(data=MP, aes(x=TBI, y=age, fill=mp_1))+
geom_violin()+
geom_boxplot(width=.2, fatten=NULL, position = position_dodge(0.9))+
stat_summary(fun="median", geom="point", position = position_dodge(0.9))+
stat_summary(fun.data = "mean_se", geom = "errorbar", width=.1, position = position_dodge(0.9))+
scale_fill_brewer(name="Mind-pop", palette = "Accent")
C2+ stat_pvalue_manual(stat.test3, xmin = "TBI",xmax = NULL)
it gives me this error:
Error in FUN(X[[i]], ...) : object 'mp_1' not found
This error is shown after adding stat_pvalue to the object.
How should I fix it?
I'm not familiar with ggpubr so can't say I understand the underlying issue but it seems like color=mp_1 instead of fill=mp_1 might fix your issue. This is in the following line:
C2 <- ggplot(data=MP, aes(x=TBI, y=age, color=mp_1)).
The full code is below. I've also changed y.position so that the significance is at the top of the plot.
MP <- structure(list(age = c(55L, 54L, 56L, 60L, 55L, 53L, 61L, 56L,
58L, 58L, 56L, 58L, 58L, 58L, 59L, 57L, 56L, 60L, 57L, 58L, 61L,
60L),
mp_1 = structure(c(2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"),
TBI = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("HC",
"TBI"), class = "factor")), class = "data.frame", row.names = c(NA, -22L))
library(tidyverse)
library(ggpubr)
library(rstatix)
# Wilcoxon test of age between the mp_1 groups within each TBI level.
# p-values are Bonferroni-adjusted; y.position places the label just above
# the largest observed age (61).
stat.test3 <- MP %>%
  group_by(TBI) %>%
  wilcox_test(age ~ mp_1) %>%
  adjust_pvalue(method = "bonferroni") %>%
  add_significance("p.adj") %>%
  mutate(y.position = 61.5)
# mp_1 is mapped to colour (not fill), so the palette and legend title must be
# set on the colour scale; a fill scale would be silently ignored here.
C2 <- ggplot(data = MP, aes(x = TBI, y = age, color = mp_1)) +
  geom_violin() +
  geom_boxplot(width = .2, fatten = NULL, position = position_dodge(0.9)) +
  stat_summary(fun = "median", geom = "point", position = position_dodge(0.9)) +
  stat_summary(fun.data = "mean_se", geom = "errorbar", width = .1, position = position_dodge(0.9)) +
  scale_color_brewer(name = "Mind-pop", palette = "Accent")
C2 + stat_pvalue_manual(stat.test3, xmin = "TBI", xmax = NULL)

labelling of ordered factor variable

I am trying to produce a univariate output table using the gtsummary package.
structure(list(id = 1:10, age = structure(c(3L, 3L, 2L, 3L, 2L,
2L, 2L, 1L, 1L, 1L), .Label = c("c", "b", "a"), class = c("ordered",
"factor")), sex = structure(c(2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 2L), .Label = c("F", "M"), class = "factor"), country = structure(c(1L,
1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L), .Label = c("eng", "scot",
"wale"), class = "factor"), edu = structure(c(1L, 1L, 1L, 2L,
2L, 2L, 3L, 3L, 3L, 3L), .Label = c("x", "y", "z"), class = "factor"),
lungfunction = c(45L, 23L, 25L, 45L, 70L, 69L, 90L, 50L,
62L, 45L), ivdays = c(15L, 26L, 36L, 34L, 2L, 4L, 5L, 8L,
9L, 15L), no2 = c(40L, 70L, 50L, 60L, 30L, 25L, 80L, 89L,
10L, 40L), pm25 = c(15L, 20L, 36L, 48L, 25L, 36L, 28L, 15L,
25L, 15L)), row.names = c(NA, 10L), class = "data.frame")
...
library(gtsummary)
publication_dummytable1_sum %>%
select(sex,age,lungfunction,ivdays) %>%
tbl_uvregression(
method =lm,
y = lungfunction,
pvalue_fun = ~style_pvalue(.x, digits = 3)
) %>%
add_global_p() %>% # add global p-value
bold_p() %>% # bold p-values under a given threshold
bold_labels()
...
When I run this code I get the output below. The issue is the labeling of the ordered factor variable (age). R chooses its own labeling for the ordered factor variable. Is it possible to tell R not to choose its own labeling for ordered factor variables?
I want output like the following:
Like many other people, I think you might be misunderstanding the meaning of an "ordered" factor in R. All factors in R are ordered, in a sense; the estimates etc. are typically printed, plotted, etc. in the order of the levels vector. Specifying that a factor is of type ordered has two major effects:
it allows you to evaluate inequalities on the levels of the factor (e.g. you can filter(age > "b"))
the contrasts are set by default to orthogonal polynomial contrasts, which is where the L (linear) and Q (quadratic) labels come from: see e.g. this CrossValidated answer for more details.
If you want this variable treated in the same way as a regular factor (so that the estimates are made for differences of groups from the baseline level, i.e. treatment contrasts), you can:
convert back to an unordered factor (e.g. factor(age, ordered=FALSE))
specify that you want to use treatment contrasts in your model (in base R you would specify contrasts = list(age = "contr.treatment"))
set options(contrasts = c(unordered = "contr.treatment", ordered = "contr.treatment")) (the default for ordered is "contr.poly")
If you have an unordered ("regular") factor and the levels are not in the order you want, you can reset the level order by specifying the levels explicitly, e.g.
# Use a lambda instead of passing `levels` through across()'s `...`,
# which is deprecated in recent dplyr versions.
mutate(across(age, ~ factor(.x,
levels = c("0-10 years", "11-20 years", "21-30 years", "30-40 years"))))
R sets the factors in alphabetical order by default, which is sometimes not what you want (but I can't think of a case where the order would be 'random' ...)
The easiest way to remove the odd labelling for the ordered variables, is to remove the ordered class from these factor variables. Example below!
library(gtsummary)
library(tidyverse)
packageVersion("gtsummary")
#> [1] '1.4.2'
publication_dummytable1_sum <-
structure(list(id = 1:10, age = structure(c(3L, 3L, 2L, 3L, 2L,
2L, 2L, 1L, 1L, 1L), .Label = c("c", "b", "a"), class = c("ordered",
"factor")), sex = structure(c(2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 2L), .Label = c("F", "M"), class = "factor"), country = structure(c(1L,
1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L), .Label = c("eng", "scot",
"wale"), class = "factor"), edu = structure(c(1L, 1L, 1L, 2L,
2L, 2L, 3L, 3L, 3L, 3L), .Label = c("x", "y", "z"), class = "factor"),
lungfunction = c(45L, 23L, 25L, 45L, 70L, 69L, 90L, 50L,
62L, 45L), ivdays = c(15L, 26L, 36L, 34L, 2L, 4L, 5L, 8L,
9L, 15L), no2 = c(40L, 70L, 50L, 60L, 30L, 25L, 80L, 89L,
10L, 40L), pm25 = c(15L, 20L, 36L, 48L, 25L, 36L, 28L, 15L,
25L, 15L)), row.names = c(NA, 10L), class = "data.frame") |>
as_tibble()
# R labels the order factors like this in lm()
lm(lungfunction ~ age, publication_dummytable1_sum)
#>
#> Call:
#> lm(formula = lungfunction ~ age, data = publication_dummytable1_sum)
#>
#> Coefficients:
#> (Intercept) age.L age.Q
#> 51.17 -10.37 -15.11
tbl <-
publication_dummytable1_sum %>%
# remove ordered class
mutate(across(where(is.ordered), ~factor(., ordered = FALSE))) %>%
select(sex,age,lungfunction,ivdays) %>%
tbl_uvregression(
method =lm,
y = lungfunction,
pvalue_fun = ~style_pvalue(.x, digits = 3)
)
Created on 2021-07-22 by the reprex package (v2.0.0)

Is there a way to create error bars on a ggplot bar graph that uses the fill option for a factor variable?

I have a data set that uses two factor variables to create a bar graph in ggplot. A factor variable of 5 levels provides the bar graph distinctions, while the factor variable of two levels provides the mean/average of each condition. An example graph looks like this :
The code to produce this is
plot <- ggplot(data = testdf, aes(x = condition, fill = DV)) + geom_bar(position = "fill", na.rm = TRUE) + theme_bw()
I would like to add error bars onto each of the bars, using 95% confidence intervals.
I've tried converting the DV variable to a numeric 1 or 0 and then analyzing using summarySE() to get CIs for each bar, like so:
se_test <- summarySE(testdf, measurevar = "numericDV", groupvars = c("condition"))
I then change the ggplot call to read:
plot <- ggplot(data = testdf, aes(x = condition, fill = DV)) +
geom_bar(position = "fill", na.rm = TRUE) + theme_bw() +
geom_errorbar(aes(ymin = (DV - se_test$ci), ymax = (DV - se_test$ci)))
This leads to an error saying that - and + are not meaningful for factors, so the data is still being treated as a factor. Is there a way to keep this graph, while implementing the CI error bars? I'd like for the average displayed by the bars to act as the middle of the confidence intervals, while keeping the aesthetics of the fill conditions.
Thanks in advance.
Sample Data W/ Numeric DV:
testdf <- structure(list(condition = structure(c(4L, 3L, 3L, 2L, 5L, 1L,
5L, 4L, 4L, 3L, 4L, 1L, 2L, 5L, 5L, 1L, 4L, 3L, 2L, 5L, 4L, 3L,
3L, 2L, 2L, 3L, 2L, 3L, 5L, 1L, 3L, 3L, 3L, 4L, 4L, 1L, 4L, 2L,
4L, 3L), .Label = c("0", "1", "2", "3", "4"), class = "factor"),
DV = structure(c(1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L,
1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L,
2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L
), .Label = c("No", "Yes"), class = "factor"), numericDV = c(0,
1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1,
0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1,
1)), row.names = c(2L, 4L, 9L, 12L, 16L, 17L, 22L, 24L, 30L,
31L, 35L, 40L, 41L, 42L, 43L, 45L, 46L, 47L, 49L, 50L, 52L, 57L,
64L, 66L, 67L, 73L, 76L, 77L, 78L, 79L, 84L, 86L, 90L, 100L,
103L, 105L, 107L, 108L, 112L, 113L), class = "data.frame")
ggplot2 lets you combine several data frames in one 'screen-space' using just the variable names and values - that is, you can add a layer to your plot which has a different data source.
testdf %>%
ggplot(aes(x = condition)) +
geom_bar(aes(fill = DV), position = "fill", na.rm = TRUE) +
geom_errorbar(aes(
ymin = numericDV - ci,
ymax = numericDV + ci),
data = Rmisc::summarySE(testdf, measurevar = "numericDV", groupvars = "condition")) +
theme_bw()
I'm not sure if the result looks really nice with the bars exceeding the 0-1 interval, but numerically it looks like what you wanted. I moved the fill aesthetic to the geom_bar layer, as DV is missing in the summarySE output.
you can try
library(tidyverse)
# Per-condition summary of the 0/1 variable numericDV; the columns N and ci
# of this summary are used by the error-bar layer below.
se_test <- Rmisc::summarySE(testdf, measurevar = "numericDV", groupvars = c("condition"))
# Stacked bars of raw counts (n) per condition, filled by DV.
testdf %>%
count(condition, DV) %>%
ggplot(aes(condition, n)) +
geom_col(aes( fill =DV)) +
# NOTE(review): N is a row count while ci was computed on numericDV (0/1 scale),
# so N +/- ci mixes two scales — presumably ci needs rescaling to counts; confirm.
geom_errorbar(data=se_test, aes(y=N, ymin = N - ci, ymax = N + ci))

Manually order x-axis labels within each facet in ggplot

I am trying to manually reorder the x-axis labels within each facet.
The data are as follows:
df = structure(list(block = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("1",
"2", "3", "4", "5"), class = "factor"), item = structure(c(14L,
15L, 28L, 29L, 30L, 31L, 32L, 15L, 16L, 17L, 18L, 19L, 20L, 21L,
15L, 22L, 23L, 24L, 25L, 26L, 27L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
1L, 8L, 9L, 10L, 11L, 12L, 13L), .Label = c("p00e00d00", "p00e00d11",
"p00e00d12", "p00e00d13", "p00e00d21", "p00e00d22", "p00e00d23",
"p00e11d00", "p00e12d00", "p00e13d00", "p00e21d00", "p00e22d00",
"p00e23d00", "p01e00d00", "p11e00d00", "p11e00d11", "p11e00d12",
"p11e00d13", "p11e00d21", "p11e00d22", "p11e00d23", "p11e11d00",
"p11e12d00", "p11e13d00", "p11e21d00", "p11e22d00", "p11e23d00",
"p12e00d00", "p13e00d00", "p14e00d00", "p21e00d00", "p22e00d00"
), class = "factor"), response = structure(c(2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("2",
"1"), class = "factor"), n = c(345L, 511L, 583L, 613L, 612L,
222L, 142L, 531L, 546L, 589L, 636L, 478L, 364L, 313L, 502L, 533L,
587L, 603L, 385L, 298L, 263L, 518L, 546L, 563L, 593L, 435L, 351L,
310L, 478L, 579L, 629L, 646L, 357L, 307L, 230L), freq = c(0.408284023668639,
0.604733727810651, 0.689940828402367, 0.725443786982249, 0.724260355029586,
0.262721893491124, 0.168047337278107, 0.628402366863905, 0.646153846153846,
0.697041420118343, 0.752662721893491, 0.565680473372781, 0.430769230769231,
0.370414201183432, 0.594082840236686, 0.630769230769231, 0.694674556213018,
0.713609467455621, 0.455621301775148, 0.352662721893491, 0.311242603550296,
0.61301775147929, 0.646153846153846, 0.666272189349112, 0.701775147928994,
0.514792899408284, 0.415384615384615, 0.366863905325444, 0.565680473372781,
0.685207100591716, 0.744378698224852, 0.764497041420118, 0.422485207100592,
0.363313609467456, 0.272189349112426)), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -35L), .Names = c("block", "item",
"response", "n", "freq"))
There are five blocks, each block contains 7 items, and some items have the same names across blocks. I can therefore facet by block as follows:
df %>%
ggplot(aes(x = item, y = freq)) +
geom_bar(stat = "identity", position = "dodge", color = "black") +
facet_grid(.~block, scales = "free") +
coord_cartesian(ylim = c(0, 1), expand = F) + # need to add expanse = F to prevent zooming away
scale_y_continuous(labels = scales::percent) +
theme(axis.text.x = element_text(angle=45, hjust=1, vjust=1))
I also have vectors which state, for each block, the order in which items should appear. For example:
# Desired left-to-right item order for blocks 3 and 4.
block_3_order <- c("p11e13d00", "p11e12d00", "p11e11d00", "p11e00d00", "p11e21d00", "p11e22d00", "p11e23d00")
block_4_order <- c("p00e00d13", "p00e00d12", "p00e00d11", "p00e00d00", "p00e00d21", "p00e00d22", "p00e00d23")
I tried to reorder the "item" factor, but to get the desired effect I would need to split the dataframe into subsets representing blocks. Otherwise I am having trouble grasping how you can integrate the ordering of factors with the ggplot treatment of item as a single factor across facets.
Any help is greatly appreciated.
To get a different custom axis order in each facet, you can create each "facet" as a separate plot and then lay them out together as if they were a single faceted plot.
library(tidyverse)
#devtools::install_github("baptiste/egg")
library(egg)
library(gridExtra)
library(grid)
theme_set(theme_bw())
First, create the custom orderings. The ones that are NULL will just be sorted alphabetically in the final plot.
b.order = list(b1 = NULL,
b2 = NULL,
b3 = c("p11e13d00","p11e12d00", "p11e11d00", "p11e00d00", "p11e21d00", "p11e22d00","p11e23d00"),
b4 = c("p00e00d13", "p00e00d12", "p00e00d11", "p00e00d00", "p00e00d21","p00e00d22","p00e00d23"),
b5 = NULL)
Create a list of plots, one for each block. We do this by splitting df by block. To get the custom ordering, we use factor to set the custom order based on the list b.order.
# Build one plot per block: split() yields one data frame per block and
# map2() pairs each with the entry of b.order at the same position.
plist <- map2(split(df, df$block), b.order,
  ~ .x %>%
    # A NULL ordering falls back to the sorted items present in this block.
    # No group_by() is needed: each piece already holds a single block.
    mutate(item = factor(item, levels = if (is.null(.y)) sort(unique(item)) else .y)) %>%
    ggplot(aes(x = item, y = freq)) +
    geom_bar(stat = "identity", position = "dodge", color = "black") +
    facet_grid(. ~ block, scales = "free") +
    # expand = FALSE keeps the y axis exactly at the 0-1 limits
    coord_cartesian(ylim = c(0, 1), expand = FALSE) +
    scale_y_continuous(labels = scales::percent) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
          plot.margin = margin(b = -5)) +
    labs(x = ""))
Remove y-axis labels, title, and ticks from all but the left-most plot:
# Negative indexing selects "all but the first" and is safe when plist has a
# single element, whereas 2:length(plist) would count down to c(2, 1).
plist[-1] <- plist[-1] %>%
  map(~ .x + theme(axis.text.y = element_blank(),
                   axis.title.y = element_blank(),
                   axis.ticks.y = element_blank()))
Arrange the plots. We use ggarrange from the egg package in order to ensure that the plot panels all have the same horizontal width. We also need to add the Item label beneath the plot. However, ggarrange prints the plot to the output device, even inside arrangeGrob. So we create the object p, clear the device and then redraw the final plot.
p = arrangeGrob(ggarrange(plots=plist, ncol=length(plist)),
textGrob("Item"), heights=c(20,1))
grid.newpage()
grid.draw(p)

reorder dodge bar plots (ggplot2)

I have a data frame lotr_main that looks like this:
I want to make a bar plot with the columns reordered according to the wordcount.
library(ggplot2)
library(viridis)
lotr_main %>% ggplot(aes(x = Character, y = wordcount, fill = Film)) +
geom_bar(stat="identity",position = "dodge") +
coord_flip() +
scale_fill_viridis("Film",discrete = TRUE, option = "C")
The plot I got:
What I want is that, for each character, the bars are reordered with the longest on top and the shortest at the bottom. The order of the bars doesn't need to be the same for each character.
You essentially want to fill by one thing, and order by another. A solution is thus to pry them apart and create a separate 'order' variable. Of note, I don't know if sorting your bars by value instead of having the same sequence each 'group' makes your plot more understandable.....
create some data:
library(data.table)
set.seed(123)
dat <- expand.grid(group=LETTERS[1:3],
subgroup=LETTERS[1:3])
dat$value <- runif(nrow(dat))
setDT(dat)
Create the order variable:
# rank() gives each row its position within the group when sorted by value.
# order() would instead return the sorting permutation, which is not the same
# thing (e.g. for c(0.3, 0.1, 0.2): order is 2 3 1, rank is 3 1 2).
dat[, order := rank(value, ties.method = "first"), by = group]
Create the plot
p1 <- ggplot(dat, aes(x=group,y=value, fill=subgroup,group=order))+
geom_bar(aes(group=order),position="dodge", stat="identity") +
coord_flip()
p1
Here's a start. Found data and inspiration here (code below)
LoTRdata <- structure(list(Film = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("The Fellowship Of The Ring",
"The Return Of The King", "The Two Towers"), class = "factor"),
Race = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L,
3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L), .Label = c("Elf", "Hobbit",
"Man"), class = "factor"), Gender = structure(c(1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L
), .Label = c("Female", "Male"), class = "factor"), Words = c(1229L,
971L, 14L, 3644L, 0L, 1995L, 331L, 513L, 0L, 2463L, 401L,
3589L, 183L, 510L, 2L, 2673L, 268L, 2459L)), .Names = c("Film",
"Race", "Gender", "Words"), class = "data.frame", row.names = c(NA,
-18L))
# Sort rows by word count (ties broken by film).
# NOTE(review): row order alone presumably does not change how ggplot2 orders
# dodged bars (that follows the factor levels) — confirm before relying on it.
LoTRdataOrder <- LoTRdata[order(LoTRdata$Words, LoTRdata$Film), ]
# install.packages("ggplot2", dependencies = TRUE)
# library() errors if ggplot2 is missing, whereas require() only returns FALSE.
library(ggplot2)
p <- ggplot(LoTRdataOrder, aes(x = Race, y = Words, fill = Film))
p + geom_bar(stat = "identity", position = "dodge") +
  coord_flip() + guides(fill = guide_legend())

Resources