Ordering data to match ggplot font faces - r

I'm currently practicing R using the tidyverse/dplyr library.
I re-used the example from this post. To practice with dplyr, I wanted to display the values of muni_o greater than 10 in bold on the y axis. I tried the following code:
pop=structure(list(muni_o = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L,
34L, 35L, 36L, 37L, 38L, 39L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L,
22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L,
35L, 36L, 37L, 38L, 39L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L,
23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L,
36L, 37L, 38L, 39L), .Label = c("1", "2", "3", "4", "5", "6",
"7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28",
"29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"
), class = "factor"), muni_d2 = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L
), .Label = c("0", "1", "2"), class = "factor"), Freq = c(6122.14220014045,
14014.8937908602, 3212.98371738453, 3762.86973933326, 2036.58101043117,
49935.7095757787, 5660.24459378729, 27133.6814011703, 8436.05010435109,
1244.59144510279, 8192.3151623091, 5125.775303556, 5520.96031514796,
971.04877757262, 13042.4320509421, 11677.9344908548, 23179.1486978947,
19342.6692995674, 11480.8298502747, 773.75470216998, 658.84762129725,
47303.8942738045, 12189.6600233976, 34138.2766548561, 2280.85569280618,
6648.00527768305, 13731.4673121252, 6822.03535863008, 1654.5719919344,
3408.63547537664, 8289.59920490436, 68250.529943451, 42919.3498777027,
11304.5876393861, 1371.59278771158, 281250.973114875, 11758.2956764985,
12160.5828034274, 4762.03601855022, 3774.00754913303, 17629.1897496886,
450.28749520739, 11504.661338085, 2732.14923854684, 40180.8293143395,
16562.6710623701, 43865.6430358497, 36647.9484766594, 7035.57607097289,
25651.323071728, 26432.811604918, 23743.6986932767, 597.5643668538,
124644.985934809, 29773.2098362482, 13788.9040158001, 24718.4440592384,
6526.85217011751, 474.70640264902, 5440.01365034166, 29930.306857298,
11072.6889810259, 101099.685991706, 254.95577591974, 11637.2716353329,
5367.72912672273, 2504.81681919538, 97.98256049428, 2258.20425774592,
8475.80585487552, 40564.9077830157, 59907.8345066714, 12858.502494956,
850.24914346085, 0, 14152.3410516667, 41777.9839790016, 2730.8225408641,
13662.2793985781, 46579.248451629, 5035.33799662119, 9167.68431269098,
15713.4248568278, 36391.6173102699, 36523.5128982161, 56158.4213063058,
17666.1932118543, 10319.0225229469, 14744.0900996851, 14519.6971048675,
14341.9635886829, 5193.52143006156, 267285.439969485, 10049.4551896091,
17861.0606322283, 56589.3203766755, 14836.888817694, 6069.4455916734,
18992.3441918275, 52074.0110108799, 88973.5164027747, 82777.2109430964,
2270.54970959312, 12030.813277725, 15414.1038338142, 3284.84133984456,
3101.73291583232, 10020.5318813645, 25286.7675500444, 114919.563601638,
185758.597625183, 28154.2996127091, 2873.4152126078, 3503521.52693064,
49555.0928217366, 35402.2559957372, 7917.49624385274)), class = "data.frame", row.names = c(NA,
-117L))
pop<-mutate(pop, COLORS=case_when(as.numeric(muni_o)>10~ 'bold',TRUE~ 'plain'))
pop %>%
group_by(muni_o) %>%
mutate(prop = Freq / sum(Freq)) %>%
ungroup() %>%
arrange(desc(muni_d2), prop) %>%
mutate(muni_o = factor(muni_o, levels = unique(muni_o))) %>%
ggplot(aes(muni_o, prop, fill = muni_d2)) +
geom_col() +
coord_flip()+
theme(axis.text.y = element_text(face =pop$COLORS,size=14))
The column I added COLORS seems correct. But I think my COLORS column is not modified to be associated with muni_o (because of the unique function use on muni_o) so the bold/plain order doesn't correspond. Am I doing something wrong?

The issue is that the data frame you use for plotting has a different order than the original data frame pop. Hence the assignment between COLORS and muni_o is messed up. To prevent this save the df after your data wrangling as e.g. pop1 and use pop1$COLORS:
library(dplyr)
library(ggplot2)

# Flag muni_o values above 10 as bold. Going through as.character() compares
# the factor's labels rather than its internal integer codes.
pop <- mutate(
  pop,
  COLORS = case_when(
    as.numeric(as.character(muni_o)) > 10 ~ "bold",
    TRUE ~ "plain"
  )
)

# Keep the wrangled data: the axis order of the plot comes from this data
# frame (reordered factor levels), not from the original `pop`.
pop1 <- pop %>%
  group_by(muni_o) %>%
  mutate(prop = Freq / sum(Freq)) %>%
  ungroup() %>%
  arrange(desc(muni_d2), prop) %>%
  mutate(muni_o = factor(muni_o, levels = unique(muni_o)))

# One font face per axis label, taken from pop1$COLORS in the order of the
# reordered factor levels (match() returns the first row of each level).
axis_faces <- pop1$COLORS[match(levels(pop1$muni_o), pop1$muni_o)]

pop1 %>%
  ggplot(aes(muni_o, prop, fill = muni_d2)) +
  geom_col() +
  coord_flip() +
  theme(axis.text.y = element_text(face = axis_faces, size = 14))
#> Warning: Vectorized input to `element_text()` is not officially supported.
#> Results may be unexpected or may change in future versions of ggplot2.

Related

add correlation coefficient and CI values in a boxplot in R

I am trying to make a boxplot in R that shows the correlation coefficient and its CI values. However, when I add the following line of code, it does not work, nor does it give an error message.
boxplot(AI~Q8,
data=df6,
main="The relation between Q8 and the A",
xlab="A",
ylab="B",
col="orange",
border="brown",
stat_cor()
)
or
boxplot(AI~Q8,
data=df6,
main="The relation between Q8 and the A",
xlab="A",
ylab="B",
col="orange",
border="brown",
) + stat_cor()
And is it also possible to get a different color for each boxplot?
This is the dataset I use
structure(list(AI = c(0.659967433444017, 0.941802575478176, 0.565824387077681,
0.733813835498287, 0.502486567259441, 0.581214986043292, 0.190601573198807,
0.61511194322592, 0.630316833066587, 0.513634604352834, 0.691766098799664,
0.443331648025045, 0.475498746385683, 0.253183014637901, 0.685340877692643,
1.07517098753888, 1.10219598244924, 0.469261733415629, 0.473732071653954,
1.11472099520751, 1.12140547685593, 0.844644528419478, 0.741480217894283,
0.664326042816726, 0.798610418245564, 0.184662871961999, 0.718894350907626,
0.773529667226157, 0.65593386028412, 0.958431317152659, 0.638301596431948,
0.844678953483002, 1.12048871720509, 1.21583405287684, 0.642731559824528,
0.720376072993178, 0.53551579775883, 0.612011376983417, 0.281431655977777,
1.25176573918925, 1.15328375538737, 1.20957695840318, 1.09278028083012,
0.859636858723266, 1.20869790596587, 1.12288309438874, 0.891306451574103,
0.552552119084953, 0.995259412720299, 1.17674596084747), Q8 = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 2L, 2L, 2L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L)), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L,
24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 33L, 34L, 36L, 37L, 38L,
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L,
52L), class = "data.frame")
You can add correlation in a legend, colors may be specified as vectors.
# Semi-transparent fills and opaque borders, one colour per Q8 group.
n_groups <- length(unique(df6$Q8))
clr <- hcl.colors(n_groups, alpha = 0.5)
blr <- hcl.colors(n_groups, alpha = 1)

boxplot(
  AI ~ Q8,
  data = df6,
  main = "The relation between Q8 and the A",
  xlab = "A",
  ylab = "B",
  col = clr,
  border = blr
)

# Correlation estimate and its confidence interval, shown as a legend entry.
corr <- with(df6, cor.test(AI, Q8))[c("estimate", "conf.int")]
legend(
  "topleft",
  legend = bquote(
    rho == .(signif(corr[[1]], 2)) ~ "[" * .(signif(corr[[2]][1], 2)) * "," ~
      .(signif(corr[[2]][2], 2)) * "]"
  ),
  bty = "n"
)

Conditionally replace values of multiple columns, from values of other multiple columns

Suppose I have this dataset:
set.seed (1234);
data.frame(cbind(a=rep(c("si","no"),30),b=rnorm(60)),
c=rep(c("d","e","f"),20)) %>% head()
Then I want to add many columns (in this example I only added two), to identify distinct cases between each group (in this case, column "a").
set.seed(1234);
data.frame(cbind(a=rep(c("si","no"),30),b=rnorm(60)),c=rep(c("d","e","f"),20)) %>%
group_by(a) %>% dplyr::mutate_at(vars(c(b,c)), .funs= list(dups_hash_ing= ~n_distinct(.)))
This code leaves the following dataset:
If I set the dataset with dput, the outcome is
structure(list(a = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L), .Label = c("no", "si"), class = "factor"), b = structure(c(22L,
1L, 51L, 34L, 50L, 57L, 53L, 10L, 47L, 3L, 11L, 23L, 15L, 38L,
58L, 39L, 41L, 17L, 28L, 21L, 37L, 45L, 29L, 46L, 32L, 48L, 56L,
52L, 26L, 19L, 35L, 8L, 55L, 20L, 9L, 36L, 2L, 12L, 6L, 42L,
49L, 43L, 59L, 54L, 31L, 13L, 60L, 44L, 14L, 30L, 7L, 5L, 16L,
27L, 33L, 18L, 24L, 4L, 25L, 40L), .Label = c("-0.0997905884418961",
"-0.151736536534977", "-0.198416273822079", "-0.254874652654534",
"-0.274704218225806", "-0.304721068966714", "-0.324393300483657",
"-0.400235237343163", "-0.415751788401515", "-0.50873701541522",
"-0.538070788884863", "-0.60615111526422", "-0.659770093821306",
"-0.684320344136007", "-0.789646852263761", "-0.933503340589868",
"-0.965903210133575", "-1.07754212275943", "-1.11444896479736",
"-1.60708093984972", "-2.07823754188738", "-2.7322195229558",
"-2.85575865501923", "-3.23315213292314", "0.0295178303214797",
"0.0326639575014441", "0.116845344986082", "0.162654708118265",
"0.185513915583057", "0.186492083080971", "0.287709728313787",
"0.311681028661359", "0.319160238648117", "0.413868915451097",
"0.418057822385083", "0.42200837321742", "0.485226820569252",
"0.487814635163685", "0.500694614280786", "0.594273774110513",
"0.62021020366732", "0.629536099884472", "0.660212631820405",
"0.677415500438328", "0.696768778564913", "0.700733515544461",
"0.704180178465512", "0.760462361967838", "0.895171980275539",
"0.912322161610113", "0.976031734922396", "1.1123628412626",
"1.16910851401363", "1.17349757263239", "1.49349310261748", "1.84246362620766",
"1.98373220068438", "2.16803253951933", "2.27348352044748", "2.91914013071762"
), class = "factor"), c = structure(c(1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L), .Label = c("d", "e", "f"), class = "factor"),
a_dups_hash_ing = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), b_dups_hash_ing = c(30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L), c_dups_hash_ing = c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -60L), groups = structure(list(
a = structure(1:2, .Label = c("no", "si"), class = "factor"),
.rows = list(c(2L, 4L, 6L, 8L, 10L, 12L, 14L, 16L, 18L, 20L,
22L, 24L, 26L, 28L, 30L, 32L, 34L, 36L, 38L, 40L, 42L, 44L,
46L, 48L, 50L, 52L, 54L, 56L, 58L, 60L), c(1L, 3L, 5L, 7L,
9L, 11L, 13L, 15L, 17L, 19L, 21L, 23L, 25L, 27L, 29L, 31L,
33L, 35L, 37L, 39L, 41L, 43L, 45L, 47L, 49L, 51L, 53L, 55L,
57L, 59L))), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
What I need to do, is replace, column by column, if the number of distinct cases is more than one per group, with the value of the original column. I have to do this for more than 50 columns. An example of this will be provided for only one column with mutate:
dplyr::mutate(b_dups_hash_ing= ifelse(>1,b,0))
I need to repeat the code provided above for many variables. This is very similar to a mutate_at (words in brackets is what I would do). The following example does not work, but is something I would do in an ideal world, just for your better understanding of my problem.
dplyr::mutate_at(vars(contains('_dups_hash_ing')), .funs = list(~ifelse(.>1,vars([original]),0)))
Is this what you're looking for?
# For every *_dups_hash_ing column, keep values greater than 1 and set the
# rest to 0, then show the first rows.
df %>%
  dplyr::mutate_at(
    vars(contains("_dups_hash_ing")),
    function(x) ifelse(x > 1, x, 0)
  ) %>%
  head()
#> # A tibble: 6 x 6
#> # Groups: a [2]
#> a b c a_dups_hash_ing b_dups_hash_ing c_dups_hash_ing
#> <fct> <fct> <fct> <dbl> <int> <int>
#> 1 si -2.7322195229558 d 0 30 3
#> 2 no -0.09979058844189… e 0 30 3
#> 3 si 0.976031734922396 f 0 30 3
#> 4 no 0.413868915451097 d 0 30 3
#> 5 si 0.912322161610113 e 0 30 3
#> 6 no 1.98373220068438 f 0 30 3

Weekday Factor displaying alphabetically

I'm trying to create boxplots using the code below. I've also included sample data. It's time-series data, I have a date-time field, and I've created a factor for Weekday. The problem I have is that my Weekdays when displayed in the boxplot are out of order. I think it's doing it alphabetically instead of chronologically. Does anyone know how to fix this? Also I'd like to rotate the x-axis labels like 45 degrees, so they'll all fit, and have them all displayed.
Code:
boxplot(OrderCnt ~ Weekday, data=icartdf_factor, main="Orders vs Weekday", xlab="Weekday", ylab="Orders")
Sample Data:
dput(droplevels(icartdf_factor[1:50,]))
structure(list(OrderCnt = c(1L, 1L, 0L, 0L, 0L, 2L, 5L, 12L,
16L, 30L, 27L, 21L, 23L, 27L, 37L, 36L, 35L, 30L, 27L, 17L, 8L,
2L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 2L, 6L, 9L, 19L, 28L, 27L, 28L,
22L, 29L, 41L, 35L, 43L, 42L, 27L, 16L, 10L, 2L, 1L, 0L, 0L,
0L), DateTime = structure(c(1398931200, 1398934800, 1398938400,
1398942000, 1398945600, 1398949200, 1398952800, 1398956400, 1398960000,
1398963600, 1398967200, 1398970800, 1398974400, 1398978000, 1398981600,
1398985200, 1398988800, 1398992400, 1398996000, 1398999600, 1399003200,
1399006800, 1399010400, 1399014000, 1399017600, 1399021200, 1399024800,
1399028400, 1399032000, 1399035600, 1399039200, 1399042800, 1399046400,
1399050000, 1399053600, 1399057200, 1399060800, 1399064400, 1399068000,
1399071600, 1399075200, 1399078800, 1399082400, 1399086000, 1399089600,
1399093200, 1399096800, 1399100400, 1399104000, 1399107600), class = c("POSIXct",
"POSIXt")), Weekday = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("Friday",
"Saturday", "Thursday"), class = "factor"), hourcol = structure(c(2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 21L, 22L, 23L, 24L, 1L, 2L, 3L), .Label = c("00", "01",
"02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12",
"13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"
), class = "factor")), .Names = c("OrderCnt", "DateTime", "Weekday",
"hourcol"), row.names = c(NA, 50L), class = "data.frame")
You just need to specify the desired order within the factor itself.
# Rebuild the factor with its levels listed in chronological order so that
# boxplot() draws the groups in that order.
weekday_order <- c("Thursday", "Friday", "Saturday")
icartdf_factor$Weekday <- factor(icartdf_factor$Weekday, levels = weekday_order)

boxplot(
  OrderCnt ~ Weekday,
  data = icartdf_factor,
  main = "Orders vs Weekday",
  xlab = "Weekday",
  ylab = "Orders"
)
To change the angle of the labels, take a look at How can I change the angle of the value labels on my axes?

R codes for hypothesis test in Rmarkdown

I am really struggling on the coding part for the R markdown but have no one to ask...
The data I am working on is, dput(survey):
structure(list(Time = structure(c(5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 1L, 2L, 3L, 4L, 35L, 42L, 46L, 30L, 31L,
33L, 34L, 29L, 36L, 37L, 38L, 39L, 40L, 41L, 43L, 44L, 45L, 47L,
48L, 32L, 54L, 55L, 50L, 49L, 51L, 52L, 53L, 57L, 59L, 56L, 60L,
61L, 58L, 62L), .Label = c("2017/08/06 10:25:01 PM GMT+10", "2017/08/06 10:26:54 PM GMT+10",
"2017/08/06 10:38:13 PM GMT+10", "2017/08/06 10:51:58 PM GMT+10",
"2017/08/06 4:53:07 PM GMT+10", "2017/08/06 4:58:44 PM GMT+10",
"2017/08/06 5:01:05 PM GMT+10", "2017/08/06 5:03:25 PM GMT+10",
"2017/08/06 5:04:50 PM GMT+10", "2017/08/06 5:06:51 PM GMT+10",
"2017/08/06 5:06:54 PM GMT+10", "2017/08/06 5:10:57 PM GMT+10",
"2017/08/06 5:11:16 PM GMT+10", "2017/08/06 5:18:21 PM GMT+10",
"2017/08/06 5:23:46 PM GMT+10", "2017/08/06 5:34:02 PM GMT+10",
"2017/08/06 5:43:10 PM GMT+10", "2017/08/06 5:54:52 PM GMT+10",
"2017/08/06 6:04:06 PM GMT+10", "2017/08/06 7:11:00 PM GMT+10",
"2017/08/06 7:13:21 PM GMT+10", "2017/08/06 7:32:45 PM GMT+10",
"2017/08/06 7:33:58 PM GMT+10", "2017/08/06 7:50:31 PM GMT+10",
"2017/08/06 8:02:07 PM GMT+10", "2017/08/06 8:28:39 PM GMT+10",
"2017/08/06 8:36:46 PM GMT+10", "2017/08/06 9:14:14 PM GMT+10",
"2017/08/07 1:59:14 PM GMT+10", "2017/08/07 10:28:13 AM GMT+10",
"2017/08/07 11:05:40 AM GMT+10", "2017/08/07 11:44:09 PM GMT+10",
"2017/08/07 12:18:04 PM GMT+10", "2017/08/07 12:49:27 PM GMT+10",
"2017/08/07 12:55:41 AM GMT+10", "2017/08/07 2:04:49 PM GMT+10",
"2017/08/07 2:14:56 PM GMT+10", "2017/08/07 2:17:10 PM GMT+10",
"2017/08/07 4:47:38 PM GMT+10", "2017/08/07 4:57:15 PM GMT+10",
"2017/08/07 7:08:44 PM GMT+10", "2017/08/07 9:12:16 AM GMT+10",
"2017/08/07 9:18:11 PM GMT+10", "2017/08/07 9:22:59 PM GMT+10",
"2017/08/07 9:23:43 PM GMT+10", "2017/08/07 9:32:10 AM GMT+10",
"2017/08/07 9:46:41 PM GMT+10", "2017/08/07 9:55:01 PM GMT+10",
"2017/08/08 1:36:16 PM GMT+10", "2017/08/08 10:27:59 AM GMT+10",
"2017/08/08 3:36:15 PM GMT+10", "2017/08/08 4:15:12 PM GMT+10",
"2017/08/08 6:39:28 PM GMT+10", "2017/08/08 8:44:38 AM GMT+10",
"2017/08/08 9:03:07 AM GMT+10", "2017/08/09 1:00:16 PM GMT+10",
"2017/08/09 10:17:55 AM GMT+10", "2017/08/09 10:26:28 PM GMT+10",
"2017/08/09 11:50:50 AM GMT+10", "2017/08/09 3:02:39 PM GMT+10",
"2017/08/09 9:48:19 PM GMT+10", "2017/08/10 7:32:00 AM GMT+10"
), class = "factor"), ID = structure(c(48L, 57L, 38L, 9L, 8L,
42L, 41L, 58L, 31L, 27L, 60L, 34L, 13L, 37L, 40L, 29L, 53L, 28L,
16L, 20L, 47L, 18L, 51L, 3L, 36L, 10L, 32L, 11L, 54L, 22L, 61L,
15L, 35L, 2L, 25L, 55L, 17L, 5L, 14L, 21L, 49L, 45L, 6L, 30L,
26L, 4L, 19L, 50L, 44L, 56L, 43L, 59L, 24L, 12L, 52L, 23L, 1L,
39L, 7L, 62L, 46L, 33L), .Label = c("1907", "3456", "450181964",
"460061490", "A", "ABCABCABC", "adsad", "affordance", "alexxx",
"AliceJ", "blueberry11", "Bob", "byue7515", "Cameron Nichols",
"Coelacanth", "crocophile", "Donald trump ", "DS2012-LB-S", "Gir",
"goly", "Grace", "greyshirt", "grob6576", "hahahahaha", "Harry",
"Insidestella", "ja150", "jane", "Jiashu Wu", "jmc", "Joohee0214",
"kakinna", "Kimbo Slice", "lhar7524", "lizebin", "Lucy", "Magician1213",
"Matchey", "md123", "mia", "MP", "N52981227", "Nattt", "Pete",
"rcon", "Ryan_eats_p-values", "S123", "Salmon ", "smarcon", "smile",
"snail", "sonja kay", "Thelimitdoesnotexist", "Toflin", "Tony Stark ",
"UriLover420", "valerie", "Whatzup", "Winky", "xwn19960829",
"zilu2637", "ZXFAARON"), class = "factor"), Gender = structure(c(3L,
2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 2L,
2L, 1L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L,
2L, 3L, 3L, 3L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 2L,
4L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 3L), .Label = c("crocodilian",
"Female", "Male", "Poisson"), class = "factor"), Postcode =structure(c(12L,
30L, 20L, 35L, 28L, 33L, 13L, 22L, 12L, 2L, 3L, 38L, 25L, 13L,
4L, 23L, 19L, 23L, 29L, 32L, 26L, 4L, 14L, 4L, 36L, 12L, 3L,
41L, 28L, 40L, 24L, 9L, 37L, 4L, 3L, 17L, 32L, 27L, 15L, 36L,
12L, 11L, 3L, 7L, 4L, 10L, 39L, 24L, 42L, 8L, 12L, 13L, 5L, 6L,
31L, 20L, 1L, 34L, 18L, 13L, 21L, 16L), .Label = c("14052", "2000",
"2007", "2008", "2020", "2021", "2022", "2026", "2031", "2037",
"2041", "2042", "2050", "2066", "2069", "2074", "2097", "2112",
"2117", "2131", "2134", "2136", "2137", "2138", "2140", "2144",
"2154", "2165", "2166", "2171", "2193", "2200", "2205", "2209",
"2216", "2220", "2228", "2756", "2762", "2765", "2780", "sydney"
), class = "factor"), StatsCourse = structure(c(4L, 4L, 4L, 4L,
4L, 4L, 1L, 4L, 4L, 4L, 3L, 4L, 4L, 5L, 4L, 4L, 5L, 6L, 4L, 4L,
4L, 4L, 5L, 4L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 6L, 4L, 2L, 5L, 4L, 5L, 6L, 4L, 2L,
2L, 4L, 4L, 5L, 7L, 5L, 1L, 4L, 4L, 4L), .Label = c("", "BUSS1020",
"MATH1001,MATH1002", "MATH1005", "MATH1015", "MATH1905", "none"
), class = "factor"), Clubs = structure(c(1L, 1L, 4L, 5L, 4L,
2L, 4L, 4L, 2L, 4L, 7L, 2L, 4L, 4L, 1L, 4L, 1L, 4L, 1L, 1L, 6L,
1L, 4L, 1L, 11L, 4L, 5L, 10L, 3L, 5L, 2L, 4L, 1L, 1L, 2L, 1L,
4L, 4L, 4L, 6L, 2L, 2L, 4L, 4L, 9L, 4L, 1L, 8L, 2L, 4L, 2L, 6L,
4L, 4L, 11L, 5L, 1L, 1L, 1L, 4L, 4L, 1L), .Label = c("0", "1",
"10+", "2", "3", "4", "5", "6", "7", "none", "None"), class = "factor"),
StudyTime = structure(c(24L, 3L, 26L, 27L, 17L, 2L, 10L,
14L, 23L, 7L, 19L, 3L, 17L, 29L, 23L, 22L, 10L, 10L, 28L,
23L, 6L, 14L, 20L, 7L, 17L, 28L, 5L, 16L, 20L, 3L, 21L, 3L,
23L, 7L, 17L, 10L, 1L, 18L, 10L, 17L, 10L, 7L, 13L, 5L, 15L,
3L, 8L, 17L, 19L, 17L, 3L, 30L, 31L, 1L, 4L, 3L, 20L, 9L,
14L, 11L, 12L, 25L), .Label = c("0", "05-Jun", "10", "11",
"12", "14", "15", "17", "2", "20", "20-24", "20-25?", "24",
"25", "28", "28 hours", "30", "31", "35", "4", "40", "49",
"5", "50", "6", "7", "70", "8", "8hr", "didn't start uni maybe 6h",
"not sure"), class = "factor"), StudyLoad = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("", "full-time", "part-time"), class = "factor"),
SocialMedia = structure(c(1L, 5L, 1L, 1L, 1L, 7L, 1L, 1L,
7L, 7L, 2L, 1L, 2L, 1L, 1L, 8L, 6L, 2L, 1L, 7L, 1L, 4L, 1L,
8L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 7L, 2L, 1L, 2L, 1L, 1L, 2L,
1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 5L, 5L, 1L, 1L, 2L, 2L,
1L, 3L, 1L, 2L, 2L, 1L, 2L, 1L, 1L), .Label = c("Facebook",
"Instragram", "none! (really)", "reddit", "Snapchat", "Tumblr",
"Twitter", "WeChat"), class = "factor"), Siblings = structure(c(2L,
4L, 4L, 1L, 4L, 1L, 2L, 4L, 5L, 2L, 1L, 2L, 2L, 1L, 4L, 1L,
1L, 4L, 2L, 2L, 8L, 2L, 2L, 3L, 1L, 1L, 2L, 5L, 2L, 7L, 1L,
4L, 2L, 6L, 1L, 6L, 2L, 5L, 1L, 1L, 4L, 4L, 2L, 2L, 1L, 2L,
1L, 1L, 4L, 4L, 2L, 9L, 1L, 2L, 10L, 2L, 4L, 2L, 2L, 1L,
2L, 2L), .Label = c("0", "1", "165", "2", "3", "4", "5",
"6", "none", "one"), class = "factor"), FBFriends = structure(c(49L,
43L, 6L, 3L, 28L, 2L, 9L, 13L, 21L, 19L, 30L, 40L, 37L, 20L,
35L, 32L, 53L, 47L, 30L, 22L, 8L, 45L, 14L, 15L, 38L, 16L,
45L, 31L, 35L, 43L, 34L, 23L, 52L, 18L, 34L, 27L, 33L, 11L,
42L, 24L, 51L, 26L, 17L, 50L, 39L, 19L, 10L, 12L, 4L, 44L,
46L, 29L, 45L, 36L, 54L, 20L, 7L, 5L, 41L, 25L, 1L, 48L), .Label = c("~300",
"10", "100", "1000", "1127", "115", "1192", "12", "120",
"121", "130", "148", "150", "1583", "165", "170", "174",
"190", "200", "213", "228", "229", "235", "240", "242", "256",
"259", "263", "27", "300", "308", "31", "382", "40", "400",
"431", "470", "5", "540", "548", "57", "572", "600", "664",
"700", "724", "800", "850", "90", "936", "978", "do not know",
"Don't have FB", "none (not in facebook)"), class = "factor"),
Grade = structure(c(18L, 19L, 11L, 31L, 33L, 14L, 22L, 18L,
6L, 9L, 19L, 18L, 22L, 23L, 24L, 30L, 28L, 16L, 2L, 14L,
3L, 12L, 21L, 2L, 12L, 12L, 6L, 29L, 12L, 27L, 17L, 6L, 12L,
17L, 17L, 15L, 24L, 20L, 7L, 14L, 12L, 10L, 22L, 34L, 24L,
17L, 16L, 12L, 24L, 32L, 26L, 25L, 26L, 13L, 4L, 12L, 1L,
5L, 12L, 8L, 24L, 35L), .Label = c("2.8", "50", "50-60",
"54", "6.25", "60", "61", "61.5", "62", "63", "64", "65",
"65.9", "66", "68", "69", "70", "72", "73", "73.2", "73.4",
"74", "74.6", "75", "8.7", "80", "82", "82.4", "83.2", "87",
"90", "90.1", "90.5", "91", "D"), class = "factor"), Pet = structure(c(3L,
2L, 3L, 1L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 3L, 2L, 3L, 3L,
2L, 3L, 3L, 2L, 2L, 3L, 2L, 2L, 3L, 2L, 2L, 2L, 3L, 3L, 3L,
2L, 3L, 2L, 2L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 2L,
3L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 3L, 2L, 3L, 3L, 2L, 2L, 2L,
3L), .Label = c("", "No", "Yes"), class = "factor"), Home = structure(c(2L,
3L, 3L, 1L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
2L, 2L, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 2L,
2L, 2L, 3L, 3L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 2L, 3L,
3L), .Label = c("", "No", "Yes"), class = "factor"), ExerciseTime = structure(c(10L,
12L, 7L, 1L, 4L, 7L, 7L, 5L, 7L, 12L, 13L, 5L, 10L, 7L, 15L,
15L, 10L, 10L, 5L, 14L, 2L, 9L, 4L, 5L, 7L, 4L, 14L, 8L,
10L, 13L, 1L, 13L, 1L, 13L, 13L, 5L, 7L, 16L, 16L, 14L, 10L,
14L, 7L, 6L, 12L, 10L, 10L, 13L, 13L, 14L, 7L, 11L, 2L, 2L,
17L, 16L, 7L, 7L, 2L, 3L, 13L, 15L), .Label = c("", "0",
"05-Jun", "1", "10", "12", "2", "2 hours", "20", "3", "3.5",
"4", "5", "6", "7", "8", "none"), class = "factor"), Eyecolor = structure(c(9L,
7L, 5L, 1L, 8L, 2L, 8L, 3L, 3L, 8L, 3L, 7L, 7L, 7L, 7L, 7L,
3L, 4L, 7L, 3L, 11L, 8L, 11L, 2L, 8L, 2L, 2L, 2L, 8L, 7L,
1L, 7L, 2L, 7L, 3L, 4L, 10L, 7L, 8L, 7L, 7L, 6L, 7L, 3L,
8L, 2L, 8L, 7L, 4L, 8L, 9L, 3L, 7L, 5L, 7L, 8L, 12L, 7L,
7L, 8L, 3L, 8L), .Label = c("", "black", "Black", "blue",
"Blue", "Blue/Green", "brown", "Brown", "Brown ", "Brown/black",
"dark brown", "grey"), class = "factor"), Working = structure(c(2L,
8L, 2L, 1L, 4L, 2L, 2L, 8L, 2L, 24L, 2L, 13L, 5L, 3L, 26L,
2L, 8L, 13L, 24L, 2L, 12L, 2L, 9L, 8L, 2L, 2L, 2L, 11L, 2L,
10L, 1L, 4L, 21L, 2L, 2L, 15L, 14L, 21L, 26L, 18L, 4L, 2L,
7L, 27L, 12L, 2L, 20L, 2L, 19L, 25L, 8L, 2L, 2L, 17L, 23L,
16L, 2L, 6L, 2L, 13L, 13L, 22L), .Label = c("", "0", "1.5",
"10", "11", "12", "14", "15", "17", "18", "18 hours", "2",
"20", "24", "25", "26", "3", "3.5", "30", "38", "4", "40",
"44", "5", "6", "7", "8"), class = "factor"), Season = structure(c(2L,
3L, 2L, 1L, 5L, 2L, 3L, 4L, 4L, 3L, 4L, 4L, 3L, 3L, 4L, 3L,
3L, 3L, 3L, 5L, 3L, 3L, 2L, 5L, 5L, 4L, 2L, 2L, 5L, 2L, 3L,
2L, 2L, 3L, 2L, 4L, 2L, 3L, 5L, 3L, 4L, 5L, 3L, 4L, 4L, 4L,
3L, 4L, 4L, 4L, 3L, 2L, 2L, 2L, 3L, 4L, 4L, 3L, 2L, 4L, 4L,
3L), .Label = c("", "Autumn", "Spring", "Summer", "Winter"
), class = "factor")), .Names = c("Time", "ID", "Gender",
"Postcode", "StatsCourse", "Clubs", "StudyTime", "StudyLoad",
"SocialMedia", "Siblings", "FBFriends", "Grade", "Pet", "Home",
"ExerciseTime", "Eyecolor", "Working", "Season"), class = "data.frame", row.names = c(NA,
-62L))
And what I did so far is,
library(dplyr)
library(ggplot2)
library(tidyr)
library(knitr)
survey <- read.csv("STAT2012Survey.csv")
colnames(survey)
oldname = colnames(survey)
newname = c("Time", "ID", "Gender", "Postcode", "StatsCourse", "Clubs", "StudyTime",
"StudyLoad", "SocialMedia", "Siblings", "FBFriends", "Grade", "Pet", "Home",
"ExerciseTime", "Eyecolor", "Working", "Season")
colnames(survey) = newname
What I want to achieve is, I want to provide a hypothesis test about
"Is there any evidence that there is difference in exercise time between males and females?"
To do this, I need to get the mean, the standard deviation, and similar statistics in order to run a two-sample t-test, but I do not know how to approach it.
Also, to visualize the data with graph, I tried,
ggplot(survey, aes(x = Gender, y = ExerciseTime, fill = Gender)) + geom_boxplot()
However, it only showed a strange graph. I think it is because the "ExerciseTime" variable is not numeric, but I am stuck on that as well since ggplot2 does not deal with the data of class numeric...
Someone please help me...! I want to make more hypothesis tests towards multiple questions but I am stuck on the first question... I might be able to achieve the goal if I know how to do the first one! Thanks.
Before you can make a boxplot, you will need to make ExerciseTime a numeric variable. The problem you will have with that is some of the responses don't easily turn numeric (2 hours, for example, should probably be 2, but it will require an extra step to get rid of the text).
As a start, though, let's just do the easiest case of take anything that isn't a natural number and let it change to a missing value.
# Coerce ExerciseTime to numeric. Date-like entries such as "05-Jun" are
# blanked first, then the first run of one or two digits is kept (so "3.5"
# becomes 3 and "2 hours" becomes 2); entries without digits become NA.
# stringr functions are namespace-qualified so the snippet does not depend on
# library(stringr) having been attached.
survey2 <- survey %>%
  mutate(
    ExerciseTime = as.character(ExerciseTime),
    ExerciseTime = stringr::str_replace(ExerciseTime, "\\d{2}-\\w{3}", ""),
    ExerciseTime = stringr::str_extract(ExerciseTime, "\\d{1,2}"),
    ExerciseTime = as.numeric(ExerciseTime)
  )

# Plot the cleaned data (survey2), not the original survey, so that the
# numeric ExerciseTime column is what ends up on the y axis.
ggplot(
  data = survey2,
  mapping = aes(x = Gender, y = ExerciseTime, fill = Gender)
) +
  geom_boxplot()

R ggplot geom_bar facet dodge

I'm having some trouble producing a faceted bar_plot in ggplot2. Perhaps it is something very obvious, but I can't figure it out:( I've the following dataset:
structure(list(COUNTRY = structure(c(1L, 4L, 7L, 10L, 13L, 16L,
19L, 2L, 5L, 8L, 11L, 14L, 17L, 20L, 3L, 6L, 9L, 12L, 15L, 18L,
2L, 5L, 8L, 11L, 14L, 17L, 20L, 3L, 6L, 9L, 12L, 15L, 18L, 1L,
4L, 7L, 10L, 13L, 16L, 19L, 3L, 6L, 9L, 12L, 15L, 18L, 1L, 4L,
7L, 10L, 13L, 16L, 19L, 2L, 5L, 8L, 11L, 14L, 17L, 20L), .Label = c("Angola",
"Botswana", "Burundi", "Comoros", "Eritrea", "Ethiopia", "Kenya",
"Lesotho", "Madagascar", "Malawi", "Mozambique", "Namibia", "Rwanda",
"Somalia", "South Africa", "Swaziland", "Tanzania", "Uganda",
"Zambia", "Zimbabwe"), class = "factor"), Year = structure(c(2L,
2L, 14L, 16L, 16L, 11L, 12L, 2L, 4L, 15L, 5L, 10L, 16L, 16L,
2L, 17L, 14L, 11L, 12L, 10L, 2L, 4L, 15L, 5L, 10L, 16L, 16L,
2L, 17L, 14L, 11L, 12L, 10L, 2L, 2L, 14L, 16L, 16L, 11L, 12L,
2L, 17L, 14L, 11L, 12L, 10L, 2L, 2L, 14L, 16L, 16L, 11L, 12L,
2L, 4L, 15L, 5L, 10L, 16L, 16L), .Label = c("1998", "2000", "2001/2",
"2002", "2003", "2003/4", "2004", "2005", "2005/6", "2006", "2006/7",
"2007", "2007/8", "2008/9", "2009", "2010", "2011"), class = "factor"),
sex = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L), .Label = c("m", "f", "b"), class = "factor"),
location = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Urban", "Rural", "Total",
"Capital.City", "Other.Cities.towns", "Urban.Non.slum", "Urban.Slum"
), class = "factor"), percent = c(60.4, 42.3, 85.4919452426806,
96.3, 90.2847535659154, 87.7347421555771, 87.7323067592087,
80.4, 80.6, 93.8186266493188, 75.0109418832216, 36.8, 87.1059275774722,
90.1216932603937, 66.8, 83.6279398931798, 89.690685909038,
88.8207941092749, 94.6139558774441, 88.0251085200726, 70.4,
54.7, 86.1919805548309, 56.9792710715853, 13.1, 75.6355555697382,
86.8196674671991, 42.5, 61.9452522893308, 77.597285694676,
88.3453320625631, 94.5192341778471, 80.6271302923487, 44.1,
29, 77.8542469357068, 90, 86.7073851186482, 83.8921034867784,
76.4094871587916, 49.3, 63.952805392032, 77.004884485532,
88.6723566877386, 93.9560433940531, 82.3095948307742, 56.1,
31.1, 80.0235653889704, 91.5, 88.3809682134183, 85.5656196766576,
80.0539027063387, 77, 61.2, 89.2538966046165, 59.6756344409838,
23, 79.6749544074645, 86.9507859695728)), .Names = c("COUNTRY",
"Year", "sex", "location", "percent"), row.names = c(1L, 4L,
7L, 10L, 13L, 16L, 19L, 22L, 25L, 28L, 31L, 34L, 37L, 40L, 43L,
46L, 49L, 52L, 55L, 58L, 62L, 65L, 68L, 71L, 74L, 77L, 80L, 83L,
86L, 89L, 92L, 95L, 98L, 101L, 104L, 107L, 110L, 113L, 116L,
119L, 123L, 126L, 129L, 132L, 135L, 138L, 141L, 144L, 147L, 150L,
153L, 156L, 159L, 162L, 165L, 168L, 171L, 174L, 177L, 180L), class = "data.frame")
I am trying to make a bar_plot which shows the percentage of people living in rural, urban areas (and the average) for a number of countries, and wish to show this split by gender. I can plot one of these categories on a simple bar plot by using a subset call within the ggplot function as follows:
ggplot(edu_melt[c(edu_melt$sex!="b" & edu_melt$location==c("Urban")), ], aes(x=COUNTRY, y=percent, fill=sex)) + geom_bar(position="dodge", width=0.5) + facet_grid(~location) + labs(x="Country") + theme(axis.text.x = element_text(angle=30, hjust=1, vjust=1))
I would however like to compare the data across the location (e.g. urban, rural, and both). I thought this would be a simple case of introducing a facet_wrap call, however I get some odd behaviour where the data is plotted across the three facets - I would expect 20 pairs of bars on each facet, however this code produces 20 pairs of bars spread over the three facets?!
ggplot(edu_melt_over[c(edu_melt_over$sex!="b"),], aes(x=COUNTRY, y=percent, fill=sex)) + geom_bar(position="dodge", width=0.5, space=1) + facet_wrap(~location, nrow=3) + labs(x="Country", title="Proportion Net Primary School Enrolement in ESA") + theme(axis.text.x = element_text(angle=30, hjust=1, vjust=1))
I'm not sure why this is happening, but have searched for hints and tips and tried a number of approaches, but get the same result. Anybody have any idea how I could produce this plot?
Thanks
Marty
Your data looks odd as you don't seem to have any combinations of male and female in the same strata (e.g. Angola has a male urban percent but no female). This is the data not the plotting.
# geom_col() draws the precomputed `percent` values as bar heights;
# geom_bar() defaults to counting rows and rejects a mapped y aesthetic.
# Rows with sex == "b" (both) are dropped so only m/f bars are dodged.
ggplot(
  edu_melt[edu_melt$sex != "b", ],
  aes(x = COUNTRY, y = percent, fill = sex)
) +
  geom_col(position = "dodge", width = 0.25) +
  facet_grid(location ~ .) +
  labs(x = "Country") +
  theme(axis.text.x = element_text(angle = 30))

Resources