Related
I'm currently practicing my R using tidyverse/dpylr library.
So I re-used this example from this post. I try to train with dpylr so i wanted to put value of muni_o bigger than 10 in bold in the y axis. I tried the following code:
pop=structure(list(muni_o = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L,
34L, 35L, 36L, 37L, 38L, 39L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L,
22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L,
35L, 36L, 37L, 38L, 39L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L,
23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L,
36L, 37L, 38L, 39L), .Label = c("1", "2", "3", "4", "5", "6",
"7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28",
"29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"
), class = "factor"), muni_d2 = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L
), .Label = c("0", "1", "2"), class = "factor"), Freq = c(6122.14220014045,
14014.8937908602, 3212.98371738453, 3762.86973933326, 2036.58101043117,
49935.7095757787, 5660.24459378729, 27133.6814011703, 8436.05010435109,
1244.59144510279, 8192.3151623091, 5125.775303556, 5520.96031514796,
971.04877757262, 13042.4320509421, 11677.9344908548, 23179.1486978947,
19342.6692995674, 11480.8298502747, 773.75470216998, 658.84762129725,
47303.8942738045, 12189.6600233976, 34138.2766548561, 2280.85569280618,
6648.00527768305, 13731.4673121252, 6822.03535863008, 1654.5719919344,
3408.63547537664, 8289.59920490436, 68250.529943451, 42919.3498777027,
11304.5876393861, 1371.59278771158, 281250.973114875, 11758.2956764985,
12160.5828034274, 4762.03601855022, 3774.00754913303, 17629.1897496886,
450.28749520739, 11504.661338085, 2732.14923854684, 40180.8293143395,
16562.6710623701, 43865.6430358497, 36647.9484766594, 7035.57607097289,
25651.323071728, 26432.811604918, 23743.6986932767, 597.5643668538,
124644.985934809, 29773.2098362482, 13788.9040158001, 24718.4440592384,
6526.85217011751, 474.70640264902, 5440.01365034166, 29930.306857298,
11072.6889810259, 101099.685991706, 254.95577591974, 11637.2716353329,
5367.72912672273, 2504.81681919538, 97.98256049428, 2258.20425774592,
8475.80585487552, 40564.9077830157, 59907.8345066714, 12858.502494956,
850.24914346085, 0, 14152.3410516667, 41777.9839790016, 2730.8225408641,
13662.2793985781, 46579.248451629, 5035.33799662119, 9167.68431269098,
15713.4248568278, 36391.6173102699, 36523.5128982161, 56158.4213063058,
17666.1932118543, 10319.0225229469, 14744.0900996851, 14519.6971048675,
14341.9635886829, 5193.52143006156, 267285.439969485, 10049.4551896091,
17861.0606322283, 56589.3203766755, 14836.888817694, 6069.4455916734,
18992.3441918275, 52074.0110108799, 88973.5164027747, 82777.2109430964,
2270.54970959312, 12030.813277725, 15414.1038338142, 3284.84133984456,
3101.73291583232, 10020.5318813645, 25286.7675500444, 114919.563601638,
185758.597625183, 28154.2996127091, 2873.4152126078, 3503521.52693064,
49555.0928217366, 35402.2559957372, 7917.49624385274)), class = "data.frame", row.names = c(NA,
-117L))
pop<-mutate(pop, COLORS=case_when(as.numeric(muni_o)>10~ 'bold',TRUE~ 'plain'))
pop %>%
group_by(muni_o) %>%
mutate(prop = Freq / sum(Freq)) %>%
ungroup() %>%
arrange(desc(muni_d2), prop) %>%
mutate(muni_o = factor(muni_o, levels = unique(muni_o))) %>%
ggplot(aes(muni_o, prop, fill = muni_d2)) +
geom_col() +
coord_flip()+
theme(axis.text.y = element_text(face =pop$COLORS,size=14))
The column I added COLORS seems correct. But I think my COLORS column is not modified to be associated with muni_o (because of the unique function use on muni_o) so the bold/plain order doesn't correspond. Am I doing something wrong?
The issue is that the data frame you use for plotting has a different order than the original data frame pop. Hence the assignment between COLORS and muni_o is messed up. To prevent this save the df after your data wrangling as e.g. pop1 and use pop1$COLORS:
library(dplyr)
library(ggplot2)
pop<-mutate(pop, COLORS=case_when(as.numeric(muni_o)>10~ 'bold',TRUE~ 'plain'))
pop1 <- pop %>%
group_by(muni_o) %>%
mutate(prop = Freq / sum(Freq)) %>%
ungroup() %>%
arrange(desc(muni_d2), prop) %>%
mutate(muni_o = factor(muni_o, levels = unique(muni_o)))
pop1 %>%
ggplot(aes(muni_o, prop, fill = muni_d2)) +
geom_col() +
coord_flip()+
theme(axis.text.y = element_text(face =pop1$COLORS,size=14))
#> Warning: Vectorized input to `element_text()` is not officially supported.
#> Results may be unexpected or may change in future versions of ggplot2.
I'm trying to re order my datas, I already found the code to use but it doesn't seem to work...
Can you help me please ?
This is my code :
#code for my boxplot
dat.m2 <- melt(H1,id.vars='fusion', measure.vars=c('FF','FM'))
dat.m2 <- melt(H1,id.vars='fusion', measure.vars=c('FF','FM'))
ggplot(dat.m2)+ geom_boxplot(aes(x=fusion, y=value, colour=variable))+ facet_wrap(~H1$Genotype)+
xlab(" ")+ ylab("Days after sowing")
#code tried to re order
levels(dat.m2$fusion)
dat.m2$fusion<-factor(dat.m2$fusion, levels=c("Control", "CK20", "CK100", "CK500", "GA20", "GA100", "GA500"))
I tried to run again the first code after re ordering but it didn't work...
You can also find attached the image of the boxplot that I'm trying to modify
Thanks
EDIT :
> dput(head(H1, 20))
structure(list(Genotype = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L), .Label = c("F1045",
"FF", "M1585", "M1610"), class = "factor"), X = structure(c(1L,
105L, 116L, 127L, 138L, 149L, 160L, 171L, 182L, 2L, 13L, 24L,
35L, 46L, 57L, 68L, 79L, 90L, 101L, 106L), .Label = c("H1", "H10",
"H100", "H101", "H102", "H103", "H104", "H105", "H106", "H107",
"H108", "H109", "H11", "H110", "H111", "H112", "H113", "H114",
"H115", "H116", "H117", "H118", "H119", "H12", "H120", "H121",
"H122", "H123", "H124", "H125", "H126", "H127", "H128", "H129",
"H13", "H130", "H131", "H132", "H133", "H134", "H135", "H136",
"H137", "H138", "H139", "H14", "H140", "H141", "H142", "H143",
"H144", "H145", "H146", "H147", "H148", "H149", "H15", "H150",
"H151", "H152", "H153", "H154", "H155", "H156", "H157", "H158",
"H159", "H16", "H160", "H161", "H162", "H163", "H164", "H165",
"H166", "H167", "H168", "H169", "H17", "H170", "H171", "H172",
"H173", "H174", "H175", "H176", "H177", "H178", "H179", "H18",
"H180", "H181", "H182", "H183", "H184", "H185", "H186", "H187",
"H188", "H189", "H19", "H190", "H191", "H192", "H2", "H20", "H21",
"H22", "H23", "H24", "H25", "H26", "H27", "H28", "H29", "H3",
"H30", "H31", "H32", "H33", "H34", "H35", "H36", "H37", "H38",
"H39", "H4", "H40", "H41", "H42", "H43", "H44", "H45", "H46",
"H47", "H48", "H49", "H5", "H50", "H51", "H52", "H53", "H54",
"H55", "H56", "H57", "H58", "H59", "H6", "H60", "H61", "H62",
"H63", "H64", "H65", "H66", "H67", "H68", "H69", "H7", "H70",
"H71", "H72", "H73", "H74", "H75", "H76", "H77", "H78", "H79",
"H8", "H80", "H81", "H82", "H83", "H84", "H85", "H86", "H87",
"H88", "H89", "H9", "H90", "H91", "H92", "H93", "H94", "H95",
"H96", "H97", "H98", "H99"), class = "factor"), Hormone = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("CK", "Control", "GA"), class = "factor"),
Hormone.quantity = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("100",
"20", "500", "Control"), class = "factor"), fusion = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L), .Label = c("CK 100", "CK 20", "CK 500",
"Control", "GA 100", "GA 20", "GA 500"), class = "factor"),
DL = c(16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), LI = c(100L,
100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L,
100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L), Temperature = c(21L,
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L,
21L, 21L, 21L, 21L, 21L, 21L, 21L), Sowing.date = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = "25-mrt", class = "factor"), BTD10 = structure(c(6L,
7L, 6L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
6L, 4L, 4L, 4L), .Label = c("16-apr", "17-apr", "18-apr",
"19-apr", "21-mei", "23-apr", "26-apr", "30-apr"), class = "factor"),
FFLDT = structure(c(13L, 18L, 4L, 9L, 18L, 3L, 2L, 13L, 8L,
10L, 18L, 10L, 8L, 8L, 8L, 10L, 11L, 11L, 1L, 11L), .Label = c("",
"10-mei", "14-apr", "14-mei", "17-mei", "18-jun", "21-mei",
"23-apr", "24-mei", "26-apr", "28-mei", "3-apr", "3-mei",
"30-apr", "31-mei", "4-jun", "7-jun", "7-mei"), class = "factor"),
FH = structure(c(42L, 62L, 67L, 18L, 59L, 7L, 5L, 52L, 53L,
62L, 65L, 58L, 53L, 42L, 52L, 58L, 24L, 55L, 1L, 54L), .Label = c("",
"10", "10,5", "11", "11,5", "11,7", "12", "12,3", "12,5",
"13", "13,5", "14", "14,3", "14,5", "15", "15,3", "15,5",
"16", "16-jan", "17", "18", "18,5", "19", "20", "20,5", "21",
"21,5", "22", "22,5", "23", "23,5", "24,5", "25", "25,5",
"26", "26,5", "27", "27,5", "29", "29-mei", "3", "3,5", "30",
"30,5", "31,5", "32", "32,5", "33", "35", "36", "37", "4",
"4,5", "40", "42", "43", "47", "5", "5,5", "53", "55", "6",
"6,5", "7", "8", "8,5", "9", "9,5"), class = "factor"), SRDT = structure(c(3L,
8L, 1L, 1L, 8L, 1L, 8L, NA, NA, NA, 4L, NA, 15L, 12L, 14L,
14L, 15L, 15L, 1L, 15L), .Label = c("", "10-mei", "11-jun",
"13-jun", "13-mei", "14-mei", "17-mei", "18-jun", "21-jun",
"21-mei", "24-mei", "28-mei", "3-mei", "31-mei", "4-jun",
"7-jun", "7-mei"), class = "factor"), MH = c(26, 50, NA,
NA, 46, NA, 61, NA, NA, NA, 40, NA, 68, 48, 47, 42, 26, 50,
NA, 48), SEEDT = structure(c(2L, 4L, 1L, 1L, 4L, 1L, 4L,
NA, NA, NA, 4L, NA, 9L, 8L, 8L, 8L, 4L, 3L, 1L, 4L), .Label = c("",
"11-jun", "13-jun", "18-jun", "20-mei", "21-jun", "28-mei",
"31-mei", "4-jun", "6-apr", "7-jun"), class = "factor"),
FERMK = c(7L, 8L, NA, NA, 8L, NA, 8L, NA, NA, NA, 5L, NA,
7L, 6L, 7L, 6L, NA, NA, NA, 4L), PLRMK = c(1L, 2L, NA, NA,
1L, NA, 1L, NA, NA, NA, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, NA,
1L), BT = structure(c(5L, 6L, 5L, 5L, 5L, 5L, 5L, 5L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 5L, 4L, 4L, 4L), .Label = c("",
"22", "23", "25", "29", "32", "bino"), class = "factor"),
FF = c(39L, 43L, 50L, 60L, 43L, 20L, 46L, 39L, 29L, 32L,
43L, 32L, 29L, 29L, 29L, 32L, 64L, 64L, NA, 64L), FM = c(78L,
85L, NA, NA, 85L, NA, 85L, NA, NA, NA, 80L, NA, 71L, 64L,
67L, 67L, 71L, 71L, NA, 71L), SEED = c(78L, 85L, NA, NA,
85L, NA, 85L, NA, NA, NA, 85L, NA, 71L, 67L, 67L, 67L, 85L,
80L, NA, 85L)), row.names = c(NA, 20L), class = "data.frame")
I am really struggling on the coding part for the R markdown but have no one to ask...
The data I am working on is, dput(survey):
structure(list(Time = structure(c(5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 1L, 2L, 3L, 4L, 35L, 42L, 46L, 30L, 31L,
33L, 34L, 29L, 36L, 37L, 38L, 39L, 40L, 41L, 43L, 44L, 45L, 47L,
48L, 32L, 54L, 55L, 50L, 49L, 51L, 52L, 53L, 57L, 59L, 56L, 60L,
61L, 58L, 62L), .Label = c("2017/08/06 10:25:01 PM GMT+10", "2017/08/06 10:26:54 PM GMT+10",
"2017/08/06 10:38:13 PM GMT+10", "2017/08/06 10:51:58 PM GMT+10",
"2017/08/06 4:53:07 PM GMT+10", "2017/08/06 4:58:44 PM GMT+10",
"2017/08/06 5:01:05 PM GMT+10", "2017/08/06 5:03:25 PM GMT+10",
"2017/08/06 5:04:50 PM GMT+10", "2017/08/06 5:06:51 PM GMT+10",
"2017/08/06 5:06:54 PM GMT+10", "2017/08/06 5:10:57 PM GMT+10",
"2017/08/06 5:11:16 PM GMT+10", "2017/08/06 5:18:21 PM GMT+10",
"2017/08/06 5:23:46 PM GMT+10", "2017/08/06 5:34:02 PM GMT+10",
"2017/08/06 5:43:10 PM GMT+10", "2017/08/06 5:54:52 PM GMT+10",
"2017/08/06 6:04:06 PM GMT+10", "2017/08/06 7:11:00 PM GMT+10",
"2017/08/06 7:13:21 PM GMT+10", "2017/08/06 7:32:45 PM GMT+10",
"2017/08/06 7:33:58 PM GMT+10", "2017/08/06 7:50:31 PM GMT+10",
"2017/08/06 8:02:07 PM GMT+10", "2017/08/06 8:28:39 PM GMT+10",
"2017/08/06 8:36:46 PM GMT+10", "2017/08/06 9:14:14 PM GMT+10",
"2017/08/07 1:59:14 PM GMT+10", "2017/08/07 10:28:13 AM GMT+10",
"2017/08/07 11:05:40 AM GMT+10", "2017/08/07 11:44:09 PM GMT+10",
"2017/08/07 12:18:04 PM GMT+10", "2017/08/07 12:49:27 PM GMT+10",
"2017/08/07 12:55:41 AM GMT+10", "2017/08/07 2:04:49 PM GMT+10",
"2017/08/07 2:14:56 PM GMT+10", "2017/08/07 2:17:10 PM GMT+10",
"2017/08/07 4:47:38 PM GMT+10", "2017/08/07 4:57:15 PM GMT+10",
"2017/08/07 7:08:44 PM GMT+10", "2017/08/07 9:12:16 AM GMT+10",
"2017/08/07 9:18:11 PM GMT+10", "2017/08/07 9:22:59 PM GMT+10",
"2017/08/07 9:23:43 PM GMT+10", "2017/08/07 9:32:10 AM GMT+10",
"2017/08/07 9:46:41 PM GMT+10", "2017/08/07 9:55:01 PM GMT+10",
"2017/08/08 1:36:16 PM GMT+10", "2017/08/08 10:27:59 AM GMT+10",
"2017/08/08 3:36:15 PM GMT+10", "2017/08/08 4:15:12 PM GMT+10",
"2017/08/08 6:39:28 PM GMT+10", "2017/08/08 8:44:38 AM GMT+10",
"2017/08/08 9:03:07 AM GMT+10", "2017/08/09 1:00:16 PM GMT+10",
"2017/08/09 10:17:55 AM GMT+10", "2017/08/09 10:26:28 PM GMT+10",
"2017/08/09 11:50:50 AM GMT+10", "2017/08/09 3:02:39 PM GMT+10",
"2017/08/09 9:48:19 PM GMT+10", "2017/08/10 7:32:00 AM GMT+10"
), class = "factor"), ID = structure(c(48L, 57L, 38L, 9L, 8L,
42L, 41L, 58L, 31L, 27L, 60L, 34L, 13L, 37L, 40L, 29L, 53L, 28L,
16L, 20L, 47L, 18L, 51L, 3L, 36L, 10L, 32L, 11L, 54L, 22L, 61L,
15L, 35L, 2L, 25L, 55L, 17L, 5L, 14L, 21L, 49L, 45L, 6L, 30L,
26L, 4L, 19L, 50L, 44L, 56L, 43L, 59L, 24L, 12L, 52L, 23L, 1L,
39L, 7L, 62L, 46L, 33L), .Label = c("1907", "3456", "450181964",
"460061490", "A", "ABCABCABC", "adsad", "affordance", "alexxx",
"AliceJ", "blueberry11", "Bob", "byue7515", "Cameron Nichols",
"Coelacanth", "crocophile", "Donald trump ", "DS2012-LB-S", "Gir",
"goly", "Grace", "greyshirt", "grob6576", "hahahahaha", "Harry",
"Insidestella", "ja150", "jane", "Jiashu Wu", "jmc", "Joohee0214",
"kakinna", "Kimbo Slice", "lhar7524", "lizebin", "Lucy", "Magician1213",
"Matchey", "md123", "mia", "MP", "N52981227", "Nattt", "Pete",
"rcon", "Ryan_eats_p-values", "S123", "Salmon ", "smarcon", "smile",
"snail", "sonja kay", "Thelimitdoesnotexist", "Toflin", "Tony Stark ",
"UriLover420", "valerie", "Whatzup", "Winky", "xwn19960829",
"zilu2637", "ZXFAARON"), class = "factor"), Gender = structure(c(3L,
2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 2L,
2L, 1L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L,
2L, 3L, 3L, 3L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 2L,
4L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 3L), .Label = c("crocodilian",
"Female", "Male", "Poisson"), class = "factor"), Postcode =structure(c(12L,
30L, 20L, 35L, 28L, 33L, 13L, 22L, 12L, 2L, 3L, 38L, 25L, 13L,
4L, 23L, 19L, 23L, 29L, 32L, 26L, 4L, 14L, 4L, 36L, 12L, 3L,
41L, 28L, 40L, 24L, 9L, 37L, 4L, 3L, 17L, 32L, 27L, 15L, 36L,
12L, 11L, 3L, 7L, 4L, 10L, 39L, 24L, 42L, 8L, 12L, 13L, 5L, 6L,
31L, 20L, 1L, 34L, 18L, 13L, 21L, 16L), .Label = c("14052", "2000",
"2007", "2008", "2020", "2021", "2022", "2026", "2031", "2037",
"2041", "2042", "2050", "2066", "2069", "2074", "2097", "2112",
"2117", "2131", "2134", "2136", "2137", "2138", "2140", "2144",
"2154", "2165", "2166", "2171", "2193", "2200", "2205", "2209",
"2216", "2220", "2228", "2756", "2762", "2765", "2780", "sydney"
), class = "factor"), StatsCourse = structure(c(4L, 4L, 4L, 4L,
4L, 4L, 1L, 4L, 4L, 4L, 3L, 4L, 4L, 5L, 4L, 4L, 5L, 6L, 4L, 4L,
4L, 4L, 5L, 4L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 6L, 4L, 2L, 5L, 4L, 5L, 6L, 4L, 2L,
2L, 4L, 4L, 5L, 7L, 5L, 1L, 4L, 4L, 4L), .Label = c("", "BUSS1020",
"MATH1001,MATH1002", "MATH1005", "MATH1015", "MATH1905", "none"
), class = "factor"), Clubs = structure(c(1L, 1L, 4L, 5L, 4L,
2L, 4L, 4L, 2L, 4L, 7L, 2L, 4L, 4L, 1L, 4L, 1L, 4L, 1L, 1L, 6L,
1L, 4L, 1L, 11L, 4L, 5L, 10L, 3L, 5L, 2L, 4L, 1L, 1L, 2L, 1L,
4L, 4L, 4L, 6L, 2L, 2L, 4L, 4L, 9L, 4L, 1L, 8L, 2L, 4L, 2L, 6L,
4L, 4L, 11L, 5L, 1L, 1L, 1L, 4L, 4L, 1L), .Label = c("0", "1",
"10+", "2", "3", "4", "5", "6", "7", "none", "None"), class = "factor"),
StudyTime = structure(c(24L, 3L, 26L, 27L, 17L, 2L, 10L,
14L, 23L, 7L, 19L, 3L, 17L, 29L, 23L, 22L, 10L, 10L, 28L,
23L, 6L, 14L, 20L, 7L, 17L, 28L, 5L, 16L, 20L, 3L, 21L, 3L,
23L, 7L, 17L, 10L, 1L, 18L, 10L, 17L, 10L, 7L, 13L, 5L, 15L,
3L, 8L, 17L, 19L, 17L, 3L, 30L, 31L, 1L, 4L, 3L, 20L, 9L,
14L, 11L, 12L, 25L), .Label = c("0", "05-Jun", "10", "11",
"12", "14", "15", "17", "2", "20", "20-24", "20-25?", "24",
"25", "28", "28 hours", "30", "31", "35", "4", "40", "49",
"5", "50", "6", "7", "70", "8", "8hr", "didn't start uni maybe 6h",
"not sure"), class = "factor"), StudyLoad = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("", "full-time", "part-time"), class = "factor"),
SocialMedia = structure(c(1L, 5L, 1L, 1L, 1L, 7L, 1L, 1L,
7L, 7L, 2L, 1L, 2L, 1L, 1L, 8L, 6L, 2L, 1L, 7L, 1L, 4L, 1L,
8L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 7L, 2L, 1L, 2L, 1L, 1L, 2L,
1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 5L, 5L, 1L, 1L, 2L, 2L,
1L, 3L, 1L, 2L, 2L, 1L, 2L, 1L, 1L), .Label = c("Facebook",
"Instragram", "none! (really)", "reddit", "Snapchat", "Tumblr",
"Twitter", "WeChat"), class = "factor"), Siblings = structure(c(2L,
4L, 4L, 1L, 4L, 1L, 2L, 4L, 5L, 2L, 1L, 2L, 2L, 1L, 4L, 1L,
1L, 4L, 2L, 2L, 8L, 2L, 2L, 3L, 1L, 1L, 2L, 5L, 2L, 7L, 1L,
4L, 2L, 6L, 1L, 6L, 2L, 5L, 1L, 1L, 4L, 4L, 2L, 2L, 1L, 2L,
1L, 1L, 4L, 4L, 2L, 9L, 1L, 2L, 10L, 2L, 4L, 2L, 2L, 1L,
2L, 2L), .Label = c("0", "1", "165", "2", "3", "4", "5",
"6", "none", "one"), class = "factor"), FBFriends = structure(c(49L,
43L, 6L, 3L, 28L, 2L, 9L, 13L, 21L, 19L, 30L, 40L, 37L, 20L,
35L, 32L, 53L, 47L, 30L, 22L, 8L, 45L, 14L, 15L, 38L, 16L,
45L, 31L, 35L, 43L, 34L, 23L, 52L, 18L, 34L, 27L, 33L, 11L,
42L, 24L, 51L, 26L, 17L, 50L, 39L, 19L, 10L, 12L, 4L, 44L,
46L, 29L, 45L, 36L, 54L, 20L, 7L, 5L, 41L, 25L, 1L, 48L), .Label = c("~300",
"10", "100", "1000", "1127", "115", "1192", "12", "120",
"121", "130", "148", "150", "1583", "165", "170", "174",
"190", "200", "213", "228", "229", "235", "240", "242", "256",
"259", "263", "27", "300", "308", "31", "382", "40", "400",
"431", "470", "5", "540", "548", "57", "572", "600", "664",
"700", "724", "800", "850", "90", "936", "978", "do not know",
"Don't have FB", "none (not in facebook)"), class = "factor"),
Grade = structure(c(18L, 19L, 11L, 31L, 33L, 14L, 22L, 18L,
6L, 9L, 19L, 18L, 22L, 23L, 24L, 30L, 28L, 16L, 2L, 14L,
3L, 12L, 21L, 2L, 12L, 12L, 6L, 29L, 12L, 27L, 17L, 6L, 12L,
17L, 17L, 15L, 24L, 20L, 7L, 14L, 12L, 10L, 22L, 34L, 24L,
17L, 16L, 12L, 24L, 32L, 26L, 25L, 26L, 13L, 4L, 12L, 1L,
5L, 12L, 8L, 24L, 35L), .Label = c("2.8", "50", "50-60",
"54", "6.25", "60", "61", "61.5", "62", "63", "64", "65",
"65.9", "66", "68", "69", "70", "72", "73", "73.2", "73.4",
"74", "74.6", "75", "8.7", "80", "82", "82.4", "83.2", "87",
"90", "90.1", "90.5", "91", "D"), class = "factor"), Pet = structure(c(3L,
2L, 3L, 1L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 3L, 2L, 3L, 3L,
2L, 3L, 3L, 2L, 2L, 3L, 2L, 2L, 3L, 2L, 2L, 2L, 3L, 3L, 3L,
2L, 3L, 2L, 2L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 2L,
3L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 3L, 2L, 3L, 3L, 2L, 2L, 2L,
3L), .Label = c("", "No", "Yes"), class = "factor"), Home = structure(c(2L,
3L, 3L, 1L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
2L, 2L, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 2L,
2L, 2L, 3L, 3L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 2L, 3L,
3L), .Label = c("", "No", "Yes"), class = "factor"), ExerciseTime = structure(c(10L,
12L, 7L, 1L, 4L, 7L, 7L, 5L, 7L, 12L, 13L, 5L, 10L, 7L, 15L,
15L, 10L, 10L, 5L, 14L, 2L, 9L, 4L, 5L, 7L, 4L, 14L, 8L,
10L, 13L, 1L, 13L, 1L, 13L, 13L, 5L, 7L, 16L, 16L, 14L, 10L,
14L, 7L, 6L, 12L, 10L, 10L, 13L, 13L, 14L, 7L, 11L, 2L, 2L,
17L, 16L, 7L, 7L, 2L, 3L, 13L, 15L), .Label = c("", "0",
"05-Jun", "1", "10", "12", "2", "2 hours", "20", "3", "3.5",
"4", "5", "6", "7", "8", "none"), class = "factor"), Eyecolor = structure(c(9L,
7L, 5L, 1L, 8L, 2L, 8L, 3L, 3L, 8L, 3L, 7L, 7L, 7L, 7L, 7L,
3L, 4L, 7L, 3L, 11L, 8L, 11L, 2L, 8L, 2L, 2L, 2L, 8L, 7L,
1L, 7L, 2L, 7L, 3L, 4L, 10L, 7L, 8L, 7L, 7L, 6L, 7L, 3L,
8L, 2L, 8L, 7L, 4L, 8L, 9L, 3L, 7L, 5L, 7L, 8L, 12L, 7L,
7L, 8L, 3L, 8L), .Label = c("", "black", "Black", "blue",
"Blue", "Blue/Green", "brown", "Brown", "Brown ", "Brown/black",
"dark brown", "grey"), class = "factor"), Working = structure(c(2L,
8L, 2L, 1L, 4L, 2L, 2L, 8L, 2L, 24L, 2L, 13L, 5L, 3L, 26L,
2L, 8L, 13L, 24L, 2L, 12L, 2L, 9L, 8L, 2L, 2L, 2L, 11L, 2L,
10L, 1L, 4L, 21L, 2L, 2L, 15L, 14L, 21L, 26L, 18L, 4L, 2L,
7L, 27L, 12L, 2L, 20L, 2L, 19L, 25L, 8L, 2L, 2L, 17L, 23L,
16L, 2L, 6L, 2L, 13L, 13L, 22L), .Label = c("", "0", "1.5",
"10", "11", "12", "14", "15", "17", "18", "18 hours", "2",
"20", "24", "25", "26", "3", "3.5", "30", "38", "4", "40",
"44", "5", "6", "7", "8"), class = "factor"), Season = structure(c(2L,
3L, 2L, 1L, 5L, 2L, 3L, 4L, 4L, 3L, 4L, 4L, 3L, 3L, 4L, 3L,
3L, 3L, 3L, 5L, 3L, 3L, 2L, 5L, 5L, 4L, 2L, 2L, 5L, 2L, 3L,
2L, 2L, 3L, 2L, 4L, 2L, 3L, 5L, 3L, 4L, 5L, 3L, 4L, 4L, 4L,
3L, 4L, 4L, 4L, 3L, 2L, 2L, 2L, 3L, 4L, 4L, 3L, 2L, 4L, 4L,
3L), .Label = c("", "Autumn", "Spring", "Summer", "Winter"
), class = "factor")), .Names = c("Time", "ID", "Gender",
"Postcode", "StatsCourse", "Clubs", "StudyTime", "StudyLoad",
"SocialMedia", "Siblings", "FBFriends", "Grade", "Pet", "Home",
"ExerciseTime", "Eyecolor", "Working", "Season"), class = "data.frame", row.names = c(NA,
-62L))
And what I did so far is,
library(dplyr)
library(ggplot2)
library(tidyr)
library(knitr)
survey <- read.csv("STAT2012Survey.csv")
colnames(survey)
oldname = colnames(survey)
newname = c("Time", "ID", "Gender", "Postcode", "StatsCourse", "Clubs", "StudyTime",
"StudyLoad", "SocialMedia", "Siblings", "FBFriends", "Grade", "Pet", "Home",
"ExerciseTime", "Eyecolor", "Working", "Season")
colnames(survey) = newname
What I want to achieve is, I want to provide a hypothesis test about
"Is there any evidence that there is difference in exercise time between males and females?"
To do this, I need to get the mean and standard deviation sort of that stuffs in order to test the two-sample t-test but I do not know how to approach to it
Also, to visualize the data with graph, I tried,
ggplot(survey, aes(x = Gender, y = ExerciseTime, fill = Gender)) + geom_boxplot()
however it only showed some strange graph. I think it is because the "ExerciesTime" variable is not numeric, but I am stuck on it as well since ggplot2 does not deal with the data of class numeric...
Someone please help me...! I want to make more hypothesis tests towards multiple questions but I am stuck on the first question... I might be able to achieve the goal if I know how to do the first one! Thanks.
Before you can make a boxplot, you will need to make ExerciseTime a numeric variable. The problem you will have with that is some of the responses don't easily turn numeric (2 hours, for example, should probably be 2, but it will require an extra step to get rid of the text).
As a start, though, let's just do the easiest case of take anything that isn't a natural number and let it change to a missing value.
survey2 <-
survey %>%
mutate(ExerciseTime = as.character(ExerciseTime),
ExerciseTime = str_replace(ExerciseTime, "\\d{2}-\\w{3}", ""),
ExerciseTime = str_extract(ExerciseTime, "\\d{1,2}"),
ExerciseTime = as.numeric(ExerciseTime))
ggplot(data = survey,
mapping = aes(x = Gender,
y = ExerciseTime,
fill = Gender)) +
geom_boxplot()
What I'm trying to do is getting a dataframe where the repeated rows in the first column act as an index to copy the corresponding rows of other columns. I know this sound messy, and my inability to accurately state the issue is one of the reasons I'm having so many problems with this.
I'll provide a reproducible example below.
structure(list(Var1 = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L), .Label = c("2016-01", "2016-02", "2016-03", "2016-04",
"2016-05", "2016-06", "2016-07", "2016-08", "2016-09", "2016-10",
"2016-11", "2016-12", "2017-01", "2017-02", "2017-03", "2017-04",
"2017-05"), class = "factor"), Var2 = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L
), .Label = c("B2B", "B2C", "B2K"), class = "factor"), Freq = c(5L,
13L, 8L, 13L, 36L, 5L, 18L, 1L, 12L, 24L, 22L, 6L, 24L, 15L,
11L, 26L, 1L, 338L, 285L, 291L, 232L, 142L, 42L, 92L, 9L, 46L,
34L, 45L, 35L, 30L, 31L, 36L, 56L, 9L, 0L, 1L, 0L, 0L, 0L, 0L,
7L, 0L, 13L, 0L, 1L, 0L, 0L, 0L, 0L, 2L, 0L)), .Names = c("Var1",
"Var2", "Freq"), class = "data.frame", row.names = c(NA, -51L
))
basically what I want is:
On Var1 no repeated dates
On the row where the date is repeated, take the value of Var2 and Freq and copy them in two new columns to the index of the unique date
This must be done for every distinct level of Var2
Thank you in advance!
I think what your trying to explain is a dcast. Does this end up how you want it?
library(reshape2)
dcast(x,Var1~Var2,value.var="Freq")
A base R option would be
xtabs(Freq~Var1 + Var2, df1)
I'm having some trouble producing a faceted bar_plot in ggplot2. Perhaps it is something very obvious, but I can't figure it out:( I've the following dataset:
structure(list(COUNTRY = structure(c(1L, 4L, 7L, 10L, 13L, 16L,
19L, 2L, 5L, 8L, 11L, 14L, 17L, 20L, 3L, 6L, 9L, 12L, 15L, 18L,
2L, 5L, 8L, 11L, 14L, 17L, 20L, 3L, 6L, 9L, 12L, 15L, 18L, 1L,
4L, 7L, 10L, 13L, 16L, 19L, 3L, 6L, 9L, 12L, 15L, 18L, 1L, 4L,
7L, 10L, 13L, 16L, 19L, 2L, 5L, 8L, 11L, 14L, 17L, 20L), .Label = c("Angola",
"Botswana", "Burundi", "Comoros", "Eritrea", "Ethiopia", "Kenya",
"Lesotho", "Madagascar", "Malawi", "Mozambique", "Namibia", "Rwanda",
"Somalia", "South Africa", "Swaziland", "Tanzania", "Uganda",
"Zambia", "Zimbabwe"), class = "factor"), Year = structure(c(2L,
2L, 14L, 16L, 16L, 11L, 12L, 2L, 4L, 15L, 5L, 10L, 16L, 16L,
2L, 17L, 14L, 11L, 12L, 10L, 2L, 4L, 15L, 5L, 10L, 16L, 16L,
2L, 17L, 14L, 11L, 12L, 10L, 2L, 2L, 14L, 16L, 16L, 11L, 12L,
2L, 17L, 14L, 11L, 12L, 10L, 2L, 2L, 14L, 16L, 16L, 11L, 12L,
2L, 4L, 15L, 5L, 10L, 16L, 16L), .Label = c("1998", "2000", "2001/2",
"2002", "2003", "2003/4", "2004", "2005", "2005/6", "2006", "2006/7",
"2007", "2007/8", "2008/9", "2009", "2010", "2011"), class = "factor"),
sex = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L), .Label = c("m", "f", "b"), class = "factor"),
location = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Urban", "Rural", "Total",
"Capital.City", "Other.Cities.towns", "Urban.Non.slum", "Urban.Slum"
), class = "factor"), percent = c(60.4, 42.3, 85.4919452426806,
96.3, 90.2847535659154, 87.7347421555771, 87.7323067592087,
80.4, 80.6, 93.8186266493188, 75.0109418832216, 36.8, 87.1059275774722,
90.1216932603937, 66.8, 83.6279398931798, 89.690685909038,
88.8207941092749, 94.6139558774441, 88.0251085200726, 70.4,
54.7, 86.1919805548309, 56.9792710715853, 13.1, 75.6355555697382,
86.8196674671991, 42.5, 61.9452522893308, 77.597285694676,
88.3453320625631, 94.5192341778471, 80.6271302923487, 44.1,
29, 77.8542469357068, 90, 86.7073851186482, 83.8921034867784,
76.4094871587916, 49.3, 63.952805392032, 77.004884485532,
88.6723566877386, 93.9560433940531, 82.3095948307742, 56.1,
31.1, 80.0235653889704, 91.5, 88.3809682134183, 85.5656196766576,
80.0539027063387, 77, 61.2, 89.2538966046165, 59.6756344409838,
23, 79.6749544074645, 86.9507859695728)), .Names = c("COUNTRY",
"Year", "sex", "location", "percent"), row.names = c(1L, 4L,
7L, 10L, 13L, 16L, 19L, 22L, 25L, 28L, 31L, 34L, 37L, 40L, 43L,
46L, 49L, 52L, 55L, 58L, 62L, 65L, 68L, 71L, 74L, 77L, 80L, 83L,
86L, 89L, 92L, 95L, 98L, 101L, 104L, 107L, 110L, 113L, 116L,
119L, 123L, 126L, 129L, 132L, 135L, 138L, 141L, 144L, 147L, 150L,
153L, 156L, 159L, 162L, 165L, 168L, 171L, 174L, 177L, 180L), class = "data.frame")
I am trying to make a bar_plot which shows the percentage of people living in rural, urban areas (and the average) for a number of countries, and wish to show this split by gender. I can plot one of these categories on a simple bar plot by using a subset call within the ggplot function as follows:
ggplot(edu_melt[c(edu_melt$sex!="b" & edu_melt$location==c("Urban")), ], aes(x=COUNTRY, y=percent, fill=sex)) + geom_bar(position="dodge", width=0.5) + facet_grid(~location) + labs(x="Country") + theme(axis.text.x = element_text(angle=30, hjust=1, vjust=1))
I would however like to compare the data across the location (e.g. urban, rural, and both). I thought this would be a simple case of introducing a facet_wrap call, however I get some odd behaviour where the data is plotted across the three facets - I would expect 20 pairs of bars on each facet, however this code produces 20 pairs of bars spread over the three facets?!
ggplot(edu_melt_over[c(edu_melt_over$sex!="b"),], aes(x=COUNTRY, y=percent, fill=sex)) + geom_bar(position="dodge", width=0.5, space=1) + facet_wrap(~location, nrow=3) + labs(x="Country", title="Proportion Net Primary School Enrolement in ESA") + theme(axis.text.x = element_text(angle=30, hjust=1, vjust=1))
I'm not sure why this is happening, but have searched for hints and tips and tried a number of approaches, but get the same result. Anybody have any idea how I could produce this plot?
Thanks
Marty
Your data looks odd as you don't seem to have any combinations of male and female in the same strata (e.g. Angola has a male urban percent but no female). This is the data not the plotting.
ggplot(edu_melt[edu_melt$sex!="b", ], aes(x=COUNTRY, y=percent, fill=sex)) +
geom_bar(position="dodge", width=0.25) + facet_grid(location~.) + labs(x="Country") +
theme(axis.text.x = element_text(angle=30))