Plotting bar chart on map using ggplot2 - r

I have to plot bargraphs on a map using ggplot2.
library(ggplot2)
# Base layer: draw the mapindia polygons (x = long, y = lat, grouped by `group`)
# with a fixed fill and black outline, in a map projection.
# NOTE(review): the bare `colour` inside aes() has no value assigned to it and
# looks like a stray argument -- confirm it is not needed.
q=ggplot(data=mapindia, aes(long, lat, group=group,colour)) + geom_polygon(fill="#FF9999", colour="black") +coord_map()
# Overlay points taken from `religion`.
# NOTE(review): x is mapped to LATITUDE and y to LONGITUDE, whereas the base
# plot uses x = long, y = lat -- these look swapped; confirm against the
# columns of `religion` (not shown here). coord_map() is also added a second
# time.
q=q+geom_point(aes(x=LATITUDE, y=LONGITUDE,group=state_name),data=religion)+coord_map()
# Add one bar-chart subplot per location from `simdat`.
# NOTE(review): this layer does not set `group`, so it presumably inherits
# group=group from ggplot(); simdat's columns are state, long, lat, value and
# variable (no `group`), which is consistent with the reported
# "object 'group' not found" error. `fact` is also not a top-level column of
# simdat: it is nested inside the data-frame column `variable`.
q <- q+geom_subplot2d(aes(long,lat,subplot = geom_bar(aes(value,fill=fact))), ref = NULL, width = rel(0.8), data = simdat)
print(q)
But got the following error:
## Using binwidth 2.12
## Using binwidth 2.13
## Don't know how to automatically pick scale for object of type data.frame. Defaulting to continuous
## Error in eval(expr, envir, enclos) : object 'group' not found
My data sets
dput(head(mapindia, 100))
structure(list(long = c(72.94823, 72.948775, 72.96202, 72.964302,
72.968211, 72.970708, 72.964092, 72.956172, 72.943367, 72.931651,
72.93708, 72.948365, 72.947281, 72.938496, 72.930794, 72.924501,
72.918209, 72.910397, 72.893036, 72.879257, 72.878837, 72.880537,
72.882884, 72.882518, 72.885103, 72.88869, 72.892337, 72.89675,
72.904402, 72.911528, 72.924481, 72.944418, 72.950934, 72.955821,
72.962662, 72.95561, 72.94823, 73.043316, 73.042348, 73.040185,
73.035095, 73.028379, 73.024913, 73.022964, 73.022209, 73.025029,
73.032294, 73.03815, 73.043791, 73.046396, 73.047159, 73.053124,
73.066142, 73.080464, 73.092181, 73.105632, 73.114977, 73.134375,
73.147935, 73.15217, 73.156084, 73.16586, 73.178224, 73.182115,
73.182861, 73.181982, 73.182834, 73.190533, 73.20268, 73.213414,
73.227186, 73.235096, 73.237463, 73.230614, 73.219987, 73.211622,
73.210741, 73.202164, 73.190022, 73.177, 73.165391, 73.150641,
73.1438, 73.13869, 73.133474, 73.13281, 73.136697, 73.160516,
73.178507, 73.192372, 73.206024, 73.208846, 73.214068, 73.225464,
73.236084, 73.258275, 73.268327, 73.272309, 73.269801, 73.265777
), lat = c(20.466602, 20.459483, 20.44602, 20.43615, 20.424953,
20.412235, 20.398937, 20.402725, 20.410493, 20.404022, 20.392923,
20.385058, 20.379266, 20.374414, 20.375353, 20.377813, 20.380272,
20.382634, 20.390107, 20.390654, 20.405481, 20.415684, 20.428257,
20.443762, 20.453246, 20.460464, 20.466909, 20.47418, 20.482435,
20.486774, 20.501579, 20.496404, 20.491097, 20.487117, 20.477538,
20.469934, 20.466602, 20.219381, 20.232198, 20.240647, 20.24748,
20.255641, 20.261145, 20.266745, 20.276713, 20.279753, 20.284505,
20.287736, 20.293816, 20.299704, 20.309768, 20.311575, 20.320982,
20.333331, 20.339792, 20.343499, 20.343211, 20.346733, 20.349013,
20.353571, 20.362402, 20.374462, 20.372372, 20.361166, 20.351196,
20.342554, 20.331159, 20.330208, 20.330965, 20.330202, 20.329626,
20.325824, 20.314522, 20.304079, 20.303419, 20.292882, 20.28424,
20.276552, 20.275796, 20.266398, 20.258519, 20.251874, 20.241429,
20.228231, 20.216457, 20.204968, 20.193763, 20.18094, 20.184923,
20.182923, 20.183772, 20.186809, 20.198582, 20.209307, 20.209965,
20.198458, 20.186199, 20.173564, 20.166254, 20.15885), order = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L,
45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L,
58L, 59L, 60L, 61L, 62L, 63L), hole = c(FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE), piece = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("1", "2"), class = "factor"), group = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("0.1", "1.1", "2.1", "3.1", "4.1", "5.1",
"5.2", "6.1", "7.1", "8.1", "9.1", "10.1", "11.1", "12.1", "13.1",
"14.1", "15.1", "16.1", "17.1", "18.1", "19.1", "20.1", "21.1",
"22.1", "23.1", "24.1", "25.1", "26.1", "27.1", "28.1", "29.1",
"30.1", "31.1", "32.1", "33.1", "34.1", "35.1", "36.1", "37.1",
"38.1", "39.1", "40.1", "41.1", "42.1", "43.1", "44.1", "45.1",
"46.1", "47.1", "48.1", "49.1", "50.1", "51.1", "52.1", "53.1",
"54.1", "55.1", "56.1", "57.1", "58.1", "59.1", "60.1", "61.1",
"62.1", "63.1", "64.1", "65.1", "66.1", "67.1", "68.1", "69.1",
"70.1", "71.1", "71.2", "72.1", "73.1", "74.1", "75.1", "76.1",
"77.1", "78.1", "79.1", "80.1", "81.1", "82.1", "83.1", "84.1",
"85.1", "86.1", "87.1", "87.2", "88.1", "88.2", "89.1", "90.1",
"91.1", "92.1", "93.1", "94.1", "95.1", "96.1", "97.1", "98.1",
"99.1", "100.1", "101.1", "102.1", "103.1", "104.1", "105.1",
"106.1", "107.1", "108.1", "109.1", "110.1", "111.1", "112.1",
"113.1", "114.1", "115.1", "116.1", "117.1", "118.1", "119.1",
"120.1", "121.1", "122.1", "123.1", "124.1", "125.1", "126.1",
"127.1", "128.1", "129.1", "130.1", "131.1", "132.1", "133.1",
"134.1", "135.1", "136.1", "137.1", "138.1", "139.1", "140.1",
"141.1", "142.1", "143.1", "144.1", "145.1", "146.1", "147.1",
"148.1", "149.1", "150.1", "151.1", "152.1", "153.1", "154.1",
"155.1", "156.1", "157.1", "158.1", "159.1", "160.1", "161.1",
"162.1", "163.1", "164.1", "165.1", "166.1", "167.1", "168.1"
), class = "factor"), id = c("0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1")), .Names = c("long", "lat", "order", "hole", "piece",
"group", "id"), row.names = c(NA, 100L), class = "data.frame")
dput(head(simdat, 100))
structure(list(state = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L, 14L, 14L, 14L, 14L), .Label = c("", "Andhra Pradesh", "ASSAM",
"Bihar", "Chandigarh", "CHHATTISGARH", "DADRA & NAGAR HAVELI",
"DAMAN & DIU", "Delhi", "GOA", "GUJARAT", "Haryana", "Himachal",
"Jharkhand", "KARNATAKA", "KERALA", "MAHARASHTRA", "MANIPUR",
"MEGHALAYA", "MP", "Odissa", "PONDICHERRY", "Punjab", "Rajasthan",
"TAMIL NADU", "TRIPURA", "UP", "Uttrakhand", "WEST BENGAL"), class = "factor"),
long = c(78.30000305, 78.30000305, 78.30000305, 78.30000305,
78.30000305, 78.30000305, 78.30000305, 78.30000305, 91.5,
91.5, 91.5, 91.5, 91.5, 91.5, 91.5, 91.5, 85.12999725, 85.12999725,
85.12999725, 85.12999725, 85.12999725, 85.12999725, 85.12999725,
85.12999725, 76.798553, 76.798553, 76.798553, 76.798553,
76.798553, 76.798553, 76.798553, 76.798553, 81.62999725,
81.62999725, 81.62999725, 81.62999725, 81.62999725, 81.62999725,
81.62999725, 81.62999725, 72.96666718, 72.96666718, 72.96666718,
72.96666718, 72.96666718, 72.96666718, 72.96666718, 72.96666718,
72.806396, 72.806396, 72.806396, 72.806396, 72.806396, 72.806396,
72.806396, 72.806396, 72.806396, 72.806396, 72.806396, 72.806396,
72.806396, 72.806396, 72.806396, 72.806396, 73.96992109,
73.96992109, 73.96992109, 73.96992109, 73.96992109, 73.96992109,
73.96992109, 73.96992109, 72.40000153, 72.40000153, 72.40000153,
72.40000153, 72.40000153, 72.40000153, 72.40000153, 72.40000153,
75.959473, 75.959473, 75.959473, 75.959473, 75.959473, 75.959473,
75.959473, 75.959473, 75.959473, 75.959473, 75.959473, 75.959473,
75.959473, 75.959473, 75.959473, 75.959473, 85.33000183,
85.33000183, 85.33000183, 85.33000183), lat = c(17.20000076,
17.20000076, 17.20000076, 17.20000076, 17.20000076, 17.20000076,
17.20000076, 17.20000076, 26.09000015, 26.09000015, 26.09000015,
26.09000015, 26.09000015, 26.09000015, 26.09000015, 26.09000015,
25.37000084, 25.37000084, 25.37000084, 25.37000084, 25.37000084,
25.37000084, 25.37000084, 25.37000084, 30.744196, 30.744196,
30.744196, 30.744196, 30.744196, 30.744196, 30.744196, 30.744196,
21.22999954, 21.22999954, 21.22999954, 21.22999954, 21.22999954,
21.22999954, 21.22999954, 21.22999954, 20.26666641, 20.26666641,
20.26666641, 20.26666641, 20.26666641, 20.26666641, 20.26666641,
20.26666641, 20.25189, 20.25189, 20.25189, 20.25189, 20.25189,
20.25189, 20.25189, 20.25189, 20.25189, 20.25189, 20.25189,
20.25189, 20.25189, 20.25189, 20.25189, 20.25189, 15.38429276,
15.38429276, 15.38429276, 15.38429276, 15.38429276, 15.38429276,
15.38429276, 15.38429276, 23.03000069, 23.03000069, 23.03000069,
23.03000069, 23.03000069, 23.03000069, 23.03000069, 23.03000069,
29.017748, 29.017748, 29.017748, 29.017748, 29.017748, 29.017748,
29.017748, 29.017748, 29.017748, 29.017748, 29.017748, 29.017748,
29.017748, 29.017748, 29.017748, 29.017748, 23.35000038,
23.35000038, 23.35000038, 23.35000038), value = c(134L, 3L,
1098L, 16645L, 123L, 2070L, 37L, 66L, 2L, 4131L, 1L, 21L,
0L, 127L, 5L, 651L, 220L, 260L, 1827L, 17596L, 20L, 0L, 0L,
22L, 16L, 0L, 2L, 0L, 18L, 450L, 0L, 22L, 30L, 41L, 56L,
3L, 53L, 66L, 11L, 4674L, 0L, 28L, 0L, 1L, 328L, 3L, 0L,
33L, 0L, 34L, 5L, 5L, 11L, 0L, 440L, 0L, 0L, 318L, 16L, 122L,
2990L, 35L, 0L, 44L, 1L, 671L, 0L, 43L, 0L, 302L, 0L, 0L,
0L, 40L, 10054L, 379L, 1492L, 160L, 77L, 385L, 4348L, 0L,
157L, 59L, 0L, 12L, 334L, 159L, 2013L, 12L, 0L, 3L, 29L,
0L, 15L, 16L, 0L, 49L, 10L, 329L), variable = structure(list(
fact = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L,
4L, 7L, 8L, 2L, 3L, 5L, 6L, 3L, 1L, 6L, 4L, 8L, 2L, 7L,
5L, 8L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 8L, 7L, 2L, 3L, 1L,
6L, 5L, 4L, 5L, 1L, 7L, 8L, 4L, 6L, 2L, 3L, 7L, 6L, 8L,
1L, 5L, 3L, 4L, 2L, 7L, 6L, 5L, 8L, 4L, 1L, 2L, 3L, 5L,
4L, 8L, 6L, 7L, 3L, 2L, 1L, 2L, 7L, 4L, 5L, 6L, 3L, 8L,
1L, 4L, 7L, 6L, 3L, 2L, 5L, 1L, 8L, 4L, 3L, 7L, 5L, 6L,
2L, 8L, 1L, 2L, 1L, 5L, 3L), .Label = c("Buddhist", "Budhist",
"Christian", "Hindu", "Jain", "Muslim", "Others", "Sikh"
), class = "factor")), .Names = "fact", row.names = c(1L,
29L, 57L, 85L, 113L, 141L, 169L, 197L, 2L, 86L, 170L, 198L,
30L, 58L, 114L, 142L, 59L, 3L, 143L, 87L, 199L, 31L, 171L,
115L, 200L, 172L, 4L, 32L, 60L, 88L, 116L, 144L, 201L, 173L,
33L, 61L, 5L, 145L, 117L, 89L, 118L, 6L, 174L, 202L, 90L,
146L, 34L, 62L, 175L, 147L, 203L, 7L, 119L, 63L, 91L, 35L,
176L, 148L, 120L, 204L, 92L, 8L, 36L, 64L, 121L, 93L, 205L,
149L, 177L, 65L, 37L, 9L, 38L, 178L, 94L, 122L, 150L, 66L,
206L, 10L, 95L, 179L, 151L, 67L, 39L, 123L, 11L, 207L, 96L,
68L, 180L, 124L, 152L, 40L, 208L, 12L, 41L, 13L, 125L, 69L
), class = "data.frame")), .Names = c("state", "long", "lat",
"value", "variable"), row.names = c(NA, 100L), class = "data.frame")

There is a known issue with ggsubplot and R 3.1.0. See more on this here and here.
I can plot your data with ggplot but not with ggsubplot
library(ggplot2)

# Work on a copy of the question's data. Without this assignment `df` refers
# to the base function stats::df, and `df$fact <- ...` fails.
df <- simdat

# `variable` is a nested one-column data frame; lift its `fact` column to the
# top level so it can be used as an aesthetic.
df$fact <- df$variable$fact
df$state <- as.character(df$state)

# Drop the nested column by name instead of by position (it was column 5).
df$variable <- NULL

# One panel per state; within each panel, one bar per `fact` category.
# The x axis text, title and ticks are hidden because the fill legend already
# identifies the categories.
ggplot(df, aes(x = fact, y = value, fill = fact)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~state) +
  theme(
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    axis.ticks.x = element_blank(),
    strip.text = element_text(size = 8)
  )
If you want to represent your data with pies, you could try something like
library(dplyr)

# Share of each `fact` category within its state:
# prop = value / (sum of value over the state).
# ungroup() so the grouping does not leak into later operations on df1.
df1 <- df %>%
  group_by(state) %>%
  mutate(
    tsum = sum(value),
    prop = value / tsum
  ) %>%
  ungroup()

# A single stacked bar per state, bent into a pie by coord_polar(theta = "y").
# Fill uses `fact`, the category column of `df`; `fact1` is not defined
# anywhere in the data.
ggplot(df1, aes(x = factor(1), y = prop, fill = fact)) +
  geom_bar(stat = "identity") +
  coord_polar(theta = "y") +
  facet_wrap(~state, ncol = 4) +
  theme(
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks = element_blank(),
    strip.text = element_text(size = 8)
  )
For subplot, something like this should work:
library(ggplot2)
library(ggsubplot) # provides geom_subplot()
library(maptools)

# Outline of India taken from the world map shipped with maptools,
# converted to a plain data frame of polygon vertices for ggplot.
data(wrld_simpl)
india <- wrld_simpl[wrld_simpl$NAME == "India", ]
dfindia <- fortify(india)

# Base map. coord_map() is the ggplot2 map projection (also used in the
# question's code); there is no geom_coord() in ggplot2.
p <- ggplot() +
  geom_polygon(data = dfindia, aes(x = long, y = lat, group = group)) +
  coord_map()

# One embedded bar chart per state, positioned at that state's long/lat.
# NOTE(review): this assumes `simdat` has a top-level `fact` column; in the
# dput() shown in the question it is nested in `simdat$variable`, so it may
# need to be lifted first (simdat$fact <- simdat$variable$fact) -- confirm.
p + geom_subplot(
  data = simdat,
  aes(long, lat,
    group = state,
    subplot = geom_bar(aes(x = fact, y = value, fill = state),
      stat = "identity"
    )
  ),
  width = 10, height = 10
)

Related

How to re order datas in ggplot2

I'm trying to reorder my data. I already found the code to use, but it doesn't seem to work...
Can you help me, please?
This is my code :
# Code for my boxplot: reshape H1 to long format (one row per fusion/measure
# pair) and draw FF and FM side by side, one panel per genotype.
dat.m2 <- melt(H1,id.vars='fusion', measure.vars=c('FF','FM'))
# NOTE(review): this line repeats the previous one exactly and has no extra effect.
dat.m2 <- melt(H1,id.vars='fusion', measure.vars=c('FF','FM'))
# NOTE(review): the facet variable is read from H1, not from the plotted data
# dat.m2; Genotype is not among the id.vars passed to melt() above. Confirm
# the facets still line up with the melted rows.
ggplot(dat.m2)+ geom_boxplot(aes(x=fusion, y=value, colour=variable))+ facet_wrap(~H1$Genotype)+
xlab(" ")+ ylab("Days after sowing")
# Code I tried in order to reorder the x axis: inspect the current levels,
# then rebuild the factor with the desired level order.
levels(dat.m2$fusion)
# NOTE(review): in the dput() of H1 the fusion labels contain a space
# ("CK 20", "CK 100", "GA 500", ...), while the levels listed here do not
# ("CK20", ...). factor() turns values that match none of the given levels
# into NA, so only "Control" would survive.
dat.m2$fusion<-factor(dat.m2$fusion, levels=c("Control", "CK20", "CK100", "CK500", "GA20", "GA100", "GA500"))
I tried to run again the first code after re ordering but it didn't work...
You can also find attached the image of the boxplot that I'm trying to modify
Thanks
EDIT :
> dput(head(H1, 20))
structure(list(Genotype = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L), .Label = c("F1045",
"FF", "M1585", "M1610"), class = "factor"), X = structure(c(1L,
105L, 116L, 127L, 138L, 149L, 160L, 171L, 182L, 2L, 13L, 24L,
35L, 46L, 57L, 68L, 79L, 90L, 101L, 106L), .Label = c("H1", "H10",
"H100", "H101", "H102", "H103", "H104", "H105", "H106", "H107",
"H108", "H109", "H11", "H110", "H111", "H112", "H113", "H114",
"H115", "H116", "H117", "H118", "H119", "H12", "H120", "H121",
"H122", "H123", "H124", "H125", "H126", "H127", "H128", "H129",
"H13", "H130", "H131", "H132", "H133", "H134", "H135", "H136",
"H137", "H138", "H139", "H14", "H140", "H141", "H142", "H143",
"H144", "H145", "H146", "H147", "H148", "H149", "H15", "H150",
"H151", "H152", "H153", "H154", "H155", "H156", "H157", "H158",
"H159", "H16", "H160", "H161", "H162", "H163", "H164", "H165",
"H166", "H167", "H168", "H169", "H17", "H170", "H171", "H172",
"H173", "H174", "H175", "H176", "H177", "H178", "H179", "H18",
"H180", "H181", "H182", "H183", "H184", "H185", "H186", "H187",
"H188", "H189", "H19", "H190", "H191", "H192", "H2", "H20", "H21",
"H22", "H23", "H24", "H25", "H26", "H27", "H28", "H29", "H3",
"H30", "H31", "H32", "H33", "H34", "H35", "H36", "H37", "H38",
"H39", "H4", "H40", "H41", "H42", "H43", "H44", "H45", "H46",
"H47", "H48", "H49", "H5", "H50", "H51", "H52", "H53", "H54",
"H55", "H56", "H57", "H58", "H59", "H6", "H60", "H61", "H62",
"H63", "H64", "H65", "H66", "H67", "H68", "H69", "H7", "H70",
"H71", "H72", "H73", "H74", "H75", "H76", "H77", "H78", "H79",
"H8", "H80", "H81", "H82", "H83", "H84", "H85", "H86", "H87",
"H88", "H89", "H9", "H90", "H91", "H92", "H93", "H94", "H95",
"H96", "H97", "H98", "H99"), class = "factor"), Hormone = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("CK", "Control", "GA"), class = "factor"),
Hormone.quantity = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("100",
"20", "500", "Control"), class = "factor"), fusion = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L), .Label = c("CK 100", "CK 20", "CK 500",
"Control", "GA 100", "GA 20", "GA 500"), class = "factor"),
DL = c(16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), LI = c(100L,
100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L,
100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L), Temperature = c(21L,
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L,
21L, 21L, 21L, 21L, 21L, 21L, 21L), Sowing.date = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = "25-mrt", class = "factor"), BTD10 = structure(c(6L,
7L, 6L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
6L, 4L, 4L, 4L), .Label = c("16-apr", "17-apr", "18-apr",
"19-apr", "21-mei", "23-apr", "26-apr", "30-apr"), class = "factor"),
FFLDT = structure(c(13L, 18L, 4L, 9L, 18L, 3L, 2L, 13L, 8L,
10L, 18L, 10L, 8L, 8L, 8L, 10L, 11L, 11L, 1L, 11L), .Label = c("",
"10-mei", "14-apr", "14-mei", "17-mei", "18-jun", "21-mei",
"23-apr", "24-mei", "26-apr", "28-mei", "3-apr", "3-mei",
"30-apr", "31-mei", "4-jun", "7-jun", "7-mei"), class = "factor"),
FH = structure(c(42L, 62L, 67L, 18L, 59L, 7L, 5L, 52L, 53L,
62L, 65L, 58L, 53L, 42L, 52L, 58L, 24L, 55L, 1L, 54L), .Label = c("",
"10", "10,5", "11", "11,5", "11,7", "12", "12,3", "12,5",
"13", "13,5", "14", "14,3", "14,5", "15", "15,3", "15,5",
"16", "16-jan", "17", "18", "18,5", "19", "20", "20,5", "21",
"21,5", "22", "22,5", "23", "23,5", "24,5", "25", "25,5",
"26", "26,5", "27", "27,5", "29", "29-mei", "3", "3,5", "30",
"30,5", "31,5", "32", "32,5", "33", "35", "36", "37", "4",
"4,5", "40", "42", "43", "47", "5", "5,5", "53", "55", "6",
"6,5", "7", "8", "8,5", "9", "9,5"), class = "factor"), SRDT = structure(c(3L,
8L, 1L, 1L, 8L, 1L, 8L, NA, NA, NA, 4L, NA, 15L, 12L, 14L,
14L, 15L, 15L, 1L, 15L), .Label = c("", "10-mei", "11-jun",
"13-jun", "13-mei", "14-mei", "17-mei", "18-jun", "21-jun",
"21-mei", "24-mei", "28-mei", "3-mei", "31-mei", "4-jun",
"7-jun", "7-mei"), class = "factor"), MH = c(26, 50, NA,
NA, 46, NA, 61, NA, NA, NA, 40, NA, 68, 48, 47, 42, 26, 50,
NA, 48), SEEDT = structure(c(2L, 4L, 1L, 1L, 4L, 1L, 4L,
NA, NA, NA, 4L, NA, 9L, 8L, 8L, 8L, 4L, 3L, 1L, 4L), .Label = c("",
"11-jun", "13-jun", "18-jun", "20-mei", "21-jun", "28-mei",
"31-mei", "4-jun", "6-apr", "7-jun"), class = "factor"),
FERMK = c(7L, 8L, NA, NA, 8L, NA, 8L, NA, NA, NA, 5L, NA,
7L, 6L, 7L, 6L, NA, NA, NA, 4L), PLRMK = c(1L, 2L, NA, NA,
1L, NA, 1L, NA, NA, NA, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, NA,
1L), BT = structure(c(5L, 6L, 5L, 5L, 5L, 5L, 5L, 5L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 5L, 4L, 4L, 4L), .Label = c("",
"22", "23", "25", "29", "32", "bino"), class = "factor"),
FF = c(39L, 43L, 50L, 60L, 43L, 20L, 46L, 39L, 29L, 32L,
43L, 32L, 29L, 29L, 29L, 32L, 64L, 64L, NA, 64L), FM = c(78L,
85L, NA, NA, 85L, NA, 85L, NA, NA, NA, 80L, NA, 71L, 64L,
67L, 67L, 71L, 71L, NA, 71L), SEED = c(78L, 85L, NA, NA,
85L, NA, 85L, NA, NA, NA, 85L, NA, 71L, 67L, 67L, 67L, 85L,
80L, NA, 85L)), row.names = c(NA, 20L), class = "data.frame")

scale_colour_hue issue getting Error: Discrete value supplied to continuous scale

I'm attempting to plot coefficient estimates of insect Orders using small_multiple, which integrates with ggplot2. However, when I use scale_colour_hue to color-code each Order, I get Error: Continuous value supplied to discrete scale.
Any help would be appreciated.
Data:
m123456_df:
term estimate std.error statistic group by_2sd model Order
insecticidearea -1.87 1.84 -1.01 fixed TRUE
AcariInsecticideFor Acari
insecticidearea 3.02 1.66 1.80 fixed TRUE
AraneaeInsecticideFor Araneae
insecticidearea 28.18 5.76 4.89 fixed TRUE
ColeopteraInsecticideFor Coleoptera
insecticidearea -2.60 3.52 -0.73 fixed TRUE
DipteraInsecticideFor Diptera
insecticidearea -6.97 7.85 -0.88 fixed TRUE
HemipteraInsecticideFor Hemiptera
insecticidearea 5.47 2.96 1.84 fixed TRUE
HomopteraInsecticideFor Homoptera
insecticidearea -3.98 4.13 -0.96 fixed TRUE
HymenopteraInsecticideFor Hymenoptera
insecticidearea -0.07 0.68 -0.11 fixed TRUE
LepidopteraInsecticideFor Lepidoptera
insecticidearea -9.98 3.28 -3.03 fixed TRUE
OdonataInsecticideFor Odonata
insecticidearea -0.60 0.83 -0.72 fixed TRUE
OrthopteraInsecticideFor Orthoptera
insecticidearea -1.97 1.70 -1.15 fixed TRUE
ThysanopInsecticideFor Thysanoptera
To better see the structure of the data,
dput(m123456_df):
"structure(list(X = c(49L, 50L, 51L, 52L, 53L, 169L, 170L, 171L,
172L, 173L, 1L, 2L, 3L, 4L, 109L, 110L, 111L, 112L, 113L, 54L,
55L, 56L, 57L, 58L, 174L, 175L, 176L, 177L, 178L, 5L, 6L, 7L,
8L, 114L, 115L, 116L, 117L, 118L, 59L, 60L, 61L, 62L, 63L, 179L,
180L, 181L, 182L, 183L, 9L, 10L, 11L, 12L, 119L, 120L, 121L,
122L, 123L, 69L, 70L, 71L, 72L, 73L, 189L, 190L, 191L, 192L,
193L, 17L, 18L, 19L, 20L, 129L, 130L, 131L, 132L, 133L, 74L,
75L, 76L, 77L, 78L, 194L, 195L, 196L, 197L, 198L, 21L, 22L, 23L,
24L, 134L, 135L, 136L, 137L, 138L, 79L, 80L, 81L, 82L, 83L, 199L,
200L, 201L, 202L, 203L, 25L, 26L, 27L, 28L, 139L, 140L, 141L,
142L, 143L, 84L, 85L, 86L, 87L, 88L, 204L, 205L, 206L, 207L,
208L, 29L, 30L, 31L, 32L, 144L, 145L, 146L, 147L, 148L, 89L,
90L, 91L, 92L, 93L, 209L, 210L, 211L, 212L, 213L, 33L, 34L, 35L,
36L, 149L, 150L, 151L, 152L, 153L, 94L, 95L, 96L, 97L, 98L, 214L,
215L, 216L, 217L, 218L, 37L, 38L, 39L, 40L, 154L, 155L, 156L,
157L, 158L, 99L, 100L, 101L, 102L, 103L, 219L, 220L, 221L, 222L,
223L, 41L, 42L, 43L, 44L, 159L, 160L, 161L, 162L, 163L, 104L,
105L, 106L, 107L, 108L, 224L, 225L, 226L, 227L, 228L, 45L, 46L,
47L, 48L, 164L, 165L, 166L, 167L, 168L), term = structure(c(1L,
3L, 2L, 6L, 7L, 1L, 4L, 2L, 6L, 7L, 1L, 2L, 6L, 7L, 1L, 5L, 2L,
6L, 7L, 1L, 3L, 2L, 6L, 7L, 1L, 4L, 2L, 6L, 7L, 1L, 2L, 6L, 7L,
1L, 5L, 2L, 6L, 7L, 1L, 3L, 2L, 6L, 7L, 1L, 4L, 2L, 6L, 7L, 1L,
2L, 6L, 7L, 1L, 5L, 2L, 6L, 7L, 1L, 3L, 2L, 6L, 7L, 1L, 4L, 2L,
6L, 7L, 1L, 2L, 6L, 7L, 1L, 5L, 2L, 6L, 7L, 1L, 3L, 2L, 6L, 7L,
1L, 4L, 2L, 6L, 7L, 1L, 2L, 6L, 7L, 1L, 5L, 2L, 6L, 7L, 1L, 3L,
2L, 6L, 7L, 1L, 4L, 2L, 6L, 7L, 1L, 2L, 6L, 7L, 1L, 5L, 2L, 6L,
7L, 1L, 3L, 2L, 6L, 7L, 1L, 4L, 2L, 6L, 7L, 1L, 2L, 6L, 7L, 1L,
5L, 2L, 6L, 7L, 1L, 3L, 2L, 6L, 7L, 1L, 4L, 2L, 6L, 7L, 1L, 2L,
6L, 7L, 1L, 5L, 2L, 6L, 7L, 1L, 3L, 2L, 6L, 7L, 1L, 4L, 2L, 6L,
7L, 1L, 2L, 6L, 7L, 1L, 5L, 2L, 6L, 7L, 1L, 3L, 2L, 6L, 7L, 1L,
4L, 2L, 6L, 7L, 1L, 2L, 6L, 7L, 1L, 5L, 2L, 6L, 7L, 1L, 3L, 2L,
6L, 7L, 1L, 4L, 2L, 6L, 7L, 1L, 2L, 6L, 7L, 1L, 5L, 2L, 6L, 7L
), .Label = c("(Intercept)", "doy", "insecticidearea", "neonicarea",
"pesticidearea", "sd_(Intercept).SiteID.x", "sd_Observation.Residual"
), class = "factor"), estimate = c(5.833565955, -1.872580966,
0.227436188, 9.992583603, 6.852396625, 5.142632969, -0.678674828,
0.254534918, 9.864923466, 6.97100003, 4.477798595, 0.039781365,
9.850994785, 6.987034948, 4.3283009, 0.123013649, 0.022392237,
9.838260007, 6.994754179, 17.88900765, 3.029762821, 10.65695216,
2.226434694, 16.74910855, 23.73870445, -6.329202795, 11.20683527,
3.142802325, 16.38385835, 19.72583775, 10.76829079, 1.846216149,
16.86087792, 20.66463849, -0.908216649, 10.86819106, 2.09135404,
16.82721692, 23.13229672, 28.18575111, 7.732269676, 18.27238254,
56.95696509, 46.09461569, -8.045705619, 10.81339723, 19.1102249,
58.22254846, 40.85473609, 10.14359574, 18.64465552, 58.48404924,
36.83632929, 4.404336962, 9.931425672, 18.16739664, 58.57271159,
39.6406962, -2.605713718, 9.307524378, 15.82145387, 34.13815459,
36.98723437, 1.498641219, 8.947675916, 15.38990904, 34.28639819,
37.99163279, 9.083495066, 15.55045251, 34.24464848, 38.25136135,
-0.276226574, 9.096730123, 15.56422095, 34.24013774, 26.01030204,
-6.971380003, 9.152834977, 0, 76.54684844, 18.81324754, 4.584413346,
8.566929469, 0, 76.59232376, 21.71528903, 8.750358994, 0, 76.62612401,
21.52279232, 0.204300155, 8.743035747, 0, 76.62605563, 23.1671264,
5.474675587, 10.61361745, 7.81542411, 27.03810868, 34.70000476,
-11.75757077, 11.81009181, 9.412199453, 26.11205322, 27.01646276,
11.12642064, 7.617789448, 27.21673834, 30.33676716, -3.179909206,
11.50125637, 8.155250611, 27.04250033, 49.27043746, -3.98632448,
5.038285369, 19.08368393, 36.3099002, 44.65665992, 3.305098051,
4.660102066, 19.15206617, 36.29619993, 46.86770473, 4.875937397,
19.07213112, 36.36348295, 43.97229941, 3.034326999, 4.717080506,
19.16011396, 36.30805912, 5.398084837, -0.079315577, 2.151502251,
0, 5.373302675, 5.618080493, -0.492859394, 2.150411138, 0, 5.367888491,
5.34786557, 2.145072919, 0, 5.373448857, 6.696394307, -1.214487209,
2.183364895, 0, 5.338975281, 7.142053223, -9.981494663, -6.62191384,
6.870891062, 0.081012541, 7.347328261, -0.955827518, 4.837078368,
6.183147924, 0.245901403, 6.703342056, 4.82069418, 6.322801182,
0.729648159, 12.34478985, -4.615010887, 4.343005964, 4.884578148,
0.209518655, 5.332438336, -0.604879396, 0.877454525, 8.290188235,
4.675812692, 5.70857201, -1.066255113, 1.034587249, 8.238516889,
4.671352748, 4.962001457, 0.863341599, 8.327281966, 4.669175273,
5.976663938, -1.049824638, 0.915523839, 8.275547511, 4.677431349,
5.945020749, -1.977904212, -1.231110798, 2.537802029, 10.77392762,
5.579913084, -1.248700513, -1.384542346, 2.633756398, 10.77936693,
4.732988096, -1.412934304, 2.432763407, 10.84148556, 5.62838417,
-1.108552993, -1.46904816, 2.412972399, 10.83202785), std.error = c(2.345020833,
1.840768317, 1.750080908, NA, NA, 2.554842927, 1.701707856, 1.833883917,
NA, NA, 1.937814035, 1.755048913, NA, NA, 3.153724651, 2.050476456,
1.779085063, NA, NA, 1.814657198, 1.663420324, 1.66101237, NA,
NA, 1.837869979, 1.636172422, 1.643826419, NA, NA, 1.517843621,
1.665313905, NA, NA, 2.32233174, 1.679925176, 1.67454475, NA,
NA, 6.545154384, 5.761076413, 5.699062437, NA, NA, 6.737162579,
5.737241184, 5.834839768, NA, NA, 5.593994125, 5.82677481, NA,
NA, 7.906973554, 6.089892019, 5.829353741, NA, NA, 3.99576339,
3.52936854, 3.444993938, NA, NA, 3.983759621, 3.318339418, 3.446513164,
NA, NA, 3.310686622, 3.436527267, NA, NA, 4.845766085, 3.764235629,
3.440400053, NA, NA, 8.628004354, 7.855856109, 7.876962746, NA,
NA, 8.728305686, 7.915196489, 7.874937593, NA, NA, 7.150052148,
7.872039214, NA, NA, 10.28405417, 7.845041369, 7.87705336, NA,
NA, 3.377133084, 2.967940792, 2.915310064, NA, NA, 3.233316178,
2.815957854, 2.853379324, NA, NA, 2.666723896, 2.914377219, NA,
NA, 4.171797935, 3.048878466, 2.928824718, NA, NA, 4.479237651,
4.131956214, 4.007895041, NA, NA, 4.519776015, 3.863296489, 4.011682605,
NA, NA, 3.722679065, 4.008923785, NA, NA, 5.663222384, 4.498583965,
4.011828348, NA, NA, 0.7259094, 0.686957844, 0.690230788, NA,
NA, 0.693440783, 0.691612154, 0.687328516, NA, NA, 0.581187607,
0.687999626, NA, NA, 0.952040753, 0.681680334, 0.683923538, NA,
NA, 3.810333838, 3.28639311, 1.520388256, NA, NA, 3.276429106,
0.336977305, 3.998119731, NA, NA, 3.278016524, 4.193948156, NA,
NA, 3.162247377, 1.072403509, 2.824905614, NA, NA, 1.125687092,
0.833751484, 0.810504208, NA, NA, 1.120076008, 0.722188325, 0.815860562,
NA, NA, 1.00910672, 0.809981199, NA, NA, 1.508858442, 1.172058637,
0.81176584, NA, NA, 1.839933156, 1.709077899, 1.713608126, NA,
NA, 1.909498614, 1.709505119, 1.711082298, NA, NA, 1.51769132,
1.713651686, NA, NA, 2.06553118, 1.737291954, 1.713781907, NA,
NA), statistic = c(2.487639287, -1.017282267, 0.129957528, NA,
NA, 2.012895946, -0.398819824, 0.138795545, NA, NA, 2.310747324,
0.022666813, NA, NA, 1.372440964, 0.059992715, 0.012586378, NA,
NA, 9.85806447, 1.821405437, 6.41593787, NA, NA, 12.91642212,
-3.868298175, 6.817529602, NA, NA, 12.99596182, 6.466222833,
NA, NA, 8.898228507, -0.540629227, 6.49023626, NA, NA, 3.534262962,
4.89244528, 1.356761706, NA, NA, 6.841844047, -1.402364893, 1.853246646,
NA, NA, 7.303321237, 1.740859407, NA, NA, 4.658714113, 0.723220863,
1.703692401, NA, NA, 9.920681566, -0.738294595, 2.701753485,
NA, NA, 9.28450456, 0.451623849, 2.596153123, NA, NA, 11.47545423,
2.643219261, NA, NA, 7.89376967, -0.07338185, 2.644090798, NA,
NA, 3.014637102, -0.887411875, 1.161975151, NA, NA, 2.155429497,
0.579191351, 1.087872681, NA, NA, 3.037081209, 1.11157462, NA,
NA, 2.092831481, 0.026041947, 1.109937352, NA, NA, 6.85999806,
1.844604044, 3.640647895, NA, NA, 10.73201718, -4.175336201,
4.138984155, NA, NA, 10.13095612, 3.817769562, NA, NA, 7.271868781,
-1.042976702, 3.926918637, NA, NA, 10.9997373, -0.964754773,
1.257090148, NA, NA, 9.880281628, 0.8555124, 1.161632792, NA,
NA, 12.58977847, 1.216270914, NA, NA, 7.764536942, 0.674507139,
1.175793204, NA, NA, 7.436306566, -0.115459162, 3.11707662, NA,
NA, 8.101745142, -0.712623963, 3.12865113, NA, NA, 9.201616665,
3.117840238, NA, NA, 7.033726533, -1.781608106, 3.192410809,
NA, NA, 1.874390415, -3.037218716, -4.355409753, NA, NA, 2.242480463,
-2.836474455, 1.209838297, NA, NA, 2.044938458, 1.149440575,
NA, NA, 3.903802699, -4.303427628, 1.53739861, NA, NA, 4.737052041,
-0.725491237, 1.082603294, NA, NA, 5.096593419, -1.476422528,
1.268093222, NA, NA, 4.917221697, 1.065878567, NA, NA, 3.961050137,
-0.895709997, 1.127817647, NA, NA, 3.231106918, -1.157293189,
-0.718431933, NA, NA, 2.922187554, -0.730445612, -0.80916175,
NA, NA, 3.118544617, -0.824516625, NA, NA, 2.72490884, -0.638092515,
-0.857196679, NA, NA), group = structure(c(1L, 1L, 1L, 3L, 2L,
1L, 1L, 1L, 3L, 2L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L,
1L, 3L, 2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 3L,
2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 3L, 2L, 1L,
1L, 1L, 3L, 2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L,
3L, 2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 3L,
2L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 3L, 2L, 1L,
1L, 1L, 3L, 2L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 1L,
3L, 2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 3L, 2L,
1L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 3L, 2L, 1L, 1L,
1L, 3L, 2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 3L,
2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 3L, 2L,
1L, 1L, 3L, 2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L,
1L, 3L, 2L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 3L, 2L), .Label = c("fixed",
"Residual", "SiteID.x"), class = "factor"), by_2sd = c(TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE),
model = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L,
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 9L, 9L,
9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 14L, 14L,
14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 16L,
17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L, 19L, 19L,
19L, 19L, 20L, 20L, 20L, 20L, 20L, 21L, 21L, 21L, 21L, 21L,
22L, 22L, 22L, 22L, 22L, 23L, 23L, 23L, 23L, 24L, 24L, 24L,
24L, 24L, 25L, 25L, 25L, 25L, 25L, 26L, 26L, 26L, 26L, 26L,
27L, 27L, 27L, 27L, 28L, 28L, 28L, 28L, 28L, 29L, 29L, 29L,
29L, 29L, 30L, 30L, 30L, 30L, 30L, 31L, 31L, 31L, 31L, 32L,
32L, 32L, 32L, 32L, 33L, 33L, 33L, 33L, 33L, 34L, 34L, 34L,
34L, 34L, 35L, 35L, 35L, 35L, 36L, 36L, 36L, 36L, 36L, 37L,
37L, 37L, 37L, 37L, 38L, 38L, 38L, 38L, 38L, 39L, 39L, 39L,
39L, 40L, 40L, 40L, 40L, 40L, 41L, 41L, 41L, 41L, 41L, 42L,
42L, 42L, 42L, 42L, 43L, 43L, 43L, 43L, 44L, 44L, 44L, 44L,
44L), .Label = c("AcariInsecticideFor", "AcariNeonicFor",
"AcariNullFor", "AcariPesticideFor", "AraneaeInsecticideFor",
"AraneaeNeonicFor", "AraneaeNullFor", "AraneaePesticideFor",
"ColeopteraInsecticideFor", "ColeopteraNeonicFor", "ColeopteraNullFor",
"ColeopteraPesticideFor", "DipteraInsecticideFor", "DipteraNeonicFor",
"DipteraNullFor", "DipteraPesticideFor", "HemipteraInsecticideFor",
"HemipteraNeonicFor", "HemipteraNullFor", "HemipteraPesticideFor",
"HomopteraInsecticideFor", "HomopteraNeonicFor", "HomopteraNullFor",
"HomopteraPesticideFor", "HymenopteraInsecticideFor",
"HymenopteraNeonicFor",
"HymenopteraNullFor", "HymenopteraPesticideFor",
"LepidopteraInsecticideFor",
"LepidopteraNeonicFor", "LepidopteraNullFor", "LepidopteraPesticideFor",
"OdonataInsecticideFor", "OdonataNeonicFor", "OdonataNullFor",
"OdonataPesticideFor", "OrthopteraInsecticideFor", "OrthopteraNeonicFor",
"OrthopteraNullFor", "OrthopteraPesticideFor",
"ThysanopteraInsecticideFor",
"ThysanopteraNeonicFor", "ThysanopteraNullFor", "ThysanopteraPesticideFor"
), class = "factor"), Order = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L), .Label = c("Acari", "Araneae", "Coleoptera",
"Diptera", "Hemiptera", "Homoptera", "Hymenoptera", "Lepidoptera",
"Odonata", "Orthoptera", "Thysanoptera"), class = "factor")), row.names =
c(NA,
-209L), class = "data.frame")
Plot code:
#required packages
library(dotwhisker)
library(broom)
library(lme4)
# Read the data frame that is relabelled and plotted below.
m123456_df <- read.csv("C:/Users/breiley/Desktop/m123456_df.csv")

# Relabel predictors (they will appear as facet labels).
# relabel_predictors() is called directly instead of through `%>%`, so the
# script does not depend on a pipe operator being attached.
m123456_df <- relabel_predictors(
  m123456_df,
  c(
    "(Intercept)" = "Intercept",
    neonicarea = "Neonictinoid",
    insecticidearea = "All insecticide",
    pesticidearea = "All pesticide"
  )
)
m123456_df$Order <- as.factor(m123456_df$Order)

# Generate a 'small multiple' plot.
# Every component is joined with `+`; previously the chain stopped after the
# legend theme, so the title and its styling were never added to the plot.
order_plot <- small_multiple(m123456_df) +
  theme_bw() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  ylab("Coefficient estimate") +
  geom_hline(yintercept = 0, colour = "grey60", linetype = 2) +
  # Only one colour scale is kept: adding a second scale_colour_*() call
  # would replace this one and discard its name, breaks and labels.
  # NOTE(review): the breaks 0-10 must match the values of the variable that
  # is mapped to colour -- confirm against the data.
  scale_colour_hue(
    name = "Order",
    breaks = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
    labels = c(
      "Acari", "Araneae", "Coleoptera", "Diptera", "Hemiptera", "Homoptera",
      "Hymenoptera", "Lepidoptera", "Odonata", "Orthoptera", "Thysanoptera"
    ),
    na.translate = FALSE
  ) +
  theme(
    legend.position = c(0.02, 0.008),
    legend.justification = c(0, 0),
    legend.title = element_text(size = 8),
    legend.background = element_rect(color = "gray90"),
    legend.spacing = unit(-4, "pt"),
    legend.key.size = unit(10, "pt")
  ) +
  ggtitle("Arthropod temporal trends") +
  theme(plot.title = element_text(face = "bold"))

# Show the plot, then save exactly this object rather than relying on
# whichever plot happened to be displayed last.
print(order_plot)
ggsave(
  "C:/Users/breiley/Desktop/ForestPesticide.png",
  plot = order_plot,
  width = 10,
  height = 10,
  dpi = 300
)

Read CSV file up to line with unique marker

I have many data sets that have extra information beyond a certain line. The files are all csv. I would be able to loop through them and read.csv with the "skip" argument to clean the top of the data, but the lengths of the data frames are all different. The only commonality is the "--------------- ---------------- ------ -----" line in the Total column that separates the meaningful data from the summaries and extraneous info below it.
Here's how I'm reading in the data without skip = 14 (which is standard across everything).
# Read the raw export without a header row and name the eleven columns by hand.
before <- read.csv(
  "Example.csv",
  header = FALSE,
  col.names = c(
    "CountryID", "Name", "Type", "Symbol", "Code", "Unit",
    "Total", "Measurement", "Value", "Percent", "CO2"
  )
)
However, the ----- marker may be on a different row in each file, but it's always the first such line you hit. Here's the data before:
structure(list(CountryID = structure(c(26L, 19L, 21L, 23L, 21L,
7L, 1L, 1L, 1L, 22L, 3L, 1L, 19L, 2L, 8L, 14L, 15L, 13L, 9L,
12L, 18L, 17L, 8L, 13L, 15L, 10L, 8L, 8L, 11L, 16L, 1L, 1L, 1L,
20L, 4L, 6L, 1L, 25L, 5L, 1L, 1L, 1L, 24L, 1L), .Label = c("",
"------------", "-------------", "---------------", "------------------",
" ", "08.15.1997", "10000", "15000", "200", "2000", "2500", "3000",
"45000", "5000", "7000", "8000", "8300", "Country", "Output",
"Production", "Quantity", "Serial Output", "TOTAL SUM", "Unaccounted",
"United Nations Data"), class = "factor"), Name = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 20L, 2L, 1L, 1L, 1L, 21L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 19L, 1L, 1L, 1L, 1L), .Label = c("",
"--------------------", " ", "Bahrain", "Bangladesh", "Barbados",
"Belarus", "Belgium", "Belize", "Benin", "Bhutan", "Bolivia",
"Bosnia and Herzegovina", "Botswana", "Brazil", "Brunei", "Bulgaria",
"Burkina Faso", "Chad", "Name", "The Bahamas"), class = "factor"),
Type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 4L,
2L, 1L, 1L, 1L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("", "----", " ", "Code", "Type",
"Unit"), class = "factor"), Symbol = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 20L, 22L, 2L, 1L, 1L, 1L, 4L, 5L,
6L, 7L, 9L, 8L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 21L, 1L, 1L, 1L,
1L), .Label = c("", "------------", " ", "BAHM", "BAHR",
"BANG", "BARB", "BELGM", "BELS", "BELZ", "BEN", "BHUT", "BOL",
"BOSHER", "BOTS", "BRAZ", "BRUN", "BULG", "BURKF", "Country",
"private", "Symbol"), class = "factor"), Code = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 19L, 2L, 1L, 1L, 1L, 12L,
15L, 11L, 17L, 4L, 13L, 14L, 9L, 18L, 10L, 5L, 16L, 3L, 7L,
8L, 6L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("", "------------", "1504944270", "2287368539",
"2388991307", "2453202442", "2561470743", "3205402223", "3221488867",
"3230369605", "3247578406", "3712013344", "4307638090", "462793263",
"4835205752", "4854959101", "5842098895", "5932776587", "Code"
), class = "factor"), Unit = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 17L, 16L, 2L, 1L, 1L, 1L, 7L, 9L, 10L, 14L,
12L, 15L, 15L, 11L, 13L, 3L, 8L, 13L, 15L, 6L, 5L, 9L, 1L,
1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
"-------------", "100", "1109", "27", "35", "40", "45", "58",
"70", "74", "77", "79", "82", "95", "Output", "Per Unit"), class = "factor"),
Total = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 25L,
24L, 2L, 1L, 1L, 1L, 18L, 5L, 17L, 8L, 23L, 20L, 6L, 9L,
7L, 11L, 12L, 13L, 19L, 15L, 14L, 10L, 3L, 16L, 1L, 1L, 1L,
16L, 1L, 1L, 1L, 21L, 1L, 3L, 22L, 4L), .Label = c("", "---------------",
"--------------- ---------------- ------ -----",
"=============== ================ ====== =====",
"126912", "147431", "170553", "175973", "203728", "230761",
"293789", "304471", "376281", "386526", "399160", "4417002",
"476025", "478030", "502999", "51012", "5610654", "56406056",
"93351", "Output", "Total"), class = "factor"), Measurement = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 12L, 2L, 1L, 1L, 1L, 3L,
9L, 3L, 4L, 10L, 9L, 6L, 4L, 5L, 10L, 7L, 9L, 4L, 8L, 10L,
9L, 1L, 1L, 1L, 1L, 1L, 11L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("", "--------", "20", "23", "24", "26", "27",
"28", "29", "30", "420", "Measurement"), class = "factor"),
Value = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 22L,
23L, 2L, 1L, 1L, 1L, 5L, 19L, 11L, 8L, 3L, 18L, 13L, 6L,
4L, 9L, 14L, 17L, 7L, 10L, 12L, 15L, 1L, 16L, 1L, 1L, 1L,
16L, 1L, 1L, 1L, 20L, 1L, 1L, 21L, 1L), .Label = c("", "----------------",
"15150240", "15891735", "16083459", "16959919", "20350968",
"20909501", "21770264", "25121096", "27726279", "30024743",
"34069742", "34841369", "38498281", "468004111", "49524999",
"50512814", "50568702", "540650", "64506", "Country", "Value"
), class = "factor"), Percent = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 23L, 2L, 1L, 1L, 1L, 11L, 12L, 8L, 3L,
17L, 16L, 5L, 10L, 20L, 9L, 6L, 7L, 4L, 15L, 14L, 22L, 1L,
13L, 1L, 1L, 1L, 21L, 1L, 1L, 1L, 19L, 1L, 1L, 18L, 1L), .Label = c("",
"------", "102", "104", "106", "112", "126", "129", "142",
"15", "160", "177", "1775", "180", "191", "24", "25", "5640645",
"650163", "87", "887.5", "95", "Production Percent"), class = "factor"),
CO2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 15L, 14L,
2L, 1L, 1L, 1L, 9L, 4L, 9L, 7L, 4L, 5L, 4L, 7L, 4L, 9L, 4L,
11L, 4L, 12L, 10L, 4L, 1L, 6L, 1L, 1L, 1L, 8L, 1L, 1L, 1L,
3L, 1L, 1L, 13L, 1L), .Label = c("", "-----", "?", "0", "0.2",
"0.6", "1", "19.4", "2", "2.2", "4", "5", "564065", "CO2",
"Cur."), class = "factor")), class = "data.frame", row.names = c(NA,
-44L))
And here's how I'm hoping it could look:
structure(list(CountryID = c(10000L, 45000L, 5000L, 3000L, 15000L,
2500L, 8300L, 8000L, 10000L, 3000L, 5000L, 200L, 10000L, 10000L,
2000L, 7000L), Name = structure(c(16L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L), .Label = c("Bahrain",
"Bangladesh", "Barbados", "Belarus", "Belgium", "Belize", "Benin",
"Bhutan", "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil",
"Brunei", "Bulgaria", "Burkina Faso", "The Bahamas"), class = "factor"),
Type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = "Unit", class = "factor"),
Symbol = structure(c(1L, 2L, 3L, 4L, 6L, 5L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L), .Label = c("BAHM", "BAHR",
"BANG", "BARB", "BELGM", "BELS", "BELZ", "BEN", "BHUT", "BOL",
"BOSHER", "BOTS", "BRAZ", "BRUN", "BULG", "BURKF"), class = "factor"),
Code = c(3712013344, 4835205752, 3247578406, 5842098895,
2287368539, 4307638090, 462793263, 3221488867, 5932776587,
3230369605, 2388991307, 4854959101, 1504944270, 2561470743,
3205402223, 2453202442), Unit = c(40L, 58L, 70L, 82L, 77L,
95L, 95L, 74L, 79L, 100L, 45L, 79L, 95L, 35L, 27L, 58L),
Total = c(478030L, 126912L, 476025L, 175973L, 93351L, 51012L,
147431L, 203728L, 170553L, 293789L, 304471L, 376281L, 502999L,
399160L, 386526L, 230761L), Measurement = c(20L, 29L, 20L,
23L, 30L, 29L, 26L, 23L, 24L, 30L, 27L, 29L, 23L, 28L, 30L,
29L), Value = c(16083459L, 50568702L, 27726279L, 20909501L,
15150240L, 50512814L, 34069742L, 16959919L, 15891735L, 21770264L,
34841369L, 49524999L, 20350968L, 25121096L, 30024743L, 38498281L
), Percent = c(160L, 177L, 129L, 102L, 25L, 24L, 106L, 15L,
87L, 142L, 112L, 126L, 104L, 191L, 180L, 95L), CO2 = c(2,
0, 2, 1, 0, 0.2, 0, 1, 0, 2, 0, 4, 0, 5, 2.2, 0)), class = "data.frame", row.names = c(NA,
-16L))
Can this be integrated into the read.csv arguments, or is it easier to clean the bottom of it some other way?
Three thoughts:
Use readLines (as @user2554330 suggested), find/remove the specific row, filter it, then parse the text vector with read.csv, the least of the three.
before[seq_len(min(head(which(!grepl("^[^- ]+$", before$Total)),1)-1L,nrow(before))),]; a bit complicated, granted, but it does what you need (assuming that you've already filtered out the first 14 rows with skip=).
Use an external script such as sed -e '1,14d;/^[ -]\+$/{g;q;} in a pipe(...)-type thing.
Read it twice. The first time, use readLines("Example.csv"), and look through the lines for the marker of the end of data. Say it's on line n. Then in the second read, use
# Second pass: stop reading just before line n, where the marker was found.
read.csv(
  "Example.csv",
  header = FALSE,
  col.names = c(
    "CountryID", "Name", "Type", "Symbol", "Code", "Unit",
    "Total", "Measurement", "Value", "Percent", "CO2"
  ),
  nrows = n - 1
)
(or maybe nrows will need to be a different value, if you're skipping some).

Weekday Factor displaying alphabetically

I'm trying to create boxplots using the code below. I've also included sample data. It's time-series data, I have a date-time field, and I've created a factor for Weekday. The problem I have is that my Weekdays when displayed in the boxplot are out of order. I think it's doing it alphabetically instead of chronologically. Does anyone know how to fix this? Also I'd like to rotate the x-axis labels like 45 degrees, so they'll all fit, and have them all displayed.
Code:
# One box of order counts per weekday level, drawn in factor-level order.
boxplot(
  OrderCnt ~ Weekday,
  data = icartdf_factor,
  main = "Orders vs Weekday",
  xlab = "Weekday",
  ylab = "Orders"
)
Sample Data:
dput(droplevels(icartdf_factor[1:50,]))
structure(list(OrderCnt = c(1L, 1L, 0L, 0L, 0L, 2L, 5L, 12L,
16L, 30L, 27L, 21L, 23L, 27L, 37L, 36L, 35L, 30L, 27L, 17L, 8L,
2L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 2L, 6L, 9L, 19L, 28L, 27L, 28L,
22L, 29L, 41L, 35L, 43L, 42L, 27L, 16L, 10L, 2L, 1L, 0L, 0L,
0L), DateTime = structure(c(1398931200, 1398934800, 1398938400,
1398942000, 1398945600, 1398949200, 1398952800, 1398956400, 1398960000,
1398963600, 1398967200, 1398970800, 1398974400, 1398978000, 1398981600,
1398985200, 1398988800, 1398992400, 1398996000, 1398999600, 1399003200,
1399006800, 1399010400, 1399014000, 1399017600, 1399021200, 1399024800,
1399028400, 1399032000, 1399035600, 1399039200, 1399042800, 1399046400,
1399050000, 1399053600, 1399057200, 1399060800, 1399064400, 1399068000,
1399071600, 1399075200, 1399078800, 1399082400, 1399086000, 1399089600,
1399093200, 1399096800, 1399100400, 1399104000, 1399107600), class = c("POSIXct",
"POSIXt")), Weekday = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("Friday",
"Saturday", "Thursday"), class = "factor"), hourcol = structure(c(2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 21L, 22L, 23L, 24L, 1L, 2L, 3L), .Label = c("00", "01",
"02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12",
"13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"
), class = "factor")), .Names = c("OrderCnt", "DateTime", "Weekday",
"hourcol"), row.names = c(NA, 50L), class = "data.frame")
You just need to specify the desired order within the factor itself.
# Put the weekday levels in calendar order instead of alphabetical order.
# The levels are taken from a full week and restricted to the days that
# actually occur, so days outside the 50-row sample are not turned into NA
# and no empty boxes are drawn for absent days.
week_order <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday"
)
days_present <- intersect(
  week_order,
  unique(as.character(icartdf_factor$Weekday))
)
icartdf_factor$Weekday <- factor(icartdf_factor$Weekday, levels = days_present)

# boxplot() draws the groups in factor-level order.
boxplot(
  OrderCnt ~ Weekday,
  data = icartdf_factor,
  main = "Orders vs Weekday",
  xlab = "Weekday",
  ylab = "Orders"
)
To change the angle of the labels, take a look at How can I change the angle of the value labels on my axes?

R codes for hypothesis test in Rmarkdown

I am really struggling on the coding part for the R markdown but have no one to ask...
The data I am working on is, dput(survey):
structure(list(Time = structure(c(5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 1L, 2L, 3L, 4L, 35L, 42L, 46L, 30L, 31L,
33L, 34L, 29L, 36L, 37L, 38L, 39L, 40L, 41L, 43L, 44L, 45L, 47L,
48L, 32L, 54L, 55L, 50L, 49L, 51L, 52L, 53L, 57L, 59L, 56L, 60L,
61L, 58L, 62L), .Label = c("2017/08/06 10:25:01 PM GMT+10", "2017/08/06 10:26:54 PM GMT+10",
"2017/08/06 10:38:13 PM GMT+10", "2017/08/06 10:51:58 PM GMT+10",
"2017/08/06 4:53:07 PM GMT+10", "2017/08/06 4:58:44 PM GMT+10",
"2017/08/06 5:01:05 PM GMT+10", "2017/08/06 5:03:25 PM GMT+10",
"2017/08/06 5:04:50 PM GMT+10", "2017/08/06 5:06:51 PM GMT+10",
"2017/08/06 5:06:54 PM GMT+10", "2017/08/06 5:10:57 PM GMT+10",
"2017/08/06 5:11:16 PM GMT+10", "2017/08/06 5:18:21 PM GMT+10",
"2017/08/06 5:23:46 PM GMT+10", "2017/08/06 5:34:02 PM GMT+10",
"2017/08/06 5:43:10 PM GMT+10", "2017/08/06 5:54:52 PM GMT+10",
"2017/08/06 6:04:06 PM GMT+10", "2017/08/06 7:11:00 PM GMT+10",
"2017/08/06 7:13:21 PM GMT+10", "2017/08/06 7:32:45 PM GMT+10",
"2017/08/06 7:33:58 PM GMT+10", "2017/08/06 7:50:31 PM GMT+10",
"2017/08/06 8:02:07 PM GMT+10", "2017/08/06 8:28:39 PM GMT+10",
"2017/08/06 8:36:46 PM GMT+10", "2017/08/06 9:14:14 PM GMT+10",
"2017/08/07 1:59:14 PM GMT+10", "2017/08/07 10:28:13 AM GMT+10",
"2017/08/07 11:05:40 AM GMT+10", "2017/08/07 11:44:09 PM GMT+10",
"2017/08/07 12:18:04 PM GMT+10", "2017/08/07 12:49:27 PM GMT+10",
"2017/08/07 12:55:41 AM GMT+10", "2017/08/07 2:04:49 PM GMT+10",
"2017/08/07 2:14:56 PM GMT+10", "2017/08/07 2:17:10 PM GMT+10",
"2017/08/07 4:47:38 PM GMT+10", "2017/08/07 4:57:15 PM GMT+10",
"2017/08/07 7:08:44 PM GMT+10", "2017/08/07 9:12:16 AM GMT+10",
"2017/08/07 9:18:11 PM GMT+10", "2017/08/07 9:22:59 PM GMT+10",
"2017/08/07 9:23:43 PM GMT+10", "2017/08/07 9:32:10 AM GMT+10",
"2017/08/07 9:46:41 PM GMT+10", "2017/08/07 9:55:01 PM GMT+10",
"2017/08/08 1:36:16 PM GMT+10", "2017/08/08 10:27:59 AM GMT+10",
"2017/08/08 3:36:15 PM GMT+10", "2017/08/08 4:15:12 PM GMT+10",
"2017/08/08 6:39:28 PM GMT+10", "2017/08/08 8:44:38 AM GMT+10",
"2017/08/08 9:03:07 AM GMT+10", "2017/08/09 1:00:16 PM GMT+10",
"2017/08/09 10:17:55 AM GMT+10", "2017/08/09 10:26:28 PM GMT+10",
"2017/08/09 11:50:50 AM GMT+10", "2017/08/09 3:02:39 PM GMT+10",
"2017/08/09 9:48:19 PM GMT+10", "2017/08/10 7:32:00 AM GMT+10"
), class = "factor"), ID = structure(c(48L, 57L, 38L, 9L, 8L,
42L, 41L, 58L, 31L, 27L, 60L, 34L, 13L, 37L, 40L, 29L, 53L, 28L,
16L, 20L, 47L, 18L, 51L, 3L, 36L, 10L, 32L, 11L, 54L, 22L, 61L,
15L, 35L, 2L, 25L, 55L, 17L, 5L, 14L, 21L, 49L, 45L, 6L, 30L,
26L, 4L, 19L, 50L, 44L, 56L, 43L, 59L, 24L, 12L, 52L, 23L, 1L,
39L, 7L, 62L, 46L, 33L), .Label = c("1907", "3456", "450181964",
"460061490", "A", "ABCABCABC", "adsad", "affordance", "alexxx",
"AliceJ", "blueberry11", "Bob", "byue7515", "Cameron Nichols",
"Coelacanth", "crocophile", "Donald trump ", "DS2012-LB-S", "Gir",
"goly", "Grace", "greyshirt", "grob6576", "hahahahaha", "Harry",
"Insidestella", "ja150", "jane", "Jiashu Wu", "jmc", "Joohee0214",
"kakinna", "Kimbo Slice", "lhar7524", "lizebin", "Lucy", "Magician1213",
"Matchey", "md123", "mia", "MP", "N52981227", "Nattt", "Pete",
"rcon", "Ryan_eats_p-values", "S123", "Salmon ", "smarcon", "smile",
"snail", "sonja kay", "Thelimitdoesnotexist", "Toflin", "Tony Stark ",
"UriLover420", "valerie", "Whatzup", "Winky", "xwn19960829",
"zilu2637", "ZXFAARON"), class = "factor"), Gender = structure(c(3L,
2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 2L,
2L, 1L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L,
2L, 3L, 3L, 3L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 2L,
4L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 3L), .Label = c("crocodilian",
"Female", "Male", "Poisson"), class = "factor"), Postcode =structure(c(12L,
30L, 20L, 35L, 28L, 33L, 13L, 22L, 12L, 2L, 3L, 38L, 25L, 13L,
4L, 23L, 19L, 23L, 29L, 32L, 26L, 4L, 14L, 4L, 36L, 12L, 3L,
41L, 28L, 40L, 24L, 9L, 37L, 4L, 3L, 17L, 32L, 27L, 15L, 36L,
12L, 11L, 3L, 7L, 4L, 10L, 39L, 24L, 42L, 8L, 12L, 13L, 5L, 6L,
31L, 20L, 1L, 34L, 18L, 13L, 21L, 16L), .Label = c("14052", "2000",
"2007", "2008", "2020", "2021", "2022", "2026", "2031", "2037",
"2041", "2042", "2050", "2066", "2069", "2074", "2097", "2112",
"2117", "2131", "2134", "2136", "2137", "2138", "2140", "2144",
"2154", "2165", "2166", "2171", "2193", "2200", "2205", "2209",
"2216", "2220", "2228", "2756", "2762", "2765", "2780", "sydney"
), class = "factor"), StatsCourse = structure(c(4L, 4L, 4L, 4L,
4L, 4L, 1L, 4L, 4L, 4L, 3L, 4L, 4L, 5L, 4L, 4L, 5L, 6L, 4L, 4L,
4L, 4L, 5L, 4L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 6L, 4L, 2L, 5L, 4L, 5L, 6L, 4L, 2L,
2L, 4L, 4L, 5L, 7L, 5L, 1L, 4L, 4L, 4L), .Label = c("", "BUSS1020",
"MATH1001,MATH1002", "MATH1005", "MATH1015", "MATH1905", "none"
), class = "factor"), Clubs = structure(c(1L, 1L, 4L, 5L, 4L,
2L, 4L, 4L, 2L, 4L, 7L, 2L, 4L, 4L, 1L, 4L, 1L, 4L, 1L, 1L, 6L,
1L, 4L, 1L, 11L, 4L, 5L, 10L, 3L, 5L, 2L, 4L, 1L, 1L, 2L, 1L,
4L, 4L, 4L, 6L, 2L, 2L, 4L, 4L, 9L, 4L, 1L, 8L, 2L, 4L, 2L, 6L,
4L, 4L, 11L, 5L, 1L, 1L, 1L, 4L, 4L, 1L), .Label = c("0", "1",
"10+", "2", "3", "4", "5", "6", "7", "none", "None"), class = "factor"),
StudyTime = structure(c(24L, 3L, 26L, 27L, 17L, 2L, 10L,
14L, 23L, 7L, 19L, 3L, 17L, 29L, 23L, 22L, 10L, 10L, 28L,
23L, 6L, 14L, 20L, 7L, 17L, 28L, 5L, 16L, 20L, 3L, 21L, 3L,
23L, 7L, 17L, 10L, 1L, 18L, 10L, 17L, 10L, 7L, 13L, 5L, 15L,
3L, 8L, 17L, 19L, 17L, 3L, 30L, 31L, 1L, 4L, 3L, 20L, 9L,
14L, 11L, 12L, 25L), .Label = c("0", "05-Jun", "10", "11",
"12", "14", "15", "17", "2", "20", "20-24", "20-25?", "24",
"25", "28", "28 hours", "30", "31", "35", "4", "40", "49",
"5", "50", "6", "7", "70", "8", "8hr", "didn't start uni maybe 6h",
"not sure"), class = "factor"), StudyLoad = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("", "full-time", "part-time"), class = "factor"),
SocialMedia = structure(c(1L, 5L, 1L, 1L, 1L, 7L, 1L, 1L,
7L, 7L, 2L, 1L, 2L, 1L, 1L, 8L, 6L, 2L, 1L, 7L, 1L, 4L, 1L,
8L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 7L, 2L, 1L, 2L, 1L, 1L, 2L,
1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 5L, 5L, 1L, 1L, 2L, 2L,
1L, 3L, 1L, 2L, 2L, 1L, 2L, 1L, 1L), .Label = c("Facebook",
"Instragram", "none! (really)", "reddit", "Snapchat", "Tumblr",
"Twitter", "WeChat"), class = "factor"), Siblings = structure(c(2L,
4L, 4L, 1L, 4L, 1L, 2L, 4L, 5L, 2L, 1L, 2L, 2L, 1L, 4L, 1L,
1L, 4L, 2L, 2L, 8L, 2L, 2L, 3L, 1L, 1L, 2L, 5L, 2L, 7L, 1L,
4L, 2L, 6L, 1L, 6L, 2L, 5L, 1L, 1L, 4L, 4L, 2L, 2L, 1L, 2L,
1L, 1L, 4L, 4L, 2L, 9L, 1L, 2L, 10L, 2L, 4L, 2L, 2L, 1L,
2L, 2L), .Label = c("0", "1", "165", "2", "3", "4", "5",
"6", "none", "one"), class = "factor"), FBFriends = structure(c(49L,
43L, 6L, 3L, 28L, 2L, 9L, 13L, 21L, 19L, 30L, 40L, 37L, 20L,
35L, 32L, 53L, 47L, 30L, 22L, 8L, 45L, 14L, 15L, 38L, 16L,
45L, 31L, 35L, 43L, 34L, 23L, 52L, 18L, 34L, 27L, 33L, 11L,
42L, 24L, 51L, 26L, 17L, 50L, 39L, 19L, 10L, 12L, 4L, 44L,
46L, 29L, 45L, 36L, 54L, 20L, 7L, 5L, 41L, 25L, 1L, 48L), .Label = c("~300",
"10", "100", "1000", "1127", "115", "1192", "12", "120",
"121", "130", "148", "150", "1583", "165", "170", "174",
"190", "200", "213", "228", "229", "235", "240", "242", "256",
"259", "263", "27", "300", "308", "31", "382", "40", "400",
"431", "470", "5", "540", "548", "57", "572", "600", "664",
"700", "724", "800", "850", "90", "936", "978", "do not know",
"Don't have FB", "none (not in facebook)"), class = "factor"),
Grade = structure(c(18L, 19L, 11L, 31L, 33L, 14L, 22L, 18L,
6L, 9L, 19L, 18L, 22L, 23L, 24L, 30L, 28L, 16L, 2L, 14L,
3L, 12L, 21L, 2L, 12L, 12L, 6L, 29L, 12L, 27L, 17L, 6L, 12L,
17L, 17L, 15L, 24L, 20L, 7L, 14L, 12L, 10L, 22L, 34L, 24L,
17L, 16L, 12L, 24L, 32L, 26L, 25L, 26L, 13L, 4L, 12L, 1L,
5L, 12L, 8L, 24L, 35L), .Label = c("2.8", "50", "50-60",
"54", "6.25", "60", "61", "61.5", "62", "63", "64", "65",
"65.9", "66", "68", "69", "70", "72", "73", "73.2", "73.4",
"74", "74.6", "75", "8.7", "80", "82", "82.4", "83.2", "87",
"90", "90.1", "90.5", "91", "D"), class = "factor"), Pet = structure(c(3L,
2L, 3L, 1L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 3L, 2L, 3L, 3L,
2L, 3L, 3L, 2L, 2L, 3L, 2L, 2L, 3L, 2L, 2L, 2L, 3L, 3L, 3L,
2L, 3L, 2L, 2L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 2L,
3L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 3L, 2L, 3L, 3L, 2L, 2L, 2L,
3L), .Label = c("", "No", "Yes"), class = "factor"), Home = structure(c(2L,
3L, 3L, 1L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
2L, 2L, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 2L,
2L, 2L, 3L, 3L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 2L, 3L,
3L), .Label = c("", "No", "Yes"), class = "factor"), ExerciseTime = structure(c(10L,
12L, 7L, 1L, 4L, 7L, 7L, 5L, 7L, 12L, 13L, 5L, 10L, 7L, 15L,
15L, 10L, 10L, 5L, 14L, 2L, 9L, 4L, 5L, 7L, 4L, 14L, 8L,
10L, 13L, 1L, 13L, 1L, 13L, 13L, 5L, 7L, 16L, 16L, 14L, 10L,
14L, 7L, 6L, 12L, 10L, 10L, 13L, 13L, 14L, 7L, 11L, 2L, 2L,
17L, 16L, 7L, 7L, 2L, 3L, 13L, 15L), .Label = c("", "0",
"05-Jun", "1", "10", "12", "2", "2 hours", "20", "3", "3.5",
"4", "5", "6", "7", "8", "none"), class = "factor"), Eyecolor = structure(c(9L,
7L, 5L, 1L, 8L, 2L, 8L, 3L, 3L, 8L, 3L, 7L, 7L, 7L, 7L, 7L,
3L, 4L, 7L, 3L, 11L, 8L, 11L, 2L, 8L, 2L, 2L, 2L, 8L, 7L,
1L, 7L, 2L, 7L, 3L, 4L, 10L, 7L, 8L, 7L, 7L, 6L, 7L, 3L,
8L, 2L, 8L, 7L, 4L, 8L, 9L, 3L, 7L, 5L, 7L, 8L, 12L, 7L,
7L, 8L, 3L, 8L), .Label = c("", "black", "Black", "blue",
"Blue", "Blue/Green", "brown", "Brown", "Brown ", "Brown/black",
"dark brown", "grey"), class = "factor"), Working = structure(c(2L,
8L, 2L, 1L, 4L, 2L, 2L, 8L, 2L, 24L, 2L, 13L, 5L, 3L, 26L,
2L, 8L, 13L, 24L, 2L, 12L, 2L, 9L, 8L, 2L, 2L, 2L, 11L, 2L,
10L, 1L, 4L, 21L, 2L, 2L, 15L, 14L, 21L, 26L, 18L, 4L, 2L,
7L, 27L, 12L, 2L, 20L, 2L, 19L, 25L, 8L, 2L, 2L, 17L, 23L,
16L, 2L, 6L, 2L, 13L, 13L, 22L), .Label = c("", "0", "1.5",
"10", "11", "12", "14", "15", "17", "18", "18 hours", "2",
"20", "24", "25", "26", "3", "3.5", "30", "38", "4", "40",
"44", "5", "6", "7", "8"), class = "factor"), Season = structure(c(2L,
3L, 2L, 1L, 5L, 2L, 3L, 4L, 4L, 3L, 4L, 4L, 3L, 3L, 4L, 3L,
3L, 3L, 3L, 5L, 3L, 3L, 2L, 5L, 5L, 4L, 2L, 2L, 5L, 2L, 3L,
2L, 2L, 3L, 2L, 4L, 2L, 3L, 5L, 3L, 4L, 5L, 3L, 4L, 4L, 4L,
3L, 4L, 4L, 4L, 3L, 2L, 2L, 2L, 3L, 4L, 4L, 3L, 2L, 4L, 4L,
3L), .Label = c("", "Autumn", "Spring", "Summer", "Winter"
), class = "factor")), .Names = c("Time", "ID", "Gender",
"Postcode", "StatsCourse", "Clubs", "StudyTime", "StudyLoad",
"SocialMedia", "Siblings", "FBFriends", "Grade", "Pet", "Home",
"ExerciseTime", "Eyecolor", "Working", "Season"), class = "data.frame", row.names = c(NA,
-62L))
And what I did so far is,
library(dplyr)
library(ggplot2)
library(tidyr)
library(knitr)
# Load the survey responses and show the column names as read from the file.
survey <- read.csv("STAT2012Survey.csv")
colnames(survey)

# Keep a copy of the original headers, then swap in short names.
oldname <- colnames(survey)
newname <- c(
  "Time", "ID", "Gender", "Postcode", "StatsCourse", "Clubs",
  "StudyTime", "StudyLoad", "SocialMedia", "Siblings", "FBFriends",
  "Grade", "Pet", "Home", "ExerciseTime", "Eyecolor", "Working", "Season"
)
colnames(survey) <- newname
What I want to achieve is, I want to provide a hypothesis test about
"Is there any evidence that there is difference in exercise time between males and females?"
To do this, I need to get the mean and standard deviation (that sort of thing) in order to run a two-sample t-test, but I do not know how to approach it.
Also, to visualize the data with graph, I tried,
# Box plot of exercise time by gender, filled by gender.
ggplot(
  survey,
  aes(
    x = Gender,
    y = ExerciseTime,
    fill = Gender
  )
) +
  geom_boxplot()
however it only showed a strange graph. I think it is because the "ExerciseTime" variable is not numeric, but I am stuck on that as well since ggplot2 does not deal with the data of class numeric...
Someone please help me...! I want to make more hypothesis tests towards multiple questions but I am stuck on the first question... I might be able to achieve the goal if I know how to do the first one! Thanks.
Before you can make a boxplot, you will need to make ExerciseTime a numeric variable. The problem you will have with that is some of the responses don't easily turn numeric (2 hours, for example, should probably be 2, but it will require an extra step to get rid of the text).
As a start, though, let's just handle the easiest case: take anything that isn't a natural number and let it become a missing value.
# Turn the free-text ExerciseTime answers into numbers.
# The stringr functions are namespace-qualified because stringr is not among
# the packages attached earlier.
survey2 <-
  survey %>%
  mutate(
    ExerciseTime = as.character(ExerciseTime),
    # Blank out date-like entries such as "05-Jun".
    ExerciseTime = stringr::str_replace(ExerciseTime, "\\d{2}-\\w{3}", ""),
    # Keep the first run of one or two digits ("2 hours" -> "2");
    # entries with no digits (e.g. "none", "") become NA.
    ExerciseTime = stringr::str_extract(ExerciseTime, "\\d{1,2}"),
    ExerciseTime = as.numeric(ExerciseTime)
  )

# Plot the cleaned copy (survey2); the original survey still holds
# ExerciseTime as a non-numeric column.
ggplot(
  data = survey2,
  mapping = aes(
    x = Gender,
    y = ExerciseTime,
    fill = Gender
  )
) +
  geom_boxplot()

Resources