Ggplot showing random spikes in data - r

When I plot the data from my data frame with ggplot, the line shows random spikes and I don't know why. I have tried adjusting the ggplot call, but nothing works, so I am wondering whether my data frame is configured incorrectly.
The data frame has 4 groups of animals with the size of population recorded over 104 weeks:
head(mydata)
time group size
1 0 rabbits 30
2 1 rabbits 31.38
3 2 rabbits 32.82561384
4 3 rabbits 34.3400006020094
5 4 rabbits 35.926471747402
6 5 rabbits 37.5884983951345
This is what the ggplot creates when run
# `time` and `size` are stored as factors in `mydata`, so ggplot treats them as
# discrete values ordered by factor level ("0", "1", "10", "100", ...) rather
# than by magnitude; that ordering is what produces the spikes. Convert through
# character first: as.numeric() applied directly to a factor returns the
# integer level codes, not the values. A copy is used so `mydata` is untouched.
plot_data <- transform(
  mydata,
  time = as.numeric(as.character(time)),
  size = as.numeric(as.character(size))
)

# `main` is a base-graphics argument that ggplot() does not use; the title is
# set with labs() instead.
ggplot(plot_data, aes(x = time, y = size, group = group)) +
  geom_line(aes(colour = group)) +
  labs(title = "lotka volterra model plot") +
  theme(axis.text = element_blank())
mydata<-structure(list(time = structure(c(1L, 2L, 18L, 29L, 40L, 51L,
62L, 73L, 84L, 95L, 3L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 41L, 42L, 43L, 44L, 45L,
46L, 47L, 48L, 49L, 50L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L,
60L, 61L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 74L,
75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 85L, 86L, 87L, 88L,
89L, 90L, 91L, 92L, 93L, 94L, 96L, 97L, 98L, 99L, 100L, 101L,
102L, 103L, 104L, 105L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 18L, 29L,
40L, 51L, 62L, 73L, 84L, 95L, 3L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 41L, 42L, 43L,
44L, 45L, 46L, 47L, 48L, 49L, 50L, 52L, 53L, 54L, 55L, 56L, 57L,
58L, 59L, 60L, 61L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L,
72L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 85L, 86L,
87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 96L, 97L, 98L, 99L, 100L,
101L, 102L, 103L, 104L, 105L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 18L,
29L, 40L, 51L, 62L, 73L, 84L, 95L, 3L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L,
28L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 41L, 42L,
43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 52L, 53L, 54L, 55L, 56L,
57L, 58L, 59L, 60L, 61L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L,
71L, 72L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 85L,
86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 96L, 97L, 98L, 99L,
100L, 101L, 102L, 103L, 104L, 105L, 4L, 5L, 6L, 7L, 8L, 1L, 2L,
18L, 29L, 40L, 51L, 62L, 73L, 84L, 95L, 3L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
27L, 28L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 52L, 53L, 54L, 55L,
56L, 57L, 58L, 59L, 60L, 61L, 63L, 64L, 65L, 66L, 67L, 68L, 69L,
70L, 71L, 72L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L,
85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 96L, 97L, 98L,
99L, 100L, 101L, 102L, 103L, 104L, 105L, 4L, 5L, 6L, 7L, 8L), .Label = c("0",
"1", "10", "100", "101", "102", "103", "104", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "2", "20", "21", "22", "23",
"24", "25", "26", "27", "28", "29", "3", "30", "31", "32", "33",
"34", "35", "36", "37", "38", "39", "4", "40", "41", "42", "43",
"44", "45", "46", "47", "48", "49", "5", "50", "51", "52", "53",
"54", "55", "56", "57", "58", "59", "6", "60", "61", "62", "63",
"64", "65", "66", "67", "68", "69", "7", "70", "71", "72", "73",
"74", "75", "76", "77", "78", "79", "8", "80", "81", "82", "83",
"84", "85", "86", "87", "88", "89", "9", "90", "91", "92", "93",
"94", "95", "96", "97", "98", "99"), class = "factor"), group = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L), .Label = c("foxes", "rabbits", "sto foxes", "sto rabbits"
), class = "factor"), size = structure(c(172L, 180L, 188L, 196L,
204L, 216L, 224L, 231L, 237L, 242L, 246L, 253L, 259L, 265L, 271L,
275L, 281L, 285L, 291L, 297L, 303L, 309L, 313L, 320L, 324L, 330L,
337L, 5L, 10L, 17L, 21L, 27L, 33L, 40L, 48L, 54L, 62L, 68L, 77L,
86L, 92L, 98L, 100L, 103L, 106L, 111L, 114L, 144L, 154L, 162L,
170L, 184L, 191L, 199L, 208L, 218L, 226L, 232L, 238L, 243L, 248L,
254L, 260L, 266L, 272L, 276L, 282L, 288L, 292L, 298L, 304L, 310L,
315L, 321L, 325L, 328L, 335L, 2L, 6L, 14L, 19L, 24L, 30L, 34L,
39L, 44L, 49L, 53L, 57L, 64L, 66L, 71L, 74L, 79L, 82L, 83L, 81L,
78L, 72L, 65L, 59L, 50L, 42L, 28L, 15L, 227L, 223L, 221L, 220L,
215L, 211L, 210L, 203L, 202L, 197L, 194L, 192L, 189L, 186L, 183L,
181L, 179L, 174L, 173L, 168L, 167L, 165L, 161L, 159L, 157L, 153L,
151L, 149L, 143L, 141L, 139L, 138L, 134L, 132L, 130L, 129L, 127L,
125L, 123L, 121L, 119L, 117L, 116L, 118L, 120L, 122L, 124L, 126L,
128L, 131L, 133L, 137L, 140L, 142L, 150L, 152L, 158L, 160L, 166L,
169L, 175L, 182L, 187L, 193L, 198L, 205L, 214L, 222L, 228L, 235L,
241L, 247L, 256L, 263L, 270L, 278L, 284L, 294L, 305L, 317L, 323L,
334L, 9L, 22L, 37L, 51L, 69L, 88L, 102L, 110L, 147L, 177L, 207L,
229L, 245L, 264L, 280L, 299L, 318L, 331L, 11L, 26L, 43L, 58L,
75L, 172L, 178L, 185L, 195L, 201L, 213L, 219L, 227L, 234L, 240L,
240L, 240L, 249L, 252L, 255L, 255L, 261L, 269L, 277L, 283L, 287L,
293L, 300L, 302L, 306L, 311L, 312L, 316L, 322L, 329L, 333L, 332L,
1L, 4L, 8L, 12L, 18L, 23L, 29L, 36L, 38L, 47L, 55L, 61L, 70L,
84L, 90L, 97L, 101L, 104L, 105L, 109L, 112L, 135L, 145L, 155L,
163L, 176L, 190L, 200L, 212L, 217L, 225L, 233L, 239L, 244L, 250L,
257L, 262L, 267L, 274L, 279L, 286L, 290L, 295L, 301L, 307L, 314L,
319L, 326L, 327L, 336L, 3L, 7L, 16L, 20L, 25L, 31L, 35L, 41L,
45L, 52L, 56L, 63L, 67L, 73L, 80L, 85L, 87L, 89L, 94L, 93L, 96L,
95L, 91L, 227L, 213L, 209L, 195L, 185L, 178L, 178L, 172L, 172L,
164L, 164L, 164L, 164L, 164L, 164L, 156L, 148L, 136L, 136L, 136L,
136L, 115L, 113L, 113L, 113L, 113L, 107L, 115L, 115L, 115L, 115L,
148L, 136L, 115L, 113L, 115L, 113L, 115L, 115L, 115L, 115L, 115L,
115L, 115L, 115L, 115L, 115L, 115L, 113L, 113L, 148L, 148L, 148L,
148L, 115L, 115L, 113L, 115L, 136L, 115L, 113L, 115L, 115L, 136L,
148L, 148L, 136L, 115L, 115L, 156L, 156L, 172L, 172L, 156L, 172L,
185L, 201L, 201L, 201L, 201L, 209L, 219L, 236L, 240L, 258L, 273L,
287L, 296L, 312L, 333L, 13L, 32L, 46L, 60L, 76L, 99L, 108L, 146L,
171L, 206L, 230L, 251L, 268L, 289L, 308L), .Label = c("100",
"1012.93841736696", "1015", "102", "102.906562962176", "1056.50381985743",
"1064", "107", "107.681565975074", "107.772405683259", "1077.18555087758",
"109", "110", "1101.34248052638", "1106.71937713192", "1113",
"112.871167258684", "113", "1147.37104021294", "1159", "118.213953337458",
"118.907362834956", "119", "1194.47454549496", "1202", "1222.35700531209",
"123.812386226114", "1234.46815782699", "124", "1242.49940630328",
"1246", "125", "129.678627213148", "1291.24494843906", "1299",
"130", "131.883068737542", "133", "1340.45334270173", "135.825399632334",
"1352", "1353.01585096407", "1376.1737200094", "1389.79769980332",
"1399", "141", "142", "142.266012655376", "1438.86816034509",
"1458.3616245406", "146.923717396291", "1466", "1487.15590421503",
"149.014385797496", "150", "1506", "1534.03518691436", "1534.8487312945",
"1547.69218094539", "155", "156", "156.085074111268", "1569",
"1578.74382847183", "1619.48179996562", "1620.36309674333", "1628",
"163.493294034224", "164.404667496757", "165", "1657.79871894128",
"1673.39048203113", "1688", "1689.76590687066", "1693.62394525503",
"170", "171.254949844073", "1710.03514245751", "1714.7828566833",
"1720", "1730.71178397753", "1731.17917390182", "1737.12790025705",
"176", "1780", "179.386660661569", "1825", "184.772238294142",
"1846", "185", "1859", "187.905787924644", "1882", "1885", "1889",
"1905", "193", "196.830463238049", "199", "206.179616479748",
"208", "208.555306039675", "215.973004018167", "217", "221",
"226.231236862199", "23", "231", "232", "236.377317707137", "236.975808527841",
"239", "24", "248.229122360228", "25", "25.006996053829", "25.014924626149",
"25.0224494183214", "25.045214840124", "25.0624177868317", "25.0969480307356",
"25.1281596085623", "25.1692973020694", "25.2210730103966", "25.261518785926",
"25.3427120320241", "25.3729438809371", "25.4948050967564", "25.502972351872",
"25.6510661849217", "25.6792760646671", "25.8167441085175", "25.8982682744573",
"25.9995767010671", "253", "26", "26.1541720515152", "26.1991820171385",
"26.4152216723683", "26.4496562442932", "26.6473973349067", "26.7877044521904",
"26.8954475777233", "260.014517996404", "265", "266", "268.967694505081",
"27", "27.1591450517314", "27.1716567290231", "27.4382939445755",
"27.6052576910814", "27.7327276941622", "272.356296590851", "278",
"28", "28.0423069297069", "28.0927121328395", "28.3669176162769",
"28.6387494628908", "28.706469381617", "285.279744350548", "288",
"29", "29.060894006489", "29.2486985253466", "29.4301440619011",
"29.814191678482", "29.9285746772517", "298.811153836992", "299",
"30", "30.2130274348995", "30.6266593536767", "30.6851813622098",
"305", "307.170873254196", "31", "31.0551119940305", "31.38",
"31.4984256324846", "31.5262288687559", "31.956655522999", "312.977842386294",
"32", "32.4298712292389", "32.4604735067685", "32.82561384",
"32.9181560223779", "326", "327.808166871801", "33.4216063385245",
"33.4978810982094", "33.9403312904656", "34", "34.3400006020094",
"34.4744522289701", "34.6498194865811", "343.331533882608", "346",
"35", "35.0241023493733", "35.589426339596", "35.926471747402",
"35.929285755777", "351", "351.950203806801", "359.578404210861",
"36", "36.170580066137", "36.767730294917", "364", "37", "37.3511750540819",
"37.3810544441627", "37.5884983951345", "373", "376.580290324766",
"38", "38.0107403667906", "38.65698616", "38.932599392745", "39.32",
"39.3297189377729", "393", "394.369745245424", "40", "40.6932665915451",
"404.382545261556", "408", "41.1539470098798", "412.980340934988",
"414", "42", "42.6559317498839", "43", "43.0651798238046", "432.446633930023",
"434", "44", "44.8469363200958", "45.0676068884105", "452.804115504112",
"456", "465.637719971972", "47.1656191267561", "47.2968531559251",
"474.089143100815", "48", "482", "486", "49", "49.3638184092258",
"496.338849122065", "50", "50.0412599281857", "503", "51", "51.6670275190631",
"519.591022362781", "52", "529", "53.1216681850227", "536.935197912393",
"54.0803005677077", "543.883956419344", "555", "556", "56", "56.5866412461102",
"56.6089338777482", "569.256258229975", "58", "580", "59.2584773516901",
"595.746608482546", "60", "60.4931412285751", "606", "619.469005246511",
"62.0347463450789", "623.393463893253", "63", "64.9081539728981",
"64.9438340628061", "640", "65", "652.234689248171", "662", "664",
"67.9921244976501", "682.307104523925", "69", "69.9106506029712",
"700", "71", "71.1863059302521", "713.645929259951", "714.291855860478",
"72", "735", "74", "74.533385009776", "746.284102517739", "75.5939559687986",
"76", "770", "774", "78.0407014344389", "780.251452081395", "79",
"81", "81.715943250903", "811", "815.573680850085", "82", "82.0686071733396",
"822.15243629253", "849", "85.5671627911559", "852.271131437189",
"88", "89.4657993696359", "89.6027932649582", "890.35728158426",
"891", "927", "929.836912856086", "93", "93.8316660251566", "943.287593733742",
"95", "96", "97.9415301108653", "970.703882929996", "979", "98.2630285221179"
), class = "factor")), class = "data.frame", row.names = c(NA,
-420L))

Related

order geom_point by specific facet

I have a ggplot related question, which should be easy but I could not find the answer yet. I am trying to plot a faceted plot with the code below and this dataset (11 kB).
# Dot-and-whisker plot: one point per country-year at `estimate`, with a
# horizontal segment from `conf.low` to `conf.high`, drawn in one panel per
# level of `facet`. The y axis is ordered by reorder(countryyear, estimate).
ggplot(
  plot.dat,
  aes(x = estimate, y = reorder(countryyear, estimate))
) +
  geom_point() +
  geom_segment(aes(x = conf.low, xend = conf.high, yend = countryyear)) +
  facet_grid(. ~ facet) +
  scale_x_continuous(breaks = seq(0, 5, by = 1), limits = c(0, 5)) +
  labs(
    x = "Random Effect Estimate",
    y = "",
    title = "Random Slopes in Country*Year Groups from Northwestern Europe"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))
I would like countryyear to be organized by the values of estimate in the Extreme Right facet. Not quite sure how to order by values of a specific facet. Any ideas are welcome! Thanks.
Update: Here is the dput structure of a random subset of the dataset. It has some missing values, but it should work for the sake of the example. I also updated the download link above, that has the full version.
structure(list(estimate = c(1.41056902925372, 0.854859208455895,
1.16012834593894, 0.871339033194504, 0.803272289946221, 1.17540386134493,
0.996313357490551, 1.49940694539732, 1.33773365908762, 2.7318703090905,
1.19131935418045, 1.12765907711738, 0.746741192261761, 0.985847015192172,
0.912357310925342, 1.11582763712164, 1.21854572824977, 0.675712547978394,
0.566955524699616, 1.32611743759365, 0.519648352294682, 0.591013596394243,
1.30944973684044, 0.613722269599125, 1.13293279727271, 0.950788678552604,
1.1599446923567, 1.11493952112913, 0.95336321045095, 1.39002327097034,
0.794207546872633, 0.788545101449259, 1.01096883872495, 0.897407203907834,
1.38391605229103, 1.35754760293107, 1.0718508539761, 0.542191158958878,
0.757132752456427, 1.44172863221312, 1.04842251986171, 0.77260404885379,
0.879288027642055, 1.09372353598088, 0.745484830381145, 1.21211217249353,
0.628009608902132, 1.34864488674734), countryyear = structure(c(1L,
2L, 4L, 5L, 7L, 9L, 10L, 12L, 13L, 26L, 28L, 29L, 31L, 32L, 34L,
36L, 37L, 39L, 40L, 57L, 59L, 60L, 62L, 63L, 65L, 67L, 68L, 70L,
71L, 73L, 75L, 76L, 89L, 90L, 92L, 94L, 95L, 103L, 104L, 106L,
108L, 109L, 111L, 128L, 130L, 132L, 133L, 135L), .Label = c("AT02",
"AT04", "AT06", "AT14", "AT16", "BE02", "BE04", "BE06", "BE08",
"BE10", "BE12", "BE14", "BE16", "BG06", "BG08", "BG10", "BG12",
"CH14", "CZ02", "CZ04", "CZ08", "CZ10", "CZ12", "CZ14", "CZ16",
"DE02", "DE04", "DE06", "DE08", "DE10", "DE12", "DE14", "DE16",
"DK02", "DK04", "DK06", "DK08", "DK10", "DK12", "DK14", "EE04",
"EE06", "EE08", "EE10", "EE12", "EE14", "EE16", "ES02", "ES04",
"ES06", "ES08", "ES10", "ES12", "ES14", "ES16", "FI02", "FI04",
"FI06", "FI08", "FI10", "FI12", "FI14", "FI16", "FR06", "FR08",
"FR10", "FR12", "FR14", "FR16", "GB02", "GB04", "GB06", "GB08",
"GB10", "GB12", "GB14", "GB16", "GR02", "GR04", "GR08", "GR10",
"HU02", "HU06", "HU08", "HU10", "HU12", "HU14", "HU16", "IE02",
"IE04", "IE06", "IE08", "IE10", "IE12", "IE14", "IE16", "IT04",
"IT12", "IT16", "LT10", "LT12", "LT14", "NL02", "NL04", "NL06",
"NL08", "NL10", "NL12", "NL14", "NL16", "NO14", "PL02", "PL04",
"PL06", "PL08", "PL10", "PL12", "PL14", "PL16", "PT02", "PT04",
"PT06", "PT08", "PT10", "PT12", "PT14", "PT16", "SE02", "SE04",
"SE06", "SE08", "SE10", "SE12", "SE14", "SE16", "SI02", "SI04",
"SI06", "SI08", "SI10", "SI12", "SI14", "SI16", "SK04", "SK06",
"SK08", "SK10", "SK12"), class = "factor"), facet = structure(c(1L,
3L, 1L, 4L, 5L, 3L, 4L, 1L, 1L, 1L, 5L, 5L, 4L, 5L, 3L, 1L, 2L,
4L, 5L, 2L, 1L, 4L, 2L, 5L, 2L, 3L, 4L, 3L, 2L, 5L, 5L, 4L, 2L,
5L, 4L, 5L, 3L, 1L, 4L, 5L, 3L, 5L, 4L, 1L, 5L, 2L, 4L, 1L), .Label = c("Intercept",
"Extreme Left", "Center", "Right", "Extreme Right"), class = "factor"),
conf.low = c(1.16824810706745, 0.686215051613965, 0.910277310292764,
0.591705078386698, 0.37357342399703, 0.947951001435781, 0.663296044193037,
1.18794112232166, 1.06645119085865, 2.33578182814618, 0.580210898576738,
0.564235690522211, 0.530859530342114, 0.516191258265551,
0.730992343373883, 0.862424540370486, 0.827891784352444,
0.427638276259852, 0.275692447335368, 0.829763907986328,
0.370078643492081, 0.321852705445509, 0.83550621863293, 0.289836810427436,
0.847226120408727, 0.780056160572728, 0.873143885861924,
0.869757467125519, 0.615741777890997, 0.649483531741787,
0.349657606457465, 0.523294407847395, 0.670109418373736,
0.36656743494149, 0.952201390937053, 0.777207016700884, 0.888128473009524,
0.397085597526946, 0.479828726362257, 0.614533313431094,
0.813336887981082, 0.3129232351085, 0.61435321820328, 0.854801028643867,
0.346698059397102, 0.805414039007076, 0.434676644041643,
1.07780736338027), conf.high = c(1.70315275860739, 1.06494933995261,
1.47855797769819, 1.28312522319126, 1.7272277157504, 1.45743211956315,
1.49652679976667, 1.8925358720741, 1.67802460909168, 3.19512520208851,
2.44607918797515, 2.25369471581694, 1.05041423643869, 1.8828182806291,
1.13872035780431, 1.44368725318228, 1.79353596677755, 1.06769546329854,
1.16593171156554, 2.11938292490653, 0.729667639003753, 1.08526995489865,
2.05223919950836, 1.29954170985538, 1.51498719434776, 1.15888977865399,
1.54095070825389, 1.4292376699955, 1.47610807594453, 2.97492484321718,
1.80395225460704, 1.18824770090216, 1.52521060717706, 2.19697554354282,
2.01136404338166, 2.37122858469145, 1.29357889999432, 0.740322123703373,
1.19469713534712, 3.38237391450413, 1.35145693795059, 1.90755095606211,
1.25847381058047, 1.39942645489832, 1.60297301142912, 1.82417470710871,
0.907332092210651, 1.68753999308876)), row.names = c(1L,
9L, 17L, 25L, 33L, 41L, 49L, 57L, 65L, 128L, 136L, 144L, 152L,
160L, 168L, 176L, 184L, 192L, 200L, 283L, 291L, 299L, 307L, 315L,
323L, 331L, 339L, 347L, 355L, 363L, 371L, 379L, 442L, 450L, 458L,
466L, 474L, 512L, 520L, 528L, 536L, 544L, 552L, 640L, 648L, 656L,
664L, 672L), class = "data.frame")

Calculate percentage given condition

I am new to this website and to coding as well. I was wondering if any of you could help me out
I need to find the top 5 movies by rating distribution, i.e. for each movie calculate the percentage of its ratings that are 4 stars or higher, and then keep the five movies with the highest percentage.
So far I was only able to calculate the number of occurrences using dplyr.
Is it possible to calculate it using dplyr (something similar to my coding)?
I'm not sure whether I need to mutate to come up with the solution or if there's another way to do so.
My code so far:
# Drop rows containing NA, then count the ratings per movie and show the five
# movies with the most ratings (counts only; no percentage is computed here).
dfAux1 <- na.omit(dfAux)
dfAux1 %>%
  group_by(movie) %>%
  summarise(tot = n()) %>%
  arrange(desc(tot)) %>%
  head(n = 5)
the result should be something like this:
**Expected result**:
0.7000000, 'The Shawshank Redemption'
0.5333333, 'Star Wars IV - A New Hope'
0.5000000, 'Gladiator'
0.4444444, 'Blade Runner'
0.4375000, 'The Silence of the Lambs'
and so far this is my result:
# A tibble: 5 x 2
movie tot
<fctr> <int>
1 Toy Story 17
2 The Silence of the Lambs 16
3 Star Wars IV - A New Hope 15
4 Star Wars VI - Return of the Jedi 14
5 Independence Day 13
edit:
str(dfAux1)
'data.frame': 241 obs. of 2 variables:
$ Rating: int 1 5 4 2 4 5 4 2 3 2 ...
$ movie : Factor w/ 20 levels "Star Wars IV - A New Hope",..: 1 1 1 1 1 1 1 1 1 1 ...
- attr(*, "na.action")=Class 'omit' Named int [1:159] 3 4 7 16 17 23 27 28 34 36 ...
.. ..- attr(*, "names")= chr [1:159] "3" "4" "7" "16" ...
dput(dfAux1)
structure(list(Rating = c(1L, 5L, 4L, 2L, 4L, 5L, 4L, 2L, 3L,
2L, 3L, 4L, 4L, 5L, 1L, 5L, 3L, 3L, 3L, 4L, 1L, 2L, 1L, 5L, 3L,
4L, 5L, 1L, 2L, 2L, 4L, 4L, 3L, 5L, 2L, 3L, 1L, 1L, 2L, 2L, 5L,
1L, 4L, 1L, 4L, 5L, 5L, 5L, 4L, 4L, 4L, 2L, 4L, 1L, 3L, 2L, 3L,
2L, 4L, 2L, 5L, 3L, 4L, 1L, 5L, 4L, 2L, 1L, 1L, 4L, 2L, 4L, 5L,
5L, 2L, 1L, 4L, 2L, 1L, 4L, 2L, 3L, 2L, 4L, 4L, 5L, 2L, 4L, 3L,
2L, 2L, 4L, 2L, 2L, 2L, 3L, 4L, 1L, 5L, 4L, 3L, 5L, 2L, 1L, 3L,
4L, 4L, 2L, 3L, 4L, 1L, 3L, 2L, 5L, 3L, 2L, 3L, 4L, 1L, 1L, 4L,
1L, 4L, 5L, 1L, 3L, 2L, 2L, 3L, 5L, 5L, 1L, 2L, 3L, 5L, 2L, 3L,
1L, 2L, 1L, 4L, 1L, 2L, 2L, 3L, 3L, 2L, 1L, 1L, 1L, 5L, 2L, 4L,
1L, 4L, 3L, 1L, 2L, 2L, 3L, 4L, 2L, 3L, 2L, 4L, 3L, 4L, 3L, 2L,
2L, 4L, 5L, 2L, 1L, 5L, 1L, 4L, 5L, 2L, 3L, 3L, 2L, 5L, 5L, 4L,
1L, 3L, 1L, 2L, 1L, 5L, 5L, 2L, 4L, 2L, 4L, 2L, 5L, 2L, 5L, 5L,
1L, 5L, 1L, 3L, 2L, 2L, 3L, 5L, 1L, 3L, 1L, 5L, 3L, 3L, 1L, 2L,
4L, 1L, 5L, 3L, 1L, 1L, 5L, 5L, 1L, 5L, 3L, 3L, 2L, 3L, 3L, 2L,
2L, 2L, 5L, 4L, 2L, 1L, 4L, 5L), movie = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L,
17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L), .Label = c("Star Wars IV - A New Hope",
"Star Wars VI - Return of the Jedi", "Forrest Gump", "The Shawshank Redemption",
"The Silence of the Lambs", "Gladiator", "Toy Story", "Saving Private Ryan",
"Pulp Fiction", "Stand by Me", "Shakespeare in Love", "Total Recall",
"Independence Day", "Blade Runner", "Groundhog Day", "The Matrix",
"Schindler's List", "The Sixth Sense", "Raiders of the Lost Ark",
"Babe"), class = "factor")), .Names = c("Rating", "movie"), row.names = c(1L,
2L, 5L, 6L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 18L, 19L, 20L,
21L, 22L, 24L, 25L, 26L, 29L, 30L, 31L, 32L, 33L, 35L, 38L, 39L,
40L, 41L, 45L, 46L, 47L, 51L, 52L, 54L, 56L, 58L, 60L, 62L, 63L,
65L, 66L, 67L, 69L, 70L, 73L, 78L, 80L, 81L, 82L, 83L, 85L, 87L,
88L, 89L, 90L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 100L, 101L,
102L, 104L, 105L, 107L, 108L, 109L, 111L, 115L, 116L, 118L, 119L,
121L, 122L, 123L, 124L, 126L, 128L, 129L, 130L, 131L, 132L, 133L,
134L, 135L, 137L, 138L, 139L, 140L, 141L, 144L, 145L, 146L, 147L,
149L, 150L, 153L, 156L, 159L, 160L, 164L, 166L, 167L, 168L, 170L,
172L, 175L, 177L, 178L, 179L, 180L, 181L, 182L, 183L, 185L, 186L,
189L, 194L, 195L, 196L, 199L, 200L, 201L, 202L, 205L, 206L, 207L,
209L, 212L, 216L, 217L, 219L, 220L, 222L, 223L, 224L, 225L, 226L,
228L, 229L, 231L, 233L, 234L, 235L, 239L, 241L, 242L, 243L, 244L,
246L, 248L, 249L, 250L, 251L, 252L, 253L, 254L, 255L, 261L, 263L,
264L, 265L, 267L, 268L, 274L, 278L, 280L, 282L, 283L, 284L, 286L,
288L, 289L, 292L, 293L, 294L, 295L, 296L, 300L, 301L, 303L, 305L,
307L, 310L, 311L, 312L, 314L, 316L, 317L, 319L, 320L, 321L, 322L,
323L, 324L, 325L, 328L, 330L, 334L, 335L, 336L, 338L, 340L, 341L,
342L, 343L, 344L, 345L, 346L, 348L, 350L, 351L, 356L, 358L, 360L,
362L, 363L, 364L, 367L, 368L, 371L, 373L, 375L, 376L, 378L, 380L,
383L, 384L, 386L, 387L, 389L, 391L, 392L, 395L, 396L, 398L), class = "data.frame", na.action = structure(c(3L,
4L, 7L, 16L, 17L, 23L, 27L, 28L, 34L, 36L, 37L, 42L, 43L, 44L,
48L, 49L, 50L, 53L, 55L, 57L, 59L, 61L, 64L, 68L, 71L, 72L, 74L,
75L, 76L, 77L, 79L, 84L, 86L, 91L, 99L, 103L, 106L, 110L, 112L,
113L, 114L, 117L, 120L, 125L, 127L, 136L, 142L, 143L, 148L, 151L,
152L, 154L, 155L, 157L, 158L, 161L, 162L, 163L, 165L, 169L, 171L,
173L, 174L, 176L, 184L, 187L, 188L, 190L, 191L, 192L, 193L, 197L,
198L, 203L, 204L, 208L, 210L, 211L, 213L, 214L, 215L, 218L, 221L,
227L, 230L, 232L, 236L, 237L, 238L, 240L, 245L, 247L, 256L, 257L,
258L, 259L, 260L, 262L, 266L, 269L, 270L, 271L, 272L, 273L, 275L,
276L, 277L, 279L, 281L, 285L, 287L, 290L, 291L, 297L, 298L, 299L,
302L, 304L, 306L, 308L, 309L, 313L, 315L, 318L, 326L, 327L, 329L,
331L, 332L, 333L, 337L, 339L, 347L, 349L, 352L, 353L, 354L, 355L,
357L, 359L, 361L, 365L, 366L, 369L, 370L, 372L, 374L, 377L, 379L,
381L, 382L, 385L, 388L, 390L, 393L, 394L, 397L, 399L, 400L), .Names = c("3",
"4", "7", "16", "17", "23", "27", "28", "34", "36", "37", "42",
"43", "44", "48", "49", "50", "53", "55", "57", "59", "61", "64",
"68", "71", "72", "74", "75", "76", "77", "79", "84", "86", "91",
"99", "103", "106", "110", "112", "113", "114", "117", "120",
"125", "127", "136", "142", "143", "148", "151", "152", "154",
"155", "157", "158", "161", "162", "163", "165", "169", "171",
"173", "174", "176", "184", "187", "188", "190", "191", "192",
"193", "197", "198", "203", "204", "208", "210", "211", "213",
"214", "215", "218", "221", "227", "230", "232", "236", "237",
"238", "240", "245", "247", "256", "257", "258", "259", "260",
"262", "266", "269", "270", "271", "272", "273", "275", "276",
"277", "279", "281", "285", "287", "290", "291", "297", "298",
"299", "302", "304", "306", "308", "309", "313", "315", "318",
"326", "327", "329", "331", "332", "333", "337", "339", "347",
"349", "352", "353", "354", "355", "357", "359", "361", "365",
"366", "369", "370", "372", "374", "377", "379", "381", "382",
"385", "388", "390", "393", "394", "397", "399", "400"), class = "omit"))
I am using data.table instead of dplyr
library(data.table)
setDT(dfAux1) # convert dfAux1 to a data.table by reference (no copy is made)
# One aggregation per movie: `tot` is the number of ratings and `percent` is
# the share of those ratings that are 4 or higher. Aggregating directly,
# instead of filtering to `Rating >= 4` first and de-duplicating afterwards
# with `.SD[1]`, keeps movies that have no rating of 4 or higher (they get
# percent = 0 rather than disappearing) and avoids building one row per rating.
# The result is sorted by `tot` in descending order.
dfAux1[
  , .(percent = sum(Rating >= 4, na.rm = TRUE) / .N, tot = .N), by = movie
][order(-tot)]
# movie percent tot
# 1: Toy Story 0.35294118 17
# 2: The Silence of the Lambs 0.43750000 16
# 3: Star Wars IV - A New Hope 0.53333333 15
# 4: Star Wars VI - Return of the Jedi 0.35714286 14
# 5: Independence Day 0.30769231 13
# 6: Gladiator 0.50000000 12
# 7: Total Recall 0.08333333 12
# 8: Groundhog Day 0.41666667 12
# 9: The Matrix 0.41666667 12
# 10: Schindler's List 0.33333333 12
# 11: The Sixth Sense 0.33333333 12
# 12: Saving Private Ryan 0.36363636 11
# 13: Pulp Fiction 0.36363636 11
# 14: Stand by Me 0.36363636 11
# 15: Shakespeare in Love 0.27272727 11
# 16: Raiders of the Lost Ark 0.27272727 11
# 17: Forrest Gump 0.30000000 10
# 18: The Shawshank Redemption 0.70000000 10
# 19: Babe 0.40000000 10
# 20: Blade Runner 0.44444444 9
A single line solution using data.table and data from OP could be as:
library(data.table)
# setDT() converts dfAux1 to a data.table in place. Per movie, compute the
# share of ratings that are 4 or higher, sort by that share in descending
# order and keep the first five rows.
setDT(dfAux1)
dfAux1[, .(pct = sum(Rating >= 4) / .N), by = movie][order(-pct)][1:5]
movie pct
1: The Shawshank Redemption 0.7000000
2: Star Wars IV - A New Hope 0.5333333
3: Gladiator 0.5000000
4: Blade Runner 0.4444444
5: The Silence of the Lambs 0.4375000
Overview
I used the dplyr package to group your data by the movie column and perform calculations based on the rating column.
In summarise(), I created three new columns:
Total_Review: counts the total number of reviews per movie.
FourPlus_Rating: counts the subset of reviews with a Rating value of 4 or higher.
Per_FourPlus_Rating: divides FourPlus_Rating by Total_Review.
I then arranged the data in descending order based on Per_FourPlus_Rating. Finally, I called head() to specify that I only want the tibble to return the first 5 rows.
Reproducible Example
# install the necessary package only if it is not already available, so the
# script does not reinstall dplyr on every run
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages(pkgs = "dplyr")
}
# load necessary package
library(dplyr)
# view first six rows
# NOTE(review): `df` is not created in this snippet; presumably it is the
# asker's ratings data frame (`dfAux1` in the question) -- confirm.
head(x = df)
# Rating movie
# 1 1 Star Wars IV - A New Hope
# 2 5 Star Wars IV - A New Hope
# 5 4 Star Wars IV - A New Hope
# 6 2 Star Wars IV - A New Hope
# 8 4 Star Wars IV - A New Hope
# 9 5 Star Wars IV - A New Hope
# perform calculations using
# dplyr functions
df %>%
  group_by(movie) %>%
  summarise(
    # total number of reviews per movie
    Total_Review = n(),
    # number of reviews rated 4 or higher; sum() over the logical vector
    # counts the TRUEs, and na.rm = TRUE skips missing ratings just as
    # which() did
    FourPlus_Rating = sum(Rating >= 4, na.rm = TRUE),
    # share of 4+ reviews, reusing the two columns computed above
    Per_FourPlus_Rating = FourPlus_Rating / Total_Review
  ) %>%
  arrange(desc(Per_FourPlus_Rating)) %>%
  head(n = 5)
# A tibble: 5 x 4
# movie Total_Review FourPlus_Rating Per_FourPlus_Rati…
# <fct> <int> <int> <dbl>
# 1 The Shawshank Rede… 10 7 0.700
# 2 Star Wars IV - A N… 15 8 0.533
# 3 Gladiator 12 6 0.500
# 4 Blade Runner 9 4 0.444
# 5 The Silence of the… 16 7 0.438
# end of script #
this is a dplyr solution:
# Count, per movie, the ratings of 4 or higher (`percentHigh`) and all ratings
# (`percentAll`); despite their names both columns hold counts. The two tables
# are joined on `movie`, the ratio is stored in `percentage`, and the five rows
# with the largest ratio are kept with only the movie and percentage columns.
# NOTE(review): the second assignment reuses the name `dfAux`, which the
# question uses for the raw data -- confirm that overwriting it is intended.
dfAuxhigh <- dfAux1 %>%
  filter(Rating >= 4) %>%
  group_by(movie) %>%
  summarize(percentHigh = n())
dfAux <- dfAux1 %>%
  group_by(movie) %>%
  summarize(percentAll = n())
result <- merge(dfAuxhigh, dfAux, by = "movie") %>%
  mutate(percentage = percentHigh / percentAll)
result <- result[
  order(result$percentage, decreasing = TRUE)[1:5],
  c("movie", "percentage")
]
# Top 5 movies by share of ratings that are 4 or higher, formatted as a
# percentage string.
# NOTE(review): `df` is not created in this snippet; presumably it is the
# ratings data frame (`dfAux1` in the question) -- confirm.
library(tidyverse)
df %>%
group_by(movie, Rating) %>%
summarise(n = n()) %>% #< number of ratings per movie and rating value
mutate(freq = n/sum(n)) %>% #< share of each rating value within its movie (result is still grouped by movie)
filter(Rating >=4) %>% #< keep only the rows for ratings of 4 or above
summarise(freq = sum(freq)) %>% #< add up the kept shares: one row per movie
top_n(5) %>% #< keep the 5 largest; no `wt` is given, so top_n() ranks by the last column (freq)
arrange(desc(freq)) %>% #< highest share first
mutate(freq = paste0(round(freq*100, 2), "%")) #< convert to a percentage string such as "53.33%"
#> movie freq
#> 1 The Shawshank Redemption 70%
#> 2 Star Wars IV - A New Hope 53.33%
#> 3 Gladiator 50%
#> 4 Blade Runner 44.44%
#> 5 The Silence of the Lambs 43.75%

Bubble Plot of Negative and Positive values in space ggplot2 R

I would like to make a bubble plot using ggplot2 in R. My code and data are found below.
Please leave the colors as they are. I am having difficulty scaling positive and negative values equally: for example, -3 is drawn smaller than +3. I would like the bubble size to depend only on the magnitude of the value, irrespective of its sign.
I would also like to distinguish negative from positive values using some kind of outline (e.g. line type) for the bubbles, and to include that distinction in the legend.
Also remove the "Mean" part of the legend.
Thanks very much for your great help.
#=====================================================================
library(ggplot2)
# Open an 8 x 6 graphics window only when no device is open yet.
if (dev.cur() == 1) x11(width = 8, height = 6)
par(mfcol = c(1, 1))
# Bubble plot of the site residuals: position from Eastings/Northings, bubble
# size from Mean, fill colour from Region, and each point labelled by Site.
# Changes from the original call:
# - `label = Site` refers to the column directly; `site.resiudal$Site` inside
#   aes() bypasses the data argument.
# - `legend = FALSE` was dropped from aes(): `legend` is not an aesthetic, so
#   mapping it had no effect.
# - scale_area() has been removed from ggplot2; scale_size() with the same
#   `range` is the area-based size scale that replaces it.
p <- ggplot(
  site.resiudal,
  aes(x = Eastings, y = Northings, size = Mean, label = Site)
) +
  # theme(legend.position = "none") +
  geom_point(shape = 21) +
  geom_point(aes(colour = factor(Region))) +
  scale_size(range = c(1, 15)) +
  scale_alpha(guide = "none") +
  scale_x_continuous(name = "Longitude", limits = c(-120, -95)) +
  scale_y_continuous(name = "Latitude", limits = c(48, 61)) +
  geom_text(size = 4) +
  scale_colour_manual(
    name = "Region",
    labels = c("A", "B", "C", "D", "E"),
    values = c(
      "1" = "firebrick3", "2" = "palegreen4", "3" = "sandybrown",
      "4" = "red", "5" = "gray0"
    )
  ) +
  theme(legend.title = element_text(colour = "black", size = 16, face = "plain")) +
  theme(legend.text = element_text(colour = "black", size = 16, face = "plain"))
p
#Data[["sign"]] = ifelse(Data[["Mean"]] >= 0, "positive", "negative")
#=================================================
structure(list(Site = structure(c(101L, 102L, 105L, 107L, 108L,
110L, 111L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 16L, 20L, 47L, 52L, 53L, 55L, 91L, 92L, 93L, 94L, 95L,
96L, 99L, 15L, 17L, 18L, 19L, 21L, 114L, 23L, 26L, 36L, 59L,
60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L,
73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L,
86L, 87L, 88L, 89L, 98L, 100L, 103L, 104L, 106L, 109L, 112L,
113L, 115L, 116L, 117L, 119L, 42L, 44L, 46L, 48L, 49L, 50L, 51L,
54L, 56L, 57L, 58L, 90L, 97L, 118L, 120L, 22L, 24L, 25L, 27L,
28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 37L, 38L, 39L, 40L, 41L,
43L, 45L), .Label = c("G100", "G101", "G102", "G103", "G104",
"G105", "G106", "G107", "G108", "G109", "G110", "G111", "G112",
"G113", "G114", "G115", "G116", "G117", "G118", "G119", "G120",
"GG10", "GG11", "GG12", "GG13", "GG14", "GG15", "GG16", "GG17",
"GG18", "GG19", "GG20", "GG21", "GG22", "GG23", "GG24", "GG25",
"GG26", "GG27", "GG28", "GG29", "GG30", "GG31", "GG32", "GG33",
"GG34", "GG35", "GG36", "GG37", "GG38", "GG39", "GG40", "GG41",
"GG42", "GG43", "GG44", "GG45", "GG46", "GG47", "GG48", "GG49",
"GG50", "GG51", "GG52", "GG53", "GG54", "GG55", "GG56", "GG57",
"GG58", "GG59", "GG60", "GG61", "GG62", "GG63", "GG64", "GG65",
"GG66", "GG67", "GG68", "GG69", "GG70", "GG71", "GG72", "GG73",
"GG74", "GG75", "GG76", "GG77", "GG78", "GG79", "GG80", "GG81",
"GG82", "GG83", "GG84", "GG85", "GG86", "GG87", "GG88", "GG89",
"GG90", "GG91", "GG92", "GG93", "GG94", "GG95", "GG96", "GG97",
"GG98", "GG99", "GGG1", "GGG2", "GGG3", "GGG4", "GGG5", "GGG6",
"GGG7", "GGG8", "GGG9"), class = "factor"), Name = structure(c(53L,
87L, 29L, 92L, 36L, 76L, 102L, 103L, 119L, 2L, 9L, 11L, 45L,
47L, 49L, 54L, 90L, 30L, 105L, 66L, 78L, 107L, 81L, 42L, 41L,
43L, 59L, 110L, 24L, 27L, 56L, 61L, 64L, 118L, 40L, 21L, 44L,
70L, 108L, 25L, 58L, 98L, 83L, 5L, 19L, 26L, 31L, 38L, 55L, 60L,
71L, 74L, 75L, 85L, 95L, 120L, 109L, 1L, 67L, 20L, 50L, 63L,
106L, 111L, 116L, 62L, 6L, 99L, 114L, 73L, 84L, 89L, 93L, 97L,
115L, 80L, 10L, 12L, 88L, 79L, 15L, 17L, 33L, 35L, 94L, 100L,
3L, 16L, 37L, 101L, 117L, 8L, 39L, 48L, 86L, 113L, 23L, 13L,
69L, 96L, 104L, 32L, 65L, 82L, 14L, 22L, 18L, 46L, 68L, 72L,
77L, 91L, 112L, 4L, 7L, 28L, 51L, 57L, 52L, 34L), .Label = c("ANEROID",
"ARBORG", "ATHABASCA", "BANFF", "BANGOR", "BATTLEFORD", "BEAVER MINES",
"BEAVERLODGE", "BERENS RIVER", "BIRTLE", "BISSETT", "BRANDON",
"BUFFALO NARROWS", "CALGARY", "CALMAR", "CAMPSIE", "CAMROSE",
"CARWAY", "CEYLON", "CHAPLIN", "CHURCHILL", "CLARESHOLM", "COLD LAKE",
"COLLINS BAY", "CORONATION", "COTE", "CREE LAKE", "CROWSNEST",
"CYPRESS RIVER", "DAUPHIN", "DAVIDSON", "DRUMHELLER", "EDMONTON",
"EDSON", "ELK POINT", "EMERSON AUT", "ENILDA-BERG", "ESTEVAN",
"FAIRVIEW", "FLIN FLON", "FORT CHIPEWYAN", "FORT MCMURRAY", "FORT VERMILION",
"GILLAM", "GIMLI", "GLEICHEN", "GRAND RAPIDS", "GRANDE PRAIRIE",
"GREAT FALLS", "HIGH POINT", "HIGHWOOD", "HINTON VALLEY", "HUDSON BAY",
"INDIAN BAY", "INDIAN HEAD", "ISLAND FALLS", "JASPER WARDEN",
"JENNER", "KEG RIVER RS", "KELLIHER", "KEY LAKE", "KINDERSLEY",
"KLINTONEL", "LA RONGE", "LACOMBE 2", "LANGRUTH WEST", "LEADER",
"LETHBRIDGE", "LOON LAKE", "LYNN LAKE", "MANOR", "MEDICINE HAT",
"MELFORT", "MOOSE JAW", "MOOSOMIN", "MORDEN", "MOUNTAIN VIEW",
"NEEPAWA MURRAY", "NINETTE", "NIPAWIN", "NORWAY HOUSE", "OLDS",
"ONEFOUR", "OUTLOOK", "PASWEGIN", "PEACE RIVER", "PELLY", "PIERSON",
"PILGER", "PINAWA WNRE", "PINCHER CREEK ", "PORTAGE PRAIRIE",
"PRINCE ALBERT", "RANFURLY", "REGINA", "ROCKY MT HOUSE ", "SASKATOON",
"SCOTFIELD", "SCOTT", "SION", "SLAVE LAKE", "SPRAGUE", "STEINBACH",
"STETTLER NORTH", "SWAN RIVER", "SWIFT CURRENT", "THE PAS", "THOMPSON",
"TONKIN", "URANIUM CITY ", "VAL-MARIE", "VAUXHALL", "WABASCA RS",
"WASECA", "WASKESIU LAKE", "WEST POPLAR", "WHITECOURT", "WHITESAND DAM",
"WINNIPEG", "YELLOW GRASS"), class = "factor"), Mean = c(-0.020525899,
0.333863493, 0.210353772, NA, NA, 0.093520458, 0.341295298, NA,
-0.175074657, 0.09834825, 0.075610648, NA, -0.117503802, 0.18309367,
0.25246942, 0.221329766, 0.072167004, -0.094766032, NA, NA, 0.19783711,
-0.166351357, -0.0996169, -0.038555432, -0.028092042, 0.297855371,
0.108263891, 0.002057761, 0.327731415, NA, 0.180100638, 0.193837736,
-0.003306948, 0.178881894, 0.3655509, -0.235975798, -0.176154056,
-0.080433735, -0.110955273, -0.228010105, 0.048103255, -0.116681527,
-0.073042421, NA, NA, 0.035356012, 0.297171565, -0.197834719,
0.036412958, 0.055218077, NA, -0.236229087, 0.265211081, 0.271625885,
-0.293179359, 0.113744571, -0.207770026, 0.100471248, -0.071569464,
NA, NA, NA, -0.052716493, 0.057385851, 0.090340517, -0.30456625,
-0.234420722, 0.082287977, 0.009973663, NA, -0.06405062, 0.074703356,
-0.208329196, -0.272401078, 0.217991554, -0.043619919, -0.208901155,
-0.020022401, 0.111495318, NA, 0.38239749, 0.199136959, -0.177740258,
NA, 0.147515615, 0.309306538, 0.298741467, 0.068170296, NA, -0.02102765,
0.001754313, -0.010196512, 0.108254156, -0.228183063, -0.196261239,
NA, -0.167054722, 0.039949534, 0.154337034, -0.020855461, 0.136010278,
NA, 0.096997744, NA, -0.241963754, 0.660176529, 0.423554314,
0.190305726, -0.210778787, -0.261148915, NA, 0.054264129, -0.098706619,
-0.138776994, NA, NA, NA, -0.113823745, 0.373292721, -0.047060083
), Eastings = c(-102.5800018, -101.8700027, -99.08000183, -98.26999664,
-97.23000336, -98.08000183, -95.59999847, -96.76999664, -97.23000336,
-97.08000183, -97.02999878, -95.69999695, -97.01999664, -99.27999878,
-96, -95.19999695, -96.06999969, -100.0500031, -101.2300034,
-98.80000305, -99.56999969, -101.0999985, -97.84999847, -111.2200012,
-111.1200027, -116.0299988, -117.6200027, -108.4800034, -103.6999969,
-107.1299973, -102.3499985, -105.6200027, -105.2699966, -103.1500015,
-101.8799973, -94.06999969, -94.72000122, -101.0800018, -97.87000275,
-111.4499969, -111.1999969, -111.3499985, -110.4700012, -102.2799988,
-104.6500015, -101.7799988, -105.9800034, -102.9700012, -103.6500015,
-103.75, -102.0999985, -105.5500031, -101.6699982, -103.9199982,
-104.6699982, -104.1800003, -102.2300034, -107.3000031, -109.5,
-106.6500015, -107.9300003, -108.9199982, -107.7300034, -107.8499985,
-106.3799973, -109.1800003, -108.25, -108.8300018, -109.4000015,
-104.5999985, -107.0500031, -105.1500015, -105.6699982, -106.7200012,
-106.0699997, -104, -101.0500031, -99.94999695, -101.2699966,
-99.65000153, -113.8499985, -112.8199997, -113.5800018, -111.0699997,
-111.7300034, -114.1200027, -113.2799988, -114.6800003, -116.3000031,
-114.7799988, -115.7799988, -119.4000015, -118.5299988, -118.8799973,
-117.4499969, -113.8300018, -110.2799988, -108.4300003, -109.0999985,
-114.9199982, -112.7200012, -112.8700027, -113.75, -114.0999985,
-114.0199966, -113.7300034, -113.3799973, -113.0500031, -112.8000031,
-110.7200012, -113.6299973, -113.9800034, -112.1299973, -115.5500031,
-114.1800003, -114.4800034, -114.3700027, -118.0299988, -117.5299988,
-116.4499969), Northings = c(52.88000107, 52.08000183, 49.54999924,
49.95000076, 49, 49.18000031, 49.02000046, 49.52999878, 49.91999817,
50.93000031, 52.34999847, 51.02999878, 50.63000107, 53.15000153,
50.47000122, 49.61999893, 50.18000031, 51.09999847, 52.11999893,
50.41999817, 50.15000153, 53.97000122, 53.97000122, 56.65000153,
58.77000046, 58.38000107, 57.75, 59.56999969, 58.18000031, 57.34999847,
55.52999878, 57.25, 55.15000153, 56.22999954, 54.77000046, 58.72999954,
56.34999847, 56.86999893, 55.79999924, 52.06999969, 50.72000122,
51.58000183, 49.11999893, 50.90000153, 49.38000107, 51.52000046,
51.27000046, 49.22000122, 50.54999924, 51.25, 49.61999893, 50.33000183,
50.13000107, 51.97999954, 50.43000031, 49.81999969, 51.20000076,
49.72000122, 50.90000153, 50.47000122, 50.97999954, 49.68000031,
50.27000046, 49.36999893, 49, 51.52000046, 52.77000046, 52.36999893,
53.13000107, 52.81999969, 51.47999954, 52.41999817, 53.22000122,
52.16999817, 53.91999817, 53.33000183, 50.43000031, 49.91999817,
49.18000031, 49.41999817, 53.27999878, 53.02999878, 53.31999969,
53.88000107, 53.41999817, 53.88000107, 54.72000122, 54.13000107,
55.41999817, 55.27999878, 54.15000153, 55.20000076, 56.08000183,
55.18000031, 56.22999954, 55.97000122, 54.41999817, 55.83000183,
54.04999924, 52.41999817, 52.33000183, 51.47000122, 52.45000076,
51.77999878, 51.11999893, 49.93000031, 49, 50.88000107, 49.63000107,
50.02000046, 49.13000107, 49.52000046, 50.04999924, 51.20000076,
49.47000122, 49.63000107, 50.54999924, 52.93000031, 53.40000153,
53.58000183), Region = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L)), .Names = c("Site",
"Name", "Mean", "Eastings", "Northings", "Region"), class = "data.frame", row.names = c(NA,
-120L))
#============================================
Use abs() so that the bubble size depends only on the magnitude of the value.
For example, instead of size=Mean use size=abs(Mean).
Then, you can track the sign using shape (or some other aesthetic, but color and size are already taken)
replace:
geom_point(shape=21)+
geom_point(aes(colour = factor(Region))) +
with the single line
geom_point(aes(shape=factor(sign(Mean)), colour = factor(Region))) +
If you'd like, you can also add lines such as
scale_shape_discrete(name="Mean Is", breaks=c(-1, 1), labels=c("Negative", "Positive"))
guides(size=FALSE)

Internal ordering of facets ggplot2

I'm trying to plot facets in ggplot2, but I am struggling to get the internal ordering of the different facets right. The data looks like this:
head(THAT_EXT)
ID FILE GENRE NODE
1 CKC_1823_01 CKC Novels better
2 CKC_1824_01 CKC Novels better
3 EW9_192_03 EW9 Popular Science better
4 H0B_265_01 H0B Popular Science sad
5 CS2_231_03 CS2 Academic Prose desirable
6 FED_8_05 FED Academic Prose certain
str(THAT_EXT)
'data.frame': 851 obs. of 4 variables:
$ ID : Factor w/ 851 levels "A05_122_01","A05_277_07",..: 345 346 439 608 402 484 319 395 228 5 ...
$ FILE : Factor w/ 241 levels "A05","A06","A0K",..: 110 110 127 169 120 135 105 119 79 2 ...
$ GENRE: Factor w/ 5 levels "Academic Prose",..: 4 4 5 5 1 1 1 5 1 5 ...
$ NODE : Factor w/ 115 levels "absurd","accepted",..: 14 14 14 89 23 16 59 59 18 66 ...
Part of the problem is that I can't get the sorting right. Here is the code that I use for sorting NODE:
# Re-level NODE so that its levels run from the most frequent to the least
# frequent value, counted over the whole data frame (not per GENRE).
THAT_EXT$NODE <- factor(
  THAT_EXT$NODE,
  levels = names(sort(table(THAT_EXT$NODE), decreasing = TRUE))
)
When I plot this with the code below, I get a graph in which NODE is not correctly sorted within the individual GENREs, since different NODEs are more frequent in different GENREs:
# Bar chart of NODE counts, drawn as one panel per GENRE.
p1 <- ggplot(data = THAT_EXT, mapping = aes(x = NODE)) +
  geom_bar() +
  # breaks = NULL suppresses the tick marks on the x axis
  scale_x_discrete(name = "THAT_EXT", breaks = NULL) +
  facet_wrap(~ GENRE)
What I want is for every facet to have NODE sorted in decreasing order for that particular GENRE. Can anyone help with this?
structure(list(ID = structure(c(1L, 2L, 3L, 4L, 10L, 133L, 137L,
138L, 139L, 140L, 141L, 142L, 143L, 144L, 145L, 146L, 147L, 148L,
149L, 150L, 151L, 152L, 153L, 154L, 155L, 156L, 157L, 158L, 159L,
160L, 161L, 162L, 163L, 164L, 165L, 166L, 167L, 168L, 169L, 170L,
171L, 172L, 173L, 174L, 175L, 176L, 177L, 178L, 179L, 180L, 181L,
182L, 183L, 184L, 185L, 186L, 187L, 188L, 189L, 190L, 191L, 192L,
193L, 194L, 195L, 196L, 197L, 198L, 199L, 200L, 201L, 202L, 203L,
204L, 205L, 206L, 207L, 208L, 212L, 213L, 214L, 215L, 216L, 217L,
218L, 219L, 220L, 221L, 222L, 223L, 224L, 225L, 226L, 227L, 228L,
229L, 230L, 231L, 232L, 233L, 234L, 235L, 236L, 237L, 238L, 239L,
240L, 241L, 267L, 268L, 269L, 270L, 271L, 272L, 273L, 274L, 275L,
276L, 277L, 278L, 279L, 280L, 281L, 282L, 283L, 284L, 290L, 291L,
298L, 299L, 300L, 303L, 304L, 305L, 306L, 307L, 308L, 309L, 310L,
313L, 314L, 315L, 316L, 317L, 318L, 319L, 327L, 328L, 329L, 330L,
331L, 332L, 333L, 334L, 335L, 336L, 337L, 338L, 339L, 340L, 341L,
342L, 343L, 344L, 345L, 346L, 347L, 348L, 352L, 353L, 354L, 355L,
356L, 357L, 358L, 359L, 360L, 349L, 350L, 351L, 361L, 362L, 363L,
364L, 365L, 366L, 367L, 368L, 369L, 370L, 371L, 372L, 373L, 374L,
375L, 376L, 377L, 378L, 379L, 380L, 381L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 41L, 42L, 43L, 44L, 45L,
46L, 50L, 54L, 72L, 73L, 74L, 75L, 76L, 90L, 91L, 92L, 97L, 98L,
102L, 115L, 125L, 126L, 127L, 128L, 129L, 130L, 131L, 132L, 209L,
210L, 211L, 242L, 243L, 244L, 245L, 246L, 289L, 292L, 293L, 294L,
295L, 296L, 297L, 301L, 302L, 311L, 312L, 320L, 321L, 322L, 323L,
324L, 325L, 326L, 382L, 383L, 384L, 385L, 386L, 387L, 388L, 5L,
6L, 7L, 8L, 9L, 11L, 37L, 38L, 39L, 40L, 47L, 48L, 49L, 51L,
52L, 53L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L,
66L, 67L, 68L, 69L, 70L, 71L, 77L, 78L, 79L, 80L, 81L, 82L, 83L,
84L, 85L, 86L, 87L, 88L, 89L, 93L, 94L, 95L, 96L, 99L, 100L,
101L, 103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L,
113L, 114L, 116L, 117L, 118L, 119L, 120L, 121L, 122L, 123L, 124L,
134L, 135L, 136L, 247L, 248L, 249L, 250L, 251L, 252L, 253L, 254L,
255L, 256L, 257L, 258L, 259L, 260L, 261L, 262L, 263L, 264L, 265L,
266L, 285L, 286L, 287L, 288L), .Label = c("A05_122_01", "A05_277_07",
"A05_400_01", "A05_99_01", "A06_1283_02", "A06_1389_01", "A06_1390_01",
"A06_1441_02", "A06_884_03", "A0K_1190_03", "A77_1684_01", "A8K_525_03",
"A8K_582_01", "A8K_645_01", "A8K_799_01", "A90_341_02", "A90_496_01",
"A94_217_01", "A94_472_01", "A94_477_03", "A9M_164_01", "A9M_259_03",
"A9N_199_01", "A9N_489_01", "A9N_591_01", "A9R_173_01", "A9R_425_02",
"A9W_536_02", "AA5_121_01", "AAE_203_01", "AAE_243_01", "AAE_412_01",
"AAW_14_03", "AAW_244_02", "AAW_297_04", "AAW_365_04", "ADG_1398_01",
"ADG_1500_01", "ADG_1507_01", "ADG_1516_01", "AHB_336_01", "AHB_421_01",
"AHJ_1090_02", "AHJ_619_01", "AR3_340_01", "AR3_91_03", "ARF_879_01",
"ARF_985_01", "ARF_991_02", "ARK_1891_01", "ASL_33_04", "ASL_43_01",
"ASL_9_01", "AT7_1031_01", "B09_1162_01", "B09_1475_01", "B09_1493_01",
"B09_1539_01", "B0G_197_01", "B0G_320_01", "B0N_1037_01", "B0N_624_01",
"B0N_645_02", "B0N_683_01", "B3G_313_04", "B3G_320_03", "B3G_398_02",
"B7M_1630_01", "B7M_1913_01", "BNN_746_02", "BNN_895_01", "BP7_2426_01",
"BP7_2777_01", "BP7_2898_01", "BP9_410_01", "BP9_599_01", "BPK_829_01",
"C93_1407_02", "C9A_181_01", "C9A_196_01", "C9A_365_01", "C9A_82_02",
"C9A_9_01", "CB9_306_02", "CB9_63_04", "CB9_86_01", "CBJ_439_01",
"CBJ_702_02", "CBJ_705_01", "CCM_320_01", "CCM_665_01", "CCM_669_02",
"CCN_1036_02", "CCN_1078_01", "CCN_1119_01", "CCN_784_01", "CCW_2284_02",
"CCW_2349_03", "CE7_242_02", "CE7_284_01", "CE7_39_01", "CEB_1675_01",
"CER_145_03", "CER_23_01", "CER_235_02", "CER_378_10", "CET_1056_02",
"CET_680_01", "CET_705_01", "CET_797_01", "CET_838_01", "CET_879_05",
"CET_946_03", "CET_986_01", "CEY_2977_01", "CJ3_107_02", "CJ3_114_03",
"CJ3_20_01", "CJ3_81_01", "CK2_112_01", "CK2_22_01", "CK2_392_01",
"CK2_42_01", "CK2_75_01", "CKC_1776_01", "CKC_1777_01", "CKC_1823_01",
"CKC_1824_01", "CKC_1860_01", "CKC_1883_01", "CKC_1883_02", "CKC_2127_01",
"CMN_1439_02", "CRM_5767_01", "CRM_5770_03", "CRM_5789_01", "CS2_110_01",
"CS2_131_01", "CS2_139_01", "CS2_187_01", "CS2_187_03", "CS2_231_03",
"CS2_249_02", "CS2_301_01", "CS2_35_01", "CS2_58_02", "EV6_16_01",
"EV6_206_02", "EV6_240_01", "EV6_244_02", "EV6_28_01", "EV6_30_01",
"EV6_32_01", "EV6_450_01", "EV6_69_01", "EV6_80_01", "EV6_91_01",
"FAC_1019_01", "FAC_1026_01", "FAC_1027_01", "FAC_1235_01", "FAC_1269_05",
"FAC_1270_05", "FAC_1393_01", "FAC_1406_03", "FAC_933_01", "FAC_950_01",
"FAC_960_01", "FED_105_01", "FED_120_02", "FED_21_02", "FED_281_02",
"FED_302_02", "FED_53_01", "FED_8_05", "FEF_498_03", "FEF_674_03",
"FR2_410_01", "FR2_557_02", "FR2_593_01", "FR2_691_01", "FR4_232_01",
"FR4_331_01", "FR4_346_01", "FS7_818_01", "FS7_919_01", "FU0_368_02",
"FYT_1138_01", "FYT_1183_01", "FYT_901_05", "G08_1336_01", "G1E_385_01",
"G1N_824_01", "G1N_860_01", "G1N_868_01", "G1N_975_01", "GU5_854_01",
"GUJ_423_01", "GUJ_501_01", "GUJ_611_01", "GUJ_629_03", "GUJ_700_01",
"GV0_10_01", "GV0_104_01", "GV0_111_01", "GV0_122_01", "GV0_160_01",
"GV0_232_02", "GV2_1465_01", "GV2_1899_01", "GV6_2683_01", "GW6_297_01",
"GW6_306_05", "GW6_307_01", "GW6_322_01", "GW6_330_02", "GW6_335_01",
"GW6_338_01", "GW6_367_02", "GW6_373_01", "GW6_407_01", "GW6_411_01",
"GW6_413_01", "GW6_421_01", "GW6_423_01", "GW6_424_01", "GW6_428_01",
"GW6_447_01", "GWM_480_01", "GWM_533_02", "GWM_554_02", "GWM_554_03",
"GWM_609_01", "GWM_609_04", "GWM_610_01", "GWM_730_01", "GWM_731_01",
"GWM_738_01", "GWM_804_06", "GWM_815_01", "GWM_832_03", "GVP_179_01",
"GVP_211_01", "GVP_393_02", "GVP_443_02", "GVP_710_01", "H0B_171_04",
"H0B_216_01", "H0B_265_01", "H0B_32_01", "H0B_361_03", "H0B_365_01",
"H0B_369_01", "H0B_74_01", "H0B_93_01", "H10_1002_01", "H10_1032_04",
"H10_653_01", "H10_803_01", "H10_824_01", "H10_825_03", "H10_881_01",
"H10_986_01", "H78_851_04", "H78_891_01", "H78_946_04", "H79_1959_19",
"H7S_110_05", "H7S_130_06", "H7S_131_03", "H7S_131_04", "H7S_146_01",
"H7S_148_01", "H7S_164_01", "H7S_179_01", "H7S_54_01", "H7S_56_05",
"H7S_62_03", "H7S_79_01", "H7S_8_01", "H7S_81_01", "H7S_83_01",
"H7S_87_01", "H7S_92_03", "H7X_1028_02", "H7X_1091_01", "H7X_691_01",
"H7X_695_01", "H8H_2917_01", "H8K_153_01", "H8K_55_01", "H8M_1897_01",
"H8M_2104_02", "H8T_3316_03", "H98_3204_01", "H98_3410_01", "H98_3490_02",
"H9R_130_02", "H9R_39_01", "H9S_1297_01", "HA2_3107_02", "HA2_3284_01",
"HPY_754_04", "HPY_785_09", "HPY_799_03", "HPY_807_04", "HPY_830_04",
"HPY_838_02", "HPY_843_01", "HPY_869_11", "HR7_190_01", "HR7_440_01",
"HTP_540_01", "HTP_585_01", "HTP_588_05", "HTP_593_01", "HTP_601_01",
"HTP_613_01", "HTP_648_02", "HTW_197_01", "HTW_494_01", "HTW_750_01",
"HWL_2770_01", "HWL_2919_01", "HWM_45_01", "HWM_45_02", "HXY_1047_03",
"HXY_701_01", "HXY_781_01", "HXY_783_01", "HXY_784_01", "HXY_836_01",
"HXY_931_01", "HXY_963_01", "HXY_972_01", "HXY_985_03", "HY6_1024_01",
"HY6_1025_01", "HY6_1164_01", "HY6_1223_01", "HY6_988_03", "HY6_989_01",
"HY8_160_01", "HY8_164_01", "HY8_292_03", "HY8_316_01", "HY9_778_03",
"HY9_845_02", "HYX_235_08", "HYX_245_01", "HYX_88_01", "J12_1474_02",
"J12_1492_01", "J12_1571_01", "J12_1845_01", "J14_341_01", "J18_597_04",
"J18_698_02", "J18_759_01", "J18_828_01", "J3R_197_01", "J3R_219_02",
"J3R_277_04", "J3T_267_01", "J3T_269_02", "J3T_57_02", "J41_41_02",
"J41_58_03", "J9B_133_03", "J9B_341_02", "J9B_341_03", "J9D_147_05",
"J9D_218_01", "J9D_411_01", "J9D_616_01", "J9D_616_02", "JNB_563_02",
"JT7_118_01", "JT7_129_02", "JT7_218_02", "JT7_344_02", "JXS_3663_01",
"JXU_407_01", "JXU_468_02", "JXU_559_01", "JXV_1439_04", "JXV_1592_01",
"JY1_100_01"), class = "factor"), GENRE = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L), .Label = c("Academic Prose", "Conversation", "News",
"Novels", "Popular Science"), class = "factor"), NODE = structure(c(9L,
10L, 10L, 10L, 4L, 10L, 71L, 35L, 49L, 6L, 5L, 15L, 28L, 44L,
64L, 64L, 28L, 28L, 18L, 18L, 32L, 18L, 58L, 10L, 72L, 28L, 18L,
10L, 64L, 10L, 35L, 64L, 64L, 69L, 8L, 10L, 50L, 69L, 49L, 49L,
15L, 69L, 10L, 49L, 8L, 64L, 49L, 10L, 69L, 18L, 61L, 67L, 67L,
61L, 57L, 69L, 11L, 10L, 64L, 10L, 59L, 61L, 49L, 10L, 59L, 1L,
61L, 35L, 54L, 54L, 39L, 44L, 61L, 64L, 69L, 1L, 23L, 49L, 49L,
8L, 69L, 49L, 69L, 49L, 49L, 69L, 35L, 49L, 49L, 49L, 35L, 10L,
49L, 48L, 10L, 49L, 11L, 44L, 50L, 11L, 50L, 69L, 49L, 10L, 59L,
68L, 47L, 69L, 49L, 35L, 29L, 8L, 49L, 50L, 35L, 10L, 35L, 8L,
35L, 8L, 10L, 35L, 10L, 10L, 10L, 35L, 44L, 61L, 35L, 44L, 28L,
47L, 39L, 39L, 49L, 61L, 43L, 60L, 19L, 10L, 10L, 10L, 44L, 44L,
62L, 44L, 10L, 59L, 10L, 61L, 1L, 53L, 33L, 10L, 8L, 8L, 64L,
64L, 10L, 57L, 61L, 64L, 66L, 19L, 61L, 64L, 10L, 10L, 8L, 19L,
35L, 28L, 10L, 61L, 35L, 42L, 35L, 28L, 32L, 64L, 10L, 18L, 28L,
25L, 35L, 35L, 10L, 18L, 10L, 22L, 55L, 28L, 10L, 1L, 55L, 51L,
1L, 38L, 28L, 28L, 33L, 10L, 44L, 29L, 16L, 8L, 28L, 69L, 32L,
10L, 61L, 20L, 35L, 10L, 28L, 10L, 32L, 10L, 46L, 59L, 64L, 35L,
66L, 2L, 35L, 28L, 30L, 18L, 69L, 32L, 10L, 28L, 17L, 36L, 64L,
61L, 10L, 64L, 33L, 3L, 37L, 26L, 28L, 64L, 44L, 28L, 64L, 64L,
6L, 6L, 64L, 50L, 32L, 8L, 64L, 50L, 28L, 24L, 18L, 47L, 35L,
40L, 24L, 55L, 44L, 22L, 1L, 49L, 44L, 18L, 45L, 63L, 64L, 35L,
12L, 35L, 10L, 35L, 10L, 10L, 10L, 44L, 44L, 44L, 65L, 44L, 55L,
32L, 49L, 64L, 39L, 69L, 1L, 60L, 7L, 14L, 44L, 33L, 10L, 19L,
10L, 70L, 53L, 8L, 61L, 61L, 44L, 61L, 65L, 28L, 68L, 69L, 27L,
61L, 28L, 72L, 34L, 61L, 32L, 10L, 49L, 35L, 49L, 10L, 10L, 69L,
39L, 40L, 19L, 59L, 53L, 49L, 49L, 44L, 49L, 35L, 49L, 61L, 61L,
1L, 10L, 28L, 49L, 35L, 49L, 61L, 50L, 69L, 35L, 61L, 35L, 50L,
10L, 28L, 69L, 61L, 21L, 69L, 29L, 35L, 35L, 35L, 11L, 69L, 8L,
41L, 56L, 35L, 61L, 69L, 49L, 49L, 49L, 1L, 13L, 64L, 64L, 52L,
44L, 64L, 64L, 50L, 49L, 69L, 11L, 59L, 49L, 31L), .Label = c("apparent",
"appropriate", "awful", "axiomatic", "best", "better", "breathtaking",
"certain", "characteristic", "clear", "conceivable", "convenient",
"crucial", "cruel", "desirable", "disappointing", "emphatic",
"essential", "evident", "expected", "extraordinary", "fair",
"fortunate", "Funny", "good", "great", "imperative", "important",
"impossible", "incredible", "inescapable", "inevitable", "interesting",
"ironic", "likely", "Likely", "lucky", "ludicrous", "natural",
"necessary", "needful", "notable", "noteworthy", "obvious", "odd",
"paradoxical", "plain", "plausible", "possible", "probable",
"proper", "relevant", "remarkable", "revealing", "right", "Sad",
"self-evident", "sensible", "significant", "striking", "surprising",
"symptomatic", "terrible", "true", "typical", "understandable",
"unexpected", "unfortunate", "unlikely", "unreasonable", "untrue",
"vital"), class = "factor")), .Names = c("ID", "GENRE", "NODE"
), class = "data.frame", row.names = c(NA, -388L))
As I mentioned already: facet_wrap is not intended for having individual scales. At least I didn't find a solution. Hence, setting the labels in scale_x_discrete did not bring the desired result.
But this is my workaround:
library(plyr)
library(ggplot2)
# Count the rows for every GENRE/NODE combination; ddply() stores the count
# returned by nrow in a column named V1.
nodeCount <- ddply(df, c("GENRE", "NODE"), nrow)
# Build a key that is unique per GENRE/NODE pair, e.g. "News.clear".
nodeCount[["factors"]] <- with(nodeCount, paste(GENRE, NODE, sep = "."))
# Sort by GENRE, then by count, both in decreasing order.
nodeCount <- nodeCount[with(nodeCount, order(GENRE, V1, decreasing = TRUE)), ]
# Freeze the sorted order in the factor levels so the x axis follows it.
nodeCount[["factors"]] <- factor(
  nodeCount[["factors"]],
  levels = nodeCount[["factors"]]
)
head(nodeCount)
GENRE NODE V1 factors
121 Popular Science possible 14 Popular Science.possible
128 Popular Science surprising 11 Popular Science.surprising
116 Popular Science likely 9 Popular Science.likely
132 Popular Science unlikely 9 Popular Science.unlikely
103 Popular Science clear 7 Popular Science.clear
129 Popular Science true 5 Popular Science.true
# One bar per GENRE/NODE key, with the bar height taken from the precomputed
# count V1. Each GENRE panel gets its own x axis, so it shows only its own
# keys in the order fixed by the factor levels.
g <- ggplot(nodeCount, aes(y = V1, x = factors)) +
  # V1 already holds the counts, so the bars must use the identity stat;
  # the default counting stat does not accept a mapped y
  geom_bar(stat = "identity") +
  scale_x_discrete(breaks = NULL) + # suppress tick marks on x axis
  # spell out `scales` instead of relying on partial argument matching
  facet_wrap(~ GENRE, scales = "free_x") +
  # write the NODE name slightly above each bar
  geom_text(
    aes(label = NODE, y = V1 + 2),
    angle = 45, vjust = 0, hjust = 0, size = 3
  )
Which gives:

How can I add missing sequence values?

I have a data frame like this:
structure(list(x = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L,
24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L,
37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L,
50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L,
63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L,
76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L,
89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L,
101L, 102L, 103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 112L,
113L, 114L, 115L, 116L, 117L, 118L, 119L, 120L, 121L, 123L, 124L,
125L, 127L, 128L, 129L, 130L, 132L, 133L, 134L, 135L, 136L, 137L,
138L, 139L, 140L, 141L, 142L, 143L, 145L, 146L, 147L, 148L, 149L,
150L, 151L, 152L, 153L, 154L, 155L, 158L, 160L, 163L, 164L, 166L,
167L, 169L, 170L, 173L, 174L, 178L, 179L, 181L, 182L, 183L, 186L,
187L, 191L, 192L, 193L, 194L, 197L, 198L, 200L, 205L, 208L, 209L,
213L, 214L, 216L, 217L, 220L, 222L, 223L, 225L, 229L, 233L, 235L,
237L, 242L, 243L, 244L, 251L, 253L, 254L, 255L, 261L, 262L, 263L,
264L, 267L, 268L, 269L, 270L, 276L, 281L, 282L, 284L, 285L, 287L,
289L, 293L, 295L, 297L, 299L, 301L, 306L, 308L, 315L, 317L, 318L,
320L, 327L, 330L, 336L, 337L, 345L, 346L, 355L, 359L, 376L, 377L,
379L, 384L, 387L, 388L, 402L, 405L, 408L, 415L, 420L, 421L, 427L,
428L, 429L, 430L, 437L, 438L, 439L, 440L, 446L, 448L, 453L, 456L,
469L, 472L, 476L, 478L, 481L, 483L, 486L, 487L, 488L, 497L, 500L,
502L, 504L, 507L, 512L, 525L, 530L, 531L, 543L, 546L, 550L, 578L,
581L, 598L, 601L, 680L, 689L, 693L, 712L, 728L, 746L, 768L, 790L,
794L, 840L, 851L, 861L, 928L, 969L, 1010L, 1180L, 1698L), freq = c(29186L,
12276L, 5851L, 3938L, 3133L, 1894L, 1157L, 820L, 597L, 481L,
398L, 297L, 269L, 251L, 175L, 176L, 153L, 130L, 117L, 108L, 93L,
83L, 58L, 84L, 60L, 43L, 59L, 51L, 57L, 53L, 38L, 38L, 32L, 35L,
28L, 27L, 29L, 22L, 24L, 29L, 30L, 23L, 26L, 19L, 19L, 25L, 14L,
22L, 16L, 12L, 15L, 14L, 11L, 13L, 18L, 10L, 17L, 20L, 7L, 9L,
2L, 8L, 12L, 8L, 7L, 10L, 10L, 9L, 6L, 6L, 9L, 5L, 11L, 4L, 5L,
5L, 10L, 4L, 6L, 1L, 4L, 7L, 3L, 4L, 3L, 2L, 3L, 5L, 7L, 2L,
2L, 3L, 2L, 4L, 7L, 1L, 3L, 5L, 5L, 3L, 5L, 2L, 2L, 2L, 3L, 2L,
5L, 7L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 3L, 2L, 2L, 1L,
3L, 4L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 1L, 4L, 3L, 1L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 3L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 4L, 4L, 1L, 2L,
2L, 4L, 2L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 2L, 3L, 1L, 1L, 1L, 1L,
3L, 2L, 1L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 2L, 1L, 1L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 4L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("x",
"freq"), row.names = c(NA, -296L), class = "data.frame")
After the x value of 130, there are missing values. Is there a way I can make this a continuous data frame in increments of 1, i.e. from 1 to 1698, populating the entire list and setting the elements that do not have a value here to 0? What I mean is:
1,2
4,5
5,7
should be converted to:
1,2
2,0
3,0
4,5
5,7
Any suggestions?
You can also use merge (assuming your data is stored in l):
# Full outer join against the complete sequence 1..1698: x values that are
# absent from l come back as rows whose freq is NA.
l <- merge(l, data.frame(x = 1:1698), by = "x", all = TRUE)
# Turn those NA frequencies into 0.
l$freq <- replace(l$freq, is.na(l$freq), 0)
I'd create a data set of values that aren't covered by column x and then create a dataframe of those values and assign 0 to the freq of all of these x values. Then rbind and order by x.
# I called your data dat (columns x and freq).
# Every x value that should be present, from 1 up to the largest observed x.
y <- seq_len(max(dat$x))
# The x values that dat does not cover ...
dat2 <- data.frame(x = setdiff(y, dat$x))
# ... each with a frequency of 0. rep() keeps this valid even when nothing is
# missing and dat2 has zero rows (data.frame(x = integer(0), freq = 0) errors).
dat2$freq <- rep(0, nrow(dat2))
dat3 <- rbind(dat, dat2)
dat4 <- dat3[order(dat3$x), ] # could stop here
rownames(dat4) <- NULL # but I hate non sequential row names
dat4

Resources