Calculate percentage given condition - r

I am new to this website and to coding as well. I was wondering if any of you could help me out
I need to calculate the Top 5 Movies, by rating distribution, calculating the percentage of ratings for each movie that are 4 stars or higher.
So far I was only able to calculate the number of occurrences using dplyr.
Is it possible to calculate it using dplyr (something similar to my coding)?
I'm not sure whether I need to mutate to come up with the solution or if there's another way to do so.
My code so far:
dfAux1 <- na.omit(dfAux)
dfAux1 %>%
group_by(movie) %>%
summarise(tot = n()) %>%
arrange(desc(tot))%>%
head(5)
the result should be something like this:
**Expected result**:
0.7000000, 'The Shawshank Redemption'
0.5333333, 'Star Wars IV - A New Hope'
0.5000000, 'Gladiator'
0.4444444, 'Blade Runner'
0.4375000, 'The Silence of the Lambs'
and so far this is my result:
# A tibble: 5 x 2
movie tot
<fctr> <int>
1 Toy Story 17
2 The Silence of the Lambs 16
3 Star Wars IV - A New Hope 15
4 Star Wars VI - Return of the Jedi 14
5 Independence Day 13
edit:
str(dfAux1)
'data.frame': 241 obs. of 2 variables:
$ Rating: int 1 5 4 2 4 5 4 2 3 2 ...
$ movie : Factor w/ 20 levels "Star Wars IV - A New Hope",..: 1 1 1 1 1 1 1 1 1 1 ...
- attr(*, "na.action")=Class 'omit' Named int [1:159] 3 4 7 16 17 23 27 28 34 36 ...
.. ..- attr(*, "names")= chr [1:159] "3" "4" "7" "16" ...
dput(dfAux1)
structure(list(Rating = c(1L, 5L, 4L, 2L, 4L, 5L, 4L, 2L, 3L,
2L, 3L, 4L, 4L, 5L, 1L, 5L, 3L, 3L, 3L, 4L, 1L, 2L, 1L, 5L, 3L,
4L, 5L, 1L, 2L, 2L, 4L, 4L, 3L, 5L, 2L, 3L, 1L, 1L, 2L, 2L, 5L,
1L, 4L, 1L, 4L, 5L, 5L, 5L, 4L, 4L, 4L, 2L, 4L, 1L, 3L, 2L, 3L,
2L, 4L, 2L, 5L, 3L, 4L, 1L, 5L, 4L, 2L, 1L, 1L, 4L, 2L, 4L, 5L,
5L, 2L, 1L, 4L, 2L, 1L, 4L, 2L, 3L, 2L, 4L, 4L, 5L, 2L, 4L, 3L,
2L, 2L, 4L, 2L, 2L, 2L, 3L, 4L, 1L, 5L, 4L, 3L, 5L, 2L, 1L, 3L,
4L, 4L, 2L, 3L, 4L, 1L, 3L, 2L, 5L, 3L, 2L, 3L, 4L, 1L, 1L, 4L,
1L, 4L, 5L, 1L, 3L, 2L, 2L, 3L, 5L, 5L, 1L, 2L, 3L, 5L, 2L, 3L,
1L, 2L, 1L, 4L, 1L, 2L, 2L, 3L, 3L, 2L, 1L, 1L, 1L, 5L, 2L, 4L,
1L, 4L, 3L, 1L, 2L, 2L, 3L, 4L, 2L, 3L, 2L, 4L, 3L, 4L, 3L, 2L,
2L, 4L, 5L, 2L, 1L, 5L, 1L, 4L, 5L, 2L, 3L, 3L, 2L, 5L, 5L, 4L,
1L, 3L, 1L, 2L, 1L, 5L, 5L, 2L, 4L, 2L, 4L, 2L, 5L, 2L, 5L, 5L,
1L, 5L, 1L, 3L, 2L, 2L, 3L, 5L, 1L, 3L, 1L, 5L, 3L, 3L, 1L, 2L,
4L, 1L, 5L, 3L, 1L, 1L, 5L, 5L, 1L, 5L, 3L, 3L, 2L, 3L, 3L, 2L,
2L, 2L, 5L, 4L, 2L, 1L, 4L, 5L), movie = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L,
17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L), .Label = c("Star Wars IV - A New Hope",
"Star Wars VI - Return of the Jedi", "Forrest Gump", "The Shawshank Redemption",
"The Silence of the Lambs", "Gladiator", "Toy Story", "Saving Private Ryan",
"Pulp Fiction", "Stand by Me", "Shakespeare in Love", "Total Recall",
"Independence Day", "Blade Runner", "Groundhog Day", "The Matrix",
"Schindler's List", "The Sixth Sense", "Raiders of the Lost Ark",
"Babe"), class = "factor")), .Names = c("Rating", "movie"), row.names = c(1L,
2L, 5L, 6L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 18L, 19L, 20L,
21L, 22L, 24L, 25L, 26L, 29L, 30L, 31L, 32L, 33L, 35L, 38L, 39L,
40L, 41L, 45L, 46L, 47L, 51L, 52L, 54L, 56L, 58L, 60L, 62L, 63L,
65L, 66L, 67L, 69L, 70L, 73L, 78L, 80L, 81L, 82L, 83L, 85L, 87L,
88L, 89L, 90L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 100L, 101L,
102L, 104L, 105L, 107L, 108L, 109L, 111L, 115L, 116L, 118L, 119L,
121L, 122L, 123L, 124L, 126L, 128L, 129L, 130L, 131L, 132L, 133L,
134L, 135L, 137L, 138L, 139L, 140L, 141L, 144L, 145L, 146L, 147L,
149L, 150L, 153L, 156L, 159L, 160L, 164L, 166L, 167L, 168L, 170L,
172L, 175L, 177L, 178L, 179L, 180L, 181L, 182L, 183L, 185L, 186L,
189L, 194L, 195L, 196L, 199L, 200L, 201L, 202L, 205L, 206L, 207L,
209L, 212L, 216L, 217L, 219L, 220L, 222L, 223L, 224L, 225L, 226L,
228L, 229L, 231L, 233L, 234L, 235L, 239L, 241L, 242L, 243L, 244L,
246L, 248L, 249L, 250L, 251L, 252L, 253L, 254L, 255L, 261L, 263L,
264L, 265L, 267L, 268L, 274L, 278L, 280L, 282L, 283L, 284L, 286L,
288L, 289L, 292L, 293L, 294L, 295L, 296L, 300L, 301L, 303L, 305L,
307L, 310L, 311L, 312L, 314L, 316L, 317L, 319L, 320L, 321L, 322L,
323L, 324L, 325L, 328L, 330L, 334L, 335L, 336L, 338L, 340L, 341L,
342L, 343L, 344L, 345L, 346L, 348L, 350L, 351L, 356L, 358L, 360L,
362L, 363L, 364L, 367L, 368L, 371L, 373L, 375L, 376L, 378L, 380L,
383L, 384L, 386L, 387L, 389L, 391L, 392L, 395L, 396L, 398L), class = "data.frame", na.action = structure(c(3L,
4L, 7L, 16L, 17L, 23L, 27L, 28L, 34L, 36L, 37L, 42L, 43L, 44L,
48L, 49L, 50L, 53L, 55L, 57L, 59L, 61L, 64L, 68L, 71L, 72L, 74L,
75L, 76L, 77L, 79L, 84L, 86L, 91L, 99L, 103L, 106L, 110L, 112L,
113L, 114L, 117L, 120L, 125L, 127L, 136L, 142L, 143L, 148L, 151L,
152L, 154L, 155L, 157L, 158L, 161L, 162L, 163L, 165L, 169L, 171L,
173L, 174L, 176L, 184L, 187L, 188L, 190L, 191L, 192L, 193L, 197L,
198L, 203L, 204L, 208L, 210L, 211L, 213L, 214L, 215L, 218L, 221L,
227L, 230L, 232L, 236L, 237L, 238L, 240L, 245L, 247L, 256L, 257L,
258L, 259L, 260L, 262L, 266L, 269L, 270L, 271L, 272L, 273L, 275L,
276L, 277L, 279L, 281L, 285L, 287L, 290L, 291L, 297L, 298L, 299L,
302L, 304L, 306L, 308L, 309L, 313L, 315L, 318L, 326L, 327L, 329L,
331L, 332L, 333L, 337L, 339L, 347L, 349L, 352L, 353L, 354L, 355L,
357L, 359L, 361L, 365L, 366L, 369L, 370L, 372L, 374L, 377L, 379L,
381L, 382L, 385L, 388L, 390L, 393L, 394L, 397L, 399L, 400L), .Names = c("3",
"4", "7", "16", "17", "23", "27", "28", "34", "36", "37", "42",
"43", "44", "48", "49", "50", "53", "55", "57", "59", "61", "64",
"68", "71", "72", "74", "75", "76", "77", "79", "84", "86", "91",
"99", "103", "106", "110", "112", "113", "114", "117", "120",
"125", "127", "136", "142", "143", "148", "151", "152", "154",
"155", "157", "158", "161", "162", "163", "165", "169", "171",
"173", "174", "176", "184", "187", "188", "190", "191", "192",
"193", "197", "198", "203", "204", "208", "210", "211", "213",
"214", "215", "218", "221", "227", "230", "232", "236", "237",
"238", "240", "245", "247", "256", "257", "258", "259", "260",
"262", "266", "269", "270", "271", "272", "273", "275", "276",
"277", "279", "281", "285", "287", "290", "291", "297", "298",
"299", "302", "304", "306", "308", "309", "313", "315", "318",
"326", "327", "329", "331", "332", "333", "337", "339", "347",
"349", "352", "353", "354", "355", "357", "359", "361", "365",
"366", "369", "370", "372", "374", "377", "379", "381", "382",
"385", "388", "390", "393", "394", "397", "399", "400"), class = "omit"))

I am using data.table instead of dplyr
library(data.table)
setDT(dfAux1) # make dfAux1 as data table by reference
# For each movie compute, in a single grouped pass, the share of ratings that
# are 4 or higher (`percent`) and the number of ratings (`tot`), then sort by
# `tot` in descending order. Counting with sum(Rating >= 4) keeps movies that
# have no rating of 4 or higher (percent = 0) instead of silently dropping them.
dfAux1[, .(percent = sum(Rating >= 4) / .N, tot = .N), by = movie][order(-tot)]
# movie percent tot
# 1: Toy Story 0.35294118 17
# 2: The Silence of the Lambs 0.43750000 16
# 3: Star Wars IV - A New Hope 0.53333333 15
# 4: Star Wars VI - Return of the Jedi 0.35714286 14
# 5: Independence Day 0.30769231 13
# 6: Gladiator 0.50000000 12
# 7: Total Recall 0.08333333 12
# 8: Groundhog Day 0.41666667 12
# 9: The Matrix 0.41666667 12
# 10: Schindler's List 0.33333333 12
# 11: The Sixth Sense 0.33333333 12
# 12: Saving Private Ryan 0.36363636 11
# 13: Pulp Fiction 0.36363636 11
# 14: Stand by Me 0.36363636 11
# 15: Shakespeare in Love 0.27272727 11
# 16: Raiders of the Lost Ark 0.27272727 11
# 17: Forrest Gump 0.30000000 10
# 18: The Shawshank Redemption 0.70000000 10
# 19: Babe 0.40000000 10
# 20: Blade Runner 0.44444444 9

A single-line solution using data.table and the data from the OP could be:
library(data.table)
# Share of ratings >= 4 per movie (mean of a logical vector), sorted descending;
# head() returns at most five rows and, unlike `[1:5]`, never pads with NA rows.
head(setDT(dfAux1)[, .(pct = mean(Rating >= 4)), by = movie][order(-pct)], 5L)
movie pct
1: The Shawshank Redemption 0.7000000
2: Star Wars IV - A New Hope 0.5333333
3: Gladiator 0.5000000
4: Blade Runner 0.4444444
5: The Silence of the Lambs 0.4375000

Overview
I used the dplyr package to group your data by the movie column and perform calculations based on the rating column.
In summarise(), I created three new columns:
Total_Review: counts the total number of reviews per movie.
FourPlus_Rating: counts the subset of reviews with a Rating value of 4 or higher.
Per_FourPlus_Rating: divides FourPlus_Rating by Total_Review.
I then arranged the data in descending order based on Per_FourPlus_Rating. Finally, I called head() to specify that I only want the tibble to return the first 5 rows.
Reproducible Example
# install necessary package
install.packages( pkgs = "dplyr" )
# load necessary package
library( dplyr )
# view first six rows
head( x = df )
# Rating movie
# 1 1 Star Wars IV - A New Hope
# 2 5 Star Wars IV - A New Hope
# 5 4 Star Wars IV - A New Hope
# 6 2 Star Wars IV - A New Hope
# 8 4 Star Wars IV - A New Hope
# 9 5 Star Wars IV - A New Hope
# perform calculations using
# dplyr functions
df %>%
  group_by(movie) %>%
  summarise(
    # total number of reviews per movie
    Total_Review = n(),
    # summing a logical vector counts its TRUEs, i.e. the reviews rated 4 or 5;
    # na.rm = TRUE leaves missing ratings out of the count
    FourPlus_Rating = sum(Rating >= 4, na.rm = TRUE),
    # reuse the two columns created just above instead of recomputing them
    Per_FourPlus_Rating = FourPlus_Rating / Total_Review
  ) %>%
  arrange(desc(Per_FourPlus_Rating)) %>%
  head(n = 5)
# A tibble: 5 x 4
# movie Total_Review FourPlus_Rating Per_FourPlus_Rati…
# <fct> <int> <int> <dbl>
# 1 The Shawshank Rede… 10 7 0.700
# 2 Star Wars IV - A N… 15 8 0.533
# 3 Gladiator 12 6 0.500
# 4 Blade Runner 9 4 0.444
# 5 The Silence of the… 16 7 0.438
# end of script #

this is a dplyr solution:
# Number of ratings of 4 or higher per movie.
dfAuxhigh <- dfAux1 %>%
  filter(Rating >= 4) %>%
  group_by(movie) %>%
  summarize(nHigh = n())
# Number of ratings per movie. Stored under its own name so that the raw input
# `dfAux` from the question is not overwritten.
dfAuxAll <- dfAux1 %>%
  group_by(movie) %>%
  summarize(nAll = n())
# Join both counts by movie and turn them into a proportion.
result <- merge(dfAuxhigh, dfAuxAll, by = "movie") %>%
  mutate(percentage = nHigh / nAll)
# Keep the five movies with the largest proportion; head() returns fewer rows
# (rather than NA-padded rows) when fewer than five movies are available.
result <- result[order(result$percentage, decreasing = TRUE), ]
result <- head(result[, c("movie", "percentage")], 5)

library(tidyverse)
df %>%
  group_by(movie, Rating) %>%
  # number of ratings per movie and star value; the result stays grouped by movie
  summarise(n = n()) %>%
  # share of each star value within its movie
  mutate(freq = n / sum(n)) %>%
  # keep only the shares of the desired ratings (4 or above)
  filter(Rating >= 4) %>%
  # add them up: one row per movie with the share of ratings >= 4
  summarise(freq = sum(freq)) %>%
  # rank explicitly by freq instead of relying on "the last column"
  top_n(5, wt = freq) %>%
  arrange(desc(freq)) %>%
  # format as a percentage string for display
  mutate(freq = paste0(round(freq * 100, 2), "%"))
#> movie freq
#> 1 The Shawshank Redemption 70%
#> 2 Star Wars IV - A New Hope 53.33%
#> 3 Gladiator 50%
#> 4 Blade Runner 44.44%
#> 5 The Silence of the Lambs 43.75%

Related

What is wrong with how I'm cutting my dataset?

Reproducible Dataset
This is my data:
#### Data for Stack ####
stack <- structure(list(Mins_Work = c(435L, 350L, 145L, 135L, 15L, 60L,
60L, 390L, 395L, 395L, 315L, 80L, 580L, 175L, 545L, 230L, 435L,
370L, 255L, 515L, 330L, 65L, 115L, 550L, 420L, 45L, 266L, 196L,
198L, 220L, 17L, 382L, 0L, 180L, 343L, 207L, 263L, 332L, 0L,
0L, 259L, 417L, 282L, 685L, 517L, 111L, 64L, 466L, 499L, 460L,
269L, 300L, 427L, 301L, 436L, 342L, 229L, 379L, 102L, 146L, NA,
94L, 345L, 73L, 204L, 512L, 113L, 135L, 458L, 493L, 552L, 108L,
335L, 395L, 508L, 546L, 396L, 159L, 325L, 747L, 650L, 377L, 461L,
669L, 186L, 220L, 410L, 708L, 409L, 515L, 413L, 166L, 451L, 660L,
177L, 192L, 191L, 461L, 637L, 297L, 601L, 586L, 270L, 479L, 0L,
480L, 397L, 174L, 111L, 0L, 610L, 332L, 345L, 423L, 160L, 611L,
0L, 345L, 550L, 324L, 427L, 505L, 632L, 560L, 230L, 495L, 235L,
522L, 654L, 465L, 377L, 260L, 572L, 612L, 594L, 624L, 237L, 0L,
38L, 409L, 634L, 292L, 706L, 399L, 568L, 0L, 694L, 298L, 616L,
553L, 581L, 423L, 636L, 623L, 338L, 345L, 521L, 438L, 504L, 600L,
616L, 656L, 285L, 474L, 688L, 278L, 383L, 535L, 363L, 470L, 457L,
303L, 123L, 363L, 329L, 513L, 636L, 421L, 220L, 430L, 428L, 536L,
156L, 615L, 429L, 103L, 332L, 250L, 281L, 248L, 435L, 589L, 515L,
158L, 0L, 649L, 427L, 193L, 225L, 0L, 280L, 163L, 536L, 301L,
406L, 230L, 519L, 0L, 303L, 472L, 392L, 326L, 368L, 405L, 515L,
308L, 259L, 769L, 93L, 517L, 261L, 420L, 248L, 265L, 834L, 313L,
131L, 298L, 134L, 385L, 648L, 529L, 487L, 533L, 641L, 429L, 339L,
508L, 560L, 439L, 381L, 397L, 692L, NA), Coffee_Cups = c(3L,
0L, 2L, 6L, 4L, 5L, 3L, 3L, 2L, 2L, 3L, 1L, 1L, 3L, 2L, 2L, 0L,
1L, 1L, 4L, 4L, 3L, 0L, 1L, 3L, 0L, 0L, 0L, 0L, 2L, 0L, 1L, 2L,
3L, 2L, 2L, 4L, 3L, 6L, 6L, 3L, 4L, 6L, 8L, 3L, 5L, 0L, 2L, 2L,
8L, 6L, 4L, 6L, 4L, 4L, 2L, 6L, 6L, 5L, 1L, 3L, 1L, 5L, 4L, 6L,
5L, 0L, 6L, 6L, 4L, 4L, 2L, 2L, 6L, 6L, 7L, 3L, 3L, 0L, 5L, 7L,
6L, 3L, 5L, 3L, 3L, 1L, 9L, 9L, 3L, 3L, 6L, 6L, 6L, 3L, 0L, 7L,
6L, 6L, 3L, 9L, 3L, 8L, 8L, 3L, 3L, 7L, 6L, 3L, 3L, 3L, 6L, 6L,
6L, 1L, 9L, 3L, 3L, 2L, 6L, 3L, 6L, 9L, 6L, 8L, 9L, 6L, 6L, 6L,
0L, 3L, 0L, 3L, 3L, 6L, 3L, 0L, 9L, 3L, 0L, 2L, 0L, 6L, 6L, 6L,
3L, 6L, 3L, 9L, 3L, 0L, 0L, 6L, 3L, 3L, 3L, 3L, 6L, 0L, 6L, 3L,
3L, 5L, 5L, 3L, 0L, 6L, 4L, 2L, 0L, 2L, 4L, 0L, 6L, 4L, 4L, 2L,
2L, 0L, 9L, 6L, 3L, 6L, 6L, 9L, 0L, 6L, 6L, 6L, 6L, 6L, 6L, 3L,
3L, 0L, 9L, 6L, 3L, 6L, 3L, 6L, 1L, 6L, 6L, 6L, 6L, 6L, 1L, 3L,
9L, 6L, 3L, 6L, 9L, 3L, 5L, 6L, 3L, 0L, 6L, 3L, 3L, 5L, 0L, 6L,
3L, 5L, 3L, 0L, 6L, 7L, 3L, 6L, 6L, 6L, 6L, 3L, 5L, 6L, 7L, 6L,
6L, 4L, 3L)), class = "data.frame", row.names = c(NA, -244L))
Solution So Far
I'm trying to cut my coffee data into three groups, a low group, a medium group, and a high group. Here is how I tried doing so:
#### Load Libraries ####
library(tidyverse)
library(ggpubr)
#### Transform Data: Coffee ####
coffee_labels <- c("Low", "Medium", "High") # labels
range(stack$Coffee_Cups) # get range for split
coffee_breaks <- seq(from = 0,
to = 9,
by = 3) # split from 0 to 9 in 3 pt intervals
coffee_transform <- cut(x= stack$Coffee_Cups,
labels = coffee_labels,
breaks = coffee_breaks) # add labels and breaks
stack_transform <- stack %>%
mutate(coffee_level = coffee_transform) # mutate to add to data
tail(stack_transform$coffee_level, 30) # check transform
Problem
However, when I print the tail command at the end, I get these NA values, which I assume is from an improper cut:
[1] Low Medium Medium Low <NA> Medium Low Low Medium <NA> Medium
[12] Low Medium Low <NA> Medium High Low Medium Medium Medium Medium
[23] Low Medium Medium High Medium Medium Medium Low
Levels: Low Medium High
I looked and those values correspond to my coffee consumption equaling zero, yet I already set the cut from 0 to 9. Naturally, when I try to make a boxplot with this, the NA levels get included, which I don't want:
#### Transform Coffee Boxplot ####
ggboxplot(stack_transform,
x="coffee_level",
y="Mins_Work",
palette = "simpsons",
color = "coffee_level",
title = "Coffee Consumption Level Productivity",
caption = "*Data obtained from local matrix.",
xlab = "Coffee Consumption Level",
ylab = "Minutes of Productivity")+
theme_bw()+
theme(legend.position = "none",
plot.caption = element_text(face = "italic"),
plot.title = element_text(face = "bold",
size = 18,
family = "mono"))
Question
How do I fix these NA values? I want my zero values to be included into the "low" group if possible.
Would this work for you? It seems you are using base R, so a nested ifelse statement may be simpler:
# Label each row by its number of coffee cups: 0-3 "Low", 4-6 "Medium",
# 7-9 "High"; anything else (including missing values) becomes NA.
stack$coffee_cat <- with(
  stack,
  ifelse(
    Coffee_Cups %in% 0:3,
    "Low",
    ifelse(
      Coffee_Cups %in% 4:6,
      "Medium",
      ifelse(Coffee_Cups %in% 7:9, "High", NA)
    )
  )
)
Output
# Mins_Work Coffee_Cups coffee_cat
#1 435 3 Low
#2 350 0 Low
#3 145 2 Low
#4 135 6 Medium
#5 15 4 Medium
#6 60 5 Medium
case_when would be a dplyr alternative:
# dplyr alternative: case_when() checks the conditions in order and uses the
# first match; rows matching none of them (e.g. NA or values above 9) get NA.
# The result is only printed here; assign it to keep the new column.
stack %>% mutate(coffee_level = case_when(Coffee_Cups %in% 0:3 ~ "Low",
Coffee_Cups %in% 4:6 ~ "Medium",
Coffee_Cups %in% 7:9 ~ "High"))
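The NA values appear because cut() builds intervals that are open on the left by default, so the first interval is (0, 3] and a value of exactly 0 falls outside it; adding include.lowest = TRUE to your original cut() call puts the zeros into "Low". To include the zeros in cutting, you could also use the Hmisc::cut2 function (note that g = 3 splits the data into three quantile groups, so the boundaries may differ from 0-3 / 4-6 / 7-9):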
stack$coffee_Hmisc <- factor(Hmisc::cut2(stack$Coffee_Cups, g = 3), labels = coffee_labels)

Ggplot showing random spikes in data

When I try and plot my data from the dataframe to the ggplot, the graph has random spikes along the line and I don't know why. I have tried messing around with the ggplot but nothing works so am wondering if my dataframe is not configured correctly.
The data frame has 4 groups of animals with the size of population recorded over 104 weeks:
head(mydata)
time group size
1 0 rabbits 30
2 1 rabbits 31.38
3 2 rabbits 32.82561384
4 3 rabbits 34.3400006020094
5 4 rabbits 35.926471747402
6 5 rabbits 37.5884983951345
This is what the ggplot creates when run
ggplot(mydata, aes(x = time, y = size, group= group), main="lotka volterra model plot") +
geom_line(aes(col = group)) +
theme(axis.text = element_blank())
mydata<-structure(list(time = structure(c(1L, 2L, 18L, 29L, 40L, 51L,
62L, 73L, 84L, 95L, 3L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 41L, 42L, 43L, 44L, 45L,
46L, 47L, 48L, 49L, 50L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L,
60L, 61L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 74L,
75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 85L, 86L, 87L, 88L,
89L, 90L, 91L, 92L, 93L, 94L, 96L, 97L, 98L, 99L, 100L, 101L,
102L, 103L, 104L, 105L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 18L, 29L,
40L, 51L, 62L, 73L, 84L, 95L, 3L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 41L, 42L, 43L,
44L, 45L, 46L, 47L, 48L, 49L, 50L, 52L, 53L, 54L, 55L, 56L, 57L,
58L, 59L, 60L, 61L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L,
72L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 85L, 86L,
87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 96L, 97L, 98L, 99L, 100L,
101L, 102L, 103L, 104L, 105L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 18L,
29L, 40L, 51L, 62L, 73L, 84L, 95L, 3L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L,
28L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 41L, 42L,
43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 52L, 53L, 54L, 55L, 56L,
57L, 58L, 59L, 60L, 61L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L,
71L, 72L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 85L,
86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 96L, 97L, 98L, 99L,
100L, 101L, 102L, 103L, 104L, 105L, 4L, 5L, 6L, 7L, 8L, 1L, 2L,
18L, 29L, 40L, 51L, 62L, 73L, 84L, 95L, 3L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
27L, 28L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 52L, 53L, 54L, 55L,
56L, 57L, 58L, 59L, 60L, 61L, 63L, 64L, 65L, 66L, 67L, 68L, 69L,
70L, 71L, 72L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L,
85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 96L, 97L, 98L,
99L, 100L, 101L, 102L, 103L, 104L, 105L, 4L, 5L, 6L, 7L, 8L), .Label = c("0",
"1", "10", "100", "101", "102", "103", "104", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "2", "20", "21", "22", "23",
"24", "25", "26", "27", "28", "29", "3", "30", "31", "32", "33",
"34", "35", "36", "37", "38", "39", "4", "40", "41", "42", "43",
"44", "45", "46", "47", "48", "49", "5", "50", "51", "52", "53",
"54", "55", "56", "57", "58", "59", "6", "60", "61", "62", "63",
"64", "65", "66", "67", "68", "69", "7", "70", "71", "72", "73",
"74", "75", "76", "77", "78", "79", "8", "80", "81", "82", "83",
"84", "85", "86", "87", "88", "89", "9", "90", "91", "92", "93",
"94", "95", "96", "97", "98", "99"), class = "factor"), group = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L), .Label = c("foxes", "rabbits", "sto foxes", "sto rabbits"
), class = "factor"), size = structure(c(172L, 180L, 188L, 196L,
204L, 216L, 224L, 231L, 237L, 242L, 246L, 253L, 259L, 265L, 271L,
275L, 281L, 285L, 291L, 297L, 303L, 309L, 313L, 320L, 324L, 330L,
337L, 5L, 10L, 17L, 21L, 27L, 33L, 40L, 48L, 54L, 62L, 68L, 77L,
86L, 92L, 98L, 100L, 103L, 106L, 111L, 114L, 144L, 154L, 162L,
170L, 184L, 191L, 199L, 208L, 218L, 226L, 232L, 238L, 243L, 248L,
254L, 260L, 266L, 272L, 276L, 282L, 288L, 292L, 298L, 304L, 310L,
315L, 321L, 325L, 328L, 335L, 2L, 6L, 14L, 19L, 24L, 30L, 34L,
39L, 44L, 49L, 53L, 57L, 64L, 66L, 71L, 74L, 79L, 82L, 83L, 81L,
78L, 72L, 65L, 59L, 50L, 42L, 28L, 15L, 227L, 223L, 221L, 220L,
215L, 211L, 210L, 203L, 202L, 197L, 194L, 192L, 189L, 186L, 183L,
181L, 179L, 174L, 173L, 168L, 167L, 165L, 161L, 159L, 157L, 153L,
151L, 149L, 143L, 141L, 139L, 138L, 134L, 132L, 130L, 129L, 127L,
125L, 123L, 121L, 119L, 117L, 116L, 118L, 120L, 122L, 124L, 126L,
128L, 131L, 133L, 137L, 140L, 142L, 150L, 152L, 158L, 160L, 166L,
169L, 175L, 182L, 187L, 193L, 198L, 205L, 214L, 222L, 228L, 235L,
241L, 247L, 256L, 263L, 270L, 278L, 284L, 294L, 305L, 317L, 323L,
334L, 9L, 22L, 37L, 51L, 69L, 88L, 102L, 110L, 147L, 177L, 207L,
229L, 245L, 264L, 280L, 299L, 318L, 331L, 11L, 26L, 43L, 58L,
75L, 172L, 178L, 185L, 195L, 201L, 213L, 219L, 227L, 234L, 240L,
240L, 240L, 249L, 252L, 255L, 255L, 261L, 269L, 277L, 283L, 287L,
293L, 300L, 302L, 306L, 311L, 312L, 316L, 322L, 329L, 333L, 332L,
1L, 4L, 8L, 12L, 18L, 23L, 29L, 36L, 38L, 47L, 55L, 61L, 70L,
84L, 90L, 97L, 101L, 104L, 105L, 109L, 112L, 135L, 145L, 155L,
163L, 176L, 190L, 200L, 212L, 217L, 225L, 233L, 239L, 244L, 250L,
257L, 262L, 267L, 274L, 279L, 286L, 290L, 295L, 301L, 307L, 314L,
319L, 326L, 327L, 336L, 3L, 7L, 16L, 20L, 25L, 31L, 35L, 41L,
45L, 52L, 56L, 63L, 67L, 73L, 80L, 85L, 87L, 89L, 94L, 93L, 96L,
95L, 91L, 227L, 213L, 209L, 195L, 185L, 178L, 178L, 172L, 172L,
164L, 164L, 164L, 164L, 164L, 164L, 156L, 148L, 136L, 136L, 136L,
136L, 115L, 113L, 113L, 113L, 113L, 107L, 115L, 115L, 115L, 115L,
148L, 136L, 115L, 113L, 115L, 113L, 115L, 115L, 115L, 115L, 115L,
115L, 115L, 115L, 115L, 115L, 115L, 113L, 113L, 148L, 148L, 148L,
148L, 115L, 115L, 113L, 115L, 136L, 115L, 113L, 115L, 115L, 136L,
148L, 148L, 136L, 115L, 115L, 156L, 156L, 172L, 172L, 156L, 172L,
185L, 201L, 201L, 201L, 201L, 209L, 219L, 236L, 240L, 258L, 273L,
287L, 296L, 312L, 333L, 13L, 32L, 46L, 60L, 76L, 99L, 108L, 146L,
171L, 206L, 230L, 251L, 268L, 289L, 308L), .Label = c("100",
"1012.93841736696", "1015", "102", "102.906562962176", "1056.50381985743",
"1064", "107", "107.681565975074", "107.772405683259", "1077.18555087758",
"109", "110", "1101.34248052638", "1106.71937713192", "1113",
"112.871167258684", "113", "1147.37104021294", "1159", "118.213953337458",
"118.907362834956", "119", "1194.47454549496", "1202", "1222.35700531209",
"123.812386226114", "1234.46815782699", "124", "1242.49940630328",
"1246", "125", "129.678627213148", "1291.24494843906", "1299",
"130", "131.883068737542", "133", "1340.45334270173", "135.825399632334",
"1352", "1353.01585096407", "1376.1737200094", "1389.79769980332",
"1399", "141", "142", "142.266012655376", "1438.86816034509",
"1458.3616245406", "146.923717396291", "1466", "1487.15590421503",
"149.014385797496", "150", "1506", "1534.03518691436", "1534.8487312945",
"1547.69218094539", "155", "156", "156.085074111268", "1569",
"1578.74382847183", "1619.48179996562", "1620.36309674333", "1628",
"163.493294034224", "164.404667496757", "165", "1657.79871894128",
"1673.39048203113", "1688", "1689.76590687066", "1693.62394525503",
"170", "171.254949844073", "1710.03514245751", "1714.7828566833",
"1720", "1730.71178397753", "1731.17917390182", "1737.12790025705",
"176", "1780", "179.386660661569", "1825", "184.772238294142",
"1846", "185", "1859", "187.905787924644", "1882", "1885", "1889",
"1905", "193", "196.830463238049", "199", "206.179616479748",
"208", "208.555306039675", "215.973004018167", "217", "221",
"226.231236862199", "23", "231", "232", "236.377317707137", "236.975808527841",
"239", "24", "248.229122360228", "25", "25.006996053829", "25.014924626149",
"25.0224494183214", "25.045214840124", "25.0624177868317", "25.0969480307356",
"25.1281596085623", "25.1692973020694", "25.2210730103966", "25.261518785926",
"25.3427120320241", "25.3729438809371", "25.4948050967564", "25.502972351872",
"25.6510661849217", "25.6792760646671", "25.8167441085175", "25.8982682744573",
"25.9995767010671", "253", "26", "26.1541720515152", "26.1991820171385",
"26.4152216723683", "26.4496562442932", "26.6473973349067", "26.7877044521904",
"26.8954475777233", "260.014517996404", "265", "266", "268.967694505081",
"27", "27.1591450517314", "27.1716567290231", "27.4382939445755",
"27.6052576910814", "27.7327276941622", "272.356296590851", "278",
"28", "28.0423069297069", "28.0927121328395", "28.3669176162769",
"28.6387494628908", "28.706469381617", "285.279744350548", "288",
"29", "29.060894006489", "29.2486985253466", "29.4301440619011",
"29.814191678482", "29.9285746772517", "298.811153836992", "299",
"30", "30.2130274348995", "30.6266593536767", "30.6851813622098",
"305", "307.170873254196", "31", "31.0551119940305", "31.38",
"31.4984256324846", "31.5262288687559", "31.956655522999", "312.977842386294",
"32", "32.4298712292389", "32.4604735067685", "32.82561384",
"32.9181560223779", "326", "327.808166871801", "33.4216063385245",
"33.4978810982094", "33.9403312904656", "34", "34.3400006020094",
"34.4744522289701", "34.6498194865811", "343.331533882608", "346",
"35", "35.0241023493733", "35.589426339596", "35.926471747402",
"35.929285755777", "351", "351.950203806801", "359.578404210861",
"36", "36.170580066137", "36.767730294917", "364", "37", "37.3511750540819",
"37.3810544441627", "37.5884983951345", "373", "376.580290324766",
"38", "38.0107403667906", "38.65698616", "38.932599392745", "39.32",
"39.3297189377729", "393", "394.369745245424", "40", "40.6932665915451",
"404.382545261556", "408", "41.1539470098798", "412.980340934988",
"414", "42", "42.6559317498839", "43", "43.0651798238046", "432.446633930023",
"434", "44", "44.8469363200958", "45.0676068884105", "452.804115504112",
"456", "465.637719971972", "47.1656191267561", "47.2968531559251",
"474.089143100815", "48", "482", "486", "49", "49.3638184092258",
"496.338849122065", "50", "50.0412599281857", "503", "51", "51.6670275190631",
"519.591022362781", "52", "529", "53.1216681850227", "536.935197912393",
"54.0803005677077", "543.883956419344", "555", "556", "56", "56.5866412461102",
"56.6089338777482", "569.256258229975", "58", "580", "59.2584773516901",
"595.746608482546", "60", "60.4931412285751", "606", "619.469005246511",
"62.0347463450789", "623.393463893253", "63", "64.9081539728981",
"64.9438340628061", "640", "65", "652.234689248171", "662", "664",
"67.9921244976501", "682.307104523925", "69", "69.9106506029712",
"700", "71", "71.1863059302521", "713.645929259951", "714.291855860478",
"72", "735", "74", "74.533385009776", "746.284102517739", "75.5939559687986",
"76", "770", "774", "78.0407014344389", "780.251452081395", "79",
"81", "81.715943250903", "811", "815.573680850085", "82", "82.0686071733396",
"822.15243629253", "849", "85.5671627911559", "852.271131437189",
"88", "89.4657993696359", "89.6027932649582", "890.35728158426",
"891", "927", "929.836912856086", "93", "93.8316660251566", "943.287593733742",
"95", "96", "97.9415301108653", "970.703882929996", "979", "98.2630285221179"
), class = "factor")), class = "data.frame", row.names = c(NA,
-420L))

order geom_point by specific facet

I have a ggplot related question, which should be easy but I could not find the answer yet. I am trying to plot a faceted plot with the code below and this dataset (11 kB).
ggplot(plot.dat, aes(x = estimate, y = reorder(countryyear, estimate))) +
geom_point() +
geom_segment(aes(x=conf.low, xend=conf.high, yend=countryyear)) +
facet_grid(. ~ facet) +
xlab("Random Effect Estimate") +
ylab("") + scale_x_continuous(breaks=c(seq(0, 5, 1)), limits=c(0, 5)) +
ggtitle("Random Slopes in Country*Year Groups from Northwestern Europe") +
theme_minimal() + theme(plot.title = element_text(hjust = 0.5))
I would like countryyear to be organized by the values of estimate in the Extreme Right facet. Not quite sure how to order by values of a specific facet. Any ideas are welcome! Thanks.
Update: Here is the dput structure of a random subset of the dataset. It has some missing values, but it should work for the sake of the example. I also updated the download link above, that has the full version.
structure(list(estimate = c(1.41056902925372, 0.854859208455895,
1.16012834593894, 0.871339033194504, 0.803272289946221, 1.17540386134493,
0.996313357490551, 1.49940694539732, 1.33773365908762, 2.7318703090905,
1.19131935418045, 1.12765907711738, 0.746741192261761, 0.985847015192172,
0.912357310925342, 1.11582763712164, 1.21854572824977, 0.675712547978394,
0.566955524699616, 1.32611743759365, 0.519648352294682, 0.591013596394243,
1.30944973684044, 0.613722269599125, 1.13293279727271, 0.950788678552604,
1.1599446923567, 1.11493952112913, 0.95336321045095, 1.39002327097034,
0.794207546872633, 0.788545101449259, 1.01096883872495, 0.897407203907834,
1.38391605229103, 1.35754760293107, 1.0718508539761, 0.542191158958878,
0.757132752456427, 1.44172863221312, 1.04842251986171, 0.77260404885379,
0.879288027642055, 1.09372353598088, 0.745484830381145, 1.21211217249353,
0.628009608902132, 1.34864488674734), countryyear = structure(c(1L,
2L, 4L, 5L, 7L, 9L, 10L, 12L, 13L, 26L, 28L, 29L, 31L, 32L, 34L,
36L, 37L, 39L, 40L, 57L, 59L, 60L, 62L, 63L, 65L, 67L, 68L, 70L,
71L, 73L, 75L, 76L, 89L, 90L, 92L, 94L, 95L, 103L, 104L, 106L,
108L, 109L, 111L, 128L, 130L, 132L, 133L, 135L), .Label = c("AT02",
"AT04", "AT06", "AT14", "AT16", "BE02", "BE04", "BE06", "BE08",
"BE10", "BE12", "BE14", "BE16", "BG06", "BG08", "BG10", "BG12",
"CH14", "CZ02", "CZ04", "CZ08", "CZ10", "CZ12", "CZ14", "CZ16",
"DE02", "DE04", "DE06", "DE08", "DE10", "DE12", "DE14", "DE16",
"DK02", "DK04", "DK06", "DK08", "DK10", "DK12", "DK14", "EE04",
"EE06", "EE08", "EE10", "EE12", "EE14", "EE16", "ES02", "ES04",
"ES06", "ES08", "ES10", "ES12", "ES14", "ES16", "FI02", "FI04",
"FI06", "FI08", "FI10", "FI12", "FI14", "FI16", "FR06", "FR08",
"FR10", "FR12", "FR14", "FR16", "GB02", "GB04", "GB06", "GB08",
"GB10", "GB12", "GB14", "GB16", "GR02", "GR04", "GR08", "GR10",
"HU02", "HU06", "HU08", "HU10", "HU12", "HU14", "HU16", "IE02",
"IE04", "IE06", "IE08", "IE10", "IE12", "IE14", "IE16", "IT04",
"IT12", "IT16", "LT10", "LT12", "LT14", "NL02", "NL04", "NL06",
"NL08", "NL10", "NL12", "NL14", "NL16", "NO14", "PL02", "PL04",
"PL06", "PL08", "PL10", "PL12", "PL14", "PL16", "PT02", "PT04",
"PT06", "PT08", "PT10", "PT12", "PT14", "PT16", "SE02", "SE04",
"SE06", "SE08", "SE10", "SE12", "SE14", "SE16", "SI02", "SI04",
"SI06", "SI08", "SI10", "SI12", "SI14", "SI16", "SK04", "SK06",
"SK08", "SK10", "SK12"), class = "factor"), facet = structure(c(1L,
3L, 1L, 4L, 5L, 3L, 4L, 1L, 1L, 1L, 5L, 5L, 4L, 5L, 3L, 1L, 2L,
4L, 5L, 2L, 1L, 4L, 2L, 5L, 2L, 3L, 4L, 3L, 2L, 5L, 5L, 4L, 2L,
5L, 4L, 5L, 3L, 1L, 4L, 5L, 3L, 5L, 4L, 1L, 5L, 2L, 4L, 1L), .Label = c("Intercept",
"Extreme Left", "Center", "Right", "Extreme Right"), class = "factor"),
conf.low = c(1.16824810706745, 0.686215051613965, 0.910277310292764,
0.591705078386698, 0.37357342399703, 0.947951001435781, 0.663296044193037,
1.18794112232166, 1.06645119085865, 2.33578182814618, 0.580210898576738,
0.564235690522211, 0.530859530342114, 0.516191258265551,
0.730992343373883, 0.862424540370486, 0.827891784352444,
0.427638276259852, 0.275692447335368, 0.829763907986328,
0.370078643492081, 0.321852705445509, 0.83550621863293, 0.289836810427436,
0.847226120408727, 0.780056160572728, 0.873143885861924,
0.869757467125519, 0.615741777890997, 0.649483531741787,
0.349657606457465, 0.523294407847395, 0.670109418373736,
0.36656743494149, 0.952201390937053, 0.777207016700884, 0.888128473009524,
0.397085597526946, 0.479828726362257, 0.614533313431094,
0.813336887981082, 0.3129232351085, 0.61435321820328, 0.854801028643867,
0.346698059397102, 0.805414039007076, 0.434676644041643,
1.07780736338027), conf.high = c(1.70315275860739, 1.06494933995261,
1.47855797769819, 1.28312522319126, 1.7272277157504, 1.45743211956315,
1.49652679976667, 1.8925358720741, 1.67802460909168, 3.19512520208851,
2.44607918797515, 2.25369471581694, 1.05041423643869, 1.8828182806291,
1.13872035780431, 1.44368725318228, 1.79353596677755, 1.06769546329854,
1.16593171156554, 2.11938292490653, 0.729667639003753, 1.08526995489865,
2.05223919950836, 1.29954170985538, 1.51498719434776, 1.15888977865399,
1.54095070825389, 1.4292376699955, 1.47610807594453, 2.97492484321718,
1.80395225460704, 1.18824770090216, 1.52521060717706, 2.19697554354282,
2.01136404338166, 2.37122858469145, 1.29357889999432, 0.740322123703373,
1.19469713534712, 3.38237391450413, 1.35145693795059, 1.90755095606211,
1.25847381058047, 1.39942645489832, 1.60297301142912, 1.82417470710871,
0.907332092210651, 1.68753999308876)), row.names = c(1L,
9L, 17L, 25L, 33L, 41L, 49L, 57L, 65L, 128L, 136L, 144L, 152L,
160L, 168L, 176L, 184L, 192L, 200L, 283L, 291L, 299L, 307L, 315L,
323L, 331L, 339L, 347L, 355L, 363L, 371L, 379L, 442L, 450L, 458L,
466L, 474L, 512L, 520L, 528L, 536L, 544L, 552L, 640L, 648L, 656L,
664L, 672L), class = "data.frame")

Straight-forward AND open-source alternatives to asreml-r for spatial models?

In the past, I have used asreml-r to account for spatial auto-correlation in agricultural field trials that were laid out in a “row and range” design. It is relatively easy to use the asreml package to specify a spatial model (i.e. rcov=~at(LOCATION):ar1(ROW):ar1(RANGE))
Unfortunately, asreml-r is expensive and difficult to learn. My research group also prefers to rely on nlme and lmer for the majority of its analytical needs, so they are reluctant to either pay for asreml-r or consider using it.
Several years ago a question was posted asking if an open-source alternative to asreml-r was available that could be used to construct a two-dimensional spatial model with error structure in both directions. The consensus at the time seemed to be that it wasn’t straightforward to do this in either lmer or nlme.
After spending a few hours searching, it’s not totally clear to me whether there has been any progress on addressing this. Can anyone refer me to a recent discussion regarding this type of analysis? Or can anyone offer advice on how to construct a mixed-effects model that accounts for spatial correlation in nlme or lmer?
Please note that neither myself nor other members of our group are exactly statisticians or high-level r coders. It is also not practical to contract an outside group to analyze our data. We just want to apply the best methods we can to routine annual analyses of data.
An example of the data being analyzed:
my.data <- structure(list(ENTRY = structure(c(23L, 23L, 23L, 40L, 12L, 8L,
1L, 15L, 30L, 1L, 24L, 8L, 1L, 8L, 30L, 33L, 12L, 38L, 41L, 36L,
43L, 32L, 44L, 31L, 26L, 11L, 13L, 34L, 5L, 22L, 4L, 14L, 11L,
20L, 25L, 11L, 21L, 43L, 44L, 4L, 42L, 45L, 42L, 41L, 42L, 4L,
44L, 20L, 40L, 29L, 29L, 24L, 2L, 3L, 28L, 24L, 34L, 27L, 41L,
28L, 29L, 5L, 3L, 25L, 14L, 20L, 15L, 21L, 31L, 22L, 40L, 21L,
6L, 38L, 43L, 12L, 6L, 14L, 5L, 3L, 30L, 45L, 31L, 7L, 9L, 39L,
22L, 15L, 26L, 28L, 34L, 10L, 25L, 27L, 16L, 45L, 10L, 18L, 32L,
10L, 6L, 18L, 33L, 16L, 37L, 9L, 32L, 38L, 39L, 2L, 2L, 39L,
36L, 36L, 7L, 27L, 7L, 26L, 17L, 9L, 33L, 13L, 17L, 17L, 35L,
37L, 37L, 18L, 16L, 19L, 13L, 19L, 35L, 19L, 35L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L,
31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L,
44L, 45L, 52L, 54L, 52L, 54L, 49L, 51L, 50L, 54L, 49L, 46L, 51L,
50L, 53L, 49L, 50L, 51L, 53L, 52L, 53L, 48L, 47L, 46L, 46L, 47L,
48L, 48L, 47L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L), .Label = c("20",
"112", "1478", "1495", "1521", "1522", "1590", "1608", "1657",
"1658", "1660", "1667", "1680", "1688", "1723", "1728", "1730",
"1731", "1743", "1745", "1748", "1751", "1766", "1778", "1802",
"1815", "1817", "1819", "1828", "1830", "1831", "1834", "1835",
"1836", "1837", "1838", "1839", "1840", "1841", "1842", "1843",
"1844", "1845", "1846", "1847", "3097", "3164", "3168", "3169",
"3170", "3178", "3180", "3181", "3182"), class = "factor"), BLOCK = structure(c(12L,
77L, 163L, 67L, 28L, 170L, 90L, 36L, 52L, 2L, 15L, 19L, 168L,
103L, 188L, 31L, 203L, 66L, 29L, 46L, 34L, 32L, 27L, 16L, 83L,
48L, 82L, 30L, 171L, 14L, 115L, 54L, 93L, 65L, 50L, 187L, 58L,
91L, 200L, 6L, 169L, 135L, 99L, 148L, 101L, 104L, 107L, 128L,
153L, 146L, 41L, 22L, 53L, 87L, 131L, 151L, 110L, 10L, 44L, 11L,
13L, 20L, 42L, 202L, 111L, 38L, 183L, 51L, 199L, 109L, 75L, 134L,
92L, 166L, 182L, 97L, 100L, 1L, 86L, 181L, 25L, 108L, 94L, 116L,
72L, 18L, 23L, 76L, 185L, 81L, 62L, 63L, 56L, 204L, 85L, 95L,
129L, 49L, 147L, 106L, 145L, 205L, 73L, 207L, 105L, 24L, 43L,
8L, 167L, 164L, 3L, 96L, 184L, 45L, 74L, 39L, 89L, 4L, 152L,
130L, 165L, 40L, 57L, 70L, 206L, 186L, 7L, 37L, 9L, 102L, 132L,
127L, 88L, 80L, 98L, 139L, 196L, 174L, 118L, 215L, 194L, 193L,
208L, 172L, 122L, 143L, 141L, 123L, 161L, 209L, 213L, 178L, 159L,
160L, 191L, 177L, 192L, 144L, 175L, 211L, 140L, 180L, 173L, 125L,
119L, 120L, 210L, 214L, 136L, 154L, 162L, 190L, 158L, 216L, 142L,
124L, 212L, 195L, 155L, 121L, 64L, 68L, 117L, 59L, 71L, 35L,
69L, 201L, 21L, 84L, 61L, 114L, 17L, 112L, 55L, 150L, 113L, 79L,
78L, 47L, 33L, 149L, 60L, 189L, 5L, 133L, 26L, 137L, 197L, 179L,
126L, 198L, 157L, 176L, 138L, 156L), .Label = c("101", "102",
"103", "104", "105", "106", "107", "108", "109", "110", "111",
"112", "113", "114", "115", "116", "117", "118", "201", "202",
"203", "204", "205", "206", "207", "208", "209", "210", "211",
"212", "213", "214", "215", "216", "217", "218", "301", "302",
"303", "304", "305", "306", "307", "308", "309", "310", "311",
"312", "313", "314", "315", "316", "317", "318", "401", "402",
"403", "404", "405", "406", "407", "408", "409", "410", "411",
"412", "413", "414", "415", "416", "417", "418", "501", "502",
"503", "504", "505", "506", "507", "508", "509", "510", "511",
"512", "513", "514", "515", "516", "517", "518", "601", "602",
"603", "604", "605", "606", "607", "608", "609", "610", "611",
"612", "613", "614", "615", "616", "617", "618", "701", "702",
"703", "704", "705", "706", "707", "708", "709", "710", "711",
"712", "713", "714", "715", "716", "717", "718", "801", "802",
"803", "804", "805", "806", "807", "808", "809", "810", "811",
"812", "813", "814", "815", "816", "817", "818", "901", "902",
"903", "904", "905", "906", "907", "908", "909", "910", "911",
"912", "913", "914", "915", "916", "917", "918", "1001", "1002",
"1003", "1004", "1005", "1006", "1007", "1008", "1009", "1010",
"1011", "1012", "1013", "1014", "1015", "1016", "1017", "1018",
"1101", "1102", "1103", "1104", "1105", "1106", "1107", "1108",
"1109", "1110", "1111", "1112", "1113", "1114", "1115", "1116",
"1117", "1118", "1201", "1202", "1203", "1204", "1205", "1206",
"1207", "1208", "1209", "1210", "1211", "1212", "1213", "1214",
"1215", "1216", "1217", "1218"), class = "factor"), PLOT = structure(c(3L,
1L, 2L, 3L, 3L, 2L, 3L, 3L, 3L, 1L, 3L, 1L, 2L, 3L, 2L, 3L, 2L,
3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 1L,
3L, 3L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 3L, 3L, 3L, 2L, 2L,
2L, 1L, 1L, 3L, 3L, 2L, 2L, 2L, 3L, 1L, 3L, 3L, 1L, 1L, 2L, 2L,
1L, 2L, 3L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 3L, 1L, 3L, 2L, 1L,
3L, 1L, 2L, 3L, 3L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 3L, 1L, 2L,
3L, 2L, 3L, 2L, 2L, 1L, 2L, 3L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 3L, 1L, 2L, 2L, 2L, 1L, 1L, 3L, 2L, 2L, 1L, 1L, 1L,
3L, 2L, 2L, 3L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 3L, 3L, 2L, 1L, 3L, 3L, 3L, 2L, 1L, 3L, 1L, 2L, 3L,
2L, 1L, 2L, 2L, 1L, 1L, 3L, 3L, 2L, 1L, 2L, 1L, 2L, 1L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("1", "2", "3", "4"), class = "factor"),
RANGE = structure(c(1L, 5L, 10L, 4L, 2L, 10L, 5L, 2L, 3L,
1L, 1L, 2L, 10L, 6L, 11L, 2L, 12L, 4L, 2L, 3L, 2L, 2L, 2L,
1L, 5L, 3L, 5L, 2L, 10L, 1L, 7L, 3L, 6L, 4L, 3L, 11L, 4L,
6L, 12L, 1L, 10L, 8L, 6L, 9L, 6L, 6L, 6L, 8L, 9L, 9L, 3L,
2L, 3L, 5L, 8L, 9L, 7L, 1L, 3L, 1L, 1L, 2L, 3L, 12L, 7L,
3L, 11L, 3L, 12L, 7L, 5L, 8L, 6L, 10L, 11L, 6L, 6L, 1L, 5L,
11L, 2L, 6L, 6L, 7L, 4L, 1L, 2L, 5L, 11L, 5L, 4L, 4L, 4L,
12L, 5L, 6L, 8L, 3L, 9L, 6L, 9L, 12L, 5L, 12L, 6L, 2L, 3L,
1L, 10L, 10L, 1L, 6L, 11L, 3L, 5L, 3L, 5L, 1L, 9L, 8L, 10L,
3L, 4L, 4L, 12L, 11L, 1L, 3L, 1L, 6L, 8L, 8L, 5L, 5L, 6L,
8L, 11L, 10L, 7L, 12L, 11L, 11L, 12L, 10L, 7L, 8L, 8L, 7L,
9L, 12L, 12L, 10L, 9L, 9L, 11L, 10L, 11L, 8L, 10L, 12L, 8L,
10L, 10L, 7L, 7L, 7L, 12L, 12L, 8L, 9L, 9L, 11L, 9L, 12L,
8L, 7L, 12L, 11L, 9L, 7L, 4L, 4L, 7L, 4L, 4L, 2L, 4L, 12L,
2L, 5L, 4L, 7L, 1L, 7L, 4L, 9L, 7L, 5L, 5L, 3L, 2L, 9L, 4L,
11L, 1L, 8L, 2L, 8L, 11L, 10L, 7L, 11L, 9L, 10L, 8L, 9L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"), class = "factor"),
ROW = structure(c(12L, 5L, 1L, 13L, 10L, 8L, 18L, 18L, 16L,
2L, 15L, 1L, 6L, 13L, 8L, 13L, 5L, 12L, 11L, 10L, 16L, 14L,
9L, 16L, 11L, 12L, 10L, 12L, 9L, 14L, 7L, 18L, 3L, 11L, 14L,
7L, 4L, 1L, 2L, 6L, 7L, 9L, 9L, 4L, 11L, 14L, 17L, 2L, 9L,
2L, 5L, 4L, 17L, 15L, 5L, 7L, 2L, 10L, 8L, 11L, 13L, 2L,
6L, 4L, 3L, 2L, 3L, 15L, 1L, 1L, 3L, 8L, 2L, 4L, 2L, 7L,
10L, 1L, 14L, 1L, 7L, 18L, 4L, 8L, 18L, 18L, 5L, 4L, 5L,
9L, 8L, 9L, 2L, 6L, 13L, 5L, 3L, 13L, 3L, 16L, 1L, 7L, 1L,
9L, 15L, 6L, 7L, 8L, 5L, 2L, 3L, 6L, 4L, 9L, 2L, 3L, 17L,
4L, 8L, 4L, 3L, 4L, 3L, 16L, 8L, 6L, 7L, 1L, 9L, 12L, 6L,
1L, 16L, 8L, 8L, 13L, 16L, 12L, 10L, 17L, 14L, 13L, 10L,
10L, 14L, 17L, 15L, 15L, 17L, 11L, 15L, 16L, 15L, 16L, 11L,
15L, 12L, 18L, 13L, 13L, 14L, 18L, 11L, 17L, 11L, 12L, 12L,
16L, 10L, 10L, 18L, 10L, 14L, 18L, 16L, 16L, 14L, 15L, 11L,
13L, 10L, 14L, 9L, 5L, 17L, 17L, 15L, 3L, 3L, 12L, 7L, 6L,
17L, 4L, 1L, 6L, 5L, 7L, 6L, 11L, 15L, 5L, 6L, 9L, 5L, 7L,
8L, 11L, 17L, 17L, 18L, 18L, 13L, 14L, 12L, 12L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12",
"13", "14", "15", "16", "17", "18"), class = "factor"), YIELD = c(7882.814724,
7641.976671, 7535.187491, 8462.821158, 6470.762695, 7086.39647,
7260.626003, 8374.363239, 8225.545799, 6870.562479, 7260.303179,
6472.786879, 6535.801894, 7335.468082, 8101.853381, 7544.810974,
5597.940891, 8144.903193, 8489.541356, 7420.247609, 8267.229308,
7388.809243, 8753.922873, 7675.2452, 7540.083649, 7459.719121,
7614.590404, 6910.577593, 7655.161236, 8086.00529, 6754.554032,
9141.060314, 7728.70075, 7210.881432, 8872.660416, 7341.942246,
8211.265337, 9030.218757, 8957.01212, 7134.079145, 8580.60533,
8901.807114, 9009.635596, 8972.04225, 8850.07798, 7244.08863,
9357.355395, 7693.962907, 9059.604638, 8115.135788, 8073.220877,
7694.865425, 7168.389384, 7931.776306, 8310.054831, 7743.358631,
7241.417998, 7887.710882, 8671.335868, 7900.074562, 7089.929401,
8252.964285, 8038.601576, 8749.99335, 7880.418003, 7227.593551,
9733.562528, 7715.095262, 6926.775409, 7770.203085, 9000.211927,
7808.710708, 8239.82626, 8252.964285, 9546.314331, 2801.654022,
7865.302917, 6472.037973, 11286.93314, 7698.702989, 8239.164252,
8391.871173, 7817.085477, 7987.7324, 8517.420004, 8286.027753,
8021.268999, 8605.836444, 8360.390812, 8408.648702, 6980.52271,
8484.391646, 7604.489488, 8047.32564, 6859.736888, 8211.744547,
8338.224508, 7549.875965, 7831.170315, 8002.372075, 8092.398475,
7233.303386, 7880.198456, 6431.676768, 8146.454012, 9012.217125,
7696.760712, 7916.314754, 8372.430545, 4552.305881, 4744.119616,
8072.706265, 8038.601576, 8070.612573, 7631.800415, 8124.412039,
7958.686488, 8565.578204, 7204.2532, 7782.851494, 8195.743097,
8075.444598, 7468.681342, 7376.4572, 7019.132415, 7450.186973,
7900.853201, 7077.396698, 6781.366002, 8195.304822, 7581.211378,
8155.600681, 7446.611537, 7887.710882, 6849.690117, 6384.206298,
6965.647058, 7732.576444, 7687.296996, 7887.710882, 8061.034883,
7861.831189, 6690.298381, 7982.777954, 8310.054831, 7476.530867,
5840.137517, 8012.816166, 9211.484507, 8906.076566, 7227.155276,
6795.608201, 6926.023806, 8026.998142, 7388.809243, 7700.812705,
7493.134187, 7397.470718, 6794.411986, 8475.249868, 8387.892097,
8503.435859, 7890.106874, 7631.800415, 8349.757061, 7852.912013,
7758.848165, 7580.919692, 6402.21648, 6920.804051, 8628.194894,
7489.137138, 7866.037678, 7311.596266, 8746.497033, 9147.374207,
9022.033508, 8475.348448, 8911.007949, 8961.95446, 8476.003123,
8932.837953, 8661.336305, 8949.625535, 9048.100379, 10684.87284,
8845.185424, 8182.999872, 8986.675848, 8136.137692, 10504.2443,
8848.254372, 7233.813327, 8707.732966, 8381.547529, 10471.33626,
7682.888263, 8071.666541, 7428.171461, 9736.360333, 9378.789551,
8294.552055, 8225.545799, 8874.930993, 8459.226077, 8749.99335,
9192.455984, 7875.820212, 8982.410256, 8642.199262, 8935.14394,
8480.821358, 10240.80452, 8746.68483, 7619.897735, 8417.475201
)), .Names = c("ENTRY", "BLOCK", "PLOT", "RANGE", "ROW",
"YIELD"), row.names = 372:587, class = "data.frame")
The spatial arrangement of the data:
library(reshape2)

# Spread YIELD into a grid with one line per RANGE and one column per ROW,
# so the printed table mirrors the row-and-range layout of the field.
dcast(
  data = my.data,
  formula = RANGE ~ ROW,
  value.var = "YIELD"
)
Possible examples of models to analyze the data:
library(nlme)

# Baseline model: ENTRY as a fixed effect with a random intercept for each
# BLOCK, fitted by maximum likelihood.
fit1 <- lme(
  fixed = YIELD ~ ENTRY,
  data = my.data,
  random = ~ 1 | BLOCK,
  method = "ML"
)

# Same model plus a spatial correlation structure on the field coordinates.
# RANGE and ROW are stored as factors; corSpatial() needs numeric coordinates
# to compute distances, so the factor labels are converted back to numbers
# here (a factor would be expanded into dummy columns instead).
# NOTE(review): with no "| group" in `form`, lme() applies the correlation
# within the levels of BLOCK; BLOCK looks like it has one level per plot in
# the example data, which would leave no within-group pairs -- confirm the
# intended grouping.
fit2 <- lme(
  fixed = YIELD ~ ENTRY,
  data = my.data,
  random = ~ 1 | BLOCK,
  correlation = corSpatial(
    form = ~ as.numeric(as.character(RANGE)) + as.numeric(as.character(ROW))
  ),
  method = "ML"
)

How can I add missing sequence values?

I have a data frame like this:
structure(list(x = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L,
24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L,
37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L,
50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L,
63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L,
76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L,
89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L,
101L, 102L, 103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 112L,
113L, 114L, 115L, 116L, 117L, 118L, 119L, 120L, 121L, 123L, 124L,
125L, 127L, 128L, 129L, 130L, 132L, 133L, 134L, 135L, 136L, 137L,
138L, 139L, 140L, 141L, 142L, 143L, 145L, 146L, 147L, 148L, 149L,
150L, 151L, 152L, 153L, 154L, 155L, 158L, 160L, 163L, 164L, 166L,
167L, 169L, 170L, 173L, 174L, 178L, 179L, 181L, 182L, 183L, 186L,
187L, 191L, 192L, 193L, 194L, 197L, 198L, 200L, 205L, 208L, 209L,
213L, 214L, 216L, 217L, 220L, 222L, 223L, 225L, 229L, 233L, 235L,
237L, 242L, 243L, 244L, 251L, 253L, 254L, 255L, 261L, 262L, 263L,
264L, 267L, 268L, 269L, 270L, 276L, 281L, 282L, 284L, 285L, 287L,
289L, 293L, 295L, 297L, 299L, 301L, 306L, 308L, 315L, 317L, 318L,
320L, 327L, 330L, 336L, 337L, 345L, 346L, 355L, 359L, 376L, 377L,
379L, 384L, 387L, 388L, 402L, 405L, 408L, 415L, 420L, 421L, 427L,
428L, 429L, 430L, 437L, 438L, 439L, 440L, 446L, 448L, 453L, 456L,
469L, 472L, 476L, 478L, 481L, 483L, 486L, 487L, 488L, 497L, 500L,
502L, 504L, 507L, 512L, 525L, 530L, 531L, 543L, 546L, 550L, 578L,
581L, 598L, 601L, 680L, 689L, 693L, 712L, 728L, 746L, 768L, 790L,
794L, 840L, 851L, 861L, 928L, 969L, 1010L, 1180L, 1698L), freq = c(29186L,
12276L, 5851L, 3938L, 3133L, 1894L, 1157L, 820L, 597L, 481L,
398L, 297L, 269L, 251L, 175L, 176L, 153L, 130L, 117L, 108L, 93L,
83L, 58L, 84L, 60L, 43L, 59L, 51L, 57L, 53L, 38L, 38L, 32L, 35L,
28L, 27L, 29L, 22L, 24L, 29L, 30L, 23L, 26L, 19L, 19L, 25L, 14L,
22L, 16L, 12L, 15L, 14L, 11L, 13L, 18L, 10L, 17L, 20L, 7L, 9L,
2L, 8L, 12L, 8L, 7L, 10L, 10L, 9L, 6L, 6L, 9L, 5L, 11L, 4L, 5L,
5L, 10L, 4L, 6L, 1L, 4L, 7L, 3L, 4L, 3L, 2L, 3L, 5L, 7L, 2L,
2L, 3L, 2L, 4L, 7L, 1L, 3L, 5L, 5L, 3L, 5L, 2L, 2L, 2L, 3L, 2L,
5L, 7L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 3L, 2L, 2L, 1L,
3L, 4L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 1L, 4L, 3L, 1L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 3L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 4L, 4L, 1L, 2L,
2L, 4L, 2L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 2L, 3L, 1L, 1L, 1L, 1L,
3L, 2L, 1L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 2L, 1L, 1L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 4L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("x",
"freq"), row.names = c(NA, -296L), class = "data.frame")
After the x value of 130, there are missing values. Is there a way I can make this a continuous data frame in increments of 1, i.e. from 1 to 1698, populating the entire list and setting the elements that do not have a value here to 0? What I mean is:
1,2
4,5
5,7
should be converted to:
1,2
2,0
3,0
4,5
5,7
Any suggestions?
You can also use merge (assuming your data is stored in l):
# Outer-join the data onto a complete 1..max(x) sequence so that every
# missing x gets a row (merge() fills its freq with NA), then turn those
# NAs into zero counts. max(l$x) is 1698 for the example data.
l <- merge(l, data.frame(x = seq_len(max(l$x))), by = "x", all = TRUE)
l$freq[is.na(l$freq)] <- 0L  # 0L avoids coercing an integer freq to double
I'd find the values that are missing from column x, build a data frame from them with freq set to 0, then rbind it to the original data and order by x.
# I called your data dat
# Every x from 1 up to the largest observed value.
y <- seq_len(max(dat$x))
# Rows for the x values that are absent from dat, each with a zero count.
# The freq column is built with rep() so this also works when nothing is
# missing, and uses 0L so an integer freq is not coerced to double by rbind().
dat2 <- data.frame(x = setdiff(y, dat$x))
dat2$freq <- rep(0L, nrow(dat2))
dat3 <- rbind(dat, dat2)
dat4 <- dat3[order(dat3$x), ]  # could stop here
rownames(dat4) <- NULL  # but I hate non sequential row names
dat4

Resources