ggplot + boxplot with time series value by group - r
A variation of this question -- I can't quite get the dimensions right in the data structure to make a boxplot with the right values.
What I'm looking to do: hours would be on the x-axis, region would be on the y-axis, and for every region there would be a boxplot showing the distribution of income by hour.
The closest I can get is the following, but it's not right. How do I create the boxplot with two factors (one a time series) as axes, populated by the value distribution?
data:
regions <- structure(list(location = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("east",
"north", "west"), class = "factor"), hour = structure(list(sec = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), min = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), hour = c(0L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 0L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L), mday = c(13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L), mon = c(7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L
), year = c(115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L,
115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L,
115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L,
115L, 115L), wday = c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L), yday = c(224L, 224L, 224L, 224L,
224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L,
224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L,
224L, 224L, 224L, 224L, 224L, 224L, 224L), isdst = c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
zone = c("CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT",
"CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT",
"CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT",
"CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT"),
gmtoff = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_)), .Names = c("sec",
"min", "hour", "mday", "mon", "year", "wday", "yday", "isdst",
"zone", "gmtoff"), class = c("POSIXlt", "POSIXt")), hour_income = c(67L,
98L, 89L, 75L, 75L, 89L, 70L, 97L, 52L, 94L, 80L, 84L, 52L, 82L,
81L, 93L, 85L, 94L, 64L, 90L, 54L, 60L, 97L, 100L, 57L, 63L,
90L, 58L, 86L, 68L, 52L, 78L, 61L)), .Names = c("location", "hour",
"hour_income"), row.names = c(NA, -33L), class = "data.frame")
And the boxplot
# Attempt from the question: hour on x, hour_income on y,
# with the boxes grouped by location.
ggplot(data = regions) +
  geom_boxplot(
    mapping = aes(x = hour, y = hour_income, group = location)
  )
First, convert the date-times to character strings; then create the boxplots.
# Replace the POSIXlt column with "HH:MM:SS" character labels.
regions[["hour"]] <- strftime(regions[["hour"]], format = "%H:%M:%S")

# One box per hour label, pooling the income values of all locations.
ggplot(regions, aes(hour, hour_income)) +
  geom_boxplot()
However, because you have only one observation for each region and hour, splitting the boxplots by region gives you only flat lines instead of boxes, which is not very meaningful:
# Same axes as before, but the boxes are filled by location.
ggplot(regions, aes(x = hour, y = hour_income)) +
  geom_boxplot(mapping = aes(fill = location))
Related
Why is geom_line() not connecting through geom_point()?
Question: why is geom_line() not connecting through geom_point()? I have: Written with ggplot(a, aes(x = month, color = year, fill = year)) + scale_color_manual(values = colsze) + scale_fill_manual(values = alpha(colsze, .2)) + scale_x_discrete(labels = c("January", "February", "March", "April", "May", "June", "July", "August", "Septemer", "October", "November", "December")) + geom_point(aes(y = n), size = 4, shape=19) + geom_line(aes(y = n)) + scale_y_continuous(breaks = seq(0, 120, 10), limits = c(0, 120)) + facet_wrap(.~year) I cannot figure out why this does not work? E.g. following tutorials like this geom_line() seems to appear in the legend but not in plot. a <- structure(list(month = structure(c(4L, 1L, 4L, 7L, 1L, 9L, 2L, 8L, 8L, 10L, 7L, 10L, 9L, 9L, 9L, 2L, 10L, 7L, 4L, 2L, 2L, 3L, 11L, 11L, 12L, 9L, 12L, 10L, 10L, 10L, 11L, 5L, 10L, 10L, 10L, 10L, 10L, 12L, 11L, 7L, 12L, 6L, 9L, 9L, 9L, 7L, 9L, 4L, 12L, 12L, 11L, 3L, 3L, 11L, 11L, 11L, 7L, 11L, 12L, 12L, 12L, 2L, 4L, 1L, 11L, 11L, 1L, 4L, 8L, 2L, 10L, 5L, 5L, 6L, 7L, 11L, 11L, 11L, 11L, 11L, 12L, 11L, 10L, 7L, 12L, 9L, 9L, 7L, 10L, 8L, 8L, 5L, 9L, 10L, 9L, 3L, 8L, 10L, 10L, 8L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"), class = "factor"), year = structure(c(3L, 3L, 2L, 1L, 4L, 4L, 4L, 1L, 1L, 1L, 3L, 1L, 2L, 1L, 1L, 3L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 2L, 4L, 4L, 2L, 2L, 2L, 2L, 1L, 3L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 4L, 1L, 3L, 3L, 3L, 2L, 3L, 2L, 2L, 1L, 2L, 2L, 1L, 4L, 2L, 1L, 1L, 4L, 4L, 2L, 2L, 4L, 4L, 2L, 3L, 3L, 3L, 4L, 4L, 1L, 1L, 3L, 4L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 1L, 1L, 1L ), .Label = c("2017", "2018", "2019", "2020"), class = "factor"), n = c(92L, 95L, 83L, 95L, 70L, 88L, 94L, 103L, 103L, 98L, 95L, 98L, 90L, 89L, 89L, 76L, 98L, 97L, 79L, 103L, 103L, 111L, 104L, 104L, 73L, 89L, 73L, 107L, 107L, 107L, 88L, 111L, 107L, 107L, 107L, 107L, 107L, 73L, 104L, 78L, 87L, 92L, 90L, 90L, 90L, 78L, 89L, 92L, 
98L, 98L, 85L, 111L, 111L, 85L, 85L, 85L, 97L, 104L, 73L, 73L, 73L, 71L, 92L, 99L, 85L, 104L, 99L, 83L, 103L, 94L, 90L, 90L, 90L, 92L, 97L, 85L, 85L, 88L, 88L, 85L, 73L, 89L, 107L, 97L, 87L, 89L, 89L, 95L, 96L, 103L, 103L, 75L, 90L, 90L, 90L, 88L, 87L, 98L, 98L, 103L)), row.names = c(NA, -100L), groups = structure(list(month = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"), class = "factor"), year = structure(c(2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 3L, 1L, 2L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 3L, 4L), .Label = c("2017", "2018", "2019", "2020"), class = "factor"), .rows = structure(list(c(64L, 67L), 2L, 5L, 20:21, 62L, 16L, c(7L, 70L), c(22L, 52L, 53L), 96L, 19L, c(3L, 68L), c(1L, 48L, 63L), 72:73, 92L, 32L, c(42L, 74L), 4L, c(40L, 46L ), c(11L, 88L), c(18L, 57L, 75L, 84L), c(8L, 9L, 69L, 90L, 91L, 100L), 97L, c(14L, 15L, 26L, 47L, 86L, 87L), c(13L, 43L, 44L, 45L, 93L, 95L), 6L, c(10L, 12L, 17L, 98L, 99L), c(71L, 94L), c(28L, 29L, 30L, 33L, 34L, 35L, 36L, 37L, 83L), 89L, c(23L, 24L, 39L, 58L, 66L), c(51L, 54L, 55L, 56L, 65L, 76L, 77L, 80L), 82L, c(31L, 78L, 79L), 49:50, c(25L, 27L, 38L, 59L, 60L, 61L, 81L), c(41L, 85L)), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr", "list"))), row.names = c(NA, 36L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df", "tbl", "data.frame"))
Try this: ggplot(a, aes(x = as.numeric(month), color = year, fill = year)) + # scale_color_manual(values = colsze) + # scale_fill_manual(values = alpha(colsze, .2)) + scale_x_continuous(breaks = c(1,2,3,4,5,6,7,8,9,10,11,12), labels = c("January", "February", "March", "April", "May", "June", "July", "August", "Septemer", "October", "November", "December")) + geom_point(aes(y = n), size = 4, shape=19) + geom_line(aes(y = n)) + scale_y_continuous(breaks = seq(0, 120, 10), limits = c(0, 120)) + facet_wrap(.~year) I commented out those two lines because your reproducible example does not define the variable colsze. The problem is that month is a factor: with a discrete x, ggplot2 places each month in its own group, so geom_line() has no points to connect within a group. Converting month to numeric (as above) makes x continuous, so the points within each year are joined. For better readability, consider rotating the x-axis labels by 45°.
Formatting x_scale in ggplot with weekly data
In my ggplot, I've managed to create a x_scale based on time (I have weekly data) but am not sure how to create year-month labels using scale_x_date The following is my code - I have tried using (...breaks = "1 month") and (...minor_breaks = "1 month") but this does not produce the desired result. I am aiming for the labels to simply be Dec-16, Jan-17, Feb-17 and so on. What is the proper formatting to make the x_scale to show Month-Year in an abbreviated way? ggplot(data=test, aes(x=as.Date(test$weekly), y=test$dist, group=1)) + geom_path(col = "blue") + scale_x_date(labels = date_format("%b-%Y")) Here is a sample of the data > dput(test) structure(list(weekly = structure(list(sec = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), min = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L ), hour = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), mday = c(18L, 25L, 1L, 8L, 15L, 22L, 29L, 5L, 12L, 19L, 26L, 3L, 10L, 17L, 24L, 31L, 7L, 14L, 21L, 28L, 5L, 12L, 19L, 26L, 2L, 9L, 16L, 23L, 30L, 6L, 13L, 20L, 27L, 6L, 13L, 20L, 27L, 3L, 10L, 17L, 24L, 1L, 8L, 15L, 22L, 29L, 5L, 12L, 19L, 26L, 3L, 10L, 17L, 24L, 31L), mon = c(6L, 6L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L), year = c(116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 117L, 117L, 117L, 
117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L, 117L), wday = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), yday = c(199L, 206L, 213L, 220L, 227L, 234L, 241L, 248L, 255L, 262L, 269L, 276L, 283L, 290L, 297L, 304L, 311L, 318L, 325L, 332L, 339L, 346L, 353L, 360L, 1L, 8L, 15L, 22L, 29L, 36L, 43L, 50L, 57L, 64L, 71L, 78L, 85L, 92L, 99L, 106L, 113L, 120L, 127L, 134L, 141L, 148L, 155L, 162L, 169L, 176L, 183L, 190L, 197L, 204L, 211L), isdst = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), zone = c("CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CET", "CET", "CET", "CET", "CET", "CET", "CET", "CET", "CET", "CET", "CET", "CET", "CET", "CET", "CET", "CET", "CET", "CET", "CET", "CET", "CET", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST"), gmtoff = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_)), .Names = c("sec", "min", "hour", "mday", "mon", "year", "wday", "yday", "isdst", "zone", "gmtoff"), class = c("POSIXlt", "POSIXt")), dist = c(23621.19, 30400.01, 27458.66, 24511.07, 24908.37, 24413.81, 21096.52, 24557.51, 14833.71, 15513.72, 13516.23, 8102.02, 5881.44, 8370.01, 7339.34, 7703.79, 9297.52, 4542, 3555.56, 4438.33, 1968.65, 2259.06, 2729.89, 2876.66, 1767.86, 784.17, 2004.55, 4446.98, 2203.16, 3956.35, 4095.28, 3999.88, 3288.59, 4593.19, 6164.63, 6111.46, 8462.84, 7404.8, 9725.91, 9652.72, 9357.52, 15535.51, 11810.82, 17890.89, 23518.06, 18754.44, 16377.46, 15023.27, 23354.14, 23328.12, 27024.1, 23414.38, 28273.08, 24213.3, 19068.03)), .Names = c("weekly", "dist"), row.names = c(NA, -55L), class = "data.frame")
R shows NA although a value is present
I have two columns of PosixLT times with no NA values , yet NA values show up upon check > sum(is.na(check$start)) [1] 19 > sum(is.na(check$end)) [1] 23 The data is present in the cells, so why does this happen? I have heard that this can happen with PosixLT but even when I convert this to posixCT, there is very strange behavior. How does one go about solving this? > as.POSIXct(check$start, format = "%Y-%m-%d %H:%M:%S", tz = "CST6CDT") [1] NA "2014-03-09 01:35:01 CST" NA "2014-03-09 01:53:30 CST" NA [6] NA NA NA NA "2014-03-09 04:17:11 CDT" [11] NA NA "2015-03-08 01:54:43 CST" NA NA [16] NA NA NA NA NA [21] NA NA NA > dput(check) structure(list(start = structure(list(sec = c(24, 1, 27, 30, 8, 21, 40, 9, 43, 11, 31, 43, 43, 55, 39, 54, 41, 19, 2, 35, 6, 54, 40), min = c(45L, 35L, 14L, 53L, 36L, 37L, 47L, 48L, 54L, 17L, 57L, 53L, 54L, 3L, 52L, 22L, 34L, 28L, 41L, 42L, 52L, 52L, 53L), hour = c(2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), mday = c(9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L), mon = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), year = c(114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L), wday = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), yday = c(67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L), isdst = c(-1L, 0L, -1L, 0L, -1L, -1L, -1L, -1L, -1L, 1L, -1L, -1L, 0L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L), zone = c("", "CST", "", "CST", "", "", "", "", "", "CDT", "", "", "CST", "", "", "", "", "", "", "", "", "", ""), gmtoff = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_)), .Names = c("sec", "min", "hour", "mday", "mon", "year", "wday", "yday", "isdst", "zone", "gmtoff"), class = c("POSIXlt", "POSIXt"), tzone = c("CST6CDT", "CST", "CDT")), end = structure(list( sec = c(7, 59, 38, 45, 29, 46, 39, 14, 52, 29, 37, 5, 23, 41, 10, 43, 46, 46, 53, 24, 57, 13, 51), min = c(55L, 47L, 30L, 2L, 43L, 51L, 53L, 56L, 54L, 54L, 57L, 56L, 6L, 3L, 13L, 29L, 37L, 32L, 48L, 47L, 55L, 55L, 55L), hour = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), mday = c(9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L), mon = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), year = c(114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L), wday = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), yday = c(67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L), isdst = c(-1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L), zone = c("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""), gmtoff = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_ )), .Names = c("sec", "min", "hour", "mday", "mon", "year", "wday", "yday", "isdst", "zone", "gmtoff"), class = c("POSIXlt", "POSIXt"), tzone = c("CST6CDT", "CST", "CDT"))), .Names = c("start", "end"), row.names = c(1559963L, 1560092L, 
1560157L, 1560220L, 1560240L, 1560247L, 1560252L, 1560253L, 1560255L, 1560258L, 1560260L, 2004432L, 2004583L, 2004591L, 2004594L, 2004596L, 2004598L, 2004599L, 2004600L, 2004603L, 2004609L, 2004610L, 2004611L), class = "data.frame")
How does is.na work in this context? > is.na.POSIXlt function (x) is.na(as.POSIXct(x)) <bytecode: 0x0000000014232980> How does as.POSIXct behave here? > as.POSIXct(check$start) [1] NA "2014-03-09 01:35:01 CST" NA "2014-03-09 01:53:30 CST" [5] NA NA NA NA [9] NA "2014-03-09 04:17:11 CDT" NA NA [13] "2015-03-08 01:54:43 CST" NA NA NA [17] NA NA NA NA [21] NA NA NA OK, but why? Let's check the documentation of as.POSIXct: Any conversion that needs to go between the two date-time classes requires a time zone: conversion from "POSIXlt" to "POSIXct" will validate times in the selected time zone. One issue is what happens at transitions to and from DST, for example in the UK ... Let's see: > check$start$zone [1] "" "CST" "" "CST" "" "" "" "" "" "CDT" "" "" "CST" "" "" "" "" "" "" "" [21] "" "" "" And here be dragons: there is no time zone except for 4 entries, so as.POSIXct can't tell whether the dates are valid (within a DST change or not?), as you can see with: > check$start$isdst [1] -1 0 -1 0 -1 -1 -1 -1 -1 1 -1 -1 0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 Note that every NA row has hour 2 on 2014-03-09 or 2015-03-08, i.e. it falls in the 02:00-03:00 hour that is skipped when CST6CDT switches to daylight saving time, so those wall-clock times do not exist in that zone. So the conversion from POSIXlt (your data frame) to POSIXct can't determine whether the date is valid, and returns NA. One way to fix this is to enforce a time zone on all records: > check$start <- as.POSIXlt(strftime(check$start,tz="CST"),tz="CST6CDT") > is.na(check$start) [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
Change the value in a column of a dataframe depending on how many of each possible value there are
I have a dataframe looking like this: chr <- c(1,1,1,1,1) b1 <- c('HP', 'HP', 'CP', 'CP', 'KP') b2 <- c('HP', 'HP', 'CP', 'CP', 'KP') b3 <- c('CP', 'KP', 'CP', 'HP', 'CP') b4 <- c('CP', 'KP', 'CP', 'HP', 'CP') b5 <- c('CP', 'CP', 'KP', 'KP', 'HP') b6 <- c('CP', 'CP', 'KP', 'KP', 'HP') b7 <- c('CP', 'KP', 'HP', 'CP', 'CP') b8 <- c('CP', 'KP', 'HP', 'CP', 'CP') df <- data.frame(chr, b1,b2,b3,b4,b5,b6,b7,b8) I want to write a function that looks at each 'b' column and asks if it contains the value 'HP'. If it does, and the other six 'b' columns contain 'CP' or 'KP', I want to change the value 'HP' into 'CP' or 'KP' depending on which is the majority. If CP is the majority, change the HP to CP. If KP is the majority, change HP to KP. (note that the value of b1 and b2, b3 and b4 etc is always the same, so really only 4 columns need to be looked at, b1, b3, b5, and b7). To clarify, if the columns are e.g. HP HP CP CP CP CP KP KP, I want to change the two HPs into CPs (and leave the other columns the same). So, the example I gave would become: chr <- c(1,1,1,1,1) b1 <- c('CP', 'KP', 'CP', 'CP', 'KP') b2 <- c('CP', 'KP', 'CP', 'CP', 'KP') b3 <- c('CP', 'KP', 'CP', 'CP', 'CP') b4 <- c('CP', 'KP', 'CP', 'CP', 'CP') b5 <- c('CP', 'CP', 'KP', 'KP', 'CP') b6 <- c('CP', 'CP', 'KP', 'KP', 'CP') b7 <- c('CP', 'KP', 'CP', 'CP', 'CP') b8 <- c('CP', 'KP', 'CP', 'CP', 'CP') df <- data.frame(chr, b1,b2,b3,b4,b5,b6,b7,b8) df I have written a function (just for df$b1) with if statements, but it doesn't work. 
(note the rules for whether the HP changes to KP or CP depend on how many other CPs or KPs there are): fun <- function(df){ if(df$b1 == 'HP' && df$b3 == 'CP' && df$b5 == 'CP' && df$b7 == 'CP') {df$b1 <- 'KP'} if(df$b1 == 'HP' && df$b3 == 'KP' && df$b5 == 'CP' && df$b7 == 'CP') {df$b1 <- 'CP'} if(df$b1 == 'HP' && df$b3 == 'CP' && df$b5 == 'KP' && df$b7 == 'CP') {df$b1 <- 'CP'} if(df$b1 == 'HP' && df$b3 == 'CP' && df$b5 == 'CP' && df$b7 == 'KP') {df$b1 <- 'CP'} if(df$b1 == 'HP' && df$b3 == 'KP' && df$b5 == 'KP' && df$b7 == 'CP') {df$b1 <- 'KP'} if(df$b1 == 'HP' && df$b3 == 'KP' && df$b5 == 'CP' && df$b7 == 'KP') {df$b1 <- 'KP'} if(df$b1 == 'HP' && df$b3 == 'CP' && df$b5 == 'KP' && df$b7 == 'KP') {df$b1 <- 'KP'} if(df$b1 == 'HP' && df$b3 == 'KP' && df$b5 == 'KP' && df$b7 == 'KP') {df$b1 <- 'CP'} df$b2 <-df$b1 } Thanks very much for any help. I'm really stuck on this one. EDIT: This is a sample of my actual data which is more complex than the example I gave above. structure(list(chr = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), pos_c = c(2373L, 2406L, 2418L, 2419L, 2447L, 2450L, 2468L, 2524L, 2533L, 2535L, 2536L, 2542L, 2623L, 2709L, 3942L, 11716L, 11893L, 11898L, 12190L, 12396L, 26639L, 26640L, 26643L, 26646L, 26655L, 26657L, 26661L, 26667L, 26670L, 26676L, 26679L, 26684L, 26685L, 26688L, 26694L, 26703L, 26710L, 26712L, 26713L, 26723L, 26733L, 26737L, 26738L, 26739L, 26742L, 
26743L, 26748L, 26761L, 26765L, 26766L, 26778L, 26781L, 26790L, 26792L, 26796L, 26802L, 26805L, 26811L, 26814L, 26819L, 26820L, 26823L, 26829L, 26838L, 26846L, 26847L, 26848L, 26872L, 26873L, 26874L, 26877L, 26878L, 26883L, 26889L, 26901L, 26904L, 26907L, 26916L, 26923L, 26925L, 26927L, 26931L, 26937L, 26940L, 26946L, 26954L, 26958L, 26961L, 26963L, 26964L, 26970L, 26981L, 26982L, 26983L, 26991L, 26994L, 26997L, 27007L, 27008L, 27009L, 27012L, 27015L, 27018L, 27027L, 202471L, 203660L, 203668L, 203669L, 203670L, 203672L, 203678L, 203683L, 203686L, 203687L, 203690L, 203704L, 203705L, 203711L, 203714L, 203732L, 203749L, 203752L, 203754L, 203755L, 203903L, 203910L, 203911L, 203912L, 203913L, 203914L, 203915L, 203922L, 203924L, 203933L, 203937L, 203939L, 203945L, 203948L, 203951L, 203957L, 203960L, 203961L, 203963L, 203969L, 203972L, 203973L, 203974L, 203975L, 203981L, 203991L, 204220L, 204227L, 204230L, 204232L, 204242L, 204245L, 204262L, 204272L, 204278L, 204282L, 204290L), c1 = c(101L, 60L, 63L, 64L, 100L, 97L, 94L, 83L, 80L, 48L, 46L, 51L, 69L, 46L, 23L, 79L, 63L, 59L, 53L, 85L, 13L, 12L, 1L, 9L, 11L, 13L, 9L, 14L, 14L, 12L, 15L, 9L, 15L, 14L, 14L, 2L, 2L, 8L, 3L, 0L, 0L, 4L, 2L, 1L, 4L, 4L, 8L, 39L, 7L, 5L, 2L, 41L, 69L, 79L, 89L, 120L, 128L, 90L, 134L, 107L, 169L, 120L, 103L, 48L, 58L, 132L, 62L, 19L, 9L, 13L, 12L, 12L, 17L, 251L, 8L, 367L, 367L, 264L, 5L, 170L, 113L, 234L, 134L, 143L, 189L, 224L, 255L, 296L, 448L, 239L, 169L, 80L, 312L, 84L, 403L, 397L, 430L, 529L, 544L, 556L, 565L, 549L, 555L, 4L, 11L, 0L, 18L, 18L, 19L, 19L, 18L, 18L, 17L, 17L, 15L, 15L, 16L, 15L, 13L, 14L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 2L, 3L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 13L, 2L, 10L, 4L, 10L, 24L, 33L, 33L, 63L, 42L), c2 = c(101L, 60L, 63L, 64L, 100L, 97L, 94L, 83L, 80L, 48L, 46L, 51L, 69L, 46L, 23L, 79L, 63L, 59L, 53L, 85L, 13L, 12L, 1L, 9L, 11L, 13L, 9L, 14L, 14L, 12L, 15L, 9L, 15L, 14L, 14L, 2L, 2L, 8L, 3L, 0L, 0L, 4L, 2L, 1L, 4L, 
4L, 8L, 39L, 7L, 5L, 2L, 41L, 69L, 79L, 89L, 120L, 128L, 90L, 134L, 107L, 169L, 120L, 103L, 48L, 58L, 132L, 62L, 19L, 9L, 13L, 12L, 12L, 17L, 251L, 8L, 367L, 367L, 264L, 5L, 170L, 113L, 234L, 134L, 143L, 189L, 224L, 255L, 296L, 448L, 239L, 169L, 80L, 312L, 84L, 403L, 397L, 430L, 529L, 544L, 556L, 565L, 549L, 555L, 4L, 11L, 0L, 18L, 18L, 19L, 19L, 18L, 18L, 17L, 17L, 15L, 15L, 16L, 15L, 13L, 14L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 2L, 3L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 13L, 2L, 10L, 4L, 10L, 24L, 33L, 33L, 63L, 42L), c3 = c(37L, 0L, 0L, 0L, 42L, 46L, 46L, 21L, 26L, 6L, 2L, 7L, 11L, 4L, 0L, 4L, 1L, 0L, 0L, 2L, 29L, 29L, 0L, 22L, 23L, 23L, 26L, 27L, 29L, 24L, 32L, 26L, 35L, 32L, 32L, 3L, 3L, 10L, 1L, 5L, 1L, 6L, 1L, 0L, 5L, 11L, 6L, 81L, 15L, 14L, 0L, 92L, 157L, 174L, 168L, 236L, 221L, 143L, 228L, 251L, 292L, 273L, 281L, 33L, 39L, 260L, 57L, 53L, 24L, 22L, 26L, 37L, 37L, 484L, 16L, 721L, 724L, 436L, 7L, 367L, 163L, 411L, 167L, 373L, 275L, 599L, 637L, 773L, 866L, 615L, 223L, 63L, 531L, 59L, 878L, 868L, 911L, 939L, 975L, 995L, 980L, 931L, 958L, 12L, 16L, 0L, 12L, 13L, 12L, 11L, 9L, 12L, 11L, 11L, 10L, 1L, 0L, 0L, 0L, 1L, 1L, 2L, 1L, 0L, 1L, 1L, 0L, 2L, 2L, 2L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 2L, 28L, 5L, 28L, 3L, 12L, 39L, 40L, 50L, 90L, 80L), c4 = c(37L, 0L, 0L, 0L, 42L, 46L, 46L, 21L, 26L, 6L, 2L, 7L, 11L, 4L, 0L, 4L, 1L, 0L, 0L, 2L, 29L, 29L, 0L, 22L, 23L, 23L, 26L, 27L, 29L, 24L, 32L, 26L, 35L, 32L, 32L, 3L, 3L, 10L, 1L, 5L, 1L, 6L, 1L, 0L, 5L, 11L, 6L, 81L, 15L, 14L, 0L, 92L, 157L, 174L, 168L, 236L, 221L, 143L, 228L, 251L, 292L, 273L, 281L, 33L, 39L, 260L, 57L, 53L, 24L, 22L, 26L, 37L, 37L, 484L, 16L, 721L, 724L, 436L, 7L, 367L, 163L, 411L, 167L, 373L, 275L, 599L, 637L, 773L, 866L, 615L, 223L, 63L, 531L, 59L, 878L, 868L, 911L, 939L, 975L, 995L, 980L, 931L, 958L, 12L, 16L, 0L, 12L, 13L, 12L, 11L, 9L, 12L, 11L, 11L, 10L, 1L, 0L, 0L, 0L, 1L, 1L, 2L, 1L, 0L, 1L, 1L, 0L, 
2L, 2L, 2L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 2L, 28L, 5L, 28L, 3L, 12L, 39L, 40L, 50L, 90L, 80L), c5 = c(96L, 77L, 74L, 72L, 96L, 96L, 92L, 80L, 79L, 79L, 76L, 76L, 66L, 55L, 64L, 78L, 110L, 100L, 165L, 171L, 38L, 41L, 2L, 38L, 33L, 37L, 21L, 40L, 41L, 21L, 37L, 19L, 45L, 30L, 22L, 22L, 28L, 34L, 30L, 31L, 25L, 40L, 34L, 33L, 34L, 46L, 41L, 96L, 48L, 51L, 38L, 93L, 152L, 155L, 155L, 193L, 195L, 189L, 222L, 213L, 284L, 248L, 230L, 56L, 70L, 208L, 82L, 85L, 67L, 64L, 64L, 83L, 71L, 495L, 77L, 570L, 577L, 499L, 55L, 292L, 236L, 352L, 244L, 296L, 351L, 391L, 440L, 483L, 653L, 417L, 194L, 57L, 460L, 57L, 538L, 520L, 573L, 731L, 753L, 770L, 772L, 757L, 761L, 35L, 73L, 66L, 70L, 70L, 71L, 70L, 74L, 79L, 82L, 83L, 85L, 69L, 68L, 71L, 71L, 70L, 73L, 72L, 72L, 74L, 103L, 107L, 106L, 107L, 109L, 106L, 106L, 105L, 106L, 105L, 108L, 104L, 105L, 106L, 106L, 103L, 112L, 112L, 113L, 112L, 109L, 114L, 114L, 115L, 120L, 114L, 97L, 125L, 103L, 124L, 107L, 116L, 145L, 139L, 138L, 177L, 139L ), c6 = c(96L, 77L, 74L, 72L, 96L, 96L, 92L, 80L, 79L, 79L, 76L, 76L, 66L, 55L, 64L, 78L, 110L, 100L, 165L, 171L, 38L, 41L, 2L, 38L, 33L, 37L, 21L, 40L, 41L, 21L, 37L, 19L, 45L, 30L, 22L, 22L, 28L, 34L, 30L, 31L, 25L, 40L, 34L, 33L, 34L, 46L, 41L, 96L, 48L, 51L, 38L, 93L, 152L, 155L, 155L, 193L, 195L, 189L, 222L, 213L, 284L, 248L, 230L, 56L, 70L, 208L, 82L, 85L, 67L, 64L, 64L, 83L, 71L, 495L, 77L, 570L, 577L, 499L, 55L, 292L, 236L, 352L, 244L, 296L, 351L, 391L, 440L, 483L, 653L, 417L, 194L, 57L, 460L, 57L, 538L, 520L, 573L, 731L, 753L, 770L, 772L, 757L, 761L, 35L, 73L, 66L, 70L, 70L, 71L, 70L, 74L, 79L, 82L, 83L, 85L, 69L, 68L, 71L, 71L, 70L, 73L, 72L, 72L, 74L, 103L, 107L, 106L, 107L, 109L, 106L, 106L, 105L, 106L, 105L, 108L, 104L, 105L, 106L, 106L, 103L, 112L, 112L, 113L, 112L, 109L, 114L, 114L, 115L, 120L, 114L, 97L, 125L, 103L, 124L, 107L, 116L, 145L, 139L, 138L, 177L, 139L), c7 = c(28L, 3L, 1L, 1L, 52L, 50L, 60L, 49L, 50L, 3L, 2L, 2L, 37L, 11L, 
0L, 1L, 2L, 2L, 0L, 1L, 28L, 30L, 1L, 17L, 23L, 28L, 11L, 30L, 32L, 13L, 32L, 19L, 39L, 18L, 17L, 23L, 29L, 46L, 37L, 25L, 21L, 42L, 32L, 29L, 30L, 41L, 44L, 141L, 72L, 64L, 25L, 93L, 219L, 234L, 218L, 294L, 277L, 184L, 294L, 273L, 382L, 293L, 280L, 131L, 132L, 386L, 157L, 99L, 77L, 75L, 68L, 66L, 88L, 615L, 55L, 746L, 740L, 685L, 27L, 305L, 158L, 511L, 151L, 326L, 371L, 605L, 650L, 727L, 886L, 623L, 314L, 170L, 734L, 162L, 937L, 908L, 987L, 964L, 997L, 1002L, 1007L, 960L, 980L, 28L, 75L, 61L, 96L, 98L, 97L, 96L, 93L, 101L, 99L, 100L, 98L, 91L, 90L, 90L, 89L, 87L, 76L, 75L, 75L, 76L, 88L, 92L, 87L, 86L, 88L, 87L, 85L, 87L, 87L, 83L, 86L, 87L, 86L, 86L, 89L, 83L, 83L, 84L, 84L, 86L, 83L, 86L, 88L, 87L, 88L, 84L, 81L, 118L, 90L, 120L, 90L, 101L, 127L, 134L, 140L, 172L, 160L), c8 = c(28L, 3L, 1L, 1L, 52L, 50L, 60L, 49L, 50L, 3L, 2L, 2L, 37L, 11L, 0L, 1L, 2L, 2L, 0L, 1L, 28L, 30L, 1L, 17L, 23L, 28L, 11L, 30L, 32L, 13L, 32L, 19L, 39L, 18L, 17L, 23L, 29L, 46L, 37L, 25L, 21L, 42L, 32L, 29L, 30L, 41L, 44L, 141L, 72L, 64L, 25L, 93L, 219L, 234L, 218L, 294L, 277L, 184L, 294L, 273L, 382L, 293L, 280L, 131L, 132L, 386L, 157L, 99L, 77L, 75L, 68L, 66L, 88L, 615L, 55L, 746L, 740L, 685L, 27L, 305L, 158L, 511L, 151L, 326L, 371L, 605L, 650L, 727L, 886L, 623L, 314L, 170L, 734L, 162L, 937L, 908L, 987L, 964L, 997L, 1002L, 1007L, 960L, 980L, 28L, 75L, 61L, 96L, 98L, 97L, 96L, 93L, 101L, 99L, 100L, 98L, 91L, 90L, 90L, 89L, 87L, 76L, 75L, 75L, 76L, 88L, 92L, 87L, 86L, 88L, 87L, 85L, 87L, 87L, 83L, 86L, 87L, 86L, 86L, 89L, 83L, 83L, 84L, 84L, 86L, 83L, 86L, 88L, 87L, 88L, 84L, 81L, 118L, 90L, 120L, 90L, 101L, 127L, 134L, 140L, 172L, 160L), k1 = c(39L, 64L, 68L, 69L, 38L, 38L, 41L, 51L, 54L, 84L, 83L, 84L, 57L, 50L, 43L, 58L, 72L, 71L, 29L, 35L, 0L, 0L, 10L, 1L, 1L, 0L, 3L, 0L, 0L, 1L, 0L, 3L, 0L, 0L, 0L, 14L, 14L, 9L, 15L, 18L, 24L, 20L, 20L, 27L, 28L, 10L, 28L, 27L, 59L, 64L, 73L, 43L, 19L, 7L, 27L, 5L, 23L, 30L, 29L, 65L, 10L, 46L, 27L, 160L, 168L, 95L, 175L, 255L, 265L, 271L, 270L, 76L, 
269L, 77L, 14L, 12L, 11L, 118L, 382L, 204L, 220L, 181L, 290L, 290L, 114L, 209L, 89L, 159L, 7L, 144L, 95L, 9L, 180L, 411L, 105L, 125L, 97L, 19L, 3L, 3L, 2L, 12L, 1L, 540L, 1L, 32L, 14L, 14L, 13L, 13L, 15L, 14L, 12L, 11L, 12L, 11L, 12L, 13L, 13L, 9L, 18L, 17L, 8L, 18L, 6L, 2L, 1L, 2L, 1L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 0L, 2L, 1L, 21L, 28L, 49L, 50L, 54L, 45L, 44L), k2 = c(39L, 64L, 68L, 69L, 38L, 38L, 41L, 51L, 54L, 84L, 83L, 84L, 57L, 50L, 43L, 58L, 72L, 71L, 29L, 35L, 0L, 0L, 10L, 1L, 1L, 0L, 3L, 0L, 0L, 1L, 0L, 3L, 0L, 0L, 0L, 14L, 14L, 9L, 15L, 18L, 24L, 20L, 20L, 27L, 28L, 10L, 28L, 27L, 59L, 64L, 73L, 43L, 19L, 7L, 27L, 5L, 23L, 30L, 29L, 65L, 10L, 46L, 27L, 160L, 168L, 95L, 175L, 255L, 265L, 271L, 270L, 76L, 269L, 77L, 14L, 12L, 11L, 118L, 382L, 204L, 220L, 181L, 290L, 290L, 114L, 209L, 89L, 159L, 7L, 144L, 95L, 9L, 180L, 411L, 105L, 125L, 97L, 19L, 3L, 3L, 2L, 12L, 1L, 540L, 1L, 32L, 14L, 14L, 13L, 13L, 15L, 14L, 12L, 11L, 12L, 11L, 12L, 13L, 13L, 9L, 18L, 17L, 8L, 18L, 6L, 2L, 1L, 2L, 1L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 0L, 2L, 1L, 21L, 28L, 49L, 50L, 54L, 45L, 44L), k3 = c(84L, 122L, 120L, 120L, 92L, 88L, 90L, 107L, 98L, 114L, 120L, 117L, 91L, 64L, 59L, 100L, 113L, 109L, 56L, 136L, 1L, 0L, 29L, 7L, 4L, 6L, 5L, 6L, 6L, 9L, 7L, 11L, 7L, 10L, 9L, 44L, 46L, 38L, 51L, 60L, 79L, 75L, 80L, 83L, 80L, 41L, 97L, 61L, 133L, 135L, 180L, 100L, 50L, 28L, 75L, 18L, 79L, 94L, 100L, 117L, 47L, 74L, 68L, 393L, 390L, 191L, 416L, 504L, 532L, 545L, 545L, 181L, 556L, 175L, 19L, 24L, 19L, 312L, 766L, 389L, 416L, 418L, 639L, 475L, 239L, 293L, 70L, 135L, 37L, 122L, 84L, 42L, 408L, 886L, 93L, 115L, 65L, 67L, 35L, 37L, 47L, 50L, 54L, 942L, 9L, 43L, 29L, 29L, 29L, 29L, 28L, 27L, 25L, 25L, 26L, 32L, 33L, 32L, 33L, 30L, 26L, 23L, 24L, 23L, 8L, 1L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 3L, 3L, 4L, 3L, 2L, 2L, 0L, 7L, 3L, 65L, 73L, 111L, 
98L, 133L, 107L, 64L), k4 = c(84L, 122L, 120L, 120L, 92L, 88L, 90L, 107L, 98L, 114L, 120L, 117L, 91L, 64L, 59L, 100L, 113L, 109L, 56L, 136L, 1L, 0L, 29L, 7L, 4L, 6L, 5L, 6L, 6L, 9L, 7L, 11L, 7L, 10L, 9L, 44L, 46L, 38L, 51L, 60L, 79L, 75L, 80L, 83L, 80L, 41L, 97L, 61L, 133L, 135L, 180L, 100L, 50L, 28L, 75L, 18L, 79L, 94L, 100L, 117L, 47L, 74L, 68L, 393L, 390L, 191L, 416L, 504L, 532L, 545L, 545L, 181L, 556L, 175L, 19L, 24L, 19L, 312L, 766L, 389L, 416L, 418L, 639L, 475L, 239L, 293L, 70L, 135L, 37L, 122L, 84L, 42L, 408L, 886L, 93L, 115L, 65L, 67L, 35L, 37L, 47L, 50L, 54L, 942L, 9L, 43L, 29L, 29L, 29L, 29L, 28L, 27L, 25L, 25L, 26L, 32L, 33L, 32L, 33L, 30L, 26L, 23L, 24L, 23L, 8L, 1L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 3L, 3L, 4L, 3L, 2L, 2L, 0L, 7L, 3L, 65L, 73L, 111L, 98L, 133L, 107L, 64L), k5 = c(0L, 14L, 14L, 14L, 1L, 0L, 0L, 8L, 7L, 5L, 5L, 5L, 0L, 3L, 0L, 8L, 2L, 3L, 18L, 15L, 0L, 2L, 38L, 3L, 5L, 1L, 18L, 1L, 2L, 2L, 3L, 21L, 2L, 15L, 1L, 26L, 22L, 17L, 27L, 33L, 41L, 39L, 42L, 45L, 51L, 14L, 50L, 31L, 82L, 84L, 108L, 55L, 24L, 16L, 51L, 33L, 44L, 55L, 54L, 87L, 15L, 20L, 27L, 285L, 297L, 151L, 293L, 343L, 363L, 374L, 376L, 57L, 382L, 24L, 25L, 10L, 8L, 103L, 551L, 301L, 320L, 276L, 364L, 340L, 49L, 272L, 171L, 195L, 24L, 180L, 161L, 11L, 254L, 663L, 188L, 229L, 158L, 26L, 3L, 3L, 6L, 10L, 6L, 708L, 0L, 9L, 0L, 3L, 0L, 1L, 0L, 2L, 0L, 0L, 1L, 9L, 9L, 9L, 10L, 10L, 6L, 6L, 1L, 6L, 2L, 0L, 5L, 3L, 2L, 3L, 4L, 2L, 3L, 2L, 2L, 1L, 3L, 0L, 0L, 4L, 1L, 0L, 1L, 5L, 2L, 0L, 1L, 2L, 0L, 2L, 5L, 1L, 3L, 3L, 43L, 50L, 78L, 75L, 87L, 78L, 59L), k6 = c(0L, 14L, 14L, 14L, 1L, 0L, 0L, 8L, 7L, 5L, 5L, 5L, 0L, 3L, 0L, 8L, 2L, 3L, 18L, 15L, 0L, 2L, 38L, 3L, 5L, 1L, 18L, 1L, 2L, 2L, 3L, 21L, 2L, 15L, 1L, 26L, 22L, 17L, 27L, 33L, 41L, 39L, 42L, 45L, 51L, 14L, 50L, 31L, 82L, 84L, 108L, 55L, 24L, 16L, 51L, 33L, 44L, 55L, 54L, 87L, 15L, 20L, 27L, 285L, 297L, 151L, 293L, 343L, 363L, 374L, 376L, 57L, 382L, 24L, 25L, 10L, 8L, 103L, 551L, 301L, 320L, 
276L, 364L, 340L, 49L, 272L, 171L, 195L, 24L, 180L, 161L, 11L, 254L, 663L, 188L, 229L, 158L, 26L, 3L, 3L, 6L, 10L, 6L, 708L, 0L, 9L, 0L, 3L, 0L, 1L, 0L, 2L, 0L, 0L, 1L, 9L, 9L, 9L, 10L, 10L, 6L, 6L, 1L, 6L, 2L, 0L, 5L, 3L, 2L, 3L, 4L, 2L, 3L, 2L, 2L, 1L, 3L, 0L, 0L, 4L, 1L, 0L, 1L, 5L, 2L, 0L, 1L, 2L, 0L, 2L, 5L, 1L, 3L, 3L, 43L, 50L, 78L, 75L, 87L, 78L, 59L), k7 = c(0L, 36L, 42L, 44L, 0L, 0L, 0L, 3L, 3L, 49L, 50L, 51L, 0L, 0L, 0L, 0L, 0L, 0L, 31L, 158L, 0L, 1L, 28L, 14L, 11L, 9L, 27L, 14L, 12L, 14L, 14L, 28L, 14L, 32L, 19L, 41L, 37L, 26L, 39L, 57L, 85L, 75L, 82L, 87L, 87L, 37L, 91L, 54L, 124L, 138L, 206L, 150L, 44L, 18L, 92L, 38L, 76L, 95L, 101L, 155L, 20L, 90L, 48L, 375L, 344L, 135L, 379L, 519L, 537L, 549L, 563L, 67L, 557L, 91L, 43L, 30L, 35L, 125L, 784L, 491L, 519L, 324L, 627L, 503L, 215L, 296L, 68L, 203L, 42L, 173L, 58L, 43L, 222L, 812L, 64L, 98L, 36L, 65L, 36L, 45L, 42L, 50L, 43L, 962L, 0L, 36L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 15L, 17L, 15L, 13L, 12L, 25L, 27L, 8L, 26L, 7L, 2L, 5L, 5L, 4L, 5L, 5L, 5L, 5L, 6L, 5L, 4L, 6L, 0L, 0L, 5L, 0L, 1L, 0L, 5L, 3L, 0L, 0L, 4L, 0L, 1L, 4L, 2L, 9L, 3L, 59L, 77L, 123L, 107L, 144L, 119L, 79L), k8 = c(0L, 36L, 42L, 44L, 0L, 0L, 0L, 3L, 3L, 49L, 50L, 51L, 0L, 0L, 0L, 0L, 0L, 0L, 31L, 158L, 0L, 1L, 28L, 14L, 11L, 9L, 27L, 14L, 12L, 14L, 14L, 28L, 14L, 32L, 19L, 41L, 37L, 26L, 39L, 57L, 85L, 75L, 82L, 87L, 87L, 37L, 91L, 54L, 124L, 138L, 206L, 150L, 44L, 18L, 92L, 38L, 76L, 95L, 101L, 155L, 20L, 90L, 48L, 375L, 344L, 135L, 379L, 519L, 537L, 549L, 563L, 67L, 557L, 91L, 43L, 30L, 35L, 125L, 784L, 491L, 519L, 324L, 627L, 503L, 215L, 296L, 68L, 203L, 42L, 173L, 58L, 43L, 222L, 812L, 64L, 98L, 36L, 65L, 36L, 45L, 42L, 50L, 43L, 962L, 0L, 36L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 15L, 17L, 15L, 13L, 12L, 25L, 27L, 8L, 26L, 7L, 2L, 5L, 5L, 4L, 5L, 5L, 5L, 5L, 6L, 5L, 4L, 6L, 0L, 0L, 5L, 0L, 1L, 0L, 5L, 3L, 0L, 0L, 4L, 0L, 1L, 4L, 2L, 9L, 3L, 59L, 77L, 123L, 107L, 144L, 119L, 79L), b1 = structure(c(7L, 3L, 3L, 3L, 7L, 7L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 7L, 1L, 1L, 7L, 7L, 7L, 1L, 7L, 1L, 1L, 1L, 1L, 7L, 1L, 1L, 1L, 7L, 7L, 7L, 7L, 5L, 5L, 7L, 7L, 5L, 5L, 7L, 7L, 3L, 5L, 5L, 5L, 3L, 7L, 7L, 7L, 1L, 7L, 7L, 7L, 3L, 1L, 7L, 7L, 7L, 7L, 3L, 7L, 5L, 5L, 5L, 5L, 7L, 5L, 7L, 7L, 1L, 1L, 3L, 5L, 3L, 7L, 3L, 3L, 3L, 7L, 3L, 7L, 3L, 1L, 7L, 7L, 7L, 3L, 5L, 7L, 7L, 7L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 7L, 5L, 5L, 6L, 6L, 2L, 6L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 6L, 6L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 1L, 7L, 7L, 7L, 7L, 3L, 7L, 7L, 3L, 7L), .Label = c("CP", "HF", "HP", "KF", "KP", "NF", "NP"), class = "factor"), b2 = structure(c(7L, 3L, 3L, 3L, 7L, 7L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 7L, 1L, 1L, 7L, 7L, 7L, 1L, 7L, 1L, 1L, 1L, 1L, 7L, 1L, 1L, 1L, 7L, 7L, 7L, 7L, 5L, 5L, 7L, 7L, 5L, 5L, 7L, 7L, 3L, 5L, 5L, 5L, 3L, 7L, 7L, 7L, 1L, 7L, 7L, 7L, 3L, 1L, 7L, 7L, 7L, 7L, 3L, 7L, 5L, 5L, 5L, 5L, 7L, 5L, 7L, 7L, 1L, 1L, 3L, 5L, 3L, 7L, 3L, 3L, 3L, 7L, 3L, 7L, 3L, 1L, 7L, 7L, 7L, 3L, 5L, 7L, 7L, 7L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 7L, 5L, 5L, 6L, 6L, 2L, 6L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 6L, 6L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 1L, 7L, 7L, 7L, 7L, 3L, 7L, 7L, 3L, 7L ), .Label = c("CP", "HF", "HP", "KF", "KP", "NF", "NP"), class = "factor"), b3 = structure(c(3L, 5L, 5L, 5L, 3L, 3L, 3L, 5L, 7L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 5L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 1L, 7L, 7L, 5L, 5L, 7L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 7L, 5L, 3L, 5L, 5L, 5L, 3L, 7L, 7L, 3L, 7L, 7L, 7L, 3L, 3L, 7L, 7L, 7L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 5L, 7L, 5L, 7L, 7L, 1L, 1L, 3L, 5L, 3L, 7L, 3L, 7L, 3L, 7L, 3L, 7L, 1L, 1L, 7L, 7L, 7L, 3L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 3L, 5L, 7L, 3L, 7L, 7L, 7L, 3L, 3L, 3L, 7L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 2L, 2L, 2L, 6L, 4L, 4L, 4L, 4L, 6L, 4L, 6L, 6L, 6L, 6L, 
6L, 6L, 4L, 4L, 6L, 6L, 4L, 6L, 2L, 7L, 1L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 3L, 7L), .Label = c("CP", "HF", "HP", "KF", "KP", "NF", "NP"), class = "factor"), b4 = structure(c(3L, 5L, 5L, 5L, 3L, 3L, 3L, 5L, 7L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 5L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 1L, 7L, 7L, 5L, 5L, 7L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 7L, 5L, 3L, 5L, 5L, 5L, 3L, 7L, 7L, 3L, 7L, 7L, 7L, 3L, 3L, 7L, 7L, 7L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 5L, 7L, 5L, 7L, 7L, 1L, 1L, 3L, 5L, 3L, 7L, 3L, 7L, 3L, 7L, 3L, 7L, 1L, 1L, 7L, 7L, 7L, 3L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 3L, 5L, 7L, 3L, 7L, 7L, 7L, 3L, 3L, 3L, 7L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 2L, 2L, 2L, 6L, 4L, 4L, 4L, 4L, 6L, 4L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 4L, 6L, 6L, 4L, 6L, 2L, 7L, 1L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 3L, 7L), .Label = c("CP", "HF", "HP", "KF", "KP", "NF", "NP"), class = "factor"), b5 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 3L, 1L, 4L, 1L, 2L, 1L, 1L, 4L, 1L, 4L, 1L, 4L, 4L, 2L, 4L, 2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 4L, 2L, 4L, 2L, 2L, 4L, 2L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 3L, 3L, 4L, 4L, 1L, 4L, 1L, 1L, 1L, 3L, 2L, 4L, 2L, 2L, 2L, 4L, 2L, 4L, 4L, 1L, 4L, 4L, 4L, 2L, 3L, 4L, 2L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 4L, 2L, 2L, 4L, 4L, 2L, 4L), .Label = c("CP", "HP", "KP", "NP"), class = "factor"), b6 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 3L, 1L, 4L, 1L, 2L, 1L, 1L, 4L, 1L, 4L, 1L, 4L, 4L, 2L, 4L, 2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 4L, 2L, 4L, 2L, 2L, 4L, 2L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 3L, 3L, 4L, 4L, 1L, 4L, 1L, 1L, 1L, 3L, 2L, 4L, 2L, 2L, 2L, 4L, 2L, 4L, 4L, 1L, 4L, 4L, 4L, 2L, 3L, 
4L, 2L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 4L, 2L, 2L, 4L, 4L, 2L, 4L), .Label = c("CP", "HP", "KP", "NP"), class = "factor"), b7 = structure(c(2L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 2L, 2L, 5L, 1L, 1L, 1L, 4L, 4L, 2L, 2L, 4L, 3L, 6L, 6L, 6L, 3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 3L, 3L, 3L, 3L, 6L, 6L, 3L, 6L, 6L, 6L, 6L, 3L, 6L, 3L, 3L, 4L, 3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 3L, 2L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 4L, 4L, 6L, 4L, 2L, 6L, 2L, 2L, 2L, 4L, 3L, 6L, 3L, 6L, 3L, 6L, 3L, 6L, 6L, 2L, 6L, 6L, 6L, 6L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 6L, 6L, 6L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 2L, 6L, 3L, 3L, 6L, 3L, 3L, 6L), .Label = c("CF", "CP", "HP", "KP", "NF", "NP"), class = "factor"), b8 = structure(c(2L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 2L, 2L, 5L, 1L, 1L, 1L, 4L, 4L, 2L, 2L, 4L, 3L, 6L, 6L, 6L, 3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 3L, 3L, 3L, 3L, 6L, 6L, 3L, 6L, 6L, 6L, 6L, 3L, 6L, 3L, 3L, 4L, 3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 3L, 2L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 4L, 4L, 6L, 4L, 2L, 6L, 2L, 2L, 2L, 4L, 3L, 6L, 3L, 6L, 3L, 6L, 3L, 6L, 6L, 2L, 6L, 6L, 6L, 6L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 6L, 6L, 6L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 2L, 6L, 3L, 3L, 6L, 3L, 3L, 6L), .Label = c("CF", "CP", "HP", "KP", "NF", "NP"), class = "factor")), .Names = c("chr", "pos_c", "c1", "c2", "c3", "c4", "c5", "c6", "c7", "c8", "k1", "k2", "k3", "k4", "k5", "k6", "k7", "k8", "b1", "b2", "b3", "b4", "b5", "b6", "b7", "b8"), class = "data.frame", row.names = c(NA, 
-161L))
You can try: t(apply(df[,-1], 1, function(rg){ occ_rg <- table(rg) rg[grep("HP",rg)] <- names(occ_rg)[which.max(occ_rg)] return(rg)})) So, to get your new df: df <- data.frame(chr=df[, 1], t(apply(df[,-1], 1, function(rg){ occ_rg <- table(rg) rg[grep("HP",rg)] <- names(occ_rg)[which.max(occ_rg)] return(rg)})), stringsAsFactors=F) # chr b1 b2 b3 b4 b5 b6 b7 b8 #1 1 CP CP CP CP CP CP CP CP #2 1 KP KP KP KP CP CP KP KP #3 1 CP CP CP CP KP KP CP CP #4 1 CP CP CP CP KP KP CP CP #5 1 KP KP CP CP CP CP CP CP EDIT: If you have other columns and the columns you want to change are the only ones beginning with "b", you can do: df[, grepl("^b", colnames(df))] <- t(apply(df[, grepl("^b", colnames(df))], 1, function(rg){ occ_rg <- table(rg) rg[grep("HP",rg)] <- names(occ_rg)[which.max(occ_rg)] return(rg)})) Example: With this df: # chr c1 b1 b2 b3 b4 b5 b6 b7 b8 c2 #1 1 1 HP HP CP CP CP CP CP CP 11 #2 1 2 HP HP KP KP CP CP KP KP 12 #3 1 3 CP CP CP CP KP KP HP HP 13 #4 1 4 CP CP HP HP KP KP CP CP 14 #5 1 5 KP KP CP CP HP HP CP CP 15 You get: # chr c1 b1 b2 b3 b4 b5 b6 b7 b8 c2 #1 1 1 CP CP CP CP CP CP CP CP 11 #2 1 2 KP KP KP KP CP CP KP KP 12 #3 1 3 CP CP CP CP KP KP CP CP 13 #4 1 4 CP CP CP CP KP KP CP CP 14 #5 1 5 KP KP CP CP CP CP CP CP 15 EDIT 2: If you have values other than "HP", "CP" and "KP" and want to replace "HP" with either "CP" or "KP", whichever occurs more often in the row, you can do: df[, grepl("^b", colnames(df))] <- t(apply(df[, grepl("^b", colnames(df))], 1, function(rg){ occ_rg <- table(rg) occ_rg <- occ_rg[grepl("KP|CP", names(occ_rg))] rg[grep("HP",rg)] <- names(occ_rg)[which.max(occ_rg)] return(rg)})) Explanation (for EDIT 2): df[, grepl("^b", colnames(df))] <- # only the columns beginning with b are considered (so the other ones will remain untouched) t( # the results of apply will be transposed apply(df[, grepl("^b", colnames(df))], # apply on df with only the columns beginning by b 1, # by row function(rg){ # a function that takes a vector "rg" as input 
occ_rg <- table(rg) # computes the table occ_rg <- occ_rg[grepl("KP|CP", names(occ_rg))] # keep only the occurrences of either "KP" or "CP" rg[grep("HP",rg)] <- names(occ_rg)[which.max(occ_rg)] # replace in the vector rg the "HP" elements by "KP" or "CP" depending on which occurs the most return(rg) # finally returns the vector rg }))
time difference between rows producing odd results
There is plenty of material on Stack Overflow about calculating time differences between rows/entries/observations. However, I'm stumped as to why I'm getting NAs in unusual positions. I have 3 columns: DATETIME, which is POSIXlt; GRP800, which is the group (factor); and TIME800, which is supposed to represent the time elapsed between each observation for each group. My particular code was derived from the question "Calculate differences between rows faster than a for loop?": df$TIME800<-unlist(by(df$DATETIME,df$GRP800,function(x)c(NA,diff(x)))) It does appear to function properly for the first group, but then I am getting NAs in the middle of the 2nd group. I've tried several approaches using diff and they all produce identical output. I'm quite puzzled. Any advice would be greatly appreciated. DATETIME GRP800 TIME800 1 2013-07-16 16:01:30 1 NA 2 2013-07-16 20:00:54 1 3.990000 3 2013-07-17 00:01:30 1 4.010000 4 2013-07-17 04:01:00 1 3.991667 5 2013-07-17 08:00:50 1 3.997222 6 2013-07-17 12:01:46 1 4.015556 7 2013-07-17 16:00:50 1 3.984444 8 2013-07-17 20:01:00 1 4.002778 9 2013-07-18 00:01:18 1 4.005000 10 2013-07-18 04:01:02 1 3.995556 11 2013-07-18 08:00:50 1 3.996667 12 2013-07-18 12:01:18 2 NA 13 2013-07-18 16:01:02 2 3.970833 14 2013-07-18 20:00:59 2 4.007500 15 2013-07-19 00:01:31 2 3.997222 16 2013-07-19 04:01:18 2 4.011111 17 2013-07-19 08:01:02 2 NA 18 2013-07-19 12:01:57 2 2.007500 19 2013-07-19 20:01:00 2 NA 20 2013-07-20 00:01:00 2 2.003333 > dput(df[1:20,]) structure(list(DATETIME = structure(list(sec = c(30, 54, 30, 0, 50, 46, 50, 0, 18, 2, 50, 18, 2, 59, 31, 18, 2, 57, 0, 0), min = c(1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L), hour = c(16L, 20L, 0L, 4L, 8L, 12L, 16L, 20L, 0L, 4L, 8L, 12L, 16L, 20L, 0L, 4L, 8L, 12L, 20L, 0L), mday = c(16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 19L, 20L ), mon = c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), year = c(113L, 
113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L), wday = c(2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L), yday = c(196L, 196L, 197L, 197L, 197L, 197L, 197L, 197L, 198L, 198L, 198L, 198L, 198L, 198L, 199L, 199L, 199L, 199L, 199L, 200L), isdst = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), zone = c("MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT"), gmtoff = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_)), .Names = c("sec", "min", "hour", "mday", "mon", "year", "wday", "yday", "isdst", "zone", "gmtoff"), class = c("POSIXlt", "POSIXt")), GRP800 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), TIME800 = c(NA, 3.99, 4.01, 3.991666667, 3.997222222, 4.015555556, 3.984444444, 4.002777778, 4.005, 3.995555556, 3.996666667, NA, 3.970833333, 4.0075, 3.997222222, 4.011111111, NA, 2.0075, NA, 2.003333333)), .Names = c("DATETIME", "GRP800", "TIME800" ), row.names = c(NA, 20L), class = "data.frame")