Related
I have a data set and want to create a time series of means by education and by race. However, I am struggling to make rename() accept a command rather than just a string as a new variable name.
My code:
#libraries
install.packages(c("tidyverse", "spatstat"))
lapply(c("tidyverse", "spatstat"), require, character.only = TRUE)
# Calculate weighted medians of wealth by year, once per grouping column
# (race, education); lapply() collects the per-column results in a list.
# NOTE: lapply() iterates over the columns of data[, c("race", "education")],
# so `vars` holds a column's values, not its name.
wmedians <- lapply(data[,c("race", "education")],function(vars){
data %>%
# Grouping by the expression `(vars)` yields a grouping column named "(vars)".
group_by((vars), year) %>%
summarize(w_median = weighted.median(wealth, weight))%>%
# NOTE: this line does not parse. The left-hand side of `=` inside a call must
# be an argument name, not a function call such as colnames(vars).
rename(colnames(vars) = "(vars)")
})
This gives me the following error:
Error: unexpected '=' in:
" summarize(w_median = weighted.median(wealth, weight))%>%
rename(colnames(vars) ="
Desired Output:
At the moment I get a list with two tibbles where the column name of the first column is "(vars)". Instead I would like the column name to be "race" in the first tibble and "education" in the second one.
I experimented a bit, and my guess is that the rename function generally does not accept a function call (such as colnames()) as a new variable name. Do you have any idea how to get around this?
Sample of my data:
structure(list(year = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L), .Label = c("1989", "1992", "1995", "1998", "2001",
"2004", "2007", "2010", "2013", "2016"), class = "factor"), weight = c(9084.9691295,
1571.9511258, 191.75635451, 204.62890325, 204.62890325, 20.462890325,
10.412082059, 144.25723032, 66.170395167, 17169.253056, 5240.2917738,
768.75688855, 152.72765752, 166.52285228, 4165.0038712, 12.696579164,
1.3158341152, 3574.4768327, 18.640983311, 687.040202, 6738.6966881,
0.9648849583, 2.9597366608, 20.019564258, 895.51359665, 1.3152534108,
3244.207427, 2.0000789024, 750.95122778, 1.6580375994, 4007.7581965,
4.7569235917, 180.73948443, 237.26008744, 2.8105880617, 2.8105880617,
1.7964957199, 4883.711226, 17.268444467, 2.9783310762, 354.15138196,
162.00933944, 1.6450475811, 1.3755398392, 4174.6347012, 44.17020127,
4987.2079388, 1.3755398392, 18.01293584, 3.3426730968, 1.5455142055,
904.20169275, 12.578831203, 10051.580218, 162.70814346, 2.1257090517,
2.1257090517, 1027.7013368, 8166.4587927, 1.7239086827, 2.1374243666,
0.901741906, 2.9900010571, 33.443685091, 12913.631224, 5973.1098594,
9527.6211412, 2.2331957715, 376.47055359, 2.9488054663, 2.9488054663,
2.8570458091, 8.0625166988, 4.7867036342, 923.0539464, 2.7381019933,
5800.0572063, 4.3379657179, 0.8195417131, 6108.0937784, 23.232719795,
121.97520298, 1684.0365357, 4.7867036342, 4.8109344834, 479.31438165,
4.8109344834, 3.7209489469, 5257.7592767, 3220.9438379, 3.1738803883,
3.8910375552, 3.7209489469, 30.231850875, 3277.3748665, 32.237421329,
2.7966498146, 3208.415157, 34.375315295, 30.200771547, 31.811971048,
20.701306688, 2.7966498146, 31.968899323, 33.280487562, 2548.6399138,
32.94638396, 2.7966498146, 2.6033164134, 30.815250688, 23.702590485,
31.465956118, 29.46116036, 760.36545895, 8.3114397117, 23.702590485,
4.6504256805, 3204.2440292, 6.8002099257, 35.813850525), race = structure(c(1L,
1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L,
1L, 4L, 1L, 1L, 4L, 4L, 4L, 1L, 4L, 1L, 4L, 1L, 4L, 1L, 4L, 1L,
1L, 4L, 4L, 4L, 1L, 4L, 4L, 1L, 1L, 4L, 1L, 1L, 4L, 1L, 4L, 4L,
4L, 4L, 1L, 4L, 1L, 1L, 4L, 1L, 1L, 1L, 4L, 4L, 4L, 1L, 1L, 1L,
1L, 1L, 4L, 1L, 4L, 4L, 4L, 4L, 4L, 1L, 4L, 1L, 4L, 4L, 1L, 1L,
1L, 1L, 4L, 4L, 1L, 4L, 4L, 1L, 1L, 1L, 1L, 4L, 4L, 1L, 4L, 4L,
1L, 1L, 4L, 1L, 4L, 4L, 4L, 1L, 1L, 1L, 4L, 4L, 1L, 1L, 1L, 4L,
1L, 4L, 4L, 4L, 1L, 4L, 1L), .Label = c("black", "Hispanic",
"other", "white"), class = "factor"), education = structure(c(2L,
1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 2L, 2L, 2L, 2L, 2L), .Label = c("college degree", "no college",
"some college"), class = "factor"), wealth = c(370932.98, 10170000.94,
12598660.39, 114293258.81, 211275067.86, 290691670.17, 85726008.16,
230065771.49, 246480115.73, 349587.55, 378694.96, 3164512.87,
8495442.34, 6020105.92, 505133.05, 367073411.82, 2928346179.67,
533643.21, 554996993.66, 5648836.48, 392098.63, 692454429.71,
872798466.66, 352917443.83, 798505.65, 1250534235.01, 645694.94,
570963643.71, 10983328.56, 360732249.01, 717840.77, 365513260.49,
7204246.71, 8901952.1, 473348324.51, 686068914.54, 566191645.55,
451622.81, 376484717.9, 702785331.9, 6677625.87, 5826581.94,
538339875.62, 81126854.16, 1073395.38, 595512233.49, 769008.48,
363748981.28, 203466108.48, 801021687.19, 744505545.07, 5219227.59,
132716087.05, 813859.01, 4429075.76, 376023173.93, 418753292.21,
591392.47, 986060.58, 740193054.24, 305729499.91, 815008777.34,
292998224.45, 25586473.36, 1482007.57, 832890.03, 1431390.64,
465587944.43, 31578347.08, 771207206.27, 774386788.46, 283388639.66,
491131539.34, 1106105605.16, 15792325.59, 448260665.36, 1598512.35,
256789830.23, 1327614423.44, 1034918.02, 44314918.71, 47310703.56,
1174945.19, 1050196871.69, 791040687.75, 40226229.67, 1193945180.43,
882118783.02, 865693.49, 598203.92, 94409003.49, 321989895.39,
611845894.05, 414941965.04, 1697121.78, 192234467.65, 1200517207.56,
1522890.02, 42378401.16, 269560657.85, 49377539.54, 450366559.75,
1362371354.42, 254034284.01, 65726116.22, 840350.29, 87111742.24,
1116803883.14, 1703800000, 248761000, 104962000, 111565400, 1224715000,
1405000, 1196478000, 981045000, 535052000, 2077700, 444554200,
41907500)), row.names = c(NA, -120L), groups = structure(list(
year = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L,
10L), .Label = c("1989", "1992", "1995", "1998", "2001",
"2004", "2007", "2010", "2013", "2016"), class = "factor"),
education = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L), .Label = c("college degree", "no college", "some college"
), class = "factor"), race = structure(c(1L, 4L, 1L, 4L,
1L, 4L, 1L, 4L, 1L, 4L, 1L, 4L, 1L, 4L, 1L, 4L, 1L, 4L, 1L,
4L, 1L, 4L, 1L, 4L, 1L, 4L, 1L, 4L, 1L, 4L, 1L, 4L, 1L, 4L,
1L, 4L, 1L, 4L, 1L, 4L), .Label = c("black", "Hispanic",
"other", "white"), class = "factor"), .rows = structure(list(
c(2L, 3L, 12L), c(5L, 6L, 9L), c(1L, 10L, 11L), c(4L,
7L, 8L), c(13L, 14L, 20L), c(17L, 22L, 23L), c(15L, 18L,
21L), c(16L, 19L, 24L), c(29L, 33L, 34L), c(26L, 35L,
36L), c(25L, 27L, 31L), c(28L, 30L, 32L), c(41L, 42L,
44L), c(37L, 40L, 43L), c(38L, 45L, 47L), c(39L, 46L,
48L), c(52L, 55L, 57L), c(50L, 51L, 60L), c(54L, 58L,
59L), c(49L, 53L, 56L), c(63L, 64L, 69L), c(62L, 70L,
71L), 65:67, c(61L, 68L, 72L), c(75L, 81L, 82L), c(74L,
79L, 84L), c(77L, 80L, 83L), c(73L, 76L, 78L), c(86L,
91L, 92L), c(85L, 87L, 88L), c(89L, 90L, 95L), c(93L,
94L, 96L), c(101L, 105L, 107L), c(97L, 103L, 108L), c(98L,
99L, 106L), c(100L, 102L, 104L), 110:112, c(109L, 113L,
115L), c(114L, 118L, 120L), c(116L, 117L, 119L)), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -40L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
Thanks a lot in advance!
Pass the column names to lapply and use .data to refer to them in group_by; the column names are then preserved, so you don't need rename at all.
library(dplyr)
library(spatstat)
# For each grouping column name, compute the weighted median of wealth per
# (grouping column, year). The column is looked up by name through the `.data`
# pronoun, so the output keeps the original column name and no rename() step
# is needed.
result <- lapply(c("race", "education"), function(col_name) {
  grouped <- group_by(data, .data[[col_name]], year)
  summarize(grouped, w_median = weighted.median(wealth, weight))
})
result
I am trying to make a bar chart with ggplot2 using the dataset below. When I use the code
# Bar chart of the mean of `Total Jumps` for each `Period Number`.
# NOTE(review): `Status = "Starter"` is passed to subset() as a named argument
# rather than as a logical condition (`Status == "Starter"`); presumably a
# comparison was intended, so verify.
ggplot(p.data, aes(x = `Period Number`, y = `Total Jumps`)) +
stat_summary(data = subset(p.data, Status = "Starter"), fun ="mean", geom = "bar")
I get this graph:
The most concerning aspect is that for periods 2, 3, 4, and 5 the bars should be taller (period 2 should be around 9.9). Additionally, I would like to remove period 0 and period 1 and add bar labels with the raw data, without creating an additional data frame.
p.data <- structure(list(`Period Number` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L),
`Total Jumps` = c(112L, 97L, 28L, 132L, 162L, 19L, 92L, 112L,
97L, 141L, 68L, 86L, 76L, 26L, 105L, 125L, 19L, 92L, 112L,
64L, 101L, 68L, 4L, 8L, 0L, 8L, 12L, 0L, 0L, 0L, 13L, 8L,
0L, 8L, 2L, 2L, 5L, 12L, 0L, 0L, 0L, 5L, 11L, 0L, 0L, 6L,
0L, 9L, 8L, 0L, 0L, 0L, 7L, 10L, 0L, 14L, 5L, 0L, 5L, 5L,
0L, 0L, 0L, 8L, 11L, 0L, 108L, 131L, 47L, 136L, 159L, 35L,
114L, 116L, 111L, 190L, 64L, 75L, 95L, 47L, 116L, 123L, 27L,
103L, 108L, 70L, 152L, 64L, 4L, 7L, 0L, 14L, 10L, 0L, 0L,
0L, 15L, 10L, 0L, 4L, 0L, 0L, 3L, 7L, 7L, 8L, 8L, 5L, 10L,
0L, 7L, 14L, 0L, 3L, 10L, 1L, 0L, 0L, 11L, 7L, 0L, 18L, 15L,
0L, 0L, 9L, 0L, 3L, 0L, 10L, 11L, 0L, 118L, 96L, 48L, 143L,
170L, 37L, 118L, 117L, 116L, 165L, 56L, 80L, 68L, 48L, 114L,
130L, 36L, 114L, 107L, 80L, 123L, 56L, 2L, 10L, 0L, 8L, 11L,
0L, 0L, 0L, 5L, 9L, 0L, 4L, 12L, 0L, 6L, 5L, 0L, 4L, 8L,
12L, 8L, 0L, 7L, 4L, 0L, 10L, 10L, 0L, 0L, 0L, 12L, 13L,
0L, 25L, 2L, 0L, 5L, 14L, 1L, 0L, 2L, 7L, 12L, 0L), Status = structure(c(1L,
2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L,
2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 1L), .Label = c("Bench", "Starter"), class = "factor")), row.names = c(NA,
198L), class = "data.frame")
Thank you for your help!
It's best to pass the data you actually want to plot to the plotting function, rather than trying to coerce it within the plotting function. In this case you were trying to subset a different data frame from the one you passed to ggplot inside stat_summary. The call to ggplot had already set up the aesthetics you wanted mapped, then in your only geom layer, you were telling ggplot you wanted a completely different set of aesthetics. Note also that subset(p.data, Status = "Starter") uses = rather than ==, so it does not actually filter any rows, which is why the bars are lower than expected.
You don't need to create another data frame to reshape your data. Here's how you could do it using dplyr:
library(dplyr)
library(ggplot2)
# Mean `Total Jumps` of starters for each period after period 1, drawn as
# labelled bars. Everything is computed inside one pipeline, so no additional
# data frame is stored.
p.data %>%
  # Keep starters only and drop periods 0 and 1 before averaging.
  filter(Status == "Starter", `Period Number` > 1) %>%
  group_by(`Period Number`) %>%
  summarise(`Total Jumps` = mean(`Total Jumps`)) %>%
  ggplot(mapping = aes(x = `Period Number`, y = `Total Jumps`)) +
  geom_col(colour = "black", fill = "dodgerblue") +
  # Place each label one unit above its bar, rounded to 2 significant digits.
  geom_text(
    mapping = aes(label = signif(`Total Jumps`, 2), y = `Total Jumps` + 1)
  )
I would like to know if there is a way to more elegantly rewrite this piece of script. I have tried case_when but it throws an error message when I try to have several of them within one mutate function. Here is the dput for the file
structure(list(todays_date = structure(c(1L, 1L, 1L, 1L, 2L,
2L, 4L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 2L, 2L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 2L, 4L, 4L, 2L, 2L, 2L, 2L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 4L, 4L, 5L, 5L, 5L, 2L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 8L, 9L, 9L,
9L, 9L), .Label = c("04/11/2019", "05/11/2019", "06/11/2019",
"07/11/2019", "08/11/2019", "12/11/2019", "13/11/2019", "14/11/2019",
"15/11/2019"), class = "factor"), p_initials = structure(c(34L,
54L, 1L, 71L, 16L, 77L, NA, 55L, 56L, 122L, 20L, 53L, 116L, 48L,
36L, 14L, 44L, 55L, 89L, 96L, 105L, 83L, 92L, 98L, 38L, 5L, 70L,
47L, 10L, 10L, 107L, 67L, 70L, 24L, 25L, 32L, 65L, 24L, 124L,
87L, 75L, 80L, 26L, 31L, 112L, 40L, 45L, 117L, 10L, 23L, 11L,
69L, 7L, 8L, 6L, 79L, 81L, 46L, 108L, 13L, 3L, 61L, 82L, 65L,
90L, 102L, 101L, 59L, 93L, 70L, 74L, 29L, 62L, 78L, 67L, 13L,
64L, 119L, 22L, 43L, 10L, 38L, 50L, 104L, 3L, 2L, 125L, 13L,
88L, 4L, 96L, 106L, 84L, 109L, 17L, 74L, 10L, 91L, 63L, 89L,
7L, 120L, 12L, 38L, 95L, 27L, 9L, 86L, 42L, 99L, 70L, 110L, 103L,
74L, 111L, 72L, 85L, 68L, 76L, 73L, 70L, 21L, 77L, 37L, 8L, 66L,
70L, 123L, 94L, 61L, 115L, 25L, 120L, 67L, 119L, 19L, 71L, 21L,
34L, 57L, 42L, 57L, 100L, 18L, 30L, 19L, 105L, 113L, 39L, 60L,
15L, 33L, 95L, 121L, 52L, 97L, 102L, 5L, 58L, 81L, 114L, 119L,
28L, 3L, 7L, 51L, 35L), .Label = c("BA", "BB", "BD", "BE", "BH",
"BI", "BM", "BS", "BY", "CA", "CB", "CD", "CE", "CF", "CG", "CGA",
"CGG", "CI", "CK", "CL", "CM", "CO", "CP", "CS", "CT", "CZ",
"DK", "DO", "DPH", "DT", "GA", "GB", "GG", "IA", "IB", "Ik",
"IK", "IM", "IP", "IS", "ITF", "KA", "KB", "KBA", "KF", "KG",
"KJ", "KK", "KM", "KO", "KP", "KR", "KS", "KY", "NB", "ND", "NF",
"NG", "NI", "NJ", "NK", "NKD", "NL", "NM", "NR", "NRBS", "NT",
"NWD", "NY", "OA", "OB", "OC", "OD", "OH", "OHD", "OI", "OJ",
"OK", "OL", "OM", "OP", "OPI", "OS", "OSP", "OT", "OTL", "PR",
"PS", "SA", "SG", "SH", "SJ", "SLP", "SM", "SP", "SS", "TA",
"TBC", "TE", "TG", "TKP", "TM", "TMB", "TP", "TR", "TS", "WJ",
"WR", "YH", "YKI", "YM", "ZA", "ZB", "ZE", "ZH", "ZK", "ZM",
"ZN", "ZP", "ZS", "ZSS", "ZT", "ZTM", "ZTN", "ZZ"), class = "factor"),
village = structure(c(2L, 2L, 2L, 2L, 3L, 3L, 8L, 1L, 1L,
1L, 8L, 8L, 8L, 8L, 6L, 6L, 8L, 8L, 8L, 8L, 8L, 1L, 1L, 1L,
8L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L,
2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 1L, 1L, 1L,
1L, 8L, 8L, 5L, 5L, 5L, 3L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 2L,
2L, 7L, 7L, 7L, 4L, 4L, 4L, 7L, 7L, 6L, 6L, 6L, 6L, 1L, 1L,
1L, 1L, 7L, 7L, 7L, 8L, 8L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 1L, 4L, 4L, 4L, 4L, 3L, 6L, 6L, 8L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 3L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 8L, 8L, 8L, 8L, 8L, 1L, 1L, 4L, 2L, 3L,
3L, 3L, 3L, 2L, 2L, 2L, 7L, 7L), .Label = c("banembanto",
"bankore", "damzoussi", "pissy", "sabsin", "tanghin", "toundou",
"watenga"), class = "factor"), compound_id = c("40080", "40093",
"40113", "040127", "240043", "240060", "250035", "230047",
"230033", "230049", "250014", "250031", "250002", "250051",
"220040", "220080", "250056", "250045", "250061", "250042",
"250811", "230068", "230104", "230144", "250062", "40144",
"40814", "030015", "030022", "030108", "30156", "30001",
"30002", "30052", "30089", "30069", "30083", "030094", "30144",
"30161", "30192", "30004", "030006", "030025", "30055", "30202",
"30205", "30239", "30259", "30809", "40053", "40086", "40109",
"040116", "40823", "30197", "30216", "30237", "30159", "30167",
"30219", "30223", "260041", "260803", "260055", "260015",
"230098", "230102", "230111", "230145", "250805", "250810",
"260004", "260023", "260032", "240065", "260025", "260075",
"260049", "30012", "030023", "030030", "30057", "40055",
"40118", "80044", "80068", "80075", "30203", "30229", "30238",
"80001", "80007", "220041", "220042", "220022", "220083",
"230115", "230048", "230097", "230072", "80055", "80803",
"80807", "250809", "250806", "220034", "220019", "220064",
"220840", "220001", "220118", "220175", "220834", "220070",
"220099", "220098", "220141", "220805", "220849", "230174",
"030110", "30146", "30190", "30215", "240006", "220097",
"220823", "250016", "240010", "240042", "240049", "240080",
"240073", "240067", "30265", "30822", "30823", "240004",
"230040", "230057", "230078", "230158", "240021", "240053",
"240054", "240064", "240066", "240086", "250009", "250028",
"250039", "250053", "250063", "230150", "230164", "30828",
"40094", "240007", "240013", "240071", "240078", "040018",
"040125", "40147", "80034", "80049"), new_compound_id = c(40080L,
NA, NA, NA, NA, NA, NA, NA, 230033L, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 30156L, NA, NA, 30052L, NA, NA, NA, NA, NA, NA, 30192L,
NA, NA, NA, NA, 30202L, NA, NA, NA, NA, 40053L, NA, NA, NA,
NA, 30197L, 30216L, 30237L, NA, NA, 30219L, 30223L, NA, NA,
260055L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
260075L, 260049L, NA, NA, NA, NA, NA, NA, NA, 80068L, NA,
30203L, 30229L, NA, NA, 80007L, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 220840L, NA, NA, NA,
NA, NA, NA, NA, NA, 220805L, NA, NA, NA, NA, 30190L, NA,
NA, NA, NA, 250016L, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 30828L, 40094L, NA, NA, NA, NA, NA, NA, NA, NA,
NA), num_sleep_space = c(2L, 3L, 2L, 2L, 3L, 4L, 2L, 3L,
6L, 4L, 8L, 5L, 1L, 2L, 4L, 4L, 3L, 6L, 3L, 10L, 2L, 3L,
9L, 8L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 2L,
2L, 3L, 4L, 2L, 2L, 3L, 2L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 2L,
2L, 3L, 3L, 5L, 5L, 3L, 3L, 2L, 5L, 4L, 3L, 2L, 4L, 3L, 4L,
3L, 4L, 5L, 2L, 2L, 3L, 5L, 3L, 5L, 4L, 3L, 2L, 4L, 3L, 4L,
4L, 5L, 4L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 3L, 2L,
2L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 7L, 2L, 3L, 2L, 4L, 3L,
3L, 3L, 2L, 3L, 4L, 3L, 3L, 2L, 5L, 4L, 4L, 4L, 4L, 2L, 3L,
2L, 4L, 1L, 2L, 1L, 5L, 5L, 1L, 4L, 3L, 3L, 4L, 4L, 4L, 6L,
8L, 8L, 9L, 7L, 7L, 3L, 7L, 3L, 4L, 4L, 4L, 2L, 10L, 12L,
4L, 4L, 10L, 5L, 3L, 8L, 4L, 5L, 4L, 3L, 3L), receive_new_net = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = "yes", class = "factor"), note_net_type.num_net_given = c(2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 6L, 4L, 6L, 7L, 1L, 3L, 3L, 3L,
3L, 5L, 4L, 4L, 3L, 2L, 4L, 3L, 3L, 6L, 5L, 3L, 3L, 2L, 2L,
3L, 3L, 6L, 3L, 4L, 2L, 3L, 4L, 4L, 4L, 3L, 4L, 3L, 3L, 4L,
3L, 4L, 4L, 4L, 2L, 3L, 3L, 4L, 3L, 5L, 3L, 3L, 3L, 1L, 3L,
3L, 5L, 5L, 3L, 4L, 4L, 3L, 4L, 3L, 3L, 3L, 3L, 5L, 1L, 3L,
4L, 3L, 2L, 4L, 3L, 4L, 4L, 5L, 4L, 3L, 3L, 2L, 2L, 3L, 3L,
3L, 2L, 1L, 1L, 3L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 3L,
7L, 2L, 3L, 2L, 4L, 3L, 3L, 3L, 2L, 3L, 4L, 4L, 3L, 2L, 4L,
4L, 4L, 4L, 4L, 2L, 3L, 2L, 4L, 2L, 2L, 2L, 5L, 5L, 1L, 4L,
3L, 3L, 6L, 4L, 3L, 5L, 6L, 6L, 5L, 7L, 6L, 3L, 8L, 5L, 4L,
5L, 5L, 4L, 10L, 15L, 4L, 4L, 8L, 5L, 3L, 7L, 4L, 5L, 4L,
3L, 3L), note_net_type.date_new_net = structure(c(2L, 2L,
2L, 2L, 14L, 11L, 14L, 12L, 12L, 14L, 14L, 12L, 14L, 14L,
11L, 12L, 21L, 14L, 21L, 11L, 21L, 14L, 11L, 11L, 15L, 2L,
2L, 8L, 10L, 9L, 9L, 22L, 21L, 23L, 23L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 6L, 6L, 6L, 7L,
6L, 21L, 2L, 2L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L,
12L, 14L, 14L, 12L, 15L, 17L, 11L, 16L, 14L, 14L, 11L, 14L,
21L, 2L, 2L, 2L, 2L, 2L, 4L, 21L, 9L, 9L, 23L, 23L, 23L,
23L, 23L, 14L, 1L, 14L, 14L, 14L, 13L, 14L, 14L, 4L, 4L,
4L, 21L, 21L, 21L, 21L, 21L, 9L, 21L, 21L, 21L, 21L, 21L,
21L, 23L, 23L, 23L, 23L, 23L, 4L, 4L, 4L, 4L, 14L, 12L, 16L,
18L, 14L, 14L, 14L, 23L, 23L, 14L, 4L, 4L, 2L, 14L, 12L,
14L, 14L, 14L, 16L, 12L, 12L, 14L, 12L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 18L, 4L, 2L, 19L, 19L, 16L, 20L, 2L, 3L, 5L,
2L, 2L), .Label = c("12/07/2019", "15/06/2019", "15/07/2019",
"16/06/2019", "16/07/2019", "17/06/2019", "17/10/2019", "18/06/2019",
"19/06/2019", "20/06/2019", "20/07/2019", "21/07/2019", "22/06/2019",
"22/07/2019", "23/06/2019", "23/07/2019", "24/06/2019", "24/07/2019",
"25/06/2019", "25/07/2019", "29/06/2019", "29/10/2019", "30/06/2019"
), class = "factor"), note_net_type.brand_net_given = structure(c(6L,
6L, 6L, 6L, 6L, 6L, 6L, 9L, 9L, 9L, 9L, 9L, 2L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 3L, 5L, 6L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 1L, 6L, 9L, 9L, 6L, 12L, 1L, 11L, 12L, 6L,
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 8L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 4L, 7L, 3L, 7L, 7L, 7L, 7L, 7L, 6L, 6L, 6L, 6L,
6L, 7L, 7L, 4L, 7L, 6L, 12L, 13L, 12L, 6L, 6L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 12L, 12L, 7L, 7L, 1L, 12L, 12L, 12L,
10L, 7L, 5L, 7L, 7L), .Label = c("", "Pema.net", "PERMA .NET",
"PERMA,NET", "PERMA. NET", "Perma.net", "PERMA.NET", "Perman.net",
"Permanet", "PERMANET", "Permanet.2", "PERMANET.2", "PERMANT.2"
), class = "factor"), note_net_type.help_hang_net = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("no", "yes"), class = "factor"), net_shape = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = "rectangular", class = "factor"), other_net_shape = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), num_old_net = c(2L, 3L, 2L, 2L, 4L, 6L, 3L, 3L, 4L,
2L, 4L, 5L, 1L, 3L, 6L, 4L, 3L, 2L, 4L, 4L, 3L, 1L, 4L, 4L,
3L, 0L, 2L, 0L, 1L, 3L, 2L, 3L, 2L, 3L, 2L, 5L, 4L, 3L, 6L,
6L, 4L, 5L, 6L, 4L, 6L, 5L, 6L, 6L, 5L, 4L, 4L, 4L, 3L, 6L,
6L, 3L, 3L, 3L, 2L, 1L, 3L, 3L, 4L, 3L, 4L, 4L, 3L, 4L, 6L,
5L, 1L, 3L, 4L, 5L, 4L, 5L, 0L, 0L, 2L, 4L, 3L, 4L, 4L, 5L,
4L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 1L, 3L, 2L, 2L, 2L, 2L,
3L, 2L, 5L, 4L, 5L, 3L, 3L, 7L, 2L, 3L, 2L, 3L, 3L, 3L, 3L,
2L, 3L, 4L, 2L, 3L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 3L, 2L,
4L, 2L, 2L, 5L, 5L, 1L, 4L, 3L, 3L, 5L, 3L, 4L, 5L, 7L, 7L,
7L, 7L, 8L, 3L, 7L, 5L, 3L, 3L, 4L, 3L, 9L, 8L, 4L, 4L, 6L,
4L, 1L, 1L, 4L, 5L, 4L, 3L, 3L), num_hh_members = c(4L, 5L,
4L, 3L, 4L, 6L, 5L, 6L, 7L, 7L, 12L, 9L, 7L, 9L, 7L, 5L,
7L, 8L, 8L, 9L, 6L, 3L, 8L, 7L, 5L, 6L, 5L, 5L, 5L, 4L, 4L,
6L, 6L, 6L, 7L, 6L, 3L, 5L, 7L, 8L, 7L, 6L, 7L, 6L, 6L, 7L,
6L, 8L, 7L, 7L, 4L, 5L, 5L, 8L, 6L, 5L, 5L, 6L, 7L, 2L, 5L,
5L, 7L, 5L, 8L, 6L, 8L, 5L, 8L, 7L, 6L, 6L, 7L, 10L, 8L,
10L, 5L, 5L, 6L, 5L, 4L, 5L, 5L, 6L, 6L, 4L, 4L, 3L, 5L,
7L, 8L, 7L, 5L, 10L, 10L, 6L, 2L, 4L, 6L, 4L, 10L, 5L, 5L,
5L, 5L, 6L, 12L, 5L, 5L, 4L, 7L, 5L, 5L, 5L, 4L, 5L, 5L,
5L, 6L, 5L, 9L, 5L, 5L, 5L, 6L, 9L, 9L, 6L, 10L, 6L, 5L,
5L, 11L, 10L, 3L, 6L, 5L, 5L, 11L, 8L, 5L, 9L, 10L, 18L,
12L, 12L, 19L, 6L, 15L, 10L, 9L, 7L, 10L, 8L, 22L, 30L, 5L,
6L, 19L, 11L, 5L, 15L, 7L, 7L, 6L, 5L, 6L), hh_member_count = c(4L,
5L, 4L, 3L, 4L, 6L, 5L, 6L, 7L, 7L, 12L, 9L, 7L, 9L, 7L,
5L, 7L, 8L, 8L, 9L, 6L, 3L, 8L, 7L, 5L, 6L, 5L, 5L, 5L, 4L,
4L, 6L, 6L, 6L, 7L, 6L, 3L, 5L, 7L, 8L, 7L, 6L, 7L, 6L, 6L,
7L, 6L, 8L, 7L, 7L, 4L, 5L, 5L, 8L, 6L, 5L, 5L, 6L, 7L, 2L,
5L, 5L, 7L, 5L, 8L, 6L, 8L, 5L, 8L, 7L, 6L, 6L, 7L, 10L,
8L, 10L, 5L, 5L, 6L, 5L, 4L, 5L, 5L, 6L, 6L, 4L, 4L, 3L,
5L, 7L, 8L, 7L, 5L, 10L, 10L, 6L, 2L, 4L, 6L, 4L, 10L, 5L,
5L, 5L, 5L, 6L, 12L, 5L, 5L, 4L, 7L, 5L, 5L, 5L, 4L, 5L,
5L, 5L, 6L, 5L, 9L, 5L, 5L, 5L, 6L, 9L, 9L, 6L, 10L, 6L,
5L, 5L, 11L, 10L, 3L, 6L, 5L, 5L, 11L, 8L, 5L, 9L, 10L, 18L,
12L, 12L, 19L, 6L, 15L, 10L, 9L, 7L, 10L, 8L, 22L, 30L, 5L,
6L, 19L, 11L, 5L, 15L, 7L, 7L, 6L, 5L, 6L)), class = "data.frame", row.names = c(NA,
-167L))
and the script I want to rewrite
# Derive categorical columns from the count columns of comp_df:
#   - `sleep space category`: num_sleep_space binned as 1, 2-4, 5-9, >9
#   - `number of nets given`: note_net_type.num_net_given, same bins
#   - `net surplus/gap`: sleeping spaces minus nets given
#   - `number of household members`: hh_member_count binned as 1-5, 6-10, >10
# Values matching none of the conditions fall through to NA.
comp_df <- comp_df %>% mutate(`sleep space category` = ifelse(num_sleep_space == 1, "1", ifelse(num_sleep_space >=2
& num_sleep_space <=4 ,"2-4",ifelse(num_sleep_space >=5 & num_sleep_space <=9,
"5-9", ifelse(num_sleep_space >9, ">9", NA)))),
# Convert the labels to a factor with the levels in this explicit order.
`sleep space category` = factor(`sleep space category` , levels=c("1","2-4","5-9",">9")),
# Same binning applied to the number of nets given.
`number of nets given` = ifelse(note_net_type.num_net_given == 1, "1",
ifelse(note_net_type.num_net_given >=2 & note_net_type.num_net_given <=4 ,"2-4",
ifelse(note_net_type.num_net_given >=5 & note_net_type.num_net_given <=9,"5-9",
ifelse(note_net_type.num_net_given >9, ">9", NA)))),
`number of nets given` = factor(`number of nets given`, levels = c("1","2-4","5-9",">9")),
# Difference between sleeping spaces and nets given.
`net surplus/gap` = num_sleep_space - note_net_type.num_net_given,
# Household size uses three bins instead of four.
`number of household members` = ifelse(hh_member_count >= 1 & hh_member_count<= 5, "1-5",
ifelse(hh_member_count >=6 & hh_member_count <=10,"6-10",ifelse(hh_member_count >10, ">10", NA)))) %>%
# Second mutate() converts the household-size labels to a factor.
mutate(`number of household members` = factor(`number of household members`,
levels = c("1-5","6-10",">10")))
I can see why you want to refactor your code!
You are trying to reinvent the cut function using ifelse statements and without taking advantage of the ability to separate logic out into simple chunks using functions.
Your whole complex code can be replaced with this:
# Bin a numeric count into the categories "1", "2-4", "5-9" and ">9".
#
# The breaks sit halfway between integers so every whole number falls into
# exactly one interval. The last interval is open-ended (Inf) so that
# arbitrarily large counts are labelled ">9" instead of silently becoming NA.
# Values <= 0 (and NA) yield NA. Returns a factor with the four levels above.
cut4 <- function(x) {
  cut(
    x,
    breaks = c(0, 1.5, 4.5, 9.5, Inf),
    labels = c("1", "2-4", "5-9", ">9")
  )
}

# Bin a numeric count into the categories "1-5", "6-10" and ">10".
#
# Same approach as cut4(): half-integer breaks and an open-ended last interval,
# so counts above 10 are always labelled ">10". Values <= 0 (and NA) yield NA.
# Returns a factor with the three levels above.
cut3 <- function(x) {
  cut(
    x,
    breaks = c(0, 5.5, 10.5, Inf),
    labels = c("1-5", "6-10", ">10")
  )
}
# Add the derived columns to comp_df: binned sleeping spaces and nets given
# (via cut4), the difference between sleeping spaces and nets given, and the
# binned household size (via cut3).
comp_df <- mutate(
  comp_df,
  `sleep space category` = cut4(num_sleep_space),
  `number of nets given` = cut4(note_net_type.num_net_given),
  `net surplus/gap` = num_sleep_space - note_net_type.num_net_given,
  `number of household members` = cut3(hh_member_count)
)
I have to draw a bar chart in R ggplot2 with multiple variables (i.e. one bar each for BMI, weight, cholesterol, blood pressure, etc.) in each group (i.e. different populations, e.g. Indian, Korean, Filipino, etc.). But the bars overflow into the next group on the axis; for example, the bars of the Indian group overflow into the Korean group. The axis marks are not adjusted accordingly. I have attached the figure. Can someone please help? My code follows; the output of dput(data) is also given.
# Dodged bar chart of Snp_Count per Population, with one bar per Trait.
# The Population axis levels come from the sorted frequency table of Population.
# NOTE(review): `increasing` does not appear to be an argument of sort(), which
# takes `decreasing`; verify that the intended ordering is applied.
# NOTE(review): the `group` levels list "Height", while the Trait factor in the
# accompanying dput uses the label "HT"; confirm which spelling is intended.
p = ggplot(data = t,
aes(x = factor(Population, levels = names(sort(table(Population), increasing = TRUE))),
y = Snp_Count,
group = factor(Trait, levels = c("BMI", "DBP", "HDL", "Height", "LDL", "TC", "TG", "WC", "Weight"),
ordered = TRUE)))
# Bars are filled by Trait and dodged side by side; preserve = "single" is
# passed to position_dodge().
p = p + geom_bar(aes(fill = Trait),
position = position_dodge(preserve = "single"),
stat = "identity") +
# One manual fill colour per Trait level (nine values).
scale_fill_manual(values = c("#28559A", "#3EB650", "#E56B1F", "#A51890", "#FCC133", "#663300", "#6666ff", "#ff3300", "#ff66ff")) +
# Swap the axes so the bars run horizontally.
coord_flip()
structure(list(Trait = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L), .Label = c("BMI",
"DBP", "HDL", "HT", "LDL", "TC", "TG", "WC", "Weight"), class = "factor"),
Association = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = "Direct", class = "factor"), TraitClass = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Anthropometric",
"BP", "Lipid"), class = "factor"), Population = structure(c(2L,
3L, 4L, 5L, 7L, 8L, 10L, 11L, 12L, 13L, 22L, 24L, 3L, 5L,
11L, 22L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 18L, 20L, 28L, 5L, 7L, 13L, 14L, 1L, 3L, 5L, 7L,
9L, 11L, 12L, 16L, 18L, 20L, 22L, 5L, 6L, 7L, 10L, 12L, 18L,
20L, 3L, 5L, 6L, 7L, 8L, 11L, 12L, 13L, 14L, 15L, 18L, 19L,
20L, 21L, 22L, 23L, 26L, 28L, 3L, 4L, 5L, 8L, 12L, 22L, 24L,
3L, 5L, 7L, 8L, 17L, 25L, 27L), .Label = c("ACB", "AFR",
"ASW", "ASW/ACB", "CEU", "CHB", "EAS", "Filipino", "FIN",
"GBR", "Hispanic", "Hispanic/Latinos", "JPT", "Korean", "Kuwaiti",
"Micronesian", "Moroccan", "MXL", "Mylopotamos", "Orcadian",
"Pomak", "SAS", "Saudi_Arabian", "Seychellois", "Surinamese",
"Taiwanese", "Turkish", "YRI"), class = "factor"), Snp_Count = c(3L,
12L, 6L, 17L, 2L, 10L, 1L, 6L, 3L, 3L, 10L, 6L, 1L, 1L, 1L,
1L, 2L, 1L, 10L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 3L, 1L, 1L,
2L, 1L, 2L, 20L, 5L, 4L, 1L, 1L, 2L, 7L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 8L, 2L, 4L, 3L, 1L, 2L, 1L, 4L, 20L, 5L,
11L, 2L, 4L, 3L, 4L, 2L, 3L, 4L, 1L, 1L, 1L, 2L, 2L, 1L,
2L, 3L, 2L, 4L, 4L, 1L, 4L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L
), Gene_Count = c(3L, 9L, 7L, 9L, 2L, 8L, 1L, 7L, 3L, 2L,
8L, 7L, 1L, 1L, 1L, 1L, 2L, 1L, 4L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 9L, 6L, 5L, 1L, 1L, 2L, 5L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 6L, 2L, 3L, 3L, 1L, 2L, 1L, 3L,
10L, 4L, 7L, 1L, 3L, 3L, 4L, 1L, 3L, 5L, 1L, 1L, 1L, 3L,
3L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 2L, 3L, 2L,
2L, 2L)), class = "data.frame", row.names = c(NA, -86L))
The total width of each group in your barchart is 0.9 by default, which means that 90% of the area is covered. When you increase the width of the individual bars to 3 they will overlap with other groups; the maximum value for width should thus be 1, at which point the bars touch the neighbouring groups.
I'd suggest in your situation to use facet_wrap instead of a dodged barchart.
Note: geom_col is the same as geom_bar(stat = "identity").
# Fix the level order of both factors before plotting.
my.df$Trait <- factor(
  my.df$Trait,
  levels = c("BMI", "DBP", "HDL", "HT", "LDL", "TC", "TG", "WC", "Weight")
)
# Order populations by ascending frequency. sort() has a `decreasing` argument
# but no `increasing` one (an unknown argument is silently ignored), so the
# direction is stated with `decreasing = FALSE`.
my.df$Population <- factor(
  my.df$Population,
  levels = names(sort(table(my.df$Population), decreasing = FALSE))
)
# One facet row per Population, one bar per Trait, with Trait shown by colour.
ggplot(my.df, aes(x = Trait, y = Snp_Count, fill = Trait)) +
  geom_col(width = 1) +
  scale_fill_manual(values = c("#28559A", "#3EB650", "#E56B1F", "#A51890", "#FCC133", "#663300", "#6666ff", "#ff3300", "#ff66ff")) +
  # Split the data by Population, allow flexible scales and spacing for y axis (Trait)
  facet_grid(Population ~ ., scales = "free_y", space = "free_y", switch = "y") +
  coord_flip() +
  theme(axis.text.y = element_blank(), # Remove Trait labels (indicated by color)
        axis.ticks.y = element_blank(), # Remove tick marks
        strip.background = element_blank(),
        strip.text.y = element_text(angle = 180, hjust = 1), # Rotate Population labels
        panel.spacing.y = unit(3, "pt")) # Spacing between groups
Data
# Sample data frame (86 rows): factor columns Trait (9 levels) and
# Population (28 levels), plus an integer Snp_Count column.
my.df <-
structure(list(Trait = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L),
.Label = c("BMI", "DBP", "HDL", "HT", "LDL", "TC", "TG", "WC", "Weight"), class = "factor"),
Population = structure(c(2L, 3L, 4L, 5L, 7L, 8L, 10L, 11L,
12L, 13L, 22L, 24L, 3L, 5L, 11L, 22L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 18L, 20L, 28L, 5L,
7L, 13L, 14L, 1L, 3L, 5L, 7L, 9L, 11L, 12L, 16L, 18L, 20L,
22L, 5L, 6L, 7L, 10L, 12L, 18L, 20L, 3L, 5L, 6L, 7L, 8L,
11L, 12L, 13L, 14L, 15L, 18L, 19L, 20L, 21L, 22L, 23L, 26L,
28L, 3L, 4L, 5L, 8L, 12L, 22L, 24L, 3L, 5L, 7L, 8L, 17L,
25L, 27L),
.Label = c("ACB", "AFR", "ASW", "ASW/ACB", "CEU",
"CHB", "EAS", "Filipino", "FIN", "GBR", "Hispanic", "Hispanic/Latinos",
"JPT", "Korean", "Kuwaiti", "Micronesian", "Moroccan", "MXL",
"Mylopotamos", "Orcadian", "Pomak", "SAS", "Saudi_Arabian",
"Seychellois", "Surinamese", "Taiwanese", "Turkish", "YRI"), class = "factor"),
Snp_Count = c(3L, 12L, 6L, 17L, 2L,
10L, 1L, 6L, 3L, 3L, 10L, 6L, 1L, 1L, 1L, 1L, 2L, 1L, 10L,
1L, 1L, 2L, 1L, 2L, 1L, 1L, 3L, 1L, 1L, 2L, 1L, 2L, 20L,
5L, 4L, 1L, 1L, 2L, 7L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 8L,
2L, 4L, 3L, 1L, 2L, 1L, 4L, 20L, 5L, 11L, 2L, 4L, 3L, 4L,
2L, 3L, 4L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 3L, 2L, 4L, 4L, 1L,
4L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L)),
class = "data.frame", row.names = c(NA, -86L))
I want to use Weka to predict future instances. I have a CSV file, and a small portion of it is as follows:
I used R to read the file, but I am not sure how to replace the zeros with "No error" and anything other than zero with "Error". I would have left it like this, but unfortunately Weka is not able to predict instances with numbers as the status.
Edit 2: I tried your solution and even though it changed the numbers to error/no error, it erased the other columns. Did I do something wrong? I also wrote it to a file so it would be easier to see.
Edit 3:
dput(data)
structure(list(BoxType = structure(c(3L, 3L, 6L, 6L, 3L, 8L,
3L, 3L, 6L, 4L, 4L, 3L, 3L, 4L, 6L, 6L, 3L, 6L, 2L, 4L, 3L, 3L,
8L, 3L, 6L, 8L, 2L, 3L, 8L, 8L, 3L, 8L, 2L, 2L, 8L, 8L, 2L, 3L,
8L, 3L, 4L, 3L, 3L, 3L, 2L, 2L, 6L, 4L, 3L, 4L, 4L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 3L, 4L, 2L, 6L, 6L, 4L, 4L, 4L, 6L, 3L, 4L, 6L, 3L,
3L, 2L, 2L, 6L, 3L, 3L, 3L, 2L, 6L, 8L, 3L, 8L, 3L, 4L, 3L, 8L,
6L, 2L, 6L, 6L, 3L, 3L, 4L, 3L, 4L, 4L, 2L, 4L, 2L, 3L, 2L, 6L,
3L, 3L, 4L, 3L, 3L, 6L, 3L, 6L, 3L, 3L, 4L, 6L, 4L, 4L, 3L, 4L,
4L, 2L, 6L, 2L, 6L, 6L, 3L, 3L, 4L, 3L, 4L, 6L, 3L, 4L, 6L, 6L,
4L, 4L, 3L, 6L, 4L, 4L, 3L, 4L, 6L, 3L, 6L, 2L, 3L, 2L, 2L, 6L,
4L, 4L, 3L, 6L, 4L, 3L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 2L, 6L, 6L,
6L, 3L, 2L, 3L, 3L, 4L, 4L, 3L, 6L, 3L, 4L, 3L, 3L, 3L, 3L, 8L,
6L, 3L, 6L, 2L, 8L, 2L, 3L, 3L, 6L, 3L, 2L, 2L, 3L, 4L, 6L, 2L,
6L, 3L, 3L, 4L, 6L, 3L, 4L, 4L, 4L, 2L, 4L, 3L, 6L, 3L, 3L, 3L,
4L, 3L, 3L, 2L, 4L, 3L, 3L, 3L, 2L, 3L, 4L, 6L, 3L, 3L, 3L, 2L,
3L, 6L, 3L, 3L, 3L, 3L, 3L, 6L, 8L, 4L, 3L, 3L, 2L, 7L, 8L, 6L,
6L, 4L, 6L, 8L, 3L, 2L, 4L, 4L, 6L, 3L, 2L, 6L, 8L, 4L, 6L, 4L,
4L, 4L, 3L, 3L, 3L, 6L, 4L, 4L, 3L, 3L, 3L, 6L, 6L, 3L, 6L, 6L,
6L, 4L, 4L, 3L, 2L, 4L, 3L, 6L, 3L, 4L, 4L, 4L, 4L, 2L, 2L, 3L,
4L, 4L, 3L, 3L, 3L, 3L, 4L, 3L, 6L, 3L, 6L, 6L, 4L, 4L, 2L, 3L,
4L, 4L, 4L, 6L, 6L, 4L, 4L, 4L, 4L, 2L, 3L, 4L, 4L, 3L, 4L, 4L,
4L, 3L, 6L, 3L, 6L, 3L, 6L, 3L, 6L, 6L, 3L, 3L, 3L, 3L, 3L, 6L,
6L, 3L, 6L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 6L, 3L,
3L, 3L, 2L, 4L, 4L, 3L, 6L, 3L, 2L, 7L, 3L, 3L, 3L, 2L, 3L, 2L,
3L, 4L, 2L, 2L, 3L, 4L, 3L, 3L, 4L, 1L, 6L, 3L, 2L, 3L, 3L, 7L,
4L, 4L, 3L, 2L, 4L, 2L, 4L, 3L, 3L, 3L, 3L, 6L, 2L, 4L, 2L, 4L,
4L, 4L, 3L, 3L, 3L, 6L, 6L, 3L, 7L, 6L, 3L, 3L, 3L, 4L, 4L, 3L,
6L, 4L, 3L, 7L, 4L, 6L, 6L, 2L, 2L, 4L, 3L, 4L, 4L, 2L, 4L, 4L,
7L, 3L, 4L, 6L, 4L, 6L, 3L, 2L, 3L, 3L, 4L, 4L, 2L, 4L, 3L, 4L,
3L, 3L, 4L, 6L, 2L, 2L, 6L, 6L, 6L, 2L, 3L, 4L, 4L, 3L, 8L, 6L,
4L, 4L, 3L, 3L, 5L, 6L, 2L, 3L, 4L, 8L, 6L, 8L, 4L, 4L, 7L, 4L,
6L, 8L, 4L, 2L, 6L, 6L, 4L, 4L, 1L, 1L, 1L, 1L, 2L, 3L, 3L, 2L,
6L, 8L, 4L, 3L, 1L, 6L, 6L, 1L, 1L, 1L, 4L, 4L, 8L, 3L, 3L, 2L,
2L, 4L, 8L, 6L, 4L, 8L, 3L, 3L, 3L, 5L, 4L, 1L, 2L, 2L, 3L, 4L,
2L, 5L, 4L, 8L, 3L, 8L, 2L, 3L, 4L, 8L, 3L, 6L, 3L, 6L, 6L, 3L,
3L, 8L, 8L, 3L, 6L, 3L, 3L, 2L, 5L, 3L, 6L, 3L, 2L, 3L, 3L, 3L,
4L, 3L, 4L, 3L, 4L, 3L, 2L, 2L, 3L, 6L, 4L, 6L, 3L, 3L, 6L, 3L,
4L, 3L, 2L, 3L, 4L, 4L, 4L, 6L, 6L, 3L, 6L, 4L, 7L, 8L, 6L, 8L,
8L, 4L, 6L, 4L, 4L, 3L, 4L, 2L, 3L, 2L, 4L, 6L, 4L, 6L, 4L, 6L,
4L, 6L, 3L, 4L, 3L, 6L, 4L, 4L, 8L, 4L, 8L, 3L, 3L, 6L, 6L, 3L,
4L, 3L, 3L, 3L, 3L, 6L, 3L, 3L, 3L, 4L, 2L, 4L, 3L, 3L, 6L, 6L,
4L, 3L, 2L, 3L, 6L, 4L, 3L, 3L, 2L, 3L, 2L, 6L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 2L, 3L, 3L, 6L, 6L, 2L, 3L, 6L, 3L, 2L, 3L, 6L,
4L, 3L, 3L, 3L, 6L, 6L, 4L, 3L, 8L, 8L, 4L, 3L, 2L, 2L, 3L, 2L,
3L, 8L, 2L, 3L, 6L, 3L, 3L, 4L, 3L, 3L, 4L, 3L, 3L, 3L, 3L, 8L,
8L, 2L, 3L, 3L, 2L, 2L, 3L, 2L, 2L, 6L, 2L, 3L, 6L, 6L, 8L, 3L,
4L, 3L, 3L, 6L, 6L, 3L, 3L, 3L, 2L, 6L, 2L, 3L, 6L, 8L, 3L, 4L,
4L, 6L, 4L, 8L, 4L, 4L, 2L, 6L, 8L, 6L, 4L, 8L, 3L, 8L, 1L, 8L,
2L, 2L, 2L, 2L, 3L, 3L, 6L, 3L, 3L, 6L, 3L, 3L, 3L, 2L, 3L, 3L,
3L, 2L, 4L, 3L, 4L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 3L,
3L, 6L, 3L, 3L, 6L, 3L, 2L, 3L, 3L, 3L, 4L, 3L, 3L, 1L, 1L, 1L,
1L, 3L, 3L, 3L, 3L, 6L, 4L, 3L, 3L, 6L, 3L, 6L, 6L, 4L, 6L, 4L,
6L, 4L, 4L, 6L, 6L, 6L, 3L, 6L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L,
2L, 2L, 8L, 4L, 4L, 6L, 4L, 8L, 6L, 4L, 3L, 4L, 3L, 4L, 6L, 4L,
6L, 6L, 6L, 4L, 6L, 6L, 4L, 4L, 4L, 2L, 6L, 4L, 2L, 4L, 4L, 3L,
4L, 6L, 6L, 6L, 3L, 4L, 6L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L,
8L, 4L, 4L, 6L, 2L, 8L, 8L, 4L, 6L, 3L, 4L, 8L, 8L, 5L, 3L, 2L,
4L, 3L, 4L, 6L, 4L, 3L, 4L, 3L, 4L, 4L, 4L, 3L, 3L, 3L, 4L, 3L,
6L, 4L, 6L, 6L, 6L, 2L, 3L, 6L, 6L, 3L, 4L, 3L, 2L, 8L, 4L, 8L,
8L, 3L, 3L, 4L, 6L, 6L, 4L, 6L, 6L, 3L, 4L, 4L, 4L, 3L, 7L, 4L,
6L), .Label = c("", "IPH8005", "ISB7005", "VIP1200", "VIP1216",
"VIP1232", "VIP2262NA", "VIP2502W"), class = "factor"), BoxVendor = structure(c(2L,
2L, 3L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 2L,
3L, 4L, 3L, 2L, 2L, 3L, 2L, 3L, 3L, 4L, 2L, 3L, 3L, 2L, 3L, 4L,
4L, 3L, 3L, 4L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 4L, 4L, 3L, 3L, 2L,
3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 3L, 4L, 3L, 3L, 3L, 3L, 3L,
3L, 2L, 3L, 3L, 2L, 2L, 4L, 4L, 3L, 2L, 2L, 2L, 4L, 3L, 3L, 2L,
3L, 2L, 3L, 2L, 3L, 3L, 4L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 3L, 4L,
3L, 4L, 2L, 4L, 3L, 2L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 3L,
3L, 3L, 3L, 2L, 3L, 3L, 4L, 3L, 4L, 3L, 3L, 2L, 2L, 3L, 2L, 3L,
3L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 3L,
4L, 2L, 4L, 4L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 4L, 3L, 3L, 3L, 2L, 4L, 2L, 2L, 3L, 3L, 2L, 3L, 2L, 3L,
2L, 2L, 2L, 2L, 3L, 3L, 2L, 3L, 4L, 3L, 4L, 2L, 2L, 3L, 2L, 4L,
4L, 2L, 3L, 3L, 4L, 3L, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 3L, 4L, 3L,
2L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 4L, 3L, 2L, 2L, 2L, 4L, 2L, 3L,
3L, 2L, 2L, 2L, 4L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 2L,
2L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 4L, 3L, 3L, 3L, 2L, 4L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 2L,
3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 4L, 3L, 2L, 3L, 2L, 3L, 3L,
3L, 3L, 4L, 4L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 3L,
3L, 3L, 3L, 4L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 2L,
3L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 2L, 3L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 4L,
2L, 2L, 2L, 3L, 2L, 2L, 2L, 4L, 3L, 3L, 2L, 3L, 2L, 4L, 3L, 2L,
2L, 2L, 4L, 2L, 4L, 2L, 3L, 4L, 4L, 2L, 3L, 2L, 2L, 3L, 1L, 3L,
2L, 4L, 2L, 2L, 3L, 3L, 3L, 2L, 4L, 3L, 4L, 3L, 2L, 2L, 2L, 2L,
3L, 4L, 3L, 4L, 3L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L,
2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 3L, 2L,
3L, 3L, 4L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 4L, 2L, 2L, 3L,
3L, 4L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 4L, 4L, 3L, 3L, 3L, 4L, 2L,
3L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 4L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 3L, 3L, 1L, 1L, 1L,
1L, 4L, 2L, 2L, 4L, 3L, 3L, 3L, 2L, 1L, 3L, 3L, 1L, 1L, 1L, 3L,
3L, 3L, 2L, 2L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 3L, 3L,
1L, 4L, 4L, 2L, 3L, 4L, 3L, 3L, 3L, 2L, 3L, 4L, 2L, 3L, 3L, 2L,
3L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 3L, 2L, 2L, 4L, 3L, 2L, 3L,
2L, 4L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 4L, 4L, 2L, 3L, 3L,
3L, 2L, 2L, 3L, 2L, 3L, 2L, 4L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 4L, 2L, 4L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
2L, 2L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 3L, 4L,
3L, 2L, 2L, 3L, 3L, 3L, 2L, 4L, 2L, 3L, 3L, 2L, 2L, 4L, 2L, 4L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 3L, 3L, 4L, 2L,
3L, 2L, 4L, 2L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 3L, 3L, 3L,
2L, 4L, 4L, 2L, 4L, 2L, 3L, 4L, 2L, 3L, 2L, 2L, 3L, 2L, 2L, 3L,
2L, 2L, 2L, 2L, 3L, 3L, 4L, 2L, 2L, 4L, 4L, 2L, 4L, 4L, 3L, 4L,
2L, 3L, 3L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 4L, 3L, 4L,
2L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 3L, 3L,
3L, 2L, 3L, 1L, 3L, 4L, 4L, 4L, 4L, 2L, 2L, 3L, 2L, 2L, 3L, 2L,
2L, 2L, 4L, 2L, 2L, 2L, 4L, 3L, 2L, 3L, 4L, 4L, 2L, 2L, 2L, 2L,
2L, 2L, 4L, 4L, 2L, 2L, 3L, 2L, 2L, 3L, 2L, 4L, 2L, 2L, 2L, 3L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 3L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 2L, 2L,
4L, 2L, 2L, 2L, 2L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L,
3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 3L,
3L, 4L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 4L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 3L, 3L, 2L, 3L,
3L, 3L, 3L, 2L, 4L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 3L,
2L, 2L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 2L, 3L, 3L, 2L, 3L,
2L, 4L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L,
3L, 3L, 2L, 3L, 3L, 3L), .Label = c("", "CISCO", "MOTOROLA",
"PACE"), class = "factor"), Receiver_TotalVideoDecoderErrors = c(3L,
204L, 0L, 0L, 3393L, 909L, 1556L, 48L, 0L, 0L, 0L, 182L, 19L,
0L, 0L, 0L, 77L, 0L, 0L, 0L, 6L, 1002L, 10L, 0L, 0L, 6938L, 0L,
299L, 49L, 245L, 0L, 41L, 0L, 0L, 717L, 31L, 0L, 75L, 37L, 71L,
0L, 40L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1230L, 1230L, 1230L, 1230L, 1230L, 1230L, 1230L, 1230L,
1230L, 1230L, 1230L, 1230L, 1230L, 1230L, 1230L, 1230L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 22L, 0L, 0L, 1384L, 95L, 0L, 0L,
0L, 437L, 119L, 910L, 0L, 0L, 8679L, 20L, 68L, 7L, 0L, 0L, 16L,
0L, 0L, 0L, 0L, 74L, 1L, 0L, 82L, 0L, 0L, 0L, 0L, 0L, 21L, 0L,
0L, 279L, 40L, 0L, 1483L, 3L, 0L, 132L, 0L, 0L, 171L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 90L, 0L, 0L, 0L, 0L,
0L, 111L, 0L, 0L, 0L, 0L, 0L, 18L, 0L, 0L, 0L, 217L, 0L, 0L,
1687L, 0L, 0L, 25L, 0L, 0L, 0L, 0L, 0L, 60L, 0L, 0L, 7L, 0L,
0L, 0L, 0L, 1L, 20L, 0L, 0L, 0L, 0L, 0L, 230L, 0L, 169L, 0L,
0L, 0L, 889L, 0L, 3L, 0L, 48L, 2951L, 10L, 531L, 0L, 0L, 0L,
0L, 0L, 232L, 0L, 0L, 125L, 0L, 39L, 0L, 0L, 262L, 0L, 0L, 0L,
0L, 1270L, 6L, 0L, 0L, 88L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 297L,
124L, 419L, 0L, 483L, 280L, 0L, 0L, 127L, 93L, 368L, 0L, 209571L,
0L, 0L, 21L, 62L, 11L, 0L, 501L, 0L, 169L, 34L, 32L, 25L, 188L,
0L, 1596L, 0L, 41L, 183L, 0L, 805L, 3L, 0L, 0L, 0L, 0L, 297L,
90L, 0L, 0L, 0L, 0L, 691L, 0L, 0L, 4L, 0L, 0L, 0L, 0L, 0L, 23L,
52L, 0L, 0L, 0L, 0L, 58L, 18L, 93L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 9L, 0L, 0L, 11381L, 0L, 34L, 0L, 0L, 26L, 0L, 0L, 0L, 318L,
0L, 0L, 36L, 0L, 6534L, 22L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 18L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 35L, 0L, 0L,
30L, 0L, 0L, 0L, 51L, 0L, 7L, 0L, 84L, 0L, 9L, 0L, 0L, 48L, 65L,
23L, 0L, 60312L, 0L, 0L, 28L, 0L, 32L, 0L, 0L, 283L, 406L, 44L,
0L, 0L, 0L, 2L, 824L, 0L, 0L, 2487L, 95L, 0L, 0L, 0L, 0L, 0L,
56L, 0L, 1L, 4640L, 12L, 3626L, 0L, 0L, 0L, 420L, 0L, 0L, 0L,
49L, 0L, 78L, 8L, 0L, 0L, 0L, 380L, 0L, 0L, 7L, 1194L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 29L, 489L, 584L, 47L, 2L, 0L, 0L, 0L, 0L,
0L, 0L, 899L, 120L, 0L, 0L, 0L, 26L, 656L, 0L, 0L, 0L, 50L, 0L,
0L, 0L, 0L, 0L, 6L, 14L, 0L, 0L, 0L, 0L, 0L, 0L, 89L, 0L, 0L,
0L, 0L, 0L, 104L, 0L, 0L, 0L, 0L, 0L, 217L, 0L, 50L, 14L, 0L,
0L, 0L, 0L, 21L, 0L, 73L, 403L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
14L, 0L, 0L, 2769L, 5878L, 0L, 0L, 0L, 37L, 0L, 0L, 0L, 0L, 0L,
0L, 258L, 0L, 5560L, 0L, 0L, 722L, 0L, 0L, 707L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 95L, 0L, 0L, 13L, 0L, 37L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 150L, 65L, 0L, 0L, 0L, 78L,
0L, 0L, 0L, 4L, 10L, 25L, 0L, 0L, 0L, 0L, 0L, 13L, 0L, 0L, 0L,
0L, 0L, 47L, 103L, 0L, 9L, 0L, 116L, 27L, 0L, 7L, 0L, 0L, 6L,
3L, 831L, 1396L, 545L, 0L, 226L, 79L, 0L, 0L, 101L, 0L, 3370L,
0L, 36L, 98L, 25L, 0L, 290L, 0L, 3L, 0L, 193L, 0L, 0L, 10L, 0L,
0L, 0L, 188L, 4L, 0L, 3L, 0L, 0L, 0L, 155L, 0L, 0L, 0L, 0L, 0L,
58L, 0L, 0L, 56L, 0L, 0L, 209L, 60L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 458L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 40L, 0L, 0L, 4L, 1L, 0L, 0L, 5L, 0L, 548L, 123L, 387L,
723L, 0L, 522L, 384L, 0L, 0L, 0L, 0L, 472L, 1L, 0L, 0L, 0L, 24L,
0L, 91L, 0L, 0L, 11L, 47L, 0L, 11L, 0L, 0L, 27L, 17L, 156L, 0L,
29L, 5L, 0L, 0L, 0L, 15L, 79L, 0L, 0L, 0L, 137L, 0L, 75L, 0L,
0L, 0L, 0L, 0L, 0L, 670L, 0L, 0L, 0L, 17L, 23L, 1L, 0L, 160L,
0L, 0L, 235L, 0L, 272L, 16L, 0L, 1803L, 0L, 4780L, 26L, 0L, 90L,
17L, 0L, 1518L, 151L, 1L, 768L, 151L, 42L, 0L, 9161L, 57L, 0L,
0L, 73L, 0L, 0L, 0L, 0L, 3086L, 0L, 0L, 0L, 1896L, 0L, 240L,
165L, 0L, 0L, 722L, 282L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 4L, 0L,
0L, 0L, 0L, 212L, 0L, 0L, 0L, 0L, 285L, 0L, 0L, 9L, 1349L, 1417L,
30792L, 2163L, 0L, 0L, 0L, 0L, 969L, 0L, 0L, 0L, 8L, 0L, 100L,
109L, 48L, 0L, 100L, 109L, 48L, 0L, 0L, 61L, 0L, 0L, 0L, 86L,
967L, 2679L, 86L, 967L, 2679L, 0L, 0L, 0L, 32L, 0L, 0L, 32L,
0L, 4L, 0L, 0L, 54L, 341L, 0L, 54L, 341L, 0L, 0L, 0L, 0L, 401L,
83L, 83L, 6L, 0L, 0L, 46L, 442L, 0L, 46L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 245L, 0L, 245L, 599L, 78L, 0L, 58L, 54L,
0L, 54L, 0L, 0L, 16L, 0L, 0L, 0L, 0L, 30L, 0L, 0L, 305L, 0L,
305L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 27L, 0L, 0L, 0L, 0L, 536L, 0L, 0L, 0L, 10L, 66L,
20L, 66L, 899L, 4L, 0L, 0L, 233L, 0L, 0L, 0L, 0L, 233L, 3L, 0L,
0L, 8L, 0L, 0L, 0L, 0L, 13L, 0L, 0L, 428L, 0L, 0L, 0L, 428L,
0L, 172L, 0L, 0L, 0L, 12L, 13L, 13L, 0L, 35L, 0L, 0L, 0L, 0L,
0L, 0L, 49L, 0L, 0L, 0L, 0L, 639L, 0L, 78L, 0L, 4386L, 78L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2076L, 6L, 0L, 0L
)), .Names = c("BoxType", "BoxVendor", "Receiver_TotalVideoDecoderErrors"
), class = "data.frame", row.names = c(NA, -999L))
The ifelse way
Assuming your CSV has been loaded into a data frame called boxdata with the same column names as in the CSV:
# Returns a new character vector with one label per row; boxdata itself is not
# modified, so assign the result back to a column to keep the other columns.
# A numeric test is coerced to logical: 0 -> FALSE, any other number -> TRUE.
ifelse(boxdata$Receiver_TotalVideoDecoderErrors, 'Error', 'No error')
Explanation
The best way to demonstrate ifelse is by example:
# Step-by-step illustration of ifelse(test, yes, no): at each position the
# result takes the element of `yes` where `test` is TRUE, otherwise of `no`.
x <- 1:5
x_lessthan_4 <- x < 4
x_lessthan_4
# [1] TRUE TRUE TRUE FALSE FALSE
if_lessthan_4 <- -x
if_lessthan_4
# [1] -1 -2 -3 -4 -5
if_notlessthan_4 <- x + 100
if_notlessthan_4
# [1] 101 102 103 104 105
# `test` must be the logical vector. Passing if_notlessthan_4 here would be
# coerced to all TRUE (every value is non-zero) and return only `yes` values.
ifelse(test = x_lessthan_4,
       yes = if_lessthan_4,
       no = if_notlessthan_4)
# [1] -1 -2 -3 104 105
Hopefully it's clear what this function does. Obviously you don't need to always name the arguments as long as they're in the right order; I'm just doing it here so you can see exactly what's going on.
However, you'll notice that the expression
ifelse(boxdata$Receiver_TotalVideoDecoderErrors, 'Error', 'No error')
does not conform to this standard. It works because two things happen "under the hood":
test is "coerced" to logical, so if I pass in something like test = c(1, 3, 0) the value of test will be replaced with as.logical(test), so test = c(1, 3, 0) becomes test = c(TRUE, TRUE, FALSE).
yes and no are "recycled" if they are shorter than test, and truncated if they are longer.
Recycling is again best demonstrated by example:
# `yes` (length 3) is shorter than `test` (length 5), so it is recycled to
# 1, 2, 3, 1, 2; `no` (length 6) is longer, so only its first 5 values matter.
test <- c(TRUE, FALSE, TRUE, TRUE, FALSE)
yes <- c(1, 2, 3)
no <- c(99, 100, 101, 102, 103, 104)
c(length(test), length(yes), length(no))
# [1] 5 3 6
ifelse(test, yes, no)
# [1] 1 100 3 1 103
These things are documented, but they're easy to miss if you're not used to reading the R help files.
And finally, the help file also says this, which is worth pointing out:
Missing values in test give missing values in the result.
This means that ifelse(c(NA, 1, 0, 1), 99, 100) returns c(NA, 99, 100, 99).
So
ifelse(boxdata$Receiver_TotalVideoDecoderErrors, 'Error', 'No error')
is equivalent to
# Spelled-out form of the one-liner: coerce the counts to logical, then expand
# each label to one copy per element of `test` before calling ifelse().
test <- as.logical(boxdata$Receiver_TotalVideoDecoderErrors)
yes <- rep('Error', length(test))
no <- rep('No error', length(test))
ifelse(test, yes, no)
The slick way
Or let argument recycling do the work for you. Shorter and more efficient, but maybe less readable to someone who isn't familiar with R:
# as.logical(...) + 1 is 1 for zero counts and 2 for non-zero counts; those
# numbers are used as positions into the two-element label vector.
c('No error', 'Error')[as.logical(boxdata$Receiver_TotalVideoDecoderErrors) + 1]
or
# The parentheses around the negation are required: `!` binds more loosely
# than `+`, so without them the index would be !(as.logical(...) + 1), which
# is FALSE for every row and therefore selects nothing.
c('Error', 'No error')[(!as.logical(boxdata$Receiver_TotalVideoDecoderErrors)) + 1]
Explanation
First, the statement
as.logical(boxdata$Receiver_TotalVideoDecoderErrors) + 1
is equivalent to:
i <- as.logical(boxdata$Receiver_TotalVideoDecoderErrors) # 0 -> FALSE, non-zero -> TRUE
i <- as.numeric(i) # FALSE -> 0, TRUE -> 1
i <- i + rep(1, length(i)) # the scalar 1 is recycled to the length of i, giving 1 or 2
Finally, an index vector may repeat positions, so subsetting returns one element per index value:
c('No error', 'Error')[c(1, 2, 1, 1, 2)]
# [1] "No error" "Error" "No error" "No error" "Error"
So the entire thing is equivalent to:
# Full expansion of the indexing one-liner.
i <- as.logical(boxdata$Receiver_TotalVideoDecoderErrors) # 0 -> FALSE, non-zero -> TRUE
i <- as.numeric(i) # FALSE -> 0, TRUE -> 1
i <- i + rep(1, length(i)) # shift to positions 1 and 2
c('No error', 'Error')[i] # position 1 is 'No error', position 2 is 'Error'
The long way (suggested here)
This way doesn't need much explanation. It's a lot more typing but it's easy to read and it's flexible:
# Assumes the column is numeric, as in the dput output -- confirm for your data.
# The order of the two replacements matters: the first assignment turns x into
# a character vector, after which `x == 0` compares each string with "0".
# Reversing the two lines would leave `x > 0` comparing strings instead.
x <- boxdata$Receiver_TotalVideoDecoderErrors
x[x > 0] <- 'Error'
x[x == 0] <- 'No error'
Try using the which command and some logical operators to quickly determine the index locations for the values that are equal to 0 and those that are greater than 0. From there you can replace the original numeric values with a string. Try something like this:
# Locate both groups while the column is still numeric. The first replacement
# coerces the column to character; a `> 0` test evaluated after that would
# compare strings ("no error" > "0" is TRUE in common locales) and relabel the
# zero rows as "error" as well.
zero_idx <- which(data$Receiver_TotalVideoDecoderErrors == 0)
error_idx <- which(data$Receiver_TotalVideoDecoderErrors > 0)
data$Receiver_TotalVideoDecoderErrors[zero_idx] <- "no error"
data$Receiver_TotalVideoDecoderErrors[error_idx] <- "error"
Hope that helps.