Issues spreading data after removing outliers

Issues spreading data after removing outliers - r

I put my data into long format in order to remove outliers (I grouped by grade and condition, and then removed values using the 1.5 * IQR rule); however, I'm having issues getting it back into wide format. The final two columns (condition and variable; columns 36 and 37) are what I want to spread the data by (so that BOXED_Conjunction_12 becomes a variable). Variables 1:35 should remain as they are. (There will be NAs introduced given that outliers were removed; however, outliers were only removed per condition and not completely.) I think I'm having issues because of removing the outliers, but I would think that fill = NA would solve this issue. I can't figure it out.
I've tried
# Attempt 1 (tidyr): spread `condition` into columns, taking the cell values
# from `pid.avg_rw`; combinations that have no row are filled with NA.
dat%>%spread(condition,pid.avg_rw, fill = NA)
I've also tried using reshape2:
# Attempt 2 (reshape2): cast `condition` into columns with `pid.avg_rw` as the
# value column.
# NOTE(review): the left-hand side `(1:35)` is an integer vector, not a set of
# column names -- presumably the cause of the messages quoted below; confirm.
dat%>%dcast((1:35)~ condition, value.var = "pid.avg_rw")
and I get the error
number of rows of result is not a multiple of vector length (arg 1)
Aggregation function missing: defaulting to length
Here's a dput of the first ten lines.
Thanks much,
James
structure(list(pid = c("ADMIN-UCSF-bo002", "ADMIN-UCSF-bo002",
"ADMIN-UCSF-bo002", "ADMIN-UCSF-bo002", "ADMIN-UCSF-bo002", "ADMIN-UCSF-bo002",
"ADMIN-UCSF-bo002", "ADMIN-UCSF-bo002", "ADMIN-UCSF-bo002", "ADMIN-UCSF-bo002"
), timepoint = c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1), District.ID = c(175420L,
175420L, 175420L, 175420L, 175420L, 175420L, 175420L, 175420L,
175420L, 175420L), School = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("Bowers", "Bracher", "Cabrillo",
"Central Park", "Laurelwood", "Millikin", "Peterson"), class = "factor"),
Ethnicity = structure(c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L), .Label = c("American Indian or Alaskan Native", "Asian",
"Black or African American", "Blank on Purpose", "Filipino",
"Hispanic or Latino", "Pacific Islander", "Two or More Races",
"White"), class = "factor"), Age.2018 = c(10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L), Sex = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("F", "M"), class = "factor"),
Language.Fluency = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("English Learner", "English Only",
"IFEP-Initially Fluent", "RFEP-Redesignated"), class = "factor"),
Parent.Ed.Lvl = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L), .Label = c("College Graduate", "Declined to state/Unknown",
"Grad School/post grad trng", "High School Graduate", "Not HS Graduate",
"Some College"), class = "factor"), SpEd = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", "Yes"
), class = "factor"), SpEd.Dis = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "Autism (AUT)",
"Emotional Disturbance (ED)", "Hard of Hearing (HH)", "Intellectual Disability (ID)",
"Other Health Impairment (OHI)", "Specific Learning Disability (SLD)",
"Speech or Language Impairment (SLI)", "Visual Impairment (VI)"
), class = "factor"), Low.Income = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"),
grade = c("3", "3", "4", "3", "3", "4", "3", "3", "4", "3"
), gender = c("F", "F", "2", "F", "F", "2", "F", "F", "2",
"F"), Teacher = c("Keith, Susan", "Keith, Susan", "Lourdes Martin",
"Keith, Susan", "Keith, Susan", "Lourdes Martin", "Keith, Susan",
"Keith, Susan", "Lourdes Martin", "Keith, Susan"), time = structure(c(17113,
17263, 17417, 17113, 17263, 17417, 17113, 17263, 17417, 17113
), class = "Date"), ela.score = c(2424, 2424, NA, 2424, 2424,
NA, 2424, 2424, NA, 2424), School.Year = c("2017", "2017",
"2018", "2017", "2017", "2018", "2017", "2017", "2018", "2017"
), math.score = c(2440, 2440, NA, 2440, 2440, NA, 2440, 2440,
NA, 2440), basc = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), cohort = c("3", "3", "3", "3", "3", "3", "3", "3", "3",
"3"), attendance = c(96.1, 96.1, 100, 96.1, 96.1, 100, 96.1,
96.1, 100, 96.1), tme4 = structure(c(NA, 17655, 17655, NA,
17655, 17655, NA, 17655, 17655, NA), class = "Date"), t4.minus = c(6.39692965521615,
5.97126183979046, 5.47227067367148, 6.39692965521615, 5.97126183979046,
5.47227067367148, 6.39692965521615, 5.97126183979046, 5.47227067367148,
6.39692965521615), median_grade = c(1536.4, 1536.4, 1372.4,
1192, 1192, 1054, 986.6, 986.6, 871.6, 958.4), mad_grade = c(377.17344,
377.17344, 278.13576, 167.5338, 167.5338, 161.89992, 139.66092,
139.66092, 116.23584, 143.21916), lowerq = c(1323.7, 1323.7,
1226.2, 1102.2, 1102.2, 960.6, 902.9, 902.9, 804, 873.5),
upperq = c(1964.8, 1964.8, 1655.6, 1329.3, 1329.3, 1181.6,
1091.9, 1091.9, 964.2, 1074.1), iqr = c(641.1, 641.1, 429.4,
227.1, 227.1, 221, 189, 189, 160.2, 200.6), grade.threshold.upper = c(3888.1,
3888.1, 2943.8, 2010.6, 2010.6, 1844.6, 1658.9, 1658.9, 1444.8,
1675.9), grade.threshold.lower = c(-599.6, -599.6, -61.9999999999995,
420.9, 420.9, 297.6, 335.9, 335.9, 323.4, 271.7), mad = c(377.17344,
377.17344, 278.13576, 167.5338, 167.5338, 161.89992, 139.66092,
139.66092, 116.23584, 143.21916), z_rw = c(0.350390238376874,
0.0417183791440274, 0.171148318277673, -0.108910138097997,
-0.497500239197831, -0.365723152941879, 0.512731829784946,
-0.588322005081869, -0.0970981769116109, -0.290844134905211
), condition = c("BOXED_Conjunction_12", "BOXED_Conjunction_12",
"BOXED_Conjunction_12", "BOXED_Conjunction_4", "BOXED_Conjunction_4",
"BOXED_Conjunction_4", "BOXED_Feature_12", "BOXED_Feature_12",
"BOXED_Feature_12", "BOXED_Feature_4"), pid.avg_rw = c(2140,
1845.6, 1884.4, 1242.8, 1088.4, 973.6, 1160.4, 887.6, 910.8,
929.2), avg_rw_grade = c(1805.81052631579, 1805.81052631579,
1686.41503416856, 1286.07368421053, 1286.07368421053, 1148.48656036446,
1033.36421052632, 1033.36421052632, 982.933485193622, 1001.18526315789
), sd_grade = c(953.763652869694, 953.763652869694, 1156.80345459324,
397.333847576144, 397.333847576144, 478.193844053012, 247.762635541793,
247.762635541793, 742.892271389251, 247.504606484003)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -10L))

Related

Percentages in the wrong position in ggplot2

I'm trying to plot a graph for a Likert test using ggplot2 and I would like to have the percentage values appear on the graph. I've created a df with all the averages and percentages so I could write them on the graph. It all seems to be working well, except that the values are being plotted as if they were upside down or something.
This is the code I'm using
example <- structure(list(grupo = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("EJA",
"REG"), class = "factor"), nivel = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("CINCO", "DOZE", "NOVE"), class = "factor"), tipo = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L), .Label = c("COR", "PAD", "RES"), class = "factor"),
likert = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L), .Label = c("0",
"1", "2", "3"), class = c("ordered", "factor")), cnt = c(3L,
1L, 3L, 5L, 3L, 1L, 3L, 6L, 2L, 1L, 10L, 5L, 5L, 9L, 11L,
6L, 4L, 10L, 10L, 10L), freq = c(0.25, 0.083, 0.25, 0.417,
0.231, 0.077, 0.231, 0.462, 0.154, 0.077, 0.769, 0.167, 0.167,
0.3, 0.367, 0.2, 0.133, 0.333, 0.333, 0.333), prop = c(25,
8.3, 25, 41.7, 23.1, 7.7, 23.1, 46.2, 15.4, 7.7, 76.9, 16.7,
16.7, 30, 36.7, 20, 13.3, 33.3, 33.3, 33.3), proptext = c("25",
"8.3", "25", "41.7", "23.1", "7.7", "23.1", "46.2", "15.4",
"7.7", "76.9", "16.7", "16.7", "30", "36.7", "20", "13.3",
"33.3", "33.3", "33.3")), row.names = c(NA, -20L), groups = structure(list(
grupo = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("EJA",
"REG"), class = "factor"), nivel = structure(c(1L, 1L, 1L,
2L, 2L, 2L), .Label = c("CINCO", "DOZE", "NOVE"), class = "factor"),
tipo = structure(c(1L, 2L, 3L, 1L, 2L, 3L), .Label = c("COR",
"PAD", "RES"), class = "factor"), .rows = structure(list(
1:4, 5:8, 9:11, 12:15, 16:19, 20L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
# Bar chart of `prop` for every grupo/nivel/tipo combination, filled by
# `likert`, flipped to horizontal, with `proptext` drawn as text labels.
ggplot(example, aes(x=(interaction(grupo, nivel, tipo)),y=prop, fill=likert))+
geom_col()+
#scale_y_continuous(labels = percent)+
coord_flip() +
ggtitle("Testing")+
xlab("A, B, and C")+
ylab("%")+
# NOTE(review): no `position` is given for the labels, so they are presumably
# drawn at y = prop rather than stacked like the bars -- likely why they look
# misplaced; confirm against the geom_text()/geom_col() defaults.
geom_text(aes(label = proptext), size = 2, colour = "black")
Would someone have an idea of how I could solve it?

The geom_text layer may also need explicit x and y aesthetics, together with position_stack() so that the labels follow the stacked bars:
library(dplyr)
library(tidyr)
library(ggplot2)

# Merge the three grouping columns into a single axis label (`new`), then draw
# the bars and place each label in the middle of its own stacked segment.
example %>%
  unite(new, grupo, nivel, tipo, sep = ".") %>%
  ggplot(aes(x = new, y = prop, fill = likert)) +
  geom_col() +
  geom_text(
    aes(label = proptext),
    position = position_stack(vjust = 0.5)
  ) +
  coord_flip() +
  # scale_y_continuous(labels = percent) +
  ggtitle("Testing") +
  xlab("A, B, and C") +
  ylab("%")
-output

Data set structure and NA values

I've successfully rearranged the dataset into the format I want (see code annotation). However:
(a) I feel that there's a cleaner more efficient way to construct the database, maybe in the Tidyverse? My solution reads as a bit hacked together, and it takes a lot of code to implement. I'd really like to find an elegant and efficient way to do this, but need help.
(b) I am having trouble with the NA values. No matter what I do, R reads them as characters. I don't know if this is crucial for analysis as when I pass the as.numeric() function, it coerces these to NA values anyways. However, I'd like to understand what I'm doing wrong, and how to 'do it right' going forward.
I've provided the code I'm using below, annotated, and dput() 5 lines from my (very large) data set are at the end of the code block to help re-create. Any help/feedback would be much appreciated. Thank you.
library(tidyverse)
# Load data set. This is how I'm loading the data. While the dput() output is at the
# bottom of the code block, I've included this so people can see my steps for input, and
# how I'm inputting NA's.
# Read the tab-separated phenotype table. The first column of the file supplies
# the row names; the strings "NA" and " NA" are read as missing values.
pheno_sep_imp <- read.table(
  file = "~/pheno_sep_imp.txt",
  row.names = 1, header = TRUE,
  na.strings = c("NA", " NA"), sep = "\t"
)
pheno_sep_imp <- data.frame(pheno_sep_imp, stringsAsFactors = TRUE)
# Treat every integer or character column as categorical (single pass instead
# of two superseded mutate_if() calls).
pheno_sep_imp <- mutate(
  pheno_sep_imp,
  across(where(is.integer) | where(is.character), as.factor)
)
# Remove anterior teeth
pheno_sep_imp <- pheno_sep_imp[c(1:6, 22:46, 62:86)]

# Replace the code `from` with `to` in one column and return the column.
# Factor columns are recoded through their levels: assigning a value that is
# not already a level (e.g. 3 into a factor whose levels are "2" and "R3En")
# would otherwise produce NA with an "invalid factor level" warning.
# A level recoded to NA turns the matching entries into real missing values.
recode_value <- function(col, from, to) {
  if (is.factor(col)) {
    levels(col)[levels(col) == from] <- to
  } else {
    hit <- col %in% from
    # Only assign when something matches, so untouched columns keep their type.
    if (any(hit)) {
      col[hit] <- to
    }
  }
  col
}

# Re-code R3En to 3 for analysis
pheno_sep_imp[] <- lapply(pheno_sep_imp, recode_value, from = "R3En", to = "3")
# Re-code CON to NA for analysis. These are genuine NA values at this point;
# they only turn into the text "NA" later, when each cell is passed through
# toString() while the long table is being filled.
pheno_sep_imp[] <- lapply(pheno_sep_imp, recode_value, from = "CON", to = NA)
# Zero-row template for the reshaped data: one row per individual and tooth,
# holding the individual id, the geographic group, the five element codes and
# the tooth label. Columns start out as empty logical vectors.
dta <- data.frame(
  matrix(
    logical(0),
    nrow = 0, ncol = 8,
    dimnames = list(
      NULL,
      c("Ind", "Geo", "E1", "E2", "E3", "E4", "E5", "Tooth")
    )
  ),
  stringsAsFactors = FALSE
)
# Select names of columns from original data set for teeth/elements.
# Each column name is split on "_" and only the first piece (the tooth label)
# is kept. The first column is skipped, so tooth_names[j] belongs to column
# j + 1 of pheno_sep_imp.
nms <- names(pheno_sep_imp)
str_nms <- strsplit(nms, "_")
tooth_names <- vapply(str_nms[-1], function(parts) parts[1], character(1))
# variable locations in pheno_sep_imp, put into dta
# Each tooth occupies five consecutive columns (E1..E5); root_num holds the
# position of the first of those columns for every tooth.
root_num <- seq(7, 52, by = 5)
n_ind <- nrow(pheno_sep_imp)

# Collect the rows in a preallocated list and bind them once at the end,
# instead of growing the data frame one row at a time.
rows <- vector("list", n_ind * length(root_num))
count <- 0
for (i in seq_len(n_ind)) {
  tmp <- pheno_sep_imp[i, ]
  ind <- paste0("ind_", i)
  geo <- toString(tmp[[2]])
  for (k in root_num) {
    count <- count + 1
    # toString() turns a missing value into the text "NA", and c() builds a
    # character vector, so every column of dta ends up as character and the
    # missing values arrive as the string "NA" rather than a real NA.
    elements <- vapply(tmp[k:(k + 4)], toString, character(1))
    # tooth_names is offset by one, so k - 1 is the label for column k.
    rows[[count]] <- c(ind, geo, elements, tooth_names[k - 1])
  }
}
# Leave the empty template untouched when there is nothing to add.
if (count > 0) {
  filled <- as.data.frame(do.call(rbind, rows), stringsAsFactors = FALSE)
  names(filled) <- names(dta)
  dta <- filled
}
# Inspect the filled table and the class of the first E1 entry (to see whether
# missing values are real NAs or text).
str(dta)
class(dta$E1[1])

# E1 and E2 become numeric; entries that are not numbers are coerced to NA
# with the warning "NAs introduced by coercion".
dta[c("E1", "E2")] <- lapply(dta[c("E1", "E2")], as.numeric)

# The remaining descriptive columns become factors.
# NOTE(review): an entry holding the text "NA" in E3-E5 would become a factor
# level "NA" rather than a missing value, so complete.cases() below would not
# drop it -- confirm that this is intended.
dta[c("E3", "E4", "E5", "Geo", "Ind")] <-
  lapply(dta[c("E3", "E4", "E5", "Geo", "Ind")], as.factor)

# Keep only the rows without missing values, then open the result in the
# data viewer.
dta <- dta[complete.cases(dta), ]
View(dta)
# dput() of 5 lines from my data set:
# dput() of 5 lines from my data set:
structure(list(Sex = structure(c(2L, 2L, 2L, 1L, NA, 1L), .Label = c("Female",
"Male"), class = "factor"), G1_Major_Human_Subdivisions = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("Sahul_Pacific", "Sino_Americas",
"Sub_Saharan_Africa", "Sunda_Pacific", "West_Eurasia"), class = "factor"),
G2_Continental_Group = structure(c(4L, 4L, 4L, 4L, 4L, 4L
), .Label = c("Central_America", "Europe", "North_Africa",
"North_America", "Oceania", "South_America", "South_Asia",
"South_East_Asia", "Sub_Saharan_Africa"), class = "factor"),
G3_Continental_Region = structure(c(1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("American_Arctic", "Andaman_Archipelago", "Andean",
"Australia", "Carribbean", "Central_Africa", "Central_America",
"Central_Europe", "Eastern_Africa", "Eastern_Europe", "Indian_Sub_Continent",
"Indochinese_Peninsula", "Malay_Archipelago", "Melanesia",
"NA_Northeast_Woodlands", "NA_Northwest_Coast", "NA_Plains",
"NA_South_West", "NA_Subarctic", "NA_Unknown", "North_East_Africa",
"North_Western_Africa", "Northern_Africa", "Northern_Europe",
"Polynesia", "SA_Unknown", "Southern_Africa", "Southern_Europe",
"SSA_Unknown", "Western_Africa", "Western_Europe"), class = "factor"),
G4_Country_State = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Alaska",
"American_Arctic_Unknown", "Andaman_Island", "Angola", "Argentina",
"Australia_Unknown", "Austrian", "Bangladesh", "Barbados",
"Canada", "Canary_Islands", "Central_Australia", "Chile",
"Congo", "Czechoslovakia", "East_India", "Egypt", "England",
"Finland", "France", "Germany", "Ghana", "Greece", "Greenland",
"Guatemala", "Guinea", "Hungary", "India", "India_Unknown",
"Indonesia", "Italy", "Jamaica", "Kenya", "Malta", "Mozambique",
"Myanmar", "NA_Unknown", "Namibia", "New_South_Wales", "New_Zealand",
"Nicobar_Island", "Nigeria", "North_India", "Northern_Territory",
"Pakistan", "Papua_New_Guinea", "Peru", "Philippines", "Queensland",
"Russia", "SA_Unknown", "Solomon_Islands", "Somalia", "South_Africa",
"South_Australia", "South_East_Australia", "South_India",
"Spain", "Sri_Lanka", "SSA_Unknown", "Sudan", "Sweden", "Switzerland",
"Tanzania", "Uganda", "Ukraine", "United_States", "Victoria",
"West_India", "Western_Australia", "Zimbabwe"), class = "factor"),
G5_Locality_Tribe = structure(c(57L, 57L, 57L, 57L, 57L,
57L), .Label = c("Aborigine", "Aboringine", "Ainaho", "Ainaho_Burao",
"Akamba", "Ali_Kush", "Amaponda", "Amaxhosa_Great_Winterberg",
"Apache", "Arawak", "Ashanti", "Badari", "Baffin_Island",
"Baiono", "Ballam_Coffa", "Bambuti_Pygmy", "Bantu_Kaoisoudo",
"Basuto", "Bechuanaland", "Bengal", "Bengal_Bangladesh",
"Berida", "Bihari", "Bingemma", "Brazaville", "Brittany",
"Bukoba", "Cape_Spencer_Aborigine", "Colombo", "Coorg", "Crime_Sebastopol",
"Crocodile _Island_Yan_nhanu", "Croydon_Queensland", "Darood",
"Darood_Hawiya", "Deccan_Berars", "Derby_Coast_Aborigine",
"Didali", "Dravidian", "Eingenadu", "Fanti", "Gannawarri",
"Gondaiaio", "Graubunden_Saint_Moritz", "Guanche", "Hadad",
"Halle", "Hariya", "Haya", "Hexham", "Hindu", "Hindustan",
"Hindustan_Bihar", "Huron", "Inuit", "Inuit_Eleanoran_Bay",
"Ipiutak", "Iroquois", "Java", "Jebel_Moya", "Jilili", "Kaduna",
"Kagoro", "Kalahari", "Kerma", "Ketchipawan", "Khanty_Kondinski",
"Khoikhoi", "Kikuyu", "Knysna_Cave", "Korana", "Kwaiawata _Island_Muyuw",
"Lapland", "Lazio", "Loddon_River_Aborigine", "Luanda", "Mackay_Aborigine",
"Makah", "Makua", "Malaysian", "Manatee_Cradock", "Manitoba",
"Maori", "Mem_Mem", "Minorca", "Mortlake_Aborigine", "Mumbai_Parsi",
"Muri_Province", "Murray_River", "Murray_River_Aborigine",
"Murua_Island_Muyuw", "Muyuw_Kwaiawata_Island", "Nagada",
"Naharhmpikya_Sinhalese", "Native_American", "New_Britain",
"New_Westminster", "Newcastle", "North_Queensland ", "Oriomo_River_Daudai",
"Paestum", "Pagi_Island", "Pakistan", "Paliyan_Tribe", "Paris",
"Pasamayo", "Pathan", "Patna", "Perth", "Plympton_Aborigine",
"Port_Elizabeth", "Punjab", "Rio_Gallegos", "Rome", "Salekhard",
"Sardinia", "Sicily", "Sioux", "South_Wilshire", "St_Bernard",
"Swanport_Aborigine", "Tagalog_Island", "Tal_Horr", "Tegera_Well",
"Teita", "Teso", "Thessaly", "Tigara", "Toszeg", "Upper_Congo_River",
"Valparaiso", "Vancouver_Island", "Veddah", "Vienna", "Walvis_Bay",
"Wollongong", "Wynberg_San", "Yola", "Yoruba_Ilorin", "Zuni"
), class = "factor"), RI1_MAX_E1 = structure(c(1L, 1L, 1L,
1L, 1L, 1L), .Label = "1", class = "factor"), RI1_MAX_E2 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "1", class = "factor"), RI1_MAX_E3 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "A", class = "factor"), RI1_MAX_E4 = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("E", "G", "P", "W"), class = "factor"),
RI1_MAX_E5 = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("O",
"R"), class = "factor"), RI2_MAX_E1 = structure(c(1L, 1L,
1L, 1L, 1L, 1L), .Label = c("1", "CON"), class = "factor"),
RI2_MAX_E2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "CON"), class = "factor"), RI2_MAX_E3 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("A", "B1L1", "CON"), class = "factor"),
RI2_MAX_E4 = structure(c(3L, 2L, 3L, 3L, 2L, 3L), .Label = c("CON",
"E", "G", "P", "W"), class = "factor"), RI2_MAX_E5 = structure(c(3L,
3L, 3L, 3L, 3L, 3L), .Label = c("CON", "O", "R", "R4"), class = "factor"),
RC1_MAX_E1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "1", class = "factor"),
RC1_MAX_E2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2"), class = "factor"), RC1_MAX_E3 = structure(c(1L, 1L,
1L, 1L, 1L, 1L), .Label = c("A", "B1L1"), class = "factor"),
RC1_MAX_E4 = structure(c(4L, 1L, 1L, 5L, 5L, 5L), .Label = c("E",
"EBi", "G", "P", "W"), class = "factor"), RC1_MAX_E5 = structure(c(1L,
1L, 2L, 1L, 1L, 1L), .Label = c("O", "R", "R5"), class = "factor"),
RP3_MAX_E1 = structure(c(2L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3"), class = "factor"), RP3_MAX_E2 = structure(c(2L,
2L, 1L, 1L, 2L, 2L), .Label = c("1", "2", "3"), class = "factor"),
RP3_MAX_E3 = structure(c(2L, 2L, 1L, 1L, 2L, 2L), .Label = c("A",
"B1L1", "B1L2", "B2L1", "M1D1", "M1D1L1"), class = "factor"),
RP3_MAX_E4 = structure(c(1L, 17L, 17L, 17L, 17L, 18L), .Label = c("BGLG",
"BGLKBi", "BHLE", "BHLG", "BKLG", "BPLG", "BWLG", "E", "H",
"HBi", "K", "KBi", "MEDGLG", "MGDGLE", "MGDGLG", "MPDPLE",
"P", "PBi", "W"), class = "factor"), RP3_MAX_E5 = structure(c(3L,
11L, 8L, 8L, 3L, 10L), .Label = c("BR2LR", "BR4LR", "BRLR",
"BRLR2", "i2", "i5", "MRDRLR", "O", "R", "R2", "R4", "R5"
), class = "factor"), RP4_MAX_E1 = structure(c(1L, 1L, 1L,
1L, 1L, 1L), .Label = c("1", "2", "3"), class = "factor"),
RP4_MAX_E2 = structure(c(2L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3", "4"), class = "factor"), RP4_MAX_E3 = structure(c(2L,
1L, 1L, 1L, 1L, 1L), .Label = c("A", "B1L1", "B2L1", "B2L2",
"M1D1L1"), class = "factor"), RP4_MAX_E4 = structure(c(13L,
13L, 13L, 13L, 13L, 13L), .Label = c("BELG", "BGLG", "BHLG",
"BKLG", "E", "G", "H", "HBi", "K", "KBi", "MGDGLE", "MLFBiDG",
"P", "PBi", "W"), class = "factor"), RP4_MAX_E5 = structure(c(10L,
7L, 7L, 7L, 7L, 7L), .Label = c("BR2L4", "BR2LR", "BRLR",
"i2", "i5", "MRDRLR", "O", "R", "R2", "R4", "R5"), class = "factor"),
RM1_MAX_E1 = structure(c(3L, 3L, 3L, 3L, 3L, 2L), .Label = c("1",
"2", "3", "4"), class = "factor"), RM1_MAX_E2 = structure(c(3L,
2L, 2L, 2L, 2L, 2L), .Label = c("2", "3", "4", "5", "6"), class = "factor"),
RM1_MAX_E3 = structure(c(9L, 3L, 3L, 3L, 3L, 3L), .Label = c("B1L1",
"M1D1", "M1D1L1", "M1D1L2", "M1D2", "M1D2L1", "M1L1", "M2D1",
"M2D1L1", "M2D1L2", "M2D2L1", "M2D2L2", "M3D1L1", "MB1DB1ML1DL1"
), class = "factor"), RM1_MAX_E4 = structure(c(51L, 32L,
32L, 45L, 32L, 38L), .Label = c("BKLG", "BPLG", "MBPDBEMLEDLG",
"MDFLE", "MDFLG", "MDFLK", "MDFLP", "MEDELE", "MEDELP", "MEDGLG",
"MEDKLE", "MEDPLE", "MEDWLK", "MHDELP", "MHDPLE", "MHDPLP",
"MKDELE", "MKDELP", "MKDGLE", "MKDGLG", "MKDGLP", "MKDPLE",
"MKDPLG", "MKDPLP", "MLFBiDG", "MLFBiDP", "MLFDE", "MPBiDPLG",
"MPBiDPLK", "MPBiDPLP", "MPDELE", "MPDELG", "MPDELK", "MPDELP",
"MPDGLE", "MPDGLG", "MPDGLP", "MPDLF", "MPDLFBi", "MPDP",
"MPDPLE", "MPDPLG", "MPDPLK", "MPDPLP", "MPDWLE", "MPDWLG",
"MPDWLP", "MWBiDPLP", "MWBiDWLG", "MWDE", "MWDELE", "MWDELG",
"MWDELK", "MWDELP", "MWDGLE", "MWDGLG", "MWDGLK", "MWDGLP",
"MWDGLR", "MWDGLW", "MWDKLE", "MWDKLG", "MWDKLP", "MWDLF",
"MWDLFBi", "MWDPDE", "MWDPLE", "MWDPLG", "MWDPLK", "MWDPLP",
"MWDPLW", "MWDWLE", "MWDWLG", "MWDWLK", "MWDWLP", "P"), class = "factor"),
RM1_MAX_E5 = structure(c(29L, 42L, 22L, 22L, 20L, 22L), .Label = c("BOLR",
"MBODBOMLRDLR", "Mi2DOLO", "Mi2DOLR", "Mi2DR4LR4", "Mi2DRLO",
"Mi2DRLR", "Mi2DRLR2", "Mi3DOLR", "Mi3DRLO", "Mi3DRLR", "Mi4DRLR",
"Mi5DOLO", "Mi5DOLR", "Mi5DRLO", "Mi5DRLR", "MODi2", "MODLi5",
"MODOLO", "MODOLR", "MODRLO", "MODRLR", "MODRLR2", "MR2DO",
"MR2DOLO", "MR2DOLR", "MR2DR2LO", "MR2DRLO", "MR2DRLR", "MR2DRLR2",
"MR4DOLO", "MR4DOLR", "MR4DRLO", "MR4DRLR", "MR5DOLO", "MR5DRLO",
"MR5DRLR", "MRDOLO", "MRDOLR", "MRDRLi5", "MRDRLO", "MRDRLR",
"R2"), class = "factor"), RM2_MAX_E1 = structure(c(1L, 1L,
1L, 1L, 3L, 3L), .Label = c("1", "2", "3", "4"), class = "factor"),
RM2_MAX_E2 = structure(c(3L, 3L, 3L, 3L, 3L, 3L), .Label = c("1",
"2", "3", "4"), class = "factor"), RM2_MAX_E3 = structure(c(5L,
5L, 5L, 5L, 5L, 5L), .Label = c("A", "B1L1", "M1B1D1L1",
"M1D1", "M1D1L1", "M1D2L1", "M2D1L1", "MB1DB1ML1DL1", "ML3D1"
), class = "factor"), RM2_MAX_E4 = structure(c(45L, 13L,
13L, 51L, 57L, 70L), .Label = c("BGLG", "BHLG", "BKLG", "BKLK",
"BKLP", "BLF", "BPLG", "H", "HBi", "KBi", "MBEDBEMLEDLE",
"MBWDBGMLEDLE", "MDFDLF", "MDFLE", "MDFLG", "MDFMLF", "MDFMLFBi",
"MEDELE", "MEDELG", "MEDGLE", "MEDGLG", "MEDGLK", "MEDKLG",
"MEDWLG", "MGDGLG", "MHBiDELE", "MHDELE", "MHDELG", "MHDELP",
"MHDGLG", "MHDGLP", "MHDWLP", "MKDELE", "MKDELG", "MKDELP",
"MKDGLE", "MKDGLG", "MKDGLP", "MKDLF", "MKDPLE", "MKDPLP",
"MLFBGDLF", "MLFBiDE", "MLFBiDG", "MLFBiDLF", "MLFBiDP",
"MLFBiDW", "MLFBiMDF", "MLFDE", "MLFDG", "MLFDLF", "MLFDLFBi",
"MLFDP", "MLFDW", "MLFMDF", "MPDELE", "MPDELG", "MPDELK",
"MPDELP", "MPDGLE", "MPDGLG", "MPDGLP", "MPDKLG", "MPDLF",
"MPDPLE", "MPDPLG", "MPDPLP", "MPDWLG", "MWDELE", "MWDELG",
"MWDELK", "MWDELP", "MWDGLE", "MWDGLG", "MWDGLK", "MWDGLP",
"MWDKLE", "MWDKLG", "MWDLF", "MWDLFBi", "MWDPLE", "MWDPLG",
"MWDPLK", "MWDPLP", "MWDWLE", "MWDWLG", "MWDWLP", "P", "W"
), class = "factor"), RM2_MAX_E5 = structure(c(28L, 16L,
20L, 16L, 25L, 44L), .Label = c("BOLO", "BOLR", "BRLR", "i2",
"i3", "MBRDBRMLRDLR", "MDi2LO", "MDi2LR", "Mi2DOLR", "Mi2DRLO",
"Mi2DRLR", "Mi3DRLO", "Mi5DOLR", "Mi5DRLR", "MLi2DO", "MLi2DR",
"MLi3DR", "MLi4DO", "MLi5DO", "MLi5DR", "MLODLi2", "MODi5LR",
"MODLi2", "MODOLO", "MODOLR", "MODR4LR", "MODRLO", "MODRLR",
"MR2DOLO", "MR2DOLR", "MR2DRLO", "MR2DRLR", "MR4DOLO", "MR4DOLR",
"MR4DRLO", "MR4DRLR", "MR5DRLO", "MR5DRLR", "MRBRDRLR", "MRDLi2",
"MRDOLO", "MRDOLR", "MRDRLO", "MRDRLR", "O", "R", "R2"), class = "factor"),
RM3_MAX_E1 = structure(c(3L, 1L, 3L, 1L, 2L, 1L), .Label = c("1",
"2", "3", "4", "CON"), class = "factor"), RM3_MAX_E2 = structure(c(3L,
3L, 3L, 3L, 3L, 3L), .Label = c("1", "2", "3", "4", "CON"
), class = "factor"), RM3_MAX_E3 = structure(c(7L, 7L, 7L,
7L, 7L, 7L), .Label = c("A", "B1L1", "B2D1L1", "CON", "M1B1D1L1",
"M1D1", "M1D1L1", "M1D1L2", "M1D2", "M1D2L1", "M2D1", "M2D1L1",
"M2D2", "MB1DB1ML1DL1", "ML3D1"), class = "factor"), RM3_MAX_E4 = structure(c(70L,
49L, 70L, 49L, 61L, 49L), .Label = c("BGLG", "BGLK", "BKDGLG",
"BKLG", "CON", "E", "G", "HBi", "K", "MBEDBGMLEDLG", "MBEDBGMLGDLG",
"MBGDBGMLGDLG", "MDF", "MDFDLF", "MDFLE", "MDFLG", "MEDELE",
"MEDELP", "MEDGLE", "MEDGLG", "MEDLF", "MEDP", "MEDWLE",
"MGDELG", "MGDGLG", "MGDLF", "MHBiDELG", "MHBiDH", "MHBiDHBi",
"MHDGLE", "MHDGLG", "MHDH", "MHDK", "Mi", "MKDELE", "MKDELG",
"MKDGLE", "MKDGLG", "MKDGLP", "MKDKLG", "MKDLF", "MKDP",
"MKDPLG", "MLFBiDE", "MLFBiDG", "MLFBiDLF", "MLFDE", "MLFDG",
"MLFDLF", "MLFDP", "MLFMDF", "MPBiDGLG", "MPBiDW", "MPDELE",
"MPDELG", "MPDGLE", "MPDGLG", "MPDGLP", "MPDGLPBi", "MPDH",
"MPDLF", "MPDP", "MPDPBi", "MPDPLG", "MWBEDELG", "MWDELE",
"MWDELG", "MWDELP", "MWDGLE", "MWDGLG", "MWDGLP", "MWDH",
"MWDKLG", "MWDLF", "MWDPLG", "MWDW", "MWDWLG", "P"), class = "factor"),
RM3_MAX_E5 = structure(c(45L, 45L, 45L, 17L, 29L, 45L), .Label = c("BOLR",
"BR2DRLR", "BRLO", "BRLR", "CON", "i2", "i3", "i5", "MBRBDRMLRDLR",
"MBRDBRMLRDLR", "MDi2LR", "Mi2DOLR", "Mi2DR", "Mi2DRLR",
"Mi3DRLR", "Mi5DRLR", "MLi2DR", "MLi4DR", "MODi5LR", "MODLi2",
"MODLi3", "MODLi5", "MODO", "MODOLO", "MODOLR", "MODR", "MODR4",
"MODRLO", "MODRLR", "MODRLR2", "MR2DOLO", "MR2DR2", "MR2DRLO",
"MR2DRLR", "MR4DR2", "MR4DR4", "MR4DRLO", "MR4DRLR", "MRBRDRLR",
"MRDLi5", "MRDOLR", "MRDR", "MRDR2", "MRDRLO", "MRDRLR",
"O", "R", "R2", "R4"), class = "factor"), ri1_mand_E1 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "1", class = "factor"), ri1_mand_E2 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("1", "2"), class = "factor"),
ri1_mand_E3 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("A",
"B1L1"), class = "factor"), ri1_mand_E4 = structure(c(4L,
4L, 4L, 4L, 4L, 4L), .Label = c("E", "G", "K", "P", "W"), class = "factor"),
ri1_mand_E5 = structure(c(3L, 3L, 2L, 2L, 2L, 2L), .Label = c("i2",
"O", "R", "R2", "R4"), class = "factor"), ri2_mand_E1 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("1", "CON"), class = "factor"),
ri2_mand_E2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "CON"), class = "factor"), ri2_mand_E3 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("A", "B1L1", "CON"), class = "factor"),
ri2_mand_E4 = structure(c(5L, 5L, 5L, 5L, 5L, 5L), .Label = c("CON",
"E", "H", "K", "P", "W"), class = "factor"), ri2_mand_E5 = structure(c(5L,
5L, 4L, 4L, 4L, 4L), .Label = c("CON", "i2", "i5", "O", "R",
"R2", "R4"), class = "factor"), rc1_mand_E1 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("1", "2"), class = "factor"),
rc1_mand_E2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2"), class = "factor"), rc1_mand_E3 = structure(c(1L, 1L,
1L, 1L, 1L, 1L), .Label = c("A", "B1L1"), class = "factor"),
rc1_mand_E4 = structure(c(2L, 6L, 2L, 7L, 6L, 7L), .Label = c("BGLG",
"E", "G", "H", "K", "P", "W", "WBi"), class = "factor"),
rc1_mand_E5 = structure(c(3L, 3L, 3L, 3L, 3L, 3L), .Label = c("BRLR",
"i2", "O", "R", "R2", "R4", "R5"), class = "factor"), rp3_mand_E1 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("1", "2"), class = "factor"),
rp3_mand_E2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3"), class = "factor"), rp3_mand_E3 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("A", "B1L1", "M1D1L1"), class = "factor"),
rp3_mand_E4 = structure(c(7L, 7L, 3L, 10L, 7L, 7L), .Label = c("BGLG",
"BWLG", "E", "G", "H", "K", "P", "T", "TBi", "W"), class = "factor"),
rp3_mand_E5 = structure(c(7L, 8L, 7L, 7L, 7L, 7L), .Label = c("BRLR",
"i2", "i3", "i4", "i5", "MRDRLR", "O", "R", "R2", "R4"), class = "factor"),
rp4_mand_E1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "1", class = "factor"),
rp4_mand_E2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2"), class = "factor"), rp4_mand_E3 = structure(c(1L, 1L,
1L, 1L, 1L, 1L), .Label = c("A", "B1L1"), class = "factor"),
rp4_mand_E4 = structure(c(5L, 1L, 2L, 5L, 5L, 5L), .Label = c("E",
"G", "HBi", "K", "P", "T", "TBi", "W"), class = "factor"),
rp4_mand_E5 = structure(c(2L, 3L, 3L, 2L, 2L, 2L), .Label = c("i5",
"O", "R", "R2", "R4"), class = "factor"), rm1_mand_E1 = structure(c(1L,
2L, 2L, 2L, 1L, 1L), .Label = c("2", "R3En"), class = "factor"),
rm1_mand_E2 = structure(c(2L, 2L, 2L, 2L, 3L, 3L), .Label = c("2",
"3", "4", "5", "6"), class = "factor"), rm1_mand_E3 = structure(c(3L,
2L, 2L, 2L, 3L, 5L), .Label = c("M1D1", "M1D1L1", "M2D1",
"M2D1L1", "M2D2", "M2D2L1", "M2D3", "M3D1", "M3D2", "M3D3"
), class = "factor"), rm1_mand_E4 = structure(c(17L, 36L,
36L, 45L, 40L, 40L), .Label = c("MHBiDELG", "MHBiDH", "MHBiDHBi",
"MHBiDK", "MHBiDKBi", "MHBiDP", "MHBiDPBi", "MHBiDPLE", "MHBiDPLG",
"MHBiDWLG", "MHDE", "MHDELG", "MHDG", "MHDGLG", "MHDH", "MHDK",
"MHDP", "MHDPBi", "MHDPLE", "MHDPLG", "MHDWLG", "MKBiDK",
"MKBiDP", "MKDH", "MKDK", "MKDP", "MPBiDELG", "MPBiDK", "MPBiDKLG",
"MPBiDP", "MPBiDPLE", "MPBiDPLG", "MPDE", "MPDELE", "MPDELG",
"MPDGLG", "MPDK", "MPDKBi", "MPDKLG", "MPDP", "MPDPBi", "MPDPLE",
"MPDPLG", "MPDPLP", "MPDPLW", "MPDWLG", "MPDWLW"), class = "factor"),
rm1_mand_E5 = structure(c(34L, 28L, 28L, 28L, 34L, 39L), .Label = c("Mi2Di2",
"Mi2Di3", "Mi2Di5", "Mi2DO", "Mi2DOLR", "Mi2DR", "Mi2DR2",
"Mi2DRLR", "Mi3Di2", "Mi3Di4", "Mi3DO", "Mi3DOLO", "Mi3DOLR",
"Mi3DRLR", "Mi4Di2", "Mi4Di4", "Mi4DO", "Mi4DR4", "Mi5Di2",
"Mi5Di5", "Mi5DO", "Mi5DOLR", "Mi5DR2", "Mi5DR4", "Mi5DRLR",
"MODO", "MODR", "MODRLR", "MR2Di2", "MR2Di2LR", "MR2Di3",
"MR2Di4", "MR2Di5", "MR2DO", "MR2DOLO", "MR2DOLR", "MR2DR",
"MR2DR2", "MR2DR4", "MR2DR5", "MR2DRLR", "MR4Di2", "MR4Di2LR",
"MR4Di3", "MR4Di4", "MR4DO", "MR4DOLR", "MR4DR", "MR4DR4",
"MR4DRLO", "MR4DRLR", "MR5DOLR", "MR5DR4", "MR5DRLO", "MRDO",
"MRDOLR", "MRDR"), class = "factor"), rm2_mand_E1 = structure(c(1L,
2L, 2L, 1L, 2L, 2L), .Label = c("1", "2", "3", "R3En"), class = "factor"),
rm2_mand_E2 = structure(c(2L, 2L, 2L, 2L, 3L, 3L), .Label = c("1",
"2", "3", "4"), class = "factor"), rm2_mand_E3 = structure(c(6L,
6L, 6L, 6L, 9L, 9L), .Label = c("A", "B1D1L1", "B2L1", "B2L2",
"M1B1D1", "M1D1", "M1D1L1", "M1D2", "M2D1", "M2D1L1", "M2D2",
"M3D1"), class = "factor"), rm2_mand_E4 = structure(c(5L,
39L, 30L, 5L, 21L, 39L), .Label = c("BGDPLG", "BHBiLP", "BPLK",
"BPLP", "Cs", "CsBi", "G", "MDF", "MEDE", "MGDPLW", "MHBiDE",
"MHBiDH", "MHBiDK", "MHBiDP", "MHBiDPBi", "MHDE", "MHDELP",
"MHDG", "MHDH", "MHDHBi", "MHDK", "MHDP", "MHDPLG", "MKBiDK",
"MKBiDP", "MKDE", "MKDG", "MKDH", "MKDHBi", "MKDK", "MKDP",
"MKDPBi", "MPBiDGLG", "MPBiDP", "MPDE", "MPDG", "MPDH", "MPDK",
"MPDP", "MPDPLG"), class = "factor"), rm2_mand_E5 = structure(c(5L,
17L, 18L, 7L, 21L, 21L), .Label = c("Bi5Li2", "BR2LO", "BRDOLR",
"i2", "i3", "i4", "i5", "Mi2Di2", "Mi2DO", "Mi2DR", "Mi2DR2",
"Mi3DO", "Mi3DR", "Mi4DO", "Mi5DO", "Mi5DR", "MODO", "MODR",
"MR2Di2", "MR2Di3", "MR2DO", "MR2DOLR", "MR2DR", "MR2DR2",
"MR2DR4", "MR2DRLR", "MR4Di2", "MR4DO", "MR4DOLR", "MR4DR",
"MR4DR4", "MR4DRLR", "MR5DO", "MRDO", "MRDOLR", "MRDR", "R",
"R2"), class = "factor"), rm3_mand_E1 = structure(c(1L, 1L,
1L, 2L, 2L, 2L), .Label = c("1", "2", "CON", "R3En", "R3Pa"
), class = "factor"), rm3_mand_E2 = structure(c(2L, 1L, 1L,
2L, 3L, 3L), .Label = c("1", "2", "3", "4", "CON"), class = "factor"),
rm3_mand_E3 = structure(c(6L, 1L, 1L, 6L, 9L, 9L), .Label = c("A",
"B2L1", "CON", "M1B1D1", "M1B2D1", "M1D1", "M1D1L1", "M2B1D1",
"M2D1", "M2D1L1", "M2D2", "M3D1"), class = "factor"), rm3_mand_E4 = structure(c(3L,
25L, 25L, 44L, 32L, 47L), .Label = c("BHLP", "CON", "Cs",
"E", "G", "K", "MCsLG", "MDF", "MDFLG", "MEDELG", "MEDG",
"MELE", "MGBGDE", "MGBPDK", "MGDK", "MHBiDK", "MHBiDP", "MHDE",
"MHDELG", "MHDG", "MHDGLG", "MHDK", "MHDP", "MHDPLG", "Mi",
"MKBGDG", "MKBiDK", "MKBiDP", "MKDE", "MKDG", "MKDGLG", "MKDK",
"MKDP", "MKDPLG", "MKDW", "MPBGDE", "MPBGDG", "MPBGDP", "MPBiDE",
"MPBiDK", "MPBPDE", "MPDE", "MPDELG", "MPDG", "MPDGLG", "MPDK",
"MPDP", "MPDPLE", "MPDPLG", "MPDWLG", "MWDELG", "MWDP", "P"
), class = "factor"), rm3_mand_E5 = structure(c(3L, 42L,
42L, 23L, 33L, 26L), .Label = c("BR2LO", "CON", "i2", "i3",
"i5", "Mi2BRDR", "Mi2Di2", "Mi2DO", "Mi2DOLR", "Mi2DR", "Mi2DRLR",
"Mi3DO", "Mi3DOLR", "Mi3LR", "Mi4DR", "Mi5DO", "Mi5DR", "Mi5DRLR",
"MOBRDO", "MOBRDR", "MODO", "MODOLR", "MODR", "MODRLR", "MR2BRDR",
"MR2DO", "MR2DOLR", "MR2DR", "MR2DR2", "MR2DR4", "MR2DRLR",
"MR4Di5", "MR4DO", "MR4DR", "MR4DR4", "MR4DRLR", "MRBR2DO",
"MRBRDR", "MRDR", "MRDRLR", "O", "R", "R2"), class = "factor")), row.names = c("99_1_192",
"99_1_194", "99_1_196", "99_1_197", "99_1_198", "99_1_201"), class = "data.frame")

Group by several columns and print

I have a data frame and, in the end, I want to print a table that shows, separately for each of several columns, the number of observations at each level and their frequency.
This is what I have done, but it gives me the count and frequency for all the columns combined, whereas I want them for each column (q1a, q1a_30d, q1a_60d, q1a_90d) separately:
# Count every observed combination of the four q1a columns. summarise() drops
# only the last grouping variable, so `freq` is each count's share within its
# (q1a, q1a_30d, q1a_60d) group rather than within the whole table.
a <- df %>%
  group_by(q1a, q1a_30d, q1a_60d, q1a_90d) %>%
  summarise(cnt = n()) %>%
  mutate(freq = formattable::percent(cnt / sum(cnt), 1))
and then for print
# Render the summary table `a` as a styled table with a header row for the
# four survey waves.
# NOTE(review): column_spec() is called without a `column` argument -- confirm
# which columns are meant to receive the left/right borders.
kable(
  a,
  col.names = c(" ", "cnt", "freq"),
  align = c("lcr"),
  longtable = TRUE,
  booktabs = TRUE,
  valign = 't',
  escape = FALSE,
  caption = '<b> Wore a face covering or mask<b>'
) %>%
  kable_styling(
    bootstrap_options = c("striped", "hold_position"),
    full_width = TRUE,
    position = "center",
    html_font = "Arial"
  ) %>%
  add_header_above(c("Baseline", "30 days", "60 days", "90 days")) %>%
  column_spec(border_left = TRUE, border_right = TRUE)
dput(a[1:10, ])
structure(list(q1a = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,1L, 1L, 1L), .Label = c("All of the time", "Very frequently", "Somewhat frequently", "Never", "No answer"), class = "factor"), q1a_30d = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,2L), .Label = c("All of the time", "Very frequently", "Somewhat frequently", "Never", "No answer"), class = "factor"), q1a_60d = structure(c(1L, 1L, 1L, 1L, 2L, 3L, 5L, 1L, 2L, 2L), .Label = c("All of the time", "Very frequently", "Somewhat frequently", "Never", "No answer"), class = "factor"), q1a_90d = structure(c(1L, 2L, 3L, 5L,5L, 1L, 5L, 1L, 1L, 3L), .Label = c("All of the time", "Very frequently", "Somewhat frequently", "Never", "No answer"), class = "factor"), cnt = c(8L, 1L, 1L, 13L, 1L, 1L, 14L, 4L, 1L, 1L), freq = structure(c(0.347826086956522, 0.0434782608695652, 0.0434782608695652, 0.565217391304348, 1, 1, 1, 1, 0.5, 0.5), formattable = list(formatter = "formatC", format = list(format = "f", digits = 1), preproc = "percent_preproc", postproc = "percent_postproc"), class = c("formattable", "numeric"))), row.names = c(NA, -10L), groups = structure(list(q1a = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("All of the time", "Very frequently", "Somewhat frequently", "Never", "No answer"), class = "factor"), q1a_30d = structure(c(1L, 1L, 1L, 1L,2L, 2L), .Label = c("All of the time", "Very frequently","Somewhat frequently", "Never", "No answer"), class = "factor"),q1a_60d = structure(c(1L, 2L, 3L, 5L, 1L, 2L), .Label = c("All of the time","Very frequently", "Somewhat frequently", "Never", "No answer"), class = "factor"), .rows = structure(list(1:4, 5L, 6L,7L, 8L, 9:10), ptype = integer(0), class = c("vctrs_list_of","vctrs_vctr", "list"))), row.names = c(NA, -6L), class = c("tbl_df","tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df", "tbl", "data.frame"))

Bar chart showing NA bar when there are no NA values

My visualisation shows an NA bar even though I have imputed all NA values in my incomeLev column and explicitly removed all NA values from the mentalHealth column (which is the fill variable in my stacked bar visualisation).
# Recode NA in mentalHealth as an explicit "Missing" factor level.
brfss2013$mentalHealth <- forcats::fct_explicit_na(brfss2013$mentalHealth, na_level = "Missing")
brfss2013$incomeLev <- as.factor(brfss2013$incomeLev)
# Drop rows whose incomeLev is NA. This runs before incomeLev is re-levelled
# in the pipeline below, so it cannot remove NAs created by that later step.
brfss2013 <- subset(brfss2013, !is.na(incomeLev))
brfss2013 %>%
# Attach the number of rows per income level to every row.
add_count(incomeLev) %>%
rename(count_inc = n) %>%
# Count rows per (income level, mental health) pair, carrying count_inc along.
count(incomeLev, mentalHealth, count_inc) %>%
rename(count_mentalHealth = n) %>%
# Share of each mental-health category within its income level.
mutate(percent= count_mentalHealth / count_inc) %>%
# Re-level incomeLev so the bars appear in ascending income order.
# NOTE(review): factor() turns any value not listed in `levels` into NA. The
# sample data in this post has the level "50-$75" (no trailing "k"), which is
# not listed here -- confirm against the real data.
mutate(incomeLev = factor(incomeLev,
levels=c('0-$20k','25-$35k','35-$50k','50-$75k','>$75k')))%>%
ggplot(aes(x= incomeLev,
y= count_mentalHealth,
group= mentalHealth)) +
xlab('Annual Income')+ylab('Number of People')+
geom_bar(aes(fill=mentalHealth),
stat="identity",na.rm=TRUE)+
# Using the scales package does the percent formatting for you
geom_text(aes(label = scales::percent(percent)),position = position_stack(vjust = 0.5))+
theme_minimal()
Here is a sample of my data:
brfss2013<-structure(list(incomeLev = structure(c(5L, 1L, 1L, 5L, 4L, 1L,
1L, 4L, 1L, 3L), .Label = c(">$75k", "0-$20k", "25-$35k", "35-$50k",
"50-$75"), class = "factor"), healtheat = c(4.66, 1.68, 2.37,
1.85, 2.5, 3, 3.66, 4.27, 2.72, 1.72), X_age_g = structure(c(5L,
4L, 5L, 5L, 6L, 4L, 3L, 5L, 4L, 6L), .Label = c("Age 18 to 24",
"Age 25 to 34", "Age 35 to 44", "Age 45 to 54", "Age 55 to 64",
"Age 65 or older"), class = "factor"), employ1 = structure(c(7L,
1L, 1L, 7L, 7L, 1L, 1L, 7L, 7L, 5L), .Label = c("Employed for wages",
"Self-employed", "Out of work for 1 year or more", "Out of work for less than 1 year",
"A homemaker", "A student", "Retired", "Unable to work"), class = "factor"),
renthom1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L), .Label = c("Own", "Rent", "Other arrangement"), class = "factor"),
sex = structure(c(2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L), .Label = c("Male",
"Female"), class = "factor"), physLev = structure(c(3L, 1L,
3L, 1L, 2L, 1L, 2L, 1L, 2L, 2L), .Label = c("0-200", "200-500",
"500-1000", "1000-2000", "2000-4000", "4000-10000", ">10000"
), class = "factor"), mentalHealth = structure(c(5L, 1L,
1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L), .Label = c("Excellent",
"Good", "Ok", "Bad", "Very Bad", "Missing"), class = "factor")), row.names = c(NA,
10L), class = "data.frame")

Copy and insert row (with a minor change) in dataframe ABOVE a variable

I'm really new to R, and struggling with the following. If anyone could suggest where I look for a solution or point me in the right direction, I'd be forever grateful.
I have a dataset where I'd like to copy a row and insert that copy, with an amendment (in this case appending ", USA"), into the same dataframe whenever there is a value in the second column (before and after dputs are below).
I can find examples of duplicating row based on a regular pattern (ie. copy and insert every fourth row), but I'm not sure how I'd do that if the pattern isn't regular.
Any help would be greatly appreciated.
before = structure(list(Teams = structure(c(4L, 1L, 1L, 2L, 1L, 1L, 1L,
5L, 1L, 1L, 3L, 1L, 1L, 1L, 1L), .Label = c("", "Blue", "Green",
"Red", "Yellow"), class = "factor"), City = structure(c(1L, 2L,
1L, 1L, 4L, 1L, 1L, 1L, 5L, 1L, 1L, 3L, 1L, 1L, 1L), .Label = c("",
"California", "Chicago", "New York ", "Ohio"), class = "factor"),
Jan = c(NA, NA, 156.156, NA, NA, 818.87, 1586.4, NA, NA,
87.1, NA, NA, 873.4, 41.1, 1886.5), Feb = c(NA, NA, 1856,
NA, NA, 17.1, NA, NA, NA, NA, NA, NA, 48.8, NA, 187)), class = "data.frame", row.names = c(NA,
-15L))
after = structure(list(Teams = structure(c(4L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 5L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
"Blue", "Green", "Red", "Yellow"), class = "factor"), City = structure(c(1L,
3L, 2L, 1L, 1L, 7L, 6L, 1L, 1L, 1L, 9L, 8L, 1L, 1L, 5L, 4L, 1L,
1L, 1L), .Label = c("", "California", "California, USA", "Chicago",
"Chicago, USA", "New York", "New York, USA", "Ohio", "Ohio, USA"
), class = "factor"), Jan = c(NA, NA, NA, 156.156, NA, NA, NA,
818.87, 1586.4, NA, NA, NA, 87.1, NA, NA, NA, 873.4, 41.1, 1886.5
), Feb = c(NA, NA, NA, 1856, NA, NA, NA, 17.1, NA, NA, NA, NA,
NA, NA, NA, NA, 48.8, NA, 187)), class = "data.frame", row.names = c(NA,
-19L))

will this work for you?
library(dplyr)
# Append ", USA" to every non-empty City in place; empty cells stay "".
mutate(before, City = ifelse(City == "", "", paste0(City, ", USA")))
Basically, consider operating on the whole column at once rather than on individual rows.
You can also use base R, which is more cumbersome. You need to convert your City to character first.
# Base-R version: append ", USA" to every non-empty City in place.
# City must be character first; assigning new strings into a factor would
# produce NA for values that are not existing levels.
before$City <- as.character(before$City)
# Compute the mask once; NA cities are treated as "no city" and left alone.
has_city <- !is.na(before$City) & before$City != ""
before$City[has_city] <- paste0(before$City[has_city], ", USA")
Edits:
I don't have an elegant way. This is an ugly for loop solution.
# For every row that has a City, insert an amended copy (City with ", USA"
# appended) directly ABOVE that row. `before` itself is left unchanged apart
# from City becoming a trimmed character column.
before$City <- trimws(as.character(before$City))

# Collect one piece per input row and bind once at the end, instead of
# growing the result with rbind() on every iteration.
pieces <- vector("list", nrow(before))
for (i in seq_len(nrow(before))) {
  row <- before[i, , drop = FALSE]
  if (!is.na(row$City) && row$City != "") {
    copy <- row
    copy$City <- paste0(row$City, ", USA")
    # The amended copy goes first so it ends up above the original row.
    row <- rbind(copy, row)
  }
  pieces[[i]] <- row
}
df <- do.call(rbind, pieces)
# Renumber rows 1..n; rbind() would otherwise keep de-duplicated source names.
rownames(df) <- NULL
df

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Issues spreading data after removing outliers - r

Related

Percentages in the wrong position in ggplot2

Data set structure and NA values

Group by several columns and print

Bar chart showing NA bar when there are no NA values

Copy and insert row (with a minor change) in dataframe ABOVE a variable

Categories

Resources