Related
I have a dataset with many columns and thousands of rows. I am trying to add another column, foo, which counts how often a value above 100 occurs in each row.
structure(list(S026401.R1 = c(0L, 0L, 0L, 0L, 0L), S026404.R1 = c(0L,
0L, 0L, 0L, 0L), S026406.R1 = c(0L, 0L, 0L, 0L, 0L), S026409.R1 = c(0L,
0L, 0L, 0L, 0L), S026412.R1 = c(0L, 0L, 0L, 0L, 0L), S026413.R1 = c(0L,
0L, 0L, 0L, 0L), S026414.R1 = c(47L, 0L, 0L, 0L, 0L), S026415.R1 = c(0L,
0L, 0L, 0L, 0L), S026416.R1 = c(31L, 0L, 0L, 0L, 0L), S026419.R1 = c(0L,
0L, 0L, 0L, 0L), S026421.R1 = c(0L, 0L, 0L, 0L, 34L), S026422.R1 = c(0L,
0L, 0L, 0L, 0L), S026423.R1 = c(0L, 0L, 0L, 0L, 0L), S026427.R1 = c(0L,
0L, 0L, 0L, 0L), S026428.R1 = c(0L, 0L, 0L, 0L, 1049L), S026429.R1 = c(0L,
0L, 0L, 0L, 0L), S026430.R1 = c(0L, 0L, 0L, 0L, 0L), S026431.R1 = c(0L,
10L, 0L, 0L, 0L), S026432.R1 = c(0L, 0L, 0L, 0L, 0L), S026433.R1 = c(0L,
0L, 0L, 0L, 0L), S026434.R1 = c(0L, 0L, 0L, 0L, 0L), S026435.R1 = c(0L,
0L, 0L, 0L, 0L), S026438.R1 = c(0L, 0L, 0L, 0L, 0L), S026440.R1 = c(0L,
0L, 0L, 0L, 0L), S026444.R1 = c(0L, 0L, 0L, 0L, 0L), S026447.R1 = c(0L,
0L, 0L, 0L, 0L), S026450.R1 = c(0L, 0L, 0L, 0L, 0L), S026451.R1 = c(0L,
0L, 0L, 0L, 0L), S026453.R1 = c(0L, 0L, 53L, 0L, 0L), S026456.R1 = c(0L,
0L, 0L, 0L, 0L), S026457.R1 = c(0L, 0L, 0L, 0L, 0L), S026458.R1 = c(0L,
0L, 0L, 0L, 0L), S026461.R1 = c(0L, 0L, 0L, 0L, 0L), S026462.R1 = c(0L,
0L, 0L, 0L, 18L), S026463.R1 = c(153L, 0L, 0L, 0L, 0L), S026464.R1 = c(0L,
0L, 0L, 0L, 0L), S026466.R1 = c(0L, 0L, 0L, 0L, 0L), S026467.R1 = c(32L,
0L, 0L, 0L, 0L), S026469.R1 = c(0L, 0L, 0L, 0L, 0L), S026470.R1 = c(0L,
0L, 0L, 0L, 0L), S026471.R1 = c(0L, 0L, 0L, 0L, 0L), S026473.R1 = c(0L,
0L, 0L, 0L, 0L), S026474.R1 = c(0L, 0L, 0L, 0L, 0L), S026476.R1 = c(0L,
0L, 0L, 0L, 0L), S026477.R1 = c(780L, 0L, 0L, 0L, 0L), S026483.R1 = c(21L,
0L, 0L, 0L, 0L), S026484.R1 = c(0L, 0L, 0L, 0L, 0L), S026485.R1 = c(0L,
0L, 0L, 13L, 0L), S026488.R1 = c(0L, 0L, 0L, 0L, 0L), S026489.R1 = c(0L,
0L, 0L, 0L, 0L), S026490.R1 = c(60L, 0L, 0L, 0L, 0L), S026493.R1 = c(0L,
0L, 103L, 0L, 0L)), class = c("rowwise_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -5L), groups = structure(list(
.rows = structure(list(1L, 2L, 3L, 4L, 5L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame")))
What I've been trying is:
# Asker's attempt: add a per-row column foo (presumably meant to count the
# values above 100 in columns 8 to 52 — confirm intent).
# NOTE(review): 8:52 > 100 compares the integers 8..52 themselves with 100;
# no column of df is referenced inside sum().
df %>%
rowwise() %>%
mutate(foo = sum(c(8:52>100), na.rm = TRUE))
But this returns all 0s in the new column foo.
When I change >100 to a lower number, it does provide a frequency. However, there are many values above 100.
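As suggested, use c_across with a range (or collection) of columns. In the original attempt, 8:52 > 100 compares the integers 8 through 52 with 100 rather than the values in columns 8 to 52, so every comparison is FALSE and the sum is always 0 (a threshold below 52 gives a non-zero count only because some of those integers exceed it).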
library(dplyr)
# Count, per row, how many of the columns X1..X8 exceed 15.
# rowwise() makes sum() operate on one row at a time; c_across() gathers that
# row's selected columns into a single vector.
df %>%
  rowwise() %>%
  # na.rm = TRUE keeps one missing cell from turning the whole count into NA
  mutate(foo = sum(c_across(X1:X8) > 15, na.rm = TRUE)) %>%
  # drop the rowwise grouping so later verbs see the whole table again
  ungroup()
# # A tibble: 3 x 9
# X1 X2 X3 X4 X5 X6 X7 X8 foo
# <int> <int> <int> <int> <int> <int> <int> <int> <int>
# 1 17 10 24 7 23 2 22 12 4
# 2 5 4 15 20 14 19 6 11 2
# 3 1 18 8 9 21 3 16 13 3
Sample data:
# Reproducible sample data: a random permutation of 1..24 laid out
# column-wise in a 3-row data frame (columns are auto-named X1..X8).
set.seed(42)
df <- data.frame(matrix(sample.int(24), nrow = 3L, ncol = 8L))
df
# X1 X2 X3 X4 X5 X6 X7 X8
# 1 17 10 24 7 23 2 22 12
# 2 5 4 15 20 14 19 6 11
# 3 1 18 8 9 21 3 16 13
Hi, I have a data frame of COVID symptoms and COVID diagnosis:
EDITED TO ADD PACKAGES
library(tidyverse)
library(pubh)
library(sjlabelled)
dta <- structure(list(fever = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L), nose_bleed = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
chills = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L), decrease_taste = c(0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), cough = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L), decrease_smell = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), shortness_breath = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
), decrease_test_smell = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), fatigue = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), nausea = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), appetite_loss = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), abdominal_pain = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
muscle_ache = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), diarrhea = c(0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), joint_ache = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), rash = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L), sore_throat = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
chest_pain = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), runny_nose = c(0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), headache = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), conjunctivitus = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), seizure = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), wheezing = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), altered_consciousness = c(0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), other_respiratory_symptom = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
other_symptom = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), covid19 = structure(c(2L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 2L), .Label = c("COVID-19 Positive", "COVID-19 Negative"
), class = "factor")), row.names = c(169L, 252L, 312L, 515L,
161L, 136L, 365L, 463L, 572L, 194L, 443L, 444L, 88L, 500L, 96L,
353L, 171L, 310L, 51L, 206L, 307L, 59L, 87L, 561L, 89L, 216L,
542L, 239L, 298L, 140L, 132L, 529L, 242L, 338L, 115L, 369L, 22L,
418L, 179L, 366L, 86L, 70L, 135L, 44L, 254L, 507L, 535L, 16L,
575L, 466L), class = "data.frame")
I am trying to make a cross_tab this way:
# Asker's pipeline: keep the listed symptom columns, call copy_labels() with
# dta, then cross-tabulate every column against covid19 and apply the pubh
# theme.
# NOTE(review): covid19 is not in the select() list below, so it may no longer
# be present when cross_tab() evaluates covid19 ~ . — confirm which package's
# copy_labels() (sjlabelled vs labelled) is attached.
dta %>%
select(c(fever, nose_bleed,
chills, decrease_taste,
cough, decrease_smell,
shortness_breath, decrease_test_smell,
fatigue, nausea,
appetite_loss, abdominal_pain,
muscle_ache, diarrhea,
joint_ache, rash,
sore_throat, chest_pain,
runny_nose, headache,
conjunctivitus, seizure,
wheezing, altered_consciousness,
other_respiratory_symptom,
other_symptom)) %>%
copy_labels(dta) %>%
cross_tab(covid19 ~ .) %>%
theme_pubh()
But I am getting this error. I have no idea why, and Googling the error and reading ?cross_tab got me nowhere... Any insight would be appreciated.
There is no column named ' covid19 ' in data data
Error in if (x$show.all == TRUE) out = x$res else out = x$res[1:(length(x$res) - :
argument is of length zero
I know the first one isn't actually an error, but the function refuses to acknowledge that column.
The second one is an error, but I don't know if it's related.
I couldn't reproduce the error with the same data when using labelled (instead of sjlabelled):
library(dplyr)
library(pubh)
library(labelled)
# Cross-tabulate each symptom against covid19, with labelled (not sjlabelled)
# providing copy_labels().
# NOTE(review): labelled::copy_labels(from, to) appears to return `to`, i.e.
# the full dta, which would explain why covid19 is still found although it is
# not selected here — confirm against the labelled documentation.
dta %>%
  select(
    fever, nose_bleed, chills, decrease_taste, cough, decrease_smell,
    shortness_breath, decrease_test_smell, fatigue, nausea, appetite_loss,
    abdominal_pain, muscle_ache, diarrhea, joint_ache, rash, sore_throat,
    chest_pain, runny_nose, headache, conjunctivitus, seizure, wheezing,
    altered_consciousness, other_respiratory_symptom, other_symptom
  ) %>%
  copy_labels(dta) %>%
  cross_tab(covid19 ~ .) %>%
  theme_pubh()
-output
───────────────────────────────────────────────────────────────────────────────────
covid19
COVID-19 Positive COVID-19 Negative Total
(N=6) (N=44) (N=50)
───────────────────────────────────────────────────────────────────────────────────
fever
- 0 6 (100.0%) 39 (88.6%) 45 (90.0%)
- 1 0 ( 0.0%) 5 (11.4%) 5 (10.0%)
nose_bleed
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
chills
- 0 6 (100.0%) 41 (93.2%) 47 (94.0%)
- 1 0 ( 0.0%) 3 ( 6.8%) 3 ( 6.0%)
decrease_taste
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
cough
- 0 6 (100.0%) 42 (95.5%) 48 (96.0%)
- 1 0 ( 0.0%) 2 ( 4.5%) 2 ( 4.0%)
decrease_smell
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
shortness_breath
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
decrease_test_smell
- 0 6 (100.0%) 43 (97.7%) 49 (98.0%)
- 1 0 ( 0.0%) 1 ( 2.3%) 1 ( 2.0%)
fatigue
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
nausea
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
appetite_loss
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
abdominal_pain
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
muscle_ache
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
diarrhea
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
joint_ache
- 0 6 (100.0%) 43 (97.7%) 49 (98.0%)
- 1 0 ( 0.0%) 1 ( 2.3%) 1 ( 2.0%)
rash
- 0 6 (100.0%) 39 (88.6%) 45 (90.0%)
- 1 0 ( 0.0%) 5 (11.4%) 5 (10.0%)
sore_throat
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
chest_pain
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
runny_nose
- 0 6 (100.0%) 43 (97.7%) 49 (98.0%)
- 1 0 ( 0.0%) 1 ( 2.3%) 1 ( 2.0%)
headache
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
conjunctivitus
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
seizure
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
wheezing
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
altered_consciousness
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
other_respiratory_symptom
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
other_symptom
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
───────────────────────────────────────────────────────────────────────────────────
Column names: , COVID-19 Positive, COVID-19 Negative, Total
Update
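If we do this with sjlabelled, copy_labels() keeps only the columns specified in select() (with labelled, the piped data is only the source of the labels and dta itself is returned, which is why 'covid19' was still available above). With sjlabelled we therefore need to include the 'covid19' column in select() as well (run in a fresh R session with only the relevant packages loaded):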
library(sjlabelled)
library(dplyr)
library(pubh)
# Cross-tabulate each symptom against covid19 with sjlabelled attached.
# covid19 is listed in select() so that the column named on the left-hand
# side of the formula is present in the data handed to cross_tab().
dta %>%
  select(
    fever, nose_bleed, chills, decrease_taste, cough, decrease_smell,
    shortness_breath, decrease_test_smell, fatigue, nausea, appetite_loss,
    abdominal_pain, muscle_ache, diarrhea, joint_ache, rash, sore_throat,
    chest_pain, runny_nose, headache, conjunctivitus, seizure, wheezing,
    altered_consciousness, other_respiratory_symptom, other_symptom,
    covid19
  ) %>%
  copy_labels(dta) %>%
  cross_tab(covid19 ~ .) %>%
  theme_pubh()
───────────────────────────────────────────────────────────────────────────────────
covid19
COVID-19 Positive COVID-19 Negative Total
(N=6) (N=44) (N=50)
───────────────────────────────────────────────────────────────────────────────────
fever
- 0 6 (100.0%) 39 (88.6%) 45 (90.0%)
- 1 0 ( 0.0%) 5 (11.4%) 5 (10.0%)
nose_bleed
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
chills
- 0 6 (100.0%) 41 (93.2%) 47 (94.0%)
- 1 0 ( 0.0%) 3 ( 6.8%) 3 ( 6.0%)
decrease_taste
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
cough
- 0 6 (100.0%) 42 (95.5%) 48 (96.0%)
- 1 0 ( 0.0%) 2 ( 4.5%) 2 ( 4.0%)
decrease_smell
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
shortness_breath
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
decrease_test_smell
- 0 6 (100.0%) 43 (97.7%) 49 (98.0%)
- 1 0 ( 0.0%) 1 ( 2.3%) 1 ( 2.0%)
fatigue
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
nausea
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
appetite_loss
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
abdominal_pain
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
muscle_ache
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
diarrhea
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
joint_ache
- 0 6 (100.0%) 43 (97.7%) 49 (98.0%)
- 1 0 ( 0.0%) 1 ( 2.3%) 1 ( 2.0%)
rash
- 0 6 (100.0%) 39 (88.6%) 45 (90.0%)
- 1 0 ( 0.0%) 5 (11.4%) 5 (10.0%)
sore_throat
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
chest_pain
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
runny_nose
- 0 6 (100.0%) 43 (97.7%) 49 (98.0%)
- 1 0 ( 0.0%) 1 ( 2.3%) 1 ( 2.0%)
headache
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
conjunctivitus
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
seizure
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
wheezing
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
altered_consciousness
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
other_respiratory_symptom
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
other_symptom
- 0 6 (100.0%) 44 (100.0%) 50 (100.0%)
───────────────────────────────────────────────────────────────────────────────────
Column names: , COVID-19 Positive, COVID-19 Negative, Total
I'm trying to train a regression tree using the train function from caret. It worked with a linear model, but when I tried a regression tree the following error appeared: Error in '[.data.frame'(m, labs) : undefined columns selected
The data frame consists of 450+ dummy variables made from categorical variables using the fastDummies package. Is it a problem that the data frame consists of that many dummies, where there could be few 1s and many zeroes in each variable?
I have tried two versions:
# Version 1 (x/y interface): predictors are every column of data_train except
# "Outcome"; the response is data_train$Outcome; model type is rpart.
model_reg_tree <- train(
x = data_train[, names(data_train) != "Outcome"],
y = data_train$Outcome,
# NOTE(review): data is supplied in addition to x and y — confirm how train()
# handles this argument when the x/y interface is used.
data = data_train,
method = 'rpart',
tuneLength = 10,
trControl = kontroller_cv)
and
# Version 2 (formula interface): Outcome modelled on all other columns of
# data_train with rpart, using the same tuning and resampling settings.
# NOTE(review): many column names in data_train contain spaces, commas and
# other non-syntactic characters — worth checking whether these are involved
# in the "undefined columns selected" error.
model_reg_træ <- train(
Outcome ~ .,
data = data_train,
method = 'rpart',
tuneLength = 10,
trControl = kontroller_cv)
the controls:
# Resampling settings passed as trControl to both train() calls:
# 10-fold cross-validation with the default summary metrics and per-iteration
# progress output. `folds` is defined outside this snippet and is passed as
# the resampling index.
kontroller_cv <- trainControl(
  method = "cv",
  number = 10,
  index = folds,
  summaryFunction = defaultSummary,
  verboseIter = TRUE
)
Data (only part of it; there was not enough space for all of it):
dput(head(data_train))
structure(list(Medlem_Køn = c(1L, 1L, 1L, 1L, 1L, 1L), Alder_ind = c(20,
20, 54, 36, 51, 51), Elev_ind = c(0L, 0L, 0L, 0L, 0L, 0L), Elev_ud = c(0L,
0L, 0L, 0L, 0L, 0L), Ledig_ind = c(1L, 1L, 1L, 1L, 1L, 1L), Outcome = c(492L,
1158L, 2161L, 7365L, 8522L, 5326L), PostNr_ud_87 = c(0L, 0L,
0L, 0L, 0L, 0L), PostNr_ud_47 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_98 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_74 = c(0L, 0L, 0L, 0L, 0L, 0L),
PostNr_ud_55 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_64 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_94 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_86 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_90 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_52 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_95 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_62 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_54 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_85 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_58 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_93 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_63 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_26 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_67 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_42 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_77 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_00 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_99 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_20 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_71 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_89 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_41 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_92 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_75 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_76 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_44 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_65 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_57 = c(0L,
0L, 0L, 0L, 1L, 0L), PostNr_ud_49 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_45 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_78 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_60 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_84 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_59 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_88 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_61 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_29 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_82 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_34 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_56 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_68 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_83 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_96 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_69 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_24 = c(0L, 1L, 0L, 0L, 0L, 0L), PostNr_ud_28 = c(0L,
0L, 1L, 0L, 0L, 0L), PostNr_ud_72 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_97 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_32 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_40 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_35 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_37 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_Odense = c(0L, 0L, 0L, 0L,
0L, 0L), PostNr_ud_33 = c(1L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_66 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_79 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_70 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_21 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_30 = c(0L, 0L, 0L, 0L, 0L,
1L), PostNr_ud_53 = c(0L, 0L, 0L, 1L, 0L, 0L), PostNr_ud_73 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_36 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_Frd_c = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_23 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_27 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_43 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_22 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_46 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_48 = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_80 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_25 = c(0L, 0L, 0L, 0L, 0L,
0L), PostNr_ud_Kbh_v = c(0L, 0L, 0L, 0L, 0L, 0L), PostNr_ud_31 = c(0L,
0L, 0L, 0L, 0L, 0L), PostNr_ud_Kbh_K = c(0L, 0L, 0L, 0L,
0L, 0L), `Sektor_ind_Pædagogisk Sektor` = c(1L, 0L, 0L, 0L,
0L, 0L), `Sektor_ind_Social- og Sundhedssektoren` = c(0L,
1L, 0L, 1L, 0L, 0L), `Sektor_ind_Teknik- og Servicesektoren` = c(0L,
0L, 0L, 0L, 0L, 0L), `Sektor_ind_Kost- og Servicesektoren` = c(0L,
0L, 1L, 0L, 0L, 1L), `Sektor_ind_Uden sektor` = c(0L, 0L,
0L, 0L, 1L, 0L), Stilling_ind_Dagplejer = c(0L, 0L, 0L, 0L,
0L, 0L), Stilling_ind_Sygehjælpere = c(0L, 0L, 0L, 1L, 0L,
0L), `Stilling_ind_Anden stilling` = c(0L, 0L, 1L, 0L, 1L,
1L), `Stilling_ind_Social og sundhedshjælper` = c(0L, 1L,
0L, 0L, 0L, 0L), `Stilling_ind_Social og sundhedsassistent` = c(0L,
0L, 0L, 0L, 0L, 0L), Stilling_ind_Pædagogmedhjælper = c(1L,
0L, 0L, 0L, 0L, 0L), Stilling_ind_Hjemmehjælper = c(0L, 0L,
0L, 0L, 0L, 0L), `Stilling_ind_Pædagog, Dag - Københavns Kommune` = c(0L,
0L, 0L, 0L, 0L, 0L), `Stilling_ind_Pædagogisk assistent - Dag-området` = c(0L,
0L, 0L, 0L, 0L, 0L), Leder_ud_Nej = c(1L, 1L, 1L, 1L, 1L,
1L), `Leder_ud_Praksisnær ledelse` = c(0L, 0L, 0L, 0L, 0L,
0L), `Leder_ud_Strategisk ledelse` = c(0L, 0L, 0L, 0L, 0L,
0L), `Afd_navn_ind_FOA Horsens` = c(0L, 0L, 0L, 0L, 0L, 0L
), `Afd_navn_ind_Næstved, Nedlagt` = c(0L, 0L, 0L, 0L, 0L,
0L), `Afd_navn_ind_FOA Vendsyssel` = c(0L, 0L, 0L, 0L, 0L,
0L), `Afd_navn_ind_FOA Herning` = c(0L, 0L, 0L, 0L, 0L, 0L
), `Afd_navn_ind_Ikke defineret konv.` = c(0L, 0L, 0L, 0L,
0L, 0L), `Afd_navn_ind_FOA Sønderborg` = c(0L, 0L, 0L, 0L,
0L, 0L), `Afd_navn_ind_FOA Nordjylland` = c(0L, 0L, 0L, 0L,
0L, 0L), `Afd_navn_ind_FOA Silkeb-Skanderbo` = c(0L, 0L,
0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Odense` = c(0L, 0L, 0L,
1L, 0L, 0L), `Afd_navn_ind_FOA Randers` = c(0L, 0L, 0L, 0L,
0L, 0L), `Afd_navn_ind_FOA Tønder,Nedlagt` = c(0L, 0L, 0L,
0L, 0L, 0L), `Afd_navn_ind_PMF Djursland,Nedlag` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Sydfyn` = c(0L, 0L,
0L, 0L, 1L, 0L), `Afd_navn_ind_FOA Aabenraa,Nedlagt` = c(0L,
0L, 0L, 0L, 0L, 0L), Afd_navn_ind_KLS = c(0L, 0L, 1L, 0L,
0L, 0L), `Afd_navn_ind_FOA Esbjerg` = c(0L, 0L, 0L, 0L, 0L,
0L), `Afd_navn_ind_FOA Slagelse,Nedlagt` = c(0L, 0L, 0L,
0L, 0L, 0L), `Afd_navn_ind_FOA Thisted-Morsø` = c(0L, 0L,
0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Hillerød,Nedlagt` = c(0L,
0L, 0L, 0L, 0L, 1L), `Afd_navn_ind_FOA Viborg` = c(0L, 0L,
0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Frederikshavn` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_FOA SOSU` = c(0L, 0L,
0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Mariagerfjord` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Vejle` = c(0L, 0L,
0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Århus` = c(0L, 0L, 0L,
0L, 0L, 0L), `Afd_navn_ind_FOA Varde` = c(0L, 0L, 0L, 0L,
0L, 0L), `Afd_navn_ind_PMF Sydøstjy,Nedlagt` = c(0L, 0L,
0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Brønderslev,Nedl` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Holstebro` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_Kalundborg,Nedlagt` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Kolding` = c(0L, 0L,
0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Lolland` = c(0L, 0L, 0L,
0L, 0L, 0L), `Afd_navn_ind_FOA Sjælland omr Hol` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_PMF mellem broerne,N` = c(0L,
0L, 0L, 0L, 0L, 0L), Afd_navn_ind_LFS = c(0L, 0L, 0L, 0L,
0L, 0L), `Afd_navn_ind_FOA Vestlolland,Nedl` = c(0L, 0L,
0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Skive` = c(0L, 0L, 0L,
0L, 0L, 0L), `Afd_navn_ind_FOA Sønderjylland` = c(0L, 0L,
0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Vojens,Nedlagt` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Helsingør,Nedlag` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Nordsjælland` = c(0L,
1L, 0L, 0L, 0L, 0L), `Afd_navn_ind_FOA 1` = c(0L, 0L, 0L,
0L, 0L, 0L), `Afd_navn_ind_FOA Frederikssund` = c(0L, 0L,
0L, 0L, 0L, 0L), `Afd_navn_ind_PMF Fyn` = c(0L, 0L, 0L, 0L,
0L, 0L), `Afd_navn_ind_FOA Bornholm` = c(0L, 0L, 0L, 0L,
0L, 0L), `Afd_navn_ind_Odsher-Svinn,Nedlagt` = c(0L, 0L,
0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Sjælland omr Nor` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_PMF Vendsyssel,Nedla` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Lillebælt` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Roskilde` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Fredericia,Nedla` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Ringsted,Nedlagt` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Middelfart,Nedla` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Østlolland,Nedla` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_TF,Nedlagt` = c(0L, 0L,
0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Morsø,Nedlagt` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Køge` = c(0L, 0L,
0L, 0L, 0L, 0L), `Afd_navn_ind_PMF Ringkøb,Nedlagt` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Guldborgsund` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_PMF Ribe,Nedlagt` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_Sydsjælland, Nedlagt` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_PMF Sjylland,Nedlagt` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_FOA/PMF Kbh. Syd` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_PMF Viborg amt,Nedla` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_Møn, Nedlagt` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_PMF Himmerland,Nedla` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_PMF Århus,Nedlagt` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Sydsjælland` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_PMF Kbh/frb,Nedlagt` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_PMF afdeling 4` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_FOA Sjælland omr Mid` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_PMF Frederiksborg,Ne` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_PMF afdeling 1` = c(1L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_PMF Silkeborg,Nedlag` = c(0L,
0L, 0L, 0L, 0L, 0L), Afd_navn_ind_Afd.99 = c(0L, 0L, 0L,
0L, 0L, 0L), `Afd_navn_ind_PMF Bornholm,Nedlagt` = c(0L,
0L, 0L, 0L, 0L, 0L), `Afd_navn_ind_Tidl. PMF` = c(0L, 0L,
0L, 0L, 0L, 0L), MedlemstypeOmråde_ind_Erhvervsaktiv = c(1L,
1L, 1L, 1L, 1L, 1L), MedlemstypeOmråde_ind_Pensionist = c(0L,
0L, 0L, 0L, 0L, 0L), MedlemstypeOmråde_ind_Hvilende = c(0L,
0L, 0L, 0L, 0L, 0L), MedlemstypeOmråde_ind_Efterløn = c(0L,
0L, 0L, 0L, 0L, 0L), MedlemstypeOmråde_ud_Erhvervsaktiv = c(1L,
1L, 1L, 1L, 0L, 0L), MedlemstypeOmråde_ud_Efterløn = c(0L,
0L, 0L, 0L, 0L, 0L), MedlemstypeOmråde_ud_Pensionist = c(0L,
0L, 0L, 0L, 1L, 1L), MedlemstypeOmråde_ud_Hvilende = c(0L,
0L, 0L, 0L, 0L, 0L), `Medlemstype_ud_Fuldtid o 30 år u/efbid` = c(0L,
0L, 0L, 0L, 0L, 0L), `Medlemstype_ud_Fleksibel efterløn, fuldtid` = c(0L,
0L, 0L, 0L, 0L, 0L), `Medlemstype_ud_Anden medlemstype` = c(0L,
0L, 0L, 1L, 0L, 0L), `Medlemstype_ud_Pensionist over 65 år` = c(0L,
0L, 0L, 0L, 1L, 1L), `Medlemstype_ud_Fuldtid o 60 år med efterlønsbevis` = c(0L,
0L, 1L, 0L, 0L, 0L), `Medlemstype_ud_Fuldtid m/efterlønsbidrag` = c(0L,
0L, 0L, 0L, 0L, 0L), `Medlemstype_ud_Forbund, fuldtid` = c(1L,
1L, 0L, 0L, 0L, 0L), `Medlemstype_ud_Pensionist under 65 år` = c(0L,
0L, 0L, 0L, 0L, 0L), `Medlemstype_ud_Fuldtid u 30 år u/efbid` = c(0L,
0L, 0L, 0L, 0L, 0L), `Sektor_ud_Pædagogisk Sektor` = c(1L,
0L, 0L, 0L, 0L, 0L), `Sektor_ud_Social- og Sundhedssektoren` = c(0L,
1L, 0L, 1L, 1L, 0L), `Sektor_ud_Teknik- og Servicesektoren` = c(0L,
0L, 0L, 0L, 0L, 0L), `Sektor_ud_Uden sektor` = c(0L, 0L,
0L, 0L, 0L, 0L), `Sektor_ud_Kost- og Servicesektoren` = c(0L,
0L, 1L, 0L, 0L, 1L), Fagruppe_ud_Dagplejere = c(0L, 0L, 0L,
0L, 0L, 0L), Fagruppe_ud_Sygehjælpere = c(0L, 0L, 0L, 1L,
0L, 0L), Fagruppe_ud_Plejere = c(0L, 0L, 0L, 0L, 0L, 0L),
`Fagruppe_ud_FOA RBR Rådhus, Biblioteks- og Regionsbetjente` = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_Uden sektortilhør` = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_Social-og sundhedshjælpere` = c(0L,
1L, 0L, 0L, 0L, 0L), Fagruppe_ud_Køkken = c(0L, 0L, 0L, 0L,
0L, 0L), Fagruppe_ud_Rengøring = c(0L, 0L, 0L, 0L, 0L, 1L
), `Fagruppe_ud_Social-og sundhedsassistenter` = c(0L, 0L,
0L, 0L, 0L, 0L), `Fagruppe_ud_FOA Plejehjemstekniker` = c(0L,
0L, 0L, 0L, 0L, 0L), Fagruppe_ud_Hjemmehjælpere = c(0L, 0L,
0L, 0L, 1L, 0L), `Fagruppe_ud_Dagplejepædagoger mv` = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_Pædagogmedhjælpere og pædagogiske assistenter` = c(1L,
0L, 0L, 0L, 0L, 0L), Fagruppe_ud_Portører = c(0L, 0L, 0L,
0L, 0L, 0L), `Fagruppe_ud_Personale ved højskoler og private kantiner mv` = c(0L,
0L, 0L, 0L, 0L, 0L), Fagruppe_ud_Plejehjemsassistenter = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_Formænd under Formandsoverenskomsten` = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_FOA Brand og Redning` = c(0L,
0L, 0L, 0L, 0L, 0L), Fagruppe_ud_Beskæftigelsesvejledere = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_Værksstedsassistenter/-ledere` = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_Omsorgsmedhjælpere og pædagogiske assistenter` = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_FOA Trafikservice` = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_Ledere og Mellemledere (Tidl. Hjemmehjælpsledere)` = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_Serviceassistenter og -medarbejdere` = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_Ledende servicepersonale` = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_Øvrige indenfor social-og sundheds sektor` = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_Regionsansatte uden for FOA` = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_LFS, dag` = c(0L, 0L, 0L,
0L, 0L, 0L), `Fagruppe_ud_FOA Teknisk Ledelse` = c(0L, 0L,
0L, 0L, 0L, 0L), Fagruppe_ud_Handicapledsagere = c(0L, 0L,
0L, 0L, 0L, 0L), `Fagruppe_ud_FOA Søfart` = c(0L, 0L, 0L,
0L, 0L, 0L), `Fagruppe_ud_Øvrige indenfor teknik-og service sektoren` = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_FOA Sport (Bade- og Idrætsassistenter)` = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_FOA Parkering` = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_Husmedhjælpere i staten` = c(0L,
0L, 1L, 0L, 0L, 0L), `Fagruppe_ud_Øvrige indenfor pædagogisk sektor` = c(0L,
0L, 0L, 0L, 0L, 0L), Fagruppe_ud_Kantineledere = c(0L, 0L,
0L, 0L, 0L, 0L), `Fagruppe_ud_LFS, døgn` = c(0L, 0L, 0L,
0L, 0L, 0L), Fagruppe_ud_Handicaphjælpere = c(0L, 0L, 0L,
0L, 0L, 0L), `Fagruppe_ud_Servicechefer/-ledere og teamkoordinatorer` = c(0L,
0L, 0L, 0L, 0L, 0L), Fagruppe_ud_Specialarbejdere = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_Øvrige indenfor Kost-og service sektoren` = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_FOA Bybuschauffører` = c(0L,
0L, 0L, 0L, 0L, 0L), Fagruppe_ud_Håndværkere = c(0L, 0L,
0L, 0L, 0L, 0L), `Fagruppe_ud_Neurofysiologi assistenter` = c(0L,
0L, 0L, 0L, 0L, 0L), Fagruppe_ud_Vaskeripersonale = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_FOA Teknik og Service (Skoler)` = c(0L,
0L, 0L, 0L, 0L, 0L), Fagruppe_ud_Fodterapeuter = c(0L, 0L,
0L, 0L, 0L, 0L), `Fagruppe_ud_UDGÅET Idrætsassistenter` = c(0L,
0L, 0L, 0L, 0L, 0L), Fagruppe_ud_Legepladsansatte = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_UDGÅET Arbejdsledere` = c(0L,
0L, 0L, 0L, 0L, 0L), Fagruppe_ud_Rengøringsledere = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_Pædagogiske konsulenter` = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_Piccoloer og Piccoliner` = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_Tilsynsførende assistenter` = c(0L,
0L, 0L, 0L, 0L, 0L), Fagruppe_ud_Operationsteknikere = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_Kedel-, maskin- og motorpassere` = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_Pædagogiske pladsanvisere` = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_FOA Ambulance` = c(0L,
0L, 0L, 0L, 0L, 0L), Fagruppe_ud_Gymnastikpædagoger = c(0L,
0L, 0L, 0L, 0L, 0L), Fagruppe_ud_Miljøkontrollører = c(0L,
0L, 0L, 0L, 0L, 0L), `Fagruppe_ud_UDGÅET Skolebetjente/-pedeller og -medhjælpere` = c(0L,
0L, 0L, 0L, 0L, 0L), Stilling_ud_Dagplejere = c(0L, 0L, 0L,
0L, 0L, 0L), Stilling_ud_Sygehjælpere = c(0L, 0L, 0L, 1L,
0L, 0L), Stilling_ud_Anden_stilling = c(0L, 1L, 1L, 0L, 0L,
1L), `Stilling_ud_Social og sundhedshjælper` = c(0L, 0L,
0L, 0L, 0L, 0L), Stilling_ud_Husassistent = c(0L, 0L, 0L,
0L, 0L, 0L), `Stilling_ud_Social og sundhedsassistent` = c(0L,
0L, 0L, 0L, 0L, 0L), Stilling_ud_Hjemmehjælper = c(0L, 0L,
0L, 0L, 1L, 0L), Stilling_ud_Pædagogmedhjælper = c(1L, 0L,
0L, 0L, 0L, 0L), `Stilling_ud_Pædagogisk assistent - Dag-området` = c(0L,
0L, 0L, 0L, 0L, 0L), Stilling_ud_Pædagog = c(0L, 0L, 0L,
0L, 0L, 0L), `Overflyttet_fra_akasse_Det Faglige Hus - A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_BUPL-A-Børne- og ungdomspædagogernes A-k` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Ikke overflyttet` = c(1L,
1L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_HK Hovedstaden` = c(0L,
0L, 1L, 0L, 0L, 1L), `Overflyttet_fra_akasse_3F-Fælles faglig A-kasse` = c(0L,
0L, 0L, 1L, 0L, 0L), `Overflyttet_fra_akasse_Huslige arbejderes a-kasse` = c(0L,
0L, 0L, 0L, 1L, 0L), `Overflyttet_fra_akasse_Pædagogiske Medhjælperes Fælles A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Kvindelige Arbejderes A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_KRIFA-Kristelig A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_bryggeriarb. a-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_NNF-Fødevareforbundets A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Frisørfagets og keramikernes a-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_DLF-A Lærernes A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_TL-Teknikernes A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Ledernes A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Træ-Industri-Bygs arbejdsløshedskasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Business Danmarks A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Træindustriarb. arb.løsh.kas.` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Beklædnings- og Tekstilarbejder A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_DSA-Din Sundhedsfaglige A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_FTF-A Funktionærernes og Tjenestemænd.` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Metalarbejdernes Arbejdsløshedskasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_FFA-Frie Funktionærers A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Grafisk Arbejdsløshedskasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_kommunalarb. arbejdsløshedskasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_DANA-A-kasse for selvstændige` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_RBF's Arbejdsløshedskasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_FOA-Fag og Arbejdes A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_hotel. og rest.pers. a-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), Overflyttet_fra_akasse_ASE = c(0L, 0L,
0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Funktionærernes og Servicefagenes A-kas.` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_SL-Socialpædagogernes A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Min A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_A-kasse for Journalistik, Kommunikation` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Gastronomernes a-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Forsvarets Arbejdsløshedskasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_CA A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_BFA-Byggefagenes A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_MA-Magistrenes A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_AAK-Akademikernes A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), Overflyttet_fra_akasse_Norge = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_EL-Fagets A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), Overflyttet_fra_akasse_Færøerne = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Tjenerne, musik. og artis. a-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), Overflyttet_fra_akasse_Sverige = c(0L,
0L, 0L, 0L, 0L, 0L), Overflyttet_fra_akasse_Tyskland = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Akademikernes A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Murerfagets arb.løsh.kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Frisører, Artister og Maritim A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Bogb.- og kartonagearb. a-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), Overflyttet_fra_akasse_Island = c(0L,
0L, 0L, 0L, 0L, 0L), Overflyttet_fra_akasse_Storbritanien = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Bogtrykfagets a-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Sø-restaurationens a-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Prosa og Merkonomernes A-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), Overflyttet_fra_akasse_Spanien = c(0L,
0L, 0L, 0L, 0L, 0L), Overflyttet_fra_akasse_Irland = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Jurist- og økonom. a-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Journalisternes Arbejdsløshedskasse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Overflyttet_fra_akasse_Reprofagets a-kasse` = c(0L,
0L, 0L, 0L, 0L, 0L), Overflyttet_fra_akasse_Grækenland = c(0L,
0L, 0L, 0L, 0L, 0L), Overflyttet_fra_akasse_Holland = c(0L,
0L, 0L, 0L, 0L, 0L), Overflyttet_fra_akasse_3.lande = c(0L,
0L, 0L, 0L, 0L, 0L), Overflyttet_fra_akasse_Østrig = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ud_Ingen Hverv` = c(1L,
1L, 1L, 1L, 1L, 1L), `Hverv_FoaHvervTypeNavn_ud_Andre hverv` = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ud_Sektorbestyrelse = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ud_Andre afdelingshverv` = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ud_Afd. sektorformand` = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ud_Afdelingsbestyrelse = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ud_Bestyrelsesudvalg = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ud_SOSU-uddannelsesråd` = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ud_Afdelingsnæstformand = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ud_Ungdomsansvarlig = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ud_Ældreråd = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ud_Fællesbest. SOSU` = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ud_PGU uddannelsesråd` = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ud_Klubformand = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ud_HB-suppleant(amt)` = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ud_Amtselevkoordinator = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ud_AMU udd-udvalg` = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ud_2. HB-suppl. (sekt)` = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ud_Faglig sekretær` = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ud_Afd. daglige ledelse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ud_Elevansvarlig(SOSU)` = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ud_Gæstelærer = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ud_Fanebærer = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ud_Faggrupperepræsentan = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ud_PGU udd-råd supplean` = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ud_Afdelingskassere = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ud_Seniorklub = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ud_Kritisk revisorsupp.` = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ud_Afdelingsformand = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ud_HB-medlem(amt)` = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ud_Kritisk revisor` = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ud_AMU udd-udvalg suppl` = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ud_Ungdomsnetværket = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ud_Landselevbestyrelse = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ud_Sektorformand = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ud_3. HB-suppl. (sekt)` = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ud_HB-medlem(sektor)` = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ind_Ingen Hverv` = c(1L,
1L, 1L, 1L, 1L, 1L), `Hverv_FoaHvervTypeNavn_ind_Andre hverv` = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ind_Andre afdelingshverv` = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ind_Bestyrelsesudvalg = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ind_Sektorbestyrelse = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ind_Gæstelærer = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ind_2. HB-suppl. (sekt)` = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ind_Afdelingsbestyrelse = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ind_Afd. daglige ledelse` = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ind_SOSU-uddannelsesråd` = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ind_Afd. sektorformand` = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ind_Fanebærer = c(0L,
0L, 0L, 0L, 0L, 0L), Hverv_FoaHvervTypeNavn_ind_Sektornæstformand = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ind_Kritisk revisor` = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ind_Fællesbest. SOSU` = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ind_HB-medlem(sektor)` = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_FoaHvervTypeNavn_ind_1. HB-suppl. (sekt)` = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_Tillidshverv_ind_MED SU/AMO nederste` = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_Tillidshverv_ind_MED SU/AMO næst nederst` = c(0L,
0L, 0L, 0L, 0L, 0L), `Hverv_Tillidshverv_ind_Lokalt MED udvalg supp.` = c(0L,
0L, 0L, 0L, 0L, 0L)), row.names = c(NA, -6L), class = c("tbl_df", "tbl",
"data.frame"))
Problem
Your variable names are unclean. For instance, they include spaces.
Solution
To use the formula interface, you must first clean up the variable names. An easy alternative is to use the janitor package. For instance:
library(janitor)
# Clean up the column names first: the formula interface used below cannot
# cope with unclean names such as ones containing spaces.
# NOTE(review): clean_names() presumably also lower-cases the names, which
# would explain why the formula refers to `outcome` rather than `Outcome` --
# confirm against the janitor documentation.
data_train <- clean_names(data_train)
# Fit the tree through the formula interface, using every remaining column of
# data_train as a predictor of `outcome`.
model_reg_tree <- train(
outcome ~ .,
data = data_train,
method = 'rpart',
tuneLength = 10,
trControl = kontroller_cv
)
Alternative solution
You can still use the x = / y = interface without cleaning the names, but then you should not provide the data argument. (However, I strongly suggest that you clean the variable names.) For instance:
model_reg_tree <- train(
x = data_train[, names(data_train) != "Outcome"],
y = data_train$Outcome,
method = 'rpart',
tuneLength = 10,
trControl = kontroller_cv
)
My issue is that when I try to retrieve names(myresults) after subsetting a table, I get NULL when the returned subset has only 1 result. Instead of a result that carries the row names, R returns a bare integer (in this case, 1).
Here is a table
head(tbl)
1 2 3 4 5 6
afford 0 1 0 0 0 0
app 0 0 0 1 0 0
back 0 1 0 0 0 0
cancel 0 0 0 0 1 0
charg 0 0 0 0 0 1
download 0 0 0 0 0 1
I have been subsetting the table within a loop to return a table for each group. If a term belongs to a group it has a value of 1:
for (i in 1:ncol(tbl)) {
t <- tbl[which(tbl[,i]==1),i]
nam <- names(t)
df <- as.data.frame(nam)
names(df) <- paste0("Cluster ",i)
print(kable(df))
}
This loop seems to work OK when which() returns more than one term. But group 4, which has only one term, "app", gives me issues. Here's an example on group 4, which does not work as expected, followed by group 3, which does:
> t <- tbl[which(tbl[,4]==1),4] # only 1 observation meets this criteria
> t
[1] 1
> t <- tbl[which(tbl[,3]==1),3] # 3 observations meet this criteria
> t
aword cat dog
1 1 1
So I can get names(t) for tbl[,3] where it has 3 returned instances but not for tbl[,4] which only has 1.
> t <- fintab[which(fintab[,4]==1),4]
> names(t)
NULL # expected "app"
> t <- fintab[which(fintab[,4]==1),4]
> names(t)
[1] "aword" "cat" "dog"
How can I get names(t) when I have only 1 instance returned like in the example?
Some further context following comment below:
> str(tbl)
'table' int [1:33, 1:6] 0 0 0 0 0 0 0 0 0 0 ...
- attr(*, "dimnames")=List of 2
..$ : chr [1:33] "aword" "app" "cat" "dog" ...
..$ : chr [1:6] "1" "2" "3" "4" ...
>
and
> dput(tbl)
structure(c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L), .Dim = c(33L, 6L), .Dimnames = structure(list(
c("aword", "app", "back", "cancel", "charg", "download",
"enough", "expens", "get", "great", "just", "like", "love",
"cat", "dog", "bla", "month", "much", "need",
"never", "phone", "pleas", "blabla", "realli", "term", "sign",
"thank", "time", "triangle", "use", "want", "will", "work"), c("1",
"2", "3", "4", "5", "6")), .Names = c("", "")), class = "table")
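Since we are subsetting a single column, extract that column first and then subset it with a logical index (tbl[,4] == 1). The extracted column is a vector that keeps the row names as its names, so they survive even when only one element matches. There is no need to wrap the index in which() unless there are NAs; in that case, which() removes them.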
tbl[,4][tbl[,4]==1]
# app
# 1
tbl[,3][tbl[,3]==1]
# cat blabla time
# 1 1 1
I have a data frame that I cannot melt with reshape2::melt. Can anyone help me see why?
> dput(x2)
structure(list(`26492` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), `28728` = c(0L, NA, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("26492", "28728"), class = c("data.table",
"data.frame"), row.names = c(NA, -286L), .internal.selfref = <pointer: 0x0000000000330788>)
> melt(x2)
Using as id variables
Error in data.frame(ids, variable, value, stringsAsFactors = FALSE) :
arguments imply differing number of rows: 0, 572
I also created this data frame with an NA value inside, and melt works on it, so I'm not sure where the problem is.
> df1 <- data.frame(x = rnorm(2), y = rnorm(2))
> df1[1,1]<-NA
> df1
x y
1 NA -1.0138754
2 -0.7848228 0.3117672
> melt(df1)
Using as id variables
variable value
1 x NA
2 x -0.7848228
3 y -1.0138754
4 y 0.3117672
data.frames and data.tables, while related, are different beasts and should be treated differently. This is why in data.table Version 1.8.11, a new melt method was provided.
To summarize what I found, using melt.data.frame will give you this error on a data.table but not on a data.frame, so you should either use as.data.frame(YOUR_DATA_TABLE) or (recommended) update your version of data.table.
Example:
library(reshape2)
library(data.table)
packageVersion("data.table")
# [1] ‘1.8.11’
## WORKS WITH WARNING THAT YOU DID NOT
## SUPPLY ID AND MEASURE VARIABLES
data.table:::melt.data.table(x2)
# variable value
# 1: 26492 0
# 2: 26492 0
# 3: 26492 0
# 4: 26492 0
# 5: 26492 0
# ---
# 568: 28728 0
# 569: 28728 0
# 570: 28728 0
# 571: 28728 0
# 572: 28728 0
# Warning message:
# In data.table:::melt.data.table(x2) :
# To be consistent with reshape2's melt, id.vars and measure.vars
# are internally guessed when both are 'NULL'. All non-numeric/integer/
# logical type columns are conisdered id.vars, which in this case are
# columns ''. Consider providing at least one of 'id' or 'measure' vars
# in future.
So, with data.table 1.8.11 and up, which introduced this new melt method, things work. Where's the error you speak of?
## HERE'S YOUR ERROR
reshape2:::melt.data.frame(x2)
# Using as id variables
# Error in data.frame(ids, variable, value, stringsAsFactors = FALSE) :
# arguments imply differing number of rows: 0, 572
## HERE'S A WORKAROUND
head(reshape2:::melt.data.frame(as.data.frame(x2)))
# Using as id variables
# variable value
# 1 26492 0
# 2 26492 0
# 3 26492 0
# 4 26492 0
# 5 26492 0
# 6 26492 0
This works with your small example too.
df1 <- data.frame(x = rnorm(2), y = rnorm(2))
df1[1,1]<-NA
DT <- data.table(df1)
reshape2:::melt.data.frame(DT) ## ERROR
reshape2:::melt.data.frame(as.data.frame(DT)) ## NO ERROR
data.table:::melt.data.table(DT) ## Warning. NO ERROR
Update
If there is an ID var specified, it appears that this error does not occur:
df1 <- data.frame(matrix(rnorm(6), ncol = 3))
df1[1, 2] <- NA
df1
DT <- data.table(df1)
reshape2:::melt.data.frame(DT, id.vars="X1") ## NO ERROR
# X1 variable value
# 1 1.3586796 X2 NA
# 2 -0.1027877 X2 -0.05380504
# 3 1.3586796 X3 -1.37705956
# 4 -0.1027877 X3 -0.41499456
data.table:::melt.data.table(DT, id.vars="X1") ## NO ERROR
# X1 variable value
# 1: 1.3586796 X2 NA
# 2: -0.1027877 X2 -0.05380504
# 3: 1.3586796 X3 -1.37705956
# 4: -0.1027877 X3 -0.41499456