Related
I am calculating the correlation between each variable and the target feature in a data frame. It works great aside from one variable, Age, which does not produce a correlation; instead I get an NA. I removed all NA values before even starting the analysis, so the data is clean.
This is the code: (PD is the target variable and I want to compare it with all other variables. PD is binary)
pearsons = c()
for (i in 1:length(colnames(Train_set))){
pearsons[i] = cor(Train_set[,i], Train_set$PD, method = 'pearson')
}
This is the data structure: (only some of it)
> glimpse(Train_set)
Rows: 1,219
Columns: 56
$ PD <dbl> 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1,…
$ gender <int> 2, 2, 2, 1, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2,…
$ cancer_type <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
$ Treatment <int> 5, 6, 6, 6, 5, 6, 5, 6, 5, 5, 5, 5, 6, 6, 6, 6, 6, 5, 5, 6, 6, 6, 6, 5, 6, 5, 5, 6, 5,…
$ totaldata_new.Age <int> 50, 66, 51, 60, 31, 70, 51, 56, 65, 62, 55, 69, 32, 82, 60, 49, 56, 59, 50, 51, 70, 74…
$ Adipocytes <dbl> 0.000000000, 0.000000000, 0.005592077, 0.005844092, 0.038175712, 0.000000000, 0.005063…
$ B.cells <dbl> 0.045214394, 1.300478781, 0.184967801, 0.032890485, 0.041641426, 0.006477740, 0.653999…
$ Basophils <dbl> 0.120695085, 0.065615816, 0.362173522, 0.039214941, 0.225555640, 0.056926623, 0.019076…
totaldata_new.Age is the Age variable. I tried converting it with as.numeric() and as.integer(), but neither worked.
This is the training set,
structure(list(PD = c(0, 0, 1, 1, 1, 1, 0, 0, 1, 1), gender = c(2L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L), cancer_type = c(3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), Treatment = c(5L, 6L, 6L, 6L,
5L, 6L, 5L, 6L, 5L, 5L), totaldata_new.Age = c(50L, 66L, 51L,
60L, 31L, 70L, 51L, 56L, 65L, 62L), Adipocytes = c(0, 0, 0.00559207695850587,
0.00584409167696122, 0.0381757121622292, 0, 0.00506330308366599,
0, 0.0156430635414994, 0), B.cells = c(0.0452143935493372, 1.30047878079526,
0.184967800962064, 0.0328904854435036, 0.0416414264467815, 0.00647774047514386,
0.653999365837062, 0.0331653878504112, 0.0286461940371656, 0.0888471904628742
), Basophils = c(0.120695085116671, 0.0656158162440011, 0.362173521572841,
0.0392149412975555, 0.225555640419744, 0.0569266227666268, 0.0190762558461507,
0.0733199539844435, 0.20291673586147, 0.0757313145147394), CD4..memory.T.cells = c(0,
0.24081994997988, 0, 0.0084070550945875, 0, 0, 0.0704387567897827,
0, 0.0177784010286187, 0.00653794301542519), CD4..naive.T.cells = c(0,
0.222121262122827, 0, 0, 0, 0, 0.0337776019379054, 0, 0, 0)), row.names = c("Pt10",
"Pt101", "Pt103", "Pt106", "Pt11", "Pt17", "Pt18", "Pt26", "Pt27",
"Pt28"), class = "data.frame")
Why is this variable producing NA, while other variables give good results of the correlation?
Looks like there are NA values in the columns of interest.
To avoid this problem, there is the parameter 'use' in the 'cor'-function, which the help explains as:
"giving a method for computing covariances in the presence of missing
values."
I'd recommend changing your code to:
pearsons[i] = cor(Train_set[,i], Train_set$PD, method = 'pearson',
use = "complete.obs")
Hope that helps!
Samuel
I realize that I may be asking this question into the void because the wnominate package is not widely used, but...
I have data on evaluation of state policies. I would like to know the potential choice polarization using W-NOMINATE Method. I have prepared my data according to the wnominate package vignette (p. 9, 11). But when I try to apply the method to my data, I receive a very strange error:
Error in wnominate(rc_samp, polarity = c(2, 7)) :
Data contains values other than 1 or 6 or 9.
And I do not understand what I am being asked for. Can anybody clarify what I am doing wrong? It will be a miracle if someone can help me with this package.
Data and code:
respNames <- samp$id
codeData <- matrix(samp$code, length(samp$code), 1)
colnames(codeData) <- "code"
samp <- samp[, -c(1,2)]
rc_samp <- rollcall(samp, yea = c(5,6,7), nay = c(1,2,3), missing = 4,
notInLegis = 88, legis.names = respNames, legis.data = codeData, desc = "Ideological polarization")
samp_result <- wnominate(rc_samp, polarity = c(7,7))
structure(list(id = structure(1:100, .Label = c("1", "2", "3",
"4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26",
"27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37",
"38", "39", "40", "41", "42", "43", "44", "45", "46", "47", "48",
"49", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59",
"60", "61", "62", "63", "64", "65", "66", "67", "68", "69", "70",
"71", "72", "73", "74", "75", "76", "77", "78", "79", "80", "81",
"82", "83", "84", "85", "86", "87", "88", "89", "90", "91", "92",
"93", "94", "95", "96", "97", "98", "99", "100", "101", "102",
"103", "104", "105", "106", "107", "108", "109", "110", "111",
"112", "113", "114", "115", "116", "117", "118", "119", "120",
"121", "122", "123", "124", "125", "126", "127", "128", "129",
"130", "131", "132", "133", "134", "135", "136", "137", "138",
"139", "140", "141", "142", "143", "144", "145", "146", "147",
"148", "149", "150", "151", "152", "153", "154", "155", "156",
"157", "158", "159", "160", "161", "162", "163", "164", "165",
"166", "167", "168", "169", "170", "171", "172", "173", "174",
"175", "176", "177", "178", "179", "180", "181", "182", "183",
"184", "185", "186", "187", "188", "189", "190", "191", "192",
"193", "194", "195", "196", "197", "198", "199", "200", "201",
"202", "203", "204", "205", "206", "207", "208", "209", "210",
"211", "212", "213", "214", "215", "216", "217", "218", "219",
"220", "221", "222", "223", "224", "225", "226", "227", "228",
"229", "230", "231", "232", "233", "234", "235", "236", "237",
"238", "239", "240", "241", "242", "243", "244", "245", "246",
"247", "248", "249", "250", "251", "252", "253", "254", "255",
"256", "257", "258", "259", "260", "261", "262", "263", "264",
"265", "266", "267", "268", "269", "270", "271", "272", "273",
"274", "275", "276", "277", "278", "279", "280", "281", "282",
"283", "284", "285", "286", "287", "288", "289", "290", "291",
"292", "293", "294", "295", "296", "297", "298", "299", "300",
"301", "302", "303", "304", "305", "306", "307", "308", "309",
"310", "311", "312", "313", "314", "315", "316", "317", "318",
"319", "320", "321", "322", "323", "324", "325", "326", "327",
"328", "329", "330", "331", "332", "333", "334", "335", "336",
"337", "338", "339", "340", "341", "342", "343", "344", "345",
"346", "347", "348"), class = "factor"), code = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("control", "treatment_1", "treatment_2"
), class = "factor"), ideol_samesexmarriage = c(2, 7, 1, 2, 1,
7, 6, 1, 88, 1, 6, 6, 4, 1, 6, 1, 1, 88, 1, 88, 7, 6, 4, 6, 6,
6, 5, 7, 6, 3, 7, 4, 7, 7, 5, 5, 7, 7, 3, 6, 1, 7, 1, 1, 1, 7,
7, 7, 7, 7, 1, 88, 1, 7, 4, 7, 5, 5, 3, 7, 4, 7, 7, 88, 7, 7,
7, 4, 6, 6, 6, 6, 7, 7, 1, 7, 7, 88, 2, 2, 7, 7, 7, 1, 7, 5,
7, 7, 7, 1, 7, 5, 6, 5, 5, 7, 4, 7, 88, 2), ideol_flattaxes = c(5,
7, 1, 2, 2, 1, 6, 6, 2, 1, 6, 2, 4, 5, 1, 1, 6, 1, 4, 2, 3, 1,
1, 1, 4, 1, 2, 3, 1, 1, 1, 1, 3, 5, 4, 1, 88, 7, 5, 4, 1, 4,
1, 5, 3, 4, 4, 7, 2, 2, 1, 3, 5, 1, 4, 4, 5, 3, 3, 1, 3, 6, 1,
4, 3, 1, 2, 2, 2, 4, 6, 7, 3, 1, 5, 7, 1, 7, 1, 6, 7, 3, 6, 2,
2, 4, 1, 1, 1, 6, 2, 4, 5, 1, 5, 1, 2, 1, 1, 2), ideol_progrtaxes = c(2,
1, 7, 5, 88, 5, 1, 1, 7, 7, 2, 6, 6, 1, 5, 5, 1, 6, 6, 5, 6,
5, 6, 7, 4, 7, 5, 6, 5, 7, 7, 7, 5, 4, 3, 88, 6, 1, 4, 4, 1,
4, 5, 2, 7, 5, 7, 1, 7, 7, 7, 2, 4, 7, 4, 7, 5, 6, 3, 7, 5, 1,
7, 4, 6, 7, 6, 5, 7, 5, 1, 2, 5, 7, 3, 2, 7, 2, 7, 2, 1, 5, 1,
7, 88, 3, 7, 7, 7, 3, 7, 5, 2, 7, 3, 7, 5, 5, 1, 6), ideol_democracy = c(4,
7, 7, 3, 4, 7, 5, 1, 7, 6, 6, 5, 4, 7, 5, 6, 6, 88, 1, 6, 7,
5, 6, 6, 6, 2, 4, 3, 5, 6, 6, 2, 7, 6, 5, 7, 7, 3, 6, 4, 6, 7,
2, 4, 6, 7, 7, 7, 7, 6, 6, 6, 5, 6, 4, 5, 5, 6, 7, 7, 2, 5, 7,
4, 6, 7, 6, 4, 6, 4, 5, 6, 4, 7, 1, 6, 6, 88, 5, 6, 5, 4, 2,
3, 4, 5, 7, 3, 7, 6, 6, 5, 6, 7, 5, 7, 4, 4, 1, 7), ideol_dictatorship = c(5,
4, 5, 3, 1, 1, 5, 7, 2, 88, 6, 2, 7, 5, 2, 6, 3, 5, 7, 1, 1,
7, 2, 6, 1, 7, 1, 5, 7, 2, 1, 4, 1, 2, 1, 1, 1, 1, 7, 3, 7, 1,
6, 7, 1, 1, 1, 1, 1, 1, 7, 1, 1, 2, 3, 1, 6, 1, 1, 1, 3, 4, 1,
4, 1, 1, 2, 1, 3, 2, 3, 2, 4, 1, 7, 1, 3, 4, 3, 1, 2, 3, 7, 4,
7, 3, 1, 1, 1, 3, 1, 3, 1, 6, 2, 1, 5, 6, 1, 1), ideol_goveconomy = c(2,
2, 1, 2, 1, 2, 3, 1, 1, 3, 3, 2, 4, 1, 2, 1, 5, 1, 7, 1, 2, 4,
1, 6, 1, 6, 2, 3, 4, 1, 6, 2, 2, 2, 1, 2, 2, 4, 3, 1, 1, 1, 5,
2, 2, 2, 1, 1, 5, 5, 1, 1, 1, 1, 3, 1, 2, 1, 1, 88, 3, 2, 1,
4, 1, 3, 1, 6, 1, 2, 1, 1, 1, 4, 1, 1, 1, 1, 2, 1, 2, 1, 7, 5,
2, 4, 3, 1, 1, 2, 1, 4, 2, 1, 2, 6, 2, 4, 3, 3), ideol_govpaternalism = c(3,
4, 5, 4, 88, 3, 5, 1, 6, 7, 7, 5, 4, 88, 5, 4, 3, 6, 1, 5, 6,
2, 4, 4, 6, 7, 6, 6, 2, 7, 6, 7, 5, 5, 4, 88, 6, 6, 5, 5, 6,
7, 3, 3, 7, 2, 7, 5, 7, 5, 3, 6, 6, 6, 5, 5, 6, 7, 2, 88, 5,
6, 7, 4, 6, 6, 3, 5, 2, 5, 3, 2, 88, 7, 5, 6, 5, 2, 5, 4, 4,
5, 6, 4, 4, 6, 6, 6, 3, 4, 7, 5, 6, 6, 6, 6, 4, 5, 88, 6), ideol_govfreeimmigration = c(3,
4, 1, 2, 1, 2, 4, 1, 4, 1, 4, 3, 1, 2, 2, 1, 1, 4, 1, 3, 6, 2,
1, 2, 2, 5, 2, 3, 2, 6, 5, 2, 5, 3, 1, 2, 2, 2, 2, 1, 1, 6, 1,
2, 3, 2, 5, 2, 3, 1, 3, 4, 3, 4, 2, 2, 4, 3, 3, 1, 1, 88, 6,
2, 1, 5, 3, 3, 3, 4, 2, 5, 4, 7, 1, 3, 4, 2, 2, 1, 4, 4, 4, 1,
2, 4, 4, 5, 6, 2, 1, 4, 4, 4, 4, 6, 2, 2, 88, 2), ideol_govimmigration = c(5,
6, 7, 6, 88, 5, 1, 7, 4, 88, 6, 5, 7, 88, 6, 7, 7, 88, 7, 88,
4, 88, 7, 5, 6, 5, 6, 5, 88, 3, 4, 6, 4, 6, 7, 7, 6, 4, 6, 6,
88, 5, 7, 6, 5, 4, 3, 5, 4, 6, 5, 4, 6, 4, 4, 6, 6, 5, 7, 88,
7, 4, 7, 4, 6, 3, 5, 3, 5, 5, 6, 5, 88, 1, 7, 7, 4, 4, 6, 7,
6, 6, 5, 7, 88, 3, 3, 3, 2, 6, 6, 5, 4, 6, 5, 5, 4, 88, 1, 6),
ideol_commongoals = c(3, 2, 1, 2, 2, 2, 2, 4, 2, 6, 2, 3,
4, 6, 2, 1, 2, 2, 7, 4, 4, 2, 2, 88, 1, 6, 1, 6, 2, 1, 7,
6, 1, 3, 2, 1, 2, 1, 5, 4, 6, 1, 4, 4, 2, 4, 4, 1, 2, 3,
7, 5, 2, 1, 2, 4, 88, 88, 1, 3, 5, 88, 5, 4, 3, 5, 5, 2,
3, 4, 88, 2, 4, 88, 1, 4, 3, 3, 3, 1, 2, 3, 7, 3, 4, 4, 1,
1, 1, 3, 1, 4, 4, 2, 4, 2, 4, 5, 1, 3), ideol_privatefreedom = c(5,
88, 7, 5, 7, 4, 3, 3, 5, 1, 6, 5, 4, 2, 5, 7, 6, 7, 1, 3,
3, 5, 5, 88, 7, 2, 6, 3, 5, 7, 2, 2, 7, 5, 5, 88, 6, 7, 3,
4, 1, 7, 4, 4, 3, 5, 5, 7, 2, 5, 1, 3, 6, 7, 4, 4, 3, 88,
7, 6, 3, 4, 5, 4, 5, 4, 4, 7, 7, 3, 88, 6, 6, 88, 7, 3, 5,
2, 4, 7, 4, 5, 1, 3, 5, 4, 5, 7, 5, 5, 5, 4, 5, 2, 5, 6,
5, 3, 88, 3), ideol_goveconomy_lib = c(5, 2, 4, 2, 6, 3,
1, 5, 2, 1, 5, 5, 4, 5, 2, 6, 2, 7, 1, 5, 4, 5, 3, 4, 2,
2, 6, 4, 5, 7, 2, 1, 2, 3, 5, 5, 88, 4, 3, 5, 5, 6, 1, 3,
2, 4, 5, 5, 5, 1, 5, 88, 5, 6, 4, 3, 2, 1, 2, 88, 5, 5, 1,
4, 6, 4, 1, 3, 4, 3, 6, 5, 4, 1, 7, 5, 1, 1, 2, 6, 3, 7,
1, 1, 3, 4, 1, 5, 4, 6, 6, 4, 6, 3, 3, 3, 2, 3, 1, 4)), row.names = c(NA,
-100L), class = c("tbl_df", "tbl", "data.frame"))
I contacted the package creator and we came to the (though unilateral) conclusion that the WNOMINATE method is applicable to data with a large number of columns.
Rollcall votes should be coded as either 1, 6, or 9.
I'm currently learning R with a book and was trying the mutate_at function from dplyr. In this example I want to standardize the survey items on a scale from 0 to 1. To do this, we can divide each value by the (theoretical) maximum value of the scale.
The book example stats_test from the package "pradadata" works perfectly fine:
data(stats_test, package = "pradadata")
stats_test %>%
drop_na() %>%
mutate_at(.vars = vars(study_time, self_eval, interest),
.funs = funs(prop = ./max(.))) %>%
select(contains("_prop"))
Output:
study_time_prop self_eval_prop interest_prop
<dbl> <dbl> <dbl>
1 0.6 0.7 0.667
2 0.8 0.8 0.833
3 0.6 0.4 0.167
4 0.8 0.7 0.833
5 0.4 0.6 0.5
6 0.4 0.6 0.667
7 0.8 0.6 0.5
8 0.2 0.7 0.667
9 0.6 0.8 0.833
10 0.6 0.7 0.833
# ... with 1,617 more rows
I tried the same code with my own data, but it doesn't work and I can't figure out why. The variable RG04 from my data has a range from 1-5. I tried to transform the variable from numeric to integer, because the variables from the data stats_test are integer too:
df_literacy_2 <- transform(df_literacy, RG04 = as.integer(RG04))
df_literacy_2 <- tibble(df_literacy_2)
df_literacy_2 %>%
drop_na() %>%
mutate_at(.vars = vars(RG04),
.funs = funs(prop = ./max(.))) %>%
select(contains("_prop"))
Output:
# A tibble: 0 x 0
Warning messages:
1: Problem with `mutate()` input `prop`.
i no non-missing arguments to max; returning -Inf
i Input `prop` is `RG04/max(RG04)`.
2: In base::max(x, ..., na.rm = na.rm) :
no non-missing arguments to max; returning -Inf
str(df_literacy_2$RG04)
int [1:630] 2 4 2 1 2 2 1 3 1 3 ...
Why doesn't it work on my data?
Thank you for your help.
Edit with sample of df_literacy:
> dput(head(df_literacy,20))
structure(list(CASE = c(40, 41, 44, 45, 48, 49, 54, 55, 56, 57,
58, 61, 62, 63, 64, 65, 66, 67, 68, 69), SERIAL = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), REF = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), QUESTNNR = c("base", "base",
"base", "base", "base", "base", "base", "base", "base", "base",
"base", "base", "base", "base", "base", "base", "base", "base",
"base", "base"), MODE = c("interview", "interview", "interview",
"interview", "interview", "interview", "interview", "interview",
"interview", "interview", "interview", "interview", "interview",
"interview", "interview", "interview", "interview", "interview",
"interview", "interview"), STARTED = structure(c(1607290462,
1607290608, 1607291086, 1607291118, 1607291265, 1607291793, 1607294071,
1607294336, 1607294337, 1607294419, 1607294814, 1607296474, 1607301809,
1607329348, 1607333933, 1607335996, 1607336207, 1607336378, 1607343194,
1607343414), tzone = "UTC", class = c("POSIXct", "POSIXt")),
EI01 = structure(c(2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L), .Label = c("Ja",
"Nein", "Nicht beantwortet"), class = "factor"), EI02 = c(2,
2, 2, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 3),
RF01 = c(4, 2, 4, 3, 4, 4, 1, 3, 2, 3, 4, 3, 2, 3, 2, 2,
4, 2, 5, 3), RF02 = c(1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 2, 1,
1, 1, 2, 2, 2, 2, 2, 2), RF03 = c(1, 2, 2, 2, 1, 2, 1, 1,
1, 1, 2, 1, 1, 2, 2, 2, 1, 2, 1, 2), RG01 = c(2, 2, 2, 2,
2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2), RG02 = c(3,
3, 3, 3, 4, 3, 4, 2, 4, 2, 3, 4, 4, 2, 4, 3, 4, 3, 4, 4),
RG03 = c(3, 2, 2, 3, 3, 3, 1, 3, 1, 2, 3, 1, 2, 2, 1, 3,
2, 3, 2, 2), RG04 = c(2, 4, 2, 1, 2, 2, 1, 3, 1, 3, 2, 4,
1, 1, 1, 1, 1, 2, 4, 1), RG05 = c(1, 1, 1, 1, 1, 1, 1, 2,
1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1), SD01 = structure(c(2L,
1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 1L), .Label = c("weiblich", "männlich", "divers",
"nicht beantwortet"), class = "factor"), SD03 = c(4, 3, 2,
2, 1, 2, 4, 4, 1, 4, 3, 1, 2, 3, 2, 4, 2, 3, 1, 3), SD05_01 = c(23,
22, 22, 21, 18, 22, 21, 27, 17, 22, 17, 21, 21, 22, 50, 25,
23, 20, 23, 23), TIME001 = c(2, 3, 23, 73, 29, 2, 3, 3, 29, 7,
50, 55, 3, 2, 10, 2, 1, 5, 7, 35), TIME002 = c(2, 2, 16,
34, 12, 14, 2, 2, 21, 2, 30, 24, 21, 3, 3, 2, 3, 2, 3, 22
), TIME003 = c(34, 8, 12, 15, 13, 12, 12, 7, 13, 11, 16,
10, 11, 16, 8, 8, 7, 8, 11, 14), TIME004 = c(60, 33, 25,
31, 45, 25, 14, 13, 38, 35, 50, 50, 37, 32, 32, 25, 72, 55,
28, 29), TIME005 = c(84, 21, 29, 41, 54, 33, 30, 22, 32,
42, 44, 23, 65, 30, 28, 32, 51, 31, 27, 44), TIME006 = c(14,
9, 27, 11, 24, 8, 8, 9, 18, 12, 35, 33, 27, 46, 11, 15, 8,
14, 12, 14), TIME007 = c(3, 18, 3, 5, 6, 2, 9, 2, 3, 3, 6,
7, 3, 13, 4, 4, 378, 3, 4, 10), TIME_SUM = c(199, 94, 135,
142, 183, 96, 78, 58, 154, 112, 186, 152, 167, 142, 96, 88,
146, 118, 92, 168), MAILSENT = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
LASTDATA = structure(c(1607290661, 1607290702, 1607291221,
1607291328, 1607291448, 1607291889, 1607294149, 1607294394,
1607294491, 1607294531, 1607295045, 1607296676, 1607301976,
1607329490, 1607334030, 1607336084, 1607336727, 1607336496,
1607343286, 1607343582), tzone = "UTC", class = c("POSIXct",
"POSIXt")), FINISHED = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1), Q_VIEWER = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), LASTPAGE = c(7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7),
MAXPAGE = c(7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7), MISSING = c(7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 0, 7, 7, 7), MISSREL = c(1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1), TIME_RSI = c("46023",
"14246", "0.75", "0.63", "0.54", "12055", "17533", "30682",
"0.7", "44197", "0.45", "0.58", "0.83", "44378", "44501",
"18629", "46753", "46388", "44197", "0.57"), DEG_TIME = c(27,
27, 3, 1, 0, 23, 30, 42, 2, 17, 0, 2, 7, 18, 10, 27, 43,
18, 8, 0)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
Edit with TRUE and FALSE NAs:
> sapply(df_literacy, function(a) table(c(T,F,is.na(a)))-1)
CASE SERIAL REF QUESTNNR MODE STARTED EI01 EI02 RF01 RF02 RF03 RG01 RG02 RG03 RG04 RG05 SD01 SD03 SD05_01 TE03_01 TIME001 TIME002 TIME003
FALSE 630 0 0 630 630 630 630 630 630 630 630 630 630 630 630 630 629 629 615 99 630 630 630
TRUE 0 630 630 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 15 531 0 0 0
TIME004 TIME005 TIME006 TIME007 TIME_SUM MAILSENT LASTDATA FINISHED Q_VIEWER LASTPAGE MAXPAGE MISSING MISSREL TIME_RSI DEG_TIME
FALSE 630 630 629 625 630 0 630 630 630 630 630 630 630 630 630
TRUE 0 0 1 5 0 630 0 0 0 0 0 0 0 0 0
There are a few things to correct here.
drop_na() is removing all of your data.
drop_na(df_literacy)
# # A tibble: 0 x 37
# # ... with 37 variables: CASE <dbl>, SERIAL <lgl>, REF <lgl>, QUESTNNR <chr>,
# # MODE <chr>, STARTED <dttm>, EI01 <fct>, EI02 <dbl>, RF01 <dbl>, RF02 <dbl>,
# # RF03 <dbl>, RG01 <dbl>, RG02 <dbl>, RG03 <dbl>, RG04 <dbl>, RG05 <dbl>,
# # SD01 <fct>, SD03 <dbl>, SD05_01 <dbl>, TIME001 <dbl>, TIME002 <dbl>,
# # TIME003 <dbl>, TIME004 <dbl>, TIME005 <dbl>, TIME006 <dbl>, TIME007 <dbl>,
# # TIME_SUM <dbl>, MAILSENT <lgl>, LASTDATA <dttm>, FINISHED <dbl>,
# # Q_VIEWER <dbl>, LASTPAGE <dbl>, MAXPAGE <dbl>, MISSING <dbl>,
# # MISSREL <dbl>, TIME_RSI <chr>, DEG_TIME <dbl>
The problem is that you have several columns that are completely NA, namely SERIAL, REF, and MAILSENT.
sapply(df_literacy, function(a) table(c(T,F,is.na(a)))-1)
# CASE SERIAL REF QUESTNNR MODE STARTED EI01 EI02 RF01 RF02 RF03 RG01 RG02
# FALSE 20 0 0 20 20 20 20 20 20 20 20 20 20
# TRUE 0 20 20 0 0 0 0 0 0 0 0 0 0
# RG03 RG04 RG05 SD01 SD03 SD05_01 TIME001 TIME002 TIME003 TIME004 TIME005
# FALSE 20 20 20 20 20 20 20 20 20 20 20
# TRUE 0 0 0 0 0 0 0 0 0 0 0
# TIME006 TIME007 TIME_SUM MAILSENT LASTDATA FINISHED Q_VIEWER LASTPAGE
# FALSE 20 20 20 0 20 20 20 20
# TRUE 0 0 0 20 0 0 0 0
# MAXPAGE MISSING MISSREL TIME_RSI DEG_TIME
# FALSE 20 20 20 20 20
# TRUE 0 0 0 0 0
Drop the drop_na(), or at least drop_na(-SERIAL, -REF, -MAILSENT).
Your code is using funs, which has been deprecated since dplyr-0.8.0.
# Warning: `funs()` is deprecated as of dplyr 0.8.0.
# Please use a list of either functions or lambdas:
# # Simple named list:
# list(mean = mean, median = median)
# # Auto named with `tibble::lst()`:
# tibble::lst(mean, median)
# # Using lambdas
# list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
While this isn't causing an error, it is causing a warning (and will likely stop working at some point). Change your mutate_at to be:
mutate_at(.vars = vars(RG04, RF02),
.funs = list(prop = ~ . / max(.)))
You are using a single variable within .vars and a single function within .funs, so the column names are preserved as-is (and you will not see a _prop column). From ?mutate_at:
The names of the new columns are derived from the names of the
input variables and the names of the functions.
• if there is only one unnamed function (i.e. if '.funs' is an
unnamed list of length one), the names of the input variables
are used to name the new columns;
• for _at functions, if there is only one unnamed variable
(i.e., if '.vars' is of the form 'vars(a_single_column)') and
'.funs' has length greater than one, the names of the
functions are used to name the new columns;
• otherwise, the new names are created by concatenating the
names of the input variables and the names of the functions,
separated with an underscore '"_"'.
If you aren't going to add more variables and functions, then you need to self-name it in the call, as in mutate_at(.vars = vars(RG04 = RG04), ...). Oddly enough, this causes it to produce RG04_prop.
If we fix all of those, then it works.
df_literacy %>%
drop_na(-SERIAL, -REF, -MAILSENT) %>%
mutate_at(.vars = vars(RG04 = RG04),
.funs = list(prop = ~ ./max(.))) %>%
select(contains("_prop")) %>%
head(3)
# A tibble: 3 x 1
# RG04_prop
# <dbl>
# 1 0.5
# 2 1
# 3 0.5
hth1 is a data frame that I already have.
> hth1
Source: local data frame [13 x 14]
Groups: team [13]
team CSK DC DD GL KKR KTK KXIP MI PW RCB RPSG
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 CSK 0 8 11 0 11 2 9 10 4 10 0
2 DC 2 0 8 0 2 1 7 5 3 8 0
3 DD 5 3 0 0 7 2 8 5 2 10 2
4 GL 0 0 2 0 0 0 0 0 0 1 0
5 KKR 5 7 10 2 0 0 5 10 3 15 0
6 KTK 0 0 0 0 2 0 1 0 1 2 0
7 KXIP 8 3 10 2 14 0 0 11 2 6 1
8 MI 12 5 13 2 8 1 7 0 3 11 1
9 PW 2 1 4 0 2 0 4 3 0 1 0
10 RCB 9 3 7 2 3 0 12 8 4 0 1
11 RPSG 0 0 0 2 2 0 1 1 0 1 0
12 RR 8 2 7 0 14 1 7 6 2 7 0
13 SH 3 0 4 0 5 0 4 5 2 5 2
# ... with 2 more variables: RR <dbl>, SH <dbl>
Why do the data frame returned by bind_rows() and the original data frame differ?
> h <- list(hth1)
> hth_b1 <- bind_rows(h)
> identical(hth1, hth_b1)
[1] FALSE
> class(hth_b1)
[1] "grouped_df" "tbl_df" "tbl" "data.frame"
> class(hth1)
[1] "grouped_df" "tbl_df" "tbl" "data.frame"
> setequal(hth1, hth_b1)
TRUE
> anti_join(hth1, hth_b1)
Joining, by = c("team", "CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH")
Source: local data frame [0 x 14]
Groups: team [13]
# ... with 14 variables: team <chr>, CSK <dbl>, DC <dbl>, DD <dbl>, GL <dbl>,
# KKR <dbl>, KTK <dbl>, KXIP <dbl>, MI <dbl>, PW <dbl>, RCB <dbl>,
# RPSG <dbl>, RR <dbl>, SH <dbl>
What am I missing? I have been stuck here for a long time.
Update 1:
As requested by Benjamin, I ran the dput() function on both data frames. Here is the output.
> dput(hth_b1)
structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK",
"KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), CSK = c(0, 2,
5, 0, 5, 0, 8, 12, 2, 9, 0, 8, 3), DC = c(8, 0, 3, 0, 7, 0, 3,
5, 1, 3, 0, 2, 0), DD = c(11, 8, 0, 2, 10, 0, 10, 13, 4, 7, 0,
7, 4), GL = c(0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0), KKR = c(11,
2, 7, 0, 0, 2, 14, 8, 2, 3, 2, 14, 5), KTK = c(2, 1, 2, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0), KXIP = c(9, 7, 8, 0, 5, 1, 0, 7, 4,
12, 1, 7, 4), MI = c(10, 5, 5, 0, 10, 0, 11, 0, 3, 8, 1, 6, 5
), PW = c(4, 3, 2, 0, 3, 1, 2, 3, 0, 4, 0, 2, 2), RCB = c(10,
8, 10, 1, 15, 2, 6, 11, 1, 0, 1, 7, 5), RPSG = c(0, 0, 2, 0,
0, 0, 1, 1, 0, 1, 0, 0, 2), RR = c(9, 7, 9, 0, 1, 1, 8, 10, 3,
9, 0, 0, 7), SH = c(3, 0, 4, 3, 4, 0, 4, 3, 0, 4, 0, 0, 0)), .Names = c("team",
"CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB",
"RPSG", "RR", "SH"), row.names = c(NA, -13L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), vars = list(team), indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI",
"PW", "RCB", "RPSG", "RR", "SH")), row.names = c(NA, -13L
), class = "data.frame", vars = list(team), .Names = "team"))
>
> dput(hth1)
structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK",
"KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), CSK = c(0, 2,
5, 0, 5, 0, 8, 12, 2, 9, 0, 8, 3), DC = c(8, 0, 3, 0, 7, 0, 3,
5, 1, 3, 0, 2, 0), DD = c(11, 8, 0, 2, 10, 0, 10, 13, 4, 7, 0,
7, 4), GL = c(0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0), KKR = c(11,
2, 7, 0, 0, 2, 14, 8, 2, 3, 2, 14, 5), KTK = c(2, 1, 2, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0), KXIP = c(9, 7, 8, 0, 5, 1, 0, 7, 4,
12, 1, 7, 4), MI = c(10, 5, 5, 0, 10, 0, 11, 0, 3, 8, 1, 6, 5
), PW = c(4, 3, 2, 0, 3, 1, 2, 3, 0, 4, 0, 2, 2), RCB = c(10,
8, 10, 1, 15, 2, 6, 11, 1, 0, 1, 7, 5), RPSG = c(0, 0, 2, 0,
0, 0, 1, 1, 0, 1, 0, 0, 2), RR = c(9, 7, 9, 0, 1, 1, 8, 10, 3,
9, 0, 0, 7), SH = c(3, 0, 4, 3, 4, 0, 4, 3, 0, 4, 0, 0, 0)), .Names = c("team",
"CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB",
"RPSG", "RR", "SH"), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -13L), vars = list(team), labels = structure(list(
team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI",
"PW", "RCB", "RPSG", "RR", "SH")), class = "data.frame", row.names = c(NA,
-13L), vars = list(team), drop = TRUE, .Names = "team"), indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), drop = TRUE, group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L)
There is a difference between the two outputs: there is an extra drop = TRUE for hth1.
I don't understand why it is not there in the other one.
A reproducible example:
library(tidyverse)
test1 <- mtcars %>% group_by(cyl)
test2 <- bind_rows(list(test1))
identical(test1, test2) #FALSE
all_equal(test1, test2) #TRUE
You can check both their attributes and you can see the rownames differ:
rownames(test1)
[1] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710"
[4] "Hornet 4 Drive" "Hornet Sportabout" "Valiant"
[7] "Duster 360" "Merc 240D" "Merc 230"
[10] "Merc 280" "Merc 280C" "Merc 450SE"
[13] "Merc 450SL" "Merc 450SLC" "Cadillac Fleetwood"
[16] "Lincoln Continental" "Chrysler Imperial" "Fiat 128"
[19] "Honda Civic" "Toyota Corolla" "Toyota Corona"
[22] "Dodge Challenger" "AMC Javelin" "Camaro Z28"
[25] "Pontiac Firebird" "Fiat X1-9" "Porsche 914-2"
[28] "Lotus Europa" "Ford Pantera L" "Ferrari Dino"
[31] "Maserati Bora" "Volvo 142E"
rownames(test2)
[1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13"
[14] "14" "15" "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26"
[27] "27" "28" "29" "30" "31" "32"
Never expect tibbles to treat your rownames with respect, they may be silently dropped at any time.
Forgive the formatting on this answer, but it would appear that you have labels attached to one object, and not in the other. Where the labels got attached or removed isn't something I can know without looking at code that generates the objects. I've bolded the difference in your objects below.
Note: not formatting this as code is a deliberate choice. Formatting as code prevents me from marking the difference in the structure in bold text
dput(hth_b1)
structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK",
"KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), CSK = c(0, 2,
5, 0, 5, 0, 8, 12, 2, 9, 0, 8, 3), DC = c(8, 0, 3, 0, 7, 0, 3,
5, 1, 3, 0, 2, 0), DD = c(11, 8, 0, 2, 10, 0, 10, 13, 4, 7, 0,
7, 4), GL = c(0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0), KKR = c(11,
2, 7, 0, 0, 2, 14, 8, 2, 3, 2, 14, 5), KTK = c(2, 1, 2, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0), KXIP = c(9, 7, 8, 0, 5, 1, 0, 7, 4,
12, 1, 7, 4), MI = c(10, 5, 5, 0, 10, 0, 11, 0, 3, 8, 1, 6, 5
), PW = c(4, 3, 2, 0, 3, 1, 2, 3, 0, 4, 0, 2, 2), RCB = c(10,
8, 10, 1, 15, 2, 6, 11, 1, 0, 1, 7, 5), RPSG = c(0, 0, 2, 0,
0, 0, 1, 1, 0, 1, 0, 0, 2), RR = c(9, 7, 9, 0, 1, 1, 8, 10, 3,
9, 0, 0, 7), SH = c(3, 0, 4, 3, 4, 0, 4, 3, 0, 4, 0, 0, 0)), .Names = c("team",
"CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB",
"RPSG", "RR", "SH"), row.names = c(NA, -13L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), vars = list(team), indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L , labels = structure(list(
team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI",
"PW", "RCB", "RPSG", "RR", "SH")), row.names = c(NA, -13L
), class = "data.frame", vars = list(team), .Names = "team"))
dput(hth1)
structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK",
"KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), CSK = c(0, 2,
5, 0, 5, 0, 8, 12, 2, 9, 0, 8, 3), DC = c(8, 0, 3, 0, 7, 0, 3,
5, 1, 3, 0, 2, 0), DD = c(11, 8, 0, 2, 10, 0, 10, 13, 4, 7, 0,
7, 4), GL = c(0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0), KKR = c(11,
2, 7, 0, 0, 2, 14, 8, 2, 3, 2, 14, 5), KTK = c(2, 1, 2, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0), KXIP = c(9, 7, 8, 0, 5, 1, 0, 7, 4,
12, 1, 7, 4), MI = c(10, 5, 5, 0, 10, 0, 11, 0, 3, 8, 1, 6, 5
), PW = c(4, 3, 2, 0, 3, 1, 2, 3, 0, 4, 0, 2, 2), RCB = c(10,
8, 10, 1, 15, 2, 6, 11, 1, 0, 1, 7, 5), RPSG = c(0, 0, 2, 0,
0, 0, 1, 1, 0, 1, 0, 0, 2), RR = c(9, 7, 9, 0, 1, 1, 8, 10, 3,
9, 0, 0, 7), SH = c(3, 0, 4, 3, 4, 0, 4, 3, 0, 4, 0, 0, 0)), .Names = c("team",
"CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB",
"RPSG", "RR", "SH"), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -13L), vars = list(team), labels = structure(list(
team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI",
"PW", "RCB", "RPSG", "RR", "SH")), class = "data.frame", row.names = c(NA,
-13L), vars = list(team), drop = TRUE, .Names = "team"), indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), drop = TRUE, group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L)
In the example below, I will add labels to the mtcars data frame, then run it through bind_rows, and you'll see that the labels are no longer present. This is what I believe is happening to your data.
library(Hmisc)
mtcars2 <- mtcars
label(mtcars2, self = FALSE) <- toupper(names(mtcars))
library(dplyr)
mtcars3 <- bind_rows(mtcars2)
identical(mtcars2, mtcars3)
label(mtcars3)
This question already has answers here:
How can I get the average (mean) of selected columns
(3 answers)
Closed 5 years ago.
I'm trying to average across columns; however, because some of the columns are missing data, the average ends up being NA as well.
Is there a way to find the mean of a number of columns while excluding any NA data from the calculation?
The code I've used so far is:
### Calculate Bins ###
{pulse<-transmute(pulse, Question, Type, Student,Bin1=(Rt1+ Rt2 + Rt3+ Rt4)/4 , Bin2= (Rt5+Rt6+Rt7+Rt8)/4 , Bin3= (Rt9+Rt10+Rt11)/3)
}
However, I don't think this is the best way. My goal is to have three columns with the means of Rt1-Rt4, Rt5-Rt8 and Rt9-Rt11, i.e. something like this:
Question Type Student Bin1 Bin2 Bin3
1 Q SNR 789331 4.25 4.00 4.666667
2 Q2 SNR 789331 3.75 2.50 3.000000
3 Q8 SNR 789331 4.00 2.50 3.333333
4 Q10 SNR 789331 4.00 2.75 3.333333
5 Q12 SNR 789331 3.50 3.25 3.666667
Any help would be appreciated!
My data is attached below:
> dput(pulse)
structure(list(Question = c("Q", "Q2", "Q8", "Q10", "Q12", "Q",
"Q2", "Q8", "Q10", "Q12", "Q", "Q2", "Q8", "Q10", "Q12", "Q",
"Q2", "Q8", "Q10", "Q12", "Q", "Q2", "Q8", "Q10", "Q12", "Q",
"Q2", "Q8", "Q10", "Q12", "Q", "Q2", "Q8", "Q10", "Q12", "Q",
"Q2", "Q8", "Q10", "Q12", "Q", "Q2", "Q8", "Q10", "Q12", "Q",
"Q2", "Q8", "Q10", "Q12", "Q", "Q2", "Q8", "Q10", "Q12", "Q",
"Q2", "Q8", "Q10", "Q12", "Q", "Q2", "Q8", "Q10", "Q12", "Q",
"Q2", "Q8", "Q10", "Q12", "Q", "Q2", "Q8", "Q10", "Q12", "Q",
"Q2", "Q8", "Q10", "Q12", "Q", "Q2", "Q8", "Q10", "Q12", "Q",
"Q2", "Q8", "Q10", "Q12", "Q", "Q2", "Q8", "Q10", "Q12", "Q",
"Q2", "Q8", "Q10", "Q12", "Q", "Q2", "Q8", "Q10", "Q12", "Q",
"Q2", "Q8", "Q10", "Q12", "Q", "Q2", "Q8", "Q10", "Q12", "Q",
"Q2", "Q8", "Q10", "Q12", "Q", "Q2", "Q8", "Q10", "Q12", "Q",
"Q2", "Q8", "Q10", "Q12", "Q", "Q2", "Q8", "Q10", "Q12", "Q",
"Q2", "Q8", "Q10", "Q12", "Q", "Q2", "Q8", "Q10", "Q12", "Q",
"Q2", "Q8", "Q10", "Q12", "Q", "Q2", "Q8", "Q10", "Q12", "Q",
"Q2", "Q8", "Q10", "Q12", "Q", "Q2", "Q8", "Q10", "Q12", "Q",
"Q2", "Q8", "Q10", "Q12", "Q", "Q2", "Q8", "Q10", "Q12", "Q",
"Q2", "Q8", "Q10", "Q12", "Q", "Q2", "Q8", "Q10", "Q12", "Q",
"Q2", "Q8", "Q10", "Q12", "Q", "Q2", "Q8", "Q10", "Q12", "Q",
"Q2", "Q8", "Q10", "Q12", "Q", "Q2", "Q8", "Q10", "Q12"), Type = c("SNR",
"SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR",
"SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR",
"SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR",
"SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR",
"SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR",
"SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR",
"SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR",
"SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR",
"SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR",
"SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR",
"SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR",
"SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR",
"SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR", "SNR",
"SNR", "SNR", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS",
"FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS",
"FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS",
"FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS",
"FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS",
"FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS",
"FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS",
"FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS",
"FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS", "FYS",
"FYS", "FYS", "FYS", "FYS", "FYS", "FYS"), Student = c("789331",
"789331", "789331", "789331", "789331", "805933", "805933", "805933",
"805933", "805933", "826523", "826523", "826523", "826523", "826523",
"832929", "832929", "832929", "832929", "832929", "838607", "838607",
"838607", "838607", "838607", "841903", "841903", "841903", "841903",
"841903", "843618", "843618", "843618", "843618", "843618", "852125",
"852125", "852125", "852125", "852125", "876406", "876406", "876406",
"876406", "876406", "879972", "879972", "879972", "879972", "879972",
"885650", "885650", "885650", "885650", "885650", "888712", "888712",
"888712", "888712", "888712", "903303", "903303", "903303", "903303",
"903303", "796882", "796882", "796882", "796882", "796882", "827911",
"827911", "827911", "827911", "827911", "830271", "830271", "830271",
"830271", "830271", "831487", "831487", "831487", "831487", "831487",
"834598", "834598", "834598", "834598", "834598", "836364", "836364",
"836364", "836364", "836364", "839802", "839802", "839802", "839802",
"839802", "855524", "855524", "855524", "855524", "855524", "873527",
"873527", "873527", "873527", "873527", "885409", "885409", "885409",
"885409", "885409", "894218", "894218", "894218", "894218", "894218",
"928026", "928026", "928026", "928026", "928026", "932196", "932196",
"932196", "932196", "932196", "955389", "955389", "955389", "955389",
"955389", "956952", "956952", "956952", "956952", "956952", "957206",
"957206", "957206", "957206", "957206", "957759", "957759", "957759",
"957759", "957759", "959200", "959200", "959200", "959200", "959200",
"962490", "962490", "962490", "962490", "962490", "968728", "968728",
"968728", "968728", "968728", "969005", "969005", "969005", "969005",
"969005", "971179", "971179", "971179", "971179", "971179", "976863",
"976863", "976863", "976863", "976863", "981621", "981621", "981621",
"981621", "981621", "952797", "952797", "952797", "952797", "952797",
"965873", "965873", "965873", "965873", "965873", "967416", "967416",
"967416", "967416", "967416", "975424", "975424", "975424", "975424",
"975424"), Rt1 = c(4, 3, 4, 4, 3, 5, 4, 5, 5, 5, 4, 4, 4, 5,
5, 4, 4, 4, 4, 3, 5, 5, 5, 5, 5, 2, 3, 4, 3, 4, 4, 5, 5, 4, 4,
3, 3, 3, 4, 3, 3, 3, 4, 4, 4, 3, 4, 5, 4, 3, 4, 4, 4, 3, 5, 4,
4, 4, 5, 5, 3, 4, 4, 4, 3, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 4, 5, 3, 4, 4, 4, 3, 3, 5, 4, 4, 2, 2, 3, 4, NA, NA,
NA, NA, NA, 3, 4, 4, 4, 3, NA, NA, NA, NA, NA, 5, 4, 5, 4, 4,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4, 4, 3, 3, 4, 1, 3,
4, 5, 4, 4, 4, 5, 4, 4, NA, NA, NA, NA, NA), Rt2 = c(4, 4, 4,
4, 3, 4, 4, 4, 4, 4, 3, 4, 4, 5, 5, 4, 4, 4, 4, 3, 5, 5, 5, 5,
5, 4, 4, 4, 4, 5, 4, 4, 5, 5, 4, NA, NA, NA, NA, NA, 4, 4, 4,
4, 4, 3, 4, 4, 5, 3, 4, 4, 4, 5, 5, 4, 4, 4, 4, 4, 1, 5, 5, 5,
3, 3, 5, 5, 5, 4, 5, 4, 3, 4, 5, 4, 5, 5, 5, 4, 4, 5, 4, 5, 4,
5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 3, 4, 3, 4, 3, 5, 5, 5, 5, 5, 3,
5, 4, 4, 3, 4, 5, 5, 5, 5, 4, 4, 4, 5, 5, 4, 5, 5, 5, 4, 4, 2,
2, 4, 4, 5, 5, 5, 5, 5, 3, 4, 4, 5, 5, 5, 5, 3, 5, 4, 5, 4, 4,
5, 4, 5, 2, 3, 4, 3, 4, 3, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 3, 4,
3, 5, 5, 5, 5, 4, 5, 5, 5, 3, 4, 4, 5, 5, 5, 5, NA, NA, NA, NA,
NA, NA, 4, 5, 5, 5, NA, NA, NA, NA, NA, 4, 4, 4, 4, 4), Rt3 = c(4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 5, 5, 4, 4, 4, 4, 3, 5, 5,
5, 5, 5, 4, 5, 4, 4, 4, 5, 4, 5, 5, 4, 4, 4, 4, 4, 3, 4, 3, 4,
5, 5, 3, 4, 4, 4, 4, 3, 4, 4, 4, 5, NA, NA, NA, NA, NA, 3, 5,
5, 5, 5, 3, 4, 5, 5, 3, 4, 3, 3, 4, 4, 4, 5, 5, 5, 5, 4, 5, 4,
4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 1, 3, 1, 4, 1, 4, 5, 5, 5,
4, 4, 4, 4, 4, 3, 4, 5, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, 5, 4, 5,
NA, NA, NA, NA, NA, 4, 4, 5, 5, 5, NA, NA, NA, NA, NA, 5, 4,
4, 4, 3, 5, 4, 4, 5, 4, NA, NA, NA, NA, NA, 5, 4, 3, 5, 4, 3,
4, 4, 4, 3, 5, 5, 4, 4, 5, 5, 4, 4, 5, 4, NA, 5, 5, 5, 5, 5,
4, 4, 5, 5, NA, NA, NA, NA, NA, 5, 5, 5, 5, 5, 5, 5, 4, 3, 4,
3, 4, 3, 3, 4), Rt4 = c(5, 4, 4, 4, 4, 4, 4, 3, 4, 3, 4, 4, 4,
5, 5, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, NA, NA, NA, NA, NA, 5, 4,
4, 4, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, NA, NA, NA, NA, NA, 4,
4, 4, 3, 5, 4, 4, 4, 4, 5, 3, 4, 4, 4, 5, 3, 4, 5, 5, 3, NA,
NA, NA, NA, NA, 5, 5, 5, 5, 5, 5, 5, 4, 4, 5, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 1, 1, 2, 3, 2, 4, 5, 5, 5, 4, 4, 4, 4, 4, 5, 4,
5, 5, 5, 5, 5, 5, 4, 4, 5, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
5, 4, 4, 5, 4, NA, NA, NA, NA, NA, 4, 4, 5, 4, 4, 4, 3, 3, 4,
3, 5, 4, 4, 4, 5, NA, NA, NA, NA, NA, 5, 4, 3, 3, 4, NA, NA,
NA, NA, NA, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA), Rt5 = c(3, 3, 3, 4, 4, 4, 3, 3, 3, 3, 4,
5, 4, 5, 5, 2, 4, 4, 4, 4, 5, 5, 5, 5, 5, 4, 4, 4, 3, 3, 5, 4,
4, 4, 5, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 3, 4, 4, 4, 5,
4, 4, 4, 4, 5, 4, 5, NA, NA, NA, NA, NA, 3, 2, 4, 4, 1, 3, 2,
3, 5, 4, 5, 5, 5, 5, 5, 4, 5, 4, 5, 4, 4, 4, 4, 4, 5, 3, 4, 3,
4, 4, 5, 4, 3, 4, 5, 4, 4, 5, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 4,
4, 5, 5, 5, 5, 5, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4,
3, 3, 5, 5, NA, NA, NA, NA, NA, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3,
4, 2, 2, 4, 4, 5, 4, 4, 4, 4, 3, 3, 4, 4, 3, NA, NA, NA, NA,
NA, 5, 5, 4, 4, 4, NA, NA, NA, NA, NA, 5, 5, 5, 5, 5, 5, 4, 4,
4, 5, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, NA, NA, NA, NA, NA), Rt6 = c(4,
2, 2, 1, 3, 4, 3, 3, 3, 3, 4, 5, 5, 4, 5, NA, NA, NA, NA, NA,
5, 4, 4, 4, 5, NA, NA, NA, NA, NA, 5, 4, 4, 4, 5, 3, 3, 4, 4,
4, 4, 3, 2, 1, 2, 4, 4, 4, 5, 4, 4, 5, 4, 3, 4, 4, 5, 5, 4, 4,
3, 4, 4, 3, 3, 5, 3, 2, 3, 5, 4, 3, 3, 4, 3, 5, 4, 4, 4, 5, NA,
NA, NA, NA, NA, 4, 4, 4, 4, 4, 3, 4, 3, 3, 3, 2, 2, 3, 2, 2,
4, 4, 5, 4, 5, NA, NA, NA, NA, NA, 4, 5, 5, 4, 4, 5, 5, 5, 5,
5, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
3, 2, 4, 3, 4, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 4, 4,
5, 4, 5, 5, 3, 3, 3, 3, 3, NA, NA, NA, NA, NA, NA, 5, 4, 4, 4,
NA, NA, NA, NA, NA, 5, 3, 4, 4, 5, 4, 3, 4, 4, 3, 4, 4, 4, 3,
4, 4, 4, 5, 4, 5, NA, NA, NA, NA, NA), Rt7 = c(5, 2, 2, 3, 3,
4, 3, 3, 3, 3, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, 4,
4, 4, 4, 4, 4, 3, 4, 5, 5, 4, 4, 4, 5, 3, 4, 3, 4, 4, 4, 3, 2,
2, 3, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 5, 4, 5, 4, 5, 3, 4, 4, 4,
4, 4, 3, 1, 1, 5, NA, NA, NA, NA, NA, 5, 5, 4, 5, 5, 4, 5, 4,
4, 4, 4, 4, 4, 4, 4, 3, 4, 3, 4, 4, 3, 3, 3, 3, 3, 5, 5, 5, 5,
4, 4, 4, 4, 4, 5, 4, 5, 5, 3, 4, 5, 5, 5, 5, 5, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, 3, 5, 5, 4, 5, 5, 5, 3, 4, 5, 4, 4, 4,
4, 4, 4, 3, 3, 3, 3, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
1, 1, 1, 1, 1, 5, 4, 4, 4, 5, 5, 4, 4, 4, 4, 4, 3, 3, 4, 4, 5,
3, 4, 3, 4, 4, 4, 4, 4, 4, 3, 1, 1, 1, 1, 5, 5, 5, 4, 4, 3, 2,
2, 3, 4), Rt8 = c(4, 3, 3, 3, 3, 4, 3, 3, 3, 3, 5, 5, 5, 4, 4,
NA, NA, NA, NA, NA, 5, 4, 4, 5, 4, 3, 4, 3, 3, 4, 5, 4, 4, 3,
5, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 3, 5,
4, 4, 4, 3, 4, 3, 4, 4, 3, 4, 1, 1, 1, 1, 3, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, 5, 5, 4, 4, 5, NA, NA, NA, NA, NA, 3,
4, 3, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 5, 4, 4, 5, 5, 5,
4, 3, 5, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 3, 5, 5, 5, 5, 4, 4, 4, 5, 4, 5, 5, 4, 4, 3, 4, 3, 3,
3, 3, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4, 3, 3, 3, 3, 3, 5, 5, 4, 4,
5, 5, 5, 4, 5, 5, 4, 3, 3, 4, 4, 5, 5, 5, 3, 3, 5, 4, 4, 4, 4,
3, 2, 2, 2, 2, 5, 5, 5, 5, 5, NA, NA, NA, NA, NA), Rt9 = c(4,
3, 3, 3, 3, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 4, 3, 4, 4, 4, 4, 4, 4, 4, 5, 4, 3, 3, 4, 4, NA, NA,
NA, NA, NA, 3, 3, 3, 2, 4, 4, 4, 4, 4, 4, 5, 4, 4, 3, 3, 5, 4,
4, 4, 4, 3, 4, 4, 4, 4, 3, 1, 1, 1, 5, NA, NA, NA, NA, NA, 5,
5, 5, 5, 5, 5, 5, 5, 4, 5, NA, NA, NA, NA, NA, 3, 4, 3, 3, 4,
3, 3, 3, 2, 3, 5, 5, 5, 5, 5, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 4, 5, 5, 4, 4, NA, NA, NA, NA, NA, 5, 4, 3, 4, 4, 4, 3, 3,
3, 2, NA, NA, NA, NA, NA, 1, 1, 1, 1, 1, 2, 3, 4, 4, 2, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 4, 1, 1, 1, 1, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA), Rt10 = c(5, 3, 3, 3, 4, NA, NA, NA, NA,
NA, 5, 4, 4, 4, 4, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 5, 4, 4, 3, 4, 4, 3, 3, 3, 4, 4, 3, 2, 3, 4,
4, 4, 4, 4, 4, 5, 5, 4, 3, 3, 5, 4, 4, 3, 4, 3, 4, 4, 4, 3, 3,
1, 1, 1, 4, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, 5, 4,
3, 5, 4, 4, 4, 4, 4, 3, 4, 3, 3, 4, 1, 1, 2, 2, 3, 4, 5, 4, 4,
4, 4, 4, 4, 3, 4, 4, 4, 4, 2, 5, 4, 4, 4, 3, 5, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, 4, 4, 4, 4, 4,
4, 3, 4, 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 5, 4, 2, 2, 4, 4, 1, 1,
3, 1, 2, 5, 5, 4, 4, 5, NA, NA, NA, NA, NA, 4, 5, 3, 4, 4, 5,
5, 5, 5, 5, 4, 4, 4, 4, 4, 5, 3, 3, 2, 4, NA, NA, NA, NA, NA,
3, 4, 3, 4, 4), Rt11 = c(5, 3, 4, 4, 4, 4, 3, 3, 3, 3, 4, 4,
4, 4, 5, NA, NA, NA, NA, NA, 4, 4, 3, 3, 4, 3, 5, 5, 5, 5, 5,
4, 4, 4, 5, 3, 5, 5, 5, 5, 4, 4, 4, 4, 5, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 5, 5, 5, 4, 4, 4, 5, 5, 4, 5, 5, 3, 4, 5,
4, NA, NA, NA, NA, NA, 5, 5, 5, 5, 5, 5, 5, 4, 4, 5, 4, 4, 4,
4, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 5, 4, 5, 4, 4,
5, 4, 4, 4, 3, 3, 5, 5, 5, 5, 5, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5, 4, 4,
4, 5, 5, 4, 5, 5, 4, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
1, 1, 1, 2, 3, 5, 5, 4, 4, 5, 5, 5, 5, 5, 5, NA, NA, NA, NA,
NA, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA)), .Names = c("Question", "Type",
"Student", "Rt1", "Rt2", "Rt3", "Rt4", "Rt5", "Rt6", "Rt7", "Rt8",
"Rt9", "Rt10", "Rt11"), row.names = c(NA, -205L), class = c("tbl_df",
"tbl", "data.frame"))
To generate mean of rows:
# Average only the numeric rating columns: rowMeans() stops with
# "'x' must be numeric" when the character columns (Question, Type, Student)
# are passed along with them. Rt1-Rt4 are the columns behind Bin1; swap in
# Rt5-Rt8 or Rt9-Rt11 for the other bins.
rating_cols <- c("Rt1", "Rt2", "Rt3", "Rt4")
# Replace the placeholder with the rows you want (leave it empty to keep all).
dataframe <- pulse[(number_of_rows_you_are_interested_in), rating_cols]
# na.rm = TRUE drops missing ratings, so each mean is taken over the values
# actually present in that row (NaN when every rating in the row is missing).
rowMeans(dataframe, na.rm = TRUE)
# Rt1 is a column of `pulse`, not a free-standing object, so refer to it
# through the data frame. Keeps only the rows whose Rt1 rating is present.
pulse[!is.na(pulse$Rt1), ]
The above code returns the reduced dataframe by excluding all NA entries in Rt1
You may use this expression across your columns
I've found complete.cases() to be particularly useful to only give you rows that have no NAs
# Keep only the rows of `pulse` that have no missing value in any column.
pulse <- pulse[rowSums(is.na(pulse)) == 0, ]
and then you should be able to calculate over this dataframe
also, instead of having to manually calculate the average, follow this link's example (which is pretty similar to your question to begin with)