My R code is taking too long and is too complex - r

Using big data sets, this code is taking a really long time to process. Does anyone have any simpler ways of running it?
Ran this code, locked up my machine for a while
SID_Scores <- filtered %>%
group_by(SalesPerson_SID) %>%
summarise(
Brand_Advocacy = mean(Q1, na.rm = TRUE),
Vehicle_Satisfaction = mean(Q2, na.rm = TRUE),
Dealer_Satisfaction = mean(Q3, na.rm = TRUE),
Sales_Advocacy = mean(Q6N_srvsls_Recommend_10Pt, na.rm = TRUE),
Overall_SalesCon = mean(Q5N1_ovrsls, na.rm = TRUE),
Understanding_Needs = mean(Q7N1_slsneeds, na.rm = TRUE),
Product_Features = mean(Q7N2_slsfeat, na.rm = TRUE),
Professional_Court = mean(Q7N3_slsprof, na.rm = TRUE),
Feel_Valued = mean(SlsValued, na.rm = TRUE),
Trustworthy = mean(SlsTrustworthy, na.rm = TRUE),
Financial_Arrang = mean(Q5N2_ovrfin, na.rm = TRUE),
Financial_Agreement = mean(Q8N2_finease, na.rm = TRUE),
Respect_Time = mean(Q8N3_fintime, na.rm = TRUE),
Honesty = mean(Q8N4_finhon, na.rm = TRUE),
Delivery = mean(Q5N3_ovrdlv, na.rm = TRUE),
U_Pairing = (sum(filtered$Q_UCPairing == '1', na.rm = TRUE)) / (
sum(filtered$Q_UCPairing == '1', na.rm = TRUE) +
sum(filtered$Q_UCPairing == '2', na.rm = TRUE)
),
U_Demonstrate = (sum(filtered$Q_UCDemonstrate == '1', na.rm = TRUE)) /
(
sum(filtered$Q_UCDemonstrate == '1', na.rm = TRUE) +
sum(filtered$Q_UCDemonstrate == '2', na.rm = TRUE)
),
U_FreeTrials = (sum(filtered$Q_UCFreeTrials == '1', na.rm = TRUE)) /
(
sum(filtered$Q_UCFreeTrials == '1', na.rm = TRUE) +
sum(filtered$Q_UCFreeTrials == '2', na.rm = TRUE)
),
U_Presets = (sum(filtered$Q_UCRadioPreset == '1', na.rm = TRUE)) /
(
sum(filtered$Q_UCRadioPreset == '1', na.rm = TRUE) +
sum(filtered$Q_UCRadioPreset == '2', na.rm = TRUE)
)
) %>%
group_by(SalesPerson_SID)
This has been running for several hours now. Filtered has 540000 rows with 35 variables
Here is the code to reproduce some sample data:
structure(list(EventType = c("001", "001", "001", "001", "001",
"001"), `Survey Type` = c("Sales", "Sales", "Sales", "Sales",
"Sales", "Sales"), ModelYear = c(2018, 2019, 2018, 2018, 2018,
2018), PurchaseDate = c(20181209, 20181216, 20181209, 20181215,
20181218, 20181218), `ZoneCode (BC)` = c("32", "71", "71", "51",
"63", "74"), SalesDistrict = c("G", "D", "G", "C", "T", "G"),
SalesGroupSize = c("E", "E", "B", "D", "D", "B"), DealerCode = c("60698",
"45622", "69319", "36277", "44107", "26922"), Q1 = c(9, 8,
10, 10, 10, 9), Q2 = c(9, 10, 10, 10, 10, 9), Q3 = c(8, 10,
10, 10, 10, 9), Q6N_srvsls_Recommend_10Pt = c(9, 10, 10,
10, 10, 9), Q5N1_ovrsls = c(8, 10, 10, 10, 10, 8), Q5N2_ovrfin = c(9,
10, 10, 10, 10, 7), Q5N3_ovrdlv = c(8, NA, 10, 10, 10, 6),
Q5N4_srvsls_facility = c(9, 10, 10, 10, 10, 10), Q7N1_slsneeds = c(9,
10, 10, 10, 10, 9), Q7N2_slsfeat = c(9, 10, 10, 10, 10, 9
), Q7N3_slsprof = c(10, 10, 10, 10, 10, 9), Q8N1_finneg = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), Q8N2_finease = c(9,
10, 10, 9, 10, 7), Q8N3_fintime = c(9, 10, 10, 10, 10, 10
), Q8N4_finhon = c(9, 10, 10, 10, 10, 9), Q9 = c(0, 0, 0,
0, 0, 0), SlsValued = c(9, 10, 10, 10, 10, 8), SlsTrustworthy = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), SlsPaperwork = c(NA,
3, 2, 2, 2, NA), `SlsF&ITransaction` = c(3, 2, 2, 3, 1, 4
), SalesPerson_SID = c("S39547M", "S56830O", "S35478Q", "S61788P",
"S35680B", "S75254K"), Q_UCPairing = c(1, 1, 1, 1, 1, 1),
Q_UCDemonstrate = c(1, 1, 1, 1, NA, 1), Q_UCFreeTrials = c(1,
1, 1, 1, 1, 1), Q_UCRadioPreset = c(1, 1, 1, 2, 1, 1)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -6L), .Names = c("EventType",
"Survey Type", "ModelYear", "PurchaseDate", "ZoneCode (BC)",
"SalesDistrict", "SalesGroupSize", "DealerCode", "Q1", "Q2",
"Q3", "Q6N_srvsls_Recommend_10Pt", "Q5N1_ovrsls", "Q5N2_ovrfin",
"Q5N3_ovrdlv", "Q5N4_srvsls_facility", "Q7N1_slsneeds", "Q7N2_slsfeat",
"Q7N3_slsprof", "Q8N1_finneg", "Q8N2_finease", "Q8N3_fintime",
"Q8N4_finhon", "Q9", "SlsValued", "SlsTrustworthy", "SlsPaperwork",
"SlsF&ITransaction", "SalesPerson_SID", "Q_UCPairing", "Q_UCDemonstrate",
"Q_UCFreeTrials", "Q_UCRadioPreset"))

Related

how do i create a bar chart to compare pre and post scores between participants?

I am trying to create a bar chart or column chart plot to compare pre and post scores between participants. I managed to do this in a line graph, however, I am struggling to visualise this within a bar chart, can anyone help me with this?
Here is the data I am using:
structure(list(Participant = c(2, 3, 5, 7), PRE_QUIP_RS = c(24,
24, 20, 20), POST_QUIP_RS = c(10, 23, 24, 14), PRE_PDQ8 = c(11,
8, 10, 4), POST_PDQ8 = c(7, 7, 9, 4), PRE_GDS = c(1, 7, 1, 0),
POST_GDS = c(1, 4, 2, 0), PRE_PERSISTENT = c(9, 13, 6, 2),
POST_PERSISTENT = c(9, 13, 11, 3), PRE_EPISODIC = c(3, 4,
2, 0), POST_EPISODIC = c(2, 5, 6, 2), PRE_AVOIDANCE = c(6,
3, 0, 2), POST_AVOIDANCE = c(3, 3, 4, 1), PRE_IPQ = c(39,
48, 40, 37), POST_IPQ = c(16, 44, 30, 17), PRE_GSE = c(28,
31, 36, 29), POST_GSE = c(29, 30, 30, 29), PRE_BCI = c(11,
9, 5, 3), POST_BCI = c(3, 15, 0, 0)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -4L))
In terms of how I roughly want it to look, I want the bars to be placed together for pre and post for each participant, kind of like this:
You may try
library(tidyverse)
df %>%
select(Participant, PRE_QUIP_RS, POST_QUIP_RS) %>%
pivot_longer(cols = c(PRE_QUIP_RS, POST_QUIP_RS), names_to = "group") %>%
mutate(group = str_split(group, "_", simplify = T)[,1],
Participant = as.factor(Participant)) %>%
ggplot(aes(x = Participant, y = value, group = group, fill = group)) +
geom_col(position = "dodge")
PRE POST order
dummy %>%
select(Participant, PRE_QUIP_RS, POST_QUIP_RS) %>%
pivot_longer(cols = c(PRE_QUIP_RS, POST_QUIP_RS), names_to = "group") %>%
mutate(group = str_split(group, "_", simplify = T)[,1] %>%
factor(., levels = c("PRE", "POST")), # HERE
Participant = as.factor(Participant)) %>%
ggplot(aes(x = Participant, y = value, group = group, fill = group)) +
geom_col(position = "dodge")

Tidyverse change values based on name

I have a dataframe as follows
library(tidyverse)
library(tidymodels)
#df <- read_csv("C:\\Users\\omarl\\OneDrive\\Escritorio\\games.csv")
df <- structure(list(gameId = 3326086514, creationTime = 1504279457970,
gameDuration = 1949, seasonId = 9, winner = 1, firstBlood = 2,
firstTower = 1, firstInhibitor = 1, firstBaron = 1, firstDragon = 1,
firstRiftHerald = 2, t1_champ1id = 8, t1_champ1_sum1 = 12,
t1_champ1_sum2 = 4, t1_champ2id = 432, t1_champ2_sum1 = 3,
t1_champ2_sum2 = 4, t1_champ3id = 96, t1_champ3_sum1 = 4,
t1_champ3_sum2 = 7, t1_champ4id = 11, t1_champ4_sum1 = 11,
t1_champ4_sum2 = 6, t1_champ5id = 112, t1_champ5_sum1 = 4,
t1_champ5_sum2 = 14, t1_towerKills = 11, t1_inhibitorKills = 1,
t1_baronKills = 2, t1_dragonKills = 3, t1_riftHeraldKills = 0,
t1_ban1 = 92, t1_ban2 = 40, t1_ban3 = 69, t1_ban4 = 119,
t1_ban5 = 141, t2_champ1id = 104, t2_champ1_sum1 = 11, t2_champ1_sum2 = 4,
t2_champ2id = 498, t2_champ2_sum1 = 4, t2_champ2_sum2 = 7,
t2_champ3id = 122, t2_champ3_sum1 = 6, t2_champ3_sum2 = 4,
t2_champ4id = 238, t2_champ4_sum1 = 14, t2_champ4_sum2 = 4,
t2_champ5id = 412, t2_champ5_sum1 = 4, t2_champ5_sum2 = 3,
t2_towerKills = 5, t2_inhibitorKills = 0, t2_baronKills = 0,
t2_dragonKills = 1, t2_riftHeraldKills = 1, t2_ban1 = 114,
t2_ban2 = 67, t2_ban3 = 43, t2_ban4 = 16, t2_ban5 = 51), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame"))
df <- df %>%
mutate(winner = ifelse(winner == 1, "team1", "team2")) %>%
mutate(firstBlood = ifelse(firstBlood == 1, "team1", "team2")) %>%
mutate(firstTower = ifelse(firstTower == 1, "team1", "team2")) %>%
mutate(firstInhibitor = ifelse(firstInhibitor == 1, "team1", "team2")) %>%
mutate(firstBaron = ifelse(firstBaron == 1, "team1", "team2")) %>%
mutate(firstDragon = ifelse(firstDragon == 1, "team1", "team2")) %>%
mutate(firstRiftHerald = ifelse(firstRiftHerald == 1, "team1", "team2")) %>%
select(-gameId, -creationTime) %>%
filter(seasonId == 9) %>%
select(gameDuration, winner, firstBlood, firstTower, firstInhibitor, firstBaron, firstDragon,
firstRiftHerald)
As you can see, mutate is really redundant here, because I'm copying the code for every variable. Is there any way to apply the ifelse to columns that start with first, t1, etc. programatically?
You may try
library(dplyr)
df %>%
mutate(across(starts_with("t1")|starts_with("first"), ~ifelse(.x == 1, "team1", "team2")))
Park gave a best (one liner) solution. But if you want to look at some other options, here is how we can do it via using some other functions in dplyr:
df %>%
gather(key, value, firstBlood:t1_ban5) %>%
mutate(value = ifelse(value == 1, "team1", "team2")) %>%
spread(key, value) %>%
select(-gameId, -creationTime) %>%
filter(seasonId == 9) %>%
select(gameDuration, winner, firstBlood, firstTower, firstInhibitor, firstBaron, firstDragon,
firstRiftHerald)

getting error in dataframe replacement has 0 data has x

out <-diallele1(dataframe = fulldial, male = "MALE", female = "FEMALE",
progeny = "TRT", replication = "REP", yvar = "YIELD" )
structure(list(FAMILY = c(1, 1, 1, 2, 2, 2), TRT = c(11, 11,
11, 12, 12, 12), FAMQC = c(NA, NA, NA, 1, 1, 1), MALE = c(1,
1, 1, 1, 1, 1), FEMALE = c(1, 1, 1, 2, 2, 2), REP = c(1, 2, 3,
1, 2, 3), AUDPC = c(3116.66666666667, 2983.33333333333, 3050,
2483.33333333333, 1883.33333333333, 2183.33333333333)), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
while running these command i am getting the following error
Error in `$<-.data.frame`(`*tmp*`, "male", value = integer(0)) :
replacement has 0 rows, data has 192
somebody help me to fix this
thankyou

R error: Error in UseMethod("weekdays") : no applicable method for 'weekdays' applied to an object of class "character"

Ive tried creating a day of week variable using this code:
weekdays1 <- c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday')
fulldata$wDay <- factor((weekdays(fulldata$completed_ts) %in% weekdays1),
levels=c(FALSE, TRUE), labels=c('weekend', 'weekday'))
# Error message:
# Error in UseMethod("weekdays") :
# no applicable method for 'weekdays' applied to an object of class "character"
This issue arises only after adding a new variable (coded on SPSS) to the data file, and resolves when removing this added variable (however I need this variable in my analyses) Unsure why this is the case.
Any suggestions would be really appreciated, cheers.
structure(list(participant_id = c(5237430, 5237430, 5237430),
participant_tz = c("UTC", "Australia/Melbourne", "Australia/Melbourne"
), study_id = c("s4lpHqswe", "s4lpHqswe", "s4lpHqswe"), study_name = c("Social Networks and Eating Behaviours",
"Social Networks and Eating Behaviours", "Social Networks and Eating Behaviours"
), study_version = c(7, 7, 7), survey_id = c("X81ypVgkcU",
"X81ypVgkcU", "X81ypVgkcU"), survey_name = c("Survey 1",
"Survey 1", "Survey 1"), trigger = c("scheduled", "scheduled",
"scheduled"), export_tz = c("Australia/Melbourne", "Australia/Melbourne",
"Australia/Melbourne"), start_end = c(1, 1, 1), created_ts = structure(c(1587937200,
1587813720, 1587820680), tzone = "UTC", class = c("POSIXct",
"POSIXt")), scheduled_ts = structure(c(1587935280, 1587813720,
1587820680), tzone = "UTC", class = c("POSIXct", "POSIXt"
)), started_ts = c("#NULL!", "43946.473611111112", "43946.554166666669"
), completed_ts = c(NA, 43946.4743055556, 43946.5548611111
), expired_ts = c("43947.901388888888", "#NULL!", "#NULL!"
), uploaded_ts = c("#NULL!", "43946.474305555574", "43946.554861111101"
), total_rt = c("NA", "56500", "33155"), rand_prob = c("NA",
"NA", "NA"), lonely1 = c(NA, 6, 5), lonely1_rt = c(NA, 4359,
1377), happy = c(NA, 5, 5), happy_rt = c(NA, 1071, 963),
lonely2 = c(NA, 4, 3), lonely2_rt = c(NA, 979, 2319), pos_feedback_1 = c(NA,
1, 1), pos_feedback_2 = c(NA, 0, 0), pos_feedback_3 = c(NA,
0, 0), pos_feedback_4 = c(NA, 0, 0), pos_feedback_5 = c(NA,
0, 0), pos_feedback_rt = c(NA, 7452, 1650), neg_feedback_1 = c(NA,
1, 1), neg_feedback_2 = c(NA, 0, 0), neg_feedback_3 = c(NA,
0, 0), neg_feedback_4 = c(NA, 0, 0), neg_feedback_5 = c(NA,
0, 0), neg_feedback_rt = c(NA, 2695, 3267), sat1 = c(NA,
4, 2), sat1_rt = c(NA, 3462, 1482), sat2 = c(NA, 5, 5), sat2_rt = c(NA,
1330, 948), comp1 = c(NA, 5, 4), comp1_rt = c(NA, 1043, 926
), comp2 = c(NA, 3, 3), comp2_rt = c(NA, 1134, 851), comp3 = c(NA,
2, 2), comp3_rt = c(NA, 2985, 2888), comp4 = c(NA, 6, 5),
comp4_rt = c(NA, 2221, 1253), selfie1 = c(NA, 1, 1), selfie1_rt = c(NA,
2315, 1241), selfie2 = c(NA, 102, 78), selfie2_rt = c(NA,
1393, 1078), selfie3 = c(NA, 1, 2), selfie3_rt = c(NA, 2589,
883), inspo1 = c(NA, 1, 2), inspo1_rt = c(NA, 1641, 788),
inspo2 = c(NA, 1, 2), inspo2_rt = c(NA, 1435, 968), inspo3 = c(NA,
2, 2), inspo3_rt = c(NA, 3953, 883), dating1 = structure(c(NA,
1L, 2L), .Label = c("1", "2"), class = "factor"), dating1_rt = c(NA,
2710, 1064), dating2_1 = c(NA, 0, NA), dating2_2 = c(NA,
1, NA), dating2_3 = c(NA, 0, NA), dating2_4 = c(NA, 0, NA
), dating2_5 = c(NA, 1, NA), dating2_6 = c(NA, 0, NA), dating2_7 = c(NA,
0, NA), dating2_rt = c(NA, 3988, NA), video = c(NA, 2, 2),
video_rt = c(NA, 2809, 1283), eating_1 = c(NA, 1, 0), eating_2 = c(NA,
0, 0), eating_3 = c(NA, 0, 0), eating_4 = c(NA, 0, 0), eating_5 = c(NA,
0, 0), eating_6 = c(NA, 0, 1), eating_7 = c(NA, 0, 0), eating_8 = c(NA,
0, 0), eating_none = c(NA, 0, 0), eating_rt = c(NA, 2226,
5979), dating = c(NA, 1, 2), dating_rt = c(NA, 2710, 1064
), partner_cat = c(NA_real_, NA_real_, NA_real_), partnerideal_dum = structure(c(NA,
1L, NA), .Label = c("0", "1"), class = "factor"), partnernonideal_dum = structure(c(NA,
1L, NA), .Label = c("0", "1"), class = "factor"), partnerboth_dum = structure(c(NA,
2L, NA), .Label = c("0", "1"), class = "factor"), qualtrics_sample = c(NA_character_,
NA_character_, NA_character_), start_date = structure(c(1587397597,
1587397597, 1587397597), tzone = "UTC", class = c("POSIXct",
"POSIXt")), end_date = structure(c(1587398241, 1587398241,
1587398241), tzone = "UTC", class = c("POSIXct", "POSIXt"
)), status = c(0, 0, 0), ip_address = c("101.182.17.165",
"101.182.17.165", "101.182.17.165"), progress = c(100, 100,
100), duration_in_seconds = c(644, 644, 644), finished = c(1,
1, 1), recorded_date = structure(c(1587398242, 1587398242,
1587398242), tzone = "UTC", class = c("POSIXct", "POSIXt"
)), response_id = c("R_3n97cmY4P1NXi92", "R_3n97cmY4P1NXi92",
"R_3n97cmY4P1NXi92"), user_language = c("EN", "EN", "EN"),
self_genid = c(NA_character_, NA_character_, NA_character_
), agree_share_email = c(1, 1, 1), consent = c(1, 1, 1),
age = c(20, 20, 20), gender = c(2, 2, 2), gender_other = c(NA_character_,
NA_character_, NA_character_), currentweight = c(55, 55,
55), currentheight = c(158, 158, 158), highestweight = c("63",
"63", "63"), highestheight = c("158", "158", "158"), lowestweight = c("55",
"55", "55"), lowestheight = c("158", "158", "158"), culture = c("southern asian",
"southern asian", "southern asian"), culture_other = c(NA_character_,
NA_character_, NA_character_), student = c("yes", "yes",
"yes"), international_student = c(NA_character_, NA_character_,
NA_character_), aus_international_student = c(NA_character_,
NA_character_, NA_character_), aus_years = c(NA_character_,
NA_character_, NA_character_), currentlive = c(NA_character_,
NA_character_, NA_character_), language = c("English", "English",
"English"), language_other = c(NA_character_, NA_character_,
NA_character_), maritalstatus = c("single", "single", "single"
), sexualorientation = c("heterosexual", "heterosexual",
"heterosexual"), sexualorientation_other = c(NA_character_,
NA_character_, NA_character_), education = c("bachelor degree",
"bachelor degree", "bachelor degree"), working_full = c(0,
0, 0), working_part = c(0, 0, 0), working_casual = c(0, 0,
0), working_unemployed = c(0, 0, 0), working_student = c(1,
1, 1), workhours = c("0", "0", "0"), taxes = c(4, 4, 4),
videoconferencing = c(1, 1, 1), zoom = c(1, 1, 1), team_viewer = c(NA_real_,
NA_real_, NA_real_), microsoft_teams = c(NA_real_, NA_real_,
NA_real_), skype = c(NA_real_, NA_real_, NA_real_), webex = c(NA_real_,
NA_real_, NA_real_), googlemeet = c(NA_real_, NA_real_, NA_real_
), joinme = c(NA, NA, NA), whats_app = c(NA_real_, NA_real_,
NA_real_), slack = c(NA_real_, NA_real_, NA_real_), houseparty = c(NA_real_,
NA_real_, NA_real_), videoconferencing_other = c(NA_real_,
NA_real_, NA_real_), videoconferencing_othertext = c(NA_character_,
NA_character_, NA_character_), videoconf_time = c(2, 2, 2
), fooddelivery = c(2, 2, 2), uber_eats = c(NA_real_, NA_real_,
NA_real_), deliveroo = c(NA_real_, NA_real_, NA_real_), menulog = c(NA_real_,
NA_real_, NA_real_), foodora = c(NA_real_, NA_real_, NA_real_
), door_dash = c(NA_real_, NA_real_, NA_real_), fooddelivery_other = c(NA_real_,
NA_real_, NA_real_), fooddelivery_othertext = c(NA_character_,
NA_character_, NA_character_), fooddeliverymonth = c(NA_real_,
NA_real_, NA_real_), serviceson = c(NA_real_, NA_real_, NA_real_
), serviceswith = c(NA_real_, NA_real_, NA_real_), fooddelivery_money = c(NA_real_,
NA_real_, NA_real_), facebook = c(1, 1, 1), instagram = c(1,
1, 1), snapchat = c(1, 1, 1), twitter = c(NA_real_, NA_real_,
NA_real_), tumblr = c(NA_real_, NA_real_, NA_real_), socialmedia_other = c(NA_real_,
NA_real_, NA_real_), socialmedia_othertext = c(NA_character_,
NA_character_, NA_character_), socialmediatime = c(1, 1,
1), socialmediaminutes_t = c("30", "30", "30"), socialmediaminutes_d = c("300",
"300", "300"), selfies = c(4, 4, 4), modifiedselfie = c(1,
1, 1), fitspiration = c(1, 1, 1), fitspirationtime = c(1,
1, 1), thinspiration = c(1, 1, 1), thinspirationtime = c(1,
1, 1), fatspiration = c(1, 1, 1), fatspirationtime = c(2,
2, 2), datingapp = c(1, 1, 1), tinder = c(1, 1, 1), hinge = c(NA_real_,
NA_real_, NA_real_), grindr = c(NA_real_, NA_real_, NA_real_
), bumble = c(NA_real_, NA_real_, NA_real_), ok_cupid = c(NA_real_,
NA_real_, NA_real_), her = c(NA_real_, NA_real_, NA_real_
), offee_meets = c(NA_real_, NA_real_, NA_real_), happn = c(NA_real_,
NA_real_, NA_real_), momo = c(NA_real_, NA_real_, NA_real_
), tantan = c(NA_real_, NA_real_, NA_real_), datingapp_other = c(NA_real_,
NA_real_, NA_real_), datingapp_other_t = c(NA_character_,
NA_character_, NA_character_), datingapp_time = c("multiple times a month",
"multiple times a month", "multiple times a month"), matchweek = c("10",
"10", "10"), match_month = c("40", "40", "40"), date_love = c(1,
1, 1), date_sex = c(1, 1, 1), date_comm = c(1, 1, 1), date_worth = c(NA_real_,
NA_real_, NA_real_), date_thrill = c(1, 1, 1), date_trend = c(NA_real_,
NA_real_, NA_real_), feat_thin = c(NA_real_, NA_real_, NA_real_
), feat_muscle = c(1, 1, 1), feat_face = c(1, 1, 1), feat_sex = c(NA_real_,
NA_real_, NA_real_), feat_health = c(1, 1, 1), feat_intell = c(1,
1, 1), feat_other = c(NA_real_, NA_real_, NA_real_), feat_other_t = c(NA_character_,
NA_character_, NA_character_), covid_food = c(1, 1, 1), covid_apps = c(3,
3, 3), covid_social = c(5, 5, 5), eatingdisorder_diagnosed = c("no",
"no", "no"), month_diagnosed = c(NA_character_, NA_character_,
NA_character_), year_diagnosed = c(NA_character_, NA_character_,
NA_character_), eatingdisorder = c(NA_real_, NA_real_, NA_real_
), eatingdisorder_other = c(NA_character_, NA_character_,
NA_character_), eatingdisorder_status = c(NA_real_, NA_real_,
NA_real_), ed_age = c(NA_character_, NA_character_, NA_character_
), ed_years = c(NA_character_, NA_character_, NA_character_
), socio_1 = c(3, 3, 3), socio_2 = c(4, 4, 4), socio_3 = c(4,
4, 4), socio_4 = c(3, 3, 3), socio_5 = c(2, 2, 2), socio_6 = c(4,
4, 4), socio_7 = c(5, 5, 5), socio_8 = c(5, 5, 5), socio_9 = c(4,
4, 4), socio_10 = c(1, 1, 1), bodysat_1 = c(6, 6, 6), bodysat_2 = c(6,
6, 6), bodysat_3 = c(4, 4, 4), bodysat_4 = c(6, 6, 6), bodysat_5 = c(6,
6, 6), bodysat_6 = c(6, 6, 6), bodysat_7 = c(6, 6, 6), bodysat_8 = c(6,
6, 6), bodyimage_1 = c(5, 5, 5), bodayimage_2 = c(5, 5, 5
), bodyimage_3 = c(5, 5, 5), bodyimage_4 = c(5, 5, 5), bodayimage_5 = c(5,
5, 5), bodayimage_6 = c(5, 5, 5), bodyimage_7 = c(5, 5, 5
), bodyimage_8 = c(5, 5, 5), bodyimage_9 = c(5, 5, 5), bodyimage_10 = c(5,
5, 5), media_1 = c(2, 2, 2), media_2 = c(1, 1, 1), media_3 = c(3,
3, 3), media_4 = c(1, 1, 1), media_5 = c(2, 2, 2), media_6 = c(3,
3, 3), critical_1 = c(4, 4, 4), critical_2 = c(4, 4, 4),
critical_3 = c(4, 4, 4), critical_4 = c(3, 3, 3), critical_5 = c(4,
4, 4), intro_aware_1 = c(2, 2, 2), intro_aware_2 = c(3, 3,
3), intro_aware_3 = c(3, 3, 3), intro_aware_4 = c(3, 3, 3
), cesd_1 = c(2, 2, 2), cesd_2 = c(2, 2, 2), cesd_3 = c(3,
3, 3), cesd_4 = c(3, 3, 3), cesd_5 = c(3, 3, 3), cesd_6 = c(2,
2, 2), cesd_7 = c(2, 2, 2), cesd_8 = c(3, 3, 3), cesd_9 = c(3,
3, 3), cesd_10 = c(2, 2, 2), eat26_1 = c(1, 1, 1), eat26_2 = c(5,
5, 5), eat26_3 = c(1, 1, 1), eat26_4 = c(1, 1, 1), eat26_5 = c(1,
1, 1), eat26_6 = c(1, 1, 1), eat26_7 = c(1, 1, 1), eat26_8 = c(4,
4, 4), eat26_9 = c(6, 6, 6), eat26_10 = c(2, 2, 2), eat26_11 = c(1,
1, 1), eat26_12 = c(1, 1, 1), eat26_13 = c(5, 5, 5), eat26_14 = c(2,
2, 2), eat26_15 = c(5, 5, 5), eat26_16 = c(3, 3, 3), eat26_17 = c(2,
2, 2), eat26_18 = c(2, 2, 2), eat26_19 = c(2, 2, 2), eat26_20 = c(4,
4, 4), eat26_21 = c(1, 1, 1), eat26_22 = c(1, 1, 1), eat26_23 = c(2,
2, 2), eat26_24 = c(2, 2, 2), eat26_25 = c(1, 1, 1), eat26_26 = c(2,
2, 2), eat26_a = c(2, 2, 2), eat26_b = c(2, 2, 2), eat26_c = c(1,
1, 1), eat26_d = c(1, 1, 1), eat26_e = c(1, 1, 1), neg_urg_1 = c(2,
2, 2), neg_urg_2 = c(2, 2, 2), neg_urg_3 = c(2, 2, 2), neg_urg_4 = c(2,
2, 2), neg_urg_5 = c(3, 3, 3), neg_urg_6 = c(2, 2, 2), neg_urg_7 = c(2,
2, 2), neg_urg_8 = c(3, 3, 3), neg_urg_9 = c(2, 2, 2), neg_urg_10 = c(2,
2, 2), neg_urg_11 = c(3, 3, 3), neg_urg_12 = c(2, 2, 2),
dis_tol_1 = c(3, 3, 3), dis_tol_2 = c(3, 3, 3), dis_tol_3 = c(2,
2, 2), dis_tol_4 = c(3, 3, 3), dis_tol_5 = c(4, 4, 4), dis_tol_6 = c(4,
4, 4), dis_tol_7 = c(3, 3, 3), dis_tol_8 = c(4, 4, 4), dis_tol_9 = c(3,
3, 3), dis_tol_10 = c(2, 2, 2), dis_tol_11 = c(2, 2, 2),
dis_tol_12 = c(3, 3, 3), dis_tol_13 = c(4, 4, 4), dis_tol_14 = c(4,
4, 4), dis_tol_15 = c(3, 3, 3), lone_1 = c(3, 3, 3), lone_2 = c(2,
2, 2), lone_3 = c(3, 3, 3), lone_4 = c(3, 3, 3), lone_5 = c(2,
2, 2), lone_6 = c(3, 3, 3), lone_7 = c(2, 2, 2), lone_8 = c(3,
3, 3), lone_9 = c(3, 3, 3), lone_10 = c(2, 2, 2), lone_11 = c(3,
3, 3), lone_12 = c(2, 2, 2), lone_13 = c(3, 3, 3), lone_14 = c(3,
3, 3), lone_15 = c(2, 2, 2), lone_16 = c(3, 3, 3), lone_17 = c(2,
2, 2), lone_18 = c(3, 3, 3), lone_19 = c(3, 3, 3), lone_20 = c(3,
3, 3), ucla_1 = c(3, 3, 3), ucla_2 = c(3, 3, 3), ucla_3 = c(3,
3, 3), appear_1a = c(5, 5, 5), appear_1e = c(4, 4, 4), appear_2a = c(5,
5, 5), appear_2e = c(5, 5, 5), appear_3a = c(4, 4, 4), appear_3e = c(4,
4, 4), appear_4a = c(5, 5, 5), appear_4e = c(5, 5, 5), appear_5a = c(5,
5, 5), appear_5e = c(4, 4, 4), appear_6a = c(5, 5, 5), appear_6e = c(4,
4, 4), appear_7a = c(4, 4, 4), appear_7e = c(5, 5, 5), appear_8a = c(5,
5, 5), appear_8e = c(5, 5, 5), appear_9a = c(5, 5, 5), appear_9e = c(4,
4, 4), appear_10a = c(5, 5, 5), appear_10e = c(5, 5, 5),
object_1 = c(3, 3, 3), object_2 = c(1, 1, 1), object_3 = c(2,
2, 2), object_4 = c(4, 4, 4), object_5 = c(12, 12, 12), object_6 = c(5,
5, 5), object_7 = c(11, 11, 11), object_8 = c(6, 6, 6), object_9 = c(10,
10, 10), object_10 = c(7, 7, 7), object_11 = c(8, 8, 8),
object_12 = c(9, 9, 9), rrs_1 = c(NA_real_, NA_real_, NA_real_
), rrs_2 = c(NA_real_, NA_real_, NA_real_), rrs_3 = c(NA_real_,
NA_real_, NA_real_), rrs_4 = c(NA_real_, NA_real_, NA_real_
), rrs_5 = c(NA_real_, NA_real_, NA_real_), rrs_6 = c(NA_real_,
NA_real_, NA_real_), rrs_7 = c(NA_real_, NA_real_, NA_real_
), rrs_8 = c(NA_real_, NA_real_, NA_real_), rrs_9 = c(NA_real_,
NA_real_, NA_real_), rrs_10 = c(NA_real_, NA_real_, NA_real_
), negative_urgency_tot = c(34, 34, 34), smartphone = c(NA_real_,
NA_real_, NA_real_), eat_26_total = c(48, 48, 48), eat26_oral_control = c(5,
5, 5), eat26_bulimia_food = c(11, 11, 11), eat26_diet = c(32,
32, 32), total_lone = c(49, 49, 49), total_object = c(78,
78, 78), rrs_total = c(NA_real_, NA_real_, NA_real_), total_dis_tol = c(45,
45, 45), total_body_sat = c(46, 46, 46), totalsocio = c(35,
35, 35), total_bodyimage = c(50, 50, 50), total_media = c(12,
12, 12), total_critical = c(19, 19, 19), total_intro_aware = c(7,
7, 7), intro_aware1_recoded = c(1, 1, 1), totalcesdrecoded = c(13,
13, 13), itro_aware_2recoded = c(2, 2, 2), intro_aware_3recoded = c(2,
2, 2), intro_aware4_recoded = c(2, 2, 2), cesd_1recoded = c(1,
1, 1), cesd_2recoded = c(1, 1, 1), cesd_3recoded = c(2, 2,
2), cesd_4recoded = c(2, 2, 2), cesd_5reversecoded = c(1,
1, 1), cesd_6recoded = c(1, 1, 1), cesd_7recoded = c(1, 1,
1), cesd_8reversecoded = c(1, 1, 1), cesd_9recoded = c(2,
2, 2), cesd_10recoded = c(1, 1, 1), recoded_eat26_q1 = c(3,
3, 3), recoded_eat26_q2 = c(0, 0, 0), recoded_eat26_q3 = c(3,
3, 3), recoded_eat26_q4 = c(3, 3, 3), recoded_eat26_q5 = c(3,
3, 3), recoded_eat26_q6 = c(3, 3, 3), recoded_eat26_q7 = c(3,
3, 3), recoded_eat26_q8 = c(0, 0, 0), recoded_eat26_q9 = c(0,
0, 0), recoded_eat26_q10 = c(2, 2, 2), recoded_eat26_q11 = c(3,
3, 3), recoded_eat26_q12 = c(3, 3, 3), recoded_eat26_q13 = c(0,
0, 0), recoded_eat26_q14 = c(2, 2, 2), recoded_eat26_q15 = c(0,
0, 0), recoded_eat26_q16 = c(1, 1, 1), recoded_eat26_q17 = c(2,
2, 2), recoded_eat26_q18 = c(2, 2, 2), recoded_eat26_q19 = c(2,
2, 2), recoded_eat26_q20 = c(0, 0, 0), recoded_eat26_q21 = c(3,
3, 3), recoded_eat26_q22 = c(3, 3, 3), recoded_eat26_q23 = c(2,
2, 2), recoded_eat26_q24 = c(2, 2, 2), recoded_eat26_q25 = c(3,
3, 3), recoded_eat26_q26 = c(0, 0, 0), dis_tol_6recoded = c(2,
2, 2), lone_1recoded = c(2, 2, 2), lone_5recoded = c(3, 3,
3), lone_6recoded = c(2, 2, 2), lone_9recoded = c(2, 2, 2
), lone_10recoded = c(3, 3, 3), lone_15recoded = c(3, 3,
3), lone_16recoded = c(2, 2, 2), lone_19recoded = c(2, 2,
2), lone_20recoded = c(2, 2, 2), lone_4recoded = c(2, 2,
2), neg_urg_1recoded = c(3, 3, 3), neg_urg_2recoded = c(3,
3, 3), neg_urg_3recoded = c(3, 3, 3), neg_urg_4recoded = c(3,
3, 3), neg_urg_5recoded = c(2, 2, 2), neg_urg_6recoded = c(3,
3, 3), neg_urg_7recoded = c(3, 3, 3), neg_urg_8recoded = c(2,
2, 2), neg_urg_9recoded = c(3, 3, 3), neg_urg_10recoded = c(3,
3, 3), neg_urg_12recoded = c(3, 3, 3), filter = c(1, 1, 1
), EatingSum = c(NA, 0, 1), EatingMean = c(NA, 0, 0.166666666666667
), appearsum = c(216, 216, 216), appearT = c(21.6, 21.6,
21.6), sexualorientation_col = c(NA_real_, NA_real_, NA_real_
), sat1r = c(NA, 6, 8), happyr = c(NA, 5, 5), dating1r = c(NA,
2, 1), datingappr = c(2, 2, 2), currentheight_metre = c(1.58,
1.58, 1.58), BMI = c(22.0317256849864, 22.0317256849864,
22.0317256849864), employed_sum = c(0, 0, 0), employed = c(0,
0, 0), unemployed = c(0, 0, 0), IDorder = 1:3, Date = structure(c(NA_real_,
NA_real_, NA_real_), class = c("POSIXct", "POSIXt"), tzone = ""),
FirstDate = structure(c(NA_real_, NA_real_, NA_real_), class = c("POSIXct",
"POSIXt"), tzone = ""), DaysElapsed = structure(c(NA_real_,
NA_real_, NA_real_), class = "difftime", units = "secs")), row.names = c(NA,
-3L), groups = structure(list(participant_id = 5237430, .rows = structure(list(
1:3), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr",
"list"))), row.names = 1L, class = c("tbl_df", "tbl", "data.frame"
), .drop = TRUE), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
))

densityplot error occurs when trying to run densityplot post mice with two variables removed

I am trying to run kernel density estimates of the imputed and observed data. However, I don't want to include variables "FOC_2", and "FOC_3" - they are hierarchical and mess up the imputations. The code runs with the full data set. However when I remove the aforementioned variables I get - 'Error in density.default(x = c(NA_real_, NA_real_, NA_real_, NA_real_,:need at least 2 points to select a bandwidth automatically'
Here is a subset of the data:
> dput(diss_data[1:4,])
structure(list(DS_1 = c(5, 10, 1, 10), DS_2 = c(10, 10, 1, NA
), DS_3 = c(5, 10, NA, 10), DS_4 = c(10, 10, 1, 10), DS_5 = c(10,
8, 2, 9), DS_6 = c(10, 9, 10, 10), DS_7 = c(5, 6, 5, 10), ISR_1 = c(3,
7, 1, NA), ISR_2 = c(10, 5, 2, NA), ISR_3 = c(7, 8, 1, NA), ISR_4 = c(10,
8, 1, NA), ISR_5 = c(10, 10, NA, NA), SC_T1 = c(1, 1, 2, 10),
SC_T2 = c(1, 1, 1, 10), SC_T3 = c(5, 1, 2, 10), SC_T4 = c(5,
8, NA, 10), SC_T5 = c(5, 7, 10, 10), FOC_1 = structure(c(2L,
2L, 1L, 2L), .Label = c("1", "2"), class = "factor"), FOC_2 = c(1,
1, 1, NA), FOC_3 = c(NA, 1, NA, 10), PS_1 = c(NA, 5, 1, 10
), PR_1 = c(1, 1, NA, NA), PR_2 = c(5, 1, NA, 1), PR_3 = c(1,
1, 1, 1), PR_4 = c(5, 10, NA, 1), PR_5 = c(1, 1, 10, NA),
PR_6 = c(5, 1, 5, 1), PR_7 = c(5, 1, 10, NA), PR_8 = c(5,
1, 10, NA), DR_1 = structure(c(2L, 2L, 1L, 2L), .Label = c("1",
"2"), class = "factor"), IR_1 = structure(c(2L, 2L, 1L, 2L
), .Label = c("1", "2"), class = "factor"), PF_1 = c(5, 1,
10, 10), PF_2 = c(5, 1, 10, 10), PF_3 = c(1, 1, 9, 10), PF_4 = c(10,
7, 2, 10), PF_5 = c(10, 10, 6, 10), DF_1 = c(5, 10, 1, NA
), DF__2 = c(5, 8, 10, 10), L_1 = c(5, 5, 8, 10), L_2 = c(5,
6, 10, 10), PE_1 = c(NA, 10, 5, 10), PE_2 = c(NA, 8, 10,
10), PE_3 = c(NA, 9, 10, 10), PE_4 = c(NA, 10, 10, 10), PE_5 = c(10,
8, 9, 10), PE_6 = c(10, 10, 10, 10), PE_7 = c(1, 10, 10,
10), YRS_N = c(15, 20, 10, NA), AGE = c(22, 60, 53, 24),
GENDER = c(2, 1, 1, NA), M_S = c(2, 1, 1, NA), RACE = c(5,
2, 2, 5), HAITI = structure(c(1L, 1L, 1L, 1L), .Label = c("1",
"2"), class = "factor"), H_INC = c(1, 1, 3, 1), H_O = c(2,
2, 1, NA), TREATMENT = structure(c(1L, 1L, 1L, 1L), .Label = c("0",
"1"), class = "factor")), row.names = c(NA, 4L), class = "data.frame")
Here is my code:
library(mice)
init = mice(diss_data, maxit=0)
meth = init$method
predM = init$predictorMatrix
meth[c("FOC_2", "FOC_3")]=""
imp <-mice(diss_data, method = meth, maxit = 10, m = 10)
densityplot(imp, layout = c(3, 6))
Error in density.default(x = c(NA_real_, NA_real_, NA_real_,
NA_real_,:need at least 2 points to select a bandwidth automatically
I read this "The relevant error message is: Error in density.default: ... need at least 2 points to select a bandwidth automatically. There is yet no workaround for this problem. Use the more robust bwplot or stripplot as a replacement" via https://rdrr.io/cran/mice/man/densityplot.mids.html.
Is it safe to say I cannot use densityplot and should use bwplot or stripplot? Or is there actually a workaround? Please bear with me I am new to R and thank you in advance for any assistance.

Resources