how to add a new column based on certain conditions with tidyverse? - r
I am trying to create a new column based on whether a respondent is healthy or not.
Here is the type of data I have:
# Sample data (dput output): a tibble with 20 rows covering three respondent
# ids (117dbbbf15, 3c8bfb6fc3, 8a594e9340). The symptom columns (chills ...
# joint_pain) are two-level factors with levels "No"/"Yes". `comorbidities`
# names a condition per row and `bolean_yes_no` holds "Yes"/"No" -- presumably
# whether the respondent has that condition (confirm with the data owner).
# Only row 4 (id 117dbbbf15, "obesity") is "Yes" in this sample.
test <- structure(list(`cutree(hc_diana, k = 4)` = c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), id = c("117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15",
"117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15",
"3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3",
"3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "8a594e9340",
"8a594e9340"), covid_tested = c("positive", "positive", "positive",
"positive", "positive", "positive", "positive", "positive", "positive",
"positive", "positive", "positive", "positive", "positive", "positive",
"positive", "positive", "positive", "positive", "positive"),
age = c(51, 51, 51, 51, 51, 51, 51, 51, 51, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28), gender = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("Female", "Male", "Other"), class = "factor"),
number_morbidities = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1), chills = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = c("No", "Yes"), class = "factor"), cough = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
diarrhoea = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), fatigue = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
headache = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), loss_smell_taste = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"),
muscle_ache = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), nasal_congestion = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
nausea_vomiting = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), shortness_breath = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
sore_throat = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), sputum = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = c("No", "Yes"), class = "factor"), temperature = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
loss_appetite = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), chest_pain = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
itchy_eyes = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), joint_pain = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
comorbidities = c("asthma", "diabetes_type_one", "diabetes_type_two",
"obesity", "hypertension", "heart_disease", "lung_condition",
"liver_disease", "kidney_disease", "asthma", "diabetes_type_one",
"diabetes_type_two", "obesity", "hypertension", "heart_disease",
"lung_condition", "liver_disease", "kidney_disease", "asthma",
"diabetes_type_one"), bolean_yes_no = c("No", "No", "No",
"Yes", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No")), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
The sample has 20 rows covering 3 unique ids.
I want to create a new column based on several conditions:
If a respondent has a comorbidity (a "Yes" in `bolean_yes_no`), the new column should contain the name of that comorbidity on that row only; all of that respondent's other rows should be NA.
As you can see, the second id does not have any comorbidity at all, so I want to assign it a new "healthy" category on one row, with the rest of the rows for that respondent appearing as NA. The same applies to the third respondent.
How do I do this with tidyverse?
Here is a sample of how I want the new column to look; see the last column, which summarises the points above.
# Desired output (dput output): a 20-row tibble holding a subset of the sample
# columns plus the requested column as the last one. Note that the column is
# spelled `morbiditiy_healthy` in this literal. It holds "obesity" on row 4
# (the only "Yes" row), "healthy" on the first row of ids 3c8bfb6fc3 and
# 8a594e9340 (rows 10 and 19), and NA on every other row.
structure(list(id = c("117dbbbf15", "117dbbbf15", "117dbbbf15",
"117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15",
"117dbbbf15", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3",
"3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3",
"8a594e9340", "8a594e9340"), number_morbidities = c(1, 1, 1,
1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1), chills = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), cough = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), diarrhoea = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), fatigue = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), headache = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), loss_smell_taste = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"), muscle_ache = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), nasal_congestion = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), nausea_vomiting = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), shortness_breath = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), sore_throat = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), sputum = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), temperature = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), loss_appetite = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), chest_pain = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), itchy_eyes = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), joint_pain = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), comorbidities = c("asthma",
"diabetes_type_one", "diabetes_type_two", "obesity", "hypertension",
"heart_disease", "lung_condition", "liver_disease", "kidney_disease",
"asthma", "diabetes_type_one", "diabetes_type_two", "obesity",
"hypertension", "heart_disease", "lung_condition", "liver_disease",
"kidney_disease", "asthma", "diabetes_type_one"), bolean_yes_no = c("No",
"No", "No", "Yes", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No"), morbiditiy_healthy = c(NA,
NA, NA, "obesity", NA, NA, NA, NA, NA, "healthy", NA, NA, NA,
NA, NA, NA, NA, NA, "healthy", NA)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
We group by 'id' and create 'morbidity_healthy' with case_when: where 'bolean_yes_no' is 'Yes', we take the corresponding value of 'comorbidities'; where the group has no 'Yes' at all (!any(...)) and the row is the first of its group (row_number() == 1), we return 'healthy'. Every other row matches neither condition and is therefore NA.
library(dplyr)
# Build `morbidity_healthy` per respondent (`id`):
#   * rows with "Yes" in `bolean_yes_no` take that row's `comorbidities` value;
#   * a respondent with no "Yes" row at all gets "healthy" on their first row;
#   * every other row matches no branch, so case_when() leaves it NA.
test %>%
  group_by(id) %>%
  mutate(
    morbidity_healthy = case_when(
      bolean_yes_no == "Yes" ~ comorbidities,
      # any() is evaluated within each id because of group_by(id) above
      !any(bolean_yes_no == "Yes") & row_number() == 1 ~ "healthy"
    )
  ) %>%
  # Drop the grouping so later verbs do not silently operate per id.
  ungroup()
Related
What does "Error in app$vspace(new_style$`margin-top` %||% 0) : attempt to apply non-function" mean?
I want to figure out if liver values (various biochemical parameters) differ from other organs. I want to include individuals' ID into my approach. I sampled from each individual every time the same set of organs (so these are not independent measurements anymore,...) I have liver, muscles,...etc. I thought about repeated ONE-WAY ANOVA. I am not so experienced with different kind of ANOVAs... ? So this is a within-subject design, correct me pls if I am wrong. I have many samples (n = 2130). Normality seems ok from the qqplot. This is some data: structure(list(ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("FB4", "FB5", "FB6", "FB7", "FB8", "FB9", "KBO16", "KBO21", "KBU10", "KBU11", "KBU12", "KBU15", "RB2", "RB3", "SR1", "SR2", "SR3", "SR5", "SR6", "SR9", "TG1", "TG3", "TG4", "TG5", "TG6", "YGL23", "YGL30", "YGL31", "YGL34", "YLG16", "YLG19", "YLG21", "YLG22", "YLK11"), class = "factor"), sub_group.x = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("bullhead", "salmonid" ), class = "factor"), taxa.x = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("cottus.gobio", "oncorhynchus.mykiss", "salmo.trutta"), class = "factor"), combi = c("bullhead.brain", "bullhead.brain", "bullhead.brain", "bullhead.brain", "bullhead.eyes", "bullhead.eyes", "bullhead.eyes", "bullhead.eyes", "bullhead.liver", "bullhead.liver"), sub_group.y = c("bullhead", "bullhead", "bullhead", "bullhead", "bullhead", "bullhead", "bullhead", "bullhead", "bullhead", "bullhead"), taxa.y = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("cottus.gobio", "oncorhynchus.mykiss", "salmo.trutta"), class = "factor"), PUFA = structure(c(1L, 3L, 4L, 2L, 1L, 3L, 4L, 2L, 1L, 3L), .Label = c("ARA.d13C", "DHA.d13C", "EPA.d13C", "SDA.d13C"), class = "factor"), organ = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("brain", "eyes", "liver", "muscle"), class = "factor"), isotopic_value = c(-40.0226662, -43.1508914, -49.2039419, 
-44.6943377, -40.0226662, -43.1508914, -49.2039419, -44.6943377, -40.0226662, -43.1508914)), row.names = c(NA, 10L), class = "data.frame") I programmed a loop - I want to look at each biochemical parameter (variable "PUFA") separately lapply(levels(leber_new$PUFA)[-4], function(x) droplevels(na.omit(leber_new[leber_new$PUFA==x,c("ID","PUFA","organ","sub_group.x","isotopic_value")])) %>% anova_test(dv = isotopic_value, wid = ID, within = c(organ))) But it doesn't work: Error in app$vspace(new_style$`margin-top` %||% 0) : attempt to apply non-function Called from: clii__container_start(app, "span", class = funname) What is it that I am doing wrong?
How can I plot a heatmap in R?
Im trying to plot a heatmap in R, but when I run my code it gives me this error: Error: Can't combine `No.` <integer> and `Mes` <character>. What I'm doing wrong? Here is my code: df %>% pivot_longer(-Localidad) %>% ggplot(aes(x = name, y = Localidad , fill = value)) + geom_tile(colour="gray80", size=0.2) + geom_text(aes(label=value)) + theme_minimal() + scale_fill_distiller(palette = "YlGnBu", direction = -1, na.value = "white") My df its something like this, Im working with a lot of data, so thats why I didnt want to print all the head of, but here it is. > dput(head(df)) structure(list(No. = 1:6, Mes = c("oct-10", "oct-10", "oct-10", "oct-10", "oct-10", "oct-10"), Delegacion = c("09CIUDAD DE MÉXICO", "09CIUDAD DE MÉXICO", "09CIUDAD DE MÉXICO", "09CIUDAD DE MÉXICO", "09CIUDAD DE MÉXICO", "09CIUDAD DE MÉXICO"), Localidad = c("09016MIGUEL HIDALGO", "09005GUSTAVO A. MADERO", "09005GUSTAVO A. MADERO", "09003COYOACÁN", "09010ÁLVARO OBREGÓN", "09011TLÁHUAC"), Esquema = c("U", "U", "U", "U", "U", "U"), Número = c(629L, 1402L, 699L, 48L, 539L, 55L), Nombre = c("MUNDO DE LOS PEQUES", "GUARDERIA EL ARBOL DE LA NIÑEZ", "LOS PEQUEÑOS GENIOS II", "MI MUNDO FELIZ", "CENTRO ECOLÓGICO DE DESARROLLO INFANTIL II", "ESTANCIA INFANTIL TERCER MILENIO"), X2.1 = c(1L, 1L, 1L, 1L, 1L, 1L), X2.2 = c(1L, 1L, 1L, 1L, 1L, 1L), X2.3 = c(1L, 1L, 1L, 1L, 1L, 1L), X2.4 = c(1L, 1L, 1L, 1L, 1L, 1L), X2.5 = c(1L, 1L, 1L, 1L, 1L, 1L), X2.6 = c(1L, 1L, 0L, 1L, 1L, 0L), X2.7 = c(1L, 1L, 1L, 1L, 1L, 1L), X2.8 = c(1L, 1L, 1L, 1L, 1L, 1L), X2.9 = c(1L, 1L, 1L, 1L, 1L, 1L), X2.1.1 = c(1L, 1L, 1L, 1L, 1L, 1L), X2.11 = c(1L, 1L, 1L, 1L, 1L, 1L), X2.12 = c(1L, 1L, 1L, 1L, 1L, 1L), X3.1 = c(1L, 1L, 1L, 1L, 1L, 1L), X3.2 = c(1L, 1L, 1L, 1L, 1L, 1L), X5.1 = c(1L, 1L, 1L, 1L, 1L, 1L), X5.2 = c(1L, 1L, 1L, 1L, 1L, 1L), X5.3 = c(1L, 1L, 1L, 1L, 1L, 1L), X5.4 = c(1L, 1L, 1L, 1L, 1L, 1L), X5.5 = c(1L, 1L, 1L, 1L, 1L, 1L), X5.6 = c(1L, 1L, 1L, 1L, 1L, 1L), X5.7 = c(1L, 1L, 1L, 1L, 1L, 1L), X5.8 = c(1L, 1L, 1L, 1L, 
1L, 1L), X6.1 = c(1L, 1L, 1L, 1L, 1L, 1L), X6.2 = c(1L, 1L, 1L, 1L, 1L, 1L), X6.3 = c(1L, 1L, 1L, 1L, 1L, 1L), X6.4 = c(1L, 1L, 1L, 1L, 1L, 1L), X6.5 = c(1L, 1L, 1L, 1L, 1L, 1L), X7.1 = c(1L, 1L, 1L, 1L, 1L, 1L), X7.2 = c(1L, 1L, 1L, 1L, 1L, 1L), X7.3 = c(1L, 1L, 1L, 1L, 1L, 1L), X7.4 = c(1L, 1L, 1L, 1L, 1L, 1L), X8.1 = c(1L, 1L, 1L, 1L, 1L, 1L), X8.2 = c(1L, 1L, 1L, 1L, 1L, 1L), X9.1 = c(1L, 1L, 1L, 1L, 1L, 1L), X9.2 = c(1L, 1L, 1L, 1L, 1L, 1L), X9.3 = c(1L, 1L, 1L, 1L, 1L, 1L), X9.4 = c(1L, 1L, 1L, 1L, 1L, 1L), X10.1 = c(1L, 1L, 1L, 1L, 1L, 1L), X10.2 = c(1L, 1L, 1L, 1L, 1L, 1L), X10.3 = c(1L, 1L, 1L, 1L, 1L, 1L), X10.4 = c(1L, 1L, 1L, 1L, 1L, 1L), X10.5 = c(1L, 1L, 1L, 1L, 1L, 1L), X10.6 = c(1L, 1L, 1L, 1L, 1L, 1L), X10.7 = c(1L, 1L, 1L, 1L, 1L, 1L), X10.8 = c(1L, 1L, 1L, 1L, 1L, 1L), X10.9 = c(1L, 1L, 1L, 1L, 1L, 1L), X11.1 = c(1L, 1L, 1L, 1L, 1L, 1L), X11.2 = c(1L, 1L, 1L, 1L, 1L, 1L), X11.3 = c(1L, 1L, 1L, 1L, 1L, 1L), X11.4 = c(1L, 1L, 1L, 1L, 1L, 1L), X11.5 = c(1L, 1L, 1L, 1L, 1L, 1L), X11.6 = c(1L, 1L, 1L, 1L, 1L, 1L), X11.7 = c(1L, 1L, 1L, 1L, 1L, 1L), X11.8 = c(1L, 1L, 1L, 1L, 1L, 1L), X11.9 = c(1L, 1L, 1L, 1L, 1L, 1L), X11.1.1 = c(1L, 1L, 1L, 1L, 1L, 1L), X11.11 = c(1L, 1L, 1L, 1L, 1L, 1L), X11.12 = c(1L, 1L, 1L, 1L, 1L, 1L), X11.13 = c(1L, 1L, 1L, 1L, 1L, 1L), X11.14 = c(1L, 1L, 1L, 1L, 0L, 0L), X11.15 = c(1L, 1L, 1L, 1L, 1L, 0L), X11.16 = c(1L, 1L, 1L, 1L, 1L, 1L), X12.1 = c(1L, 1L, 1L, 1L, 1L, 1L), X12.2 = c(1L, 1L, 1L, 1L, 1L, 1L), X12.3 = c(1L, 1L, 1L, 0L, 1L, 1L), X12.4 = c(1L, 1L, 1L, 1L, 1L, 1L), X12.5 = c(1L, 1L, 1L, 1L, 1L, 1L), X12.6 = c(1L, 1L, 1L, 1L, 1L, 1L), X12.7 = c("SI", "SI", "SI", "SI", "NO", "NO"), X12.8 = c(0L, 0L, 0L, 0L, NA, NA), X14.1 = c(1L, 1L, 1L, 1L, 1L, 1L), X14.2 = c(1L, 1L, 1L, 1L, 0L, 1L), Puntos.máximos = c(71L, 71L, 71L, 71L, 70L, 70L), Puntos.alcanzados = c(70L, 70L, 69L, 69L, 68L, 67L), X. 
= c(98.59, 98.59, 97.18, 97.18, 97.14, 95.71), No..de.Padres = c(7L, 7L, 6L, 7L, 7L, 7L), Horas = c(14L, 14L, 12L, 14L, 14L, 14L )), row.names = c(NA, 6L), class = "data.frame")
You can try to exclude the character columns from pivoting. Not entirely sure if the result will be what you expected though. library(ggplot2) library(tidyr) dff <- pivot_longer(df, colnames(df)[!sapply( df, is.character )] ) ggplot(dff, aes(x = name, y = Localidad , fill = value)) + geom_tile(colour="gray80", size=0.2) + geom_text(aes(label=value)) + theme_minimal() + scale_fill_distiller(palette = "YlGnBu", direction = -1, na.value = "white") # plot
Error in if (is.na(n) || n > 65536L) stop("size cannot be NA nor exceed 65536") : missing value where TRUE/FALSE for Gower distance
I am struggling to get hierarchical clustering, in R. Please do not downgrade this post since I have tried what is at this link How to use 'hclust' as function call in R Yet I haven't succeeded. A sample of data is here: structure(list(respondents_id = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"), comorbidities = c("hypertension", "asthma", "diabetes_type_two", "hypertension", "hypertension", "lung_condition", "asthma", "obesity", "obesity", "obesity"), chills = structure(c(2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), cough = structure(c(1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L ), .Label = c("No", "Yes"), class = "factor"), diarrhoea = structure(c(2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L), .Label = c("No", "Yes" ), class = "factor"), fatigue = structure(c(2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), headache = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"), loss_smell_taste = structure(c(1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L), .Label = c("No", "Yes" ), class = "factor"), muscle_ache = structure(c(2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), nasal_congestion = structure(c(1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), nausea_vomiting = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), shortness_breath = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), sore_throat = structure(c(1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), sputum = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", "Yes" ), class = "factor"), temperature = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L), .Label = c("No", "Yes"), class = "factor"), loss_appetite = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
"Yes"), class = "factor"), chest_pain = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", "Yes" ), class = "factor"), itchy_eyes = structure(c(1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), joint_pain = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor")), row.names = c(NA, 10L), class = "data.frame") Here is the code: gower_distance <- cluster::daisy(data_test[,3:19], metric = "gower") class(gower_distance) divisive_clustering <- diana(as.matrix(gower_distance), diss = TRUE, keep.diss = TRUE) hc_complete <- hclust(divisive_clustering, method = "complete")
setting color axis limits in ggplot2
I am trying to make a heat-map of chlorophyll fluorescence vs depth and time. I have things working pretty ok, but I'm trying to improve my colour contrast. I generate my heatmap with the following code. ggplot(subset(ctdamotInt2, variable == 'fluorescence'), aes(time, depth)) + geom_tile(aes(fill = log10(value))) + scale_y_reverse(limits = c(110, 0)) + scale_x_time(limits = c(min(subset(ctdamot, variable == 'nh4')$time) - 2 * 60^2, max(subset(ctdamot, variable == 'nh4')$time) + 2* 60^2)) + geom_point(data = samplesCTD, aes( x = time, y = depth)) + scale_fill_gradient2(low = "blue", mid = "white", high = "green") Generally I am finding that the dark green colours essentially never get utilized and so my heatmap ends up looking washed out and doesn't do a great job of communicating where chlorophyll fluorescence is greatist If I were working in matlab, I would get around this by setting caxis([-1 0.4]) which would set all values above 0.4 to the maximum green value. You wouldn't be able to tell the relative difference of the really high values, but you'd at least be able to get a better idea about the relative differences of the intermediate values that make up most of the plot. Any suggestions on how I can have a larger proportion of this plot be green? I suppose I could manually rescale the input values, but would rather not if there is a better way. 
Edit: At the request of Mike H dput(head(ctdamotInt2,100)) structure(list(variable = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("temperature", "salinity", "fluorescence", "oxygen", "nh4"), class = "factor"), depth = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), time = structure(c(1482764087, 1482767687, 1482771287, 1482774887, 1482778487, 1482782087, 1482785687, 1482789287, 1482792887, 1482796487, 1482800087, 1482803687, 1482807287, 1482810887, 1482814487, 1482818087, 1482821687, 1482825287, 1482828887, 1482832487, 1482836087, 1482839687, 1482843287, 1482846887, 1482850487, 1482854087, 1482857687, 1482861287, 1482864887, 1482868487, 1482872087, 1482875687, 1482879287, 1482882887, 1482886487, 1482890087, 1482893687, 1482897287, 1482900887, 1482904487, 1482908087, 1482911687, 1482915287, 1482918887, 1482922487, 1482926087, 1482929687, 1482933287, 1482936887, 1482940487, 1482944087, 1482947687, 1482951287, 1482954887, 1482958487, 1482962087, 1482965687, 1482969287, 1482972887, 1482976487, 1482980087, 1482983687, 1482987287, 1482990887, 1482994487, 1482998087, 1483001687, 1483005287, 1483008887, 1483012487, 1483016087, 1483019687, 1483023287, 1483026887, 1483030487, 1483034087, 1483037687, 1483041287, 1483044887, 1483048487, 1483052087, 
1483055687, 1483059287, 1483062887, 1483066487, 1483070087, 1483073687, 1483077287, 1483080887, 1483084487, 1483088087, 1483091687, 1483095287, 1483098887, 1483102487, 1483106087, 1483109687, 1483113287, 1483116887, 1483120487), class = c("POSIXct", "POSIXt")), value = c(27.3483, 27.3483, 27.3483, 27.3483, 27.4404348314607, 27.5325696629213, 27.624704494382, 27.7168393258427, 27.8089741573034, 27.901108988764, 27.9932438202247, 28.0853786516854, 28.1006709677419, 28.1151870967742, 28.1297032258065, 28.1602961677656, 28.3392342471866, 28.5181723266075, 28.6971104060285, 28.8760484854494, 29.0549865648704, 29.1744078768732, 29.2330425521923, 29.2916772275114, 29.3503119028306, 29.4089465781497, 29.4675812534688, 29.5262159287879, 29.5233725024786, 29.5198033650201, 29.5162342275617, 29.5126650901032, 29.5090959526448, 29.5055268151863, 29.5019576777279, 29.4983885402694, 29.494819402811, 29.4392079391567, 29.3230472306014, 29.2068865220461, 29.0907258134908, 28.9745651049355, 28.8584043963802, 28.7422436878249, 28.6260829792696, 28.5099222707143, 28.5396702257581, 28.6045126836247, 28.6693551414913, 28.734197599358, 28.7990400572246, 28.8638825150912, 28.9287249729579, 28.9935674308245, 29.0584098886912, 29.1232523465578, 29.1880948044244, 29.2529372622911, 29.3177797201577, 29.3826221780244, 29.447464635891, 29.5123070937576, 29.4047436790674, 29.2746548739928, 29.1445660689182, 29.0144772638436, 28.8843884587691, 28.7542996536945, 28.6242108486199, 28.4941220435453, 28.4440444629526, 28.4161338799902, 28.3882232970279, 28.3603127140655, 28.3324021311032, 28.3044915481409, 28.2765809651785, 28.2486703822162, 28.2207597992539, 28.1928492162915, 28.1649386333292, 28.1370280503668, 28.1091174674045, 28.0812068844422, 28.0532963014798, 28.0253857185175, 27.9974751355552, 27.9695645525928, 27.9416539696305, 27.9137433866682, 27.8858328037058, 27.8579222207435, 27.8300116377811, 27.8021010548188, 27.7741904718565, 27.7462798888941, 27.7183693059318, 27.6904587229695, 
27.6625481400071, 27.6346375570448 )), .Names = c("variable", "depth", "time", "value"), row.names = c(NA, 100L), class = "data.frame")
plotting only time using ggplot2
I have a data frame like this: head(yy) Team Date STime ETime 1 A 2012-03-06 07:03 10:13 2 A 2012-03-06 07:03 10:13 3 A 2012-03-06 07:03 10:13 4 A 2012-03-06 07:03 10:13 5 A 2012-03-06 07:03 10:13 6 A 2012-03-06 07:03 10:13 dput(yy) dput(yy) structure(list(Team = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "A", class = "factor"), Date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "2012-03-06", class = "factor"), STime = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "07:03", class = "factor"), ETime = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "10:13", class = "factor")), .Names = c("Team", "Date", "STime", "ETime"), class = "data.frame", row.names = c(NA, -50L)) I like to see the y-axis from 00:00 23:59 in 2 hours increment and be able to draw a red line on STime value. I have somthing like this but it does not look right: ggplot(yy, aes(Date, ETime, group="Team")) + geom_jitter(size=0.05) + facet_wrap( ~ Team) + geom_hline(yintercept=yy$Stime, colour="red", size=2) how would you do this in ggplot2? Can somebody give me pointers/start me in the right direction? Regards,
You have to format your times into actual times. Right now they are factors (Check your data frame with str(yy)). When ETime is plotted, the single time is plotted as 1 and labeled "10:13." So, the solution below first converts the string "10:13" into a time (strptime) then converts it to POSIXct, or seconds since an origin (1/1/1970). library(ggplot2); library(scales) #Convert date string into POSIXct format yy$STime <- as.POSIXct(strptime(yy$STime, format = "%H:%M", tz = "UTC")) yy$ETime <- as.POSIXct(strptime(yy$ETime, format = "%H:%M", tz = "UTC")) #Define y-axis limits lims <- as.POSIXct(strptime(c("0:00","23:59"), format = "%H:%M", tz= "UTC")) ggplot(yy, aes(Date, ETime, group="Team")) + geom_jitter(size=1) + facet_wrap( ~ Team) + geom_hline(data = yy, aes(yintercept= as.numeric(STime)), colour="red", size=2) + scale_y_datetime(limits =lims, breaks=date_breaks("2 hour"), labels=date_format("%H:%M", tz = "UTC") ) Note on geom_line to date axis. Pay attention to your timezones too. Otherwise R/ggplot will format things according to your local time zone.