Find lag in group matching a condition - r

For each group of IDs, I need to find the previous (lagged) value of another column - not simply the immediately preceding value, but the most recent one that matches a condition.
My problem is, I don't know how to add the condition to the row.
My data and desired output:
I've tried
df %>%
group_by(ID) %>%
arrange(activity_nr) %>%
mutate(desired_output = if_else(is.na(matched) &
lag(event) != "hospital",
lag(event), NA))
But the condition of lag(event) != "hospital" is not correct, since it only matches if event is not hospital - what I want is for it to find the last non-hospital event.
I've also tried
df %>%
group_by(ID) %>%
arrange(activity_nr) %>%
mutate(desired_output = if_else(is.na(matched) &
str_detect(event, "1"),
lag(event), NA))
and
df %>%
group_by(ID) %>%
arrange(activity_nr) %>%
mutate(desired_output = if_else(is.na(matched),
lag(str_detect(event, "1")), NA))
Data:
df <- structure(list(ID = c(1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
2, 3, 3, 3, 3, 3, 3), event = c("108", "hospital", "hospital",
"hospital", "hospital", "108", "110", "hospital", "hospital",
"110", "hospital", "hospital", "107", "107", "105", "105", "hospital",
"hospital", "110", "110"), event_type = c("start", "start", "end",
"start", "end", "end", "start", "start", "end", "end", "start",
"end", "start", "end", "start", "end", "start", "end", "start",
"end"), activity_nr = c(1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 7,
8, 1, 2, 3, 4, 6, 7), activity_id = c(1, 1, 1, 2, 2, 1, 1, 1,
1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2), activity_type = c("housing",
"hospital", "hospital", "hospital", "hospital", "housing", "housing",
"hospital", "hospital", "housing", "hospital", "hospital", "housing",
"housing", "housing", "housing", "hospital", "hospital", "housing",
"housing"), matched = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 107, NA, NA, NA, NA, NA, 110, NA, NA), `Desired output` = c(NA,
NA, 108, NA, 108, NA, NA, NA, 110, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-20L))

Ok so I actually found the solution.
Here you go:
library(dplyr)
library(tidyr)

# For every hospital "end" row that has no match, report the most recent
# non-hospital event seen earlier within the same ID.
df %>%
  group_by(ID) %>%
  # Sort inside each ID so that "down" below means "later activity".
  arrange(activity_nr, .by_group = TRUE) %>%
  # Blank out hospital rows so that only non-hospital events remain ...
  mutate(last_event = ifelse(event == "hospital", NA, event)) %>%
  # ... and carry the last non-hospital event forward (fill respects groups).
  fill(last_event, .direction = "down") %>%
  mutate(
    desired_output = case_when(
      event == "hospital" & event_type == "end" & is.na(matched) ~ last_event,
      # Every other row stays empty; the original referenced a column
      # `desired_output` that does not exist yet, which raised an error.
      TRUE ~ NA_character_
    )
  ) %>%
  ungroup() %>%
  # Drop the helper column so only the requested output is added.
  select(-last_event)

Related

which function can I use to add individual name under each star plot

I want to give names to individual starplots in R
stars(norm_datas[, 1:12], full = TRUE,radius = TRUE,len = 1.0, key.loc = c(14,1), labels = abbreviate(case.names(norm_datas)),main = "Provision of Ecosystem services", draw.segments = TRUE, lwd = 0.25, lty = par("lty"), xpd = TRUE).
This is what I tried but it just labeled each star plot as 1, 2, 3.
Kindly help resolve.
structure(list(Type_Garden = c("AG", "AG", "AG"), Pollinators = c(10,
6, 5.5), Flower_abundance = c(384, 435, 499), Climate_regulation = c(1,
7, 2), Crop_area = c(34, 25, 10), Plant_diversity = c(22, 53,
41), Nitrogen_balance = c(0.95, 0.26, NA), Phosphorus_balance = c(0.24,
0.04, NA), Habitat_provision = c(1, 2, 0), Recreation_covid = c(1,
NA, NA), Aesthetic_appreciation = c(3, NA, NA), Reconnection_nature = c(4,
NA, NA), Mental_health = c(1, NA, NA), Physical_health = c(1,
NA, NA)), class = "data.frame", row.names = c(NA, -3L))

Scan a column in one df using a column in another df and, if the condition is met, create an extra column in the second df and assign the value

I got data like this
structure(list(id = c(1, 1, 2, 2, 2, 3, 3, 3), code2 = c("24600",
"2400", "718", "19C11", "2021", "41C200", "G8511", "2021")), class = "data.frame", row.names = c(NA, -8L))
I want to use the "code2" column in the above df2 and scan it through the following df1, and if the code matches, assign the value from the "score" column in df1
structure(list(code = c("718", "E0012", "G8511", "209BF", "466D",
"2021"), score = c(1, 1, 3, 6, 1, 2)), class = "data.frame", row.names = c(NA, -6L))
and I want the final df to look like this
structure(list(id = c(1, 1, 2, 2, 2, 3, 3, 3), code2 = c("24600",
"2400", "718", "19C11", "2021", "41C200", "G8511", "2021"), score2 = c(NA,
NA, 1, NA, 2, NA, 3, 2)), class = "data.frame", row.names = c(NA, -8L))
Here is a solution:
library(dplyr)

# Rows to be scored: one (id, code2) pair per row.
df1 <- structure(list(id = c(1, 1, 2, 2, 2, 3, 3, 3), code2 = c("24600",
"2400", "718", "19C11", "2021", "41C200", "G8511", "2021")), class = "data.frame", row.names = c(NA, -8L))
# Lookup table: the score belonging to each code.
df2 <- structure(list(code = c("718", "E0012", "G8511", "209BF", "466D",
"2021"), score = c(1, 1, 3, 6, 1, 2)), class = "data.frame", row.names = c(NA, -6L))

# match() gives, for every code2, the position of the same code in df2
# (NA when the code is absent), so the score is looked up by value.
# ifelse(code2 %in% df2$code, df2$score, NA) would instead recycle df2$score
# by row position and attach the wrong scores to the matching rows.
res <- df1 %>%
  mutate(score2 = df2$score[match(code2, df2$code)])

errors in missMDA function

I'm actually working on an end-studies project on stallion fertility.
And so, I have few factors that I would like to test their effect on stallion fertility.
I have a large table with 54 columns and about 300 rows; each column is a factor, either quantitative or qualitative. Fertility is recorded as "oui" (yes) or "non" (no) in the column "DG".
So to test all factors and maybe interactions, I would like to do an AFDM analysis but before that, I have to run missMDA function because I have empty values.
But when I try to run the missMDA functions, I always get error messages, for example:
> res.impute<-imputeFAMD(Tableau_analyse_juments_finies, ncp = 3)
Error in eigen(crossprod(X, X), symmetric = TRUE) : 0 x 0 matrix
> res.impute<-estim_ncpFAMD(Tableau_analyse_juments_finies)
Error in `[.data.frame`(jeu, , (nbquanti + 1):ncol(jeu), drop = F) :
undefined columns selected
I'm not very good in statistics and statisticians from my school don't have time to help me so I'm embarrassed. Could someone help me?
PS : I'm French so if someone is French as well, he can speak me French, it will be easier for me :)
When I do dput function, I have this :
structure(list(Jument = c("Darling-de-Courcy", "Darling-de-Courcy",
"Doublette", "Undoctra-d-Helby", "Unfee-du-Clos-Marman", "Hadelle-de-Padoue",
"Prunelle-de-la-Vallee", "Prunelle-de-la-Vallee", "Quelle-dame-du-Mesnil",
"Quiara-de-Saint-A"), Etalon = c("ARMITAGES", "ARMITAGES", "ARMITAGES",
"ARMITAGES", "ARMITAGES", "BY-CERA", "BY-CERA", "BY-CERA", "CANTURANO",
"CANTURANO"), Age = c(7, 7, 17, 12, 12, 3, 17, 17, 16, 16), Historique = c("S",
"S", "P", "TE-2019", "MPNS", "P", "MPNS", "MPNS", "P", "MPNS"
), `NEC-1` = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3), Antecedents = c("Poulin",
"Poulin", "Compet", "Compet", "Poulin", "Elevage", "Poulin",
"Poulin", "Compet", "Poulin"), Habitat = c("PT", "PT", "PI",
"Box", "PT", "PT", "PT", "PT", "PT", "PT"), `Nb-chaleurs` = c(2,
2, 1, 1, 1, 1, 2, 2, 1, 2), Lait = c("oui", "oui", "non", "non",
"non", "non", "non", "non", "non", "non"), `TM-1` = c("IAR",
"IAR", "IAR", "IAR", "IAC", "IAC", "IAR-12", "IAR-12", "IAR",
"IAR"), `N.-inse-1` = c(1, 2, 1, 1, 1, 1, 2, 2, 1, 1), D1 = structure(c(1586390400,
1588204800, 1586390400, 1585958400, 1588896000, 1587772800, 1584489600,
1586304000, 1587600000, 1584144000), tzone = "UTC", class = c("POSIXct",
"POSIXt")), H1 = c("MA", "MA", "AM", "MA", "M", "M", "AM", "AM",
"AM", "MA"), `N.-paillettes-1` = c(NA, NA, NA, NA, 2, 3, NA,
NA, NA, NA), I1 = c(6, 6, 12, 24, 0, 24, 48, 24, 24, 6), Mob1 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, 0.8), CC1 = c(NA, NA, NA, NA,
NA, NA, NA, NA, 303, 234), `N.-Saut-1` = c(NA, NA, NA, NA, NA,
NA, NA, NA, 1, 1), `Gel-1` = c(NA, NA, NA, NA, NA, NA, NA, NA,
0, 1), `DG-1` = c("non", "oui", "oui", "oui", "oui", "oui", "non",
"oui", "oui", "non")), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame"))
But I have the impression that R considers each stallion ("Etalon" in my database) as a unique individual, whereas, for example, the stallion "ARMITAGES" was bred to many mares ("Jument" in my database).
When I try to run your function for missing values, my database becomes empty and there is no data anymore.
I had the same issue and found out that all character variables need to be converted to factor. For all character variables, convert them using as.factor().

Merge multiple files into one big data table. Column names do not match in the files

I have 50+ csv files in a folder on my computer that I would like merged into 1 giant data table. Below is an example of how 3 out of my 50 tables could look (one, two, and three) and how I would like my final table to look (together).
one <- data.frame("County" = c("Autauga", "Barbour", "Bibb"), "AAAA" = c(1,
1, 1), "BBBB" = c(2, 2, 2))
two <- data.frame("County" = c("Cape May", "Mercer", "Bergen"), "BBBB" =
c(1, 1, 1), "CCCC" = c(2, 2, 2), "DDDD" = c(1, 2 ,3))
three <- data.frame("County" = c("Lincoln", "Jackson", "Pike"), "CCCC" =
c(1, 1, 1))
together <- data.frame("County" = c("Autauga", "Barbour", "Bibb", "Cape
May", "Mercer", "Bergen", "Lincoln", "Jackson", "Pike"), "AAAA" = c(1, 1, 1,
NA, NA, NA, NA, NA, NA), "BBBB" = c(2, 2, 2, 1, 1, 1, NA, NA, NA), "CCCC" =
c(NA, NA, NA, 2, 2, 2, 1, 1, 1), "DDDD" = c(NA, NA, NA, 1, 2, 3, NA, NA,
NA))
If anyone could help me with this, that would be great! Also the blanks do not need to be "NA", they can just be left as blanks.
We can use bind_rows
library(tidyverse)
# bind_rows() stacks the data frames, matching columns by name; a column
# that is absent from one of the inputs is filled with NA for its rows.
bind_rows(one, two, three)
If there are many datasets, place them in a list and then use bind_rows, or rbindlist from data.table
Instead of creating multiple data.table/data.frame objects in the global env, read it into a list and then use rbindlist
library(data.table)
# `files` must hold the paths of the CSV files to combine, for example:
#   files <- list.files("path/to/folder", pattern = "\\.csv$", full.names = TRUE)
# fill = TRUE is required here because the files do not share the same
# columns: columns are matched by name and missing ones are filled with NA.
# Without it rbindlist() stops on the first file whose columns differ.
rbindlist(lapply(files, fread), use.names = TRUE, fill = TRUE)

pivoting data with rownames to be colnames r [duplicate]

This question already has answers here:
Reshaping data.frame from wide to long format
(8 answers)
Closed 5 years ago.
I have the following dataset
structure(list(Year = c("Oranges", "Cherrys", "Apples", "Bananas"
), `42461` = c(0, NA, 12, NA), `42491` = c(1, 12, NA, NA), `42522` = c(1,
12, 7, NA), `42552` = c(NA, 12, 6, NA), `42583` = c(2, NA, 8,
NA), `42614` = c(NA, 12, 5, NA), `42644` = c(NA, NA, 4, NA),
`42675` = c(NA, 12, NA, NA), `42705` = c(NA, 3, NA, NA),
`42736` = c(NA, NA, 12, NA), `42767` = c(NA, NA, 12, NA),
`42795` = c(NA, 12, NA, NA), Total = c(0, 0, 0, 0)), .Names = c("Year",
"42461", "42491", "42522", "42552", "42583", "42614", "42644",
"42675", "42705", "42736", "42767", "42795", "Total"), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -4L))
I would like to pivot it to look like:
Category-Values-Year
I tried the following:
datdat %>% gather(Cat,Var)
but the problem is that the year is the name of each column.
I removed the "Totals" column, I'm not sure if this is what you're asking for:
library(data.table)

# Wide input: one row per category, one column per period, plus a "Total"
# column that is not wanted in the long result.
dat <- data.table(
  structure(
    list(
      Year = c("Oranges", "Cherrys", "Apples", "Bananas"),
      `42461` = c(0, NA, 12, NA),
      `42491` = c(1, 12, NA, NA),
      `42522` = c(1, 12, 7, NA),
      `42552` = c(NA, 12, 6, NA),
      `42583` = c(2, NA, 8, NA),
      `42614` = c(NA, 12, 5, NA),
      `42644` = c(NA, NA, 4, NA),
      `42675` = c(NA, 12, NA, NA),
      `42705` = c(NA, 3, NA, NA),
      `42736` = c(NA, NA, 12, NA),
      `42767` = c(NA, NA, 12, NA),
      `42795` = c(NA, 12, NA, NA),
      Total = c(0, 0, 0, 0)
    ),
    .Names = c(
      "Year", "42461", "42491", "42522", "42552", "42583", "42614",
      "42644", "42675", "42705", "42736", "42767", "42795", "Total"
    ),
    class = c("tbl_df", "tbl", "data.frame"),
    row.names = c(NA, -4L)
  )
)

# The first column actually holds the fruit category, not a year.
names(dat)[1] <- "Category"

# Remove the totals before reshaping.
dat[, Total := NULL]

# Long format: one row per (Category, Year) pair with its value.
melt.dat <- melt(dat, id.vars = "Category", variable.name = "Year")
melt.dat gives you:
> head (melt.dat)
Category Year value
1: Oranges 42461 0
2: Cherrys 42461 NA
3: Apples 42461 12
4: Bananas 42461 NA
5: Oranges 42491 1
6: Cherrys 42491 12
Also note, the table is a data.table, not a data.frame :)
Forgot to mention, run install.packages ("data.table") if you don't have it yet

Resources