Creating one hot encoded columns while preserving other features

Creating one hot encoded columns while preserving other features - r

I've got the following data:
dataset <- structure(list(id = structure(c(2L, 3L, 1L, 3L, 1L, 9L), .Label = c("215101",
"215559", "216566", "217284", "219435", "220209", "220249", "220250",
"225678", "225679", "225687", "225869", "228420", "228435", "230621",
"230623", "233063", "233097", "233098", "235546", "235560", "235567",
"236379"), class = "factor"), cat1 = c("A", "B", "B", "A", "A",
"A"), cat2 = c("item 1", "item 1", "item 2", "item 5", "item 3",
"item 28"), cat3 = c("theme 2", "theme 2", "theme 1", "theme 4",
"theme 10", "theme 40")), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -6L))
I would like to create kind of model matrix with one hot encoded columns features created from columns cat2 and cat3. Therefore, my output would look like this:
structure(list(id = structure(c(1L, 1L, 2L, 3L, 3L, 9L), .Label = c("215101",
"215559", "216566", "217284", "219435", "220209", "220249", "220250",
"225678", "225679", "225687", "225869", "228420", "228435", "230621",
"230623", "233063", "233097", "233098", "235546", "235560", "235567",
"236379"), class = "factor"), cat1 = c("A", "B", "A", "A", "B",
"A"), `item 1` = c(0, 0, 1, 0, 1, 0), `item 2` = c(0, 1, 0, 0,
0, 0), `item 28` = c(0, 0, 0, 0, 0, 1), `item 3` = c(1, 0, 0,
0, 0, 0), `item 5` = c(0, 0, 0, 1, 0, 0), `theme 1` = c(0, 1,
0, 0, 0, 0), `theme 10` = c(1, 0, 0, 0, 0, 0), `theme 2` = c(0,
0, 1, 0, 1, 0), `theme 4` = c(0, 0, 0, 1, 0, 0), `theme 40` = c(0,
0, 0, 0, 0, 1)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-6L))
However, I don't have my independent variable in this dataset and I would like to preserve id and cat1 columns. How can I do that?

You could use merge and dcast twice.
library(reshape2)
merge(dcast(dataset, id + cat1 ~ cat2, fun.aggregate = length),
dcast(dataset, id + cat1 ~ cat3, fun.aggregate = length),
by = c("id", "cat1"))
# id cat1 item 1 item 2 item 28 item 3 item 5 theme 1 theme 10 theme 2 theme 4 theme 40
#1 215101 A 0 0 0 1 0 0 1 0 0 0
#2 215101 B 0 1 0 0 0 1 0 0 0 0
#3 215559 A 1 0 0 0 0 0 0 1 0 0
#4 216566 A 0 0 0 0 1 0 0 0 1 0
#5 216566 B 1 0 0 0 0 0 0 1 0 0
#6 225678 A 0 0 1 0 0 0 0 0 0 1
If you have more then two variables to spread you might melt you data first. This will save you some typing.
dcast(melt(dataset, id.vars = c("id", "cat1")), id + cat1 ~ value, fun.aggregate = length)

Related

Converting binary to list

I have somating like a binary dataframe
> dput(head(dat))
structure(list(CDR3.aa = c("CALWEVQELGKKIKVF", "CAATVGGWGKLQF",
"CACDPLYGGITGGFNTDKLIF", "CACDTLLPTSLGDMAKLIF", "CALGELSSDGGGAIF",
"CALSNTGGFKTIF"), TCR_CS001_T1 = c(1, 1, 1, 1, 1, 0), TCR_CS001_T2 = c(0,
1, 1, 1, 1, 0), TCR_CS002 = c(1, 0, 0, 0, 0, 0), TCR_HC002 = c(0,
0, 0, 0, 0, 1), TCR_HC003 = c(1, 0, 0, 0, 0, 1)), row.names = c(NA,
-6L), .internal.selfref = <pointer: 0x0000023f7a101ef0>, class = c("immunr_public_repertoire",
"data.table", "data.frame"))
That shows if an amin acide exists in a sample we see 1 and if absent shown by 0
I want to replace 1 and 0 by amino acid itself
How I can do that please?

If CDR3.aa is the amino acid column you can do :
dplyr :
library(dplyr)
dat %>% mutate(across(-CDR3.aa, ~ifelse(. == 1, CDR3.aa, .)))
# CDR3.aa TCR_CS001_T1 TCR_CS001_T2 TCR_CS002
#1: CALWEVQELGKKIKVF CALWEVQELGKKIKVF 0 CALWEVQELGKKIKVF
#2: CAATVGGWGKLQF CAATVGGWGKLQF CAATVGGWGKLQF 0
#3: CACDPLYGGITGGFNTDKLIF CACDPLYGGITGGFNTDKLIF CACDPLYGGITGGFNTDKLIF 0
#4: CACDTLLPTSLGDMAKLIF CACDTLLPTSLGDMAKLIF CACDTLLPTSLGDMAKLIF 0
#5: CALGELSSDGGGAIF CALGELSSDGGGAIF CALGELSSDGGGAIF 0
#6: CALSNTGGFKTIF 0 0 0
# TCR_HC002 TCR_HC003
#1: 0 CALWEVQELGKKIKVF
#2: 0 0
#3: 0 0
#4: 0 0
#5: 0 0
#6: CALSNTGGFKTIF CALSNTGGFKTIF
data.table :
library(data.table)
dat[, (names(dat)[-1]) := lapply(.SD, function(x) ifelse(x == 1, CDR3.aa, x)), .SDcols = -1]

Create column in a data.frame with a specific condition

I would like to create a column in a data.frame, placing the first time that the year appears in each id.
That is, I have this data:
example <- structure(list(id = structure(c(1, 2, 3, 4, 5), class = "numeric"),
`2007` = c(0, 0, 0, 0, 0), `2008` = c(0, 0, 0, 0, 1), `2009` = c(1,
0, 0, 0, 0), `2010` = c(1, 0, 1, 0, 1), `2011` = c(0, 0,
0, 0, 0), `2012` = c(1, 0, 1, 1, 1), `2013` = c(1, 0, 1,
0, 1), `2014` = c(1, 1, 1, 1, 0), `2015` = c(1, 1, 0, 0,
0), `2016` = c(1, 1, 1, 0, 1)), row.names = c(NA, 5L), class = "data.frame")
And I would like to get the following:
example2 <- structure(list(id = structure(c(1, 2, 3, 4, 5), class = "numeric"),
`2007` = c(0, 0, 0, 0, 0), `2008` = c(0, 0, 0, 0, 1), `2009` = c(1,
0, 0, 0, 0), `2010` = c(1, 0, 1, 0, 1), `2011` = c(0, 0,
0, 0, 0), `2012` = c(1, 0, 1, 1, 1), `2013` = c(1, 0, 1,
0, 1), `2014` = c(1, 1, 1, 1, 0), `2015` = c(1, 1, 0, 0,
0), `2016` = c(1, 1, 1, 0, 1), situation = c(2009, 2014,
2010, 2012, 2008)), row.names = c(NA, 5L), class = "data.frame")
Is it possible to do that ? Every help is welcome. Thanks.

Try this:
#Code
example$situation <- apply(example[,-1],1,function(x) names(x)[min(which(x==1))])
Output:
example
id 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 situation
1 1 0 0 1 1 0 1 1 1 1 1 2009
2 2 0 0 0 0 0 0 0 1 1 1 2014
3 3 0 0 0 1 0 1 1 1 0 1 2010
4 4 0 0 0 0 0 1 0 1 0 0 2012
5 5 0 1 0 1 0 1 1 0 0 1 2008
Or with dplyr and tidyr reshaping and merging:
library(dplyr)
library(tidyr)
#Code
example <- example %>%
left_join(
example %>% pivot_longer(-1) %>%
group_by(id) %>%
summarise(situation=name[min(which(value==1))])
)
Same output.

Include all variables in tsibble formula

I want to fit a linear regression model using the tsibble package and I have a bunch of dummy variables that I want to include in my analysis. A sample dataset would be the following:
library(tsibble)
library(dplyr)
library(fable)
ex = structure(list(id = c("KEY1", "KEY1", "KEY1", "KEY1", "KEY1",
"KEY1", "KEY1", "KEY1", "KEY1", "KEY1", "KEY1", "KEY1", "KEY1",
"KEY1", "KEY1"), sales = c(0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0), date = structure(c(15003, 15004, 15005, 15006, 15007,
15008, 15009, 15010, 15011, 15012, 15013, 15014, 15015, 15016,
15017), class = "Date"), wday = c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L), dummy_1 = c(0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), dummy_2 = c(0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0), dummy_3 = c(0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -15L), key = structure(list(
id = "KEY1", .rows = list(1:15)), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), index = structure("date", ordered = TRUE), index2 = "date", interval = structure(list(
year = 0, quarter = 0, month = 0, week = 0, day = 1, hour = 0,
minute = 0, second = 0, millisecond = 0, microsecond = 0,
nanosecond = 0, unit = 0), class = "interval"), class = c("tbl_ts",
"tbl_df", "tbl", "data.frame"))
> ex
# A tsibble: 15 x 7 [1D]
# Key: id [1]
id sales date wday dummy_1 dummy_2 dummy_3
<chr> <dbl> <date> <int> <dbl> <dbl> <dbl>
1 KEY1 0 2011-01-29 1 0 0 0
2 KEY1 5 2011-01-30 2 0 0 0
3 KEY1 0 2011-01-31 3 0 0 1
4 KEY1 0 2011-02-01 4 1 0 0
5 KEY1 0 2011-02-02 5 0 0 0
6 KEY1 0 2011-02-03 6 0 0 0
7 KEY1 0 2011-02-04 7 0 1 0
8 KEY1 0 2011-02-05 1 0 0 0
9 KEY1 0 2011-02-06 2 0 0 0
10 KEY1 0 2011-02-07 3 0 0 0
11 KEY1 0 2011-02-08 4 0 0 0
12 KEY1 0 2011-02-09 5 0 0 0
13 KEY1 0 2011-02-10 6 0 0 0
14 KEY1 0 2011-02-11 7 0 0 0
15 KEY1 0 2011-02-12 1 0 0 0
They are too many dummies to specify manually so I was hoping for something faster. Normally I would use the . symbol in the formula in the following way:
fit = ex %>%
model(TSLM(sales ~ trend() + season() + .))
But this does not work:
Warning message:
1 error encountered for TSLM(sales ~ trend() + season() + .)
[1] '.' in formula and no 'data' argument
Is there a systematic tsibble way around this or do I have to create the formula on the fly using the names of the dataset?

We could create a formula with reformulate using the 'dummy' column names
nm1 <- names(ex)[startsWith(names(ex), 'dummy')]
ex %>%
model(lm = TSLM(reformulate(c(nm1, 'trend()', 'season()'), 'sales') ))

Add empty rows for gaps between subscriptions

I have been struggling with this for a while now and I haven't been able to find a comparable question asked anywhere, hence my first question on here!
I'm fairly new to R so please excuse any obvious errors I have made.
I have a dataset which has a row for each subscription that a user has or has had. Some users have multiple rows, while some others only have one. Only active or previously active subscriptions are present.
I have two variables which state when the subscription has started and when it ended called, Begindate and Enddate respectively. I already have relationlength variables created which state the amount of days between these two variables for each type of subscription. This means that the relationlength variables only give the amount of days for when a subscription was active.
What I would like to do is create empty rows in between the different subscription rows for the time periods in which no subscription was active, starting from the earliest Begindate known for the specific user and ending on a given date where all subscriptions end (20-04-2022).
I have tried to compare the date difference from the first begindate known for a user and the final date and subtracting the relation length known for the other subscription types. However, I could not make this work.
An example of what the df currently looks like:
(rl standing for relationlength)
ID Begindate Enddate Subscrtype active rl_fixed rl_promotional Productgroup
1 2019-08-26 2022-04-20 fixed 1 968 0 1
1 2018-08-24 2019-08-23 fixed 0 364 0 1
1 2015-08-24 2016-08-23 promo 0 0 364 2
2 2019-08-26 2019-09-12 fixed 0 17 0 1
2 2018-08-24 2019-08-23 fixed 0 364 0 1
What I would like it to look like:
ID Begindate Enddate Subscrtype active rl_fixed rl_promo rl_none Productgroup
1 2019-08-26 2022-04-20 fixed 1 968 0 0 1
1 2019-08-24 2019-08-25 none 0 0 0 2 NA
1 2018-08-24 2019-08-23 fixed 0 364 0 0 1
1 2016-08-24 2018-08-23 none 0 0 0 729 NA
1 2015-08-24 2016-08-23 promo 0 0 364 0 2
2 2019-09-13 2022-04-20 none 0 0 0 950 NA
2 2019-08-26 2019-09-12 fixed 0 17 0 0 1
2 2019-08-24 2019-08-25 none 0 0 0 2 NA
2 2018-08-24 2019-08-23 fixed 0 364 0 0 1
The end goal is to aggregate and have a clear overview of the specific relation lengths for the different types of relations possible for a user.
Thank you in advance!
dput for one specific user in the real df:
structure(list(ï..CRM.relatienummer = structure(c(1L, 1L, 1L,
1L, 1L, 1L), .Label = "1", class = "factor"), Begindatum = c("2019-08-26",
"2018-08-24", "2017-08-24", "2016-08-24", "2015-08-20", "2016-06-01"
), Einddatum = c("2022-04-20", "2019-08-23", "2018-08-23", "2017-08-23",
"2016-05-31", "2016-08-19"), Type.abonnement = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "Actie", class = "factor"), Status_dummy = c(1,
0, 0, 0, 0, 0), relationlength_fixed = c(0, 0, 0, 0, 0, 0), relationlength_promo = c(968,
364, 364, 364, 285, 79), relationlength_trial = c(0, 0, 0, 0,
0, 0), fixed_dummy = c(0, 0, 0, 0, 0, 0), trial_dummy = c(0,
0, 0, 0, 0, 0), promotional_dummy = c(1, 1, 1, 1, 1, 1)), row.names = c("1:20610",
"2:38646", "2:39231", "2:39232", "2:39248", "2:39837"), class = "data.frame")
Edit:
I have tried to run this code:
dfs <- split(testdata,testdata$ï..CRM.relatienummer)
r <- lapply(seq(length(dfs)), function(k){
v <- dfs[[k]]
vt <- data.frame(unique(v$ï..CRM.relatienummer),
as.character((as.Date(v$Einddatum)+1)[-1]),
as.character((as.Date(v$Begindatum)-1)[-nrow(v)]),
0,
0,
0,
0,
(as.Date(v$Begindatum)-1)[-nrow(v)] - (as.Date(v$Einddatum)+1)[-1],
NA,
0,
0,
0,
0,
0)
colnames(vt) <- c(colnames(v)[-ncol(v)],"rl_none",colnames(v)[ncol(v)])
(testdata <- rbind(data.frame(v[-ncol(v)],rl_none = 0,v[ncol(v)]),vt))[order(as.Date(testdata$Begindatum),decreasing = T),]
})
res <- data.frame(Reduce(rbind,r),row.names = NULL)
On this dataframe, with no luck unfortunately:
structure(list(ï..CRM.relatienummer = structure(c("d45248b8974dc4f8ff948779e0fd07e20f304e929ada4e14c0420aebed81e9b5",
"2ab04e80b3e64601147df977d6054c04ffa80014b3691b25dd1cc8ef85cea06a",
"2ab04e80b3e64601147df977d6054c04ffa80014b3691b25dd1cc8ef85cea06a",
"bcf2c99e6dc974380f967204b9623dce2c8a3fad694dc0b4430fcbf77f8f39f3",
"bcf2c99e6dc974380f967204b9623dce2c8a3fad694dc0b4430fcbf77f8f39f3",
"f8610cd0237858ac9384d6ba209759ae306860ffabb3f8e6c3d6fc68dbaddc51",
"e5b8b3f46165e48aec8bbe65ed1cb29d18a0492fbcac44803372f672348459db",
"c737815b2365b01a8a85c380364a0f721685a131de98cd7790b4d40bb8c4e05b",
"b9c0272caa8d5d3497d28cce3bda5d3d17c22f18c5f65c5e82c572b410a8ea71",
"b9c0272caa8d5d3497d28cce3bda5d3d17c22f18c5f65c5e82c572b410a8ea71",
"539c6c3e604245008daefbe500ff29357bee91f82a7896126bd0f69848524cb7",
"d361338bed51cb9c8aa73fd8914cbf392f4e05e7b073f637f7b150cf02b89c8c",
"505d3df3f1298e07aa96073490b72acd2391da06ad4cfbd5a9fbde3a3de79684",
"826443481cbb5b4e061040d443a0ce8d94322615d8ffae1e68b2ff7d896afcf7",
"2b59a1ec028c261c0f22cd6a49220dc7cec9a9fb0fabe2296b4ba77a60cfdaae"
), class = c("hash", "sha256")), Begindatum = c("2019-06-14",
"2019-03-01", "2019-09-02", "2019-03-03", "2019-04-01", "2019-09-21",
"2019-02-02", "2019-06-11", "2019-02-05", "2019-02-09", "2019-07-24",
"2019-05-08", "2019-09-27", "2019-08-03", "2019-04-03"), Einddatum = c("2022-04-20",
"2019-09-01", "2022-04-20", "2019-03-31", "2022-04-20", "2022-04-20",
"2019-02-14", "2019-07-08", "2019-02-11", "2020-02-08", "2019-09-03",
"2019-06-18", "2019-11-07", "2019-08-16", "2022-04-20"), Status_dummy = c(1,
0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1), relationlength_fixed = c(0,
184, 961, 28, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0), relationlength_promo = c(1041,
0, 0, 0, 1115, 942, 12, 0, 0, 364, 0, 0, 0, 0, 1113), relationlength_trial = c(0,
0, 0, 0, 0, 0, 0, 27, 0, 0, 41, 41, 41, 13, 0), rl_none = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), fixed_dummy = c(0,
1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0), trial_dummy = c(0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0), promotional_dummy = c(1,
0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1), active_subscr_dummy = c(3,
0, 5, 0, 3, 3, 0, 0, 0, 3, 0, 0, 1, 0, 3), hashedEmail = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c("1:1",
"1:2", "1:3", "1:4", "1:5", "1:6", "1:7", "1:8", "1:9", "1:10",
"1:11", "1:12", "1:13", "1:14", "1:15"), class = "data.frame")

Hopefully this is what you are expecting
dfs <- split(df,df$ID)
r <- lapply(seq(length(dfs)), function(k){
v <- dfs[[k]]
vt <- data.frame(unique(v$ID),
as.character((as.Date(v$Enddate)+1)[-1]),
as.character((as.Date(v$Begindate)-1)[-nrow(v)]),
"none",
0,
0,
0,
(as.Date(v$Begindate)-1)[-nrow(v)] - (as.Date(v$Enddate)+1)[-1],
NA)
colnames(vt) <- c(colnames(v)[-ncol(v)],"rl_none",colnames(v)[ncol(v)])
(df <- rbind(data.frame(v[-ncol(v)],rl_none = 0,v[ncol(v)]),vt))[order(as.Date(df$Begindate),decreasing = T),]
})
res <- data.frame(Reduce(rbind,r),row.names = NULL)
which gives
> res
ID Begindate Enddate Subscrtype active rl_fixed rl_promo rl_none Productgroup
1 1 2019-08-26 2022-04-20 fixed 1 968 0 0 1
2 1 2019-08-24 2019-08-25 none 0 0 0 1 NA
3 1 2018-08-24 2019-08-23 fixed 0 364 0 0 1
4 1 2016-08-24 2018-08-23 none 0 0 0 729 NA
5 1 2015-08-24 2016-08-23 promo 0 0 364 0 2
6 2 2019-08-26 2019-09-12 fixed 0 17 0 0 1
7 2 2019-08-24 2019-08-25 none 0 0 0 1 NA
8 2 2018-08-24 2019-08-23 fixed 0 364 0 0 1
DATA
structure(list(ID = c(1L, 1L, 1L, 2L, 2L), Begindate = structure(c(3L,
2L, 1L, 3L, 2L), .Label = c("2015-08-24", "2018-08-24", "2019-08-26"
), class = "factor"), Enddate = structure(c(4L, 2L, 1L, 3L, 2L
), .Label = c("2016-08-23", "2019-08-23", "2019-09-12", "2022-04-20"
), class = "factor"), Subscrtype = structure(c(1L, 1L, 2L, 1L,
1L), .Label = c("fixed", "promo"), class = "factor"), active = c(1L,
0L, 0L, 0L, 0L), rl_fixed = c(968L, 364L, 0L, 17L, 364L), rl_promo = c(0L,
0L, 364L, 0L, 0L), Productgroup = c(1L, 1L, 2L, 1L, 1L)), class = "data.frame", row.names = c(NA,
-5L))

Sort character in vector of string in R

I have data like,
df <- structure(list(Sex = structure(c(1L, 1L, 2L, 1L, 2L, 2L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("F", "M"), class = "factor"),
Age = c(19L, 16L, 16L, 13L, 16L, 30L, 16L, 30L, 16L, 30L,
30L, 16L, 19L, 1L, 30L), I = c(1, 1, 0, 0, 1, 0, 1, 0, 1,
0, 0, 0, 1, 0, 1), E = c(0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1,
1, 0, 1, 0), S = c(1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0,
0, 1), N = c(0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0),
F = c(1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1), T = c(0,
1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0), C = c(1, 1, 1,
0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1), D = c(0, 0, 0, 1, 0,
1, 0, 1, 0, 1, 1, 1, 1, 0, 0), type = c("CIFS", "CITN", "CESF",
"DEFS", "CIFN", "DETS", "CITS", "DEFS", "CIFN", "DEFN", "DETS",
"DETS", "DINF", "CENT", "CIFS"), PO = runif(15, -3, 3), AO = runif(15, -3, 3)), .Names = c("Sex",
"Age", "I", "E", "S", "N", "F", "T", "C", "D", "type", "PO",
"AO"), class = c("tbl_dt", "tbl", "data.table", "data.frame"), row.names = c(NA,
-15L))
I want to sort the column type. Not the column but the characters in it. And get the same structure afterwards. For example, CIFS should then be CFIS. I tried to do it as,
df <- within(df, {
type <- apply(sapply(strsplit(df[, type], split=''), sort), 2,
function(x) paste0(x, collapse = ''))
})
Is there any simpler solution, that I have missed to find.

Since you are using data.table, I would suggest
df[, type := paste(sort(unlist(strsplit(type, ""))), collapse = ""), by = type]
like described in How to sort letters in a string?

This should work for both data.frame and data.table (base R only):
df$type <- vapply(strsplit(df$type, split=''),FUN=function(x)paste(sort(x),collapse=''),"")
Result:
> df
Sex Age I E S N F T C D type PO AO
1 F 19 1 0 1 0 1 0 1 0 CFIS 2.9750666 2.0308410
2 F 16 1 0 0 1 0 1 1 0 CINT 0.7902187 2.0891158
3 M 16 0 1 1 0 1 0 1 0 CEFS -1.7173785 2.4774140
4 F 13 0 1 1 0 1 0 0 1 DEFS 1.5352127 -1.9272470
5 M 16 1 0 0 1 1 0 1 0 CFIN -0.2160741 1.7359897
6 M 30 0 1 1 0 0 1 0 1 DEST 2.6314981 -0.6252466
7 F 16 1 0 1 0 0 1 1 0 CIST -1.6032894 -1.9938226
8 M 30 0 1 1 0 1 0 0 1 DEFS 0.7748583 -2.0935737
9 F 16 1 0 0 1 1 0 1 0 CFIN -2.9368356 0.3363364
10 F 30 0 1 0 1 1 0 0 1 DEFN -0.6506217 2.6681535
11 F 30 0 1 1 0 0 1 0 1 DEST -0.4432578 0.4627441
12 F 16 0 1 1 0 0 1 0 1 DEST 2.0236760 2.7684298
13 F 19 1 0 0 1 1 0 0 1 DFIN -1.1774931 2.6546726
14 F 1 0 1 0 1 0 1 1 0 CENT -2.2365388 2.7902646
15 F 30 1 0 1 0 1 0 1 0 CFIS -1.6139238 -2.4982620

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Creating one hot encoded columns while preserving other features - r

Related

Converting binary to list

Create column in a data.frame with a specific condition

Include all variables in tsibble formula

Add empty rows for gaps between subscriptions

Sort character in vector of string in R

Categories

Resources