Get index of date with max years and min month

Get index of date with max years and min month - r

I have a dataset of 3 variables: ID, Date and Years_service. Like this:
library(data.table)
data <- structure(list(ID = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), Date = structure(c(1230768000,
1233446400, 1235865600, 1238544000, 1241136000, 1243814400, 1246406400,
1249084800, 1251763200, 1254355200, 1257033600, 1259625600, 1262304000,
1264982400, 1267401600, 1270080000, 1272672000, 1275350400, 1277942400,
1280620800, 1283299200, 1285891200, 1288569600, 1291161600, 1293840000,
1296518400, 1298937600, 1301616000, 1304208000, 1306886400, 1309478400,
1312156800, 1314835200, 1317427200, 1320105600, 1322697600, 1325376000,
1328054400, 1330560000, 1333238400, 1335830400, 1338508800, 1341100800,
1343779200, 1346457600, 1349049600, 1351728000, 1354320000, 1356998400,
1359676800, 1362096000, 1364774400, 1367366400, 1370044800, 1372636800,
1375315200, 1377993600, 1380585600, 1383264000, 1385856000, 1388534400,
1391212800, 1393632000, 1396310400, 1398902400, 1401580800, 1404172800,
1406851200, 1409529600, 1412121600, 1414800000, 1417392000, 1420070400,
1422748800, 1425168000, 1427846400, 1430438400, 1433116800, 1435708800,
1438387200, 1441065600, 1443657600, 1446336000, 1448928000, 1451606400,
1454284800, 1456790400, 1459468800, 1462060800, 1464739200, 1467331200,
1470009600, 1472688000, 1475280000, 1330560000, 1333238400, 1335830400,
1338508800, 1341100800, 1343779200, 1346457600, 1349049600, 1351728000,
1354320000, 1356998400, 1359676800, 1362096000, 1364774400, 1367366400,
1370044800, 1372636800, 1375315200, 1377993600, 1380585600, 1383264000,
1385856000, 1388534400, 1391212800, 1393632000, 1396310400, 1398902400,
1401580800, 1404172800, 1406851200, 1409529600, 1412121600, 1414800000,
1417392000, 1420070400, 1422748800, 1425168000, 1427846400, 1430438400,
1433116800, 1435708800, 1438387200, 1441065600, 1443657600, 1446336000,
1448928000, 1451606400, 1454284800, 1456790400, 1459468800, 1462060800,
1464739200, 1467331200, 1470009600, 1472688000, 1475280000), class =
c("POSIXct",
"POSIXt"), tzone = "UTC"), Years_service = c(19, 19, 19, 19,
19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 8, 8, 8, 8, 8, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13),
month_1 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2,
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5,
6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)), .Names = c("ID", "Date",
"Years_service", "month_1"), row.names = c(NA, -150L), class =
c("data.table",
"data.frame"))
I want a new variable that contains for each ID the date for which years of service is maximum and the month of date is minimum. Something like this:
ID Date Years_service Date_1
1: 1 2009-01-01 19 2016-06-01
2: 1 2009-02-01 19 2016-06-01
3: 1 2009-03-01 19 2016-06-01
4: 1 2009-04-01 19 2016-06-01
5: 1 2009-05-01 19 2016-06-01
---
146: 2 2016-06-01 12 2016-08-01
147: 2 2016-07-01 12 2016-08-01
148: 2 2016-08-01 13 2016-08-01
149: 2 2016-09-01 13 2016-08-01
150: 2 2016-10-01 13 2016-08-01
My desired output is Date_1
I tried this:
# Attempt: per ID, index Date by the row with max Years_service and min month_1.
# NOTE(review): which.max()/which.min() each return an integer position, and
# `&` combines them into a single logical value rather than a row index.
data[,Date_1 := Date[which.max(Years_service) & which.min(month_1)], by = ID]
but it didn't work.
How can I achieve this?

One option is to get the row index (.I) of the rows where 'Years_service' is at its maximum for each 'ID'. Using those rows, find the position of the minimum 'month_1' and take the corresponding 'Date', grouped by 'ID'. Then join the result back onto the original data on the 'ID' column to create the 'Date_1' column.
# Row numbers (.I) of the rows holding each ID's maximum Years_service
max_rows <- data[, .I[Years_service == max(Years_service)], by = ID]$V1
# Inner query: among those rows, the Date at the smallest month_1 per ID
# (returned as column V1). Outer query: update join that copies V1 onto
# every row of the matching ID.
data[
  data[max_rows, Date[which.min(month_1)], by = ID],
  Date_1 := V1,
  on = .(ID)
]
data
# ID Date Years_service month_1 Date_1
# 1: 1 2009-01-01 19 1 2016-06-01
# 2: 1 2009-02-01 19 2 2016-06-01
# 3: 1 2009-03-01 19 3 2016-06-01
# 4: 1 2009-04-01 19 4 2016-06-01
# 5: 1 2009-05-01 19 5 2016-06-01
# ---
#146: 2 2016-06-01 12 6 2016-08-01
#147: 2 2016-07-01 12 7 2016-08-01
#148: 2 2016-08-01 13 8 2016-08-01
#149: 2 2016-09-01 13 9 2016-08-01
#150: 2 2016-10-01 13 10 2016-08-01
Or extract the 'Date' corresponding to the minimum 'month_1' from within .SD (the Subset of Data.table)
# Per ID: keep the rows of .SD at the maximum Years_service, then take the
# Date found at the smallest month_1 among them
data[, Date_1 := {
  top_rows <- .SD[Years_service == max(Years_service)]
  top_rows[, Date[which.min(month_1)]]
}, by = ID]
Another option is to order the rows and assign 'Date_1' as the first 'Date' grouped by 'ID'
# Sort by descending Years_service, then ascending month_1; the first Date
# seen in each ID group is then the one wanted
data[order(-Years_service, month_1), Date_1 := head(Date, 1L), by = ID]
Or using tidyverse
library(tidyverse)
# Sort so the wanted row comes first within each ID, tag every row of the ID
# with that row's Date, then drop the grouping so later verbs are not
# silently applied per ID
data %>%
  group_by(ID) %>%
  arrange(desc(Years_service), month_1) %>%
  mutate(Date_1 = first(Date)) %>%
  ungroup()

Related

Find the total number of times each of a possible range of values occurs across three separate variables in R

I have data which looks like this:
library(stringi) # stri_rand_strings() is exported by stringi, not stringr
library(stringr)
library(dplyr)
library(magrittr)
# Possible code values
Codes <- c(1, 2, 3, 4, 5, 6, 9)
# Same values with 100 extra 9s appended, so sampling from it mostly yields 9
Codes2 <- c(Codes, rep(9, 100))
# 100 IDs made of 4 random capital letters followed by 3 random digits
data <- data.frame(
  MASTER_HCU_DI = do.call(paste0, Map(stri_rand_strings, n = 100, length = c(4, 3),
                                      pattern = c('[A-Z]', '[0-9]'))),
  CODE_1 = sample(Codes, 100, replace = TRUE))
# CODE_2 is forced to 9 when CODE_1 is 9; CODE_3 is forced to 9 when CODE_2 is 9
data %<>%
  mutate(CODE_2 = if_else(CODE_1 == 9, 9, sample(Codes2, 100, replace = TRUE)),
         CODE_3 = if_else(CODE_2 == 9, 9, sample(Codes2, 100, replace = TRUE)))
What I want to do is find the total number of people with each of the possible values of CODE_1, CODE_2, and CODE_3; across all three Codes.
Where all of someone's CODE start with a 9, they are counted as missing. Otherwise, I'd like to ignore the CODE values which start with a 9.
This code does what I want, but seems cumbersome:
# Flag, per person, whether each code value 1-6 appears in any of the three
# CODE columns; Missing is 1 only when all three codes are 9
data %<>%
  mutate(Sum_grp1 = if_else(CODE_1 == 1 | CODE_2 == 1 | CODE_3 == 1, 1, 0),
         Sum_grp2 = if_else(CODE_1 == 2 | CODE_2 == 2 | CODE_3 == 2, 1, 0),
         Sum_grp3 = if_else(CODE_1 == 3 | CODE_2 == 3 | CODE_3 == 3, 1, 0),
         Sum_grp4 = if_else(CODE_1 == 4 | CODE_2 == 4 | CODE_3 == 4, 1, 0),
         # second test compares CODE_2 (there is no column named CODE)
         Sum_grp5 = if_else(CODE_1 == 5 | CODE_2 == 5 | CODE_3 == 5, 1, 0),
         Sum_grp6 = if_else(CODE_1 == 6 | CODE_2 == 6 | CODE_3 == 6, 1, 0),
         Missing = if_else(CODE_1 == 9 & CODE_2 == 9 & CODE_3 == 9, 1, 0))
# Total the flags into one row per group
Group_counts <- data.frame(
  Group = c("Group_1", "Group_2", "Group_3", "Group_4", "Group_5", "Group_6", "Missing"),
  Sum = c(sum(data$Sum_grp1 == 1),
          sum(data$Sum_grp2 == 1),
          sum(data$Sum_grp3 == 1),
          sum(data$Sum_grp4 == 1),
          sum(data$Sum_grp5 == 1),
          sum(data$Sum_grp6 == 1),
          sum(data$Missing == 1)))
Expected output looks like this:
Is there an easier way to do this?
Thanks.

You can get the data in long format and use count -
library(dplyr)
library(tidyr)
# Stack every column except the ID into name/value pairs, then tally each pair
data %>%
  pivot_longer(cols = -MASTER_HCU_DI) %>%
  count(name, value)

Is this what you expect?
# Long format, then add up the values within each original column
data %>%
  pivot_longer(cols = -MASTER_HCU_DI) %>%
  group_by(name) %>%
  summarise(Sum = sum(value), .groups = 'drop')
# A tibble: 3 x 2
name Sum
<chr> <dbl>
1 GROUP_1 409
2 GROUP_2 897
3 GROUP_3 900

As I understand it, the following functionality outlined in the question is not addressed by the existing answers:
Where all of someone's CODE start with a 9, they are counted as missing. Otherwise, I'd like to ignore the CODE values which start with a 9.
Here is my approach to include this functionality:
library(purrr)
library(dplyr)
data %>%
  # one row per person: how often each code value occurs across that person's
  # columns, with the first element (the ID) dropped
  pmap_dfr(~ table(c(...)[-1])) %>%
  set_names(~ paste0("Group_", .x)) %>%
  # a person counts as Missing only when all three codes are 9
  mutate(Missing = ifelse(`Group_9` == 3, 1, NA)) %>%
  select(-`Group_9`) %>%
  # TRUE spelled out: T is an ordinary variable and can be reassigned
  colSums(na.rm = TRUE) %>%
  tibble::tibble(Group = names(.), Sum = .) %>%
  arrange(Group)
Returns:
# A tibble: 7 x 2
Group Sum
<chr> <dbl>
1 Group_1 23
2 Group_2 13
3 Group_3 16
4 Group_4 13
5 Group_5 13
6 Group_6 11
7 Missing 15
Data used:
data <- structure(list(MASTER_HCU_DI = c("VBHT228", "CAAO199", "NDDI124", "AVZV996", "KMOP513", "AALT248", "IGZC617", "ZDHO229", "GXYV745", "PDTW465", "SEPM505", "ZJWQ323", "VRRU692", "NHOY962", "BBFR276", "NVML939", "VHPV534", "YTXG467", "BOCT360", "ONEO498", "CICL849", "SAIK461", "NZGL739", "NIFD497", "XMVE276", "JHZM922", "LCLV707", "BPKN209", "YTZU211", "LUNI891", "CQTC089", "FBDZ269", "VKCI112", "BLJH968", "LLML439", "TDRV973", "RTFR863", "GZAN917", "WSUI006", "JILN883", "CAHM719", "JCMI028", "BGFZ774", "BGVZ374", "WBUJ792", "DLVT690", "AVKE534", "TDPU030", "SKFI697", "UCLY688", "OODZ687", "IIPR924", "TSES431", "CQSN693", "ZQGJ398", "FMGH661", "ZORF207", "MDWD343", "OBDM142", "SATV193", "MUKZ136", "INAE029", "MWDB125", "JUXN395", "LQGW143", "ALKP557", "WQAR962", "UYZI622", "WKYM520", "WUMH621", "GLRV451", "ISHG990", "OCNW161", "WQMS244", "UQEF227", "IAEZ636", "TEZJ280", "GCCJ844", "EVTF869", "JGJH568", "MDPH890", "EHKR422", "NBIM361", "XEWM477", "PBJP921", "FGEG840", "UJOO120", "XZTB081", "GXCQ610", "ANAR117", "TNIP023", "GLFN787", "SYYV532", "GOTY296", "TXME798", "SUZK405", "VWHY631", "HAXW159", "CCJN761", "GGUN719"), GROUP_1 = c(6, 1, 4, 9, 1, 3, 3, 2, 3, 2, 1, 9, 4, 3, 1, 1, 4, 6, 5, 3, 3, 3, 9, 9, 2, 3, 4, 6, 1, 1, 1, 9, 6, 1, 5, 9, 5, 9, 5, 5, 1, 2, 9, 1, 3, 9, 9, 3, 5, 6, 1, 4, 1, 6, 4, 5, 2, 6, 4, 1, 5, 9, 1, 4, 3, 1, 2, 1, 2, 9, 1, 4, 3, 1, 2, 3, 6, 1, 6, 2, 2, 4, 1, 2, 6, 9, 3, 4, 2, 9, 6, 1, 3, 3, 1, 5, 4, 2, 4, 9), GROUP_2 = c(9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9), GROUP_3 = c(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9)), class = "data.frame", row.names = c(NA, -100L))

We can use gather
library(dplyr)
library(tidyr)
# Stack every column except the ID into name/value pairs, then tally each pair.
# NOTE(review): gather() is superseded in tidyr; pivot_longer() is the current equivalent.
data %>%
gather('name', 'value', -MASTER_HCU_DI) %>%
count(name, value)

Reducing dataframe if first 4 columns match

I have a dataframe that looks like this
> head(printing_id_map_unique_frames)
# A tibble: 6 x 5
# Groups: frame_number [6]
X1 X2 X3 row_in_frame frame_number
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2 3 15 1
2 1 2 3 15 2
3 1 2 3 15 3
4 1 2 3 15 4
5 1 2 3 15 5
6 1 2 3 15 6
As you can see, X1, X2, X3 and row_in_frame are identical.
However, eventually you get to a row where they change:
X1 X2 X3 row_in_frame frame_number
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2 3 15 32
2 1 2 3 15 33
3 1 2 3 5 34**
4 1 4 5 15 35
5 1 4 5 15 36
What I would like to do is essentially compute a dataframe that looks like:
X1 X2 X3 row_in_frame num_duplicates
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2 3 15 33
2 1 2 3 5 1
...
Essentially, what I want is to "collapse" over identical first 4 columns and count how many rows of that type there are in the "num_duplicates" column.
Is there a nice way to do this in dplyr, without a messy for loop that tracks a count and checks whether there is a change?
Below please find a full data structure via dput:
> dput(printing_id_map_unique_frames)
structure(list(X1 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), X2 = c(2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
), X3 = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5), row_in_frame = c(15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 5, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 5
), frame_number = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68)), row.names = c(NA, -68L), class = c("tbl_df",
"tbl", "data.frame"))

Here is one option with count
library(dplyr) # 1.0.0
# Count rows per unique combination of the first four columns; the column
# names are taken by position and spliced into count() as symbols
df1 %>%
count(!!! rlang::syms(names(.)[1:4]))
Or specify the unquoted column names
# Same count, naming the four key columns explicitly
count(df1, X1, X2, X3, row_in_frame)
If we don't want to change the order, an option is to convert the first 4 columns to factor with levels specified as the unique values (which is the same as the order of occurrence of values) and then apply the count
# Turn the first four columns into factors whose levels follow order of first
# appearance, so count() returns the combinations in their original order;
# type.convert() then turns the factor columns back into plain values
df1 %>%
mutate(across(1:4, ~ factor(.x, levels = unique(.x)))) %>%
count(!!! rlang::syms(names(.)[1:4])) %>%
type.convert(as.is = TRUE)
# A tibble: 4 x 5
# X1 X2 X3 row_in_frame n
# <int> <int> <int> <int> <int>
#1 1 2 3 15 33
#2 1 2 3 5 1
#3 1 4 5 15 33
#4 1 4 5 5 1

Maintain order of time series with group_by

Suppose my time series consists only of two columns: signal and day
The signal variable is supposed to repeat itself in a cycle of 1 to 6. So I need to insert empty rows for each implicit missing Signal but with signal counting from 1 to 6. (Suppose I have more columns that should also be empty (NA)).
In other words, for each unique day, there should be 6 rows with signal counting from 1 to 6.
My dataframe:
df = structure(list(data.Signal = c(2, 3, 4, 5, 6, 1, 2, 3, 4, 6,
1, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 1, 2, 3, 4, 5, 6, 2, 3, 4,
5, 6, 1, 3, 4, 5, 6, 2, 3, 4, 5, 6, 3, 4, 6, 1, 3, 4, 5, 6, 1,
2, 3, 4, 5, 6, 1, 2, 3, 4, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5,
6, 1, 2, 3, 4, 5, 1, 2, 3, 4, 6, 2, 3, 6, 3, 4, 5, 6, 1, 3, 4,
5, 6, 1, 1, 2, 3, 4, 5, 3, 4, 1, 2, 3, 4, 5, 5, 1, 2, 3, 4),
data.day = c(18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 20,
20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 22, 23, 23, 23, 23,
23, 23, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 26, 26, 26,
26, 26, 27, 27, 27, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29,
29, 30, 30, 30, 30, 30, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7,
7, 7, 7, 7, 8, 9, 9, 9, 9, 9, 10, 10, 11, 11, 11, 11, 11,
11, 12, 12, 12, 12)), class = "data.frame", row.names = c(NA,
-114L))
My approach:
# For each day, add the signals 1-6 that are missing as new rows, using a
# temporary grouping column gr that is dropped again at the end.
# NOTE(review): the result comes back with the days in ascending order,
# not in their original order.
data <- df %>%
group_by(gr=data.day) %>%
complete(data.Signal = 1:6) %>%
ungroup() %>%
select(-gr)
This however, sorts the days ascending. The order of the days is obviously meaningful in the time series data. How can I "re-sort" to the original order or is there another way of solving my problem? Thank you!

Convert data.day to factor before using complete
library(dplyr)
# Group on a factor copy of data.day whose levels follow order of first
# appearance, so completing signals 1-6 per group keeps the original day
# order; the helper column gr is dropped afterwards
df %>%
group_by(gr = factor(data.day, levels = unique(data.day))) %>%
tidyr::complete(data.Signal = 1:6) %>%
ungroup() %>%
select(-gr)
# data.Signal data.day
# <dbl> <dbl>
# 1 1 NA
# 2 2 18
# 3 3 18
# 4 4 18
# 5 5 18
# 6 6 18
# 7 1 19
# 8 2 19
# 9 3 19
#10 4 19
# … with 141 more rows
If you want those NA's filled you can use this version.
# Complete every day/signal pair on a factor copy of data.day (levels in order
# of first appearance, so the original day order is kept), then rebuild
# data.day from that factor so the inserted rows carry their day instead of NA
df %>%
  mutate(grp = factor(data.day, levels = unique(data.day))) %>%
  tidyr::complete(grp, data.Signal = 1:6) %>%
  mutate(data.day = as.numeric(as.character(grp))) %>%
  select(-grp)

separate data in a column into multiple columns

I have some data which looks like:
> head(d)
V1 V2 V3 V4 V5 V6 V7 V8 V9
50 28 79 4 6 48 2 17 4 20
51 28 79 4 6 48 2 17 4 21
52 28 79 4 6 48 2 17 4 22
53 28 79 4 6 48 2 17 4 23
54 28 79 4 6 48 2 17 4 24
55 28 79 4 6 48 2 17 4 25
V10
50 000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V
51 000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.5V000.8V000.8V000.5V000.4V000.4V000.4V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.6V
52 000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.4V000.6V000.4V000.4V000.5V000.5V000.5V000.4V000.4V000.4V000.5V000.5V000.5V000.7V000.7V000.8V
53 000.7V000.5V000.4V000.5V000.5V000.4V000.4V000.4V000.4V000.5V000.5V000.4V000.4V000.3V000.4V000.4V000.4V000.3V000.4V000.3V000.4V000.9V001.0V001.0V
54 000.8V000.5V000.3V000.4V000.4V000.4V000.7V001.2V001.2V001.0N000.5V000.4V000.4V000.4V000.4V000.3V000.3V000.3V000.4V000.4V000.5V000.5V000.6V000.6V
55 000.5V000.5V000.3V000.3V000.3V000.3V000.3V000.4V000.5V000.6V000.5V000.5V000.4V000.3V000.4V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.3V000.4V
Where column V10 is a large column separated by 24 V's and N's. The V's are "valid" observations and the N's are non-valid observations.
I want to separate the V10 column. I tried using the following but it does not solve the N's problem.
# Split V10 into 24 columns named x_1 .. x_24, cutting at each "V".
# NOTE(review): readings terminated by "N" are not cut by this separator.
sep_data <- df %>%
separate(V10, into = paste("x",1:24, sep = "_"), sep = "V")
Looking at the tail() of the sep_data:
V1 V2 V3 V4 V5 x_1 x_2 x_3 x_4 x_5 x_6 x_7 x_8 x_9 x_10 x_11 x_12
174 28079008 6 48 2 17042 4002.0 001.1 000.6 000.4 000.4 000.4 000.4 000.5 000.7 000.9 000.7N000.5 000.5
175 28079008 6 48 2 17042 5000.7 000.5 000.4 000.3 000.3 000.3 000.3 000.4 000.5 000.6 000.5 000.5
176 28079008 6 48 2 17042 6000.4 000.3 000.3 000.3 000.3 000.3 000.3 000.4 000.5 000.6 000.5 000.4
177 28079008 6 48 2 17042 7000.3 000.3 000.2 000.2 000.2 000.2 000.2 000.3 000.4 000.5 000.4 000.4
178 28079008 6 48 2 17042 8000.3 000.3 000.3 000.3 000.3 000.3 000.3 000.4 000.5 000.5 000.5 000.5
179 28079008 6 48 2 17042 9000.3 000.3 000.3 000.3 000.3 000.3 000.3 000.3 000.3 000.3 000.3 000.3
x_13 x_14 x_15 x_16 x_17 x_18 x_19 x_20 x_21 x_22 x_23 x_24 nchar
174 000.4 000.5 000.5 000.5 000.5 000.5 000.6 000.6 000.6 000.6 000.7 145
175 000.5 000.5 000.5 000.5 000.5 000.5 000.5 000.6 000.6 000.6 000.6 000.5 145
176 000.4 000.4 000.5 000.5 000.5 000.5 000.5 000.5 000.5 000.4 000.4 000.3 145
177 000.4 000.4 000.4 000.4 000.4 000.5 000.5 000.6 000.5 000.5 000.5 000.4 145
178 000.4 000.4 000.4 000.5 000.5 000.5 000.5 000.4 000.4 000.4 000.4 000.4 145
179 000.4 000.3 000.3 000.3 000.3 000.3 000.4 000.5 000.5 000.5 000.6 000.5 145
I have 000.7N000.5.
How can I use separate() to split on either V or N?
Data:
structure(list(V1 = c(28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28), V2 = c(79, 79, 79, 79, 79, 79,
79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79), V3 = c(4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8), V4 = c(6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6), V5 = c(48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48), V6 = c(2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2), V7 = c(17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17), V8 = c(4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4), V9 = c(20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26, 27, 28, 29, 30, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
30, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 2, 3,
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29), V10 = c("000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V",
"000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.5V000.8V000.8V000.5V000.4V000.4V000.4V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.6V",
"000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.4V000.6V000.4V000.4V000.5V000.5V000.5V000.4V000.4V000.4V000.5V000.5V000.5V000.7V000.7V000.8V",
"000.7V000.5V000.4V000.5V000.5V000.4V000.4V000.4V000.4V000.5V000.5V000.4V000.4V000.3V000.4V000.4V000.4V000.3V000.4V000.3V000.4V000.9V001.0V001.0V",
"000.8V000.5V000.3V000.4V000.4V000.4V000.7V001.2V001.2V001.0N000.5V000.4V000.4V000.4V000.4V000.3V000.3V000.3V000.4V000.4V000.5V000.5V000.6V000.6V",
"000.5V000.5V000.3V000.3V000.3V000.3V000.3V000.4V000.5V000.6V000.5V000.5V000.4V000.3V000.4V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.3V000.4V",
"000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.5V000.5V000.4V000.4V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.5V000.4V000.3V000.3V000.3V",
"000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.3V000.3V000.3V000.3V000.4V000.4V000.4V000.3V000.4V000.4V000.4V000.4V000.4V000.4V000.3V",
"000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.4V000.4V000.4V000.4V000.3V000.4V000.4V000.4V000.3V",
"000.4V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.5V000.4V000.5V000.6V000.6V",
"000.7V000.7V000.6V000.4V000.4V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.4V000.4V",
"00003V00004V00002V00002V00001V00002V00004V00003V00005V00005V00005V00009V00011V00014V00014V00009V00008V00010V00011V00014V00011V00008V00009V00006V",
"00005V00005V00003V00003V00002V00003V00009V00023V00069V00068V00008V00013V00010V00007V00012V00008V00007V00004V00006V00005V00007V00006V00004V00004V",
"00002V00001V00002V00001V00001V00001V00003V00007V00018V00022V00042V00034V00021V00016V00018V00011V00009V00009V00014V00010V00010V00033V00083V00111V",
"00098V00046V00016V00010V00005V00013V00052V00217V00337V00485V00138V00029V00026V00020V00019V00011V00006V00012V00012V00008V00010V00007V00005V00004V",
"00002V00002V00001V00001V00001V00001V00002V00007V00013V00020V00012V00010V00010V00012V00009V00007V00006V00008V00010V00009V00010V00006V00004V00003V",
"00002V00001V00001V00001V00001V00002V00003V00009V00014V00015V00013V00014V00018V00017V00010V00009V00010V00009V00006V00007V00008V00036V00061V00007V",
"00005V00005V00002V00002V00002V00002V00003V00008V00055V00038V00020V00023V00022V00015V00017V00011V00007V00006V00009V00009V00015V00117V00082V00096V",
"00046V00031V00047V00042V00037V00033V00040V00044V00067V00093V00053V00009V00006V00006V00006V00004V00003V00004V00020V00016V00010V00020V00049V00104V",
"00088V00112V00055V00046V00013V00043V00023V00023V00040V00093V00046V00011V00007V00007V00007V00005V00008V00007V00010V00010V00025V00079V00059V00025V",
"00028V00038V00022V00017V00018V00028V00049V00152V00116V00158V00087V00030V00024V00017V00018V00008V00008V00008V00010V00013V00012V00022V00072V00083V",
"00060V00024V00032V00026V00014V00032V00078V00129V00199V00164V00107V00067V00052V00024V00012V00009V00015V00015V00036V00024V00019V00035V00056V00062V",
"00020V00046V00042V00019V00003V00004V00020V00091V00159V00137V00121V00052V00045V00039V00008V00004V00005V00004V00008V00009V00004V00002V00009V00005V",
"00070V00027V00026V00038V00020V00011V00016V00057V00073V00111V00053V00031V00018V00005V00003V00002V00002V00002V00010V00030V00033V00009V00005V00017V",
"00033V00059V00042V00019V00018V00015V00017V00024V00036V00063V00026V00008V00005V00003V00003V00002V00003V00005V00005V00009V00009V00007V00005V00006V",
"00009V00004V00001V00001V00002V00015V00012V00019V00031V00028V00013V00010V00008V00007V00004V00003V00003V00006V00017V00017V00014V00024V00012V00002V",
"00002V00002V00003V00002V00001V00001V00002V00002V00009V00003V00004V00004V00006V00007V00010V00005V00004V00003V00003V00002V00002V00005V00067V00080V",
"00063V00039V00025V00004V00003V00006V00015V00062V00144V00098V00079V00026V00017V00015V00007V00005V00004V00006V00004V00003V00003V00002V00003V00005V",
"00004V00001V00001V00001V00001V00001V00002V00005V00011V00014V00013V00012V00013V00020V00014V00013V00008V00007V00007V00006V00007V00006V00003V00002V",
"00002V00001V00001V00001V00001V00001V00002V00004V00008V00012V00025V00020V00026V00016V00017V00012V00011V00012V00017V00010V00012V00011V00006V00004V",
"00003V00002V00002V00001V00001V00002V00008V00013V00013V00019V00022V00015V00019V00018V00015V00015V00010V00011V00016V00015V00012V00011V00007V00005V",
"00003V00002V00002V00001V00001V00002V00004V00026V00145V00131V00027V00021V00020V00017V00019V00014V00011V00013V00012V00010V00007V00016V00025V00034V",
"00004V00003V00003V00002V00002V00003V00007V00024V00022V00046V00013V00011V00012V00016V00019V00011V00010V00010V00018V00022V00039V00060V00051V00121V",
"00085V00038V00024V00056V00039V00024V00022V00026V00040V00058V00041V00010V00010V00008V00013V00008V00008V00006V00013V00005V00004V00068V00079V00102V",
"00081V00019V00005V00015V00008V00048V00114V00179V00240V00153N00031V00021V00024V00018V00013V00008V00004V00006V00010V00014V00009V00013V00010V00032V",
"00018V00030V00006V00001V00001V00001V00003V00024V00038V00046V00024V00009V00010V00010V00006V00004V00004V00004V00004V00006V00004V00003V00002V00006V",
"00003V00002V00002V00001V00001V00001V00002V00012V00021V00025V00027V00011V00009V00006V00007V00004V00006V00005V00006V00020V00008V00004V00003V00004V",
"00002V00001V00001V00001V00001V00001V00002V00004V00007V00010V00011V00013V00013V00018V00010V00006V00005V00007V00010V00014V00016V00009V00006V00004V",
"00003V00002V00001V00001V00001V00001V00003V00006V00009V00012V00011V00015V00019V00029V00022V00016V00010V00013V00009V00008V00012V00012V00017V00009V",
"00009V00006V00003V00002V00003V00002V00003V00007V00007V00009V00010V00012V00011V00022V00013V00011V00011V00013V00019V00029V00022V00024V00037V00047V",
"00074V00082V00059V00021V00003V00002V00001V00002V00002V00002V00003V00003V00003V00004V00007V00007V00004V00004V00005V00010V00007V00009V00005V00005V",
"00025V00021V00010V00010V00008V00016V00032V00023V00023V00017V00013V00021V00024V00028V00027V00020V00020V00024V00029V00040V00048V00049V00043V00028V",
"00027V00033V00035V00022V00026V00036V00048V00063V00065V00059V00022V00029V00025V00022V00029V00028V00021V00017V00018V00021V00031V00030V00024V00020V",
"00012V00008V00008V00006V00005V00009V00018V00040V00059V00053V00063V00060V00044V00040V00043V00032V00029V00032V00052V00058V00071V00110V00128V00117V",
"00097V00074V00058V00049V00043V00043V00052V00094V00127V00160V00091V00050V00043V00038V00040V00031V00021V00038V00046V00043V00050V00061V00050V00022V",
"00017V00014V00019V00018V00008V00011V00014V00042V00050V00048V00027V00021V00019V00021V00016V00013V00016V00019V00028V00030V00034V00035V00021V00016V",
"00010V00008V00007V00006V00007V00009V00015V00040V00058V00045V00034V00032V00039V00032V00023V00024V00029V00029V00033V00039V00068V00112V00096V00047V",
"00030V00024V00018V00010V00010V00011V00019V00045V00071V00061V00050V00052V00049V00045V00046V00038V00027V00029V00041V00046V00073V00143V00131V00109V",
"00065V00054V00063V00067V00060V00055V00060V00062V00067V00075V00068V00036V00026V00025V00028V00020V00016V00020V00044V00056V00062V00098V00127V00116V",
"00091V00094V00079V00067V00040V00057V00056V00054V00054V00065V00059V00041V00027V00030V00026V00019V00023V00022V00032V00040V00060V00087V00079V00054V",
"00047V00061V00057V00050V00049V00049V00056V00085V00074V00086V00084V00062V00058V00048V00047V00027V00026V00026V00036V00064V00085V00096V00117V00106V",
"00084V00072V00062V00055V00042V00055V00066V00082V00099V00098V00096V00092V00089V00064V00045V00038V00036V00042V00075V00066V00066V00087V00088V00103V",
"00092V00090V00074V00059V00046V00038V00051V00076V00092V00091V00097V00084V00084V00081V00033V00017V00020V00018V00031V00045V00038V00041V00038V00065V",
"00113V00086V00082V00072V00059V00050V00050V00057V00058V00067V00072V00070V00061V00025V00009V00007V00010V00013V00028V00071V00084V00060V00044V00068V",
"00090V00084V00070V00061V00053V00049V00041V00041V00041V00048V00043V00030V00021V00012V00011V00007V00007V00009V00013V00021V00034V00044V00040V00041V",
"00057V00049V00015V00027V00032V00047V00041V00045V00046V00040V00034V00028V00023V00022V00009V00008V00007V00014V00033V00049V00056V00072V00039V00019V",
"00021V00017V00013V00008V00007V00011V00015V00013V00027V00010V00012V00011V00016V00023V00031V00021V00020V00017V00017V00021V00035V00073V00130V00115V",
"00102V00084V00071V00049V00041V00043V00045V00067V00083V00082V00083V00058V00048V00049V00028V00019V00015V00021V00025V00026V00033V00031V00042V00060V",
"00025V00009V00006V00010V00006V00008V00016V00038V00054V00047V00038V00037V00040V00056V00041V00041V00027V00027V00029V00034V00049V00065V00024V00012V",
"00010V00007V00004V00004V00003V00004V00010V00025V00041V00039V00051V00046V00051V00046V00046V00033V00031V00035V00040V00030V00036V00041V00035V00028V",
"00020V00015V00013V00014V00012V00017V00032V00056V00051V00057V00052V00037V00040V00039V00038V00040V00033V00036V00047V00050V00044V00046V00038V00032V",
"00021V00017V00015V00012V00010V00013V00024V00050V00101V00099V00053V00048V00043V00040V00044V00038V00035V00042V00043V00042V00040V00057V00060V00058V",
"00036V00030V00028V00021V00020V00035V00049V00067V00058V00070V00042V00038V00044V00053V00050V00044V00039V00043V00054V00059V00082V00098V00116V00119V",
"00101V00085V00076V00078V00066V00058V00054V00053V00049V00053V00058V00035V00033V00028V00032V00029V00027V00025V00047V00028V00049V00122V00135V00123V",
"00113V00077V00040V00041V00050V00056V00074V00091V00113V00097N00068V00055V00059V00044V00037V00024V00020V00028V00045V00058V00070V00082V00084V00102V",
"00082V00077V00044V00019V00012V00009V00029V00077V00084V00081V00051V00028V00031V00031V00022V00017V00014V00015V00015V00027V00037V00038V00020V00031V",
"00019V00010V00010V00009V00006V00010V00019V00048V00068V00058V00047V00030V00022V00018V00017V00014V00019V00018V00026V00059V00031V00019V00017V00014V",
"00009V00007V00003V00002V00002V00004V00008V00020V00030V00029V00024V00028V00028V00031V00022V00017V00016V00020V00028V00034V00039V00037V00029V00019V",
"00012V00009V00005V00004V00003V00003V00010V00025V00037V00034V00027V00031V00036V00043V00040V00037V00035V00040V00031V00027V00039V00038V00038V00032V",
"00031V00028V00022V00018V00021V00019V00021V00033V00036V00040V00041V00045V00040V00043V00039V00037V00041V00048V00053V00053V00052V00056V00055V00055V",
"00050V00044V00047V00048V00030V00011V00008V00011V00012V00010V00008V00014V00011V00017V00031V00022V00009V00008V00018V00038V00040V00043V00033V00027V",
"00004V00004V00004V00004V00003V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V",
"00004V00004V00004V00004V00004V00004V00004V00004V00004V00005V00005V00005V00005V00005V00005V00004V00005V00005V00005V00006V00006V00005V00005V00004V",
"00004V00004V00004V00004V00004V00004V00004V00004V00005V00005V00006V00006V00005V00005V00004V00005V00004V00005V00005V00005V00005V00005V00006V00008V",
"00006V00006V00006V00005V00005V00005V00004V00005V00008V00006V00006V00006V00005V00005V00005V00004V00004V00004V00004V00005V00006V00006V00006V00005V",
"00004V00004V00004V00004V00004V00004V00004V00004V00005V00005V00005V00006V00006V00005V00005V00005V00005V00005V00005V00005V00005V00005V00004V00004V",
"00004V00004V00004V00004V00004V00004V00004V00004V00005V00006V00005V00005V00005V00005V00005V00006V00005V00005V00005V00006V00007V00008V00007V00005V",
"00005V00004V00004V00004V00004V00004V00004V00004V00005V00005V00006V00006V00005V00005V00005V00005V00005V00005V00004V00004V00004V00004V00005V00004V",
"00004V00004V00005V00004V00005V00004V00004V00005V00006V00007V00005V00005V00005V00005V00005V00005V00005V00004V00004V00004V00004V00005V00005V00005V",
"00004V00004V00005V00005V00004V00004V00004V00005V00005V00006V00006V00005V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V",
"00004V00005V00004V00004V00004V00004V00004V00005V00005V00006V00005V00005V00005V00006V00005V00004V00004V00005V00004V00004V00004V00005V00005V00005V",
"00005V00005V00005V00005V00005V00005V00005V00005V00006V00006V00005V00005V00005V00005V00004V00004V00004V00005V00004V00005V00006V00006V00005V00005V",
"00005V00005V00005V00005V00005V00004V00005V00005V00005V00006V00005V00006V00006V00005V00004V00004V00004V00004V00004V00004V00004V00004V00005V00005V",
"00005V00005V00005V00006V00006V00005V00005V00005V00006V00007V00007V00006V00006V00005V00004V00004V00005V00004V00004V00004V00004V00005V00005V00005V",
"00005V00005V00005V00005V00005V00005V00005V00006V00006V00006V00006V00005V00005V00005V00004V00004V00004V00004V00004V00004V00004V00004V00004V00005V",
"00005V00005V00005V00004V00004V00005V00005V00005V00005V00006V00006V00005V00005V00005V00004V00004V00005V00005V00005V00004V00004V00004V00005V00005V",
"00004V00005V00005V00004V00004V00004V00004V00004V00004V00004V00005V00005V00005V00005V00005V00005V00004V00005V00005V00004V00004V00005V00006V00006V",
"00008V00007V00005V00005V00005V00005V00005V00005V00005V00007V00006V00006V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V",
"00005V00005V00004V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00004V",
"00004V00004V00004V00004V00004V00004V00004V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V",
"00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V",
"00004V00005V00005V00005V00005V00005V00005V00005V00006V00006V00006V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V",
"00005V00005V00005V00005V00005V00005V00005V00005V00006V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00006V00006V00007V",
"00006V00006V00006V00005V00005V00005V00006V00006V00006V00006V00006V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00006V00008V00011V",
"00009V00006V00005V00005V00005V00005V00005V00006V00007V00008V00006N00006V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00006V00006V",
"00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V",
"00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V",
"00005V00005V00005V00005V00005V00005V00005V00005V00005V00006V00006V00006V00006V00006V00006V00006V00006V00006V00006V00006V00006V00007V00006V00005V",
"00005V00005V00005V00005V00005V00005V00005V00005V00006V00006V00007V00007V00007V00006V00006V00006V00006V00005V00005V00005V00005V00005V00005V00005V",
"00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00006V00006V00006V00006V00006V",
"00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V",
"000.5V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.5V000.5V000.4V000.4V000.5V000.5V000.6V000.6V000.6V000.5V",
"000.5V000.5V000.5V000.5V000.4V000.4V000.5V000.4V000.5V000.4V000.4V000.4V000.4V000.4V000.5V000.4V000.4V000.4V000.5V000.5V000.5V000.5V000.5V000.4V",
"000.4V000.4V000.3V000.3V000.3V000.3V000.4V000.4V000.6V000.7V000.6V000.5V000.5V000.5V000.5V000.5V000.4V000.4V000.5V000.5V000.6V000.7V000.8V001.1V",
"001.2V000.9V000.8V000.6V000.5V000.4V000.4V000.6V001.1V001.0V000.7V000.6V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.6V000.7V000.8V000.7V000.6V",
"000.5V000.4V000.4V000.4V000.3V000.3V000.4V000.5V000.6V000.6V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.6V000.6V000.6V000.5V000.4V",
"000.4V000.4V000.4V000.4V000.3V000.3V000.4V000.5V000.6V000.7V000.6V000.5V000.5V000.5V000.5V000.5V000.4V000.4V000.5V000.6V000.7V000.9V001.0V000.7V",
"000.5V000.4V000.4V000.4V000.3V000.3V000.4V000.5V000.7V000.8V000.6V000.6V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.7V000.7V000.7V",
"000.5V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.5V000.5V000.5V000.4V000.4V000.5V000.4V000.4V000.4V000.4V000.4V000.5V000.6V000.7V000.7V",
"000.6V000.5V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.5V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.5V000.5V",
"000.4V000.4V000.4V000.4V000.4V000.3V000.4V000.5V000.7V000.8V000.6V000.5V000.5V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.7V000.9V000.7V",
"000.7V000.5V000.4V000.3V000.3V000.3V000.4V000.5V000.7V000.8V000.6V000.6V000.7V000.7V000.5V000.5V000.5V000.4V000.4V000.5V000.6V000.7V000.7V000.6V",
"000.6V000.5V000.4V000.4V000.4V000.3V000.4V000.5V000.6V000.7V000.7V000.6V000.6V000.5V000.6V000.5V000.5V000.5V000.5V000.5V000.6V000.6V000.7V000.7V",
"000.7V000.7V000.7V000.8V000.8V000.7V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.5V000.5V",
"000.6V000.6V000.5V000.5V000.5V000.5V000.5V000.5V000.4V000.4V000.5V000.4V000.4V000.4V000.4V000.3V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.5V",
"000.5V000.5V000.5V000.4V000.4V000.4V000.3V000.3V000.4V000.4V000.5V000.4V000.4V000.4V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.5V000.4V000.4V",
"000.4V000.4V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.7V000.9V",
"001.1V001.2V000.7V000.4V000.3V000.3V000.3V000.4V000.6V000.7V000.5V000.5V000.5V000.5V000.5V000.4V000.4V000.4V000.4V000.5V000.5V000.5V000.5V000.6V",
"000.4V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.5V000.6V000.5V000.5V000.5V000.5V000.4V000.4V000.4V000.3V000.4V000.5V000.6V000.7V000.5V000.4V",
"000.4V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.5V000.5V000.5V000.6V000.4V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V",
"000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.6V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V",
"000.4V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.6V000.7V000.6V000.5V002.7V000.8V000.5V000.4V000.4V000.4V000.5V000.5V000.5V000.5V000.5V000.5V",
"000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.4V000.4V000.4V000.4V000.4V000.5V000.6V000.8V000.8V",
"000.8V000.8V000.7V000.5V000.4V000.4V000.4V000.5V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.6V000.9V001.8V",
"002.0V001.1V000.6V000.4V000.4V000.4V000.4V000.5V000.7V000.9V000.7N000.5V000.5V000.4V000.5V000.5V000.5V000.5V000.5V000.6V000.6V000.6V000.6V000.7V",
"000.7V000.5V000.4V000.3V000.3V000.3V000.3V000.4V000.5V000.6V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.6V000.6V000.6V000.6V000.5V",
"000.4V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.5V000.6V000.5V000.4V000.4V000.4V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.4V000.4V000.3V",
"000.3V000.3V000.2V000.2V000.2V000.2V000.2V000.3V000.4V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.5V000.6V000.5V000.5V000.5V000.4V",
"000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.5V000.5V000.5V000.5V000.4V000.4V000.4V000.5V000.5V000.5V000.5V000.4V000.4V000.4V000.4V000.4V",
"000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.3V000.3V000.3V000.3V000.3V000.4V000.5V000.5V000.5V000.6V000.5V"
)), row.names = 50:179, class = "data.frame")

How can I use separate() to separate based on V OR N?
You can pass a regular expression that matches either letter as the separator: sep = "V|N". But I'd suggest pre-processing instead. Use gsub or stringr::str_replace_all to turn the N entries into NA before separating. Something like df$V10 = gsub("N\\d+(\\.)?\\d+", "VNA", df$V10). This replaces each NXXX.XX with VNA, so that splitting on V afterwards should leave NAs where the Ns originally were.

How to create nested data in R - Nest multiple columns in different categories

My data looks like this:
> str(tab2)
'data.frame': 36 obs. of 22 variables:
$ organisationunitname : Factor w/ 38 levels "All OUs","Angola",..: 2 3 4 5 6 7 8 9 10 11 ...
$ cd4_perform_result : num 24 6 7 1 1 1 5 3 2 1 ...
$ cd4_participate_result : num 1 8 8 1 1 1 5 3 2 1 ...
$ cd4_pass_result : num 1 4 19 1 1 1 5 3 2 1 ...
$ eid_perform_result : num 2 1 7 1 1 1 1 9 1 1 ...
$ eid_participate_result : num 2 1 5 1 1 1 1 8 1 1 ...
$ eid_pass_result : num 2 1 5 1 1 1 1 7 1 1 ...
$ vl_perform_result : num 2 1 3 1 1 1 1 9 1 1 ...
$ vl_participate_result : num 2 1 7 1 1 1 1 7 1 1 ...
$ vl_pass_result : num 2 1 7 1 1 1 1 7 1 1 ...
$ hiv_perform_result : num 19 29 14 1 1 1 26 21 10 1 ...
$ hiv_participate_result : num 19 28 14 1 1 1 22 20 4 1 ...
$ hiv_pass_result : num 20 28 14 1 1 1 18 22 7 1 ...
$ tbafb_perform_result : num 9 1 8 1 1 1 1 7 1 1 ...
$ tbafb_participate_result : num 1 1 18 1 1 1 1 5 1 1 ...
$ tbafb_pass_result : num 1 1 19 1 1 1 1 6 1 1 ...
$ tbculture_perform_result : num 3 1 2 1 1 1 1 1 1 1 ...
$ tbculture_participate_result: num 1 1 2 1 1 1 1 1 1 1 ...
$ tbculture_pass_result : num 1 1 1 1 1 1 1 1 1 1 ...
$ tbxpert_perform_result : num 1 1 4 1 1 1 1 1 1 1 ...
$ tbxpert_participate_result : num 1 1 5 1 1 1 1 1 1 1 ...
$ tbxpert_pass_result : num 1 1 2 1 1 1 1 1 1 1 ...
>
DATA
structure(list(country = c("eRkf", "KJfd", "wjkO", "Hovb", "v6Dm",
"vp8p", "TYhI", "U4OB", "GVnL", "dzJO", "11JX", "ygWc", "4Ye1",
"RykQ", "OHLW", "Xh1x", "MOl4", "67vY", "h2cA", "Ue1r", "Hr9G",
"YxpI", "S0Or", "2fss", "wz9F", "XEOG", "Vptm", "xAup", "STBG",
"AayU", "mJyW", "PvNG", "qncq", "L8dk", "6CJ8", "90i7"), cd4_perform_result = c(23,
6, 7, 1, 1, 1, 5, 3, 2, 1, 10, 1, 2, 8, 1, 2, 16, 1, 1, 22, 12,
1, 13, 11, 17, 1, 20, 15, 1, 21, 18, 4, 1, 14, 19, 9), cd4_participate_result = c(1,
8, 8, 1, 1, 1, 5, 3, 2, 1, 7, 1, 2, 9, 1, 2, 16, 1, 1, 17, 11,
1, 4, 14, 13, 1, 19, 15, 1, 20, 10, 6, 1, 18, 12, 3), cd4_pass_result = c(1,
4, 18, 1, 1, 1, 5, 3, 2, 1, 20, 1, 2, 19, 1, 2, 12, 1, 1, 13,
6, 1, 10, 11, 9, 1, 17, 2, 1, 15, 7, 16, 1, 14, 8, 3), eid_perform_result = c(2,
1, 6, 1, 1, 1, 1, 8, 1, 1, 7, 1, 2, 3, 5, 2, 5, 1, 1, 9, 5, 1,
4, 2, 10, 1, 5, 1, 1, 5, 8, 2, 1, 1, 8, 5), eid_participate_result = c(2,
1, 4, 1, 1, 1, 1, 7, 1, 1, 6, 1, 2, 9, 4, 2, 4, 1, 1, 3, 2, 1,
9, 2, 8, 1, 4, 1, 1, 4, 6, 2, 1, 1, 5, 4), eid_pass_result = c(2,
1, 4, 1, 1, 1, 1, 6, 1, 1, 5, 1, 2, 9, 1, 2, 4, 1, 1, 3, 2, 1,
8, 2, 7, 1, 4, 1, 1, 4, 5, 2, 1, 1, 4, 4), vl_perform_result = c(2,
1, 3, 1, 1, 1, 1, 8, 1, 1, 9, 1, 2, 10, 4, 2, 4, 1, 1, 5, 4,
1, 7, 6, 5, 1, 11, 1, 1, 4, 8, 2, 1, 1, 7, 4), vl_participate_result = c(2,
1, 7, 1, 1, 1, 1, 7, 1, 1, 8, 1, 2, 8, 4, 2, 4, 1, 1, 5, 2, 1,
4, 6, 3, 1, 9, 1, 1, 4, 7, 2, 1, 1, 6, 1), vl_pass_result = c(2,
1, 7, 1, 1, 1, 1, 7, 1, 1, 9, 1, 2, 8, 1, 2, 5, 1, 1, 4, 2, 1,
2, 6, 3, 1, 10, 1, 1, 5, 7, 2, 1, 1, 5, 1), hiv_perform_result = c(18,
28, 13, 1, 1, 1, 25, 20, 10, 1, 6, 11, 9, 7, 19, 26, 8, 14, 1,
27, 12, 1, 24, 17, 23, 1, 21, 5, 1, 22, 16, 15, 1, 2, 3, 4),
hiv_participate_result = c(18, 27, 13, 1, 1, 1, 21, 19, 4,
1, 15, 9, 10, 3, 11, 26, 5, 1, 1, 20, 6, 1, 23, 17, 12, 1,
24, 8, 1, 22, 14, 16, 1, 2, 25, 7), hiv_pass_result = c(19,
27, 13, 1, 1, 1, 17, 21, 6, 1, 16, 26, 10, 2, 23, 25, 9,
1, 1, 14, 4, 1, 20, 18, 11, 1, 22, 7, 1, 15, 12, 8, 1, 3,
24, 5), tbafb_perform_result = c(9, 1, 8, 1, 1, 1, 1, 7,
1, 1, 6, 1, 20, 5, 1, 2, 12, 1, 1, 15, 13, 1, 17, 11, 19,
1, 10, 1, 1, 14, 16, 4, 1, 18, 3, 1), tbafb_participate_result = c(1,
1, 17, 1, 1, 1, 1, 5, 1, 1, 12, 1, 18, 11, 1, 2, 6, 1, 1,
13, 7, 1, 10, 9, 14, 1, 8, 1, 1, 16, 15, 4, 1, 17, 3, 1),
tbafb_pass_result = c(1, 1, 18, 1, 1, 1, 1, 6, 1, 1, 13,
1, 19, 11, 1, 2, 4, 1, 1, 15, 5, 1, 7, 10, 12, 1, 8, 1, 1,
16, 9, 3, 1, 14, 17, 1), tbculture_perform_result = c(3,
1, 2, 1, 1, 1, 1, 1, 1, 1, 6, 1, 3, 8, 1, 2, 2, 1, 1, 7,
3, 1, 5, 4, 7, 1, 5, 1, 1, 3, 6, 6, 1, 3, 3, 1), tbculture_participate_result = c(1,
1, 2, 1, 1, 1, 1, 1, 1, 1, 5, 1, 3, 8, 1, 2, 2, 1, 1, 7,
2, 1, 6, 4, 6, 1, 1, 1, 1, 3, 3, 5, 1, 3, 3, 1), tbculture_pass_result = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 7, 1, 2, 2, 1, 1, 8,
2, 1, 6, 4, 5, 1, 1, 1, 1, 3, 3, 6, 1, 3, 3, 1), tbxpert_perform_result = c(1,
1, 4, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 16, 1, 8, 3, 1, 1, 5,
9, 1, 15, 7, 13, 1, 4, 1, 1, 12, 11, 1, 1, 6, 14, 10), tbxpert_participate_result = c(1,
1, 5, 1, 1, 1, 1, 1, 1, 1, 15, 1, 1, 4, 1, 12, 3, 1, 1, 2,
7, 1, 16, 9, 11, 1, 1, 1, 1, 14, 10, 1, 1, 6, 8, 13), tbxpert_pass_result = c(1,
1, 2, 1, 1, 1, 1, 1, 1, 1, 12, 1, 1, 4, 1, 9, 3, 1, 1, 14,
6, 1, 13, 8, 8, 1, 1, 1, 1, 11, 6, 1, 1, 5, 7, 10)), .Names = c("country",
"cd4_perform_result", "cd4_participate_result", "cd4_pass_result",
"eid_perform_result", "eid_participate_result", "eid_pass_result",
"vl_perform_result", "vl_participate_result", "vl_pass_result",
"hiv_perform_result", "hiv_participate_result", "hiv_pass_result",
"tbafb_perform_result", "tbafb_participate_result", "tbafb_pass_result",
"tbculture_perform_result", "tbculture_participate_result", "tbculture_pass_result",
"tbxpert_perform_result", "tbxpert_participate_result", "tbxpert_pass_result"
), row.names = c(NA, 36L), class = "data.frame")
It is organized by unique organisationunitname, but the columns are also grouped into different categories, for example cd4, eid, vl, hiv, tbafb etc., and then into perform, participate and pass_result. I want to tabulate the data across all these categories so that it looks like this:
Country: eRkf
cat CD4 EID VL HIV TB AFB TB Culture TB Xpert
Perform 3442 288 114 29519 8572 72 591
Participate 1771 128 95 17342 5433 119 395
Pass_test 1535 118 83 11674 4508 109 343
How can I do this in R, without having to create separate data frames?

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Get index of date with max years and min month - r

Related

Find the total number of times each of a possible range of values occurs across three separate variables in R

Reducing dataframe if first 4 columns match

Maintain order of time series with group_by

separate data in a column into multiple columns

How to create nested data in R - Nest multiple columns in different categories

Categories

Resources