I have a dataset of 3 variables: ID, Date and Years_service. Like this:
library(data.table)
data <- structure(list(ID = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), Date = structure(c(1230768000,
1233446400, 1235865600, 1238544000, 1241136000, 1243814400, 1246406400,
1249084800, 1251763200, 1254355200, 1257033600, 1259625600, 1262304000,
1264982400, 1267401600, 1270080000, 1272672000, 1275350400, 1277942400,
1280620800, 1283299200, 1285891200, 1288569600, 1291161600, 1293840000,
1296518400, 1298937600, 1301616000, 1304208000, 1306886400, 1309478400,
1312156800, 1314835200, 1317427200, 1320105600, 1322697600, 1325376000,
1328054400, 1330560000, 1333238400, 1335830400, 1338508800, 1341100800,
1343779200, 1346457600, 1349049600, 1351728000, 1354320000, 1356998400,
1359676800, 1362096000, 1364774400, 1367366400, 1370044800, 1372636800,
1375315200, 1377993600, 1380585600, 1383264000, 1385856000, 1388534400,
1391212800, 1393632000, 1396310400, 1398902400, 1401580800, 1404172800,
1406851200, 1409529600, 1412121600, 1414800000, 1417392000, 1420070400,
1422748800, 1425168000, 1427846400, 1430438400, 1433116800, 1435708800,
1438387200, 1441065600, 1443657600, 1446336000, 1448928000, 1451606400,
1454284800, 1456790400, 1459468800, 1462060800, 1464739200, 1467331200,
1470009600, 1472688000, 1475280000, 1330560000, 1333238400, 1335830400,
1338508800, 1341100800, 1343779200, 1346457600, 1349049600, 1351728000,
1354320000, 1356998400, 1359676800, 1362096000, 1364774400, 1367366400,
1370044800, 1372636800, 1375315200, 1377993600, 1380585600, 1383264000,
1385856000, 1388534400, 1391212800, 1393632000, 1396310400, 1398902400,
1401580800, 1404172800, 1406851200, 1409529600, 1412121600, 1414800000,
1417392000, 1420070400, 1422748800, 1425168000, 1427846400, 1430438400,
1433116800, 1435708800, 1438387200, 1441065600, 1443657600, 1446336000,
1448928000, 1451606400, 1454284800, 1456790400, 1459468800, 1462060800,
1464739200, 1467331200, 1470009600, 1472688000, 1475280000), class =
c("POSIXct",
"POSIXt"), tzone = "UTC"), Years_service = c(19, 19, 19, 19,
19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 8, 8, 8, 8, 8, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13),
month_1 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2,
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5,
6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)), .Names = c("ID", "Date",
"Years_service", "month_1"), row.names = c(NA, -150L), class =
c("data.table",
"data.frame"))
I want a new variable that contains for each ID the date for which years of service is maximum and the month of date is minimum. Something like this:
ID Date Years_service Date_1
1: 1 2009-01-01 19 2016-06-01
2: 1 2009-02-01 19 2016-06-01
3: 1 2009-03-01 19 2016-06-01
4: 1 2009-04-01 19 2016-06-01
5: 1 2009-05-01 19 2016-06-01
---
146: 2 2016-06-01 12 2016-08-01
147: 2 2016-07-01 12 2016-08-01
148: 2 2016-08-01 13 2016-08-01
149: 2 2016-09-01 13 2016-08-01
150: 2 2016-10-01 13 2016-08-01
My desired output is Date_1
I tried this:
data[,Date_1 := Date[which.max(Years_service) & which.min(month_1)], by = ID]
but didn't work.
How can I achieve this?
One option is to get the row index (.I) of the rows where the 'Years_service is max for each 'ID', then using that, get the minimum index of 'month_1' to subset the 'Date' corresponding to that value grouped by 'ID', and join on with the original data on the 'ID' column to create the 'Date_1' column
i1 <- data[, .I[Years_service == max(Years_service)], ID]$V1
data[data[i1, Date[which.min(month_1)], ID], Date_1 :=V1, on = .(ID)]
data
# ID Date Years_service month_1 Date_1
# 1: 1 2009-01-01 19 1 2016-06-01
# 2: 1 2009-02-01 19 2 2016-06-01
# 3: 1 2009-03-01 19 3 2016-06-01
# 4: 1 2009-04-01 19 4 2016-06-01
# 5: 1 2009-05-01 19 5 2016-06-01
# ---
#146: 2 2016-06-01 12 6 2016-08-01
#147: 2 2016-07-01 12 7 2016-08-01
#148: 2 2016-08-01 13 8 2016-08-01
#149: 2 2016-09-01 13 9 2016-08-01
#150: 2 2016-10-01 13 10 2016-08-01
Or extract the 'Date' corresponding to minimum 'month_1' from within the Subset of Data.table
data[, Date_1 := .SD[Years_service == max(Years_service),
Date[which.min(month_1)]], ID]
Or another option is to an order and assign 'Date_1' as the first 'Date' grouped by 'ID'
data[order(-Years_service, month_1), Date_1 := Date[1], ID]
Or using tidyverse
library(tidyverse)
data %>%
group_by(ID) %>%
arrange(desc(Years_service), month_1) %>%
mutate(Date_1 = first(Date))
Related
I have data which looks like this:
library(stringr)
library(dplyr)
library(magrittr)
Codes = c(1, 2, 3, 4, 5, 6, 9)
Codes2 = c(Codes, rep(9, 100))
data <- data.frame(
MASTER_HCU_DI = do.call(paste0, Map(stri_rand_strings, n=100, length=c(4, 3),
pattern = c('[A-Z]', '[0-9]'))),
CODE_1 = sample(Codes, 100, replace = T))
data %<>%
mutate(CODE_2 = if_else(CODE_1 == 9, 9, sample(Codes2, 100, replace = T)),
CODE_3 = if_else(CODE_2 == 9, 9, sample(Codes2, 100, replace = T)))
What I want to do is find the total number of people with each of the possible values of CODE_1, CODE_2, and CODE_3; across all three Codes.
Where all of someone's CODE start with a 9, they are counted as missing. Otherwise, I'd like to ignore the CODE values which start with a 9.
This code does what I want, but seems cumbersome:
data %<>%
mutate(Sum_grp1 = if_else(CODE_1 == 1 | CODE_2 == 1 | CODE_3 == 1, 1, 0),
Sum_grp2 = if_else(CODE_1 == 2 | CODE_2 == 2 | CODE_3 == 2, 1, 0),
Sum_grp3 = if_else(CODE_1 == 3 | CODE_2 == 3 | CODE_3 == 3, 1, 0),
Sum_grp4 = if_else(CODE_1 == 4 | CODE_2 == 4 | CODE_3 == 4, 1, 0),
Sum_grp5 = if_else(CODE_1 == 5 | CODE == 5 | CODE_3 == 5, 1, 0),
Sum_grp6 = if_else(CODE_1 == 6 | CODE_2 == 6 | CODE_3 == 6, 1, 0),
Missing = if_else(CODE_1 == 9 & CODE_2 == 9 & CODE_3 == 9, 1, 0))
Group_counts <- data.frame(
Group = c("Group_1", "Group_2", "Group_3", "Group_4", "Group_5", "Group_6", "Missing"),
Sum = c(sum(data$Sum_grp1 == 1),
sum(data$Sum_grp2 == 1),
sum(data$Sum_grp3 == 1),
sum(data$Sum_grp4 == 1),
sum(data$Sum_grp5 == 1),
sum(data$Sum_grp6 == 1),
sum(data$Missing == 1)))
Expected output looks like this:
Is there an easier way to do this?
Thanks.
You can get the data in long format and use count -
library(dplyr)
library(tidyr)
data %>% pivot_longer(cols = -MASTER_HCU_DI) %>% count(name, value)
Is this what you expect?
data %>% pivot_longer(cols = -MASTER_HCU_DI) %>% group_by(name) %>%
summarise(Sum = sum(value), .groups = 'drop')
# A tibble: 3 x 2
name Sum
<chr> <dbl>
1 GROUP_1 409
2 GROUP_2 897
3 GROUP_3 900
As I understand it, the following functionality outlined in the question is not addressed by the existing answers:
Where all of someone's CODE start with a 9, they are counted as missing. Otherwise, I'd like to ignore the CODE values which start with a 9.
Here is my approach to include this functionality:
library(purrr)
library(dplyr)
data %>%
pmap_dfr(~ table(c(...)[-1])) %>%
set_names(~ paste0("Group_", .x)) %>%
mutate(Missing = ifelse(`Group_9` == 3, 1, NA)) %>%
select(-`Group_9`) %>%
colSums(na.rm = T) %>%
tibble::tibble(Group = names(.), Sum = .) %>%
arrange(Group)
Returns:
# A tibble: 7 x 2
Group Sum
<chr> <dbl>
1 Group_1 23
2 Group_2 13
3 Group_3 16
4 Group_4 13
5 Group_5 13
6 Group_6 11
7 Missing 15
Data used:
data <- structure(list(MASTER_HCU_DI = c("VBHT228", "CAAO199", "NDDI124", "AVZV996", "KMOP513", "AALT248", "IGZC617", "ZDHO229", "GXYV745", "PDTW465", "SEPM505", "ZJWQ323", "VRRU692", "NHOY962", "BBFR276", "NVML939", "VHPV534", "YTXG467", "BOCT360", "ONEO498", "CICL849", "SAIK461", "NZGL739", "NIFD497", "XMVE276", "JHZM922", "LCLV707", "BPKN209", "YTZU211", "LUNI891", "CQTC089", "FBDZ269", "VKCI112", "BLJH968", "LLML439", "TDRV973", "RTFR863", "GZAN917", "WSUI006", "JILN883", "CAHM719", "JCMI028", "BGFZ774", "BGVZ374", "WBUJ792", "DLVT690", "AVKE534", "TDPU030", "SKFI697", "UCLY688", "OODZ687", "IIPR924", "TSES431", "CQSN693", "ZQGJ398", "FMGH661", "ZORF207", "MDWD343", "OBDM142", "SATV193", "MUKZ136", "INAE029", "MWDB125", "JUXN395", "LQGW143", "ALKP557", "WQAR962", "UYZI622", "WKYM520", "WUMH621", "GLRV451", "ISHG990", "OCNW161", "WQMS244", "UQEF227", "IAEZ636", "TEZJ280", "GCCJ844", "EVTF869", "JGJH568", "MDPH890", "EHKR422", "NBIM361", "XEWM477", "PBJP921", "FGEG840", "UJOO120", "XZTB081", "GXCQ610", "ANAR117", "TNIP023", "GLFN787", "SYYV532", "GOTY296", "TXME798", "SUZK405", "VWHY631", "HAXW159", "CCJN761", "GGUN719"), GROUP_1 = c(6, 1, 4, 9, 1, 3, 3, 2, 3, 2, 1, 9, 4, 3, 1, 1, 4, 6, 5, 3, 3, 3, 9, 9, 2, 3, 4, 6, 1, 1, 1, 9, 6, 1, 5, 9, 5, 9, 5, 5, 1, 2, 9, 1, 3, 9, 9, 3, 5, 6, 1, 4, 1, 6, 4, 5, 2, 6, 4, 1, 5, 9, 1, 4, 3, 1, 2, 1, 2, 9, 1, 4, 3, 1, 2, 3, 6, 1, 6, 2, 2, 4, 1, 2, 6, 9, 3, 4, 2, 9, 6, 1, 3, 3, 1, 5, 4, 2, 4, 9), GROUP_2 = c(9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9), GROUP_3 = c(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9)), class = "data.frame", row.names = c(NA, -100L))
We can use gather
library(dplyr)
library(tidyr)
data %>%
gather('name', 'value', -MASTER_HCU_DI) %>%
count(name, value)
I have a dataframe that looks like this
> head(printing_id_map_unique_frames)
# A tibble: 6 x 5
# Groups: frame_number [6]
X1 X2 X3 row_in_frame frame_number
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2 3 15 1
2 1 2 3 15 2
3 1 2 3 15 3
4 1 2 3 15 4
5 1 2 3 15 5
6 1 2 3 15 6
As you can see, X1,X2,X3, row_in_frame is identical
However, eventually you get to a
X1 X2 X3 row_in_frame frame_number
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2 3 15 32
2 1 2 3 15 33
3 1 2 3 5 34**
4 1 4 5 15 35
5 1 4 5 15 36
What I would like to do is essentially compute a dataframe that looks like:
X1 X2 X3 row_in_frame num_duplicates
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2 3 15 33
2 1 2 3 5 1
...
Essentially, what I want is to "collapse" over identical first 4 columns and count how many rows of that type there are in the "num_duplicates" column.
Is there a nice way to do this in dplyr without a messy for loop that tracks a count and if there is a change.
Below please find a full data structure via dput:
> dput(printing_id_map_unique_frames)
structure(list(X1 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), X2 = c(2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
), X3 = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5), row_in_frame = c(15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 5, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 5
), frame_number = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68)), row.names = c(NA, -68L), class = c("tbl_df",
"tbl", "data.frame"))
Here is one option with count
library(dplyr) # 1.0.0
df1 %>%
count(!!! rlang::syms(names(.)[1:4]))
Or specify the unquoted column names
df1 %>%
count(X1, X2, X3, row_in_frame)
If we don't want to change the order, an option is to convert the first 4 columns to factor with levels specified as the unique values (which is the same as the order of occurrence of values) and then apply the count
df1 %>%
mutate(across(1:4, ~ factor(.x, levels = unique(.x)))) %>%
count(!!! rlang::syms(names(.)[1:4])) %>%
type.convert(as.is = TRUE)
# A tibble: 4 x 5
# X1 X2 X3 row_in_frame n
# <int> <int> <int> <int> <int>
#1 1 2 3 15 33
#2 1 2 3 5 1
#3 1 4 5 15 33
#4 1 4 5 5 1
Suppose my time series consists only of two columns: signal and day
The signal variable is supposed to repeat itself in a cycle of 1 to 6. So I need to insert empty rows for each implicit missing Signal but with signal counting from 1 to 6. (Suppose I have more columns that should also be empty (NA)).
In other words, for each unique day, there should be 6 rows with signal counting from 1 to 6.
My dataframe:
df = structure(list(data.Signal = c(2, 3, 4, 5, 6, 1, 2, 3, 4, 6,
1, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 1, 2, 3, 4, 5, 6, 2, 3, 4,
5, 6, 1, 3, 4, 5, 6, 2, 3, 4, 5, 6, 3, 4, 6, 1, 3, 4, 5, 6, 1,
2, 3, 4, 5, 6, 1, 2, 3, 4, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5,
6, 1, 2, 3, 4, 5, 1, 2, 3, 4, 6, 2, 3, 6, 3, 4, 5, 6, 1, 3, 4,
5, 6, 1, 1, 2, 3, 4, 5, 3, 4, 1, 2, 3, 4, 5, 5, 1, 2, 3, 4),
data.day = c(18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 20,
20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 22, 23, 23, 23, 23,
23, 23, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 26, 26, 26,
26, 26, 27, 27, 27, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29,
29, 30, 30, 30, 30, 30, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7,
7, 7, 7, 7, 8, 9, 9, 9, 9, 9, 10, 10, 11, 11, 11, 11, 11,
11, 12, 12, 12, 12)), class = "data.frame", row.names = c(NA,
-114L))
My approach:
data <- df %>%
group_by(gr=data.day) %>%
complete(data.Signal = 1:6) %>%
ungroup() %>%
select(-gr)
This however, sorts the days ascending. The order of the days is obviously meaningful in the time series data. How can I "re-sort" to the original order or is there another way of solving my problem? Thank you!
Convert data.day to factor before using complete
library(dplyr)
df %>%
group_by(gr = factor(data.day, levels = unique(data.day))) %>%
tidyr::complete(data.Signal = 1:6) %>%
ungroup() %>%
select(-gr)
# data.Signal data.day
# <dbl> <dbl>
# 1 1 NA
# 2 2 18
# 3 3 18
# 4 4 18
# 5 5 18
# 6 6 18
# 7 1 19
# 8 2 19
# 9 3 19
#10 4 19
# … with 141 more rows
If you want those NA's filled you can use this version.
df %>%
mutate(grp = factor(data.day, levels = unique(data.day))) %>%
complete(grp, data.Signal = 1:6) %>%
ungroup() %>%
select(-data.day)
I have some data which looks like:
> head(d)
V1 V2 V3 V4 V5 V6 V7 V8 V9
50 28 79 4 6 48 2 17 4 20
51 28 79 4 6 48 2 17 4 21
52 28 79 4 6 48 2 17 4 22
53 28 79 4 6 48 2 17 4 23
54 28 79 4 6 48 2 17 4 24
55 28 79 4 6 48 2 17 4 25
V10
50 000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V
51 000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.5V000.8V000.8V000.5V000.4V000.4V000.4V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.6V
52 000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.4V000.6V000.4V000.4V000.5V000.5V000.5V000.4V000.4V000.4V000.5V000.5V000.5V000.7V000.7V000.8V
53 000.7V000.5V000.4V000.5V000.5V000.4V000.4V000.4V000.4V000.5V000.5V000.4V000.4V000.3V000.4V000.4V000.4V000.3V000.4V000.3V000.4V000.9V001.0V001.0V
54 000.8V000.5V000.3V000.4V000.4V000.4V000.7V001.2V001.2V001.0N000.5V000.4V000.4V000.4V000.4V000.3V000.3V000.3V000.4V000.4V000.5V000.5V000.6V000.6V
55 000.5V000.5V000.3V000.3V000.3V000.3V000.3V000.4V000.5V000.6V000.5V000.5V000.4V000.3V000.4V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.3V000.4V
Where column V10 is a large column separated by 24 V's and N's. The V's are "valid" observations and the N's are non-valid observations.
I want to separate the V10 column. I tried using the following but it does not solve the N's problem.
sep_data <- df %>%
separate(V10, into = paste("x",1:24, sep = "_"), sep = "V")
Looking at the tail() of the sep_data:
V1 V2 V3 V4 V5 x_1 x_2 x_3 x_4 x_5 x_6 x_7 x_8 x_9 x_10 x_11 x_12
174 28079008 6 48 2 17042 4002.0 001.1 000.6 000.4 000.4 000.4 000.4 000.5 000.7 000.9 000.7N000.5 000.5
175 28079008 6 48 2 17042 5000.7 000.5 000.4 000.3 000.3 000.3 000.3 000.4 000.5 000.6 000.5 000.5
176 28079008 6 48 2 17042 6000.4 000.3 000.3 000.3 000.3 000.3 000.3 000.4 000.5 000.6 000.5 000.4
177 28079008 6 48 2 17042 7000.3 000.3 000.2 000.2 000.2 000.2 000.2 000.3 000.4 000.5 000.4 000.4
178 28079008 6 48 2 17042 8000.3 000.3 000.3 000.3 000.3 000.3 000.3 000.4 000.5 000.5 000.5 000.5
179 28079008 6 48 2 17042 9000.3 000.3 000.3 000.3 000.3 000.3 000.3 000.3 000.3 000.3 000.3 000.3
x_13 x_14 x_15 x_16 x_17 x_18 x_19 x_20 x_21 x_22 x_23 x_24 nchar
174 000.4 000.5 000.5 000.5 000.5 000.5 000.6 000.6 000.6 000.6 000.7 145
175 000.5 000.5 000.5 000.5 000.5 000.5 000.5 000.6 000.6 000.6 000.6 000.5 145
176 000.4 000.4 000.5 000.5 000.5 000.5 000.5 000.5 000.5 000.4 000.4 000.3 145
177 000.4 000.4 000.4 000.4 000.4 000.5 000.5 000.6 000.5 000.5 000.5 000.4 145
178 000.4 000.4 000.4 000.5 000.5 000.5 000.5 000.4 000.4 000.4 000.4 000.4 145
179 000.4 000.3 000.3 000.3 000.3 000.3 000.4 000.5 000.5 000.5 000.6 000.5 145
I have 000.7N000.5.
How can I use separate() to separate based on V OR N.
Data:
structure(list(V1 = c(28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28), V2 = c(79, 79, 79, 79, 79, 79,
79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79), V3 = c(4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8), V4 = c(6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6), V5 = c(48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
38, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48), V6 = c(2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2), V7 = c(17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17), V8 = c(4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4), V9 = c(20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26, 27, 28, 29, 30, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
30, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 2, 3,
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29), V10 = c("000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V",
"000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.5V000.8V000.8V000.5V000.4V000.4V000.4V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.6V",
"000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.4V000.6V000.4V000.4V000.5V000.5V000.5V000.4V000.4V000.4V000.5V000.5V000.5V000.7V000.7V000.8V",
"000.7V000.5V000.4V000.5V000.5V000.4V000.4V000.4V000.4V000.5V000.5V000.4V000.4V000.3V000.4V000.4V000.4V000.3V000.4V000.3V000.4V000.9V001.0V001.0V",
"000.8V000.5V000.3V000.4V000.4V000.4V000.7V001.2V001.2V001.0N000.5V000.4V000.4V000.4V000.4V000.3V000.3V000.3V000.4V000.4V000.5V000.5V000.6V000.6V",
"000.5V000.5V000.3V000.3V000.3V000.3V000.3V000.4V000.5V000.6V000.5V000.5V000.4V000.3V000.4V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.3V000.4V",
"000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.5V000.5V000.4V000.4V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.5V000.4V000.3V000.3V000.3V",
"000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.3V000.3V000.3V000.3V000.4V000.4V000.4V000.3V000.4V000.4V000.4V000.4V000.4V000.4V000.3V",
"000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.4V000.4V000.4V000.4V000.3V000.4V000.4V000.4V000.3V",
"000.4V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.5V000.4V000.5V000.6V000.6V",
"000.7V000.7V000.6V000.4V000.4V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.4V000.4V",
"00003V00004V00002V00002V00001V00002V00004V00003V00005V00005V00005V00009V00011V00014V00014V00009V00008V00010V00011V00014V00011V00008V00009V00006V",
"00005V00005V00003V00003V00002V00003V00009V00023V00069V00068V00008V00013V00010V00007V00012V00008V00007V00004V00006V00005V00007V00006V00004V00004V",
"00002V00001V00002V00001V00001V00001V00003V00007V00018V00022V00042V00034V00021V00016V00018V00011V00009V00009V00014V00010V00010V00033V00083V00111V",
"00098V00046V00016V00010V00005V00013V00052V00217V00337V00485V00138V00029V00026V00020V00019V00011V00006V00012V00012V00008V00010V00007V00005V00004V",
"00002V00002V00001V00001V00001V00001V00002V00007V00013V00020V00012V00010V00010V00012V00009V00007V00006V00008V00010V00009V00010V00006V00004V00003V",
"00002V00001V00001V00001V00001V00002V00003V00009V00014V00015V00013V00014V00018V00017V00010V00009V00010V00009V00006V00007V00008V00036V00061V00007V",
"00005V00005V00002V00002V00002V00002V00003V00008V00055V00038V00020V00023V00022V00015V00017V00011V00007V00006V00009V00009V00015V00117V00082V00096V",
"00046V00031V00047V00042V00037V00033V00040V00044V00067V00093V00053V00009V00006V00006V00006V00004V00003V00004V00020V00016V00010V00020V00049V00104V",
"00088V00112V00055V00046V00013V00043V00023V00023V00040V00093V00046V00011V00007V00007V00007V00005V00008V00007V00010V00010V00025V00079V00059V00025V",
"00028V00038V00022V00017V00018V00028V00049V00152V00116V00158V00087V00030V00024V00017V00018V00008V00008V00008V00010V00013V00012V00022V00072V00083V",
"00060V00024V00032V00026V00014V00032V00078V00129V00199V00164V00107V00067V00052V00024V00012V00009V00015V00015V00036V00024V00019V00035V00056V00062V",
"00020V00046V00042V00019V00003V00004V00020V00091V00159V00137V00121V00052V00045V00039V00008V00004V00005V00004V00008V00009V00004V00002V00009V00005V",
"00070V00027V00026V00038V00020V00011V00016V00057V00073V00111V00053V00031V00018V00005V00003V00002V00002V00002V00010V00030V00033V00009V00005V00017V",
"00033V00059V00042V00019V00018V00015V00017V00024V00036V00063V00026V00008V00005V00003V00003V00002V00003V00005V00005V00009V00009V00007V00005V00006V",
"00009V00004V00001V00001V00002V00015V00012V00019V00031V00028V00013V00010V00008V00007V00004V00003V00003V00006V00017V00017V00014V00024V00012V00002V",
"00002V00002V00003V00002V00001V00001V00002V00002V00009V00003V00004V00004V00006V00007V00010V00005V00004V00003V00003V00002V00002V00005V00067V00080V",
"00063V00039V00025V00004V00003V00006V00015V00062V00144V00098V00079V00026V00017V00015V00007V00005V00004V00006V00004V00003V00003V00002V00003V00005V",
"00004V00001V00001V00001V00001V00001V00002V00005V00011V00014V00013V00012V00013V00020V00014V00013V00008V00007V00007V00006V00007V00006V00003V00002V",
"00002V00001V00001V00001V00001V00001V00002V00004V00008V00012V00025V00020V00026V00016V00017V00012V00011V00012V00017V00010V00012V00011V00006V00004V",
"00003V00002V00002V00001V00001V00002V00008V00013V00013V00019V00022V00015V00019V00018V00015V00015V00010V00011V00016V00015V00012V00011V00007V00005V",
"00003V00002V00002V00001V00001V00002V00004V00026V00145V00131V00027V00021V00020V00017V00019V00014V00011V00013V00012V00010V00007V00016V00025V00034V",
"00004V00003V00003V00002V00002V00003V00007V00024V00022V00046V00013V00011V00012V00016V00019V00011V00010V00010V00018V00022V00039V00060V00051V00121V",
"00085V00038V00024V00056V00039V00024V00022V00026V00040V00058V00041V00010V00010V00008V00013V00008V00008V00006V00013V00005V00004V00068V00079V00102V",
"00081V00019V00005V00015V00008V00048V00114V00179V00240V00153N00031V00021V00024V00018V00013V00008V00004V00006V00010V00014V00009V00013V00010V00032V",
"00018V00030V00006V00001V00001V00001V00003V00024V00038V00046V00024V00009V00010V00010V00006V00004V00004V00004V00004V00006V00004V00003V00002V00006V",
"00003V00002V00002V00001V00001V00001V00002V00012V00021V00025V00027V00011V00009V00006V00007V00004V00006V00005V00006V00020V00008V00004V00003V00004V",
"00002V00001V00001V00001V00001V00001V00002V00004V00007V00010V00011V00013V00013V00018V00010V00006V00005V00007V00010V00014V00016V00009V00006V00004V",
"00003V00002V00001V00001V00001V00001V00003V00006V00009V00012V00011V00015V00019V00029V00022V00016V00010V00013V00009V00008V00012V00012V00017V00009V",
"00009V00006V00003V00002V00003V00002V00003V00007V00007V00009V00010V00012V00011V00022V00013V00011V00011V00013V00019V00029V00022V00024V00037V00047V",
"00074V00082V00059V00021V00003V00002V00001V00002V00002V00002V00003V00003V00003V00004V00007V00007V00004V00004V00005V00010V00007V00009V00005V00005V",
"00025V00021V00010V00010V00008V00016V00032V00023V00023V00017V00013V00021V00024V00028V00027V00020V00020V00024V00029V00040V00048V00049V00043V00028V",
"00027V00033V00035V00022V00026V00036V00048V00063V00065V00059V00022V00029V00025V00022V00029V00028V00021V00017V00018V00021V00031V00030V00024V00020V",
"00012V00008V00008V00006V00005V00009V00018V00040V00059V00053V00063V00060V00044V00040V00043V00032V00029V00032V00052V00058V00071V00110V00128V00117V",
"00097V00074V00058V00049V00043V00043V00052V00094V00127V00160V00091V00050V00043V00038V00040V00031V00021V00038V00046V00043V00050V00061V00050V00022V",
"00017V00014V00019V00018V00008V00011V00014V00042V00050V00048V00027V00021V00019V00021V00016V00013V00016V00019V00028V00030V00034V00035V00021V00016V",
"00010V00008V00007V00006V00007V00009V00015V00040V00058V00045V00034V00032V00039V00032V00023V00024V00029V00029V00033V00039V00068V00112V00096V00047V",
"00030V00024V00018V00010V00010V00011V00019V00045V00071V00061V00050V00052V00049V00045V00046V00038V00027V00029V00041V00046V00073V00143V00131V00109V",
"00065V00054V00063V00067V00060V00055V00060V00062V00067V00075V00068V00036V00026V00025V00028V00020V00016V00020V00044V00056V00062V00098V00127V00116V",
"00091V00094V00079V00067V00040V00057V00056V00054V00054V00065V00059V00041V00027V00030V00026V00019V00023V00022V00032V00040V00060V00087V00079V00054V",
"00047V00061V00057V00050V00049V00049V00056V00085V00074V00086V00084V00062V00058V00048V00047V00027V00026V00026V00036V00064V00085V00096V00117V00106V",
"00084V00072V00062V00055V00042V00055V00066V00082V00099V00098V00096V00092V00089V00064V00045V00038V00036V00042V00075V00066V00066V00087V00088V00103V",
"00092V00090V00074V00059V00046V00038V00051V00076V00092V00091V00097V00084V00084V00081V00033V00017V00020V00018V00031V00045V00038V00041V00038V00065V",
"00113V00086V00082V00072V00059V00050V00050V00057V00058V00067V00072V00070V00061V00025V00009V00007V00010V00013V00028V00071V00084V00060V00044V00068V",
"00090V00084V00070V00061V00053V00049V00041V00041V00041V00048V00043V00030V00021V00012V00011V00007V00007V00009V00013V00021V00034V00044V00040V00041V",
"00057V00049V00015V00027V00032V00047V00041V00045V00046V00040V00034V00028V00023V00022V00009V00008V00007V00014V00033V00049V00056V00072V00039V00019V",
"00021V00017V00013V00008V00007V00011V00015V00013V00027V00010V00012V00011V00016V00023V00031V00021V00020V00017V00017V00021V00035V00073V00130V00115V",
"00102V00084V00071V00049V00041V00043V00045V00067V00083V00082V00083V00058V00048V00049V00028V00019V00015V00021V00025V00026V00033V00031V00042V00060V",
"00025V00009V00006V00010V00006V00008V00016V00038V00054V00047V00038V00037V00040V00056V00041V00041V00027V00027V00029V00034V00049V00065V00024V00012V",
"00010V00007V00004V00004V00003V00004V00010V00025V00041V00039V00051V00046V00051V00046V00046V00033V00031V00035V00040V00030V00036V00041V00035V00028V",
"00020V00015V00013V00014V00012V00017V00032V00056V00051V00057V00052V00037V00040V00039V00038V00040V00033V00036V00047V00050V00044V00046V00038V00032V",
"00021V00017V00015V00012V00010V00013V00024V00050V00101V00099V00053V00048V00043V00040V00044V00038V00035V00042V00043V00042V00040V00057V00060V00058V",
"00036V00030V00028V00021V00020V00035V00049V00067V00058V00070V00042V00038V00044V00053V00050V00044V00039V00043V00054V00059V00082V00098V00116V00119V",
"00101V00085V00076V00078V00066V00058V00054V00053V00049V00053V00058V00035V00033V00028V00032V00029V00027V00025V00047V00028V00049V00122V00135V00123V",
"00113V00077V00040V00041V00050V00056V00074V00091V00113V00097N00068V00055V00059V00044V00037V00024V00020V00028V00045V00058V00070V00082V00084V00102V",
"00082V00077V00044V00019V00012V00009V00029V00077V00084V00081V00051V00028V00031V00031V00022V00017V00014V00015V00015V00027V00037V00038V00020V00031V",
"00019V00010V00010V00009V00006V00010V00019V00048V00068V00058V00047V00030V00022V00018V00017V00014V00019V00018V00026V00059V00031V00019V00017V00014V",
"00009V00007V00003V00002V00002V00004V00008V00020V00030V00029V00024V00028V00028V00031V00022V00017V00016V00020V00028V00034V00039V00037V00029V00019V",
"00012V00009V00005V00004V00003V00003V00010V00025V00037V00034V00027V00031V00036V00043V00040V00037V00035V00040V00031V00027V00039V00038V00038V00032V",
"00031V00028V00022V00018V00021V00019V00021V00033V00036V00040V00041V00045V00040V00043V00039V00037V00041V00048V00053V00053V00052V00056V00055V00055V",
"00050V00044V00047V00048V00030V00011V00008V00011V00012V00010V00008V00014V00011V00017V00031V00022V00009V00008V00018V00038V00040V00043V00033V00027V",
"00004V00004V00004V00004V00003V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V",
"00004V00004V00004V00004V00004V00004V00004V00004V00004V00005V00005V00005V00005V00005V00005V00004V00005V00005V00005V00006V00006V00005V00005V00004V",
"00004V00004V00004V00004V00004V00004V00004V00004V00005V00005V00006V00006V00005V00005V00004V00005V00004V00005V00005V00005V00005V00005V00006V00008V",
"00006V00006V00006V00005V00005V00005V00004V00005V00008V00006V00006V00006V00005V00005V00005V00004V00004V00004V00004V00005V00006V00006V00006V00005V",
"00004V00004V00004V00004V00004V00004V00004V00004V00005V00005V00005V00006V00006V00005V00005V00005V00005V00005V00005V00005V00005V00005V00004V00004V",
"00004V00004V00004V00004V00004V00004V00004V00004V00005V00006V00005V00005V00005V00005V00005V00006V00005V00005V00005V00006V00007V00008V00007V00005V",
"00005V00004V00004V00004V00004V00004V00004V00004V00005V00005V00006V00006V00005V00005V00005V00005V00005V00005V00004V00004V00004V00004V00005V00004V",
"00004V00004V00005V00004V00005V00004V00004V00005V00006V00007V00005V00005V00005V00005V00005V00005V00005V00004V00004V00004V00004V00005V00005V00005V",
"00004V00004V00005V00005V00004V00004V00004V00005V00005V00006V00006V00005V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V00004V",
"00004V00005V00004V00004V00004V00004V00004V00005V00005V00006V00005V00005V00005V00006V00005V00004V00004V00005V00004V00004V00004V00005V00005V00005V",
"00005V00005V00005V00005V00005V00005V00005V00005V00006V00006V00005V00005V00005V00005V00004V00004V00004V00005V00004V00005V00006V00006V00005V00005V",
"00005V00005V00005V00005V00005V00004V00005V00005V00005V00006V00005V00006V00006V00005V00004V00004V00004V00004V00004V00004V00004V00004V00005V00005V",
"00005V00005V00005V00006V00006V00005V00005V00005V00006V00007V00007V00006V00006V00005V00004V00004V00005V00004V00004V00004V00004V00005V00005V00005V",
"00005V00005V00005V00005V00005V00005V00005V00006V00006V00006V00006V00005V00005V00005V00004V00004V00004V00004V00004V00004V00004V00004V00004V00005V",
"00005V00005V00005V00004V00004V00005V00005V00005V00005V00006V00006V00005V00005V00005V00004V00004V00005V00005V00005V00004V00004V00004V00005V00005V",
"00004V00005V00005V00004V00004V00004V00004V00004V00004V00004V00005V00005V00005V00005V00005V00005V00004V00005V00005V00004V00004V00005V00006V00006V",
"00008V00007V00005V00005V00005V00005V00005V00005V00005V00007V00006V00006V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V",
"00005V00005V00004V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00004V",
"00004V00004V00004V00004V00004V00004V00004V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V",
"00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V",
"00004V00005V00005V00005V00005V00005V00005V00005V00006V00006V00006V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V",
"00005V00005V00005V00005V00005V00005V00005V00005V00006V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00006V00006V00007V",
"00006V00006V00006V00005V00005V00005V00006V00006V00006V00006V00006V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00006V00008V00011V",
"00009V00006V00005V00005V00005V00005V00005V00006V00007V00008V00006N00006V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00006V00006V",
"00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V",
"00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V",
"00005V00005V00005V00005V00005V00005V00005V00005V00005V00006V00006V00006V00006V00006V00006V00006V00006V00006V00006V00006V00006V00007V00006V00005V",
"00005V00005V00005V00005V00005V00005V00005V00005V00006V00006V00007V00007V00007V00006V00006V00006V00006V00005V00005V00005V00005V00005V00005V00005V",
"00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00006V00006V00006V00006V00006V",
"00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V00005V",
"000.5V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.5V000.5V000.4V000.4V000.5V000.5V000.6V000.6V000.6V000.5V",
"000.5V000.5V000.5V000.5V000.4V000.4V000.5V000.4V000.5V000.4V000.4V000.4V000.4V000.4V000.5V000.4V000.4V000.4V000.5V000.5V000.5V000.5V000.5V000.4V",
"000.4V000.4V000.3V000.3V000.3V000.3V000.4V000.4V000.6V000.7V000.6V000.5V000.5V000.5V000.5V000.5V000.4V000.4V000.5V000.5V000.6V000.7V000.8V001.1V",
"001.2V000.9V000.8V000.6V000.5V000.4V000.4V000.6V001.1V001.0V000.7V000.6V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.6V000.7V000.8V000.7V000.6V",
"000.5V000.4V000.4V000.4V000.3V000.3V000.4V000.5V000.6V000.6V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.6V000.6V000.6V000.5V000.4V",
"000.4V000.4V000.4V000.4V000.3V000.3V000.4V000.5V000.6V000.7V000.6V000.5V000.5V000.5V000.5V000.5V000.4V000.4V000.5V000.6V000.7V000.9V001.0V000.7V",
"000.5V000.4V000.4V000.4V000.3V000.3V000.4V000.5V000.7V000.8V000.6V000.6V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.7V000.7V000.7V",
"000.5V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.5V000.5V000.5V000.4V000.4V000.5V000.4V000.4V000.4V000.4V000.4V000.5V000.6V000.7V000.7V",
"000.6V000.5V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.5V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.5V000.5V",
"000.4V000.4V000.4V000.4V000.4V000.3V000.4V000.5V000.7V000.8V000.6V000.5V000.5V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.7V000.9V000.7V",
"000.7V000.5V000.4V000.3V000.3V000.3V000.4V000.5V000.7V000.8V000.6V000.6V000.7V000.7V000.5V000.5V000.5V000.4V000.4V000.5V000.6V000.7V000.7V000.6V",
"000.6V000.5V000.4V000.4V000.4V000.3V000.4V000.5V000.6V000.7V000.7V000.6V000.6V000.5V000.6V000.5V000.5V000.5V000.5V000.5V000.6V000.6V000.7V000.7V",
"000.7V000.7V000.7V000.8V000.8V000.7V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.5V000.5V",
"000.6V000.6V000.5V000.5V000.5V000.5V000.5V000.5V000.4V000.4V000.5V000.4V000.4V000.4V000.4V000.3V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.5V",
"000.5V000.5V000.5V000.4V000.4V000.4V000.3V000.3V000.4V000.4V000.5V000.4V000.4V000.4V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.5V000.4V000.4V",
"000.4V000.4V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.7V000.9V",
"001.1V001.2V000.7V000.4V000.3V000.3V000.3V000.4V000.6V000.7V000.5V000.5V000.5V000.5V000.5V000.4V000.4V000.4V000.4V000.5V000.5V000.5V000.5V000.6V",
"000.4V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.5V000.6V000.5V000.5V000.5V000.5V000.4V000.4V000.4V000.3V000.4V000.5V000.6V000.7V000.5V000.4V",
"000.4V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.5V000.5V000.5V000.6V000.4V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V",
"000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.4V000.6V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V",
"000.4V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.6V000.7V000.6V000.5V002.7V000.8V000.5V000.4V000.4V000.4V000.5V000.5V000.5V000.5V000.5V000.5V",
"000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.4V000.4V000.4V000.4V000.4V000.5V000.6V000.8V000.8V",
"000.8V000.8V000.7V000.5V000.4V000.4V000.4V000.5V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.6V000.9V001.8V",
"002.0V001.1V000.6V000.4V000.4V000.4V000.4V000.5V000.7V000.9V000.7N000.5V000.5V000.4V000.5V000.5V000.5V000.5V000.5V000.6V000.6V000.6V000.6V000.7V",
"000.7V000.5V000.4V000.3V000.3V000.3V000.3V000.4V000.5V000.6V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.6V000.6V000.6V000.6V000.5V",
"000.4V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.5V000.6V000.5V000.4V000.4V000.4V000.5V000.5V000.5V000.5V000.5V000.5V000.5V000.4V000.4V000.3V",
"000.3V000.3V000.2V000.2V000.2V000.2V000.2V000.3V000.4V000.5V000.4V000.4V000.4V000.4V000.4V000.4V000.4V000.5V000.5V000.6V000.5V000.5V000.5V000.4V",
"000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.5V000.5V000.5V000.5V000.4V000.4V000.4V000.5V000.5V000.5V000.5V000.4V000.4V000.4V000.4V000.4V",
"000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.3V000.4V000.3V000.3V000.3V000.3V000.3V000.4V000.5V000.5V000.5V000.6V000.5V"
)), row.names = 50:179, class = "data.frame")
How can I use separate() to separate based on V OR N? sep = "V|N".
But I'd suggest pre-processing. Use gsub or stringr::str_replace_all to make the N entries NA before separating. Something like df$V10 = gsub("N\\d+(\\.)?\\d+", "VNA", df$V10). Replace NXXX.XX with VNA, then splitting on V should leave NAs where the Ns originally were.
My data looks like this:
> str(tab2)
'data.frame': 36 obs. of 22 variables:
$ organisationunitname : Factor w/ 38 levels "All OUs","Angola",..: 2 3 4 5 6 7 8 9 10 11 ...
$ cd4_perform_result : num 24 6 7 1 1 1 5 3 2 1 ...
$ cd4_participate_result : num 1 8 8 1 1 1 5 3 2 1 ...
$ cd4_pass_result : num 1 4 19 1 1 1 5 3 2 1 ...
$ eid_perform_result : num 2 1 7 1 1 1 1 9 1 1 ...
$ eid_participate_result : num 2 1 5 1 1 1 1 8 1 1 ...
$ eid_pass_result : num 2 1 5 1 1 1 1 7 1 1 ...
$ vl_perform_result : num 2 1 3 1 1 1 1 9 1 1 ...
$ vl_participate_result : num 2 1 7 1 1 1 1 7 1 1 ...
$ vl_pass_result : num 2 1 7 1 1 1 1 7 1 1 ...
$ hiv_perform_result : num 19 29 14 1 1 1 26 21 10 1 ...
$ hiv_participate_result : num 19 28 14 1 1 1 22 20 4 1 ...
$ hiv_pass_result : num 20 28 14 1 1 1 18 22 7 1 ...
$ tbafb_perform_result : num 9 1 8 1 1 1 1 7 1 1 ...
$ tbafb_participate_result : num 1 1 18 1 1 1 1 5 1 1 ...
$ tbafb_pass_result : num 1 1 19 1 1 1 1 6 1 1 ...
$ tbculture_perform_result : num 3 1 2 1 1 1 1 1 1 1 ...
$ tbculture_participate_result: num 1 1 2 1 1 1 1 1 1 1 ...
$ tbculture_pass_result : num 1 1 1 1 1 1 1 1 1 1 ...
$ tbxpert_perform_result : num 1 1 4 1 1 1 1 1 1 1 ...
$ tbxpert_participate_result : num 1 1 5 1 1 1 1 1 1 1 ...
$ tbxpert_pass_result : num 1 1 2 1 1 1 1 1 1 1 ...
>
DATA
structure(list(country = c("eRkf", "KJfd", "wjkO", "Hovb", "v6Dm",
"vp8p", "TYhI", "U4OB", "GVnL", "dzJO", "11JX", "ygWc", "4Ye1",
"RykQ", "OHLW", "Xh1x", "MOl4", "67vY", "h2cA", "Ue1r", "Hr9G",
"YxpI", "S0Or", "2fss", "wz9F", "XEOG", "Vptm", "xAup", "STBG",
"AayU", "mJyW", "PvNG", "qncq", "L8dk", "6CJ8", "90i7"), cd4_perform_result = c(23,
6, 7, 1, 1, 1, 5, 3, 2, 1, 10, 1, 2, 8, 1, 2, 16, 1, 1, 22, 12,
1, 13, 11, 17, 1, 20, 15, 1, 21, 18, 4, 1, 14, 19, 9), cd4_participate_result = c(1,
8, 8, 1, 1, 1, 5, 3, 2, 1, 7, 1, 2, 9, 1, 2, 16, 1, 1, 17, 11,
1, 4, 14, 13, 1, 19, 15, 1, 20, 10, 6, 1, 18, 12, 3), cd4_pass_result = c(1,
4, 18, 1, 1, 1, 5, 3, 2, 1, 20, 1, 2, 19, 1, 2, 12, 1, 1, 13,
6, 1, 10, 11, 9, 1, 17, 2, 1, 15, 7, 16, 1, 14, 8, 3), eid_perform_result = c(2,
1, 6, 1, 1, 1, 1, 8, 1, 1, 7, 1, 2, 3, 5, 2, 5, 1, 1, 9, 5, 1,
4, 2, 10, 1, 5, 1, 1, 5, 8, 2, 1, 1, 8, 5), eid_participate_result = c(2,
1, 4, 1, 1, 1, 1, 7, 1, 1, 6, 1, 2, 9, 4, 2, 4, 1, 1, 3, 2, 1,
9, 2, 8, 1, 4, 1, 1, 4, 6, 2, 1, 1, 5, 4), eid_pass_result = c(2,
1, 4, 1, 1, 1, 1, 6, 1, 1, 5, 1, 2, 9, 1, 2, 4, 1, 1, 3, 2, 1,
8, 2, 7, 1, 4, 1, 1, 4, 5, 2, 1, 1, 4, 4), vl_perform_result = c(2,
1, 3, 1, 1, 1, 1, 8, 1, 1, 9, 1, 2, 10, 4, 2, 4, 1, 1, 5, 4,
1, 7, 6, 5, 1, 11, 1, 1, 4, 8, 2, 1, 1, 7, 4), vl_participate_result = c(2,
1, 7, 1, 1, 1, 1, 7, 1, 1, 8, 1, 2, 8, 4, 2, 4, 1, 1, 5, 2, 1,
4, 6, 3, 1, 9, 1, 1, 4, 7, 2, 1, 1, 6, 1), vl_pass_result = c(2,
1, 7, 1, 1, 1, 1, 7, 1, 1, 9, 1, 2, 8, 1, 2, 5, 1, 1, 4, 2, 1,
2, 6, 3, 1, 10, 1, 1, 5, 7, 2, 1, 1, 5, 1), hiv_perform_result = c(18,
28, 13, 1, 1, 1, 25, 20, 10, 1, 6, 11, 9, 7, 19, 26, 8, 14, 1,
27, 12, 1, 24, 17, 23, 1, 21, 5, 1, 22, 16, 15, 1, 2, 3, 4),
hiv_participate_result = c(18, 27, 13, 1, 1, 1, 21, 19, 4,
1, 15, 9, 10, 3, 11, 26, 5, 1, 1, 20, 6, 1, 23, 17, 12, 1,
24, 8, 1, 22, 14, 16, 1, 2, 25, 7), hiv_pass_result = c(19,
27, 13, 1, 1, 1, 17, 21, 6, 1, 16, 26, 10, 2, 23, 25, 9,
1, 1, 14, 4, 1, 20, 18, 11, 1, 22, 7, 1, 15, 12, 8, 1, 3,
24, 5), tbafb_perform_result = c(9, 1, 8, 1, 1, 1, 1, 7,
1, 1, 6, 1, 20, 5, 1, 2, 12, 1, 1, 15, 13, 1, 17, 11, 19,
1, 10, 1, 1, 14, 16, 4, 1, 18, 3, 1), tbafb_participate_result = c(1,
1, 17, 1, 1, 1, 1, 5, 1, 1, 12, 1, 18, 11, 1, 2, 6, 1, 1,
13, 7, 1, 10, 9, 14, 1, 8, 1, 1, 16, 15, 4, 1, 17, 3, 1),
tbafb_pass_result = c(1, 1, 18, 1, 1, 1, 1, 6, 1, 1, 13,
1, 19, 11, 1, 2, 4, 1, 1, 15, 5, 1, 7, 10, 12, 1, 8, 1, 1,
16, 9, 3, 1, 14, 17, 1), tbculture_perform_result = c(3,
1, 2, 1, 1, 1, 1, 1, 1, 1, 6, 1, 3, 8, 1, 2, 2, 1, 1, 7,
3, 1, 5, 4, 7, 1, 5, 1, 1, 3, 6, 6, 1, 3, 3, 1), tbculture_participate_result = c(1,
1, 2, 1, 1, 1, 1, 1, 1, 1, 5, 1, 3, 8, 1, 2, 2, 1, 1, 7,
2, 1, 6, 4, 6, 1, 1, 1, 1, 3, 3, 5, 1, 3, 3, 1), tbculture_pass_result = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 7, 1, 2, 2, 1, 1, 8,
2, 1, 6, 4, 5, 1, 1, 1, 1, 3, 3, 6, 1, 3, 3, 1), tbxpert_perform_result = c(1,
1, 4, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 16, 1, 8, 3, 1, 1, 5,
9, 1, 15, 7, 13, 1, 4, 1, 1, 12, 11, 1, 1, 6, 14, 10), tbxpert_participate_result = c(1,
1, 5, 1, 1, 1, 1, 1, 1, 1, 15, 1, 1, 4, 1, 12, 3, 1, 1, 2,
7, 1, 16, 9, 11, 1, 1, 1, 1, 14, 10, 1, 1, 6, 8, 13), tbxpert_pass_result = c(1,
1, 2, 1, 1, 1, 1, 1, 1, 1, 12, 1, 1, 4, 1, 9, 3, 1, 1, 14,
6, 1, 13, 8, 8, 1, 1, 1, 1, 11, 6, 1, 1, 5, 7, 10)), .Names = c("country",
"cd4_perform_result", "cd4_participate_result", "cd4_pass_result",
"eid_perform_result", "eid_participate_result", "eid_pass_result",
"vl_perform_result", "vl_participate_result", "vl_pass_result",
"hiv_perform_result", "hiv_participate_result", "hiv_pass_result",
"tbafb_perform_result", "tbafb_participate_result", "tbafb_pass_result",
"tbculture_perform_result", "tbculture_participate_result", "tbculture_pass_result",
"tbxpert_perform_result", "tbxpert_participate_result", "tbxpert_pass_result"
), row.names = c(NA, 36L), class = "data.frame")
It is organized by unique orgnationationunitname, but the columns are also grouped into different categories. For e.g. cd4, eid, vl, hiv, tbafb etc. and then into perform, participate & pass_result. I want to tabulate the data across all these categories to look like this
Country: eRkf
cat CD4 EID VL HIV TB AFB TB Culture TB Xpert
Perform 3442 288 114 29519 8572 72 591
Participate 1771 128 95 17342 5433 119 395
Pass_test 1535 118 83 11674 4508 109 343
How can I do this in R, without having to create separate data frames?