indented unordered list to nested list() - r

I've got a log file that looks as follows:
Data:
+datadir=/data/2017-11-22
+Nusers=5292
Parameters:
+outdir=/data/2017-11-22/out
+K=20
+IC=179
+ICgroups=3
-group 1: 1-1
ICeffects: 1-5
-group 2: 2-173
ICeffects: 6-10
-group 3: 175-179
ICeffects: 11-15
I would like to parse this logfile into a nested list using R so that the result will look like this:
# Expected parse result: one nested list per log section. ICgroups holds one
# (group range, ICeffects range) pair for each "-group N" entry in the log.
result <- list(
  Data = list(
    datadir = "/data/2017-11-22",
    Nusers = 5292
  ),
  Parameters = list(
    outdir = "/data/2017-11-22/out",
    K = 20,
    IC = 179,
    ICgroups = list(
      list("group 1" = "1-1", ICeffects = "1-5"),
      list("group 2" = "2-173", ICeffects = "6-10"),
      # corresponds to the "-group 3: 175-179" line of the log
      list("group 3" = "175-179", ICeffects = "11-15")
    )
  )
)
Is there a not-extremely-painful way of doing this?

Disclaimer: This is messy. There is no guarantee that this will work for larger/different files without some tweaking. You will need to do some careful checking.
The key idea here is to reformat the raw data, to make it consistent with the YAML format, and then use yaml::yaml.load to parse the data to produce a nested list.
By the way, this is an excellent example on why one really should use a common markup language for log-output/config files (like JSON, YAML, etc.)...
I assume you read in the log file using readLines to produce the vector of strings ss.
# Sample data
ss <- c(
"Data:",
" +datadir=/data/2017-11-22",
" +Nusers=5292",
"Parameters:",
" +outdir=/data/2017-11-22/out",
" +K=20",
" +IC=179",
" +ICgroups=3",
" -group 1: 1-1",
" ICeffects: 1-5",
" -group 2: 2-173",
" ICeffects: 6-10",
" -group 3: 175-179",
" ICeffects: 11-15")
We then reformat the data to adhere to the YAML format.
# Reformat to adhere to YAML formatting
# Reformat to adhere to YAML formatting.
# YAML nesting depends on exact column alignment, so the indentation is
# rebuilt from the type of each line instead of being taken from the raw log.
ss <- trimws(ss)
# "+key=value" -> list item under its section header; only a leading "+" is
# touched, so a "+" inside a value is left alone
ss <- sub("^\\+", "  - ", ss)
# "ICgroups=3" -> "ICgroups:"; the count is unused, the colon opens a nested list
ss <- sub("ICgroups=\\d+$", "ICgroups:", ss)
# first "=" only, so a value that itself contains "=" stays intact
ss <- sub("=", ": ", ss, fixed = TRUE)
# each "-group" line starts a new entry of the nested ICgroups list ...
ss <- sub("^-group", "      - group", ss)
# ... and "ICeffects" is aligned with "group" so it lands in the same entry
ss <- sub("^ICeffects", "        ICeffects", ss)
Note that – consistent with your expected output – the value 3 from ICgroups doesn't get used, and we need to replace ICgroups=3 with ICgroups: to initiate a nested sub-list. This was the part that threw me off first...
Loading & parsing the YAML string then produces a nested list.
# library() stops with an error when yaml is not installed; require() would
# only return FALSE and let yaml.load() fail afterwards.
library(yaml)
# NOTE(review): fine for a trusted local log file; check the yaml package's
# handler/expression options before parsing untrusted input.
lst <- yaml.load(paste(ss, collapse = "\n"))
lst
#$Data
#$Data[[1]]
#$Data[[1]]$datadir
#[1] "/data/2017-11-22"
#
#
#$Data[[2]]
#$Data[[2]]$Nusers
#[1] 5292
#
#
#
#$Parameters
#$Parameters[[1]]
#$Parameters[[1]]$outdir
#[1] "/data/2017-11-22/out"
#
#
#$Parameters[[2]]
#$Parameters[[2]]$K
#[1] 20
#
#
#$Parameters[[3]]
#$Parameters[[3]]$IC
#[1] 179
#
#
#$Parameters[[4]]
#$Parameters[[4]]$ICgroups
#$Parameters[[4]]$ICgroups[[1]]
#$Parameters[[4]]$ICgroups[[1]]$`group 1`
#[1] "1-1"
#
#$Parameters[[4]]$ICgroups[[1]]$ICeffects
#[1] "1-5"
#
#
#$Parameters[[4]]$ICgroups[[2]]
#$Parameters[[4]]$ICgroups[[2]]$`group 2`
#[1] "2-173"
#
#$Parameters[[4]]$ICgroups[[2]]$ICeffects
#[1] "6-10"
#
#
#$Parameters[[4]]$ICgroups[[3]]
#$Parameters[[4]]$ICgroups[[3]]$`group 3`
#[1] "175-179"
#
#$Parameters[[4]]$ICgroups[[3]]$ICeffects
#[1] "11-15"
PS. You will need to test this on larger files, and make changes to the substitution if necessary.

Related

Extracting gene name and ID number from a vector [duplicate]

This question already has answers here:
How do I separate a character column into two columns? [duplicate]
(2 answers)
Closed 1 year ago.
What gsub function can I use in R to get the gene name and the id number from a vector which looks like this?
head(colnames(cn), 20)
[1] "A1BG (1)" "NAT2 (10)" "ADA (100)" "CDH2 (1000)" "AKT3 (10000)" "GAGE12F (100008586)"
[7] "RNA5-8SN5 (100008587)" "RNA18SN5 (100008588)" "RNA28SN5 (100008589)" "LINC02584 (100009613)" "POU5F1P5 (100009667)" "ZBTB11-AS1 (100009676)"
[13] "MED6 (10001)" "NR2E3 (10002)" "NAALAD2 (10003)" "DUXB (100033411)" "SNORD116-1 (100033413)" "SNORD116-2 (100033414)"
[19] "SNORD116-3 (100033415)" "SNORD116-4 (100033416)"
1) Assuming the input s given in the Note at the end we can use read.table specifying that the fields are separated by ( and that ) is a comment character. We also strip white space around fields and give meaningful column names. No packages are used.
DF <- read.table(text = s, sep = "(", comment.char = ")",
strip.white = TRUE, col.names = c("Gene", "Id"))
DF
giving this data frame so DF$Gene is the genes and DF$Id is the id's.
Gene Id
1 A1BG 1
2 NAT2 10
3 ADA 100
4 CDH2 1000
5 AKT3 10000
6 GAGE12F 100008586
7 RNA5-8SN5 100008587
8 RNA18SN5 100008588
9 RNA28SN5 100008589
10 LINC02584 100009613
11 POU5F1P5 100009667
12 ZBTB11-AS1 100009676
13 MED6 10001
14 NR2E3 10002
15 NAALAD2 10003
16 DUXB 100033411
17 SNORD116-1 100033413
18 SNORD116-2 100033414
19 SNORD116-3 100033415
20 SNORD116-4 100033416
2) A variation of the above is to first remove the parentheses and then read it in giving the same result. Note that the second argument of chartr contains two spaces so that each parenthesis is translated to a space.
# chartr() requires `new` to be at least as long as `old`: two spaces here,
# one replacing "(" and one replacing ")".
read.table(text = chartr("()", "  ", s), col.names = c("Gene", "Id"))
Note
Lines <- '[1] "A1BG (1)" "NAT2 (10)" "ADA (100)" "CDH2 (1000)" "AKT3 (10000)" "GAGE12F (100008586)"
[7] "RNA5-8SN5 (100008587)" "RNA18SN5 (100008588)" "RNA28SN5 (100008589)" "LINC02584 (100009613)" "POU5F1P5 (100009667)" "ZBTB11-AS1 (100009676)"
[13] "MED6 (10001)" "NR2E3 (10002)" "NAALAD2 (10003)" "DUXB (100033411)" "SNORD116-1 (100033413)" "SNORD116-2 (100033414)"
[19] "SNORD116-3 (100033415)" "SNORD116-4 (100033416)" '
L <- Lines |>
textConnection() |>
readLines() |>
gsub(pattern = "\\[\\d+\\]", replacement = "")
s <- scan(text = L, what = "")
so s looks like this:
> dput(s)
c("A1BG (1)", "NAT2 (10)", "ADA (100)", "CDH2 (1000)", "AKT3 (10000)",
"GAGE12F (100008586)", "RNA5-8SN5 (100008587)", "RNA18SN5 (100008588)",
"RNA28SN5 (100008589)", "LINC02584 (100009613)", "POU5F1P5 (100009667)",
"ZBTB11-AS1 (100009676)", "MED6 (10001)", "NR2E3 (10002)", "NAALAD2 (10003)",
"DUXB (100033411)", "SNORD116-1 (100033413)", "SNORD116-2 (100033414)",
"SNORD116-3 (100033415)", "SNORD116-4 (100033416)")
First, in the future please share your data using the dput() command. See this for details.
Second, here is one solution for extracting the parts you need:
library(tidyverse)
g <- c("A1BG (1)", "NAT2 (10)", "ADA (100)", "RNA18SN5 (100008588)", "RNA28SN5 (100008589)")
# Digits between the parentheses; the lookbehind/lookahead keep "(" and ")"
# out of the extracted id.
gnumber <- stringr::str_extract(g, "(?<=\\()\\d+(?=\\))")
gnumber
# Everything up to the first whitespace, so names that contain digits
# (e.g. "A1BG") are kept whole instead of being cut at the first non-letter.
gname <- stringr::str_extract(g, "^\\S+")
gname
# or, to get the whole first word:
gname <- stringr::word(g, 1, 1)
gname

Sequence of numbers by hyphen without hyphenating single occurrences

I want to generate readable number sequences (e.g. 1, 2, 3, 4 = 1-4), but for a set of data where each number in the sequence must have four digits (e.g. 99 = 0099 or 1 = 0001 or 1022 = 1022) AND where there are different letters in front of each number.
I was looking at the answer to this question, which managed to do almost exactly as I want with two caveats:
If there is a stand-alone number that does not appear in a sequence, it will appear twice with a hyphen in between
If there are several stand-alone numbers that do not appear in a sequence, they won't be included in the result
### Create Data Set ====
## Create the data for different tags. I'm only using two unique levels here, but in my dataset I've got
## 400+ unique levels.
FM <- paste0('FM', c('0001', '0016', '0017', '0018', '0019', '0021', '0024', '0026', '0028'))
SC <- paste0('SC', c('0002', '0003', '0004', '0010', '0012', '0014', '0033', '0036', '0039'))
## Combine data
my.seq1 <- c(FM, SC)
## Sort data by number in sequence (the characters after the two-letter tag)
my.seq1 <- my.seq1[order(substr(my.seq1, 3, 7))]
### Attempt Number Sequencing ====
## Get the letters
sp.tags <- substr(my.seq1, 1, 2)
## Get the readable number sequence, one collapsed string per tag.
## The result has to be stored: the "Combine lists" step below reads my.seq2.
## This is the attempt under discussion; it deliberately keeps the run-length
## logic that duplicates stand-alone values and drops others.
my.seq2 <- lapply(split(my.seq1, sp.tags), ## Split data by the tag ID
                  function(x){
                    ## Get the run lengths as per the previous answer:
                    ## 1 = consecutive number, 2 = gap (larger gaps are capped at 2)
                    rl <- rle(c(1, pmin(diff(as.numeric(substr(x, 3, 7))), 2)))
                    ## Generate number sequence by separator as per the previous answer
                    seq2 <- paste0(x[c(1, cumsum(rl$lengths))], c("-", ",")[rl$values], collapse="")
                    ## Drop the trailing separator
                    return(substr(seq2, 1, nchar(seq2)-1))
                  })
## Combine lists and sort elements
my.seq2 <- unlist(strsplit(do.call(c, my.seq2), ","))
my.seq2 <- my.seq2[order(substr(my.seq2, 3, 7))]
names(my.seq2) <- NULL
my.seq2
[1] "FM0001-FM0001" "SC0002-SC0004" "FM0016-FM0019" "FM0028" "SC0039"
my.seq1
[1] "FM0001" "SC0002" "SC0003" "SC0004" "SC0010" "SC0012" "SC0014" "FM0016" "FM0017" "FM0018" "FM0019" "FM0021"
[13] "FM0024" "FM0026" "FM0028" "SC0033" "SC0036" "SC0039"
The major problems with this are:
Some values are completely missing from the data set (e.g. FM0021, FM0024, FM0026)
The first number in the sequence (FM0001) appears with a hyphen in between
I feel like I'm getting warmer by using A5C1D2H2I1M1N2O1R2T1's answer to utilize seqToHumanReadable because it's quite elegant AND solves both problems. Two more problems are that I'm not able to tag the ID before each number and can't force the number of digits to four (e.g. 0004 becomes 4).
library(R.utils)
lapply(split(my.seq1, sp.tags), function(x){
return(unlist(strsplit(seqToHumanReadable(substr(x, 3, 7)), ',')))
})
$FM
[1] "1" " 16-19" " 21" " 24" " 26" " 28"
$SC
[1] "2-4" " 10" " 12" " 14" " 33" " 36" " 39"
Ideally the result would be:
"FM0001, SC0002-SC0004, SC0010, SC0012, SC0014, FM0016-FM0019, FM0021, FM0024, FM0026, FM0028, SC0033, SC0036, SC0039"
Any ideas? It's one of those things that's really simple to do by hand but would take blinking ages, and you'd think a function would exist for it but I haven't found it yet or it doesn't exist :(
This should do?
# get the prefix/tag and number
# [A-Za-z] instead of [A-z]: the range A-z also covers the punctuation
# characters that sit between "Z" and "a" in ASCII ("[", "\", "]", "^", "_", "`")
tag <- gsub("(^[A-Za-z]+)(.+)", "\\1", my.seq1)
num <- gsub("([A-Za-z]+)(\\d+$)", "\\2", my.seq1)
# get a sequence id: an element continues the current run when its number is
# exactly one above the previous number and the tag is unchanged
n <- length(tag)
do_match <- c(FALSE, diff(as.numeric(num)) == 1 & tag[-1] == tag[-n])
seq_id <- cumsum(!do_match) # a sequence id
# tapply to combine the result: single values are kept as they are,
# runs are collapsed to "first-last"
res <- setNames(tapply(my.seq1, seq_id, function(x) {
  if (length(x) < 2) {
    x
  } else {
    paste(x[1], x[length(x)], sep = "-")
  }
}), NULL)
# show the result
res
#R> [1] "FM0001" "SC0002-SC0004" "SC0010" "SC0012" "SC0014" "FM0016-FM0019" "FM0021"
#R> [8] "FM0024" "FM0026" "FM0028" "SC0033" "SC0036" "SC0039"
# compare with
my.seq1
#R> [1] "FM0001" "SC0002" "SC0003" "SC0004" "SC0010" "SC0012" "SC0014" "FM0016" "FM0017" "FM0018" "FM0019" "FM0021" "FM0024"
#R> [14] "FM0026" "FM0028" "SC0033" "SC0036" "SC0039"
Data
FM <- paste0('FM', c('0001', '0016', '0017', '0018', '0019', '0021', '0024', '0026', '0028'))
SC <- paste0('SC', c('0002', '0003', '0004', '0010', '0012', '0014', '0033', '0036', '0039'))
my.seq1 <- c(FM, SC)
my.seq1 <- my.seq1[order(substr(my.seq1, 3, 7))]

R Extract partially matching string

I have a question about extracting a part of a string from several files that has these rows:
units = specified
- name 0 = prDM: Pressure, Digiquartz [db]
- name 1 = t090C: Temperature [ITS-90, deg C]
- name 2 = c0S/m: Conductivity [S/m]
- name 3 = t190C:Temperature, 2 [ITS-90, deg C]
- name 4 = c1S/m: Conductivity, 2 [S/m]
- name 5 = flSP: Fluorescence, Seapoint
- name 6 = sbeox0ML/L: Oxygen, SBE 43 [ml/l]
- name 7 = altM: Altimeter [m]
- name 8 = sal00: Salinity, Practical [PSU]
- name 9 = sal11: Salinity, Practical, 2 [PSU]
- span 0 = 1.000, 42.000
I need to extract only the information from the rows that start with "name", taking everything between "=" and ":".
For example, in the row "name 0 = prDM: Pressure, Digiquartz [db]" the desired result will be prDM.
Some files have a different number of "name" rows (i.e. this example has 13 rows, but other files have 16, and the number varies), so I want it to be as general as I can so that I can always extract the right strings independently of the number of rows. Rows start with # and a space before name.
I have tried this code but it only extract the first row. Can you please help me with this? Many thanks!
# Read every file and keep only its "# name ..." rows, parsed with fread().
# NOTE(review): nro.files and all.files are defined outside this snippet.
# Preallocate one slot per file instead of growing the list from NULL.
CNV <- vector("list", nro.files)
# seq_len() yields an empty loop for zero files, where 1:nro.files would
# iterate over c(1, 0)
for (i in seq_len(nro.files)) {
  x <- readLines(all.files[i])
  # indices of the lines that start with "# name"
  name.col <- grep("^\\# name", x)
  df <- data.table::fread(text = x[name.col])
  CNV[[i]] <- df
}
Using stringr and the regex pattern "name \\d+ = (.*?):", which means in words: "name, followed by a space, one or more digits, a space, an equals sign and another space, followed by a captured group containing any character (the period) zero or more times but as few times as possible (the *?), followed by a colon".
library(stringr)
strings <- c("name 0 = prDM: Pressure, Digiquartz [db]",
"name 1 = t090C: Temperature [ITS-90, deg C]",
"name 2 = c0S/m: Conductivity [S/m]",
"name 3 = t190C:Temperature, 2 [ITS-90, deg C]",
"name 4 = c1S/m: Conductivity, 2 [S/m]",
"name 5 = flSP: Fluorescence, Seapoint",
"name 6 = sbeox0ML/L: Oxygen, SBE 43 [ml/l]",
"name 7 = altM: Altimeter [m]",
"name 8 = sal00: Salinity, Practical [PSU]",
"name 9 = sal11: Salinity, Practical, 2 [PSU]")
# Lazy (.*?) stops at the first colon, so a colon that appears later in the
# description cannot be pulled into the captured name.
result <- str_match(strings, "name \\d+ = (.*?):")
# column 1 is the full match, column 2 the captured group
result[, 2]
[1] "prDM" "t090C" "c0S/m" "t190C" "c1S/m" "flSP" "sbeox0ML/L"
[8] "altM" "sal00" "sal11"
Or if you prefer base
# [^:]* captures everything up to the first colon only
pattern <- "name \\d+ = ([^:]*):"
result <- regmatches(strings, regexec(pattern, strings))
# Element 1 of each match is the full match, element 2 the captured group.
# vapply() fixes the return type; strings without a match give NA instead of
# a subscript error.
vapply(
  result,
  function(m) if (length(m) >= 2L) m[[2L]] else NA_character_,
  character(1)
)
[1] "prDM" "t090C" "c0S/m" "t190C" "c1S/m" "flSP" "sbeox0ML/L"
[8] "altM" "sal00" "sal11"
Use str_extract from package stringr and positive lookahead and lookbehind:
str <- "name 0 = prDM: Pressure, Digiquartz [db]"
# .*? is lazy: the match ends at the first colon even when the description
# further right contains another one
str_extract(str, "(?<== ).*?(?=:)")
[1] "prDM"
Explanation:
(?<== ) if you see = followed by white space on the left (lookbehind)
.* match anything until ...
(?=:)... you see a colon on the right (lookahead)
In Base R
test <- c("name 0 = prDM: Pressure, Digiquartz [db]","name 1 = t090C: Temperature [ITS-90, deg C]")
# [^:]+ captures up to the FIRST colon, and .* (rather than .+) also accepts
# lines where nothing follows the colon.
name_pattern <- "^name [0-9]+ = ([^:]+):.*"
# one replacement per string is enough because the pattern is anchored
sub(name_pattern, "\\1", test)
[1] "prDM" "t090C"
explanation
^name [0-9]+ Searches for the beginning of a string ^ with name followed by any length of number
= (.+): any length + of any character . found between = and : are stored ( ) to be later recalled by \\1

In R, how do I wrap text around all words in a string, but a specific one(going from left to right)? Iteration and string manipulation

I know my question is a little vague, so I have an example of what I'm trying to do.
input <- c('I go to school')
#Output
'"I " * phantom("go to school")'
'phantom("I ") * "go" * phantom("to school")'
'phantom("I go ") * "to" * phantom("school")'
'phantom("I go to ") * "school"'
I've written a function, but I'm having a lot of trouble figuring out how to make it applicable to strings with different numbers of words and I can't figure out how I can include iteration to reduce copied code. It does generate the output above though.
Right now my function only works on strings with 4 words. It also includes no iteration.
My main questions are: How can I include iteration into my function? How can I make it work for any number of words?
# Build one plotmath-style string per word of a sentence, in which that word
# is shown as a quoted string and all other words are wrapped in phantom().
#
# stuff: character vector; only its first element is used, split on single
#        spaces.
# Returns a character vector with one element per word, e.g. for
# "I go to school":
#   '"I " * phantom("go to school")'
#   'phantom("I ") * "go" * phantom("to school")'
#   'phantom("I go ") * "to" * phantom("school")'
#   'phantom("I go to ") * "school"'
# A single word is returned as just the quoted word; an empty string gives
# character(0).
add_phantom <- function(stuff) {
  stopifnot(is.character(stuff), length(stuff) >= 1L)
  words <- strsplit(stuff[[1L]], " ", fixed = TRUE)[[1L]]
  n <- length(words)
  if (n == 0L) {
    return(character(0))
  }
  if (n == 1L) {
    return(paste0('"', words, '"'))
  }
  vapply(seq_len(n), function(i) {
    before <- paste(words[seq_len(i - 1L)], collapse = " ")
    after <- paste(words[i + seq_len(n - i)], collapse = " ")
    # if () without else yields NULL, which c() drops, so the leading and
    # trailing phantom() parts disappear for the first and last word
    parts <- c(
      if (i > 1L) paste0('phantom("', before, ' ")'),
      # only the first word carries its trailing space inside the quotes
      if (i == 1L) paste0('"', words[i], ' "') else paste0('"', words[i], '"'),
      if (i < n) paste0('phantom("', after, '")')
    )
    paste(parts, collapse = " * ")
  }, character(1))
}
this is some butcher work but it gives the expected output :):
input <- c('I go to school')
library(purrr)
inp <- c(list(NULL),strsplit(input," ")[[1]])
# Build the phantom() term for the words hidden on one side of the visible
# word.
#
# x:        words for this side; the FIRST element is skipped (x[-1]), so a
#           length-one x means "nothing to hide" and yields "".
# leftside: TRUE  -> 'phantom("... ") * ' (term placed before the visible word)
#           FALSE -> ' * phantom("...")'  (term placed after the visible word)
# Returns a single string.
phantomize <- function(x, leftside = TRUE) {
  if (length(x) == 1) {
    return("")
  }
  hidden <- paste(x[-1], collapse = " ")
  if (leftside) {
    paste0('phantom("', hidden, ' ") * ')
  } else {
    paste0(' * phantom("', hidden, '")')
  }
}
# One output string per word: left phantom term, the visible word, right
# phantom term. seq_len() gives an empty result for an empty sentence, where
# 1:(length(inp) - 1) would iterate over c(1, 0).
map(seq_len(length(inp) - 1),
    ~paste0(phantomize(inp[1:.x]),
            inp[[.x + 1]],
            phantomize(inp[(.x + 1):length(inp)], FALSE)))
# [[1]]
# [1] "I * phantom(\"go to school\")"
#
# [[2]]
# [1] "phantom(\"I \") * go * phantom(\"to school\")"
#
# [[3]]
# [1] "phantom(\"I go \") * to * phantom(\"school\")"
#
# [[4]]
# [1] "phantom(\"I go to \") * school"
This is a bit of a hack, but I think it gets at what you're trying to do:
library(corpus)
input <- 'I go to school'
types <- text_types(input, collapse = TRUE) # all word types
(loc <- text_locate(input, types)) # locate all word types, get context
## text before instance after
## 1 1 I go to school
## 2 1 I go to school
## 3 1 I go to school
## 4 1 I go to school
The return value is a data frame, with columns of type corpus_text. This approach seems crazy, but it doesn't actually allocate new strings for the before and after contexts (both of which have type corpus_text)
Here's the output you wanted:
paste0("phantom(", loc$before, ") *", loc$instance, "* phantom(", loc$after, ")")
## [1] "phantom() *I* phantom( go to school)"
## [2] "phantom(I ) *go* phantom( to school)"
## [3] "phantom(I go ) *to* phantom( school)"
## [4] "phantom(I go to ) *school* phantom()"
If you want to really get crazy and ignore punctuation:
# Wrap the text before and after every located word type in phantom(),
# leaving the word itself visible between "*" separators.
#
# input: text passed to text_types() and text_locate().
# ...:   forwarded unchanged to both text_types() and text_locate()
#        (e.g. drop_punct = TRUE in the call shown after this definition).
# Returns the character vector built by paste0(), one element per located
# instance.
phantomize <- function(input, ...) {
  word_types <- text_types(input, collapse = TRUE, ...)
  hits <- text_locate(input, word_types, ...)
  left <- paste0("phantom(", hits$before, ") *")
  right <- paste0("* phantom(", hits$after, ")")
  paste0(left, hits$instance, right)
}
phantomize("I! go to school (?)...don't you?", drop_punct = TRUE)
## [1] "phantom() *I* phantom(! go to school (?)...don't you?)"
## [2] "phantom(I! ) *go* phantom( to school (?)...don't you?)"
## [3] "phantom(I! go ) *to* phantom( school (?)...don't you?)"
## [4] "phantom(I! go to ) *school* phantom( (?)...don't you?)"
## [5] "phantom(I! go to school (?)...) *don't* phantom( you?)"
## [6] "phantom(I! go to school (?)...don't ) *you* phantom(?)"
I would suggest something like this
library(tidyverse)
library(glue)
test_string <- "i go to school"
# Split the sentence into words, split the sentence again around those words,
# trim the pieces, drop the empty ones and wrap each remaining piece in
# phantom(). TRUE is spelled out because T can be reassigned.
str_split(test_string, " ") %>%
  map(~str_split(test_string, .x, simplify = TRUE)) %>%
  flatten() %>%
  map(str_trim) %>%
  keep(~.x != "") %>%
  map(~glue("phantom({string})", string = .x))
This code snippet can easily be implemented in a function and will return the following output.
[[1]]
phantom(i)
[[2]]
phantom(i go)
[[3]]
phantom(i go to)
[[4]]
phantom(go to school)
[[5]]
phantom(to school)
[[6]]
phantom(school)
I might have misinterpreted your question -- I am not quite sure if you really want the output to have the same format as in your example output.

Splitting a string in R, different split argument elements

I imported some data with no column names, so now I have just over a million rows, and 1 column (instead of 5 columns).
Each row is formatted like this:
x <- "2012-10-19T16:59:01-07:00 192.101.136.140 <190>Oct 19 2012 23:59:01: %FWSM-6-305011: Built dynamic tcp translation from Inside:10.2.45.62/56455 to outside:192.101.136.224/9874"
strsplit( x , split = c(" ", " ", "%", " "))
and got
[[1]]
[1] "2012-10-19T16:59:01-07:00" "192.101.136.140"
[3] "<190>Oct" "19"
[5] "2012" "23:59:01:"
[7] "%FWSM-6-305011:" "Built"
[9] "dynamic" "tcp"
[11] "translation" "from"
[13] "Inside:10.2.45.62/56455" "to"
[15] "outside:192.101.136.224/9874"
I know that it has to do with recycling the split argument but I can't seem to figure how to get it how I want:
[[1]]
[1] "2012-10-19T16:59:01-07:00" "192.101.136.140"
[3] "<190>Oct 19 2012 23:59:01 "%FWSM-6-305011
[5] Built dynamic tcp translation from Inside:10.2.45.62/56455 to outside:192.101.136.224/9874"
Each row has a different message as the fifth element, but after the 4th element I just want to keep the rest of the string together.
Any help would be appreciated.
You can use paste with the collapse argument to combine every element starting with the fifth element.
# strsplit() recycles `split` along the elements of `x`; with a single input
# string only its first element would be used, so split on a literal space.
A <- strsplit(x = "2012-10-19T16:59:01-07:00 192.101.136.140 <190>Oct 19 2012 23:59:01: %FWSM-6-305011: Built dynamic tcp translation from Inside:10.2.45.62/56455 to outside:192.101.136.224/9874", split = " ", fixed = TRUE)
tokens <- A[[1]]
# Keep the first four tokens and glue everything after them back together.
# head()/tail() stay correct for four or fewer tokens, where
# 5:length(tokens) would count backwards.
combined <- c(
  head(tokens, 4),
  if (length(tokens) > 4) paste(tail(tokens, -4), collapse = " ")
)
combined
As @DWin points out, split = c(" ", " ", "%", " ") is not used in order: strsplit recycles split along the elements of x, so with a single input string only its first element, " ", is actually used.
I think here you don't need to use strsplit. I use read.table to read the lines using text argument. Then you aggregate columns using paste. Since you have a lot of rows, it is better to do the column aggregation within a data.table.
dt <- read.table(text=x)
library(data.table)
DT <- as.data.table(dt)
DT[ , c('V3','V8') := list(paste(V3,V4,V5),
V8=paste(V8,V9,V10,V11,V12,V13,V14,V15))]
DT[,paste0('V',c(1:3,6:7,8)),with=FALSE]
V1 V2 V3 V6 V7
1: 2012-10-19T16:59:01-07:00 192.101.136.140 <190>Oct 19 2012 23:59:01: %FWSM-6-305011:
V8
1: Built dynamic tcp translation from Inside:10.2.45.62/56455 to outside:192.101.136.224/9874
Here is a function that I think works in the way that you thought strsplit functioned:
# Split every string in `x` once per delimiter, in the order given: the
# string is cut at the first match of delimiters[1], the remainder at the
# first match of delimiters[2], and so on.
#
# x:          character vector of strings to split.
# delimiters: character vector of regular expressions (as used by regexpr()),
#             at least one.
# Returns an unnamed list with one character vector per element of `x`,
# holding length(delimiters) + 1 pieces.
# When a delimiter is not found, the piece in front of it is "" and the whole
# remaining string is handed on to the next delimiter.
split.seq <- function(x, delimiters) {
  # an empty `delimiters` would otherwise recurse forever
  stopifnot(length(delimiters) >= 1L)
  if (length(x) == 0L) {
    return(list())
  }
  hit <- regexpr(delimiters[1], x)
  start <- as.integer(hit)
  # skip the full width of the match, not just one character, so that
  # multi-character delimiters are removed completely
  width <- attr(hit, "match.length")
  first <- unname(substring(x, 1, start - 1))
  second <- unname(substring(x, start + width, nchar(x)))
  if (length(delimiters) == 1) {
    return(lapply(seq_along(first), function(i) c(first[i], second[i])))
  }
  # prepend this level's piece to the pieces produced by the remaining delimiters
  mapply(c, first, split.seq(second, delimiters[-1]),
         USE.NAMES = FALSE, SIMPLIFY = FALSE)
}
split.seq(x,delimiters)
A test:
x<-rep(x,2)
delimiters=c(" ", " ", "%", " ")
split.seq(x,delimiters)
[[1]]
[1] "2012-10-19T16:59:01-07:00"
[2] "192.101.136.140"
[3] "<190>Oct 19 2012 23:59:01: "
[4] "FWSM-6-305011:"
[5] "Built dynamic tcp translation from Inside:10.2.45.62/56455 to outside:192.101.136.224/9874"
[[2]]
[1] "2012-10-19T16:59:01-07:00"
[2] "192.101.136.140"
[3] "<190>Oct 19 2012 23:59:01: "
[4] "FWSM-6-305011:"
[5] "Built dynamic tcp translation from Inside:10.2.45.62/56455 to outside:192.101.136.224/9874"

Resources