How can I convert this string into a vector?
"c(HJ229, HJ230, HJ231)"
The desired result is "HJ229" "HJ230" "HJ231".
I have tried using stringr; however, the ( causes an issue because it is a special character in regex.
t <- "c(HJ229, HJ230, HJ231)"
strsplit(str_remove(t, "c"), "(")[[1]]
You need to escape the parentheses to remove them with regex using \\ and provide multiple patterns to match separated by | (or).
library(stringr)
t <- "c(HJ229, HJ230, HJ231)"
str_split(str_remove_all(t, "c|\\(|\\)"), ", ")[[1]]
#> [1] "HJ229" "HJ230" "HJ231"
Created on 2022-02-25 by the reprex package (v2.0.1)
Another possible solution:
library(stringr)
s <-"c(HJ229, HJ230, HJ231)"
str_extract_all(s, "[A-Z]{2}\\d{3}")[[1]]
#> [1] "HJ229" "HJ230" "HJ231"
Using base R:
t = "c(HJ229, HJ230, HJ231)"
strsplit(gsub("[c()]", "", t), ", ")[[1]]
[1] "HJ229" "HJ230" "HJ231"
Using stringr:
library(stringr)
str_split(str_remove_all(t, "[c()]"), ", ")[[1]]
[1] "HJ229" "HJ230" "HJ231"
We can try
# Strip the c( ... ) wrapper, then let scan() split on commas.
# strip.white = TRUE trims the blank that follows each comma, so no element
# keeps a leading space.
s <- "c(HJ229, HJ230, HJ231)"
scan(
  text = gsub("c\\((.*)\\)", "\\1", s),
  what = "", quiet = TRUE, sep = ",", strip.white = TRUE
)
#> [1] "HJ229" "HJ230" "HJ231"
Related
I have a vector of strings like so:
mystr <- c("./10g/13.9264.csv", "./6g/62.0544.csv")
I only want the part between the two forward slashes, i.e., "10g" and "6g".
You could sub() here with a capture group:
mystr <- c("./10g/13.9264.csv", "./6g/62.0544.csv")
sub(".*/([^/]+)/.*", "\\1", mystr)
[1] "10g" "6g"
Similar to Tim Biegeleisen's answer, but with a lookbehind and a lookahead, using str_extract from stringr:
library(stringr)
mystr <- c("./10g/13.9264.csv", "./6g/62.0544.csv")
str_extract(mystr,"(?<=/)[^/]+(?=/)")
[1] "10g" "6g"
More simply you can capitalize on the fact that the desired substring is one or more digits followed by literal g:
library(stringr)
str_extract(mystr, "\\d+g")
[1] "10g" "6g"
Here are a few alternatives. They use no packages and the first two do not use any regular expressions.
basename(dirname(mystr))
## [1] "10g" "6g"
read.table(text = mystr, sep = "/")[[2]]
## [1] "10g" "6g"
trimws(trimws(mystr,, "[^/]"),, "/")
## [1] "10g" "6g"
We could also reformulate these using pipes
mystr |> dirname() |> basename()
## [1] "10g" "6g"
read.table(text = mystr, sep = "/") |> (`[[`)(2)
## [1] "10g" "6g"
mystr |> trimws(, "[^/]") |> trimws(, "/")
## [1] "10g" "6g"
Note
From the question the input is
mystr <- c("./10g/13.9264.csv", "./6g/62.0544.csv")
How to extract everything between two underscores in R
# c() is required to build the character vector; bare parentheses around a
# comma-separated list are a syntax error.
ts <- c("az_bna_njh", "j_hj_lkiuy", "ml_", "_kk")
I need to extract bna,hj,ml, and kk
We can use
sub("^\\w+_(\\w+)_.*", "\\1", trimws(ts, whitespace = "_"))
#[1] "bna" "hj" "ml" "kk"
Or another option is
sub("^\\w+_(\\w+)_.*", "\\1", gsub("^_|_$", "", ts))
Also you can try:
#Data
ts = c("az_bna_njh","j_hj_lkiuy","ml_", "_kk")
#Code
gsub(".*_(.*)\\_.*", "\\1", trimws(ts,whitespace = '_'))
Output:
[1] "bna" "hj" "ml" "kk"
Another way you can try
library(stringr)
str_replace_all(ts, c("^.*_(\\w+)_.*$" = "\\1", "^_|_$" = ""))
#[1] "bna" "hj" "ml" "kk"
How can I extract the numbers / ID from the following string in R?
link <- "D:/temp/sample_data/0000098618-13-000011.htm"
I want to just extract 0000098618-13-000011
That is discard the .htm and the D:/temp/sample_data/.
I have tried grep and gsub without much luck.
1) basename Use basename followed by sub:
sub("\\..*", "", basename(link))
## [1] "0000098618-13-000011"
2) file_path_sans_ext
library(tools)
# file_path_sans_ext() only strips the extension, so drop the directory part
# with basename() first.
file_path_sans_ext(basename(link))
## [1] "0000098618-13-000011"
3) sub
sub(".*/(.*)\\..*", "\\1", link)
## [1] "0000098618-13-000011"
4) gsub
gsub(".*/|\\.[^.]*$", "", link)
## [1] "0000098618-13-000011"
5) strsplit
sapply(strsplit(link, "[/.]"), function(x) tail(x, 2)[1])
## [1] "0000098618-13-000011"
6) read.table. If link is a vector this will only work if all elements have the same number of /-separated components. Also this assumes that the only dot is the one separating the extension.
# comment.char = "." makes read.table() discard everything from the first dot
# onward, which removes the extension before the path is split on "/".
DF <- read.table(text = link, sep = "/", comment.char = ".", as.is = TRUE)
# The file name without extension is the last "/"-separated column.
DF[[ncol(DF)]]
## [1] "0000098618-13-000011"
Using stringr:
library(stringr)
str_extract(link , "[0-9-]+")
# "0000098618-13-000011"
I would like to move one part within a string to the beginning of the string. Please see example below. Can this be done using regex?
in:
c("41_exo","47_exo","48_exo")
out:
c("Exo_41","Exo_47","Exo_48")
Yes, you can do this with regex.
vec <- c("41_exo", "47_exo", "48_exo")
# using base R: swap the two parts around the underscore; with perl = TRUE,
# \U upper-cases the first letter of the moved part and \E ends the conversion
sub("(.*)_(.)(.*)", "\\U\\2\\E\\3_\\1", vec, perl = TRUE)
#> [1] "Exo_41" "Exo_47" "Exo_48"
# using stringr
stringr::str_replace_all(vec, "(.*)_(.*)", "\\2_\\1")
#> [1] "exo_41" "exo_47" "exo_48"
Created on 2018-07-08 by the reprex package (v0.2.0).
Or without regex:
sapply(
strsplit(vec, "_"),
function(x) {
paste0(toupper(substring(x[2], 1, 1)), substring(x[2], 2), "_", x[1])
}
)
[1] "Exo_41" "Exo_47" "Exo_48"
I have the following string: "    John Andrew Thomas" (4 empty spaces before John), and I need to split and concatenate it so that my output is "John#gmail.com;Andrew#gmail.com;Thomas#gmail.com"; I also need to remove all whitespace.
My best guess is:
test = unlist(lapply(names, strsplit, split = " ", fixed = FALSE))
paste(test, collapse = "#gmail.com")
but I get this as an output:
"#gmail.com#gmail.com#gmail.com#gmail.comJohn#gmail.comAndrew#gmail.comThomas"
names <- " John Andrew Thomas"
# strsplit() is already vectorised, so no lapply() is needed; the leading
# blanks turn into empty strings in the result.
test <- unlist(strsplit(names, split = " "))
# Keep the non-empty pieces, append the suffix to each, and join with ";".
paste0(test[nzchar(test)], "#gmail.com", collapse = ";")
A small tweak to your paste line will remove the extra spaces and separate the email addresses with a semicolon.
Output is the following:
[1] "John#gmail.com;Andrew#gmail.com;Thomas#gmail.com"
With stringr, so we can use its str_trim function to deal with your leading whitespace, and assuming your string is x:
library(stringr)
paste(sapply(str_split(str_trim(x), " "), function(i) sprintf("%s#gmail.com", i)), collapse = ";")
And here's a piped version, so it's easier to follow:
library(dplyr)
library(stringr)
x %>%
# get rid of leading and trailing whitespace
str_trim() %>%
# make a list with the elements of the string, split at " "
str_split(" ") %>%
# get an array of strings where those list elements are added to a fixed chunk via sprintf
sapply(., function(i) sprintf("%s#gmail.com", i)) %>%
# concatenate the resulting array into a single string with semicolons
paste(., collapse = ";")
Another approach using trimws function of base R
paste0(unlist(strsplit(trimws(names)," ")),"#gmail.com",collapse = ";")
#[1] "John#gmail.com;Andrew#gmail.com;Thomas#gmail.com"
Data
names <- " John Andrew Thomas"
Another idea using stringi:
v <- " John Andrew Thomas"
paste0(stringi::stri_extract_all_words(v, simplify = TRUE), "#gmail.com", collapse = ";")
Which gives:
#[1] "John#gmail.com;Andrew#gmail.com;Thomas#gmail.com"
You can use gsub(), and a little creativity.
x <- " John Andrew Thomas"
paste0(gsub(" ", "#gmail.com;", trimws(x)), "#gmail.com")
# [1] "John#gmail.com;Andrew#gmail.com;Thomas#gmail.com"
No packages, no loops, and no string splitting.