Group columns in dataframe by list and mutate in R - r

I have a large dataframe containing binary columns. Here is a list of the column names:
[1] "imagetag_logos_position_Apple_BOTTOM_CENTER" "imagetag_logos_position_Apple_BOTTOM_LEFT" "imagetag_logos_position_Apple_BOTTOM_RIGHT" "imagetag_logos_position_Apple_CENTER" "imagetag_logos_position_Apple_CENTER_LEFT"
[6] "imagetag_logos_position_Apple_CENTER_RIGHT" "imagetag_logos_position_Apple_TOP_CENTER" "imagetag_logos_position_Apple_TOP_LEFT" "imagetag_logos_position_Apple_TOP_RIGHT" "imagetag_logos_position_Banana_BOTTOM_CENTER"
[11] "imagetag_logos_position_Banana_BOTTOM_LEFT" "imagetag_logos_position_Banana_BOTTOM_RIGHT" "imagetag_logos_position_Banana_CENTER_LEFT" "imagetag_logos_position_Banana_CENTER_RIGHT" "imagetag_logos_position_Banana_TOP_RIGHT"
[16] "imagetag_logos_position_Pear_BOTTOM_CENTER" "imagetag_logos_position_Pear_BOTTOM_LEFT" "imagetag_logos_position_Pear_BOTTOM_RIGHT" "imagetag_logos_position_Pear_CENTER" "imagetag_logos_position_Pear_CENTER_LEFT"
[21] "imagetag_logos_position_Pear_CENTER_RIGHT" "imagetag_logos_position_Pear_TOP_RIGHT" "imagetag_logos_position_Kiwi_BOTTOM_CENTER" "imagetag_logos_position_Kiwi_BOTTOM_LEFT" "imagetag_logos_position_Kiwi_BOTTOM_RIGHT"
[26] "imagetag_logos_position_Kiwi_CENTER_LEFT" "imagetag_logos_position_Kiwi_CENTER_RIGHT" "imagetag_logos_position_Kiwi_TOP_LEFT" "Product_position_Product_0" "Product_position_Product_BOTTOM_CENTER"
[31] "Product_position_Product_BOTTOM_LEFT" "Product_position_Product_BOTTOM_RIGHT" "Product_position_Product_CENTER" "Product_position_Product_CENTER_LEFT" "Product_position_Product_CENTER_RIGHT"
[36] "Product_position_Product_TOP_CENTER" "Product_position_Product_TOP_LEFT" "Product_position_Product_TOP_RIGHT" "Person_position_Person_0" "Person_position_Person_BOTTOM_CENTER"
[41] "Person_position_Person_BOTTOM_LEFT" "Person_position_Person_BOTTOM_RIGHT" "Person_position_Person_CENTER" "Person_position_Person_CENTER_LEFT" "Person_position_Person_CENTER_RIGHT"
[46] "Person_position_Person_TOP_CENTER" "Person_position_Person_TOP_LEFT" "Person_position_Person_TOP_RIGHT" "Logo_position_Logo_0" "Logo_position_Logo_BOTTOM_CENTER"
[51] "Logo_position_Logo_BOTTOM_LEFT" "Logo_position_Logo_BOTTOM_RIGHT" "Logo_position_Logo_CENTER" "Logo_position_Logo_CENTER_LEFT" "Logo_position_Logo_CENTER_RIGHT"
[56] "Logo_position_Logo_TOP_CENTER" "Logo_position_Logo_TOP_LEFT" "Logo_position_Logo_TOP_RIGHT" "CTA_ShopNow_position_Shop Now_0" "CTA_ShopNow_position_Shop Now_BOTTOM_CENTER"
[61] "CTA_ShopNow_position_Shop Now_BOTTOM_LEFT" "CTA_ShopNow_position_Shop Now_BOTTOM_RIGHT" "CTA_ShopNow_position_Shop Now_CENTER" "CTA_ShopNow_position_Shop Now_CENTER_LEFT" "CTA_ShopNow_position_Shop Now_CENTER_RIGHT"
[66] "CTA_ShopNow_position_Shop Now_TOP_CENTER" "CTA_ShopNow_position_Shop Now_TOP_RIGHT" "CTA_JoinNow_position_Join Now_0" "CTA_JoinNow_position_Join Now_BOTTOM_CENTER" "CTA_JoinNow_position_Join Now_BOTTOM_LEFT"
[71] "CTA_JoinNow_position_Join Now_BOTTOM_RIGHT" "CTA_JoinNow_position_Join Now_CENTER" "CTA_JoinNow_position_Join Now_CENTER_RIGHT" "CTA_JoinNow_position_Join Now_TOP_CENTER" "CTA_JoinNow_position_Join Now_TOP_RIGHT"
[76] "CTA_position_CTA_0" "CTA_position_CTA_BOTTOM_CENTER" "CTA_position_CTA_BOTTOM_LEFT" "CTA_position_CTA_BOTTOM_RIGHT" "CTA_position_CTA_CENTER"
[81] "CTA_position_CTA_CENTER_LEFT" "CTA_position_CTA_CENTER_RIGHT" "CTA_position_CTA_TOP_CENTER" "CTA_position_CTA_TOP_LEFT" "CTA_position_CTA_TOP_RIGHT"
[86] "Text_position_text_BOTTOM_CENTER" "Text_position_text_BOTTOM_LEFT" "Text_position_text_BOTTOM_RIGHT" "Text_position_text_CENTER" "Text_position_text_CENTER_LEFT"
[91] "Text_position_text_CENTER_RIGHT" "Text_position_text_TOP_CENTER" "Text_position_text_TOP_LEFT" "Text_position_text_TOP_RIGHT" "Product_position_Product_0_LF"
[96] "Product_position_Product_BOTTOM_CENTER_LF" "Product_position_Product_BOTTOM_LEFT_LF" "Product_position_Product_BOTTOM_RIGHT_LF" "Product_position_Product_CENTER_LF" "Product_position_Product_CENTER_LEFT_LF"
[101] "Product_position_Product_CENTER_RIGHT_LF" "Product_position_Product_TOP_CENTER_LF" "Product_position_Product_TOP_LEFT_LF" "Product_position_Product_TOP_RIGHT_LF" "Logo_position_Logo_0_LF"
I want to group some of these columns, for example sum the columns that contain "BOTTOM_CENTER", "BOTTOM_RIGHT", "BOTTOM_LEFT". However I need to group them within each prefix that matches, for example, only sum for imagetag_logos_position_Apple, and a separate sum for imagetag_logos_position_Banana.
I did this to create a list of the unique prefixes:
# Take the shortest leading part of each column name that is immediately
# followed by three consecutive uppercase letters (e.g. "BOT", "CEN", "TOP").
# Names with no such run of uppercase letters yield NA.
prefix_list <- str_extract(colnames(positionsdf),".+?(?=([A-Z])([A-Z])([A-Z]))")
# Keep each distinct prefix (and the NA, if any) once.
prefix_list1 <- unique(prefix_list)
> prefix_list1
[1] "imagetag_logos_position_Apple_" "imagetag_logos_position_Banana_" "imagetag_logos_position_Kiwi_" "imagetag_logos_position_Pear_" NA "Product_position_Product_" "Person_position_Person_"
[8] "Logo_position_Logo_" "CTA_ShopNow_position_Shop Now_" "CTA_JoinNow_position_Join Now_" "CTA_position_" "Text_position_text_" "CTA_LearnMore_position_Learn More_" "Person_position_"
I have tried different ways to get the dataframe to group by the strings in the list so that I can add up the matching columns, but I cannot figure out how to go about this. %in% does not support partial matching, so I am not sure what other function to use.
Thanks!
# Attempt: loop over the prefixes and test each one against every column.
for(i in prefix_list1){
# `%in%` checks whether the prefix equals one of the values stored in the
# column; it does not look at the column names, and the sapply() result is
# not stored anywhere.
sapply(positionsdf, function(x) i %in% x)
}

We may do
# For every prefix, add up the BOTTOM_CENTER / BOTTOM_RIGHT / BOTTOM_LEFT
# columns row by row. The result has one entry per row of `positionsdf` and
# one column per prefix (named after the prefix).

# Column names that had no position suffix produced NA prefixes; an NA cannot
# identify a group, so drop it before matching.
prefixes <- prefix_list1[!is.na(prefix_list1)]

# The set of BOTTOM_* columns does not depend on the prefix, so find it once.
bottom_cols <- grep(
  "BOTTOM_(CENTER|RIGHT|LEFT)",
  names(positionsdf),
  value = TRUE
)

vapply(
  prefixes,
  function(pat) {
    # The prefixes are literal leading text, not regular expressions, so match
    # them with startsWith() instead of grep().
    cols <- bottom_cols[startsWith(bottom_cols, pat)]
    # A prefix without any BOTTOM_* column gives a sum of 0 for every row.
    rowSums(positionsdf[cols], na.rm = TRUE)
  },
  FUN.VALUE = numeric(nrow(positionsdf))
)

Related

Using R, how do I copy the tibble to an element of the list? For example, each element like ff[i] should have a tibble at each i

Use vector() to create an empty vector called ff that is of mode “list” and length 9. Now write a for() loop to loop over the 9 files in dfiles and for each (i) read the file in to a tibble, and change the column names to x and y, and (ii) copy the tibble to an element of your list ff.
dfiles is a directory which has different files.
This is what I did.
# Pre-allocate a list with 9 empty slots, one per file.
ff <- vector(mode = "list", length = 9)
# Number of files; note that this variable shares its name with the base
# function length().
length <- length(dfiles)
for (i in 1:length) {
# Read the i-th file and rename its first two columns to x and y.
study <- read_csv(dfiles[i])
names(study)[1] <- "x"
names(study)[2] <- "y"
# Single brackets address a one-element sub-list, while c(study) supplies one
# item per column, so only the first column is stored and R warns that the
# replacement length does not match (see the output that follows).
ff[i] <- c(study)
print(head(ff[i]))
}
[[1]]
[1] -0.989532202 -0.052799402 0.823610903 -0.255509103 -0.220684347
[6] 0.307726791 -0.060013253 -0.555652890 -0.138615019 1.882839792
[11] 0.873668680 -0.914597073 -1.244917622 -0.359982241 1.328774701
[16] 0.292679118 -0.701505237 0.882234568 -0.133370389 -1.120678499
[21] 0.461192454 1.524142810 0.434468298 0.192000371 -0.656243128
[26] 0.568398531 -1.070570535 -1.653149024 -0.043352768 -0.034593506
[31] 2.365055532 -1.216347308 0.170906323 0.805053094 1.050592844
[36] -0.010724485 -0.743256141 -0.065784052 1.939755992 0.482739008
[41] -2.044477073 1.423459129 0.540502661 -0.033571772 -0.017863621
[46] -0.149789720 0.256559481 -0.503866933 0.277011252 -0.931356025
[51] 0.200146875 1.106837421 0.509206114 1.033749676 -1.090868762
[56] 0.054792784 0.617250303 -1.068004868 1.565814337 -1.034808011
[61] 0.164518709 0.151832330 0.121670302 -0.210424584 0.449936787
[66] -1.031164492 -1.289364188 -0.654568638 -0.057324104 1.256747820
[71] 1.587454140 0.319481463 0.381591623 -0.243644884 0.048053084
[76] -1.404545861 0.289933729 -0.535553582 0.334678773 -0.345981339
[81] -0.661615735 -0.219111377 -0.366904911 1.094578208 0.209208082
[86] 0.432491426 -1.240853586 1.496821710 0.159370441 -0.856281403
[91] 0.309046645 0.870434030 -1.383677138 1.690106970 -0.158030705
[96] 1.121170781 0.072261319 -0.332422845 -1.834920047 -1.100172219
[101] -0.041340300 0.827852545 -1.881678654 1.375441112 1.398990464
[106] -1.143316256 0.472300562 -1.033639213 -0.125199979 0.928662739
[111] 0.868339648 -0.849174604 -0.386636454 -0.976163571 0.339543660
[116] -1.559075164 -2.629325442 1.469812282 2.273472913 -0.455033540
[121] 0.761102487 -0.007502784 1.474313800
and the following error.
1: In ff[i] <- c(study) :
number of items to replace is not a multiple of replacement length
2: In ff[i] <- c(study) :
I was expecting that it'll still have column names so I am not sure how to fix it and where I am going wrong.
You were supposed to use double brackets, which assign to a single list element.
ff[[i]] <- study would fix the problem.

How do I use the index value within the function of a for loop in R?

Multiplier <- numeric(200)
for (i in 1:200) Multiplier[i] <- 1/sqrt(i(i+1))
I want something like the math function f(n) = 1/sqrt(n(n+1)), with the first 200 values put in an array. But when I run the above code I get:
# Error in i(i + 1) : could not find function "i"
and when I try to use [i] I get:
# Error: unexpected '[' in "for (i in 1:200) Multiplier[i] <- 1/sqrt([
Change i(i+1) to i*(i+1). When we write i(), R treats i as a function and i+1 as the argument to that function.
# Fill entries 1 to 200 with 1 / sqrt(i * (i + 1)); the multiplication sign
# has to be written out explicitly.
for (i in seq_len(200)) {
  Multiplier[i] <- 1 / sqrt(i * (i + 1))
}
-output
> Multiplier
[1] 0.707106781 0.408248290 0.288675135 0.223606798 0.182574186 0.154303350 0.133630621 0.117851130 0.105409255 0.095346259
[11] 0.087038828 0.080064077 0.074124932 0.069006556 0.064549722 0.060633906 0.057166195 0.054073807 0.051298918 0.048795004
[21] 0.046524211 0.044455422 0.042562827 0.040824829 0.039223227 0.037742568 0.036369648 0.035093120 0.033903175 0.032791292
[31] 0.031750032 0.030772873 0.029854072 0.028988552 0.028171808 0.027399831 0.026669037 0.025976217 0.025318484 0.024693240
[41] 0.024098135 0.023531040 0.022990024 0.022473329 0.021979349 0.021506620 0.021053798 0.020619652 0.020203051 0.019802951
[51] 0.019418391 0.019048483 0.018692405 0.018349396 0.018018749 0.017699808 0.017391962 0.017094641 0.016807316 0.016529490
[61] 0.016260700 0.016000512 0.015748520 0.015504342 0.015267620 0.015038019 0.014815221 0.014598929 0.014388862 0.014184754
[71] 0.013986356 0.013793431 0.013605757 0.013423121 0.013245324 0.013072175 0.012903494 0.012739112 0.012578865 0.012422600
[81] 0.012270170 0.012121435 0.011976263 0.011834527 0.011696106 0.011560887 0.011428758 0.011299615 0.011173359 0.011049892
[91] 0.010929125 0.010810969 0.010695340 0.010582159 0.010471348 0.010362833 0.010256545 0.010152415 0.010050378 0.009950372
[101] 0.009852336 0.009756214 0.009661948 0.009569488 0.009478779 0.009389775 0.009302426 0.009216688 0.009132515 0.009049866
[111] 0.008968700 0.008888977 0.008810658 0.008733708 0.008658090 0.008583770 0.008510715 0.008438894 0.008368274 0.008298827
[121] 0.008230522 0.008163333 0.008097232 0.008032193 0.007968191 0.007905200 0.007843198 0.007782160 0.007722065 0.007662891
[131] 0.007604618 0.007547224 0.007490689 0.007434996 0.007380124 0.007326056 0.007272775 0.007220264 0.007168505 0.007117483
[141] 0.007067182 0.007017587 0.006968683 0.006920457 0.006872893 0.006825978 0.006779700 0.006734045 0.006689001 0.006644555
[151] 0.006600696 0.006557412 0.006514693 0.006472526 0.006430901 0.006389809 0.006349238 0.006309180 0.006269623 0.006230560
[161] 0.006191980 0.006153875 0.006116237 0.006079055 0.006042324 0.006006033 0.005970176 0.005934744 0.005899731 0.005865128
[171] 0.005830929 0.005797126 0.005763713 0.005730683 0.005698029 0.005665745 0.005633825 0.005602263 0.005571052 0.005540187
[181] 0.005509663 0.005479473 0.005449612 0.005420074 0.005390855 0.005361950 0.005333352 0.005305058 0.005277063 0.005249362
[191] 0.005221950 0.005194823 0.005167976 0.005141405 0.005115106 0.005089075 0.005063307 0.005037799 0.005012547 0.004987547
According to ?Paren
Open parenthesis, (, and open brace, {, are .Primitive functions in R.
Effectively, ( is semantically equivalent to the identity function(x) x

How to remove the prefix of each sample

I am stuck on removing the prefix of each sample name. I tried removing all the numbers within the sample names, but that is not a good basis for grouping. I would like to keep only the last two components of each sample name (for example: AAP-L). The details are listed below. Thank you in advance!
geo$pd$title
[1] "AAB-HT002-AAP-L" "AAB-HT003-AAP-L" "AAB-HT006-AAP-L" "AAB-HT002-AAP-NL"
[5] "AAB-HT003-AAP-NL" "AAB-HT006-AAP-NL" "AAB-C007-AU-L" "AAB-HT001-AT-L"
[9] "AAB-N-C021-Normal-NC" "AAB-N-C022-Normal-NC" "AAB-C024-Normal-NC" "AAB-N-C025-Normal-NC"
[13] "AAB-HT010-AAP.T-L" "AAB-HT011-AAP-L" "AAB-HT012-AAP-L" "AAB-HT010-AAP.T-NL"
[17] "AAB-HT011-AAP-NL" "AAB-HT012-AAP-NL" "AAB-C013-AU-L" "AAB-C033-AU-L"
[21] "AAB-C037-AT-L" "AAB-C043-AU-L" "AAB-HT041-AU-L" "AAB-N-C026-Normal-NC"
[25] "AAB-N-C027-Normal-NC" "AAB-N-C028-Normal-NC" "AAB-N-C029-Normal-NC" "AAB-C014-AAP-L"
[29] "AAB-HT017-AAP.T-L" "AAB-HT018-AAP-L" "AAB-C014-AAP-NL" "AAB-HT017-AAP.T-NL"
[33] "AAB-HT018-AAP-NL" "AAB-C047-AT-L" "AAB-M044-AU-L" "AAB-N-C030-Normal-NC"
[37] "AAB-N-C032-Normal-NC" "AAB-N-C034-Normal-NC" "AAB-N-C035-Normal-NC" "AAB-C020-AAP.T-L"
[41] "AAB-C038-AAP-L" "AABM046-AAP-L" "AAB-C020-AAP.T-NL" "AABM046-AAP-NL"
[45] "AAB-C048-AT-L" "AAB-HT050-AT-L" "AAB-M-060-AU-L" "AAB-M-061-AU-L"
[49] "AAB-N-C036-Normal-NC" "AAB-N-C039-Normal-NC" "AAB-N-C042-Normal-NC" "AAB-N-C045-Normal-NC"
[53] "AAB-C052-AAP-L" "AAB-C076-AAP-L" "AAB-M056-AAP-L" "AAB-M058-AAP-L"
[57] "AAB-C052-AAP-NL" "AAB-C076-AAP-NL" "AAB-M056-AAP-NL" "AAB-M058-AAP-NL"
[61] "AAB-HT077-AU-L" "AAB-HT082-AU-L" "AAB-M080-AU-L" "AAB-N-C054-Normal-NC"
[65] "AAB-N-C055-Normal-NC" "AAB-N-C059-Normal-NC" "AAB-N-C062-Normal-NC" "AAB-C083-AAP-L"
[69] "AAB-HT009-AAP-L" "AAB-HT079-AAP-L" "AAB-SF086-AAP-L" "AAB-C083-AAP-NL"
[73] "AAB-HT079-AAP-NL" "AAB-SF086-AAP-NL" "AAB-C016-AU-L" "AAB-HT008-AU-L"
[77] "AAB-HT091-AT-L" "AAB-SF087-AU-L" "AAB-N-C063-Normal-NC" "AAB-N-C064-Normal-NC"
[81] "AAB-N-C065-Normal-NC" "AAB-HT103-AAP-L" "AAB-SF078-AAP.T-L" "AAB-SF099-AAP-L"
[85] "AAB-HT103-AAP-NL" "AAB-SF078-AAP.T-NL" "AAB-SF099-AAP-NL" "AAB-HT096-AT-L"
[89] "AAB-M094-AU-L" "AAB-SF089-AU-L" "AAB-SF090-AU-L" "AAB-SF100-AU-L"
[93] "AAB-N-C069-Normal-NC" "AAB-N-C070-Normal-NC" "AAB-N-C071-Normal-NC" "AAB-N-C072-Normal-NC"
[97] "AAB-N-C074-Normal-NC" "AAB-N-C075-Normal-NC" "AAB-N-C085-Normal-NC" "AAB-C092-Normal-NC"
[101] "AAB-M112-AAP-L" "AAB-SF104-AAP-L" "AAB-SF114-AAP-L" "AAB-SF115-AAP.T-L"
[105] "AAB-M112-AAP-NL" "AAB-SF104-AAP-NL" "AAB-SF114-AAP-NL" "AAB-SF115-AAP.T-NL"
[109] "AAB-C109-AU-L" "AAB-C111-AU-L" "AAB-HT101-AU-L" "AAB-M110-AT-L"
[113] "AAB-SF106-AU-L" "AAB-SF113-AU-L" "AAB-N-C098-Normal-NC" "AAB-N-C105-Normal-NC"
[117] "AAB-N-C107-Normal-NC" "AAB-N-C108-Normal-NC" "AAB-HT095-AAP.T-L" "AAB-HT095-AAP.T-NL"
[121] "AAB-HT097-AT-L" "AAB-C093-Normal-NC"
Try this:
library(stringr)
# test data:
string <- c("AAB-HT002-AAP-L", "AAB-HT017-AAP.T-L", "AAB-HT003-AAP-L", "AAB-HT006-AAP-L", "AAB-HT002-AAP-NL")
str_split_fixed(string, '-', n=3)[, 3]
# output:
[1] "AAP-L" "AAP.T-L" "AAP-L" "AAP-L" "AAP-NL"
This will deliver the terminal (alpha+period)-dash-(alpha+period)-end components.
titles <-c("AAB-HT002-AAP-L", "AAB-HT003-AA.P-L", "AAB-HT006-AAP-L", "AAB-HT002-AA.P-NL")
sub( "(.+)([-])([[:alpha:].]+[-][[:alpha:].]+$)", "\\3", titles)
[1] "AAP-L" "AA.P-L" "AAP-L" "AA.P-NL"
We could use
library(stringr)
str_remove(string, ".*\\d+-")
[1] "AAP-L" "AAP.T-L" "AAP-L" "AAP-L" "AAP-NL"

Code to rename multiple columns in rStudio

I want to rename these columns in R: I want to remove the X from each of them so that only the numbers remain, which represent years from 1960 to 2020. The first two (Country Name and Country Code) are sorted out already.
[1] "ï..Country.Name" "Country.Code" "X1960" "X1961" "X1962"
[6] "X1963" "X1964" "X1965" "X1966" "X1967"
[11] "X1968" "X1969" "X1970" "X1971" "X1972"
[16] "X1973" "X1974" "X1975" "X1976" "X1977"
[21] "X1978" "X1979" "X1980" "X1981" "X1982"
[26] "X1983" "X1984" "X1985" "X1986" "X1987"
[31] "X1988" "X1989" "X1990" "X1991" "X1992"
[36] "X1993" "X1994" "X1995" "X1996" "X1997"
[41] "X1998" "X1999" "X2000" "X2001" "X2002"
[46] "X2003" "X2004" "X2005" "X2006" "X2007"
[51] "X2008" "X2009" "X2010" "X2011" "X2012"
[56] "X2013" "X2014" "X2015" "X2016" "X2017"
[61] "X2018" "X2019" "X2020"
# Strip a leading "X" from every column name; names that do not start with
# "X" are left unchanged.
names(df) <- gsub("^X", "", names(df))
gsub() matches a regular expression and replaces it if found. In our case, the regex says the string must have an X at the beginning.

Split a sequence of numbers into groups of 10 digits using R

I would like for R to read in the first 10,000 digits of Pi and group every 10 digits together
e.g., I want R to read in a sequence
pi <- 3.14159265358979323846264338327950288419716939937510582097...
and would like R to give me a table where each row contains 10 digit:
3141592653
5897932384
6264338327
...
I am new to R and really don't know where to start so any help would be much appreciated!
Thank you in advance
https://rextester.com/OQRM27791
# Break the digit string into single characters.
digits <- strsplit(
  "314159265358979323846264338327950288419716939937510582097",
  ""
)[[1]]
# Group label for each digit: 1 for the first ten, 2 for the next ten, ...
group_id <- ceiling(seq_along(digits) / 10)
# One list element per group of (up to) ten digits.
split(digits, group_id)
Here's one way to do it. It's fully reproducible, so just cut and paste it into your R console. The vector result is the first 10,000 digits of pi, split into 1000 strings of 10 digits.
For this many digits, I have used an online source for the precalculated value of pi. This is read in using readChar and the decimal point is stripped out with gsub. The resulting string is split into individual characters and put in a 1000 * 10 matrix (filled row-wise). The rows are then pasted into strings, giving the result. I have displayed only the first 100 entries of result for clarity of presentation.
# Download the first 10,000 digits of pi and split them into 10-digit strings.
pi_url <- "https://www.pi2e.ch/blog/wp-content/uploads/2017/03/pi_dec_1m.txt"
# Read 10,001 characters (the digits plus the decimal point) and drop the
# point. The address must be passed as `pi_url`; a bare `url` would refer to
# base R's url() function instead of the string defined above.
pi_char <- gsub("\\.", "", readChar(pi_url, 1e4 + 1))
# One digit per cell, filled row-wise, so each row holds 10 consecutive digits.
pi_mat <- matrix(strsplit(pi_char, "")[[1]], byrow = TRUE, ncol = 10)
# Collapse each row into a single 10-character string.
result <- apply(pi_mat, 1, paste0, collapse = "")
head(result, 100)
#> [1] "3141592653" "5897932384" "6264338327" "9502884197" "1693993751"
#> [6] "0582097494" "4592307816" "4062862089" "9862803482" "5342117067"
#> [11] "9821480865" "1328230664" "7093844609" "5505822317" "2535940812"
#> [16] "8481117450" "2841027019" "3852110555" "9644622948" "9549303819"
#> [21] "6442881097" "5665933446" "1284756482" "3378678316" "5271201909"
#> [26] "1456485669" "2346034861" "0454326648" "2133936072" "6024914127"
#> [31] "3724587006" "6063155881" "7488152092" "0962829254" "0917153643"
#> [36] "6789259036" "0011330530" "5488204665" "2138414695" "1941511609"
#> [41] "4330572703" "6575959195" "3092186117" "3819326117" "9310511854"
#> [46] "8074462379" "9627495673" "5188575272" "4891227938" "1830119491"
#> [51] "2983367336" "2440656643" "0860213949" "4639522473" "7190702179"
#> [56] "8609437027" "7053921717" "6293176752" "3846748184" "6766940513"
#> [61] "2000568127" "1452635608" "2778577134" "2757789609" "1736371787"
#> [66] "2146844090" "1224953430" "1465495853" "7105079227" "9689258923"
#> [71] "5420199561" "1212902196" "0864034418" "1598136297" "7477130996"
#> [76] "0518707211" "3499999983" "7297804995" "1059731732" "8160963185"
#> [81] "9502445945" "5346908302" "6425223082" "5334468503" "5261931188"
#> [86] "1710100031" "3783875288" "6587533208" "3814206171" "7766914730"
#> [91] "3598253490" "4287554687" "3115956286" "3882353787" "5937519577"
#> [96] "8185778053" "2171226806" "6130019278" "7661119590" "9216420198"
Created on 2020-07-23 by the reprex package (v0.3.0)
We can use str_extract_all:
library(stringr)
# Use dedicated names instead of `pi` and `t`, which would mask base R's
# constant pi and the transpose function t().
pi_txt <- readLines("https://www.pi2e.ch/blog/wp-content/uploads/2017/03/pi_dec_1m.txt")
# Drop the decimal point, then pull out consecutive runs of exactly 10 digits.
pi_chunks <- unlist(str_extract_all(sub("\\.", "", pi_txt), "\\d{10}"))
pi_chunks[1:100]
[1] "3141592653" "5897932384" "6264338327" "9502884197" "1693993751" "0582097494" "4592307816" "4062862089"
[9] "9862803482" "5342117067" "9821480865" "1328230664" "7093844609" "5505822317" "2535940812" "8481117450"
[17] "2841027019" "3852110555" "9644622948" "9549303819" "6442881097" "5665933446" "1284756482" "3378678316"
[25] "5271201909" "1456485669" "2346034861" "0454326648" "2133936072" "6024914127" "3724587006" "6063155881"
[33] "7488152092" "0962829254" "0917153643" "6789259036" "0011330530" "5488204665" "2138414695" "1941511609"
[41] "4330572703" "6575959195" "3092186117" "3819326117" "9310511854" "8074462379" "9627495673" "5188575272"
[49] "4891227938" "1830119491" "2983367336" "2440656643" "0860213949" "4639522473" "7190702179" "8609437027"
[57] "7053921717" "6293176752" "3846748184" "6766940513" "2000568127" "1452635608" "2778577134" "2757789609"
[65] "1736371787" "2146844090" "1224953430" "1465495853" "7105079227" "9689258923" "5420199561" "1212902196"
[73] "0864034418" "1598136297" "7477130996" "0518707211" "3499999983" "7297804995" "1059731732" "8160963185"
[81] "9502445945" "5346908302" "6425223082" "5334468503" "5261931188" "1710100031" "3783875288" "6587533208"
[89] "3814206171" "7766914730" "3598253490" "4287554687" "3115956286" "3882353787" "5937519577" "8185778053"
[97] "2171226806" "6130019278" "7661119590" "9216420198"

Resources