Split a sequence of numbers into groups of 10 digits using R - r

I would like for R to read in the first 10,000 digits of Pi and group every 10 digits together
e.g., I want R to read in a sequence
pi <- 3.14159265358979323846264338327950288419716939937510582097...
and would like R to give me a table where each row contains 10 digits:
3141592653
5897932384
6264338327
...
I am new to R and really don't know where to start so any help would be much appreciated!
Thank you in advance

https://rextester.com/OQRM27791
# Split the digit string into single characters; strsplit() returns a list
# with one element per input string, so take the first.
p <- strsplit("314159265358979323846264338327950288419716939937510582097", "")
digits <- p[[1]]
# Label every digit with its group number (1 for digits 1-10, 2 for 11-20, ...)
# and split on that label. seq_along() replaces 1:length(digits) so that an
# empty input produces no groups instead of iterating over c(1, 0).
groups <- split(digits, ceiling(seq_along(digits) / 10))
groups

Here's one way to do it. It's fully reproducible, so just cut and paste it into your R console. The vector result is the first 10,000 digits of pi, split into 1000 strings of 10 digits.
For this many digits, I have used an online source for the precalculated value of pi. This is read in using readChar and the decimal point is stripped out with gsub. The resulting string is split into individual characters and put in a 1000 * 10 matrix (filled row-wise). The rows are then pasted into strings, giving the result. I have displayed only the first 100 entries of result for clarity of presentation.
# Plain-text file containing pi ("3.1415...") to one million decimals.
pi_url <- "https://www.pi2e.ch/blog/wp-content/uploads/2017/03/pi_dec_1m.txt"
# Read the first 10,000 digits plus the decimal point, then strip the point.
# The URL string pi_url must be passed here; a bare `url` would refer to
# base R's url() function rather than the address defined above.
pi_char <- gsub("\\.", "", readChar(pi_url, 1e4 + 1))
# One digit per cell, filled row-wise so each row holds 10 consecutive digits.
pi_mat <- matrix(strsplit(pi_char, "")[[1]], byrow = TRUE, ncol = 10)
# Collapse every row into a single 10-character string.
result <- apply(pi_mat, 1, paste0, collapse = "")
head(result, 100)
#> [1] "3141592653" "5897932384" "6264338327" "9502884197" "1693993751"
#> [6] "0582097494" "4592307816" "4062862089" "9862803482" "5342117067"
#> [11] "9821480865" "1328230664" "7093844609" "5505822317" "2535940812"
#> [16] "8481117450" "2841027019" "3852110555" "9644622948" "9549303819"
#> [21] "6442881097" "5665933446" "1284756482" "3378678316" "5271201909"
#> [26] "1456485669" "2346034861" "0454326648" "2133936072" "6024914127"
#> [31] "3724587006" "6063155881" "7488152092" "0962829254" "0917153643"
#> [36] "6789259036" "0011330530" "5488204665" "2138414695" "1941511609"
#> [41] "4330572703" "6575959195" "3092186117" "3819326117" "9310511854"
#> [46] "8074462379" "9627495673" "5188575272" "4891227938" "1830119491"
#> [51] "2983367336" "2440656643" "0860213949" "4639522473" "7190702179"
#> [56] "8609437027" "7053921717" "6293176752" "3846748184" "6766940513"
#> [61] "2000568127" "1452635608" "2778577134" "2757789609" "1736371787"
#> [66] "2146844090" "1224953430" "1465495853" "7105079227" "9689258923"
#> [71] "5420199561" "1212902196" "0864034418" "1598136297" "7477130996"
#> [76] "0518707211" "3499999983" "7297804995" "1059731732" "8160963185"
#> [81] "9502445945" "5346908302" "6425223082" "5334468503" "5261931188"
#> [86] "1710100031" "3783875288" "6587533208" "3814206171" "7766914730"
#> [91] "3598253490" "4287554687" "3115956286" "3882353787" "5937519577"
#> [96] "8185778053" "2171226806" "6130019278" "7661119590" "9216420198"
Created on 2020-07-23 by the reprex package (v0.3.0)

We can use str_extract_all from the stringr package:
library(stringr)
# Download the text file holding the digits of pi (one element per line).
# Note: this assignment masks the built-in constant `pi` in the workspace.
pi <- readLines(con = "https://www.pi2e.ch/blog/wp-content/uploads/2017/03/pi_dec_1m.txt")
# Remove the decimal point so the digits form one uninterrupted run.
digits_only <- sub("\\.", "", pi)
# Pull out every consecutive run of exactly 10 digits and flatten the
# per-line list into a single character vector.
ten_digit_runs <- str_extract_all(digits_only, "\\d{10}")
t <- unlist(ten_digit_runs)
# Show the first 100 groups.
t[seq_len(100)]
[1] "3141592653" "5897932384" "6264338327" "9502884197" "1693993751" "0582097494" "4592307816" "4062862089"
[9] "9862803482" "5342117067" "9821480865" "1328230664" "7093844609" "5505822317" "2535940812" "8481117450"
[17] "2841027019" "3852110555" "9644622948" "9549303819" "6442881097" "5665933446" "1284756482" "3378678316"
[25] "5271201909" "1456485669" "2346034861" "0454326648" "2133936072" "6024914127" "3724587006" "6063155881"
[33] "7488152092" "0962829254" "0917153643" "6789259036" "0011330530" "5488204665" "2138414695" "1941511609"
[41] "4330572703" "6575959195" "3092186117" "3819326117" "9310511854" "8074462379" "9627495673" "5188575272"
[49] "4891227938" "1830119491" "2983367336" "2440656643" "0860213949" "4639522473" "7190702179" "8609437027"
[57] "7053921717" "6293176752" "3846748184" "6766940513" "2000568127" "1452635608" "2778577134" "2757789609"
[65] "1736371787" "2146844090" "1224953430" "1465495853" "7105079227" "9689258923" "5420199561" "1212902196"
[73] "0864034418" "1598136297" "7477130996" "0518707211" "3499999983" "7297804995" "1059731732" "8160963185"
[81] "9502445945" "5346908302" "6425223082" "5334468503" "5261931188" "1710100031" "3783875288" "6587533208"
[89] "3814206171" "7766914730" "3598253490" "4287554687" "3115956286" "3882353787" "5937519577" "8185778053"
[97] "2171226806" "6130019278" "7661119590" "9216420198"

Related

Using R, how do I copy a tibble to an element of a list? For example, each element ff[[i]] should hold a tibble

Use vector() to create an empty vector called ff that is of mode “list” and length 9. Now write a for() loop to loop over the 9 files in dfiles and for each (i) read the file in to a tibble, and change the column names to x and y, and (ii) copy the tibble to an element of your list ff.
dfiles is a directory which has different files.
This is what I did.
# Pre-allocate a list with 9 empty slots, one per file to be read.
ff <- vector(mode = "list", length = 9)
# NOTE(review): this creates a variable named `length`, the same name as the
# base function; length(dfiles) on the right-hand side still calls the
# function, but the shared name is easy to misread.
length <- length(dfiles)
# For each file: read it, rename the first two columns to x and y, then try
# to store the result in ff and print the head of what was stored.
for (i in 1:length) {
# presumably readr::read_csv(), returning a tibble -- confirm the package
study <- read_csv(dfiles[i])
names(study)[1] <- "x"
names(study)[2] <- "y"
# Single-bracket assignment replaces a one-element sub-list, while c(study)
# is a list with one element per column. Only the first column is therefore
# kept and R warns "number of items to replace is not a multiple of
# replacement length". Storing the whole tibble needs ff[[i]] <- study.
ff[i] <- c(study)
print(head(ff[i]))
}
[[1]]
[1] -0.989532202 -0.052799402 0.823610903 -0.255509103 -0.220684347
[6] 0.307726791 -0.060013253 -0.555652890 -0.138615019 1.882839792
[11] 0.873668680 -0.914597073 -1.244917622 -0.359982241 1.328774701
[16] 0.292679118 -0.701505237 0.882234568 -0.133370389 -1.120678499
[21] 0.461192454 1.524142810 0.434468298 0.192000371 -0.656243128
[26] 0.568398531 -1.070570535 -1.653149024 -0.043352768 -0.034593506
[31] 2.365055532 -1.216347308 0.170906323 0.805053094 1.050592844
[36] -0.010724485 -0.743256141 -0.065784052 1.939755992 0.482739008
[41] -2.044477073 1.423459129 0.540502661 -0.033571772 -0.017863621
[46] -0.149789720 0.256559481 -0.503866933 0.277011252 -0.931356025
[51] 0.200146875 1.106837421 0.509206114 1.033749676 -1.090868762
[56] 0.054792784 0.617250303 -1.068004868 1.565814337 -1.034808011
[61] 0.164518709 0.151832330 0.121670302 -0.210424584 0.449936787
[66] -1.031164492 -1.289364188 -0.654568638 -0.057324104 1.256747820
[71] 1.587454140 0.319481463 0.381591623 -0.243644884 0.048053084
[76] -1.404545861 0.289933729 -0.535553582 0.334678773 -0.345981339
[81] -0.661615735 -0.219111377 -0.366904911 1.094578208 0.209208082
[86] 0.432491426 -1.240853586 1.496821710 0.159370441 -0.856281403
[91] 0.309046645 0.870434030 -1.383677138 1.690106970 -0.158030705
[96] 1.121170781 0.072261319 -0.332422845 -1.834920047 -1.100172219
[101] -0.041340300 0.827852545 -1.881678654 1.375441112 1.398990464
[106] -1.143316256 0.472300562 -1.033639213 -0.125199979 0.928662739
[111] 0.868339648 -0.849174604 -0.386636454 -0.976163571 0.339543660
[116] -1.559075164 -2.629325442 1.469812282 2.273472913 -0.455033540
[121] 0.761102487 -0.007502784 1.474313800
and the following error.
1: In ff[i] <- c(study) :
number of items to replace is not a multiple of replacement length
2: In ff[i] <- c(study) :
I was expecting that it'll still have column names so I am not sure how to fix it and where I am going wrong.
Was supposed to use double brackets.
ff[[i]] <- study would fix the problem.

How to generate all string permutations with given sets of letters at each position in R

I am sure this has been asked and solved before, but probably I am searching for the wrong terms. I cannot find the relevant thread.
In R, I would like to generate all possible words / strings, where each position can take only a set of values, like
pos1 can be ABC
pos2 can be ABCD
pos3 can be ABC
pos4 can be BCD
etc.
Eg.: BABC is a solution but DABC is not.
If you can point me towards a solution, I would really appreciate!
Thanks for your time!
... timb!, timc!, timd! ...
thx,
Bud
In Base R we can do the following
# Letters allowed at each of the four positions of the word.
pos1 <- c("A", "B", "C")
pos2 <- c("A", "B", "C", "D")
# pos3 must be defined: it is referenced in AllPos below.
pos3 <- c("A", "B", "C")
pos4 <- c("B", "C", "D")
AllPos <- list(pos1, pos2, pos3, pos4)
# Start with the words of length 1, then extend them one position at a time.
result <- AllPos[[1]]
for (i in AllPos[-1]) {
  # merge() of two inputs without a shared column name returns their
  # Cartesian product (existing prefixes vary fastest); pasting each row
  # yields the prefixes extended by one more letter.
  result <- apply(merge(result, i), 1, paste0, collapse = "")
}
> result
[1] "AAAB" "BAAB" "CAAB" "ABAB" "BBAB" "CBAB" "ACAB" "BCAB" "CCAB" "ADAB"
[11] "BDAB" "CDAB" "AABB" "BABB" "CABB" "ABBB" "BBBB" "CBBB" "ACBB" "BCBB"
[21] "CCBB" "ADBB" "BDBB" "CDBB" "AACB" "BACB" "CACB" "ABCB" "BBCB" "CBCB"
[31] "ACCB" "BCCB" "CCCB" "ADCB" "BDCB" "CDCB" "AAAC" "BAAC" "CAAC" "ABAC"
[41] "BBAC" "CBAC" "ACAC" "BCAC" "CCAC" "ADAC" "BDAC" "CDAC" "AABC" "BABC"
[51] "CABC" "ABBC" "BBBC" "CBBC" "ACBC" "BCBC" "CCBC" "ADBC" "BDBC" "CDBC"
[61] "AACC" "BACC" "CACC" "ABCC" "BBCC" "CBCC" "ACCC" "BCCC" "CCCC" "ADCC"
[71] "BDCC" "CDCC" "AAAD" "BAAD" "CAAD" "ABAD" "BBAD" "CBAD" "ACAD" "BCAD"
[81] "CCAD" "ADAD" "BDAD" "CDAD" "AABD" "BABD" "CABD" "ABBD" "BBBD" "CBBD"
[91] "ACBD" "BCBD" "CCBD" "ADBD" "BDBD" "CDBD" "AACD" "BACD" "CACD" "ABCD"
[101] "BBCD" "CBCD" "ACCD" "BCCD" "CCCD" "ADCD" "BDCD" "CDCD"
A quick and dirty base R solution...
# Letters permitted at each of the four positions, one string per position.
p1 <- "ABC"
p2 <- "ABCD"
p3 <- "ABC"
p4 <- "BCD"
# Break every string into its individual letters (a list of four character
# vectors), enumerate all combinations with expand.grid(), and glue each
# row of the grid into one word.
letter_sets <- strsplit(c(p1, p2, p3, p4), "")
all_combos <- expand.grid(letter_sets)
apply(all_combos, 1, paste0, collapse = "")
#> [1] "AAAB" "BAAB" "CAAB" "ABAB" "BBAB" "CBAB" "ACAB" "BCAB" "CCAB" "ADAB"
#> [11] "BDAB" "CDAB" "AABB" "BABB" "CABB" "ABBB" "BBBB" "CBBB" "ACBB" "BCBB"
#> [21] "CCBB" "ADBB" "BDBB" "CDBB" "AACB" "BACB" "CACB" "ABCB" "BBCB" "CBCB"
#> [31] "ACCB" "BCCB" "CCCB" "ADCB" "BDCB" "CDCB" "AAAC" "BAAC" "CAAC" "ABAC"
#> [41] "BBAC" "CBAC" "ACAC" "BCAC" "CCAC" "ADAC" "BDAC" "CDAC" "AABC" "BABC"
#> [51] "CABC" "ABBC" "BBBC" "CBBC" "ACBC" "BCBC" "CCBC" "ADBC" "BDBC" "CDBC"
#> [61] "AACC" "BACC" "CACC" "ABCC" "BBCC" "CBCC" "ACCC" "BCCC" "CCCC" "ADCC"
#> [71] "BDCC" "CDCC" "AAAD" "BAAD" "CAAD" "ABAD" "BBAD" "CBAD" "ACAD" "BCAD"
#> [81] "CCAD" "ADAD" "BDAD" "CDAD" "AABD" "BABD" "CABD" "ABBD" "BBBD" "CBBD"
#> [91] "ACBD" "BCBD" "CCBD" "ADBD" "BDBD" "CDBD" "AACD" "BACD" "CACD" "ABCD"
#> [101] "BBCD" "CBCD" "ACCD" "BCCD" "CCCD" "ADCD" "BDCD" "CDCD"
Created on 2020-06-18 by the reprex package (v0.3.0)
expand.grid is your friend here.
A simple solution:
# One vector of allowed letters per position: A-C, A-D, A-C, B-D.
letter_choices <- list(
  LETTERS[1:3],
  LETTERS[1:4],
  LETTERS[1:3],
  LETTERS[2:4]
)
# expand.grid() enumerates every combination (first position varies fastest);
# each row is then collapsed into a single word.
combo_grid <- expand.grid(letter_choices)
apply(combo_grid, MARGIN = 1, FUN = function(row) paste(row, collapse = ""))
#> [1] "AAAB" "BAAB" "CAAB" "ABAB" "BBAB" "CBAB" "ACAB" "BCAB" "CCAB" "ADAB"
#> [11] "BDAB" "CDAB" "AABB" "BABB" "CABB" "ABBB" "BBBB" "CBBB" "ACBB" "BCBB"
#> [21] "CCBB" "ADBB" "BDBB" "CDBB" "AACB" "BACB" "CACB" "ABCB" "BBCB" "CBCB"
#> [31] "ACCB" "BCCB" "CCCB" "ADCB" "BDCB" "CDCB" "AAAC" "BAAC" "CAAC" "ABAC"
#> [41] "BBAC" "CBAC" "ACAC" "BCAC" "CCAC" "ADAC" "BDAC" "CDAC" "AABC" "BABC"
#> [51] "CABC" "ABBC" "BBBC" "CBBC" "ACBC" "BCBC" "CCBC" "ADBC" "BDBC" "CDBC"
#> [61] "AACC" "BACC" "CACC" "ABCC" "BBCC" "CBCC" "ACCC" "BCCC" "CCCC" "ADCC"
#> [71] "BDCC" "CDCC" "AAAD" "BAAD" "CAAD" "ABAD" "BBAD" "CBAD" "ACAD" "BCAD"
#> [81] "CCAD" "ADAD" "BDAD" "CDAD" "AABD" "BABD" "CABD" "ABBD" "BBBD" "CBBD"
#> [91] "ACBD" "BCBD" "CCBD" "ADBD" "BDBD" "CDBD" "AACD" "BACD" "CACD" "ABCD"
#> [101] "BBCD" "CBCD" "ACCD" "BCCD" "CCCD" "ADCD" "BDCD" "CDCD"
Created on 2020-06-18 by the reprex package (v0.3.0)

Vectorize over all combinations of arguments

Is there a way to vectorize an R function over all combinations of multiple parameters and return the result as a list?
As an example, using Vectorize over rnorm produces the following, but I would like to have a list of vectors corresponding to each combination of the arguments (so it should return a list of 60 vectors instead of just 5):
> vrnorm <- Vectorize(rnorm)
> vrnorm( 10*1:5, mean = 1:4, sd = 1:3)
[[1]]
[1] 1.37858918 -0.85432372 1.87321175 2.08362291 0.02950438 1.67967249
[7] 2.25954748 1.44031251 0.09816078 0.91365201
[[2]]
[1] 1.7717267 1.7961157 2.3291686 2.6114272 2.6228930 -0.2580403
[7] 3.3232109 -0.4652434 -0.4803258 -0.1170871 0.1158350 -1.0902252
[13] -0.6400934 3.6625290 2.5924096 4.5878564 0.7265718 3.2034281
[19] -0.2499768 2.0164275
[[3]]
[1] 5.8251252 3.1089121 2.8893594 2.9079357 1.9308677 4.3359878
[7] -0.3668157 4.9728508 -0.6494110 6.7729562 6.1623976 -0.1696638
[13] 5.4664038 3.8141798 -3.1842879 2.3985010 0.3840465 4.0696628
[19] 4.8217798 3.3135100 4.9028273 3.6193840 4.8861864 3.9871897
[25] -0.1059491 3.8961742 4.8293925 3.8935335 6.3194862 4.7846143
[[4]]
[1] 3.737043 2.849215 4.611868 3.494396 2.909659 4.861474 2.000194 3.343171
[9] 4.019523 3.277575 3.885272 3.331160 4.581551 4.960162 3.061960 5.359514
[17] 4.651848 3.640535 3.612368 4.338019 5.233665 3.585976 4.018191 4.320883
[25] 2.598541 3.519587 5.231375 4.733647 2.493334 2.791483 4.330052 2.498424
[33] 3.317115 3.515012 5.079780 4.720884 3.055191 5.262385 1.939961 4.779480
[[5]]
[1] 4.31697756 0.93754587 3.96698522 -0.03680018 1.94987430 1.73985617
[7] -1.42300550 2.07764933 0.45701395 2.42548257 0.67745524 -2.42054060
[13] 1.14655845 1.60277193 -1.04636658 0.94097335 3.07688803 0.58049012
[19] 1.25812532 1.91613097 -2.95408979 3.00990345 -0.67314868 0.64746260
[25] 1.69640497 0.68493689 2.84261574 1.65290227 4.16990548 -3.30426803
[31] 3.80508273 5.95888355 -0.09021591 3.88157980 -1.19166351 2.70208228
[37] -0.56278834 -0.83943824 -0.86868493 -1.19995506 -2.30275483 1.70435276
[43] 2.67984044 -0.04976799 0.98716782 2.71171575 5.21648742 0.13860495
[49] 1.61038570 0.50679460
Use expand.grid to expand all arguments and create a data frame, and then use mapply.
# Every combination of sample size, mean and standard deviation:
# 5 * 4 * 3 = 60 rows, one per rnorm() call.
dat <- expand.grid(n = 10 * seq_len(5), mean = seq_len(4), sd = seq_len(3))
# Map() is mapply() with SIMPLIFY = FALSE: it calls rnorm() once per row and
# returns the draws as a list of 60 numeric vectors.
Map(rnorm, dat$n, dat$mean, dat$sd)
You can also use purrr::pmap() as an alternative to mapply
library(purrr)
# Every combination of sample size, mean and standard deviation (60 rows).
dat <- expand.grid(n = 10 * seq_len(5), mean = seq_len(4), sd = seq_len(3))
# pmap() iterates over the rows of dat, passing the columns n, mean and sd
# to rnorm() by name, and returns a list with one vector of draws per row.
pmap(.l = dat, .f = rnorm)

Dividing components of a vector into several data points in R

I am trying to turn a vector of length n (say, 14) into a vector of length N (say, 90). For example, my vector is
x<-c(5,3,7,11,12,19,40,2,22,6,10,12,12,4)
and I want to turn it into a vector of length 90, by creating 90 equally "spaced" points on this vector- think of x as a function. Is there any way to do that in R?
Something like this?
> x<-c(5,3,7,11,12,19,40,2,22,6,10,12,12,4)
> seq(min(x),max(x),length=90)
[1] 2.000000 2.426966 2.853933 3.280899 3.707865 4.134831 4.561798
[8] 4.988764 5.415730 5.842697 6.269663 6.696629 7.123596 7.550562
[15] 7.977528 8.404494 8.831461 9.258427 9.685393 10.112360 10.539326
[22] 10.966292 11.393258 11.820225 12.247191 12.674157 13.101124 13.528090
[29] 13.955056 14.382022 14.808989 15.235955 15.662921 16.089888 16.516854
[36] 16.943820 17.370787 17.797753 18.224719 18.651685 19.078652 19.505618
[43] 19.932584 20.359551 20.786517 21.213483 21.640449 22.067416 22.494382
[50] 22.921348 23.348315 23.775281 24.202247 24.629213 25.056180 25.483146
[57] 25.910112 26.337079 26.764045 27.191011 27.617978 28.044944 28.471910
[64] 28.898876 29.325843 29.752809 30.179775 30.606742 31.033708 31.460674
[71] 31.887640 32.314607 32.741573 33.168539 33.595506 34.022472 34.449438
[78] 34.876404 35.303371 35.730337 36.157303 36.584270 37.011236 37.438202
[85] 37.865169 38.292135 38.719101 39.146067 39.573034 40.000000
>
Try this:
# data
x <- c(5, 3, 7, 11, 12, 19, 40, 2, 22, 6, 10, 12, 12, 4)
# expected new length
N <- 90
# new data: treat x as a piecewise-linear function of its index and evaluate
# it at N equally spaced positions between 1 and length(x). approx() returns
# exactly N values, whereas interpolating each segment separately with a
# rounded per-segment count gives 91 values here and repeats every interior
# point of x twice.
x1 <- approx(seq_along(x), x, n = N)$y
# plot the original and the resampled series one above the other, then
# restore the previous graphics layout
old_par <- par(mfrow = c(2, 1))
plot(x)
plot(x1)
par(old_par)

R: Using for loop on data frame

I have a data frame, deflator.
I want to get a new data frame inflation which can be calculated by:
deflator[i] - deflator[i-4]
----------------------------- * 100
deflator [i - 4]
The data frame deflator has 71 numbers:
> deflator
[1] 0.9628929 0.9596746 0.9747274 0.9832532 0.9851884
[6] 0.9797770 0.9913502 1.0100561 1.0176906 1.0092516
[11] 1.0185932 1.0241043 1.0197975 1.0174097 1.0297328
[16] 1.0297071 1.0313232 1.0244618 1.0347808 1.0480411
[21] 1.0322142 1.0351968 1.0403264 1.0447121 1.0504402
[26] 1.0487097 1.0664664 1.0935239 1.0965951 1.1141851
[31] 1.1033155 1.1234482 1.1333870 1.1188136 1.1336276
[36] 1.1096461 1.1226584 1.1287245 1.1529588 1.1582911
[41] 1.1691221 1.1782178 1.1946234 1.1963453 1.1939922
[46] 1.2118189 1.2227960 1.2140535 1.2228828 1.2314258
[51] 1.2570788 1.2572214 1.2607763 1.2744415 1.2982076
[56] 1.3318808 1.3394186 1.3525902 1.3352815 1.3492751
[61] 1.3593859 1.3368135 1.3642940 1.3538567 1.3658135
[66] 1.3710932 1.3888638 1.4262185 1.4309707 1.4328823
[71] 1.4497201
This is a very tricky question for me.
I tried to do this using a for loop:
> d <- data.frame(deflator)
> for (i in 1:71) {d <-rbind(d,c(delfaotr ))}
I think I might be doing it wrong.
Why use data frames? This is a straightforward vector operation.
# Percentage change over a lag of 4 observations, for every i > 4:
#   100 * (deflator[i] - deflator[i - 4]) / deflator[i - 4]
lag_by <- 4
current <- tail(deflator, -lag_by)   # deflator[i]: drops the first 4 values
previous <- head(deflator, -lag_by)  # deflator[i - 4]: drops the last 4 values
# The lagged value is both subtracted and used as the denominator; swapping
# the two vectors would flip the sign and divide by the wrong observation.
inflation <- 100 * (current - previous) / previous
I agree with @Fhnuzoag that your example suggests calculations on a numeric vector, not a data frame. Here's an additional way to do your calculations, taking advantage of the lag argument of the diff function (with indexes that match those in your question):
# Number of positions separating the two values being compared.
lagBy <- 4
# Numerator: deflator[i] - deflator[i - lagBy] for every valid i.
laggedDiff <- diff(x = deflator, lag = lagBy)
# Denominator: the lagged values, i.e. all but the last lagBy elements.
n_kept <- length(deflator) - lagBy
theDenom <- deflator[seq_len(n_kept)]
# Relative change as a proportion (multiply by 100 for a percentage).
inflation <- laggedDiff / theDenom
# The first few results are:
head(inflation, n = 6L)
# [1] 0.02315470 0.02094710 0.01705379 0.02725941 0.03299085 0.03008297

Resources