When to use approxfun vs. approx - r

The documentation for approxfun states that it is "often more useful than approx". I'm struggling to get my head around approxfun. When would approxfun be more useful than approx (and when would approx be more useful)?

approx returns the value of the approximated function at (either) specified points or at a given number of points. approxfun returns a function which can then be evaluated at some specific points. If you need the approximation at points that you know at the time of making the approximation, approx will do that for you. If you need a function (in the mathematical sense) which will return the value of the approximation for some argument given later, approxfun is what you need.
Here are some examples.
dat <- data.frame(x=1:10, y=(1:10)^2)
The output from approx and approxfun using this data
> approx(dat$x, dat$y)
$x
[1] 1.000000 1.183673 1.367347 1.551020 1.734694 1.918367 2.102041
[8] 2.285714 2.469388 2.653061 2.836735 3.020408 3.204082 3.387755
[15] 3.571429 3.755102 3.938776 4.122449 4.306122 4.489796 4.673469
[22] 4.857143 5.040816 5.224490 5.408163 5.591837 5.775510 5.959184
[29] 6.142857 6.326531 6.510204 6.693878 6.877551 7.061224 7.244898
[36] 7.428571 7.612245 7.795918 7.979592 8.163265 8.346939 8.530612
[43] 8.714286 8.897959 9.081633 9.265306 9.448980 9.632653 9.816327
[50] 10.000000
$y
[1] 1.000000 1.551020 2.102041 2.653061 3.204082 3.755102
[7] 4.510204 5.428571 6.346939 7.265306 8.183673 9.142857
[13] 10.428571 11.714286 13.000000 14.285714 15.571429 17.102041
[19] 18.755102 20.408163 22.061224 23.714286 25.448980 27.469388
[25] 29.489796 31.510204 33.530612 35.551020 37.857143 40.244898
[31] 42.632653 45.020408 47.408163 49.918367 52.673469 55.428571
[37] 58.183673 60.938776 63.693878 66.775510 69.897959 73.020408
[43] 76.142857 79.265306 82.551020 86.040816 89.530612 93.020408
[49] 96.510204 100.000000
> approxfun(dat$x, dat$y)
function (v)
.C(C_R_approxfun, as.double(x), as.double(y), as.integer(n),
xout = as.double(v), as.integer(length(v)), as.integer(method),
as.double(yleft), as.double(yright), as.double(f), NAOK = TRUE,
PACKAGE = "stats")$xout
<bytecode: 0x05244854>
<environment: 0x030632fc>
More examples of usage:
a <- approx(dat$x, dat$y)
af <- approxfun(dat$x, dat$y)
plot(dat)
points(a, pch=2)
plot(dat)
curve(af, add=TRUE)
or another example where a function is needed:
> uniroot(function(x) {af(x)-4}, interval=c(1,10))
$root
[1] 1.999994
$f.root
[1] -1.736297e-05
$iter
[1] 24
$estim.prec
[1] 6.103516e-05

Related

Split a sequence of numbers into groups of 10 digits using R

I would like for R to read in the first 10,000 digits of Pi and group every 10 digits together
e.g., I want R to read in a sequence
pi <- 3.14159265358979323846264338327950288419716939937510582097...
and would like R to give me a table where each row contains 10 digit:
3141592653
5897932384
6264338327
...
I am new to R and really don't know where to start so any help would be much appreciated!
Thank you in advance
https://rextester.com/OQRM27791
p <- strsplit("314159265358979323846264338327950288419716939937510582097", "")
digits <- p[[1]]
split(digits, ceiling((1:length(digits)) / 10));
Here's one way to do it. It's fully reproducible, so just cut and paste it into your R console. The vector result is the first 10,000 digits of pi, split into 1000 strings of 10 digits.
For this many digits, I have used an online source for the precalculated value of pi. This is read in using readChar and the decimal point is stripped out with gsub. The resulting string is split into individual characters and put in a 1000 * 10 matrix (filled row-wise). The rows are then pasted into strings, giving the result. I have displayed only the first 100 entries of result for clarity of presentation.
pi_url <- "https://www.pi2e.ch/blog/wp-content/uploads/2017/03/pi_dec_1m.txt"
pi_char <- gsub("\\.", "", readChar(url, 1e4 + 1))
pi_mat <- matrix(strsplit(pi_char, "")[[1]], byrow = TRUE, ncol = 10)
result <- apply(pi_mat, 1, paste0, collapse = "")
head(result, 100)
#> [1] "3141592653" "5897932384" "6264338327" "9502884197" "1693993751"
#> [6] "0582097494" "4592307816" "4062862089" "9862803482" "5342117067"
#> [11] "9821480865" "1328230664" "7093844609" "5505822317" "2535940812"
#> [16] "8481117450" "2841027019" "3852110555" "9644622948" "9549303819"
#> [21] "6442881097" "5665933446" "1284756482" "3378678316" "5271201909"
#> [26] "1456485669" "2346034861" "0454326648" "2133936072" "6024914127"
#> [31] "3724587006" "6063155881" "7488152092" "0962829254" "0917153643"
#> [36] "6789259036" "0011330530" "5488204665" "2138414695" "1941511609"
#> [41] "4330572703" "6575959195" "3092186117" "3819326117" "9310511854"
#> [46] "8074462379" "9627495673" "5188575272" "4891227938" "1830119491"
#> [51] "2983367336" "2440656643" "0860213949" "4639522473" "7190702179"
#> [56] "8609437027" "7053921717" "6293176752" "3846748184" "6766940513"
#> [61] "2000568127" "1452635608" "2778577134" "2757789609" "1736371787"
#> [66] "2146844090" "1224953430" "1465495853" "7105079227" "9689258923"
#> [71] "5420199561" "1212902196" "0864034418" "1598136297" "7477130996"
#> [76] "0518707211" "3499999983" "7297804995" "1059731732" "8160963185"
#> [81] "9502445945" "5346908302" "6425223082" "5334468503" "5261931188"
#> [86] "1710100031" "3783875288" "6587533208" "3814206171" "7766914730"
#> [91] "3598253490" "4287554687" "3115956286" "3882353787" "5937519577"
#> [96] "8185778053" "2171226806" "6130019278" "7661119590" "9216420198"
Created on 2020-07-23 by the reprex package (v0.3.0)
We can use str_extract:
pi <- readLines("https://www.pi2e.ch/blog/wp-content/uploads/2017/03/pi_dec_1m.txt")
library(stringr)
t <- unlist(str_extract_all(sub("\\.","", pi), "\\d{10}"))
t[1:100]
[1] "3141592653" "5897932384" "6264338327" "9502884197" "1693993751" "0582097494" "4592307816" "4062862089"
[9] "9862803482" "5342117067" "9821480865" "1328230664" "7093844609" "5505822317" "2535940812" "8481117450"
[17] "2841027019" "3852110555" "9644622948" "9549303819" "6442881097" "5665933446" "1284756482" "3378678316"
[25] "5271201909" "1456485669" "2346034861" "0454326648" "2133936072" "6024914127" "3724587006" "6063155881"
[33] "7488152092" "0962829254" "0917153643" "6789259036" "0011330530" "5488204665" "2138414695" "1941511609"
[41] "4330572703" "6575959195" "3092186117" "3819326117" "9310511854" "8074462379" "9627495673" "5188575272"
[49] "4891227938" "1830119491" "2983367336" "2440656643" "0860213949" "4639522473" "7190702179" "8609437027"
[57] "7053921717" "6293176752" "3846748184" "6766940513" "2000568127" "1452635608" "2778577134" "2757789609"
[65] "1736371787" "2146844090" "1224953430" "1465495853" "7105079227" "9689258923" "5420199561" "1212902196"
[73] "0864034418" "1598136297" "7477130996" "0518707211" "3499999983" "7297804995" "1059731732" "8160963185"
[81] "9502445945" "5346908302" "6425223082" "5334468503" "5261931188" "1710100031" "3783875288" "6587533208"
[89] "3814206171" "7766914730" "3598253490" "4287554687" "3115956286" "3882353787" "5937519577" "8185778053"
[97] "2171226806" "6130019278" "7661119590" "9216420198"

How to output in R all possible deviations of a word for a fixed distance value?

I have a word and want to output in R all possible deviatons (replacement, substitution, insertion) for a fixed distance value into a vector.
For instance, the word "Cat" and a fixed distance value of 1 results in a vector with the elements "cot", "at", ...
I'm going to assume that you want all actual words, not just permutations of the characters with an edit distance of 1 that would include non-words such as "zat".
We can do this using adist() to compute the edit distance between your target word and all eligible English words, taken from some word list. Here, I used the English syllable dictionary from the quanteda package (you did tag this question as quanteda after all) but this could have been any vector of English dictionary words from any other source as well.
To narrow things down, we first exclude all words that are different in length from the target word by your distance value.
distfn <- function(word, distance = 1) {
# select eligible words for efficiency
eligible_y_words <- names(quanteda::data_int_syllables)
wordlengths <- nchar(eligible_y_words)
eligible_y_words <- eligible_y_words[wordlengths >= (nchar(word) - distance) &
wordlengths <= (nchar(word) + distance)]
# compute Levenshtein distance
distances <- utils::adist(word, eligible_y_words)[1, ]
# return only those for the requested distance value
eligible_y_words[distances == distance]
}
distfn("cat", 1)
## [1] "at" "bat" "ca" "cab" "cac" "cad" "cai" "cal" "cam" "can"
## [11] "cant" "cao" "cap" "caq" "car" "cart" "cas" "cast" "cate" "cato"
## [21] "cats" "catt" "cau" "caw" "cay" "chat" "coat" "cot" "ct" "cut"
## [31] "dat" "eat" "fat" "gat" "hat" "kat" "lat" "mat" "nat" "oat"
## [41] "pat" "rat" "sat" "scat" "tat" "vat" "wat"
To demonstrate how this works on longer words, with alternative distance values.
distfn("coffee", 1)
## [1] "caffee" "coffeen" "coffees" "coffel" "coffer" "coffey" "cuffee"
## [8] "toffee"
distfn("coffee", 2)
## [1] "caffey" "calfee" "chafee" "chaffee" "cofer" "coffee's"
## [7] "coffelt" "coffers" "coffin" "cofide" "cohee" "coiffe"
## [13] "coiffed" "colee" "colfer" "combee" "comfed" "confer"
## [19] "conlee" "coppee" "cottee" "coulee" "coutee" "cuffe"
## [25] "cuffed" "diffee" "duffee" "hoffer" "jaffee" "joffe"
## [31] "mcaffee" "moffet" "noffke" "offen" "offer" "roffe"
## [37] "scoffed" "soffel" "soffer" "yoffie"
(Yes, according to the CMU pronunciation dictionary, those are all actual words...)
EDIT: Make for all permutations of letters, not just actual words
This involves permutations from the alphabet that have the fixed edit distances from the input word. Here I've done it not particular efficiently by forming all permutations of letters within the eligible ranges, and then computing their edit distance from the target word, and then selecting them. So it's a variation of above, except instead of a dictionary, it uses permuted words.
distfn2 <- function(word, distance = 1) {
result <- character()
# start with deletions
for (i in max((nchar(word) - distance), 0):(nchar(word) - 1)) {
result <- c(
result,
combn(unlist(strsplit(word, "", fixed = TRUE)), i,
paste,
collapse = "", simplify = TRUE
)
)
}
# now for changes and insertions
for (i in (nchar(word)):(nchar(word) + distance)) {
# all possible edits
edits <- apply(expand.grid(rep(list(letters), i)),
1, paste0,
collapse = ""
)
# remove original word
edits <- edits[edits != word]
# get all distances, add to result
distances <- utils::adist(word, edits)[1, ]
result <- c(result, edits[distances == distance])
}
result
}
For the OP example:
distfn2("cat", 1)
## [1] "ca" "ct" "at" "caa" "cab" "cac" "cad" "cae" "caf" "cag"
## [11] "cah" "cai" "caj" "cak" "cal" "cam" "can" "cao" "cap" "caq"
## [21] "car" "cas" "aat" "bat" "dat" "eat" "fat" "gat" "hat" "iat"
## [31] "jat" "kat" "lat" "mat" "nat" "oat" "pat" "qat" "rat" "sat"
## [41] "tat" "uat" "vat" "wat" "xat" "yat" "zat" "cbt" "cct" "cdt"
## [51] "cet" "cft" "cgt" "cht" "cit" "cjt" "ckt" "clt" "cmt" "cnt"
## [61] "cot" "cpt" "cqt" "crt" "cst" "ctt" "cut" "cvt" "cwt" "cxt"
## [71] "cyt" "czt" "cau" "cav" "caw" "cax" "cay" "caz" "cata" "catb"
## [81] "catc" "catd" "cate" "catf" "catg" "cath" "cati" "catj" "catk" "catl"
## [91] "catm" "catn" "cato" "catp" "catq" "catr" "cats" "caat" "cbat" "acat"
## [101] "bcat" "ccat" "dcat" "ecat" "fcat" "gcat" "hcat" "icat" "jcat" "kcat"
## [111] "lcat" "mcat" "ncat" "ocat" "pcat" "qcat" "rcat" "scat" "tcat" "ucat"
## [121] "vcat" "wcat" "xcat" "ycat" "zcat" "cdat" "ceat" "cfat" "cgat" "chat"
## [131] "ciat" "cjat" "ckat" "clat" "cmat" "cnat" "coat" "cpat" "cqat" "crat"
## [141] "csat" "ctat" "cuat" "cvat" "cwat" "cxat" "cyat" "czat" "cabt" "cact"
## [151] "cadt" "caet" "caft" "cagt" "caht" "cait" "cajt" "cakt" "calt" "camt"
## [161] "cant" "caot" "capt" "caqt" "cart" "cast" "catt" "caut" "cavt" "cawt"
## [171] "caxt" "cayt" "cazt" "catu" "catv" "catw" "catx" "caty" "catz"
Also works with other edit distances, although it becomes very slow for longer words.
d2 <- distfn2("cat", 2)
set.seed(100)
c(head(d2, 50), sample(d2, 50), tail(d2, 50))
## [1] "c" "a" "t" "ca" "ct" "at" "aaa" "baa"
## [9] "daa" "eaa" "faa" "gaa" "haa" "iaa" "jaa" "kaa"
## [17] "laa" "maa" "naa" "oaa" "paa" "qaa" "raa" "saa"
## [25] "taa" "uaa" "vaa" "waa" "xaa" "yaa" "zaa" "cba"
## [33] "aca" "bca" "cca" "dca" "eca" "fca" "gca" "hca"
## [41] "ica" "jca" "kca" "lca" "mca" "nca" "oca" "pca"
## [49] "qca" "rca" "cnts" "cian" "pcatb" "cqo" "uawt" "hazt"
## [57] "cpxat" "aaet" "ckata" "caod" "ncatl" "qcamt" "cdtp" "qajt"
## [65] "bckat" "qcatr" "cqah" "rcbt" "cvbt" "bbcat" "vcaz" "ylcat"
## [73] "cahz" "jcgat" "mant" "jatd" "czlat" "cbamt" "cajta" "cafp"
## [81] "cizt" "cmaut" "qwat" "jcazt" "hdcat" "ucant" "hate" "cajtl"
## [89] "caaty" "cix" "nmat" "cajit" "cmnat" "caobt" "catoi" "ncau"
## [97] "ucoat" "ncamt" "jath" "oats" "chatz" "ciatz" "cjatz" "ckatz"
## [105] "clatz" "cmatz" "cnatz" "coatz" "cpatz" "cqatz" "cratz" "csatz"
## [113] "ctatz" "cuatz" "cvatz" "cwatz" "cxatz" "cyatz" "czatz" "cabtz"
## [121] "cactz" "cadtz" "caetz" "caftz" "cagtz" "cahtz" "caitz" "cajtz"
## [129] "caktz" "caltz" "camtz" "cantz" "caotz" "captz" "caqtz" "cartz"
## [137] "castz" "cattz" "cautz" "cavtz" "cawtz" "caxtz" "caytz" "caztz"
## [145] "catuz" "catvz" "catwz" "catxz" "catyz" "catzz"
This could be speeded up by less brute force formation of all permutations and then applying adist() to them - it could consist of changes or insertions of known edit distances generated algorithmically from letters.

Vectorize over all combinations of arguments

Is there a way to vectorize an R function over all combinations of multiple parameters and return the result as a list?
As an example, using Vectorize over rnorm produces the following, but I would like to have a list of vectors corresponding to each combination of the arguments (so it should return a list of 60 vectors instead of just 5):
> vrnorm <- Vectorize(rnorm)
> vrnorm( 10*1:5, mean = 1:4, sd = 1:3)
[[1]]
[1] 1.37858918 -0.85432372 1.87321175 2.08362291 0.02950438 1.67967249
[7] 2.25954748 1.44031251 0.09816078 0.91365201
[[2]]
[1] 1.7717267 1.7961157 2.3291686 2.6114272 2.6228930 -0.2580403
[7] 3.3232109 -0.4652434 -0.4803258 -0.1170871 0.1158350 -1.0902252
[13] -0.6400934 3.6625290 2.5924096 4.5878564 0.7265718 3.2034281
[19] -0.2499768 2.0164275
[[3]]
[1] 5.8251252 3.1089121 2.8893594 2.9079357 1.9308677 4.3359878
[7] -0.3668157 4.9728508 -0.6494110 6.7729562 6.1623976 -0.1696638
[13] 5.4664038 3.8141798 -3.1842879 2.3985010 0.3840465 4.0696628
[19] 4.8217798 3.3135100 4.9028273 3.6193840 4.8861864 3.9871897
[25] -0.1059491 3.8961742 4.8293925 3.8935335 6.3194862 4.7846143
[[4]]
[1] 3.737043 2.849215 4.611868 3.494396 2.909659 4.861474 2.000194 3.343171
[9] 4.019523 3.277575 3.885272 3.331160 4.581551 4.960162 3.061960 5.359514
[17] 4.651848 3.640535 3.612368 4.338019 5.233665 3.585976 4.018191 4.320883
[25] 2.598541 3.519587 5.231375 4.733647 2.493334 2.791483 4.330052 2.498424
[33] 3.317115 3.515012 5.079780 4.720884 3.055191 5.262385 1.939961 4.779480
[[5]]
[1] 4.31697756 0.93754587 3.96698522 -0.03680018 1.94987430 1.73985617
[7] -1.42300550 2.07764933 0.45701395 2.42548257 0.67745524 -2.42054060
[13] 1.14655845 1.60277193 -1.04636658 0.94097335 3.07688803 0.58049012
[19] 1.25812532 1.91613097 -2.95408979 3.00990345 -0.67314868 0.64746260
[25] 1.69640497 0.68493689 2.84261574 1.65290227 4.16990548 -3.30426803
[31] 3.80508273 5.95888355 -0.09021591 3.88157980 -1.19166351 2.70208228
[37] -0.56278834 -0.83943824 -0.86868493 -1.19995506 -2.30275483 1.70435276
[43] 2.67984044 -0.04976799 0.98716782 2.71171575 5.21648742 0.13860495
[49] 1.61038570 0.50679460
Use expand.grid to expand all arguments and create a data frame, and then use mapply.
dat <- expand.grid(n = 10 * 1:5, mean = 1:4, sd = 1:3)
mapply(rnorm, dat$n, dat$mean, dat$sd, SIMPLIFY = FALSE)
You can also use purrr::pmap() as an alternative to mapply
library(purrr)
dat <- expand.grid(n = 10 * 1:5, mean = 1:4, sd = 1:3)
pmap(dat, rnorm)

Dividing components of a vector into several data points in R

I am trying to turn a vector of length n (say, 14), and turn it into a vector of length N (say, 90). For example, my vector is
x<-c(5,3,7,11,12,19,40,2,22,6,10,12,12,4)
and I want to turn it into a vector of length 90, by creating 90 equally "spaced" points on this vector- think of x as a function. Is there any way to do that in R?
Something like this?
> x<-c(5,3,7,11,12,19,40,2,22,6,10,12,12,4)
> seq(min(x),max(x),length=90)
[1] 2.000000 2.426966 2.853933 3.280899 3.707865 4.134831 4.561798
[8] 4.988764 5.415730 5.842697 6.269663 6.696629 7.123596 7.550562
[15] 7.977528 8.404494 8.831461 9.258427 9.685393 10.112360 10.539326
[22] 10.966292 11.393258 11.820225 12.247191 12.674157 13.101124 13.528090
[29] 13.955056 14.382022 14.808989 15.235955 15.662921 16.089888 16.516854
[36] 16.943820 17.370787 17.797753 18.224719 18.651685 19.078652 19.505618
[43] 19.932584 20.359551 20.786517 21.213483 21.640449 22.067416 22.494382
[50] 22.921348 23.348315 23.775281 24.202247 24.629213 25.056180 25.483146
[57] 25.910112 26.337079 26.764045 27.191011 27.617978 28.044944 28.471910
[64] 28.898876 29.325843 29.752809 30.179775 30.606742 31.033708 31.460674
[71] 31.887640 32.314607 32.741573 33.168539 33.595506 34.022472 34.449438
[78] 34.876404 35.303371 35.730337 36.157303 36.584270 37.011236 37.438202
[85] 37.865169 38.292135 38.719101 39.146067 39.573034 40.000000
>
Try this:
#data
x <- c(5,3,7,11,12,19,40,2,22,6,10,12,12,4)
#expected new length
N=90
#number of numbers between 2 numbers
my.length.out=round((N-length(x))/(length(x)-1))+1
#new data
x1 <- unlist(
lapply(1:(length(x)-1), function(i)
seq(x[i],x[i+1],length.out = my.length.out)))
#plot
par(mfrow=c(2,1))
plot(x)
plot(x1)

Draw a plot in R as the function varies

I have this code named prob.
prob<-function(k){
start=1
for(i in 1:k-1){
cumm=start*(1-i/365)
start=cumm
}
return(start)
}
Then, I created this function, opp.
opp<-function(a){
1-prob(a)
}
Now, I want to plot opp from 1 to 25.
For example, I want to see opp(1), opp(2), opp(3), opp(4)...., opp(25)
I have tried
plot(opp(a=x),from=1,to=25)
or
plot(opp,from=1,to=25)
or
plot(1:25,opp[1:25])
...
none of these work...
so frustrated... please help!
I would fix the opp function to be vectorized before plotting.
You can just Vectorize(opp) for a quick fix:
prob<-function(k){
start=1
for(i in 1:k-1){
cumm=start*(1-i/365)
start=cumm
}
return(start)
}
opp<-function(a){
1-prob(a)
}
vopp <- Vectorize(opp)
vopp(1:25)
# [1] 0.000000000 0.002739726 0.008204166 0.016355912 0.027135574 0.040462484
# [7] 0.056235703 0.074335292 0.094623834 0.116948178 0.141141378 0.167024789
# [13] 0.194410275 0.223102512 0.252901320 0.283604005 0.315007665 0.346911418
# [19] 0.379118526 0.411438384 0.443688335 0.475695308 0.507297234 0.538344258
# [25] 0.568699704
Alternatively, doing it a more "R" way would be:
opp2 <- function(k) 1 - cumprod(1 - (k - 1) / 365)
opp2(1:25)
# [1] 0.000000000 0.002739726 0.008204166 0.016355912 0.027135574 0.040462484
# [7] 0.056235703 0.074335292 0.094623834 0.116948178 0.141141378 0.167024789
# [13] 0.194410275 0.223102512 0.252901320 0.283604005 0.315007665 0.346911418
# [19] 0.379118526 0.411438384 0.443688335 0.475695308 0.507297234 0.538344258
# [25] 0.568699704
And both give me this
plot(opp2(1:25))
plot(vopp(1:25))

Resources