Related
I have this character vector:
dput(t$line)
c("0304", "0305", "0306", "0308", "0311", "0313", "0314", "0316",
"0318", "0321", "0322", "0323", "0324", "0326", "0327", "0330",
"0333", "0337", "0338", "0339", "0342", "0341", "0344", "0346",
"0347", "0348", "0349", "0350", "0352", "0353", "0357", "0359",
"0360", "0362", "0363", "0364", "0365", "0367", "0371", "0370",
"0373", "0375", "0378", "0380", "0381", "0385", "0386", "0387",
"0391", "0395", "0394", "0397", "0398", "0399", "0400", "0402",
"0404", "0405", "0406", "0408", "0412", "0416", "0419", "0423",
"0424", "0425", "0426", "0428", "0429", "0432", "0433", "0436",
"0435", "0439", "0437", "0440", "0441")
The numbers it contains are not completely continuous. I'd like to make them continuous, while preserving the leading zero or zeros where needed. I've come up with this solution:
paste("0", seq(as.numeric(t$line[1]), as.numeric(t$line[1]) + length(t$line), 1), sep = "")
[1] "0304" "0305" "0306" "0307" "0308" "0309" "0310" "0311" "0312" "0313" "0314" "0315" "0316" "0317" "0318" "0319" "0320"
[18] "0321" "0322" "0323" "0324" "0325" "0326" "0327" "0328" "0329" "0330" "0331" "0332" "0333" "0334" "0335" "0336" "0337"
[35] "0338" "0339" "0340" "0341" "0342" "0343" "0344" "0345" "0346" "0347" "0348" "0349" "0350" "0351" "0352" "0353" "0354"
[52] "0355" "0356" "0357" "0358" "0359" "0360" "0361" "0362" "0363" "0364" "0365" "0366" "0367" "0368" "0369" "0370" "0371"
[69] "0372" "0373" "0374" "0375" "0376" "0377" "0378" "0379" "0380" "0381"
This works okay as long as there is exactly one 0 to be added. There may however be more than one leading zero or none at all. How can the sequence be made continuous with appropriate leading zeros?
One stringr option could be:
str_pad(seq.int(min(as.numeric(x)), length.out = length(x)), 4, "left", "0")
[1] "0304" "0305" "0306" "0307" "0308" "0309" "0310" "0311" "0312" "0313" "0314" "0315" "0316"
[14] "0317" "0318" "0319" "0320" "0321" "0322" "0323" "0324" "0325" "0326" "0327" "0328" "0329"
[27] "0330" "0331" "0332" "0333" "0334" "0335" "0336" "0337" "0338" "0339" "0340" "0341" "0342"
[40] "0343" "0344" "0345" "0346" "0347" "0348" "0349" "0350" "0351" "0352" "0353" "0354" "0355"
[53] "0356" "0357" "0358" "0359" "0360" "0361" "0362" "0363" "0364" "0365" "0366" "0367" "0368"
[66] "0369" "0370" "0371" "0372" "0373" "0374" "0375" "0376" "0377" "0378" "0379" "0380"
A more general solution that takes into account the maximum length of the entries and therefore implicitly the number of leading zeros:
t$line2 <- c("000517","00524")
# Build the continuous, zero-padded sequence spanning the values in `vec`.
#
# vec: character (or numeric) vector of whole numbers, possibly carrying
#      leading zeros.
# Returns a character vector running from the smallest to the largest value of
# `vec` in steps of 1, each element left-padded with zeros to the width of the
# widest entry of `vec`. An empty `vec` yields character(0).
Cont.PadZero <- function(vec) {
  if (length(vec) == 0L) {
    return(character(0))
  }
  # range() delivers minimum and maximum from a single numeric conversion.
  bounds <- range(as.numeric(vec))
  sprintf(paste0("%0", max(nchar(vec)), "d"), seq.int(bounds[1], bounds[2]))
}
Cont.PadZero(t$line2)
[1] "000517" "000518" "000519" "000520" "000521" "000522" "000523" "000524"
You want a continuous sequence of length(x) starting at min(x), where nchar of the resulting elements is identical to that of x.
Use sprintf instead of paste0 to format leading zeros. nchar(x)[1] gives the length to which (occasional) padding with zeros is required. If you cannot be sure that all elements have the same length, use max(nchar(x)) instead, but that's slower.
Since x[1] does not necessarily have to be the minimum, you may want to use min(as.numeric(x)) as the starting point. When you use seq, its end point should be min(as.numeric(x)) + length(x) - 1 (because the min is already the first element). Or use length.out=length(x), which appears to be faster; combined with seq.int it is even faster.
sprintf(paste0("%0", nchar(x)[1], "d"), seq.int(min(as.numeric(x)), length.out=length(x)))
# [1] "0304" "0305" "0306" "0307" "0308" "0309" "0310" "0311" "0312" "0313" "0314" "0315"
# [13] "0316" "0317" "0318" "0319" "0320" "0321" "0322" "0323" "0324" "0325" "0326" "0327"
# [25] "0328" "0329" "0330" "0331" "0332" "0333" "0334" "0335" "0336" "0337" "0338" "0339"
# [37] "0340" "0341" "0342" "0343" "0344" "0345" "0346" "0347" "0348" "0349" "0350" "0351"
# [49] "0352" "0353" "0354" "0355" "0356" "0357" "0358" "0359" "0360" "0361" "0362" "0363"
# [61] "0364" "0365" "0366" "0367" "0368" "0369" "0370" "0371" "0372" "0373" "0374" "0375"
# [73] "0376" "0377" "0378" "0379" "0380"
Another option is using colon :, but seq.int above appears to be faster (see benchmark below).
sprintf(paste0("%0", nchar(x)[1], "d"), 0:(length(x) - 1) + min(as.numeric(x)))
NB: To complete the original vector by imputing missings, you may do:
sprintf(paste0("%0", max(nchar(x)), "d"), do.call(`:`, as.list(range(as.numeric(x)))))
# [1] "0304" "0305" "0306" "0307" "0308" "0309" "0310" "0311" "0312" "0313" "0314"
# [12] "0315" "0316" "0317" "0318" "0319" "0320" "0321" "0322" "0323" "0324" "0325"
# [23] "0326" "0327" "0328" "0329" "0330" "0331" "0332" "0333" "0334" "0335" "0336"
# [34] "0337" "0338" "0339" "0340" "0341" "0342" "0343" "0344" "0345" "0346" "0347"
# [45] "0348" "0349" "0350" "0351" "0352" "0353" "0354" "0355" "0356" "0357" "0358"
# [56] "0359" "0360" "0361" "0362" "0363" "0364" "0365" "0366" "0367" "0368" "0369"
# [67] "0370" "0371" "0372" "0373" "0374" "0375" "0376" "0377" "0378" "0379" "0380"
# [78] "0381" "0382" "0383" "0384" "0385" "0386" "0387" "0388" "0389" "0390" "0391"
# [89] "0392" "0393" "0394" "0395" "0396" "0397" "0398" "0399" "0400" "0401" "0402"
# [100] "0403" "0404" "0405" "0406" "0407" "0408" "0409" "0410" "0411" "0412" "0413"
# [111] "0414" "0415" "0416" "0417" "0418" "0419" "0420" "0421" "0422" "0423" "0424"
# [122] "0425" "0426" "0427" "0428" "0429" "0430" "0431" "0432" "0433" "0434" "0435"
# [133] "0436" "0437" "0438" "0439" "0440" "0441"
Benchmark
# Benchmark candidates. Each function takes no arguments, reads the global
# character vector `x` (created further down) and returns length(x) consecutive
# numbers starting at min(as.numeric(x)), formatted with leading zeros.

# f1: seq() with an explicit end point; pad width is max(nchar(x)).
f1 <- function() sprintf(paste0("%0", max(nchar(x)), "d"),
seq(min(as.numeric(x)), min(as.numeric(x)) + length(x) - 1))
# f2: seq() with length.out instead of an end point; pad width is max(nchar(x)).
f2 <- function() sprintf(paste0("%0", max(nchar(x)), "d"),
seq(min(as.numeric(x)), length.out=length(x)))
# f3: same as f2 but calling seq.int() instead of seq().
f3 <- function() sprintf(paste0("%0", max(nchar(x)), "d"),
seq.int(min(as.numeric(x)), length.out=length(x)))
# f31: same as f3 but the pad width is taken from the first element only.
f31 <- function() sprintf(paste0("%0", nchar(x[1]), "d"),
seq.int(min(as.numeric(x)), length.out=length(x)))
# f4: colon operator offset by the minimum; pad width from the first element.
f4 <- function() sprintf(paste0("%0", nchar(x[1]), "d"),
0:(length(x) - 1) + min(as.numeric(x)))
# f5: stringr::str_pad() for the padding; pad width from the first element.
f5 <- function() stringr::str_pad(seq.int(min(as.numeric(x)),
length.out=length(x)),
nchar(x[1]), "left", "0")
# Fixed seed so the shuffled test vector is reproducible.
set.seed(5789)
# Test data: the five-character strings "00001" ... "99999" in random order.
x <- sample(sprintf("%05d", 1:99999))
# Time all candidates; the labels on the left are the row names of the results.
microbenchmark::microbenchmark(seq_to=f1(), seq_len=f2(), seq.int=f3(),
seq.int1=f31(), colon=f4(), stringr=f5())
# Unit: milliseconds
# expr min lq mean median uq max neval cld
# seq_to 104.22119 106.83928 108.92791 107.81301 109.68406 124.35686 100 f
# seq_len 87.14385 89.89180 92.34962 90.97192 92.09823 110.59426 100 d
# seq.int 85.72324 87.93885 89.91353 89.03327 90.32758 113.41480 100 c
# seq.int1 59.54312 61.63065 62.86618 62.47707 63.53334 76.33471 100 a
# colon 60.94867 63.16109 64.73306 63.88925 64.79997 81.63646 100 b
# stringr 99.08452 101.56649 104.01522 102.74420 104.20269 158.30948 100 e
I am writing a function that takes in a start and end day in the format of dhhmmss (day-hour-minutes-second) and calculates the length of the Palindrome numbers between the start and end dhhmmss.
By definition the start hhmmss is 000000 and end hhmmss is 235959.
My function has to take only the start d and end d and calculate the length of the Palindrome numbers between these two
Here's how I did it
# Reverse the digits of a number by going through its text representation.
#
# x: a number (or a string of digits).
# Returns the reversed character sequence converted back with as.numeric().
#
# Numbers are rendered in fixed notation before reversing. Plain
# as.character() writes values such as 1e6 as "1e+06", and the reversed text
# "60+e1" cannot be parsed back into a number.
Reverse.numberAsString <- function(x) {
  txt <- if (is.numeric(x)) {
    # Format element by element so one value never affects how another is
    # rendered; digits = 15 mirrors the precision as.character() uses.
    vapply(x, format, character(1), scientific = FALSE, digits = 15)
  } else {
    as.character(x)
  }
  chars <- rev(unlist(strsplit(txt, "")))  # single characters, back to front
  as.numeric(paste(chars, collapse = ""))
}
# Element-wise palindrome test: TRUE where an element of `x` equals the value
# Reverse.numberAsString() produces for it.
is.Palindrome <- function(x) {
  reversed <- vapply(x, Reverse.numberAsString, numeric(1))
  x == reversed
}
# Count the palindromic numbers between day n1 at 000000 and day n2 at 235959.
#
# n1, n2: start and end day. The day is pasted in front of "000000" and
#         "235959" respectively and the result is converted to a number.
# Returns the count of values in that numeric range for which is.Palindrome()
# is TRUE. When n1 > n2 a message is printed instead.
palindrom_fun <- function(n1, n2) {
  if (n1 > n2) {
    print('n1 cannot be > n2')
  } else {
    lower <- as.numeric(paste0(n1, "000000"))
    upper <- as.numeric(paste0(n2, "235959"))
    candidates <- seq(from = lower, to = upper, by = 1)
    hits <- candidates[is.Palindrome(candidates)]
    return(length(hits))
  }
}
palindrom_fun(1, 2)
# 1236
However, the above function will not work if n1 = 0 and n2 = 1 because of the lines
n1.mod <- as.numeric(paste(c(n1, "000000"), collapse = ""))
n2.mod <- as.numeric(paste(c(n2, "235959"), collapse = ""))
since R is not able to create a sequence of numbers from 0000000 to 1235959. How can I get my function to work for this case?
You may compare the head and the reversed tail of the character vector using index ranges built with : (since head and tail are slow). For the desired sequence you may use sprintf to generate leading zeros.
# Vectorised palindrome test on the text representation of each element.
#
# x: vector of strings (or values that as.character() can convert).
# Returns a logical vector with one entry per element of x: TRUE when the
# first half of the characters equals the mirrored second half. For an odd
# number of characters the middle one is not compared.
isPalindrome <- Vectorize(function(x) {
  chars <- strsplit(as.character(x), "")[[1]]
  n <- length(chars)
  # Number of positions to compare from each end; at least one.
  half <- max(n %/% 2, 1)
  front <- seq_len(half)
  # n + 1 - front walks in from the end: n, n - 1, ...
  all(chars[front] == chars[n + 1 - front])
})
## Test
x <- c("0000000", "1123456", "1231321", "0000", "1234", "11", "12", "1")
isPalindrome(x)
# 0000000 1123456 1231321 0000 1234 11 12 1
# TRUE FALSE TRUE TRUE FALSE TRUE FALSE TRUE
In the following palindromFun function I'll add the actual palindromes as an attribute so that they are returned by the function. (To switch off this behavior, just comment out the line with the ## mark.)
# Count palindromic day+time strings for the days n1 to n2.
#
# n1, n2: first and last day (integers, n1 <= n2).
# Returns the number of palindromes; the matching strings are attached as
# attribute "palindroms". When n1 > n2 a message is printed instead.
#
# NOTE(review): 0:235959 enumerates every integer in that range, so the
# six-digit part also contains values such as "006000" whose minute or second
# fields exceed 59.
palindromFun <- function(n1, n2) {
  if (n1 > n2) {
    print('n1 cannot be > n2')
  } else {
    clock <- sprintf("%06d", 0:235959)
    days <- n1:n2
    # Each day is repeated once per six-digit value, then the two are glued.
    stamps <- paste0(rep(days, each = length(clock)), clock)
    hits <- isPalindrome(stamps)
    count <- sum(hits)
    attr(count, "palindroms") <- stamps[hits]  ## mark
    return(count)
  }
}
Result 1
r1 <- palindromFun(n1=0, n2=1)
r1
# [1] 472
# attr(,"palindroms")
# [1] "0000000" "0001000" "0002000" "0003000" "0004000" "0005000" "0006000"
# [8] "0007000" "0008000" "0009000" "0010100" "0011100" "0012100" "0013100"
# [15] "0014100" "0015100" "0016100" "0017100" "0018100" "0019100" "0020200"
# [22] "0021200" "0022200" "0023200" "0024200" "0025200" "0026200" "0027200"
# [29] "0028200" "0029200" "0030300" "0031300" "0032300" "0033300" "0034300"
# [36] "0035300" "0036300" "0037300" "0038300" "0039300" "0040400" "0041400"
# [43] "0042400" "0043400" "0044400" "0045400" "0046400" "0047400" "0048400"
# [50] "0049400" "0050500" "0051500" "0052500" "0053500" "0054500" "0055500"
# [57] "0056500" "0057500" "0058500" "0059500" "0060600" "0061600" "0062600"
# [64] "0063600" "0064600" "0065600" "0066600" "0067600" "0068600" "0069600"
# [71] "0070700" "0071700" "0072700" "0073700" "0074700" "0075700" "0076700"
# [78] "0077700" "0078700" "0079700" "0080800" "0081800" "0082800" "0083800"
# [85] "0084800" "0085800" "0086800" "0087800" "0088800" "0089800" "0090900"
# [92] "0091900" "0092900" "0093900" "0094900" "0095900" "0096900" "0097900"
# [99] "0098900" "0099900" "0100010" "0101010" "0102010" "0103010" "0104010"
# [106] "0105010" "0106010" "0107010" "0108010" "0109010" "0110110" "0111110"
# [113] "0112110" "0113110" "0114110" "0115110" "0116110" "0117110" "0118110"
# [120] "0119110" "0120210" "0121210" "0122210" "0123210" "0124210" "0125210"
# [127] "0126210" "0127210" "0128210" "0129210" "0130310" "0131310" "0132310"
# [134] "0133310" "0134310" "0135310" "0136310" "0137310" "0138310" "0139310"
# [141] "0140410" "0141410" "0142410" "0143410" "0144410" "0145410" "0146410"
# [148] "0147410" "0148410" "0149410" "0150510" "0151510" "0152510" "0153510"
# [155] "0154510" "0155510" "0156510" "0157510" "0158510" "0159510" "0160610"
# [162] "0161610" "0162610" "0163610" "0164610" "0165610" "0166610" "0167610"
# [169] "0168610" "0169610" "0170710" "0171710" "0172710" "0173710" "0174710"
# [176] "0175710" "0176710" "0177710" "0178710" "0179710" "0180810" "0181810"
# [183] "0182810" "0183810" "0184810" "0185810" "0186810" "0187810" "0188810"
# [190] "0189810" "0190910" "0191910" "0192910" "0193910" "0194910" "0195910"
# [197] "0196910" "0197910" "0198910" "0199910" "0200020" "0201020" "0202020"
# [204] "0203020" "0204020" "0205020" "0206020" "0207020" "0208020" "0209020"
# [211] "0210120" "0211120" "0212120" "0213120" "0214120" "0215120" "0216120"
# [218] "0217120" "0218120" "0219120" "0220220" "0221220" "0222220" "0223220"
# [225] "0224220" "0225220" "0226220" "0227220" "0228220" "0229220" "0230320"
# [232] "0231320" "0232320" "0233320" "0234320" "0235320" "1000001" "1001001"
# [239] "1002001" "1003001" "1004001" "1005001" "1006001" "1007001" "1008001"
# [246] "1009001" "1010101" "1011101" "1012101" "1013101" "1014101" "1015101"
# [253] "1016101" "1017101" "1018101" "1019101" "1020201" "1021201" "1022201"
# [260] "1023201" "1024201" "1025201" "1026201" "1027201" "1028201" "1029201"
# [267] "1030301" "1031301" "1032301" "1033301" "1034301" "1035301" "1036301"
# [274] "1037301" "1038301" "1039301" "1040401" "1041401" "1042401" "1043401"
# [281] "1044401" "1045401" "1046401" "1047401" "1048401" "1049401" "1050501"
# [288] "1051501" "1052501" "1053501" "1054501" "1055501" "1056501" "1057501"
# [295] "1058501" "1059501" "1060601" "1061601" "1062601" "1063601" "1064601"
# [302] "1065601" "1066601" "1067601" "1068601" "1069601" "1070701" "1071701"
# [309] "1072701" "1073701" "1074701" "1075701" "1076701" "1077701" "1078701"
# [316] "1079701" "1080801" "1081801" "1082801" "1083801" "1084801" "1085801"
# [323] "1086801" "1087801" "1088801" "1089801" "1090901" "1091901" "1092901"
# [330] "1093901" "1094901" "1095901" "1096901" "1097901" "1098901" "1099901"
# [337] "1100011" "1101011" "1102011" "1103011" "1104011" "1105011" "1106011"
# [344] "1107011" "1108011" "1109011" "1110111" "1111111" "1112111" "1113111"
# [351] "1114111" "1115111" "1116111" "1117111" "1118111" "1119111" "1120211"
# [358] "1121211" "1122211" "1123211" "1124211" "1125211" "1126211" "1127211"
# [365] "1128211" "1129211" "1130311" "1131311" "1132311" "1133311" "1134311"
# [372] "1135311" "1136311" "1137311" "1138311" "1139311" "1140411" "1141411"
# [379] "1142411" "1143411" "1144411" "1145411" "1146411" "1147411" "1148411"
# [386] "1149411" "1150511" "1151511" "1152511" "1153511" "1154511" "1155511"
# [393] "1156511" "1157511" "1158511" "1159511" "1160611" "1161611" "1162611"
# [400] "1163611" "1164611" "1165611" "1166611" "1167611" "1168611" "1169611"
# [407] "1170711" "1171711" "1172711" "1173711" "1174711" "1175711" "1176711"
# [414] "1177711" "1178711" "1179711" "1180811" "1181811" "1182811" "1183811"
# [421] "1184811" "1185811" "1186811" "1187811" "1188811" "1189811" "1190911"
# [428] "1191911" "1192911" "1193911" "1194911" "1195911" "1196911" "1197911"
# [435] "1198911" "1199911" "1200021" "1201021" "1202021" "1203021" "1204021"
# [442] "1205021" "1206021" "1207021" "1208021" "1209021" "1210121" "1211121"
# [449] "1212121" "1213121" "1214121" "1215121" "1216121" "1217121" "1218121"
# [456] "1219121" "1220221" "1221221" "1222221" "1223221" "1224221" "1225221"
# [463] "1226221" "1227221" "1228221" "1229221" "1230321" "1231321" "1232321"
# [470] "1233321" "1234321" "1235321"
Result 2
r2 <- palindromFun(n1=0, n2=2)
r2
# [1] 708
# attr(,"palindroms")
# [1] "0000000" "0001000" "0002000" "0003000" "0004000" "0005000" "0006000"
# [8] "0007000" "0008000" "0009000" "0010100" "0011100" "0012100" "0013100"
# [15] "0014100" "0015100" "0016100" "0017100" "0018100" "0019100" "0020200"
# [22] "0021200" "0022200" "0023200" "0024200" "0025200" "0026200" "0027200"
# [29] "0028200" "0029200" "0030300" "0031300" "0032300" "0033300" "0034300"
# [36] "0035300" "0036300" "0037300" "0038300" "0039300" "0040400" "0041400"
# [43] "0042400" "0043400" "0044400" "0045400" "0046400" "0047400" "0048400"
# [50] "0049400" "0050500" "0051500" "0052500" "0053500" "0054500" "0055500"
# [57] "0056500" "0057500" "0058500" "0059500" "0060600" "0061600" "0062600"
# [64] "0063600" "0064600" "0065600" "0066600" "0067600" "0068600" "0069600"
# [71] "0070700" "0071700" "0072700" "0073700" "0074700" "0075700" "0076700"
# [78] "0077700" "0078700" "0079700" "0080800" "0081800" "0082800" "0083800"
# [85] "0084800" "0085800" "0086800" "0087800" "0088800" "0089800" "0090900"
# [92] "0091900" "0092900" "0093900" "0094900" "0095900" "0096900" "0097900"
# [99] "0098900" "0099900" "0100010" "0101010" "0102010" "0103010" "0104010"
# [106] "0105010" "0106010" "0107010" "0108010" "0109010" "0110110" "0111110"
# [113] "0112110" "0113110" "0114110" "0115110" "0116110" "0117110" "0118110"
# [120] "0119110" "0120210" "0121210" "0122210" "0123210" "0124210" "0125210"
# [127] "0126210" "0127210" "0128210" "0129210" "0130310" "0131310" "0132310"
# [134] "0133310" "0134310" "0135310" "0136310" "0137310" "0138310" "0139310"
# [141] "0140410" "0141410" "0142410" "0143410" "0144410" "0145410" "0146410"
# [148] "0147410" "0148410" "0149410" "0150510" "0151510" "0152510" "0153510"
# [155] "0154510" "0155510" "0156510" "0157510" "0158510" "0159510" "0160610"
# [162] "0161610" "0162610" "0163610" "0164610" "0165610" "0166610" "0167610"
# [169] "0168610" "0169610" "0170710" "0171710" "0172710" "0173710" "0174710"
# [176] "0175710" "0176710" "0177710" "0178710" "0179710" "0180810" "0181810"
# [183] "0182810" "0183810" "0184810" "0185810" "0186810" "0187810" "0188810"
# [190] "0189810" "0190910" "0191910" "0192910" "0193910" "0194910" "0195910"
# [197] "0196910" "0197910" "0198910" "0199910" "0200020" "0201020" "0202020"
# [204] "0203020" "0204020" "0205020" "0206020" "0207020" "0208020" "0209020"
# [211] "0210120" "0211120" "0212120" "0213120" "0214120" "0215120" "0216120"
# [218] "0217120" "0218120" "0219120" "0220220" "0221220" "0222220" "0223220"
# [225] "0224220" "0225220" "0226220" "0227220" "0228220" "0229220" "0230320"
# [232] "0231320" "0232320" "0233320" "0234320" "0235320" "1000001" "1001001"
# [239] "1002001" "1003001" "1004001" "1005001" "1006001" "1007001" "1008001"
# [246] "1009001" "1010101" "1011101" "1012101" "1013101" "1014101" "1015101"
# [253] "1016101" "1017101" "1018101" "1019101" "1020201" "1021201" "1022201"
# [260] "1023201" "1024201" "1025201" "1026201" "1027201" "1028201" "1029201"
# [267] "1030301" "1031301" "1032301" "1033301" "1034301" "1035301" "1036301"
# [274] "1037301" "1038301" "1039301" "1040401" "1041401" "1042401" "1043401"
# [281] "1044401" "1045401" "1046401" "1047401" "1048401" "1049401" "1050501"
# [288] "1051501" "1052501" "1053501" "1054501" "1055501" "1056501" "1057501"
# [295] "1058501" "1059501" "1060601" "1061601" "1062601" "1063601" "1064601"
# [302] "1065601" "1066601" "1067601" "1068601" "1069601" "1070701" "1071701"
# [309] "1072701" "1073701" "1074701" "1075701" "1076701" "1077701" "1078701"
# [316] "1079701" "1080801" "1081801" "1082801" "1083801" "1084801" "1085801"
# [323] "1086801" "1087801" "1088801" "1089801" "1090901" "1091901" "1092901"
# [330] "1093901" "1094901" "1095901" "1096901" "1097901" "1098901" "1099901"
# [337] "1100011" "1101011" "1102011" "1103011" "1104011" "1105011" "1106011"
# [344] "1107011" "1108011" "1109011" "1110111" "1111111" "1112111" "1113111"
# [351] "1114111" "1115111" "1116111" "1117111" "1118111" "1119111" "1120211"
# [358] "1121211" "1122211" "1123211" "1124211" "1125211" "1126211" "1127211"
# [365] "1128211" "1129211" "1130311" "1131311" "1132311" "1133311" "1134311"
# [372] "1135311" "1136311" "1137311" "1138311" "1139311" "1140411" "1141411"
# [379] "1142411" "1143411" "1144411" "1145411" "1146411" "1147411" "1148411"
# [386] "1149411" "1150511" "1151511" "1152511" "1153511" "1154511" "1155511"
# [393] "1156511" "1157511" "1158511" "1159511" "1160611" "1161611" "1162611"
# [400] "1163611" "1164611" "1165611" "1166611" "1167611" "1168611" "1169611"
# [407] "1170711" "1171711" "1172711" "1173711" "1174711" "1175711" "1176711"
# [414] "1177711" "1178711" "1179711" "1180811" "1181811" "1182811" "1183811"
# [421] "1184811" "1185811" "1186811" "1187811" "1188811" "1189811" "1190911"
# [428] "1191911" "1192911" "1193911" "1194911" "1195911" "1196911" "1197911"
# [435] "1198911" "1199911" "1200021" "1201021" "1202021" "1203021" "1204021"
# [442] "1205021" "1206021" "1207021" "1208021" "1209021" "1210121" "1211121"
# [449] "1212121" "1213121" "1214121" "1215121" "1216121" "1217121" "1218121"
# [456] "1219121" "1220221" "1221221" "1222221" "1223221" "1224221" "1225221"
# [463] "1226221" "1227221" "1228221" "1229221" "1230321" "1231321" "1232321"
# [470] "1233321" "1234321" "1235321" "2000002" "2001002" "2002002" "2003002"
# [477] "2004002" "2005002" "2006002" "2007002" "2008002" "2009002" "2010102"
# [484] "2011102" "2012102" "2013102" "2014102" "2015102" "2016102" "2017102"
# [491] "2018102" "2019102" "2020202" "2021202" "2022202" "2023202" "2024202"
# [498] "2025202" "2026202" "2027202" "2028202" "2029202" "2030302" "2031302"
# [505] "2032302" "2033302" "2034302" "2035302" "2036302" "2037302" "2038302"
# [512] "2039302" "2040402" "2041402" "2042402" "2043402" "2044402" "2045402"
# [519] "2046402" "2047402" "2048402" "2049402" "2050502" "2051502" "2052502"
# [526] "2053502" "2054502" "2055502" "2056502" "2057502" "2058502" "2059502"
# [533] "2060602" "2061602" "2062602" "2063602" "2064602" "2065602" "2066602"
# [540] "2067602" "2068602" "2069602" "2070702" "2071702" "2072702" "2073702"
# [547] "2074702" "2075702" "2076702" "2077702" "2078702" "2079702" "2080802"
# [554] "2081802" "2082802" "2083802" "2084802" "2085802" "2086802" "2087802"
# [561] "2088802" "2089802" "2090902" "2091902" "2092902" "2093902" "2094902"
# [568] "2095902" "2096902" "2097902" "2098902" "2099902" "2100012" "2101012"
# [575] "2102012" "2103012" "2104012" "2105012" "2106012" "2107012" "2108012"
# [582] "2109012" "2110112" "2111112" "2112112" "2113112" "2114112" "2115112"
# [589] "2116112" "2117112" "2118112" "2119112" "2120212" "2121212" "2122212"
# [596] "2123212" "2124212" "2125212" "2126212" "2127212" "2128212" "2129212"
# [603] "2130312" "2131312" "2132312" "2133312" "2134312" "2135312" "2136312"
# [610] "2137312" "2138312" "2139312" "2140412" "2141412" "2142412" "2143412"
# [617] "2144412" "2145412" "2146412" "2147412" "2148412" "2149412" "2150512"
# [624] "2151512" "2152512" "2153512" "2154512" "2155512" "2156512" "2157512"
# [631] "2158512" "2159512" "2160612" "2161612" "2162612" "2163612" "2164612"
# [638] "2165612" "2166612" "2167612" "2168612" "2169612" "2170712" "2171712"
# [645] "2172712" "2173712" "2174712" "2175712" "2176712" "2177712" "2178712"
# [652] "2179712" "2180812" "2181812" "2182812" "2183812" "2184812" "2185812"
# [659] "2186812" "2187812" "2188812" "2189812" "2190912" "2191912" "2192912"
# [666] "2193912" "2194912" "2195912" "2196912" "2197912" "2198912" "2199912"
# [673] "2200022" "2201022" "2202022" "2203022" "2204022" "2205022" "2206022"
# [680] "2207022" "2208022" "2209022" "2210122" "2211122" "2212122" "2213122"
# [687] "2214122" "2215122" "2216122" "2217122" "2218122" "2219122" "2220222"
# [694] "2221222" "2222222" "2223222" "2224222" "2225222" "2226222" "2227222"
# [701] "2228222" "2229222" "2230322" "2231322" "2232322" "2233322" "2234322"
# [708] "2235322"
My number of palindromes seems to be different from yours, though.
Here is a quick method to create the desired sequence using R's builtin time and date functions.
# Create one timestamp per second for a single day (24 * 60 * 60 values).
# length.out makes the sequence stop at 23:59:59, so nothing has to be dropped
# afterwards, and the fixed UTC time zone keeps the result independent of the
# local time zone.
dateseq <- seq(as.POSIXct("2020-08-15", tz = "UTC"), by = "1 sec",
               length.out = 24 * 60 * 60)
# Format every timestamp as hhmmss
answer <- format(dateseq, "%H%M%S")
tail(answer)
#[1] "235954" "235955" "235956" "235957" "235958" "235959"
Here's one way to approach the entire problem using a functional approach, using only base R. That is, breaking each problem down to a single task and building up the functionality you need:
# Parse strings of the form "dhhmmss" / "ddhhmmss" into date-times.
# Seven-character entries get a "0" prepended so the day field has two digits,
# then everything is parsed with the format "%d%H%M%S".
# NOTE(review): month and year are not part of the format, so strptime()
# presumably fills them in from the current date - confirm.
as_time <- function(chr) {
  short <- nchar(chr) == 7
  chr[short] <- paste0("0", chr[short])
  strptime(chr, "%d%H%M%S")
}
# Turn date-times back into strings such as "1234556": the day of the month
# without a leading zero, followed by the time as hhmmss.
# The day is read from characters 9-10 of the text form of `t`
# (assumes that form starts with "YYYY-MM-DD").
as_chr <- function(t) {
  day <- as.numeric(substr(t, 9, 10))
  clock <- strftime(t, "%H%M%S")
  paste0(day, clock)
}
# Gets every valid time string, one per second, between two strings in the
# format "1234556" (both end points included).
seq_times <- function(t1, t2) {
  from <- as_time(t1)
  to <- as_time(t2)
  as_chr(seq(from, to, by = "1 sec"))
}
# Reverse every string in a character vector.
#
# s: character vector.
# Returns a character vector of the same length as `s` with the characters of
# each element in reverse order. vapply() guarantees a character result even
# for empty input, where sapply() would hand back an empty list.
rev_string <- function(s) {
  vapply(
    s,
    function(x) intToUtf8(rev(utf8ToInt(x))),  # reverse via code points
    character(1),
    USE.NAMES = FALSE
  )
}
# Lists the palindromes among all time strings between t1 and t2:
# builds the sequence with seq_times() and keeps the entries that are equal
# to their own reversal.
get_palindromes <- function(t1, t2) {
  candidates <- seq_times(t1, t2)
  is_pal <- candidates == rev_string(candidates)
  candidates[is_pal]
}
So now we can do:
get_palindromes("1000000", "2000000")
#> [1] "1000001" "1001001" "1002001" "1003001" "1004001" "1005001" "1010101"
#> [8] "1011101" "1012101" "1013101" "1014101" "1015101" "1020201" "1021201"
#> [15] "1022201" "1023201" "1024201" "1025201" "1030301" "1031301" "1032301"
#> [22] "1033301" "1034301" "1035301" "1040401" "1041401" "1042401" "1043401"
#> [29] "1044401" "1045401" "1050501" "1051501" "1052501" "1053501" "1054501"
#> [36] "1055501" "1060601" "1061601" "1062601" "1063601" "1064601" "1065601"
#> [43] "1070701" "1071701" "1072701" "1073701" "1074701" "1075701" "1080801"
#> [50] "1081801" "1082801" "1083801" "1084801" "1085801" "1090901" "1091901"
#> [57] "1092901" "1093901" "1094901" "1095901" "1100011" "1101011" "1102011"
#> [64] "1103011" "1104011" "1105011" "1110111" "1111111" "1112111" "1113111"
#> [71] "1114111" "1115111" "1120211" "1121211" "1122211" "1123211" "1124211"
#> [78] "1125211" "1130311" "1131311" "1132311" "1133311" "1134311" "1135311"
#> [85] "1140411" "1141411" "1142411" "1143411" "1144411" "1145411" "1150511"
#> [92] "1151511" "1152511" "1153511" "1154511" "1155511" "1160611" "1161611"
#> [99] "1162611" "1163611" "1164611" "1165611" "1170711" "1171711" "1172711"
#> [106] "1173711" "1174711" "1175711" "1180811" "1181811" "1182811" "1183811"
#> [113] "1184811" "1185811" "1190911" "1191911" "1192911" "1193911" "1194911"
#> [120] "1195911" "1200021" "1201021" "1202021" "1203021" "1204021" "1205021"
#> [127] "1210121" "1211121" "1212121" "1213121" "1214121" "1215121" "1220221"
#> [134] "1221221" "1222221" "1223221" "1224221" "1225221" "1230321" "1231321"
#> [141] "1232321" "1233321" "1234321" "1235321"
and
get_palindromes("2235000", "3060000")
#> [1] "2235322" "3000003" "3001003" "3002003" "3003003" "3004003" "3005003"
#> [8] "3010103" "3011103" "3012103" "3013103" "3014103" "3015103" "3020203"
#> [15] "3021203" "3022203" "3023203" "3024203" "3025203" "3030303" "3031303"
#> [22] "3032303" "3033303" "3034303" "3035303" "3040403" "3041403" "3042403"
#> [29] "3043403" "3044403" "3045403" "3050503" "3051503" "3052503" "3053503"
#> [36] "3054503" "3055503"
What do you mean by the length? If you mean the count then I think we can use of simple math to see how many possibilities are there.
Let us say for n1 = 1 and n2 =2, out of 7 places available(dhhmmss), you can have only 2 choices for the 1st and the 7th place. Now for the remaining 6 places, we need to think only about first 3 places as the rest of them will be same as the first three( by the palindrome logic).
Now for the 2nd place, we can have only 3 choices(0, 1, 2 as we can only have the hour from 00 to 23, just consider the ten's place). Let us store the value at the 2nd place to a variable h. Next, we have 3rd place which can have 10, 10 and 4 choices for h={0,1,2} respectively. Following that, we have the 4th place which can only have 6 choices( ranging from 00 to 59,here just the ten's place).
Hence, the total choices are 2*[10+10+4]*6 = 288 choices.
You can use rep() to create the various time elements (days, hours,etc) and then expand.grid() to get every combination of the elements. stri_reverse() from stringi can be used to compare the reverse of the string and thus establish if it is a palindrome.
# List every palindromic dhhmmss string for the days day_start to day_end.
#
# day_start, day_end: first and last day (integers).
# Returns a character vector of the palindromes, in the row order produced by
# expand.grid() (first column varies fastest).
find_palindrome <- function(day_start, day_end) {
  # "%02d" keeps two digits, e.g. "01" instead of "1".
  two_digit <- function(v) sprintf("%02d", v)
  # expand.grid() builds every combination of day, hour, minute and second;
  # 0:59 is used twice, once for the minutes and once for the seconds.
  grid <- expand.grid(
    day_start:day_end,
    two_digit(0:23),
    two_digit(0:59),
    two_digit(0:59)
  )
  # Concatenate the four parts of each row into one string.
  stamp <- paste0(grid[[1]], grid[[2]], grid[[3]], grid[[4]])
  # A palindrome reads the same after reversing it.
  stamp[stamp == stringi::stri_reverse(stamp)]
}
Then run the function:
#example using day 0 to day 2
find_palindrome(0,2)
I would like for R to read in the first 10,000 digits of Pi and group every 10 digits together
e.g., I want R to read in a sequence
pi <- 3.14159265358979323846264338327950288419716939937510582097...
and would like R to give me a table where each row contains 10 digit:
3141592653
5897932384
6264338327
...
I am new to R and really don't know where to start so any help would be much appreciated!
Thank you in advance
https://rextester.com/OQRM27791
# Split the digit string into a vector of single characters.
p <- strsplit("314159265358979323846264338327950288419716939937510582097", "")
digits <- p[[1]]
# Label each digit with the index of its block of ten (1, 1, ..., 2, 2, ...)
# and split on that label. seq_along() is used instead of 1:length(), which
# would yield c(1, 0) for empty input. The outer parentheses print the
# result at the console while also keeping it in `groups`.
(groups <- split(digits, ceiling(seq_along(digits) / 10)))
Here's one way to do it. It's fully reproducible, so just cut and paste it into your R console. The vector result is the first 10,000 digits of pi, split into 1000 strings of 10 digits.
For this many digits, I have used an online source for the precalculated value of pi. This is read in using readChar and the decimal point is stripped out with gsub. The resulting string is split into individual characters and put in a 1000 * 10 matrix (filled row-wise). The rows are then pasted into strings, giving the result. I have displayed only the first 100 entries of result for clarity of presentation.
pi_url <- "https://www.pi2e.ch/blog/wp-content/uploads/2017/03/pi_dec_1m.txt"
# Read the first 10,000 digits plus the decimal point (hence the + 1) and
# strip the point. The source must be `pi_url`; the bare name `url` would
# refer to the base R function url(), not to the address defined above.
pi_char <- gsub("\\.", "", readChar(pi_url, 1e4 + 1))
# One character per cell, filled row-wise, so each row holds 10 digits.
pi_mat <- matrix(strsplit(pi_char, "")[[1]], byrow = TRUE, ncol = 10)
# Paste every row back together into a single 10-digit string.
result <- apply(pi_mat, 1, paste0, collapse = "")
head(result, 100)
#> [1] "3141592653" "5897932384" "6264338327" "9502884197" "1693993751"
#> [6] "0582097494" "4592307816" "4062862089" "9862803482" "5342117067"
#> [11] "9821480865" "1328230664" "7093844609" "5505822317" "2535940812"
#> [16] "8481117450" "2841027019" "3852110555" "9644622948" "9549303819"
#> [21] "6442881097" "5665933446" "1284756482" "3378678316" "5271201909"
#> [26] "1456485669" "2346034861" "0454326648" "2133936072" "6024914127"
#> [31] "3724587006" "6063155881" "7488152092" "0962829254" "0917153643"
#> [36] "6789259036" "0011330530" "5488204665" "2138414695" "1941511609"
#> [41] "4330572703" "6575959195" "3092186117" "3819326117" "9310511854"
#> [46] "8074462379" "9627495673" "5188575272" "4891227938" "1830119491"
#> [51] "2983367336" "2440656643" "0860213949" "4639522473" "7190702179"
#> [56] "8609437027" "7053921717" "6293176752" "3846748184" "6766940513"
#> [61] "2000568127" "1452635608" "2778577134" "2757789609" "1736371787"
#> [66] "2146844090" "1224953430" "1465495853" "7105079227" "9689258923"
#> [71] "5420199561" "1212902196" "0864034418" "1598136297" "7477130996"
#> [76] "0518707211" "3499999983" "7297804995" "1059731732" "8160963185"
#> [81] "9502445945" "5346908302" "6425223082" "5334468503" "5261931188"
#> [86] "1710100031" "3783875288" "6587533208" "3814206171" "7766914730"
#> [91] "3598253490" "4287554687" "3115956286" "3882353787" "5937519577"
#> [96] "8185778053" "2171226806" "6130019278" "7661119590" "9216420198"
Created on 2020-07-23 by the reprex package (v0.3.0)
We can use str_extract:
library(stringr)
# Names chosen so that the built-in constant `pi` and the base function
# t() are not masked by the script's variables.
pi_lines <- readLines("https://www.pi2e.ch/blog/wp-content/uploads/2017/03/pi_dec_1m.txt")
# Drop the decimal point, then pull out consecutive runs of exactly 10 digits.
pi_chunks <- unlist(str_extract_all(sub("\\.", "", pi_lines), "\\d{10}"))
pi_chunks[1:100]
[1] "3141592653" "5897932384" "6264338327" "9502884197" "1693993751" "0582097494" "4592307816" "4062862089"
[9] "9862803482" "5342117067" "9821480865" "1328230664" "7093844609" "5505822317" "2535940812" "8481117450"
[17] "2841027019" "3852110555" "9644622948" "9549303819" "6442881097" "5665933446" "1284756482" "3378678316"
[25] "5271201909" "1456485669" "2346034861" "0454326648" "2133936072" "6024914127" "3724587006" "6063155881"
[33] "7488152092" "0962829254" "0917153643" "6789259036" "0011330530" "5488204665" "2138414695" "1941511609"
[41] "4330572703" "6575959195" "3092186117" "3819326117" "9310511854" "8074462379" "9627495673" "5188575272"
[49] "4891227938" "1830119491" "2983367336" "2440656643" "0860213949" "4639522473" "7190702179" "8609437027"
[57] "7053921717" "6293176752" "3846748184" "6766940513" "2000568127" "1452635608" "2778577134" "2757789609"
[65] "1736371787" "2146844090" "1224953430" "1465495853" "7105079227" "9689258923" "5420199561" "1212902196"
[73] "0864034418" "1598136297" "7477130996" "0518707211" "3499999983" "7297804995" "1059731732" "8160963185"
[81] "9502445945" "5346908302" "6425223082" "5334468503" "5261931188" "1710100031" "3783875288" "6587533208"
[89] "3814206171" "7766914730" "3598253490" "4287554687" "3115956286" "3882353787" "5937519577" "8185778053"
[97] "2171226806" "6130019278" "7661119590" "9216420198"
I have a word and want to output in R all possible deviations (deletion, substitution, insertion) for a fixed distance value into a vector.
For instance, the word "Cat" and a fixed distance value of 1 results in a vector with the elements "cot", "at", ...
I'm going to assume that you want all actual words, not just permutations of the characters with an edit distance of 1 that would include non-words such as "zat".
We can do this using adist() to compute the edit distance between your target word and all eligible English words, taken from some word list. Here, I used the English syllable dictionary from the quanteda package (you did tag this question as quanteda after all) but this could have been any vector of English dictionary words from any other source as well.
To narrow things down, we first exclude all words that are different in length from the target word by your distance value.
# Return the dictionary words whose edit distance from `word`, as computed
# by utils::adist(), is exactly `distance`.
#
# word:     the target word (a single string).
# distance: the required edit distance (default 1).
# The dictionary is the set of names of quanteda::data_int_syllables.
distfn <- function(word, distance = 1) {
  vocabulary <- names(quanteda::data_int_syllables)
  # Cheap pre-filter for efficiency: keep only words whose length differs
  # from the target's by no more than the requested distance.
  length_gap <- abs(nchar(vocabulary) - nchar(word))
  candidates <- vocabulary[length_gap <= distance]
  # Edit distance from the target to every remaining candidate.
  edit_dist <- utils::adist(word, candidates)[1, ]
  # Keep only the candidates at exactly the requested distance.
  candidates[edit_dist == distance]
}
distfn("cat", 1)
## [1] "at" "bat" "ca" "cab" "cac" "cad" "cai" "cal" "cam" "can"
## [11] "cant" "cao" "cap" "caq" "car" "cart" "cas" "cast" "cate" "cato"
## [21] "cats" "catt" "cau" "caw" "cay" "chat" "coat" "cot" "ct" "cut"
## [31] "dat" "eat" "fat" "gat" "hat" "kat" "lat" "mat" "nat" "oat"
## [41] "pat" "rat" "sat" "scat" "tat" "vat" "wat"
To demonstrate how this works on longer words, with alternative distance values.
distfn("coffee", 1)
## [1] "caffee" "coffeen" "coffees" "coffel" "coffer" "coffey" "cuffee"
## [8] "toffee"
distfn("coffee", 2)
## [1] "caffey" "calfee" "chafee" "chaffee" "cofer" "coffee's"
## [7] "coffelt" "coffers" "coffin" "cofide" "cohee" "coiffe"
## [13] "coiffed" "colee" "colfer" "combee" "comfed" "confer"
## [19] "conlee" "coppee" "cottee" "coulee" "coutee" "cuffe"
## [25] "cuffed" "diffee" "duffee" "hoffer" "jaffee" "joffe"
## [31] "mcaffee" "moffet" "noffke" "offen" "offer" "roffe"
## [37] "scoffed" "soffel" "soffer" "yoffie"
(Yes, according to the CMU pronunciation dictionary, those are all actual words...)
EDIT: Make for all permutations of letters, not just actual words
This involves permutations from the alphabet that have the fixed edit distance from the input word. Here I've done it not particularly efficiently, by forming all permutations of letters within the eligible length range, then computing their edit distance from the target word, and then selecting those at the requested distance. So it's a variation of the above, except that instead of a dictionary, it uses permuted words.
# Return every string over `alphabet` whose edit distance from `word`, as
# computed by utils::adist(), is exactly `distance`.
#
# word:     the target word (a single string).
# distance: the required edit distance, a non-negative whole number.
# alphabet: the symbols candidate strings are built from (default: letters).
#
# Returns a character vector ordered by candidate length; the word itself
# is never returned, and the empty string is returned when it qualifies.
#
# This is brute force: all strings of each eligible length are generated,
# so the cost grows as length(alphabet)^(nchar(word) + distance).
distfn2 <- function(word, distance = 1, alphabet = letters) {
  stopifnot(
    is.character(word), length(word) == 1L, !is.na(word),
    is.numeric(distance), length(distance) == 1L,
    distance >= 0, distance == floor(distance)
  )
  word_len <- nchar(word)
  # A single edit changes the length by at most one, so only strings whose
  # length is within `distance` of the word's length can qualify.
  candidate_lengths <- seq(max(word_len - distance, 0), word_len + distance)
  matches <- lapply(candidate_lengths, function(len) {
    if (len == 0) {
      # expand.grid() cannot build zero-length strings; the only one is "".
      candidates <- ""
    } else {
      # all possible strings of this length
      grid <- expand.grid(rep(list(alphabet), len), stringsAsFactors = FALSE)
      candidates <- do.call(paste0, unname(as.list(grid)))
    }
    # remove original word
    candidates <- candidates[candidates != word]
    if (length(candidates) == 0L) {
      return(character())
    }
    # Filter every candidate by its actual distance. Shorter strings are
    # filtered too, so results at a smaller distance are never returned and
    # shorter strings that also need a substitution are not missed.
    distances <- utils::adist(word, candidates)[1, ]
    candidates[distances == distance]
  })
  unlist(matches, use.names = FALSE)
}
For the OP example:
distfn2("cat", 1)
## [1] "ca" "ct" "at" "caa" "cab" "cac" "cad" "cae" "caf" "cag"
## [11] "cah" "cai" "caj" "cak" "cal" "cam" "can" "cao" "cap" "caq"
## [21] "car" "cas" "aat" "bat" "dat" "eat" "fat" "gat" "hat" "iat"
## [31] "jat" "kat" "lat" "mat" "nat" "oat" "pat" "qat" "rat" "sat"
## [41] "tat" "uat" "vat" "wat" "xat" "yat" "zat" "cbt" "cct" "cdt"
## [51] "cet" "cft" "cgt" "cht" "cit" "cjt" "ckt" "clt" "cmt" "cnt"
## [61] "cot" "cpt" "cqt" "crt" "cst" "ctt" "cut" "cvt" "cwt" "cxt"
## [71] "cyt" "czt" "cau" "cav" "caw" "cax" "cay" "caz" "cata" "catb"
## [81] "catc" "catd" "cate" "catf" "catg" "cath" "cati" "catj" "catk" "catl"
## [91] "catm" "catn" "cato" "catp" "catq" "catr" "cats" "caat" "cbat" "acat"
## [101] "bcat" "ccat" "dcat" "ecat" "fcat" "gcat" "hcat" "icat" "jcat" "kcat"
## [111] "lcat" "mcat" "ncat" "ocat" "pcat" "qcat" "rcat" "scat" "tcat" "ucat"
## [121] "vcat" "wcat" "xcat" "ycat" "zcat" "cdat" "ceat" "cfat" "cgat" "chat"
## [131] "ciat" "cjat" "ckat" "clat" "cmat" "cnat" "coat" "cpat" "cqat" "crat"
## [141] "csat" "ctat" "cuat" "cvat" "cwat" "cxat" "cyat" "czat" "cabt" "cact"
## [151] "cadt" "caet" "caft" "cagt" "caht" "cait" "cajt" "cakt" "calt" "camt"
## [161] "cant" "caot" "capt" "caqt" "cart" "cast" "catt" "caut" "cavt" "cawt"
## [171] "caxt" "cayt" "cazt" "catu" "catv" "catw" "catx" "caty" "catz"
Also works with other edit distances, although it becomes very slow for longer words.
d2 <- distfn2("cat", 2)
set.seed(100)
c(head(d2, 50), sample(d2, 50), tail(d2, 50))
## [1] "c" "a" "t" "ca" "ct" "at" "aaa" "baa"
## [9] "daa" "eaa" "faa" "gaa" "haa" "iaa" "jaa" "kaa"
## [17] "laa" "maa" "naa" "oaa" "paa" "qaa" "raa" "saa"
## [25] "taa" "uaa" "vaa" "waa" "xaa" "yaa" "zaa" "cba"
## [33] "aca" "bca" "cca" "dca" "eca" "fca" "gca" "hca"
## [41] "ica" "jca" "kca" "lca" "mca" "nca" "oca" "pca"
## [49] "qca" "rca" "cnts" "cian" "pcatb" "cqo" "uawt" "hazt"
## [57] "cpxat" "aaet" "ckata" "caod" "ncatl" "qcamt" "cdtp" "qajt"
## [65] "bckat" "qcatr" "cqah" "rcbt" "cvbt" "bbcat" "vcaz" "ylcat"
## [73] "cahz" "jcgat" "mant" "jatd" "czlat" "cbamt" "cajta" "cafp"
## [81] "cizt" "cmaut" "qwat" "jcazt" "hdcat" "ucant" "hate" "cajtl"
## [89] "caaty" "cix" "nmat" "cajit" "cmnat" "caobt" "catoi" "ncau"
## [97] "ucoat" "ncamt" "jath" "oats" "chatz" "ciatz" "cjatz" "ckatz"
## [105] "clatz" "cmatz" "cnatz" "coatz" "cpatz" "cqatz" "cratz" "csatz"
## [113] "ctatz" "cuatz" "cvatz" "cwatz" "cxatz" "cyatz" "czatz" "cabtz"
## [121] "cactz" "cadtz" "caetz" "caftz" "cagtz" "cahtz" "caitz" "cajtz"
## [129] "caktz" "caltz" "camtz" "cantz" "caotz" "captz" "caqtz" "cartz"
## [137] "castz" "cattz" "cautz" "cavtz" "cawtz" "caxtz" "caytz" "caztz"
## [145] "catuz" "catvz" "catwz" "catxz" "catyz" "catzz"
This could be sped up by avoiding the brute-force formation of all permutations followed by adist(): the changes and insertions at a known edit distance could instead be generated algorithmically from letters.
I am trying to turn a vector of length n (say, 14) into a vector of length N (say, 90). For example, my vector is
x<-c(5,3,7,11,12,19,40,2,22,6,10,12,12,4)
and I want to turn it into a vector of length 90, by creating 90 equally "spaced" points on this vector- think of x as a function. Is there any way to do that in R?
Something like this?
> x<-c(5,3,7,11,12,19,40,2,22,6,10,12,12,4)
> seq(min(x),max(x),length=90)
[1] 2.000000 2.426966 2.853933 3.280899 3.707865 4.134831 4.561798
[8] 4.988764 5.415730 5.842697 6.269663 6.696629 7.123596 7.550562
[15] 7.977528 8.404494 8.831461 9.258427 9.685393 10.112360 10.539326
[22] 10.966292 11.393258 11.820225 12.247191 12.674157 13.101124 13.528090
[29] 13.955056 14.382022 14.808989 15.235955 15.662921 16.089888 16.516854
[36] 16.943820 17.370787 17.797753 18.224719 18.651685 19.078652 19.505618
[43] 19.932584 20.359551 20.786517 21.213483 21.640449 22.067416 22.494382
[50] 22.921348 23.348315 23.775281 24.202247 24.629213 25.056180 25.483146
[57] 25.910112 26.337079 26.764045 27.191011 27.617978 28.044944 28.471910
[64] 28.898876 29.325843 29.752809 30.179775 30.606742 31.033708 31.460674
[71] 31.887640 32.314607 32.741573 33.168539 33.595506 34.022472 34.449438
[78] 34.876404 35.303371 35.730337 36.157303 36.584270 37.011236 37.438202
[85] 37.865169 38.292135 38.719101 39.146067 39.573034 40.000000
>
Try this:
# data
x <- c(5, 3, 7, 11, 12, 19, 40, 2, 22, 6, 10, 12, 12, 4)
# expected new length
N <- 90
# new data: treat x as a function of its index and linearly interpolate it
# at N equally spaced positions between the first and the last index.
# Interpolating each segment separately with seq(..., length.out = k) would
# duplicate every interior point and could not return exactly N values.
x1 <- approx(seq_along(x), x, n = N)$y
# plot the original and the resampled vector one above the other, then
# restore the previous graphics layout
old_par <- par(mfrow = c(2, 1))
plot(x)
plot(x1)
par(old_par)