What is the format of "{123, affdsf, 223, 22, dgbwa, 33333}"? - r

I have a string in the following format. How can I convert it to a list in R?
"{1948, 2507, 2510, 7030, 7110, 9009, 00027, 00206, 00399, 00717, 00814, 00828, 00848, 00917, 01050, 01105, 01144, 02130, 02768, 03037, 03752, 03754, 04070, 04110, 05050, 05255, 05289, 05564, 05595, 06100, 06330, 06671, 07041, 07119, 07137, 07273, 07313, 07454, 07871, 08104, 08714, 08726, 08995, 09059, 09073, 09525, 09949, 09981, 10092, 10439, 10782, 11185, 11507, 11712, 11806, 11858, 11980, 12067, 12113, 12139, 12643, 13820, 14534, 15007, 15014, 15549, 15953, 16151, 16174, 16634, 16733, 16888, 17111, 17207, 17377, 17721, 17900, 18118, 18400, 18686, 18880, 19080, 19342, 19444, 19772, 19790, 19891, 20091, 20245, 20402, 20811, 21114, 21345, 21811, 21881, 22222, 22311, 22320, 22831, 22969, 23251, 23572, 23734, 23862, 23889, 24034, 24463, 25172, 25688, 26143, 26221, 26803, 26850, 26898, 27497, 28291, 28343, 29411, 29419, 30024, 30561, 30923, 31345, 31351, 31555, 31927, 32198, 32861, 33020, 33040, 33095, 33188, 33311, 33368, 33377, 33475, 33519, 33574, 33592, 34207, 34235, 34272, 34484, 34854, 34872, 34875, 34876, 34880, 35222, 35292, 35344, 36177, 36266, 37038, 37060, 37548, 37686, 37700, 38139, 39368, 39369, 39633, 40132, 40698, 40704, 40744, 40819, 41311, 41971, 42102, 42616, 43055, 43211, 43234, 43428, 43494, 43934, 44117, 44252, 44272, 44301, 44336, 44619, 44866, 44888, 45049, 45197, 45412, 45718, 46694, 46736, 47000, 48046, 48540, 49078, 49109, 49216, 49388, 49464, 50056, 50155, 50217, 50477, 50692, 51122, 51445, 51946, 52475, 52537, 52982, 54011, 54031, 54160, 54963, 55000, 55537, 56080, 56163, 56282, 56760, 56787, 57102, 57727, 57871, 58101, 58558, 58882, 59902, 60225, 60397, 60501, 60619, 60703, 60890, 61075, 61894, 61944, 62322, 62337, 62380, 62413, 62729, 62766, 62923, 63010, 63234, 63977, 64127, 65359, 65428, 65542, 65750, 65863, 66184, 66636, 66712, 67201, 67439, 67953, 68133, 68854, 69251, 69959, 70107, 70725, 70768, 71081, 71099, 71948, 72013, 72377, 72400, 72420, 72735, 73000, 73015, 73142, 73223, 73455, 73717, 74049, 74492, 74854, 74941, 75142, 75399, 
75464, 75587, 75618, 75642, 75887, 76357, 76651, 77199, 77302, 77456, 77579, 77601, 77649, 77668, 77694, 77745, 78006, 78010, 78178, 78335, 78656, 78729, 78808, 78824, 78844, 78945, 79416, 79471, 79915, 80077, 80111, 80189, 80262, 80409, 80470, 80529, 80539, 80838, 81272, 81513, 81658, 81740, 81743, 81762, 81843, 82001, 82070, 82106, 82342, 82472, 82719, 83670, 84009, 84151, 84299, 84430, 84450, 84460, 84945, 86411, 86443, 86446, 86668, 86942, 87286, 87317, 87624, 87785, 88023, 88517, 88696, 88787, 88868, 88977, 89206, 90108, 90440, 90734, 90802, 90849, 90920, 90931, 91011, 91031, 91133, 91777, 91949, 92162, 92494, 93012, 93172, 94300, 94517, 95142, 95410, 95559, 95859, 96112, 97255, 97787, 97986, 98240, 98817, 99050, 99198, 99222, 99241, 99295, 99326, 99335, 99503, 99603, 99643, 99803, 99968}"
THIS IS NOT A DUPLICATE OF convert json to list in a vectorized way in R
IT'S COMPLETELY DIFFERENT BECAUSE THE FORMAT IS ABSOLUTELY DIFFERENT.

Try this one-line solution:
# Strip the surrounding "{" and "}", split on commas, trim the whitespace left
# around each piece, and convert the pieces to numbers.
# unlist() flattens the list returned by strsplit() so the vectorised trimws()
# can be applied directly; sapply() is avoided because its return type changes
# with the shape of its input.
as.numeric(trimws(unlist(strsplit(substr(j, 2, nchar(j) - 1), split = ","))))
[1] 1948 2507 2510 7030 7110 9009 27 206 399 717 814 828 848 917 1050 1105 1144
[18] 2130 2768 3037 3752 3754 4070 4110 5050 5255 5289 5564 5595 6100 6330 6671 7041 7119
[35] 7137 7273 7313 7454 7871 8104 8714 8726 8995 9059 9073 9525 9949 9981 10092 10439 10782
[52] 11185 11507 11712 11806 11858 11980 12067 12113 1213 ..
Your input:
j<-"{1948, 2507, 2510, 7030, 7110, 9009, 00027, 00206, 00399, 00717, 00814, 00828, 00848, 00917, 01050, 01105, 01144, 02130, 02768, 03037, 03752, 03754, 04070, 04110, 05050, 05255, 05289, 05564, 05595, 06100, 06330, 06671, 07041, 07119, 07137, 07273, 07313, 07454, 07871, 08104, 08714, 08726, 08995, 09059, 09073, 09525, 09949, 09981, 10092, 10439, 10782, 11185, 11507, 11712, 11806, 11858, 11980, 12067, 12113, 12139, 12643, 13820, 14534, 15007, 15014, 15549, 15953, 16151, 16174, 16634, 16733, 16888, 17111, 17207, 17377, 17721, 17900, 18118, 18400, 18686, 18880, 19080, 19342, 19444, 19772, 19790, 19891, 20091, 20245, 20402, 20811, 21114, 21345, 21811, 21881, 22222, 22311, 22320, 22831, 22969, 23251, 23572, 23734, 23862, 23889, 24034, 24463, 25172, 25688, 26143, 26221, 26803, 26850, 26898, 27497, 28291, 28343, 29411, 29419, 30024, 30561, 30923, 31345, 31351, 31555, 31927, 32198, 32861, 33020, 33040, 33095, 33188, 33311, 33368, 33377, 33475, 33519, 33574, 33592, 34207, 34235, 34272, 34484, 34854, 34872, 34875, 34876, 34880, 35222, 35292, 35344, 36177, 36266, 37038, 37060, 37548, 37686, 37700, 38139, 39368, 39369, 39633, 40132, 40698, 40704, 40744, 40819, 41311, 41971, 42102, 42616, 43055, 43211, 43234, 43428, 43494, 43934, 44117, 44252, 44272, 44301, 44336, 44619, 44866, 44888, 45049, 45197, 45412, 45718, 46694, 46736, 47000, 48046, 48540, 49078, 49109, 49216, 49388, 49464, 50056, 50155, 50217, 50477, 50692, 51122, 51445, 51946, 52475, 52537, 52982, 54011, 54031, 54160, 54963, 55000, 55537, 56080, 56163, 56282, 56760, 56787, 57102, 57727, 57871, 58101, 58558, 58882, 59902, 60225, 60397, 60501, 60619, 60703, 60890, 61075, 61894, 61944, 62322, 62337, 62380, 62413, 62729, 62766, 62923, 63010, 63234, 63977, 64127, 65359, 65428, 65542, 65750, 65863, 66184, 66636, 66712, 67201, 67439, 67953, 68133, 68854, 69251, 69959, 70107, 70725, 70768, 71081, 71099, 71948, 72013, 72377, 72400, 72420, 72735, 73000, 73015, 73142, 73223, 73455, 73717, 74049, 74492, 74854, 74941, 75142, 
75399, 75464, 75587, 75618, 75642, 75887, 76357, 76651, 77199, 77302, 77456, 77579, 77601, 77649, 77668, 77694, 77745, 78006, 78010, 78178, 78335, 78656, 78729, 78808, 78824, 78844, 78945, 79416, 79471, 79915, 80077, 80111, 80189, 80262, 80409, 80470, 80529, 80539, 80838, 81272, 81513, 81658, 81740, 81743, 81762, 81843, 82001, 82070, 82106, 82342, 82472, 82719, 83670, 84009, 84151, 84299, 84430, 84450, 84460, 84945, 86411, 86443, 86446, 86668, 86942, 87286, 87317, 87624, 87785, 88023, 88517, 88696, 88787, 88868, 88977, 89206, 90108, 90440, 90734, 90802, 90849, 90920, 90931, 91011, 91031, 91133, 91777, 91949, 92162, 92494, 93012, 93172, 94300, 94517, 95142, 95410, 95559, 95859, 96112, 97255, 97787, 97986, 98240, 98817, 99050, 99198, 99222, 99241, 99295, 99326, 99335, 99503, 99603, 99643, 99803, 99968}"
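This code removes the first and last characters of the string (the "{" and "}"), splits the remainder on ",", and strips the surrounding whitespace from each piece using trimws. Finally, as.numeric converts the pieces to numbers, which is why leading zeros (e.g. in "00027") are dropped.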

If it turns out that your data actually is JSON, stick with the rjson package. This answer assumes your data is not JSON (since rjson::fromJSON throws an error on your data).
Try:
string <- "{1948, 2507, 2510, 7030, 7110, 9009, 00027, 00206, 00399, 00717, 00814, 00828, 00848, 00917, 01050, 01105, 01144, 02130, 02768, 03037, 03752, 03754, 04070, 04110, 05050, 05255, 05289, 05564, 05595, 06100, 06330, 06671, 07041, 07119, 07137, 07273, 07313, 07454, 07871, 08104, 08714, 08726, 08995, 09059, 09073, 09525, 09949, 09981, 10092, 10439, 10782, 11185, 11507, 11712, 11806, 11858, 11980, 12067, 12113, 12139, 12643, 13820, 14534, 15007, 15014, 15549, 15953, 16151, 16174, 16634, 16733, 16888, 17111, 17207, 17377, 17721, 17900, 18118, 18400, 18686, 18880, 19080, 19342, 19444, 19772, 19790, 19891, 20091, 20245, 20402, 20811, 21114, 21345, 21811, 21881, 22222, 22311, 22320, 22831, 22969, 23251, 23572, 23734, 23862, 23889, 24034, 24463, 25172, 25688, 26143, 26221, 26803, 26850, 26898, 27497, 28291, 28343, 29411, 29419, 30024, 30561, 30923, 31345, 31351, 31555, 31927, 32198, 32861, 33020, 33040, 33095, 33188, 33311, 33368, 33377, 33475, 33519, 33574, 33592, 34207, 34235, 34272, 34484, 34854, 34872, 34875, 34876, 34880, 35222, 35292, 35344, 36177, 36266, 37038, 37060, 37548, 37686, 37700, 38139, 39368, 39369, 39633, 40132, 40698, 40704, 40744, 40819, 41311, 41971, 42102, 42616, 43055, 43211, 43234, 43428, 43494, 43934, 44117, 44252, 44272, 44301, 44336, 44619, 44866, 44888, 45049, 45197, 45412, 45718, 46694, 46736, 47000, 48046, 48540, 49078, 49109, 49216, 49388, 49464, 50056, 50155, 50217, 50477, 50692, 51122, 51445, 51946, 52475, 52537, 52982, 54011, 54031, 54160, 54963, 55000, 55537, 56080, 56163, 56282, 56760, 56787, 57102, 57727, 57871, 58101, 58558, 58882, 59902, 60225, 60397, 60501, 60619, 60703, 60890, 61075, 61894, 61944, 62322, 62337, 62380, 62413, 62729, 62766, 62923, 63010, 63234, 63977, 64127, 65359, 65428, 65542, 65750, 65863, 66184, 66636, 66712, 67201, 67439, 67953, 68133, 68854, 69251, 69959, 70107, 70725, 70768, 71081, 71099, 71948, 72013, 72377, 72400, 72420, 72735, 73000, 73015, 73142, 73223, 73455, 73717, 74049, 74492, 74854, 74941, 
75142, 75399, 75464, 75587, 75618, 75642, 75887, 76357, 76651, 77199, 77302, 77456, 77579, 77601, 77649, 77668, 77694, 77745, 78006, 78010, 78178, 78335, 78656, 78729, 78808, 78824, 78844, 78945, 79416, 79471, 79915, 80077, 80111, 80189, 80262, 80409, 80470, 80529, 80539, 80838, 81272, 81513, 81658, 81740, 81743, 81762, 81843, 82001, 82070, 82106, 82342, 82472, 82719, 83670, 84009, 84151, 84299, 84430, 84450, 84460, 84945, 86411, 86443, 86446, 86668, 86942, 87286, 87317, 87624, 87785, 88023, 88517, 88696, 88787, 88868, 88977, 89206, 90108, 90440, 90734, 90802, 90849, 90920, 90931, 91011, 91031, 91133, 91777, 91949, 92162, 92494, 93012, 93172, 94300, 94517, 95142, 95410, 95559, 95859, 96112, 97255, 97787, 97986, 98240, 98817, 99050, 99198, 99222, 99241, 99295, 99326, 99335, 99503, 99603, 99643, 99803, 99968}"
string as list of characters:
# Drop the braces, split on each comma together with any whitespace that
# follows it (including a line break inside the literal), and wrap the pieces
# in a list holding one character string per element.
# `[[1]]` is applied to the strsplit() result before as.list(); applying it
# afterwards unwraps the list again and leaves a plain character vector.
string_as_list_char <- as.list(
  strsplit(gsub("\\{|\\}", "", string), ",\\s*")[[1]]
)
or converted to numeric:
# Remove the braces, split on ", ", convert every piece to a number, and store
# the numbers as a list with one value per element.
string_as_list_num <- as.list(
  as.numeric(
    strsplit(gsub("\\{|\\}", "", string), ", ")[[1]]
  )
)

Related

group by year in yearqrt format R

I would like to group the values of all the other columns by the year in the yearqtr column of the following data:
dput(narepurchasement)
structure(list(Date = structure(c(844128000, 852076800, 859852800,
867715200, 875664000, 883612800, 891388800, 899251200, 907200000,
915148800, 922924800, 930787200, 938736000, 946684800, 954547200,
962409600, 970358400, 978307200, 986083200, 993945600, 1001894400,
1009843200, 1017619200, 1025481600, 1033430400, 1041379200, 1049155200,
1057017600, 1064966400, 1072915200, 1080777600, 1088640000, 1096588800,
1104537600, 1112313600, 1120176000, 1128124800, 1136073600, 1143849600,
1151712000, 1159660800, 1167609600, 1175385600, 1183248000, 1191196800,
1199145600, 1207008000, 1214870400, 1222819200, 1230768000, 1238544000,
1246406400, 1254355200, 1262304000, 1270080000, 1277942400, 1285891200,
1293840000, 1301616000, 1309478400, 1317427200, 1325376000, 1333238400,
1341100800, 1349049600, 1356998400, 1364774400, 1372636800, 1380585600,
1388534400, 1396310400, 1404172800, 1412121600, 1420070400, 1427846400,
1435708800, 1443657600, 1451606400, 1459468800, 1467331200, 1475280000,
1483228800, 1491004800, 1498867200, 1506816000, 1514764800, 1522540800,
1530403200, 1538352000, 1546300800, 1554076800, 1561939200, 1569888000,
1577836800, 1585699200, 1593561600, 1601510400, 1609459200, 1617235200,
1625097600, 1633046400, 1640995200, 1648771200), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), NetIssuance = c("-7450", "-13950",
"-14675", "-22875", "-25875", "-21675", "-17808", "-64840", "-111214",
"-6920", "-76700", "-26188", "-1", "27044", "-50630", "-10731",
"-83887", "-4850", "-14775", "-27350", "-1150", "-2644", "6357",
"-20316", "2098", "-10173", "-3438", "0", "-2055", "-0.802",
"-16823", "-32200", "-70730", "-43031", "-58722", "-90630", "-83784",
"-110795", "-116977", "-107859", "-137542", "-109583", "-149516",
"-162019", "-226618", "-84099", "-38612", "-73533", "-93475",
"-37950", "39311", "20920", "-62302", "-35987", "-35433", "-71238",
"-58295", "-59766", "-101392", "-133088", "-88329", "-49568",
"-99135", "-73428", "-77876", "-38256", "-73497", "-60269", "-105274",
"-101911", "-48493", "-80452", "-71090", "-116963", "-102404",
"-129399", "-104711", "-127487", "-136914", "-150658", "-80792",
"-89438", "-55464", "-119607", "-61042", "-122438", "-225035",
"-79778", "-190075", "-174006", "-46583", "-111504", "-124927",
"-95947", "-14946", "7398", "-67450", "-30403", "-133211", "-218291",
"-237670", "-227868", "-135084"), GrossIssuance = c(35393, 34426,
39963, 36586, 40630, 36993, 57637, 31110, 52737, 52487, 78711,
65846, 95574, 113349, 86067, 75480, 71906, 54552, 64094, 39824,
55322, 43624, 50257, 29329, 35664, 32098, 36084, 42285, 48634,
57955, 47497, 43892, 55599, 48385, 52197, 63692, 63159, 68401,
69557, 63825, 94723, 88627, 97967, 102944, 108022, 86316, 96002,
93730, 75885, 64674, 77307, 62616, 66705, 54873, 57173, 48392,
68703, 64334, 69966, 43637, 55198, 66678, 70380, 68331, 72198,
73702, 83784, 103945, 94138, 89471, 100239, 100418, 111302, 129933,
124281, 116589, 97678, 106734, 118234, 106262, 107965, 122679,
115625, 107485, 112226, 106358, 99560, 101952, 91526, 95447,
118912, 108570, 100615, 107853, 154908, 134115, 154227, 163567,
126579, 112180, 132474, 92327, 80342), GrossRetirement = c(42843,
48376, 54638, 59461, 66505, 58668, 75445, 95950, 163951, 59407,
155411, 92034, 96134, 86305, 136697, 86211, 155793, 59402, 78869,
67174, 56472, 46268, 43900, 49645, 33566, 42271, 39522, 42226,
50689, 58757, 64320, 76092, 126329, 91416, 110919, 154322, 146943,
179196, 186534, 171684, 232265, 198210, 247483, 264963, 334640,
170415, 134614, 167263, 169360, 102624, 37996, 41696, 129007,
90860, 92606, 119630, 126998, 124100, 171358, 176725, 143527,
116246, 169515, 141759, 150074, 111958, 157281, 164214, 199412,
191382, 148732, 180870, 182392, 246896, 226685, 245988, 202389,
234221, 255148, 256920, 188757, 212117, 171089, 227092, 173268,
228796, 324595, 181730, 281601, 269453, 165495, 220074, 225542,
203800, 169854, 126717, 221677, 193970, 259790, 330471, 370144,
320195, 215426), Repurchases = c(22263, 22638, 23514, 25005,
34369, 26643, 29082, 41095, 27253, 31805, 30779, 29350, 35972,
38084, 22859, 24761, 30152, 25245, 26623, 27689, 24038, 20954,
27243, 27314, 18885, 20208, 22000, 25993, 34329, 31567, 34011,
42358, 46643, 52980, 63201, 66599, 90778, 76295, 97243, 91990,
96248, 92541, 121025, 121251, 98213, 94359, 75799, 80943, 45745,
26459, 17862, 24888, 33600, 40277, 59624, 57199, 62624, 66172,
73022, 96186, 74495, 64511, 83483, 65770, 86040, 77135, 100169,
97375, 105120, 124551, 99652, 108215, 106062, 113685, 100343,
122057, 107005, 123418, 99546, 75010, 89025, 93073, 81638, 84879,
87762, 143170, 138764, 134874, 148169, 137193, 107400, 108922,
119371, 143785, 79929, 88312, 110984, 128796, 141252, 154680,
195502, 220050, 120000), MA = c(20579, 25738, 31124, 34456, 32136,
32025, 46364, 54855, 136698, 27602, 124632, 62684, 60162, 48221,
113837, 61450, 125641, 34157, 52246, 39486, 32434, 25314, 16657,
22331, 14681, 22063, 17522, 16233, 16360, 27191, 30309, 33735,
79686, 38436, 47718, 87723, 56166, 102901, 89291, 79694, 136016,
105669, 126458, 143711, 236427, 76055, 58816, 86320, 123615,
76166, 20134, 16809, 95407, 50583, 32982, 62430, 64373, 57928,
98336, 80539, 69032, 51735, 86032, 75988, 64033, 34823, 57112,
66838, 94292, 66831, 49079, 72655, 76330, 133211, 126342, 123931,
95384, 110803, 155602, 181911, 99732, 119044, 89451, 142213,
85506, 85626, 185832, 46856, 133432, 132260, 58095, 111152, 106172,
60015, 89925, 38404, 110693, 65174, 118539, 175791, 174642, 100146,
95426), GDP = c(8259.771, 8362.655, 8518.825, 8662.823, 8765.907,
8866.48, 8969.699, 9121.097, 9293.991, 9411.682, 9526.21, 9686.626,
9900.169, 10002.179, 10247.72, 10318.165, 10435.744, 10470.231,
10599, 10598.02, 10660.465, 10783.5, 10887.46, 10984.04, 11061.433,
11174.129, 11312.766, 11566.669, 11772.234, 11923.447, 12112.815,
12305.307, 12527.214, 12767.286, 12922.656, 13142.642, 13324.204,
13599.16, 13753.424, 13870.188, 14039.56, 14215.651, 14402.082,
14564.117, 14715.058, 14706.538, 14865.701, 14898.999, 14608.208,
14430.901, 14381.236, 14448.882, 14651.248, 14764.611, 14980.193,
15141.605, 15309.471, 15351.444, 15557.535, 15647.681, 15842.267,
16068.824, 16207.13, 16319.54, 16420.386, 16629.05, 16699.551,
16911.068, 17133.114, 17144.281, 17462.703, 17743.227, 17852.54,
17991.348, 18193.707, 18306.96, 18332.079, 18425.306, 18611.617,
18775.459, 18968.041, 19148.194, 19304.506, 19561.896, 19894.75,
20155.486, 20470.197, 20687.278, 20819.269, 21013.085, 21272.448,
21531.839, 21706.532, 21538.032, 19636.731, 21362.428, 21704.706,
22313.85, 23046.934, 23550.42, 24349.121, 24740.48, 25248.476
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-103L))
I don't know how exactly...
We may use
library(dplyr)
library(zoo)
library(lubridate)
# Convert each Date to a year-quarter value, then group the rows by the
# calendar year of that quarter.
narepurchasement |>
  mutate(yearqtr = as.yearqtr(Date)) |>
  group_by(year = year(yearqtr))

Updating dataframe column value by referring to another dataframe

My first data frame (df) contains EntryDate and ExitDate columns. Another data frame (n1) has all trading dates. I need a new column in the first data frame holding the number of trading days between EntryDate and ExitDate, counted from the second data frame. How do I call this dayCount function for each row of df? When I try to use mapply, I am unable to pass n1 as a parameter.
# Count the trading dates that fall inside a closed date range.
#
# startDate: first day of the range (inclusive).
# endDate:   last day of the range (inclusive).
# n1:        the trading dates, either a data frame with a `Date` column or a
#            plain vector of dates.
#
# Returns the number of dates d in n1 with startDate <= d <= endDate.
#
# To get one count per row of a data frame, pass n1 through MoreArgs:
#   mapply(dayCount, df$EntryDate, df$ExitDate, MoreArgs = list(n1 = n1))
dayCount <- function(startDate, endDate, n1) {
  # Accept both shapes of n1: subset() with a bare `Date` column plus nrow()
  # only works for a data frame and fails for a plain vector of dates.
  trading_dates <- if (is.data.frame(n1)) n1[["Date"]] else n1
  # NA comparisons are counted as "outside the range", as subset() treats them.
  sum(trading_dates >= startDate & trading_dates <= endDate, na.rm = TRUE)
}
df<- structure(list(EntryDate = structure(c(11355, 11418, 11436, 11449,
11520, 11523, 11548, 11620, 11768, 11773), class = "Date"), ExitDate = structure(c(11360,
11422, 11438, 11457, 11522, 11526, 11554, 11625, 11772, 11778
), class = "Date")), row.names = c(22L, 65L, 76L, 84L, 135L,
138L, 155L, 204L, 305L, 307L), class = "data.frame")
n1<- structure(c(11354, 11355, 11358, 11359, 11360, 11361, 11362,
11365, 11366, 11367, 11368, 11369, 11372, 11373, 11374, 11375,
11376, 11379, 11380, 11381, 11382, 11383, 11386, 11388, 11389,
11390, 11393, 11394, 11395, 11396, 11397, 11400, 11401, 11402,
11403, 11404, 11407, 11408, 11409, 11410, 11411, 11414, 11415,
11416, 11418, 11421, 11422, 11423, 11424, 11428, 11429, 11430,
11431, 11432, 11435, 11436, 11437, 11438, 11439, 11442, 11444,
11445, 11446, 11449, 11450, 11451, 11452, 11453, 11456, 11457,
11458, 11459, 11460, 11463, 11464, 11465, 11466, 11467, 11470,
11471, 11472, 11473, 11474, 11477, 11478, 11479, 11480, 11481,
11484, 11485, 11486, 11487, 11488, 11491, 11492, 11493, 11494,
11495, 11498, 11499, 11500, 11501, 11502, 11505, 11506, 11507,
11508, 11509, 11512, 11513, 11514, 11515, 11516, 11519, 11520,
11521, 11522, 11523, 11526, 11527, 11528, 11529, 11530, 11533,
11534, 11535, 11536, 11537, 11540, 11541, 11542, 11543, 11544,
11547, 11548, 11550, 11551, 11554, 11555, 11557, 11558, 11561,
11562, 11563, 11564, 11565, 11568, 11569, 11570, 11571, 11572,
11575, 11576, 11577, 11578, 11579, 11582, 11583, 11584, 11585,
11586, 11589, 11590, 11591, 11592, 11593, 11596, 11598, 11599,
11600, 11603, 11604, 11605, 11606, 11607, 11610, 11611, 11612,
11613, 11614, 11617, 11618, 11619, 11620, 11624, 11625, 11626,
11627, 11628, 11631, 11632, 11633, 11634, 11635, 11638, 11639,
11640, 11641, 11645, 11646, 11647, 11648, 11649, 11652, 11653,
11654, 11655, 11659, 11660, 11661, 11662, 11663, 11666, 11667,
11668, 11669, 11670, 11674, 11675, 11676, 11677, 11680, 11682,
11683, 11684, 11687, 11688, 11689, 11690, 11691, 11694, 11695,
11696, 11697, 11698, 11701, 11702, 11703, 11704, 11705, 11708,
11709, 11710, 11711, 11712, 11715, 11716, 11717, 11718, 11719,
11722, 11723, 11724, 11725, 11726, 11729, 11730, 11731, 11732,
11733, 11736, 11737, 11738, 11739, 11740, 11743, 11744, 11745,
11746, 11747, 11750, 11751, 11752, 11753, 11754, 11757, 11758,
11759, 11760, 11761, 11764, 11765, 11766, 11767, 11768, 11772,
11773, 11774, 11778), class = "Date")
You can use %in% to count number of days in n1 between each EntryDate and ExitDate.
# For every EntryDate/ExitDate pair, count the dates in n1 that appear in the
# daily sequence from EntryDate to ExitDate (both ends included).
# vapply() always yields exactly one integer per row of df, so this also works
# when df has no rows or n1 holds a single date; in those cases mapply() does
# not return a matrix and colSums() fails.
df$dayCount <- vapply(
  seq_len(nrow(df)),
  function(i) {
    sum(n1 %in% seq(df$EntryDate[i], df$ExitDate[i], by = "1 day"))
  },
  integer(1)
)
df
# EntryDate ExitDate dayCount
#22 2001-02-02 2001-02-07 4
#65 2001-04-06 2001-04-10 3
#76 2001-04-24 2001-04-26 3
#84 2001-05-07 2001-05-15 7
#135 2001-07-17 2001-07-19 3
#138 2001-07-20 2001-07-23 2
#155 2001-08-14 2001-08-20 4
#204 2001-10-25 2001-10-30 3
#305 2002-03-22 2002-03-26 2
#307 2002-03-27 2002-04-01 3

Forecast with auto Arima, with long term trend line, the 30 day forecast "jumps"

I'm trying to create a 30 day forecast using auto.arima from the forecast package. I want to capture the long term trend, so I inserted it into the xreg argument.
The data:
dput(data)
structure(list(TKDate = structure(c(15706, 15707, 15708, 15709,
15710, 15711, 15712, 15713, 15714, 15715, 15716, 15717, 15718,
15719, 15720, 15721, 15722, 15723, 15724, 15725, 15726, 15727,
15728, 15729, 15730, 15731, 15732, 15733, 15734, 15735, 15736,
15737, 15738, 15739, 15740, 15741, 15742, 15743, 15744, 15745,
15746, 15747, 15748, 15749, 15750, 15751, 15752, 15753, 15754,
15755, 15756, 15757, 15758, 15759, 15760, 15761, 15762, 15763,
15764, 15765, 15766, 15767, 15768, 15769, 15770, 15771, 15772,
15773, 15774, 15775, 15776, 15777, 15778, 15779, 15780, 15781,
15782, 15783, 15784, 15785, 15786, 15787, 15788, 15789, 15790,
15791, 15792, 15793, 15794, 15795, 15796, 15797, 15798, 15799,
15800, 15801, 15802, 15803, 15804, 15805, 15806, 15807, 15808,
15809, 15810, 15811, 15812, 15813, 15814, 15815, 15816, 15817,
15818, 15819, 15820, 15821, 15822, 15823, 15824, 15825, 15826,
15827, 15828, 15829, 15830, 15831, 15832, 15833, 15834, 15835,
15836, 15837, 15838, 15839, 15840, 15841, 15842, 15843, 15844,
15845, 15846, 15847, 15848, 15849, 15850, 15851, 15852, 15853,
15854, 15855, 15856, 15857, 15858, 15859, 15860, 15861, 15862,
15863, 15864, 15865, 15866, 15867, 15868, 15869, 15870, 15871,
15872, 15873, 15874, 15875, 15876, 15877, 15878, 15879, 15880,
15881, 15882, 15883, 15884, 15885, 15886, 15887, 15888, 15889,
15890, 15891, 15892, 15893, 15894, 15895, 15896, 15897, 15898,
15899, 15900, 15901, 15902, 15903, 15904, 15905, 15906, 15907,
15908, 15909, 15910, 15911, 15912, 15913, 15914, 15915, 15916,
15917, 15918, 15919, 15920, 15921, 15922, 15923, 15924, 15925,
15926, 15927, 15928, 15929, 15930, 15931, 15932, 15933, 15934,
15935, 15936, 15937, 15938, 15939, 15940, 15941, 15942, 15943,
15944, 15945, 15946, 15947, 15948, 15949, 15950, 15951, 15952,
15953, 15954, 15955, 15956, 15957, 15958, 15959, 15960, 15961,
15962, 15963, 15964, 15965, 15966, 15967, 15968, 15969, 15970,
15971, 15972, 15973, 15974, 15975, 15976, 15977, 15978, 15979,
15980, 15981, 15982, 15983, 15984, 15985, 15986, 15987, 15988,
15989, 15990, 15991, 15992, 15993, 15994, 15995, 15996, 15997,
15998, 15999, 16000, 16001, 16002, 16003, 16004, 16005, 16006,
16007, 16008, 16009, 16010, 16011, 16012, 16013, 16014, 16015,
16016, 16017, 16018, 16019, 16020, 16021, 16022, 16023, 16024,
16025, 16026, 16027, 16028, 16029, 16030, 16031, 16032, 16033,
16034, 16035, 16036, 16037, 16038, 16039, 16040, 16041, 16042,
16043, 16044, 16045, 16046, 16047, 16048, 16049, 16050, 16051,
16052, 16053, 16054, 16055, 16056, 16057, 16058, 16059, 16060,
16061, 16062, 16063, 16064, 16065, 16066, 16067, 16068, 16069,
16070, 16071, 16072, 16073, 16074, 16075, 16076, 16077, 16078,
16079, 16080, 16081, 16082, 16083, 16084, 16085, 16086, 16087,
16088, 16089, 16090, 16091, 16092, 16093, 16094, 16095, 16096,
16097, 16098, 16099, 16100, 16101, 16102, 16103, 16104, 16105,
16106, 16107, 16108, 16109, 16110, 16111, 16112, 16113, 16114,
16115, 16116, 16117, 16118), class = "Date"), spend = c(7984.39,
11476.06, 6555.57, 3981.45, 3963.83, 4827.72, 6309.32, 13503.36,
17075.89, 33353.71, 29324.34, 7968.68, 5540.63, 12113.45, 15596.38,
19328.67, 20224.68, 18977.55, 16128.27, 10633.56, 11887.79, 17881.11,
12613.46, 11607.55, 38232.11, 7861.25, 9397.88, 12056.02, 15115.87,
12275.93, 14537.35, 9594.26, 8215.83, 9632.52, 9993.15, 13478.37,
28509.38, 12016.33, 8907.76, 8757.43, 9513.09, 10299.5, 10385.03,
12515.62, 9008.95, 17825.68, 9320.47, 11189.58, 12902.31, 13341.35,
18675.32, 16989.53, 10114.53, 9876.65, 11203.39, 11718.73, 26264.95,
12414.19, 12275.16, 9242.85, 8883.97, 10095.72, 11581.55, 14815.78,
25064.12, 9297.07, 8047.91, 6876.37, 8881.63, 10982.85, 9975.33,
24124.62, 8514.66, 15719.84, 5807.39, 8422.38, 15184.95, 14757.58,
11087.61, 11070.78, 10425.67, 15517.8, 11257.69, 11915.47, 11720.37,
34064.62, 6493.41, 5757.4, 4387.54, 6520.58, 7806.81, 6356.63,
10916.36, 9013.43, 9722.41, 6044.25, 7971.7, 23933.54, 8627.85,
9722.77, 18660.13, 13011.36, 11445.11, 14219.2, 17138.92, 16016.68,
11434, 31379.03, 8494.25, 12493.85, 7708.1, 21583.05, 9026.17,
9379.35, 8287.13, 7298.16, 6097.03, 8076.57, 12871.87, 11346.89,
9115.82, 7737.98, 15065.38, 5262.73, 6522.58, 12743.94, 23945.16,
16109.26, 6985.89, 6345.08, 6246.93, 6824.66, 8491.42, 9654.99,
18976.58, 19565.68, 8075.47, 7219.79, 8629.04, 12491.64, 11915.89,
27533.16, 13554.35, 10102.21, 20029.15, 11641.82, 15855.19, 14139.17,
15376.63, 14625.99, 9098.87, 9396.64, 12015.84, 17532.75, 15131.65,
15815.5, 16048.65, 9769.63, 9582.12, 11201.8, 12810, 18857.38,
11822.71, 19289.08, 8911.29, 9437.55, 10987.14, 12995.65, 16675.26,
9741.82, 9723.57, 10328.24, 7738.04, 8432.16, 23021.73, 10367.28,
8210.53, 10468.4, 8024.25, 7296.25, 7445.34, 8539.59, 12386.23,
15335.72, 9013.49, 7994.95, 7759.46, 8789.38, 11242.38, 28653.23,
9750.96, 14398.62, 9248.74, 6766.08, 8159.14, 9899.38, 9453.35,
17588.96, 8958.16, 8256.61, 6240.4, 7235.24, 23841.62, 9002.73,
11839.47, 8693.31, 7161.37, 7046.39, 9221.53, 10004.93, 8698.76,
7948.68, 9013.27, 18536.68, 7980.38, 8968.95, 23594.14, 17744.66,
12615.73, 13646.05, 10512.58, 9066.02, 9665.15, 13183.2, 23864.45,
12017.52, 10831.07, 8954.76, 7276.41, 7882.9, 16616.41, 15384.68,
11046.53, 10621.01, 8094.74, 5451.26, 6237.79, 10717.69, 7076.38,
7044.62, 7047.45, 7774.77, 6496.21, 6340.9, 7110.53, 7691.28,
17482.02, 5576.19, 3763.79, 11477.68, 5710.5, 6519.51, 20022.61,
13153.68, 6526.28, 5885.28, 5656.17, 6270.04, 9795.38, 6320.95,
5741.98, 10808.72, 5150.87, 5416.52, 6305.05, 20953.12, 6569.02,
6360.21, 9376.68, 4973.93, 5034.48, 6380.45, 15307.28, 14386.65,
17705.88, 4779.52, 4784.79, 4737.05, 5350.28, 12112.11, 13153.72,
6049.69, 5430.46, 4627.59, 3637.2, 5482.43, 16705.15, 12221.16,
13198.88, 6484.54, 5590.86, 4979.09, 5771.75, 7311.92, 16111.86,
8047.77, 11706.91, 6042.14, 5670.74, 6905.07, 11261.89, 9700.4,
6643.03, 5693.85, 14778.67, 9128.14, 3682.01, 7911.5, 17742.85,
5093.31, 7867.97, 3202.78, 2843.35, 2598.77, 10930.81, 11204.67,
7289.62, 4000.17, 4178.89, 4507.33, 6671.48, 10317.48, 9368.98,
6156.41, 8375.24, 2762.76, 2457.59, 4707.51, 4584.52, 3749.82,
11667.82, 4271.67, 3614.3, 3715.83, 4510.57, 4872.36, 21805.71,
4757.04, 6515.92, 2834.25, 2685.19, 3509.28, 4479.35, 17817.99,
10357.67, 3412.15, 3044.95, 2840.24, 3348.91, 13671.68, 2027.42,
1616.25, 1177.73, 995.25, 1062.25, 1578.07, 1649.8, 1410.06,
1592.03, 3995.24, 6489.87, 6895.21, 8298.58, 7698.68, 5782.07,
7671.08, 19539.4, 7023.84, 6509.9, 6643.28, 19850.3, 6856.67,
13142.15, 5524.75, 5063.2, 4916.81, 6117.54, 6717.86, 9393.95,
10462.44, 10511.15, 4497.94, 4038.31, 5503.91, 5554.82, 5801.11,
12992.82, 4778.61, 4067.41, 4359.53, 6148.1, 9236.51, 5773.16,
11313.13, 4702.37, 4167.3, 4067.75, 4469.11, 9278.41, 9911.18,
5161.13, 4477.78, 4459.53, 4080.14, 5084.67, 7735.34, 10676.6,
5507.86, 8286.12, 4332.23, 4737.52, 5952.09, 7134.44)), .Names = c("TKDate",
"spend"), row.names = c(NA, 413L), class = "data.frame")
The code:
library(forecast)
# Trend regressor for the observed data: the running index 1..n, one value per
# row of data.
explaining<-rep(1:length(data$TKDate))
# Regressor values meant for the forecast period: n+1 .. n+31.
# NOTE(review): this yields 31 values while h=30 is requested below - confirm
# which horizon is intended.
predic<-rep((length(data$TKDate)+1):(length(data$TKDate)+31))
# Fit the model on the second column of data (spend), with the running index
# as external regressor.
modArima <- auto.arima(data[,2],xreg=explaining)
# NOTE(review): the future regressor values are passed as `newdata`, while
# `xreg` still receives the in-sample index - this call is the one that
# produces the "jump" asked about.
fit<-forecast(modArima,h=30,xreg=explaining,newdata=predic)
plot(fit)
I get this weird jump:
Can anyone explain to me this weird jump? Why doesn't the forecast continue from the last observed data point (or at least close to it)?
This is a hard-to-find error, I'll admit.
forecast.Arima() takes the new values of the external regressors not in a newdata parameter (as does predict.lm()), but in the xreg parameter. So instead of
fit <- forecast(modArima,h=30,xreg=explaining,newdata=predic)
where you forecasted using the values of explaining, not those of predic (unfortunately, forecast.Arima() does not throw a warning if you feed data to the nonexistent newdata parameter), do this:
# Pass the future regressor values (predic) through xreg; no newdata argument
# is used here.
fit <- forecast(modArima,h=30,xreg=predic)
and plot (with in-sample fits thrown in for good measure - EDIT: somewhat confusingly, the in-sample fits are not returned by auto.arima() or arima() as they are by lm(), but by forecast.Arima()):
# Draw the forecast, then overlay the in-sample fitted values stored in
# fit$fitted as a red line.
plot(fit)
lines(fit$fitted,col="red")

R forecast function not picking up seasonality

I am having trouble picking up the seasonality that seems to be implied in the data. I think it is using additive rather than multiplicative seasonality (though that is just a guess). I am using the forecast function and thought it would automatically pick what I need, based on a lecture from Dr. Hyndman. The following snippet of code plots the chart, and I would have expected the forecast to be higher than it is. Am I missing a model parameter or something? Any help would be appreciated.
sw<-c(2280, 1754, 1667, 1359, 1285, 1379, 2166, 1053, 1076, 1149, 1277, 1577, 1639, 1719, 1592, 2306, 3075, 2897, 1875, 1966, 2927, 3528, 2948, 2890, 3947, 3913, 3885, 4148, 5293, 5752, 6001, 7719, 5512, 6782, 6320, 6425, 6406, 7237, 8655, 9269, 12447, 13470, 13469, 13949, 17753, 17653, 14531, 14496, 13643, 12652, 12665, 10629, 8962, 8198, 6833, 5027, 4407, 4449, 4399, 5896, 6589, 3786, 4386, 4847, 5597, 5407, 4800, 7803, 9255, 10423, 5523, 8121, 6944, 8434, 9847, 9292, 9794, 10195, 10124, 11310, 12245, 12798, 14611, 15402, 13532, 16154, 15101, 14755, 17139, 16475, 19935, 19980, 25173, 28568, 27839, 28991, 27073, 29615, 25849, 27910, 27067, 21303, 20544, 15188, 13706, 9277, 10815, 7228, 4608, 4409, 9866, 8471, 8223, 6445, 6641, 6833, 11421, 8945, 8127, 10380, 12005, 13272, 9431, 12144, 14934, 14052, 11712, 14888, 15824, 17275, 18067, 19839, 21192, 22763, 22976, 23721, 22681, 20131, 19965, 20539, 19517, 22022, 23076, 30574, 40247, 43111, 39577, 40724, 44982, 44388, 46372, 43153, 36821, 32258, 31256, 27153, 23180, 18252, 16381, 13220, 12500, 10727, 9636, 8892, 8644, 9482, 9170, 10937, 12299, 15781, 11477, 16524, 16752, 18072, 14776, 13388, 18056, 19815, 21263, 22046, 26415, 24247, 25403, 30058, 26331, 32533, 31891, 35973, 27558, 24554, 25692, 25955, 24284, 24930, 28354, 34840, 40055, 42099, 42768, 48279, 50086, 56466, 42244, 51451, 44583, 39091, 33391, 29452, 25533)
# Turn the raw values into a time series with 52 observations per cycle,
# starting at period 30 of 2006.
swts <- ts(data = sw, start = c(2006, 30), frequency = 52)
# Forecast the next 52 periods with forecast()'s default model choice and
# plot the result.
swfc <- forecast(swts, h = 52)
plot(swfc)
Does your data have multiple seasonal periods? If so, you could check the tbats function.
Anyway, your seasonal period is greater than 12, so forecast is using an STL decomposition to adjust your seasonal data. You may want to check ?stlf for more information on the parameters you can change, or try a Box-Cox transformation:
# Estimate a Box-Cox parameter from the raw values, then repeat the 52-period
# forecast with that transformation and the robust option enabled.
lambda <- BoxCox.lambda(x = sw)
swfc <- forecast(swts, h = 52, robust = TRUE, lambda = lambda)
plot(swfc)

Calculating Time Weighted Rate of Return in R

Is there an R function or library that will give me the monthly (or any other specified timeframe) time weighted rate of return (twrr) for my portfolio?
I am including a dput dump of sample data below, containing the date and the portfolio ending balance. I am not sure why the dates were dput'ed the way they were, but the first date, 12053, is '2003-01-01' and the last date, 12195, is '2003-05-23'.
portfolio.df <- structure(
list(
Date = structure(c(12053, 12054, 12055, 12058,
12059, 12060, 12061, 12062, 12065, 12066, 12067, 12068, 12069,
12073, 12074, 12075, 12076, 12079, 12080, 12081, 12082, 12083,
12086, 12087, 12088, 12089, 12090, 12093, 12094, 12095, 12096,
12097, 12101, 12102, 12103, 12104, 12107, 12108, 12109, 12110,
12111, 12114, 12115, 12116, 12117, 12118, 12121, 12122, 12123,
12124, 12125, 12128, 12129, 12130, 12131, 12132, 12135, 12136,
12137, 12138, 12139, 12142, 12143, 12144, 12145, 12146, 12149,
12150, 12151, 12152, 12153, 12156, 12157, 12158, 12159, 12163,
12164, 12165, 12166, 12167, 12170, 12171, 12172, 12173, 12174,
12177, 12178, 12179, 12180, 12181, 12184, 12185, 12186, 12187,
12188, 12191, 12192, 12193, 12194, 12195),
class = "Date"),
Ending_Balance = c(56250000L,
56852500L, 57080000L, 57355000L, 57477500L, 56817500L, 57885000L,
57810000L, 57732500L, 57670000L, 57520000L, 57285000L, 57270000L,
56655000L, 55802500L, 56337500L, 55642500L, 54510000L, 54987500L,
55802500L, 56065000L, 56865000L, 56635000L, 56497500L, 56640000L,
56155000L, 55757500L, 55972500L, 55865000L, 55535000L, 55885000L,
56840000L, 56902500L, 56945000L, 56622500L, 57012500L, 57200000L,
58072500L, 57612500L, 57447500L, 57157500L, 57032500L, 57405000L,
57502500L, 56785000L, 57007500L, 56342500L, 55697500L, 56655000L,
56900000L, 57002500L, 57465000L, 57467500L, 57382500L, 57982500L,
56562500L, 58065000L, 58935000L, 58502500L, 58200000L, 57767500L,
57757500L, 58055000L, 58305000L, 58277500L, 58295000L, 59047500L,
58907500L, 59125000L, 59072500L, 59107500L, 59315000L, 59690000L,
58957500L, 59407500L, 59385000L, 59965000L, 60297500L, 59890000L,
59822500L, 60367500L, 60407500L, 60380000L, 60815000L, 61155000L,
61080000L, 61132500L, 61265000L, 60912500L, 61107500L, 61445000L,
61345000L, 61137500L, 61035000L, 60707500L, 61340000L, 61365000L,
61402500L, 61640000L, 61675000L)),
.Names = c("Date", "Ending_Balance"),
row.names = c(NA, 100L),
class = "data.frame")

Resources