I have the following format, please advise how to convert it to a list in R?
"{1948, 2507, 2510, 7030, 7110, 9009, 00027, 00206, 00399, 00717, 00814, 00828, 00848, 00917, 01050, 01105, 01144, 02130, 02768, 03037, 03752, 03754, 04070, 04110, 05050, 05255, 05289, 05564, 05595, 06100, 06330, 06671, 07041, 07119, 07137, 07273, 07313, 07454, 07871, 08104, 08714, 08726, 08995, 09059, 09073, 09525, 09949, 09981, 10092, 10439, 10782, 11185, 11507, 11712, 11806, 11858, 11980, 12067, 12113, 12139, 12643, 13820, 14534, 15007, 15014, 15549, 15953, 16151, 16174, 16634, 16733, 16888, 17111, 17207, 17377, 17721, 17900, 18118, 18400, 18686, 18880, 19080, 19342, 19444, 19772, 19790, 19891, 20091, 20245, 20402, 20811, 21114, 21345, 21811, 21881, 22222, 22311, 22320, 22831, 22969, 23251, 23572, 23734, 23862, 23889, 24034, 24463, 25172, 25688, 26143, 26221, 26803, 26850, 26898, 27497, 28291, 28343, 29411, 29419, 30024, 30561, 30923, 31345, 31351, 31555, 31927, 32198, 32861, 33020, 33040, 33095, 33188, 33311, 33368, 33377, 33475, 33519, 33574, 33592, 34207, 34235, 34272, 34484, 34854, 34872, 34875, 34876, 34880, 35222, 35292, 35344, 36177, 36266, 37038, 37060, 37548, 37686, 37700, 38139, 39368, 39369, 39633, 40132, 40698, 40704, 40744, 40819, 41311, 41971, 42102, 42616, 43055, 43211, 43234, 43428, 43494, 43934, 44117, 44252, 44272, 44301, 44336, 44619, 44866, 44888, 45049, 45197, 45412, 45718, 46694, 46736, 47000, 48046, 48540, 49078, 49109, 49216, 49388, 49464, 50056, 50155, 50217, 50477, 50692, 51122, 51445, 51946, 52475, 52537, 52982, 54011, 54031, 54160, 54963, 55000, 55537, 56080, 56163, 56282, 56760, 56787, 57102, 57727, 57871, 58101, 58558, 58882, 59902, 60225, 60397, 60501, 60619, 60703, 60890, 61075, 61894, 61944, 62322, 62337, 62380, 62413, 62729, 62766, 62923, 63010, 63234, 63977, 64127, 65359, 65428, 65542, 65750, 65863, 66184, 66636, 66712, 67201, 67439, 67953, 68133, 68854, 69251, 69959, 70107, 70725, 70768, 71081, 71099, 71948, 72013, 72377, 72400, 72420, 72735, 73000, 73015, 73142, 73223, 73455, 73717, 74049, 74492, 74854, 74941, 75142, 75399, 
75464, 75587, 75618, 75642, 75887, 76357, 76651, 77199, 77302, 77456, 77579, 77601, 77649, 77668, 77694, 77745, 78006, 78010, 78178, 78335, 78656, 78729, 78808, 78824, 78844, 78945, 79416, 79471, 79915, 80077, 80111, 80189, 80262, 80409, 80470, 80529, 80539, 80838, 81272, 81513, 81658, 81740, 81743, 81762, 81843, 82001, 82070, 82106, 82342, 82472, 82719, 83670, 84009, 84151, 84299, 84430, 84450, 84460, 84945, 86411, 86443, 86446, 86668, 86942, 87286, 87317, 87624, 87785, 88023, 88517, 88696, 88787, 88868, 88977, 89206, 90108, 90440, 90734, 90802, 90849, 90920, 90931, 91011, 91031, 91133, 91777, 91949, 92162, 92494, 93012, 93172, 94300, 94517, 95142, 95410, 95559, 95859, 96112, 97255, 97787, 97986, 98240, 98817, 99050, 99198, 99222, 99241, 99295, 99326, 99335, 99503, 99603, 99643, 99803, 99968}"
This is not a duplicate of "convert json to list in a vectorized way in R".
It is a completely different question because the format is entirely different.
Try this one-line solution:
# Convert a "{a, b, c}" string into a numeric vector: drop the enclosing
# braces, split on commas, trim the whitespace (including line breaks) around
# each piece, then coerce to numeric. Leading zeros are lost in the numeric
# conversion (e.g. "00027" becomes 27).
# trimws() is already vectorised, so it is applied to the split pieces
# directly; unlist() flattens the list returned by strsplit() regardless of
# how many strings `j` holds.
as.numeric(trimws(unlist(strsplit(substr(j, 2, nchar(j) - 1), split = ","))))
[1] 1948 2507 2510 7030 7110 9009 27 206 399 717 814 828 848 917 1050 1105 1144
[18] 2130 2768 3037 3752 3754 4070 4110 5050 5255 5289 5564 5595 6100 6330 6671 7041 7119
[35] 7137 7273 7313 7454 7871 8104 8714 8726 8995 9059 9073 9525 9949 9981 10092 10439 10782
[52] 11185 11507 11712 11806 11858 11980 12067 12113 1213 ..
Your input:
j<-"{1948, 2507, 2510, 7030, 7110, 9009, 00027, 00206, 00399, 00717, 00814, 00828, 00848, 00917, 01050, 01105, 01144, 02130, 02768, 03037, 03752, 03754, 04070, 04110, 05050, 05255, 05289, 05564, 05595, 06100, 06330, 06671, 07041, 07119, 07137, 07273, 07313, 07454, 07871, 08104, 08714, 08726, 08995, 09059, 09073, 09525, 09949, 09981, 10092, 10439, 10782, 11185, 11507, 11712, 11806, 11858, 11980, 12067, 12113, 12139, 12643, 13820, 14534, 15007, 15014, 15549, 15953, 16151, 16174, 16634, 16733, 16888, 17111, 17207, 17377, 17721, 17900, 18118, 18400, 18686, 18880, 19080, 19342, 19444, 19772, 19790, 19891, 20091, 20245, 20402, 20811, 21114, 21345, 21811, 21881, 22222, 22311, 22320, 22831, 22969, 23251, 23572, 23734, 23862, 23889, 24034, 24463, 25172, 25688, 26143, 26221, 26803, 26850, 26898, 27497, 28291, 28343, 29411, 29419, 30024, 30561, 30923, 31345, 31351, 31555, 31927, 32198, 32861, 33020, 33040, 33095, 33188, 33311, 33368, 33377, 33475, 33519, 33574, 33592, 34207, 34235, 34272, 34484, 34854, 34872, 34875, 34876, 34880, 35222, 35292, 35344, 36177, 36266, 37038, 37060, 37548, 37686, 37700, 38139, 39368, 39369, 39633, 40132, 40698, 40704, 40744, 40819, 41311, 41971, 42102, 42616, 43055, 43211, 43234, 43428, 43494, 43934, 44117, 44252, 44272, 44301, 44336, 44619, 44866, 44888, 45049, 45197, 45412, 45718, 46694, 46736, 47000, 48046, 48540, 49078, 49109, 49216, 49388, 49464, 50056, 50155, 50217, 50477, 50692, 51122, 51445, 51946, 52475, 52537, 52982, 54011, 54031, 54160, 54963, 55000, 55537, 56080, 56163, 56282, 56760, 56787, 57102, 57727, 57871, 58101, 58558, 58882, 59902, 60225, 60397, 60501, 60619, 60703, 60890, 61075, 61894, 61944, 62322, 62337, 62380, 62413, 62729, 62766, 62923, 63010, 63234, 63977, 64127, 65359, 65428, 65542, 65750, 65863, 66184, 66636, 66712, 67201, 67439, 67953, 68133, 68854, 69251, 69959, 70107, 70725, 70768, 71081, 71099, 71948, 72013, 72377, 72400, 72420, 72735, 73000, 73015, 73142, 73223, 73455, 73717, 74049, 74492, 74854, 74941, 75142, 
75399, 75464, 75587, 75618, 75642, 75887, 76357, 76651, 77199, 77302, 77456, 77579, 77601, 77649, 77668, 77694, 77745, 78006, 78010, 78178, 78335, 78656, 78729, 78808, 78824, 78844, 78945, 79416, 79471, 79915, 80077, 80111, 80189, 80262, 80409, 80470, 80529, 80539, 80838, 81272, 81513, 81658, 81740, 81743, 81762, 81843, 82001, 82070, 82106, 82342, 82472, 82719, 83670, 84009, 84151, 84299, 84430, 84450, 84460, 84945, 86411, 86443, 86446, 86668, 86942, 87286, 87317, 87624, 87785, 88023, 88517, 88696, 88787, 88868, 88977, 89206, 90108, 90440, 90734, 90802, 90849, 90920, 90931, 91011, 91031, 91133, 91777, 91949, 92162, 92494, 93012, 93172, 94300, 94517, 95142, 95410, 95559, 95859, 96112, 97255, 97787, 97986, 98240, 98817, 99050, 99198, 99222, 99241, 99295, 99326, 99335, 99503, 99603, 99643, 99803, 99968}"
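This code removes the first and last characters of the string (the "{" and "}" characters), splits the remainder on "," and strips the surrounding whitespace from each piece using trimws. Finally, it converts the pieces to numbers (which is why leading zeros, as in 00027, are dropped).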
If it happens that your data actually is JSON, stick with the rjson package. This answer assumes your data is not JSON (since rjson::fromJSON throws an error on your data).
Try:
string <- "{1948, 2507, 2510, 7030, 7110, 9009, 00027, 00206, 00399, 00717, 00814, 00828, 00848, 00917, 01050, 01105, 01144, 02130, 02768, 03037, 03752, 03754, 04070, 04110, 05050, 05255, 05289, 05564, 05595, 06100, 06330, 06671, 07041, 07119, 07137, 07273, 07313, 07454, 07871, 08104, 08714, 08726, 08995, 09059, 09073, 09525, 09949, 09981, 10092, 10439, 10782, 11185, 11507, 11712, 11806, 11858, 11980, 12067, 12113, 12139, 12643, 13820, 14534, 15007, 15014, 15549, 15953, 16151, 16174, 16634, 16733, 16888, 17111, 17207, 17377, 17721, 17900, 18118, 18400, 18686, 18880, 19080, 19342, 19444, 19772, 19790, 19891, 20091, 20245, 20402, 20811, 21114, 21345, 21811, 21881, 22222, 22311, 22320, 22831, 22969, 23251, 23572, 23734, 23862, 23889, 24034, 24463, 25172, 25688, 26143, 26221, 26803, 26850, 26898, 27497, 28291, 28343, 29411, 29419, 30024, 30561, 30923, 31345, 31351, 31555, 31927, 32198, 32861, 33020, 33040, 33095, 33188, 33311, 33368, 33377, 33475, 33519, 33574, 33592, 34207, 34235, 34272, 34484, 34854, 34872, 34875, 34876, 34880, 35222, 35292, 35344, 36177, 36266, 37038, 37060, 37548, 37686, 37700, 38139, 39368, 39369, 39633, 40132, 40698, 40704, 40744, 40819, 41311, 41971, 42102, 42616, 43055, 43211, 43234, 43428, 43494, 43934, 44117, 44252, 44272, 44301, 44336, 44619, 44866, 44888, 45049, 45197, 45412, 45718, 46694, 46736, 47000, 48046, 48540, 49078, 49109, 49216, 49388, 49464, 50056, 50155, 50217, 50477, 50692, 51122, 51445, 51946, 52475, 52537, 52982, 54011, 54031, 54160, 54963, 55000, 55537, 56080, 56163, 56282, 56760, 56787, 57102, 57727, 57871, 58101, 58558, 58882, 59902, 60225, 60397, 60501, 60619, 60703, 60890, 61075, 61894, 61944, 62322, 62337, 62380, 62413, 62729, 62766, 62923, 63010, 63234, 63977, 64127, 65359, 65428, 65542, 65750, 65863, 66184, 66636, 66712, 67201, 67439, 67953, 68133, 68854, 69251, 69959, 70107, 70725, 70768, 71081, 71099, 71948, 72013, 72377, 72400, 72420, 72735, 73000, 73015, 73142, 73223, 73455, 73717, 74049, 74492, 74854, 74941, 
75142, 75399, 75464, 75587, 75618, 75642, 75887, 76357, 76651, 77199, 77302, 77456, 77579, 77601, 77649, 77668, 77694, 77745, 78006, 78010, 78178, 78335, 78656, 78729, 78808, 78824, 78844, 78945, 79416, 79471, 79915, 80077, 80111, 80189, 80262, 80409, 80470, 80529, 80539, 80838, 81272, 81513, 81658, 81740, 81743, 81762, 81843, 82001, 82070, 82106, 82342, 82472, 82719, 83670, 84009, 84151, 84299, 84430, 84450, 84460, 84945, 86411, 86443, 86446, 86668, 86942, 87286, 87317, 87624, 87785, 88023, 88517, 88696, 88787, 88868, 88977, 89206, 90108, 90440, 90734, 90802, 90849, 90920, 90931, 91011, 91031, 91133, 91777, 91949, 92162, 92494, 93012, 93172, 94300, 94517, 95142, 95410, 95559, 95859, 96112, 97255, 97787, 97986, 98240, 98817, 99050, 99198, 99222, 99241, 99295, 99326, 99335, 99503, 99603, 99643, 99803, 99968}"
string as list of characters:
# Character version: strip the braces, split on ", " and store each piece as
# its own list element. strsplit() returns a list holding a single character
# vector, so [[1]] has to be taken *before* as.list(); indexing afterwards
# yields a plain character vector rather than a list. trimws() removes the
# line break that the multi-line string literal leaves on one of the pieces.
string_as_list_char <- as.list(
  trimws(strsplit(gsub("\\{|\\}", "", string), ", ")[[1]])
)
or converted to numeric:
# Numeric version: remove the braces, split on ", ", convert the pieces to
# numbers and store each number as its own list element. local() keeps the
# intermediate values out of the workspace.
string_as_list_num <- local({
  without_braces <- gsub("\\{|\\}", "", string)
  pieces <- strsplit(without_braces, ", ")[[1]]
  as.list(as.numeric(pieces))
})
Related
I would like to group the values of all other columns by the year in column yearqtr, for the following data:
dput(narepurchasement)
structure(list(Date = structure(c(844128000, 852076800, 859852800,
867715200, 875664000, 883612800, 891388800, 899251200, 907200000,
915148800, 922924800, 930787200, 938736000, 946684800, 954547200,
962409600, 970358400, 978307200, 986083200, 993945600, 1001894400,
1009843200, 1017619200, 1025481600, 1033430400, 1041379200, 1049155200,
1057017600, 1064966400, 1072915200, 1080777600, 1088640000, 1096588800,
1104537600, 1112313600, 1120176000, 1128124800, 1136073600, 1143849600,
1151712000, 1159660800, 1167609600, 1175385600, 1183248000, 1191196800,
1199145600, 1207008000, 1214870400, 1222819200, 1230768000, 1238544000,
1246406400, 1254355200, 1262304000, 1270080000, 1277942400, 1285891200,
1293840000, 1301616000, 1309478400, 1317427200, 1325376000, 1333238400,
1341100800, 1349049600, 1356998400, 1364774400, 1372636800, 1380585600,
1388534400, 1396310400, 1404172800, 1412121600, 1420070400, 1427846400,
1435708800, 1443657600, 1451606400, 1459468800, 1467331200, 1475280000,
1483228800, 1491004800, 1498867200, 1506816000, 1514764800, 1522540800,
1530403200, 1538352000, 1546300800, 1554076800, 1561939200, 1569888000,
1577836800, 1585699200, 1593561600, 1601510400, 1609459200, 1617235200,
1625097600, 1633046400, 1640995200, 1648771200), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), NetIssuance = c("-7450", "-13950",
"-14675", "-22875", "-25875", "-21675", "-17808", "-64840", "-111214",
"-6920", "-76700", "-26188", "-1", "27044", "-50630", "-10731",
"-83887", "-4850", "-14775", "-27350", "-1150", "-2644", "6357",
"-20316", "2098", "-10173", "-3438", "0", "-2055", "-0.802",
"-16823", "-32200", "-70730", "-43031", "-58722", "-90630", "-83784",
"-110795", "-116977", "-107859", "-137542", "-109583", "-149516",
"-162019", "-226618", "-84099", "-38612", "-73533", "-93475",
"-37950", "39311", "20920", "-62302", "-35987", "-35433", "-71238",
"-58295", "-59766", "-101392", "-133088", "-88329", "-49568",
"-99135", "-73428", "-77876", "-38256", "-73497", "-60269", "-105274",
"-101911", "-48493", "-80452", "-71090", "-116963", "-102404",
"-129399", "-104711", "-127487", "-136914", "-150658", "-80792",
"-89438", "-55464", "-119607", "-61042", "-122438", "-225035",
"-79778", "-190075", "-174006", "-46583", "-111504", "-124927",
"-95947", "-14946", "7398", "-67450", "-30403", "-133211", "-218291",
"-237670", "-227868", "-135084"), GrossIssuance = c(35393, 34426,
39963, 36586, 40630, 36993, 57637, 31110, 52737, 52487, 78711,
65846, 95574, 113349, 86067, 75480, 71906, 54552, 64094, 39824,
55322, 43624, 50257, 29329, 35664, 32098, 36084, 42285, 48634,
57955, 47497, 43892, 55599, 48385, 52197, 63692, 63159, 68401,
69557, 63825, 94723, 88627, 97967, 102944, 108022, 86316, 96002,
93730, 75885, 64674, 77307, 62616, 66705, 54873, 57173, 48392,
68703, 64334, 69966, 43637, 55198, 66678, 70380, 68331, 72198,
73702, 83784, 103945, 94138, 89471, 100239, 100418, 111302, 129933,
124281, 116589, 97678, 106734, 118234, 106262, 107965, 122679,
115625, 107485, 112226, 106358, 99560, 101952, 91526, 95447,
118912, 108570, 100615, 107853, 154908, 134115, 154227, 163567,
126579, 112180, 132474, 92327, 80342), GrossRetirement = c(42843,
48376, 54638, 59461, 66505, 58668, 75445, 95950, 163951, 59407,
155411, 92034, 96134, 86305, 136697, 86211, 155793, 59402, 78869,
67174, 56472, 46268, 43900, 49645, 33566, 42271, 39522, 42226,
50689, 58757, 64320, 76092, 126329, 91416, 110919, 154322, 146943,
179196, 186534, 171684, 232265, 198210, 247483, 264963, 334640,
170415, 134614, 167263, 169360, 102624, 37996, 41696, 129007,
90860, 92606, 119630, 126998, 124100, 171358, 176725, 143527,
116246, 169515, 141759, 150074, 111958, 157281, 164214, 199412,
191382, 148732, 180870, 182392, 246896, 226685, 245988, 202389,
234221, 255148, 256920, 188757, 212117, 171089, 227092, 173268,
228796, 324595, 181730, 281601, 269453, 165495, 220074, 225542,
203800, 169854, 126717, 221677, 193970, 259790, 330471, 370144,
320195, 215426), Repurchases = c(22263, 22638, 23514, 25005,
34369, 26643, 29082, 41095, 27253, 31805, 30779, 29350, 35972,
38084, 22859, 24761, 30152, 25245, 26623, 27689, 24038, 20954,
27243, 27314, 18885, 20208, 22000, 25993, 34329, 31567, 34011,
42358, 46643, 52980, 63201, 66599, 90778, 76295, 97243, 91990,
96248, 92541, 121025, 121251, 98213, 94359, 75799, 80943, 45745,
26459, 17862, 24888, 33600, 40277, 59624, 57199, 62624, 66172,
73022, 96186, 74495, 64511, 83483, 65770, 86040, 77135, 100169,
97375, 105120, 124551, 99652, 108215, 106062, 113685, 100343,
122057, 107005, 123418, 99546, 75010, 89025, 93073, 81638, 84879,
87762, 143170, 138764, 134874, 148169, 137193, 107400, 108922,
119371, 143785, 79929, 88312, 110984, 128796, 141252, 154680,
195502, 220050, 120000), MA = c(20579, 25738, 31124, 34456, 32136,
32025, 46364, 54855, 136698, 27602, 124632, 62684, 60162, 48221,
113837, 61450, 125641, 34157, 52246, 39486, 32434, 25314, 16657,
22331, 14681, 22063, 17522, 16233, 16360, 27191, 30309, 33735,
79686, 38436, 47718, 87723, 56166, 102901, 89291, 79694, 136016,
105669, 126458, 143711, 236427, 76055, 58816, 86320, 123615,
76166, 20134, 16809, 95407, 50583, 32982, 62430, 64373, 57928,
98336, 80539, 69032, 51735, 86032, 75988, 64033, 34823, 57112,
66838, 94292, 66831, 49079, 72655, 76330, 133211, 126342, 123931,
95384, 110803, 155602, 181911, 99732, 119044, 89451, 142213,
85506, 85626, 185832, 46856, 133432, 132260, 58095, 111152, 106172,
60015, 89925, 38404, 110693, 65174, 118539, 175791, 174642, 100146,
95426), GDP = c(8259.771, 8362.655, 8518.825, 8662.823, 8765.907,
8866.48, 8969.699, 9121.097, 9293.991, 9411.682, 9526.21, 9686.626,
9900.169, 10002.179, 10247.72, 10318.165, 10435.744, 10470.231,
10599, 10598.02, 10660.465, 10783.5, 10887.46, 10984.04, 11061.433,
11174.129, 11312.766, 11566.669, 11772.234, 11923.447, 12112.815,
12305.307, 12527.214, 12767.286, 12922.656, 13142.642, 13324.204,
13599.16, 13753.424, 13870.188, 14039.56, 14215.651, 14402.082,
14564.117, 14715.058, 14706.538, 14865.701, 14898.999, 14608.208,
14430.901, 14381.236, 14448.882, 14651.248, 14764.611, 14980.193,
15141.605, 15309.471, 15351.444, 15557.535, 15647.681, 15842.267,
16068.824, 16207.13, 16319.54, 16420.386, 16629.05, 16699.551,
16911.068, 17133.114, 17144.281, 17462.703, 17743.227, 17852.54,
17991.348, 18193.707, 18306.96, 18332.079, 18425.306, 18611.617,
18775.459, 18968.041, 19148.194, 19304.506, 19561.896, 19894.75,
20155.486, 20470.197, 20687.278, 20819.269, 21013.085, 21272.448,
21531.839, 21706.532, 21538.032, 19636.731, 21362.428, 21704.706,
22313.85, 23046.934, 23550.42, 24349.121, 24740.48, 25248.476
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-103L))
I don't know how exactly...
We may use
library(dplyr)
library(zoo)
library(lubridate)
# Add a year-quarter column derived from Date, then group the rows by the
# calendar year of that quarter. The result is a grouped data frame; no
# aggregation is applied here.
narepurchasement %>%
  mutate(yearqtr = zoo::as.yearqtr(Date)) %>%
  group_by(year = lubridate::year(yearqtr))
My first data frame (df) contains EntryDate and ExitDate columns. Another data frame (n1) has all trading dates. I need a new column in the first data frame holding the number of trading days between the two dates, as counted from the second data frame. How do I call this dayCount function for each row of df? When I try to use mapply, I am unable to pass n1 as a parameter.
#' Count the trading dates that fall inside a date range (bounds inclusive).
#'
#' @param startDate First day of the range; a single Date or a vector of Dates.
#' @param endDate Last day of the range; must have the same length as
#'   `startDate`.
#' @param n1 The trading dates, given either as a Date vector or as a data
#'   frame with a `Date` column.
#' @return An integer vector with one count per `startDate`/`endDate` pair.
#'   Missing trading dates and missing range bounds contribute nothing to the
#'   count.
dayCount <- function(startDate, endDate, n1) {
  # Accept both layouts of the trading calendar: a data frame with a `Date`
  # column or a bare vector of dates.
  if (is.data.frame(n1)) {
    if (!"Date" %in% names(n1)) {
      stop("`n1` must contain a `Date` column", call. = FALSE)
    }
    trading_dates <- n1[["Date"]]
  } else {
    trading_dates <- n1
  }
  stopifnot(length(startDate) == length(endDate))

  # One count per range, so the function can be applied to whole columns
  # without mapply().
  vapply(
    seq_along(startDate),
    function(i) {
      in_range <- trading_dates >= startDate[i] & trading_dates <= endDate[i]
      sum(in_range, na.rm = TRUE)
    },
    integer(1)
  )
}
df<- structure(list(EntryDate = structure(c(11355, 11418, 11436, 11449,
11520, 11523, 11548, 11620, 11768, 11773), class = "Date"), ExitDate = structure(c(11360,
11422, 11438, 11457, 11522, 11526, 11554, 11625, 11772, 11778
), class = "Date")), row.names = c(22L, 65L, 76L, 84L, 135L,
138L, 155L, 204L, 305L, 307L), class = "data.frame")
n1<- structure(c(11354, 11355, 11358, 11359, 11360, 11361, 11362,
11365, 11366, 11367, 11368, 11369, 11372, 11373, 11374, 11375,
11376, 11379, 11380, 11381, 11382, 11383, 11386, 11388, 11389,
11390, 11393, 11394, 11395, 11396, 11397, 11400, 11401, 11402,
11403, 11404, 11407, 11408, 11409, 11410, 11411, 11414, 11415,
11416, 11418, 11421, 11422, 11423, 11424, 11428, 11429, 11430,
11431, 11432, 11435, 11436, 11437, 11438, 11439, 11442, 11444,
11445, 11446, 11449, 11450, 11451, 11452, 11453, 11456, 11457,
11458, 11459, 11460, 11463, 11464, 11465, 11466, 11467, 11470,
11471, 11472, 11473, 11474, 11477, 11478, 11479, 11480, 11481,
11484, 11485, 11486, 11487, 11488, 11491, 11492, 11493, 11494,
11495, 11498, 11499, 11500, 11501, 11502, 11505, 11506, 11507,
11508, 11509, 11512, 11513, 11514, 11515, 11516, 11519, 11520,
11521, 11522, 11523, 11526, 11527, 11528, 11529, 11530, 11533,
11534, 11535, 11536, 11537, 11540, 11541, 11542, 11543, 11544,
11547, 11548, 11550, 11551, 11554, 11555, 11557, 11558, 11561,
11562, 11563, 11564, 11565, 11568, 11569, 11570, 11571, 11572,
11575, 11576, 11577, 11578, 11579, 11582, 11583, 11584, 11585,
11586, 11589, 11590, 11591, 11592, 11593, 11596, 11598, 11599,
11600, 11603, 11604, 11605, 11606, 11607, 11610, 11611, 11612,
11613, 11614, 11617, 11618, 11619, 11620, 11624, 11625, 11626,
11627, 11628, 11631, 11632, 11633, 11634, 11635, 11638, 11639,
11640, 11641, 11645, 11646, 11647, 11648, 11649, 11652, 11653,
11654, 11655, 11659, 11660, 11661, 11662, 11663, 11666, 11667,
11668, 11669, 11670, 11674, 11675, 11676, 11677, 11680, 11682,
11683, 11684, 11687, 11688, 11689, 11690, 11691, 11694, 11695,
11696, 11697, 11698, 11701, 11702, 11703, 11704, 11705, 11708,
11709, 11710, 11711, 11712, 11715, 11716, 11717, 11718, 11719,
11722, 11723, 11724, 11725, 11726, 11729, 11730, 11731, 11732,
11733, 11736, 11737, 11738, 11739, 11740, 11743, 11744, 11745,
11746, 11747, 11750, 11751, 11752, 11753, 11754, 11757, 11758,
11759, 11760, 11761, 11764, 11765, 11766, 11767, 11768, 11772,
11773, 11774, 11778), class = "Date")
You can use %in% to count number of days in n1 between each EntryDate and ExitDate.
# For every row of df, count the trading dates in n1 that lie between
# EntryDate and ExitDate (both inclusive). Comparing n1 against the range
# bounds avoids building a day-by-day sequence for each row; it also yields 0
# when ExitDate precedes EntryDate and works when n1 holds a single date.
# The counts are stored as integers.
df$dayCount <- vapply(
  seq_len(nrow(df)),
  function(i) {
    sum(n1 >= df$EntryDate[i] & n1 <= df$ExitDate[i], na.rm = TRUE)
  },
  integer(1)
)
df
# EntryDate ExitDate dayCount
#22 2001-02-02 2001-02-07 4
#65 2001-04-06 2001-04-10 3
#76 2001-04-24 2001-04-26 3
#84 2001-05-07 2001-05-15 7
#135 2001-07-17 2001-07-19 3
#138 2001-07-20 2001-07-23 2
#155 2001-08-14 2001-08-20 4
#204 2001-10-25 2001-10-30 3
#305 2002-03-22 2002-03-26 2
#307 2002-03-27 2002-04-01 3
I'm trying to create a 30 day forecast using auto.arima from the forecast package. I want to capture the long term trend, so I inserted it into the xreg argument.
The data:
dput(data)
structure(list(TKDate = structure(c(15706, 15707, 15708, 15709,
15710, 15711, 15712, 15713, 15714, 15715, 15716, 15717, 15718,
15719, 15720, 15721, 15722, 15723, 15724, 15725, 15726, 15727,
15728, 15729, 15730, 15731, 15732, 15733, 15734, 15735, 15736,
15737, 15738, 15739, 15740, 15741, 15742, 15743, 15744, 15745,
15746, 15747, 15748, 15749, 15750, 15751, 15752, 15753, 15754,
15755, 15756, 15757, 15758, 15759, 15760, 15761, 15762, 15763,
15764, 15765, 15766, 15767, 15768, 15769, 15770, 15771, 15772,
15773, 15774, 15775, 15776, 15777, 15778, 15779, 15780, 15781,
15782, 15783, 15784, 15785, 15786, 15787, 15788, 15789, 15790,
15791, 15792, 15793, 15794, 15795, 15796, 15797, 15798, 15799,
15800, 15801, 15802, 15803, 15804, 15805, 15806, 15807, 15808,
15809, 15810, 15811, 15812, 15813, 15814, 15815, 15816, 15817,
15818, 15819, 15820, 15821, 15822, 15823, 15824, 15825, 15826,
15827, 15828, 15829, 15830, 15831, 15832, 15833, 15834, 15835,
15836, 15837, 15838, 15839, 15840, 15841, 15842, 15843, 15844,
15845, 15846, 15847, 15848, 15849, 15850, 15851, 15852, 15853,
15854, 15855, 15856, 15857, 15858, 15859, 15860, 15861, 15862,
15863, 15864, 15865, 15866, 15867, 15868, 15869, 15870, 15871,
15872, 15873, 15874, 15875, 15876, 15877, 15878, 15879, 15880,
15881, 15882, 15883, 15884, 15885, 15886, 15887, 15888, 15889,
15890, 15891, 15892, 15893, 15894, 15895, 15896, 15897, 15898,
15899, 15900, 15901, 15902, 15903, 15904, 15905, 15906, 15907,
15908, 15909, 15910, 15911, 15912, 15913, 15914, 15915, 15916,
15917, 15918, 15919, 15920, 15921, 15922, 15923, 15924, 15925,
15926, 15927, 15928, 15929, 15930, 15931, 15932, 15933, 15934,
15935, 15936, 15937, 15938, 15939, 15940, 15941, 15942, 15943,
15944, 15945, 15946, 15947, 15948, 15949, 15950, 15951, 15952,
15953, 15954, 15955, 15956, 15957, 15958, 15959, 15960, 15961,
15962, 15963, 15964, 15965, 15966, 15967, 15968, 15969, 15970,
15971, 15972, 15973, 15974, 15975, 15976, 15977, 15978, 15979,
15980, 15981, 15982, 15983, 15984, 15985, 15986, 15987, 15988,
15989, 15990, 15991, 15992, 15993, 15994, 15995, 15996, 15997,
15998, 15999, 16000, 16001, 16002, 16003, 16004, 16005, 16006,
16007, 16008, 16009, 16010, 16011, 16012, 16013, 16014, 16015,
16016, 16017, 16018, 16019, 16020, 16021, 16022, 16023, 16024,
16025, 16026, 16027, 16028, 16029, 16030, 16031, 16032, 16033,
16034, 16035, 16036, 16037, 16038, 16039, 16040, 16041, 16042,
16043, 16044, 16045, 16046, 16047, 16048, 16049, 16050, 16051,
16052, 16053, 16054, 16055, 16056, 16057, 16058, 16059, 16060,
16061, 16062, 16063, 16064, 16065, 16066, 16067, 16068, 16069,
16070, 16071, 16072, 16073, 16074, 16075, 16076, 16077, 16078,
16079, 16080, 16081, 16082, 16083, 16084, 16085, 16086, 16087,
16088, 16089, 16090, 16091, 16092, 16093, 16094, 16095, 16096,
16097, 16098, 16099, 16100, 16101, 16102, 16103, 16104, 16105,
16106, 16107, 16108, 16109, 16110, 16111, 16112, 16113, 16114,
16115, 16116, 16117, 16118), class = "Date"), spend = c(7984.39,
11476.06, 6555.57, 3981.45, 3963.83, 4827.72, 6309.32, 13503.36,
17075.89, 33353.71, 29324.34, 7968.68, 5540.63, 12113.45, 15596.38,
19328.67, 20224.68, 18977.55, 16128.27, 10633.56, 11887.79, 17881.11,
12613.46, 11607.55, 38232.11, 7861.25, 9397.88, 12056.02, 15115.87,
12275.93, 14537.35, 9594.26, 8215.83, 9632.52, 9993.15, 13478.37,
28509.38, 12016.33, 8907.76, 8757.43, 9513.09, 10299.5, 10385.03,
12515.62, 9008.95, 17825.68, 9320.47, 11189.58, 12902.31, 13341.35,
18675.32, 16989.53, 10114.53, 9876.65, 11203.39, 11718.73, 26264.95,
12414.19, 12275.16, 9242.85, 8883.97, 10095.72, 11581.55, 14815.78,
25064.12, 9297.07, 8047.91, 6876.37, 8881.63, 10982.85, 9975.33,
24124.62, 8514.66, 15719.84, 5807.39, 8422.38, 15184.95, 14757.58,
11087.61, 11070.78, 10425.67, 15517.8, 11257.69, 11915.47, 11720.37,
34064.62, 6493.41, 5757.4, 4387.54, 6520.58, 7806.81, 6356.63,
10916.36, 9013.43, 9722.41, 6044.25, 7971.7, 23933.54, 8627.85,
9722.77, 18660.13, 13011.36, 11445.11, 14219.2, 17138.92, 16016.68,
11434, 31379.03, 8494.25, 12493.85, 7708.1, 21583.05, 9026.17,
9379.35, 8287.13, 7298.16, 6097.03, 8076.57, 12871.87, 11346.89,
9115.82, 7737.98, 15065.38, 5262.73, 6522.58, 12743.94, 23945.16,
16109.26, 6985.89, 6345.08, 6246.93, 6824.66, 8491.42, 9654.99,
18976.58, 19565.68, 8075.47, 7219.79, 8629.04, 12491.64, 11915.89,
27533.16, 13554.35, 10102.21, 20029.15, 11641.82, 15855.19, 14139.17,
15376.63, 14625.99, 9098.87, 9396.64, 12015.84, 17532.75, 15131.65,
15815.5, 16048.65, 9769.63, 9582.12, 11201.8, 12810, 18857.38,
11822.71, 19289.08, 8911.29, 9437.55, 10987.14, 12995.65, 16675.26,
9741.82, 9723.57, 10328.24, 7738.04, 8432.16, 23021.73, 10367.28,
8210.53, 10468.4, 8024.25, 7296.25, 7445.34, 8539.59, 12386.23,
15335.72, 9013.49, 7994.95, 7759.46, 8789.38, 11242.38, 28653.23,
9750.96, 14398.62, 9248.74, 6766.08, 8159.14, 9899.38, 9453.35,
17588.96, 8958.16, 8256.61, 6240.4, 7235.24, 23841.62, 9002.73,
11839.47, 8693.31, 7161.37, 7046.39, 9221.53, 10004.93, 8698.76,
7948.68, 9013.27, 18536.68, 7980.38, 8968.95, 23594.14, 17744.66,
12615.73, 13646.05, 10512.58, 9066.02, 9665.15, 13183.2, 23864.45,
12017.52, 10831.07, 8954.76, 7276.41, 7882.9, 16616.41, 15384.68,
11046.53, 10621.01, 8094.74, 5451.26, 6237.79, 10717.69, 7076.38,
7044.62, 7047.45, 7774.77, 6496.21, 6340.9, 7110.53, 7691.28,
17482.02, 5576.19, 3763.79, 11477.68, 5710.5, 6519.51, 20022.61,
13153.68, 6526.28, 5885.28, 5656.17, 6270.04, 9795.38, 6320.95,
5741.98, 10808.72, 5150.87, 5416.52, 6305.05, 20953.12, 6569.02,
6360.21, 9376.68, 4973.93, 5034.48, 6380.45, 15307.28, 14386.65,
17705.88, 4779.52, 4784.79, 4737.05, 5350.28, 12112.11, 13153.72,
6049.69, 5430.46, 4627.59, 3637.2, 5482.43, 16705.15, 12221.16,
13198.88, 6484.54, 5590.86, 4979.09, 5771.75, 7311.92, 16111.86,
8047.77, 11706.91, 6042.14, 5670.74, 6905.07, 11261.89, 9700.4,
6643.03, 5693.85, 14778.67, 9128.14, 3682.01, 7911.5, 17742.85,
5093.31, 7867.97, 3202.78, 2843.35, 2598.77, 10930.81, 11204.67,
7289.62, 4000.17, 4178.89, 4507.33, 6671.48, 10317.48, 9368.98,
6156.41, 8375.24, 2762.76, 2457.59, 4707.51, 4584.52, 3749.82,
11667.82, 4271.67, 3614.3, 3715.83, 4510.57, 4872.36, 21805.71,
4757.04, 6515.92, 2834.25, 2685.19, 3509.28, 4479.35, 17817.99,
10357.67, 3412.15, 3044.95, 2840.24, 3348.91, 13671.68, 2027.42,
1616.25, 1177.73, 995.25, 1062.25, 1578.07, 1649.8, 1410.06,
1592.03, 3995.24, 6489.87, 6895.21, 8298.58, 7698.68, 5782.07,
7671.08, 19539.4, 7023.84, 6509.9, 6643.28, 19850.3, 6856.67,
13142.15, 5524.75, 5063.2, 4916.81, 6117.54, 6717.86, 9393.95,
10462.44, 10511.15, 4497.94, 4038.31, 5503.91, 5554.82, 5801.11,
12992.82, 4778.61, 4067.41, 4359.53, 6148.1, 9236.51, 5773.16,
11313.13, 4702.37, 4167.3, 4067.75, 4469.11, 9278.41, 9911.18,
5161.13, 4477.78, 4459.53, 4080.14, 5084.67, 7735.34, 10676.6,
5507.86, 8286.12, 4332.23, 4737.52, 5952.09, 7134.44)), .Names = c("TKDate",
"spend"), row.names = c(NA, 413L), class = "data.frame")
The code:
library(forecast)
# Question snippet: fit auto.arima() on the spend column with a running time
# index as external regressor, then request a 30-step forecast and plot it.
# In-sample regressor: the index 1..n, one value per row of `data`.
explaining<-rep(1:length(data$TKDate))
# Index values n+1 .. n+31 for the forecast period (31 values, while h=30 is
# requested below).
predic<-rep((length(data$TKDate)+1):(length(data$TKDate)+31))
# data[,2] is the `spend` column.
modArima <- auto.arima(data[,2],xreg=explaining)
# NOTE(review): the in-sample index `explaining` is passed as `xreg` here and
# the future values go to `newdata`. This is the call that produces the jump
# the question asks about; it is kept unchanged so the question still
# reproduces the problem.
fit<-forecast(modArima,h=30,xreg=explaining,newdata=predic)
plot(fit)
I get this weird jump:
Can anyone explain to me this weird jump? Why doesn't the forecast continue from the last observed data point (or at least close to it)?
This is a hard-to-find error, I'll admit.
forecast.Arima() takes the new values of the external regressors not in a newdata parameter (as does predict.lm()), but in the xreg parameter. So instead of
fit <- forecast(modArima,h=30,xreg=explaining,newdata=predic)
where you forecasted using the values of explaining, not those of predic (unfortunately, forecast.Arima() does not throw a warning if you feed data to the nonexistent newdata parameter), do this:
# Supply the future values of the regressor through `xreg`.
fit <- forecast(modArima, xreg = predic, h = 30)
and plot (with in-sample fits thrown in for good measure - EDIT: somewhat confusingly, the in-sample fits are not returned by auto.arima() or arima() as they are by lm(), but by forecast.Arima()):
# Draw the forecast, then overlay the in-sample fitted values in red.
plot(fit)
lines(fit[["fitted"]], col = "red")
I am having trouble picking up the seasonality that seems to be implied in the data. I think (though it's just a guess) that it's using additive and not multiplicative seasonality. I am using the forecast function and thought it would automatically pick what I need, based on a lecture from Dr. Hyndman. The following snippet of code plots the chart, and I would have expected the forecast to be higher than it is. Am I missing a model parameter or something? Any help would be appreciated.
sw<-c(2280, 1754, 1667, 1359, 1285, 1379, 2166, 1053, 1076, 1149, 1277, 1577, 1639, 1719, 1592, 2306, 3075, 2897, 1875, 1966, 2927, 3528, 2948, 2890, 3947, 3913, 3885, 4148, 5293, 5752, 6001, 7719, 5512, 6782, 6320, 6425, 6406, 7237, 8655, 9269, 12447, 13470, 13469, 13949, 17753, 17653, 14531, 14496, 13643, 12652, 12665, 10629, 8962, 8198, 6833, 5027, 4407, 4449, 4399, 5896, 6589, 3786, 4386, 4847, 5597, 5407, 4800, 7803, 9255, 10423, 5523, 8121, 6944, 8434, 9847, 9292, 9794, 10195, 10124, 11310, 12245, 12798, 14611, 15402, 13532, 16154, 15101, 14755, 17139, 16475, 19935, 19980, 25173, 28568, 27839, 28991, 27073, 29615, 25849, 27910, 27067, 21303, 20544, 15188, 13706, 9277, 10815, 7228, 4608, 4409, 9866, 8471, 8223, 6445, 6641, 6833, 11421, 8945, 8127, 10380, 12005, 13272, 9431, 12144, 14934, 14052, 11712, 14888, 15824, 17275, 18067, 19839, 21192, 22763, 22976, 23721, 22681, 20131, 19965, 20539, 19517, 22022, 23076, 30574, 40247, 43111, 39577, 40724, 44982, 44388, 46372, 43153, 36821, 32258, 31256, 27153, 23180, 18252, 16381, 13220, 12500, 10727, 9636, 8892, 8644, 9482, 9170, 10937, 12299, 15781, 11477, 16524, 16752, 18072, 14776, 13388, 18056, 19815, 21263, 22046, 26415, 24247, 25403, 30058, 26331, 32533, 31891, 35973, 27558, 24554, 25692, 25955, 24284, 24930, 28354, 34840, 40055, 42099, 42768, 48279, 50086, 56466, 42244, 51451, 44583, 39091, 33391, 29452, 25533)
# Turn the raw values into a time series with 52 observations per year,
# starting at period 30 of 2006, and forecast 52 periods ahead using
# forecast()'s default model selection.
swts <- ts(sw, start = c(2006, 30), frequency = 52)
swfc <- forecast(swts, h = 52)
plot(swfc)
Does your data have multiple seasonal periods? If so, you could check the tbats function.
Anyway, your seasonal period is greater than 12, so forecast is using an STL decomposition to adjust your seasonal data. You may want to check ?stlf for more info on what parameters you can change, or try a Box-Cox transformation:
# Estimate a Box-Cox parameter from the raw values, then redo the 52-period
# forecast on the transformed scale with the robust option switched on.
lambda <- BoxCox.lambda(sw)
swfc <- forecast(
  swts,
  h = 52,
  robust = TRUE,
  lambda = lambda
)
plot(swfc)
Is there an R function or library that will give me the monthly (or any other specified timeframe) time weighted rate of return (twrr) for my portfolio?
I am including a dput dump of sample data below of the date and portfolio ending balance below. Not sure why the dates were dput'ed the way they were, but the first date 12053 is '2003-01-01' and the last date 12195 is '2003-05-23'.
portfolio.df <- structure(
list(
Date = structure(c(12053, 12054, 12055, 12058,
12059, 12060, 12061, 12062, 12065, 12066, 12067, 12068, 12069,
12073, 12074, 12075, 12076, 12079, 12080, 12081, 12082, 12083,
12086, 12087, 12088, 12089, 12090, 12093, 12094, 12095, 12096,
12097, 12101, 12102, 12103, 12104, 12107, 12108, 12109, 12110,
12111, 12114, 12115, 12116, 12117, 12118, 12121, 12122, 12123,
12124, 12125, 12128, 12129, 12130, 12131, 12132, 12135, 12136,
12137, 12138, 12139, 12142, 12143, 12144, 12145, 12146, 12149,
12150, 12151, 12152, 12153, 12156, 12157, 12158, 12159, 12163,
12164, 12165, 12166, 12167, 12170, 12171, 12172, 12173, 12174,
12177, 12178, 12179, 12180, 12181, 12184, 12185, 12186, 12187,
12188, 12191, 12192, 12193, 12194, 12195),
class = "Date"),
Ending_Balance = c(56250000L,
56852500L, 57080000L, 57355000L, 57477500L, 56817500L, 57885000L,
57810000L, 57732500L, 57670000L, 57520000L, 57285000L, 57270000L,
56655000L, 55802500L, 56337500L, 55642500L, 54510000L, 54987500L,
55802500L, 56065000L, 56865000L, 56635000L, 56497500L, 56640000L,
56155000L, 55757500L, 55972500L, 55865000L, 55535000L, 55885000L,
56840000L, 56902500L, 56945000L, 56622500L, 57012500L, 57200000L,
58072500L, 57612500L, 57447500L, 57157500L, 57032500L, 57405000L,
57502500L, 56785000L, 57007500L, 56342500L, 55697500L, 56655000L,
56900000L, 57002500L, 57465000L, 57467500L, 57382500L, 57982500L,
56562500L, 58065000L, 58935000L, 58502500L, 58200000L, 57767500L,
57757500L, 58055000L, 58305000L, 58277500L, 58295000L, 59047500L,
58907500L, 59125000L, 59072500L, 59107500L, 59315000L, 59690000L,
58957500L, 59407500L, 59385000L, 59965000L, 60297500L, 59890000L,
59822500L, 60367500L, 60407500L, 60380000L, 60815000L, 61155000L,
61080000L, 61132500L, 61265000L, 60912500L, 61107500L, 61445000L,
61345000L, 61137500L, 61035000L, 60707500L, 61340000L, 61365000L,
61402500L, 61640000L, 61675000L)),
.Names = c("Date", "Ending_Balance"),
row.names = c(NA, 100L),
class = "data.frame")