anomalize::time_decompose error: `time_scale == key_value`
I'm trying to analyze anomalies in a time series using the anomalize package. The data is logged every second. When I call the time_decompose function, I get an error, as shown in the following reprex.
library(tidyverse)
library(lubridate)
library(tibbletime)
library(anomalize)
df <- structure(list(t = c(1001, 1002, 1003, 1004, 1005, 1006, 1007,
1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018,
1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029,
1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040,
1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051,
1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062,
1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073,
1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084,
1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095,
1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106,
1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 1117,
1118, 1119, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128,
1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139,
1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150,
1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161,
1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172,
1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183,
1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194,
1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205,
1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, 1216,
1217, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227,
1228, 1229, 1230, 1231, 1232, 1233, 1234, 1235, 1236, 1237, 1238,
1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249,
1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260,
1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271,
1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282,
1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293,
1294, 1295, 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304,
1305, 1306, 1307, 1308, 1309, 1310, 1311, 1312, 1313, 1314, 1315,
1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326,
1327, 1328, 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337,
1338, 1339, 1340, 1341, 1342, 1343, 1344, 1345, 1346, 1347, 1348,
1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359,
1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, 1368, 1369, 1370,
1371, 1372, 1373, 1374, 1375, 1376, 1377, 1378, 1379, 1380, 1381,
1382, 1383, 1384, 1385, 1386, 1387, 1388, 1389, 1390, 1391, 1392,
1393, 1394, 1395, 1396, 1397, 1398, 1399), value = c(118.62,
121.57, 121.08, 118.5, 118.09, 115.28, 115.8, 111.83, 116.73,
120.34, 120.11, 124.51, 125.28, 127.2, 128.45, 124.24, 122.7,
121, 116.26, 114.12, 111.08, 109.83, 107.71, 109.31, 108.4, 106.59,
103.34, 104.02, 106.15, 105.9, 105.96, 104.79, 104.04, 103.45,
102.07, 99.71, 97.9, 99.12, 100.45, 99, 97.3, 96.11, 95.09, 95.98,
95.3, 92.88, 93.1, 91.2099999999999, 85.21, 85.6, 82.9500000000001,
80.85, 77.41, 78.66, 77.93, 73.88, 72.68, 71.09, 67.04, 68.25,
70.23, 67.86, 67.94, 69.44, 68.5, 67.11, 65.8899999999999, 64.7299999999999,
66.4900000000001, 67.2099999999999, 68.5400000000001, 69.56,
66.68, 67.24, 68.79, 69.74, 72.43, 73.17, 75.39, 79.2, 80.72,
83.04, 84.73, 87.82, 88.7, 92.38, 95.55, 97.0499999999999, 97.32,
97.59, 97.97, 97.96, 100.63, 100.77, 104.89, 105.38, 109.1, 107.84,
107.78, 105.08, 106.36, 103.95, 107.74, 107.58, 109.69, 112.46,
115.77, 117.11, 121.28, 123.4, 127.66, 127.01, 130.15, 131.31,
130.12, 129.88, 129.22, 128.48, 126.17, 127.26, 128.93, 127.57,
127.93, 128.92, 128.53, 128.72, 129.58, 129.12, 126.49, 127.31,
125.46, 125.09, 127.55, 129.01, 128.58, 128.7, 128.04, 127.13,
125.86, 124.27, 124.5, 125.45, 124.67, 126.13, 127.71, 126.17,
127.14, 125.11, 122.11, 120.4, 118.86, 117.7, 113.78, 109.02,
109.34, 108.79, 105.44, 108.11, 105.5, 106.75, 104.14, 100.34,
99.8099999999999, 95.9300000000001, 97.44, 97.63, 96.53, 97.25,
94.95, 93.06, 92.09, 86.22, 83.3400000000001, 77.04, 77.37, 74.88,
77.41, 78.21, 78.77, 81.79, 80.51, 84.71, 88.06, 88.25, 86.5300000000001,
86.6099999999999, 86.57, 89.06, 89.3499999999999, 90.92, 89.9100000000001,
90.26, 88.7699999999999, 88.08, 86.2699999999999, 84.93, 83.81,
83.18, 83.39, 84.43, 87.67, 87.23, 90.16, 89.88, 94.14, 97.64,
99.49, 100.61, 102.85, 103.01, 104.28, 106.94, 107.88, 107.5,
111.68, 110.92, 113.76, 113.8, 114.3, 115.95, 116.02, 113.34,
114.1, 118.21, 120.39, 122.09, 123.47, 122.46, 120.2, 117.88,
120.75, 119.2, 119.76, 120.43, 121.09, 120.36, 121.01, 120.87,
119.03, 120.17, 118.65, 118.13, 118.57, 118.84, 120.07, 119.93,
122.45, 125.04, 126.96, 124.86, 127.39, 129.6, 129.93, 127.71,
124.05, 120.7, 118.72, 116.6, 114.48, 110.81, 105.67, 97.48,
93.07, 95.25, 91.2100000000001, 89.47, 83.41, 84.85, 83.2500000000001,
80.89, 85.66, 86.3000000000001, 87.1499999999999, 87.1800000000001,
89.01, 91.27, 96.7400000000001, 100.41, 103.36, 108.56, 110.08,
109.31, 111, 109.77, 112.32, 114.05, 112.37, 114.59, 114.84,
114.31, 115.14, 115.38, 116.1, 112.19, 108.39, 106.65, 103.69,
100.23, 96.56, 92.07, 88.1000000000001, 80.79, 80.2800000000001,
76.8999999999999, 72.3900000000001, 70.37, 66.37, 66.8700000000001,
64.9199999999999, 63.23, 61.96, 53.95, 58.94, 59.9199999999999,
61, 60.3500000000001, 60.01, 57.33, 58.25, 59.37, 61.28, 62.61,
62.16, 63.38, 65.02, 68.9599999999999, 70.56, 71.54, 67.54, 69.34,
71.69, 73.87, 74.94, 78.94, 82.14, 84.88, 86.9299999999999, 90.26,
92.2500000000001, 95.15, 96.98, 99.2799999999999, 100.41, 105.08,
106.06, 106.95, 107.14, 106.84, 106.68, 104.91, 106.05, 102.63,
104.43, 103.92, 103.85, 103.71, 99.32, 103.02, 100.79, 101.5,
106.55, 108.73, 109.68, 112.88, 116.66, 116.59, 119.13, 122.81,
124.18, 127.61, 127.22, 129.81, 130.39, 131.11, 130.48, 126.35,
125.6, 118.47, 118.32, 116.57, 111.24, 109.76, 108.27, 105.05,
104.8, 103.92, 106.12, 108.32, 105.49, 104.84, 105.95, 103.16,
99.33, 98.52, 94.97, 92.6499999999999, 94.0800000000001, 92.09,
89.09)), row.names = c(NA, -399L), class = c("tbl_df", "tbl",
"data.frame"))

df %>%
  mutate(t = make_datetime(sec = t)) %>%
  as_tbl_time(index = t) %>%
  time_decompose(value, merge = TRUE)
#> Error in `dplyr::filter()`:
#> ℹ In argument: `time_scale == key_value`.
#> Caused by error:
#> ! `..1` must be of size 8 or 1, not size 0.
#> Backtrace:
#> ▆
#> 1. ├─... %>% time_decompose(value, merge = TRUE)
#> 2. ├─anomalize::time_decompose(., value, merge = TRUE)
#> 3. ├─anomalize:::time_decompose.tbl_time(., value, merge = TRUE)
#> 4. │ └─data %>% ...
#> 5. ├─anomalize::decompose_stl(...)
#> 6. │ └─anomalize::time_frequency(data, period = frequency, message = message)
#> 7. │ └─template %>% ...
#> 8. ├─anomalize:::target_time_decomposition_scale(...)
#> 9. │ └─template %>% dplyr::filter(time_scale == key_value) %>% ...
#> 10. ├─dplyr::pull(., !!target_expr)
#> 11. ├─dplyr::filter(., time_scale == key_value)
#> 12. ├─dplyr:::filter.data.frame(., time_scale == key_value)
#> 13. │ └─dplyr:::filter_rows(.data, dots, by)
#> 14. │ └─dplyr:::filter_eval(dots, mask = mask, error_call = error_call)
#> 15. │ ├─base::withCallingHandlers(...)
#> 16. │ └─mask$eval_all_filter(dots, env_filter)
#> 17. │ └─dplyr (local) eval()
#> 18. ├─dplyr:::dplyr_internal_error(...)
#> 19. │ └─rlang::abort(class = c(class, "dplyr:::internal_error"), dplyr_error_data = data)
#> 20. │ └─rlang:::signal_abort(cnd, .file)
#> 21. │ └─base::signalCondition(cnd)
#> 22. └─dplyr (local) `<fn>`(`<dpl:::__>`)
#> 23. └─rlang::abort(message, class = error_class, parent = parent, call = error_call)
Created on 2023-02-14 with reprex v2.0.2
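Digging into the error before posting: the "size 8" appears to refer to the eight rows of anomalize's internal time-scale template, which time_decompose() filters by the time scale it detects from the index; for this one-second index the filter evidently matches no row. The template can be inspected directly (assuming the accessor is get_time_scale_template(), as in recent anomalize versions):

# each row maps a time_scale ("second", "minute", ...) to frequency/trend spans
anomalize::get_time_scale_template()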
If I pretend the data is sampled once per minute by using make_datetime(min = t) (which is not the true rate), time_decompose works fine. However, I would like to carry out the analysis in the correct unit (seconds). The dataset is, of course, only a small slice of the real data, but it is enough to reproduce the error.
Using the same libraries and data as above, but with the index in minutes:

df %>%
  mutate(t = make_datetime(min = t)) %>%
  as_tbl_time(index = t) %>%
  time_decompose(value, merge = TRUE)
#> frequency = 60 minutes
#> trend = 399 minutes
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
#> # A time tibble: 399 × 6
#> # Index: t
#> t value observed season trend remainder
#> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1970-01-01 16:41:00 119. 119. -0.485 104. 15.5
#> 2 1970-01-01 16:42:00 122. 122. -0.462 104. 18.4
#> 3 1970-01-01 16:43:00 121. 121. 0.274 104. 17.2
#> 4 1970-01-01 16:44:00 118. 118. 0.284 104. 14.6
#> 5 1970-01-01 16:45:00 118. 118. 0.348 104. 14.1
#> 6 1970-01-01 16:46:00 115. 115. -0.421 104. 12.0
#> 7 1970-01-01 16:47:00 116. 116. 0.582 104. 11.6
#> 8 1970-01-01 16:48:00 112. 112. 1.26 104. 6.90
#> 9 1970-01-01 16:49:00 117. 117. 2.72 104. 10.3
#> 10 1970-01-01 16:50:00 120. 120. 4.42 104. 12.2
#> # … with 389 more rows
Created on 2023-02-14 with reprex v2.0.2
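One thing I have not fully tested as a workaround: time_decompose() also accepts explicit frequency and trend arguments (either a time-based definition or a plain number of observations), which would replace the "auto" scale lookup. Mirroring the spans the minute-based run picked automatically ("frequency = 60 minutes", "trend = 399 minutes") gives the sketch below; whether this actually sidesteps the failing template filter for one-second data is an assumption I have not verified.

df %>%
  mutate(t = make_datetime(sec = t)) %>%
  as_tbl_time(index = t) %>%
  # explicit spans instead of frequency = "auto" / trend = "auto"
  time_decompose(value, merge = TRUE, frequency = 60, trend = 399)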
Related
How do I add keys of a sorted dictionary sequentially, having no intersection with other value lists?
I have a dictionary whose keys are "nodes" and whose values are lists of the communities each node belongs to. After arranging the keys in non-increasing order of the length of their value lists, I need to build a list of keys: starting from the top-ranked key, iterate over all keys and add each key whose value list has no intersection with the value lists of the previously added keys. This is the dictionary:

{'2179': [15, 197, 363, 594, 766, 865, 1150, 1417, 1575, 1615, 1617, 1618, 1621, 1623, 1624, 1625, 1627],
 '2188': [15, 363, 766, 1150, 1417, 1616, 1617, 1618, 1619, 1620, 1622, 1624, 1625, 1626, 1629],
 '2180': [197, 594, 1150, 1575, 1616, 1617, 1618, 1619, 1620, 1622, 1624, 1625, 1626, 1629, 2201],
 '2195': [1615, 1616, 1617, 1618, 1619, 1620, 1621, 1622, 1623, 1624, 1625, 1626, 1627, 1628, 1629],
 '2452': [1757, 1758, 1759, 1760, 1761, 1762, 1763, 1765, 1766, 1767, 1768, 1769, 1770, 1771, 1772],
 '238': [57, 65, 76, 213, 251, 1080, 1126, 1448, 1896, 1897, 1898, 1899, 1900],
 '6974': [14, 122, 137, 491, 641, 660, 675, 1046, 1800, 2054, 2371],
 '124': [19, 66, 70, 113, 123, 159, 276, 297, 826, 2122],
 '3224': [18, 36, 44, 215, 230, 419, 1139, 1259, 2153],
 '100': [19, 66, 113, 297, 635, 826, 1356, 2122],
 '553': [40, 50, 133, 135, 192, 526, 1677, 1829]}

This is the code I tried:

k = len(new_dict)
seed = list(new_dict.keys())[0]
print(seed)
CommunitySet = set(new_dict.get(seed))
print(CommunitySet)
seedSet = set(seed)
Index = 1
while (seedCount < k) & (Index < count):
    seed = list(new_dict.keys())[Index]
    if set(new_dict.get(seed)).difference(CommunitySet) != set():
        CommunitySet = CommunitySet.union(new_dict.get(seed))
        print(CommunitySet)
        seedSet = seedSet.union(set(seed))
        Index = Index + 1
        seedCount = seedCount + 1
    else:
        Index = Index + 1
    Index = Index + 1
print(seedSet)

Thank you.
Maybe you could utilize set.intersection:

def main() -> None:
    data = {
        '2179': [15, 197, 363, 594, 766, 865, 1150, 1417, 1575, 1615, 1617, 1618, 1621, 1623, 1624, 1625, 1627],
        '2188': [15, 363, 766, 1150, 1417, 1616, 1617, 1618, 1619, 1620, 1622, 1624, 1625, 1626, 1629],
        '2180': [197, 594, 1150, 1575, 1616, 1617, 1618, 1619, 1620, 1622, 1624, 1625, 1626, 1629, 2201],
        '2195': [1615, 1616, 1617, 1618, 1619, 1620, 1621, 1622, 1623, 1624, 1625, 1626, 1627, 1628, 1629],
        '2452': [1757, 1758, 1759, 1760, 1761, 1762, 1763, 1765, 1766, 1767, 1768, 1769, 1770, 1771, 1772],
        '238': [57, 65, 76, 213, 251, 1080, 1126, 1448, 1896, 1897, 1898, 1899, 1900],
        '6974': [14, 122, 137, 491, 641, 660, 675, 1046, 1800, 2054, 2371],
        '124': [19, 66, 70, 113, 123, 159, 276, 297, 826, 2122],
        '3224': [18, 36, 44, 215, 230, 419, 1139, 1259, 2153],
        '100': [19, 66, 113, 297, 635, 826, 1356, 2122],
        '553': [40, 50, 133, 135, 192, 526, 1677, 1829]
    }
    new_data = {}
    used_values = set()
    for key, values in data.items():
        values_set = set(values)
        if values_set.intersection(used_values):  # Equivalent to `values_set & used_values`.
            continue
        used_values |= values_set
        new_data[key] = values
    print(new_data)

if __name__ == '__main__':
    main()

Output:

{
    '2179': [15, 197, 363, 594, 766, 865, 1150, 1417, 1575, 1615, 1617, 1618, 1621, 1623, 1624, 1625, 1627],
    '2452': [1757, 1758, 1759, 1760, 1761, 1762, 1763, 1765, 1766, 1767, 1768, 1769, 1770, 1771, 1772],
    '238': [57, 65, 76, 213, 251, 1080, 1126, 1448, 1896, 1897, 1898, 1899, 1900],
    '6974': [14, 122, 137, 491, 641, 660, 675, 1046, 1800, 2054, 2371],
    '124': [19, 66, 70, 113, 123, 159, 276, 297, 826, 2122],
    '3224': [18, 36, 44, 215, 230, 419, 1139, 1259, 2153],
    '553': [40, 50, 133, 135, 192, 526, 1677, 1829]
}

Note: Indentation in output has been added manually for readability.
ARIMA Modeling running time issue
My data set is weekly and contains two variables, Production and Shipment. Production is the independent variable and Shipment is the dependent variable. First I forecast the Production values, then use them as a regressor to forecast the Shipment variable. If I run the ARIMA using the training-set date range from "2018-12-31" to "2021-11-22", the model runs within 10 minutes and I can see the model values. Using the same model, if I extend the training-set date range from "2018-12-31" to "2021-12-27", the model runs for so long that it never finishes, and I cannot view the model output. Could you please help me with this query? Thank you for the support.

Original.df <- structure(list(YearWeek = c(
  "201901", "201902", "201903", "201904", "201905", "201906", "201907", "201908",
  "201909", "201910", "201911", "201912", "201913", "201914", "201915", "201916",
  "201917", "201918", "201919", "201920", "201921", "201922", "201923", "201924",
  "201925", "201926", "201927", "201928", "201929", "201930", "201931", "201932",
  "201933", "201934", "201935", "201936", "201937", "201938", "201939", "201940",
  "201941", "201942", "201943", "201944", "201945", "201946", "201947", "201948",
  "201949", "201950", "201951", "201952", "202001", "202002", "202003", "202004",
  "202005", "202006", "202007", "202008", "202009", "202010", "202011", "202012",
  "202013", "202014", "202015", "202016", "202017", "202018", "202019", "202020",
  "202021", "202022", "202023", "202024", "202025", "202026", "202027", "202028",
  "202029", "202030", "202031", "202032", "202033", "202034", "202035", "202036",
  "202037", "202038", "202039", "202040", "202041", "202042", "202043", "202044",
  "202045", "202046", "202047", "202048", "202049", "202050", "202051", "202052",
  "202053", "202101", "202102", "202103", "202104", "202105", "202106", "202107",
  "202108", "202109", "202110", "202111", "202112", "202113", "202114", "202115",
  "202116", "202117", "202118", "202119", "202120", "202121", "202122", "202123",
  "202124", "202125", "202126", "202127", "202128", "202129", "202130", "202131",
  "202132", "202133", "202134", "202135", "202136", "202137", "202138", "202139",
  "202140", "202141", "202142", "202143", "202144", "202145", "202146", "202147",
  "202148", "202149", "202150", "202151", "202152", "202201", "202202", "202203"
), Shipment = c(
  399, 1336, 1018, 1126, 1098, 1235, 1130, 1258, 897, 1333, 1221, 1294, 1628,
  1611, 1484, 1238, 1645, 1936, 1664, 1482, 2060, 1964, 1875, 1645, 2039, 1640,
  733, 1764, 1639, 1968, 1692, 1677, 1542, 1299, 1328, 1130, 1741, 1929, 1843,
  1427, 1467, 1450, 1041, 1238, 1721, 1757, 1813, 1001, 1208, 1916, 1435, 540,
  681, 1436, 1170, 938, 1206, 1648, 1169, 1311, 1772, 1333, 1534, 1365, 1124,
  846, 732, 753, 1266, 1652, 1772, 1814, 1649, 1191, 1298, 986, 1296, 1066,
  777, 1041, 1388, 1289, 1097, 1356, 1238, 1732, 1109, 1104, 1155, 1334, 1094,
  770, 1411, 1304, 1269, 1093, 1096, 1121, 943, 695, 1792, 2033, 1586, 768,
  685, 993, 1406, 1246, 1746, 1740, 938, 160, 1641, 1373, 1023, 1173, 1611,
  928, 1038, 1009, 1274, 1369, 1231, 1053, 1163, 880, 870, 1131, 882, 1143,
  632, 394, 510, 543, 535, 824, 874, 591, 512, 448, 247, 452, 470,
  747, 545, 639, 326, 414, 604, 640, 458, 272, 524, 589, 666, 217,
  215, 348, 537, 466
), Production = c(
  794, 1400, 1505, 1055, 1396, 1331, 1461, 1623, 1513, 1667, 1737, 1264, 1722,
  1587, 2094, 1363, 2007, 1899, 1749, 1693, 1748, 1455, 2078, 1702, 1736, 1885,
  860, 1372, 1716, 1290, 1347, 1451, 1347, 1409, 1203, 1235, 1397, 1557, 1406,
  1451, 1704, 670, 1442, 1336, 1611, 1401, 1749, 744, 1558, 1665, 1317, 41,
  441, 1351, 1392, 1180, 1447, 1265, 1485, 1494, 1543, 1581, 1575, 1597, 1191,
  1386, 889, 1002, 1573, 1380, 1346, 1243, 1009, 965, 1051, 905, 1094, 1194,
  891, 1033, 921, 880, 1135, 1058, 1171, 1022, 956, 880, 902, 983, 1014,
  945, 1021, 1058, 1191, 1139, 1292, 573, 1173, 514, 1292, 1310, 1239, 41,
  41, 1182, 1028, 1028, 1196, 1214, 1045, 256, 1451, 1344, 1352, 1257, 1444,
  786, 1369, 1185, 1262, 1025, 949, 1051, 941, 727, 911, 951, 987, 1136,
  884, 770, 959, 1102, 1109, 1098, 988, 983, 1002, 904, 1147, 1149, 919,
  1058, 1112, 479, 1028, 1154, 1126, 1155, 1208, 536, 839, 1178, 1225, 539,
  41, 862, 839, 873
)), row.names = c(NA, 160L), class = "data.frame")

# Converting the df to accommodate leap years for weekly observations
Original.df <- Original.df %>%
  mutate(
    isoweek = stringr::str_replace(YearWeek, "^(\\d{4})(\\d{2})$", "\\1-W\\2-1"),
    date = ISOweek::ISOweek2date(isoweek)
  )

# Creating test and train data - 1st case - training data until WK47 ("2021-11-22")
Original.train.df <- Original.df %>% filter(date >= "2018-12-31", date <= "2021-11-22")
Original.test.df <- Original.df %>% filter(date >= "2021-11-29", date <= "2021-12-27")
Shipment.Test.df <- Original.test.df %>%
  dplyr::select(-YearWeek, -Production, -date, -isoweek) %>%
  as_tibble()

# Splitting the original train data to contain only the week, dependent and independent variables
Total.train.df <- Original.train.df %>%
  mutate(Week.1 = yearweek(ISOweek::ISOweek(date))) %>%
  dplyr::select(-YearWeek, -date, -isoweek) %>%
  as_tsibble(index = Week.1)

# Model 1 - fitting a forecast model (ARIMA with Fourier terms) to Production with the
# training data until WK47 (2021-11-22)
lambda_production <- Total.train.df %>%
  features(Production, features = guerrero) %>%
  pull(lambda_guerrero)

bestfit.Prod.1.AICc <- Inf
for (K in seq(25)) {
  fit.Prod.1 <- Total.train.df %>%
    model(ARIMA(box_cox(Production, lambda_production) ~ fourier(K = K),
                stepwise = FALSE, approximation = FALSE))
  if (purrr::pluck(glance(fit.Prod.1), "AICc") < bestfit.Prod.1.AICc) {
    bestfit.Prod.1.AICc <- purrr::pluck(glance(fit.Prod.1), "AICc")
    bestfit.Prod.1 <- fit.Prod.1
    bestK.Prod.1 <- K
  }
}
bestK.Prod.1
glance(bestfit.Prod.1)

# Creating test and train data - 2nd case - training data until WK52 ("2021-12-27")
Original.train.df_2 <- Original.df %>% filter(date >= "2018-12-31", date <= "2021-12-27")
Original.test.df_2 <- Original.df %>% filter(date >= "2022-01-03", date <= "2022-01-17")
Shipment.Test.df_2 <- Original.test.df_2 %>%
  dplyr::select(-YearWeek, -Production, -date, -isoweek) %>%
  as_tibble()

# Splitting the original train data to contain only the week, dependent and independent variables
Total.train.df_2 <- Original.train.df_2 %>%
  mutate(Week.1 = yearweek(ISOweek::ISOweek(date))) %>%
  dplyr::select(-YearWeek, -date, -isoweek) %>%
  as_tsibble(index = Week.1)

# Model 2 - fitting a forecast model (ARIMA with Fourier terms) to Production with the
# training data until WK52
lambda_production_2 <- Total.train.df_2 %>%
  features(Production, features = guerrero) %>%
  pull(lambda_guerrero)

bestfit.Prod.2.AICc <- Inf
for (K in seq(25)) {
  fit.Prod.2 <- Total.train.df %>%
    model(ARIMA(box_cox(Production, lambda_production_2) ~ fourier(K = K),
                stepwise = FALSE, approximation = FALSE))
  if (purrr::pluck(glance(fit.Prod.1), "AICc") < bestfit.Prod.1.AICc) {
    bestfit.Prod.2.AICc <- purrr::pluck(glance(fit.Prod.2), "AICc")
    bestfit.Prod.2 <- fit.Prod.2
    bestK.Prod.2 <- K
  }
}
bestK.Prod.2
glance(bestfit.Prod.2)

Model 2 above never finished executing; it is still running. As you can see, model 1 and model 2 have no difference other than the training data, so could you please let me know what I am missing here? Thank you.
Detect peaks with two adjacent identical values using pracma::findpeaks [duplicate]
This question already has answers here:
Find sustained peaks using pracma::findpeaks (1 answer)
Identify sustained peaks using pracma::findpeaks (2 answers)
Closed 2 years ago.

I've got some data with 23 peaks, and I've used pracma::findpeaks to pick them out. However, one of the peaks has two identical values adjacent to each other, at time=7524 and time=7525. It seems findpeaks deals with this by ignoring the peak. Could someone please help me make it recognise this peak? I'd like it to pick out the first of the two values, though it would also be good to know how to make it pick out the last of them as well.

library(pracma)
library(ggplot2)

data <- data.frame(
  time=c(1562, 1563, 1564, 1565, 1566, 1810, 1811, 1812, 1813, 1814, 2058, 2059, 2060,
         2061, 2306, 2307, 2308, 2309, 2310, 2560, 2561, 2562, 2563, 2564, 3064, 3065,
         3066, 3067, 3580, 3581, 3582, 3583, 3584, 4095, 4096, 4097, 4098, 4099, 4610,
         4611, 4612, 4613, 4614, 5128, 5129, 5130, 5131, 5132, 5133, 5637, 5638, 5639,
         5640, 5641, 5876, 5877, 5878, 5879, 5880, 5881, 5882, 6125, 6126, 6127, 6128,
         6129, 6130, 6607, 6608, 6609, 6610, 6611, 6612, 6613, 7072, 7073, 7074, 7075,
         7076, 7077, 7078, 7079, 7519, 7520, 7521, 7522, 7523, 7524, 7525, 7526, 7527,
         7528, 7941, 7942, 7943, 7944, 7945, 7946, 7947, 7948, 7949, 8342, 8343, 8344,
         8345, 8346, 8347, 8348, 8349, 8350, 8351, 8708, 8709, 8710, 8711, 8712, 8713,
         8714, 8715, 8716, 8717, 8718, 9045, 9046, 9047, 9048, 9049, 9050, 9051, 9052,
         9053, 9054, 9055, 9352, 9353, 9354, 9355, 9356, 9357, 9358, 9359, 9360, 9361,
         9362, 9363, 9624, 9625, 9626, 9627, 9628, 9629, 9630, 9631, 9632, 9633, 9634,
         9867, 9868, 9869, 9870, 9871, 9872, 9873, 9874, 9875, 9876),
  value=c(509, 672, 758, 686, 584, 559, 727, 759, 688, 528, 562, 711, 768,
          678, 644, 750, 822, 693, 531, 566, 738, 793, 730, 511, 587, 739,
          761, 651, 579, 747, 768, 705, 544, 551, 687, 756, 749, 645, 564,
          680, 724, 691, 596, 535, 625, 685, 689, 612, 512, 537, 616, 657,
          653, 573, 506, 598, 675, 685, 668, 609, 515, 575, 656, 687, 678,
          626, 533, 509, 587, 641, 680, 663, 602, 515, 505, 583, 646, 693,
          696, 684, 630, 549, 500, 572, 637, 681, 725, 736, 736, 703, 649,
          556, 568, 637, 682, 743, 765, 767, 709, 660, 587, 548, 622, 690,
          761, 779, 764, 749, 694, 631, 525, 571, 646, 724, 788, 811, 834,
          818, 776, 712, 616, 536, 556, 649, 738, 801, 857, 866, 837, 808,
          718, 647, 568, 508, 605, 714, 823, 872, 917, 916, 890, 825, 742,
          642, 543, 549, 656, 766, 851, 921, 947, 951, 892, 830, 730, 617,
          586, 675, 760, 804, 816, 795, 740, 690, 613, 522))

peaks <- data.frame(findpeaks(data$value, npeaks=23, threshold=100, sortstr=TRUE))
data$n <- seq(1, length(data$value))
data <- merge(x=data, y=peaks, by.x="n", by.y="X2", all.x=TRUE, all.y=TRUE)
ggplot(data, aes(x=time, y=value)) +
  geom_col(fill="red") +
  geom_point(aes(x=time, y=X1))
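A sketch of one possible approach, based on the linked duplicates: findpeaks() has a peakpat argument that describes a peak as a regular-expression pattern over the coded rise/flat/fall sequence (the default is equivalent to "[+]{1,}[-]{1,}"). Allowing an optional flat run between the rise and the fall should also match the plateau at time=7524/7525; the exact pattern below is an assumption I have not tested against this data.

# "[+]{1,}" = at least one rise, "[0]*" = optional flat run, "[-]{1,}" = at least one fall
peaks <- data.frame(findpeaks(data$value, npeaks=23, threshold=100, sortstr=TRUE,
                              peakpat="[+]{1,}[0]*[-]{1,}"))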
How to perform a bootstrap and find 95% confidence interval for the median of a dataset
I am working to perform a bootstrap using the statistic median for the dataset "file", which contains only one column, "Total". This is it:

Total <- c(
  2089, 1567, 1336, 1616, 1590, 1649, 1341, 1614, 1590, 1621, 1621, 1631, 1295, 107, 18,
  195, 2059, 870, 2371, 787, 98, 2422, 655, 1277, 1336, 2109, 1811, 1337, 1290, 1308,
  1359, 1600, 1296, 693, 107, 1359, 89, 89, 89, 89, 2411, 1639, 89, 89, 1283,
  89, 89, 89, 2341, 1012, 1295, 1853, 1277, 1571, 1288, 1300, 1619, 107, 555, 1612,
  1300, 1300, 2093, 133, 1674, 988, 132, 647, 606, 544, 873, 274, 120, 1620, 1601,
  1601, 906, 1603, 1613, 1592, 1603, 1610, 1321, 2380, 1575, 1575, 1277, 2354, 1561, 1579,
  2367, 2341, 876, 1612, 1588, 2087, 1612, 890, 1586, 1580, 611, 1797, 2079, 1937, 189,
  171, 706, 1647, 1642, 1278, 1650, 1623, 1647, 1661, 1692, 1632, 1684, 2474, 403, 842,
  593, 98, 2354, 1265, 866, 1483, 2379, 1650, 1875, 1655, 1632, 1691, 1329, 867, 1632,
  1693, 1623, 829, 1659, 1685, 666, 1585, 1659, 2169, 1623, 1645, 1654, 1698, 2172, 789,
  1698, 579, 2443, 335, 132, 1952, 1265, 978, 1624, 979, 1729, 607, 181, 752, 424,
  386, 309, 998, 1435, 2476, 392, 1657, 348, 1652, 1646, 1345, 2445, 1655, 840, 1624,
  1652, 1321, 1321, 2201, 957, 917, 2458, 4096, 2458, 1346, 2459, 1634, 2459, 2459, 2459,
  2508, 714, 2457, 2457, 1703, 669, 976, 1634, 2459, 2491, 2393, 625, 1763, 879, 886,
  1085, 731, 924, 1649, 1216, 1647, 2470, 668, 2326, 757, 215, 276, 186, 901, 1402,
  429, 554, 2457, 1643, 986, 730, 1028, 971, 1952, 1584, 1023, 1352, 839, 2434, 430,
  2462, 1327, 1004, 385, 1099, 1067, 758, 679, 1423, 2495, 1664, 2495, 2495, 1345, 2530,
  1754, 1804, 2525, 1652, 2536, 1646, 2529, 1380, 1845, 963, 1339, 2482, 1417, 1729, 1384,
  1648, 344, 1648, 955, 609, 485, 1822, 513, 223, 222, 193, 1410, 1159, 586, 585,
  2671, 2702, 2529, 2212, 1658, 741, 2529, 861, 1758, 905, 2529, 597, 1049, 2529, 619,
  2620, 2596, 1688, 2590, 2545, 2590, 883, 287, 723, 2565, 1835, 1738, 2243, 1693, 2565,
  250, 2529, 1880, 1777, 701, 444, 927, 1127, 825, 2726, 1977, 235, 241, 269, 660,
  1523, 420, 678, 213, 544, 940, 983, 605, 2716, 1848, 1848, 182, 1225, 365, 993,
  224, 267, 309, 271, 324, 178, 2657, 1772, 546, 456, 2637, 1771, 677, 1409, 653,
  2359, 690, 828, 2742, 1812, 2777, 552, 1572, 2742, 2792, 2819, 1753, 265, 1901, 1753,
  2716, 2800, 2742, 453, 2742, 586, 1920, 929, 1897, 2742, 1859, 1899, 1106, 1135, 759,
  730, 1838, 863, 1929, 2751, 2751, 2751, 2751, 713, 430, 2788, 1784, 966, 2483, 1784,
  1786, 2727, 857, 1798, 1815, 730, 390, 593, 1489, 1448, 1784, 1510, 2788, 812, 856,
  808, 941, 2797, 2757, 1852, 2757, 2412, 486, 1034, 615, 845, 974, 727, 969, 2916,
  1841, 1926, 1926, 533, 446, 733, 696, 1214, 1857, 1907, 2824, 2631, 3556, 2496, 1617,
  1000, 707, 936, 761, 960, 1936, 857, 423, 1130, 1165, 2453, 338, 988, 1869, 1951,
  1932, 2820, 2742, 628, 447, 866, 637, 932, 2742, 1795, 2881, 695, 762, 2778, 427,
  714, 2781, 1865, 1861, 678, 1465, 1770, 845, 356, 817, 385, 1820, 2692, 1787, 1510,
  1814, 857, 2616, 204, 465, 1773, 2754, 1793, 1773, 1900, 185, 2706, 1162, 766, 2742,
  1816, 2742, 1790, 1803, 1795, 1026, 334, 832, 478, 1849, 2679, 1773, 797, 2649, 1814,
  1808, 99, 2037, 2616, 2719, 1813, 2637, 2648, 1813, 865, 1717, 2588, 2711, 2818, 1828,
  2553, 2720, 1791, 1780, 2706, 2565, 1717, 1881, 1037, 329, 893, 723, 1821, 2692, 2586,
  2729, 1755, 1793, 2670, 2602, 2638, 2684, 1813, 1755, 1755, 2626, 832, 739, 724, 1968,
  2598, 2627, 851, 749, 684, 625, 2673, 2778, 1764, 2644, 1800, 1792, 511, 2776, 1890,
  1764, 2776, 1040, 1049, 2699, 2061, 897, 1764, 274, 2755, 1912, 2581, 1780, 820, 1803,
  2692, 2783, 572, 2751, 2699, 1830, 1875, 633, 1083
)

Then I tried to use the bootstrap function:

> boot(Total, median, 1000)

ORDINARY NONPARAMETRIC BOOTSTRAP

Call:
boot(data = Total, statistic = median, R = 1000)

Bootstrap Statistics :
    original  bias    std. error
t1*     1603       0           0

There were 50 or more warnings (use warnings() to see the first 50)

The warning message was:

the condition has length > 1 and only the first element will be used

Can you please advise me how to perform a bootstrap to generate a 95% confidence interval for the median? I am a beginner in this and your help would be much appreciated. Thank you so much in advance.
Admittedly the boot function from the boot package has a slightly non-intuitive aspect to it. But if you read the documentation (or look at the examples in the documentation) you'll see specific instructions about the statistic argument:

In all other cases statistic must take at least two arguments. The first argument passed will always be the original data. The second will be a vector of indices, frequencies or weights which define the bootstrap sample.

So instead of:

x <- rnorm(10)
boot(data = x, statistic = median, R = 1000)

You want this:

boot(data = x, statistic = function(x, i) median(x[i]), R = 1000)

Once you're that far, the function boot.ci() can be used to compute the confidence intervals (only some of them are available in this particular example, I believe).

b <- boot(data = x, statistic = function(x, i) median(x[i]), R = 1000)
boot.ci(b)
Though the answer by @joran is right, since I already had tested code with the CI computation, here it goes:

library(boot)
bootMedian <- function(data, indices) median(data[indices])
b <- boot(Total, bootMedian, R = 1000)
boot.ci(b)
This is how you would "roll your own" bootstrap:

# number of bootstrap replicates
B <- 10000

# create empty storage container
result_vec <- vector(length=B)

for(b in 1:B) {
  # draw a bootstrap sample
  this_sample <- sample(Total, size=length(Total), replace=TRUE)
  # calculate your statistic
  m <- median(this_sample)
  # save your calculated statistic
  result_vec[b] <- m
}

# then probably draw a histogram of your bootstrapped replicates
hist(result_vec)

# get 95% confidence interval
result_vec <- result_vec[order(result_vec)]
lower_bound <- result_vec[round(0.025*B)]
upper_bound <- result_vec[round(0.975*B)]
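A follow-up note on the last step of the snippet above: rather than sorting the replicates and indexing by hand, the same percentile bounds can be read off directly with quantile(), which avoids off-by-one and rounding slips:

# equivalent 95% percentile interval, straight from the replicates
quantile(result_vec, probs = c(0.025, 0.975))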
I use the standard normal random generator in this code:

B <- i
bs.result <- matrix(NA, nrow=i, ncol=...)
for (b in 1:i) {
  sample.n <- rnorm(n, mean=..., sd=...)
  optim.b <- optim(c(mu=0, sd=1), loglik, control=list(fnscale=-1), z=sample.n)
  bs.result[b, ] <- c(optim.b$par, optim.b$converge)
}

With the last column of the table you can check whether your optim call had converged.
Unused argument in GA package
I'm trying to use the TSP package with GA. I want to do something similar to this. My code:

library(GA)
library(globalOptTests)
library(TSP)

data("USCA50")

fitFun <- function(x) -tour_length(solve_TSP(USCA50))
dist <- as.matrix(USCA50)

GA <- ga(
  type = "permutation",
  fitness = fitFun,
  distMatrix = dist,
  min = 1,
  max = 50
)

The error I get:

Error in fitness(Pop[i, ], ...) : unused argument (distMatrix = c(0, 1167,
1579, 437, 3575, 1453, 226, 2976, 1107, 1006, 1046, 891, 1488, 1030, 1803,
190, 1122, 1373, 1860, 523, 1047, 1152, 370, 1453, 1629, 1323, 1032, 654,
1462, 752, 993, 813, 1178, 1705, 816, 1206, 1285, 1641, 1578, 1703, 1343,
1317, 1647, 1157, 1479, 1703, 1166, 1211, 795, 1572, 1167, 0, 413, 1422,
2895, 316, 1172, 3094, 140, 382, 189, 530, 392, 526, 635, 1174, 2056, 286,
692, 910, 207, 211, 1035, 303, 2046, 2164, 1385, 845, 297, 597, 1033, 393,
1766, 546, 386, 1076, 153, 476, 432, 546, 184, 184, 481, 1579, 1686, 543,
20, 2008, 527, 434, 1579, 413, 0, 1832, 2766, 167, 1585, 3265, 508, 677,
547, 842, 229, 775, 229, 1575, 2451, 275, 289, 1277, 582, 514, 1420, 207,
2347, 2544, 1720, 1189, 116, 947, 1350, 800, 2117, 138, 777, 1338, 334, 62,
106, 145, 260, 312, 128, 1911, 1961, 136, 413, 2384, 913, 131, 437, 1422,
1832, 0, 3437, 1732, 272, 2607, 1327, 1355, 1345, 1269, 1787, 1409, 2041,
615, 697, 1670, 2093, 954, 1256, 1345, 807, 1672, 1242, 8

Is there something wrong with my GA package? RStudio doesn't show me this parameter, but somehow others are able to run it.
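For what it's worth, the traceback line Error in fitness(Pop[i, ], ...) suggests what is happening: ga() forwards any extra arguments it receives (here distMatrix) on to the fitness function, and fitFun above only accepts x. A sketch of a fitness function whose signature accepts the forwarded matrix and scores the candidate tour directly (untested; it also assumes the installed GA version still accepts min/max, which newer releases rename to lower/upper):

# the second parameter name must match the extra argument passed to ga()
fitFun <- function(tour, distMatrix) {
  route <- c(tour, tour[1])  # close the tour back to the starting city
  # negative total length, because ga() maximizes the fitness value
  -sum(distMatrix[cbind(route[-length(route)], route[-1])])
}

GA <- ga(
  type = "permutation",
  fitness = fitFun,
  distMatrix = dist,
  min = 1,
  max = 50
)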