Related
I have two very large dataframes of the following structure(s):
df1:
structure(list(Date = structure(c(18871, 18873, 18883, 18928,
18950, 18961, 18962, 18963, 18967, 18969, 18970, 18971, 18974,
18975, 18976, 18977, 18978, 18981, 18982, 18983, 18984, 18985,
18990, 18991, 18992, 18996, 18997, 18998, 18999, 19002, 19003,
19004, 19005, 19006, 19009, 19010, 19011, 19012, 19013, 19016,
19017, 19018, 19019, 19020, 19023, 19025, 19026, 19027, 19030,
19031, 19033, 19034, 19037, 19038, 19039, 19040, 19041, 19044,
19045, 19046, 19047, 19048, 19051, 19052, 19053, 19054, 19055,
19058, 19059, 19066, 19082, 19104, 19107, 19108, 19109, 19110,
19111, 19115, 19143, 19166, 19167, 19170, 19171, 19172, 19173,
19174, 19177, 19178, 19179, 19180, 19181, 19184, 19185, 19186,
19187, 19188, 19191, 19192, 19193, 19194, 19195, 19198, 19199,
19200, 19201, 19202, 19205, 19206, 19207, 19208, 19209, 19212,
19213, 19214, 19215, 19216, 19219, 19220, 19221, 19222, 19223,
19226, 19227, 19228, 19229, 19230, 19234, 19235, 19236, 19237,
19240, 19241, 19242, 19243, 19244, 19247, 19248, 19249, 19250,
19251, 19255, 19256, 19257, 19258, 19261, 19262, 19263, 19269,
19270, 19271, 19272, 19275, 19276, 19291, 19292, 19293, 19296,
19297, 19298, 19299, 19300, 19303, 19304, 19305, 19306, 19307,
19310, 19312, 19313, 19314, 19317, 19318, 19319, 19320, 19321,
19324, 19325, 19326, 19327, 19328, 19333, 19334, 19335, 19338,
19339, 19340, 19341, 19342, 19345, 19346, 19347, 19348, 19349,
19354, 19355, 19356, 19360, 19361, 19362, 19363, 19366, 19367,
19368, 19369, 19370, 19373, 19374, 19375, 19376, 19377, 19380,
19381, 19382, 19383, 19384, 19387, 19388, 19389, 19390, 19391,
19394, 19395, 19396, 19397, 19398, 19401, 19402, 19403, 19404,
18964, 18968, 19024, 19032, 19103, 19264, 19268, 18884, 18935,
19060, 19061, 19068, 19102, 19165, 19278, 18907, 18919, 18942,
18914, 18872, 18876, 18877, 18878, 18879, 18880, 18885, 18886,
18887, 18890, 18891, 18892, 18893, 18894, 18897, 18898, 18899,
18900, 18901, 18904, 18905, 18906, 18908, 18911, 18912, 18913,
18915, 18918, 18920, 18921, 18922, 18925, 18926, 18927, 18929,
18932, 18933, 18934, 18936, 18939, 18940, 18941, 18943, 18946,
18947, 18948, 18949, 18953, 18954, 18955, 18956, 18957, 18960,
19062, 19065, 19067, 19069, 19072, 19073, 19074, 19075, 19076,
19079, 19080, 19081, 19083, 19086, 19087, 19088, 19089, 19090,
19093, 19094, 19095, 19096, 19101, 19116, 19117, 19118, 19121,
19122, 19123, 19124, 19125, 19128, 19129, 19130, 19131, 19132,
19135, 19136, 19137, 19138, 19139, 19142, 19144, 19149, 19150,
19151, 19152, 19153, 19156, 19157, 19158, 19159, 19160, 19163,
19164, 19265, 19277, 19279, 19282, 19283, 19284, 19285, 19286,
19289, 19290, 19311, 19331, 19332), class = "Date"), Value = c(-5.33417292743301,
-2.52617494564308, -1.49324009324009, -17.0425444455863, -9.35793961841595,
-11.8841517857143, -9.69152125451611, -10.5028364323984, -8.5025565123789,
-7.38025700934579, -7.78238256870689, -8.17978487280178, -7.7131012583794,
-6.41295139213209, -8.91708282295298, -8.71221652160492, -10.0680747922438,
-11.9437278705109, -16.4053740896049, -20.3138418538824, -23.3452022125799,
-28.1797446210017, -28.9915196362348, -22.7089976941569, -53.3396956436279,
-12.4708793300343, -11.1787848605578, -11.2914868901427, -9.00403312503746,
-5.26940458505923, -3.2678042007321, -1.80023400936037, -1.00480716704905,
-2.57038505839299, -6.25052713783678, -4.13951632213265, -4.66916949663517,
-5.80691219642381, -9.01403811889207, -8.76200989722116, -5.14415894039735,
-4.49265067482651, -6.81002817489304, -5.00625312656328, -12.9697844076655,
-16.4525949550594, 1.35574468085106, -11.14101743721, -10.3890230312036,
-9.95399610136452, -10.7999731146659, -10.0551036897388, -8.00489963647858,
-6.98869824910699, -5.52771977448319, -5.8565313387104, -10.6561639051647,
-10.1591881404835, -10.5755809770487, -12.7858263854831, -12.4515269244669,
-9.75300293620429, -10.6174887030093, -10.9297709205513, -8.62649503888442,
-5.67639625979277, -7.76497565794115, -5.11864292912328, -4.15677111515569,
-1.24773160858034, -13.3209387381896, -4.61524571133755, -2.7064391500322,
-2.54590337369225, -4.7534422125529, -5.77741708660495, -9.54039920679418,
-3.49263873159683, -6.37633384146341, -4.8018775807754, -5.92095982827354,
-6.7982259326898, -8.14429721160882, -14.0318602941176, -15.7693341697285,
-6.33099406742874, -6.02467779730522, -6.67451006984472, -5.78277734678045,
-6.76536805011747, -5.30100480559197, -5.48448933319592, -3.07906668141104,
-1.19141872046993, -0.986825656313108, -2.78210279591495, -1.85680436798187,
1.15368964707724, 0.592389680247037, 2.09444444444445, 1.47565073474096,
-3.77455441789913, -4.99841919285848, -4.54737286347331, -6.52210666135604,
-12.4586229788041, -9.76655410805627, -10.1672813163265, -10.1071078030427,
12.1093036008042, -6.27654856354408, -11.7451651977779, -9.69657142857144,
-14.7645443406988, -13.8044752609797, -12.8212930202637, -10.7131114789657,
-9.7502835651603, -9.12285518188058, -7.82418739307804, -7.37290867229471,
-5.42488374865864, -7.16548962504551, -7.22640747577296, -5.67880989576978,
-2.38449197860963, -4.98403249527278, -8.01237481293888, -8.92721143345521,
-11.0038498048729, -7.91574019894676, -7.24256979885921, -13.0468491640639,
-12.4842416971359, -13.1832300362112, -9.69594285190952, -11.8236710963455,
-15.4970313957103, -16.001590401224, -23.0116940912636, -20.0316726652775,
-15.7387646961417, 14.3248459700742, -33.9856294639016, -29.5890893667004,
-24.5815039111784, 0.351626092151443, -12.4957158872518, -13.5265822044065,
-15.8057934508816, -18.5712850985479, -19.0062351207346, -17.8759978712081,
-30.248495829345, -33.496764540864, -32.9090642540002, -35.9095838866755,
-39.0622059592264, -37.4053056372396, 30.4444623180246, -17.2843857072932,
-10.2894505770196, -9.46059792738388, -10.9101340816963, -10.6768441621886,
-15.8564689156004, -25.0377375363291, -23.491809908999, -16.0848675710594,
-9.52203025543524, -9.57329945269742, -8.26118487113689, -8.77146105741898,
-7.52932569974554, -7.51696981061316, -6.34114162627894, -4.169791026429,
-8.65363440517035, -7.07429566797939, -6.04901960784313, -3.95286523637039,
-4.57831931564948, -4.70401930472815, -4.58048473762432, -4.19802716367601,
-4.8712225795747, 33.7199676963457, -8.38811297695784, -8.41675133350266,
-7.95239554602104, -7.61540762007296, -7.36910990686483, -5.19474884165624,
-6.40677640427848, -6.6880699933269, -2.50748947332091, -5.01177083333332,
-4.96342420082369, -3.74379953975965, -4.39864864864864, -3.96323864499468,
-3.64570892339191, -4.00421168284196, -4.32506635700066, -3.84347776249426,
-3.98014059753953, -4.93978444946085, -2.00666607412675, -4.40675836944916,
-4.67217558943196, -4.69624595469255, -4.93134138588684, -4.59980852082335,
-3.79959344732751, -3.56417422281594, -3.72577039757304, -6.8438477254722,
-6.90215521144616, 17.0278089071515, -8.69234350531171, -8.31160081053697,
-6.48055612912106, -2.82488289917003, -1.97938846776255, -2.76711193952573,
-3.57596324527513, -2.66680277664353, -2.76065192083818, -3.07607539874335,
-9.19352066115703, -7.40534903692798, -20.9290200655884, -9.4949337142347,
-1.67593031748771, -4.00032731786333, -12.9233908365795, -3.12797485406376,
-4.58038461538462, -4.92624181954726, -1.64177861663151, 14.5233533882204,
-3.43662848605578, -7.00163549013596, -22.4328399502006, -5.68106442090641,
-10.4667593755607, -8.32068944589428, -7.5574384739845, -1.18996298205332,
0.685336364575798, 0.683983218163869, -0.190581124472707, 0.40016858076072,
-0.685425265972909, -3.23485727546995, -2.17232406175909, -4.03732694666745,
-3.43151815181518, -4.09670641680863, -6.31686046511628, -8.351512347464,
-8.85095265741279, -7.33867558133818, -8.51522800856061, -8.98040313549832,
-21.688225290116, -7.97696621402419, -7.54824182322704, -6.15927894799952,
-4.38810945273632, -4.66345711216818, -6.63357687936353, -6.46356517733763,
-7.69505643096672, -8.85590341722938, -9.65988090292203, -7.41890982503365,
-8.4019243496411, -9.20245522237875, -11.7157621602604, -15.8009129904372,
-16.7188609056313, -29.5936953565184, -16.1899352493468, -14.6818673311081,
-9.23683475995534, -7.2676923950379, -5.58227412415489, -6.1622267560478,
-7.530241504595, -8.17124428752656, -6.35655475028349, -8.35745036021789,
-8.91222072419106, -8.32131192161132, -8.17337013669821, -10.877467450651,
-9.77258672063867, -11.2631774313289, -10.1360580092287, -7.70075269546349,
-3.16698645907571, -2.38109087441594, -0.022514417531718, -1.08499335989376,
-3.3854513350099, -13.1874976421323, -12.0737487121089, -6.16346335921227,
-6.0325834936609, -8.26060968320382, -6.76706578585191, -9.52737443465841,
-3.80397824702367, -4.72200198216056, -5.16157624343703, -7.23900280010839,
-6.18115990990991, -2.1293152465275, -3.7406576456566, -3.66946114241044,
-2.04129052515896, -0.222846441947564, -0.318394134689292, -2.16259885464958,
14.1518376302798, -1.73382942186606, 0.100049115913557, -0.378155479059093,
-0.588078533746669, -0.121356930514267, -2.06994937873907, -2.84085812981032,
-4.51622825625796, -3.44699324686462, -6.08916034821679, -10.3705139175517,
-7.18844523191799, -7.9430781129157, -5.99308878256247, -7.00972211589987,
-3.268970347887, -0.549423136888057, -6.08099664736361, -2.04115579182029,
-0.832810782197972, -0.916704322940202, -0.2527559897864, -1.11381138113811,
-1.81908592321755, -2.61384042630849, -2.35869084475896, 11.992052157715,
-2.80402835408022, -1.61877788005381, -2.68177518524358, -11.6975791772843,
-20.9404339865207, -27.5328187051002, -29.4899791627202, -28.9357382364862,
-30.3360181743081, -30.4065839909808, -32.100806252164, -34.963228894691,
-36.3754188653083, -24.3521969489358, -7.1991154066672, -4.39962753162101
)), row.names = c(NA, -369L), class = c("data.table", "data.frame"
), .internal.selfref = <pointer: 0x00000214068b1ef0>)
df2:
structure(list(Date = structure(c(19402, 19396, 19395,
19389, 19376, 19374, 19367, 19362, 19346, 19339, 19333, 19332,
19332, 19327, 19319, 19311, 19311, 19306, 19305, 19291, 19290,
19284, 19283, 19277, 19276, 19270, 19269, 19262, 19242, 19241,
19236, 19227, 19220, 19213, 19206, 19200, 19194, 19192, 19185,
19178, 19171, 19151, 19150, 19136, 19129, 19123, 19122, 19102,
19094, 19087, 19066, 19059, 19053, 19038, 19018, 19010, 19003,
18968, 18962, 18947, 18941, 18934, 18933, 18933, 18926, 18920,
18913, 18912, 18906, 18905, 18905, 18898, 18892, 18885, 18884,
18878, 18877, 18877, 18871, 18863, 18856, 18850, 18849, 18842,
18835, 18828, 18815, 18814, 18814, 18809, 18801, 18794, 18793,
18793, 18787, 18786, 18786, 18780, 18780, 18766, 18765, 18765,
18758, 18758, 18752, 18752, 18745, 18738, 18737, 18731, 18730,
18725, 18724, 18724, 18710, 18709, 18703, 18702, 18702, 18696,
18695, 18690, 18688, 18688, 18681, 18675, 18674, 18674, 18668,
18661, 18660, 18660, 18654, 18653, 18653, 18648, 18640, 18639,
18639, 18633, 18605, 18604, 18604, 18598, 18598, 18597, 18597,
18590, 18590, 18584, 18583, 18583, 18578, 18578, 18577, 18570,
18569, 18569, 18563, 18562, 18562, 18557, 18557, 18555, 18549,
18548, 18548, 18542, 18542, 18541, 18541, 18536, 18536, 18534,
18529, 18529, 18521, 18520, 18520, 18515, 18515, 18508, 18508,
18507, 18500, 18499, 18499, 18494, 18493, 18492, 18492, 18486,
18485, 18485, 18479, 18479, 18478, 18478, 18472, 18472, 18471,
18471, 18465, 18465, 18464, 18464, 18458, 18458, 18457, 18457,
18452, 18450, 18450, 18445, 18445, 18444, 18444, 18438, 18437,
18437, 18436, 18436, 18430, 18430, 18429, 18429, 18424, 18424,
18423, 18416, 18416, 18415, 18415, 18410, 18410, 18409, 18409,
18403, 18403, 18402, 18396, 18396, 18395, 18388, 18388, 18387,
18387, 18381, 18381, 18380, 18380, 18374, 18374, 18373, 18373,
18368, 18368, 18367, 18367, 18360, 18360, 18359, 18359, 18354,
18340, 18338, 18331, 18326, 18325, 18317, 18312, 18296, 18289,
18282, 18275, 18270, 18268, 18247, 18241, 18235, 18233, 18226,
18214, 18205, 18198, 18191, 18184, 18177, 18170, 18163, 18144,
18142, 18128, 18121, 18114, 18100, 18093, 18079, 18072, 18065,
18059, 18051, 18039, 18025, 18002, 17995, 17988, 17981, 17969,
17961, 17953, 17948, 17941, 17918, 17913, 17904, 17877, 17871,
17869, 17855, 17850, 17841, 17827, 17808, 17799, 17794, 17785,
17780, 17764, 17751, 17736, 17731, 17715, 17708, 17702, 17688,
17675, 17660, 17654, 17645, 17640, 17631, 17625, 17617, 17605,
17596, 17591, 17584, 17577, 17554, 17549, 17542, 17540, 17513,
17507, 17505, 17491, 17486, 17479, 17463, 17458, 17449, 17444,
17436, 17428, 17422, 17401, 17386, 17379, 17372, 17366, 17353,
17344, 17339, 17323, 17318, 17309, 17304, 17290, 17282, 17276,
17268, 17260, 17253, 17247, 17239, 17234, 17227, 17220, 17212,
17206, 17204, 17197, 17184, 17178, 17176, 17171), class = "Date"),
Amount = c(3000, 2000, 3500, 3000, 1195.925, 4440.75, 3702.5,
3500, 3619.25, 3749.999, 744.65, 4062.498, 2812.5, 2812.499,
3559.5, 3250, 2250, 4374.998, 2750, 3500, 898.85, 4062.5,
3125, 4365.25, 1106.299, 3749.999, 2500, 1200, 3437.5, 4183.157,
2499.999, 750, 2803.5, 1760, 2771.875, 700, 4366.249, 2500,
3437.5, 2378.75, 1011.975, 3125, 3075, 847.7, 2187.499, 2500,
2250, 2499.998, 3124.998, 3250, 1239.4, 1874.999, 3250, 2250,
600, 3124.998, 3000, 1875, 2250, 2170.375, 900, 2500, 3000,
1533.75, 2750, 2812.5, 567.4, 1562.5, 3125, 3000, 2288.125,
2000, 350, 2857.916, 3000, 1010.725, 3250, 1250, 2859, 3000,
2274.25, 705, 3437.499, 2000, 3000, 1562.5, 701.05, 3437.499,
1866.25, 4232.497, 500, 3118.75, 3055.281, 2000, 1172.438,
3742.497, 1562.5, 3437.499, 2500, 3093.749, 4062.495, 2812.499,
3283.75, 1500, 2750, 2000, 1113.65, 3124.999, 4062.498, 713.825,
1000, 3437.5, 3394, 2000, 357.25, 2250, 2500, 4375, 1633,
878.975, 2812.5, 3437.5, 3147.351, 1562.499, 2499.998, 2500,
3250, 1250, 2000, 3143, 3000, 1071.25, 1036.3, 2936.25, 1750,
3250, 375, 3000, 1450.624, 3749.999, 3011.102, 4029.372,
2500, 1250, 2750, 3669.355, 2250, 3002.75, 1562.499, 2500,
4062.499, 2500, 3749.999, 1562.499, 861.4, 3000, 3405.625,
2499.999, 3084.999, 3510.742, 1249.999, 2632, 1986.875, 697.049,
3125, 3000, 1562.499, 2500, 884.149, 3518.452, 2000, 3250,
2000, 3437.499, 3749.999, 1310.749, 3124.999, 2867.5, 2187.5,
3250, 2421.749, 3412.749, 2500, 459.05, 2750, 3000, 1342.375,
1374.999, 3437.5, 4062.497, 2477.25, 3437.499, 3250, 1250,
2750, 562.25, 4062.499, 2812.499, 3500, 3008.875, 3437.499,
1874.999, 3000, 500, 3749.999, 2711, 3250, 1500, 3834.095,
3750, 1062.85, 3750, 2812.5, 4261.195, 2329.25, 3000, 2785.999,
1229.576, 3250, 1768.25, 3250, 3628.75, 4260.749, 2812.5,
3716.25, 2000, 3250, 4036.624, 900, 3941.249, 1500, 3328.75,
3749.999, 2927.5, 2357.75, 3750, 1785.499, 1562.499, 2500,
3869.624, 3250, 2250, 745.85, 3897.958, 1750, 4062.499, 3000,
3616.375, 2152.5, 1499.999, 3749.999, 3671.476, 3141, 4062.499,
1874.999, 3648.75, 2499.998, 3676.249, 2313.749, 2750, 2092.5,
4062.5, 1562.499, 2299.997, 3250, 2299.999, 2587.488, 1244.238,
3500, 3441.975, 3269.875, 800, 2750, 2545.872, 3250, 1056.037,
3162.497, 3162.496, 500, 2082.45, 3162.498, 3449.922, 2356.675,
2820.25, 1100, 3449.997, 3162.499, 919.998, 2250, 574.995,
2750, 3000, 1264.997, 2299.997, 3000, 3162.497, 2250, 3373.097,
2250, 3162.499, 700, 3000, 1437.496, 3000, 2250, 3135.1,
3449.998, 325, 1724.998, 3414.749, 1254.571, 1688.5, 2587.497,
2012.498, 2799.403, 2563.87, 500, 2012.499, 2500, 555.065,
2299.997, 2250, 1100, 3000, 872.72, 2750, 2500, 3000, 1100,
2500, 2750, 2299.997, 2874.995, 2587.495, 1381.4, 2750, 1146.874,
2810.081, 3427.034, 750, 2500, 2185.16, 2750, 675.24, 2864.731,
2250, 2815.66, 950, 2562.68, 1000, 2500, 2250, 2587.497,
800, 2276.5, 2750, 1000, 2722.32, 2750, 650, 2599.24, 2500,
3162.5, 950, 2500, 2500, 3158.628, 1000, 2250, 2500, 3162.498,
2867.806, 1000, 2587.498, 2874.999, 2500, 882.3, 2962.15,
2293.788, 857.793, 3162.497, 1724.999, 2867.739, 2874.999,
2299.997, 2557.917, 833.737, 2500, 2299.997, 1250, 2624.29,
3004.215, 2444.046, 2881.949, 2250, 837.798, 2329.914)), row.names = c(NA,
397L), class = "data.frame")
I'm trying to work out the percentage impact that the events listed in the second dataframe (df2) have on the values in the first dataframe (df1), measured against the previous day's entry in df1. Does anyone know how I can achieve this? Apologies if this is unclear; English is not my first language.
I am trying to write R code to summarize the count and median amount for a date variable and a dollar-amount variable in my data frame. The summarize part is not the issue; my issue is how to group_by time periods before summarizing. I understand the distinction between a time period and a time duration. I am interested in time periods: in this particular case I want to summarize by one-year periods counted back from a specific date, i.e. from max(close_date) to the corresponding date one year earlier, as in 2022-02-06 to 2021-02-06 to 2020-02-06 and so on. The data frame goes back about 30 months in total, so there are two full years of data to summarize.
The code I wrote here groups by calendar year, which is not what I need:
> sum_closed_date_yr <- scrubbed_data01 %>%
+ group_by(time_period = year(close_date)) %>%
+ summarize(close_count = (close_date = n()), med_close_price = median(close_price, na.rm = TRUE))
> sum_closed_date_yr
# A tibble: 5 × 3
time_period close_count med_close_price
<dbl> <int> <dbl>
1 2019 31 570000
2 2020 80 661250
3 2021 104 930750
4 2022 9 1010000
5 NA 8 0
I am very new to coding in R, I am a real estate appraiser not a statwhizzician. I have taken 23 DataCamp tutorials in R, so I have a newbie working knowledge of R. I have searched through multiple SO posts on summarizing by date, but can not find what I am specifically looking for. Any help would be greatly appreciated, thank you - Joe
Second try: I selected just two of the many variables.
scrubbed_data01 %>%
+ select(close_date, close_price) %>%
+ dput()
structure(list(close_date = structure(c(NA, NA, NA, 19039, 19038,
19034, 19024, 19020, 19016, 19013, 18999, 18989, 18976, 18969,
18969, 18968, 18955, 18955, 18954, 18953, 18953, 18949, 18948,
18943, 18940, 18936, 18934, 18933, 18929, 18922, 18921, 18921,
18921, 18918, 18915, 18912, 18908, 18908, 18907, 18906, 18905,
18900, 18900, 18899, 18897, 18897, 18897, 18891, 18891, 18890,
18887, 18880, 18879, 18878, 18878, 18873, 18873, 18873, 18869,
18866, 18866, 18851, 18850, 18844, 18836, 18836, 18831, 18830,
18822, 18821, 18821, 18815, 18810, 18806, 18802, 18796, 18795,
18789, 18786, 18782, 18781, 18781, 18780, 18779, 18775, 18775,
18774, 18761, 18761, 18753, 18752, 18747, 18746, 18746, 18740,
18739, 18737, 18729, 18718, 18715, 18705, 18704, 18701, 18695,
18689, 18683, 18677, 18655, 18652, 18648, 18646, 18640, 18634,
18633, 18631, 18619, 18613, 18611, 18590, 18585, 18579, 18576,
18569, 18569, 18569, 18563, 18558, 18557, 18557, 18556, 18554,
18549, 18544, 18540, 18540, 18533, 18519, 18519, 18519, 18514,
18514, 18513, 18507, 18502, 18502, 18501, 18501, 18499, 18495,
18492, 18492, 18491, 18488, 18484, 18472, 18466, 18464, 18459,
18459, 18453, 18451, 18450, 18445, 18443, 18442, 18423, 18422,
18411, 18401, 18400, 18397, 18397, 18397, 18396, 18387, 18386,
18366, 18361, 18360, 18340, 18338, 18331, 18317, 18313, 18302,
18297, 18289, 18283, 18283, 18277, 18274, 18271, 18271, 18269,
18263, 18261, 18261, 18261, 18260, 18250, 18247, 18239, 18208,
18200, 18199, 18197, 18194, 18190, 18185, 18185, 18180, 18179,
18177, 18177, 18176, 18170, 18169, 18156, 18155, 18152, 18151,
18142, 18142, 18138, 18137, 18136, NA, NA, NA, NA, NA, 19044), class = "Date"),
close_price = c(0, 0, 0, 1150001, 940000, 1253000, 979000,
881000, 1010000, 1060000, 1100000, 1070000, 1025000, 755000,
740000, 930000, 1250000, 990000, 930000, 931500, 975000,
950000, 850000, 865000, 921000, 790000, 778000, 935000, 1270000,
970000, 1061500, 960000, 1015000, 1100000, 1082000, 880000,
1000000, 1140000, 950000, 852000, 1045000, 795000, 950000,
950000, 880000, 850000, 945000, 949500, 1220000, 1015000,
899000, 1100000, 805000, 868000, 1102000, 1015000, 923000,
810000, 890000, 826000, 1140000, 970000, 830000, 790000,
1151000, 835500, 1080000, 870000, 1049000, 985000, 962000,
926000, 1008888, 950000, 810000, 760000, 955000, 930000,
985000, 1210000, 878000, 950000, 855000, 930000, 960000,
1180000, 980000, 960000, 898000, 1100000, 1215000, 885000,
985000, 880000, 1100000, 810000, 1210000, 810000, 970700,
1010000, 800000, 850000, 849000, 770000, 925000, 930000,
875000, 755000, 675000, 875500, 715000, 837000, 747000, 805000,
785000, 801200, 900000, 800000, 610000, 720000, 730000, 700000,
695000, 720000, 750000, 860000, 915000, 787000, 785000, 710000,
735000, 620000, 788000, 780000, 780000, 645000, 700000, 686000,
686000, 745000, 745000, 605000, 730000, 625000, 625000, 685000,
731000, 715000, 695000, 710000, 700000, 575000, 561000, 590000,
595000, 720500, 670000, 711000, 645000, 595000, 700000, 545000,
695000, 531000, 581000, 518000, 645000, 562500, 530000, 640000,
643000, 680000, 700000, 540000, 630000, 658000, 675000, 525000,
600000, 664500, 590000, 569595, 620500, 555000, 585000, 630000,
639900, 515000, 475000, 670000, 610000, 524888, 550000, 520000,
650000, 500000, 500000, 540000, 608000, 575000, 570000, 639900,
645000, 648500, 635000, 530000, 655000, 520000, 555000, 542500,
515000, 620000, 580140, 535000, 638888, 540000, 590000, 535000,
497500, 505000, 675000, 545000, 640000, 555000, 630000, 590000,
0, 0, 0, 0, 0, 985000)), row.names = c(NA, 232L), class = "data.frame")
>
You can create a new variable to assign the year to your desired period:
library(dplyr)
# Label every sale with the 12-month period it falls in, where each period
# starts on 6 February, then report the number of sales and the median
# closing price per period.
#
# case_when() uses the first condition that is TRUE, so each branch only needs
# its upper bound: anything earlier was already claimed by a previous branch.
# Rows whose close_date is NA, or on/after 2023-02-06, match no branch and end
# up in an NA period.
df %>%
  mutate(
    period_year = case_when(
      close_date < as.Date("2019-02-06") ~ 2018,
      close_date < as.Date("2020-02-06") ~ 2019,
      close_date < as.Date("2021-02-06") ~ 2020,
      close_date < as.Date("2022-02-06") ~ 2021,
      close_date < as.Date("2023-02-06") ~ 2022
    )
  ) %>%
  group_by(time_period = period_year) %>%
  summarize(
    # Plain n(): the former `(close_date = n())` also created a binding named
    # close_date that shadowed the date column for later expressions.
    close_count = n(),
    med_close_price = median(close_price, na.rm = TRUE)
  )
I have two data frames which I want to process with a for loop. Their structures are the following:
> m_ivae
structure(list(fecha = structure(c(17805, 17836, 17866, 17897,
17928, 17956, 17987, 18017, 18048, 18078, 18109, 18140, 18170,
18201, 18231, 18262, 18293, 18322, 18353, 18383, 18414, 18444,
18475, 18506, 18536, 18567, 18597, 18628, 18659, 18687, 18718,
18748, 18779, 18809), class = "Date"), IVAE = c(109.19, 110.09,
111.34, 111.84, 112.49, 111.99, 113.11, 111.89, 112.11, 112.75,
113.7, 112.93, 112.43, 114.88, 114.5, 114.93, 115.13, 105.54,
91.71, 87.93, 93.06, 96.74, 103.26, 106.76, 109.6, 110.74, 112,
112.73, 114.97, 115.01, 114.67, 115.78, 114.52, 111.91), `Agricultura, Ganadería, Silvicultura y Pesca` = c(99.58,
98.71, 103.44, 101.83, 101.31, 98.87, 99.06, 99.46, 96.55, 100.47,
98.79, 98.91, 100.17, 101.98, 100.48, 99.64, 96.04, 92.42, 97.21,
96.11, 100.57, 94.82, 99.07, 103.63, 97.34, 97.17, 95.46, 98.46,
101.02, 100.24, 100.6, 99.95, 103.07, 98.23), `Índice de Producción Industrial (IPI): Industrias Manufactureras, Explotación de Minas y Canteras y Otras Actividades Industriales` = c(101.4,
103.4, 105.07, 106.72, 108.45, 107.76, 107.25, 105.75, 107.03,
107.31, 106.61, 106.95, 106.61, 110.18, 108.68, 109.66, 111.32,
100.02, 76.77, 73.46, 81.99, 94.83, 100.64, 104.51, 106.74, 107.04,
108.75, 110.8, 110.59, 111.25, 108.82, 110.03, 111.32, 107.61
), Construcción = c(112.25, 117.5, 121.37, 124.32, 122.64, 121.21,
128.69, 122.28, 126.55, 120.13, 137.47, 129.82, 126.83, 132.92,
131.72, 137.56, 130.89, 117.08, 87.62, 67.49, 79.56, 88.97, 117.57,
110.01, 118.02, 117.61, 121.64, 120.76, 120.99, 118.96, 122.7,
122.59, 101.2, 106.3), `Comercio, Transporte y Almacenamiento, Actividades de Alojamiento y de Servicio de Comidas` = c(112.2,
113.03, 113.03, 115.69, 113.74, 114.7, 115.93, 115.3, 114.25,
115.05, 116.68, 114.84, 114.56, 116.58, 117.77, 119.19, 119.15,
103.41, 76.66, 75.21, 90.32, 91.72, 97.53, 105.21, 110.43, 109.72,
112.41, 114.05, 115.88, 117.29, 115.05, 114.69, 116.79, 109.68
), `Información y Comunicaciones` = c(115.49, 116.57, 116.18,
114.29, 113.92, 113.82, 116.45, 115.96, 114.81, 115.72, 116.07,
115.42, 115.32, 115.59, 114.22, 114.21, 113.05, 112.42, 111.52,
108.77, 113.92, 114.07, 115.02, 115.79, 117.78, 117.02, 119.21,
119.56, 125.27, 123.15, 118.56, 119.68, 120.02, 127.68), `Actividades Financieras y de Seguros` = c(117.96,
122.17, 120.93, 119.53, 121.15, 122.17, 125.01, 121.22, 127.48,
124.1, 124.56, 126.86, 124.59, 129.96, 131.74, 131.56, 138.4,
134.4, 131.6, 127.16, 124.61, 116.65, 120.28, 119.57, 127.23,
138.75, 141.25, 138.8, 138.79, 141.28, 141.62, 143.53, 137.62,
139.72), `Actividades Inmobiliarias` = c(113.31, 113.83, 114.41,
114.69, 114.97, 115.98, 116.2, 116.22, 115.64, 115.79, 115.95,
116.24, 117.6, 117.84, 115.35, 108.98, 105.89, 103.74, 103.16,
102.5, 102.42, 102.41, 104.16, 107.74, 112.87, 116.57, 115.68,
113.47, 112.41, 112.08, 112.42, 112.74, 113.21, 112.56), `Actividades Profesionales, Científicas, Técnicas, Administrativas, de Apoyo y Otros Servicios` = c(111.84,
111.92, 114.11, 116.44, 117.77, 112.96, 114.64, 113.67, 112.33,
115.12, 113.31, 114.14, 115.46, 117.17, 120.57, 124.26, 122.68,
99.51, 86.36, 79.21, 81.56, 83.6, 88.71, 97.76, 98.16, 101.04,
102.68, 108.37, 113.64, 114.82, 115.91, 118.35, 118.74, 109.14
), `Actividades de Administración Pública y Defensa, Enseñanza, Salud y Asistencia Social` = c(110.04,
108.07, 109.24, 105.85, 108.99, 109.12, 109.6, 109.31, 108.63,
111.22, 111.25, 109.67, 107.59, 108.8, 106.9, 105.82, 108.24,
107.71, 106.75, 104.67, 98.47, 102.09, 108.94, 109.34, 110.3,
110.01, 109.3, 107.24, 113.46, 111.17, 113.44, 116.42, 112.98,
114.37)), row.names = c(NA, -34L), class = c("tbl_df", "tbl",
"data.frame"))
> m_ipc
structure(list(fecha = structure(c(17805, 17836, 17866, 17897,
17928, 17956, 17987, 18017, 18048, 18078, 18109, 18140, 18170,
18201, 18231, 18262, 18293, 18322, 18353, 18383, 18414, 18444,
18475, 18506, 18536, 18567, 18597, 18628, 18659, 18687, 18718,
18748, 18779, 18809, 18840, 18871), class = "Date"), `Índice General` = c(113.02,
112.82, 112.3, 112.24, 112.44, 112.69, 112.87, 113.01, 112.85,
112.56, 112.16, 111.99, 112.04, 112.17, 112.29, 112.15, 112,
112.09, 111.69, 111.94, 112.59, 112.49, 111.82, 111.56, 111.81,
111.98, 112.2, 112.49, 113.19, 114.08, 114.81, 114.84, 115.51,
116.36, 116.63, 117.1), `Alimentos y Bebidas no Alcohólicas` = c(120.22,
120.56, 120.44, 120.81, 121.12, 121.39, 121.71, 122.29, 122.61,
121.82, 120.79, 120.64, 121.08, 121.48, 121.88, 122.35, 122.22,
122.68, 124.24, 125.06, 126.14, 125.84, 123.33, 122.36, 121.89,
122.24, 122.33, 122.5, 123.12, 124.09, 124.19, 123.97, 124.85,
125.76, 125.93, 127.18), `Bebidas Alcohólicas, Tabaco` = c(146,
145.59, 145.84, 147.3, 146.86, 146.84, 147.11, 147.74, 148.21,
149.24, 150.04, 150.05, 150.11, 149.9, 150.54, 151.89, 151.81,
152.29, 152.01, 153.09, 152.72, 154.65, 154.56, 152.64, 153.4,
153.59, 153.87, 154.49, 155.21, 155.63, 155.4, 155.2, 156.36,
156.2, 156, 157.11), `Prendas de Vestir y Calzado` = c(92.82,
92.77, 92.74, 92.76, 92.93, 92.89, 92.9, 92.69, 92.57, 92.42,
92.13, 91.42, 91.44, 91.17, 91.03, 91.09, 91.43, 91.88, 91.84,
91.84, 91.84, 91.84, 91.84, 92.05, 92.55, 92.6, 92.75, 93, 93.5,
93.84, 93.98, 94.35, 94.5, 94.71, 94.86, 94.85), `Alojamiento, Agua, Electricidad, Gas y otros Combustibles` = c(140.49,
139.57, 138.12, 137.52, 137.35, 137.51, 136.16, 135.75, 135.34,
134.77, 134.82, 134.79, 133.85, 134.04, 134.93, 132.51, 131.61,
131.68, 131.02, 131.03, 131.83, 129.07, 128.61, 129, 131.34,
131.41, 131.97, 132.01, 134.25, 135.03, 137.66, 136.74, 136.96,
140.04, 141.58, 141.93), `Muebles, Artículos para el Hogar y para la Conservación Ordinaria del Hogar` = c(100.24,
100.36, 100.14, 100.29, 100.52, 100.16, 100.25, 100.3, 99.86,
99.73, 99.64, 99.63, 99.48, 99.16, 98.94, 99.16, 99.54, 99.98,
100.08, 100.13, 100.02, 99.83, 100.23, 100.39, 100.07, 100.17,
100.92, 101, 101.98, 102.74, 103.46, 103.81, 104.38, 105.06,
105.3, 106.45), Salud = c(99.37, 99.28, 99.29, 99.29, 99.27,
99.27, 99.34, 99.44, 99.54, 99.6, 99.77, 100.06, 100.07, 100.14,
100.12, 100.17, 100.01, 99.98, 99.96, 100.19, 100.22, 100.9,
100.97, 101.13, 101.24, 101.9, 101.88, 102.04, 102.93, 103.14,
103.37, 103.83, 104.14, 104.19, 104.45, 104.53), Transporte = c(112.15,
110.75, 108.27, 106.83, 107.41, 108.94, 111.01, 111.41, 110.51,
110.51, 109.34, 108.64, 109.05, 109.47, 108.79, 108.56, 107.88,
106.73, 100.48, 100.6, 102.77, 104.29, 103.76, 103.45, 103.59,
103.53, 103.64, 105.12, 105.76, 109.23, 111.09, 111.72, 112.93,
113.5, 112.71, 112.13), Comunicaciones = c(84.77, 84.69, 84.69,
84.64, 84.32, 84.32, 84.32, 84.31, 84.1, 83.78, 83.78, 83.78,
83.89, 83.89, 83.7, 83.2, 83.16, 83.16, 83.2, 83.17, 83.17, 82.99,
82.99, 83.03, 83.19, 83.19, 83.17, 83.12, 83.12, 83.12, 83.12,
83.11, 83.11, 83.09, 83.09, 83.09), `Recreación y Cultura` = c(87.35,
87.37, 87.4, 87.77, 88.71, 88.48, 88.72, 88.75, 88.08, 88.14,
88.18, 87.97, 87.81, 87.72, 87.58, 87.63, 87.89, 87.74, 87.67,
87.6, 87.65, 87.81, 88.29, 87.68, 88.02, 88.08, 88.14, 88.06,
87.86, 88.11, 88.51, 88.77, 89.12, 89.11, 88.98, 89.14), Educación = c(112.83,
112.83, 112.83, 113.27, 113.27, 113.27, 113.27, 113.27, 113.27,
113.27, 113.27, 113.27, 113.65, 113.65, 113.65, 114.06, 114.06,
114.06, 114.06, 114.06, 114.06, 114.06, 114.06, 114.06, 114.26,
114.26, 114.26, 114.26, 114.26, 114.26, 114.26, 114.26, 114.26,
114.26, 114.26, 114.26), `Restaurantes y Hoteles` = c(122.94,
122.7, 122.81, 123.41, 123.37, 123.54, 123.49, 123.57, 123.55,
123.63, 123.59, 123.5, 123.58, 123.54, 123.93, 124.32, 124.44,
124.44, 124.5, 124.61, 124.7, 125.04, 125.34, 125.52, 125.52,
125.8, 126.01, 126.36, 126.65, 126.97, 127.49, 127.95, 129.19,
129.73, 130.46, 131.3), `Bienes y Servicios Diversos` = c(107.55,
107.75, 107.6, 107.39, 107.4, 107.55, 107.36, 107.13, 107.22,
107.26, 107.4, 107.48, 107.42, 107.4, 107.3, 107.37, 107.55,
108.21, 108.38, 108.39, 108.46, 109.45, 109.67, 109.42, 109.65,
109.65, 109.99, 110.25, 110.37, 110.19, 110.34, 110.36, 111.16,
111.8, 112.28, 112.23)), row.names = c(NA, -36L), class = c("tbl_df",
"tbl", "data.frame"))
And I am using the following code:
library(janitor)
# First attempt: add three lag columns to each data frame and coerce every
# non-key column to numeric. This version stops with the "Can't recycle"
# error on the apply() assignment below.
wide_dataframes = list(m_ivae,m_ipc)
names(wide_dataframes) = c('m_ivae','m_ipc')
for (nm in names(wide_dataframes)){
# Fetch the variable whose name is stored in nm.
df = get(nm)
# janitor::clean_names() standardises the column names.
df = clean_names(df)
# Each call is lag(i, x = <columns 2..ncol>) for i = 1, 2, 3, so every one of
# the three new names lag1..lag3 receives a whole block of columns rather than
# a single vector. NOTE(review): which lag() is used depends on the attached
# packages; only janitor is loaded in the visible script.
df[paste0("lag", 1:3)] = lapply(1:3, lag, x=df[,2:ncol(df)])
# apply() turns the selection into a matrix and converts it column by column.
# Presumably the nested lag blocks are flattened here, which would explain why
# the result (size 40) does not fit the 13 columns being assigned to.
df[,2:ncol(df)] = apply(df[,2:ncol(df)],2,function(x) as.numeric(as.character(x)))
# Write the modified frame back to the variable named nm.
assign(nm, df)
}
However, after I run the for loop, I get the following error message:
Error: Can't recycle `apply(df[, 2:ncol(df)], 2, function(x) as.numeric(as.character(x)))` (size 40) to size 13.
I tried to fix it by removing the column specifications in the fifth line of the for loop, like this:
# Second attempt: same steps as before, but the converted values replace df
# entirely instead of being assigned back into columns 2..ncol.
for (nm in names(wide_dataframes)){
# Fetch the variable whose name is stored in nm.
df = get(nm)
# janitor::clean_names() standardises the column names.
df = clean_names(df)
# Each of lag1..lag3 receives the whole block of columns 2..ncol, lagged by i.
df[paste0("lag", 1:3)] = lapply(1:3, lag, x=df[,2:ncol(df)])
# df becomes the matrix returned by apply(), built from columns 2..ncol only,
# so the first column is no longer part of the result.
df = apply(df[,2:ncol(df)],2,function(x) as.numeric(as.character(x)))
# Overwrite the variable named nm with that matrix.
assign(nm, df)
}
This solves the error, but removes the first column, which I need to keep in order to perform a left join with a different data frame later on.
The issue seems to be the assignment to df[paste0("lag", 1:3)]: when we apply lag to the whole data or to a part of it (df[,2:ncol(df)]), the left-hand side of = does not have the same length as the result, i.e. it names just 3 columns, whereas ncol(df)-1 columns are being lagged each time. As we are already using a for loop, the inner lag can also be done in a for loop:
# For every data frame named in `wide_dataframes`, append the 1-, 2- and
# 3-row lags of each value column (named <column>lag<i>), coerce all value
# columns to numeric, and store the result in the variable of the same name.
for (nm in names(wide_dataframes)) {
  # Start from the copy kept in the list rather than get(nm): the variable may
  # already have been modified or overwritten by an earlier run, and the list
  # still holds the original frame, so re-running the loop gives the same result.
  df <- clean_names(wide_dataframes[[nm]])

  # Every column except the first one (the date key) is lagged.
  nm1 <- names(df)[-1]

  for (i in seq_len(3)) {
    nm2 <- paste0(nm1, "lag", i)
    # Lag one column at a time and name dplyr's lag() explicitly; a bare lag()
    # resolves to stats::lag() when dplyr is not attached.
    df[nm2] <- lapply(df[nm1], dplyr::lag, n = i)
  }

  # Going through character first turns factor columns into their labels
  # rather than their integer codes. df[-1] also avoids 2:ncol(df), which
  # counts backwards when there is only one column.
  df[-1] <- lapply(df[-1], function(x) as.numeric(as.character(x)))

  assign(nm, df)
}
Checking the result:
> ncol(m_ivae)
[1] 41
> ncol(m_ipc)
[1] 53
compare with original number of columns
> sapply(wide_dataframes, ncol)
m_ivae m_ipc
11 14
I create the plotly chart below, which normally lives in a shiny app, so the selection of y variables is dynamic. In this case I have Bob and Anna. I want to modify the legend. The color order is correct, as I want the deeper blue to be on top, but I want to fix the order in which the names are displayed, probably alphabetically, so Anna should always be displayed first with the deeper blue color in the legend. Remember that the selection is dynamic in a shiny app.
# Weekly dates, given as day counts carrying class "Date" (consecutive values
# are 7 apart); the last entry is NA.
Week<-structure(c(18323, 18330, 18337, 18344, 18351, 18358, 18365,
18372, 18379, 18386, 18393, 18400, 18407, 18414, 18421, 18428,
18435, 18442, 18449, 18456, 18463, 18470, 18477, 18484, 18491,
18498, 18505, 18512, 18519, 18526, 18533, 18540, 18547, 18554,
18561, 18568, 18575, 18582, 18589, 18596, 18603, 18610, 18617,
18624, 18631, 18638, 18645, 18652, 18659, 18666, 18673, 18680,
18687, 18694, 18701, 18708, 18715, 18722, NA), class = "Date")
# First series: one value per entry of Week; the first value is NA.
Bob<-c(NA, 12, 28, 89, 205, 311, 367, 419, 536, 673, 787, 996, 1501,
2091, 2836, 3971, 5429, 7422, 9653, 12205, 15096, 19962, 23567,
28432, 33051, 37347, 43390, 49897, 54851, 60913, 67073, 72769,
79629, 84063, 88398, 89579, 88464, 85595, 81697, 74943, 67632,
58226, 53371, 49759, 51508, 55515, 58813, 62240, 62627, 62646,
61285, 54438, 49614, 46721, 44554, 48151, 54014, 68891, 47176
)
# Second series: the same values as Bob, multiplied by 50.
Anna<-c(NA, 12, 28, 89, 205, 311, 367, 419, 536, 673, 787, 996, 1501,
2091, 2836, 3971, 5429, 7422, 9653, 12205, 15096, 19962, 23567,
28432, 33051, 37347, 43390, 49897, 54851, 60913, 67073, 72769,
79629, 84063, 88398, 89579, 88464, 85595, 81697, 74943, 67632,
58226, 53371, 49759, 51508, 55515, 58813, 62240, 62627, 62646,
61285, 54438, 49614, 46721, 44554, 48151, 54014, 68891, 47176
)*50
# Combine the weekly series into one data frame.
re <- data.frame(Week, Bob, Anna)
# Total each series per year-month label. The anonymous function replaces
# `across(..., sum, na.rm = TRUE)`: passing extra arguments through across()'s
# `...` is deprecated since dplyr 1.1.0.
re <- re %>%
  group_by(month_year = format(Week, '%Y-%b')) %>%
  summarise(across(c(Bob, Anna), function(x) sum(x, na.rm = TRUE)))
# The first column now holds the year-month label; the plotting code refers to
# it as `Week`.
colnames(re)[1] <- "Week"
# Settings for the second y-axis (passed as `yaxis2` to layout()): drawn on the
# right-hand side and overlaid on the primary axis "y".
# The trailing comma after the last element was removed; list() rejects an
# empty argument ("argument 4 is empty").
ay <- list(
  overlaying = "y",
  side = "right",
  title = "Second"
)
# Names of the series to plot (in the shiny app this selection is dynamic).
tempNames <- c("Bob", "Anna")
# Sort alphabetically so the traces are always added, and therefore listed in
# the legend, in the same order.
tempNamesV2 <- tempNames[order(tempNames)]

# Per-trace settings, indexed by position in the sorted selection.
barColors <- c("#3E5B84", "#6BBABF")
barAxes <- c("y", "y2")

p <- plot_ly(re)
# Only two traces are styled, so any selection beyond the second is ignored.
for (i in seq_len(min(length(tempNamesV2), length(barColors)))) {
  p <- add_bars(
    p,
    x = ~Week, y = re[[tempNamesV2[i]]], name = tempNamesV2[i],
    marker = list(color = barColors[i]), yaxis = barAxes[i], offsetgroup = i,
    # Build the hover text now instead of in a formula: plotly evaluates
    # formulas only when the plot is built, by which time `i` holds its final
    # value and every trace would show the same name.
    text = paste("<b>Country:</b>", tempNamesV2[i], "<br><b>Date:</b>", re$Week),
    hovertemplate = paste('%{text}<extra></extra>')
  )
}
p <- p %>% layout(yaxis2 = ay,
                  xaxis = list(title = "Date"),
                  yaxis = list(title = "i"),
                  margin = list(l = 50, b = 100, t = 50),
                  barmode = 'group',
                  legend = list(x = 1.05, y = 1, title = list(text = '<b> Country </b>')))
p
You can use a loop to add the traces after sorting the names. I prefer using order() so I can reuse the sort positions for other purposes, but a simple sort() works too. The legend order in plotly follows the order in which you call add_trace/add_bars: the trace added first gets the top position.
Since you only used 2 y's, I assume the user can only select up to 2 "Names" at a time, and you are trying to make sure the Countries are sorted:
# Weekly dates, given as day counts carrying class "Date" (consecutive values
# are 7 apart); the last entry is NA.
Week<-structure(c(18323, 18330, 18337, 18344, 18351, 18358, 18365,
18372, 18379, 18386, 18393, 18400, 18407, 18414, 18421, 18428,
18435, 18442, 18449, 18456, 18463, 18470, 18477, 18484, 18491,
18498, 18505, 18512, 18519, 18526, 18533, 18540, 18547, 18554,
18561, 18568, 18575, 18582, 18589, 18596, 18603, 18610, 18617,
18624, 18631, 18638, 18645, 18652, 18659, 18666, 18673, 18680,
18687, 18694, 18701, 18708, 18715, 18722, NA), class = "Date")
# First series: one value per entry of Week; the first value is NA.
Bob<-c(NA, 12, 28, 89, 205, 311, 367, 419, 536, 673, 787, 996, 1501,
2091, 2836, 3971, 5429, 7422, 9653, 12205, 15096, 19962, 23567,
28432, 33051, 37347, 43390, 49897, 54851, 60913, 67073, 72769,
79629, 84063, 88398, 89579, 88464, 85595, 81697, 74943, 67632,
58226, 53371, 49759, 51508, 55515, 58813, 62240, 62627, 62646,
61285, 54438, 49614, 46721, 44554, 48151, 54014, 68891, 47176
)
# Second series: the same values as Bob, multiplied by 50.
Anna<-c(NA, 12, 28, 89, 205, 311, 367, 419, 536, 673, 787, 996, 1501,
2091, 2836, 3971, 5429, 7422, 9653, 12205, 15096, 19962, 23567,
28432, 33051, 37347, 43390, 49897, 54851, 60913, 67073, 72769,
79629, 84063, 88398, 89579, 88464, 85595, 81697, 74943, 67632,
58226, 53371, 49759, 51508, 55515, 58813, 62240, 62627, 62646,
61285, 54438, 49614, 46721, 44554, 48151, 54014, 68891, 47176
)*50
# Combine the weekly series into one data frame.
re <- data.frame(Week, Bob, Anna)
# Total each series per year-month label. The anonymous function replaces
# `across(..., sum, na.rm = TRUE)`: passing extra arguments through across()'s
# `...` is deprecated since dplyr 1.1.0.
re <- re %>%
  group_by(month_year = format(Week, '%Y-%b')) %>%
  summarise(across(c(Bob, Anna), function(x) sum(x, na.rm = TRUE)))
# The first column now holds the year-month label; the plotting code refers to
# it as `Week`.
colnames(re)[1] <- "Week"
# Axis settings list: overlay on "y", placed on the right, titled "Second".
ay <- list(overlaying = "y", side = "right", title = "Second")
# Names of the series to plot.
tempNames <- c("Bob", "Anna")
# Sort alphabetically so the traces are always added, and therefore listed in
# the legend, in the same order.
tempNamesV2 <- tempNames[order(tempNames)]

# Per-trace settings, indexed by position in the sorted selection.
barColors <- c("#3E5B84", "#6BBABF")
barAxes <- c("y", "y2")

p <- plot_ly(re)
# Only two traces are styled, so any selection beyond the second is ignored.
for (i in seq_len(min(length(tempNamesV2), length(barColors)))) {
  p <- add_bars(
    p,
    x = ~Week, y = re[[tempNamesV2[i]]], name = tempNamesV2[i],
    marker = list(color = barColors[i]), yaxis = barAxes[i], offsetgroup = i,
    # The hover text uses the sorted name (tempNamesV2) so it matches the bar
    # it belongs to. It is built now instead of in a formula: plotly evaluates
    # formulas only when the plot is built, by which time `i` holds its final
    # value and every trace would show the same name.
    text = paste("<b>Country:</b>", tempNamesV2[i], "<br><b>Date:</b>", re$Week),
    hovertemplate = paste('%{text}<extra></extra>')
  )
}
# Apply the axis settings in `ay` to the axis the second trace is drawn on;
# without this the "y2" axis is never configured as an overlay on the right.
p <- layout(p, yaxis2 = ay)
p
I also realized you have two margins in the layout, and I put them together.
I have the dataframe below:
# Weekly dates, given as day counts carrying class "Date" (consecutive values
# are 7 apart); the last entry is NA.
Week<-structure(c(18323, 18330, 18337, 18344, 18351, 18358, 18365,
18372, 18379, 18386, 18393, 18400, 18407, 18414, 18421, 18428,
18435, 18442, 18449, 18456, 18463, 18470, 18477, 18484, 18491,
18498, 18505, 18512, 18519, 18526, 18533, 18540, 18547, 18554,
18561, 18568, 18575, 18582, 18589, 18596, 18603, 18610, 18617,
18624, 18631, 18638, 18645, 18652, 18659, 18666, 18673, 18680,
18687, 18694, 18701, 18708, 18715, 18722, NA), class = "Date")
# First series: one value per entry of Week; the first value is NA.
First<-c(NA, 12, 28, 89, 205, 311, 367, 419, 536, 673, 787, 996, 1501,
2091, 2836, 3971, 5429, 7422, 9653, 12205, 15096, 19962, 23567,
28432, 33051, 37347, 43390, 49897, 54851, 60913, 67073, 72769,
79629, 84063, 88398, 89579, 88464, 85595, 81697, 74943, 67632,
58226, 53371, 49759, 51508, 55515, 58813, 62240, 62627, 62646,
61285, 54438, 49614, 46721, 44554, 48151, 54014, 68891, 47176
)
# Second series: the same values as First.
Second<-c(NA, 12, 28, 89, 205, 311, 367, 419, 536, 673, 787, 996, 1501,
2091, 2836, 3971, 5429, 7422, 9653, 12205, 15096, 19962, 23567,
28432, 33051, 37347, 43390, 49897, 54851, 60913, 67073, 72769,
79629, 84063, 88398, 89579, 88464, 85595, 81697, 74943, 67632,
58226, 53371, 49759, 51508, 55515, 58813, 62240, 62627, 62646,
61285, 54438, 49614, 46721, 44554, 48151, 54014, 68891, 47176
)
# One row per week with both series as columns.
re<-data.frame(Week,First,Second)
I want to sum the values by month, but the data cover two years (2020 and 2021), so each month needs to be kept separate for each year.
library(lubridate)
# Sum `First` for each value of month(Week). month() keeps only the month of
# the date, so weeks from the same month of different years fall into one group.
bymonth <- aggregate(cbind(First)~month(Week),
data=re,FUN=sum)
then I need to create a plotly bar chart but the months are not displayed properly.
# Bar chart of the monthly sums.
# NOTE(review): `bymonth` is passed positionally after the piped plot object,
# so it is presumably not picked up as the trace's data - confirm against the
# add_bars() signature.
# NOTE(review): `~Month` presumably matches no column of `bymonth`, whose
# grouping column comes from the expression `month(Week)` - verify.
p <- plot_ly() %>%
add_bars(bymonth, x = ~Month, y = bymonth[,2], name = "fIRST",
marker = list(color = "#3E5B84")
)
You can extract year-month from the date, aggregate and plot -
library(dplyr)
library(plotly)

# Label every week with its year-month, keep the labels in order of first
# appearance after sorting by date, total `First` per label, and draw the
# totals as bars.
re %>%
  arrange(Week) %>%
  mutate(month_year = format(Week, '%Y-%b')) %>%
  mutate(month_year = factor(month_year, levels = unique(month_year))) %>%
  group_by(month_year) %>%
  summarise(First = sum(First, na.rm = TRUE)) %>%
  plot_ly() %>%
  add_bars(
    x = ~month_year,
    y = ~First,
    marker = list(color = "#3E5B84")
  )