Change manually the shape of a legend ggplot2 - r

I've been customizing themes, lines, and colors in the following plot
library(dplyr)
library(ggplot2)
library(readr)
library(zoo)
# Download the source table from the MINSA cloud share (needs network access).
cvper <- read.csv(
  "https://cloud.minsa.gob.pe/s/Y8w3wHsEdYQSZRp/download",
  stringsAsFactors = FALSE
)

# Count rows per result date and add a right-aligned rolling mean over 7 rows
# (i.e. 7 days when every date is present in the data).
# summarize() returns one row per group, ordered by the grouping key, so an
# arrange() before it has no effect and is omitted.
nuevos_cvper <- cvper %>%
  group_by(FECHA_RESULTADO) %>%
  summarize(casos_x_dia = n()) %>%
  mutate(
    media_movil = rollmean(casos_x_dia, k = 7, fill = NA, align = "right")
  )

# Each layer maps colour to a constant label so that the manual colour scale
# builds the legend; every label needs exactly one entry in `values`.
prueba_legend <- ggplot(nuevos_cvper, aes(x = FECHA_RESULTADO)) +
  geom_line(aes(y = media_movil, color = "media_movil"), size = 1.5) +
  geom_line(aes(y = casos_x_dia, color = "casos_x_dia"), linetype = "dashed") +
  geom_point(aes(y = casos_x_dia, color = "casos_x_dia")) +
  scale_colour_manual(
    "",
    values = c("media_movil" = "#CF3721", "casos_x_dia" = "#31A9B8")
  ) +
  theme_bw() +
  theme(legend.position = "bottom")
prueba_legend
It shows a legend with short lines. I want to change those lines to circles. I've tried with scale_shape_manual, but it doesn't work. Is there a way?

Since one of the more recent ggplot2 versions (make sure you update via install.packages("ggplot2")), the argument key_glyph= can be used to specify the draw_key function that should be used to draw the legend glyphs for a given geom and aesthetic. See the ggplot2 documentation on the draw_key functions for some more information and examples of usage; however, I will demonstrate with the following example using iris and ggplot2 version 3.3.2:
# Line plot of the iris sepal measurements, one colour per species; the legend
# keys are drawn as points instead of the default line segments.
ggplot(
  data = iris,
  mapping = aes(Sepal.Length, Sepal.Width, colour = Species)
) +
  geom_line(key_glyph = "point")
You may notice as I did that the point size is a bit small for my taste. That can be adjusted by using override.aes= via the guide_legend() function specified for the color aesthetic to make those points a bit bigger.
# Same plot as before, with the point-shaped legend keys enlarged: override.aes
# changes the size used inside the colour legend only, not in the plot panel.
ggplot(
  data = iris,
  mapping = aes(Sepal.Length, Sepal.Width, colour = Species)
) +
  geom_line(key_glyph = "point") +
  guides(colour = guide_legend(override.aes = list(size = 3)))

Maybe you are looking for this:
library(ggplot2)
#Data
df2 <- structure(list(FECHA_RESULTADO = structure(c(18327, 18328, 18329,
18330, 18331, 18332, 18333, 18334, 18335, 18336, 18337, 18338,
18339, 18340, 18341, 18342, 18343, 18344, 18345, 18346, 18347,
18348, 18349, 18350, 18351, 18352, 18353, 18354, 18355, 18356,
18357, 18358, 18359, 18360, 18361, 18362, 18363, 18364, 18365,
18366, 18367, 18368, 18369, 18370, 18371, 18372, 18373, 18374,
18375, 18376, 18377, 18378, 18379, 18380, 18381, 18382, 18383,
18384, 18385, 18386, 18387, 18388, 18389, 18390, 18391, 18392,
18393, 18394, 18395, 18396, 18397, 18398, 18399, 18400, 18401,
18402, 18403, 18404, 18405, 18406, 18407, 18408, 18409, 18410,
18411, 18412, 18413, 18414, 18415, 18416, 18417, 18418, 18419,
18420, 18421, 18422, 18423, 18424, 18425, 18426, 18427, 18428,
18429, 18430, 18431, 18432, 18433, 18434, 18435, 18436, 18437,
18438, 18439, 18440, 18441, 18442, 18443, 18444, 18445, 18446,
18447, 18448, 18449, 18450, 18451, 18452, 18453, 18454, 18455,
18456, 18457, 18458, 18459, 18460, 18461, 18462, 18463, 18464,
18465, 18466, 18467, 18468, 18469, 18470, 18471, 18472, 18473,
18474, 18475, 18476, 18477, 18478, 18479, 18480, 18481, 18482,
18483, 18484, 18485, 18486, 18487, 18488), class = "Date"), casos_x_dia = c(1,
5, 2, 3, 1, 8, 8, 10, 19, 28, 20, 27, 56, 62, 56, 30, 39, 33,
64, 100, 52, 34, 136, 142, 130, 250, 117, 222, 292, 833, 444,
647, 1042, 1083, 817, 1038, 1404, 738, 1284, 1041, 1383, 1329,
1109, 1407, 1076, 2039, 2171, 2104, 2056, 2239, 2397, 1422, 3399,
3367, 4238, 3372, 2625, 3369, 1922, 3990, 3969, 3634, 3612, 3297,
2469, 936, 3601, 4348, 4441, 3739, 4304, 4125, 1785, 5130, 5198,
5290, 5514, 6100, 5399, 1792, 6968, 6919, 7371, 6425, 5745, 4613,
2262, 4242, 3774, 2614, 4029, 4944, 4764, 2637, 4743, 5310, 5726,
5069, 4661, 4500, 2441, 4363, 3376, 3915, 3436, 3447, 3526, 1446,
4335, 3768, 4109, 4154, 4331, 3526, 1598, 2729, 3748, 3648, 3349,
3862, 3518, 2299, 3783, 4035, 2598, 2495, 4913, 4246, 2380, 3114,
4194, 4432, 4535, 5141, 5066, 2228, 3756, 4815, 5972, 5474, 5960,
5626, 2950, 7071, 3017, 6721, 7248, 7601, 6697, 3194, 7818, 7754,
7508, 8442, 7407, 6759, 3491, 7679, 8473, 8560, 7590, 4805),
media_movil = c(NA, NA, NA, NA, NA, NA, 4, 5.28571428571429,
7.28571428571429, 11, 13.4285714285714, 17.1428571428571,
24, 31.7142857142857, 38.2857142857143, 39.8571428571429,
41.4285714285714, 43.2857142857143, 48.5714285714286, 54.8571428571429,
53.4285714285714, 50.2857142857143, 65.4285714285714, 80.1428571428571,
94, 120.571428571429, 123, 147.285714285714, 184.142857142857,
283.714285714286, 326.857142857143, 400.714285714286, 513.857142857143,
651.857142857143, 736.857142857143, 843.428571428571, 925,
967, 1058, 1057.85714285714, 1100.71428571429, 1173.85714285714,
1184, 1184.42857142857, 1232.71428571429, 1340.57142857143,
1502, 1605, 1708.85714285714, 1870.28571428571, 2011.71428571429,
2061.14285714286, 2255.42857142857, 2426.28571428571, 2731.14285714286,
2919.14285714286, 2974.28571428571, 3113.14285714286, 3184.57142857143,
3269, 3355, 3268.71428571429, 3303, 3399, 3270.42857142857,
3129.57142857143, 3074, 3128.14285714286, 3243.42857142857,
3261.57142857143, 3405.42857142857, 3642, 3763.28571428571,
3981.71428571429, 4103.14285714286, 4224.42857142857, 4478,
4734.57142857143, 4916.57142857143, 4917.57142857143, 5180.14285714286,
5426, 5723.28571428571, 5853.42857142857, 5802.71428571429,
5690.42857142857, 5757.57142857143, 5368.14285714286, 4918.85714285714,
4239.28571428571, 3897, 3782.57142857143, 3804.14285714286,
3857.71428571429, 3929.28571428571, 4148.71428571429, 4593.28571428571,
4741.85714285714, 4701.42857142857, 4663.71428571429, 4635.71428571429,
4581.42857142857, 4305.14285714286, 4046.42857142857, 3813.14285714286,
3639.71428571429, 3500.57142857143, 3358.42857142857, 3354.42857142857,
3410.42857142857, 3438.14285714286, 3540.71428571429, 3667,
3667, 3688.71428571429, 3459.28571428571, 3456.42857142857,
3390.57142857143, 3275.57142857143, 3208.57142857143, 3207.42857142857,
3307.57142857143, 3458.14285714286, 3499.14285714286, 3349.14285714286,
3227.14285714286, 3377.28571428571, 3481.28571428571, 3492.85714285714,
3397.28571428571, 3420, 3682, 3973.42857142857, 4006, 4123.14285714286,
4101.42857142857, 4193.14285714286, 4281.85714285714, 4501.85714285714,
4636, 4753, 4833, 4936.14285714286, 5409.71428571429, 5152.85714285714,
5259.85714285714, 5513.28571428571, 5747.71428571429, 5900.71428571429,
5935.57142857143, 6042.28571428571, 6719, 6831.42857142857,
7002, 6974.28571428571, 6983.14285714286, 7025.57142857143,
7005.71428571429, 7108.42857142857, 7258.71428571429, 7137,
6765.28571428571)), row.names = c(NA, -162L), class = "data.frame")
The code:
# Both line layers are excluded from the legend (show.legend = FALSE), so the
# legend keys are drawn by the point layer only.
prueba_legend <- ggplot(df2, aes(x = FECHA_RESULTADO)) +
  geom_line(
    aes(y = media_movil, color = "media_movil"),
    size = 1.5, show.legend = FALSE
  ) +
  geom_line(
    aes(y = casos_x_dia, color = "casos_x_dia"),
    linetype = "dashed", show.legend = FALSE
  ) +
  geom_point(aes(y = casos_x_dia, color = "casos_x_dia")) +
  # One colour per label; the labels are the constants used in aes() above.
  scale_colour_manual(
    "",
    values = c("media_movil" = "#CF3721", "casos_x_dia" = "#31A9B8")
  ) +
  theme_bw() +
  theme(legend.position = "bottom")
prueba_legend
Output:

Related

Impact of events in one dataframe have on the values of another dataframe

I have two very large dataframes of the following structure(s):
df1:
structure(list(Date = structure(c(18871, 18873, 18883, 18928,
18950, 18961, 18962, 18963, 18967, 18969, 18970, 18971, 18974,
18975, 18976, 18977, 18978, 18981, 18982, 18983, 18984, 18985,
18990, 18991, 18992, 18996, 18997, 18998, 18999, 19002, 19003,
19004, 19005, 19006, 19009, 19010, 19011, 19012, 19013, 19016,
19017, 19018, 19019, 19020, 19023, 19025, 19026, 19027, 19030,
19031, 19033, 19034, 19037, 19038, 19039, 19040, 19041, 19044,
19045, 19046, 19047, 19048, 19051, 19052, 19053, 19054, 19055,
19058, 19059, 19066, 19082, 19104, 19107, 19108, 19109, 19110,
19111, 19115, 19143, 19166, 19167, 19170, 19171, 19172, 19173,
19174, 19177, 19178, 19179, 19180, 19181, 19184, 19185, 19186,
19187, 19188, 19191, 19192, 19193, 19194, 19195, 19198, 19199,
19200, 19201, 19202, 19205, 19206, 19207, 19208, 19209, 19212,
19213, 19214, 19215, 19216, 19219, 19220, 19221, 19222, 19223,
19226, 19227, 19228, 19229, 19230, 19234, 19235, 19236, 19237,
19240, 19241, 19242, 19243, 19244, 19247, 19248, 19249, 19250,
19251, 19255, 19256, 19257, 19258, 19261, 19262, 19263, 19269,
19270, 19271, 19272, 19275, 19276, 19291, 19292, 19293, 19296,
19297, 19298, 19299, 19300, 19303, 19304, 19305, 19306, 19307,
19310, 19312, 19313, 19314, 19317, 19318, 19319, 19320, 19321,
19324, 19325, 19326, 19327, 19328, 19333, 19334, 19335, 19338,
19339, 19340, 19341, 19342, 19345, 19346, 19347, 19348, 19349,
19354, 19355, 19356, 19360, 19361, 19362, 19363, 19366, 19367,
19368, 19369, 19370, 19373, 19374, 19375, 19376, 19377, 19380,
19381, 19382, 19383, 19384, 19387, 19388, 19389, 19390, 19391,
19394, 19395, 19396, 19397, 19398, 19401, 19402, 19403, 19404,
18964, 18968, 19024, 19032, 19103, 19264, 19268, 18884, 18935,
19060, 19061, 19068, 19102, 19165, 19278, 18907, 18919, 18942,
18914, 18872, 18876, 18877, 18878, 18879, 18880, 18885, 18886,
18887, 18890, 18891, 18892, 18893, 18894, 18897, 18898, 18899,
18900, 18901, 18904, 18905, 18906, 18908, 18911, 18912, 18913,
18915, 18918, 18920, 18921, 18922, 18925, 18926, 18927, 18929,
18932, 18933, 18934, 18936, 18939, 18940, 18941, 18943, 18946,
18947, 18948, 18949, 18953, 18954, 18955, 18956, 18957, 18960,
19062, 19065, 19067, 19069, 19072, 19073, 19074, 19075, 19076,
19079, 19080, 19081, 19083, 19086, 19087, 19088, 19089, 19090,
19093, 19094, 19095, 19096, 19101, 19116, 19117, 19118, 19121,
19122, 19123, 19124, 19125, 19128, 19129, 19130, 19131, 19132,
19135, 19136, 19137, 19138, 19139, 19142, 19144, 19149, 19150,
19151, 19152, 19153, 19156, 19157, 19158, 19159, 19160, 19163,
19164, 19265, 19277, 19279, 19282, 19283, 19284, 19285, 19286,
19289, 19290, 19311, 19331, 19332), class = "Date"), Value = c(-5.33417292743301,
-2.52617494564308, -1.49324009324009, -17.0425444455863, -9.35793961841595,
-11.8841517857143, -9.69152125451611, -10.5028364323984, -8.5025565123789,
-7.38025700934579, -7.78238256870689, -8.17978487280178, -7.7131012583794,
-6.41295139213209, -8.91708282295298, -8.71221652160492, -10.0680747922438,
-11.9437278705109, -16.4053740896049, -20.3138418538824, -23.3452022125799,
-28.1797446210017, -28.9915196362348, -22.7089976941569, -53.3396956436279,
-12.4708793300343, -11.1787848605578, -11.2914868901427, -9.00403312503746,
-5.26940458505923, -3.2678042007321, -1.80023400936037, -1.00480716704905,
-2.57038505839299, -6.25052713783678, -4.13951632213265, -4.66916949663517,
-5.80691219642381, -9.01403811889207, -8.76200989722116, -5.14415894039735,
-4.49265067482651, -6.81002817489304, -5.00625312656328, -12.9697844076655,
-16.4525949550594, 1.35574468085106, -11.14101743721, -10.3890230312036,
-9.95399610136452, -10.7999731146659, -10.0551036897388, -8.00489963647858,
-6.98869824910699, -5.52771977448319, -5.8565313387104, -10.6561639051647,
-10.1591881404835, -10.5755809770487, -12.7858263854831, -12.4515269244669,
-9.75300293620429, -10.6174887030093, -10.9297709205513, -8.62649503888442,
-5.67639625979277, -7.76497565794115, -5.11864292912328, -4.15677111515569,
-1.24773160858034, -13.3209387381896, -4.61524571133755, -2.7064391500322,
-2.54590337369225, -4.7534422125529, -5.77741708660495, -9.54039920679418,
-3.49263873159683, -6.37633384146341, -4.8018775807754, -5.92095982827354,
-6.7982259326898, -8.14429721160882, -14.0318602941176, -15.7693341697285,
-6.33099406742874, -6.02467779730522, -6.67451006984472, -5.78277734678045,
-6.76536805011747, -5.30100480559197, -5.48448933319592, -3.07906668141104,
-1.19141872046993, -0.986825656313108, -2.78210279591495, -1.85680436798187,
1.15368964707724, 0.592389680247037, 2.09444444444445, 1.47565073474096,
-3.77455441789913, -4.99841919285848, -4.54737286347331, -6.52210666135604,
-12.4586229788041, -9.76655410805627, -10.1672813163265, -10.1071078030427,
12.1093036008042, -6.27654856354408, -11.7451651977779, -9.69657142857144,
-14.7645443406988, -13.8044752609797, -12.8212930202637, -10.7131114789657,
-9.7502835651603, -9.12285518188058, -7.82418739307804, -7.37290867229471,
-5.42488374865864, -7.16548962504551, -7.22640747577296, -5.67880989576978,
-2.38449197860963, -4.98403249527278, -8.01237481293888, -8.92721143345521,
-11.0038498048729, -7.91574019894676, -7.24256979885921, -13.0468491640639,
-12.4842416971359, -13.1832300362112, -9.69594285190952, -11.8236710963455,
-15.4970313957103, -16.001590401224, -23.0116940912636, -20.0316726652775,
-15.7387646961417, 14.3248459700742, -33.9856294639016, -29.5890893667004,
-24.5815039111784, 0.351626092151443, -12.4957158872518, -13.5265822044065,
-15.8057934508816, -18.5712850985479, -19.0062351207346, -17.8759978712081,
-30.248495829345, -33.496764540864, -32.9090642540002, -35.9095838866755,
-39.0622059592264, -37.4053056372396, 30.4444623180246, -17.2843857072932,
-10.2894505770196, -9.46059792738388, -10.9101340816963, -10.6768441621886,
-15.8564689156004, -25.0377375363291, -23.491809908999, -16.0848675710594,
-9.52203025543524, -9.57329945269742, -8.26118487113689, -8.77146105741898,
-7.52932569974554, -7.51696981061316, -6.34114162627894, -4.169791026429,
-8.65363440517035, -7.07429566797939, -6.04901960784313, -3.95286523637039,
-4.57831931564948, -4.70401930472815, -4.58048473762432, -4.19802716367601,
-4.8712225795747, 33.7199676963457, -8.38811297695784, -8.41675133350266,
-7.95239554602104, -7.61540762007296, -7.36910990686483, -5.19474884165624,
-6.40677640427848, -6.6880699933269, -2.50748947332091, -5.01177083333332,
-4.96342420082369, -3.74379953975965, -4.39864864864864, -3.96323864499468,
-3.64570892339191, -4.00421168284196, -4.32506635700066, -3.84347776249426,
-3.98014059753953, -4.93978444946085, -2.00666607412675, -4.40675836944916,
-4.67217558943196, -4.69624595469255, -4.93134138588684, -4.59980852082335,
-3.79959344732751, -3.56417422281594, -3.72577039757304, -6.8438477254722,
-6.90215521144616, 17.0278089071515, -8.69234350531171, -8.31160081053697,
-6.48055612912106, -2.82488289917003, -1.97938846776255, -2.76711193952573,
-3.57596324527513, -2.66680277664353, -2.76065192083818, -3.07607539874335,
-9.19352066115703, -7.40534903692798, -20.9290200655884, -9.4949337142347,
-1.67593031748771, -4.00032731786333, -12.9233908365795, -3.12797485406376,
-4.58038461538462, -4.92624181954726, -1.64177861663151, 14.5233533882204,
-3.43662848605578, -7.00163549013596, -22.4328399502006, -5.68106442090641,
-10.4667593755607, -8.32068944589428, -7.5574384739845, -1.18996298205332,
0.685336364575798, 0.683983218163869, -0.190581124472707, 0.40016858076072,
-0.685425265972909, -3.23485727546995, -2.17232406175909, -4.03732694666745,
-3.43151815181518, -4.09670641680863, -6.31686046511628, -8.351512347464,
-8.85095265741279, -7.33867558133818, -8.51522800856061, -8.98040313549832,
-21.688225290116, -7.97696621402419, -7.54824182322704, -6.15927894799952,
-4.38810945273632, -4.66345711216818, -6.63357687936353, -6.46356517733763,
-7.69505643096672, -8.85590341722938, -9.65988090292203, -7.41890982503365,
-8.4019243496411, -9.20245522237875, -11.7157621602604, -15.8009129904372,
-16.7188609056313, -29.5936953565184, -16.1899352493468, -14.6818673311081,
-9.23683475995534, -7.2676923950379, -5.58227412415489, -6.1622267560478,
-7.530241504595, -8.17124428752656, -6.35655475028349, -8.35745036021789,
-8.91222072419106, -8.32131192161132, -8.17337013669821, -10.877467450651,
-9.77258672063867, -11.2631774313289, -10.1360580092287, -7.70075269546349,
-3.16698645907571, -2.38109087441594, -0.022514417531718, -1.08499335989376,
-3.3854513350099, -13.1874976421323, -12.0737487121089, -6.16346335921227,
-6.0325834936609, -8.26060968320382, -6.76706578585191, -9.52737443465841,
-3.80397824702367, -4.72200198216056, -5.16157624343703, -7.23900280010839,
-6.18115990990991, -2.1293152465275, -3.7406576456566, -3.66946114241044,
-2.04129052515896, -0.222846441947564, -0.318394134689292, -2.16259885464958,
14.1518376302798, -1.73382942186606, 0.100049115913557, -0.378155479059093,
-0.588078533746669, -0.121356930514267, -2.06994937873907, -2.84085812981032,
-4.51622825625796, -3.44699324686462, -6.08916034821679, -10.3705139175517,
-7.18844523191799, -7.9430781129157, -5.99308878256247, -7.00972211589987,
-3.268970347887, -0.549423136888057, -6.08099664736361, -2.04115579182029,
-0.832810782197972, -0.916704322940202, -0.2527559897864, -1.11381138113811,
-1.81908592321755, -2.61384042630849, -2.35869084475896, 11.992052157715,
-2.80402835408022, -1.61877788005381, -2.68177518524358, -11.6975791772843,
-20.9404339865207, -27.5328187051002, -29.4899791627202, -28.9357382364862,
-30.3360181743081, -30.4065839909808, -32.100806252164, -34.963228894691,
-36.3754188653083, -24.3521969489358, -7.1991154066672, -4.39962753162101
)), row.names = c(NA, -369L), class = c("data.table", "data.frame"
), .internal.selfref = <pointer: 0x00000214068b1ef0>)
df2:
structure(list(Date = structure(c(19402, 19396, 19395,
19389, 19376, 19374, 19367, 19362, 19346, 19339, 19333, 19332,
19332, 19327, 19319, 19311, 19311, 19306, 19305, 19291, 19290,
19284, 19283, 19277, 19276, 19270, 19269, 19262, 19242, 19241,
19236, 19227, 19220, 19213, 19206, 19200, 19194, 19192, 19185,
19178, 19171, 19151, 19150, 19136, 19129, 19123, 19122, 19102,
19094, 19087, 19066, 19059, 19053, 19038, 19018, 19010, 19003,
18968, 18962, 18947, 18941, 18934, 18933, 18933, 18926, 18920,
18913, 18912, 18906, 18905, 18905, 18898, 18892, 18885, 18884,
18878, 18877, 18877, 18871, 18863, 18856, 18850, 18849, 18842,
18835, 18828, 18815, 18814, 18814, 18809, 18801, 18794, 18793,
18793, 18787, 18786, 18786, 18780, 18780, 18766, 18765, 18765,
18758, 18758, 18752, 18752, 18745, 18738, 18737, 18731, 18730,
18725, 18724, 18724, 18710, 18709, 18703, 18702, 18702, 18696,
18695, 18690, 18688, 18688, 18681, 18675, 18674, 18674, 18668,
18661, 18660, 18660, 18654, 18653, 18653, 18648, 18640, 18639,
18639, 18633, 18605, 18604, 18604, 18598, 18598, 18597, 18597,
18590, 18590, 18584, 18583, 18583, 18578, 18578, 18577, 18570,
18569, 18569, 18563, 18562, 18562, 18557, 18557, 18555, 18549,
18548, 18548, 18542, 18542, 18541, 18541, 18536, 18536, 18534,
18529, 18529, 18521, 18520, 18520, 18515, 18515, 18508, 18508,
18507, 18500, 18499, 18499, 18494, 18493, 18492, 18492, 18486,
18485, 18485, 18479, 18479, 18478, 18478, 18472, 18472, 18471,
18471, 18465, 18465, 18464, 18464, 18458, 18458, 18457, 18457,
18452, 18450, 18450, 18445, 18445, 18444, 18444, 18438, 18437,
18437, 18436, 18436, 18430, 18430, 18429, 18429, 18424, 18424,
18423, 18416, 18416, 18415, 18415, 18410, 18410, 18409, 18409,
18403, 18403, 18402, 18396, 18396, 18395, 18388, 18388, 18387,
18387, 18381, 18381, 18380, 18380, 18374, 18374, 18373, 18373,
18368, 18368, 18367, 18367, 18360, 18360, 18359, 18359, 18354,
18340, 18338, 18331, 18326, 18325, 18317, 18312, 18296, 18289,
18282, 18275, 18270, 18268, 18247, 18241, 18235, 18233, 18226,
18214, 18205, 18198, 18191, 18184, 18177, 18170, 18163, 18144,
18142, 18128, 18121, 18114, 18100, 18093, 18079, 18072, 18065,
18059, 18051, 18039, 18025, 18002, 17995, 17988, 17981, 17969,
17961, 17953, 17948, 17941, 17918, 17913, 17904, 17877, 17871,
17869, 17855, 17850, 17841, 17827, 17808, 17799, 17794, 17785,
17780, 17764, 17751, 17736, 17731, 17715, 17708, 17702, 17688,
17675, 17660, 17654, 17645, 17640, 17631, 17625, 17617, 17605,
17596, 17591, 17584, 17577, 17554, 17549, 17542, 17540, 17513,
17507, 17505, 17491, 17486, 17479, 17463, 17458, 17449, 17444,
17436, 17428, 17422, 17401, 17386, 17379, 17372, 17366, 17353,
17344, 17339, 17323, 17318, 17309, 17304, 17290, 17282, 17276,
17268, 17260, 17253, 17247, 17239, 17234, 17227, 17220, 17212,
17206, 17204, 17197, 17184, 17178, 17176, 17171), class = "Date"),
Amount = c(3000, 2000, 3500, 3000, 1195.925, 4440.75, 3702.5,
3500, 3619.25, 3749.999, 744.65, 4062.498, 2812.5, 2812.499,
3559.5, 3250, 2250, 4374.998, 2750, 3500, 898.85, 4062.5,
3125, 4365.25, 1106.299, 3749.999, 2500, 1200, 3437.5, 4183.157,
2499.999, 750, 2803.5, 1760, 2771.875, 700, 4366.249, 2500,
3437.5, 2378.75, 1011.975, 3125, 3075, 847.7, 2187.499, 2500,
2250, 2499.998, 3124.998, 3250, 1239.4, 1874.999, 3250, 2250,
600, 3124.998, 3000, 1875, 2250, 2170.375, 900, 2500, 3000,
1533.75, 2750, 2812.5, 567.4, 1562.5, 3125, 3000, 2288.125,
2000, 350, 2857.916, 3000, 1010.725, 3250, 1250, 2859, 3000,
2274.25, 705, 3437.499, 2000, 3000, 1562.5, 701.05, 3437.499,
1866.25, 4232.497, 500, 3118.75, 3055.281, 2000, 1172.438,
3742.497, 1562.5, 3437.499, 2500, 3093.749, 4062.495, 2812.499,
3283.75, 1500, 2750, 2000, 1113.65, 3124.999, 4062.498, 713.825,
1000, 3437.5, 3394, 2000, 357.25, 2250, 2500, 4375, 1633,
878.975, 2812.5, 3437.5, 3147.351, 1562.499, 2499.998, 2500,
3250, 1250, 2000, 3143, 3000, 1071.25, 1036.3, 2936.25, 1750,
3250, 375, 3000, 1450.624, 3749.999, 3011.102, 4029.372,
2500, 1250, 2750, 3669.355, 2250, 3002.75, 1562.499, 2500,
4062.499, 2500, 3749.999, 1562.499, 861.4, 3000, 3405.625,
2499.999, 3084.999, 3510.742, 1249.999, 2632, 1986.875, 697.049,
3125, 3000, 1562.499, 2500, 884.149, 3518.452, 2000, 3250,
2000, 3437.499, 3749.999, 1310.749, 3124.999, 2867.5, 2187.5,
3250, 2421.749, 3412.749, 2500, 459.05, 2750, 3000, 1342.375,
1374.999, 3437.5, 4062.497, 2477.25, 3437.499, 3250, 1250,
2750, 562.25, 4062.499, 2812.499, 3500, 3008.875, 3437.499,
1874.999, 3000, 500, 3749.999, 2711, 3250, 1500, 3834.095,
3750, 1062.85, 3750, 2812.5, 4261.195, 2329.25, 3000, 2785.999,
1229.576, 3250, 1768.25, 3250, 3628.75, 4260.749, 2812.5,
3716.25, 2000, 3250, 4036.624, 900, 3941.249, 1500, 3328.75,
3749.999, 2927.5, 2357.75, 3750, 1785.499, 1562.499, 2500,
3869.624, 3250, 2250, 745.85, 3897.958, 1750, 4062.499, 3000,
3616.375, 2152.5, 1499.999, 3749.999, 3671.476, 3141, 4062.499,
1874.999, 3648.75, 2499.998, 3676.249, 2313.749, 2750, 2092.5,
4062.5, 1562.499, 2299.997, 3250, 2299.999, 2587.488, 1244.238,
3500, 3441.975, 3269.875, 800, 2750, 2545.872, 3250, 1056.037,
3162.497, 3162.496, 500, 2082.45, 3162.498, 3449.922, 2356.675,
2820.25, 1100, 3449.997, 3162.499, 919.998, 2250, 574.995,
2750, 3000, 1264.997, 2299.997, 3000, 3162.497, 2250, 3373.097,
2250, 3162.499, 700, 3000, 1437.496, 3000, 2250, 3135.1,
3449.998, 325, 1724.998, 3414.749, 1254.571, 1688.5, 2587.497,
2012.498, 2799.403, 2563.87, 500, 2012.499, 2500, 555.065,
2299.997, 2250, 1100, 3000, 872.72, 2750, 2500, 3000, 1100,
2500, 2750, 2299.997, 2874.995, 2587.495, 1381.4, 2750, 1146.874,
2810.081, 3427.034, 750, 2500, 2185.16, 2750, 675.24, 2864.731,
2250, 2815.66, 950, 2562.68, 1000, 2500, 2250, 2587.497,
800, 2276.5, 2750, 1000, 2722.32, 2750, 650, 2599.24, 2500,
3162.5, 950, 2500, 2500, 3158.628, 1000, 2250, 2500, 3162.498,
2867.806, 1000, 2587.498, 2874.999, 2500, 882.3, 2962.15,
2293.788, 857.793, 3162.497, 1724.999, 2867.739, 2874.999,
2299.997, 2557.917, 833.737, 2500, 2299.997, 1250, 2624.29,
3004.215, 2444.046, 2881.949, 2250, 837.798, 2329.914)), row.names = c(NA,
397L), class = "data.frame")
I'm trying to work out the percentage impact the events listed in the second dataframe (df2) have on the values from the previous day entry listed in the first dataframe (df1). Does anyone know how I can achieve this? Apologies if unclear, English is not my first language.

How do I summarise in years from a specific date i.e. max(close_date) to the corresponding date in one year previous periods of time in R

I am trying to write R code to summarize count and median amounts of a date variable, and a dollar amount variable in my data frame. The summarize part is not the issue, my issue is in trying to group_by time periods to then summarize. I understand the distinction between a time period and a time duration. I am interested in time periods, in this particular case I want to summarize in years from a specific date i.e. max(close_date) to the corresponding date in one year previous periods of time, as in 2022-02-06 to 2021-02-06 to 2020-02-06 and so on. The data frame goes back about 30 months in total, so there are two full years of data to summarize.
The code I wrote here groups by the calendar year, and not what I need;
> sum_closed_date_yr <- scrubbed_data01 %>%
+ group_by(time_period = year(close_date)) %>%
+ summarize(close_count = (close_date = n()), med_close_price = median(close_price, na.rm = TRUE))
> sum_closed_date_yr
# A tibble: 5 × 3
time_period close_count med_close_price
<dbl> <int> <dbl>
1 2019 31 570000
2 2020 80 661250
3 2021 104 930750
4 2022 9 1010000
5 NA 8 0
I am very new to coding in R, I am a real estate appraiser not a statwhizzician. I have taken 23 DataCamp tutorials in R, so I have a newbie working knowledge of R. I have searched through multiple SO posts on summarizing by date, but can not find what I am specifically looking for. Any help would be greatly appreciated, thank you - Joe
Second try, I selected just the two of many variables
scrubbed_data01 %>%
+ select(close_date, close_price) %>%
+ dput()
structure(list(close_date = structure(c(NA, NA, NA, 19039, 19038,
19034, 19024, 19020, 19016, 19013, 18999, 18989, 18976, 18969,
18969, 18968, 18955, 18955, 18954, 18953, 18953, 18949, 18948,
18943, 18940, 18936, 18934, 18933, 18929, 18922, 18921, 18921,
18921, 18918, 18915, 18912, 18908, 18908, 18907, 18906, 18905,
18900, 18900, 18899, 18897, 18897, 18897, 18891, 18891, 18890,
18887, 18880, 18879, 18878, 18878, 18873, 18873, 18873, 18869,
18866, 18866, 18851, 18850, 18844, 18836, 18836, 18831, 18830,
18822, 18821, 18821, 18815, 18810, 18806, 18802, 18796, 18795,
18789, 18786, 18782, 18781, 18781, 18780, 18779, 18775, 18775,
18774, 18761, 18761, 18753, 18752, 18747, 18746, 18746, 18740,
18739, 18737, 18729, 18718, 18715, 18705, 18704, 18701, 18695,
18689, 18683, 18677, 18655, 18652, 18648, 18646, 18640, 18634,
18633, 18631, 18619, 18613, 18611, 18590, 18585, 18579, 18576,
18569, 18569, 18569, 18563, 18558, 18557, 18557, 18556, 18554,
18549, 18544, 18540, 18540, 18533, 18519, 18519, 18519, 18514,
18514, 18513, 18507, 18502, 18502, 18501, 18501, 18499, 18495,
18492, 18492, 18491, 18488, 18484, 18472, 18466, 18464, 18459,
18459, 18453, 18451, 18450, 18445, 18443, 18442, 18423, 18422,
18411, 18401, 18400, 18397, 18397, 18397, 18396, 18387, 18386,
18366, 18361, 18360, 18340, 18338, 18331, 18317, 18313, 18302,
18297, 18289, 18283, 18283, 18277, 18274, 18271, 18271, 18269,
18263, 18261, 18261, 18261, 18260, 18250, 18247, 18239, 18208,
18200, 18199, 18197, 18194, 18190, 18185, 18185, 18180, 18179,
18177, 18177, 18176, 18170, 18169, 18156, 18155, 18152, 18151,
18142, 18142, 18138, 18137, 18136, NA, NA, NA, NA, NA, 19044), class = "Date"),
close_price = c(0, 0, 0, 1150001, 940000, 1253000, 979000,
881000, 1010000, 1060000, 1100000, 1070000, 1025000, 755000,
740000, 930000, 1250000, 990000, 930000, 931500, 975000,
950000, 850000, 865000, 921000, 790000, 778000, 935000, 1270000,
970000, 1061500, 960000, 1015000, 1100000, 1082000, 880000,
1000000, 1140000, 950000, 852000, 1045000, 795000, 950000,
950000, 880000, 850000, 945000, 949500, 1220000, 1015000,
899000, 1100000, 805000, 868000, 1102000, 1015000, 923000,
810000, 890000, 826000, 1140000, 970000, 830000, 790000,
1151000, 835500, 1080000, 870000, 1049000, 985000, 962000,
926000, 1008888, 950000, 810000, 760000, 955000, 930000,
985000, 1210000, 878000, 950000, 855000, 930000, 960000,
1180000, 980000, 960000, 898000, 1100000, 1215000, 885000,
985000, 880000, 1100000, 810000, 1210000, 810000, 970700,
1010000, 800000, 850000, 849000, 770000, 925000, 930000,
875000, 755000, 675000, 875500, 715000, 837000, 747000, 805000,
785000, 801200, 900000, 800000, 610000, 720000, 730000, 700000,
695000, 720000, 750000, 860000, 915000, 787000, 785000, 710000,
735000, 620000, 788000, 780000, 780000, 645000, 700000, 686000,
686000, 745000, 745000, 605000, 730000, 625000, 625000, 685000,
731000, 715000, 695000, 710000, 700000, 575000, 561000, 590000,
595000, 720500, 670000, 711000, 645000, 595000, 700000, 545000,
695000, 531000, 581000, 518000, 645000, 562500, 530000, 640000,
643000, 680000, 700000, 540000, 630000, 658000, 675000, 525000,
600000, 664500, 590000, 569595, 620500, 555000, 585000, 630000,
639900, 515000, 475000, 670000, 610000, 524888, 550000, 520000,
650000, 500000, 500000, 540000, 608000, 575000, 570000, 639900,
645000, 648500, 635000, 530000, 655000, 520000, 555000, 542500,
515000, 620000, 580140, 535000, 638888, 540000, 590000, 535000,
497500, 505000, 675000, 545000, 640000, 555000, 630000, 590000,
0, 0, 0, 0, 0, 985000)), row.names = c(NA, 232L), class = "data.frame")
>
You can create a new variable to assign the year to your desired period:
library(dplyr)
# Assign each row to a 12-month period starting on 6 February, labelled by the
# year in which the period starts, then count rows and take the median price
# per period.
# case_when() tests its conditions in order and uses the first match, so each
# branch only needs its upper bound. Rows with a missing close_date, or a date
# on/after 2023-02-06, match no branch and get period_year = NA.
df %>%
  mutate(
    period_year = case_when(
      close_date < as.Date("2019-02-06") ~ 2018,
      close_date < as.Date("2020-02-06") ~ 2019,
      close_date < as.Date("2021-02-06") ~ 2020,
      close_date < as.Date("2022-02-06") ~ 2021,
      close_date < as.Date("2023-02-06") ~ 2022
    )
  ) %>%
  group_by(time_period = period_year) %>%
  summarize(
    close_count = n(),
    med_close_price = median(close_price, na.rm = TRUE)
  )

"Can't recycle" error when using for loop in r

I have two data frames which I want to process with a for loop. Their structures are the following:
> m_ivae
structure(list(fecha = structure(c(17805, 17836, 17866, 17897,
17928, 17956, 17987, 18017, 18048, 18078, 18109, 18140, 18170,
18201, 18231, 18262, 18293, 18322, 18353, 18383, 18414, 18444,
18475, 18506, 18536, 18567, 18597, 18628, 18659, 18687, 18718,
18748, 18779, 18809), class = "Date"), IVAE = c(109.19, 110.09,
111.34, 111.84, 112.49, 111.99, 113.11, 111.89, 112.11, 112.75,
113.7, 112.93, 112.43, 114.88, 114.5, 114.93, 115.13, 105.54,
91.71, 87.93, 93.06, 96.74, 103.26, 106.76, 109.6, 110.74, 112,
112.73, 114.97, 115.01, 114.67, 115.78, 114.52, 111.91), `Agricultura, Ganadería, Silvicultura y Pesca` = c(99.58,
98.71, 103.44, 101.83, 101.31, 98.87, 99.06, 99.46, 96.55, 100.47,
98.79, 98.91, 100.17, 101.98, 100.48, 99.64, 96.04, 92.42, 97.21,
96.11, 100.57, 94.82, 99.07, 103.63, 97.34, 97.17, 95.46, 98.46,
101.02, 100.24, 100.6, 99.95, 103.07, 98.23), `Índice de Producción Industrial (IPI): Industrias Manufactureras, Explotación de Minas y Canteras y Otras Actividades Industriales` = c(101.4,
103.4, 105.07, 106.72, 108.45, 107.76, 107.25, 105.75, 107.03,
107.31, 106.61, 106.95, 106.61, 110.18, 108.68, 109.66, 111.32,
100.02, 76.77, 73.46, 81.99, 94.83, 100.64, 104.51, 106.74, 107.04,
108.75, 110.8, 110.59, 111.25, 108.82, 110.03, 111.32, 107.61
), Construcción = c(112.25, 117.5, 121.37, 124.32, 122.64, 121.21,
128.69, 122.28, 126.55, 120.13, 137.47, 129.82, 126.83, 132.92,
131.72, 137.56, 130.89, 117.08, 87.62, 67.49, 79.56, 88.97, 117.57,
110.01, 118.02, 117.61, 121.64, 120.76, 120.99, 118.96, 122.7,
122.59, 101.2, 106.3), `Comercio, Transporte y Almacenamiento, Actividades de Alojamiento y de Servicio de Comidas` = c(112.2,
113.03, 113.03, 115.69, 113.74, 114.7, 115.93, 115.3, 114.25,
115.05, 116.68, 114.84, 114.56, 116.58, 117.77, 119.19, 119.15,
103.41, 76.66, 75.21, 90.32, 91.72, 97.53, 105.21, 110.43, 109.72,
112.41, 114.05, 115.88, 117.29, 115.05, 114.69, 116.79, 109.68
), `Información y Comunicaciones` = c(115.49, 116.57, 116.18,
114.29, 113.92, 113.82, 116.45, 115.96, 114.81, 115.72, 116.07,
115.42, 115.32, 115.59, 114.22, 114.21, 113.05, 112.42, 111.52,
108.77, 113.92, 114.07, 115.02, 115.79, 117.78, 117.02, 119.21,
119.56, 125.27, 123.15, 118.56, 119.68, 120.02, 127.68), `Actividades Financieras y de Seguros` = c(117.96,
122.17, 120.93, 119.53, 121.15, 122.17, 125.01, 121.22, 127.48,
124.1, 124.56, 126.86, 124.59, 129.96, 131.74, 131.56, 138.4,
134.4, 131.6, 127.16, 124.61, 116.65, 120.28, 119.57, 127.23,
138.75, 141.25, 138.8, 138.79, 141.28, 141.62, 143.53, 137.62,
139.72), `Actividades Inmobiliarias` = c(113.31, 113.83, 114.41,
114.69, 114.97, 115.98, 116.2, 116.22, 115.64, 115.79, 115.95,
116.24, 117.6, 117.84, 115.35, 108.98, 105.89, 103.74, 103.16,
102.5, 102.42, 102.41, 104.16, 107.74, 112.87, 116.57, 115.68,
113.47, 112.41, 112.08, 112.42, 112.74, 113.21, 112.56), `Actividades Profesionales, Científicas, Técnicas, Administrativas, de Apoyo y Otros Servicios` = c(111.84,
111.92, 114.11, 116.44, 117.77, 112.96, 114.64, 113.67, 112.33,
115.12, 113.31, 114.14, 115.46, 117.17, 120.57, 124.26, 122.68,
99.51, 86.36, 79.21, 81.56, 83.6, 88.71, 97.76, 98.16, 101.04,
102.68, 108.37, 113.64, 114.82, 115.91, 118.35, 118.74, 109.14
), `Actividades de Administración Pública y Defensa, Enseñanza, Salud y Asistencia Social` = c(110.04,
108.07, 109.24, 105.85, 108.99, 109.12, 109.6, 109.31, 108.63,
111.22, 111.25, 109.67, 107.59, 108.8, 106.9, 105.82, 108.24,
107.71, 106.75, 104.67, 98.47, 102.09, 108.94, 109.34, 110.3,
110.01, 109.3, 107.24, 113.46, 111.17, 113.44, 116.42, 112.98,
114.37)), row.names = c(NA, -34L), class = c("tbl_df", "tbl",
"data.frame"))
> m_ipc
structure(list(fecha = structure(c(17805, 17836, 17866, 17897,
17928, 17956, 17987, 18017, 18048, 18078, 18109, 18140, 18170,
18201, 18231, 18262, 18293, 18322, 18353, 18383, 18414, 18444,
18475, 18506, 18536, 18567, 18597, 18628, 18659, 18687, 18718,
18748, 18779, 18809, 18840, 18871), class = "Date"), `Índice General` = c(113.02,
112.82, 112.3, 112.24, 112.44, 112.69, 112.87, 113.01, 112.85,
112.56, 112.16, 111.99, 112.04, 112.17, 112.29, 112.15, 112,
112.09, 111.69, 111.94, 112.59, 112.49, 111.82, 111.56, 111.81,
111.98, 112.2, 112.49, 113.19, 114.08, 114.81, 114.84, 115.51,
116.36, 116.63, 117.1), `Alimentos y Bebidas no Alcohólicas` = c(120.22,
120.56, 120.44, 120.81, 121.12, 121.39, 121.71, 122.29, 122.61,
121.82, 120.79, 120.64, 121.08, 121.48, 121.88, 122.35, 122.22,
122.68, 124.24, 125.06, 126.14, 125.84, 123.33, 122.36, 121.89,
122.24, 122.33, 122.5, 123.12, 124.09, 124.19, 123.97, 124.85,
125.76, 125.93, 127.18), `Bebidas Alcohólicas, Tabaco` = c(146,
145.59, 145.84, 147.3, 146.86, 146.84, 147.11, 147.74, 148.21,
149.24, 150.04, 150.05, 150.11, 149.9, 150.54, 151.89, 151.81,
152.29, 152.01, 153.09, 152.72, 154.65, 154.56, 152.64, 153.4,
153.59, 153.87, 154.49, 155.21, 155.63, 155.4, 155.2, 156.36,
156.2, 156, 157.11), `Prendas de Vestir y Calzado` = c(92.82,
92.77, 92.74, 92.76, 92.93, 92.89, 92.9, 92.69, 92.57, 92.42,
92.13, 91.42, 91.44, 91.17, 91.03, 91.09, 91.43, 91.88, 91.84,
91.84, 91.84, 91.84, 91.84, 92.05, 92.55, 92.6, 92.75, 93, 93.5,
93.84, 93.98, 94.35, 94.5, 94.71, 94.86, 94.85), `Alojamiento, Agua, Electricidad, Gas y otros Combustibles` = c(140.49,
139.57, 138.12, 137.52, 137.35, 137.51, 136.16, 135.75, 135.34,
134.77, 134.82, 134.79, 133.85, 134.04, 134.93, 132.51, 131.61,
131.68, 131.02, 131.03, 131.83, 129.07, 128.61, 129, 131.34,
131.41, 131.97, 132.01, 134.25, 135.03, 137.66, 136.74, 136.96,
140.04, 141.58, 141.93), `Muebles, Artículos para el Hogar y para la Conservación Ordinaria del Hogar` = c(100.24,
100.36, 100.14, 100.29, 100.52, 100.16, 100.25, 100.3, 99.86,
99.73, 99.64, 99.63, 99.48, 99.16, 98.94, 99.16, 99.54, 99.98,
100.08, 100.13, 100.02, 99.83, 100.23, 100.39, 100.07, 100.17,
100.92, 101, 101.98, 102.74, 103.46, 103.81, 104.38, 105.06,
105.3, 106.45), Salud = c(99.37, 99.28, 99.29, 99.29, 99.27,
99.27, 99.34, 99.44, 99.54, 99.6, 99.77, 100.06, 100.07, 100.14,
100.12, 100.17, 100.01, 99.98, 99.96, 100.19, 100.22, 100.9,
100.97, 101.13, 101.24, 101.9, 101.88, 102.04, 102.93, 103.14,
103.37, 103.83, 104.14, 104.19, 104.45, 104.53), Transporte = c(112.15,
110.75, 108.27, 106.83, 107.41, 108.94, 111.01, 111.41, 110.51,
110.51, 109.34, 108.64, 109.05, 109.47, 108.79, 108.56, 107.88,
106.73, 100.48, 100.6, 102.77, 104.29, 103.76, 103.45, 103.59,
103.53, 103.64, 105.12, 105.76, 109.23, 111.09, 111.72, 112.93,
113.5, 112.71, 112.13), Comunicaciones = c(84.77, 84.69, 84.69,
84.64, 84.32, 84.32, 84.32, 84.31, 84.1, 83.78, 83.78, 83.78,
83.89, 83.89, 83.7, 83.2, 83.16, 83.16, 83.2, 83.17, 83.17, 82.99,
82.99, 83.03, 83.19, 83.19, 83.17, 83.12, 83.12, 83.12, 83.12,
83.11, 83.11, 83.09, 83.09, 83.09), `Recreación y Cultura` = c(87.35,
87.37, 87.4, 87.77, 88.71, 88.48, 88.72, 88.75, 88.08, 88.14,
88.18, 87.97, 87.81, 87.72, 87.58, 87.63, 87.89, 87.74, 87.67,
87.6, 87.65, 87.81, 88.29, 87.68, 88.02, 88.08, 88.14, 88.06,
87.86, 88.11, 88.51, 88.77, 89.12, 89.11, 88.98, 89.14), Educación = c(112.83,
112.83, 112.83, 113.27, 113.27, 113.27, 113.27, 113.27, 113.27,
113.27, 113.27, 113.27, 113.65, 113.65, 113.65, 114.06, 114.06,
114.06, 114.06, 114.06, 114.06, 114.06, 114.06, 114.06, 114.26,
114.26, 114.26, 114.26, 114.26, 114.26, 114.26, 114.26, 114.26,
114.26, 114.26, 114.26), `Restaurantes y Hoteles` = c(122.94,
122.7, 122.81, 123.41, 123.37, 123.54, 123.49, 123.57, 123.55,
123.63, 123.59, 123.5, 123.58, 123.54, 123.93, 124.32, 124.44,
124.44, 124.5, 124.61, 124.7, 125.04, 125.34, 125.52, 125.52,
125.8, 126.01, 126.36, 126.65, 126.97, 127.49, 127.95, 129.19,
129.73, 130.46, 131.3), `Bienes y Servicios Diversos` = c(107.55,
107.75, 107.6, 107.39, 107.4, 107.55, 107.36, 107.13, 107.22,
107.26, 107.4, 107.48, 107.42, 107.4, 107.3, 107.37, 107.55,
108.21, 108.38, 108.39, 108.46, 109.45, 109.67, 109.42, 109.65,
109.65, 109.99, 110.25, 110.37, 110.19, 110.34, 110.36, 111.16,
111.8, 112.28, 112.23)), row.names = c(NA, -36L), class = c("tbl_df",
"tbl", "data.frame"))
And I am using the following code:
library(janitor)
# Collect the wide data frames in a list named after the variables that hold
# them, so the loop below can fetch each one with get() and write the
# processed result back under the same name with assign().
wide_dataframes = list(m_ivae,m_ipc)
names(wide_dataframes) = c('m_ivae','m_ipc')
# For each data frame: clean the column names, add lagged copies of every
# column except the first, then convert columns 2..ncol(df) to numeric.
# lapply(1:3, lag, x = ...) calls lag() once for each of 1, 2 and 3 on all
# columns but the first and returns a list of three results, which is stored
# under only three new names (lag1, lag2, lag3).
# NOTE(review): lag() is presumably dplyr::lag; the library() call that
# attaches it is not shown here -- confirm.
for (nm in names(wide_dataframes)){
df = get(nm)
df = clean_names(df)
df[paste0("lag", 1:3)] = lapply(1:3, lag, x=df[,2:ncol(df)])
df[,2:ncol(df)] = apply(df[,2:ncol(df)],2,function(x) as.numeric(as.character(x))) # statement named in the reported error
assign(nm, df)
}
However, after I run the for loop, I get the following error message:
Error: Can't recycle `apply(df[, 2:ncol(df)], 2, function(x) as.numeric(as.character(x)))` (size 40) to size 13.
I tried to fix it by removing the column specifications in the fifth line of the for loop, like this:
# Second attempt: same steps as before, except that the result of apply() now
# replaces df as a whole. apply() is given only columns 2..ncol(df), so the
# first column is not part of what gets assigned back to the variable.
for (nm in names(wide_dataframes)){
df = get(nm)
df = clean_names(df)
df[paste0("lag", 1:3)] = lapply(1:3, lag, x=df[,2:ncol(df)])
df = apply(df[,2:ncol(df)],2,function(x) as.numeric(as.character(x)))
assign(nm, df)
}
This solves the error, but removes the first column, which I need to keep in order to perform a left join with a different data frame later on.
The issue seems to be the assignment to `df[paste0("lag", 1:3)]`: lagging the whole data (or a part of it, `df[,2:ncol(df)]`) yields `ncol(df) - 1` columns for each lag, whereas the left-hand side of `=` supplies only 3 names, so the two sides do not have the same length. Since we are already using a `for` loop, the lagging can be done in an inner `for` loop that creates one named column per original column and lag:
# For every data frame listed in `wide_dataframes`: clean its column names,
# append lag-1 to lag-3 copies of each column except the first, convert
# everything but the first column to numeric, and store the result back
# under the original variable name.
for (nm in names(wide_dataframes)) {
  df <- clean_names(get(nm))

  # Columns to be lagged: all but the first one.
  value_cols <- names(df)[2:ncol(df)]

  for (k in 1:3) {
    # One new column per original column and lag, e.g. "<name>lag1".
    lagged_names <- paste0(value_cols, "lag", k)
    df[lagged_names] <- lag(df[, value_cols], n = k)
  }

  # ncol(df) now includes the lagged columns added above.
  numeric_idx <- 2:ncol(df)
  df[, numeric_idx] <- lapply(
    df[, numeric_idx],
    function(column) as.numeric(as.character(column))
  )

  assign(nm, df)
}
-checking
> ncol(m_ivae)
[1] 41
> ncol(m_ipc)
[1] 53
compare with original number of columns
> sapply(wide_dataframes, ncol)
m_ivae m_ipc
11 14

Order alphabetically the entries of plotly legend in R

I create the plotly chart below which normally is in a shiny app so the selection of y variables is dynamic. In this case I have Bob and Anna. I want to modify the legend. The colors order is correct as I want the deeper blue to be on top but I want to stabilize the order that the names are displayed, probably alphabetically so Anna should always be displayed first with the deeper blue color in the legend. Remember that the selection is dynamic in a shiny app.
Week<-structure(c(18323, 18330, 18337, 18344, 18351, 18358, 18365,
18372, 18379, 18386, 18393, 18400, 18407, 18414, 18421, 18428,
18435, 18442, 18449, 18456, 18463, 18470, 18477, 18484, 18491,
18498, 18505, 18512, 18519, 18526, 18533, 18540, 18547, 18554,
18561, 18568, 18575, 18582, 18589, 18596, 18603, 18610, 18617,
18624, 18631, 18638, 18645, 18652, 18659, 18666, 18673, 18680,
18687, 18694, 18701, 18708, 18715, 18722, NA), class = "Date")
Bob<-c(NA, 12, 28, 89, 205, 311, 367, 419, 536, 673, 787, 996, 1501,
2091, 2836, 3971, 5429, 7422, 9653, 12205, 15096, 19962, 23567,
28432, 33051, 37347, 43390, 49897, 54851, 60913, 67073, 72769,
79629, 84063, 88398, 89579, 88464, 85595, 81697, 74943, 67632,
58226, 53371, 49759, 51508, 55515, 58813, 62240, 62627, 62646,
61285, 54438, 49614, 46721, 44554, 48151, 54014, 68891, 47176
)
Anna<-c(NA, 12, 28, 89, 205, 311, 367, 419, 536, 673, 787, 996, 1501,
2091, 2836, 3971, 5429, 7422, 9653, 12205, 15096, 19962, 23567,
28432, 33051, 37347, 43390, 49897, 54851, 60913, 67073, 72769,
79629, 84063, 88398, 89579, 88464, 85595, 81697, 74943, 67632,
58226, 53371, 49759, 51508, 55515, 58813, 62240, 62627, 62646,
61285, 54438, 49614, 46721, 44554, 48151, 54014, 68891, 47176
)*50
# Combine the weekly series and total each person's values per calendar
# month. The year is part of the grouping key ('%Y-%b'), so the same month of
# different years stays separate.
re <- data.frame(Week, Bob, Anna) %>%
  group_by(month_year = format(Week, '%Y-%b')) %>%
  # A lambda is used because passing extra arguments (here na.rm) through
  # across()'s `...` is deprecated since dplyr 1.1.0.
  summarise(across(c(Bob, Anna), ~ sum(.x, na.rm = TRUE)))
# The plotting code refers to the year-month label as `Week`.
colnames(re)[1] <- "Week"
# Settings for the secondary y axis: overlaid on the primary axis ("y") and
# placed on the right-hand side.
# No trailing comma after the last element: list() rejects an empty argument.
ay <- list(
  overlaying = "y",
  side = "right",
  title = "Second"
)
tempNames <- c("Bob", "Anna")
# Legend entries follow the order in which traces are added, so add the
# series in alphabetical order.
tempNamesV2 <- tempNames[order(tempNames)]

# Colour and y axis of the first and second trace, respectively.
barColors <- c("#3E5B84", "#6BBABF")
barAxes <- c("y", "y2")

p <- plot_ly(re)
# Only two styles are defined, so at most two series are drawn.
for (i in seq_len(min(length(tempNamesV2), length(barColors)))) {
  # Build the hover text eagerly. A formula (~ paste(...)) is only evaluated
  # when the plot is built, by which time `i` holds its final value and every
  # trace would show the same country.
  hoverText <- paste("<b>Country:</b>", tempNamesV2[i],
                     "<br><b>Date:</b>", re$Week)
  p <- add_bars(p, x = ~Week, y = re[[tempNamesV2[i]]],
                name = tempNamesV2[i],
                marker = list(color = barColors[i]),
                yaxis = barAxes[i], offsetgroup = i,
                text = hoverText,
                hovertemplate = '%{text}<extra></extra>')
}

# Attach the secondary axis settings and place the legend to the right of
# the plotting area.
p <- p %>% layout(yaxis2 = ay,
                  xaxis = list(title = "Date"),
                  yaxis = list(title = "i"),
                  margin = list(l = 50, b = 100, t = 50),
                  barmode = 'group',
                  legend = list(x = 1.05, y = 1,
                                title = list(text = '<b> Country </b>')))
p
You can sort the names first and then add the traces in a loop. I prefer using order() so that I can reuse the sort positions elsewhere, but a simple sort() works too. Plotly orders the legend entries by the order in which the traces are added with add_trace/add_bars: the trace added first appears at the top.
Since you only used 2 y's, I assume the user can only select up to 2 "Names" at a time, and you are trying to make sure the Countries are sorted:
Week<-structure(c(18323, 18330, 18337, 18344, 18351, 18358, 18365,
18372, 18379, 18386, 18393, 18400, 18407, 18414, 18421, 18428,
18435, 18442, 18449, 18456, 18463, 18470, 18477, 18484, 18491,
18498, 18505, 18512, 18519, 18526, 18533, 18540, 18547, 18554,
18561, 18568, 18575, 18582, 18589, 18596, 18603, 18610, 18617,
18624, 18631, 18638, 18645, 18652, 18659, 18666, 18673, 18680,
18687, 18694, 18701, 18708, 18715, 18722, NA), class = "Date")
Bob<-c(NA, 12, 28, 89, 205, 311, 367, 419, 536, 673, 787, 996, 1501,
2091, 2836, 3971, 5429, 7422, 9653, 12205, 15096, 19962, 23567,
28432, 33051, 37347, 43390, 49897, 54851, 60913, 67073, 72769,
79629, 84063, 88398, 89579, 88464, 85595, 81697, 74943, 67632,
58226, 53371, 49759, 51508, 55515, 58813, 62240, 62627, 62646,
61285, 54438, 49614, 46721, 44554, 48151, 54014, 68891, 47176
)
Anna<-c(NA, 12, 28, 89, 205, 311, 367, 419, 536, 673, 787, 996, 1501,
2091, 2836, 3971, 5429, 7422, 9653, 12205, 15096, 19962, 23567,
28432, 33051, 37347, 43390, 49897, 54851, 60913, 67073, 72769,
79629, 84063, 88398, 89579, 88464, 85595, 81697, 74943, 67632,
58226, 53371, 49759, 51508, 55515, 58813, 62240, 62627, 62646,
61285, 54438, 49614, 46721, 44554, 48151, 54014, 68891, 47176
)*50
# Combine the weekly series and total each person's values per calendar
# month. The year is part of the grouping key ('%Y-%b'), so the same month of
# different years stays separate.
re <- data.frame(Week, Bob, Anna) %>%
  group_by(month_year = format(Week, '%Y-%b')) %>%
  # A lambda is used because passing extra arguments (here na.rm) through
  # across()'s `...` is deprecated since dplyr 1.1.0.
  summarise(across(c(Bob, Anna), ~ sum(.x, na.rm = TRUE)))
# The plotting code refers to the year-month label as `Week`.
colnames(re)[1] <- "Week"
# Secondary y axis: overlaid on the primary axis "y", shown on the right.
ay <- list(overlaying = "y", side = "right", title = "Second")
tempNames <- c("Bob", "Anna")
# Legend entries follow the order in which traces are added, so add the
# series in alphabetical order.
tempNamesV2 <- tempNames[order(tempNames)]

# Colour and y axis of the first and second trace, respectively.
barColors <- c("#3E5B84", "#6BBABF")
barAxes <- c("y", "y2")

p <- plot_ly(re)
# Only two styles are defined, so at most two series are drawn.
for (i in seq_len(min(length(tempNamesV2), length(barColors)))) {
  # The hover label must use the sorted names (tempNamesV2), like the trace
  # name and data do; indexing the unsorted tempNames would swap the labels.
  # It is also built eagerly: a formula (~ paste(...)) is only evaluated when
  # the plot is built, by which time `i` holds its final value and every
  # trace would show the same country.
  hoverText <- paste("<b>Country:</b>", tempNamesV2[i],
                     "<br><b>Date:</b>", re$Week)
  p <- add_bars(p, x = ~Week, y = re[[tempNamesV2[i]]],
                name = tempNamesV2[i],
                marker = list(color = barColors[i]),
                yaxis = barAxes[i], offsetgroup = i,
                text = hoverText,
                hovertemplate = '%{text}<extra></extra>')
}

# Apply the secondary axis settings held in `ay` (otherwise `ay` is unused
# and the trace assigned to "y2" has no configured axis), with a single
# `margin` entry for the whole layout.
p <- p %>% layout(yaxis2 = ay,
                  xaxis = list(title = "Date"),
                  yaxis = list(title = "i"),
                  margin = list(l = 50, b = 100, t = 50),
                  barmode = 'group',
                  legend = list(x = 1.05, y = 1,
                                title = list(text = '<b> Country </b>')))
p
I also realized you have two margins in the layout, and I put them together.

Aggregate by month for different years and create plotly bar with months in x-axis

I have the dataframe below:
Week<-structure(c(18323, 18330, 18337, 18344, 18351, 18358, 18365,
18372, 18379, 18386, 18393, 18400, 18407, 18414, 18421, 18428,
18435, 18442, 18449, 18456, 18463, 18470, 18477, 18484, 18491,
18498, 18505, 18512, 18519, 18526, 18533, 18540, 18547, 18554,
18561, 18568, 18575, 18582, 18589, 18596, 18603, 18610, 18617,
18624, 18631, 18638, 18645, 18652, 18659, 18666, 18673, 18680,
18687, 18694, 18701, 18708, 18715, 18722, NA), class = "Date")
First<-c(NA, 12, 28, 89, 205, 311, 367, 419, 536, 673, 787, 996, 1501,
2091, 2836, 3971, 5429, 7422, 9653, 12205, 15096, 19962, 23567,
28432, 33051, 37347, 43390, 49897, 54851, 60913, 67073, 72769,
79629, 84063, 88398, 89579, 88464, 85595, 81697, 74943, 67632,
58226, 53371, 49759, 51508, 55515, 58813, 62240, 62627, 62646,
61285, 54438, 49614, 46721, 44554, 48151, 54014, 68891, 47176
)
Second<-c(NA, 12, 28, 89, 205, 311, 367, 419, 536, 673, 787, 996, 1501,
2091, 2836, 3971, 5429, 7422, 9653, 12205, 15096, 19962, 23567,
28432, 33051, 37347, 43390, 49897, 54851, 60913, 67073, 72769,
79629, 84063, 88398, 89579, 88464, 85595, 81697, 74943, 67632,
58226, 53371, 49759, 51508, 55515, 58813, 62240, 62627, 62646,
61285, 54438, 49614, 46721, 44554, 48151, 54014, 68891, 47176
)
re<-data.frame(Week,First,Second)
and I want to sum by month but I have 2 years 2020 and 2021 so I need to have separated months for each year
library(lubridate)
# Sum `First` for each value of month(Week). The grouping key is the month
# only, without the year, so weeks that fall in the same month of different
# years end up in the same group.
bymonth <- aggregate(cbind(First)~month(Week),
data=re,FUN=sum)
then I need to create a plotly bar chart but the months are not displayed properly.
# Bar chart of the monthly totals, taken from the second column of `bymonth`.
# NOTE(review): `~Month` refers to a column called `Month`, but the
# aggregate() call that builds `bymonth` does not obviously create a column
# with that name -- confirm.
p <- plot_ly() %>%
add_bars(bymonth, x = ~Month, y = bymonth[,2], name = "fIRST",
marker = list(color = "#3E5B84")
)
You can extract year-month from the date, aggregate and plot -
library(dplyr)
library(plotly)
# Total `First` per year-month and draw the totals as bars.
re %>%
  arrange(Week) %>%
  # Year-month label for each week; the factor levels follow the order of
  # first appearance, i.e. the order produced by the sort above.
  mutate(month_year = format(Week, '%Y-%b')) %>%
  mutate(month_year = factor(month_year, levels = unique(month_year))) %>%
  group_by(month_year) %>%
  summarise(First = sum(First, na.rm = TRUE)) %>%
  plot_ly() %>%
  add_bars(
    x = ~month_year,
    y = ~First,
    marker = list(color = "#3E5B84")
  )

Resources