Related
I have two very large dataframes of the following structure(s):
df1:
structure(list(Date = structure(c(18871, 18873, 18883, 18928,
18950, 18961, 18962, 18963, 18967, 18969, 18970, 18971, 18974,
18975, 18976, 18977, 18978, 18981, 18982, 18983, 18984, 18985,
18990, 18991, 18992, 18996, 18997, 18998, 18999, 19002, 19003,
19004, 19005, 19006, 19009, 19010, 19011, 19012, 19013, 19016,
19017, 19018, 19019, 19020, 19023, 19025, 19026, 19027, 19030,
19031, 19033, 19034, 19037, 19038, 19039, 19040, 19041, 19044,
19045, 19046, 19047, 19048, 19051, 19052, 19053, 19054, 19055,
19058, 19059, 19066, 19082, 19104, 19107, 19108, 19109, 19110,
19111, 19115, 19143, 19166, 19167, 19170, 19171, 19172, 19173,
19174, 19177, 19178, 19179, 19180, 19181, 19184, 19185, 19186,
19187, 19188, 19191, 19192, 19193, 19194, 19195, 19198, 19199,
19200, 19201, 19202, 19205, 19206, 19207, 19208, 19209, 19212,
19213, 19214, 19215, 19216, 19219, 19220, 19221, 19222, 19223,
19226, 19227, 19228, 19229, 19230, 19234, 19235, 19236, 19237,
19240, 19241, 19242, 19243, 19244, 19247, 19248, 19249, 19250,
19251, 19255, 19256, 19257, 19258, 19261, 19262, 19263, 19269,
19270, 19271, 19272, 19275, 19276, 19291, 19292, 19293, 19296,
19297, 19298, 19299, 19300, 19303, 19304, 19305, 19306, 19307,
19310, 19312, 19313, 19314, 19317, 19318, 19319, 19320, 19321,
19324, 19325, 19326, 19327, 19328, 19333, 19334, 19335, 19338,
19339, 19340, 19341, 19342, 19345, 19346, 19347, 19348, 19349,
19354, 19355, 19356, 19360, 19361, 19362, 19363, 19366, 19367,
19368, 19369, 19370, 19373, 19374, 19375, 19376, 19377, 19380,
19381, 19382, 19383, 19384, 19387, 19388, 19389, 19390, 19391,
19394, 19395, 19396, 19397, 19398, 19401, 19402, 19403, 19404,
18964, 18968, 19024, 19032, 19103, 19264, 19268, 18884, 18935,
19060, 19061, 19068, 19102, 19165, 19278, 18907, 18919, 18942,
18914, 18872, 18876, 18877, 18878, 18879, 18880, 18885, 18886,
18887, 18890, 18891, 18892, 18893, 18894, 18897, 18898, 18899,
18900, 18901, 18904, 18905, 18906, 18908, 18911, 18912, 18913,
18915, 18918, 18920, 18921, 18922, 18925, 18926, 18927, 18929,
18932, 18933, 18934, 18936, 18939, 18940, 18941, 18943, 18946,
18947, 18948, 18949, 18953, 18954, 18955, 18956, 18957, 18960,
19062, 19065, 19067, 19069, 19072, 19073, 19074, 19075, 19076,
19079, 19080, 19081, 19083, 19086, 19087, 19088, 19089, 19090,
19093, 19094, 19095, 19096, 19101, 19116, 19117, 19118, 19121,
19122, 19123, 19124, 19125, 19128, 19129, 19130, 19131, 19132,
19135, 19136, 19137, 19138, 19139, 19142, 19144, 19149, 19150,
19151, 19152, 19153, 19156, 19157, 19158, 19159, 19160, 19163,
19164, 19265, 19277, 19279, 19282, 19283, 19284, 19285, 19286,
19289, 19290, 19311, 19331, 19332), class = "Date"), Value = c(-5.33417292743301,
-2.52617494564308, -1.49324009324009, -17.0425444455863, -9.35793961841595,
-11.8841517857143, -9.69152125451611, -10.5028364323984, -8.5025565123789,
-7.38025700934579, -7.78238256870689, -8.17978487280178, -7.7131012583794,
-6.41295139213209, -8.91708282295298, -8.71221652160492, -10.0680747922438,
-11.9437278705109, -16.4053740896049, -20.3138418538824, -23.3452022125799,
-28.1797446210017, -28.9915196362348, -22.7089976941569, -53.3396956436279,
-12.4708793300343, -11.1787848605578, -11.2914868901427, -9.00403312503746,
-5.26940458505923, -3.2678042007321, -1.80023400936037, -1.00480716704905,
-2.57038505839299, -6.25052713783678, -4.13951632213265, -4.66916949663517,
-5.80691219642381, -9.01403811889207, -8.76200989722116, -5.14415894039735,
-4.49265067482651, -6.81002817489304, -5.00625312656328, -12.9697844076655,
-16.4525949550594, 1.35574468085106, -11.14101743721, -10.3890230312036,
-9.95399610136452, -10.7999731146659, -10.0551036897388, -8.00489963647858,
-6.98869824910699, -5.52771977448319, -5.8565313387104, -10.6561639051647,
-10.1591881404835, -10.5755809770487, -12.7858263854831, -12.4515269244669,
-9.75300293620429, -10.6174887030093, -10.9297709205513, -8.62649503888442,
-5.67639625979277, -7.76497565794115, -5.11864292912328, -4.15677111515569,
-1.24773160858034, -13.3209387381896, -4.61524571133755, -2.7064391500322,
-2.54590337369225, -4.7534422125529, -5.77741708660495, -9.54039920679418,
-3.49263873159683, -6.37633384146341, -4.8018775807754, -5.92095982827354,
-6.7982259326898, -8.14429721160882, -14.0318602941176, -15.7693341697285,
-6.33099406742874, -6.02467779730522, -6.67451006984472, -5.78277734678045,
-6.76536805011747, -5.30100480559197, -5.48448933319592, -3.07906668141104,
-1.19141872046993, -0.986825656313108, -2.78210279591495, -1.85680436798187,
1.15368964707724, 0.592389680247037, 2.09444444444445, 1.47565073474096,
-3.77455441789913, -4.99841919285848, -4.54737286347331, -6.52210666135604,
-12.4586229788041, -9.76655410805627, -10.1672813163265, -10.1071078030427,
12.1093036008042, -6.27654856354408, -11.7451651977779, -9.69657142857144,
-14.7645443406988, -13.8044752609797, -12.8212930202637, -10.7131114789657,
-9.7502835651603, -9.12285518188058, -7.82418739307804, -7.37290867229471,
-5.42488374865864, -7.16548962504551, -7.22640747577296, -5.67880989576978,
-2.38449197860963, -4.98403249527278, -8.01237481293888, -8.92721143345521,
-11.0038498048729, -7.91574019894676, -7.24256979885921, -13.0468491640639,
-12.4842416971359, -13.1832300362112, -9.69594285190952, -11.8236710963455,
-15.4970313957103, -16.001590401224, -23.0116940912636, -20.0316726652775,
-15.7387646961417, 14.3248459700742, -33.9856294639016, -29.5890893667004,
-24.5815039111784, 0.351626092151443, -12.4957158872518, -13.5265822044065,
-15.8057934508816, -18.5712850985479, -19.0062351207346, -17.8759978712081,
-30.248495829345, -33.496764540864, -32.9090642540002, -35.9095838866755,
-39.0622059592264, -37.4053056372396, 30.4444623180246, -17.2843857072932,
-10.2894505770196, -9.46059792738388, -10.9101340816963, -10.6768441621886,
-15.8564689156004, -25.0377375363291, -23.491809908999, -16.0848675710594,
-9.52203025543524, -9.57329945269742, -8.26118487113689, -8.77146105741898,
-7.52932569974554, -7.51696981061316, -6.34114162627894, -4.169791026429,
-8.65363440517035, -7.07429566797939, -6.04901960784313, -3.95286523637039,
-4.57831931564948, -4.70401930472815, -4.58048473762432, -4.19802716367601,
-4.8712225795747, 33.7199676963457, -8.38811297695784, -8.41675133350266,
-7.95239554602104, -7.61540762007296, -7.36910990686483, -5.19474884165624,
-6.40677640427848, -6.6880699933269, -2.50748947332091, -5.01177083333332,
-4.96342420082369, -3.74379953975965, -4.39864864864864, -3.96323864499468,
-3.64570892339191, -4.00421168284196, -4.32506635700066, -3.84347776249426,
-3.98014059753953, -4.93978444946085, -2.00666607412675, -4.40675836944916,
-4.67217558943196, -4.69624595469255, -4.93134138588684, -4.59980852082335,
-3.79959344732751, -3.56417422281594, -3.72577039757304, -6.8438477254722,
-6.90215521144616, 17.0278089071515, -8.69234350531171, -8.31160081053697,
-6.48055612912106, -2.82488289917003, -1.97938846776255, -2.76711193952573,
-3.57596324527513, -2.66680277664353, -2.76065192083818, -3.07607539874335,
-9.19352066115703, -7.40534903692798, -20.9290200655884, -9.4949337142347,
-1.67593031748771, -4.00032731786333, -12.9233908365795, -3.12797485406376,
-4.58038461538462, -4.92624181954726, -1.64177861663151, 14.5233533882204,
-3.43662848605578, -7.00163549013596, -22.4328399502006, -5.68106442090641,
-10.4667593755607, -8.32068944589428, -7.5574384739845, -1.18996298205332,
0.685336364575798, 0.683983218163869, -0.190581124472707, 0.40016858076072,
-0.685425265972909, -3.23485727546995, -2.17232406175909, -4.03732694666745,
-3.43151815181518, -4.09670641680863, -6.31686046511628, -8.351512347464,
-8.85095265741279, -7.33867558133818, -8.51522800856061, -8.98040313549832,
-21.688225290116, -7.97696621402419, -7.54824182322704, -6.15927894799952,
-4.38810945273632, -4.66345711216818, -6.63357687936353, -6.46356517733763,
-7.69505643096672, -8.85590341722938, -9.65988090292203, -7.41890982503365,
-8.4019243496411, -9.20245522237875, -11.7157621602604, -15.8009129904372,
-16.7188609056313, -29.5936953565184, -16.1899352493468, -14.6818673311081,
-9.23683475995534, -7.2676923950379, -5.58227412415489, -6.1622267560478,
-7.530241504595, -8.17124428752656, -6.35655475028349, -8.35745036021789,
-8.91222072419106, -8.32131192161132, -8.17337013669821, -10.877467450651,
-9.77258672063867, -11.2631774313289, -10.1360580092287, -7.70075269546349,
-3.16698645907571, -2.38109087441594, -0.022514417531718, -1.08499335989376,
-3.3854513350099, -13.1874976421323, -12.0737487121089, -6.16346335921227,
-6.0325834936609, -8.26060968320382, -6.76706578585191, -9.52737443465841,
-3.80397824702367, -4.72200198216056, -5.16157624343703, -7.23900280010839,
-6.18115990990991, -2.1293152465275, -3.7406576456566, -3.66946114241044,
-2.04129052515896, -0.222846441947564, -0.318394134689292, -2.16259885464958,
14.1518376302798, -1.73382942186606, 0.100049115913557, -0.378155479059093,
-0.588078533746669, -0.121356930514267, -2.06994937873907, -2.84085812981032,
-4.51622825625796, -3.44699324686462, -6.08916034821679, -10.3705139175517,
-7.18844523191799, -7.9430781129157, -5.99308878256247, -7.00972211589987,
-3.268970347887, -0.549423136888057, -6.08099664736361, -2.04115579182029,
-0.832810782197972, -0.916704322940202, -0.2527559897864, -1.11381138113811,
-1.81908592321755, -2.61384042630849, -2.35869084475896, 11.992052157715,
-2.80402835408022, -1.61877788005381, -2.68177518524358, -11.6975791772843,
-20.9404339865207, -27.5328187051002, -29.4899791627202, -28.9357382364862,
-30.3360181743081, -30.4065839909808, -32.100806252164, -34.963228894691,
-36.3754188653083, -24.3521969489358, -7.1991154066672, -4.39962753162101
)), row.names = c(NA, -369L), class = c("data.table", "data.frame"
), .internal.selfref = <pointer: 0x00000214068b1ef0>)
df2:
structure(list(Date = structure(c(19402, 19396, 19395,
19389, 19376, 19374, 19367, 19362, 19346, 19339, 19333, 19332,
19332, 19327, 19319, 19311, 19311, 19306, 19305, 19291, 19290,
19284, 19283, 19277, 19276, 19270, 19269, 19262, 19242, 19241,
19236, 19227, 19220, 19213, 19206, 19200, 19194, 19192, 19185,
19178, 19171, 19151, 19150, 19136, 19129, 19123, 19122, 19102,
19094, 19087, 19066, 19059, 19053, 19038, 19018, 19010, 19003,
18968, 18962, 18947, 18941, 18934, 18933, 18933, 18926, 18920,
18913, 18912, 18906, 18905, 18905, 18898, 18892, 18885, 18884,
18878, 18877, 18877, 18871, 18863, 18856, 18850, 18849, 18842,
18835, 18828, 18815, 18814, 18814, 18809, 18801, 18794, 18793,
18793, 18787, 18786, 18786, 18780, 18780, 18766, 18765, 18765,
18758, 18758, 18752, 18752, 18745, 18738, 18737, 18731, 18730,
18725, 18724, 18724, 18710, 18709, 18703, 18702, 18702, 18696,
18695, 18690, 18688, 18688, 18681, 18675, 18674, 18674, 18668,
18661, 18660, 18660, 18654, 18653, 18653, 18648, 18640, 18639,
18639, 18633, 18605, 18604, 18604, 18598, 18598, 18597, 18597,
18590, 18590, 18584, 18583, 18583, 18578, 18578, 18577, 18570,
18569, 18569, 18563, 18562, 18562, 18557, 18557, 18555, 18549,
18548, 18548, 18542, 18542, 18541, 18541, 18536, 18536, 18534,
18529, 18529, 18521, 18520, 18520, 18515, 18515, 18508, 18508,
18507, 18500, 18499, 18499, 18494, 18493, 18492, 18492, 18486,
18485, 18485, 18479, 18479, 18478, 18478, 18472, 18472, 18471,
18471, 18465, 18465, 18464, 18464, 18458, 18458, 18457, 18457,
18452, 18450, 18450, 18445, 18445, 18444, 18444, 18438, 18437,
18437, 18436, 18436, 18430, 18430, 18429, 18429, 18424, 18424,
18423, 18416, 18416, 18415, 18415, 18410, 18410, 18409, 18409,
18403, 18403, 18402, 18396, 18396, 18395, 18388, 18388, 18387,
18387, 18381, 18381, 18380, 18380, 18374, 18374, 18373, 18373,
18368, 18368, 18367, 18367, 18360, 18360, 18359, 18359, 18354,
18340, 18338, 18331, 18326, 18325, 18317, 18312, 18296, 18289,
18282, 18275, 18270, 18268, 18247, 18241, 18235, 18233, 18226,
18214, 18205, 18198, 18191, 18184, 18177, 18170, 18163, 18144,
18142, 18128, 18121, 18114, 18100, 18093, 18079, 18072, 18065,
18059, 18051, 18039, 18025, 18002, 17995, 17988, 17981, 17969,
17961, 17953, 17948, 17941, 17918, 17913, 17904, 17877, 17871,
17869, 17855, 17850, 17841, 17827, 17808, 17799, 17794, 17785,
17780, 17764, 17751, 17736, 17731, 17715, 17708, 17702, 17688,
17675, 17660, 17654, 17645, 17640, 17631, 17625, 17617, 17605,
17596, 17591, 17584, 17577, 17554, 17549, 17542, 17540, 17513,
17507, 17505, 17491, 17486, 17479, 17463, 17458, 17449, 17444,
17436, 17428, 17422, 17401, 17386, 17379, 17372, 17366, 17353,
17344, 17339, 17323, 17318, 17309, 17304, 17290, 17282, 17276,
17268, 17260, 17253, 17247, 17239, 17234, 17227, 17220, 17212,
17206, 17204, 17197, 17184, 17178, 17176, 17171), class = "Date"),
Amount = c(3000, 2000, 3500, 3000, 1195.925, 4440.75, 3702.5,
3500, 3619.25, 3749.999, 744.65, 4062.498, 2812.5, 2812.499,
3559.5, 3250, 2250, 4374.998, 2750, 3500, 898.85, 4062.5,
3125, 4365.25, 1106.299, 3749.999, 2500, 1200, 3437.5, 4183.157,
2499.999, 750, 2803.5, 1760, 2771.875, 700, 4366.249, 2500,
3437.5, 2378.75, 1011.975, 3125, 3075, 847.7, 2187.499, 2500,
2250, 2499.998, 3124.998, 3250, 1239.4, 1874.999, 3250, 2250,
600, 3124.998, 3000, 1875, 2250, 2170.375, 900, 2500, 3000,
1533.75, 2750, 2812.5, 567.4, 1562.5, 3125, 3000, 2288.125,
2000, 350, 2857.916, 3000, 1010.725, 3250, 1250, 2859, 3000,
2274.25, 705, 3437.499, 2000, 3000, 1562.5, 701.05, 3437.499,
1866.25, 4232.497, 500, 3118.75, 3055.281, 2000, 1172.438,
3742.497, 1562.5, 3437.499, 2500, 3093.749, 4062.495, 2812.499,
3283.75, 1500, 2750, 2000, 1113.65, 3124.999, 4062.498, 713.825,
1000, 3437.5, 3394, 2000, 357.25, 2250, 2500, 4375, 1633,
878.975, 2812.5, 3437.5, 3147.351, 1562.499, 2499.998, 2500,
3250, 1250, 2000, 3143, 3000, 1071.25, 1036.3, 2936.25, 1750,
3250, 375, 3000, 1450.624, 3749.999, 3011.102, 4029.372,
2500, 1250, 2750, 3669.355, 2250, 3002.75, 1562.499, 2500,
4062.499, 2500, 3749.999, 1562.499, 861.4, 3000, 3405.625,
2499.999, 3084.999, 3510.742, 1249.999, 2632, 1986.875, 697.049,
3125, 3000, 1562.499, 2500, 884.149, 3518.452, 2000, 3250,
2000, 3437.499, 3749.999, 1310.749, 3124.999, 2867.5, 2187.5,
3250, 2421.749, 3412.749, 2500, 459.05, 2750, 3000, 1342.375,
1374.999, 3437.5, 4062.497, 2477.25, 3437.499, 3250, 1250,
2750, 562.25, 4062.499, 2812.499, 3500, 3008.875, 3437.499,
1874.999, 3000, 500, 3749.999, 2711, 3250, 1500, 3834.095,
3750, 1062.85, 3750, 2812.5, 4261.195, 2329.25, 3000, 2785.999,
1229.576, 3250, 1768.25, 3250, 3628.75, 4260.749, 2812.5,
3716.25, 2000, 3250, 4036.624, 900, 3941.249, 1500, 3328.75,
3749.999, 2927.5, 2357.75, 3750, 1785.499, 1562.499, 2500,
3869.624, 3250, 2250, 745.85, 3897.958, 1750, 4062.499, 3000,
3616.375, 2152.5, 1499.999, 3749.999, 3671.476, 3141, 4062.499,
1874.999, 3648.75, 2499.998, 3676.249, 2313.749, 2750, 2092.5,
4062.5, 1562.499, 2299.997, 3250, 2299.999, 2587.488, 1244.238,
3500, 3441.975, 3269.875, 800, 2750, 2545.872, 3250, 1056.037,
3162.497, 3162.496, 500, 2082.45, 3162.498, 3449.922, 2356.675,
2820.25, 1100, 3449.997, 3162.499, 919.998, 2250, 574.995,
2750, 3000, 1264.997, 2299.997, 3000, 3162.497, 2250, 3373.097,
2250, 3162.499, 700, 3000, 1437.496, 3000, 2250, 3135.1,
3449.998, 325, 1724.998, 3414.749, 1254.571, 1688.5, 2587.497,
2012.498, 2799.403, 2563.87, 500, 2012.499, 2500, 555.065,
2299.997, 2250, 1100, 3000, 872.72, 2750, 2500, 3000, 1100,
2500, 2750, 2299.997, 2874.995, 2587.495, 1381.4, 2750, 1146.874,
2810.081, 3427.034, 750, 2500, 2185.16, 2750, 675.24, 2864.731,
2250, 2815.66, 950, 2562.68, 1000, 2500, 2250, 2587.497,
800, 2276.5, 2750, 1000, 2722.32, 2750, 650, 2599.24, 2500,
3162.5, 950, 2500, 2500, 3158.628, 1000, 2250, 2500, 3162.498,
2867.806, 1000, 2587.498, 2874.999, 2500, 882.3, 2962.15,
2293.788, 857.793, 3162.497, 1724.999, 2867.739, 2874.999,
2299.997, 2557.917, 833.737, 2500, 2299.997, 1250, 2624.29,
3004.215, 2444.046, 2881.949, 2250, 837.798, 2329.914)), row.names = c(NA,
397L), class = "data.frame")
I'm trying to work out the percentage impact that the events listed in the second dataframe (df2) have on the values from the previous day's entry listed in the first dataframe (df1). Does anyone know how I can achieve this? Apologies if this is unclear; English is not my first language.
I am trying to write R code to summarize the count and median amounts of a date variable and a dollar amount variable in my data frame. The summarize part is not the issue; my issue is in trying to group_by time periods to then summarize. I understand the distinction between a time period and a time duration. I am interested in time periods: in this particular case I want to summarize in years counted back from a specific date, i.e. from max(close_date) to the corresponding date one year earlier, and so on for previous periods, as in 2022-02-06 to 2021-02-06 to 2020-02-06. The data frame goes back about 30 months in total, so there are two full years of data to summarize.
The code I wrote here groups by the calendar year, and not what I need;
> sum_closed_date_yr <- scrubbed_data01 %>%
+ group_by(time_period = year(close_date)) %>%
+ summarize(close_count = (close_date = n()), med_close_price = median(close_price, na.rm = TRUE))
> sum_closed_date_yr
# A tibble: 5 × 3
time_period close_count med_close_price
<dbl> <int> <dbl>
1 2019 31 570000
2 2020 80 661250
3 2021 104 930750
4 2022 9 1010000
5 NA 8 0
I am very new to coding in R, I am a real estate appraiser not a statwhizzician. I have taken 23 DataCamp tutorials in R, so I have a newbie working knowledge of R. I have searched through multiple SO posts on summarizing by date, but can not find what I am specifically looking for. Any help would be greatly appreciated, thank you - Joe
Second try, I selected just the two of many variables
scrubbed_data01 %>%
+ select(close_date, close_price) %>%
+ dput()
structure(list(close_date = structure(c(NA, NA, NA, 19039, 19038,
19034, 19024, 19020, 19016, 19013, 18999, 18989, 18976, 18969,
18969, 18968, 18955, 18955, 18954, 18953, 18953, 18949, 18948,
18943, 18940, 18936, 18934, 18933, 18929, 18922, 18921, 18921,
18921, 18918, 18915, 18912, 18908, 18908, 18907, 18906, 18905,
18900, 18900, 18899, 18897, 18897, 18897, 18891, 18891, 18890,
18887, 18880, 18879, 18878, 18878, 18873, 18873, 18873, 18869,
18866, 18866, 18851, 18850, 18844, 18836, 18836, 18831, 18830,
18822, 18821, 18821, 18815, 18810, 18806, 18802, 18796, 18795,
18789, 18786, 18782, 18781, 18781, 18780, 18779, 18775, 18775,
18774, 18761, 18761, 18753, 18752, 18747, 18746, 18746, 18740,
18739, 18737, 18729, 18718, 18715, 18705, 18704, 18701, 18695,
18689, 18683, 18677, 18655, 18652, 18648, 18646, 18640, 18634,
18633, 18631, 18619, 18613, 18611, 18590, 18585, 18579, 18576,
18569, 18569, 18569, 18563, 18558, 18557, 18557, 18556, 18554,
18549, 18544, 18540, 18540, 18533, 18519, 18519, 18519, 18514,
18514, 18513, 18507, 18502, 18502, 18501, 18501, 18499, 18495,
18492, 18492, 18491, 18488, 18484, 18472, 18466, 18464, 18459,
18459, 18453, 18451, 18450, 18445, 18443, 18442, 18423, 18422,
18411, 18401, 18400, 18397, 18397, 18397, 18396, 18387, 18386,
18366, 18361, 18360, 18340, 18338, 18331, 18317, 18313, 18302,
18297, 18289, 18283, 18283, 18277, 18274, 18271, 18271, 18269,
18263, 18261, 18261, 18261, 18260, 18250, 18247, 18239, 18208,
18200, 18199, 18197, 18194, 18190, 18185, 18185, 18180, 18179,
18177, 18177, 18176, 18170, 18169, 18156, 18155, 18152, 18151,
18142, 18142, 18138, 18137, 18136, NA, NA, NA, NA, NA, 19044), class = "Date"),
close_price = c(0, 0, 0, 1150001, 940000, 1253000, 979000,
881000, 1010000, 1060000, 1100000, 1070000, 1025000, 755000,
740000, 930000, 1250000, 990000, 930000, 931500, 975000,
950000, 850000, 865000, 921000, 790000, 778000, 935000, 1270000,
970000, 1061500, 960000, 1015000, 1100000, 1082000, 880000,
1000000, 1140000, 950000, 852000, 1045000, 795000, 950000,
950000, 880000, 850000, 945000, 949500, 1220000, 1015000,
899000, 1100000, 805000, 868000, 1102000, 1015000, 923000,
810000, 890000, 826000, 1140000, 970000, 830000, 790000,
1151000, 835500, 1080000, 870000, 1049000, 985000, 962000,
926000, 1008888, 950000, 810000, 760000, 955000, 930000,
985000, 1210000, 878000, 950000, 855000, 930000, 960000,
1180000, 980000, 960000, 898000, 1100000, 1215000, 885000,
985000, 880000, 1100000, 810000, 1210000, 810000, 970700,
1010000, 800000, 850000, 849000, 770000, 925000, 930000,
875000, 755000, 675000, 875500, 715000, 837000, 747000, 805000,
785000, 801200, 900000, 800000, 610000, 720000, 730000, 700000,
695000, 720000, 750000, 860000, 915000, 787000, 785000, 710000,
735000, 620000, 788000, 780000, 780000, 645000, 700000, 686000,
686000, 745000, 745000, 605000, 730000, 625000, 625000, 685000,
731000, 715000, 695000, 710000, 700000, 575000, 561000, 590000,
595000, 720500, 670000, 711000, 645000, 595000, 700000, 545000,
695000, 531000, 581000, 518000, 645000, 562500, 530000, 640000,
643000, 680000, 700000, 540000, 630000, 658000, 675000, 525000,
600000, 664500, 590000, 569595, 620500, 555000, 585000, 630000,
639900, 515000, 475000, 670000, 610000, 524888, 550000, 520000,
650000, 500000, 500000, 540000, 608000, 575000, 570000, 639900,
645000, 648500, 635000, 530000, 655000, 520000, 555000, 542500,
515000, 620000, 580140, 535000, 638888, 540000, 590000, 535000,
497500, 505000, 675000, 545000, 640000, 555000, 630000, 590000,
0, 0, 0, 0, 0, 985000)), row.names = c(NA, 232L), class = "data.frame")
>
You can create a new variable to assign the year to your desired period:
library(dplyr)
# Label every row with the 12-month period that contains its close_date.
# Periods run from 6 February of the labelled year up to (but excluding)
# 6 February of the next year. Then count the rows and take the median
# close_price per period.
# Rows whose close_date is NA, or on/after 2023-02-06, match no condition
# and therefore end up in an NA period.
df %>%
  mutate(
    period_year = case_when(
      close_date < as.Date("2019-02-06") ~ 2018,
      close_date >= as.Date("2019-02-06") &
        close_date < as.Date("2020-02-06") ~ 2019,
      close_date >= as.Date("2020-02-06") &
        close_date < as.Date("2021-02-06") ~ 2020,
      close_date >= as.Date("2021-02-06") &
        close_date < as.Date("2022-02-06") ~ 2021,
      close_date >= as.Date("2022-02-06") &
        close_date < as.Date("2023-02-06") ~ 2022
    )
  ) %>%
  group_by(time_period = period_year) %>%
  summarize(
    # n() alone is the row count; no assignment to close_date is needed.
    close_count = n(),
    med_close_price = median(close_price, na.rm = TRUE)
  )
I have the dataframe below:
# Weekly dates, stored as day offsets and tagged with the "Date" class.
# The final entry is missing.
Week <- structure(
  c(
    18323, 18330, 18337, 18344, 18351, 18358, 18365,
    18372, 18379, 18386, 18393, 18400, 18407, 18414, 18421, 18428,
    18435, 18442, 18449, 18456, 18463, 18470, 18477, 18484, 18491,
    18498, 18505, 18512, 18519, 18526, 18533, 18540, 18547, 18554,
    18561, 18568, 18575, 18582, 18589, 18596, 18603, 18610, 18617,
    18624, 18631, 18638, 18645, 18652, 18659, 18666, 18673, 18680,
    18687, 18694, 18701, 18708, 18715, 18722, NA
  ),
  class = "Date"
)

# One value per entry of Week; the first value is missing.
First <- c(
  NA, 12, 28, 89, 205, 311, 367, 419, 536, 673, 787, 996, 1501,
  2091, 2836, 3971, 5429, 7422, 9653, 12205, 15096, 19962, 23567,
  28432, 33051, 37347, 43390, 49897, 54851, 60913, 67073, 72769,
  79629, 84063, 88398, 89579, 88464, 85595, 81697, 74943, 67632,
  58226, 53371, 49759, 51508, 55515, 58813, 62240, 62627, 62646,
  61285, 54438, 49614, 46721, 44554, 48151, 54014, 68891, 47176
)

# Second holds exactly the same values as First.
Second <- First

# Combine the three vectors into one data frame, one row per week.
re <- data.frame(Week = Week, First = First, Second = Second)
I want to sum by month, but the data spans two years (2020 and 2021), so I need the months of each year to be kept separate:
library(lubridate)
# Sum `First` over the rows of `re`, grouped by the value of month(Week).
# NOTE(review): presumably month() yields only the month number, so the same
# month in different years would fall into one group -- confirm.
bymonth <- aggregate(cbind(First)~month(Week),
data=re,FUN=sum)
then I need to create a plotly bar chart but the months are not displayed properly.
# Start an empty plotly figure and add one bar trace named "fIRST", taking
# its heights from the second column of `bymonth`.
# NOTE(review): `~Month` expects a column called `Month`; verify that
# `bymonth` actually has a column with that name.
p <- plot_ly() %>%
add_bars(bymonth, x = ~Month, y = bymonth[,2], name = "fIRST",
marker = list(color = "#3E5B84")
)
You can extract year-month from the date, aggregate and plot -
library(dplyr)
library(plotly)
# Sum `First` per year-month and draw the totals as a bar chart.
re %>%
  # Sort by date so that the labels below first appear in date order.
  arrange(Week) %>%
  mutate(
    # "YYYY-Mon" label, which keeps the same month of different years apart.
    month_year = format(Week, "%Y-%b"),
    # Fix the factor levels to order of first appearance rather than
    # alphabetical order.
    month_year = factor(month_year, levels = unique(month_year))
  ) %>%
  group_by(month_year) %>%
  summarise(First = sum(First, na.rm = TRUE)) %>%
  plot_ly() %>%
  add_bars(
    x = ~month_year,
    y = ~First,
    marker = list(color = "#3E5B84")
  )
I have a data frame whose column names each contain one of the labels TRG1, TRG2, TRG3, TRG4 or TRG5.
How can I reorder the columns of this data frame by that TRG label, so that the TRG1 columns come first and the TRG5 columns come last?
My data is
> dput(head(result))
structure(list(`Sample Name` = c("ACTB", "ATP5F1", "DDX5", "EEF1G",
"GAPDH", "NCL"), `31-10TRG3R` = c(15723, 1682, 16598, 17240,
38686, 10670), `31-11TRG4R` = c(24846, 3294, 25522, 38914, 73022,
14628), `31-12TRG4R` = c(7812, 1326, 5750, 9204, 12352, 5489),
`31-13TRG1R` = c(15332, 1162, 18268, 20875, 62257, 10614),
`31-14TRG4R` = c(7644, 1435, 16822, 13731, 26244, 10548),
`31-15TRG4R` = c(6501, 947, 10320, 7285, 10538, 4638), `31-16TRG4R` = c(5428,
825, 11789, 12018, 6812, 5954), `31-17TRG3R` = c(10074, 1056,
7966, 12489, 26819, 6404), `31-18TRG1R` = c(12487, 567, 13945,
16474, 43309, 11831), `31-19TRG4R` = c(5211, 917, 9144, 8024,
8200, 3935), `31-1TRG3R` = c(9928, 1112, 5726, 6227, 12942,
3644), `31-21TRG3R` = c(6806, 1460, 7472, 12420, 46378, 5871
), `31-22TRG3R` = c(4834, 640, 9807, 7082, 14823, 4594),
`31-23TRG1R` = c(3156, 765, 18034, 18982, 17237, 18880),
`31-24TRG4R` = c(6990, 761, 4440, 2833, 8150, 1340), `31-25TRG2R` = c(60621,
6290, 47502, 135948, 233717, 37583), `31-26TRG3R` = c(4198,
718, 2564, 3830, 5790, 1258), `31-27TRG2R` = c(10815, 1010,
8694, 11868, 18684, 5706), `31-28TRG4R` = c(7980, 1343, 7342,
9874, 14286, 4255), `31-29TRG1R` = c(3854, 748, 9314, 9132,
25546, 7852), `31-2TRG1R` = c(7653, 1495, 12238, 12568, 11296,
11256), `31-30TRG5R` = c(24358, 2091, 15594, 26998, 91442,
20914), `31-31TRG4R` = c(6796, 940, 12752, 11642, 41967,
12922), `31-32TRG2R` = c(127379, 11541, 90020, 74881, 234454,
51464), `31-33TRG1R` = c(4139, 338, 8260, 8650, 13916, 8000
), `31-34TRG3R` = c(37303, 2998, 22122, 30431, 51981, 11737
), `31-35TRG4R` = c(32279, 2718, 42178, 36956, 115962, 21194
), `31-36TRG3R` = c(12424, 1134, 8177, 14462, 20147, 6648
), `31-37TRG2R` = c(7031, 690, 8208, 17495, 28514, 7058),
`31-38TRG3R` = c(3645, 698, 16117, 11122, 25739, 7031), `31-39TRG3R` = c(28273,
2169, 14697, 20890, 68353, 25293), `31-3TRG4R` = c(9250,
1335, 24776, 14674, 31266, 8732), `31-40TRG1R` = c(28858,
2100, 26910, 43331, 104235, 19544), `31-41TRG1R` = c(13980,
1184, 13204, 13624, 47414, 11870), `31-42TRG2R` = c(22697,
2401, 16326, 22962, 40136, 11796), `31-43TRG3R` = c(13820,
797, 16245, 7827, 38292, 6206), `31-44TRG2R` = c(9477, 1244,
7140, 6580, 12457, 5176), `31-45TRG3R` = c(12182, 573, 2818,
3699, 4365, 1639), `31-46TRG1R` = c(5438, 997, 9226, 26045,
17740, 8628), `31-47TRG3R` = c(14419, 1927, 7350, 10375,
15736, 3415), `31-48TRG2R` = c(8758, 1002, 8044, 6677, 17354,
7355), `31-49TRG4R` = c(7738, 792, 13920, 15589, 42536, 14056
), `31-4TRG3R` = c(9947, 1115, 7267, 5957, 13831, 2793),
`31-50TRG4R` = c(6660, 701, 4092, 16796, 7958, 2408), `31-51TRG2R` = c(151880,
16572, 93610, 110556, 303604, 57029), `31-52TRG2R` = c(7184,
1396, 12785, 11124, 13050, 8934), `31-53TRG2R` = c(9012,
1118, 7786, 11482, 19512, 9143), `31-5TRG2R` = c(5479, 440,
8913, 7103, 15886, 5801), `31-6TRG4R` = c(6716, 677, 8812,
12184, 14380, 7684), `31-7TRG3R` = c(16192, 1155, 9405, 11930,
30034, 7726), `31-8TRG1R` = c(11408, 1007, 11396, 20424,
38188, 9570), `31-9TRG1R` = c(9468, 812, 10774, 8504, 15464,
4606)), row.names = c(NA, 6L), class = "data.frame")
>
Maybe we can extract the digits after 'TRG' and use them to order the columns:
# Reorder the columns of `result` by the number that follows "TRG" in each
# column name, keeping the first column in front.
result2 <- local({
  sample_cols <- names(result)[-1]
  # Reduce each name to the digits after "TRG", e.g. "31-10TRG3R" -> 3.
  trg_number <- as.numeric(sub(".*TRG(\\d+)\\D+", "\\1", sample_cols))
  # order() indexes into sample_cols, so shift by one to skip column 1.
  result[c(1, order(trg_number) + 1)]
})
I've been customizing themes, lines, and colors in the following plot
library(dplyr)
library(ggplot2)
library(readr)
library(zoo)
# Read the case records from the remote CSV, keeping text columns as
# character vectors.
cvper <- read.csv(
  "https://cloud.minsa.gob.pe/s/Y8w3wHsEdYQSZRp/download",
  stringsAsFactors = FALSE
)

# Count the rows per FECHA_RESULTADO and add a right-aligned 7-row rolling
# mean of those counts (NA until seven rows are available).
# summarize() returns one row per group ordered by the grouping key, so
# sorting the rows beforehand has no effect and is omitted.
nuevos_cvper <- cvper %>%
  group_by(FECHA_RESULTADO) %>%
  summarize(casos_x_dia = n()) %>%
  mutate(
    media_movil = rollmean(casos_x_dia, k = 7, fill = NA, align = "right")
  )

# Thick line for the rolling mean, dashed line plus points for the counts.
# The colour aesthetic is mapped to constant labels so that each series
# gets a legend entry, coloured through the manual scale below.
prueba_legend <- ggplot(nuevos_cvper) +
  geom_line(
    aes(x = FECHA_RESULTADO, y = media_movil, color = "media_movil"),
    size = 1.5
  ) +
  geom_line(
    aes(x = FECHA_RESULTADO, y = casos_x_dia, color = "casos_x_dia"),
    linetype = "dashed"
  ) +
  geom_point(
    aes(x = FECHA_RESULTADO, y = casos_x_dia, color = "casos_x_dia")
  ) +
  # One colour per label; each label is listed once.
  scale_colour_manual(
    "",
    values = c("media_movil" = "#CF3721", "casos_x_dia" = "#31A9B8")
  ) +
  theme_bw() +
  theme(legend.position = "bottom")

prueba_legend
It shows a legend with short lines. I want to change those lines to circles. I've tried scale_shape_manual, but it doesn't work. Is there a way?
Since one of the more recent ggplot2 versions (make sure you update via install.packages("ggplot2")), the argument key_glyph= can be used to specify the draw_key function that should be used to draw the legend glyphs for a given geom and aesthetic. See here for some more information and examples of usage; however, I will demonstrate with the following example using iris and ggplot2 version 3.3.2:
# Line chart of iris whose legend keys are drawn with the "point" glyph
# instead of the default line glyph.
ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Sepal.Width, color = Species)
) +
  geom_line(key_glyph = "point")
You may notice as I did that the point size is a bit small for my taste. That can be adjusted by using override.aes= via the guide_legend() function specified for the color aesthetic to make those points a bit bigger.
# Same chart as before, with the legend points enlarged through
# override.aes on the colour legend.
ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Sepal.Width, color = Species)
) +
  geom_line(key_glyph = "point") +
  guides(
    color = guide_legend(override.aes = list(size = 3))
  )
Maybe you are looking for this:
library(ggplot2)
#Data
df2 <- structure(list(FECHA_RESULTADO = structure(c(18327, 18328, 18329,
18330, 18331, 18332, 18333, 18334, 18335, 18336, 18337, 18338,
18339, 18340, 18341, 18342, 18343, 18344, 18345, 18346, 18347,
18348, 18349, 18350, 18351, 18352, 18353, 18354, 18355, 18356,
18357, 18358, 18359, 18360, 18361, 18362, 18363, 18364, 18365,
18366, 18367, 18368, 18369, 18370, 18371, 18372, 18373, 18374,
18375, 18376, 18377, 18378, 18379, 18380, 18381, 18382, 18383,
18384, 18385, 18386, 18387, 18388, 18389, 18390, 18391, 18392,
18393, 18394, 18395, 18396, 18397, 18398, 18399, 18400, 18401,
18402, 18403, 18404, 18405, 18406, 18407, 18408, 18409, 18410,
18411, 18412, 18413, 18414, 18415, 18416, 18417, 18418, 18419,
18420, 18421, 18422, 18423, 18424, 18425, 18426, 18427, 18428,
18429, 18430, 18431, 18432, 18433, 18434, 18435, 18436, 18437,
18438, 18439, 18440, 18441, 18442, 18443, 18444, 18445, 18446,
18447, 18448, 18449, 18450, 18451, 18452, 18453, 18454, 18455,
18456, 18457, 18458, 18459, 18460, 18461, 18462, 18463, 18464,
18465, 18466, 18467, 18468, 18469, 18470, 18471, 18472, 18473,
18474, 18475, 18476, 18477, 18478, 18479, 18480, 18481, 18482,
18483, 18484, 18485, 18486, 18487, 18488), class = "Date"), casos_x_dia = c(1,
5, 2, 3, 1, 8, 8, 10, 19, 28, 20, 27, 56, 62, 56, 30, 39, 33,
64, 100, 52, 34, 136, 142, 130, 250, 117, 222, 292, 833, 444,
647, 1042, 1083, 817, 1038, 1404, 738, 1284, 1041, 1383, 1329,
1109, 1407, 1076, 2039, 2171, 2104, 2056, 2239, 2397, 1422, 3399,
3367, 4238, 3372, 2625, 3369, 1922, 3990, 3969, 3634, 3612, 3297,
2469, 936, 3601, 4348, 4441, 3739, 4304, 4125, 1785, 5130, 5198,
5290, 5514, 6100, 5399, 1792, 6968, 6919, 7371, 6425, 5745, 4613,
2262, 4242, 3774, 2614, 4029, 4944, 4764, 2637, 4743, 5310, 5726,
5069, 4661, 4500, 2441, 4363, 3376, 3915, 3436, 3447, 3526, 1446,
4335, 3768, 4109, 4154, 4331, 3526, 1598, 2729, 3748, 3648, 3349,
3862, 3518, 2299, 3783, 4035, 2598, 2495, 4913, 4246, 2380, 3114,
4194, 4432, 4535, 5141, 5066, 2228, 3756, 4815, 5972, 5474, 5960,
5626, 2950, 7071, 3017, 6721, 7248, 7601, 6697, 3194, 7818, 7754,
7508, 8442, 7407, 6759, 3491, 7679, 8473, 8560, 7590, 4805),
media_movil = c(NA, NA, NA, NA, NA, NA, 4, 5.28571428571429,
7.28571428571429, 11, 13.4285714285714, 17.1428571428571,
24, 31.7142857142857, 38.2857142857143, 39.8571428571429,
41.4285714285714, 43.2857142857143, 48.5714285714286, 54.8571428571429,
53.4285714285714, 50.2857142857143, 65.4285714285714, 80.1428571428571,
94, 120.571428571429, 123, 147.285714285714, 184.142857142857,
283.714285714286, 326.857142857143, 400.714285714286, 513.857142857143,
651.857142857143, 736.857142857143, 843.428571428571, 925,
967, 1058, 1057.85714285714, 1100.71428571429, 1173.85714285714,
1184, 1184.42857142857, 1232.71428571429, 1340.57142857143,
1502, 1605, 1708.85714285714, 1870.28571428571, 2011.71428571429,
2061.14285714286, 2255.42857142857, 2426.28571428571, 2731.14285714286,
2919.14285714286, 2974.28571428571, 3113.14285714286, 3184.57142857143,
3269, 3355, 3268.71428571429, 3303, 3399, 3270.42857142857,
3129.57142857143, 3074, 3128.14285714286, 3243.42857142857,
3261.57142857143, 3405.42857142857, 3642, 3763.28571428571,
3981.71428571429, 4103.14285714286, 4224.42857142857, 4478,
4734.57142857143, 4916.57142857143, 4917.57142857143, 5180.14285714286,
5426, 5723.28571428571, 5853.42857142857, 5802.71428571429,
5690.42857142857, 5757.57142857143, 5368.14285714286, 4918.85714285714,
4239.28571428571, 3897, 3782.57142857143, 3804.14285714286,
3857.71428571429, 3929.28571428571, 4148.71428571429, 4593.28571428571,
4741.85714285714, 4701.42857142857, 4663.71428571429, 4635.71428571429,
4581.42857142857, 4305.14285714286, 4046.42857142857, 3813.14285714286,
3639.71428571429, 3500.57142857143, 3358.42857142857, 3354.42857142857,
3410.42857142857, 3438.14285714286, 3540.71428571429, 3667,
3667, 3688.71428571429, 3459.28571428571, 3456.42857142857,
3390.57142857143, 3275.57142857143, 3208.57142857143, 3207.42857142857,
3307.57142857143, 3458.14285714286, 3499.14285714286, 3349.14285714286,
3227.14285714286, 3377.28571428571, 3481.28571428571, 3492.85714285714,
3397.28571428571, 3420, 3682, 3973.42857142857, 4006, 4123.14285714286,
4101.42857142857, 4193.14285714286, 4281.85714285714, 4501.85714285714,
4636, 4753, 4833, 4936.14285714286, 5409.71428571429, 5152.85714285714,
5259.85714285714, 5513.28571428571, 5747.71428571429, 5900.71428571429,
5935.57142857143, 6042.28571428571, 6719, 6831.42857142857,
7002, 6974.28571428571, 6983.14285714286, 7025.57142857143,
7005.71428571429, 7108.42857142857, 7258.71428571429, 7137,
6765.28571428571)), row.names = c(NA, -162L), class = "data.frame")
The code:
# Plot the rolling mean as a thick line and the counts as a dashed line
# with points. Both line layers are kept out of the legend, so only the
# point layer contributes a legend key.
prueba_legend <- ggplot(df2) +
  geom_line(
    aes(x = FECHA_RESULTADO, y = media_movil, color = "media_movil"),
    size = 1.5,
    # Spelled out as FALSE because `F` is an ordinary, reassignable variable.
    show.legend = FALSE
  ) +
  geom_line(
    aes(x = FECHA_RESULTADO, y = casos_x_dia, color = "casos_x_dia"),
    linetype = "dashed",
    show.legend = FALSE
  ) +
  geom_point(
    aes(x = FECHA_RESULTADO, y = casos_x_dia, color = "casos_x_dia")
  ) +
  # One colour per label; each label is listed once.
  scale_colour_manual(
    "",
    values = c("media_movil" = "#CF3721", "casos_x_dia" = "#31A9B8")
  ) +
  theme_bw() +
  theme(legend.position = "bottom")

prueba_legend
Output: