Related
I am trying to compute Cumulative Abnormal Returns per firm around the EPS announcement dates by using the evReturn function from the erer package. I have a dataframe (df) with a Date column (time series), 5 columns with daily firm returns, and a last column with the market index returns. The second dataframe (Dates_clean) consists of 2 columns, one referring to the firms and the second with the announcement dates. This dataframe has been tidied and cleaned of NAs, so it is possible that a firm is not present due to missing observations regarding EPS dates.
df looks as follows (subset only for the last 2 years, daily observations of returns):
df <- structure(list(Date = c("2021-12-31", "2022-01-03", "2022-01-04",
"2022-01-05", "2022-01-06", "2022-01-07", "2022-01-10", "2022-01-11",
"2022-01-12", "2022-01-13", "2022-01-14", "2022-01-17", "2022-01-18",
"2022-01-19", "2022-01-20", "2022-01-21", "2022-01-24", "2022-01-25",
"2022-01-26", "2022-01-27", "2022-01-28", "2022-01-31", "2022-02-01",
"2022-02-02", "2022-02-03", "2022-02-04", "2022-02-07", "2022-02-08",
"2022-02-09", "2022-02-10", "2022-02-11", "2022-02-14", "2022-02-15",
"2022-02-16", "2022-02-17", "2022-02-18", "2022-02-21", "2022-02-22",
"2022-02-23", "2022-02-24", "2022-02-25", "2022-02-28", "2022-03-01",
"2022-03-02", "2022-03-03", "2022-03-04", "2022-03-07", "2022-03-08",
"2022-03-09", "2022-03-10", "2022-03-11", "2022-03-14", "2022-03-15",
"2022-03-16", "2022-03-17", "2022-03-18", "2022-03-21", "2022-03-22",
"2022-03-23", "2022-03-24", "2022-03-25", "2022-03-28", "2022-03-29",
"2022-03-30", "2022-03-31", "2022-04-01", "2022-04-04", "2022-04-05",
"2022-04-06", "2022-04-07", "2022-04-08", "2022-04-11", "2022-04-12",
"2022-04-13", "2022-04-14", "2022-04-15", "2022-04-18", "2022-04-19",
"2022-04-20", "2022-04-21", "2022-04-22", "2022-04-25", "2022-04-26",
"2022-04-27", "2022-04-28", "2022-04-29"), NESN = c(NA, 0.0128217636632681,
0.0055390098819148, -0.0303446403908639, -0.00353202700477517,
-0.0148639604729367, -0.0242468464841654, 0.000833755941585634,
0.000566825263230264, -0.00107292453477992, -0.00413304805849857,
0.00944796286389749, 0.00606019163539706, -0.0178671560381302,
0.0102682549157, 0.00238042231089342, -0.0264129023440169, -0.00359737479380895,
-0.00057237456191328, 0.00371815995136426, -0.00208920373247656,
0.00360658333406638, 0.00460158995889248, 0.0124240943672786,
-0.0192088934850052, -0.0116772543954452, 0.00161803666364402,
-0.00947063835830442, 0.00910418738855001, -0.0111621216023017,
0.0149520456912964, -0.00898947983118181, -0.000580331235212728,
-0.00085760228693943, 0.00337970744966198, 0.00873269055978332,
0.0104415116340701, -0.00548153587914391, 0.00615346747892431,
-0.0248914439483823, 0.0173285069170668, 0.0237445174472881,
0.0114592474061839, -0.00044228969975324, -0.0129341888051192,
-0.0039655856135451, -0.0259827935397877, -0.0295994170754248,
0.0307311936266654, -0.013299455406402, -0.0056093783764315,
0.0110647313950434, 0.00334040819250792, 0.00425756006997746,
0.0172069005359381, 0.0251378343192163, -0.0021649080340248,
0.00189626896268957, -0.0135983085238801, 0.0114262007450368,
-0.00381128173575684, 0.0058674672957324, -0.00307865494333059,
0.00175366558880397, 0.00248499184478623, 0.00721501950695935,
0.0140475807883897, 0.0150789811679537, -0.0124887026538493,
0.00613195773358854, 0.0132145840052262, -0.0015996735360132,
-0.0066051386016398, 0.00478929567729036, 0.000712513206063692,
0, -0.00234061707177347, -0.0254790654941593, -0.002887253994175,
0.0031573171246464, 0.0188337863652812, 0.0128193479643504, -0.0131137915005016,
0.0142879572931387, 0.00363369724621165, -0.00110402156089173
), MC = c(NA, 0.00550261575073563, 0.0177834448996657, 0.0188176125853796,
-0.0407649022737596, -0.0155420098162522, -0.0183010694192803,
0.0103886467592562, -0.00183070448319778, -0.0263867607751148,
-0.021159281495444, 0.0162863512416866, -0.0195218616791354,
0.036701342749085, 0.00888605545163479, -0.00951769122788382,
-0.049340634153576, 0.00633645237423841, 0.0248878672659074,
0.0152138699844748, 0.0322766524097042, 0.00865438181820477,
0.0132850798110877, -0.00396012072659391, -0.0241332685589187,
-0.00786778247404551, 0.0141615838200913, -0.0139641131076824,
0.014870269120445, -0.0145131154740279, -0.0300199660152931,
-0.0218975214448938, 0.0358204424148594, -0.00922132830585842,
-0.00509031265552273, 0.00321590363749413, -0.0205452396116511,
-0.00446266058653078, -0.00508069260599331, -0.0449083741501392,
0.0482780235448934, -0.0100514536376457, -0.0516743602589363,
0.0194952092513918, -0.0322887147335422, -0.0641394765595972,
-0.0173068505872533, -0.0311734655958205, 0.0954377559421478,
-0.0243947811500892, 0.0119067888938236, 0.00638796543754672,
-0.0145319835423444, 0.065932734214486, 0.00143122890265102,
0.0152422943022248, -0.0096967499833438, 0.0168987761680872,
-0.0304396645441839, 0.000640877885772717, 0.00208067872578832,
0.0154956918516587, 0.0567879413578825, -0.0168206261325913,
-0.0168057506728613, 0.000308592594485901, 0.0220126129867861,
-0.00451860424097195, -0.0373729875542405, -0.0191766740018862,
0.0123403885706372, -0.0186802220420382, 0.0177444777777833,
0.005072438614252, 0.00930500067891504, 0, -3.12499902288543e-07,
-0.00921874999999994, 0.0129321873521526, 0.0172808575677141,
-0.0211199942209961, -0.0375232958098812, -0.0134827788364557,
0.0153135163323701, 0.000972753811222749, 0.00372650801247842
), ASML = c(NA, -0.00707457495736874, -0.0290725339785473, -0.0152647901460136,
-0.0184823371590401, 0.00971814435287954, -0.0640693722781605,
0.00755262856381633, 0.0307819825808584, 0.0255294675755942,
-0.0310802702293709, 0.0217999099657287, -0.0225541062341073,
-0.0236980117549149, 0.0220375314655912, -0.0167179739743669,
-0.070396438711375, -0.0162393217912211, 0.0253696004759689,
-0.0127093712930013, -0.0283213085062449, 0.0496373256889857,
0.0104341989959946, -0.00316405782212825, -0.0412701616473979,
-0.0167302195887068, 0.014887800465154, -0.0106529882383843,
0.0393645189761693, -0.00798216711956512, -0.024996058038682,
-0.017207904689711, 0.0235835268894051, 0.00174602988969963,
0.00749259452866369, -0.0134901418194397, -0.0343618513323983,
0.0248730936819173, -0.000354295774261049, -0.0108096737888227,
0.0553563040643841, 0.0166350336725456, -0.0362331061055623,
0.010395360497353, -0.0169754801097395, -0.069247700897907, 0.0131186232688818,
-0.0436560986000191, 0.077563666368923, -0.0210021464720349,
-0.00476696090336659, -0.0152907148120854, 0.011037414017806,
0.0712443030593835, 0.0100189998239766, 0.0430985046864196, 0.00459075408780252,
0.0236664062345713, -0.0212058320781517, 0.00293240036892017,
0.00730826701315568, 0.00403160465895103, 0.0165410283377181,
-0.0170617639615278, -0.0196080007058823, -0.00540967213114751,
0.0176357313934843, -0.0294781484344746, -0.0457273107021545,
-0.00769569645056023, 0.00334966691029259, -0.029861744320177,
0.0146653992395438, 0.010171668543006, -0.0130715444394179, 0,
-1.7898690185536e-07, 0.00519062108466084, 0.0530628561253561,
-0.0125130178368247, -0.0219178119722282, -0.0411412887152115,
-0.0334124520723025, -0.00132262939176431, 0.0378286505877248,
-0.00127556088046676), ROG = c(NA, -0.0016563124362825, -0.0167576047110743,
0.00558547220350492, -0.00931186868686873, 0.00929598170989054,
-0.0189468728798304, 0.0102378674073382, -0.0211679240011176,
-0.0090002397780341, 0.00835899311866473, 0.00677858618059668,
-0.004236109928897, 0.00246186811339411, -3.6951028517751e-05,
-0.0085303180179872, -0.0453608483871986, 0.00297013943738533,
0.00575500793484429, 0.0165975968466054, 0.00233697546175771,
-0.000929104641140599, 0.0127388162493018, -0.00699098766109441,
-0.034148341582287, -0.0182875241992058, 0.0008188551362458,
0.0038028755990831, 0.0290255912422577, -0.00518866385230854,
-0.014558705998831, -0.0111877742472386, 0.0114612566678807,
0.00773075782606858, -0.00964711322222001, -0.00537533349502783,
-0.00277687179752917, 0.0100685911308367, 0.00239370555303364,
-0.00492692631648861, 0.0156943530061375, 0.0141076879880042,
0.0095079601184076, -0.00810543484286608, -0.0153314409260049,
-0.00265576875393958, 0.011543968321482, 0.00650858137799504,
0.0172871323248192, -0.013768790740987, 0.00885575961597662,
0.0135187375923218, -5.44893487662845e-05, -0.00846638540982525,
0.00148964049045319, 0.0260458184590857, 0.0049176082464939,
-0.000655460348850556, -0.000953002940294323, 0.0201022930361616,
0.00785770864045432, -0.00585620372821327, -0.0105159715950733,
-0.00101256130850647, -0.00614261872165789, 0.0145685186996185,
0.0366446438948085, 0.00785569119190255, 0.0111315542528179,
0.0105840795630807, 0.0110330061523778, 0.00756508782135668,
-0.0247558224043232, 0.000999685813030116, -0.00142410234860157,
0, -0.00234052114911543, -0.0406737559105963, -0.0119804035990065,
-0.0253254619925773, 0.00693062609568162, -0.00655543146327209,
-0.0173983359583453, 0.0239373293877456, -0.00889126611629498,
-2.8255212379813e-06), DJSTOXX = c(NA, 0.00448956330284256, 0.00822265035062086,
0.000665968984872922, -0.0125074340644002, -0.00391672453668146,
-0.014833902655213, 0.00842939391029529, 0.00646896140565878,
-0.000302344092258267, -0.0100667827031564, 0.00696648315220894,
-0.00975618817245349, 0.00231978423713897, 0.00508630710398661,
-0.0184133486433087, -0.0381245455216094, 0.00708217470094685,
0.0167954411441527, 0.00646040726886765, -0.0101525109126204,
0.00715709523318564, 0.0127472893082297, 0.0045192172025077,
-0.0175615871465199, -0.0138360753686276, 0.0067878981966738,
0.000120356858084092, 0.0171767249252694, -0.00207254104941135,
-0.00589392210453665, -0.0183275230158848, 0.0143092365731592,
0.000447004322467581, -0.0068709568459443, -0.00805937763694919,
-0.0130249474835507, 0.000692602999960323, -0.00277069175010591,
-0.0328295068963999, 0.0331852560597776, -0.000926074963563228,
-0.0237052175314, 0.00896542681851686, -0.020101628384252, -0.0356274007682458,
-0.0110105316066746, -0.00508229969266472, 0.0468249263880254,
-0.0168766270684341, 0.00950095407560014, 0.0120160585194342,
-0.00282797911787214, 0.0306352270637984, 0.00455346192440631,
0.0091122594851849, 0.000435550608561153, 0.00846535252146574,
-0.0100600682445028, -0.00211439307007666, 0.00104840080252311,
0.00137141546833175, 0.0174494900565429, -0.00411821862698636,
-0.00940915708728995, 0.00543807309261601, 0.00839116898191072,
0.00191698129536877, -0.015334581522056, -0.0020812772770139,
0.0130697264521857, -0.00588544107183153, -0.0035024167766855,
0.000289061644585642, 0.00664430735011456, 0, 0, -0.00769436536709167,
0.00837864552170919, 0.00318842343654246, -0.0178911495356058,
-0.0180825084765205, -0.00902690097368308, 0.00728639570524336,
0.00620287637010186, 0.00743961741666777)), class = "data.frame", row.names = c(NA,
-86L))
Whereas, Dates_clean is:
Dates_clean <- structure(list(stock = c("NESN", "NESN", "MC", "ASML", "ASML",
"ROG", "ROG"), EPS = structure(c(19103, 19040, 19019, 19102,
19011, 19107, 19026), class = "Date")), row.names = c("33", "130",
"260", "293", "390", "423", "520"), class = "data.frame", na.action = structure(c(`NA` = 1L,
NA.1 = 2L, NA.2 = 3L, NA.3 = 4L, NA.4 = 5L, NA.5 = 6L, NA.6 = 7L,
NA.7 = 8L, NA.8 = 9L, NA.9 = 10L, NA.10 = 11L, NA.11 = 12L, NA.12 = 13L,
NA.13 = 14L, NA.14 = 15L, NA.15 = 16L, NA.16 = 17L, NA.17 = 18L,
NA.18 = 19L, NA.19 = 20L, NA.20 = 21L, NA.21 = 22L, NA.22 = 23L,
NA.23 = 24L, NA.24 = 25L, NA.25 = 27L, NA.26 = 28L, NA.27 = 29L,
NA.28 = 30L, NA.29 = 31L, NA.30 = 32L, NA.31 = 33L, NA.32 = 34L,
NA.33 = 35L, NA.34 = 36L, NA.35 = 37L, NA.36 = 38L, NA.37 = 39L,
NA.38 = 40L, NA.39 = 41L, NA.40 = 42L, NA.41 = 43L, NA.42 = 44L,
NA.43 = 45L, NA.44 = 46L, NA.45 = 47L, NA.46 = 48L, NA.47 = 49L,
NA.48 = 50L, NA.49 = 51L, NA.50 = 52L, NA.51 = 53L, NA.52 = 54L,
NA.53 = 55L, NA.54 = 57L, NA.55 = 58L, NA.56 = 59L, NA.57 = 60L,
NA.58 = 61L, NA.59 = 62L, NA.60 = 63L, NA.61 = 64L, NA.62 = 65L,
NA.63 = 66L, NA.64 = 67L, NA.65 = 68L, NA.66 = 69L, NA.67 = 70L,
NA.68 = 71L, NA.69 = 72L, NA.70 = 73L, NA.71 = 74L, NA.72 = 75L,
NA.73 = 76L, NA.74 = 77L, NA.75 = 78L, NA.76 = 79L, NA.77 = 80L,
NA.78 = 81L, NA.79 = 82L, NA.80 = 83L, NA.81 = 84L, NA.82 = 85L,
NA.83 = 86L, NA.84 = 87L, NA.85 = 88L, NA.86 = 89L, NA.87 = 90L,
NA.88 = 91L, NA.89 = 92L, NA.90 = 93L, NA.91 = 94L, NA.92 = 95L,
NA.93 = 96L, NA.94 = 97L, NA.95 = 98L, NA.96 = 99L, NA.97 = 100L,
NA.98 = 101L, NA.99 = 102L, NA.100 = 103L, NA.101 = 104L, NA.102 = 106L,
NA.103 = 107L, NA.104 = 108L, NA.105 = 109L, NA.106 = 110L, NA.107 = 111L,
NA.108 = 112L, NA.109 = 113L, NA.110 = 114L, NA.111 = 115L, NA.112 = 116L,
NA.113 = 117L, NA.114 = 118L, NA.115 = 119L, NA.116 = 120L, NA.117 = 121L,
NA.118 = 122L, NA.119 = 124L, NA.120 = 125L, NA.121 = 126L, NA.122 = 127L,
NA.123 = 128L, NA.124 = 129L, NA.125 = 130L, NA.126 = 131L, NA.127 = 132L,
NA.128 = 133L, NA.129 = 134L, NA.130 = 135L, NA.131 = 136L, NA.132 = 137L,
NA.133 = 138L, NA.134 = 139L, NA.135 = 140L, NA.136 = 141L, NA.137 = 142L,
NA.138 = 143L, NA.139 = 144L, NA.140 = 145L, NA.141 = 146L, NA.142 = 147L,
NA.143 = 148L, NA.144 = 149L, NA.145 = 150L, NA.146 = 151L, NA.147 = 152L,
NA.148 = 153L, NA.149 = 155L, NA.150 = 156L, NA.151 = 157L, NA.152 = 158L,
NA.153 = 159L, NA.154 = 160L, NA.155 = 161L, NA.156 = 162L, NA.157 = 163L,
NA.158 = 164L, NA.159 = 165L, NA.160 = 166L, NA.161 = 167L, NA.162 = 168L,
NA.163 = 169L, NA.164 = 170L, NA.165 = 171L, NA.166 = 172L, NA.167 = 173L,
NA.168 = 174L, NA.169 = 175L, NA.170 = 177L, NA.171 = 178L, NA.172 = 179L,
NA.173 = 180L, NA.174 = 181L, NA.175 = 182L, NA.176 = 183L, NA.177 = 184L,
NA.178 = 185L, NA.179 = 186L, NA.180 = 187L, NA.181 = 188L, NA.182 = 189L,
NA.183 = 190L, NA.184 = 191L, NA.185 = 192L, NA.186 = 193L, NA.187 = 194L,
NA.188 = 195L, NA.189 = 196L, NA.190 = 197L, NA.191 = 198L, NA.192 = 199L,
NA.193 = 200L, NA.194 = 201L, NA.195 = 202L, NA.196 = 203L, NA.197 = 204L,
NA.198 = 205L), class = "omit"))
Those are subsets of my original dataframes, encompassing more than 20 years of daily observations and 250 firms.
My goal is to compute automatically cumulative abnormal returns for each firm around each earnings announcement date. I was able to compute the simple 1 firm or multiple firms with 1 event date in common for all. I am struggling in telling R how to match each date and firm with the dataframe containing returns and then compute the cumulative abnormal returns.
Until now I have tried this code:
# Intended: collect one evReturn() result per announcement date in hh2.
hh2 <- list()
# NOTE(review): Dates_clean[2] is a one-column data frame, so this loop appears
# to run once with i bound to the whole EPS date vector rather than once per
# announcement date -- confirm.
for(i in Dates_clean[2]){
# NOTE(review): columns 2:6 of df include DJSTOXX, which is also passed as
# `index` below -- confirm the market index should be treated as a firm.
firms <- colnames(df)[2:6]
# NOTE(review): est.win = 100 may reach before the first row of the 86-row
# sample df -- TODO confirm against evReturn's windowing.
hh2[[i]] <- evReturn(y = df, firm = firms, event.date = i, y.date = "Date", index = "DJSTOXX", event.win = 3, est.win = 100, digits = 4)}
Error in xj[i] : only 0's may be mixed with negative subscripts
Any help is highly appreciated.
I am trying to label the outliers in my boxplot using the text function so I can find out which class the outliers are coming from. I've stored the rownames of my data in the variable "rownames", using names(vehData) to get the row names. When I apply this, however, I get an error.
ERROR: Error in which(removeOutliers1 == bxpdat$out, arr.ind = TRUE) :
'list' object cannot be coerced to type 'double'
Completely new to R programming. Completely not sure how to fix this or what I am doing wrong
Thanks in advance for any help!
library(reshape2)
vehData <-
structure(
list(
Samples = 1:6,
Comp = c(95L, 91L, 104L, 93L, 85L,
107L),
Circ = c(48L, 41L, 50L, 41L, 44L, 57L),
D.Circ = c(83L,
84L, 106L, 82L, 70L, 106L),
Rad.Ra = c(178L, 141L, 209L, 159L,
205L, 172L),
Pr.Axis.Ra = c(72L, 57L, 66L, 63L, 103L, 50L),
Max.L.Ra = c(10L,
9L, 10L, 9L, 52L, 6L),
Scat.Ra = c(162L, 149L, 207L, 144L, 149L,
255L),
Elong = c(42L, 45L, 32L, 46L, 45L, 26L),
Pr.Axis.Rect = c(20L,
19L, 23L, 19L, 19L, 28L),
Max.L.Rect = c(159L, 143L, 158L, 143L,
144L, 169L),
Sc.Var.Maxis = c(176L, 170L, 223L, 160L, 241L, 280L),
Sc.Var.maxis = c(379L, 330L, 635L, 309L, 325L, 957L),
Ra.Gyr = c(184L,
158L, 220L, 127L, 188L, 264L),
Skew.Maxis = c(70L, 72L, 73L,
63L, 127L, 85L),
Skew.maxis = c(6L, 9L, 14L, 6L, 9L, 5L),
Kurt.maxis = c(16L,
14L, 9L, 10L, 11L, 9L),
Kurt.Maxis = c(187L, 189L, 188L, 199L,
180L, 181L),
Holl.Ra = c(197L, 199L, 196L, 207L, 183L, 183L),
Class = c("van", "van", "saab", "van", "bus", "bus")
),
row.names = c(NA,
6L), class = "data.frame")
#Remove outliers
# Drop boxplot outliers from every column of `data`.
#
# Args:
#   data: a data frame (or list) of numeric vectors.
#
# Returns:
#   A named list with one vector per column, holding that column's values
#   that are not boxplot outliers. Columns may end up with different lengths.
#
# Outliers are identified column by column with boxplot.stats(), which uses
# the same default 1.5 * IQR whisker rule as boxplot(). A value that is
# extreme in one column is therefore never removed from another column where
# it is ordinary (the earlier version pooled the outlier values of all
# columns). No plot is drawn, and lapply() keeps the return type a list even
# when no column loses any value.
removeOutliers <- function(data) {
  lapply(data, function(x) {
    x[!x %in% grDevices::boxplot.stats(x)$out]
  })
}
# Drop the first column (Samples).
vehDataRemove1 <- vehData[, -1]
# Drop column 19 of the remaining columns (Class).
vehDataRemove2 <- vehDataRemove1[,-19]
vehData <- vehDataRemove2
# NOTE(review): Class was removed from vehData just above, so vehData$Class
# looks like it is NULL at this point -- confirm.
vehClass <- vehData$Class
# NOTE(review): this variable shadows base::rownames and holds column names.
rownames <- names(vehData) #column names
#Begin removing outliers
removeOutliers1 <- removeOutliers(vehData)
bxpdat <- boxplot(removeOutliers1)
#Also tried using vehicles$Class instead of rownames but get the same error
# NOTE(review): removeOutliers1 is compared with `==` against the numeric
# vector bxpdat$out; presumably it is a list here, which would explain the
# reported "'list' object cannot be coerced" error -- verify.
text(bxpdat$group, bxpdat$out,
rownames[which(removeOutliers1 == bxpdat$out, arr.ind = TRUE)[,1]],
pos = 4)
The boxplot looks like this. I am trying to label the outliers based on the x axis e.g. "Comp", "Circ", "D.Circ", "Rad.Ra", "Max.L.Ra" etc.. & by vehicle class "Van", "Bus" ..
Crammed text issue when identifying class
If it is the outliers in the 2nd boxplot, it would be:
# Draw the boxplot of the outlier-filtered data and keep its statistics.
bxpdat <- boxplot(removeOutliers1)
# Label each remaining outlier point with the name of the box it belongs to
# (bxpdat$group indexes into bxpdat$names); pos = 4 puts the label to the right.
text(
  x = bxpdat$group,
  y = bxpdat$out,
  labels = bxpdat$names[bxpdat$group],
  pos = 4
)
Maybe looks better like this, if you adjust the margin and flip the labels:
# Enlarge the bottom margin so the rotated axis labels fit.
par(mar = c(8, 3.5, 3.5, 3.5))
# las = 2 draws the axis labels perpendicular to the axis.
# Uses `<-` for assignment, consistent with the rest of the code.
bxpdat <- boxplot(removeOutliers1, las = 2, cex = 0.5)
# Label each outlier with the name of its box, in a smaller font.
text(bxpdat$group, bxpdat$out,
     bxpdat$names[bxpdat$group],
     pos = 4, cex = 0.5)
I understood the question differently from @StupidWolf. I thought the goal was to replace the points indicating outliers with the text of the vehicle class (bus, van or saab). If you simply print the variable name (e.g. Skew.maxis), then you might as well have simply plotted the outliers as points. Unless I'm missing something.
Here is code to answer the question as I understood it, for what it's worth (beginning after defining removeOutliers):
# CHANGE: Create vehClass vector before removing Class from the dataframe
vehClass <- vehData$Class
# Drop the first column (Samples), then column 19 of what remains (Class).
vehDataRemove1 <- vehData[, -1]
vehDataRemove2 <- vehDataRemove1[, -19]
vehData <- vehDataRemove2
# Begin removing outliers
removeOutliers1 <- removeOutliers(vehData)
bxpdat <- boxplot(removeOutliers1) # use boxplot(vehData) if you plot all the outliers as points
# Only plot n_plot randomly-chosen outliers per column; the fixed seed makes
# the random choice reproducible.
n_plot <- 1
set.seed(123)
# Loop over columns. seq_along() iterates zero times for a data frame without
# columns, whereas 1:ncol() would yield c(1, 0).
for (i in seq_along(vehData)) {
  # find out which row indices were removed as outliers
  diffInd <- which(vehData[[i]] %in% setdiff(vehData[[i]], removeOutliers1[[i]]))
  # if none were, then don't add any outlier text
  if (length(diffInd) == 0) next
  print(i)
  print(paste0("l:", length(diffInd)))
  if (length(diffInd) > n_plot) {
    diffIndPlot <- sample(diffInd, n_plot, replace = FALSE)
  } else {
    diffIndPlot <- diffInd
  }
  # write "<class>: <value>" at the outlier's position in box i
  text(x = i, y = vehData[[i]][diffIndPlot],
       labels = paste0(vehClass[diffIndPlot], ": ", vehData[[i]][diffIndPlot]))
}
# Sample input: ten trips with drop-off longitude/latitude.
# The `.internal.selfref = <pointer: ...>` attribute that dput() prints for a
# data.table is not valid R syntax, so it is omitted here to make the snippet
# runnable when pasted.
test <- structure(list(trip_count = 1:10, dropoff_longitude = c(-73.959862,
-73.882202, -73.934113, -73.992203, -74.00563, -73.975189, -73.97448,
-73.974838, -73.981377, -73.955093), dropoff_latitude = c(40.773617,
40.744175, 40.715923, 40.749203, 40.726158, 40.729824, 40.763599,
40.754135, 40.759987, 40.765224)), row.names = c(NA, -10L), class = c("data.table",
"data.frame"))
> dput(zip_codes)
zip_codes <- structure(list(zipcode = c("10001", "10002", "10003", "10004",
"10005", "10006", "10007", "10009", "10010", "10011", "10012",
"10013", "10014", "10016", "10017", "10018", "10019", "10020",
"10021", "10022", "10023", "10024", "10025", "10026", "10027",
"10028", "10029", "10030", "10031", "10032", "10033", "10034",
"10035", "10036", "10037", "10038", "10039", "10040", "10044",
"10065", "10069", "10075", "10103", "10110", "10111", "10112",
"10115", "10119", "10128", "10152", "10153", "10154", "10162",
"10165", "10167", "10168", "10169", "10170", "10171", "10172",
"10173", "10174", "10177", "10199", "10271", "10278", "10279",
"10280", "10282"), bounds_north = c(40.759731, 40.724136, 40.739673,
40.709044, 40.709294, 40.71369, 40.71719, 40.734975, 40.745421,
40.756703, 40.731706, 40.727557, 40.742873, 40.752197, 40.757912,
40.762526, 40.773446, 40.761094, 40.775045, 40.764898, 40.783192,
40.818099, 40.811264, 40.807546, 40.822108, 40.782213, 40.800665,
40.824032, 40.834372, 40.850517, 40.861552, 40.87765, 40.809582,
40.765558, 40.819569, 40.714451, 40.846615, 40.866336, 40.772955,
40.770517, 40.781007, 40.777677, 40.761771, 40.755516, 40.759689,
40.759899, 40.811331, 40.751522, 40.787914, 40.759059, 40.764279,
40.758432, 40.770085, 40.752801, 40.755303, 40.752119, 40.754974,
40.753811, 40.756556, 40.755928, 40.754783, 40.752116, 40.7556,
40.752723, 40.708797, 40.71628, 40.713256, 40.714767, 40.719611
), bounds_south = c(40.743451, 40.708802, 40.722933, 40.683919,
40.702879, 40.705871, 40.709806, 40.718612, 40.73231, 40.731043,
40.719867, 40.713446, 40.72428, 40.73801, 40.747251, 40.749102,
40.758645, 40.757284, 40.758133, 40.751445, 40.768436, 40.778805,
40.788476, 40.79691, 40.803047, 40.770062, 40.782531, 40.812791,
40.817221, 40.829083, 40.842958, 40.849745, 40.781075, 40.752197,
40.806636, 40.701689, 40.817912, 40.851863, 40.749415, 40.759284,
40.771612, 40.769441, 40.759787, 40.753481, 40.758538, 40.758436,
40.810373, 40.749101, 40.773108, 40.757749, 40.762964, 40.757125,
40.768355, 40.75146, 40.753994, 40.750775, 40.753811, 40.751441,
40.755243, 40.754619, 40.753481, 40.750766, 40.754678, 40.750241,
40.707694, 40.714082, 40.711995, 40.700273, 40.713378), bounds_east = c(-73.984076,
-73.973635, -73.979864, -73.995657, -74.004569, -74.009988, -74.000455,
-73.971282, -73.971566, -73.990798, -73.991794, -73.994035, -73.999555,
-73.968192, -73.964271, -73.981822, -73.973015, -73.977201, -73.947973,
-73.958599, -73.974067, -73.960687, -73.954966, -73.944667, -73.940404,
-73.944337, -73.930891, -73.936232, -73.938588, -73.934671, -73.92216,
-73.910587, -73.914228, -73.978116, -73.933219, -73.991772, -73.929107,
-73.924385, -73.940026, -73.952085, -73.986609, -73.947039, -73.975831,
-73.980395, -73.976744, -73.97845, -73.963058, -73.99111, -73.937328,
-73.970993, -73.971411, -73.971451, -73.94827, -73.977677, -73.973735,
-73.976048, -73.975209, -73.974648, -73.97282, -73.973276, -73.978332,
-73.973959, -73.975352, -73.993948, -74.009829, -74.002115, -74.007666,
-74.013754, -74.012441), bounds_west = c(-74.008621, -73.997532,
-73.999604, -74.047285, -74.012508, -74.015905, -74.013754, -73.988643,
-73.994028, -74.012359, -74.004575, -74.016381, -74.01599, -73.987746,
-73.981822, -74.007989, -74.003477, -73.98373, -73.968441, -73.977655,
-73.990149, -73.98814, -73.977092, -73.962475, -73.9659, -73.96323,
-73.955778, -73.948677, -73.960007, -73.950403, -73.944672, -73.947051,
-73.946462, -74.001702, -73.943398, -74.010542, -73.943506, -73.938947,
-73.961583, -73.972553, -73.996142, -73.965148, -73.979513, -73.984118,
-73.97845, -73.980886, -73.964424, -73.994844, -73.959921, -73.973068,
-73.973465, -73.973524, -73.951858, -73.979768, -73.975807, -73.978159,
-73.976974, -73.977107, -73.974897, -73.975352, -73.980395, -73.976048,
-73.976516, -74.00143, -74.011248, -74.00542, -74.009668, -74.019603,
-74.01831), zip = c(10001, 10002, 10003, 10004, 10005, 10006,
10007, 10009, 10010, 10011, 10012, 10013, 10014, 10016, 10017,
10018, 10019, 10020, 10021, 10022, 10023, 10024, 10025, 10026,
10027, 10028, 10029, 10030, 10031, 10032, 10033, 10034, 10035,
10036, 10037, 10038, 10039, 10040, 10044, 10065, 10069, 10075,
10103, 10110, 10111, 10112, 10115, 10119, 10128, 10152, 10153,
10154, 10162, 10165, 10167, 10168, 10169, 10170, 10171, 10172,
10173, 10174, 10177, 10199, 10271, 10278, 10279, 10280, 10282
)), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 9L, 10L, 11L, 12L,
13L, 14L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L,
40L, 43L, 50L, 51L, 53L, 67L, 74L, 75L, 76L, 79L, 83L, 91L, 101L,
102L, 103L, 111L, 114L, 116L, 117L, 118L, 119L, 120L, 121L, 122L,
123L, 126L, 133L, 151L, 158L, 159L, 160L, 162L), class = "data.frame")
Hey guys, so I am trying to fuzzy-join lat & lon information to get the zip code of a specific location. I tried:
# Range join: attach the zip code whose bounding box contains the drop-off
# point (longitude between bounds_west and bounds_east, latitude between
# bounds_south and bounds_north).
# NOTE(review): match_fun is given quoted strings ('<=') rather than the
# operator functions themselves (`<=`); this is presumably what triggers the
# reported 'Column "col" is NULL' error -- verify against fuzzyjoin's
# match_fun documentation.
test <- test %>% fuzzy_left_join(zip_codes,by = c("dropoff_longitude" = "bounds_east", "dropoff_longitude" = "bounds_west", "dropoff_latitude" = "bounds_north","dropoff_latitude" = "bounds_south"), match_fun = list('<=', '>=' , '<=', '>='))
But unfortunately, this returns the error message Error: All columns in a tibble must be vectors. x Column "col" is NULL.
I don't know how to solve this. There is no column "col" in either of the data frames. The result should give me the corresponding zip code if the dropoff_longitude is between bounds_east and bounds_west and the dropoff_latitude is between bounds_north and bounds_south.
Thanks a lot in advance!
We could use the non-equi join from data.table as one of the dataset is data.table
library(data.table)
# Make sure `test` is a data.table (converted by reference).
setDT(test)
# Non-equi join against zip_codes: rows match when the drop-off point lies
# inside the zip code's longitude/latitude bounds.
test[
  zip_codes,
  on = .(
    dropoff_longitude <= bounds_east,
    dropoff_longitude >= bounds_west,
    dropoff_latitude <= bounds_north,
    dropoff_latitude >= bounds_south
  )
]
I have quite "messy" data. I have a model with an interaction between two factors, and I want to plot it. So:
f1 <- structure(list(tipo = c("digitables", "digitables", "digitables",
"digitables", "digitables", "digitables", "digitables", "digitables",
"payments", "payments", "payments", "payments", "payments", "payments",
"payments", "payments", "traditionals", "traditionals", "traditionals",
"traditionals", "traditionals", "traditionals", "traditionals",
"traditionals"), categoria = c("Advice", "Digital banks", "Exchange",
"FinTech", "Insurance", "Investments", "Lending", "Payments and transfers",
"Advice", "Digital banks", "Exchange", "FinTech", "Insurance",
"Investments", "Lending", "Payments and transfers", "Advice",
"Digital banks", "Exchange", "FinTech", "Insurance", "Investments",
"Lending", "Payments and transfers"), Total = c(63L, 450L, 279L,
63L, 36L, 108L, 567L, 549L, 63L, 450L, 279L, 63L, 36L, 108L,
567L, 549L, 35L, 250L, 155L, 35L, 20L, 60L, 315L, 305L), Frequencia = c(44L,
266L, 118L, 9L, 14L, 45L, 134L, 242L, 33L, 68L, 2L, 10L, 3L,
8L, 11L, 78L, 27L, 226L, 142L, 10L, 20L, 45L, 300L, 245L), Perc = c(69.84,
59.11, 42.29, 14.29, 38.89, 41.67, 23.63, 44.08, 52.38, 15.11,
0.72, 15.87, 8.33, 7.41, 1.94, 14.21, 77.14, 90.4, 91.61, 28.57,
100, 75, 95.24, 80.33), Failure = c(19L, 184L, 161L, 54L, 22L,
63L, 433L, 307L, 30L, 382L, 277L, 53L, 33L, 100L, 556L, 471L,
8L, 24L, 13L, 25L, 0L, 15L, 15L, 60L)), row.names = c(NA, -24L
), class = "data.frame")
# Packages
library(dplyr)
library(ggplot2)
library(emmeans) #version 1.4.8. or 1.5.1
# Works as expected
# Binomial GLM on (Frequencia, Failure) counts with a tipo x categoria
# interaction.
m1 <- glm(cbind(Frequencia, Failure) ~ tipo*categoria,
          data = f1, family = binomial(link = "logit"))
# Estimated marginal means of categoria within each tipo.
l1 <- emmeans(m1, ~categoria|tipo)
# `comparisons` is spelled out in full (no partial argument matching) and
# uses TRUE rather than the reassignable T.
plot(l1, type = "response",
     comparisons = TRUE,
     by = "categoria")
Using by="tipo" results:
# Doesn't work:
# Same plot as above, but grouped by tipo; the argument name is written out
# in full (`comparisons`) with TRUE instead of T.
plot(l1, type = "response",
     comparisons = TRUE,
     by = "tipo")
Error: Aborted -- Some comparison arrows have negative length!
In addition: Warning message:
Comparison discrepancy in group digitables, Advice - Insurance:
Target overlap = -0.0241, overlap on graph = 0.0073
If I use comparison = F, as suggested by the explanation supplement vignette, it works. However, it does not show the arrows, which are very important to me.
Q1 - Is there a work around for it? (Or is it impossible due to my data?)
As we can see from the last plot, there is a category with probability = 1 (categoria=Insurance and tipo=traditionals). So I deleted only this row from my data frame and tried to redo the plot, which results in:
# Drop the row(s) whose Perc equals 100, then refit and re-plot.
f1 <- f1 %>%
  filter(Perc != 100)
m1 <- glm(cbind(Frequencia, Failure) ~ tipo*categoria,
          data = f1, family = binomial(link = "logit"))
l1 <- emmeans(m1, ~categoria|tipo)
# `comparisons` is spelled out in full and uses TRUE rather than T.
plot(l1, type = "response",
     comparisons = TRUE,
     by = "categoria")
Error in if (dif[i] > 0) lmat[i, id1[i]] = rmat[i, id2[i]] = wgt * v1[i] else rmat[i, :
missing value where TRUE/FALSE needed
Q2 - How can I plot my results even when one variable has a missing level (with respect to another variable)? I would expect the Insurance facet to have only the payments and digitables levels (while the others remain the same).
First, please don't ever re-use the same variable names for more than one thing; that makes things not reproducible. If you modify a dataset, or a model, or whatever, give it a new name so it can be distinguished.
Q1
As documented, comparison arrows cannot always be computed. This is such an example. I suggest displaying the results some other way, e.g. using pwpp() or pwpm()
Q2
There was a bug in handling missing cases. This has been fixed in the GitHub version:
# Work on renamed copies (f2, m2, l2) so the original objects stay intact.
# Drop the row(s) whose Perc equals 100.
f2 <- f1 %>%
  filter(Perc != 100)
m2 <- glm(cbind(Frequencia, Failure) ~ tipo*categoria,
          data = f2, family = binomial(link = "logit"))
# Estimated marginal means of categoria within each tipo.
l2 <- emmeans(m2, ~categoria|tipo)
# `comparisons` is written out in full rather than relying on partial
# argument matching.
plot(l2, type = "response",
     comparisons = TRUE,
     by = "categoria")
plot(l2, type = "response",
     comparisons = TRUE,
     by = "tipo")
## Error: Aborted -- Some comparison arrows have negative length!
## (in group "payments")
I have the following data which contains data from 7 combinations (rows) and 12 methods (columns).
structure(list(Beams = structure(c(1L, 3L, 4L, 5L, 6L, 7L, 2L
), .Label = c("1 – 2", "1 – 2 – 3 – 4", "1 – 3", "1 – 4", "2 – 3",
"2 – 4", "3 – 4"), class = "factor"), Slope...No.weight = c(75L,
65L, 45L, 30L, 95L, 70L, 75L), Slope...W1 = c(85L, 70L, 65L,
55L, 90L, 85L, 75L), Slope...W2 = c(80L, 65L, 65L, 50L, 90L,
90L, 75L), Slope...W3 = c(80L, 75L, 75L, 65L, 90L, 95L, 80L),
Average.Time...No.Weight = c(75L, 65L, 45L, 30L, 95L, 70L,
70L), Average.Time...W1 = c(70L, 60L, 75L, 60L, 75L, 75L,
80L), Average.Time...W2 = c(65L, 40L, 65L, 50L, 75L, 85L,
70L), Average.Time...W3 = c(65L, 40L, 80L, 75L, 65L, 85L,
80L), Momentum...No.weight = c(80L, 60L, 45L, 30L, 95L, 70L,
75L), Momentum...W1 = c(85L, 75L, 60L, 55L, 95L, 90L, 80L
), Momentum...W2 = c(80L, 65L, 70L, 50L, 90L, 90L, 85L),
Momentum...W3 = c(85L, 75L, 75L, 55L, 90L, 95L, 80L)), .Names = c("Beams",
"Slope...No.weight", "Slope...W1", "Slope...W2", "Slope...W3",
"Average.Time...No.Weight", "Average.Time...W1", "Average.Time...W2",
"Average.Time...W3", "Momentum...No.weight", "Momentum...W1",
"Momentum...W2", "Momentum...W3"), class = "data.frame", row.names = c(NA,
-7L))
I would like to get a barplot like the one below:
I've tried with
library(RColorBrewer)
# Read the detection results from CSV.
dat<-read.csv("phaser-p13-30dBm-100ms.csv")
# Labels for the seven beam combinations.
# NOTE(review): this variable shadows base::names.
names <- c("1-2","1-3","1-4","2-3","2-4","3-4","1-2-3-4")
# NOTE(review): the CSV is read into `dat`, but the call below uses `dat2`,
# which is not defined in this snippet -- confirm which object is intended.
# NOTE(review): `names` holds 7 labels while columns 2:13 select 12 series;
# this mismatch is presumably related to the reported "incorrect number of
# names" error -- verify.
barx <-
barplot(as.integer(dat2[,2:13]),
beside=TRUE,
col=brewer.pal(12,"Set3"),
names.arg=names,
ylim=c(0,100),
xlab='Combination of beams',
ylab='Correct detection [%]')
box()
# xpd = TRUE clips plotting to the figure region instead of the plot region.
par(xpd=TRUE)
# One legend entry per method column, using the same 12-colour palette as the bars.
legend("top", c("Slope - No weight","Slope - W1","Slope - W2","Slope - W3","Average Time - No weight","Average Time - W1","Average Time - W2","Average Time - W3","Momentum - No weight","Momentum - W1","Momentum - W2","Momentum - W3"), fill = brewer.pal(12,"Set3"),horiz = T)
but I got this error:
Error in barplot.default(as.integer(dat2[, 2:13]), beside = TRUE, col = brewer.pal(12, :
incorrect number of names
Could you find the error?
I've named your dataframe df here and made use of three packages. This is not a base R solution. Given your dataset format, this is the easiest way (IMO) to do this:
library(dplyr)
library(tidyr)
library(ggplot2)
# Grouped bar chart: one group of bars per beam combination, one bar per method.
# pivot_longer() replaces the superseded gather(); geom_col() is the direct
# equivalent of geom_bar(stat = "identity").
df %>% # dataframe
  pivot_longer(-Beams, names_to = "variable", values_to = "value") %>% # convert to long format excluding beams column
  ggplot(aes(x = Beams, y = value, fill = variable)) + # plot the bar plot
  geom_col(position = "dodge")
This should get you started, if you wish to use base graphics and not ggplot2:
# Everything except the first column becomes the matrix of bar heights.
df <- as.matrix(dat[, -1])
# The first column supplies the row labels.
rownames(df) <- dat[[1]]
# Side-by-side bars with axis labels perpendicular to the axis (las = 2).
barplot(height = df, las = 2, beside = TRUE)
Use ggplot2 package and make sure that your data is neat and ordered?
Something like ggplot(dataframe, aes(colour = some_factor)) + geom_bar(aes(x=Some_variable, y=Some_other_variable))
A more explicit statement of how your data matches the image would be useful.