Differences between merge and match functions in R - r

Hi everybody, I removed my last post to make a reproducible example of my problem. I am working with the following two data frames. Here is a1 (dput structure):
structure(list(r04_numero_operacion = c("0050475725", "0050490602",
"0050491033", "0050496386", "0050518985", "0050630090", "0050631615",
"0060235906", "0060238732", "0060241333", "0060244391", "0060245813",
"0060260056", "0060266356", "0800041441", "0800054041", "0800055382",
"0800058554", "2020200062", "2020200073", "CAR1010001706000",
"CAR1010001795000", "CAR1010001803000", "CAR1010001871000", "CAR1010001962000",
"CAR1010002002000", "CAR1010002120000", "CAR1010002189000", "CAR1010002215000",
"CAR1010002250000"), perdida3 = c(523.12, 265.43, 8371.66, 5242.13,
4960.51, 8473.27, 3743.45, 1283.32, 2229.25, 8001.27, 8653.94,
3670.13, 4536.02, 8216.55, 2481.36, 288.94, 1637.28, 4566.89,
1573.63, 11217.92, 0, 0, 0, 0, 0, 0, 0, 0, 9633.9, 0), Saldo = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 288.94, 1637.28, 4566.89,
1, 1, 481.59, 299.52, 258.13, 603.84, 231.61, 631.68, 220.6,
210.54, 1, 1224.44), Bvencida = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 603.84, 0, 631.68,
0, 0, 0, 0), Cvencida = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1224.44),
Dvencida = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), vencida = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 288.94, 1637.28,
4566.89, 1, 1, 0, 0, 0, 603.84, 0, 631.68, 0, 0, 1, 1224.44
), V1 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), .Names = c("r04_numero_operacion",
"perdida3", "Saldo", "Bvencida", "Cvencida", "Dvencida", "vencida",
"V1"), codepage = 1252L, row.names = c(NA, 30L), class = "data.frame")
And a2 data frame (dput structure):
structure(list(r04_numero_operacion = c("0050475725", "0050490602",
"0050491033", "0050496386", "0050518985", "0050630090", "0050631615",
"0060235906", "0060238732", "0060241333", "0060244391", "0060245813",
"0060260056", "0060266356", "0800041441", "0800054041", "0800055382",
"0800058554", "2020200073", "CAR1010002002000", "CAR1010002189000",
"CAR1010002215000", "CAR1010002250000", "CAR1010002264000", "CAR1010002297000",
"CAR1010002401000", "CAR1010002412000", "CAR1010002436000", "CAR1010002529000",
"CAR1010002709000"), perdida3 = c(523.12, 265.43, 8371.66, 5242.13,
4960.51, 8473.27, 3743.45, 1283.32, 2229.25, 8001.27, 8653.94,
3670.13, 4536.02, 8216.55, 2481.36, 288.94, 1637.28, 4566.89,
11217.92, 0, 0, 9633.9, 0, 0, 0, 0, 0, 0, 0, 0), Saldo = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 288.94, 1637.28, 4566.89,
1, 317.72, 210.54, 1, 868.93, 242.91, 298.78, 120.63, 255.01,
357.68, 284.08, 308.83), Bvencida = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 317.72, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0), Cvencida = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 868.93, 0, 0, 0, 0, 0, 0, 0), Dvencida = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), vencida = c(1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 288.94, 1637.28, 4566.89, 1, 317.72, 0,
1, 868.93, 0, 0, 0, 0, 0, 0, 0), V2 = c(2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2)), .Names = c("r04_numero_operacion", "perdida3", "Saldo",
"Bvencida", "Cvencida", "Dvencida", "vencida", "V2"), class = "data.frame", row.names = c(NA,
30L))
My problem arises when I use the merge() and match() functions. merge() is more convenient than match() for adding new variables by a common key, but when I use merge() I don't get the same result as with match(). First I used merge() with a2 and a1 to create DF with the following code:
DF=merge(a2,a1,all.x=TRUE)
It added V1 variable from a1 to DF and I got this summary for DF$V1:
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
1 1 1 1 1 1 9
After that, I created a copy of a2 named DF and matched on r04_numero_operacion, using this code to add the V1 variable from a1 to a2:
a2$V1<-a1[match(a2$r04_numero_operacion,a1$r04_numero_operacion),"V1"]
It added V1 to DF, but the result is different from the merge() approach. I got this summary for DF$V1 with the match() solution:
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
1 1 1 1 1 1 7
What I want is to reproduce what I did with match(), but using the merge() function, because merge() is more powerful than match(). Thanks for your help.

With match(a2$r04_numero_operacion,a1$r04_numero_operacion), the a2$r04_numero_operacion values are matched only against the corresponding column in a1, whereas merge(a2,a1,all.x=TRUE) joins on all columns whose names are common to both data frames (r04_numero_operacion, perdida3, Saldo, Bvencida, Cvencida, Dvencida and vencida), so a row of a2 only finds a partner in a1 when every one of those values agrees. If you merge on the first column only, the NA counts match up:
summary( merge(a2,a1,by=1,all.x=TRUE)$V1 )
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
1 1 1 1 1 1 7

Related

Combining multiple calibration curves in one plot

I would like some help in combining two or more calibration plots in one plot in R.
I am comparing the calibration of two models and I would like them in one plot.
I am using the calibration_plot function from the predtools package. Is this the correct package or are there more powerful packages for R?
# Draw one calibration plot per model from the same data frame; the two calls
# differ only in the prediction column (`pred`) and the plot title.
# `data_summary` is spelled out as TRUE because `T` is an ordinary variable
# that can be reassigned, whereas TRUE is a reserved word.
calibration_plot(data = stackoverflow, obs = "event", pred = "model1", x_lim = c(0, 1), y_lim = c(0, 1), title = "Model1", points_col_list = NULL, data_summary = TRUE)
calibration_plot(data = stackoverflow, obs = "event", pred = "model2", x_lim = c(0, 1), y_lim = c(0, 1), title = "Model2", points_col_list = NULL, data_summary = TRUE)
dput of stackoverflow
structure(list(model1 = c(0.237760176222135, 0.71546420180643,
0.794432429369465, 0.656363881639676, 0.791708216360907, 0.687126456661465,
0.285599617509653, 0.184137148744874, 0.864448003819623, 0.68633722517368,
0.633141834438598, 0.342033236744753, 0.809527471856904, 0.44709310706345,
0.642309783414134, 0.478634921655348, 0.749205389344258, 0.218507206790561,
0.715674356193537, 0.722136223616077, 0.365559623908335, 0.633141834438598,
0.832424627307168, 0.530368910251955, 0.428880665771525, 0.775641696932919,
0.330128697609423, 0.783171338536037, 0.783793672057888, 0.468355345435376,
0.710245078226952, 0.81648327238482, 0.603693592753907, 0.592283374978545,
0.20507631783337, 0.485882139691015, 0.809455349796892, 0.754732165553727,
0.66377865123304, 0.438721686675472, 0.2709932360314, 0.176381161846607,
0.369232324737991, 0.654900775755287, 0.677447167734547, 0.180268404814802,
0.399419971681492, 0.73438881598655, 0.47646627399175, 0.853704813768205,
0.608974716778207, 0.634887771555211, 0.592291547214112, 0.663813688339367,
0.53943039194057, 0.7145666044468, 0.234543422020881, 0.202729809644918,
0.39672336107067, 0.158096774829406, 0.320329566236219, 0.740201212163183,
0.796777244675989, 0.308199837768449, 0.786598711217149, 0.667179376789805,
0.732390196193263, 0.117181865869223, 0.282712898098667, 0.813513774287869,
0.693399632523288, 0.493475284817971, 0.593983673049009, 0.738452518666474,
0.623275128184214, 0.855655675514564, 0.634887771555211, 0.69907205055215,
0.721513188211345, 0.622910777455488, 0.309031119328554, 0.268184852225134,
0.623816023317224, 0.620180359930797, 0.65607957849868, 0.318748098514039,
0.786138246273698, 0.770147379877078, 0.368364517958795, 0.889787669491316,
0.564862773577324, 0.644936236318243, 0.859568428574226, 0.825393586455367,
0.443267542815971, 0.662158972085537, 0.833393716075399, 0.710245078226952,
0.706450321722432, 0.83195359999601, 0.384551972953433, 0.669445207069002,
0.814471247707942, 0.809263905314706, 0.778049639424727, 0.208123706702556,
0.858117029177706, 0.816715622153779, 0.260468350284386, 0.601943372497283,
0.732426863335112, 0.570001496800709, 0.909610698898771, 0.789637001870557,
0.341839397973598, 0.571752088809212, 0.287188981135642, 0.736473504179658,
0.656363881639676, 0.300022596959034, 0.883593082700905, 0.515709672432742,
0.806949851044393, 0.597577448403585, 0.787642720305032, 0.283398218900544,
0.869505288499405, 0.225857887615281, 0.612365595338034, 0.834203334098187,
0.65572943010048, 0.38504423454054, 0.828390320481501, 0.875166731695581,
0.35145027474754, 0.534808195965384, 0.822411007103645, 0.455010761704292,
0.856585071099577, 0.757934255761915, 0.363235512112777, 0.298988516351143,
0.62357099020229, 0.821536981782977, 0.743740947803668, 0.268184852225134,
0.542172476801765, 0.789637001870557, 0.77057687698505, 0.688781883558654,
0.731975036767798, 0.58841221075051, 0.614992060475678, 0.772927795822135,
0.571844722455543, 0.786598711217149, 0.814236699887853, 0.24632201788091,
0.461305611875743, 0.357877442912028, 0.629777768176522, 0.340708895525214,
0.858328679154275, 0.896374497790043, 0.508117603191322, 0.868395985194952,
0.325391234150186, 0.217666716175776, 0.738651896306012, 0.44896380642455,
0.788773121465437, 0.855280657199334, 0.703134160788728, 0.243986928768584,
0.676029521174355, 0.578073026049121, 0.665893397554917, 0.923993035769077,
0.604501259273048, 0.300669521764063, 0.829237758054682, 0.660669727162949,
0.382885828324256, 0.357034833076844, 0.564862773577324, 0.800651230924735,
0.924025873813848, 0.547345708551748, 0.897415894457121, 0.378769589693012,
0.787320407651021, 0.835350498525985, 0.806949851044393, 0.498678748045157,
0.807549787634665, 0.758667475339789, 0.7145666044468, 0.713417011226846,
0.287188981135642, 0.390819912242695, 0.807549787634665, 0.648880360799679,
0.294317907149489, 0.634353464782934, 0.738303504906273, 0.703614919895444,
0.56018558222595, 0.365559623908335, 0.373744408366516, 0.422142971154104,
0.780393752718648, 0.420192692166099, 0.537551762931482, 0.58841221075051,
0.795977982509655, 0.750581504918475, 0.74077785828739, 0.357391060095084,
0.90335324857881, 0.88512447866342, 0.573593424038461, 0.376636890712417,
0.814471247707942, 0.516728126564283, 0.511769167592521, 0.753160027303678,
0.379974608419567, 0.247151253298088, 0.789834529579229, 0.27119953909381,
0.578282022920176, 0.289890319811112, 0.88870041654423, 0.605378010737711,
0.351803978568917, 0.852843747717878, 0.742703232488038, 0.770147379877078,
0.784056463649842, 0.678270092659001, 0.59309862179546, 0.399457633245555,
0.852843747717878, 0.794010119798579, 0.667825589460395, 0.825212537892541,
0.792966972627726, 0.770106127346002, 0.706404124524996, 0.744885306690795,
0.719123857349234, 0.53519415315944, 0.787668394777512, 0.506672792915601,
0.712945665228562, 0.660440139900567, 0.944906901834028, 0.430863999447529,
0.725700303601332, 0.783434677550229, 0.401698737182868, 0.662322756723675,
0.510386937625764, 0.767961401284267, 0.384551972953433, 0.785396724092888,
0.653104308767684, 0.687730601503667, 0.791984688175846, 0.885437716111375,
0.867068735171316, 0.213182756545354, 0.628989953572443, 0.381845536523953,
0.456595194489821, 0.206497805251196, 0.29389687736639, 0.56562528089224,
0.22423014383975, 0.876624588279104, 0.62948388586649, 0.341057473957848,
0.836726008192051, 0.959077640833203, 0.700380060001825, 0.550565054363783,
0.857986332751831, 0.73606239655555, 0.626958047968868, 0.801665344467059,
0.437818514822793, 0.84172513914277, 0.249938361820059, 0.844698016882311,
0.698481177252194, 0.425091126036881, 0.270744340397641, 0.911317329972866,
0.628317598316906, 0.798858730229755, 0.536048796135506, 0.558330930540637,
0.769744711534908, 0.835724887768629, 0.716896341598601, 0.277571045573504,
0.767928060731326, 0.171725546041467, 0.62357099020229, 0.372575487235624,
0.704787562421115, 0.279904670683375, 0.320055251165285, 0.470719150191045,
0.634353464782934, 0.260468350284386, 0.165380669830666, 0.633502115957914,
0.270438131899591, 0.298663446487892, 0.160383482822362, 0.153892088510337,
0.201934325478114, 0.248088137480339, 0.581594778507714, 0.185982855759208,
0.442026886814123, 0.180184819010326, 0.294941540461375, 0.27916583091498,
0.179726924309564, 0.176381161846607, 0.169591266926947, 0.220985714153048,
0.218292082577411, 0.256088777375909, 0.200434356927467, 0.294697652744116,
0.315059393256192, 0.309483384308751, 0.731559469346124, 0.153818437822602,
0.117181865869223, 0.230597455066238, 0.213538098408414, 0.171703216612226,
0.117342457219404, 0.268154040879572, 0.249203995151824, 0.289135705273671,
0.260767305152473, 0.243986928768584, 0.191193730214216, 0.343771287624594,
0.270744340397641, 0.63744932909826, 0.523368837567113, 0.154394806405882,
0.410858823948176, 0.363235512112777, 0.173279305917588, 0.134889443422701,
0.261718547032863, 0.357391060095084, 0.672111652745064, 0.314724695162023,
0.109741058161593, 0.268184852225134, 0.346565137119793, 0.604872114491051,
0.230805654103775, 0.251721750397117, 0.182834120641036, 0.322972096586922,
0.322746359746767, 0.229914079265902, 0.645775875936576, 0.201934325478114,
0.553545512735033, 0.187303180765189, 0.198028579423478, 0.290364891461295,
0.229265182965244, 0.710032536799647, 0.735760882566064, 0.182834120641036,
0.401191274559514, 0.297035951335436, 0.441177176238355, 0.298988516351143,
0.553928890511863, 0.24317228518234, 0.229914079265902, 0.289890319811112,
0.0624502711689494, 0.162722362310802, 0.169591266926947, 0.642273712916542,
0.169075242884674, 0.25265127289933, 0.165887669057233, 0.235150322065362,
0.589857972739157, 0.338141490381861, 0.125767093771288, 0.373381352261149,
0.362344177990451, 0.25212074667222, 0.256751801775933, 0.187067135355391,
0.216054935520473, 0.238425678298765, 0.141675251821082, 0.392138013572663,
0.267385232516098, 0.255390506596019, 0.186538897906035, 0.310084569356542,
0.214186337707389, 0.154094207815253, 0.209949367371578, 0.260468350284386,
0.340233482715629, 0.205675021254086, 0.212922641652452, 0.169293313540589,
0.237148695480572, 0.252848455583522, 0.359702689379285, 0.092827863257841,
0.554763268477136, 0.136704135686601, 0.162722362310802, 0.350614534717934,
0.269714920758443, 0.208123706702556, 0.309815028457651, 0.171703216612226,
0.487788581673596, 0.187817836035314, 0.320516166012631, 0.205675021254086,
0.310307964614977, 0.16055325992601, 0.338141490381861, 0.261718547032863,
0.277571045573504, 0.570001496800709, 0.638837851776372, 0.273858461205031
), model2 = c(0.287514786741101, 0.750187800029493, 0.988418181061398,
0.838173140408305, 0.645239560308121, 0.677315834518225, 0.158242529420476,
0.125115299019299, 0.730655708202056, 0.650323967533871, 0.46668206957655,
0.477695763474193, 0.517548581264567, 0.777222315964033, 0.710407521182698,
0.265575276954178, 0.712813831748586, 0.536061703578936, 0.799982280979083,
0.82465248790953, 0.304409261779039, 0.532580860681508, 0.663549547438732,
0.525529652068901, 0.438036097303733, 0.500581402303234, 0.340790071460301,
0.6585150480268, 0.764755402974609, 0.495348039833107, 0.936052199799697,
0.86884447627363, 0.64779296673596, 0.729130744413302, 0.212308846717215,
0.921575063542082, 0.896725345760408, 0.908630767579495, 0.552735032621137,
0.333089198076349, 0.411240252256137, 0.210089558407819, 0.73811428745976,
0.598277326712665, 0.812100882633133, 0.553780145134378, 0.285594535525249,
0.976265415980042, 0.479482389182052, 0.884418579477935, 0.675232647472854,
0.549910482547915, 0.503157835462091, 0.665670569303347, 0.468546618084848,
0.568520882937806, 0.270816408384732, 0.221250962981007, 0.326798623488733,
0.731113083904769, 0.264546800699518, 0.637416563146408, 0.612254123798448,
0.137589225529098, 0.973856314310539, 0.57355849354993, 0.795724887855882,
0.207887346031912, 0.315264931197944, 0.570327096970961, 0.821667790125253,
0.34700932633174, 0.700348491487529, 0.917036300698461, 0.499629857729573,
0.945677859174471, 0.734707685742723, 0.793812332967952, 0.770460729642163,
0.527396810927778, 0.288295112916966, 0.425445404428372, 0.483612884217641,
0.675259561878702, 0.592376322434429, 0.179112926596139, 0.91845435860965,
0.559221364493348, 0.2420893235427, 0.987070707204451, 0.47041204527438,
0.561066676064396, 0.979709257022779, 0.97459617254262, 0.493734807579198,
0.633862083931177, 0.984516332934101, 0.740187112417555, 0.766781488979252,
0.97975192431102, 0.33055072207903, 0.533054583463754, 0.790202073936537,
0.680287263575367, 0.660197257135086, 0.265210247355631, 0.635683810747643,
0.929441105581883, 0.200564850844362, 0.624207163066426, 0.677315834518225,
0.576813581489832, 0.870011812596891, 0.784528876996448, 0.349112814763239,
0.713692748225076, 0.246612007341578, 0.851081180574875, 0.687137281881956,
0.341479975901666, 0.877294727463993, 0.345744536252705, 0.813818470175183,
0.559690409480896, 0.621134908742507, 0.278731001691605, 0.982206757909454,
0.202689509120882, 0.607302163137454, 0.851508613763468, 0.649527821417898,
0.296858688986366, 0.637473293434683, 0.861231144697065, 0.754589517375169,
0.963050340651459, 0.796382681438869, 0.321017842587893, 0.548527383260705,
0.440018622160444, 0.40853771801107, 0.26933993698801, 0.520368464701376,
0.7280745989701, 0.471834600908167, 0.204830907885538, 0.506228982219536,
0.731428209461483, 0.776728970181845, 0.722816963684959, 0.877294727463993,
0.479842448557783, 0.576319001528652, 0.74198934756818, 0.712480680225209,
0.704672118556432, 0.98052035986783, 0.217382974876404, 0.367373216386543,
0.307185123289256, 0.512795273384737, 0.260517614803284, 0.968697596752212,
0.749162451797087, 0.68041909739219, 0.79699918505845, 0.238291353978693,
0.219963202913491, 0.618951001840467, 0.580254548293966, 0.818616105457586,
0.942296461590003, 0.666912717900493, 0.332605259941962, 0.651690837935606,
0.50857664168015, 0.90399764992811, 0.846846428061044, 0.799716912284323,
0.181061805447383, 0.767483350924048, 0.384190617749709, 0.381014832615475,
0.286346662970687, 0.415112136598318, 0.871297702675277, 0.904491992447163,
0.691290885733912, 0.883663383425187, 0.218031207659998, 0.845471915435013,
0.851225057418906, 0.974737142979397, 0.304384913954126, 0.809217536951246,
0.673169505573611, 0.587832517321454, 0.690755125022416, 0.22291607285211,
0.179953907381919, 0.970837663453446, 0.738032658514138, 0.164877770046258,
0.559221364493348, 0.737961463367539, 0.574811799071391, 0.823334805691295,
0.397181521409539, 0.450228391674526, 0.388746691319082, 0.610474606608326,
0.614247050968821, 0.233533077961654, 0.513485424702372, 0.645035767493623,
0.764733321673292, 0.635712234455418, 0.321432791509016, 0.980875944753058,
0.930793776634344, 0.515141614894262, 0.267122769086816, 0.742852208475956,
0.353042840393565, 0.747623556575278, 0.568520882937806, 0.336815676156739,
0.294901862001193, 0.795145496446215, 0.249762053766042, 0.780927060658826,
0.799538641669496, 0.736927326640062, 0.560628178989406, 0.277966452045218,
0.955942719184481, 0.99446721952086, 0.806262158705558, 0.622506360194255,
0.599162290212476, 0.911568883259971, 0.919636749455923, 0.755663660003546,
0.786091771573376, 0.663124547545389, 0.728451212871414, 0.915017637389918,
0.540976155120713, 0.966571939655214, 0.944017655309776, 0.769111705432106,
0.730655708202056, 0.793731921435595, 0.672331547859415, 0.954188301697073,
0.771490498213196, 0.759323050904125, 0.371749915918801, 0.896029675337791,
0.806875130512909, 0.428847795602538, 0.879100543795116, 0.683667479610655,
0.652853497164203, 0.322424456351873, 0.63223630618384, 0.739111324425798,
0.842480963836762, 0.656399746538259, 0.650393771706781, 0.811904090325055,
0.232195146241057, 0.475599646446869, 0.262723383404926, 0.375875902973021,
0.121452811688801, 0.267495498619919, 0.48738518650336, 0.145570737158047,
0.836295028297079, 0.866323236416769, 0.339370978784442, 0.950244949336958,
0.932430555794746, 0.553175128755609, 0.581965592354742, 0.894949559660516,
0.576843540774841, 0.87024074399061, 0.894254944829874, 0.439366370246273,
0.701519570518178, 0.283657063015796, 0.93318279804328, 0.805782242770265,
0.992222214238406, 0.394450834972146, 0.901654917225723, 0.933970824223853,
0.768817215838203, 0.651466105934146, 0.501057173441579, 0.892777201339042,
0.858754977433982, 0.695586052629751, 0.248716707595335, 0.856898191452172,
0.158303873916073, 0.619956780913837, 0.226391315408489, 0.558313224726938,
0.298026108292324, 0.234215057380327, 0.428383617689134, 0.549439406197447,
0.220289907919949, 0.286164787814256, 0.803010050278043, 0.170897787205529,
0.250118826376969, 0.169329123235089, 0.105169643729422, 0.230818986882482,
0.18870738933734, 0.643086595448451, 0.351739919456562, 0.346870221210139,
0.170151016814197, 0.293716098173589, 0.178851307909495, 0.164354353678441,
0.137152736940256, 0.162048818603576, 0.215166581519086, 0.187007449141651,
0.237439972839634, 0.14258798699763, 0.330156871723161, 0.237946098901768,
0.194887699919184, 0.890822417716102, 0.14467963257714, 0.158733788855624,
0.315702415553601, 0.205141048154358, 0.186429463355873, 0.157989199214296,
0.271192384689673, 0.176903209875729, 0.268965581092294, 0.235561057229909,
0.168828874843123, 0.212308846717215, 0.570545224925858, 0.218680833334588,
0.520812798555289, 0.570357173420229, 0.148201635635738, 0.252266585477661,
0.370890047542259, 0.14213825397019, 0.112490591448769, 0.214524529099719,
0.21837679510844, 0.6077559310819, 0.295668356350065, 0.193398997617611,
0.188167557710595, 0.236590688745572, 0.608692142201058, 0.136927677224389,
0.203613730295902, 0.182193295757089, 0.182761098473245, 0.212627281905761,
0.210109927239357, 0.629520107298525, 0.162566320963278, 0.465261364100146,
0.241020865363341, 0.187877013216933, 0.210961933060156, 0.320603176249504,
0.557843871658153, 0.590069216234008, 0.17636750730924, 0.359238412133482,
0.228170276870657, 0.476807428925973, 0.249072482719156, 0.480317465811968,
0.329288559790918, 0.183045514601816, 0.265264079554823, 0.1192496449915,
0.188458447253358, 0.14328740848045, 0.558313224726938, 0.179673238503788,
0.199063071112044, 0.217080239755128, 0.168828874843123, 0.62158265253597,
0.441681318415537, 0.0992888953767996, 0.2825232763026, 0.251191180467748,
0.197248787997729, 0.174714952234069, 0.138494990579275, 0.26635431665589,
0.297256081297999, 0.135360948278321, 0.291010824664818, 0.166192163105498,
0.436117818696803, 0.25871219982226, 0.296090406042039, 0.295297734909279,
0.108413551555087, 0.237256621439176, 0.190192131110892, 0.650196567547233,
0.188167557710595, 0.262699610356166, 0.182761098473245, 0.209773911216204,
0.188458447253358, 0.274945653887741, 0.125323762673128, 0.435395196228578,
0.0971197191841969, 0.194589266165696, 0.199082640326805, 0.200870163474221,
0.164894670499535, 0.276441228444482, 0.160008586958398, 0.747010603204249,
0.15649233284349, 0.374808964328024, 0.18476385704053, 0.248716707595335,
0.150361187732168, 0.224878905369068, 0.235218536432899, 0.231856035256552,
0.49592350798512, 0.761359026335127, 0.417676219153237), event = c(1,
1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0,
1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1,
1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1,
0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1,
1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1,
1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1,
1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0)), row.names = c(NA, -450L), class = "data.frame")
Thank you in advance!

Multiple wilcox.tests across columns using variables in first column (R)

I have this data.frame
df <- data.frame(
variable=c(2.4860651, -0.68863024, 2.63530974, -2.95754943, 1.67945091, 2.63530974,
4.79002539, 2.32575938, 3.57236441, -0.364825998, -2.00646016, -3.12380516,
0.69307013, -5.65846824, 0.45632519, 2.08978142),
A=c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0),
B=c(1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0),
C=c(0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1),
D=c(1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0),
E=c(0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0),
F=c(0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1))
I would like to perform wilcox.test for each column, with the groups defined by the 0 and 1 entries in that column and using the values in df$variable. Then I would like to add the p-values in a new row and the adjusted p-values in another row.
I have tried this:
library(dplyr)
result <- df %>% summarise(across(!variable, ~wilcox.test(.x ~ variable)$p.value), exact=NULL) %>%
bind_rows(., p.adjust(., method = 'BH')) %>%
bind_rows(df, .) %>%
mutate(variable=replace(variable, is.na(variable), c('p.values', 'p.adjust')))
But this causes errors.
This is the result I would like to get:
result <- data.frame(
variable=c(2.4860651, -0.68863024, 2.63530974, -2.95754943, 1.67945091, 2.63530974,
4.79002539, 2.32575938, 3.57236441, -0.364825998, -2.00646016, -3.12380516,
0.69307013, -5.65846824, 0.45632519, 2.08978142, 'p.value', 'p.adjust'),
A=c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1),
B=c(1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0.560444274, 1),
C=c(0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0.143117298, 0.764253489),
D=c(1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0.820753088, 1),
E=c(0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0.95482869, 1),
F=c(0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0.254751163, 0.764253489))
Can anyone help?
You may try something along the lines of the following -
library(dplyr)
# One Wilcoxon rank-sum p-value per indicator column: the values of `variable`
# in rows where the column is 0 are compared with those where it is 1.
tmp <- summarise(
  df,
  across(
    !variable,
    ~ wilcox.test(variable[.x == 0], variable[.x == 1])$p.value
  )
)

# Benjamini-Hochberg adjustment across all columns' p-values.
adj_value <- p.adjust(unlist(tmp), method = "BH")

# Original data (with `variable` as text so the labels fit in that column)
# followed by one row of raw p-values and one row of adjusted p-values.
result <- df %>%
  mutate(variable = as.character(variable)) %>%
  bind_rows(
    mutate(rbind(tmp, adj_value), variable = c('p.values', 'p.adjust'))
  )
Thank you, Ronak. I modified your former answer and I found that this also works and results in the same as you found:
result <- df %>%
summarise(across(!variable,
~wilcox.test(variable[.x == 0], variable[.x == 1])$p.value), exact=NULL) %>%
bind_rows(., p.adjust(., method = 'BH')) %>%
bind_rows(df, .) %>%
mutate(variable=replace(variable, is.na(variable), c('p.values', 'p.adjust')))
Thank you! :)

R function to change value after a condition has been fulfilled

Participants in an experiment took a test that has a rule that says "once a participant has gotten 6 items wrong in a window of 8 items, you stop running the test". However, some experimenters kept testing past this point. I now need to find a way in which I can automatically see where the test should have been stopped, and change all values following the end to 0 (= item wrong). I am not even sure if this is something that can be done in R.
To be clear, I would like to go row by row (which are the participants) and once there are six 0s in a given window of 8 columns (items), I would need all values after the sixth 0 to be 0 too.
While the reproducible data is below, here is a visualization of what I would need, where the blue cells are the ones that should change to 0:
Pre-changes
Post-changes
Reproducible data:
structure(list(Participant_ID = c("E01P01", "E01P02", "E01P03",
"E01P04", "E01P05", "E01P06", "E01P07", "E01P08", "E02P01", "E02P02"
), A2 = c(1, 1, 1, 0, 0, 1, 1, 1, 1, 1), A3 = c(1, 1, 0, 0, 0,
1, 0, 0, 0, 0), B1 = c(1, 1, 1, 0, 0, 1, 0, 0, 1, 1), B2 = c(1,
1, 1, 1, 1, 1, 0, 0, 0, 1), C3 = c(1, 0, 0, 1, 0, 1, 0, 0, 0,
1), C4 = c(1, 0, 0, 0, 0, 1, 0, 0, 1, 1), D1 = c(1, 0, 0, 0,
0, 1, 0, 0, 0, 0), D3 = c(1, 1, 1, 1, 0, 0, 1, 0, 0, 1), E1 = c(1,
0, 0, 0, 0, 1, 0, 0, 0, 1), E3 = c(1, 1, 0, 1, 0, 1, 0, 0, 0,
0), F1 = c(1, 0, 0, 0, 1, 0, 0, 1, 0, 0), F4 = c(1, 1, 1, 1,
0, 1, 0, 1, 1, 0), G1 = c(1, 0, 0, 0, 0, 1, 0, 0, 0, 1), G2 = c(0,
0, 0, 0, 1, 1, 1, 0, 1, 1)), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame"))
Any help is highly appreciated!
Here is a solution that involves some pivoting, rollsum, cumsum, if_else logic, then pivoting back. Let me know if it works.
library(tidyverse)
library(zoo)
# Enforce the stopping rule "six wrong answers (0) within a window of eight
# consecutive items": from the item at which the rule is first met onwards,
# every answer of that participant is set to 0.
structure(list(Participant_ID = c("E01P01", "E01P02", "E01P03",
"E01P04", "E01P05", "E01P06", "E01P07", "E01P08", "E02P01", "E02P02"
), A2 = c(1, 1, 1, 0, 0, 1, 1, 1, 1, 1), A3 = c(1, 1, 0, 0, 0,
1, 0, 0, 0, 0), B1 = c(1, 1, 1, 0, 0, 1, 0, 0, 1, 1), B2 = c(1,
1, 1, 1, 1, 1, 0, 0, 0, 1), C3 = c(1, 0, 0, 1, 0, 1, 0, 0, 0,
1), C4 = c(1, 0, 0, 0, 0, 1, 0, 0, 1, 1), D1 = c(1, 0, 0, 0,
0, 1, 0, 0, 0, 0), D3 = c(1, 1, 1, 1, 0, 0, 1, 0, 0, 1), E1 = c(1,
0, 0, 0, 0, 1, 0, 0, 0, 1), E3 = c(1, 1, 0, 1, 0, 1, 0, 0, 0,
0), F1 = c(1, 0, 0, 0, 1, 0, 0, 1, 0, 0), F4 = c(1, 1, 1, 1,
0, 1, 0, 1, 1, 0), G1 = c(1, 0, 0, 0, 0, 1, 0, 0, 0, 1), G2 = c(0,
0, 0, 0, 1, 1, 1, 0, 1, 1)), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame")) %>%
  as_tibble() %>%
  # Long format: one row per participant/item pair, items in column order.
  pivot_longer(-1) %>%
  group_by(Participant_ID) %>%
  mutate(
    # Wrong answers among the current item and up to seven preceding items.
    # partial = TRUE also evaluates the shorter windows at the start of the
    # test, so six errors within the first six or seven items are detected at
    # the sixth error instead of only once a full window of eight is available.
    running_total = zoo::rollapplyr(value == 0, width = 8, FUN = sum,
                                    partial = TRUE),
    # Becomes positive at the first item where the rule is met and stays so.
    should_terminate = cumsum(running_total >= 6),
    value = if_else(should_terminate > 0, 0, value)
  ) %>%
  ungroup() %>%
  select(Participant_ID, name, value) %>%
  # Back to the original wide layout: one row per participant.
  pivot_wider(names_from = name, values_from = value)

How to use ids from one dataframe to sum rows in another dataframe

I feel like this question has been asked before, but I can't seem to find an answer to it. Maybe my title is too vague, so feel free to change it.
So I have one data frame, a, with ids that correspond to column names in data frame b. Both data frames are simplified versions of a much larger data frame.
here is data frame a
a <- structure(list(V1 = structure(c(4L, 5L, 1L, 2L, 3L), .Label = c("GEN[D00105].GT",
"GEN[D00151].GT", "GEN[D00188].GT", "GEN[D86396].GT", "GEN[D86397].GT"
), class = "factor")), row.names = c(NA, -5L), class = "data.frame")
here is data frame b
b <- structure(list(`GEN[D01104].GT` = c(0, 0, 0, 0, 1, 0, 0, 2, 0,
1, 1, 1, 1, 0, 0, 0, 2, 0, 0, 0), `GEN[D01312].GT` = c(1, 0,
2, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 2, 0, 0, 2, 0, 0, 0), `GEN[D01878].GT` = c(0,
0, 0, 2, 0, 0, 2, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 2, 0, 0), `GEN[D01882].GT` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 2, 0, 0, 0, 0), `GEN[D01952].GT` = c(0,
0, 1, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0), `GEN[D01953].GT` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 2, 0, 0, 0, 2, 0), `GEN[D02053].GT` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0), `GEN[D00316].GT` = c(0,
0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2, 0, 0), `GEN[D01827].GT` = c(0,
0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0), `GEN[D01881].GT` = c(0,
0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 0, 2, 0, 2, 0), `GEN[D02044].GT` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0), `GEN[D02085].GT` = c(0,
0, 0, 2, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0), `GEN[D02204].GT` = c(0,
0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0), `GEN[D02276].GT` = c(0,
0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0), `GEN[D02297].GT` = c(0,
0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0), `GEN[D02335].GT` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 0), `GEN[D02397].GT` = c(0,
0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0), `GEN[D00856].GT` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0), `GEN[D00426].GT` = c(0,
0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0), `GEN[D02139].GT` = c(0,
0, 1, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0), `GEN[D02168].GT` = c(0,
0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0)), row.names = c(NA,
-20L), class = "data.frame")
I want to use the IDs in data frame a to pick out the columns of data frame b whose names match those IDs and sum them row by row, if that makes sense.
So in the past, I just did something like
# Manual approach: add the five sample columns named in data frame `a`,
# element by element, and store the per-row total as a new column of `b`.
b$affected.samples <- (b$`GEN[D86396].GT` + b$`GEN[D86397].GT` + b$`GEN[D00105].GT` + b$`GEN[D00151].GT` + b$`GEN[D00188].GT`)
which got annoying and took too much time, so I moved over to
# Per-row total of the first five columns of `b`, selected by position.
b$affected.samples <- rowSums(b[, 1:5])
Which isn't too bad for this example, but in my large data set my samples can be all over the place, and it's starting to take too much time to find where everything is. I was hoping there is a way to just use my data frame a to sum the correct columns in data frame b.
Hopefully, I have given you all the information you need! Let me know if you have any questions.
Thanks in advance!!
Extract the 'V1' column as a character vector, use it to select the columns of 'b' (assuming these column names are found in 'b'), and take the rowSums:
# Row-wise sum of the columns of `b` whose names appear in `a$V1`.
# `a$V1` is a factor, so it is converted to character first; indexing with
# the factor itself would select columns by integer code, not by name.
# IDs that have no matching column in `b` are dropped instead of raising
# "undefined columns selected"; if none match, every row sums to 0.
ids <- as.character(a$V1)
rowSums(b[ids[ids %in% names(b)]], na.rm = TRUE)

y-axis label of highest value not printed

I want to plot the hourly visits to a certain webpage of my website. The x-axis shows the hours (0 to 23), the y-axis shows the number of unique visits.
I'm suppressing the axes in plot() and adding them with axis(). I want only the lowest and highest y-values labelled:
axis(2,
at = seq(min(...), max(...), 1),
labels = c(min(...),
rep.int("", max(...) - min(...) - 1),
max(...)
)
)
(Ellipses in the code sample represent the column. I left it out to make the structure of the code easier to see.)
But in the plot, the label for the highest value does not appear:
Sometimes (depending on the range of values) I can get the highest value label to appear by changing rep.int("" ... to rep.int(" ", i.e., labelling the ticks with a space, but this doesn't always work.
Why does R not print the highest label? And, more importantly:
How can I force R to print the highest label?
Complete code example:
sitzungen <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 8, 4, 0, 8, 3, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 2, 0, 0, 2, 0, 0, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 2, 2, 1, 0, 0, 0, 1, 0, 0, 1, 3, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 4, 1, 2, 1, 7, 7, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 3, 8, 4, 1, 2, 0, 1, 1, 0, 0, 5, 0, 3, 3, 2, 3, 1, 2, 0, 1, 2, 0, 0, 0, 1, 0, 1, 2, 3, 0, 0, 3, 1, 6, 3, 9, 1, 0, 2, 1, 4, 8, 2, 2, 2, 0, 0, 0, 2, 1, 3, 1, 1, 2, 1, 2, 3, 1, 4, 3, 0, 2, 3, 1, 3, 1, 5, 2, 0, 0, 1, 0, 1, 2, 1, 0, 3, 0, 1, 0, 3, 7, 2, 2, 1, 2, 2, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 15, 2, 2, 1, 0, 0, 0, 0, 0, 2, 3, 0, 3, 0, 2, 1, 1, 2, 2, 4, 2, 1, 4, 2, 1, 2, 2, 1, 0, 0, 0, 7, 0, 2, 4, 2, 0, 2, 3, 5, 2, 1, 4, 4, 2, 0, 2, 4, 0, 0, 0, 0, 0, 0, 0, 1, 3, 0, 2, 1, 2, 1, 1, 2, 1, 4, 1, 1, 1, 0, 0, 3, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
# Hour-of-day index: the cycle 0-23 repeated 20 times, one entry per count
# in `sitzungen`.
stunde <- rep(0:23, times = 20)
stuendlich <- data.frame(cbind(stunde, sitzungen))

# Sum the session counts for each hour of the day.
aggr <- aggregate(sitzungen ~ stunde, data = stuendlich, FUN = sum)

# Repeat the hour-23 total at x = 24 so the step line spans the final hour.
aggr <- rbind(aggr, c(24, aggr$sitzungen[which(aggr$stunde == "23")]))

# Step plot without default axes; both axes are added by hand below.
plot(
  aggr,
  type = "s",
  xlim = c(0, 24),
  axes = FALSE,
  xlab = "Stunde",
  ylab = "Sitzungen",
  main = "Sitzungen pro Stunde (kumuliert)"
)

# x-axis: one tick per hour, placed at the middle of each step.
axis(1, at = 0:23 + 0.5, labels = 0:23)

# y-axis: a tick at every integer, with only the lowest and highest ticks
# given a non-blank label.
axis(
  2,
  at = seq(min(aggr$sitzungen), max(aggr$sitzungen), by = 1),
  labels = c(
    min(aggr$sitzungen),
    rep.int(" ", diff(range(aggr$sitzungen)) - 1),
    max(aggr$sitzungen)
  )
)
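Don't use " " or "". axis() omits any label that would abut or overlap a previously drawn label, and a blank string still counts as a drawn label, whereas an NA label is not drawn at all. Use NA: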
# y-axis with a tick at every unit step but labels only on the first and last
# tick. NA labels are not drawn, so they cannot crowd out the label of the
# highest tick the way "" or " " placeholders can.
# The label vector is derived from the tick vector itself, so the two always
# have the same length (also when min == max or the range is not a whole
# number), and each extreme label shows the value of the tick it sits on.
y_ticks <- seq(min(aggr$sitzungen), max(aggr$sitzungen), by = 1)
y_labels <- rep(NA_real_, length(y_ticks))
y_ends <- c(1L, length(y_ticks))
y_labels[y_ends] <- y_ticks[y_ends]
axis(2, at = y_ticks, labels = y_labels)

Resources