Related
I have three sets of data with the same variables and different observations. The variables all share the same name, but when I try to bind them using the rbind function I see this:
names do not match previous names.
Does anyone know how to fix the problem? My desired goal is to have one dataset with numerous observations of the same variables.
What I have tried so far is this:
Daten1
> attach(Daten1)
rel.Var.1 <- data.frame(Q35, Q37, Q38, Q42, Q46, Q47, Q50, Q51, Q52, Q55, Q60, Q61,
Q91_1, Q92_1, Q93_1, Q94_1, Q95_1, Q96_1, Q97_1, Q301_1, Q300_1, Q98_1,
Q99_1, Q100_1, Q101_1, Q102_1, Q103_1, Q104_1, Q105_1, Q106_1,
Q107_1, Q108_1, Q109_1, Q110_1, Q111_1, Q112_1, Q113_1, Q114_1,
Q115_1, Q116_1, Q117_1, Q118_1, Q119_1, Q121_1, Q122_1,
Q123_1, Q124_1, Q125_1, Q126_1, Q127_1, Q128_1, Q129_1, Q130_1,
Q131_1, Q132_1, Q133_1, Q134_1, Q135_1, Q136_1, Q137_1, Q138_1,
Q139_1, Q140_1, Q141_1, Q142_1, Q143_1, Q144_1, Q145_1,
Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, Q16, Q17, Q18, Q19, Q20,
Q21, Q22, Q23, Q24, Q25, Q26, Q27, Q28, Q29, Q30, Q31, Q32, Q33,
Q176, Q177, Q178, Q175, VPN)
>detach(Daten1)
>rel.Var.1 <- rel.Var.1 %>% rename(
neo_01 = Q35, neo_03 = Q37,neo_04 = Q38, neo_08 = Q42,
neo_12 = Q46,neo_13 = Q47, neo_16 = Q50, neo_17 = Q51, neo_18 = Q52,
neo_21 = Q55, neo_26 = Q60, neo_27 = Q61, TICS_1 = Q91_1, TICS_2 = Q92_1,
TICS_3 = Q93_1, TICS_4 = Q94_1, TICS_5 = Q95_1, TICS_6 = Q96_1,
TICS_7 = Q97_1, TICS_8 = Q301_1, TICS_9 = Q300_1, TICS_10 = Q98_1,
TICS_11 = Q99_1, TICS_12= Q100_1, TICS_13 = Q101_1, TICS_14 = Q102_1,
TICS_15 = Q103_1,
ICS_16 = Q104_1, TICS_17 = Q105_1, TICS_18 = Q106_1, TICS_19 = Q107_1,
TICS_20 = Q108_1, TICS_21 = Q109_1, TICS_22 = Q110_1, TICS_24 = Q111_1,
TICS_25 = Q112_1,
TICS_26 = Q113_1, TICS_27 = Q114_1, TICS_28 = Q115_1, TICS_29 = Q116_1,
TICS_30 = Q117_1, TICS_31 = Q118_1, TICS_32 = Q119_1, TICS_33 = Q121_1,
TICS_34 = Q122_1, TICS_35 = Q123_1, TICS_36 = Q124_1, TICS_37 = Q125_1,
TICS_38 = Q126_1, TICS_39 = Q127_1, TICS_40 = Q128_1, TICS_41 = Q129_1,
TICS_42 = Q130_1,
TICS_43 = Q131_1, TICS_44 = Q132_1, TICS_45 = Q133_1, TICS_46 = Q134_1,
TICS_47 = Q135_1, TICS_48 = Q136_1, TICS_49 = Q137_1,
TICS_50 = Q138_1, TICS_51 = Q139_1, TICS_52 = Q140_1, TICS_53 = Q141_1,
TICS_54 = Q142_1, TICS_55 = Q143_1, TICS_56 = Q144_1, TICS_57 = Q145_1,
HSPS_1 = Q7, HSPS_2 = Q8, HSPS_3 = Q9, HSPS_4 = Q10, HSPS_5 = Q11,
HSPS_6 = Q12, HSPS_7 = Q13, HSPS_8 = Q14,
HSPS_9 = Q15, HSPS_10 = Q16, HSPS_11 = Q17, HSPS_12 = Q18, HSPS_13 = Q19,
HSPS_14 = Q20, HSPS_15 = Q21, HSPS_16 = Q22,
HSPS_17 = Q23, HSPS_18 = Q24, HSPS_19 = Q25, HSPS_20 = Q26,
HSPS_21 = Q27, HSPS_22 = Q28, HSPS_23 = Q29,
HSPS_24 = Q30, HSPS_25 = Q31, HSPS_26 = Q32, HSPS_27 = Q33,
sex = Q176, Bildung = Q177, Tat = Q178, age= Q175)
>rel.Var.1 <- na.omit(rel.Var.1)
Daten 2
> attach(Daten2)
rel.Var.2 <- data.frame(Q182, Q186, Q188, Q196, Q204, Q206, Q212, Q214,
Q216, Q222, Q232, Q234,
Q221, Q222.1, Q223, Q224.1, Q225, Q226.1, Q227, Q174, Q175, Q228.1, Q229,
Q230.1,Q231, Q232.1, Q233, Q234.1, Q235, Q236.1, Q237, Q238.1,
Q239,Q240.1, Q241, Q242, Q243, Q244, Q245, Q246, Q247, Q248, Q249, Q251,
Q252, Q253, Q254, Q255, Q256, Q257, Q258, Q259, Q260, Q261, Q262, Q263,
Q264, Q265, Q266, Q267, Q268,
Q269, Q270, Q271, Q272, Q273, Q274, Q275,
Q207, Q209, Q211, Q213, Q215, Q217, Q219, Q221.1, Q223.1, Q225.1,
Q227.1, Q229.1, Q231.1, Q233.1, Q235.1, Q237.1, Q239.1, Q241.1, Q243.1,
Q245.1, Q247.1, Q249.1, Q251.1, Q253.1, Q255.1, Q257.1, Q259.1,
Q6, Q7,Q8, Q5, VPN)
>detach(Daten2)
# Rename the Daten2 question codes to the shared scale names so that the
# columns of rel.Var.2 line up with rel.Var.1 and rel.Var.3 when the three
# data sets are stacked.
#
# Two mappings differ from the earlier version of this call:
#   * Q234.1 -> ICS_16 and Q254 -> TICS_36. These are the names the other two
#     data sets use; this block previously produced TICS_16 and ICS_36, so
#     rbind() stopped with "names do not match previous names".
#     NOTE(review): "ICS_16" looks like a misspelling of "TICS_16" that is
#     shared by Daten1 and Daten3 -- confirm before renaming it everywhere.
#   * HSPS_18 ... HSPS_22 take Q241.1, Q243.1, Q245.1, Q247.1 and Q249.1, the
#     columns that were selected into rel.Var.2 for these items. Q241 ... Q249
#     are already consumed by TICS_24 ... TICS_32.
rel.Var.2 <- rel.Var.2 %>% rename(
  # NEO items
  neo_01 = Q182, neo_03 = Q186, neo_04 = Q188, neo_08 = Q196,
  neo_12 = Q204, neo_13 = Q206, neo_16 = Q212, neo_17 = Q214,
  neo_18 = Q216, neo_21 = Q222, neo_26 = Q232, neo_27 = Q234,
  # TICS items (there is no item 23 in any of the three data sets)
  TICS_1 = Q221, TICS_2 = Q222.1, TICS_3 = Q223, TICS_4 = Q224.1,
  TICS_5 = Q225, TICS_6 = Q226.1, TICS_7 = Q227, TICS_8 = Q174,
  TICS_9 = Q175, TICS_10 = Q228.1, TICS_11 = Q229, TICS_12 = Q230.1,
  TICS_13 = Q231, TICS_14 = Q232.1, TICS_15 = Q233, ICS_16 = Q234.1,
  TICS_17 = Q235, TICS_18 = Q236.1, TICS_19 = Q237, TICS_20 = Q238.1,
  TICS_21 = Q239, TICS_22 = Q240.1, TICS_24 = Q241, TICS_25 = Q242,
  TICS_26 = Q243, TICS_27 = Q244, TICS_28 = Q245, TICS_29 = Q246,
  TICS_30 = Q247, TICS_31 = Q248, TICS_32 = Q249, TICS_33 = Q251,
  TICS_34 = Q252, TICS_35 = Q253, TICS_36 = Q254, TICS_37 = Q255,
  TICS_38 = Q256, TICS_39 = Q257, TICS_40 = Q258, TICS_41 = Q259,
  TICS_42 = Q260, TICS_43 = Q261, TICS_44 = Q262, TICS_45 = Q263,
  TICS_46 = Q264, TICS_47 = Q265, TICS_48 = Q266, TICS_49 = Q267,
  TICS_50 = Q268, TICS_51 = Q269, TICS_52 = Q270, TICS_53 = Q271,
  TICS_54 = Q272, TICS_55 = Q273, TICS_56 = Q274, TICS_57 = Q275,
  # HSPS items
  HSPS_1 = Q207, HSPS_2 = Q209, HSPS_3 = Q211, HSPS_4 = Q213,
  HSPS_5 = Q215, HSPS_6 = Q217, HSPS_7 = Q219, HSPS_8 = Q221.1,
  HSPS_9 = Q223.1, HSPS_10 = Q225.1, HSPS_11 = Q227.1, HSPS_12 = Q229.1,
  HSPS_13 = Q231.1, HSPS_14 = Q233.1, HSPS_15 = Q235.1, HSPS_16 = Q237.1,
  HSPS_17 = Q239.1, HSPS_18 = Q241.1, HSPS_19 = Q243.1, HSPS_20 = Q245.1,
  HSPS_21 = Q247.1, HSPS_22 = Q249.1, HSPS_23 = Q251.1, HSPS_24 = Q253.1,
  HSPS_25 = Q255.1, HSPS_26 = Q257.1, HSPS_27 = Q259.1,
  # Demographics
  sex = Q6, Bildung = Q7, Tat = Q8, age = Q5
)
>rel.Var.2 <- na.omit(rel.Var.2)
Daten3
> attach(Daten3)
rel.Var.3 <- data.frame(neo_03, neo_08, neo_12, neo_16, neo_21, neo_26,
neo_01, neo_04, neo_13,
neo_17, neo_18, neo_27, TICS_1, TICS_2, TICS_3, TICS_4, TICS_5, TICS_6,
TICS_7, TICS_8, TICS_9,
TICS_10, TICS_11, TICS_12, TICS_13, TICS_14, TICS_15, ICS_16, TICS_17,
TICS_18, TICS_19, TICS_20, TICS_21, TICS_22,
TICS_24, TICS_25, TICS_26, TICS_27, TICS_28, TICS_29, TICS_30, TICS_31,
TICS_32, TICS_33, TICS_34, TICS_35, TICS_36,
TICS_37, TICS_38, TICS_39, TICS_40, TICS_41, TICS_42, TICS_43, TICS_44,
TICS_45, TICS_46, TICS_47, TICS_48, TICS_49, TICS_50,
TICS_51, TICS_52, TICS_53, TICS_54, TICS_55, TICS_56, TICS_57,
HSPS_1, HSPS_2, HSPS_3, HSPS_4, HSPS_5, HSPS_6, HSPS_7, HSPS_8, HSPS_9,
HSPS_10, HSPS_11, HSPS_12, HSPS_13,
HSPS_14, HSPS_15, HSPS_16, HSPS_17, HSPS_18, HSPS_19, HSPS_20, HSPS_21,
HSPS_22, HSPS_23, HSPS_24, HSPS_25, HSPS_26, HSPS_27,
Geschlecht, Bildungsabschluss, derzeitige_Beschaeftigung, Alter, NR)
>detach(Daten3)
>rel.Var.3 <- rel.Var.3 %>% rename(
sex = Geschlecht, Bildung = Bildungsabschluss, Tat =
derzeitige_Beschaeftigung, age= Alter, VPN = NR)
>rel.Var.3 <- na.omit(rel.Var.3)
>View(rel.Var.1)
>View(rel.Var.2)
>View(rel.Var.3)
## Datensaetze zusammenfuegen ##bind data
data_gesamt <- rbind(rel.Var.1, rel.Var.2, rel.Var.3)
data_gesamt <- bind_rows(rel.Var.1, rel.Var.2, rel.Var.3)
With bind_rows I get this error:
Can't combine `..1$neo_01` <character> and `..3$neo_01` <integer>.
Backtrace:
1. dplyr::bind_rows(rel.Var.1, rel.Var.2, rel.Var.3)
2. vctrs::vec_rbind(!!!dots, .names_to = .id)
4. vctrs::vec_default_ptype2(...)
5. vctrs::stop_incompatible_type(...)
6. vctrs:::stop_incompatible(...)
7. vctrs:::stop_vctrs(...)
I have a nested list with 3 levels:
length of string (I don't care about that when all is done, it's just for the ordering of the lists)
Category (I want to keep this in the output)
Elements (strings I also want to keep)
I want to unlist/flatten this construct to get a character vector with the order in the list and all elements should be named:
"Category""_""Element"
I cannot get this done. My problem is that either level 2 is not kept as a name or that it keeps the index as well so I get wrong names (e.g. Dog, Dog1, Dog2, Dog3 instead of Dog).
I have tried first purrr::flatten -> the resulting list looks good (I think).
Then I tried unlist of the flattened output to get all these items into a character vector and this is, where it gets me.
I receive names of the elements like:
TCRBV11-031 TCRBV11-032 TCRBV11-033
but they should all be TCRBV11-03
How can I keep the name of the sublist and combine it with the element. Preferably with a "_" in-between?
Edit: dput of an example list:
list(`11` = list(`TCRBV06-01` = "CAIDRSYEQYF", `TCRBV29-01` = "CSADRKETQYF"),
`12` = list(`TCRBV05-06` = "CASSRPNTEAFF", `TCRBV06-01` = "CASGHKNTEAFF",
`TCRBV07-03` = "CASSLRLHEQYF"), `13` = list(`TCRBV02-01` = "CASQGIVGYEQYF",
`TCRBV05-01` = "CASTGSLNTEAFF", `TCRBV06-04` = "CASSLGKNTEAFF",
`TCRBV07-03` = "CASSLTEGGGYTF", `TCRBV07-08` = "CASSPDTNTEAFF",
`TCRBV07-09` = "CASSPDRDTEAFF", `TCRBV10-03` = "CAISMISNQPQHF",
`TCRBV18-01` = "CASSPPETYEQYF", `TCRBV19-01` = "CASSIGGSTEAFF",
`TCRBV20-01` = c("CSAMAYRADGYTF", "CSAREAQAETQYF", "CSAITSRVDGYTF"
), `TCRBV27-01` = "CASSFPGGYGYTF"), `14` = list(`TCRBV05-06` = "CASSLVGASYEQYF",
`TCRBV06-01` = c("CASRQGQENQPQHF", "CASRRQGEGTEAFF"),
`TCRBV07-09` = "CASSPGVSGNTIYF", `TCRBV10-02` = "CASSPAEGGYEQYF",
`TCRBV13-01` = "CASSLGQGVYEQYF", `TCRBV24-01` = "CATRSTGANTEAFF"),
`15` = list(`TCRBV02-01` = "CASSDVLAGGPEQYF", `TCRBV04-01` = "CASSQDPAGGRKLFF",
`TCRBV04-03` = "CASSQDSTALYGYTF", `TCRBV05-01` = "CASSFEDRGAGGYTF",
`TCRBV05-04` = c("CASGLTGEGNTEAFF", "CASSPVSSRTDTQYF"
), `TCRBV06-04` = "CASSDSSSGYNEQFF", `TCRBV07-09` = "CASSPTSGVGSEQFF",
`TCRBV10-03` = "CAISGGGGSTNIQYF", `TCRBV11-03` = "CASSDETGAGYEQFF",
`TCRBV13-01` = "CASSSSTALYNEQFF", `TCRBV18-01` = "CASSPLAGGSNEQYF",
`TCRBV25-01` = "CASSEDRGLDQPQHF", `TCRBV28-01` = "CASSPVAGGHYEQYF",
`TCRBV30-01` = "CAWSWGGGLGEKLFF"), `16` = list(`TCRBV04-01` = "CASSQVVIPPGEKLFF",
`TCRBV14-01` = "CASSQPGLLGTDTQYF", `TCRBV15-01` = "CATSRWGTGELTEAFF",
`TCRBV19-01` = "CASSIGTADIYNEQFF", `TCRBV20-01` = "CSARGQTSGFTYEQYF"),
`17` = list(`TCRBV04-01` = "CASSQDPRTASYNEQFF", `TCRBV04-02` = "CASSQDLETSGDDIQYF",
`TCRBV25-01` = "CASSEYTTGEVTDTQYF"), `18` = list(`TCRBV12-05` = "CASGLVGGVLSGANVLTF"),
`19` = list(`TCRBV04-03` = "CASSQDPKGGLLNTGELFF"))
This is my matrix:
x<-structure(list(Sample_250 = list(`ITUB4~time+ITSA4` = 0.0189772705000679,
`ITSA4~time+ITUB4` = 0.0172247829378391, `KROT3~time+ESTC3` = 0.362976295896543,
`ESTC3~time+KROT3` = 0.919654541750147, `ELET6~time+ELET3` = 0.563149047013394,
`ELET3~time+ELET6` = 0.938978962441099, `VALE5~time+BRAP4` = 0.00879735041567956,
`BRAP4~time+VALE5` = 0.00327639807633581, `RSID3~time+PDGR3` = 0.537991430220927,
`PDGR3~time+RSID3` = 0.246554103682342, `PDGR3~time+BISA3` = 0.559254391144534,
`BISA3~time+PDGR3` = 0.61031816244403, `VALE5~time+VALE3` = 0.180842743583616,
`VALE3~time+VALE5` = 0.66647273985911, `BRPR3~time+BRML3` = 0.338499489464644,
`BRML3~time+BRPR3` = 0.319063657443075, `PETR4~time+PETR3` = 0.125540460125629,
`PETR3~time+PETR4` = 0.124801328997536, `DTEX3~time+CSAN3` = 0.93868928574058,
`CSAN3~time+DTEX3` = 0.237699406950144, `RSID3~time+BISA3` = 0.449718913669525,
`BISA3~time+RSID3` = 0.7561632200477, `ELPL4~time+ELET3` = 0.174294574975377,
`ELET3~time+ELPL4` = 0.300066723578605, `EVEN3~time+CSAN3` = 0.734452997271797,
`CSAN3~time+EVEN3` = 0.104402290451259, `KROT3~time+CIEL3` = 0.93683315998679,
`CIEL3~time+KROT3` = 0.936544198858508, `MRFG3~time+BISA3` = 0.588077047082012,
`BISA3~time+MRFG3` = 0.241408284405396), Sample_220 = list(
`ITUB4~time+ITSA4` = 0.0173697888550166, `ITSA4~time+ITUB4` = 0.0149942952128483,
`KROT3~time+ESTC3` = 0.482794731209648, `ESTC3~time+KROT3` = 0.890472799194387,
`ELET6~time+ELET3` = 0.289262231792853, `ELET3~time+ELET6` = 0.583772170805346,
`VALE5~time+BRAP4` = 0.0115132699560557, `BRAP4~time+VALE5` = 0.00454387128721931,
`RSID3~time+PDGR3` = 0.701361295124465, `PDGR3~time+RSID3` = 0.276392398580336,
`PDGR3~time+BISA3` = 0.459917895151059, `BISA3~time+PDGR3` = 0.932334809205404,
`VALE5~time+VALE3` = 0.228621489426817, `VALE3~time+VALE5` = 0.599616896543261,
`BRPR3~time+BRML3` = 0.423214373690621, `BRML3~time+BRPR3` = 0.43367402957197,
`PETR4~time+PETR3` = 0.0726218638061883, `PETR3~time+PETR4` = 0.0684556705423691,
`DTEX3~time+CSAN3` = 0.957213428702438, `CSAN3~time+DTEX3` = 0.643249328242026,
`RSID3~time+BISA3` = 0.140702283930701, `BISA3~time+RSID3` = 0.438759561659429,
`ELPL4~time+ELET3` = 0.108415504373493, `ELET3~time+ELPL4` = 0.259235741006097,
`EVEN3~time+CSAN3` = 0.995097190780355, `CSAN3~time+EVEN3` = 0.35833286961364,
`KROT3~time+CIEL3` = 0.883381800410008, `CIEL3~time+KROT3` = 0.58096328992918,
`MRFG3~time+BISA3` = 0.811273794794714, `BISA3~time+MRFG3` = 0.162511686203042),
Sample_200 = list(`ITUB4~time+ITSA4` = 0.0269410475431228,
`ITSA4~time+ITUB4` = 0.0268281043283851, `KROT3~time+ESTC3` = 0.648973944293657,
`ESTC3~time+KROT3` = 0.843925839073412, `ELET6~time+ELET3` = 0.85074648265282,
`ELET3~time+ELET6` = 0.926090646237098, `VALE5~time+BRAP4` = 0.0298988391464108,
`BRAP4~time+VALE5` = 0.0210534678726486, `RSID3~time+PDGR3` = 0.913261323047721,
`PDGR3~time+RSID3` = 0.460744060168818, `PDGR3~time+BISA3` = 0.681848278084124,
`BISA3~time+PDGR3` = 0.700508228924671, `VALE5~time+VALE3` = 0.404824931817606,
`VALE3~time+VALE5` = 0.858492744479535, `BRPR3~time+BRML3` = 0.282313695830455,
`BRML3~time+BRPR3` = 0.421361074266136, `PETR4~time+PETR3` = 0.0389941410401918,
`PETR3~time+PETR4` = 0.0366363568643157, `DTEX3~time+CSAN3` = 0.593381022274927,
`CSAN3~time+DTEX3` = 0.296186622367649, `RSID3~time+BISA3` = 0.136337062156413,
`BISA3~time+RSID3` = 0.253647313739565, `ELPL4~time+ELET3` = 0.0404140463603602,
`ELET3~time+ELPL4` = 0.0584026420525388, `EVEN3~time+CSAN3` = 0.992224496682121,
`CSAN3~time+EVEN3` = 0.364016491282029, `KROT3~time+CIEL3` = 0.923443434909376,
`CIEL3~time+KROT3` = 0.492267643047159, `MRFG3~time+BISA3` = 0.505439622239642,
`BISA3~time+MRFG3` = 0.433741779126583), Sample_180 = list(
`ITUB4~time+ITSA4` = 0.0709729806619366, `ITSA4~time+ITUB4` = 0.0703318148854131,
`KROT3~time+ESTC3` = 0.714222637099451, `ESTC3~time+KROT3` = 0.983192555139107,
`ELET6~time+ELET3` = 0.651446390753224, `ELET3~time+ELET6` = 0.504251519490735,
`VALE5~time+BRAP4` = 0.0655201102796135, `BRAP4~time+VALE5` = 0.064459649024225,
`RSID3~time+PDGR3` = 0.966515813873172, `PDGR3~time+RSID3` = 0.353225059948276,
`PDGR3~time+BISA3` = 0.819582167704402, `BISA3~time+PDGR3` = 0.457403474593761,
`VALE5~time+VALE3` = 0.834891076683459, `VALE3~time+VALE5` = 0.624305154223115,
`BRPR3~time+BRML3` = 0.338684631277372, `BRML3~time+BRPR3` = 0.645983354906404,
`PETR4~time+PETR3` = 0.016615774081754, `PETR3~time+PETR4` = 0.0165629129043023,
`DTEX3~time+CSAN3` = 0.642061011299162, `CSAN3~time+DTEX3` = 0.424690135396935,
`RSID3~time+BISA3` = 0.101897354576195, `BISA3~time+RSID3` = 0.204241392846169,
`ELPL4~time+ELET3` = 0.0729734425567139, `ELET3~time+ELPL4` = 0.128996393897499,
`EVEN3~time+CSAN3` = 0.899884399768484, `CSAN3~time+EVEN3` = 0.146722568327017,
`KROT3~time+CIEL3` = 0.830125914939971, `CIEL3~time+KROT3` = 0.567087012782755,
`MRFG3~time+BISA3` = 0.122725171728208, `BISA3~time+MRFG3` = 0.459448430490008)), row.names = c("ITUB4~time+ITSA4",
"ITSA4~time+ITUB4", "KROT3~time+ESTC3", "ESTC3~time+KROT3", "ELET6~time+ELET3",
"ELET3~time+ELET6", "VALE5~time+BRAP4", "BRAP4~time+VALE5", "RSID3~time+PDGR3",
"PDGR3~time+RSID3", "PDGR3~time+BISA3", "BISA3~time+PDGR3", "VALE5~time+VALE3",
"VALE3~time+VALE5", "BRPR3~time+BRML3", "BRML3~time+BRPR3", "PETR4~time+PETR3",
"PETR3~time+PETR4", "DTEX3~time+CSAN3", "CSAN3~time+DTEX3", "RSID3~time+BISA3",
"BISA3~time+RSID3", "ELPL4~time+ELET3", "ELET3~time+ELPL4", "EVEN3~time+CSAN3",
"CSAN3~time+EVEN3", "KROT3~time+CIEL3", "CIEL3~time+KROT3", "MRFG3~time+BISA3",
"BISA3~time+MRFG3"), class = "data.frame")
1st question) I want to remove all rows that contain values below 0.10. The check for values below 0.10 should cover all 4 columns.
2nd question) I want to remove all rows that contain values below 0.10 in the first 3 columns.
I tried this:
x[x[1:nrow(x),]<.10,]
Is it possible to do this with a basic function in R?
Any help ?
Thanks
Try for question 1 x[!apply(x, 1, function(x) any(x < .10)), ]
Sample_250 Sample_220 Sample_200 Sample_180
KROT3~time+ESTC3 0.3629763 0.4827947 0.6489739 0.7142226
ESTC3~time+KROT3 0.9196545 0.8904728 0.8439258 0.9831926
ELET6~time+ELET3 0.563149 0.2892622 0.8507465 0.6514464
ELET3~time+ELET6 0.938979 0.5837722 0.9260906 0.5042515
RSID3~time+PDGR3 0.5379914 0.7013613 0.9132613 0.9665158
PDGR3~time+RSID3 0.2465541 0.2763924 0.4607441 0.3532251
PDGR3~time+BISA3 0.5592544 0.4599179 0.6818483 0.8195822
BISA3~time+PDGR3 0.6103182 0.9323348 0.7005082 0.4574035
VALE5~time+VALE3 0.1808427 0.2286215 0.4048249 0.8348911
VALE3~time+VALE5 0.6664727 0.5996169 0.8584927 0.6243052
BRPR3~time+BRML3 0.3384995 0.4232144 0.2823137 0.3386846
BRML3~time+BRPR3 0.3190637 0.433674 0.4213611 0.6459834
DTEX3~time+CSAN3 0.9386893 0.9572134 0.593381 0.642061
CSAN3~time+DTEX3 0.2376994 0.6432493 0.2961866 0.4246901
RSID3~time+BISA3 0.4497189 0.1407023 0.1363371 0.1018974
BISA3~time+RSID3 0.7561632 0.4387596 0.2536473 0.2042414
EVEN3~time+CSAN3 0.734453 0.9950972 0.9922245 0.8998844
CSAN3~time+EVEN3 0.1044023 0.3583329 0.3640165 0.1467226
KROT3~time+CIEL3 0.9368332 0.8833818 0.9234434 0.8301259
CIEL3~time+KROT3 0.9365442 0.5809633 0.4922676 0.567087
MRFG3~time+BISA3 0.588077 0.8112738 0.5054396 0.1227252
BISA3~time+MRFG3 0.2414083 0.1625117 0.4337418 0.4594484
For question 2: x[!apply(x[, 1:3], 1, function(x) any(x < .10)), ]
Sample_250 Sample_220 Sample_200 Sample_180
KROT3~time+ESTC3 0.3629763 0.4827947 0.6489739 0.7142226
ESTC3~time+KROT3 0.9196545 0.8904728 0.8439258 0.9831926
ELET6~time+ELET3 0.563149 0.2892622 0.8507465 0.6514464
ELET3~time+ELET6 0.938979 0.5837722 0.9260906 0.5042515
RSID3~time+PDGR3 0.5379914 0.7013613 0.9132613 0.9665158
PDGR3~time+RSID3 0.2465541 0.2763924 0.4607441 0.3532251
PDGR3~time+BISA3 0.5592544 0.4599179 0.6818483 0.8195822
BISA3~time+PDGR3 0.6103182 0.9323348 0.7005082 0.4574035
VALE5~time+VALE3 0.1808427 0.2286215 0.4048249 0.8348911
VALE3~time+VALE5 0.6664727 0.5996169 0.8584927 0.6243052
BRPR3~time+BRML3 0.3384995 0.4232144 0.2823137 0.3386846
BRML3~time+BRPR3 0.3190637 0.433674 0.4213611 0.6459834
DTEX3~time+CSAN3 0.9386893 0.9572134 0.593381 0.642061
CSAN3~time+DTEX3 0.2376994 0.6432493 0.2961866 0.4246901
RSID3~time+BISA3 0.4497189 0.1407023 0.1363371 0.1018974
BISA3~time+RSID3 0.7561632 0.4387596 0.2536473 0.2042414
EVEN3~time+CSAN3 0.734453 0.9950972 0.9922245 0.8998844
CSAN3~time+EVEN3 0.1044023 0.3583329 0.3640165 0.1467226
KROT3~time+CIEL3 0.9368332 0.8833818 0.9234434 0.8301259
CIEL3~time+KROT3 0.9365442 0.5809633 0.4922676 0.567087
MRFG3~time+BISA3 0.588077 0.8112738 0.5054396 0.1227252
BISA3~time+MRFG3 0.2414083 0.1625117 0.4337418 0.4594484
Does this do what you want?
In regards to question 1:
# TRUE for every row that has at least one value below 0.1 in the first
# three columns.
cond1 <- apply(x[,1:3] < 0.1, 1, any)
# Keep only the rows for which that is not the case.
y <- x[!cond1, ]
# Inspect the filtered result (not the unfiltered input x).
head(y)
#                 Sample_250 Sample_220 Sample_200 Sample_180
#KROT3~time+ESTC3 0.3629763 0.4827947 0.6489739 0.7142226
#ESTC3~time+KROT3 0.9196545 0.8904728 0.8439258 0.9831926
#ELET6~time+ELET3 0.563149 0.2892622 0.8507465 0.6514464
#ELET3~time+ELET6 0.938979 0.5837722 0.9260906 0.5042515
#RSID3~time+PDGR3 0.5379914 0.7013613 0.9132613 0.9665158
#PDGR3~time+RSID3 0.2465541 0.2763924 0.4607441 0.3532251
In regards to question 2:
# TRUE for every row in which all four columns are below 0.1.
cond2 <- apply(x < 0.1, 1, all)
# Keep only the rows for which that is not the case.
z <- x[!cond2, ]
# Inspect the filtered result z.
head(z)
#                 Sample_250 Sample_220 Sample_200 Sample_180
#KROT3~time+ESTC3 0.3629763 0.4827947 0.6489739 0.7142226
#ESTC3~time+KROT3 0.9196545 0.8904728 0.8439258 0.9831926
#ELET6~time+ELET3 0.563149 0.2892622 0.8507465 0.6514464
#ELET3~time+ELET6 0.938979 0.5837722 0.9260906 0.5042515
#RSID3~time+PDGR3 0.5379914 0.7013613 0.9132613 0.9665158
#PDGR3~time+RSID3 0.2465541 0.2763924 0.4607441 0.3532251
For the first question:
subset(x, apply(x, 1, function(x) all(x > 0.1)) == TRUE)
For the second one:
subset(x, apply(x[, 1:3], 1, function(x) all(x > 0.1)) == TRUE)
I am working on a data mining project (as a total coding outsider) and am trying to run a K-Nearest Neighbor analysis. However, I keep getting the "no missing values are allowed" error. My data does not have missing values so something must be wrong with my code. Can anyone help?
# K-nearest-neighbour classification of the `neighborhood` column.

# Read the comma-separated file picked interactively; keep strings as
# character vectors.
AirbnbNYCApril <- read.delim(file = file.choose(),
                             header = TRUE,
                             sep = ",",
                             stringsAsFactors = FALSE)
str(AirbnbNYCApril)
# Drop every row that has a missing value in any column.
AirbnbNYCApril <- na.omit(AirbnbNYCApril)

# Shuffle the rows reproducibly, then use the first 75 % for training and the
# remainder for testing.
set.seed(1)
n <- nrow(AirbnbNYCApril)
Train_indices <- 1:round(0.75 * n)
Test_indices <- (round(0.75 * n) + 1):n
AirbnbNYCApril_shuffle <- AirbnbNYCApril[sample(n), ]
AirbnbNYCApril_Train <- AirbnbNYCApril_shuffle[Train_indices, ]
AirbnbNYCApril_Test <- AirbnbNYCApril_shuffle[Test_indices, ]

# Separate the class labels from the features.
Train_labels <- AirbnbNYCApril_Train$neighborhood
Test_labels <- AirbnbNYCApril_Test$neighborhood
AirbnbNYCApril_Train$neighborhood <- NULL
AirbnbNYCApril_Test$neighborhood <- NULL

# Min-max scale each feature with the minimum and maximum of the TRAINING
# data and apply the same constants to the test data.
# NOTE(review): if a feature is constant in the training data the divisor is
# 0 and the result is NaN, which counts as a missing value -- check this if
# knn() reports "no missing values are allowed".
min_reviews <- min(AirbnbNYCApril_Train$reviews)
max_reviews <- max(AirbnbNYCApril_Train$reviews)
AirbnbNYCApril_Train$reviews <- (AirbnbNYCApril_Train$reviews - min_reviews) / (max_reviews - min_reviews)
AirbnbNYCApril_Test$reviews <- (AirbnbNYCApril_Test$reviews - min_reviews) / (max_reviews - min_reviews)

min_accommodates <- min(AirbnbNYCApril_Train$accommodates)
max_accommodates <- max(AirbnbNYCApril_Train$accommodates)
AirbnbNYCApril_Train$accommodates <- (AirbnbNYCApril_Train$accommodates - min_accommodates) / (max_accommodates - min_accommodates)
# The test set must be scaled as well; previously only the training set was.
AirbnbNYCApril_Test$accommodates <- (AirbnbNYCApril_Test$accommodates - min_accommodates) / (max_accommodates - min_accommodates)

min_price <- min(AirbnbNYCApril_Train$price)
max_price <- max(AirbnbNYCApril_Train$price)
AirbnbNYCApril_Train$price <- (AirbnbNYCApril_Train$price - min_price) / (max_price - min_price)
AirbnbNYCApril_Test$price <- (AirbnbNYCApril_Test$price - min_price) / (max_price - min_price)

min_lat <- min(AirbnbNYCApril_Train$latitude)
# Was `$latitutde`: that column does not exist, so max() received NULL and
# returned -Inf, which turned every scaled latitude into zero.
max_lat <- max(AirbnbNYCApril_Train$latitude)
AirbnbNYCApril_Train$latitude <- (AirbnbNYCApril_Train$latitude - min_lat) / (max_lat - min_lat)
AirbnbNYCApril_Test$latitude <- (AirbnbNYCApril_Test$latitude - min_lat) / (max_lat - min_lat)

min_lon <- min(AirbnbNYCApril_Train$longitude)
max_lon <- max(AirbnbNYCApril_Train$longitude)
AirbnbNYCApril_Train$longitude <- (AirbnbNYCApril_Train$longitude - min_lon) / (max_lon - min_lon)
AirbnbNYCApril_Test$longitude <- (AirbnbNYCApril_Test$longitude - min_lon) / (max_lon - min_lon)

# Classify each test row by its 5 nearest training rows. `[, -1]` drops the
# first column (presumably an identifier -- confirm); every remaining column
# has to be numeric for knn().
neighborhood_prediction <- knn(train = AirbnbNYCApril_Train[, -1], test = AirbnbNYCApril_Test[, -1], cl = Train_labels, k = 5)

# Cross-tabulate the true labels (rows) against the predictions (columns).
confusion_matrix <- table(Test_labels, neighborhood_prediction)
print(confusion_matrix)
To calculate the Red Edge Position Index, I need to find the wavelength value (column name) corresponding to the maximum derivative of reflectance in the red edge region from 690nm to 740nm. I have included a subset of my dataframe below, it contains the correct interval...
I have 640 rows (Sample) of 2151 measurements (values) plus a few categorical columns at the start (e.g. plantType and plantCondition). I need to find the column holding the maximum of the derivative of the values in the specified interval and write the corresponding wavelength to the REPI column.
I am trying something like this but I do not know how to calculate the maximum of the derivative in the specified interval
# find the maximum of the derivative of the values in columns x690:x740
# attempt to find for single sample first
> which( colnames(spec.data)=="X690")
[1] 352
> which( colnames(spec.data)=="X740")
[1] 402
# I want to return the values of the differential but this doesn't work
> foo.vector <- diff(spec.data[1,352:402])
>> Error in r[i1] - r[-length(r):-(length(r) - lag + 1L)] : non-numeric argument to binary operator
This makes sense because I don't have the dt in dx/dt, but I am not sure how to retrieve the position of the maximum value of the derivative in this interval. Once I have it, I think I would run:
> spec.data$REPI <- which( colnames(spec.data) == max(foo.vector))
Then I think I would lapply this for each row?
Can anyone point me towards a solution for this?
Thank you...
subset of data from dput
> dput(spec.data[1:2, c(1:3, 7, 300:450)])
structure(list(Sample = c("JUMO_G1 P1T9 Leaf Clip00000.asd",
"JUMO_G1 P1T9 Leaf Clip00001.asd"), plantType = c("JUMO", "JUMO"
), plantCondition = c("G", "G"), REPI = c(NA_real_, NA_real_),
X638 = c(0.0611, 0.06114), X639 = c(0.0606, 0.06064), X640 = c(0.0601,
0.06012), X641 = c(0.0595, 0.05953), X642 = c(0.0589, 0.05893
), X643 = c(0.0584, 0.05834), X644 = c(0.0577, 0.05775),
X645 = c(0.05717, 0.05717), X646 = c(0.0566, 0.05664), X647 = c(0.0562,
0.05618), X648 = c(0.0557, 0.05573), X649 = c(0.0554, 0.05536
), X650 = c(0.0551, 0.05505), X651 = c(0.0547, 0.05475),
X652 = c(0.05448, 0.05447), X653 = c(0.0542, 0.05421), X654 = c(0.054,
0.05395), X655 = c(0.0536, 0.05357), X656 = c(0.0532, 0.05319
), X657 = c(0.0528, 0.05277), X658 = c(0.0523, 0.05229),
X659 = c(0.0518, 0.05176), X660 = c(0.05128, 0.05126), X661 = c(0.0508,
0.05077), X662 = c(0.0503, 0.05024), X663 = c(0.0498, 0.04978
), X664 = c(0.0494, 0.04936), X665 = c(0.049, 0.04897), X666 = c(0.04869,
0.04866), X667 = c(0.0484, 0.04838), X668 = c(0.0482, 0.04815
), X669 = c(0.048, 0.04797), X670 = c(0.0479, 0.04782), X671 = c(0.0478,
0.04775), X672 = c(0.0478, 0.04773), X673 = c(0.0478, 0.04773
), X674 = c(0.0478, 0.04776), X675 = c(0.0479, 0.04786),
X676 = c(0.0481, 0.04802), X677 = c(0.0483, 0.0482), X678 = c(0.0486,
0.04843), X679 = c(0.0489, 0.04873), X680 = c(0.04925, 0.04911
), X681 = c(0.0498, 0.04962), X682 = c(0.0504, 0.05026),
X683 = c(0.05122, 0.05103), X684 = c(0.0522, 0.052), X685 = c(0.0533,
0.05317), X686 = c(0.0548, 0.05458), X687 = c(0.05647, 0.05627
), X688 = c(0.0584, 0.05824), X689 = c(0.0608, 0.06057),
X690 = c(0.0634, 0.06326), X691 = c(0.0664, 0.06626), X692 = c(0.0698,
0.06958), X693 = c(0.0734, 0.07317), X694 = c(0.0773, 0.07701
), X695 = c(0.0814, 0.08109), X696 = c(0.0856, 0.0854), X697 = c(0.0901,
0.08989), X698 = c(0.0947, 0.09449), X699 = c(0.0994, 0.09917
), X700 = c(0.10417, 0.10395), X701 = c(0.10899, 0.10881),
X702 = c(0.11385, 0.11366), X703 = c(0.11871, 0.11854), X704 = c(0.12356,
0.12342), X705 = c(0.1284, 0.12829), X706 = c(0.13324, 0.13312
), X707 = c(0.13803, 0.13792), X708 = c(0.14281, 0.14273),
X709 = c(0.14763, 0.14755), X710 = c(0.15243, 0.15235), X711 = c(0.15718,
0.15713), X712 = c(0.16192, 0.16189), X713 = c(0.1667, 0.16663
), X714 = c(0.17143, 0.17137), X715 = c(0.17609, 0.17605),
X716 = c(0.18069, 0.18062), X717 = c(0.18528, 0.1852), X718 = c(0.18977,
0.18968), X719 = c(0.19417, 0.19406), X720 = c(0.19851, 0.19838
), X721 = c(0.20276, 0.20263), X722 = c(0.20686, 0.20671),
X723 = c(0.2108, 0.21063), X724 = c(0.21465, 0.21449), X725 = c(0.21837,
0.21819), X726 = c(0.22194, 0.22174), X727 = c(0.22534, 0.22515
), X728 = c(0.2286, 0.22838), X729 = c(0.23164, 0.23142),
X730 = c(0.23447, 0.23427), X731 = c(0.23719, 0.23696), X732 = c(0.23984,
0.23959), X733 = c(0.24229, 0.24203), X734 = c(0.24452, 0.24426
), X735 = c(0.24668, 0.24638), X736 = c(0.24867, 0.24839),
X737 = c(0.25053, 0.25028), X738 = c(0.25229, 0.25203), X739 = c(0.25382,
0.25359), X740 = c(0.25531, 0.25508), X741 = c(0.25672, 0.25646
), X742 = c(0.25791, 0.25766), X743 = c(0.25907, 0.25884),
X744 = c(0.26014, 0.25993), X745 = c(0.2611, 0.26089), X746 = c(0.26201,
0.26178), X747 = c(0.26278, 0.26257), X748 = c(0.26347, 0.26329
), X749 = c(0.26414, 0.26397), X750 = c(0.26475, 0.26459),
X751 = c(0.26525, 0.2651), X752 = c(0.26568, 0.26554), X753 = c(0.26614,
0.266), X754 = c(0.26652, 0.26639), X755 = c(0.26682, 0.26671
), X756 = c(0.2671, 0.26701), X757 = c(0.26743, 0.26734),
X758 = c(0.26767, 0.26758), X759 = c(0.26789, 0.26781), X760 = c(0.26814,
0.26808), X761 = c(0.2682, 0.26817), X762 = c(0.26835, 0.26831
), X763 = c(0.26856, 0.26851), X764 = c(0.26872, 0.26869),
X765 = c(0.26884, 0.26881), X766 = c(0.26892, 0.2689), X767 = c(0.26896,
0.26894), X768 = c(0.26898, 0.26896), X769 = c(0.2691, 0.26909
), X770 = c(0.2692, 0.2692), X771 = c(0.26921, 0.26921),
X772 = c(0.26923, 0.26926), X773 = c(0.26927, 0.26931), X774 = c(0.26935,
0.26939), X775 = c(0.26945, 0.26947), X776 = c(0.26946, 0.26949
), X777 = c(0.26948, 0.26952), X778 = c(0.26953, 0.26958),
X779 = c(0.26958, 0.26963), X780 = c(0.26965, 0.2697), X781 = c(0.2697,
0.26975), X782 = c(0.2697, 0.26977), X783 = c(0.26972, 0.26978
), X784 = c(0.26979, 0.26982), X785 = c(0.26987, 0.2699),
X786 = c(0.26991, 0.26998), X787 = c(0.26989, 0.26997), X788 = c(0.26991,
0.26998)), .Names = c("Sample", "plantType", "plantCondition",
"REPI", "X638", "X639", "X640", "X641", "X642", "X643", "X644",
"X645", "X646", "X647", "X648", "X649", "X650", "X651", "X652",
"X653", "X654", "X655", "X656", "X657", "X658", "X659", "X660",
"X661", "X662", "X663", "X664", "X665", "X666", "X667", "X668",
"X669", "X670", "X671", "X672", "X673", "X674", "X675", "X676",
"X677", "X678", "X679", "X680", "X681", "X682", "X683", "X684",
"X685", "X686", "X687", "X688", "X689", "X690", "X691", "X692",
"X693", "X694", "X695", "X696", "X697", "X698", "X699", "X700",
"X701", "X702", "X703", "X704", "X705", "X706", "X707", "X708",
"X709", "X710", "X711", "X712", "X713", "X714", "X715", "X716",
"X717", "X718", "X719", "X720", "X721", "X722", "X723", "X724",
"X725", "X726", "X727", "X728", "X729", "X730", "X731", "X732",
"X733", "X734", "X735", "X736", "X737", "X738", "X739", "X740",
"X741", "X742", "X743", "X744", "X745", "X746", "X747", "X748",
"X749", "X750", "X751", "X752", "X753", "X754", "X755", "X756",
"X757", "X758", "X759", "X760", "X761", "X762", "X763", "X764",
"X765", "X766", "X767", "X768", "X769", "X770", "X771", "X772",
"X773", "X774", "X775", "X776", "X777", "X778", "X779", "X780",
"X781", "X782", "X783", "X784", "X785", "X786", "X787", "X788"
), row.names = 1:2, class = "data.frame")
You can try this
# Red Edge Position Index: for every sample (row), the wavelength at which the
# first difference of reflectance is largest inside the 690-740 nm window.
# The columns are picked by name (X690 ... X740), so the result does not depend
# on how many non-spectral columns precede the measurements.
# diff() has one element fewer than its input: element i is the change from
# band i to band i + 1, and the position is reported as the lower wavelength
# of that steepest step.
red_edge_nm <- 690:740
spec.data$REPI <- unname(apply(
  as.matrix(spec.data[, paste0("X", red_edge_nm)]), 1,
  function(refl) red_edge_nm[which.max(diff(refl))]
))
# Or you can try using dplyr and tidyr:
library(dplyr)
library(tidyr)
# Same calculation in long format: one row per sample and band, then the
# wavelength of the steepest step per sample. pivot_longer() keeps the bands
# of each sample in column order, so diff() runs along increasing wavelength.
spec.data %>%
  select(Sample, X690:X740) %>%
  pivot_longer(-Sample, names_to = "band", values_to = "reflectance") %>%
  group_by(Sample) %>%
  summarise(
    REPI = as.integer(sub("^X", "", band))[which.max(diff(reflectance))]
  )
They both seem to give same results.