Make a data.frame from a modified subset of two data.frames - r
I have two data sets, related to each other through an index column. One data set contains data about individuals, while the other data set contains data about households of those individuals. The index column is a unique number given to a household. I am especially interested in married women and their living conditions, and I would like to create a data set for married women only, that would contain the following information for each woman:
Her age;
Her husband’s age;
Her level of education;
Her husband’s level of education;
Number of children under the age of 15;
Whether or not she lives with her mother (or her mother lives with her);
Whether or not she lives with her father (or vice versa);
Whether or not she lives with her father-in-law (or vice versa);
Whether or not she lives with her mother-in-law (or vice versa);
I would like someone to help me write a dplyr script to create this data frame. Thank you very much in advance.
PS: here is an overview of both data sets:
> str(household)
tibble [15,970 × 792] (S3: tbl_df/tbl/data.frame)
$ N_ménage : num [1:15970] 1 2 3 4 5 6 7 8 9 10 ...
..- attr(*, "label")= chr "Numéro du ménage"
..- attr(*, "format.spss")= chr "F5.0"
$ coef_ménage : num [1:15970] 165 165 165 165 165 165 165 165 165 165 ...
..- attr(*, "label")= chr "Coefficient d'extrapolation des ménages"
..- attr(*, "format.spss")= chr "F8.0"
..- attr(*, "display_width")= int 9
$ coef_indiv : num [1:15970] 560 676 677 796 789 ...
..- attr(*, "label")= chr "Coefficient d'extrapolation des individues"
..- attr(*, "format.spss")= chr "F8.0"
..- attr(*, "display_width")= int 9
$ Milieu : dbl+lbl [1:15970] 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
..# label : chr "MIilieu de résidence"
..# format.spss : chr "F1.0"
..# display_width: int 5
..# labels : Named num [1:2] 1 2
.. ..- attr(*, "names")= chr [1:2] "Urbain" "Rural"
$ Région_12 : dbl+lbl [1:15970] 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,...
..# label : chr "Régions"
..# format.spss : chr "F2.0"
..# display_width: int 11
..# labels : Named num [1:12] 1 2 3 4 5 6 7 8 9 10 ...
.. ..- attr(*, "names")= chr [1:12] "Tanger-Tétouan-Al Hoceïma" "Oriental" "Fès-Meknès" "Rabat-Salé-Kénitra" ...
$ Taille_ménage : chr [1:15970] " 2" " 3" " 3" " 3" ...
..- attr(*, "format.spss")= chr "A2"
$ Taille_agregée : dbl+lbl [1:15970] 2, 3, 3, 3, 4, 5, 4, 6, 6, 6, 2, 3, 3, 3, 4, 5, 4, 6, 6, 6, 2, 2, 3, 4, 4, 4, 5, 5,...
..# label : chr "Taille du ménage"
..# format.spss : chr "F1.0"
..# display_width: int 14
..# labels : Named num [1:6] 1 2 3 4 5 6
.. ..- attr(*, "names")= chr [1:6] "1 personne" "2 personnes" "3 personnes" "4 personnes" ...
$ Pauvre : dbl+lbl [1:15970] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
..# format.spss: chr "F1.0"
..# labels : Named num [1:2] 0 1
.. ..- attr(*, "names")= chr [1:2] "Non pauvre" "Pauvre"
$ Vulnérable : dbl+lbl [1:15970] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
..# format.spss : chr "F1.0"
..# display_width: int 10
..# labels : Named num [1:2] 1 2
.. ..- attr(*, "names")= chr [1:2] "Vulnérable" "Non vulnérable"
$ Quintiles : dbl+lbl [1:15970] 5, 4, 4, 5, 5, 5, 5, 5, 1, 5, 5, 3, 5, 5, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5,...
..# label : chr "Quintiles_National"
..# format.spss : chr "F1.0"
..# display_width: int 11
..# labels : Named num [1:5] 1 2 3 4 5
.. ..- attr(*, "names")= chr [1:5] "<= 7149,77" "7149,78 - 9964,29" "9964,30 - 13639,80" "13639,81 - 20394,92" ...
$ Deciles : dbl+lbl [1:15970] 10, 7, 8, 10, 10, 9, 10, 10, 2, 9, 10, 6, 9, 10, 10, 9, 10, 10, 3, 9, 10,...
..# label : chr "Déciles_National"
..# format.spss : chr "F1.0"
..# display_width: int 10
..# labels : Named num [1:10] 1 2 3 4 5 6 7 8 9 10
.. ..- attr(*, "names")= chr [1:10] "<= 5522,56" "5522,57 - 7149,77" "7149,78 - 8529,14" "8529,15 - 9964,29" ...
$ Quintileurbain : dbl+lbl [1:15970] 5, 3, 4, 5, 5, 5, 5, 5, 1, 4, 5, 3, 4, 5, 5, 4, 5, 5, 1, 4, 5, 5, 5, 5, 5, 5, 5, 4,...
..# label : chr "Quintiles_urbain"
..# format.spss : chr "F1.0"
..# display_width: int 5
..# labels : Named num [1:5] 1 2 3 4 5
.. ..- attr(*, "names")= chr [1:5] "<= 8932,27" "8932,28 - 12240,48" "12240,49 - 16693,25" "16693,26 - 25063,80" ...
$ Decileurbain : dbl+lbl [1:15970] 10, 6, 8, 10, 10, 9, 10, 10, 2, 8, 10, 5, 8, 10, 10, 8, 10, 10, 2, 8, 9,...
..# label : chr "Deciles_urbain"
..# format.spss : chr "F2.0"
..# display_width: int 5
..# labels : Named num [1:10] 1 2 3 4 5 6 7 8 9 10
.. ..- attr(*, "names")= chr [1:10] "<= 7072,83" "7072,84 - 8932,27" "8932,28 - 10556,50" "10556,51 - 12240,48" ...
$ Quintilerural : dbl+lbl [1:15970] NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
..# label : chr "Quintiles_rural"
..# format.spss : chr "F1.0"
..# display_width: int 5
..# labels : Named num [1:5] 1 2 3 4 5
.. ..- attr(*, "names")= chr [1:5] "<= 5590,11" "5590,12 - 7660,28" "7660,29 - 9909,80" "9909,81 - 13779,63" ...
$ Decilerural : dbl+lbl [1:15970] NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
..# label : chr "Déciles_ rural"
..# format.spss : chr "F2.0"
..# display_width: int 5
..# labels : Named num [1:10] 1 2 3 4 5 6 7 8 9 10
.. ..- attr(*, "names")= chr [1:10] "<= 4376,60" "4376,61 - 5590,11" "5590,12 - 6606,89" "6606,90 - 7660,28" ...
$ Sexe_CM : dbl+lbl [1:15970] 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1,...
..# label : chr "Sexe du CM"
..# format.spss : chr "F1.0"
..# display_width: int 3
..# labels : Named num [1:2] 1 2
.. ..- attr(*, "names")= chr [1:2] "Masculin" "Féminin"
$ Age_CM : chr [1:15970] "26" "36" "38" "34" ...
..- attr(*, "label")= chr "Age"
..- attr(*, "format.spss")= chr "A2"
..- attr(*, "display_width")= int 3
$ Age_quin_CM : dbl+lbl [1:15970] 6, 8, 8, 7, 9, 7, 9, 11, 10, 11, 6, 8, 8, 7, 9, 7, 9, 11, 10, 11, 14,...
..# label : chr "Age quinquenal du chef de ménage"
..# format.spss : chr "F2.0"
..# display_width: int 10
..# labels : Named num [1:16] 1 2 3 4 5 6 7 8 9 10 ...
.. ..- attr(*, "names")= chr [1:16] "0-4" "5-9" "10-14" "15-19" ...
$ Lieunaissance_CM : num [1:15970] 1 1 2 1 2 1 1 2 2 2 ...
..- attr(*, "label")= chr "Lieu de naissance"
..- attr(*, "format.spss")= chr "F1.0"
..- attr(*, "display_width")= int 3
$ Etat_matrimonial_CM : dbl+lbl [1:15970] 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 4, 2, 2,...
..# label : chr "Etat matrimoniale du CM"
..# format.spss : chr "F1.0"
..# display_width: int 3
..# labels : Named num [1:4] 1 2 3 4
.. ..- attr(*, "names")= chr [1:4] "Célibataire" "Marié" "Divorcé" "Veuf"
$ Niveau_scolaire_agreg_CM : dbl+lbl [1:15970] 2, 4, 2, 5, 5, 4, 3, 4, 4, 0, 2, 4, 2, 5, 5, 4, 3, 4, 4, 0, 3, 0, 4, 4, 6, 4, 4, 4,...
..# label : chr "Plus haut niveau scolaire"
..# format.spss : chr "F1.0"
..# display_width: int 21
..# labels : Named num [1:7] 0 1 2 3 4 5 6
.. ..- attr(*, "names")= chr [1:7] "Sans niveau" "Préscolaire" "Primaire" "Secondaire collégial" ...
$ Diplôme_agregé_CM : dbl+lbl [1:15970] 2, 2, 1, 3, 3, 2, 2, 2, 2, 1, 2, 2, 1, 3, 3, 2, 2, 2, 2, 1, 2, 1, 3, 2, 1, 3, 3, 3,...
..# label : chr "Le plus haut Diplôme"
..# format.spss : chr "F1.0"
..# display_width: int 10
..# labels : Named num [1:3] 1 2 3
.. ..- attr(*, "names")= chr [1:3] "Sans diplôme" "Niveau moyen" "Niveau supérieur"
$ Type_activité_dominante_CM : dbl+lbl [1:15970] 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 8, 1, 1, 1, 1, 1, 1,...
..# label : chr "Type d'activité habituelle durant les 12 dérniers mois"
..# format.spss : chr "F2.0"
..# display_width: int 15
..# labels : Named num [1:12] 1 2 3 4 5 6 7 8 9 10 ...
.. ..- attr(*, "names")= chr [1:12] "Actif occupé" "Chômeur ayant déjà travaillé" "Chômeur n' ayant jamais travaillé" "Femme au foyer" ...
$ Profession_agreg_CM : dbl+lbl [1:15970] 6, 4, 5, 2, 4, 4, 2, 5, 5, 3, 6, 4, 5, 2, 4, 4, 2, 5, 5, 3, NA,...
..# label : chr "Catégories professionnelles dans l'activité habituelle durant les 12 dérniers mois"
..# format.spss : chr "F1.0"
..# display_width: int 16
..# labels : Named num [1:8] 0 1 2 3 4 5 6 9
.. ..- attr(*, "names")= chr [1:8] "Chômeur n'ayant jamais travaillés" "directeurs et cadres de direction, membres des corps législatifs,cadres supérieurs et membres des professions libérales" "Cadres moyens et employés de bureau" "Commerçants, intermédiaires commerciaux et financiers" ...
$ Secteur_activité_agreg_CM : dbl+lbl [1:15970] 4, 1, 3, 5, 2, 2, 5, 2, 3, 1, 4, 1, 3, 5, 2, 2, 5, 3, 3, 1, NA,...
..# label : chr "Secteur d'activité habituelle durant les 12 dérniers mois"
..# format.spss : chr "F1.0"
..# display_width: int 14
..# labels : Named num [1:7] 0 1 2 3 4 5 9
.. ..- attr(*, "names")= chr [1:7] "Chômeur n'ayant jamais travaillé" "Agriculture, forêt et pêhe" "Industrie" "Batiment et travaux publics" ...
$ Situation_profession_agreg_CM: dbl+lbl [1:15970] 3, 2, 3, 2, 2, 2, 2, 3, 2, 3, 3, 2, 3, 2, 2, 2, 2, 3, 2, 3, 1, 1, 2, 2, 9, 2, 2, 2,...
..# label : chr "Situation dans la profession habituelle durant les 12 dérniers mois"
..# format.spss: chr "F1.0"
..# labels : Named num [1:7] 0 1 2 3 4 5 9
.. ..- attr(*, "names")= chr [1:7] "Chômeur n'ayant jamais travaillé" "Inactif" "Salarié" "Indépendant" ...
$ DAM : num [1:15970] 78028 47075 60254 157237 288210 ...
..- attr(*, "label")= chr "Dépense annuelle par ménage (en DH)"
..- attr(*, "format.spss")= chr "F8.2"
..- attr(*, "display_width")= int 11
$ DAM_G1 : num [1:15970] 16340 16596 25243 69508 57982 ...
..- attr(*, "label")= chr "Dépense annuelle par ménage (en DH) en alimentation"
..- attr(*, "format.spss")= chr "F8.2"
..- attr(*, "display_width")= int 12
$ DAM_G2 : num [1:15970] 1392 0 726 5600 8560 ...
..- attr(*, "label")= chr "Dépense annuelle par ménage (en DH) en habillement"
..- attr(*, "format.spss")= chr "F8.3"
..- attr(*, "display_width")= int 12
$ DAM_G3 : num [1:15970] 10948 18604 12124 15904 43448 ...
..- attr(*, "label")= chr "Dépense annuelle par ménage (en DH) en habitation et dépenses d'énergie"
..- attr(*, "format.spss")= chr "F8.2"
..- attr(*, "display_width")= int 12
$ DAM_G4 : num [1:15970] 8572 2634 3658 5572 16240 ...
..- attr(*, "label")= chr "Dépense annuelle par ménage (en DH) en équipements ménagers"
..- attr(*, "format.spss")= chr "F8.2"
..- attr(*, "display_width")= int 12
$ DAM_G5 : num [1:15970] 8448 3568 4338 14784 11456 ...
..- attr(*, "label")= chr "Dépense annuelle par ménage (en DH) en hygiène et soins médicaux"
..- attr(*, "format.spss")= chr "F8.2"
..- attr(*, "display_width")= int 12
$ DAM_G6 : num [1:15970] 2815 3448 4945 2988 68840 ...
..- attr(*, "label")= chr "Dépense annuelle par ménage (en DH) en transport et communication"
..- attr(*, "format.spss")= chr "F8.2"
..- attr(*, "display_width")= int 12
$ DAM_G7 : num [1:15970] 12204 0 1200 3088 56104 ...
..- attr(*, "label")= chr "Dépense annuelle par ménage (en DH) en loisirs, enseignement et culture"
..- attr(*, "format.spss")= chr "F8.2"
..- attr(*, "display_width")= int 12
$ DAM_G8 : num [1:15970] 15778 125 5000 30833 15800 ...
..- attr(*, "label")= chr "Dépense annuelle par ménage (en DH) en autres biens et services"
..- attr(*, "format.spss")= chr "F8.2"
..- attr(*, "display_width")= int 12
$ DAM_G9 : num [1:15970] 1530 2100 3020 8960 9780 995 1190 13800 434 1690 ...
..- attr(*, "label")= chr "Dépense annuelle par ménage (en DH) en paiments fiscaux, transferts et remboursement des prêts"
..- attr(*, "format.spss")= chr "F8.2"
..- attr(*, "display_width")= int 12
$ DAM_hygiene : num [1:15970] 1668 3508 1588 2412 5516 ...
..- attr(*, "label")= chr "Dépense annuelle par ménage (en DH) en hygiène (G51)"
..- attr(*, "format.spss")= chr "F8.2"
..- attr(*, "display_width")= int 13
$ DAM_soins_medicaux : num [1:15970] 6780 60 2750 12372 5940 ...
..- attr(*, "label")= chr "Dépense annuelle par ménage (en DH) en soins médicaux (G52+G53)"
..- attr(*, "format.spss")= chr "F8.2"
..- attr(*, "display_width")= int 20
$ DAM_Transport : num [1:15970] 1040 520 1820 0 65840 ...
..- attr(*, "label")= chr "Dépense annuelle par ménage (en DH) en transport (G61+G62)"
..- attr(*, "format.spss")= chr "F8.2"
..- attr(*, "display_width")= int 15
$ DAM_Communication : num [1:15970] 1775 2928 3125 2988 3000 ...
..- attr(*, "label")= chr "Dépense annuelle par ménage (en DH) en communication (G63)"
..- attr(*, "format.spss")= chr "F8.2"
..- attr(*, "display_width")= int 19
$ DAM_Loisirs : num [1:15970] 12204 0 1200 3088 35906 ...
..- attr(*, "label")= chr "Dépense annuelle par ménage (en DH) en loisirs (G71+G72+G73+G74)"
..- attr(*, "format.spss")= chr "F8.2"
..- attr(*, "display_width")= int 13
$ DAM_Enseignement : num [1:15970] 0 0 0 0 20198 ...
..- attr(*, "label")= chr "Dépense annuelle par ménage (en DH) en enseignement (G75+G76)"
..- attr(*, "format.spss")= chr "F8.1"
..- attr(*, "display_width")= int 18
$ DAM_Autres_dépenses : num [1:15970] 17308 2225 8020 39793 25580 ...
..- attr(*, "label")= chr "Dépense annuelle par ménage (en DH) en autres biens et services (G8+G9)"
..- attr(*, "format.spss")= chr "F8.2"
..- attr(*, "display_width")= int 22
$ DAM_G01 : num [1:15970] 3172 2418 4160 13910 9438 ...
..- attr(*, "label")= chr "Dépense annuelle du ménage du sous groupe 01 (céréales et produits à base de céréales)"
..- attr(*, "format.spss")= chr "F8.2"
..- attr(*, "display_width")= int 13
[list output truncated]
and
> str(individual)
tibble [75,691 × 16] (S3: tbl_df/tbl/data.frame)
$ N_ménage : num [1:75691] 1 1 2 2 2 3 3 3 4 4 ...
..- attr(*, "label")= chr "Numéro du ménage"
..- attr(*, "format.spss")= chr "F8.2"
$ Nordre : num [1:75691] 2 1 3 2 1 3 2 1 3 2 ...
..- attr(*, "label")= chr "Numéro d'ordre de l'individu"
..- attr(*, "format.spss")= chr "F2.0"
..- attr(*, "display_width")= int 4
$ Coef_individu : num [1:75691] 168 392 191 178 307 187 183 307 191 178 ...
..- attr(*, "label")= chr "Coefficient individu"
..- attr(*, "format.spss")= chr "F8.2"
..- attr(*, "display_width")= int 4
$ Milieu : dbl+lbl [1:75691] 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
..# label : chr "Milieu de résidence"
..# format.spss : chr "F1.0"
..# display_width: int 4
..# labels : Named num [1:2] 1 2
.. ..- attr(*, "names")= chr [1:2] "Urbain" "Rural"
$ Région_12 : dbl+lbl [1:75691] 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,...
..# label : chr "Régions"
..# format.spss : chr "F2.0"
..# display_width: int 11
..# labels : Named num [1:12] 1 2 3 4 5 6 7 8 9 10 ...
.. ..- attr(*, "names")= chr [1:12] "Tanger-Tétouan-Al Hoceïma" "Oriental" "Fès-Meknès" "Rabat-Salé-Kénitra" ...
$ Sexe : dbl+lbl [1:75691] 2, 1, 2, 2, 1, 1, 2, 1, 2, 2, 1, 1, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1,...
..# label : chr "Sexe"
..# format.spss : chr "F1.0"
..# display_width: int 4
..# labels : Named num [1:2] 1 2
.. ..- attr(*, "names")= chr [1:2] "Masculin" "Féminin"
$ Age : num [1:75691] 17 26 1 31 36 0 27 38 2 30 ...
..- attr(*, "label")= chr "Age"
..- attr(*, "format.spss")= chr "F2.0"
..- attr(*, "display_width")= int 4
$ Age_quin : dbl+lbl [1:75691] 4, 6, 1, 7, 8, 1, 6, 8, 1, 7, 7, 2, 3, 9, 9, 1, 2, 10, 6, 7, 1,...
..# label : chr "Age quinquenal de l'individu"
..# format.spss : chr "F8.2"
..# display_width: int 10
..# labels : Named num [1:16] 1 2 3 4 5 6 7 8 9 10 ...
.. ..- attr(*, "names")= chr [1:16] "0-4" "5-9" "10-14" "15-19" ...
$ Liendeparenté : dbl+lbl [1:75691] 2, 1, 3, 2, 1, 3, 2, 1, 3, 2, 1, 3, 3, 2, 1, 3, 3, 7, 2, 1, 3, 3, 2, 1, 3, 3, 3, 3,...
..# label : chr "Lien de parenté avec le chef de ménage"
..# format.spss : chr "F2.0"
..# display_width: int 4
..# labels : Named num [1:11] 1 2 3 4 5 6 7 8 9 10 ...
.. ..- attr(*, "names")= chr [1:11] "Chef de ménage" "Epoux (se) du chef de ménage" "Fils (fille) du chef de ménage" "Epouse du fils /Epoux de la fille du chef de ménage" ...
$ Etat_matrimonial : dbl+lbl [1:75691] 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 3, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1,...
..# label : chr "Etat matrimonial"
..# format.spss : chr "F1.0"
..# display_width: int 15
..# labels : Named num [1:4] 1 2 3 4
.. ..- attr(*, "names")= chr [1:4] "Célibataire" "Marié(e)" "Divorcé(e)" "Veuf(ve)"
$ Niveau_scolaire_agreg : dbl+lbl [1:75691] 2, 2, NA, 3, 4, NA, 2, 2, NA, 5, 5, 2, 2, 5, 5, NA, 2, 2, 4, 4, NA,...
..# label : chr "Le plus haut niveau scolaire"
..# format.spss : chr "F8.2"
..# display_width: int 7
..# labels : Named num [1:7] 0 1 2 3 4 5 6
.. ..- attr(*, "names")= chr [1:7] "Sans niveau" "Préscolaire" "Primaire" "Secondaire collégial" ...
$ Diplôme_agregé : dbl+lbl [1:75691] 1, 2, 0, 2, 2, 0, 1, 1, 0, 3, 3, 0, 1, 3, 3, 0, 0, 1, 2, 2, 0, 0, 1, 2, 0, 0, 2, 2,...
..# label : chr "Le plus haut diplôme"
..# format.spss : chr "F4.0"
..# display_width: int 10
..# labels : Named num [1:4] 0 1 2 3
.. ..- attr(*, "names")= chr [1:4] "Age moins de 10 ans" "Sans diplôme" "Niveau moyen" "Niveau supérieur"
$ Type_activité_dominante : dbl+lbl [1:75691] 4, 1, NA, 4, 1, NA, 4, 1, NA, 3, 1, NA, 5, 1, 1, NA, 5, 1, 4, 1, NA,...
..# label : chr "Type d'activité habituelle durant les 12 dérniers mois"
..# format.spss : chr "F2.0"
..# display_width: int 9
..# labels : Named num [1:12] 1 2 3 4 5 6 7 8 9 10 ...
.. ..- attr(*, "names")= chr [1:12] "Actif occupé" "Chômeur ayant déjà travaillé" "Chômeur n' ayant jamais travaillé" "Femme au foyer" ...
$ Profession_agreg : dbl+lbl [1:75691] NA, 6, NA, NA, 4, NA, NA, 5, NA, 0, 2, NA, NA, 2, 4, NA, NA, 4, NA, 4, NA,...
..# label : chr "Catégories professionnelles dans l'activité habituelle durant les 12 dérniers mois"
..# format.spss : chr "F1.0"
..# display_width: int 16
..# labels : Named num [1:8] 0 1 2 3 4 5 6 9
.. ..- attr(*, "names")= chr [1:8] "Chômeur n'ayant jamais travaillés" "directeurs et cadres de direction, membres des corps législatifs,cadres supérieurs et membres des professions libérales" "Cadres moyens et employés de bureau" "Commerçants, intermédiaires commerciaux et financiers" ...
$ Secteur_activité_agreg : dbl+lbl [1:75691] NA, 4, NA, NA, 1, NA, NA, 3, NA, 0, 5, NA, NA, 5, 2, NA, NA, 2, NA, 2, NA,...
..# label : chr "Secteur d'activité habituelle durant les 12 dérniers mois"
..# format.spss : chr "F1.0"
..# display_width: int 14
..# labels : Named num [1:7] 0 1 2 3 4 5 9
.. ..- attr(*, "names")= chr [1:7] "Chômeur n'ayant jamais travaillé" "Agriculture, forêt et pêhe" "Industrie" "Batiment et travaux publics" ...
$ Situation_profession_agreg: dbl+lbl [1:75691] 1, 3, NA, 1, 2, NA, 1, 3, NA, 0, 2, NA, 1, 2, 2, NA, 1, 2, 1, 2, NA,...
..# label : chr "Situation dans la profession habituelle durant les 12 dérniers mois"
..# format.spss: chr "F1.0"
..# labels : Named num [1:7] 0 1 2 3 4 5 9
.. ..- attr(*, "names")= chr [1:7] "Chômeur n'ayant jamais travaillé" "Inactif" "Salarié" "Indépendant" ...
>
You describe two basic steps: (1) merge (join) the data sets, then (2) filter or mutate the result. See this test data:
# Example lookup table: one row per author.
authors <- data.frame(
  # I(*): keep the names as plain character columns so they sort sensibly
  surname = I(c("Tukey", "Venables", "Tierney", "Ripley", "McNeil")),
  nationality = c("US", "Australia", "US", "UK", "Australia"),
  deceased = c("yes", rep("no", 4))
)

# Same table, but with the key column called `name` instead of `surname`.
authorN <- within(authors, {
  name <- surname
  rm(surname)
})

# Example detail table: one row per book, keyed by the author's name.
# "R Core" has no matching row in `authors`.
books <- data.frame(
  name = I(c(
    "Tukey", "Venables", "Tierney",
    "Ripley", "Ripley", "McNeil", "R Core"
  )),
  title = c(
    "Exploratory Data Analysis",
    "Modern Applied Statistics ...",
    "LISP-STAT",
    "Spatial Statistics", "Stochastic Simulation",
    "Interactive Data Analysis",
    "An Introduction to R"
  ),
  other.author = c(
    NA, "Ripley", NA, NA, NA, NA,
    "Venables & Smith"
  )
)
authors
> authors
surname nationality deceased
1 Tukey US yes
2 Venables Australia no
3 Tierney US no
4 Ripley UK no
5 McNeil Australia no
books
name title other.author
1 Tukey Exploratory Data Analysis <NA>
2 Venables Modern Applied Statistics ... Ripley
3 Tierney LISP-STAT <NA>
4 Ripley Spatial Statistics <NA>
5 Ripley Stochastic Simulation <NA>
6 McNeil Interactive Data Analysis <NA>
7 R Core An Introduction to R Venables & Smith
# Join the two tables: authors$surname is matched against books$name.
m1 <- merge(
  x = authors,
  y = books,
  by.x = "surname",
  by.y = "name"
)
m1
surname nationality deceased title other.author
1 McNeil Australia no Interactive Data Analysis <NA>
2 Ripley UK no Spatial Statistics <NA>
3 Ripley UK no Stochastic Simulation <NA>
4 Tierney US no LISP-STAT <NA>
5 Tukey US yes Exploratory Data Analysis <NA>
6 Venables Australia no Modern Applied Statistics ... Ripley
Say I only want the authors whose nationality is UK.
# `%>%` and filter() come from dplyr, so load it first: without it the pipe
# is not found, and a bare filter() would resolve to stats::filter().
library(dplyr)

# Keep only the merged rows whose nationality is "UK".
m1 %>%
  filter(nationality == "UK")
surname nationality deceased title other.author
1 Ripley UK no Spatial Statistics <NA>
2 Ripley UK no Stochastic Simulation <NA>
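Each of the variables you list can be built the same way: join `individual` to `household` on the shared index column `N_ménage`, then use `filter()` to keep only the rows you need (e.g. married women) and `mutate()` to create the new dummy variables.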
Related
fisher pairwise test in R
I was able to run Fisher's exact test in R on my data: fisherTest <- fisher.test(table(dtQ1$IHD_other_healthy, dtQ1$ACE_FREQ_SUM), workspace = 2e8, simulate.p.value=TRUE) fisherTest and the results are significant Fisher's Exact Test for Count Data with simulated p-value (based on 2000 replicates) data: table(dtQ1$IHD_other_healthy, dtQ1$ACE_FREQ_SUM) p-value = 0.0004998 alternative hypothesis: two.sided Now, I am trying to run the pairwise test: pairwise_fisher_test(table(dtQ1$IHD_other_healthy, dtQ1$ACE_FREQ_SUM), p.adjust.method = "bonferroni") but I get an error: Error in pairwise_fisher_test(table(dtQ1$IHD_other_healthy, dtQ1$ACE_FREQ_SUM), : A two-dimensionnal contingency table required. How can I create a two-dimensional contingency table from my data? str(dtQ1) Classes ‘data.table’ and 'data.frame': 1018 obs. of 2 variables: $ IHD_other_healthy: dbl+lbl [1:1018] 1, 1, 1, 2, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 2, 1... ..@ format.spss : chr "F8.2" ..@ display_width: int 19 ..@ labels : Named num 0 1 2 .. ..- attr(*, "names")= chr [1:3] "no illness" "other than IHD" "IHD" $ ACE_FREQ_SUM : num 3 0 1 0 1 1 0 2 1 1 ... ..- attr(*, "format.spss")= chr "F8.2" ..- attr(*, "display_width")= int 14 - attr(*, ".internal.selfref")=<externalptr>
How I can view rows and columns of 'Adult' Dataset in R [closed]
Closed. This question needs details or clarity. It is not currently accepting answers. Want to improve this question? Add details and clarify the problem by editing this post. Closed 4 years ago. Improve this question How can I view rows and columns of the 'Adult' dataset in R? I just started learning R. Any help is appreciated. Please refer to the screenshot: [enter image description here]
First, start by running str to see the structure of your dataset. str(Adult) #Formal class 'transactions' [package "arules"] with 3 slots # ..@ data :Formal class 'ngCMatrix' [package "Matrix"] with #5 slots # .. .. ..@ i : int [1:612200] 1 10 25 32 35 50 59 61 63 65 #... # .. .. ..@ p : int [1:48843] 0 13 26 39 52 65 78 91 104 117 #... # .. .. ..@ Dim : int [1:2] 115 48842 # .. .. ..@ Dimnames:List of 2 # .. .. .. ..$ : NULL # .. .. .. ..$ : NULL # .. .. ..@ factors : list() # ..@ itemInfo :'data.frame': 115 obs. of 3 variables: # .. ..$ labels : chr [1:115] "age=Young" "age=Middle-aged" #"age=Senior" "age=Old" ... # .. ..$ variables: Factor w/ 13 levels "age","capital-gain",..: 1 #1 1 1 13 13 13 13 13 13 ... # .. ..$ levels : Factor w/ 112 levels "10th","11th",..: 111 63 #92 69 30 54 65 82 90 91 ... # ..@ itemsetInfo:'data.frame': 48842 obs. of 1 variable: # .. ..$ transactionID: chr [1:48842] "1" "2" "3" "4" ... This tells you that Adult is an S4 object with three slots, data, itemInfo and itemsetInfo. The slot data is a sparse matrix created by package Matrix; The slot itemInfo is a data.frame; The slot itemsetInfo is also a data.frame. S4 objects' slots are accessed with operator @. In order to see what is in those slots, run Adult@data Adult@itemInfo Adult@itemsetInfo In the case of the two dataframes, you might prefer to run head(Adult@itemInfo) head(Adult@itemsetInfo) since they have 115 and 48842 observations, respectively and don't fit in a screen display.
To get the text output shown in your example you can use this: cat(dim(mtcars)[1], "transactions (rows)\n", dim(mtcars)[2], "items (cols)") #32 transactions (rows) # 11 items (cols) Replace mtcars with Adult (or any data.frame). cat lets you print out to the console, and dim() gets you rows and columns of the data. Similarly to str from base R, you can use glimpse from dplyr package: install.packages("dplyr") # run this the first time to install the package dplyr::glimpse(mtcars) # Observations: 32 # Variables: 11 # $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, 14.7, 32... # $ cyl <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 8, 6, 8, 4 # $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 140.8, 167.6, 167.6, 275.8, 275.8, 275.8, 472.0,... # $ hp <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 180, 180, 205, 215, 230, 66, 52, 65, 97, 150, 1... # $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92, 3.07, 3.07, 3.07, 2.93, 3.00, 3.23, 4.... # $ wt <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.440, 3.440, 4.070, 3.730, 3.780, 5.250,... # $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 22.90, 18.30, 18.90, 17.40, 17.60, 18.00, 17.98,... # $ vs <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1 # $ am <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 # $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 4 # $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2, 2, 4, 2, 1, 2, 2, 4, 6, 8, 2 You have the number of Observations (rows) and Variables (columns), and each variable listed with its type and values.
Replacing all negative values conditional on the attributes of the variable independent of type
I have a very large mixed dataset (character variables, numerical variables, factors), in which negative values often represent missing values, see Scales, but not always, see Profit : Country Ccode Year Profit Scale ID Happiness_d Power_d ID_d <chr> <fcr> <dbl> <dbl> <labelled> <dbl> <dbl> <dbl> <dbl> 1 France FR 2000 1000 NA 1 40000. 160000. 1.67 2 France FR 2001 -1200 1 1 80000. 320000. 1.67 3 France FR 2000 1400 0 2 40000. 160000. 1.67 4 France FR 2001 1600 3 2 80000. 320000. 1.67 5 UK UK 2000 -1000 -9 3 40000. 160000. 1.67 6 UK UK 2001 1000 2 3 80000. 320000. 1.67 7 UK UK 2000 1000 4 4 40000. 160000. 1.67 8 UK UK 2001 1000 0 4 80000. 320000. 1.67 I wanted to replace all negative values with NA's using: df[df< 0] <- NA The issue is, although it is intended to remove negative values representing NA's such as in Scale, it would in the example dataset also remove negative numbers in Profit, which are obviously not NA. As a result I would like to make the result conditional on the range of the variable. The structure the Scale variable is as follows: Class 'labelled' atomic [1:135894] NA NA 2 NA NA NA NA NA NA NA ... ..- attr(*, "label")= chr "Do You Use Technology Licensed From A Foreign-Owned Company?" ..- attr(*, "format.stata")= chr "%24.0g" ..- attr(*, "labels")= Named num [1:3] -9 1 2 .. ..- attr(*, "names")= chr [1:3] "Don't Know (Spontaneous)" "Yes" "No" > names(New_Comprehensive_June_25_2018$e6) I have figured out that with the haven library link you can get out the factor levels from; ..- attr(*, "labels")= Named num [1:3] -9 1 2 with get_values(). get_values(df$Scale) [1] -9 1 2 Would it be possible to have the solution only remove these negative factors instead of other negative values? ..- attr(*, "labels")= Named num [1:3] -9 1 2 To be clear, the desired output would be: Country Ccode Year Profit Scale ID Happiness_d Power_d ID_d <chr> <fcr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> 1 France FR 2000 1000 NA 1 40000. 160000. 
1.67 2 France FR 2001 -1200 1 1 80000. 320000. 1.67 3 France FR 2000 1400 0 2 40000. 160000. 1.67 4 France FR 2001 1600 3 2 80000. 320000. 1.67 5 UK UK 2000 -1000 **NA** 3 40000. 160000. 1.67 6 UK UK 2001 1000 2 3 80000. 320000. 1.67 7 UK UK 2000 1000 4 4 40000. 160000. 1.67 8 UK UK 2001 1000 0 4 80000. 320000. 1.67 dput example (please note that the varable Scale does not actually exist: h7a = structure(c(1, -9, 2, 3, 1, 3, -9, 2, 3, 1, 2, 1, 3, 3, 2, 2, 1, 2, 2, 1, 2, -9, 1, 4, 3, 3, 1, 1, 1, 1, 3, 4, 3, 1, 2, 2, 1, 2, 1, NA, 2, 1, 2, 4, 3, 1, 3, 4, 4, 3, 2, 4, 1, 1, 2, 3, 2, 2, 2, 2, 1, 2, 1, 3, 4, 3, 1, 3, 1, 2, 3, 3, 3, 1, 1, 4, -9, 4, 3, 1, 2, 3, 1, -9, 1, 4, 1, 3, 1, -9, 1, 1, 1, 1, 2, 3, 1, 3, 1, 2, 1, 2, 3, 4, 3, 3, 2, 4, 3, 3, 1, -9, 1, -7, 3, 1, 1, 2, 1, 2, -7, 2, 3, 1, 3, -7, 3, 4, 3, 2, 3, NA, 3, 3, 3, 1, 1, 2, 2, -9, 3, 1, 1, 2, 1, 1, -9, -9, -9, 2, -9, 1, 2, 3, 2, 3, 3, 3, 3, 1, 2, -9, 4, 3, 3, 1, 2, 2, 4, 4, 4, 3, 2, 1, 2, 2, 2, 2, 2, 3, 2, 2, -9, 4, 4, 4, 2, 1, -7, 2, 2, 1, 1, 2, 1, 2, 2, 4, 2, 3, -7, 3, 3, 3, 4, 2, 4, 2, NA, 1, 3, 1, 2, 3, 4, 3, -9, 3, 3, 4, 3, 2, 4, 1, 3, 1, 3, 4, 3, 1, 3, 3, 3, NA, 1, 3, 3, -7, 1, 1, 3, 2, 1, 4), label = "The Court System Is Fair, Impartial And Uncorrupted", format.stata = "%24.0g", class = "labelled", labels = structure(c(NA, NA, 1, 2, 3, 4), .Names = c("Don't Know (Spontaneous)", "Does Not Apply", "Strongly disagree", "Tend to disagree", "Tend to agree", "Strongly agree"))),
Here's a simple example that you can apply to your dataset. # example data df = data.frame(a = c("A","A","B"), x = c(1,2,3), y = c(NA,3,-7), z = c(200,300,-400)) library(dplyr) df %>% mutate_if(is.numeric, ~ifelse(between(min(., na.rm = T), -9, -1) & .<0, NA, .)) # a x y z # 1 A 1 NA 200 # 2 A 2 3 300 # 3 B 3 NA -400 You can update (mutate) a column only if that column is numeric and the minimum value of that column is between -9 and -1. And the update is to replace negative values with NA. This assumes you have only integer values. If not you can use between(..., -9, 0).
Base-R Solution: # Find negative value from 3rd column onwards, replace it with NA # and bind with Country,Ccode and Profit columns. cbind(df[,c(1,2,4)],do.call(cbind, lapply(df[,-c(1,2,4)], function(x) ifelse(x<0,NA,x)))) Output: Country Ccode Profit Year Scale ID Happiness_d Power_d ID_d 1 France FR 1000 2000 NA 1 40000 160000 1.67 2 France FR -1200 2001 1 1 80000 320000 1.67 3 France FR 1400 2000 0 2 40000 160000 1.67 4 France FR 1600 2001 3 2 80000 320000 1.67 5 UK UK -1000 2000 NA 3 40000 160000 1.67 6 UK UK 1000 2001 2 3 80000 320000 1.67 7 UK UK 1000 2000 4 4 40000 160000 1.67 8 UK UK 1000 2001 0 4 80000 320000 1.67
T.test between two data sets - row by row
I think that title explains everything. I would like to do t.test between two data sets. I would like to compare row by row. Let's use mtcars for that and slightly modified mtcars_mod. structure(list(mpg = c(21, 25, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 24.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, 14.7, 32.4, 36.4, 31.9, 21.5, 15.5, 15.2, 13.3, 19.2, 27.3, 26, 30.4, 15.8, 29.7, 15, 21.4), cyl = c(6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 7, 4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 8, 6, 8, 4), disp = c(160, 160, 108, 258, 360, 225, 360, 146.7, 140.8, 167.6, 167.6, 275.8, 275.8, 275.8, 6, 460, 440, 78.7, 75.7, 71.1, 120.1, 318, 304, 350, 400, 79, 15, 97, 351, 145, 301, 121), hp = c(110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 180, 180, 205, 215, 230, 66, 52, 65, 97, 150, 150, 245, 175, 66, 91, 113, 264, 175, 335, 109), drat = c(3.9, 3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92, 3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93, 4.22, 3.7, 2.76, 3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22, 3.62, 3.54, 4.11 ), wt = c(2.62, 2.875, 2.32, 7, 3.44, 3.46, 3.57, 3.19, 3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25, 5.424, 5.345, 2.2, 1.615, 1.835, 2.465, 3.52, 3.435, 3.84, 3.845, 1.935, 2.14, 1.513, 3.17, 2.77, 6, 2.78), qsec = c(16.46, 17.02, 18.61, 114, 17.02, 20.22, 15.84, 12, 22.9, 18.3, 18.9, 17.4, 17.6, 18, 17.98, 17.82, 17.42, 19.47, 18.52, 19.9, 20.01, 16.87, 32, 15.41, 17.05, 18.9, 16.7, 16.9, 14.5, 15.5, 14.6, 18.6 ), vs = c(0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1), am = c(1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1), gear = c(4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 4), carb = c(4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2, 2, 4, 2, 1, 2, 2, 4, 6, 8, 2)), .Names = c("mpg", "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am", "gear", "carb"), row.names = c("Mazda RX4", "Mazda 
RX4 Wag", "Datsun 710", "Hornet 4 Drive", "Hornet Sportabout", "Valiant", "Duster 360", "Merc 240D", "Merc 230", "Merc 280", "Merc 280C", "Merc 450SE", "Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood", "Lincoln Continental", "Chrysler Imperial", "Fiat 128", "Honda Civic", "Toyota Corolla", "Toyota Corona", "Dodge Challenger", "AMC Javelin", "Camaro Z28", "Pontiac Firebird", "Fiat X1-9", "Porsche 914-2", "Lotus Europa", "Ford Pantera L", "Ferrari Dino", "Maserati Bora", "Volvo 142E"), class = "data.frame" I tried to do it in a loop, but I don't know how to store the results. I get only the last value... for(z in 1:nrow(mtcars)){ vec_1 <- mtcars[z,1:7] vec_2 <- mtcars_mod[z,1:7] vec_results <- unlist(t.test(vec_1, vec_2)[3]) } Can someone show me how to correct my loop? I would prefer to use an apply function, but I would still like to know what I did wrong with my loop....
(I'll just use my own modified mtcarsmod ... sorry, yours is missing at least one paren, and -- though I know exactly what happened -- it is ugly in that SO window!) set.seed(42) mtcarsmod <- as.data.frame(lapply(mtcars, jitter, factor = 5)) head(mtcarsmod) # mpg cyl disp hp drat wt qsec vs am gear carb # 1 21.1 5.55 160 109.7 3.89 2.62 16.5 -0.373 0.221 3.68 3.861 # 2 21.1 6.74 160 110.0 3.90 2.88 17.0 0.641 1.080 3.06 3.788 # 3 22.8 2.02 108 93.5 3.86 2.32 18.6 0.614 1.142 4.73 0.284 # 4 21.5 7.33 258 110.2 3.08 3.21 19.4 0.371 0.238 3.46 0.560 # 5 18.7 6.03 360 175.3 3.15 3.44 17.0 -0.903 0.430 2.63 2.130 # 6 18.1 4.83 225 104.4 2.77 3.46 20.2 0.491 -0.753 2.77 1.870 Instead of a loop you should probably use sapply or one of its kin. sapply(seq_len(nrow(mtcars)), function(r) unlist(t.test(mtcars[r,1:7], mtcarsmod[r,1:7])[3])) # p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value # 0.998 0.998 0.992 0.996 0.998 0.995 0.999 1.000 0.999 0.998 0.995 # p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value # 0.995 0.999 0.999 0.998 0.999 0.997 0.999 0.995 0.997 0.995 0.999 # p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value # 0.997 0.998 1.000 0.990 0.997 0.999 0.999 0.995 0.997 0.995 One advantage to using lapply might be using more of the test results. For instance: ret <- lapply(seq_len(nrow(mtcars)), function(r) t.test(mtcars[r,1:7], mtcarsmod[r,1:7])) str(head(ret, n = 2)) # List of 2 # $ :List of 9 # ..$ statistic : Named num 0.0024 # .. ..- attr(*, "names")= chr "t" # ..$ parameter : Named num 12 # .. ..- attr(*, "names")= chr "df" # ..$ p.value : num 0.998 # ..$ conf.int : atomic [1:2] -73.4 73.5 # .. ..- attr(*, "conf.level")= num 0.95 # ..$ estimate : Named num [1:2] 45.7 45.6 # .. ..- attr(*, "names")= chr [1:2] "mean of x" "mean of y" # ..$ null.value : Named num 0 # .. 
..- attr(*, "names")= chr "difference in means" # ..$ alternative: chr "two.sided" # ..$ method : chr "Welch Two Sample t-test" # ..$ data.name : chr "mtcars[r, 1:7] and mtcarsmod[r, 1:7]" # ..- attr(*, "class")= chr "htest" # $ :List of 9 # ..$ statistic : Named num -0.00311 # .. ..- attr(*, "names")= chr "t" # ..$ parameter : Named num 12 # .. ..- attr(*, "names")= chr "df" # ..$ p.value : num 0.998 # ..$ conf.int : atomic [1:2] -73.4 73.2 # .. ..- attr(*, "conf.level")= num 0.95 # ..$ estimate : Named num [1:2] 45.8 45.9 # .. ..- attr(*, "names")= chr [1:2] "mean of x" "mean of y" # ..$ null.value : Named num 0 # .. ..- attr(*, "names")= chr "difference in means" # ..$ alternative: chr "two.sided" # ..$ method : chr "Welch Two Sample t-test" # ..$ data.name : chr "mtcars[r, 1:7] and mtcarsmod[r, 1:7]" # ..- attr(*, "class")= chr "htest" ret[[1]]$p.value # [1] 0.998 And you can still easily get a vector of p-values from the results: sapply(ret, `[[`, "p.value") # [1] 0.998 0.998 0.992 0.996 0.998 0.995 0.999 1.000 0.999 0.998 0.995 0.995 0.999 0.999 # [15] 0.998 0.999 0.997 0.999 0.995 0.997 0.995 0.999 0.997 0.998 1.000 0.990 0.997 0.999 # [29] 0.999 0.995 0.997 0.995
How to add names to each element of a list
I have a list like the one below: d1 <- data.frame(y1=c(1,2,3),y2=c(4,5,6)) d2 <- data.frame(y1=c(3,2,1),y2=c(6,5,4)) my.list <- list(d1, d2) str(my.list) List of 2 $ :'data.frame': 3 obs. of 2 variables: ..$ y1: num [1:3] 1 2 3 ..$ y2: num [1:3] 4 5 6 $ :'data.frame': 3 obs. of 2 variables: ..$ y1: num [1:3] 3 2 1 ..$ y2: num [1:3] 6 5 4 What I want is to give each element of this list a name of my choosing, for example: $myFirst :'data.frame': 3 obs. of 2 variables: ..$ y1: num [1:3] 1 2 3 ..$ y2: num [1:3] 4 5 6 $mySecond :'data.frame': 3 obs. of 2 variables: ..$ y1: num [1:3] 3 2 1 ..$ y2: num [1:3] 6 5 4 I tried to do it with myNam<-c("myFirst","mySecond") names(myNam) <- sapply(my.list,paste) Where is the problem? Here is a new data set on which the function does not work: df<- structure(list(A = structure(list(breaks = c(-10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4), counts = c(1L, 0L, 1L, 5L, 9L, 38L, 56L, 105L, 529L, 2858L, 17L, 2L, 0L, 2L), density = c(0.000276014352746343, 0, 0.000276014352746343, 0.00138007176373171, 0.00248412917471709, 0.010488545404361, 0.0154568037537952, 0.028981507038366, 0.146011592602815, 0.788849020149048, 0.00469224399668783, 0.000552028705492686, 0, 0.000552028705492686), mids = c(-9.5, -8.5, -7.5, -6.5, -5.5, -4.5, -3.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 3.5), xname = "x", equidist = TRUE), .Names = c("breaks", "counts", "density", "mids", "xname", "equidist"), class = "histogram"), B = structure(list( breaks = c(-7, -6, -5, -4, -3, -2, -1, 0), counts = c(2L, 0L, 6L, 2L, 2L, 1L, 3L), density = c(0.125, 0, 0.375, 0.125, 0.125, 0.0625, 0.1875), mids = c(-6.5, -5.5, -4.5, -3.5, -2.5, -1.5, -0.5), xname = "x", equidist = TRUE), .Names = c("breaks", "counts", "density", "mids", "xname", "equidist"), class = "histogram"), C = structure(list(breaks = c(-7, -6, -5, -4, -3, -2, -1, 0, 1), counts = c(2L, 2L, 4L, 5L, 14L, 22L, 110L, 3L), density = c(0.0123456790123457, 0.0123456790123457, 0.0246913580246914, 0.0308641975308642, 
0.0864197530864197, 0.135802469135802, 0.679012345679012, 0.0185185185185185), mids = c(-6.5, -5.5, -4.5, -3.5, -2.5, -1.5, -0.5, 0.5), xname = "x", equidist = TRUE), .Names = c("breaks", "counts", "density", "mids", "xname", "equidist"), class = "histogram")), .Names = c("A", "B", "C"))
We need to assign 'myNam' to the names of my.list: names(my.list) <- myNam str(my.list) #List of 2 #$ myFirst :'data.frame': 3 obs. of 2 variables: # ..$ y1: num [1:3] 1 2 3 # ..$ y2: num [1:3] 4 5 6 #$ mySecond:'data.frame': 3 obs. of 2 variables: # ..$ y1: num [1:3] 3 2 1 # ..$ y2: num [1:3] 6 5 4 or with setNames (which returns a renamed copy, so assign the result if you want to keep it): setNames(my.list, myNam) The OP's code sapply(my.list,paste) loops through the list elements and pastes the elements of each column into a single string. It also assigns that result to names(myNam) rather than to names(my.list), so my.list itself is never renamed.