How to make a random strata sample in R? - r
I have a data.frame called "per" that has three variables: nrodocumento, cod_jer (42 groups) and grupo_fict (8 groups). I would like to draw a random sample (as a data.frame) within each combination of cod_jer and grupo_fict.
> dput(head(per))
structure(list(nrodocumento = c(49574917L, 54692750L, 54731807L,
57364176L, 57364198L, 46867674L), cod_jer = c(1146L, 32L, 0L,
0L, 0L, 0L), grupo_fict = c(3L, 1L, 8L, 1L, 1L, 1L)), .Names =
c("nrodocumento",
"cod_jer", "grupo_fict"), row.names = c(NA, 6L), class = "data.frame")
> head(per,n=100)
nrodocumento cod_jer grupo_fict
1 49574917 1146 3
2 54692750 32 1
3 54731807 0 8
4 57364176 0 1
5 57364198 0 1
6 46867674 0 1
7 46867668 0 1
8 57364201 0 1
9 53767871 0 1
10 55339012 0 1
11 49204318 0 8
12 53743017 0 1
13 47622958 0 1
14 49019862 0 1
15 50167428 0 2
16 48783260 0 4
17 52020945 433 5
18 54486680 236 4
19 51402916 0 4
20 48543242 0 2
21 54671603 0 1
22 50644599 0 8
23 53293608 0 1
24 52742799 0 4
25 49815210 0 8
26 50967719 236 3
27 51938997 0 8
28 50057188 324 3
29 52754706 0 6
30 55322102 0 3
31 53040748 0 1
32 50321642 0 5
33 51621354 236 8
34 49611806 0 7
35 53347667 0 8
36 52462498 0 3
37 54158570 0 8
38 54034849 0 8
39 52507674 321 3
40 50218598 317 7
41 45078442 432 7
42 51491066 0 8
43 53278953 0 2
44 52661658 0 2
45 50092873 236 3
46 50308064 0 7
47 51941635 0 7
48 53527966 0 1
49 49614579 0 1
50 49450678 318 8
51 52953427 1146 7
52 52133221 0 8
53 53363128 0 7
54 52819643 0 1
55 47516589 0 1
56 52563137 0 3
57 49511296 0 7
58 54154013 0 2
59 50822420 1349 4
60 50822408 1349 4
61 50822414 1349 6
62 52339683 0 1
63 50026113 0 7
64 47328586 0 7
65 56041961 0 7
66 47756955 432 8
67 53158397 0 7
68 53151167 0 7
69 54710039 0 3
70 54408844 114 4
71 46286323 114 4
72 50310877 0 1
73 50929135 0 7
74 49817218 0 1
75 53604540 0 8
76 52812736 1147 1
77 53726314 1147 1
78 50835936 0 8
79 55429334 0 1
80 48421020 329 8
81 49800217 0 3
82 52818263 0 1
83 45884978 0 1
84 50203385 0 1
85 53433610 0 2
86 54515938 0 1
87 50263935 0 8
88 52439152 0 2
89 48424129 236 3
90 47031563 0 8
91 53577610 11 1
92 48759083 11 1
93 50344731 432 1
94 51164013 0 3
95 52026977 163 7
96 50965482 0 3
97 45947594 433 8
98 53357234 0 7
99 48367529 0 8
100 54286153 0 3
> table(per$cod_jer,per$grupo_fict)
1 2 3 4 5 6 7 8
0 3990 2296 1743 1453 356 250 2031 2051
11 149 85 29 34 14 6 34 25
13 2 4 1 0 0 0 1 1
14 3 1 0 0 0 0 0 1
32 37 12 13 10 3 1 23 13
101 19 12 6 5 3 0 6 12
102 2 0 0 0 0 0 0 0
103 11 10 3 3 0 1 3 0
104 17 8 1 7 2 1 7 9
105 11 12 3 3 3 0 6 10
106 147 57 30 29 8 1 43 42
107 33 37 5 9 3 2 8 9
108 6 10 2 3 0 2 3 4
109 44 37 11 9 6 2 14 14
111 112 81 26 28 8 3 22 18
112 21 8 4 8 2 0 3 2
113 94 61 14 16 4 1 17 24
114 60 52 10 14 9 5 8 20
115 72 24 21 13 5 1 11 16
125 5 4 1 0 1 0 0 1
138 15 5 2 2 1 0 2 0
163 50 35 26 26 7 12 43 41
234 51 43 31 32 10 7 49 53
236 78 29 46 35 7 7 39 37
317 44 28 21 13 7 2 28 21
318 20 27 5 10 4 3 12 14
319 45 21 25 19 1 2 26 21
321 6 4 9 3 0 3 8 1
322 43 30 24 16 5 3 16 34
323 30 14 25 15 3 4 24 22
324 59 29 31 27 8 5 28 27
325 15 12 6 5 1 2 8 11
326 18 12 17 13 4 2 20 15
327 45 28 23 26 7 6 25 40
328 52 49 33 32 5 9 31 35
329 42 36 26 20 2 3 23 30
431 6 2 4 1 2 0 2 6
432 39 18 27 24 5 1 28 34
433 139 92 90 89 18 13 61 66
1146 97 49 26 14 7 5 24 29
1147 56 33 26 25 9 0 19 20
1349 15 9 11 10 0 1 10 3
1544 62 33 20 32 4 3 25 43
1545 37 13 22 14 1 3 14 31
1848 16 27 11 15 3 0 10 12
On the other hand, I have a data.frame with vacancies, i.e. the sample size I need within each group.
> dput(head(vacantes))
structure(list(cod_jer = c(101L, 316L, 325L, 1349L, 1544L, 102L
), vacantes = c(132, 180, 54, 63, 45, 0), vac1 = c(27, 36, 11,
13, 9, 0), vac2 = c(27, 36, 11, 13, 9, 0), vac3 = c(24, 33, 10,
12, 9, 0), vac4 = c(24, 33, 10, 12, 9, 0), vac5 = c(8, 11, 4,
4, 3, 0), vac6 = c(8, 11, 4, 4, 3, 0), vac7 = c(7, 10, 3, 3,
2, 0), vac8 = c(7, 10, 3, 3, 2, 0)), .Names = c("cod_jer", "vacantes",
"vac1", "vac2", "vac3", "vac4", "vac5", "vac6", "vac7", "vac8"
), row.names = c(NA, 6L), class = "data.frame")
> vacantes
cod_jer vacantes vac1 vac2 vac3 vac4 vac5 vac6 vac7 vac8
1 101 132 27 27 24 24 8 8 7 7
2 316 180 36 36 33 33 11 11 10 10
3 325 54 11 11 10 10 4 4 3 3
4 1349 63 13 13 12 12 4 4 3 3
5 1544 45 9 9 9 9 3 3 2 2
6 102 0 0 0 0 0 0 0 0 0
7 103 0 0 0 0 0 0 0 0 0
8 104 0 0 0 0 0 0 0 0 0
9 105 0 0 0 0 0 0 0 0 0
10 106 0 0 0 0 0 0 0 0 0
11 107 0 0 0 0 0 0 0 0 0
12 108 0 0 0 0 0 0 0 0 0
13 109 0 0 0 0 0 0 0 0 0
14 110 0 0 0 0 0 0 0 0 0
15 111 0 0 0 0 0 0 0 0 0
16 112 0 0 0 0 0 0 0 0 0
17 113 0 0 0 0 0 0 0 0 0
18 114 0 0 0 0 0 0 0 0 0
19 115 0 0 0 0 0 0 0 0 0
20 137 0 0 0 0 0 0 0 0 0
21 138 0 0 0 0 0 0 0 0 0
22 139 0 0 0 0 0 0 0 0 0
23 140 0 0 0 0 0 0 0 0 0
24 234 0 0 0 0 0 0 0 0 0
25 236 0 0 0 0 0 0 0 0 0
26 317 0 0 0 0 0 0 0 0 0
27 318 0 0 0 0 0 0 0 0 0
28 319 0 0 0 0 0 0 0 0 0
29 320 0 0 0 0 0 0 0 0 0
30 321 0 0 0 0 0 0 0 0 0
31 322 0 0 0 0 0 0 0 0 0
32 323 0 0 0 0 0 0 0 0 0
33 324 0 0 0 0 0 0 0 0 0
34 326 0 0 0 0 0 0 0 0 0
35 327 0 0 0 0 0 0 0 0 0
36 328 0 0 0 0 0 0 0 0 0
37 329 0 0 0 0 0 0 0 0 0
38 431 0 0 0 0 0 0 0 0 0
39 432 0 0 0 0 0 0 0 0 0
40 433 0 0 0 0 0 0 0 0 0
41 1146 0 0 0 0 0 0 0 0 0
42 1147 0 0 0 0 0 0 0 0 0
43 1545 0 0 0 0 0 0 0 0 0
44 1630 0 0 0 0 0 0 0 0 0
45 1848 0 0 0 0 0 0 0 0 0
I would like to draw a stratified sample within each combination of cod_jer and grupo_fict; where the number of vacancies is 0, the sample size should be 0.
I was trying this:
# Attempt at stratified sampling: collect the per-group vacancy columns
# (vac1..vac8) of `vacantes` into a single vector of sample sizes, then
# loop over that vector.
size=subset(vacantes,select=c(vac1,vac2,vac3,vac4,vac5,vac6,vac7,vac8))
size=as.matrix(size)
# as.vector() on a matrix unrolls it column by column, so the result holds
# all vac1 values first, then all vac2 values, and so on.
size=as.vector(size)
for(i in 1:length(size)) {
if (size[i] > 0 ) {
# NOTE(review): the whole `size` vector is passed here, not size[i], and `s`
# is reassigned on every iteration, so only the last iteration's value remains
# after the loop. strata() is presumably sampling::strata -- confirm.
s=strata(per,c("cod_jer","grupo_fict"),size=size,
method="srswor")
} else {
# Zero vacancies: `s` is set to the character string "0" rather than a sample.
s="0"
}}
But I can't get it to work :(
Any suggestions?
Thanks!
Related
Calculate weighted mean from matrix in R
I have a matrix that looks like the following. For rows 1:23, I would like to calculate the weighted mean, where the data in rows 1:23 are the weights and row 24 is the data. 1 107 33 41 22 12 4 122 44 297 123 51 16 7 9 1 1 0 10 5 2 2 1 0 3 4 6 12 3 3 0 1 1 0 0 0 11 1 3 1 0 0 0 4 2 8 3 4 0 0 0 0 0 0 12 2 1 1 0 0 0 2 1 5 6 3 1 0 0 0 0 0 13 1 0 1 0 0 0 3 1 3 5 2 2 0 1 0 0 0 14 3 0 0 0 0 0 3 1 2 3 0 1 0 0 0 0 0 15 0 0 0 0 0 0 2 0 0 1 0 1 0 0 0 0 0 16 0 0 0 0 1 0 0 0 2 0 0 0 0 0 0 0 0 17 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 18 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 19 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 2 80 27 37 5 6 4 97 48 242 125 44 27 7 8 8 0 2 20 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 21 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 22 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 23 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 3 47 12 33 12 6 1 63 42 200 96 45 19 6 6 9 2 0 4 45 14 21 9 4 2 54 26 130 71 36 17 8 5 1 0 2 5 42 10 14 6 3 2 45 19 89 45 26 7 4 8 2 1 0 6 17 3 12 5 2 0 18 21 51 41 19 15 5 1 1 0 0 7 16 2 6 0 0 1 14 9 37 23 17 7 3 0 3 0 0 8 9 4 4 2 1 0 7 9 30 15 8 3 3 1 1 0 1 9 12 2 3 1 1 1 6 5 14 12 5 1 2 0 0 1 0 24 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 As an example using the top two rows, there would have an additional column at the end indicated the weighted mean. 1 107 33 41 22 12 4 122 44 297 123 51 16 7 9 1 1 0 6.391011 10 5 2 2 1 0 3 4 6 12 3 3 0 1 1 0 0 0 6.232558 I'm a little new to coding so I wasn't too sure how to do it - any advice would be appreciated!
You can do: apply(df[-nrow(df), ], 1, function(row) weighted.mean(df[nrow(df), ], row))
I'm assuming your first column is some kind of index and is not used for the weighted mean (and that the data is stored in matr_dat): apply(matr_dat[-nrow(matr_dat), -1], 1, function(row) weighted.mean(matr_dat[nrow(matr_dat), -1], row)) Using apply with the margin set to 1, the function given as the third argument is applied to each row of the data; to calculate the weighted mean, you can use weighted.mean and set the weights to the values of the row.
Could anyone explain the error below from the R germinationmetrics package?
I would like to compute cumulative germination counts and Compute germination indices and Plot FPHF curves My data structure is the following: concentration temp rep Day01 Day02 Day03 Day04 Day05 Day06 Day07 1 0.0 10 1 0 0 0 0 0 0 0 2 0.5 10 1 0 0 0 0 6 6 6 3 0.3 10 1 0 0 0 0 8 8 8 4 0.1 10 1 0 0 0 0 6 6 6 5 0.0 10 2 0 0 0 0 0 0 0 6 0.5 10 2 0 0 0 0 9 9 9 7 0.3 10 2 0 0 0 0 8 8 8 8 0.1 10 2 0 0 0 0 6 6 6 9 0.0 10 3 0 0 0 0 0 0 0 10 0.5 10 3 0 0 0 0 5 5 5 11 0.3 10 3 0 0 0 0 8 8 8 12 0.1 10 3 0 0 0 0 2 2 2 13 0.0 20 1 0 0 0 0 0 7 7 14 0.5 20 1 0 0 0 0 17 17 17 15 0.3 20 1 0 0 0 0 21 21 21 16 0.1 20 1 0 0 0 0 20 20 20 17 0.0 20 2 0 0 0 0 0 7 10 18 0.5 20 2 0 0 0 0 13 13 13 19 0.3 20 2 0 0 0 0 18 18 18 20 0.1 20 2 0 0 0 0 22 22 22 21 0.0 20 3 0 0 0 0 0 14 14 22 0.5 20 3 0 0 0 0 15 15 15 23 0.3 20 3 0 0 0 0 15 15 15 24 0.1 20 3 0 0 0 0 14 14 14 25 0.0 30 1 0 0 0 0 0 0 0 26 0.5 30 1 0 0 0 0 0 0 0 27 0.3 30 1 0 0 0 0 0 0 0 28 0.1 30 1 0 0 0 0 0 0 0 29 0.0 30 2 0 0 0 0 0 0 0 30 0.5 30 2 0 0 0 0 0 0 0 31 0.3 30 2 0 0 0 0 0 0 0 32 0.1 30 2 0 0 0 0 0 0 0 33 0.0 30 3 0 0 0 0 0 0 0 34 0.5 30 3 0 0 0 0 0 0 0 35 0.3 30 3 0 0 0 0 0 0 0 36 0.1 30 3 0 0 0 0 0 0 0 Day08 Day09 Day10 Day11 Day12 Day13 Day14 Day15 Day16 Day17 Day18 1 0 0 1 1 1 1 1 1 1 1 1 2 18 18 18 18 20 20 20 20 20 20 20 3 18 18 18 18 20 20 20 20 20 20 20 4 16 16 16 16 18 18 18 19 19 19 19 5 0 0 1 1 1 1 1 1 1 1 1 6 22 22 22 22 23 23 23 23 23 23 23 7 22 22 22 22 23 23 23 23 23 23 23 8 18 18 18 18 19 19 19 19 19 19 19 9 0 0 2 2 2 4 4 4 4 4 4 10 20 20 20 20 21 21 21 21 21 21 21 11 17 17 17 17 20 20 20 20 20 20 20 12 22 22 22 22 23 23 23 23 23 23 23 13 7 7 7 7 7 7 7 7 7 7 7 14 23 23 23 23 23 23 23 23 23 23 23 15 24 24 24 24 24 24 24 24 24 24 24 16 24 24 24 24 24 24 24 24 24 24 24 17 10 10 10 10 10 10 10 10 10 10 10 18 25 25 25 25 25 25 25 25 25 25 25 19 23 23 23 23 23 23 23 23 23 23 23 20 23 23 23 23 23 23 23 23 23 23 23 21 14 14 14 14 14 14 14 14 14 14 14 22 23 23 23 23 23 23 23 23 23 23 23 23 21 21 21 21 21 21 21 21 21 
21 21 24 20 20 20 20 20 20 20 20 20 20 20 25 0 0 0 0 0 0 0 0 0 0 0 26 0 0 0 0 0 0 0 0 0 0 0 27 0 0 0 0 0 0 0 0 0 0 0 28 0 0 0 0 0 0 0 0 0 0 0 29 0 0 0 0 0 0 0 0 0 0 0 30 0 0 0 0 0 0 0 0 0 0 0 31 0 0 0 0 0 0 0 0 0 0 0 32 0 0 0 0 0 0 0 0 0 0 0 33 0 0 0 0 0 0 0 0 0 0 0 34 0 0 0 0 0 0 0 0 0 0 0 35 0 0 0 0 0 0 0 0 0 0 0 36 0 0 0 0 0 0 0 0 0 0 0 Day19 Total.Seeds 1 1 25 2 20 25 3 20 25 4 19 25 5 1 25 6 23 25 7 23 25 8 19 25 9 4 25 10 21 25 11 20 25 12 23 25 13 7 25 14 23 25 15 24 25 16 24 25 17 10 25 18 25 25 19 23 25 20 23 25 21 14 25 22 23 25 23 21 25 24 20 25 25 0 25 26 0 25 27 0 25 28 0 25 29 0 25 30 0 25 31 0 25 32 0 25 33 0 25 34 0 25 35 0 25 36 0 25 I receive the following error: data(gcdata1) Warning message: In data(gcdata1) : data set ‘gcdata1’ not found I created the below variable for counts.per.intervals counts.per.intervals <- c("Day01", "Day02", "Day03", "Day04", "Day05", + "Day06", "Day07", "Day08", "Day09", "Day10", + "Day11", "Day12", "Day13", "Day14", "Day15", "Day16", "Day17", "Day18", "Day19") As the following variable for indices indices<-germination.indices(gcdata1, total.seeds.col = "Total.Seeds", counts.intervals.cols = counts.per.intervals, intervals = 1:19, partial = FALSE, max.int = 5) I received the below error: Error in if (nearest[2] == nearest[1]) { : missing value where TRUE/FALSE needed In addition: There were 50 or more warnings (use warnings() to see the first 50)
Summing up different elements in a matrix in R
I'm trying to perform calculations on different elements in a matrix in R. My Matrix is 18x18 and I would like to get e.g. the mean of each 6x6 array (which makes 9 arrays in total). My desired arrays would be: A1 <- df[1:6,1:6] A2 <- df[1:6,7:12] A3 <- df[1:6,13:18] B1 <- df[7:12,1:6] B2 <- df[7:12,7:12] B3 <- df[7:12,13:18] C1 <- df[13:18,1:6] C2 <- df[13:18,7:12] C3 <- df[13:18,13:18] The matrix looks like this: 5 10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 5 14 17 9 10 8 4 10 12 18 9 13 14 NA NA 19 15 10 10 10 30 32 23 27 17 28 25 12 28 29 28 26 19 25 34 24 11 17 15 16 16 16 9 17 27 17 16 30 13 18 13 15 13 19 8 7 9 20 15 12 18 18 18 6 4 6 9 11 10 10 13 11 8 10 15 15 25 7 13 21 7 3 5 2 5 5 4 3 2 3 5 2 1 5 6 30 5 9 1 7 7 4 4 12 8 9 2 0 5 2 1 0 2 6 35 3 0 2 0 0 4 4 7 4 4 5 2 0 0 1 0 0 0 40 0 4 0 0 0 1 3 9 10 10 1 0 0 0 1 0 1 0 45 0 0 0 0 0 3 10 9 17 9 1 0 0 0 0 0 0 0 50 0 0 2 0 0 0 2 8 20 0 0 0 0 0 1 0 0 0 55 0 0 0 0 0 0 7 3 21 0 0 0 0 0 0 0 0 0 60 0 0 0 0 3 4 10 2 2 0 0 1 0 0 0 0 0 0 65 0 0 0 0 0 4 8 4 8 11 0 0 0 0 0 0 0 0 70 0 0 0 0 0 6 2 5 14 0 0 0 0 0 0 0 0 0 75 0 0 0 0 0 4 0 5 9 0 0 0 0 0 0 0 0 0 80 0 0 0 0 0 4 4 0 4 2 0 0 0 0 0 0 0 0 85 0 0 0 0 0 0 0 4 1 1 0 0 0 0 0 0 0 0 90 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 Is there a clean way to solve this issue with a loop? Thanks a lot in advance, Paul
Given your matrix, e.g. x <- matrix(1:(18*18), ncol=18) Try, for example, for sub-matrices of size 6: step <- 6 nx <- nrow(x) if((nx %% step) != 0) stop("nx %% step should be 0") indI <- seq(1, nx, by=step) nbStep <- length(indI) for(Col in 1:nbStep){ for(Row in 1:nbStep){ name <- paste0(LETTERS[Col],Row) theCol <- indI[Col]:(indI[Col]+step-1) theRow <- indI[Row]:(indI[Row]+step-1) assign(name, sum(x[theCol, theRow])) } } You'll get your results in A1, A2, A3... This is the idea. Adapt the code for non-square matrices, different sub-matrix sizes, ...
Here's one way: # generate fake data set.seed(47) n = 18 m = matrix(rpois(n * n, lambda = 5), nrow = n) # generate starting indices n_array = 6 start_i = seq(1, n, by = n_array) arr_starts = expand.grid(row = start_i, col = start_i) # calculate sums with(arr_starts, mapply(function(x, y) sum(m[(x + 1:n_array) - 1, (y + 1:n_array) - 1]), row, col)) # [1] 158 188 176 201 188 201 197 206 204
How to show all threads on a specific CPU on Solaris?
Some process (or threads) is hammering CPU0 as you can see in mpstat 30 2 CPU minf mjf xcal intr ithr csw icsw migr smtx srw syscl usr sys wt idl 0 0 0 0 13 0 2 7 0 151 0 4250 99 1 0 0 1 114 0 2 197 84 5220 5 10 109 0 10518 30 2 0 67 2 79 0 1 184 83 5208 5 5 89 0 9788 30 2 0 68 3 67 0 1 181 84 5150 5 4 87 0 9510 30 2 0 69 4 53 0 3 171 72 12238 4 7 183 0 22214 3 3 0 94 5 43 0 3 135 7 218 2 6 16 0 162 0 1 0 99 6 110 0 2 172 79 4918 5 3 164 0 9553 34 2 0 64 7 120 0 1 180 80 4873 4 4 194 0 9494 32 2 0 66 8 53 0 1 23 2 28665 5 7 494 0 62023 12 9 0 79 9 43 0 0 34 2 21469 6 8 676 0 58090 10 13 0 77 10 59 0 1 210 2 33462 4 4 227 0 63500 7 16 0 78 11 93 0 2 16940 16627 1261 2 6 1027 0 2043 0 10 0 90 12 17 0 1 65 3 59 0 3 3 0 19 0 0 0 100 13 6 0 1 89 4 104 0 3 2 0 9 0 0 0 100 14 4 0 10 65 5 54 0 3 1 0 12 0 0 0 100 15 4 0 1 66 6 56 0 3 2 0 21 0 0 0 100 16 2 0 0 91 16 78 0 3 2 0 30 0 0 0 100 17 17 0 1 80 15 70 0 4 2 0 79 0 0 0 100 18 76 0 3 14946 14928 25 0 4 24 0 102 0 4 0 96 19 57 0 0 20 2 17 0 3 15 0 107 0 0 0 100 20 18 0 0 26 0 25 0 3 10 0 21 0 0 0 100 21 0 0 0 106 70 46 0 3 4 0 40 0 1 0 99 22 13 0 0 31 3 28 0 3 4 0 49 0 0 0 100 23 0 0 0 35 5 24 0 3 5 0 54 0 0 0 100 but with prstat -P0 only see the ndbmtd running wit around 15% on CPU0 PID USERNAME SIZE RSS STATE PRI NICE TIME CPU PROCESS/NLWP 20028 root 77G 75G cpu0 40 0 8369:33:0 15% ndbmtd/44 660 root 6200K 3700K sleep 59 0 0:00:53 0.0% inetd/4 159 daemon 4540K 2408K sleep 59 0 0:00:09 0.0% kcfd/3 11 root 11M 10M sleep 59 0 0:00:58 0.0% svc.configd/15 Is there a way to show all processes and treads on CPU0?
To show all processes and threads (LWPs) on CPU0: prstat -P0 -L
Importing DAT file into R but uneven columns
I have a DAT file I want to read into R but when I import my data, it keeps on showing I have 10 columns/variables (coming from first line) when in actuality, it is really supposed to be 29 columns/variables. How do i fix this problem? DAT file example on notepad: smsa66 smsa76 nearc2 nearc4 nearc4a nearc4b ed76 ed66 age76 daded nodaded momed nomomed momdad14 sinmom14 step14 south66 south76 lwage76 famed black wage76 enroll76 kww iqscore mar76 libcrd14 exp76 exp762 1 1 0 0 0 0 7 5 29 9.94 1 10.25 1 1 0 0 0 0 6.306275 9 1 548 0 15 . 1 0 16 256 1 1 0 0 0 0 12 11 27 8 0 8 0 1 0 0 0 0 6.175867 8 0 481 0 35 93 1 1 9 81 1 1 0 0 0 0 12 12 34 14 0 12 0 1 0 0 0 0 6.580639 2 0 721 0 42 103 1 1 16 256 1 1 1 1 1 0 11 11 27 11 0 12 0 1 0 0 0 0 5.521461 6 0 250 0 25 88 1 1 10 100 1 1 1 1 1 0 12 12 34 8 0 7 0 1 0 0 0 0 6.591674 8 0 729 0 34 108 1 0 16 256 1 1 1 1 1 0 12 11 26 9 0 12 0 1 0 0 0 0 6.214608 6 0 500 0 38 85 1 1 8 64 1 1 1 1 1 0 18 16 33 14 0 14 0 1 0 0 0 0 6.336826 1 0 565 0 41 119 1 1 9 81 1 1 1 1 1 0 14 13 29 14 0 14 0 1 0 0 0 0 6.410175 1 0 608 0 46 108 1 1 9 81
txt1<-" smsa66 smsa76 nearc2 nearc4 nearc4a nearc4b ed76 ed66 age76 daded nodaded momed nomomed momdad14 sinmom14 step14 south66 south76 lwage76 famed black wage76 enroll76 kww iqscore mar76 libcrd14 exp76 exp762" txt2 <- " 1 1 0 0 0 0 7 5 29 9.94 1 10.25 1 1 0 0 0 0 6.306275 9 1 548 0 15 NA 1 0 16 256 1 1 0 0 0 0 12 11 27 8 0 8 0 1 0 0 0 0 6.175867 8 0 481 0 35 93 1 1 9 81 1 1 0 0 0 0 12 12 34 14 0 12 0 1 0 0 0 0 6.580639 2 0 721 0 42 103 1 1 16 256 1 1 1 1 1 0 11 11 27 11 0 12 0 1 0 0 0 0 5.521461 6 0 250 0 25 88 1 1 10 100 1 1 1 1 1 0 12 12 34 8 0 7 0 1 0 0 0 0 6.591674 8 0 729 0 34 108 1 0 16 256 1 1 1 1 1 0 12 11 26 9 0 12 0 1 0 0 0 0 6.214608 6 0 500 0 38 85 1 1 8 64 1 1 1 1 1 0 18 16 33 14 0 14 0 1 0 0 0 0 6.336826 1 0 565 0 41 119 1 1 9 81 1 1 1 1 1 0 14 13 29 14 0 14 0 1 0 0 0 0 6.410175 1 0 608 0 46 108 1 1 9 81" Now the code: inp <- scan(text=txt2, what="numeric") inmat <- matrix( as.numeric(inp), ncol=29, byrow=TRUE) dfrm <- as.data.frame(inmat) scan(text=txt1, what="") Read 29 items [1] "smsa66" "smsa76" "nearc2" "nearc4" "nearc4a" "nearc4b" "ed76" [8] "ed66" "age76" "daded" "nodaded" "momed" "nomomed" "momdad14" [15] "sinmom14" "step14" "south66" "south76" "lwage76" "famed" "black" [22] "wage76" "enroll76" "kww" "iqscore" "mar76" "libcrd14" "exp76" [29] "exp762" names(dfrm) <- scan(text=txt1, what="") #Read 29 items dfrm #----------------------- smsa66 smsa76 nearc2 nearc4 nearc4a nearc4b ed76 ed66 age76 daded nodaded momed nomomed 1 1 1 0 0 0 0 7 5 29 9.94 1 10.25 1 2 1 1 0 0 0 0 12 11 27 8 0 8 0 3 1 1 0 0 0 0 12 12 snipped remainder of output Final result: str(dfrm) 'data.frame': 8 obs. 
of 29 variables: $ smsa66 : num 1 1 1 1 1 1 1 1 $ smsa76 : num 1 1 1 1 1 1 1 1 $ nearc2 : num 0 0 0 1 1 1 1 1 $ nearc4 : num 0 0 0 1 1 1 1 1 $ nearc4a : num 0 0 0 1 1 1 1 1 $ nearc4b : num 0 0 0 0 0 0 0 0 $ ed76 : num 7 12 12 11 12 12 18 14 $ ed66 : num 5 11 12 11 12 11 16 13 $ age76 : num 29 27 34 27 34 26 33 29 $ daded : num 9.94 8 14 11 8 9 14 14 $ nodaded : num 1 0 0 0 0 0 0 0 $ momed : num 10.2 8 12 12 7 ... $ nomomed : num 1 0 0 0 0 0 0 0 $ momdad14: num 1 1 1 1 1 1 1 1 $ sinmom14: num 0 0 0 0 0 0 0 0 $ step14 : num 0 0 0 0 0 0 0 0 $ south66 : num 0 0 0 0 0 0 0 0 $ south76 : num 0 0 0 0 0 0 0 0 $ lwage76 : num 6.31 6.18 6.58 5.52 6.59 ... $ famed : num 9 8 2 6 8 6 1 1 $ black : num 1 0 0 0 0 0 0 0 $ wage76 : num 548 481 721 250 729 500 565 608 $ enroll76: num 0 0 0 0 0 0 0 0 $ kww : num 15 35 42 25 34 38 41 46 $ iqscore : num NA 93 103 88 108 85 119 108 $ mar76 : num 1 1 1 1 1 1 1 1 $ libcrd14: num 0 1 1 1 0 1 1 1 $ exp76 : num 16 9 16 10 16 8 9 9 $ exp762 : num 256 81 256 100 256 64 81 81