I have a list "hhvrs" of length 2 with names and values. The names of these 2 elements of list are years "1920" and "1929".
$`1920`
Nykvarn - 147 - 211920 Nykvarn - 262 - 211920 ...
1.235629 1.013191 ...
$`1929`
Långed - 125 - 11929 Långed - 126 - 11929 ...
1.316499 1.026785 ...
I also have a data.frame "data" consisting of two years, 1920 and 1929. See dput at the bottom of this post.
I then want to negatively match names (i.e. exclude the names present in the list above). In other words, I want to keep only those rows of my data frame whose value in the last column, uniquezCorrectCG, is not present in the list above. I then want to calculate efficiencies for each company without the names present in the list.
Here is my code:
# For every distinct year in `data`: take that year's rows, remove the rows whose
# uniquezCorrectCG is among the names of the looked-up hhvrs element, and return
# the efficiency scores (e$eff) that dea() computes for the remaining rows.
hhvrsu=lapply(unique(data$year),function(x){
library(Benchmarking)
datat=data[data$year==x,]
# NOTE(review): x has whatever type data$year has. If it is numeric, `[[` selects
# a list element by position, not by the name "1920"/"1929" -- the EDIT further
# down switches to as.character(x) for this reason.
datat2=datat[!(datat$uniquezCorrectCG %in% names(hhvrs[[x]])),]
# output: a single column, kept as a data.frame by drop=FALSE
y <- datat2[,"Ouput_ton",drop=FALSE]
# columns 5 and 4 are plant_name and plant_code in the dput of `data` below
rownames(y)=paste(datat2[,5],"-",datat2[,4])
# inputs: four input columns bound together with cbind
# (from here on x is the input matrix and no longer the year)
x=with(datat2,
cbind(Labour_input_1000_hour,
Capital_input_1000_sek,
Electric_input_Mwh,
Rawmaterial_input_M3))
# column 3 is year_cg_code in the dput of `data` below
rownames(x)=paste(datat2[,5],"-",datat2[,4],"-",datat2[,3])
# RTS="vrs": presumably variable returns to scale -- confirm in the dea() docs
e <- dea(x,y,RTS="vrs")
return(e$eff) }
)
# label each list element with the year it was computed for
names(hhvrsu)=unique(data$year)
But that fails. For example the company Långed - 125 - 11929 year 1929 is still present in the output of my code, while it should be dropped because Långed - 125 - 11929 is present in the list above...
head(hhvrsu[["1929"]])
Billingsfors - 123 - 11929 Billingsfors - 124 - 11929 Långed - 125 - 11929 Långed - 126 - 11929 Långed - 127 - 11929
0.9975506 1.0000000 1.0000000 1.0000000 1.0000000
Hånsfors - 183 - 21929
0.9928677
But it still works if i do it manually:
# Same steps as the body of the lapply above, written out for the single year
# 1929. Here hhvrs is indexed with the string "1929", i.e. by element name.
datat=data[data$year==1929,]
datat2=datat[!(datat$uniquezCorrectCG %in% names(hhvrs[["1929"]])),]
# output: a single column, kept as a data.frame by drop=FALSE
y <- datat2[,"Ouput_ton",drop=FALSE]
rownames(y)=paste(datat2[,5],"-",datat2[,4])
# inputs: four input columns bound together with cbind
x=with(datat2,cbind(Labour_input_1000_hour,Capital_input_1000_sek,Electric_input_Mwh,Rawmaterial_input_M3))
rownames(x)=paste(datat2[,5],"-",datat2[,4],"-",datat2[,3])
e <- dea(x,y,RTS="vrs")
# show the first few efficiency scores
head(e$eff)
Billingsfors - 123 - 11929 Billingsfors - 124 - 11929 Hånsfors - 183 - 21929 Hällefors - 237 - 21929 Grycksbo - 350 - 21929
0.9984071 1.0000000 1.0000000 0.5863832 0.9813024
Brättne - 100 - 31929
0.9915349
in e$eff above Långed - 125 - 11929 is dropped!
EDIT:
It works if I put as.character(x) below instead of simply x
# Load the DEA routines once, before iterating over the years.
library(Benchmarking)

# For every distinct year in `data`: take that year's rows, drop the rows whose
# uniquezCorrectCG is listed in hhvrs for that year, and return the efficiency
# scores (e$eff) that dea() computes for the remaining rows.
years <- unique(data$year)
hhvrsu <- lapply(years, function(x) {
  datat <- data[data$year == x, ]
  # hhvrs is keyed by the year as a *name* ("1920", "1929"), so the lookup must
  # use a character key; a numeric x would be treated as a position by `[[`.
  excluded <- names(hhvrs[[as.character(x)]])
  datat2 <- datat[!(datat$uniquezCorrectCG %in% excluded), ]
  # output: a single column, kept as a data.frame by drop = FALSE
  y <- datat2[, "Ouput_ton", drop = FALSE]
  rownames(y) <- paste(datat2[, 5], "-", datat2[, 4])
  # inputs: kept in their own variable so that x keeps meaning "the year"
  inputs <- with(
    datat2,
    cbind(
      Labour_input_1000_hour,
      Capital_input_1000_sek,
      Electric_input_Mwh,
      Rawmaterial_input_M3
    )
  )
  rownames(inputs) <- paste(datat2[, 5], "-", datat2[, 4], "-", datat2[, 3])
  e <- dea(inputs, y, RTS = "vrs")
  e$eff
})
# label each list element with the year it was computed for
names(hhvrsu) <- years
Any suggestions?
Dputs:
dput(hhvrs)
structure(list(`1920` = structure(c(1.23562876282578, 1.01319073788091,
1.55783496400001, 1.06191988898698, 1.12744927131341, 1.08504615635299,
1.25725741409574, 2.03370195312046, 1.00667697472372, 1.00260726981462,
1.3050604346423, 1.3594555255334, 1.55671945006842, 1.0072581093466,
1.65164991096899, 2.47385616808447, 1.18471196771314, 1.24186522915967,
1.65133103063843, Inf, 1.16498198151401, 1.07017484481922), .Names = c("Nykvarn - 147 - 211920",
"Nykvarn - 262 - 211920", "Tumba - 68 - 381920", "Byske - 294 - 451920",
"Långed - 127 - 571920", "Väja - 270 - 691920", "Ljusfors - 141 - 731920",
"Skärblacka - 370 - 731920", "Sätra - 152 - 781920", "Krokfors - 129 - 871920",
"Åsen - 207 - 1011920", "Åsen - 208 - 1011920", "Lagerfors - 225 - 10121920",
"Lindefors - 243 - 10281920", "Munksjö - 253 - 10281920", "Qvill - 211 - 10431920",
"Esseltewell - 375 - 10521920", "Esseltewell - 376 - 10521920",
"Ulriksfors - 205 - 10541920", "Sellnäs - 352 - 10541920", "Vivstavarv - 314 - 10751920",
"Älvsborg - 369 - 10791920")), `1929` = structure(c(1.31649939189229,
1.02678542256861, 1.50667886828221, 1.06101596031178, 1.00477142430659,
Inf, 1.00038550231904, 1.10347307305662, 1.53782048667181, 1.80890790261425,
1.06103833744605, 1.00036736526695, 1.01053736983199, 1.01119078294682,
1.00295000872313, 1.01778128036389, 1.22049428994262, 1.15078822074877,
1.00346763843347, 1.2192497185324, 1.03195112444193, 1.71491513543284,
1.00168840525869, 1.00575972592046, 1.105483053952, 1.00427057272637,
1.94482017228275, 1.00388363163126), .Names = c("Långed - 125 - 11929",
"Långed - 126 - 11929", "Långed - 127 - 11929", "Hällefors - 234 - 21929",
"Göteborg-Dals - 156 - 91929", "Papyrus - 280 - 231929", "Sofiehem - 330 - 271929",
"Tollare - 66 - 361929", "Tumba - 68 - 381929", "Alstermo - 4 - 491929",
"Billerud - 106 - 571929", "Fengersfors - 135 - 711929", "Gamlestaden - 153 - 821929",
"Gransholm - 228 - 851929", "Åsen - 207 - 1011929", "Nykvarn - 262 - 1101929",
"Haga - 24 - 10041929", "Ljusne - 218 - 10181929", "Husum - 232 - 10251929",
"Munksjö - 253 - 10281929", "Pauliström - 239 - 10311929", "Qvill - 211 - 10431929",
"Esseltewell - 375 - 10521929", "Ställdalen - 356 - 10531929",
"Kvarnsveden - 343 - 10541929", "Skutskär - 345 - 10541929",
"Sellnäs - 352 - 10541929", "Vivstavarv - 314 - 10751929"))), .Names = c("1920",
"1929"))
Dput data.frame
dput( data[data$year==1929,][1:5,])
structure(list(company_code = c(1L, 1L, 1L, 1L, 1L), company_name = c("AB Billingsfors-Långed",
"AB Billingsfors-Långed", "AB Billingsfors-Långed", "AB Billingsfors-Långed",
"AB Billingsfors-Långed"), year_cg_code = c(11929L, 11929L, 11929L,
11929L, 11929L), plant_code = 123:127, plant_name = c("Billingsfors",
"Billingsfors", "Långed", "Långed", "Långed"), plant_location = c("Billingsfors",
"Billingsfors", "Dals Långed", "Dals Långed", "Dals Långed"),
plant_location_by_municipal = c("Bengtsfors", "Bengtsfors",
"Bengtsfors", "Bengtsfors", "Bengtsfors"), year = c(1929L,
1929L, 1929L, 1929L, 1929L), Output_value_1000_sek = c(720L,
2304L, 531L, 3040L, 2079L), Labour_cost_1000_sek = c(102L,
348L, 93L, 199L, 225L), Capital_cost_1000_sek = c(108L, 468L,
126L, 304L, 180L), Electricity_cost_1000_sek = c(130L, 90L,
10L, 120L, 40L), Raw_material_cost_1000_sek = c(174L, 744L,
177L, 1824L, 1080L), Output_price_1_sek.ton = c(220L, 220L,
220L, 220L, 220L), Output_price__sek.ton = c(196L, 196L,
196L, 196L, 196L), Labour_price_sek.hour = c(1, 1.208333333,
2.657142857, 1.093406593, 2.083333333), Capital_price_interest.rate = c(4.556666667,
4.556666667, 4.556666667, 4.556666667, 4.556666667), Motive_Power_pricekr.MwH = c(43.10344828,
67.61833208, 31.54574132, 93.45794393, 45.14672686), Electricity_price_kr.MwH = c(24.34456929,
24.19354839, 13.88888889, 25.26315789, 22.22222222), Raw_Material_price_kr.m3 = c(14.5,
15.5, 11.8, 19, 12), Mean_raw.material_price = c(14.3, 14.3,
14.3, 14.3, 14.3), Output_capacity_ton = c(6000L, 12000L,
3000L, 9500L, 9000L), Ouput_ton = c(3272L, 10472L, 2413L,
13818L, 9450L), Labour_input_1000_hour = c(102L, 288L, 35L,
182L, 108L), Capital_input_1000_sek = c(2853L, 1975L, 219L,
2634L, 878L), Motive_Power_Mwh = c(3016L, 1331L, 317L, 1284L,
886L), Electric_input_Mwh = c(5340, 3720, 720, 4750, 1800
), Rawmaterial_input_M3 = c(12000, 48000, 15000, 96000, 90000
), Capacity_Utilization = c(54.53333333, 87.26666667, 80.43333333,
145.4526316, 105), Labour_cost_share = c(14.16666667, 15.10416667,
17.51412429, 6.546052632, 10.82251082), Capital_cost_share = c(15,
20.3125, 23.72881356, 10, 8.658008658), Electricity_cost_share = c(18.05555556,
3.90625, 1.883239171, 3.947368421, 1.924001924), Raw_Material_cost_share = c(24.16666667,
32.29166667, 33.33333333, 60, 51.94805195), Labour_productivity = c(1.433165382,
1.624502304, 3.080154233, 3.392008925, 3.909230144), Capital_productivity = c(4.8,
22.1, 45.8, 21.9, 44.8), Power_productivity = c(0.24, 1.73,
1.68, 2.37, 2.35), Electricity_productivity = c(0.303469526,
1.39421497, 1.659846295, 1.440769899, 2.60017364), Raw.material.productivity = c(1.439189112,
1.151527229, 0.849086388, 0.759730866, 0.554210966), uniquezCorrect = c("Billingsfors - 123",
"Billingsfors - 124", "Långed - 125", "Långed - 126", "Långed - 127"
), uniquezCorrectCG = c("Billingsfors - 123 - 11929", "Billingsfors - 124 - 11929",
"Långed - 125 - 11929", "Långed - 126 - 11929", "Långed - 127 - 11929"
)), .Names = c("company_code", "company_name", "year_cg_code",
"plant_code", "plant_name", "plant_location", "plant_location_by_municipal",
"year", "Output_value_1000_sek", "Labour_cost_1000_sek", "Capital_cost_1000_sek",
"Electricity_cost_1000_sek", "Raw_material_cost_1000_sek", "Output_price_1_sek.ton",
"Output_price__sek.ton", "Labour_price_sek.hour", "Capital_price_interest.rate",
"Motive_Power_pricekr.MwH", "Electricity_price_kr.MwH", "Raw_Material_price_kr.m3",
"Mean_raw.material_price", "Output_capacity_ton", "Ouput_ton",
"Labour_input_1000_hour", "Capital_input_1000_sek", "Motive_Power_Mwh",
"Electric_input_Mwh", "Rawmaterial_input_M3", "Capacity_Utilization",
"Labour_cost_share", "Capital_cost_share", "Electricity_cost_share",
"Raw_Material_cost_share", "Labour_productivity", "Capital_productivity",
"Power_productivity", "Electricity_productivity", "Raw.material.productivity",
"uniquezCorrect", "uniquezCorrectCG"), row.names = 6:10, class = "data.frame")
I'd do it a bit different (not using lapply at all). I'd use stack to construct a data.frame from hhvrs as follows, first:
# Flatten hhvrs into a data.frame and keep only stack()'s "ind" column, which
# records the list element (year) each value came from.
my.df <- stack(hhvrs)[, c("ind"), drop = FALSE]
# Rename that column so it lines up with data$year.
names(my.df) <- c("year")
# Copy the row names into a uniquezCorrectCG column.
# NOTE(review): this relies on stack() carrying the names of the inner vectors
# over as row names -- confirm on your R version.
my.df <- transform(my.df, uniquezCorrectCG = rownames(my.df))
# The row names have been copied into a column, so reset them.
rownames(my.df) <- NULL
Now check for those entries where year and uniquezCorrectCG are present in data but not in my.df.
# Put my.df on top of the (year, uniquezCorrectCG) columns of data; duplicated()
# then flags each row that repeats an earlier row. Dropping the first
# nrow(my.df) flags leaves one flag per row of data, and the rows that are not
# flagged are kept.
# NOTE(review): a row of data that repeats an earlier row of data itself would
# be flagged as well -- confirm the (year, uniquezCorrectCG) pairs are unique.
data[!duplicated(rbind(my.df, data[, c("year",
"uniquezCorrectCG")]))[-seq_len(nrow(my.df))], ]
Related
When solving a portfolio optimization problem with an additional 1-norm constraint on the portfolio weights, I ran into convergence problems.
Description of the exercise:
For N given assets with T observations of their returns, find the value of the 1-norm bound Theta such that the last-period (T-th) portfolio return is maximized. That is, for each Theta solve the problem $\min_w \; w' \, \mathrm{COV} \, w$ s.t. $w_1 + w_2 + \dots + w_N = 1$ and $|w_1| + |w_2| + \dots + |w_N| \le \Theta$ and, out of all values of Theta, choose the one with the maximum value of $w' r_T$, where $r_T$ is the vector of asset returns in the last period and COV is the variance-covariance matrix of asset returns.
Description of the problem:
First I tried the "naive" approach: with a grid of 0.001 for Theta from 1 to 6, I wanted to solve the portfolio optimization problem and compute the last-period portfolio return for each Theta. The idea was then to choose the value of Theta with the largest corresponding last-period return. However, I noticed that for quite a few values of Theta the solnp function did not converge. The problem occurred mostly for small values of Theta: from 1 to 3. For larger values no convergence problems were detected.
The second approach was to use solnp function twice: first as the function to find Theta and second as the inner part of the objective function. However, I could not find reliable estimates in this way: the values I got did not deliver the optimal solution. Apparently, the objective function is not smooth, but gosolnp function does not find the solution.
The code with data (6 assets with 120 return observations) is provided below. Any suggestions are welcome.
> exp_d
[,1] [,2] [,3] [,4] [,5] [,6]
1 1.3724 0.9081 -0.0695 5.7168 1.9642 1.4222
2 0.6095 1.5075 5.3842 2.7154 2.6838 6.3154
3 -2.6779 -0.1359 -0.4374 1.4287 0.0709 -0.7967
4 -3.5365 -4.3572 -2.0112 -3.5898 -2.3460 -4.0970
5 3.1210 3.6608 2.0944 3.1292 2.8965 3.4614
6 2.7364 1.8411 3.2639 2.9678 2.6067 2.3950
7 -1.0001 -0.3782 3.9316 -0.2621 0.0347 4.4635
8 3.9022 6.3784 6.6192 5.0044 3.5568 8.6305
9 -1.6000 -0.9889 -3.1676 1.3025 0.2071 -2.4764
10 -1.3184 0.8741 3.4796 3.0510 -0.7634 -0.5452
11 5.5482 3.3467 13.3256 5.4076 5.1017 7.4921
12 -1.5484 1.3040 -3.9474 -0.9628 -3.0156 -1.6326
13 4.3331 5.2347 3.9846 9.2369 6.7429 7.2603
14 2.3503 -2.2044 0.8600 3.9191 1.2181 -1.9651
15 2.3981 2.2316 0.3990 5.3864 4.3919 5.9674
16 -0.1633 -2.1458 -5.8357 -3.6349 -4.2840 -6.6219
17 10.4346 8.0620 10.2275 7.0560 6.7676 6.6346
18 5.5505 2.6016 2.4506 2.4954 1.8547 3.4755
19 3.2031 2.7804 3.5948 -0.4774 -0.3667 -2.3168
20 -4.7913 -1.7203 -4.1271 -0.6762 -1.1395 -2.7296
21 7.3930 8.6229 9.4570 12.2800 6.1327 7.8254
22 4.2158 10.6845 9.9723 2.9145 6.0000 4.4979
23 7.5326 1.9540 2.5740 2.6065 -0.1128 0.6388
24 -8.5131 -8.3044 -6.8294 -3.6094 -4.1224 -5.4164
25 -0.4048 -0.4017 -0.8867 1.3590 0.1098 0.9017
26 6.1240 5.0517 3.6990 8.7368 5.3867 6.9468
27 5.7317 4.4538 6.1762 3.4108 1.9153 4.4896
28 6.8299 3.1244 1.6621 1.3590 1.4325 2.0067
29 8.6705 11.1936 12.2831 11.1602 13.2781 13.1497
30 0.9055 -0.5953 -0.6462 0.9332 -0.0008 1.2917
31 -0.0340 1.9379 1.4480 6.5262 4.8373 2.6307
32 0.7414 1.1014 0.3820 -0.5791 0.8306 3.1476
33 -5.9533 -3.4602 -4.1597 -1.3835 2.2098 -0.0642
34 -0.0822 2.7549 0.5136 2.2172 1.1145 2.8362
35 -10.2009 -9.3603 -12.8907 -5.7297 -4.1622 -6.1709
36 6.9673 9.4356 6.4064 12.4593 9.3500 7.5052
37 -0.0681 -0.7029 3.8633 3.5471 6.2018 4.9529
38 -0.8885 -0.4641 -0.4544 7.2353 12.5430 6.9497
39 -4.2569 -4.3105 -3.6218 -4.4211 -4.9685 -6.3175
40 -21.7261 -19.3351 -19.8329 -24.1865 -14.0943 -10.3723
41 -16.4096 -9.8425 -12.3366 -14.2535 -10.9314 -7.6677
42 -2.7511 -2.7249 -1.8488 3.3988 0.5515 1.3170
43 6.2347 9.4395 8.4631 7.1735 3.3113 2.9221
44 1.3502 1.2075 4.3779 3.4639 1.8818 1.2221
45 9.0009 11.0661 10.7647 6.8160 8.3348 5.3573
46 -6.0956 -1.7406 -3.4814 -2.3610 -1.7939 -6.6523
47 -3.7611 -2.3132 -2.9578 0.7061 -2.2643 -1.3650
48 -16.9330 -16.6497 -18.9287 -17.8490 -13.1565 -11.8439
49 6.5208 3.7280 2.5406 4.4017 4.2198 5.2644
50 -3.7646 -1.6135 -2.5073 1.1969 -0.8000 -1.6232
51 -13.6823 -14.6721 -19.1619 -12.0325 -11.7394 -17.0858
52 -10.1007 -7.6801 -10.7443 -9.4278 -7.3264 -11.4835
53 0.2234 -2.7838 -2.6023 -2.3616 -2.8339 -6.6002
54 -10.4236 -11.0266 -17.2866 -5.8057 -9.1776 -9.7245
55 9.9121 11.3602 15.4627 4.7170 6.9823 13.5911
56 13.2032 11.6577 17.0900 12.8706 6.7357 12.1756
57 -4.3440 -2.9204 -6.9601 -4.1971 -10.3577 -8.8826
58 -12.8949 -14.0212 -18.2229 -8.8412 -11.1070 -11.3490
59 -9.6709 -8.7006 -13.8440 -11.3932 -15.9343 -20.3630
60 10.1262 7.7295 20.3174 12.8509 16.1956 25.2657
61 -8.5867 -8.9411 -5.7363 -5.1668 -10.1775 -12.2143
62 -1.6114 -1.1797 -3.7504 0.8986 -1.5948 0.0531
63 -26.5722 -30.0521 -33.8286 -28.8671 -28.1493 -35.1131
64 4.2319 9.2071 5.4593 9.5401 3.2588 11.7214
65 -7.7335 -7.2915 -9.3902 -6.1288 -16.6307 -14.5665
66 -13.7987 -16.5088 -22.4192 -11.5667 -18.8221 -20.6686
67 2.4924 3.9264 10.5116 -3.4347 1.9132 6.6353
68 -2.4754 1.9822 1.3833 7.1142 1.7366 0.3507
69 -10.5259 -12.1366 -10.6244 -10.2031 -15.1403 -14.7460
70 -19.3072 -17.1449 -16.0430 -18.1275 -20.1476 -18.5418
71 -17.1387 -24.8400 -17.4741 -19.2184 -25.9531 -25.4360
72 0.9540 6.2481 1.0364 -1.7110 0.0545 8.8476
73 31.9086 36.1125 63.2475 28.1955 48.6209 67.7847
74 48.7266 55.3725 83.5754 31.3211 50.9661 62.5277
75 -2.8054 -4.5841 -12.4737 -1.0366 -6.5149 -5.0087
76 -16.2535 -19.6450 -23.8672 -10.7514 -17.5126 -23.2272
77 -0.2972 -8.2786 -13.2053 -2.7223 -8.9288 -16.3816
78 -5.1581 -4.2529 -11.0631 6.8503 1.0438 -3.5633
79 4.3896 1.3914 7.8950 -0.0120 3.0297 8.7685
80 -17.8468 -18.4137 -20.8881 -14.9493 -16.5816 -17.3738
81 7.0790 6.1577 16.0332 0.9569 9.8608 6.7577
82 45.4453 54.8024 56.5610 33.7390 51.8933 57.4905
83 59.9378 62.1965 73.3394 16.9717 26.7781 41.6260
84 32.9373 23.3035 18.5882 11.3683 15.5089 21.9496
85 -14.1385 -12.7712 -7.2418 -8.8583 -13.0573 -9.2208
86 10.5585 10.1102 8.2144 10.5958 15.7784 18.8615
87 -7.6121 -13.3352 -20.9042 -9.1454 -12.3603 -19.3866
88 -8.4949 -12.4200 -13.8362 -6.1614 -10.5922 -17.7448
89 4.4492 4.6618 6.1086 9.6900 12.2772 12.6924
90 4.7702 4.0567 0.4565 2.1093 1.8286 3.3536
91 21.5203 28.5643 38.3403 9.8669 16.8246 24.1368
92 -0.2935 1.1796 5.3723 -2.3815 -3.1241 -3.8100
93 4.7722 2.0674 -0.0727 -0.0029 -0.1362 -0.6200
94 2.8356 -1.3170 -1.8303 -1.7780 -2.3152 -4.6160
95 -7.3381 -9.8907 -12.0504 -6.4033 -8.5879 -13.5106
96 3.7672 0.0811 -2.3100 2.6280 2.4811 2.8615
97 -18.0226 -21.5042 -24.4760 -8.3110 -11.7051 -23.1790
98 10.9235 9.4125 12.2230 6.0718 4.4464 5.7691
99 -0.9255 -0.5806 -3.5672 -0.2597 -0.1487 -0.1050
100 -0.9250 -1.6380 -4.1937 -0.4115 -2.7080 -7.3690
101 17.4218 15.4433 12.5284 10.2402 5.0137 10.6573
102 4.9409 1.7554 1.5981 0.7507 0.5333 -2.2372
103 -5.5523 -3.3988 -3.0325 -3.0099 -2.8838 -9.3128
104 -3.5225 -4.8589 -5.8780 -0.8581 -1.6838 -12.9875
105 -5.9234 -7.4692 -11.2668 -2.9715 -3.3289 -7.7499
106 6.7174 9.3484 10.2238 8.0311 9.8871 13.0598
107 -2.4801 2.3317 1.7345 3.1370 4.3390 3.9570
108 -1.7502 5.9262 0.8071 6.3821 5.6136 8.0813
109 9.1528 12.9358 12.7537 6.9882 6.3196 17.1148
110 4.2227 8.6796 14.4462 2.5694 2.2001 3.7475
111 5.0917 5.5331 0.5575 4.6757 0.6768 1.1198
112 10.9064 10.8504 6.8068 6.6016 7.9769 6.0328
113 6.6969 10.4475 18.9743 3.0389 5.6087 14.5739
114 5.7713 10.1556 2.2152 2.9835 5.8064 8.6858
115 10.3194 7.6727 22.3771 3.2881 9.5299 12.2406
116 1.9010 6.5601 6.7824 1.9137 2.8060 7.1039
117 0.5096 2.3380 0.8324 2.6378 0.0632 -1.0088
118 -14.3931 -13.9743 -15.4640 -7.1563 -8.3505 -10.2494
119 4.9011 5.5856 8.6767 5.0578 5.1107 6.5508
120 -2.2080 -0.2588 -1.2498 3.6325 2.1402 0.2676
# Equality-constraint function: returns the sum of the weights in x
# (the solnp calls below pass eqB = 1, i.e. the weights must sum to one).
equal <- function(x) {
  total <- sum(x)
  c(total)
}
# Inequality-constraint function: returns the 1-norm of x, i.e. the sum of the
# absolute values of its elements.
in_inequal <- function(x) {
  magnitudes <- abs(x)
  c(sum(magnitudes))
}
# Inner objective: the quadratic form x' V_C_M x, returned as a plain number.
# V_C_M is read from the enclosing environment; the script below sets it to
# cov(exp_d).
obj_f <- function(x) {
  # multiply left to right, exactly as t(x) %*% V_C_M %*% x would
  row_times_cov <- t(x) %*% V_C_M
  as.numeric(row_times_cov %*% x)
}
# Outer objective: x is a candidate upper bound on the 1-norm of the weights.
# Solves the inner problem with solnp (objective obj_f, weights summing to 1,
# 1-norm between 0 and x) from an equal-weight start, then returns minus the
# product of the last row of exp_d with the fitted weights.
ex_obj_f <- function(x) {
  start_w <- rep(1 / n, n)
  inner_fit <- solnp(
    start_w,
    fun = obj_f,
    eqfun = equal, eqB = 1,
    ineqfun = in_inequal, ineqLB = 0, ineqUB = x,
    control = list(trace = 0)
  )
  last_obs <- exp_d[nrow(exp_d), ]
  # sign flipped -- presumably so that a minimiser maximises this return
  -(last_obs %*% inner_fit$pars)
}
# First "naive" attempt: scan Theta over 1.001, 1.002, ..., 6.000 and record for
# each value the last-row portfolio return and solnp's convergence code.
exp_d <- as.matrix(exp_d)
n <- 6
V_C_M <- cov(exp_d)
# columns of res: Theta, last-row return, convergence code
res <- matrix(0L, nrow = 5000, ncol = 3)
for (i in seq_len(5000)) {
  tteta <- 1 + i * 0.001
  port_w <- solnp(
    rep(1 / n, n),
    fun = obj_f,
    eqfun = equal, eqB = 1,
    ineqfun = in_inequal, ineqLB = 0, ineqUB = tteta,
    control = list(trace = 0)
  )
  lp_ret <- exp_d[nrow(exp_d), ] %*% port_w$pars
  res[i, ] <- c(tteta, lp_ret, port_w$convergence)
}
# Second approach: optimise over Theta directly, with ex_obj_f as the objective
# (the result really depends on the starting value of the parameter).
tt_op1 <- solnp(
  pars = 1.5, fun = ex_obj_f, LB = 1, UB = 10,
  control = list(trace = 1)
)
tt_op2 <- gosolnp(pars = 1.5, fun = ex_obj_f, LB = 1, UB = 10)
P.S. I have read posts with similar problems here, but could not find a solution to my question.
Your model can be formulated as a pure QP (quadratic programming) problem instead of a difficult nonlinear problem with a nonlinear non-differentiable constraint.
The constraint
sum(i, |x(i)|) <= Theta
can be linearized in different ways. One possible reformulation is
-y(i) <= x(i) <= y(i)
sum(i, y(i)) <= Theta
non-negative (or free) variable y(i)
Now you can solve the model with a QP solver instead of a general purpose NLP solver.
I am working with a data set with multiple questionnaires which were supposed to be filled in on different timepoints i.e.
173 9/13/2013 10/29/2013 9/26/2014
174 10/21/2013 11/25/2013 11/3/2014
175 7/1/2014 7/3/2015 4/27/2016
176 1/15/2014 2/24/2014 6/10/2015
177 3/15/2014 4/1/2015
178 7/18/2014 9/18/2014 8/17/2015
179 6/30/2013 8/15/2013 7/15/2014
180 4/22/2013 6/24/2013 5/11/2014
181 12/7/2014 12/26/2015
182 4/2/2015 5/17/2015 4/20/2016
183 1/12/2015 2/26/2015 1/28/2016
184 7/18/2014 8/26/2014 8/14/2015
185 8/27/2013 10/19/2013 9/21/2014
186 10/29/2013 11/30/2013 11/6/2014
187 9/17/2014 11/18/2014 10/20/2015
188 5/10/2014 6/27/2014 6/1/2015
189 10/4/2013 10/5/2014
190 1/22/2013 4/11/2013
191 10/21/2014 10/21/2014
I would like to know how to see how many participants filled in all questionnaires on the same day, how many participants filled in at least 2 questionnaires on the same day. how many at least 3 on the same day etc.
Any help would be highly appreciated.
Reproducible data:
Label = c(
"1/25/2015", "1/25/2016", "1/26/2014", "1/26/2015", "1/27/2014",
"1/27/2015", "1/28/2014", "1/28/2015", "1/29/2015", "1/3/2014",
"1/3/2015", "1/3/2016", "1/30/2015", "1/31/2014", "1/4/2014",
"1/4/2015", "1/4/2016", "1/5/2014", "1/5/2015", "1/6/2014",
"1/6/2015", "1/7/2014", "1/7/2015", "1/8/2014", "1/8/2015",
"1/9/2014", "1/9/2015", "1/9/2016", "10/1/2012", "10/1/2013",
"10/1/2014", "10/1/2015", "10/10/2013", "10/10/2014", "10/11/2013",
"10/11/2014", "10/11/2015", "10/12/2013", "10/12/2014", "10/12/2015",
"10/13/2013", "10/13/2014", "10/13/2015", "10/14/2013", "10/14/2014",
"10/14/2015", "10/15/2014", "10/15/2015", "10/16/2013", "10/16/2014",
"10/16/2015", "10/17/2013", "10/17/2014", "10/17/2015", "10/18/2013",
"10/18/2014", "10/18/2015", "10/19/2013", "10/19/2014", "10/19/2015",
"10/2/2013", "10/2/2014", "10/20/2013", "10/20/2014", "10/20/2015",
"10/21/2013", "10/21/2014", "10/22/2013", "10/22/2014", "10/22/2015",
"10/23/2012", "10/23/2013", "10/23/2014", "10/23/2015", "10/24/2013",
"10/24/2014", "10/24/2015", "10/25/2013", "10/25/2014", "10/26/2013",
"10/26/2014", "10/26/2015", "10/27/2013", "10/27/2014", "10/27/2015",
"10/28/2013", "10/28/2014", "10/29/2013", "10/29/2014", "10/3/2014",
"10/3/2015", "10/30/2014", "10/31/2012", "10/31/2013", "10/31/2014",
"10/31/2015", "10/4/2013", "10/4/2014", "10/4/2015", "10/5/2014",
"10/5/2015", "10/6/2013", "10/6/2014", "10/6/2015", "10/7/2013",
"10/7/2014", "10/8/2012", "10/8/2014", "10/8/2015", "10/9/2013",
"10/9/2014", "10/9/2015", "11/1/2013", "11/1/2014", "11/1/2015",
class = "factor")
Label = c(
"4/6/2015", "4/7/2015", "4/9/2012", "5/12/2015", "5/13/2014",
"5/14/2015", "5/15/2014", "5/15/2015", "5/17/2014", "5/19/2014",
"5/20/2014", "5/25/2014", "5/27/2014", "5/29/2014", "5/30/2014",
"5/30/2015", "5/31/2015", "5/4/2014", "5/9/2015", "6/1/2015",
"6/10/2014", "6/11/2014", "6/11/2015", "6/12/2015", "6/16/2014",
"6/16/2015", "6/18/2014", "6/21/2014", "6/24/2015", "6/25/2014",
"6/25/2015", "6/26/2015", "6/27/2015", "6/29/2015", "6/5/2014",
"6/6/2015", "6/8/2014", "7/1/2014", "7/13/2014", "7/14/2015",
"7/16/2014", "7/2/2014", "7/21/2014", "7/25/2014", "7/27/2014",
"7/27/2015", "7/28/2014", "7/29/2014", "7/30/2014", "7/31/2014",
"7/31/2015", "7/4/2014", "7/4/2015", "8/1/2014", "8/11/2014",
"8/11/2015", "8/25/2014", "8/27/2015", "8/5/2014", "8/8/2014",
"8/9/2015", "9/1/2014", "9/10/2015", "9/15/2015", "9/22/2013",
"9/3/2012", "9/30/2014", "9/8/2014", "9/8/2015"), class = "factor")
Label = c(" ",
"1/16/2016", "1/26/2015", "10/11/2015", "10/14/2015", "10/16/2015",
"10/6/2014", "10/7/2013", "11/11/2015", "11/15/2015", "11/17/2013",
"11/18/2013", "11/2/2015", "11/20/2013", "11/29/2013", "2/17/2014",
"2/17/2015", "2/21/2015", "2/23/2014", "2/25/2014", "2/25/2015",
"3/11/2016", "3/2/2014", "3/22/2015", "3/4/2014", "3/4/2016",
"4/11/2014", "4/12/2013", "4/18/2016", "4/21/2015", "4/23/2015",
"4/29/2015", "4/3/2015", "4/5/2016", "5/23/2015", "5/26/2015",
"5/27/2015", "5/28/2015", "5/29/2014", "5/29/2015", "5/8/2015",
"6/16/2015", "6/22/2015", "6/28/2015", "7/24/2015", "7/27/2015",
"7/4/2014", "7/8/2015", "9/14/2015", "9/15/2015", "9/16/2014",
"9/17/2014", "9/22/2014", "9/23/2014", "9/24/2014", "9/24/2015",
"9/26/2014", "9/28/2015", "9/30/2015", "9/9/2015"), class = "factor")), .Names = c("1A_RespDate",
"1B_RespDate", "1C_1_RespDate", "1C_2_RespDate",
"1C_RespDate", "2A_1_RespDate", "2A_RespDate", "2B_RespDate",
"2C_RespDate"), row.names = c(NA, -4831L), class = "data.frame")
I'll call your dataframe df:
sapply(apply(df,1,unique),length)
will give you the number of unique dates for each individual as a vector. The highest value is 7 and the min 1 (all questionnaires answered on the same day).
which(sapply(apply(df,1,unique),length)<7)
Will give you the index of the individuals who filled at least 2 questionnaires on the same day.
length(which(sapply(apply(df,1,unique),length)<7))
Will tell you how many individuals filled at least 2 questionnaires on the same day.
Edit:
This is inelegant (there must be a cleaner way) but it seems to work
which(sapply(sapply(sapply(apply(df,1,table),function(x) x==Z),which),function(x) any(x>0)))
Z is to be set to the number of questionnaires filled on the same day.
Explanation:
apply(df,1,table)
gives a list with for each individual the unique dates and how many times they appear.
sapply(apply(df,1,table),function(x) x==Z)
will give you the same list with True/False on whether a date appears exactly Z times.
sapply(sapply(apply(df,1,table),function(x) x==Z),which)
will give either "integer(0)" or a positive integer which is the index of the date for the individual (it's not something we are interested in).
sapply(sapply(sapply(apply(df,1,table),function(x) x==Z),which),function(x) any(x>0))
will give a vector of True/False corresponding to the index of the individual
then next step with "which" is to get the index for the True.
We therefore get the individuals for which a date appears exactly Z times.
I am trying to index a datetime that is being formed from 3 columns representing (year, dayofyear, and 2400hr time).
2014,323,1203,47.77,320.9
2014,323,1204,48.46,402.6
2014,323,1205,49.2,422.7
2014,323,1206,49.82,432.4
2014,323,1207,50.03,438.6
2014,323,1208,50.15,445.4
2014,323,1209,50.85,449.7
2014,323,1210,50.85,454.4
2014,323,1211,50.85,458.1
2014,323,1212,50.91,460.2
I am using the following code:
In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
In [2]:
def parse(yr, yearday, hrmn):
    """Build a datetime from year, day-of-year and HHMM strings.

    The three strings are joined with single spaces, echoed with print(), and
    parsed with the format "%Y %j %H%M".

    Raises ValueError (from datetime.strptime) when the joined string does not
    match that format.
    """
    date_string = ' '.join([yr, yearday, hrmn])
    print(date_string)
    # NOTE(review): %H only accepts hours 00-23, so an end-of-day stamp written
    # as "2400" is rejected ("unconverted data remains: 0" in the traceback
    # below); such rows would have to be mapped to 0000 of the following day
    # before they reach strptime.
    return datetime.strptime(date_string,"%Y %j %H%M")
In [3]:
df = pd.read_csv('home_prepped.dat', parse_dates={'datetime':[0,1,2]},
date_parser=parse, index_col='datetime', header=None)
I have had success bringing it in when the data was flawed (had extra data over DST change), and now that it is fixed (removed and stitched back together) I am having this error (in its entirety):
2014 92 2355
2014 92 2356
2014 92 2357
2014 92 2358
2014 92 2359
2014 92 2400
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-5-9c710834ee23> in <module>()
1
----> 2 df = pd.read_csv('home_prepped.dat', parse_dates={'datetime':[0,1,2]}, date_parser=parse, index_col='datetime', header=None)
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, na_fvalues, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
463 skip_blank_lines=skip_blank_lines)
464
--> 465 return _read(filepath_or_buffer, kwds)
466
467 parser_f.__name__ = name
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
249 return parser
250
--> 251 return parser.read()
252
253 _parser_defaults = {
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in read(self, nrows)
708 raise ValueError('skip_footer not supported for iteration')
709
--> 710 ret = self._engine.read(nrows)
711
712 if self.options.get('as_recarray'):
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in read(self, nrows)
1209 data = dict((k, v) for k, (i, v) in zip(names, data))
1210
-> 1211 names, data = self._do_date_conversions(names, data)
1212 index, names = self._make_index(data, alldata, names)
1213
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _do_date_conversions(self, names, data)
1033 data, names = _process_date_conversion(
1034 data, self._date_conv, self.parse_dates, self.index_col,
-> 1035 self.index_names, names, keep_date_col=self.keep_date_col)
1036
1037 return names, data
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _process_date_conversion(data_dict, converter, parse_spec, index_col, index_names, columns, keep_date_col)
2100
2101 _, col, old_names = _try_convert_dates(converter, colspec,
-> 2102 data_dict, orig_names)
2103
2104 new_data[new_name] = col
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _try_convert_dates(parser, colspec, data_dict, columns)
2132 to_parse = [data_dict[c] for c in colnames if c in data_dict]
2133
-> 2134 new_col = parser(*to_parse)
2135 return new_name, new_col, colnames
2136
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in converter(*date_cols)
2048 dayfirst=dayfirst)
2049 except Exception:
-> 2050 return generic_parser(date_parser, *date_cols)
2051
2052 return converter
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/date_converters.pyc in generic_parser(parse_func, *cols)
36 for i in range(N):
37 args = [c[i] for c in cols]
---> 38 results[i] = parse_func(*args)
39
40 return results
<ipython-input-2-57e18ddd7deb> in parse(yr, yearday, hrmn)
1 def parse(yr, yearday, hrmn):
2 date_string = ' '.join([yr, yearday, hrmn])
----> 3 return datetime.strptime(date_string,"%Y %j %H%M")
/Volumes/anaconda/anaconda/python.app/Contents/lib/python2.7/_strptime.pyc in _strptime(data_string, format)
326 if len(data_string) != found.end():
327 raise ValueError("unconverted data remains: %s" %
--> 328 data_string[found.end():])
329
330 year = None
ValueError: unconverted data remains: 0
I am looking for suggestions as to how to debug or work around this. I have gone through the data and according to what I have read in similar posts I should be looking for extraneous time data, which is not there.
Thanks.
I'm trying to figure out how to keep rows of my dataframe based on a list I created from another dataframe. I've looked at several other questions that tackled this issue and tried the codes suggested ( R- keep dataframe rows only if the row name is in a list , Select rows in a dataframe in r based on values in one row ) but the codes aren't working for me. I'm sure it's a simple error on my part, but I can't figure out what it is.
One code I tried:
A <- AllSiteBA[AllSiteBA$Cofecha %in% keeps,]
Output (doesn't have any matches......there SHOULD be 76 matches when I run the entire AllSiteBA data.frame):
> A
[1] Cofecha DBHinBark RBHinBark BAtotal MeanBAww PercentBAww
[7] Plot
<0 rows> (or 0-length row.names)
Another code I tried:
A <- subset(AllSiteBA, Cofecha %in% keeps)
Had exact same output as the first code I tried.
Any help on how to fix the code would be greatly appreciated. Below are reproducible sections of my data.frame and list.
List of names to "keep":
keeps <- list(structure(c(69L, 166L, 50L, 232L, 252L, 234L, 148L, 307L,
194L, 240L, 245L, 297L, 248L, 221L, 257L, 218L, 265L, 45L, 208L,
216L, 223L, 258L, 205L, 269L, 270L, 142L, 4L, 58L, 207L, 220L,
231L, 256L, 206L, 268L, 203L, 267L, 111L, 144L, 290L, 74L, 36L,
266L, 169L, 78L, 143L, 186L, 272L, 185L, 312L, 73L, 150L, 129L,
225L, 131L, 123L, 12L, 300L, 302L, 264L, 284L, 128L, 130L, 1L,
3L, 10L, 124L, 303L, 56L, 51L, 55L, 108L, 2L, 11L, 298L, 310L,
121L), .Label = c("LB1A002", "LB1A003", "LB1A101", "LB1A102",
"LB1A103", "LB1A212", "LB1A228", "LB1A231", "LB1A233", "LB1B001",
"LB1B002", "LB1B003", "LB1B210", "LB1B216", "LB2A001", "LB2A002",
"LB2A003", "LB2A004", "LB2A008", "LB2A009", "LB2A011", "LB2B001",
"LB2B005", "LB2B008", "LB2B101", "LB2B102", "LB2B103", "LB2C003",
"LB2C004", "LB2C008", "LB2C009", "LB2C010", "LB2C001", "LB2D005",
"LB2D006", "LB2D007", "LB2D008", "LB2D009", "LB2D010", "LB2D101",
"SM1A005", "SM1A101", "SM1A301", "SM1A302", "SM1B003", "SM1C005",
"SM1C302", "SM1D006", "SM2A004", "SM2A005", "SM2A007", "SM2A210",
"SM2A301", "SM2B001", "SM2B005", "SM2B006", "SM2B101", "SM2C005",
"SM2C101", "SM2C301", "SM2D006", "SM2D101", "SM2D221", "IR1A004",
"IR1A009", "IR1A206", "IR1B001", "IR1B004", "IR1B005", "IR1B301",
"IR1B302", "IR1C005", "IR1C006", "IR1C007", "IR1C008", "IR1C204",
"IR1C205", "IR1D002", "IR1D101", "IR2A003", "IR2A101", "IR2A211",
"IR2A234", "IR2B002", "IR2B005", "IR2B101", "IR2B201", "IR2B210",
"IR2B229", "IR2C230", "IR2C256", "IR2C301", "IR2C302", "IR2C002",
"IR2C009", "IR2C101", "IR2C204", "IR2C215", "IR2D227", "IR2D228",
"IR2D237", "IR2D254", "IR2D301", "IR2D302", "IR2D003", "IR2D006",
"IR2D009", "IR2D011", "IR2D207", "IR2D216", "JA1A101", "JA1A224",
"JA1A301", "JA1B004", "JA1B101", "JA1B102", "JA1B219", "JA1B233",
"JA1C002", "JA1C232", "JA1D001", "JA1D101", "JA2A101", "JA2A102",
"JA2A206", "JA2A209", "JA2A210", "JA2A004", "JA2A005", "JA2A006",
"JA2A007", "JA2A008", "JA2B005", "JA2B206", "JA2C001", "JA2C002",
"JA2C007", "JA2C101", "JA2C202", "JA3N007", "JA3N008", "JA3N009",
"JA3N010", "JA3N011", "JA3N012", "JA3N001", "JA3N002", "JA3N003",
"JA3N004", "JA3N005", "JA3N006", "SF5A007", "SF5B223", "SF5B227",
"SF5B228", "SF5B301", "SF5B302", "SF5C201", "SF5C214", "SF5C216",
"SF5C301", "SF5C303", "SF5D004", "SF5D101", "SF5D207", "AP1A001",
"AP1A004", "AP1A005", "AP1A006", "AP1A008", "AP1A009", "AP1A010",
"AP1A101", "AP1B005", "AP1B007", "AP1B011", "AP1B101", "AP1B102",
"AP1C006", "AP1C007", "AP1C010", "AP1C011", "AP1C001", "AP1C002",
"AP1D001", "AP1D005", "AP1D007", "AP1D008", "AP1D009", "AP1D010",
"AP1D011", "AP1D012", "AP1D013", "AP1D101", "AP1D102", "AP1D103",
"AP1D104", "AP1C004", "AP1C005", "AP2A001", "AP2A002", "AP2A003",
"AP2B001", "AP2B003", "AP2B004", "AP2B101", "AP2B102", "AP2C001",
"AP2C002", "AP2C003", "AP2C004", "AP2C005", "AP2C007", "AP2C008",
"AP2C102", "AP2C103", "AP2C104", "AP2D001", "AP2D002", "AP2D005",
"AP2D006", "AP2D009", "AP2D101", "AP2D102", "AP2D103", "AP3A003",
"AP3A005", "AP3A008", "AP3A014", "AP3A015", "AP3A101", "AP3A102",
"AP3B101", "AP3B102", "AP3B103", "AP3B104", "AP3B003", "AP3B007",
"AP3B010", "AP3B012", "AP3C003", "AP3C004", "AP3C006", "AP3C007",
"AP3C009", "AP3C011", "AP3C101", "AP3C102", "AP3C103", "AP3C104",
"AP3C105", "AP3D006", "AP3D011", "AP3D101", "AP3D102", "BF1A101",
"BF1A102", "BF1A103", "BF1A104", "BF1B003", "BF1B005", "BF1B006",
"BF1B007", "BF1B101", "BF1C007", "BF1C101", "BF1C102", "BF1D003",
"BF1D007", "BF1D010", "BF1D101", "BF1D102", "BF1D103", "BF1D210",
"BF2A001", "BF2A002", "BF2B001", "BF2B214", "BF2B219", "BF2C001",
"BF2C004", "BF2C008", "BF2C101", "BF2C102", "BF2C201", "BF2C205",
"BF2C213", "BF2C219", "BF2C301", "BF2D004", "BF2D013", "BF2D014",
"BF2D015", "BF3A001", "BF3A002", "BF3A004", "BF3A005", "BF3A007",
"BF3A008", "BF3A009", "BF3A101", "BF3B003", "BF3B101", "BF3C002",
"BF3C003", "BF3C007", "BF3C009", "BF3C010", "BF3D002", "BF3D003",
"BF3D004", "BF3D009", "BF3D010"), class = "factor"))
Small portion of data.frame I'm trying to select rows from:
AllSiteBA <- structure(list(Cofecha = structure(30:45, .Label = c("LB1A002",
"LB1A003", "LB1A101", "LB1A102", "LB1A103", "LB1A212", "LB1A228",
"LB1A231", "LB1A233", "LB1B001", "LB1B002", "LB1B003", "LB1B210",
"LB1B216", "LB2A001", "LB2A002", "LB2A003", "LB2A004", "LB2A008",
"LB2A009", "LB2A011", "LB2B001", "LB2B005", "LB2B008", "LB2B101",
"LB2B102", "LB2B103", "LB2C001", "LB2C003", "LB2C004", "LB2C008",
"LB2C009", "LB2C010", "LB2D005", "LB2D006", "LB2D007", "LB2D008",
"LB2D009", "LB2D010", "LB2D101", "SM1A005", "SM1A101", "SM1A301",
"SM1A302", "SM1B003", "SM1C005", "SM1C302", "SM1D006", "SM2A004",
"SM2A005", "SM2A007", "SM2A210", "SM2A301", "SM2B001", "SM2B005",
"SM2B006", "SM2B101", "SM2C005", "SM2C101", "SM2C301", "SM2D006",
"SM2D101", "SM2D221", "IR1A004", "IR1A009", "IR1A206", "IR1B001",
"IR1B004", "IR1B005", "IR1B301", "IR1B302", "IR1C005", "IR1C006",
"IR1C007", "IR1C008", "IR1C204", "IR1C205", "IR1D002", "IR1D101",
"IR2A003", "IR2A101", "IR2A211", "IR2A234", "IR2B002", "IR2B005",
"IR2B101", "IR2B201", "IR2B210", "IR2B229", "IR2C002", "IR2C009",
"IR2C101", "IR2C204", "IR2C215", "IR2C230", "IR2C256", "IR2C301",
"IR2C302", "IR2D003", "IR2D006", "IR2D009", "IR2D011", "IR2D207",
"IR2D216", "IR2D227", "IR2D228", "IR2D237", "IR2D254", "IR2D301",
"IR2D302", "JA1A101", "JA1A224", "JA1A301", "JA1B004", "JA1B101",
"JA1B102", "JA1B219", "JA1B233", "JA1C002", "JA1C232", "JA1D001",
"JA1D101", "JA2A004", "JA2A005", "JA2A006", "JA2A007", "JA2A008",
"JA2A101", "JA2A102", "JA2A206", "JA2A209", "JA2A210", "JA2B005",
"JA2B206", "JA2C001", "JA2C002", "JA2C007", "JA2C101", "JA2C202",
"JA3N001", "JA3N002", "JA3N003", "JA3N004", "JA3N005", "JA3N006",
"JA3N007", "JA3N008", "JA3N009", "JA3N010", "JA3N011", "JA3N012",
"SF5A007", "SF5B223", "SF5B227", "SF5B228", "SF5B301", "SF5B302",
"SF5C201", "SF5C214", "SF5C216", "SF5C301", "SF5C303", "SF5D004",
"SF5D101", "SF5D207", "AP1A001", "AP1A004", "AP1A005", "AP1A006",
"AP1A008", "AP1A009", "AP1A010", "AP1A101", "AP1B005", "AP1B007",
"AP1B011", "AP1B101", "AP1B102", "AP1C001", "AP1C002", "AP1C004",
"AP1C005", "AP1C006", "AP1C007", "AP1C010", "AP1C011", "AP1D001",
"AP1D005", "AP1D007", "AP1D008", "AP1D009", "AP1D010", "AP1D011",
"AP1D012", "AP1D013", "AP1D101", "AP1D102", "AP1D103", "AP1D104",
"AP2A001", "AP2A002", "AP2A003", "AP2B001", "AP2B003", "AP2B004",
"AP2B101", "AP2B102", "AP2C001", "AP2C002", "AP2C003", "AP2C004",
"AP2C005", "AP2C007", "AP2C008", "AP2C102", "AP2C103", "AP2C104",
"AP2D001", "AP2D002", "AP2D005", "AP2D006", "AP2D009", "AP2D101",
"AP2D102", "AP2D103", "AP3A003", "AP3A005", "AP3A008", "AP3A014",
"AP3A015", "AP3A101", "AP3A102", "AP3B003", "AP3B007", "AP3B010",
"AP3B012", "AP3B101", "AP3B102", "AP3B103", "AP3B104", "AP3C003",
"AP3C004", "AP3C006", "AP3C007", "AP3C009", "AP3C011", "AP3C101",
"AP3C102", "AP3C103", "AP3C104", "AP3C105", "AP3D006", "AP3D011",
"AP3D101", "AP3D102", "BF1A101", "BF1A102", "BF1A103", "BF1A104",
"BF1B003", "BF1B005", "BF1B006", "BF1B007", "BF1B101", "BF1C007",
"BF1C101", "BF1C102", "BF1D003", "BF1D007", "BF1D010", "BF1D101",
"BF1D102", "BF1D103", "BF1D210", "BF2A001", "BF2A002", "BF2B001",
"BF2B214", "BF2B219", "BF2C001", "BF2C004", "BF2C008", "BF2C101",
"BF2C102", "BF2C201", "BF2C205", "BF2C213", "BF2C219", "BF2C301",
"BF2D004", "BF2D013", "BF2D014", "BF2D015", "BF3A001", "BF3A002",
"BF3A004", "BF3A005", "BF3A007", "BF3A008", "BF3A009", "BF3A101",
"BF3B003", "BF3B101", "BF3C002", "BF3C003", "BF3C007", "BF3C009",
"BF3C010", "BF3D002", "BF3D003", "BF3D004", "BF3D009", "BF3D010"
), class = "factor"), DBHinBark = c(144, 147.6, 135.9, 144, 163.8,
119.7, 234.9, 180.9, 144.9, 202.5, 152.1, 180, 184.5, 68.4, 88.2,
231.3), RBHinBark = c(72, 73.8, 67.95, 72, 81.9, 59.85, 117.45,
90.45, 72.45, 101.25, 76.05, 90, 92.25, 34.2, 44.1, 115.65),
BAtotal = c(16286.0163162095, 17110.4958922176, 14505.3694541364,
16286.0163162095, 21072.5782991454, 11253.2555709933, 43336.7077139261,
25702.0056715304, 16490.2276926745, 32206.2334378166, 18169.7231252836,
25446.9004940773, 26735.14983159, 3674.53243134477, 6109.80080862797,
42018.5582683328), MeanBAww = c(7287.19846816407, 3511.25221054135,
5836.77552643544, 3226.29613334421, 6580.83174422834, 1240.15336040198,
15513.5106521598, 4648.51222574233, 4555.16310970877, 4909.94773909597,
1791.58819676346, 6095.75422479859, 2391.72518367973, 3568.38437887589,
3398.70860742085, 7723.82631584503), PercentBAww = c(44.7451256751543,
20.5210429473197, 40.2387236318961, 19.8102228973765, 31.2293619262301,
11.0203962984599, 35.7976216249915, 18.0861847326233, 27.6234094192182,
15.2453336357263, 9.86029442721902, 23.9548004135802, 8.9459950617284,
97.1112500854964, 55.62715895126, 18.3819403476917), Plot = c("LB2",
"LB2", "LB2", "LB2", "LB2", "LB2", "LB2", "LB2", "LB2", "LB2",
"LB2", "SM1", "SM1", "SM1", "SM1", "SM1")), .Names = c("Cofecha",
"DBHinBark", "RBHinBark", "BAtotal", "MeanBAww", "PercentBAww",
"Plot"), row.names = 30:45, class = "data.frame")
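`keeps` is a list that wraps a single factor, so `%in%` is matched against the list itself rather than against the factor's values, and nothing matches. Subsetting the first element of `keeps` with `[[1]]` fixes this; on the sample data I then get two rows: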
> AllSiteBA[AllSiteBA$Cofecha %in% keeps[[1]],]
Cofecha DBHinBark RBHinBark BAtotal MeanBAww PercentBAww Plot
36 LB2D007 234.9 117.45 43336.71 15513.511 35.79762 LB2
45 SM1B003 231.3 115.65 42018.56 7723.826 18.38194 SM1
So, I had to rename the levels of the `variable` column (the names shown in the rows) to calculate the ANOVA. Now I'd like to bring back the previous names. Is there any way to do it? Data:
head(tbl_reo)
id Sequence variable value pv
75 AAAAGAAAVANQGKK BiotinControl1 3893050.50 0.8523174
192 AAAAGAAAVANQGKK BiotinControl2 900604.61 0.8523174
3770 AAFTKLDQVWGSE BiotinControl3 90008.14 0.8523174
934 AAIKFIKFINPKINDGE BiotinTreatment1 656399.20 0.8523174
1012 AAIKFIKFINPKINDGE BiotinTreatment2 359937.85 0.8523174
1970 AAIKFIKFINPKINDGE BiotinTreatment3 447089.35 0.8523174
Code:
levels(tbl_reo$variable) <- c("Control","Control","Control","Treat","Treat","Treat")
New data:
head(tbl_reo)
id Sequence variable value pv
75 AAAAGAAAVANQGKK Control 893050.50 0.8523174
192 AAAAGAAAVANQGKK Control 900604.61 0.8523174
3770 AAFTKLDQVWGSE Control 90008.14 0.8523174
934 AAIKFIKFINPKINDGE Treat 656399.20 0.8523174
1012 AAIKFIKFINPKINDGE Treat 359937.85 0.8523174
1970 AAIKFIKFINPKINDGE Treat 447089.35 0.8523174
No idea if it's necessary to use dput on such big data, but here you go.
> dput(head(tbl_reo,4))
structure(list(id = c(75L, 192L, 3770L, 934L), Sequence = structure(c(1L,
1L, 3L, 5L), .Label = c("AAAAGAAAVANQGKK", "AAANILFSDHAD", "AAFTKLDQVWGSE",
"AAIELRE", "AAIKFIKFINPKINDGE", "AAIYKLLKSHFRNE", "AAKKFEE",
"AAKYFRE", "AANVKKTLVE", "AARAGELLKE", "AARDSKSPIILQTSNGGAAYFAGKGISNE",
"AAVKARVASIDE", "AAVKASAPGSVILLE", "AEKLKAE", "AEQVKKE", "AFAKRQGKE",
"AFGSGTAAVVSPIKE", "AGIPAGVVNILPGSGRVVGE", "AGISKDGQTRE", "AGSHGKDAWGVFRVHAFE",
"AHSFSSSLASAE", "AIDALNGMLLNGQE", "AITYSWTLLTE", "AKAKKAAKKAANAKQQQE",
"AKALVAQGVKFIAE", "AKAQGVAVQLKRQPAQPRE", "AKIGKKVE", "AKKVVKKAFEE",
"AKNKKYALDIIKE", "ALANAGVTDIVLAVNYRPE", "ALDFFARGLVKSPIKVVGLSTLPE",
"ALFHPSVLGLE", "ALKIWKRFRE", "ALKPDSQKSYAE", "ALLLKVNQIGTLSE",
"ALRIGSE", "ALRLVE", "AMRIGSE", "ANIIKLVE", "ANNKWNKLFSE", "ANVTGLRDAMFKGE",
"AQKVRDQITSLKE", "AQRVQAKNQLE", "ARINFGIEKAKE", "ASAGIQLSPKFVKLVSWYDNE",
"ASFVLPTWAAKE", "ASKLAAYNKKSTISARE", "ATSSIVMFSLNE", "ATTNAPWYKGWEKE",
"ATVGAGLPIISFLRE", "AVDATHLAE", "AVIRLSKE", "AVSKVYARSVYDSRGNPTVE",
"AVVTVPAYFNDSQRQATKD", "AVWYGPPKAANLGGVAVSGLE", "AWFPYQKE", "AYSPFGSANAPLLKE",
"DGWVPGKNLFTVE", "DHSFINYKQNVE", "DILYKIYKGVSE", "DLYQTFARTVE",
"DNVKAHPRIE", "DQVRNYVMQYIQE", "DSVYLAKLAE", "DVHNLFTYAKE", "DVKISAGTLLKGTE",
"DVQKFRHE", "EAVAKAAE", "EKPLFLVFHGGSGSTVQE", "EQRPGTPLFTVKAYLPVNE",
"ERIAALE", "FAKSLPRLDGLSVQE", "FALKHFPDYE", "FAPYGTITSAKVMRTE",
"FASLFPLKQAPAFLGPKGLKLTE", "FEQKNAGGVGE", "FFIDRPE", "FFIRDTKNLE",
"FGINTTVQE", "FGNRPMILHQIE", "FGVTKARWPAKSVKFVQGLLQNAAANAE",
"FIFSKYQILE", "FKMAKPE", "FKSMIKNNE", "FKTKYNSDPYQLE", "FLAADKE",
"FLKKILVDNKLE", "FLLSYGKE", "FMIAPTGAKTFAE", "FMISQLGLQKKNIKIHGF",
"FNRGLALE", "FNSQPDYRE", "FQKDAKRFE", "FQKTGPFE", "FRNGMINVSPIGRNASTEE",
"FRTASAQLE", "FSGSLLRSKFNTSNE", "FSRWFNHIASKADE", "FSTNVKDE",
"FTADGKVKPE", "FTPETPSPLIGILE", "FVADGLLQGFGNE", "FVAYPIQLLVTKE",
"FVAYPIQLVVTKE", "FVDIINSRN", "FVSSKHGSVSAE", "FYKSLKLPYRIVGIVSGE",
"GAAPGGFPGGAPPAPE", "GAIWNINSFDQWGVE", "GARYGPSLMPGGSEE", "GEKTMGAKITKSLE",
"GGLGPINIPLLADTNHSLSRD", "GGTLIGTARSME", "GGVAPNIQTAEE", "GGVIIITHSAE",
"GIFSGTLSYIFNE", "GIKVPIHIVQE", "GILQRWTKGFDIPNIE", "GILQRWTKGFDIPNVE",
"GILVASPFVELE", "GIPAGWQGLDNGPE", "GIPPDQQRLIFAGKQLE", "GISKKTRIAGVVYHPSNNE",
"GKKIATYQE", "GKLKGVLGYTE", "GLDKIFQE", "GLGWGNNASAAIQRVGLGE",
"GLNHLPVQKQIVE", "GMMANGIAAGGQLTTTTE", "GMRWAGNANE", "GNDIHALAAKLLQE",
"GQALKKKKGKSAE", "GRGINSSYAVWNDVTGKAHE", "GRHSGYGKRKGTRE", "GSDVLNTWKKE",
"GSRKVDGQKVKASKE", "GTAVSQADVTVFKAFQSAYPE", "GTERSAYYTWAAE",
"GTRAVTKYSSSTQA", "GVVLGVEKRATSPLLE", "GWGTPHIKPYGNLSLDPSACVFHYAFE",
"GYSGVEVRVTPTKTE", "GYVGRTQPKKGGEGE", "GYWNGKITVDE", "HDHAGVPLKISPPVVAYRE",
"HIIQAKADAQRE", "HIVPGTYIQE", "HKFAIPAINVTSSSTAVAALE", "HKVVNGNSADAAYAQVE",
"HLQPHFTTNDGLE", "HNLRVISE", "HQIRAKFVE", "HQKYTREQVGCKE", "IAKLNIPTGIPLVFE",
"IAQHIFTKLNSNGYLEE", "IDASKPVKLTLLISE", "IDPINALQAAME", "IDVAWLKANAKE",
"IEAISSPLMGIFE", "IFDANVQRLKE", "IFFKIKKTTPLRRLME", "IFGGYLYFAQAPSAAE",
"IFGGYLYFAQAPSAAEFHTE", "IFGPVVTVAKFKTLEE", "IFGPVVTVTKFKSADE",
"IFLHSLPVKE", "IFSTYADNQPGVLIQVFE", "IGATTSVFPFNKSMIEYLE", "IGNSGMFRPEMLE",
"IGSLTRQGAKGGLLE", "IHPLLPQDE", "IIAGNVATRE", "IIGGTTRIPTLKQSISE",
"IIKKNFDLRPGVLVKE", "IISNAKGQVVGSIIALDRQE", "IKGFFSFATQKLDE",
"IKHLPFNVVNKDGKPAVE", "IKKVVKAAAE", "IKLFNKWSFEE", "IKLVKLIQE",
"ILIVTKDGVRKE", "ILKIVPGRVSTE", "ILKKLDE", "ILLARNLI", "ILNKLALRILKNE",
"ILNSQAPEE", "ILSKLASAGATE", "ILWRQKE", "IMLPVFDAPQNLVE", "IMLQLGIPREE",
"IMLWNLAAKKAMYTLSAQDE", "INLPFITADASGPKHINMKFSRAQFE", "INLWFKKEE",
"IQLNGGSISE", "IQPRANIQLDFPE", "IRVFQGE", "ISAMVLTKMKE", "ISDTMWPGQAMTLKVE",
"ISDVFAKWNNGVLD", "ISLAAFGRKE", "ISTMIRWGLKPYLFVLNNDGYTIE", "ITDIKQFLE",
"ITHPTIVDGWFRE", "ITLGKYLFE", "ITQLMSLIINTVYSNKE", "ITVDAPKAHE",
"ITYLNNRAAAEYE", "IVERLNTASIPE", "IVFASPPRKSDGKVSPE", "IVFNMSREE",
"IVNLTLPDGTVRQGQVLE", "IVPKLME", "IVRVNKNAPAVE", "IVSRFKTE",
"IYEKME", "IYFSLNNGVRIPALGLGTANPHEKLAE", "IYNHIQLRE", "IYNHIQLREE",
"IYPYGSRGPKGLKE", "KAAKVTKAAQKAAKK", "KAALQTYLPKIKE", "KAEQVFQNVKNILAE",
"KAITYSHAAKE", "KAKIGQKE", "KAKKANNPQHSITKDE", "KALTYTPPKKQKAEKPKAE",
"KAQRAPLEE", "KAVAYWKTLKTDE", "KDFATFAKE", "KDYASVVPPRHNE", "KEKLTQQKKE",
"KFAAGTKALLDE", "KFINSEIE", "KFNAVPIFLSDE", "KFSERLLNE", "KGAQTYVKKVIGSFKDWE",
"KGCDIIVKNHE", "KGLLSATQFTQPALTLME", "KGLSKKGGNIPMIPGWVMDFPTGKE",
"KGQIVGRYVVDTSK", "KGTGKSNKITITNDKGRLSKE", "KGVFRSIVPSGASTGVHE",
"KGVFRSIVPSGASTGVHEALE", "KGVITTQAE", "KGWVPVTKLGRLVKAGKITTIEE",
"KIFTVPTETLQAVTKHFISE", "KIGDLNFLIGPKLYE", "KKAADALLLKVNQIGTLSE",
"KKAGKTLDVVE", "KKAPAAAPAASKKKEE", "KKIDLASIRE", "KKPKTKKVKEE",
"KKQAIRSKQE", "KKTISLKTPLKTLVILHSDE", "KKVYPDVLYTSKLSRAIQTANIALE",
"KKYIATIGVE", "KLAAQRKAE", "KLIHGPKAQYNE", "KLIHNDSSYNHE", "KLKAAYSYMFDSLRE",
"KLKAERE", "KLKKVLSANTNAPFSVE", "KLSGIANNRDE", "KLVAVLK", "KLYVGRAQKKNE",
"KMIKPKEGRIE", "KMSSGYYLGE", "KMSTRAAPFEQRLPE", "KNLFTGWVDVKLSAKGQQE",
"KNLKPLAIIKGWGE", "KNMDAATALVGSGPAFVLLMLE", "KNVPLYQHLAD", "KQLYSFDLE",
"KQNLLAVE", "KRVSGMFLGE", "KSPFLDALKAKNFE", "KTFVNVAKPFHKE",
"KTKLQGMLNKAEE", "KTMGAKITKSLE", "KTQAFKKHVDE", "KTVRKHLERQGWIE",
"KVFNAYPAARE", "KVGLRLSPYGVFNSMSGGAE", "KVILKMKE", "KVIRFQEE",
"KVSVLKALGAE", "KYFPGLLRATNE", "KYSLAPVAKE", "LAKKQKKLE", "LDALIAQGRE",
"LDASNKAAKE", "LDLARPIYLPTASYGHFTNQE", "LDTAQKHID", "LDTIRNME",
"LEARIPE", "LFAKFGPIVSASLE", "LFAKHGE", "LFDTHRVIVNE", "LFLKQLVVGGLDRVYE",
"LFRSIGGE", "LGAVALKGALAKVPE", "LGAVNLHE", "LGDKAVYAGE", "LGDNAVFAGE",
"LGGEKKQKGQALQF", "LGGKSPNIVFADAE", "LGGTVVSLSDSKGCIISE", "LGILPRDILTKE",
"LGIYSHE", "LGKGSFKYAWVLD", "LGKVLAKVIGKE", "LGMLAGADRVE", "LGTPFGVTIDFE",
"LHGNAKKAAEE", "LICSYRSKIETE", "LIGGSADLTPSNLTRWKE", "LIHQNEVPLVLLSSGVGVTPLLAMLE",
"LIHSKVGGE", "LIKLTGKLIE", "LILNGGFSPLTGFLNE", "LINNLGTIAKSGTKAFME",
"LKADLRPLQIKSIRE", "LKAHNINVVDK", "LKAQSKPHVGDE", "LKHLVGPEKAAE",
"LKIQRPLHE", "LKKKGILFVGSGVSGGEE", "LKKQKTAE", "LKKQQKEAE", "LKKSANVGKKKE",
"LKPTVILKLLKE", "LKRAIDLNKE", "LKRIMINCFNE", "LKRIPE", "LKTNGKFQVDKSIIE",
"LKYFGKALE", "LLAAFKAYLE", "LLAVPVVKGRKTEKE", "LLAYQFASPVRWIE",
"LLESPNNKVVPATNQIE", "LLKTNRISNE", "LLKTNRISNEQYE", "LLLLDVAPLSLGIE",
"LLLQQRATE", "LLRLVLLE", "LMAKLANDSPLAIE", "LNAATGSKTSLYDKKE",
"LNAAYAADGYARIKGMSCIITTFGVGE", "LNKGLTKKGGNIPMIPGWVMEFPTGKE",
"LNNAAAKKYDLE", "LNQLLKLVSE", "LPAWSKLQKIYE", "LPGVAFLSEKK",
"LPKNSTSSKDKKKKN", "LQAQLDYLNAE", "LQDIANPIMSKLYQAGGAPGGAAGGAPGGFPGGAPPAPE",
"LQGKLTGMAFRVPTVD", "LQGKLTGMAFRVPTVDVSVVD", "LRPLQFKSIRE", "LRQATGGQAFPQMVFD",
"LSALNGIAGSYAE", "LSFALIHKE", "LSFPFEWAFAGTNE", "LSGIPPAPRGVPQIE",
"LSKISGVDSSLIFPALE", "LSKSKTSPYVLPVPFLNVLNGGSHAGGALALQE", "LSLLFNVIPDRYTKYD",
"LSNVINMFFE", "LSQGTWLNKPKSVFQE", "LSRIGEFE", "LTQFPAFVTPMGKGSIDE",
"LTQLKKE", "LTSLTDYVTRMPE", "LTVKLNKE", "LVARVQGGEE", "LVFGQTFTDHMLTIPWSAKE",
"LVGRINGQFGTVE", "LVILGDSNVVRE", "LVKVGHDNLVGE", "LVRDNKLIGNFTLAGIPPAPKGVPQIE",
"LVRFSYGQDLE", "LVSLPNIILTPHIGGSTEE", "LVTAPLDGTILE", "LYGNIVMSGGTTMFPGIAE",
"LYGTAIGARSQGAKTYLE", "LYPGRPLAIALD", "LYPGRPLAIALDTKGPE", "LYSTSSSSTSSSATKN",
"MATLYDGLE", "MAVAIKKE", "MAVKFGKNAF", "MFFVSKVMNNE", "MFNDFLNAGAE",
"MGQKKAPDGRIVE", "MIGSVVGIYNGKAFNQVE", "MIGSVVGIYNGKAFNQVEIRPE",
"MIISSPSDGQVKE", "MIVLHLPSPVTAQAYRAE", "MKPTSIEKE", "MLAQDKLVAE",
"MLGHYLGE", "MLGNWSFGDYFKKE", "MLNRISPKILDE", "MLRAAGKE", "MNNIIAASRRH",
"MPPQIPNE", "MQTAAQSSSIE", "MQTEIKE", "NAHLKKNFKPQGSIE", "NALKLAKWLE",
"NAVDRLLVE", "NFAPFLKTILPE", "NFIQSIME", "NGFINNPIVISPTTTVGE",
"NGHSPSEAFNE", "NGITFPKDAPSPFIFKTLE", "NGIVTYAAKE", "NGLLSFVQYVIAPIQE",
"NGLTAYRLGKE", "NGSFLLYIDRHLVHE", "NGTVTAANASPINDGAAAVILVSE",
"NGVAKGSFKD", "NISNNYMIGAINAE", "NKERILTE", "NKIIRMPLVE", "NKIVRKPLME",
"NKKVIITGAPAAFSPTCTVSHIPGYINYLDE", "NKVQYIAGARPWTHVQKVDIALPCATQNE",
"NKVSPADAAKKAL", "NLHILE", "NLKDTSSGGVTHANE", "NLKKGSTVTSNGLNWE",
"NLKPSKPSYYLDPE", "NLLPLQSGIGNIANAVIE", "NLTVNPNVNE", "NMKLKLIEPLRE",
"NNIFNMVVE", "NNIFNMVVEIPRWTNAKLE", "NNMLHGNTMTVTGDTLAE", "NNYKKRLLGTAFKSSPFGGSSHAKGIVLE",
"NQGLKTATYPYWANE", "NQQGVNNFDE", "NRGIKVANPVWSTIPFISE", "NRKISAVSTYFE",
"NRNVGKTLVNKSTGLKNASDALKGRVVE", "NRNVGKTLVNKSTGLKSASDALKGRVVE",
"NRVKSDMLKE", "NSLLSSGTSQDSLRE", "NTQLMNRDNIE", "NTSLSPKFVYQGE",
"NVKTDSRNMPVKE", "NVQLYNGFVFHTGSLEE", "NVQNGEVRFE", "NVRDKGNSALLE",
"NVSSNIVKNAE", "NYNLLDTSGVAKVIE", "NYTVERVNE", "PKLVLVRHGQSE",
"PKLVLVRHGQSEWNE", "QAKLTAATNAKQ", "QAVGGIYSVLNKKRGQVVSE", "QDKKKKSNH",
"QDPSKSKVVVFE", "QDVPLIVPVVNPE", "QGAQVLFASTDSE", "QGKPYISLPKGKGIKLSIAEE",
"QGLKIDGPGFE", "QHPRYGGVYVGTLSKPE", "QHPRYGGVYVGTLSKPEVKE", "QISSMVLGKMKE",
"QIVYALKLPGIIHIDAAE", "QLADYLIGVQY", "QLASQLVDLKKE", "QLGIHYE",
"QLYKNDSNNNNNNNGNNAE", "QQGKNATVSVEE", "QRPGTPLFTVKAYLPVNE",
"QRTKVEHVRE", "QSPRPGQQAFE", "QSPYVSWVSYPGLASHSHHE", "QSTKFGTEIITE",
"QTYKKPTGGIVTVRSE", "QVADISSAKVNFKSLE", "QVAKWVNLAQKE", "RAVPHGKWQDE",
"RDLIKKKTKNNE", "RFLNVFPKLVEE", "RGEIKKGE", "RGLPVKLTYTDNKTSKE",
"RHIQVGDGLE", "RLAIVGVE", "RLLWYLARE", "RLNTASIPE", "RLNTASIPENVE",
"RLTSLNVVAGSDLRRTSIIGTIGPKTNNPE", "RLVAIAEQD", "RMHVLKKQYE",
"RNAAYQKE", "RNAAYQKEYE", "RNKIHQYLFQE", "RQLNAVLE", "RQLNAVLEE",
"RTSLFLNLANDPTIE", "RTVLFPIKYHE", "RVKANAAKRAE", "RYKLVPGIFADVKNLKE",
"RYKYVDPNVLPE", "RYVFLLDPMLATGGSAIMATE", "SALSYAALILADSE", "SCKMPQTVE",
"SDKSKWLTGVE", "SEYWPRLTKE", "SFAANWGVMVSHRSGETE", "SFQAVYNKLTGKQIVFE",
"SGSKLKFYVLHGDGVE", "SGTLGTKGNTQVIIPRLTE", "SGTVFDSSYSRGSPIAFE",
"SHIGVAKKLE", "SHLIPSATTGE", "SIAVLSAMKME", "SIAYSLKNTISE", "SIQKTKE",
"SIRVGEYNAE", "SISDSLSSKPHNFE", "SKFAKAYAQGVSKKE", "SKFKGDITWLPVRRKAYWE",
"SKLPTYTAKDSAVATRKLSE", "SLDQAWSLLRIYPKE", "SLGSPSGATKARIVVAKSGEL",
"SLIDAKTGLPKE", "SLKTGNAGPRPACGVIGLTN", "SLQNKGQEVKE", "SLRGLGGILLNPITGRRFVNE",
"SLSSKLSVQD", "SNLKKITLE", "SNPTAKLNLIWPATPIHIKKYE", "SQGKTLSVKQE",
"SQLTNLFNEE", "SQRIASKNQLE", "SRFQEIVKE", "SRKLFAATVAKAKTIVWNGPPGVFE",
"SSANRKIVKE", "STKNGTETKRSLE", "STPDTPSPLIAVNE", "STVAGFLVGSE",
"STVLQFKE", "SVKLTKAVAE", "SVLAHL", "SYQKSQKSQKE", "SYVDKHPVVTFNQE",
"TAGLTVGDPVLRTGKPLSVE", "TAIGPSGQLKGYLRPE", "TAKGNYPINAVTTMAE",
"TAQGQFLNFNKLLE", "TARSTATGPSEAVWYGPPKAANLGGVAVSGLE", "TDPFKLSGAQVVD",
"TEPLGTAGPLKLAE", "TFKLFNKYRPE", "TFRTTNTL", "TGGPISVPVGRE",
"TGIVAQYAYVLGELE", "TGVIKPGMVVTFAPAGVTTE", "THINLKVSDGSSE", "TIGNPKYNVPDFE",
"TIQDKSSAGAVVVANAKSLE", "TKAGVVKGKTLLE", "TKKSCCSGK", "TKNFTPEQISSMVLGKMKE",
"TKTFSPQE", "TKYLSGIAKRLNKE", "TLKKFGEE", "TLKQLNASLADKSYIE",
"TLKVVDPE", "TLRKVVKHFIDE", "TMKAVVIE", "TMWDTKKE", "TNPGTDVTVSSVE",
"TQALLLAPIAPHFAE", "TQKGVIFYE", "TRLSLTHMVE", "TSGSTVNDPLANYE",
"TSNFIKKVGYNPKTVPFVPISGWNGDNMIE", "TSSKKPKHPLE", "TTAMITDIATAPAE",
"TTHIKFASDPGCAFTKSIGFE", "TTTGVHHLYRMVKE", "TVAASAVAAVFE", "TVAKSKNAE",
"TVFAGQKP", "TVLRQALGE", "TVQVNLPVSLE", "TVSKMAYLDKTGE", "VAAIMQDPVMQSILQQAQQNPAALQE",
"VAALAAENK", "VAKVVGANPAAIKQAIAANA", "VANPIMSKLYQAGGAPE", "VARVILTQVGSGPQETNE",
"VATRAKQGELLE", "VDASGFRIPQQE", "VDSVLKHMKE", "VEGMRWAGNANE",
"VFIDFTKE", "VFKFFGFTPE", "VFKFFGFTPEGVAE", "VFLNEHPE", "VFVATKE",
"VGAKGVQLLSSYITEE", "VGASMTRSIQTLE", "VGKSVAVDSSE", "VGLKRVVTKAMSSR",
"VGTMNVFFVFLNKVTGKKE", "VGVHVLPKILDE", "VHFVSNIDGTHIAE", "VIAGNVVTRE",
"VIDTILALVKD", "VKILNGFAFVE", "VKVVLFNHSQRD", "VLANYLTSALSE",
"VLAVQKKLIAKSNLAGKPVICATQMLE", "VLGPFAFRIGSVKE", "VLIKRGVKPE",
"VLLLDVTPLSLGIE", "VLLVAVGRRPYIAGLGAE", "VLNPQIIKDVLE", "VLPRAVGSLTFDE",
"VLVLRGTLE", "VLVVRGSKKGQE", "VLYTSKLSRAIQTANIALE", "VMSIGRTFEE",
"VMTKNPVTGIKGITLKE", "VQNSTLAQLTSKLIPE", "VQSAVLGFPRIGPNRE",
"VRVTPTKTE", "VSAAYKNSLE", "VSDVAQKAE", "VSGLQYIDLLLIHSPLE",
"VSNPNIIFFRGTYAAVSPE", "VSNPPAYGAKIVAKLLETPE", "VSNRSTPSVVGFGPKNRYLGE",
"VSSMPTLIFYKGGKE", "VSVVDLTVKLNKE", "VTAALRVTD", "VTPLVQNVTGDKE",
"VTRVVGANPAAIKQAIASNV", "VTSSFFYRGNYTDFE", "VVALNDPFITND", "VVKILRE",
"VVLPVDFIIAD", "VVLTHPADE", "VVNIGIGGSDLGPVMVTE", "VVTSASAPPENASE",
"VVVITKNVSGQDVAAALE", "VYHNLKSLTKKRYGASAGNVGDE", "VYKIGGIGTVPVGRVE",
"WAGLGKIDRE", "WFDIKAPSTFE", "WFRIYKIPDGKPE", "WGKFLTCVEVE",
"WLFKLREE", "WMWNQLGNFLQE", "WSKLISE", "WSLTAPLGKALHDLS", "WTKIFKAIHE",
"YAQLLAKRLSE", "YAQLLAKRLSERKAE", "YFATHITE", "YFATHITERE", "YFQLLNSALTE",
"YGYSARVVDLIE", "YGYSTRVVD", "YGYSTRVVDLVE", "YIALVDQLIAKYSS",
"YIASKKGSISAE", "YKDIPVPKPKANE", "YKPLFTE", "YLLKAIKE", "YLNLPEHIVPGTYIQE",
"YNKIGDILSGRLKLRAE", "YQSKIIVFPRDGKAPE", "YQTQVLKNAKALE", "YQYLRSVVNE",
"YRKVLPIVSVPE", "YRLSVLPDNVPIMSVE", "YSLLAWTNIPRKE", "YVAKVSARE",
"YVNKGQDFHIAGE", "YVPSVIEPSFGIGRIIYSVFE", "YVQAAKNSIAAGADGVE"
), class = "factor"), variable = structure(c(1L, 1L, 1L, 1L), .Label = c("Control",
"Treat"), class = "factor"), value = c(3893050.501, 900604.6087,
90008.14298, 656399.2021), pv = c(0.852317422616546, 0.852317422616546,
0.852317422616546, 0.852317422616546)), .Names = c("id", "Sequence",
"variable", "value", "pv"), row.names = c(NA, 4L), class = "data.frame")
Just save the original variable before changing it.
# Stash the original factor so it can be restored once the analysis is done.
var.save <- tbl_reo[["variable"]]
# Relabel the six levels: the first three become "Control", the last three "Treat".
levels(tbl_reo[["variable"]]) <- rep(c("Control", "Treat"), each = 3)
# ... run whatever needs the collapsed levels here ...
# Put the original factor back.
tbl_reo[["variable"]] <- var.save