Match data from two different data frames in R like Excel VLOOKUP - r

I am trying to pull data from one data frame to another based on the equivalent of a VLOOKUP table in Excel. I have had a look at the most popular VLOOKUP question in R, but I cannot see how it applies to my specific problem. The key thing is that I don't want to pull all of the columns from the second data frame into my first one - I only want to pull in one column. I'm pretty sure this will be some kind of derivation of a merge function.
Referring to the data below, I am trying to create a new column called df1$Trait1Percentile. This needs to draw from LookupTable$Trait1Percentiles based on a match between df1$Trait1Score and
LookupTable$Scores.
# Example data: df1 holds the scores to look up; LookupTable maps each score to its percentiles.
df1 <- structure(list(JobNumber = c(634L, 21L, 300L, 797L, 1112L, 147L,
1L, 4L, 260L, 194L, 981L, 1110L, 634L, 554L, 213L, 722L, 1036L,
855L, 624L, 1113L, 681L, 547L, 195L, 624L, 546L, 201L, 918L,
1069L, 300L, 294L, 587L, 933L, 918L, 620L, 918L, 298L, 749L,
295L, 635L, 515L, 624L, 147L, 200L, 527L, 800L, 827L, 4L, 568L,
252L, 655L, 559L, 629L, 639L, 933L, 214L, 750L, 1066L, 495L,
1113L, 1L, 1113L, 12L, 561L, 741L, 495L, 981L, 147L, 199L, 629L,
163L, 615L, 294L, 49L, 624L, 260L, 1L, 299L, 193L, 108L, 113L,
426L, 299L, 708L, 749L, 749L, 483L, 935L, 1036L, 295L, 12L, 1113L,
1038L, 4L, 973L, 448L, 295L, 197L, 76L, 1L, 1L), Trait1Score = c(3.89,
4.39, 4.22, 4.21, 3.94, 3.9, 4.58, 4.5, 4.29, 4.47, 4.41, 4.4,
4.14, 4.78, 4.09, 4.58, 4.27, 4.24, 3.96, 3.94, 4.3, 4.07, 4.28,
4.19, 4.57, 4.74, 3.29, 4.23, 3.51, 3.77, 4.46, 5.04, 4.25, 3.92,
3.78, 4.43, 4.12, 4.18, 4.63, 3.25, 3.87, 4.4, 3.83, 4.03, 3.42,
4.9, 4.09, 4.58, 4.29, 4.7, 4.38, 4.61, 4.41, 4.5, 4.6, 4.22,
3.72, 4.34, 4.34, 4.38, 4.15, 4.22, 3.93, 5, 3.81, 4.3, 4.6,
4.96, 4.29, 4.8, 5.05, 3.76, 4.81, 4.77, 4.25, 4.17, 4.75, 4.15,
4.35, 4.23, 5.31, 4.18, 3.67, 3.84, 4.06, 3.66, 3.58, 4.37, 4.43,
4.63, 4.74, 4.79, 5.04, 3.55, 3.64, 4.9, 4.38, 4.01, 4.47, 4.53
), Trait2Score = c(4, 2.94, 3.17, 3.83, 4.22, 3.83, 5.11, 3,
2.83, 2.78, 2.22, 2.22, 4.11, 2.39, 2.22, 2.06, 2.89, 3.61, 3.89,
4.89, 3.78, 4.22, 4.5, 4.39, 1.89, 4.78, 4.56, 3.78, 2.28, 4.61,
2.72, 1.89, 4.44, 4.06, 3.72, 2.44, 3.61, 2.06, 2.17, 6.44, 3.22,
2.78, 4.61, 2.72, 2.83, 2.44, 6.5, 2.28, 2.89, 2.11, 4.44, 2.83,
3, 6.33, 3.11, 3.17, 3.67, 4.5, 2.5, 4.33, 5, 2.89, 3.89, 1.72,
3.33, 4.28, 2.17, 3.17, 2.61, 2.89, 1.22, 3.39, 1.28, 2.61, 2.5,
4.56, 2.89, 4.89, 3.11, 3.5, 1.44, 2.39, 5.33, 3.78, 1.5, 3.44,
5.83, 3.17, 3.78, 2.67, 1.61, 1.83, 4.56, 4.67, 4.61, 2.5, 4.94,
3.94, 4.33, 2.72)), row.names = c(NA, -100L), class = "data.frame")
LookupTable <- structure(list(Scores = c(0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06,
0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17,
0.18, 0.19, 0.2, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28,
0.29, 0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39,
0.4, 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5,
0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61,
0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72,
0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83,
0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94,
0.95, 0.96, 0.97, 0.98, 0.99, 1, 1.01, 1.02, 1.03, 1.04, 1.05,
1.06, 1.07, 1.08, 1.09, 1.1, 1.11, 1.12, 1.13, 1.14, 1.15, 1.16,
1.17, 1.18, 1.19, 1.2, 1.21, 1.22, 1.23, 1.24, 1.25, 1.26, 1.27,
1.28, 1.29, 1.3, 1.31, 1.32, 1.33, 1.34, 1.35, 1.36, 1.37, 1.38,
1.39, 1.4, 1.41, 1.42, 1.43, 1.44, 1.45, 1.46, 1.47, 1.48, 1.49,
1.5, 1.51, 1.52, 1.53, 1.54, 1.55, 1.56, 1.57, 1.58, 1.59, 1.6,
1.61, 1.62, 1.63, 1.64, 1.65, 1.66, 1.67, 1.68, 1.69, 1.7, 1.71,
1.72, 1.73, 1.74, 1.75, 1.76, 1.77, 1.78, 1.79, 1.8, 1.81, 1.82,
1.83, 1.84, 1.85, 1.86, 1.87, 1.88, 1.89, 1.9, 1.91, 1.92, 1.93,
1.94, 1.95, 1.96, 1.97, 1.98, 1.99, 2, 2.01, 2.02, 2.03, 2.04,
2.05, 2.06, 2.07, 2.08, 2.09, 2.1, 2.11, 2.12, 2.13, 2.14, 2.15,
2.16, 2.17, 2.18, 2.19, 2.2, 2.21, 2.22, 2.23, 2.24, 2.25, 2.26,
2.27, 2.28, 2.29, 2.3, 2.31, 2.32, 2.33, 2.34, 2.35, 2.36, 2.37,
2.38, 2.39, 2.4, 2.41, 2.42, 2.43, 2.44, 2.45, 2.46, 2.47, 2.48,
2.49, 2.5, 2.51, 2.52, 2.53, 2.54, 2.55, 2.56, 2.57, 2.58, 2.59,
2.6, 2.61, 2.62, 2.63, 2.64, 2.65, 2.66, 2.67, 2.68, 2.69, 2.7,
2.71, 2.72, 2.73, 2.74, 2.75, 2.76, 2.77, 2.78, 2.79, 2.8, 2.81,
2.82, 2.83, 2.84, 2.85, 2.86, 2.87, 2.88, 2.89, 2.9, 2.91, 2.92,
2.93, 2.94, 2.95, 2.96, 2.97, 2.98, 2.99, 3, 3.01, 3.02, 3.03,
3.04, 3.05, 3.06, 3.07, 3.08, 3.09, 3.1, 3.11, 3.12, 3.13, 3.14,
3.15, 3.16, 3.17, 3.18, 3.19, 3.2, 3.21, 3.22, 3.23, 3.24, 3.25,
3.26, 3.27, 3.28, 3.29, 3.3, 3.31, 3.32, 3.33, 3.34, 3.35, 3.36,
3.37, 3.38, 3.39, 3.4, 3.41, 3.42, 3.43, 3.44, 3.45, 3.46, 3.47,
3.48, 3.49, 3.5, 3.51, 3.52, 3.53, 3.54, 3.55, 3.56, 3.57, 3.58,
3.59, 3.6, 3.61, 3.62, 3.63, 3.64, 3.65, 3.66, 3.67, 3.68, 3.69,
3.7, 3.71, 3.72, 3.73, 3.74, 3.75, 3.76, 3.77, 3.78, 3.79, 3.8,
3.81, 3.82, 3.83, 3.84, 3.85, 3.86, 3.87, 3.88, 3.89, 3.9, 3.91,
3.92, 3.93, 3.94, 3.95, 3.96, 3.97, 3.98, 3.99, 4, 4.01, 4.02,
4.03, 4.04, 4.05, 4.06, 4.07, 4.08, 4.09, 4.1, 4.11, 4.12, 4.13,
4.14, 4.15, 4.16, 4.17, 4.18, 4.19, 4.2, 4.21, 4.22, 4.23, 4.24,
4.25, 4.26, 4.27, 4.28, 4.29, 4.3, 4.31, 4.32, 4.33, 4.34, 4.35,
4.36, 4.37, 4.38, 4.39, 4.4, 4.41, 4.42, 4.43, 4.44, 4.45, 4.46,
4.47, 4.48, 4.49, 4.5, 4.51, 4.52, 4.53, 4.54, 4.55, 4.56, 4.57,
4.58, 4.59, 4.6, 4.61, 4.62, 4.63, 4.64, 4.65, 4.66, 4.67, 4.68,
4.69, 4.7, 4.71, 4.72, 4.73, 4.74, 4.75, 4.76, 4.77, 4.78, 4.79,
4.8, 4.81, 4.82, 4.83, 4.84, 4.85, 4.86, 4.87, 4.88, 4.89, 4.9,
4.91, 4.92, 4.93, 4.94, 4.95, 4.96, 4.97, 4.98, 4.99, 5, 5.01,
5.02, 5.03, 5.04, 5.05, 5.06, 5.07, 5.08, 5.09, 5.1, 5.11, 5.12,
5.13, 5.14, 5.15, 5.16, 5.17, 5.18, 5.19, 5.2, 5.21, 5.22, 5.23,
5.24, 5.25, 5.26, 5.27, 5.28, 5.29, 5.3, 5.31, 5.32, 5.33, 5.34,
5.35, 5.36, 5.37, 5.38, 5.39, 5.4, 5.41, 5.42, 5.43, 5.44, 5.45,
5.46, 5.47, 5.48, 5.49, 5.5, 5.51, 5.52, 5.53, 5.54, 5.55, 5.56,
5.57, 5.58, 5.59, 5.6, 5.61, 5.62, 5.63, 5.64, 5.65, 5.66, 5.67,
5.68, 5.69, 5.7, 5.71, 5.72, 5.73, 5.74, 5.75, 5.76, 5.77, 5.78,
5.79, 5.8, 5.81, 5.82, 5.83, 5.84, 5.85, 5.86, 5.87, 5.88, 5.89,
5.9, 5.91, 5.92, 5.93, 5.94, 5.95, 5.96, 5.97, 5.98, 5.99, 6,
6.01, 6.02, 6.03, 6.04, 6.05, 6.06, 6.07, 6.08, 6.09, 6.1, 6.11,
6.12, 6.13, 6.14, 6.15, 6.16, 6.17, 6.18, 6.19, 6.2, 6.21, 6.22,
6.23, 6.24, 6.25, 6.26, 6.27, 6.28, 6.29, 6.3, 6.31, 6.32, 6.33,
6.34, 6.35, 6.36, 6.37, 6.38, 6.39, 6.4, 6.41, 6.42, 6.43, 6.44,
6.45, 6.46, 6.47, 6.48, 6.49, 6.5, 6.51, 6.52, 6.53, 6.54, 6.55,
6.56, 6.57, 6.58, 6.59, 6.6, 6.61, 6.62, 6.63, 6.64, 6.65, 6.66,
6.67, 6.68, 6.69, 6.7, 6.71, 6.72, 6.73, 6.74, 6.75, 6.76, 6.77,
6.78, 6.79, 6.8, 6.81, 6.82, 6.83, 6.84, 6.85, 6.86, 6.87, 6.88,
6.89, 6.9, 6.91, 6.92, 6.93, 6.94, 6.95, 6.96, 6.97, 6.98, 6.99,
7), Trait1Percentiles = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03,
0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.06, 0.06, 0.06, 0.06,
0.06, 0.06, 0.06, 0.06, 0.06, 0.06, 0.06, 0.06, 0.06, 0.06, 0.06,
0.06, 0.06, 0.06, 0.06, 0.06, 0.06, 0.06, 0.06, 0.06, 0.06, 0.06,
0.06, 0.06, 0.09, 0.09, 0.09, 0.09, 0.09, 0.09, 0.13, 0.13, 0.13,
0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.16, 0.19, 0.19, 0.19, 0.19,
0.25, 0.28, 0.35, 0.38, 0.41, 0.5, 0.5, 0.5, 0.5, 0.54, 0.57,
0.57, 0.6, 0.6, 0.66, 0.66, 0.69, 0.69, 0.85, 0.85, 0.91, 1.01,
1.04, 1.07, 1.1, 1.13, 1.2, 1.23, 1.32, 1.42, 1.48, 1.48, 1.67,
1.73, 1.89, 1.98, 2.14, 2.14, 2.14, 2.33, 2.33, 2.52, 2.52, 2.77,
2.77, 3.12, 3.34, 3.46, 3.75, 3.97, 4.16, 4.57, 4.82, 5.1, 5.26,
5.45, 5.61, 5.73, 6.14, 6.36, 6.65, 7.09, 7.43, 7.43, 8.31, 8.31,
9.01, 9.01, 9.51, 9.51, 10.65, 11.15, 11.69, 12.03, 12.6, 13.39,
14.08, 14.61, 14.96, 15.59, 16.5, 17.23, 18.02, 18.8, 19.78,
20.79, 21.57, 22.33, 22.93, 22.93, 25.01, 25.92, 26.9, 26.9,
28.79, 29.83, 31.28, 32.35, 33.45, 34.43, 35.43, 36.91, 37.95,
39.31, 40.88, 42.05, 43.15, 44.22, 45.61, 46.87, 48.22, 49.23,
50.77, 52.03, 52.03, 54.46, 55.81, 56.94, 56.94, 59.37, 60.66,
61.95, 61.95, 64.28, 65.48, 66.96, 68, 68.79, 69.73, 70.55, 71.43,
72.44, 73.48, 74.3, 74.99, 75.81, 76.76, 77.73, 78.46, 78.46,
80.16, 80.79, 81.64, 81.64, 83.28, 83.97, 84.63, 84.63, 85.76,
86.27, 86.99, 87.46, 87.91, 88.35, 88.69, 88.91, 89.32, 89.73,
90.11, 90.43, 90.8, 91.24, 91.5, 91.69, 91.69, 92.44, 92.72,
93.2, 93.2, 93.76, 93.92, 94.17, 94.17, 94.52, 94.83, 95.21,
95.46, 95.72, 95.87, 95.94, 96.19, 96.35, 96.6, 96.85, 96.98,
97.1, 97.1, 97.2, 97.32, 97.32, 97.54, 97.64, 97.76, 97.76, 97.92,
97.98, 98.05, 98.05, 98.24, 98.3, 98.36, 98.39, 98.55, 98.58,
98.68, 98.77, 98.83, 98.87, 98.87, 98.99, 99.06, 99.06, 99.06,
99.06, 99.06, 99.06, 99.15, 99.15, 99.15, 99.28, 99.28, 99.31,
99.31, 99.31, 99.5, 99.53, 99.59, 99.62, 99.62, 99.65, 99.65,
99.65, 99.65, 99.69, 99.69, 99.72, 99.78, 99.78, 99.81, 99.81,
99.81, 99.81, 99.81, 99.81, 99.81, 99.81, 99.81, 99.81, 99.81,
99.81, 99.81, 99.81, 99.81, 99.81, 99.81, 99.81, 99.81, 99.87,
99.87, 99.91, 99.91, 99.94, 99.94, 99.97, 99.97, 99.97, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100), Trait2Percentiles = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.03, 0.03, 0.03,
0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.06, 0.06, 0.06, 0.09, 0.09,
0.09, 0.09, 0.13, 0.13, 0.13, 0.13, 0.19, 0.19, 0.19, 0.22, 0.25,
0.28, 0.31, 0.31, 0.31, 0.41, 0.41, 0.41, 0.41, 0.41, 0.41, 0.57,
0.6, 0.63, 0.69, 0.76, 0.82, 0.82, 0.91, 0.98, 0.98, 1.13, 1.23,
1.23, 1.35, 1.51, 1.54, 1.57, 1.67, 1.67, 1.89, 1.89, 2.08, 2.08,
2.55, 2.55, 2.93, 3.02, 3.15, 3.34, 3.53, 3.94, 4.19, 4.6, 4.85,
5.04, 5.35, 5.57, 6.02, 6.27, 6.65, 7.02, 7.56, 8.06, 8.06, 8.85,
8.85, 9.7, 9.7, 10.55, 10.55, 11.62, 12.38, 12.98, 13.64, 14.3,
14.93, 15.62, 16.35, 17.35, 18.11, 19.15, 20.19, 21.39, 22.52,
23.5, 24.85, 25.61, 26.68, 26.68, 29.13, 29.13, 31.78, 31.78,
34.2, 34.2, 36.91, 38.33, 39.53, 41.35, 42.93, 44.72, 46.52,
48, 49.57, 50.96, 52.41, 54.27, 55.69, 57.64, 58.96, 60.85, 62.2,
64, 65.48, 65.48, 68.31, 69.86, 71.21, 71.21, 73.73, 75.02, 76.54,
77.45, 78.46, 79.78, 80.63, 81.8, 82.8, 83.81, 84.91, 85.86,
86.68, 87.4, 88.16, 88.54, 89.04, 89.86, 90.52, 90.93, 90.93,
91.81, 92.16, 92.47, 92.47, 93.48, 93.8, 94.27, 94.27, 94.68,
95.02, 95.37, 95.59, 95.94, 96.13, 96.41, 96.66, 96.76, 96.79,
97.01, 97.13, 97.32, 97.45, 97.51, 97.57, 97.57, 97.57, 97.95,
98.05, 98.05, 98.24, 98.36, 98.39, 98.39, 98.61, 98.61, 98.74,
98.77, 98.87, 98.93, 98.99, 99.02, 99.12, 99.21, 99.31, 99.4,
99.43, 99.53, 99.59, 99.59, 99.59, 99.59, 99.62, 99.65, 99.65,
99.65, 99.75, 99.75, 99.75, 99.78, 99.81, 99.84, 99.87, 99.87,
99.87, 99.87, 99.87, 99.87, 99.91, 99.91, 99.91, 99.94, 99.94,
99.94, 99.94, 99.94, 99.94, 99.94, 99.97, 99.97, 99.97, 99.97,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100)), class = "data.frame", row.names = c(NA, -701L))

You can use match here :
# Look up each Trait1Score in LookupTable$Scores and pull the corresponding
# Trait1Percentiles value into a new df1 column (NA when a score is absent).
# Scores sit on a 0.01 grid, so match on integer hundredths: comparing raw
# doubles can miss equal-looking values when the grid was computed
# (e.g. with seq(0, 7, by = 0.01)) rather than typed in.
idx <- match(round(df1$Trait1Score * 100), round(LookupTable$Scores * 100))
df1$Trait1Percentile <- LookupTable$Trait1Percentiles[idx]
head(df1)
# JobNumber Trait1Score Trait2Score Trait1Percentile
#1 634 3.89 4.00 14.08
#2 21 4.39 2.94 68.00
#3 300 4.22 3.17 46.87
#4 797 4.21 3.83 45.61
#5 1112 3.94 4.22 17.23
#6 147 3.90 3.83 14.61
With merge you need to select the relevant columns:
# Restrict the lookup table to the key and the one wanted column before
# merging, instead of trimming the result by position (which picks the wrong
# columns as soon as df1 gains or loses a column). all.x = TRUE keeps df1
# rows that have no matching score; their percentile becomes NA.
merge(
  df1,
  LookupTable[c("Scores", "Trait1Percentiles")],
  by.x = "Trait1Score",
  by.y = "Scores",
  all.x = TRUE
)
Similarly in dplyr :
library(dplyr)
# Left join keeps every row of df1 (as VLOOKUP would) and brings in only the
# Trait1Percentiles column, chosen by name rather than by column position.
df1 %>%
  left_join(
    select(LookupTable, Scores, Trait1Percentiles),
    by = c("Trait1Score" = "Scores")
  )

Related

does R have a "alternate()" function?

I have a large data frame and I'm trying to relocate the columns using patterns instead of manually writing each column name in select(). More details here.
A glimpse of the issue (edit): All my columns share a pattern ARG_G1_50_AAA or ARG_G2_50_AAA or NARR_G1_50_AAA or NARR_G2_50_AAA. The final parts are: AAA, AAC, AC and AB. I need two subsets of this data.
Set 1: I need to intercalate "G1" and "G2" columns (in the order 50, 100, 150 and 200) and in the order (AAA, AAC, AC and AB). Ex:
NARR_G1_50_AAA, NARR_G2_50_AAA,
NARR_G1_50_AAC, NARR_G2_50_AAC.... so on
Set 2: I need to intercalate "Narr" and "Arg" columns (again, 50 before 100, 150 and 200 and AAA before AAC, AC and AB). No need to intercalate G1 and G2 now. Ex:
NARR_G1_50_AAA, ARG_G1_50_AAA,
NARR_G2_50_AAA, ARG_G2_50_AAA... so on
Basically, I was able to partially solve my problem (cf. linked post above) with:
# Keep ID plus every column whose name matches the regex "G?_50\\w*".
# Because "G?" makes the leading G optional, the pattern hits any name
# containing "_50"; str_subset() returns the names in their existing order,
# so the selected columns are not interleaved by group.
dfPaired <- merged_DF %>%
dplyr::select(ID, str_subset(names(merged_DF), "G?_50\\w*"))
# Preview the first rows of the selection.
head(dfPaired)
ID ARG_G1_50_AAA ARG_G1_50_AAC ARG_G1_50_AC ARG_G1_50_AB
ARG_G2_50_AAA ARG_G2_50_AAC, ARG_G2_50_AC ARG_G2_50_AB....
## I know that I'm only getting the "50" columns here; in fact I need all of them, but it wouldn't be a problem to repeat the code for 100, 150 and 200.
How can I make R "intercalate" the strings? I mean, I need:
ARG_G1_50_AAA, ARG_G2_50_AAA
ARG_G1_50_AAC, ARG_G2_50_AAC,
ARG_G1_50_AC, ARG_G2_50_AC,
ARG_G1_50_AB, ARG_G2_50_AB ... (so on)
(intercalate G1 and G2 columns in the case of set 1)
Questions :
Could I use something like seq(by = 2)?
Is there a way to pass two patterns to str() and ask it to intercalate the output?
Is there an "intercalate()" function that I could wrap around str_subset(names(merged_DF), "G?_50\\w*")?
** I mean, something like int(str_subset(names(merged_DF), "G1_50\\w*"), str_subset(names(merged_DF), "G2_50\\w*")). Thanks in advance :)
EDIT:
dput(merged_DF[1:50])
structure(list(ID = structure(c("P1", "P2", "P3", "P4", "P5",
"P6", "P7", "P8", "P9", "P10", "P11", "P12", "P13", "P14", "P15",
"P16", "P17", "P18", "P19", "P20", "P21", "P22", "P23", "P24",
"P25", "P26", "P27", "P28", "P29", "P30", "P31", "P32", "P33",
"P34", "P35", "P36", "P37", "P38", "P39", "P40", "P41", "P42",
"P43", "P44", "P45", "P46", "P47", "P48", "P49", "P50", "P51",
"P52", "P53", "P54", "P55", "P56", "P57", "P58", "P59", "P60",
"P61", "P62", "P63", "P64", "P65", "P66", "P67", "P68", "P69",
"P70", "P71"), class = c("glue", "character")), ARG_G1_100_AAA = c(68.53,
65.9, 69.78, 68.29, NaN, 69.5, 67.05, 73.74, 73.59, 72.57, 64.33,
67.79, 72.94, 63.75, 71.56, 75.5, 68.16, NA, 65.64, 68.36, 69.75,
72.73, 67.67, 66.19, 62.94, 72.48, 72.19, 62.44, 72.5, 71.06,
70.4, 69.14, NA, 67.59, 69.1, 74.05, NA, 68.6, 68.27, 59.12,
NA, NA, 63.7, 67.18, NA, 68.38, 63.44, 72.56, 66.06, 66.53, 73.19,
NA, NA, NA, 73.44, 67.45, 72.91, 65.81, 73.96, 75, 75.89, 72,
NA, 68.2, 67.29, 69.91, NaN, 69.67, 68.39, 69.2, 67.55), ARG_G1_100_AAC = c(70.18,
67.65, 71.89, 70.42, NaN, 72.38, 69.67, 75.63, 76.7, 76.21, 66.5,
70.57, 76.72, 66.4, 74.75, 79.17, 70.84, NA, 67.82, 70, 71.88,
74.55, 69.33, 69.5, 65.25, 75.05, 75.44, 64.56, 74.88, 74.29,
72.4, 71.93, NA, 69.12, 71.43, 77.53, NA, 71.93, 70.4, 60.25,
NA, NA, 64.8, 69, NA, 71.19, 71.12, 75.04, 68.89, 68.26, 75.81,
NA, NA, NA, 75.89, 68.82, 77.35, 68.38, 76.71, 79.12, 78.89,
73.5, NA, 69.7, 69.82, 70.91, NaN, 72, 71.17, 71.85, 69.7), ARG_G1_100_AC = c(4.35,
4.95, 1.44, 2.71, NaN, 3.25, 3.95, 2.26, 0.85, 1.21, 5.33, 5.43,
0.83, 10.4, 2.56, 0.33, 4.92, NA, 10.55, 3.43, 2.94, 1.55, 5.33,
6.44, 5.25, 2, 3.12, 8.5, 1.38, 3.76, 1.9, 2.79, NA, 4.06, 5.57,
1.95, NA, 6.07, 2.67, 7, NA, NA, 8, 4.76, NA, 4.19, 2.68, 3,
4.94, 4.79, 2.19, NA, NA, NA, 1.78, 5.27, 2.52, 5.88, 1.96, 1.12,
0.67, 3.28, NA, 3.5, 3.41, 3.73, NaN, 3.83, 6.06, 3.3, 3.9),
ARG_G1_100_AB = c(4.94, 6.55, 2.44, 3, NaN, 3.25, 4.71, 2.84,
1.07, 2, 5.33, 5.43, 1.72, 10.55, 3, 1.17, 5.8, NA, 10.55,
4.21, 2.94, 3.55, 6.33, 8.25, 5.88, 2, 3.44, 9.22, 1.69,
4.18, 2.5, 4.71, NA, 4.41, 5.9, 2.21, NA, 6.67, 3.33, 7,
NA, NA, 8, 4.76, NA, 4.44, 2.68, 3.16, 4.94, 5.42, 2.81,
NA, NA, NA, 1.78, 6.09, 2.52, 6.56, 1.96, 1.12, 0.67, 3.78,
NA, 3.5, 3.65, 5.27, NaN, 4.33, 6.78, 3.6, 4.35), ARG_G1_150_AAA = c(93.38,
90.2, 98.33, 94.69, NaN, 99, 93.64, 104.22, 104.8, 103.17,
87, 93.83, 101.89, 87.5, 100.38, 107, 94.69, NA, 90.75, 91.5,
93.88, 99.5, NaN, 89.5, 86.5, 100.55, 101, 84.22, 101.88,
94.62, 97.2, 96.5, NA, 87.38, 96.82, 103.67, NA, 97.57, 95.86,
84, NA, NA, 85.5, 90.5, NA, 96.29, 89.71, 101.64, 92.33,
93.89, 104.43, NA, NA, NA, 101.33, 93.5, 105.42, 90.75, 104.23,
108.86, 102.67, 97, NA, 91.9, 91.38, 93.5, NaN, 98, 94.78,
95.1, 93.4), ARG_G1_150_AAC = c(96.38, 90.9, 100, 96.08,
NaN, 99.5, 95.82, 106.33, 106.6, 106.5, 92, 95.83, 104, 89,
103.75, 109, 96.92, NA, 93, 93.17, 95.12, 102.75, NaN, 93.5,
89.38, 102.09, 104.12, 85.44, 103.38, 96.75, 99.2, 98.5,
NA, 90.38, 99.18, 105.89, NA, 99.43, 97, 84, NA, NA, 86.75,
91.88, NA, 96.86, 98.64, 103.71, 94.22, 95.22, 105.71, NA,
NA, NA, 102.33, 94.25, 108.08, 91.75, 107, 112.29, 106.33,
98.22, NA, 93.5, 93.25, 94.25, NaN, 100, 96.78, 97.8, 95.5
), ARG_G1_150_AC = c(8.75, 10.1, 3.67, 5.23, NaN, 6.5, 6.73,
4.78, 2.27, 3.17, 12, 9.83, 3.44, 21.1, 4.25, 2, 11.85, NA,
17.5, 6.17, 7.25, 3, NaN, 13.5, 10.62, 5, 5.75, 17.44, 4,
10.75, 5, 5.5, NA, 9.5, 9.36, 3.56, NA, 10, 6.86, 9.5, NA,
NA, 16.25, 10.25, NA, 10.43, 6, 6.21, 9.22, 9.22, 5.14, NA,
NA, NA, 3, 10.75, 6, 12.88, 3.77, 2.57, 4.33, 7.22, NA, 8.6,
7.88, 10, NaN, 7, 11.67, 7.8, 7.7), ARG_G1_150_AB = c(10.12,
12.6, 5.33, 5.77, NaN, 6.5, 7.91, 5.44, 2.53, 4.33, 12, 9.83,
4.78, 21.4, 5.25, 3, 13.77, NA, 17.5, 7.33, 7.25, 6, NaN,
16.5, 11.5, 5, 6.25, 18.67, 4.5, 11.38, 5.8, 8.5, NA, 10,
9.82, 4.33, NA, 11, 7.71, 9.5, NA, NA, 16.25, 10.25, NA,
10.86, 6, 7, 9.22, 10.33, 6.43, NA, NA, NA, 3.33, 11.75,
6, 14, 3.77, 2.57, 4.33, 8.22, NA, 8.8, 9, 12, NaN, 8, 12.67,
8.2, 8.4), ARG_G1_200_AAA = c(121.5, 110.6, NaN, 120.57,
NaN, NaN, 115.67, 132.4, 131.11, 128.5, NaN, 114.5, 126.25,
107.4, 124.67, NaN, 120.5, NA, 108, 110.5, 114.33, 125, NaN,
114.67, 108, 123.5, 126.67, 105.5, 129.67, 117.75, 121, 120,
NA, 108.5, 122.83, 130.8, NA, 123.67, 119, NaN, NA, NA, NaN,
109.75, NA, 119, 114.75, 128.88, 115.25, 117, 134, NA, NA,
NA, NaN, 113, 131.86, 110.67, 133.57, 138.33, 127.5, 118.25,
NA, 112.8, 111.5, 113, NaN, NaN, 114.25, 118, 112.8), ARG_G1_200_AAC = c(123.25,
111.6, NaN, 121.29, NaN, NaN, 116.33, 133.4, 132.89, 130.5,
NaN, 115.5, 129.5, 108.2, 128.33, NaN, 123, NA, 108, 111.5,
115.67, 125, NaN, 118, 112, 125.17, 129, 105.75, 130.33,
119.5, 121.4, 121, NA, 109.75, 124.33, 133.4, NA, 125, 120.33,
NaN, NA, NA, NaN, 110.75, NA, 123, 124, 129.75, 117.5, 117.2,
134, NA, NA, NA, NaN, 116, 134.43, 111.33, 135, 141.33, 129.5,
119.5, NA, 114, 113.5, 113, NaN, NaN, 115.5, 120.6, 114),
ARG_G1_200_AC = c(12, 15.6, NaN, 8, NaN, NaN, 10.83, 7.8,
5.33, 6, NaN, 16.5, 6.75, 31.2, 9.33, NaN, 18, NA, 30, 14.5,
13, 11, NaN, 19.67, 17, 9, 9.33, 25.5, 8, 16.25, 9.6, 9,
NA, 16, 12.67, 6.2, NA, 13.67, 11.67, NaN, NA, NA, NaN, 17.5,
NA, 17, 9, 9.5, 14.75, 15.8, 8, NA, NA, NA, NaN, 23, 10.43,
21.33, 5.71, 4.67, 10.25, 13.25, NA, 14.6, 13.25, 19, NaN,
NaN, 21.5, 13.2, 14.6), ARG_G1_200_AB = c(14, 19.4, NaN,
8.71, NaN, NaN, 12.5, 9, 6, 8, NaN, 16.5, 8.5, 31.8, 11,
NaN, 21, NA, 30, 15.5, 13, 15, NaN, 24, 18, 9, 10, 27, 9,
17.25, 10.8, 12, NA, 17, 13.5, 7.2, NA, 14.67, 14, NaN, NA,
NA, NaN, 17.5, NA, 17.67, 9, 10.88, 14.75, 17, 9.67, NA,
NA, NA, NaN, 24, 10.43, 23.33, 5.71, 4.67, 10.5, 15, NA,
14.8, 14.75, 21, NaN, NaN, 23.25, 13.8, 15.8), ARG_G1_50_AAA = c(36.35,
35.88, 36.22, 35.72, 36.12, 36.96, 35.24, 37.62, 36.05, 34.63,
34.19, 33.71, 36.22, 34.43, 34.95, 34.59, 36.03, NA, 32.61,
35.29, 37.17, 37.13, 35.62, 34.64, 34.4, 35.69, 37.36, 36.4,
36.69, 35.8, 36.57, 35.97, NA, 36.44, 34.94, 35.26, NA, 34.44,
37.85, 33.15, NA, NA, 36.13, 34.91, NA, 35.54, 29.02, 35.55,
35.64, 35.79, 35.93, NA, NA, NA, 37, 32.58, 35.71, 34.98,
36.64, 33.29, 35.29, 37.2, NA, 36.29, 36.91, 31.26, 34, 37.48,
33.89, 36.34, 35.88), ARG_G1_50_AAC = c(41.19, 38.7, 41.22,
40.53, 44.12, 41.04, 40.18, 42.38, 42.17, 41.87, 38, 41.21,
42.24, 38.69, 42.64, 42.14, 41.53, NA, 39.65, 40.76, 41.88,
42.23, 39.62, 41.55, 38.19, 42.53, 42.24, 39.49, 42.07, 43.3,
40.92, 39.92, NA, 40.35, 40.49, 44.11, NA, 41.72, 40.64,
36.15, NA, NA, 39.03, 40.86, NA, 40.93, 37.95, 42.27, 39.47,
39.72, 42.12, NA, NA, NA, 42.11, 39.81, 42.82, 39.12, 42.67,
43.02, 43.58, 42.61, NA, 40.04, 41.42, 40.9, 41.5, 41.62,
40.02, 41.08, 40.18), ARG_G1_50_AC = c(0.98, 1.5, 0.37, 0.6,
0.88, 0.73, 1.51, 0.23, 0.25, 0.42, 1.67, 1.58, 0.31, 3.27,
0.62, 0.05, 0.83, NA, 3.71, 1.47, 1.07, 0.1, 1.81, 1.19,
1.62, 0.61, 0.76, 1.73, 0.24, 0.64, 0.33, 0.97, NA, 0.6,
1.98, 0.34, NA, 1.69, 0.26, 2.12, NA, NA, 1.5, 1.14, NA,
1, 0.65, 0.88, 1.62, 1.3, 0.39, NA, NA, NA, 0.57, 1.48, 0.58,
2.21, 0.43, 0.24, 0.16, 0.65, NA, 0.96, 0.4, 1.13, 1.5, 1.05,
1.91, 0.7, 0.94), ARG_G1_50_AB = c(1.09, 2.24, 0.74, 0.68,
0.88, 0.73, 1.82, 0.38, 0.36, 0.89, 1.67, 1.58, 0.76, 3.27,
0.83, 0.45, 1.15, NA, 3.71, 1.82, 1.07, 1.16, 2.25, 1.93,
1.86, 0.61, 1, 2.09, 0.31, 0.86, 0.61, 1.73, NA, 0.77, 2.18,
0.34, NA, 1.92, 0.49, 2.12, NA, NA, 1.5, 1.14, NA, 1.2, 0.65,
0.88, 1.62, 1.49, 0.63, NA, NA, NA, 0.57, 1.77, 0.58, 2.6,
0.43, 0.24, 0.16, 0.85, NA, 0.96, 0.4, 1.84, 1.5, 1.05, 2.4,
0.76, 1.14), ARG_G2_100_AAA = c(64.9, 63.8, 71.73, 67.67,
NA, NA, 52.5, 72.35, 65.28, 57.22, NA, NaN, 69, 66.67, NaN,
66.58, 69, 60.55, 56.29, 67.45, 68.4, 64.25, NaN, 50.86,
67.83, 65.96, 57, 53.07, 66.89, NaN, NA, 59, 61.5, NA, 65.9,
64.07, NA, NA, 57.91, 67.89, 68.75, 68.5, NaN, 63.24, 66.19,
60.59, 59.24, 54.33, 64.39, 65.83, 65.71, 63, 63.78, 63.62,
64, 65.08, NA, 67.61, 67.57, 72.71, 65.46, 61.71, NA, 57.62,
NA, NA, NA, 64, 61.33, 62.64, NA), ARG_G2_100_AAC = c(65.7,
65.8, 74.45, 68, NA, NA, 53.75, 73.94, 67.24, 58.22, NA,
NaN, 71.07, 68.07, NaN, 69.88, 71.32, 62.18, 58.65, 76.45,
71.13, 67.25, NaN, 51.76, 69.33, 68.17, 58, 54.27, 68.05,
NaN, NA, 61, 61.67, NA, 67.79, 65.93, NA, NA, 59.27, 69.67,
71.38, 70, NaN, 64.88, 68.19, 62.06, 61, 55.48, 65.67, 67.72,
68.47, 64, 65.11, 66, 67.5, 66.33, NA, 69.61, 69.33, 75.67,
68.17, 63, NA, 58.81, NA, NA, NA, 66.5, 62.33, 65, NA), ARG_G2_100_AC = c(7.1,
6.4, 0.18, 3.67, NA, NA, 12.75, 1.24, 2.96, 9.78, NA, NaN,
1.43, 1.33, NaN, 5.21, 2.76, 7.91, 8.06, 2.36, 2.87, 4, NaN,
15.52, 2.67, 4.17, 13, 10.07, 5.05, NaN, NA, 9.5, 8.17, NA,
5.86, 3.87, NA, NA, 7, 3.33, 1.75, 3, NaN, 7.94, 3.11, 5.29,
5.29, 13.1, 3.78, 3.33, 3.06, 5.18, 2.56, 5.04, 5.5, 5.75,
NA, 2.22, 2.48, 1, 3.83, 4.82, NA, 8.19, NA, NA, NA, 5, 6.44,
5.29, NA), ARG_G2_100_AB = c(7.1, 7.4, 1.09, 3.67, NA, NA,
12.75, 1.24, 3.28, 9.78, NA, NaN, 1.71, 1.93, NaN, 6.21,
2.76, 7.91, 8.65, 3.55, 3.4, 5, NaN, 16.05, 3.39, 4.52, 13,
11.6, 5.05, NaN, NA, 9.5, 9.67, NA, 7.03, 3.87, NA, NA, 8,
3.33, 2.19, 3, NaN, 8.53, 3.37, 5.47, 7.35, 13.48, 5.33,
3.83, 3.65, 5.82, 4, 6.17, 6, 6.42, NA, 3.83, 2.71, 2.19,
4.58, 5.18, NA, 9.75, NA, NA, NA, 5, 6.44, 5.36, NA), ARG_G2_150_AAA = c(85.25,
NaN, 99, NaN, NA, NA, 66.86, 101, 89.31, 71.33, NA, NaN,
94.5, 88.57, NaN, 95, 95.5, 81.5, 78.5, 107.75, 93.43, NaN,
NaN, 66.18, 92.33, 92.25, NaN, 67.43, 87.44, NaN, NA, NaN,
78, NA, 89.81, 86.43, NA, NA, 75.75, 91.67, 95, NaN, NaN,
85.12, 91.47, 81.88, 79.38, 72.45, 87.67, 91.22, 90.88, 83,
85, 89.23, NaN, 86.2, NA, 92, 93.09, 100.27, 88.62, 83.88,
NA, 75, NA, NA, NA, NaN, 80, 83.5, NA), ARG_G2_150_AAC = c(86.75,
NaN, 101, NaN, NA, NA, 67.29, 103.75, 91.15, 71.67, NA, NaN,
96.33, 88.86, NaN, 96.23, 97.5, 83.5, 79.12, 109.5, 95, NaN,
NaN, 66.45, 93.56, 93.42, NaN, 68, 88.33, NaN, NA, NaN, 78,
NA, 91.69, 87, NA, NA, 76.75, 93, 96.88, NaN, NaN, 85.5,
92.67, 83.38, 80.25, 73.09, 88.33, 92.44, 92.38, 84.25, 85.33,
91.23, NaN, 87.8, NA, 92.67, 94.09, 102.09, 90.15, 84.75,
NA, 76.14, NA, NA, NA, NaN, 81, 85.67, NA), ARG_G2_150_AC = c(15.75,
NaN, 1, NaN, NA, NA, 25.71, 2.62, 6.85, 19.33, NA, NaN, 3.83,
4.57, NaN, 9.85, 6.5, 15.5, 13.88, 3.75, 6.29, NaN, NaN,
27.36, 5.67, 8.42, NaN, 18.86, 11.33, NaN, NA, NaN, 19, NA,
11.25, 9.57, NA, NA, 12.75, 6, 4.5, NaN, NaN, 15.75, 5.67,
10.75, 9.75, 24.82, 8.67, 6.67, 5.88, 13.25, 7, 10, NaN,
10.6, NA, 6.56, 4.18, 2.55, 8.54, 9.75, NA, 17.86, NA, NA,
NA, NaN, 15.67, 13.17, NA), ARG_G2_150_AB = c(15.75, NaN,
2, NaN, NA, NA, 25.71, 2.62, 8.69, 19.33, NA, NaN, 4.33,
5.43, NaN, 11.31, 6.5, 15.5, 14.75, 6, 7.14, NaN, NaN, 28.27,
7.22, 9, NaN, 21.29, 11.33, NaN, NA, NaN, 22, NA, 13.44,
9.71, NA, NA, 14.75, 6, 5.12, NaN, NaN, 16.75, 6, 11.25,
12.75, 25.36, 11.11, 7.33, 6.62, 14.25, 9.33, 11.62, NaN,
11.8, NA, 9.22, 4.91, 4.64, 10, 10.38, NA, 19.86, NA, NA,
NA, NaN, 15.67, 13.33, NA), ARG_G2_200_AAA = c(NaN, NaN,
125, NaN, NA, NA, 81.33, 129.5, 112.25, NaN, NA, NaN, 117.5,
108.33, NaN, 120, 119.25, 99, 94, 134, 113.67, NaN, NaN,
77.67, 112.25, 112.86, NaN, 78.33, 106.6, NaN, NA, NaN, NaN,
NA, 112.4, 106.67, NA, NA, 93, NaN, 122, NaN, NaN, 104.25,
114.89, 101.25, 96.75, 87, 107, 112.25, 112.25, 100, NaN,
111.86, NaN, 101, NA, 114, 114.5, 124.17, 108.86, 103.25,
NA, 90.67, NA, NA, NA, NaN, NaN, 99, NA), ARG_G2_200_AAC = c(NaN,
NaN, 126, NaN, NA, NA, 82.33, 129.75, 113.5, NaN, NA, NaN,
118, 109.33, NaN, 120.71, 120.25, 101, 94.25, 136, 114, NaN,
NaN, 78, 114, 114, NaN, 78.67, 106.8, NaN, NA, NaN, NaN,
NA, 114, 108.33, NA, NA, 93, NaN, 123, NaN, NaN, 104.25,
116.67, 102.75, 97.25, 87.67, 107.75, 113.25, 113.25, 101,
NaN, 113.14, NaN, 101, NA, 114.5, 115, 126.17, 111.29, 104.25,
NA, 92, NA, NA, NA, NaN, NaN, 99, NA), ARG_G2_200_AC = c(NaN,
NaN, 1, NaN, NA, NA, 36, 5.25, 12.25, NaN, NA, NaN, 8.5,
8.33, NaN, 14.29, 11.38, 24, 22.25, 6, 11.67, NaN, NaN, 42.5,
9.25, 13.14, NaN, 32, 19.4, NaN, NA, NaN, NaN, NA, 15.6,
17, NA, NA, 24, NaN, 6.67, NaN, NaN, 21.5, 8.89, 17.5, 16,
37.83, 15.75, 12.25, 11.75, 20, NaN, 15.43, NaN, 26, NA,
12.25, 7.5, 5.67, 12.86, 14.75, NA, 27, NA, NA, NA, NaN,
NaN, 28.5, NA), ARG_G2_200_AB = c(NaN, NaN, 2, NaN, NA, NA,
36, 5.25, 16, NaN, NA, NaN, 10, 9.33, NaN, 16.57, 11.38,
24, 23.25, 9, 13, NaN, NaN, 44.33, 11.5, 14.29, NaN, 35,
19.4, NaN, NA, NaN, NaN, NA, 18.8, 17.33, NA, NA, 26, NaN,
7.67, NaN, NaN, 22.5, 9.33, 18.25, 20.25, 38.67, 19, 13.25,
13.25, 22, NaN, 18, NaN, 28, NA, 15.75, 8.83, 8.17, 15.14,
16, NA, 29.33, NA, NA, NA, NaN, NaN, 29, NA), ARG_G2_50_AAA = c(36.97,
35.4, 34.72, 33.81, NA, NA, 32.98, 35.7, 35.59, 35.36, NA,
36, 37.66, 36.35, 33.44, 34.72, 36.9, 34.32, 32.28, 33.74,
36.38, 35.06, 34.5, 31.47, 36.59, 36.18, 34.75, 31.9, 36.53,
32.62, NA, 33.85, 34.86, NA, 35.36, 34.52, NA, NA, 33.68,
35.89, 36.24, 37.21, 28, 34.05, 36.3, 34.16, 32.86, 32.06,
34.65, 35.57, 35.95, 33.19, 34.61, 34.6, 34.92, 34.24, NA,
34.33, 35.65, 36.16, 33.91, 34.37, NA, 33.44, NA, NA, NA,
33.93, 33.71, 35.42, NA), ARG_G2_50_AAC = c(40.2, 38.6, 42.09,
39.25, NA, NA, 35.68, 41.41, 39.12, 37.68, NA, 39, 41.16,
40.67, 36.11, 39.25, 40.65, 37.52, 35.14, 41.26, 41.13, 40.71,
36.25, 33.33, 40.59, 39.67, 36.83, 34.44, 40.57, 34, NA,
37, 36.45, NA, 39.52, 38.17, NA, NA, 36.52, 40.39, 40.69,
41.21, 29, 39.63, 40.23, 37.27, 36.58, 34.45, 38.87, 38.98,
39.51, 38.13, 37.68, 37.88, 38.85, 38.48, NA, 40, 40.43,
42.73, 39.93, 38.19, NA, 36.41, NA, NA, NA, 39.71, 36.43,
38.03, NA), ARG_G2_50_AC = c(0.8, 1.9, 0, 0.5, NA, NA, 2.93,
0.52, 0.58, 2.75, NA, 1.25, 0.21, 0.25, 2.11, 2, 0.85, 2.03,
2.67, 0.71, 0.82, 0.29, 0.75, 4.27, 0.63, 0.78, 2.92, 2.77,
1.17, 4.88, NA, 3, 2.64, NA, 1.78, 0.98, NA, NA, 2.29, 0.82,
0.45, 0.93, 6, 1.67, 0.86, 1.27, 1.79, 3.37, 1.11, 0.74,
0.79, 1.1, 0.71, 1.11, 1.08, 2.48, NA, 0.17, 0.75, 0.22,
0.91, 1.19, NA, 1.66, NA, NA, NA, 1.07, 1.75, 1.42, NA),
ARG_G2_50_AB = c(0.8, 2, 0.31, 0.5, NA, NA, 2.93, 0.52, 0.58,
2.75, NA, 1.25, 0.34, 0.5, 3.33, 2.44, 0.85, 2.03, 2.91,
1.42, 1, 0.94, 0.75, 4.63, 0.85, 0.96, 2.92, 3.49, 1.17,
4.88, NA, 3, 3.36, NA, 2.3, 0.98, NA, NA, 2.61, 0.82, 0.52,
0.93, 6, 1.91, 1.02, 1.34, 2.58, 3.67, 1.59, 0.96, 1.09,
1.39, 1.5, 1.65, 1.15, 2.76, NA, 0.93, 0.8, 0.82, 1.25, 1.44,
NA, 2.49, NA, NA, NA, 1.07, 1.75, 1.47, NA), NARR_G1_100_AAA = c(71.32,
NA, NA, 67.83, NaN, 71.6, 64.2, 71.68, 73.29, 70.53, 73.35,
59.31, 71.08, 74.06, 68.7, 74, 69.08, NA, 68.52, 63.47, 68.33,
NA, 65.64, 62.11, 63.9, 70.41, 60.36, 65.88, 68.81, 69.62,
70.68, 67.5, NA, 68.45, 67.16, 74.39, 60.6, 65.89, 71.94,
68.75, NA, NA, 67, 66.85, NA, NA, 62.56, 73.33, 69.81, 67.68,
73.06, 65.8, 63.85, NA, 67.64, 71.6, 68.47, 69.39, 71.16,
72.33, NA, 66.68, NA, 66.22, 67, 61.27, NaN, 72.33, 68.29,
71.33, 65.57), NARR_G1_100_AAC = c(74.26, NA, NA, 70.94,
NaN, 75, 66.14, 74.48, 77.07, 73.47, 76, 60.44, 73.92, 77.19,
71.4, 77.59, 72, NA, 70.38, 65.47, 70.54, NA, 68.09, 64.61,
66.5, 72.52, 62.59, 69.25, 71.48, 71.88, 74.4, 70.1, NA,
70, 69.6, 78.04, 62.3, 68.79, 73.44, 72.25, NA, NA, 67, 68.25,
NA, NA, 65.94, 75.71, 72.43, 69.68, 76, 68.6, 65.65, NA,
70.43, 74, 71.76, 71.17, 74.63, 74.22, NA, 69.47, NA, 68.72,
67, 62.82, NaN, 77.33, 69.76, 75.42, 67.62), NARR_G1_100_AC = c(3.05,
NA, NA, 2.33, NaN, 2.4, 1.89, 0.84, 0.07, 5.47, 1.12, 8.81,
2.39, 1.38, 3.6, 0.88, 2.65, NA, 2.05, 5.18, 2.38, NA, 5,
4.78, 6.4, 1.85, 7.41, 3.69, 1.85, 2.62, 1.28, 3.9, NA, 2.35,
3.8, 1.87, 5.1, 6.95, 1.67, 4.5, NA, NA, 4, 4.25, NA, NA,
7.17, 1.29, 2.62, 1.37, 1.47, 3.3, 7.27, NA, 3.64, 3.6, 2.59,
4.83, 0.63, 2.28, NA, 6.58, NA, 4.56, 6, 4.82, NaN, 0.67,
3.95, 1.75, 4.38), NARR_G1_100_AB = c(3.42, NA, NA, 3.17,
NaN, 2.5, 3.29, 1.64, 1.07, 6, 1.41, 9.25, 3.25, 2.69, 3.8,
1.32, 3.04, NA, 2.38, 5.18, 2.38, NA, 6.18, 6.11, 6.4, 1.85,
7.45, 3.69, 1.89, 3.25, 1.6, 4.8, NA, 2.8, 4.32, 2.3, 6.6,
7.42, 2.83, 4.75, NA, NA, 5, 4.75, NA, NA, 8, 1.71, 2.67,
2.05, 1.47, 4.8, 7.96, NA, 4.43, 3.8, 4.47, 4.91, 1.68, 2.78,
NA, 6.58, NA, 6.67, 6, 5.18, NaN, 1.67, 4.86, 2.08, 4.38),
NARR_G1_150_AAA = c(102, NA, NA, 96.22, NaN, 105.33, 87.1,
100.14, 106.17, 97.67, 99.88, 75.43, 99.62, 106.86, 95.3,
105.68, 97.14, NA, 92.82, 87.25, 96.23, NA, 88.5, 83.56,
89.75, 98.47, 80.64, 92.14, 96.07, 94.62, 99.46, 100, NA,
92.6, 94.54, 106.25, 82.5, 93.6, 100.33, 95, NA, NA, NaN,
90.9, NA, NA, 87.89, 101.08, 96.18, 95, 103.12, 92.75, 85.71,
NA, 94.17, NaN, 95.25, 97.5, 100.67, 100.44, NA, 90.9, NA,
90.11, NaN, 81.5, NaN, NaN, 94.45, 100.4, 91.64), NARR_G1_150_AAC = c(103.2,
NA, NA, 97.67, NaN, 106.67, 88.55, 102.43, 109.17, 98.78,
103.25, 76.57, 102.05, 109.43, 97.4, 108.42, 99.29, NA, 94.73,
89, 98, NA, 89.75, 85, 91.75, 100.47, 81.64, 93.14, 97.73,
96, 101.08, 101.33, NA, 94.1, 95.92, 110.33, 83.25, 95.5,
101.67, 98, NA, NA, NaN, 93, NA, NA, 90.56, 102.38, 99, 96.78,
106.5, 94.25, 87.43, NA, 98.33, NaN, 99, 98.92, 103.44, 103,
NA, 93.8, NA, 92, NaN, 82.25, NaN, NaN, 95.45, 102.8, 93.82
), NARR_G1_150_AC = c(6.4, NA, NA, 5.78, NaN, 5, 4.85, 2.29,
0.5, 12.44, 2.5, 19, 4.71, 3, 8, 1.63, 5.86, NA, 4.82, 9.25,
4.08, NA, 10.75, 9.44, 12.25, 3.6, 15.73, 7.14, 3.73, 7.12,
4.08, 6.33, NA, 5.1, 6.62, 3.08, 10.25, 12.5, 4.56, 7.5,
NA, NA, NaN, 8.6, NA, NA, 13.67, 3.15, 6, 2.22, 2.5, 8, 15,
NA, 6, NaN, 5.5, 8.75, 2.44, 4.33, NA, 13.9, NA, 8.78, NaN,
13.75, NaN, NaN, 7.73, 4.4, 9.36), NARR_G1_150_AB = c(7,
NA, NA, 7.33, NaN, 5.33, 7.4, 3.71, 2.17, 13.33, 2.88, 20.14,
6, 5.14, 8.5, 2.42, 6.43, NA, 5.18, 9.25, 4.08, NA, 12.5,
11.56, 12.25, 3.6, 15.73, 7.14, 4, 8.12, 4.46, 7.33, NA,
5.9, 7.54, 3.67, 13, 13.3, 6.78, 8, NA, NA, NaN, 9.1, NA,
NA, 15.11, 4.15, 6.09, 3.22, 2.5, 10.5, 16.29, NA, 7.33,
NaN, 8.38, 8.83, 4, 5.22, NA, 13.9, NA, 12.11, NaN, 15.25,
NaN, NaN, 9.27, 5, 9.36), NARR_G1_200_AAA = c(127.8, NA,
NA, 120.25, NaN, NaN, 105.85, 126.62, 134.5, 121.4, 126.25,
89.33, 126.23, 136, 120.4, 133.17, 124, NA, 115.5, 106.5,
120.86, NA, 115, 104.25, NaN, 123.22, 100, 114, 120.22, 115.67,
124.38, NaN, NA, 112.6, 119, 137.29, NaN, 118.4, 127, NaN,
NA, NA, NaN, 113.8, NA, NA, 111.5, 123.57, 122.33, 118.8,
130, NaN, 106.38, NA, 123.5, NaN, 123.75, 123.29, 127.2,
126.5, NA, 113.8, NA, 113.75, NaN, 101, NaN, NaN, 117.83,
125, 114.5), NARR_G1_200_AAC = c(130, NA, NA, 123, NaN, NaN,
107.54, 128.75, 136.5, 123, 128.5, 90, 128, 137.33, 121.6,
136.92, 125.5, NA, 117, 108.25, 122.29, NA, 115, 105, NaN,
125.11, 102, 116, 122.33, 117.33, 126.25, NaN, NA, 114.6,
121.12, 138.86, NaN, 119.2, 127.75, NaN, NA, NA, NaN, 114.4,
NA, NA, 113, 124.43, 124, 120.6, 133, NaN, 107, NA, 124.5,
NaN, 127.75, 123.57, 129, 127.5, NA, 115.6, NA, 117, NaN,
101, NaN, NaN, 118.5, 129, 115.5), NARR_G1_200_AC = c(11.2,
NA, NA, 12.5, NaN, NaN, 9.31, 4.25, 2, 17.8, 4.5, 32.33,
7.77, 5.67, 13.4, 2.67, 9.62, NA, 7.67, 15, 6.14, NA, 16,
14.75, NaN, 6.22, 24.33, 11, 6.67, 14.33, 7.62, NaN, NA,
9.4, 9.75, 4.86, NaN, 18.6, 8.25, NaN, NA, NA, NaN, 13.8,
NA, NA, 21.75, 6.14, 9.33, 6, 4.5, NaN, 23.75, NA, 8.5, NaN,
6.75, 13.86, 3.8, 6.75, NA, 21.4, NA, 12.75, NaN, 20, NaN,
NaN, 12.83, 7, 15.83), NARR_G1_200_AB = c(12, NA, NA, 14.5,
NaN, NaN, 12.85, 6.38, 4.5, 18.8, 5.25, 34.67, 9.54, 8.67,
14.4, 4, 10.62, NA, 8.33, 15, 6.29, NA, 18, 17.5, NaN, 6.22,
24.33, 11.33, 7, 15.33, 8.12, NaN, NA, 10.8, 11, 5.71, NaN,
19.6, 10.75, NaN, NA, NA, NaN, 14.6, NA, NA, 24, 7.57, 9.5,
8, 5, NaN, 25.75, NA, 10.5, NaN, 10.5, 14, 6, 8.75, NA, 21.4,
NA, 17.75, NaN, 22, NaN, NaN, 15.5, 8, 15.83), NARR_G1_50_AAA = c(37.69,
NA, NA, 37.02, 35.38, 34.34, 36.19, 37.25, 36.78, 36.83,
36.61, 34.2, 34.24, 37.51, 35.74, 34, 35.02, NA, 37.4, 36.18,
36.63, NA, 34.42, 34.38, 35.43, 37.2, 34.49, 34.2, 36.41,
37.07, 36.56, 34.93, NA, 36.06, 36.49, 35.31, 33.33, 34.27,
36.5, 36.5, NA, NA, 34.21, 36.02, NA, NA, 34.02, 35.59, 37.16,
36.02, 37.58, 36.53, 35.46, NA, 36.46, 38.42, 36.05, 37.39,
37.3, 36.22, NA, 35.31, NA, 33.96, 35.55, 35.03, 35, 35.31,
36.54, 36.06, 34.98), NARR_G1_50_AAC = c(41.85, NA, NA, 40.71,
37.5, 42.38, 39.05, 41.98, 42.51, 42.47, 43.43, 36.41, 42.17,
43.27, 40.42, 43.1, 40.52, NA, 41.65, 38.82, 40.63, NA, 40.35,
39.18, 38.93, 41.44, 38.3, 39.54, 40.73, 41.83, 42.54, 40.34,
NA, 40.69, 40.31, 43.51, 36.13, 39.1, 41.65, 41.62, NA, NA,
38.57, 40.02, NA, NA, 38.26, 42.66, 41.55, 39.7, 42.91, 40.43,
38.87, NA, 40.86, 43.26, 40.55, 40.84, 42.13, 42.09, NA,
40.31, NA, 39.69, 39.73, 36.97, 37.71, 43.44, 40.44, 42.33,
39.65), NARR_G1_50_AC = c(0.77, NA, NA, 0.69, 2.25, 0.45,
0.59, 0.12, 0, 1.15, 0.34, 2.61, 0.61, 0.24, 0.64, 0.26,
0.79, NA, 0.19, 1.43, 0.65, NA, 1.39, 1.11, 1.87, 0.31, 1.98,
1.07, 0.54, 0.29, 0.24, 0.76, NA, 0.59, 1.05, 0.62, 2.17,
2.25, 0.33, 1.62, NA, NA, 1.36, 1.53, NA, NA, 2.22, 0.22,
0.65, 0.45, 0.42, 0.9, 2.18, NA, 0.97, 0.05, 0.84, 0.98,
0, 0.44, NA, 1.83, NA, 1.71, 0.91, 1.16, 1.86, 0.12, 0.69,
0.45, 1.24), NARR_G1_50_AB = c(0.88, NA, NA, 0.82, 2.25,
0.45, 1.03, 0.45, 0.54, 1.36, 0.55, 2.71, 0.96, 0.73, 0.64,
0.47, 0.97, NA, 0.29, 1.43, 0.65, NA, 1.81, 1.69, 1.87, 0.31,
2.02, 1.07, 0.54, 0.52, 0.39, 1.1, NA, 0.8, 1.31, 0.82, 2.9,
2.44, 0.74, 1.62, NA, NA, 1.86, 1.76, NA, NA, 2.48, 0.38,
0.67, 0.66, 0.42, 1.67, 2.38, NA, 1.43, 0.16, 1.64, 1.04,
0.57, 0.69, NA, 1.83, NA, 2.6, 0.91, 1.16, 2.71, 0.75, 0.98,
0.58, 1.24), NARR_G2_100_AAA = c(64.25, 59, NA, 67.88, 67.08,
NA, 60.75, 64.42, 71.17, 58.42, NA, 49.8, 63.36, 65.2, NaN,
70.2, 62.85, NaN, 61.6, 53.92, 62.63, NA, NaN, 50.46, 65.14,
60.58, 63.29, NA, 64.33, NaN, NA, 68.57, NA, NA, 66.3, NA,
57.29, NA, 53.5, 63.48, NA, 57.07, NaN, 61.82, NA, 68.61,
57.1, 62.84, 63, 61.91, 58.38, NaN, 61.56, NA, NaN, 65.55,
63.8, 65, 63.14, 67.31, 67.75, 57.62, 63.31, 54.83, 66.43,
NA, NA, 64.67, 57.92, 59, NA)), row.names = c(NA, -71L), class = "data.frame")
I would suggest pulling your column names into a data frame, separating them into their components, and ordering them as desired:
library(dplyr)
library(tidyr)
## Build a lookup table of column names: take every name except the first
## (the ID column), split each on "_" into four parts, and sort the rows by
## those parts. `convert = TRUE` lets separate() type-convert the parts.
col_df <- data.frame(names = names(merged_DF)[-1]) %>%
  separate(
    col = names,
    into = c("s1", "gnum", "num2", "astring"),
    sep = "_",
    remove = FALSE,
    convert = TRUE
  ) %>%
  arrange(s1, num2, astring, gnum)
## print the table; its `names` column is now in the sorted order
col_df
# names s1 gnum num2 astring
# 1 ARG_G1_50_AAA ARG G1 50 AAA
# 2 ARG_G2_50_AAA ARG G2 50 AAA
# 3 ARG_G1_50_AAC ARG G1 50 AAC
# 4 ARG_G2_50_AAC ARG G2 50 AAC
# 5 ARG_G1_50_AB ARG G1 50 AB
# 6 ARG_G2_50_AB ARG G2 50 AB
# 7 ARG_G1_50_AC ARG G1 50 AC
# 8 ARG_G2_50_AC ARG G2 50 AC
# 9 ARG_G1_100_AAA ARG G1 100 AAA
# 10 ARG_G2_100_AAA ARG G2 100 AAA
# ...
## reorder the columns: ID first, then the names in the order held by col_df
merged_DF <- merged_DF %>%
  select(c(ID, col_df$names))
names(merged_DF)
# [1] "ID" "ARG_G1_50_AAA" "ARG_G2_50_AAA" "ARG_G1_50_AAC" "ARG_G2_50_AAC"
# [6] "ARG_G1_50_AB" "ARG_G2_50_AB" "ARG_G1_50_AC" "ARG_G2_50_AC" "ARG_G1_100_AAA"
# [11] "ARG_G2_100_AAA" "ARG_G1_100_AAC" "ARG_G2_100_AAC" "ARG_G1_100_AB" "ARG_G2_100_AB"
# [16] "ARG_G1_100_AC" "ARG_G2_100_AC" "ARG_G1_150_AAA" "ARG_G2_150_AAA" "ARG_G1_150_AAC"
# [21] "ARG_G2_150_AAC" "ARG_G1_150_AB" "ARG_G2_150_AB" "ARG_G1_150_AC" "ARG_G2_150_AC"
# [26] "ARG_G1_200_AAA" "ARG_G2_200_AAA" "ARG_G1_200_AAC" "ARG_G2_200_AAC" "ARG_G1_200_AB"
# [31] "ARG_G2_200_AB" "ARG_G1_200_AC" "ARG_G2_200_AC" "NARR_G1_50_AAA" "NARR_G1_50_AAC"
# [36] "NARR_G1_50_AB" "NARR_G1_50_AC" "NARR_G1_100_AAA" "NARR_G2_100_AAA" "NARR_G1_100_AAC"
# [41] "NARR_G1_100_AB" "NARR_G1_100_AC" "NARR_G1_150_AAA" "NARR_G1_150_AAC" "NARR_G1_150_AB"
# [46] "NARR_G1_150_AC" "NARR_G1_200_AAA" "NARR_G1_200_AAC" "NARR_G1_200_AB" "NARR_G1_200_AC"
I bet that there are simpler ways of doing this but this one seems to work.
# Interleave consecutive runs of elements of `X`.
#
# `X` is cut into maximal runs of consecutive elements that either all match
# `pattern` or all do not. Runs are then taken two at a time (1st with 2nd,
# 3rd with 4th, ...) and each pair is interleaved element by element:
# x1, y1, x2, y2, ... When the two runs of a pair differ in length, the
# leftover tail of the longer run is appended after the interleaved part.
#
# Arguments:
#   X        Character vector (e.g. column names).
#   pattern  Regular expression handed to grepl() to tell the runs apart.
#
# Returns an unnamed vector with the same elements as `X`, reordered.
# Edge cases: an empty `X`, or an `X` consisting of a single run (nothing or
# everything matches), is returned unchanged; with an odd number of runs the
# last run has no partner and is appended as is.
intercalate <- function(X, pattern) {
  # Interleave two vectors; the unpaired tail of the longer one goes last.
  interleave <- function(x, y) {
    n <- min(length(x), length(y))
    paired <- seq_len(n)
    c(
      rbind(x[paired], y[paired]), # column-major flattening gives x1, y1, x2, ...
      # logical masks instead of negative indices: x[-integer(0)] would be empty
      x[seq_along(x) > n],
      y[seq_along(y) > n]
    )
  }

  if (length(X) == 0L) {
    return(unname(X))
  }

  # A new run starts wherever the match status flips.
  is_match <- grepl(pattern = pattern, X)
  run_id <- cumsum(c(0L, abs(diff(is_match))))
  runs <- unname(split(X, run_id))
  n_runs <- length(runs)
  if (n_runs < 2L) {
    return(unname(X))
  }

  n_pairs <- n_runs %/% 2L
  first_of_pair <- seq(1L, by = 2L, length.out = n_pairs)
  # Map() never simplifies, so the result is a list whatever the run lengths.
  merged <- Map(interleave, runs[first_of_pair], runs[first_of_pair + 1L])
  if (n_runs %% 2L == 1L) {
    merged <- c(merged, runs[n_runs])
  }
  unlist(merged, use.names = FALSE)
}
# Keep the first column name in place, interleave the remaining names around
# the "G2" runs, then print the data frame with its columns in that order.
newnames <- c(
  names(merged_DF)[1],
  intercalate(names(merged_DF)[-1], pattern = "G2")
)
merged_DF[newnames]
This is probably insufficient to the task:
strings <- c('ARG_G1_50_AAA' ,'ARG_G1_50_AAC', 'ARG_G1_50_AC' ,'ARG_G1_50_AB',
'ARG_G2_50_AAA' ,'ARG_G2_50_AAC', 'ARG_G2_50_AC')
substring(strings, regexpr('_\\K[[:upper:]]{2,3}', strings, perl = TRUE), nchar(strings))
[1] "AAA" "AAC" "AC" "AB" "AAA" "AAC" "AC"
idx_strings <- order(substring(strings, regexpr('_\\K[[:upper:]]{2,3}', strings, perl = TRUE), nchar(strings)))
idx_strings
[1] 1 5 2 6 4 3 7
> strings[idx_strings]
[1] "ARG_G1_50_AAA" "ARG_G2_50_AAA" "ARG_G1_50_AAC" "ARG_G2_50_AAC"
[5] "ARG_G1_50_AB" "ARG_G1_50_AC" "ARG_G2_50_AC"
Getting nearly desired 'set1' results for 'NARR_' and 'ARG_' as follows
for 'NARR_', using @akrun's data v1, though [7] & [8] appear reversed
idx_v1_N <- which(regexpr('^[N]', v1, perl = TRUE) == 1)
v1[idx_v1_N[order(
substring(v1[idx_v1_N],
regexpr('[^_.G][\\d_]\\d.+[[:upper:]]', v1[idx_v1_N], perl = TRUE),
nchar(v1[idx_v1_N]))[idx_v1_N])]]
[1] "NARR_G1_100_AAC" "NARR_G1_100_AB" "NARR_G2_100_AC" "NARR_G1_150_AAC"
[5] "NARR_G1_150_AB" "NARR_G1_100_AAA" "NARR_G2_150_AAA" "NARR_G2_100_AAA"
[9] "NARR_G1_100_AC" "NARR_G1_150_AAA" "NARR_G1_150_AC" "NARR_G2_100_AAC"
[13] "NARR_G2_50_AB" "NARR_G1_50_AC" "NARR_G1_50_AAA" "NARR_G2_150_AB"
[17] "NARR_G2_150_AAC" "NARR_G2_50_AC" "NARR_G1_50_AAC" "NARR_G2_150_AC"
[21] "NARR_G1_50_AB" "NARR_G2_100_AB" "NARR_G2_50_AAA" "NARR_G2_50_AAC"
the substring and regexpr '[^_.G][\\d_]\\d.+[[:upper:]]' return
substring(v1[idx_v1_N], regexpr('[^_.G][\\d_]\\d.+[[:upper:]]', v1[idx_v1_N], perl = TRUE), nchar(v1[idx_v1_N]))
[1] "1_100_AB" "1_150_AAC" "2_50_AB" "1_150_AB" "2_100_AAA" "1_100_AAC"
[7] "1_150_AAA" "2_100_AC" "1_100_AAA" "1_150_AC" "2_100_AAC" "2_150_AAA"
[13] "1_100_AC" "1_50_AC" "1_50_AAA" "2_150_AB" "2_150_AAC" "2_50_AC"
[19] "1_50_AAC" "2_150_AC" "1_50_AB" "2_100_AB" "2_50_AAA" "2_50_AAC"
which is then ordered nearly correctly. Results for 'ARG_' just need an index for names starting with 'A'. There are better hammers for this nail, as seen above.

Divide each column of a dataframe by one row of the dataframe

I would like to divide each column of my dataframe by the values of one row.
I tried to transform my dataframe into a matrix and to extract one row of the dataframe as a vector then divide the matrix by the vector but it did not work. Indeed, only the first row of the matrix got divided by the vector.
Here is my original dataframe.
And this is the code I tried to run :
# Read the spreadsheet. read_excel() is not base R - presumably
# readxl::read_excel; TODO confirm the package is attached.
data <- read_excel("Documents/TFB/xlsx_geochimie/solfatara_maj.xlsx")
View(data)
# Columns 2 to 20 of the data, converted to a matrix.
data.mat <- as.matrix(data[,2:20])
# Row 12, columns 2 to 20. NOTE: subsetting a data frame with two indices
# over several columns yields a one-row data frame, not an atomic vector.
vector <- data[12,2:20]
# Attempted division of the matrix by that row. Per the description above,
# this does not divide every row as intended.
data.mat/vector
We replicate the vector so that its length matches the number of elements in the matrix, and then do the division
data.mat/unlist(vector)[col(data.mat)]
# FeO Total S SO4 Total N SiO2 Al2O3 Fe2O3 MnO MgO CaO Na2O K2O
#[1,] 0.10 16.5555556 NA NA 0.8908607 0.8987269 0.1835206 0.08333333 0.03680982 0.04175365 0.04823151 0.5738562
#[2,] 0.40 125.8333333 NA NA 0.5510204 0.4456019 0.2359551 0.08333333 0.04294479 0.01878914 0.04501608 0.2588235
#[3,] 0.85 0.6111111 NA NA 1.0021295 1.0162037 0.7715356 1.08333333 0.53987730 0.69728601 1.03858521 1.0457516
#[4,] 0.15 48.0555556 NA NA 1.1027507 0.2569444 NA 0.08333333 0.01840491 0.01878914 0.04180064 0.1647059
#[5,] 0.85 NA NA NA 1.0889086 1.0271991 0.6591760 0.75000000 0.59509202 0.53862213 1.02250804 1.1228758
#[6,] NA NA NA NA 1.3426797 0.6319444 0.0411985 0.08333333 0.03067485 0.11899791 0.65594855 0.7764706
# TiO2 P2O5 LOI LOI2 Total Total 2 Fe2O3(T)
#[1,] 0.7924528 0.3928571 7.0841837 6.6963855 0.9922233 0.9894632 0.14489796
#[2,] 0.5094340 0.3214286 14.5561224 13.7710843 0.9958126 0.9936382 0.31020408
#[3,] 0.8679245 0.6428571 1.5637755 1.5228916 0.9990030 0.9970179 0.80612245
#[4,] 1.4905660 0.2857143 7.4056122 7.0024096 0.9795613 0.9769384 0.05510204
#[5,] 1.0377358 0.2500000 0.3520408 0.3783133 0.9969093 0.9960239 0.74489796
#[6,] 0.3018868 0.2500000 1.2551020 1.1879518 1.0019940 1.0000000 0.04489796
Or use sweep
sweep(data.mat, MARGIN = 2, unlist(vector), FUN = `/`)
Or using mapply with asplit
mapply(`/`, asplit(data.mat, 2), vector)
data
# Example matrix: 6 rows by 19 named columns (see .Dim / .Dimnames).
data_mat <- structure(c(0.2, 0.8, 1.7, 0.3, 1.7, NA, 5.96, 45.3, 0.22, 17.3,
NA, NA, NA, 6.72, NA, 4.08, 0.06, 0.16, NA, NA, NA, NA, NA, NA,
50.2, 31.05, 56.47, 62.14, 61.36, 75.66, 15.53, 7.7, 17.56, 4.44,
17.75, 10.92, 0.49, 0.63, 2.06, NA, 1.76, 0.11, 0.01, 0.01, 0.13,
0.01, 0.09, 0.01, 0.06, 0.07, 0.88, 0.03, 0.97, 0.05, 0.2, 0.09,
3.34, 0.09, 2.58, 0.57, 0.15, 0.14, 3.23, 0.13, 3.18, 2.04, 4.39,
1.98, 8, 1.26, 8.59, 5.94, 0.42, 0.27, 0.46, 0.79, 0.55, 0.16,
0.11, 0.09, 0.18, 0.08, 0.07, 0.07, 27.77, 57.06, 6.13, 29.03,
1.38, 4.92, 27.79, 57.15, 6.32, 29.06, 1.57, 4.93, 99.52, 99.88,
100.2, 98.25, 99.99, 100.5, 99.54, 99.96, 100.3, 98.28, 100.2,
100.6, 0.71, 1.52, 3.95, 0.27, 3.65, 0.22), .Dim = c(6L, 19L), .Dimnames = list(
NULL, c("FeO", "Total S", "SO4", "Total N", "SiO2", "Al2O3",
"Fe2O3", "MnO", "MgO", "CaO", "Na2O", "K2O", "TiO2", "P2O5",
"LOI", "LOI2", "Total", "Total 2", "Fe2O3(T)")))
# The division snippets in this answer refer to the matrix as `data.mat`;
# expose the example data under that name too so they run as written.
data.mat <- data_mat
vector <- structure(list(FeO = 2, `Total S` = 0.36, SO4 = NA_real_, `Total N` = NA_real_,
SiO2 = 56.35, Al2O3 = 17.28, Fe2O3 = 2.67, MnO = 0.12, MgO = 1.63,
CaO = 4.79, Na2O = 3.11, K2O = 7.65, TiO2 = 0.53, P2O5 = 0.28,
LOI = 3.92, LOI2 = 4.15, Total = 100.3, `Total 2` = 100.6,
`Fe2O3(T)` = 4.9), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame"))
To divide data frame, df, by the third row:
df/df[rep(3, nrow(df)), ]

How to compute/plot efficient frontiers per time period in one graph in R?

Currently we compute and sort data of stocks (X1 to X10). Historical data is stored in Excel and R for the time period 1950-1980, 1980-1999 and for 1950-1999.
The dataset:
date X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
1 1950-01-01 5.92 6.35 4.61 4.08 5.47 3.90 2.35 1.49 2.27 0.82
2 1950-02-01 2.43 2.16 2.10 1.58 -0.05 1.14 1.51 1.52 2.02 1.12
3 1950-03-01 -0.81 0.21 -1.67 -0.02 -0.79 0.18 -0.22 1.03 0.12 1.75
4 1950-04-01 5.68 6.45 5.41 5.94 6.10 5.87 3.82 3.34 3.44 3.97
5 1950-05-01 3.84 1.60 1.64 3.33 2.54 2.12 4.46 2.83 3.82 4.75
6 1950-06-01 -9.88 -10.56 -8.02 -7.86 -7.27 -7.44 -7.13 -7.76 -6.32 -5.04
7 1950-07-01 9.09 8.76 7.31 5.88 3.84 4.61 3.09 3.07 1.41 0.42
598 1999-10-01 -0.95 -1.88 -1.25 -0.52 1.65 0.72 5.41 4.38 5.58 6.59
599 1999-11-01 11.57 9.15 8.17 7.14 6.15 4.95 5.78 4.21 1.55 2.15
600 1999-12-01 12.32 14.97 9.29 11.77 11.09 5.89 11.88 11.26 6.23 5.64
The main question is, we would like to compute/plot efficient frontiers for these 4 time periods to see how the efficient frontier has evolved in 1 graph. Are there ways to do this in R?
The efficient frontier is the set of optimal portfolios that offers the highest expected return for a defined level of risk or the lowest risk for a given level of expected return.
In modern portfolio theory, the efficient frontier (or portfolio frontier) is an investment portfolio which occupies the 'efficient' parts of the risk-return spectrum. Formally, it is the set of portfolios which satisfy the condition that no other portfolio exists with a higher expected return but with the same standard deviation of return.
So, how would one go about computing this in R?
dput sample data (first 50 rows)
> dput(head(data,50))
structure(list(X__1 = structure(c(-631152000, -628473600, -626054400,
-623376000, -620784000, -618105600, -615513600, -612835200, -610156800,
-607564800, -604886400, -602294400, -599616000, -596937600, -594518400,
-591840000, -589248000, -586569600, -583977600, -581299200, -578620800,
-576028800, -573350400, -570758400, -568080000, -565401600, -562896000,
-560217600, -557625600, -554947200, -552355200, -549676800, -546998400,
-544406400, -541728000, -539136000, -536457600, -533779200, -531360000,
-528681600, -526089600, -523411200, -520819200, -518140800, -515462400,
-512870400, -510192000, -507600000, -504921600, -502243200), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), X__2 = c(5.92, 2.43, -0.81, 5.68,
3.84, -9.88, 9.09, 4.93, 3.99, -0.5, 3.09, 15.77, 8.22, 0.36,
-7.36, 3.84, -2.81, -7.12, 3.57, 6.59, 1.04, -1.41, -1.42, -0.53,
1.86, -3.25, 0.68, -4.4, 0.57, 2.5, -0.36, -0.74, -1.11, -0.58,
3.22, 0.33, 5.01, 2.75, -1.25, -2.13, 1.3, -4.42, 0.25, -5.56,
-4.09, 2.71, 2.01, -3.15, 8.48, -0.16), X__3 = c(6.35, 2.16,
0.21, 6.45, 1.6, -10.56, 8.76, 4.63, 3.52, -1.2, 3.36, 10.98,
8.41, 0.81, -4.01, 3.56, -4.27, -6.11, 4.7, 5.3, 2.73, -3.07,
-0.13, 0.6, 1.1, -2.77, 2.37, -4.5, 1.87, 3.18, 1.51, 0.43, -1.91,
-1.52, 4.91, 1.43, 3.4, 3.03, -2.25, -2, 0.34, -4.75, 2.24, -6.53,
-1.87, 1.97, 1.78, -2.96, 7.38, 0.43), X__4 = c(4.61, 2.1, -1.67,
5.41, 1.64, -8.02, 7.31, 4.56, 5.18, -0.46, 3.52, 10.78, 8.46,
0.28, -4.88, 4.26, -3.25, -6.76, 6.78, 4.99, 3.86, -2.57, 0.59,
0.16, 1.75, -2.04, 2.49, -5.29, 1.76, 2.88, 0.76, 0.67, -1.67,
-1.45, 5.69, 2.95, 3.66, 1.15, -1.58, -2.34, 0.51, -3.82, 0.72,
-6.25, -2.33, 3.1, 2.19, -2.63, 7.3, 1.82), X__5 = c(4.08, 1.58,
-0.02, 5.94, 3.33, -7.86, 5.88, 4.68, 5.99, 0.75, 2.68, 9.29,
8, 1.08, -3.13, 4.21, -3.35, -5.01, 5.77, 4.85, 2.73, -3.44,
0.27, 1.56, 1.62, -2.35, 2.93, -4.62, 2.36, 2.56, 0.86, 0.16,
-1.8, -2.04, 5.12, 2.72, 3.21, 1.21, -2.17, -1.84, 0.32, -3.63,
1.47, -5.16, -0.65, 3.33, 1.34, -1.36, 6.24, 1.19), X__6 = c(5.47,
-0.05, -0.79, 6.1, 2.54, -7.27, 3.84, 6.29, 4.46, -0.24, 2.42,
6.12, 8.63, 0.88, -3.31, 4.56, -2.14, -5.62, 5.73, 5.36, 2.44,
-1.88, 0.83, 0.65, 1.47, -1.81, 2.31, -4.48, 2.56, 2.69, 0.9,
0.34, -0.62, -1.58, 6.59, 0.86, 3.58, 1.92, -1.85, -2.79, 0.7,
-3.4, 1.26, -5.26, -1.18, 4.26, 1.35, -0.97, 6.66, 1.77), X__7 = c(3.9,
1.14, 0.18, 5.87, 2.12, -7.44, 4.61, 4.57, 6.14, -0.84, 4.22,
8.37, 7.44, 0.69, -4.26, 4.13, -2.24, -6.75, 5.81, 4.35, 1.98,
-2.87, 0.93, 0.61, 1.27, -2.18, 2.97, -4.09, 2.27, 2.96, 1.16,
-0.38, -2.37, -0.71, 5.53, 2.45, 1.3, 0.31, -0.47, -2.03, 0.14,
-3.26, 1.79, -5.5, -1.47, 4.18, 1.96, -1.35, 7.06, 1.69), X__8 = c(2.35,
1.51, -0.22, 3.82, 4.46, -7.13, 3.09, 5.01, 5.84, -1.05, 3.81,
7.54, 6.46, 0.71, -3.56, 4.42, -1.87, -4.52, 7.3, 3.66, 2.11,
-2.92, 2.25, 2.17, 1.32, -1.71, 3.17, -4.63, 2.59, 3.89, 0.49,
0.21, -1.71, -1.18, 4.95, 3.21, 1.41, 0.89, -1.02, -2.89, 0.59,
-2.67, 1.47, -4.62, -0.69, 4.07, 2.83, -1.44, 6.11, 1.58), X__9 = c(1.49,
1.52, 1.03, 3.34, 2.83, -7.76, 3.07, 3.72, 6.21, -1.66, 3.46,
6.14, 7.17, 2.13, -3.19, 4.59, -2.65, -3.5, 7.43, 3.5, 2.41,
-2.73, 1.35, 1.97, 1.72, -1.8, 4.06, -5.35, 2.57, 3.14, 1.89,
-0.86, -1.73, -0.95, 6.07, 1.73, 1.09, 0.37, -1.34, -2.48, 0.31,
-3.2, 1.34, -4.99, -0.18, 4.35, 3.03, 0.09, 5.65, 2.39), X__10 = c(2.27,
2.02, 0.12, 3.44, 3.82, -6.32, 1.41, 4.54, 5.55, -0.97, 3.8,
5.69, 5.65, 1.78, -2.6, 4.21, -1.29, -2.63, 7.15, 3.52, 1.85,
-2.32, 0.96, 2.74, 1.9, -2.6, 3.83, -4.31, 3.15, 2.76, 0.93,
-0.39, -1.86, -1.57, 7.05, 2.36, -0.33, -0.23, -0.54, -2.6, 0.61,
-2.37, 2.12, -3.76, 0.47, 3.98, 3.03, 0.2, 5.63, 1.26), X__11 = c(0.82,
1.12, 1.75, 3.97, 4.75, -5.04, 0.42, 4.96, 4.32, 0.25, 2.26,
4.71, 5.05, 1.63, -1.53, 5.12, -2.59, -1.92, 6.89, 4.48, -0.09,
-2.49, 0.26, 4.03, 1.37, -2.82, 4.95, -5.1, 3.4, 4.29, 0.89,
-1.06, -2.18, -0.31, 5.76, 3.32, -1.04, -0.63, -1.78, -2.97,
0.55, -1.3, 2.75, -4.47, 0.48, 4.83, 2.85, 0.27, 4.4, 1.93)), .Names = c("date",
"X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8",
"X9", "X10"), row.names = c(NA, 50L), class = c("tbl_df",
"tbl", "data.frame"))
After some correspondence in the comments with @Jonathan, I widened the example data from 3 columns to 12 columns with some sampling. The code in the "With short-selling" section of the blog scales well for 10K observations:
# using code at:
# https://www.r-bloggers.com/a-gentle-introduction-to-finance-using-r-efficient-frontier-and-capm-part-1/
# https://datashenanigan.wordpress.com/2016/05/24/a-gentle-introduction-to-finance-using-r-efficient-frontier-and-capm-part-1/
library(data.table)
# Compute four scalar constants from a table of returns.
#
# With mu the vector of column means, S the covariance matrix of the columns
# and 1 a vector of ones, the function returns
#   alpha = 1' S^-1 1
#   beta  = 1' S^-1 mu
#   gamma = mu' S^-1 mu
#   delta = alpha * gamma - beta^2
#
# Arguments:
#   rets  Matrix or data-frame-like object of numeric columns, one column per
#         asset (presumably periodic returns - confirm against the caller).
#
# Returns a list with the numeric scalars alpha, beta, gamma and delta.
# solve() raises an error when the covariance matrix is singular.
calcEFParams <- function(rets) {
  # TRUE is spelled out: the shorthand T is an ordinary variable that can be
  # reassigned elsewhere in a session.
  retbar <- colMeans(rets, na.rm = TRUE)
  covs <- var(rets, na.rm = TRUE) # covariance matrix of the returns
  invS <- solve(covs)
  ones <- matrix(1, nrow = length(retbar))
  # 1' S^-1 is shared by alpha and beta, so compute it once.
  ones_invS <- t(ones) %*% invS
  alpha <- ones_invS %*% ones
  beta <- ones_invS %*% retbar
  gamma <- t(retbar) %*% invS %*% retbar
  delta <- alpha * gamma - beta * beta
  list(
    alpha = as.numeric(alpha),
    beta = as.numeric(beta),
    gamma = as.numeric(gamma),
    delta = as.numeric(delta)
  )
}
# load data
# Downloads the CSV over the network, so this step needs internet access.
link <- "https://raw.githubusercontent.com/DavZim/Efficient_Frontier/master/data/mult_assets.csv"
df <- data.table(read.csv(link))
# Widen the table for the benchmark: pass every column through sample() and
# bind the result next to the original columns. Each round doubles the
# column count (df -> df3 -> df5) while the number of rows stays the same.
df2 <- df[,lapply(.SD, sample),]
df3 <- cbind(df, df2)
df4 <- df3[,lapply(.SD, sample),]
df5 <- cbind(df3, df4)
Now loading the microbenchmark package, the performance is as such:
> library(microbenchmark)
> microbenchmark(calcEFParams(df5), times = 10)
Unit: milliseconds
expr min lq mean median uq max neval
calcEFParams(df5) 2.692514 2.764053 2.795127 2.777547 2.805447 3.024349 10
It seems that David Zimmermann's code is scalable and efficient enough!

R: Unable to get reasonable estimates from optim()

Here is my data:
test <- structure(list(date = structure(c(16436, 16437, 16438, 16439,
16440, 16441, 16442, 16443, 16444, 16445, 16446, 16447, 16448,
16449, 16450, 16451, 16452, 16453, 16454, 16455, 16456, 16457,
16458, 16459, 16460, 16461, 16462, 16463, 16464, 16465, 16466,
16467, 16468, 16469, 16470, 16471, 16472, 16473, 16474, 16475,
16476, 16477, 16478, 16479, 16480, 16481, 16482, 16483, 16484,
16485, 16486, 16487, 16488, 16489, 16490, 16491, 16492, 16493,
16494, 16495, 16496, 16497, 16498, 16499, 16500, 16501, 16502,
16503, 16504, 16505, 16506, 16507, 16508, 16509, 16510, 16511,
16512, 16513, 16514, 16515, 16516, 16517, 16518, 16519, 16520,
16521, 16522, 16523, 16524, 16525, 16526, 16527, 16528, 16529,
16530, 16531, 16532, 16533, 16534, 16535, 16536, 16537, 16538,
16539, 16540, 16541, 16542, 16543, 16544, 16545, 16546, 16547,
16548, 16549, 16550, 16551, 16552, 16553, 16554, 16555, 16556,
16557, 16558, 16559, 16560, 16561, 16562, 16563, 16564, 16565,
16566, 16567, 16568, 16569, 16570, 16571, 16572, 16573, 16574,
16575, 16576, 16577, 16578, 16579, 16580, 16581, 16582, 16583,
16584, 16585, 16586, 16587, 16588, 16589, 16590, 16591, 16592,
16593, 16594, 16595, 16596, 16597, 16598, 16599, 16600, 16601,
16602, 16603, 16604, 16605, 16606, 16607, 16608, 16609, 16610,
16611, 16612, 16613, 16614, 16615, 16616, 16617, 16618, 16619,
16620, 16621, 16622, 16623, 16624, 16625, 16626, 16627, 16628,
16629, 16630, 16631, 16632, 16633, 16634, 16635, 16636, 16637,
16638, 16639, 16640, 16641, 16642, 16643, 16644, 16645, 16646,
16647, 16648, 16649, 16650, 16651, 16652, 16653, 16654, 16655,
16656, 16657, 16658, 16659, 16660, 16661, 16662, 16663, 16664,
16665, 16666, 16667, 16668, 16669, 16670, 16671, 16672, 16673,
16674, 16675, 16676, 16677, 16678, 16679, 16680, 16681, 16682,
16683, 16684, 16685, 16686, 16687, 16688, 16689, 16690, 16691,
16692, 16693, 16694, 16695, 16696, 16697, 16698, 16699, 16700,
16701, 16702, 16703, 16704, 16705, 16706, 16707, 16708, 16709,
16710, 16711, 16712, 16713, 16714, 16715, 16716, 16717, 16718,
16719, 16720, 16721, 16722, 16723, 16724, 16725, 16726, 16727,
16728, 16729, 16730, 16731, 16732, 16733, 16734, 16735, 16736,
16737, 16738, 16739, 16740, 16741, 16742, 16743, 16744, 16745,
16746, 16747, 16748, 16749, 16750, 16751, 16752, 16753, 16754,
16755, 16756, 16757, 16758, 16759, 16760, 16761, 16762, 16763,
16764, 16765, 16766, 16767, 16768, 16769, 16770, 16771, 16772,
16773, 16774, 16775, 16776, 16777, 16778, 16779, 16780, 16781,
16782, 16783, 16784, 16785, 16786, 16787, 16788, 16789, 16790,
16791, 16792, 16793, 16794, 16795, 16796, 16797, 16798, 16799,
16800), class = "Date"), radn = c(9.66, 9.54, 8.21, 5, 5.98,
9.39, 8.54, 9.68, 6.74, 2.95, 9.24, 7.39, 10.47, 9.04, 7.1, 4.12,
6.42, 6.89, 10.96, 9.49, 11.72, 8.83, 11.48, 11.42, 11.49, 10.98,
2.87, 11.92, 8.92, 4, 12.92, 8.37, 5.73, 4.47, 8.73, 5.76, 9.34,
10.41, 6.72, 8.44, 13.34, 11.95, 12.2, 10.94, 10.5, 15.72, 14.63,
15.67, 15.91, 14.79, 14.11, 15.89, 17.07, 17.62, 17.22, 14.93,
11.17, 4.83, 8.78, 17.46, 10.35, 19.09, 19.39, 19.48, 19.12,
18.94, 19.93, 20.24, 17.47, 6.07, 19.4, 18.26, 10, 6.33, 10.67,
15.2, 21.39, 22.43, 18.02, 19.4, 18.55, 14.91, 9.15, 21.84, 22.8,
23.16, 23.43, 24.16, 22.56, 23.58, 23.45, 25.09, 25.46, 22.85,
17.05, 23.87, 12.45, 8.88, 25.7, 25.86, 17.28, 24.77, 25.08,
15.62, 27.4, 27.35, 27.71, 26.91, 27.93, 27.99, 26.42, 20.49,
27.9, 11.89, 10.38, 28.43, 28.74, 29.2, 27.62, 28.88, 28.81,
28.92, 29.07, 24.41, 29.1, 26.43, 18, 23.94, 30.68, 29.47, 18.88,
18.58, 25.79, 18.76, 12.18, 12.92, 20.18, 10.75, 14.09, 19.86,
19.47, 15.9, 12.82, 22.62, 21.23, 24.62, 29.5, 30.21, 30.12,
21.87, 25.45, 31.68, 32.18, 29.67, 17.27, 22.41, 24.28, 31.27,
30, 30.12, 21.6, 32.76, 32.27, 32.24, 32.81, 32.45, 32.66, 30.52,
30.5, 32.68, 32.85, 30.42, 32.62, 32.45, 31.29, 32.15, 25.84,
26.21, 27.22, 26.36, 30.72, 26.26, 24.34, 21.45, 18.58, 25.95,
29.09, 21.53, 21.88, 20.76, 17.56, 24.69, 22.83, 27.72, 28.07,
31.18, 30.23, 28.86, 30.61, 30.79, 30.08, 27.28, 16.81, 23.82,
30.09, 30.29, 30.45, 30.8, 31.12, 30.89, 30.19, 25.01, 24.27,
18.93, 28.27, 26.62, 27.97, 22.9, 11.1, 22.29, 24.4, 27.78, 28.17,
28.41, 26.01, 27.18, 25.08, 26.65, 27.95, 27.67, 24.39, 26.59,
26.9, 26.54, 26.02, 25.31, 26.03, 22.22, 24.29, 21.01, 19.73,
23.03, 25.38, 24.98, 24.74, 19.75, 20.24, 24.99, 21.01, 24.53,
24.3, 23.95, 23.36, 22.92, 20.66, 15.42, 6.66, 15.28, 16.1, 16.73,
22.14, 22.02, 21.59, 21.4, 21.41, 21.45, 15.48, 17.78, 19.93,
15.58, 19.22, 17.29, 8.64, 8.94, 15.46, 12.52, 17.79, 18.36,
18.28, 15.27, 13.04, 13.78, 17.88, 17.88, 17.5, 17.31, 16.84,
14.55, 15.17, 7.43, 4.34, 5.23, 12.79, 15.84, 13.32, 15.43, 11.48,
6.13, 14.64, 9.04, 5.09, 11.84, 9.86, 11.4, 4.92, 2.81, 5.76,
7.92, 9.15, 13.14, 13.14, 9.94, 9.77, 11.15, 12.45, 12.33, 11.99,
11.8, 6.92, 11.23, 6.2, 9.6, 4.89, 11.43, 11.05, 10.83, 7.44,
5.4, 6.17, 3.52, 10.71, 10.64, 10.67, 10.6, 10.17, 6.02, 6.96,
6.5, 7.43, 3.49, 2.03, 5.22, 5.02, 4.24, 4.44, 5.52, 2.72, 3.75,
2.31, 8.38, 1.88, 3.07, 2.02, 2.66, 1.67, 5.77, 7.59, 1.9, 1.5,
9.72, 2.66, 2.39, 1.67, 2.38, 9.88), maxt = c(-4.4, -1.9, 0.8,
4.8, 6.8, 11, 13, 12.6, 11.4, 7, 5.8, 10, 7.2, 6.5, 5.9, 5.5,
10.4, 12, 15.6, 11.2, 7.1, 6.3, 6.5, 9.4, 12.8, 14.6, 14.3, 7.8,
11.9, 9.6, 4.5, 10.8, 13.2, 11.4, 14, 14.8, 14.9, 16.3, 17.2,
15.4, 13.3, 12.4, 15.1, 17.6, 19.6, 19.8, 15.1, 12.8, 15.9, 18.7,
18, 13.1, 10.6, 6, 7.6, 12.7, 14, 9.2, 8.3, 7.1, 9.5, 10, 6,
10.1, 15.5, 18.4, 19.9, 19.6, 19.9, 21.5, 13.9, 17, 20.5, 20.6,
22.7, 18.4, 18.5, 16, 19.9, 22.2, 19.1, 19.3, 12.6, 11.7, 17.1,
22.2, 26.5, 19.7, 22.9, 26.3, 20.7, 12.2, 12.4, 16.3, 17.4, 12.7,
12.7, 13, 11.4, 16.4, 20.6, 16.6, 18.4, 24.4, 11.7, 11.8, 18.6,
23, 21.9, 23.3, 24.6, 26, 22.5, 21.6, 13.2, 11.9, 14.8, 21.2,
25.8, 25.5, 22.6, 26.7, 27.6, 26.9, 27.2, 24.2, 18.6, 14.1, 20.5,
21.6, 24.2, 22.6, 20.9, 19.6, 16.9, 14.8, 17.1, 20.6, 18.3, 16.9,
20.2, 21.2, 19.6, 19.2, 22.6, 24, 23.9, 25.6, 27.1, 29.3, 30.2,
31.6, 26.4, 24.7, 25.2, 21, 25.9, 26.4, 30.7, 33.4, 34.7, 29,
30.5, 32.3, 31.9, 32.6, 32.6, 32.7, 33.6, 34, 31.6, 32.4, 31.4,
31.5, 33.7, 35.9, 37.1, 38.8, 39.2, 38.9, 37.8, 38.4, 38.3, 38.6,
37.2, 35.7, 27.9, 33.4, 32.7, 27.5, 29.2, 26.3, 26.9, 28, 29.1,
31.1, 32, 33.1, 29.4, 29.2, 32.3, 34, 33, 29, 29.3, 30.8, 31.5,
30.4, 24.9, 28.5, 33.6, 36.3, 37.7, 38.2, 34.5, 33.2, 33.9, 29.2,
32.3, 25.4, 28.8, 32.4, 32.9, 34.9, 34.6, 36.2, 34.5, 32, 34.1,
33.7, 33.3, 34.8, 34.5, 32.7, 32.3, 35.7, 35.3, 35, 34.2, 33.5,
33.9, 31.4, 27.6, 30.9, 32.2, 30.5, 25.9, 23.5, 19.6, 24.1, 28.1,
30.8, 33.2, 34.8, 35.8, 35.4, 33.5, 27.7, 21.7, 19.4, 20.1, 23.7,
28.5, 31.5, 31.6, 31, 29.3, 31.2, 32.6, 30.5, 28.6, 29.8, 30.9,
26.8, 21.1, 21.8, 20.4, 22.5, 24.9, 26.7, 27.1, 28, 30.7, 29.6,
25.5, 29.3, 30.4, 30.8, 30.5, 29, 22, 18, 13.1, 16, 19, 19.1,
19.3, 20.1, 20, 20.4, 18.6, 15.2, 13.7, 17.1, 22.3, 18.1, 6.3,
6, 5.7, 7.1, 10.3, 11.1, 14.2, 8, 7.1, 8.9, 10.7, 12.3, 14.8,
10.8, 3.2, 7.6, 12.6, 14.4, 9.6, 10.6, 11.7, 12.3, 13.4, 1.3,
-0.9, -0.2, 0.6, 2.5, 4, 5.4, 7.3, 13, 8, 6.7, 11.5, 13.2, 14.2,
14.9, 12.3, 5.5, 6.1, 11.1, 0.3, 0.5, 2, 2.8, 7, 4.9, 2.4, 7.3,
6.2, 2.9, 0.5, -1.2, -2.5, -4, -2.7, -1.1, -3), mint = c(-15.9,
-16.5, -14.4, -11.2, -5.7, -2.4, -2.5, -3.2, -4.3, -4.6, -1.5,
-1, -0.9, -6.3, -7, -5.7, -1.2, -0.9, 0.3, -2.7, -5.9, -10.1,
-8.7, -7.3, -5.7, -3.5, -1.2, -0.4, -0.9, -0.7, -4.3, -4.3, -2.8,
1, 2.7, 3.1, 5.8, 6.2, 3.8, 2.2, -0.7, -1.5, -0.9, -0.3, 1, 1,
-1.6, -3.8, -3.9, -1.9, -0.6, -0.8, -3.8, -7, -8.8, -7, -2.2,
-0.3, -1.1, -2.9, -5.1, -5.2, -9.2, -9.7, -6.9, -4.2, -3.1, -3.5,
-3.8, -2.3, 3.5, 0.3, 0.7, 5.8, 7, 7.4, 2.3, -0.6, -2.2, 0.7,
0.9, 1.6, 3.8, -0.9, -2.5, 1, 2.6, 1.8, -1.6, 2.3, -4.2, -6.6,
-4.7, -4.2, -0.5, -1.4, -3, 0.3, -2.9, -2.3, 1.1, -0.4, -1.5,
0.5, -6.1, -7.3, -5, -0.5, 0.6, 0.7, 1.2, 2.9, 4.3, 4.7, 2.1,
0.3, 0.5, 1.4, 3.4, 5, 4.9, 4.2, 6.3, 6.7, 6, 6.3, 3.6, 3.5,
3.7, 1.1, 1.9, 4.9, 0.7, 1.2, 5.8, 5.6, 4, 6.2, 8.3, 7, 6, 4.7,
7, 9.2, 8.1, 6.9, 7.9, 8.6, 9.6, 9.4, 10.3, 10.4, 9.6, 8.2, 9.4,
9.8, 7.2, 9.4, 10.8, 12.4, 14.5, 11.8, 11, 10.7, 11.3, 10.8,
9.7, 10.4, 10.6, 12.1, 10.3, 10.5, 11.3, 10, 12.6, 13.6, 17.4,
19.9, 19.9, 18.9, 18.4, 18.9, 20.1, 19, 17, 16.9, 14.8, 13.1,
14, 11.5, 10.6, 11.1, 12.7, 11.4, 11.9, 12.5, 13.3, 13.6, 13.2,
11.8, 11.8, 12.6, 15, 11.4, 10, 9.6, 9.3, 9.3, 8.2, 9.6, 9.7,
12, 14.3, 16.1, 16.5, 12.8, 13.7, 11.3, 10.3, 12.2, 11.4, 11.8,
11.1, 10.9, 11.2, 13, 11.8, 9, 9.7, 8.9, 10.1, 10, 11.5, 10.6,
12.2, 10.9, 12.6, 11.9, 11.9, 13.1, 13.4, 11.4, 6.9, 6, 7.7,
9.7, 7.8, 2.2, 1.5, 0.9, 2.3, 4.8, 6.3, 8.3, 10.4, 11.2, 12.8,
11, 7.5, 6.1, 5.5, 2.4, 3.5, 5.8, 5.9, 6.2, 5.6, 6.1, 7.4, 9.9,
7.8, 6.4, 7.8, 11, 10.1, 4.8, 3.5, 6.6, 4.6, 5.5, 5.9, 9.8, 8.3,
8.6, 6.4, 4.4, 6, 7.1, 6.9, 7.5, 7.8, 6.9, 3.9, 1.8, 0.3, 0.3,
-0.5, 3.2, 2.4, -0.3, 0.2, 5.1, -1.5, -1.4, 4.7, 5.6, 1.6, -1.3,
-3.8, -4.1, -4.6, -3.5, -0.8, -1.4, -6.5, -6, -5, -4.9, -3.9,
-4.2, -6.1, -1.7, -0.2, -0.3, -3.6, -7.1, -6.4, -3.4, -5.2, -8.6,
-9.6, -13.8, -16.3, -15.6, -14.5, -11.8, -4.6, 0, -7.6, -7.7,
-1.3, 4.8, 4.6, 2.3, 0.1, -2.2, -1.4, -2.6, -4.7, -9, -6.8, -4.4,
-3.7, -3.9, -5.1, 0, -1.8, -3.2, -9, -14.2, -17.4, -13, -8.2,
-12.7, -17.5), rain = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.8,
0.96, 0, 0, 0, 1.38, 0.25, 0.32, 0, 0, 0, 0, 0, 0, 0, 0, 5.68,
0, 0, 0, 0, 0, 1.12, 0, 0, 0, 4.24, 0.13, 6.84, 1.44, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.28, 2.13, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.65,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.65, 0, 3.6, 1.9, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.59, 1.19, 11.03, 5.43, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.77, 0, 0, 0, 0, 0, 5.06, 5.6,
0.01, 2.23, 5.45, 7.43, 4.47, 0.11, 4.02, 6.36, 0.38, 0.79, 1.46,
0, 0, 0, 0, 0, 0, 0, 0, 0.82, 3.06, 0.06, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.37, 0,
2.3, 1.74, 3.2, 1.72, 3.53, 2, 1.08, 0.46, 0.38, 0.3, 0, 0, 0,
0.47, 0, 0, 0.56, 4.86, 9.66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.86,
0, 0, 0, 0, 2.44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0.55, 0.83, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16.08,
0.93, 0.01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.24, 4.25, 14.52,
13.45, 0, 0, 0, 0, 0, 0, 0, 0, 1.2, 1.23, 0, 0, 4.15, 11.05,
2.29, 0, 0, 0, 0, 0.77, 3.04, 0, 0, 0, 0, 0, 0.88, 0, 0, 0, 0,
0, 0, 0, 0, 0.94, 0, 0, 0, 0, 0, 0, 0.02, 0, 0, 0, 0, 0, 0, 0,
0.66, 1.85, 0.95, 0.61, 3.89, 0, 0, 1.23, 4.81, 0, 1.96, 1.67,
6.94, 9.65, 0, 1.99, 0, 0, 2.24, 2.67, 0.16, 0.52), evap = c(8.48,
8.48, 8.48, 8.48, 8.48, 8.48, 8.48, 8.31, 8.31, 8.31, 8.31, 8.31,
8.31, 8.31, 8.09, 8.09, 8.09, 8.09, 8.09, 8.09, 8.09, 7.86, 7.86,
7.86, 7.86, 7.86, 7.86, 7.86, 7.62, 7.62, 7.62, 7.62, 7.62, 7.62,
7.62, 7.39, 7.39, 7.39, 7.39, 7.39, 7.39, 7.39, 7.16, 7.16, 7.16,
7.16, 7.16, 7.16, 7.16, 6.93, 6.93, 6.93, 6.93, 6.93, 6.93, 6.93,
6.71, 6.71, 6.71, 6.71, 6.71, 6.71, 6.71, 6.48, 6.48, 6.48, 6.48,
6.48, 6.48, 6.48, 6.23, 6.23, 6.23, 6.23, 6.23, 6.23, 6.23, 5.96,
5.96, 5.96, 5.96, 5.96, 5.96, 5.96, 5.66, 5.66, 5.66, 5.66, 5.66,
5.66, 5.66, 5.32, 5.32, 5.32, 5.32, 5.32, 5.32, 5.32, 4.95, 4.95,
4.95, 4.95, 4.95, 4.95, 4.95, 4.56, 4.56, 4.56, 4.56, 4.56, 4.56,
4.56, 4.15, 4.15, 4.15, 4.15, 4.15, 4.15, 4.15, 3.75, 3.75, 3.75,
3.75, 3.75, 3.75, 3.75, 3.38, 3.38, 3.38, 3.38, 3.38, 3.38, 3.38,
3.05, 3.05, 3.05, 3.05, 3.05, 3.05, 3.05, 2.78, 2.78, 2.78, 2.78,
2.78, 2.78, 2.78, 2.58, 2.58, 2.58, 2.58, 2.58, 2.58, 2.58, 2.45,
2.45, 2.45, 2.45, 2.45, 2.45, 2.45, 2.37, 2.37, 2.37, 2.37, 2.37,
2.37, 2.37, 2.35, 2.35, 2.35, 2.35, 2.35, 2.35, 2.35, 2.38, 2.38,
2.38, 2.38, 2.38, 2.38, 2.38, 2.46, 2.46, 2.46, 2.46, 2.46, 2.46,
2.46, 2.57, 2.57, 2.57, 2.57, 2.57, 2.57, 2.57, 2.72, 2.72, 2.72,
2.72, 2.72, 2.72, 2.72, 2.9, 2.9, 2.9, 2.9, 2.9, 2.9, 2.9, 3.1,
3.1, 3.1, 3.1, 3.1, 3.1, 3.1, 3.33, 3.33, 3.33, 3.33, 3.33, 3.33,
3.33, 3.57, 3.57, 3.57, 3.57, 3.57, 3.57, 3.57, 3.83, 3.83, 3.83,
3.83, 3.83, 3.83, 3.83, 4.13, 4.13, 4.13, 4.13, 4.13, 4.13, 4.13,
4.47, 4.47, 4.47, 4.47, 4.47, 4.47, 4.47, 4.85, 4.85, 4.85, 4.85,
4.85, 4.85, 4.85, 5.26, 5.26, 5.26, 5.26, 5.26, 5.26, 5.26, 5.67,
5.67, 5.67, 5.67, 5.67, 5.67, 5.67, 6.08, 6.08, 6.08, 6.08, 6.08,
6.08, 6.08, 6.46, 6.46, 6.46, 6.46, 6.46, 6.46, 6.46, 6.79, 6.79,
6.79, 6.79, 6.79, 6.79, 6.79, 7.09, 7.09, 7.09, 7.09, 7.09, 7.09,
7.09, 7.35, 7.35, 7.35, 7.35, 7.35, 7.35, 7.35, 7.6, 7.6, 7.6,
7.6, 7.6, 7.6, 7.6, 7.84, 7.84, 7.84, 7.84, 7.84, 7.84, 7.84,
8.07, 8.07, 8.07, 8.07, 8.07, 8.07, 8.07, 8.28, 8.28, 8.28, 8.28,
8.28, 8.28, 8.28, 8.46, 8.46, 8.46, 8.46, 8.46, 8.46, 8.46, 8.58,
8.58, 8.58, 8.58, 8.58, 8.58, 8.58, 8.63, 8.63, 8.63, 8.63, 8.63,
8.63, 8.63, 8.6, 8.6, 8.6, 8.6, 8.6, 8.6, 8.6, 8.6), index = 8767:9131), .Names = c("date",
"radn", "maxt", "mint", "rain", "evap", "index"), na.action = structure(1L, .Names = "1", class = "omit"), row.names = 8768:9132, class = "data.frame")
I am trying to fit a function to this data so that I can simulate some data from it. I have done this in the past with other datasets with success, but with this data optim is converging but visually the fit is terrible. I do a much better job using guess and check. Here I am looking at minimum temperature. I have many years of data, but in the interest of space I only included 1 year.
Here is my optimization code:
# Sine curve a*sin(b*x): `a` scales the amplitude, `b` scales the argument `x`.
TMIN <- function(a,b,x){a*sin(b*x)}
plot(h$mint~h$index,type='l')
curve(TMIN(x, a=20, b=.017),add=TRUE, col="red")
# Objective handed to optim(): negative normal log-likelihood of the values in k.
#   params  numeric vector; elements 1 and 2 (a, b) enter a*sin(b*k),
#           element 3 (c) is used as the standard deviation.
#   k       numeric vector of observations (the optim() call below passes test$mint).
# NOTE(review): the sine is evaluated at the observations k themselves and then
# averaged, so every observation is compared against the same scalar mean; no
# time index (such as test$index) is used anywhere in this function.
normTMIN<-function(params,k){
a=params[1]
b=params[2]
c=params[3]
# A single number: the average of a*sin(b*k) over all observations.
Mean<-mean(a*sin(b*k))
-sum(dnorm(k,mean=Mean,sd=c,log=TRUE)) #shape= Mean(a,b)/scale
}
optTMIN <- optim(par=c(a=60,b=.017,c=1),k=test$mint,fn=normTMIN) #par doesn't equal params
optTMIN
curve(TMIN(optTMIN$par[1],optTMIN$par[2],x), add=TRUE,col="blue")
I can't figure out why optim is going so terribly wrong. Thanks in advance.
Do you want to do something like the following (find the least-squares estimate)?
head(test)
# Sine curve: scale `x` by `b`, take the sine, then scale the result by `a`.
TMIN <- function(a, b, x) {
  phase <- b * x
  a * sin(phase)
}
plot(test$mint~test$index,type='l')
curve(TMIN(x, a=20, b=.017),add=TRUE, col="red")
# Least-squares objective for optim(): the sum of squared differences between
# the observations `k` and the curve TMIN evaluated at `x`.
#   params  numeric vector; element 1 is passed to TMIN as `a`, element 2 as `b`.
#   k       numeric vector of observed values.
#   x       numeric vector of positions at which the curve is evaluated.
normTMIN <- function(params, k, x) {
  fitted_vals <- TMIN(params[1], params[2], x)
  errs <- k - fitted_vals
  sum(errs^2)
}
# Run optim() on the least-squares objective from the starting values
# a = 1, b = 0.001 with tracing enabled, print the result, and overlay the
# fitted sine curve in blue. The trailing comments show the reported $par.
optTMIN <- optim(
  par = c(a = 1, b = 0.001),
  fn = normTMIN,
  k = test$mint,
  x = test$index,
  control = list(trace = TRUE)
) # optim()'s argument is `par`; the objective's own argument is `params`
optTMIN
curve(
  TMIN(optTMIN$par[1], optTMIN$par[2], x),
  add = TRUE,
  col = "blue"
)
#$par
# a b
#10.97271664 0.01349994

Weird behaviour (bug?) in car::bcPower

Consider the dataset Kort:
structure(list(V1 = c(-0.03, 0.22, -0.11, -0.01, 0.25, 0.29,
-0.74, 0.23, 0.39, -0.04, 0.18, 0.19, 0.4, 0.21, 0.21, -0.01,
-0.05, 0.02, -0.12, 0.37, -0.07, 0.51, 0.39, 0.14, 0.02, 0.73,
-0.25, 0.44, 0.29), V2 = c(35.39, 34.33, 32.74, 34.72, 33.07,
30.9, 29.89, 31.17, 31.62, 33.13, 30.64, 33.31, 33.61, 34.16,
30.06, 30.06, 31.18, 25.57, 30.52, 32.43, 31.54, 29.6, 34.66,
31.74, 27.22, 41, 32.02, 37.96, 29.25), V3 = c(37.24, 36.77,
37.21, 41.16, 40.3, 42.16, 40.77, 39.59, 37, 38.32, 34.6, 38.1,
36.07, 39.2, 36.97, 38.28, 38.72, 46.81, 39.63, 36, 45.33, 38.72,
36.2, 40.94, 37.7, 42.44, 37.92, 39.87, 37.15), V4 = c(-36L,
-18L, -2L, 20L, 37L, 39L, -7L, 31L, -23L, 32L, 73L, 10L, 14L,
18L, 126L, 98L, 13L, 14L, 15L, 37L, 66L, 3L, -50L, 9L, 6L, -20L,
4L, -26L, -2L), V5 = c(12.4, 10.5, 2.8, 9.5, 9.4, 10.7, 7.5,
14.8, 10.9, 13.5, 11.5, 11.8, 13.6, 8.6, 13.6, 13.1, 14.3, 11.3,
16.1, 14.5, 8.4, 15.4, 13.4, 14, 18.8, 17.4, 16.4, 16, 17.7),
V6 = c(27424L, 25597L, 20968L, 24730L, 25423L, 25801L, 23681L,
29527L, 26228L, 28262L, 27363L, 27134L, 27542L, 24647L, 28260L,
27922L, 29054L, 25650L, 30096L, 29103L, 24112L, 30035L, 28771L,
27818L, 32455L, 29722L, 30508L, 29896L, 31961L), V7 = c(68.8,
70.4, 61.6, 73.5, 71.8, 76.5, 72.7, 75.3, 71.7, 75, 72.9,
73.3, 73.7, 69, 72.7, 74.2, 73.4, 71.2, 76.4, 73, 62.5, 76,
73.7, 74.7, 74.3, 74.8, 74.6, 74.4, 74.4), V8 = c(8.1, 6.8,
11, 5.3, 6.3, 4.1, 5.5, 4, 5.9, 4.3, 5.5, 5.4, 4.2, 8.1,
5.2, 4.8, 4.4, 8.2, 3.8, 5.9, 12.9, 4.3, 5.2, 5, 3.6, 3.8,
4.6, 4.3, 4.5), V9 = c(0.38, 0.15, 0.16, 0.08, 0.12, 0.05,
0.07, 0.04, 0.08, 0.07, 0.13, 0.08, 0.08, 0.26, 0.05, 0.14,
0.05, 0.26, 0.03, 0.18, 0.26, 0.04, 0.04, 0.14, 0.05, 0,
0.02, 0.02, 0.1), V10 = c(9.8, 9.9, 19.4, 7, 9.2, 3, 8.5,
1.1, 3, 2.3, 5.1, 5.6, 1, 22.3, 4.4, 6.2, 2.2, 5.3, 1.5,
5, 18.7, 1.5, 3, 8.9, 1.6, 0, 5.1, 2.1, 3.6), V11 = c(6.3,
7.5, 5.5, 10.2, 5, 9.6, 9.3, 4.8, 4.3, 4.6, 4.1, 5.7, 6.4,
4, 7.2, 4.7, 4.2, 4.5, 7.6, 5.3, 6.2, 4.1, 4.9, 4.1, 5.1,
3.3, 5.4, 5, 5.6), V12 = c(153605L, 152867L, 115972L, 140341L,
139245L, 167038L, 143239L, 179712L, 135273L, 167487L, 160738L,
160648L, 154717L, 118800L, 168954L, 148412L, 147637L, 142615L,
210838L, 161840L, 114310L, 182670L, 160293L, 147747L, 192889L,
191077L, 164107L, 202051L, 192945L)), .Names = c("V1", "V2",
"V3", "V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11", "V12"
), class = "data.frame", row.names = c(NA, -29L))
Where the response is:
Kort$V12
[1] 153605 152867 115972 140341 139245 167038 143239 179712 135273 167487
[11] 160738 160648 154717 118800 168954 148412 147637 142615 210838 161840
[21] 114310 182670 160293 147747 192889 191077 164107 202051 192945
Doing a Box-Cox transform using boxcox() (exported by MASS; car's own version is boxCox())
# Box-Cox analysis of the response V12 modelled on all other columns of
# Kort, over a grid of lambda values from -4 to 4 in steps of 0.4.
boxcox(V12 ~ ., data = Kort, lambda = seq(-4, 4, by = 4 / 10))
yields an optimal parameter of -2. Transforming the response using
car::bcPower
# Transform the response V12 with bcPower() using lambda = -2.
TVP <- bcPower(Kort$V12, lambda = -2)
turns TVP into a vector of constants:
TVP
[1] 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5
[20] 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5
but the Box-Cox transform should be a continuous map!
I don't think this is a bug; there's simply a limit to how many decimal places are printed. The help file says the calculation is (U^(lambda)-1)/lambda, which for lambda = -2 equals (1 - U^(-2))/2 and is therefore very close to 1/2 when U is large. You can see that TVP is being calculated correctly with
# Subtracting 0.5 exposes the small differences that default printing hides.
TVP - 0.5
# [1] -2.119138e-11 -2.139650e-11 -3.717610e-11 ...
or
# Print with 20 significant digits for this call only; unlike
# options(digits = 20), this leaves the session-wide setting untouched.
print(TVP, digits = 20)
# [1] 0.49999999997880861802 0.49999999997860350431 0.49999999996282390446 ...

Resources