Hi everyone, I am facing a unique problem. I want to compute the Transfer of inventory under the following condition: if the Quantity required for a particular Item.Code at an ID is more than its Stock on hand (SOH), the shortfall should be transferred from another ID that has excess stock.
For example, item I60 is available at 7 IDs. For E1 and E6 the available stock is less than the required quantity, so I want to transfer the excess inventory from E3 (i.e. 6 - 2 = 4) to E1 and E6. The transfer to E1 will be 1 and the transfer to E6 will be 2, so the remaining SOH at E3 will be 3. I hope this is clear.
structure(list(ID = structure(c(1L, 6L, 7L, 3L, 5L, 2L, 4L, 8L,
1L, 7L, 3L, 5L, 2L, 9L, 8L, 1L, 7L, 3L, 5L, 2L, 9L, 8L, 1L, 7L,
3L, 5L, 2L, 9L, 8L, 1L, 7L, 3L, 5L, 2L, 9L, 8L, 1L, 7L, 3L, 5L,
2L, 9L, 8L, 1L, 7L, 3L, 5L, 2L, 9L), .Label = c("E1", "E2", "E3",
"E4", "E5", "E6", "E7", "E8", "E9"), class = "factor"), Item.Code = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L
), .Label = c("I60", "I67", "I68", "I69", "I70", "I71", "I72"
), class = "factor"), Quantity = c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), Stock_on_hand = c(1L,
0L, 2L, 6L, 2L, 2L, 2L, 0L, 6L, 3L, -1L, 1L, 2L, 9L, 1L, 5L,
-1L, 9L, 3L, 38L, 5L, 10L, 2L, 3L, 2L, 2L, 1L, 8L, 0L, 2L, 2L,
4L, 2L, 1L, 5L, 1L, -1L, 4L, 3L, 1L, 2L, 11L, 1L, 2L, 0L, 3L,
1L, 4L, 1L), Transfer = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 4L, 0L,
0L, 0L, 0L, 0L, 0L, 3L, 0L, 7L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 2L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 2L, 0L, 0L)), class = "data.frame", row.names = c(NA,
-49L))
highlighted columns should be generated with R code
Looks like an integer programming problem with Updated_SOH as the variable and objective function minimizing absolute difference between Quantity and SOH subject to constraint that total sum of SOH stays constant.
Here is a heuristic approach to solve this optimization problem:
1) Calculate the difference which is to be used to sort the dataset.
2) In a similar approach as here but shifting positive values and different aggregation, we use these excess SOH to net off SOH deficit in previous rows.
3) The final output is the sum of i) existing Quantity, ii) any unfulfilled Quantity and iii) excess SOH.
# Convert df to a data.table by reference so that := and setorder() work on it.
setDT(df)
# Diff > 0 is stock in excess of the required Quantity; Diff < 0 is a shortfall.
df[, Diff := Stock_on_hand - Quantity]
# Within each item, order rows from the largest shortfall to the largest excess.
setorder(df, Item.Code, Diff)
# Per item: move excess stock up the sorted rows to net off the shortfalls.
df[, Updated_SOH := {
# posVal keeps only the excesses, negVal only the shortfalls (the rest set to 0).
posVal <- replace(Diff, Diff<0, 0)
negVal <- replace(Diff, Diff>0, 0)
# Run at most .N - 1 passes, stopping early once no shortfall remains.
n <- 1L
while (any(negVal < 0) && n < .N) {
# One pass: each row's remaining shortfall receives the positive value held by
# the row below it (lead shift). Row 1's positive value would be dropped by the
# shift, so it is added back in place by the last term.
negVal <- replace(negVal, negVal>0, 0) +
shift(posVal, 1L, type="lead", fill=0) +
c(posVal[1L], rep(0, .N-1L)) #for case where there are more Quantity than SOH
# The positive part of the netted values is what gets shifted in the next pass.
posVal <- replace(negVal, negVal<0, 0)
n <- n + 1L
}
# Positive values still left after the passes are the undistributed excess.
excess <- negVal[negVal > 0]
# Result: required Quantity, reduced by any shortfall that could not be covered,
# with the leftover excess placed on the last rows of the group.
Quantity + #existing Quantity
replace(negVal, negVal>0, 0) + #unfulfilled Quantity
c(rep(0, .N - length(excess)), excess) #shifting back down excess SOH
}, by=.(Item.Code)]
output:
ID Item.Code Quantity Stock_on_hand Transfer Diff Updated_SOH
1: E6 I60 2 0 0 -2 2
2: E1 I60 2 1 0 -1 2
3: E7 I60 2 2 0 0 2
4: E5 I60 2 2 0 0 2
5: E2 I60 2 2 0 0 2
6: E4 I60 2 2 0 0 2
7: E3 I60 2 6 0 4 3
8: E3 I67 2 -1 0 -3 2
9: E8 I67 2 0 4 -2 2
10: E5 I67 2 1 0 -1 2
11: E2 I67 2 2 0 0 2
12: E7 I67 2 3 0 1 2
13: E1 I67 2 6 0 4 2
14: E9 I67 2 9 0 7 8
15: E7 I68 2 -1 7 -3 2
16: E8 I68 2 1 3 -1 2
17: E5 I68 2 3 0 1 2
18: E1 I68 2 5 0 3 2
19: E9 I68 2 5 0 3 5
20: E3 I68 2 9 0 7 9
21: E2 I68 4 38 0 34 38
22: E2 I69 2 1 6 -1 2
23: E1 I69 2 2 0 0 2
24: E3 I69 2 2 0 0 2
25: E5 I69 2 2 0 0 2
26: E7 I69 2 3 0 1 2
27: E9 I69 2 8 0 6 8
28: E8 I69 2 10 0 8 10
29: E8 I70 2 0 0 -2 2
30: E2 I70 2 1 3 -1 2
31: E1 I70 2 2 0 0 2
32: E7 I70 2 2 0 0 2
33: E5 I70 2 2 0 0 2
34: E3 I70 2 4 0 2 2
35: E9 I70 2 5 0 3 4
36: E1 I71 2 -1 2 -3 2
37: E8 I71 2 1 0 -1 2
38: E5 I71 2 1 0 -1 2
39: E2 I71 2 2 0 0 2
40: E3 I71 2 3 0 1 2
41: E7 I71 2 4 0 2 2
42: E9 I71 2 11 0 9 9
43: E7 I72 2 0 1 -2 0
44: E8 I72 2 1 0 -1 2
45: E5 I72 2 1 2 -1 2
46: E9 I72 2 1 0 -1 2
47: E1 I72 2 2 0 0 2
48: E3 I72 2 3 0 1 2
49: E2 I72 2 4 0 2 2
ID Item.Code Quantity Stock_on_hand Transfer Diff Updated_SOH
data:
library(data.table)
df <- structure(list(ID = structure(c(1L, 6L, 7L, 3L, 5L, 2L, 4L, 8L,
1L, 7L, 3L, 5L, 2L, 9L, 8L, 1L, 7L, 3L, 5L, 2L, 9L, 8L, 1L, 7L,
3L, 5L, 2L, 9L, 8L, 1L, 7L, 3L, 5L, 2L, 9L, 8L, 1L, 7L, 3L, 5L,
2L, 9L, 8L, 1L, 7L, 3L, 5L, 2L, 9L), .Label = c("E1", "E2", "E3",
"E4", "E5", "E6", "E7", "E8", "E9"), class = "factor"), Item.Code = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L
), .Label = c("I60", "I67", "I68", "I69", "I70", "I71", "I72"
), class = "factor"), Quantity = c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), Stock_on_hand = c(1L,
0L, 2L, 6L, 2L, 2L, 2L, 0L, 6L, 3L, -1L, 1L, 2L, 9L, 1L, 5L,
-1L, 9L, 3L, 38L, 5L, 10L, 2L, 3L, 2L, 2L, 1L, 8L, 0L, 2L, 2L,
4L, 2L, 1L, 5L, 1L, -1L, 4L, 3L, 1L, 2L, 11L, 1L, 2L, 0L, 3L,
1L, 4L, 1L), Transfer = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 4L, 0L,
0L, 0L, 0L, 0L, 0L, 3L, 0L, 7L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 2L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 2L, 0L, 0L)), class = "data.frame", row.names = c(NA,
-49L))
Related
I have fish count data and am trying to create a new dataframe using averages of the measurements based on conditions of two different columns. here is my data:
df <- structure(list(SITE = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L,
3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L,
2L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L), .Label = c("1", "2", "3"), class = "factor"),
ZONE = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), .Label = c("CREST", "INNER_FLAT", "MID_FLAT",
"OUTER_FLAT", "SLOPE"), class = "factor"), C_TOTAL = c(44L,
7L, 20L, 14L, 0L, 4L, 2L, 3L, 1L, 8L, 28L, 24L, 31L, 12L,
33L, 6L, 16L, 33L, 75L, 21L, 60L, 81L, 37L, 89L, 21L, 35L,
71L, 5L, 2L, 0L, 0L, 10L, 23L, 0L, 5L, 11L, 3L, 1L, 5L, 0L,
0L, 8L, 7L, 6L, 42L), C_M2 = c(0.210465706, 0.029861994,
0.090324177, 0.066599319, 0, 0.022092452, 0.011750593, 0.015245519,
0.004710433, 0.033111594, 0.155094195, 0.110576495, 0.193659068,
0.059152822, 0.192379108, 0.047800772, 0.08917095, 0.141336411,
0.402538785, 0.130438337, 0.315206235, 0.460746849, 0.278643938,
0.467754275, 0.192830321, 0.119928472, 0.411502497, 0.015370489,
0.005150184, 0, 0, 0.034651441, 0.067824733, 0, 0.009805851,
0.034844309, 0.010614352, 0.004131048, 0.01850898, 0, 0,
0.029195413, 0.021409016, 0.030498145, 0.172406074), TRANS_A = c(209.0601875,
234.411677, 221.4246571, 210.2123593, 226.6158348, 181.0573136,
170.2041767, 196.7791332, 212.294701, 241.6072127, 180.5354478,
217.0443184, 160.0751279, 202.8643689, 171.536298, 125.5209863,
179.4306337, 233.485481, 186.3174499, 160.9956132, 190.3515643,
175.801528, 132.7859497, 190.2708425, 108.9040348, 291.8406241,
172.5384427, 325.2986863, 388.3356059, 303.1957479, 261.1574528,
288.5882879, 339.1093313, 239.1118021, 509.89965, 315.6899993,
282.6362022, 242.0693453, 270.1391425, 294.8864591, 321.2013381,
274.0156514, 326.9650539, 196.7332763, 243.6109069), SCARID_T = c(35L,
4L, 4L, 13L, 0L, 4L, 2L, 0L, 1L, 4L, 20L, 12L, 17L, 5L, 20L,
6L, 6L, 18L, 63L, 11L, 41L, 75L, 34L, 89L, 14L, 33L, 68L,
0L, 0L, 0L, 0L, 10L, 22L, 0L, 0L, 10L, 0L, 0L, 1L, 0L, 0L,
6L, 0L, 4L, 42L), ACAN_T = c(4L, 0L, 11L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 8L, 5L, 0L, 0L, 0L, 0L, 3L, 2L, 7L, 8L, 8L, 1L,
1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 5L, 0L, 0L,
0L, 1L, 0L, 0L, 2L, 0L, 0L, 0L), SIG_T = c(5L, 3L, 5L, 1L,
0L, 0L, 0L, 3L, 0L, 4L, 0L, 7L, 14L, 7L, 13L, 0L, 7L, 13L,
5L, 2L, 11L, 5L, 2L, 0L, 7L, 1L, 3L, 5L, 2L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 3L, 1L, 3L, 0L, 0L, 0L, 7L, 2L, 0L)), row.names = c(NA,
-45L), class = "data.frame")
I want to average all the measurements by each zone, but also according to site. So I want a new data frame where each site has one measurement for each zone.
Can anyone help me? Thanks!
library(dplyr)
# Average every numeric column within each SITE/ZONE combination, giving one
# row per site and zone.
summarise(
  group_by(df, SITE, ZONE),
  across(where(is.numeric), mean)
)
# A tibble: 15 x 8
# Groups: SITE [3]
SITE ZONE C_TOTAL C_M2 TRANS_A SCARID_T ACAN_T SIG_T
<fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 CREST 20 0.0996 213. 12 4.33 3.67
2 1 INNER_FLAT 3 0.0111 265. 0.333 0.333 2.33
3 1 MID_FLAT 2.33 0.00684 339. 0 0 2.33
4 1 OUTER_FLAT 52 0.283 179. 38.3 7.67 6
5 1 SLOPE 23.7 0.110 222. 14.3 5 4.33
6 2 CREST 25.3 0.148 178. 14 0 11.3
7 2 INNER_FLAT 2.67 0.00973 297. 2 0.667 0
8 2 MID_FLAT 11 0.0342 296. 10.7 0.333 0
9 2 OUTER_FLAT 69 0.402 166. 66 0.667 2.33
10 2 SLOPE 6 0.0296 206. 5.67 0 0.333
11 3 CREST 18.3 0.0928 179. 10 1.67 6.67
12 3 INNER_FLAT 18.3 0.0748 256. 15.3 0 3
13 3 MID_FLAT 5.33 0.0149 355. 3.33 1.67 0.333
14 3 OUTER_FLAT 42.3 0.241 191. 38.3 0.333 3.67
15 3 SLOPE 2 0.0106 193. 1 0 1
I have a dataframe df with following information:
df <- structure(list(Samples = structure(c(1L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 2L, 1L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 2L, 1L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 2L, 1L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 2L), .Label = c("Sample1", "Sample10", "Sample2",
"Sample3", "Sample4", "Sample5", "Sample6", "Sample7", "Sample8",
"Sample9"), class = "factor"), patient.vital_status = c(0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 1L), years = c(3.909589041, 1.457534247,
2.336986301, 5.010958904, 1.665753425, 1.81369863, 1.191780822,
4.687671233, 2.167123288, 1.95890411, 3.909589041, 1.457534247,
2.336986301, 5.010958904, 1.665753425, 1.81369863, 1.191780822,
4.687671233, 2.167123288, 1.95890411, 3.909589041, 1.457534247,
2.336986301, 5.010958904, 1.665753425, 1.81369863, 1.191780822,
4.687671233, 2.167123288, 1.95890411, 3.909589041, 1.457534247,
2.336986301, 5.010958904, 1.665753425, 1.81369863, 1.191780822,
4.687671233, 2.167123288, 1.95890411), Genes = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("A1BG", "A1CF", "A2M",
"A2ML1"), class = "factor"), value = c(0.034459012, 0.017698878,
0.023313851, 0.010456762, 0.032674019, 0.037561831, 0.03380681,
0, 0.019954956, 0.012392427, 0.835801613, 2.265192447, 2.431409095,
5.012117956, 2.139962802, 2.371946704, 4.555234385, 0.550293401,
0.924012327, 2.274642129, 92.85639578, 79.50897642, 23.72187602,
26.86025304, 32.80504253, 222.6449054, 71.78812505, 45.76371588,
29.93976676, 22.97515484, 0.03780441, 0.005825143, 0, 0.002867985,
0.011948708, 0.02060423, 0.004636111, 0.015903347, 0.005473063,
0.033988816)), class = "data.frame", row.names = c(NA, -40L))
I want to loop over the information based on the columns Genes and value and get a result. And again I want the result to be added to the dataframe df. The result will be with low or high.
I'm trying to do this with the following code, but it doesn't work:
# Distinct gene names as character strings; these are the loop values below.
genes <- as.character(unique(df$Genes))
library(survival)
library(survminer)
# NOTE(review): the loop variable i is never used in the body, so every
# iteration runs the same call on the full df instead of one gene's rows.
for(i in genes){
surv_rnaseq.cut <- surv_cutpoint(
df,
time = "years",
event = "patient.vital_status",
variables = c("Genes","value"))
# NOTE(review): the whole surv_categorize() result is stored in df$cat and is
# overwritten on each iteration; presumably only its categorized "value"
# column is wanted here - confirm.
df$cat <- surv_categorize(surv_rnaseq.cut)
}
Along with the above result, I also want the summary of surv_rnaseq.cut for all four genes, each labelled with its gene name.
Any help would be appreciated. Thank you.
An option would be to split by 'genes' (group_split), loop over the list, apply the functions and bind the list elements after creating the column
library(survminer)
library(survival)
library(dplyr)
library(purrr)
# Split df into one data frame per gene, compute that gene's cutpoint on
# "value", and append the resulting category as a new `cat` column. The
# per-gene results are row-bound back into a single data frame.
map_dfr(
  group_split(df, Genes),
  function(gene_df) {
    gene_cut <- surv_cutpoint(
      gene_df,
      time = "years",
      event = "patient.vital_status",
      variables = c("Genes", "value")
    )
    # surv_categorize() returns the categorized variables; keep only `value`.
    gene_cat <- pull(surv_categorize(gene_cut), value)
    mutate(gene_df, cat = gene_cat)
  }
)
# A tibble: 40 x 6
# Samples patient.vital_status years Genes value cat
# <fct> <int> <dbl> <fct> <dbl> <chr>
# 1 Sample1 0 3.91 A1BG 0.0345 high
# 2 Sample2 0 1.46 A1BG 0.0177 high
# 3 Sample3 0 2.34 A1BG 0.0233 high
# 4 Sample4 0 5.01 A1BG 0.0105 high
# 5 Sample5 0 1.67 A1BG 0.0327 high
# 6 Sample6 0 1.81 A1BG 0.0376 high
# 7 Sample7 0 1.19 A1BG 0.0338 high
# 8 Sample8 1 4.69 A1BG 0 low
# 9 Sample9 0 2.17 A1BG 0.0200 high
#10 Sample10 1 1.96 A1BG 0.0124 high
# … with 30 more rows
My data looks like this
df<- structure(list(Main = structure(c(5L, 3L, 1L, 2L, 4L, 4L, 2L,
1L, 5L, 2L, 5L, 4L, 5L, 2L), .Label = c("IsMainbody", "IsMainbodyCandidate",
"IsMainbodyRejected", "Main", "None"), class = "factor"), Group.IDs = c(52L,
NA, 2L, 12L, 38L, 38L, 6L, 3L, NA, 49L, 20L, 38L, 54L, 85L),
X..Number1 = c(12L, 6L, 1L, 5L, 1L, 1L, 1L, 1L, 17L, 1L,
4L, 1L, 1L, 4L), X..No = c(20L, 62L, 2L, 16L, 3L, 3L, 1L,
3L, 32L, 3L, 36L, 3L, 1L, 20L), X..Unique.N = c(0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), class = "data.frame", row.names = c(NA,
-14L))
I am trying to find the number of rows that contain specific strings.
Based on the Main column, I want to find how many of my samples have "Main", how many have "IsMainbodyCandidate" and how many have "IsMainbodyRejected".
Then I want to make a new dataset that consists only of the Main, IsMainbody and IsMainbodyCandidate rows, like below.
Main Group IDs # Number1 # No # Unique N
IsMainbody. 2 1 2 1
IsMainbodyCandidate 12 5 16 0
Main 38 1 3 0
Main 38 1 3 0
IsMainbodyCandidate 6 1 1 0
IsMainbody 3 1 3 0
IsMainbodyCandidate 49 1 3 0
IsMainbodyCandidate 85 4 20 0
# count by main: number of rows for each value of the Main column
table(df$Main)
# new dataframe with only the requested categories; this drops the rows
# marked "None" as well as those marked "IsMainbodyRejected"
df[df$Main %in% c("Main", "IsMainbody", "IsMainbodyCandidate"), ]
# or, equivalently for this data, by excluding the unwanted categories
# (filtering on != "None" alone would still keep "IsMainbodyRejected")
df[!df$Main %in% c("None", "IsMainbodyRejected"), ]
I am building an r-script that uses all the possible combinations of 4 different variables: Rounds, Stations, and Players (Male and Female). Below is my dput version of the dataset:
structure(list(x = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), .Label = c("round1", "round2", "round3"
), class = "factor"), x.x = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("station1", "station2",
"station3"), class = "factor"), x.y = structure(c(1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L,
6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L,
6L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L), .Label = c("male1",
"male2", "male3", "male4", "male5", "male6"), class = "factor"),
y = structure(c(2L, 3L, 4L, 5L, 6L, 1L, 3L, 4L, 5L, 6L, 1L,
2L, 4L, 5L, 6L, 1L, 2L, 3L, 5L, 6L, 1L, 2L, 3L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 6L, 1L, 3L, 4L, 5L, 6L, 1L,
2L, 4L, 5L, 6L, 1L, 2L, 3L, 5L, 6L, 1L, 2L, 3L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 6L, 1L, 3L, 4L, 5L, 6L, 1L,
2L, 4L, 5L, 6L, 1L, 2L, 3L, 5L, 6L, 1L, 2L, 3L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 6L, 1L, 3L, 4L, 5L, 6L, 1L,
2L, 4L, 5L, 6L, 1L, 2L, 3L, 5L, 6L, 1L, 2L, 3L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 6L, 1L, 3L, 4L, 5L, 6L, 1L,
2L, 4L, 5L, 6L, 1L, 2L, 3L, 5L, 6L, 1L, 2L, 3L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 6L, 1L, 3L, 4L, 5L, 6L, 1L,
2L, 4L, 5L, 6L, 1L, 2L, 3L, 5L, 6L, 1L, 2L, 3L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 6L, 1L, 3L, 4L, 5L, 6L, 1L,
2L, 4L, 5L, 6L, 1L, 2L, 3L, 5L, 6L, 1L, 2L, 3L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 6L, 1L, 3L, 4L, 5L, 6L, 1L,
2L, 4L, 5L, 6L, 1L, 2L, 3L, 5L, 6L, 1L, 2L, 3L, 4L, 6L, 1L,
2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 6L, 1L, 3L, 4L, 5L, 6L, 1L,
2L, 4L, 5L, 6L, 1L, 2L, 3L, 5L, 6L, 1L, 2L, 3L, 4L, 6L, 1L,
2L, 3L, 4L, 5L), .Label = c("female1", "female2", "female3",
"female4", "female5", "female6"), class = "factor")), .Names = c("x",
"x.x", "x.y", "y"), row.names = c(55L, 109L, 163L, 217L, 271L,
10L, 118L, 172L, 226L, 280L, 19L, 73L, 181L, 235L, 289L, 28L,
82L, 136L, 244L, 298L, 37L, 91L, 145L, 199L, 307L, 46L, 100L,
154L, 208L, 262L, 58L, 112L, 166L, 220L, 274L, 13L, 121L, 175L,
229L, 283L, 22L, 76L, 184L, 238L, 292L, 31L, 85L, 139L, 247L,
301L, 40L, 94L, 148L, 202L, 310L, 49L, 103L, 157L, 211L, 265L,
61L, 115L, 169L, 223L, 277L, 16L, 124L, 178L, 232L, 286L, 25L,
79L, 187L, 241L, 295L, 34L, 88L, 142L, 250L, 304L, 43L, 97L,
151L, 205L, 313L, 52L, 106L, 160L, 214L, 268L, 56L, 110L, 164L,
218L, 272L, 11L, 119L, 173L, 227L, 281L, 20L, 74L, 182L, 236L,
290L, 29L, 83L, 137L, 245L, 299L, 38L, 92L, 146L, 200L, 308L,
47L, 101L, 155L, 209L, 263L, 59L, 113L, 167L, 221L, 275L, 14L,
122L, 176L, 230L, 284L, 23L, 77L, 185L, 239L, 293L, 32L, 86L,
140L, 248L, 302L, 41L, 95L, 149L, 203L, 311L, 50L, 104L, 158L,
212L, 266L, 62L, 116L, 170L, 224L, 278L, 17L, 125L, 179L, 233L,
287L, 26L, 80L, 188L, 242L, 296L, 35L, 89L, 143L, 251L, 305L,
44L, 98L, 152L, 206L, 314L, 53L, 107L, 161L, 215L, 269L, 57L,
111L, 165L, 219L, 273L, 12L, 120L, 174L, 228L, 282L, 21L, 75L,
183L, 237L, 291L, 30L, 84L, 138L, 246L, 300L, 39L, 93L, 147L,
201L, 309L, 48L, 102L, 156L, 210L, 264L, 60L, 114L, 168L, 222L,
276L, 15L, 123L, 177L, 231L, 285L, 24L, 78L, 186L, 240L, 294L,
33L, 87L, 141L, 249L, 303L, 42L, 96L, 150L, 204L, 312L, 51L,
105L, 159L, 213L, 267L, 63L, 117L, 171L, 225L, 279L, 18L, 126L,
180L, 234L, 288L, 27L, 81L, 189L, 243L, 297L, 36L, 90L, 144L,
252L, 306L, 45L, 99L, 153L, 207L, 315L, 54L, 108L, 162L, 216L,
270L), class = "data.frame")
Below are the first 10 rows:
Round Station Partner1 Partner2
55 round1 station1 male1 female2
109 round1 station1 male1 female3
163 round1 station1 male1 female4
217 round1 station1 male1 female5
271 round1 station1 male1 female6
10 round1 station1 male2 female1
118 round1 station1 male2 female3
172 round1 station1 male2 female4
226 round1 station1 male2 female5
280 round1 station1 male2 female6
In this dataset there are the following:
3 Rounds, 3 Stations, 6 Male, 6 Female
How do I do the following:
1) No person should be in the same station twice
2) No person should be in the same round twice
3) No person should be partnered with the same person twice
4) Each station requires 2 male and 2 female
5) Ideally, each person should be with people in a round that have not been with them in previous rounds.
6) A player with the same number at the end (male1, female1) should never be partnered
So the final output should look something like this for one of the rounds:
Round Station Partner1 Partner2
55 round1 station1 male1 female5
109 round1 station1 male3 female4
163 round1 station2 male2 female3
217 round1 station2 male4 female1
271 round1 station3 male5 female6
10 round1 station3 male6 female2
For round2, all 3 stations should still be present but the male and female partners need to change and should follow the rules described above.
Also, let me know if this is mathematically impossible without adding additional stations.
Any help would be great!
Here's a simple solution. I ignore (5) because it does not seem to be a strict requirement (and I don't think it is possible as a strict rule).
This function takes a valid first round as input, and outputs a good configuration for the second round (and the third, if applied again).
I modified your data to have numbers which are much easier to manipulate than strings. Strictly speaking, only the Partner columns (renamed to Male and Female) need to be numbers for the function to work.
# data
# Valid first-round configuration: two male/female pairs at each of the three
# stations. The header has one field fewer than the data rows, so read.table()
# uses the first column as row names.
r1 <- read.table(text = " Round Station Partner1 Partner2
55 round1 station1 male1 female5
109 round1 station1 male3 female4
163 round1 station2 male2 female3
217 round1 station2 male4 female1
271 round1 station3 male5 female6
10 round1 station3 male6 female2", header = TRUE)
# numericize
# Strip the leading non-digit label ("round", "station", "male", "female") and
# keep the number, so ids with more than one digit are parsed correctly too.
r1$Round <- as.numeric(sub("^[^0-9]+", "", r1$Round))
r1$Station <- as.numeric(sub("^[^0-9]+", "", r1$Station))
r1$Male <- as.numeric(sub("^[^0-9]+", "", r1$Partner1))
r1$Female <- as.numeric(sub("^[^0-9]+", "", r1$Partner2))
# Drop the original string columns now that Male/Female hold the numbers.
r1[c("Partner1", "Partner2")] <- list(NULL)
# function
# Build the next round from a valid 6-row round `r` (columns Round, Male,
# Female; rows ordered as two pairs per station). Returns `r` with the Male
# and Female columns rearranged and Round incremented by one.
next_round <- function(r) {
  # Rotate the two columns in opposite directions: each row takes the male
  # from two rows further down and the female from two rows further up
  # (wrapping around), so same-sex station mates move together.
  males <- r$Male[c(3:6, 1:2)]
  females <- r$Female[c(5:6, 1:4)]

  # Rows where the male and female numbers coincide are not allowed.
  clashes <- which(males == females)

  # Resolve each clash by swapping the male with the other male at the same
  # station: rows (1, 2), (3, 4) and (5, 6) share a station.
  mates <- ifelse(clashes %% 2 == 0, clashes - 1, clashes + 1)
  males[c(rbind(mates, clashes))] <- males[c(rbind(clashes, mates))]

  r$Male <- males
  r$Female <- females
  r$Round <- r$Round + 1
  r
}
# demonstration
# Starting round as built above (printed values follow the r1 data).
r1
# Round Station Male Female
# 55 1 1 1 5
# 109 1 1 3 4
# 163 1 2 2 3
# 217 1 2 4 1
# 271 1 3 5 6
# 10 1 3 6 2
# Second round, derived from the first.
(r2 = next_round(r1))
# Round Station Male Female
# 55 2 1 2 6
# 109 2 1 4 2
# 163 2 2 6 5
# 217 2 2 5 4
# 271 2 3 1 3
# 10 2 3 3 1
# Third round, derived from the second.
(r3 = next_round(r2))
# Round Station Male Female
# 55 3 1 6 3
# 109 3 1 5 1
# 163 3 2 1 6
# 217 3 2 3 2
# 271 3 3 4 5
# 10 3 3 2 4
Essentially what this does is that the males and females stay with the same-sex partners from their starting stations. Males move down one station number each round, females move up one station number each round, and if there is ever a Male = Female collision the males at that station switch to correct it. I was inspired by square dancing, where males and females moving in different directions to rotate partners is common.
There is probably a really simple solution to this problem, but I couldn't find it from googling, or the data.table FAQ.
I have a data.table like so:
> test
chr bp ID REF ALT AF AC AN EFFECT IMPACT FUNCLASS CODING GENE pos effRank
1: 1 860416 rs61464428 G A 0.5000000 14 28 UPSTREAM MODIFIER CODING SAMD11 1:860416 21
2: 1 860416 rs61464428 G A 0.5000000 14 28 UPSTREAM MODIFIER CODING SAMD11 1:860416 21
3: 1 860416 rs61464428 G A 0.5000000 14 28 DOWNSTREAM MODIFIER CODING AL645608.1 1:860416 22
4: 1 860461 rs57465118 G A 1.0000000 62 62 UPSTREAM MODIFIER CODING SAMD11 1:860461 21
5: 1 860461 rs57465118 G A 1.0000000 62 62 UPSTREAM MODIFIER CODING SAMD11 1:860461 21
6: 1 860461 rs57465118 G A 1.0000000 62 62 DOWNSTREAM MODIFIER CODING AL645608.1 1:860461 22
7: 1 860521 rs57924093 C A 0.9840000 61 62 UPSTREAM MODIFIER CODING SAMD11 1:860521 21
8: 1 860521 rs57924093 C A 0.9840000 61 62 UPSTREAM MODIFIER CODING SAMD11 1:860521 21
9: 1 860521 rs57924093 C A 0.9840000 61 62 DOWNSTREAM MODIFIER CODING AL645608.1 1:860521 22
10: 1 861261 rs144896029 G A 0.0027270 3 1100 UPSTREAM MODIFIER CODING SAMD11 1:861261 21
11: 1 861261 rs144896029 G A 0.0027270 3 1100 DOWNSTREAM MODIFIER CODING AL645608.1 1:861261 22
12: 1 861332 G A 0.0009074 1 1102 NON_SYNONYMOUS_CODING MODERATE MISSENSE CODING AL645608.1 1:861332 11
13: 1 861332 G A 0.0009074 1 1102 NON_SYNONYMOUS_CODING MODERATE MISSENSE CODING SAMD11 1:861332 11
14: 1 861332 G A 0.0009074 1 1102 NON_SYNONYMOUS_CODING MODERATE MISSENSE CODING SAMD11 1:861332 11
15: 1 861332 G A 0.0009074 1 1102 NON_SYNONYMOUS_CODING MODERATE MISSENSE CODING SAMD11 1:861332 11
16: 1 861332 G A 0.0009074 1 1102 UPSTREAM MODIFIER CODING SAMD11 1:861332 21
17: 1 865455 C G 0.0033190 3 904 UPSTREAM MODIFIER CODING SAMD11 1:865455 21
18: 1 865628 rs41285790 G A 0.0027780 3 1080 NON_SYNONYMOUS_CODING MODERATE MISSENSE CODING SAMD11 1:865628 11
19: 1 865628 rs41285790 G A 0.0027780 3 1080 NON_SYNONYMOUS_CODING MODERATE MISSENSE CODING SAMD11 1:865628 11
20: 1 865628 rs41285790 G A 0.0027780 3 1080 NON_SYNONYMOUS_CODING MODERATE MISSENSE CODING SAMD11 1:865628 11
21: 1 865628 rs41285790 G A 0.0027780 3 1080 SYNONYMOUS_CODING LOW SILENT CODING AL645608.1 1:865628 14
22: 1 865628 rs41285790 G A 0.0027780 3 1080 UPSTREAM MODIFIER CODING SAMD11 1:865628 21
23: 1 866437 rs139076934 C T 0.0009074 1 1102 SYNONYMOUS_CODING LOW SILENT CODING AL645608.1 1:866437 14
24: 1 866437 rs139076934 C T 0.0009074 1 1102 SYNONYMOUS_CODING LOW SILENT CODING SAMD11 1:866437 14
25: 1 866437 rs139076934 C T 0.0009074 1 1102 SYNONYMOUS_CODING LOW SILENT CODING SAMD11 1:866437 14
26: 1 866437 rs139076934 C T 0.0009074 1 1102 SYNONYMOUS_CODING LOW SILENT CODING SAMD11 1:866437 14
27: 1 866461 rs148884928 G A 0.0009074 1 1102 SYNONYMOUS_CODING LOW SILENT CODING SAMD11 1:866461 14
28: 1 866461 rs148884928 G A 0.0009074 1 1102 SYNONYMOUS_CODING LOW SILENT CODING SAMD11 1:866461 14
29: 1 866461 rs148884928 G A 0.0009074 1 1102 SYNONYMOUS_CODING LOW SILENT CODING SAMD11 1:866461 14
30: 1 866461 rs148884928 G A 0.0009074 1 1102 UPSTREAM MODIFIER CODING AL645608.1 1:866461 21
31: 1 866511 rs71576583 CCCCT CCCCTCCCT 1.0000000 148 148 UPSTREAM MODIFIER CODING AL645608.1 1:866511 21
32: 1 871057 C T 0.0009074 1 1102 UPSTREAM MODIFIER CODING SAMD11 1:871057 21
33: 1 871057 C T 0.0009074 1 1102 UPSTREAM MODIFIER CODING AL645608.1 1:871057 21
34: 1 871057 C T 0.0009074 1 1102 UPSTREAM MODIFIER CODING SAMD11 1:871057 21
35: 1 871215 rs28419423 C G 0.0036300 4 1102 SYNONYMOUS_CODING LOW SILENT CODING SAMD11 1:871215 14
36: 1 871215 rs28419423 C G 0.0036300 4 1102 SYNONYMOUS_CODING LOW SILENT CODING SAMD11 1:871215 14
37: 1 871215 rs28419423 C G 0.0036300 4 1102 UPSTREAM MODIFIER CODING SAMD11 1:871215 21
38: 1 871215 rs28419423 C G 0.0036300 4 1102 UPSTREAM MODIFIER CODING SAMD11 1:871215 21
39: 1 871215 rs28419423 C G 0.0036300 4 1102 UPSTREAM MODIFIER CODING AL645608.1 1:871215 21
40: 1 871215 rs28419423 C G 0.0036300 4 1102 DOWNSTREAM MODIFIER CODING SAMD11 1:871215 22
41: 1 871287 C G 0.0009107 1 1098 UPSTREAM MODIFIER CODING SAMD11 1:871287 21
42: 1 871287 C G 0.0009107 1 1098 UPSTREAM MODIFIER CODING SAMD11 1:871287 21
43: 1 871287 C G 0.0009107 1 1098 UPSTREAM MODIFIER CODING AL645608.1 1:871287 21
44: 1 871287 C G 0.0009107 1 1098 DOWNSTREAM MODIFIER CODING SAMD11 1:871287 22
45: 1 871334 rs4072383 G T 0.6680000 474 710 UPSTREAM MODIFIER CODING SAMD11 1:871334 21
46: 1 871334 rs4072383 G T 0.6680000 474 710 UPSTREAM MODIFIER CODING SAMD11 1:871334 21
47: 1 871334 rs4072383 G T 0.6680000 474 710 UPSTREAM MODIFIER CODING AL645608.1 1:871334 21
48: 1 871334 rs4072383 G T 0.6680000 474 710 DOWNSTREAM MODIFIER CODING SAMD11 1:871334 22
49: 1 874415 rs74047412 C T 0.0018250 2 1096 UPSTREAM MODIFIER CODING SAMD11 1:874415 21
50: 1 874415 rs74047412 C T 0.0018250 2 1096 UPSTREAM MODIFIER CODING SAMD11 1:874415 21
chr bp ID REF ALT AF AC AN EFFECT IMPACT FUNCLASS CODING GENE pos effRank
As you can see, many of the rows repeat the same values in some of the columns. What I want to do is remove the duplicated rows, keeping for each position the row with the minimum value of the effRank variable. I have set the key to be chr, bp, and effRank, so the table should be sorted on the basis of those three columns. I got kind of close: the following command returns the rows that I want, but it does not return all of the columns, which I also want.
> test[,min(effRank), by=pos]
pos V1
1: 1:860416 21
2: 1:860461 21
3: 1:860521 21
4: 1:861261 21
5: 1:861332 11
6: 1:865455 21
7: 1:865628 11
8: 1:866437 14
9: 1:866461 14
10: 1:866511 21
11: 1:871057 21
12: 1:871215 14
13: 1:871287 21
14: 1:871334 21
15: 1:874415 21
All I need is a way to make the above command return all columns in the data.table, not just the ones mentioned in the expressions. Otherwise, it works perfectly. Any help is appreciated. The output of dput is below, for those that wish to make their own example.
Cheers,
Davy
> dput(test)
structure(list(chr = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), bp = c(860416L, 860416L, 860416L,
860461L, 860461L, 860461L, 860521L, 860521L, 860521L, 861261L,
861261L, 861332L, 861332L, 861332L, 861332L, 861332L, 865455L,
865628L, 865628L, 865628L, 865628L, 865628L, 866437L, 866437L,
866437L, 866437L, 866461L, 866461L, 866461L, 866461L, 866511L,
871057L, 871057L, 871057L, 871215L, 871215L, 871215L, 871215L,
871215L, 871215L, 871287L, 871287L, 871287L, 871287L, 871334L,
871334L, 871334L, 871334L, 874415L, 874415L), ID = structure(c(10L,
10L, 10L, 8L, 8L, 8L, 9L, 9L, 9L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L, 7L, 7L, 7L, 7L, 7L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 11L,
1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 6L, 6L, 6L,
6L, 12L, 12L), .Label = c("", "rs139076934", "rs144896029", "rs148884928",
"rs28419423", "rs4072383", "rs41285790", "rs57465118", "rs57924093",
"rs61464428", "rs71576583", "rs74047412"), class = "factor"),
REF = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L,
1L, 3L, 3L, 3L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 1L), .Label = c("C",
"CCCCT", "G"), class = "factor"), ALT = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 2L, 4L,
4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L), .Label = c("A", "CCCCTCCCT", "G", "T"), class = "factor"),
AF = c(0.5, 0.5, 0.5, 1, 1, 1, 0.984, 0.984, 0.984, 0.002727,
0.002727, 0.0009074, 0.0009074, 0.0009074, 0.0009074, 0.0009074,
0.003319, 0.002778, 0.002778, 0.002778, 0.002778, 0.002778,
0.0009074, 0.0009074, 0.0009074, 0.0009074, 0.0009074, 0.0009074,
0.0009074, 0.0009074, 1, 0.0009074, 0.0009074, 0.0009074,
0.00363, 0.00363, 0.00363, 0.00363, 0.00363, 0.00363, 0.0009107,
0.0009107, 0.0009107, 0.0009107, 0.668, 0.668, 0.668, 0.668,
0.001825, 0.001825), AC = c(14L, 14L, 14L, 62L, 62L, 62L,
61L, 61L, 61L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L,
3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 148L, 1L, 1L, 1L,
4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 474L, 474L, 474L,
474L, 2L, 2L), AN = c(28L, 28L, 28L, 62L, 62L, 62L, 62L,
62L, 62L, 1100L, 1100L, 1102L, 1102L, 1102L, 1102L, 1102L,
904L, 1080L, 1080L, 1080L, 1080L, 1080L, 1102L, 1102L, 1102L,
1102L, 1102L, 1102L, 1102L, 1102L, 148L, 1102L, 1102L, 1102L,
1102L, 1102L, 1102L, 1102L, 1102L, 1102L, 1098L, 1098L, 1098L,
1098L, 710L, 710L, 710L, 710L, 1096L, 1096L), EFFECT = structure(c(4L,
4L, 1L, 4L, 4L, 1L, 4L, 4L, 1L, 4L, 1L, 2L, 2L, 2L, 2L, 4L,
4L, 2L, 2L, 2L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 3L, 3L, 4L, 4L, 4L, 1L, 4L, 4L, 4L, 1L, 4L, 4L,
4L, 1L, 4L, 4L), .Label = c("DOWNSTREAM", "NON_SYNONYMOUS_CODING",
"SYNONYMOUS_CODING", "UPSTREAM"), class = "factor"), IMPACT = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 3L,
3L, 2L, 2L, 2L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L,
3L, 3L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L), .Label = c("LOW", "MODERATE", "MODIFIER"
), class = "factor"), FUNCLASS = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L,
2L, 2L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L,
1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = c("", "MISSENSE", "SILENT"), class = "factor"),
CODING = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "CODING", class = "factor"),
GENE = structure(c(2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L), .Label = c("AL645608.1",
"SAMD11"), class = "factor"), pos = structure(c(1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 7L,
7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 11L,
11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
14L, 14L, 14L, 14L, 15L, 15L), .Label = c("1:860416", "1:860461",
"1:860521", "1:861261", "1:861332", "1:865455", "1:865628",
"1:866437", "1:866461", "1:866511", "1:871057", "1:871215",
"1:871287", "1:871334", "1:874415"), class = "factor"), effRank = c(21L,
21L, 22L, 21L, 21L, 22L, 21L, 21L, 22L, 21L, 22L, 11L, 11L,
11L, 11L, 21L, 21L, 11L, 11L, 11L, 14L, 21L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 21L, 21L, 21L, 21L, 21L, 14L, 14L, 21L,
21L, 21L, 22L, 21L, 21L, 21L, 22L, 21L, 21L, 21L, 22L, 21L,
21L)), .Names = c("chr", "bp", "ID", "REF", "ALT", "AF",
"AC", "AN", "EFFECT", "IMPACT", "FUNCLASS", "CODING", "GENE",
"pos", "effRank"), row.names = c(NA, -50L), class = c("data.table",
"data.frame"), .internal.selfref = <pointer: 0x0000000004260788>, sorted = c("chr",
"bp", "effRank"))
You can use the internal variable .I, which gives the row number. Then subset using those values, as follows:
# For each pos, take the row number of its first minimum effRank, then
# subset DT by those row numbers so that all columns are returned.
DT[DT[, .I[which.min(effRank)], by = pos][["V1"]]]
It's easier to understand if you write it in two lines as follows:
# Row numbers (.I) of the first minimum-effRank row within each pos group;
# the unnamed j result is returned in column V1.
tmp <- DT[, .I[which.min(effRank)], pos]
# Subset DT by those row numbers, keeping every column.
DT[tmp$V1]
The first line generates a column V1 with all the row numbers of the minimum positions (from your j expression) grouped by pos. Then you just subset them.