loop to paste columns together if condition is met - r

I would like to create a loop that will create a new column, then paste together two columns if a condition is met in a separate column. If the condition is not met, then the column would equal whatever value is in the existing column. Finally, I would like to delete the old columns and rename the new columns to match the old columns. In my example below, I create columns called a1_t, a2_t, a3_t. Then, if a1 == A, paste a1 and a1_c together and place the value in a1_t, otherwise copy the value from a1 into a1_t. Repeat this procedure for a2_t and a3_t.
Here is the data:
set.seed(1)
dat <- data.frame(a1 = sample(LETTERS[1:9],15,replace=T),
a1_c = sample (1:100,15),
a2 = sample(LETTERS[1:9],15,replace=T),
a2_c = sample (1:100,15),
a3 = sample(LETTERS[1:9],15,replace=T),
a3_c = sample (1:100,15))
Here is the long hand way of creating my end goal:
dat$a1_t <- 'none'
dat$a1_t[dat$a1=="A"] <- paste((dat$a1[dat$a1=="A"]),(dat$a1_c[dat$a1=="A"]),sep="_")
dat$a1_t[dat$a1=="B"] <- 'B'
dat$a1_t[dat$a1=="C"] <- 'C'
dat$a1_t[dat$a1=="D"] <- 'D'
dat$a1_t[dat$a1=="E"] <- 'E'
dat$a1_t[dat$a1=="F"] <- 'F'
dat$a1_t[dat$a1=="G"] <- 'G'
dat$a1_t[dat$a1=="H"] <- 'H'
dat$a1_t[dat$a1=="I"] <- 'I'
dat$a2_t <- 'none'
dat$a2_t[dat$a2=="A"] <- paste((dat$a2[dat$a2=="A"]),(dat$a2_c[dat$a2=="A"]),sep="_")
dat$a2_t[dat$a2=="B"] <- 'B'
dat$a2_t[dat$a2=="C"] <- 'C'
dat$a2_t[dat$a2=="D"] <- 'D'
dat$a2_t[dat$a2=="E"] <- 'E'
dat$a2_t[dat$a2=="F"] <- 'F'
dat$a2_t[dat$a2=="G"] <- 'G'
dat$a2_t[dat$a2=="H"] <- 'H'
dat$a2_t[dat$a2=="I"] <- 'I'
dat$a3_t <- 'none'
dat$a3_t[dat$a3=="A"] <- paste((dat$a3[dat$a3=="A"]),(dat$a3_c[dat$a3=="A"]),sep="_")
dat$a3_t[dat$a3=="B"] <- 'B'
dat$a3_t[dat$a3=="C"] <- 'C'
dat$a3_t[dat$a3=="D"] <- 'D'
dat$a3_t[dat$a3=="E"] <- 'E'
dat$a3_t[dat$a3=="F"] <- 'F'
dat$a3_t[dat$a3=="G"] <- 'G'
dat$a3_t[dat$a3=="H"] <- 'H'
dat$a3_t[dat$a3=="I"] <- 'I'
-al

If you are dealing with a small number of columns, you might just want to use within and ifelse, like this:
within(dat, {
a1_t <- ifelse(a1 == "A", paste(a1, a1_c, sep = "_"),
as.character(a1))
a2_t <- ifelse(a2 == "A", paste(a2, a2_c, sep = "_"),
as.character(a2))
a3_t <- ifelse(a3 == "A", paste(a3, a3_c, sep = "_"),
as.character(a3))
})
You can, however, extend the idea programatically, if necessary.
Ive added comments throughout the code below so you can see what it's doing.
## What variables are we checking?
checkMe <- c("a1", "a2", "a3")
## Let's convert those to character first
dat[checkMe] <- lapply(dat[checkMe], as.character)
cbind(dat, ## We'll combine the original data using cbind
setNames( ## setNames is for the resulting column names
lapply(checkMe, function(x) { ## lapply is an optimized loop
Get <- c(x, paste0(x, "_c")) ## We need this for the "if" part
ifelse(dat[, x] == "A", ## logical comparison
## if matched, paste together the value from
## the relevant column
paste(dat[, Get[1]], dat[, Get[2]], sep = "_"),
dat[, x]) ## else return the original value
}),
paste0(checkMe, "_t"))) ## the column names we want
# a1 a1_c a2 a2_c a3 a3_c a1_t a2_t a3_t
# 1 C 50 E 79 I 90 C E I
# 2 D 72 F 3 C 86 D F C
# 3 F 98 E 47 E 39 F E E
# 4 I 37 B 72 C 76 I B C
# 5 B 75 H 67 F 93 B H F
# 6 I 89 G 46 C 42 I G C
# 7 I 20 H 81 E 67 I H E
# 8 F 61 A 41 G 38 F A_41 G
# 9 F 12 G 23 A 30 F G A_30
# 10 A 25 D 7 H 69 A_25 D H
# 11 B 35 H 9 D 19 B H D
# 12 B 2 F 29 H 64 B F H
# 13 G 34 H 95 D 11 G H D
# 14 D 76 E 58 D 22 D E D
# 15 G 30 E 35 E 13 G E E

Related

Run Functions on Multiple Columns of Multiple Data Frames

I have the following data frames:
a = sample(letters[1:10])
b = sample(1:1000000, 10)
c = sample(1:100000000, 10)
d = sample(letters[1:26], 10)
e = sample(1:1000000, 10)
f = sample(1:100000000, 10)
g = sample(letters[1:26], 10)
h = sample(1:1000000, 10)
i = sample(1:100000000, 10)
data = data.frame(a,b,c)
data2 = data.frame(d,e,f)
data3 = data.frame(g,h,i)
data
Col1 Col2 Col3
1 a 626275 52114901
2 j 26543 70683919
3 c 8953 284605
4 h 822415 35245405
5 f 595095 81093354
6 i 812429 71119567
7 g 100678 87776459
8 e 54772 9709717
9 d 19375 43611618
10 b 174711 7254034
data2
Col1 Col2 Col3
1 y 12495 78654339
2 p 423945 79628767
3 k 378461 36729002
4 x 795469 98196961
5 h 240119 71903172
6 v 691621 74276314
7 d 702074 64715230
8 n 718401 21247081
9 s 580166 52888162
10 b 194630 92287079
data3
Col1 Col2 Col3
1 m 391166 98761754
2 v 321615 71765127
3 g 959452 80114937
4 w 380126 25877104
5 f 655875 69610059
6 s 267364 7113484
7 h 391116 6801473
8 i 663616 73956544
9 o 936505 94244449
10 c 514173 82174024
I also have a table with all of the contents of column Col1 (this table is called table k. What I would like to do is write a function that allows me to subset the contents of the data frames by identifying all of the items in Col1 and table k as a match.
Table K:
k
Col1
1 a
2 j
3 c
4 h
5 f
6 i
7 g
8 e
9 d
10 b
11 y
12 p
13 k
14 x
15 h
16 v
17 d
18 n
19 s
20 b
21 m
22 v
23 g
24 w
25 f
26 s
27 h
28 i
29 o
30 c
I then want to only print the contents of column Col2 as an output of the function. Since I have multiple data frames, I know I have to put them in a list and then use lapply once I create the function. I have gotten this far, but my code does not work.
get_tair = function(df, col1, col2){
df[df[[col1]] %in% k$$Col1,]
print(df[[col2]])
}
Any help appreciated. Thanks.
We can place the 'data' objects in a list and use lapply
out_lst <- lapply(list(data, data2, data3),
function(dat) get_tair(dat, col1 = 'Col1', col2 = 'Col2'))
-function used
get_tair = function(df, col1, col2){
df[df[[col1]] %in% k$Col1,]
}
You can use merge :
get_tair = function(df, col){
unique(merge(df, k, by.x = col, by.y = 'Col1'))
}
list_data <- list(data, data2, data3)
lapply(list_data, function(x) get_tair(x, names(x)[1]))
Note that the sample data generated for 3 dataframes has different columns than what you have shown. (Col1, Col2, Col3).

How can a change variable name within for loop to get lists("divide") corresponding to 'slide1' and 'slide2'

Suppose have a dataframe like this :-
df<- read.table(text="groups names
1 a
1 b
1 c
1 d
2 e
2 f
2 g
2 h
", header=T)
I divided this data frame into two groups by using
split_groups <-split(df, df$groups)
Then I used for loop to obtain the overlapping lists of split_group[[1]] and split_group[[2]] as follows:
slide <- list()
for(i in 1:2){
slide[[i]] <- rollapply(split_groups[[i]][,2], width =2,by=1, matrix, align="right")
}
And obtained this :-
slide[[1]]:
a
b
**b**
c
**c**
d
slide[[2]] :
e
f
**f**
g
**g**
h
then I divided slide[[1]] and slide[[2]] into lists of equal rows:
divide <- split(slide[[1]], cumsum(seq_len(nrow(slide[[1]])) %%2 == 1))
and obtained divide[[1]] = a,b ; divide[[2]] = b,c and so on.
Similarly from slide[[2]], divide[[1]] = e,f and so on.
I want to rbind divide[[1]] from split[[1]] and split[[2]] ie set1 = a,b,e,f in the form of list or dataframe.
Similarly divide[[2]] from split[[1]] and split[[2]] ie set2= b,c,f,g.
ie
set1:
a
b
e
f
set2:
b
c
f
g
How can I do this ?
May be you want this: (The slide output is different than it was showed in the post)
divide1 <- split(slide[[1]], cumsum(seq_len(nrow(slide[[1]])) %%2 == 1))
divide2 <- split(slide[[2]], cumsum(seq_len(nrow(slide[[2]])) %%2 == 1))
nm1 <- paste0("set", 1:2)
Map(function(x,y,z) setNames(data.frame(c(x,y)),z), divide1, divide2, nm1)
#$`1`
# set1
#1 a
#2 b
#3 e
#4 f
#$`2`
# set2
#1 b
#2 f
Or if you have more list elements in slide, you could do:
divide <- lapply(slide, function(x) split(x, cumsum(!!seq_len(nrow(x)) %%2)))
divN <- unlist(divide)
lstN <- split(unname(divN), substr(names(divN),1,1))
nm1 <- paste0("set", seq_along(lstN))
Map(function(x,y) setNames(data.frame(x),y), lstN, nm1)
#$`1`
# set1
#1 a
#2 b
#3 e
#4 f
#$`2`
# set2
#1 b
#2 f

replace values in a data.frame with values from another data.frame

I have two dataframes with different dimensions,
df1 <- data.frame(names= sample(LETTERS[1:10]), duration=sample(0:100, 10))
>df1
names duration
1 J 97
2 G 57
3 H 53
4 A 23
5 E 100
6 D 90
7 C 73
8 F 60
9 B 37
10 I 67
df2 <- data.frame(names= LETTERS[1:5], names_new=letters[1:5])
> df2
names names_new
1 A a
2 B b
3 C c
4 D d
5 E e
I want to replace in df1 the values that match df1$names and df2$names but using the df2$names_new. My desired output would be:
> df1
names duration
1 J 97
2 G 57
3 H 53
4 a 23
5 e 100
6 d 90
7 c 73
8 F 60
9 b 37
10 I 67
This is the code I'm using but I wonder if there is a cleaner way to do it with no so many steps,
df2[,1] <- as.character(df2[,1])
df2[,2] <- as.character(df2[,2])
df1[,1] <- as.character(df1[,1])
match(df1[,1], df2[,1]) -> id
which(!is.na(id)==TRUE) -> idx
id[!is.na(id)] -> id
df1[idx,1] <- df2[id,2]
Many thanks
Here's an approach from qdapTools:
library(qdapTools)
df1$names <- df1$names %lc+% df2
The %l+% is a binary operator version of lookup. The left are the terms and the right side is the lookup table. The + means that any noncomparables will revert back to the original. This is a wrapper for the data.table package and is pretty speedy.
Here is the output including set.seed(1) for reproducibility:
set.seed(1)
df1 <- data.frame(names= sample(LETTERS[1:10]), duration=sample(0:100, 10),stringsAsFactors=F)
df2 <- data.frame(names= LETTERS[1:5], names_new=letters[1:5],stringsAsFactors=F)
library(qdapTools)
df1$names <- df1$names %lc+% df2
df1
## names duration
## 1 c 20
## 2 d 17
## 3 e 68
## 4 G 37
## 5 b 74
## 6 H 47
## 7 I 98
## 8 F 93
## 9 J 35
## 10 a 71
Are all names in df2 also in df1? And do you intent to keep them as a factor? If so, you might find this solution helpful.
idx <- match(levels(df2$names), levels(df1$names))
levels(df1$names)[idx] <- levels(df2$names_new)
This works but requires that names and names_new are character and not factor.
set.seed(1)
df1 <- data.frame(names= sample(LETTERS[1:10]), duration=sample(0:100, 10),stringsAsFactors=F)
df2 <- data.frame(names= LETTERS[1:5], names_new=letters[1:5],stringsAsFactors=F)
rownames(df1) <- df1$names
df1[df2$name,]$names <- df2$names_new
Another option using merge:
transform(merge(df1,df2,all.x=TRUE),
names=ifelse(is.na(names_new),as.character(names),
as.character(names_new)))
Another way using match would be (if df1$names and df1$names are characters of course)
df1[match(df2$names, df1$names), "names"] <- df2$names_new

Filter a data.frame with another data.frame using index notation instead of subset

Given:
df <- data.frame(rep = letters[sample(4, 30, replace=TRUE)], loc = LETTERS[sample(5:8, 30, replace=TRUE)], y= rnorm(30))
lookup <- data.frame(rep=letters[1:4], loc=LETTERS[5:8])
This will give me the rows in df that have rep,loc combinations that occur in lookup:
mdply(lookup, function(rep,loc){
r=rep
l=loc
subset(df, rep==r & loc==l)
})
But I've read that using subset() inside a function is poor practice due to scoping issues. So how do I get the desired result using index notation?
In this particular case, merge seems to make the most sense to me:
merge(df, lookup)
# rep loc y
# 1 a E 1.6612394
# 2 a E 1.1050825
# 3 a E -0.7016759
# 4 b F 0.4364568
# 5 d H 1.3246636
# 6 d H -2.2573545
# 7 d H 0.5061980
# 8 d H 0.1397326
A simple alternative might be to paste together the "rep" and "loc" columns from df and from lookup and subset based on that:
df[do.call(paste, df[c("rep", "loc")]) %in% do.call(paste, lookup), ]
# rep loc y
# 4 d H 1.3246636
# 10 b F 0.4364568
# 14 a E -0.7016759
# 15 a E 1.6612394
# 19 d H 0.5061980
# 20 a E 1.1050825
# 22 d H -2.2573545
# 28 d H 0.1397326

Generating a 96 or 384 well plate layout in R

I am trying to write some code which will take a .csv file which contains some sample names as input and will output a data.frame containing the sample names and either a 96 well plate or 384 well plate format (A1, B1, C1...). For those who do not know, a 96 well plate has eight alphabetically labeled rows (A, B, C, D, E, F, G, H) and 12 numerically labeled columns (1:12) and a 384 well plate has 16 alphabetically labeled rows (A:P) and 24 numerically labeled columns (1:24). I am trying to write some code that will generate either of these formats (there CAN be two different functions to do this) allowing for the samples to be labeled either DOWN (A1, B1, C1, D1, E1, F1, G1, H1, A2...) or ACROSS (A1, A2, A3, A4, A5 ...).
So far, I have figured out how to get the row names fairly easily
rowLetter <- rep(LETTERS[1:8], length.out = variable)
#variable will be based on how many samples I have
I just cannot figure out how to get the numeric column names to apply correctly... I have tried:
colNumber <- rep(1:12, times = variable)
but it isn't that simple. All 8 rows must be filled before the col number increases by 1 if you're going 'DOWN' or all 12 columns must be filled before the row letter increases by 1 if you're going 'ACROSS'.
EDIT:
Here is a clunky version. It takes the number of samples that you have, a 'plate format' which IS NOT functional yet, and a direction and will return a data.frame with the wells and plate numbers. Next, I am going to a) fix the plate format so that it will work correctly and b) give this function the ability to take a list of samples names or ID's or whatever and return the sample names, well positions, and plate numbers!
plateLayout <- function(numOfSamples, plateFormat = 96, direction = "DOWN"){
#This assumes that each well will be filled in order. I may need to change this, but lets get it working first.
#Calculate the number of plates required
platesRequired <- ceiling(numOfSamples/plateFormat)
rowLetter <- character(0)
colNumber <- numeric(0)
plateNumber <- numeric(0)
#The following will work if the samples are going DOWN
if(direction == "DOWN"){
for(k in 1:platesRequired){
rowLetter <- c(rowLetter, rep(LETTERS[1:8], length.out = 96))
for(i in 1:12){
colNumber <- c(colNumber, rep(i, times = 8))
}
plateNumber <- c(plateNumber, rep(k, times = 96))
}
plateLayout <- paste0(rowLetter, colNumber)
plateLayout <- data.frame(plateLayout, plateNumber)
plateLayout <- plateLayout[1:numOfSamples,]
return(plateLayout)
}
#The following will work if the samples are going ACROSS
if(direction == "ACROSS"){
for(k in 1:platesRequired){
colNumber <- c(colNumber, rep(1:12, times = 8))
for(i in 1:8){
rowLetter <- c(rowLetter, rep(LETTERS[i], times = 12))
}
plateNumber <- c(plateNumber, rep(k, times = 96))
}
plateLayout <- paste0(rowLetter, colNumber)
plateLayout <- data.frame(plateLayout, plateNumber)
plateLayout <- plateLayout[1:numOfSamples,]
return(plateLayout)
}
}
Does anybody have any thoughts on what else might make this cool? I'm going to use this function to generate .csv or .txt files to use as sample name imports for different instruments so I will be kind of constrained in terms of 'cool features', but I think it would be cool to use ggplot to make a graphic which shows the plates and sample names?
You don't need for loops. Here is a start:
#some sample ids
ids <- c(LETTERS, letters)
#plate size:
n <- 96
nrow <- 8
samples <- character(n)
samples[seq_along(ids)] <- ids
samples <- matrix(samples, nrow=nrow)
colnames(samples) <- seq_len(n/nrow)
rownames(samples) <- LETTERS[seq_len(nrow)]
# 1 2 3 4 5 6 7 8 9 10 11 12
# A "A" "I" "Q" "Y" "g" "o" "w" "" "" "" "" ""
# B "B" "J" "R" "Z" "h" "p" "x" "" "" "" "" ""
# C "C" "K" "S" "a" "i" "q" "y" "" "" "" "" ""
# D "D" "L" "T" "b" "j" "r" "z" "" "" "" "" ""
# E "E" "M" "U" "c" "k" "s" "" "" "" "" "" ""
# F "F" "N" "V" "d" "l" "t" "" "" "" "" "" ""
# G "G" "O" "W" "e" "m" "u" "" "" "" "" "" ""
# H "H" "P" "X" "f" "n" "v" "" "" "" "" "" ""
library(reshape2)
samples <- melt(samples)
samples$position <- paste0(samples$Var1, samples$Var2)
# Var1 Var2 value position
# 1 A 1 A A1
# 2 B 1 B B1
# 3 C 1 C C1
# 4 D 1 D D1
# 5 E 1 E E1
# 6 F 1 F F1
# 7 G 1 G G1
# 8 H 1 H H1
# 9 A 2 I A2
# 10 B 2 J B2
# 11 C 2 K C2
# 12 D 2 L D2
# 13 E 2 M E2
# 14 F 2 N F2
# 15 G 2 O G2
# 16 H 2 P H2
# 17 A 3 Q A3
# 18 B 3 R B3
# 19 C 3 S C3
# 20 D 3 T D3
# 21 E 3 U E3
# 22 F 3 V F3
# 23 G 3 W G3
# 24 H 3 X H3
# 25 A 4 Y A4
# 26 B 4 Z B4
# 27 C 4 a C4
# 28 D 4 b D4
# 29 E 4 c E4
# 30 F 4 d F4
# 31 G 4 e G4
# 32 H 4 f H4
# 33 A 5 g A5
# 34 B 5 h B5
# 35 C 5 i C5
# 36 D 5 j D5
# 37 E 5 k E5
# 38 F 5 l F5
# 39 G 5 m G5
# 40 H 5 n H5
# 41 A 6 o A6
# 42 B 6 p B6
# 43 C 6 q C6
# 44 D 6 r D6
# 45 E 6 s E6
# 46 F 6 t F6
# 47 G 6 u G6
# 48 H 6 v H6
# 49 A 7 w A7
# 50 B 7 x B7
# 51 C 7 y C7
# 52 D 7 z D7
# 53 E 7 E7
# 54 F 7 F7
# 55 G 7 G7
# 56 H 7 H7
# 57 A 8 A8
# 58 B 8 B8
# 59 C 8 C8
# 60 D 8 D8
# 61 E 8 E8
# 62 F 8 F8
# 63 G 8 G8
# 64 H 8 H8
# 65 A 9 A9
# 66 B 9 B9
# 67 C 9 C9
# 68 D 9 D9
# 69 E 9 E9
# 70 F 9 F9
# 71 G 9 G9
# 72 H 9 H9
# 73 A 10 A10
# 74 B 10 B10
# 75 C 10 C10
# 76 D 10 D10
# 77 E 10 E10
# 78 F 10 F10
# 79 G 10 G10
# 80 H 10 H10
# 81 A 11 A11
# 82 B 11 B11
# 83 C 11 C11
# 84 D 11 D11
# 85 E 11 E11
# 86 F 11 F11
# 87 G 11 G11
# 88 H 11 H11
# 89 A 12 A12
# 90 B 12 B12
# 91 C 12 C12
# 92 D 12 D12
# 93 E 12 E12
# 94 F 12 F12
# 95 G 12 G12
# 96 H 12 H12
Use the byrow argument to fill the matrix in the other direction:
samples <- matrix(samples, nrow=nrow, byrow=TRUE)
To fill more than one plate, you can use basically the same idea, but use an array instead of a matrix.
I've never written this code in R before but it should be the same as Perl, Python or Java
For Row major order (going across) the pseudocode algorithm is simply:
for each( i : 0..totalNumWells - 1){
column = (i % numColumns)
row = ((i % totalNumWells) / numColumns)
}
Where numColumns is 12 for 96 well plate, 24 or 384 and totalNumWells is 96 or 384 respectively. This will give you a column and row index in 0-based coordinates which is perfect for accessing arrays.
wellName = ABCs[row], column + 1
Where ABCs is an array of all the valid letters in your plate (or A-Z). +1 is to convert 0-based into 1-based, otherwise the first well will be A0 instead of A1.
I also want to point out that often 384 wells aren't in row major order. I've seen most often sequencing centers preferring a "checker board" pattern A01, A03, A05... then A02, A04, A06..., B01, B03... etc to be able to combine 4 96-well plates into a single 384 well without changing the layout and simplifying the picking robot's work. that's a much harder algorithm to compute the ith well for
The following code does what I set out to do. You can use it to make as many plates as you need, with the assumptions that whatever your import list is will be in order. It can make as many plates as you need and will add a column for "plateNumber" which will indicate which batch it's on. It can only handle 96 or 384 well plates, but that is all I deal in so that is fine.
plateLayout <- function(numOfSamples, plateFormat = 96, direction = "DOWN"){
#This assumes that each well will be filled in order.
#Calculate the number of plates required
platesRequired <- ceiling(numOfSamples/plateFormat)
rowLetter <- character(0)
colNumber <- numeric(0)
plateNumber <- numeric(0)
#define the number of columns and number of rows based on plate format (96 or 384 well plate)
switch(as.character(plateFormat),
"96" = {numberOfColumns = 12; numberOfRows = 8},
"384" = {numberOfColumns = 24; numberOfRows = 16})
#The following will work if the samples are going DOWN
if(direction == "DOWN"){
for(k in 1:platesRequired){
rowLetter <- c(rowLetter, rep(LETTERS[1:numberOfRows], length.out = plateFormat))
for(i in 1:numberOfColumns){
colNumber <- c(colNumber, rep(i, times = numberOfRows))
}
plateNumber <- c(plateNumber, rep(k, times = plateFormat))
}
plateLayout <- paste0(rowLetter, colNumber)
plateLayout <- data.frame(plateNumber,plateLayout)
plateLayout <- plateLayout[1:numOfSamples,]
return(plateLayout)
}
#The following will work if the samples are going ACROSS
if(direction == "ACROSS"){
for(k in 1:platesRequired){
colNumber <- c(colNumber, rep(1:numberOfColumns, times = numberOfRows))
for(i in 1:numberOfRows){
rowLetter <- c(rowLetter, rep(LETTERS[i], times = numberOfColumns))
}
plateNumber <- c(plateNumber, rep(k, times = plateFormat))
}
plateLayout <- paste0(rowLetter, colNumber)
plateLayout <- data.frame(plateNumber, plateLayout)
plateLayout <- plateLayout[1:numOfSamples,]
return(plateLayout)
}
}
An example of how to use this would be as follows
#load whatever data you're going to use to get a plate layout on (sample ID's or names or whatever)
thisData <- read.csv("data.csv")
#make a data.frame containing your sample names and the function's output
#alternatively you can use length() if you have a list
plateLayoutDataFrame <- data.frame(thisData$sampleNames, plateLayout(nrow(thisData), plateFormat = 96, direction = "DOWN")
#It will return something similar to the following, depending on your selections
#data plateNumber plateLayout
#sample1 1 A1
#sample2 1 B1
#sample3 1 C1
#sample4 1 D1
#sample5 1 E1
#sample6 1 F1
#sample7 1 G1
#sample8 1 H1
#sample9 1 A2
#sample10 1 B2
#sample11 1 C2
#sample12 1 D2
#sample13 1 E2
#sample14 1 F2
#sample15 1 G2
That sums up this function for now. Roland offered a good method of doing this which is less verbose, but I wanted to avoid the use of external packages if possible. I'm working on a shiny app now which actually uses this! I want it to be able to automatically subset based on the 'plateNumber' and write each plate as it's own file... for more on this, go to: Automatic multi-file download in R-Shiny
Here's how I'd do it.
put_samples_in_plates = function(sample_list, nwells=96, direction="across")
{
if(!nwells %in% c(96, 384)){
stop("Invalid plate size")
}
nsamples = nrow(sample_list)
nplates = ceiling(nsamples/nwells);
if(nwells==96){
rows = LETTERS[1:8]
cols = 1:12
}else if(nwells==384){
rows = LETTERS[1:16]
cols = 1:24
}else{
stop("Unrecognized nwells")
}
nrows = length(rows)
ncols = length(cols)
if(tolower(direction)=="down"){
single_plate_df = data.frame(row = rep(rows, times=ncols),
col = rep(cols, each=nrows))
}else if(tolower(direction)=="across"){
single_plate_df = data.frame(row = rep(rows, each=ncols),
col = rep(cols, times=nrows))
}else{
stop("Unrecognized direction")
}
single_plate_df = transform(single_plate_df,
well = sprintf("%s%02d", row, col))
toobig_plate_df = cbind(data.frame(plate=rep(1:nplates, each=nwells)),
do.call("rbind", replicate(nplates,
single_plate_df,
simplify=FALSE)))
res = cbind(sample_list, toobig_plate_df[1:nsamples,])
return(res)}
# Quick test
a_sample_list = data.frame(x=1:386, y=rnorm(386))
r.096.across = put_samples_in_plates(sample_list = a_sample_list,
nwells= 96,
direction="across")
r.096.down = put_samples_in_plates(sample_list = a_sample_list,
nwells= 96,
direction="down")
r.384.across = put_samples_in_plates(sample_list = a_sample_list,
nwells=384,
direction="across")
r.384.down = put_samples_in_plates(sample_list = a_sample_list,
nwells=384,
direction="down")
Two points worth noting in the function above:
the use of the times and each parameters within the rep function to differentiate "across" and "down" directions, and
the use of replicate to repeat the individual plate as many times as needed along with the use of a call to rbind from do.call.

Resources