I think I missed something in my use of the loess function, and I can't understand what I did wrong. I have a data frame in which I store the output (count) of 3 different software tools for 26 different genes on the genomes of different patients. Each of the 3 tools was run on the same genome, but with different downsampling rates.
I pooled the results of all the patients by genes. At the end I have a data frame with 4 columns: samplexxx (downsampling rate), software (name of the software I used), gene (the name of the gene) and count (count results given by the software).
My goal is to estimate the downsampling effect (samplexxx) on the count given by the software, and I want to do some regression to be able to compare them with each other.
# Downsampling rates in percent: 5, 10, ..., 100 (20 values).
rate <- seq(5, 100, by = 5)
my attempts:
# Fit one LOESS curve of count against downsampling rate for each gene and
# gather the fitted values in a long table (one row per input row).
# NOTE(review): each fit pools the rows of all software for a gene, so the
# fitted value at a given rate is shared by every software of that gene.
gene_names <- unique(as.character(data2$gene))
datalist <- vector("list", length(gene_names))
for (i in seq_along(gene_names)) {
  name <- gene_names[i]
  print(name)
  # Subset once and reuse it for fitting, predicting and building the output.
  gene_rows <- data2[data2$gene == name, ]
  mod <- loess(count ~ samplexxx, data = gene_rows)
  # Predict for every row of the gene, including rows whose count is NA.
  xfit <- unname(predict(mod, newdata = gene_rows))
  print(xfit)
  # Size the result from the data instead of assuming 60 rows per gene.
  df <- data.frame(
    down = gene_rows$samplexxx,
    software = gene_rows$software,
    gene = gene_rows$gene,
    loess = xfit
  )
  datalist[[i]] <- df
}
data_loess <- do.call(rbind, datalist)
# Distribution of the fitted values per gene, coloured by software.
ggplot(data_loess, aes(x = gene, y = loess, fill = software)) +
  geom_boxplot()
and:
# Fit a single LOESS curve of count against downsampling rate on all rows.
mod <- loess(count ~ samplexxx, data = data2)
xfit <- unname(predict(mod, newdata = data2))
# ggplot maps data-frame columns, so attach the fitted values to a copy of the
# data under the column name used in aes().
plot_data <- cbind(data2, loess = xfit)
# Write one PDF per downsampling rate, each showing only that rate's rows.
for (down in rate) {
  print(down)
  title <- paste("loess_downsampling", down)
  out <- paste0("loess_downsampling", down, ".pdf")
  pdf(out, width = 10)
  # Inside a loop a ggplot object is not auto-printed; the whole plot
  # expression must be passed to print() for it to reach the PDF device.
  print(
    ggplot(plot_data[plot_data$samplexxx == down, ],
           aes(x = factor(down), y = loess, fill = software)) +
      geom_boxplot() +
      ggtitle(title)
  )
  dev.off()
}
Sample data:
> dput(data2)
structure(list(samplexxx = c(5L, 10L, 15L, 20L, 25L, 30L, 35L,
40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L,
5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L,
70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L,
35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L,
100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L,
65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L,
30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L,
95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L,
60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L,
25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L,
90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L,
55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L,
20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L,
85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L,
50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L,
15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L,
80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L,
45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L,
5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L,
70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L,
35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L,
100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L,
65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L,
30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L,
95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L,
60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L,
25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L,
90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L,
55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L,
20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L,
85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L,
50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L,
15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L,
80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L,
45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L,
5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L,
70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L,
35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L,
100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L,
65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L,
30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L,
95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L,
60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L,
25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L,
90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L,
55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L,
20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L,
85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L,
50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L,
15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L,
80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L,
45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L,
5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L,
70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L,
35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L,
100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L,
65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L,
30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L,
95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L,
60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L,
25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L,
90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L,
55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L,
20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L,
85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L,
50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L,
15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L,
80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L,
45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L,
5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L,
70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L,
35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L,
100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L,
65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L,
30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L,
95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L,
60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L,
25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L,
90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L,
55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L,
20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L,
85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L,
50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L,
15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L,
80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L,
45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L,
5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L,
70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L,
35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L,
100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L,
65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L,
30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L,
95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L,
60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L,
25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L,
90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L,
55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L, 15L,
20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L,
85L, 90L, 95L, 100L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L,
50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 95L, 100L, 5L, 10L,
15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L,
80L, 85L, 90L, 95L, 100L), software = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), .Label = c("EH", "GangSTR", "Tred"), class = "factor"),
gene = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L,
17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L,
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 21L, 21L, 21L, 21L,
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L,
21L, 21L, 21L, 21L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L,
22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L,
17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L,
17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L,
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L,
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L,
22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L,
22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L,
17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L,
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L,
19L, 19L, 19L, 19L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L,
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 22L, 22L, 22L, 22L,
22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L,
22L, 22L, 22L, 22L), .Label = c("AFF2", "AR", "ATN1", "ATXN1",
"ATXN10", "ATXN2", "ATXN3", "ATXN7", "C9ORF72", "CACNA1A",
"CBL", "CNBP", "CSTB", "DIP2B", "DMPK", "FMR1", "FXN", "HTT",
"JPH3", "NOP56", "PPP2R2B", "TBP"), class = "factor"), count = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, 24L, 24L, 24L, 24L, 24L,
24L, 24L, 24L, 24L, 24L, 24L, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L,
21L, NA, NA, NA, NA, NA, NA, NA, NA, NA, 17L, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 15L, 15L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, NA, NA, NA, NA, 20L, 34L, 31L, 33L, 34L, 34L, 34L, 34L,
34L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, NA, NA, NA, NA, NA,
22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L,
22L, 22L, 22L, NA, NA, NA, NA, NA, 22L, 24L, 24L, 24L, 24L,
24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, NA, NA,
NA, NA, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, NA, NA, NA, NA, 6L, 8L, 8L,
8L, 8L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, NA, NA,
NA, NA, 11L, NA, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, NA, NA, NA, 12L, 5L, NA, 12L,
12L, 5L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, NA, NA, NA, NA, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 20L, 20L, 18L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, NA, NA, NA, NA, 27L, 24L,
21L, 14L, 27L, 14L, 21L, 27L, 27L, 14L, 27L, 27L, 27L, 27L,
27L, 27L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 68L, 73L,
78L, 54L, 79L, 76L, 87L, 72L, 62L, 63L, NA, NA, NA, NA, NA,
27L, 27L, 27L, 28L, 27L, 27L, 64L, 27L, 64L, 64L, 27L, 27L,
27L, 27L, 27L, NA, NA, NA, NA, NA, 18L, 20L, 18L, 20L, 20L,
18L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, NA, NA,
NA, NA, NA, 15L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 9L, 7L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, NA, NA, NA, NA, NA, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, NA, NA, NA, NA, NA, 35L, 29L, 35L, 35L, 30L, 35L,
32L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 11L, 19L, 19L,
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L,
19L, 19L, 19L, 19L, 19L, 20L, 11L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
20L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 33L, 33L, 32L, 33L, 33L, 33L, 33L, 33L, 33L, 33L,
33L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, NA, 21L,
22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L,
22L, 22L, 22L, 22L, 22L, 22L, 19L, 21L, 21L, 21L, 21L, 21L,
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L,
21L, 19L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 8L, 8L,
7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 11L, NA, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 7L, 15L, 15L, 13L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 27L, 19L, 27L, 27L, 27L,
27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L,
27L, 27L, NA, 76L, 23L, 23L, 23L, 32L, 65L, 32L, 28L, 32L,
28L, 32L, 32L, 23L, 28L, 32L, 28L, 28L, 32L, 84L, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 14L, 18L, 17L, 17L, 17L, 17L, 17L, 17L, 17L,
17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 15L,
NA, NA, 15L, NA, 15L, NA, NA, 15L, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 9L, NA, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, NA, 28L, 36L, 36L, NA, 36L, 36L, 36L,
36L, NA, 36L, NA, 36L, 36L, 36L, 36L, 36L, NA, 36L, 36L,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
1L, 8L, 18L, 16L, 15L, 14L, 15L, 16L, 15L, 16L, 14L, 15L,
14L, 14L, 14L, 14L, 16L, 16L, 16L, 16L, 31L, 28L, 31L, 31L,
32L, 32L, 32L, 33L, 31L, 33L, 32L, 31L, 32L, 32L, 32L, 32L,
32L, 32L, 32L, 32L, 7L, 18L, 22L, 22L, 22L, 22L, 22L, 22L,
22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L,
19L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L,
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 5L, 6L, 6L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 12L, 11L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 5L, 7L, 7L, 7L, 7L, 11L, 11L, 7L,
11L, 15L, 15L, 11L, 7L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
1L, 2L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 20L, 17L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 1L, 2L, 1L, 1L,
1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 1L, 15L, 6L, 22L, 13L, 14L, 13L, 14L, 13L, 14L, 14L,
27L, 27L, 14L, 14L, 27L, 14L, 27L, 14L, 27L, NA, 15L, 20L,
20L, 20L, 20L, 40L, 20L, 40L, 20L, 40L, 40L, 40L, 40L, 20L,
40L, 40L, 40L, 40L, 32L, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 15L, 14L,
17L, 17L, 17L, 19L, 17L, 13L, 17L, 17L, 17L, 17L, 17L, 17L,
17L, 17L, 17L, 17L, 17L, 17L, 5L, 3L, 1L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 5L, 3L,
1L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 12L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, NA,
2L, 3L, 2L, 29L, 33L, 33L, 35L, 33L, 35L, 35L, 33L, 35L,
35L, 33L, 35L, 35L, 35L, 35L, 35L)), class = "data.frame", row.names = c(NA,
-1320L))
I believe the loess fit should be done separately for each level of "software".
# Fit a separate LOESS curve for each software, then stack the per-software
# tables (original columns plus the fitted values in `count_pred`).
software <- unique(data2$software)
data_loess <- do.call(rbind, lapply(software, \(sw) {
  rows <- data2[which(data2$software == sw), ]
  fit <- loess(count ~ samplexxx, data = rows)
  cbind(rows, count_pred = predict(fit, newdata = rows))
}))
Note: R version 4.1.2 (2021-11-01)
Gives:
head(data_loess[data_loess$samplexxx > 80, ], 10)
# samplexxx software gene count count_pred
# 17 85 EH AFF2 24 22.69004
# 18 90 EH AFF2 24 22.31879
# 19 95 EH AFF2 24 21.83428
# 20 100 EH AFF2 24 21.25618
# 37 85 EH AR 21 22.69004
# 38 90 EH AR 21 22.31879
# 39 95 EH AR 21 21.83428
# 40 100 EH AR 21 21.25618
# 57 85 EH ATN1 NA 22.69004
# 58 90 EH ATN1 NA 22.31879
And here a plot of "count" predictions on "samplexxx".
# Scatter plot of the LOESS predictions against the downsampling rate, with
# point colours derived from the software factor codes (shifted by one).
plot(
  count_pred ~ samplexxx,
  data = data_loess,
  col = as.numeric(software) + 1,
  pch = 20,
  xlab = 'Downsampling',
  ylab = 'Count (LOESS)'
)
# Legend entries use the same code-to-colour shift as the points.
legend(
  'topleft',
  legend = software,
  pch = 19,
  col = as.numeric(software) + 1,
  horiz = TRUE,
  cex = 0.7,
  title = 'Software'
)
Looks interesting, but I'm not sure if it's absolutely right.
My answer uses something other than for loops, which is probably new to you; however, it's the R-ish way and it's much shorter to code. Here lapply() does the looping.
Anyway, hope this helps.
I have a data frame with 2 variables:
structure(list(X1 = structure(c(17L, 27L, 6L, 1L, 28L, 1L, 1L,4L, 17L, 28L, 28L, 12L, 21L, 28L, 28L, 8L, 28L, 1L, 1L, 10L, 4L, 21L, 30L, 1L, 8L, 28L, 1L, 1L, 1L, 1L, 8L, 1L, 17L, 1L, 1L, 28L, 8L, 23L, 15L, 23L, 25L, 13L, 8L, 4L, 28L, 10L, 1L, 30L, 13L, 4L, 1L, 1L, 17L, 13L, 13L, 8L, 4L, 4L, 4L, 28L, 28L, 13L,1L, 4L, 28L, 1L, 1L, 1L, 1L, 1L, 12L, 2L, 6L, 1L, 8L, 1L, 21L, 1L, 21L, 1L, 30L,13L, 25L, 17L, 1L, 28L, 13L, 1L, 1L, 1L, 1L,8L, 30L, 25L, 28L, 4L, 1L, 13L, 17L, 4L,1L, 1L, 28L, 1L, 1L, 8L, 1L, 8L, 1L, 13L, 1L, 1L, 1L, 4L, 6L, 1L, 1L, 30L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 6L, 6L, 1L, 15L, 21L, 10L, 21L, 1L, 10L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 28L, 28L, 1L, 30L, 15L, 25L, 6L, 17L, 25L, 15L, 8L, 18L, 22L, 14L, 22L, 28L, 30L, 3L, 30L, 14L, 18L, 22L, 24L, 10L, 26L, 26L, 18L, 26L, 30L, 29L, 18L, 14L, 9L, 9L, 16L, 16L, 29L, 18L, 16L, 27L, 24L, 14L, 26L, 5L, 22L, 28L, 22L, 11L, 9L, 26L, 30L, 18L, 28L, 16L, 26L, 7L, 30L, 7L, 28L, 5L, 18L, 9L, 26L, 24L, 27L, 16L, 16L, 14L, 26L, 29L, 5L, 22L, 24L, 26L, 18L, 27L, 9L, 18L, 11L, 14L, 18L, 22L, 29L, 26L, 22L, 26L, 20L, 24L, 14L, 7L, 16L, 24L, 26L, 29L, 24L, 24L, 24L, 20L, 20L, 24L, 11L, 20L, 29L, 16L, 18L, 24L, 24L, 7L, 24L, 18L, 11L, 11L, 24L, 24L, 7L, 11L, 18L, 24L, 24L, 16L, 29L, 7L, 30L, 24L, 22L, 24L, 18L, 26L, 9L, 9L, 24L, 29L, 9L, 24L, 30L, 11L, 24L, 16L, 26L, 26L, 26L, 30L, 26L, 16L, 26L, 24L, 29L, 20L, 24L, 14L, 9L, 7L, 29L, 29L, 15L, 6L, 15L, 2L, 6L, 6L, 3L, 2L, 17L, 30L, 27L, 23L, 2L, 15L, 8L, 13L, 21L, 28L, 23L, 25L, 1L, 25L, 19L, 27L, 23L, 15L, 19L, 19L, 23L, 2L, 27L, 27L, 15L, 2L, 2L, 3L, 23L, 2L, 23L, 6L, 2L, 15L, 13L,1L, 1L, 13L, 28L, 1L, 1L, 28L, 21L, 1L, 28L, 4L, 1L, 17L, 17L, 13L, 21L, 1L, 1L, 1L, 17L, 1L, 1L, 17L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 13L, 1L, 1L, 1L, 1L, 8L,25L, 1L, 28L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 8L, 4L, 1L, 25L, 28L, 13L, 1L, 1L, 28L, 1L, 4L, 1L, 1L, 8L, 1L, 8L, 13L, 4L, 28L, 21L, 28L, 28L, 28L, 28L, 28L, 8L, 1L, 1L, 1L, 1L, 13L, 
21L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 28L, 4L, 1L, 17L, 17L, 28L, 1L, 13L, 8L, 17L, 1L, 13L, 13L, 8L, 4L, 1L, 17L, 25L, 1L, 1L, 8L, 8L, 1L, 4L, 17L, 21L),
.Label = c("<8", ">1024", "1024", "11", "11.000000000000007", "128", "128.00000000000009", "16", "16.000000000000007", "181", "181.00000000000006", "22", "23", "23.000000000000011", "256", "256.00000000000017", "32", "32.000000000000014", "362", "362.00000000000017", "45", "45.000000000000014", "512", "512.00000000000045", "64", "64.000000000000028", "724", "8", "8.0000000000000018", "90"),
class = "factor"),
X2 = structure(c(7L, 2L, 2L, 8L, 18L, 4L, 13L, 18L, 8L, 13L, 8L, 18L, 12L, 13L, 18L, 16L, 7L, 5L, 1L, 16L, 18L, 18L, 18L, 12L, 7L, 1L, 4L, 4L, 2L,16L, 12L, 12L, 2L, 2L, 13L, 13L, 18L, 2L, 16L, 2L, 16L, 16L, 2L, 12L, 16L, 2L, 12L,2L, 2L, 16L, 16L, 2L, 2L, 2L, 2L, 2L, 7L, 18L, 18L, 18L, 13L, 18L, 13L, 18L, 9L, 13L, 8L, 4L, 1L, 13L, 8L, 2L, 16L, 12L, 7L, 7L, 18L, 18L, 18L, 12L, 16L, 7L, 16L, 7L, 12L, 12L, 16L, 12L, 13L, 13L, 12L, 16L, 12L, 12L, 7L, 7L, 13L,16L, 7L, 18L, 16L, 13L, 18L, 4L, 12L, 7L, 4L, 18L, 18L, 18L, 9L, 17L, 13L, 7L, 12L, 7L, 18L, 12L, 18L, 13L, 9L, 1L, 18L, 1L, 13L, 13L, 13L, 1L, 1L, 13L, 12L, 4L, 1L,1L, 4L, 12L, 9L, 1L, 1L, 1L, 2L, 12L, 9L, 2L, 18L, 2L, 18L, 7L, 12L, 1L, 9L, 9L, 7L, 18L, 9L, 18L, 1L, 12L, 13L, 12L, 16L, 7L, 12L, 7L, 16L, 2L, 12L,7L, 16L, 12L, 16L, 2L, 12L, 2L, 15L, 7L, 7L, 2L, 7L, 3L, 12L, 16L, 1L, 17L, 2L, 18L, 5L, 7L, 1L, 16L, 7L, 10L, 1L, 12L, 18L, 16L, 16L, 13L, 12L, 7L, 2L, 1L, 9L, 18L, 12L, 13L, 2L, 2L, 12L, 2L, 2L, 2L, 16L, 2L, 1L, 18L, 12L, 7L, 2L, 2L, 12L, 7L, 12L, 4L, 2L, 18L, 13L, 2L, 16L, 7L, 2L, 2L, 12L, 2L, 14L, 12L, 12L, 16L, 1L, 2L, 4L, 2L, 2L, 2L, 17L, 2L, 2L, 2L, 18L, 16L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 12L, 2L, 2L, 1L, 2L, 12L, 18L, 2L, 15L, 16L, 16L, 2L, 2L, 2L, 2L, 11L, 12L, 14L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 16L, 16L, 12L, 2L, 12L, 2L, 2L, 2L, 12L, 2L,16L, 2L, 12L, 14L, 7L, 2L, 4L, 14L, 2L, 16L, 15L, 7L, 16L, 18L, 2L, 16L, 2L, 2L, 12L, 12L, 2L, 2L, 4L, 2L, 2L, 2L, 16L, 2L, 12L,18L, 3L, 16L, 2L, 2L, 13L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 16L, 16L, 2L, 2L, 4L, 4L, 11L, 13L, 4L, 4L, 8L, 4L, 13L, 1L, 4L, 1L, 1L, 2L, 2L, 11L, 18L, 8L, 8L, 4L, 7L, 8L, 4L, 8L, 4L, 4L, 8L, 8L, 1L, 4L, 8L, 4L, 13L, 1L, 6L, 1L, 17L, 2L, 2L, 8L, 18L, 8L, 8L, 4L, 7L, 8L, 17L, 8L, 4L, 1L, 4L, 13L, 1L, 2L, 4L, 16L, 13L, 4L, 4L, 17L, 4L, 7L, 4L, 4L, 1L, 1L, 4L, 1L, 17L, 8L, 1L, 8L, 1L, 4L, 1L, 8L, 8L, 8L, 1L, 13L, 16L, 16L, 17L, 8L, 13L, 1L, 4L, 7L, 1L, 1L, 4L, 4L, 8L, 6L, 4L, 1L, 12L, 13L, 8L, 4L, 
4L, 18L, 2L, 4L, 8L, 13L, 17L,13L, 18L, 7L, 16L, 7L, 1L, 13L, 8L, 13L, 4L, 1L, 7L),
.Label = c("<8", ">1024", "1024", "11", "128", "16", "181", "22", "23", "256", "32", "362", "45", "512", "64", "724", "8", "90"), class = "factor")),
.Names = c("X1", "X2"),
row.names = c(NA, -471L),
class = "data.frame")
I have 2 questions
1) Each column contains some values prefixed with a greater-than or less-than sign. I want to remove the > and < characters from the data frame and keep only the numbers. I can do it in Excel, but I want to learn how to do it in R.
2) I want to reduce the number of decimals to integer/whole number as some are having more number of decimals.
It may be a small question, but I am struggling to do this. I would highly appreciate any help.
You can use dplyr::mutate_all and stringr::str_replace_all.
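The decimal parts seem to disappear after as.numeric because they are tiny (on the order of 10^(-13) or smaller), far below the default print precision; as.numeric itself does not round, so wrap the result in round() if you need exact whole numbers.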
your_df <- structure(list(X1 = structure(c(17L, 27L, 6L, 1L, 28L, 1L, 1L,4L, 17L, 28L, 28L, 12L, 21L, 28L, 28L, 8L, 28L, 1L, 1L, 10L, 4L, 21L, 30L, 1L, 8L, 28L, 1L, 1L, 1L, 1L, 8L, 1L, 17L, 1L, 1L, 28L, 8L, 23L, 15L, 23L, 25L, 13L, 8L, 4L, 28L, 10L, 1L, 30L, 13L, 4L, 1L, 1L, 17L, 13L, 13L, 8L, 4L, 4L, 4L, 28L, 28L, 13L,1L, 4L, 28L, 1L, 1L, 1L, 1L, 1L, 12L, 2L, 6L, 1L, 8L, 1L, 21L, 1L, 21L, 1L, 30L,13L, 25L, 17L, 1L, 28L, 13L, 1L, 1L, 1L, 1L,8L, 30L, 25L, 28L, 4L, 1L, 13L, 17L, 4L,1L, 1L, 28L, 1L, 1L, 8L, 1L, 8L, 1L, 13L, 1L, 1L, 1L, 4L, 6L, 1L, 1L, 30L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 6L, 6L, 1L, 15L, 21L, 10L, 21L, 1L, 10L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 28L, 28L, 1L, 30L, 15L, 25L, 6L, 17L, 25L, 15L, 8L, 18L, 22L, 14L, 22L, 28L, 30L, 3L, 30L, 14L, 18L, 22L, 24L, 10L, 26L, 26L, 18L, 26L, 30L, 29L, 18L, 14L, 9L, 9L, 16L, 16L, 29L, 18L, 16L, 27L, 24L, 14L, 26L, 5L, 22L, 28L, 22L, 11L, 9L, 26L, 30L, 18L, 28L, 16L, 26L, 7L, 30L, 7L, 28L, 5L, 18L, 9L, 26L, 24L, 27L, 16L, 16L, 14L, 26L, 29L, 5L, 22L, 24L, 26L, 18L, 27L, 9L, 18L, 11L, 14L, 18L, 22L, 29L, 26L, 22L, 26L, 20L, 24L, 14L, 7L, 16L, 24L, 26L, 29L, 24L, 24L, 24L, 20L, 20L, 24L, 11L, 20L, 29L, 16L, 18L, 24L, 24L, 7L, 24L, 18L, 11L, 11L, 24L, 24L, 7L, 11L, 18L, 24L, 24L, 16L, 29L, 7L, 30L, 24L, 22L, 24L, 18L, 26L, 9L, 9L, 24L, 29L, 9L, 24L, 30L, 11L, 24L, 16L, 26L, 26L, 26L, 30L, 26L, 16L, 26L, 24L, 29L, 20L, 24L, 14L, 9L, 7L, 29L, 29L, 15L, 6L, 15L, 2L, 6L, 6L, 3L, 2L, 17L, 30L, 27L, 23L, 2L, 15L, 8L, 13L, 21L, 28L, 23L, 25L, 1L, 25L, 19L, 27L, 23L, 15L, 19L, 19L, 23L, 2L, 27L, 27L, 15L, 2L, 2L, 3L, 23L, 2L, 23L, 6L, 2L, 15L, 13L,1L, 1L, 13L, 28L, 1L, 1L, 28L, 21L, 1L, 28L, 4L, 1L, 17L, 17L, 13L, 21L, 1L, 1L, 1L, 17L, 1L, 1L, 17L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 13L, 1L, 1L, 1L, 1L, 8L,25L, 1L, 28L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 8L, 4L, 1L, 25L, 28L, 13L, 1L, 1L, 28L, 1L, 4L, 1L, 1L, 8L, 1L, 8L, 13L, 4L, 28L, 21L, 28L, 28L, 28L, 28L, 28L, 8L, 1L, 1L, 1L, 
1L, 13L, 21L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 28L, 4L, 1L, 17L, 17L, 28L, 1L, 13L, 8L, 17L, 1L, 13L, 13L, 8L, 4L, 1L, 17L, 25L, 1L, 1L, 8L, 8L, 1L, 4L, 17L, 21L), .Label = c("<8", ">1024", "1024", "11", "11.000000000000007", "128", "128.00000000000009", "16", "16.000000000000007", "181", "181.00000000000006", "22", "23", "23.000000000000011", "256", "256.00000000000017", "32", "32.000000000000014", "362", "362.00000000000017", "45", "45.000000000000014", "512", "512.00000000000045", "64", "64.000000000000028", "724", "8", "8.0000000000000018", "90"), class = "factor"), X2 = structure(c(7L, 2L, 2L, 8L, 18L, 4L, 13L, 18L, 8L, 13L, 8L, 18L, 12L, 13L, 18L, 16L, 7L, 5L, 1L, 16L, 18L, 18L, 18L, 12L, 7L, 1L, 4L, 4L, 2L,16L, 12L, 12L, 2L, 2L, 13L, 13L, 18L, 2L, 16L, 2L, 16L, 16L, 2L, 12L, 16L, 2L, 12L,2L, 2L, 16L, 16L, 2L, 2L, 2L, 2L, 2L, 7L, 18L, 18L, 18L, 13L, 18L, 13L, 18L, 9L, 13L, 8L, 4L, 1L, 13L, 8L, 2L, 16L, 12L, 7L, 7L, 18L, 18L, 18L, 12L, 16L, 7L, 16L, 7L, 12L, 12L, 16L, 12L, 13L, 13L, 12L, 16L, 12L, 12L, 7L, 7L, 13L,16L, 7L, 18L, 16L, 13L, 18L, 4L, 12L, 7L, 4L, 18L, 18L, 18L, 9L, 17L, 13L, 7L, 12L, 7L, 18L, 12L, 18L, 13L, 9L, 1L, 18L, 1L, 13L, 13L, 13L, 1L, 1L, 13L, 12L, 4L, 1L,1L, 4L, 12L, 9L, 1L, 1L, 1L, 2L, 12L, 9L, 2L, 18L, 2L, 18L, 7L, 12L, 1L, 9L, 9L, 7L, 18L, 9L, 18L, 1L, 12L, 13L,
12L, 16L, 7L, 12L, 7L, 16L, 2L, 12L,7L, 16L, 12L, 16L, 2L, 12L, 2L, 15L, 7L, 7L, 2L, 7L, 3L, 12L, 16L, 1L, 17L, 2L, 18L, 5L, 7L, 1L, 16L, 7L, 10L, 1L, 12L, 18L, 16L, 16L, 13L, 12L, 7L, 2L, 1L, 9L, 18L, 12L, 13L, 2L, 2L, 12L, 2L, 2L, 2L, 16L, 2L, 1L, 18L, 12L, 7L, 2L, 2L, 12L, 7L, 12L, 4L, 2L, 18L, 13L, 2L, 16L, 7L, 2L, 2L, 12L, 2L, 14L, 12L, 12L, 16L, 1L, 2L, 4L, 2L, 2L, 2L, 17L, 2L, 2L, 2L, 18L, 16L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 12L, 2L, 2L, 1L, 2L, 12L, 18L, 2L, 15L, 16L, 16L, 2L, 2L, 2L, 2L, 11L, 12L, 14L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 16L, 16L, 12L, 2L, 12L, 2L, 2L, 2L, 12L, 2L,16L, 2L, 12L, 14L, 7L, 2L, 4L, 14L, 2L, 16L, 15L, 7L, 16L, 18L, 2L, 16L, 2L, 2L, 12L, 12L, 2L, 2L, 4L, 2L, 2L, 2L, 16L, 2L, 12L,18L, 3L, 16L, 2L, 2L, 13L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 16L, 16L, 2L, 2L, 4L, 4L, 11L, 13L, 4L, 4L, 8L, 4L, 13L, 1L, 4L, 1L, 1L, 2L, 2L, 11L, 18L, 8L, 8L, 4L, 7L, 8L, 4L, 8L, 4L, 4L, 8L, 8L, 1L, 4L, 8L, 4L, 13L, 1L, 6L, 1L, 17L, 2L, 2L, 8L, 18L, 8L, 8L, 4L, 7L, 8L, 17L, 8L, 4L, 1L, 4L, 13L, 1L, 2L, 4L, 16L, 13L, 4L, 4L, 17L, 4L, 7L, 4L, 4L, 1L, 1L, 4L, 1L, 17L, 8L, 1L, 8L, 1L, 4L, 1L, 8L, 8L, 8L, 1L, 13L, 16L, 16L, 17L, 8L, 13L, 1L, 4L, 7L, 1L, 1L, 4L, 4L, 8L, 6L, 4L, 1L, 12L, 13L, 8L, 4L, 4L, 18L, 2L, 4L, 8L, 13L, 17L,13L, 18L, 7L, 16L, 7L, 1L, 13L, 8L, 13L, 4L, 1L, 7L),
.Label = c("<8", ">1024", "1024", "11", "128", "16", "181", "22", "23", "256", "32", "362", "45", "512", "64", "724", "8", "90"), class = "factor")), .Names = c("X1", "X2"), row.names = c(NA, -471L), class = "data.frame")
library(dplyr)
library(stringr)
# Strip the "<" / ">" markers from every column and convert to whole numbers.
# as.character() makes the factor -> text step explicit, and round() removes
# the tiny fractional residue (e.g. 32.000000000000014) that as.numeric() keeps.
mutate_all(your_df, function(x) {
  round(as.numeric(str_replace_all(as.character(x), pattern = "<|>", replacement = "")))
})
#> X1 X2
#> 1 32 181
#> 2 724 1024
#> 3 128 1024
#> 4 8 22
#> 5 8 90
#> 6 8 11
#> 7 8 45
#> 8 11 90
#> 9 32 22
#> 10 8 45
#> 11 8 22
#> 12 22 90
#> 13 45 362
You can do this with base R:
# Strip the "<" / ">" markers and convert every column to whole numbers.
# The conversion goes through as.character() on purpose: calling as.numeric()
# directly on a factor returns its internal level codes, not the values shown,
# and as.data.frame() on a character matrix creates factors in R < 4.0.
# Assigning into my_df[] keeps the object a data.frame with its column names.
my_df[] <- lapply(my_df, function(x) {
  round(as.numeric(gsub("<|>", "", as.character(x))))
})
my_df
# X1 X2
# 1 8 23
# 2 8 90
# 3 8 8
# 4 8 362
# 5 8 45
# 6 90 362
# 7 256 724
# 8 64 181
# 9 128 362
# 10 32 181
# 11 64 724
# 12 256 1024
# 13 16 362
# 14 32.000000000000014 181
# 15 45.000000000000014 724
# 16 23.000000000000011 362
# 17 45.000000000000014 724
# 18 8 1024
# 19 90 362
# 20 1024 1024
# 21 90 64
# 22 23.000000000000011 181
# 23 32.000000000000014 181
# 24 45.000000000000014 1024
# 25 512.00000000000045 181
If you only want to round the decimals but keep the < and > signs, you can do the following (without performing the steps above):
# Round each value to a whole number while keeping its "<" / ">" prefix.
# Returns a character matrix with one column per column of my_df.
sapply(my_df, function(column) {
  # Deleting all digits and dots leaves only the sign prefix (or "").
  sign_prefix <- gsub(pattern = "\\d|\\.", replacement = "", x = column)
  # Deleting the signs leaves the bare number, which is then rounded.
  whole_number <- round(as.numeric(gsub(pattern = "<|>", replacement = "", x = column)))
  paste0(sign_prefix, whole_number)
})
# X1 X2
# [1,] "<8" "23"
# [2,] "<8" "90"
# [3,] "8" "<8"
# [4,] "8" "362"
# [5,] "<8" "45"
# [6,] "90" "362"
# [7,] "256" "724"
# [8,] "64" "181"
# [9,] "128" "362"
# [10,] "32" "181"
# [11,] "64" "724"
# [12,] "256" ">1024"
# [13,] "16" "362"
# [14,] "32" "181"
# [15,] "45" "724"
# [16,] "23" "362"
# [17,] "45" "724"
# [18,] "8" ">1024"
# [19,] "90" "362"
# [20,] "1024" ">1024"
# [21,] "90" "64"
# [22,] "23" "181"
# [23,] "32" "181"
# [24,] "45" ">1024"
# [25,] "512" "181"
How it works
sapply takes the data.frame and applies the function specified after the comma to each column of the data.frame. gsub substitutes the pattern with the replacement in x (a column of the data.frame). The patterns are regular expressions: \\d matches any digit (0-9), \\. matches a literal dot, and | combines them with OR logic.
stringr-solution
There's a shorter solution with stringr:
library(stringr)
# Round each value to a whole number while keeping an optional "<" / ">" prefix.
# Returns a character matrix with one column per column of my_df.
sapply(my_df, function(x) {
  x <- as.character(x)
  # Optional leading sign; "" when the value has none.
  sign_prefix <- str_extract(x, "[<>]?")
  # Capture the full number including any fractional part, so that round()
  # really rounds instead of receiving an already-truncated integer part.
  whole_number <- round(as.numeric(str_extract(x, "\\d+(\\.\\d+)?")))
  str_c(sign_prefix, whole_number)
})
Here the parts we want to keep are extracted and then combined again after rounding the decimals.
Data
my_df <-
structure(list(X1 = structure(c(1L, 1L, 28L, 28L, 1L, 30L, 15L,
25L, 6L, 17L, 25L, 15L, 8L, 18L,
22L, 14L, 22L, 28L, 30L, 3L, 30L,
14L, 18L, 22L, 24L),
.Label = c("<8", ">1024", "1024", "11",
"11.000000000000007", "128",
"128.00000000000009", "16",
"16.000000000000007", "181",
"181.00000000000006", "22",
"23", "23.000000000000011",
"256", "256.00000000000017",
"32", "32.000000000000014",
"362", "362.00000000000017",
"45", "45.000000000000014",
"512", "512.00000000000045",
"64", "64.000000000000028",
"724", "8",
"8.0000000000000018", "90"),
class = "factor"),
X2 = structure(c(9L, 18L, 1L, 12L, 13L, 12L, 16L, 7L,
12L, 7L, 16L, 2L, 12L, 7L, 16L, 12L,
16L, 2L, 12L, 2L, 15L, 7L, 7L, 2L, 7L),
.Label = c("<8", ">1024", "1024", "11",
"128", "16", "181", "22", "23",
"256", "32", "362", "45", "512",
"64", "724", "8", "90"),
class = "factor")),
.Names = c("X1", "X2"),
row.names = c(NA, -25L),
class = "data.frame")
# X1 X2
# 1 <8 23
# 2 <8 90
# 3 8 <8
# 4 8 362
# 5 <8 45
# 6 90 362
# 7 256 724
# 8 64 181
# 9 128 362
# 10 32 181
# 11 64 724
# 12 256 >1024
# 13 16 362
# 14 32.000000000000014 181
# 15 45.000000000000014 724
# 16 23.000000000000011 362
# 17 45.000000000000014 724
# 18 8 >1024
# 19 90 362
# 20 1024 >1024
# 21 90 64
# 22 23.000000000000011 181
# 23 32.000000000000014 181
# 24 45.000000000000014 >1024
# 25 512.00000000000045 181
I have a dataframe of results. There are multiple comparisons for Cruise_Strata. I have two columns of cruise_strata (Cruise1_Strata1 and Cruise2_Strata2). The problem I found is that there are "duplicate" records in the dataframe. For example one row will have
Cruise_Strata1 Cruise_Strata2
201501.35 201502.35
and another row will have
Cruise_Strata1 Cruise_Strata2
201502.35 201501.35
The rows have the same results for the remaining columns. I would like to be able to identify rows where this happens and remove one row of each pair from the dataset, but I do not know how to go about it. I can't use duplicated() directly because the rows are not exact duplicates.
Any help would be appreciated.
Here is the dataframe.
dput(result5)
structure(list(Cruise_Strata1 = structure(c(1L, 1L, 2L, 2L, 3L,
3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L,
11L, 11L, 12L, 12L, 13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L, 17L,
17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 21L, 22L, 22L, 23L, 23L,
24L, 24L, 25L, 25L, 26L, 26L, 27L, 27L, 28L, 28L, 29L, 29L, 30L,
30L, 31L, 31L, 32L, 32L, 33L, 33L, 34L, 34L, 35L, 35L, 36L, 36L,
37L, 37L, 38L, 38L, 39L, 39L, 40L, 40L, 41L, 41L, 42L, 42L, 43L,
43L, 44L, 44L, 45L, 45L, 46L, 46L, 47L, 47L, 48L, 48L, 49L, 49L,
50L, 50L, 51L, 51L, 52L, 52L, 53L, 53L, 54L, 54L, 55L, 55L, 56L,
56L, 57L, 57L, 58L, 58L, 59L, 59L, 60L, 60L, 61L, 61L, 62L, 62L,
63L, 63L, 64L, 64L, 65L, 65L, 66L, 66L), .Label = c("201501.10",
"201501.11", "201501.13", "201501.14", "201501.15", "201501.17",
"201501.18", "201501.19", "201501.21", "201501.22", "201501.23",
"201501.24", "201501.25", "201501.26", "201501.27", "201501.29",
"201501.30", "201501.31", "201501.33", "201501.34", "201501.35",
"201501.9", "201502.10", "201502.11", "201502.13", "201502.14",
"201502.15", "201502.17", "201502.18", "201502.19", "201502.21",
"201502.22", "201502.23", "201502.24", "201502.25", "201502.26",
"201502.27", "201502.29", "201502.30", "201502.31", "201502.33",
"201502.34", "201502.35", "201502.9", "201503.10", "201503.11",
"201503.13", "201503.14", "201503.15", "201503.17", "201503.18",
"201503.19", "201503.21", "201503.22", "201503.23", "201503.24",
"201503.25", "201503.26", "201503.27", "201503.29", "201503.30",
"201503.31", "201503.33", "201503.34", "201503.35", "201503.9"
), class = "factor"), Cruise_Strata2 = structure(c(23L, 45L,
24L, 46L, 25L, 47L, 26L, 48L, 27L, 49L, 28L, 50L, 29L, 51L, 30L,
52L, 31L, 53L, 32L, 54L, 33L, 55L, 34L, 56L, 35L, 57L, 36L, 58L,
37L, 59L, 38L, 60L, 39L, 61L, 40L, 62L, 41L, 63L, 42L, 64L, 43L,
65L, 44L, 66L, 1L, 45L, 2L, 46L, 3L, 47L, 4L, 48L, 5L, 49L, 6L,
50L, 7L, 51L, 8L, 52L, 9L, 53L, 10L, 54L, 11L, 55L, 12L, 56L,
13L, 57L, 14L, 58L, 15L, 59L, 16L, 60L, 17L, 61L, 18L, 62L, 19L,
63L, 20L, 64L, 21L, 65L, 22L, 66L, 1L, 23L, 2L, 24L, 3L, 25L,
4L, 26L, 5L, 27L, 6L, 28L, 7L, 29L, 8L, 30L, 9L, 31L, 10L, 32L,
11L, 33L, 12L, 34L, 13L, 35L, 14L, 36L, 15L, 37L, 16L, 38L, 17L,
39L, 18L, 40L, 19L, 41L, 20L, 42L, 21L, 43L, 22L, 44L), .Label = c("201501.10",
"201501.11", "201501.13", "201501.14", "201501.15", "201501.17",
"201501.18", "201501.19", "201501.21", "201501.22", "201501.23",
"201501.24", "201501.25", "201501.26", "201501.27", "201501.29",
"201501.30", "201501.31", "201501.33", "201501.34", "201501.35",
"201501.9", "201502.10", "201502.11", "201502.13", "201502.14",
"201502.15", "201502.17", "201502.18", "201502.19", "201502.21",
"201502.22", "201502.23", "201502.24", "201502.25", "201502.26",
"201502.27", "201502.29", "201502.30", "201502.31", "201502.33",
"201502.34", "201502.35", "201502.9", "201503.10", "201503.11",
"201503.13", "201503.14", "201503.15", "201503.17", "201503.18",
"201503.19", "201503.21", "201503.22", "201503.23", "201503.24",
"201503.25", "201503.26", "201503.27", "201503.29", "201503.30",
"201503.31", "201503.33", "201503.34", "201503.35", "201503.9"
), class = "factor"), P_value = c(0.63, 0.6793, 0.0319, 0.0289,
0.9516, 0.8128, 0.9967, 0.3071, 0.9641, 0.0246, 0.7967, 0.2551,
0.2329, 0.3725, 0.0269, 0.3796, 0.0245, 0.5562, 0.9952, 0.5176,
0.5596, 0.9966, 0.32, 0.6402, 0.7691, 0.9671, 0.9396, 0.9, 0.9024,
0.3624, 0.0433, 0.3402, 0.5302, 0.787, 0.0295, 0.3638, 0.006,
0.701, 0.6323, 0.0366, 2e-04, 0.0011, 0.8849, 0.3, 0.63, 0.9738,
0.0319, 0.5197, 0.9516, 0.7369, 0.9967, 0.2276, 0.9641, 0.0158,
0.7967, 0.6332, 0.2329, 0.0322, 0.0269, 0.3013, 0.0245, 0.0129,
0.9952, 0.795, 0.5596, 0.7277, 0.32, 0.747, 0.7691, 0.3817, 0.9396,
0.7961, 0.9024, 0.4164, 0.0433, 0.0028, 0.5302, 0.2864, 0.0295,
0.7036, 0.006, 0, 0.6323, 0.002, 2e-04, 0.9548, 0.8849, 0.0546,
0.6793, 0.9738, 0.0289, 0.5197, 0.8128, 0.7369, 0.3071, 0.2276,
0.0246, 0.0158, 0.2551, 0.6332, 0.3725, 0.0322, 0.3796, 0.3013,
0.5562, 0.0129, 0.5176, 0.795, 0.9966, 0.7277, 0.6402, 0.747,
0.9671, 0.3817, 0.9, 0.7961, 0.3624, 0.4164, 0.3402, 0.0028,
0.787, 0.2864, 0.3638, 0.7036, 0.701, 0, 0.0366, 0.002, 0.0011,
0.9548, 0.3, 0.0546), Cruise1 = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("201501",
"201502", "201503"), class = "factor"), Cruise1_Strata1 = structure(c(1L,
1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L,
9L, 10L, 10L, 11L, 11L, 12L, 12L, 13L, 13L, 14L, 14L, 15L, 15L,
16L, 16L, 17L, 17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 21L, 22L,
22L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L,
8L, 8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L, 13L, 13L, 14L,
14L, 15L, 15L, 16L, 16L, 17L, 17L, 18L, 18L, 19L, 19L, 20L, 20L,
21L, 21L, 22L, 22L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L,
6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L, 13L,
13L, 14L, 14L, 15L, 15L, 16L, 16L, 17L, 17L, 18L, 18L, 19L, 19L,
20L, 20L, 21L, 21L, 22L, 22L), .Label = c("10", "11", "13", "14",
"15", "17", "18", "19", "21", "22", "23", "24", "25", "26", "27",
"29", "30", "31", "33", "34", "35", "9"), class = "factor"),
Cruise2 = structure(c(2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L,
3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L,
2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L,
3L, 2L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L,
1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L,
3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L,
1L, 3L, 1L, 3L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L), .Label = c("201501", "201502", "201503"), class = "factor"),
Cruise2_Strata2 = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 4L,
4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L,
11L, 12L, 12L, 13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L, 17L,
17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 21L, 22L, 22L, 1L,
1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L,
9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L, 13L, 13L, 14L, 14L,
15L, 15L, 16L, 16L, 17L, 17L, 18L, 18L, 19L, 19L, 20L, 20L,
21L, 21L, 22L, 22L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L,
6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L,
12L, 13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L, 17L, 17L, 18L,
18L, 19L, 19L, 20L, 20L, 21L, 21L, 22L, 22L), .Label = c("10",
"11", "13", "14", "15", "17", "18", "19", "21", "22", "23",
"24", "25", "26", "27", "29", "30", "31", "33", "34", "35",
"9"), class = "factor"), adjuste_p = c(1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.792, 1, 1, 1, 0.0264,
0.1452, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.3696, 1,
1, 1, 1, 0.792, 0, 1, 0.264, 0.0264, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 0.3696, 1, 1, 1, 1, 1, 0, 1, 0.264,
0.1452, 1, 1, 1)), .Names = c("Cruise_Strata1", "Cruise_Strata2",
"P_value", "Cruise1", "Cruise1_Strata1", "Cruise2", "Cruise2_Strata2",
"adjuste_p"), row.names = c(1453L, 2905L, 1520L, 2972L, 1587L,
3039L, 1654L, 3106L, 1721L, 3173L, 1788L, 3240L, 1855L, 3307L,
1922L, 3374L, 1989L, 3441L, 2056L, 3508L, 2123L, 3575L, 2190L,
3642L, 2257L, 3709L, 2324L, 3776L, 2391L, 3843L, 2458L, 3910L,
2525L, 3977L, 2592L, 4044L, 2659L, 4111L, 2726L, 4178L, 2793L,
4245L, 2860L, 4312L, 23L, 2927L, 90L, 2994L, 157L, 3061L, 224L,
3128L, 291L, 3195L, 358L, 3262L, 425L, 3329L, 492L, 3396L, 559L,
3463L, 626L, 3530L, 693L, 3597L, 760L, 3664L, 827L, 3731L, 894L,
3798L, 961L, 3865L, 1028L, 3932L, 1095L, 3999L, 1162L, 4066L,
1229L, 4133L, 1296L, 4200L, 1363L, 4267L, 1430L, 4334L, 45L,
1497L, 112L, 1564L, 179L, 1631L, 246L, 1698L, 313L, 1765L, 380L,
1832L, 447L, 1899L, 514L, 1966L, 581L, 2033L, 648L, 2100L, 715L,
2167L, 782L, 2234L, 849L, 2301L, 916L, 2368L, 983L, 2435L, 1050L,
2502L, 1117L, 2569L, 1184L, 2636L, 1251L, 2703L, 1318L, 2770L,
1385L, 2837L, 1452L, 2904L), class = "data.frame")
R Info
R version 3.2.1 (2015-06-18)
Platform: i386-w64-mingw32/i386 (32-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1
Does this give you your desired result?
# Flag rows whose (Cruise_Strata1, Cruise_Strata2) pair has already appeared
# in either order. Both columns come from the same data frame, and they are
# compared as text: cbind() on factors would compare integer level codes,
# which is only valid when both factors have identical level sets.
strata1 <- as.character(result5$Cruise_Strata1)
strata2 <- as.character(result5$Cruise_Strata2)
# Put the smaller label first so "a b" and "b a" produce the same key.
duplicated(paste(pmin(strata1, strata2), pmax(strata1, strata2)))
You can use the resulting logical vector to subset your data.
First you create a vector pasting the values in Cruise_Strata1 and Cruise_Strata2. Doing this you move the smaller of the two to the front and the larger one to the end (or you could do it vice versa). This is just a trick so that you can apply the duplicated function and recognize the duplicates.
Note: this approach will remove duplicates of the form:
Cruise_Strata1 Cruise_Strata2
x y
y x
As well as (if this is not desired let me know):
Cruise_Strata1 Cruise_Strata2
x y
x y
For a generic data frame df with duplicated values in Cruise_Strata1 and Cruise_Strata2:
# Mark row i (dupe = 1) when a later row j holds the same pair of strata in
# reversed order, print each matching pair, then return the unmarked rows.
# Labels are compared as text so factor columns with different level sets
# do not raise an error.
strata1 <- as.character(df$Cruise_Strata1)
strata2 <- as.character(df$Cruise_Strata2)
n_rows <- nrow(df)
df$dupe <- rep(0, n_rows)
# seq_len() yields an empty loop for 0 or 1 rows, unlike 1:(n - 1).
for (i in seq_len(max(n_rows - 1, 0))) {
  later <- seq(i + 1, n_rows)
  # Both columns must match crosswise; matching only one column would also
  # flag rows that merely share a single stratum with some later row.
  hit <- later[which(strata1[i] == strata2[later] & strata2[i] == strata1[later])]
  if (length(hit) > 0) {
    print(df[c(i, hit[1]), ])
    df$dupe[i] <- 1
  }
}
df[df$dupe != 1, ]