Related
I have a large dataset loaded in R (see below for a short version). I want to calculate a value for each combination of Cruiseid, Samplenr, Species and Age (so based on four conditions):
Cruiseid Samplenr Species Age Length LK TNumStat TNumLK
197502 37 154 0 12,5 2 2,791666667 5,583333
197502 37 154 0 17,5 3 2,166666667 6,5
197502 37 154 2 172,5 34 11,54166667 392,4167
197502 37 154 2 177,5 35 12,0625 422,1875
197502 37 154 2 182,5 36 2,083333333 75
197502 35 154 0 112,5 22 11,85654008 260,8439
197502 35 154 2 197,5 39 2,109704641 82,27848
197502 35 154 2 217,5 43 2,109704641 90,7173
197502 35 154 2 232,5 46 2,109704641 97,04641
197502 36 154 0 12,5 2 4,685314685 9,370629
197502 36 154 2 182,5 36 3,496503497 125,8741
197502 41 154 0 17,5 3 2,260869565 6,782609
197502 41 154 2 202,5 40 4,347826087 173,913
197502 41 154 2 212,5 42 2,173913043 91,30435
197502 41 154 2 242,5 48 2,173913043 104,3478
197503 56 154 0 17,5 3 7,428571429 22,28571
197503 56 154 0 147,5 29 10,30952381 298,9762
197503 56 154 2 172,5 34 13,19047619 448,4762
197503 56 154 2 187,5 37 2,380952381 88,09524
197503 54 154 0 12,5 2 3,35 6,7
197503 54 154 0 157,5 31 12 372
197503 54 154 0 167,5 33 13,25 437,25
197503 54 154 2 172,5 34 13,85 470,9
197503 54 154 2 187,5 37 2,5 92,5
197503 54 154 2 217,5 43 2,5 107,5
197503 53 154 0 12,5 2 2,875536481 5,751073
197503 53 154 0 97,5 19 4,806866953 91,33047
197503 53 154 0 107,5 21 5,622317597 118,0687
197503 53 154 0 142,5 28 8,776824034 245,7511
I want to calculate: ((TNumStat$TNumLK/TNumStat$TNumStat)*0.5+0.25)*10 for each Cruiseid, Samplenr, Species and Age.
I have already tried something in a loop construction:
#######################
# Distinct cruise ids and sample numbers that drive the two nested loops below.
Cruise <- unique(TNumStat$Cruiseid)
Track <- unique(TNumStat$Samplenr)
#######################
# Result vector with one slot per row of TNumStat, initialised to NA.
# (The empty c() assignment is immediately overwritten by rep().)
AvrLengthCr <- c()
AvrLengthCr <- rep(NA, length(TNumStat$Species))
#######################
for(j in 1:length(Cruise)){
# Row positions of TNumStat that belong to the current cruise.
t1.ss <- which(TNumStat$Cruiseid == Cruise[j])
###
for(i in 1:length(Track)){
# Positions *within t1.ss* whose sample number equals the current track.
t2.ss <- which(TNumStat$Samplenr[t1.ss] == Track[i])
###
# Element-wise formula for the selected rows. Species and Age are not used
# as conditions anywhere in this loop.
AvrLengthCr[t1.ss][t2.ss] <- ((TNumStat$TNumLK[t1.ss][t2.ss]/TNumStat$TNumStat[t1.ss][t2.ss])*0.5+0.25)*10
}}
But it doesn't seem to work. And I've also been looking at something with dcast:
TNumStat2<-dcast(TNumStat,Cruiseid+Samplenr+Species+Age,formula = (((TNumStat$TNumLK/TNumStat$TNumStat*0.5+0.25)*10) )),na.rm=TRUE)
None of the options I have tried seem to work, and I don't know how to solve this. Can someone please help me?
Thank you
Good Morning,
the question is not totally clear in my opinion. But you could try something like (with dplyr)
# Add the per-row value first, then group by the four key columns.
sample <- group_by(
  mutate(sample, calculate = ((TNumLK / TNumStat) * 0.5 + 0.25) * 10),
  Cruiseid, Samplenr, Species, Age
)
# One row per group holding the mean of the per-row values.
summarisedDF <- summarise(sample, avg.calculate = mean(calculate))
What strikes me is your columns "Length", "TNumStat", "TNumLK" have , instead of . and thus are in character format that can not be coerced to numeric so easily.
# Both columns use a decimal comma: replace it with a dot and parse the
# resulting text as numeric.
TNumStat[c("TNumStat", "TNumLK")] <- lapply(
  TNumStat[c("TNumStat", "TNumLK")],
  function(txt) {
    with_dots <- gsub(",", ".", txt)
    as.numeric(with_dots)
  }
)
Maybe this depends on your system locale, so just ignore this step if it works for you.
Then, you could use by to apply your formula.
# Split TNumStat by Cruiseid/Samplenr/Species and, for every subset, return
# its distinct key columns (the first three) next to the computed value.
l <- by(
  TNumStat,
  TNumStat[c("Cruiseid", "Samplenr", "Species")],
  function(grp) {
    # Ratio of the group means, then the same scaling as in the question.
    ratio <- mean(grp$TNumLK) / mean(grp$TNumStat)
    cbind(unique(grp[1:3]), value = (ratio * 0.5 + 0.25) * 10)
  }
)
This gives you a list that you rbind to get the result.
TNumStat.new <- do.call(rbind, l)
TNumStat.new
# Cruiseid Samplenr Species value
# 6 197502 35 154 148.46288
# 10 197502 36 154 85.14956
# 1 197502 37 154 149.61421
# 12 197502 41 154 174.24600
# 26 197503 53 154 106.86347
# 20 197503 54 154 159.17545
# 16 197503 56 154 131.26698
Data
TNumStat <- structure(list(Cruiseid = c(197502L, 197502L, 197502L, 197502L,
197502L, 197502L, 197502L, 197502L, 197502L, 197502L, 197502L,
197502L, 197502L, 197502L, 197502L, 197503L, 197503L, 197503L,
197503L, 197503L, 197503L, 197503L, 197503L, 197503L, 197503L,
197503L, 197503L, 197503L, 197503L), Samplenr = c(37L, 37L, 37L,
37L, 37L, 35L, 35L, 35L, 35L, 36L, 36L, 41L, 41L, 41L, 41L, 56L,
56L, 56L, 56L, 54L, 54L, 54L, 54L, 54L, 54L, 53L, 53L, 53L, 53L
), Species = c(154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L,
154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L,
154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L),
Age = c(0L, 0L, 2L, 2L, 2L, 0L, 2L, 2L, 2L, 0L, 2L, 0L, 2L,
2L, 2L, 0L, 0L, 2L, 2L, 0L, 0L, 0L, 2L, 2L, 2L, 0L, 0L, 0L,
0L), Length = structure(c(3L, 8L, 9L, 10L, 11L, 2L, 13L,
16L, 17L, 3L, 11L, 8L, 14L, 15L, 18L, 8L, 5L, 9L, 12L, 3L,
6L, 7L, 9L, 12L, 16L, 3L, 19L, 1L, 4L), .Label = c("107,5",
"112,5", "12,5", "142,5", "147,5", "157,5", "167,5", "17,5",
"172,5", "177,5", "182,5", "187,5", "197,5", "202,5", "212,5",
"217,5", "232,5", "242,5", "97,5"), class = "factor"), LK = c(2L,
3L, 34L, 35L, 36L, 22L, 39L, 43L, 46L, 2L, 36L, 3L, 40L,
42L, 48L, 3L, 29L, 34L, 37L, 2L, 31L, 33L, 34L, 37L, 43L,
2L, 19L, 21L, 28L), TNumStat = structure(c(16L, 11L, 2L,
5L, 9L, 3L, 10L, 10L, 10L, 21L, 19L, 13L, 20L, 12L, 12L,
24L, 1L, 6L, 14L, 18L, 4L, 7L, 8L, 15L, 15L, 17L, 22L, 23L,
25L), .Label = c("10,30952381", "11,54166667", "11,85654008",
"12", "12,0625", "13,19047619", "13,25", "13,85", "2,083333333",
"2,109704641", "2,166666667", "2,173913043", "2,260869565",
"2,380952381", "2,5", "2,791666667", "2,875536481", "3,35",
"3,496503497", "4,347826087", "4,685314685", "4,806866953",
"5,622317597", "7,428571429", "8,776824034"), class = "factor"),
TNumLK = structure(c(16L, 18L, 11L, 12L, 21L, 8L, 22L, 25L,
29L, 24L, 4L, 20L, 5L, 26L, 1L, 6L, 9L, 14L, 23L, 19L, 10L,
13L, 15L, 28L, 2L, 17L, 27L, 3L, 7L), .Label = c("104,3478",
"107,5", "118,0687", "125,8741", "173,913", "22,28571", "245,7511",
"260,8439", "298,9762", "372", "392,4167", "422,1875", "437,25",
"448,4762", "470,9", "5,583333", "5,751073", "6,5", "6,7",
"6,782609", "75", "82,27848", "88,09524", "9,370629", "90,7173",
"91,30435", "91,33047", "92,5", "97,04641"), class = "factor")), class = "data.frame", row.names = c(NA,
-29L))
Given the following dataframe i would like to remove all rows with at least x percent (e.g 50%) of values = 0 in at least one group.
For example if a row has less than 50% of values in both groups (control and treatment) it will be removed.
If the row has 50% of non zero value in group control(or treatment) and no values in the other group it will be kept since there is still one group with at least 50% values.
Hope it´s clear.
treatment control control treatment control treatment
row1 0 21 21 21 45 34
row2 0 21 78 321 93 0
row3 34 32 98 87 34 0
row4 75 21 12 54 45 34
row5 46 21 13 45 0 0
row6 85 21 87 45 0 23
row7 24 84 0 0 45 5
row8 87 21 0 98 87 76
row9 43 2 0 45 12 9
row10 12 12 0 0 23 0
Here below the dataframe
df <- structure(list(structure(c(1L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
2L), .Label = c("row1", "row10", "row2", "row3", "row4", "row5",
"row6", "row7", "row8", "row9"), class = "factor"), treatment = c(0L,
0L, 34L, 75L, 46L, 85L, 24L, 87L, 43L, 12L), control = c(21L,
21L, 32L, 21L, 21L, 21L, 84L, 21L, 2L, 12L), control = c(21L,
78L, 98L, 12L, 13L, 87L, 0L, 0L, 0L, 0L), treatment = c(21L,
321L, 87L, 54L, 45L, 45L, 0L, 98L, 45L, 0L), control = c(45L,
93L, 34L, 45L, 0L, 0L, 45L, 87L, 12L, 23L), treatment = c(34L,
0L, 0L, 34L, 0L, 23L, 5L, 76L, 9L, 0L)), .Names = c("", "treatment",
"control", "control", "treatment", "control", "treatment"), class = "data.frame", row.names = c(NA,
-10L))
Based on what you want, if a row has 3 or more "0" values, you want to remove the row.
# Use the first column (the row labels) as row names, then drop that column
# so that only the numeric measurements remain.
rownames(df) <- df[, 1]
df <- df[, -1]
# Keep the rows that contain fewer than three zeros. rowSums() on the logical
# comparison is vectorized and avoids apply(), which first coerces the whole
# data frame to a matrix.
df <- df[rowSums(df == 0) < 3, ]
Row 10 is removed.
I have the following list-based data-frame:
df <- structure(c(5L, 300L, 251L, 42L, 187L, 16L, 2L, 249L, 158L, 17L, 77L,
3L, 2L, 166L, 92L, 16L, 86L, 6L, 5L, 104L, 82L, 17L, 37L, 3L, 1L, 248L,
239L, 10L, 81L, 2L, 0L, 136L, 107L, 6L, 24L, 3L, 6L, 164L, 147L, 18L, 83L,
3L, 1L, 121L, 96L, 1L, 57L, 2L, 0L, 191L, 153L, 15L, 98L, 3L, 5L, 187L,
200L, 8L, 83L, 2L, 1L, 289L, 211L, 19L, 113L, 3L, 2L, 169L, 80L, 13L, 48L,
1L), .Dim = c(6L, 12L), .Dimnames = list(c("0610005C13Rik", "0610007P14Rik",
"0610009B22Rik", "0610009L18Rik", "0610009O20Rik", "0610010B08Rik"), c("control",
"control", "control", "control", "control", "control", "treated", "treated",
"treated", "treated", "treated", "treated")))
str(df)
#> int [1:6, 1:12] 5 300 251 42 187 16 2 249 158 17 ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : chr [1:6] "0610005C13Rik" "0610007P14Rik" "0610009B22Rik" "0610009L18Rik" ...
#> ..$ : chr [1:12] "control" "control" "control" "control" ...
df
#> control control control control control control treated
#> 0610005C13Rik 5 2 2 5 1 0 6
#> 0610007P14Rik 300 249 166 104 248 136 164
#> 0610009B22Rik 251 158 92 82 239 107 147
#> 0610009L18Rik 42 17 16 17 10 6 18
#> 0610009O20Rik 187 77 86 37 81 24 83
#> 0610010B08Rik 16 3 6 3 2 3 3
#> treated treated treated treated treated
#> 0610005C13Rik 1 0 5 1 2
#> 0610007P14Rik 121 191 187 289 169
#> 0610009B22Rik 96 153 200 211 80
#> 0610009L18Rik 1 15 8 19 13
#> 0610009O20Rik 57 98 83 113 48
#> 0610010B08Rik 2 3 2 3 1
What I want to do is convert the row names into a column, but I get this error:
> df$gene_symbol <- rownames(df)
Warning message:
In df$gene_symbol <- rownames(df) : Coercing LHS to a list
What's the right way to do it?
First of all, as pointed out in comments, df is a matrix not a data frame. You can check that:
> class(df)
#[1] "matrix"
> typeof(df)
#[1] "integer"
So, as you see, df is a matrix of integers. Note that the data in matrix can only be of the same type while in data frame, you can store data of different types. This is how matrix and data frame objects are different.
To achieve what you are after, you can convert matrix df to a data frame and then do what you want:
newdf <- data.frame(df)
> class(newdf)
#[1] "data.frame"
newdf$gene_symbol <- rownames(newdf)
Done!
I have a list of 200 variables and I want to sum those that are highly correlated.
Assuming this is my data
mydata <- structure(list(APPLE= c(1L, 2L, 5L, 4L, 366L, 65L, 43L, 456L, 876L, 78L, 687L, 378L, 378L, 34L, 53L, 43L),
PEAR= c(2L, 2L, 5L, 4L, 366L, 65L, 43L, 456L, 876L, 78L, 687L, 378L, 378L, 34L, 53L, 41L),
PLUM = c(10L, 20L, 10L, 20L, 10L, 20L, 1L, 0L, 1L, 2010L,20L, 10L, 10L, 10L, 10L, 10L),
BANANA= c(2L, 10L, 31L, 2L, 2L, 5L, 2L, 5L, 1L, 52L, 1L, 2L, 52L, 6L, 2L, 1L),
LEMON = c(4L, 10L, 31L, 2L, 2L, 5L, 2L, 5L, 1L, 52L, 1L, 2L, 52L, 6L, 2L, 3L)),
.Names = c("APPLE", "PEAR", "PLUM", "BANANA", "LEMON"),
class = "data.frame", row.names = c(NA,-16L))
I have found this code which I am not sure how to tweak in order to leverage it for my purpose
https://stackoverflow.com/a/39484353/4797853
# Pairwise Pearson correlations between all columns of mydata.
var.corelation <- cor(as.matrix(mydata), method="pearson")
library(igraph)
# prevent duplicated pairs
# (lower.tri() is TRUE only below the diagonal, so the diagonal and the upper
# triangle are multiplied by zero)
var.corelation <- var.corelation*lower.tri(var.corelation)
# Row/column index pairs whose correlation is greater than 0.62.
check.corelation <- which(var.corelation>0.62, arr.ind=TRUE)
# Build an undirected graph with one edge per index pair found above.
graph.cor <- graph.data.frame(check.corelation, directed = FALSE)
# Split the distinct variable indices by the membership vector that igraph's
# clusters() reports for the graph.
groups.cor <- split(unique(as.vector(check.corelation)), clusters(graph.cor)$membership)
# Translate each group's indices back into variable names.
lapply(groups.cor,FUN=function(list.cor){rownames(var.corelation)[list.cor]})
The output that I am looking for is 2 data frames as follow:
DF1
GROUP1 GROUP2
3 16
4 40
ETC..
The values are the sum of the values within a group
DF2
ORIGINAL_VAR GROUP
APPLE 1
PEAR 1
PLUM 2
BANANA 2
LEMON 2
Try this (assuming that you have only clustered into 2 groups):
# One column per correlation group (GROUP1, GROUP2, ...), each holding the
# row-wise sum of that group's variables. Works for any number of groups;
# drop = FALSE keeps a single-variable group as a data frame so rowSums()
# still accepts it.
DF1 <- as.data.frame(setNames(
  lapply(groups.cor, function(idx) rowSums(mydata[, idx, drop = FALSE])),
  paste0("GROUP", seq_along(groups.cor))
))
DF1
GROUP1 GROUP2
1 3 16
2 4 40
3 10 72
4 8 24
5 732 14
6 130 30
7 86 5
8 912 10
9 1752 3
10 156 2114
11 1374 22
12 756 14
13 756 114
14 68 22
15 106 14
16 84 14
# Lookup table from each original variable to the number of its group.
# Built in one step for however many groups there are, instead of growing a
# data frame with rbind() inside a loop fixed to two groups.
DF2 <- data.frame(
  ORIGINAL_VAR = rownames(var.corelation)[unlist(groups.cor, use.names = FALSE)],
  # Repeat each group number once per variable in that group.
  GROUP = rep(seq_along(groups.cor), lengths(groups.cor))
)
DF2
ORIGINAL_VAR GROUP
1 PEAR 1
2 APPLE 1
3 BANANA 2
4 LEMON 2
5 PLUM 2
This is a question for an R Programming class, but I have been working on it for several hours, over a period of a few days. I have done internet searches and referenced three different books. I have tried very hard to solve it on my own. I am finally asking for help.
I was given a csv, which I read into the program. This is the resulting dataframe, named df:
name hw0 hw1 hw2 hw3 hw4 hw5 hw6 quiz1 quiz2 quiz3 quiz4 quiz5 quiz6 term1
1 20 14 30 100 50 60 36 12 15 30 15 25 25 100
2 A 20 13 30 100 50 60 30 11 15 0 14 25 25 100
3 B 20 14 30 100 50 60 36 8 11 24 8 13 9 95
4 C 20 14 28 100 50 60 36 12 4 25 13 24 14 95
5 D 20 12 30 100 50 0 33 7 15 26 12 22 0 100
6 E 20 14 30 90 30 0 0 10 15 30 15 21 15 100
7 F 20 13 30 100 48 0 36 12 15 30 15 25 23 95
8 G 20 14 26 85 40 42 33 11 15 23 11 17 16 90
9 H 20 0 0 85 50 0 0 0 15 0 0 15 10 85
10 I 20 14 15 0 10 48 30 11 0 27 11 14 16 60
11 J 20 14 29 80 35 0 36 11 13 24 12 14 0 70
12 K 20 14 29 97 50 60 36 4 7 19 11 20 15 100
13 L 20 14 30 100 45 0 36 10 6 26 8 16 7 80
14 M 20 14 30 100 50 60 36 7 15 28 14 25 25 100
15 N 20 11 0 95 20 0 0 8 14 26 7 9 0 95
16 O 20 12 28 97 0 40 0 11 10 27 11 15 15 70
17 P 20 13 0 90 45 0 20 4 13 30 10 20 17 90
18 Q 20 14 30 100 45 0 36 0 12 21 11 14 17 75
term2 term3 exam1 exam2 exam3 final
1 100 100 100 100 95 100
2 100 100 97 97 80 97
3 100 100 83 85 73 73
4 100 100 88 75 56 77
5 100 0 90 87 72 81
6 100 80 92 82 69 79
7 100 100 90 95 87 90
8 100 0 89 79 81 78
9 90 100 62 83 42 75
10 90 72 78 78 66 81
11 0 0 79 77 51 78
12 100 100 79 77 57 81
13 0 100 68 74 76 76
14 100 100 99 98 82 99
15 0 0 70 70 52 61
16 0 0 63 66 0 0
17 100 100 75 72 56 64
18 90 75 72 84 54 63
QUESTION:
checkStudent <- function(df, studentName);
This function extracts a particular student's grades data from a data frame and returns them.
REQUIRED FORMAT:
checkStudent <- function(df, studentName)
{
}
TIPS PROVIDED:
inputs:
df -- a data frame that contains all the grades data
studentName -- name of a student
return:
all the grades for the student whose name is given as studentName
purpose:
extracting a particular student's grades data from a data frame and returning them
PROJECT TESTER- line of code and expected results:
checkStudent(df,"A")
name hw0 hw1 hw2 hw3 hw4 hw5 hw6 quiz1 quiz2 quiz3 quiz4 quiz5
2 A 20 13 30 100 50 60 30 11 15 0 14 25
quiz6 term1 term2 term3 exam1 exam2 exam3 final
2 25 100 100 100 97 97 80 97
I feel like I have been given everything and still can't get it right. I have tried:
checkStudent <- function(df, studentName)
{
# This local copy of studentName is never used below.
name <- studentName
# NOTE: "name" in quotes is a literal row label, so this asks for a row
# called "name" instead of filtering on the values of the name column.
df["name", ]
}
and
checkStudent <- function(df, studentName)
{
# NOTE: "name" == studentName compares the literal string "name" with
# studentName, not the values of the name column. 1:21 selects columns 1 to 21.
subset(df, "name" == studentName, 1:21)
}
and numerous other lines of code, too many to list.
Please help. I am truly stuck.
Again, this needs to be done strictly in R. If it matters, I'm using RStudio. Thank you so much.
You're really close.
Variables in R should never be encapsulated in quotes, but always are free standing. Additionally your code is just printing the row, it is not returning it.
Here's a slightly modified version of your first attempt, without the quotes.
checkStudent <- function(df, studentName)
{
name <- studentName
# Character row index: selects the row whose row name equals studentName,
# so it only finds a student if the row names are the student names.
return(df[name, ])
}
Edit: Oops, I realized your rows aren't named as the students.
You'll need to make it more like this:
# Return every row of `df` whose `name` column equals `studentName`.
#
# df          -- data frame of grades with a `name` column
# studentName -- name of the student to look up
#
# Returns a data frame with all columns of the matching row(s); it has zero
# rows when no student matches.
checkStudent <- function(df, studentName)
{
  is_match <- df$name == studentName
  # which() turns the logical mask into row positions and drops NA results.
  df[which(is_match), ]
}
Try with logical subsetting:
# Logical subsetting: keep the rows of `x` whose `name` column equals `y`.
# x -- data frame with a `name` column; y -- the name to look up.
checkStudent <- function(x, y) {
  keep <- x["name"] == y
  x[keep, ]
}
Test:
checkStudent(df,"A")
# name hw0 hw1 hw2 hw3 hw4 hw5 hw6 quiz1 quiz2 quiz3 quiz4 quiz5 quiz6 term1 term2 term3 exam1 exam2 exam3 final
#1 A 20 13 30 100 50 60 30 11 15 0 14 25 25 100 100 100 97 97 80 97
data:
df <- structure(list(name = structure(1:17, .Label = c("A", "B", "C",
"D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P",
"Q"), class = "factor"), hw0 = c(20L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L), hw1 = c(13L,
14L, 14L, 12L, 14L, 13L, 14L, 0L, 14L, 14L, 14L, 14L, 14L, 11L,
12L, 13L, 14L), hw2 = c(30L, 30L, 28L, 30L, 30L, 30L, 26L, 0L,
15L, 29L, 29L, 30L, 30L, 0L, 28L, 0L, 30L), hw3 = c(100L, 100L,
100L, 100L, 90L, 100L, 85L, 85L, 0L, 80L, 97L, 100L, 100L, 95L,
97L, 90L, 100L), hw4 = c(50L, 50L, 50L, 50L, 30L, 48L, 40L, 50L,
10L, 35L, 50L, 45L, 50L, 20L, 0L, 45L, 45L), hw5 = c(60L, 60L,
60L, 0L, 0L, 0L, 42L, 0L, 48L, 0L, 60L, 0L, 60L, 0L, 40L, 0L,
0L), hw6 = c(30L, 36L, 36L, 33L, 0L, 36L, 33L, 0L, 30L, 36L,
36L, 36L, 36L, 0L, 0L, 20L, 36L), quiz1 = c(11L, 8L, 12L, 7L,
10L, 12L, 11L, 0L, 11L, 11L, 4L, 10L, 7L, 8L, 11L, 4L, 0L), quiz2 = c(15L,
11L, 4L, 15L, 15L, 15L, 15L, 15L, 0L, 13L, 7L, 6L, 15L, 14L,
10L, 13L, 12L), quiz3 = c(0L, 24L, 25L, 26L, 30L, 30L, 23L, 0L,
27L, 24L, 19L, 26L, 28L, 26L, 27L, 30L, 21L), quiz4 = c(14L,
8L, 13L, 12L, 15L, 15L, 11L, 0L, 11L, 12L, 11L, 8L, 14L, 7L,
11L, 10L, 11L), quiz5 = c(25L, 13L, 24L, 22L, 21L, 25L, 17L,
15L, 14L, 14L, 20L, 16L, 25L, 9L, 15L, 20L, 14L), quiz6 = c(25L,
9L, 14L, 0L, 15L, 23L, 16L, 10L, 16L, 0L, 15L, 7L, 25L, 0L, 15L,
17L, 17L), term1 = c(100L, 95L, 95L, 100L, 100L, 95L, 90L, 85L,
60L, 70L, 100L, 80L, 100L, 95L, 70L, 90L, 75L), term2 = c(100L,
100L, 100L, 100L, 100L, 100L, 100L, 90L, 90L, 0L, 100L, 0L, 100L,
0L, 0L, 100L, 90L), term3 = c(100L, 100L, 100L, 0L, 80L, 100L,
0L, 100L, 72L, 0L, 100L, 100L, 100L, 0L, 0L, 100L, 75L), exam1 = c(97L,
83L, 88L, 90L, 92L, 90L, 89L, 62L, 78L, 79L, 79L, 68L, 99L, 70L,
63L, 75L, 72L), exam2 = c(97L, 85L, 75L, 87L, 82L, 95L, 79L,
83L, 78L, 77L, 77L, 74L, 98L, 70L, 66L, 72L, 84L), exam3 = c(80L,
73L, 56L, 72L, 69L, 87L, 81L, 42L, 66L, 51L, 57L, 76L, 82L, 52L,
0L, 56L, 54L), final = c(97L, 73L, 77L, 81L, 79L, 90L, 78L, 75L,
81L, 78L, 81L, 76L, 99L, 61L, 0L, 64L, 63L)), .Names = c("name",
"hw0", "hw1", "hw2", "hw3", "hw4", "hw5", "hw6", "quiz1", "quiz2",
"quiz3", "quiz4", "quiz5", "quiz6", "term1", "term2", "term3",
"exam1", "exam2", "exam3", "final"), row.names = c(NA, -17L), class = "data.frame")