I have two data frames named "df" and "df1". What I want is to merge df with df1 based on gender and district in such a way that after merging I have only one column, "prob.dis". More precisely, if dis is 1, the value from prob.dis1 should be used, and if dis is 5, the value from prob.dis5 should be used. Any help would be appreciated.
df<-
age gender dis
10 1 1
11 2 5
10 1 4
11 2 2
10 1 1
11 2 2
10 1 4
11 2 5
10 1 3
11 2 3
df1<-
age gender prob.dis1 prob.dis2 prob.dis3 prob.dis4 prob.dis5
10 1 0.0099 0.0124 0.0037 0.0176 0.1
11 2 0.0021 0.802 0.005 0.0029 0.2
Transform df1 into long format and join it with df on gender and dis:
library(dplyr)
library(tidyr)
# Reshape the wide probability columns of df1 into a long lookup table
# (one row per gender/dis pair) and attach the matching value to every row
# of df.
df %>%
  left_join(
    df1 %>%
      pivot_longer(
        cols = matches("^prob.dis"),
        # The first capture group (the non-digit prefix) becomes the name of
        # the value column via ".value"; the trailing digits become `dis`.
        names_pattern = "([^0-9]+)([0-9]+)",
        names_to = c(".value", "dis")
      ) %>%
      # The digits are extracted as text; convert them so they can be matched
      # against the numeric `dis` column of df.
      mutate(dis = as.integer(dis)) %>%
      # age is not a join key here and would otherwise be duplicated.
      select(-age),
    by = c("gender", "dis")
  )
age gender dis prob.dis
<dbl> <dbl> <dbl> <dbl>
1 10 1 1 0.0099
2 11 2 5 0.2
3 10 1 4 0.0176
4 11 2 2 0.802
5 10 1 1 0.0099
6 11 2 2 0.802
7 10 1 4 0.0176
8 11 2 5 0.2
9 10 1 3 0.0037
10 11 2 3 0.005
data:
df <- structure(list(age = c(10, 11, 10, 11, 10, 11, 10, 11, 10, 11
), gender = c(1, 2, 1, 2, 1, 2, 1, 2, 1, 2), dis = c(1, 5, 4,
2, 1, 2, 4, 5, 3, 3)), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame"))
df1 <- structure(list(age = c(10, 11), gender = c(1, 2), prob.dis1 = c(0.0099,
0.0021), prob.dis2 = c(0.0124, 0.802), prob.dis3 = c(0.0037,
0.005), prob.dis4 = c(0.0176, 0.0029), prob.dis5 = c(0.1, 0.2
)), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")
)
Update:
pivot_longer df2
right_join with df1
library(dplyr)
library(tidyr)
df1 <- df2 %>%
pivot_longer(
cols = starts_with("prob"),
names_to = "dis",
values_to = "prob.dis"
) %>%
mutate(dis = as.numeric(gsub("\\D", "", dis))) %>%
right_join(df1, by = c("age", "gender", "dis"))
Output:
age gender dis prob.dis
<int> <int> <dbl> <dbl>
1 10 1 1 0.0099
2 10 1 1 0.0099
3 10 1 3 0.0037
4 10 1 4 0.0176
5 10 1 4 0.0176
6 11 2 2 0.802
7 11 2 2 0.802
8 11 2 3 0.005
9 11 2 5 0.2
10 11 2 5 0.2
In case dis only ranges from 1 to 5 and the columns of df2 are sorted accordingly, you can use match to find the row and 2 + df$dis to get the column; these can then be used for subsetting with a matrix produced by cbind.
df$prop.dis <- df1[cbind(match(df$gender, df1$gender), 2 + df$dis)]
df
# age gender dis prop.dis
#1 10 1 1 0.0099
#2 11 2 5 0.2000
#3 10 1 4 0.0176
#4 11 2 2 0.8020
#5 10 1 1 0.0099
#6 11 2 2 0.8020
#7 10 1 4 0.0176
#8 11 2 5 0.2000
#9 10 1 3 0.0037
#10 11 2 3 0.0050
or using the names of df1 to match the columns:
df$prop.dis <- df1[cbind(match(df$gender, df1$gender)
, match(paste0("prob.dis", df$dis), names(df1)))]
In case age should also be matched, additionally use interaction:
M <- c("age", "gender")
df$prop.dis <- df1[cbind(match(interaction(df[M]), interaction(df1[M])), 2 + df$dis)]
Related
I need to flag an id when they have different grade values in the grade columns. Here how my sample dataset looks like
df <- data.frame(id = c(11,22,33,44,55),
grade.1 = c(3,4,5,6,7),
grade.2 = c(3,4,5,NA,7),
grade.3 = c(4,4,6,5,7),
grade.4 = c(NA,NA,NA, 5, 7 ))
df$Grade <- paste0(df$grade.1, df$grade.2, df$grade.3, df$grade.4)
> df
id grade.1 grade.2 grade.3 grade.4 Grade
1 11 3 3 4 NA 334NA
2 22 4 4 4 NA 444NA
3 33 5 5 6 NA 556NA
4 44 6 NA 5 5 6NA55
5 55 7 7 7 7 7777
When an id has different grade values in grade.1 grade.2 grade.3 and grade.4, that row needs to be flagged. Having NA in that column does not affect the flagging.
In other words, if the Grade column at the end has any differential numbers, that id needs to be flagged.
My desired output should look like this:
> df
id grade.1 grade.2 grade.3 grade.4 flag
1 11 3 3 4 NA flagged
2 22 4 4 4 NA Not_flagged
3 33 5 5 6 NA flagged
4 44 6 NA 5 5 flagged
5 55 7 7 7 7 Not_flagged
Any ideas?
Thanks!
A base R solution using rle omitting NA values.
# Flag each row whose non-missing grades (columns 2 to 5) are not all equal.
# After dropping NA, rle() yields one run when every remaining grade is the
# same. `<= 1L` also covers a row with no recorded grades (zero runs), which
# has no differing values and is therefore left unflagged.
df$flag <- apply(df[, 2:5], 1, function(x) {
  runs <- rle(x[!is.na(x)])$lengths
  # The condition is a single value per row, so plain if/else is used.
  if (length(runs) <= 1L) "not_flagged" else "flagged"
})
df
id grade.1 grade.2 grade.3 grade.4 flag
1 11 3 3 4 NA flagged
2 22 4 4 4 NA not_flagged
3 33 5 5 6 NA flagged
4 44 6 NA 5 5 flagged
5 55 7 7 7 7 not_flagged
Data
df <- structure(list(id = c(11, 22, 33, 44, 55), grade.1 = c(3, 4,
5, 6, 7), grade.2 = c(3, 4, 5, NA, 7), grade.3 = c(4, 4, 6, 5,
7), grade.4 = c(NA, NA, NA, 5, 7)), class = "data.frame", row.names = c(NA,
-5L))
Here is a base R approach.
# For every row (all columns except the first), count the distinct non-NA
# values; more than one distinct value means the row is flagged.
# TRUE/FALSE is turned into index 2/1 of the label vector by adding 1L.
df$flag <- c("not_flagged", "flagged")[
  1L + apply(df[-1L], 1L, function(grades) {
    sum(!is.na(unique(grades))) > 1L
  })
]
Output
> df
id grade.1 grade.2 grade.3 grade.4 flag
1 11 3 3 4 NA flagged
2 22 4 4 4 NA not_flagged
3 33 5 5 6 NA flagged
4 44 6 NA 5 5 flagged
5 55 7 7 7 7 not_flagged
A possible solution:
library(tidyverse)
df <- data.frame(id = c(11,22,33,44,55),
grade.1 = c(3,4,5,6,7),
grade.2 = c(3,4,5,NA,7),
grade.3 = c(4,4,6,5,7),
grade.4 = c(NA,NA,NA, 5, 7 ))
# Flag every id whose non-missing grades (columns 2 to 5) are not all equal.
# n_distinct(..., na.rm = TRUE) ignores NA; `<= 1` additionally leaves a row
# with no recorded grades unflagged instead of flagging it.
df %>%
  rowwise() %>%
  mutate(
    flag = if_else(
      n_distinct(c_across(2:5), na.rm = TRUE) <= 1,
      "not-flagged",
      "flagged"
    )
  ) %>%
  ungroup()
#> # A tibble: 5 × 6
#> id grade.1 grade.2 grade.3 grade.4 flag
#> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 11 3 3 4 NA flagged
#> 2 22 4 4 4 NA not-flagged
#> 3 33 5 5 6 NA flagged
#> 4 44 6 NA 5 5 flagged
#> 5 55 7 7 7 7 not-flagged
Using data.table::uniqueN, that counts the number of unique elements in a vector (and that allows for NA removal):
library(data.table)
library(dplyr)
# Flag every id whose non-missing grades (columns 2 to 5) are not all equal.
# uniqueN(..., na.rm = TRUE) counts distinct non-NA values; `<= 1` also leaves
# a row with no recorded grades unflagged.
df %>%
  rowwise() %>%
  mutate(
    flag = if_else(
      # TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
      uniqueN(c_across(2:5), na.rm = TRUE) <= 1,
      "not-flagged",
      "flagged"
    )
  ) %>%
  ungroup()
n_distinct from dplyr is very helpful. Here is a version using a combination of pivot_longer and pivot_wider:
library(dplyr)
library(tidyr)
df %>%
pivot_longer(
-c(id, Grade),
names_to = "name",
values_to = "value"
) %>%
group_by(id) %>%
mutate(flag = ifelse(n_distinct(value, na.rm = TRUE)==1, "Not flagged", "Flagged")) %>%
pivot_wider(
names_from = name,
values_from = value
)
id Grade flag grade.1 grade.2 grade.3 grade.4
<dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 11 334NA Flagged 3 3 4 NA
2 22 444NA Not flagged 4 4 4 NA
3 33 556NA Flagged 5 5 6 NA
4 44 6NA55 Flagged 6 NA 5 5
5 55 7777 Not flagged 7 7 7 7
I have a dataset that looks like this:
if I want to get the subtotal by subject as well as for each date, and also arrange subject based on total number of the subject, what should I do?
The final output should looks like following (blue part is the one we need to add, and also total ELA (23) <total Math (47), so ELA will be in front of math):
The sample table can be build using codes:
df <- structure(list(Subject = c("Math", "Math", "Math", "Math", "ELA",
"ELA", "ELA"), date = c(1, 7, 14, 21, 1, 7, 21), A = c(1, 2,
0, 9, 2, 6, 0), B = c(3, 5, 5, 1, 0, 5, 0), C = c(2, 1, 0, 8,
0, 0, 0), D = c(0, 0, 2, 8, 0, 8, 2)), row.names = c(NA, -7L), class = c("tbl_df",
"tbl", "data.frame"))
Use the janitor package to do it quickly:
# Append a "Sub-Total" row to each Subject and a "Total" column to every row.
# Both janitor calls are namespace-qualified so the snippet does not depend on
# janitor being attached.
df %>%
  # date is converted to character so that it is not summed with A:D.
  mutate(date = as.character(date)) %>%
  # One data frame per Subject; each one gets its own totals row.
  group_split(Subject) %>%
  map_df(janitor::adorn_totals, fill = "All Dates", name = "Sub-Total") %>%
  janitor::adorn_totals(where = "col")
Subject date A B C D Total
ELA 1 2 0 0 0 2
ELA 7 6 5 0 8 19
ELA 21 0 0 0 2 2
Sub-Total All Dates 8 5 0 10 23
Math 1 1 3 2 0 6
Math 7 2 5 1 0 8
Math 14 0 5 0 2 7
Math 21 9 1 8 8 26
Sub-Total All Dates 12 14 11 10 47
If you don't convert the date column to character, it will be totalled as well.
Here is a base R solution. The main functions are
by and addmargins, to compute the totals per groups of Subject and the row totals;
a second loop (lapply) to put the column totals as the first row.
The rest of the code puts everything together.
# Step 1: split df (without its first column) by the first column (Subject).
# Within each group the remaining columns are turned into a matrix, the first
# of them (date) becomes the row names, and addmargins() appends a "Sum" row
# and a "Sum" column to the value columns.
res <- by(df[-1], df[1], FUN = function(x){
x <- as.matrix(x)
rownames(x) <- x[, 1]
addmargins(x[, -1], margin = 1:2)
})
# Step 2: turn each per-Subject matrix back into a data frame.
res <- lapply(seq_along(res), function(i){
x <- as.data.frame(res[[i]])
# The margin row becomes the "All dates" sub-total row.
row.names(x)[row.names(x) == "Sum"] <- "All dates"
# Restore Subject and date as ordinary columns in front of the values.
y <- cbind.data.frame(Subject = names(res)[i], date = row.names(x), x)
# The last column is the margin column added by addmargins().
names(y)[ncol(y)] <- "Total"
# Rows are sorted by decreasing Total within the Subject.
y[order(y[["Total"]], decreasing = TRUE), ]
})
# Step 3: take the Total in the first row of each Subject's table and stack
# the tables in decreasing order of that value.
# NOTE(review): decreasing order puts Math before ELA, whereas the question
# text asks for ELA in front of Math -- confirm the intended direction.
i <- sapply(res, '[', 1, "Total")
res <- do.call(rbind.data.frame, res[order(i, decreasing = TRUE)])
row.names(res) <- NULL
res
# Subject date A B C D Total
#1 Math All dates 12 14 11 10 47
#2 Math 21 9 1 8 8 26
#3 Math 7 2 5 1 0 8
#4 Math 14 0 5 0 2 7
#5 Math 1 1 3 2 0 6
#6 ELA All dates 8 5 0 10 23
#7 ELA 7 6 5 0 8 19
#8 ELA 1 2 0 0 0 2
#9 ELA 21 0 0 0 2 2
Here's a dplyr way :
library(dplyr)
# Display order of the date labels within each Subject; the sub-total row
# comes first. All labels are character, matching the converted date column.
c_order <- c("All dates", "1", "7", "14", "21")
bind_rows(
  # One sub-total row per Subject, labelled "All dates".
  df %>%
    group_by(Subject) %>%
    summarise(across(A:D, sum)) %>%
    mutate(date = "All dates", .after = "Subject"),
  # The original rows, with date as character so both parts can be stacked.
  df %>%
    mutate(date = as.character(date))
) %>%
  arrange(Subject, match(date, c_order)) %>%
  # Row total over the value columns, for detail and sub-total rows alike.
  mutate(Total = rowSums(across(A:D)))
# Subject date A B C D Total
# <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 ELA All dates 8 5 0 10 23
#2 ELA 1 2 0 0 0 2
#3 ELA 7 6 5 0 8 19
#4 ELA 21 0 0 0 2 2
#5 Math All dates 12 14 11 10 47
#6 Math 1 1 3 2 0 6
#7 Math 7 2 5 1 0 8
#8 Math 14 0 5 0 2 7
#9 Math 21 9 1 8 8 26
First for each Subject sum columns A:D and add a column 'date' with value 'All Dates'. Bind this to original dataframe and arrange the data according to required order and perform a rowwise sum.
Does this work:
library(dplyr)
library(tidyr)
# Add a per-row Total, append one "All Dates" sub-total row per Subject, and
# list each Subject's rows from the largest to the smallest Total.
df %>%
  # date is converted to character up front: the sub-total rows are later
  # labelled with the string "All Dates", and replace_na() in recent tidyr
  # versions does not change a numeric column into a character one.
  # rowSums() computes the row totals once, without rowwise grouping.
  mutate(date = as.character(date), Total = rowSums(across(A:D))) %>%
  # The sub-total rows carry no date, so bind_rows() fills it with NA.
  bind_rows(
    .,
    summarise(group_by(., Subject), across(A:Total, sum))
  ) %>%
  mutate(date = replace_na(date, "All Dates")) %>%
  arrange(Subject, desc(Total))
# A tibble: 9 x 7
Subject date A B C D Total
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ELA All Dates 8 5 0 10 23
2 ELA 7 6 5 0 8 19
3 ELA 1 2 0 0 0 2
4 ELA 21 0 0 0 2 2
5 Math All Dates 12 14 11 10 47
6 Math 21 9 1 8 8 26
7 Math 7 2 5 1 0 8
8 Math 14 0 5 0 2 7
9 Math 1 1 3 2 0 6
This question already has answers here:
Why are my dplyr group_by & summarize not working properly? (name-collision with plyr)
(5 answers)
Closed 2 years ago.
I would like to calculate a rolling sum (or a custom function) of 3 previous values, treating each group separately. I have tried this:
require(dplyr)
# Build dataframe
df <- data.frame(person = c(rep("Peter", 5), rep("James", 5)),
score1 = c(1,3,2,5,4,6,8,4,5,3),
score2 = c(1,1,1,5,1,3,4,8,9,0))
# Attempt rolling sum by group
df %>%
group_by(person) %>%
mutate(s1_rolling = rollsumr(score1, k = 3, fill = NA),
s2_rolling = rollsumr(score2, k = 3, fill = NA))
But the new columns do not treat each group separately, instead continuing down the whole dataset:
person score1 score2 s1_rolling s2_rolling
<chr> <dbl> <dbl> <dbl> <dbl>
1 Peter 1 1 NA NA
2 Peter 3 1 NA NA
3 Peter 2 1 6 3
4 Peter 5 5 10 7
5 Peter 4 1 11 7
6 James 6 3 15 9
7 James 8 4 18 8
8 James 4 8 18 15
9 James 5 9 17 21
10 James 3 0 12 17
I would like row 6 and 7 to show NA in the two new columns, because until row 8 there is insufficient James data to sum 3 rows.
How can I do this?
It could be that plyr was also loaded and the mutate from plyr masked the mutate from dplyr. We could use dplyr::mutate
library(dplyr)
library(zoo)
df %>%
group_by(person) %>%
dplyr::mutate(s1_rolling = rollsumr(score1, k = 3, fill = NA),
s2_rolling = rollsumr(score2, k = 3, fill = NA))
# A tibble: 10 x 5
# Groups: person [2]
# person score1 score2 s1_rolling s2_rolling
# <chr> <dbl> <dbl> <dbl> <dbl>
# 1 Peter 1 1 NA NA
# 2 Peter 3 1 NA NA
# 3 Peter 2 1 6 3
# 4 Peter 5 5 10 7
# 5 Peter 4 1 11 7
# 6 James 6 3 NA NA
# 7 James 8 4 NA NA
# 8 James 4 8 18 15
# 9 James 5 9 17 21
#10 James 3 0 12 17
If there are more than one column, we can also use across
# Rolling sum of the current and two previous values for every score column,
# computed separately for each person.
df %>%
  group_by(person) %>%
  dplyr::mutate(
    across(
      starts_with("score"),
      ~ rollsumr(., k = 3, fill = NA),
      # `{.col}` is the glue placeholder for the input column name;
      # a bare `{col}` is not a valid placeholder.
      .names = "{.col}_rolling"
    )
  )
For a faster version, use RcppRoll::roll_sumr
# Same rolling sum per person, using RcppRoll::roll_sumr.
df %>%
  group_by(person) %>%
  dplyr::mutate(
    across(
      starts_with("score"),
      ~ RcppRoll::roll_sumr(., 3, fill = NA),
      # `{.col}` is the glue placeholder for the input column name;
      # a bare `{col}` is not a valid placeholder.
      .names = "{.col}_rolling"
    )
  )
The behavior can be reproduced with plyr::mutate
df %>%
group_by(person) %>%
plyr::mutate(s1_rolling = rollsumr(score1, k = 3, fill = NA),
s2_rolling = rollsumr(score2, k = 3, fill = NA))
# A tibble: 10 x 5
# Groups: person [2]
# person score1 score2 s1_rolling s2_rolling
# <chr> <dbl> <dbl> <dbl> <dbl>
# 1 Peter 1 1 NA NA
# 2 Peter 3 1 NA NA
# 3 Peter 2 1 6 3
# 4 Peter 5 5 10 7
# 5 Peter 4 1 11 7
# 6 James 6 3 15 9
# 7 James 8 4 18 8
# 8 James 4 8 18 15
# 9 James 5 9 17 21
#10 James 3 0 12 17
I would suggest a slider approach with the slide_dbl() function, which works similarly to zoo and is compatible with dplyr:
library(slider)
library(dplyr)
#Code
# Build dataframe
df <- data.frame(person = c(rep("Peter", 5), rep("James", 5)),
score1 = c(1,3,2,5,4,6,8,4,5,3),
score2 = c(1,1,1,5,1,3,4,8,9,0))
# Attempt rolling sum by group
df %>%
group_by(person) %>%
mutate(s1_rolling = slide_dbl(score1, sum, .before = 2, .complete = TRUE),
s2_rolling = slide_dbl(score2, sum, .before = 2, .complete = TRUE))
Output:
# A tibble: 10 x 5
# Groups: person [2]
person score1 score2 s1_rolling s2_rolling
<fct> <dbl> <dbl> <dbl> <dbl>
1 Peter 1 1 NA NA
2 Peter 3 1 NA NA
3 Peter 2 1 6 3
4 Peter 5 5 10 7
5 Peter 4 1 11 7
6 James 6 3 NA NA
7 James 8 4 NA NA
8 James 4 8 18 15
9 James 5 9 17 21
10 James 3 0 12 17
x y
1 1 1
2 3 2
3 2 3
4 3 4
5 2 5
6 4 6
7 5 7
8 2 8
9 1 9
10 1 10
11 3 11
12 4 12
The above is part of the input.
Let's suppose that it also has a bunch of other columns
I want to:
group_by x
summarise y by sum
And for all other columns, I want to summarise_all by just taking the first value
Here's an approach that breaks it into two problems and combines them:
library(dplyr)
# Summarise y and the remaining columns separately per x, then combine the two
# results on the grouping key.
left_join(
  # Here we want to treat column y specially
  df %>%
    group_by(x) %>%
    summarize(sum_y = sum(y)),
  # Here we exclude y and take the first value of all the remaining columns
  df %>%
    group_by(x) %>%
    summarise(across(-y, first)),
  # The key is stated explicitly so the join does not depend on whichever
  # column names the two summaries happen to share.
  by = "x"
)
# A tibble: 5 x 3
x sum_y z
<int> <int> <int>
1 1 20 1
2 2 16 3
3 3 17 2
4 4 18 2
5 5 7 3
Sample data:
# Sample data: 12 rows with integer columns x, y and z.
# TRUE/FALSE are spelled out because `T` and `F` are ordinary variables that
# can be reassigned.
df <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = "x y z
1 1 1
3 2 2
2 3 3
3 4 4
2 5 1
4 6 2
5 7 3
2 8 4
1 9 1
1 10 2
3 11 3
4 12 4"
)
library(dplyr)
df1 %>%
group_by(x) %>%
summarise_each(list(avg = mean), -y) %>%
bind_cols(.,{df1 %>%
group_by(x) %>%
summarise_at(vars(y), funs(sum)) %>%
select(-x)
})
#> # A tibble: 5 x 4
#> x r_avg r.1_avg y
#> <int> <dbl> <dbl> <int>
#> 1 1 6.67 6.67 20
#> 2 2 5.33 5.33 16
#> 3 3 5.67 5.67 17
#> 4 4 9 9 18
#> 5 5 7 7 7
Created on 2019-06-20 by the reprex package (v0.3.0)
Data:
# Sample data: 12 rows with columns r, x and y.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
df1 <- read.table(text = "
r x y
1 1 1
2 3 2
3 2 3
4 3 4
5 2 5
6 4 6
7 5 7
8 2 8
9 1 9
10 1 10
11 3 11
12 4 12", header = TRUE)
# Reorder to x, y, r and repeat r once more (the copy is named r.1) so that
# there are several "other" columns to summarise.
df1 <- df1[, c(2, 3, 1, 1)]
library(tidyverse)
df <- tribble(~x, ~y, # making a sample data frame
1, 1,
3, 2,
2, 3,
3, 4,
2, 5,
4, 6,
5, 7,
2, 8,
1, 9,
1, 10,
3, 11,
4, 12)
df <- df %>%
add_column(z = sample(1:nrow(df))) #add another column for the example
df
# If there is only one additional column and you need the first value
df %>%
group_by(x) %>%
summarise(sum_y = sum(y), z_1st = z[1])
# otherwise use summarise_at to address all the other columns
f <- function(x){x[1]} # function to extract the first value
df %>%
group_by(x) %>%
summarise_at(.vars = vars(-c('y')), .funs = f) # exclude column y from the calculations
How do I apply rollapplyr on the following data to allow it be sensitive to the date field? Because currently I am able to apply the rolling (blind to the date) over the dataset with eg. 4-quarters period and minimum of 2 observations in the 4 quarters.
#creating the data
set.seed(123)
data.frame(id=c(1,1,1,1,1,2,2,2,2,2),
date=as.Date(as.character(c(20040930, 20041231, 20050331, 20050630, 20050930, 20040930, 20050331, 20050630, 20051231, 20060331)), format = "%Y%m%d"),
col_a=round(runif(10, 0, 100),0),
col_b=round(runif(10, 0, 100),0))
id date col_a col_b
1 1 2004-09-30 3 10
2 1 2004-12-31 8 5
3 1 2005-03-31 4 7
4 1 2005-06-30 9 6
5 1 2005-09-30 9 1
6 2 2004-09-30 0 9
<missing>
7 2 2005-03-31 5 2
8 2 2005-06-30 9 0
<missing>
9 2 2005-12-31 6 3
10 2 2006-03-31 5 10
This is what I have attempted so far, but this will not take into consideration of the missing records, eg. id=2's 2005-09-30 record.
library(zoo)
data %>%
group_by(id) %>%
mutate(score = (col_a + col_b) / rollapplyr(col_b, 4, mean, fill=NA, by.column=TRUE, partial=2)) %>%
ungroup %>% select(id, date, col_a, col_b, score)
And this is what I got after applying the above function
id date col_a col_b score
<dbl> <date> <dbl> <dbl> <dbl>
1 1 2004-09-30 3 10 NA
2 1 2004-12-31 8 5 1.73
3 1 2005-03-31 4 7 1.5
4 1 2005-06-30 9 6 2.14
5 1 2005-09-30 9 1 2.11
6 2 2004-09-30 0 9 NA
7 2 2005-03-31 5 2 1.27
8 2 2005-06-30 9 0 2.45
9 2 2005-12-31 6 3 2.57
10 2 2006-03-31 5 10 4
However what I am expecting is it will take into consideration the missing quarters itself automatically. This is my expected output
id date col_a col_b score
<dbl> <date> <dbl> <dbl> <dbl>
1 1 2004-09-30 3 10 NA
2 1 2004-12-31 8 5 1.73
3 1 2005-03-31 4 7 1.5
4 1 2005-06-30 9 6 2.14
5 1 2005-09-30 9 1 2.11
6 2 2004-09-30 0 9 NA
<missing>
7 2 2005-03-31 5 2 1.27
8 2 2005-06-30 9 0 2.45
<missing>
9 2 2005-12-31 6 3 **5.4**
10 2 2006-03-31 5 10 **3.46**
Note that the "<missing>" will not be shown in the output, I just put for visual purpose. So eg. row 10 will only use row 8,9 and 10's records because the missing row is counted as a row too. How do I achieve that?
Note that eg. for row 10, n=3 should be used for the averaging not n=4 as it shouldn't include the missing rows.
One option would be to create the complete rows of 'date' for all 'id's before the group_by
library(tidyverse)
library(zoo)
# Insert the missing id/date combinations as explicit rows. col_a and col_b
# are left NA there, so each 4-row window lines up with the dates and the
# filler rows do not enter the average.
# NOTE(review): complete() only adds dates that occur somewhere in `data`;
# a quarter missing for every id would not be inserted.
complete(data, id, date) %>%
  group_by(id) %>%
  mutate(score = (col_a + col_b) /
    # Average only the observed quarters in the window (na.rm is passed on to
    # mean), so e.g. row 10 divides by the mean of 3 values, not 4.
    # NOTE(review): partial = 2 counts window rows, filler rows included.
    rollapplyr(col_b, 4, mean, na.rm = TRUE, fill = NA, by.column = TRUE,
      partial = 2)) %>%
  ungroup() %>%
  select(id, date, col_a, col_b, score) %>%
  # Keep only the rows that exist in the original data; the filler rows have
  # NA in col_a / col_b and therefore never match.
  right_join(data, by = c("id", "date", "col_a", "col_b"))
# A tibble: 10 x 5
#       id date       col_a col_b score
#    <dbl> <date>     <dbl> <dbl> <dbl>
#  1     1 2004-09-30     3    10 NA
#  2     1 2004-12-31     8     5  1.73
#  3     1 2005-03-31     4     7  1.5
#  4     1 2005-06-30     9     6  2.14
#  5     1 2005-09-30     9     1  2.11
#  6     2 2004-09-30     0     9 NA
#  7     2 2005-03-31     5     2  1.27
#  8     2 2005-06-30     9     0  2.45
#  9     2 2005-12-31     6     3  5.4
# 10     2 2006-03-31     5    10  3.46
data
data <- structure(list(id = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2),
date = structure(c(12691,
12783, 12873, 12964, 13056, 12691, 12873, 12964, 13148, 13238
), class = "Date"), col_a = c(3, 8, 4, 9, 9, 0, 5, 9, 6, 5),
col_b = c(10, 5, 7, 6, 1, 9, 2, 0, 3, 10)), row.names = c(NA,
-10L), class = "data.frame")