# Build a one-row tibble describing a single occupant / crash scenario.
# `tibble()` is the constructor that takes name = value pairs; `as_tibble()`
# converts an existing object and needs that object as its first argument.
# The valid values of each column are listed to its right.
newdata <- tibble(
  dvcat = "10-24",      # "1-9" "10-24" "25-39" "40-54" "55+"
  seatbelt = "none",    # "none" "belted"
  frontal = "frontal",  # "notfrontal" "frontal"
  sex = "f",            # "f" "m"
  ageOFocc = 22,        # age in years, 16-97
  yearVeh = 2002,       # year of vehicle, 1955-2003
  airbag = "none",      # "none" "airbag"
  occRole = "pass"      # "driver" "pass"
)
dvcat seatbelt frontal sex ageOFocc yearVeh airbag occRole
1 10-24 none frontal f 22 2002 none pass
I want to generate random combinations of the valid values of the variables above and put them into a tibble.
For example, let's say I want a dataset with 3 rows, where every value in a new row is picked at random from the valid values of its variable.
dvcat seatbelt frontal sex ageOFocc yearVeh airbag occRole
1 10-24 none frontal f 22 2002 none pass
2 25-39 none frontal m 54 2010 none drive
3 40-54 belted frontal f 14 2016 airbag driver
If the valid values are stored in a named list (here `lst1`, defined in the "data" section below), sample from each element with
library(purrr)
# Draw 3 values (with replacement) from every element of `lst1`; the
# per-variable samples are combined into one data frame.
map_dfr(lst1, function(values) sample(values, size = 3, replace = TRUE))
# A tibble: 3 x 8
# dvcat seatbelt frontal sex ageOFocc yearVeh airbag occRole
# <chr> <chr> <chr> <chr> <int> <int> <chr> <chr>
#1 40-54 none notfrontal f 71 1997 none driver
#2 40-54 none frontal m 87 1974 airbag driver
#3 25-39 belted notfrontal m 56 2001 none driver
Or in base R
# Base R equivalent: sample 3 values (with replacement) per variable, then
# assemble the resulting list of columns into a data frame.
data.frame(
  lapply(lst1, function(values) sample(values, size = 3, replace = TRUE))
)
data
# Valid values of every variable, one list element per column of the
# data set to be generated.
lst1 <- list(
  dvcat = c("1-9", "10-24", "25-39", "40-54", "55+"),
  seatbelt = c("none", "belted"),
  frontal = c("notfrontal", "frontal"),
  sex = c("f", "m"),
  ageOFocc = 16:97,
  yearVeh = 1955:2003,
  airbag = c("none", "airbag"),
  occRole = c("driver", "pass")
)
Related
Okay, I hope I manage to sum up what I need to achieve. I am running experiments in which I obtain data from two different sources, with date_time being the variable that matches records between them. The data in the two separate sources have the same structure (in csv or txt). The distinction is in the filenames. I provide an example below:
# Example input: four data frames named after the files they stand in for.
# The "solid" tables hold frequency and index readings, the "water" tables
# hold temperature readings; every table has a date_time column.
list_of_files <- list(
  solid_epoxy1_10 = data.frame(
    date_time = c("20/07/2022 13:46", "20/07/2022 13:56", "20/07/2022 14:06"),
    frequency = c("30000", "31000", "32000"),
    index = c("1", "2", "3")
  ),
  solid_otherpaint_20 = data.frame(
    date_time = c("20/07/2022 13:10", "20/07/2022 13:20", "20/07/2022 14:30"),
    frequency = c("20000", "21000", "22000"),
    index = c("1", "2", "3")
  ),
  water_epoxy1_10 = data.frame(
    date_time = c("20/07/2022 13:46", "20/07/2022 13:56", "20/07/2022 14:06"),
    temperature = c("22.3", "22.6", "22.5")
  ),
  water_otherpaint_20 = data.frame(
    date_time = c("20/07/2022 13:10", "20/07/2022 13:20", "20/07/2022 14:30"),
    temperature = c("24.5", "24.6", "24.8")
  )
)
First I want to read the data files in two separate lists. One that contains the keyword "solid" in the file name, and the other one that contains "water".
Then I need to create new columns in each data frame from the parts of the filename, which are separated by "_" (e.g. paint = "epoxy1", thickness = "10"), so that I can do an inner join on date_time, paint, thickness, etc. Basically, what I have struggled with so far is writing a function that loads the files into two separate lists. This is what I've tried so far:
# Read a set of data files and split them into two lists by file name.
#
# A file whose name contains "solid" goes into the first list
# (`all.files.board`); every other file goes into the second list
# (`all.files.temp`). The split is decided per file from its own name, and
# the result is returned only after every file has been read.
#
# @param list_of_files Character vector of file paths readable by `fread()`.
# @return A list of two lists: the tables read from "solid" files first,
#   the tables read from the remaining files second.
load_files <- function(list_of_files) {
  # match on the file name only, so directory names cannot influence the split
  is_board <- grepl("solid", basename(list_of_files), fixed = TRUE)
  all.files.board <- lapply(list_of_files[is_board], fread)
  all.files.temp <- lapply(list_of_files[!is_board], fread)
  list(all.files.board, all.files.temp)
}
But it doesn't do what I need it to. I hope I made it as clear as possible. I'm pretty comfortable with the tidyverse packages but still a newbie at writing custom functions. Any ideas are welcome.
Regarding question in the title -
first issue, calling return() too early and thus breaking a for-loop, was already mentioned in comments and that should be sorted.
next one is condition itself, if (exists("board")){} checks if there is an object called board; in provided sample it would evaluate to TRUE only if something was assigned to global board object before calling load_files() function and it would evaluate to FALSE only if there were no such assignment or board was explicitly removed. I.e. with
board <- "something"; dataframes <- load_files(file_list) that check will be TRUE while with
rm(board); dataframes <- load_files(file_list) it will be FALSE. There's nothing in the function itself that would change the "existence" of board, so the result is actually determined before calling the function.
If core of the question is about joining 2 somewhat different datasets and splitting result by groups, I'd just drop loops, conditions and most of involved lists and would go with something like this with Tidyverse:
library(fs)
library(readr)
library(stringr)
library(dplyr)
library(tidyr)
# prepare input files for sample ------------------------------------------
# Four sample tables, named like the files they will be written to.
# "solid" tables: date_time, frequency, index; "water" tables: date_time,
# temperature.
sample_dfs <- list(
  solid_epoxy1_10 = data.frame(
    date_time = c(
      "20/07/2022 13:46",
      "20/07/2022 13:56",
      "20/07/2022 14:06"
    ),
    frequency = c("30000", "31000", "32000"),
    index = c("1", "2", "3")
  ),
  solid_otherpaint_20 = data.frame(
    date_time = c(
      "20/07/2022 13:10",
      "20/07/2022 13:20",
      "20/07/2022 14:30"
    ),
    frequency = c("20000", "21000", "22000"),
    index = c("1", "2", "3")
  ),
  water_epoxy1_10 = data.frame(
    date_time = c(
      "20/07/2022 13:46",
      "20/07/2022 13:56",
      "20/07/2022 14:06"
    ),
    temperature = c("22.3", "22.6", "22.5")
  ),
  water_otherpaint_20 = data.frame(
    date_time = c(
      "20/07/2022 13:10",
      "20/07/2022 13:20",
      "20/07/2022 14:30"
    ),
    temperature = c("24.5", "24.6", "24.8")
  )
)
# Write every sample data frame to its own CSV file in a fresh temp directory.
tmp_path <- file_temp("reprex")
dir_create(tmp_path)
# File names are "<position>_<list name>.csv". seq_along() is used instead of
# 1:length() so that an empty list does not produce the indices 1 and 0.
sample_filenames <- str_glue("{seq_along(sample_dfs)}_{names(sample_dfs)}.csv")
for (i in seq_along(sample_dfs)) {
  write_csv(sample_dfs[[i]], path(tmp_path, sample_filenames[i]))
}
# list the files that were just written
dir_ls(tmp_path, type = "file")
#> Temp/RtmpqUoct8/reprex5cc517f177b/1_solid_epoxy1_10.csv
#> Temp/RtmpqUoct8/reprex5cc517f177b/2_solid_otherpaint_20.csv
#> Temp/RtmpqUoct8/reprex5cc517f177b/3_water_epoxy1_10.csv
#> Temp/RtmpqUoct8/reprex5cc517f177b/4_water_otherpaint_20.csv
# read files --------------------------------------------------------------
# Read all "solid" CSVs into one table. The source path of each row is kept
# in `filename`, which is then replaced by the `paint` and `thickness` parts
# captured from it.
t_solid <- dir_ls(path = tmp_path, type = "file", glob = "*solid*.csv") %>%
  read_csv(id = "filename") %>%
  extract(
    col = filename,
    into = c("paint", "thickness"),
    regex = "_([^_]+)_(\\d+)\\.csv"
  )
t_solid
#> # A tibble: 6 × 5
#> paint thickness date_time frequency index
#> <chr> <chr> <chr> <dbl> <dbl>
#> 1 epoxy1 10 20/07/2022 13:46 30000 1
#> 2 epoxy1 10 20/07/2022 13:56 31000 2
#> 3 epoxy1 10 20/07/2022 14:06 32000 3
#> 4 otherpaint 20 20/07/2022 13:10 20000 1
#> 5 otherpaint 20 20/07/2022 13:20 21000 2
#> 6 otherpaint 20 20/07/2022 14:30 22000 3
# Same as for the "solid" files, applied to the "water" CSVs: read them into
# one table and turn the file name into `paint` and `thickness` columns.
t_water <- dir_ls(path = tmp_path, type = "file", glob = "*water*.csv") %>%
  read_csv(id = "filename") %>%
  extract(
    col = filename,
    into = c("paint", "thickness"),
    regex = "_([^_]+)_(\\d+)\\.csv"
  )
t_water
#> # A tibble: 6 × 4
#> paint thickness date_time temperature
#> <chr> <chr> <chr> <dbl>
#> 1 epoxy1 10 20/07/2022 13:46 22.3
#> 2 epoxy1 10 20/07/2022 13:56 22.6
#> 3 epoxy1 10 20/07/2022 14:06 22.5
#> 4 otherpaint 20 20/07/2022 13:10 24.5
#> 5 otherpaint 20 20/07/2022 13:20 24.6
#> 6 otherpaint 20 20/07/2022 14:30 24.8
# or implement as a function ----------------------------------------------
# Read every file under `csv_path` that matches `glob` into a single table.
#
# @param csv_path Directory to search for files.
# @param glob Glob pattern selecting the files to read (default "*.csv").
# @return The combined table, with the source file name replaced by the
#   `paint` and `thickness` columns captured from it.
load_files <- function(csv_path, glob = "*.csv") {
  csv_files <- dir_ls(csv_path, glob = glob, type = "file")
  # `id = "filename"` stores each row's source path in a filename column
  readings <- read_csv(csv_files, id = "filename", show_col_types = FALSE)
  # one new column per regex group
  extract(readings, filename, c("paint", "thickness"), "_([^_]+)_(\\d+)\\.csv")
}
# join / group / split ----------------------------------------------------
t_solid <- load_files(csv_path = tmp_path, glob = "*solid*.csv")
t_water <- load_files(csv_path = tmp_path, glob = "*water*.csv")
# Join on every shared key column (or select only the required columns
# beforehand); shared columns left out of `by` would otherwise show up twice,
# with .x / .y suffixes.
t_solid %>%
  inner_join(t_water, by = c("date_time", "paint", "thickness")) %>%
  group_by(paint) %>%
  group_split()
Final result as a list of tibbles:
#> <list_of<
#> tbl_df<
#> paint : character
#> thickness : character
#> date_time : character
#> frequency : double
#> index : double
#> temperature: double
#> >
#> >[2]>
#> [[1]]
#> # A tibble: 3 × 6
#> paint thickness date_time frequency index temperature
#> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 epoxy1 10 20/07/2022 13:46 30000 1 22.3
#> 2 epoxy1 10 20/07/2022 13:56 31000 2 22.6
#> 3 epoxy1 10 20/07/2022 14:06 32000 3 22.5
#>
#> [[2]]
#> # A tibble: 3 × 6
#> paint thickness date_time frequency index temperature
#> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 otherpaint 20 20/07/2022 13:10 20000 1 24.5
#> 2 otherpaint 20 20/07/2022 13:20 21000 2 24.6
#> 3 otherpaint 20 20/07/2022 14:30 22000 3 24.8
I have two data sets I would like to join. The income_range data is the master dataset, and I would like to join data_occ to it based on which band the income falls inside. Where more than one observation (income) falls within a band, I would like to take the lowest income.
I was attempting to use data.table but was having trouble. I would also like to keep all columns from both data.frames if possible.
The output dataset should only have 7 observations.
library(data.table)
library(dplyr)
# Income bands (master table): one row per band with its lower and upper
# limit and a `perct` value.
income_range <- data.frame(
  id = "France",
  inc_lower = c(10, 21, 31, 41, 51, 61, 71),
  inc_high = c(20, 30, 40, 50, 60, 70, 80),
  perct = c(1, 2, 3, 4, 5, 6, 7)
)
# 100 observations, 50 per country. Only 50 incomes are sampled, so
# data.frame() recycles them across the 100 rows.
data_occ <- data.frame(
  id = rep(c("France", "Belgium"), each = 50),
  income = sample(10:80, 50),
  occ = rep(c("manager", "clerk", "manual", "skilled", "office"), each = 20)
)
# convert both tables to data.table
setDT(income_range)
setDT(data_occ)
First attempt.
# Non-equi join attempt: for each row of data_occ, look up the income_range
# band with the same id whose inc_lower..inc_high interval contains income,
# and keep the listed columns.
# NOTE(review): in X[i, on = ...] the rows of i (here data_occ) drive the
# result, so this presumably returns one row per data_occ row rather than
# one row per band -- confirm against the data.table join documentation.
df2 <- income_range [data_occ ,
on = .(id, inc_lower <= income, inc_high >= income),
.(id, income, inc_lower,inc_high,perct,occ)]
Thank you in advance.
Since you tagged dplyr, here's one possible solution using that library:
library('fuzzyjoin')
# Left-join every income band to the data_occ rows that have the same id and
# an income between inc_lower and inc_high (id == id, inc_lower <= income,
# inc_high >= income).
joined <- income_range %>%
  fuzzy_left_join(
    data_occ,
    by = c("id" = "id", "inc_lower" = "income", "inc_high" = "income"),
    match_fun = list(`==`, `<=`, `>=`)
  ) %>%
  # both inputs carry an id column; keep one copy under its original name
  select(-id.y) %>%
  rename(id = id.x)
# Sort by income so that the first row kept for every perct value is the one
# with the lowest income.
result <- joined %>%
  arrange(income) %>%
  group_by(perct) %>%
  slice(1)
And the (intermediate) results:
> head(joined)
id inc_lower inc_high perct income occ
1 France 10 20 1 10 manager
2 France 10 20 1 19 manager
3 France 10 20 1 14 manager
4 France 10 20 1 11 manager
5 France 10 20 1 17 manager
6 France 10 20 1 12 manager
> result
# A tibble: 7 x 6
# Groups: perct [7]
id inc_lower inc_high perct income occ
<chr> <dbl> <dbl> <dbl> <int> <chr>
1 France 10 20 1 10 manager
2 France 21 30 2 21 manual
3 France 31 40 3 31 manual
4 France 41 50 4 43 manager
5 France 51 60 5 51 clerk
6 France 61 70 6 61 manager
7 France 71 80 7 71 manager
I've added the intermediate data frame joined for ease of understanding. You can omit it and just chain the two pipelines together with %>%.
Here is one data.table approach:
# Temporarily copy income into inc_lower / inc_high so that data_occ can be
# matched against the band limits of income_range in a non-equi join.
cols <- c("inc_lower", "inc_high")
data_occ[, (cols) := income]
# data_occ is ordered by income first, so mult = "first" keeps the lowest
# matching income for every band.
result <- data_occ[order(income)][
  income_range,
  on = .(id, inc_lower >= inc_lower, inc_high <= inc_high),
  mult = "first"
]
# remove the helper columns from data_occ again
data_occ[, (cols) := NULL]
# id income occ inc_lower inc_high perct
# 1: France 10 clerk 10 20 1
# 2: France 21 manager 21 30 2
# 3: France 31 clerk 31 40 3
# 4: France 41 clerk 41 50 4
# 5: France 51 clerk 51 60 5
# 6: France 62 manager 61 70 6
# 7: France 71 manager 71 80 7
I have data in long format, and I'm trying to test each row against the mean of a certain grouping combination, in order to generate a new column with the conclusion from that test.
Example
In this toy example, I have data about 20 cars. Each car could be of one of three possible makers. We have mpg data for each car, measured 8 times: in the city or highway, in the morning or evening, during the winter or spring.
library(tidyr)
set.seed(2021)
# 20 cars, each randomly assigned one of three makers
df_id_and_makers <- data.frame(
  id = 1:20,
  maker = sample(c("toyota", "audi", "bmw"), size = 20, replace = TRUE)
)
# one row per car for every road_type x time_of_day x season combination
df <- tidyr::expand_grid(
  df_id_and_makers,
  road_type = c("city", "highway"),
  time_of_day = c("morning", "evening"),
  season = c("winter", "spring")
)
# random mpg reading for every row
df$mpg_val <- sample(15:40, size = nrow(df), replace = TRUE)
df
#> # A tibble: 160 x 6
#> id maker road_type time_of_day season mpg_val
#> <int> <chr> <chr> <chr> <chr> <int>
#> 1 1 bmw city morning winter 28
#> 2 1 bmw city morning spring 22
#> 3 1 bmw city evening winter 40
#> 4 1 bmw city evening spring 18
#> 5 1 bmw highway morning winter 19
#> 6 1 bmw highway morning spring 36
#> 7 1 bmw highway evening winter 30
#> 8 1 bmw highway evening spring 16
#> 9 2 audi city morning winter 33
#> 10 2 audi city morning spring 18
#> # ... with 150 more rows
Created on 2021-07-07 by the reprex package (v2.0.0)
I want to analyze this data to test my hypothesis that mpg in city is larger than mpg in highway. To this end, I want to create a new column that tests whether the value in mpg_val when road_type is city is larger than the mean of mpg_val across rows where road_type is highway. Furthermore, I want to compare just among cars of the same makers.
So, for example, id = 1 is bmw, and therefore the new column I want to compute should test each value of mpg_val in rows where road_type == city (i.e., rows 1-4, but not 5-6), and see whether mpg_val is larger than mean(mpg_val) in rows where road_type == highway and maker == bmw.
Expected output
Here's the manual and dumb way of doing this. I'll show only how I do this for maker = bmw for the sake of demonstration.
library(dplyr)
# step 1 -- mean of `mpg_val` over the rows with `maker = "bmw"` and `road_type = "highway"`
mean_bmw_highway_mpg <-
  df %>%
  filter(maker == "bmw", road_type == "highway") %>%
  summarise(highway_mean = mean(mpg_val)) %>%
  pull(highway_mean)
mean_bmw_highway_mpg
## [1] 26.22222
# step 2 -- label every row: rows that are not bmw or not city are "not_relevant";
# the remaining rows are "yes" when `mpg_val` exceeds `mean_bmw_highway_mpg`, else "no"
result_bmw_only <-
  df %>%
  mutate(
    is_mpg_city_larger_than_mpg_highway = case_when(
      maker != "bmw" ~ "not_relevant",
      road_type != "city" ~ "not_relevant",
      mpg_val > mean_bmw_highway_mpg ~ "yes",
      TRUE ~ "no"
    )
  )
result_bmw_only
## # A tibble: 160 x 7
## id maker road_type time_of_day season mpg_val is_mpg_city_larger_than_mpg_highway
## <int> <chr> <chr> <chr> <chr> <int> <chr>
## 1 1 bmw city morning winter 28 yes ## because 28 > 26.222
## 2 1 bmw city morning spring 22 no ## because 22 < 26.222
## 3 1 bmw city evening winter 40 yes
## 4 1 bmw city evening spring 18 no
## 5 1 bmw highway morning winter 19 not_relevant
## 6 1 bmw highway morning spring 36 not_relevant
## 7 1 bmw highway evening winter 30 not_relevant
## 8 1 bmw highway evening spring 16 not_relevant
## 9 2 audi city morning winter 33 not_relevant
## 10 2 audi city morning spring 18 not_relevant
## # ... with 150 more rows
How could I achieve the same result as result_bmw_only (but applied to the entire df) in a more elegant way? Hopefully using dplyr approach, because this is what I'm used to, but otherwise any method will do.
Thanks!
EDIT 1
One solution I could think of involves purrr, but I can't get this done yet.
library(purrr)
# Nest the data per maker and add the comparison column inside each nested
# table. Note that mean(mpg_val) is taken over all rows of the maker's table,
# city and highway alike.
solution_purrr <-
  df %>%
  group_by(maker) %>%
  nest(data = -maker) %>%
  mutate(
    tbl_with_desired_new_col = map(
      .x = data,
      .f = function(maker_rows) {
        mutate(
          maker_rows,
          is_mpg_city_lrgr_thn_mpg_hwy = case_when(
            road_type != "city" ~ "not_relevant",
            mpg_val > mean(mpg_val) ~ "yes",
            TRUE ~ "no"
          )
        )
      }
    )
  )
It seems that solution_purrr gets the desired output, but not exactly. This is because the second logic in case_when (i.e., mpg_val > mean(mpg_val) ~ "yes") is not what I want. I want to compare mpg_val to mean(mpg_val) when that mean is computed based only on rows where road_type == "highway". But here mean(mpg_val) computes across all rows.
EDIT 2
Based on @Till's answer below, I'd like to clarify that I'm looking for a solution that avoids a separate calculation of the mean we want to test against. What I did above with mean_bmw_highway_mpg is the undesired way of working towards the output. I showed mean_bmw_highway_mpg only to demonstrate the kind of mean I need to calculate.
What you tried is already close. Take a look at the documentation of dplyr::group_by()
it is designed for these kinds of operations.
Below is how you can expand your BMW-only solution to the full dataset using group_by().
library(tidyverse)
# Mean highway mpg for every maker.
mean_highway_mpg_df <-
  df %>%
  filter(road_type == "highway") %>%
  group_by(maker) %>%
  summarise(mean_highway_mpg = mean(mpg_val))
# Attach each maker's highway mean to its city rows and compare row by row.
result_df <-
  df %>%
  filter(road_type == "city") %>%
  group_by(maker) %>%
  # The join key is spelled out so the join does not depend on which column
  # names the two tables happen to share (and no "Joining, by" message is
  # printed).
  left_join(mean_highway_mpg_df, by = "maker") %>%
  mutate(mpg_city_higher_highway = mpg_val > mean_highway_mpg)
# show the result without the time_of_day and season columns
result_df %>%
  select(-(time_of_day:season))
#> # A tibble: 80 x 6
#> # Groups: maker [3]
#> id maker road_type mpg_val mean_highway_mpg mpg_city_higher_highway
#> <int> <chr> <chr> <int> <dbl> <lgl>
#> 1 1 bmw city 28 26.2 TRUE
#> 2 1 bmw city 22 26.2 FALSE
#> 3 1 bmw city 40 26.2 TRUE
#> 4 1 bmw city 18 26.2 FALSE
#> 5 2 audi city 33 28.1 TRUE
#> 6 2 audi city 18 28.1 FALSE
#> 7 2 audi city 35 28.1 TRUE
#> 8 2 audi city 36 28.1 TRUE
#> 9 3 audi city 25 28.1 FALSE
#> 10 3 audi city 32 28.1 TRUE
#> # … with 70 more rows
I think I got this. The following solution is based on both my EDIT 1 above and @MrFlick's comment here.
First, we define a helper function:
# Element-wise test of whether the values in `x` exceed the mean of `y`.
#
# @param x Vector of values to test.
# @param y Vector whose mean is the threshold.
# @return A logical vector with one element per element of `x`.
is_x_larger_than_mean_y <- function(x, y) {
  threshold <- mean(y)
  x > threshold
}
Then, we run:
library(dplyr)
library(purrr)
library(tidyr)
# Nest the rows per maker, add the comparison column inside each nested
# table, then unnest back into one long table. Within a nested table the
# threshold is the mean of mpg_val over its highway rows only.
df %>%
  group_by(maker) %>%
  nest(data = -maker) %>%
  mutate(
    tbl_with_desired_new_col = map(
      .x = data,
      .f = function(maker_rows) {
        mutate(
          maker_rows,
          is_mpg_city_lrgr_thn_mpg_hwy = case_when(
            road_type != "city" ~ "not_relevant",
            is_x_larger_than_mean_y(mpg_val, mpg_val[road_type == "highway"]) ~ "yes",
            TRUE ~ "no"
          )
        )
      }
    )
  ) %>%
  select(-data) %>%
  unnest(cols = tbl_with_desired_new_col)
This way, the line within case_when() that says is_x_larger_than_mean_y(mpg_val, mpg_val[road_type == "highway"]) ~ "yes" ensures that we compute the mean of mpg_val only based on rows in which road_type == "highway".
right now I'm trying to create a data frame that contains the mean of two columns for two separate labels/categories.
But, I don't know how to calculate the mean for two columns, it just returns the same mean for both winner and opponent/loser.
Currently, I'm using the tidyverse library.
Here is the original data frame:
winner_hand winner_ht winner_ioc winner_age opponent_hand opponent_ht opponent_ioc opponent_age result name
<chr> <dbl> <chr> <dbl> <chr> <dbl> <chr> <dbl> <fct> <chr>
R 178 JPN 29.00479 R NA RUS 22.88569 winner Kei Nishikori
R NA RUS 22.88569 R 188 FRA 33.70568 winner Daniil Medvedev
R 178 JPN 29.00479 R 188 FRA 31.88227 winner Kei Nishikori
R 188 FRA 33.70568 R NA AUS 19.86858 winner Jo Wilfried Tsonga
R NA RUS 22.88569 R 196 CAN 28.01095 winner Daniil Medvedev
R 188 FRA 31.88227 R NA JPN 26.40383 winner Jeremy Chardy
My code:
# Mean of the winner_age column within every level of `result`.
age_summary <- summarize(
  group_by(game_data, result),
  mean_age = mean(winner_age)
)
age_summary
Resulting Data frame:
result mean_age
<fct> <dbl>
winner 27.68495
loser 27.68495
If you want summaries from two columns, you need expressions for each column in the call to summarize().
Example with fake data, since your excerpt only has one value for the 'result' column:
library(tidyverse)
# Small literal CSV with two rows per `result` level. Every data row has
# exactly three fields, matching the three-column header (a trailing comma
# would add a fourth, empty field to that row).
dat <- read_csv(
"result, winner_age, opponent_age
A, 5, 10
A, 6, 11
B, 12, 2
B, 13, 1")
dat %>%
  group_by(result) %>%
  # note: two expressions here, one per summarised column:
  summarise(
    mean_winner_age = mean(winner_age),
    mean_opponent_age = mean(opponent_age)
  )
output:
# A tibble: 2 x 3
result mean_winner_age mean_opponent_age
<chr> <dbl> <dbl>
1 A 5.5 10.5
2 B 12.5 1.5
I've been trying to extract data from a data frame that has a nested list, but after trying to use string function (str_detect) I can only work on the variable Abstract here is a sample of my data
{r setup, include=FALSE}
library(RISmed)
library(reticulate)
library(dplyr)
library(tibble)
library(stringr)
# Query records for the topic, restricted to 2005-2015 and at most 2000 hits.
RCT_topic <- "Randomized Clinical Trial"
RCT_query <- EUtilsSummary(RCT_topic, mindate = 2005, maxdate = 2015, retmax = 2000)
summary(RCT_query)
RCT_records <- EUtilsGet(RCT_query)
# One row per record. tibble() replaces the deprecated data_frame().
RCT_data <- tibble(
  "PMID" = PMID(RCT_records),
  "Title" = ArticleTitle(RCT_records),
  "Abstract" = AbstractText(RCT_records),
  "Year Published" = YearPubmed(RCT_records),
  "Month Published" = MonthPubmed(RCT_records),
  "Country" = Country(RCT_records),
  "Grant" = GrantID(RCT_records),
  "Acronym" = Acronym(RCT_records),
  "Agency" = Agency(RCT_records),
  "Mesh" = Mesh(RCT_records)
)
# Latino: flag the records whose abstract mentions Latino / Hispanic
RCT_data$Latino <- grepl("Latino|latino|Hispanic|hispanic", RCT_data$Abstract)
table(RCT_data$Latino)
# Latino is a logical vector, so it selects rows directly; no comparison with
# the string "TRUE" is needed.
RCT_true <- RCT_data[RCT_data$Latino, ]
# NOTE(review): str_detect() receives the whole RCT_true data frame here, not
# a single column -- presumably not what was intended; confirm.
RCT_true %>% str_detect("Hispanic Americans")
# NOTE(review): Mesh comes from Mesh(RCT_records) and appears to hold one
# nested object per record, so str_detect() on the column as a whole would
# presumably not search the headings inside each element -- verify.
RCT_true %>% mutate(Latino_Mesh = ifelse(Mesh %>% str_detect("Latino|latino|Hispanic|hispanic"), "yes", "no"))
This code creates a subset of 11 observations whose Abstract contains the word Latino or Hispanic. I'm trying to find a way to have R search the variable Mesh as well, but it doesn't seem to be able to read the whole nested list, even though, when you inspect each element, you can clearly see that the heading "Hispanic Americans" is there in the list.
{r}
RCT_true$Mesh
I'm trying to find a way to so that R can read RCT_true$Mesh and return that Yes there is a word "Hispanic Americans" in it in a new column like I did before with the Abstract variable
Since RCT_true$Mesh are lists containing a data frame, each data frame should be evaluated with grepl function and save results in an indexer vector (i.e. RCT_true$Mesh_Latino):
# Flag every record whose MeSH headings mention Latino / Hispanic.
# Each element of RCT_true$Mesh is inspected on its own through its Heading
# column. vapply() enforces exactly one logical value per record, so the
# result is always a plain logical vector as long as RCT_true$Mesh.
RCT_true$Mesh_Latino <- vapply(
  RCT_true$Mesh,
  function(mesh_terms) {
    any(grepl(
      "(Latino|latino|Hispanic|hispanic)",
      as.character(mesh_terms$Heading)
    ))
  },
  logical(1),
  USE.NAMES = FALSE
)
# Mesh_Latino is logical, so it indexes the rows directly.
RCT_true[RCT_true$Mesh_Latino, ]
# # A tibble: 5 x 12
# PMID Title Abstract `Year Published` `Month Publishe… Country Grant Acronym Agency Mesh Latino Mesh_Latino
# <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <lis> <lgl> <lgl>
#1 2671… Beyo… "Within… 2015 12 United… CA01… CA NCI N… <dat… TRUE TRUE
#2 2670… Trea… OBJECTI… 2015 12 United… KL2 … TR NCATS… <dat… TRUE TRUE
#3 2669… Vali… "Resear… 2015 12 England NA NA NA <dat… TRUE TRUE
#4 2668… The … BACKGRO… 2015 12 United… K23 … MH NIMH … <dat… TRUE TRUE
#5 2665… Heal… BACKGRO… 2015 12 United… R01 … HL NHLBI… <dat… TRUE TRUE