How to unnest a list containing data frames - r

I'm trying to expand a nested column that contains a list of data frames. They are either NULL or 1 row by n columns, so the goal is to just add n columns to the tibble. (NULL list items would preferably expand to NAs).
I've tried several solutions including those from this answer.
The goal for the output would be a flat tibble with the following columns:
full_address, address, location.x, location.y, score, attributes.StreetName, attributes.Match_addr.
require(tidyverse)
#> Loading required package: tidyverse
df <- structure(list(full_address = c("2379 ADDISON BLVD, HIGH POINT, NC 27262",
"1751 W LEXINGTON AVE, HIGH POINT, NC 27262", "2514 WILLARD DAIRY RD, HIGH POINT, NC 27265",
"126 MARYWOOD DR, HIGH POINT, NC 27265", "508 EDNEY RIDGE RD, GREENSBORO, NC 27408"
), json = list(NULL, NULL, structure(list(address = "2514 WILLARD DAIRY",
location = structure(list(x = -79.9766181813648, y = 36.0477204695356), class = "data.frame", row.names = 1L),
score = 92.8, attributes = structure(list(StreetName = "WILLARD DAIRY",
Match_addr = "2514 WILLARD DAIRY"), class = "data.frame", row.names = 1L)), class = "data.frame", row.names = 1L),
structure(list(address = "126 MARYWOOD, HIGH POINT", location = structure(list(
x = -80.0202617159213, y = 36.0077059145502), class = "data.frame", row.names = 1L),
score = 97.24, attributes = structure(list(StreetName = "MARYWOOD",
Match_addr = "126 MARYWOOD, HIGH POINT"), class = "data.frame", row.names = 1L)), class = "data.frame", row.names = 1L),
structure(list(address = "508 EDNEY RIDGE RD", location = structure(list(
x = -79.840872836677, y = 36.1105523384593), class = "data.frame", row.names = 1L),
score = 100L, attributes = structure(list(StreetName = "EDNEY RIDGE",
Match_addr = "508 EDNEY RIDGE RD"), class = "data.frame", row.names = 1L)), class = "data.frame", row.names = 1L))), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -5L))
df
#> # A tibble: 5 x 2
#> full_address json
#> <chr> <list>
#> 1 2379 ADDISON BLVD, HIGH POINT, NC 27262 <NULL>
#> 2 1751 W LEXINGTON AVE, HIGH POINT, NC 27262 <NULL>
#> 3 2514 WILLARD DAIRY RD, HIGH POINT, NC 27265 <data.frame [1 × 4]>
#> 4 126 MARYWOOD DR, HIGH POINT, NC 27265 <data.frame [1 × 4]>
#> 5 508 EDNEY RIDGE RD, GREENSBORO, NC 27408 <data.frame [1 × 4]>
df %>% unnest(json)
#> Error: Argument 2 can't be a list containing data frames
df %>% map(unlist) %>% as_data_frame()
#> Warning: `as_data_frame()` is deprecated, use `as_tibble()` (but mind the new semantics).
#> This warning is displayed once per session.
#> Tibble columns must have consistent lengths, only values of length one are recycled:
#> * Length 5: Column `full_address`
#> * Length 18: Column `json`
df %>%
mutate_if(is.list, simplify_all) %>% # flatten each list element internally
unnest()
#> Error: Argument 2 can't be a list containing data frames
Created on 2019-04-19 by the reprex package (v0.2.1)

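One of the issues is that each data frame in the `json` column itself contains nested data.frame columns (`location` and `attributes`), so these have to be flattened before unnesting.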
library(tidyverse)
# Replace every element of the `json` list-column with a flat one-row table,
# then expand that list-column into ordinary columns.
df %>%
  mutate(json = map(json, function(el) {
    if (is.null(el)) {
      # NULL entries become a one-row placeholder of NAs so the row is kept.
      tibble(
        attributes.StreetName = NA_character_,
        attributes.Match_addr = NA_character_
      )
    } else {
      # Passing the columns to data.frame() splices the nested data.frame
      # columns into top-level columns.
      do.call(data.frame, c(el, stringsAsFactors = FALSE))
    }
  })) %>%
  # Name the column explicitly: a bare unnest() warns in tidyr >= 1.0.0.
  unnest(json)
# A tibble: 5 x 7
# full_address attributes.StreetNa… attributes.Match_ad… address location.x location.y score
# <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#1 2379 ADDISON BLVD, HIGH POINT, … <NA> <NA> <NA> NA NA NA
#2 1751 W LEXINGTON AVE, HIGH POIN… <NA> <NA> <NA> NA NA NA
#3 2514 WILLARD DAIRY RD, HIGH POI… WILLARD DAIRY 2514 WILLARD DAIRY 2514 WILLARD DAI… -80.0 36.0 92.8
#4 126 MARYWOOD DR, HIGH POINT, NC… MARYWOOD 126 MARYWOOD, HIGH … 126 MARYWOOD, HI… -80.0 36.0 97.2
#5 508 EDNEY RIDGE RD, GREENSBORO,… EDNEY RIDGE 508 EDNEY RIDGE RD 508 EDNEY RIDGE … -79.8 36.1 100
Or using map_if
# Apply flatten() to `dat` and return the result.
# NOTE(review): the printed column names (location.x, attributes.StreetName)
# suggest this is jsonlite::flatten rather than purrr::flatten -- confirm
# which package is attached when this runs.
f1 <- function(dat) {
  flatten(dat)
}
# Build a one-row placeholder tibble of character NAs.
# `dat` is accepted so the function can be used as a callback, but its value
# is ignored.
f2 <- function(dat) {
  na_chr <- NA_character_
  tibble(attributes.StreetName = na_chr, attributes.Match_addr = na_chr)
}
# Apply f1 to the data-frame entries of `json` and f2 to everything else,
# then expand the list-column into ordinary columns.
df %>%
  mutate(json = map_if(json, is.data.frame, f1, .else = f2)) %>%
  # Name the column explicitly: a bare unnest() warns in tidyr >= 1.0.0.
  unnest(json)
# A tibble: 5 x 7
# full_address attributes.StreetNa… attributes.Match_ad… address score location.x location.y
# <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#1 2379 ADDISON BLVD, HIGH POINT, … <NA> <NA> <NA> NA NA NA
#2 1751 W LEXINGTON AVE, HIGH POIN… <NA> <NA> <NA> NA NA NA
#3 2514 WILLARD DAIRY RD, HIGH POI… WILLARD DAIRY 2514 WILLARD DAIRY 2514 WILLARD DAI… 92.8 -80.0 36.0
#4 126 MARYWOOD DR, HIGH POINT, NC… MARYWOOD 126 MARYWOOD, HIGH … 126 MARYWOOD, HI… 97.2 -80.0 36.0
#5 508 EDNEY RIDGE RD, GREENSBORO,… EDNEY RIDGE 508 EDNEY RIDGE RD 508 EDNEY RIDGE … 100 -79.8 36.1

Related

Using Match and GrepL to insert values in one Column that Match a pattern in another column?

I am looking for help to insert values in one column of a database by comparing to another column in another database that has matching patterns of addresses. Please see below what I mean.
I am using R.
My data:
df1 <- structure(list(Names = c("A", "B", "C"), Address = c("101, 16 Ave NE",
"203, 4 Cross SE", "115, 48 Ave SW")), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -3L))
Names Address
<chr> <chr>
1 A 101, 16 Ave NE
2 B 203, 4 Cross SE
3 C 115, 48 Ave SW
df2 <- structure(list(ID = c(1415, 2106, 2107), Address = c("101 16 Ave",
"115 48 Ave SW Cresmont", "203 Skyview 4 Cross SE Near Walmart"
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-3L))
ID Address
<dbl> <chr>
1 1415 101 16 Ave
2 2106 115 48 Ave SW Cresmont
3 2107 203 Skyview 4 Cross SE Near Walmart
Desired output:
Names Address ID
<chr> <chr> <dbl>
1 A 101, 16 Ave NE 1415
2 B 203, 4 Cross SE 2107
3 C 115, 48 Ave SW 2106
Dataframe Example
Try this:
We could join df1 and df2 based on fuzzy string matching of their Address columns.
With max_dist we can define the maximum distance allowed between two strings for them to be joined.
See: ?stringdist_left_join
library(dplyr)
library(fuzzyjoin)
# Left-join df2 onto df1 by approximate match of Address (Jaccard string
# distance, at most 0.55), then keep Names, df1's Address and the matched ID.
# The distance is written to a `dist` column, which the final select() drops.
df1 %>%
  fuzzyjoin::stringdist_left_join(
    df2,
    by = c("Address" = "Address"),
    method = "jaccard",
    max_dist = 0.55,
    distance_col = "dist"
  ) %>%
  select(Names, Address = Address.x, ID)
# A tibble: 3 x 3
Names Address ID
<chr> <chr> <dbl>
1 A 101, 16 Ave NE 1415
2 B 203, 4 Cross SE 2107
3 C 115, 48 Ave SW 2106

Summarise across each column by grouping their names

I want to calculate the weighted variance using the weights provided in the dataset, grouping by country and city; however, the function returns NAs:
library(Hmisc) #for the 'wtd.var' function
# Group by country and capital city, then apply wtd.var to every column whose
# name starts with "winter". No `weights` argument is passed to wtd.var here.
weather_winter.std<-weather_winter %>%
group_by(country, capital_city) %>%
summarise(across(starts_with("winter"),wtd.var))
The provided output from the console (when in long format):
# A tibble: 35 x 3
# Groups: country [35]
country capital_city winter
<chr> <chr> <dbl>
1 ALBANIA Tirane NA
2 AUSTRIA Vienna NA
3 BELGIUM Brussels NA
4 BULGARIA Sofia NA
5 CROATIA Zagreb NA
6 CYPRUS Nicosia NA
7 CZECHIA Prague NA
8 DENMARK Copenhagen NA
9 ESTONIA Tallinn NA
10 FINLAND Helsinki NA
# … with 25 more rows
This is the code that I used to get the data from a wide format into a long format:
# Reshape from wide to long: every column except positions 31:33 is pivoted.
weather_winter <- weather_winter %>% pivot_longer(-c(31:33))
# Remove the `name` column.
weather_winter$name <- NULL
# Rename the fourth column to "winter".
names(weather_winter)[4] <- "winter"
Some example data:
structure(list(`dec-wet_2011` = c(12.6199998855591, 12.6099996566772,
14.75, 11.6899995803833, 18.2899990081787), `dec-wet_2012` = c(13.6300001144409,
14.2199993133545, 14.2299995422363, 16.1000003814697, 18.0299987792969
), `dec-wet_2013` = c(4.67999982833862, 5.17000007629395, 4.86999988555908,
7.56999969482422, 5.96000003814697), `dec-wet_2014` = c(14.2999992370605,
14.4799995422363, 13.9799995422363, 15.1499996185303, 16.1599998474121
), `dec-wet_2015` = c(0.429999977350235, 0.329999983310699, 1.92999994754791,
3.30999994277954, 7.42999982833862), `dec-wet_2016` = c(1.75,
1.29999995231628, 3.25999999046326, 6.60999965667725, 8.67999935150146
), `dec-wet_2017` = c(13.3400001525879, 13.3499994277954, 15.960000038147,
10.6599998474121, 14.4699993133545), `dec-wet_2018` = c(12.210000038147,
12.4399995803833, 11.1799993515015, 10.75, 18.6299991607666),
`dec-wet_2019` = c(12.7199993133545, 13.3800001144409, 13.9899997711182,
10.5299997329712, 12.3099994659424), `dec-wet_2020` = c(15.539999961853,
16.5200004577637, 11.1799993515015, 14.7299995422363, 13.5499992370605
), `jan-wet_2011` = c(8.01999950408936, 7.83999967575073,
10.2199993133545, 13.8899993896484, 14.5299997329712), `jan-wet_2012` = c(11.5999994277954,
11.1300001144409, 12.5500001907349, 10.1700000762939, 22.6199989318848
), `jan-wet_2013` = c(17.5, 17.4099998474121, 15.5599994659424,
13.3199996948242, 20.9099998474121), `jan-wet_2014` = c(12.5099992752075,
12.2299995422363, 15.210000038147, 9.73999977111816, 9.63000011444092
), `jan-wet_2015` = c(17.6900005340576, 16.9799995422363,
11.75, 9.9399995803833, 19), `jan-wet_2016` = c(15.6099996566772,
15.5, 14.5099992752075, 10.3899993896484, 18.4499988555908
), `jan-wet_2017` = c(9.17000007629395, 9.61999988555908,
9.30999946594238, 15.8499994277954, 11.210000038147), `jan-wet_2018` = c(8.55999946594238,
9.10999965667725, 13.2599992752075, 9.85999965667725, 15.8899993896484
), `jan-wet_2019` = c(17.0699996948242, 16.8699989318848,
14.5699996948242, 19.0100002288818, 19.4699993133545), `jan-wet_2020` = c(6.75999975204468,
6.25999975204468, 6.00999975204468, 5.35999965667725, 8.15999984741211
), `feb-wet_2011` = c(9.1899995803833, 8.63999938964844,
6.21999979019165, 9.82999992370605, 4.67999982833862), `feb-wet_2012` = c(12.2699995040894,
11.6899995803833, 8.27999973297119, 14.9399995803833, 13.0499992370605
), `feb-wet_2013` = c(15.3599996566772, 15.9099998474121,
17.0599994659424, 13.3599996566772, 16.75), `feb-wet_2014` = c(10.1999998092651,
11.1399993896484, 13.8599996566772, 10.7399997711182, 7.35999965667725
), `feb-wet_2015` = c(11.9200000762939, 12.2699995040894,
8.01000022888184, 14.5299997329712, 5.71999979019165), `feb-wet_2016` = c(14.6999998092651,
14.7799997329712, 16.7899990081787, 4.90000009536743, 19.3500003814697
), `feb-wet_2017` = c(8.98999977111816, 9.17999935150146,
11.7699995040894, 6.3899998664856, 13.9899997711182), `feb-wet_2018` = c(16.75,
16.8599987030029, 12.0599994659424, 16.1900005340576, 8.51000022888184
), `feb-wet_2019` = c(7.58999967575073, 7.26999998092651,
8.21000003814697, 7.57999992370605, 8.81999969482422), `feb-wet_2020` = c(10.6399993896484,
10.4399995803833, 13.4399995803833, 8.53999996185303, 19.939998626709
), country = c("SERBIA", "SERBIA", "SLOVENIA", "GREECE",
"CZECHIA"), capital_city = c("Belgrade", "Belgrade", "Ljubljana",
"Athens", "Prague"), weight = c(20.25, 19.75, 14.25, 23.75,
14.25)), row.names = c(76L, 75L, 83L, 16L, 5L), class = "data.frame")
Your code seems to provide the right answer, now there's more data:
# Groups: country [4]
country capital_city winter
<chr> <chr> <dbl>
1 CZECHIA Prague 27.2
2 GREECE Athens 14.6
3 SERBIA Belgrade 19.1
4 SLOVENIA Ljubljana 16.3
Is this what you were looking for?
I took the liberty of streamlining your code:
# Long format: every column except positions 31:33 is stacked into a single
# `winter` value column, and the `name` column is then discarded.
weather_winter <- weather_winter %>%
  pivot_longer(cols = -c(31:33), values_to = "winter") %>%
  select(-name)

# Variance of `winter` per country / capital city (no weights supplied).
weather_winter.std <- weather_winter %>%
  group_by(country, capital_city) %>%
  summarise(winter = wtd.var(winter))
With only one "winter" column, there's no need for the across().
Finally, you are not using the weights. If these are needed, then change the last line to:
summarise(winter = wtd.var(winter, weights = weight))
To give:
# A tibble: 4 x 3
# Groups: country [4]
country capital_city winter
<chr> <chr> <dbl>
1 CZECHIA Prague 26.3
2 GREECE Athens 14.2
3 SERBIA Belgrade 18.8
4 SLOVENIA Ljubljana 15.8

Merge two dataframes: specifically merge a selection of columns based on two conditions?

I have two datasets on the same 2 patients. With the second dataset I want to add new information to the first, but I can't seem to get the code right.
My first (incomplete) dataset has a patient ID, measurement time (either T0 or FU1), year of birth, date of the CT scan, and two outcomes (legs_mass and total_mass):
library(tidyverse)
library(dplyr)
library(magrittr)
library(lubridate)
df1 <- structure(list(ID = c(115, 115, 370, 370), time = structure(c(1L,
6L, 1L, 6L), .Label = c("T0", "T1M0", "T1M6", "T1M12", "T2M0",
"FU1"), class = "factor"), year_of_birth = c(1970, 1970, 1961,
1961), date_ct = structure(c(16651, 17842, 16651, 18535), class = "Date"),
legs_mass = c(9.1, NA, NA, NA), total_mass = c(14.5, NA,
NA, NA)), row.names = c(NA, -4L), class = c("tbl_df", "tbl",
"data.frame"))
# Which gives the following dataframe
df1
# A tibble: 4 x 6
ID time year_of_birth date_ct legs_mass total_mass
<dbl> <fct> <dbl> <date> <dbl> <dbl>
1 115 T0 1970 2015-08-04 9.1 14.5
2 115 FU1 1970 2018-11-07 NA NA
3 370 T0 1961 2015-08-04 NA NA
4 370 FU1 1961 2020-09-30 NA NA
The second dataset adds to the legs_mass and total_mass columns:
df2 <- structure(list(ID = c(115, 370), date_ct = structure(c(17842,
18535), class = "Date"), ctscan_label = c("PXE115_CT_20181107_xxxxx-3.tif",
"PXE370_CT_20200930_xxxxx-403.tif"), legs_mass = c(956.1, 21.3
), total_mass = c(1015.9, 21.3)), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"))
# Which gives the following dataframe:
df2
# A tibble: 2 x 5
ID date_ct ctscan_label legs_mass total_mass
<dbl> <date> <chr> <dbl> <dbl>
1 115 2018-11-07 PXE115_CT_20181107_xxxxx-3.tif 956. 1016.
2 370 2020-09-30 PXE370_CT_20200930_xxxxx-403.tif 21.3 21.3
What I am trying to do, is...
Add the legs_mass and total_mass column values from df2 to df1, based on ID number and date_ct.
Add the new columns of df2 (the one that is not in df1; ctscan_label) to df1, also based on the date of the ct and patient ID.
So that the final dataset df3 looks as follows:
df3 <- structure(list(ID = c(115, 115, 370, 370), time = structure(c(1L,
6L, 1L, 6L), .Label = c("T0", "T1M0", "T1M6", "T1M12", "T2M0",
"FU1"), class = "factor"), year_of_birth = c(1970, 1970, 1961,
1961), date_ct = structure(c(16651, 17842, 16651, 18535), class = "Date"),
legs_mass = c(9.1, 956.1, NA, 21.3), total_mass = c(14.5,
1015.9, NA, 21.3)), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame"))
# Corresponding to the following tibble:
# A tibble: 4 x 6
ID time year_of_birth date_ct legs_mass total_mass
<dbl> <fct> <dbl> <date> <dbl> <dbl>
1 115 T0 1970 2015-08-04 9.1 14.5
2 115 FU1 1970 2018-11-07 956. 1016.
3 370 T0 1961 2015-08-04 NA NA
4 370 FU1 1961 2020-09-30 21.3 21.3
I have tried the merge function and rbind from baseR, and bind_rows from dplyr but can't seem to get it right.
Any help?
You can join the two datasets and use coalesce to keep one non-NA value from the two datasets.
library(dplyr)
# Join df2 onto df1 by patient ID and scan date, then fill each mass value
# from df1 (.x) with the one from df2 (.y) where df1's is NA.
left_join(df1, df2, by = c("ID", "date_ct")) %>%
  mutate(
    # Named `legs_mass` (not `leg_mass`) so the result keeps df1's column name.
    legs_mass = coalesce(legs_mass.x, legs_mass.y),
    total_mass = coalesce(total_mass.x, total_mass.y)
  ) %>%
  # Drop the suffixed helper columns; the anchored pattern only matches names
  # that end in .x or .y.
  select(-matches("\\.[xy]$"), -ctscan_label)
# ID time year_of_birth date_ct legs_mass total_mass
# <dbl> <fct> <dbl> <date> <dbl> <dbl>
#1 115 T0 1970 2015-08-04 9.1 14.5
#2 115 FU1 1970 2018-11-07 956. 1016.
#3 370 T0 1961 2015-08-04 NA NA
#4 370 FU1 1961 2020-09-30 21.3 21.3
We can use data.table methods
library(data.table)
# Convert both tables to data.table by reference, then update df1 in place:
# for rows that match df2 on ID and date_ct, fill NA masses from df2's
# columns (referred to with the i. prefix).
setDT(df1)
setDT(df2)
df1[df2, on = .(ID, date_ct), `:=`(
  legs_mass = fcoalesce(legs_mass, i.legs_mass),
  total_mass = fcoalesce(total_mass, i.total_mass)
)]
-output
df1
ID time year_of_birth date_ct legs_mass total_mass
1: 115 T0 1970 2015-08-04 9.1 14.5
2: 115 FU1 1970 2018-11-07 956.1 1015.9
3: 370 T0 1961 2015-08-04 NA NA
4: 370 FU1 1961 2020-09-30 21.3 21.3

R: create a data.frame or data.table from a list and unpacked list of lists

I have a list and a list of lists and would like to create a data.frame or data.table.
Here is the list:
head(stadte_namen)
[1] "Berlin" "Hamburg" "München"
and a list of lists
> head(result)
[[1]]
min max
x 13.22886 13.54886
y 52.35704 52.67704
[[2]]
min max
x 9.840654 10.16065
y 53.390341 53.71034
[[3]]
min max
x 11.36078 11.72291
y 48.06162 48.24812
How could I create a data.frame or a data.table with the following structure?
name xmin ymin xmax ymax
Berlin 13.22886 52.35704 13.54886 52.67704
Hamburg 9.840654 53.390341 10.16065 53.71034
München 11.36078 48.06162 11.72291 48.24812
...
Here is the data:
stadte_namen<-c("Berlin", "Hamburg", "München", "Köln", "Frankfurt am Main",
"Stuttgart")
result<-list(structure(c(13.2288599, 52.3570365, 13.5488599, 52.6770365
), .Dim = c(2L, 2L), .Dimnames = list(c("x", "y"), c("min", "max"
))), structure(c(9.840654, 53.390341, 10.160654, 53.710341), .Dim = c(2L,
2L), .Dimnames = list(c("x", "y"), c("min", "max"))), structure(c(11.360777,
48.0616244, 11.7229083, 48.2481162), .Dim = c(2L, 2L), .Dimnames = list(
c("x", "y"), c("min", "max"))), structure(c(6.7725303, 50.8304399,
7.162028, 51.0849743), .Dim = c(2L, 2L), .Dimnames = list(c("x",
"y"), c("min", "max"))), structure(c(8.4727933, 50.0155435, 8.8004716,
50.2271408), .Dim = c(2L, 2L), .Dimnames = list(c("x", "y"),
c("min", "max"))), structure(c(9.0386007, 48.6920188, 9.3160228,
48.8663994), .Dim = c(2L, 2L), .Dimnames = list(c("x", "y"),
c("min", "max"))))
You can also try:
# Each element of `result` is a 2 x 2 matrix (rows x/y, columns min/max).
# Flattening it column-wise yields c(x-min, y-min, x-max, y-max).
l <- result
# vapply() instead of sapply(): it fails loudly if an element does not
# flatten to exactly four numbers, rather than silently changing shape.
df <- data.frame(t(vapply(l, as.vector, numeric(4))))
colnames(df) <- c("minX", "minY", "maxX", "maxY")
# Reuse the existing vector of names instead of retyping it, so the city
# names cannot drift out of sync with `result`.
stopifnot(length(stadte_namen) == nrow(df))
df$stadte_namen <- stadte_namen
df
Answer:
minX minY maxX maxY stadte_namen
1 13.228860 52.35704 13.548860 52.67704 Berlin
2 9.840654 53.39034 10.160654 53.71034 Hamburg
3 11.360777 48.06162 11.722908 48.24812 München
4 6.772530 50.83044 7.162028 51.08497 Köln
5 8.472793 50.01554 8.800472 50.22714 Frankfurt am Main
6 9.038601 48.69202 9.316023 48.86640 Stuttgart
With lapply and purrr:
library(dplyr)
library(purrr)
# Turn each bounding-box matrix into a named vector of its four entries,
# bind the vectors row-wise into one table, then attach the city names.
data <- result %>%
  lapply(function(bbox) {
    c(
      xmin = bbox[1, 1],
      xmax = bbox[1, 2],
      ymin = bbox[2, 1],
      ymax = bbox[2, 2]
    )
  }) %>%
  purrr::map_dfr(~.x)
data$stadte_namen <- stadte_namen
# A tibble: 6 x 5
xmin xmax ymin ymax stadte_namen
<dbl> <dbl> <dbl> <dbl> <chr>
1 13.2 13.5 52.4 52.7 Berlin
2 9.84 10.2 53.4 53.7 Hamburg
3 11.4 11.7 48.1 48.2 München
4 6.77 7.16 50.8 51.1 Köln
5 8.47 8.80 50.0 50.2 Frankfurt am Main
6 9.04 9.32 48.7 48.9 Stuttgart
Assign stadte_namen as names to result and bind the data frames together into one data frame. You can then get the data in wide format using pivot_wider.
library(tidyverse)
# Name each matrix after its city, convert every matrix to a data frame with
# its row names (x / y) in a `row` column, stack them with the city in `name`,
# then spread to one row per city.
result %>%
  setNames(stadte_namen) %>%
  map_df(
    function(bbox) rownames_to_column(as.data.frame(bbox), "row"),
    .id = "name"
  ) %>%
  pivot_wider(names_from = row, values_from = c(min, max))
# name min_x min_y max_x max_y
# <chr> <dbl> <dbl> <dbl> <dbl>
#1 Berlin 13.2 52.4 13.5 52.7
#2 Hamburg 9.84 53.4 10.2 53.7
#3 München 11.4 48.1 11.7 48.2
#4 Köln 6.77 50.8 7.16 51.1
#5 Frankfurt am Main 8.47 50.0 8.80 50.2
#6 Stuttgart 9.04 48.7 9.32 48.9

code is running fine line by line but fails when ran as a whole chunk in rmarkdown

When I run this code line by line, the results are as expected. When I run the whole chunk, the mutations stop at the third line. How can I fix this? I feel like this is something new that I have not faced before with the same code.
Sample data:
> dput(head(out))
structure(list(SectionCut = c("S-1", "S-1", "S-1", "S-1", "S-2",
"S-2"), OutputCase = c("LL-1", "LL-2", "LL-3", "LL-4", "LL-1",
"LL-2"), V2 = c(81.782, 119.251, 119.924, 96.282, 72.503, 109.595
), M3 = c("-29.292000000000002", "-32.661999999999999", "-30.904",
"-23.632999999999999", "29.619", "32.994"), id = c("./100-12-S01.xlsx",
"./100-12-S01.xlsx", "./100-12-S01.xlsx", "./100-12-S01.xlsx",
"./100-12-S01.xlsx", "./100-12-S01.xlsx")), row.names = c(NA,
-6L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), groups = structure(list(
SectionCut = c("S-1", "S-1", "S-1", "S-1", "S-2", "S-2"),
OutputCase = c("LL-1", "LL-2", "LL-3", "LL-4", "LL-1", "LL-2"
), id = c("./100-12-S01.xlsx", "./100-12-S01.xlsx", "./100-12-S01.xlsx",
"./100-12-S01.xlsx", "./100-12-S01.xlsx", "./100-12-S01.xlsx"
), .rows = list(1L, 2L, 3L, 4L, 5L, 6L)), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))
> dput(head(Beamline_Shear))
structure(list(VLL = c(159.512186, 154.3336, 149.4451613, 167.0207595,
161.2269091, 156.4116505)), row.names = c("84-9", "84-12", "84-15",
"92-9", "92-12", "92-15"), class = "data.frame")
Code that I am trying to run:
# Drop column 4 of `out`, derive helper columns from OutputCase and id by
# regex extraction, join Beamline_Shear on a "Sections" key built from its
# row names, then discard the intermediate columns.
# NOTE(review): the "." characters in the str_remove_all pattern are
# unescaped and therefore match any character -- confirm this is intended.
Shear <- out[,-4] %>% mutate(N_l = str_extract(OutputCase,"\\d+"),
UG = str_extract(id,"\\d+"), a = str_extract(id,"-\\d+"),
S = str_extract(a,"\\d+"), Sections = paste0(UG,"-",S),
Sample = str_remove_all(id, "./\\d+-\\d+-|.xlsx")) %>%
left_join(Beamline_Shear %>% rownames_to_column("Sections"), by = "Sections") %>%
select(-OutputCase,-id,-Sections,-a)
There are some group attributes in the data, which should work normally, but can be an issue if we are running in a different env. Also, the mutate step and the join step don't really need any grouping attributes, as they are straightforward vectorized operations applied to every row.
library(dplyr)
# Work on the ungrouped data: the mutate and join below are plain vectorised
# column operations, so the grouping attributes are not needed.
# stringr and tibble functions are namespace-qualified because only dplyr is
# attached here.
out %>%
  select(-4) %>%
  ungroup() %>% # removes group attributes
  mutate(
    N_l = stringr::str_extract(OutputCase, "\\d+"),
    UG = stringr::str_extract(id, "\\d+"),
    a = stringr::str_extract(id, "-\\d+"),
    S = stringr::str_extract(a, "\\d+"),
    Sections = paste0(UG, "-", S),
    # Dots are escaped so they match a literal "." instead of any character.
    Sample = stringr::str_remove_all(id, "\\./\\d+-\\d+-|\\.xlsx")
  ) %>%
  left_join(
    tibble::rownames_to_column(Beamline_Shear, "Sections"),
    by = "Sections"
  )
# A tibble: 6 x 11
# SectionCut OutputCase V2 id N_l UG a S Sections Sample VLL
# <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
#1 S-1 LL-1 81.8 ./100-12-S01.xlsx 1 100 -12 12 100-12 S01 NA
#2 S-1 LL-2 119. ./100-12-S01.xlsx 2 100 -12 12 100-12 S01 NA
#3 S-1 LL-3 120. ./100-12-S01.xlsx 3 100 -12 12 100-12 S01 NA
#4 S-1 LL-4 96.3 ./100-12-S01.xlsx 4 100 -12 12 100-12 S01 NA
#5 S-2 LL-1 72.5 ./100-12-S01.xlsx 1 100 -12 12 100-12 S01 NA
#6 S-2 LL-2 110. ./100-12-S01.xlsx 2 100 -12 12 100-12 S01 NA

Resources