row bind list columns using dplyr - r

I would like to find a better way to bind together the results of any number of regressions after adding an identifier for each model. The code below is my current solution but is too manual for a large number of regressions. This is part of a larger tidy workflow so a solution inside of the tidyverse is preferred but whatever works is fine. Thanks
library(tidyverse)
library(broom)
# Fit three linear models on mtcars and keep each tidied coefficient table
# (with confidence intervals) in its own list-column: lm_1, lm_2, lm_3.
model_dat <- mtcars %>%
  do(
    lm_1 = tidy(lm(disp ~ wt * vs, data = .), conf.int = TRUE),
    lm_2 = tidy(lm(cyl ~ wt * vs, data = .), conf.int = TRUE),
    lm_3 = tidy(lm(mpg ~ wt * vs, data = .), conf.int = TRUE)
  )
df=model_dat %>%
select(lm_1) %>%
unnest(c(lm_1)) %>%
mutate(model="one") %>%
select(model,term,estimate,p.value:conf.high) %>%
bind_rows(
model_dat %>%
select(lm_2) %>%
unnest(c(lm_2)) %>%
mutate(model="two") %>%
select(model,term,estimate,p.value:conf.high)) %>%
bind_rows(
model_dat %>%
select(lm_3) %>%
unnest(c(lm_3)) %>%
mutate(model="three") %>%
select(model,term,estimate,p.value:conf.high))

It may be easier with map2, i.e. loop across the columns and the corresponding English word for each column's position, pluck the list element, create the 'model' column from the second argument, i.e. the English word (.y), select the columns of interest, and create a single dataset by using the _dfr variant of map2.
library(purrr)
library(english)
library(dplyr)
library(broom)
# Pair every list-column of model_dat with the English word for its position
# ("one", "two", ...). For each pair, take the stored coefficient table out of
# the column, tag it with its word, keep the reporting columns, and row-bind
# all pieces into one tibble.
map2_dfr(
  model_dat,
  as.character(english(seq_along(model_dat))),
  function(model_col, label) {
    coef_tbl <- pluck(model_col, 1)
    coef_tbl <- mutate(coef_tbl, model = label)
    select(coef_tbl, model, term, estimate, p.value:conf.high)
  }
)
-output
# A tibble: 12 x 6
# model term estimate p.value conf.low conf.high
# <chr> <chr> <dbl> <dbl> <dbl> <dbl>
# 1 one (Intercept) -70.0 1.55e- 1 -168. 28.2
# 2 one wt 102. 8.20e- 9 76.4 128.
# 3 one vs 31.2 6.54e- 1 -110. 172.
# 4 one wt:vs -36.7 1.10e- 1 -82.2 8.82
# 5 two (Intercept) 4.31 1.28e- 5 2.64 5.99
# 6 two wt 0.849 4.90e- 4 0.408 1.29
# 7 two vs -2.19 7.28e- 2 -4.59 0.216
# 8 two wt:vs 0.0869 8.20e- 1 -0.689 0.862
# 9 three (Intercept) 29.5 6.55e-12 24.2 34.9
#10 three wt -3.50 2.33e- 5 -4.92 -2.08
#11 three vs 11.8 4.10e- 3 4.06 19.5
#12 three wt:vs -2.91 2.36e- 2 -5.40 -0.419
Or use summarise with across, unclass and then bind with bind_rows
# Build one long coefficient table from the list-columns of model_dat,
# labelling each model with the English word for the number in its column name.
model_dat %>%
summarise(across(everything(), ~ {
# name of the list-column currently being processed (e.g. "lm_1")
nm1 <- cur_column()
# take the stored table out of the list-column (.[[1]]) ...
list(.[[1]] %>%
# ... add a `model` label built from the number embedded in the column name
mutate(model = english(readr::parse_number(nm1))) %>%
# ... keep only the reporting columns; the result is wrapped in list() again
select(model, term, estimate, p.value:conf.high))
}
)) %>%
# drop the data-frame class so the result is a plain list of columns
unclass %>%
# row-bind the list elements into a single tibble
bind_rows
-output
# A tibble: 12 x 6
# model term estimate p.value conf.low conf.high
# <english> <chr> <dbl> <dbl> <dbl> <dbl>
# 1 one (Intercept) -70.0 1.55e- 1 -168. 28.2
# 2 one wt 102. 8.20e- 9 76.4 128.
# 3 one vs 31.2 6.54e- 1 -110. 172.
# 4 one wt:vs -36.7 1.10e- 1 -82.2 8.82
# 5 two (Intercept) 4.31 1.28e- 5 2.64 5.99
# 6 two wt 0.849 4.90e- 4 0.408 1.29
# 7 two vs -2.19 7.28e- 2 -4.59 0.216
# 8 two wt:vs 0.0869 8.20e- 1 -0.689 0.862
# 9 three (Intercept) 29.5 6.55e-12 24.2 34.9
#10 three wt -3.50 2.33e- 5 -4.92 -2.08
#11 three vs 11.8 4.10e- 3 4.06 19.5
#12 three wt:vs -2.91 2.36e- 2 -5.40 -0.419

Related

How to create a tidy table of t-tests?

I'm trying to figure out if there is a straightforward way to create a table of paired t-tests using tidyverse packages. There are already Q&As addressing this topic (e.g., here), but the existing answers all seem pretty convoluted.
Here's a reproducible example showing what I'm trying to accomplish -- a column of variable names, columns with the means for both items in the pair for each variable, and a column of p-values:
library(dplyr)
library(infer)
library(tidyr)
df <- mtcars %>%
mutate(engine = if_else(vs == 0, "V-shaped", "straight"))
v_shaped <- df %>%
filter(engine == "V-shaped") %>%
summarise(across(c(mpg, disp), mean)) %>%
pivot_longer(cols = everything()) %>%
rename(V_shaped = value)
straight <- df %>%
filter(engine == "straight") %>%
summarise(across(c(mpg, disp), mean)) %>%
pivot_longer(cols = everything()) %>%
rename(straight = value)
mpg <- df %>%
t_test(formula = mpg ~ engine, alternative = "two-sided") %>%
select(p_value) %>%
mutate(name = "mpg")
disp <- df %>%
t_test(formula = disp ~ engine, alternative = "two-sided") %>%
select(p_value) %>%
mutate(name = "disp")
p_values <- bind_rows(mpg, disp)
table <- v_shaped %>%
full_join(straight, by = "name") %>%
full_join(p_values, by = "name")
table
#> # A tibble: 2 × 4
#> name V_shaped straight p_value
#> <chr> <dbl> <dbl> <dbl>
#> 1 mpg 16.6 24.6 0.000110
#> 2 disp 307. 132. 0.00000248
Obviously, this is not a good way to address this problem even for two variables, and it certainly does not scale well. But it does illustrate the intended outcome. Is there a way to do this in one pipeline? My actual use case involves many more variables, so -- ideally -- I'd be able to feed a vector of variable names into the pipe.
Here's one way of doing it in a single pipe -
library(tidyverse)
library(infer)
df %>%
  # Keep the variables to test plus the grouping column.
  select(mpg, disp, engine) %>%
  # Stack the test variables into name/value pairs.
  pivot_longer(cols = -engine) %>%
  # One data frame per variable name.
  split(.$name) %>%
  # Summarise each variable and row-bind the results, keyed by variable name.
  map_df(function(one_var) {
    # Mean per engine type, spread into one column per engine.
    engine_means <- one_var %>%
      group_by(engine) %>%
      summarise(value = mean(value)) %>%
      pivot_wider(names_from = engine)
    # t-test of value by engine for the same variable.
    test_result <- t_test(one_var, formula = value ~ engine, alternative = "two-sided")
    # Means and test statistics side by side in a single row.
    bind_cols(engine_means, test_result)
  }, .id = "name")
# name `V-shaped` straight statistic t_df p_value alternative lower_ci upper_ci
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
#1 disp 307. 132. -5.94 27.0 0.00000248 two.sided -235. -114.
#2 mpg 16.6 24.6 4.67 22.7 0.000110 two.sided 4.42 11.5
Obviously, you can keep only the columns (using select) that are relevant to you.
This can be achieved with a call to summarise() and a flattening of the results. If we use stats::t.test() the group means are returned as part of the test and don't need to be calculated separately. broom::tidy() ensures that each result is returned in a 1-row tibble.
library(dplyr)
library(purrr)
library(broom)
# Run a two-sample t-test of every column (except `vs`) against `vs` and
# collect the tidied results into one row per tested column.
mtcars %>%
summarise(across(-vs, \(x)
# `.` is the left-hand side of the pipe (mtcars), which supplies `vs`;
# list() wraps each 1-row tidy() result so it fits in a single summary cell
list(tidy(
t.test(x ~ vs, data = ., alternative = "two.sided")
)))) %>%
# row-bind the per-column results, storing the column name in `names`
# NOTE(review): flatten_dfr() appears to be superseded in purrr >= 1.0.0 — confirm
flatten_dfr(.id = "names") %>%
# estimate1/estimate2 are the two group means; label them by engine shape
rename("V-shaped" = estimate1, straight = estimate2)
# A tibble: 10 × 11
names estimate `V-shaped` straight statistic p.value parameter conf.low conf.high method alternative
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 mpg -7.94 16.6 24.6 -4.67 0.000110 22.7 -11.5 -4.42 Welch Two Sample t-test two.sided
2 cyl 2.87 7.44 4.57 7.79 0.0000000112 29.9 2.12 3.63 Welch Two Sample t-test two.sided
3 disp 175. 307. 132. 5.94 0.00000248 27.0 114. 235. Welch Two Sample t-test two.sided
4 hp 98.4 190. 91.4 6.29 0.00000182 23.6 66.1 131. Welch Two Sample t-test two.sided
5 drat -0.467 3.39 3.86 -2.66 0.0129 27.1 -0.827 -0.107 Welch Two Sample t-test two.sided
6 wt 1.08 3.69 2.61 3.76 0.000728 30.0 0.493 1.66 Welch Two Sample t-test two.sided
7 qsec -2.64 16.7 19.3 -5.94 0.00000352 24.6 -3.56 -1.72 Welch Two Sample t-test two.sided
8 am -0.167 0.333 0.5 -0.927 0.362 27.1 -0.535 0.202 Welch Two Sample t-test two.sided
9 gear -0.302 3.56 3.86 -1.22 0.232 28.8 -0.807 0.204 Welch Two Sample t-test two.sided
10 carb 1.83 3.61 1.79 3.98 0.000413 29.6 0.888 2.76 Welch Two Sample t-test two.sided

R: paired t-test on multiple columns

I am trying to run a t-test on multiple columns. Basically trying to find the change from baseline to year 1 for a number of joint angles. I only want to conduct this on the study side. Below is an image with the first few rows and columns of the data. Sample Data
I have tried using both of these functions without success:
Code 1:
res <- FAI_SLS %>%
filter(study_side == "Study")%>%
select(-id,-subject,-activity,-side,-study_side,-year) %>%
map_df(~ broom::tidy(t.test(. ~ year)), .id = 'var')
I get the following error:
Error in eval(predvars, data, env) : object 'year' not found
I tried taking out -year but I still have the same issue.
Code 2:
t(sapply(FAI_SLS%>%filter(study_side == "Study")%>%select(-id,-subject,-activity,-side,-study_side,-year), function(x)
unlist(t.test(x~FAI_SLS$year)[c("estimate","p.value","statistic","conf.int")])))
I get the following error:
Error in h(simpleError(msg, call)) :
error in evaluating the argument 'x' in selecting a method for function 't': variable lengths differ (found for 'FAI_SLS$year')
Again I tried taking -year out without success.
Any suggestions on how I can fix this? Thanks
Try fitting the t-test within summarise() on all the columns you want to test (selected in across()). Here's an example with a different dataset:
library(dplyr)
library(tidyr)
data("storms")
# Compare 2019 with 2020 on every remaining column via a two-sample t-test.
storms %>%
# keep only the two years being compared
filter(year %in% c(2019, 2020)) %>%
# one tidied t-test per column; the listed columns are excluded from testing
summarise(across(-c(name, year, status, category),
~broom::tidy(t.test(. ~ year)))) %>%
# one row per tested variable, with the test result nested in `value`
pivot_longer(everything(), names_to = "variable") %>%
# expand the nested test results into ordinary columns
unnest(value)
#> # A tibble: 9 × 11
#> variable estimate estimate1 estimate2 statistic p.value parameter conf.low
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 month 0.0917 8.93 8.84 1.15 2.52e- 1 892. -0.0654
#> 2 day 4.29 18.2 13.9 7.49 2.34e-13 641. 3.17
#> 3 hour -0.0596 9.13 9.19 -0.128 8.99e- 1 687. -0.978
#> 4 lat 2.14 25.9 23.7 3.75 1.94e- 4 668. 1.02
#> 5 long 6.06 -60.7 -66.8 4.27 2.25e- 5 736. 3.27
#> 6 wind 8.42 58.8 50.4 4.42 1.18e- 5 529. 4.68
#> 7 pressure -4.46 989. 993. -3.03 2.59e- 3 537. -7.35
#> 8 tropicalst… 7.39 153. 145. 0.810 4.18e- 1 701. -10.5
#> 9 hurricane_… 10.9 24.1 13.2 3.92 1.02e- 4 508. 5.45
#> # … with 3 more variables: conf.high <dbl>, method <chr>, alternative <chr>
Created on 2022-06-02 by the reprex package (v2.0.1)

How to move R code into functions to generalise behaviour

I have a huge messy piece of R code with loads of ugly repetition. There is an opportunity to massively reduce it. Starting with this piece of code:
table <-
risk_assigned %>%
group_by(rental_type, room_type) %>%
summarise_all(funs( sum(!is.na(.)) / length(.) ) ) %>%
select(-c(device_id, ts, room, hhi, temp)) %>%
adorn_pct_formatting()
I would like to generalise it into a function so it can be reused.
LayKable = function(kableDetails) {
table <-
risk_assigned %>%
group_by(kableDetails$group1 , kableDetails$group2) %>%
summarise_all(funs( sum(!is.na(.)) / length(.) ) ) #%>%
select(-c(device_id, ts, room, hhi, temp)) %>%
adorn_pct_formatting()
...
kable <- table
return(kable)
}
kableDetails <- list(
group1 = "rental_type",
group2 = "room_type"
)
newKable <- LayKable(kableDetails)
This rather half-hearted attempt serves to explain what I want to do. How can I pass stuff into this function inside a list (I'm a C programmer, pretending it's a struct).
When passing function arguments to a dplyr verb inside a function you have to use rlang terms. But it should be simple to define a function you can pass a number of grouping terms to:
library(dplyr)
#' Sum every non-grouping column within groups
#'
#' @param ... Bare column names to group by.
#' @param data Data frame to summarise; defaults to `mtcars` so the function
#'   is flexible but still easy to demonstrate.
#' @return An ungrouped tibble with one row per group and the sum of each
#'   remaining column.
test_func <- function(..., data = mtcars) {
  data %>%
    group_by(!!!enquos(...)) %>%
    # `.cols` is spelled out: calling across() without it is deprecated.
    summarise(across(everything(), sum), .groups = "drop")
}
test_func(cyl, gear)
#> # A tibble: 8 x 11
#> cyl gear mpg disp hp drat wt qsec vs am carb
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 4 3 21.5 120. 97 3.7 2.46 20.0 1 0 1
#> 2 4 4 215. 821 608 32.9 19.0 157. 8 6 12
#> 3 4 5 56.4 215. 204 8.2 3.65 33.6 1 2 4
#> 4 6 3 39.5 483 215 5.84 6.68 39.7 2 0 2
#> 5 6 4 79 655. 466 15.6 12.4 70.7 2 2 16
#> 6 6 5 19.7 145 175 3.62 2.77 15.5 0 1 6
#> 7 8 3 181. 4291. 2330 37.4 49.2 206. 0 0 37
#> 8 8 5 30.8 652 599 7.76 6.74 29.1 0 2 12
Update - adding a list
I see your ideal would be to write a list of arguments for each function call and pass these rather than write out the arguments in each call. You can do this using do.call to pass a list of named arguments to a function. Again, when using dplyr verbs you can quote variable names in constructing your list (so that R doesn't try to find them in the global environment when compiling the list) and !!enquo each one in the calls to then use them there:
library(dplyr)
#' Mean of one column within the groups of another
#'
#' @param .summary_var Bare name of the column to average.
#' @param .group_var Bare name of the column to group by.
#' @param data Data frame to summarise; defaults to `mtcars`.
#' @return A tibble with one row per group and a `mean` column.
test_func2 <- function(.summary_var, .group_var, data = mtcars) {
  # Capture both arguments unevaluated, then unquote them inside the verbs.
  group_quo <- enquo(.group_var)
  summary_quo <- enquo(.summary_var)
  grouped <- group_by(data, !!group_quo)
  summarise(grouped, mean = mean(!!summary_quo))
}
# Test with bare arguments
test_func2(hp, cyl)
#> # A tibble: 3 x 2
#> cyl mean
#> <dbl> <dbl>
#> 1 4 82.6
#> 2 6 122.
#> 3 8 209.
# Construct and pass list
args <- list(.summary_var = quote(hp), .group_var = quote(cyl))
do.call(test_func2, args = args)
#> # A tibble: 3 x 2
#> cyl mean
#> <dbl> <dbl>
#> 1 4 82.6
#> 2 6 122.
#> 3 8 209.
A handy guide to tidy evaluation where most of these ideas are explained more clearly.
Created on 2021-12-21 by the reprex package (v2.0.1)

how to get standard deviation of an aggregate of all values of a dataframe (or matrix), grouped by one variable

I have a data frame with 13 columns. the 13th column shows the group number each row belongs to. I want to take the standard deviation of all values in all rows of columns 3 to 12, for rows of each group.
for(i in 1: groupnumber) {
sd.vect[i] <- sd(as.vector(df[df$group==i,][,-c(1,2,13)]))}
I get error
Error in is.data.frame(x) :
'list' object cannot be coerced to type 'double'
how can I get sd of all values in each group?
You can use dplyr. You can group_by(grouping_column), then summarise() with the sd() function:
library(dplyr)
# Per-group standard deviation of every column except the first two.
data_frame %>% group_by(thirteenth_column) %>% summarise(across(-c(1, 2), sd))
As an example with mtcars:
library(dplyr)
mtcars%>%group_by(cyl)%>%summarise(across(everything(), sd))
# A tibble: 3 x 11
cyl mpg disp hp drat wt qsec vs am gear carb
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 4 4.51 26.9 20.9 0.365 0.570 1.68 0.302 0.467 0.539 0.522
2 6 1.45 41.6 24.3 0.476 0.356 1.71 0.535 0.535 0.690 1.81
3 8 2.56 67.8 51.0 0.372 0.759 1.20 0 0.363 0.726 1.56
That said, with some corrections, your for loop works:
You just have to make sure to return a dataframe, as your desired outcome is actually a two-dimension object. See an example with the iris dataset:
# Standard deviation of each measurement column of iris, per species.
# Rows of sd.df are named by species; columns follow the order of the
# measurement columns (the 5th column, Species, is excluded).
sd.df <- data.frame()
for (i in unique(iris$Species)) {
  # Row mask for the current species, computed once per species.
  in_species <- iris$Species == i
  # seq_len() stays correct even if there were no measurement columns.
  for (j in seq_len(ncol(iris[-5]))) {
    sd.df[i, j] <- sd(iris[in_species, j])
  }
}
sd.df
V1 V2 V3 V4
setosa 0.3524897 0.3790644 0.1736640 0.1053856
versicolor 0.5161711 0.3137983 0.4699110 0.1977527
virginica 0.6358796 0.3224966 0.5518947 0.2746501
update
I understand from your comments you may want something quite odd, which would be to group your data by the grouping variable, then get the standard deviation for an aggregate of all values in each sub-dataframe. You probably would be better off working with matrices here.
For that, you may need to group your data by unique() values in the grouping column, then call sd() on the rest of the dataframe (all values), which can be done if you coerce the dataframe into a matrix:
library(dplyr)
library(purrr)
# For each distinct cyl value: take that group's rows, drop cyl, pool every
# remaining value into one matrix and compute a single standard deviation.
unique(mtcars$cyl) %>%
  map_dbl(function(cyl_value) {
    mtcars %>%
      filter(cyl == cyl_value) %>%
      select(-cyl) %>%
      as.matrix() %>%
      sd()
  }) %>%
  # Name each result after the cyl value it belongs to.
  set_names(unique(mtcars$cyl))
6 4 8
62.47655 37.54494 118.18945
With your data:
# Same approach with the asker's data: column 13 is the group; columns 1, 2
# and 13 are excluded from the pooled standard deviation.
# map_dbl() (as in the mtcars example) returns a named numeric vector, and the
# call is now closed after sd() so the expression parses.
map_dbl(unique(df[[13]]), ~as.matrix(df %>%
    filter(df[[13]] == .x) %>%
    select(-c(1, 2, 13))) %>%
  sd()) %>%
  set_names(., unique(df[[13]]))
And a much simpler answer with base subsetting and split():
map_dbl(split(mtcars[-c(1,2, 10)], mtcars[10]), ~sd(as.matrix(.x)))
3 4 5
119.47824 47.97490 98.71733
You can subset the columns from cur_data() -
library(dplyr)
# Standard deviation of all values pooled across columns, per group.
result <- df %>%
group_by(group) %>%
# take the current group's data, drop its first two columns, and flatten the
# rest into a single vector before calling sd()
# NOTE(review): cur_data() is deprecated as of dplyr 1.1.0 in favour of pick() — confirm
summarise(sd_value = sd(unlist(select(cur_data(), -(1:2)))))
Example using mtcars -
# Same computation on mtcars, pooled per gear value.
mtcars %>%
group_by(gear) %>%
# drop the first two columns of the current group's data, pool the rest
# NOTE(review): cur_data() is deprecated as of dplyr 1.1.0 in favour of pick() — confirm
summarise(sd_value = sd(unlist(select(cur_data(), -(1:2)))))
# gear sd_value
# <dbl> <dbl>
#1 3 119.
#2 4 48.0
#3 5 98.7
Using mtcars as an example,
aggregate(.~cyl, data=mtcars, FUN=sd)
# cyl mpg disp hp drat wt qsec vs am gear carb
# 1 4 4.509828 26.87159 20.93453 0.3654711 0.5695637 1.682445 0.3015113 0.4670994 0.5393599 0.522233
# 2 6 1.453567 41.56246 24.26049 0.4760552 0.3563455 1.706866 0.5345225 0.5345225 0.6900656 1.812654
# 3 8 2.560048 67.77132 50.97689 0.3723618 0.7594047 1.196014 0.0000000 0.3631365 0.7262730 1.556624

Create new column based on regular expression match

Problem
I would like to create a new column for relative standard deviation using the following formula: stdev * 100 / abs(mean). I have over 40 variables, each with their own stdev and mean (so 80 columns). What I would like to do is use regular expressions to calculate the relative standard deviation from the 2 columns (stdev and mean) based on the preceding names. For example, for columns AceticAcid.stdev and AceticAcid.mean, calculate the relative standard deviation to automatically create a new column AceticAcid.rsd. The equation being: AceticAcid.stdev * 100 / abs(AceticAcid.mean).
Example Dataframe
print(df)
AceticAcid.mean AceticAcid.stdev Glucose.mean Glucose.stdev Propanol.mean Propanol.stdev
1 28.75775 0.911130 48.27333 4.4991249 144.4770 38.34122
2 78.83051 10.562110 28.13337 1.2304387 134.6402 31.76264
3 40.89769 17.848381 37.10283 0.2102977 132.0253 33.76568
4 88.30174 11.028700 32.90534 1.6396036 149.7135 21.56639
5 94.04673 9.132295 14.11699 4.7725182 132.7853 15.88455
Desired Output (Don't care about the order of the new columns)
print(df_rsd)
AceticAcid.mean AceticAcid.stdev Glucose.mean Glucose.stdev Propanol.mean Propanol.stdev AceticAcid.rsd Glucose.rsd Propanol.rsd
1 28.75775 0.911130 48.27333 4.4991249 144.4770 38.34122 3.168294 9.3201039 26.53795
2 78.83051 10.562110 28.13337 1.2304387 134.6402 31.76264 13.398504 4.3735921 23.59076
3 40.89769 17.848381 37.10283 0.2102977 132.0253 33.76568 43.641536 0.5667969 25.57515
4 88.30174 11.028700 32.90534 1.6396036 149.7135 21.56639 12.489788 4.9827894 14.40511
5 94.04673 9.132295 14.11699 4.7725182 132.7853 15.88455 9.710380 33.8069175 11.96258
Repetitive Attempt...
I do not want to write these out 40 times (there has to be a nice regex way to achieve this):
df_rsd <- df %>% mutate(AceticAcid.rsd = AceticAcid.stdev * 100 / abs(AceticAcid.mean),
Glucose.rsd = Glucose.stdev * 100 / abs(Glucose.mean),
Propanol.rsd = Propanol.stdev * 100 / abs(Propanol.mean))
Reproducible Data
structure(list(AceticAcid.mean = c(28.7577520124614, 78.8305135443807,
40.89769218117, 88.3017404004931, 94.0467284293845), AceticAcid.stdev = c(0.911129987798631,
10.5621097609401, 17.8483808878809, 11.0287002893165, 9.13229470606893
), Glucose.mean = c(48.2733338139951, 28.1333662476391, 37.1028254181147,
32.9053360782564, 14.1169873066247), Glucose.stdev = c(4.49912485200912,
1.2304386717733, 0.210297667654231, 1.63960359641351, 4.77251824573614
), Propanol.mean = c(144.476965803187, 134.64017030783, 132.025340688415,
149.713488831185, 132.785289955791), Propanol.stdev = c(38.3412187267095,
31.7626409884542, 33.7656808178872, 21.5663894917816, 15.884545892477
)), class = "data.frame", row.names = c(NA, -5L))
We can use split.default to split the dataset into a list of data.frame columns based on removing the suffix part of the column names, then loop over the list with lapply, do the calculation and assign it to new column in 'df'
# Group the columns by the part of their name before the first dot, then
# compute the relative standard deviation (stdev * 100 / |mean|) per group.
# The stdev and mean columns are picked by suffix rather than by position,
# so the result does not depend on the order of the columns in `df`.
out <- lapply(
  split.default(df, sub("\\..*", "", names(df))),
  function(x) {
    stdev <- x[[which(endsWith(names(x), ".stdev"))]]
    avg <- x[[which(endsWith(names(x), ".mean"))]]
    stdev * 100 / abs(avg)
  }
)
# Add one "<prefix>.rsd" column per group.
df[paste0(names(out), ".rsd")] <- out
df
# AceticAcid.mean AceticAcid.stdev Glucose.mean Glucose.stdev Propanol.mean Propanol.stdev AceticAcid.rsd Glucose.rsd Propanol.rsd
#1 28.75775 0.911130 48.27333 4.4991249 144.4770 38.34122 3.168294 9.3201039 26.53795
#2 78.83051 10.562110 28.13337 1.2304387 134.6402 31.76264 13.398504 4.3735921 23.59076
#3 40.89769 17.848381 37.10283 0.2102977 132.0253 33.76568 43.641536 0.5667969 25.57515
#4 88.30174 11.028700 32.90534 1.6396036 149.7135 21.56639 12.489788 4.9827894 14.40511
#5 94.04673 9.132295 14.11699 4.7725182 132.7853 15.88455 9.710380 33.8069175 11.96258
Or with tidyverse
library(purrr)
library(dplyr)
library(stringr)
df %>%
  # One data frame per variable prefix (the text before the first dot).
  split.default(str_remove(names(.), "\\..*")) %>%
  # stdev * 100 / |mean| per prefix; columns are located by suffix so the
  # calculation does not depend on column order.
  map_dfc(~ .x[[str_which(names(.x), "\\.stdev$")]] * 100 /
    abs(.x[[str_which(names(.x), "\\.mean$")]])) %>%
  # rename_with() replaces the superseded rename_all().
  rename_with(~ str_c(.x, ".rsd")) %>%
  # Append the new .rsd columns to the original data.
  bind_cols(df, .)
Alternative, also with the tidyverse:
library(tidyverse)
# Reshape to one row per measurement and variable so rsd is a single mutate().
df_long <- df %>%
# number the rows so each measurement can be identified after reshaping
mutate(measurement_number=row_number(), .before=1) %>%
# stack every column into var/value pairs
pivot_longer(cols=-measurement_number, names_to="var", values_to="value") %>%
# split names such as "AceticAcid.mean" into variable and indicator
separate(var, into=c("var", "indicator")) %>%
# put mean and stdev side by side for each measurement/variable
pivot_wider(id_cols=c("measurement_number", "var"), names_from = indicator, values_from=value) %>%
# relative standard deviation in percent
mutate(rsd=stdev * 100 / abs(mean)) %>%
arrange(var, measurement_number)
df_long
#> # A tibble: 15 x 5
#> measurement_number var mean stdev rsd
#> <int> <chr> <dbl> <dbl> <dbl>
#> 1 1 AceticAcid 28.8 0.911 3.17
#> 2 2 AceticAcid 78.8 10.6 13.4
#> 3 3 AceticAcid 40.9 17.8 43.6
#> 4 4 AceticAcid 88.3 11.0 12.5
#> 5 5 AceticAcid 94.0 9.13 9.71
#> 6 1 Glucose 48.3 4.50 9.32
#> 7 2 Glucose 28.1 1.23 4.37
#> 8 3 Glucose 37.1 0.210 0.567
#> 9 4 Glucose 32.9 1.64 4.98
#> 10 5 Glucose 14.1 4.77 33.8
#> 11 1 Propanol 144. 38.3 26.5
#> 12 2 Propanol 135. 31.8 23.6
#> 13 3 Propanol 132. 33.8 25.6
#> 14 4 Propanol 150. 21.6 14.4
#> 15 5 Propanol 133. 15.9 12.0
# Spread back to one row per measurement, one column per statistic/variable.
df_wide <- df_long %>%
pivot_wider(id_cols=c("measurement_number"),
names_from = c(var),
values_from = c(mean, stdev, rsd),
# column names become e.g. "mean.AceticAcid"
names_sep = ".")
df_wide
#> # A tibble: 5 x 10
#> measurement_num~ mean.AceticAcid mean.Glucose mean.Propanol stdev.AceticAcid
#> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 28.8 48.3 144. 0.911
#> 2 2 78.8 28.1 135. 10.6
#> 3 3 40.9 37.1 132. 17.8
#> 4 4 88.3 32.9 150. 11.0
#> 5 5 94.0 14.1 133. 9.13
#> # ... with 5 more variables: stdev.Glucose <dbl>, stdev.Propanol <dbl>,
#> # rsd.AceticAcid <dbl>, rsd.Glucose <dbl>, rsd.Propanol <dbl>
Created on 2020-05-26 by the reprex package (v0.3.0)

Resources