Related
I have a dataframe games_h. This is just a snippet of the table, but it has many teams and is sorted by date, team, game number. I am trying to create a weighted rolling average grouped by team. I would like the most recent game to be weighted more heavily than the game two games ago. So the weighted average would be (Game_1 * 1 + Game_2 * 2) / 3, or equivalently weights summing to 1 with the same ratio, so weights = c(1 - .667, .667).
dput(games_h)
structure(list(GameId = c(16, 16, 37, 37, 57, 57), GameDate = structure(c(17905,
17905, 17916, 17916, 17926, 17926), class = "Date"), NeutralSite = c(0,
0, 0, 0, 0, 0), AwayTeam = c("Virginia Cavaliers", "Virginia Cavaliers",
"Florida State Seminoles", "Florida State Seminoles", "Syracuse Orange",
"Syracuse Orange"), HomeTeam = c("Boston College Eagles", "Boston College Eagles",
"Boston College Eagles", "Boston College Eagles", "Boston College Eagles",
"Boston College Eagles"), Team = c("Virginia Cavaliers", "Boston College Eagles",
"Florida State Seminoles", "Boston College Eagles", "Syracuse Orange",
"Boston College Eagles"), Home = c(0, 1, 0, 1, 0, 1), Score = c(83,
56, 82, 87, 77, 71), AST = c(17, 6, 12, 16, 11, 13), TOV = c(10,
8, 9, 13, 11, 11), STL = c(5, 4, 4, 6, 6, 5), BLK = c(6, 0, 4,
4, 1, 0), Rebounds = c(38, 18, 36, 33, 23, 23), ORB = c(7, 4,
16, 10, 7, 6), DRB = c(31, 14, 20, 23, 16, 17), FGA = c(55, 57,
67, 55, 52, 45), FGM = c(33, 22, 28, 27, 29, 21), X3FGM = c(8,
7, 8, 13, 11, 9), X3FGA = c(19, 25, 25, 21, 26, 22), FTA = c(14,
9, 24, 28, 15, 23), FTM = c(9, 5, 18, 20, 8, 20), Fouls = c(16,
12, 25, 20, 19, 19), Game_Number = 1:6, Count = c(1, 1, 1, 1,
1, 1)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -6L), groups = structure(list(HomeTeam = "Boston College Eagles",
.rows = structure(list(1:6), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L), .drop = TRUE))
Here is an example output of the score column.
Here is my failed attempt. The function works correctly, but I cannot apply it to the entire dataframe by group.
# Weighted rolling average over a sliding window of two observations.
#
# For each position i >= 2 returns (x[i] * wt1 + x[i - 1] * wt2) / (wt1 + wt2),
# i.e. the current (most recent) value is weighted by wt1 and the previous
# value by wt2. The result has length(x) - 1: there is no average for the
# first observation. Called as weighted_avg(score, 2, 1) this reproduces the
# (older * 1 + newer * 2) / 3 scheme from the question.
#
# x   - numeric vector, ordered oldest to newest.
# wt1 - weight applied to the current value.
# wt2 - weight applied to the previous value.
#
# Base-R implementation: equivalent to the original zoo::rollsum version but
# without the package dependency, and it returns numeric(0) for inputs of
# length < 2 instead of erroring like rollsum(x, 2) would.
weighted_avg <- function(x, wt1, wt2) {
  current  <- x[-1]          # x[2], ..., x[n]   (most recent of each pair)
  previous <- x[-length(x)]  # x[1], ..., x[n-1] (one observation earlier)
  (current * wt1 + previous * wt2) / (wt1 + wt2)
}
# Works on a single vector, but ignores the Team grouping entirely:
weighted_avg(csum$Score_Y, 2, 1)
# Fails: apply() expects a matrix/data frame, not a single vector, and its
# MARGIN argument cannot make the calculation group-aware anyway.
apply(csum$Score_Y , 2, weighted_avg, wt1 = 2, wt2=1)
# Fails: group_map()'s .f must be a function or formula, but across(...) is
# evaluated immediately here, and weighted_avg() is being called on .x$Team
# (a character column) rather than on the numeric columns.
test<-csum %>%
group_by(Team)%>%
group_map(across(c(Score:Fouls), weighted_avg(.x$Team, 2, 1) ))
# Fails for the same reason; `.~` is not valid R syntax (parse error), and
# group_walk() is for side effects only — it discards any result.
test<-csum %>%
group_by(Team)%>%
group_walk(across(c(Score:Fouls),weighted_avg(.~,2,1) ))
Here are some notes about the code:
I used slider::slide_dbl function. First we specify the vector for which we would like to compute the moving average Score.
As we need a sliding window of length 2, I used .before argument in slide_dbl to use the previous value and a current value to be used for calculating moving average.
Also I set the .complete argument to TRUE to make sure the moving average is only calculated when we have a previous value. In other words, we don't have any moving average in the first row.
For more info check the documentation for slider package.
library(tidyverse)
library(slider)
# For each home team, slide a trailing 2-game window over Score and weight
# the newer game twice as heavily: (older * 1 + newer * 2) / 3.
# .before = 1 includes the previous row, and .complete = TRUE yields NA until
# a full 2-game window exists. The extra NA prepended via c() shifts the
# series down one row, so each row shows the weighted average of the two
# games *before* it (hence two leading NAs in the output).
# NOTE(review): summarise() returning more than one row per group is
# deprecated in dplyr >= 1.1 — reframe() is the supported replacement.
df %>%
group_by(HomeTeam) %>%
summarise(Example = c(NA, slide_dbl(Score, .before = 1, .complete = TRUE,
.f = ~ (.x[1] * 1 + .x[2] * 2) / 3)))
`summarise()` has grouped output by 'HomeTeam'. You can override using the `.groups` argument.
# A tibble: 7 × 2
# Groups: HomeTeam [1]
HomeTeam Example
<chr> <dbl>
1 Boston College Eagles NA
2 Boston College Eagles NA
3 Boston College Eagles 65
4 Boston College Eagles 73.3
5 Boston College Eagles 85.3
6 Boston College Eagles 80.3
7 Boston College Eagles 73
If it is going to calculate moving average for all numeric columns you could try:
# Same idea applied to every numeric column at once via across().
# NOTE(review): where(is.numeric) also picks up ID-like columns (GameId,
# Game_Number, Home, ...) and "averages" them too — see GameId = 50.3 in the
# output below. Use across(c(Score:Fouls), ...) to restrict the columns if
# that is not intended.
df %>%
group_by(HomeTeam) %>%
summarise(across(where(is.numeric), ~ c(NA, slide_dbl(., .before = 1, .complete = TRUE,
.f = ~ (.x[1] * 1 + .x[2] * 2) / 3)))) %>%
ungroup()
`summarise()` has grouped output by 'HomeTeam'. You can override using the `.groups` argument.
# A tibble: 7 × 21
HomeTeam GameId NeutralSite Home Score AST TOV STL BLK Rebounds ORB DRB FGA FGM
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Boston C… NA NA NA NA NA NA NA NA NA NA NA NA NA
2 Boston C… NA NA NA NA NA NA NA NA NA NA NA NA NA
3 Boston C… 16 0 0.667 65 9.67 8.67 4.33 2 24.7 5 19.7 56.3 25.7
4 Boston C… 30 0 0.333 73.3 10 8.67 4 2.67 30 12 18 63.7 26
5 Boston C… 37 0 0.667 85.3 14.7 11.7 5.33 4 34 12 22 59 27.3
6 Boston C… 50.3 0 0.333 80.3 12.7 11.7 6 2 26.3 8 18.3 53 28.3
7 Boston C… 57 0 0.667 73 12.3 11 5.33 0.333 23 6.33 16.7 47.3 23.7
# … with 7 more variables: X3FGM <dbl>, X3FGA <dbl>, FTA <dbl>, FTM <dbl>, Fouls <dbl>,
# Game_Number <dbl>, Count <dbl>
First of all I've seen several questions similar to it but not specifying the name of the dfs
My df
structure(list(paciente = structure(c(6163, 6553, 6357, 6331,
6228, 6280, 6383, 6198, 6316, 6148), label = "Paciente", format.spss = "F6.0"),
grupo_int_v00 = structure(c(1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L,
1L, 2L), .Label = c("A", "B"), label = "Grupo de intervención", class = "factor"),
time = structure(c(3L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("00", "01", "66"), class = "factor"), peso1 = c(76.3,
95.4, 103.5, 82.1, 103.3, 77.6, 76, 88.3, 64, 101), cintura1 = c(104.5,
120, 116, 104, 120.5, 104, NA, 110, 104, 119), tasis2_e = c(145,
137, 123, 153, 131, 147, NA, 147, 121, 131), tadias2_e = c(64,
61, 76, 75, 65, 84, NA, 76, 59, 96), p17_total = c(12, 3,
9, 6, 8, 10, 9, 7, 12, 9), geaf_tot = c(1678.32, 1398.6,
1566.43, 587.41, 4876.46, 3776.22, 1762.24, 3188.81, 7192.54,
1678.32), glucosa = c(273, 149, 96, 115, 101, 94, NA, 125,
104, 107), albumi = c(4.15, 4.75, 4.59, 4.83, 4.64, 4.49,
NA, 4.71, 4.33, 4.09), coltot = c(137, 174, 252, 270, 211,
164, NA, 192, 281, 234), hdl = c(30, 56, 45, 74, 66, 51,
NA, 34, 62, 44), ldl_calc = c(51, 95, NA, 177, 127, 90, NA,
130, 186, 170), trigli = c(280, 114, 309, 96, 89, 115, NA,
139, 165, 99), hba1c = c(13.77, 6.57, 5.65, 6.52, 5.69, 6.02,
NA, 6.25, 5.95, 5.93), i_hucpeptide = c(3567.05, 1407.53,
1259.29, 1028.31, 649.19, 893.52, NA, 815.82, 342.68, NA),
i_hughrelin = c(1214.83, 874.6, 1015.68, 919.51, 456.28,
650.22, NA, 143.32, 1159.1, NA), i_hugip = c(2.67, 2.67,
2.67, 2.67, 2.67, 2.67, NA, 2.67, 2.67, NA), i_huglp1 = c(538.62,
264.67, 106.76, 164.82, 141.23, 14.14, NA, 112.57, 14.14,
NA), i_huglucagon = c(720.19, 801.94, 321.68, 629.04, 186.88,
238.33, NA, 238, 265.84, NA), i_huinsulin = c(1646.21, 545.57,
297.96, 333.05, 232.17, 263.55, NA, 263.87, 136.97, NA),
i_huleptin = c(8476.58, 10680.93, 6034.91, 14225.58, 2160.27,
2778.49, NA, 2829.59, 6102.63, NA), i_hupai1 = c(3787.2,
2401.66, 1040.35, 2123.09, 1625.27, 1932.06, NA, 2483.08,
919.81, NA), i_huresistin = c(11350.35, 5171.75, 5794.31,
2814.22, 2994.15, 3215.24, NA, 2577.84, 3227.73, NA), i_huvisfatin = c(1652.92,
2125.95, 407.98, 3544.59, 8.64, 132.49, NA, 8.64, 189.96,
NA), col_rema = c(56, 23, NA, 19, 18, 23, NA, 28, 33, 20),
homa = c(19974.0146666667, 3612.88577777778, 1271.296, 1702.25555555556,
1042.18533333333, 1101.05333333333, NA, 1465.94444444444,
633.105777777778, NA), i_pcr = c(0.39, 0.57, 0.04, 0.22,
0.04, 1.01, NA, 0.1, 0.04, NA), i_ratiolg = c(6.97758534115885,
12.2123599359707, 5.94174346250788, 15.4708268534328, 4.73452704479705,
4.273153701824, NA, 19.743162154619, 5.26497282374256, NA
)), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"
))
The df looks like:
paciente grupo_int_v00 time peso1 cintura1 tasis2_e tadias2_e
<dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
1 6163 A 66 76.3 104. 145 64
2 6553 A 01 95.4 120 137 61
3 6357 B 01 104. 116 123 76
4 6331 B 00 82.1 104 153 75
5 6228 A 01 103. 120. 131 65
6 6280 B 01 77.6 104 147 84
split(df, df$grupo_int_v00) %>% list2env(envir = globalenv())
I am generating dfs with the name of the grupo_int_v00, now imagine I want to set my name of the df, how can I do that? Because my grupo_int_v00 here is A or B, but usually are numbers, and I don't want them to be numbers, I want them to be group_A , group_B, group_C ...
Not sure if you can provide with dplyr option (group_split and set_names or similar)
Thank you
You can change the names with paste + set_names:
library(magrittr)
# Rename the list elements from "A", "B", ... to "group_A", "group_B", ...
# set_names() is re-exported by magrittr; `.` refers to the piped list, so
# names(.) are the original group labels.
split(df, df$grupo_int_v00) %>%
set_names(paste("group", names(.), sep = "_"))
The base R alternative is:
# Base-R equivalent: split first, then prefix every element's name with
# "group_" (paste0 avoids spelling out the separator).
spl <- split(df, df$grupo_int_v00)
names(spl) <- paste0("group_", names(spl))
An even shorter alternative is to add the paste call in the split function:
split(df, ~ paste0("group_", grupo_int_v00))
output
> spl
$group_A
# A tibble: 6 × 30
paciente grupo_i…¹ time peso1 cintu…² tasis…³ tadia…⁴ p17_t…⁵ geaf_…⁶ glucosa albumi coltot
<dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 6163 A 66 76.3 104. 145 64 12 1678. 273 4.15 137
2 6553 A 01 95.4 120 137 61 3 1399. 149 4.75 174
3 6228 A 01 103. 120. 131 65 8 4876. 101 4.64 211
4 6383 A 01 76 NA NA NA 9 1762. NA NA NA
5 6198 A 01 88.3 110 147 76 7 3189. 125 4.71 192
6 6316 A 01 64 104 121 59 12 7193. 104 4.33 281
# … with 18 more variables: hdl <dbl>, ldl_calc <dbl>, trigli <dbl>, hba1c <dbl>,
# i_hucpeptide <dbl>, i_hughrelin <dbl>, i_hugip <dbl>, i_huglp1 <dbl>, i_huglucagon <dbl>,
# i_huinsulin <dbl>, i_huleptin <dbl>, i_hupai1 <dbl>, i_huresistin <dbl>,
# i_huvisfatin <dbl>, col_rema <dbl>, homa <dbl>, i_pcr <dbl>, i_ratiolg <dbl>, and
# abbreviated variable names ¹grupo_int_v00, ²cintura1, ³tasis2_e, ⁴tadias2_e, ⁵p17_total,
# ⁶geaf_tot
# ℹ Use `colnames()` to see all variable names
$group_B
# A tibble: 4 × 30
paciente grupo_i…¹ time peso1 cintu…² tasis…³ tadia…⁴ p17_t…⁵ geaf_…⁶ glucosa albumi coltot
<dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 6357 B 01 104. 116 123 76 9 1566. 96 4.59 252
2 6331 B 00 82.1 104 153 75 6 587. 115 4.83 270
3 6280 B 01 77.6 104 147 84 10 3776. 94 4.49 164
4 6148 B 01 101 119 131 96 9 1678. 107 4.09 234
# … with 18 more variables: hdl <dbl>, ldl_calc <dbl>, trigli <dbl>, hba1c <dbl>,
# i_hucpeptide <dbl>, i_hughrelin <dbl>, i_hugip <dbl>, i_huglp1 <dbl>, i_huglucagon <dbl>,
# i_huinsulin <dbl>, i_huleptin <dbl>, i_hupai1 <dbl>, i_huresistin <dbl>,
# i_huvisfatin <dbl>, col_rema <dbl>, homa <dbl>, i_pcr <dbl>, i_ratiolg <dbl>, and
# abbreviated variable names ¹grupo_int_v00, ²cintura1, ³tasis2_e, ⁴tadias2_e, ⁵p17_total,
# ⁶geaf_tot
# ℹ Use `colnames()` to see all variable names
I have a database that is a little bit tidier than the original, thanks to a StackOverflow colleague — I'm really grateful.
The data frame is composed of a patient ID which has several measurements over time (00 = basal, 66 = 6 months, 01 = 12 months). I know the data frame is not in chronological order.
# Example data: one row per patient per time point
# (00 = baseline, 66 = 6 months, 01 = 12 months).
# Fixed: the original used `<-` inside the data.frame() call, which produces
# garbled column names (e.g. "pacient....c.6430..6430..") and leaks pacient,
# time, weight, ... into the calling environment. `=` names the columns as
# intended (this matches the corrected construction used in the answer).
df1 <- data.frame(pacient = c(6430, 6430, 6430, 6494, 6494, 6494, 6165, 6165, 6165),
                  time = c(00, 01, 66, 00, 01, 66, 00, 01, 66),
                  weight = c(115, 112, 110, 98, 95, 94, 88, 87, 86),
                  waist = c(123, NA, 112, 115, 112, 113, 112, 110, NA),
                  p14_total = c(7, NA, 4, 12, 5, NA, 15, 12, 13))
t.test
I am trying to perform comparisons between the measurements between the different time points, NOT paired. For example weight at 00 vs weight at 66 / weight at 00 vs weight at 01 /weight at 66 vs weight at 01
I am looking for a data frame or data.table to export with the statistics (t, pvalue, mean..)
Create a column with the difference between the different time measurements for each patient.
For example: patient ID: 6430
Weight_6months = Weight01 - Weight66
Weight_12months= Weight01 - Weight00
I am really trying to perform this with purrr::map functions but I'm not reaching the objective
If I understand the question, then a simple solution is pivot the data wider, then perform the differencing between years:
NB If there are lots of years, then using mutate() with across(), allows selection of the columns, without specifying them.
library(tidyverse)
# Rebuild the example data, this time with proper `=` column naming, and
# convert to a tibble for nicer printing.
df1<-data.frame(pacient = c(6430, 6430, 6430, 6494, 6494, 6494, 6165, 6165, 6165),
time = c(00, 01, 66, 00, 01, 66, 00, 01, 66),
weight = c(115, 112, 110, 98, 95, 94, 88, 87, 86),
waist = c(123, NA, 112, 115, 112, 113, 112, 110, NA),
p14_total = c(7, NA, 4, 12, 5, NA, 15, 12, 13)) %>%
as_tibble()
# Pivot to one row per patient: time values 0, 1, 66 become column suffixes,
# giving weight_0, weight_1, weight_66, etc. Then difference the columns.
# NOTE(review): rowwise() is unnecessary here — the subtractions are already
# vectorized — but it is harmless.
# NOTE(review): the question's formula is Weight01 - Weight66, but
# weight_diff_1 computes weight_66 - weight_1 (opposite sign). The printed
# output matches the code as written; confirm which direction is wanted.
df2 <- df1 %>%
group_by(pacient) %>%
pivot_wider(names_from = time, values_from = c(weight, waist, p14_total)) %>%
rowwise() %>%
mutate(weight_diff_1 = weight_66 - weight_1, weight_diff_2 = weight_1 - weight_0)
# A tibble: 3 x 12
# Rowwise: pacient
pacient weight_0 weight_1 weight_66 waist_0 waist_1 waist_66 p14_total_0 p14_total_1 p14_total_66 weight_diff_1 weight_diff_2
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 6430 115 112 110 123 NA 112 7 NA 4 -2 -3
2 6494 98 95 94 115 112 113 12 5 NA -1 -3
3 6165 88 87 86 112 110 NA 15 12 13 -1 -1
And the t.test on the columns
t.test(df2$waist_0, df2$weight_1)
Welch Two Sample t-test
data: df2$waist_0 and df2$weight_1
t = 2.3133, df = 2.7634, p-value = 0.1112
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-8.302625 45.635958
sample estimates:
mean of x mean of y
116.6667 98.0000
I want to group daily data from Google Trends into weekly observations and smooth them by 7-day centered moving average? How can I do this? In which order?
Should I first group data? Or should I use centered moving average on daily data?
This is my data:
dput(multiTimeline)
structure(list(day = structure(c(1598400000, 1598486400, 1598572800,
1598659200, 1598745600, 1598832000, 1598918400, 1599004800, 1599091200,
1599177600, 1599264000, 1599350400, 1599436800, 1599523200, 1599609600,
1599696000, 1599782400, 1599868800, 1599955200, 1600041600, 1600128000,
1600214400, 1600300800, 1600387200, 1600473600, 1600560000, 1600646400,
1600732800, 1600819200, 1600905600, 1600992000, 1601078400, 1601164800,
1601251200, 1601337600, 1601424000, 1601510400, 1601596800, 1601683200,
1601769600, 1601856000, 1601942400, 1602028800, 1602115200, 1602201600,
1602288000, 1602374400, 1602460800, 1602547200, 1602633600, 1602720000,
1602806400, 1602892800, 1602979200, 1603065600, 1603152000, 1603238400,
1603324800, 1603411200, 1603497600, 1603584000, 1603670400, 1603756800,
1603843200, 1603929600, 1604016000, 1604102400, 1604188800, 1604275200,
1604361600, 1604448000, 1604534400, 1604620800, 1604707200, 1604793600,
1604880000, 1604966400, 1605052800, 1605139200, 1605225600, 1605312000,
1605398400, 1605484800, 1605571200, 1605657600, 1605744000, 1605830400,
1605916800, 1606003200, 1606089600), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), football = c(36, 36, 41, 60, 45, 38, 38, 39,
43, 49, 70, 49, 44, 46, 50, 62, 71, 92, 96, 61, 51, 45, 50, 58,
87, 81, 54, 50, 43, 49, 58, 97, 84, 55, 48, 41, 51, 56, 94, 83,
51, 47, 46, 49, 62, 97, 84, 51, 55, 51, 47, 52, 96, 79, 51, 49,
42, 44, 52, 100, 82, 49, 45, 41, 42, 50, 89, 73, 48, 40, 21,
29, 36, 75, 69, 45, 37, 39, 45, 51, 87, 69, 47, 48, 43, 37, 45,
79, 66, 46)), row.names = c(NA, -90L), class = c("tbl_df", "tbl",
"data.frame"))
Data is from 2020-08-26 to 2020-11-23.
I allowed myself to use the package dplyr, to make data manipulation easier, and lubridate, which makes date manipulation easy.
The code is:
library(dplyr)
library(lubridate)
# Collapse the daily series to one mean value per calendar week: tag each
# day with its week number, then average football within each week.
# NOTE(review): lubridate::week() numbers 7-day blocks counted from Jan 1,
# so the first and last "weeks" of the sample may be partial; isoweek()
# aligns to Monday-based ISO weeks if calendar alignment matters.
df2 <- df %>%
mutate(week = week(day)) %>%
group_by(week) %>%
summarise(average = mean(football))
The only function I used from lubridate there was week(), if you're interested.
What I did was: first, I created another column (could have been the same one, though) that states the week. Note that this only works because your column was already in date-time format (though just date would have worked too, maybe even better). From that, I grouped by week and took the average. I hope I understood your question correctly and this will help.
It worked; this was the output:
> df2
# A tibble: 13 x 2
week average
<dbl> <dbl>
1 35 42
2 36 48.6
3 37 69
4 38 60.7
5 39 62
6 40 60.4
7 41 63.4
8 42 60.7
9 43 59.1
10 44 54.7
11 45 44.6
12 46 55.1
13 47 52.7
You can use rollmean from zoo package to do all this as a one-liner.
# 7-day centered moving average of the daily series (align = "center" is
# rollmean's default, which is why the first and last 3 days have no value).
# Fixed: na.pad = TRUE is deprecated in zoo — fill = NA is the supported,
# behavior-identical way to pad the incomplete windows with NA.
multiTimeline$rolling <- zoo::rollmean(multiTimeline$football, 7, fill = NA)
multiTimeline
#> # A tibble: 90 x 3
#> day football rolling
#> <dttm> <dbl> <dbl>
#> 1 2020-08-26 00:00:00 36 NA
#> 2 2020-08-27 00:00:00 36 NA
#> 3 2020-08-28 00:00:00 41 NA
#> 4 2020-08-29 00:00:00 60 42
#> 5 2020-08-30 00:00:00 45 42.4
#> 6 2020-08-31 00:00:00 38 43.4
#> 7 2020-09-01 00:00:00 38 44.6
#> 8 2020-09-02 00:00:00 39 46
#> 9 2020-09-03 00:00:00 43 46.6
#> 10 2020-09-04 00:00:00 49 47.4
#> # ... with 80 more rows
If you want to pick out the smoothed average for each week from Saturday to Friday, just use filter to select only Tuesdays. This will give you the 7-day average from the previous Saturday to the following Friday.
multiTimeline %>% filter(lubridate::wday(day) == 3)
#> # A tibble: 12 x 3
#> day football rolling
#> <dttm> <dbl> <dbl>
#> 1 2020-09-01 00:00:00 38 44.6
#> 2 2020-09-08 00:00:00 46 56
#> 3 2020-09-15 00:00:00 51 64.7
#> 4 2020-09-22 00:00:00 50 60.3
#> 5 2020-09-29 00:00:00 48 61.7
#> 6 2020-10-06 00:00:00 47 61.7
#> 7 2020-10-13 00:00:00 55 62.4
#> 8 2020-10-20 00:00:00 49 59
#> 9 2020-10-27 00:00:00 45 58.4
#> 10 2020-11-03 00:00:00 40 48
#> 11 2020-11-10 00:00:00 37 51.6
#> 12 2020-11-17 00:00:00 48 53.7
To show this is what you want, we can plot your data and the averaged line using ggplot:
# Daily values (solid black) with the weekly-sampled 7-day centered average
# overlaid as a dashed red line.
# NOTE(review): `size` for lines is deprecated in ggplot2 >= 3.4 in favor of
# `linewidth`; this still works but emits a warning on current versions.
ggplot(multiTimeline, aes(day, football)) +
geom_line() +
geom_line(data = multiTimeline %>% filter(lubridate::wday(day) == 3),
aes(y = rolling), col = "red", lty = 2, size = 1.5)
My apologies if this question has already been answered but I can't find it.
I have a table in R: (see below example copied from txt, the actual table has more data and NA)
I need to compute the mean and sd from column c, e, and f by the group in column b
I can calculate the mean and sd separate by group for all of the separate e.g.
mean(c[b == 1], na.rm=TRUE)
var(e[b == 2], na.rm=TRUE)
I can also calculate the mean and SD for all the columns and generate a table with the results
library(data.table)
new <- data.table(project2016)
# Per group in b, compute a rounded mean and sd for every other column, then
# rename the resulting columns to "<col>.mean" / "<col>.SD".
# NOTE(review): this aggregates *all* non-grouping columns; restricting .SD
# with .SDcols = c("c", "e", "f") would limit it to just the needed ones.
wide <- setnames(new[, sapply(.SD, function(x) list(mean = round(mean(x), 3), sd = round(sd(x), 3))), by = b], c("b", sapply(names(new)[-1], paste0, c(".mean", ".SD"))))
wide
But I am not able to do it for only the needed columns and separated by group.
Thx in advance,
Nimby
"id" "a" "b" "c" "d" "e" "f" "g"
1 78 2 83 4 2.53 1.07 3
2 72 2 117 4 2.50 1.16 2
3 72 2 132 4 2.43 1.13 2
4 73 2 102 4 2.48 .81 2
5 73 2 114 4 2.33 1.13 2
6 73 2 88 43 2.13 .84 2
7 65 2 213 4 2.55 1.26 1
8 68 2 153 4 2.45 1.23 1
library(dplyr)
# Some reproducible data, entered row-wise then converted to a data frame.
d <- matrix(c(1, 78, 2, 83, 4, 2.53, 1.07, 3, 2, 72, 2, 117, 4, 2.50, 1.16, 2, 3, 72, 2, 132, 4, 2.43, 1.13, 2, 4, 73, 2, 102, 4, 2.48, .81, 2, 5, 73, 2, 114, 4, 2.33, 1.13, 2, 6, 73, 2, 88, 43, 2.13, .84, 2, 7, 65, 2, 213, 4, 2.55, 1.26, 1, 8, 68, 2, 153, 4, 2.45, 1.23, 1),
ncol = 8, byrow = TRUE) %>%
as.data.frame
names(d) <- c("id", "a", "b", "c", "d", "e", "f", "g")
# Your data only included one group in column b, so create a second group
# to demonstrate the grouped summary.
d$b[5:8] <- 1
# Calc mean and sd for the 3 columns, grouped by b.
# NOTE(review): the summary is printed, not assigned; the trailing `d`
# re-prints the input data. With dplyr >= 1.0 the same summary can be
# written more compactly as
#   summarise(across(c(c, e, f), list(mean = mean, sd = sd)))
# (with slightly different output column names, e.g. c_mean instead of mean_c).
d %>%
group_by(b) %>%
summarise(mean_c = mean(c), sd_c = sd(c),
mean_e = mean(e), sd_e = sd(e),
mean_f = mean(f), sd_f = sd(f))
d
This yields
# A tibble: 2 × 7
b mean_c sd_c mean_e sd_e mean_f sd_f
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 142.0 54.35071 2.365 0.18064699 1.1150 0.1915724
2 2 108.5 20.95233 2.485 0.04203173 1.0425 0.1594522
There'll also be non dplyr ways to do it.