This question is very similar to "Error in UseMethod("select") : no applicable method for 'select' applied to an object of class "character"". The differences are a few changes inside the return_coef function and the fact that I am now filtering by Id, date and Category, whereas in the answered question the filtering was only by date and Category.
You will see below that I can generate the coefficients for each Id/date/Category, but when I ask to do it for everyone at once, I get the following error:
Error: Problem with `mutate()` column `coef`.
i `coef = mapply(...)`.
x negative length vectors are not allowed
Executable code:
library(dplyr)
library(tidyverse)
library(lubridate)
library(data.table)
df1<- structure(
list(
Id = c(1, 1, 1, 1),
date1 = c("2022-01-06","2022-01-06","2022-01-06","2022-01-06"),
date2 = c("2022-01-02","2022-01-03","2022-01-09","2022-01-10"),
Week = c("Sunday","Monday","Sunday","Monday"),
Category = c("EFG", "ABC","EFG","ABC"),
DR1 = c(200, 300, 200, 200),
DRM01 = c(300, 300, 300, 300),
DRM02 = c(300, 300, 300, 300),
DRM03 = c(300,300,300,300),
DRM04 = c(300,300,300,300),
DRM05 = c(300,250,350,350)),row.names = c(NA, 4L), class = "data.frame")
#' Compute a single coefficient for one Id / date / Category combination.
#'
#' @param df1 Data frame with the columns used below: Id, date1, date2, Week,
#'   Category, DR1 and one or more columns whose names start with "DRM0".
#' @param idd Value compared against `Id` to pick the row of interest.
#' @param dmda Date of interest; it is passed to `ymd()` and `as.Date()`.
#' @param CategoryChosse Value compared against `Category`.
#' @return A numeric value: the group mean of DR1 when at most two rows are
#'   left for fitting, otherwise the `b2` coefficient of the `nls` fit
#'   `numbers ~ b1*days^2+b2`.
return_coef <- function (df1, idd, dmda, CategoryChosse) {
# Replace missing values in the DRM0* columns by 0.
selection = startsWith(names(df1), "DRM0")
df1[selection][is.na(df1[selection])] = 0
# Keep only the rows whose date2 lies before date1.
data1<-subset(df1,df1$date2<df1$date1)
dt1 <- as.data.table(data1)
cols <- grep("^DRM0", colnames(dt1), value = TRUE)
# For every DRM0* column add "<col>_PV" = DR1 - <col>, then take the median of
# those differences per Id / Category / Week.
med <-
dt1[, (paste0(cols, "_PV")) := DR1 - .SD, .SDcols = cols
][, lapply(.SD, median), by = .(Id, Category, Week), .SDcols = paste0(cols, "_PV") ]
# Join the medians back onto the full data and build "<col>_<col>_PV" as
# <col> + median difference; keep Id:Category plus the new columns.
SPV<-df1%>%
inner_join(med, by = c('Id','Category', 'Week')) %>%
mutate(across(matches("^DRM0\\d+$"), ~.x +
get(paste0(cur_column(), '_PV')),
.names = '{col}_{col}_PV')) %>%
select(Id:Category, DRM01_DRM01_PV:last_col())%>%
data.frame()
# For the requested row, walk the DRM0* columns in reverse order and collect
# the names for which the running sum is still 0.
# NOTE(review): presumably this identifies trailing all-zero columns; that
# reading assumes the values are non-negative - confirm.
mat1 <- df1 %>%
dplyr::filter(Id==idd, date2 == ymd(dmda), Category == CategoryChosse) %>%
select(starts_with("DRM0")) %>%
pivot_longer(cols = everything()) %>%
arrange(desc(row_number())) %>%
mutate(cs = cumsum(value)) %>%
dplyr::filter(cs == 0) %>%
pull(name)
# Names of the matching "<col>_<col>_PV" columns to drop from SPV.
(dropnames <- paste0(mat1,"_",mat1, "_PV"))
SPV <- SPV %>%
filter(Id==idd,date2 == ymd(dmda), Category == CategoryChosse) %>%
select(-any_of(dropnames))
# If no DRM0* column is left, add up to ten of the dropped names back as NA
# columns.
if(length(grep("DRM0", names(SPV))) == 0) {
SPV[head(mat1,10)] <- NA_real_
}
# Sum the remaining DRM0* columns per Category and reshape to long format; the
# part of each column name after "DRM0" is captured and then converted to a
# number by readr::parse_number().
datas <-SPV %>%
dplyr::filter(Id==idd,date2 == ymd(dmda)) %>%
group_by(Category) %>%
dplyr::summarize(dplyr::across(starts_with("DRM0"), sum)) %>%
pivot_longer(cols= -Category, names_pattern = "DRM0(.+)", values_to = "val") %>%
mutate(name = readr::parse_number(name))
# Rename the two non-Category columns and copy them to the names used in the
# model formula below.
colnames(datas)[-1]<-c("var1","var2")
datas$days <- datas[[as.name("var1")]]
datas$numbers <- datas[[as.name("var2")]]
# Keep, per Category, the rows selected by the index range below. `:` binds
# tighter than `+`, so the range runs from the day difference
# (dmda - earliest date1 of that Category) to max(days) and is then shifted
# by 1 as a whole.
datas <- datas %>%
group_by(Category) %>%
slice((as.Date(dmda) - min(as.Date(df1$date1) [
df1$Category == first(Category)])):max(days)+1) %>%
ungroup
# Mean of DR1 per Id / Category / Week; then pick the value for the requested
# Category and for the Week of the first row whose date2 equals dmda.
m<-df1 %>%
group_by(Id,Category,Week) %>%
dplyr::summarize(dplyr::across(starts_with("DR1"), mean), .groups = 'drop')
m<-subset(m, Week == df1$Week[match(ymd(dmda), ymd(df1$date2))] & Category == CategoryChosse)$DR1
# With at most two rows no model is fitted and the mean DR1 is returned.
if (nrow(datas)<=2){
val<-as.numeric(m)
}
else{
# Fit numbers ~ b1*days^2+b2 and return the second coefficient (b2).
mod <- nls(numbers ~ b1*days^2+b2,start = list(b1 = 0,b2 = 0),data = datas, algorithm = "port")
# The local `coef` assigned here is not read again; `val` is computed from
# coef(mod) directly.
coef<-coef(mod)[2]
val<-as.numeric(coef(mod)[2])
}
return(val)
}
Find the coef one by one (It works):
return_coef(df1,"1","2022-01-09","EFG")
[1] 200
return_coef(df1,"1","2022-01-10","ABC")
[1] 250
Find all coef at once (Does not work)
subset_df1 <- subset(df1, date2 > date1)
All<-subset_df1%>%
transmute(
Id,date2,Category,
coef = mapply(return_coef, list(cur_data()), Id, as.Date(date2), Category))
Error: Problem with `mutate()` column `coef`.
i `coef = mapply(...)`.
x negative length vectors are not allowed
dput(head(df1))
structure(list(Id = c(1, 1, 1, 1), date1 = structure(c(1641427200,
1641427200, 1641427200, 1641427200), tzone = "UTC", class = c("POSIXct",
"POSIXt")), date2 = structure(c(1641081600, 1641168000, 1641686400,
1641772800), tzone = "UTC", class = c("POSIXct", "POSIXt")),
Week = c("Sunday", "Monday", "Sunday", "Monday"), Category = c("EFG",
"ABC", "EFG", "ABC"), DR1 = c(200, 300, 200, 200), DRM01 = c(300,
300, 300, 300), DRM02 = c(300, 300, 300, 300), DRM03 = c(300,
300, 300, 300), DRM04 = c(300, 300, 300, 300), DRM05 = c(300,
250, 350, 350)), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame"))
The function seems to expect df1 to be the full data set, so pass df1 itself rather than the subset:
library(dplyr)
subset_df1 %>%
rowwise %>%
mutate(coef = return_coef(df1, Id, date2, Category)) %>%
ungroup
-output
# A tibble: 2 × 12
Id date1 date2 Week Category DR1 DRM01 DRM02 DRM03 DRM04 DRM05 coef
<dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2022-01-06 2022-01-09 Sunday EFG 200 300 300 300 300 350 200
2 1 2022-01-06 2022-01-10 Monday ABC 200 300 300 300 300 350 250
Update
In the new dataset, the columns 'date1' and 'date2' are POSIXct; convert them to the Date class and it should work:
df2 <- df1 %>%
mutate(across(c(date1, date2), as.Date))
subset_df2 <- subset(df2, date2 > date1)
subset_df2 %>%
rowwise %>%
mutate(coef = return_coef(df2, Id, date2, Category)) %>%
ungroup
# A tibble: 2 × 12
Id date1 date2 Week Category DR1 DRM01 DRM02 DRM03 DRM04 DRM05 coef
<dbl> <date> <date> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2022-01-06 2022-01-09 Sunday EFG 200 300 300 300 300 350 200
2 1 2022-01-06 2022-01-10 Monday ABC 200 300 300 300 300 350 250
Or using pmap
library(purrr)
subset_df1 %>%
transmute(Id, date2, Category,
coeff = pmap_dbl(across(c(Id, date2, Category)),
~ return_coef(df1, ..1, ..2, ..3)))
-output
Id date2 Category coeff
3 1 2022-01-09 EFG 200
4 1 2022-01-10 ABC 250
Related
I am trying to disaggregate the monthly data and spread them into weekly data in two ways.
First, To find the first Monday from the start date and then create days which are Mondays till the last date (month) of the sequence. And then spread the data within the respective week which is in the month.
Second, To create a weekly sequence from start date and end date and spread the data within the respective week which is in the month.
The data which I am working with is given below:
structure(list(`Row Labels` = c("X6", "X7", "X8", "X9"), `2022-11-01` = c(100,
200, 300, 400), `2022-12-01` = c(160, 200, 300, 400), `2023-01-01` = c(500,
550, 600, 650)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-4L))
And it looks like this:
The expected output 1 is given below, as you can see all the dates are Mondays:
The expected output 2 is given below:
Is this doable, or is it a bit too much to expect from R?
For Mondays we can create a list of Mondays between the dates in the dataframe, join it with the data in long format, count number of the Mondays for each variable in each month, divide the values by the number of Mondays, and revert back the format to wide;
library(dplyr)
library(tidyr)
library(lubridate)
# Month starts are taken from the column headers (the first column holds the
# row labels).
all_dates <- as.Date(names(df1)[-1])

# All Mondays between the first day of the earliest month and the boundary
# after the latest month, tagged with their "YYYY-MM" month key.
# `wday(., week_start = 1) == 1` identifies Mondays numerically, so the result
# does not depend on the session locale (the label "Mon" is English-only).
MON <- seq(min(floor_date(all_dates, "month")),
           max(ceiling_date(all_dates, "month")),
           by = "1 day") %>%
  .[wday(., week_start = 1) == 1] %>%
  data.frame("Mondays" = .) %>%
  mutate(mmm = format(Mondays, "%Y-%m"))

# Long format -> attach every Monday of the matching month -> split the
# monthly value evenly over those Mondays -> back to wide format.
df1 %>%
  pivot_longer(cols = -`Row Labels`, names_to = "dates") %>%
  mutate(dates = as.Date(dates),
         mmm = format(dates, "%Y-%m")) %>%
  right_join(MON, by = "mmm") %>%
  arrange(mmm) %>%
  group_by(`Row Labels`, dates) %>%
  # n() is the number of Mondays in this row label's month
  mutate(value = value / n()) %>%
  ungroup() %>%
  select(`Row Labels`, Mondays, value) %>%
  # id_cols must be passed by name in current tidyr
  pivot_wider(id_cols = `Row Labels`,
              names_from = "Mondays", values_from = "value")
#> # A tibble: 4 x 14
#> `Row Labels` `2022-11-07` `2022-11-14` `2022-11-21` `2022-11-28` `2022-12-05`
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 X6 25 25 25 25 40
#> 2 X7 50 50 50 50 50
#> 3 X8 75 75 75 75 75
#> 4 X9 100 100 100 100 100
#> # ... with 8 more variables: 2022-12-12 <dbl>, 2022-12-19 <dbl>,
#> # 2022-12-26 <dbl>, 2023-01-02 <dbl>, 2023-01-09 <dbl>, 2023-01-16 <dbl>,
#> # 2023-01-23 <dbl>, 2023-01-30 <dbl>
The same principle applies when doing it weekly:
# Weekly dates starting at the first day of the earliest month, stepping by
# seven days up to the boundary after the latest month, tagged with their
# "YYYY-MM" month key.
WKLY <- seq(min(floor_date(all_dates, "month")),
            max(ceiling_date(all_dates, "month")),
            by = "week") %>%
  data.frame("Weekly" = .) %>%
  mutate(mmm = format(Weekly, "%Y-%m"))

# Long format -> attach every weekly date of the matching month -> split the
# monthly value evenly over those weeks -> back to wide format.
df1 %>%
  pivot_longer(cols = -`Row Labels`, names_to = "dates") %>%
  mutate(dates = as.Date(dates),
         mmm = format(dates, "%Y-%m")) %>%
  right_join(WKLY, by = "mmm") %>%
  arrange(mmm) %>%
  group_by(`Row Labels`, dates) %>%
  # n() is the number of weekly dates falling in this row label's month
  mutate(value = value / n()) %>%
  ungroup() %>%
  select(`Row Labels`, Weekly, value) %>%
  # id_cols must be passed by name in current tidyr
  pivot_wider(id_cols = `Row Labels`,
              names_from = "Weekly", values_from = "value")
#> # A tibble: 4 x 15
#> `Row Labels` `2022-11-01` `2022-11-08` `2022-11-15` `2022-11-22` `2022-11-29`
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 X6 20 20 20 20 20
#> 2 X7 40 40 40 40 40
#> 3 X8 60 60 60 60 60
#> 4 X9 80 80 80 80 80
#> # ... with 9 more variables: 2022-12-06 <dbl>, 2022-12-13 <dbl>,
#> # 2022-12-20 <dbl>, 2022-12-27 <dbl>, 2023-01-03 <dbl>, 2023-01-10 <dbl>,
#> # 2023-01-17 <dbl>, 2023-01-24 <dbl>, 2023-01-31 <dbl>
Data:
df1 <- structure(list(`Row Labels` = c("X6", "X7", "X8", "X9"),
`2022-11-01` = c(100, 200, 300, 400),
`2022-12-01` = c(160, 200, 300, 400),
`2023-01-01` = c(500, 550, 600, 650)),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA, -4L))
I have two dataframes :
> df1 <- data.frame(date = as.Date( c( "2021-06-01", "2021-06-02", "2021-06-03", "2021-06-04",
"2021-06-05", "2021-06-06", "2021-06-07", "2021-06-08",
"2021-06-09", "2021-06-10", "2021-06-11", "2021-06-12",
"2021-06-13") ),
temperature = c( 17, 30, 28, 29, 16, 21, 20, 11, 28, 29, 25, 26, 19) )
and
> df2 <- data.frame( ID = c( 1 : 4 ),
date.pose = as.Date(c("2021-06-01", "2021-06-03", "2021-06-06", "2021-06-10") ),
date.withdrawal = as.Date(c("2021-06-02", "2021-06-05", "2021-06-09", "2021-06-13") ) )
I want to store the mean temperature for each period in df2 in a new column (df2$mean.temperature).
For ID = 1 in df2, the mean temperature would be calculated from the temperatures on 2021-06-01 and 2021-06-02, which is mean(c(17, 30)) = 23.5.
In other words, I want to get this :
> df2 <- data.frame(ID = c( 1 : 4 ),
date.pose = as.Date( c("2021-06-01", "2021-06-03", "2021-06-06", "2021-06-10") ) ,
date.withdrawal = as.Date( c("2021-06-03", "2021-06-06", "2021-06-10", "2021-06-13") ),
mean.Temperature = c(23.5, 24.3, 20.0, 24.8) )
I'm trying to add the ID from df2 as a new column in df1. Once I do that, I could aggregate like this:
> df3 <- aggregate(df1$temperature, list(df1$ID, df2$date.pose), FUN = mean)
I don't know how to add the corresponding ID in df1.
Or maybe there is a better way to do this?
Here's an approach using uncount from tidyr and some joins.
# Expand every period in df2 into one row per calendar day, look up that day's
# temperature in df1, average per ID and attach the means back onto df2.
df2 %>%
  # inclusive number of days covered by the period; the column is spelled
  # `date.withdrawal` in the question's df2
  mutate(days = (date.withdrawal - date.pose + 1) %>% as.integer) %>%
  # one row per day of the period; `row` counts 1, 2, ... within each period
  tidyr::uncount(days, .id = "row") %>%
  transmute(ID, date = date.pose + row - 1) %>%
  # explicit keys instead of relying on the natural-join message
  left_join(df1, by = "date") %>%
  group_by(ID) %>%
  summarize(mean.Temperature = mean(temperature)) %>%
  right_join(df2, by = "ID")
Result
# A tibble: 4 × 4
ID mean.Temperature date.pose date.witdrawal
<int> <dbl> <date> <date>
1 1 23.5 2021-06-01 2021-06-02
2 2 24.3 2021-06-03 2021-06-05
3 3 20 2021-06-06 2021-06-09
4 4 24.8 2021-06-10 2021-06-13
Update, thanks to @Jon Spring:
Here is how we could do it:
logic:
join both df's by date after long pivoting df1
arrange by date and fill
then after grouping by ID use summarise with mean()
and re-join finally:
library(dplyr)
library(tidyr)
df2 %>%
pivot_longer(-ID, values_to = "date") %>%
full_join(df1, by= "date") %>%
arrange(date) %>%
fill(ID, .direction = "down") %>%
group_by(ID) %>%
summarise(mean_temp = mean(temperature, na.rm = TRUE)) %>%
left_join(df2, by="ID")
ID mean_temp date.pose date.witdrawal
<int> <dbl> <date> <date>
1 1 23.5 2021-06-01 2021-06-02
2 2 24.3 2021-06-03 2021-06-05
3 3 20 2021-06-06 2021-06-09
4 4 24.8 2021-06-10 2021-06-13
Could you help me to insert the column Category in my generated table? That way I can know specifically the coef for each day and category.
library(purrr)
library(dplyr)
library(tidyverse)
library(lubridate)
df1 <- structure(
list(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28"),
date2 = c("2021-06-30","2021-06-30","2021-07-01","2021-07-01"),
Category = c("FDE","ABC","FDE","ABC"),
Week= c("Wednesday","Wednesday","Friday","Friday"),
DR1 = c(4,1,6,3),
DR01 = c(4,1,4,3), DR02= c(4,2,6,2),DR03= c(9,5,4,7),
DR04 = c(5,4,3,2),DR05 = c(5,4,5,4),
DR06 = c(2,4,3,2),DR07 = c(2,5,4,4),
DR08 = c(3,4,5,4),DR09 = c(2,3,4,4)),
class = "data.frame", row.names = c(NA, -4L))
dates <- subset(df1, date2 > date1, select = date2)$date2
map_dfr(dates, ~ {
datas <- df1 %>%
filter(date2 == ymd(.x)) %>%
summarize(across(starts_with("DR"), sum)) %>%
pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
mutate(name = as.numeric(name))
colnames(datas)<-c("Days","Numbers")
mod <- nls(Numbers ~ b1*Days^2+b2,start = list(b1 = 47,b2 = 0), data = datas)
tibble(dates = .x, coef = coef(mod)[2])
}) %>%
mutate(dates = format(ymd(dates), "%d/%m/%Y"))
# A tibble: 4 x 2
dates coef
<chr> <dbl>
1 30/06/2021 7.89
2 30/06/2021 7.89
3 01/07/2021 7.95
4 01/07/2021 7.95
In this case, it looks like this:
dates Category coef
<chr> <dbl>
1 30/06/2021 FDE 7.89
2 30/06/2021 ABC 7.89
3 01/07/2021 FDE 7.95
4 01/07/2021 ABC 7.95
You can use bind_cols() with a dataset containing the Category column
##Select the Category column as a subset
categories <- subset(df1, date2 > date1, select = Category)
map_dfr(dates, ~ {
datas <- df1 %>%
filter(date2 == ymd(.x)) %>%
summarize(across(starts_with("DR"), sum)) %>%
pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
mutate(name = as.numeric(name))
colnames(datas)<-c("Days","Numbers")
mod <- nls(Numbers ~ b1*Days^2+b2,start = list(b1 = 47,b2 = 0), data = datas)
tibble(dates = .x[[1]], coef = coef(mod)[2])
}) %>% bind_cols(categories) %>% #add the categories to the tibble
mutate(dates = format(ymd(dates), "%d/%m/%Y"))
I have a dataframe that looks like this
df <- data.frame(Region = c("Asia","Asia","Africa","Europe","Europe"),
Emp = c(120,40,10,67,110),
Sales18 = c(12310, 4510, 1140, 5310, 16435),
Sales19 = c(15670, 6730, 1605, 6120, 1755))
I am running a code where I group by region and then take average and weighted average for all 'sales' columns by 'Emp'
Result <- df %>% group_by(Region) %>%
summarise(sales18 = mean(Sales18, na.rm = T),
sales19 = mean(Sales19, na.rm = T),
weightedsales18 = weighted.mean(Sales18, .data[[Emp]], na.rm = T),
weightedsales19 = weighted.mean(Sales19, .data[[Emp]], na.rm = T))
However, I get the following error:
Error in splice(dot_call(capture_dots, frame_env = frame_env, named = named, :
object 'Emp' not found
Can't figure out what I am doing wrong
An option could be:
library(tidyverse)
df <- data.frame(Region = c("Asia","Asia","Africa","Europe","Europe"),
Emp = c(120,40,10,67,110),
Sales18 = c(12310, 4510, 1140, 5310, 16435),
Sales19 = c(15670, 6730, 1605, 6120, 1755))
df %>%
group_by(Region) %>%
summarise(across(
.cols = starts_with("Sales"),
.fns = list(w_mean = ~ weighted.mean(.x, w = Emp), mean = ~ mean(.x)),
.names = "{.col}_{.fn}")
)
#> # A tibble: 3 x 5
#> Region Sales18_w_mean Sales18_mean Sales19_w_mean Sales19_mean
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 Africa 1140 1140 1605 1605
#> 2 Asia 10360 8410 13435 11200
#> 3 Europe 12224. 10872. 3407. 3938.
Created on 2021-05-25 by the reprex package (v2.0.0)
This works. The data masking already takes place, you don't need the .data pronoun.
library(tidyverse)
df <- data.frame(Region = c("Asia","Asia","Africa","Europe","Europe"),
Emp = c(120,40,10,67,110),
Sales18 = c(12310, 4510, 1140, 5310, 16435),
Sales19 = c(15670, 6730, 1605, 6120, 1755))
# Plain and Emp-weighted mean of each sales column per Region. Column names
# are used bare because data masking already resolves them; `TRUE` is spelled
# out since `T` is an ordinary variable that can be reassigned.
Result <- df %>%
  group_by(Region) %>%
  summarise(sales18 = mean(Sales18, na.rm = TRUE),
            sales19 = mean(Sales19, na.rm = TRUE),
            weightedsales18 = weighted.mean(Sales18, Emp, na.rm = TRUE),
            weightedsales19 = weighted.mean(Sales19, Emp, na.rm = TRUE))
Result
#> # A tibble: 3 x 5
#> Region sales18 sales19 weightedsales18 weightedsales19
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 Africa 1140 1605 1140 1605
#> 2 Asia 8410 11200 10360 13435
#> 3 Europe 10872. 3938. 12224. 3407.
Created on 2021-05-25 by the reprex package (v2.0.0)
An unquoted Emp inside [[ tells R to look for a string variable called Emp, which presumably contains the name of another variable that holds the weights, like here:
df <- data.frame(Region = c("Asia","Asia","Africa","Europe","Europe"),
x = c(120,40,10,67,110),
Sales18 = c(12310, 4510, 1140, 5310, 16435),
Sales19 = c(15670, 6730, 1605, 6120, 1755))
Emp <- 'x'
# Here `Emp` is a string holding the name of the weights column, so
# `.data[[Emp]]` looks that column up by name. `TRUE` is spelled out since `T`
# is an ordinary variable that can be reassigned.
df %>%
  group_by(Region) %>%
  summarise(sales18 = mean(Sales18, na.rm = TRUE),
            sales19 = mean(Sales19, na.rm = TRUE),
            weightedsales18 = weighted.mean(Sales18, .data[[Emp]], na.rm = TRUE),
            weightedsales19 = weighted.mean(Sales19, .data[[Emp]], na.rm = TRUE))
# A tibble: 3 x 5
Region sales18 sales19 weightedsales18 weightedsales19
<chr> <dbl> <dbl> <dbl> <dbl>
1 Africa 1140 1605 1140 1605
2 Asia 8410 11200 10360 13435
3 Europe 10872. 3938. 12224. 3407.
Since you do not have a variable Emp of this kind, R throws an error.
What to do? Just quote Emp inside [[:
df <- data.frame(Region = c("Asia","Asia","Africa","Europe","Europe"),
Emp = c(120,40,10,67,110),
Sales18 = c(12310, 4510, 1140, 5310, 16435),
Sales19 = c(15670, 6730, 1605, 6120, 1755))
# Quoting the column name makes `.data[["Emp"]]` refer to the Emp column
# directly. `TRUE` is spelled out since `T` is an ordinary variable that can
# be reassigned.
df %>%
  group_by(Region) %>%
  summarise(sales18 = mean(Sales18, na.rm = TRUE),
            sales19 = mean(Sales19, na.rm = TRUE),
            weightedsales18 = weighted.mean(Sales18, .data[["Emp"]], na.rm = TRUE),
            weightedsales19 = weighted.mean(Sales19, .data[["Emp"]], na.rm = TRUE))
# A tibble: 3 x 5
Region sales18 sales19 weightedsales18 weightedsales19
<chr> <dbl> <dbl> <dbl> <dbl>
1 Africa 1140 1605 1140 1605
2 Asia 8410 11200 10360 13435
3 Europe 10872. 3938. 12224. 3407.
I have the data in a data frame, with the first column is date and the second column is individual weight. Here's a sample from the data:
df <- data.frame(
date = c("2019-01-01", "2019-01-01", "2019-01-01", "2019-01-01",
"2019-01-01", "2019-01-01", "2019-01-01", "2019-01-01",
"2019-01-01", "2019-01-01", "2019-01-02", "2019-01-02", "2019-01-02",
"2019-01-02", "2019-01-02", "2019-01-02", "2019-01-02",
"2019-01-02", "2019-01-02", "2019-01-02"),
weight = c(2174.8, 2174.8, 2174.8, 8896.53, 8896.53, 2133.51, 2133.51,
2892.32, 2892.32, 2892.32, 2892.32, 5287.78, 5287.78, 6674.03,
6674.03, 6674.03, 6674.03, 6674.03, 5535.11, 5535.11)
)
I would like to run simple summary statistic for each date first and then find number of records whose weight is in the given range, defining the category by the % of total range of weights. Finally store number of each record in a separate column
Lowest 10%
10-20%
20-40%
40-60%
60-80%
80-90%
90-100%
The logic = (MinWeight + (MaxWeight-MinWeight)*X%)
Here is my expected outcome ( I only show two columns for % range)
df %>%
group_by(date) %>%
summarise(mean(weight), min(weight), max(weight))
date `mean(weight)` `min(weight)` `max(weight)` `Lowest 10%` `10-20%`
2019-01-01 3726. 2134. 8897. num records. num records.
Check this solution:
library(tidyverse)
library(wrapr)
df %>%
group_by(date) %>%
mutate(
rn = row_number(),
temp = weight - min(weight),
temp = (temp / max(temp)) * 100,
temp = cut(temp, seq(0, 100, 10), include.lowest = TRUE),
temp = str_remove(temp, '\\(|\\[') %>%
str_replace(',', '-') %>%
str_replace('\\]', '%'),
one = 1
) %>%
spread(temp, one, fill = 0) %.>%
left_join(
summarise(.,
`mean(weight)` = mean(weight),
`min(weight)` = min(weight),
`max(weight)` = max(weight)
),
summarise_at(., vars(matches('\\d+-\\d+.')), sum)
)
Output:
date `mean(weight)` `min(weight)` `max(weight)` `0-10%` `10-20%` `60-70%` `90-100%`
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2019-01-01 3726. 2134. 8897. 5 3 0 2
2 2019-01-02 5791. 2892. 6674. 1 0 4 5
Could be done this way:
library(tidyverse)
# Assign every record to one of ten equal-width bins of its date's weight
# range, expressed as a percentage of (max - min) for that date.
# NOTE(review): a date whose weights are all equal gives a zero range and
# therefore NaN positions - confirm whether that case can occur.
binned <- df %>%
  group_by(date) %>%
  mutate(
    wrange = cut(
      (weight - min(weight)) / (max(weight - min(weight))) * 100,
      10,
      labels = paste(
        seq(0, 90, by = 10),
        paste0(seq(10, 100, by = 10), "%"),
        sep = "-"
      )
    )
  )

# Per-date summary statistics, named directly instead of going through the
# deprecated funs() / rename_at() pair.
stats <- binned %>%
  summarise(
    `mean(weight)` = mean(weight),
    `min(weight)` = min(weight),
    `max(weight)` = max(weight)
  )

# Number of records per bin and date; complete() adds the empty bins with a
# count of 0 so that every date gets all ten columns.
counts <- binned %>%
  count(wrange) %>%
  complete(wrange, fill = list(n = 0)) %>%
  ungroup() %>%
  pivot_wider(names_from = wrange, values_from = n)

# One row per date: statistics followed by the ten bin counts.
left_join(stats, counts, by = "date")
Which outputs:
# date mean(weight) min(weight) max(weight) 0-10% 10-20% 20-30% 30-40% 40-50%
# 1 2019-01-01 3726.144 2133.51 8896.53 5 3 0 0 0
# 2 2019-01-02 5790.825 2892.32 6674.03 1 0 0 0 0
# 50-60% 60-70% 70-80% 80-90% 90-100%
# 0 0 0 0 2
# 0 4 0 0 5
(I reformatted the output, to show all the data)