Related
I am trying to make my function dynamic: I want the label of the q25 column to be configurable.
I want to be able to set percentile25 dynamically in R Markdown when I need to, and otherwise keep a default label.
I have tried options() and getOption(), but I get an error every time.
# Label intended for the 25th-percentile column.
percentile25 <- "25th Perc"

# Example data: one row per sale record.
df <- data.frame(
  Name = c(
    "asdf", "kjhgf", "cvbnm", "rtyui", "cvbnm", "jhfd",
    "cvbnm", "sdfghj", "cvbnm", "dfghj", "cvbnm"
  ),
  sale = c(27, 28, 27, 16, 14, 25, 14, 14, 19, 18, 28),
  city = c("CA", "TX", "MN", "NY", "TX", "MT", "HU", "KL", "TX", "SA", "TX"),
  Dept = c("HH", "MM", "NN", "MM", "AA", "VV", "MM", "HU", "JJ", "MM", "ZZ")
)
# Attempted summary helper: for column `var` (given as a string) of `dataset`,
# compute the type-6 25% quantile and the count of non-missing values, add
# `name` in a column called " ", and rename `q25` to a configurable label.
#
# NOTE(review): this definition does not parse. `options(percentile25 = ...)`
# is a call, and a formal argument has to be a name (optionally with
# `= default`). `..` is presumably meant to be `...` -- TODO confirm.
t1<- function(dataset,var,name,options(percentile25 = "25th percentile"),..)
{
# Turn the column name string into an expression that can be unquoted with !!.
var <- rlang::parse_expr(var)
tabl1 <- dataset %>% dplyr::filter(!is.na(!!var)) %>% summarise(
# probs = seq(0, 1, 0.25) yields 0/25/50/75/100%; element [2] is the 25% value.
q25 = quantile(!! var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[2],
N = sum(!is.na(!!var)))
summ_tab<-tabl1 %>%
mutate(" "= !!name,
q25 = q25)
summ_tab <- summ_tab %>% dplyr::rename(
# NOTE(review): `p25` is neither a formal argument nor a local of this function.
!!p25 := q25)
summ_tab <- summ_tab %>% select(" ",everything(),N)
summ_tab
}
# `data` only reaches the `dataset` formal through partial argument matching.
t1(data = df,var = "sale",name = "listd")
And the same attempt with a median column (label "Mediana"):
# Attempted summary helper with a median: for column `var` (given as a string)
# of `dataset`, compute the type-6 25% and 50% quantiles and the non-missing
# count, then rename the statistic columns to the labels in `p25` and `med`.
#
# NOTE(review): there is no comma between the `p25` default on the next line
# and `med=` on the line after it, so the formals list does not parse.
t1<- function(dataset,var,name,p25 = getOption("percentile25", default = "25th percentile")
# NOTE(review): `Med_n` is unquoted, so it is looked up as a variable rather
# than used as the option name; presumably "Med_n" was intended -- TODO confirm.
med=getOption(Med_n ,default ="Median")){
# Turn the column name string into an expression that can be unquoted with !!.
var <- rlang::parse_expr(var)
tabl1 <- dataset %>% dplyr::filter(!is.na(!!var)) %>% summarise(
# Elements [2] and [3] of the quartile vector are the 25% and 50% values.
q25 = quantile(!! var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[2],
med = quantile(!! var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[3],
N = sum(!is.na(!!var)))
summ_tab<-tabl1 %>%
mutate(" "= !!name,
q25 = q25,
# NOTE(review): summarise() above creates a column `med`, not `Median`.
Median = Median)
summ_tab <- summ_tab %>% dplyr::rename(
!!p25 := q25,
# NOTE(review): same issue -- the summarised column is called `med`.
!!med:=Median)
summ_tab <- summ_tab %>% select(" ",everything(),N)
summ_tab
}
You have misunderstood my comment on your former question:
Stick with the definition of the function in your former post:
library(dplyr)
# Summarise one column of `dataset` as a labelled one-row table.
#
# dataset: data frame to summarise.
# var:     column (or expression) to summarise, given as a string.
# name:    value placed in the leading column named " ".
# p25:     label for the 25th-percentile column; defaults to the
#          "percentile25" option, or "25th percentile" when that is unset.
#
# Returns a data frame with columns " ", <p25>, N, where N is the number of
# non-missing values of `var`.
t1 <- function(dataset, var, name, p25 = getOption("percentile25", default = "25th percentile")) {
  col_expr <- rlang::parse_expr(var)

  complete_rows <- filter(dataset, !is.na(!!col_expr))
  summary_row <- summarise(
    complete_rows,
    # Quartiles are 0/25/50/75/100%; the second element is the 25% value.
    q25 = quantile(!!col_expr, type = 6, probs = seq(0, 1, 0.25), na.rm = TRUE)[2],
    N = sum(!is.na(!!col_expr))
  )

  labelled <- mutate(summary_row, " " = !!name)
  relabelled <- dplyr::rename(labelled, !!p25 := q25)
  select(relabelled, " ", everything(), N)
}
Then option one to pass your desired label to the function would be to pass the name directly to the p25 argument:
# Pass the label directly; `dataset` is spelled out in full rather than
# relying on partial matching of `data`.
t1(dataset = df, var = "sale", name = "listd", p25 = "25th Percentilen")
#> 25th Percentilen N
#> 1 listd 14 11
Second option would be to set your desired label via options outside of your function. In that case getOption will automatically pick the label:
# Set the label globally; t1() picks it up through its getOption() default.
options(percentile25 = "25th Percentilen")
# `dataset` is spelled out in full rather than relying on partial matching.
t1(dataset = df, var = "sale", name = "listd")
#> 25th Percentilen N
#> 1 listd 14 11
UPDATE And here is the updated function which now includes the median. Additionally I made some slight adjustments, e.g. if you want to compute just one quantile you could do so by using e.g. probs = .25. Also I collapsed the rename and select into one step:
# Summarise one column of `dataset` as a labelled one-row table with the
# 25th percentile, the median and the non-missing count.
#
# dataset: data frame to summarise.
# var:     column (or expression) to summarise, given as a string.
# name:    value placed in the leading column named " ".
# p25:     label for the 25th-percentile column (option "percentile25",
#          default "25th percentile").
# med:     label for the median column (option "Med_n", default "Median").
#
# Returns a data frame with columns " ", <p25>, <med>, N.
t1 <- function(dataset, var, name,
               p25 = getOption("percentile25", default = "25th percentile"),
               med = getOption("Med_n", default = "Median")) {
  col_expr <- rlang::parse_expr(var)

  complete_rows <- filter(dataset, !is.na(!!col_expr))
  summary_row <- summarise(
    complete_rows,
    q25 = quantile(!!col_expr, type = 6, probs = .25, na.rm = TRUE),
    med = quantile(!!col_expr, type = 6, probs = .5, na.rm = TRUE),
    N = sum(!is.na(!!col_expr))
  )

  labelled <- mutate(summary_row, " " = !!name)
  # Rename while selecting: the left-hand sides are the label strings, the
  # right-hand sides are the summarised columns.
  select(labelled, " ", !!p25 := q25, !!med := med, N)
}
# Labels passed directly; `dataset` is spelled out in full rather than
# relying on partial matching of `data`.
t1(
  dataset = df, var = "sale", name = "listd",
  p25 = "25th Percentilen", med = "Mediana"
)
#> 25th Percentilen Mediana N
#> 1 listd 14 19 11
# Labels supplied through options, picked up by the getOption() defaults.
options(
  percentile25 = "25th Percentilen",
  Med_n = "Mediana"
)
t1(dataset = df, var = "sale", name = "listd")
#> 25th Percentilen Mediana N
#> 1 listd 14 19 11
I want to check whether the column names in the summary table match the names in stats: if they do, run the first branch, otherwise run the second. One more thing: the summary can have only one column (25th percentile), two columns (25th percentile, 75th percentile), three columns, or four columns, as below,
but it is showing an error.
# Example data.
df <- data.frame(
  Name = c(
    "asdf", "kjhgf", "cvbnm", "rtyui", "cvbnm", "jhfd",
    "cvbnm", "sdfghj", "cvbnm", "dfghj", "cvbnm"
  ),
  sale = c(27, 28, 27, 16, 14, 25, 14, 14, 19, 18, 28),
  sale2 = c(32, 25, 29, 36, 44, 24, 17, 15, 11, 13, 22),
  city = c("CA", "TX", "MN", "NY", "TX", "MT", "HU", "KL", "TX", "SA", "TX"),
  Dept = c("HH", "MM", "NN", "MM", "AA", "VV", "MM", "HU", "JJ", "MM", "ZZ")
)

# Display labels, one option per statistic. The 75th-percentile label is
# stored under its own key, `p75`, so it does not overwrite `p25`.
options(p25 = "25 P")
options(p75 = "75 P")
options(meann = "MEAN")
options(med = "Meadiana")

# Canonical statistic names, in the order they should be displayed.
stats <- c("25th percentile", "Median", "Mean", "75th percentile")

p25 <- getOption("p25", default = "Perc 25")
p75 <- getOption("p75", default = "Perc 75")
med <- getOption("med", default = "Median")
meann <- getOption("meann", default = "Mean")

# One-row summary of `sale`, each statistic rounded to one decimal place.
summ_tab1 <- df %>%
  filter(!is.na(sale)) %>%
  summarise(
    q25 = round(quantile(sale, type = 6, probs = 0.25, na.rm = TRUE), digits = 1),
    Median = round(quantile(sale, type = 6, probs = 0.5, na.rm = TRUE), digits = 1),
    Average = round(mean(sale, na.rm = TRUE), digits = 1),
    q75 = round(quantile(sale, type = 6, probs = 0.75, na.rm = TRUE), digits = 1),
    N = sum(!is.na(sale))
  )

# Add the row label and swap the internal column names for the display labels.
summ_tab_suff <- summ_tab1 %>%
  mutate(" " = "ttt") %>%
  dplyr::rename(
    !!p25 := q25,
    !!med := Median,
    !!meann := Average,
    !!p75 := q75
  )

# Every column other than the row label and N is a statistic column. This
# works for one to four statistic columns.
stat_cols <- setdiff(names(summ_tab_suff), c(" ", "N"))

# `if` needs a single TRUE/FALSE, so the per-column matches are collapsed
# with all() and combined with the scalar `&&`.
if (length(stat_cols) > 0 && all(stat_cols %in% stats)) {
  # All labels are canonical: show them in the order given by `stats`.
  summ_tab_suff <- summ_tab_suff %>%
    select(" ", N, all_of(intersect(stats, stat_cols)))
} else {
  # Custom labels: keep the statistic columns in their current order.
  summ_tab_suff <- summ_tab_suff %>%
    select(" ", N, everything())
}
I have created a function that produces a summary (average, percentiles), but now I want that summary for particular subsets, so I have created the subsets accordingly.
However, my function is not working properly.
I am trying to update my function so that I get one summary per data set in a list, labelled with the name of that data set, and so that the summaries for all data sets are combined with rbind.
I have no idea how to use "ALL" and "MM" as the variable names in my function,
so that the summaries for both are row-bound automatically.
# Example data.
df <- data.frame(
  Name = c(
    "asdf", "kjhgf", "cvbnm", "rtyui", "cvbnm", "jhfd",
    "cvbnm", "sdfghj", "cvbnm", "dfghj", "cvbnm"
  ),
  sale = c(27, 28, 27, 16, 14, 25, 14, 14, 19, 18, 28),
  city = c("CA", "TX", "MN", "NY", "TX", "MT", "HU", "KL", "TX", "SA", "TX"),
  Dept = c("HH", "MM", "NN", "MM", "AA", "VV", "MM", "HU", "JJ", "MM", "ZZ")
)

# Snapshot of the full data, taken before any flag columns are added.
df1 <- df

# Flag column for "all rows".
df[["cc1"]] <- 1

# Rows of the "MM" department (taken after cc1 was added, before cc2).
df2 <- df[df$Dept == "MM", ]

# Flag column for "MM rows": 1 for MM, NA otherwise.
df[["cc2"]] <- ifelse(df$Dept == "MM", 1, NA)

lst <- list(df[["cc1"]], df[["cc2"]])
listd <- list(ALL = df1, MM = df2)
#I want to run my function for listd so that i can get a combined summary for all variables in listd
# Build one summary row per data set in `data`, stack them and print the
# combined table.
#
# data:             list of data frames, ideally named (e.g. "ALL", "MM").
# var:              column to summarise, given as a string.
# footer:           forwarded to t1() as its third argument.
# Name_of_variable: fallback row label for list elements without a name.
# decimal:          forwarded to t1().
#
# Called for its printed output (markdown headings followed by the table).
tt2 <- function(data, var, footer, Name_of_variable, decimal) {
  dataset_names <- names(data)
  # One slot per data set, allocated once so earlier results are kept.
  table_list <- vector("list", length(data))

  for (d in seq_along(data)) {
    cat('\n\n#### ', dataset_names[d], '\n\n')

    # Label the row with the list element's own name when it has one.
    has_name <- !is.null(dataset_names) && nzchar(dataset_names[d])
    row_label <- if (has_name) dataset_names[d] else Name_of_variable

    table_list[[d]] <- t1(data[[d]], var, footer, decimal, row_label)
  }

  # Stack the per-data-set rows into a single table and print it once.
  tt <- do.call(rbind, table_list)
  cat(knit_print(tt))
  cat('\n\n')
}
# Summarise one column of `dataset` as a labelled, formatted one-row table.
#
# dataset:          data frame to summarise.
# var:              column (or expression) to summarise, given as a string.
# Suff:             accepted for interface compatibility; not used in the body.
# decimal:          when equal to TRUE, values are shown with 1 decimal place,
#                   otherwise with 0.
# Name_of_variable: value placed in the leading column named " ".
#
# Returns a data frame with columns " ", N, `25th percentile`, Median,
# Average, `75th percentile`; the statistics are formatted character values.
t1 <- function(dataset, var, Suff, decimal, Name_of_variable) {
  numdig <- if (decimal == TRUE) 1 else 0
  var <- rlang::parse_expr(var)

  # Round and pad to a fixed number of decimals.
  fmt_num <- function(x) format(round(x, digits = numdig), nsmall = numdig)

  # The labelled and renamed table is the function's value.
  dataset %>%
    filter(!is.na(!!var)) %>%
    summarise(
      q25 = fmt_num(quantile(!!var, type = 6, probs = 0.25, na.rm = TRUE)),
      Median = fmt_num(quantile(!!var, type = 6, probs = 0.5, na.rm = TRUE)),
      Average = fmt_num(mean(!!var, na.rm = TRUE)),
      q75 = fmt_num(quantile(!!var, type = 6, probs = 0.75, na.rm = TRUE)),
      N = sum(!is.na(!!var))
    ) %>%
    mutate(" " = !!Name_of_variable) %>%
    dplyr::rename(
      `25th percentile` = q25,
      `75th percentile` = q75
    ) %>%
    select(" ", N, everything())
}
# Summarise the "sale" column for every data set in `listd`.
# `footer` is not supplied: tt2 only passes it on to t1 as `Suff`, and t1's
# body never reads `Suff`, so the missing argument is never evaluated.
tt2(data = listd,var = "sale",Name_of_variable = "listd",decimal = TRUE)
Previously I was getting a summary like the one below,
but now the output should have the name of each data set in the rows.
I've slightly rewritten your t1 function and make use of the fact that it returns a dataframe. This can be used together with purrr::map_dfr:
library(dplyr)
# Example data.
df <- data.frame(
  Name = c(
    "asdf", "kjhgf", "cvbnm", "rtyui", "cvbnm", "jhfd",
    "cvbnm", "sdfghj", "cvbnm", "dfghj", "cvbnm"
  ),
  sale = c(27, 28, 27, 16, 14, 25, 14, 14, 19, 18, 28),
  city = c("CA", "TX", "MN", "NY", "TX", "MT", "HU", "KL", "TX", "SA", "TX"),
  Dept = c("HH", "MM", "NN", "MM", "AA", "VV", "MM", "HU", "JJ", "MM", "ZZ")
)

# Snapshot of the full data, taken before any flag columns are added.
df1 <- df

# Flag column for "all rows".
df[["cc1"]] <- 1

# Rows of the "MM" department (taken after cc1 was added, before cc2).
df2 <- df[df$Dept == "MM", ]

# Flag column for "MM rows": 1 for MM, NA otherwise.
df[["cc2"]] <- ifelse(df$Dept == "MM", 1, NA)

lst <- list(df[["cc1"]], df[["cc2"]])
listd <- list(ALL = df1, MM = df2)
# Summarise one column of `dataset` as formatted quartiles, mean and count.
#
# dataset: data frame to summarise.
# var:     column (or expression) to summarise, given as a string.
# decimal: when equal to TRUE, values are shown with 1 decimal place,
#          otherwise with 0.
#
# Returns a one-row data frame with columns `25th percentile`, Median,
# Average, `75th percentile` (formatted character values) and N.
t1 <- function(dataset, var, decimal) {
  numdig <- if (decimal == TRUE) 1 else 0
  var <- rlang::parse_expr(var)

  # Round and pad to a fixed number of decimals.
  fmt_num <- function(x) format(round(x, digits = numdig), nsmall = numdig)
  # k-th element of the type-6 quartile vector (2 = 25%, 3 = 50%, 4 = 75%).
  quartile <- function(x, k) {
    quantile(x, type = 6, probs = seq(0, 1, 0.25), na.rm = TRUE)[k]
  }

  complete_rows <- filter(dataset, !is.na(!!var))
  summary_row <- summarise(
    complete_rows,
    q25 = fmt_num(quartile(!!var, 2)),
    Median = fmt_num(quartile(!!var, 3)),
    Average = fmt_num(mean(!!var, na.rm = TRUE)),
    q75 = fmt_num(quartile(!!var, 4)),
    N = sum(!is.na(!!var))
  )

  rename(
    summary_row,
    `25th percentile` = q25,
    `75th percentile` = q75
  )
}
# Apply t1() to every data set and row-bind the results; the list names end
# up in the leading column named " ".
purrr::map_dfr(
  listd,
  function(subset_df) t1(dataset = subset_df, var = "sale", decimal = TRUE),
  .id = " "
)
#> 25th percentile Median Average 75th percentile N
#> 1 ALL 14.0 19.0 20.9 27.0 11
#> 2 MM 14.5 17.0 19.0 25.5 4
Created on 2020-09-23 by the reprex package (v0.3.0)
I have multiple observations from each of a few groups and I'd like to make a matrix of QQ plots (or another type of plot), comparing each group to every other group.
Here's an example of what I'm talking about:
library(tidyverse)
# Reproducible example: n scores for each of four people, drawn from normal
# distributions with different means and spreads.
set.seed(27599)
n <- 30
# tibble() is the supported constructor; data_frame() is deprecated.
d <- tibble(
  person = rep(c("Alice", "Bob", "Charlie", "Danielle"), each = n),
  score = c(
    rnorm(n = n),
    rnorm(n = n, mean = 0.1),
    rnorm(n = n, sd = 2),
    rnorm(n = n, mean = 0.3, sd = 1.4)
  )
)

# One column of sorted scores per person, so that row i holds the i-th order
# statistic of each person.
by_hand <- tibble(
  a = sort(d$score[d$person == "Alice"]),
  b = sort(d$score[d$person == "Bob"]),
  c = sort(d$score[d$person == "Charlie"]),
  d = sort(d$score[d$person == "Danielle"])
)

# Pairwise QQ plots; the lower panels also get the identity line y = x.
pairs(
  x = by_hand,
  lower.panel = function(x, y) {
    points(x, y)
    abline(0, 1)
  }
)
Here, I've manipulated the data by hand and used graphics::pairs() to make the plot. Can the same be done inside the tidyverse?
Here's what I've tried.
# Sort the scores within each person and inspect the result.
glimpse(
  mutate(
    group_by(d, person),
    score = sort(score)
  )
)
This seems promising.
# Sort within each person, then try to spread people into columns.
spread(
  mutate(
    group_by(d, person),
    score = sort(score)
  ),
  key = person,
  value = score
)
This gives the 'duplicate identifiers' error.
Maybe reshape2 would be better to use here?
# Sort within each person, then cast to wide format with one row per score.
dcast(
  mutate(
    group_by(d, person),
    score = sort(score)
  ),
  formula = score ~ person
)
This creates a data.frame with 120 rows, and most of the values (90 per person) are NA. How can I create a wide data.frame without introducing so many NA?
You need a variable that links the row position for each person. Try
# Wide table of sorted scores, one column per person.
by_tidyverse <- d %>%
  group_by(person) %>%
  mutate(
    # Position within the person's group; this key lines the rows up when
    # the data are widened.
    rowID = row_number(),
    score = sort(score)
  ) %>%
  # Drop the grouping before reshaping so `person` can become column names.
  ungroup() %>%
  pivot_wider(names_from = person, values_from = score) %>%
  select(-rowID)

# Pairwise QQ plots; the lower panels also get the identity line y = x.
pairs(
  x = by_tidyverse,
  lower.panel = function(x, y) {
    points(x, y)
    abline(0, 1)
  }
)
I want to summarise several columns from a data.frame. The grouping and summary was achieved with dplyr, as in the example below.
# Example data: 20 observations, alternating day/night, ten per person, with
# four uniformly distributed measurement columns.
df <- data.frame(
  time = rep(c("day", "night"), 10),
  who = rep(c("Paul", "Simon"), each = 10),
  var1 = runif(20, 5, 15),
  var2 = runif(20, 10, 12),
  var3 = runif(20, 2, 7),
  var4 = runif(20, 1, 3)
)
Writing the function I need
# Quantile of `var` at probability `num`, ignoring missing values.
#
# var: numeric vector.
# num: probability (or vector of probabilities) in [0, 1].
#
# Returns the named result of quantile().
quantil_x <- function(var, num) {
  # TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
  quantile(var, probs = num, na.rm = TRUE)
}
Using it at var1 and exporting
# Minimum, 25th and 75th percentile of var1 for every time/person combination.
percentiles <- summarise(
  group_by(df, time, who),
  P0 = quantil_x(var1, 0),
  P25 = quantil_x(var1, .25),
  P75 = quantil_x(var1, .75)
)

# Export with ";" as field separator and "," as decimal mark.
write.table(
  percentiles,
  file = "summary_var1.csv",
  row.names = FALSE,
  dec = ",",
  sep = ";"
)
What I want is to repeat this same task for 'var2', 'var3' and 'var4'. I have tried to run a loop to perform this task multiple times, with no success. Unfortunately I couldn't find a way to refer to a different variable on each iteration. That is, within the loop I have tried to use summarise_(), tried to use get() inside the function quantil_x() or within summarise(), and also as.name(), but none of this worked.
I'm pretty sure this is a bad coding skill issue, but that's all I came up with so far. Here is an example of what I tried to do:
# Columns to summarise. The vector is not called `list`, so the base
# function of that name is not shadowed.
vars <- c("var1", "var2", "var3", "var4")

for (v in vars) {
  # `.data[[v]]` looks the column up by its name held in `v`.
  percentiles <- df %>%
    group_by(time, who) %>%
    summarise(
      P0 = quantil_x(.data[[v]], 0),
      P25 = quantil_x(.data[[v]], .25),
      P75 = quantil_x(.data[[v]], .75)
    )

  # paste0() builds only the file name; the formatting arguments belong to
  # write.table().
  write.table(
    percentiles,
    file = paste0("summary_", v, ".csv"),
    row.names = FALSE,
    dec = ",",
    sep = ";"
  )
}
I read this post, but didn't help much on my case.
Thanks in advance.
You can do this with summarise_each()
# Minimum, 25th and 75th percentile of every non-grouping column, per
# time/person combination. Output columns are named <column>_<label>,
# e.g. var1_0, var1_25, var1_75.
df %>%
  group_by(time, who) %>%
  summarise(
    across(
      everything(),
      list(
        `0` = function(x) quantile(x, 0, na.rm = TRUE),
        `25` = function(x) quantile(x, .25, na.rm = TRUE),
        `75` = function(x) quantile(x, .75, na.rm = TRUE)
      )
    )
  )
You can do this with gather()
# Stack all four measurement columns (var1 to var4) into long format, then
# summarise each variable per time/person combination.
percentiles <- df %>%
  pivot_longer(
    cols = c(var1, var2, var3, var4),
    names_to = "Var",
    values_to = "Value"
  ) %>%
  group_by(Var, time, who) %>%
  summarise(
    P0 = quantil_x(Value, 0),
    P25 = quantil_x(Value, .25),
    P75 = quantil_x(Value, .75)
  )