highlight cell if previous column meets specific condition R - r

I have a dataframe
library(flextable)
df = structure(list(col1 = c(1, NA, 1, 1, 1), col2 = c(NA, 1, NA,
1, 1), col3 = c(1, 1, NA, 1, NA), col4 = c(1, 1, 1, 1, NA)), class = "data.frame", row.names = c(NA,
-5L))
df %>% flextable()
I want
to return the last 3 columns highlighted based on the following logic:
red if it is blank
green if and only if the preceeding column was blank.
Based on this, I am trying to create a color matrix to identify the green highlights, but have hit a brick wall.
To identify the red matrix, I used the following code ifelse(is.na(df),"red","").
what would be the best method to identify the green labels

Not the prettiest, but works
df=data.frame(col1 = c(1,NA,1,1,1,1),
col2 = c(NA,1,NA,1,1,1),
col3 = c(1,1,NA,1, NA,1),
col4 = c(1,1,1,1,NA,1))
df %>% flextable()
red = ifelse(is.na(df),1,0)
green = data.frame()
for(n in 1:(ncol(red)-1)){
print(n)
r=ifelse(red[,n]==1 & red[,n+1] == 0,1,0)
green = rbind(green, r)
}
green = t(green)
colnames(green) = paste0("col",2:4)
green
red[,2:4]
ft = df[,2:4] %>%
flextable() %>%
bg(i = ~ is.na(col2), j = 1,bg='red') %>%
bg(i = ~ is.na(col3), j = 2,bg='red') %>%
bg(i = ~ is.na(col4), j = 3,bg='red') %>%
bg(i = ~ green[,1]==1,j = 1, bg='green') %>%
bg(i = ~ green[,2]==1,j = 2, bg='green') %>%
bg(i = ~ green[,3]==1,j = 3, bg='green')
ft

Related

Convert multiple columns that are onehot encoded into a factor column

Some columns in the dataset are one hot encoded. I wish to convert them into one factor column.
I wish to write a code where I specify which columns to be combined and converted to factor column.
Below is an example with desired result.
library(tidyverse)
tbl <- tibble(
# one hot encoded
a1_blue = c(1, 0, 0),
a1_red = c(0, 1, 0),
a1_green = c(0, 0, 1),
# one hot encoded
a2_square = c(1, 0, 0),
a2_circle = c(0, 1, 0),
a2_dot = c(0, 0, 1),
a3_letters = factor(c("A", "B", "C"))
)
tbl_desired <- tibble(
a1_colors = factor(c("blue", "red", "green"),
levels = c("blue", "red", "green")),
a2_shapes = factor(c("square", "circle", "dot"),
levels = c("square", "circle", "dot")),
a3_letters = factor(c("A", "B", "C"))
)
This will give you the structure that you need. You can convert the columns to factors using mutate(across(everything(), as.factor)).
tbl %>%
pivot_longer(-a3_letters) %>%
filter(value != 0) %>%
separate(name, into = c("var", "val")) %>%
pivot_wider(-value, values_from = val, names_from = var)
#> # A tibble: 3 x 3
#> a3_letters a1 a2
#> <fct> <chr> <chr>
#> 1 A blue square
#> 2 B red circle
#> 3 C green dot

Using summary_row() values to calculate group percentage with {gt} package?

I am trying to calculate the percentage for an entire group while using the summary_rows() function from the {gt} package. The problem I am encountering is how to create a function that uses summary_rows() values to calculate a percentage rowwise.
Sample df:
df <- tibble(
division = c("Science", "Science", "Science"),
department = c("Biology", "Biology", "Biology"),
course_num = c('101', '201', "301"),
widthraws = c(1, 2, 3),
unsucessful = c(0, 0 , 0),
successfull = c(1, 3, 4),
total_enrolled = c(2, 5, 7),
percent_successful = c(.50, .60, .57)
)
Sample of gt table:
df %>%
group_by(division, department) %>%
gt() %>%
summary_rows(
groups = TRUE,
columns = 4:7,
missing_text = " ",
fns = list(
total = ~sum(.)),
)
What I would want is the total row of the percent_successful column to be .57. Open to other ideas that would help me achieve this too.
Compute the percentage for total outside and add a layer
library(gt)
library(dplyr)
total_success_perc <- with(df, round(sum(successfull)/sum(total_enrolled), 2))
df %>%
group_by(division, department) %>%
gt() %>%
summary_rows(
groups = TRUE,
columns = 4:7,
missing_text = " ",
fns = list(
total = ~sum(.)),
) %>%
summary_rows(groups = TRUE, columns = 8, missing_text = " ",
fns = list(total = ~ c(total_success_perc)))
-output
library(tidyverse)
library(gt)
df <- tibble(
division = c("Science", "Science", "Science"),
department = c("Biology", "Biology", "Biology"),
course_num = c('101', '201', "301"),
widthraws = c(1, 2, 3),
unsucessful = c(0, 0 , 0),
successfull = c(1, 3, 4),
total_enrolled = c(2, 5, 7),
percent_successful = c(.50, .60, .57)
)
df %>%
group_by(division, department) %>%
gt() %>%
summary_rows(
groups = TRUE,
columns = c(widthraws:percent_successful),
missing_text = " ",
fns = list(
total = ~sum(.),
max = ~max(.),
min = ~min(.),
medain = ~median(.))
)

How to determine the first row after which a variable continuously decreases?

Problem
Given the following dataset, I want to find the first row after which vn continuously decreases (highlighted with a red circle). I know which can be used, but I don't know how to ensure a continuous decrease. Please guide me.
Data
df <- structure(list(Time = c(152.216666666667, 152.233333333333, 152.25,
152.266666666667, 152.283333333333, 152.3, 152.316666666667,
152.333333333333, 152.35, 152.366666666667, 152.383333333333),
vn = c(22.8733019569441, 22.8485877814354, 22.8539833863057,
22.8293883815954, 22.8347839864658, 22.8101348273251, 22.8047392224548,
22.7798917511031, 22.7744961462328, 22.7496737884944, 22.7442781836241
), diff_vn = c(0.00539560487035118, -0.024714175508727, 0.00539560487035118,
-0.0245950047103243, 0.00539560487035118, -0.0246491591406404,
-0.00539560487035118, -0.0248474713516487, -0.00539560487035118,
-0.0248223577383548, -0.00539560487035118), sign_diff_vn = c(1,
-1, 1, -1, 1, -1, -1, -1, -1, -1, -1)), row.names = c(NA,
-11L), class = "data.frame")
Try with diff
with(df, which(c(diff(sign_diff_vn) == 0, FALSE))[1])
[1] 6
Or may also be
v1 <- cumsum(df$sign_diff_vn >=0)
match(max(v1), v1) + 1
[1] 6
Or another option is rleid
library(dplyr)
library(data.table)
df %>%
mutate(rn = row_number(), grp = rleid(sign_diff_vn)) %>%
filter(grp == max(grp) & sign_diff_vn < 0) %>%
pull(rn) %>%
first
[1] 6

Use multiple functions in mutate efficiently in R

I used mutate to add new variables in my dataframe. The code below is an example and in my real data I need to mutate lag1 to lag100 and lag01 to lag0100. I don't want to type these one by one.
Is there an efficient way to do this?
c <- mtcars %>% mutate_all( .funs = list(lag1 = ~lag(., 1),
lag2 = ~lag(., 2),
lag3 = ~lag(., 3),
lag4 = ~lag(., 4),
lag5 = ~lag(., 5),
lag6 = ~lag(., 6),
lag01 = ~rollmean(., 2, fill = NA, align = 'right'),
lag02 = ~rollmean(., 3, fill = NA, align = 'right'),
lag03 = ~rollmean(., 4, fill = NA, align = 'right'),
lag04 = ~rollmean(., 5, fill = NA, align = 'right'),
lag05 = ~rollmean(., 6, fill = NA, align = 'right'),
lag06 = ~rollmean(., 7, fill = NA, align = 'right')
))
Any help will be highly appreciated!
You can use across in new dplyr 1.0.0 to apply multiple functions to all columns. rollmean with align = 'right is same as using rollmeanr. You can assign relevant names using .names specification.
library(dplyr)
library(purrr)
library(zoo)
x <- 5
map_dfc(1:x, function(i) mtcars %>%
transmute(across(.fns = list(lag = ~lag(., i),
mean = ~rollmeanr(., i + 1, fill = NA)),
.names = paste0('{col}_{fn}_', i))))
In the example that follows I have subset mtcars keeping only 20 rows and 5 columns.
The code uses bind_cols/Map to apply the functions lag and rollmeanr (rollmean, align = "right").
library(dplyr)
library(zoo)
bind_cols(
mtcars[1:20, 1:5] %>%
mutate_all(.funs = Map(function(n) lag = ~lag(., n), 1:2)),
mtcars[1:20, 1:5] %>%
mutate_all(.funs = Map(function(n) lag0 = ~rollmeanr(., n, fill = NA), 1:2)) %>%
select(-(1:5))
)
Then it's a matter of assigning new column names.

How to Convert a Data Frame to a List for Clickstream Analysis

I am a novice R user and new to the forum.
I have a data frame that I need to convert so that each row is a character vector.
I want to remove the 0's from the data frame so that each row can have varying lengths. So in essence each row is a separate character vector in a list.
Where I am at is the following:
mydf<-matrix(sample(0:1,12*5, replace = T),ncol =4)
colnames(mydf)<-letters[1:ncol(mydf)]
swapcol <-which(mydf == 1, arr.ind = T)
mydf[swapcol]<-colnames(mydf)[swapcol[,2]]
mydf
The code produces a data frame in which the column labels are values. I need the following output:
Desired List Result
the format appears to be what I need in order to read in data to the package clickstream.
Thanks
Try this solution:
library(tidyverse)
s <- sample(x = 0:1, size = 15 * 4, replace = TRUE)
mx <- matrix(data = s, nrow = 15, ncol = 4, byrow = TRUE,
dimnames = list(c(paste("User", 1:15, sep = " ")), c("V1", "V2", "V3", "V4")))
df2 <- mx %>% as.data.frame() %>% rownames_to_column() %>% as_tibble()
%>% mutate(
V1 = ifelse(test = V1 == 1, yes = "a", no = NA),
V2 = ifelse(test = V2 == 1, yes = "b", no = NA),
V3 = ifelse(test = V3 == 1, yes = "c", no = NA),
V4 = ifelse(test = V4 == 1, yes = "d", no = NA))
mx2 <- t(apply(X = df2, MARGIN = 1, FUN = function(x{return(c(x[!is.na(x)],
x[is.na(x)]))}))
This returns a list with the formart you are asking for:
list(
apply(mydf, 1, function(a_row) {
my_paste <- function(...){
paste(..., sep = ", ")
}
a_row <- Reduce(my_paste, a_row)
a_row <- gsub("0(, )*", "", a_row)
a_row <- gsub(", $", "", a_row)
})
)
This returns a list of length 1. Replacing list with as.list, returns a list of length 15.

Resources