Mutate_if or mutate_at in dplyr with Dates - r

I have a data set that is over 100 columns, but for example lets suppose I have a data set that looks like
dput(tib)
structure(list(f_1 = c("A", "O", "AC", "AC", "AC", "O", "A", "AC", "O", "O"), f_2 = c("New", "New",
"New", "New", "Renewal", "Renewal", "New", "Renewal", "New",
"New"), first_dt = c("07-MAY-18", "25-JUL-16", "09-JUN-18", "22-APR-19",
"03-MAR-19", "10-OCT-16", "08-APR-19", "27-FEB-17", "02-MAY-16",
"26-MAY-15"), second_dt = c(NA, "27-JUN-16", NA, "18-APR-19",
"27-FEB-19", "06-OCT-16", "04-APR-19", "27-FEB-17", "25-APR-16",
NA), third_dt = c("04-APR-16", "21-JUL-16", "05-JUN-18", "18-APR-19",
"27-FEB-19", "06-OCT-16", "04-APR-19", "27-FEB-17", "25-APR-16",
"19-MAY-15"), fourth_dt = c("05-FEB-15", "25-JAN-16", "05-JUN-18",
"10-OCT-18", "08-JAN-19", "02-SEP-16", "24-OCT-18", "29-SEP-16",
"27-JAN-15", "14-MAY-15"), fifth_dt = structure(c(1459728000,
1469059200, 1528156800, 1555545600, 1551225600, 1475712000, 1554336000,
1488153600, 1461542400, 1431993600), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), sex = c("M", "M", "F", "F", "M", "F", "F",
"F", "F", "F")), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame"))
Most of the date (ends_with(dt)) columns are strings, but I want to convert them into dates. I tried mutate_at but received the following:
tib %>% mutate_at(vars(ends_with("dt")), funs(parse_date_time(.))) %>% glimpse()
Error in mutate_impl(.data, dots) :
Evaluation error: argument "orders" is missing, with no default.
Any thoughts on what caused this error? Should I use a different mutate function?

As akrun noted, one of the columns is already in dttm format. Once that column is ignored the following code works for me:
tib %>%
select(-fifth_dt) %>%
mutate_at(vars(ends_with("dt")), parse_date_time, orders = "%d-%m-%y")

The funs is deprecated. In place, list can be used
library(dplyr)
tib %>%
mutate_at(3:6, list(~ parse_date_time(., "%d-%m-%y")))

Related

echarts4r error at plotting barchart's colors

As you can see in the annexed image it seems that there is a problem with the e_visual_map() function from echarts4r. After y = aa the colors stop changing. Is there a way to fix the bar's colors?
data
library(dplyr)
library(viridis)
library(echarts4r)
df <- structure(
list(
Market = c("a", "b", "c", "d", "e",
"f", "g", "h", "i", "j",
"k", "l", "m", "n", "o",
"p", "q", "r", "s", "t",
"u", "v", "w", "x", "y",
"z", "aa", "bb", "cc",
"dd", "ee", "ff", "gg",
"hh", "ii", "jj", "kk",
"ll", "mm", "nn", "oo",
"pp", "qq", "rr", "ss",
"tt", "uu", "vv"),
Percent_Change = c(5.16901350940851, 3.91868856906443, 3.41802504497987, 3.16413673886071,
3.12684219659363, 2.89249688621206, 2.87284606849977, 2.84454222482254,
2.57058275282915, 2.43282934768581, 2.34818492965906, 2.30880001810456,
2.2918613260413, 2.24101659933832, 2.18752627680741, 2.10073586714032,
1.86045092759311, 1.85290305266011, 1.68128474330245, 1.54700002004653,
1.5303536712395, 1.52152376952798, 1.45917880532612, 1.4355692973819,
1.4257368870892, 1.36409659669896, 1.22315092771929, 1.04309133074753,
0.939025651002292, 0.844389462321624, 0.797407599768931, 0.681691408815433,
0.242176237950194, 0.237798995363376, 0.219182593926239, -0.0280421490193321,
-0.111286439923117, -0.124395342178022, -0.175922623382462, -0.188080671185304,
-0.870155958402443, -1.60611679230328, -1.66206110148814, -1.82732601610943,
-3.68051100830324, -4.43292411223474, -9.42691532047856, -10.5405968097707)),
row.names = c(NA, -48L), class = c("tbl_df", "tbl", "data.frame"))
code to plot
df %>% arrange(Percent_Change) %>%
# mutate(Market = fct_reorder(Market, -Percent_Change)) %>%
e_chart(Market) %>% e_bar(Percent_Change) %>%
e_visual_map(Percent_Change, scale = e_scale, color = viridis(100)) %>%
e_flip_coords() %>%
e_legend(show = F) %>%
e_color(background = c("#343E48"))
Results
Not 100% about your desired result. If you want to color your bars according to the value of PercentChange than this could be achieved by setting dimension=0 to color by the x axis and getting rid of scale:
library(dplyr)
library(viridis)
library(echarts4r)
df %>%
arrange(Percent_Change) %>%
e_chart(Market) %>%
e_bar(Percent_Change) %>%
e_visual_map(Percent_Change,
color = viridis(10),
dimension = 0
) %>%
e_flip_coords() %>%
e_legend(show = FALSE) %>%
e_color(background = c("#343E48"))

Sort table in descending order

Based on the data and code below, is it possible to sort the table in descending order?
Data (df):
structure(list(CITYNAME = c("a", "b", "c",
"d", "e", "f", "g",
"h", "i", "j", "k",
"l", "m", "n", "p", "q",
"r", "s", "t", "u",
"w", "x", "y", "z"), AvgPpt = c(127.785,
131.456, 128.357, 114.792, 131.383, 129.696, 137.008, 136.129,
132.881, 131.676, 129.103, 132.475, 122.263, 132.393, 134.552,
120.322, 125.987, 132.337, 131.18, 122.705, 123.285, 128.853,
134.494, 114.154)), row.names = c(NA, -24L), class = c("tbl_df",
"tbl", "data.frame"))
Code:
library(ggpubr)
tbl_ppt = df %>%
ggtexttable(cols = c("Municipilality", "Average Precipitaiton (mm)"),
rows = NULL,
theme = ttheme("mBlue"))
tbl_ppt
You could arrange your data in your desired order before passing it to ggtexttable:
library(ggpubr)
library(dplyr)
df %>%
arrange(desc(AvgPpt)) %>%
ggtexttable(cols = c("Municipilality", "Average Precipitaiton (mm)"),
rows = NULL,
theme = ttheme("mBlue"))

how to filter the value base one the upper row value

I have a df:
df<-structure(list(Name = c("test", "a", "nb", "c", "r", "f", NA,
"d", "ee", "test", "value", "test", "b")), row.names = c(NA,
-13L), class = c("tbl_df", "tbl", "data.frame"))
How can I only keep the row which upper row=="test" and row value !="value"?
The new df1 will looks like this (any of either case is Ok):
library(dplyr)
df %>%
filter(lag(Name == "test"), Name != "value")
# A tibble: 2 x 1
Name
<chr>
1 a
2 b

calculate duration in a complex table

I have a table as shown.
df <- data.frame("name" = c("jack", "william", "david", "john"),
"01-Jan-19" = c(NA,"A",NA,"A"),
"01-Feb-19" = c("A","A",NA,"A"),
"01-Mar-19" = c("A","A","A","A"),
"01-Apr-19" = c("A","A","A","A"),
"01-May-19" = c(NA,"A","A","A"),
"01-Jun-19" = c("A","SA","A","SA"),
"01-Jul-19" = c("A","SA","A","SA"),
"01-Aug-19" = c(NA,"SA","A","SA"),
"01-Sep-19" = c(NA,"SA","A","SA"),
"01-Oct-19" = c("SA","SA","A","SA"),
"01-Nov-19" = c("SA","SA",NA,"SA"),
"01-Dec-19" = c("SA","SA","SA",NA),
"01-Jan-20" = c("SA","M","A","M"),
"01-Feb-20" = c("M","M","M","M"))
Over a time period, each person journeys through of position progression (3 position categories from A to SA to M). My objective is:
Calculate the average duration of A (assistant) position and SA (senior assistant) position. i.e. the duration between the date the first of one category appears, and the date the last of this category appears, regardless of missing data in between.
I transposed the data using R “gather” function
df1 <- gather (df, "date", "position", 2:15)
then I am not sure how to best proceed. What might be the best way to further approach this?
We can get the data in longer format and calculate the number of days between first date when the person was "SA" and the first date when he was "A".
library(dplyr)
df %>%
tidyr::pivot_longer(cols = -name, names_to = 'person', values_drop_na = TRUE) %>%
mutate(person = dmy(person)) %>%
group_by(name) %>%
summarise(avg_duration = person[match('SA', value)] - person[match('A', value)])
# name duration
# <fct> <drtn>
#1 david 275 days
#2 jack 242 days
#3 john 151 days
#4 william 151 days
If needed the mean value we can pull and then calculate mean by adding to the above chain
%>% pull(duration) %>% mean
#Time difference of 204.75 days
data
df <- structure(list(name = c("jack", "william", "david", "john"),
`01-Jan-19` = c(NA, "A", NA, "A"), `01-Feb-19` = c("A", "A",
NA, "A"), `01-Mar-19` = c("A", "A", "A", "A"), `01-Apr-19` = c("A",
"A", "A", "A"), `01-May-19` = c(NA, "A", "A", "A"), `01-Jun-19` = c("A",
"SA", "A", "SA"), `01-Jul-19` = c("A", "SA", "A", "SA"),
`01-Aug-19` = c(NA, "SA", "A", "SA"), `01-Sep-19` = c(NA,
"SA", "A", "SA"), `01-Oct-19` = c("SA", "SA", "A", "SA"),
`01-Nov-19` = c("SA", "SA", NA, "SA"), `01-Dec-19` = c("SA",
"SA", "SA", NA), `01-Jan-20` = c("SA", "M", "A", "M"), `01-Feb-20` = c("M",
"M", "M", "M")), row.names = c(NA, -4L), class = "data.frame")

Applying colors dynamically to table cells in R

I have a data frame in R that looks like this:
It is 84 rows high and 365 rows wide. The dput is below. What I'm trying to figure out is how to get each cell to change color based on the symbol that's in the cell (also, I don't want to see the column name, row name, or gridlines). I've tried kable, DT, base R, heatmap, and huxtable. The closest I've gotten is with DT:
datatable(cover, rownames=FALSE, options = list(dom = 't')) %>% formatStyle(names(cover), backgroundColor=styleEqual(hex$Symbol, hex$Hex))
Here's the result from that code:
I haven't been able to figure out how to also remove the column names (so the columns are only as wide as the symbol) or the gridlines. I'm sure there's a way to do this but I've been spinning my wheels for a couple days so I thought I'd ask the experts. I'm still fairly new with R (I'm a data analyst, not a professional coder). My ultimate goal is for it to look something like this (which was created with Google Sheets conditional formatting):
dput of the head of the first 10 columns of the data table:
structure(list(`2019-01-01` = c("f", "f", "f", "<U+263D>", "<U+263D>", "<U+263D>"), `2019-01-02` = c("<U+270E>", "<U+270E>", "<U+270E>", "<U+270E>", "<U+270E>", "<U+270E>"), `2019-01-03` = c("t", "t", "t", "d", "d", "d"), `2019-01-04` = c("d", "d", "d", "<U+2699>", "<U+2699>", "<U+2699>"), `2019-01-05` = c("&", "&", "&", "&", "&", "&"), `2019-01-06` = c("<U+2699>", "<U+2699>", "<U+2699>", "&", "&", "&"), `2019-01-07` = c("^", "^", "^", "^", "^", "^"), `2019-01-08` = c("&", "&", "&", "<U+270E>", "<U+270E>", "<U+270E>"), `2019-01-09` = c("<U+2699>", "<U+2699>", "<U+2699>", "<U+2699>", "<U+2699>", "<U+2699>"), `2019-01-10` = c("s", "s", "s", "s", "s", "s")), row.names = c(NA, 6L), class = "data.frame")
dput of the Symbol to Hex lookup table:
structure(list(Symbol = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "a","i", "k", "b", "l", "r", "c", "x", "#", "%", "^", "e", "m", "s", "#", "<U+270E>", "&", "<U+2699>", "d", "t", "y", "n", "<U+25C0>", "<U+263D>", "f", "<U+2689>", "<U+2726>", "<U+0394>", "¥", "p", "u", "<U+2326>", "<U+26AF>", "z", "<U+2714>", "o", "+", "v", "g", "<U+262F>", "<U+2724>", "<U+272B>", "<U+2766>", "j", "q", "h", "<U+2665>", "w"), Hex = c("#572433", "#72375D", "#633666", "#803A6B", "#6C3A6E", "#776B98", "#ADA7C7", "#5C7294", "#7B8EAB", "#707DA2", "#555B7B", "#464563", "#0E365C", "#11416D", "#13477D", "#2C597C", "#396987", "#4781A5", "#35668B", "#5A8FB8", "#3B768F", "#4F93A7", "#5BA3B3", "#90C3CC", "#C4DECC", "#7BAC94", "#5B9071", "#396F52", "#044D33", "#313919", "#424D21", "#4C5826", "#72843C", "#94AB4F", "#AEBF79", "#CCD9B1", "#D8E498", "#FFFB8B", "#FDF9CD", "#FFF1AF", "#FDD755", "#FFC840", "#FFBF57", "#FFA32B", "#FF8B00", "#F67F00", "#F27842", "#FF836F", "#E96A67", "#FF7992", "#E74967", "#BA4A4A", "#B33B4B", "#970B23", "#87071F", "#A7132B", "#913546")), row.names = c(NA, -57L), spec = structure(list(cols = list(Index = structure(list(), class = c("collector_double", "collector")), Color = structure(list(), class = c("collector_double", "collector")), `Color name` = structure(list(), class = c("collector_character", "collector")), Symbol = structure(list(), class = c("collector_character", "collector")), Hex = structure(list(), class = c("collector_character", "collector"))), default = structure(list(), class = c("collector_guess", "collector")), skip = 1), class = "col_spec"), class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"))
Here's the code I'm using per the comments below. It worked last week but now it isn't. I've determined through going line by line that value2 isn't rendering properly, but I've checked it against the code provided and it looks exactly the same. I'm calling the dataset "cover" and the color table "hex".
hexcol <- hex$Hex
names(hexcol) <- hex$Symbol
bcol <- function(x){hexcol[as.character(x)]}
x <- cover %>%
dplyr::mutate(row.id = 1:n()) %>%
gather(key = "key", value = "value", -row.id) %>%
mutate(value2 = " ", value2 = cell_spec(value2, background = mapply(bcol, value), color = mapply(bcol, value))) %>%
select(-value) %>%
spread(key = key, value = value2) %>%
select(-row.id) %>%
kable(format = "html", escape = F) %>%
kable_styling(full_width = F)
x2 <- gsub("<thead>.*</thead>", "", x)
x3.splits <- unlist(str_split(x2, pattern = "\n"))
x3.cols <- str_extract(x3.splits, pattern = "#[0-9a-fA-F]{6}")
x3.vals <- str_extract(x3.splits, pattern = "(a-Z0-9)+")
# cycle through each row of HTML code to find and replace any value with HTML/CSS code to color the background of that specific cell
for (i in 1:length(x3.splits)){
if (!is.na(x3.cols[i])){
x2 <- gsub(pattern = x3.splits[i],
replacement = paste0('<td style="text-align:center; background-color: ', x3.cols[i], '; border-top: 1px solid ',
x3.cols[i], ';"><span style="margin-left:5px;margin-right:5px"> </span></td>'), x = x2)
}
}
Here's the session info:
Have you tried using the kableExtra package? I was able to do the following which I think does what you're hoping to do using this package as well as some HTML syntax/regular expression substitutions. Let me know if this doesn't seem to work for you though!
library(kableExtra)
library(stringr)
library(dplyr)
library(tidyr)
library(magick)
library(webshot)
dat <- structure(list(`2019-01-01` = c("f", "f", "f", "<U+263D>", "<U+263D>", "<U+263D>"), `2019-01-02` = c("<U+270E>", "<U+270E>", "<U+270E>", "<U+270E>", "<U+270E>", "<U+270E>"), `2019-01-03` = c("t", "t", "t", "d", "d", "d"), `2019-01-04` = c("d", "d", "d", "<U+2699>", "<U+2699>", "<U+2699>"), `2019-01-05` = c("&", "&", "&", "&", "&", "&"), `2019-01-06` = c("<U+2699>", "<U+2699>", "<U+2699>", "&", "&", "&"), `2019-01-07` = c("^", "^", "^", "^", "^", "^"), `2019-01-08` = c("&", "&", "&", "<U+270E>", "<U+270E>", "<U+270E>"), `2019-01-09` = c("<U+2699>", "<U+2699>", "<U+2699>", "<U+2699>", "<U+2699>", "<U+2699>"), `2019-01-10` = c("s", "s", "s", "s", "s", "s")), row.names = c(NA, 6L), class = "data.frame")
col.tab <- structure(list(Symbol = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "a","i", "k", "b", "l", "r", "c", "x", "#", "%", "^", "e", "m", "s", "#", "<U+270E>", "&", "<U+2699>", "d", "t", "y", "n", "<U+25C0>", "<U+263D>", "f", "<U+2689>", "<U+2726>", "<U+0394>", "¥", "p", "u", "<U+2326>", "<U+26AF>", "z", "<U+2714>", "o", "+", "v", "g", "<U+262F>", "<U+2724>", "<U+272B>", "<U+2766>", "j", "q", "h", "<U+2665>", "w"), Hex = c("#572433", "#72375D", "#633666", "#803A6B", "#6C3A6E", "#776B98", "#ADA7C7", "#5C7294", "#7B8EAB", "#707DA2", "#555B7B", "#464563", "#0E365C", "#11416D", "#13477D", "#2C597C", "#396987", "#4781A5", "#35668B", "#5A8FB8", "#3B768F", "#4F93A7", "#5BA3B3", "#90C3CC", "#C4DECC", "#7BAC94", "#5B9071", "#396F52", "#044D33", "#313919", "#424D21", "#4C5826", "#72843C", "#94AB4F", "#AEBF79", "#CCD9B1", "#D8E498", "#FFFB8B", "#FDF9CD", "#FFF1AF", "#FDD755", "#FFC840", "#FFBF57", "#FFA32B", "#FF8B00", "#F67F00", "#F27842", "#FF836F", "#E96A67", "#FF7992", "#E74967", "#BA4A4A", "#B33B4B", "#970B23", "#87071F", "#A7132B", "#913546")), row.names = c(NA, -57L), spec = structure(list(cols = list(Index = structure(list(), class = c("collector_double", "collector")), Color = structure(list(), class = c("collector_double", "collector")), `Color name` = structure(list(), class = c("collector_character", "collector")), Symbol = structure(list(), class = c("collector_character", "collector")), Hex = structure(list(), class = c("collector_character", "collector"))), default = structure(list(), class = c("collector_guess", "collector")), skip = 1), class = "col_spec"), class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"))
color_mapper <- col.tab$Hex
names(color_mapper) <- col.tab$Symbol
c_func <- function(x){
color_mapper[as.character(x)]
}
x <- dat %>%
mutate(row.id = 1:n()) %>%
gather(key = "key", value = "value", -row.id) %>%
mutate(value2 = " ",
value2 = cell_spec(value2, background = mapply(c_func, value), color = mapply(c_func, value))
) %>%
select(-value) %>%
spread(key = key, value = value2) %>%
select(-row.id) %>%
kable(format = "html", escape = F) %>%
kable_styling(full_width = F)
x2 <- gsub("<thead>.*</thead>", "", x)
x3.splits <- unlist(str_split(x2, pattern = "\n"))
x3.cols <- str_extract(x3.splits, pattern = "#[0-9a-fA-F]{6}")
x3.vals <- str_extract(x3.splits, pattern = "(a-Z0-9)+")
## cycle through each row of HTML code to find and replace any value with
## HTML/CSS code to color the background of that specific cell
for (i in 1:length(x3.splits)){
if (!is.na(x3.cols[i])){
x2 <- gsub(
pattern = x3.splits[i],
replacement = paste0('<td style="text-align:center; background-color: ', x3.cols[i], '; border-top: 1px solid ', x3.cols[i], ';"><span style="margin-left:5px;margin-right:5px"> </span></td>'),
x = x2
)
}
}
x2 %>%
save_kable("my_image.png")
With the PNG output:
Here's a quick example using huxtable (I'm the package author):
tmp <- structure(list(`2019-01-01` = c("f", "f", "f", "<U+263D>", "<U+263D>", "<U+263D>"), `2019-01-02` = c("<U+270E>", "<U+270E>", "<U+270E>", "<U+270E>", "<U+270E>", "<U+270E>"), `2019-01-03` = c("t", "t", "t", "d", "d", "d"), `2019-01-04` = c("d", "d", "d", "<U+2699>", "<U+2699>", "<U+2699>"), `2019-01-05` = c("&", "&", "&", "&", "&", "&"), `2019-01-06` = c("<U+2699>", "<U+2699>", "<U+2699>", "&", "&", "&"), `2019-01-07` = c("^", "^", "^", "^", "^", "^"), `2019-01-08` = c("&", "&", "&", "<U+270E>", "<U+270E>", "<U+270E>"), `2019-01-09` = c("<U+2699>", "<U+2699>", "<U+2699>", "<U+2699>", "<U+2699>", "<U+2699>"), `2019-01-10` = c("s", "s", "s", "s", "s", "s")), row.names = c(NA, 6L), class = "data.frame")
ht <- as_hux(tmp)
ht <- map_background_color(ht, by_values("<U+270E>" = "red", "<U+2699>" = "green"))
I haven't used your exact symbol table. If it is large, you might want to do something like do.call(by_values, my_symbols) where my_symbols would be something like list("1" = "#572433", ...).

Resources