How to highlight specific cells in a dataframe in R markdown HTML - r

I have a dataframe as shown below (the data is listed at the end of question):
As can be seen, I want to highlight few cells in the final R-Markdown report for the presentation. My current code is only able to show the table:
cluster_summary%>% kbl(caption = '<b>Clustering results</b>') %>%
kable_classic(full_width = F, html_font = "Cambria")
How can I highlight those cells??
DATA
structure(list(cluster = structure(1:7, .Label = c("1", "2",
"3", "4", "5", "6", "7"), class = "factor"), n = c(512L, 1048L,
662L, 1968L, 576L, 1738L, 1188L), ave_price_per_sqft_adjusted = c(5.16299733157459,
3.32371811588978, 3.96858531607868, 3.32922072520205, 3.42896017156734,
4.16418851265888, 4.08627345683475), ave_age = c(12.0393129995492,
12.6062546474121, 9.32033699503113, 25.5092197801581, 19.1151284494788,
12.2180810585854, 12.0248580167839), ave_DOM = c(47.706537201211,
42.0442099665614, 49.9960193152193, 34.2190863941281, 44.5416652882415,
37.1891219996921, 33.3872422432855), ave_activity_rate = c(1.20118970114087,
1.14598100690658, 1.47458159497434, 1.58286371628597, 1.31320615630511,
1.32586511589676, 2.90376115653893), topic_1 = c(0.0873152283441761,
0.0402887288191615, 0.0671677410154403, 0.0658325530416239, 0.0486383977595131,
0.678477957074527, 0.124182893709105), topic_2 = c(0.0432613598954236,
0.0696506982126008, 0.0443719103703934, 0.714018587278257, 0.106997881943579,
0.0858546713546651, 0.123859196751554), topic_3 = c(0.734165987470995,
0.0151590853651532, 0.0274370600921245, 0.0267196491438714, 0.0186524676995082,
0.0422361263557554, 0.0476136227502999), topic_4 = c(0.0268470362758521,
0.0222984614059603, 0.035088529448869, 0.0682401425738628, 0.733361959255753,
0.0345517467883103, 0.0701685629335576), topic_5 = c(0.0236832387869678,
0.0195300786802868, 0.681931511958987, 0.01084326403663, 0.00780696913319592,
0.0271831270677069, 0.0256968988305932), topic_6 = c(0.00241582961309524,
0.00512777524684262, 0.043572436212494, 0.00284832693741011,
0.00466231684981685, 0.00447461706422522, 0.00578628373290925
), topic_7 = c(0.0293156710834479, 0.0165055511133993, 0.0243384949312766,
0.0479052429538088, 0.0240980295134035, 0.035084908174513, 0.531063470492252
), topic_8 = c(0.0519347465414063, 0.808840100571256, 0.0730651082702796,
0.0592810817199474, 0.0538401481417729, 0.0805723035106479, 0.0664648058614109
)), class = "data.frame", row.names = c(NA, -7L))

You could use formattable, see these examples.
color_tile formatter combined with area option allows to change color of a specific row & col.
library(formattable)
highlight <- color_tile("yellow","yellow")
formattable(data, list(
area(col = 3, row = 1 ) ~ highlight,
area(col = 4, row = 4 ) ~ highlight
))

Related

Group by percentage not working as a percent of the total group

I've scoured the web and stack, but haven't had luck trying multiple things. I have a dataset that I'm trying to get a grouping calculation, but also a percentage of that group. Here's an example of what I am getting:
structure(list(Advanced = c("Task5", "Advanced", "0", "2", "1",
" 8.500000", "0.0", "3.191667", "25.00000", "4.500000", "0.6428571",
"4.50", "0.6428571", "4.500000", "0.6428571", "5.000000", "0.7142857",
"6.500000", "0.9285714", "2", "1.00", NA, NA), Advanced = c("Task5",
"Advanced", "1", "4", "1", "26.500000", "0.5", "9.037500", "63.25000",
"6.000000", "0.8571429", "4.75", "0.6785714", "5.250000", "0.7500000",
"6.000000", "0.8571429", "5.250000", "0.7500000", "1", "0.25",
"4.333333", "0.6190476"), Novice = c("Task5", "Novice", "0",
"2", "1", " 6.000000", "0.0", "4.850000", "49.00000", "6.500000",
"0.9285714", "6.00", "0.8571429", "6.000000", "0.8571429", "6.500000",
"0.9285714", "6.500000", "0.9285714", "2", "1.00", NA, NA), Novice = c("Task5",
"Novice", "1", "6", "1", " 7.666667", "1.0", "8.672222", "69.83333",
"5.333333", "0.7619048", "4.00", "0.5714286", "3.833333", "0.5476190",
"4.333333", "0.6190476", "4.166667", "0.5952381", "0", "0.00",
"4.000000", "0.5714286")), class = "data.frame", row.names = c("Task",
"segment", "t5_pass_fail", "N", "SuccessRate", "UniquePages",
"Timedout_percent", "TimeonTask", "Clicks", "Useful_raw", "Useful_percent",
"UserFriendly_raw", "UserFriendly_percent", "Learned_raw", "Learned_percent",
"Satisfied_raw", "Satisfied_percent", "Confident_raw", "Confident_percent",
"Experienced_Difficulty_raw", "Experienced_Difficulty_percent",
"difficulty_level_raw", "difficulty_level_percent"))
The process to get the above is from the following:
Task5_Strict <- cleanuxq4 %>%
dplyr:: select(("urespid_0"),("segment"),starts_with("t")) %>%
group_by (segment,t5_pass_fail)%>%
summarize(
Task = "Task5",
N =n(),
SuccessRate = ((t5_pass_fail = 1)/sum(t5_pass_fail)),
UniquePages = (mean(t5_unique_pageviews)),
Timedout_percent = sum(t5_effectiveness)/N,
TimeonTask = mean(t5_time_task)/60,
Clicks = mean(t5_clicks),
Useful_raw = mean(t5_useful),
Useful_percent = Useful_raw/7,
UserFriendly_raw = mean(t5_user_friendly),
UserFriendly_percent = UserFriendly_raw/7,
Learned_raw = mean(t5_learned),
Learned_percent = Learned_raw/7,
Satisfied_raw = mean(t5_satisfied),
Satisfied_percent = Satisfied_raw/7,
Confident_raw = mean(t5_confident),
Confident_percent = Confident_raw/7,
Experienced_Difficulty_raw = sum(t5_exp_difficulty),
Experienced_Difficulty_percent = Experienced_Difficulty_raw/N,
difficulty_level_raw = mean(t5_difficulty_level, na.rm=TRUE),
difficulty_level_percent = difficulty_level_raw/7
)
#Move columsn and then use the second row as the header for the pivot columns--------
Task5_Strict <- Task5_Strict %>%
select("segment",everything())%>%
select("Task",everything())
Task5_Strict_Pivot <- as.data.frame(t(Task5_Strict))
zTask5_Strict_Pivot <-Task5_Strict_Pivot[-2,]
colnames(Task5_Strict_Pivot)<-Task5_Strict_Pivot[2,]
In the end, my SuccessRate should be a percentage of the number of Advance who passed
(t1_pass_fail = 1) divided by the entire population of Advance, i.e. both
`(t1_pass_fail = 0, = 1).
I'm fairly noob so any help is appreciated.
To calculate the SuccessRate you can use mean(t5_pass_fail == 1) * 100.
library(dplyr)
Task5_Strict <- cleanuxq4 %>%
dplyr:: select(("urespid_0"),("segment"),starts_with("t")) %>%
group_by(segment,t5_pass_fail)%>%
summarize(
Task = "Task5",
N =n(),
SuccessRate = mean(t5_pass_fail == 1) * 100,
UniquePages = (mean(t5_unique_pageviews)),
Timedout_percent = sum(t5_effectiveness)/N,
TimeonTask = mean(t5_time_task)/60,
Clicks = mean(t5_clicks),
Useful_raw = mean(t5_useful),
Useful_percent = Useful_raw/7,
UserFriendly_raw = mean(t5_user_friendly),
UserFriendly_percent = UserFriendly_raw/7,
Learned_raw = mean(t5_learned),
Learned_percent = Learned_raw/7,
Satisfied_raw = mean(t5_satisfied),
Satisfied_percent = Satisfied_raw/7,
Confident_raw = mean(t5_confident),
Confident_percent = Confident_raw/7,
Experienced_Difficulty_raw = sum(t5_exp_difficulty),
Experienced_Difficulty_percent = Experienced_Difficulty_raw/N,
difficulty_level_raw = mean(t5_difficulty_level, na.rm=TRUE),
difficulty_level_percent = difficulty_level_raw/7
)
Thanks to both Stefan and Ronak, but I was attempting to do something illogical. I needed to move a sum of the segment outside of the group by and then use that to obtain a proper % within the group by.
The solution is at the very bottom:
Task5_Strict <- cleanuxq4 %>%
dplyr:: select(("urespid_0"),("segment"),starts_with("t")) %>%
group_by (segment,t5_pass_fail)%>%
summarize(
Task = "Task5",
N =n(),
UniquePages = (mean(t5_unique_pageviews)),
Timedout_percent = sum(t5_effectiveness)/N,
TimeonTask = mean(t5_time_task)/60,
Clicks = mean(t5_clicks),
Useful_raw = mean(t5_useful),
Useful_percent = Useful_raw/7,
UserFriendly_raw = mean(t5_user_friendly),
UserFriendly_percent = UserFriendly_raw/7,
Learned_raw = mean(t5_learned),
Learned_percent = Learned_raw/7,
Satisfied_raw = mean(t5_satisfied),
Satisfied_percent = Satisfied_raw/7,
Confident_raw = mean(t5_confident),
Confident_percent = Confident_raw/7,
Experienced_Difficulty_raw = sum(t5_exp_difficulty),
Experienced_Difficulty_percent = Experienced_Difficulty_raw/N,
difficulty_level_raw = mean(t5_difficulty_level, na.rm=TRUE),
difficulty_level_percent = difficulty_level_raw/7
)
Task5_Strict <- mutate(Task5_Strict, zcount = sum(N))
Task5_Strict <- mutate(Task5_Strict, SuccessRate =(N)/(zcount))

Lexis function not found in R

I am using this code from the R help guide in the Epi
package:
# A small bogus cohort
xcoh <- structure( list( id = c("A", "B", "C"),
birth = c("14/07/1952", "01/04/1954",
"10/06/1987"),
entry = c("04/08/1965", "08/09/1972",
"23/12/1991"),
exit = c("27/06/1997", "23/05/1995",
"24/07/1998"),
fail = c(1, 0, 1) ),
.Names = c("id", "birth", "entry", "exit",
"fail"),
row.names = c("1", "2", "3"),
class = "data.frame" )
# Define a Lexis object with timescales calendar time and
age
Lcoh <- Lexis( entry = list( per=entry ),
exit = list( per=exit,
age=exit-birth ),
exit.status = fail,
data = xcoh )
But I get this error:
Error in Lexis(entry = list(per = entry), exit = list(per = exit, age = exit - :
could not find function "Lexis"
Any thoughts?
Epi package first needs to be installed in the environment using:
install.packages("Epi")
And then the library for Epi needs to be loaded.
library(Epi)
Hence your code being modified as follows:
install.packages("Epi")
library(Epi)
xcoh <- structure( list( id = c("A", "B", "C"),
birth = c("14/07/1952", "01/04/1954",
"10/06/1987"),
entry = c("04/08/1965", "08/09/1972",
"23/12/1991"),
exit = c("27/06/1997", "23/05/1995",
"24/07/1998"),
fail = c(1, 0, 1) ),
.Names = c("id", "birth", "entry", "exit",
"fail"),
row.names = c("1", "2", "3"),
class = "data.frame" )
# Define a Lexis object with timescales calendar time and
Lcoh <- Lexis( entry = list( per=entry ),
exit = list( per=exit,
age=exit-birth ),
exit.status = fail,
data = xcoh )
Note: I have removed the line that says age. Assuming it is not relevant to the question posted here.

How to create a row by dividing First row by third row

I have a dataset which has values in first row & total in third row. I want to create a fourth row which is percentage of first by total which can be done by dividing first row with fourth row.
below is structure of dataframe
ds = structure(list(t1 = structure(c("1", "2", "Total"), label = "currently smoke any tobacco product", labels = c(no = 0,
yes = 1), class = "haven_labelled"), c1Female = c(679357.516868591,
8394232.81394577, 9073590.33081436), c1Male = c(2254232.8617363,
5802560.20343018, 8056793.06516647), se.c1Female = c(63743.4459540534,
421866.610586848, 485610.056540901), se.c1Male = c(185544.754820322,
386138.725133411, 571683.479953732), Total_1 = c(`1` = 2933590.37860489,
`2` = 14196793.0173759, `3` = 17130383.3959808), per = c(`1` = 0.171250713471665,
`2` = 0.828749286528335, `3` = 1)), class = "data.frame", row.names = c(NA,
-3L))
My try & what is wrong with this
ds %>% mutate(percentage = .[1,]/.[3,])
OUTPUT SHOULD BE : Below is the dput of Output Dataframe that I want
structure(list(t1 = structure(c(1L, 2L, 4L, 3L), .Label = c("1",
"2", "Percentage", "Total"), class = "factor"), c1Female = c(679357.517,
8394232.814, 9073590.331, 0.074871963), c1Male = c(2254232.86,
5802560.2, 8056793.07, 0.279792821), se.c1Female = c(63743.446,
421866.611, 485610.057, 0.131264674), se.c1Male = c(185544.755,
386138.725, 571683.48, 0.324558539), Total_1 = c(2933590.38,
14196793.02, 17130383.4, 0.171250714), per = c(0.171250713, 0.828749287,
1, 0.171250713)), class = "data.frame", row.names = c(NA, -4L
))
Do share the tidyverse way to do this. Also, do tell what is wrong with this approach below line code
ds %>% mutate(percentage = .[1,]/.[3,])
We can use summarise_at to divide multiple column values to return a single row and then bind with the original dataset
library(dplyr)
ds %>%
summarise_at(-1, ~ .[1]/.[3]) %>%
mutate(t1 = 'Percentage') %>%
bind_rows(ds, .)
# t1 c1Female c1Male se.c1Female se.c1Male Total_1 per
#1 1 6.793575e+05 2.254233e+06 6.374345e+04 1.855448e+05 2.933590e+06 0.1712507
#2 2 8.394233e+06 5.802560e+06 4.218666e+05 3.861387e+05 1.419679e+07 0.8287493
#3 Total 9.073590e+06 8.056793e+06 4.856101e+05 5.716835e+05 1.713038e+07 1.0000000
#4 Percentage 7.487196e-02 2.797928e-01 1.312647e-01 3.245585e-01 1.712507e-01 0.1712507
Or another option is add_row
ds %>%
add_row(t1 = 'Percentage') %>%
mutate_at(-1, ~ replace_na(., .[1]/.[3]))
Or do this within the add_row step itself
ds %>%
add_row(t1 = 'Percentage', !!!as.list(.[-1][1,]/.[-1][3,]))
# t1 c1Female c1Male se.c1Female se.c1Male Total_1 per
#1 1 6.793575e+05 2.254233e+06 6.374345e+04 1.855448e+05 2.933590e+06 0.1712507
#2 2 8.394233e+06 5.802560e+06 4.218666e+05 3.861387e+05 1.419679e+07 0.8287493
#3 Total 9.073590e+06 8.056793e+06 4.856101e+05 5.716835e+05 1.713038e+07 1.0000000
#4 Percentage 7.487196e-02 2.797928e-01 1.312647e-01 3.245585e-01 1.712507e-01 0.1712507

Reading multiple excel files into R using the map function

I know there are similar questions to this but I haven't came across ones using the map function from the purrr package. I am having a difficult time trying to read in some excel files(.xlsx) using purrr::map(). I would like each one to be it's own data frame. I tried the approach in this similar question: How can I reading multiple (excel) files into R?.
However, I keep getting this error:
Error: path does not exist: "tab3_DOfinal_HUClevel_assessment.xlsx"
I know for sure I have the right path. Not sure why I am getting this error. I have about 9 excel spreadsheets that I want to read in.
Code I tried:
# load necessary package
library(purrr)
file.list <- list.files(path="2016_Data_Tables",pattern='*.xlsx')
file.list <- setNames(file.list, file.list)
# store all .xlsx files as individual data frames inside of one list
df <- map(file.list, read_xlsx)
The file name pattern goes as follows:
tab3_DOfinal_HUClevel_assessment.xlsx
The only thing that changes is the DOfinal part.
Some sample data:
structure(list(ID = 1, WMA = 15, Number = "02040302020030-01",
HUC14 = "HUC02040302020030", Name = "Absecon Creek (AC Reserviors) (gage to SB)",
Region = "Atlantic Coast", NumofStations = "2", ListofStations = "01410455, R32",
ListofAssessment = "2, 2", HUCTier = "2", swqs = "PL, SE1",
TotalNumSamples5yrs = "NA", flgusgsprelim = "NA, 0", auassess = 2,
auassesstrout = -999, finalauassess = 2, finalauassesstrout = -999,
Changefrom2014 = "No Change-2", Changetroutfrom2014 = "No Change",
listHUC14assess5 = "NA", listHUC14assess3 = "NA", listHUC14assess2 = "01410455, R32",
His2014 = "Attaining", His2014trout = "-999", Notes = NA_character_,
OldStations2014 = "01410455", OldStationsAssess2014 = "2",
Error = NA_character_), .Names = c("ID", "WMA", "Number",
"HUC14", "Name", "Region", "NumofStations", "ListofStations",
"ListofAssessment", "HUCTier", "swqs", "TotalNumSamples5yrs",
"flgusgsprelim", "auassess", "auassesstrout", "finalauassess",
"finalauassesstrout", "Changefrom2014", "Changetroutfrom2014",
"listHUC14assess5", "listHUC14assess3", "listHUC14assess2", "His2014",
"His2014trout", "Notes", "OldStations2014", "OldStationsAssess2014",
"Error"), row.names = c(NA, -1L), class = c("tbl_df", "tbl",
"data.frame"))
structure(list(WMA = 15, Number = "02040302020030-01", HUC14 = "HUC02040302020030",
Name = "Absecon Creek (AC Reserviors) (gage to SB)", Region = "Atlantic Coast",
NumofStations = "1", ListofStations = "01410455", ListofAssessment = "2",
MaxStaAssessment = "2", MinStaAssessment = "2", TotalNumSamples5yrs = "NA",
auassess = "2", ChangeFrom2014 = "No Change-2", liststaassess2 = "01410455",
liststaassess3 = "NA", liststaassess5 = "NA", Assessment2014 = "Attaining",
Comments = NA_character_), .Names = c("WMA", "Number", "HUC14",
"Name", "Region", "NumofStations", "ListofStations", "ListofAssessment",
"MaxStaAssessment", "MinStaAssessment", "TotalNumSamples5yrs",
"auassess", "ChangeFrom2014", "liststaassess2", "liststaassess3",
"liststaassess5", "Assessment2014", "Comments"), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame"))
structure(list(WMA = 15, Number = "02040302020030-01", HUC14 = "HUC02040302020030",
Name = "Absecon Creek (AC Reserviors) (gage to SB)", Region = "Atlantic Coast",
NumofStations = "1", ListofStations = "R32", ListofAssessment = "3",
MaxStaAssessment = "3", MinStaAssessment = "3", TotalNumSamples5yrs = "9",
auassess = "3", ChangeFrom2014 = "No Change-3", liststaassess2 = "NA",
liststaassess3 = "R32", liststaassess5 = "NA", Assessment2014 = "N/A",
Comments = NA_character_), .Names = c("WMA", "Number", "HUC14",
"Name", "Region", "NumofStations", "ListofStations", "ListofAssessment",
"MaxStaAssessment", "MinStaAssessment", "TotalNumSamples5yrs",
"auassess", "ChangeFrom2014", "liststaassess2", "liststaassess3",
"liststaassess5", "Assessment2014", "Comments"), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame"))
Aurèle makes a really good point regarding your file paths.
I would like each one to be it's own data frame
If this is the goal, then a combination of purrr::iwalk and assign could easily get you there. The process goes as follows:
Get a list of all of the .xlsx files located in 2016_Data_Tables/.
Then use purrr::set_names to name each element in this list with its filename sans the .xlsx extension.
Then use purrr::iwalk to apply the assign function to each element in the list. Specifically, use read_xlsx to read each .xlsx file from disk into a data frame and then assign that data frame as a named object to R's global environment
list.files('data/mpg', pattern = '.xlsx', full.names = T) %>%
purrr::set_names(stringr::str_remove(basename(.), '.xlsx$')) %>%
purrr::iwalk(function(x, i) assign(i, readxl::read_xlsx(x), .GlobalEnv))

Chaning labels in choropleth hcmap

I have the following dataset:
structure(list(code = structure(1:6, .Label = c("?elino", "?tip",
"?uto Orizari", "Aerodrom", "Aracinovo", "Berovo", "Bitola",
"Bogdanci", "Bogovinje", "Bosilovo", "Brod", "Brvenica", "Butel",
"Ca?ka", "Cair", "Ce?inovo-Oble?evo", "Centar", "Centar ?upa",
"Cucer Sandevo", "Debar", "Debarca", "Delcevo", "Demir Hisar",
"Demir Kapija", "Dojran", "Dolneni", "Drugovo", "Gazi Baba",
"Gjorce Petrov", "Gostivar", "Gradsko", "Ilinden", "Jegunovce",
"Karbinci", "Karpo?", "Kavadartsi", "Kicevo", "Kisela Voda",
"Kocani", "Konce", "Kratovo", "Kriva Palanka", "Krivoga?tani",
"Kru?evo", "Kumanovo", "Lipkovo", "Lozovo", "Makedonska Kamenica",
"Mavrovo and Rostusa", "Negotino", "Northeastern", "Novatsi",
"Novo Selo", "Ohrid", "Oslomej", "Pelagonia", "Phecevo", "Plasnica",
"Polog", "Prilep", "Probistip", "Radovis", "Rankovce", "Resen",
"Saraj", "Skopje", "Sopiste", "Southeastern", "Struga", "Studenicani",
"Sveti Nikole", "Tearce", "Tetovo", "Valandovo", "Vardar", "Vasilevo",
"Veles", "Vev?ani", "Vinitsa", "Vrane?tica", "Zajas", "Zelenikovo",
"Zrnovci"), class = "factor"), value = c(48L, 1810L, 205L, 1507L,
38L, 66L), OPSTINA_NAZIV = c("ЖЕЛИНО", "ШТИП", "ШУТО ОРИЗАРИ",
"АЕРОДРОМ", "АРАЧИНОВО", "БЕРОВО"), `postal-code` = c("ZE", "ST",
"SO", "AD", "AR", "BR")), .Names = c("code", "value", "OPSTINA_NAZIV",
"postal-code"), row.names = c(NA, 6L), class = "data.frame")
and I'm plotting a choropleth map with the hcmap function below:
hcmap("countries/mk/mk-all.js", data = data_fake,
name = "Manucipalities", value = "value", joinBy = c("name", "code"),
borderColor = "transparent") %>%
hc_colorAxis(dataClasses = color_classes(c(seq(0, 2000, by = 500), 13000))) %>%
hc_legend(layout = "vertical", align = "right",
floating = TRUE, valueDecimals = 0, valueSuffix = "") %>%
hc_mapNavigation(enabled = TRUE)
However, at the moment the labels that appear on the map are from the "code" variable, which contain encoding problems. I want to plot the labels from the "OPSTINA_NAZIV" label.
Any ideas how I can do this?
I tried:
dataLabels = list(enabled = TRUE, format = '{point.OPSTINA_NAZIV}')
But it didn't work out.
You can access to the mapData info using the options accesor. Example {point.options.OPSTINA_NAZIV}:
hcmap("countries/mk/mk-all.js", data = data_fake,
name = "Manucipalities", value = "value", joinBy = c("name", "code"),
borderColor = "transparent" ,
dataLabels = list(enabled = TRUE, format = "{point.options.OPSTINA_NAZIV}"))

Resources