Heatmap with the data point categorized by their class label - r

I have a dataframe with columns for different attributes and a column for the class label. I am trying to create a Heatmap/matrix plot of all the attributes with the data points categorized by their class label.
If I turn my dataframe into a numeric matrix, I can use the heatmap function to create a heatmap:
q3 <- read.arff("diabetes.arff")
q3_m <- as.matrix(q3[,1:8])
heatmap(q3_m, Colv=NA, Rowv=NA)
However, I can't figure out how to order these by the class variable, as I had to remove it from the matrix because it isn't numeric.
If I transform the data into the long format, I can also make the following heatmap using ggplot:
q3_long <- pivot_longer(q3, preg:age, names_to = "Attribute",
values_to = "Value")
ggplot(data = q3_long, mapping = aes(x = Attribute, y=class, fill = Value)) +
geom_raster() +
xlab(label = "Attribute")
However, this averages the values of every case in a given class rather than showing every case as a separate row with its own fill.
How can I combine these approaches to get a heatmap that clusters the cases by class?
(Apologies in advance - I attempted to include images here ,but I just joined stackoverflow and therefore don't have the 10 reputation points needed to include images).
Thanks for your help.
Edit: here is a sample of the data. It is also publicly available - the diabetes.arff dataset is automatically downloaded with Weka installation (https://waikato.github.io/weka-wiki/downloading_weka/).
structure(list(preg = c(6, 1, 8, 1, 0, 5, 3, 10, 2, 8, 4, 10,
10, 1, 5, 7, 0, 7, 1, 1), plas = c(148, 85, 183, 89, 137, 116,
78, 115, 197, 125, 110, 168, 139, 189, 166, 100, 118, 107, 103,
115), pres = c(72, 66, 64, 66, 40, 74, 50, 0, 70, 96, 92, 74,
80, 60, 72, 0, 84, 74, 30, 70), skin = c(35, 29, 0, 23, 35, 0,
32, 0, 45, 0, 0, 0, 0, 23, 19, 0, 47, 0, 38, 30), insu = c(0,
0, 0, 94, 168, 0, 88, 0, 543, 0, 0, 0, 0, 846, 175, 0, 230, 0,
83, 96), mass = c(33.6, 26.6, 23.3, 28.1, 43.1, 25.6, 31, 35.3,
30.5, 0, 37.6, 38, 27.1, 30.1, 25.8, 30, 45.8, 29.6, 43.3, 34.6
), pedi = c(0.627, 0.351, 0.672, 0.167, 2.288, 0.201, 0.248,
0.134, 0.158, 0.232, 0.191, 0.537, 1.441, 0.398, 0.587, 0.484,
0.551, 0.254, 0.183, 0.529), age = c(50, 31, 32, 21, 33, 30,
26, 29, 53, 54, 30, 34, 57, 59, 51, 32, 31, 31, 33, 32), class = structure(c(2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L,
2L, 1L, 2L), .Label = c("tested_negative", "tested_positive"), class = "factor")), row.names = c(NA,
20L), class = "data.frame")

Maybe this is what you are looking for. To get a heatmap by cases you could add an id variable to your dataset which you could map on x and make use of faceting to cluster the cases by class:
library(tidyr)
library(ggplot2)
library(dplyr)
q3_long <- q3 %>%
mutate(id = row_number(), id = factor(id)) %>%
pivot_longer(-c(class, id), names_to = "Attribute", values_to = "Value")
ggplot(data = q3_long, mapping = aes(x = Attribute, y = id, fill = Value)) +
geom_raster() +
xlab(label = "Attribute") +
facet_wrap(~class, scales = "free_y")

Related

How do I customise the colours of the groups and overlay a contour with values over an NMDS ordination spider graph plot using ggplot2?

I've ran an NMDS using vegan on some ecological data showing species frequency and habitat types and produced a spider graph plot showing the differences between the groups(habitats) using some code I found.
I'd like to be able to change the colours of the habitats to represent a gradient from one main habitat to the next and add a contour over the top showing the change in Shannon diversity using ordisurf with figures on the contours.
Here is some example data (apologies if this is too large of a subset) named dataNMDS.
structure(list(Plot_ID = c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L),
Plot_gradient = c("Grassland", "Grassland", "Grassland",
"EGA", "EGA", "EGA", "EGB", "EGB", "EGB", "EGC", "EGC", "EGC",
"EHC", "EHC", "EHC", "EHB", "EHB", "EHB", "EHA", "EHA", "EHA",
"Heath", "Heath", "Heath"), SSD_stdev = c(6.475059845, 7.730873172,
5.451752012, 1.854723699, 3.847076812, 1.939071943, 3.382306905,
2.332380758, 1.673320053, 2.481934729, 4.955804677, 1.624807681,
3.2, 6.337191807, 0.748331477, 0.632455532, 3.521363372,
0.8, 1.264911064, 9.583318841, 0.748331477, 1.977473135,
6.66765326, 3.404937591), log_mean = c(3.77, 3.77, 3.68,
3.65, 3.68, 3.41, 3.64, 3.8, 3.4, 3.65, 3.72, 3.43, 3.65,
3.73, 3.36, 3.57, 3.88, 3.38, 3.6, 3.79, 3.38, 3.84, 3.53,
3.3), conductivity = c(770L, 555L, 508L, 508L, 553L, 532L,
605L, 443L, 454L, 561L, 533L, 554L, 502L, 435L, 523L, 452L,
384L, 494L, 502L, 411L, 378L, 461L, 507L, 383L), percentage_c = c(32.144,
28.316, 31.498, 9.612, 21.184, 20.764, 14.819, 21.187, 16.619,
19.554, 28.565, 23.694, 7.539, 14.12, 23.769, 9.696, 13.022,
18.966, 10.053, 15.969, 12.395, 6.013, 17.848, 10.716), percentage_n = c(2.398,
2.224, 2.321, 0.799, 1.798, 1.497, 1.192, 1.875, 1.091, 1.618,
2.149, 1.679, 0.557, 1.172, 1.361, 0.679, 1.121, 1.325, 0.715,
1.368, 0.83, 0.465, 1.394, 0.734), percentage_ic = c(0.047,
0.049, 0.046, 0.048, 0.049, 0.048, 0.049, 0.048, 0.05, 0.051,
0.048, 0.047, 0.049, 0.051, 0.048, 0.05, 0.048, 0.047, 0.051,
0.046, 0.05, 0.05, 0.046, 0.05), Racomitrium_lanuginosum = c(2.4,
11.2, 3.2, 52, 32, 24, 16, 40, 0, 48, 0, 16, 88, 8, 100,
96, 76, 64, 100, 40, 68, 71, 57, 74), Vaccinium_myrtillus = c(2.4,
22.4, 9.6, 28, 24, 20, 32, 20, 28, 16, 44, 60, 52, 52, 44,
28, 16, 16, 16, 52, 48, 34.5, 18.5, 48), Agrostis_capillaris = c(37.6,
54.4, 11.2, 60, 64, 16, 96, 36, 16, 68, 44, 24, 88, 76, 84,
92, 92, 0, 68, 76, 88, 57.5, 35.5, 16.5), Galium_saxatile = c(0.8,
24, 18.4, 4, 12, 24, 32, 24, 4, 4, 24, 8, 4, 44, 56, 48,
56, 4, 0, 36, 52, 15.5, 23.5, 11.5), Carex_pilulifera = c(31.2,
15.2, 25.6, 24, 0, 76, 0, 20, 92, 24, 0, 80, 4, 0, 12, 0,
0, 0, 20, 0, 0, 0, 0, 0), Festuca_ovina = c(0, 1.6, 0, 16,
0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 44, 0, 20, 7.5,
27.5, 12), Juncus_squarrosus = c(68L, 56L, 0L, 44L, 0L, 0L,
0L, 0L, 0L, 96L, 0L, 0L, 40L, 0L, 0L, 0L, 0L, 0L, 12L, 0L,
0L, 0L, 0L, 0L), Hylocomium_splendens = c(21.6, 32.8, 0.8,
28, 0, 0, 68, 56, 0, 16, 24, 0, 8, 8, 0, 12, 12, 0, 0, 4,
0, 0, 0, 0), Rhytidiadelphus_loreus = c(70.4, 40, 32.8, 32,
48, 40, 56, 64, 76, 12, 52, 40, 0, 24, 0, 8, 24, 24, 0, 44,
12, 1, 3, 0)), class = "data.frame", row.names = c(NA, -24L
))
This is the code I've used to run the NMDS and produce the spider graph plot in ggplot2.
#subset the dataframe on which to base the ordination: the species data (dataframe 1)
data_1 <- dataNMDS[,9:17]
#Identify the columns that contains the descriptive/environmental data (dataframe 2)
data_2 <- dataNMDS[,2:8]
#ordination by NMDS
NMDS <- metaMDS(data_1, distance = "bray", k = 2)
stressplot(NMDS)
#########################
#Data visualisation
#Extract the axes scores
datascores <- as.data.frame(scores(NMDS)) #extract the site scores
#Add/calculate spider diagram
scores <- cbind(as.data.frame(datascores), Habitat = data_2$Plot_gradient)
centroids <- aggregate(cbind(NMDS1, NMDS2) ~ Habitat, data = scores, FUN = mean)
seg <- merge(scores, setNames(centroids, c('Habitat','oNMDS1','oNMDS2')),
by = 'Habitat', sort = FALSE)
#NMDS spider graph plot
ggplot(scores, aes(x = NMDS1, y = NMDS2, colour = Habitat)) +
geom_segment(data = seg,
mapping = aes(xend = oNMDS1, yend = oNMDS2)) + # add spiders
geom_point(data = centroids, size = 4) + # add centroids
geom_point() +
coord_fixed()+
theme_bw()+
theme(legend.position="right",legend.text=element_text(size=10),legend.direction='vertical')
This code produces this image below...
However, I'd like to change the colour of the habitat types showing a gradient from Grassland to heath.
I've tried things like...
scale_fill_manual(values = c("darkgreen" , "green4" , "green3" , "green2", "orangered" , "orangered2" , "orangered3" , "orangered4"),
breaks = c("Grassland","EGA","EGB","EGC","EHC","EHB","EHA","Heath"))
But my limited R skill causes me to fail though, from what I can tell it seems to relate to the aes (colour = Habitat).
I've also tried to add the contours of the Shannon diversity which I've calculated with...
ordisurf(NMDS, diversity(data_1), add = T)
and
shannon <- ordisurf(NMDS, diversity(data_1), add = T)
shannoncurve <- ordisurfgrid.long(shannon)
+ geom_contour(data=shannoncurve,aes(x=x, y=y, z=z))
Though I can't get that to work either.
Sorry for the incredibly long-winded and probably simple question but any help would be extremely appreciated.

Robust Independent T-test

This is my first time asking a question, so I apologize for any formatting issues or anything that makes this difficult to answer. Please let me know what I need to add to be able to the answer question.
I'm attempting to compare differences between 2 unequal group sizes (one ~ 97 the other ~ 714). The reason for the large discrepancy is I am looking at a program done by one class to see if it is significantly different than what has occurred in previous classes. I've been reading about robust stats recently and decided to use a yuen bootstrap in R-Studio from the WRS2 package for a more valid comparison, especially with the difference in sample size.
My formula is
yuenbt(DataExample$PT500 ~ DataExample3$ClassPT500, tr = 0.2, nboot = 599, side = TRUE)
and it returns
Call:
yuenbt(formula = DataExample$PT500 ~ DataExample$ClassPT500,
tr = 0.2, nboot = 599, side = TRUE)
Test statistic: NA (df = NA), p-value = 0
Trimmed mean difference: -65
95 percent confidence interval:
NA NA
The NA's return on other variables that I've tried out as well, or in some cases the confidence interval will state INF. Any ideas why this is happening (such a big difference in sample size?) and suggestions on what the next best step would be are greatly appreciated.
Here is a sample of data:
structure(list(PrePT500 = c(74, 105, 121, 128), PostPT500 = c(191,
264, 327, 314), PT500 = c(117, 159, 206, 186), PrePullups = c(0,
NA, NA, 2), PostPullups = c(3, NA, NA, 3), Pullups = c(3, NA,
NA, 1), PreSitups = c(46, 40, 25, 33), PostSitups = c(41, 61,
39, 49), Situps = c(-5, 21, 14, 16), PreMC = c(8, 16, 29, 19),
PostMC = c(41, 45, 60, 60), MC = c(33, 29, 31, 41), PrePushups = c(20,
16, 28, 30), PostPushups = c(40, 47, 50, 50), Pushups = c(20,
31, 22, 20), Pre1.5 = c(1048, 917, 902, 905), Post1.5 = c(846,
748, 696, 760), X1.5 = c(-202, -169, -206, -145), Pre220 = c(43,
50, 41, 45), Post220 = c(39, 40, 32, 34), X220 = c(-4, -10,
-9, -11), PreAgility = c(20.96, NA, 21.1, 19.88), PostAgility = c(19.69,
NA, 18.8, 20.79), Agility = c(-1.27, NA, -2.3, 0.91), PreBD = c(6.17,
7.82, 5.08, 7), PostBD = c(5, 4.87, 4.68, 6.2), BD = c(-1.17,
-2.95, -0.4, -0.8), PreCL = c(7.05, 13.6, 14.4, 8.8), PostCL = c(8.1,
8.9, 8.27, 7.6), CL = c(1.05, -4.7, -6.13, -1.2), PreSW = c(10.2,
NA, 20.34, 8), PostSW = c(11.4, NA, 9.3, 7.4), SW = c(1.2,
NA, -11.04, -0.6), Pre500 = c(115, 128, 107, 114), Post500 = c(105,
112, 93, 99), X500 = c(-10, -16, -14, -15), PreTotal = c(446,
91, 255, NA), PostTotal = c(493, 439, 503, NA), Total = c(47,
348, 248, NA), ClassPrePT500 = c(338, 213, 215, 243), ClassPostPT500 = c(430,
396, 333, 314), ClassPT500 = c(92, 183, 118, 71), ClassPrePullups = c(6,
5, 2, 0), ClassPostPullups = c(13, 7, 15, 0), ClassPullups = c(7,
2, 13, 0), ClassPreSitups = c(59, 42, 45, 53), ClassPostSitups = c(75,
70, 51, 53), ClassSitups = c(16, 28, 6, 0), ClassPreMC = c(60,
43, 31, 48), ClassPostMC = c(60, 60, 31, 60), ClassMC = c(0,
17, 0, 12), ClassPrePushups = c(50, 37, 26, 30), ClassPostPushups = c(50,
50, 47, 34), ClassPushups = c(0, 13, 21, 4), ClassPre1.5 = c(803,
810, 803, 741), ClassPost1.5 = c(700, 690, 664, 661), Class1.5 = c(-103,
-120, -139, -80), ClassPre220 = c(32, 41, 31, 40), ClassPost220 = c(31,
33, 30, 37), Class220 = c(-1, -8, -1, -3), ClassPreAgility = c(19,
23, 18, 22.1), ClassPostAgility = c(16.4, 18, 16.5, 20.3),
ClassAgility = c(-2.6, -5, -1.5, -1.8), ClassPreBD = c(6.4,
8.5, 5.8, 11.2), ClassPostBD = c(5.3, 5.8, 5.5, 7.5), ClassBD = c(-1.1,
-2.7, -0.3, -3.7), ClassPreCL = c(7.8, 9.3, 7.3, 9.6), ClassPostCL = c(7.6,
7.4, 7.4, 9.2), ClassCL = c(-0.2, -1.9, 0.100000000000001,
-0.4), ClassPreSW = c(8.5, 8.4, 7.7, NA), ClassPostSW = c(7.8,
8.1, 7.6, 8), ClassSW = c(-0.7, -0.300000000000001, -0.100000000000001,
NA), ClassPre500 = c(102, 104, 100, 108), ClassPost500 = c(94,
88, 98, 101), Class500 = c(-8, -16, -2, -7), ClassPreTotal = c(495,
418, 528, 264), ClassPostTotal = c(561, 539, 562, 482), ClassTotal = c(66,
121, 34, 218)), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame"))
Thank you in advance for any help.
The R function
yuenbt(x, y, tr=0.2, alpha=0.05, nboot=599, side=F) computes a 1 − α confidence interval for μt 1 − μt 2 using the bootstrap-t method, where the default amount of trimming (tr) is 0.2, the default value for α is 0.05, and the default value
for nboot (B) is 599. So far, simulations suggest that in terms of probability coverage, there is little or no advantage to using B > 599 when α = 0.05. However, there is no recommended choice for B when α < 0.05 simply because little is known about how the bootstrap-t performs for this special case. Finally, the default value for side is FALSE, indicating that the equal-tailed two-sided confidence interval is to be used. Using side=TRUE results in the symmetric two-sided confidence interval.
Try:
yuenbt(DataExample$PT500, DataExample3$ClassPT500, tr = 0.2, nboot = 599, side = TRUE)

How to label a ternary plot

I am trying to create a triangular plot,that three dimensions of which represent three herbal strategies.
One dimension represents the strategy of C (competitive plant), the second dimension “S” (stress tolerant plants) and the third dimension ”R” (ruderal plants), the points on it represent the plant species.
I want to write the species name outside the triangle and connect it to the points inside the triangle with an arrow. How do I draw this ternary plot?
The following is the data structure and my code
require(Ternary)
TernaryPlot()
#Plot two stylised plots side by side, and plot data
par(mfrow=c(1, 1), mar=rep(0.3, 4))
TernaryPlot(atip='C%', btip='R%', ctip='S%',
point='UP', lab.cex=0.8, grid.minor.lines=0,
grid.lty='solid', col='#FFFFFF', grid.col='GREY',
axis.col=rgb(0.1, 0.1, 0.1), ticks.col=rgb(0.1, 0.1, 0.1),
padding=0.08)
data_points <- list("Bromus dantonia" = c(47, 59, 149),
"Calamagrosis psoudo phragmatis" = c(90, 102, 63),
"Carex diluta" = c(109, 64, 82),
"Carex divisa" = c(96, 99, 59),
"Carex pseudocyperus" = c(130, 71, 54),
"Carex stenophylla" = c(97, 98, 59),
"Catabrosa aquatica" = c(100, 5, 150),
"Centaurea iberica" = c(124, 85, 46),
"Cirsium hygrophilum" = c(158, 42, 55),
"Cladium mariscus" = c(159, 96, 0),
"cod2" = c(54, 82, 119),
"Cynodon dactylon" = c(121, 54, 80),
"Eleocharis palustri" = c(124, 100, 31),
"Epilobium parviflorum" = c(67, 80, 107),
"Eromopoa persica" = c(83, 15, 157),
"Funaria cf.microstoma" = c(8, 0, 247),
"Glaux maritime" = c(4, 196, 55),
"Hordeum brevisubulatum" = c(76, 70, 109),
"Hordeum glaucum" = c(40, 79, 136),
"Inula britannica" = c(95, 108, 51),
"Juncus articulatus" = c(107, 79, 69),
"Blysmus compressus" = c(81, 127, 47),
"Juncusinflexus"= c(149, 106, 0),
"Medicago polymorpha" = c(60, 86, 109),
"Mentha spicata" = c(150, 23, 82),
"Ononis spinosa" = c(66, 112, 77),
"Phragmites australis" = c(234, 0, 21),
"Plantago amplexicaulis" = c(108, 83, 64),
"Poa trivialis" = c(90, 28, 138),
"Polygonum paronychioides" = c(20, 12, 223),
"Potentila reptans" = c(106, 41, 108),
"Potentilla anserina" = c(105, 58, 91),
"Ranunculus grandiflorus" = c(129, 25, 101),
"Schoenus nigricans" = c(143, 91, 21),
"Setaria viridis" = c(10, 7, 238),
"Sonchus oleraceus" = c(178, 0, 77),
"Taraxacum officinale" = c(117, 28, 110),
"Trifolium repens" = c(94, 4, 157),
"Triglochin martima" = c(63, 96, 95),
"Veronica anagallis-aquatica" = c(55, 37, 163)
)
AddToTernary(points, data_points, pch=21, cex=1.2,
bg=vapply(data_points,
function (x) rgb(x[1], x[2], x[3], 128,
maxColorValue=255),
character(1))
)
AddToTernary(text, data_points, names(data_points), cex=0.8, font=1)

I would like to plot two data frames on a single plot

I am fairly new to R and am attempting to plot data frames simultaneously using ggplot2.
I have two data frames.
One is called WorkSchedMonday and consist of 96 rows and 4 columns.
structure(c(9, 9, 9, 9, 18, 18, 36, 36, 36, 36, 64, 80, 96, 96,
112, 128, 168, 168, 296, 312, 14, 14, 14, 21, 21, 21, 21, 35,
49, 49, 12, 12, 6, 6, 0, 0, 0, 0, 6, 6), .Dim = c(10L, 4L), .Dimnames = list(
c("04:00", "04:15", "04:30", "04:45", "05:00", "05:15", "05:30",
"05:45", "06:00", "06:15"), c("WorkSchedAndIndivMondayAtHome",
"WorkSchedAndIndivMondayAtSingleWorkPlace", "WorkSchedAndIndivMondayAtVarietyOfPlaces",
"WorkSchedAndIndivMondayWorkingOnTheMove")))
The other is called WorkSchedTuesday and consist of 96 rows and 4 columns.
structure(c(0, 0, 0, 0, 9, 9, 27, 27, 36, 36, 64, 80, 96, 96,
112, 128, 168, 168, 296, 312, 14, 14, 14, 21, 21, 21, 21, 35,
49, 49, 12, 12, 6, 6, 0, 0, 0, 0, 6, 6), .Dim = c(10L, 4L), .Dimnames = list(
c("04:00", "04:15", "04:30", "04:45", "05:00", "05:15", "05:30",
"05:45", "06:00", "06:15"), c("WorkSchedAndIndivTuesdayAtHome",
"WorkSchedAndIndivTuesdayAtSingleWorkPlace", "WorkSchedAndIndivTuesdayAtVarietyOfPlaces",
"WorkSchedAndIndivTuesdayWorkingOnTheMove")))
Using the following code a plotted the 2 data frames.
WorkSchedWeek<-as.matrix(cbind(WorkSchedAndIndivMondayAtHome,WorkSchedAndIndivMondayAtSingleWorkPlace,WorkSchedAndIndivMondayAtVarietyOfPlaces, WorkSchedAndIndivMondayWorkingOnTheMove, WorkSchedAndIndivTuesdayAtHome,WorkSchedAndIndivTuesdayAtSingleWorkPlace,WorkSchedAndIndivTuesdayAtVarietyOfPlaces, WorkSchedAndIndivTuesdayWorkingOnTheMove))
####
melted_WorkSchedWeek<- melt(WorkSchedWeek)
plot<-ggplot(melted_WorkSchedWeek) + geom_col(aes(x = Var1,y = value,fill = Var2),position = "fill") + theme(legend.position="right", axis.text.x = element_text(angle = 90, hjust = 1))
plot + labs(x="Time", y="Probabilities", colour="Work schedules", fill="Work schedules")
However I would like to create the above plot using ggplot (or lattice) . On x axis is time (0400 till 0345 _ 24hours) per days (Monday and Tuesday), y axis probability distributions. The plot is filled with work schedules values. Can somebody help me? Thanks
You can use facet_grid to make two graphs side by side but sharing an axis. But this requires you to first merge your two dataframes.
To do this we standardize your variables, add a day column, a time column and then use rbind:
WorkSchedMonday = data.frame(structure(c(9, 9, 9, 9, 18, 18, 36, 36, 36, 36, 64, 80, 96, 96,
112, 128, 168, 168, 296, 312, 14, 14, 14, 21, 21, 21, 21, 35,
49, 49, 12, 12, 6, 6, 0, 0, 0, 0, 6, 6), .Dim = c(10L, 4L), .Dimnames = list(
c("04:00", "04:15", "04:30", "04:45", "05:00", "05:15", "05:30",
"05:45", "06:00", "06:15"), c("WorkSchedAndIndivMondayAtHome",
"WorkSchedAndIndivMondayAtSingleWorkPlace", "WorkSchedAndIndivMondayAtVarietyOfPlaces",
"WorkSchedAndIndivMondayWorkingOnTheMove"))))
names(WorkSchedMonday) = c("AtHome", "SingleWork", "Variety", "OnTheMove")
WorkSchedMonday$time = rownames(WorkSchedMonday)
WorkSchedTuesday = data.frame(structure(c(0, 0, 0, 0, 9, 9, 27, 27, 36, 36, 64, 80, 96, 96,
112, 128, 168, 168, 296, 312, 14, 14, 14, 21, 21, 21, 21, 35,
49, 49, 12, 12, 6, 6, 0, 0, 0, 0, 6, 6), .Dim = c(10L, 4L), .Dimnames = list(
c("04:00", "04:15", "04:30", "04:45", "05:00", "05:15", "05:30",
"05:45", "06:00", "06:15"), c("WorkSchedAndIndivMondayAtHome",
"WorkSchedAndIndivMondayAtSingleWorkPlace", "WorkSchedAndIndivMondayAtVarietyOfPlaces",
"WorkSchedAndIndivMondayWorkingOnTheMove"))))
names(WorkSchedTuesday) = c("AtHome", "SingleWork", "Variety", "OnTheMove")
WorkSchedTuesday$time = rownames(WorkSchedTuesday)
WorkSchedMonday$day = "Monday"
WorkSchedTuesday$day = "Tuesday"
WorkSched = rbind(WorkSchedMonday, WorkSchedTuesday)
With that done, you can melt your dataframe like you did before and run the same ggplot, but with facet_grid along the variable that you want your graph to be separated by (day).
WorkSched_melt = melt(WorkSched, id.vars = c("time", "day"))
ggplot(WorkSched_melt, aes(x = time, y = value, fill = variable)) + geom_col(position = "fill") +
facet_grid(. ~ day) + theme(legend.position="right", axis.text.x = element_text(angle = 90, hjust = 1))
As a general rule, avoid using really big and clunky variable names, and also avoid having a necessary variable (in this case, time) as your row name.
Here is a solution with the data preparation code done with package dplyr.
library(ggplot2)
library(dplyr)
WorkSchedWeek <- cbind(WorkSchedMonday, WorkSchedTuesday)
WorkSchedWeek <- as.data.frame(WorkSchedWeek)
WorkSchedWeek <- cbind.data.frame(Hour = row.names(WorkSchedWeek), WorkSchedWeek)
melted_WorkSchedWeek <- reshape2::melt(WorkSchedWeek, id.vars = "Hour")
melted_WorkSchedWeek %>%
mutate(variable = sub("^WorkSchedAndIndiv", "", variable),
Month = sub("(^.{3}).*", "\\1", variable),
variable = sub("^.*day", "", variable)) %>%
ggplot(aes(x = Hour,y = value, fill = variable)) +
geom_col(position = "fill") +
theme(legend.position = "right",
axis.text.x = element_text(angle = 90, hjust = 1)) +
facet_wrap(~ Month)

Conditionnally formatting the background color of a table (formattable in R)

I have a table similar to the below:
library(tibble)
library(formattable)
mytable <- tibble(
id = c(NA, 748, 17, 717, 39, 734, 10, 762),
NPS = c(65, 63, 56, 62, 73, 80, 50, 54),
`NPS Chge vs. month ago` = c(-2, -5, -2, -8, -1, 6, 7, -9),
`Cumulative Response` = c(766, 102, 154, 81, 239, 79, 50, 61),
`Response Rate` = c(0.25, 0.24, 0.25, 0.34, 0.21, 0.34, 0.32, 0.27),
`Response for Month` = c(161, 43, 7, 37, 7, 32, 15, 20)
)
formattable(mytable)
And I wish to set a conditional formatting to the background of the rows such that if the NPS score is below 60 the background is set to red, otherwise it's set to green. In my limited knowledge of HTML I figured I could use "td". Unfortunately it appears to mess the format of the table as a whole:
html_tag <- "td"
my_format <- formatter(html_tag, style = x ~ ifelse(mytable$NPS < 60, "background-color:red", "background-color:green"))
formattable(mytable, list(
area(col = 2:6) ~ my_format
))
The headers of the table are no longer aligned with the rest of the rows. What am I doing wrong? What should I use instead of "td"?
You can also change the background color conditionally without using HTML. A simple version of code without extra aesthetics could be like this:
library(dplyr)
library(formattable)
tibble(
id = c(NA, 748, 17, 717, 39, 734, 10, 762),
NPS = c(65, 63, 56, 62, 73, 80, 50, 54),
`NPS Chge vs. month ago` = c(-2, -5, -2, -8, -1, 6, 7, -9),
`Cumulative Response` = c(766, 102, 154, 81, 239, 79, 50, 61),
`Response Rate` = c(0.25, 0.24, 0.25, 0.34, 0.21, 0.34, 0.32, 0.27),
`Response for Month` = c(161, 43, 7, 37, 7, 32, 15, 20)
) %>%
formattable(align = rep("c", ncol(.)),
list(
`NPS` = formatter("span", style = ~ style (display = "block",
`background-color` = ifelse(NPS < 60, "red", "green")))
)
)

Resources