data restructure for ggplot geom_bar() stacked bar plotting - r

avg data frame
structure(list(cluster = 1:10, `B cells` = c(0.0369711424087593,
0.00526325696315245, 0.0601665087700304, 0.0231936137674591,
0.00766480549892195, 0.0285649960414246, 0.0044030329888148,
0.00345795624392323, 0.00309644760567017, 0.00757469580646642
), DCreg = c(0.0304752063136609, 0.174423402403555, 0.0163287878795231,
0.0192154395050034, 0.124511133655915, 0.0296144152010606, 0.205920199256583,
0.114542510479173, 0.485649315606826, 0.0260997195368302), `Dendritic cells` = c(0.156500506395882,
0.0106345235402551, 0.185348445999056, 0.395476210792188, 0.0719924126421944,
0.104614178324861, 0.0226961213600642, 0.00292885066859525, 0.0122661582750054,
0.118394797602606), `Dendritic cells CD103` = c(0.0482626330670718,
0.0140976438812366, 0.030373962919268, 0.0614351282717271, 0.189884617234425,
0.35658217311524, 0.0170390739879794, 0.0042469791834164, 0.0233514821789908,
0.0619204360724114), Endothelium = c(0.11337268119519, 0.027025412632833,
0.43869939276274, 0.0662483745710424, 0.0331520081202891, 0.164940771021627,
0.050135082662031, 0.00351285357934976, 0.0201434603120533, 0.0658151087814588
), Epithelium = c(0.00418217375070304, 0.000413203430326014,
0.0104665752013841, 0.00525017082076173, 0.00415698684351819,
0.0333637286413386, 0.000431569929321054, 0, 0.0011976402913935,
0.000419107154908937), Fibroblasts = c(0.00612607297867521, 0.0116371963351148,
0.0108995123396445, 0.0117009481628146, 0.00674570810846355,
0.0145571600114712, 0.0120879220427041, 0.00272604244680674,
0.00772202564316953, 0.0272894372187893), `Macrophages other` = c(0.00101589948056542,
0.000645130694683314, 0, 0, 0.000639755622911849, 0, 0.000197788594031649,
0.00136588418173722, 0, 0.000420171738310913), `Macrophages type 1` = c(0.221136736926214,
0.0101728310491049, 0.0295121583899105, 0.0455316207473085, 0.0230660380060092,
0.0222078529371378, 0.015179095607796, 0.00459851371158574, 0.0112212936162074,
0.02937463664781), `Macrophages type 2` = c(0.0411011962682536,
0.0522714029078864, 0.012334445025602, 0.0568282306829578, 0.0453391303748083,
0.0181451496347937, 0.239616155787136, 0.0115489617356957, 0.04981525808734,
0.462030477544264), Neutrophils = c(0.0766806635700175, 0.00442125133471751,
0.0476726698091672, 0.0236749605376406, 0.00911361867045396,
0.0236169696110325, 0.00537803767758349, 0.0032239571528306,
0.00201957474248881, 0.0160311845078706), `NK cells` = c(0, 0,
0.000108464194313773, 0, 0, 8.99698299254026e-05, 0.000114169258081956,
0, 4.57749702462694e-05, 2.78396436525612e-05), `T cells CD4` = c(0.0330641154468336,
0.0213946654236908, 0.0323515137814534, 0.148686432010321, 0.0500449048718068,
0.0685338874314457, 0.0273478878575203, 0.00472971607890761,
0.0328998359523529, 0.0354818425253482), `T cells CD8` = c(0.0172498783937768,
0.00877876825324442, 0.0156948623402281, 0.0207354640030442,
0.0145536348676947, 0.0146643634343241, 0.0155197086731341, 0.00171509323694132,
0.0135851481885585, 0.0159896002840603), `T reg cells` = c(0.00451599932441037,
0.0058712074137469, 0.00274652046695111, 0.0167445990360021,
0.0127422536359504, 0.0142171857157357, 0.00996063310868601,
0.00089148571457417, 0.0113706843090688, 0.00663049091849752),
Tumour = c(0.0765887917753441, 0.651476092235795, 0.0173767962070959,
0.0647526184622169, 0.395840854655601, 0.0472273714361081,
0.368387800802699, 0.839842321316499, 0.323145170321728,
0.111585860905902), Unclassified = c(0.132756302704642, 0.00147401150065844,
0.0899193839136316, 0.0405261886295129, 0.0105521371910369,
0.0590598276124738, 0.00558572040583437, 0.000668874269964592,
0.00247072989889988, 0.0149145931108126)), class = "data.frame", row.names = c(NA,
-10L))
cluster B cells DCreg Dendritic cells Dendritic cells CD103 Endothelium Epithelium Neutrophils NK cells T cells CD4 T cells CD8 T reg cells Tumour Unclassified
1 1 0.036971142 0.03047521 0.156500506 0.048262633 0.113372681 0.0041821738 0.076680664 0.000000e+00 0.033064115 0.017249878 0.0045159993 0.07658879 0.1327563027
2 2 0.005263257 0.17442340 0.010634524 0.014097644 0.027025413 0.0004132034 0.004421251 0.000000e+00 0.021394665 0.008778768 0.0058712074 0.65147609 0.0014740115
3 3 0.060166509 0.01632879 0.185348446 0.030373963 0.438699393 0.0104665752 0.047672670 1.084642e-04 0.032351514 0.015694862 0.0027465205 0.01737680 0.0899193839
4 4 0.023193614 0.01921544 0.395476211 0.061435128 0.066248375 0.0052501708 0.023674961 0.000000e+00 0.148686432 0.020735464 0.0167445990 0.06475262 0.0405261886
5 5 0.007664805 0.12451113 0.071992413 0.189884617 0.033152008 0.0041569868 0.009113619 0.000000e+00 0.050044905 0.014553635 0.0127422536 0.39584085 0.0105521372
6 6 0.028564996 0.02961442 0.104614178 0.356582173 0.164940771 0.0333637286 0.023616970 8.996983e-05 0.068533887 0.014664363 0.0142171857 0.04722737 0.0590598276
7 7 0.004403033 0.20592020 0.022696121 0.017039074 0.050135083 0.0004315699 0.005378038 1.141693e-04 0.027347888 0.015519709 0.0099606331 0.36838780 0.0055857204
8 8 0.003457956 0.11454251 0.002928851 0.004246979 0.003512854 0.0000000000 0.003223957 0.000000e+00 0.004729716 0.001715093 0.0008914857 0.83984232 0.0006688743
9 9 0.003096448 0.48564932 0.012266158 0.023351482 0.020143460 0.0011976403 0.002019575 4.577497e-05 0.032899836 0.013585148 0.0113706843 0.32314517 0.0024707299
10 10 0.007574696 0.02609972 0.118394798 0.061920436 0.065815109 0.0004191072 0.016031185 2.783964e-05 0.035481843 0.015989600 0.0066304909 0.11158586 0.0149145931
I have the above data frame and am trying to create a stacked bar using ggplot geom_bar() where each bar = 1 cluster (10 clusters, so 10 bars) and each bar is filled with the proportions of each cell type contributing to a cluster (proportion values for each cluster add up to 1).
I have started by changing the layout of the data :
# Reshape to long format: every column except `cluster` is stacked, giving one
# row per cluster / cell-type pair.
avgt <- pivot_longer(avg, cols = -cluster)
Which gave me this layout:
cluster name value
1 1 B cells 0.0370
2 1 DCreg 0.0305
3 1 Dendritic cells 0.157
4 1 Dendritic cells CD103 0.0483
5 1 Endothelium 0.113
6 1 Epithelium 0.00418
7 1 Fibroblasts 0.00613
8 1 Macrophages other 0.00102
9 1 Macrophages type 1 0.221
10 1 Macrophages type 2 0.0411
However, I am not sure what to do next: if I use the 'cluster' column for x and the 'name' column for 'fill', I get, as expected, equal proportions for each cell type:
# Horizontal stacked bar chart with one bar per cluster, filled by cell type.
# Only x and fill are mapped, so geom_bar() counts rows instead of reading the
# `value` column; each cell type occurs once per cluster, hence equal segments.
p <- ggplot(avgt, aes(x = factor(cluster), fill = factor(name))) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  coord_flip() +
  labs(x = "Community", y = "Percentage distribution", fill = "") +
  theme_classic() +
  # All text sizes are set in one theme() call, after the complete theme
  theme(
    axis.text = element_text(size = 20),
    axis.text.y = element_text(size = 20),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    legend.text = element_text(size = 20)
  )
p
geom_bar() stacked plot result
Any ideas of how I can get this to work?
Thanks in advance

Related

Customize DCA plot labeling

I have a DCA object
summary(dca)
Site scores:
DCA1 DCA2 DCA3 DCA4 Totals
AH_T1 -0.083971 -0.600384 0.513428 0.794499 18
AH_T2 -0.017126 -0.410023 -0.194760 0.282255 14
AH_T3 -0.079178 -0.244031 0.274927 0.570446 16
SB_T1 -0.413546 0.033417 0.762520 0.454288 17
HZ_T1 -0.205265 -0.401048 -0.329853 0.098905 20
HZ_T2 -0.051373 -0.707477 -0.626761 -0.090690 11
HZ_T3 -0.209482 -0.284500 -0.456123 0.287953 17
IH_T1 -0.017786 0.212286 0.402395 -0.408806 18
IH_T2 -0.394654 -0.129818 0.358900 -0.429682 15
IH_T3 0.059865 -0.261604 0.568370 -0.300224 16
IH_T4 -0.159250 -0.144365 0.498412 -0.599404 12
IH_T5 -0.547112 -0.189500 0.758668 -0.303647 20
IH_T6 -0.673832 -0.467925 0.924921 -0.210702 17
OST_T1 -0.080579 -0.168180 -0.074228 -0.395828 14
RW_T1 -0.347305 -0.005233 -0.492337 -0.074018 18
RW_T2 -0.430563 -0.081961 -0.039556 -0.289489 11
RW_T3 -0.427977 -0.401413 -0.733190 0.092576 20
RW_T4 -0.437579 0.115553 -0.466525 -0.155855 13
RW_T5 -0.497717 -0.057785 -0.089060 -0.257333 13
RWB_T1 0.992864 -0.084741 -0.172626 0.254606 22
RWB_T2 0.492169 -0.203401 -0.506953 0.511755 19
RWB_T3 1.372265 0.336062 0.070865 0.019864 16
RWB_T4 0.789543 0.168187 0.703618 -0.672646 17
WM_T1 -0.112521 -0.797035 -0.372285 0.077007 14
WM_T2 0.008648 -0.546527 -0.272787 -0.099172 15
WM_T3 -0.161854 -0.781419 -0.499302 -0.475927 18
WM_T4 -0.247226 -0.792233 -0.167708 -0.112969 12
WM_T5 -0.474015 -0.822478 -0.194942 -0.321107 11
WR_T3 -0.186818 0.314175 -0.157542 -0.245089 4
WR_T4 -0.421249 0.283086 0.021062 0.081024 9
FS_T1 -0.303028 2.147182 -0.215759 0.372133 13
FS_T2 -0.450260 1.934448 -0.277489 -0.023677 7
FS_T3 -0.342402 1.536942 -0.385465 0.105576 9
FS_T4 -0.770140 1.360177 -0.163095 0.172052 11
WR_T1 -1.268393 1.374820 -0.000533 0.180091 12
WR_T2 -1.099601 0.896554 0.059584 0.372984 16
RWB_Si 4.133449 0.575623 0.163993 0.115948 20
When I try to plot the object with the base plot function, there is a lack of customization.
plot (dca, display = 'sites', type = 'p',
main = "DCA",
cols = c("black"), pch = 3, cex = 0.7)
text(dca, display = 'sites', cex=0.7, pos=2)
The plot labels overlap, is there a way to sort out this issue?
I have tried to extract the data with a function I found for a similar issue:
# Return the rows of a fortified ordination object that belong to the
# requested score layers.
#
# object: passed straight to fortify()
# axes:   axes forwarded to fortify()
# layers: values of the `Score` column to keep; only "species" and "sites"
#         rows are ever returned, whatever else is listed here
# ...:    further arguments forwarded to fortify()
ggvegan_data <- function(object, axes = c(1, 2), layers = c("species", "sites"), ...) {
  scores_df <- fortify(object, axes = axes, ...)
  # A row survives only if its Score is requested AND is a species/site score.
  in_layers <- scores_df$Score %in% layers
  is_point_layer <- scores_df$Score %in% c("species", "sites")
  scores_df[in_layers & is_point_layer, , drop = FALSE]
}
and then try to use ggplot
# Extract the ordination scores, then plot the first two DCA axes.
lichen.plot.data <- ggvegan_data(dca)
# The plot must read the object created on the line above; the earlier draft
# referred to `plot.data`, which is never defined.
# NOTE(review): `Score` and `Label` are presumably columns produced by
# fortify() -- confirm against the fortify method in use.
p <- ggplot(data = lichen.plot.data, aes(x = DCA1, y = DCA2, colour = Score)) +
  geom_point() +
  # nudge_y shifts each label vertically away from its point
  geom_text(aes(label = Label), nudge_y = 0.3)
p
But there is following error message:
Error in `fortify()`:
! `data` must be a <data.frame>, or an object coercible by `fortify()`, not an S3 object with class <decorana>.
Run `rlang::last_error()` to see where the error occurred.
I have found kind of a work around.
# Work-around: take the scores out of the DCA object and plot them directly.
t2 <- scores(dca)
# transform it into a data frame
t2 <- as.data.frame(t2)
class(t2)
# Label each row with its own row name instead of a hand-typed vector. The
# typed list had to be kept in the same order as the scores by hand and
# contained stray trailing spaces ("RW_T1 ", "WM_T1 ").
# NOTE(review): assumes scores() keeps the site names as row names -- confirm.
t2$label <- rownames(t2)
# plot the data with ggplot; geom_text_repel() pushes overlapping labels apart
ggplot(data = t2, aes(x = DCA1, y = DCA2)) +
  theme_bw() +
  geom_text_repel(aes(label = label), box.padding = unit(0.45, "lines")) +
  geom_point(colour = "green", size = 3)
This gives me the ability to use data with ggplot and do different stuff with the label problem.

How to combine multiple ggplot geom_col() into one graph?

Question
I am trying to combine three ggplot geom_bar into one geom_bar plot utilising dodge so I can visually compare data across two categorical and one numeric variables. What am I doing wrong?
Individual graphs work
Each graph works on its own (with formatting issues), and I've been following answers on SO such as "How to overlay two geom_bar?", but I don't understand what needs to be done.
# Build the first-dose bar chart for one country, with one facet per vaccine.
#
# country_data: data frame with TargetGroup, FirstDosePC and Vaccine columns
# country_name: country name inserted into the plot title
# bar_fill:     fill colour of the bars
#
# FirstDosePC is stored as character, so it is converted with as.numeric()
# before being mapped to y; otherwise the y axis is treated as discrete.
plot_first_dose <- function(country_data, country_name, bar_fill) {
  ggplot(country_data, aes(TargetGroup, as.numeric(FirstDosePC))) +
    geom_col(width = 0.8, fill = bar_fill) +
    facet_grid(. ~ Vaccine) +
    # Rotate the target-group labels so they do not overlap
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
    labs(
      title = paste0("1st Dose ", country_name, " by Group & Vaccine Type"),
      caption = "(ECDC, 2021)",
      x = "Target Groups over 18",
      y = "First Dose Administered"
    )
}

ONE <- plot_first_dose(Ireland, "Ireland", "green")
TWO <- plot_first_dose(Italy, "Italy", "blue")
THREE <- plot_first_dose(Latvia, "Latvia", "red")
An example of failed code
My coding attempts look close to this but it seems to fail - I don't understand why. I am hoping to learn how to add three graphs together with labels and to use dodge
# Failing attempt, kept exactly as written. Two things differ from the
# individual plots: the Italy data is passed as `FDPercent=Italy`, which is not
# the `data` argument of geom_bar(), and y is mapped to FirstDose rather than
# FirstDosePC. The second geom_bar() names no data of its own.
OneTwo <- ONE + geom_bar(FDPercent=Italy, aes(TargetGroup, FirstDose))+ geom_bar(stat = 'identity',width = 0.8, fill = "blue") +
facet_grid(.~Vaccine) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
labs(title="1st Dose Italy by Group & Vaccine Type",
caption = "(ECDC, 2021)",
x="Target Groups over 18",
y="First Dose Administered")
My individual graphs look like this
The graph type I'm aiming for
and what I am aiming for is something like this but breaking it out by vaccine type to stretch my learning, etc (source https://towardsdatascience.com/track-covid-19-data-yourself-with-r-eb3e641cd4b3)
My raw data comes from
data <- read.csv("https://opendata.ecdc.europa.eu/covid19/vaccine_tracker/csv/data.csv", na.strings = "", fileEncoding = "UTF-8-BOM")
and is manipulated to test out R functions that has left me with a dataframe called FDPercent with a numeric column called FirstDosePC (Percentage of 1st Dose per country population) that is linked to Country with 30 EU countries (ISO 3166-1-alpha-2 categorical data) and 10 TargetGroup types (categorical) in the data frame.
> dput(head(FDPercent,3))
structure(list(Country = c("AT", "AT", "AT"), NumberDosesReceived = c(0L,
0L, 61425L), NumberDosesExported = c(0L, 0L, 0L), FirstDose = c(0L,
0L, 87L), FirstDoseRefused = c(NA_integer_, NA_integer_, NA_integer_
), SecondDose = c(0L, 0L, 0L), UnknownDose = c(0L, 0L, 0L), TargetGroup = c("Age18_24",
"Age18_24", "Age18_24"), Vaccine = c("UNK", "AZ", "COM"), Population = c(8901064L,
8901064L, 8901064L), Date = structure(c(18624, 18624, 18624), class = "Date"),
FirstDosePC = c("0.0000", "0.0000", "0.0010")), row.names = 21:23, class = "data.frame")
> str(FDPercent)
'data.frame': 116532 obs. of 12 variables:
$ Country : chr "AT" "AT" "AT" "AT" ...
$ NumberDosesReceived: int 0 0 61425 0 0 0 61425 0 0 0 ...
$ NumberDosesExported: int 0 0 0 0 0 0 0 0 0 0 ...
$ FirstDose : int 0 0 87 0 0 0 1299 0 0 0 ...
$ FirstDoseRefused : int NA NA NA NA NA NA NA NA NA NA ...
$ SecondDose : int 0 0 0 0 0 0 0 0 0 0 ...
$ UnknownDose : int 0 0 0 0 0 0 0 0 0 0 ...
$ TargetGroup : chr "Age18_24" "Age18_24" "Age18_24" "Age18_24" ...
$ Vaccine : chr "UNK" "AZ" "COM" "MOD" ...
$ Population : int 8901064 8901064 8901064 8901064 8901064 8901064 8901064 8901064 8901064 8901064 ...
$ Date : Date, format: "2020-12-28" "2020-12-28" "2020-12-28" "2020-12-28" ...
$ FirstDosePC : chr "0.0000" "0.0000" "0.0010" "0.0000" ...
With help from #kat in the comments - changed from geom_bar() to geom_col() and dropped the third variable
# One subset of FDPercent per country
Ireland <- subset(FDPercent, Country == "IE")
Italy <- subset(FDPercent, Country == "IT")
Latvia <- subset(FDPercent, Country == "LV")

# Base plot: Italy with the widest bars, minimal theme and no axis titles yet.
# FirstDosePC is character, so it is converted to numeric inside aes().
ONE1 <- ggplot(Italy, aes(x = Date, y = as.numeric(FirstDosePC))) +
  geom_col(fill = "red", alpha = 1, width = 7) +
  theme_minimal(base_size = 8) +
  labs(x = NULL, y = NULL) +
  scale_x_date(date_labels = "%Y/%m/%d")

# Add Ireland, then Latvia, as extra layers with progressively narrower bars
OneTwo <- ONE1 +
  geom_col(
    data = Ireland,
    mapping = aes(x = Date, y = as.numeric(FirstDosePC)),
    fill = "Green", alpha = 1, width = 5
  )
OneTwoThree <- OneTwo +
  geom_col(
    data = Latvia,
    mapping = aes(x = Date, y = as.numeric(FirstDosePC)),
    fill = "black", alpha = 1, width = 2
  )

# Titles and axis labels go on last; the result prints at top level
OneTwoThree +
  labs(
    title = "Ireland, Italy, & Latvia - First Dose comparision",
    subtitle = "Using First Dose Delivered per day as a percentage of population",
    caption = "(ECDC, 2021)",
    x = "Date Administered",
    y = "% Population treated"
  )

create directed arrow plots of two variables using ggplot2 in R

I have two variables (V1,V2) measured on same subject (id) at two time points (timepoint). I want to have a scatterplot with arrow paths to show how values moved from T1 to T2 for the same subject.
In my example, some subjects do not have change in V1 nor V2, it would be ideal to show just as one dot for those sub (sub 1 for example), but I am OK with two dots for two visits, since they will be overlap. There are also sub with a decrease in either V1 or V2 (sub 2 for example), those sub were shown in red arrow above. The third group of subjects show an increase in either V1 or V2 (sub 6 and 7): these sub were in green.
However, what I really need is all arrows point from T1 to T2. That is I hope the green arrow change direction.
The dataset can be generated by:
# Example data: eight subjects (id), two rows each. Within every subject the
# timepoint-2 row comes first, followed by the timepoint-1 row.
datatest <- data.frame(
  timepoint = rep(2:1, times = 8),
  id = as.numeric(rep(1:8, each = 2)),
  V1 = c(
    30.29, 30.29, 21.60, 31.43, 20.75, 20.75, 21.60, 30.03,
    21.60, 31.30, 31.60, 21.72, 31.6, 20.02, 11.60, 20.16
  ),
  V2 = c(
    40, 40, 30.78, 41.63, 40.41, 40.41, 30.78, 40.97,
    20.78, 40.84, 41.85, 41.85, 40.78, 31.79, 20.78, 30.23
  )
)
which looks like this:
timepoint id V1 V2
1 2 1 30.29 40.00
2 1 1 30.29 40.00
3 2 2 21.60 30.78
4 1 2 31.43 41.63
5 2 3 20.75 40.41
6 1 3 20.75 40.41
7 2 4 21.60 30.78
8 1 4 30.03 40.97
9 2 5 21.60 20.78
10 1 5 31.30 40.84
11 2 6 31.60 41.85
12 1 6 21.72 41.85
13 2 7 31.60 40.78
14 1 7 20.02 31.79
15 2 8 11.60 20.78
16 1 8 20.16 30.23
To generate the (wrong) plot I currently have, please run the codes below:
library(ggplot2)
library(lemon)
ggplot(datatest, aes(V1,V2,color=as.factor(timepoint),group=id)) +ggtitle("V2 vs V1 from T1 to T2")+
geom_pointline(linesize=1, size=2, distance=4, arrow = arrow(angle = 30, length = unit(0.1, "inches"), ends = "first", type = "open") )+
scale_x_continuous(limits = c(0,33), breaks=seq(0,30,10), expand = c(0, 0)) +
scale_y_continuous(limits = c(0,43), breaks=seq(0,44,10),expand = c(0, 0))+
scale_color_manual(values=c("green","red"))+labs(color = "Timepoint")
The plot currently looks like this:
Thank you!
Would this get you closer?
library(dplyr)
library(tidyr)
library(ggplot2)
# Example data: two rows per subject (timepoint 2 first, then timepoint 1).
data <- data.frame(timepoint =rep(seq(2,1),8),
id = c(1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8),
V1= c( 30.29, 30.29, 21.60, 31.43, 20.75,20.75, 21.60, 30.03, 21.60, 31.30, 31.60, 21.72, 31.6, 20.02, 11.60, 20.16),
V2=c(40, 40, 30.78, 41.63, 40.41, 40.41,30.78, 40.97, 20.78, 40.84, 41.85, 41.85, 40.78, 31.79,20.78, 30.23))

# Reshape to one row per subject with columns V1_T1, V1_T2, V2_T1, V2_T2, then
# flag the subject "red" when V1 or V2 is lower at T2 than at T1, else "green".
data <- data %>%
  mutate(row_id = paste0("T", timepoint)) %>%
  pivot_wider(
    id_cols = id,
    names_from = row_id,
    values_from = c(V1, V2)
  ) %>%
  mutate(colour = ifelse((V1_T1 > V1_T2) | (V2_T1 > V2_T2), "red", "green"))

# One point per visit and one arrow per subject, always drawn from T1 to T2.
ggplot(data = data) +
  geom_point(aes(x = V1_T1, y = V2_T1)) +
  geom_point(aes(x = V1_T2, y = V2_T2)) +
  geom_segment(
    aes(x = V1_T1, xend = V1_T2, y = V2_T1, yend = V2_T2, colour = colour),
    arrow = arrow(length = unit(0.3, "cm"))
  ) +
  # The `colour` column already holds colour names. Without an identity scale
  # ggplot2 treats them as category labels and assigns its default palette,
  # so the arrows would not be drawn in red and green.
  scale_colour_identity(
    name = NULL,
    guide = "legend",
    breaks = c("red", "green"),
    labels = c("V1 or V2 decreased", "no decrease")
  ) +
  scale_x_continuous(
    limits = c(0, 33),
    breaks = seq(0, 30, 10),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    limits = c(0, 43),
    breaks = seq(0, 44, 10),
    expand = c(0, 0)
  )
You can filter the object `data` to remove the rows where neither V1 nor V2 changes, so that zero-length segments are not drawn.

What is wrong with my custom colour palette in this plot?

Using ggsurvplot to draw some Kaplan-Meier curves.
5 curves should be plotted and I want control over their colours.
Here is the output of the survfit being plotted:
> elective_30Decadesurv
Call: survfit(formula = elective30Surv ~ electives$Decade)
n events median 0.95LCL 0.95UCL
electives$Decade=50 14 0 NA NA NA
electives$Decade=60 173 2 NA NA NA
electives$Decade=70 442 5 NA NA NA
electives$Decade=80 168 4 NA NA NA
electives$Decade=90 2 0 NA NA NA
Here is a working plot using the default colour palette, "hue":
> ggsurvplot(elective_30Decadesurv,
data = electives,
palette = "hue",
title = "30 day survival after elective EVAR",
legend = "none",
legend.title = "Decade",
legend.labs = c("5th",
"6th",
"7th",
"8th",
"9th"
),
censor.shape = 124,
ggtheme = survPlotTheme,
risk.table = "nrisk_cumevents",
risk.table.y.text.col = TRUE,
risk.table.fontsize = 3,
risk.table.height = 0.3,
break.time.by = 5,
ylim = c(0.95,
1
),
pval = TRUE,
pval.size = 3,
pval.coord = c(1,
0.96
)
)
See plot in section 3.1.4 of this webpage for the output of the above
The Decade group has 5 entries, so I'm trying to provide five colours to palette.
However, both:
> ggsurvplot(elective_30Decadesurv,
data = electives,
palette = c("#440154",
"#3B528B",
"#21908C",
"#5DC863",
"#5DC863"
),
title = "30 day survival after elective EVAR",
legend = "none",
legend.title = "Decade",
legend.labs = c("5th",
"6th",
"7th",
"8th",
"9th"
),
censor.shape = 124,
ggtheme = survPlotTheme,
risk.table = "nrisk_cumevents",
risk.table.y.text.col = TRUE,
risk.table.fontsize = 3,
risk.table.height = 0.3,
break.time.by = 5,
ylim = c(0.95,
1
),
pval = TRUE,
pval.size = 3,
pval.coord = c(1,
0.96
)
)
And:
> fiveColours <- c("#440154",
"#3B528B",
"#21908C",
"#5DC863",
"#5DC863"
)
> ggsurvplot(elective_30Decadesurv,
data = electives,
palette = fiveColours,
title = "30 day survival after elective EVAR",
legend = "none",
legend.title = "Decade",
legend.labs = c("5th",
"6th",
"7th",
"8th",
"9th"
),
censor.shape = 124,
ggtheme = survPlotTheme,
risk.table = "nrisk_cumevents",
risk.table.y.text.col = TRUE,
risk.table.fontsize = 3,
risk.table.height = 0.3,
break.time.by = 5,
ylim = c(0.95,
1
),
pval = TRUE,
pval.size = 3,
pval.coord = c(1,
0.96
)
)
Give the same error:
Error in names(.cols) <- grp.levels :
'names' attribute [5] must be the same length as the vector [4]
What vector is length [4]?
Is 'names' attribute my colour vector?
If I take one of the colours out of the custom palette, eg fiveColours <- c("#440154","#3B528B","#21908C","#5DC863") I get this error:
Error: Insufficient values in manual scale. 5 needed but only 4 provided.
Which implies the number of colours provided is correct but something else is causing the issue.
I've troubleshot to the limits of my own ability. Help please!
FYI:
> electives %>% select(Decade) %>% group_by(Decade) %>% summarise(n())
# A tibble: 5 x 2
Decade `n()`
<fct> <int>
1 50 14
2 60 173
3 70 442
4 80 168
5 90 2
Should prove the length of the Decade variable and here is how the survival object and survfit were generated:
> elective5Surv <- Surv(electives$surv5Y, electives$dead5Y)
> elective_5Decadesurv <- survfit(elective5Surv ~ electives$Decade)
Ok, I have sorted my own mistake by proof-reading!
Of the five hex colours I’d provided, two were identical (not on purpose.)
I changed the fifth colour to a different hex value (what it was meant to be in the first place) and it works now.
Thanks, Rui, for your response earlier, it helped me down the path!

Draw a graph in R with header elaborate on two columns

I have a table with header expanded on two columns. How to draw a 3D graph on this table OR what would be a way to draw a graph on tables having elaborated headers. Kindly suggest me alternate ways to achieve this (if any)
Crime Table:
year
2014 2015 2016
Reported Detected Reported Detected Reported Detected
Murder 221 208 178 172 26 20
Murder(Gain) 20 16 11 9 1 1
Dacoity 51 45 44 36 5 1
Robbery 538 316 351 201 23 10
Chain Snatching 528 394 342 229 23 0
Code:
library(tables)
# CLASS 1 CRIMES 2014: reported and detected counts, one row per crime type
c14 <- data.frame(
  Reported = c(221, 20, 51, 538, 528),
  Detected = c(208, 16, 45, 316, 394),
  row.names = c("Murder", "Murder(Gain)", "Dacoity", "Robbery", "Chain Snatching")
)
c14

# CLASS 1 CRIMES 2015
c15 <- data.frame(
  Reported = c(178, 11, 44, 351, 342),
  Detected = c(172, 9, 36, 201, 229),
  row.names = c("Murder", "Murder(Gain)", "Dacoity", "Robbery", "Chain Snatching")
)
c15

# CLASS 1 CRIMES 31-01-2016
c16 <- data.frame(
  Reported = c(26, 1, 5, 23, 23),
  Detected = c(20, 1, 1, 10, 0),
  row.names = c("Murder", "Murder(Gain)", "Dacoity", "Robbery", "Chain Snatching")
)
c16
# Turn one yearly crime table into stackable rows.
#
# crimes: data frame with one row per crime type (crime names as row names)
# year:   label for the year, as a string
#
# The crime names move into the factor column `what`, with levels in the
# table's own row order. stringsAsFactors = TRUE makes `year` a factor under
# every R version; since R 4.0 the default would leave it as character.
add_year <- function(crimes, year) {
  data.frame(
    crimes,
    year = year,
    what = factor(rownames(crimes), levels = rownames(crimes)),
    row.names = NULL,
    check.names = FALSE,
    stringsAsFactors = TRUE
  )
}

# rbind with rownames as a column
st <- rbind(
  add_year(c14, "2014"),
  add_year(c15, "2015"),
  add_year(c16, "2016")
)
crimetable <- tabular(Heading()*what ~ year*(`Reported` +`Detected`)*Heading()*(identity),data=st)
crimetable
As I hate 3D plots for 3-way tables and I like ggplot2, I suggest this:
Gather your data into "long" format:
library(tidyr)
st_long = gather(st, type, count, -c(year, what))
head(st_long, 3)
# year what type count
# 1 2014 Murder Reported 221
# 2 2014 Murder(Gain) Reported 20
# 3 2014 Dacoity Reported 51
As you can see, both the Detected and Reported columns are now combined into a single column called type. This is useful for ggplot2, as it can easily create facets. Facets are separate panels within the plot that share the same aesthetic components but work on different groups of the data:
library(ggplot2)
ggplot(st_long, aes(year, count, group = what, color = what)) +
geom_line() +
facet_wrap(~ type)
(I am not saying that line plot is the only/best plot here, but it is often used when comparing frequencies across different time-points.)

Resources