Add hatch on geom_segment within the gggenes in ggplot2 [R]

Add hatch on geom_segment within the gggenes in ggplot2 [R] - r

I have a dataframe such as :
> head(tab)
molecule gene start end strand orientation hatch
1 Genome5 genA 405113 407035 forward -1 NO
2 Genome5 genB 407035 407916 forward -1 NO
3 Genome5 genC 407927 408394 forward -1 NO
4 Genome5 genD 408387 408737 reverse -1 NO
5 Genome5 genE 408751 409830 forward 1 NO
6 Genome5 genF 409836 410315 forward -1 NO
And I can produce the following plot using this script :
library(ggplot2)
library(gggenes)

# Gene map: one arrow per gene spanning start..end, filled by gene name,
# with each molecule drawn in its own single-column facet.
# NOTE(review): this plots `example_genes`, presumably the dataset bundled with
# gggenes, not the `tab` data frame shown above - confirm which one is meant.
gene_map <- ggplot(
  example_genes,
  aes(xmin = start, xmax = end, y = molecule, fill = gene)
)
gene_map <- gene_map + geom_gene_arrow()
gene_map <- gene_map + facet_wrap(~ molecule, scales = "free", ncol = 1)
gene_map <- gene_map + scale_fill_brewer(palette = "Set3")
gene_map
I would simply like to add hatching to the genes whose value in the tab$hatch column is "YES",
and then get the following result:
Here is the dput format of the table :
structure(list(molecule = c("Genome5", "Genome5", "Genome5",
"Genome5", "Genome5", "Genome5", "Genome5", "Genome5", "Genome5",
"Genome5", "Genome3", "Genome3", "Genome3", "Genome3", "Genome3",
"Genome3", "Genome3", "Genome3", "Genome4", "Genome4", "Genome4",
"Genome4", "Genome4", "Genome4", "Genome2", "Genome2", "Genome2",
"Genome2", "Genome2", "Genome2", "Genome2", "Genome2", "Genome1",
"Genome1", "Genome1", "Genome1", "Genome1", "Genome1", "Genome1",
"Genome1", "Genome1", "Genome1", "Genome6", "Genome6", "Genome6",
"Genome6", "Genome6", "Genome6", "Genome6", "Genome6", "Genome7",
"Genome7", "Genome7", "Genome7", "Genome7", "Genome7", "Genome7",
"Genome7", "Genome7", "Genome7", "Genome7", "Genome8", "Genome8",
"Genome8", "Genome8", "Genome8", "Genome8", "Genome8", "Genome8",
"Genome8", "Genome8", "Genome8"), gene = c("genA", "genB", "genC",
"genD", "genE", "genF", "protF", "protC", "protD", "protE", "genA",
"genB", "genC", "genD", "genE", "genF", "protA", "protB", "genA",
"genB", "genC", "genD", "genE", "genF", "genA", "genB", "genC",
"genD", "genE", "genF", "protA", "protB", "genA", "genB", "genC",
"genD", "genE", "genF", "protF", "protC", "protD", "protE", "genA",
"genB", "genC", "genD", "genE", "genF", "protA", "protB", "genB",
"genC", "genD", "genE", "genF", "protA", "protB", "protF", "protC",
"protD", "protE", "genB", "genC", "genD", "genE", "genF", "protA",
"protB", "protF", "protC", "protD", "protE"), start = c(405113,
407035, 407927, 408387, 408751, 409836, 410335, 412621, 412830,
413867, -67849, -65867, -64997, -64507, -64127, -63011, -62550,
-62187, -47353, -45431, -44522, -44070, -43701, -42614, 8345,
10327, 11394, 11878, 12258, 13365, 13726, 14260, 15389, 17301,
18176, 18641, 18999, 20086, 20474, 22777, 22986, 24024, 65751,
67698, 68605, 69128, 69501, 70614, 71008, 71375, -9390, -8984,
-8500, -8130, -7019, -6662, -6306, -5695, -3446, -3188, -2116,
2, 413, 898, 1268, 2376, 2733, 3089, 3700, 5949, 6217, 7307),
end = c(407035, 407916, 408394, 408737, 409830, 410315, 412596,
412833, 413870, 414850, -65864, -65013, -64548, -64127, -63048,
-62640, -62209, -61549, -45443, -44571, -44070, -43723, -42625,
-42201, 10330, 11181, 11843, 12255, 13337, 13733, 14067,
14919, 17299, 18161, 18640, 18985, 20078, 20451, 22720, 22989,
24023, 25010, 67691, 68570, 69135, 69511, 70583, 71015, 71349,
72034, -8992, -8511, -8123, -7048, -6663, -6321, -5653, -3449,
-3207, -2136, -1127, 406, 886, 1275, 2350, 2732, 3074, 3742,
5946, 6182, 7269, 8296), strand = c("forward", "forward",
"forward", "reverse", "forward", "forward", "reverse", "forward",
"forward", "forward", "reverse", "reverse", "reverse", "forward",
"reverse", "reverse", "reverse", "reverse", "reverse", "reverse",
"forward", "reverse", "forward", "forward", "forward", "forward",
"forward", "forward", "forward", "reverse", "forward", "reverse",
"reverse", "forward", "reverse", "forward", "reverse", "forward",
"forward", "forward", "forward", "forward", "forward", "forward",
"reverse", "forward", "reverse", "forward", "forward", "forward",
"reverse", "forward", "reverse", "reverse", "forward", "reverse",
"forward", "forward", "reverse", "reverse", "forward", "forward",
"forward", "forward", "forward", "forward", "reverse", "forward",
"forward", "reverse", "reverse", "reverse"), orientation = c(-1,
-1, -1, -1, 1, -1, 1, 1, -1, -1, -1, 1, -1, -1, 1, 1, 1,
-1, 1, 1, 1, -1, -1, -1, -1, -1, 1, -1, 1, 1, 1, 1, 1, -1,
1, -1, 1, 1, -1, 1, -1, -1, -1, 1, -1, -1, 1, 1, -1, -1,
1, -1, -1, -1, -1, 1, -1, -1, 1, 1, 1, 1, 1, 1, -1, -1, -1,
-1, -1, 1, 1, -1), hatch = c("NO", "NO", "NO", "NO", "NO",
"NO", "NO", "NO", "NO", "NO", "YES", "NO", "NO", "NO", "NO",
"NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO",
"NO", "NO", "NO", "NO", "NO", "NO", "NO", "YES", "NO", "NO",
"NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO",
"NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO",
"NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO",
"NO", "NO", "NO", "NO", "NO", "NO", "NO")), row.names = c(NA,
-72L), class = "data.frame")

Related

Running Fishers discriminate test in R

I have tried the following code to run Fisher's discriminant on my data set, but it doesn't like the negative values.
# Split the data by class. Heroin is dropped first, then the User label itself,
# so that only the remaining (score) columns are left for each class.
Split_user <- user_col %>%
  select(-Heroin) %>%
  filter(User == "Yes")
Split_user1 <- Split_user %>%
  select(-User)

Split_nonuser <- user_col %>%
  select(-Heroin) %>%
  filter(User == "No")
# Fixed: this was previously derived from Split_user, which made the
# "non-user" class an exact copy of the "user" class.
Split_nonuser1 <- Split_nonuser %>%
  select(-User)

# Calculate the mean by class
m1 <- colMeans(Split_user1)
m1
m2 <- colMeans(Split_nonuser1)
m2

# Calculate the covariances by class
S1 <- cov(Split_user1)
S2 <- cov(Split_nonuser1)

# Side-by-side table of the per-class column means.
dat <- data.frame(
  "User" = c(m1),
  "Non User" = c(m2),
  stringsAsFactors = FALSE
)
dat

# NOTE(review): fisher.test() is Fisher's exact test for count data; it rejects
# negative entries, so it fails on a table of (possibly negative) means. It is
# not Fisher's linear discriminant. S1 and S2 are computed above but never
# used; a discriminant direction would presumably be
# solve(S1 + S2, m1 - m2) - confirm the intended analysis.
test <- fisher.test(dat)
test
structure(list(Nscore = c(0.31287, -0.67825, -0.46725, -0.14882,
0.73545, -0.67825, -0.46725, -1.32828, 0.62967, -0.24649, -1.05308,
-1.32828, 2.28554, -0.79151, -0.92104, -2.05048, -1.55078, 0.52135,
1.37297, -0.34799, -0.79151, -1.1943, 0.41667, 1.60383, -0.14882
), Escore = c(-0.57545, 1.93886, 0.80523, -0.80615, -1.6334,
-0.30033, -1.09207, 1.93886, 2.57309, 0.00332, 0.80523, 0.00332,
0.16767, 0.80523, 1.45421, -1.50796, -0.80615, -1.23177, -0.15487,
-1.7625, 0.80523, 0.47617, -0.94779, -3.27393, 0.63779), Oscore = c(-0.58331,
1.43533, -0.84732, -0.01928, -0.45174, -1.55521, -0.45174, -0.84732,
-0.97631, -1.42424, -1.11902, 0.14143, 0.44585, -0.01928, 0.44585,
-1.55521, -1.68062, -0.31776, -0.17779, -2.39883, 0.7233, -1.11902,
-0.84732, -1.27553, 1.24033), Ascore = c(-0.91699, 0.76096, -1.6209,
0.59042, -0.30172, 2.03972, -0.30172, -0.30172, 0.76096, 0.59042,
-0.76096, -1.92595, -1.6209, 0.94156, -0.60633, -1.07533, 0.28783,
-0.45321, -1.92595, -1.92595, 1.61108, -0.60633, 1.11406, 0.28783,
0.76096), Cscore = c(-0.00665, -0.14277, -1.0145, 0.58489, 1.30612,
1.63088, 0.93949, 1.63088, 1.13407, 0.12331, 1.81175, -0.52745,
-0.78155, 3.46436, 1.63088, 1.13407, 0.7583, -1.38502, -1.5184,
0.7583, -1.13788, 1.81175, -0.89891, -1.0145, 1.46191), Impulsivity = c(-0.21712,
-0.71126, -1.37983, -1.37983, -0.21712, -1.37983, -0.21712, 0.19268,
-1.37983, -1.37983, 0.19268, 0.52975, 1.29221, -0.71126, 1.29221,
-0.71126, -0.21712, -1.37983, -0.71126, -1.37983, 0.19268, -0.21712,
-0.71126, -1.37983, -0.21712), SS = c(-1.18084, -0.21575, 0.40148,
-1.18084, -0.21575, -1.54858, 0.07987, -0.52593, -1.54858, -0.84637,
0.07987, 1.2247, 0.07987, -0.84637, 0.7654, -0.52593, -2.07848,
-0.84637, -0.21575, -2.07848, -0.21575, -1.18084, 0.07987, -1.54858,
-0.52593), Heroin = c("CL0", "CL0", "CL0", "CL0", "CL0", "CL0",
"CL0", "CL0", "CL0", "CL0", "CL0", "CL0", "CL0", "CL0", "CL0",
"CL0", "CL0", "CL0", "CL0", "CL0", "CL0", "CL0", "CL0", "CL1",
"CL0"), User = c("No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No")), row.names = c(NA, -25L), class = c("tbl_df",
"tbl", "data.frame"))
Matrix - non-negative entries
structure(c(0.610545701853111, 0.96447451044118, 0.851040908078699,
0.589544977621028, 0.827711005318391, 0.904054295338232, 0.589544977621028,
0.456502942067452, 0.688702581355738, 0.533249474881585, 0.389562632031922,
0.456502942067452, 0.632276676717644, 0.436140184047234, 0.653498081085907,
0.166581403886779, 0.495545270523754, 0.476097331977154, 0.671587833703474,
0.533249474881585, 0.436140184047234, 0.71082275573317, 0.610545701853111,
0.550877805396231, 0.495545270523754, 0.606851082126989, 0.606851082126989,
0.529890274393643, 0.17257337601325, 0.40854553851544, 0.238467663593076,
0.327676651856099, 0.580185689487682, 0.367919991081809, 0.479144172121096,
1, 0.505317471692705, 0.453951873616486, 0.0885974551303489,
0.259420636058159, 0.505317471692705, 0.63189368241683, 0.386973070246684,
0, 0.327676651856099, 0.479144172121096, 0.327676651856099, 0.386973070246684,
0.350233306260252, 0.63189368241683, 0.535574626985272, 0.563618843854673,
0.750572944212062, 0.223135727679215, 0.4774929083129, 0.706259916342132,
0.355063144061413, 0.275029248201035, 0.667195538247031, 0.563618843854673,
0.850078930076767, 0.223135727679215, 0.796364408545283, 0.163192941968364,
0.223135727679215, 0.355063144061413, 0.163192941968364, 0.595629597576807,
0.4774929083129, 0.223135727679215, 0.535574626985272, 0.706259916342132,
0.508037245380387, 0.383103354327933, 0.414857685465647, 0.403077115158141,
0.233825658261037, 0.558377116170461, 0.712895943634026, 0.210211608610118,
0.285534960468907, 0.658493870402802, 0.429792238205629, 0.533600585795802,
0.210211608610118, 0.482206789967235, 0.684122436721568, 0.285534960468907,
0.456117618078684, 0.533600585795802, 0.508041194672498, 0.18731799331194,
0.456117618078684, 0.357934394919504, 0.712895943634026, 0.558377116170461,
0.558377116170461, 0.156244832950339, 0.581589612247721, 0.658493870402802,
0.318731025010843, 0.202385427208327, 0.202385427208327, 0.249965664305335,
0.406440653462484, 0.599528335983808, 0.498561515107706, 0.571262830706954,
0.571262830706954, 0.788549949400029, 0.694141246205002, 0.694141246205002,
0.729304611825936, 0.474472314587249, 0.429044383403209, 0.694141246205002,
0.36191629319069, 0.474472314587249, 0.154790732976724, 0.474472314587249,
0.429044383403209, 0.297072430244326, 0.474472314587249, 0.154790732976724,
0.406440653462484, 0.446013490788146, 0.757189169998879, 1, 0,
0.528084009118427, 0.446013490788146, 0.271569845659404, 0.624098434171681,
0.271569845659404, 0.528084009118427, 0.757189169998879, 0.528084009118427,
0.446013490788146, 0.367285305878396, 0.156155405657909, 0.271569845659404,
0.624098434171681, 0.757189169998879, 0.528084009118427, 0, 0.367285305878396,
0.528084009118427, 0.528084009118427, 0.156155405657909, 0.528084009118427,
0.825751648038478, 1, 0.619957452233758, 0.308011329405206, 0.619957452233758,
0.53955917314341, 0.710932676034508, 0.46565805295222, 0.53955917314341,
1, 0.825751648038478, 0.619957452233758, 0.825751648038478, 0.710932676034508,
0.619957452233758, 0.53955917314341, 1, 0.710932676034508, 0.710932676034508,
0.825751648038478, 0.825751648038478, 0.619957452233758, 0.46565805295222,
0, 0.825751648038478), dim = c(25L, 7L), dimnames = list(c("30",
"67", "115", "20", "3", "18", "16", "8", "10", "77", "84", "71",
"54", "64", "85", "105", "58", "2", "102", "17", "79", "87",
"63", "11", "39"), c("Nscore", "Escore", "Oscore", "Ascore",
"Cscore", "Impulsivity", "SS")))

Cannot make a logistic curve in R

I am trying to make a logistic curve in R but the line does not appear in the plot.
My data are:
dput(los1)
structure(list(X1 = c("5.51688462301445", "2.55660506920185",
"4.17130300764484", "15.0032350113684", "0.0672790807684578",
"0", "10.7646529229551", "1.6819770192119", "4.44041933071867",
"2.69116323073877", "0", "0.740069888453036", "1.54741885767498",
"0.201837242305373", "1.81653518074882", "6.12239634993057",
"3.49851219996026", "22.4039338958996", "0.538232646147662",
"0.134558161536916", "1.2783025346007", "1.6819770192119", "16.9543283536541",
"60.0129400454734", "9.62090854989083", "0.470953565379205",
"33.7740985457708", "6.8624662383836", "0", "0", "4.50769841148758",
"62.6368241954438", "264.137671097005", "14.5995605267576", "0",
"0", "0", "6.12239634993057", "10.1591411960385", "22.9421665420477",
"0.470953565379205", "2.28748874612802", "13.8594906383046",
"11.0337692460289", "18.6363053728655", "27.2480277112295", "0.0672790807684578",
"0.470953565379205", "0", "0"), X2 = c("No", "No", "Yes", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "Yes", "Yes",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No")), row.names = c(NA, 50L), class = "data.frame")
and the code I use for the curve is:
# Question's attempt: recode X2 as 0/1 and overlay a smoother on the points.
# NOTE(review): X1 is stored as character strings in the dput above and the
# smoother is fitted with method = "lm" - presumably why no curve is drawn;
# confirm.
los1_coded <- mutate(los1, prob = ifelse(X2 == "Yes", 1, 0))

ggplot(los1_coded, aes(X1, prob)) +
  geom_point(alpha = 0.2) +
  geom_smooth(
    method = "lm",
    se = FALSE,
    method.args = list(family = "binomial")
  ) +
  labs(
    title = "Logistic Regression Model",
    x = "Plasma Glucose Concentration",
    y = "Probability of being diabete-pos"
  )
Any idea how to make the curve?

Another option using stat_smooth with "glm" method and "X1" converted to numeric like this:
library(tidyverse)

# Recode the response to 0/1 and convert the predictor from character to
# numeric in a single step, then fit a binomial glm smoother over the points.
los1 %>%
  mutate(
    prob = ifelse(X2 == "Yes", 1, 0),
    X1 = as.numeric(X1)
  ) %>%
  ggplot(aes(X1, prob)) +
  geom_point(alpha = 0.2) +
  stat_smooth(
    method = "glm",
    color = "green",
    se = FALSE,
    method.args = list(family = binomial)
  ) +
  labs(
    title = "Logistic Regression Model",
    x = "Plasma Glucose Concentration",
    y = "Probability of being diabete-pos"
  )
#> `geom_smooth()` using formula 'y ~ x'
Created on 2022-08-30 with reprex v2.0.2

los1 <- structure(list(X1 = c("5.51688462301445", "2.55660506920185",
"4.17130300764484", "15.0032350113684", "0.0672790807684578",
"0", "10.7646529229551", "1.6819770192119", "4.44041933071867",
"2.69116323073877", "0", "0.740069888453036", "1.54741885767498",
"0.201837242305373", "1.81653518074882", "6.12239634993057",
"3.49851219996026", "22.4039338958996", "0.538232646147662",
"0.134558161536916", "1.2783025346007", "1.6819770192119", "16.9543283536541",
"60.0129400454734", "9.62090854989083", "0.470953565379205",
"33.7740985457708", "6.8624662383836", "0", "0", "4.50769841148758",
"62.6368241954438", "264.137671097005", "14.5995605267576", "0",
"0", "0", "6.12239634993057", "10.1591411960385", "22.9421665420477",
"0.470953565379205", "2.28748874612802", "13.8594906383046",
"11.0337692460289", "18.6363053728655", "27.2480277112295", "0.0672790807684578",
"0.470953565379205", "0", "0"), X2 = c("No", "No", "Yes", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "Yes", "Yes",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No")), row.names = c(NA, 50L), class = "data.frame")
library(ggplot2)

# Inspect the raw structure: X1 arrives as character strings.
str(los1)

# Make the predictor numeric and encode the response as 1 ("Yes") / 0 (other).
los1[["X1"]] <- as.numeric(los1[["X1"]])
los1[["Y"]] <- as.numeric(los1[["X2"]] == "Yes")

# Scatter of the 0/1 response with a binomial glm fit drawn on top.
ggplot(los1, aes(X1, Y)) +
  geom_point(alpha = 0.2) +
  geom_smooth(
    method = "glm",
    se = FALSE,
    method.args = list(family = "binomial")
  ) +
  labs(
    title = "Logistic Regression Model",
    x = "Plasma Glucose Concentration",
    y = "Probability of being diabete-pos"
  )

how to group levels into a subgroup in a ggplot forest plot

> dput(fig2b_data)
structure(list(subgroup = c("sex", "sex", "ai_comorbid_bool",
"ai_comorbid_bool", "non_ai_comorbid_bool", "non_ai_comorbid_bool",
"age_70_plus", "age_70_plus", "ecog_combined", "ecog_combined",
"indication_combined", "indication_combined", "site", "site",
"site", "site", "site", "site", "site", "site"), level = c("Female",
"Male", "No", "Yes", "No", "Yes", "No", "Yes", "0", "1+", "Adjuvant",
"Metastatic / Unresectable", "Cambridge", "Belfast", "Cardiff",
"Liverpool", "Norwich", "Preston", "Southampton", "Taunton"),
subgroup_level = c("sex_Female", "sex_Male", "ai_comorbid_bool_No",
"ai_comorbid_bool_Yes", "non_ai_comorbid_bool_No", "non_ai_comorbid_bool_Yes",
"age_70_plus_No", "age_70_plus_Yes", "ecog_combined_0", "ecog_combined_1+",
"indication_combined_Adjuvant", "indication_combined_Metastatic / Unresectable",
"site_Cambridge", "site_Belfast", "site_Cardiff", "site_Liverpool",
"site_Norwich", "site_Preston", "site_Southampton", "site_Taunton"
), ref = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE,
TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE), adj_or = c(1, 1.92697788983048, 1,
0.309313271153888, 1, 1.60176654927755, 1, 0.581067651194834,
1, 0.606677244239784, 1, 0.757510322046024, 1, 0.0671548910659019,
1.24115412701041, 0.111740502056371, 0.296334401152569, 0.407313416513578,
0.100703132319318, 0.0580853387590806), ci_low = c(NA, 1.08574689964253,
NA, 0.0933004210866726, NA, 0.813446935851162, NA, 0.300096568750007,
NA, 0.301300997438692, NA, 0.395638695943013, NA, 0.0184879397812241,
0.316512222510664, 0.0310182213975059, 0.0774035454553755,
0.0834303368267395, 0.0228743220824828, 0.011193138928203
), ci_high = c(NA, 3.4667621174982, NA, 0.929482385449043,
NA, 3.1938659749789, NA, 1.11325241104074, NA, 1.21374279615277,
NA, 1.44670881667103, NA, 0.205952672316014, 4.59055508109202,
0.342443550375257, 1.00710088916867, 2.04034216674928, 0.387728614421501,
0.257636420370032), p = c(NA, 0.0263295963311719, NA, 0.0432646112707497,
NA, 0.175314541854903, NA, 0.103298047943536, NA, 0.158264479732785,
NA, 0.399589361570504, NA, 8.78601713425597e-06, 0.747238599523183,
0.000291277241946869, 0.0597081504970594, 0.260985385401162,
0.00132018341690714, 0.000328378914869459), sig = c(NA, TRUE,
NA, TRUE, NA, FALSE, NA, FALSE, NA, FALSE, NA, FALSE, NA,
TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE), col = c("REF",
"UP", "REF", "DOWN", "REF", "INSIG", "REF", "INSIG", "REF",
"INSIG", "REF", "INSIG", "REF", "DOWN", "INSIG", "DOWN",
"INSIG", "INSIG", "DOWN", "DOWN")), row.names = c(NA, -20L
), class = "data.frame")
I'd like to draw a forest plot, but where each level is grouped by subgroup. How can I do this?
I've tried this so far to get my plot, but struggling with grouping the levels:
........................................................................................................................................................................................................
# Forest plot attempt: one row per subgroup_level, point at the adjusted odds
# ratio and a horizontal bar from ci_low to ci_high, both coloured by `col`.
forest_attempt <- ggplot(
  data = fig2b_data,
  aes(x = adj_or, y = subgroup_level, color = col)
)
forest_attempt <- forest_attempt +
  geom_errorbarh(aes(xmax = ci_high, xmin = ci_low), size = .5, height = .2)
forest_attempt <- forest_attempt + geom_point(size = 2)
forest_attempt + theme_bw()
I would like:
_________________________
Sex
Male (ref) x
Female |----x-----|
_______________________________________________________________
AI comorbid
No (ref) x
Yes |----x-----|
_______________________________________________________________
Site
Cambridge(ref) x
Preston |----x-----|
Southampton |----x-----|
Belfast |----x-----|
__________________________________1____________________________

I think this can help. First, use the melt function to rearrange your data around the target columns you want, and then plot it. However, since I don't know exactly what you want to do, my plot has many points for subgroup = site, and I don't think that conveys any useful graphical information.
# Reshape to long form: the listed columns are kept as identifiers and every
# remaining column is stacked into a variable/value pair.
# NOTE(review): melt() is presumably reshape2::melt (or data.table::melt); no
# library() call is shown in this snippet - confirm which package is intended.
# NOTE(review): `df` is not defined in this snippet - presumably fig2b_data
# from the question; confirm.
df <- melt(df,id.vars=c("subgroup",'adj_or','ci_low','ci_high','col'))
# One y-axis row per subgroup. Both layers set color = col in their own aes(),
# which takes precedence over the plot-level color = variable.
ggplot(df, aes(x=adj_or, y=subgroup, color=variable))+
geom_errorbarh(aes(xmax = ci_high, xmin = ci_low, color = col), size = .5,height = .2) +
geom_point(aes(color = col), size = 2) + theme_bw()

how to group levels of each subgroup in a forest plot

> dput(fig2b_data)
structure(list(subgroup = c("sex", "sex", "ai_comorbid_bool",
"ai_comorbid_bool", "non_ai_comorbid_bool", "non_ai_comorbid_bool",
"age_70_plus", "age_70_plus", "ecog_combined", "ecog_combined",
"indication_combined", "indication_combined", "site", "site",
"site", "site", "site", "site", "site", "site"), level = c("Female",
"Male", "No", "Yes", "No", "Yes", "No", "Yes", "0", "1+", "Adjuvant",
"Metastatic / Unresectable", "Cambridge", "Belfast", "Cardiff",
"Liverpool", "Norwich", "Preston", "Southampton", "Taunton"),
subgroup_level = c("sex_Female", "sex_Male", "ai_comorbid_bool_No",
"ai_comorbid_bool_Yes", "non_ai_comorbid_bool_No", "non_ai_comorbid_bool_Yes",
"age_70_plus_No", "age_70_plus_Yes", "ecog_combined_0", "ecog_combined_1+",
"indication_combined_Adjuvant", "indication_combined_Metastatic / Unresectable",
"site_Cambridge", "site_Belfast", "site_Cardiff", "site_Liverpool",
"site_Norwich", "site_Preston", "site_Southampton", "site_Taunton"
), ref = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE,
TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE), adj_or = c(1, 1.92697788983048, 1,
0.309313271153888, 1, 1.60176654927755, 1, 0.581067651194834,
1, 0.606677244239784, 1, 0.757510322046024, 1, 0.0671548910659019,
1.24115412701041, 0.111740502056371, 0.296334401152569, 0.407313416513578,
0.100703132319318, 0.0580853387590806), ci_low = c(NA, 1.08574689964253,
NA, 0.0933004210866726, NA, 0.813446935851162, NA, 0.300096568750007,
NA, 0.301300997438692, NA, 0.395638695943013, NA, 0.0184879397812241,
0.316512222510664, 0.0310182213975059, 0.0774035454553755,
0.0834303368267395, 0.0228743220824828, 0.011193138928203
), ci_high = c(NA, 3.4667621174982, NA, 0.929482385449043,
NA, 3.1938659749789, NA, 1.11325241104074, NA, 1.21374279615277,
NA, 1.44670881667103, NA, 0.205952672316014, 4.59055508109202,
0.342443550375257, 1.00710088916867, 2.04034216674928, 0.387728614421501,
0.257636420370032), p = c(NA, 0.0263295963311719, NA, 0.0432646112707497,
NA, 0.175314541854903, NA, 0.103298047943536, NA, 0.158264479732785,
NA, 0.399589361570504, NA, 8.78601713425597e-06, 0.747238599523183,
0.000291277241946869, 0.0597081504970594, 0.260985385401162,
0.00132018341690714, 0.000328378914869459), sig = c(NA, TRUE,
NA, TRUE, NA, FALSE, NA, FALSE, NA, FALSE, NA, FALSE, NA,
TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE), col = c("REF",
"UP", "REF", "DOWN", "REF", "INSIG", "REF", "INSIG", "REF",
"INSIG", "REF", "INSIG", "REF", "DOWN", "INSIG", "DOWN",
"INSIG", "INSIG", "DOWN", "DOWN")), row.names = c(NA, -20L
), class = "data.frame")
I'd like to draw a forest plot, but where each level is grouped by subgroup. How can I do this?
I've tried this so far to get my plot, but struggling with grouping the levels:
........................................................................................................................................................................................................
# Forest plot attempt: one row per subgroup_level, point at the adjusted odds
# ratio and a horizontal bar from ci_low to ci_high, both coloured by `col`.
forest_attempt <- ggplot(
  data = fig2b_data,
  aes(x = adj_or, y = subgroup_level, color = col)
)
forest_attempt <- forest_attempt +
  geom_errorbarh(aes(xmax = ci_high, xmin = ci_low), size = .5, height = .2)
forest_attempt <- forest_attempt + geom_point(size = 2)
forest_attempt + theme_bw()
I would like:
_________________________
Sex
Male (ref) x
Female |----x-----|
_______________________________________________________________
AI comorbid
No (ref) x
Yes |----x-----|
_______________________________________________________________
Site
Cambridge(ref) x
Preston |----x-----|
Southampton |----x-----|
Belfast |----x-----|
__________________________________1____________________________

That was quite a workaround. The adjusted panel height for facet_wrap() is borrowed from here
Code
library(dplyr)
library(forcats) # fct_recode() / fct_relevel() below come from forcats
library(ggplot2)

# Give each subgroup a display name (used as the facet strip label) and fix the
# order of the levels on the y axis. Discrete y values are drawn bottom-to-top,
# so within each subgroup the reference level is listed last to appear on top.
fig2b_data_cleared <- fig2b_data %>%
  mutate(
    subgroup = fct_recode(
      subgroup,
      "Age >= 70" = "age_70_plus",
      "AI Comorbidities" = "ai_comorbid_bool",
      "ECOG" = "ecog_combined",
      "Indication" = "indication_combined",
      "Non-AI Comorbidities" = "non_ai_comorbid_bool",
      "Sex" = "sex",
      "Site" = "site"
    ),
    subgroup_level = fct_relevel(
      subgroup_level,
      "age_70_plus_Yes", "age_70_plus_No",
      "ai_comorbid_bool_Yes", "ai_comorbid_bool_No",
      "ecog_combined_1+", "ecog_combined_0",
      "indication_combined_Metastatic / Unresectable",
      "indication_combined_Adjuvant",
      "non_ai_comorbid_bool_Yes", "non_ai_comorbid_bool_No",
      "sex_Male", "sex_Female",
      "site_Belfast",
      "site_Cardiff", "site_Liverpool",
      "site_Norwich", "site_Preston",
      "site_Southampton", "site_Taunton",
      "site_Cambridge"
    )
  )

# Base plot shared by both facet layouts: dashed reference line at OR = 1,
# a point per level and a horizontal interval from ci_low to ci_high.
# The earlier theme(...) call that preceded theme_bw() has been dropped: a
# complete theme added afterwards replaces it entirely, so it never had any
# effect (and it named `plot.title.x`, which is not a ggplot2 theme element).
p <- ggplot(data = fig2b_data_cleared, aes(x = adj_or, y = subgroup_level)) +
  geom_vline(xintercept = 1, linetype = 2, color = "red") +
  geom_point(aes(color = col), size = 3) +
  xlab("Adjusted Odds Ratio") +
  ylab("") +
  geom_errorbar(
    aes(xmax = ci_high, xmin = ci_low, color = col),
    size = 0.8, width = 0.5
  ) +
  theme_bw() +
  # Replace the internal subgroup_level keys with readable level names.
  scale_y_discrete(
    breaks = c(
      "age_70_plus_No", "age_70_plus_Yes",
      "ai_comorbid_bool_No", "ai_comorbid_bool_Yes",
      "ecog_combined_0", "ecog_combined_1+",
      "indication_combined_Adjuvant",
      "indication_combined_Metastatic / Unresectable",
      "non_ai_comorbid_bool_No", "non_ai_comorbid_bool_Yes",
      "sex_Female", "sex_Male",
      "site_Cambridge", "site_Belfast",
      "site_Cardiff", "site_Liverpool",
      "site_Norwich", "site_Preston",
      "site_Southampton", "site_Taunton"
    ),
    labels = c(
      "No (Ref)", "Yes",
      "No (Ref)", "Yes",
      "No (Ref)", "Yes",
      "No (Ref)", "Yes",
      "No (Ref)", "Yes",
      "Female (Ref)", "Male",
      "Cambridge (Ref)", "Belfast",
      "Cardiff", "Liverpool",
      "Norwich", "Preston",
      # Fixed label typo: was "Tanton".
      "Southampton", "Taunton"
    )
  ) +
  scale_color_discrete(
    limits = c("REF", "INSIG", "DOWN", "UP"),
    name = ""
  )

# facet_grid() supports space = "free_y" (panel height proportional to the
# number of levels); facet_wrap() puts the strip label above each panel.
# Build both, then copy the panel heights from the grid layout into the wrap
# layout to get both properties at once.
p.grid <- p + facet_grid(subgroup ~ ., scales = "free_y", space = "free_y")
p.wrap <- p + facet_wrap(~ subgroup, ncol = 1, scales = "free_y")
gp.grid <- ggplotGrob(p.grid)
gp.wrap <- ggplotGrob(p.wrap)
gp.wrap$heights[gp.wrap$layout[grep("panel", gp.wrap$layout$name), "t"]] <-
  gp.grid$heights[gp.grid$layout[grep("panel", gp.grid$layout$name), "t"]]
grid::grid.draw(gp.wrap)
Output

A graph with two values as points and mean value with other shape

 Problem:
I can't find the right way to make a plot that shows the values of a given variable as points and the mean value with a different shape. So far I have found a way of doing this, but the mean value also appears in the color legend, which is something I don't want. How could I get the desired output? Should I use stat_summary?
NOTE: Variables must be ordered by the mean value among groups by multimorbidity (if it is something important for the solution proposed) this is why I am using reorder_within and scale_x_reordered.
# reorder_within() / scale_x_reordered() are loaded from this remote script.
source("https://raw.githubusercontent.com/dgrtwo/drlib/master/R/reorder_within.R")
library(tidyverse)

# Step 1: attach the per-(multimorbidity, variables) mean twice - once as a
# value to be plotted (Mean) and once as the ordering key (aux_mean).
with_group_means <- foo %>%
  group_by(multimorbidity, variables) %>%
  mutate(
    Mean = mean(varimportance),
    aux_mean = Mean
  ) %>%
  ungroup()

# Step 2: widen by Gender and gather back so that "Mean" becomes a third
# category next to "Male" / "Female"; `type` tells the two kinds apart.
points_and_means <- with_group_means %>%
  spread(Gender, varimportance) %>%
  gather(Gender, varimportance, -multimorbidity, -variables, -aux_mean) %>%
  mutate(type = if_else(Gender %in% c("Male", "Female"), "Gender", "Mean"))

# Step 3: variables ordered by aux_mean within each multimorbidity facet,
# colour by Gender (including "Mean") and shape by type.
ggplot(
  points_and_means,
  aes(
    reorder_within(variables, aux_mean, multimorbidity),
    varimportance,
    color = Gender,
    shape = type
  )
) +
  geom_point() +
  scale_x_reordered() +
  scale_shape_manual(values = c(21, 24)) +
  coord_flip() +
  facet_wrap(multimorbidity ~ ., scales = "free")
Created on 2019-03-20 by the reprex package (v0.2.1)
The desired output:
dput for foo:
foo <- structure(list(
Gender = c(
"Male", "Male", "Male", "Male", "Male",
"Female", "Female", "Female", "Female", "Female", "Female", "Female",
"Female", "Female", "Female", "Male", "Male", "Male", "Male",
"Male"
), multimorbidity = c(
"Yes", "Yes", "Yes", "Yes", "Yes",
"No", "No", "No", "No", "No", "Yes", "Yes", "Yes", "Yes", "Yes",
"No", "No", "No", "No", "No"
), variables = c(
"bmi", "income",
"soccap", "alternattr", "occhaz", "bmi", "income", "soccap",
"alternattr", "occhaz", "bmi", "income", "soccap", "alternattr",
"occhaz", "bmi", "income", "soccap", "alternattr", "occhaz"
),
varimportance = c(
73.1234145437324, 51.0029811829917, 100,
0, 90.9926659603591, 81.1949541852942, 48.2402164701156,
100, 0, 9.10509052698692, 66.7759248406279, 31.69991730502,
100, 4.7914221037359, 93.4636133674693, 70.8853809607131,
75.004433319282, 100, 0, 43.7326141975936
)
), class = c(
"tbl_df",
"tbl", "data.frame"
), row.names = c(NA, -20L))

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Add hatch on geom_segment within the gggenes in ggplot2 [R] - r

Related

Running Fishers discriminate test in R

Cannot make a logistic curve in R

how to group levels into a subgroup in a ggplot forest plot

how to group levels of each subgroup in a forest plot

A graph with two values as points and mean value with other shape

Categories

Resources