I have a data frame such as:
> head(tab)
molecule gene start end strand orientation hatch
1 Genome5 genA 405113 407035 forward -1 NO
2 Genome5 genB 407035 407916 forward -1 NO
3 Genome5 genC 407927 408394 forward -1 NO
4 Genome5 genD 408387 408737 reverse -1 NO
5 Genome5 genE 408751 409830 forward 1 NO
6 Genome5 genF 409836 410315 forward -1 NO
And I can produce the following plot using this script :
library(ggplot2)
library(gggenes)
# Draw every gene as an arrow, one facet row per molecule, coloured by gene.
ggplot(
  data = example_genes,
  mapping = aes(xmin = start, xmax = end, y = molecule, fill = gene)
) +
  geom_gene_arrow() +
  scale_fill_brewer(palette = "Set3") +
  # Free scales so each molecule gets its own coordinate range.
  facet_wrap(~ molecule, scales = "free", ncol = 1)
I would simply like to add hatching to the genes for which hatch == "YES" in the tab$hatch column,
and then get the following result:
Here is the dput format of the table :
structure(list(molecule = c("Genome5", "Genome5", "Genome5",
"Genome5", "Genome5", "Genome5", "Genome5", "Genome5", "Genome5",
"Genome5", "Genome3", "Genome3", "Genome3", "Genome3", "Genome3",
"Genome3", "Genome3", "Genome3", "Genome4", "Genome4", "Genome4",
"Genome4", "Genome4", "Genome4", "Genome2", "Genome2", "Genome2",
"Genome2", "Genome2", "Genome2", "Genome2", "Genome2", "Genome1",
"Genome1", "Genome1", "Genome1", "Genome1", "Genome1", "Genome1",
"Genome1", "Genome1", "Genome1", "Genome6", "Genome6", "Genome6",
"Genome6", "Genome6", "Genome6", "Genome6", "Genome6", "Genome7",
"Genome7", "Genome7", "Genome7", "Genome7", "Genome7", "Genome7",
"Genome7", "Genome7", "Genome7", "Genome7", "Genome8", "Genome8",
"Genome8", "Genome8", "Genome8", "Genome8", "Genome8", "Genome8",
"Genome8", "Genome8", "Genome8"), gene = c("genA", "genB", "genC",
"genD", "genE", "genF", "protF", "protC", "protD", "protE", "genA",
"genB", "genC", "genD", "genE", "genF", "protA", "protB", "genA",
"genB", "genC", "genD", "genE", "genF", "genA", "genB", "genC",
"genD", "genE", "genF", "protA", "protB", "genA", "genB", "genC",
"genD", "genE", "genF", "protF", "protC", "protD", "protE", "genA",
"genB", "genC", "genD", "genE", "genF", "protA", "protB", "genB",
"genC", "genD", "genE", "genF", "protA", "protB", "protF", "protC",
"protD", "protE", "genB", "genC", "genD", "genE", "genF", "protA",
"protB", "protF", "protC", "protD", "protE"), start = c(405113,
407035, 407927, 408387, 408751, 409836, 410335, 412621, 412830,
413867, -67849, -65867, -64997, -64507, -64127, -63011, -62550,
-62187, -47353, -45431, -44522, -44070, -43701, -42614, 8345,
10327, 11394, 11878, 12258, 13365, 13726, 14260, 15389, 17301,
18176, 18641, 18999, 20086, 20474, 22777, 22986, 24024, 65751,
67698, 68605, 69128, 69501, 70614, 71008, 71375, -9390, -8984,
-8500, -8130, -7019, -6662, -6306, -5695, -3446, -3188, -2116,
2, 413, 898, 1268, 2376, 2733, 3089, 3700, 5949, 6217, 7307),
end = c(407035, 407916, 408394, 408737, 409830, 410315, 412596,
412833, 413870, 414850, -65864, -65013, -64548, -64127, -63048,
-62640, -62209, -61549, -45443, -44571, -44070, -43723, -42625,
-42201, 10330, 11181, 11843, 12255, 13337, 13733, 14067,
14919, 17299, 18161, 18640, 18985, 20078, 20451, 22720, 22989,
24023, 25010, 67691, 68570, 69135, 69511, 70583, 71015, 71349,
72034, -8992, -8511, -8123, -7048, -6663, -6321, -5653, -3449,
-3207, -2136, -1127, 406, 886, 1275, 2350, 2732, 3074, 3742,
5946, 6182, 7269, 8296), strand = c("forward", "forward",
"forward", "reverse", "forward", "forward", "reverse", "forward",
"forward", "forward", "reverse", "reverse", "reverse", "forward",
"reverse", "reverse", "reverse", "reverse", "reverse", "reverse",
"forward", "reverse", "forward", "forward", "forward", "forward",
"forward", "forward", "forward", "reverse", "forward", "reverse",
"reverse", "forward", "reverse", "forward", "reverse", "forward",
"forward", "forward", "forward", "forward", "forward", "forward",
"reverse", "forward", "reverse", "forward", "forward", "forward",
"reverse", "forward", "reverse", "reverse", "forward", "reverse",
"forward", "forward", "reverse", "reverse", "forward", "forward",
"forward", "forward", "forward", "forward", "reverse", "forward",
"forward", "reverse", "reverse", "reverse"), orientation = c(-1,
-1, -1, -1, 1, -1, 1, 1, -1, -1, -1, 1, -1, -1, 1, 1, 1,
-1, 1, 1, 1, -1, -1, -1, -1, -1, 1, -1, 1, 1, 1, 1, 1, -1,
1, -1, 1, 1, -1, 1, -1, -1, -1, 1, -1, -1, 1, 1, -1, -1,
1, -1, -1, -1, -1, 1, -1, -1, 1, 1, 1, 1, 1, 1, -1, -1, -1,
-1, -1, 1, 1, -1), hatch = c("NO", "NO", "NO", "NO", "NO",
"NO", "NO", "NO", "NO", "NO", "YES", "NO", "NO", "NO", "NO",
"NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO",
"NO", "NO", "NO", "NO", "NO", "NO", "NO", "YES", "NO", "NO",
"NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO",
"NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO",
"NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO",
"NO", "NO", "NO", "NO", "NO", "NO", "NO")), row.names = c(NA,
-72L), class = "data.frame")
Related
I have tried the following code to run Fisher's discriminant analysis on my data set, but it does not accept the negative values.
# Fisher's linear discriminant for the two classes in `user_col$User`.
# NOTE: stats::fisher.test() is Fisher's *exact test* for contingency tables of
# counts (hence the complaint about negative values); it is unrelated to
# Fisher's discriminant, which is computed directly below.

# Split the data by class; Heroin is dropped because it is not a predictor.
Split_user <- user_col %>%
  select(-Heroin) %>%
  filter(User == "Yes")
Split_user1 <- Split_user %>%
  select(-User)

Split_nonuser <- user_col %>%
  select(-Heroin) %>%
  filter(User == "No")
# Must be derived from Split_nonuser (the original reused Split_user here,
# which made both classes identical).
Split_nonuser1 <- Split_nonuser %>%
  select(-User)

# Means and covariances are undefined for classes with fewer than two rows.
n1 <- nrow(Split_user1)
n2 <- nrow(Split_nonuser1)
if (n1 < 2 || n2 < 2) {
  stop("Each class needs at least two observations (User == \"Yes\": ", n1,
       ", User == \"No\": ", n2, ").", call. = FALSE)
}

# Calculate the mean by class
m1 <- colMeans(Split_user1)
m1
m2 <- colMeans(Split_nonuser1)
m2

# Calculate the covariances by class
S1 <- cov(Split_user1)
S2 <- cov(Split_nonuser1)

# Side-by-side table of the class means
dat <- data.frame(
  "User" = c(m1),
  "Non User" = c(m2),
  stringsAsFactors = FALSE
)
dat

# Pooled within-class scatter matrix and the discriminant direction
# w = Sw^{-1} (m1 - m2); solve(A, b) avoids forming the explicit inverse.
Sw <- (n1 - 1) * S1 + (n2 - 1) * S2
test <- solve(Sw, m1 - m2)
test
structure(list(Nscore = c(0.31287, -0.67825, -0.46725, -0.14882,
0.73545, -0.67825, -0.46725, -1.32828, 0.62967, -0.24649, -1.05308,
-1.32828, 2.28554, -0.79151, -0.92104, -2.05048, -1.55078, 0.52135,
1.37297, -0.34799, -0.79151, -1.1943, 0.41667, 1.60383, -0.14882
), Escore = c(-0.57545, 1.93886, 0.80523, -0.80615, -1.6334,
-0.30033, -1.09207, 1.93886, 2.57309, 0.00332, 0.80523, 0.00332,
0.16767, 0.80523, 1.45421, -1.50796, -0.80615, -1.23177, -0.15487,
-1.7625, 0.80523, 0.47617, -0.94779, -3.27393, 0.63779), Oscore = c(-0.58331,
1.43533, -0.84732, -0.01928, -0.45174, -1.55521, -0.45174, -0.84732,
-0.97631, -1.42424, -1.11902, 0.14143, 0.44585, -0.01928, 0.44585,
-1.55521, -1.68062, -0.31776, -0.17779, -2.39883, 0.7233, -1.11902,
-0.84732, -1.27553, 1.24033), Ascore = c(-0.91699, 0.76096, -1.6209,
0.59042, -0.30172, 2.03972, -0.30172, -0.30172, 0.76096, 0.59042,
-0.76096, -1.92595, -1.6209, 0.94156, -0.60633, -1.07533, 0.28783,
-0.45321, -1.92595, -1.92595, 1.61108, -0.60633, 1.11406, 0.28783,
0.76096), Cscore = c(-0.00665, -0.14277, -1.0145, 0.58489, 1.30612,
1.63088, 0.93949, 1.63088, 1.13407, 0.12331, 1.81175, -0.52745,
-0.78155, 3.46436, 1.63088, 1.13407, 0.7583, -1.38502, -1.5184,
0.7583, -1.13788, 1.81175, -0.89891, -1.0145, 1.46191), Impulsivity = c(-0.21712,
-0.71126, -1.37983, -1.37983, -0.21712, -1.37983, -0.21712, 0.19268,
-1.37983, -1.37983, 0.19268, 0.52975, 1.29221, -0.71126, 1.29221,
-0.71126, -0.21712, -1.37983, -0.71126, -1.37983, 0.19268, -0.21712,
-0.71126, -1.37983, -0.21712), SS = c(-1.18084, -0.21575, 0.40148,
-1.18084, -0.21575, -1.54858, 0.07987, -0.52593, -1.54858, -0.84637,
0.07987, 1.2247, 0.07987, -0.84637, 0.7654, -0.52593, -2.07848,
-0.84637, -0.21575, -2.07848, -0.21575, -1.18084, 0.07987, -1.54858,
-0.52593), Heroin = c("CL0", "CL0", "CL0", "CL0", "CL0", "CL0",
"CL0", "CL0", "CL0", "CL0", "CL0", "CL0", "CL0", "CL0", "CL0",
"CL0", "CL0", "CL0", "CL0", "CL0", "CL0", "CL0", "CL0", "CL1",
"CL0"), User = c("No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No")), row.names = c(NA, -25L), class = c("tbl_df",
"tbl", "data.frame"))
Matrix with non-negative entries:
structure(c(0.610545701853111, 0.96447451044118, 0.851040908078699,
0.589544977621028, 0.827711005318391, 0.904054295338232, 0.589544977621028,
0.456502942067452, 0.688702581355738, 0.533249474881585, 0.389562632031922,
0.456502942067452, 0.632276676717644, 0.436140184047234, 0.653498081085907,
0.166581403886779, 0.495545270523754, 0.476097331977154, 0.671587833703474,
0.533249474881585, 0.436140184047234, 0.71082275573317, 0.610545701853111,
0.550877805396231, 0.495545270523754, 0.606851082126989, 0.606851082126989,
0.529890274393643, 0.17257337601325, 0.40854553851544, 0.238467663593076,
0.327676651856099, 0.580185689487682, 0.367919991081809, 0.479144172121096,
1, 0.505317471692705, 0.453951873616486, 0.0885974551303489,
0.259420636058159, 0.505317471692705, 0.63189368241683, 0.386973070246684,
0, 0.327676651856099, 0.479144172121096, 0.327676651856099, 0.386973070246684,
0.350233306260252, 0.63189368241683, 0.535574626985272, 0.563618843854673,
0.750572944212062, 0.223135727679215, 0.4774929083129, 0.706259916342132,
0.355063144061413, 0.275029248201035, 0.667195538247031, 0.563618843854673,
0.850078930076767, 0.223135727679215, 0.796364408545283, 0.163192941968364,
0.223135727679215, 0.355063144061413, 0.163192941968364, 0.595629597576807,
0.4774929083129, 0.223135727679215, 0.535574626985272, 0.706259916342132,
0.508037245380387, 0.383103354327933, 0.414857685465647, 0.403077115158141,
0.233825658261037, 0.558377116170461, 0.712895943634026, 0.210211608610118,
0.285534960468907, 0.658493870402802, 0.429792238205629, 0.533600585795802,
0.210211608610118, 0.482206789967235, 0.684122436721568, 0.285534960468907,
0.456117618078684, 0.533600585795802, 0.508041194672498, 0.18731799331194,
0.456117618078684, 0.357934394919504, 0.712895943634026, 0.558377116170461,
0.558377116170461, 0.156244832950339, 0.581589612247721, 0.658493870402802,
0.318731025010843, 0.202385427208327, 0.202385427208327, 0.249965664305335,
0.406440653462484, 0.599528335983808, 0.498561515107706, 0.571262830706954,
0.571262830706954, 0.788549949400029, 0.694141246205002, 0.694141246205002,
0.729304611825936, 0.474472314587249, 0.429044383403209, 0.694141246205002,
0.36191629319069, 0.474472314587249, 0.154790732976724, 0.474472314587249,
0.429044383403209, 0.297072430244326, 0.474472314587249, 0.154790732976724,
0.406440653462484, 0.446013490788146, 0.757189169998879, 1, 0,
0.528084009118427, 0.446013490788146, 0.271569845659404, 0.624098434171681,
0.271569845659404, 0.528084009118427, 0.757189169998879, 0.528084009118427,
0.446013490788146, 0.367285305878396, 0.156155405657909, 0.271569845659404,
0.624098434171681, 0.757189169998879, 0.528084009118427, 0, 0.367285305878396,
0.528084009118427, 0.528084009118427, 0.156155405657909, 0.528084009118427,
0.825751648038478, 1, 0.619957452233758, 0.308011329405206, 0.619957452233758,
0.53955917314341, 0.710932676034508, 0.46565805295222, 0.53955917314341,
1, 0.825751648038478, 0.619957452233758, 0.825751648038478, 0.710932676034508,
0.619957452233758, 0.53955917314341, 1, 0.710932676034508, 0.710932676034508,
0.825751648038478, 0.825751648038478, 0.619957452233758, 0.46565805295222,
0, 0.825751648038478), dim = c(25L, 7L), dimnames = list(c("30",
"67", "115", "20", "3", "18", "16", "8", "10", "77", "84", "71",
"54", "64", "85", "105", "58", "2", "102", "17", "79", "87",
"63", "11", "39"), c("Nscore", "Escore", "Oscore", "Ascore",
"Cscore", "Impulsivity", "SS")))
I am trying to make a logistic curve in R but the line does not appear in the plot.
My data are:
dput(los1)
structure(list(X1 = c("5.51688462301445", "2.55660506920185",
"4.17130300764484", "15.0032350113684", "0.0672790807684578",
"0", "10.7646529229551", "1.6819770192119", "4.44041933071867",
"2.69116323073877", "0", "0.740069888453036", "1.54741885767498",
"0.201837242305373", "1.81653518074882", "6.12239634993057",
"3.49851219996026", "22.4039338958996", "0.538232646147662",
"0.134558161536916", "1.2783025346007", "1.6819770192119", "16.9543283536541",
"60.0129400454734", "9.62090854989083", "0.470953565379205",
"33.7740985457708", "6.8624662383836", "0", "0", "4.50769841148758",
"62.6368241954438", "264.137671097005", "14.5995605267576", "0",
"0", "0", "6.12239634993057", "10.1591411960385", "22.9421665420477",
"0.470953565379205", "2.28748874612802", "13.8594906383046",
"11.0337692460289", "18.6363053728655", "27.2480277112295", "0.0672790807684578",
"0.470953565379205", "0", "0"), X2 = c("No", "No", "Yes", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "Yes", "Yes",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No")), row.names = c(NA, 50L), class = "data.frame")
and the code I use for the curve is:
# Scatter of the 0/1 outcome against X1 with a fitted logistic curve.
los1 %>%
  mutate(
    # X1 is stored as character in los1; without a numeric x there is
    # nothing continuous to fit a curve to.
    X1 = as.numeric(X1),
    prob = ifelse(X2 == "Yes", 1, 0)
  ) %>%
  ggplot(aes(X1, prob)) +
  geom_point(alpha = 0.2) +
  # method must be "glm": lm() has no `family` argument, so the binomial
  # family only takes effect with a generalized linear model.
  geom_smooth(
    method = "glm",
    se = FALSE,
    method.args = list(family = "binomial")
  ) +
  labs(
    title = "Logistic Regression Model",
    x = "Plasma Glucose Concentration",
    y = "Probability of being diabete-pos"
  )
Any idea how to make the curve?
Another option using stat_smooth with "glm" method and "X1" converted to numeric like this:
library(tidyverse)
# Logistic fit drawn with stat_smooth(): encode the outcome as 0/1 and make
# the predictor numeric in a single mutate() before plotting.
los1 %>%
  mutate(
    prob = as.numeric(X2 == "Yes"),
    X1 = as.numeric(X1)
  ) %>%
  ggplot(mapping = aes(x = X1, y = prob)) +
  geom_point(alpha = 0.2) +
  stat_smooth(
    method = "glm",
    method.args = list(family = binomial),
    se = FALSE,
    color = "green"
  ) +
  labs(
    title = "Logistic Regression Model",
    x = "Plasma Glucose Concentration",
    y = "Probability of being diabete-pos"
  )
#> `geom_smooth()` using formula 'y ~ x'
Created on 2022-08-30 with reprex v2.0.2
los1 <- structure(list(X1 = c("5.51688462301445", "2.55660506920185",
"4.17130300764484", "15.0032350113684", "0.0672790807684578",
"0", "10.7646529229551", "1.6819770192119", "4.44041933071867",
"2.69116323073877", "0", "0.740069888453036", "1.54741885767498",
"0.201837242305373", "1.81653518074882", "6.12239634993057",
"3.49851219996026", "22.4039338958996", "0.538232646147662",
"0.134558161536916", "1.2783025346007", "1.6819770192119", "16.9543283536541",
"60.0129400454734", "9.62090854989083", "0.470953565379205",
"33.7740985457708", "6.8624662383836", "0", "0", "4.50769841148758",
"62.6368241954438", "264.137671097005", "14.5995605267576", "0",
"0", "0", "6.12239634993057", "10.1591411960385", "22.9421665420477",
"0.470953565379205", "2.28748874612802", "13.8594906383046",
"11.0337692460289", "18.6363053728655", "27.2480277112295", "0.0672790807684578",
"0.470953565379205", "0", "0"), X2 = c("No", "No", "Yes", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "Yes", "Yes",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No")), row.names = c(NA, 50L), class = "data.frame")
# Inspect the raw columns (X1 arrives as character).
str(los1)

# Prepare the variables in place: numeric predictor and a 0/1 response.
los1[["X1"]] <- as.numeric(los1[["X1"]])
los1[["Y"]] <- as.numeric(los1[["X2"]] == "Yes")

library(ggplot2)

# Points plus the binomial GLM fit.
ggplot(los1, mapping = aes(x = X1, y = Y)) +
  geom_point(alpha = 0.2) +
  geom_smooth(
    method = "glm",
    method.args = list(family = "binomial"),
    se = FALSE
  ) +
  labs(
    title = "Logistic Regression Model",
    x = "Plasma Glucose Concentration",
    y = "Probability of being diabete-pos"
  )
> dput(fig2b_data)
structure(list(subgroup = c("sex", "sex", "ai_comorbid_bool",
"ai_comorbid_bool", "non_ai_comorbid_bool", "non_ai_comorbid_bool",
"age_70_plus", "age_70_plus", "ecog_combined", "ecog_combined",
"indication_combined", "indication_combined", "site", "site",
"site", "site", "site", "site", "site", "site"), level = c("Female",
"Male", "No", "Yes", "No", "Yes", "No", "Yes", "0", "1+", "Adjuvant",
"Metastatic / Unresectable", "Cambridge", "Belfast", "Cardiff",
"Liverpool", "Norwich", "Preston", "Southampton", "Taunton"),
subgroup_level = c("sex_Female", "sex_Male", "ai_comorbid_bool_No",
"ai_comorbid_bool_Yes", "non_ai_comorbid_bool_No", "non_ai_comorbid_bool_Yes",
"age_70_plus_No", "age_70_plus_Yes", "ecog_combined_0", "ecog_combined_1+",
"indication_combined_Adjuvant", "indication_combined_Metastatic / Unresectable",
"site_Cambridge", "site_Belfast", "site_Cardiff", "site_Liverpool",
"site_Norwich", "site_Preston", "site_Southampton", "site_Taunton"
), ref = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE,
TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE), adj_or = c(1, 1.92697788983048, 1,
0.309313271153888, 1, 1.60176654927755, 1, 0.581067651194834,
1, 0.606677244239784, 1, 0.757510322046024, 1, 0.0671548910659019,
1.24115412701041, 0.111740502056371, 0.296334401152569, 0.407313416513578,
0.100703132319318, 0.0580853387590806), ci_low = c(NA, 1.08574689964253,
NA, 0.0933004210866726, NA, 0.813446935851162, NA, 0.300096568750007,
NA, 0.301300997438692, NA, 0.395638695943013, NA, 0.0184879397812241,
0.316512222510664, 0.0310182213975059, 0.0774035454553755,
0.0834303368267395, 0.0228743220824828, 0.011193138928203
), ci_high = c(NA, 3.4667621174982, NA, 0.929482385449043,
NA, 3.1938659749789, NA, 1.11325241104074, NA, 1.21374279615277,
NA, 1.44670881667103, NA, 0.205952672316014, 4.59055508109202,
0.342443550375257, 1.00710088916867, 2.04034216674928, 0.387728614421501,
0.257636420370032), p = c(NA, 0.0263295963311719, NA, 0.0432646112707497,
NA, 0.175314541854903, NA, 0.103298047943536, NA, 0.158264479732785,
NA, 0.399589361570504, NA, 8.78601713425597e-06, 0.747238599523183,
0.000291277241946869, 0.0597081504970594, 0.260985385401162,
0.00132018341690714, 0.000328378914869459), sig = c(NA, TRUE,
NA, TRUE, NA, FALSE, NA, FALSE, NA, FALSE, NA, FALSE, NA,
TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE), col = c("REF",
"UP", "REF", "DOWN", "REF", "INSIG", "REF", "INSIG", "REF",
"INSIG", "REF", "INSIG", "REF", "DOWN", "INSIG", "DOWN",
"INSIG", "INSIG", "DOWN", "DOWN")), row.names = c(NA, -20L
), class = "data.frame")
I'd like to draw a forest plot, but where each level is grouped by subgroup. How can I do this?
I've tried this so far to get my plot, but struggling with grouping the levels:
........................................................................................................................................................................................................
# Forest plot attempt: one row per subgroup level; the colour mapping is
# declared once and inherited by both layers.
ggplot(
  data = fig2b_data,
  mapping = aes(x = adj_or, y = subgroup_level, color = col)
) +
  # Confidence interval
  geom_errorbarh(
    mapping = aes(xmin = ci_low, xmax = ci_high),
    height = .2,
    size = .5
  ) +
  # Point estimate
  geom_point(size = 2) +
  theme_bw()
I would like:
_________________________
Sex
Male (ref) x
Female |----x-----|
_______________________________________________________________
AI comorbid
No (ref) x
Yes |----x-----|
_______________________________________________________________
Site
Cambridge(ref) x
Preston |----x-----|
Southampton |----x-----|
Belfast |----x-----|
__________________________________1____________________________
I think this can help. First, use the melt function to rearrange your data around the target columns you want, and then plot it. However, since I don't know exactly what you want to do, my plot has many points for subgroup = site, and I don't think it conveys much graphical information.
# Reshape to long format, keeping the plotting columns as identifiers.
id_cols <- c("subgroup", "adj_or", "ci_low", "ci_high", "col")
df <- melt(df, id.vars = id_cols)

# One row per subgroup: interval as a horizontal error bar, estimate as a point.
ggplot(df, aes(x = adj_or, y = subgroup, color = variable)) +
  geom_errorbarh(
    aes(xmin = ci_low, xmax = ci_high, color = col),
    height = .2,
    size = .5
  ) +
  geom_point(aes(color = col), size = 2) +
  theme_bw()
> dput(fig2b_data)
structure(list(subgroup = c("sex", "sex", "ai_comorbid_bool",
"ai_comorbid_bool", "non_ai_comorbid_bool", "non_ai_comorbid_bool",
"age_70_plus", "age_70_plus", "ecog_combined", "ecog_combined",
"indication_combined", "indication_combined", "site", "site",
"site", "site", "site", "site", "site", "site"), level = c("Female",
"Male", "No", "Yes", "No", "Yes", "No", "Yes", "0", "1+", "Adjuvant",
"Metastatic / Unresectable", "Cambridge", "Belfast", "Cardiff",
"Liverpool", "Norwich", "Preston", "Southampton", "Taunton"),
subgroup_level = c("sex_Female", "sex_Male", "ai_comorbid_bool_No",
"ai_comorbid_bool_Yes", "non_ai_comorbid_bool_No", "non_ai_comorbid_bool_Yes",
"age_70_plus_No", "age_70_plus_Yes", "ecog_combined_0", "ecog_combined_1+",
"indication_combined_Adjuvant", "indication_combined_Metastatic / Unresectable",
"site_Cambridge", "site_Belfast", "site_Cardiff", "site_Liverpool",
"site_Norwich", "site_Preston", "site_Southampton", "site_Taunton"
), ref = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE,
TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE), adj_or = c(1, 1.92697788983048, 1,
0.309313271153888, 1, 1.60176654927755, 1, 0.581067651194834,
1, 0.606677244239784, 1, 0.757510322046024, 1, 0.0671548910659019,
1.24115412701041, 0.111740502056371, 0.296334401152569, 0.407313416513578,
0.100703132319318, 0.0580853387590806), ci_low = c(NA, 1.08574689964253,
NA, 0.0933004210866726, NA, 0.813446935851162, NA, 0.300096568750007,
NA, 0.301300997438692, NA, 0.395638695943013, NA, 0.0184879397812241,
0.316512222510664, 0.0310182213975059, 0.0774035454553755,
0.0834303368267395, 0.0228743220824828, 0.011193138928203
), ci_high = c(NA, 3.4667621174982, NA, 0.929482385449043,
NA, 3.1938659749789, NA, 1.11325241104074, NA, 1.21374279615277,
NA, 1.44670881667103, NA, 0.205952672316014, 4.59055508109202,
0.342443550375257, 1.00710088916867, 2.04034216674928, 0.387728614421501,
0.257636420370032), p = c(NA, 0.0263295963311719, NA, 0.0432646112707497,
NA, 0.175314541854903, NA, 0.103298047943536, NA, 0.158264479732785,
NA, 0.399589361570504, NA, 8.78601713425597e-06, 0.747238599523183,
0.000291277241946869, 0.0597081504970594, 0.260985385401162,
0.00132018341690714, 0.000328378914869459), sig = c(NA, TRUE,
NA, TRUE, NA, FALSE, NA, FALSE, NA, FALSE, NA, FALSE, NA,
TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE), col = c("REF",
"UP", "REF", "DOWN", "REF", "INSIG", "REF", "INSIG", "REF",
"INSIG", "REF", "INSIG", "REF", "DOWN", "INSIG", "DOWN",
"INSIG", "INSIG", "DOWN", "DOWN")), row.names = c(NA, -20L
), class = "data.frame")
I'd like to draw a forest plot, but where each level is grouped by subgroup. How can I do this?
I've tried this so far to get my plot, but struggling with grouping the levels:
........................................................................................................................................................................................................
# Forest plot attempt: one row per subgroup level; the colour mapping is
# declared once and inherited by both layers.
ggplot(
  data = fig2b_data,
  mapping = aes(x = adj_or, y = subgroup_level, color = col)
) +
  # Confidence interval
  geom_errorbarh(
    mapping = aes(xmin = ci_low, xmax = ci_high),
    height = .2,
    size = .5
  ) +
  # Point estimate
  geom_point(size = 2) +
  theme_bw()
I would like:
_________________________
Sex
Male (ref) x
Female |----x-----|
_______________________________________________________________
AI comorbid
No (ref) x
Yes |----x-----|
_______________________________________________________________
Site
Cambridge(ref) x
Preston |----x-----|
Southampton |----x-----|
Belfast |----x-----|
__________________________________1____________________________
That was quite a workaround. The height adjustment for the facet_wrap() panels is borrowed from here
Code
library(dplyr)
library(ggplot2)
# Forest plot of adjusted odds ratios with the levels grouped by subgroup.

# Readable subgroup names for the facet strips, and an explicit drawing order
# for the levels (the first level is drawn at the bottom, so each reference
# level ends up on top of its panel). forcats is called with `::` because only
# dplyr and ggplot2 are attached.
fig2b_data_cleared <- fig2b_data %>%
  mutate(
    subgroup = forcats::fct_recode(
      subgroup,
      "Age >= 70" = "age_70_plus",
      "AI Comorbidities" = "ai_comorbid_bool",
      "ECOG" = "ecog_combined",
      "Indication" = "indication_combined",
      "Non-AI Comorbidities" = "non_ai_comorbid_bool",
      "Sex" = "sex",
      "Site" = "site"
    ),
    subgroup_level = forcats::fct_relevel(
      subgroup_level,
      "age_70_plus_Yes", "age_70_plus_No",
      "ai_comorbid_bool_Yes", "ai_comorbid_bool_No",
      "ecog_combined_1+", "ecog_combined_0",
      "indication_combined_Metastatic / Unresectable",
      "indication_combined_Adjuvant",
      "non_ai_comorbid_bool_Yes", "non_ai_comorbid_bool_No",
      "sex_Male", "sex_Female",
      "site_Belfast",
      "site_Cardiff", "site_Liverpool",
      "site_Norwich", "site_Preston",
      "site_Southampton", "site_Taunton",
      "site_Cambridge"
    )
  )

# Axis labels are derived from the data (level name, with " (Ref)" appended for
# reference rows) instead of a hand-typed vector, so every level shows its own
# name and the labels cannot drift out of sync with the breaks.
y_labels <- setNames(
  ifelse(fig2b_data$ref, paste(fig2b_data$level, "(Ref)"), fig2b_data$level),
  fig2b_data$subgroup_level
)

p <- ggplot(data = fig2b_data_cleared, aes(x = adj_or, y = subgroup_level)) +
  geom_vline(xintercept = 1, linetype = 2, color = "red") +
  geom_point(aes(color = col), size = 3) +
  xlab("Adjusted Odds Ratio") +
  ylab("") +
  # Reference rows have no interval (NA bounds); drop them silently.
  geom_errorbar(
    aes(xmax = ci_high, xmin = ci_low, color = col),
    size = 0.8, width = 0.5, na.rm = TRUE
  ) +
  # The complete theme has to come first: adding theme_bw() after theme()
  # replaces every customisation made before it.
  theme_bw() +
  theme(
    axis.text.x = element_text(face = "bold"),
    axis.title.y = element_blank(),
    strip.text.y = element_text(hjust = 0, vjust = 1, angle = 180, face = "bold"),
    legend.title = element_blank()
  ) +
  # A named label vector is matched to the breaks by name.
  scale_y_discrete(labels = y_labels) +
  scale_color_discrete(
    limits = c("REF", "INSIG", "DOWN", "UP"),
    name = ""
  )

# facet_grid() can size panels by their number of levels (space = "free_y"),
# facet_wrap() cannot; build both and copy the panel heights across so the
# wrapped layout gets proportional panels.
p.grid <- p + facet_grid(subgroup ~ ., scales = "free_y", space = "free_y")
p.wrap <- p + facet_wrap(~ subgroup, ncol = 1, scales = "free_y")

gp.grid <- ggplotGrob(p.grid)
gp.wrap <- ggplotGrob(p.wrap)

# Row index ("t") of every panel in each gtable layout.
wrap_panel_rows <- gp.wrap$layout[grep("panel", gp.wrap$layout$name), "t"]
grid_panel_rows <- gp.grid$layout[grep("panel", gp.grid$layout$name), "t"]
gp.wrap$heights[wrap_panel_rows] <- gp.grid$heights[grid_panel_rows]

grid::grid.draw(gp.wrap)
Output
Problem:
I can't find the right way to plot the values of a given variable as points and also plot the mean value with a different shape. So far I have found a way of doing this, but the mean value also appears in the color legend, which is something I don't want. How could I get the desired output? Should I use stat_summary?
NOTE: Variables must be ordered by their mean value within each multimorbidity group (in case this matters for the proposed solution); this is why I am using reorder_within and scale_x_reordered.
source("https://raw.githubusercontent.com/dgrtwo/drlib/master/R/reorder_within.R")
library(tidyverse)
# Plot per-gender importance together with the per-variable mean, ordering the
# variables by that mean inside each multimorbidity facet.
foo %>%
  # Mean importance per (multimorbidity, variable); aux_mean is a copy that
  # survives the reshape below and drives the ordering.
  group_by(multimorbidity, variables) %>%
  mutate(Mean = mean(varimportance)) %>%
  mutate(aux_mean = Mean) %>%
  ungroup() %>%
  # Wide then long again, so that "Mean" becomes a third category next to
  # "Male" and "Female" in the Gender column.
  spread(Gender, varimportance) %>%
  gather(Gender, varimportance, -c(multimorbidity, variables, aux_mean)) %>%
  mutate(
    type = if_else(Gender %in% c("Male", "Female"), "Gender", "Mean")
  ) %>%
  ggplot(
    aes(
      x = reorder_within(variables, aux_mean, multimorbidity),
      y = varimportance,
      color = Gender,
      shape = type
    )
  ) +
  geom_point() +
  scale_shape_manual(values = c(21, 24)) +
  scale_x_reordered() +
  facet_wrap(multimorbidity ~ ., scales = "free") +
  coord_flip()
Created on 2019-03-20 by the reprex package (v0.2.1)
The desired output:
dput for foo:
foo <- structure(list(
Gender = c(
"Male", "Male", "Male", "Male", "Male",
"Female", "Female", "Female", "Female", "Female", "Female", "Female",
"Female", "Female", "Female", "Male", "Male", "Male", "Male",
"Male"
), multimorbidity = c(
"Yes", "Yes", "Yes", "Yes", "Yes",
"No", "No", "No", "No", "No", "Yes", "Yes", "Yes", "Yes", "Yes",
"No", "No", "No", "No", "No"
), variables = c(
"bmi", "income",
"soccap", "alternattr", "occhaz", "bmi", "income", "soccap",
"alternattr", "occhaz", "bmi", "income", "soccap", "alternattr",
"occhaz", "bmi", "income", "soccap", "alternattr", "occhaz"
),
varimportance = c(
73.1234145437324, 51.0029811829917, 100,
0, 90.9926659603591, 81.1949541852942, 48.2402164701156,
100, 0, 9.10509052698692, 66.7759248406279, 31.69991730502,
100, 4.7914221037359, 93.4636133674693, 70.8853809607131,
75.004433319282, 100, 0, 43.7326141975936
)
), class = c(
"tbl_df",
"tbl", "data.frame"
), row.names = c(NA, -20L))