Switching colors in ggplot - r

I have a pretty simple question. I want the boxes that are currently red to be blue and boxes that are blue to be red. I would like for the legend to stay exactly the way it is at the moment. How can I best achieve this?
labels1 <- c("Male sex", "Male sex", "Age", "Age", "Body mass index", "Body mass index" , "SBP", "SBP", "Black", "Black", "Asian", "Asian", "Deprivation", "Deprivation")
labels1 <- factor(labels1, levels = labels1)
Joint <- c("Hip", "Knee", "Hip", "Knee", "Hip", "Knee", "Hip", "Knee", "Hip", "Knee", "Hip", "Knee", "Hip", "Knee")
Joint <- factor(Joint, levels = Joint)
#rr <- c(1.02, 0.79, 2.18, 2.45, 2.93, 1.70, 0.98, 0.98, 0.98, 0.42, 1.32, 0.26, 0.99, 0.99)
#rrlow <- c(0.98, 0.76, 2.11, 2.36, 2.83, 1.62, 0.97, 0.97, 0.83, 0.31, 1.14, 0.19, 0.98, 0.98)
#rrhigh <- c(1.06, 0.83, 2.25, 2.54, 3.03, 1.78, 0.99, 1.00, 1.17, 0.57, 1.52, 0.37, 1.00, 1.00)
rr <- c(0.79, 1.02, 2.45, 2.18, 1.70, 2.93, 0.98, 0.98, 0.42, 0.98, 0.26, 1.32, 0.99, 0.99)
rrlow <- c(0.76, 0.98, 2.36, 2.11, 1.62, 2.83, 0.97, 0.97, 0.31, 0.83, 0.19, 1.14, 0.98, 0.98)
rrhigh <- c(0.83, 1.06, 2.54, 2.25, 1.78, 3.03, 1.00, 0.99, 0.57, 1.17, 0.37, 1.52, 1.00, 1.00)
x <- c(2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13)
rr <- rr[x]
rrlow <- rrlow[x]
rrhigh <- rrhigh[x]
forestdf <- data.frame(labels1, rr, rrhigh, rrlow, Joint)
#fplottable <- data.frame(labels1, figure1table[c(-16,-17), "rrfull"])
#fplottable <- data.frame(labels1, figure1table[c(-16,-17), "rrfull"])
#colors
dotCOLS = c("red3", "dodgerblue4")
barCOLS = c("red3", "dodgerblue4")
forestdf$color <- c(rep(c("white", "gray95"), 7))
p <- ggplot(forestdf, aes(x=rr, y=labels1, xmin=rrlow, xmax=rrhigh))+
geom_pointrange(shape=22, position = position_dodge(width = 0.75), aes(col = Joint, fill = Joint))+
geom_vline(xintercept = 1, linetype=3)+
#scale_colour_identity() +
xlab("Adjusted Hazard Ratio with 95% Confidence Interval")+theme_bw()+scale_y_discrete(limits=rev(labels1))+
scale_x_log10(limits = c(0.125, 4), breaks = c(0.125, 0.25, 0.5, 1, 2, 4), labels=c("0.125", "0.25", "0.5", "1", "2", "4"), expand = c(0,0))+
theme(axis.title.y=element_blank(), axis.text=element_text(size=11, color = "black"))+
theme(plot.margin = margin(30, 5, 10, 12)) +
scale_fill_manual(values = rev(dotCOLS))+scale_color_manual(values = rev(barCOLS))
p

In both your scale_fill_manual and scale_color_manual you are reversing the order of the color values you specify. If you drop the rev around the values the colors will switch. Is this what you want to achieve?
**labels1 <- c("Male sex", "Male sex", "Age", "Age", "Body mass index", "Body mass index" , "SBP", "SBP", "Black", "Black", "Asian", "Asian", "Deprivation", "Deprivation")
labels1 <- factor(labels1, levels = labels1)
Joint <- c("Hip", "Knee", "Hip", "Knee", "Hip", "Knee", "Hip", "Knee", "Hip", "Knee", "Hip", "Knee", "Hip", "Knee")
Joint <- factor(Joint, levels = Joint)
#rr <- c(1.02, 0.79, 2.18, 2.45, 2.93, 1.70, 0.98, 0.98, 0.98, 0.42, 1.32, 0.26, 0.99, 0.99)
#rrlow <- c(0.98, 0.76, 2.11, 2.36, 2.83, 1.62, 0.97, 0.97, 0.83, 0.31, 1.14, 0.19, 0.98, 0.98)
#rrhigh <- c(1.06, 0.83, 2.25, 2.54, 3.03, 1.78, 0.99, 1.00, 1.17, 0.57, 1.52, 0.37, 1.00, 1.00)
rr <- c(0.79, 1.02, 2.45, 2.18, 1.70, 2.93, 0.98, 0.98, 0.42, 0.98, 0.26, 1.32, 0.99, 0.99)
rrlow <- c(0.76, 0.98, 2.36, 2.11, 1.62, 2.83, 0.97, 0.97, 0.31, 0.83, 0.19, 1.14, 0.98, 0.98)
rrhigh <- c(0.83, 1.06, 2.54, 2.25, 1.78, 3.03, 1.00, 0.99, 0.57, 1.17, 0.37, 1.52, 1.00, 1.00)
x <- c(2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13)
rr <- rr[x]
rrlow <- rrlow[x]
rrhigh <- rrhigh[x]
forestdf <- data.frame(labels1, rr, rrhigh, rrlow, Joint)
#fplottable <- data.frame(labels1, figure1table[c(-16,-17), "rrfull"])
#fplottable <- data.frame(labels1, figure1table[c(-16,-17), "rrfull"])
#colors
dotCOLS = c("red3", "dodgerblue4")
barCOLS = c("red3", "dodgerblue4")
forestdf$color <- c(rep(c("white", "gray95"), 7))
p <- ggplot(forestdf, aes(x=rr, y=labels1, xmin=rrlow, xmax=rrhigh))+
geom_pointrange(shape=22, position = position_dodge(width = 0.75), aes(col = Joint, fill = Joint))+
geom_vline(xintercept = 1, linetype=3)+
#scale_colour_identity() +
xlab("Adjusted Hazard Ratio with 95% Confidence Interval")+theme_bw()+scale_y_discrete(limits=rev(labels1))+
scale_x_log10(limits = c(0.125, 4), breaks = c(0.125, 0.25, 0.5, 1, 2, 4), labels=c("0.125", "0.25", "0.5", "1", "2", "4"), expand = c(0,0))+
theme(axis.title.y=element_blank(), axis.text=element_text(size=11, color = "black"))+
theme(plot.margin = margin(30, 5, 10, 12)) +
scale_fill_manual(values = dotCOLS) +
scale_color_manual(values = barCOLS)
p

Related

How to model quantiles regression curves for probabilities depending on a predictor in R?

I'd' like to model the 25th, 50th and 75th quantile regression curves (q25, q50, q75) for 241 values of probability ('prob') depending on x0.
For that purpose, I used the qgamV package as follows. However, this approach led to some q25, q50, q75 values <0 and >1, which is not expected for probabilities.
Graphically, one would expect the q25 and q75 regression curves to approach the 'prob' limits 0 and 1 in a more tangential way (see below).
How to model these quantiles curves as best as possible, knowing that they represent probabilities?
Thanks for help.
Initial dataframe (df0):
df0 <- structure(list(x0 = c(2.65, 3.1, 2.15, 2.45, 2.9, 1.55, 2.05,
2.75, 2, 2.45, 4.05, 1.95, 3.35, 2.15, 2.5, 1.75, 1.6, 2.3, 3.35,
3.55, 2.1, 3.15, 2.5, 1.05, 2.3, 2.3, 2.95, 0.8, 1.75, 2.95,
2.55, 1.65, 2.4, 2.8, 2.2, 3.45, 2.15, 2.9, 1.7, 2.7, 2.05, 2.75,
2.35, 3.75, 2.2, 1.1, 2.35, 2.5, 3.05, 1, 4.4, 1.3, 2.2, 2.5,
1.35, 1.95, 1.95, 5.45, 2, 1.65, 2.7, 2, 1.5, 1.05, 4.15, 2.15,
1.9, 1.85, 4.2, 2.2, 3.35, 1.55, 1.95, 2.3, 1.9, 3.45, 2.2, 3.55,
1.4, 2.5, 2.35, 2.5, 2.4, 3.35, 2, 2.6, 3.05, 2.75, 1.6, 1.65,
2.45, 1.55, 1.65, 2.25, 0.9, 2.4, 2.2, 2, 1.65, 1.35, 1.95, 2.5,
1.6, 1.25, 3.8, 2.25, 2.85, 1.45, 2.4, 2.8, 3.75, 3.05, 1.8,
1.25, 1.55, 2, 2.55, 2.75, 3.55, 2.2, 2.1, 3.55, 3.65, 2.3, 1.25,
2.45, 2.2, 1.95, 1.65, 0.7, 2, 1.5, 2.8, 3.4, 3.95, 2.55, 2.45,
2.65, 1.75, 1.7, 2.5, 2.05, 2.75, 2.05, 3, 2.25, 3.6, 2.35, 3.25,
1.6, 3.3, 2.05, 1.95, 2.15, 2.3, 4.1, 2.45, 1.6, 2.3, 0.6, 2.35,
2.45, 1.9, 2.5, 1.35, 3.2, 2.25, 1.65, 2.75, 1.8, 3, 0.95, 2.7,
2.15, 3.75, 2.5, 1.95, 2.7, 3.75, 2.4, 2.4, 3.05, 1.8, 3.6, 2.05,
2.75, 2.15, 1.35, 3.15, 2.25, 3.1, 2, 2.35, 3.3, 2.05, 0.75,
2.55, 2.2, 3.15, 3.1, 1.75, 3.2, 3.15, 2.8, 2.5, 1.8, 2.2, 1.85,
3.35, 1.35, 2.75, 1.85, 2.8, 2.65, 3.15, 1.15, 2.5, 3.75, 2.75,
4.55, 2.3, 2.65, 3.1, 3.65, 0.8, 2.45, 3.25, 3.65, 3.75, 1.75,
2.55, 1.15, 2.05, 2.05, 3.5, 0.75, 2.55, 2.2, 2.1, 2.15, 2.75
), prob = c(0.043824528975438, 0.0743831343145038, 0.0444802301649798,
0.0184204002808217, 0.012747152819121, 0.109320069103749, 0.868637913750677,
0.389605665620339, 0.846536935687218, 0.104932383728924, 0.000796924809569913,
0.844673988202945, 0.00120791067227541, 0.91751061807481, 0.0140582427585067,
0.61360854266884, 0.55603090737844, 0.0121424615930165, 0.000392412410090414,
0.00731972612592678, 0.450730636411052, 0.0111896050578429, 0.0552971757296455,
0.949825608148576, 0.00216318997302124, 0.620876890784462, 0.00434032271743834,
0.809464444601336, 0.890796570916792, 0.0070834616944228, 0.0563350845256127,
0.913156468748195, 0.00605085671490011, 0.00585882020388307,
0.0139577135093548, 0.0151356267602558, 0.00357231467872644,
0.000268107682417655, 0.047883018897558, 0.137688264298974, 0.846219411361109,
0.455395192661041, 0.440089914302649, 0.312776912863294, 0.721283899836456,
0.945808616162847, 0.160122538485323, 0.274966581834218, 0.223500907500226,
0.957169102670141, 3.29173412975754e-05, 0.920710197397359, 0.752055893010363,
0.204573327883464, 0.824869881489217, 0.0336636091577387, 0.834235793851965,
0.00377210373002217, 0.611370672834389, 0.876156793482752, 0.04563653558985,
0.742493995255321, 0.42035122692417, 0.916359628728296, 0.182755925347698,
0.139504394672643, 0.415836463269909, 0.0143112277191436, 0.00611022961831899,
0.794529254262237, 0.000295836911230635, 0.88504245090271, 0.0320097205131667,
0.386424550101868, 0.724747784339428, 0.0374198694261709, 0.772894216412908,
0.243626917726206, 0.884082536765856, 0.649357153222083, 0.651665475576256,
0.248153637183556, 0.621116026311962, 0.254679380328883, 0.815492354289526,
0.00384382735772974, 0.00098493832845314, 0.0289740210412282,
0.919537164719931, 0.029914235716672, 0.791051705450356, 0.535062926433525,
0.930153425256182, 0.739648381556949, 0.962078822556967, 0.717404075711021,
0.00426200695619151, 0.0688025266083751, 0.30592683399928, 0.76857384388609,
0.817428136470741, 0.0101583095649087, 0.190150584186769, 0.949353043876038,
0.000942385744019884, 0.00752842476126574, 0.451811230189468,
0.878142444707428, 0.085390660867941, 0.705492062082986, 0.00776625091631656,
0.120499683875168, 0.871558791341612, 0.204175216963286, 0.88865934672351,
0.735067195665991, 0.111767657566763, 0.0718305257427526, 0.001998068594943,
0.726375812318976, 0.628064249939129, 0.0163105011142307, 0.585565544471761,
0.225632568540361, 0.914834452659588, 0.755043268549628, 0.44993311080756,
0.876058522964169, 0.876909380258345, 0.935545943209396, 0.856566304797687,
0.891579321327903, 0.67586664661773, 0.305274362445618, 0.0416387565225755,
0.244843991055886, 0.651782914419153, 0.615583040148267, 0.0164959661557421,
0.545479687527543, 0.0254178939123714, 0.00480000384583597, 0.0256296636591875,
0.776444262284288, 0.00686736233661002, 0.738267311816833, 0.00284628668554737,
0.0240371572079387, 0.00549270830047392, 0.91880163437759, 0.336534358175717,
0.276841848679916, 0.718008645244615, 0.0897424253787563, 0.0719730540202573,
0.00215797941000608, 0.0219160132143199, 0.797680147185277, 0.66612383359622,
0.946965411044528, 0.133399527090937, 0.343056247984854, 0.202570454449074,
0.00349712323805031, 0.919979740593237, 0.577123238372546, 0.759418264563034,
0.904569159000302, 0.0179587619909363, 0.785657258439329, 0.235867625712547,
0.959688292861383, 0.668060191654474, 0.0014774986557077, 0.00831528722028647,
0.669655207261098, 0.157824457113222, 0.110637023939517, 0.262525772704882,
0.112654002253028, 0.22606090266161, 0.157513622503487, 0.25688454756606,
0.00201570863346944, 0.70318409224183, 0.25568985167711, 0.810637054896326,
0.92708070974999, 0.608664352336801, 0.707490903842404, 0.00094520948858089,
0.106177223644193, 0.582785205597368, 0.0585327568963445, 0.377814739935042,
0.972447647118833, 0.0111118791692372, 0.58947840090326, 0.0111189166236961,
0.00317374095338712, 0.0664218007312096, 0.00227258301798719,
0.00198861129291917, 0.337443337988163, 0.750708293355867, 0.837530172974158,
0.627428065068903, 0.744110974625108, 0.00320417425932798, 0.871800026765784,
0.613647987816266, 0.808457030433619, 0.00486495461698562, 0.597950577021363,
0.000885253981642748, 0.0800527366346806, 0.00951706823839207,
0.125222576598629, 0.346018567766834, 0.0376933970313487, 0.157903106929268,
0.0371982251307384, 0.00407175432189843, 0.0946588147179984,
0.967274516618573, 0.169109953293894, 0.00124072042059317, 0.00259042255361196,
0.000400511359506596, 0.841289470209085, 0.807106898740506, 0.926962245924993,
0.814160745645036, 0.662558468801531, 0.000288068688170646, 0.698932091902567,
0.00242011818508616, 0.645573844423654, 0.517121859568318, 0.0931231998319089,
0.000877774529895907)), row.names = c(NA, -241L), class = "data.frame")
Quantiles regressions and plot:
library(mgcViz)
library(qgam)
library(ggplot2)
# Quantile regressions
q50 <- qgamV(prob ~ s(x0, bs="cr", k=10), data = df0, qu = 0.5)
q25 <- qgamV(prob ~ s(x0, bs="cr", k=10), data = df0, qu = 0.25)
q75 <- qgamV(prob ~ s(x0, bs="cr", k=10), data = df0, qu = 0.75)
# New dataframe including fitted quantile values
df1 <- df0
df1$q50 <- q50[["fitted.values"]]
df1$q25 <- q25[["fitted.values"]]
df1$q75 <- q75[["fitted.values"]]
# Plot
x_brk <- seq(0, 6, 1); x_lab <- seq(0, 6, 1)
y_brk <- seq(0, 1, 0.1); y_lab <- seq(0, 1, 0.1)
ggplot(df1, aes(x = x0, y = prob))+
scale_x_continuous(limits=c(0, 20), expand=c(0, 0), breaks=x_brk, labels=x_lab)+
scale_y_continuous(limits=c(-1, 2),expand=c(0, 0), breaks=y_brk, labels=y_lab)+
geom_vline(xintercept=x_brk, colour="grey25", size=0.2)+
geom_hline(yintercept=y_brk, colour="grey50", size=0.2)+
geom_hline(yintercept=0.5, linetype="solid", color = "black", size=0.2)+
geom_point(data = df1, aes(x = x0, y = prob), colour = "grey50", size=0.75, inherit.aes = TRUE)+
xlab(~paste("x0"))+
ylab(~paste("Prob"))+
theme(plot.title = element_blank())+
theme(plot.margin=unit(c(0.2,0.5,0.01,0.3),"cm"))+
theme(axis.text.x=element_text(colour="black", size=9.5, margin=margin(b=10),vjust=-1))+
theme(axis.text.y=element_text(colour="black", size=9.5,hjust=0.5))+
theme(axis.title.x=element_text(colour="black", size=11.5, margin=margin(b=2), vjust=1))+
theme(axis.title.y=element_text(colour="black", size=11.5, margin=margin(b=2), vjust=4))+
theme(panel.background=element_rect(fill="white"), panel.border = element_rect(colour = "black", fill=NA))+
geom_line(aes(x=x0, y = q50), data=df1, colour="black",size=0.8, inherit.aes = TRUE)+
geom_line(aes(x=x0, y = q25), data=df1, colour="black",size=0.6, linetype = "longdash")+
geom_line(aes(x=x0, y = q75), data=df1, colour="black",size=0.6, linetype = "longdash")+
coord_cartesian(xlim = c(0, 6), ylim = c(0, 1))
Continuation of the solution proposed by user2974951:
Given the non-normal distribution of Prob, I think better to use qgam rather than quantreg, by taking inspiration from user2974951's solution.
The difference between these 2 quantile regression approaches is very slight on example x0, but much more obvious with another predictor x1:
Example x0:
Example x1:
You can use the logit transform and then use regular quantile regresion
library(quantreg)
df0 <- df0[order(df0$x0), ] # ordering just for easier visualization
df0$probL <- log(df0$prob/(1 - df0$prob))
t <- c(0.25, 0.5, 0.75)
mod <- lapply(t, function(x){rq(probL ~ x0, data=df0, tau=x)})
names(mod) <- paste0("Q_", t)
pre <- as.data.frame(do.call(cbind, lapply(mod, function(x){1/(1 + exp(-predict(x)))})))
plot(prob ~ x0, data=df0)
lines(pre$Q_0.25 ~ df0$x0, col="red")
lines(pre$Q_0.5 ~ df0$x0, col="green")
lines(pre$Q_0.75 ~ df0$x0, col="red")

Clustering differences between pheatmap and corrplot() using ward.D2 method

I have performed Spearman correlation for my data. Then I tried to cluster and plot my data using the "ward.D2" method for corrplot()and pheatmap(). However, the order of the variables is different between the two plots.
Could someone help me clarify this point, thus correcting my code and creating the two plots with the same order of clustered variables?
Thank you so much.
#A sample of my dataset:
dput(Data_corr)
structure(list(S_cHDLP = c(0.299999999999999, -2.78, 0.880000000000001,
2.48, 2.15, 5.31, 3.02, 1.19, 2.1, -1.18, -0.34, 1.25, -3.25,
-3.16, 0.19, -0.100000000000001, -2.16, -0.220000000000001, 0.77,
-2.12), H7P = c(-0.18, -0.48, -0.13, -0.21, 0.07, 0.64, -0.13,
-0.1, 0.12, -0.22, 0.09, -0.0399999999999999, -1.56, 0.39, 0.58,
-0.49, 0.2, 0.13, 0.11, 0.06), H6P = c(0, 0, 0, 0.16, -0.23,
0, 0, 0, -0.26, -0.28, 0.06, -0.17, 1.16, -0.12, -0.32, -0.17,
0.38, 0.05, 0.01, 0), H5P = c(0, 0.84, 0.47, 1.21, 0.01, 0.21,
1.36, 0.2, -0.12, 0.93, -1.01, 0, -0.58, -0.97, -1, 0.97, -0.89,
0.35, -0.59, -0.12), H4P = c(-0.12, -1.27, -0.18, 0.25, 1.02,
1.26, -0.62, -0.16, 0.25, -0.01, 0.44, 0.17, 0.19, 0.97, 2.35,
0.3, -0.18, 0.03, 0.0899999999999999, 0.38), H3P = c(-0.31, 0.39,
0.13, 0.29, 0, 0.02, -0.07, 0, 0, -0.32, 0, -0.79, 0, -0.53,
-0.71, -0.2, 2.08, 0.86, 0, 0), H2P = c(-1.28, -0.619999999999999,
-1.07, 1.96, 0.15, 4.92, 1.55, 3, -0.459999999999999, -0.56,
1.12, 3.44, -1.48, -1.27, 1.45, 0.609999999999999, -1.59, -1.57,
2.04, 2.03), H1P = c(1.58, -2.15, 1.96, 0.51, 2, 0.37, 1.47,
-1.83, 2.56, -0.62, -1.46, -2.19, -1.77, -1.9, -1.25, -0.73,
-0.57, 1.35, -1.28, -4.14), TRLZ_TRL = c(4.61, 1.49, -2.71, 1.54,
-5.46, 2.18, 3.48, 12.83, 7.51, 7.74, -8.38, -0.729999999999997,
6.11, -19.74, -0.869999999999997, -1.82, -1.57000000000001, 0.609999999999999,
-14.79, -18.65), LDLZ = c(-0.0599999999999987, -0.400000000000002,
-0.289999999999999, -1.2, -0.479999999999997, -0.59, -1.29, -0.0599999999999987,
0.210000000000001, -1.58, 1.97, 0.0800000000000018, -1, 1.95,
1.41, 0.00999999999999801, 0.430000000000003, -0.289999999999999,
0.68, 0.52), HDLZ = c(-0.200000000000001, -0.200000000000001,
-0.0700000000000003, 0, -0.0200000000000014, -0.0199999999999996,
-0.0399999999999991, -0.119999999999999, -0.0900000000000016,
-0.0500000000000007, -0.15, -0.16, -0.640000000000001, 0.42,
0.16, -0.130000000000001, 0.15, 0.41, -0.0300000000000011, 0.18
)), class = "data.frame", row.names = c(NA, -20L))
library(pheatmap)
library(corrplot)
CorMethod <- "spearman"
CorMatrix <- cor(Data_corr, method=CorMethod, use="pairwise.complete.obs")
## 1st Plot
Plot3<-pheatmap(CorMatrix, cluster_cols=T, cluster_rows=T, cutree_rows = 3, angle_col=45, fontsize_col=5, fontsize_row = 7, treeheight_col=0, clustering_method="ward.D2")
#2nd Plot
Plot8 <-corrplot(CorMatrix, method="square", type="lower", order="hclust", hclust.method="ward.D2", tl.pos="ld", tl.cex = 0.5, tl.col="black", tl.srt=45)
You can create a corrplot with the same order given by pheatmap as follows:
#2nd Plot
library(RColorBrewer)
ord <- Plot3$tree_row$order
ReordCorMatrix <- CorMatrix[ord, ord]
Plot8 <-corrplot(ReordCorMatrix, method="square", type="lower", order="original",
hclust.method="ward.D2",
tl.pos="ld", tl.cex = 0.5, tl.col="black", tl.srt=45,
col=colorRampPalette(rev(brewer.pal(n = 7, name="RdYlBu")))(100))

Using pheatmap to sort data by row annotations?

I'm trying to create a heatmap with columns of test data and rows of individual study participants. The participants can be classified into three distinct groups. I'd like to annotate the plot with the three groups and then cluster the data within each group to understand the differences between them.
I'm new to creating heatmaps, and I can't get the row annotations to work. I'm also not sure how to cluster only within each group once I do get the annotations working. I was thinking that the package "pheatmap.type" would work, but unfortunately, it's not available for R version 4.0.2.
I can't post exact data (confidential) but I've attached and example file and I'll describe what I've done so far and post the code. I have a data frame with the first column as labels that include the participant ID and the group (did this using row.names=1) and then 12 columns with numeric data (no NA's). I then ordered the data by the row names and used the scale function to scale the data and generate a matrix. I then tried to create an annotation row by adding the group info to a data frame in several different ways. What I've tried so far is below:
#dataframe with Group and ID as row names and 12 numerical columns
df_1_HM <- data.frame(df_1$Group_ID, df_1$Test1, df_1$Test2, df_1$Test3, df_1$Test4, df_1$Test5, df_1$Test6, df_1$Test7, df_1$Test8, df_1$Test9, df_1$Test10, df_1$Test11, df_1$Test12, row.names=1)
#ordering the dataframe so that the groups are in order
df_1_HM_ordered <- df_1_HM[ order(row.names(df_1_HM)), ]
#Z-scoring (scaling) data
df_HM_matrix_1 <- scale(df_1_HM)
#creating a color palette
my_palette <- colorRampPalette(c("white", "grey", "black"))(n = 100)
#Plotting heatmap
install.packages("gplots")
library(gplots)
#trying to plot the heatmap with annotation_row data
#The method below does not work for me. The plot will run with no errors but does not actually plot - it ends up becoming a list of 4 with no data.
pheatmap(df_HM_matrix_1,
scale="none",
color=my_palette,
fontsize=14,
annotation_row=annotation_row)
annotation_row = data.frame(
df_Group = factor(rep(c("Group 1", "Group 2", "Group 3"), c(11, 10, 7)))
)
rownames(annotation_row) = paste("df_Group", 1:28, sep = "")
rownames(annotation_row) = rownames(df_HM_matrix_1) # name matching
#I also tried to use a dataframe with just the groups as column 1 to get row annotation
pheatmap(df_HM_matrix_1,
scale="none",
color=my_palette,
fontsize=14,
annotation_row=df_Group)
df_Group <- data.frame(df_1$Group, df_1$ID)
#Also tried using the select function to create a dataframe for the row annotation
df_Group_1 <- select(df_1, Group)
#When I use either of the data frame methods above I get the following error: Error in cut.default(a, breaks = 100) : 'x' must be numeric
Any help with this at all would be awesome!!
Here is the example data:
structure(list(Group_ID = structure(1:28, .Label = c("Group1_10",
"Group1_13", "Group1_15", "Group1_2", "Group1_20", "Group1_26",
"Group1_27", "Group1_3", "Group1_6", "Group1_8", "Group2_1",
"Group2_12", "Group2_14", "Group2_16", "Group2_21", "Group2_23",
"Group2_25", "Group2_28", "Group2_7", "Group2_9", "Group3_11",
"Group3_17", "Group3_18", "Group3_19", "Group3_24", "Group3_4",
"Group3_5", "Group3_6"), class = "factor"), Test1 = c(1.44, 4.36,
0.75, 0.59, 1.67, 0.41, 2.42, 0.57, 0.89, 0.45, 0.31, 1.56, 2.13,
0.86, 0.12, 0.26, 1.47, 2.64, 3.92, 2.19, 0.43, 0.98, 1.93, 1.49,
1.43, 2.58, 2.49, 2.64), Test2 = c(1.44, 4.36, 0.75, 0.59, 1.67,
0.41, 2.42, 0.57, 0.89, 0.45, 0.31, 1.56, 2.13, 0.86, 0.12, 0.26,
1.47, 2.64, 3.92, 2.19, 0.43, 0.98, 1.93, 1.49, 1.43, 2.58, 2.49,
2.64), Test3 = c(1.44, 4.36, 0.75, 0.59, 1.67, 0.41, 2.42, 0.57,
0.89, 0.45, 0.31, 1.56, 2.13, 0.86, 0.12, 0.26, 1.47, 2.64, 3.92,
2.19, 0.43, 0.98, 1.93, 1.49, 1.43, 2.58, 2.49, 2.64), Test4 = c(1.44,
4.36, 0.75, 0.59, 1.67, 0.41, 2.42, 0.57, 0.89, 0.45, 0.31, 1.56,
2.13, 0.86, 0.12, 0.26, 1.47, 2.64, 3.92, 2.19, 0.43, 0.98, 1.93,
1.49, 1.43, 2.58, 2.49, 0.31), Test5 = c(1.44, 4.36, 0.75, 0.59,
1.67, 0.41, 2.42, 0.57, 0.89, 0.45, 0.31, 1.56, 2.13, 0.86, 0.12,
0.26, 1.47, 2.64, 3.92, 2.19, 0.43, 0.98, 1.93, 1.49, 1.43, 2.58,
2.49, 0.31), Test6 = c(1.44, 4.36, 0.75, 0.59, 1.67, 0.41, 2.42,
0.57, 0.89, 0.45, 0.31, 1.56, 2.13, 0.86, 0.12, 0.26, 1.47, 2.64,
3.92, 2.19, 0.43, 0.98, 1.93, 1.49, 1.43, 2.58, 2.49, 0.31),
Test7 = c(1.44, 4.36, 0.75, 0.59, 1.67, 0.41, 2.42, 0.57,
0.89, 0.45, 0.31, 1.56, 2.13, 0.86, 0.12, 0.26, 1.47, 2.64,
3.92, 2.19, 0.43, 0.98, 1.93, 1.49, 1.43, 2.58, 2.49, 1.49
), Test8 = c(1.44, 4.36, 0.75, 0.59, 1.67, 0.41, 2.42, 0.57,
0.89, 0.45, 0.31, 1.56, 2.13, 0.86, 0.12, 0.26, 1.47, 2.64,
3.92, 2.19, 0.43, 0.98, 1.93, 1.49, 1.43, 2.58, 2.49, 1.49
), Test9 = c(1.44, 4.36, 0.75, 0.59, 1.67, 0.41, 2.42, 0.57,
0.89, 0.45, 0.31, 1.56, 2.13, 0.86, 0.12, 0.26, 1.47, 2.64,
3.92, 2.19, 0.43, 0.98, 1.93, 1.49, 1.43, 2.58, 2.49, 1.49
), Test10 = c(1.44, 4.36, 0.75, 0.59, 1.67, 0.41, 2.42, 0.57,
0.89, 0.45, 0.31, 1.56, 2.13, 0.86, 0.12, 0.26, 1.47, 2.64,
3.92, 2.19, 0.43, 0.98, 1.93, 1.49, 1.43, 2.58, 2.49, 3.92
), Test11 = c(1.44, 4.36, 0.75, 0.59, 1.67, 0.41, 2.42, 0.57,
0.89, 0.45, 0.31, 1.56, 2.13, 0.86, 0.12, 0.26, 1.47, 2.64,
3.92, 2.19, 0.43, 0.98, 1.93, 1.49, 1.43, 2.58, 2.49, 3.92
), Test12 = c(1.44, 4.36, 0.75, 0.59, 1.67, 0.41, 2.42, 0.57,
0.89, 0.45, 0.31, 1.56, 2.13, 0.86, 0.12, 0.26, 1.47, 2.64,
3.92, 2.19, 0.43, 0.98, 1.93, 1.49, 1.43, 2.58, 2.49, 3.92
)), class = "data.frame", row.names = c(NA, -28L))
For annotations to work with pheatmap, factors must be ordered. To do this, add ordered = TRUE to factor():
annotation_row = data.frame(df_Group = factor(rep(c("Group 1", "Group 2", "Group 3"), c(11, 10, 7)), ordered = TRUE))
You could also use as.ordered() to accomplish the same thing.
To sort your heatmap row by annotation group, just add the argument cluster_rows = F to pheatmap():
pheatmap(df_HM_matrix_1,
scale="none",
color=my_palette,
fontsize=14,
annotation_row=annotation_row,
cluster_rows = F)
And here is what it looks like now:

How do I plot a linear regression line in a specified bin in a histogram?

So we are trying to determine speciation rate as a function of animal weight. Animal weight follows a gaussian distribution when they are plotted altogether; hence, we only want to fit the regression line in the decreasing trend of the histogram. Specifically, the line should start from x = 2.1 and y = 3.0. Fig. 1 is my current plot using the code below, while Fig. 2 is the outcome I would like to acquire (superimposed line via paint), which I don't know how to do. Any help on the matter will be greatly appreciated.
Attached is my code:
x.log = c(-2.9, -2.7, -2.5, -2.3, -2.1, -1.9, -1.7, -1.5, -1.3, -1.1,
-0.9,-0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.5, 0.7, 0.9, 1.1,
1.3, 1.5, 1.7, 1.9, 2.1, 2.3, 2.5, 2.7, 2.9, 3.1, 3.3, 3.5, 3.7,
3.9, 4.1, 4.3, 4.5, 4.7, 4.9, 5.1, 5.3, 5.5, 5.7, 5.9, 6.1,
6.3, 6.5,6.9, 7.1, 7.3, 7.5, 7.7, 7.9)
y.log = c(0, 0, 0, 0.47, 0.60, 0.95, 1.14, 1.38, 1.68, 1.79, 2.10, 2.26,
2.29, 2.39, 2.48, 2.52, 2.79, 2.68, 2.80, 2.84, 2.96, 2.92,
2.91, 3.01, 2.95, 3.05, 2.94, 2.96, 2.98, 2.83, 2.85, 2.83,
2.71, 2.63, 2.61, 2.57, 2.37, 2.26, 2.17, 1.99, 1.87, 1.74,
1.62, 1.36, 1.30, 1.07, 1.20, 0.90, 0.30, 0.69, 0.30, 0.47, 0
0.30, 0)
# plot the histogram
names(log.nspecies) = logbio
log.nspecies = log.nspecies[order (as.numeric(names(log.nspecies)))]
xpos = barplot(log.nspecies, las = 2, space = 0, col = 'red',
xlab = 'ln Weight', ylab = 'ln Number of species')

where did I go wrong?

I have tried the following code to estimate the following parameters: alpha, beta, k, lambda & p for my proposed distribution. The r code is given below.
library('bbmle')
xvec <- c(0.55, 0.93, 1.25, 1.36, 1.49, 1.52, 1.58, 1.61, 1.64, 1.68, 1.73,
1.81, 2,0.74, 1.04, 1.27, 1.39, 1.49, 1.53, 1.59, 1.61, 1.66, 1.68, 1.76,
1.82,2.01, 0.77, 1.11, 1.28, 1.42, 1.5, 1.54, 1.6, 1.62, 1.66, 1.69, 1.76,
1.84, 2.24, 0.81, 1.13, 1.29, 1.48, 1.5, 1.55, 1.61, 1.62, 1.66, 1.7, 1.77,
1.84, 0.84, 1.24, 1.3, 1.48, 1.51, 1.55, 1.61, 1.63, 1.67, 1.7, 1.78, 1.89)
n <- length(xvec)
ln <- function(alpha, beta, k, lambda, p) {
-sum(log(alpha*beta*xvec**(alpha-1) + k*lambda*xvec**(k-1)*(1+xvec**k)**(-1))) + sum(lambda*log(1+xvec**k))-(n*log(1-p)) + beta*sum(xvec**alpha)-2*sum(log(1-p*(exp(-beta*xvec**alpha)/(1+xvec**k)**lambda)))
}
mle2(minuslogl = ln, start = list(alpha = 1, beta = 15, k = 1,
lambda = 0.5, p = 0.4), hessian.opts = TRUE)
summary(mle.result1)
I have used different initial values for the parameters with no good estimate (with the last set shown in the code above). The last solution that was obtained was:
Maximum likelihood estimation
Call:
mle2(minuslogl = ln, start = list(alpha = 1, beta = 15, k = 1,
lambda = 0.5, p = 0.4), hessian.opts = TRUE)
Coefficients:
Estimate Std.Error z value Pr(z)
alpha 0.15002 NA NA NA
beta 15.10368 NA NA NA
k 0.98189 NA NA NA
lambda 0.58166 NA NA NA
p -0.05312 67.82338 -8e-04 0.9994
-2 log L: -1.229098e-06
Warning message:
In sqrt(diag(object#vcov)) : NaNs produced
Please, where did I go wrong? Thank you.

Resources