troubleshooting ggtree to display node tip color and shape in accordance with external csv variables - ggtree

i'm having a hard time getting my node tips on my tree to accommodate for species and host via tip color and tip shape. here is my code, the tree, and the csv data i am using to annotate the tree:
rm(list=ls())
#time to make Fig3A
library(ggplot2)
library(ggtree)
library(ape)
install.packages("devtools")
devtools::install_github("eliocamp/ggnewscale#dev")
install.packages("remotes")
remotes::install_github("emmanuelparadis/ape")
library(ggnewscale)
homewd= '/Users/mfv2446/Desktop/'
setwd(paste0(homewd))
#load the fig3a tree
treeC <- ape::read.tree('node_4_kobu.newick')
#load tree data prepared from elsewhere
dat <- read.csv(file = paste0(homewd, '/kobuviruses_taxid_194960.csv'), header = T, stringsAsFactors = F)
#species name
colz = c("Kobuvirus cattle/Kagoshima-2-24-KoV/2015/JPN" = "#CC79A7", "Aichivirus D"= "#E69F00", "Aichivirus B"="darkgoldenrod1", "Aichivirus A" = "black", "Canine kobuvirus" = "royalblue", "Caprine kobuvirus" = "tomato", "Marmot kobuvirus" = "mediumseagreen", "Aichivirus E" = "purple", "Ovine kobuvirus" = "green", "Kobuvirus sheep/TB3/HUN/2009" = "yellow", "Mouse kobuvirus M-5/USA/2010" = "blue", "Aichivirus C" = "orange")
#pick order for the labels
dat$Species <- factor(dat$Species, levels =c("Kobuvirus cattle/Kagoshima-2-24-KoV/2015/JPN", "Aichivirus D", "Aichivirus B", "Aichivirus A", "Canine kobuvirus", "Caprine kobuvirus", "Marmot kobuvirus", "Aichivirus E", "Ovine kobuvirus", "Kobuvirus sheep/TB3/HUN/2009", "Mouse kobuvirus M-5/USA/2010", "Aichivirus C")
#take a glance
ggtree(treeC) %<+% dat2 + geom_tippoint(aes(fill=Species, shape=Host), shape=21) + geom_tiplab(linesize= 0.1, size=2.5) +
geom_nodelab(size=1, nudge_x = -0.01, nudge_y = 0.25) + scale_fill_manual(values=colz) +
theme(legend.position = c(.2,.85), legend.title = element_blank())

Related

Fail to set the color and dot size separately when using ggplot with two datasets

I have a data set as shown here:
ALL<- structure(list(GI = c(38.448275862069, 40.2659574468085, 85.3378378378378,
56.4606741573034, 26.5714285714286, 16.8944099378882), GI_D = c(31.5275862068966,
37.0446808510638, 64.0033783783784, 45.7331460674157, 20.7257142857143,
14.1913043478261), GI_W = c(34.84375, 39.4270833333333, 83.0921052631579,
54.6195652173913, 25.5963302752294, 16.4848484848485), NEE_D_mean = c(9.9644036070938,
-5.49181483024952, -29.5841687938457, -10.950117466455, -9.76133775037159,
-1.17370950853892), NEE_D_se = c(24.4055666454516, 8.31286897717958,
43.0803839446216, 42.0054504158082, 28.7765100449838, 8.86774764999355
), NEE_W_mean = c(-10.6866769282934, 20.9456806199394, -24.0380682586804,
52.3723812566745, -62.2858574112861, 56.3557615426375), NEE_W_se = c(15.2426118086142,
17.8227858145903, 22.7452815581715, 38.4251278858896, 19.1950340008666,
25.59062272811), GPP_D_mean = c(2.76586256588453, -14.0740484535984,
22.0551675189495, 38.2196758481854, -22.2452106112792, 2.92247497333855
), GPP_D_se = c(10.0301104827162, 4.76830515667558, 10.1200654792974,
13.6220945562145, 12.5521089272372, 4.02070599220442), GPP_W_mean = c(-13.3583364224079,
5.5457128851295, 6.96224944388818, 30.9347346550519, -24.0637392356731,
31.1919112040759), GPP_W_se = c(7.79177565854901, 7.68225824264646,
7.53759987843893, 9.21062180693269, 11.5998936888688, 4.91032534186175
), RE_D_mean = c(-6.92656657644594, -20.2249090077204, -1.55891573291113,
15.3619823271736, -59.6169736724781, 0.0398744940922411), RE_D_se = c(8.81296607135718,
3.17951327169943, 7.26103092218914, 9.79375075847273, 33.89046634443,
3.15632251128507), RE_W_mean = c(-11.2826765406364, -5.50930629197934,
-7.35527862198859, -3.3802491396303, -5.7039196948544, 15.5927675710877
), RE_W_se = c(7.82782177993256, 3.28089787167971, 5.27000717925753,
5.7667863399033, 10.1830962186111, 3.17699751136105), site = c("DK_M",
"DK_B", "UK", "NL", "HU", "IT")), row.names = c(NA, -6L), class = "data.frame")
And now I want to make a plot similar to below,
My code is
library(dplyr)
require(ggplot2)
require(ggpmisc)
library(tidyr)
library(tidyverse)
target1<- c("UK", "DK_M", "NL","DK_B") #What about "DK_B"?
dat<- filter(ALL, site %in% target1)
fit<- lm(NEE_D_mean~GI,dat)
summary(fit)
target2<- c("HU", "DK_M","NL","DK_B")
df<- filter(ALL, site %in% target2)
fit<- lm(RE_D_mean~GI,df)
summary(fit)
ggplot(ALL, mapping = aes(x=GI, y=NEE_D_mean))+
geom_point(aes(x=GI, y=NEE_D_mean,shape=site,color= 'green', size=1))+
geom_hline(yintercept = 0)+ #add a horizontal line= 0
geom_errorbar(aes(ymin=NEE_D_mean-NEE_D_se, ymax=NEE_D_mean+NEE_D_se), width=0.5) +
labs(y='Drought change of NEE from control % ', x= 'Gaussen Index of Aridity', color= ' ')+ #here, note: x and y axis title is reversed.
geom_smooth (data = dat,aes(x=GI, y=NEE_D_mean),method='lm', formula = y~x,color= 'black', se=FALSE,inherit.aes = FALSE) +
#stat_poly_eq(formula = y~x, eq.with.lhs = "italic(hat(y))~`=`~", aes(x = 65, y = -20,label = paste(..eq.label.., ..rr.label.., sep = "~~~")), parse = TRUE) +
geom_point(aes(x=GI, y=RE_D_mean,shape= site,color= "blue",size=2))+ #if I add color= "bule" here, it doesn't work at all. why?
geom_hline(yintercept = 0)+ #add a horizontal line= 0
geom_errorbar(aes(ymin=RE_D_mean-RE_D_se, ymax=RE_D_mean+RE_D_se, color= "blue"), width=0.5, size=1) + #if I add color= "blue" here, it doesn't work at all. why?
labs(y='Drought change of Reco from control % ', x= 'Gaussen Index of Aridity', color= ' ')+
scale_color_manual(values = c("NEE"="black", "RE"="green"), drop= F)+ #change the color and match the color with the second legend
geom_smooth (data = df,aes(x=GI, y=RE_D_mean),method='lm', formula = y~x,color= 'green', se=FALSE,inherit.aes = FALSE) +
theme_bw()+
#theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())+
theme(legend.position = c(0.85, 0.3))+ #change the legend position
theme(legend.title = element_blank())+ #Change the legend title to blank
theme_bw()+
theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())
#+annotate(geom="text", x=60, y=10, label="NEE",color="red")
The problem is I can't change the size and color of the dots separately. I had set the size of the dots separately, it showed a weird legend on the left. Meanwhile, even if I set the color of the second (RE) graph's dots and error bars to blue, the output color didn't change.

ggplot2 code runs and updates plot but no I see a bunch of numbers instead of data

I used the same code to plot my data last week, and got it all right.
I updated the data with new rows, so this is the only thing I changed, but then ggplot does not plot the data but a bunch of numbers as shown in the pictures I attached.
I'm sure it is something about how the data is coded, but when it happened last week, what I did was to use as.numeric, and that's it.
**# Read the data
experiment_6 <- read.csv("Data_Experiments.xlsx - experiment_6.csv", header=TRUE)
#clean the irrelevant rows
experiment_6_clean <- experiment_6[-c(1,2), ]
#check if it data.frame
is.data.frame(experiment_6_clean)
#remove Nas from dataset
#experiment_6_clean %>% drop_na()
#select relevant data
experiment_6_clean<- experiment_6_clean[1:517,]
#rename columns
experiment_6_clean<- dplyr::rename(experiment_6_clean,
mean_S_both_Par= X, plusSD_S_both_Par= X.1, minusSD_S_both_Par=X.2,
mean_S_both_Agl=X.3,plusSD_S_both_Agl= X.4, minusSD_S_both_Agl= X.5,
mean_M_both_Par= X.6,plusSD_M_both_Par= X.7, minusSD_M_both_Par= X.8,
mean_M_both_Agl= X.9, plusSD_M_both_Agl= X.10,minusSD_M_both_Agl= X.11,
mean_L_water_Sch= X.12, plusSD_L_water_Sch= X.13, minusSD_L_water_Sch= X.14,
mean_L_both_Sch= X.15, plusSD_L_both_Sch=X.16, minusSD_L_both_Sch= X.17,
Time= X1)
#make all relevant data numeric
Time<- as.numeric(experiment_6_clean$Time)
#small_pots_both_par------4"_2mm_BothSides_Par
mean_S_both_Par<- as.numeric(experiment_6_clean$mean_S_both_Par)
plusSD_S_both_Par<- as.numeric(experiment_6_clean$plusSD_S_both_Par)
minusSD_S_both_Par<- as.numeric(experiment_6_clean$minusSD_S_both_Par)
#small_pots_both_agl---4"_2mm_BothSides_Agl
mean_S_both_Agl<- as.numeric(experiment_6_clean$mean_S_both_Agl)
plusSD_S_both_Agl<- as.numeric(experiment_6_clean$plusSD_S_both_Agl)
minusSD_S_both_Agl<- as.numeric(experiment_6_clean$minusSD_S_both_Agl)
#medium_pots_both_par---6"_2mm_BothSides_Par
mean_M_both_Par<- as.numeric(experiment_6_clean$mean_M_both_Par)
plusSD_M_both_Par<- as.numeric(experiment_6_clean$plusSD_M_both_Par)
minusSD_M_both_Par<- as.numeric(experiment_6_clean$minusSD_M_both_Par)
#medium_pots_both_agl---6"_2mm_BothSides_Agl
mean_M_both_Agl<- as.numeric(experiment_6_clean$mean_M_both_Agl)
plusSD_M_both_Agl<- as.numeric(experiment_6_clean$plusSD_M_both_Agl)
minusSD_M_both_Agl<- as.numeric(experiment_6_clean$minusSD_M_both_Agl)
#large_pots_water_Sch---10"_4mm_WaterSide_Sch
mean_L_water_Sch<- as.numeric(experiment_6_clean$mean_L_water_Sch)
plusSD_L_water_Sch<- as.numeric(experiment_6_clean$plusSD_L_water_Sch)
minusSD_L_water_Sch<- as.numeric(experiment_6_clean$minusSD_L_water_Sch)
#large_pots_both_Sch---10"_5mm_BothSides_Sch
mean_L_both_Sch<- as.numeric(experiment_6_clean$mean_L_both_Sch)
plusSD_L_both_Sch<- as.numeric(experiment_6_clean$plusSD_L_both_Sch)
minusSD_L_both_Sch<- as.numeric(experiment_6_clean$minusSD_L_both_Sch)
experiment_6_clean<- as.data.frame(experiment_6_clean)
#plot
#create key
colors <- c(mean_S_both_Par = "light blue", mean_S_both_Agl = "red", mean_M_both_Par = "orange",
mean_M_both_Agl = "violet", mean_L_water_Sch = "pink", mean_L_both_Sch = "yellow")
#compare all six pots
all_six<- ggplot(experiment_6_clean, aes(x=Time))+
geom_smooth(aes(y=mean_S_both_Par, colour = "mean_S_both_Par"), size = 2, se=TRUE ) +
geom_smooth(aes(y=mean_S_both_Agl, colour = "mean_S_both_Agl"), size = 2, se=TRUE)+
geom_smooth(aes(y=mean_M_both_Par, colour = "mean_M_both_Par"), size = 2, se=TRUE)+
geom_smooth(aes(y=mean_M_both_Agl, colour = "mean_M_both_Agl"),size = 2, se=TRUE )+
geom_smooth(aes(y=mean_L_water_Sch, colour = "mean_L_water_Sch"),size = 2, se=TRUE)+
geom_smooth(aes(y=mean_L_both_Sch,colour = "mean_L_both_Sch"),size = 2, se=TRUE)+
labs(title="Experiment 6", subtitle="All Plants", caption="20 days",
y="Mositure Level", x="Time", color = "Group") +
scale_color_manual(values = colors)+
theme(plot.title=element_text(size=20, face="bold"), axis.text.x=element_text(size=15),
axis.text.y=element_text(size=15))+
coord_cartesian(ylim=c(-100, 150), xlim=c(0, 25))+
theme_bw()
all_six**
what I got this weekwhat I had last week

geom_dumbell spacing, legends in different places, and multiple aesthetics (timelines)

I saw this interesting way of creating a publication timeline using geom_dumbell, so I created my own by first loading the libraries:
library(tidyverse)
library(ggalt)
library(ggrepel)
Entering in some data:
# create data frame
df <- data.frame(
paper = c("Paper 1", "Paper 1", "Paper 2", "Paper 2", "Paper 3", "Paper 3", "Paper 3", "Paper 3"),
round = c("first","revision","first","revision","first","first","first","first"),
submission_date = c("2019-05-23","2020-12-11", "2020-08-12","2020-10-28","2020-12-10","2020-12-11","2021-01-20","2021-01-22"),
journal_type = c("physics", "physics","physics","physics","chemistry","chemistry","chemistry","chemistry"),
journal = c("journal 1", "journal 1", "journal 2", "journal 2", "journal 3", "journal 4", "journal 5", "journal 6"),
status = c("Revise and Resubmit", "Waiting for Decision", "Revise and Resubmit", "Accepted", "Desk Reject","Desk Reject", "Desk Reject","Waiting for Decision"),
decision_date = c("2019-09-29", "2021-01-24", "2020-08-27", "2020-10-29", "2020-12-10","2021-01-05","2021-01-22","2021-01-24"),
step_complete = c("yes","no","yes","yes","yes","yes","yes", "no"),
duration_days = c(129,44,15,1,0,25,2,2))
# convert variables to dates
df$decision_date = as.Date(df$decision_date)
df$submission_date = as.Date(df$submission_date)
and, finally, creating my own basic timeline using this code:
ggplot(df, aes(x = submission_date, xend = decision_date,
y = paper, label = duration_days,
color = status)) +
geom_dumbbell(size = 1, size_x = 1) +
scale_color_manual(values=c("green", "red", "darkolivegreen4", "turquoise1")) +
labs(x=NULL, color = 'Status:',
y=NULL,
title="Timeline of Journal Submissions",
subtitle="Start date, decision date, and wait time (in days) for my papers.") +
#theme_ipsum_tw() +
ggrepel::geom_label_repel(nudge_y = -.25, show.legend = FALSE) +
theme(legend.position = 'top')
As you can see from the above image, I can't see the x-axis. Additionally, I'd like to put another aesthetic and legend on the right side for the journal, perhaps putting a different shape on each line. Any other bells and whistles using the above data would be fun, too. Thanks!
Ok, I finally found some time to figure this out with help from this terrific post. To start, let's load the revised list of packages:
library(tidyverse)
library(ggalt)
library(ggrepel)
library(gridExtra)
library(gtable)
library(grid)
For comprehensiveness, let's reload the data:
# create dataframe
df <- data.frame(
paper = c("Paper 1", "Paper 1", "Paper 2", "Paper 2", "Paper 3", "Paper 3", "Paper 3", "Paper 3"),
round = c("first","revision","first","revision","first","first","first","first"),
submission_date = c("2019-05-23","2020-12-11", "2020-08-12","2020-10-28","2020-12-10","2020-12-11","2021-01-20","2021-01-22"),
journal_type = c("physics", "physics","physics","physics","chemistry","chemistry","chemistry","chemistry"),
Journal = c("journal 1", "journal 1", "journal 2", "journal 2", "journal 3", "journal 4", "journal 5", "journal 6"),
status = c("Revise and Resubmit", "Waiting for Decision", "Revise and Resubmit", "Accepted", "Desk Reject","Desk Reject", "Desk Reject","Waiting for Decision"),
decision_date = c("2019-09-29", "2021-01-24", "2020-08-27", "2020-10-29", "2020-12-10","2021-01-05","2021-01-22","2021-01-24"),
step_complete = c("yes","no","yes","yes","yes","yes","yes", "no"),
duration_days = c(129,44,15,1,0,25,2,2)
)
# convert variables to dates
df$decision_date = as.Date(df$decision_date)
df$submission_date = as.Date(df$submission_date)
First, let's create the plot with the color legend and extract it. Because I want that legend to be on top, I make sure indicate that as my legend position. Note that I specify my preferred colors using the scale_color_manual argument:
# make plot with color legend
p1 <- ggplot(df, aes(x = submission_date, xend = decision_date,
y = paper, label = duration_days,
color = status)) +
geom_dumbbell(size = 1, size_x = 1) +
scale_color_manual(values=c("green", "red", "darkolivegreen4", "turquoise1")) +
labs(x=NULL, color = 'Status:',
y=NULL,
title="Timeline of Journal Submissions",
subtitle="Start date, decision date, and wait time (in days) for my papers.") +
ggrepel::geom_label_repel(nudge_y = -.25, show.legend = FALSE) +
theme(legend.position = 'top')
# Extract the color legend - leg1
leg1 <- gtable_filter(ggplot_gtable(ggplot_build(p1)), "guide-box")
Second, let's make the plot with the shape legend and extract it. Because I want this legend to be positioned on the right side, I don't need to even specify the legend position here. Note that I specify my preferred shapes using the scale_shape_manual argument:
# make plot with shape legend
p2 <- ggplot(df, aes(x = submission_date, xend = decision_date,
y = paper, label = duration_days,
shape = Journal)) +
geom_dumbbell(size = 1, size_x = 1) +
scale_shape_manual(values=c(15, 16, 17, 18, 19,25))+
labs(x=NULL, color = 'Status:',
y=NULL,
title="Timeline of Journal Submissions",
subtitle="Start date, decision date, and wait time (in days) for my papers.") +
ggrepel::geom_label_repel(nudge_y = -.25, show.legend = FALSE)
# Extract the shape legend - leg2
leg2 <- gtable_filter(ggplot_gtable(ggplot_build(p2)), "guide-box")
Third, let's make the full plot with no legend, specifying both the scale_color_manual and scale_shape_manual arguments as well as theme(legend.position = 'none'):
# make plot without legend
plot <- ggplot(df, aes(x = submission_date, xend = decision_date,
y = paper, label = duration_days,
color =status, shape = Journal)) +
geom_dumbbell(size = 1, size_x = 3) +
scale_color_manual(values=c("green", "red", "darkolivegreen4", "turquoise1")) +
scale_shape_manual(values=c(15, 16, 17, 18, 19,25))+
labs(x=NULL, color = 'Status:',
y=NULL,
title="Timeline of Journal Submissions",
subtitle="Start date, decision date, and wait time (in days) for my papers.") +
ggrepel::geom_label_repel(nudge_y = -.25, nudge_x = -5.25, show.legend = FALSE) +
theme(legend.position = 'none')
Fourth, let's arrange everything according to our liking:
# Arrange the three components (plot, leg1, leg2)
# The two legends are positioned outside the plot:
# one at the top and the other to the side.
plotNew <- arrangeGrob(leg1, plot,
heights = unit.c(leg1$height, unit(1, "npc") - leg1$height), ncol = 1)
plotNew <- arrangeGrob(plotNew, leg2,
widths = unit.c(unit(1, "npc") - leg2$width, leg2$width), nrow = 1)
Finally, plot and enjoy the final product:
grid.newpage()
grid.draw(plotNew)
As everyone will no doubt recognize, I relied very heavily on this post. However, I did change a few things, I tried be comprehensive with my explanation, and some others spent time trying to help, so I think it is still helpful to have this answer here.

R - ggplot2 : insert subheadings in a legend

I am producing a map with ggplot2 with colours to represent six types of areas, which can be further regrouped in three categories.
With
scale_fill_manual(labels=MyLabels, values=MyColors)
I can get a simple legend like this, showing the 6 areas:
But for publication purpose, what I would like to get is:
Id est, with main categories included as subheadings in the legend.
How is that feasible ?
You can try to add fake rows into your data and with these fake rows you can create subheadings.
There is a sample data with fake values:
df <- data.frame(x = 1:9,
y = c(0, 0, 0, rnorm(6)),
g = factor(c("fakedog", "bigdog", "smalldog",
"fakerabbit", "rabbit",
"fakecat", "white", "black", "gray")))
Now I create set labels and colours to use in ggplot:
lbl <- c(expression(bold("dogs")), "big dogs", "small dogs",
expression(bold("rabbits")), "rabbits",
expression(bold("cats")), "white", "black", "gray")
colo <- c(NA, "red1", "red3",
NA, "blue1",
NA, "green1", "green2", "green3")
And plot sample data:
library(ggplot2)
ggplot(df, aes(x = x, y = y, col = g)) +
geom_point() +
scale_colour_manual("Terrirories where people like", values = colo, label = lbl) +
theme(legend.key = element_rect(fill = NA))
Final output:

ggplot2: Legend does not show all categories even with drop=FALSE

I have a p-value matrix (pvalmat) and I want to draw a tile graph to depict different ranges of p-values. Previously on stackoverflow, people have noted drop=FALSE argument would be sufficient for retaining all the categories in the tile graph. Yet it doesn't work for me.
The code I use is as follows:
library(reshape)
library(ggplot2)
t1 <- "
PC1 PC2 PC3 PC4 PC5
Sample_Group 0.8736898 0.97622168 0.2561840 0.42037376 0.1014430
Patient_ID 0.5715401 0.11196997 0.7373194 0.29259420 0.4492927
Batch 0.2372638 0.31829279 0.6886578 0.13898381 0.8962650
Gender 0.2849828 0.19308078 0.7906396 0.70711634 0.1862483
Race 0.9625020 0.86909694 0.9539444 0.45216929 0.4484681
Vital_Status 0.6132153 0.59893269 0.1587745 0.77892172 0.7018237
Family_History 0.5434387 0.19100356 1.0000000 0.20342504 0.8735441
Tissue_Source_Site 0.5448434 0.06034538 0.2239321 0.03223223 0.9604476
Initial_Weight 0.3545216 0.42727010 0.3310045 0.72190824 0.5736651
Age 0.5180032 0.28494126 0.4975151 0.37259105 0.4632363
"
con <- textConnection(t1)
pvalmat <- read.table(con, row.names = NULL)
pvalmat.m <- melt(pvalmat)
colnames(pvalmat.m) <- c("Clinical_Variables", "Principal_Component", "pval")
pvalmat.m$colorcut <- cut(pvalmat.m$pval,breaks = c(-Inf,0.001, 0.01, 0.05, 0.1, Inf), right = FALSE)
p <- ggplot(pvalmat.m, aes(Principal_Component, Clinical_Variables)) + geom_tile(aes(fill = colorcut), colour = "white") +
scale_fill_manual(breaks=c("[-Inf, 0.001)", "[0.001, 0.01)", "[0.01, 0.05)",
"[0.05, 0.1)", "[0.1, Inf)"),
values = c("darkred", "red", "orange", "yellow", "gray"),
name="P-value", labels=c("< 0.001", "< 0.01", "< 0.05", "< 0.1", "> 0.1"),
drop=FALSE) +
labs(x="Principal Components", y="Clinical Variables")
Yet i receive only one category in the legend:
Although it shows the colors correctly, why it is not showing the legend with all categories?
Thanks!
You have already broken pval into discrete categories so you do not need to do so again in scale_fill_manual.
ggplot(pvalmat.m, aes(Principal_Component, Clinical_Variables)) +
geom_tile(aes(fill = colorcut), colour = "white") +
scale_fill_manual(values = c("darkred", "red", "orange", "yellow", "gray"),
drop = FALSE,
name="P-value",
labels=c("< 0.001", "< 0.01", "< 0.05", "< 0.1", "> 0.1"))

Resources