ggplot label color category - r

I am working in ggplot and I would like to manually set the fill color of my labels according to a factor. There is a column called 'Gene', and it works when I use fill=factor(Gene), but I would like to choose my own colors: I have two graphs, and at the moment the same genes are colored differently in the two graphs.
My code looks like this:
# Read the tab-separated variant table; check.names is off, so the column
# names are taken from the file unchanged.
dat <- read.delim("SCN.study_Pvariants_42.txt", sep = "\t", check.names = FALSE)

# One bar per Index value (height = Parazscore coerced to numeric), with
# repelled labels showing Variant and filled according to Gene.
ggplot(dat, aes(x = dat$Index, y = as.numeric(dat$Parazscore))) +
  geom_bar(stat = "identity", fill = "darkcyan") +
  theme_bw() +
  labs(
    title = "SCN Paralog conservation",
    y = "Parazscore",
    x = "aminoacid sequence"
  ) +
  geom_label_repel(aes(label = dat$Variant, fill = factor(Gene)))
I tried to change it by using the scale_color_manual function and adding a vector cols:
# Read the tab-separated variant table; check.names is off, so the column
# names are taken from the file unchanged.
dat <- read.delim("SCN.study_Pvariants_42.txt", sep = "\t", check.names = FALSE)

# Named colour vector keyed by gene name.
# NOTE(review): `etc` is presumably a placeholder for the remaining genes.
cols <- c(
  "GENE1" = "lightblue",
  "GENE2" = "green",
  "GENE3" = "yellow",
  etc
)

# NOTE(review): the labels are mapped through the `fill` aesthetic, while
# scale_color_manual() below configures the colour aesthetic.
ggplot(dat, aes(x = dat$Index, y = as.numeric(dat$Parazscore))) +
  geom_bar(stat = "identity", fill = "darkcyan") +
  theme_bw() +
  labs(
    title = "SCN Paralog conservation",
    y = "Parazscore",
    x = "aminoacid sequence"
  ) +
  geom_label_repel(aes(label = dat$Variant, fill = factor(Gene))) +
  scale_color_manual(values = cols)
But regrettably, this does not work.
Could someone help me to solve this?
Thank you,
Anne

Related

ggplot reorder factors in plot without affecting legend order [duplicate]

I have produced a stacked percent barplot from the following data, which is in a csv file,
,ONE,TWO,THREE
1,2432,420,18
2,276,405,56
3,119,189,110
4,90,163,140
5,206,280,200
6,1389,1080,1075
7,3983,3258,4878
8,7123,15828,28111
9,8608,48721,52576
10,9639,44725,55951
11,8323,45695,32166
12,2496,18254,26600
13,1524,8591,18583
14,7861,1857,1680
15,10269,5165,4618
16,13560,64636,63262
using the following code
library(ggplot2)
library(reshape2)
library(scales)
# Read the CSV and drop its first column (the unnamed leading column in the
# data shown above), keeping only the count columns.
data <- read.csv(file="file.csv",sep=",",header=TRUE)
data <- data[,2:ncol(data)]
# Attach the sorted row names as an `ind` column and melt to long format.
# NOTE(review): row names are character, so sort() orders them as strings
# ("1", "10", "11", ...) -- confirm the ind-to-row pairing is as intended.
# NOTE(review): `is.var` is not a documented melt() argument (the documented
# name is `id.vars`) -- confirm.
datam <- melt(cbind(data,ind = sort(rownames(data))),is.var = c('ind'))
datam$ind <- as.numeric(datam$ind)
# Bar plot of value per variable, filled by ind, drawn with position = "fill";
# the y axis is labelled with percent_format() and the legend is titled
# "Barcode\nMatch".
ggplot(datam,aes(x = variable, y = value,fill = factor(as.numeric(ind)))) +
geom_bar(position = "fill") + scale_y_continuous(labels =percent_format()) +
scale_fill_discrete("Barcode\nMatch") +xlab("Barcode")+ylab("Reads")
The result is
The problem is that the items in the legend are not in the same order as the stacks they represent. The colours and the numbers are right but the order is not. In other words, is there a way to invert the order of the items in the legend? Thanks
you can use a new option reverse = TRUE:
# Bar plot filled by ind; guide_legend(reverse = TRUE) flips the order of the
# entries in the fill legend.
ggplot(datam, aes(x = variable, y = value, fill = factor(as.numeric(ind)))) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_discrete("Barcode\nMatch") +
  xlab("Barcode") +
  ylab("Reads") +
  guides(fill = guide_legend(reverse = TRUE))
Add + scale_fill_hue(breaks=c("new order 1","new order 2","new order...")) as in:
library(ggplot2)
# Bar chart of PlantGrowth group counts; the `breaks` vector passed to
# scale_fill_hue() sets the order of the legend entries.
ggplot(data = PlantGrowth, aes(x = group, fill = group)) +
  geom_bar() +
  # Second layer adds black outlines and is kept out of the legend.
  # `show.legend` is the current argument name; `legend = FALSE` is not a
  # geom_bar() parameter in current ggplot2.
  geom_bar(colour = "black", show.legend = FALSE) +
  scale_fill_hue(breaks = c("trt1", "ctrl", "trt2"))
I'd also check out http://wiki.stdout.org/rcookbook/Graphs/Legends%20(ggplot2)/ for more.
This may have changed and become easier with the new ggplot but I'm not sure.

Grouped Barplot, One numerical vs three factorial variables

I'm having issues with the following. I need to barplot 3 factor variables vs 1 numerical variable.
My dataset:
Site,Gall,Status,Count
Site1,absent,unhealthy,35
Site1,absent,healthy,1750
Site1,present,unhealthy,23
Site1,present,healthy,1146
Site2,absent,unhealthy,146
Site2,absent,healthy,1642
Site2,present,unhealthy,30
Site2,present,healthy,333
I have tried using ggplot, but then it only lets me define x, y, and one more option, so I have used fill=Gall.
My code looks as following, I am still missing one factor variable.
# Dodged bars of Count per Status, filled by Gall.
ggplot(dat, aes(x = Status, y = Count, fill = Gall)) +
  geom_bar(stat = "identity", position = "dodge")
Can anyone help me please ?
Thank you, much appreciated
There are a couple of solutions. If you are intent on filling by two factors, you can use interaction:
# Dodged columns filled by the combination of Site and Gall.
ggplot(dat, aes(x = Status, y = Count)) +
  geom_col(
    aes(fill = interaction(Site, Gall)),
    position = "dodge"
  )
In general though, it's better to use faceting for multiple factors. For example:
# Dodged columns filled by Gall, with one facet row per Site.
ggplot(dat, aes(x = Status, y = Count)) +
  geom_col(aes(fill = Gall), position = "dodge") +
  facet_grid(Site ~ .)
You might be better off with points instead of bars. For example:
library(dplyr)
library(ggplot2)
# Points instead of bars: colour encodes Status, shape encodes Gall, and each
# Site gets its own facet with the strip placed outside the axis.
ggplot(
  # Insert a space before the trailing digit of Site (e.g. "Site1" -> "Site 1").
  dat %>% mutate(Site = gsub("([0-9]$)", " \\1", Site)),
  aes(Status, Count, colour = Status, shape = Gall)
) +
  geom_point(size = 3, position = position_dodge(0.5), stroke = 1) +
  facet_grid(~ Site, switch = "x") +
  theme_classic() +
  theme(
    strip.placement = "outside",
    strip.background = element_blank()
  ) +
  scale_colour_manual(values = hcl(c(195, 15), 100, 65)) +
  scale_shape_manual(values = c(1, 16)) +
  labs(x = "") +
  # "none" is the supported way to drop a guide; passing FALSE is deprecated.
  guides(colour = "none")

Stacked Bar Plot for Temperature vs Home Runs

I am trying to make some changes to my plot, but am having difficulty doing so.
(1) I would like warm, avg, and cold to be filled in as the colors red, yellow, and blue, respectively.
(2) I am trying to make the y-axis read "Count" and have it be horizontally written.
(3) In the legend, I would like the title to be Temperatures, rather than variable
Any help making these changes would be much appreciated along with other suggestions to make the plot look nicer.
# Parse the inline whitespace-separated table into a data frame.
# Passing the string through `text =` lets read.table() create and close its
# own connection, instead of leaving an explicit textConnection() open.
df <- read.table(
  text = 'Statistic Warm Avg Cold
Homers(Away) 1.151 1.028 .841
Homers(Home) 1.202 1.058 .949',
  header = TRUE
)
library(ggplot2)
library(reshape2)
# Reshape to long format, keeping Statistic as the identifier column.
df <- melt(df, id = "Statistic")

# Bars of value per Statistic; `variable` drives group, shape and fill.
ggplot(
  data = df,
  aes(y = value, x = Statistic, group = variable, shape = variable, fill = variable)
) +
  geom_bar(stat = "identity")
You are on the right lines by trying to reshape the data into long format. My preference is to use gather from the tidyr package for that. You can also create the variable names Temperatures and Count in the gather step.
The next step is to turn the 3 classes of temperature into a factor, ordered from cold, through average, to warm.
Now you can plot. You want position = "dodge" to get the bars side by side, since it makes no sense to stack the values in a single bar. Fill colours you specify using scale_fill_manual.
You rotate the y-axis title by manipulating axis.title.y.
So putting all of that together (plus a black/white theme):
library(dplyr)
library(tidyr)
library(ggplot2)
# Reshape to long format (Temperatures / Count), order the temperature
# classes Cold < Avg < Warm, then draw dodged columns with manual fill
# colours and a horizontal y-axis title.
df %>%
  gather(Temperatures, Count, -Statistic) %>%
  mutate(
    Temperatures = factor(Temperatures, c("Cold", "Avg", "Warm"))
  ) %>%
  ggplot(aes(Statistic, Count)) +
  geom_col(aes(fill = Temperatures), position = "dodge") +
  # Colours are given in factor-level order: Cold, Avg, Warm.
  scale_fill_manual(values = c("blue", "yellow", "red")) +
  theme_bw() +
  theme(
    axis.title.y = element_text(angle = 0, vjust = 0.5)
  )
Result:
I'd question whether Count is a sensible variable name in this case.
You are almost there. To map specific colors to specific factor levels you can use scale_fill_manual and create your own scale:
scale_fill_manual(values=c("Warm"="red", "Avg"="yellow", "Cold"="blue")) +
Changing the y axis legend is also easy in ggplot:
ylab("Count") +
And to change the legend title you can use:
labs(fill='TEMPERATURE') +
Giving us:
# Bars of value per Statistic, filled by variable with a named colour per
# level, a retitled legend, and a horizontal y-axis title.
ggplot(
  df,
  aes(y = value, x = Statistic, group = variable, fill = variable)
) +
  geom_bar(stat = "identity") +
  scale_fill_manual(
    values = c("Warm" = "red", "Avg" = "yellow", "Cold" = "blue")
  ) +
  labs(fill = "TEMPERATURE") +
  ylab("Count") +
  xlab("") +
  theme_bw() +
  theme(axis.title.y = element_text(angle = 0, vjust = 0.5))

Color outliers multiple factors in boxplot

Let's say I have the following data frame:
library(ggplot2)
# Reproducible example data: n repetitions of a 3 x 3 grid of delta values
# and metric labels, with standard-normal draws as the measured value.
set.seed(101)
n <- 10
df <- data.frame(
  delta = rep(rep(c(0.1, 0.2, 0.3), each = 3), times = n),
  metric = rep(rep(c("P", "R", "C"), times = 3), times = n),
  value = rnorm(9 * n, mean = 0.0, sd = 1.0)
)
My goal is to do a boxplot by multiple factors:
# Boxplots of value per delta level, filled by metric.
p <- ggplot(data = df, aes(x = factor(delta), y = value)) +
  geom_boxplot(aes(fill = factor(metric)))
The output is:
So far so good, but if I do:
# Add a point layer coloured by metric on top of the boxplots.
p + geom_point(aes(color = factor(metric)))
I get:
I do not know what it is doing. My goal is to color the outliers as it is done here. Note that this solution changes the inside color of the boxes to white and set the border to different colors. I want to keep the same color of the boxes while having the outliers inherit those colors. I want to know how to make the outliers get the same colors from their respective boxplots.
Do you just want to change the outliers' colour? If so, you can do it easily by drawing the boxplot twice.
# Draw the boxplots twice: the first layer maps colour to metric (outliers
# included); the second maps fill to metric and sets outlier.colour = NA.
p <- ggplot(data = df, aes(x = factor(delta), y = value)) +
  geom_boxplot(aes(colour = factor(metric))) +
  geom_boxplot(aes(fill = factor(metric)), outlier.colour = NA)
# outlier.shape = 21 # if you want a border
[EDITED]
# One named colour per metric level, shared by the colour and fill scales.
colss <- c(P = "firebrick3", R = "skyblue", C = "mediumseagreen")
p +
  scale_colour_manual(values = colss) + # outlier colours
  scale_fill_manual(values = colss) # box colours
# The development version (2.1.0.9001) of geom_boxplot() has an outlier.fill
# argument, so I guess the code below will give similar output in the near
# future.
p2 <- ggplot(data = df, aes(x = factor(delta), y = value)) +
  geom_boxplot(
    aes(fill = factor(metric)),
    outlier.shape = 21,
    outlier.colour = NA
  )
Maybe this:
# Boxplots filled by metric with small outlier markers, plus filled points
# (pch 21) dodged to line up with their boxes and with zero jitter width.
ggplot(
  data = df,
  aes(x = as.factor(delta), y = value, fill = as.factor(metric))
) +
  geom_boxplot(outlier.size = 1) +
  geom_point(pch = 21, position = position_jitterdodge(jitter.width = 0))

Sort legend in ggplot2

I have produced a stacked percent barplot from the following data, which is in a csv file,
,ONE,TWO,THREE
1,2432,420,18
2,276,405,56
3,119,189,110
4,90,163,140
5,206,280,200
6,1389,1080,1075
7,3983,3258,4878
8,7123,15828,28111
9,8608,48721,52576
10,9639,44725,55951
11,8323,45695,32166
12,2496,18254,26600
13,1524,8591,18583
14,7861,1857,1680
15,10269,5165,4618
16,13560,64636,63262
using the following code
library(ggplot2)
library(reshape2)
library(scales)
# Read the CSV and drop its first column (the unnamed leading column in the
# data shown above), keeping only the count columns.
data <- read.csv(file="file.csv",sep=",",header=TRUE)
data <- data[,2:ncol(data)]
# Attach the sorted row names as an `ind` column and melt to long format.
# NOTE(review): row names are character, so sort() orders them as strings
# ("1", "10", "11", ...) -- confirm the ind-to-row pairing is as intended.
# NOTE(review): `is.var` is not a documented melt() argument (the documented
# name is `id.vars`) -- confirm.
datam <- melt(cbind(data,ind = sort(rownames(data))),is.var = c('ind'))
datam$ind <- as.numeric(datam$ind)
# Bar plot of value per variable, filled by ind, drawn with position = "fill";
# the y axis is labelled with percent_format() and the legend is titled
# "Barcode\nMatch".
ggplot(datam,aes(x = variable, y = value,fill = factor(as.numeric(ind)))) +
geom_bar(position = "fill") + scale_y_continuous(labels =percent_format()) +
scale_fill_discrete("Barcode\nMatch") +xlab("Barcode")+ylab("Reads")
The result is
The problem is that the items in the legend are not in the same order as the stacks they represent. The colours and the numbers are right but the order is not. In other words, is there a way to invert the order of the items in the legend? Thanks
you can use a new option reverse = TRUE:
# Bar plot filled by ind; guide_legend(reverse = TRUE) flips the order of the
# entries in the fill legend.
ggplot(datam, aes(x = variable, y = value, fill = factor(as.numeric(ind)))) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_discrete("Barcode\nMatch") +
  xlab("Barcode") +
  ylab("Reads") +
  guides(fill = guide_legend(reverse = TRUE))
Add + scale_fill_hue(breaks=c("new order 1","new order 2","new order...")) as in:
library(ggplot2)
# Bar chart of PlantGrowth group counts; the `breaks` vector passed to
# scale_fill_hue() sets the order of the legend entries.
ggplot(data = PlantGrowth, aes(x = group, fill = group)) +
  geom_bar() +
  # Second layer adds black outlines and is kept out of the legend.
  # `show.legend` is the current argument name; `legend = FALSE` is not a
  # geom_bar() parameter in current ggplot2.
  geom_bar(colour = "black", show.legend = FALSE) +
  scale_fill_hue(breaks = c("trt1", "ctrl", "trt2"))
I'd also check out http://wiki.stdout.org/rcookbook/Graphs/Legends%20(ggplot2)/ for more.
This may have changed and become easier with the new ggplot but I'm not sure.

Resources