Hello, I have a data frame such as:
tab
X molecule gene start_gene end_gene start_scaff end_scaff strand direction COL1 COL2
1 7 scaffold_1254 G7 6708 11967 1 20072 backward -1 10 20
2 5 scaffold_7638 G5 9567 10665 1 15336 backward -1 18 1
3 4 scaffold_7638 G4 3456 4479 1 15336 forward 1 18 1
4 2 scaffold_15158 G2 10105 10609 1 13487 backward -1 5 9
5 6 scaffold_8315 G6 2760 3849 1 10827 forward 1 25 7
6 3 scaffold_7180 G3 9814 10132 1 10155 backward -1 21 9
7 1 scaffold_74038 G1 1476 2010 1 2010 forward 1 8 34
so far with this code :
# Scaffold/gene map: one grey bar per scaffold, one arrow per gene and a
# repelled label carrying the gene name.
ggplot(
  data = tab,
  mapping = aes(
    x = start_scaff, xend = end_scaff,
    y = molecule, yend = molecule
  )
) +
  # Full scaffold extent, drawn as a thick grey bar.
  geom_segment(colour = "grey80", size = 3) +
  # Gene arrow: the arrowhead sits on end_gene when direction == 1 and on
  # start_gene otherwise, because the two endpoints are swapped.
  geom_segment(
    mapping = aes(
      x = ifelse(direction == 1, start_gene, end_gene),
      xend = ifelse(direction == 1, end_gene, start_gene)
    ),
    data = tab,
    size = 2,
    arrow = arrow(length = unit(0.1, "inches"))
  ) +
  geom_text_repel(
    mapping = aes(x = start_gene, y = molecule, label = gene),
    data = tab,
    size = 2,
    nudge_y = 0.5
  ) +
  # List the molecules in reversed factor-level order along the y axis.
  scale_y_discrete(limits = rev(levels(tab$molecule))) +
  theme_minimal()
I managed to get this plot:
and I wondered whether there is a way to add text just next to each geom_segment showing the COL1 and COL2 values, and to color that text depending on a threshold: green for values > 10, red for values <= 10
and get something like
dput(tab)
structure(list(X = c(7L, 5L, 4L, 2L, 6L, 3L, 1L), molecule = structure(c(1L,
5L, 5L, 2L, 6L, 3L, 4L), .Label = c("scaffold_1254", "scaffold_15158",
"scaffold_7180", "scaffold_74038", "scaffold_7638", "scaffold_8315"
), class = "factor"), gene = structure(c(7L, 5L, 4L, 2L, 6L,
3L, 1L), .Label = c("G1", "G2", "G3", "G4", "G5", "G6", "G7"), class = "factor"),
start_gene = c(6708L, 9567L, 3456L, 10105L, 2760L, 9814L,
1476L), end_gene = c(11967L, 10665L, 4479L, 10609L, 3849L,
10132L, 2010L), start_scaff = c(1L, 1L, 1L, 1L, 1L, 1L, 1L
), end_scaff = c(20072L, 15336L, 15336L, 13487L, 10827L,
10155L, 2010L), strand = structure(c(1L, 1L, 2L, 1L, 2L,
1L, 2L), .Label = c("backward", "forward"), class = "factor"),
direction = c(-1L, -1L, 1L, -1L, 1L, -1L, 1L), COL1 = c(10L,
18L, 18L, 5L, 25L, 21L, 8L), COL2 = c(20L, 1L, 1L, 9L, 7L,
9L, 34L)), class = "data.frame", row.names = c(NA, -7L))
An approximation would be
# Scaffold/gene map with "COL1 - COL2" printed to the right of each scaffold.
# Values <= 10 are drawn in red, values > 10 in dark green.
# mutate() comes from dplyr, which must be attached.
ggplot(tab, aes(x = start_scaff, xend = end_scaff,
                y = molecule, yend = molecule)) +
  geom_segment(size = 3, col = "grey80") +
  geom_segment(aes(x = ifelse(direction == 1, start_gene, end_gene),
                   xend = ifelse(direction == 1, end_gene, start_gene)),
               data = tab,
               arrow = arrow(length = unit(0.1, "inches")), size = 2) +
  geom_text_repel(aes(x = start_gene, y = molecule, label = gene),
                  data = tab, nudge_y = 0.5, size = 2) +
  scale_y_discrete(limits = rev(levels(tab$molecule))) +
  theme_minimal() +
  # Flag values at or below the threshold. `<=` (not `<`) so that a value of
  # exactly 10 is red, as requested.
  geom_text(data = mutate(tab, COLr1 = COL1 <= 10),
            aes(color = COLr1, label = COL1),
            position = position_nudge(x = 20000)) +
  geom_text(data = mutate(tab, COLr2 = COL2 <= 10),
            aes(color = COLr2, label = COL2),
            position = position_nudge(x = 22000)) +
  # Separator between the two numbers.
  geom_text(data = mutate(tab, txt = "-"), aes(label = txt),
            position = position_nudge(x = 21100)) +
  # Name the colours so the mapping does not depend on which logical values
  # happen to be present in the data.
  scale_color_manual(values = c(`FALSE` = "darkgreen", `TRUE` = "red")) +
  # Widen the x axis so the nudged labels are not clipped.
  xlim(c(NA, 23000)) +
  theme(legend.position = "none")
Related
I need help in order to add colors to ggplot objects (specificaly geom_bar).
Here is my data
Names Family Groups Values
H.sapiens A G1 2
H.erectus A G1 6
H.erectus B G2 12
M.griseus C G2 3
A.mellifera D G3 3
L.niger D G3 8
H.erectus D G3 2
L.niger A G1 3
L.niger B G2 3
A.mellifera A G1 8
So far I have succeeded in creating this plot:
with this code :
library(ggplot2)
library(ggstance)
library(ggthemes)
# Horizontal stacked bars: one bar per name, segments filled by Family and
# separated by a white outline.
ggplot(
  data = table,
  mapping = aes(x = Values, y = Names, fill = Family)
) +
  geom_barh(colour = "white", stat = "identity") +
  theme_minimal() +
  # Fix the value axis to 0-60 with no padding at either end.
  scale_x_continuous(expand = c(0, 0), limits = c(0, 60))
Now I would like to change the color depending on Groups. More precisely, I would like to choose a major color for each group, for instance: G1 = blue; G2 = green; G3 = red,
and for each Family to get a gradient within these colors. For instance, B will be dark blue and C light blue.
Does someone have an idea, please ?
Here are the data :
dput(table)
structure(list(Names = structure(c(3L, 2L, 2L, 5L, 1L, 4L, 2L,
4L, 4L, 1L), .Label = c("A.mellifera", "H.erectus", "H.sapiens",
"L.niger", "M.griseus"), class = "factor"), Family = structure(c(1L,
1L, 2L, 3L, 4L, 4L, 4L, 1L, 2L, 1L), .Label = c("A", "B", "C",
"D"), class = "factor"), Groups = structure(c(1L, 1L, 2L, 2L,
3L, 3L, 3L, 1L, 2L, 1L), .Label = c("G1", "G2", "G3"), class = "factor"),
Values = c(2L, 6L, 12L, 3L, 3L, 8L, 2L, 3L, 3L, 8L)), class = "data.frame", row.names = c(NA,
-10L))
You may perhaps tweak this one to suit your requirements (I have changed your sample data a bit to show you different gradient among same Group)
# Example data (modified from the question so that one group contains several
# families). `header = TRUE` is spelled out because `T` can be reassigned.
df <- read.table(header = TRUE, text = "Names Family Groups Values
H.sapiens A G1 2
H.erectus B G1 6
H.erectus B G2 12
M.griseus C G2 3
A.mellifera D G3 3
L.niger D G3 8
H.erectus A G3 2
L.niger A G1 3
L.niger B G2 3
A.mellifera C G1 8")
library(tidyverse)
# One base colour per group; the family is encoded as transparency by mapping
# the integer code of Family onto a continuous alpha scale.
df %>%
  ggplot() +
  geom_col(aes(x = Names, y = Values, fill = Groups,
               alpha = as.integer(as.factor(Family)))) +
  coord_flip() +
  scale_fill_manual(name = "Groups", values = c("blue", "green", "red")) +
  scale_alpha_continuous(name = "Family", range = c(0.2, 0.7)) +
  theme_classic()
Created on 2021-06-12 by the reprex package (v2.0.0)
We can create range of colours for each Group then match on order of Family. You might need to play around with colours to make the difference more prominent:
# Build one colour ramp per group, with one shade per Family level, so that
# the integer code of Family is always a valid index into the ramp (also when
# some levels are unused).
cols <- lapply(list(G1 = c("darkblue", "lightblue"),
                    G2 = c("darkgreen", "lightgreen"),
                    G3 = c("red4", "red")),
               function(i) colorRampPalette(i)(nlevels(table$Family)))
# Pick the shade for every row. The ramp is looked up by group *name*
# (as.character), not by the factor's integer code, so the result does not
# depend on the order of the Groups levels. USE.NAMES = FALSE keeps the
# result an unnamed character vector.
table$col <- mapply(function(g, i) cols[[g]][i],
                    g = as.character(table$Groups),
                    i = as.integer(table$Family),
                    USE.NAMES = FALSE)
# The colours are already final, hence scale_fill_identity().
ggplot(table, aes(x = Values, y = Names, fill = col)) +
  geom_barh(stat = "identity", colour = "white") +
  scale_x_continuous(limits = c(0, 60), expand = c(0, 0)) +
  scale_fill_identity() +
  theme_minimal()
I have to plot a 100% stacked chart with percentage labels in ggplot2. I am attaching the code along with the data I am working with. The question may be a duplicate, but the solutions I have tried either give errors or produce the same plot as below.
dput output :
structure(list(Category = structure(c(2L, 3L, 4L, 5L, 2L, 3L,
4L, 5L, 2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L), .Label = c("_",
"1", "2", "3", "4"), class = "factor"), variable = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L), .Label = c("IL1_Flag", "IL2_Flag", "IL3_Flag", "IL4_Flag",
"IL5_Flag"), class = "factor"), value = c(21, 17, 16, 219, 20,
17, 15, 207, 20, 15, 15, 204, 20, 15, 15, 202, 20, 15, 15, 208
)), row.names = c(NA, -20L), .Names = c("Category", "variable",
"value"), class = "data.frame")
library(ggplot2)
library(Hmisc)
library(plyr)
library(reshape2)
library(dplyr)
library(scales)
library(tidyverse)
# 100% stacked chart: one bar per Category, each bar split by variable and
# rescaled to fill the whole height.
ggplot(
  data = agg_melt,
  mapping = aes(x = Category, y = value, fill = variable)
) +
  geom_bar(position = "fill", stat = "identity") +
  # Show the rescaled axis as percentages.
  scale_y_continuous(labels = percent) +
  labs(y = "Percentage (%)", x = "Category")
I have used this code to try to replace the values but it is not working.
# Attempt at labelling the stacked bars with percentages (reported as not
# working by the author).
agg_melt %>%
# Percentage is each row's share of the sum over ALL rows; there is no
# grouping by Category here.
mutate(Percentage = value / sum(value)) %>%
ggplot(aes(
x = Category,
y = Percentage,
fill = `variable`,
# NOTE(review): the proportion is multiplied by 1000 before "%" is appended,
# so the label is ten times the percentage.
label = paste0(round(Percentage * 1000), "%")
)) +
geom_bar(position = "fill",
color = "black" ,
stat = "identity") +
# Centre each label vertically inside its bar segment.
geom_text(position = position_fill(vjust = .5)) +
scale_y_continuous(labels = scales::percent_format())
Can anybody help in getting the percentage labels to them ?
You can calculate the percentages in your dataframe before passing the results to ggplot.
Here, I assume you want the percentage within each category (though if you want the overall percentage, just comment out the group_by() line):
library(dplyr)
# Add column p: each row's share of the summed value.
agg_melt <- agg_melt %>%
# With this grouping, sum(value) below is the total within each Category;
# without it, the total is taken over the whole data frame.
group_by(Category) %>%
mutate(p = value / sum(value))
> head(agg_melt)
# A tibble: 6 x 4
# Groups: Category [4]
Category variable value p
<fctr> <fctr> <dbl> <dbl>
1 1 IL1_Flag 21.0 0.208
2 2 IL1_Flag 17.0 0.215
3 3 IL1_Flag 16.0 0.211
4 4 IL1_Flag 219 0.211
5 1 IL2_Flag 20.0 0.198
6 2 IL2_Flag 17.0 0.215
Pass this modified dataframe to ggplot. You can also specify the common aesthetic parameters in the top level ggplot() rather than inside individual geoms:
# 100% stacked chart with the precomputed share p printed in the middle of
# every bar segment. All aesthetics are declared once at the top level and
# shared by both layers.
ggplot(
  agg_melt,
  aes(x = Category, y = value, fill = variable, label = percent(p))
) +
  geom_col(position = position_fill()) +
  geom_text(position = position_fill(vjust = 0.5)) +
  scale_y_continuous(labels = percent) +
  labs(y = "Percentage (%)", x = "Category")
I am plotting the following data using geom_tile and geom_text in ggplot2
mydf
Var1 Var2 dc1 bin
1 H G 0.93333333 0
2 G H 0.06666667 1
3 I G 0.80000000 0
4 G I 0.20000000 1
5 J G 0.33333333 1
6 G J 0.66666667 0
7 K G 0.57894737 1
8 G K 0.42105263 0
9 I H 0.80000000 0
10 H I 0.20000000 1
11 J H 0.25000000 0
12 H J 0.75000000 1
13 K H 0.20000000 0
14 H K 0.80000000 1
15 J I 0.12500000 0
16 I J 0.87500000 1
17 K I 0.32000000 0
18 I K 0.68000000 1
19 K J 0.28571429 0
20 J K 0.71428571 1
I am plotting 'Var1' vs 'Var2', and then using the 'bin' variable as my geom_text. Currently, I have filled each tile based upon scale_fill_gradient using the variable 'dc1'.
### Plotting
# Tile plot of Var1 against Var2: tiles are filled on a white-to-red gradient
# by dc1 and labelled with bin, on a dark background with the legend hidden.
ggplot(mydf, aes(Var2, Var1, fill = dc1)) +
  # `family` is a text property, not a geom_tile() parameter, so it is not
  # passed here.
  geom_tile(colour = "gray20", size = 1.5, stat = "identity",
            height = 1, width = 1) +
  # Data, x and y are inherited from ggplot(); only the label is added.
  geom_text(aes(label = bin), color = "black", size = rel(4.5)) +
  scale_fill_gradient(low = "white", high = "firebrick3", space = "Lab",
                      na.value = "gray20", guide = "colourbar") +
  # No padding, so the tiles reach the panel border.
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  xlab("") +
  ylab("") +
  theme(axis.text.x = element_text(vjust = 1),
        axis.text.y = element_text(hjust = 0.5),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_rect(fill = NA, color = "gray20", size = 0.5,
                                    linetype = "solid"),
        axis.line = element_blank(),
        axis.ticks = element_blank(),
        axis.text = element_text(color = "white", size = rel(1.5)),
        panel.background = element_rect(fill = "gray20"),
        plot.background = element_rect(fill = "gray20"),
        legend.position = "none"
  )
Which gives this:
What I am trying to do (unsuccessfully) is to make the fill conditional upon the 'bin' variable. If bin==1 then I would like to fill according to 'dc1'. If bin==0 then I would like to fill with 'white'.
This would give the following which I have manually created as an example desired plot:
I tried messing around with scale_fill_gradient to try and introduce a second fill option, but cannot seem to figure this out. Thanks for any help/pointers.
This is the dput for mydf:
structure(list(Var1 = structure(c(4L, 5L, 3L, 5L, 2L, 5L, 1L,
5L, 3L, 4L, 2L, 4L, 1L, 4L, 2L, 3L, 1L, 3L, 1L, 2L), .Label = c("K",
"J", "I", "H", "G"), class = "factor"), Var2 = structure(c(1L,
2L, 1L, 3L, 1L, 4L, 1L, 5L, 2L, 3L, 2L, 4L, 2L, 5L, 3L, 4L, 3L,
5L, 4L, 5L), .Label = c("G", "H", "I", "J", "K"), class = "factor"),
dc1 = c(0.933333333333333, 0.0666666666666667, 0.8, 0.2,
0.333333333333333, 0.666666666666667, 0.578947368421053,
0.421052631578947, 0.8, 0.2, 0.25, 0.75, 0.2, 0.8, 0.125,
0.875, 0.32, 0.68, 0.285714285714286, 0.714285714285714),
bin = c(0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1)), .Names = c("Var1", "Var2", "dc1", "bin"), row.names = c(NA,
-20L), class = "data.frame")
Perhaps replace fill = dc1 with fill = dc1 * bin? A stripped-down version of your code:
# Multiplying dc1 by bin sends every bin == 0 tile to 0, the low ("white") end
# of the gradient, while bin == 1 tiles keep their dc1 value.
ggplot(mydf, aes(Var2, Var1)) +
  geom_tile(aes(fill = dc1 * bin)) +
  geom_text(aes(label = bin)) +
  scale_fill_gradient(high = "firebrick3", low = "white")
I have two types of data that looks like this:
Type 1 (http://dpaste.com/1697615/plain/)
Cluster-6 abTcells 1456.74119
Cluster-6 Macrophages 5656.38478
Cluster-6 Monocytes 4415.69078
Cluster-6 StemCells 1752.11026
Cluster-6 Bcells 1869.37056
Cluster-6 gdTCells 1511.35291
Cluster-6 NKCells 1412.61504
Cluster-6 DendriticCells 3326.87741
Cluster-6 StromalCells 2008.20603
Cluster-6 Neutrophils 12867.50224
Cluster-3 abTcells 471.67118
Cluster-3 Macrophages 1000.98164
Cluster-3 Monocytes 712.92273
Cluster-3 StemCells 557.88648
Cluster-3 Bcells 599.94109
Cluster-3 gdTCells 492.61994
Cluster-3 NKCells 524.42522
Cluster-3 DendriticCells 647.28811
Cluster-3 StromalCells 876.27875
Cluster-3 Neutrophils 1025.24105
And type two, (http://dpaste.com/1697602/plain/).
These values are identical with Cluster-6 in type 1 above:
abTcells 1456.74119
Macrophages 5656.38478
Monocytes 4415.69078
StemCells 1752.11026
Bcells 1869.37056
gdTCells 1511.35291
NKCells 1412.61504
DendriticCells 3326.87741
StromalCells 2008.20603
Neutrophils 12867.50224
But why when dealing with type 1 data with this code:
library(ggplot2);
library(RColorBrewer);
# Ten-colour "Set3" palette used for the fill scale.
filcol <- brewer.pal(10, "Set3")
# Type 1 data: read without a header, so the columns are V1, V2, V3.
dat <- read.table("http://dpaste.com/1697615/plain/")
# One pie per V1 value. Note that y and fill are given as dat$V3 / dat$V2,
# i.e. as vectors taken straight from the data frame, while the facet
# variable V1 is given by column name.
ggplot(dat,aes(x=factor(1),y=dat$V3,fill=dat$V2))+
facet_wrap(~V1)+
xlab("") +
ylab("") +
# position = "fill" rescales each stack to a full circle.
geom_bar(width=1,stat="identity",position = "fill") +
scale_fill_manual(values = filcol,guide = guide_legend(title = "")) +
# Polar coordinates on y turn the stacked bar into a pie.
coord_polar(theta="y")+
theme(strip.text.x = element_text(size = 8, colour = "black", angle = 0))
Ready data:
> dput(dat)
structure(list(V1 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Cluster-3",
"Cluster-6"), class = "factor"), V2 = structure(c(1L, 5L, 6L,
9L, 2L, 4L, 8L, 3L, 10L, 7L, 1L, 5L, 6L, 9L, 2L, 4L, 8L, 3L,
10L, 7L), .Label = c("abTcells", "Bcells", "DendriticCells",
"gdTCells", "Macrophages", "Monocytes", "Neutrophils", "NKCells",
"StemCells", "StromalCells"), class = "factor"), V3 = c(1456.74119,
5656.38478, 4415.69078, 1752.11026, 1869.37056, 1511.35291, 1412.61504,
3326.87741, 2008.20603, 12867.50224, 471.67118, 1000.98164, 712.92273,
557.88648, 599.94109, 492.61994, 524.42522, 647.28811, 876.27875,
1025.24105)), .Names = c("V1", "V2", "V3"), class = "data.frame", row.names = c(NA,
-20L))
This generated the following figures:
Notice that the Facet label is misplaced, Cluster-3 should be Cluster-6,
where Neutrophils takes larger proportions.
How can I resolve the problem?
When dealing with type 2 data have no problem at all.
library(ggplot2)
# Type 2 data: read without a header, so the columns are V1 and V2.
df <- read.table("http://dpaste.com/1697602/plain/")
library(RColorBrewer)
# Ten-colour "Set3" palette used for the fill scale.
filcol <- brewer.pal(10, "Set3")
# Single pie chart: a stacked bar of V2, filled by V1, in polar coordinates.
ggplot(data = df, mapping = aes(x = factor(1), y = V2, fill = V1)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar(theta = "y") +
  theme(axis.title = element_blank()) +
  scale_fill_manual(guide = guide_legend(title = ""), values = filcol) +
  theme(strip.text.x = element_text(size = 8, colour = "black", angle = 0))
Ready data:
> dput(df)
structure(list(V1 = structure(c(1L, 5L, 6L, 9L, 2L, 4L, 8L, 3L,
10L, 7L), .Label = c("abTcells", "Bcells", "DendriticCells",
"gdTCells", "Macrophages", "Monocytes", "Neutrophils", "NKCells",
"StemCells", "StromalCells"), class = "factor"), V2 = c(1456.74119,
5656.38478, 4415.69078, 1752.11026, 1869.37056, 1511.35291, 1412.61504,
3326.87741, 2008.20603, 12867.50224)), .Names = c("V1", "V2"), class = "data.frame", row.names = c(NA,
-10L))
It's because you use the data frame name in aes(...). This fixes the problem.
# Faceted pies with every aesthetic given by column name, so that y, fill and
# the facet variable are all resolved inside `dat`.
ggplot(data = dat, mapping = aes(x = factor(1), y = V3, fill = V2)) +
  facet_wrap(~V1) +
  labs(x = "", y = "") +
  # position = "fill" rescales each stack to a full circle.
  geom_bar(stat = "identity", position = "fill", width = 1) +
  scale_fill_manual(guide = guide_legend(title = ""), values = filcol) +
  coord_polar(theta = "y") +
  theme(strip.text.x = element_text(size = 8, colour = "black", angle = 0))
In defining the facets, you reference V1 in the context of the default dataset, and ggplot sorts alphabetically by level (so "Cluster-3" comes first). In your call to aes(...) you reference dat$V3 directly, so ggplot goes out of the context of the default dataset to the original dataframe. There, Cluster-6 is first.
As a general comment, one should never reference data in aes(...) outside the context of the dataset defined with data=.... So:
ggplot(data=dat, aes(y=V3...)) # good
ggplot(data=dat, aes(y=dat$V3...)) # bad
Your problem is a perfect example of why the second option is bad.
I have written the following code:
library(ggplot2)
# Twelve observations: six persons (matching), each with one (x, y) point for
# year 2010 and one for year 2011.
data <- structure(list(x = c(1L, 6L, 3L, 4L, 2L, 3L, 6L, 1L, 5L, 2L,
1L, 5L), y = c(1L, 7L, 5L, 6L, 3L, 4L, 6L, 2L, 5L, 6L, 5L, 2L
), year = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("2010", "2011"), class = "factor"), matching = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("person1",
"person2", "person3", "person4", "person5", "person6"), class = "factor")), .Names = c("x",
"y", "year", "matching"), row.names = c(NA, -12L), class = "data.frame")
# year is already a factor in the structure above; this call re-applies factor().
data$year <- factor(data$year)
# Three colours are supplied although year has only two levels.
colors <- c("#4cb5ee", "#a0d099", "red")
p <- ggplot(data, aes(x=x, y=y)) +
# Points coloured by year.
geom_point(aes(colour=year), shape=16, size=6) +
# One line with an arrowhead per person, joining that person's two points.
# NOTE(review): per the ggplot2 documentation geom_line() connects points in
# order of x, which would make the arrow direction independent of year - confirm.
geom_line(aes(group=matching), arrow=arrow(length=unit(0.15,"cm")), colour="black", size=1) +
xlab("x") + ylab("y") +
scale_colour_manual("year", values=colors) +
# Both axes run from 1 to 7 with a break at every integer.
scale_x_continuous(limits=c(1,7), breaks=seq(1,7, by=1)) +
scale_y_continuous(limits=c(1,7), breaks=seq(1,7, by=1))
print(p)
It gives the following output:
But what I want geom_line() to do is to always point at the point where year=2011. I can't figure out why the arrow of the line sometimes points at a point which refers to year=2010 and sometimes at a point where year=2011.
What I found out is that arrow takes several arguments:
arrow(angle = 30, length = unit(0.25, "inches"), ends = "last", type = "open")
So that I could say ends="first". But I can't generalize that ends is always first or always last.
I tried to add a column to my data.frame which holds the information whether the arrow should end first or last, but it didn't give me the output I wanted.
Every help is highly appreciated :-)
Thanks in advance!
geom_path should do the trick:
# Draw an arrow from each person's 2010 point to their 2011 point.
# geom_path() joins observations in the order in which they appear in the
# data, and the arrowhead is drawn at the last point (ends = "last" is the
# default). The rows are therefore sorted by year for this layer, so the
# arrow ends on 2011 whatever the row order of `data`.
p <- ggplot(data, aes(x = x, y = y)) +
  geom_point(aes(colour = year), shape = 16, size = 6) +
  geom_path(data = data[order(data$year), ],
            aes(group = matching),
            arrow = arrow(length = unit(0.15, "cm")),
            colour = "black", size = 1) +
  xlab("x") + ylab("y") +
  scale_colour_manual("year", values = colors) +
  scale_x_continuous(limits = c(1, 7), breaks = seq(1, 7, by = 1)) +
  scale_y_continuous(limits = c(1, 7), breaks = seq(1, 7, by = 1))
print(p)
There is probably a more efficient way to do this, but one approach is to use geom_segment() instead of geom_line(). This will allow you to specify the beginning and ending points of the line with ease. We have to restructure the data so that we can specify x, y, xend, and yend. I will restructure with merge, though you could probably do this with cast or reshape.
# Put each person's 2010 and 2011 rows side by side: one row per `matching`,
# with the 2010 columns suffixed 1 and the 2011 columns suffixed 2.
zz <- merge(
  x = data[data$year == 2010, ],
  y = data[data$year == 2011, ],
  by = "matching",
  suffixes = 1:2
)
matching x1 y1 year1 x2 y2 year2
1 person1 1 1 2010 6 6 2011
2 person2 6 7 2010 1 2 2011
3 person3 3 5 2010 5 5 2011
4 person4 4 6 2010 2 6 2011
5 person5 2 3 2010 1 5 2011
6 person6 3 4 2010 5 2 2011
We will then use two datasets in our call to ggplot:
# Two data sets in one plot: `data` supplies the points, `zz` supplies one
# segment per person running from (x1, y1) to (x2, y2), arrowhead at the end.
ggplot() +
  # Points, coloured by year.
  geom_point(
    data = data,
    mapping = aes(x = x, y = y, colour = year),
    size = 6, shape = 16
  ) +
  # Segments with an explicit start and end.
  geom_segment(
    data = zz,
    mapping = aes(x = x1, y = y1, xend = x2, yend = y2),
    colour = "black", size = 1,
    arrow = arrow(length = unit(0.15, "cm"))
  ) +
  labs(x = "x", y = "y") +
  scale_colour_manual("year", values = colors) +
  scale_x_continuous(breaks = seq(1, 7, by = 1), limits = c(1, 7)) +
  scale_y_continuous(breaks = seq(1, 7, by = 1), limits = c(1, 7))