Remove shape from legend of combined geom_line() and geom_pont() - r

I want to create a graph of geom_line() coloured by a variable (Var1) then plot geom_point() with shapes according to a different variable (Var2) with the same colours as geom_line().
After reading a lot about this but not being able to find anything that I could interpret as being the same issue I have attempted the following:
ggplot(data, aes(X, Y)) +
geom_line(aes(color = Var1)) +
geom_point(data = subset(data, Var2 != 0), aes(shape = Var2, colour = Var1), size = 3) +
scale_color_manual(values=c("#7CAE00", "#00BFC4", "#000000", "#C77CFF")) +
scale_x_continuous(breaks=seq(0,30,5)) +
theme_bw()
Which results in the above. The issue with this graph is that the second legend has both IDs are circles when one is a circle and one is a triangle. I would ideally like it to just be a coloured line with no shapes at all.
I've also tried this:
ggplot(data, aes(X, Y)) +
geom_line(aes(color = Var1)) +
geom_point(data = subset(data, Var2 != 0), aes(shape = Var2), size = 3) +
scale_color_manual(values=c("#7CAE00", "#00BFC4", "#000000", "#C77CFF")) +
scale_x_continuous(breaks=seq(0,30,5)) +
theme_bw()
This issue with this graph is that the shapes are not filled in by colour in the graph.
This is my data.
dput(data)
structure(list(X = c(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L), Y = c(1L, 1L, 1L, 2L, 4L, 13L, 18L, 19L,
21L, 24L, 34L, 43L, 70L, 90L, 129L, 169L, 1L, 3L, 3L, 3L, 3L,
4L, 21L, 79L, 157L, 229L, 323L, 470L, 655L, 889L, 1128L, 1701L,
2036L, 2502L, 3089L, 3858L, 4636L, 5883L, 7375L, 9172L, 10149L
), Var1 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("",
"ID1", "ID2"), class = "factor"), Var2 = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 4L, 2L, 2L), .Label = c("", "0", "Point1", "Point2"
), class = "factor")), row.names = c(NA, -41L), class = "data.frame")

How about this
ggplot(data, aes(X, Y))+
geom_line(aes(color = Var1)) +
geom_point(data = subset(data, Var2 != 0), aes(shape = Var2, color=Var1), size = 3) +
scale_color_manual(values=c("#7CAE00", "#00BFC4", "#000000", "#C77CFF")) +
scale_x_continuous(breaks=seq(0,30,5)) +
theme_bw()+
guides(colour = guide_legend(override.aes = list(shape = NA)))

Related

Cannot plot the correct x-axis in ggplot2

I am plotting the following data using ggplot2 in R.
dat<-structure(list(Month = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L,
8L, 8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
12L, 12L, 12L, 12L), grp1 = structure(c(1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L), .Label = c("(-Inf,2]", "(2,7]", "(7,14]",
"(14, Inf]"), class = "factor"), n = c(71L, 59L, 36L, 10L, 55L,
73L, 18L, 10L, 97L, 82L, 22L, 5L, 120L, 79L, 15L, 2L, 140L, 62L,
15L, 174L, 60L, 11L, 188L, 71L, 2L, 183L, 53L, 2L, 211L, 50L,
2L, 171L, 69L, 7L, 1L, 98L, 85L, 13L, 6L, 72L, 62L, 24L, 9L)), class
= "data.frame", row.names = c(NA,-43L))
Here's my script:
library(ggplot2)
p<-ggplot(data=test,aes(Month, n, fill = grp1))
p<- p + geom_col()
p <- p + theme(panel.background=element_rect(fill="white"),
plot.margin = margin(0.5,0.5,0.5,0.5, "cm"),
panel.border=element_rect(colour="black",fill=NA,size=1),
axis.line.x=element_line(colour="black"),
axis.line.y=element_line(colour="black"),
axis.text=element_text(size=20,colour="black",family="sans"),
axis.title=element_text(size=20,colour="black",family="sans"),
legend.position = "right", legend.key = element_rect(fill = 'white'))
p <- p + scale_y_continuous(limits = c(0,300),breaks=c(seq(0,300,50)), expand=c(0,0))
p <- p + scale_x_discrete(breaks=c(seq(1,12,1)),labels=c("JAN","FEB","MAR","APR","MAY","JUN","JUL","AUG","SEP","OCT","NOV","DEC"),expand=c(0,0))
p <- p + labs(x = "Month", y = "Number of Days")
Here's the output:
Why is it that I cannot plot the x-axis values?
If I don't set the scale_x_discrete, the plot will look like this:
Any ideas on how to solve this?
I'll appreciate any help.
If you want the Month name along the xaxis, then you can add in as.factor(Month) to your ggplot script. Heres an example:-
p<-ggplot(data=dat,aes(as.factor(Month), n, fill = grp1))
p<- p + geom_col()
p <- p + theme(panel.background=element_rect(fill="white"),
plot.margin = margin(0.5,0.5,0.5,0.5, "cm"),
panel.border=element_rect(colour="black",fill=NA,size=1),
axis.line.x=element_line(colour="black"),
axis.line.y=element_line(colour="black"),
axis.text=element_text(size=20,colour="black",family="sans"),
axis.title=element_text(size=20,colour="black",family="sans"),
legend.position = "right", legend.key = element_rect(fill = 'white'))
p <- p + scale_y_continuous(limits = c(0,300),breaks=c(seq(0,300,50)), expand=c(0,0))
p <- p + scale_x_discrete(breaks=c(seq(1,12,1)),labels=c("JAN","FEB","MAR","APR","MAY","JUN","JUL","AUG","SEP","OCT","NOV","DEC"),expand=c(0,0))
p <- p + labs(x = "Month", y = "Number of Days")
p
Which gives you this:-

ggarrange() function overvrites the color of my boxplots

I am making two boxplots and want to arrange them beside each other. I have made each of them look like I want when displaying them separately but when I use ggarrange() the colors disappear. This is my code for the plots:
BOX1_data <- read.table(file = "clipboard",
sep = "\t", header=TRUE)
BOX1_data$Diagnosis <- as.factor(BOX1_data$Diagnosis)
BOX1plot <- ggplot(BOX1_data, aes(x=Diagnosis, y=No.Variants, fill= Diagnosis)) + geom_boxplot() +
scale_fill_brewer(palette = "Dark2") +
scale_x_discrete(labels = c("AC\nN=38", "SqCC\nN=15", "SCLC\nN=8", "BL disease\nN=16"))
BOX2_data <- read.table(file = "clipboard",
sep = "\t", header=TRUE)
BOX2_data$Stage <- as.factor(BOX2_data$Stage)
BOX2plot <- ggplot(BOX2_data, aes(x=Stage, y=No.Variants, fill = Stage)) + geom_boxplot(width = 0.4) +
scale_fill_brewer(palette = "Dark2") +
scale_x_discrete(labels = c("Stage I-III\nN=24", "Stage IV\nN=37"))
To arrange the plots I then write:
BOX_list <- list(BOX1plot, BOX2plot)
ggarrange(plotlist = BOX_list, labels = c('A', 'B'), ncol = 2)
The easiest way of getting rid of gridlines etc I thought was by using theme_set() and I think that this might be my problem.
My code is:
theme_set(theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.background = element_blank(),
axis.line = element_line(colour = "grey")))
I realize that theme_bw() overwrites my colors in the boxes. But I have tried removing it, switching it for theme_transparent() (this removes all my labels) and neither works. I have searched for a way of just adding a transparency to my boxes in the theme so that my colors will shine through. I am also suspicious that maybe the palette that I chose might give me the same colors in the two plots which I also do not want. To add, if it matters, I have 4 groups in the first plot and 2 in the second.
dput(BOX1_data)
structure(list(Diagnosis = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("1", "2", "3", "4"), class = "factor"),
No.Variants = c(3L, 4L, 6L, 14L, 3L, 3L, 4L, 3L, 3L, 3L,
8L, 6L, 22L, 10L, 6L, 9L, 1L, 9L, 3L, 4L, 8L, 2L, 13L, 3L,
11L, 19L, 5L, 5L, 3L, 12L, 4L, 2L, 4L, 18L, 8L, 7L, 7L, 12L,
4L, 1L, 6L, 3L, 2L, 8L, 10L, 3L, 15L, 9L, 13L, 13L, 15L,
10L, 10L, 12L, 6L, 3L, 12L, 9L, 15L, 10L, 18L, 3L, 6L, 3L,
6L, 1L, 3L, 3L, 7L, 1L, 2L, 10L, 7L, 7L, 1L, 0L, 2L)), row.names = c(NA,
-77L), class = "data.frame")
dput(BOX2_data)
structure(list(No.Variants = c(3L, 4L, 6L, 14L, 3L, 3L, 4L, 3L,
3L, 3L, 8L, 6L, 22L, 10L, 6L, 9L, 1L, 9L, 3L, 4L, 8L, 2L, 13L,
3L, 11L, 19L, 5L, 5L, 3L, 12L, 4L, 2L, 4L, 18L, 8L, 7L, 7L, 12L,
4L, 1L, 6L, 3L, 2L, 8L, 10L, 3L, 15L, 9L, 13L, 13L, 15L, 10L,
10L, 12L, 6L, 3L, 12L, 9L, 15L, 10L, 18L), Stage = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1",
"2"), class = "factor")), row.names = c(NA, -61L), class = "data.frame")
Grateful for any tips!
As already pointed out, it seems the OP's issue with theme_set() removing the fill colors set in your two plots was solved by updating to a new version of ggplot2. Herein, I have a solution for the second part of OP's question (that was clarified in the comments). Represented here for convenience:
Now it is just the problem that I want the palette to continue on the second plot's boxes and not restart so that I will get different colors on all boxes.
In order to do this, one has to realize that there are 4 fill colors for the first plot BOX1plot, and 2 fill colors for BOX2plot. For BOX1plot, we want the color palette to begin at the first color, but for BOX2plot, we want the palette to start on the 5th color sequence in the palette. There's no way to do this through the scale_*_brewer() functions, so the approach here will be to access the Brewer palette from RcolorBrewer::brewer.pal(), and then assign where to begin and end in that sequence based on the number of levels of each factor using scale_fill_manual() to just set the color values from the extracted Brewer color palette.
You can just "know" that you need to "use colors 1-4" for BOX1plot and "use color 5 and 6" for BOX2plot; however, it is much more elegant to just calculate this automatically based on the number of levels (in case you want to run this again). The code below does this:
library(ggplot2)
library(ggpubr)
library(RColorBrewer)
# ... read in your data as before
# create factors (as OP did before)
BOX1_data$Diagnosis <- as.factor(BOX1_data$Diagnosis)
BOX2_data$Stage <- as.factor(BOX2_data$Stage)
# make color palette based on Brewer "Dark2" palette
lev_diag <- length(levels(BOX1_data$Diagnosis))
lev_stage <- length(levels(BOX2_data$Stage))
lev_total <- lev_diag + lev_stage
my_colors <- brewer.pal(lev_total, "Dark2")
BOX1plot <- ggplot(BOX1_data, aes(x=Diagnosis, y=No.Variants, fill= Diagnosis)) + geom_boxplot() +
scale_fill_manual(values=my_colors[1:lev_diag]) +
scale_x_discrete(labels = c("AC\nN=38", "SqCC\nN=15", "SCLC\nN=8", "BL disease\nN=16"))
BOX2plot <- ggplot(BOX2_data, aes(x=Stage, y=No.Variants, fill = Stage)) + geom_boxplot(width = 0.4) +
scale_fill_manual(values = my_colors[(lev_diag+1):lev_total]) +
scale_x_discrete(labels = c("Stage I-III\nN=24", "Stage IV\nN=37"))
BOX_list <- list(BOX1plot, BOX2plot)
ggarrange(plotlist = BOX_list, labels = c('A', 'B'), ncol = 2)
If you have issues with ggarrange() I would suggest next approach using patchwork:
library(ggplot2)
library(patchwork)
#Data format
BOX1_data$Diagnosis <- as.factor(BOX1_data$Diagnosis)
#Plot 1
BOX1plot <- ggplot(BOX1_data, aes(x=Diagnosis, y=No.Variants, fill= Diagnosis)) + geom_boxplot() +
scale_fill_brewer(palette = "Dark2") +
scale_x_discrete(labels = c("AC\nN=38", "SqCC\nN=15", "SCLC\nN=8", "BL disease\nN=16"))
#Data format
BOX2_data$Stage <- as.factor(BOX2_data$Stage)
#Plot 2
BOX2plot <- ggplot(BOX2_data, aes(x=Stage, y=No.Variants, fill = Stage)) + geom_boxplot(width = 0.4) +
scale_fill_brewer(palette = "Dark2") +
scale_x_discrete(labels = c("Stage I-III\nN=24", "Stage IV\nN=37"))
#Arrange plots
BOX1plot+BOX2plot+plot_annotation(tag_levels = 'A')
The output:

Remove three sides of border around ggplot facet strip label

I have the following graph:
And would like to make what I thought would be a very simple change: I would like to remove the top, right and bottom sides of the left facet label border lines.
How do I do I remove those lines, or draw the equivalent of the right hand lines? I would rather not muck about with grobs, if possible, but won't say no to any solution that works.
Graph code:
library(ggplot2)
library(dplyr)
library(forcats)
posthoc1 %>%
mutate(ordering = -as.numeric(Dataset) + Test.stat,
Species2 = fct_reorder(Species2, ordering, .desc = F)) %>%
ggplot(aes(x=Coef, y=Species2, reorder(Coef, Taxa), group=Species2, colour=Taxa)) +
geom_point(size=posthoc1$Test.stat*.25, show.legend = FALSE) +
ylab("") +
theme_classic(base_size = 20) +
facet_grid(Taxa~Dataset, scales = "free_y", space = "free_y", switch = "y") +
geom_vline(xintercept = 0) +
theme(axis.text.x=element_text(colour = "black"),
strip.placement = "outside",
strip.background.x=element_rect(color = NA, fill=NA),
strip.background.y=element_rect(color = "black", fill=NA)) +
coord_cartesian(clip = "off") +
scale_x_continuous(limits=NULL)
Data:
structure(list(Dataset = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 5L, 5L, 5L, 5L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L), .Label = c("All.habitat", "Aut.habitat", "Habitat.season",
"Lit.season", "Spr.habitat"), class = "factor"), Species = structure(c(1L,
2L, 3L, 5L, 6L, 10L, 11L, 12L, 13L, 1L, 3L, 5L, 6L, 13L, 1L,
2L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L), .Label = c("Ar.sp1",
"Ar.sp2", "Arc.sp1", "B.pus", "Dal.sp1.bumps", "Dip.unID", "I.palladium",
"Pale", "Ph.sp3", "Port", "Somethus", "sty", "Sty.sp1"), class = "factor"),
Species2 = structure(c(2L, 9L, 1L, 4L, 5L, 7L, 11L, 12L,
13L, 2L, 1L, 4L, 5L, 13L, 2L, 9L, 4L, 5L, 6L, 10L, 8L, 7L,
11L, 13L), .Label = c("Arcitalitrus sp1", "Armadillidae sp1 ",
"Brachyiulus pusillus ", "Dalodesmidae sp1", "Diplopoda",
"Isocladosoma pallidulum ", "Ommatoiulus moreleti ", "Philosciidae sp2",
"Porcellionidae sp1", "Siphonotidae sp2", "Somethus sp1",
"Styloniscidae ", "Styloniscidae sp1"), class = "factor"),
Taxa = structure(c(3L, 3L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
1L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 3L), .Label = c("Amphipoda",
"Diplopoda", "Isopoda"), class = "factor"), Variable = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Autumn", "Litter",
"Spring", "Summer"), class = "factor"), Coef = c(1.911502938,
2.086917154, 1.571872993, 12.61184801, 15.6161116, -1.430032837,
-12.51944478, 12.33934516, -8.040249562, 8.08258816, 1.780142396,
12.88982576, 16.78107544, -13.22641153, 1.68810887, 2.093965381,
12.27209197, 15.08328526, -6.334640911, -11.29985948, -11.62658947,
-1.676293808, -6.246555908, -3.470297147), SE = c(0.403497472,
2.21607562, 0.348600794, 2.423896379, 0.509468128, 3.423013791,
2.382857733, 1.775086895, 2.087788334, 2.23631504, 0.33402261,
2.518562443, 0.459720131, 1.950974996, 0.2476205, 0.235648095,
1.815155489, 0.325804415, 2.564680067, 2.437104984, 2.212583358,
2.677618401, 2.324019051, 0.420436743), Test.stat = c(18.36532749,
13.27324683, 13.29039037, 20.50277493, 44.06097153, 10.55234932,
14.64951518, 13.22575401, 20.16415411, 16.55627107, 11.81407568,
15.15213717, 40.67205188, 12.62233207, 37.60085488, 16.90879258,
20.20215107, 80.30520371, 13.35250626, 13.01692428, 17.52987519,
20.03658771, 12.02467914, 53.5052683)), row.names = 10:33, class = "data.frame")
This solution is based on grobs: find positions of "strip-l" (left strips) and then substitute the rect grobs with line grobs.
p <- posthoc1 %>%
mutate(ordering = -as.numeric(Dataset) + Test.stat,
Species2 = fct_reorder(Species2, ordering, .desc = F)) %>%
ggplot(aes(x=Coef, y=Species2, reorder(Coef, Taxa), group=Species2, colour=Taxa)) +
geom_point(size=posthoc1$Test.stat*.25, show.legend = FALSE) +
ylab("") +
theme_classic(base_size = 20) +
facet_grid(Taxa~Dataset, scales = "free_y", space = "free_y", switch = "y") +
geom_vline(xintercept = 0) +
theme(axis.text.x=element_text(colour = "black"),
strip.placement = "outside",
#strip.background.x=element_rect(color = "white", fill=NULL),
strip.background.y=element_rect(color = NA)
) +
coord_cartesian(clip = "off") +
scale_x_continuous(limits=NULL)
library(grid)
q <- ggplotGrob(p)
lg <- linesGrob(x=unit(c(0,0),"npc"), y=unit(c(0,1),"npc"),
gp=gpar(col="red", lwd=4))
for (k in grep("strip-l",q$layout$name)) {
q$grobs[[k]]$grobs[[1]]$children[[1]] <- lg
}
grid.draw(q)

Why does ggtern distort data

I can't work out why my data points in the ternary diagram appear distorted, particularly visible in Fe02 scale where none of the values approaching 50% seem to be plotting correctly. Does ggtern require some data transformation or am I missing something?
The dataset:
KiDaSm<-structure(list(Site = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Dakawa", "Fukuchani",
"Kilwa", "Mkokotoni", "Tumbe Chwaka", "Unguja Ukuu"), class = "factor"),
Sample = structure(c(7L, 8L, 9L, 10L, 11L, 14L, 15L, 16L,
17L, 19L, 20L, 21L, 23L, 24L, 25L, 26L), .Label = c("EB005",
"EB008", "EB009", "EB017", "EB018", "EB023", "EB028", "EB030",
"EB033", "EB034", "EB035", "EB036", "EB037", "EB038", "EB040",
"EBDAK002", "EBDAK006", "EBDAK007", "EBDAK009", "EBDAK012",
"EBDAK014", "EBDAK015", "EBDAK017", "EBDAK020", "EBDAK021",
"EBDAK022", "FKCH002", "FKCH003", "FKCH005", "FKCH006", "FKCH008",
"FKCH009", "FKCH010", "FKCH012", "FKCH014", "FKCH015", "FKCH016",
"FKCH017", "FKCH018", "FKCH019", "FKCH023", "MKK002", "MKK003",
"MKK007", "MKK009", "MKK011", "MKK013", "MKK014", "MKK017",
"MKK018", "MKK020", "MKK06", "TBCH001", "TBCH002", "TBCH003",
"TBCH005", "TBCH007", "TBCH008", "TBCH009", "TBCH010", "TBCH011",
"TBCH014", "TBCH017", "TBCH018", "TBCH021", "TBCH022", "UU001",
"UU003", "UU004", "UU005", "UU007", "UU008", "UU010", "UU011",
"UU012", "UU014", "UU018", "UU020", "UU022", "UU023", "UU026",
"UU031", "UU033"), class = "factor"), ID = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("ND", "Smelting", "Smithing"), class = "factor"),
Iron = c(52.2866002788889, 57.437955161, 55.880450631, 50.213473286,
53.068958017, 55.776340727, 56.764639409, 61.37738424, 75.741474131,
75.459980082, 69.785922113, 76.298245515, 75.860464737, 77.221978734,
76.602317775, 67.582636787), Aluminium = c(8.07348620588889,
6.9369729006, 6.4314347298, 7.7061493869, 7.3254949831, 7.2108549156,
7.2113019865, 8.2022565362, 4.570137602, 4.3668232665, 5.8538177888,
4.5660791632, 4.2671637947, 4.727287541, 4.7084385736, 6.0287010895
), Silicon = c(24.6786504477778, 22.516695383, 24.261662172,
26.81463386, 25.558654883, 23.062108874, 23.144722305, 26.480492462,
17.138349267, 16.917779397, 19.620246624, 16.265818105, 17.628059944,
15.696017597, 15.786928218, 22.04500569)), .Names = c("Site",
"Sample", "ID", "Iron", "Aluminium", "Silicon"), row.names = c(NA,
-16L), class = "data.frame")
My code:
library(ggtern)
ggtern(KiDaSm, aes(Iron,Silicon, Aluminium, color=Site, shape=Site )) + geom_point() +
labs(x = expression(FeO[2]), y=expression(SiO[2]), z=expression(Al[2]*O[3])) +
scale_color_manual(values = c("#FFC300", "#FF5733")) +
theme_bw()
Ternary diagram:

how to avoid overlapping labels with identical data points in scatterplot / ggplot?

Is there any function etc which avoids overlapping data labels for identical data points in a scatter plot?
I have checked the various questions/responses to textxy, direct.label, and geom_text(), but I haven't been successful. Maybe it's simply not possible.
Here's a sample of the relevant data:
structure(list(cowc = structure(c(5L, 7L, 24L, 24L, 23L, 36L,
34L, 38L, 23L, 6L, 8L, 38L, 38L, 23L, 5L, 7L, 24L, 24L, 23L,
36L, 34L, 38L, 23L, 6L, 8L, 38L, 38L, 23L), .Label = c("AFG",
"ANG", "AZE", "BNG", "BOS", "BUI", "CAM", "CDI", "CHA", "COL",
"CRO", "DOM", "DRC", "ETH", "GNB", "GRG", "GUA", "IND", "INS",
"IRQ", "KEN", "LAO", "LBR", "LEB", "MAL", "MLD", "MZM", "NEP",
"NIC", "PHI", "PNG", "RUS", "RWA", "SAF", "SAL", "SIE", "SOM",
"SUD", "TAJ", "UKG", "YAR", "ZIM"), class = "factor"), conflict = c("Bosnia 92-95",
"Cambodia 70-91", "Lebanon 58-58", "Lebanon 75-89", "Liberia 89-93",
"SieLeo 91-96", "Stafrica 83-91", "Sudan 63-72", "Liberia 94-96",
"Burundi 1993-2005", "Cote d'Ivoire 2002-2007", "Darfur, Sudan 2003-2010",
"Sudan 83-05", "Liberia 1999-2003", "Bosnia 92-95", "Cambodia 70-91",
"Lebanon 58-58", "Lebanon 75-89", "Liberia 89-93", "SieLeo 91-96",
"Stafrica 83-91", "Sudan 63-72", "Liberia 94-96", "Burundi 1993-2005",
"Cote d'Ivoire 2002-2007", "Darfur, Sudan 2003-2010", "Sudan 83-05",
"Liberia 1999-2003"), totalps = c(3L, 2L, 2L, 2L, 1L, 3L, 4L,
3L, 1L, 3L, 3L, 4L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 3L, 4L, 3L, 1L,
3L, 3L, 4L, 3L, 3L), vetotype = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("strictvetos", "lenientvetos"
), class = "factor"), intensity = c(3L, 4L, 2L, 5L, 2L, 2L, 2L,
2L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 4L, 2L, 6L, 2L, 2L, 4L, 2L, 2L,
3L, 3L, 2L, 2L, 2L)), .Names = c("cowc", "conflict", "totalps",
"vetotype", "intensity"), class = "data.frame", row.names = c(NA,
-28L))
Here's my code:
vetotype.plot <- ggplot(vetotype.x, aes(x=totalps, y=intensity, color=conflict))+
geom_point() +
labs(x="number of power-sharing arenas", y="intensity") +
ggtitle("Number of Power-Sharing areas and Veto intensity") +
geom_text(aes(label=conflict),hjust=0, vjust=0, size=4)+
scale_x_continuous(limits=c(1, 5))+
theme(legend.position="none")+
facet_wrap(~vetotype, nrow=2)
plot(vetotype.plot)
And below is my graph. I manually highlighted those data points which are overlapping.
What I am looking for is an 'automatic' way to get the labels of the overlapping data points displayed in way so that they don't overlap. Is there any function for this purpose? Many thanks!
This is not a completely general solution, but it does seem to work in your case.
library(ggplot2)
# identify duplicated points
dupes <- aggregate(conflict~totalps+intensity+vetotype,vetotype.x,length)
colnames(dupes)[4] = "dupe"
df <- merge(vetotype.x,dupes) # add dupe column
df$vjust <- 0 # default vertical offset is 0
# calculate vertical offsets based on number of dupes
for (i in 2:max(df$dupe)) df[df$dupe==i,]$vjust<-seq(-trunc(i/2),-trunc(i/2)+i-1)
# render the plot
vetotype.plot <- ggplot(df, aes(x=totalps, y=intensity, color=conflict))+
geom_point() +
labs(x="number of power-sharing arenas", y="intensity") +
ggtitle("Number of Power-Sharing areas and Veto intensity") +
geom_text(aes(label=conflict,vjust=vjust), hjust=0,size=4)+
scale_x_continuous(limits=c(1, 5))+
scale_y_continuous(limits=c(1, 6))+
theme(legend.position="none")+
facet_wrap(~vetotype, nrow=2)
plot(vetotype.plot)
ggrepel can now do this easily:
https://twitter.com/slowkow/status/686341190749392896
Here's what your plot looks like with ggrepel:
library(ggrepel)
ggplot(vetotype.x, aes(x=totalps, y=intensity, color=conflict))+
geom_point() +
labs(x="number of power-sharing arenas", y="intensity") +
ggtitle("Number of Power-Sharing areas and Veto intensity") +
geom_text_repel(
aes(label=conflict), size=4, box.padding = unit(0.5, "lines")
)+
scale_x_continuous(limits=c(1, 5))+
theme(legend.position="none")+
facet_wrap(~vetotype, nrow=2)

Resources