My real data set is much bigger than this representative sample:
df<- structure(list(Pama1 = structure(c(2L, 5L, 3L, 5L, 3L, 3L, 3L,
3L, 3L, 4L, 1L), .Label = c("", "DD1", "n/a", "PAMANA", "zf"), class = "factor"),
X = structure(c(11L, 3L, 10L, 2L, 4L, 5L, 6L, 7L, 8L, 9L,
1L), .Label = c("", "116", "12", "138", "197", "219", "224",
"230", "280", "85", "Start1"), class = "factor"), X.1 = structure(c(11L,
10L, 2L, 4L, 3L, 5L, 8L, 6L, 7L, 9L, 1L), .Label = c("",
"101", "145", "199", "222", "227", "233", "238", "331", "89",
"End1"), class = "factor"), Pama2 = structure(c(2L, 4L, 4L,
4L, 3L, 4L, 4L, 6L, 5L, 1L, 1L), .Label = c("", "DD2", "GGTR",
"n/a", "PAMANA", "T_reg"), class = "factor"), X.2 = structure(c(9L,
2L, 2L, 8L, 3L, 4L, 5L, 6L, 7L, 1L, 1L), .Label = c("", "1",
"115", "208", "214", "232", "376", "85", "Start2"), class = "factor"),
X.3 = structure(c(10L, 8L, 2L, 9L, 3L, 4L, 5L, 6L, 7L, 1L,
1L), .Label = c("", "15", "195", "229", "231", "362", "577",
"76", "86", "End2"), class = "factor"), Pama3 = structure(c(1L,
3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("DD3",
"GGTR", "n/a"), class = "factor"), X.4 = structure(c(10L,
1L, 7L, 8L, 9L, 9L, 2L, 3L, 4L, 5L, 6L), .Label = c("1",
"129", "136", "153", "166", "178", "48", "65", "66", "Start1"
), class = "factor"), X.5 = structure(c(10L, 6L, 7L, 8L,
3L, 9L, 1L, 2L, 4L, 4L, 5L), .Label = c("131", "138", "144",
"168", "180", "34", "51", "70", "79", "End2"), class = "factor")), .Names = c("Pama1",
"X", "X.1", "Pama2", "X.2", "X.3", "Pama3", "X.4", "X.5"), class = "data.frame", row.names = c(NA,
-11L))
First, I pool all the start and end positions and find their overall range.
In this example the range is 1 to 577.
Then I want to plot that range and highlight the regions that have a label (a string) attached to them,
for example, something like this figure that I made,
or even like this one.
The code below tidies up the data into a form suitable for plotting and then uses geom_segment to lay out the sequences. To tidy the data, we want each column to be a variable and each row to be an observation.
library(tidyverse)
## Clean up data frame and convert to long form.
## The raw table holds side-by-side (name, start, end) column triplets; stack
## them so that each row describes a single labelled region.
df <- map_df(
  seq(1, ncol(df), 3),
  # The column names appear to be in the first data row, so drop that row and
  # give every triplet the same three column names before stacking.
  ~ setNames(df[-1, .x:(.x + 2)], c("DD", "Start", "End")),
  .id = "Pama" # position of the triplet (1, 2, 3, ...), stored as text
) %>%
  mutate(Pama = paste0("Pama", Pama)) %>%
  filter(!DD %in% c("n/a", "")) %>% # remove placeholder / padding rows
  mutate(
    # The source columns are factors. Go through character so the printed
    # values are recovered rather than the internal level codes.
    DD = as.character(DD),
    across(c(Start, End), ~ as.numeric(as.character(.x)))
  )
The data frame now looks like this:
Pama DD Start End
<chr> <chr> <dbl> <dbl>
1 Pama1 zf 12 89
2 Pama1 zf 116 199
3 Pama1 PAMANA 280 331
4 Pama2 GGTR 115 195
5 Pama2 T_reg 232 362
6 Pama2 PAMANA 376 577
7 Pama3 GGTR 66 144
# One horizontal track per Pama: a grey backbone across the overall range,
# coloured segments for the labelled regions, and the start/end coordinates
# printed just below each segment.
ggplot(df, aes(y = Pama, yend = Pama)) +
  # Backbone: same extent (overall min Start to max End) for every track
  geom_segment(
    data = data.frame(Pama = unique(df$Pama), x = min(df$Start), xend = max(df$End)),
    aes(x = x, xend = xend), colour = "grey80", size = 10
  ) +
  geom_segment(aes(x = Start, xend = End, colour = DD), size = 20) +
  # Region name centred on its segment
  geom_text(aes(x = (Start + End) / 2, label = DD),
            colour = "white", size = 3, fontface = "bold") +
  # One row per endpoint so Start and End can share a single text layer
  geom_text(
    data = pivot_longer(df, Start:End, names_to = "key", values_to = "value"),
    aes(x = value, label = value, colour = DD), size = 2.5,
    fontface = "bold", position = position_nudge(0, -0.3)
  ) +
  # "none" replaces the deprecated guides(colour = FALSE)
  guides(colour = "none") +
  scale_x_continuous(breaks = seq(0, 1000, 100)) +
  labs(x = "", y = "") +
  theme_classic(base_size = 15) +
  theme(axis.line.y = element_blank(),
        axis.ticks.y = element_blank())
UPDATE: To address your comment, here's another way of positioning the numbers to avoid overlap.
# Same layout as before, but the coordinates are drawn in white inside the
# segments: start labels are shifted 10 units right and end labels 10 units
# left so they stay within the coloured bar.
ggplot(df, aes(y = Pama, yend = Pama)) +
  geom_segment(
    data = data.frame(Pama = unique(df$Pama), x = min(df$Start), xend = max(df$End)),
    aes(x = x, xend = xend), colour = "grey80", size = 10
  ) +
  geom_segment(aes(x = Start, xend = End, colour = DD), size = 20) +
  geom_text(aes(x = (Start + End) / 2, label = DD),
            colour = "white", size = 3, fontface = "bold") +
  geom_text(
    data = pivot_longer(df, Start:End, names_to = "key", values_to = "value"),
    aes(x = ifelse(key == "Start", value + 10, value - 10), label = value),
    colour = "white", size = 2.8, fontface = "bold",
    position = position_nudge(0, -0.2)
  ) +
  # "none" replaces the deprecated guides(colour = FALSE)
  guides(colour = "none") +
  scale_x_continuous(breaks = seq(0, 1000, 100)) +
  labs(x = "", y = "") +
  theme_classic(base_size = 15) +
  theme(axis.line.y = element_blank(),
        axis.ticks.y = element_blank())
UPDATE 2: To address your second comment, we'll add a grouping column that we'll use to alternate high and low number labels:
# Add grouping variable to alternate high and low labels.
# Within each Pama track, regions are numbered in order of Start and tagged
# "high", "low", "high", ... so neighbouring labels land on opposite sides.
df <- df %>%
  group_by(Pama) %>%
  arrange(Start) %>%
  # rep_len() yields exactly n() values, with no oversized intermediate vector
  mutate(hilow = rep_len(c("high", "low"), n())) %>%
  ungroup() # do not leak the grouping into later steps
# Long form of the endpoints (one row per Start/End value), computed once and
# shared by the two label layers below. Requires the `hilow` column.
endpoint_labels <- pivot_longer(df, Start:End, names_to = "key", values_to = "value")

ggplot(df, aes(y = Pama, yend = Pama)) +
  geom_segment(
    data = data.frame(Pama = unique(df$Pama), x = min(df$Start), xend = max(df$End)),
    aes(x = x, xend = xend), colour = "grey80", size = 10
  ) +
  geom_segment(aes(x = Start, xend = End, colour = DD), size = 20) +
  geom_text(aes(x = (Start + End) / 2, label = DD),
            colour = "white", size = 3, fontface = "bold") +
  # "high" regions: coordinates above the segment
  geom_text(
    data = filter(endpoint_labels, hilow == "high"),
    aes(x = value, label = value, colour = DD), hjust = 0.5,
    size = 3, fontface = "bold", position = position_nudge(0, 0.3)
  ) +
  # "low" regions: coordinates below the segment
  geom_text(
    data = filter(endpoint_labels, hilow == "low"),
    aes(x = value, label = value, colour = DD), hjust = 0.5,
    size = 3, fontface = "bold", position = position_nudge(0, -0.3)
  ) +
  # "none" replaces the deprecated guides(colour = FALSE)
  guides(colour = "none") +
  scale_x_continuous(breaks = seq(0, 1000, 100)) +
  labs(x = "", y = "") +
  theme_classic(base_size = 15) +
  theme(axis.line.y = element_blank(),
        axis.ticks.y = element_blank())
That is one weird data structure you have. This will probably work out better down the line if you can modify the source data in some way to obtain a tidy dataframe from the start, where each column is a single variable and each row is an observation.
We can wrangle the data to obtain such dataframe (this is in base R, you can arguably achieve the same in other ways, also using dplyr or data.table):
# Stack every (name, start, end) column triplet of `df` into one long table.
# The first row of `df` holds the embedded header, so the track label is read
# from it ("DD1", "DD2", ...) rather than hard-coded, and any number of
# triplets is handled.
triplet_starts <- seq(1, ncol(df), by = 3)
df2 <- do.call(rbind, lapply(triplet_starts, function(first_col) {
  # df[-1, ] (rather than 2:nrow(df)) stays correct for a header-only table
  block <- df[-1, first_col:(first_col + 2)]
  track <- as.character(df[1, first_col])
  setNames(cbind(rep(track, nrow(block)), block),
           c("DD", "Pama", "Start", "End"))
}))
# Factor -> character -> numeric recovers the printed values, not level codes
df2$Start <- as.numeric(as.character(df2$Start))
df2$End <- as.numeric(as.character(df2$End))
# Drop padding rows that carry no region name
df2 <- df2[!df2$Pama %in% c("", "n/a"), ]
df2
#> DD Pama Start End
#> 2 DD1 zf 12 89
#> 4 DD1 zf 116 199
#> 10 DD1 PAMANA 280 331
#> 51 DD2 GGTR 115 195
#> 81 DD2 T_reg 232 362
#> 91 DD2 PAMANA 376 577
#> 52 DD3 GGTR 66 144
This gives us a tidy dataset in which any ggplot2 aesthetic can be mapped to a single column:
library(ggplot2)
# One thick segment per region on its DD track, with the start and end
# coordinates printed slightly below the segment ends.
ggplot(df2, aes(y = DD, colour = Pama)) +
  geom_segment(aes(x = Start, xend = End, yend = DD), size = 10) +
  geom_text(aes(x = Start, label = Start),
            size = 2.5, position = position_nudge(y = -0.15)) +
  geom_text(aes(x = End, label = End),
            size = 2.5, position = position_nudge(y = -0.15)) +
  # Track names on the right-hand side
  scale_y_discrete(position = "right") +
  theme(
    axis.title = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y.right = element_text(size = 14),
    axis.text.x = element_blank(),
    panel.background = element_rect(fill = "white")
  )
Update:
The text positioning can indeed cause problems, it does so in this example, where we had to reduce the text size to get it somewhat right.
Here is a solution, based on the ggrepel package:
library(ggplot2)
library(ggrepel)
# Same track plot, with the coordinate labels placed by ggrepel so that they
# do not overlap, and the region name printed in white on each segment.
ggplot(df2, aes(y = DD, color = Pama)) +
  geom_segment(aes(x = Start, xend = End, yend = DD), size = 10) +
  geom_text_repel(
    # Reshape the layer data on the fly: one row per endpoint (Start or End)
    data = function(d) {
      tidyr::pivot_longer(d, c(Start, End), names_to = "k", values_to = "p")
    },
    aes(label = p, x = p), size = 5, nudge_y = -.15, segment.size = 0
  ) +
  # geom_label_repel(aes(label = End, x = End), size = 5, nudge_y = -.15) +
  geom_text(aes(x = (Start + End) / 2, label = Pama), colour = "white", size = 2.5) +
  scale_y_discrete(position = "right") +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  theme(panel.background = element_rect(fill = "white"),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y.right = element_text(size = 14),
        axis.ticks.y = element_blank(),
        axis.title = element_blank())
(The text size is forcibly increased to show it does not overlap)
PS: Yes, this update makes it even more similar to @eipi10's answer. It's a great answer, so of course I'd steal from it :P
Related
I tried to reproduce the answer given by Roman in this post: The same width of the bars in geom_bar(position = "dodge")
but I could not fix my problem. When the bars have the same width, the distance between the groups is too big. I have the same problem when I use facet_grid.
My df:
df <- structure(list(discipline = structure(c(2L, 3L, 3L, 2L, 2L, 2L, 4L, 6L, 7L, 3L, 4L, 6L, 8L, 8L, 2L, 2L, 2L, 3L, 3L, 3L), .Label = c("", "Biogeochemistry", "Ecology", "Geochemistry", "Geography", "Management", "Microbiology", "Oceanography"), class = "factor"), focus = structure(c(34L, 55L, 40L, 47L, 54L, 57L, 47L, 19L, 31L, 25L, 23L, 25L, 47L, 52L,13L, 20L, 23L, 16L, 26L, 27L), .Label = c("", "Abiotic measures", "Acidification", "Biogeochemichal budgets", "Biogeochemistry", "Biogeochemistry, discharge", "Blue Carbon", "Chromophoric Dissolved organic matter, river plume", "Coastal anthromes", "Connectivity", "Coral reefs", "Ecology", "Ecosystem Function", "Ecosystem Services", "Embryo plants", "Fisheries", "Food webs", "Global change", "Governance", "Groundwater", "Hidrology", "Integrative Magamenet", "Isotopes", "Land-sea interactions","Land-sea interface", "Land use", "Life history", "Life traits", "Livelihoods", "Management", "Microbial community", "Modelling water quality", "Nitrogen fluxes", "Nutrients", "Parasites", "ph, CO2", "Planning", "Pollutants", "Pollution", "Primary production", "Remote Sensing", "Resilience", "resilience, self-organization", "Restoration",
"Salinization", "Sea level rise", "Sediment flux", "Sediments", "socio land-sea interactions", "Species interaction", "Submarine ground water", "Submarine groundwater", "Subsidies", "Trace metals", "Trophic interactions", "Water quality", "Water resources"), class = "factor"), n = c(39L, 17L, 11L, 9L, 6L, 5L, 5L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L)), row.names = c(NA, -20L), class = c("tbl_df","tbl", "data.frame"))
First I tried with position = position_dodge2(preserve = "single")
# Attempt 1: dodge bars and labels with preserve = "single" so that every bar
# keeps the same width regardless of how many bars its discipline has.
ggplot(df, aes(x = discipline, y = n, fill = reorder(focus, n))) +
  geom_col(position = position_dodge2(width = 0.9, preserve = "single")) +
  ylab("N") +
  theme_classic() +
  geom_text(
    aes(label = focus),
    position = position_dodge2(width = 0.9, preserve = "single"),
    angle = 90,
    hjust = -0.1
  ) +
  theme(legend.position = "none")
Then I used facet_grid
# Attempt 2: plain "dodge" for the bars, dodge2 for the labels, plus
# facet_grid().
# NOTE(review): facet_grid() is given no row/column variables here, so no
# facets are actually defined - confirm whether `. ~ discipline` was intended.
ggplot(df, aes(x = discipline, y = n, fill = reorder(focus, n))) +
  geom_col(position = "dodge") +
  ylab("N") +
  theme_classic() +
  geom_text(
    aes(label = focus),
    position = position_dodge2(width = 0.9, preserve = "single"),
    angle = 90,
    hjust = -0.1
  ) +
  theme(legend.position = "none") +
  facet_grid(scales = "free_x", space = "free_x", switch = "x")
Even when width of bars are equal, distance between groups are too big.
What can I do to solve this problem?
Maybe try this. It looks like the issue is with position. If you define position_dodge2() for the bars you can avoid the big bars you got. Here the code:
library(ggplot2)
# Code
# Bars and labels use the same position_dodge2(preserve = "single") settings,
# so each label sits on its own bar and all bars share one width.
# NOTE(review): facet_grid() is given no row/column variables here, so no
# facets are actually defined - confirm whether `. ~ discipline` was intended.
ggplot(df, aes(x = discipline, y = n, fill = reorder(focus, n))) +
  geom_col(position = position_dodge2(width = 0.9, preserve = "single")) +
  ylab("N") +
  theme_classic() +
  geom_text(
    aes(label = focus),
    position = position_dodge2(width = 0.9, preserve = "single"),
    angle = 90,
    hjust = -0.1
  ) +
  theme(legend.position = "none") +
  facet_grid(scales = "free_x", space = "free_x", switch = "x")
Output:
Whereas, the original code produces this (using position = "dodge"):
I have the following data which I am trying to plot as combined bar and line plot (with CI)
A data frame of Feature, Count, Odds Ratio and Confidence Interval values for OR
I am trying to get a plot as
A bar plot for count over lapped with a line plot for Odds Ratio with CI bars
I tried to plot in ggplot2 using following code:
# Bars for Count with the odds ratio overlaid as a line with points.
# Line and points are multiplied by max(feat$Count); the error bars use the
# raw CI1/CI2 values, so they are drawn on the unscaled axis.
ggplot(feat, aes(x = Feat)) +
  geom_col(aes(y = Count), fill = "steelblue") +
  geom_line(aes(y = OR * max(feat$Count)), group = 1) +
  geom_point(aes(y = OR * max(feat$Count))) +
  geom_errorbar(
    aes(ymin = CI1, ymax = CI2),
    width = .1,
    colour = "orange",
    position = position_dodge(0.05)
  )
However, I am not getting the CI bars on the line graph, as can be seen in the picture; instead, they are drawn at the scale of the bar plot.
Can someone please help me sort out this issue?
Thanks
Edit - Dput:
df <- structure(list(Feat = structure(1:8, .Label = c("A", "B", "C",
"D", "E", "F", "G", "H"), class = "factor"), Count = structure(c(2L,
8L, 7L, 5L, 4L, 1L, 6L, 3L), .Label = c("13", "145", "2", "25",
"26", "3", "37", "43"), class = "factor"), OR = structure(c(4L,
2L, 1L, 5L, 3L, 7L, 6L, 8L), .Label = c("0.38", "1.24", "1.33",
"1.51", "1.91", "2.08", "2.27", "3.58"), class = "factor"), CI1 = structure(c(7L,
4L, 1L, 6L, 3L, 5L, 2L, 2L), .Label = c("0.26", "0.43", "0.85",
"0.89", "1.2", "1.24", "1.25"), class = "factor"), CI2 = structure(c(3L,
2L, 1L, 6L, 4L, 7L, 8L, 5L), .Label = c("0.53", "1.7", "1.82",
"1.98", "13.07", "2.83", "3.92", "6.13"), class = "factor")), class = "data.frame", row.names = c(NA,
-8L))
Is this what you had in mind?
# The dput() in the question stores Count, OR, CI1 and CI2 as factors, for
# which max() and `*` are not defined. Convert through character so the
# printed values are recovered rather than the internal level codes. Columns
# that are already numeric keep their values.
# NOTE(review): assumes `feat` holds the data shown in the question's dput()
# (which is assigned to `df` there) - confirm.
value_cols <- c("Count", "OR", "CI1", "CI2")
feat[value_cols] <- lapply(
  feat[value_cols],
  function(col) as.numeric(as.character(col))
)

# Factor that maps odds-ratio units onto the Count axis, chosen so the largest
# upper confidence limit reaches the height of the tallest bar.
ratio <- max(feat$Count) / max(feat$CI2)

ggplot(feat) +
  geom_col(aes(x = Feat, y = Count), fill = "steelblue") +
  # OR, CI1 and CI2 are all multiplied by the same ratio so the line, points
  # and error bars share one scale.
  geom_line(aes(x = Feat, y = OR * ratio), group = 1) +
  geom_point(aes(x = Feat, y = OR * ratio)) +
  geom_errorbar(aes(x = Feat, ymin = CI1 * ratio, ymax = CI2 * ratio),
                width = .1, colour = "orange",
                position = position_dodge(0.05)) +
  # The secondary axis divides by ratio again to show odds-ratio units
  scale_y_continuous("Count", sec.axis = sec_axis(~ . / ratio, name = "Odds Ratio"))
Edit: Just for fun with the legend too.
# Same plot with legend entries: fill and colour are mapped to constant
# strings inside aes() so that ggplot builds a legend key for each, and the
# manual scales assign the actual colours.
ggplot(feat, aes(x = Feat)) +
  geom_col(aes(y = Count, fill = "Count")) +
  geom_line(aes(y = OR * ratio, color = "Odds Ratio"), group = 1) +
  geom_point(aes(y = OR * ratio)) +
  geom_errorbar(
    aes(ymin = CI1 * ratio, ymax = CI2 * ratio),
    width = .1,
    colour = "orange",
    position = position_dodge(0.05)
  ) +
  scale_fill_manual(values = "steelblue") +
  scale_color_manual(values = "orange") +
  scale_y_continuous("Count", sec.axis = sec_axis(~ . / ratio, name = "Odds Ratio")) +
  theme(
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.title = element_blank(),
    legend.key = element_blank()
  )
Since you asked in the comments about adding p-values for comparisons, here is a way you can do that. Unfortunately, because you don't really want to add *all* the comparisons, there's a little bit of hard-coding to do.
library(ggplot2)
library(ggsignif)
# Legend variant plus significance brackets. x and y are set globally so that
# geom_signif() can pick them up; the OR layers override y.
ggplot(feat, aes(x = Feat, y = Count)) +
  geom_col(aes(fill = "Count")) +
  geom_line(aes(y = OR * ratio, color = "Odds Ratio"), group = 1) +
  geom_point(aes(y = OR * ratio)) +
  geom_errorbar(
    aes(ymin = CI1 * ratio, ymax = CI2 * ratio),
    width = .1,
    colour = "orange",
    position = position_dodge(0.05)
  ) +
  scale_fill_manual(values = "steelblue") +
  scale_color_manual(values = "orange") +
  scale_y_continuous("Count", sec.axis = sec_axis(~ . / ratio, name = "Odds Ratio")) +
  theme(
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.title = element_blank(),
    legend.key = element_blank()
  ) +
  # Hand-picked comparisons; bracket heights and labels are given explicitly
  geom_signif(
    comparisons = list(c("A", "H"), c("B", "F"), c("D", "E")),
    y_position = c(150, 60, 40),
    annotation = c("***", "***", "n.s.")
  )
I want to set the number of observations at the top of each bar.
This is some sample data
structure(list(Treatment = structure(c(3L, 3L, 3L, 3L, 3L, 4L,
4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L), .Label = c("", "{\"ImportId\":\"Treatment\"}",
"Altruism", "Altruism - White", "Piece Rate - 0 cents", "Piece Rate - 3 cents",
"Piece Rate - 6 cents", "Piece Rate - 9 cents", "Reciprocity",
"Reciprocity - Black", "Reciprocity - White", "Treatment"), class = "factor"),
Gender = structure(c(5L, 3L, 5L, 5L, 5L, 3L, 3L, 7L, 3L,
3L, 5L, 5L, 3L, 3L, 5L), .Label = c("", "{\"ImportId\":\"QID2\"}",
"Female", "Gender you most closely identify with: - Selected Choice",
"Male", "Other", "Prefer not to answer"), class = "factor")), row.names = c(NA,
15L), class = "data.frame")
My approach was using the following code
# Number of observations per bar. Each bar is one Treatment, so the totals
# are counted per Treatment (not per Gender).
totals <- Data1 %>%
  count(Treatment, name = "total")

Data1 %>%
  count(Treatment, Gender) %>%
  ggplot(aes(Treatment, n)) +
  # position = "fill" rescales every bar to height 1 (fractions)
  geom_col(aes(fill = Gender), position = "fill") +
  ggtitle("Gender") +
  ylab("Fraction") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.3, hjust = 1)) +
  scale_fill_manual("Gender",
                    values = c("Female" = "pink", "Male" = "light blue",
                               "Other" = "coral", "Prefer not to answer" = "violet")) +
  # Filled bars always end at y = 1, so print the total just above that line.
  # `data` must be passed by name with `=`; `data<-totals` would also create a
  # global variable called `data`.
  geom_text(data = totals, aes(y = 1, label = total), size = 3, vjust = -0.5)
I only want the total number of observations appear at the top of each bar.
My graph thus far looks like this
Now I only want to know how to display the total number of observations for each bar.
I couldn't get your sample data to work, here is an example of adding totals to each bar.
You will need to create another dataset that shows totals per each group (for your example, it will be Treatment). Then add geom_text for your totals.
library(dplyr)
library(ggplot2)
library(scales)
# Sample Data: two genders in each of two treatments
Data1 <- data.frame(
  Gender = factor(rep(c("Female", "Male"), each = 2)),
  Treatment = factor(rep(c("a", "b"), times = 2), levels = c("a", "b")),
  value = c(10, 12, 13, 11)
)

# Totals for each bar (one row per Treatment)
totals <- summarize(group_by(Data1, Treatment), value = sum(value))

# Bar chart
ggplot(Data1, aes(x = Treatment, y = value)) +
  geom_col(aes(fill = Gender)) +
  # comment this out if you don't want to show labels for each stacked bar
  geom_text(aes(label = value), position = position_stack(vjust = 0.5)) +
  # Add totals for each bar, taken from the summary table
  geom_text(aes(x = Treatment, y = value, label = value), data = totals)
EDIT (with sample data provided)
library(dplyr)
library(ggplot2)
library(scales)
# Number of observations per Treatment, i.e. the height of each stacked bar
totals <- Data1 %>%
  count(Treatment)

Data1 %>%
  count(Treatment, Gender) %>%
  ggplot(aes(x = Treatment, y = n)) +
  geom_col(aes(fill = Gender)) +
  ggtitle("Gender") +
  # The bars are stacked counts (no position = "fill"), so label the axis
  # accordingly.
  ylab("Count") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.3, hjust = 1)) +
  scale_fill_manual("Gender",
                    values = c("Female" = "pink", "Male" = "light blue",
                               "Other" = "coral", "Prefer not to answer" = "violet")) +
  # Add totals for each bar. x and y are inherited (Treatment, n); vjust
  # lifts the text just above the top of the bar instead of onto its edge.
  geom_text(data = totals, aes(label = n), vjust = -0.5)
Using ggplot2, I am plotting percentage values for 15 species across three sites (each species occurs in each site). The data points associated with site 'C' are my reference points.
Now, instead of plotting sites 'A' and 'B' as points, I would like to display them using vertical lines or column-like structures. As such, these data points should be extended as vertical lines to the top or bottom side of the site 'C' points (green colour), i.e. to the top where values are larger than the reference value and bottom for smaller values.
Specifically, I would hope for a red line from a red point to the green point and a blue line from the blue point to the green point. The red line should ideally have the same width as the red point (and same for blue). The line should also be offset as are the red and blue points (relative to the green point), so that lines do not overlap. Finally, the line should not go to the center but the edge of a point.
For this purpose I have offset points for 'A' and 'B' and also reduced their size to half of the reference point size.
library(ggplot2)
# Keep the species in the order in which they first appear in the data
# instead of the default alphabetical factor order.
MyData$species <- as.character(MyData$species)
MyData$species <- factor(MyData$species, levels = unique(MyData$species))

pos <- position_dodge(width = 0.21)
cols <- c("C" = "darkgreen", "B" = "blue", "A" = "red")

# Reference site C: large squares joined by a translucent line.
# Sites A and B: small squares, dodged by `pos`.
species_plot <- ggplot(data = MyData, aes(x = species, y = value, group = site, colour = site)) +
  geom_point(data = subset(MyData, site == "C"), size = 4, shape = 15, alpha = 1, position = pos) +
  # show.legend replaces the deprecated show_guide; `lwd` was a second
  # spelling of the size already given, so it is dropped.
  geom_line(data = subset(MyData, site == "C"), size = 2, alpha = 0.4, show.legend = FALSE) +
  geom_point(data = subset(MyData, site != "C"), size = 1.8, shape = 15, alpha = 1, position = pos) +
  scale_colour_manual(values = cols) +
  xlab("Species") +
  ylab("Value (%)") +
  scale_y_continuous(expand = c(0.01, 0.01),
                     limits = c(0.0, 100),
                     breaks = c(0, 20, 40, 60, 80, 100),
                     labels = c("0", "20", "40", "60", "80", "100")) +
  theme_bw() +
  theme(legend.position = "none") +
  theme(axis.title.x = element_text(vjust = 0.1, face = "bold", size = 16),
        axis.text.x = element_text(vjust = 0.4, size = 14, angle = 90, hjust = 1.0)) +
  theme(axis.title.y = element_text(vjust = 0.1, face = "bold", size = 16),
        axis.text.y = element_text(face = "bold", size = 14, angle = 0)) +
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +
  theme(panel.border = element_rect(size = 1, color = "black")) +
  theme(plot.margin = unit(c(0.3, 0.4, 0.3, 0.3), "lines"))

# Print explicitly: autoprinting does not happen when the script is source()d,
# which would leave an empty image file.
tiff(file = "MyData.tiff", height = 10, width = 10, units = "in", res = 300, compression = "lzw")
print(species_plot)
dev.off()
This is my current plot. So basically, I would like to replace the red and blue points with lines that extend to the green points (without overlapping them).
Many thanks in advance for any advice on an elegant solution for this.
This is a dput() of my dataset.
structure(list(site = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = c("A", "B", "C"), class = "factor"),
species = structure(c(13L, 11L, 2L, 14L, 1L, 9L, 12L, 10L,
6L, 8L, 15L, 7L, 3L, 4L, 5L, 13L, 11L, 2L, 14L, 1L, 9L, 12L,
10L, 6L, 8L, 15L, 7L, 3L, 4L, 5L, 13L, 11L, 2L, 14L, 1L,
9L, 12L, 10L, 6L, 8L, 15L, 7L, 3L, 4L, 5L), .Label = c("Species 1",
"Species 10", "Species 11", "Species 12", "Species 13", "Species 14",
"Species 15", "Species 2", "Species 3", "Species 4", "Species 5",
"Species 6", "Species 7", "Species 8", "Species 9"), class = "factor"),
value = c(2, 3.25, 3.53, 4.31, 4.59, 5.26, 6.02, 6.42, 6.6,
7.26, 8.89, 12.45, 35.62, 72.42, 73.55, 1.36, 2.36, 2.17,
10.34, 6.84, 1.88, 5.09, 7.35, 3.87, 10.55, 6.6, 14.64, 39.57,
88.06, 64.54, 5.03, 12.34, 5.42, 3.63, 5.16, 6.04, 3, 8.94,
3.28, 7.64, 6.25, 21.96, 39.35, 78.55, 47.35)), .Names = c("site",
"species", "value"), class = "data.frame", row.names = c(NA,
-45L))
You can try geom_linerange() for the lines from points A/B to point C.
Define the ymin/ymax values for each site/species, & reorder site such that A / B lines drop down to each side of point C:
library(dplyr)
# For every row, attach the reference value of site C for the same species and
# the lower/upper end of the span between the row's own value and that
# reference.
MyData <- MyData %>%
  group_by(species) %>%
  mutate(value.C = value[site == "C"]) %>%
  ungroup() %>%
  mutate(
    # pmin()/pmax() compare element-wise, so no rowwise() pass is needed
    value.min = pmin(value, value.C),
    value.max = pmax(value, value.C),
    # Order A, C, B so the A and B lines are dodged to either side of point C
    site = factor(site, levels = c("A", "C", "B"))
  )
Plot:
# vary dodge width such that the lines drop to the edge of point C
# for your chosen dimensions (for mine 0.5 was about right)
pos <- position_dodge(width = 0.5)

# Every layer sets its size as a constant, so `size` is not mapped in aes().
ggplot(data = MyData,
       aes(x = species, y = value,
           ymin = value.min, ymax = value.max,
           group = site, colour = site)) +
  # Vertical spans from each site's value to the site C reference value
  geom_linerange(size = 1.8, alpha = 0.4, position = pos) +
  # `lwd` was a second spelling of the size already given, so it is dropped
  geom_line(data = subset(MyData, site == "C"),
            size = 2, alpha = 0.4) +
  geom_point(data = subset(MyData, site == "C"),
             size = 4, shape = 15, position = pos) +
  scale_color_manual(values = cols) +
  theme_classic() +
  theme(legend.position = "none")
# + other theme-related settings...
# + other theme-related settings...
You can add geom_line to draw the vertical lines
library(ggplot2)
# Keep the species in the order in which they first appear in the data
MyData$species <- as.character(MyData$species)
MyData$species <- factor(MyData$species, levels = unique(MyData$species))

pos <- position_dodge(width = 0.21)
cols <- c("C" = "darkgreen", "B" = "blue", "A" = "red")

# dev.new() opens a graphics window on any platform; windows() exists on
# Windows only.
dev.new()
ggplot(data = MyData, aes(x = species, y = value, group = site, colour = site)) +
  geom_point(data = subset(MyData, site == "C"), size = 4, shape = 15, alpha = 1, position = pos) +
  # show.legend replaces the deprecated show_guide; `lwd` was a second
  # spelling of the size already given, so it is dropped.
  geom_line(data = subset(MyData, site == "C"), size = 2, alpha = 0.4, show.legend = FALSE) +
  geom_point(data = subset(MyData, site != "C"), size = 1.8, shape = 15, alpha = 1, position = pos) +
  # Grouping by species joins the values of one species, which share the same
  # x position, giving a vertical line per species.
  geom_line(aes(group = species)) +
  scale_colour_manual(values = cols) +
  xlab("Species") +
  ylab("Value (%)") +
  scale_y_continuous(expand = c(0.01, 0.01),
                     limits = c(0.0, 100),
                     breaks = c(0, 20, 40, 60, 80, 100),
                     labels = c("0", "20", "40", "60", "80", "100")) +
  theme_bw() +
  theme(legend.position = "none") +
  theme(axis.title.x = element_text(vjust = 0.1, face = "bold", size = 16),
        axis.text.x = element_text(vjust = 0.4, size = 14, angle = 90, hjust = 1.0)) +
  theme(axis.title.y = element_text(vjust = 0.1, face = "bold", size = 16),
        axis.text.y = element_text(face = "bold", size = 14, angle = 0)) +
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +
  theme(panel.border = element_rect(size = 1, color = "black")) +
  theme(plot.margin = unit(c(0.3, 0.4, 0.3, 0.3), "lines"))
I want to add a legend under the existing legend that represents the dashed line, such that the dashed line could be labeled "avg tx effect" and be placed under study 3.
library(ggplot2)
library(ggthemes)
#dput(df)
df=structure(list(study = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("study1", "study2",
"study3"), class = "factor"), d = c(-0.205, 0.1075, 0.3525, -0.37,
0.3, 0.42, -0.28, 0.09, 0.59, 0.11, -0.05, 0.25, 0, 0.25, 0.49
), Outcome = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L,
5L, 5L, 5L), Outcome2 = structure(c(1L, 1L, 1L, 4L, 4L, 4L, 7L,
7L, 7L, 10L, 10L, 10L, 13L, 13L, 13L), .Label = c("1", "1", "1",
"2", "2", "2", "3", "3", "3", "4", "4", "4", "5", "5", "5"), class = "factor")), .Names = c("study",
"d", "Outcome", "Outcome2"), row.names = c(NA, -15L), class = "data.frame")
# Dodged bars of effect size d per outcome, one grey shade per study, with a
# dashed horizontal reference line at 0.15.
ggplot(df, aes(x = Outcome2, y = d, fill = study)) +
  geom_col(
    aes(x = Outcome2),
    position = position_dodge(),
    colour = "black", # black outlines
    size = .3         # thin outlines
  ) +
  geom_hline(yintercept = .15, linetype = 2) +
  scale_fill_grey(name = "Study", labels = c("study1", "study2", "study3")) +
  xlab("Outcome") +
  ylab("Cohen's D Effect Size") +
  theme_bw()
A general feature of ggplot is that, to generate a legend, you need to map your aesthetics (e.g. linetype) to a variable in the data instead of setting them to a constant. In the case of geom_hline, this may be achieved by putting the intercept in a separate data frame. Also note show.legend = TRUE (this argument was called show_guide before ggplot2 2.0.0).
Then customize the legend using scale_linetype_manual. The black lines in the fill legend are removed using override.aes.
Here's a stripped down version of your code to show only the most necessary steps:
# Put the intercept in its own data frame so that linetype can be mapped
# inside aes(); only mapped aesthetics produce a legend.
df2 <- data.frame(yi = 0.15)

ggplot(data = df, aes(x = Outcome2, y = d, fill = study)) +
  geom_bar(position = "dodge", stat = "identity") +
  # show.legend replaces the deprecated show_guide argument
  geom_hline(data = df2, aes(yintercept = yi, linetype = factor(yi)), show.legend = TRUE) +
  # Legend title carries the text; the single key itself gets an empty label
  scale_linetype_manual(name = "avg tx effect", values = "dashed", labels = "") +
  # Keep the dashed line out of the fill legend keys
  guides(fill = guide_legend(override.aes = list(linetype = "blank")))
As #Gregor suggested, you could use a direct label for this line by adding annotate() as shown below:
# Same bar chart, with the dashed reference line labelled directly on the
# plot through annotate() instead of a legend entry.
ggplot(df, aes(x = Outcome2, y = d, fill = study)) +
  geom_col(
    aes(x = Outcome2),
    position = position_dodge(),
    colour = "black", # black outlines
    size = .3         # thin outlines
  ) +
  geom_hline(yintercept = .15, linetype = 2) +
  # Text placed just above the line, near the left edge of the panel
  annotate("text", x = .7, y = .17, size = 3, label = c("avg tx ef")) +
  scale_fill_grey(name = "Study", labels = c("study1", "study2", "study3")) +
  xlab("Outcome") +
  ylab("Cohen's D Effect Size") +
  theme_bw()
If space is an issue you can use the wrapper described here to wrap the text. Just run wrapper <- function(x, ...) paste(strwrap(x, ...), collapse = "\n") then add +annotate("text",x=.7,y=.18,size=3,label=wrapper('avg tx effect',10)). Which produces: