Bars in geom_bar have unwanted different widths when using facet_wrap - r

I can't find a solution to the following problems and would greatly appreciate some help.
The following code produces bar charts using facet. However, due to "extra space" ggplot2 has in some groups it makes the bars much wider, even if I specify a width of 0.1 or similar. I find that very annoying since it makes it look very unprofessional. I want all the bars to look the same (except for the fill). I hope somebody can tell me how to fix this.
Secondly, how can I reorder the different classes in the facet windows so that the order is always C1, C2 ... C5, M, F, All where applicable. I tried it with ordering the levels of the factor, but since not all classes are present in every graph part it did not work, or at least I assume that was the reason.
Thirdly, how can I reduce the space between the bars? So that the whole graph is more compressed. Even if I make the image smaller for exporting, R will scale the bars smaller but the spaces between the bars are still huge.
I would appreciate feedback for any of those answers!
My Data:
http://pastebin.com/embed_iframe.php?i=kNVnmcR1
My Code:
library(dplyr)
library(gdata)
library(ggplot2)
library(directlabels)
library(scales)
# Read the validation results; `station` is turned into a factor so it can be
# used as a facetting variable.
all <- read.xls("all_auto_visual_c.xls")
all$station <- as.factor(all$station)
#all$group.new<-factor(all$group, levels=c('C. hyperboreus','C. glacialis','Special Calanus','M. longa','Pseudocalanus sp.','Copepoda'))

# Base plot: one bar per stage, filled by sample size.
allp <- ggplot(
  data = all,
  aes(x = shortname2, y = perc_correct, group = group, fill = sample_size)
)

# Layers, scales and facets. Each group/station combination gets its own
# panel with its own x categories (scales = "free_x").
allp <- allp +
  geom_bar(stat = "identity", position = "dodge", width = 0.1, colour = "NA") +
  scale_fill_gradient("Sample size (n)", low = "lightblue", high = "navyblue") +
  facet_wrap(group ~ station, ncol = 2, scales = "free_x") +
  xlab("Species and stages") +
  ylab("Automatic identification and visual validation concur (%)") +
  ggtitle("Visual validation of predictions")

# Appearance: black-and-white base theme with bold text and no legend.
allp <- allp +
  theme_bw() +
  theme(
    plot.title = element_text(lineheight = .8, face = "bold", size = 20, vjust = 1),
    axis.text.x = element_text(colour = "grey20", size = 12, angle = 0, hjust = .5, vjust = .5, face = "bold"),
    axis.text.y = element_text(colour = "grey20", size = 12, angle = 0, hjust = 1, vjust = 0, face = "bold"),
    axis.title.x = element_text(colour = "grey20", size = 15, angle = 0, hjust = .5, vjust = 0, face = "bold"),
    axis.title.y = element_text(colour = "grey20", size = 15, angle = 90, hjust = .5, vjust = 1, face = "bold"),
    legend.position = "none",
    strip.text.x = element_text(size = 12, face = "bold", colour = "black", angle = 0),
    strip.text.y = element_text(size = 12, face = "bold", colour = "black")
  )
allp
#ggsave(allp, file="auto_visual_stackover.jpeg", height= 11, width= 8.5, dpi= 400,)
The current graph that needs some fixing:
Thanks a lot!

Here is what I did after the suggestion from Gregor. Using geom_segment and geom_point makes a nice graph, I think.
library(ggplot2)
# Lollipop chart of the validation results: one segment + point per stage,
# faceted by group (rows) and station (columns).

# read.xls() comes from the gdata package; it is namespaced here because this
# snippet only attaches ggplot2.
all <- gdata::read.xls("all_auto_visual_c.xls")
all$station <- as.factor(all$station)

# Explicit level orders fix the order of the facet rows and of the stages on
# the y axis, independent of which stages occur in a given panel.
all$group.new <- factor(
  all$group,
  levels = c("C. hyperboreus", "C. glacialis", "Combined", "M. longa",
             "Pseudocalanus sp.", "Copepoda")
)
all$shortname2.new <- factor(
  all$shortname2,
  levels = c("All", "F", "M", "C5", "C4", "C3", "C2", "C1", "Micro", "Oith",
             "Tric", "Cegg", "Cnaup", "C3&2", "C2&1")
)

allp <- ggplot(all, aes(x = perc_correct, y = shortname2.new)) +
  geom_segment(aes(yend = shortname2.new), xend = 0, colour = "grey50") +
  geom_point(size = 4, aes(colour = sample_size)) +
  scale_colour_gradient("Sample size (n)", low = "lightblue", high = "navyblue") +
  geom_text(aes(label = perc_correct), hjust = -0.5) +
  # space = "free" lets each facet row take height proportional to the number
  # of stages it contains, so the spacing between rows of points is uniform.
  facet_grid(group.new ~ station, scales = "free_y", space = "free") +
  xlab("Automatic identification and visual validation concur (%)") +
  ylab("Species and stages") +
  ggtitle("Visual validation of predictions") +
  # theme_bw() is a complete theme: it must come BEFORE every theme() tweak.
  # The original called it a second time after
  # theme(panel.grid.major.y = element_blank()), which silently restored the
  # horizontal grid lines. All tweaks now live in one theme() call after it.
  theme_bw() +
  theme(
    panel.grid.major.y = element_blank(),
    plot.title = element_text(lineheight = .8, face = "bold", size = 20, vjust = 1),
    axis.text.x = element_text(colour = "grey20", size = 12, angle = 0, hjust = .5, vjust = .5, face = "bold"),
    axis.text.y = element_text(colour = "grey20", size = 12, angle = 0, hjust = 1, vjust = 0, face = "bold"),
    axis.title.x = element_text(colour = "grey20", size = 15, angle = 0, hjust = .5, vjust = 0, face = "bold"),
    axis.title.y = element_text(colour = "grey20", size = 15, angle = 90, hjust = .5, vjust = 1, face = "bold"),
    legend.position = "none",
    strip.text.x = element_text(size = 12, face = "bold", colour = "black", angle = 0),
    strip.text.y = element_text(size = 8, face = "bold", colour = "black")
  )
allp

# Arguments are named in full (no partial matching of `file`) and the stray
# trailing comma of the original call is removed.
ggsave(
  filename = "auto_visual_no_label.jpeg",
  plot = allp,
  height = 11,
  width = 8.5,
  dpi = 400
)
This is what it produces!

Assuming the bar widths are inversely proportional to the number of x-breaks, an appropriate scaling factor can be entered as a width aesthetic to control the width of the bars. But first, calculate the number of x-breaks in each panel, calculate the scaling factor, and put them back into the "all" data frame.
Update for ggplot2 2.0.0: each column mentioned in facet_wrap now gets its own line in the strip. In the edit, a new label variable is set up in the data frame so that the strip label remains on one line.
library(ggplot2)
library(plyr)
all = structure(list(station = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Station 101",
"Station 126"), class = "factor"), shortname2 = structure(c(2L,
7L, 8L, 11L, 1L, 5L, 7L, 8L, 11L, 1L, 2L, 3L, 5L, 7L, 8L, 12L,
11L, 1L, 6L, 8L, 15L, 14L, 9L, 10L, 4L, 6L, 2L, 7L, 8L, 11L,
1L, 5L, 7L, 8L, 11L, 1L, 2L, 3L, 5L, 7L, 8L, 12L, 11L, 1L, 8L,
11L, 1L, 15L, 14L, 13L, 9L, 10L), .Label = c("All", "C1", "C2",
"C2&1", "C3", "C3&2", "C4", "C5", "Cegg", "Cnaup", "F", "M",
"Micro", "Oith", "Tric"), class = "factor"), color = c(1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 21L, 26L, 30L, 31L, 33L, 34L, 20L, 21L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 26L, 28L, 29L, 30L, 31L, 32L, 33L, 34L), group = structure(c(1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 6L, 5L, 3L, 3L, 3L, 3L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 3L, 3L,
3L, 3L, 3L), .Label = c("cgla", "Chyp", "Cope", "mlong", "pseudo",
"specC"), class = "factor"), sample_size = c(11L, 37L, 55L, 16L,
119L, 21L, 55L, 42L, 40L, 158L, 24L, 16L, 17L, 27L, 14L, 45L,
98L, 241L, 30L, 34L, 51L, 22L, 14L, 47L, 13L, 41L, 24L, 41L,
74L, 20L, 159L, 18L, 100L, 32L, 29L, 184L, 31L, 17L, 27L, 23L,
21L, 17L, 49L, 185L, 30L, 16L, 46L, 57L, 16L, 12L, 30L, 42L),
perc_correct = c(91L, 78L, 89L, 81L, 85L, 90L, 91L, 93L,
80L, 89L, 75L, 75L, 76L, 81L, 86L, 76L, 79L, 78L, 90L, 97L,
75L, 86L, 93L, 74L, 85L, 88L, 88L, 90L, 92L, 90L, 91L, 89L,
89L, 91L, 90L, 89L, 81L, 88L, 74L, 78L, 90L, 82L, 84L, 82L,
90L, 94L, 91L, 81L, 69L, 83L, 90L, 81L)), class = "data.frame", row.names = c(NA,
-52L))
all$station <- as.factor(all$station)

# Calculate scaling factor and insert into data frame
library(plyr)

# Number of rows (i.e. bars) in every station/group panel; ddply names the
# resulting count column V1.
N <- ddply(all, .(station, group), function(x) nrow(x))

# Scaling factor: panel bar count relative to the fullest panel.
N$Fac <- N$V1 / max(N$V1)

# Attach the factor to every row of the data (the raw count V1 is not needed).
all <- merge(all, N[, c("station", "group", "Fac")], by = c("station", "group"))

# Single combined label so each facet strip stays on one line.
all$label <- paste(all$group, all$station, sep = ", ")

allp <- ggplot(
  data = all,
  aes(
    x = shortname2,
    y = perc_correct,
    group = group,
    fill = sample_size,
    # Bar width scaled per panel by the factor computed above.
    width = .5 * Fac
  )
)
allp <- allp +
  geom_bar(stat = "identity", position = "dodge", colour = "NA") +
  scale_fill_gradient("Sample size (n)", low = "lightblue", high = "navyblue") +
  facet_wrap(~label, ncol = 2, scales = "free_x") +
  xlab("Species and stages") +
  ylab("Automatic identification and visual validation concur (%)") +
  ggtitle("Visual validation of predictions") +
  theme_bw() +
  theme(
    plot.title = element_text(lineheight = .8, face = "bold", size = 20, vjust = 1),
    axis.text.x = element_text(colour = "grey20", size = 12, angle = 0, hjust = .5, vjust = .5, face = "bold"),
    axis.text.y = element_text(colour = "grey20", size = 12, angle = 0, hjust = 1, vjust = 0, face = "bold"),
    axis.title.x = element_text(colour = "grey20", size = 15, angle = 0, hjust = .5, vjust = 0, face = "bold"),
    axis.title.y = element_text(colour = "grey20", size = 15, angle = 90, hjust = .5, vjust = 1, face = "bold"),
    legend.position = "none",
    strip.text.x = element_text(size = 12, face = "bold", colour = "black", angle = 0),
    strip.text.y = element_text(size = 12, face = "bold", colour = "black")
  )
allp

Related

order ggplot by a separate column

I am trying to order this ggplot based on a different column that is not plotted. Is that possible?
This is my subset of data
test<- structure(list(
Mutations = c(
"P9L",
"P9S",
"P9L",
"P9L",
"P9S",
"P9L",
"P9Q",
"P9S",
"P9S",
"P9L",
"P9S",
"P9L",
"P9S",
"P9L",
"P9Q",
"P9S",
"P9L",
"P9Q",
"P9S",
"P9L",
"P9S",
"P9L",
"S12P",
"S12P",
"S12P",
"S12P",
"S12P",
"S12P",
"S12P",
"C15S",
"C15S",
"C15F",
"C15F",
"C15F",
"C15F",
"C15F",
"C15S",
"C15F",
"L18F",
"L18F",
"L18F",
"L18F",
"L18F",
"L18F",
"L18F",
"L18F",
"L18F",
"L18F",
"L18F",
"L18F"
),
x = c(
9L,
9L,
9L,
9L,
9L,
9L,
9L,
9L,
9L,
9L,
9L,
9L,
9L,
9L,
9L,
9L,
9L,
9L,
9L,
9L,
9L,
9L,
12L,
12L,
12L,
12L,
12L,
12L,
12L,
15L,
15L,
15L,
15L,
15L,
15L,
15L,
15L,
15L,
18L,
18L,
18L,
18L,
18L,
18L,
18L,
18L,
18L,
18L,
18L,
18L
),
epi_week = c(
51L,
53L,
53L,
54L,
55L,
55L,
55L,
56L,
57L,
57L,
58L,
58L,
59L,
59L,
59L,
60L,
60L,
60L,
61L,
61L,
62L,
62L,
53L,
55L,
56L,
57L,
58L,
60L,
61L,
52L,
54L,
56L,
57L,
58L,
59L,
60L,
61L,
62L,
49L,
50L,
51L,
52L,
53L,
54L,
55L,
56L,
57L,
58L,
59L,
60L
),
n = c(
0.018412815,
0.015021782,
0.015021782,
0.010515247,
0.024205261,
0.032273681,
0.00806842,
0.023421032,
0.080858175,
0.048514905,
0.036714089,
0.073428178,
0.045506257,
0.053090633,
0.007584376,
0.040013338,
0.026675559,
0.00666889,
0.134355489,
0.023709792,
0.037509377,
0.012503126,
0.015021782,
0.01613684,
0.015614021,
0.016171635,
0.009178522,
0.013337779,
0.031613056,
0.027144408,
0.010515247,
0.007807011,
0.016171635,
0.018357045,
0.037921881,
0.013337779,
0.007903264,
0.012503126,
0.16722408,
0.353495679,
0.699686982,
1.221498371,
1.727504882,
3.238696109,
4.260125867,
3.70833008,
3.913535658,
2.065167508,
2.935153584,
2.734244748
),
mab = c(
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L
),
support = c(
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L
)
),
row.names = c(NA,
50L), class = "data.frame")
This is the code for the plot
# Heatmap of mutation frequency (n, log-scaled fill) per epi week.
p1 <- ggplot(test, aes(x = epi_week, y = Mutations, fill = n)) +
  geom_tile() +
  coord_equal() +
  scale_fill_gradient(
    trans = "log",
    low = "white",
    high = "darkgreen",
    # The guide must be passed by name; an unnamed argument is forwarded
    # through `...` and is not matched to `guide`.
    guide = guide_legend(title = "Percentage (%)", title.position = "top"),
    labels = function(x) sprintf("%.2f", x)
  ) +
  theme(
    # Hide panel borders and remove grid lines
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Remove panel background
    panel.background = element_blank(),
    # Change axis line
    axis.line = element_line(colour = "black"),
    axis.text.y = element_text(hjust = 1, size = 5, color = "black")
  )
p1

# Overlay fully transparent tiles whose outline colour and thickness encode
# the `support` level (1, 2 or 3).
# The original snippet contained a garbled duplicate of the
# scale_colour_manual() line with an unterminated string; it is removed here.
p1 +
  geom_tile(
    data = test,
    aes(
      colour = factor(support, c(1, 2, 3)),
      size = factor(support, c(1, 2, 3))
    ),
    alpha = 0
  ) +
  scale_colour_manual("Confidence", values = c("white", "blue4", "red")) +
  scale_size_manual("Confidence", values = c(0, 0.2, 0.2))
Mutations are named with a letter followed by a number and then another letter (e.g. R377L).
In my data I have a column x that holds all the numbers.
How can I order my y axis in the ggplot (which is Mutations) by descending x values ?
I am also wondering if it is possible to have some column annotations aside the plot with data from the mab column?
THanks
You can use reorder(Mutations,-epi_week) to reorder Mutations based on the corresponding mean values of epi_week within each Mutation, see help page too:
# Heatmap with the y axis ordered by reorder(): mutations are sorted by the
# mean of -epi_week within each mutation (reorder()'s default summary is the
# mean).
ggplot(test, aes(x = epi_week, y = reorder(Mutations, -epi_week), fill = n)) +
  geom_tile() +
  coord_equal() +
  scale_fill_gradient(
    trans = "log",
    low = "white",
    high = "darkgreen",
    # The guide must be passed by name; an unnamed argument is forwarded
    # through `...` and is not matched to `guide`.
    guide = guide_legend(title = "Percentage (%)", title.position = "top"),
    labels = function(x) sprintf("%.2f", x)
  ) +
  theme(
    # Hide panel borders and remove grid lines
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Remove panel background
    panel.background = element_blank(),
    # Change axis line
    axis.line = element_line(colour = "black"),
    axis.text.y = element_text(hjust = 1, size = 5, color = "black")
  )
If you want to reorder by its numerical value after stripping it of strings, it will be:
# Order the y axis by the number embedded in each mutation name: gsub() strips
# every non-digit character, and the negated number is used as the sort key.
ggplot(
  test,
  aes(
    x = epi_week,
    y = reorder(Mutations, -as.numeric(gsub("[^0-9]*", "", Mutations))),
    fill = n
  )
) +
  geom_tile() +
  coord_equal() +
  ylab("Mutation")

Why is geom_line() not connecting through geom_point()?

Question: why is geom_line() not connecting through geom_point()?
I have:
Written with
# Labels shown on the x axis in place of the month factor levels.
month_labels <- c(
  "January", "February", "March", "April", "May", "June",
  "July", "August", "Septemer", "October", "November", "December"
)

# Points and lines of n per month, coloured by year, one panel per year.
# `colsze` is a colour vector that is not defined in this snippet.
ggplot(a, aes(x = month, color = year, fill = year)) +
  scale_color_manual(values = colsze) +
  scale_fill_manual(values = alpha(colsze, .2)) +
  scale_x_discrete(labels = month_labels) +
  geom_point(aes(y = n), size = 4, shape = 19) +
  geom_line(aes(y = n)) +
  scale_y_continuous(breaks = seq(0, 120, 10), limits = c(0, 120)) +
  facet_wrap(. ~ year)
I cannot figure out why this does not work? E.g. following tutorials like this
geom_line() seems to appear in the legend but not in plot.
a <- structure(list(month = structure(c(4L, 1L, 4L, 7L, 1L, 9L, 2L,
8L, 8L, 10L, 7L, 10L, 9L, 9L, 9L, 2L, 10L, 7L, 4L, 2L, 2L, 3L,
11L, 11L, 12L, 9L, 12L, 10L, 10L, 10L, 11L, 5L, 10L, 10L, 10L,
10L, 10L, 12L, 11L, 7L, 12L, 6L, 9L, 9L, 9L, 7L, 9L, 4L, 12L,
12L, 11L, 3L, 3L, 11L, 11L, 11L, 7L, 11L, 12L, 12L, 12L, 2L,
4L, 1L, 11L, 11L, 1L, 4L, 8L, 2L, 10L, 5L, 5L, 6L, 7L, 11L, 11L,
11L, 11L, 11L, 12L, 11L, 10L, 7L, 12L, 9L, 9L, 7L, 10L, 8L, 8L,
5L, 9L, 10L, 9L, 3L, 8L, 10L, 10L, 8L), .Label = c("1", "2",
"3", "4", "5", "6", "7", "8", "9", "10", "11", "12"), class = "factor"),
year = structure(c(3L, 3L, 2L, 1L, 4L, 4L, 4L, 1L, 1L, 1L,
3L, 1L, 2L, 1L, 1L, 3L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
1L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 2L,
4L, 4L, 2L, 2L, 2L, 2L, 1L, 3L, 1L, 1L, 2L, 1L, 1L, 2L, 2L,
2L, 4L, 1L, 3L, 3L, 3L, 2L, 3L, 2L, 2L, 1L, 2L, 2L, 1L, 4L,
2L, 1L, 1L, 4L, 4L, 2L, 2L, 4L, 4L, 2L, 3L, 3L, 3L, 4L, 4L,
1L, 1L, 3L, 4L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 1L, 1L, 1L
), .Label = c("2017", "2018", "2019", "2020"), class = "factor"),
n = c(92L, 95L, 83L, 95L, 70L, 88L, 94L, 103L, 103L, 98L,
95L, 98L, 90L, 89L, 89L, 76L, 98L, 97L, 79L, 103L, 103L,
111L, 104L, 104L, 73L, 89L, 73L, 107L, 107L, 107L, 88L, 111L,
107L, 107L, 107L, 107L, 107L, 73L, 104L, 78L, 87L, 92L, 90L,
90L, 90L, 78L, 89L, 92L, 98L, 98L, 85L, 111L, 111L, 85L,
85L, 85L, 97L, 104L, 73L, 73L, 73L, 71L, 92L, 99L, 85L, 104L,
99L, 83L, 103L, 94L, 90L, 90L, 90L, 92L, 97L, 85L, 85L, 88L,
88L, 85L, 73L, 89L, 107L, 97L, 87L, 89L, 89L, 95L, 96L, 103L,
103L, 75L, 90L, 90L, 90L, 88L, 87L, 98L, 98L, 103L)), row.names = c(NA,
-100L), groups = structure(list(month = structure(c(1L, 1L, 1L,
2L, 2L, 2L, 2L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 7L, 7L, 7L,
7L, 8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
12L, 12L, 12L), .Label = c("1", "2", "3", "4", "5", "6", "7",
"8", "9", "10", "11", "12"), class = "factor"), year = structure(c(2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 3L, 1L, 2L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 3L, 4L), .Label = c("2017", "2018", "2019", "2020"), class = "factor"),
.rows = structure(list(c(64L, 67L), 2L, 5L, 20:21, 62L, 16L,
c(7L, 70L), c(22L, 52L, 53L), 96L, 19L, c(3L, 68L), c(1L,
48L, 63L), 72:73, 92L, 32L, c(42L, 74L), 4L, c(40L, 46L
), c(11L, 88L), c(18L, 57L, 75L, 84L), c(8L, 9L, 69L,
90L, 91L, 100L), 97L, c(14L, 15L, 26L, 47L, 86L, 87L),
c(13L, 43L, 44L, 45L, 93L, 95L), 6L, c(10L, 12L, 17L,
98L, 99L), c(71L, 94L), c(28L, 29L, 30L, 33L, 34L, 35L,
36L, 37L, 83L), 89L, c(23L, 24L, 39L, 58L, 66L), c(51L,
54L, 55L, 56L, 65L, 76L, 77L, 80L), 82L, c(31L, 78L,
79L), 49:50, c(25L, 27L, 38L, 59L, 60L, 61L, 81L), c(41L,
85L)), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 36L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
Try this:
# Points connected by lines, one panel per year. With a continuous x axis the
# observations of one year form a single group, so geom_line() can join them.
ggplot(
  a,
  # as.numeric() on a factor returns the internal level codes, not the labels.
  # Going through as.character() converts the labels themselves ("1".."12"),
  # so the result stays correct even if the level order ever changes.
  aes(x = as.numeric(as.character(month)), color = year, fill = year)
) +
  # scale_color_manual(values = colsze) +
  # scale_fill_manual(values = alpha(colsze, .2)) +
  scale_x_continuous(
    name = "month",
    breaks = 1:12,
    # Base R's built-in English month names (fixes the "Septemer" typo).
    labels = month.name
  ) +
  geom_point(aes(y = n), size = 4, shape = 19) +
  geom_line(aes(y = n)) +
  scale_y_continuous(breaks = seq(0, 120, 10), limits = c(0, 120)) +
  facet_wrap(. ~ year)
I commented on those two lines because in your reproducible example there is no variable colsze.
The problem is that month is a factor and must first be converted to numeric. For a better visualization, evaluate whether to rotate the labels on the x axis by 45 °

Cannot plot the correct x-axis in ggplot2

I am plotting the following data using ggplot2 in R.
dat<-structure(list(Month = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L,
8L, 8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
12L, 12L, 12L, 12L), grp1 = structure(c(1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L), .Label = c("(-Inf,2]", "(2,7]", "(7,14]",
"(14, Inf]"), class = "factor"), n = c(71L, 59L, 36L, 10L, 55L,
73L, 18L, 10L, 97L, 82L, 22L, 5L, 120L, 79L, 15L, 2L, 140L, 62L,
15L, 174L, 60L, 11L, 188L, 71L, 2L, 183L, 53L, 2L, 211L, 50L,
2L, 171L, 69L, 7L, 1L, 98L, 85L, 13L, 6L, 72L, 62L, 24L, 9L)), class
= "data.frame", row.names = c(NA,-43L))
Here's my script:
library(ggplot2)
# Stacked columns of n per month, filled by grp1.
# The data frame defined above is called `dat`; the original call referred to
# an undefined object `test`.
p <- ggplot(data = dat, aes(Month, n, fill = grp1))
p <- p + geom_col()
p <- p + theme(
  panel.background = element_rect(fill = "white"),
  plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
  panel.border = element_rect(colour = "black", fill = NA, size = 1),
  axis.line.x = element_line(colour = "black"),
  axis.line.y = element_line(colour = "black"),
  axis.text = element_text(size = 20, colour = "black", family = "sans"),
  axis.title = element_text(size = 20, colour = "black", family = "sans"),
  legend.position = "right",
  legend.key = element_rect(fill = "white")
)
p <- p + scale_y_continuous(
  limits = c(0, 300),
  breaks = c(seq(0, 300, 50)),
  expand = c(0, 0)
)
# NOTE: Month is an integer column in `dat`, yet a discrete x scale is
# requested here. This mismatch is the subject of the question and is kept
# as asked.
p <- p + scale_x_discrete(
  breaks = c(seq(1, 12, 1)),
  labels = c("JAN", "FEB", "MAR", "APR", "MAY", "JUN",
             "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"),
  expand = c(0, 0)
)
p <- p + labs(x = "Month", y = "Number of Days")
Here's the output:
Why is it that I cannot plot the x-axis values?
If I don't set the scale_x_discrete, the plot will look like this:
Any ideas on how to solve this?
I'll appreciate any help.
If you want the month names along the x-axis, you can wrap Month in as.factor() in your ggplot call. Here's an example:
# Axis labels for the twelve months, in the order of the breaks 1..12.
month_abbr <- c("JAN", "FEB", "MAR", "APR", "MAY", "JUN",
                "JUL", "AUG", "SEP", "OCT", "NOV", "DEC")

# Month is converted to a factor so that it is treated as discrete and
# scale_x_discrete() applies to it.
p <- ggplot(data = dat, aes(as.factor(Month), n, fill = grp1)) +
  geom_col() +
  theme(
    panel.background = element_rect(fill = "white"),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    panel.border = element_rect(colour = "black", fill = NA, size = 1),
    axis.line.x = element_line(colour = "black"),
    axis.line.y = element_line(colour = "black"),
    axis.text = element_text(size = 20, colour = "black", family = "sans"),
    axis.title = element_text(size = 20, colour = "black", family = "sans"),
    legend.position = "right",
    legend.key = element_rect(fill = "white")
  ) +
  scale_y_continuous(
    limits = c(0, 300),
    breaks = seq(0, 300, 50),
    expand = c(0, 0)
  ) +
  scale_x_discrete(
    breaks = seq(1, 12, 1),
    labels = month_abbr,
    expand = c(0, 0)
  ) +
  labs(x = "Month", y = "Number of Days")
p
Which gives you this:-

Variable label position in ggplot line chart

I have the following data frame summary created with dplyr
# dput() of the summary data frame. It is assigned to `df` because every
# plotting snippet below refers to the data by that name.
df <- structure(list(maxrep = c(7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L,
11L, 12L, 12L, 13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L, 17L, 17L,
18L, 18L, 19L, 19L, 20L, 20L, 21L, 21L, 22L, 22L, 23L, 23L, 24L,
24L, 26L, 26L), div = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("Premier Division",
"Second Division"), class = "factor"), freq = c(1L, 10L, 4L,
39L, 26L, 89L, 73L, 146L, 107L, 162L, 117L, 133L, 121L, 125L,
116L, 91L, 110L, 65L, 95L, 43L, 75L, 38L, 43L, 24L, 38L, 16L,
36L, 5L, 15L, 2L, 9L, 7L, 9L, 1L, 3L, 3L, 2L, 1L)), .Names = c("maxrep",
"div", "freq"), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -38L))
My intention is to use ggplot2 to plot line graphs of 2 lines with different colour with text labels for each value.
What I did was
# One line per division, with every point labelled by its frequency; the
# constant vjust places all labels on the same side of their point.
ggplot(df, aes(x = maxrep, y = freq, colour = div)) +
  geom_line() +
  geom_text(aes(label = freq), vjust = -.5)
The result was
Now my question: All the labels in the chart are above the points in respective lines. I want to have the labels for the different colours to be in different relative position, e.g. labels for cyan above the line, and labels for red below the line (i.e. variable vjust). Is there a way to do that?
Also, is there a way to get rid of the letter a in the colour legend on the right?
What about plotting the labels for each line separately, with differing vjust values? You can get rid of the a in the legend by setting show.legend = FALSE (this argument was called show_guide before ggplot2 2.0.0).
# Label each division in its own geom_text() layer so the two sets of labels
# can take different vertical offsets.
# show.legend = FALSE keeps the text layers out of the legend, which removes
# the letter "a" from the keys. It replaces show_guide, which was renamed in
# ggplot2 2.0.0.
ggplot(df, aes(x = maxrep, y = freq, colour = div, label = freq)) +
  geom_line() +
  geom_text(
    data = df[df$div == "Second Division", ],
    vjust = 2,
    show.legend = FALSE
  ) +
  geom_text(
    data = df[df$div == "Premier Division", ],
    vjust = -2,
    show.legend = FALSE
  )
Which returns:
Create a new variable in the data.frame holding the vjust adjustment parameter:
# Per-row vjust value: -2 for "Premier Division", 2 for every other row.
df$pos <- ifelse(df$div == "Premier Division", -2, 2)
And you could call vjust inside aes with the new pos vector:
# vjust is mapped inside aes(), so each label takes its offset from the `pos`
# column of its own row.
ggplot(df, aes(x = maxrep, y = freq, colour = div)) +
  geom_line() +
  geom_text(
    aes(label = freq, vjust = pos)
  )

ggplot2 manually specifying colour with geom_line

I'm trying to plot the graph below, and want to manually specify colours.
I need to plot by genotype, since there are multiple genotypes belonging to the same Bgrnd_All, and I want them to come up separately in the lines plotted.
However, I want to colour the lines by Bgrnd_All, and specifically in the order/colour I use in scale_fill_manual.
When I do this, the values in scale_fill_manual do not overwrite the existing colour as defined in geom_line. How can I do this?
I'd be grateful for pointers.
Data for the graph below: https://www.dropbox.com/s/9nmu87wkh2yqfxn/summary_200_exp2.csv?dl=0
# Shared dodge position (width 1) for the error bars and the lines.
pd <- position_dodge(1)

# Manual colour values, reproduced exactly as given in the question.
bgrnd_values <- c(
  Avalon = "#000066", Av_A = "#663399", Av_B = "#339999",
  Cadenza = "CC0033", Cad_A = "FF6600", Cad_B = "FF9933"
)

# One line per genotype with black error bars of +/- se; the lines take their
# colour from Bgrnd_All.
ggplot(
  data = summary.200.exp2,
  aes(x = Time, y = Length, colour = Genotype, group = Genotype)
) +
  geom_errorbar(
    aes(ymin = Length - se, ymax = Length + se),
    colour = "black",
    width = 1,
    position = pd
  ) +
  geom_line(aes(colour = Bgrnd_All), position = pd, size = 1) +
  scale_x_continuous(breaks = c(0, 17, 22, 41, 89)) + #using breaks of when sampled
  scale_fill_manual(values = bgrnd_values) +
  ylab("leaf segment width (mm)") +
  xlab("Time") +
  theme(
    axis.title = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 14),
    strip.text.y = element_text(size = 14)
  )
A dput of the data:
summary.200.exp2 <- structure(list(X = 1:40,
Genotype = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L), .Label = c("4.18", "4.41", "7.50", "7.59", "8.51", "8.77", "Avalon", "Cadenza"), class = "factor"),
Time = c(0L, 17L, 22L, 41L, 89L, 0L, 17L, 22L, 41L, 89L, 0L, 17L, 22L, 41L, 89L, 0L, 17L, 22L, 41L, 89L, 0L, 17L, 22L, 41L, 89L, 0L, 17L, 22L, 41L, 89L, 0L, 17L, 22L, 41L, 89L, 0L, 17L, 22L, 41L, 89L),
Bgrnd_All = structure(c(4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 6L), .Label = c("Av_A", "Av_B", "Avalon", "Cad_A", "Cad_B", "Cadenza"), class = "factor"),
N = c(43L, 48L, 44L, 47L, 48L, 22L, 21L, 26L, 27L, 25L, 36L, 24L, 44L, 48L, 45L, 50L, 26L, 52L, 54L, 53L, 38L, 52L, 52L, 49L, 50L, 39L, 39L, 42L, 38L, 42L, 84L, 42L, 84L, 42L, 42L, 50L, 26L, 53L, 27L, 27L),
Length = c(1.17423255813953, 1.58852083333333, 1.71263636363636, 1.86736170212766, 2.0331875, 1.07563636363636, 1.49866666666667, 1.48734615384615, 1.66796296296296, 2.15416, 1.08716666666667, 1.09858333333333, 1.24593181818182, 1.30827083333333, 1.81537777777778, 1.15672, 1.8475, 1.96815384615385, 2.01822222222222, 2.5057358490566, 1.14697368421053, 1.40276923076923, 1.49832692307692, 1.76981632653061, 2.27954, 1.18312820512821, 1.75928205128205, 1.86195238095238, 1.91426315789474, 2.26883333333333, 1.10839285714286, 1.97902380952381, 2.03271428571429, 2.15685714285714, 2.8227380952381, 1.08658, 1.68880769230769, 1.7277358490566, 1.9232962962963, 2.13466666666667),
sd = c(0.218740641945063, 0.357307960001092, 0.377931031662453, 0.416137123383518, 0.440003996899158, 0.176915784499843, 0.426273190962478, 0.305677731254037, 0.450036449932454, 0.48642939535627, 0.15212823538055, 0.175160775008132, 0.293836087650785, 0.282464815326021, 0.346608194369436, 0.211422397593258, 0.408328617659845, 0.413460118977535, 0.419730221832425, 0.508692484972064, 0.217587942685885, 0.207510416973071, 0.245473270071832, 0.377310585673427, 0.536134471785516, 0.159925670150259, 0.298319411009668, 0.338847829173593, 0.296186727462412, 0.445638589029855, 0.162594700328365, 0.308723610551514, 0.318831396748337, 0.381781291715339, 0.402059458017902, 0.167826451905484, 0.257140275994371, 0.338637947743116, 0.362428434825926, 0.343680867174096),
se = c(0.0333576351702583, 0.0515729617225566, 0.0569752467571038, 0.0606998379642952, 0.06350910651356, 0.0377185719899813, 0.0930204363959963, 0.0599483352513503, 0.0866095551712153, 0.097285879071254, 0.0253547058967583, 0.0357545434766975, 0.0442974569365289, 0.040770284291269, 0.0516692989445678, 0.0298996422065822, 0.0800798303617661, 0.0573366022820362, 0.0571180485063685, 0.0698742866122227, 0.0352974252834232, 0.0287765172534354, 0.0340410177692235, 0.053901512239061, 0.0758208641254813, 0.0256086023072023, 0.0477693365291991, 0.052285355168868, 0.0480478318490224, 0.0687635271596866, 0.0177405362346046, 0.0476370873204908, 0.0347873573697084, 0.0589101322645314, 0.0620391212561054, 0.0237342444409691, 0.0504293571163821, 0.046515499476421, 0.0697493848029077, 0.0661414137260961),
ci = c(0.0673184331863912, 0.103751416510302, 0.114901535684132, 0.122182436693452, 0.127763842564108, 0.0784400645137227, 0.194037230170767, 0.123465907623535, 0.178028490322197, 0.200788185881879, 0.0514727894594648, 0.0739639084701291, 0.0893343358495282, 0.0820192326650262, 0.104132629687123, 0.0600855805773719, 0.164927497928001, 0.11510803218647, 0.11456429705202, 0.140213013986381, 0.0715193770736051, 0.0577712690042106, 0.0683401947985261, 0.108376253996364, 0.152367731004308, 0.0518419050566429, 0.0967039660836575, 0.105592416917608, 0.0973541547573791, 0.138870760371045, 0.0352852130493688, 0.0962050495562246, 0.06919065466693, 0.118971425682342, 0.125290547146885, 0.0476957499005439, 0.103861205171753, 0.0933401784102089, 0.143371913789607, 0.135955623027448)),
.Names = c("X", "Genotype", "Time", "Bgrnd_All", "N", "Length", "sd", "se", "ci"), class = "data.frame", row.names = c(NA, -40L))
As stated by @juba in the comments, you should use scale_colour_manual instead of scale_fill_manual. Also note that hex colour codes need a leading # (e.g. "#CC0033", not "CC0033"). Moreover, you are trying to plot too many lines and error bars in one plot. They overlap each other too much, so it is hard to distinguish between the lines and error bars.
An example with the use of facetting (and some simplification of your code):
# Named colours, one per Bgrnd_All level.
bgrnd_colours <- c(
  Avalon = "#000066", Av_A = "#663399", Av_B = "#339999",
  Cadenza = "#CC0033", Cad_A = "#FF6600", Cad_B = "#FF9933"
)

# One panel per background; within a panel each genotype gets its own line and
# error bars (+/- se), all coloured by the background.
ggplot(summary.200.exp2, aes(x = Time, y = Length, group = Genotype)) +
  geom_line(aes(colour = Bgrnd_All), size = 1) +
  geom_errorbar(
    aes(ymin = Length - se, ymax = Length + se, colour = Bgrnd_All),
    width = 2
  ) +
  scale_x_continuous("Time", breaks = c(0, 17, 22, 41, 89)) +
  scale_colour_manual(values = bgrnd_colours) +
  ylab("leaf segment width (mm)") +
  theme_bw() +
  theme(
    axis.title = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 10)
  ) +
  facet_wrap(~Bgrnd_All, ncol = 3)
this gives:

Resources