Cannot plot the correct x-axis in ggplot2 - r

I am plotting the following data using ggplot2 in R.
dat<-structure(list(Month = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L,
8L, 8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
12L, 12L, 12L, 12L), grp1 = structure(c(1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L), .Label = c("(-Inf,2]", "(2,7]", "(7,14]",
"(14, Inf]"), class = "factor"), n = c(71L, 59L, 36L, 10L, 55L,
73L, 18L, 10L, 97L, 82L, 22L, 5L, 120L, 79L, 15L, 2L, 140L, 62L,
15L, 174L, 60L, 11L, 188L, 71L, 2L, 183L, 53L, 2L, 211L, 50L,
2L, 171L, 69L, 7L, 1L, 98L, 85L, 13L, 6L, 72L, 62L, 24L, 9L)), class
= "data.frame", row.names = c(NA,-43L))
Here's my script:
library(ggplot2)
p<-ggplot(data=test,aes(Month, n, fill = grp1))
p<- p + geom_col()
p <- p + theme(panel.background=element_rect(fill="white"),
plot.margin = margin(0.5,0.5,0.5,0.5, "cm"),
panel.border=element_rect(colour="black",fill=NA,size=1),
axis.line.x=element_line(colour="black"),
axis.line.y=element_line(colour="black"),
axis.text=element_text(size=20,colour="black",family="sans"),
axis.title=element_text(size=20,colour="black",family="sans"),
legend.position = "right", legend.key = element_rect(fill = 'white'))
p <- p + scale_y_continuous(limits = c(0,300),breaks=c(seq(0,300,50)), expand=c(0,0))
p <- p + scale_x_discrete(breaks=c(seq(1,12,1)),labels=c("JAN","FEB","MAR","APR","MAY","JUN","JUL","AUG","SEP","OCT","NOV","DEC"),expand=c(0,0))
p <- p + labs(x = "Month", y = "Number of Days")
Here's the output:
Why is it that I cannot plot the x-axis values?
If I don't set the scale_x_discrete, the plot will look like this:
Any ideas on how to solve this?
I'll appreciate any help.

If you want the Month name along the xaxis, then you can add in as.factor(Month) to your ggplot script. Heres an example:-
p<-ggplot(data=dat,aes(as.factor(Month), n, fill = grp1))
p<- p + geom_col()
p <- p + theme(panel.background=element_rect(fill="white"),
plot.margin = margin(0.5,0.5,0.5,0.5, "cm"),
panel.border=element_rect(colour="black",fill=NA,size=1),
axis.line.x=element_line(colour="black"),
axis.line.y=element_line(colour="black"),
axis.text=element_text(size=20,colour="black",family="sans"),
axis.title=element_text(size=20,colour="black",family="sans"),
legend.position = "right", legend.key = element_rect(fill = 'white'))
p <- p + scale_y_continuous(limits = c(0,300),breaks=c(seq(0,300,50)), expand=c(0,0))
p <- p + scale_x_discrete(breaks=c(seq(1,12,1)),labels=c("JAN","FEB","MAR","APR","MAY","JUN","JUL","AUG","SEP","OCT","NOV","DEC"),expand=c(0,0))
p <- p + labs(x = "Month", y = "Number of Days")
p
Which gives you this:-

Related

Linking values of stacked barplot with a table in R Shiny

I have created a stacked barplot in the shiny app in R:
library(shiny)
library(ggplot2)
ui = shinyUI(fluidPage(
titlePanel("Competency"),
fluidRow(
column(6,
plotOutput("Competency.Name", click = "plot1_click")
),
column(5,
br(), br(), br(),
htmlOutput("x_value"),
verbatimTextOutput("selected_rows"))),
))
server <- function(input, output) {
report <- structure(list(Competency.Official.Rating = structure(c(1L, 2L,
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L,
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L
), .Label = c("0", "1", "100", "2", "3"), class = "factor"),
Competency.Name = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L,
5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 8L, 8L,
8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L), .Label = c("Agile",
"Co-creating the future", "Collaboration", "Entrepreneurship",
"Feedback", "Impact", "One company", "One voice", "Responsibility",
"Simplification"), class = "factor"), Freq = c(2L, 9L, 308L,
221L, 95L, 7L, 76L, 310L, 191L, 51L, 2L, 12L, 308L, 193L,
120L, 2L, 43L, 310L, 220L, 60L, 2L, 49L, 311L, 211L, 62L,
3L, 58L, 310L, 208L, 56L, 4L, 22L, 312L, 182L, 115L, 3L,
11L, 310L, 196L, 115L, 2L, 9L, 309L, 161L, 154L, 3L, 38L,
309L, 226L, 59L)), class = "data.frame", row.names = c(NA,
-50L))
output$Competency.Name <- renderPlot({
ggplot(report, aes(x = Competency.Name, y = Freq, fill = Competency.Official.Rating, label = Freq)) +
geom_bar(stat = "identity") + # position = fill will give the %; stack will give #of people
geom_text(size = 3, position = position_stack(vjust = 0.5))
})
# Print the name of the x value
output$x_value <- renderText({
if (is.null(input$plot1_click$x)) return("")
else {
lvls <- levels(report$Competency.Name)
name <- lvls[round(input$plot1_click$x)]
HTML("You've selected <code>", name, "</code>",
"<br><br>Here are the first 10 rows that ",
"match that category:")
}
})
# Print the rows of the data frame which match the x value
output$selected_rows <- renderPrint({
if (is.null(input$plot1_click$x)) return()
else {
keeprows <- round(input$plot1_click$x) == as.numeric(report$Competency.Name)
head(report[keeprows, ], 10)
}
})
}
shinyApp(ui, server)
In the app, when I select a column on my barplot it shows the table for the whole bar (the fact that it is a stacked barplot with different values is not taken in account by my code). In the table I would like to see values only for a selected stack. I know in this example it does not male sense but I have a bigger table with more variables and I could use this modification there.
Thank you!
You need to calculate the cumulative sum of your input and then you can compare it to input$plot1_click$y like this:
library(shiny)
library(ggplot2)
library(dplyr)
report <- structure(
list(Competency.Official.Rating =
structure(c(1L, 2L,
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L,
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L
), .Label = c("0", "1", "100", "2", "3"), class = "factor"),
Competency.Name =
structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L,
5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 8L, 8L,
8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L),
.Label =
c("Agile",
"Co-creating the future", "Collaboration", "Entrepreneurship",
"Feedback", "Impact", "One company", "One voice",
"Responsibility", "Simplification"), class = "factor"),
Freq = c(2L, 9L, 308L,
221L, 95L, 7L, 76L, 310L, 191L, 51L, 2L, 12L, 308L, 193L,
120L, 2L, 43L, 310L, 220L, 60L, 2L, 49L, 311L, 211L, 62L,
3L, 58L, 310L, 208L, 56L, 4L, 22L, 312L, 182L, 115L, 3L,
11L, 310L, 196L, 115L, 2L, 9L, 309L, 161L, 154L, 3L, 38L,
309L, 226L, 59L)), class = "data.frame",
row.names = c(NA,
-50L))
report_stats <- report %>%
arrange(Competency.Name, desc(Competency.Official.Rating)) %>%
group_by(Competency.Name) %>%
mutate(cumsum = cumsum(Freq))
ui = shinyUI(fluidPage(
titlePanel("Competency"),
fluidRow(
column(6,
plotOutput("Competency.Name", click = "plot1_click")
),
column(5,
br(), br(), br(),
htmlOutput("x_value"),
verbatimTextOutput("selected_rows"))),
))
server <- function(input, output) {
x_val <- reactive({
x <- req(input$plot1_click$x)
lvls <- levels(report$Competency.Name)
lvls[round(input$plot1_click$x)]
})
y_val <- reactive({
x <- req(x_val())
y <- req(input$plot1_click$y)
report_stats %>%
filter(Competency.Name == x,
y <= cumsum) %>%
slice(1L) %>%
pull(Competency.Official.Rating)
})
output$Competency.Name <- renderPlot({
ggplot(report, aes(x = Competency.Name, y = Freq,
fill = Competency.Official.Rating, label = Freq)) +
geom_bar(stat = "identity") +
geom_text(size = 3, position = position_stack(vjust = 0.5))
})
# Print the name of the x value
output$x_value <- renderText({
HTML("You've selected <code>", req(x_val()), "</code>",
"<br><br>Here are the first 10 rows that ",
"match that category:")
})
# Print the rows of the data frame which match the x value
output$selected_rows <- renderPrint({
x <- req(x_val())
y <- req(y_val())
head(report[report$Competency.Name == x & report$Competency.Official.Rating == y, ], 10)
})
}
shinyApp(ui, server)

Change x axis labels to hours (time) on geom_tile()

Here is a geom_tile displaying hours and days of the week, how can it made to display each hour (i.e. 00:00 through to 23:00 on the x axis)?
library(tidyverse)
df %>%
ggplot(aes(hour, day, fill = value)) +
geom_tile(colour = "ivory")
Currently it displays every fifth hour:
I have tried a bunch of different things, and would prefer a 'best practice' way (i.e. without manually generating labels), but in case labels are needed, here's one way to produce them hour_labs <- 0:23 %>% { ifelse(nchar(.) == 1, paste0("0", .), .) } %>% paste0(., ":00")
Data for reproducible example
df <- structure(list(day = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), .Label = c("Sunday",
"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"
), class = c("ordered", "factor")), hour = c(0L, 2L, 3L, 5L,
6L, 7L, 8L, 10L, 11L, 12L, 13L, 18L, 21L, 22L, 23L, 0L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 20L, 21L, 22L,
23L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
20L, 21L, 22L, 23L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 13L, 14L, 20L, 21L, 22L, 23L, 0L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 15L, 20L, 21L, 22L, 23L, 0L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 11L, 13L, 14L, 15L, 16L,
19L, 21L, 0L, 1L, 2L, 3L, 7L, 8L, 10L, 13L, 14L, 22L, 23L), value = c(1L,
1L, 1L, 2L, 1L, 3L, 1L, 1L, 2L, 1L, 3L, 1L, 2L, 13L, 13L, 24L,
39L, 21L, 17L, 25L, 22L, 27L, 28L, 19L, 6L, 2L, 2L, 1L, 2L, 2L,
7L, 23L, 38L, 18L, 26L, 21L, 20L, 31L, 40L, 35L, 22L, 5L, 3L,
2L, 7L, 4L, 3L, 3L, 3L, 17L, 13L, 23L, 24L, 19L, 31L, 13L, 35L,
50L, 22L, 13L, 7L, 2L, 1L, 1L, 1L, 1L, 3L, 14L, 17L, 33L, 32L,
32L, 25L, 29L, 27L, 38L, 26L, 11L, 8L, 4L, 5L, 5L, 3L, 1L, 1L,
3L, 14L, 21L, 24L, 22L, 25L, 26L, 23L, 58L, 36L, 26L, 6L, 3L,
1L, 5L, 3L, 1L, 1L, 3L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L,
1L, 1L)), row.names = c(NA, -116L), groups = structure(list(day = structure(1:7, .Label = c("Sunday",
"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"
), class = c("ordered", "factor")), .rows = structure(list(1:15,
16:33, 34:51, 52:69, 70:88, 89:105, 106:116), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr"))), row.names = c(NA, 7L), class = c("tbl_df", "tbl",
"data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"))
Here's one way using sprintf to construct labels.
library(dplyr)
library(ggplot2)
df %>%
mutate(lab = sprintf('%02d:00', hour)) %>%
ggplot() + aes(lab, day, fill = value) +
geom_tile(colour = "ivory") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
To complete the missing times apart from #Eric Watt's suggestion we can also use complete.
df %>%
mutate(lab = sprintf('%02d:00', hour)) %>%
tidyr::complete(lab = sprintf('%02d:00', 0:23)) %>%
ggplot() + aes(lab, day, fill = value) +
geom_tile(colour = "ivory") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
I would suggest making sure your data type is correctly representing your data. If your hour column is representing time in hours, then it should be a time based structure. For example:
df$hour <- as.POSIXct(as.character(df$hour), format = "%H", tz = "UTC")
Then you can tell ggplot that the x axis is a datetime variable using scale_x_datetime.
ggplot(df, aes(hour, day, fill = value)) +
geom_tile(colour = "ivory") +
scale_x_datetime(labels = date_format("%H:%M")) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
If you want a break for every hour, you can input that as breaks:
ggplot(df, aes(hour, day, fill = value)) +
geom_tile(colour = "ivory") +
scale_x_datetime(breaks = as.POSIXct(as.character(0:23), format = "%H", tz = "UTC"),
labels = date_format("%H:%M")) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
You can also use the scales package which has handy formatting options such as date_breaks:
library(scales)
ggplot(df, aes(hour, day, fill = value)) +
geom_tile(colour = "ivory") +
scale_x_datetime(breaks = date_breaks("1 hour"),
labels = date_format("%H:%M")) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Remove shape from legend of combined geom_line() and geom_pont()

I want to create a graph of geom_line() coloured by a variable (Var1) then plot geom_point() with shapes according to a different variable (Var2) with the same colours as geom_line().
After reading a lot about this but not being able to find anything that I could interpret as being the same issue I have attempted the following:
ggplot(data, aes(X, Y)) +
geom_line(aes(color = Var1)) +
geom_point(data = subset(data, Var2 != 0), aes(shape = Var2, colour = Var1), size = 3) +
scale_color_manual(values=c("#7CAE00", "#00BFC4", "#000000", "#C77CFF")) +
scale_x_continuous(breaks=seq(0,30,5)) +
theme_bw()
Which results in the above. The issue with this graph is that the second legend has both IDs are circles when one is a circle and one is a triangle. I would ideally like it to just be a coloured line with no shapes at all.
I've also tried this:
ggplot(data, aes(X, Y)) +
geom_line(aes(color = Var1)) +
geom_point(data = subset(data, Var2 != 0), aes(shape = Var2), size = 3) +
scale_color_manual(values=c("#7CAE00", "#00BFC4", "#000000", "#C77CFF")) +
scale_x_continuous(breaks=seq(0,30,5)) +
theme_bw()
This issue with this graph is that the shapes are not filled in by colour in the graph.
This is my data.
dput(data)
structure(list(X = c(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L), Y = c(1L, 1L, 1L, 2L, 4L, 13L, 18L, 19L,
21L, 24L, 34L, 43L, 70L, 90L, 129L, 169L, 1L, 3L, 3L, 3L, 3L,
4L, 21L, 79L, 157L, 229L, 323L, 470L, 655L, 889L, 1128L, 1701L,
2036L, 2502L, 3089L, 3858L, 4636L, 5883L, 7375L, 9172L, 10149L
), Var1 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("",
"ID1", "ID2"), class = "factor"), Var2 = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 4L, 2L, 2L), .Label = c("", "0", "Point1", "Point2"
), class = "factor")), row.names = c(NA, -41L), class = "data.frame")
How about this
ggplot(data, aes(X, Y))+
geom_line(aes(color = Var1)) +
geom_point(data = subset(data, Var2 != 0), aes(shape = Var2, color=Var1), size = 3) +
scale_color_manual(values=c("#7CAE00", "#00BFC4", "#000000", "#C77CFF")) +
scale_x_continuous(breaks=seq(0,30,5)) +
theme_bw()+
guides(colour = guide_legend(override.aes = list(shape = NA)))

Bars in geom_bar have unwanted different widths when using facet_wrap

I can'd find a solution for the following problem(s). I would appreciate some help a lot!
The following code produces bar charts using facet. However, due to "extra space" ggplot2 has in some groups it makes the bars much wider, even if I specify a width of 0.1 or similar. I find that very annoying since it makes it look very unprofessional. I want all the bars to look the same (except for the fill). I hope somebody can tell me how to fix this.
Secondly, how can I reorder the different classes in the facet windows so that the order is always C1, C2 ... C5, M, F, All where applicable. I tried it with ordering the levels of the factor, but since not all classes are present in every graph part it did not work, or at least I assume that was the reason.
Thirdly, how can I reduce the space between the bars? So that the whole graph is more compressed. Even if I make the image smaller for exporting, R will scale the bars smaller but the spaces between the bars are still huge.
I would appreciate feedback for any of those answers!
My Data:
http://pastebin.com/embed_iframe.php?i=kNVnmcR1
My Code:
library(dplyr)
library(gdata)
library(ggplot2)
library(directlabels)
library(scales)
all<-read.xls('all_auto_visual_c.xls')
all$station<-as.factor(all$station)
#all$group.new<-factor(all$group, levels=c('C. hyperboreus','C. glacialis','Special Calanus','M. longa','Pseudocalanus sp.','Copepoda'))
allp <- ggplot(data = all, aes(x=shortname2, y=perc_correct, group=group,fill=sample_size)) +
geom_bar(aes(fill=sample_size),stat="identity", position="dodge", width=0.1, colour="NA") + scale_fill_gradient("Sample size (n)",low="lightblue",high="navyblue")+
facet_wrap(group~station,ncol=2,scales="free_x")+
xlab("Species and stages") + ylab("Automatic identification and visual validation concur (%)") +
ggtitle("Visual validation of predictions") +
theme_bw() +
theme(plot.title = element_text(lineheight=.8, face="bold", size=20,vjust=1), axis.text.x = element_text(colour="grey20",size=12,angle=0,hjust=.5,vjust=.5,face="bold"), axis.text.y = element_text(colour="grey20",size=12,angle=0,hjust=1,vjust=0,face="bold"), axis.title.x = element_text(colour="grey20",size=15,angle=0,hjust=.5,vjust=0,face="bold"), axis.title.y = element_text(colour="grey20",size=15,angle=90,hjust=.5,vjust=1,face="bold"),legend.position="none", strip.text.x = element_text(size = 12, face="bold", colour = "black", angle = 0), strip.text.y = element_text(size = 12, face="bold", colour = "black"))
allp
#ggsave(allp, file="auto_visual_stackover.jpeg", height= 11, width= 8.5, dpi= 400,)
The current graph that needs some fixing:
Thanks a lot!
Here what I did after suggestion from Gregor. Using geom_segment and geom_point makes a nice graph as I think.
library(ggplot2)
all<-read.xls('all_auto_visual_c.xls')
all$station<-as.factor(all$station)
all$group.new<-factor(all$group, levels=c('C. hyperboreus','C. glacialis','Combined','M. longa','Pseudocalanus sp.','Copepoda'))
all$shortname2.new<-factor(all$shortname2, levels=c('All','F','M','C5','C4','C3','C2','C1','Micro', 'Oith','Tric','Cegg','Cnaup','C3&2','C2&1'))
allp<-ggplot(all, aes(x=perc_correct, y=shortname2.new)) +
geom_segment(aes(yend=shortname2.new), xend=0, colour="grey50") +
geom_point(size=4, aes(colour=sample_size)) +
scale_colour_gradient("Sample size (n)",low="lightblue",high="navyblue") +
geom_text(aes(label = perc_correct, hjust = -0.5)) +
theme_bw() +
theme(panel.grid.major.y = element_blank()) +
facet_grid(group.new~station,scales="free_y",space="free") +
xlab("Automatic identification and visual validation concur (%)") + ylab("Species and stages")+
ggtitle("Visual validation of predictions")+
theme_bw() +
theme(plot.title = element_text(lineheight=.8, face="bold", size=20,vjust=1), axis.text.x = element_text(colour="grey20",size=12,angle=0,hjust=.5,vjust=.5,face="bold"), axis.text.y = element_text(colour="grey20",size=12,angle=0,hjust=1,vjust=0,face="bold"), axis.title.x = element_text(colour="grey20",size=15,angle=0,hjust=.5,vjust=0,face="bold"), axis.title.y = element_text(colour="grey20",size=15,angle=90,hjust=.5,vjust=1,face="bold"),legend.position="none", strip.text.x = element_text(size = 12, face="bold", colour = "black", angle = 0), strip.text.y = element_text(size = 8, face="bold", colour = "black"))
allp
ggsave(allp, file="auto_visual_no_label.jpeg", height= 11, width= 8.5, dpi= 400,)
This is what it produces!
Assuming the bar widths are inversely proportional to the number of x-breaks, an appropriate scaling factor can be entered as a width aesthetic to control the width of the bars. But first, calculate the number of x-breaks in each panel, calculate the scaling factor, and put them back into the "all" data frame.
Updating to ggplot2 2.0.0 Each column mentioned in facet_wrap gets its own line in the strip. In the edit, a new label variable is setup in the dataframe so that the strip label remains on one line.
library(ggplot2)
library(plyr)
all = structure(list(station = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Station 101",
"Station 126"), class = "factor"), shortname2 = structure(c(2L,
7L, 8L, 11L, 1L, 5L, 7L, 8L, 11L, 1L, 2L, 3L, 5L, 7L, 8L, 12L,
11L, 1L, 6L, 8L, 15L, 14L, 9L, 10L, 4L, 6L, 2L, 7L, 8L, 11L,
1L, 5L, 7L, 8L, 11L, 1L, 2L, 3L, 5L, 7L, 8L, 12L, 11L, 1L, 8L,
11L, 1L, 15L, 14L, 13L, 9L, 10L), .Label = c("All", "C1", "C2",
"C2&1", "C3", "C3&2", "C4", "C5", "Cegg", "Cnaup", "F", "M",
"Micro", "Oith", "Tric"), class = "factor"), color = c(1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 21L, 26L, 30L, 31L, 33L, 34L, 20L, 21L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 26L, 28L, 29L, 30L, 31L, 32L, 33L, 34L), group = structure(c(1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 6L, 5L, 3L, 3L, 3L, 3L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 3L, 3L,
3L, 3L, 3L), .Label = c("cgla", "Chyp", "Cope", "mlong", "pseudo",
"specC"), class = "factor"), sample_size = c(11L, 37L, 55L, 16L,
119L, 21L, 55L, 42L, 40L, 158L, 24L, 16L, 17L, 27L, 14L, 45L,
98L, 241L, 30L, 34L, 51L, 22L, 14L, 47L, 13L, 41L, 24L, 41L,
74L, 20L, 159L, 18L, 100L, 32L, 29L, 184L, 31L, 17L, 27L, 23L,
21L, 17L, 49L, 185L, 30L, 16L, 46L, 57L, 16L, 12L, 30L, 42L),
perc_correct = c(91L, 78L, 89L, 81L, 85L, 90L, 91L, 93L,
80L, 89L, 75L, 75L, 76L, 81L, 86L, 76L, 79L, 78L, 90L, 97L,
75L, 86L, 93L, 74L, 85L, 88L, 88L, 90L, 92L, 90L, 91L, 89L,
89L, 91L, 90L, 89L, 81L, 88L, 74L, 78L, 90L, 82L, 84L, 82L,
90L, 94L, 91L, 81L, 69L, 83L, 90L, 81L)), class = "data.frame", row.names = c(NA,
-52L))
all$station <- as.factor(all$station)
# Calculate scaling factor and insert into data frame
library(plyr)
N = ddply(all, .(station, group), function(x) length(row.names(x)))
N$Fac = N$V1 / max(N$V1)
all = merge(all, N[,-3], by = c("station", "group"))
all$label = paste(all$group, all$station, sep = ", ")
allp <- ggplot(data = all, aes(x=shortname2, y=perc_correct, group=group, fill=sample_size, width = .5*Fac)) +
geom_bar(stat="identity", position="dodge", colour="NA") +
scale_fill_gradient("Sample size (n)",low="lightblue",high="navyblue")+
facet_wrap(~label,ncol=2,scales="free_x") +
xlab("Species and stages") + ylab("Automatic identification and visual validation concur (%)") +
ggtitle("Visual validation of predictions") +
theme_bw() +
theme(plot.title = element_text(lineheight=.8, face="bold", size=20,vjust=1),
axis.text.x = element_text(colour="grey20",size=12,angle=0,hjust=.5,vjust=.5,face="bold"),
axis.text.y = element_text(colour="grey20",size=12,angle=0,hjust=1,vjust=0,face="bold"),
axis.title.x = element_text(colour="grey20",size=15,angle=0,hjust=.5,vjust=0,face="bold"),
axis.title.y = element_text(colour="grey20",size=15,angle=90,hjust=.5,vjust=1,face="bold"),
legend.position="none",
strip.text.x = element_text(size = 12, face="bold", colour = "black", angle = 0),
strip.text.y = element_text(size = 12, face="bold", colour = "black"))
allp

add secondary grouping label to x axis in ggplot2

I would like to create a stacked bar graph that contains two levels of x-axis labels. For each stacked bar there is the primary label (dat$HUC_12_NAM), then I would like to group these stacked bars by dat$HUC_10_NAM and label this group as well. I could likely use annotate to manually define and place the labels, but that would be very time consuming, clunky, and could easily result in mis-labeling.
Here is the data....
dat <- structure(list(HUC_12_NAM = structure(c(3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Apostle Islands",
"Raspberry River-Frontal Lake Superior", "Sand River", "Saxine Creek-Frontal Lake Superior"
), class = "factor"), HUC_10_NAM = structure(c(2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Chequamegon Bay-Frontal Lake Superior",
"Sand River-Frontal Lake Superior"), class = "factor"), variable = structure(c(9L,
8L, 4L, 1L, 6L, 11L, 14L, 13L, 10L, 7L, NA, 5L, 15L, 3L, 2L,
12L, 8L, 6L, 3L, 2L, 4L, 1L, 15L, 5L, 11L, 14L, 10L, 9L, 13L,
7L, 12L, NA, 12L, 4L, 10L, 8L, 3L, NA, 2L, 6L, 1L, 13L, 7L, 11L,
9L, 14L, 5L, 15L, 9L, 1L, 8L, 12L, 10L, 4L, 3L, 11L, NA, 7L,
15L, 13L, 14L, 6L, 5L, 2L), .Label = c("Agriculture", "Barren land",
"Developed - High intensity", "Developed - Medium intensity",
"Developed - Low intensity", "Developed - Open space", "Evergreen forest",
"Deciduous forest", "Mixed forest", "Herbaceous", "Pasture",
"Shrub", "Woody wetland", "Herbaceous wetland", "Water"), class = "factor"),
perc_veg = c(11.8839579283911, 57.2626205743974, 0.00544969027593598,
0.514995731075951, 2.59586913477084, 2.53864738687351, 0.108085523806064,
5.3007320750604, 0.731166778688078, 6.04007338916238, 0,
0.0953695798288797, 0.11807662264528, 0, 0.00363312685062399,
12.8013224581736, 58.9563880536275, 4.47423752571726, 0.0158260043860641,
0.101738599624698, 0.0633040175442563, 0.180868621555018,
1.07390744048292, 0.300694083335217, 2.65876873685876, 0.00226085776943772,
0.065564875313694, 15.484614862879, 2.68363817232258, 7.99665393050123,
5.94153421808234, 0, 2.79708137828397, 0.0260443580892536,
0.0078546476777114, 30.3801236073503, 0.028524773145373,
0, 0.470038653134625, 1.99838773021352, 0.0355526158043779,
4.43084809524794, 23.6515843651171, 0.169081626325472, 32.6501167862089,
0.595713015978007, 0.174455858947064, 2.5845924884764, 23.2366527830367,
0.25141991669822, 52.6482393032942, 3.73494888299886, 0.136312003029156,
0.00605831124574025, 0, 1.85535781900795, 0, 11.0851950018932,
0.427110942824688, 2.85800833017796, 0, 3.54714123438092,
0.146914047709201, 0.0666414237031428)), .Names = c("HUC_12_NAM",
"HUC_10_NAM", "variable", "perc_veg"), row.names = c(1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
30L, 31L, 32L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L,
91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L,
103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L), class = "data.frame")
And here is the current stacked bar plot...
library(ggplot2)
p <- ggplot () + geom_bar(data=dat,aes(x=HUC_12_NAM,y=perc_veg,fill=variable),stat='identity')
p <- p + coord_flip() #this helps fit the xlabel
p
And the resulting plot...
The next label, or grouping, would be from dat$HUC_10_NAM and in this example would add two additional labels, 'Sand River-Frontal Lake Superior' and 'Chequamegon Bay-Frontal Lake Superior'.
Maybe this would just be too cluttered...especially with the long names. But, I would like to see if there is a way to add these second level labels quickly and easily.
Thanks
-cherrytree
If you're willing to facet instead of adding a second row of labels, then you can do this:
ggplot(data=dat, aes(x=HUC_12_NAM, y=perc_veg, fill=variable)) +
geom_bar(stat='identity') +
facet_grid(. ~ HUC_10_NAM, scales="free")
Incidentally, you can reformat the longer labels with a line-break, for example:
dat[,1:2] = lapply(1:2, function(x) gsub("-","\n", dat[,x]))

Resources