I'm using facet_grid() to display some data, and I have facet labels that span multiple lines of text (they contain the "\n" character).
# Load ggplot2. library() stops immediately if the package is missing,
# whereas require() only returns FALSE and lets the script fail later.
library(ggplot2)

# Generate example data: four facet labels (two of them span two lines),
# each observed at the same five time points.
set.seed(3)
df <- data.frame(
  facet_label_text = rep(
    c("Label A",
      "Label B\nvery long label",
      "Label C\nshort",
      "Label D"),
    each = 5
  ),
  time = rep(c(0, 4, 8, 12, 16), times = 4),
  value = runif(20, min = 0, max = 100)
)

# Plot test data: one facet row per label; strip text is horizontal and
# left-aligned as a unit (hjust = 0).
ggplot(df, aes(x = time, y = value)) +
  geom_line() +
  facet_grid(facet_label_text ~ .) +
  theme(strip.text.y = element_text(angle = 0, hjust = 0))
So by using the hjust = 0 argument, I can left-align facet label text as a unit.
What I would like to do is left-align each individual line of text. So "Label B" and "very long label" are both aligned along the left side, rather than centered relative to each other (ditto for "Label C" and "short"). Is this possible in ggplot2?
This is fairly straightforward using grid's grid.gedit function to edit the strips.
library(ggplot2) # v2.1.0
library(grid)
# Data from the question: 4 strip labels x 5 time points
set.seed(3)
df <- data.frame(
  facet_label_text = rep(
    c("Label A",
      "Label B\nvery long label",
      "Label C\nshort",
      "Label D"),
    each = 5
  ),
  time = rep(c(0, 4, 8, 12, 16), times = 4),
  value = runif(20, min = 0, max = 100)
)

# Plot from the question, kept in `p` so it can be reused further down
p <- ggplot(df, aes(x = time, y = value)) +
  geom_line() +
  facet_grid(facet_label_text ~ .) +
  theme(strip.text.y = element_text(angle = 0, hjust = 0))
p

# List the grobs of the plot currently drawn on the device
grid.ls(grid.force())

# The GRID.text grobs are the ones to edit, with one caveat:
# both the strips and the axes have GRID.text children.
# A gPath that starts at the strip grob restricts the edit
# to the strip text only.

# Edit on screen: anchor the strip text at the left edge of its viewport
grid.gedit(
  gPath("GRID.stripGrob", "GRID.text"),
  just = "left",
  x = unit(0, "npc")
)
Or, a few more lines of code to work with a grob object (in place of editing on screen as above):
# Convert the plot `p` to a grob instead of drawing it
gp <- ggplotGrob(p)
grid.ls(grid.force(gp))

# Edit the grob: match the strip text via the gPath (treated as a regular
# expression, applied to every match) and left-justify it
gp <- editGrob(
  grid.force(gp),
  gPath("GRID.stripGrob", "GRID.text"),
  grep = TRUE,
  global = TRUE,
  just = "left",
  x = unit(0, "npc")
)

# Draw the edited grob on a fresh page
grid.newpage()
grid.draw(gp)
Until someone comes along with a real solution, here's a hack: Add space in the labels to get the justification you want.
# Hack: pad the shorter line of each two-line label with trailing spaces so
# that the centred lines appear left-aligned.
require(ggplot2)
#Generate example data
# NOTE(review): only a single trailing space follows "Label B" and "short"
# below; presumably more padding is needed and has to be tuned by eye for
# the font in use -- confirm against the rendered plot.
set.seed(3)
df = data.frame(facet_label_text = rep(c("Label A",
"Label B \nvery long label",
"Label C\nshort ",
"Label D"),
each = 5),
time = rep(c(0, 4, 8, 12, 16), times = 4),
value = runif(20, min=0, max=100))
#Plot test data
# One facet row per label; strip text horizontal, left-aligned as a unit.
ggplot(df, aes(x = time, y = value)) +
geom_line() +
facet_grid(facet_label_text ~ .) +
theme(strip.text.y = element_text(angle = 0, hjust = 0))
There may be a cleaner way to do this but I didn't find a way to do this within ggplot2. The padwrap function could be more generalized as it basically does just what you requested. To get the justification right, I had to use a mono-spaced font.
# Left-justify text with embedded newlines by space-padding every line of a
# string to the length of that string's longest line.
#
# If using the result for ggplot2 labels, set the font `family` to a
# monospaced font (e.g. 'Courier'); padding is counted in characters.
#
# x: character vector; each element may contain "\n".
# Returns a character vector of the same length as `x` (names preserved).
# NA elements stay NA, "" stays "", and empty input gives character(0).
padwrap <- function(x) {
  # Operates on one string
  padwrap_str <- function(s) {
    if (is.na(s)) {
      return(NA_character_)
    }
    lines <- strsplit(s, "\n", fixed = TRUE)[[1]]
    # strsplit("") yields character(0); max() would warn and return -Inf
    if (length(lines) == 0) {
      return("")
    }
    max_len <- max(nchar(lines))
    # "%-<n>s" left-justifies within a field of width n
    paste(sprintf(paste0("%-", max_len, "s"), lines), collapse = "\n")
  }
  # Applies 'padwrap_str' to each string; vapply() guarantees a character
  # result even for zero-length input (unlist(lapply()) would return NULL)
  out <- vapply(x, padwrap_str, character(1), USE.NAMES = FALSE)
  names(out) <- names(x)
  out
}
library(ggplot2)

# Same labels as in the question. `each = 5` keeps the five observations of
# a label together, so label/time/value line up exactly as in the original
# example data (a bare `5` would cycle the labels row by row instead).
facet_label_text <- rep(
  c("Label A",
    "Label B\nvery long label",
    "Label C\nshort",
    "Label D"),
  each = 5
)
# Pad every line of each label to a common width (see padwrap())
new_facet_label_text <- padwrap(facet_label_text)

# Generate example data
set.seed(3)
df <- data.frame(
  facet_label_text = new_facet_label_text,
  time = rep(c(0, 4, 8, 12, 16), times = 4),
  value = runif(20, min = 0, max = 100)
)

# Plot test data; a monospaced family is required for the space padding
# to produce a straight left edge
ggplot(df, aes(x = time, y = value)) +
  geom_line() +
  facet_grid(facet_label_text ~ .) +
  theme(strip.text.y = element_text(angle = 0, hjust = 0, family = 'Courier'))
The strip text is left justified in the image below
Related
I have to produce a scatter plot with many points.
I am already using the package "ggrepel" to avoid overlapping labels, but it sometimes still doesn't work. Is it possible to insert a line break into the labels (e.g. after a certain length)?
Thanks for help!
library(ggplot2)
library(tidyverse)
library(ggrepel)

# Long point labels, repeated four times to crowd the plot
items <- c("A long description of the item",
           "Another very long text descrbing the item",
           "And finally another one ",
           "This text exceeds the available space by far",
           "Incredibly long text",
           "Here we go with another one",
           "A linebreak would help here",
           "This has at least 20 characters")
items <- rep(items, 4)

df <- data.frame(
  descs = items,
  x = rnorm(n = length(items), mean = 2, sd = 2),
  y = rnorm(n = length(items), mean = 2, sd = 2),
  # Six discrete categories. as.factor(runif(...)) would create a separate
  # factor level (and colour) for every single row.
  cat = factor(sample(1:6, length(items), replace = TRUE))
)

# Scatter plot with repelled text labels, coloured by category
df %>%
  ggplot(aes(x = x, y = y, color = cat)) +
  geom_point() +
  # geom_text(aes(label = descs)) +
  geom_text_repel(aes(label = descs)) +
  theme_light() +
  theme(legend.position = "none")

# Clean up the workspace
rm(items)
rm(df)
You could use stringr::str_wrap to achieve line breaks at an appropriate point. For example, to limit lines to 20 characters, you can do:
# Wrap each label at about 20 characters before ggrepel positions it
df %>%
  ggplot(aes(x = x, y = y, color = cat)) +
  geom_point() +
  geom_text_repel(aes(label = stringr::str_wrap(descs, width = 20))) +
  theme_light() +
  theme(legend.position = "none")
I'm currently in the process of creating a heatmap with plotly. Below is the sample dataset:
library(tidyverse)
library(plotly)
library(hrbrthemes)
# Sample data: 6 interventions (in 3 groups) x 6 outcomes (in 3 groups),
# with a random count `n` per combination.
set.seed(9999)
df <- data.frame(
  group.int = rep(
    c(rep("Prevention", 3), "Diagnosis", rep("Intervention", 2)),
    6
  ),
  int = rep(
    c("Prevention 1", "Prevention 2", "Prevention 3",
      "Diagnosis 1", "Intervention 1", "Intervention 2"),
    6
  ),
  group.outcome = c(
    rep("Efficacy", 12), rep("Safety", 18), rep("Cost-effectiveness", 6)
  ),
  outcome = c(
    rep("Efficacy 1", 6), rep("Efficacy 2", 6),
    rep("Safety 1", 6), rep("Safety 2", 6), rep("Safety 3", 6),
    rep("Cost-effectiveness 1", 6)
  ),
  # Spell out `replace`; `rep = TRUE` only worked through partial matching
  n = sample(50:250, 36, replace = TRUE)
)

# Fix the facet order (otherwise the levels would sort alphabetically)
df$group.int <- factor(
  df$group.int,
  levels = c("Prevention", "Diagnosis", "Intervention")
)
df$group.outcome <- factor(
  df$group.outcome,
  levels = c("Efficacy", "Safety", "Cost-effectiveness")
)
I want to make a heatmap based on the variable outcome against int, with n as the fill of each heatmap cell. Here is the desired plot:
I tried using ggplotly from the created ggplot:
# Heatmap of outcome (y) against intervention (x), filled by the count n.
plotly.df <- ggplot(df,
aes(x = int, y = outcome, fill= n)) +
geom_tile() +
scale_fill_gradient(low="white", high="darkred") +
# Put the outcome labels on the right-hand side
scale_y_discrete(position = "right") +
# Facet rows by outcome group and columns by intervention group; free
# scales/space so each facet only shows (and is sized by) its own levels,
# with the column strips switched to the bottom
facet_grid(group.outcome ~ group.int,
scales = "free", space = "free", switch = "x") +
theme_bw() +
theme(axis.ticks = element_blank(),
legend.position = "left",
strip.placement = "outside",
strip.background = element_blank())
# Convert the ggplot object to an interactive plotly widget
ggplotly(plotly.df)
However, ggplotly seems to ignore space = "free" in facet_grid, so the sizes of the cells are not proportional:
Is there a way to adjust facet widths with ggplotly?
Thank you very much in advance
You don't have to reinvent the wheel. Go back to the first ggplotly object. The domain is what plotly uses to govern the space given to each facet (or subplot, as plotly calls it). You can retrieve this information by assigning the ggplotly graph to an object and calling plotly_json.
However, I've worked around layout shortcuts before. You can retrieve and modify the domains like this:
p <- ggplotly(plotly.df)

# Horizontal share of each facet column (6 tiles across in total)
p$x$layout[["xaxis"]]$domain <- c(0, 1/2)     # 3 of the 6 tiles: 1/6 * 3
p$x$layout[["xaxis2"]]$domain <- c(1/2, 2/3)  # 1 tile, starts where the previous ends
p$x$layout[["xaxis3"]]$domain <- c(2/3, 1)    # whatever is left

# Vertical share of each facet row, listed bottom to top
p$x$layout[["yaxis3"]]$domain <- c(0, 1/6)    # 1 tile in the bottom group
p$x$layout[["yaxis2"]]$domain <- c(1/6, 2/3)  # 3 tiles in the middle group
p$x$layout[["yaxis"]]$domain <- c(2/3, 1)     # whatever is left

p
That got me this far:
Your bottom labels are still aligned, but the top is not. Additionally, the left bottom label is cut off.
To fix the top labels, I used plotly_json to figure out where they were and then used the guess-and-check method. To make room for the labels, I modified the margins.
# Re-centre two of the facet labels over their resized columns
p$x$layout$annotations[[3]]$x <- 1/4   # prevention
p$x$layout$annotations[[4]]$x <- 7/12  # diagnosis

# Widen the margins so the labels are not clipped
layout(p, margin = list(t = 40, r = 50, b = 80, l = 130))
Update based on comments
Consider the following as a replacement for everything that follows p = ggplotly(plotly.df). (So you won't use anything above this, but you'll see that the code above is still included here.)
The facets
#------------- position and spacing facets -------------
# Horizontal share of each facet column (6 tiles across in total)
p$x$layout[["xaxis"]]$domain <- c(0, 1/2)     # 3 of the 6 tiles: 1/6 * 3
p$x$layout[["xaxis2"]]$domain <- c(1/2, 2/3)  # 1 tile in this group
p$x$layout[["xaxis3"]]$domain <- c(2/3, 1)    # whatever is left

# Vertical share of each facet row, listed bottom to top
p$x$layout[["yaxis3"]]$domain <- c(0, 1/6)    # 1 tile in the bottom group
p$x$layout[["yaxis2"]]$domain <- c(1/6, 2/3)  # 3 tiles in the middle group
p$x$layout[["yaxis"]]$domain <- c(2/3, 1)     # whatever is left
The labels
#------------- position and spacing labels -------------
# Plain for loops are used for these in-place edits: the annotations are
# modified purely for their side effect on `p`, so lapply() with `<<-`
# would only add a global-assignment hazard and print a throwaway list.

# prevention
p$x$layout$annotations[[3]]$x <- 1/4
# diagnosis
p$x$layout$annotations[[4]]$x <- 7/12

# bottom group labels: prevention, diagnosis, intervention / adjust down
for (i in 3:5) {
  p$x$layout$annotations[[i]]$y <- -0.1575
}

# efficacy, safety and cost effectiveness / shift right
for (i in 6:8) {
  p$x$layout$annotations[[i]]$x <- 1.25
  p$x$layout$annotations[[i]]$yanchor <- "top"
}

# int (x-axis title)
p$x$layout$annotations[[1]]$y <- -0.07

# outcome (y-axis title)
p$x$layout$annotations[[2]]$x <- 1.475
# presumably a 180 degree flip relative to the angle ggplotly assigned
p$x$layout$annotations[[2]]$textangle <- 90
The legend
#------------- position and spacing legend -------------
# capture the font settings of the axis tick labels
tf <- p$x$layout$xaxis$tickfont

# apply that font to the group labels (a for loop rather than
# lapply() + `<<-`, since only the side effect on `p` is wanted)
for (i in 3:8) {
  p$x$layout$annotations[[i]]$font <- tf
}

# update the ticks to represent the values of n, not the scale
# NOTE(review): trace 10 is assumed to be the one carrying the colorbar, and
# 50..208 the observed range of n for this seed -- confirm with plotly_json(p)
getCol <- data.frame(p$x$data[[10]]$marker$colorscale) # capture the scale
getCol$n <- seq(from = 50, to = 208, along.with = 1:300) %>% round(digits = 0)
summary(getCol)
# colorscale positions that correspond to n = 50, 100, 150, 200
(getVals <- filter(getCol, n %in% seq(50, 200, by = 50)))
# X1 X2 n
# 1 0.0000000 #FFFFFF 50
# 2 0.3143813 #E5B4A8 100
# 3 0.3177258 #E5B3A7 100
# 4 0.6321070 #C16B57 150
# 5 0.6354515 #C06A56 150
# 6 0.9464883 #941B0E 200
# 7 0.9498328 #931A0E 200

# the legend: move the colorbar to the left and label it in units of n
p$x$data[[10]]$marker$colorbar <- list(
  x = -.2,
  tickfont = tf,
  tickmode = "array",
  ticktext = seq(50, 200, by = 50),
  # from getVals output
  tickvals = c(0, .318, .636, .95),
  outlinewidth = 0,
  thickness = 20
)
and finally...
# Final plot: widen the margins for the legend and pin the tick labels of
# all three facet rows to the right-hand edge
right_axis <- list(side = "right", anchor = "free", position = 1)
layout(
  p,
  margin = list(t = 10, r = 170, b = 120, l = 10),
  yaxis = right_axis,
  yaxis2 = right_axis,
  yaxis3 = right_axis
)
I am trying to highlight selected points based on their order statistics in a ggplot stat_qq output:
# Question code (labels end up on the wrong points -- see notes below).
# Four samples of 100 standard-normal draws each.
ydata <- data.frame(sample = c(rep("Sample 1", 100),
rep("Sample 2", 100),
rep("Sample 3", 100),
rep("Sample 4", 100)),
x=rnorm(400))
# Sort by sample, then by value, so that row position within a sample is
# the order statistic.
ydata <- ydata[order(ydata$sample, ydata$x),]
# 1:100 is recycled across the 400 rows: rank 1..100 within each sample.
ydata$order <- 1:100
# QQ plot per sample; the x-axis tick labels also show the upper-tail
# percentage for -2..2 standard deviations.
# NOTE: in the geom_text() layer, qnorm(pnorm(x)) is just x again, so each
# label is drawn at x = sample value rather than at the theoretical quantile
# that stat_qq() puts on the x-axis -- this is why the labels miss the points.
ggplot(ydata, aes(sample=x)) + stat_qq() + facet_wrap(~sample) + scale_x_continuous(breaks = -2:2, labels = function(x) paste0(x, " \n [",100 * signif(pnorm(-2:2, lower.tail=FALSE),2), "%]")) + theme_bw(base_size = 14, base_family = "sans") + labs(title = "Four Samples of 100 Observations From Normal Distribution",
caption = "4 Samples of n = 100 from Normal Distribution \nNumbers indicate order of value",
y = "Sample Value",
x = "Standard Deviation\n[%exceeding]") +
geom_text(data = ydata[ydata$order %in% c(2,16,50,84,98),], aes(x=qnorm(pnorm(x)), y=x, label = order), nudge_y = 1)
Which produced this:
Obviously my text annotation is not highlighting the right points (the 2nd, 16th, 50th, 84th and 98th points). I would also like to highlight the actual points in red. I would appreciate any suggestions.
You could calculate the qq values outside of ggplot and create a separate column to group the qq values into highlighted and not highlighted. Then you could plot them using geom_point with the grouping variable as a colour aesthetic. For example:
library(tidyverse)
# Generate data reproducibly: four samples of 100 standard-normal draws
set.seed(2)
ydata <- data.frame(
  sample = rep(paste("Sample", 1:4), each = 100),
  x = rnorm(400)
)
# Sort within sample so that row position equals the order statistic;
# 1:100 is recycled across the four samples
ydata <- ydata[order(ydata$sample, ydata$x), ]
ydata$order <- 1:100

# Quantile indices to highlight
pts <- c(2, 16, 50, 84, 98)

# Add qq values and grouping column to data frame and pipe into ggplot
# Use split and map to calculate the qq values separately for each Sample
split(ydata, ydata$sample) %>%
  map_df(~ .x %>% mutate(xq = qqnorm(x, plot.it = FALSE)$x,
                         group = ifelse(order %in% pts, "A", "B"))) %>%
  ggplot(aes(xq, x, colour = group)) +
  geom_point(size = 1) +
  # Label only the highlighted points
  geom_text(aes(label = ifelse(group == "A", order, "")),
            nudge_y = 1, size = 3) +
  facet_wrap(~ sample) +
  theme_bw(base_size = 14, base_family = "sans") +
  # Group "A" (highlighted) in red, group "B" in black
  scale_colour_manual(values = c("red", "black")) +
  # "none" replaces the deprecated guides(colour = FALSE)
  guides(colour = "none")
As an alternative, a quick hack would be to use ggplot_build to highlight specific points in your original plot (note though that something is not quite right with how you placed the labels relative to the highlighted points):
# Row indices of the highlighted order statistics in each of the 4 samples
# (sample k occupies rows 100 * (k - 1) + 1:100)
pts <- rep(c(2, 16, 50, 84, 98), times = 4) + rep(seq(0, 300, 100), each = 5)

# Assuming you've assigned your plot to the object p
pb <- ggplot_build(p)

# Layer 1 holds the points: recolour the selected rows
pb$data[[1]][pts, "colour"] <- "red"

# Layer 2 holds the labels: recolour all of them
pb$data[[2]]$colour <- "red"

# Turn the modified build back into a drawable object and draw it
p <- ggplot_gtable(pb)
plot(p)
You can apply stat="qq" to your geom_point and then use the colors assigned to a new variable:
# Four samples of 100 standard-normal draws each.
ydata <- data.frame(sample = c(rep("Sample 1", 100),
rep("Sample 2", 100),
rep("Sample 3", 100),
rep("Sample 4", 100)),
x=rnorm(400))
# Sort by sample, then value; 1:100 is recycled so `order` is the rank
# within each sample.
ydata <- ydata[order(ydata$sample, ydata$x),]
ydata$order <- 1:100
# Per-row colour (red for the highlighted ranks) and label text (rank for the
# highlighted rows, empty string otherwise).
ydata$highlight = ifelse(ydata$order %in% c(2,16,50,84,98), "#FF0000", "#000000")
ydata$order_txt = ifelse(ydata$order %in% c(2,16,50,84,98), ydata$order, "")
# NOTE(review): `color` and `label` are passed as plain vectors outside aes(),
# so they are matched to the qq-transformed rows by position; this presumably
# only lines up because ydata is already sorted by sample and x -- confirm
# before reusing with unsorted data.
ggplot(ydata, aes(sample=x)) +
geom_point(color=ydata$highlight, stat="qq") +
geom_text(label=ydata$order_txt, stat="qq", nudge_y=1) +
facet_wrap(~sample) +
scale_x_continuous(breaks = -2:2, labels = function(x) paste0(x, " \n [",100 * signif(pnorm(-2:2, lower.tail=FALSE),2), "%]")) +
theme_bw(base_size = 14, base_family = "sans") +
labs(
title = "Four Samples of 100 Observations From Normal Distribution",
caption = "4 Samples of n = 100 from Normal Distribution \nNumbers indicate order of value",
y = "Sample Value",x = "Standard Deviation\n[%exceeding]")
I'm producing a whole pile of graphs of changing sizes. I want each graph to display a symbol (say, asterisk) at a specific point on the graph margin (top y-axis value), regardless of plot size. Right now I do it manually by defining x/y for each textGrob, but there has got to be a better way.
Plot size is determined by number of categories in the dataset (toy data below). Ideally, the output plots would have identical panel sizes (I'm assuming that can be controlled through defining margin sizes in inches and adding that value to the height parameter?). Widths don't usually change, but it would be nice to automate both x and y placements based on the defined device width (and plot margins).
Thanks so much!
library(ggplot2)
library(gridExtra)
# Toy data: 20 points in two categories ("a", "b"), 10 each.
set.seed(123)
df <- data.frame(x = rnorm(20, 0, 1), y = rnorm(20, 0, 1), category = rep(c("a", "b"), each = 10))
## plot 1
# Only category "a": one facet, so the device height is 2 inches.
sub <- df[df$category == "a",]
height = 2*length(unique(sub$category))
p <- ggplot(sub) +
geom_point(aes(x = x, y = y)) +
facet_grid(category ~ .)
jpeg(filename = "fig1.jpg",
width = 6, height = height, units = "in", pointsize = 12, res = 900,
quality = 100)
# NOTE(review): `sub =` looks like the arrangeGrob() argument of older
# gridExtra releases (newer ones appear to use `bottom =`) -- confirm against
# the installed version. The y value is far outside 0..1, presumably so the
# asterisk is pushed from the sub-title row up to the top of the panel; it is
# hand-tuned to this plot height.
g <- arrangeGrob(p, sub = textGrob("*", x = 0.07, y = 10.15, hjust = 0, vjust=0, #### puts the top discharge value; might need to be adjusted manually in following years
gp = gpar(fontsize = 15)))
grid.draw(g)
dev.off()
## plot 2
# Both categories: two facets, so the device height is 4 inches and the
# hand-tuned y offset of the asterisk changes accordingly.
height = 2*length(unique(df$category))
p <- ggplot(df) +
geom_point(aes(x = x, y = y)) +
facet_grid(category ~ .)
jpeg(filename = "fig2.jpg",
width = 6, height = height, units = "in", pointsize = 12, res = 900,
quality = 100)
g <- arrangeGrob(p, sub = textGrob("*", x = 0.07, y = 23.1, hjust = 0, vjust=0, #### puts the top discharge value; might need to be adjusted manually in following years
gp = gpar(fontsize = 15)))
grid.draw(g)
dev.off()
From a data frame I want to plot a pie chart for five categories with their percentages as labels in the same graph in order from highest to lowest, going clockwise.
My code is:
# Question code: pie chart of League counts with percentage labels.
# (This version raises the error quoted further down.)
League<-c("A","B","A","C","D","E","A","E","D","A","D")
data<-data.frame(League) # I have more variables
# Stacked bar of counts, one fill colour per League ...
p<-ggplot(data,aes(x="",fill=League))
p<-p+geom_bar(width=1)
# ... bent into a pie by mapping y to the angle
p<-p+coord_polar(theta="y")
# NOTE(review): `data` is passed positionally ahead of the aes() call; since
# geom_text()'s first parameter is `mapping`, the aes() presumably ends up in
# the `data` slot, which would explain the "class uneval" error -- naming the
# arguments (data = ..., mapping = ...) avoids the ambiguity.
p<-p+geom_text(data,aes(y=cumsum(sort(table(data)))-0.5*sort(table(data)),label=paste(as.character(round(sort(table(data))/sum(table(data)),2)),rep("%",5),sep="")))
p
I use
cumsum(sort(table(data)))-0.5*sort(table(data))
to place the label in the corresponding portion and
label=paste(as.character(round(sort(table(data))/sum(table(data)),2)),rep("%",5),sep="")
for the labels which is the percentages.
I get the following output:
Error: ggplot2 doesn't know how to deal with data of class uneval
I've preserved most of your code. I found this pretty easy to debug by leaving out the coord_polar... easier to see what's going on as a bar graph.
The main thing was to reorder the factor from highest to lowest to get the plotting order correct, then just playing with the label positions to get them right. I also simplified your code for the labels (you don't need the as.character or the rep, and paste0 is a shortcut for sep = "".)
League <- c("A", "B", "A", "C", "D", "E", "A", "E", "D", "A", "D")
data <- data.frame(League) # I have more variables

# Reorder the factor levels from most to least frequent so the wedges are
# drawn in that order
data$League <- reorder(data$League, X = data$League, FUN = function(x) -length(x))

# Centres of the stacked bars (= wedges), measured from the bottom of the stack
at <- nrow(data) - as.numeric(cumsum(sort(table(data))) - 0.5 * sort(table(data)))

# Percentage labels, in the same (ascending count) order as `at`
label <- paste0(round(sort(table(data)) / sum(table(data)), 2) * 100, "%")

# `fill` is mapped exactly once (it was previously listed twice in aes())
p <- ggplot(data, aes(x = "", fill = League)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y") +
  annotate(geom = "text", y = at, x = 1, label = label)
p
The at calculation is finding the centers of the wedges. (It's easier to think of them as the centers of bars in a stacked bar plot, just run the above plot without the coord_polar line to see.) The at calculation can be broken out as follows:
table(data) is the number of rows in each group, and sort(table(data)) puts them in the order they'll be plotted. Taking the cumsum() of that gives us the edges of each bar when stacked on top of each other, and multiplying by 0.5 gives us half the height of each bar in the stack (or half the width of each wedge of the pie).
as.numeric() simply ensures we have a numeric vector rather than an object of class table.
Subtracting the half-widths from the cumulative heights gives the center of each bar when stacked up. But ggplot will stack the bars with the biggest on the bottom, whereas all our sort()ing puts the smallest first, so we need to do nrow - everything, because what we've actually calculated are the label positions relative to the top of the bar, not the bottom. (And, with the original disaggregated data, nrow() is the total number of rows and hence the total height of the bar.)
Preface: I did not make pie charts of my own free will.
Here's a modification of the ggpie function that includes percentages:
library(ggplot2)
library(dplyr)
#
# df$main should contain observations of interest
# df$condition can optionally be used to facet wrap
#
# labels should be a character vector of same length as group_by(df, main) or
# group_by(df, condition, main) if facet wrapping
#
# Build a pie chart (polar stacked bar) with percentage and category labels.
#
# df:        data frame with one row per observation
# main:      name (string) of the column holding the categories to count
# labels:    optional character vector of display labels, one per row of the
#            summarised data; defaults to the category values themselves
# condition: optional name (string) of a column to facet by
#
# Returns the ggplot object.
#
# NOTE(review): group_by_() and aes_string() are deprecated in current
# dplyr/ggplot2, and the label positions assume the row order produced by
# arrange() matches the stacking order of geom_bar() -- confirm both against
# the installed package versions before relying on the output.
pie_chart <- function(df, main, labels = NULL, condition = NULL) {
# convert the data into percentages. group by conditional variable if needed
df <- group_by_(df, .dots = c(condition, main)) %>%
summarize(counts = n()) %>%
mutate(perc = counts / sum(counts)) %>%
arrange(desc(perc)) %>%
# label_pos is the midpoint of each slice along the cumulative share
mutate(label_pos = cumsum(perc) - perc / 2,
perc_text = paste0(round(perc * 100), "%"))
# reorder the category factor levels to order the legend
df[[main]] <- factor(df[[main]], levels = unique(df[[main]]))
# if labels haven't been specified, use what's already there
if (is.null(labels)) labels <- as.character(df[[main]])
p <- ggplot(data = df, aes_string(x = factor(1), y = "perc", fill = main)) +
# make stacked bar chart with black border
geom_bar(stat = "identity", color = "black", width = 1) +
# add the percents to the interior of the chart
geom_text(aes(x = 1.25, y = label_pos, label = perc_text), size = 4) +
# add the category labels to the chart
# increase x / play with label strings if labels aren't pretty
geom_text(aes(x = 1.82, y = label_pos, label = labels), size = 4) +
# convert to polar coordinates
coord_polar(theta = "y") +
# formatting
scale_y_continuous(breaks = NULL) +
scale_fill_discrete(name = "", labels = unique(labels)) +
theme(text = element_text(size = 22),
axis.ticks = element_blank(),
axis.text = element_blank(),
axis.title = element_blank())
# facet wrap if that's happening
if (!is.null(condition)) p <- p + facet_wrap(condition)
return(p)
}
Example:
# Sample data: nine responses, the first five under "cat A", the rest "cat B"
resps <- c("A", "A", "A", "F", "C", "C", "D", "D", "E")
cond <- rep(c("cat A", "cat B"), times = c(5, 4))
example <- data.frame(resps, cond)

# Unfacetted: one label per distinct response
ex_labs <- c("alpha", "charlie", "delta", "echo", "foxtrot")
pie_chart(example, main = "resps", labels = ex_labs) +
  labs(title = "unfacetted example")

# Facetted: one label per (condition, response) combination
ex_labs2 <- c("alpha", "charlie", "foxtrot", "delta", "charlie", "echo")
pie_chart(example, main = "resps", labels = ex_labs2, condition = "cond") +
  labs(title = "facetted example")
The following function worked on all the examples included; it is greatly inspired by the answer here:
# Draw a grey-scale pie chart of the value frequencies in `data`.
#
# data: a vector; its distinct values become the slices
# Returns the ggplot object. The plot title is the expression that was
# passed as `data`, and the outer axis labels show each slice's percentage.
ggpie <- function(data) {
  # Title: the caller's expression for `data`
  name <- deparse(substitute(data))

  # Counts per value and the matching percentage labels
  count_tbl <- table(factor(data))
  percent_tbl <- prop.table(count_tbl) * 100
  percent_labels <- paste0(percent_tbl, " %")
  counts <- as.vector(count_tbl)

  # Axis breaks at the middle of each slice
  counts_rev <- rev(counts)
  slice_mids <- rev(cumsum(counts_rev) - (counts_rev / 2))

  # Data for plotting: one row per slice
  slices <- data.frame(vector1 = counts, names1 = names(percent_tbl))

  # Stacked bar in polar coordinates; slice borders are black but are
  # suppressed in the legend keys
  ggplot(slices, aes(x = 1, y = vector1, fill = names1)) +
    geom_bar(stat = "identity", color = "black") +
    guides(fill = guide_legend(override.aes = list(colour = NA))) +
    coord_polar(theta = "y") +
    theme(
      axis.ticks = element_blank(),
      axis.text.y = element_blank(),
      axis.text.x = element_text(colour = "black"),
      axis.title = element_blank(),
      plot.title = element_text(hjust = 0.5, vjust = 0.5)
    ) +
    scale_y_continuous(breaks = slice_mids, labels = percent_labels) +
    ggtitle(name) +
    scale_fill_grey(name = "")
}
An example :
# 200 random draws from the letters A-F; the variable name doubles as the
# plot title
vector1 <- sample(LETTERS[1:6], 200, replace = TRUE)
ggpie(vector1)
Output