Expand top of scale/axis to include text - r

I'm trying to annotate the highest value in each facet of a graph.
I can't figure out how to remove extra space at the bottom of the y axis without clipping the text above the highest value.
A) Is there a non-symmetrical version of scale_y_continuous(expand=c(0,0))?
B) Or, is there a way to make ggplot include text as part of the graph range?
# a simple dataset
count <- 40
data <- data.frame(
  category = sample(LETTERS[1:3], count, replace = TRUE),
  x = rnorm(count),
  y = abs(rnorm(count))
)
# find the highest value in each category
# (library() fails loudly if the package is missing; require() only returns FALSE)
library(plyr)
data <- data[order(-data$y), ]
topValues <- ddply(data, .(category), head, 1)
library(ggplot2)
ggplot(data) +
  geom_line(aes(x = x, y = y)) +
  geom_text(data = topValues, aes(x = x, y = y, label = y)) + # label the highest y value
  # add vjust=-1 to put text above point if possible
  # `scales` is spelled out in full rather than relying on partial matching
  facet_grid(category ~ ., scales = "free") +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0))

The answer comes thanks to baptiste.
Just add this call to the plot to make a blank point at the top of the text:
geom_blank(data=topValues, aes(x=x, y=y*1.1, label=y))

You can use the vjust argument of geom_text to tweak the vertical position of the label relative to the x and y coordinate:
# Same plot as in the question, with the label shifted vertically via vjust.
ggplot(data) +
  geom_line(aes(x = x, y = y)) +
  geom_text(data = topValues, aes(x = x, y = y, label = y), vjust = 1.5) + # label the highest y value
  # `scales` is spelled out in full rather than relying on partial matching
  facet_grid(category ~ ., scales = "free") +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0))

Related

One label for multiple points

I'm making a scatterplot and want to label several points with the same label.
# example data: each label occurs twice at the same x, with different y values
data.frame(label=rep(c("a","b","c"),2), x=rep(c(1:3),2), y=c(5,4,7,2,6,9))
As you can see, the labels occur twice each at the same x values, only y differs. I want both [1,5] and [1,2] to be labeled using a single "a", not one "a" for each coordinate.
I'm using R, ggplot2 and ggrepel.
This can work:
dat <- data.frame(label=rep(c("a","b","c"),2), x=rep(c(1:3),2), y=c(5,4,7,2,6,9))
ggplot() + geom_point(data=dat, aes(x=x, y=y)) + geom_text(data=dat[duplicated(dat$label),], aes(x=x, y=y, label=label))
I think this is what you want.
I am using the dplyr or tidyverse package.
library(tidyverse)
Dataset
dat1 <- data.frame(label=rep(c("a","b","c"),2), x=rep(c(1:3),2), y=c(5,4,7,2,6,9))
Creating a dataset for the labels. This creates a label dataset which will pick a labeling point at midpoint Y for a given X.
lab1 <- dat1 %>% group_by(label) %>% mutate(x = x, y = mean(y))
This creates the plot using the original dataset for the points and the label dataset for the labels.
ggplot() +
geom_point(data=dat1, aes(x=x, y=y)) +
geom_text(data=lab1, aes(x=x, y=y, label=label), size = 5) +
theme_grey()
The above actually plots the labels twice on top of each other, but you can't notice. If you really just wanted it once, then you could do the following and update the previous code with lab2. I also changed size so you can see.
lab2 <-unique(lab1)
ggplot() +
geom_point(data=dat1, aes(x=x, y=y)) +
geom_text(data=lab2, aes(x=x, y=y, label=label), size=10) +
theme_grey()
If you want the label shifted to the right or placed higher, you can add an offset to the x and y values in your label dataset.
lab1 <- dat1 %>% group_by(label) %>% mutate(x = x+.3, y = mean(y) + .5)
Or you can accomplish the same within geom_text itself using nudge.
ggplot() + geom_point(data=dat1, aes(x=x, y=y)) +
geom_text(data=lab1, aes(x=x, y=y, label=label), size=10, nudge_x = .3, nudge_y = .5) +
theme_grey()

ggplot2, introduce breaks on a x log scale

I have a plot like this:
p<-ggplot() +
geom_line(data= myData, aes(x = myData$x , y = myData$y)) +
scale_x_log10()+
scale_y_log10()
My x value is seq(9880000, 12220000, 10000)
There is only one break on the x-axis of the plot. What should I do to get at least 3 breaks on the x-axis?
Here is a fully reproducible example of the original poster's problem, where a log-scaled plot displays only one break value on the x-axis. I demonstrate three possible solutions below.
library(ggplot2)

# Build a reproducible example data frame with base R functions.
x <- seq(9880000, 12220000, 10000)
# Fix the RNG seed so that every run yields the same 'random' values.
set.seed(31415)
y <- cumsum(runif(n = length(x), min = -1e5, max = 1e5)) + 1e6
dat <- data.frame(x = x, y = y)

# Plot 1: the original poster's plot.
p1 <- ggplot(data = dat, aes(x = x, y = y)) +
  geom_line() +
  scale_x_log10() +
  scale_y_log10() +
  labs(title = "1. Plot has only one x-axis break.")

# Plot 2: extra x-axis breaks chosen by hand.
x_breaks <- c(10^7.0, 10^7.04, 10^7.08)
p2 <- ggplot(data = dat, aes(x = x, y = y)) +
  geom_line() +
  scale_x_log10(breaks = x_breaks) +
  scale_y_log10() +
  labs(title = "2. Add some x-axis breaks manually.")

# Plot 3: extra x-axis breaks derived from the data with pretty().
x_breaks <- 10^pretty(log10(x))
x_labels <- formatC(x_breaks, format = "e", digits = 2)
p3 <- ggplot(data = dat, aes(x = x, y = y)) +
  geom_line() +
  scale_x_log10(breaks = x_breaks, labels = x_labels) +
  scale_y_log10() +
  labs(title = "3. Create x-axis breaks with R functions.")

# Plot 4: no log10 x scale, because the x-values don't span
# multiple orders of magnitude.
p4 <- ggplot(data = dat, aes(x = x, y = y)) +
  geom_line() +
  scale_y_log10() +
  labs(title = "4. Check appearance without log10 scale for x-axis.")

# Arrange the four plots in a 2-row grid and write them to disk.
library(gridExtra)
ggsave(
  "example.png",
  plot = arrangeGrob(p1, p2, p3, p4, nrow = 2),
  width = 10,
  height = 5,
  dpi = 150
)
I add: scale_x_log10(breaks=seq(9880000, 12220000, 1000000)).
This is my reproducible example:
library(random)
library(ggplot2)
z <- randomStrings(n=235, len=5, digits=TRUE, upperalpha=TRUE, loweralpha=TRUE, unique=TRUE, check=TRUE)
x <- seq(9880000, 12220000, 10000)
y <- randomNumbers(n=235, min=9880000, max=12220000, col=1)
df <- data.frame(z, x, y)
head(df)
V1 x V1.1
1 378VO 9880000 11501626
2 AStRK 9890000 10929705
3 sotp4 9900000 11305700
4 AS4DR 9910000 11302110
5 7iFdk 9920000 11611918
6 HIS7z 9930000 11175074
p<-ggplot() + geom_line(data= df, aes(x = df$x , y = df$V1.1)) + scale_y_log10()
p + scale_x_log10(breaks=seq(9880000, 12220000, 1000000))
Hope it is useful...
Add a breaks argument inside the parentheses of your scale call: breaks=seq(specify, breaks, here)
For example, if you wanted a break at 0, 10, 100:
# Breaks on a log scale must be strictly positive (log10(0) is -Inf), and
# seq(0, 10, 100) would return only 0, so list the breaks explicitly:
scale_x_log10(breaks = c(1, 10, 100))

Scaling really large bar compared to other bars in ggplot2 in R

I am trying to make a plot in ggplot2 in R with the following code:
feature
[1] abs_deg_sum_1 NumAfterEdits_1 N_1 NumAfterEdits_3
[5] TimeSinceLastEdit_2 wt_product_1 NumAfterEdits_2 dwdt_1
52 Levels: abs_deg_diff_1 abs_deg_diff_2 abs_deg_diff_3 abs_deg_diff_4 ... Z_4
relative_importance
[1] 61.048212 17.235435 1.891542 1.409848 1.356924 1.264824 1.220593 1.184612
library(ggplot2)
df = data.frame(feature, relative_importance)
c <- ggplot(df, aes(x = feature, y = relative_importance, fill = feature)) + geom_bar(stat = "identity")
c + coord_flip()
positions <- c("abs_deg_sum_1", "NumAfterEdits_1", "N_1", "NumAfterEdits_3","TimeSinceLastEdit_2", "wt_product_1", "NumAfterEdits_2",
"dwdt_1")
c <- c + scale_x_discrete(limits = positions)
c + coord_flip()
Since the first value in relative_importance is really large compared to all other values, the plot doesn't show much about the other values. I get the following plot:
How can I change my code to capture more information in my plot? Especially about the smaller values
Here are several options, though I prefer the first or second (or maybe the third if you really want to go with a bar plot):
# Fake data. `group` is created as an explicit factor because the plot below
# uses as.numeric(group) and levels(dat$group); since R 4.0 data.frame() no
# longer converts strings to factors, so a character column would yield NA
# positions and NULL axis labels.
dat <- data.frame(group = factor(LETTERS[1:5]), values = c(1.5, 0.6, 12.6, 2.1, 85))
# Value labels instead of bars, plus we add a horizontal segment to provide
# better visual guidance as to the relative values. This also requires
# some factor gymnastics to be able to get both the segments and the
# correct x-axis labels. I've left in the legend, but it's not necessary
# and can be removed if you wish.
ggplot(dat, aes(as.numeric(group), values, colour = group)) +
  geom_segment(aes(x = as.numeric(group) - 0.35, xend = as.numeric(group) + 0.35,
                   yend = values), alpha = 0.75) +
  # show.legend replaces the deprecated show_guide argument
  geom_text(aes(label = values), fontface = "bold", show.legend = FALSE) +
  scale_x_continuous(breaks = 1:5, labels = levels(dat$group))
#scale_y_log10(limits=c(0.1,100), breaks=c(0.1, 0.3,1,3,10,30,100)) # For a log scale, if desired
#coord_flip() # Flip to horizontal orientation, if desired
# Value labels instead of bars
ggplot(dat, aes(group, values, colour=group)) +
geom_text(aes(label=values), fontface="bold")
# Bar plot with value labels added
ggplot(dat, aes(group, values, fill=group)) +
geom_bar(stat="identity") +
geom_text(aes(label=values, y=0.5*values), size=5, colour="black")
# Value labels instead of bars; log scale
ggplot(dat, aes(group, values, colour=group)) +
geom_text(aes(label=values)) +
scale_y_log10(limits=c(0.1,100), breaks=c(0.1,0.3,1,3,10,30,100)) +
coord_flip()
# Bar plot with log scale. Note that bar baseline is 1 instead of
# zero for a log scale, so this doesn't work so well.
ggplot(dat, aes(group, values, fill=group)) +
geom_bar(stat="identity") +
scale_y_log10(limits=c(0.1,100), breaks=c(0.1,0.3,1,3,10,30,100)) +
coord_flip()
# Points instead of bars; log scale
ggplot(dat, aes(group, values, fill=group)) +
geom_point(pch=21, size=4) +
scale_y_log10(limits=c(0.1,100), breaks=c(0.1,0.3,1,3,10,30,100)) +
coord_flip()
If the logarithmic axis doesn't work for you and you have some flexibility in the plot format, you could divide the features into two groups based on the value of relative_importance and show each in its own panel with an appropriate y-scale. Code including adjustment of bar widths would look like:
library(ggplot2)

# Tag every row as belonging to the full-scale panel ("All") or to the
# expanded small-value panel, based on a cut-off on relative_importance.
cut_off_for_small_values <- 3
small_value_title <- "Expanded_Scale_for_Smaller_Values"
df <- data.frame(
  feature,
  relative_importance,
  importance_grp = ifelse(
    relative_importance > cut_off_for_small_values,
    "All",
    small_value_title
  )
)

# Bar width for the small-value panel, scaled by its share of the rows.
width_adj <- .8 * nrow(df[df$importance_grp == small_value_title, ]) / nrow(df)

# Build the plot in one chain: all rows go into the "All" panel, the
# small-value rows additionally go into their own free-scaled panel.
c <- ggplot(df, aes(x = feature, y = relative_importance, fill = feature)) +
  geom_bar(
    data = transform(df, importance_grp = "All"),
    stat = "identity"
  ) +
  geom_bar(
    data = df[df$importance_grp == small_value_title, ],
    stat = "identity",
    width = width_adj
  ) +
  geom_text(
    aes(
      x = feature,
      y = relative_importance,
      label = format(relative_importance, digits = 3),
      vjust = -.5
    )
  ) +
  theme(axis.text.x = element_text(angle = 90)) +
  facet_wrap(~importance_grp, scales = "free")
which gives plot

R - How to overlay the average of a set of iid RVs

In the code below I build a 40x1000 data frame where in each column I have the cumulative means for successive random draws from an exponential distribution with parameter lambda = 0.2.
I add an additional column to host the specific number of the "draw".
I also calculate the rowmeans as df_means.
How do I add df_means (as a black line) on top of all my simulated RVs? I don't understand ggplot well enough to do this.
df <- data.frame(replicate(1000,cumsum(rexp(40,lambda))/(1:40)))
df$draw <- seq(1,40)
df_means <- rowMeans(df)
Molten <- melt(df, id.vars="draw")
ggplot(Molten, aes(x = draw, y = value, colour = variable)) + geom_line() + theme(legend.position = "none") + geom_line(df_means)
How would I add plot(df_means, type="l") to my ggplot, below?
Thank you,
You can make another data.frame with the means and ids and use that to draw the line,
# Average over the simulated columns only: `draw` is the draw index added to
# df, not a sample, so it must be left out of the row means.
df_means <- rowMeans(df[, names(df) != "draw", drop = FALSE])
# One row per draw, holding the mean across all simulations.
means <- data.frame(id = df$draw, mu = df_means)
ggplot(Molten, aes(x = draw, y = value, colour = variable)) +
  geom_line() +
  theme(legend.position = "none") +
  # drawn last so the black mean line sits on top of the simulated series
  geom_line(data = means, aes(x = id, y = mu), color = "black")
As described here
# Summary layer for functions that return a data frame of summary values.
#   fun:  passed to stat_summary() as fun.data.
#   geom: geom used to draw the summary (default "crossbar").
#   ...:  further arguments forwarded to stat_summary().
stat_sum_df <- function(fun, geom="crossbar", ...) {
  stat_summary(fun.data=fun, colour="red", geom=geom, width=0.2, ...)
}
# Summary layer for functions that reduce a numeric vector to a single value
# (e.g. mean); this is the helper the plot call below uses.
#   fun:  passed to stat_summary() as fun.
#   geom: geom used to draw the summary (default "line").
#   ...:  further arguments forwarded to stat_summary().
# NOTE(review): the `fun` argument assumes ggplot2 >= 3.3.0 (older versions
# call it fun.y) -- confirm against the installed version.
stat_sum_single <- function(fun, geom = "line", ...) {
  # group = 1 pools all series at each x; without it the summary would be
  # computed separately inside every `variable` colour group.
  stat_summary(mapping = aes(group = 1), fun = fun, colour = "red", geom = geom, ...)
}
k <- ggplot(Molten, aes(x = draw, y = value, colour = variable)) +
  geom_line() +
  theme(legend.position = "none")
k + stat_sum_single(mean) # gives you the required plot

Grouping labels when x is a factor variable in ggplot2

I'm trying to replace the x-axis labels "A0" and "A1" by one "A" which can be placed in the middle of "A0" and "A1". It would be better if there is a method which works like the following question:
grouping of axis labels ggplot2
By that, I mean to redraw the x-axis only for each group, and leave a blank between groups.
Here is the code I'm working on:
y = 1*round(runif(20)*10,1)
x1 = c("A","B")
x2 = c(0,1)
x = expand.grid(x1,x2)
xy = cbind(x,y)
xy$z = paste(xy$Var1,xy$Var2,sep="")
p <- ggplot(xy, aes(x=factor(z), y=y,fill=factor(Var2)))
p + geom_boxplot() + geom_jitter(position=position_jitter(width=.2)) + theme_bw() + xlab("X") + ylab("Y") + scale_fill_discrete(name="Var2",breaks=c(0, 1),labels=c("T", "C"))
Try this. No need for the variable z, just use position="dodge":
p <- ggplot(xy, aes(x=factor(Var1), y=y,fill=factor(Var2)))
p + geom_boxplot(position="dodge") + geom_jitter(position=position_jitter(width=.2)) + theme_bw() + xlab("X") + ylab("Y") + scale_fill_discrete(name="Var2",breaks=c(0, 1),labels=c("T", "C"))

Resources