Nonstandard breaks in plot for yaxis - r

I would like to create a simple plot but with nonstandard breaks.
That's the code for my data:
> dput(dt1)
c(15.9540654816514, 37.5416557213931, 143.317585514018, 317.329051086954,
736.342269565211, 611.759999999995, 1145.49376842104, 3287.57274999997
)
> dput(dt2)
c(7.74957214839424, 17.5499521829522, 47.8167516932271, 72.1468924428822,
131.457629238329, 119.135097468354, 193.812365333332, 339.109355072461
)
> dput(dt3)
c(3.43850794565666, 11.4081262121212, 24.6747108504399, 54.7253625128734,
85.7360432084306, 89.7801271317832, 117.764457806452, 152.859368367347
)
and I would like to achieve something like that:
Just ignore red point on that graph.
That's the code which I have written so far. However, approach of changing the y breaks doesn't work.
plot(dt1,col="blue",cex = 1.8,xlim=c(0,10), ylim = c(1,5000), yaxt = "n", bty="n",xlab="",ylab="")
axis(side = 2, at = C(10,100,1000,5000)
points(dt2,col="green",cex = 1.8)
points(dt3,col="red",cex = 1.8)
Is it possible ? I would like to create identical xlabel like on the attached picture. I can change it as well in other software so do not focus mostly on that.

This is the closest I can think of using ggplot2.
library(data.table)
library(dplyr)
library(ggplot2)
theme_set(theme_bw())
dat <- rbindlist(list(
data.table(dt = "dt1",
y = c(15.9540654816514, 37.5416557213931, 143.317585514018, 317.329051086954,
736.342269565211, 611.759999999995, 1145.49376842104, 3287.57274999997)),
data.table(dt = "dt2",
y = c(7.74957214839424, 17.5499521829522, 47.8167516932271, 72.1468924428822,
131.457629238329, 119.135097468354, 193.812365333332, 339.109355072461)),
data.table(dt = "dt3",
y = c(3.43850794565666, 11.4081262121212, 24.6747108504399, 54.7253625128734,
85.7360432084306, 89.7801271317832, 117.764457806452, 152.859368367347))))
## generate lables
labs <- paste(rep(1:4, c(2,3,2,1)), rep(c(1,2,3,4,3,4), c(1,2,1,1,1,2)), sep = '\n-\n')
## create x variable
dat[, x := rep(1:8, 3) %>% factor(labels = labs)]
## plot
ggplot(dat, aes(x = x, y = y, colour = dt)) +
geom_point() +
scale_y_log10(limits = c(1, 10000),
breaks = 10^(0:4)) +
xlab("") + ylab("")
ggsave('temp.png', width = 4, height = 3)
The output looks like this:

Related

Display custom axis labels in ggplot2

I'd like to plot histogram and density on the same plot. What I would like to add to the following is custom y-axis label which would be something like sprintf("[%s] %s", ..density.., ..count..) - two numbers at one tick value. Is it possible to obtain this with scale_y_continuous or do I need to work this around somehow?
Below current progress using scales::trans_new and sec_axis. sec_axis is kind of acceptable but the most desirable output is as on the image below.
set.seed(1)
var <- rnorm(4000)
binwidth <- 2 * IQR(var) / length(var) ^ (1 / 3)
count_and_proportion_label <- function(x) {
sprintf("%s [%.2f%%]", x, x/sum(x) * 100)
}
ggplot(data = data.frame(var = var), aes(x = var, y = ..count..)) +
geom_histogram(binwidth = binwidth) +
geom_density(aes(y = ..count.. * binwidth)) +
scale_y_continuous(
# this way
trans = trans_new(name = "count_and_proportion",
format = count_and_proportion_label,
transform = function(x) x,
inverse = function(x) x),
# or this way
sec.axis = sec_axis(trans = ~./sum(.),
labels = percent,
name = "proportion (in %)")
)
I've tried to create object with breaks before basing on the graphics::hist output - but these two histogram differs.
bins <- (max(var) - min(var))/binwidth
hdata <- hist(var, breaks = bins, right = FALSE)
# hist generates different bins than `ggplot2`
At the end I would like to get something like this:
Would it be acceptable to add percentage as a secondary axis? E.g.
your_plot + scale_y_continuous(sec.axis = sec_axis(~.*2, name = "[%]"))
Perhaps it would be possible to overlay the secondary axis on the primary one, but I'm not sure how you would go about doing that.
You can achieve your desired output by creating a custom set of labels, and adding it to the plot:
library(tidyverse)
library(ggplot2)
set.seed(1)
var <- rnorm(400)
bins <- .1
df <- data.frame(yvals = seq(0, 20, 5), labels = c("[0%]", "[10%]", "[20%]", "[30%]", "[40%]"))
df <- df %>% tidyr::unite("custom_labels", labels, yvals, sep = " ", remove = TRUE)
ggplot(data = data.frame(var = var), aes(x = var, y = ..count..)) +
geom_histogram(aes(y = ..count..), binwidth = bins) +
geom_density(aes(y = ..count.. * bins), color = "black", alpha = 0.7) +
ylab("[density] count") +
scale_y_continuous(breaks = seq(0, 20, 5), labels = df$custom_labels)

Superscripts within ggplot2's axis text

I would like to create a graph that has superscripts on the axis instead of displaying unformatted numbers using ggplot2. I know that there are a lot of answers which change the axis label, but not the axis text. I am not trying to change the label of the graph, but the text on the axis.
Example:
x<-c('2^-5','2^-3','2^-1','2^1','2^2','2^3','2^5','2^7','2^9','2^11','2^13')
y<-c('2^-5','2^-3','2^-1','2^1','2^2','2^3','2^5','2^7','2^9','2^11','2^13')
df<-data.frame(x,y)
p<-ggplot()+
geom_point(data=df,aes(x=x,y=y),size=4)
p
So I would like the x-axis to display the same numbers but without the carrot.
EDIT:
A purely base approach:
df %>%
mutate_all(as.character)->new_df
res<-unlist(Map(function(x) eval(parse(text=x)),new_df$x))#replace with y for y
to_use<-unlist(lapply(res,as.expression))
split_text<-strsplit(gsub("\\^"," ",names(to_use))," ")
join_1<-as.numeric(sapply(split_text,"[[",1)) #tidyr::separate might help, less robust for numeric(I think)
join_2<-as.numeric(sapply(split_text,"[[",2))
to_use_1<-sapply(seq_along(join_1),function(x) parse(text=paste(join_1[x],"^",
join_2[x])))
The above can be reduced to less step, I posted the stepwise approach I took. The result for only x, the same can be done for y:
new_df %>%
ggplot()+
geom_point(aes(x=x,y=y),size=4)+
scale_x_discrete(breaks=df$x,labels=to_use_1)#replace with y and scale_y_discrete for y
Plot:
Original and erroneous answer:
I have deviated from standard tidyverse practice by using $, you can replace it with . and it might work although in this case it's not really important since the focus is on labels.:
library(dplyr)
df %>%
mutate(new_x=gsub("\\^"," ",x),
new_y=gsub("\\^"," ",y))->new_df
new_df %>%
ggplot()+
geom_point(aes(x=x,y=y),size=4)+
scale_x_discrete(breaks=x,labels=new_df$new_x)+
scale_y_discrete(breaks=y,labels=new_df$new_y)
This can be done with functions scale_x_log2 and scale_y_log2 that can be found in GitHub package jrnoldmisc.
First, install the package.
devtools::install_github("jrnold/rubbish")
Then, coerce the variables to numeric. I wil work with a copy of the original dataframe.
df1 <- df
df1[] <- lapply(df1, function(x){
x <- as.character(x)
sapply(x, function(.x)eval(parse(text = .x)))
})
Now, graph it.
library(jrnoldmisc)
library(ggplot2)
library(MASS)
library(scales)
a <- ggplot(df1, aes(x = x, y = y, size = 4)) +
geom_point(show.legend = FALSE) +
scale_x_log2(limits = c(0.01, NA),
labels = trans_format("log2", math_format(2^.x)),
breaks = trans_breaks("log2", function(x) 2^x, n = 10)) +
scale_y_log2(limits = c(0.01, NA),
labels = trans_format("log2", math_format(2^.x)),
breaks = trans_breaks("log2", function(x) 2^x, n = 10))
a + annotation_logticks(base = 2)
Edit.
Following the discussion in the comments, here are the two other ways that were seen to give different axis labels.
Axis labels every tick mark. Set limits = c(1.01, NA) and function argument n = 11, an odd number.
Axis labels on odd number exponents. Keep limits = c(0.01, NA), change to function(x) 2^(x - 1), n = 11.
Just the instructions, no plots.
The first.
a <- ggplot(df1, aes(x = x, y = y, size = 4)) +
geom_point(show.legend = FALSE) +
scale_x_log2(limits = c(1.01, NA),
labels = trans_format("log2", math_format(2^.x)),
breaks = trans_breaks("log2", function(x) 2^(x), n = 11)) +
scale_y_log2(limits = c(1.01, NA),
labels = trans_format("log2", math_format(2^.x)),
breaks = trans_breaks("log2", function(x) 2^(x), n = 11))
a + annotation_logticks(base = 2)
And the second.
a <- ggplot(df1, aes(x = x, y = y, size = 4)) +
geom_point(show.legend = FALSE) +
scale_x_log2(limits = c(0.01, NA),
labels = trans_format("log2", math_format(2^.x)),
breaks = trans_breaks("log2", function(x) 2^(x - 1), n = 11)) +
scale_y_log2(limits = c(0.01, NA),
labels = trans_format("log2", math_format(2^.x)),
breaks = trans_breaks("log2", function(x) 2^(x - 1), n = 11))
a + annotation_logticks(base = 2)
You can provide a function to the labels argument of the scale_x_*** and scale_y_*** functions to generate labels with superscripts (or other formatting). See examples below.
library(jrnoldmisc)
library(ggplot2)
df<-data.frame(x=2^seq(-5,5,2),
y=2^seq(-5,5,2))
ggplot(df) +
geom_point(aes(x=x,y=y),size=2) +
scale_x_log2(breaks=2^seq(-5,5,2),
labels=function(x) parse(text=paste("2^",round(log2(x),2))))
ggplot(df) +
geom_point(aes(x=x,y=y),size=2) +
scale_x_continuous(breaks=c(2^-5, 2^seq(1,5,2)),
labels=function(x) parse(text=paste("2^",round(log2(x),2))))
ggplot(df) +
geom_point(aes(x=x,y=y),size=2) +
scale_x_log10(breaks=10^seq(-1,1,1),
labels=function(x) parse(text=paste("10^",round(log10(x),2))))

Creating equal and pretty scale breaks with facet_grid()

I want to create a plot using facet_grid(), with free scales for the y axis. However, for each row, the scale breaks should be distributed evenly, that is, with 3 breaks.
I lended from this question, but I was not able to adapt the code in a way that the scale breaks are actually pretty.
However, this is my current approach:
# Packages
library(dplyr)
library(ggplot2)
library(scales)
# Test Data
set.seed(123)
result_df <- data.frame(
variable = rep(c(1,2,3,4), each = 4),
mode = rep(c(1,2), each = 2),
treat = rep(c(1,2)) %>% as.factor(),
mean = rnorm(16, mean = .7, sd = 0.2),
x = abs(rnorm(16, mean = 0, sd = 0.5))) %>%
mutate(lower = mean - x,upper = mean + x)
# Function for equal breaks, lended from
equal_breaks <- function(n = 3, s = 0.05, ...) {
function(x) {
d <- s * diff(range(x)) / (1+2*s)
round(seq(min(x)+d, max(x)-d, length=n), 2)
}}
## Plot
result_df %>%
ggplot(aes(y = mean*100, x = treat)) +
geom_pointrange(aes(ymin = lower*100, ymax = upper*100), shape = 20) +
facet_grid(variable ~ mode, scales = "free_y")+
scale_y_continuous(breaks = equal_breaks(n = 3, s = .2))+
labs(x = "", y = "")
Which leads to this current plot. As one can see, the breaks are far from being reasonable.
Thanks in advance for any kind of recommendation, and please excuse me in case I have missed a already existing solution.
Best, Malte

Change ggplot scale labels to original values after using a log transformation

I've got an exponentially distributed variable that I'd like to plot using ggplot2. I'm going to take the log of the variable. However, instead of having the axis label be the log format, I'd like it to be the original exponentially distributed values. Here's an example.
set.seed(1000)
aero_df <-
data_frame(
x = rnorm(100,100,99),
y = sample(c('dream on',
'dude looks like a lady'),
100,
replace = T)) %>%
mutate(x = x*x,
log_x = log(x)) %>%
gather(key,value,-y)
aero_plot <- ggplot(aero_df,aes(value,color = y,fill = y))+
geom_density(show.legend = F)+
facet_wrap(key~y,scales = 'free')
I'd like to have the x variable labels on the log_x.
aero_plot
I started of with this, but the issue here is that you can see the normal log_x labels also in the x plots.
ticks <- c(3,6,9,12)
logticks <- c(exp(9),exp(10),exp(11))
ggplot(aero_df,aes(value,color = y,fill = y))+
geom_density(show.legend = F)+
scale_x_continuous(breaks = c(ticks,logticks), labels = c(ticks,log(logticks))) +
facet_wrap(key~y,scales = 'free')
ggplot's scale_x_log10 to the rescue, maybe? I'm not 100% sure I understand your question, because I didn't understand your example code. Hopefully this is what you mean...
library(tidyverse)
set.seed(1000)
aero_df <-
data_frame(
x = rnorm(100,100,99),
y = sample(c('dream on',
'dude looks like a lady'),
100,
replace = T))
aero_plot <- ggplot(aero_df,aes(x,color = y,fill = y)) +
geom_density(show.legend = F) +
scale_x_log10() +
facet_wrap(~y,scales = 'free')
print(aero_plot)

Color one point and add an annotation in ggplot2?

I have a dataframe a with three columns :
GeneName, Index1, Index2
I draw a scatterplot like this
ggplot(a, aes(log10(Index1+1), Index2)) +geom_point(alpha=1/5)
Then I want to color a point whose GeneName is "G1" and add a text box near that point, what might be the easiest way to do it?
You could create a subset containing just that point and then add it to the plot:
# create the subset
g1 <- subset(a, GeneName == "G1")
# plot the data
ggplot(a, aes(log10(Index1+1), Index2)) + geom_point(alpha=1/5) + # this is the base plot
geom_point(data=g1, colour="red") + # this adds a red point
geom_text(data=g1, label="G1", vjust=1) # this adds a label for the red point
NOTE: Since everyone keeps up-voting this question, I thought I would make it easier to read.
Something like this should work. You may need to mess around with the x and y arguments to geom_text().
library(ggplot2)
highlight.gene <- "G1"
set.seed(23456)
a <- data.frame(GeneName = paste("G", 1:10, sep = ""),
Index1 = runif(10, 100, 200),
Index2 = runif(10, 100, 150))
a$highlight <- ifelse(a$GeneName == highlight.gene, "highlight", "normal")
textdf <- a[a$GeneName == highlight.gene, ]
mycolours <- c("highlight" = "red", "normal" = "grey50")
a
textdf
ggplot(data = a, aes(x = Index1, y = Index2)) +
geom_point(size = 3, aes(colour = highlight)) +
scale_color_manual("Status", values = mycolours) +
geom_text(data = textdf, aes(x = Index1 * 1.05, y = Index2, label = "my label")) +
theme(legend.position = "none") +
theme()

Resources