Add annotation outside of stratigraphic plot - r

How can I add some annotations to a stratigraphic plot?
For example, here's Stratiplot from analogue:
# Load the analogue package and the V12.122 example data set.
library(analogue)
data(V12.122)

# The row names of the data set are converted to numeric depths.
Depths <- as.numeric(rownames(V12.122))
names(V12.122)

# Build the stratigraphic diagram; the outer parentheses print it on assignment.
(plt <- Stratiplot(
  Depths ~ O.univ + G.ruber + G.tenel + G.pacR,
  data = V12.122,
  type = c("h", "l", "g"),
  zones = 400
))
plt
I want to add some text in the white space between the blue plots and the zones rectangle on the far right. For example, like this:
With A = 150, B = 600, C = 1000

Here's one way:
# Load analogue through pacman, then the example data.
pacman::p_load(analogue)
data(V12.122)
Depths <- as.numeric(rownames(V12.122))
names(V12.122)

# Original diagram, with the zone marker at 400.
(plt <- Stratiplot(
  Depths ~ O.univ + G.ruber + G.tenel + G.pacR,
  data = V12.122,
  type = c("h", "l", "g"),
  zones = 400
))

# Second diagram of the same data, with y ticks at the three annotation depths.
(plt2 <- Stratiplot(
  Depths ~ O.univ + G.ruber + G.tenel + G.pacR,
  data = V12.122,
  type = c("h", "l", "g"),
  yticks = c(150, 600, 1000)
))
We need to update the y-axis labels like this:
# Replace the y-axis tick labels of plt2 with the annotation letters
# (one label per tick requested via yticks when plt2 was built).
plt2$y.scales$labels <- c("A", "B", "C")
And then we can plot it with the two y-axes like so:
# library() stops with an error if latticeExtra is not installed; require()
# would only return FALSE and let the next call fail less clearly.
library(latticeExtra)
# Overlay plt2 on plt and draw the axis of plt2 as a second y axis.
doubleYScale(plt, plt2, add.axis = TRUE)
I checked out the source code of this function and figured out that it is a wrapper around xyplot. A search then showed that doubleYScale can be used this way to add a second y-axis to an xyplot.
Or to keep the colours uniformly black,
# Same overlay, but without applying the per-plot styles (use.style = FALSE).
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables.
doubleYScale(plt, plt2, add.axis = TRUE, use.style = FALSE)

Related

How to align multiple legends and avoid overlapping in ggplot?

The bounty expires in 7 days. Answers to this question are eligible for a +50 reputation bounty.
Electrino wants to draw more attention to this question.
I am trying to create a plot that combines 2 separate legends and a grid of multiple plots. The issue I'm having is that I'm finding it difficult to align the legends so they are visible and not overlapping. Hopefully the example below will explain what I mean.
To begin I am going to create 2 plots. In these two plots I am only interested in the legends, and I am discarding the actual plot (so please ignore the actual plots in these two plots). To get just the legend I am using the cowplot package.
library(ggplot2)
library(cowplot)
# -------------------------------------------------------------------------
# plot 1 ------------------------------------------------------------------
# create fake data
dfLegend_1 <- data.frame(x = LETTERS[1:10], y = 1:10)

# set colours; each colour is named after the level of `x` it belongs to
pointColours <- c(
  A = "#F5736A", B = "#D58D00", C = "#A0A300",
  D = "#36B300", E = "#00BC7B", F = "#00BCC2",
  G = "#00ADF4", H = "#928DFF", I = "#E568F0",
  J = "#808080"
)

# plot
# * fill is mapped to the data column `x` and the named colour vector is
#   passed as `values`, so every level gets its own colour and the legend
#   labels (A-J) come from the levels themselves.
# * theme_void() is a complete theme and replaces any theme() settings added
#   before it, so the legend key size has to be set after it.
ggLegend_1 <- ggplot(dfLegend_1, aes(x = x, y = y)) +
  geom_point(aes(fill = x), shape = 22, size = 10) +
  scale_fill_manual(values = pointColours, name = "Variable") +
  theme_void() +
  theme(legend.key.size = unit(0.5, "cm"))

# get legend
legend_1 <- get_legend(ggLegend_1)
# -------------------------------------------------------------------------
# plot 2 ------------------------------------------------------------------
# Fake data: 100 random points with a non-negative value `z2` for the gradient
dflegend_2 <- data.frame(
  x = runif(100),
  y = runif(100),
  z2 = abs(rnorm(100))
)

# Only the colour bar of this plot is of interest
ggLegend_2 <- ggplot(dflegend_2, aes(x = x, y = y)) +
  geom_point(aes(color = z2), shape = 22, size = 10) +
  scale_color_gradientn(
    colours = rev(colorRampPalette(c('steelblue', '#f7fcfd', 'orange'))(5)),
    limits = c(0, 10),
    name = 'Gradient',
    guide = guide_colorbar(
      frame.colour = "black",
      ticks.colour = "black"
    )
  )

# Extract the legend grob
legend_2 <- get_legend(ggLegend_2)
Then I am creating many plots (in this example, I am creating 20 individual plots) and plotting them on a grid:
# create data
dfGrid <- data.frame(x = rnorm(10), y = rnorm(10))

# make a list of plots
# lapply() builds the whole list at once instead of growing it element by
# element inside a 1:20 loop; seq_len() is also safe for a count of zero.
plotList <- lapply(seq_len(20), function(i) {
  ggplot(dfGrid) +
    # red band from the smallest y up to zero
    geom_ribbon(aes(x = x, ymin = min(y), ymax = 0), fill = "red", alpha = .5) +
    # blue band from zero up to the largest y
    geom_ribbon(aes(x = x, ymin = 0, ymax = max(y)), fill = "blue", alpha = .5) +
    theme_void()
})

# plot them on a grid
gridFinal <- cowplot::plot_grid(plotlist = plotList)
Finally, I am joining the two legends together and adding them to the grid of many plots:
# stack the two legends in a single column
legendFinal <- plot_grid(
  legend_2,
  legend_1,
  ncol = 1
)

# put the plot grid and the legend column side by side (relative widths 3:1)
plot_grid(
  gridFinal,
  legendFinal,
  rel_widths = c(3, 1)
)
This results in something that looks like this:
As you can see, the legends overlap and are not very well spaced. I was wondering if there is any way to fit everything in whilst having the legends appropriately spaced and readable?
I should also note, that, in general, there can be any number of variables and any number of gridded plots.
One option to fix your issue would be to switch to patchwork to glue your plots and the legends together. In particular, I make use of the design argument to assign more space to the Variable legend. However, you should be aware that legends are much less flexible than plots, i.e. the size of a legend is in absolute units and will not adjust to the available space. Hence, I'm not sure whether my solution will fit your desire for a "one-size-fits-all" approach.
library(patchwork)
# Text layout for patchwork: each letter names one area of the grid and a
# repeated letter makes that area span several cells. Here A-T are single
# cells, U occupies one cell in the last column and V the three cells below it.
# NOTE(review): areas are presumably filled in the order of the plot list, so
# U would hold legend_2 and V legend_1 - confirm against the patchwork docs.
design <-
"
ABCDEU
FGHIJV
KLMNOV
PQRSTV
"
# Append the two legends after the plots already in plotList.
plotList2 <- c(plotList, list(legend_2, legend_1))
# Combine everything and apply the layout defined above.
wrap_plots(plotList2) +
plot_layout(design = design)

How do I move lines in a ggplot to create a 3D effect and add a pseudo-axis that indicates the name of each line?

I have 3 lines in a plot which I want to move 1 unit on the X axis and 100 units on the Y axis to create a 3D effect as in the example below. So far I have only been able to do the lines. I tried with the position_nudge() function, but it didn't have the effect I expected, it changed the scale of the axes, but not the position of the lines.
Plus: If the plot with the frames looks like a cube, that would be a great thing.
Example:
MWE:
library(ggplot2)

# Three groups observed at times 0 to 10.
Group <- c("A", "B", "C")
Time <- 0:10
DF <- expand.grid(Time = Time, Group = Group)

# Each group is flat at 1 except for a single spike at Time == 5.
DF$Y <- c(
  rep(1, 5), 100, rep(1, 5),
  rep(1, 5), 500, rep(1, 5),
  rep(1, 5), 1000, rep(1, 5)
)

# Attempt to shift the lines with position_nudge().
ggplot(data = DF, aes(x = Time, y = Y, color = Group)) +
  geom_line(position = position_nudge(y = 100, x = 1)) +
  theme_bw()

R control jitter function - avoid overplotting / non-random jitter

My problem seems simple: I am using ggplot2 with geom_jitter() to plot a variable (take my picture as an example).
Jitter now adds some random noise to the variable (the variable is just called "1" in this example) to prevent overplotting. So I have now random noise in the y-direction and clearly what otherwise would be completely overplotted is now better visible.
But here is my question:
As you can see, there are still some points, that overplot each other. In my example here, this could be easily prevented, if it wouldn't be random noise in y-direction... but somehow more strategically placed offsets.
Can I somehow alter the geom_jitter() behavior or is there a similar function in ggplot2 that does exactly this?
Not really a minimal example, but also not too long:
library("imputeTS")
library("ggplot2")
data <- tsAirgap

# 2.1 Create required data

# Positions directly after a missing value: look for NAs in everything but the
# last element and shift the hits forward by one.
na_indx_after <- which(is.na(data[1:(length(data) - 1)])) + 1
# Positions directly before a missing value: searching from the second element
# onwards already moves every hit one position back, so no -1 is needed.
na_indx_before <- which(is.na(data[2:length(data)]))

# Values at those positions (with NAs removed), each set labelled by its type
before <- data.frame(
  id = "1",
  type = "before",
  input = na_remove(data[na_indx_before])
)
after <- data.frame(
  id = "1",
  type = "after",
  input = na_remove(data[na_indx_after])
)
all <- data.frame(
  id = "1",
  type = "source",
  input = na_remove(data)
)

# Number of values per set, for the plot labels
n_before <- nrow(before)
n_all <- nrow(all)
n_after <- nrow(after)

# 2.4 Create dataframe for ggplot2
# Stack the three sets into one long data frame
df <- rbind(before, after, all)
# Create the plot
# Create the plot
# Jittered points, coloured and faded by type. The scales are added in one
# chain, and the stray trailing commas inside the scale_*_manual() calls are
# removed: a trailing comma passes an empty argument on to the scale.
gg <- ggplot(data = df) +
  geom_jitter(
    mapping = aes(x = id, y = input, color = type, alpha = type),
    width = 0.5,
    height = 0.5
  ) +
  scale_color_manual(
    values = c("before" = "skyblue1", "after" = "yellowgreen", "source" = "gray66")
  ) +
  scale_alpha_manual(
    values = c("before" = 1, "after" = 1, "source" = 0.3)
  )

# Draw with the linedraw theme, a fixed aspect ratio and flipped axes.
gg + theme_linedraw() + theme(aspect.ratio = 0.5) + coord_flip()
So many good suggestions... here is what Ben's suggestion would look like for my example:
I changed parts of my code to:
# Dot plot of the values, with dots stacked in bins of width 15.
gg <- ggplot(
  data = df,
  mapping = aes(x = input, color = type, fill = type, alpha = type)
) +
  geom_dotplot(binwidth = 15)
This would basically also work as intended for me. ggbeeswarm, as suggested by Jon, also worked great for my purpose.
I thought of a hack I really like, using ggrepel. It's normally used for labels, but nothing prevents you from making the label into a point.
# 200 random x positions on a single row (y = 1), each in one of three groups.
df <- data.frame(
  x = rnorm(200),
  col = sample(LETTERS[1:3], 200, replace = TRUE),
  y = 1
)

# Every "label" is a unicode black circle, so the repelled text looks like points.
ggplot(df, aes(x = x, y = y, label = "●", color = col)) +
  ggrepel::geom_text_repel(
    segment.color = NA,
    box.padding = 0.01,
    key_glyph = "point"
  )
A downside of this method is that ggrepel can take a lot of time for a large number of points, and will recalculate differently each time you change the plot size. A faster alternative would be to use ggbeeswarm::geom_quasirandom, which uses a deterministic process to define jitter that looks random.
# Same data drawn with quasirandom offsets instead of random jitter.
ggplot(df, aes(x = x, y = y, color = col)) +
  ggbeeswarm::geom_quasirandom(groupOnX = FALSE)

Can I draw a horizontal line at specific number of range of values using ggplot2?

I have data (from excel) with the y-axis as ranges (also calculated in excel) and the x-axis as cell counts and I would like to draw a horizontal line at a specific value in the range, like a reference line. I tried using geom_hline(yintercept = 450) but I am sure it is quite naive and does not work that way for a number in range. I wonder if there are any better suggestions for it :)
# Start a new base-graphics frame.
plot.new()
library(ggplot2)
# Read the tab-separated file; the path is specific to the author's machine.
d <- read.delim("C:/Users/35389/Desktop/R.txt", sep = "\t")
head(d)
# Put the row names into a regular first column and reset the row names.
d <- cbind(row.names(d), data.frame(d), row.names=NULL)
d
# Bar chart of CTRL against Bin.range.
# NOTE(review): the columns are referenced as d$... inside aes(); bare column
# names (CTRL, Bin.range) are the usual form since `d` is already the data.
g <- ggplot(d, aes(d$CTRL,d$Bin.range))+ geom_col()
# Add a horizontal reference line at y = 450.
g + geom_hline(yintercept = 450)
First of all, have a look at my comments.
Second, this is how I suggest you proceed: don't calculate those ranges in Excel. Let ggplot do it for you.
Say, your data is like this:
# 100 uniformly distributed example values between 0 and 500.
df <- data.frame(
  x = runif(n = 100, min = 0, max = 500)
)
head(df)
#> x
#>1 322.76123
#>2 57.46708
#>3 223.31943
#>4 498.91870
#>5 155.05416
#>6 107.27830
Then you can make a plot like this:
library(ggplot2)

# Let ggplot do the binning: bins are 50 wide and aligned to start at 0.
ggplot(df) +
  geom_histogram(
    aes(x = x),
    boundary = 0,
    binwidth = 50,
    fill = "steelblue",
    colour = "white"
  ) +
  # Dashed red reference line at x = 450; it appears horizontal after the flip.
  geom_vline(xintercept = 450, colour = "red", linetype = 2, size = 1) +
  coord_flip()
We don't have your data, but the following data frame is of a similar structure:
# Ten random counts, each paired with a range label "0-49.9", "50-99.9", ...
d <- data.frame(
  CTRL = sample(100, 10),
  Bin.range = paste(0:9 * 50, 0:9 * 50 + 49.9, sep = "-")
)
The first thing to note is that your y axis does not have your ranges ordered correctly. You have 50-99.9 at the top of the y axis. This is because your ranges are stored as characters and ggplot will automatically arrange these alphabetically, not numerically. So you need to reorder the factor levels of your ranges:
# Use the current row order as the level order so the ranges sort numerically.
d$Bin.range <- factor(d$Bin.range, levels = d$Bin.range)
When you create your plot, don't use d$Bin.range, but instead just use Bin.range. ggplot knows to look for this variable in the data frame you have passed.
# Columns are referenced by bare name; ggplot looks them up in `d`.
g <- ggplot(d, aes(x = CTRL, y = Bin.range)) +
  geom_col()
If you want to draw a horizontal line, your two options are to specify the y axis label at which you want to draw the line (i.e. yintercept = "400-449.9") or, which is what I suspect you want, use a numeric value of 9.5 which will put it between the top two values:
# On a discrete axis the levels sit at 1, 2, ..., so 9.5 falls between the
# ninth and tenth range.
g +
  geom_hline(yintercept = 9.5, linetype = 2)

R: interactive plots (tooltips): rCharts dimple plot: formatting axis

I have some charts created with ggplot2 which I would like to embed in a web application: I'd like to enhance the plots with tooltips. I've looked into several options. I'm currently experimenting with the rCharts library and, among others, dimple plots.
Here is the original ggplot:
Here is a first attempt to transpose this to a dimple plot:
I have several issues:
after formatting the y-axis with percentages, the data is altered.
after formatting the x-axis to correctly render dates, too many labels are printed.
I am not tied to dimple charts, so if there are other options that allow for an easier way to tweak axis formats I'd be happy to know. (the Morris charts look nice too, but tweaking them looks even harder, no?)
Objective: Fix the axes and add tooltips that give both the date (in the format 1984) and the value (in the format 40%).
If I can fix 1 and 2, I'd be very happy. But here is another, less important question, in case someone has suggestions:
Could I add the line labels ("Top 10%") to the tooltips when hovering over the lines?
After downloading the data from: https://gist.github.com/ptoche/872a77b5363356ff5399,
a data frame is created:
# Read the downloaded CSV from the current working directory.
df <- read.csv("ps-income-shares.csv")
The basic dimple plot is created with:
library("rCharts")
# Dimple line chart of value by Year, grouped by Fractile. Year is reduced to a
# four-digit year string before plotting.
p <- dPlot(
  value ~ Year,
  groups = "Fractile",
  data = transform(df, Year = as.character(format(as.Date(Year), "%Y"))),
  type = "line",
  bounds = list(x = 50, y = 50, height = 300, width = 500)
)
While basic, so far so good. However, the following command, intended to convert the y-data to percentages, alters the data:
# Attempt to show the y values as percentages.
# NOTE(review): per the answer further down, showPercent recomputes the values
# so that they stack to 100% rather than just formatting them.
p$yAxis(type = "addMeasureAxis", showPercent = TRUE)
What am I doing wrong with showPercent?
For reference, here is the ggplot code:
library("ggplot2")
library("scales")

# Reference chart: one line per Fractile, years on the x axis, percentages on
# the y axis, and the lines labelled directly at the 2012 data points.
p <- ggplot(data = df, aes(x = Year, y = value, color = Fractile)) +
  geom_line() +
  theme_bw() +
  scale_x_date(
    limits = as.Date(c("1911-01-01", "2023-01-01")),
    labels = date_format("%Y")
  ) +
  scale_y_continuous(labels = percent) +
  theme(legend.position = "none") +
  geom_text(
    data = subset(df, Year == "2012-01-01"),
    aes(x = Year, label = Fractile, hjust = -0.2),
    size = 4
  ) +
  xlab("") +
  ylab("") +
  ggtitle("U.S. top income shares (%)")
p
For information, the chart above is based on the data put together by Thomas Piketty and Emmanuel Saez in their study of U.S. top incomes. The data and more may be found on their website, e.g.
http://elsa.berkeley.edu/users/saez/
http://piketty.pse.ens.fr/en/
EDIT:
Here is a screenshot of Ramnath's solution, with a title added and axis labels tweaked. Thanks Ramnath!
# x axis: input values are given as '%Y-%m-%d' and output as '%Y'.
p$xAxis(inputFormat = '%Y-%m-%d', outputFormat = '%Y')
# y axis: use a percent output format.
p$yAxis(outputFormat = "%")
# JavaScript run after the chart script. It configures the x axis as a time
# axis on 'Year' with an interval of 10 years, redraws the chart, removes both
# axis titles and appends a text element with the chart title.
p$setTemplate(afterScript = "
<script>
myChart.axes[0].timeField = 'Year'
myChart.axes[0].timePeriod = d3.time.years
myChart.axes[0].timeInterval = 10
myChart.draw()
myChart.axes[0].titleShape.remove() // remove x label
myChart.axes[1].titleShape.remove() // remove y label
myChart.svg.append('text') // chart title
.attr('x', 40)
.attr('y', 20)
.text('U.S. top income shares (%)')
.style('text-anchor','beginning')
.style('font-size', '100%')
.style('font-family','sans-serif')
</script>
")
p
To change (rather than remove) axis labels, for instance:
// Set the title text of axes[1] (the axis whose title the template above labels "y label").
myChart.axes[1].titleShape.text('Year')
To add a legend to the plot:
# Enlarge the chart, then place a left-aligned legend at x = 580, y = 0.
p$set(width = 1000, height = 600)
p$legend(x = 580, y = 0, width = 50, height = 200, horizontalAlign = "left")
To save the rchart:
# Write the chart to an HTML file in the working directory (cdn = TRUE).
p$save("ps-us-top-income-shares.html", cdn = TRUE)
An alternative based on the nvd3 library can be obtained (without any of the fancy stuff) with:
# Reduce Year to a four-digit year string, then build the nvd3 line chart.
df$Year <- strftime(df$Year, format = "%Y")
n <- nPlot(
  data = df,
  value ~ Year,
  group = 'Fractile',
  type = 'lineChart'
)
Here is one way to solve (1) and (2). The argument showPercent is not to add % to the values, but to recompute the values so that they stack up to 100% which is why you are seeing the behavior you pointed out.
At this point, you will see that we are still having to write custom javascript to tweak the x-axis to get it to display the way we want it to. In future iterations, we will strive to allow the entire dimple API to be accessible within rCharts.
# Read the downloaded CSV from the current working directory.
df <- read.csv("ps-income-shares.csv")
# Dimple line chart of value by Year, one line per Fractile; Year is passed
# through unchanged and handled by the axis settings below.
p <- dPlot(
value ~ Year,
groups = c("Fractile"),
data = df,
type = "line",
bounds = list(x = 50, y = 50, height = 300, width = 500)
)
# x axis: input values are given as '%Y-%m-%d' and output as '%Y'.
p$xAxis(inputFormat = '%Y-%m-%d', outputFormat = '%Y')
# y axis: use a percent output format instead of showPercent.
p$yAxis(outputFormat = "%")
# JavaScript run after the chart script: configures the x axis as a time axis
# on 'Year' with an interval of 5 years, redraws, and sets the stroke width of
# the first series to 1.
p$setTemplate(afterScript = "
<script>
myChart.axes[0].timeField = 'Year'
myChart.axes[0].timePeriod = d3.time.years
myChart.axes[0].timeInterval = 5
myChart.draw()
//if we wanted to change our line width to match the ggplot chart
myChart.series[0].shapes.style('stroke-width',1);
</script>
")
p
rCharts is rapidly evolving. I know it is late, but in case someone else would like to see it, here is an almost complete replication of the ggplot sample shown.
#For information, the chart above is based
#on the data put together by Thomas Piketty and Emmanuel Saez
#in their study of U.S. top incomes.
#The data and more may be found on their website, e.g.
#http://elsa.berkeley.edu/users/saez/
#http://piketty.pse.ens.fr/en/
#read in the data
# stringsAsFactors is spelled out as FALSE: `F` is an ordinary variable that
# can be reassigned, whereas FALSE is a reserved constant.
df <- read.csv(
  "https://gist.githubusercontent.com/ptoche/872a77b5363356ff5399/raw/ac86ca43931baa7cd2e17719025c8cde1c278fc1/ps-income-shares.csv",
  stringsAsFactors = FALSE
)
# Keep the original Year column and add a Date version for the ggplot axis.
df$YearDate <- as.Date(df$Year)

library("ggplot2")
library("scales")

# ggplot version of the chart: one line per Fractile, labelled at 2012.
p <- ggplot(data = df, aes(x = YearDate, y = value, color = Fractile)) +
  geom_line() +
  theme_bw() +
  scale_x_date(
    limits = as.Date(c("1911-01-01", "2023-01-01")),
    labels = date_format("%Y")
  ) +
  scale_y_continuous(labels = percent) +
  theme(legend.position = "none") +
  geom_text(
    data = subset(df, Year == "2012-01-01"),
    aes(x = YearDate, label = Fractile, hjust = -0.2),
    size = 4
  ) +
  xlab("") +
  ylab("") +
  ggtitle("U.S. top income shares (%)")

# Keep the ggplot object under its own name; `p` is reused for the dimple chart.
gp <- p
gp
# Dimple line chart of value by Year, one line per Fractile.
p <- dPlot(
value ~ Year,
groups = c("Fractile"),
data = df,
type = "line",
bounds = list(x = 50, y = 50, height = 300, width = 500)
)
# x axis: input values are given as '%Y-%m-%d' and output as '%Y'.
p$xAxis(inputFormat = '%Y-%m-%d', outputFormat = '%Y')
# y axis: use a percent output format.
p$yAxis(outputFormat = "%")
# JavaScript run after the chart script: configures the x axis as a time axis
# on 'Year' with an interval of 5 years, redraws, sets the stroke width of the
# first series to 1, and appends one text label per line, positioned with the
# axis scales and filled with the colour assigned to that line.
p$setTemplate(afterScript = "
<script>
myChart.axes[0].timeField = 'Year'
myChart.axes[0].timePeriod = d3.time.years
myChart.axes[0].timeInterval = 5
myChart.draw()
//if we wanted to change our line width to match the ggplot chart
myChart.series[0].shapes.style('stroke-width',1);
//to take even one step further
//we can add labels like in the ggplot example
myChart.svg.append('g')
.selectAll('text')
.data(
d3.nest().key(function(d){return d.cx}).map(myChart.series[0]._positionData)[myChart.axes[0]._max])
.enter()
.append('text')
.text(function(d){return d.aggField[0]})
.attr('x',function(d){return myChart.axes[0]._scale(d.cx)})
.attr('y',function(d){return myChart.axes[1]._scale(d.cy)})
.attr('dy','0.5em')
.style('font-size','80%')
.style('fill',function(d){return myChart._assignedColors[d.aggField[0]].fill})
</script>
")
# Reuse the colours ggplot computed for the second layer of gp (the text
# labels) as the default colours of the dimple chart.
p$defaultColors(ggplot_build(gp)$data[[2]]$colour)
p

Resources