Using R ggridges (R-joyplot) for bar charts - r

Is it possible to use the ggridges package to draw sets of bars instead of ridgelines, similar to geom_col()?
I have data such as:
dt = tibble(
hr = c(1,2,3,4,1,2,3,4),
fr = c(.1,.5,.9,.1,.4,.9,.9,.4),
gr = c('Mon','Mon','Mon','Mon','Sun','Sun','Sun','Sun')
)
The plot below gives me:
ggplot(dt, aes(x=hr, y=gr, height=fr)) +
geom_ridgeline() + ylab(NULL)
As you can see it draws a line connecting the values. What I am looking for instead are individual columns, as in this plot:
ggplot(dt, aes(x=hr, y=fr)) +
geom_col() + ylab(NULL) +
facet_wrap(~gr)

Here is a solution tracing out the individual bars.
library(tidyverse)
library(ggridges)
dt = tibble(
hr = c(1,2,3,4,1,2,3,4),
fr = c(.1,.5,.9,.1,.4,.9,.9,.4),
gr = c('Mon','Mon','Mon','Mon','Sun','Sun','Sun','Sun')
)
# function that turns an x, y pair into the shape of a bar of given width
make_bar <- function(x, y, width = 0.9) {
xoff <- width/2
data.frame(x = c(x-xoff*(1+2e-8), x-xoff*(1+1e-8), x-xoff, x+xoff, x+xoff*(1+1e-8), x+xoff*(1+2e-8)),
height = c(NA, 0, y, y, 0, NA))
}
# convert data table using make_bar function
dt %>%
mutate(bars = map2(hr, fr, ~make_bar(.x, .y))) %>%
unnest() -> dt_bars
ggplot(dt_bars, aes(x=x, y=gr, height=height)) +
geom_ridgeline() + ylab(NULL)

Related

R vertical ridgeline plot, error on width and stat parameters

How can I reformat this ridgeline plot so that is a vertical ridgeline plot?
My real dataset is the actual PDF. For a minimum reproducible example, I generate distributions and extract the PDFs to use in a dummy function. The dataframe has a model name (for grouping), x values paired with PDF ordinates, and an id field that separates the different ridgeline levels (i.e., ridgeline y axis).
set.seed(123)
makedfs <- function(name, id, mu, sig) {
vals <- exp(rnorm(1000, mean=mu, sd=sig))
pdf <-density(vals)
model <- rep(name, length(pdf$x))
prox <- rep(id, length(pdf$x))
df <- data.frame(model, prox, pdf$x, pdf$y)
colnames(df) <- c("name", "id", "x", "pdf")
return(df)
}
df1 <- makedfs("model1", 0, log(1), 1)
df2 <- makedfs("model2", 0, log(0.5), 2)
df3 <- makedfs("model1", 1, log(0.2), 0.8)
df4 <- makedfs("model2", 1, log(1), 1)
df <- rbind(df1, df2, df3, df4)
From this answer, R Ridgeline plot with multiple PDFs can be overlayed at same level, I have a standard joyplot:
ggplot(df, aes(x=x, y=id, height = pdf, group = interaction(name, id), fill = name)) +
geom_ridgeline(alpha = 0.5, scale = .5) +
scale_y_continuous(limits = c(0, 5)) +
scale_x_continuous(limits = c(-6, 6))
I am trying the code below based on https://wilkelab.org/ggridges/reference/geom_vridgeline.html but it throws an error on the width parameter.
p <- ggplot(df, aes(x=id, y=x, width = ..density.., fill=id)) +
geom_vridgeline(stat="identity", trim=FALSE, alpha = 0.85, scale = 2)
Error in `f()`:
! Aesthetics must be valid computed stats. Problematic aesthetic(s): width = ..density...
Did you map your stat in the wrong layer?
If you wanted the same graph, just vertically oriented, you need to use the same parameters when you use geom_vridgeline.
I swapped the limits you originally set so you can see that it's the same.
ggplot(df, aes(x = id, y = x, width = pdf, fill = name,
group = interaction(name, id))) +
geom_vridgeline(alpha = 0.85, scale = .5) +
scale_x_continuous(limits = c(0, 5)) + # <-- note that the x & y switched
scale_y_continuous(limits = c(-6, 6))

ggplot: Drawing tiles / rectangles with discrete variables

I'm attempting to draw tiles / rectangles to get the following result:
library(tidyverse)
library(plotly)
set.seed(0)
df <- tibble(
a = runif(5),
b = runif(5),
c = runif(5),
d = runif(5),
case_id = 1:5
) %>% tidyr::pivot_longer(cols = -case_id)
plot <- ggplot2::ggplot(
data = df,
mapping = aes(
x = name,
y = value,
group = case_id
)
) + geom_point()
plot_boxes_y <- seq(from = 0, to = 1, by = .2)
plot_boxes_x <- unique(df$name) %>% length()
for (x in 1:plot_boxes_x) {
for (y in plot_boxes_y) {
plot <- plot + geom_rect(
mapping = aes_(
xmin = x - .5,
xmax = x + .5,
ymin = y - .5,
ymax = y + .5
),
color = "red",
fill = NA
)
}
}
plotly::ggplotly(plot)
As you can see, I currently do this by looping through coordinates and drawing each rectangle individually. The problem is, that this generates many layers which makes plotly::ggplotly() really slow on large datasets.
Therefore, I'm looking for a more efficient way. Please note, that I cannot use the panel.grid, since I intend to visualize z-data by filling rectangles later on.
My approach was to draw geom_tile() on top of the scatter plot:
# my attempt
df$z <- rep(0, nrow(df))
plot2 <- ggplot2::ggplot(
data = df,
mapping = aes(
x = name,
y = value,
color = z,
group = case_id
)
) + geom_point() + geom_tile()
I assume that this fails because of the fact that name is a discrete variable? So, how can i efficiently draw tiles in addition to my scatterplot?
Thanks
Here is a solution using the geom_tile option. The key here creating a data frame to hold the coordinates of the grid and then specifying the aesthetics individually in each of the function calls.
library(ggplot2)
library(tidyr)
set.seed(0)
df <- tibble(
a = runif(5),
b = runif(5),
c = runif(5),
d = runif(5),
case_id = 1:5
) %>% pivot_longer(cols = -case_id)
df$z <- rep(0, nrow(df))
#make data frame for the grid corrdinates
grid<-data.frame(x=factor( ordered( 1:4), labels = c("a", "b", "c", "d" )),
y=rep(seq(0, 1, .1), each=4))
#plot using geom_tile & geom_point
plot2 <- ggplot2::ggplot() + geom_tile(data=grid, aes(x=x, y=y), fill=NA, col="red") +
geom_point(data = df,
mapping = aes(
x = name,
y = value,
color = z,
group = case_id))
print(plot2)
if you don't mind them going beyond the axis
ggplot(df,aes(x=name,y=value)) + geom_point() +
geom_vline(xintercept=seq(0.5,4.5,by=1)) +
geom_hline(yintercept=seq(0,2,by=.2))
else:
#make a new data frame
GRIDS = rbind(
# the vertical lines
data.frame(x=seq(0.5,4.5,by=1),xend=seq(0.5,4.5,by=1),y=0,yend=2),
# the horizontal lines
data.frame(x=0.5,xend=4.5,y=seq(0,2,by=.2),yend=seq(0,2,by=.2))
)
ggplot(df,aes(x=name,y=value)) + geom_point() +
geom_segment(data=GRIDS,aes(x=x,y=y,xend=xend,yend=yend),col="red")

How can I add annotation in ggplotly animation?

I am creating animated plotly graph for my assignment in r, where I am comparing several models with various number of observations. I would like to add annotation showing what is the RMSE of the current model - this means I would like to have text that changes together with slider. Is there any easy way how to do that?
Here is my dataset stored on GitHub. There already is created variable with RMSE: data
The base ggplot graphic is as follows:
library(tidyverse)
library(plotly)
p <- ggplot(values_predictions, aes(x = x)) +
geom_line(aes(y = preds_BLR, frame = n, colour = "BLR")) +
geom_line(aes(y = preds_RLS, frame = n, colour = "RLS")) +
geom_point(aes(x = x, y = target, frame = n, colour = "target"), alpha = 0.3) +
geom_line(aes(x = x, y = sin(2 * pi * x), colour = "sin(2*pi*x)"), alpha = 0.3) +
ggtitle("Comparison of performance) +
labs(y = "predictions and targets", colour = "colours")
This is converted to plotly, and I have added an animation to the Plotly graph:
plot <- ggplotly(p) %>%
animation_opts(easing = "linear",redraw = FALSE)
plot
Thanks!
You can add annotations to a ggplot graph using the annotate function: http://ggplot2.tidyverse.org/reference/annotate.html
df <- data.frame(x = rnorm(100, mean = 10), y = rnorm(100, mean = 10))
# Build model
fit <- lm(x ~ y, data = df)
# function finds RMSE
RMSE <- function(error) { sqrt(mean(error^2)) }
library(ggplot2)
ggplot(df, aes(x, y)) +
geom_point() +
annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 2,
label = paste("RMSE", RMSE(fit$residuals)) )
There seems to be a bit of a problem converting between ggplot and plotly. However this workaround here shows a workaround which can be used:
ggplotly(plot) %>%
layout(annotations = list(x = 12, y = 13, text = paste("RMSE",
RMSE(fit$residuals)), showarrow = F))
Here's an example of adding data dependent text using the built in iris dataset with correlation as text to ggplotly.
library(plotly)
library(ggplot2)
library(dplyr)
mydata = iris %>% rename(variable1=Sepal.Length, variable2= Sepal.Width)
shift_right = 0.1 # number from 0-1 where higher = more right
shift_down = 0.02 # number from 0-1 where higher = more down
p = ggplot(mydata, aes(variable1,variable2))+
annotate(geom = "text",
label = paste0("Cor = ",as.character(round(cor.test(mydata$variable1,mydata$variable2)$estimate,2))),
x = min(mydata$variable1)+abs(shift_right*(min(mydata$variable1)-max(mydata$variable1))),
y = max(mydata$variable2)-abs(shift_down*(min(mydata$variable2)-max(mydata$variable2))), size=4)+
geom_point()
ggplotly(p) %>% style(hoverinfo = "none", traces = 1) # remove hover on text

Set axis for multiple plots in ggplot

I'm working on a population pyramide that should be saved as a gif. Kind of like in this tutorial of Flowing Data, but with ggplot instead of plotrix.
My workflow:
1) Create a population pyramide
2) Create multiple pyramide-plots in a for-loop
for (i in unique(d$jahr)) {
d_jahr <- d %>%
filter(jahr == i)
p <- ggplot(data = d_jahr, aes(x = anzahl, y = value, fill = art)) +
geom_bar(data = filter(d_jahr, art == "w"), stat = "identity") +
geom_bar(data = filter(d_jahr, art == "m"), stat = "identity") +
coord_flip() +
labs(title = paste(i), x = NULL, y = NULL)
ggsave(p,filename=paste("img/",i,".png",sep=""))
}
3) Save the plots as gif with the animation package
My problem:
All years have different values, so the x-axis have different ranges. This results in weird looks in a gif, because the center of the plots jumps to the right, to the left, to the right...
Is it possible to fix the x-axis (in this case y-axis, because of coord-flip()) over multiple plots that are created independently?
You can fix the range of an axis by setting the limits parameter:
library(ggplot2)
lst <- list(
data.frame(x = 1:100, y=runif(100, 0, 10)),
data.frame(x = 1:100, y=runif(100, 0, 100))
)
ylim <- range(do.call(c, lapply(lst, "[[", "y")))
for (x in seq(lst)) {
print(ggplot(lst[[x]], aes(x, y)) + geom_point() + scale_y_continuous(limits=ylim))
}
or by adding +ylim(ylim) instead of +scale_y_continuous(limits=ylim) (via #DeveauP).

R: interactive plots (tooltips): rCharts dimple plot: formatting axis

I have some charts created with ggplot2 which I would like to embed in a web application: I'd like to enhance the plots with tooltips. I've looked into several options. I'm currently experimenting with the rCharts library and, among others, dimple plots.
Here is the original ggplot:
Here is a first attempt to transpose this to a dimple plot:
I have several issues:
after formatting the y-axis with percentages, the data is altered.
after formatting the x-axis to correctly render dates, too many labels are printed.
I am not tied to dimple charts, so if there are other options that allow for an easier way to tweak axis formats I'd be happy to know. (the Morris charts look nice too, but tweaking them looks even harder, no?)
Objective: Fix the axes and add tooltips that give both the date (in the format 1984) and the value (in the format 40%).
If I can fix 1 and 2, I'd be very happy. But here is another, less important question, in case someone has suggestions:
Could I add the line labels ("Top 10%") to the tooltips when hovering over the lines?
After downloading the data from: https://gist.github.com/ptoche/872a77b5363356ff5399,
a data frame is created:
df <- read.csv("ps-income-shares.csv")
The basic dimple plot is created with:
library("rCharts")
p <- dPlot(
value ~ Year,
groups = c("Fractile"),
data = transform(df, Year = as.character(format(as.Date(Year), "%Y"))),
type = "line",
bounds = list(x = 50, y = 50, height = 300, width = 500)
)
While basic, so far so good. However, the following command, intended to convert the y-data to percentages, alters the data:
p$yAxis(type = "addMeasureAxis", showPercent = TRUE)
What am I doing wrong with showPercent?
For reference, here is the ggplot code:
library("ggplot2")
library("scales")
p <- ggplot(data = df, aes(x = Year, y = value, color = Fractile))
p <- p + geom_line()
p <- p + theme_bw()
p <- p + scale_x_date(limits = as.Date(c("1911-01-01", "2023-01-01")), labels = date_format("%Y"))
p <- p + scale_y_continuous(labels = percent)
p <- p + theme(legend.position = "none")
p <- p + geom_text(data = subset(df, Year == "2012-01-01"), aes(x = Year, label = Fractile, hjust = -0.2), size = 4)
p <- p + xlab("")
p <- p + ylab("")
p <- p + ggtitle("U.S. top income shares (%)")
p
For information, the chart above is based on the data put together by Thomas Piketty and Emmanuel Saez in their study of U.S. top incomes. The data and more may be found on their website, e.g.
http://elsa.berkeley.edu/users/saez/
http://piketty.pse.ens.fr/en/
EDIT:
Here is a screenshot of Ramnath's solution, with a title added and axis labels tweaked. Thanks Ramnath!
p$xAxis(inputFormat = '%Y-%m-%d', outputFormat = '%Y')
p$yAxis(outputFormat = "%")
p$setTemplate(afterScript = "
<script>
myChart.axes[0].timeField = 'Year'
myChart.axes[0].timePeriod = d3.time.years
myChart.axes[0].timeInterval = 10
myChart.draw()
myChart.axes[0].titleShape.remove() // remove x label
myChart.axes[1].titleShape.remove() // remove y label
myChart.svg.append('text') // chart title
.attr('x', 40)
.attr('y', 20)
.text('U.S. top income shares (%)')
.style('text-anchor','beginning')
.style('font-size', '100%')
.style('font-family','sans-serif')
</script>
")
p
To change (rather than remove) axis labels, for instance:
myChart.axes[1].titleShape.text('Year')
To add a legend to the plot:
p$set(width = 1000, height = 600)
p$legend(
x = 580,
y = 0,
width = 50,
height = 200,
horizontalAlign = "left"
)
To save the rchart:
p$save("ps-us-top-income-shares.html", cdn = TRUE)
An alternative based on the nvd3 library can be obtained (without any of the fancy stuff) with:
df$Year <- strftime(df$Year, format = "%Y")
n <- nPlot(data = df, value ~ Year, group = 'Fractile', type = 'lineChart')
Here is one way to solve (1) and (2). The argument showPercent is not to add % to the values, but to recompute the values so that they stack up to 100% which is why you are seeing the behavior you pointed out.
At this point, you will see that we are still having to write custom javascript to tweak the x-axis to get it to display the way we want it to. In future iterations, we will strive to allow the entire dimple API to be accessible within rCharts.
df <- read.csv("ps-income-shares.csv")
p <- dPlot(
value ~ Year,
groups = c("Fractile"),
data = df,
type = "line",
bounds = list(x = 50, y = 50, height = 300, width = 500)
)
p$xAxis(inputFormat = '%Y-%m-%d', outputFormat = '%Y')
p$yAxis(outputFormat = "%")
p$setTemplate(afterScript = "
<script>
myChart.axes[0].timeField = 'Year'
myChart.axes[0].timePeriod = d3.time.years
myChart.axes[0].timeInterval = 5
myChart.draw()
//if we wanted to change our line width to match the ggplot chart
myChart.series[0].shapes.style('stroke-width',1);
</script>
")
p
rCharts is rapidly evolving. I know it is late, but in case someone else would like to see it, here is an almost complete replication of the ggplot sample shown.
#For information, the chart above is based
#on the data put together by Thomas Piketty and Emmanuel Saez
#in their study of U.S. top incomes.
#The data and more may be found on their website, e.g.
#http://elsa.berkeley.edu/users/saez/
#http://piketty.pse.ens.fr/en/
#read in the data
df <- read.csv(
"https://gist.githubusercontent.com/ptoche/872a77b5363356ff5399/raw/ac86ca43931baa7cd2e17719025c8cde1c278fc1/ps-income-shares.csv",
stringsAsFactors = F
)
#get year as date
df$YearDate <- as.Date(df$Year)
library("ggplot2")
library("scales")
p <- ggplot(data = df, aes(x = YearDate, y = value, color = Fractile))
p <- p + geom_line()
p <- p + theme_bw()
p <- p + scale_x_date(limits = as.Date(c("1911-01-01", "2023-01-01")), labels = date_format("%Y"))
p <- p + scale_y_continuous(labels = percent)
p <- p + theme(legend.position = "none")
p <- p + geom_text(data = subset(df, Year == "2012-01-01"), aes(x = YearDate, label = Fractile, hjust = -0.2), size = 4)
p <- p + xlab("")
p <- p + ylab("")
p <- p + ggtitle("U.S. top income shares (%)")
gp <- p
gp
p <- dPlot(
value ~ Year,
groups = c("Fractile"),
data = df,
type = "line",
bounds = list(x = 50, y = 50, height = 300, width = 500)
)
p$xAxis(inputFormat = '%Y-%m-%d', outputFormat = '%Y')
p$yAxis(outputFormat = "%")
p$setTemplate(afterScript = "
<script>
myChart.axes[0].timeField = 'Year'
myChart.axes[0].timePeriod = d3.time.years
myChart.axes[0].timeInterval = 5
myChart.draw()
//if we wanted to change our line width to match the ggplot chart
myChart.series[0].shapes.style('stroke-width',1);
//to take even one step further
//we can add labels like in the ggplot example
myChart.svg.append('g')
.selectAll('text')
.data(
d3.nest().key(function(d){return d.cx}).map(myChart.series[0]._positionData)[myChart.axes[0]._max])
.enter()
.append('text')
.text(function(d){return d.aggField[0]})
.attr('x',function(d){return myChart.axes[0]._scale(d.cx)})
.attr('y',function(d){return myChart.axes[1]._scale(d.cy)})
.attr('dy','0.5em')
.style('font-size','80%')
.style('fill',function(d){return myChart._assignedColors[d.aggField[0]].fill})
</script>
")
p$defaultColors(ggplot_build(gp)$data[[2]]$colour)
p

Resources