Grid badly displayed using ggplot2 - r

I am trying to plot celestial object on the sky (basically with coordinates equivalent to latitude/longitude). I successfully plotted all my points using the "aitoff" projection of the coord_map function, but in this case, the grid is badly displayed, i.e. residual horizontal lines are still displayed for latitudes non equal to zero along with their correct projections.
How could I remove these lines?
Here is code that reproduces the behavior:
library(ggplot2)
library(mapproj)
sky2 = data.frame(RA=0, Dec=0)
skyplot2 <- qplot(RA,Dec,data=sky2,xlim=c(0,360),ylim=c(-89.999,89.999),
xlab="R.A.(°)", ylab="Decl. (°)",main="Source repartition on the sky")
skyplot2 + coord_map(projection="aitoff",orientation=c(89.999,180,0)) +
scale_y_continuous(breaks=(-2:2)*30,limits=c(-89.999,89.999)) +
scale_x_continuous(breaks=(0:8)*45,limits=c(0,360),
labels=c("","","","","","","","",""))

Definitely this is a bug in ggplot2 so could you please file this bug?
https://github.com/hadley/ggplot2/issues?state=open Filed as a bug.
Here is a quick and dirty hack.
f <- function(x, y, ...) {
if (any(is.na(x))) {
id <- rle(!is.na(x))$length
id <- rep(seq_along(id), id)
df <- data.frame(x, y, id)
df <- df[order(df$id, df$x), ]
} else if (any(is.na(y))) {
id <- rle(!is.na(y))$length
id <- rep(seq_along(id), id)
df <- data.frame(x, y, id)
}
polylineGrob(df$x, df$y, id = df$id, gp = gpar(col = "white"))
}
skyplot2 <- qplot(RA,Dec,data=sky2,xlim=c(0,360),ylim=c(-89.999,89.999),
xlab="R.A.(°)", ylab="Decl. (°)",main="Source repartition on the sky")
skyplot2 + coord_map(projection="aitoff",orientation=c(89.999,180,0)) +
scale_y_continuous(breaks=(-2:2)*30,limits=c(-89.999,89.999)) +
scale_x_continuous(breaks=(0:8)*45,limits=c(0,360),
labels=c("","","","","","","","","")) +
opts(panel.grid.major = f)
Note that this may work only with the aitoff projection.

You just need to add:
+ opts(axis.ticks = theme_blank())

Related

How to change type of line in prophet plot?

Facebook's Prophet in R (there's also a Python version) is used to generate time series forecasts.
A model m is created by:
m <- prophet(df)
future <- make_future_dataframe(m, periods = 365)
forecast <- predict(m, future)
plot(m, forecast)
Which returns a very nicely formatted graph, like:
I would like to change the line type, to get not dots but a usual thin line.
I had tried this
lines(m$history$y,lty=1)
but got an error
In doTryCatch(return(expr), name, parentenv, handler)
Are there are any suggestions how to convert those dots into a line?
The plot method for prophet objects uses ggplot2, so base R graphics functions like lines() won't work. You can use ggplot2::geom_line() to add lines, but at the moment I don't see an easy way to replace the points by lines ...
Example from ?prophet:
history <- data.frame(ds = seq(as.Date('2015-01-01'), as.Date('2016-01-01'), by = 'd'),
y = sin(1:366/200) + rnorm(366)/10)
m <- prophet(history)
future <- make_future_dataframe(m, periods = 365)
forecast <- predict(m, future)
pp <- plot(m,forecast)
Add lines:
library(ggplot2)
pp + geom_line()
This question provides a (hacky) way forward:
pp2 <- pp + geom_line()
qq2 <- ggplot_build(pp2)
qq2$data[[2]]$colour <- NA
plot(ggplot_gtable(qq2))
But obviously something went wrong with the hack. The better bet would be to look at the plot method(prophet:::plot.prophet) and modify it to behave as you want ... Here is the bare-bones version:
df <- prophet:::df_for_plotting(m, forecast)
gg <-ggplot(df, aes(x = ds, y = y)) + labs(x = "ds", y = "y")
gg <- gg + geom_ribbon(ggplot2::aes(ymin = yhat_lower,
ymax = yhat_upper), alpha = 0.2, fill = "#0072B2",
na.rm = TRUE)
## replace first geom_point() with geom_line() in next line ...
gg <- gg + geom_line(na.rm = TRUE) + geom_line(aes(y = yhat),
color = "#0072B2", na.rm = TRUE) + theme(aspect.ratio = 3/5)
I may have stripped out some components that exist in your data/forecast, though ...
it is possible to make such manipulations with dyplot.prophet(m, forecast) (html version of plot) :) before that, we should rewrite function like here:
dyplot.prophet <- function(x, fcst, uncertainty=TRUE,
...)
{
forecast.label='Predicted'
actual.label='Actual'
# create data.frame for plotting
df <- prophet:::df_for_plotting(x, fcst)
# build variables to include, or not, the uncertainty data
if(uncertainty && exists("yhat_lower", where = df))
{
colsToKeep <- c('y', 'yhat', 'yhat_lower', 'yhat_upper')
forecastCols <- c('yhat_lower', 'yhat', 'yhat_upper')
} else
{
colsToKeep <- c('y', 'yhat')
forecastCols <- c('yhat')
}
# convert to xts for easier date handling by dygraph
dfTS <- xts::xts(df %>% dplyr::select_(.dots=colsToKeep), order.by = df$ds)
# base plot
dyBase <- dygraphs::dygraph(dfTS)
presAnnotation <- function(dygraph, x, text) {
dygraph %>%
dygraphs::dyAnnotation(x, text, text, attachAtBottom = TRUE)
}
dyBase <- dyBase %>%
# plot actual values
dygraphs::dySeries(
'y', label=actual.label, color='black',stepPlot = TRUE, strokeWidth=1
) %>%
# plot forecast and ribbon
dygraphs::dySeries(forecastCols, label=forecast.label, color='blue') %>%
# allow zooming
dygraphs::dyRangeSelector() %>%
# make unzoom button
dygraphs::dyUnzoom()
if (!is.null(x$holidays)) {
for (i in 1:nrow(x$holidays)) {
# make a gray line
dyBase <- dyBase %>% dygraphs::dyEvent(
x$holidays$ds[i],color = "rgb(200,200,200)", strokePattern = "solid")
dyBase <- dyBase %>% dygraphs::dyAnnotation(
x$holidays$ds[i], x$holidays$holiday[i], x$holidays$holiday[i],
attachAtBottom = TRUE)
}
}
return(dyBase)
}
the strokeWidth=0 was before and we have changed it to strokeWidth=1 and added stepPlot = TRUE
the whole basis code is situated here: https://rdrr.io/cran/prophet/src/R/plot.R

Removing outliers from boxplot and plotly

I am trying to create a plotly boxplot in R that doesnt show the outliers, and I found this link in the official page of plotly:
https://plot.ly/ggplot2/box-plots/#outliers
library(plotly)
set.seed(123)
df <- diamonds[sample(1:nrow(diamonds), size = 1000),]
p <- ggplot(df, aes(cut, price, fill = cut)) +
geom_boxplot(outlier.shape = NA) +
ggtitle("Ignore outliers in ggplot2")
# Need to modify the plotly object and make outlier points have opacity equal
to 0
p <- plotly_build(p)
p$data <- lapply(p$data, FUN = function(x){
x$marker = list(opacity = 0)
return(x)
})
# Create a shareable link to your chart
# Set up API credentials: https://plot.ly/r/getting-started
chart_link = plotly_POST(p, filename="geom_boxplot/outliers")
chart_link
The problem is that in their webpage and in my console, outliers are still being displayed.
Is this some kind of bug?
Seems like a typo. Maybe the example wasn't updated to account for some changes in the object structure. After calling p <- plotly_build(p), we observe that there is no p$data, but there is p$x$data. So, changing the lapply call to the following:
p$x$data <- lapply(p$x$data, FUN = function(x){
x$marker = list(opacity = 0)
return(x)
})
makes everything work as intended:

Fixing plot area width when using layout_matrix in grid.arrange

I am combining facet plots of tiles. I want each tile to be square, or at least take the same height and width.
So far I have managed to give equal height to each row of tiles using layout_matrix. I am stuck when trying to fix an equal width to each column of tiles (across the plots).
Some code based on mtcars to try and illustrate the layout of my plot (actual data way more complicated):
library("tidyverse")
library("gridExtra")
df0 <- mtcars %>%
group_by(cyl) %>%
count()
df1 <- mtcars %>%
rownames_to_column("car") %>%
mutate(man = gsub("([A-Za-z]+).*", "\\1", car))
g <- list()
for(i in 1:nrow(df0)){
g[[i]] <- ggplot(data = df1 %>% filter(cyl == df0$cyl[i]),
mapping = aes(x = "", y = car, fill = qsec)) +
geom_tile() +
facet_grid( man ~ ., scales = "free_y", space = "free") +
labs(x = "", y = "") +
guides(fill = FALSE) +
theme(strip.text.y = element_text(angle=0)) +
coord_fixed()
}
m0 <- cbind(c(rep(1, df0$n[1]), rep(NA, max(df0$n) - df0$n[1])),
c(rep(2, df0$n[2]), rep(NA, max(df0$n) - df0$n[2])),
c(rep(3, df0$n[3]), rep(NA, max(df0$n) - df0$n[3])))
grid.arrange(grobs = g, layout_matrix = m0)
Which produces this plot (minus my MS Paint skills):
Presumably the different lengths of the labels in the strip text and y axis lead to the different widths for the plotting area. Not sure how I can avoid this behavior though? I thought I could create on big facet_grid but I could not get anywhere near the layout of the plot above.
Turns out this is a rather tricky thing to do. Luckily, cowplot::plot_grid can already do the alignment that results in equal sizes of the columns. I just took that function and removed the fluff, and decoupled the heights from the grid pattern it normally uses. We end up with a little custom function that does the job (all credits to Claus Wilke):
plot_grid_gjabel <- function(plots, heights) {
grobs <- lapply(plots, function(x) {
if (!is.null(x))
cowplot:::ggplot_to_gtable(x)
else NULL
})
num_plots <- length(plots)
num_widths <- unique(lapply(grobs, function(x) {
length(x$widths)
}))
num_widths[num_widths == 0] <- NULL
max_widths <- do.call(grid::unit.pmax,
lapply(grobs, function(x) { x$widths }))
for (i in 1:num_plots) {
grobs[[i]]$widths <- max_widths
}
width <- 1 / num_plots
height <- heights / max(heights)
x <- cumsum(width[rep(1, num_plots)]) - width
p <- cowplot::ggdraw()
for (i in seq_along(plots)) {
p <- p + cowplot::draw_grob(grid::grobTree(grobs[[i]]), x[i], 1 - height[i],
width, height[i])
}
return(p)
}
We can simply call this like so:
plot_grid_gjabel(g, df0$n)
Resulting in:

R: Changing order of guide/legend from sorting strings to sorting numeric in ggplot2

I am trying to create an isoline plot with ggplot2. I successfully filled the tiles of the geom_tile() function in groups using the cut() function for breaks. Using this approach, the breaks will be interpreted as strings though, not as values, giving me trouble in the order the guide is set up. This way, e.g. a value of 20 will be interpreted as a bigger number than 180, resulting in fatally misleading colors in the plot, such as this purposely ugly one:
To create this ugly image, I used the following code:
plot.isoline <- function(data, dbr=20, n_interp=200){
library(akima)
library(ggplot2)
library(fields)
data <- read.table(file = "http://s000.tinyupload.com/download.php?file_id=90687695741432763217&t=9068769574143276321730276", header = T)
dint <- interp(x = data$x, y = data$y, z = data$z,
xo = seq(min(data$x), max(data$x), length = n_interp),
yo = seq(min(data$y), max(data$y), length = n_interp))
dat_interp <- data.frame(expand.grid(x=dint$x,y=dint$y), z=c(dint$z))
br <- seq(floor(min(dat_interp$z)/dbr)*dbr, ceiling(max(dat_interp$z)/dbr)*dbr, dbr)
breaks <- cut(dat_interp$z, breaks = br)
breaks <- gsub(","," - ",breaks,fixed=TRUE)
dat_interp$breaks <- sapply(breaks, function(x){strsplit(x, "[( ]")[[1]][[2]]})
ret <- ggplot(dat_interp) +
aes(x=x, y=y, z=z) +
geom_tile(aes(fill=breaks)) +
scale_fill_manual("TEMP", values=tim.colors(length(br)), guide = guide_legend(reverse=TRUE)) +
stat_contour(breaks=br) +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0))
return(ret)
}
I already tried setting the break strings to values the following way:
dat_interp$breaks <- as.numeric(sapply(breaks, function(x){strsplit(x, "[( ]")[[1]][[2]]}))
This approach only leads to an error "Error: Continuous value supplied to discrete scale". I am sure there has to be a way to order the scale in a numeric way, and I'd be really glad if someone could help me out on this one.
ggplot2 will sort character values as strings, and factor values by their level. In your code,
breaks <- gsub(","," - ",breaks,fixed=TRUE)
converts the factor returned by cut to a character. Try omitting it.
If you need to change the display, use something like
levels(breaks) <- gsub(",", " - ", levels(breaks), fixed=TRUE)
With the helpful input of #krlmlr I managed to solve the problem. As he said, the crucial part was to just edit the levels(). For anyone interested, this is the important part:
breaks <- cut(dat_interp$z, breaks = br)
levels(breaks) <- gsub(","," - ", levels(breaks),fixed=TRUE)
dat_interp$breaks <- breaks
levels(dat_interp$breaks) <- as.numeric(sapply(levels(breaks), function(x){strsplit(x, "[( ]")[[1]][[2]]}))
This way I get the following result, properly sorted:
And the full code:
plot.isoline <- function(data, dbr=20, n_interp=200){
library(akima)
library(ggplot2)
library(fields)
data <- read.table(file = "http://s000.tinyupload.com/download.php?file_id=90687695741432763217&t=9068769574143276321730276", header = T)
dint <- interp(x = data$x, y = data$y, z = data$z,
xo = seq(min(data$x), max(data$x), length = n_interp),
yo = seq(min(data$y), max(data$y), length = n_interp))
dat_interp <- data.frame(expand.grid(x=dint$x,y=dint$y), z=c(dint$z))
br <- seq(floor(min(dat_interp$z)/dbr)*dbr, ceiling(max(dat_interp$z)/dbr)*dbr, dbr)
breaks <- cut(dat_interp$z, breaks = br)
levels(breaks) <- gsub(","," - ", levels(breaks),fixed=TRUE)
dat_interp$breaks <- breaks
levels(dat_interp$breaks) <- as.numeric(sapply(levels(breaks), function(x){strsplit(x, "[( ]")[[1]][[2]]}))
ret <- ggplot(dat_interp) +
aes(x=x, y=y, z=z) +
geom_tile(aes(fill=breaks)) +
scale_fill_manual("TEMP", values=tim.colors(length(br)), guide = guide_legend(reverse=TRUE)) +
stat_contour(breaks=br) +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0))
return(ret)
}

How to produce a meaningful draftsman/correlation plot for discrete values

One of my favorite tools for exploratory analysis is pairs(), however in the case of a limited number of discrete values, it falls flat as the dots all align perfectly. Consider the following:
y <- t(rmultinom(n=1000,size=4,prob=rep(.25,4)))
pairs(y)
It doesn't really give a good sense of correlation. Is there an alternative plot style that would?
If you change y to a data.frame you can add some 'jitter' and with the col option you can set the transparency level (the 4th number in rgb):
y <- data.frame(y)
pairs(sapply(y,jitter), col = rgb(0,0,0,.2))
Or you could use ggplot2's plotmatrix:
library(ggplot2)
plotmatrix(y) + geom_jitter(alpha = .2)
Edit: Since plotmatrix in ggplot2 is deprecated use ggpairs (GGally package mentioned in #hadley's comment above)
library(GGally)
ggpairs(y, lower = list(params = c(alpha = .2, position = "jitter")))
Here is an example using corrplot:
M <- cor(y)
corrplot.mixed(M)
You can find more examples in the intro
http://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html
Here are a couple of options using ggplot2:
library(ggplot2)
## re-arrange data (copied from plotmatrix function)
prep.plot <- function(data) {
grid <- expand.grid(x = 1:ncol(data), y = 1:ncol(data))
grid <- subset(grid, x != y)
all <- do.call("rbind", lapply(1:nrow(grid), function(i) {
xcol <- grid[i, "x"]
ycol <- grid[i, "y"]
data.frame(xvar = names(data)[ycol], yvar = names(data)[xcol],
x = data[, xcol], y = data[, ycol], data)
}))
all$xvar <- factor(all$xvar, levels = names(data))
all$yvar <- factor(all$yvar, levels = names(data))
return(all)
}
dat <- prep.plot(data.frame(y))
## plot with transparent jittered points
ggplot(dat, aes(x = x, y=y)) +
geom_jitter(alpha=.125) +
facet_grid(xvar ~ yvar) +
theme_bw()
## plot with color representing density
ggplot(dat, aes(x = factor(x), y=factor(y))) +
geom_bin2d() +
facet_grid(xvar ~ yvar) +
theme_bw()
I don't have enough credits yet to comment on #Vincent 's post - when doing
library(GGally)
ggpairs(y, lower = list(params = c(alpha = .2, position = "jitter")))
I get
Error in stop_if_params_exist(obj$params) :
'params' is a deprecated argument. Please 'wrap' the function to supply arguments. help("wrap", package = "GGally")
So it seems, based on the indicated help page, that it would need to be in this case here:
ydf <- as.data.frame(y)
regularPlot <- ggpairs(ydf, lower = list(continuous = wrap(ggally_points, alpha = .2, position = "jitter")))
regularPlot

Resources