position_stack() changes data when used with geom_line() in ggplot2 - r

I would like to stack several geom_line() plots one above the other. However they appeared with changed data.
Here is an example:
# make 3 data.frame with some random data
x <- seq(5, 15, length = 1000)
data1 <- data.frame(x = x, y = dnorm(x, mean = 10, sd = 3), sample = "1")
data2 <- data.frame(x = x, y = dnorm(x, mean = 7.5, sd = 3), sample = "2")
data3 <- data.frame(x = x, y = dnorm(x, mean = 12.5, sd = 1), sample = "3")
# bind data
data <- bind_rows(data1, data2, data3)
# plot data without stacking
plot.data <- data %>% ggplot(mapping = aes(x = x, y = y, color = sample)) + geom_line()
# plot data with stacking
plot.data <- data %>% ggplot(mapping = aes(x = x, y = y, color = sample)) + geom_line(position = position_stack(vjust = 1, reverse = T))
The plot without stacking looks like this:
The plot with stacking looks like this:
So it seems that position_stack sums the data, not shifts them to some constnant value, which is not expected behaviour for geom_line in my opinion. Could you suggest how to make the plots to be just shifted one above the other?

Related

ggplot: Drawing tiles / rectangles with discrete variables

I'm attempting to draw tiles / rectangles to get the following result:
library(tidyverse)
library(plotly)
set.seed(0)
df <- tibble(
a = runif(5),
b = runif(5),
c = runif(5),
d = runif(5),
case_id = 1:5
) %>% tidyr::pivot_longer(cols = -case_id)
plot <- ggplot2::ggplot(
data = df,
mapping = aes(
x = name,
y = value,
group = case_id
)
) + geom_point()
plot_boxes_y <- seq(from = 0, to = 1, by = .2)
plot_boxes_x <- unique(df$name) %>% length()
for (x in 1:plot_boxes_x) {
for (y in plot_boxes_y) {
plot <- plot + geom_rect(
mapping = aes_(
xmin = x - .5,
xmax = x + .5,
ymin = y - .5,
ymax = y + .5
),
color = "red",
fill = NA
)
}
}
plotly::ggplotly(plot)
As you can see, I currently do this by looping through coordinates and drawing each rectangle individually. The problem is, that this generates many layers which makes plotly::ggplotly() really slow on large datasets.
Therefore, I'm looking for a more efficient way. Please note, that I cannot use the panel.grid, since I intend to visualize z-data by filling rectangles later on.
My approach was to draw geom_tile() on top of the scatter plot:
# my attempt
df$z <- rep(0, nrow(df))
plot2 <- ggplot2::ggplot(
data = df,
mapping = aes(
x = name,
y = value,
color = z,
group = case_id
)
) + geom_point() + geom_tile()
I assume that this fails because of the fact that name is a discrete variable? So, how can i efficiently draw tiles in addition to my scatterplot?
Thanks
Here is a solution using the geom_tile option. The key here creating a data frame to hold the coordinates of the grid and then specifying the aesthetics individually in each of the function calls.
library(ggplot2)
library(tidyr)
set.seed(0)
df <- tibble(
a = runif(5),
b = runif(5),
c = runif(5),
d = runif(5),
case_id = 1:5
) %>% pivot_longer(cols = -case_id)
df$z <- rep(0, nrow(df))
#make data frame for the grid corrdinates
grid<-data.frame(x=factor( ordered( 1:4), labels = c("a", "b", "c", "d" )),
y=rep(seq(0, 1, .1), each=4))
#plot using geom_tile & geom_point
plot2 <- ggplot2::ggplot() + geom_tile(data=grid, aes(x=x, y=y), fill=NA, col="red") +
geom_point(data = df,
mapping = aes(
x = name,
y = value,
color = z,
group = case_id))
print(plot2)
if you don't mind them going beyond the axis
ggplot(df,aes(x=name,y=value)) + geom_point() +
geom_vline(xintercept=seq(0.5,4.5,by=1)) +
geom_hline(yintercept=seq(0,2,by=.2))
else:
#make a new data frame
GRIDS = rbind(
# the vertical lines
data.frame(x=seq(0.5,4.5,by=1),xend=seq(0.5,4.5,by=1),y=0,yend=2),
# the horizontal lines
data.frame(x=0.5,xend=4.5,y=seq(0,2,by=.2),yend=seq(0,2,by=.2))
)
ggplot(df,aes(x=name,y=value)) + geom_point() +
geom_segment(data=GRIDS,aes(x=x,y=y,xend=xend,yend=yend),col="red")

How to control legend with many groups

I have a plot like this:
Which was created with this code:
# Make data:
set.seed(42)
n <- 1000
df <- data.frame(values = sample(0:5, size = n, replace = T, prob = c(9/10, rep(0.0167,5))),
group = rep(1:100, each = 10),
fill2 = rep(rnorm(10), each = 100),
year = rep(2001:2010, times = 100)
)
df$values <- ifelse(df$year %in% 2001:2007 == T, 0, df$values)
# Plot
require(ggplot2)
p <- ggplot(data = df, aes(x = year, y = values, colour = as.factor(group))) + geom_line()
p
Since there are so many groups, the legend is really not helpfull.
Ideally I would like just two elements in the legend, one for group = 1 and for all the other groups (they should all have the same color). Is there a way to force this?
you can define a new variable that has only two values, but still plot lines according to their original group,
ggplot(data = df, aes(x = year, y = values, group = group,
colour = ifelse(group == 1, "1", "!1"))) +
geom_line() +
scale_colour_brewer("groups", palette="Set1")

Mix color and fill aesthetics in ggplot

I wonder if there is the possibility to change the fill main colour according to a categorical variable
Here is a reproducible example
df = data.frame(x = c(rnorm(10, mean = 0),
rnorm(10, mean = 3)),
y = c(rnorm(10, mean = 0),
rnorm(10, mean = 3)),
grp = c(rep('a', times = 10),
rep('b', times = 10)),
val = rep(1:10, times = 2))
ggplot(data = df,
aes(x = x,
y = y)) +
geom_point(pch = 21,
aes(color = grp,
fill = val,
size = val))
Of course it is easy to change the circle colour/shape, according to the variable grp, but I'd like to have the a group in shades of red and the b group in shades of blue.
I also thought about using facets, but don't know if the fill gradient can be changed for the two panels.
Anyone knows if that can be done, without gridExtra?
Thanks!
I think there are two ways to do this. The first is using the alpha aesthetic for your val column. This is a quick and easy way to accomplish your goal but may not be exactly what you want:
ggplot(data = df,
aes(x = x,
y = y)) +
geom_point(pch = 21,
aes(alpha=val,
fill = grp,
size = val)) + theme_minimal()
The second way would be to do something similar to this post: Vary the color gradient on a scatter plot created with ggplot2. I edited the code slightly so its not a range from white to your color of interest but from a lighter color to a darker color. This requires a little bit of work and using the scale_fill_identity function which basically takes a variable that has the colors you want and maps them directly to each point (so it doesn't do any scaling).
This code is:
#Rescale val to [0,1]
df$scaled_val <- rescale(df$val)
low_cols <- c("firebrick1","deepskyblue")
high_cols <- c("darkred","deepskyblue4")
df$col <- ddply(df, .(grp), function(x)
data.frame(col=apply(colorRamp(c(low_cols[as.numeric(x$grp)[1]], high_cols[as.numeric(x$grp)[1]]))(x$scaled_val),
1,function(x)rgb(x[1],x[2],x[3], max=255)))
)$col
df
ggplot(data = df,
aes(x = x,
y = y)) +
geom_point(pch = 21,
aes(
fill = col,
size = val)) + theme_minimal() +scale_fill_identity()
Thanks to this other post I found a way to visualize the fill bar in the legend, even though that wasn't what I meant to do.
Here's the ouptup
And the code
df = data.frame(x = c(rnorm(10, mean = 0),
rnorm(10, mean = 3)),
y = c(rnorm(10, mean = 0),
rnorm(10, mean = 3)),
grp = factor(c(rep('a', times = 10),
rep('b', times = 10)),
levels = c('a', 'b')),
val = rep(1:10, times = 2)) %>%
group_by(grp) %>%
mutate(scaledVal = rescale(val)) %>%
ungroup %>%
mutate(scaledValOffSet = scaledVal + 100*(as.integer(grp) - 1))
scalerange <- range(df$scaledVal)
gradientends <- scalerange + rep(c(0,100,200), each=2)
ggplot(data = df,
aes(x = x,
y = y)) +
geom_point(pch = 21,
aes(fill = scaledValOffSet,
size = val)) +
scale_fill_gradientn(colours = c('white',
'darkred',
'white',
'deepskyblue4'),
values = rescale(gradientends))
Basically one should rescale fill values (e.g. between 0 and 1) and separate them using another order of magnitude, provided by the categorical variable grp.
This is not what I wanted though: the snippet can be improved, of course, to make the whole thing less manual, but still lacks the simple usual discrete fill legend.

control horizontal spacing between geom_bar in ggplot2

I have the following code:
library("ggplot2")
set.seed(12351234)
names <- factor(rep(paste("C", 1:10, sep = "_"), each = 10))
time <- rep(1:10, 10)
outcome <- rnorm(mean = 1e7, sd = 1e7, n = length(time))
outcome <-ifelse(outcome < 0, 0, outcome)
data.toy <- data.frame(names, time, outcome)
ggplot(data = data.toy, aes(y = outcome, x = time)) + geom_bar(stat = "identity", aes(fill = names)) + scale_x_continuous(breaks = unique(data.toy$time))
and it produces the following image: http://picpaste.com/data_toy-OR0jVHj5.png
I am wondering if there is a way to remove the horizontal "gray" space between the bars on the x-axis (the space that the arrows are pointing at). I suspect I am using this geom incorrectly as time is not categorical and there is a more appropriate geom for this.

insert labels in proportional bar chart with ggplot2 and geom_text

I am trying to insert labels into a proportional barchart: one label per segment, with as text the percentage of each segment. With the help of thothal I managed to do this:
var1 <- factor(as.character(c(1,1,2,3,1,4,3,2,3,2,1,4,2,3,2,1,4,3,1,2)))
var2 <- factor(as.character(c(1,4,2,3,4,2,1,2,3,4,2,1,1,3,2,1,2,4,3,2)))
data <- data.frame(var1, var2)
dat <- ddply(data, .(var1), function(.) {
res <- cumsum(prop.table(table(factor(.$var2))))
data.frame(lab = names(res), y = c(res))
})
ggplot(data, aes(x = var1)) + geom_bar(aes(fill = var2), position = 'fill') +
geom_text(aes(label = lab, x = var1, y = y), data = dat)
I would like to have for labels the percentage of each level, and not the level name.
Any help appreciated!
You are telling geom_text to use var2 as your y variable. That is in fact as.numeric(data$var2), which translates to a range of 1-4. However, your barplot uses the cumulative percentages.
Hence you have to calculate these positions before:
library(ggplot2)
library(plyr) # just for convenience
var1 <- factor(as.character(c(1,1,2,3,1,4,3,2,3,2,1,4,2,3,2,1,4,3,1,2)))
var2 <- factor(as.character(c(1,4,2,3,4,2,1,2,3,4,2,1,1,3,2,1,2,4,3,2)))
data <- data.frame(var1, var2)
dat <- ddply(data, .(var1), function(.) {
res <- cumsum(prop.table(table(factor(.$var2)))) # re-factor to use only used levels
res2 <- prop.table(table(factor(.$var2))) # re-factor to use only used levels
data.frame(lab = names(res), y = c(res), lab2 = c(res2))
})
ggplot(data, aes(x = var1)) + geom_bar(aes(fill = var2), position = 'fill') +
geom_text(aes(label = round(lab2, 2), x = var1, y = y), data = dat)
This places the labs at the end of each bar. If you want to have them slightly offset, you should play arround in the creation of dat.
Another way to get non-cumulative percentage plus centering the labels, for future reference:
dat <- ddply(data, .(var1), function(.) {
good <- prop.table(table(factor(.$var2)))
res <- cumsum(prop.table(table(factor(.$var2))))
data.frame(lab = names(res), y = c(res), good = good, pos = cumsum(good) - 0.5*good)
})
ggplot(data, aes(x = var1)) + geom_bar(aes(fill = var2), position = 'fill') +
geom_text(aes(label = round(good.Freq, 2), x = var1, y = pos.Freq), data = dat)
I used the following code and work well for me, give it a try.
geom_text(aes(label = paste(round(dat2$value,0), "%"),
vjust = ifelse(value >= 0, -0.05, 1.15)
),
size = 4, position = position_stack(vjust=0.5)
)
Basically, you need label = paste(y value, "%"). In my code, dat2 is the data file name; value is the Y value in the figure. In this case, I rounded up the number with 0 decimal.Good luck.

Resources