Dodging vertical lines for median_hilow in ggplot - r

I need to plot lines that show median and IQR for 3 replicates, across multiple samples.
Data:
sampleid <- rep(1:20, each = 3)
replicate <- rep(1:3, 20)
sample1 <- seq(120,197, length.out = 60)
sample2 <- seq(113, 167, length.out = 60)
sample3 <- seq(90,180, length.out = 60)
What I have done so far?
df <- as.data.frame(cbind(sampleid,replicate,sample1, sample2, sample3))
library(reshape2)
long <- melt(df,id.vars = c('sampleid', 'replicate'))
ggplot(data = long, aes(x = variable, y = value, colour = factor(replicate))) + stat_summary(fun.data=median_hilow, conf.int=.5)
However, the plot of the IQR for replicates that I am getting are overlapped with each other for each sample. I would like to find out a way to "dodge" these 3 lines so that they are visible next to each other, without changing other parameters of the plot that I have achieved. Is this achievable?

You have to introduce jitter to the lines:
ggplot(data = long, aes(x = variable, y = value, colour = factor(replicate))) +
stat_summary(fun.data=median_hilow, fun.args = (conf.int=.5), position = "jitter")
Please note you also need to have your conf.int=5 wrapped in the fun.args.
Alternatively, change your x to factor(replicate) and add facet_wrap:
ggplot(data = long, aes(x = factor(replicate), y = value, colour = factor(replicate))) +
stat_summary(fun.data=median_hilow, fun.args = (conf.int=.5)) +
facet_wrap(~variable)

Related

Move chart labels of variables in opposite directions

I couldn't find out how to do this anywhere so I thought I would post the solution now that I've figured it out.
I created a simple chart with labels based on a data set in long format (see below for dat). There are two lines and the labels overlap. I would like to move the labels for the upper chart up and for the lower chart down.
library(dplyr)
library(ggplot2)
library(tidyr)
# sample data
dat <- data.frame(
x = seq(1, 10, length.out = 10),
y1 = seq(1, 5, length.out = 10),
y2 = seq(1, 6, length.out = 10))
# convert to long format
dat <- dat %>%
gather(var, value, -x)
# plot it
ggplot(data = dat, aes(x = x, y = value, color = var)) +
geom_line() +
geom_label(aes(label = value))
To move the labels in opposite directions, one can create a step function in nudge_y to multiply the upper line's labels by +1 times a nudge factor and the multiply the lower line's labels by -1 times the nudge factor:
# move labels in opposite directions
ggplot(data = dat, aes(x = x, y = value, color = var)) +
geom_line() +
geom_label(aes(label = value),
nudge_y = ifelse(dat$var == "y2", 1, -1) * 1)
This produces the following chart with adjusted labels.

ggplot change line color specified by x axis values

Code to reproduce:
myDat <- data.frame(Event = rep(c("Arrival", "Departure"), 3),
AtNode = c("StationA", "StationA", "Track", "Track", "StationB", "StationB"),
Lane = c("Lane1", "Lane1", "Lane2", "Lane2", "Lane1", "Lane1"),
atTime = c(10, 12, 18, 20, 34, 36),
Type = c("Station", "Station", "Track", "Track", "Station", "Station"),
Train = 1 )
ggplot(data =myDat, aes(x = atTime, y=factor(AtNode, levels = unique(paste(myDat[order(myDat$atTime),"AtNode"]))), group = Train, colour = Lane ))+
geom_point(data = myDat)+
geom_path(data = myDat[which(!grepl(pattern = "Track", myDat$Type)),])
Now i need to project the two green points (Y = "Track") on the orange line and color the line between the projected points the same color as the points.
Expected result: (without the points (Y ="Track")
Thanks in advance for every hint or trick!
Cheers
I don't think your output is the right way of showing what you want. You have factors on your y-axis, which means it ranges between 1 and 3.
Therefore, projecting a line there means nothing in terms of y-axis values.
For me, the correct way of showing your data would be like this
ggplot(data =myDat,
aes(x = atTime, y=factor(AtNode, levels = unique(paste(myDat[order(myDat$atTime),"AtNode"]))),
group = AtNode, colour = Lane ))+
geom_point()+
geom_line() +
labs(y = 'AtNode')
However, to do it how you asked, you can do some simple trigonometry to project your line segment
x1 = 1 + tan(asin(2/sqrt(484)))*6 #y projection given x = 18
x2 = 1 + tan(asin(2/sqrt(484)))*8 #y projection given x = 20
foo = data.frame(x = c(18,20), y = c(x1, x2), Lane = "Lane2")
ggplot(data = myDat, aes(x = atTime, y=factor(AtNode, levels = unique(paste(myDat[order(myDat$atTime),"AtNode"]))), group = 1, colour = Lane ))+
geom_path(data = myDat[which(!grepl(pattern = "Track", myDat$Type)),]) +
geom_line(data = foo, aes(x = x, y = y, color = Lane), size = 1) +
scale_y_discrete(drop = FALSE)
I don't think there is a quick solution to this, but you could do something like this:
myDat$AtNode <- factor(myDat$AtNode, levels = unique(paste(myDat[order(myDat$atTime),"AtNode"]))) #Generate factor here so we can use in imputation calculation
impute_rows <- which(myDat$Type == "Track") #Select rows to impute
slope_df <- myDat[impute_rows + c(-1,1), ] #Select rows before and after imputation to calculate slope
line <- lm(as.numeric(AtNode) ~ atTime, data = slope_df) #Get slope of line so we can do the calculations
df <- data.frame(x = myDat[impute_rows, "atTime"], y = myDat[impute_rows, "atTime"]*line$coefficients[["atTime"]] + line$coefficients[["(Intercept)"]], Lane = myDat[impute_rows,"Lane"], Train = myDat[impute_rows,"Train"])
ggplot(data =myDat, aes(x = atTime, y=AtNode, group = Train, colour = Lane ))+
geom_path(data = myDat[which(!grepl(pattern = "Track", myDat$Type)),]) +
geom_path(data = df, aes(x = x, y = y), size = 2) +
scale_y_discrete(drop = FALSE)
The idea is as follows:
Identify the rows you want to impute: which()
Identify the rows before and after the ones to impute slope_df
Using the rows before and after the desired values to impute generate equation of line you want to impute along (using the slope_df)
Generate data based on the line df <- data.frame(...)
Note that you also need the scale_y_discrete(drop = FALSE) so that the Track level isn't removed from the plot.

How to control legend with many groups

I have a plot like this:
Which was created with this code:
# Make data:
set.seed(42)
n <- 1000
df <- data.frame(values = sample(0:5, size = n, replace = T, prob = c(9/10, rep(0.0167,5))),
group = rep(1:100, each = 10),
fill2 = rep(rnorm(10), each = 100),
year = rep(2001:2010, times = 100)
)
df$values <- ifelse(df$year %in% 2001:2007 == T, 0, df$values)
# Plot
require(ggplot2)
p <- ggplot(data = df, aes(x = year, y = values, colour = as.factor(group))) + geom_line()
p
Since there are so many groups, the legend is really not helpfull.
Ideally I would like just two elements in the legend, one for group = 1 and for all the other groups (they should all have the same color). Is there a way to force this?
you can define a new variable that has only two values, but still plot lines according to their original group,
ggplot(data = df, aes(x = year, y = values, group = group,
colour = ifelse(group == 1, "1", "!1"))) +
geom_line() +
scale_colour_brewer("groups", palette="Set1")

Mix color and fill aesthetics in ggplot

I wonder if there is the possibility to change the fill main colour according to a categorical variable
Here is a reproducible example
df = data.frame(x = c(rnorm(10, mean = 0),
rnorm(10, mean = 3)),
y = c(rnorm(10, mean = 0),
rnorm(10, mean = 3)),
grp = c(rep('a', times = 10),
rep('b', times = 10)),
val = rep(1:10, times = 2))
ggplot(data = df,
aes(x = x,
y = y)) +
geom_point(pch = 21,
aes(color = grp,
fill = val,
size = val))
Of course it is easy to change the circle colour/shape, according to the variable grp, but I'd like to have the a group in shades of red and the b group in shades of blue.
I also thought about using facets, but don't know if the fill gradient can be changed for the two panels.
Anyone knows if that can be done, without gridExtra?
Thanks!
I think there are two ways to do this. The first is using the alpha aesthetic for your val column. This is a quick and easy way to accomplish your goal but may not be exactly what you want:
ggplot(data = df,
aes(x = x,
y = y)) +
geom_point(pch = 21,
aes(alpha=val,
fill = grp,
size = val)) + theme_minimal()
The second way would be to do something similar to this post: Vary the color gradient on a scatter plot created with ggplot2. I edited the code slightly so its not a range from white to your color of interest but from a lighter color to a darker color. This requires a little bit of work and using the scale_fill_identity function which basically takes a variable that has the colors you want and maps them directly to each point (so it doesn't do any scaling).
This code is:
#Rescale val to [0,1]
df$scaled_val <- rescale(df$val)
low_cols <- c("firebrick1","deepskyblue")
high_cols <- c("darkred","deepskyblue4")
df$col <- ddply(df, .(grp), function(x)
data.frame(col=apply(colorRamp(c(low_cols[as.numeric(x$grp)[1]], high_cols[as.numeric(x$grp)[1]]))(x$scaled_val),
1,function(x)rgb(x[1],x[2],x[3], max=255)))
)$col
df
ggplot(data = df,
aes(x = x,
y = y)) +
geom_point(pch = 21,
aes(
fill = col,
size = val)) + theme_minimal() +scale_fill_identity()
Thanks to this other post I found a way to visualize the fill bar in the legend, even though that wasn't what I meant to do.
Here's the ouptup
And the code
df = data.frame(x = c(rnorm(10, mean = 0),
rnorm(10, mean = 3)),
y = c(rnorm(10, mean = 0),
rnorm(10, mean = 3)),
grp = factor(c(rep('a', times = 10),
rep('b', times = 10)),
levels = c('a', 'b')),
val = rep(1:10, times = 2)) %>%
group_by(grp) %>%
mutate(scaledVal = rescale(val)) %>%
ungroup %>%
mutate(scaledValOffSet = scaledVal + 100*(as.integer(grp) - 1))
scalerange <- range(df$scaledVal)
gradientends <- scalerange + rep(c(0,100,200), each=2)
ggplot(data = df,
aes(x = x,
y = y)) +
geom_point(pch = 21,
aes(fill = scaledValOffSet,
size = val)) +
scale_fill_gradientn(colours = c('white',
'darkred',
'white',
'deepskyblue4'),
values = rescale(gradientends))
Basically one should rescale fill values (e.g. between 0 and 1) and separate them using another order of magnitude, provided by the categorical variable grp.
This is not what I wanted though: the snippet can be improved, of course, to make the whole thing less manual, but still lacks the simple usual discrete fill legend.

Bar plot of group means with lines of individual results overlaid

this is my first stack overflow post and I am a relatively new R user, so please go gently!
I have a data frame with three columns, a participant identifier, a condition (factor with 2 levels either Placebo or Experimental), and an outcome score.
set.seed(1)
dat <- data.frame(Condition = c(rep("Placebo",10),rep("Experimental",10)),
Outcome = rnorm(20,15,2),
ID = factor(rep(1:10,2)))
I would like to construct a bar plot with two bars with the mean outcome score for each condition and the standard deviation as an error bar. I would like to then overlay lines connecting points for each participant's score in each condition. So the plot displays the individual response as well as the group mean.If it is also possible I would like to include an axis break.
I don't seem to be able to find any advice in other threads, apologies if I am repeating a question.
Many Thanks.
p.s. I realise that presenting data in this way will not be to everyones tastes. It is for a specific requirement!
This ought to work:
library(ggplot2)
library(dplyr)
dat.summ <- dat %>% group_by(Condition) %>%
summarize(mean.outcome = mean(Outcome),
sd.outcome = sd(Outcome))
ggplot(dat.summ, aes(x = Condition, y = mean.outcome)) +
geom_bar(stat = "identity") +
geom_errorbar(aes(ymin = mean.outcome - sd.outcome,
ymax = mean.outcome + sd.outcome),
color = "dodgerblue", width = 0.3) +
geom_point(data = dat, aes(x = Condition, y = Outcome),
color = "firebrick", size = 1.2) +
geom_line(data = dat, aes(x = Condition, y = Outcome, group = ID),
color = "firebrick", size = 1.2, alpha = 0.5) +
scale_y_continuous(limits = c(0, max(dat$Outcome)))
Some people are better with ggplot's stat functions and arguments than I am and might do it differently. I prefer to just transform my data first.
set.seed(1)
dat <- data.frame(Condition = c(rep("Placebo",10),rep("Experimental",10)),
Outcome = rnorm(20,15,2),
ID = factor(rep(1:10,2)))
dat.w <- reshape(dat, direction = 'wide', idvar = 'ID', timevar = 'Condition')
means <- colMeans(dat.w[, 2:3])
sds <- apply(dat.w[, 2:3], 2, sd)
ci.l <- means - sds
ci.u <- means + sds
ci.width <- .25
bp <- barplot(means, ylim = c(0,20))
segments(bp, ci.l, bp, ci.u)
segments(bp - ci.width, ci.u, bp + ci.width, ci.u)
segments(bp - ci.width, ci.l, bp + ci.width, ci.l)
segments(x0 = bp[1], x1 = bp[2], y0 = dat.w[, 2], y1 = dat.w[, 3], col = 1:10)
points(c(rep(bp[1], 10), rep(bp[2], 10)), dat$Outcome, col = 1:10, pch = 19)
Here is a method using the transfomations inside ggplot2
ggplot(dat) +
stat_summary(aes(x=Condition, y=Outcome, group=Condition), fun.y="mean", geom="bar") +
stat_summary(aes(x=Condition, y=Outcome, group=Condition), fun.data="mean_se", geom="errorbar", col="green", width=.8, size=2) +
geom_line(aes(x=Condition, y=Outcome, group=ID), col="red")

Resources