How to control legend with many groups

How to control legend with many groups - r

I have a plot like this:
Which was created with this code:
# Make data:
set.seed(42)
n <- 1000
df <- data.frame(values = sample(0:5, size = n, replace = T, prob = c(9/10, rep(0.0167,5))),
group = rep(1:100, each = 10),
fill2 = rep(rnorm(10), each = 100),
year = rep(2001:2010, times = 100)
)
df$values <- ifelse(df$year %in% 2001:2007 == T, 0, df$values)
# Plot
require(ggplot2)
p <- ggplot(data = df, aes(x = year, y = values, colour = as.factor(group))) + geom_line()
p
Since there are so many groups, the legend is really not helpfull.
Ideally I would like just two elements in the legend, one for group = 1 and for all the other groups (they should all have the same color). Is there a way to force this?

you can define a new variable that has only two values, but still plot lines according to their original group,
ggplot(data = df, aes(x = year, y = values, group = group,
colour = ifelse(group == 1, "1", "!1"))) +
geom_line() +
scale_colour_brewer("groups", palette="Set1")

Related

Dodging vertical lines for median_hilow in ggplot

I need to plot lines that show median and IQR for 3 replicates, across multiple samples.
Data:
sampleid <- rep(1:20, each = 3)
replicate <- rep(1:3, 20)
sample1 <- seq(120,197, length.out = 60)
sample2 <- seq(113, 167, length.out = 60)
sample3 <- seq(90,180, length.out = 60)
What I have done so far?
df <- as.data.frame(cbind(sampleid,replicate,sample1, sample2, sample3))
library(reshape2)
long <- melt(df,id.vars = c('sampleid', 'replicate'))
ggplot(data = long, aes(x = variable, y = value, colour = factor(replicate))) + stat_summary(fun.data=median_hilow, conf.int=.5)
However, the plot of the IQR for replicates that I am getting are overlapped with each other for each sample. I would like to find out a way to "dodge" these 3 lines so that they are visible next to each other, without changing other parameters of the plot that I have achieved. Is this achievable?

You have to introduce jitter to the lines:
ggplot(data = long, aes(x = variable, y = value, colour = factor(replicate))) +
stat_summary(fun.data=median_hilow, fun.args = (conf.int=.5), position = "jitter")
Please note you also need to have your conf.int=5 wrapped in the fun.args.
Alternatively, change your x to factor(replicate) and add facet_wrap:
ggplot(data = long, aes(x = factor(replicate), y = value, colour = factor(replicate))) +
stat_summary(fun.data=median_hilow, fun.args = (conf.int=.5)) +
facet_wrap(~variable)

ggplot change line color specified by x axis values

Code to reproduce:
myDat <- data.frame(Event = rep(c("Arrival", "Departure"), 3),
AtNode = c("StationA", "StationA", "Track", "Track", "StationB", "StationB"),
Lane = c("Lane1", "Lane1", "Lane2", "Lane2", "Lane1", "Lane1"),
atTime = c(10, 12, 18, 20, 34, 36),
Type = c("Station", "Station", "Track", "Track", "Station", "Station"),
Train = 1 )
ggplot(data =myDat, aes(x = atTime, y=factor(AtNode, levels = unique(paste(myDat[order(myDat$atTime),"AtNode"]))), group = Train, colour = Lane ))+
geom_point(data = myDat)+
geom_path(data = myDat[which(!grepl(pattern = "Track", myDat$Type)),])
Now i need to project the two green points (Y = "Track") on the orange line and color the line between the projected points the same color as the points.
Expected result: (without the points (Y ="Track")
Thanks in advance for every hint or trick!
Cheers

I don't think your output is the right way of showing what you want. You have factors on your y-axis, which means it ranges between 1 and 3.
Therefore, projecting a line there means nothing in terms of y-axis values.
For me, the correct way of showing your data would be like this
ggplot(data =myDat,
aes(x = atTime, y=factor(AtNode, levels = unique(paste(myDat[order(myDat$atTime),"AtNode"]))),
group = AtNode, colour = Lane ))+
geom_point()+
geom_line() +
labs(y = 'AtNode')
However, to do it how you asked, you can do some simple trigonometry to project your line segment
x1 = 1 + tan(asin(2/sqrt(484)))*6 #y projection given x = 18
x2 = 1 + tan(asin(2/sqrt(484)))*8 #y projection given x = 20
foo = data.frame(x = c(18,20), y = c(x1, x2), Lane = "Lane2")
ggplot(data = myDat, aes(x = atTime, y=factor(AtNode, levels = unique(paste(myDat[order(myDat$atTime),"AtNode"]))), group = 1, colour = Lane ))+
geom_path(data = myDat[which(!grepl(pattern = "Track", myDat$Type)),]) +
geom_line(data = foo, aes(x = x, y = y, color = Lane), size = 1) +
scale_y_discrete(drop = FALSE)

I don't think there is a quick solution to this, but you could do something like this:
myDat$AtNode <- factor(myDat$AtNode, levels = unique(paste(myDat[order(myDat$atTime),"AtNode"]))) #Generate factor here so we can use in imputation calculation
impute_rows <- which(myDat$Type == "Track") #Select rows to impute
slope_df <- myDat[impute_rows + c(-1,1), ] #Select rows before and after imputation to calculate slope
line <- lm(as.numeric(AtNode) ~ atTime, data = slope_df) #Get slope of line so we can do the calculations
df <- data.frame(x = myDat[impute_rows, "atTime"], y = myDat[impute_rows, "atTime"]*line$coefficients[["atTime"]] + line$coefficients[["(Intercept)"]], Lane = myDat[impute_rows,"Lane"], Train = myDat[impute_rows,"Train"])
ggplot(data =myDat, aes(x = atTime, y=AtNode, group = Train, colour = Lane ))+
geom_path(data = myDat[which(!grepl(pattern = "Track", myDat$Type)),]) +
geom_path(data = df, aes(x = x, y = y), size = 2) +
scale_y_discrete(drop = FALSE)
The idea is as follows:
Identify the rows you want to impute: which()
Identify the rows before and after the ones to impute slope_df
Using the rows before and after the desired values to impute generate equation of line you want to impute along (using the slope_df)
Generate data based on the line df <- data.frame(...)
Note that you also need the scale_y_discrete(drop = FALSE) so that the Track level isn't removed from the plot.

Mix color and fill aesthetics in ggplot

I wonder if there is the possibility to change the fill main colour according to a categorical variable
Here is a reproducible example
df = data.frame(x = c(rnorm(10, mean = 0),
rnorm(10, mean = 3)),
y = c(rnorm(10, mean = 0),
rnorm(10, mean = 3)),
grp = c(rep('a', times = 10),
rep('b', times = 10)),
val = rep(1:10, times = 2))
ggplot(data = df,
aes(x = x,
y = y)) +
geom_point(pch = 21,
aes(color = grp,
fill = val,
size = val))
Of course it is easy to change the circle colour/shape, according to the variable grp, but I'd like to have the a group in shades of red and the b group in shades of blue.
I also thought about using facets, but don't know if the fill gradient can be changed for the two panels.
Anyone knows if that can be done, without gridExtra?
Thanks!

I think there are two ways to do this. The first is using the alpha aesthetic for your val column. This is a quick and easy way to accomplish your goal but may not be exactly what you want:
ggplot(data = df,
aes(x = x,
y = y)) +
geom_point(pch = 21,
aes(alpha=val,
fill = grp,
size = val)) + theme_minimal()
The second way would be to do something similar to this post: Vary the color gradient on a scatter plot created with ggplot2. I edited the code slightly so its not a range from white to your color of interest but from a lighter color to a darker color. This requires a little bit of work and using the scale_fill_identity function which basically takes a variable that has the colors you want and maps them directly to each point (so it doesn't do any scaling).
This code is:
#Rescale val to [0,1]
df$scaled_val <- rescale(df$val)
low_cols <- c("firebrick1","deepskyblue")
high_cols <- c("darkred","deepskyblue4")
df$col <- ddply(df, .(grp), function(x)
data.frame(col=apply(colorRamp(c(low_cols[as.numeric(x$grp)[1]], high_cols[as.numeric(x$grp)[1]]))(x$scaled_val),
1,function(x)rgb(x[1],x[2],x[3], max=255)))
)$col
df
ggplot(data = df,
aes(x = x,
y = y)) +
geom_point(pch = 21,
aes(
fill = col,
size = val)) + theme_minimal() +scale_fill_identity()

Thanks to this other post I found a way to visualize the fill bar in the legend, even though that wasn't what I meant to do.
Here's the ouptup
And the code
df = data.frame(x = c(rnorm(10, mean = 0),
rnorm(10, mean = 3)),
y = c(rnorm(10, mean = 0),
rnorm(10, mean = 3)),
grp = factor(c(rep('a', times = 10),
rep('b', times = 10)),
levels = c('a', 'b')),
val = rep(1:10, times = 2)) %>%
group_by(grp) %>%
mutate(scaledVal = rescale(val)) %>%
ungroup %>%
mutate(scaledValOffSet = scaledVal + 100*(as.integer(grp) - 1))
scalerange <- range(df$scaledVal)
gradientends <- scalerange + rep(c(0,100,200), each=2)
ggplot(data = df,
aes(x = x,
y = y)) +
geom_point(pch = 21,
aes(fill = scaledValOffSet,
size = val)) +
scale_fill_gradientn(colours = c('white',
'darkred',
'white',
'deepskyblue4'),
values = rescale(gradientends))
Basically one should rescale fill values (e.g. between 0 and 1) and separate them using another order of magnitude, provided by the categorical variable grp.
This is not what I wanted though: the snippet can be improved, of course, to make the whole thing less manual, but still lacks the simple usual discrete fill legend.

control horizontal spacing between geom_bar in ggplot2

I have the following code:
library("ggplot2")
set.seed(12351234)
names <- factor(rep(paste("C", 1:10, sep = "_"), each = 10))
time <- rep(1:10, 10)
outcome <- rnorm(mean = 1e7, sd = 1e7, n = length(time))
outcome <-ifelse(outcome < 0, 0, outcome)
data.toy <- data.frame(names, time, outcome)
ggplot(data = data.toy, aes(y = outcome, x = time)) + geom_bar(stat = "identity", aes(fill = names)) + scale_x_continuous(breaks = unique(data.toy$time))
and it produces the following image: http://picpaste.com/data_toy-OR0jVHj5.png
I am wondering if there is a way to remove the horizontal "gray" space between the bars on the x-axis (the space that the arrows are pointing at). I suspect I am using this geom incorrectly as time is not categorical and there is a more appropriate geom for this.

Boxplot width in ggplot with cross classified groups

I am making boxplots with ggplot with data that is classified by 2 factor variables. I'd like to have the box sizes reflect sample size via varwidth = TRUE but when I do this the boxes overlap.
1) Some sample data with a 3 x 2 structure
data <- data.frame(group1= sample(c("A","B","C"),100, replace = TRUE),group2= sample(c("D","E"),100, replace = TRUE) ,response = rnorm(100, mean = 0, sd = 1))
2) Default boxplots: ggplot without variable width
ggplot(data = data, aes(y = response, x = group1, color = group2)) + geom_boxplot()
I like how the first level of grouping is shown.
Now I try to add variable widths...
3) ...and What I get when varwidth = TRUE
ggplot(data = data, aes(y = response, x = group1, color = group2)) + geom_boxplot(varwidth = T)
This overlap seems to occur whether I use color = group2 or group = group2 in both the main call to ggplot and in the geom_boxplot statement. Fussing with position_dodge doesn't seem to help either.
4) A solution I don't like visually is to make unique factors by combining my group1 and group2
data$grp.comb <- paste(data$group1, data$group2)
ggplot(data = data, aes(y = response, x = grp.comb, color = group2)) + geom_boxplot()
I prefer having things grouped to reflect the cross classification
5) The way forward:
I'd like to either a)figure out how to either make varwidth = TRUE not cause the boxes to overlap or b)manually adjusted the space between the combined groups so that boxes within the 1st level of grouping are closer together.

I think your problem can be solved best by using facet_wrap.
library(ggplot2)
data <- data.frame(group1= sample(c("A","B","C"),100, replace = TRUE), group2=
sample(c("D","E"),100, replace = TRUE) ,response = rnorm(100, mean = 0, sd = 1))
ggplot(data = data, aes(y = response, x = group2, color = group2)) +
geom_boxplot(varwidth = TRUE) +
facet_wrap(~group1)
Which gives:

A recent update to ggplot2 makes it so that the code provided by #N Brouwer in (3) works as expected:
# library(devtools)
# install_github("tidyverse/ggplot2")
packageVersion("ggplot2") # works with v2.2.1.9000
library(ggplot2)
set.seed(1234)
data <- data.frame(group1= sample(c("A","B","C"), 100, replace = TRUE),
group2= sample(c("D","E"), 100, replace = TRUE),
response = rnorm(100, mean = 0, sd = 1))
ggplot(data = data, aes(y = response, x = group1, color = group2)) +
geom_boxplot(varwidth = T)
(I'm a new user and can't post images inline)
fig 1

This question has been answered here ggplot increase distance between boxplots
The answer involves using the position = position_dodge() argument of geom_boxplot().
For your example:
data <- data.frame(group1= sample(c("A","B","C"),100, replace = TRUE), group2=
sample(c("D","E"),100, replace = TRUE) ,response = rnorm(100, mean = 0, sd = 1))
ggplot(data = data, aes(y = response, x = group1, color = group2)) +
geom_boxplot(position = position_dodge(1))

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

How to control legend with many groups - r

you can define a new variable that has only two values, but still plot lines according to their original group, ggplot(data = df, aes(x = year, y = values, group = group, colour = ifelse(group == 1, "1", "!1"))) + geom_line() + scale_colour_brewer("groups", palette="Set1")

Related

Dodging vertical lines for median_hilow in ggplot

ggplot change line color specified by x axis values

Mix color and fill aesthetics in ggplot

control horizontal spacing between geom_bar in ggplot2

Boxplot width in ggplot with cross classified groups

Categories

Resources