I have the following data set called t:
# Example data: two uniformly distributed measurement columns (V1, V2),
# a three-level grouping factor with four rows per level, a running index x
# and a colour code that alternates R/G in runs of 3, 4, 3 and 2 rows.
n <- 12
t <- data.frame(
  V1 = runif(n, min = 0.12, max = 0.35),
  V2 = runif(n, min = 0.25, max = 0.39),
  group = gl(n = 3, k = 4, labels = c("a1", "a2", "a3")),
  x = seq_len(n),
  color = rep(c("R", "G", "R", "G"), times = c(3, 4, 3, 2))
)
I created the following plot from this data.
# Scatter of V1 and V2 against x, coloured by the `color` column.
# The point size is a fixed value, so it is passed outside aes(). Inside aes()
# the constant 10 would be treated as a data mapping: it would be run through
# the size scale and add a meaningless "size" legend to the plot.
p <- ggplot(t, aes(x, colour = color)) +
  geom_point(aes(y = V1), size = 10) +
  geom_point(aes(y = V2), size = 10)
What I want to do now is to connect the points depending on the group column (e.g., points of group a1 will be connected with a blue line, points of group a2 with a yellow line, ...), and I want the line type to differ between V1 and V2 (a dashed line for V1 and a solid line for V2).
How can this be done?
First of all: naming a dataset "t" is not a good idea because it is confusing since there is a function t() as well.
The easiest way is to melt() your dataset first
# Reshape to long format, keeping group, x and color as identifier columns.
# The remaining columns are stacked into `variable` (the former column name)
# and `value` (its number), which the plot below maps to linetype and y.
# NOTE(review): melt() is presumably reshape2::melt -- the snippet does not
# show which package is attached.
Molten <- melt(t, id.vars = c("group", "x", "color"))

# One line per group/variable combination: colour by group, line type by variable.
ggplot(
  data = Molten,
  mapping = aes(x = x, y = value, colour = group, linetype = variable)
) +
  geom_line()
Have a look at the ggplot2 website on how to customise the colours.
If you want to plot your graph without using melt():
# Same figure without reshaping: one layer per measurement column, both
# coloured by group; V2 keeps the default line type and V1 is drawn dashed.
p <- ggplot(t) +
  geom_line(aes(x = x, y = V2, colour = group)) +
  geom_line(aes(x = x, y = V1, colour = group), linetype = "dashed")
Related
Hi I have a much larger data frame but a sample dummy df is as follows:
# Dummy exam data: 2 names x 2 exams x 2 subjects x 2 topics = 16 rows,
# with scores drawn without replacement from 1..100 under a fixed seed.
set.seed(23)
df <- data.frame(
  name = rep(c("Bob", "Tom"), each = 8),
  topic = rep(c("Reading", "Writing"), times = 8),
  subject = rep(rep(c("English", "Spanish"), each = 2), times = 4),
  exam = rep(rep(c("First", "Second"), each = 4), times = 2),
  score = sample(1:100, 16)
)
I have to plot it in the way shown in the picture below (for my original data frame) but with lines connecting the scores corresponding to each name between the first and second class in the exam variable, I tried geom_line(aes(group=name)) but the lines are not connected in the right way. Is there any way to connect the points that also respects the grouping by the fill variable similar to how the position_dodge() helps separate the points by their fill grouping? Thanks a lot!
library(ggplot2)
# Box plot of score per topic, filled by exam and faceted by subject, with the
# raw points dodged to sit on top of their boxes.
# The data frame is passed straight to ggplot(): the original piped it in with
# `%>%`, which ggplot2 does not export, so the call failed unless magrittr or
# dplyr happened to be attached as well.
ggplot(df, aes(x = topic, y = score, fill = exam)) +
  geom_boxplot(outlier.shape = NA) +
  geom_point(size = 1.75, position = position_dodge(width = 0.75)) +
  facet_grid(~subject, switch = "y")
One option to achieve your desired result would be to group the lines by name and topic and do the dodging of the lines manually instead of relying on position_dodge. To this end, convert topic to a numeric for the geom_line and shift the position by the necessary amount to align the lines with the dodged points:
# Rebuild the example data with a fixed seed so the scores are reproducible.
set.seed(23)
df <- data.frame(
  name = rep(c("Bob", "Tom"), each = 8),
  topic = rep(c("Reading", "Writing"), times = 8),
  subject = rep(rep(c("English", "Spanish"), each = 2), times = 4),
  exam = rep(rep(c("First", "Second"), each = 4), times = 2),
  score = sample(1:100, 16)
)

library(ggplot2)

ggplot(data = df, mapping = aes(x = topic, y = score, fill = exam)) +
  geom_boxplot(outlier.shape = NA) +
  geom_point(size = 1.75, position = position_dodge(width = 0.75)) +
  # Manual dodge for the connecting lines: topic is turned into its numeric
  # position and shifted by a quarter of the dodge width (0.75 / 4), to the
  # left for the "First" exam and to the right otherwise. Each line joins the
  # rows that share a name and a topic.
  geom_line(
    mapping = aes(
      x = as.numeric(factor(topic)) + .75 / 4 * ifelse(exam == "First", -1, 1),
      group = interaction(name, topic)
    )
  ) +
  facet_grid(~subject, switch = "y")
Say I have this data frame:
# Example data: six treatments (A-F) with six yearly observations each.
# A and B cover 1999-2004, C-F cover 2005-2010.
treatment <- rep(c("A", "B", "C", "D", "E", "F"), each = 6)
year <- as.numeric(c(rep(1999:2004, 2), rep(2005:2010, 4)))
variable <- c(runif(6, 4, 5), runif(6, 5, 6), runif(6, 3, 4),
              runif(6, 4, 5), runif(6, 5, 6), runif(6, 6, 7))
se <- c(runif(6, 0.2, 0.5), runif(6, 0.2, 0.5), runif(6, 0.2, 0.5),
        runif(6, 0.2, 0.5), runif(6, 0.2, 0.5), runif(6, 0.2, 0.5))
id <- 1:36

# Assemble the columns through data.frame() so each keeps its own type.
# The original used cbind(), which builds a character matrix: every number was
# converted to text and had to be parsed back with as.numeric(), which can
# lose precision. The only visible difference is that `id` now stays an
# integer instead of becoming a character column.
df1 <- as.data.table(
  data.frame(id, treatment, year, variable, se, stringsAsFactors = FALSE)
)
As I mentioned in a previous question (draw two lines with the same origin using ggplot2 in R), I wanted to use ggplot2 to display my data in a specific way.
I managed to do so using the following script:
# y-coordinates of the points the connecting segments run between:
# the last year (2004) of treatments A and B, and the first year (2005) of
# treatments C, D, E and F. (The original assigned y5 twice; the redundant
# second assignment has been removed.)
y1 <- df1[df1$treatment == "A" & df1$year == 2004, ]$variable
y2 <- df1[df1$treatment == "B" & df1$year == 2004, ]$variable
y3 <- df1[df1$treatment == "C" & df1$year == 2005, ]$variable
y4 <- df1[df1$treatment == "D" & df1$year == 2005, ]$variable
y5 <- df1[df1$treatment == "E" & df1$year == 2005, ]$variable
y6 <- df1[df1$treatment == "F" & df1$year == 2005, ]$variable
# Base plot of `variable` against `year`, grouped and coloured by treatment.
# Layers: a line per treatment (line type also mapped to treatment), points,
# and error bars spanning variable - se to variable + se.
p <- ggplot(df1,aes(x=year,y=variable,group=treatment,color=treatment))+
geom_line(aes(y = variable, group = treatment, linetype = treatment, color = treatment),size=1.5,lineend = "round") +
# Manual line types, one entry per treatment: the 4th and 6th are dashed.
scale_linetype_manual(values=c('solid','solid','solid','dashed','solid','dashed')) +
geom_point(aes(colour=factor(treatment)),size=4)+
geom_errorbar(aes(ymin=variable-se,ymax=variable+se),width=0.2,size=1.5)+
# In the colour legend, drop the point glyph (shape = NA) and draw each key
# with the same solid/dashed pattern that is used for the lines.
guides(colour = guide_legend(override.aes = list(shape=NA,linetype = c("solid", "solid",'solid','dashed','solid','dashed'))))
# Final figure: labels, theme, x axis and manual colours, plus four connectors
# running from the 2004 value of A (y1) / B (y2) to the 2005 value of
# C (y3), D (y4), E (y5) and F (y6).
#
# The connectors are drawn with annotate("segment", ...) instead of
# geom_segment(aes(...)). A geom_segment() layer inherits the plot data, so
# constants placed inside aes() are recycled and the very same segment is
# drawn once for every row of df1. The stacked copies render heavier and more
# jagged than the single stroke produced by geom_line(). annotate() draws each
# connector exactly once. lineend = "round" matches the geom_line() layer of p.
p +
  labs(title = "Title", x = "years", y = "Variable 1") +
  theme_classic() +
  scale_x_continuous(
    breaks = c(1998:2010),
    labels = c(1998:2010),
    limits = c(1998.5, 2010.5)
  ) +
  annotate("segment", x = 2004, y = y1, xend = 2005, yend = y3,
           colour = "blue1", size = 1.5, linetype = "solid", lineend = "round") +
  annotate("segment", x = 2004, y = y1, xend = 2005, yend = y4,
           colour = "blue1", size = 1.5, linetype = "dashed", lineend = "round") +
  annotate("segment", x = 2004, y = y2, xend = 2005, yend = y5,
           colour = "red3", size = 1.5, linetype = "solid", lineend = "round") +
  annotate("segment", x = 2004, y = y2, xend = 2005, yend = y6,
           colour = "red3", size = 1.5, linetype = "dashed", lineend = "round") +
  scale_color_manual(values = c("blue1", "red3", "blue1", "blue1", "red3", "red3")) +
  theme(text = element_text(size = 12))
As you can see I used both geom_line and geom_segment to display the lines for my graph.
It's almost perfect but if you look closely, the segments that are drawn (between 2004 and 2005) do not display the same line size, even though I used the same arguments values in the script (i.e. size=1.5 and linetype='solid' or dashed).
Of course I could change manually the size of the segments to get similar lines, but when I do that, segments are not as smooth as the lines using geom_line.
Also, I get the same problem (different line shapes) by including the size or linetype arguments within the aes() argument.
Do you have any idea what causes this difference and how I can get the exact same shapes for both my segments and lines ?
It seems to be an anti-aliasing issue with geom_segment, but that seems like a somewhat cumbersome approach to begin with. I think I have resolved your issue by duplicating the A and B treatments in the original data frame.
library(dplyr)
library(ggplot2)

# Step 1: copy the rows of the 'shared' treatments A and B and relabel the
# copies "AA" and "BB", so that each shared treatment can feed two lines.
shared_copies <- df1 %>%
  filter(treatment %in% c("A", "B")) %>%
  mutate(treatment = ifelse(treatment == "A", "AA", "BB"))

# Step 2: put the copies back together with the original rows and derive the
# two plotting variables:
#   treatment_group -> colour    (A, C, D, AA vs. everything else)
#   line_type       -> line type (AA, BB, D, F vs. everything else)
plot_data <- shared_copies %>%
  bind_rows(df1) %>%
  mutate(
    treatment_group = ifelse(
      treatment %in% c("A", "C", "D", "AA"), "treatment1", "treatment2"
    ),
    line_type = ifelse(
      treatment %in% c("AA", "BB", "D", "F"), "type1", "type2"
    )
  )

# Step 3: plot. Grouping by colour AND line type gives one continuous line per
# combination, so no separate connecting segments are needed.
ggplot(
  plot_data,
  aes(
    x = year,
    y = variable,
    group = interaction(treatment_group, line_type),
    color = treatment_group
  )
) +
  geom_line(aes(linetype = line_type), size = 1.5, lineend = "round") +
  geom_point(size = 4) +
  # From here on this is more or less the original styling.
  geom_errorbar(
    aes(ymin = variable - se, ymax = variable + se),
    width = 0.2,
    size = 1.5
  ) +
  scale_color_manual(values = c("blue1", "red3")) +
  scale_linetype_manual(values = c("dashed", "solid")) +
  labs(title = "Title", x = "Years", y = "Variable 1") +
  scale_x_continuous(breaks = c(1998:2010), limits = c(1998.5, 2010.5)) +
  theme_classic() +
  theme(text = element_text(size = 12))
Which will give you the following
My numbers are different since they were randomly generated.
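You can then modify the legend to your liking, but my recommendation is to label the lines directly with something like geom_text, and then be sure to set check_overlap = TRUE (note that check_overlap is an argument of geom_text; geom_label does not support it).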
Hope this helps!
I wonder if there is the possibility to change the fill main colour according to a categorical variable
Here is a reproducible example
# Two clouds of ten points each: group "a" centred on (0, 0) and group "b"
# centred on (3, 3); val runs 1..10 within each group.
df <- data.frame(
  x = c(rnorm(10, mean = 0), rnorm(10, mean = 3)),
  y = c(rnorm(10, mean = 0), rnorm(10, mean = 3)),
  grp = rep(c("a", "b"), each = 10),
  val = rep(1:10, times = 2)
)

# Filled circles (pch 21): outline colour by group, fill and size by val.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_point(
    mapping = aes(color = grp, fill = val, size = val),
    pch = 21
  )
Of course it is easy to change the circle colour/shape, according to the variable grp, but I'd like to have the a group in shades of red and the b group in shades of blue.
I also thought about using facets, but don't know if the fill gradient can be changed for the two panels.
Anyone knows if that can be done, without gridExtra?
Thanks!
I think there are two ways to do this. The first is using the alpha aesthetic for your val column. This is a quick and easy way to accomplish your goal but may not be exactly what you want:
# Quick variant: fill by group and let transparency (alpha) carry val, so
# every group fades from faint to strong in its own colour.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_point(
    mapping = aes(alpha = val, fill = grp, size = val),
    pch = 21
  ) +
  theme_minimal()
The second way would be to do something similar to this post: Vary the color gradient on a scatter plot created with ggplot2. I edited the code slightly so it's not a range from white to your color of interest but from a lighter color to a darker color. This requires a little bit of work and using the scale_fill_identity function, which basically takes a variable that has the colors you want and maps them directly to each point (so it doesn't do any scaling).
This code is:
# Rescale val to [0, 1]
df$scaled_val <- rescale(df$val)

# Light and dark end of the ramp for each group, in group order.
low_cols <- c("firebrick1", "deepskyblue")
high_cols <- c("darkred", "deepskyblue4")

# Position of every row's group among the groups (1, 2, ...). Going through
# factor() matters: grp may be a plain character column, and as.numeric() on
# "a"/"b" yields NA, which would select no colour at all.
grp_index <- as.integer(factor(df$grp))

# Interpolate between the group's light and dark colour at scaled_val and
# convert the resulting RGB triple to a hex string. Working row by row keeps
# each colour attached to its own row even when df is not sorted by grp
# (the original assigned a regrouped result back by position).
df$col <- vapply(
  seq_len(nrow(df)),
  function(i) {
    ramp <- colorRamp(c(low_cols[grp_index[i]], high_cols[grp_index[i]]))
    rgb(ramp(df$scaled_val[i]), maxColorValue = 255)
  },
  character(1)
)
df

# scale_fill_identity() uses the hex strings in `col` as they are.
ggplot(data = df, aes(x = x, y = y)) +
  geom_point(pch = 21, aes(fill = col, size = val)) +
  theme_minimal() +
  scale_fill_identity()
Thanks to this other post I found a way to visualize the fill bar in the legend, even though that wasn't what I meant to do.
Here's the output
And the code
# Example data with grp as a two-level factor. val is rescaled within each
# group (scaledVal), then shifted by 100 per group (scaledValOffSet), so the
# groups occupy separate, non-overlapping stretches of a single fill scale.
df <- data.frame(
  x = c(rnorm(10, mean = 0), rnorm(10, mean = 3)),
  y = c(rnorm(10, mean = 0), rnorm(10, mean = 3)),
  grp = factor(c(rep("a", times = 10), rep("b", times = 10)),
               levels = c("a", "b")),
  val = rep(1:10, times = 2)
) %>%
  group_by(grp) %>%
  mutate(scaledVal = rescale(val)) %>%
  ungroup() %>%
  mutate(scaledValOffSet = scaledVal + 100 * (as.integer(grp) - 1))

scalerange <- range(df$scaledVal)

# One (low, high) pair of gradient stops per group, using the same offset of
# 100 per group as scaledValOffSet. With two groups this gives four stops, one
# for each of the four colours below. The original used offsets c(0, 100, 200),
# i.e. six stops for four colours, so the stops and colours did not line up.
gradientends <- scalerange +
  rep(100 * (seq_len(nlevels(df$grp)) - 1), each = 2)

ggplot(data = df, aes(x = x, y = y)) +
  geom_point(pch = 21, aes(fill = scaledValOffSet, size = val)) +
  scale_fill_gradientn(
    # white -> darkred for the first group, white -> deepskyblue4 for the second
    colours = c("white", "darkred", "white", "deepskyblue4"),
    values = rescale(gradientends)
  )
Basically one should rescale fill values (e.g. between 0 and 1) and separate them using another order of magnitude, provided by the categorical variable grp.
This is not what I wanted though: the snippet can be improved, of course, to make the whole thing less manual, but still lacks the simple usual discrete fill legend.
Following http://docs.ggplot2.org/current/aes_group_order.html
# Height against age, with one line per Subject.
h <- ggplot(data = Oxboys, mapping = aes(x = age, y = height))
h +
  geom_line(mapping = aes(group = Subject))
Produces
But if two Subjects have exactly the same line, one subject's line will hide the other. Could we use line thickness or intensity to indicate the number of subjects who have the same line? Could we add a bubble using geom_point() to indicate the number of subjects?
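Use geom_line(aes(group = Subject), alpha = .5), so that overlapping lines appear darker. Note that Subject must not be quoted: aes(group = 'Subject') would put every row into one single group. Play around with the alpha values.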
You could accomplish it by first mapping the colour and size aesthetics and then adjusting their values using the scale_size_manual and scale_colour_manual functions. Below is a demonstration of the approach.
# Toy data with two pairs of coinciding lines: series "a" and "b" both follow
# 1..10, series "c" and "d" both rise evenly from 5 to 8.
df <- data.frame(
  t = rep(1:10, times = 4),
  a = c(rep(1:10, times = 2), rep(seq(5, 8, length.out = 10), times = 2)),
  c = rep(c("a", "b", "c", "d"), each = 10)
)

# Map both size and colour to the series, then pick the values by hand so
# that the later series in each pair is drawn thinner than the one under it.
ggplot(data = df, mapping = aes(x = t, y = a, group = c)) +
  geom_line(mapping = aes(size = c, colour = c)) +
  scale_size_manual(values = c(4, 2, 3, 1.5)) +
  scale_colour_manual(values = c("black", "red", "blue", "yellow"))
You must consider how your grouping factor (in the example c) is ordered, because the lines are also plotted in this order. So the line which is plotted first should get a larger value for size.
I need to add a legend of the two lines (best fit line and 45 degree line) on TOP of my two plots. Sorry I don't know how to add plots! Please please please help me, I really appreciate it!!!!
Here is an example
# Example data: 15 points on a noisy line y = 2x, split over three types.
type <- factor(rep(c("A", "B", "C"), times = 5))
xvariable <- seq(1, 15)
yvariable <- 2 * xvariable + rnorm(15, mean = 0, sd = 2)
newdata <- data.frame(type, xvariable, yvariable)

# One panel per type, showing the points, a red 45-degree reference line
# (intercept 0, slope 1) and a linear fit without confidence band.
p <- ggplot(data = newdata, mapping = aes(x = xvariable, y = yvariable))
p +
  geom_point(size = 3) +
  facet_wrap(~ type) +
  geom_abline(intercept = 0, slope = 1, color = "red", size = 1) +
  stat_smooth(method = "lm", se = FALSE, size = 1)
Here is another approach which uses aesthetic mapping to string constants to identify different groups and create a legend.
First an alternate way to create your test data (and naming it DF instead of newdata)
# Same test data as in the question, built in a single call.
DF <- data.frame(
  type = factor(rep(c("A", "B", "C"), times = 5)),
  xvariable = seq_len(15),
  yvariable = 2 * seq_len(15) + rnorm(15, mean = 0, sd = 2)
)
Now the ggplot code. Note that for both geom_abline and stat_smooth, the colour is set inside an aes call, which means each of the two values used will be mapped to a different colour and a guide (legend) will be created for that mapping.
# Mapping colour to a string constant inside aes() gives each layer its own
# entry on the colour scale, and therefore its own key in the legend.
ggplot(data = DF, mapping = aes(x = xvariable, y = yvariable)) +
  geom_point(size = 3) +
  geom_abline(
    mapping = aes(colour = "one-to-one"),
    intercept = 0,
    slope = 1,
    size = 1
  ) +
  stat_smooth(
    mapping = aes(colour = "best fit"),
    method = "lm",
    se = FALSE,
    size = 1
  ) +
  facet_wrap(~ type) +
  # An empty string as scale name removes the legend title.
  scale_colour_discrete("")
Try this:
# original data
type <- factor(rep(c("A", "B", "C"), 5))
x <- 1:15
y <- 2 * x + rnorm(15, 0, 2)
df <- data.frame(type, x, y)

# create a copy of the original data, but set y = x
# this copy is used to draw the one-to-one line
df2 <- data.frame(type, x, y = x)

# bind original and 'one-to-one' data together
df3 <- rbind.data.frame(df, df2)

# grouping variable that tells the two halves apart:
# 1 = original data (gives the best-fit line), 2 = one-to-one data
df3$grp <- as.factor(rep(1:2, each = nrow(df)))

# plot
# use the original data for the points
# use the 'double data' for the best-fit and one-to-one lines; a linear fit
# through the y = x copy reproduces the one-to-one line exactly
# colours and legend labels are set per group, in the order of grp
# (the first label was "abline" originally, although group 1 is the best fit)
ggplot(df, aes(x = x, y = y)) +
  geom_point(size = 3) +
  facet_wrap(~ type) +
  stat_smooth(data = df3, aes(colour = grp), method = "lm", se = FALSE, size = 1) +
  scale_colour_manual(values = c("red", "blue"),
                      labels = c("best fit", "one-to-one"),
                      name = "") +
  theme(legend.position = "top")

# If you rather want to stack the two keys in the legend you can add:
# guide = guide_legend(direction = "vertical")
#...as argument in scale_colour_manual
Please note that this solution does not extrapolate the one-to-one line outside the range of your data, which seemed to be the case for the original geom_abline.