Multiple data lines in a single ggplot2 - r

I would like to plot multiple lines in a single ggplot, where each line would represent relationship between x and y given two or more parameters.
I know how to do that for one parameter:
Take the following example data:
# Plotting and reshaping packages.
library(ggplot2)
library(reshape2)

# Example data: an x grid, two random responses and a binary parameter.
# Columns are created in the same order as before, so the random draws
# happen in the same sequence.
rs <- data.frame(
  x_ = seq(200, 1000, by = 200),
  var1 = runif(5),
  var2 = runif(5),
  par = rbinom(n = 5, size = 1, prob = 0.5)
)

# Long format: every column except x_ (var1, var2 and par) becomes a series.
melted <- melt(rs, id.vars = "x_")

# One dashed line, with points, per melted variable.
ggplot(
  data = melted,
  mapping = aes(x = x_, y = value, group = variable, colour = variable)
) +
  geom_point() +
  geom_line(linetype = "dashed")
This plots three lines: one for var1, one for var2 and one for par.
However, I would like four lines: one for var1 given par=0 and another one for var1 given par=1, and the same then again for var2.
How would this scale up, for example if I want that the condition is a combination of multiple parameters (e.g. par2 + par)?

If you melt the data in a different way, you can use par to change the shape and linetype of your lines, so it's nice and clear which line is which:
# Keep `par` as an id column so it survives the melt and can drive
# the shape and linetype aesthetics.
rs_melt <- melt(rs, id.vars = c("x_", "par"))

# Colour distinguishes var1/var2; shape and linetype distinguish par = 0/1.
ggplot(
  data = rs_melt,
  mapping = aes(
    x = x_,
    y = value,
    colour = variable,
    shape = factor(par),
    linetype = factor(par)
  )
) +
  geom_line(size = 1.1) +
  geom_point(size = 3) +
  # Give both legends the same title so ggplot2 merges them into one.
  labs(shape = "par", linetype = "par")
Output:

You need to adjust your melt function and add a group column which has both par and var details. I think below is what you want?
library(reshape)
library(ggplot2)

# Example data; columns are built in the same order as before so the
# random draws are consumed in the same sequence.
rs <- data.frame(
  x_ = seq(200, 1000, by = 200),
  var1 = runif(5),
  var2 = runif(5),
  par = rbinom(n = 5, size = 1, prob = 0.5)
)

# Melt while keeping both x_ and par as identifiers.
melted <- melt(rs, id.vars = c("x_", "par"))

# Combined key such as "0 var1" / "1 var2": one line per (par, variable) pair.
melted[["group"]] <- paste(melted[["par"]], melted[["variable"]])

ggplot(
  data = melted,
  mapping = aes(x = x_, y = value, group = group, colour = group)
) +
  geom_point() +
  geom_line(linetype = "dashed")

Related

ggplot line plot with one group's lines on top

I am making a line plot of several groups and want to make a visualization where one group's lines are highlighted
# One line per participant; the `color` column holds literal colour names,
# which scale_color_identity() uses as-is and relabels in the legend.
ggplot(df) +
  geom_line(
    mapping = aes(
      x = timepoint,
      y = var,
      group = participant_id,
      colour = color
    )
  ) +
  scale_color_identity(
    guide = "legend",
    labels = c(red = "g1", gray90 = "Other")
  )
However, the highlighted group's lines are partially obscured by the other groups' lines
How can I make these lines always on top of other groups lines?
The simplest way to do this is to plot the gray and red groups on different layers.
First, let's try to replicate your problem with a dummy data set:
# Dummy data: 50 participants, each measured at 25 timepoints (1250 rows).
set.seed(1)
df <- data.frame(
  participant_id = rep(1:50, each = 25),
  timepoint = factor(rep(0:24, 50)),
  # Per participant: a random baseline plus a noisy, damped sine-squared curve.
  var = c(replicate(50, runif(1, 50, 200) + runif(25, 0.3, 1.5) *
    sin(0:24 / (0.6 * pi))^2 / seq(0.002, 0.005, length.out = 25))),
  # One colour per participant, repeated across that participant's 25 rows.
  # `each` must equal the number of timepoints; a larger value makes this
  # column longer than the others and data.frame() silently recycles the
  # shorter columns, giving participants duplicated timepoints and mixed
  # colours.
  color = rep(sample(c("red", "gray90"), 50, TRUE, prob = c(1, 9)), each = 25)
)
Now we apply your plotting code:
library(ggplot2)

# The question's plot, reproduced on the dummy data: all lines are drawn in
# a single layer, coloured by the literal colour names stored in `color`.
ggplot(df) +
  geom_line(
    mapping = aes(
      x = timepoint,
      y = var,
      group = participant_id,
      colour = color
    )
  ) +
  scale_color_identity(
    guide = "legend",
    labels = c(red = "g1", gray90 = "Other")
  ) +
  theme_classic()
This looks broadly similar to your plot. If instead we plot in different layers, we get:
# Draw the background group first and the highlighted group second:
# later layers are painted on top of earlier ones.
ggplot(df, aes(timepoint, var, group = participant_id)) +
  geom_line(data = df[df$color == "gray90", ], aes(color = "Other")) +
  geom_line(data = df[df$color == "red", ], aes(color = "g1")) +
  # Name the values so each label gets its colour regardless of how the
  # labels happen to sort in the current locale.
  scale_color_manual(values = c(g1 = "red", Other = "gray90")) +
  theme_classic()
Created on 2022-06-20 by the reprex package (v2.0.1)
You can use factor releveling to bring the line (-s) of interest to front.
First, let's plot the data as is, with the red line partly hidden by others.
library(ggplot2)
library(dplyr)

set.seed(13)

# 20 participants ("p_1" ... "p_20") with 100 timepoints each; only the
# first participant's 100 rows are red, the remaining 1900 are gray.
df <- data.frame(
  timepoint = rep(1:100, times = 20),
  participant_id = paste0("p_", rep(1:20, each = 100)),
  var = abs(rnorm(2000, 200, 50) - 200),
  color = rep(c("red", "gray90"), times = c(100, 1900))
)

# Plot as-is: the red line is partly hidden behind the gray ones.
ggplot(df) +
  geom_line(
    mapping = aes(
      x = timepoint,
      y = var,
      group = participant_id,
      colour = color
    )
  ) +
  scale_color_identity(
    guide = "legend",
    labels = c(red = "g1", gray90 = "Other")
  )
Now let's bring p_1 to front by making it the last factor level.
# Make "p_1" the last factor level of participant_id before plotting.
df %>%
  mutate(
    # Step 1: convert to factor and move "p_1" to the first level.
    participant_id = relevel(factor(participant_id), ref = "p_1"),
    # Step 2: reverse the level order, which puts "p_1" last.
    participant_id = factor(
      participant_id,
      levels = rev(levels(participant_id))
    )
  ) %>%
  ggplot() +
  geom_line(
    mapping = aes(
      x = timepoint,
      y = var,
      group = participant_id,
      colour = color
    )
  ) +
  scale_color_identity(
    guide = "legend",
    labels = c(red = "g1", gray90 = "Other")
  )

How to write point values to a lineplot in R?

I have a dataframe with a single column of multiple values. I was using basic R plotting functions like plot() and points(). I successfully plotted the line plot, but I was unable to write the point values from the dataframe onto the plot area. Is there any way to add data values onto the plot?
Below is the code for a test:
# Ten standard-normal draws in a one-column data frame.
x <- data.frame(A = rnorm(10))
# Line plot with a solid dot at each observation
# (type "o" = points overplotted on lines).
plot(x$A, type = "o", pch = 20)
Sorry, I made an edit to make my question clearer.
Here below is the example plot for 10 random numbers
Plot lines, then add text:
# Data: fixed seed so the labels are reproducible.
set.seed(1)
x <- data.frame(A = rnorm(10))

# Base plot; the y-range is widened by 30% to leave room for the labels.
plot(
  x$A,
  type = "o",
  pch = 20,
  ylim = range(x$A * 1.3)
)

# Write each rounded value slightly above its point, rotated 90 degrees.
text(
  x = seq_along(x$A),
  y = x$A + 0.3,
  labels = round(x$A, 2),
  srt = 90
)
Or using ggplot with ggrepel for pretty labels:
# ggplot version
library(ggplot2)
library(ggrepel) # repelled labels that avoid overlapping each other

# Add an explicit index column `x` next to the values in `A`.
ggplot(
  data = data.frame(x = seq_along(x$A), A = x$A),
  mapping = aes(x = x, y = A, label = round(A, 2))
) +
  geom_line() +
  geom_point() +
  geom_label_repel()
# alternative without the label box: geom_text_repel()
Probably this is more than what you are asking, but you can add labels to the values you have in your line plot using ggplot:
library(ggplot2)

# A: the values to show; pos: a random height for each value's label.
x <- data.frame(
  A = rnorm(10),
  pos = runif(10, 0.1, 0.7)
)

ggplot(x) +
  # All values sit on the baseline y = 0.
  geom_point(aes(x = A), y = 0) +
  geom_line(aes(x = A), y = 0) +
  # Dashed leader from each point up to its label.
  geom_segment(
    aes(x = A, xend = A, y = 0, yend = pos),
    linetype = 2
  ) +
  geom_label(
    aes(x = A, y = pos, label = round(A, 2)),
    size = 3
  ) +
  # The y axis only positions labels, so its title and guide are hidden.
  scale_y_continuous(name = "", limits = c(0, 0.8)) +
  guides(y = "none") +
  theme_bw()
You could make a base R "type b" equivalent.
The OP hasn't specified that every y value should be set to zero.
library(ggh4x)
#> Loading required package: ggplot2

set.seed(1)
x <- data.frame(A = rnorm(10))

# geom_pointpath() with shape = NA draws no point markers; the rounded
# values are drawn as text at each position instead.
ggplot(x, aes(x = seq_along(A), y = A)) +
  geom_pointpath(shape = NA) +
  geom_text(aes(label = round(A, 2))) +
  labs(x = "Index")
Created on 2022-05-27 by the reprex package (v2.0.1)

Need to change the legend to decimal values

I am using this code to generate boxplots. I want the legend to be continuous, not discrete. Also, the boxplot colour needs to be different for each value of SOP (which is in increments of 0.5).
# Axis breaks every 0.5 from 0.5 to 5.5.
brk1 <- seq(0.5, 5.5, by = 0.5)

# One box per SOP value, filled by the (continuous) SOP value.
ggplot(data, aes(x = SOP, y = Chance.of.Admit)) +
  geom_boxplot(mapping = aes(group = SOP, fill = SOP)) +
  scale_x_continuous(breaks = brk1)
In the current plot the fill is mapped to a continuous variable, so each SOP value already gets a different color, though the differences are hard for the human eye to detect. If you want more distinguishable colors, you can try a different type in scale_fill_continuous, such as type = "viridis".
library(dplyr)
library(ggplot2)

# Random sample data: 1000 rows, SOP on a 0.5 grid between 1 and 5.
data <- tibble(
  SOP = sample(seq(1, 5, by = 0.5), size = 1000, replace = TRUE),
  Chance.of.Admit = runif(1000, min = 0, max = 1)
)

brk1 <- seq(0.5, 5.5, by = 0.5)

# Continuous fill scale with the viridis palette; the same breaks are used
# for the x axis and for the colour-bar legend.
ggplot(data, aes(x = SOP, y = Chance.of.Admit)) +
  geom_boxplot(mapping = aes(group = SOP, fill = SOP)) +
  scale_x_continuous(breaks = brk1) +
  scale_fill_continuous(type = "viridis", breaks = brk1)
I understand you don't want a discrete legend, though it may be worth a look.
# Build a colour ramp and assign one colour to each distinct SOP value.
color_scales_fn <- colorRampPalette(
  c("#173f5f", "#20639b", "#3caea3", "#f6d55c", "#ed553b")
)
sop_list <- sort(unique(data$SOP))
# Named vector: names are the SOP values, values are the ramp colours.
manual_color <- setNames(color_scales_fn(length(sop_list)), sop_list)

# Discrete fill: SOP is converted to character so it matches the names of
# the manual palette.
ggplot(data, aes(x = SOP, y = Chance.of.Admit)) +
  geom_boxplot(mapping = aes(group = SOP, fill = as.character(SOP))) +
  scale_x_continuous(breaks = brk1) +
  scale_fill_manual(name = "SOP", values = manual_color)
Created on 2021-04-02 by the reprex package (v1.0.0)

Add horizontal lines in categorical scatter plot using ggplot2 in R

I am trying to plot a simple scatter plot for 3 groups, with different horizontal lines (line segment) for each group: for instance a hline at 3 for group "a", a hline at 2.5 for group "b" and a hline at 6 for group "c".
library(ggplot2)

# 120 observations in three groups. `tt` cycles a, b, c, so the means must
# cycle 4, 5, 7 in the same order for each group to get its own mean
# (a -> 4, b -> 5, c -> 7). The argument is spelled `mean` in full rather
# than relying on partial matching of `m`.
df <- data.frame(
  tt = rep(c("a", "b", "c"), 40),
  val = round(rnorm(120, mean = rep(c(4, 5, 7), times = 40)))
)

# Jittered scatter per group; the layer inherits data and x/y from ggplot().
ggplot(df, aes(tt, val)) +
  geom_jitter(
    colour = "red",
    position = position_jitter(width = 0.05)
  )
I really appreciate your help!
Never send a line when a point can suffice:
library(ggplot2)

# 120 observations in three groups. `tt` cycles a, b, c, so the means must
# cycle 4, 5, 7 in the same order for each group to get its own mean
# (a -> 4, b -> 5, c -> 7). The argument is spelled `mean` in full rather
# than relying on partial matching of `m`.
df <- data.frame(
  tt = rep(c("a", "b", "c"), 40),
  val = round(rnorm(120, mean = rep(c(4, 5, 7), times = 40)))
)

# One reference height per group.
hline <- data.frame(tt = c("a", "b", "c"), v = c(3, 2.5, 6))

ggplot(df, aes(tt, val)) +
  # Shape 95 is the "_" character; drawn large, it acts as a short
  # horizontal line at each group's reference height.
  geom_point(data = hline, aes(tt, v), shape = 95, size = 20) +
  geom_jitter(
    colour = "red",
    position = position_jitter(width = 0.05)
  )
There are other ways if this isn't acceptable, such as:
# Reference heights keyed by the numeric position of each group on the
# discrete x axis (a = 1, b = 2, c = 3).
hline <- data.frame(
  tt = c(1, 2, 3),
  v = c(3, 2.5, 6)
)

ggplot(df, aes(tt, val)) +
  geom_jitter(
    mapping = aes(tt, val),
    data = df,
    colour = I("red"),
    position = position_jitter(width = 0.05)
  ) +
  # Horizontal segment spanning 0.25 either side of each group position.
  geom_segment(
    data = hline,
    mapping = aes(x = tt - 0.25, xend = tt + 0.25, y = v, yend = v)
  )
The downside for the point is the egregious thickness and no control over width.
The downside for the segment is the need to use numerics for the discrete axis position vs the factors.
I also should have set the random seed to ensure reproducibility.

add geom_hline legend without screwing up geom_line legend

I'm trying to draw a simple (scree)-plot with some extra geom_hline and geom_vlines thrown in.
Problem is: whenever I so much as add show_guide=TRUE or add some aes() to the geom_xline, I screw up the original legend.
Here's some (ugly) fake data:
# Fake scree data: 12 principal components for each of three variables.
# `variable` uses each = 12 so that every variable has exactly one
# eigenvalue per PC; a shorter vector would be silently recycled by
# data.frame() in step with PC, leaving each variable with only four
# distinct PCs, three times each.
exdf <- data.frame(
  PC = rep(1:12, times = 3),
  variable = rep(c("A", "B", "C"), each = 12),
  eigenvalue = rnorm(36),
  stringsAsFactors = FALSE
)
And here's my plot:
# Scree-style plot: one line per variable, distinguished by linetype.
g <- ggplot(data = exdf, mapping = aes(x = factor(PC), y = eigenvalue))
g <- g + geom_line(mapping = aes(group = factor(variable), linetype = variable))
# Green vertical marker at PC 7. `show.legend` is the current name of the
# argument formerly called `show_guide`.
g <- g + geom_vline(xintercept = 7, colour = "green", show.legend = TRUE)
How do I add a separate legend for the geom_vline without polluting the other legend?
Can't wrap my head around why one layer's color would change that of another legend.
This partly solves the problem:
g <- ggplot(data = exdf, mapping = aes(x = factor(PC), y = eigenvalue))
g <- g + geom_line(mapping = aes(group = factor(variable), linetype = variable))
# Map the threshold's colour through aes() on its own one-row data frame so
# it gets a separate colour legend; the manual scale pins "A" to green.
# `show.legend` is the current name of the argument formerly called
# `show_guide`.
g <- g +
  geom_vline(
    mapping = aes(xintercept = x, colour = Threshold),
    data = data.frame(x = 7, Threshold = "A"),
    show.legend = TRUE
  ) +
  scale_colour_manual(values = c(A = "green"))
But the legend will still have crosses for the variable section, albeit not green ones.
Alternatively you could use a geom_line with a new data frame with two rows, both with the same x and y equal to the lower and upper bounds of your data. This will give you a legend that has a horizontal green line for your threshold and no vertical lines.
Based on @Nick K's suggestion above, here's a way to do this with clean legends via different data = for the different layers.
# Fake scree data: 12 principal components for each of three variables.
# `variable` uses each = 12 so that every variable has exactly one
# eigenvalue per PC (a shorter vector would be silently recycled in step
# with PC).
exdf <- data.frame(
  PC = rep(1:12, times = 3),
  variable = rep(c("A", "B", "C"), each = 12),
  eigenvalue = rnorm(36),
  stringsAsFactors = FALSE
)

# Start from an empty plot so each layer brings its own data and mapping.
g <- ggplot()
g <- g +
  geom_line(
    data = exdf,
    mapping = aes(
      x = factor(PC),
      y = eigenvalue,
      group = factor(variable),
      linetype = variable
    )
  )
g

# The threshold is drawn as a line range covering the observed eigenvalue
# range at PC 7, with its colour mapped so it gets its own legend.
thresholds <- data.frame(
  threshold = "Threshold-A",
  PC = 7,
  ymin = min(exdf$eigenvalue),
  ymax = max(exdf$eigenvalue)
)
g <- g +
  geom_linerange(
    data = thresholds,
    mapping = aes(x = PC, ymin = ymin, ymax = ymax, color = threshold)
  )
g
yields:
Notice:
I know, the original data exdf are dumb and make an ugly plot; that's not the point here.
Notice that you have to set the data = argument for both layers, and keep the first g <- ggplot() blank, otherwise ggplot2 gets confused about the dataframes.
yeah, it's a hack job (see below), and it also doesn't fill the y-height of the plot, as a geom_vline should.
As an add-on, (not a solution!), here's how it should work with geom_vline:
# Fake scree data: 12 principal components for each of three variables.
# `variable` uses each = 12 so that every variable has exactly one
# eigenvalue per PC (a shorter vector would be silently recycled in step
# with PC).
exdf <- data.frame(
  PC = rep(1:12, times = 3),
  variable = rep(c("A", "B", "C"), each = 12),
  eigenvalue = rnorm(36),
  stringsAsFactors = FALSE
)

g <- ggplot()
g <- g +
  geom_line(
    data = exdf,
    mapping = aes(
      x = factor(PC),
      y = eigenvalue,
      group = factor(variable),
      linetype = variable
    )
  )
g

# Uses the `thresholds` data frame from the geom_linerange example.
# `show.legend` is the current name of the argument formerly called
# `show_guide`.
g +
  geom_vline(
    data = thresholds,
    mapping = aes(xintercept = PC, color = threshold),
    show.legend = TRUE
  )
yields:
That fills the yheight, as you would expect from geom_vline, but somehow messes up the legend of variable (notice the vertical lines).
Not sure why this is so, feels like a bug to me.
Here reported: https://github.com/hadley/ggplot2/issues/1267

Resources