I am trying to highlight selected points based on their order statistics in a ggplot stat_qq output:
ydata <- data.frame(sample = c(rep("Sample 1", 100),
rep("Sample 2", 100),
rep("Sample 3", 100),
rep("Sample 4", 100)),
x=rnorm(400))
ydata <- ydata[order(ydata$sample, ydata$x),]
ydata$order <- 1:100
ggplot(ydata, aes(sample=x)) + stat_qq() + facet_wrap(~sample) + scale_x_continuous(breaks = -2:2, labels = function(x) paste0(x, " \n [",100 * signif(pnorm(-2:2, lower.tail=FALSE),2), "%]")) + theme_bw(base_size = 14, base_family = "sans") + labs(title = "Four Samples of 100 Observations From Normal Distribution",
caption = "4 Samples of n = 100 from Normal Distribution \nNumbers indicate order of value",
y = "Sample Value",
x = "Standard Deviation\n[%exceeding]") +
geom_text(data = ydata[ydata$order %in% c(2,16,50,84,98),], aes(x=qnorm(pnorm(x)), y=x, label = order), nudge_y = 1)
Which produced this:
Obviously my text notation is not highlighting the right points (the 2, 16, 50 84, 98th points). I wish I could also highlight the actual points in red. Would appreciate any suggestions.
You could calculate the qq values outside of ggplot and create a separate column to group the qq values into highlighted and not highlighted. Then you could plot them using geom_point with the grouping variable as a colour aesthetic. For example:
library(tidyverse)
# Generate data reproducibly
set.seed(2)
ydata <- data.frame(sample = c(rep("Sample 1", 100),
rep("Sample 2", 100),
rep("Sample 3", 100),
rep("Sample 4", 100)),
x=rnorm(400))
ydata <- ydata[order(ydata$sample, ydata$x),]
ydata$order <- 1:100
# Quantile indices to highlight
pts = c(2,16,50,84,98)
# Add qq values and grouping column to data frame and pipe into ggplot
# Use split and map to calculate the qq values separately for each Sample
split(ydata, ydata$sample) %>%
map_df(~ .x %>% mutate(xq = qqnorm(x, plot.it=FALSE)$x,
group = ifelse(order %in% pts, "A", "B"))) %>%
ggplot(aes(xq, x, colour=group)) +
geom_point(size=1) +
geom_text(aes(label=ifelse(group=="A", order, "")),
nudge_y=1, size=3) +
facet_wrap(~ sample) +
theme_bw(base_size = 14, base_family = "sans") +
scale_colour_manual(values=c("red", "black")) +
guides(colour=FALSE)
As an alternative, a quick hack would be to use ggplot_build to highlight specific points in your original plot (note though that something is not quite right with how you placed the labels relative to the highlighted points):
pts = rep(c(2,16,50,84,98), 4) + rep(seq(0,300,100), each=5)
# Assuming you've assigned your plot to the object p
pb = ggplot_build(p)
# Change point colors
pb$data[[1]][pts, "colour"] = "red"
# Change label colors
pb$data[[2]][["colour"]] = "red"
# Regenerate plot object
p = ggplot_gtable(pb)
plot(p)
You can apply stat="qq" to your geom_point and then use the colors assigned to new variable
ydata <- data.frame(sample = c(rep("Sample 1", 100),
rep("Sample 2", 100),
rep("Sample 3", 100),
rep("Sample 4", 100)),
x=rnorm(400))
ydata <- ydata[order(ydata$sample, ydata$x),]
ydata$order <- 1:100
ydata$highlight = ifelse(ydata$order %in% c(2,16,50,84,98), "#FF0000", "#000000")
ydata$order_txt = ifelse(ydata$order %in% c(2,16,50,84,98), ydata$order, "")
ggplot(ydata, aes(sample=x)) +
geom_point(color=ydata$highlight, stat="qq") +
geom_text(label=ydata$order_txt, stat="qq", nudge_y=1) +
facet_wrap(~sample) +
scale_x_continuous(breaks = -2:2, labels = function(x) paste0(x, " \n [",100 * signif(pnorm(-2:2, lower.tail=FALSE),2), "%]")) +
theme_bw(base_size = 14, base_family = "sans") +
labs(
title = "Four Samples of 100 Observations From Normal Distribution",
caption = "4 Samples of n = 100 from Normal Distribution \nNumbers indicate order of value",
y = "Sample Value",x = "Standard Deviation\n[%exceeding]")
Related
I have to produce a scatter plot with many points.
I am already using the package "ggrepel" in order to avoid overlapping, but it sometimes still doesnt work. Is there a possibility to insert a linebreak into the labels (e.g. after a certain length)?
Thanks for help!
items <- c("A long description of the item",
"Another very long text descrbing the item",
"And finally another one ",
"This text exceeds the available space by far",
"Incredibly long text",
"Here we go with another one",
"A linebreak would help here",
"This has at least 20 characters")
items <- rep(items, 4)
df <- data.frame(
descs = items,
x = rnorm(n = length(items), mean = 2, sd = 2),
y = rnorm(n = length(items), mean = 2, sd = 2),
cat = as.factor(runif(length(items), min = 1, max = 6))
)
library(ggplot2)
library(tidyverse)
library(ggrepel)
df %>% ggplot(aes(x = x, y = y, color = cat)) + geom_point() +
#geom_text(aes(label = descs)) +
geom_text_repel(aes(label = descs)) +
theme_light() +
theme(legend.position="none")
rm(items)
rm(df)
You could use stringr::str_wrap to achieve line breaks at an appropriate point. For example, to limit lines to 20 characters, you can do:
df %>%
ggplot(aes(x = x, y = y, color = cat)) + geom_point() +
geom_text_repel(aes(label = stringr::str_wrap(descs, 20))) +
theme_light() +
theme(legend.position = "none")
since a long time I despair to straighten the label of the x-axis in my plot (ggplot2).
The challenge is that I have two geom_paths, each fetching the data from a different dataframe - I'm sure this will become a bit clearer in the code:
ggplot(data=dx, aes(x = year, y=en.x ))+
scale_y_continuous(breaks = scales::pretty_breaks(n = 2))+
geom_path(data=ps, aes(x, y, color = "Person 1", linetype="Person 1"), size=0.5)+
geom_path(data=pg, aes(x , y, color = "Person 2", linetype="Person 2"), size=0.5)+
scale_color_manual("",labels = c(Nutzer1, Nutzer2), values = c("Person 1" = Nutzer1Farbe, "Person 2" = Nutzer2Farbe)) +
scale_linetype_manual("",labels = c(Nutzer1, Nutzer2), values=c("Person 1"=Nutzer1Format, "Person 2"=Nutzer2Format))
The goal is, to Label the X-Axis with the years from the dataframe "dx", as shown in the aes-parameter. And it works! But only if you disable the geom_paths - shown below:
ggplot(data=dx, aes(x = year, y=en.x ))+
scale_y_continuous(breaks = scales::pretty_breaks(n = 2))+
#geom_path(data=ps, aes(x, y, color = "Person 1", linetype="Person 1"), size=0.5)+
#geom_path(data=pg, aes(x , y, color = "Person 2", linetype="Person 2"), size=0.5)+
scale_color_manual("",labels = c(Nutzer1, Nutzer2), values = c("Person 1" = Nutzer1Farbe, "Person 2" = Nutzer2Farbe)) +
scale_linetype_manual("",labels = c(Nutzer1, Nutzer2), values=c("Person 1"=Nutzer1Format, "Person 2"=Nutzer2Format))
I can't really understand why the paths destroy the labeling like this - it must be the aes parameters.
If someone has a solution for this, I would be extremely grateful!
This could be achieved like so:
Convert your original month variable to a date time before calling xspline. This way the interpolated date values could be easily converted back to datetime via e.g. lubridate::as_datetime.
besides that you could row bind your datasets which makes plotting a bit easier
library(ggplot2)
library(tidyr)
library(dplyr)
datengesamt <- datengesamt %>%
# Convert to datetime
mutate(month = as.POSIXct(month))
plot(1, 1)
ps <- xspline(datengesamt[,1], datengesamt[,2], 1, draw=FALSE)
pg <- xspline(datengesamt[,1], datengesamt[,3], 1, draw=FALSE)
pp <- list("Person 1" = data.frame(ps), "Person 2" = data.frame(pg)) %>%
bind_rows(.id = "id") %>%
mutate(x = lubridate::as_datetime(x))
ggplot(pp, aes(x, y, color = id, linetype = id)) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 2)) +
geom_path(size=0.5) +
scale_x_datetime(date_labels = "%Y")
I saw this interesting way of creating a publication timeline using geom_dumbell, so I created my own by first loading the libraries:
library(tidyverse)
library(ggalt)
library(ggrepel)
Entering in some data:
# create data frame
df <- data.frame(
paper = c("Paper 1", "Paper 1", "Paper 2", "Paper 2", "Paper 3", "Paper 3", "Paper 3", "Paper 3"),
round = c("first","revision","first","revision","first","first","first","first"),
submission_date = c("2019-05-23","2020-12-11", "2020-08-12","2020-10-28","2020-12-10","2020-12-11","2021-01-20","2021-01-22"),
journal_type = c("physics", "physics","physics","physics","chemistry","chemistry","chemistry","chemistry"),
journal = c("journal 1", "journal 1", "journal 2", "journal 2", "journal 3", "journal 4", "journal 5", "journal 6"),
status = c("Revise and Resubmit", "Waiting for Decision", "Revise and Resubmit", "Accepted", "Desk Reject","Desk Reject", "Desk Reject","Waiting for Decision"),
decision_date = c("2019-09-29", "2021-01-24", "2020-08-27", "2020-10-29", "2020-12-10","2021-01-05","2021-01-22","2021-01-24"),
step_complete = c("yes","no","yes","yes","yes","yes","yes", "no"),
duration_days = c(129,44,15,1,0,25,2,2))
# convert variables to dates
df$decision_date = as.Date(df$decision_date)
df$submission_date = as.Date(df$submission_date)
and, finally, creating my own basic timeline using this code:
ggplot(df, aes(x = submission_date, xend = decision_date,
y = paper, label = duration_days,
color = status)) +
geom_dumbbell(size = 1, size_x = 1) +
scale_color_manual(values=c("green", "red", "darkolivegreen4", "turquoise1")) +
labs(x=NULL, color = 'Status:',
y=NULL,
title="Timeline of Journal Submissions",
subtitle="Start date, decision date, and wait time (in days) for my papers.") +
#theme_ipsum_tw() +
ggrepel::geom_label_repel(nudge_y = -.25, show.legend = FALSE) +
theme(legend.position = 'top')
As you can see from the above image, I can't see the x-axis. Additionally, I'd like to put another aesthetic and legend on the right side for the journal, perhaps putting a different shape on each line. Any other bells and whistles using the above data would be fun, too. Thanks!
Ok, I finally found some time to figure this out with help from this terrific post. To start, let's load the revised list of packages:
library(tidyverse)
library(ggalt)
library(ggrepel)
library(gridExtra)
library(gtable)
library(grid)
For comprehensiveness, let's reload the data:
# create dataframe
df <- data.frame(
paper = c("Paper 1", "Paper 1", "Paper 2", "Paper 2", "Paper 3", "Paper 3", "Paper 3", "Paper 3"),
round = c("first","revision","first","revision","first","first","first","first"),
submission_date = c("2019-05-23","2020-12-11", "2020-08-12","2020-10-28","2020-12-10","2020-12-11","2021-01-20","2021-01-22"),
journal_type = c("physics", "physics","physics","physics","chemistry","chemistry","chemistry","chemistry"),
Journal = c("journal 1", "journal 1", "journal 2", "journal 2", "journal 3", "journal 4", "journal 5", "journal 6"),
status = c("Revise and Resubmit", "Waiting for Decision", "Revise and Resubmit", "Accepted", "Desk Reject","Desk Reject", "Desk Reject","Waiting for Decision"),
decision_date = c("2019-09-29", "2021-01-24", "2020-08-27", "2020-10-29", "2020-12-10","2021-01-05","2021-01-22","2021-01-24"),
step_complete = c("yes","no","yes","yes","yes","yes","yes", "no"),
duration_days = c(129,44,15,1,0,25,2,2)
)
# convert variables to dates
df$decision_date = as.Date(df$decision_date)
df$submission_date = as.Date(df$submission_date)
First, let's create the plot with the color legend and extract it. Because I want that legend to be on top, I make sure indicate that as my legend position. Note that I specify my preferred colors using the scale_color_manual argument:
# make plot with color legend
p1 <- ggplot(df, aes(x = submission_date, xend = decision_date,
y = paper, label = duration_days,
color = status)) +
geom_dumbbell(size = 1, size_x = 1) +
scale_color_manual(values=c("green", "red", "darkolivegreen4", "turquoise1")) +
labs(x=NULL, color = 'Status:',
y=NULL,
title="Timeline of Journal Submissions",
subtitle="Start date, decision date, and wait time (in days) for my papers.") +
ggrepel::geom_label_repel(nudge_y = -.25, show.legend = FALSE) +
theme(legend.position = 'top')
# Extract the color legend - leg1
leg1 <- gtable_filter(ggplot_gtable(ggplot_build(p1)), "guide-box")
Second, let's make the plot with the shape legend and extract it. Because I want this legend to be positioned on the right side, I don't need to even specify the legend position here. Note that I specify my preferred shapes using the scale_shape_manual argument:
# make plot with shape legend
p2 <- ggplot(df, aes(x = submission_date, xend = decision_date,
y = paper, label = duration_days,
shape = Journal)) +
geom_dumbbell(size = 1, size_x = 1) +
scale_shape_manual(values=c(15, 16, 17, 18, 19,25))+
labs(x=NULL, color = 'Status:',
y=NULL,
title="Timeline of Journal Submissions",
subtitle="Start date, decision date, and wait time (in days) for my papers.") +
ggrepel::geom_label_repel(nudge_y = -.25, show.legend = FALSE)
# Extract the shape legend - leg2
leg2 <- gtable_filter(ggplot_gtable(ggplot_build(p2)), "guide-box")
Third, let's make the full plot with no legend, specifying both the scale_color_manual and scale_shape_manual arguments as well as theme(legend.position = 'none'):
# make plot without legend
plot <- ggplot(df, aes(x = submission_date, xend = decision_date,
y = paper, label = duration_days,
color =status, shape = Journal)) +
geom_dumbbell(size = 1, size_x = 3) +
scale_color_manual(values=c("green", "red", "darkolivegreen4", "turquoise1")) +
scale_shape_manual(values=c(15, 16, 17, 18, 19,25))+
labs(x=NULL, color = 'Status:',
y=NULL,
title="Timeline of Journal Submissions",
subtitle="Start date, decision date, and wait time (in days) for my papers.") +
ggrepel::geom_label_repel(nudge_y = -.25, nudge_x = -5.25, show.legend = FALSE) +
theme(legend.position = 'none')
Fourth, let's arrange everything according to our liking:
# Arrange the three components (plot, leg1, leg2)
# The two legends are positioned outside the plot:
# one at the top and the other to the side.
plotNew <- arrangeGrob(leg1, plot,
heights = unit.c(leg1$height, unit(1, "npc") - leg1$height), ncol = 1)
plotNew <- arrangeGrob(plotNew, leg2,
widths = unit.c(unit(1, "npc") - leg2$width, leg2$width), nrow = 1)
Finally, plot and enjoy the final product:
grid.newpage()
grid.draw(plotNew)
As everyone will no doubt recognize, I relied very heavily on this post. However, I did change a few things, I tried be comprehensive with my explanation, and some others spent time trying to help, so I think it is still helpful to have this answer here.
So I have a data set with 3 columns (notCrammed, Hours, total grade)
I am plotting the hours against the total grade.
The notCrammed has only "0" for crammed and "1" for notCrammed.
I am plotting and trying to change the color for the people who crammed.
for example if 80 people did not cram and 20 did. I would like to change the color so i can see 80 green dots and 20 red dots. Or any color to differentiate them.
color <- 1
if(my.data$Studied<-1){
color <- 2
}
plot(my.data$Hours,my.data$Grade,xlab = "notCrammed",ylab = "Grade Scale",main = "Student Report",col =color)
EDIT 2, uploaded new plot, since the previous one had wrong par-settings.
You could do something like this:
#Simulating some data
set.seed(10)
my.data = data.frame("Hours" = sample(1:50, 10, replace = T),
"total grade" = sample(c(0,2,4,7,10,12), 10, replace = T),
"notCrammed" = rbinom(10, 1, prob = 0.5))
#Creating the plot
plot(my.data$Hours, my.data$total.grade, xlab = "notCrammed",ylab = "Grade Scale",
main = "Student Report", col = (my.data$notCrammed + 1))
In this case "not crammed" is the zeroes and is colored black, while crammed are colored red.
EDIT: Alternatively this should work if you want green and red:
plot(my.data$Hours, my.data$total.grade, xlab = "notCrammed",ylab = "Grade Scale", main = "Student Report",
col = c("green", "red")[as.factor(my.data$notCrammed)]))
Using #Qwethm's base dataset, you can preset colours using ggplot. This also allows you to change colours using plain language.
set.seed(10)
my.data = data.frame("Hours" = sample(1:50, 10, replace = T),
"total grade" = sample(c(0,2,4,7,10,12), 10, replace = T),
"notCrammed" = rbinom(10, 1, prob = 0.5))
library(ggplot2)
library(viridis)
my.data$notCrammed <- as.factor(my.data$notCrammed) #Change to a factor so it's discrete output
cols <- c("0" = "darkgreen","1"="red")
ggplot()+
geom_point(data=my.data,aes(x=Hours,y=total.grade,colour=notCrammed))+
scale_colour_manual(
values = cols,
aesthetics = c("colour", "fill")
)+
xlab("notCrammed")+
ylab("Grade Scale")+
ggtitle("Student Report")
I'm using facet_grid() to display some data, and I have facet labels that span multiple lines of text (they contain the "\n" character).
require(ggplot2)
#Generate example data
set.seed(3)
df = data.frame(facet_label_text = rep(c("Label A",
"Label B\nvery long label",
"Label C\nshort",
"Label D"),
each = 5),
time = rep(c(0, 4, 8, 12, 16), times = 4),
value = runif(20, min=0, max=100))
#Plot test data
ggplot(df, aes(x = time, y = value)) +
geom_line() +
facet_grid(facet_label_text ~ .) +
theme(strip.text.y = element_text(angle = 0, hjust = 0))
So by using the hjust = 0 argument, I can left-align facet label text as a unit.
What I would like to do is left-align each individual line of text. So "Label B" and "very long label" are both aligned along the left side, rather than centered relative to each other (ditto for "Label C" and "short"). Is this possible in ggplot2?
This is fairly straightforward using grid's grid.gedit function to edit the strips.
library(ggplot2) # v2.1.0
library(grid)
# Your data
set.seed(3)
df = data.frame(facet_label_text = rep(c("Label A",
"Label B\nvery long label",
"Label C\nshort",
"Label D"),
each = 5),
time = rep(c(0, 4, 8, 12, 16), times = 4),
value = runif(20, min=0, max=100))
# Your plot
p = ggplot(df, aes(x = time, y = value)) +
geom_line() +
facet_grid(facet_label_text ~ .) +
theme(strip.text.y = element_text(angle = 0, hjust = 0))
p
# Get a list of grobs in the plot
grid.ls(grid.force())
# It looks like we need the GRID.text grobs.
# But some care is needed:
# There are GRID.text grobs that are children of the strips;
# but also there are GRID.text grobs that are children of the axes.
# Therefore, a gPath should be set up
# to get to the GRID.text grobs in the strips
# The edit
grid.gedit(gPath("GRID.stripGrob", "GRID.text"),
just = "left", x = unit(0, "npc"))
Or, a few more lines of code to work with a grob object (in place of editing on screen as above):
# Get the ggplot grob
gp = ggplotGrob(p)
grid.ls(grid.force(gp))
# Edit the grob
gp = editGrob(grid.force(gp), gPath("GRID.stripGrob", "GRID.text"), grep = TRUE, global = TRUE,
just = "left", x = unit(0, "npc"))
# Draw it
grid.newpage()
grid.draw(gp)
Until someone comes along with a real solution, here's a hack: Add space in the labels to get the justification you want.
require(ggplot2)
#Generate example data
set.seed(3)
df = data.frame(facet_label_text = rep(c("Label A",
"Label B \nvery long label",
"Label C\nshort ",
"Label D"),
each = 5),
time = rep(c(0, 4, 8, 12, 16), times = 4),
value = runif(20, min=0, max=100))
#Plot test data
ggplot(df, aes(x = time, y = value)) +
geom_line() +
facet_grid(facet_label_text ~ .) +
theme(strip.text.y = element_text(angle = 0, hjust = 0))
There may be a cleaner way to do this but I didn't find a way to do this within ggplot2. The padwrap function could be more generalized as it basically does just what you requested. To get the justification right, I had to use a mono-spaced font.
# Wrap text with embedded newlines: space padded and lef justified.
# There may be a cleaner way to do this but this works on the one
# example. If using for ggplot2 plots, make the font `family`
# a monospaced font (e.g. 'Courier')
padwrap <- function(x) {
# Operates on one string
padwrap_str <- function(s) {
sres <- strsplit(s, "\n")
max_len <- max(nchar(sres[[1]]))
paste( sprintf(paste0('%-', max_len, 's'), sres[[1]]), collapse = "\n" )
}
# Applys 'padwrap' to a vector of strings
unlist(lapply(x, padwrap_str))
}
require(ggplot2)
facet_label_text = rep(c("Label A",
"Label B\nvery long label",
"Label C\nshort",
"Label D"), 5)
new_facet_label_text <- padwrap(facet_label_text)
#Generate example data
set.seed(3)
df = data.frame(facet_label_text = new_facet_label_text,
time = rep(c(0, 4, 8, 12, 16), times = 4),
value = runif(20, min=0, max=100))
#Plot test data
ggplot(df, aes(x = time, y = value)) +
geom_line() +
facet_grid(facet_label_text ~ .) +
theme(strip.text.y = element_text(angle = 0, hjust = 0, family = 'Courier'))
The strip text is left justified in the image below