scale_fill_manual based on another factor in ggplot2 - r

I am trying to color-code my legend based on a broader categorization of the factor used to "fill" my geom_bar in ggplot2. My plot looks like this: which I got using this R code:
ggplot(df, aes(year, TOTALshark, fill=fishery)) + geom_bar(width=.5,stat="identity", position="dodge")+ facet_wrap(~div)
Here is a dput sample of my dataset:
> dput(smpl)
df <- structure(list(X1 = structure(c(6L, 11L, 22L, 27L, 10L, 10L,
6L, 11L, 6L, 10L, 8L, 6L, 6L, 4L, 22L, 18L, 10L, 10L, 11L, 6L
), .Label = c("AMERICAN PLAICE", "BIGEYE TUNA", "BIVALVE", "BLUEFIN TUNA",
"CAPELIN", "COD(ATL)", "CRAB(SNOW,QUEEN)", "HADDOCK", "HAGFISH(ATL)",
"HALIBUT(ATL)", "HALIBUT(GREENLAND)", "HERRING(ATL)", "JONAH CRAB (CANC.BOR.)",
"LOBSTER", "LONGHORN SCULPIN", "LUMPFISH", "MACKEREL(ATL)", "MONKFISH",
"PAND.BOR.", "PAND.MON.", "POLLOCK", "REDFISH", "SCALLOP", "SEA URCHINS",
"SEACU", "SILVER HAKE", "SWORDFISH", "WHELK", "WHITE HAKE", "WINTER FLOUNDER",
"WITCH FLOUNDER", "YELLOWFIN TUNA", "YELLOWTAIL FLOUNDER"), class = "factor"),
X2 = structure(c(2L, 2L, 8L, 5L, 5L, 5L, 5L, 8L, 5L, 5L,
5L, 2L, 5L, 5L, 8L, 2L, 5L, 5L, 2L, 2L), .Label = c("Dredge",
"Gillnet", "Hook", "Jigger", "Line", "Seine", "Trap", "Trawlb",
"Trawlm"), class = "factor"), fishery = structure(c(12L,
25L, 43L, 50L, 24L, 24L, 15L, 27L, 15L, 24L, 21L, 12L, 15L,
9L, 43L, 36L, 24L, 24L, 25L, 12L), .Label = c("AMERICAN PLAICE-Gillnet",
"AMERICAN PLAICE-Line", "AMERICAN PLAICE-Trawlb", "BIGEYE TUNA-Jigger",
"BIGEYE TUNA-Line", "BIVALVE-Dredge", "BLUEFIN TUNA-Hook",
"BLUEFIN TUNA-Jigger", "BLUEFIN TUNA-Line", "CAPELIN-Seine",
"CAPELIN-Trap", "COD(ATL)-Gillnet", "COD(ATL)-Hook", "COD(ATL)-Jigger",
"COD(ATL)-Line", "COD(ATL)-Trap", "COD(ATL)-Trawlb", "CRAB(SNOW,QUEEN)-Trap",
"CUSK-Line", "HADDOCK-Gillnet", "HADDOCK-Line", "HADDOCK-Trawlb",
"HAGFISH(ATL)-Trap", "HALIBUT(ATL)-Line", "HALIBUT(GREENLAND)-Gillnet",
"HALIBUT(GREENLAND)-Line", "HALIBUT(GREENLAND)-Trawlb", "HERRING(ATL)-Seine",
"HERRING(ATL)-Trawlm", "JONAH CRAB (CANC.BOR.)-Trap", "LOBSTER-Trap",
"LONGHORN SCULPIN-Trawlb", "LUMPFISH-Gillnet", "MACKEREL(ATL)-Seine",
"MACKEREL(ATL)-Trawlm", "MONKFISH-Gillnet", "MONKFISH-Trawlb",
"PAND.BOR.-Trawlb", "PAND.MON.-Trawlb", "POLLOCK-Gillnet",
"POLLOCK-Trawlb", "REDFISH-Gillnet", "REDFISH-Trawlb", "REDFISH-Trawlm",
"SCALLOP-Dredge", "SEA URCHINS-Dredge", "SEACU-Dredge", "SILVER HAKE-Trawlb",
"SWORDFISH-Jigger", "SWORDFISH-Line", "SWORDFISH-unk", "WHELK-Trap",
"WHITE HAKE-Gillnet", "WHITE HAKE-Line", "WINTER FLOUNDER-Gillnet",
"WINTER FLOUNDER-Trawlb", "WITCH FLOUNDER-Trawlb", "YELLOWFIN TUNA-Line",
"YELLOWTAIL FLOUNDER-Trawlb"), class = "factor"), year = c(2008L,
2008L, 2009L, 2009L, 2008L, 2009L, 2009L, 2008L, 2006L, 2007L,
2007L, 2007L, 2007L, 2007L, 2008L, 2008L, 2009L, 2009L, 2009L,
2009L), div = structure(c(6L, 19L, 2L, 4L, 5L, 10L, 3L, 19L,
9L, 10L, 3L, 9L, 6L, 4L, 3L, 9L, 6L, 11L, 7L, 9L), .Label = c("5Z",
"5Y", "4X", "4W", "4V", "4T", "4S", "4R", "3P", "3O", "3N",
"3M", "3L", "3K", "2J", "2H", "2G", "1F", "0B", "1B", "0A"
), class = "factor"), TOTALshark = c(3369.72, 12243.2, 6080.06,
316646.05, 18786.8, 6565.91, 1339771.2, 45841.03, 41329.64,
6411.86, 204980.36, 67608.78, 2617.05, 61547.64, 447349.44,
13226.4, 1362.55, 6012.23, 13152.51, 1067.92), cat = structure(c(1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("groundfish", "largepelagic",
"bivalve", "smallpelagic", "crabs/lobsters", "shrimps", "others"
), class = "factor")), .Names = c("X1", "X2", "fishery",
"year", "div", "TOTALshark", "cat"), class = "data.frame", row.names = c(70L,
278L, 500L, 554L, 242L, 245L, 131L, 315L, 106L, 224L, 194L, 60L,
115L, 37L, 489L, 385L, 249L, 244L, 284L, 75L))
I wish to have the same legend, but with a few colors based on which category "cat" variable (i.e.,, pelagic, groundfish) the fishery falls in.

Is this what you want?
library(ggplot2)
library(plyr)
library(gridExtra)
# create data that links colour per 'cat' with 'fishery'
# the 'cat' colours will be used as manually set fill colours.
# get 'cat' colours
# alt. 1: grab 'cat' colours from plot object
# create a plot with fill = fishery *and* colour = cat
g1 <- ggplot(df, aes(x = year, y = TOTALshark, fill = fishery, colour = cat)) +
geom_bar(width = 0.5, stat = "identity", position = "dodge") +
facet_wrap(~ div)
g1
# grab 'cat' colours for each 'fishery' from plot object
# to be used in manual fill
cat_cols <- unique(ggplot_build(g1)[["data"]][[1]]$colour)
# unique 'cat'
cat <- unique(df$cat)
# create data frame with one colour per 'cat'
df2 <- data.frame(cat = cat, cat_cols)
df2
# alt 2: create your own 'cat' colours
# number of unique 'cat'
n <- length(cats)
# select one colour per 'cat', from e.g. brewer_pal or other palette tools
cat_cols <- brewer_pal(type = "qual")(n)
# cat_cols <- rainbow(n)
# create data frame with one colour per 'cat'
df2 <- data.frame(cat, cat_cols)
df2
# select unique 'fishery' and 'cat' combinations
# in the order they show up in the legend, i.e. ordered ('arranged') by fishery
df3 <- unique(arrange(df[, c("fishery", "cat")], fishery))
df3
# add 'cat' colours to 'fishery'
# use 'join' to keep order
df3 <- join(df3, df2)
df3
# plot with fill by 'fishery' creates a fill scale by fishery,
# but colours are set manually using scale_fill_manual and the 'cat' colours from above
g2 <- ggplot(df, aes(x = year, y = TOTALshark, fill = fishery)) +
geom_bar(width = 0.5, stat = "identity", position = "dodge") +
facet_wrap(~ div, nrow = 5) +
scale_fill_manual(values = as.character(df3$cat_cols))
g2
# create plot with both 'fishery' and 'cat' legend.
# extract 'fisheries' legend
tmp <- ggplot_gtable(ggplot_build(g2))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend_fish <- tmp$grobs[[leg]]
# create a non-sense plot just to get a 'fill = cat' legend
g3 <- ggplot(df, aes(x = year, y = TOTALshark, fill = cat)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = as.character(df3$cat_cols))
# extract 'cat' legend
tmp <- ggplot_gtable(ggplot_build(g3))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend_cat <- tmp$grobs[[leg]]
# arrange plot and legends
library(gridExtra)
# quick and dirty with grid.arrange
# in the first column, put the plot (g2) without legend (removed using the 'theme' code)
# put the two legends in the second column
grid.arrange(g2 + theme(legend.position = "none"),
arrangeGrob(legend_fish, legend_cat), ncol = 2)
# arrange with viewports
# define plotting regions (viewports)
grid.newpage()
vp_plot <- viewport(x = 0.25, y = 0.5,
width = 0.5, height = 1)
vp_legend <- viewport(x = 0.75, y = 0.7,
width = 0.5, height = 0.5)
vp_sublegend <- viewport(x = 0.7, y = 0.25,
width = 0.5, height = 0.3)
print(g2 + theme(legend.position = "none"), vp = vp_plot)
upViewport(0)
pushViewport(vp_legend)
grid.draw(legend_fish)
upViewport(0)
pushViewport(vp_sublegend)
grid.draw(legend_cat)
See also #mnel's answer here for replacing values in the plot object. It might be worth trying here as well. You may also check gtable methods for arranging grobs.

Related

Adding lines connecting means to ggplot (Raincloud Plots)

I have a ggplot to which I want to add a line connecting the means. However, I keep getting an Error message:
"geom_path: Each group consists of only one
observation. Do you need to adjust the group
aesthetic?"
I tried solutions suggested on here, but these seemingly stopped working years ago. Hence, I opened a new post.
#some packages
if (!require("pacman")) install.packages("pacman")
pacman::p_load(here, readr, cowplot, tidyr, ggplot2, dplyr)
#some functions from https://github.com/RainCloudPlots/RainCloudPlots
source("R_rainclouds.R")
source("summarySE.R")
source("simulateData.R")
#some data
df3 <- structure(list(participant = c(1L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L,
24L, 25L, 26L, 1L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
1L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L), condition = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("RT_Cau", "RT_Moro",
"RT_Asi"), class = "factor"), RT = c(1.44248448543333, 2.73934002973517,
1.89927013090706, 1.32510448686595, 2.44101598069973, 2.84290772015973,
1.19419819108836, 2.12124958877175, 1.14707311860052, 2.72286767178203,
1.15914495363538, 1.5340050993702, 1.62616192435053, 1.32694796283192,
1.2720800304128, 0.99275928310549, 1.04329096409593, 1.43288644582691,
1.60302970699442, 1.3393626055176, 1.24088162033185, 2.42448868318791,
1.6398716779282, 1.53816275909702, 1.51033130413559, 3.226993255043,
2.1915727996463, 1.39240057519678, 3.0538809712989, 2.52658416881183,
1.16366335020089, 2.33377114484134, 1.39357978132538, 2.691606623485,
1.21999657945028, 1.72195011524003, 1.38834235226937, 1.44350802586345,
1.29563539425317, 0.909762618509679, 1.13583585924538, 1.58240957515452,
1.82142351906117, 1.3644415734435, 1.32141664778601, 2.23277562688125,
1.5773976029336, 1.43200172590417, 1.68991681725, 2.9617422858462,
1.60886625604519, 1.38647850513866, 3.46156610375971, 2.96950698342897,
1.17905107770577, 2.36256332626113, 1.31254065801458, 3.204902618708,
1.21067325368702, 1.80371515914087, 1.57816183853565, 1.40761655308155,
1.27304559913463, 1.07621914272144, 1.04203150853998, 1.58958820979388,
1.79859778873147, 1.19249820050996, 1.4116357628608, 2.15806795062162,
1.70597872926531, 1.66135756110131)), row.names = c(NA, -72L), class = "data.frame")
#make a summary of the data
df4 <- summarySE(df3, measurevar = "RT", groupvars = c("condition"))
#a working plot that shows dots, boxplot, distribution, and mean+SE
#I want to have lines connecting the mean dots.
ggplot(df3,aes(x=condition,y=RT,fill=condition,col=condition))+
geom_flat_violin(position = position_nudge(x = .2, y = 0), alpha = .6,adjust =4)+
geom_point(aes(x = as.numeric(condition)-.15, y = RT, colour = condition),position = position_jitter(width = .05), size = .25, shape = 20)+
geom_boxplot(aes(x = condition, y = RT, fill = condition),outlier.shape = NA, alpha = .5, width = .1, colour = "black") +
geom_point(data = df4, aes(x = as.numeric(condition)+.1, y = RT_mean, group = condition, colour = condition), shape = 18) +
geom_errorbar(data = df4, aes(x = as.numeric(condition)+.1, y = RT_mean, group = condition, colour = condition, ymin = RT_mean-se, ymax = RT_mean+se), width = .05) +
ylab('RT')+
scale_fill_brewer(palette = "Dark2")+scale_colour_brewer(palette = "Dark2")+
guides(fill = FALSE, col = FALSE) +
theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "transparent",colour = NA),
plot.background = element_rect(fill = "transparent",colour = NA)
) # +
# geom_line(data = df4, aes(x = as.numeric(condition)+.1, #y = RT_mean, group = condition, colour = condition), #linetype = 3)
#The commented outpart is my attempt to connect it with lines, which produces the described error.
#If you do not want to load the package from github, here is the raw code of the summariseSE function:
# summarySE function
summarySE <- function(data = NULL, measurevar, groupvars = NULL, na.rm = FALSE,
conf.interval = .95, .drop = TRUE) {
library(plyr)
# New version of length which can handle NA's: if na.rm==T, don't count them
length2 <- function(x, na.rm = FALSE) {
if (na.rm) {
sum(!is.na(x))
} else {
length(x)
}
}
# This does the summary. For each group's data frame, return a vector with
# N, mean, median, and sd
datac <- plyr::ddply(data, groupvars, .drop=.drop,
.fun = function(xx, col) {
c(N = length2(xx[[col]], na.rm=na.rm),
mean = mean(xx[[col]], na.rm=na.rm),
median = median(xx[[col]], na.rm=na.rm),
sd = sd(xx[[col]], na.rm=na.rm)
)
},
measurevar
)
# Rename the "mean" and "median" columns
datac <- plyr::rename(datac, c("mean" = paste(measurevar, "_mean", sep = "")))
datac <- plyr::rename(datac, c("median" = paste(measurevar, "_median", sep = "")))
datac$se <- datac$sd / sqrt(datac$N) # Calculate standard error of the mean
# Confidence interval multiplier for standard error
# Calculate t-statistic for confidence interval:
# e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
ciMult <- qt(conf.interval / 2 + .5, datac$N - 1)
datac$ci <- datac$se * ciMult
return(datac)
}
The final plot should have lines connecting the means, similar to Plot 11 of https://wellcomeopenresearch.org/articles/4-63/v2.
Thanks already for your help!
This can be useful:
#Code
ggplot(df3,aes(x=condition,y=RT,fill=condition,col=condition))+
geom_flat_violin(position = position_nudge(x = .2, y = 0),
alpha = .6,adjust =4)+
geom_point(aes(x = as.numeric(condition)-.15, y = RT,
colour = condition),
position = position_jitter(width = .05), size = .25, shape = 20)+
geom_boxplot(aes(x = condition, y = RT, fill = condition),
outlier.shape = NA, alpha = .5,
width = .1, colour = "black") +
geom_point(data = df4, aes(x = as.numeric(condition)+.1,
y = RT_mean,
group = condition, colour = condition), shape = 18) +
geom_errorbar(data = df4, aes(x = as.numeric(condition)+.1, y = RT_mean, group = condition, colour = condition, ymin = RT_mean-se, ymax = RT_mean+se), width = .05) +
ylab('RT')+
scale_fill_brewer(palette = "Dark2")+scale_colour_brewer(palette = "Dark2")+
guides(fill = FALSE, col = FALSE) +
theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "transparent",colour = NA),
plot.background = element_rect(fill = "transparent",colour = NA)
) +
geom_line(data = df4, aes(x = as.numeric(condition)+.1,
y = RT_mean, group = 1)
Output:

Plotly heatmap with different cell widths

I would like to plot an interactive heatmap, where the column widths are different.
Although I managed to get different cell widths, the widths do not correspond to the values and the ordering is not correct.
The order of the x-axis should remain the same as the segments column in the df data.frame.
If the heatmap doesn't work, I would also be fine with a stacked barchart.
df <- structure(list(
segments = c(101493L, 101493L, 101493L, 101492L, 101492L, 101492L, 101494L, 101494L, 101494L, 102018L, 102018L,
102018L, 102018L, 102018L, 102019L, 102019L, 102019L, 102019L, 102019L),
timestamp = structure(c(1579233600, 1579240800, 1579248000,
1579233600, 1579240800, 1579248000, 1579233600, 1579240800, 1579248000,
1579219200, 1579226400, 1579233600, 1579240800, 1579248000, 1579219200,
1579226400, 1579233600, 1579240800, 1579248000), class = c("POSIXct", "POSIXt"), tzone = "Europe/Berlin"),
value = c(91.772, 91.923, 96.968, 104.307, 101.435, 105.539, 104.879, 104.197, 103.038,
96.403, 90.926, 111.807, 115.931, 111.729, 100.129, 86.903, 108.22, 117.841, 112.293),
width = c(5L, 5L, 5L, 2L, 2L, 2L, 3L, 3L, 3L, 10L, 10L, 10L, 10L, 10L, 9L, 9L, 9L, 9L, 9L)),
row.names = c(1L, 2L, 3L, 11L, 12L, 13L, 21L, 22L, 23L, 31L, 32L, 33L, 34L, 35L,43L, 44L, 45L, 46L, 47L),
class = "data.frame")
library(plotly)
plot_ly(data = df) %>%
add_trace(type="heatmap",
x = ~as.character(width),
y = ~timestamp,
z = ~value,
xgap = 0.2, ygap = 0.2) %>%
plotly::layout(xaxis = list(rangemode = "nonnegative",
tickmode = "array",
tickvals=as.character(unique(df$width)),
ticktext=as.character(unique(df$segments)),
zeroline = FALSE))
By giving Plotly a matrix for the z-values it seems to work and the widths are respected.
df$newx <- rep(cumsum(df[!duplicated(df$segments),]$width), rle(df$segments)$length)
mappdf <- expand.grid(timestamp=unique(df$timestamp), newx=unique(df$newx))
mappdf <- merge(mappdf, df[,c("timestamp","value","newx")], all.x = T, all.y = F, sort = F)
mappdf <- mappdf[order(mappdf$newx, mappdf$timestamp),]
zvals <- matrix(data = mappdf$value,
nrow = length(unique(df$timestamp)),
ncol = length(unique(df$newx)))
plot_ly() %>%
add_heatmap(y = sort(unique(df$timestamp)),
x = c(0,unique(df$newx)),
z = zvals) %>%
plotly::layout(xaxis = list(
title = "",
tickvals=unique(df$newx),
ticktext=paste(unique(df$segments), "-", unique(df$width))
))

Labeling a single point with ggrepel

I am trying to use geom_label_repel to add labels to a couple of data points on a plot. In this case, they happen to be outliers on box plots. I've got most of the code working, I can label the outlier, but for some reason I am getting multiple labels (equal to my sample size for the entire data set) mapped to that point. I'd like just one label for this outlier.
Example:
Here is my data:
dput(sus_dev_data)
structure(list(time_point = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L), .Label = c("3", "8", "12"), class = "factor"),
days_to_pupation = c(135L, 142L, 143L, 155L, 149L, 159L,
153L, 171L, 9L, 67L, 53L, 49L, 72L, 67L, 55L, 64L, 60L, 122L,
53L, 51L, 49L, 53L, 50L, 56L, 44L, 47L, 60L)), row.names = c(1L,
2L, 3L, 4L, 5L, 6L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 20L, 21L, 22L, 23L, 24L, 26L, 27L, 28L, 29L, 30L), class = "data.frame")
and my code...
####################################################################################################
# Time to pupation statistical analysis
####################################################################################################
## linear model
pupation_Model=lm(sus_dev_data$days_to_pupation~sus_dev_data$time_point)
pupationANOVA=aov(pupation_Model)
summary(pupationANOVA)
# Tukey test to study each pair of treatment :
pupationTUKEY <- TukeyHSD(x=pupationANOVA, which = 'sus_dev_data$time_point',
conf.level=0.95)
## Function to generate significance labels on box plot
generate_label_df <- function(pupationTUKEY, variable){
# Extract labels and factor levels from Tukey post-hoc
Tukey.levels <- pupationTUKEY[[variable]][,4]
Tukey.labels <- data.frame(multcompLetters(Tukey.levels, reversed = TRUE)['Letters'])
#I need to put the labels in the same order as in the boxplot :
Tukey.labels$treatment=rownames(Tukey.labels)
Tukey.labels=Tukey.labels[order(Tukey.labels$treatment) , ]
return(Tukey.labels)
}
#generate labels using function
labels<-generate_label_df(pupationTUKEY , "sus_dev_data$time_point")
#rename columns for merging
names(labels)<-c('Letters','time_point')
# obtain letter position for y axis using means
pupationyvalue<-aggregate(.~time_point, data=sus_dev_data, max)
#merge dataframes
pupationfinal<-merge(labels,pupationyvalue)
####################################################################################################
# Time to pupation plot
####################################################################################################
# Plot of data
(pupation_plot <- ggplot(sus_dev_data, aes(time_point, days_to_pupation)) +
Alex_Theme +
geom_boxplot(fill = "grey80", outlier.size = 0.75) +
geom_text(data = pupationfinal, aes(x = time_point, y = days_to_pupation,
label = Letters),vjust=-2,hjust=.5, size = 4) +
#ggtitle(expression(atop("Days to pupation"))) +
labs(y = 'Days to pupation', x = 'Weeks post-hatch') +
scale_y_continuous(limits = c(0, 200)) +
scale_x_discrete(labels=c("3" = "13", "8" = "18",
"12" = "22")) +
geom_label_repel(aes(x = 1, y = 9),
label = '1')
)
Here's a shorter example to demonstrate what is going on. Essentially, your labels are beng recycled to be the same length as the data.
df = data.frame(x=1:5, y=1:5)
ggplot(df, aes(x,y, color=x)) +
geom_point() +
geom_label_repel(aes(x = 1, y = 1), label = '1')
You can override this by providing new data for the ggrepel
ggplot(df, aes(x,y, color=x)) +
geom_point() +
geom_label_repel(data = data.frame(x=1, y=1), label = '1')
Based on your data, you have 3 outliers (one in each group), you can manually identify them by applying the classic definition of outliers by John Tukey (Upper: Q3+1.5*IQR and Lower: Q1-1.5*IQR) (but you are free to set your own rules to define an outlier). You can use the function quantile and IQR to get those points.
Here, I incorporated them in a sequence of pipe using dplyr package:
library(tidyverse)
Outliers <- sus_dev_data %>% group_by(time_point) %>%
mutate(Out_up = ifelse(days_to_pupation > quantile(days_to_pupation,0.75)+1.5*IQR(days_to_pupation), "Out","In"))%>%
mutate(Out_Down = ifelse(days_to_pupation < quantile(days_to_pupation,0.25)-1.5*IQR(days_to_pupation), "Out","In")) %>%
filter(Out_up == "Out" | Out_Down == "Out")
# A tibble: 3 x 4
# Groups: time_point [3]
time_point days_to_pupation Out_up Out_Down
<fct> <int> <chr> <chr>
1 3 9 In Out
2 8 122 Out In
3 12 60 Out In
As mentioned by #dww, you need to pass a new dataframe to geom_label_repel if you want your outliers to be single labeled. So, here we use the dataframe Outliers to feed the geom_label_repel function:
library(ggplot2)
library(ggrepel)
ggplot(sus_dev_data, aes(time_point, days_to_pupation)) +
#Alex_Theme +
geom_boxplot(fill = "grey80", outlier.size = 0.75) +
geom_text(data = pupationfinal, aes(x = time_point, y = days_to_pupation,
label = Letters),vjust=-2,hjust=.5, size = 4) +
#ggtitle(expression(atop("Days to pupation"))) +
labs(y = 'Days to pupation', x = 'Weeks post-hatch') +
scale_y_continuous(limits = c(0, 200)) +
scale_x_discrete(labels=c("3" = "13", "8" = "18",
"12" = "22")) +
geom_label_repel(inherit.aes = FALSE,
data = Outliers,
aes(x = time_point, y = days_to_pupation, label = "Out"))
And you get the following graph:
I hope it helps you to figure it how to label all your outliers.

R ggplot facet_grid_sc with cols

I'm trying to use facet_grid_sc to manipulate the y axis but by plotting the panel column-wise instead of row-wise. I have the following dataframe:
test2 <- structure(list(stream = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("Feed", "Cells 1-4",
"Cells 5-8", "Cells 9-12", "Totalconcentrate", "Tailings"), class = "factor"),
mineral = c("Calcite", "Calcite", "Calcite", "Calcite", "Scheelite",
"Scheelite", "Scheelite", "Scheelite", "Calcite", "Calcite",
"Calcite", "Calcite", "Scheelite", "Scheelite", "Scheelite",
"Scheelite", "Calcite", "Calcite", "Calcite", "Calcite",
"Scheelite", "Scheelite", "Scheelite", "Scheelite", "Calcite",
"Calcite", "Calcite", "Calcite", "Scheelite", "Scheelite",
"Scheelite", "Scheelite"), shapefactor = structure(c(3L,
1L, 2L, 4L, 3L, 1L, 2L, 4L, 3L, 1L, 2L, 4L, 3L, 1L, 2L, 4L,
3L, 1L, 2L, 4L, 3L, 1L, 2L, 4L, 3L, 1L, 2L, 4L, 3L, 1L, 2L,
4L), .Label = c("Angularity", "Circularity", "Formfactor",
"Roundness"), class = "factor"), mean = c(0.191074267258554,
1.57871188864644, 4.98640988695014, 0.709748496492633, 0.255307602333514,
1.41318627525434, 4.48236746482907, 0.787906844284224, 0.2370993275776,
1.59011418196729, 5.00866589220356, 0.708099932389451, 0.379279621962832,
1.41798512797767, 4.49174029724501, 0.803054249581329, 0.188107140488459,
1.58446664800185, 4.99394785197469, 0.720664938740251, 0.261663000285933,
1.33457686608134, 4.2649277507168, 0.809433325901688, 0.204386468447994,
1.55129002878455, 4.88754754288822, 0.761051008277419, 0.432222746956355,
1.22012862228623, 3.87276933395819, 0.861599941934953)), .Names = c("stream",
"mineral", "shapefactor", "mean"), row.names = c(73L, 74L, 75L,
76L, 93L, 94L, 95L, 96L, 125L, 126L, 127L, 128L, 145L, 146L,
147L, 148L, 177L, 178L, 179L, 180L, 197L, 198L, 199L, 200L, 281L,
282L, 283L, 284L, 301L, 302L, 303L, 304L), class = "data.frame")
I plot it using the following code:
scales_y <- list(
"Angularity" = scale_y_continuous(limits = c(0.5,2)),
"Circularity" = scale_y_continuous(limits = c(2,5.5)),
"Formfactor" = scale_y_continuous(limits = c(0,0.5)),
"Roundness" = scale_y_continuous(limits = c(0.6,0.9))
)
g <- ggplot(test2, aes(x=stream, y=mean, color=mineral, group=mineral))
g <- g + geom_point()
g <- g + geom_line()
g <- g + theme_bw()
g <- g + theme(axis.text.x = element_text(size =8),
axis.ticks.x=element_blank(),
legend.position="bottom")
g <- g + scale_color_brewer(palette = "Paired")
g <- g + facet_grid_sc(rows = vars(shapefactor), scales = list(y = scales_y))
print(g)
This works fine. However, if I want to plot the shapefactor in columns instead of rows (so writing facet_grid_sc(cols = vars(shapefactor), scales = list(y = scales_y))), then I get this error message:
Error in .subset2(x, i, exact = exact) : attempt to select less
than one element in get1index
I'm probably writing this wrong, but I can't find in the help of the package how to write it properly. Can anyone help me please?
Thanks in advance!
Nath
I did not get your fancy facet_grid_sc to work, but here is an alternate, a bit hack-ey solution using cowplot:
library(tidyverse)
library(cowplot)
# split, not group by for the labels
out <- test2 %>% split(.,.$shapefactor) %>%
map( ~ggplot(.,aes(x=stream, y=mean, color=mineral, group=mineral)) +
geom_point() +
geom_line() +
theme_bw() +
theme(axis.text.x = element_text(size =8),
axis.ticks.x=element_blank(),
legend.position='none') +
scale_color_brewer(palette = "Paired") +
scales_y[[.$shapefactor[1]]])
# create Dummy for legend
dummy <- ggplot(test2,aes(x=stream, y=mean, color=mineral, group=mineral)) +
geom_point() +
geom_line()+
scale_color_brewer(palette = "Paired") +
theme(legend.position = 'bottom',legend.justification = 'center')
# add legend to list
out$' ' <- cowplot::get_legend(dummy)
cowplot::plot_grid(plotlist = out, ncol = 1,labels = names(out),axis = 'r', align = 'h')
It obviously needs a bit of formatting, but you get the jest.
plot_grid offers a lot of customizability for its labels, the legend has to be changed via the dummy-plot.

factor order when subsetting within ggplot

I have factors on x-axis and order those factor levels in a way that's intuitive to plot with ggplot. It works fine. However, when I use the subset command within ggplot, it re-orders my original sequence of factors.
Is it possible to do subsetting within ggplot and preserve the order of factor levels?
Here is the data and code:
library(ggplot2)
library(plyr)
dat <- structure(list(SubjectID = structure(c(12L, 4L, 6L, 7L, 12L,
7L, 5L, 8L, 14L, 1L, 15L, 1L, 7L, 1L, 7L, 5L, 4L, 2L, 9L, 6L,
7L, 13L, 12L, 2L, 15L, 3L, 5L, 13L, 13L, 10L, 7L, 8L, 10L, 10L,
1L, 10L, 12L, 7L, 6L, 10L), .Label = c("s001", "s002", "s003",
"s004", "s005", "s006", "s007", "s008", "s009", "s010", "s011",
"s012", "s013", "s014", "s015"), class = "factor"), Parameter = structure(c(7L,
3L, 5L, 3L, 6L, 4L, 6L, 7L, 7L, 4L, 7L, 12L, 8L, 11L, 1L, 4L,
3L, 4L, 6L, 4L, 6L, 6L, 12L, 5L, 12L, 1L, 7L, 13L, 11L, 1L, 4L,
1L, 6L, 13L, 10L, 10L, 10L, 13L, 5L, 8L), .Label = c("(Intercept)",
"c0.008", "c0.01", "c0.015", "c0.02", "c0.03", "PrevCorr1", "PrevFail1",
"c0.025", "c0.004", "c0.006", "c0.009", "c0.012", "c0.005"), class = "factor"),
Weight = c(0.0352725634087837, 1.45546697427904, 2.29457594510248,
0.479548914792514, 6.39680995359234, 1.48829600339586, 2.69253113220079,
-0.171219812386926, -0.453625394224277, 1.43732884325816,
0.742416863226952, 0.256935761466245, -0.29401087047524,
0.34653127811481, 0.33120592543102, 2.79213318878505, 2.47047299128637,
1.022450287681, 6.92891513416868, 0.648982326396105, 6.58336282626389,
6.40600461501379, 1.80062359655524, 3.86658202530889, 1.23833324887194,
-0.026560261876089, 0.121670468861011, 0.9290824087063, 0.349104382483186,
0.24722583823016, 1.82473621255801, -0.712668411699556, 6.51789901685784,
0.74682257127003, 0.0755807984938072, 0.131705709322157,
0.246465073382095, 0.876279316248929, 1.83442709571662, -0.579086982613267
)), .Names = c("SubjectID", "Parameter", "Weight"), row.names = c(2924L,
784L, 1537L, 1663L, 3138L, 1744L, 1266L, 1996L, 3548L, 86L, 3692L,
230L, 1613L, 213L, 1627L, 1024L, 832L, 384L, 2418L, 1568L, 1714L,
3362L, 3200L, 497L, 3632L, 683L, 1020L, 3281L, 3263L, 2779L,
1632L, 1995L, 2674L, 2753L, 312L, 2638L, 3198L, 1809L, 1569L,
2589L), class = "data.frame")
## Sort factors in the order that will make it intuitive to read the plot
## It goes, "(Intercept), "PrevCorr1", "PrevFail1", "c0.004", "c0.006", etc.
paramNames <- levels(dat$Parameter)
contrastNames <- sort(paramNames[grep("c0",paramNames)])
biasNames <- paramNames[!paramNames %in% contrastNames]
dat$Parameter <- factor(dat$Parameter, levels=c(biasNames, contrastNames))
## Add grouping parameter that will be used to plot different weights in different colors
dat$plotColor <-"Contrast"
dat$plotColor[dat$Parameter=="(Intercept)"] <- "Intercept"
dat$plotColor[grep("PrevCorr", dat$Parameter)] <- "PrevSuccess"
dat$plotColor[grep("PrevFail", dat$Parameter)] <- "PrevFail"
p <- ggplot(dat, aes(x=Parameter, y=Weight)) +
# The following command, which adds geom_line to data points of the graph, changes the order of levels
# If I uncomment the next line, the factor level order goes wrong.
#geom_line(subset=.(plotColor=="Contrast"), aes(group=1), stat="summary", fun.y="mean", color="grey50", size=1) +
geom_point(aes(group=Parameter, color=plotColor), size=5, stat="summary", fun.y="mean") +
geom_point(aes(group=Parameter), size=2.5, color="white", stat="summary", fun.y="mean") +
theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1))
print(p)
Here is the plot when geom line is commented
And here is what happens when geom_line is uncommented
If you switch the order in which you plot the objects, the problem disappears:
p <- ggplot(dat, aes(x=Parameter, y=Weight)) +
# The following command, which adds geom_line to data points of the graph, changes the order of levels
# If I uncomment the next line, the factor level order goes wrong.
geom_point(aes(group=Parameter, color=plotColor), size=5, stat="summary", fun.y="mean") +
geom_line(subset = .(plotColor == "Contrast"), aes(group=1), stat="summary", fun.y="mean", color="grey50", size=1) +
geom_point(aes(group=Parameter), size=2.5, color="white", stat="summary", fun.y="mean") +
theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1))
print(p)
I think the problem lies in plotting the subsetted data first, it ditches the levels for the original data, and when you add back in the points, it doesn't know where to put them. When you plot with the original data first, it maintains the levels. I'm not sure though, you might have to take my word on it.

Resources