Related
I am trying to add labels to sit above box plots. For example, in this example, instead of NA, I would want the label above A to say "total number of var3 = 11" and over B "total number of var3 = 34". In my real data, numbers are produced, but they bear no relation to the original data set (I cannot work out how they could possibly be calculated from the original data, so I must be doing something wrong!).
# Example data: a two-level grouping variable and two numeric measurements.
var1 <- c("A", "B", "A", "B", "B", "B", "A", "B", "B")
var2 <- as.numeric(4:12)
var3 <- as.numeric(1:9)
df <- data.frame(var1, var2, var3)
# Summary function handed to stat_summary(): takes the y values of one box
# and returns a one-row data.frame with the label position (`y`) and text.
# NOTE: `y` is used below as an index into df$var3, so the reported total is
# sum(df$var3[y]); indices beyond the length of var3 contribute NA.
stat_box_data <- function(y, upper_limit = max(df$var2) * 1.15) {
  n_obs <- length(y)
  var3_total <- sum(df$var3[y])
  label_text <- paste(
    "number of var1 =", n_obs, "\n",
    "total number of var3 =", var3_total
  )
  data.frame(y = 0.95 * upper_limit, label = label_text)
}
# Box plot of var2 by var1; the text produced by stat_box_data() is drawn
# for each box.
ggplot(df, aes(x = var1, y = var2)) +
  geom_boxplot() +
  stat_summary(
    fun.data = stat_box_data,
    geom = "text",
    hjust = 0.5,
    vjust = 0.9
  )

# Per-group totals of var3, for comparison with the plotted labels.
df %>%
  group_by(var1) %>%
  summarise(sum = sum(var3))
Link to graph
Thanks to original post for code here https://gscheithauer.medium.com/how-to-add-number-of-observations-to-a-ggplot2-boxplot-b22710f7ef80
You could get the result you want using this rather convoluted method.
library(dplyr)
library(ggplot2)
# Same example data as in the question.
var1 <- c("A", "B", "A", "B", "B", "B", "A", "B", "B")
var2 <- as.numeric(4:12)
var3 <- as.numeric(1:9)
df <- data.frame(var1, var2, var3)
# Summary function for stat_summary(fun.data = ...).
#
# It receives only the y values (var2) of one box, so the corresponding rows
# of `df` are recovered by looking those values up in df$var2; var3 is then
# totalled over the same rows.
#
# Args:
#   y:           numeric vector of var2 values belonging to one box.
#   upper_limit: reference height; the label is placed at 95% of it.
#
# Returns: a one-row data.frame with columns `y` (label position) and
#   `label` (label text).
#
# NOTE: match() returns the first matching row only, so this relies on the
# values in df$var2 being unique.
stat_box_data <- function(y, upper_limit = max(df$var2) * 1.15) {
  rows <- match(y, df$var2)
  var3_total <- sum(df$var3[rows])
  data.frame(
    y = 0.95 * upper_limit,
    # The value is a sum of var3, so label it as a total (not a mean).
    label = paste(
      "count =", length(y), "\n",
      "total number of var3 =", var3_total, "\n"
    )
  )
}
# Per-group totals of var3 as a plain vector (handy for checking the labels;
# the plot below does not read it).
d <- df %>%
  group_by(var1) %>%
  summarise(sum = sum(var3)) %>%
  pull(sum)

# Box plot with the text from stat_box_data() drawn for each box.
ggplot(df, aes(x = var1, y = var2)) +
  geom_boxplot() +
  stat_summary(
    fun.data = stat_box_data,
    geom = "text",
    hjust = 0.5,
    vjust = 0.9
  )
With ggplot2 I want to plot two vectors (vec1_num, vec2_num) in two dimensions and colour the points by a group variable (vec3_char). Some data points are overlapping.
library(ggplot2)
# Coordinates (identical in x and y) and the group of each point.
vec1_num <- c(1, 2, 3, 4, 1, 3, 4, 5, 5, 5)
vec2_num <- c(1, 2, 3, 4, 1, 3, 4, 5, 5, 5)
vec3_char <- c("A", "B", "C", "A", "B", "C", "C", "A", "B", "C")

# plot 1: semi-transparent points coloured by group; overlaps blend.
ggplot(data = NULL) +
  geom_point(
    aes(x = vec1_num, y = vec2_num, colour = vec3_char),
    alpha = 0.4,
    size = 4
  ) +
  scale_colour_manual(
    values = c("A" = "darkblue", "B" = "darkred", "C" = "orange")
  ) +
  theme(panel.grid = element_blank())
I know I can attenuate the overlap by reducing alpha or working with geom_jitter adding a bit of noise. Like this:
# plot 2: same data, with a little horizontal jitter to separate overlaps.
ggplot(data = NULL) +
  geom_jitter(
    aes(x = vec1_num, y = vec2_num, colour = vec3_char),
    alpha = 0.4,
    size = 4,
    width = 0.1
  ) +
  scale_colour_manual(
    values = c("A" = "darkblue", "B" = "darkred", "C" = "orange")
  ) +
  theme(panel.grid = element_blank())
However, is it possible to make use of plot 1 but colour the overlapping points differently? So that, for example, "A" = "darkblue", "AB" = "black", "ABC" = "grey", "B" = "darkred", "BC" = "pink", "C" = "orange"? And can I additionally add a small Venn diagram (legend) that visualises the color choice for the point overlap?
Thanks!
My way of doing this would be to convert the letters into numbers, sum them and convert back into letters.
NB: The one complication is that the letters need to be A, B, D, H, ... (alphabet positions 1, 2, 4, 8, ..., i.e. powers of two) so that each sum can be produced by only one combination of letters. There is probably a way to start with A, B, C, ... and encode them to unique values instead.
library(tidyverse)
# Point coordinates and groups; the groups use A, B, D so that their alphabet
# positions (1, 2, 4) add up to a distinct number for every combination.
vec1_num <- c(1, 2, 3, 4, 1, 3, 4, 5, 5, 5)
vec2_num <- c(1, 2, 3, 4, 1, 3, 4, 5, 5, 5)
vec3_char <- c("A", "B", "D", "A", "B", "D", "D", "A", "B", "D")
# Remove duplicated characters from a string, keeping the first occurrence of
# each (e.g. "1121" -> "12").
#
# unique() is used instead of rle() because rle() only collapses *adjacent*
# repeats: "121" would keep both 1s and the digit sum computed downstream
# would then point at the wrong letter combination.
#
# Args:
#   str: a character vector; only its first element is processed.
# Returns: a single string made of the distinct characters of str[[1]].
removeDup <- function(str) {
  chars <- strsplit(str, "")[[1]]
  paste(unique(chars), collapse = "")
}
# Encode each letter as its alphabet position (A = 1, B = 2, D = 4).
data <- data.frame(
  x = vec1_num,
  y = vec2_num,
  col = match(vec3_char, LETTERS)
)

# For every x: paste all codes into one string, keep one row per (x, y),
# drop repeated codes, add the remaining digits up and translate the sum back
# into the combination of letters it stands for.
data <- data %>%
  group_by(x) %>%
  mutate(colour = glue::glue_collapse(col, sep = "")) %>%
  select(-col) %>%
  distinct(x, y, .keep_all = TRUE) %>%
  mutate(colour = removeDup(colour)) %>%
  mutate(
    # vapply() guarantees an integer vector whatever the input looks like.
    colour = vapply(
      str_extract_all(colour, "\\d"),
      function(digits) sum(as.integer(digits)),
      integer(1)
    )
  ) %>%
  mutate(
    colour = case_when(
      colour == 1 ~ "A",
      colour == 2 ~ "B",
      colour == 3 ~ "AB",
      colour == 4 ~ "D",
      colour == 5 ~ "AD",
      colour == 6 ~ "BD",
      colour == 7 ~ "ABD"
    )
  ) %>%
  # Grouping was only needed for the per-x steps above.
  ungroup()
# plot 1: one point per position, coloured and labelled by the combination
# of groups found there.
ggplot(data) +
  geom_point(
    aes(x = x, y = y, colour = as_factor(colour)),
    alpha = 0.4,
    size = 4
  ) +
  geom_text(aes(x = x, y = y, label = colour), vjust = 2) +
  scale_colour_manual(
    values = c(
      "A" = "darkblue",
      "B" = "darkred",
      "AB" = "orange",
      "D" = "green",
      "AD" = "black",
      "BD" = "orange",
      "ABD" = "purple"
    ),
    name = "Colour"
  ) +
  theme(panel.grid = element_blank())
.
I would firstly create a dataframe. Then I would extract for every x y combination (list(df$vec1_num, df$vec2_num)) what characters are present (...unique(xy_i$vec3_char)...). Like this:
df <- data.frame(vec1_num, vec2_num, vec3_char)

# Split df by (x, y) position, tag every row of a position with the sorted,
# de-duplicated letters present there, then stack the pieces back together.
df_new <- do.call(
  "rbind.data.frame",
  by(
    df,
    list(df$vec1_num, df$vec2_num),
    function(rows_at_xy) {
      letters_here <- sort(unique(rows_at_xy$vec3_char))
      rows_at_xy$chars_comb <- factor(
        paste0(letters_here, collapse = ""),
        levels = c("A", "AB", "AC", "ABC", "B", "BC", "C")
      )
      rows_at_xy
    }
  )
)
If you now make the plot it shows you what characters overlap at which point.
# Points coloured by the combination of letters sharing their position.
ggplot(data = df_new) +
  geom_point(
    aes(x = vec1_num, y = vec2_num, colour = chars_comb),
    alpha = 0.4,
    size = 4
  ) +
  scale_colour_manual(
    values = c(
      "AB" = "black",
      "ABC" = "grey",
      "B" = "darkred",
      "C" = "orange",
      "AC" = "red"
    )
  ) +
  theme(panel.grid = element_blank())
I've created a dodged bar chart in ggplot2 with geom_col(). The code looks like this:
# Two categories (A, B), each measured on the same four variables.
cat <- rep(c("A", "B"), each = 4)
var <- rep(c("X", "Y", "Z", "T"), times = 2)
val <- c(35, 25, 20, 20, 40, 10, 15, 35)
df <- data.frame(var, cat, val)

# Dodged bars: one bar per category within each variable.
ggplot(data = df) +
  geom_col(
    aes(x = var, y = val, fill = cat),
    position = "dodge"
  )
This produces the following plot:
I would like each variable to have a different filling colour, for example T = Green, X = Blue etc. and still keep a colour separation between the categories, for example T-A = darkgreen, T-B = lightgreen, X-A = darkblue, X-B = lightblue etc.
Is there an easy way to add this feature?
Thanks!
I think the easiest way to do what you're asking is to use the alpha scale:
# Hue encodes the variable, transparency encodes the category.
ggplot(data = df) +
  geom_col(
    aes(x = var, y = val, fill = var, alpha = cat),
    position = "dodge"
  ) +
  scale_alpha_discrete(
    range = c(0.5, 1),
    guide = guide_none()
  ) +
  theme_classic()
If you really want to use a grid in the background and don't want to see lines through the pale bars, make sure you plot some white bars of the same dimension underneath:
ggplot(data = df) +
  # Opaque white bars first, so grid lines do not show through the pale bars.
  geom_col(
    aes(x = var, y = val, group = cat),
    position = "dodge",
    fill = "white",
    alpha = 1
  ) +
  # Coloured, partly transparent bars drawn on top.
  geom_col(
    aes(x = var, y = val, fill = var, alpha = cat),
    position = "dodge"
  ) +
  scale_alpha_discrete(
    range = c(0.5, 1),
    guide = guide_none()
  )
Maybe this can be useful:
library(ggplot2)
# Data
cat <- rep(c("A", "B"), each = 4)
var <- rep(c("X", "Y", "Z", "T"), times = 2)
val <- c(35, 25, 20, 20, 40, 10, 15, 35)
df <- data.frame(var, cat, val)

# Plot: one fill level per variable/category combination.
ggplot(data = df) +
  geom_col(
    aes(x = var, y = val, fill = interaction(var, cat)),
    position = "dodge"
  ) +
  labs(fill = "Var")
Output:
You can customize colors with scale_fill_*(). Here an example using a fill scale from ggsci package:
# Plot 2: same bars, with a fill palette taken from the ggsci package.
ggplot(data = df) +
  geom_col(
    aes(x = var, y = val, fill = interaction(var, cat)),
    position = "dodge"
  ) +
  labs(fill = "Var") +
  ggsci::scale_fill_futurama()
Output:
I have a time series (albeit small scale i.e., 4 days) dataset that consists of four groups, say, types of granola. Some of the ingredients are shared among the types of granola, so that granola "A" consists of ingredient "x", "y", "c", "f" and granola "B" consists of "g", "t", "f", "e". Granola "A" and Granola "B" both contain ingredient "f"
I am trying to plot how much of each ingredient an animal eats from the offered granola sample on each of the days.
I've tried using the 'group' call in 'geom_raster()' but that has no effect.
Here is some sample data:
# Sample data: 4 days x the components of four granola types (A-D).
# Some components ("mi", "csf", "cd", "ah") occur in more than one type.
# `value` is random, so it differs between runs unless a seed is set.
test.df <- data.frame(
  day = c(
    rep(1, 4), rep(2, 4), rep(3, 4), rep(4, 4),
    rep(1, 5), rep(2, 5), rep(3, 5), rep(4, 5),
    rep(1, 3), rep(2, 3), rep(3, 3), rep(4, 3),
    rep(1, 4), rep(2, 4), rep(3, 4), rep(4, 4)
  ),
  spp = rep("species A"),
  type = c(rep("A", 16), rep("B", 20), rep("C", 12), rep("D", 16)),
  comp = c(
    rep(c("ss", "pe", "mi", "li"), 4),
    rep(c("co", "csf", "cd", "ah", "mi"), 4),
    rep(c("cd", "cgf", "ah"), 4),
    rep(c("di", "csf", "xi", "ef"), 4)
  ),
  value = sample(0.0:60.0, 64, replace = TRUE)
)
# Heat map of value by day and component, with `type` supplied as the group
# aesthetic.
ggplot(test.df) +
  geom_raster(
    aes(x = day, y = comp, group = type, fill = value)
  ) +
  scale_fill_gradientn(colours = rainbow(10)) +
  coord_equal()
I'm hoping to get the y-axis to display each component by group such that like components are not combined.
UPDATE:
I made an object for each group, then plotted each out. The end result I am looking for would have the four plots stacked to make one plot.
# One subset of the data per granola type.
A <- filter(test.df, type == "A")
B <- filter(test.df, type == "B")
C <- filter(test.df, type == "C")
D <- filter(test.df, type == "D")

# Day-by-component heat map for a single type, with the type as y-axis title.
plot_type <- function(type_df, type_label) {
  ggplot(type_df) +
    geom_raster(aes(x = day, y = comp, fill = value)) +
    scale_fill_gradientn(colours = rainbow(10)) +
    coord_equal() +
    ylab(type_label)
}

plot_type(A, "A")
plot_type(B, "B")
plot_type(C, "C")
plot_type(D, "D")
I recognize that this has been an issue that's been asked in many other instances, but none of the solutions provided worked for my particular problem.
Here, I have the following data:
library(tidyverse)
library(scales)
# One row per category: its result, plus columns A-D that hold a category
# letter where a triangle for that category should be drawn (NA otherwise).
mydata <- tibble(
  Category = c("A", "B", "C", "D"),
  Result = c(0.442, 0.537, 0.426, 0.387),
  A = c(NA, "A", NA, NA),
  B = rep(NA, 4),
  C = c(NA, "C", NA, NA),
  D = c("D", "D", NA, NA)
)
mydata[["Category"]] <- factor(mydata[["Category"]])
And I have the following vector for the colors:
# Named vector mapping each category to its hex colour.
colors_vct <- setNames(
  c("#0079c0", "#cc9900", "#252525", "#c5120e"),
  c("A", "B", "C", "D")
)
With this information, I can create the following plot:
# Horizontal bar chart of Result per Category, with a percentage label next
# to each bar; bars and labels share the category colours.
p <- ggplot(
  data = mydata,
  aes(x = Category, y = Result, fill = Category)
) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(label = percent(Result), color = Category),
    hjust = -.25
  ) +
  coord_flip() +
  scale_y_continuous(limits = c(0, 1), labels = percent) +
  scale_colour_manual(values = colors_vct) +
  scale_fill_manual(values = colors_vct)
p
And I'd like to have little triangles appear after the labels based on whether a certain category is mentioned in the last 4 columns of mydata, colored by that category's color, as so:
# Add one triangle layer per marker column (A-D of mydata). Each layer keeps
# only the rows where that column names its category, draws the triangle in
# the category's colour and pushes it further right through hjust.
# Rows are selected by column name; comparing a one-column tibble
# (mydata[, 3] == "A") hands filter() a matrix, which dplyr has deprecated.
p <- p + geom_text(
  data = filter(mydata, A == "A"),
  aes(label = "\u25b2"),
  colour = colors_vct["A"],
  hjust = -4
)
# Left commented out because no row mentions B, so the layer would never
# draw anything.
# p <- p + geom_text(
#   data = filter(mydata, B == "B"),
#   aes(label = "\u25b2"),
#   colour = colors_vct["B"],
#   hjust = -5
# )
p <- p + geom_text(
  data = filter(mydata, C == "C"),
  aes(label = "\u25b2"),
  colour = colors_vct["C"],
  hjust = -6
)
p <- p + geom_text(
  data = filter(mydata, D == "D"),
  aes(label = "\u25b2"),
  colour = colors_vct["D"],
  hjust = -7
)
p
This is what I want the final chart to look like (more or less, see bonus question below). Now, I'd like to iterate the last bit of code using a for loop. And this is where I'm running into trouble. It just ends up adding one layer only. How do I make this work? Here is my attempt:
# Attempt to add the triangle layers in a loop instead of one call per
# category.
#Set the colors into another table for matching:
colors_tbl <- tibble(Category = levels(mydata$Category),
colors = c("#0079c0", "#cc9900", "#252525", "#c5120e"))
# i runs over the positions of mydata$Category (one per row of mydata).
for (i in seq_along(mydata$Category)) {
# NOTE(review): the guard below inspects column i of mydata, while the filter
# further down reads column i + 2. With the columns ordered Category, Result,
# A, B, C, D these are different columns, which looks like the reason only
# one layer ends up being added -- confirm.
if (is_character(mydata[[i]])) { #This makes the loop skip if there is nothing to be applied, as with category B.
#Filters to just the specific categories I need to have the triangles shown.
triangles <- filter(mydata, mydata[,(i+2)] == levels(mydata$Category)[i])
#Matches up with the colors_tbl to determine which color to use for that triangle.
triangles <- mutate(triangles, colors = colors_tbl$colors[match(levels(triangles$Category)[i], colors_tbl$Category)])
#Sets a particular position for that triangle for the hjust argument below.
pos <- -(i+3)
#Adding the layer to the plot object
# colour and hjust are passed as fixed values (outside aes()).
p <- p + geom_text(data = triangles, aes(label = sprintf("\u25b2")), color = triangles$colors, hjust = pos)
}
}
p
:(
Bonus question: Is there a way I can avoid gaps in between the triangles, as per the 2nd chart?
EDIT: As per @baptiste's suggestion, I re-processed the data as follows:
# Reshape to one row per (category, marker) pair, attach each marker's colour,
# drop the empty markers and give every marker letter its own horizontal
# offset.
mydata2 <- mydata %>%
  gather(key = comp, value = Present, -Result, -Category) %>%
  mutate(colors = colors_tbl$colors[match(Present, colors_tbl$Category)]) %>%
  filter(!is.na(Present)) %>%
  select(-comp) %>%
  mutate(
    pos = case_when(
      Present == "A" ~ -4,
      Present == "B" ~ -5,
      Present == "C" ~ -6,
      TRUE ~ -7
    )
  )

# A single text layer draws all triangles; colour and offset are supplied
# row by row from mydata2.
p <- p + geom_text(
  data = mydata2,
  aes(x = Category, label = sprintf("\u25b2")),
  colour = mydata2$colors,
  hjust = mydata2$pos
)
p
OK, I got it to work. My bonus question still stands.