I'm trying to find a solution in order to create small squares figure that I will incorporate into a bigger figer. So for instance if we take this dataframe;
a b c d
1 1 1 1 1
2 1 0 1 1
I would like to create 2 plots :
for row1
and
for row2
Here is the data
data<-structure(list(a = c(1, 1),
b = c(1, 0), c = c(1,
1), d = c(1, 1
)), class = "data.frame", row.names = c(NA, -2L))
does someone have an idea please ?
The first step is to convert the data into a tidy (long) format:
library(tidyverse)
df1<-structure(list(a = c(1, 1),
b = c(1, 0), c = c(1,
1), d = c(1, 1
)), class = "data.frame", row.names = c(NA, -2L))
df1_tidy <- df1 %>%
rowid_to_column("rowID") %>%
pivot_longer(names_to = "colID", values_to= "value", -rowID) %>%
mutate(rowID = factor(rowID, levels = sort(unique(rowID), decreasing = T)))
Then to plot you can use geom_tile with coord_fixed to make sure you end up with squares.
ggplot(df1_tidy, aes(x = colID, y = rowID)) +
geom_tile(aes(width = value*0.75,height = value *0.75 , fill = colID)) +
coord_fixed() +
scale_fill_manual(values = c("#53ae32", "#2e76b5", "#f2d355", "#d23c28")) +
theme_void() +
theme(legend.position = "none")
Or in base R:
#genrate blank plot with correct dimensions
plot(1, type="n", xlab="", ylab="", xlim=c(0,5), ylim=c(-5,0), bty="n", xaxt='n',yaxt='n', ann=FALSE, )
clrs <- c("#53ae32", "#2e76b5", "#f2d355", "#d23c28")
for(i in 1:nrow(df1)){
for(j in 1:ncol(df1)){
cat(i,j, v, "\n")
v <- df1[i,j]
rect(xleft = j, xright = j+0.9, ybottom = -i, ytop = -i-0.9, col = ifelse(v==0, "white", clrs[j]), border = NA)
}
}
As to "is there a way without transforming to a long format" - well, not really for ggplot2. If you want to use ggplot, you will want to map your variables to an aesthetic, which will take columns, not rows. But there is no real issue here - you can just create a new data frame and add this to the other plot.
Another option instead of tiles is to draw points in the shape of squares. I take from your question that you want to indeed create separate plots. I am using patchwork here for the convenience of showing the output, but you can of course leave them separate. The advantage of this approach - you would not be constricted to use coord_fixed.
Apparently you also want to draw nothing when the value is 0. Best to replace with NA, so ggplot will ignore this point.
Thanks user GordonShumway for the colors!
library(ggplot2)
library(patchwork)
df<-structure(list(a = c(1, 1), b = c(1, 0), c = c(1, 1), d = c(1, 1 )), class = "data.frame", row.names = c(NA, -2L))
df_long <- data.frame(t(df))
df_long$colors <- c("#53ae32", "#2e76b5", "#f2d355", "#d23c28")
df_long[df_long == 0] <- NA
p_ls <-
purrr::map(1:nrow(df), function(i){
ggplot(df_long, aes_string(1:4, paste0("X", i))) +
geom_point(aes(color = colors), shape = 15, size = 20, show.legend = FALSE) +
scale_color_identity() +
coord_cartesian(clip = "off", expand = FALSE) +
theme_void() +
theme(plot.margin = margin(r = 1, l = 1, unit = "in")) +
ggtitle(paste("Plot", i))
})
wrap_plots(p_ls) +plot_layout(nrow = 2)
#> Warning: Removed 1 rows containing missing values (geom_point).
Created on 2021-03-12 by the reprex package (v1.0.0)
Related
I have found it challenging to create a treemap using ggplot, and this blog example captured the issues very well and also provided a nice work around. The work around takes the output from the tree map package to create a ggplot version with geom_rect.
My problem and question is how to adjust the labels and, if I wanted to, colors by hierarchy as I have more groups than the linked example and have different labeling requirements.
Here is a reproducible simple example:
library(tidyverse)
library(treemap)
# Create dummy data
tree_data <- data.frame(
my_segment = c(
rep("seg_a", 5),
rep("seg_b", 6),
rep("seg_c", 7)),
my_class = c(
rep("class_1", 2),
rep("class_2", 2),
rep("class_3", 1),
rep("class_4", 2),
rep("class_5", 2),
rep("class_6", 2),
rep("class_7", 1),
rep("class_8", 3),
rep("class_9", 3)),
my_type = c(
rep("type_1", 7),
rep("type_2", 6),
rep("type_3", 5)),
vals = round(runif(18, min = 20, max = 100), 0)
)
Here is the head of the sample dataframe:
my_segment my_class my_type vals
1 seg_a class_1 type_1 86
2 seg_a class_1 type_1 41
3 seg_a class_2 type_1 23
4 seg_a class_2 type_1 79
5 seg_a class_3 type_1 33
6 seg_b class_4 type_1 82
7 seg_b class_4 type_1 85
8 seg_b class_5 type_2 40
9 seg_b class_5 type_2 83
10 seg_b class_6 type_2 69
11 seg_b class_6 type_2 98
12 seg_c class_7 type_2 91
13 seg_c class_8 type_2 33
The tree map package runs fine, but produces unreadable output in RStudio, and I'd like to be able to customize more with ggplot (similar to the linked article)
# Run treemap function
tree_p <- treemap(
tree_data,
index = c("my_segment", "my_class", "my_type"),
vColor = "my_segment",
vSize = "vals",
type = "index",
fontsize.labels = c(15, 12, 10),
fontcolor.labels = c("white", "orange", "green"),
fontface.labels = c(2, 1, 1),
bg.labels = 0,
align.labels = list(
c("center", "center"),
c("right", "bottom"),
c("left", "bottom")
),
overlap.labels = 0.5,
inflate.labels = FALSE
)
# Note: unreadable output in Rstudio (too small)
Using the workaround in this blog, but adding an additional hierarchy and wanted to change the labeling is where the problem comes in.
# Create the plot in ggplot using geom_rect
# Get underlying data created from running treemap
tm_plot_data <- tree_p$tm %>%
mutate(x1 = x0 + w,
y1 = y0 + h) %>%
mutate(x = (x0+x1)/2,
y = (y0+y1)/2) %>%
mutate(
primary_group = case_when(
level == 1 ~ 1.5,
level == 2 ~ 0.75,
TRUE ~ 0.5
)
)
# Plot
ggplot(tm_plot_data, aes(xmin = x0, ymin = y0, xmax = x1, ymax = y1)) +
# add fill and borders for groups and subgroups
geom_rect(aes(fill = color, size = primary_group),
show.legend = FALSE,
color = "black",
alpha = 0.3
) +
scale_fill_identity() +
# set thicker lines for group borders
scale_size(range = range(tm_plot_data$primary_group)) +
# add labels
ggfittext::geom_fit_text(aes(label = my_segment), color = "white", min.size = 1) +
ggfittext::geom_fit_text(aes(label = my_class), color = "blue", min.size = 1) +
ggfittext::geom_fit_text(aes(label = my_type), color = "red", min.size = 1) +
# options
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
theme_void()
So the question I have is there a way to create the labeling like treemap? Specifically, seg_a, seg_b, and seg_c should only appear once, centered over the area of their respective segments. I'd also like to move the labels so that they do not overlap
Thanks for any help and suggestions!
The issue is that you use your full dataset tm_plot_data to add the labels. Hence, for each upper level you you get multiple labels. To solve this issue aggregate your datasets and pass these datasets as data to ggfittext::geom_fit_text. To deal with overlapping labels you could e.g. use the place argument of ggfittext::geom_fit_text to move the class labels to the bottom left and the type labels to the topright.
library(tidyverse)
library(treemap)
set.seed(123)
tm_seg <- tm_plot_data %>%
group_by(my_segment) %>%
summarise(x0 = min(x0), y0 = min(y0), y1 = max(y1), x1 = max(x1)) %>%
ungroup()
tm_class <- tm_plot_data %>%
group_by(my_segment, my_class) %>%
summarise(x0 = min(x0), y0 = min(y0), y1 = max(y1), x1 = max(x1)) %>%
ungroup()
tm_type <- tm_plot_data %>%
group_by(my_segment, my_class, my_type) %>%
summarise(x0 = min(x0), y0 = min(y0), y1 = max(y1), x1 = max(x1)) %>%
ungroup()
# Plot
ggplot(tm_plot_data, aes(xmin = x0, ymin = y0, xmax = x1, ymax = y1)) +
# add fill and borders for groups and subgroups
geom_rect(aes(fill = color, size = primary_group),
show.legend = FALSE,
color = "black",
alpha = 0.3
) +
scale_fill_identity() +
# set thicker lines for group borders
scale_size(range = range(tm_plot_data$primary_group)) +
# add labels
ggfittext::geom_fit_text(data = tm_seg, aes(label = my_segment), color = "white", min.size = 4) +
ggfittext::geom_fit_text(data = tm_class, aes(label = my_class), color = "blue", min.size = 1, place = "bottomleft") +
ggfittext::geom_fit_text(data = tm_type, aes(label = my_type), color = "red", min.size = 1, place = "topright") +
# options
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
theme_void()
#> Warning: Removed 3 rows containing missing values (geom_fit_text).
#> Warning: Removed 12 rows containing missing values (geom_fit_text).
I want to create a timeline plot that roughly resembles the example below: lots of overlap at some points, not a lot of overlap at others.
What I need: overlapping images should repel each other where necessary, eliminating or reducing overlap. Ideally I'd be able to implement either a vertical or horizontal repel.
library(tidyverse)
library(ggimage)
test_img <- list.files(system.file("extdata", package="ggimage"), pattern="png", full.names=TRUE)
set.seed(123)
df <-
tibble(date = as.Date(paste0("2020-", round(runif(45, 1, 2)), "-", round(runif(45, 1, 10)))),
group = paste0("Timeline ", rep(1:9, each = 5)),
img = sample(test_img, size = 45, replace = T) )
df %>%
ggplot() +
geom_line(aes(x = date, y = group, group = group), size = 5, alpha = 0.2) +
geom_image(aes(x = date, y = group, image = img, group = group), asp = 1)
Something similar to the repelling in ggbeeswarm::geom_beeswarm or ggrepel::geom_text_repel would be nice, but those don't support images. So I think I need to pre-apply some kind of 1-dimensional packing algorithm, implementing iterative pair-wise repulsion on my vector of dates within each group, to try to find a non-overlapping arrangement.
Any ideas? Thank you so much!
Created on 2021-10-30 by the reprex package (v2.0.1)
Here is the solution I’ve been able to come up with, repurposing the circleRepelLayout function from the awesome packcircles package
into the repel_vector vector function that takes in your overlapping vector and a "repel_radius", and returns, if possible, a non-overlapping version.
I demonstrate the solution with the richtext geom since this is a geom I’ve always wished had repel functionality.
library(packcircles)
library(tidyverse)
library(ggtext)
library(ggimage)
repel_vector <- function(vector, repel_radius = 1, repel_bounds = range(vector)){
stopifnot(is.numeric(vector))
repelled_vector <-
packcircles::circleRepelLayout(x = data.frame(vector, ypos = 1, repel_radius),
xysizecols = c("vector", "ypos", "repel_radius"),
xlim = repel_bounds, ylim = c(0,1),
wrap = FALSE) %>%
as.data.frame() %>%
.$layout.x
return(repelled_vector)
}
overlapping_vec <- c(1, 1.1, 1.2, 10, 10.1, 10.2)
repelled_vec_default <- repel_vector(overlapping_vec)
repelled_vec_tighter <- repel_vector(overlapping_vec, repel_radius = 0.35)
ggplot() +
annotate("richtext", x = overlapping_vec, y = 3, label = "**test**", alpha = 0.5) +
annotate("richtext", x = repelled_vec_default, y = 2, label = "**test**", alpha = 0.5) +
annotate("richtext", x = repelled_vec_tighter, y = 1, label = "**test**", alpha = 0.5) +
scale_y_continuous(breaks = 1:3, labels = c("Tighter repel", "Default repel", "Overlapping points"))
In theory you apply this to 2D repelling as well.
To solve the problem in my question, this can be applied like so:
test_img <- list.files(system.file("extdata", package="ggimage"), pattern="png", full.names=TRUE)
set.seed(123)
df <-
tibble(date = as.Date(paste0("2020-", round(runif(45, 1, 2)), "-", round(runif(45, 1, 10)))),
group = paste0("Timeline ", rep(1:9, each = 5)),
img = sample(test_img, size = 45, replace = T) ) %>%
group_by(group) %>%
mutate(repelled_date = repel_vector(as.numeric(date),
repel_radius = 4,
repel_bounds = range(as.numeric(date)) + c(-3,3)),
repelled_date = as.Date(repelled_date, origin = "1970-01-01"))
df %>%
ggplot() +
geom_line(aes(x = date, y = group, group = group), size = 5, alpha = 0.2) +
geom_image(aes(x = repelled_date, y = group, image = img, group = group), asp = 1)
Created on 2021-10-30 by the reprex package (v2.0.1)
I am trying to label 4 lines grouped by the value of variable cc. To label the lines I use ggrepel but I get all the 4 labels instead of 2 for each graph. How to correct this error?
The location of the labels is in this example at the last date but I want something more flexible: I want to locate each of the 4 labels in specific points that I chose (e.g. b at date 1, a at date 2, etc.). How to do that?
library(tidyverse)
library(ggrepel)
library(cowplot)
set.seed(1234)
df <- tibble(date = c(rep(1,4), rep(2,4), rep(3,4), rep(4,4)),
country = rep(c('a','b','c','d'),4),
value = runif(16),
cc = rep(c(1,1,2,2),4))
df$cc <- as.factor(df$cc)
# make list of plots
ggList <- lapply(split(df, df$cc), function(i) {
ggplot(i, aes(x = date, y = value, color = country)) +
geom_line(lwd = 1.1) +
geom_text_repel(data = subset(df, date == 4),
aes(label = country)) +
theme(legend.position = "none")
})
# plot as grid in 1 columns
cowplot::plot_grid(plotlist = ggList, ncol = 1,
align = 'v', labels = levels(df$cc))
Created on 2021-08-18 by the reprex package (v2.0.0)
Here I make a tibble to hold color and position preferences, and join that to df.
The geom_text_repel line should probably use i instead of df so that it's split the same way as the line. The only trouble is this forces us to specify that we want four colors up front, since otherwise each chart would just use the two it needs.
set.seed(1234)
df <- tibble(date = c(rep(1,4), rep(2,4), rep(3,4), rep(4,4)),
country = rep(c('a','b','c','d'),4),
value = runif(16),
cc = rep(c(1,1,2,2),4))
label_pos <- tibble(country = letters[1:4],
label_pos = c(2, 1, 3, 2),
color = RColorBrewer::brewer.pal(4, "Set2")[1:4])
df <- df %>% left_join(label_pos)
df$cc <- as.factor(df$cc)
# make list of plots
ggList <- lapply(split(df, df$cc), function(i) {
ggplot(i, aes(x = date, y = value, color = color)) +
geom_line(lwd = 1.1) +
geom_text_repel(data = subset(i, date == label_pos),
aes(label = country), box.padding = unit(0.02, "npc"), direction = "y") +
scale_color_identity() +
theme(legend.position = "none")
})
# plot as grid in 1 columns
cowplot::plot_grid(plotlist = ggList, ncol = 1,
align = 'v', labels = levels(df$cc))
I just encountered such graph attached where two colors of geom_point are used (I believe it is made by ggplot2). Similarly, I would like to have dots of one color to range from size 1 to 5, and have another color for a series of dots for the range 10 to 50. I have however no clue on how to add two different ranges of point in one graph.
At the basic step I have:
a <- c(1,2,3,4,5)
b <- c(10,20,30,40,50)
Species <- factor(c("Species1","Species2","Species3","Species4","Species5"))
bubba <- data.frame(Sample1=a,Sample2=b,Species=Species)
bubba$Species=factor(bubba$Species, levels=bubba$Species)
xm=melt(bubba,id.vars = "Species", variable.name="Samples", value.name = "Size")
str(xm)
ggplot(xm,aes(x= Samples,y= fct_rev(Species)))+geom_point(aes(size=Size))+scale_size(range = range(xm$Size))+theme_bw()
Any would have clues where I should look into ? Thanks!
I've got an approach that gets 90% of the way there, but I'm not sure how to finish the deed. To get a single legend for size, I used a transformation to convert input size to display size. That makes the legend appearance conform to the display. What I don't have figured out yet is how to apply a similar transformation to the fill so that both can be integrated into the same legend.
Here's the transformation, which in this case shrinks everything 10 or more:
library(scales)
shrink_10s_trans = trans_new("shrink_10s",
transform = function(y){
yt = if_else(y >= 10, y*0.1, y)
return(yt)
},
inverse = function(yt){
return(yt) # Not 1-to-1 function, picking one possibility
}
)
Then we can use this transformation on the size to selectively shink only the dots that are 10 or larger. This works out nicely for the legend, aside from integrating the fill encoding with the size encoding.
ggplot(xm,aes(x= Samples,y= fct_rev(Species), fill = Size < 10))+
geom_point(aes(size=Size), shape = 21)+
scale_size_area(trans = shrink_10s_trans, max_size = 10,
breaks = c(1,2,3,10,20,30,40),
labels = c(1,2,3,10,20,30,40)) +
scale_fill_manual(values = c(rgb(136,93,100, maxColorValue = 255),
rgb(236,160,172, maxColorValue = 255))) +
theme_bw()
a <- c(1, 2, 3, 4, 5)
b <- c(10, 20, 30, 40, 50)
Species <- factor(c("Species1", "Species2", "Species3", "Species4", "Species5"))
bubba <- data.frame(Sample1 = a, Sample2 = b, Species = Species)
bubba$Species <- factor(bubba$Species, levels = bubba$Species)
xm <- reshape2::melt(bubba, id.vars = "Species", variable.name = "Samples", value.name = "Size")
ggplot(xm, aes(x = Samples, y = fct_rev(Species))) +
geom_point(aes(size = Size, color = Size)) +
scale_color_continuous(breaks = c(1,2,3,10,20,30), guide = guide_legend()) +
scale_size(range = range(xm$Size), breaks = c(1,2,3,10,20,30)) +
theme_bw()
Here's a cludge. I haven't got time to figure out the legend at the moment. Note that 1 and 10 are the same size, but a different colour, as are 3 and 40.
# Create data frame
a <- c(1, 2, 3, 4, 5)
b <- c(10, 20, 30, 40, 50)
Species <- factor(c("Species1", "Species2", "Species3", "Species4", "Species5"))
bubba <- data.frame(Sample1 = a, Sample2 = b, Species = Species)
# Restructure data
xm <- reshape2::melt(bubba, id.vars = "Species", variable.name = "Samples", value.name = "Size")
# Calculate bubble size
bubble_size <- function(val){
ifelse(val > 3, (1/15) * val + (1/3), val)
}
# Calculate bubble colour
bubble_colour <- function(val){
ifelse(val > 3, "A", "B")
}
# Calculate bubble size and colour
xm %<>%
mutate(bub_size = bubble_size(Size),
bub_col = bubble_colour(Size))
# Plot data
ggplot(xm, aes(x = Samples, y = fct_rev(Species))) +
geom_point(aes(size = bub_size, fill = bub_col), shape = 21, colour = "black") +
theme(panel.grid.major = element_line(colour = alpha("gray", 0.5), linetype = "dashed"),
text = element_text(family = "serif"),
legend.position = "none") +
scale_size(range = c(1, 20)) +
scale_fill_manual(values = c("brown", "pink")) +
ylab("Species")
I think you are looking for bubble plots in R
https://www.r-graph-gallery.com/bubble-chart/
That said, you probably want to build the right and left the side of the graphic separately and then combine.
I have the following dataset
SIZE ALG1 ALG2 ALG3
A 2 3 5
A 3 2 1
A 1 2 2
B 10 10 11
B 12 12 12
I'd like a plot having as a horizontal axis the SIZE column and as Y values the series ALG1 ALG2 ALG3.
E.g.
How can I get it wit Excel (actually OOCALC) or R?
In general I get the SIZE values repeated on the x-axis, which appears then much longer.
TIA
Suppose your data frame is called df1.
You have to convert it in a long format using reshape2::melt
melt(df1) -> dfmelt
library(ggplot2)
ggplot(data=dfmelt, aes(x=SIZE, y=value)) + geom_jitter(aes(color=variable))
or, more simply and quickly:
qplot(data=dfmelt, x=SIZE, y=value, geom="jitter", color=variable)
If you just want the points without "jittering" them:
qplot(data=dfmelt, x=SIZE, y=value, geom="point", color=variable)
In R you could do it as follows:
df <- data.frame(
size = c(rep('A', 3), rep('B', 2)),
alg1 = c(2,3,1,10,12),
alg2 = c(3,2,2,10,12),
alg3 = c(5,1,2,11,12)
)
plot(alg1 ~ jitter(as.numeric(size)), data = df,
xlim = c(0.5, 2.5), ylim = c(0, 14), col = 'red',
axes = FALSE, xlab = 'Size', ylab = 'Alg')
points(alg2 ~ jitter(as.numeric(size)), data = df, col = 'blue')
points(alg3 ~ jitter(as.numeric(size)), data = df, col = 'black')
axis(side = 2)
axis(side = 1, at = c(1, 2), labels = c('A', 'B'))
legend(x = 2, y = 4, legend = c('alg1', 'alg2', 'alg3'),
col = c('red', 'blue', 'black'), lty = 1)