ordering of labels with bar and point plot - r

I have a bar plot made with geom_bar() on which I'd like to overlay points using geom_point(). The issue is the ordering of the axis labels. I have two groups: group A, which I want to show with geom_bar() ordered from high to low, and group B, which I want to show as points using geom_point(). Groups A and B will not always have the same categories, but I always want group A shown with bars and ordered from high to low.
If you run this code you will see just the bar plot correctly ordered. I need the pet supercategory shown first and then the car category. I have defined supercategory as an ordered factor and it is working.
Then, within each supercategory, the bars are sorted by group A's value from high to low. You can see that dog is higher than the others in the pet category, and kia is higher than the others in the car category.
library(dplyr)

# Example data: group A is drawn as bars, group B as points.
group <- c("A", "A", "A", "B", "B", "B", "A", "A", "A", "B", "B", "B")
supercategory <- c(
  "pet", "pet", "pet", "pet", "pet", "pet",
  "car", "car", "car", "car", "car", "car"
)
category <- c(
  "bird", "cat", "dog", "bird", "cat", "lizard",
  "ford", "chevy", "kia", "kia", "toyota", "ford"
)
# Ordered factor so that "pet" always sorts before "car".
supercategory <- factor(supercategory, levels = c("pet", "car"), ordered = TRUE)
value <- c(3, 4, 5, 4, 5, 6, 1, 3, 10, 8, 3, 5)
dat <- data.frame(
  group = group,
  supercategory = supercategory,
  category = category,
  value = value
)

# HIGH_VALUE carries group A's value and is 0 for group B, so sorting on it
# (descending) within each supercategory puts group A's rows first, from the
# highest value to the lowest.
dat <- dat %>%
  mutate(
    LABEL = paste0(supercategory, "-", category),
    HIGH_VALUE = ifelse(group == "A", value, 0)
  ) %>%
  arrange(supercategory, desc(HIGH_VALUE))

# ROW_NUMBER records that row order. ROW_NUMBER2 is the smallest ROW_NUMBER
# within each (supercategory, category) pair, so every row of a category shares
# one position; sorting by it orders the categories within each supercategory.
# seq_len() is used instead of 1:nrow() so an empty table yields no row numbers,
# and ungroup() keeps the grouping from leaking into later steps.
dat$ROW_NUMBER <- seq_len(nrow(dat))
dat <- dat %>%
  group_by(supercategory, category) %>%
  mutate(ROW_NUMBER2 = min(ROW_NUMBER)) %>%
  ungroup() %>%
  arrange(supercategory, ROW_NUMBER2)

# Group A will be shown as bars using geom_bar.
# Group B will be displayed with points using geom_point.
# The bars and points should be in the order of ROW_NUMBER2.
library(ggplot2)
# Freeze the current row order as the level order of LABEL for the x axis.
dat$LABEL <- factor(dat$LABEL, levels = unique(dat$LABEL), ordered = TRUE)
ggplot(dat[dat$group == "A", ], aes(x = LABEL, y = value)) +
  geom_bar(stat = "identity")
I'd like to keep the ordering of the plot above and just add the points above the bars. And if Group B has a category that is not one of Group A's the point should be to the right of Group A's last bar within whatever supercategory it is in.
But when I try to add the points the ordering gets messed up. Run this code which just adds group B's data as points and you will see the order of the labels gets messed up.
library(ggplot2)

# Attempt at the overlay: group A's bars form the base plot and group B's rows
# are added as blue squares from a second data subset.
dat$LABEL <- factor(dat$LABEL, levels = unique(dat$LABEL), ordered = TRUE)
bar_data <- dat[dat$group == "A", ]
ggplot(bar_data, aes(x = LABEL, y = value)) +
  geom_bar(stat = "identity") +
  geom_point(
    data = dat[dat$group == "B", ],
    aes(x = LABEL, y = value),
    shape = 15,
    size = 3,
    color = "blue"
  )
How can I add this line to the plot:
geom_point(data = dat[dat$group=="B",], aes(x = LABEL, y = value), shape=15, size = 3, color = "blue" )
while keeping group A's ordering?

The two groups do not have the same set of values, so you have to force the x-axis order by adding:
+ scale_x_discrete(limits=dat$LABEL)
Then:
# Group A as bars and group B as blue squares on one shared discrete x axis.
# Each layer only sees a subset of the labels, so the axis order is pinned
# explicitly. levels(dat$LABEL) supplies every label exactly once, in level
# order, as a character vector (rather than the factor column with its
# repeated values).
# Assumes dat$LABEL is already a factor whose levels are in the desired order.
ggplot(data = dat, aes(x = LABEL, y = value)) +
  geom_bar(data = dat[dat$group == "A", ], stat = "identity") +
  geom_point(
    data = dat[dat$group == "B", ],
    shape = 15,
    size = 3,
    color = "blue"
  ) +
  scale_x_discrete(limits = levels(dat$LABEL))

I agree with #Cédric Miachon.
The problem is that the two layers use different sets of x values.
A possible way to change this behaviour is to introduce NAs for the x values that are not present in a group:
# library() is used instead of require() so a missing package fails loudly.
library(reshape2)
library(dplyr)
library(tidyr)

# Labels in their order of first appearance; reused as the factor levels below.
vector_f <- unique(dat$LABEL)

# Cast to one column per LABEL (label/group combinations with no row become
# NA), then gather back to long form. The label columns are selected by
# excluding the two id columns by name, so this works for any number of labels.
dat1 <- dat %>%
  dcast(group + supercategory ~ LABEL, value.var = "value") %>%
  gather(label, value, -group, -supercategory)

ggplot() +
  geom_bar(
    data = dat1[dat1$group == "A", ],
    aes(x = factor(label, levels = vector_f), y = value),
    stat = "identity"
  ) +
  geom_point(
    data = dat1[dat1$group == "B", ],
    aes(x = factor(label, levels = vector_f), y = value)
  )
# Some of the geom_point styling arguments from the question were removed.

Related

plotly and ggplot legend order interaction

I have multiple graphs that I am plotting with ggplot and then sending to plotly. I set the legend order based the most recent date, so that one can easily interpret the graphs. Everything works great in generating the ggplot, but once I send it through ggplotly() the legend order reverts to the original factor level. I tried resetting the factors but this creates a new problem - the colors are different in each graph.
Here's the code:
Data:
# Population per country for three years, stacked into one long table.
Country <- c("CHN", "IND", "INS", "PAK", "USA")
a <- data.frame(Country = Country, Pop = c(1400, 1300, 267, 233, 330), Year = 2020)
b <- data.frame(Country = Country, Pop = c(1270, 1000, 215, 152, 280), Year = 2000)
c <- data.frame(Country = Country, Pop = c(1100, 815, 175, 107, 250), Year = 1990)
Data <- bind_rows(a, b, c)
Legend Ordering Vector - This uses 2020 as the year to determine order.
# Country codes ranked by population in the most recent year, largest first,
# returned as a plain vector.
Legend_Order <- as.vector(
  unlist(
    select(
      arrange(filter(Data, Year == max(Year)), desc(Pop)),
      Country
    )
  )
)
Then I create my plot and use Legend Order as breaks
# One line per country; the legend entries follow Legend_Order via `breaks`.
Graph <- ggplot(Data) +
  geom_line(
    aes(x = Year, y = Pop, group = Country, color = Country),
    size = 1.2
  ) +
  scale_color_discrete(name = "Country", breaks = Legend_Order)
Graph
But then when I pass this on to:
ggplotly(Graph)
For some reason plotly ignores the breaks argument and uses the original factor levels.
If I set the factor levels beforehand, the color schemes changes (since the factors are in a different order).
How can I keep the color scheme from graph to graph, but change the legend order when using plotly?
Simply recode your Country variable as a factor with the levels set according to Legend_Order. Try this:
library(plotly)
library(dplyr)

# Population per country for three years, stacked into one long table.
Country <- c("CHN", "IND", "INS", "PAK", "USA")
a <- data.frame(Country = Country, Pop = c(1400, 1300, 267, 233, 330), Year = 2020)
b <- data.frame(Country = Country, Pop = c(1270, 1000, 215, 152, 280), Year = 2000)
c <- data.frame(Country = Country, Pop = c(1100, 815, 175, 107, 250), Year = 1990)
Data <- bind_rows(a, b, c)

# Country codes ranked by population in the most recent year, largest first.
Legend_Order <- as.vector(
  unlist(
    select(
      arrange(filter(Data, Year == max(Year)), desc(Pop)),
      Country
    )
  )
)

# Store the ranking in the factor levels themselves instead of passing it as
# scale breaks.
Data$Country <- factor(Data$Country, levels = Legend_Order)

Graph <- ggplot(Data) +
  geom_line(
    aes(x = Year, y = Pop, group = Country, color = Country),
    size = 1.2
  )
ggplotly(Graph)
To "lock in" the color assignment you can make use of a named color vector like so (for short I only show the ggplots):
# Fix the color assignments using a named color vector which can be assigned
# via scale_color_manual. The palette size follows the number of countries in
# Legend_Order instead of a hard-coded 5, so names and colors always line up.
cols <- scales::hue_pal()(length(Legend_Order)) # Default ggplot2 colors
cols <- setNames(cols, Legend_Order) # Set names according to legend order

# Plot with unordered Countries but "ordered" color assignment
Data %>%
  ggplot() +
  geom_line(aes(x = Year, y = Pop, color = Country), size = 1.2) +
  scale_color_manual(values = cols)

# Plot with ordered factor
Data$Country <- factor(Data$Country, levels = Legend_Order)
Data %>%
  ggplot() +
  geom_line(aes(x = Year, y = Pop, color = Country), size = 1.2) +
  scale_color_manual(values = cols)

Can I control the order of multiple labels for the same value using geom_text_repel?

I'm making a plot where several data points have the same coordinates. By default, the labels all overlap, but using geom_text_repel with direction = "y", I can vertically space them out.
However, every time I generate the plot, it chooses a new order for the labels. I would like them to be ordered based on a value.
I have tried:
using "arrange" to order the dataframe in the order that I want to see the labels (this seems to have no effect)
Trying to use "nudge_y" to re-arrange the labels in the order I want them. This seems to change the plot - it does "nudge" them - but it does NOT nudge them into the correct order!
Here is sample code to recreate the problem. Basically, I want the final plot to be ordered by the "order" value - so, for the three datapoints on "10", the order should be Ayala, Zoe, JL, and for the two datapoints on "5", the order should be Raph, Oona.
I've color-coded the plot to make it obvious what order they should be in - for each value, the lightest blue should be on top, and the darkest should be on the bottom.
library(tidyverse)
library(ggrepel)

name <- c("Oona", "Sam", "Raph", "JL", "Zoe", "Ayala")
year <- rep("2016", 6)
value <- c(5, 15, 5, 10, 10, 10) # The value being plotted
order <- c(5, -10, 10, -5, 0, 5) # The value the labels should be ordered by

# Arranging the data frame doesn't seem to affect the order on the plot at all;
# it is only done so the data frame can be previewed in the intended order.
test_df <- arrange(
  bind_cols(name = name, year = year, value = value, order = order),
  desc(value),
  desc(order)
)

ggplot(test_df, aes(x = year, y = value, group = name)) +
  geom_point(aes(color = order)) +
  geom_text_repel(
    data = test_df,
    mapping = aes(label = name, color = order),
    hjust = "left",
    # Attempt to "nudge" the labels into the right order.
    # NOTE(review): nudge_y is outside aes(), so `order` here is the standalone
    # vector defined above (in its original, un-arranged order), not the
    # arranged test_df$order column.
    nudge_y = order,
    nudge_x = -.45,
    direction = "y"
  )
I think the values in your order column were too big for the y-axis scale provided, so geom_text_repel was doing behind-the-scenes work to make it all actually fit, and changed the order of the labels in the process. When I scaled the order column down to one-fifth the sizes you had originally, it worked perfectly.
# Shrink the ordering values to one-fifth of their size before using them as
# vertical nudges, taken from the data frame column so they match its rows.
test_df$order <- test_df$order / 5

ggplot(test_df, aes(x = year, y = value, group = name)) +
  geom_point(aes(color = order)) +
  geom_text_repel(
    data = test_df,
    mapping = aes(label = name, color = order),
    hjust = "left",
    nudge_y = test_df$order,
    nudge_x = -.45,
    direction = "y"
  )

ggplot add segments to scatter plot according to factors

I have the following 'code'
set.seed(100)

# Six consecutive runs of normal draws (sd = 1). Each run has its own length,
# mean and group code; runs are generated in this order so the seeded random
# stream is consumed exactly as six back-to-back rnorm() calls would.
run_sizes <- c(200, 200, 250, 75, 50, 210)
run_means <- c(10, 2.1, 6, 2.1, 9, 2.05)
run_codes <- c(3, 0, 1, 0, 2, 0)

values <- unlist(Map(function(n, mu) rnorm(n, mu, 1), run_sizes, run_means))
group <- rep(run_codes, times = run_sizes)
df <- data.frame(values, group)
I would like to plot these data as a scatter plot (like the attached plot) and add segments. These segments (y values) shall represent the mean value of the data for a given group. In addition, the segments should have a different color depending on the factor (group). Is there an efficient way to do it with ggplot ?
Many thanks
We can do this by augmenting your data a little. We'll use dplyr to get the mean by group, and we'll create two variables: one that gives the observation index and one that increments by one each time the group changes (which will be helpful to get the segments you want) [1]:
library(dplyr)

# idx: position of each observation; group is stored as an integer code.
df <- mutate(df, idx = seq_along(values), group = as.integer(group))

# m: mean of `values` over all rows sharing the same group code.
df <- df %>%
  group_by(group) %>%
  mutate(m = mean(values)) %>%
  ungroup()

# group2: run counter that increases by one whenever the group code differs
# from the previous row's (the first row is compared against -1).
df <- mutate(df, group2 = cumsum(group != lag(group, default = -1)))
Now we can make the plot; using geom_line() with grouping by group2, which changes every time the group changes, makes the segments you want. Then we just color by (a discretized version of) group:
# Scatter of the raw values with one thick horizontal piece per run (group2),
# drawn at the group mean `m` and coloured by the group code.
ggplot(df, aes(x = idx, y = values)) +
  geom_point(shape = 1, color = "blue") +
  geom_line(
    aes(x = idx, y = m, group = group2, color = as.factor(group)),
    size = 2
  ) +
  scale_color_manual(
    values = c("red", "black", "green", "blue"),
    name = "group"
  ) +
  theme_bw()
1 See https://stackoverflow.com/a/42705593/8386140

plotting stacked points using ggplot

I have a data frame and I would like to stack the points that have overlaps exactly on top of each other.
here is my example data:
# Example data: 7 names (a-g), each measured once in group "AA" and once in
# group "BB", giving 14 rows.
value <- c(
  1.080251e-04, 1.708859e-01, 1.232473e-05, 4.519876e-03, 2.914256e-01,
  5.869711e-03, 2.196347e-01, 4.124873e-01, 5.914052e-03, 2.305623e-03,
  1.439013e-01, 5.407597e-03, 7.530298e-02, 7.746897e-03
)
names <- letters[1:7]
# `group` must exist before data.frame() is called, so it is defined first.
group <- c(rep("AA", 7), rep("BB", 7))
# The names are repeated explicitly, once per group, to match the 14 values.
data <- data.frame(
  names = rep(names, 2),
  group = group,
  value = value,
  stringsAsFactors = TRUE
)
I am using the following command:
# One point per row, sized by -log(value) and coloured by group. `y` is mapped
# to the constant "" and the points are placed with position "stack".
p <- ggplot(data, aes(x = names, y = "", color = group))
p <- p + geom_point(aes(size = -log(value)), position = "stack")
print(p)
But the stacked circle is drawn outside the grid. I want it close to, or exactly next to, the bottom circle. Do you have any idea how I can fix this issue?
Thanks,
The y-axis has no numeric value, so use the group instead. And we don't need the color legend now since the group labels are shown on the y-axis.
# Put the group on the y axis so each group gets its own row of points.
# The colour legend is dropped because the y-axis labels already name the
# groups; "none" is the supported way to remove a guide (FALSE is deprecated).
ggplot(data, aes(x = names, y = group, color = group)) +
  geom_point(aes(size = -log(value))) +
  guides(color = "none")

Within a function, how to create a discrete axis with _repeated and ordered_ labels

I want to create a function that makes a heatmap where the y axis will have unique breaks, but repeated and ordered labels. I know that this might not be great practice. I am also aware that similar questions have been asked before. For example: ggplot in R, reordering the bars. But I want to achieve these repeated and ordered labels through sorting within a function, not by typing them manually. I am aware of solutions for reordering axes based on the values of a factor (e.g., Order Bars in ggplot2 bar graph), but I don't think they apply, or I can't see how to apply them to my case, where the breaks are unique but the labels repeat.
Here is some code to reproduce the problem and some of my attempts:
Libraries and data
library(ggplot2)
library(dplyr)
library(tidyr)
set.seed(4)

# Ten rows with a unique id each; `lab` is drawn with replacement from five
# possible labels, so labels can repeat across ids.
id <- LETTERS[1:10]
lab_pool <- paste(c("AB", "CD"), 1:5, sep = "_")
lab <- sample(lab_pool, size = 10, replace = TRUE)
val <- sample.int(n = 6, size = 10, replace = TRUE)
# tes flags values of 4 or more with 1, everything else with 0.
tes <- as.numeric(val >= 4)
dat <- data.frame(id, lab, val, tes)
A heatmap with unique breaks on the y axis
# Long form: the val and tes columns are stacked into kind/value pairs.
dat2 <- gather(dat, kind, value, val:tes)
ggplot(dat2, aes(x = kind, y = id, fill = value)) +
  geom_tile(color = "white", size = 1)
A heatmap where the y axis is labeled with repeated labels instead of the unique breaks
This works, to the point that labels are used instead of unique ids, but the y axis is not ordered by the labels. Also, I am not sure about setting breaks and labels from the data frame in wide format (dat), rather than the data frame in long format used by ggplot (dat2).
# Long form: the val and tes columns are stacked into kind/value pairs.
dat2 <- gather(dat, kind, value, val:tes)
# Breaks stay on the unique ids; the text shown for each break comes from the
# matching `lab` entry of the wide table.
ggplot(dat2, aes(x = kind, y = id, fill = value)) +
  geom_tile(color = "white", size = 1) +
  scale_y_discrete(breaks = dat$id, labels = dat$lab)
Mapping the vector with repeated values onto the y axis obviously doesn't work
# Long form: the val and tes columns are stacked into kind/value pairs.
dat2 <- gather(dat, kind, value, val:tes)
# Here the non-unique `lab` column itself is mapped to y.
ggplot(dat2, aes(x = kind, y = lab, fill = value)) +
  geom_tile(color = "white", size = 1)
Repeated and ordered labels, try 1
As expected, merely sorting the input data by the non-unique lab variable does not work.
# Try 1: sort the long table by the non-unique label before plotting.
dat2 <- arrange(gather(dat, kind, value, val:tes), lab)
ggplot(dat2, aes(x = kind, y = id, fill = value)) +
  geom_tile(color = "white", size = 1) +
  scale_y_discrete(breaks = id, label = lab)
Repeated and ordered labels, try 2
Try to create a named breaks vector ordered by the (repeating) labels. This gets me nowhere. Half the labels are missing and they are still not sorted.
# Try 2: a breaks vector of ids named by label, indexed by the sorted labels.
dat2 <- gather(dat, kind, value, val:tes)
ids_by_lab <- setNames(dat$id, dat$lab)
brks <- ids_by_lab[sort(dat$lab)]
ggplot(dat2, aes(x = kind, y = id, fill = value)) +
  geom_tile(color = "white", size = 1) +
  scale_y_discrete(breaks = brks, labels = names(brks))
Repeated and ordered labels, try 3
Starting with the data frame sorted by label, try to create an ordered factor for lab. Then sort the table by this ordered factor. No luck.
# Try 3: turn the label into an ordered factor and sort the long table by it.
dat2 <- arrange(gather(dat, kind, value, val:tes), lab)
dat2 <- mutate(
  dat2,
  lab_f = factor(lab, levels = sort(unique(lab)), ordered = TRUE)
)
dat2 <- arrange(dat2, lab_f)
# check
dat2$lab_f
ggplot(dat2, aes(x = kind, y = id, fill = value)) +
  geom_tile(color = "white", size = 1) +
  scale_y_discrete(breaks = dat2$id, labels = dat2$lab_f)
A workaround, which I can use if I have to, but I am trying to avoid
We can create a combination of id and lab which will be unique and use it for the y axis
# Workaround: paste label and id into one unique key and plot that on y.
dat2 <- mutate(
  gather(dat, kind, value, val:tes),
  id_lab = paste(lab, id, sep = "_")
)
ggplot(dat2, aes(x = kind, y = id_lab, fill = value)) +
  geom_tile(color = "white", size = 1)
I must be missing something. Any help is much appreciated.
The goal is to have a function that will take an arbitrarily long table and plot a y axis with unique breaks but (possibly) repeated and ordered labels.
# Template heatmap function: stacks the val and tes columns of `dat` into
# kind/value pairs and returns a tile plot with `kind` on x, `id` on y and
# `value` as the fill. The two placeholder comments mark where the extra
# data manipulation and the y-scale customisation are meant to go.
heat <- function(dat) {
  long_dat <- gather(dat, kind, value, val:tes)
  # any other manipulation here
  ggplot(long_dat, aes(x = kind, y = id, fill = value)) +
    geom_tile(color = "white", size = 1)
  # scale_y_discrete() (if needed)
}
The plot I am looking for is something like this (created in inkscape)
Using limits instead of breaks sets the order:
# `limits` fixes the order of the y positions (ids ordered by their label),
# and `labels` supplies the sorted labels in that same order. The white id
# text in the first column shows which id each row belongs to.
ggplot(dat2, aes(y = id)) +
  geom_tile(aes(x = kind, fill = value), color = "white", size = 1) +
  geom_text(aes(x = 1, label = id), col = "white") +
  scale_y_discrete(
    limits = dat$id[order(dat$lab)],
    labels = sort(dat$lab)
  )

Resources