order y-axis of geom_tile plot by variable - r

I am using geom_tile to visualize random draws
Generate data:
# Simulate 10 runs ("sim") of 10 binary draws each ("part").
set.seed(1)
df <- crossing(sim = 1:10, part = 1:10)
df$result <- sample(c(1, 0), size = nrow(df), replace = TRUE)

# For each sim, count the successful (1) pilots among the first 4
# participants, then attach that count to every row of the full data frame.
df <- df %>%
  group_by(sim) %>%
  summarize(good_pilots = sum(result[1:4])) %>%
  arrange(good_pilots) %>%
  ungroup() %>%
  # name the join key explicitly instead of relying on the guessed common column
  full_join(df, by = "sim")

# plot data
plot <- ggplot(df, aes(y = factor(sim), x = part)) +
  geom_tile(
    aes(fill = factor(result)),
    colour = "black",
    show.legend = TRUE
  ) +
  scale_fill_manual(values = c("lightgrey", "darkblue")) + # c(0,1)
  theme(
    panel.border = element_rect(size = 2),
    plot.title = element_text(size = rel(1.2)),
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    legend.title = element_blank(),
    legend.position = "right"
  ) +
  # NOTE(review): theme_classic() is a complete theme, so adding it after
  # theme() replaces the settings above. Move it before theme() if those
  # settings are meant to take effect.
  theme_classic() +
  coord_fixed(ratio = 1)
This results in:
What I actually want is the y axis to be ordered by the # of blue (ie 1's) in the first four columns of the block (which is calculated in good_pilots).
I tried scale_y_discrete but that cannot be what is intended:
# NOTE(review): df has one row per sim/part pair, so this limits vector repeats
# every sim value once per part instead of listing each sim level a single time.
plot + scale_y_discrete(limits=df$sim[order(df$good_pilots)])
resulting in:
From what I can tell it seems like the ordering worked correctly, but using scale_y_discrete caused the plot to be messed up.

You can use reorder here
# reorder() re-levels sim according to good_pilots, so the y axis follows that
# ordering; the remaining layers ("...") stay as in the question.
ggplot(df, aes(y = reorder(sim, good_pilots), x = part)) +
...

Related

How to pass external data to stat_compare_means in ggplot2

I created a barplot based on ggplot2 using an original long format that has been transferred with mean and standard error, which means I was not using raw data to generate a barplot with an errorbar but using processed mean and error data to generate a barplot. Therefore, I wonder how to add statistical comparison under such a situation using the original data because it is difficult to do statistics with only mean and error values.
This is my code, and you could imagine that df was the original long format data, and the data were grouped to calculate the mean and standard error, leading to the data summaryFGA.
The barplot with errorbar was generated using summaryFGA like below using the following code:
errorbar without statistical comparison
# Summarise FGA per DMP/tumor group: the mean plus the value returned by std()
# (std() is defined elsewhere; presumably a standard error -- confirm).
summaryFGA <- df %>%
  group_by(DMP, tumor) %>%
  dplyr::summarize(
    mean = mean(FGA, na.rm = TRUE),
    se = std(FGA, na.rm = TRUE)
  ) %>%
  as.data.frame()

# Bar plot of the pre-computed means with mean +/- se error bars, one facet
# per tumor.
p1 <- ggplot(summaryFGA, aes(x = DMP, y = mean, fill = DMP)) +
  geom_bar(stat = "identity") +
  geom_errorbar(
    aes(ymax = mean + se, ymin = mean - se),
    position = position_dodge(0.9),
    width = 0.15
  ) +
  scale_fill_manual(values = jco[1:2]) +
  scale_color_manual(values = jco[1:2]) +
  ylab("Fraction of Genome Altered") +
  xlab("") +
  facet_wrap(. ~ tumor, nrow = 1, scales = "fixed") +
  # stat_compare_means(data = df,
  # aes(x = DMP, y = FGA, fill=DMP),
  # comparisons = my_comparisons,
  # method = "wilcox.test", inherit.aes = F) +
  theme(
    axis.text.x = element_text(
      hjust = 1, vjust = 0.5, size = 10, angle = 90, color = "black"
    ),
    axis.text.y = element_text(size = 10, color = "black"),
    axis.ticks = element_line(size = 0.2, color = "black"),
    axis.ticks.length = unit(0.2, "cm"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    strip.text = element_text(colour = "white", size = 10),
    axis.line = element_line(colour = "black")
  )
p1

# Recolour the top facet strips: walk the "strip-t" grobs in layout order and
# set the fill of each strip's rect child from tumorColor.
g <- ggplot_gtable(ggplot_build(p1))
stripr <- grep("strip-t", g$layout$name)
k <- 1
for (i in stripr) {
  j <- grep("rect", g$grobs[[i]]$grobs[[1]]$childrenOrder)
  g$grobs[[i]]$grobs[[1]]$children[[j]]$gp$fill <- tumorColor[k]
  k <- k + 1
}
What I want is the following:
errorbar with statistical comparison
to add statistical comparison using the code I annotated, but it doesn't work. Obviously, it should be based on the original df data instead of summaryFGA.
If anyone wants an example, please refer to this thread and add a statistical comparison using "original data" in the example.
Many thanks in advance.

Horizontal bar plot with both stack and cluster bars in R

I am trying to create a horizontal bar plot where each variable has two bars: the first should be a stacked bar with the counts of positive and negative values, and the second should show only the neutral values. So far I haven't managed to do that.
Here is an example of the data:
# Example data. The header has one fewer field than the data rows, so
# read.table() uses the first column (A-E) as row names.
df <- read.table(text = "Positive Negative Neutral
A 4 5 1
B 6 8 3
C 12 3 6
D 10 5 2
E 2 11 7", header = TRUE)
And here is the plot I am aiming for (made with Excel):
Edit:
Thanks everyone for the help! I got great feedback already.
I am planning to continue with the option provided by @Duck, as I feel that ggplot2 is more flexible. However, I figured out I would like to add (and learn) one more thing:
Automatically reorder the y-axis values (id: A, B, C, D, E) based on the number of positive values (I edited the plot above). For instance, in this case the order would be C, D, B, A, E:
I tried reorder() and factor(), but unsuccessfully. Should it be applied to the data or directly in ggplot? The solutions I have found so far do not cover this case of a two-level variable (here, the variable name (id) and the flag (TRUE = positive/negative, FALSE = neutral)).
I might be the only person on the planet using base R plotting these days, but it is actually pretty flexible for non-standard plots like this.
# Base-graphics version: every id gets three bar slots -- a stacked
# Positive/Negative bar, a Neutral bar, and an empty slot that acts as a gap.
xl <- c(0, 20)
cols <- c("blue", "orange", "grey")
bd <- t(as.matrix(df)) # rows: Positive/Negative/Neutral, columns: ids
ids <- seq_len(ncol(bd)) # works for any number of ids, not just five

# Interleaving NA column indices leaves the other two slots of each id empty.
bp <- barplot(
  unname(bd[1:2, rbind(ids, NA, NA)]),
  xlim = xl, horiz = TRUE, col = cols[1:2]
)
barplot(
  unname(bd[3, rbind(NA, ids, NA)]),
  xlim = xl, horiz = TRUE, col = cols[3],
  axes = FALSE, add = TRUE
)

# Put each label midway between the two filled slots of its id;
# drop = FALSE keeps the matrix shape when there is a single id.
axis(
  2,
  at = colMeans(matrix(bp, nrow = 3)[1:2, , drop = FALSE]),
  labels = colnames(bd), las = 1, lty = 0
)

# Allow drawing outside the plot region for the legend, then restore par().
old_par <- par(xpd = NA)
legend(
  "bottom", rownames(bd),
  fill = cols, horiz = TRUE, inset = -1 / par("pin")[2], bty = "n"
)
par(old_par)
Result:
Try with facets like this and smartly create a reference variable to create two variables and plot them using ggplot2 and some tidyverse functions:
library(ggplot2)
library(tidyverse)

# Reshape to long format, flag the Positive/Negative rows so they stack in one
# bar (Neutral gets its own bar), and draw one facet row per id.
df %>%
  rownames_to_column("id") %>%
  pivot_longer(-id) %>%
  mutate(Flag = name %in% c("Positive", "Negative")) %>%
  ggplot(aes(x = Flag, y = value, fill = name)) +
  geom_col() +
  facet_grid(id ~ ., switch = "y") +
  scale_fill_manual(values = c("red", "tomato", "cyan3"), name = "") +
  coord_flip() +
  theme_classic() +
  theme(
    panel.spacing = unit(0, "points"),
    strip.background = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.length.y = unit(0, "points"),
    axis.title = element_blank(),
    strip.placement = "outside",
    strip.text = element_text(),
    legend.position = "bottom",
    panel.grid.major.x = element_line()
  )
Output:
Update: To get the desired order, you could create auxiliary data to order the labels and then format the id variable as a factor. Here is the code:
# Auxiliary data: ids sorted by decreasing Positive count; this order becomes
# the factor level (and therefore facet) order below.
levs <- df %>%
  rownames_to_column("id") %>%
  pivot_longer(-id) %>%
  filter(name == "Positive") %>%
  arrange(desc(value))

# Same plot as before, with id converted to a factor using the levels above.
df %>%
  rownames_to_column("id") %>%
  pivot_longer(-id) %>%
  mutate(
    Flag = name %in% c("Positive", "Negative"),
    # TRUE rather than T: T is an ordinary variable that can be reassigned
    id = factor(id, levels = levs$id, ordered = TRUE)
  ) %>%
  ggplot(aes(x = Flag, y = value, fill = name)) +
  geom_bar(stat = "identity") +
  facet_grid(id ~ ., switch = "y") +
  scale_fill_manual(values = c("red", "tomato", "cyan3"), name = "") +
  coord_flip() +
  theme_classic() +
  theme(
    panel.spacing = unit(0, "points"),
    strip.background = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.length.y = unit(0, "points"),
    axis.title = element_blank(),
    strip.placement = "outside",
    strip.text = element_text(),
    legend.position = "bottom",
    panel.grid.major.x = element_line()
  )
Output:

Default frame to display in a ggplotly animation

Does anyone know how to set initial frame with ggplotly()?
The initial state of the 4 points is 1:4, and the slider adds values between -10 and 10 to them.
Let's say I have this animated plot:
library(ggplot2)
library(plotly)

# One animation frame per offset in `add`; each frame holds the four points
# 1:4 shifted by that offset.
add <- rep(seq(-10, 10, by = 0.5), each = 4)
x <- 1:4 + add
df <- data.frame(x = x, y = 1, add = add)

plot <- ggplot(df, aes(x, y)) +
  geom_point(
    aes(frame = add),
    # repeat the four colours once per frame so each point keeps its colour
    col = rep(c("yellow", "green", "red", "blue"), length(unique(add))),
    size = 5
  ) +
  theme(
    axis.line.x = element_line(),
    panel.background = element_blank(),
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

ggplotly(plot)
I would like the plot to start at x = 1:4 (frame 21) instead of x = -9:-6 (frame 1).
Thanks a lot!
It could be a matter of how add is ordered in your data frame.
# NOTE(review): this reverses only the frame column; df$x is left unchanged, so
# each point ends up paired with a different frame value -- confirm that this
# is intended.
df$add <- rev(df$add)
# ... same code as before
ggplotly(plot)

ggplot2: Boxplots with points and fill separation [duplicate]

This question already has answers here:
ggplot2 - jitter and position dodge together
(2 answers)
Closed 6 years ago.
I have data which can be divided by two separators: one is the year and the second is a field characteristic.
# Build the example data column by column: 36 observations, two years and
# two field codes (L / M).
box <- as.data.frame(1:36)

# Nine rows of 1996 followed by nine of 1997, repeated once per code;
# stored as character.
box$year <- as.character(rep(rep(c(1996, 1997), each = 9), times = 2))

box$case <- c(
  6.40, 6.75, 6.11, 6.33, 5.50, 5.40, 5.83, 4.57, 5.80,
  6.00, 6.11, 6.40, 7.00, NA, 5.44, 6.00, NA, 6.00,
  6.00, 6.20, 6.40, 6.64, 6.33, 6.60, 7.14, 6.89, 7.10,
  6.73, 6.27, 6.64, 6.41, 6.42, 6.17, 6.05, 5.89, 5.82
)

# First 18 rows belong to code "L", the remaining 18 to code "M".
box$code <- rep(c("L", "M"), each = 18)

# Per-row point colours: the code factor relabelled with hex colours.
colour <- factor(box$code, labels = c("#F8766D", "#00BFC4"))
In boxplots, I want to display points over them, to see how data is distributed. That is easily done with one single boxplot for every year:
# One boxplot per year with the raw points drawn on top, coloured by code.
ggplot(box, aes(x = year, y = case, fill = "#F8766D")) +
  geom_boxplot(alpha = 0.80) +
  geom_point(colour = colour, size = 5) +
  theme(
    text = element_text(size = 18),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank(),
    legend.position = "none"
  )
But it becomes more complicated when I add the fill parameter:
# Boxplots split by code within each year; geom_point() is added without a
# position argument.
ggplot(box, aes(x = year, y = case, fill = code)) +
  geom_boxplot(alpha = 0.80) +
  geom_point(colour = colour, size = 5) +
  theme(
    text = element_text(size = 18),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank(),
    legend.position = "none"
  )
And now the question: How to move these points to boxplot axes, where they belong? As blue points to blue boxplot and red to red one.
Like Henrik said, use position_jitterdodge() and shape = 21. You can clean up your code a bit too:
No need to define box, then fill it piece by piece
You can let ggplot hash out the colors if you wish and skip constructing the colors factor. If you want to change the defaults, look into scale_fill_manual and scale_color_manual.
# Define the data in a single data.frame() call.
box <- data.frame(
  # nine rows of 1996 then nine of 1997, repeated once per code
  year = rep(rep(c(1996, 1997), each = 9), times = 2),
  case = c(
    6.40, 6.75, 6.11, 6.33, 5.50, 5.40, 5.83, 4.57, 5.80,
    6.00, 6.11, 6.40, 7.00, NA, 5.44, 6.00, NA, 6.00,
    6.00, 6.20, 6.40, 6.64, 6.33, 6.60, 7.14, 6.89, 7.10,
    6.73, 6.27, 6.64, 6.41, 6.42, 6.17, 6.05, 5.89, 5.82
  ),
  # first 18 rows are code "L", the remaining 18 are code "M"
  code = rep(c("L", "M"), each = 18)
)

# Dodge (and jitter) the points with position_jitterdodge(); shape 21 has a
# fill, so the points pick up the same fill scale as the boxes.
ggplot(box, aes(x = factor(year), y = case, fill = code)) +
  geom_boxplot(alpha = 0.80) +
  geom_point(
    aes(fill = code),
    size = 5,
    shape = 21,
    position = position_jitterdodge()
  ) +
  theme(
    text = element_text(size = 18),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank(),
    legend.position = "none"
  )
I see you've already accepted @JakeKaupp's nice answer, but I thought I would throw in a different option, using geom_dotplot. The data you are visualizing is rather small, so why not forgo the boxplot?
# Dot plot alternative: bin along y, stack the dots around the centre, and
# dodge the two codes within each year.
ggplot(box, aes(x = factor(year), y = case, fill = code)) +
  geom_dotplot(
    binaxis = "y",
    stackdir = "center",
    position = position_dodge()
  )

how to make a cdf plot smoother and label y axis

I read parameters "data1" and "data2" from files and use this code to plot cdf but I have two problems:
make the figure smoother
label Y axis to CDF
Please notice that this code is correct but I need some modifications.
# Stack both samples into one column and tag each value with its group
# (1 = data1, 2 = data2; 19365 values each).
df <- data.frame(
  x = c(data1, data2),
  ggg = factor(rep(1:2, c(19365, 19365)))
)

# Empirical CDF of x, one curve per group.
ggplot(df, aes(x, colour = ggg)) +
  stat_ecdf() +
  # NOTE(review): ggg is a column name, not an aesthetic used by this plot, so
  # this label presumably does not reach the y axis -- confirm.
  labs(x = "Time (ms)", ggg = "CDF", fill = "") +
  theme_bw() +
  theme(
    panel.grid.major = element_line(colour = "grey"),
    panel.border = element_rect(colour = "black"),
    axis.line = element_blank(),
    panel.background = element_blank(),
    legend.direction = "vertical",
    legend.position = c(1, 0.5),
    legend.justification = c(1, 0.5),
    legend.background = element_rect(colour = NA)
  ) +
  scale_colour_hue(name = "", labels = c("IEEE 802.11p", "Our protocol"))
The empirical distribution function is always a step function and you should not smooth it in any way. Having said that, you can get the values for the empirical distribution function using ecdf. If you want to do any smoothing on the result (and this is not suggested), you can.
# library() stops with an error if dplyr is missing; require() would only
# return FALSE and let the script fail later.
library(dplyr)

# For each group, evaluate that group's empirical CDF at its own sorted values.
res <- df %>%
  group_by(ggg) %>%
  do({
    sorted_x <- sort(.$x) # sort once, reuse for both columns
    data.frame(x = sorted_x, ecdf = ecdf(.$x)(sorted_x))
  })

# Draw the step functions explicitly.
ggplot(res, aes(x, ecdf, colour = ggg)) +
  geom_step()
To relabel the y axis, you can use
# Add this to the plot in place of the labs() call that used ggg = 'CDF';
# y = sets the y-axis title.
labs(x='Time (ms)', y='CDF')

Resources