(I have edited the question with a min reproducible code!)
I have a 100% stacked bar chart (horizontal) and would like to put numbers (means) as text next to each of the bars, on the right.
This is the graph I have right now:
graph_data = mpg %>%
group_by(manufacturer, class) %>%
summarise(
count = n()
) %>%
left_join(
data %>%
group_by(manufacturer) %>%
summarise(
manufacturer_total = n()
)
)
graph_data %>%
ggplot(aes(x = manufacturer, y = count/manufacturer_total, fill = class)) +
geom_bar(position = 'stack', stat = 'identity') +
coord_flip()
And I want to be able to put the means in this table, next to the corresponding bar:
hwy_mean = mpg %>%
group_by(manufacturer) %>%
summarise(
mean_hwy = round(mean(hwy), digits = 2)
)
manufacturer mean_hwy
<chr> <dbl>
1 audi 26.4
2 chevrolet 21.9
3 dodge 18.0
4 ford 19.4
5 honda 32.6
6 hyundai 26.9
E.g. There would be the text '26.44' next to the audi bar.
I've tried setting hwy_mean as the label and use geom_text but does not work because "Aesthetics must be either length 1 or the same as the data (32): "
I've this rewrite:
graph_data = mpg %>%
group_by(manufacturer, class) %>%
summarise(count = n()) %>%
left_join(mpg %>%
group_by(manufacturer) %>%
summarise(manufacturer_total = n()))
graph_data %>%
ggplot(aes(
y = manufacturer,
x = count / manufacturer_total,
fill = class
)) +
geom_bar(position = 'stack', stat = 'identity') ->
pl_a
hwy_mean = mpg %>%
group_by(manufacturer) %>%
summarise(
class = NA,
mean_hwy = round(mean(hwy), digits = 2)
)
pl_a +
geom_text(data = hwy_mean, aes(label = mean_hwy, y = manufacturer, x = .99),
hjust = 1)
hwy_mean
Related
I am reading the book Text Mining with R: A Tidy Approach by Julia Silge & David Robinson to try to find the difference between two works, and not the three in the original book, how can I draw a similar graph with ggplot?
In the original book:
austen <- austen_books() %>%
select(-book) %>%
mutate(author = "Jane Austen")
bronte <- gutenberg_download(c(1260, 768, 969, 9182, 767)) %>%
select(-gutenberg_id) %>%
mutate(author = "Brontë Sisters")
hgwells <- gutenberg_download(c(35, 36, 5230, 159)) %>%
select(-gutenberg_id) %>%
mutate(author = "H.G. Wells")
comparison_df <- books %>%
add_count(author, wt = n, name = "total_word") %>%
mutate(proportion = n / total_word) %>%
select(-total_word, -n) %>%
pivot_wider(names_from = author, values_from = proportion,
values_fill = list(proportion = 0)) %>%
pivot_longer(3:4, names_to = "other", values_to = "proportion")
comparison_df
#> # A tibble: 56,002 x 4
#> word `Jane Austen` other proportion
#> <chr> <dbl> <chr> <dbl>
#> 1 miss 0.00855 Brontë Sisters 0.00342
#> 2 miss 0.00855 H.G. Wells 0.000120
#> 3 time 0.00615 Brontë Sisters 0.00424
#> 4 time 0.00615 H.G. Wells 0.00682
#> 5 fanny 0.00449 Brontë Sisters 0.0000438
#> 6 fanny 0.00449 H.G. Wells 0
#> # ... with 5.6e+04 more rows
But what if I just want to compare two works?Just like austen and bronte.
comparison_df %>%
filter(proportion > 1 / 1e5) %>%
ggplot(aes(proportion, `Jane Austen`)) +
geom_abline(color = "gray40", lty = 2) +
geom_jitter(aes(color = abs(`Jane Austen` - proportion)),
alpha = 0.1, size = 2.5, width = 0.3, height = 0.3) +
geom_text(aes(label = word), check_overlap = TRUE, vjust = 1.5) +
scale_x_log10(labels = label_percent()) +
scale_y_log10(labels = label_percent()) +
scale_color_gradient(limits = c(0, 0.001), low = "darkslategray4", high = "gray75") +
facet_wrap(~ other) +
guides(color = FALSE)
How can I modify the code above here?
Here is a complete, reproducible solution. The books data wrangling code is a copy&paste of or based on Text Mining with R: A Tidy Approach, Julia Silge & David Robinson.
suppressPackageStartupMessages({
library(dplyr)
library(tidyr)
library(tidytext)
library(stringr)
library(gutenbergr)
library(janeaustenr)
library(ggplot2)
library(scales)
})
data(stop_words)
austen <- austen_books() %>%
select(-book)
bronte <- gutenberg_download(c(1260, 768, 969, 9182, 767)) %>%
select(-gutenberg_id)
#> Determining mirror for Project Gutenberg from http://www.gutenberg.org/robot/harvest
#> Using mirror http://aleph.gutenberg.org
hgwells <- gutenberg_download(c(35, 36, 5230, 159)) %>%
select(-gutenberg_id)
bind_rows(
austen %>% mutate(author = "Jane Austen"),
bronte %>% mutate(author = "Brontë Sisters"),
hgwells %>% mutate(author = "H.G. Wells")
) %>%
unnest_tokens(word, text) %>%
anti_join(stop_words, by = "word") %>%
mutate(word = str_extract(word, "[a-z']+")) %>%
count(author, word, sort = TRUE) %>%
add_count(author, name = "total_word") %>%
mutate(proportion = n / total_word) %>%
select(-total_word, -n) %>%
pivot_wider(names_from = author, values_from = proportion,
values_fill = list(proportion = 0)) %>%
pivot_longer(3:4, names_to = "other", values_to = "proportion") %>%
#
# to filter author solves the question's problem
# also filter Jane Austen's values to avoid warnings, log10 was giving
# Warning messages:
# 1: Transformation introduced infinite values in continuous y-axis
# 2: Transformation introduced infinite values in continuous y-axis
# 3: Removed 18761 rows containing missing values (geom_point).
#
# I have separated the filters to make the code clearer
# but they can be combined as only one
#
filter(proportion > 1/1e5, `Jane Austen` > 1/1e5) %>%
filter(other == "Brontë Sisters") %>%
#
ggplot(aes(proportion, `Jane Austen`)) +
geom_abline(color = "gray40", lty = 2) +
geom_jitter(aes(color = abs(`Jane Austen` - proportion)),
alpha = 0.1, size = 2.5, width = 0.3, height = 0.3) +
geom_text(aes(label = word), check_overlap = TRUE, vjust = 1.5) +
scale_x_log10(labels = label_percent()) +
scale_y_log10(labels = label_percent()) +
scale_color_gradient(limits = c(0, 0.001), low = "darkslategray4", high = "gray75") +
xlab(label = "Brontë Sisters") +
guides(color = "none")
#> Warning: Removed 1 rows containing missing values (geom_text).
Created on 2022-05-14 by the reprex package (v2.0.1)
I am using the R programming language. I made the following interactive graph using the plotly library:
library(dplyr)
library(ggplot2)
library(shiny)
library(plotly)
library(htmltools)
library(dplyr)
#generate data
set.seed(123)
######
var = rnorm(731, 85,25)
date= seq(as.Date("2014/1/1"), as.Date("2016/1/1"),by="day")
data = data.frame(var,date)
vals <- 90:100
combine <- vector('list', length(vals))
count <- 0
for (i in vals) {
data$var_i = i
data$new_var_i = ifelse(data$var >i,1,0)
#percent of observations greater than i (each month)
aggregate_i = data %>%
mutate(date = as.Date(date)) %>%
group_by(month = format(date, "%Y-%m")) %>%
summarise( mean = mean(new_var_i))
#combine files together
aggregate_i$var = i
aggregate_i$var = as.factor(aggregate_i$var)
count <- count + 1
combine[[count]] <- aggregate_i
}
result_2 <- bind_rows(combine)
result_2$group = "group_b"
result_2$group = as.factor(result_2$group)
graph <-ggplot(result_2, aes(frame = var, color = group)) + geom_line(aes(x=month, y=mean, group=1))+ theme(axis.text.x = element_text(angle=90)) + ggtitle("title") + facet_wrap(. ~ group)
graph = ggplotly(graph)
When the user moves the mouse over any point on the graph, the following information is displayed (hover text):
I am trying to add more information to the hover text. For example:
result_2$tot = mean(result_2$mean)
> head(result_2)
# A tibble: 6 x 5
month mean var group tot
<chr> <dbl> <fct> <fct> <dbl>
1 2014-01 0.387 90 group_b 0.364
2 2014-02 0.429 90 group_b 0.364
3 2014-03 0.452 90 group_b 0.364
4 2014-04 0.367 90 group_b 0.364
5 2014-05 0.355 90 group_b 0.364
6 2014-06 0.433 90 group_b 0.364
Yet, when I make a new graph using this result_2 file, the new information does not appear in the hover text:
graph <-ggplot(result_2, aes(frame = var, color = group)) + geom_line(aes(x=month, y=mean, group=1))+ theme(axis.text.x = element_text(angle=90)) + ggtitle("title") + facet_wrap(. ~ group)
graph = ggplotly(graph)
#view graph
graph
Can someone please shoe me how to fix this problem?
Thanks
If you want full control of your hoverinfo its actually best to create a plotly chart rather than a ggplot and then use ggplotly(). If you have only one group in result_2 as in your example above you can use
result_2 %>%
plot_ly(x=~month, y=~mean, color=~group) %>%
group_by(group) %>%
add_lines(frame=~var,hoverinfo = "text",
text = ~ paste0("Month: ",month, "<br>",
"Mean: ", mean, "<br>",
"Total: ", mean(mean))) %>%
layout(title = list(text = "title"),
xaxis = list(tickangle = -90, tickformat = "%m-%Y"))
or if you have > 1 group in result_2 and you want to facet by group as indicated in your ggplot you can do:
result_2 %>%
group_by(group) %>%
do(
plot = plot_ly(data =., x=~month, y=~mean, color=~group) %>%
add_lines(frame=~var,hoverinfo = "text",
text = ~ paste0("Month: ",month, "<br>",
"Mean: ", mean, "<br>",
"Total: ", mean(mean))) %>%
layout(title = list(text = "title"),
xaxis = list(tickangle = -90, tickformat = "%m-%Y"))
) %>%
subplot(shareX = TRUE, shareY = FALSE, nrows = 2)
But this won't work if you have only one group hence the two options provided.
You can create any function and write anything you want in the text = ~paste0() part and it will show up in your hoverinfo.
I'm using the tidyverse-ggplot2 combination to plot multiple bar plots. In one of my comparisons i would like to have even up to 300 single plots. I was wondering if there is a possibility to make sure that the plots will be visible in the pdf file and not look like the attached example
If possible I would prefer to have all the plots in one single pdf file, but if not, also multiple pages will be ok.
The command to plot the bar charts is
common %>%
as_tibble(rownames="gene") %>%
left_join(x= ., y = up[,1:2], by = c("gene" = "ensembl_gene_id") ) %>%
pivot_longer(starts_with("S"), names_to="sample", values_to="counts") %>%
left_join(groups, by="sample") %>%
group_by(mgi_symbol, group, cond, time) %>%
summarize(mean_count=mean(counts)) %>%
ggplot( aes(x = time, y = mean_count, fill=cond)) +
geom_bar(stat = "identity", position = position_dodge(width=0.9) ) +
scale_fill_manual(values=c("darkblue", "lightblue", "black")) +
facet_wrap(~mgi_symbol, scales = "free", ncol = 5) +
theme_bw()
I forgot to add the group table
groups <- tibble(
sample= colnames(normCounts),
group = rep(seq(1, ncol(normCounts)/3), each=3),
cond = rep(c("WT", "GCN2-KO", "GCN1-KO"), each = 12),
time = rep(rep(c("0h", "1h", "4h", "8h"), each=3), times = 3 )
)
thanks
Adding the command with the group_map was as such
common %>%
as_tibble(rownames="gene") %>%
left_join(x= ., y = up[,1:2], by = c("gene" = "ensembl_gene_id") ) %>%
pivot_longer(starts_with("S"), names_to="sample", values_to="counts") %>%
left_join(groups, by="sample") %>%
group_by(mgi_symbol, group, cond, time) %>%
summarize(mean_count=mean(counts)) %>%
group_map(function(g, ...)
ggplot(g, aes(x = time, y = mean_count, fill=cond)) +
geom_bar(stat = "identity", position = position_dodge(width=0.9) ) +
scale_fill_manual(values=c("darkblue", "lightblue", "black")) +
facet_wrap(~mgi_symbol, scales = "free", ncol = 5) +
theme_bw()
)
EDIT
This is how the data looks like in the input table (after summarizing the means)
df <-
common %>%
as_tibble(rownames="gene") %>%
left_join(x= ., y = up[,1:2], by = c("gene" = "ensembl_gene_id") ) %>%
pivot_longer(starts_with("S"), names_to="sample", values_to="counts") %>%
left_join(groups, by="sample") %>%
group_by(mgi_symbol, group, cond, time) %>%
summarize(mean_count=mean(counts)) %>%
ungroup()
df
#>`summarise()` regrouping output by 'mgi_symbol', 'group', 'cond' (override with `.groups` argument)
#> # A tibble: 1,212 x 5
#> mgi_symbol group cond time mean_count
#> <chr> <int> <chr> <chr> <dbl>
#> 1 0610031O16Rik 1 WT 0h 14.4
#> 2 0610031O16Rik 2 WT 1h 30.9
#> 3 0610031O16Rik 3 WT 4h 45.5
#> 4 0610031O16Rik 4 WT 8h 56.0
#> 5 0610031O16Rik 5 GCN2-KO 0h 18.9
#> 6 0610031O16Rik 6 GCN2-KO 1h 39.4
#> 7 0610031O16Rik 7 GCN2-KO 4h 13.9
#> 8 0610031O16Rik 8 GCN2-KO 8h 13.3
#> 9 0610031O16Rik 9 GCN1-KO 0h 12.3
#> 10 0610031O16Rik 10 GCN1-KO 1h 25.3
#> # … with 1,202 more rows
Start with some dummy data. This is the data after you've finished running left_join, pivot_longer, group_by, summarize.
library(tidyverse)
df <- tibble(
time = 1:5,
mean_count = 1:5,
cond = "x"
) %>%
expand_grid(mgi_symbol = c(letters, LETTERS))
Create a column group which represents what page the mgi_symbol belongs on.
plots_per_page <- 20
df <-
df %>%
mutate(group = (dense_rank(mgi_symbol) - 1) %/% plots_per_page)
Create all the plots with group_map.
plots <-
df %>%
group_by(group) %>%
group_map(function(g, ...) {
ggplot(g, aes(x = time, y = mean_count, fill=cond)) +
geom_bar(stat = "identity", position = position_dodge(width=0.9) ) +
scale_fill_manual(values=c("darkblue", "lightblue", "black")) +
facet_wrap(~mgi_symbol, scales = "free", ncol = 5) +
theme_bw()
})
Save as multiple pages using ggpubr
ggpubr::ggexport(
ggpubr::ggarrange(plotlist = plots, nrow = 1, ncol = 1),
filename = "plots.pdf"
)
I recently started working dygraphs in R, and wanted to achieve a ribbon line plot with it.
Currently, I have the below ggplot which displays a ribbon (for data from multiple batches over time) and its median for two groups. Below is the code for it.
ggplot(df,
aes(x=variable, y=A, color=`[category]`, fill = `[category]`)) +
stat_summary(geom = "ribbon", alpha = 0.35) +
stat_summary(geom = "line", size = 0.9) +
theme_minimal()+ labs(x="TimeStamp")
I could add the median solid line on the dygraph, but I'm unable to add the ribbon to it. Below is the dygraph and my code for it.
df_Medians<- df%>%
group_by(variable,`[category]`) %>%
summarise(A = median(A[!is.na(A)]))
median <- cbind(as.ts(df_Medians$A))
dygraph(median) %>%
dyRangeSelector()
Is there anyway to plot something similar to the above ggplot on dygraphs? Thanks in advance.
See if the following serves your purpose:
ggplot code (for mean, replace median_se with mean_se in the stat_summary layers):
library(ggplot2)
ggplot(df,
aes(x=variable, y=A, color=category, fill = category)) +
stat_summary(geom = "ribbon", alpha = 0.35, fun.data = median_se) +
stat_summary(geom = "line", size = 0.9, fun.data = median_se) +
theme_minimal()
dygraph code (for mean, replace median_se with mean_se in the summarise step):
library(dplyr)
library(dygraph)
# calculate summary statistics for each category, & spread results out such that each row
# corresponds to one position on the x-axis
df_dygraph <- df %>%
group_by(variable, category) %>%
summarise(data = list(median_se(A))) %>%
ungroup() %>%
tidyr::unnest(data) %>%
mutate(category = as.integer(factor(category))) %>% # optional: standardizes the column
# names for summary stats
tidyr::pivot_wider(id_cols = variable, names_from = category,
values_from = c(ymin, y, ymax))
> head(df_dygraph)
# A tibble: 6 x 7
variable ymin_1 ymin_2 y_1 y_2 ymax_1 ymax_2
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 3817. 2712. 4560. 2918. 5304. 3125.
2 2 3848. 2712. 4564. 2918. 5279. 3125.
3 3 3847. 2826. 4564 2961 5281. 3096.
4 4 3722. 2827. 4331 2962. 4940. 3098.
5 5 3833. 2831. 4570. 2963 5306. 3095.
6 6 3835. 2831. 4572 2964 5309. 3097.
dygraph(df_dygraph, main = "Dygraph title") %>%
dySeries(c("ymin_1", "y_1", "ymax_1"), label = "Category 1") %>%
dySeries(c("ymin_2", "y_2", "ymax_2"), label = "Category 2") %>%
dyRangeSelector()
Code for median counterpart of mean_se:
median_se <- function(x) {
x <- na.omit(x)
se <- sqrt(var(x) / length(x))
med <- median(x)
ggplot2:::new_data_frame(list(y = med,
ymin = med - se,
ymax = med + se),
n = 1)
}
Sample data:
df <- diamonds %>%
select(price, cut) %>%
filter(cut %in% c("Fair", "Ideal")) %>%
group_by(cut) %>%
slice(1:1000) %>%
mutate(variable = rep(seq(1, 50), times = 20)) %>%
ungroup() %>%
rename(A = price, category = cut)
I'm trying to show absolute values for the data points in a proportion bar chart using ggplot2.
Here is the code:
mtcars_gear_by_make <- mtcars %>%
tibble::rownames_to_column(var = "car") %>%
tidyr::separate(car, c("make", "model"), sep = "\\s", extra = "merge") %>%
dplyr::filter(make == "Merc" | make == "Toyota") %>%
dplyr::group_by(make, gear) %>%
dplyr::summarize(n_model = n())
And this is the tibble the code creates:
# Groups: make [2]
make gear n_model
<chr> <dbl> <int>
1 Merc 3 3
2 Merc 4 4
3 Toyota 3 1
4 Toyota 4 1
I used this code to generate the proportion bar chart:
ggplot(mtcars_gear_by_make,
aes(x = make, y = n_model, fill = gear)) +
geom_col(position = "fill")
Is there a way to add labels to the bar chart to show Merc has 3 models with gear 3 and 4 models with gear 4, and Toyota has 1 model with gear 3 and 1 model with gear 4?
Thank you!
Try this, you have to create the position p because you show in proportion style on y-axis and then use that in geom_text() (I modified the code to compute that):
library(tidyverse)
mtcars_gear_by_make <- mtcars %>%
tibble::rownames_to_column(var = "car") %>%
tidyr::separate(car, c("make", "model"), sep = "\\s", extra = "merge") %>%
dplyr::filter(make == "Merc" | make == "Toyota") %>%
dplyr::group_by(make, gear) %>%
dplyr::summarize(n_model = n()) %>%
dplyr::mutate(p=n_model/sum(n_model))
ggplot(mtcars_gear_by_make,
aes(x = make, y = n_model, fill = gear,label=n_model)) +
geom_col(position = "fill")+
geom_text(aes(y=p),position = position_stack(vjust = .5),color='white',fontface='bold')