Plot two variables in bar plot side by side using ggplot2 - r

I think I need to use the melt function, but I'm not sure how to do so. Sample data, code, and resulting graph are below. Basically, the "cnt" column is made up of the "registered" plus the "casual" for each row. I want to display the total "registered" vs the total "casual" per month, instead of the overall total "cnt".
example data
# Bar chart: horizontal bars of `cnt` by `mnth`, skipping rows whose month
# is missing. `mychartattributes` is defined elsewhere and appended last.
bar <- ggplot(
  data = subset(bikesharedailydata, !is.na(mnth)),
  mapping = aes(x = mnth, y = cnt)
) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(
    title = "My Bar Chart",
    subtitle = "Total Renters per Month",
    caption = "Caption",
    x = "Month",
    y = "Total Renters"
  ) +
  mychartattributes

To "melt" your data use reshape2::melt:
library(ggplot2)
library(reshape2)
# Keep only the rows that have a month
d <- subset(bikesharedailydata, !is.na(mnth))
# Keep the month plus the two renter types that will be plotted
d <- d[, c("mnth", "registered", "casual")]
# Reshape to long format: one row per original row and renter type
d <- melt(d, id.vars = "mnth")
# If the data hold several rows per month (e.g. daily records), dodged
# identity bars are drawn on top of each other and only the largest value is
# visible. Sum per month and renter type so each bar shows the monthly total.
d <- aggregate(value ~ mnth + variable, data = d, FUN = sum)
# Set fill by variable (registered or casual) to get the bars side by side
ggplot(d, aes(mnth, value, fill = variable)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  labs(title = "My Bar Chart", subtitle = "Total Renters per Month",
       caption = "Caption",
       x = "Month", y = "Total Renters")

Use tidyr and dplyr:
set.seed(1000)
library(dplyr)
library(tidyr)
library(ggplot2)
# Example data: one row per month. `month` is a factor with calendar-ordered
# levels; a plain character column would be sorted alphabetically on the axis.
bikesharedailydata <- data.frame(
  month = factor(month.abb, levels = month.abb),
  registered = rpois(n = 12, lambda = 2),
  casual = rpois(n = 12, lambda = 1)
)
# Reshape to long format (one row per month and renter type), then draw the
# two renter types side by side for each month.
bikesharedailydata %>%
  pivot_longer(cols = -month, names_to = "type", values_to = "count") %>%
  ggplot(aes(x = month, y = count, fill = type)) +
  geom_bar(stat = "identity", position = "dodge")

Related

How to re-order bar plot with ggplot2 by distribution proportions of variables

I'm looking for a way to re-order the bar plot produced with ggplot2 such that the rates of the less observed category (i.e. ThemeFirst) increase from the left to the right. The original bar plot I generated is below:
And it is plotted using the following code:
# Cross-tabulate variety (rows) against realization (columns)
t1 <- table(data$Variety, data$realization)
# Row-wise proportions, computed once and reused for both variant columns
props <- prop.table(t1, 1)
dataframe_realization <- data.frame(
  Variety = names(props[, 1]),
  RecipientFirst = props[, 1],
  ThemeFirst = props[, 2],
  row.names = NULL
)
# NOTE(review): `variable_name` is the argument spelling of reshape::melt;
# reshape2::melt calls it `variable.name` -- confirm which package is loaded.
dataframe_realization <- melt(dataframe_realization, id = "Variety",
                              variable_name = "Variant")
# adding absolute frequency values to the table
dataframe_realization_absfreq <- data.frame(
  Variety = names(t1[, 1]),
  RecipientFirst = as.numeric(t1[, 1]),
  ThemeFirst = as.numeric(t1[, 2])
)
dataframe_realization_absfreq <- melt(dataframe_realization_absfreq,
                                      id = "Variety",
                                      variable_name = "Variant")
dataframe_realization$absvals <- dataframe_realization_absfreq$value
dataframe_realization$Proportion <- dataframe_realization$value
dataframe_realization$variable <- dataframe_realization$Variant
# Variants ordered by their proportion within the '14th-18thCentury' variety,
# smallest first
labels.order <- dataframe_realization %>%
  filter(Variety == '14th-18thCentury') %>%
  arrange(Proportion) %>%
  pull(Variant)
df.new <- dataframe_realization %>%
  mutate(
    Variable = factor(Variant, levels = labels.order, ordered = TRUE)
  )
# stacked bar plot with absolute values added on the each bar
realization_plot_absvals <- ggplot(data = dataframe_realization,
                                   aes(Variety, Proportion, group = Variant)) +
  geom_col(aes(fill = Variant)) +
  labs(title = "", y = "Proportion of theme-recipient tokens", x = "") +
  scale_y_continuous() +
  scale_fill_grey(start = 0.25, end = 0.75) +
  # White text on the RecipientFirst segments, black on the others. The
  # trailing empty argument of the original call was removed: it is passed
  # through `...` and can fail with "argument is missing".
  geom_text(aes(label = absvals), position = position_stack(vjust = 0.5),
            color = ifelse(dataframe_realization$Variant == "RecipientFirst",
                           "white", "black")) +
  theme(text = element_text(size = 15))
ggsave("~/VADIS_VarietyProportion_absvals.png", realization_plot_absvals,
       width = 13, height = 6, units = "in", dpi = 1000)
So, again, the idea is to rearrange the plot and the bar to the far left will be the "Variety" with least ThemeFirst proportion (namely 19thCentury), and the bar to the far right will be the one with most ThemeFirst proportion (namely CTM_Other). The data for producing the plot can be found in this OSF page.
Just make a factor of the x-axis variable, with the levels based on the order of proportions like you did for labels.order.
library(dplyr)
library(reshape2)
library(ggplot2)
# Order the varieties by the proportion of the first variant in labels.order
# (computed earlier), smallest proportion first.
variety.order <- dataframe_realization %>%
  filter(Variant == labels.order[1]) %>%
  arrange(Proportion) %>%
  pull(Variety)
# Make Variety a factor whose level order is the desired left-to-right order
# of the bars.
df.new <- dataframe_realization %>%
  mutate(
    Variable = factor(Variant, levels = labels.order, ordered = TRUE),
    Variety = factor(Variety, levels = variety.order)
  )
# stacked bar plot with absolute values added on the each bar
realization_plot_absvals <- ggplot(data = df.new,
                                   aes(Variety, Proportion, group = Variant)) +
  geom_col(aes(fill = Variant)) +
  labs(title = "", y = "Proportion of theme-recipient tokens", x = "") +
  scale_y_continuous() +
  scale_fill_grey(start = 0.25, end = 0.75) +
  # Text colours are derived from the same data frame that is plotted, so
  # they stay aligned with its rows. The trailing empty argument of the
  # original call was removed.
  geom_text(aes(label = absvals), position = position_stack(vjust = 0.5),
            color = ifelse(df.new$Variant == "RecipientFirst",
                           "white", "black")) +
  theme(text = element_text(size = 15))
(note: the option variable_name belongs to reshape::melt; in reshape2::melt, which is the package loaded here, the option is called variable.name)

How to Change the Tooltip in ggplotly to show SUM of a Column in a Bar Chart

I have this bar chart rendered using ggplot and ggplotly()
It renders just the way I want, in that it displays the SUM of total crashes by Mode Type and Year.
But, I am struggling with getting the (hover) tooltip to display the SUM of the Total Crashes. Right now, it just shows "1".
What do I need to do to my code to configure the tool tip to show the total crashes as shown in the chart, and not a value of 1? Here is my code, and the bar chart it produces:
# Map columns by name inside aes(). Writing `totcrashes_by_year$...` inside
# aes() bypasses the data argument, so ggplot2 (and the ggplotly tooltip)
# cannot tie the values to the plotted rows.
crashplot <- ggplot(
  totcrashes_by_year,
  aes(x = `Crash Year`, y = `Total Crashes`)
)
# Stacked bars, one fill colour per mode type
crashplot +
  geom_bar(aes(fill = `Mode Type`), stat = "identity", position = "stack") +
  xlab("Crash Year") +
  ylab("Total Crashes") +
  ggtitle("Total Crashes by Year and Mode Type") +
  labs(fill = "Mode Type")
EDIT: Reproducible Code Example
Here is code for replicating this issue.
# Simulated crash records: one row per crash (crashes is always 1).
# Both random columns are drawn with the full row count, so `type` no longer
# relies on data.frame() silently recycling a 5-element vector to 40 rows.
n_crashes <- 40
crashsample <- data.frame(
  year = sample(2007:2018, n_crashes, replace = TRUE),
  crashes = 1,
  type = sample(c('Vehicle', 'Pedestrian', 'Bike', 'Motorcycle'),
                n_crashes, replace = TRUE)
)
Create ggplot
# Stacked bars of `crashes` per year, one segment per row of crashsample,
# filled by crash type
crashplot <- ggplot(
  data = crashsample,
  mapping = aes(x = year, y = crashes, fill = type)
) +
  geom_bar(stat = "identity") +
  xlab('Crash Year') +
  ylab('Total Crashes') +
  ggtitle("Total Crashes by Year and Mode Type") +
  labs(fill = "Mode Type")
# Hand the static ggplot to ggplotly to render it as a dynamic plotly chart
ggplotly(crashplot)
This code returns valid labels for me
# Simulate Data
df <- data.frame(year = sample(2005:2015, 20, replace = TRUE),
                 crashes = sample(30:100, 20),
                 type = sample(c('Vehicle', 'Pedestrian'), 20, replace = TRUE))
# Required packages. library() stops with an error when a package is missing;
# require() would only return FALSE and fail later with a confusing message.
library(plotly)
library(tidyverse)
# ggplot2 plot definition: sum the crashes per year and type first, so each
# bar segment (and its tooltip) carries the total rather than a single row.
crashplot <- df %>%
  group_by(year, type) %>%
  summarise(summed_crashes = sum(crashes)) %>%
  ungroup() %>%
  ggplot(aes(x = year, y = summed_crashes, fill = type)) +
  geom_bar(stat = 'identity') +
  labs(x = 'Crash Year',
       y = 'Total Crashes',
       title = 'Total Crashes by Year and Mode Type',
       fill = "Mode Type")
# Show plot with plotly
ggplotly(crashplot)
If it does not work, it would be good to see a reproducible data example for this problem.

How to annotate inside the plot when using datetime on the X axis with ggplot2?

I have successfully created a line graph in R using ggplot2 with percentage on the Y axis and date/time on the X axis, but I am unsure how to annotate inside the graph at specific date/time points where there is a high/low peak.
The examples I identified (on R-bloggers & RPubs) are annotated without using date/time, and I have made attempts to annotate it (with ggtext and annotate functions, etc), but got nowhere. Please can you show me an example of how to do this using ggplot2 in R?
The current R code below creates the line graph, but can you help me extend the code to annotate inside of the graph?
# Read the raw sentiment data from the working directory
sentimentdata <- read.csv(
  "sentimentData-problem.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
# Keep hours 11 through 23 and parse the timestamp column
sentimentTime <- sentimentdata %>%
  filter(between(Hour, 11, 23)) %>%
  mutate(Datetime = ymd_hm(Datetime))
library(zoo)
# One line per sentiment category, with hourly ticks on the x axis
sentimentTime %>%
  filter(Cat %in% c("Negative", "Neutral", "Positive")) %>%
  ggplot(aes(x = Datetime, y = Percent, group = Cat, colour = Cat)) +
  geom_line() +
  scale_x_datetime(
    breaks = date_breaks("1 hours"),
    labels = date_format("%H:00")
  ) +
  labs(
    title = "Peak time on day of event",
    colour = "Sentiment Category",
    x = "By Hour",
    y = "Percentage of messages"
  )
Data source available via GitHub:
Since you have multiple lines and you want two labels on each line according to the maxima and minima, you could create two small dataframes to pass to geom_text calls.
First we ensure the necessary packages and the data are loaded:
library(lubridate)
library(ggplot2)
library(scales)
library(dplyr)
# Location of the CSV on GitHub, split in two only to keep the line short
url <- paste0(
  "https://raw.githubusercontent.com/jcool12/",
  "datasets/master/sentimentData-problem.csv"
)
# Download, then parse the day-month-year timestamps
sentimentdata <- read.csv(url, stringsAsFactors = FALSE) %>%
  mutate(Datetime = dmy_hm(Datetime))
# Restrict to hours 11 through 23
sentimentTime <- sentimentdata %>%
  filter(between(Hour, 11, 23))
Now we can create a max_table and min_table that hold the x and y co-ordinates and the labels for our maxima and minima:
# Label positions for the highest point of each category.
# The label is computed BEFORE `Percent` is overwritten: summarise() lets
# later expressions see columns created earlier in the same call, so building
# the label after the offset would print a value that is 3 too high.
# trunc() drops the fractional part; it takes no digits argument for numerics.
max_table <- sentimentTime %>%
  group_by(Cat) %>%
  summarise(Datetime = Datetime[which.max(Percent)],
            label = paste(trunc(max(Percent)), "%"),
            Percent = max(Percent) + 3)  # place the text 3 units above the peak
# Label positions for the lowest point of each category (same ordering rule).
min_table <- sentimentTime %>%
  group_by(Cat) %>%
  summarise(Datetime = Datetime[which.min(Percent)],
            label = paste(trunc(min(Percent)), "%"),
            Percent = min(Percent) - 3)  # place the text 3 units below the trough
Which allows us to create our plot without much trouble:
# Line chart per sentiment category; the two geom_text layers take their
# positions and labels from min_table and max_table instead of the main data.
sentimentTime %>%
  filter(Cat %in% c("Negative", "Neutral", "Positive")) %>%
  ggplot(aes(x = Datetime, y = Percent, group = Cat, colour = Cat)) +
  geom_line() +
  # minimum labels
  geom_text(data = min_table, aes(label = label)) +
  # maximum labels
  geom_text(data = max_table, aes(label = label)) +
  scale_x_datetime(
    breaks = date_breaks("1 hours"),
    labels = date_format("%H:00")
  ) +
  labs(
    title = "Peak time on day of event",
    colour = "Sentiment Category",
    x = "By Hour",
    y = "Percentage of messages"
  )

Summarizing data into percentages for side-by-side Bar Charts in R

Below is the code I am having trouble with and its output. The data set is linked at the bottom of the post.
What I am wanting to do is group the StateCodes together with each MSN (opposite of what is showing now in the output).
# Five MSN codes for the year 2009, with each row's share of its state's
# total `Data` (pct) and a formatted percent label (lbl).
plotdata <- EnergyData %>%
  filter(
    MSN %in% c("BMTCB", "GETCB", "HYTCB", "SOTCB", "WYTCB"),
    Year %in% c("2009")
  ) %>%
  select(StateCode, MSN, Data) %>%
  group_by(StateCode) %>%
  mutate(pct = Data / sum(Data)) %>%
  mutate(lbl = scales::percent(pct))
plotdata
This outputs to:
I thought that the group_by function would do that for me but I would like to know if I am missing a key chunk of code?
Once the above chunk runs correctly, I want to create side by side Bar charts by StateCode using the percentages of each of the 5 MSN's.
Here's the code I have so far.
# Filled (100 %) bars per state, split by MSN, with a percent label centred
# in each segment. Fixes relative to the first attempt:
#   * plot `plotdata`, which holds pct and lbl (EnergyData has neither);
#   * fill by MSN (`drv` is not a column of this data);
#   * format the axis with scales::percent (`label = pct` pointed at an
#     object that does not exist outside the data);
#   * position the text with position_fill so it follows the filled bars.
ggplot(plotdata,
       aes(x = factor(StateCode,
                      levels = c("AZ", "CA", "NM", "TX")),
           y = pct,
           fill = factor(MSN,
                         levels = c("BMTCB", "GETCB", "HYTCB", "SOTCB", "WYTCB")))) +
  geom_bar(stat = "identity",
           position = "fill") +
  scale_y_continuous(breaks = seq(0, 1, .2),
                     labels = scales::percent) +
  geom_text(aes(label = lbl),
            size = 3,
            position = position_fill(vjust = 0.5)) +
  scale_fill_brewer(palette = "Set2") +
  labs(y = "Percent",
       fill = "MSN",
       x = "State",
       title = "Renewable Resources by State") +
  theme_minimal()
As of now I believe this all has to do with how I create the percentages for the bar charts.
Any assistance would be great. Thank you!
Here's the data I used Energy Data http://www.mathmodels.org/Problems/2018/MCM-C/ProblemCData.xlsx
Here is a version using data.table for the initial filtering, and changes to the plot function that hopefully get you the result you are after:
library(readxl)
library(data.table)
library(ggplot2)
# Download the workbook to a temporary file. mode = "wb" is required for
# binary files such as .xlsx (without it the file is corrupted on Windows),
# and a temp file avoids depending on a "~/ex" directory that may not exist.
xlsx_path <- tempfile(fileext = ".xlsx")
download.file("http://www.mathmodels.org/Problems/2018/MCM-C/ProblemCData.xlsx",
              xlsx_path, mode = "wb")
# by default, factor levels will be in alphabetical order, so we do not need to specify that
EnergyData <- data.table(read_xlsx(xlsx_path), key = "StateCode", stringsAsFactors = TRUE)
# filter by Year and MSN list
plotdata <- EnergyData[as.character(MSN) %chin% c("BMTCB", "GETCB", "HYTCB", "SOTCB", "WYTCB") & Year == 2009]
# calculate percentages of Data by StateCode (added by reference with :=)
plotdata[, pct := Data / sum(Data), by = "StateCode"]
# plot using percent format and specified number of breaks
ggplot(plotdata,
       aes(x = StateCode,
           y = pct,
           fill = MSN)) +
  geom_bar(stat = "identity",
           position = "fill") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), n.breaks = 6) +
  scale_fill_brewer(palette = "Set2") +
  labs(y = "Percent",
       fill = "MSN",
       x = "State",
       title = "Renewable Resources by State") +
  theme_minimal()
Created on 2020-03-20 by the reprex package (v0.3.0)

Write group labels in first bar or above instead of legend

I have a stacked barplot with the following data
# Every combination of tree name, sample and type (3 x 2 x 3 = 18 rows)
df <- expand.grid(name = c("oak", "birch", "cedar"),
                  sample = c("one", "two"),
                  type = c("sapling", "adult", "dead"))
# Random count between 5 and 200 for each row. TRUE is spelled out because
# `T` is an ordinary variable that can be reassigned.
df$count <- sample(5:200, size = nrow(df), replace = TRUE)
I generate a barplot and try to add the group labels to it:
# Stacked horizontal bars with the type written on each segment.
# `position` is a layer argument, not an aesthetic: inside aes() it is
# ignored, so it is passed to geom_text() directly to stack the labels the
# same way as the bars.
ggplot(df, aes(x = name, y = count, fill = type)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  theme(legend.position = "none") +
  geom_text(aes(label = type), position = "stack")
It produces:
Two to three questions arise:
How can I make the labels appear in the top bar only?
How can I make the labels appear in the center of the bar section?
Optionally: How can I make the labels appear on top of the top bar being connected to their sections by arrows?
There is a link suggested above. That will help you. Here, I have another suggestion.
set.seed(123)
df <- expand.grid(name = c("oak", "birch", "cedar"),
                  sample = c("one", "two"),
                  type = c("sapling", "adult", "dead"))
df$count <- sample(5:200, size = nrow(df), replace = TRUE)
### Arrange a data frame (summing up sample one and two)
library(dplyr)
library(ggplot2)
ana <- df %>%
  group_by(name, type) %>%
  summarise(total = sum(count)) %>%
  ungroup()
# Draw a figure once
bob <- ggplot(ana, aes(x = name, y = total, fill = type)) +
  geom_bar(stat = "identity", position = "stack")
# Get the data frame ggplot computed for the bar layer (one row per segment,
# including the stacked ymin/ymax of each segment)
cathy <- ggplot_build(bob)$data[[1]]
# calculate text position (vertical centre of each segment) & add text labels
cathy$y_pos <- (cathy$ymin + cathy$ymax) / 2
# Labels must match the type levels ("sapling", not "sampling").
# NOTE(review): assumes the built data is ordered by bar, then by type -- confirm.
cathy$label <- rep(c("sapling", "adult", "dead"), times = 3)
# Subset the data for labeling for the top bar (rows 7 to 9, the third bar)
dan <- cathy[7:9, ]
# Draw a figure again, now with the labels on the top bar only
bob +
  annotate(x = dan$x, y = dan$y_pos, label = dan$label, geom = "text", size = 3) +
  coord_flip()

Resources