geom_text in barplot to show frequency over bars using R - r

I have this graph that I want to show the count over the bar, however my code shows the number 1 inside the bars..
What I have:
What I am trying to make:
# Library
library(ggplot2)
# 1. Read data (comma separated)
df = read.csv2(text = "Id;Date
1;2021-06-09
2;2021-06-08
3;2021-06-08
4;2021-06-09
5;2021-06-09")
# 2. Print table
df_date <- df[, "Date"]
df_date <- as.data.frame(table(df_date))
colnames(df_date)[which(names(df_date) == "df_date")] <- "Date" # Set column name to Date
df_date
# 3. Plot bar chart
ggplot(df_date, aes(x = Date, y = Freq)) +
geom_bar(stat = "identity") +
theme_classic() +
ggtitle("Date") +
xlab("Date") +
ylab("Frequency") +
geom_text(stat= "count", aes(label = ..count.., y= ..prop..), vjust = -1)

Since you have already calculated the frequency use geom_col.
library(ggplot2)
ggplot(df_date, aes(x = Date, y = Freq)) +
geom_col() +
theme_classic() +
ggtitle("Date") +
xlab("Date") +
ylab("Frequency") +
geom_text(aes(label = Freq), vjust = -1)
If you use df you can use geom_bar as -
ggplot(df, aes(x = Date)) +
geom_bar() +
theme_classic() +
ggtitle("Date") +
xlab("Date") +
ylab("Frequency") +
geom_text(stat= "count",aes(label = ..count..), vjust = -1)

Related

Y axis values different from actual column in dataset in R

I am currently working with a dataset of "world bank islands". In that, I am trying to plot the population Vs country graph for each year. Below is the code that I have done.
library(ggplot2)
options(scipen = 999)
bank <- read.csv("C:/Users/True Gamer/OneDrive/Desktop/world_bank_international_arrivals_islands.csv")
bank[bank == "" | bank == "."] <- NA
bank$country <- as.numeric(bank$country)
bank$year <- as.numeric(bank$year)
bank$areakm2 <- as.numeric(bank$areakm2)
bank$pop <- as.numeric(bank$pop)
bank$gdpnom <- as.numeric(bank$gdpnom)
bank$flights...WB <- as.numeric(bank$flights...WB)
bank$hotels <- as.numeric(bank$hotels)
bank$hotrooms <- as.numeric(bank$hotrooms)
bank$receipt <- as.numeric(bank$receipt)
bank$ovnarriv <- as.numeric(bank$ovnarriv)
bank$dayvisit <- as.numeric(bank$dayvisit)
bank$arram <- as.numeric(bank$arram)
bank$arreur <- as.numeric(bank$arreur)
bank$arraus <- as.numeric(bank$arraus)
str(bank)
plot1 <- ggplot(bank, aes(x=country,y=pop)) + geom_bar(stat = "identity",aes(fill=year)) + ggtitle("Population of each country yearwise") + xlab("Countries") + ylab("Population")
plot1
However, when I do this, the y values shown on the graph are different from the actual y values. This is the link to the dataset
The problem is that you are stacking the bars (this is default behaviour). Also, geom_bar(stat = "identity") is just a long way of writing geom_col. One further point to note is that since all your columns are numeric, the single line:
bank <- as.data.frame(lapply(bank, as.numeric))
replaces all your individual numeric conversions.
The plot you are trying to create would be something like this:
ggplot(bank, aes(x = country, y = pop)) +
geom_col(aes(fill = factor(year)), position = "dodge") +
ggtitle("Population of each country yearwise") +
xlab("Countries") +
ylab("Population") +
labs(fill = "Year") +
scale_y_continuous(labels = scales::comma) +
scale_x_continuous(breaks = 1:27)
However, it would probably be best to present your data in a different way. Perhaps, if you are comparing population growth, something like this would be better:
ggplot(bank, aes(x = year, y = pop)) +
geom_line(aes(color = factor(country)), position = "dodge") +
ggtitle("Population of each country yearwise") +
xlab("Year") +
ylab("Population") +
facet_wrap(.~country, scales = "free_y", nrow = 6) +
scale_y_continuous(labels = scales::comma) +
scale_x_continuous(breaks = c(0, 5, 10)) +
theme_minimal() +
theme(legend.position = "none")
Or with bars:
ggplot(bank, aes(x = year, y = pop)) +
geom_col(aes(fill = factor(country)), position = "dodge") +
ggtitle("Population of each country yearwise") +
xlab("Year") +
ylab("Population") +
facet_wrap(.~country, scales = "free_y", nrow = 6) +
scale_y_continuous(labels = scales::comma) +
scale_x_continuous(breaks = c(0, 5, 10)) +
theme_minimal() +
theme(legend.position = "none")

Change x-axis ticks

Good afternoon everyone,
I am having difficulties plotting a time series with ggplot. I have a time series with daily data from 2020-01-30 to 2020-10-22. When I plot the data with the following code, I get an x-axis, which is not at all readable:
plot_cases <- ggplot(NULL, aes(x= Date, y= Count)) +
xlab("Time") + ylab("Number of Cases") +
scale_y_continuous(limits=c(0, 2000)) +
scale_x_discrete() +
geom_line(data = P_cases_reduced_aggregated_female, aes(colour = P_cases_reduced_aggregated_female$Sex, group = 1)) +
geom_line(data = P_cases_reduced_aggregated_male, aes(colour = P_cases_reduced_aggregated_male$Sex, group = 1)) +
scale_color_manual(values=c("#FF00FF", "#000FFF"))
Therefore, I would like to only have the abbreviation of the months represented such as "Apr", "Jul", "Oct". However, as I use the following code, I don't get any x-axis graduation anymore...
plot_cases <- ggplot(NULL, aes(x= Date, y= Count)) +
xlab("Time") + ylab("Number of Cases") +
scale_y_continuous(limits=c(0, 2000)) +
scale_x_discrete(breaks = seq(as.Date("2020-01-30"), as.Date("2020-10-22"), by = "month")) +
geom_line(data = P_cases_reduced_aggregated_female, aes(colour = P_cases_reduced_aggregated_female$Sex, group = 1)) +
geom_line(data = P_cases_reduced_aggregated_male, aes(colour = P_cases_reduced_aggregated_male$Sex, group = 1)) +
scale_color_manual(values=c("#FF00FF", "#000FFF"))
As one of you a solution to this? I have checked the previous questions about scale_x_discrete
but I still don't get how to do the relevant changes.
Thank you so much in advance.
Following the great advice of #AllanCameron and #r2evans first transform to date in x-axis to date and then use scale_x_date(). No output showed due to lack of data:
library(ggplot2)
#Code
plot_cases <- ggplot(NULL, aes(x= Date, y= Count)) +
xlab("Time") + ylab("Number of Cases") +
scale_y_continuous(limits=c(0, 2000)) +
scale_x_date(date_labels="%B-%d",breaks = '7 days') +
geom_line(data = P_cases_reduced_aggregated_female,
aes(colour = P_cases_reduced_aggregated_female$Sex, group = 1)) +
geom_line(data = P_cases_reduced_aggregated_male,
aes(colour = P_cases_reduced_aggregated_male$Sex, group = 1)) +
scale_color_manual(values=c("#FF00FF", "#000FFF"))+
theme(axis.text.x = element_text(angle=90))

How to show value label in stacked and grouped bar chart using ggplot

My question is about how to show data (or value) labels in a stacked and grouped bar chart using ggplot. The chart is in the form of what has been resolved here stacked bars within grouped bar chart .
The code for producing the chart can be found in the first answer of the question in the above link. An example data set is also given in the question in the link. To show the value labels, I tried to extend that code with
+ geom_text(aes(label=value), position=position_dodge(width=0.9), vjust=-0.25)
but this does not work for me. I greatly appreciate any help on this.
You need to move data and aesthetics from geom_bar() up to ggplot() so that geom_text() can use it.
ggplot(data=test, aes(y = value, x = cat, fill = cond)) +
geom_bar(stat = "identity", position = "stack") +
theme_bw() +
facet_grid( ~ year) +
geom_text(aes(label = value), position = "stack")
Then you can play around with labels, e.g. omitting the zeros:
ggplot(data=test, aes(y = value, x = cat, fill = cond)) +
geom_bar(stat = "identity", position = "stack") +
theme_bw() +
facet_grid( ~ year) +
geom_text(aes(label = ifelse(value != 0, value, "")), position = "stack")
... and adjusting the position by vjust:
ggplot(data=test, aes(y = value, x = cat, fill = cond)) +
geom_bar(stat = "identity", position = "stack") +
theme_bw() +
facet_grid( ~ year) +
geom_text(aes(label = ifelse(value != 0, value, "")), position = "stack", vjust = -0.3)
Try this. Probably the trick is to use position_stack in geom_text.
library(tidyverse)
test <- expand.grid('cat' = LETTERS[1:5], 'cond'= c(F,T), 'year' = 2001:2005)
test$value <- floor((rnorm(nrow(test)))*100)
test$value[test$value < 0] <- 0
ggplot(test, aes(y = value, x = cat, fill = cond)) +
geom_bar(stat="identity", position='stack') +
geom_text(aes(label = ifelse(value > 0, value, "")), position = position_stack(), vjust=-0.25) +
theme_bw() +
facet_grid( ~ year)
Created on 2020-06-05 by the reprex package (v0.3.0)

Categorical ordering (unexpected behavior - order is reversed)

Goal is to achieve ordered categories on y axis.
y1 -> y2 -> y3.
Here's the example:
require(data.table)
require(ggplot2)
dt <- data.table(var = rep("x1", 3),
categ = paste0("y", c(1,2,3)),
value = c(-2,0.5,-1))
ggplot(dt, aes(x = categ, y = value)) +
geom_bar(stat = "identity") +
coord_flip() +
theme_bw()
It seems to be reversed. Here's the one way to achieve desired ordering in ggplot2:
dt$categ <- factor(dt$categ, levels = rev(levels(factor(dt$categ))))
ggplot(dt, aes(x = categ, y = value)) +
geom_bar(stat = "identity") +
coord_flip() +
theme_bw()
Great, now ordering seems to be right. But with some modifications:
ggplot(dt, aes(x = categ, y = value)) +
geom_bar(data = dt[value < 0], stat = "identity", fill = "darkred") +
geom_bar(data = dt[value >= 0], stat = "identity", fill = "darkblue") +
coord_flip() +
theme_bw()
For some reason factor ordering is ignored here. Any clues why?
Solution could be:
# dt is original data without factors
ggplot(dt, aes(categ, value, fill = value >= 0)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = c("darkred", "darkblue")) +
# since we want y1 on top and y3 on bottom we have to apply rev
scale_x_discrete(limits = rev(dt$categ)) +
coord_flip() +
theme_bw()
Trick is to pass dt$categ as limits argument to scale_x_discrete(). In your first plot order is not reversed, this is how it should be as ggplot2 starts putting values from the origin of the axis (0).
I also removed two geom_bar lines that were used in a not-ggplot way.

ggplot2 pie chart bad position of labels

Sample data
data <- data.frame(Country = c("Mexico","USA","Canada","Chile"), Per = c(15.5,75.3,5.2,4.0))
I tried set position of labels.
ggplot(data =data) +
geom_bar(aes(x = "", y = Per, fill = Country), stat = "identity", width = 1) +
coord_polar("y", start = 0) +
theme_void()+
geom_text(aes(x = 1.2, y = cumsum(Per), label = Per))
But pie chart actually look like:
You have to sort the data before calculating the cumulative sum. Then, you can optimize label position, e.g. by subtracting half of Per:
library(tidyverse)
data %>%
arrange(-Per) %>%
mutate(Per_cumsum=cumsum(Per)) %>%
ggplot(aes(x=1, y=Per, fill=Country)) +
geom_col() +
geom_text(aes(x=1,y = Per_cumsum-Per/2, label=Per)) +
coord_polar("y", start=0) +
theme_void()
PS: geom_col uses stat_identity by default: it leaves the data as is.
Or simply use position_stack
data %>%
ggplot(aes(x=1, y=Per, fill=Country)) +
geom_col() +
geom_text(aes(label = Per), position = position_stack(vjust = 0.5))+
coord_polar(theta = "y") +
theme_void()
From the help:
# To place text in the middle of each bar in a stacked barplot, you
# need to set the vjust parameter of position_stack()

Resources