Change x-axis ticks - r

Good afternoon everyone,
I am having difficulties plotting a time series with ggplot. I have a time series with daily data from 2020-01-30 to 2020-10-22. When I plot the data with the following code, I get an x-axis, which is not at all readable:
# First attempt: draw the female and male case counts as two coloured lines
# on a shared Date/Count axis pair; each geom_line() brings its own data frame.
plot_cases <- ggplot(NULL, aes(x= Date, y= Count)) +
xlab("Time") + ylab("Number of Cases") +
scale_y_continuous(limits=c(0, 2000)) +
# NOTE(review): a discrete x scale suggests Date is not a Date-class column
# here, which would give one tick per distinct value -- confirm with str().
scale_x_discrete() +
geom_line(data = P_cases_reduced_aggregated_female, aes(colour = P_cases_reduced_aggregated_female$Sex, group = 1)) +
geom_line(data = P_cases_reduced_aggregated_male, aes(colour = P_cases_reduced_aggregated_male$Sex, group = 1)) +
# Two manual line colours, one per Sex value.
scale_color_manual(values=c("#FF00FF", "#000FFF"))
Therefore, I would like to only have the abbreviation of the months represented such as "Apr", "Jul", "Oct". However, as I use the following code, I don't get any x-axis graduation anymore...
# Second attempt: same plot, but with monthly breaks requested on the x axis.
plot_cases <- ggplot(NULL, aes(x= Date, y= Count)) +
xlab("Time") + ylab("Number of Cases") +
scale_y_continuous(limits=c(0, 2000)) +
# NOTE(review): the breaks are Date objects while the scale is discrete;
# presumably none of them match the axis values, so no ticks are drawn.
scale_x_discrete(breaks = seq(as.Date("2020-01-30"), as.Date("2020-10-22"), by = "month")) +
geom_line(data = P_cases_reduced_aggregated_female, aes(colour = P_cases_reduced_aggregated_female$Sex, group = 1)) +
geom_line(data = P_cases_reduced_aggregated_male, aes(colour = P_cases_reduced_aggregated_male$Sex, group = 1)) +
scale_color_manual(values=c("#FF00FF", "#000FFF"))
Does one of you have a solution to this? I have checked the previous questions about scale_x_discrete
but I still don't get how to do the relevant changes.
Thank you so much in advance.

Following the great advice of @AllanCameron and @r2evans: first convert the x-axis variable to Date, and then use scale_x_date(). No output is shown because the data were not provided:
library(ggplot2)
# Code
# scale_x_date() needs a Date-class column. as.Date() leaves an existing Date
# column unchanged; it assumes ISO "yyyy-mm-dd" text otherwise -- TODO confirm
# the format of the Date column.
P_cases_reduced_aggregated_female$Date <-
  as.Date(P_cases_reduced_aggregated_female$Date)
P_cases_reduced_aggregated_male$Date <-
  as.Date(P_cases_reduced_aggregated_male$Date)

# Daily case counts for women and men as two coloured lines.
plot_cases <- ggplot(NULL, aes(x = Date, y = Count)) +
  xlab("Time") + ylab("Number of Cases") +
  scale_y_continuous(limits = c(0, 2000)) +
  # One tick per month, labelled with the abbreviated month name
  # ("Apr", "Jul", "Oct", ...), which is what the question asks for.
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  # Refer to columns by bare name inside aes(); the layer's `data` supplies
  # them, so `df$Sex` is unnecessary.
  geom_line(
    data = P_cases_reduced_aggregated_female,
    aes(colour = Sex, group = 1)
  ) +
  geom_line(
    data = P_cases_reduced_aggregated_male,
    aes(colour = Sex, group = 1)
  ) +
  scale_color_manual(values = c("#FF00FF", "#000FFF"))

Related

Y axis values different from actual column in dataset in R

I am currently working with a dataset of "world bank islands". In that, I am trying to plot the population Vs country graph for each year. Below is the code that I have done.
# Load the islands data set, coerce every column to numeric, and draw a bar
# chart of population per country with bars filled by year.
library(ggplot2)
# A large scipen penalty makes R prefer fixed over scientific notation.
options(scipen = 999)
bank <- read.csv("C:/Users/True Gamer/OneDrive/Desktop/world_bank_international_arrivals_islands.csv")
# Treat empty strings and "." placeholders as missing values.
bank[bank == "" | bank == "."] <- NA
# Column-by-column conversion to numeric.
bank$country <- as.numeric(bank$country)
bank$year <- as.numeric(bank$year)
bank$areakm2 <- as.numeric(bank$areakm2)
bank$pop <- as.numeric(bank$pop)
bank$gdpnom <- as.numeric(bank$gdpnom)
bank$flights...WB <- as.numeric(bank$flights...WB)
bank$hotels <- as.numeric(bank$hotels)
bank$hotrooms <- as.numeric(bank$hotrooms)
bank$receipt <- as.numeric(bank$receipt)
bank$ovnarriv <- as.numeric(bank$ovnarriv)
bank$dayvisit <- as.numeric(bank$dayvisit)
bank$arram <- as.numeric(bank$arram)
bank$arreur <- as.numeric(bank$arreur)
bank$arraus <- as.numeric(bank$arraus)
# Inspect the resulting column types.
str(bank)
# NOTE(review): no `position` is given, so the bars of all years for a country
# use geom_bar()'s default position -- see the explanation that follows.
plot1 <- ggplot(bank, aes(x=country,y=pop)) + geom_bar(stat = "identity",aes(fill=year)) + ggtitle("Population of each country yearwise") + xlab("Countries") + ylab("Population")
plot1
However, when I do this, the y values shown on the graph are different from the actual y values. This is the link to the dataset
The problem is that you are stacking the bars (this is default behaviour). Also, geom_bar(stat = "identity") is just a long way of writing geom_col. One further point to note is that since all your columns are numeric, the single line:
# Convert every column to numeric in one step; `bank[] <-` keeps the data frame.
bank[] <- lapply(bank, as.numeric)
replaces all your individual numeric conversions.
The plot you are trying to create would be something like this:
# Population per country, with one bar per year placed side by side.
ggplot(bank, aes(x = country, y = pop, fill = factor(year))) +
  geom_col(position = "dodge") +
  scale_x_continuous(breaks = 1:27) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Population of each country yearwise",
    x = "Countries",
    y = "Population",
    fill = "Year"
  )
However, it would probably be best to present your data in a different way. Perhaps, if you are comparing population growth, something like this would be better:
# Population trend per country: one panel per country, each with its own
# y range ("free_y"), and the legend hidden.
ggplot(bank, aes(x = year, y = pop)) +
  # No position adjustment here: dodging applies to geoms that have a width
  # (bars, boxes) and is not meaningful for a line layer.
  geom_line(aes(color = factor(country))) +
  ggtitle("Population of each country yearwise") +
  xlab("Year") +
  ylab("Population") +
  facet_wrap(. ~ country, scales = "free_y", nrow = 6) +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(breaks = c(0, 5, 10)) +
  theme_minimal() +
  # Must come after theme_minimal(), which would otherwise reset it.
  theme(legend.position = "none")
Or with bars:
# Bar version of the per-country trend: one free-y panel per country.
ggplot(bank, aes(x = year, y = pop, fill = factor(country))) +
  geom_col(position = "dodge") +
  facet_wrap(. ~ country, nrow = 6, scales = "free_y") +
  scale_x_continuous(breaks = c(0, 5, 10)) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Population of each country yearwise",
    x = "Year",
    y = "Population"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

geom_text in barplot to show frequency over bars using R

I have this graph that I want to show the count over the bar, however my code shows the number 1 inside the bars..
What I have:
What I am trying to make:
# Load ggplot2 for plotting
library(ggplot2)
# 1. Read data (semicolon separated, the default separator of read.csv2)
df = read.csv2(text = "Id;Date
1;2021-06-09
2;2021-06-08
3;2021-06-08
4;2021-06-09
5;2021-06-09")
# 2. Build and print a frequency table: one row per distinct date with its
#    count in the Freq column
df_date <- df[, "Date"]
df_date <- as.data.frame(table(df_date))
colnames(df_date)[which(names(df_date) == "df_date")] <- "Date" # Set column name to Date
df_date
# 3. Plot bar chart of the pre-computed frequencies
ggplot(df_date, aes(x = Date, y = Freq)) +
geom_bar(stat = "identity") +
theme_classic() +
ggtitle("Date") +
xlab("Date") +
ylab("Frequency") +
# NOTE(review): df_date already holds one row per date, so stat = "count"
# counts a single row per bar -- consistent with the "1" labels described.
geom_text(stat= "count", aes(label = ..count.., y= ..prop..), vjust = -1)
Since you have already calculated the frequency use geom_col.
library(ggplot2)
# Bars from the pre-computed frequencies, with each count printed above its bar.
ggplot(df_date, aes(x = Date, y = Freq)) +
  geom_col() +
  geom_text(aes(label = Freq), vjust = -1) +
  labs(title = "Date", x = "Date", y = "Frequency") +
  theme_classic()
If you use df you can use geom_bar as -
# Bars counted directly from the raw rows of `df`; the same "count" statistic
# supplies the label drawn above each bar.
ggplot(df, aes(x = Date)) +
  geom_bar() +
  theme_classic() +
  ggtitle("Date") +
  xlab("Date") +
  ylab("Frequency") +
  # after_stat(count) replaces the deprecated `..count..` notation.
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -1)

How can I add a legend to my ggplot? Im using geom_line and geom_ribbon

Good afternoon,
I use the following code to generate a plot:
# Draw the head items and the long tail as two lines, shade the area under
# each with its own colour, and mark x = 19 with a dotted vertical line.
ggplot() +
geom_line(data = disDataHeadItems, aes(x=disDataHeadItems$x,
y=disDataHeadItems$Freq)) +
geom_line(data = disDataLongTail, aes(x=disDataLongTail$x,
y=disDataLongTail$Freq)) +
xlab("Item id") +
ylab("# of occurrences") +
# expand = c(0, 0) removes the padding around the data on both axes.
scale_x_continuous(breaks=seq(5, 75, 10), expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
# NOTE(review): `fill` is a constant set outside aes(), so it is not mapped
# to a scale -- presumably the reason no legend appears.
geom_ribbon(aes(ymin=0, ymax=disDataHeadItems$Freq,
x=disDataHeadItems$x), fill="#CC6666") +
geom_ribbon(aes(ymin=0, ymax=disDataLongTail$Freq,
x=disDataLongTail$x), fill="#66CC99") +
geom_vline(xintercept=19, linetype="dotted")
This creates the following plot, which im very happy with :)
Now I want to add a legend which just indicates that the red part is the top-head items and the green part is the long tail of the data. When I look it up, I only find answers that explain how to alter an existing legend, but mine does not show up at all. Any help is welcome :)
Putting the comments together and adding some random dataframe results in this:
library(ggplot2)
# Random stand-in data for the two parts of the distribution.
disDataHeadItems <- data.frame(
  x = runif(100, 1, 100),
  Freq = runif(100, 5, 10)
)
disDataLongTail <- data.frame(
  x = runif(100, 1, 100),
  Freq = runif(100, 5, 10)
)

# Two lines with the area under each shaded. `fill` is mapped inside aes() to
# a descriptive label, which is what creates the legend; scale_fill_manual()
# then assigns the intended colour to each label.
ggplot() +
  geom_line(data = disDataHeadItems, aes(x = x, y = Freq)) +
  geom_line(data = disDataLongTail, aes(x = x, y = Freq)) +
  xlab("Item id") +
  ylab("# of occurrences") +
  scale_x_continuous(breaks = seq(5, 75, 10), expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  # Each ribbon gets its data frame via `data`, so columns are referenced by
  # bare name rather than with `df$col`.
  geom_ribbon(
    data = disDataHeadItems,
    aes(x = x, ymin = 0, ymax = Freq, fill = "Top-head items")
  ) +
  geom_ribbon(
    data = disDataLongTail,
    aes(x = x, ymin = 0, ymax = Freq, fill = "Long tail")
  ) +
  scale_fill_manual(
    name = NULL,
    breaks = c("Top-head items", "Long tail"),
    values = c("Top-head items" = "#CC6666", "Long tail" = "#66CC99")
  ) +
  geom_vline(xintercept = 19, linetype = "dotted")

Make overlapping histograms with geom_histogram

I am trying to make an overlapping histogram like this:
# Attempt at a combined plot: three histogram layers, one per melted data
# set, each drawn semi-transparently (alpha = 0.2) as a percentage of counts.
# NOTE(review): `aes = (x)` is not a usual ggplot() argument -- confirm
# what was intended here.
ggplot(histogram, aes = (x), mapping = aes(x = value)) +
geom_histogram(data = melt(tpm_18_L_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
geom_histogram(data = melt(tpm_18_S_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
geom_histogram(data = melt(tpm_18_N_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
# One panel per value of `variable`.
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
My code can only make them plot side by side and I would like to also make them overlap. Thank you! I based mine off of the original post where this came from but it did not work for me. It was originally 3 separate graphs which I combined with grid and ggarrange. It looks like this right now.
Here is the code of the three separate graphs.
# Three separate plots with identical settings, one per data set. Each
# melts its input, bins `value` with breaks 1..10, and shows every bin as a
# percentage of the total count, with one panel per `variable`.

# Histogram for tpm_18_L_SD.
SD_18_L <- ggplot(data = melt(tpm_18_L_SD), mapping = aes(x = value)) +
geom_histogram(aes(y = 100*(..count../sum(..count..))), breaks = seq(1, 10, by = 1)) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
# Histogram for tpm_18_S_SD.
SD_18_S <- ggplot(data = melt(tpm_18_S_SD), mapping = aes(x = value)) +
geom_histogram(aes(y = 100*(..count../sum(..count..))), breaks = seq(1, 10, by = 1)) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
# Histogram for tpm_18_N_SD.
SD_18_N <- ggplot(data = melt(tpm_18_N_SD), mapping = aes(x = value)) +
geom_histogram(aes(y = 100*(..count../sum(..count..))), breaks = seq(1, 10, by = 1)) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
What my graphs look like now:
ggplot expects dataframes in a long format. I'm not sure what your data looks like, but you shouldn't have to call geom_histogram for each category. Instead, get all your data into a single dataframe (you can use rbind for this) in long format (what you're doing already with melt) first, then feed it into ggplot and map fill to whatever your categorical variable is.
Your call to facet_wrap is what puts them in 3 different plots. If you want them all on the same plot, take that line out.
An example using the iris data:
# Overlapping histograms, one per species: semi-transparent fill, and
# position = "identity" so the bars overlap instead of stacking.
ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, fill = Species)
) +
  geom_histogram(position = "identity", alpha = 0.6)
I decreased alpha in geom_histogram so you can see where colors overlap, and added position = "identity" so observations aren't being stacked. Hope that helps!

ggplot2 and facet_grid : add highest value for each plot

I am using facet_grid() to draw multiple plots, one per group of data. For each plot, I want to add the highest Y value in the corner. I've tried several hacks, but none of them gives me the expected result. This answer partially helps me, but the value I want to add will constantly be changing, so I don't see how I can apply it.
Here is a minimal example, I'd like to add the red numbers on the graph below:
library(ggplot2)
# Minimal example: two groups (A, B) with four hourly values each.
data <- data.frame(
  group = rep(c("A", "B"), each = 4),
  hour = rep(c(1, 2, 3, 4), times = 2),
  value = c(5, 4, 2, 3, 6, 7, 4, 5)
)

# One square panel per group, with lines and points on shared axis limits.
ggplot(data, aes(x = hour, y = value)) +
  geom_line() +
  geom_point() +
  facet_grid(~ group) +
  scale_x_continuous(name = "hours", limits = c(1, 4)) +
  scale_y_continuous(limits = c(1, 10), breaks = seq(1, 10, by = 2)) +
  theme(aspect.ratio = 1)
Thanks for your help!
library(dplyr)
# Highest value per group: one row per facet panel.
data2 <- summarise(group_by(data, group), Max = max(value))

# Same plot as before, plus the per-group maximum in red; x = y = Inf puts it
# at the top-right corner and hjust/vjust pull it back inside the panel.
ggplot(data, aes(x = hour, y = value)) +
  geom_line() +
  geom_point() +
  geom_text(
    mapping = aes(label = Max),
    data = data2,
    x = Inf, y = Inf,
    hjust = 2, vjust = 2,
    colour = "red"
  ) +
  facet_grid(~ group) +
  scale_x_continuous(name = "hours", limits = c(1, 4)) +
  scale_y_continuous(limits = c(1, 10), breaks = seq(1, 10, by = 2)) +
  theme(aspect.ratio = 1)
This does the trick. If you always have fixed ranges you can position the text manually.
library(ggplot2)
# Example data: two groups (A, B) with four hourly values each.
data <- data.frame(
  group = rep(c("A", "B"), each = 4),
  hour = rep(c(1, 2, 3, 4), 2),
  value = c(5, 4, 2, 3, 6, 7, 4, 5)
)

# Per-group maxima. The values and the group names are taken from the same
# tapply() result, so each label stays attached to its own group even when
# the rows of `data` are not sorted by group. (Pairing tapply() with unique()
# mixes sorted order with first-appearance order.)
group_max <- tapply(data$value, data$group, max)
max_labels <- data.frame(
  x = Inf,
  y = Inf,
  lab = as.vector(group_max),
  group = names(group_max)
)

# One panel per group, with the group's maximum printed in the top-right
# corner; "inward" justification keeps the text inside the panel.
ggplot(data, aes(x = hour, y = value)) +
  geom_line() +
  geom_point() +
  geom_text(
    aes(x, y, label = lab),
    data = max_labels,
    vjust = "inward",
    hjust = "inward"
  ) +
  theme(aspect.ratio = 1) +
  scale_x_continuous(name = "hours", limits = c(1, 4)) +
  scale_y_continuous(limits = c(1, 10), breaks = seq(1, 10, by = 2)) +
  facet_grid(~ group)

Resources