geom_text change factor order of facet plot - r

I am trying to annotate the individual panels of a facet plot. I have set the facet order to 2008, 1999 using factor levels.
But when I add geom_text to the ggplot, the order of the panels changes. See the examples below. What am I doing wrong? How can I solve this?
library(tidyverse)
# The desired facet order is set here: year becomes a factor with levels
# 2008 first, then 1999.
df <- mpg %>% mutate(year = factor(year, levels = c(2008,1999)))
# Annotation data, one row per facet. Note that year is given here as plain
# strings, without the level order that was set on df$year above.
anno <- data.frame(xstar = c(5, 2), ystar = c(100, 70),
lab = c("text1","text2"),
year = c("2008","1999"))
df %>% ggplot(aes(class, displ)) +
geom_col(aes(fill=drv)) +
facet_grid(~year) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
# Text layer drawn from the separate anno data frame (x/y/label all come from it).
geom_text(data = anno, aes(x = xstar, y = ystar, label = lab))

Convert the year column in your annotation dataframe also to a factor with the same levels and order as in your main df:
library(ggplot2)

# Main data: year as a factor whose level order (2008, 1999) fixes the
# left-to-right order of the facets.
df <- mpg
df$year <- factor(df$year, levels = c(2008, 1999))

# Annotation data: year is a factor with exactly the same levels and order,
# so adding this layer does not change the facet order.
anno <- data.frame(
  xstar = c(5, 2),
  ystar = c(100, 70),
  lab = c("text1", "text2"),
  year = factor(c("2008", "1999"), levels = c(2008, 1999))
)

ggplot(data = df, mapping = aes(x = class, y = displ)) +
  geom_col(mapping = aes(fill = drv)) +
  geom_text(
    data = anno,
    mapping = aes(x = xstar, y = ystar, label = lab)
  ) +
  facet_grid(~year) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )

Related

Add extra tick where vertical line crosses x-axis

I have below ggplot
library(ggplot2)
library(quantmod)
# Example data: val runs 1..20, qtr holds 20 dates spaced 3 months apart
# (starting 2000-01-01) converted with as.yearqtr().
dat = data.frame(val = 1:20, qtr = as.yearqtr(seq(as.Date('2000-01-01'), length.out = 20, by = '3 months')))
ggplot(data = dat) +
geom_line(aes(x = qtr, y = val)) +
# Vertical reference line at the yearqtr value of 2003-04-01.
geom_vline(xintercept = as.yearqtr(as.Date('2003-04-01')))
I want to add one extra tick on the x-axis at the point where my vertical line crosses it. The corresponding tick label should also be set manually, e.g. 'Sep : 2003-04-01'. Is there any function available in ggplot to achieve this?
Thanks for your help.
You could define your bespoke date breaks as a separate vector,
and the labels as a matching character vector that includes the bespoke text for the x-intercept tick.
The axis text is also rotated for legibility.
library(ggplot2)
library(quantmod)

# Position of the vertical line; it also gets its own axis tick.
intercept_date <- as.yearqtr(as.Date("2003-04-01"))

# Five regular ticks 12 months apart (from 2000-01-01), plus the extra tick
# at the intercept as the last element.
x_breaks <- c(
  as.yearqtr(seq(as.Date("2000-01-01"), length.out = 5, by = "12 months")),
  intercept_date
)

# Labels in the same order as the breaks: the regular ticks keep their
# default yearqtr text, the last (intercept) tick gets the bespoke label.
x_labels <- c(
  as.character(x_breaks[-length(x_breaks)]),
  "Sep : 2003-04-01"
)

# `dat` is the data frame built in the question.
ggplot(data = dat) +
  geom_line(aes(x = qtr, y = val)) +
  geom_vline(xintercept = intercept_date) +
  scale_x_yearqtr(breaks = x_breaks, labels = x_labels) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))
Created on 2022-10-16 with reprex v2.0.2
Option 2
In response to comment: intercept breaks and labels defined within scale_x_yearqtr:
# Same plot as above, but with breaks and labels written directly inside
# scale_x_yearqtr() instead of in separate vectors.
ggplot(data = dat) +
  geom_line(aes(x = qtr, y = val)) +
  geom_vline(xintercept = as.yearqtr(as.Date("2003-04-01"))) +
  scale_x_yearqtr(
    # five ticks 12 months apart, then the extra tick at the intercept
    breaks = c(
      as.yearqtr(seq(as.Date("2000-01-01"), length.out = 5, by = "12 months")),
      as.yearqtr(as.Date("2003-04-01"))
    ),
    # matching labels, with the bespoke text for the intercept tick last
    labels = c(
      as.character(
        as.yearqtr(seq(as.Date("2000-01-01"), length.out = 5, by = "12 months"))
      ),
      "Sep : 2003-04-01"
    )
  ) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

Combine geom_bar, geom_segment and facet_grid on time series visualization

I'm trying to make a nice graph with ggplot, but I'm still facing a barrier.
When I use facet_grid at the end of my code, something goes wrong. A helping hand would be great!
This is my code :
# Package
library(ggplot2)
# Function
# Return `x` in lower case with the first character of each element
# converted to upper case (e.g. "hELLO" -> "Hello").
firstup <- function(x) {
  lowered <- tolower(x)
  initial <- toupper(substr(lowered, 1, 1))
  # Overwrite only the first character; the rest stays lower case.
  substr(lowered, 1, 1) <- initial
  lowered
}
Create data
# One row per day from 2020-01-21 to 2020-03-06, each with a random count
# drawn (with replacement) from 1..20.
data_F <- data.frame(
  DATE = seq(as.Date("2020-01-21"), as.Date("2020-03-06"), by = "days"),
  NB = sample(1:20, 46, replace = TRUE)
)
Manage the data
# Rebuild data_F with the date split into year, month name and day-of-month
# columns next to the original DATE and NB.
data_F <- with(
  data_F,
  data.frame(
    DATE = DATE,
    year = as.numeric(format(DATE, format = "%Y")),
    month = as.factor(format(DATE, format = "%B")),
    days = as.numeric(format(DATE, format = "%d")),
    NB = NB
  )
)
Relevel month with the right order
# Capitalise the month names, then set the factor levels to their order of
# first appearance in the data (chronological) instead of alphabetical.
data_F$month <- as.factor(firstup(data_F$month))
data_F$month <- factor(data_F$month, levels = unique(data_F$month))
# Stand-alone factor holding each month once, with the same level order.
month <- unique(data_F$month)
The main plot
# Daily bar chart: one tick per date labelled with the day of the month,
# and a fixed y axis from 0 to 65.
plot1 <- ggplot(data_F, aes(x = DATE, y = NB)) +
  geom_col(colour = "black", fill = "dodgerblue3", width = 0.5) +
  scale_x_date(
    breaks = data_F$DATE,
    labels = data_F$days,
    minor_breaks = NULL,
    expand = expansion(add = 0.3)
  ) +
  scale_y_continuous(
    limits = c(0, 65),
    breaks = seq(0, 65, by = 5),
    minor_breaks = seq(0, 65, by = 1)
  )
plot1
Creating the segment data
# One full-height vertical segment (from Inf down to -Inf) per date,
# carrying the month of that date.
data.segm <- with(
  data_F,
  data.frame(x = DATE, y = Inf, xend = DATE, yend = -Inf, month = month)
)
Show two days, for example at the row 6 and 35
# Highlight the day in row 6 of data.segm with a wide, semi-transparent
# vertical band. The coordinates passed to aes_string() are the values
# pulled out of row i, not column names.
i = 6
plot1 = plot1 + geom_segment(data = data.segm, aes_string(x=data.segm$x[[i]],y=data.segm$y[[i]],
xend=data.segm$xend[[i]],yend=data.segm$yend[[i]]),
colour = alpha("gray90",0.5),size=8,inherit.aes = F)
# Same again for the day in row 35.
# NOTE(review): each layer still receives the whole data.segm as `data`
# while its position is a single fixed value -- presumably relevant to the
# faceting problem described below; confirm.
i = 35
plot1 = plot1 + geom_segment(data = data.segm, aes_string(x=data.segm$x[[i]],y=data.segm$y[[i]],
xend=data.segm$xend[[i]],yend=data.segm$yend[[i]]),
colour = alpha("gray90",0.5),size=8,inherit.aes = F)
plot1
And now my problem with facet_grid
# Split the plot into one column per month, letting each panel's x range
# and width follow its own data, with the strip labels switched to the
# x-axis side.
plot2 <- plot1 +
  facet_grid(
    . ~ month,
    scales = "free_x",
    space = "free_x",
    switch = "x"
  )
plot2
Jonas

Add legend to ggplot line graph based on linetype

I am trying to add a legend to a ggplot graph that uses solid and dashed lines.
# library() stops with an error if ggplot2 is missing; require() would only
# return FALSE and let the script fail later at the first ggplot() call.
library(ggplot2)

DATE <- c("2020-10-14", "2020-10-15", "2020-10-16", "2020-10-17", "2020-10-18")
TMAX <- c(47, 45, 43, 40, 4)
TMIN <- c(35, 34, 28, 26, 29)
df <- data.frame(DATE, TMAX, TMIN)

# TMIN as a solid line and TMAX as a dashed line. Both linetypes are fixed
# constants set outside aes(), so no legend is generated.
ggplot(data = df, aes(x = DATE, y = TMIN, group = 1)) +
  geom_path(linetype = 1, size = 1.5) +
  # data, x and group are inherited from ggplot(); only y changes here
  geom_path(aes(y = TMAX), linetype = 2, size = 1.5) +
  labs(x = "Date", y = "Temp (F)") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
A similar question states that I should include linetype within aes, although this does not yield a legend. For example:
# Attempt with linetype moved inside aes(): the numeric constants 1 and 2
# are now mapped as data values, which produces the error quoted below.
ggplot(data = df, aes(x = DATE, y = TMIN, group = 1, linetype = 1)) +
geom_path(size = 1.5) +
labs(x = "Date",
y = "Temp (F)") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
geom_path(data = df, size = 1.5, aes(x = DATE, y=TMAX, linetype=2))
Here is the error:
Error: A continuous variable can not be mapped to linetype
How can I add a legend to the figure showing both my solid line and dashed line?
The alternative to reshaping if you only have two lines is to put the linetype as a character assignment inside aes
# Mapping linetype to a character constant inside aes() makes ggplot build
# a discrete linetype scale, and with it a legend entry per line.
ggplot(data = df, aes(x = DATE, y = TMIN, group = 1)) +
  geom_path(aes(linetype = "TMIN"), size = 1.5) +
  geom_path(aes(y = TMAX, linetype = "TMAX"), size = 1.5) +
  # Pin the linetypes by name so TMIN stays solid and TMAX dashed, as in the
  # question's plot; the default scale would assign them the other way round
  # because it orders the names alphabetically.
  scale_linetype_manual(values = c(TMIN = "solid", TMAX = "dashed")) +
  labs(x = "Date", y = "Temp (F)") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
Try this reshaping data to long, and then using a variable in the linetype statement. In that way you can obtain the legend. Here the code:
# library() fails loudly if a package is missing; require() would only
# return FALSE and let the script break further down.
library(ggplot2)
library(tidyr)
library(dplyr)

# Data
DATE <- c("2020-10-14", "2020-10-15", "2020-10-16", "2020-10-17", "2020-10-18")
TMAX <- c(47, 45, 43, 40, 4)
TMIN <- c(35, 34, 28, 26, 29)
df <- data.frame(DATE, TMAX, TMIN)

# Plot: reshape to long format (columns DATE, name, value) so that the
# variable name can drive both the grouping and the linetype legend.
df %>%
  pivot_longer(-DATE) %>%
  ggplot(aes(x = DATE, y = value, group = name, linetype = name)) +
  geom_path(size = 1.5) +
  # linetype values are given in the order of the names: TMAX = 2, TMIN = 1
  scale_linetype_manual(values = c(2, 1)) +
  labs(x = "Date", y = "Temp (F)", linetype = "Var") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  # draw the legend keys with a thinner line than the plotted paths
  guides(linetype = guide_legend(override.aes = list(size = 0.5)))
Output:

Legend mixing shapes with geom_points R

My legend is not showing correctly when I make my graph in R using ggplot2. One column of my dataset is represented by a geom_bar and the two others are represented by geom_point layers (one shape each). In the legend, both the circle and the diamond shape are shown for 2000 and for 2008, with the circle drawn inside the diamond for both years. However, the graph itself works totally fine...
Here is a screenshot:
I have created a simplified version of my dataset:
# Simplified example data: one row per industry, with its display order and
# one value column per year.
order_var <- c(1, 4, 3, 5, 6, 2)
alt_name <- c("Agriculture", "Mining", "Food products", " Manufacture", "Chemicals", "Machinery")
y2000 <- c(20, 40, 50, 80, 30, 70)
y2008 <- c(40, 50, 80, 70, 30, 60)
y2018 <- c(10, 30, 80, 50, 40, 50)
# Column names are taken from the variable names.
datatest <- data.frame(order_var, alt_name, y2000, y2008, y2018)
And the code for my graph:
# Bars for 2018, points for 2008 and 2000; industries ordered by order_var.
datatest %>% ggplot(aes(x = reorder(alt_name, order_var))) +
# fill is mapped to the constant "2018" so the bars get a legend entry
geom_bar(stat = "identity", aes(y = `y2018`, fill = "2018"), width = 0.7, col = "black") +
# Both point layers map only colour inside aes(); shape, fill and size are
# fixed per layer outside aes().
geom_point(aes(y = `y2008`, col = "2008"), shape = 23, fill = "white", size = 5) +
geom_point(aes(y = `y2000`, col = "2000"), shape = 19, fill = "black", size = 3) +
xlab("Industry") +
ylab("Percentage") +
theme(legend.position = "top") +
scale_fill_manual(name = '', values = c("2018" = "#4F81BD"), breaks = c("2018")) +
scale_colour_manual(name = '', values = c("2008" = "black", "2000" = "orange"))
If you know how to correct this problem, I would be very grateful!!
Thank you :)
That's a very tricky plot you are trying to make because you are in essence mapping the same aesthetics to different geoms.
The first thing you should do is to reshape your data to the long format. I also divided your dataset between 2018 (the bar), and 2000, 2008 (the points).
# Long format: one row per industry and year column (name / value), with a
# 0/1 flag marking the rows that belong to the 2018 bars.
df2 <- datatest %>%
  pivot_longer(cols = -c(order_var, alt_name)) %>%
  mutate(bar = if_else(name == "y2018", 1, 0))

# Split by the flag: 2018 rows feed the bars, all other rows feed the points.
data_bar <- filter(df2, bar == 1)
data_point <- filter(df2, bar != 1)
I also find it useful to add a dodge to your points to avoid overlapping one inside the other as in the case of chemicals with position = position_dodge(width = 0.6).
The first solution gives what you want, but it is a bit of a hack, and I wouldn't recommend doing it as a general strategy. You basically add an aesthetics that you are not going to use to the bars (in this case, linetype), and then override it, as suggested in this answer.
# Solution 1: give the bars their legend entry through an otherwise unused
# aesthetic (linetype), so fill stays free for the points.
ggplot(data_bar, aes(x = reorder(alt_name, order_var))) +
  geom_col(
    aes(y = value, linetype = name),
    fill = "#4F81BD",
    colour = "black"
  ) +
  geom_point(
    data = data_point,
    aes(y = value, colour = name, shape = name, size = name, fill = name),
    position = position_dodge(width = 0.6)
  ) +
  # the four point scales share the same labels
  scale_colour_manual(values = c("orange", "black"), labels = c("2000", "2008")) +
  scale_fill_manual(values = c("orange", "white"), labels = c("2000", "2008")) +
  scale_shape_manual(values = c(19, 23), labels = c("2000", "2008")) +
  scale_size_manual(values = c(3, 5), labels = c("2000", "2008")) +
  # the linetype key is overridden to show the bar fill colour
  scale_linetype_manual(
    values = 1,
    labels = c("2018"),
    guide = guide_legend(override.aes = list(fill = c("#4F81BD")))
  ) +
  labs(x = "Industry", y = "Percentage") +
  theme(legend.position = "top", legend.title = element_blank())
Another solution, more general, is to avoid using the fill aesthetics for the geom_point and changing the shape to a solid one instead:
# Solution 2: keep fill for the bars only and give the points solid shapes
# (19 and 18), so they need colour, shape and size but no fill.
ggplot(data_bar, aes(x = reorder(alt_name, order_var))) +
  geom_col(aes(y = value, fill = name), colour = "black") +
  geom_point(
    data = data_point,
    aes(y = value, colour = name, shape = name, size = name),
    position = position_dodge(width = 0.6)
  ) +
  scale_fill_manual(values = c("#4F81BD"), labels = c("2018")) +
  # the three point scales share the same labels
  scale_colour_manual(values = c("orange", "white"), labels = c("2000", "2008")) +
  scale_shape_manual(values = c(19, 18), labels = c("2000", "2008")) +
  scale_size_manual(values = c(4, 6), labels = c("2000", "2008")) +
  labs(x = "Industry", y = "Percentage") +
  theme(legend.position = "top", legend.title = element_blank())

ggplot2: x axis breaks doesn't work with 7 days breaks

I'm trying to figure out why this code for the x-axis breaks works perfectly for 2- to 6-day breaks, but gives me an error when I change it to 7 days (Error: breaks and labels must have the same length). Thank you
Data Frame
library(tidyverse)

# One row per day of 2019, each with a random count drawn (with
# replacement) from 20, 25, ..., 50.
df <- data.frame(
  date = seq(as.Date("2019-01-01"), as.Date("2019-12-31"), by = "day")
)
df$counts <- sample(seq(from = 20, to = 50, by = 5), size = 365, replace = TRUE)

# Make the column types explicit.
df <- df %>%
  mutate(
    date = as.Date(date),
    counts = as.numeric(counts)
  )
Code
# One break per day across the whole date range (365 breaks for 2019).
breaks_daily = seq(from = min(df$date), to = max(df$date), by = "1 day")
# Then make the 7 days interval labels
# (every 7th date from 2019-01-01 to 2019-12-31 gives 53 labels)
labels_7_days = format(seq(from = min(df$date), to = max(df$date), by = "7 days"), "%b-%d")
# Follow each weekly label with six empty strings: 53 * 7 = 371 entries.
labels_final = c(sapply(labels_7_days, function(x) {
c(x, rep("", 6))
}))
# Length adjustment: at most one trailing entry is removed here, so with
# 365 breaks the labels end up with 370 entries and the lengths still differ.
if ((length(breaks_daily) %% 7) == 0) {
labels_final <- labels_final
} else {
labels_final<- labels_final[-length(labels_final)]
}
# Bar chart with one tick per day; scale_x_date() receives the two vectors
# built above as breaks and labels.
myplot <- ggplot(df,
aes(y = counts, x = date)) +
geom_bar(stat = "identity", position = "dodge", fill = "#99CCFF", width=1) +
labs(x="Date", y="Quantity of Fruits") +
scale_x_date(labels = labels_final, breaks = breaks_daily, expand=c(0,0)) +
scale_y_continuous(limits = c(0, 70),
breaks = seq(0, 70, 10),
expand = c(0, 0)) +
ggtitle(paste0("Figure 2: Fruits Example" )) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, size = 35),
axis.text.y = element_text(size = 35),
axis.text = element_text(size = 35),
axis.title = element_text(size = 40, face="bold"),
axis.title.y = element_text(vjust = -2),
axis.title.x = element_text(vjust = -1),
axis.ticks.length = unit(.5, "cm"))
myplot
Not a direct answer to your question, but why don't you just use the inbuilt functionality? Sometimes it is not necessary to reinvent the wheel... ?
library(tidyverse)

# Same example data as in the question: one random count per day of 2019.
df <- data.frame(
  date = seq(as.Date("2019-01-01"), as.Date("2019-12-31"), by = "day")
)
df$counts <- sample(seq(from = 20, to = 50, by = 5), size = 365, replace = TRUE)

# Let scale_x_date() generate the weekly breaks and their labels itself.
ggplot(df, aes(x = date, y = counts)) +
  geom_col() +
  scale_x_date(date_breaks = "1 week", date_labels = "%b %d") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
update
Here how to keep the ticks in-between (although I don't think your visualisation gains a lot with it)
You need to bring the two vectors to the same length. When using your label creator, you are creating six empty spaces for each week until the maximum (and including it!), then of course making "too many labels". Just subset the vector by using only the length of your breaks.
P.S. geom_col is identical to geom_bar(stat = "identity") , and in your example you don't need position = position_dodge, as you have no group defined. This argument only makes sense when you are dodging by a group.
library(tidyverse)

# Example data: one random count per day of 2019.
df <- data.frame(date = seq(as.Date("2019-01-01"), as.Date("2019-12-31"), by = "day"))
df$counts <- sample(seq(from = 20, to = 50, by = 5), size = 365, replace = TRUE)

# One break (tick) per day.
breaks_daily <- seq(from = min(df$date), to = max(df$date), by = "1 day")
# One printed label per 7 days.
labels_7_days <- format(seq(from = min(df$date), to = max(df$date), by = "7 days"), "%b-%d")
# Follow every weekly label with six empty strings, then keep exactly as
# many entries as there are breaks. vapply() guarantees 7 strings per label,
# and seq_along() stays correct even if breaks_daily were empty (1:length()
# would give c(1, 0) in that case).
labels_final <- c(vapply(
  labels_7_days,
  function(x) c(x, rep("", 6)),
  character(7)
))[seq_along(breaks_daily)] # that is the crucial bit

ggplot(df, aes(y = counts, x = date)) +
  geom_col(fill = "#99CCFF", width = 1) +
  labs(x = "Date", y = "Quantity of Fruits") +
  scale_x_date(labels = labels_final, breaks = breaks_daily, expand = c(0, 0)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
Created on 2020-05-30 by the reprex package (v0.3.0)

Resources