I have two dataframes:
# Two toy data sets with the same five names (A-E), five rows per name.
df1 <- data.frame(
  name = rep(LETTERS[1:5], each = 5),
  age = 1:25
)
df2 <- data.frame(
  name = rep(LETTERS[1:5], each = 5),
  age = c(rep(c(1, 5), each = 5), 1, 12, 3, 2, 1, 1:10)
)
And I want to produce horizontal barplots like these:
# Horizontal green bars for df1; `name` is reordered by `age` via fct_reorder().
df1 %>%
  mutate(name = fct_reorder(name, age)) %>%
  ggplot(mapping = aes(x = name, y = age)) +
  geom_col(fill = "#74D055FF", alpha = 0.6, width = 0.6) +
  coord_flip() +
  theme_bw()
# Horizontal violet bars for df2; `name` is reordered by `age` via fct_reorder().
df2 %>%
  mutate(name = fct_reorder(name, age)) %>%
  ggplot(mapping = aes(x = name, y = age)) +
  geom_col(fill = "#481568FF", alpha = 0.6, width = 0.6) +
  coord_flip() +
  theme_bw()
I would like to show them in the same figure: there should be a vertical line at age = 0, with the violet bars going to one side and the green bars to the other (of course, the bars can then be sorted by the age of only one of df1 or df2, as the descending order of age is not the same in both dataframes). I don't know what this type of plot is called or how to approach it.
One option would be to bind your datasets into one dataframe and add an identifier column for which I use dplyr::bind_rows. The identifier could then be mapped on the fill aes and the colors set via scale_fill_manual. Also I aggregated the data using count instead of relying on stacking:
library(dplyr)
library(ggplot2)
# Stack both data frames; `.id = "id"` adds a character column `id` that is
# "1" for rows from df1 and "2" for rows from df2.
dplyr::bind_rows(df1, df2, .id = "id") |>
  # Sum `age` per source and name so each bar is drawn once, not stacked.
  count(id, name, wt = age, name = "age") |>
  mutate(
    # Order the names by the df1 totals only (df2 rows contribute 0).
    name = reorder(name, (id == "1") * age, sum),
    # Flip the sign of the df2 totals so they extend to the other side of 0.
    age = if_else(id == "2", -age, age)
  ) |>
  ggplot(aes(x = age, y = name, fill = id, group = 1)) +
  geom_col(alpha = 0.6, width = 0.6) +
  geom_vline(xintercept = 0) +
  # Named values tie each colour to its source regardless of level order.
  scale_fill_manual(values = c("1" = "#74D055FF", "2" = "#481568FF")) +
  theme_bw()
Related
I obtained the two separate mean plots. Is there any simple way to combine them on a single plane with different line colours? Tricky part is each has a different scale, so I want to put one (lshare) scale on left hand side of y-axis and the other (va) on right side of y-axis.
# One line plot per variable, showing the mean per year.
# `fun` replaces the `fun.y` argument, which is deprecated in ggplot2 >= 3.3.0.
p1 <- ggplot(df, aes(x = year, y = lshare)) +
  stat_summary(geom = "line", fun = mean)
p2 <- ggplot(df, aes(x = year, y = va)) +
  stat_summary(geom = "line", fun = mean)
# Show both plots side by side.
grid.arrange(p1, p2, ncol = 2)
Update2:
Combining all:
library(tidyverse)
# Long-format version of the two-axis plot.
# mpg is multiplied by 10 so that it shares the primary axis with disp; the
# secondary axis divides by 10 again, so it reads mpg in its original units.
mtcars %>%
  select(mpg, disp) %>%
  mutate(year = 1900:1931) %>%
  pivot_longer(c(mpg, disp)) %>%
  # Without this rescaling the secondary axis would match neither series.
  mutate(value = if_else(name == "mpg", value * 10, value)) %>%
  ggplot(aes(x = year, y = value, group = name, color = name)) +
  stat_summary(fun = mean, geom = "line", size = 1) +
  scale_y_continuous(
    name = "my first y axis",
    sec.axis = sec_axis(~ . / 10, name = "my second y axis")
  )
Update: How to add a second y axis, as requested:
library(tidyverse)
# Draw mpg (multiplied by 10) and disp against the same y axis; the secondary
# axis divides the primary axis by 10.
mtcars %>%
  select(mpg, disp) %>%
  mutate(year = 1900:1931) %>%
  ggplot(mapping = aes(x = year)) +
  geom_line(mapping = aes(y = mpg * 10), color = "red", size = 1) +
  geom_line(mapping = aes(y = disp), color = "blue", size = 1) +
  scale_y_continuous(
    sec.axis = sec_axis(~ . / 10, name = "my second y axis"),
    name = "my first y axis"
  )
First answer:
Here is a reproducible example with the mtcars dataset:
library(tidyverse)
# Reshape mpg and disp to long format and draw one coloured mean line per
# variable.
mtcars %>%
  select(mpg, disp) %>%
  mutate(year = 1900:1931) %>%
  pivot_longer(c(mpg, disp)) %>%
  ggplot(mapping = aes(x = year, y = value, group = name, color = name)) +
  stat_summary(geom = "line", fun = mean, size = 1)
As @jdobres commented, you can use facet_wrap(), as in the following example. Simply introduce a grouping factor into your data.frame.
set.seed(1)

# sample data: the same series twice, labelled as group 1 and group 2
year <- 1:20
lshare <- 0.50 - 0.02 * year + rnorm(length(year), sd = 3)
df <- data.frame(year = c(year, year), lshare = c(lshare, lshare))
# gl() already returns a factor, so no extra factor() call is needed
df$group <- gl(2, length(year))

# plot: one panel per group
# `fun` replaces the `fun.y` argument, which is deprecated in ggplot2 >= 3.3.0
ggplot(df, aes(x = year, y = lshare, colour = group)) +
  stat_summary(geom = "line", fun = mean, size = 1) +
  facet_wrap(~ group)
Addition
As per your edit, which I saw after I posted this answer, facet_wrap() also works when you want to have two different y-axes. You just have to play a bit with the function that is specified within sec_axis().
set.seed(1)

# sample data: group 2 is group 1 plus a large positive offset
year <- 1:20
lshare <- 0.50 - 0.02 * year + rnorm(length(year), sd = 3)
noise <- abs(rnorm(length(lshare), mean = 150, sd = 100))
df <- data.frame(year = c(year, year), lshare = c(lshare, lshare + noise))
df$group <- gl(2, length(year))

# set two limits: value range of each group
ylim_left <- with(subset(df, group == "1"), range(lshare))
ylim_right <- with(subset(df, group == "2"), range(lshare))

# linear map from the right-hand (group 2) scale onto the left-hand scale:
#   left = axis_left + axis_right * right
axis_right <- diff(ylim_left) / diff(ylim_right)
axis_left <- ylim_left[1] - axis_right * ylim_right[1]

# plot
# Group 2 is drawn on the left-hand scale via the map above; sec_axis() applies
# the inverse so the right-hand axis shows group 2 in its original units.
# Without rescaling group 2, the secondary axis would not describe its values.
ggplot(
  df,
  aes(
    x = year,
    y = ifelse(group == "2", axis_left + axis_right * lshare, lshare),
    colour = group
  )
) +
  stat_summary(geom = "line", fun = mean, size = 1) +
  facet_wrap(~ group) +
  scale_y_continuous(
    name = "lshare",
    sec.axis = sec_axis(~ (. - axis_left) / axis_right)
  )
Addition 2
If you would like to have the two lines in the same pane, you can use something along the following lines of code. Note, I use the same data as in the first addition (see above).
# set two limits: value range of each group
ylim_left <- with(subset(df, group == 1), range(lshare))
ylim_right <- with(subset(df, group == 2), range(lshare))

# linear map from the right-hand (group 2) scale onto the left-hand scale:
#   left = axis_left + axis_right * right
axis_right <- diff(ylim_left) / diff(ylim_right)
axis_left <- ylim_left[1] - axis_right * ylim_right[1]

# plot
ggplot(df, aes(colour = group)) +
  # group 1 is drawn in its own units (left-hand axis)
  stat_summary(
    data = subset(df, group == 1),
    mapping = aes(x = year, y = lshare),
    geom = "line", fun = mean, size = 1
  ) +
  # group 2 is mapped onto the left-hand scale; the secondary axis below
  # applies the inverse map, so it reads group 2 in its original units
  stat_summary(
    data = subset(df, group == 2),
    mapping = aes(x = year, y = axis_left + axis_right * lshare),
    geom = "line", fun = mean, size = 1
  ) +
  scale_y_continuous(
    name = "lshare",
    sec.axis = sec_axis(~ (. - axis_left) / axis_right)
  ) +
  scale_colour_manual(
    name = "My_groups",
    values = c("1" = "blue4", "2" = "darkorange"),
    labels = c("Group 1", "Group 2")
  )
I have created a graph to demonstrate the development of four variables. Is there any way to add a label that tells the percentage change(last observation/first observation -1) to the end of the plots to highlight the relative change during the observed period?
Data and plots:
library(tidyverse)
# Yearly values of four variables, 2015-2020; `year` is stored as character.
Data <- data.frame(
  Wind = c(236, 325, 470, 615, 647, 821),
  Hard_coal = c(591, 811, 667, 681, 532, 344),
  Gas = c(883, 841, 472, 731, 678, 680),
  Bio = c(883, 841, 811, 731, 678, 680),
  year = as.character(2015:2020)
)

# One line per variable, distinguished by colour and line type.
Data %>%
  pivot_longer(-year) %>%
  ggplot(
    mapping = aes(
      x = year, y = value,
      group = name, color = name, linetype = name
    )
  ) +
  geom_line(size = 1.5)
Using the ggrepel option offered in "Plot labels at ends of lines", this could be achieved as shown below, where I make use of dplyr's first() and last() to compute the percentage change.
Note: I still vote to close this question as a duplicate.
library(tidyr)
library(dplyr)
library(ggplot2)
# Long format with a numeric year and, per variable, the relative change
# between its first and last year (last / first - 1).
data_long <- Data %>%
  pivot_longer(-year) %>%
  mutate(year = as.numeric(year)) %>%
  group_by(name) %>%
  # order_by makes first/last independent of the row order of the data
  mutate(
    change = last(value, order_by = year) / first(value, order_by = year) - 1
  ) %>%
  # drop the grouping so later steps work on the whole table
  ungroup()

ggplot(data_long, aes(x = year, y = value, color = name, group = name)) +
  geom_line(size = 1) +
  # label each line at its final year with the percentage change
  ggrepel::geom_text_repel(
    data = filter(data_long, year == max(year)),
    aes(label = scales::percent(change)),
    direction = "y", nudge_x = .25,
    hjust = 0, show.legend = FALSE
  ) +
  # extend the x axis to the right to leave room for the labels
  scale_x_continuous(limits = c(NA, 2020.5)) +
  coord_cartesian(clip = "off")
I have a data frame ("Date", "A", "B"). I'm trying to use boxplots (by month) to analyse the data in "A", both for the rows filtered by "B" and for all rows. So far I can only create two separate plots: one boxplot for the filtered rows and one for all rows.
I tried to have two geom_boxplot layers under one ggplot(), but the two boxplots just overlap each other. Here is the code I used. Does anyone know how I can combine those two boxplots into one plot, so that they share the same x axis and each month on the x axis has two boxes?
# Two boxplot layers on the same axes: rows with B == 1, and all rows.
# NOTE(review): outlier.shape is given a logical here; confirm the intended
# point shape.
ggplot() +
  geom_boxplot(
    data = df %>% filter(B == 1),
    mapping = aes(x = Month, y = A, group = Month, fill = "Chamber_no fire"),
    outlier.shape = TRUE
  ) +
  geom_boxplot(
    data = df,
    mapping = aes(x = Month, y = A, group = Month, fill = "Chamber"),
    outlier.shape = TRUE
  ) +
  theme_bw() +
  theme(panel.grid.major = element_blank()) +
  scale_x_continuous(breaks = seq(2, 12, 1), minor_breaks = FALSE) +
  geom_hline(yintercept = 0, linetype = "dotted")

# Save the last plot as a 6 x 4 inch PNG.
ggsave("sate_meas_O3_NOx_5km_nofire.png", width = 6, height = 4, units = "in")
One approach to achieve your desired result is to
Bind the filtered dataset and the total dataset by row and add an identifier id for each dataset which could easily be done via dplyr::bind_rows.
Make a boxplot where you map id on the fill aesthetic and group by both id and Month using interaction
Set the legend labels via scale_fill_discrete
As you provided no data I make use of a random example data set:
# Reproducible random example data: 100 rows with a month (2-12), a normally
# distributed measurement A and a flag B (1 or 2).
set.seed(42)
df <- data.frame(
  # `replace` is spelled out instead of relying on partial matching of `rep`
  Month = sample(2:12, 100, replace = TRUE),
  A = rnorm(100),
  B = sample(1:2, 100, replace = TRUE)
)
library(ggplot2)
library(dplyr)
# Stack the B == 1 subset and the full data; `.id = "id"` stores the list
# names ("b1", "all") in a new column `id`.
d <- bind_rows(
  list(b1 = df %>% filter(B == 1), all = df),
  .id = "id"
)

# One box per month and data set, placed side by side.
# NOTE(review): outlier.shape is given a logical, as in the question; confirm
# the intended point shape.
ggplot(
  data = d,
  mapping = aes(x = Month, y = A, group = interaction(Month, id), fill = id)
) +
  geom_boxplot(outlier.shape = TRUE, position = "dodge") +
  scale_fill_discrete(labels = c(b1 = "Chamber_no fire", all = "Chamber")) +
  theme_bw() +
  theme(panel.grid.major = element_blank()) +
  # NULL is the documented way to switch off minor breaks
  scale_x_continuous(breaks = seq(2, 12, 1), minor_breaks = NULL) +
  geom_hline(yintercept = 0, linetype = "dotted")
Thanks in advance.
I'd like to plot a multi-bar chart on the primary y-axis while plotting multiple lines on the secondary y-axis with a different scale in R. Here is the test data.
# Test data: one row per TYPE (A-D) with five measurement columns D1-D5.
test.data <- data.frame(
  TYPE = c("A", "B", "C", "D"),
  D1 = c(20, 10, 1, 1.1),
  D2 = c(40, 20, 3, 2),
  D3 = c(9, 30, 8, 3),
  D4 = c(12, 20, 3, 6),
  D5 = c(30, 10, 6, 3)
)
I hope it can be displayed like this below.
You can use the following code
library(tidyverse)
test.data <- data.frame(
  TYPE = c("A", "B", "C", "D"),
  D1 = c(20, 10, 1, 1.1),
  D2 = c(40, 20, 3, 2),
  D3 = c(9, 30, 8, 3),
  D4 = c(12, 20, 3, 6),
  D5 = c(30, 10, 6, 3)
)

# Reshape once to long format (columns TYPE, name, value), then split:
# types A and B are drawn as bars, types C and D as lines.
long_data <- test.data %>%
  pivot_longer(cols = -TYPE)
df1 <- subset(long_data, TYPE %in% c("A", "B"))
df2 <- subset(long_data, TYPE %in% c("C", "D"))

# The line values are multiplied by this factor to share the bars' axis; the
# secondary axis divides by the same factor, so both always stay in sync.
axis_ratio <- 5

ggplot() +
  geom_col(
    data = df1,
    aes(x = name, y = value, fill = TYPE),
    position = position_dodge()
  ) +
  scale_fill_manual("Type", values = c("A" = "#56B4E9", "B" = "#E69F00")) +
  geom_point(
    data = df2,
    aes(x = name, y = value * axis_ratio, group = TYPE, col = TYPE)
  ) +
  geom_line(
    data = df2,
    aes(x = name, y = value * axis_ratio, group = TYPE, col = TYPE)
  ) +
  scale_color_manual("Type", values = c("C" = "darkgrey", "D" = "black")) +
  scale_y_continuous(
    name = "First Axis",
    sec.axis = sec_axis(trans = ~ . / axis_ratio, name = "Second Axis")
  ) +
  theme_bw()
I would like to draw a line (or points) on top of my stacked bar plots. As I have no real data points I can refer to (only the separate values and not their sum), I don't know how I can add such a line. The code produces this plot:
I want to add this black line(my real data are not linear):
library(tidyverse)
## Create some fake data
data3 <- tibble(
  year = 1991:2020,
  One = 31:60,
  Two = 21:50,
  Three = 11:40
)

## Reshape to a long dataset with columns year, model and value.
## pivot_longer() replaces the superseded gather(); only the row order differs.
new_data3 <- data3 %>%
  pivot_longer(-year, names_to = "model", values_to = "value")

## Plot the data as stacked bars, one segment per model
ggplot(new_data3, aes(x = year, y = value, fill = model)) +
  geom_bar(stat = "identity", position = "stack")
You can use stat_summary and sum for the summary function:
# Stacked bars per model plus a line through the per-year sum of all models.
# `fun` replaces the `fun.y` argument, which is deprecated in ggplot2 >= 3.3.0.
ggplot(new_data3, aes(year, value)) +
  geom_col(aes(fill = model)) +
  # group = 1 joins the yearly sums into a single line
  stat_summary(geom = "line", fun = sum, group = 1, size = 2)
Result:
You could compute the sum by year and plot it with an additional geom_line layer:
library(dplyr)
library(ggplot2)
# Total over all models for each year.
newdata4 <- new_data3 %>%
  group_by(year) %>%
  summarise(total = sum(value))

ggplot(new_data3, aes(x = year, y = value, fill = model)) +
  geom_bar(stat = "identity", position = "stack") +
  # newdata4 has no `model` column, so the line layer must not inherit the
  # plot-level fill mapping; inherit.aes = FALSE replaces the dummy
  # `fill = ""` mapping used for that purpose.
  geom_line(
    aes(x = year, y = total),
    data = newdata4,
    inherit.aes = FALSE,
    size = 2
  )