I have the following data:
library(dplyr)
# Build a balanced country-year panel: every country is observed once in
# every year from 2009 to 2022, with all rows of a country kept together.
countries <- c("Austria", "Belgium", "Bulgaria", "Croatia", "Republic of Cyprus")
years <- 2009:2022
df <- data.frame(
  country = rep(countries, each = length(years)),
  year = as.numeric(rep(years, times = length(countries)))
)

# Simulated series: `rnorm(n = 1, ...)` draws a single random level per
# country (the mutate is grouped), to which a linear trend in the within-country
# observation counter `n_obs` is added.
# Column order (country, year, n_obs, gdp, inflation) is kept because later
# code selects gdp and inflation by position (columns 4 and 5).
df <- df %>%
  group_by(country) %>%
  mutate(
    n_obs = row_number(),
    gdp = rnorm(n = 1, mean = 3000, sd = 300) + 20.64 * n_obs,
    inflation = rnorm(n = 1, mean = 5, sd = 3) + 1.23 * n_obs
  ) %>%
  # Drop the grouping so that later verbs do not silently work per country.
  ungroup()
I want to make line plots for gdp and inflation one by one like so:
# Inflation over time, one coloured line per country.
ggplot(data = df, mapping = aes(x = year, y = inflation, colour = country)) +
  geom_line()

# GDP over time, one coloured line per country.
ggplot(data = df, mapping = aes(x = year, y = gdp, colour = country)) +
  geom_line()
However, in the real data, I have a lot of variables that I want to plot, and I was wondering how I could use lapply to achieve that. I tried the following code:
# One plot per selected column (columns 4 and 5 of df).
# Note: lapply() iterates over the columns themselves, so `var` holds a
# column's values (not its name); `labs(y = var)` therefore receives a data
# vector rather than a label string.
lapply(df[, c(4, 5)], function(var) {
  ggplot(data = df, mapping = aes(x = year, y = var, color = country)) +
    geom_line() +
    labs(x = "year", y = var)
})
This works, but I cannot get the y variable label on the plot. Any help would be appreciated.
Regards
You can use the following code:
# One plot per selected column, iterating over the column *names* so that the
# name is available both for the y mapping (looked up through the `.data`
# pronoun) and for the y-axis label.
lapply(names(df)[4:5], function(var) {
  ggplot(df, aes(x = year, y = .data[[var]], colour = country)) +
    geom_line() +
    labs(y = var)
})
Output gdp:
Output inflation:
Related
I obtained the two separate mean plots. Is there a simple way to combine them in a single panel with different line colours? The tricky part is that each variable has a different scale, so I want to put one scale (lshare) on the left-hand side of the y-axis and the other (va) on the right-hand side.
# Yearly mean of each variable drawn as a line. `fun` replaces the deprecated
# `fun.y` argument of stat_summary().
p1 <- ggplot(df, aes(x = year, y = lshare)) +
  stat_summary(geom = "line", fun = mean)
p2 <- ggplot(df, aes(x = year, y = va)) +
  stat_summary(geom = "line", fun = mean)

# Show both plots side by side. grid.arrange() lives in gridExtra, which this
# snippet never attaches, so it is called with an explicit namespace.
gridExtra::grid.arrange(p1, p2, ncol = 2)
Update2:
Combining all:
library(tidyverse)
# Long-format version of the two-axis plot: mpg and disp are stacked into
# name/value pairs and drawn as one coloured line each.
mtcars %>%
  select(mpg, disp) %>%
  # mpg is multiplied by 10 so that it is drawn on the primary (disp) scale;
  # the secondary axis below divides by 10 again, so it reads in mpg units.
  mutate(year = 1900:1931, mpg = mpg * 10) %>%
  pivot_longer(c(mpg, disp)) %>%
  ggplot(aes(x = year, y = value, group = name, color = name)) +
  stat_summary(fun = mean, geom = "line", size = 1) +
  scale_y_continuous(
    name = "my first y axis",
    sec.axis = sec_axis(~ . / 10, name = "my second y axis")
  )
Update: How to add a second y axis, as requested:
library(tidyverse)
# Two series on one panel with a secondary axis: mpg is drawn multiplied by 10
# on the primary scale, and the secondary axis divides by 10 to undo that.
mtcars %>%
  select(mpg, disp) %>%
  mutate(year = 1900:1931) %>%
  ggplot(mapping = aes(x = year)) +
  geom_line(mapping = aes(y = mpg * 10), colour = "red", size = 1) +
  geom_line(mapping = aes(y = disp), colour = "blue", size = 1) +
  scale_y_continuous(
    sec.axis = sec_axis(~ . / 10, name = "my second y axis"),
    name = "my first y axis"
  )
First answer:
Here is a reproducible example with the mtcars dataset:
library(tidyverse)
# Stack mpg and disp into name/value pairs (pivot_longer's default column
# names) and draw one coloured line per original column.
mtcars %>%
  select(mpg, disp) %>%
  mutate(year = 1900:1931) %>%
  pivot_longer(cols = c(mpg, disp)) %>%
  ggplot(mapping = aes(x = year, y = value, group = name, colour = name)) +
  stat_summary(geom = "line", fun = mean, size = 1)
As @jdobres commented, you can use facet_wrap(), as in the following example. Simply introduce a grouping factor to your data.frame.
# Fix the RNG state so the simulated series is reproducible.
set.seed(1)

# Sample data: a downward linear trend plus Gaussian noise, duplicated so
# that both groups hold the same 20 observations.
year <- 1:20
lshare <- 0.50 - 0.02 * year + rnorm(length(year), sd = 3)
df <- data.frame(
  year = rep(year, times = 2),
  lshare = rep(lshare, times = 2)
)
# Two-level grouping factor: rows 1-20 are group "1", rows 21-40 group "2".
df$group <- gl(2, length(year))
# Plot: one panel per group, each showing the yearly mean as a line.
# `fun` replaces the deprecated `fun.y` argument of stat_summary().
ggplot(df, aes(x = year, y = lshare, colour = group)) +
  stat_summary(geom = "line", fun = mean, size = 1) +
  facet_wrap(~ group)
Addition
As per your edit, which I saw after I posted this answer, facet_wrap() also works when you want to have two different y-axes. You just have to play a bit with the function that is specified within sec_axis().
# Fix the RNG state so the simulated series is reproducible.
set.seed(1)

# Sample data: group 1 is a noisy downward trend; group 2 is the same series
# shifted upwards by a positive random offset, so it lives on a larger scale.
year <- 1:20
lshare <- 0.50 - 0.02 * year + rnorm(length(year), sd = 3)
noise <- abs(rnorm(length(lshare), mean = 150, sd = 100))
df <- data.frame(
  year = rep(year, times = 2),
  lshare = c(lshare, lshare + noise)
)
# Two-level grouping factor: rows 1-20 are group "1", rows 21-40 group "2".
df$group <- gl(2, length(year))
# Value ranges of the two groups: group 1 defines the primary (left) axis,
# group 2 the secondary (right) axis.
ylim_left <- range(subset(df, group == 1)$lshare)
ylim_right <- range(subset(df, group == 2)$lshare)

# Linear map from the right-hand scale onto the left-hand one:
#   left = axis_left + axis_right * right
axis_right <- diff(ylim_left) / diff(ylim_right)
axis_left <- ylim_left[1] - axis_right * ylim_right[1]

# Plot: group 2 is rescaled onto the primary axis with the map above, and the
# secondary axis applies the inverse map, so its labels read in group 2's
# original units. Without the rescaling the secondary axis would not match
# the plotted values.
ggplot(
  df,
  aes(
    x = year,
    y = ifelse(group == 2, axis_left + axis_right * lshare, lshare),
    colour = group
  )
) +
  stat_summary(geom = "line", fun = mean, size = 1) +
  facet_wrap(~ group) +
  scale_y_continuous(
    name = "lshare",
    sec.axis = sec_axis(~ (. - axis_left) / axis_right)
  )
Addition 2
If you would like to have the two lines in the same pane, you can use something along the following lines of code. Note, I use the same data as in the first addition (see above).
# Value ranges of the two groups: group 1 defines the primary (left) axis,
# group 2 the secondary (right) axis.
ylim_left <- range(subset(df, group == 1)$lshare)
ylim_right <- range(subset(df, group == 2)$lshare)

# Linear map from the right-hand scale onto the left-hand one:
#   left = axis_left + axis_right * right
axis_right <- diff(ylim_left) / diff(ylim_right)
axis_left <- ylim_left[1] - axis_right * ylim_right[1]

# Plot both groups in one pane. Group 1 is drawn as is; group 2 is rescaled
# onto the primary axis, and the secondary axis applies the inverse map so its
# labels read in group 2's original units. Without the rescaling the secondary
# axis would not match the plotted line.
ggplot(df, aes(colour = group)) +
  stat_summary(
    data = subset(df, group == 1),
    mapping = aes(x = year, y = lshare),
    geom = "line", fun = mean, size = 1
  ) +
  stat_summary(
    data = subset(df, group == 2),
    mapping = aes(x = year, y = axis_left + axis_right * lshare),
    geom = "line", fun = mean, size = 1
  ) +
  scale_y_continuous(
    name = "lshare",
    sec.axis = sec_axis(~ (. - axis_left) / axis_right)
  ) +
  scale_colour_manual(
    name = 'My_groups',
    values = c('1' = "blue4", '2' = "darkorange"),
    labels = c('Group 1', 'Group 2')
  )
I am working on the following script, and I want to reorder the bars of the graph in descending order of their values.
library(tidyverse)
library(lubridate)
library(ggplot2)
#df <- read_csv('dataframe.csv')
# Average flow per year. Dates are parsed as month/day/year; a date on or
# after 1 October is counted towards the following year (adding the logical
# comparison result bumps the year by one).
df <- df %>%
  mutate(
    date = mdy(date),
    year = year(date),
    year = year + (date >= mdy(paste0("10/01/", year)))
  ) %>%
  group_by(year) %>%
  summarize(avg = mean(flow))

# One row per year: x is 1 January of that year, y the average flow.
d <- data.frame(
  x = ymd(sprintf("%d-01-01", df$year)),
  y = df$avg
)

# Each bar is built from horizontal segments stacked from 0 up to the bar
# height in steps of 0.1, so that colour can vary with height.
heights <- lapply(d$y, function(top) seq(0, top, by = 0.1))
segment_y <- unlist(heights)
# Repeat every bar's date once per segment belonging to that bar.
centre <- rep(d$x, lengths(heights))

# Segments extend 100 days to either side of the bar's date.
d2 <- data.frame(
  x = centre - 100,
  xend = centre + 100,
  y = segment_y,
  yend = segment_y
)

ggplot(d2, aes(x = x, xend = xend, y = y, yend = yend, colour = y)) +
  geom_segment(size = 2)
Results
I want to reorder the bars in descending order by values
The dataset can be found through the following link
https://drive.google.com/file/d/11PVub9avzMFhUz02cHfceGh9DrlVQDbD/view?usp=sharing
the output I'm looking for is like this.
Kindly assist.
To arrange the data you need to adjust the factor levels. You could arrange the data based on avg column and change year to factor.
library(dplyr)
library(ggplot2)
# Bars ordered by descending average: sort the rows, then turn year into a
# factor whose level order follows the sorted rows (ggplot draws discrete
# values in level order).
df %>%
  arrange(desc(avg)) %>%
  mutate(year = factor(year, levels = unique(year))) %>%
  ggplot(aes(x = year, y = avg)) +
  # Set the fill as a constant outside aes(): inside aes() the string "red"
  # is treated as data, which yields the default palette colour plus a legend
  # that then has to be suppressed.
  geom_col(fill = "red")
Or :
# Bars ordered by descending average and shaded by their value: sort the
# rows, then make year a factor whose levels follow the sorted order.
df %>%
  arrange(desc(avg)) %>%
  mutate(year = factor(year, levels = unique(year))) %>%
  ggplot(mapping = aes(x = year, y = avg, fill = avg)) +
  geom_col()
Try this:
library(scales)
#' Compose two scale transformations into one
#'
#' The resulting transformation applies `b` first and then `a`; its inverse
#' undoes them in the opposite order. Its name is the two names joined by "-".
#'
#' @param a,b Transformations, in any form accepted by `as.trans()`.
#' @param breaks Breaks function for the composed transformation; defaults to
#'   the one of `b`.
#' @param format Label formatter for the composed transformation; defaults to
#'   the one of `b`.
#' @return The transformation object created by `trans_new()`.
dttrans <- function(a, b, breaks = b$breaks, format = b$format) {
  a <- as.trans(a)
  b <- as.trans(b)
  # The defaults of `breaks` and `format` are evaluated lazily, i.e. only
  # below, after `b` has been converted to a transformation object.
  # All arguments are passed by name: newer scales releases insert derivative
  # arguments after `inverse`, so a positional `breaks` would be misassigned.
  trans_new(
    name = paste(a$name, b$name, sep = "-"),
    transform = function(x) a$transform(b$transform(x)),
    inverse = function(x) b$inverse(a$inverse(x)),
    breaks = breaks,
    format = format
  )
}
# Same segment plot, with the x axis run through the composed
# "reverse" + "date" transformation built by dttrans().
ggplot(d2, mapping = aes(x = x, xend = xend, y = y, yend = yend, colour = y)) +
  geom_segment(size = 2) +
  scale_x_continuous(trans = dttrans("reverse", "date"))
Credits: Mikko Marttila
I would like to draw a line (or add points) on top of my stacked bar plot. As I have no real data points to refer to (only the separate values, not their sum), I don't know how to add such a line. The code produces this plot:
I want to add this black line (my real data are not linear):
library(tidyverse)
## Create some fake data: three linearly increasing series per year
data3 <- tibble(
  year = 1991:2020,
  One = 31:60,
  Two = 21:50,
  Three = 11:40
)

## Reshape to long format (one row per year and series); pivot_longer()
## replaces the superseded gather()
new_data3 <- data3 %>%
  pivot_longer(-year, names_to = "model", values_to = "value")

## Plot the data as stacked bars; geom_col() is the shorthand for
## geom_bar(stat = "identity")
ggplot(new_data3, aes(x = year, y = value, fill = model)) +
  geom_col(position = "stack")
You can use stat_summary and sum for the summary function:
# Stacked bars plus a line through the per-year totals. stat_summary() sums
# all values sharing the same x (year); `group = 1` keeps them in one line.
# `fun` replaces the deprecated `fun.y` argument.
ggplot(new_data3, aes(year, value)) +
  geom_col(aes(fill = model)) +
  stat_summary(geom = "line", fun = sum, group = 1, size = 2)
Result:
You could compute the sum per year and plot it with an additional geom_line:
library(dplyr)
library(ggplot2)
# Total of all series per year.
newdata4 <- new_data3 %>%
  group_by(year) %>%
  summarise(total = sum(value))

# Stacked bars with the yearly totals drawn on top as a line.
ggplot(new_data3, aes(x = year, y = value, fill = model)) +
  geom_col(position = "stack") +
  # newdata4 has no `model` column, so this layer must not inherit the plot's
  # `fill = model` mapping; inherit.aes = FALSE replaces the `fill = ""`
  # workaround, which maps an aesthetic geom_line() does not use.
  geom_line(
    aes(x = year, y = total),
    data = newdata4,
    inherit.aes = FALSE,
    size = 2
  )
Sample data:
# Sample data: four years (a factor) with six bi-weekly ids (18-23) each.
dat <- data.frame(
  year = factor(rep(2012:2015, each = 6)),
  id.2wk = rep(c(18, 19, 20, 21, 22, 23), times = 4),
  value = c(
    1.8, 15.6, 32.9, 27.5, 19.6, 2.6,
    1, 8, 42, 35, 11, 3,
    2, 7, 12, 47, 26, 7,
    2, 13, 24, 46, 12, 4
  )
)
# Cumulative value within each year, drawn as one line with point markers
# per year.
dat %>%
  group_by(year) %>%
  mutate(cv = cumsum(value)) %>%
  ggplot(aes(x = id.2wk, y = cv, colour = factor(year))) +
  geom_line(size = 1) +
  geom_point()

# Report the installed ggplot2 version.
packageVersion("ggplot2")
2.2.1
I was expecting a plot similar to below. What went wrong?
How about using data.table to calculate cumulative sum within group?
library(data.table)
library(ggplot2)
# Convert dat to a data.table by reference, then add the cumulative sum of
# value within each year as a new column `cv` (also by reference).
setDT(dat)
dat[, cv := cumsum(value), by = year]

# One line with point markers per year.
ggplot(dat, aes(x = id.2wk, y = cv, colour = factor(year))) +
  geom_line(size = 1) +
  geom_point()
Sample data:
# Sample data: four years (a factor) with six bi-weekly ids (18-23) each.
dat <- data.frame(
  year = factor(rep(2012:2015, each = 6)),
  id.2wk = rep(c(18, 19, 20, 21, 22, 23), times = 4),
  value = c(
    1.8, 15.6, 32.9, 27.5, 19.6, 2.6, # 2012
    1, 8, 42, 35, 11, 3,              # 2013
    2, 7, 12, 47, 26, 7,              # 2014
    2, 13, 24, 46, 12, 4              # 2015
  )
)
# Sample data: four ids observed yearly from 2012 to 2018, with a random
# response `y` and a per-id start/end year (constant within each id).
dat <- data.frame(
  id = rep(1:4, each = 7),
  year = rep(2012:2018, times = 4),
  y = runif(28),
  start = rep(2012:2015, each = 7),
  end = rep(2014:2017, each = 7)
)
# One panel per id, showing y over the years as a line.
ggplot(data = dat, mapping = aes(x = year, y = y)) +
  geom_line() +
  facet_wrap(~ id)
How do I insert, for each id, two vertical lines whose positions are given by the respective start and end columns?
Try this:
library(dplyr)
library(tidyr)
# One row per id and boundary: keep each id's (start, end) pair once, then
# stack the two columns into `grp` ("start"/"end") and `x` (the year).
# pivot_longer() replaces the superseded gather().
vlines <- dat %>%
  distinct(id, start, end) %>%
  pivot_longer(c(start, end), names_to = "grp", values_to = "x")

# Because vlines carries the facetting variable `id`, every panel only
# receives the two vertical lines belonging to its own id.
ggplot(dat, aes(x = year, y = y)) +
  geom_line() +
  facet_wrap(~id) +
  geom_vline(data = vlines, aes(xintercept = x))