Insert vertical lines for each group in ggplot - r

# Toy panel: four ids observed yearly from 2012 to 2018. Every id carries its
# own `start` and `end` year, repeated on each of its seven rows.
dat <- data.frame(
  id = rep(1:4, each = 7),
  year = rep(2012:2018, times = 4),
  y = runif(28),
  start = rep(2012:2015, each = 7),
  end = rep(2014:2017, each = 7)
)

# One line panel per id.
ggplot(dat, aes(x = year, y = y)) +
  geom_line() +
  facet_wrap(~id)
For each id, how do I insert two vertical lines whose positions are given by the respective start and end columns?

Try this:
library(dplyr)
library(tidyr)
# One row per (id, boundary): the start and end years are moved from two
# columns into a single `x` column; `grp` records which boundary a row is.
# pivot_longer() replaces the superseded gather().
vlines <- dat %>%
  distinct(id, start, end) %>%
  pivot_longer(cols = c(start, end), names_to = "grp", values_to = "x")

# `vlines` keeps the faceting variable `id`, so each panel draws only the
# two vertical lines that belong to its own id.
ggplot(dat, aes(x = year, y = y)) +
  geom_line() +
  facet_wrap(~id) +
  geom_vline(data = vlines, aes(xintercept = x))

Related

Using ggplot with lapply does not show y labels

I have the following data:
library(dplyr)
# Simulated country-year panel: 5 countries x 14 years (2009-2022).
countries <- c(
  "Austria", "Belgium", "Bulgaria", "Croatia", "Republic of Cyprus"
)
years <- 2009:2022

# Build the panel directly with `each`/`times` instead of sorting a
# one-column data frame through its auto-generated column name.
# Column order (country, year, n_obs, gdp, inflation) is kept, so positional
# selections such as df[, c(4, 5)] still pick gdp and inflation.
df <- data.frame(
  country = rep(countries, each = length(years)),
  year = as.numeric(rep(years, times = length(countries)))
) %>%
  group_by(country) %>%
  mutate(
    n_obs = seq_len(n()),
    # rnorm(n = 1, ...) draws ONE value per country (a country-specific
    # intercept); the linear trend in n_obs is added on top of it.
    gdp = rnorm(n = 1, mean = 3000, sd = 300) + 20.64 * n_obs,
    inflation = rnorm(n = 1, mean = 5, sd = 3) + 1.23 * n_obs
  ) %>%
  # Drop the grouping so later verbs do not silently operate per country.
  ungroup()
I want to make line plots for gdp and inflation one by one like so:
# The same line chart drawn twice, once per indicator, coloured by country.
ggplot(df, aes(x = year, y = inflation, color = country)) +
  geom_line()
ggplot(df, aes(x = year, y = gdp, color = country)) +
  geom_line()
However, in the real data, I have a lot of variables that I want to plot, and I was wondering how I could use lapply to achieve that. I tried the following code:
# Attempt to draw one plot per variable.
# lapply() iterates over the columns selected by df[,c(4,5)], so inside the
# function `var` holds the column's VALUES, not its name. The same vector is
# then passed to labs(y = var), i.e. no column name is available to be used
# as the axis title.
# The lines starting with `+` continue the expression only because they sit
# inside the parentheses of the lapply() call.
lapply(df[,c(4,5)], function(var)
ggplot(data = df, aes(x = year, y = var, color = country))
+ geom_line() + labs(x = "year", y = var))
This works, but I cannot get the y variable label on the plot. Any help would be appreciated.
Regards
You can use the following code:
# Iterate over the column NAMES (4th and 5th) rather than the column values:
# each name is used both to look the column up through the `.data` pronoun
# and as the y-axis title.
lapply(names(df)[4:5], function(y_col) {
  ggplot(data = df, aes(x = year, y = .data[[y_col]], color = country)) +
    geom_line() +
    ylab(y_col)
})
Output gdp:
Output inflation:

conditional probability is not displaying properly in facet label in ggplot2

I am trying to display the expression p(\theta|y) in a facet label but it displays as p(|(\theta, y))
I have the following code:
library(tidyverse)
# Grid on [0, 1] and the Beta prior / posterior densities evaluated on it.
X <- seq(0, 1, by = 0.01)
prior <- dbeta(X, 3, 2)
posterior <- dbeta(X, 14, 6)

# Long format: one row per (X, density type). The factor levels fix the
# facet order (prior first, posterior second).
plot_df <-
  tibble(X = X, `p(theta)` = prior, `p(theta|y)` = posterior) %>%
  pivot_longer(-X, names_to = "type", values_to = "Y") %>%
  mutate(type = factor(type, levels = c("p(theta)", "p(theta|y)")))

# label_parsed parses each level as an R expression. In "p(theta|y)" the
# bar is parsed as the `|` operator applied to theta and y, which is why the
# strip does not show the intended conditional-probability notation.
ggplot(plot_df, aes(X, Y)) +
  geom_line() +
  facet_wrap(
    ~type,
    ncol = 1,
    labeller = label_parsed,
    strip.position = "left"
  ) +
  xlab(expr(theta)) +
  ylab("")
So how do I get that to display as p(\theta | y)?
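The following works. The trick is to put the vertical bar between single quotes, so that it is parsed as a character string rather than as the `|` operator, and to join it to its neighbours with asterisks (plotmath's juxtaposition operator, which places terms side by side without extra spacing). The column name must match the new factor levels.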
# Same long-format table as in the question, but the posterior label is
# written as p(theta*'|'*y): the quoted bar is a plain string and `*` places
# it directly next to theta and y. The column name and the factor level must
# be spelled identically, otherwise the level would become NA.
plot_df <-
  tibble(X = X, `p(theta)` = prior, `p(theta*'|'*y)` = posterior) %>%
  pivot_longer(-X, names_to = "type", values_to = "Y") %>%
  mutate(type = factor(type, levels = c("p(theta)", "p(theta*'|'*y)")))
The rest of the code is exactly the same.
# One stacked panel per density type; the strip text (shown on the left) is
# parsed as a plotmath expression, the x axis shows the symbol theta and the
# y axis has no title.
ggplot(plot_df, aes(X, Y)) +
  geom_line() +
  facet_wrap(
    ~type,
    ncol = 1,
    labeller = label_parsed,
    strip.position = "left"
  ) +
  labs(x = expr(theta), y = "")

Cumulative plot using ggplot and dplyr

Sample data:
# Four years (2012-2015), six two-week periods (18-23) per year.
dat <- data.frame(
  year = as.factor(rep(2012:2015, each = 6)),
  id.2wk = rep(c(18, 19, 20, 21, 22, 23), times = 4),
  value = c(
    1.8, 15.6, 32.9, 27.5, 19.6, 2.6,
    1, 8, 42, 35, 11, 3,
    2, 7, 12, 47, 26, 7,
    2, 13, 24, 46, 12, 4
  )
)

# The running total `cv` is computed within each year and drawn as one
# coloured line (with points) per year.
ggplot(
  dat %>%
    group_by(year) %>%
    mutate(cv = cumsum(value)),
  aes(x = id.2wk, y = cv, colour = factor(year))
) +
  geom_line(size = 1) +
  geom_point()
packageVersion("ggplot2")
2.2.1
I was expecting a plot similar to below. What went wrong?
How about using data.table to calculate cumulative sum within group?
library(data.table)
library(ggplot2)
# Cumulative sum of `value` within each year, computed with data.table.
# as.data.table() works on a copy, so the caller's `dat` is neither converted
# to a data.table nor given an extra `cv` column as a side effect of plotting.
ggplot(
  as.data.table(dat)[, cv := cumsum(value), by = year],
  aes(x = id.2wk, y = cv, colour = factor(year))
) +
  geom_line(size = 1) +
  geom_point()
Sample data:
# Four years (2012-2015), six two-week periods (18-23) per year; `value` is
# listed one year per row below.
dat <- data.frame(
  year = as.factor(rep(2012:2015, each = 6)),
  id.2wk = rep(c(18, 19, 20, 21, 22, 23), times = 4),
  value = c(
    1.8, 15.6, 32.9, 27.5, 19.6, 2.6,
    1, 8, 42, 35, 11, 3,
    2, 7, 12, 47, 26, 7,
    2, 13, 24, 46, 12, 4
  )
)

Combined geom_bar and geom_point legend in ggplotly

I am trying to get a combined bar + point chart with a legend for both the bars (different Indicators) and the points (a change in the Indicator). I tried to follow along with "ggplot2 legend for plot combining geom_bar and geom_point" and introduced a shape into my geom_point (without doing that I could not get a legend for the points).
library(ggplot2)
library(dplyr)
library(ggthemes)
library(plotly)
set.seed(369)

# Two simulated series of six yearly values each.
obs <- 6
values1 <- round(100 + rnorm(obs) * 10, 2)
values2 <- round(100 + rnorm(obs) * 10, 2)

# 24 rows: 2 indicators x 2 types (Bar / Point) x 6 years. Within an
# indicator the bar and the point rows carry the same values.
df <- data.frame(
  Year = rep(2014:2019, times = 2 * 2),
  value = c(rep(values1, 2), rep(values2, 2)),
  Indicator = rep(c("Indicator1", "Indicator2"), each = obs * 2),
  # `times = 2` spells out the repetition that was previously obtained
  # through silent recycling of a 12-element vector.
  Type = rep(c("Bar", "Point"), each = obs, times = 2)
)

p <- ggplot(df, aes(value))

bars <- df %>%
  filter(Type == "Bar")
points <- df %>%
  filter(Type == "Point")

# Dodged bars per indicator, with the points dodged by the same width so
# they sit on top of their bars. Mapping the constant "Change" to `shape`
# is what produces a legend entry for the points.
pl <- p +
  geom_bar(
    data = bars,
    aes(fill = Indicator, group = Indicator, x = Year, y = value),
    stat = "identity",
    position = "dodge"
  ) +
  geom_point(
    data = points,
    aes(
      x = Year, y = value, group = Indicator, fill = Indicator,
      shape = "Change"
    ),
    position = position_dodge(width = 0.9)
  ) +
  theme_tufte()

# Print the assembled plot; `p` alone is the empty base plot without layers.
pl
ggplotly(pl, tooltip = c("value"))
ggplotly has the output I want, however the legend has a strange grouping. Is there a way to fix the legend in the chart below?
there's probably a better way, but how's this:
library(tidyverse)
# Same seed as the question, so the simulated values are reproducible.
set.seed(369)

obs <- 6
values1 <- round(100 + rnorm(obs) * 10, 2)
values2 <- round(100 + rnorm(obs) * 10, 2)

df <- data.frame(
  Year = rep(2014:2019, times = 2 * 2),
  value = c(rep(values1, 2), rep(values2, 2)),
  Indicator = rep(c("Indicator1", "Indicator2"), each = obs * 2),
  Type = rep(c("Bar", "Point"), each = obs, times = 2)
)

bars <- df %>%
  filter(Type == "Bar")

# Dodge the points by hand: with bars of width 1 split between two
# indicators, each bar is centred a quarter of a year left or right of the
# year itself.
points <- df %>%
  filter(Type == "Point") %>%
  mutate(
    Year = if_else(Indicator == "Indicator1", Year - 0.25, Year + 0.25)
  )

# shape = 21 is a fillable circle, so the points reuse the bars' fill scale
# and share a single legend with them.
p <- ggplot(
  bars,
  aes(fill = Indicator, group = Indicator, x = Year, y = value)
) +
  geom_col(position = "dodge", width = 1) +
  geom_point(
    data = points,
    mapping = aes(fill = Indicator, x = Year, y = value),
    shape = 21
  ) +
  # The x axis shows Year; it was previously mislabelled "value".
  labs(x = "Year", y = "value")
p
I don't know ggplotly(), but building separate geom_bar() and geom_point() plots, using get_legend() to extract the legend from each, and then placing both legends next to the full (legend-free) plot with plot_grid() seems a decent option. Both functions come from the cowplot package.
library(tidyverse)
# Same seed as the question, so the simulated values are reproducible.
set.seed(369)

obs <- 6
values1 <- round(100 + rnorm(obs) * 10, 2)
values2 <- round(100 + rnorm(obs) * 10, 2)

df <- data.frame(
  Year = rep(2014:2019, times = 2 * 2),
  value = c(rep(values1, 2), rep(values2, 2)),
  Indicator = rep(c("Indicator1", "Indicator2"), each = obs * 2),
  Type = rep(c("Bar", "Point"), each = obs, times = 2)
)

bars <- df %>%
  filter(Type == "Bar")

# Points are shifted by a quarter of a year to sit over their dodged bars;
# `IndicatorChange` duplicates Indicator only to give the point legend its
# own title.
points <- df %>%
  filter(Type == "Point") %>%
  mutate(
    Year = if_else(Indicator == "Indicator1", Year - 0.25, Year + 0.25),
    IndicatorChange = Indicator
  )

# Step 1: throw-away plots that exist only to harvest one legend each.
# get_legend() and plot_grid() are cowplot functions; they are called with
# an explicit namespace because cowplot is not attached by library(tidyverse).
p1 <- ggplot(points, aes(fill = IndicatorChange, x = Year, y = value)) +
  geom_point(shape = 21) +
  labs(x = "Year", y = "value")
p1_leg <- cowplot::get_legend(p1)

p2 <- ggplot(
  bars,
  aes(fill = Indicator, group = Indicator, x = Year, y = value)
) +
  geom_col(position = "dodge")
p2_leg <- cowplot::get_legend(p2)

# Stack the two legends; toggle nrow to get the right spacing between them.
p_leg <- cowplot::plot_grid(p1_leg, p2_leg, ncol = 1, nrow = 5)

# Step 2: the real plot (bars + points) with its own legend switched off.
p3 <- ggplot(
  bars,
  aes(fill = Indicator, group = Indicator, x = Year, y = value)
) +
  geom_col(position = "dodge", width = 1) +
  geom_point(
    data = points,
    mapping = aes(fill = Indicator, x = Year, y = value),
    shape = 21
  ) +
  # The x axis shows Year; it was previously mislabelled "value".
  labs(x = "Year", y = "value") +
  theme(legend.position = "none")
p3

# Step 3: plot on the left, stacked legends on the right (more toggling of
# ncol / nrow is possible).
p <- cowplot::plot_grid(p3, p_leg, ncol = 2, nrow = 2)
p
I don't know whether this is what you want (although the font size of the legend should be modified):
library(ggplot2)
library(dplyr)
library(ggthemes)
library(plotly)
set.seed(369)

obs <- 6
values1 <- round(100 + rnorm(obs) * 10, 2)
values2 <- round(100 + rnorm(obs) * 10, 2)

df <- data.frame(
  Year = rep(2014:2019, times = 2 * 2),
  value = c(rep(values1, 2), rep(values2, 2)),
  Indicator = rep(c("Indicator1", "Indicator2"), each = obs * 2),
  Type = rep(c("Bar", "Point"), each = obs, times = 2)
)

p <- ggplot(df, aes(value))

bars <- df %>%
  filter(Type == "Bar")
points <- df %>%
  filter(Type == "Point")

# One legend key per indicator for the points, e.g. "Indicator1,change".
points$Type1 <- paste(points$Indicator, "change", sep = ",")

# Bars are keyed by fill (Indicator) and points by shape (Type1), so each
# geom gets its own group of legend entries. An earlier draft of `pl` that
# mapped shape = "Change" was overwritten before ever being used and has
# been removed.
pl <- p +
  geom_bar(
    data = bars,
    aes(fill = Indicator, group = Indicator, x = Year, y = value),
    stat = "identity",
    position = "dodge"
  ) +
  geom_point(
    data = points,
    aes(x = Year, y = value, shape = Type1),
    position = position_dodge(width = 0.9)
  ) +
  theme_tufte() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank()
  )

# Print the assembled plot; `p` alone is the empty base plot without layers.
pl

Combining position_dodge and position_fill in ggplot2

What I would like to do is use both the position = "fill" and the position = "dodge" arguments of geom_bar() at the same time somehow. Using some sample data
# Reproducible toy data: 10 ids x 12 months, each row with a random value
# drawn from {1, 2}.
set.seed(1234)
df <- data.frame(
  Id = rep(1:10, each = 12),
  Month = rep(1:12, times = 10),
  Value = sample(x = 1:2, size = 10 * 12, replace = TRUE)
)
I'm able to create the following graph
# Stacked bars rescaled to 100% within each month (position = "fill"), with
# the y axis labelled as percentages. `percent` is qualified with its
# namespace because the snippet does not attach the scales package.
df.plot <- ggplot(df, aes(x = as.factor(Month), fill = as.factor(Value))) +
  geom_bar(position = "fill") +
  scale_x_discrete(breaks = 1:12) +
  scale_y_continuous(labels = scales::percent) +
  labs(x = "Month", y = "Value")
I like the scaling and labeling of this graph but I want to be able to unstack it. However when I do the following
# Dodged (unstacked) bars. Each bar's height is its count divided by the
# total count over ALL bars, so the percentages are relative to the whole
# data set rather than to the bar's own month.
# after_stat(count) replaces the deprecated ..count.. notation, and
# `percent` is qualified because scales is not attached here.
df.plot2 <- ggplot(df, aes(x = as.factor(Month), fill = as.factor(Value))) +
  geom_bar(
    position = "dodge",
    aes(y = after_stat(count / sum(count)))
  ) +
  scale_x_discrete(breaks = 1:12) +
  scale_y_continuous(labels = scales::percent) +
  labs(x = "Month", y = "Value")
The bars are in the position and scaling that I want but the y-axis labels represent the percentage of each bar relative to the total count, not the count within each month.
All in all I want the visuals of the second graph with the labeling of the first graph. Is there a relatively easy way to automate this?
Expanding on my comment:
library(ggplot2)
library(dplyr)
library(tidyr)
library(scales)
# Share of each Value level within a month, computed up front so the bars
# can simply be dodged: one row per (Month, Group), with Val in [0, 1].
# pivot_longer() replaces the superseded gather().
df1 <- df %>%
  group_by(Month) %>%
  summarise(
    Value1 = sum(Value == 1) / n(),
    Value2 = sum(Value == 2) / n()
  ) %>%
  pivot_longer(Value1:Value2, names_to = "Group", values_to = "Val")

# The heights are already proportions, so geom_col() draws them as given
# and the y axis only needs percent formatting.
df.plot2 <- ggplot(
  df1,
  aes(x = as.factor(Month), y = Val, fill = as.factor(Group))
) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = percent_format()) +
  scale_x_discrete(breaks = 1:12) +
  labs(x = "Month", y = "Value")

Resources