Cumulative plot using ggplot and dplyr - r

Sample data:
dat <- data.frame(year = as.factor(rep(c(2012:2015),each = 6)),id.2wk = rep(c(18,19,20,21,22,23),times = 4),
value = c(1.8,15.6,32.9,27.5,19.6,2.6,1,8,42,35,11,3,2,7,12,47,26,7,2,13,24,46,12,4))
ggplot(dat %>% group_by(year) %>% mutate(cv=cumsum(value)),
aes(x = id.2wk, y = cv, colour = factor(year))) +
geom_line(size = 1)+
geom_point()
packageVersion("ggplot2")
2.2.1
I was expecting a plot similar to below. What went wrong?

How about using data.table to calculate cumulative sum within group?
library(data.table)
library(ggplot2)
ggplot(setDT(dat)[, cv:= cumsum(value), year],
aes(x = id.2wk, y = cv, colour = factor(year))) +
geom_line(size = 1) +
geom_point()
Sample data:
dat <- data.frame(year = as.factor(rep(c(2012:2015),each = 6)),
id.2wk = rep(c(18,19,20,21,22,23),times = 4),
value = c(1.8,15.6,32.9,27.5,19.6,2.6,1,8,42,35,11,3,2,7,12,47,26,7,2,13,24,46,12,4))

Related

Using ggplot with lapply does not show y labels

I have the following data:
library(dplyr)
countries <- c('Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Republic of Cyprus')
year <- rep(2009:2022, length(countries))
country <- as.data.frame(rep(countries, length(2009:2022)))
country <- country[order(country$`rep(countries, length(2009:2022))`),]
df<- cbind.data.frame(country, year)
df$year <- as.numeric(df$year)
df <- df %>%
group_by(country) %>%
mutate(n_obs = 1:n())
df <- df %>% group_by(country) %>%
mutate(gdp = rnorm(n = 1, mean = 3000, sd = 300) + 20.64*n_obs,
inflation = rnorm(n = 1, mean = 5, sd = 3) + 1.23*n_obs)
I want to make line plots for gdp and inflation one by one like so:
ggplot(df, aes(x = year, y = inflation, color = country)) + geom_line()
ggplot(df, aes(x = year, y = gdp, color = country)) + geom_line()
However, in the real data, I have a lot of variables that I want to plot, and I was wondering how I could use lapply to achieve that. I tried the following code:
lapply(df[,c(4,5)], function(var)
ggplot(data = df, aes(x = year, y = var, color = country))
+ geom_line() + labs(x = "year", y = var))
This works, but I cannot get the y variable label on the plot. Any help would be appreciated.
Regards
You can use the following code:
lapply(names(df)[4:5], function(var)
ggplot(data = df, aes(x = year, y = .data[[var]], color = country))
+ geom_line() + ylab(var))
Output gdp:
Output inflation:

Is there a way for adding labels with number of observations per value in violin plot in ggplot?

Image you want to create a violin plot and have data like this:
set.seed(123)
Bodytype <- sample(LETTERS[1:3], 500, replace = T)
Weight <- rnorm(500,40,1)
df <- data.frame(Bodytype, Weight)
ggplot(data = df,
aes(x = Bodytype, y = Weight, fill = Bodytype)) +
geom_violin(scale = "count", trim = F, adjust = 0.75) +
scale_y_continuous(breaks = seq(34, 46, 1)) +
theme_gray()
Now I would like to add text label or something for each bodytype at each kg level to see how many observations in each bodytype category that weigh 36 kg, 37kg, etc. Is there are way for achieving this, or am I better off using another plot?
This can be done in many ways, here is one:
library(dplyr)
library(ggplot2)
summ <- df %>%
group_by(Bodytype) %>%
summarize(n = n(), Weight = mean(Weight))
ggplot(data = df, aes(x = Bodytype, y = Weight, fill = Bodytype)) +
geom_violin(scale = "count", trim = F, adjust = 0.75) +
scale_y_continuous(breaks = seq(34, 46, 1)) +
theme_gray() +
geom_text(aes(label = n), data = summ)
Okay, so you want multiple weight counts:
weightcounts <- df %>%
mutate(Weight = as.integer(round(Weight, 0))) %>%
group_by(Bodytype, Weight) %>%
count()
ggplot(data = df, aes(x = Bodytype, y = Weight, fill = Bodytype)) +
geom_violin(scale = "count", trim = F, adjust = 0.75) +
scale_y_continuous(breaks = seq(34, 46, 1)) +
theme_gray() +
geom_text(aes(label = n), data = weightcounts)
Either way, the premise is that you can generate a summary frame with the associated labels you need, and then add geom_text (or geom_label) with the new dataset as an argument.
Another way computing labels outside in base R:
set.seed(123)
Bodytype <- sample(LETTERS[1:3], 500, replace = T)
Weight <- rnorm(500,40,1)
df <- data.frame(Bodytype, Weight)
#Labels
df$i <- 1
labs <- aggregate(i~Bodytype,df,sum)
labs$Weight<-NA
#Plot
ggplot(data = df,
aes(x = Bodytype, y = Weight, fill = Bodytype)) +
geom_violin(scale = "count", trim = F, adjust = 0.75) +
geom_text(data=labs,aes(x=Bodytype,y=45,label=i))
scale_y_continuous(breaks = seq(34, 46, 1)) +
theme_gray()
Output:

Insert vertical lines for each group in ggplot

dat <- data.frame(id = rep(1:4,each = 7), year = rep(2012:2018, times = 4),
y = runif(28), start = rep(2012:2015,each = 7), end = rep(2014:2017,each = 7))
ggplot(dat, aes(x = year, y = y)) + geom_line() + facet_wrap(~id)
How do I insert for each id, two vertical lines whose position are given by the respective start and end column?
Try this:
library(dplyr)
library(tidyr)
vlines <- dat %>%
select(id,start,end) %>%
distinct() %>%
gather(key = grp,value = x,start,end)
ggplot(dat, aes(x = year, y = y)) +
geom_line() +
facet_wrap(~id) +
geom_vline(data = vlines,aes(xintercept = x))

Combined geom_bar and geom_point legend in ggplotly

I am trying to get a combined bar + point chart with a legend for both bars different Indicators) and points (a change in the Indicator). I tried to follow along with ggplot2 legend for plot combining geom_bar and geom_point and introduced a shape into my geom_point (without doing that I could not get a legend for points).
library(ggplot2)
library(dplyr)
library(ggthemes)
library(plotly)
set.seed(369)
obs <- 6
values1 <- c(round(100 + rnorm(obs) * 10, 2))
values2 <- c(round(100 + rnorm(obs) * 10, 2))
df <- data.frame(Year = rep(2014:2019, 2*2),
value = c(rep(values1, 2), rep(values2, 2)),
Indicator = rep(c("Indicator1", "Indicator2"), each = obs * 2),
Type = rep(c("Bar", "Point"), each = obs))
p <- ggplot(df, aes(value))
bars <- df %>%
filter(Type == "Bar")
points <- df %>%
filter(Type == "Point")
pl <- p +
geom_bar(data = bars,
aes(fill = Indicator, group = Indicator, x = Year, y = value), stat = "identity", position = "dodge") +
geom_point(data = points, aes(x = Year, y = value, group = Indicator, fill = Indicator, shape = "Change"), position = position_dodge(width = 0.9)) +
theme_tufte()
p
ggplotly(pl, tooltip = c("value"))
ggplotly has the output I want, however the legend has a strange grouping. Is there a way to fix the legend in the chart below?
there's probably a better way, but how's this:
library(tidyverse)
obs <- 6
values1 <- c(round(100 + rnorm(obs) * 10, 2))
values2 <- c(round(100 + rnorm(obs) * 10, 2))
df <- data.frame(Year = rep(2014:2019, 2*2),
value = c(rep(values1, 2), rep(values2, 2)),
Indicator = rep(c("Indicator1", "Indicator2"), each = obs * 2),
Type = rep(c("Bar", "Point"), each = obs))
bars <- df %>% filter(Type == "Bar")
points <- df %>% filter(Type == "Point") %>% mutate(Year =
ifelse(Indicator == "Indicator1", Year - 0.25, Year + 0.25))
p <- ggplot(bars, aes(fill = Indicator, group = Indicator, x = Year, y = value)) +
geom_bar(stat = "identity", position = "dodge", width = 1)
p <- p + geom_point(data = points, mapping = aes(fill = Indicator, x =
Year, y = value), shape = 21) + labs(x = "value") + labs(y = "value")
p
I don't know ggplotly() , but building separate geom_bar() and geom_point() plots, and then using get_legend() to remove each legend, and then building them back with plot_grid with the full plot seems a decent option.
library(tidyverse)
obs <- 6
values1 <- c(round(100 + rnorm(obs) * 10, 2))
values2 <- c(round(100 + rnorm(obs) * 10, 2))
df <- data.frame(Year = rep(2014:2019, 2*2),
value = c(rep(values1, 2), rep(values2, 2)),
Indicator = rep(c("Indicator1", "Indicator2"), each = obs * 2),
Type = rep(c("Bar", "Point"), each = obs))
bars <- df %>% filter(Type == "Bar")
points <- df %>% filter(Type == "Point") %>% mutate(Year =
ifelse(Indicator == "Indicator1", Year - 0.25, Year + 0.25),
IndicatorChange = Indicator)
p1 <- ggplot(points, mapping = aes(fill = IndicatorChange, x = Year, y = value )) + labs(x = "value") + labs(y = "value") +
geom_point(shape = 21)
p1_leg <- get_legend(p1)
p2 <- ggplot(bars, aes(fill = Indicator, group = Indicator, x = Year, y = value)) +
geom_bar(stat = "identity", position = "dodge")
p2_leg <- get_legend(p2)
p_leg <- plot_grid(p1_leg, p2_leg, ncol = 1, nrow = 5) #toggle nrow to get right spacing between legends
p3 <-ggplot(bars, aes(fill = Indicator, group = Indicator, x = Year, y = value)) + geom_bar(stat = "identity", position = "dodge", width = 1)
p3 <- p3 + geom_point(data = points, mapping = aes(fill = Indicator, x = Year, y = value), shape = 21) +
labs(x = "value") + labs(y = "value")
p3 <- p3 + theme(legend.position="none")
p3
p <- plot_grid(p3, p_leg, ncol =2, nrow =2) #more toggling possible
p
I don't know whether this is what you want(although the font size of the legend should be modified):
library(ggplot2)
library(dplyr)
library(ggthemes)
library(plotly)
set.seed(369)
obs <- 6
values1 <- c(round(100 + rnorm(obs) * 10, 2))
values2 <- c(round(100 + rnorm(obs) * 10, 2))
df <- data.frame(Year = rep(2014:2019, 2*2),
value = c(rep(values1, 2), rep(values2, 2)),
Indicator = rep(c("Indicator1", "Indicator2"), each = obs * 2),
Type = rep(c("Bar", "Point"), each = obs))
p <- ggplot(df, aes(value))
bars <- df %>%
filter(Type == "Bar")
points <- df %>%
filter(Type == "Point")
points$Type1=paste(points$Indicator,"change",sep=",")
pl <- p +
geom_bar(data = bars,
aes(fill = Indicator, group = Indicator, x = Year, y = value), stat = "identity", position = "dodge") +
geom_point(data = points,
aes(x = Year, y = value, group = Indicator, fill = Indicator, shape = "Change"),
position = position_dodge(width = 0.9)) +
theme_tufte()+
theme(legend.position="bottom")
pl <- p +
geom_bar(data = bars,
aes(fill = Indicator, group = Indicator,x = Year, y = value), stat = "identity", position = "dodge") +
geom_point(data = points,
aes(x = Year, y = value,shape = Type1),
position = position_dodge(width = 0.9)) +
theme_tufte()+
theme(legend.position="bottom",
legend.title=element_blank())
p

Combining position_dodge and position_fill in ggplot2

What I would like to do is use both the position = "fill" and the position = "dodge" arguments of geom_bar() at the same time somehow. Using some sample data
set.seed(1234)
df <- data.frame(
Id = rep(1:10, each = 12),
Month = rep(1:12, times = 10),
Value = sample(1:2, 10 * 12, replace = TRUE)
)
I'm able to create the following graph
df.plot <- ggplot(df, aes(x = as.factor(Month), fill = as.factor(Value))) +
geom_bar(position = "fill") +
scale_x_discrete(breaks = 1:12) +
scale_y_continuous(labels = percent) +
labs(x = "Month", y = "Value")
I like the scaling and labeling of this graph but I want to be able to unstack it. However when I do the following
df.plot2 <- ggplot(df, aes(x = as.factor(Month), fill = as.factor(Value))) +
geom_bar(position = "dodge", aes(y = (..count..)/sum(..count..))) +
scale_x_discrete(breaks = 1:12) +
scale_y_continuous(labels = percent) +
labs(x = "Month", y = "Value")
The bars are in the position and scaling that I want but the y-axis labels represent the percentage of each bar relative to the total count, not the count within each month.
All in all I want the visuals of the second graph with the labeling of the first graph. Is there a relatively easy way to automate this?
Expanding on my comment:
library(ggplot2)
library(dplyr)
library(tidyr)
library(scales)
df1 <- df %>%
group_by(Month) %>%
summarise(Value1 = sum(Value == 1) / n(),
Value2 = sum(Value == 2) / n()) %>%
gather(key = Group,value = Val,Value1:Value2)
df.plot2 <- ggplot(df1, aes(x = as.factor(Month),
y = Val,
fill = as.factor(Group))) +
geom_bar(position = "dodge",stat = "identity") +
scale_y_continuous(labels = percent_format()) +
scale_x_discrete(breaks = 1:12) +
labs(x = "Month", y = "Value")

Resources