Multiple points for same y-value - r

# Lollipop chart of the 2021 consumer price index rows for eight countries,
# excluding the "Energy" subject.
library(dplyr) # supplies %>% and filter(); ggplot2 alone does not attach them
library(ggplot2)

df <- read.csv("Counsumer_Price_Index.csv")

# NOTE(review): the `ï..Location` column name looks like a UTF-8 byte-order
# mark fused into the first header of the CSV; reading the file with
# fileEncoding = "UTF-8-BOM" would presumably give a plain `Location` column.
# Confirm against the file before renaming anything.
sav <- df %>%
  filter(
    ï..Location %in% c("IRL", "CAN", "AUS", "USA", "FRA", "DEU", "ESP", "ITA"),
    Time == 2021,
    Subject != "Energy"
  )
sav

# One point per country/subject, joined to x = 0 by a dotted segment.
# All subjects of a country share the same y position here.
sav %>%
  ggplot(aes(x = Percentage, y = ï..Location)) +
  geom_point(aes(colour = Subject), size = 2.5) +
  geom_segment(
    aes(yend = ï..Location, colour = Subject),
    xend = 0,
    linetype = "dotted"
  ) +
  theme_light()
I want to make a lollipop plot of the CPI data. For each country there are three categories; how do I display the categories so that they don't overlap?

You could use position_dodge with a coord_flip:
library(ggplot2)

# Country goes on x and the value on y so that position_dodge() can shift the
# subjects sideways within each country; coord_flip() then turns the chart so
# the countries are listed along the vertical axis again.
ggplot(
  data = sav,
  mapping = aes(x = ï..Location, y = Percentage, colour = Subject)
) +
  geom_point(
    size = 2.5,
    position = position_dodge(width = 0.5)
  ) +
  # dotted stick from 0 up to the value, dodged by the same width as the points
  geom_linerange(
    mapping = aes(ymin = 0, ymax = Percentage),
    linetype = "dotted",
    position = position_dodge(width = 0.5)
  ) +
  coord_flip() +
  theme_light()

Something like this:
library(ggplot2)

# Same lollipop layers as in the question, but each subject gets its own
# panel, so the points of one country never share a panel.
ggplot(
  data = sav,
  mapping = aes(x = Percentage, y = ï..Location, colour = Subject)
) +
  geom_point(size = 2.5) +
  geom_segment(
    mapping = aes(yend = ï..Location),
    xend = 0,
    linetype = "dotted"
  ) +
  facet_wrap(vars(Subject)) +
  theme_light()

Related

put legend of area chart inside the figure

here is my data and my area chart:
# Stacked area chart of three time-use series (h1, h2, h3) by age.
library(dplyr)   # %>%
library(tidyr)   # gather()
library(ggplot2)

df <- data.frame(age = 15:40, h1 = 25:50, h2 = 35:60, h3 = 45:70)

# Long format: one row per (age, series) pair.
data1 <- df %>% gather(timeuse, minute, -age)

ggplot(data1, aes(x = age, y = minute, fill = timeuse)) +
  geom_area() +
  # The ColorBrewer grey palette is named "Greys"; "Gray" is not a palette
  # name and is not recognised by scale_fill_brewer().
  scale_fill_brewer(palette = "Greys") +
  scale_x_continuous(breaks = seq(15, 90, by = 5)) +
  scale_y_continuous(breaks = seq(0, 1500, by = 100)) +
  theme_classic()
I want to put legend inside the area chart like this picture:
In general that could be easily achieved using geom_text with position = position_stack(vjust = 0.5). Depending on your real data the tricky part would be to select the x positions where you want to place the labels. In my code below I use dplyr::case_when to set different positions for each category of timeuse. Additionally depending on your real data it might be worthwhile to have a look at ggrepel::geom_text_repel.
library(ggplot2)
library(dplyr)

# Give exactly one row per series a non-empty label (the series name); all
# other rows get "" so geom_text() draws nothing for them.
data1 <- mutate(
  data1,
  label = case_when(
    age == 20 & timeuse == "h3" ~ timeuse,
    age == 27 & timeuse == "h2" ~ timeuse,
    age == 35 & timeuse == "h1" ~ timeuse,
    TRUE ~ ""
  )
)

# Base area chart.
p <- ggplot(data = data1, mapping = aes(x = age, y = minute, fill = timeuse)) +
  geom_area() +
  scale_fill_brewer(palette = "Greys") +
  scale_x_continuous(breaks = seq(15, 90, by = 5)) +
  scale_y_continuous(breaks = seq(0, 1500, by = 100)) +
  theme_classic()

# position_stack(vjust = 0.5) centres each label vertically inside its band;
# the fill legend is dropped because the labels replace it.
p +
  geom_text(
    mapping = aes(label = label),
    position = position_stack(vjust = 0.5)
  ) +
  guides(fill = "none")
You can do it manually with annotate
# Layer fragment (add to a plot with `+`): one text label at fixed coordinates.
annotate(geom = "text", x = 50, y = 2, label = "market work")
or more automated, something like this (play with the selection of rows where you want to place them):
# Layer fragment (add to the area chart with `+`): label every series at one
# randomly sampled row. The long data frame is `data1` and its x column is
# lower-case `age`; `df` is the wide table and has no `timeuse` column.
# NOTE(review): rev() reverses the order of the sampled labels relative to
# their rows; kept from the original answer, so check that the labels land on
# the intended bands.
geom_text(
  data = data1 %>% group_by(timeuse) %>% sample_n(1),
  aes(
    x = age, y = minute,
    label = rev(timeuse), color = rev(timeuse)
  ),
  position = position_stack(vjust = 0.5)
)

How to change color of moving averages in ggplot, plotting two series into one graph?

In order to highlight the moving average in my ggplot visualization, I want to give it a different color (in this case grey or black for both MA lines). When it comes to a graph representing two time series, I struggle to find the best solution. Maybe I need to take a different approach.
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(tidyquant))

# Two random daily series: var1 around 0 and var2 around 12.
V <- 365
dates <- ymd("2013-01-01") + days(0:364)
data <- data.frame(
  var1 = rnorm(V),
  var2 = rnorm(V) + 12,
  date = dates
)

# Long format: one row per (date, variable).
data_melted <- reshape2::melt(data, id.vars = "date")

# Raw series plus a 30-period simple moving average; both layers map
# `variable` to colour, so each MA line gets the colour of its own series.
ggplot(data_melted, aes(x = date, y = value, col = variable)) +
  geom_line() +
  scale_color_manual(values = c("#CC6666", "steelblue")) +
  geom_ma(ma_fun = SMA, n = 30) +
  theme(axis.text.x = element_text(angle = 50, vjust = 0.5)) +
  scale_x_date(date_breaks = "1 month")
I think you can get what you want by not mapping variable to color in aes() for the MA part. I instead include group = variable to tell ggplot2 that the two MA's should be separate series, but no difference in their color based on that.
# x and y are shared by both layers; only the raw series map `variable` to
# colour. The moving averages use `group` to stay separate per series and take
# a fixed black colour instead.
ggplot(data_melted, aes(x = date, y = value)) +
  geom_line(mapping = aes(col = variable)) +
  scale_color_manual(values = c("#CC6666", "steelblue")) +
  tidyquant::geom_ma(
    mapping = aes(group = variable),
    ma_fun = SMA,
    n = 30,
    color = "black"
  ) +
  theme(axis.text.x = element_text(angle = 50, vjust = 0.5)) +
  scale_x_date(date_breaks = "1 month")
If you want different colors, the natural way to do this in ggplot would be to give the moving averages their own values to be mapped to color.
...
# Four colours: the moving averages are mapped to their own colour values
# ("<variable> MA"), so the manual scale needs one entry per raw series and
# one per moving average.
scale_color_manual(
  values = c("#CC6666", "#996666", "steelblue", "slateblue")
) +
tidyquant::geom_ma(
  mapping = aes(x = date, y = value, col = paste(variable, "MA")),
  ma_fun = SMA,
  n = 30
) +
...
I would consider looking at the tsibble library for time series data.
library(tsibble)

# One time series per `variable`, indexed by `date`.
data_melted <- as_tsibble(data_melted, key = "variable", index = "date")

# Centred 5-point moving average (2 before, the point itself, 2 after).
# group_by_key() restarts the window for every series; without it the windows
# at the boundary between var1 and var2 would average values from both.
# .complete = TRUE yields NA wherever the full window is not available.
data_melted <- data_melted %>%
  group_by_key() %>%
  mutate(
    `5-MA` = slider::slide_dbl(
      value, mean,
      .before = 2, .after = 2, .complete = TRUE
    )
  ) %>%
  ungroup()

# Plot var1 and overlay its moving average.
# NOTE(review): autoplot() for tsibbles is presumably provided by
# fabletools/feasts; confirm one of them is attached.
data_melted %>%
  filter(variable == "var1") %>%
  autoplot(value) +
  geom_line(aes(y = `5-MA`), colour = "#D55E00") +
  labs(
    y = "y",
    title = "title"
  ) +
  guides(colour = guide_legend(title = "series"))

How would I change the shape of ggplot legend symbols?

I am using ggplot to visualize the gapminder data set. Can someone help me get the legend to show round points with their sizes respective to the population of the country?
# GDP per capita over time for every country (Kuwait removed), one facet per
# continent, with a continental average drawn in black on top.
df1 <- gapminder[gapminder$country != "Kuwait", ]

# Average GDP per capita per continent and year. weighted.mean() without `w`
# is a plain mean, so the weights are passed explicitly.
# NOTE(review): population is assumed to be the intended weight; confirm.
blackline <- df1 %>%
  group_by(continent, year) %>%
  summarise(average = weighted.mean(gdpPercap, w = as.numeric(pop)))

p <- ggplot(data = df1, mapping = aes(x = year, y = gdpPercap)) +
  scale_x_continuous(breaks = seq(1960, 2000, by = 20)) +
  theme_bw() +
  labs(
    x = "Year", # the x aesthetic is `year`, not life expectancy
    y = "GDP Per Capita",
    color = "Continent",
    size = "Population (100K)"
  ) +
  # colored dots
  geom_point(data = df1, mapping = aes(col = continent, size = pop / 100000)) +
  # colored lines
  geom_line(data = df1, aes(color = continent, group = country)) +
  facet_grid(cols = vars(continent)) +
  # average black line; the constant sizes sit inside aes(), so they are
  # mapped through the size scale like the population values
  geom_line(data = blackline, aes(x = year, y = average, size = 1)) +
  geom_point(data = blackline, aes(x = year, y = average, size = 1000))
p
This could be achieved by setting show.legend=FALSE in the geoms for the blackline:
library(gapminder)
library(ggplot2)
library(dplyr)

# Gapminder without Kuwait.
df1 <- gapminder[gapminder$country != "Kuwait", ]

# Average GDP per capita per continent and year. weighted.mean() without `w`
# is a plain mean, so the weights are passed explicitly.
# NOTE(review): population is assumed to be the intended weight; confirm.
blackline <- df1 %>%
  group_by(continent, year) %>%
  summarise(average = weighted.mean(gdpPercap, w = as.numeric(pop)))
#> `summarise()` regrouping output by 'continent' (override with `.groups` argument)

p <- ggplot(data = df1, mapping = aes(x = year, y = gdpPercap)) +
  scale_x_continuous(breaks = seq(1960, 2000, by = 20)) +
  theme_bw() +
  labs(
    x = "Year", # the x aesthetic is `year`, not life expectancy
    y = "GDP Per Capita",
    color = "Continent",
    size = "Population (100K)"
  ) +
  # colored dots
  geom_point(aes(col = continent, size = pop / 100000)) +
  # colored lines
  geom_line(aes(color = continent, group = country)) +
  facet_grid(cols = vars(continent)) +
  # average black line; show.legend = FALSE keeps these two layers out of
  # the legends, so the size legend shows only the population points
  geom_line(
    data = blackline,
    aes(x = year, y = average, size = 1),
    show.legend = FALSE
  ) +
  geom_point(
    data = blackline,
    aes(x = year, y = average, size = 1000),
    show.legend = FALSE
  )
p

Adding boxplot below density plot

I'm new to ggplot and I'm trying to create this graph:
But actually, I'm just stuck here:
This is my code :
# Density line of carat for each cut, one facet row per cut; the trailing
# boxplot layer has an empty mapping (this is the unfinished part of the
# question).
ggplot(data = diamonds, mapping = aes(x = carat, group = cut)) +
  geom_line(stat = "density", size = 1) +
  theme_grey() +
  facet_wrap(vars(cut), nrow = 5, strip.position = "right") +
  geom_boxplot(aes())
Does someone know what I can do next?
Edit: As of ggplot2 3.3.0, this can be done in ggplot2 without any extension package.
Under the package's news, under new features:
All geoms and stats that had a direction (i.e. where the x and y axes
had different interpretation), can now freely choose their direction,
instead of relying on coord_flip(). The direction is deduced from
the aesthetic mapping, but can also be specified directly with the new
orientation argument (@thomasp85, #3506).
The following will now work directly (replacing all references to geom_boxploth / stat_boxploth in the original answer with geom_boxplot / stat_boxplot):
library(ggplot2)

# Horizontal boxplot drawn below (y = -0.5) a density curve of carat, one
# facet row per cut.
ggplot(diamonds, aes(x = carat, y = -0.5)) +
  # horizontal boxplots & density plots
  geom_boxplot(aes(fill = cut)) +
  geom_density(aes(x = carat), inherit.aes = FALSE) +
  # vertical lines at Q1 / Q2 / Q3; after_stat() replaces the deprecated
  # `..var..` notation for referring to computed variables
  stat_boxplot(geom = "vline", aes(xintercept = after_stat(xlower))) +
  stat_boxplot(geom = "vline", aes(xintercept = after_stat(xmiddle))) +
  stat_boxplot(geom = "vline", aes(xintercept = after_stat(xupper))) +
  facet_grid(cut ~ .) +
  scale_fill_discrete()
Original answer
This can be done easily with a horizontal boxplot geom_boxploth() / stat_boxploth(), found in the ggstance package:
library(ggstance)

# Same chart built with ggstance's horizontal boxplot geom and stat.
ggplot(data = diamonds, mapping = aes(x = carat, y = -0.5)) +
  # boxplot lying on its side, filled by cut
  geom_boxploth(mapping = aes(fill = cut)) +
  # ordinary density curve; it must not inherit the constant y mapping
  geom_density(mapping = aes(x = carat), inherit.aes = FALSE) +
  # quartile markers (Q1 / Q2 / Q3) as vertical lines
  stat_boxploth(geom = "vline", mapping = aes(xintercept = ..xlower..)) +
  stat_boxploth(geom = "vline", mapping = aes(xintercept = ..xmiddle..)) +
  stat_boxploth(geom = "vline", mapping = aes(xintercept = ..xupper..)) +
  # one facet row per cut
  facet_grid(rows = vars(cut)) +
  # keep the default discrete fill scale (ordered factors would otherwise get
  # the viridis scale by default in the current version of ggplot2)
  scale_fill_discrete()
If you are limited to the ggplot2 package for one reason or another, it can still be done, but it would be less straightforward, since geom_boxplot() and geom_density() go in different directions.
Alternative 1: calculate the box plot's coordinates, & flip them manually before passing the results to ggplot(). Add a density layer in the normal way:
# Alternative 1: let ggplot2 compute ordinary (vertical) boxplot statistics,
# then draw the box by hand along the x axis underneath a density curve.
library(dplyr)
library(tidyr)
# Throw-away plot used only to obtain the computed boxplot statistics.
p.box <- ggplot(diamonds, aes(x = cut, y = carat)) + geom_boxplot()
# Keep the whisker ends, quartiles and outliers of each box, and turn the
# numeric x position back into the ordered `cut` factor used for facetting.
p.box.data <- layer_data(p.box) %>%
select(x, ymin, lower, middle, upper, ymax, outliers) %>%
mutate(cut = factor(x, labels = levels(diamonds$cut), ordered = TRUE)) %>%
select(-x)
ggplot(p.box.data) +
# manually plot flipped boxplot: whisker line, then the box itself, both
# centred on y = -0.5
geom_segment(aes(x = ymin, xend = ymax, y = -0.5, yend = -0.5)) +
geom_rect(aes(xmin = lower, xmax = upper, ymin = -0.75, ymax = -0.25, fill = cut),
color = "black") +
# `outliers` is a list column; unnest() expands it to one row per outlier
geom_point(data = . %>% unnest(outliers),
aes(x = outliers, y = -0.5)) +
# vertical lines at Q1 / Q2 / Q3 (quartiles reshaped to one row per line)
geom_vline(data = . %>% select(cut, lower, middle, upper) %>% gather(key, value, -cut),
aes(xintercept = value)) +
# density plot, computed from the raw data rather than the box statistics
geom_density(data = diamonds, aes(x = carat)) +
facet_grid(cut ~ .) +
labs(x = "carat") +
scale_fill_discrete()
Alternative 2: calculate the density plot's coordinates, & flip them manually before passing the results to ggplot(). Add a box plot layer in the normal way. Flip the whole chart:
# Alternative 2: let ggplot2 compute the density curves, draw them by hand
# with x and y swapped, add a regular boxplot and flip the whole chart.
# Throw-away plot used only to obtain the computed density coordinates.
p.density <- ggplot(diamonds, aes(x = carat, group = cut)) + geom_density()
# Keep the curve coordinates and turn the numeric group id back into the
# ordered `cut` factor used for facetting.
p.density.data <- layer_data(p.density) %>%
select(x, y, group) %>%
mutate(cut = factor(group, labels = levels(diamonds$cut), ordered = TRUE)) %>%
select(-group)
# Append, for every cut, a copy of the row with the smallest x and y set to 0.
# NOTE(review): presumably this makes the polygon outline start on the
# baseline; confirm.
p.density.data <- p.density.data %>%
rbind(p.density.data %>%
group_by(cut) %>%
filter(x == min(x)) %>%
mutate(y = 0) %>%
ungroup())
ggplot(diamonds, aes(x = -0.5, y = carat)) +
# manually flipped density plot (density on x, carat on y; coord_flip()
# below swaps them back)
geom_polygon(data = p.density.data, aes(x = y, y = x),
fill = NA, color = "black") +
# box plot
geom_boxplot(aes(fill = cut, group = cut)) +
# vertical lines at Q1 / Q2 / Q3 (drawn as hlines before the flip)
stat_boxplot(geom = "hline", aes(yintercept = ..lower..)) +
stat_boxplot(geom = "hline", aes(yintercept = ..middle..)) +
stat_boxplot(geom = "hline", aes(yintercept = ..upper..)) +
facet_grid(cut ~ .) +
scale_fill_discrete() +
coord_flip()
Maybe this will help, although it still needs a little polishing :)
# Builds one density-plus-boxplot sub-plot per cut as a grob and stacks the
# grobs on an empty ggplot by giving each one different plot margins.
library(tidyverse)
library(magrittr)
library(wrapr)
subplots <-
# one row per distinct cut, with a zero-based running index `rn`
diamonds$cut %>%
unique() %>%
tibble(Cut = .) %>%
mutate(rn = row_number() - 1) %$%
# %$% exposes the columns Cut and rn to map2(); .x is the cut, .y its index
map2(
.x = Cut,
.y = rn,
~annotation_custom(ggplotGrob(
diamonds %>%
filter(cut == .x) %.>%
# NOTE(review): wrapr's %.>% appears to be used so that `.` refers to the
# filtered data in both ggplot(data = .) calls below; confirm
ggplot(data = .) +
aes(x = carat, fill = cut) +
# inner grob: boxplot of carat for this cut, flipped to lie horizontally
annotation_custom(ggplotGrob(
ggplot(data = .) +
geom_boxplot(
aes(x = -1, y = carat),
fill = .y + 1
) +
coord_flip() +
theme_void() +
theme(plot.margin = margin(t = 20))
)) +
geom_line(stat = 'density', size = 1) +
theme_void() +
# top margin grows and bottom margin shrinks with the index .y, which moves
# each sub-plot to its own vertical position
theme(plot.margin = margin(t = .y * 100 + 10, b = (4 - .y) * 100 + 40))
))
)
# adding the list of annotation layers to an empty plot draws all sub-plots
ggplot() + subplots

ggplot2 add sum to chart

Using mtcars as an example, I've produced some violin plots. I wanted to add two things to this chart:
for each group, list n
for each group, sum a third variable (e.g. wt)
I can do (1) with the geom_text code below although (n) is actually plotted on the x axis rather than off to the side.
But I can't work out how to do (2).
Any help much appreciated!
library(ggplot2)
library(gridExtra)
library(ggthemes)

result <- mtcars

# Violin + narrow boxplot of drat for every gear, drawn horizontally, with the
# group size printed by a count stat (the label's position is the count too).
ggplot(data = result, mapping = aes(x = gear, y = drat, group = gear)) +
  theme_tufte(base_size = 15) +
  theme(line = element_blank()) +
  geom_violin(fill = "white") +
  geom_boxplot(fill = "black", alpha = 0.3, width = 0.1) +
  labs(x = "gear", y = "drat") +
  coord_flip() +
  geom_text(stat = "count", mapping = aes(label = ..count.., y = ..count..))
You can add both of these annotations by creating them in your dataframe temporarily prior to graphing. Using the dplyr package, you can create two new columns, one with the count for each group, and one with the sum of wt for each group. This can then be piped directly into your ggplot using %>% (alternatively, you could save the new dataset and insert it into ggplot the way you have it). Then with some minor edits to your geom_text call and adding a second one, we can create the plot you want. The code looks like this:
library(ggplot2)
library(gridExtra)
library(ggthemes)
library(magrittr)
library(dplyr)

result <- mtcars

# Attach the per-gear row count and summed weight to every row, then plot.
# Both text layers use the full data, so each label is drawn once per row.
result %>%
  group_by(gear) %>%
  mutate(
    count = n(),
    sum_wt = sum(wt)
  ) %>%
  ggplot(mapping = aes(x = gear, y = drat, group = gear)) +
  theme_tufte(base_size = 15) +
  theme(line = element_blank()) +
  geom_violin(fill = "white") +
  geom_boxplot(fill = "black", alpha = 0.3, width = 0.1) +
  labs(x = "gear", y = "drat") +
  coord_flip() +
  # group size, slightly above the group's position
  geom_text(
    mapping = aes(
      label = paste0("n = ", count),
      x = gear + 0.25,
      y = 4.75
    )
  ) +
  # summed weight, slightly below the group's position
  geom_text(
    mapping = aes(
      label = paste0("sum wt = ", sum_wt),
      x = gear - 0.25,
      y = 4.75
    )
  )
The new graph looks like this:
Alternatively, if you create a summary data frame named result_sum, then you can manually add that into the geom_text calls.
# gear becomes a factor so it is treated as a discrete class.
result <- mutate(mtcars, gear = factor(as.character(gear)))

# One row per gear with the group size and the summed weight.
result_sum <- result %>%
  group_by(gear) %>%
  summarise(
    count = n(),
    sum_wt = sum(wt)
  )

# The text layers read the one-row-per-gear summary, so every label is drawn
# exactly once.
ggplot(data = result, mapping = aes(x = gear, y = drat, group = gear)) +
  theme_tufte(base_size = 15) +
  theme(line = element_blank()) +
  geom_violin(fill = "white") +
  geom_boxplot(fill = "black", alpha = 0.3, width = 0.1) +
  labs(x = "gear", y = "drat") +
  coord_flip() +
  geom_text(
    data = result_sum,
    mapping = aes(
      label = paste0("n = ", count),
      x = as.numeric(gear) + 0.25,
      y = 4.75
    )
  ) +
  geom_text(
    data = result_sum,
    mapping = aes(
      label = paste0("sum wt = ", sum_wt),
      x = as.numeric(gear) - 0.25,
      y = 4.75
    )
  )
This gives you this:
The benefit to this second method is that the text isn't bold like in the first graph. The bold effect occurs in the first graph due to the text being printed over itself for all observations in the dataframe.
Thanks to those who helped. In the end I used the code below, which plots the calculated values. One set of classes is text-based, so I use vjust to set the vertical offset of the labels.
thanks again!
library(dplyr) # supplies %>%, group_by() and summarise() used below
library(ggplot2)
library(gridExtra)
library(ggthemes)

results <- mtcars
# Turn 'gear' to text to simulate classes, then factorise
results$gear <- as.factor(as.character(results$gear))

# One row per gear with the group size and the summed weight.
result_sum <- results %>%
  group_by(gear) %>%
  summarise(count = n(), sum_wt = sum(wt))

ggplot(results, aes(x = gear, y = drat, group = gear)) +
  theme_tufte(base_size = 15) +
  theme(line = element_blank()) +
  geom_violin(fill = "white") +
  geom_boxplot(fill = "black", alpha = 0.3, width = 0.1) +
  ylab("drat") +
  xlab("gear") +
  coord_flip() +
  # Both labels share the same anchor point (the gear class at y = 5.25);
  # vjust is a constant, so it is set outside aes() and separates the two
  # lines of text vertically.
  geom_text(
    data = result_sum,
    aes(label = paste0("n = ", count), x = gear, y = 5.25),
    vjust = 0
  ) +
  geom_text(
    data = result_sum,
    aes(label = paste0("sum wt = ", round(sum_wt, 0)), x = gear, y = 5.25),
    vjust = -2
  )

Resources