ggplot - plotting bars and lines in the same chart - r

I need to generate a plot with bar graph for two variables and a line with the third variable.
I can create a column graph for one variable like below
# Take the leading rows of mtcars and copy the model names out of the row
# names into a regular column so they can be mapped to the x axis.
df <- head(mtcars)
df$car <- rownames(df)

# One bar per car; bar height is disp.
ggplot(data = df, mapping = aes(x = car, y = disp)) +
  geom_col()
Following this answer, I can plot two variables, disp and hp, as below:
library(tidyr)

# Model names live in the row names; copy them into a column for plotting.
df$car <- row.names(df)

# Reshape to long form: one row per (car, variable) pair, so that `var` can
# drive the fill colour. pivot_longer() replaces the superseded gather().
df_long <- pivot_longer(
  df,
  cols = c(disp, hp),
  names_to = "var",
  values_to = "value"
)

# Side-by-side bars for disp and hp within each car.
ggplot(df_long, aes(x = car, y = value, fill = var)) +
  geom_col(position = "dodge")
I need a third variable, qsec, plotted as a line, as in the chart below. How do I go about this?

You can try:
library(tidyverse)
# Example data: the model name plus the two bar variables, first six rows only
data <- mtcars %>%
  mutate(car = rownames(mtcars)) %>%
  select(car, disp, hp) %>%
  slice(seq_len(6))
# Long form: one row per (car, variable) so that `key` can drive the bar fill.
# pivot_longer() replaces the superseded gather().
data %>%
  pivot_longer(-car, names_to = "key", values_to = "value") %>%
  group_by(car) %>%
  mutate(
    # The overlaid line shows the per-car median of the plotted values; `data`
    # holds only car, disp and hp, so this is a stand-in for a real qsec column.
    qsec_value = median(value),
    # Constant factor so the points and the line share one legend entry.
    qsec = factor("qsec")
  ) %>%
  # Drop the grouping once the per-car median has been computed.
  ungroup() %>%
  ggplot(aes(x = car)) +
  geom_col(aes(y = value, fill = key), position = "dodge") +
  geom_point(aes(y = qsec_value, colour = qsec)) +
  # group = 1 joins all cars with a single line even though x is discrete.
  geom_line(aes(y = qsec_value, colour = qsec, group = 1)) +
  scale_colour_manual(name = "", values = 1) +
  theme(
    legend.position = "top",
    legend.title = element_blank()
  )
Less code, same result:
# Compact variant: stat_summary() computes the per-car median on the fly
# instead of precomputing it in the data.
data %>%
  # Every column except the first (car) is stacked into name/value pairs.
  pivot_longer(-1) %>%
  ggplot(aes(x = car)) +
  geom_col(aes(y = value, fill = name), position = "dodge") +
  # The legend label is spelled "qsec" so it matches the longer version above.
  stat_summary(
    aes(y = value, group = 1, colour = "qsec"),
    fun = "median",
    geom = "point"
  ) +
  stat_summary(
    aes(y = value, group = 1, colour = "qsec"),
    fun = "median",
    geom = "line"
  ) +
  scale_colour_manual(name = "", values = 1)

You need another layer, and because geom_line is meant for continuous data, you have to supply continuous x-values for the line layer (here, the row positions 1, 2, ...). Because of that, the order of the data becomes crucial, so you also have to sort it:
# Long form with qsec included, sorted by car because the line layer below is
# positioned by row index rather than by car name.
# pivot_longer() replaces the superseded gather().
df %>%
  pivot_longer(c(disp, hp, qsec), names_to = "var", values_to = "value") %>%
  arrange(car) %>%
  {
    # The braces stop magrittr from inserting the data as the first argument,
    # so `.` can be reused as the data of each layer.
    print(
      ggplot() +
        geom_col(
          data = filter(., var != "qsec"),
          mapping = aes(x = car, y = value, fill = var),
          position = "dodge"
        ) +
        # seq_along() gives the row positions 1, 2, ... and, unlike
        # 1:length(), stays empty when there are no rows.
        geom_line(
          data = filter(., var == "qsec"),
          mapping = aes(x = seq_along(car), y = value)
        )
    )
  }
Edit:
By the way, you can check that each qsec value lines up with the right x-value by calling plotly::ggplotly(): hovering over an element shows its values, which makes it easier to read them and compare them with the df.

Related

How to have sum of values sum to 1 in geom_freqpoly()?

As an example, we can use geom_freqpoly() to examine how hp varies by cyl in the mtcars data.
library(tidyverse)
# Frequency polygons of hp, one line per cyl level; after_stat(ncount) is
# used as the y value instead of the raw bin count.
mtcars %>%
  mutate(cyl = factor(cyl)) %>%
  ggplot(mapping = aes(x = hp, colour = cyl)) +
  geom_freqpoly(aes(y = after_stat(ncount)), bins = 5)
Using after_stat(ncount), I can make each line be normalized between 0 and 1. However, is there a way to have it so that the sum of all the lines at any point is equal to 1? i.e., at any value of hp, the red, green, and blue lines add to one -- representing the estimated proportion of each cyl type at that value of hp.
This can be achieved with position = "fill", though it looks confusing with lines and is better represented as a filled geom using the same statistical transformation as geom_freqpoly
library(tidyverse)
# Filled area version: stat_bin() is the statistical transformation behind
# geom_freqpoly(), and position = "fill" rescales the stacked groups so that
# they sum to 1 at every x position.
mtcars %>%
  mutate(cyl = as.factor(cyl)) %>%
  ggplot() +
  aes(x = hp, fill = cyl) +
  stat_bin(bins = 5, position = "fill", geom = "area")
Compare this to the same result using an unfilled geom_freqpoly
# The same position = "fill" rescaling, drawn as unfilled frequency polygons
mtcars %>%
  mutate(cyl = factor(cyl)) %>%
  ggplot(mapping = aes(x = hp, colour = cyl)) +
  geom_freqpoly(bins = 5, position = "fill")
I think this is harder to follow.
Another alternative to geom_freqpoly would be geom_density, which permits more visually appealing representations of similar information:
# Kernel density alternative: position = "fill" again rescales the groups so
# they sum to 1, giving each cyl level's relative density along hp.
mtcars %>%
  mutate(cyl = factor(cyl)) %>%
  ggplot(mapping = aes(x = hp, fill = cyl)) +
  geom_density(position = "fill", colour = "white", lwd = 2, alpha = 0.5) +
  scale_fill_brewer(palette = "Set2") +
  # Zoom the x axis without discarding data outside the window
  coord_cartesian(xlim = c(50, 200)) +
  labs(y = "Relative density") +
  theme_minimal(base_size = 20)
Created on 2022-09-05 with reprex v2.0.2

Order grouped scatterplot by mean

I am plotting a geom_point for several groups (Loc) and want in addition a line that indicates the mean of the points for each group. The groups should be ordered based on the mean of the Size for each group. I am trying to do this by reorder(Loc, Size.Mean) but it does not reorder.
# Points per Loc, plus a second point layer that marks each group's mean Size
# with a "-" glyph. reorder() is applied only in that second layer's x mapping.
ggplot(data, aes(x = Loc, y = Size, colour = Loc)) +
  geom_point() +
  geom_point(
    data = data %>%
      group_by(Loc) %>%
      summarise(Size.Mean = mean(Size)),
    mapping = aes(x = reorder(Loc, Size.Mean), y = Size.Mean),
    colour = "black",
    shape = "-"
  ) +
  theme_pubr(base_size = 8) +
  scale_y_log10() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  )
ggplot orders discrete x ticks according to their level if the variable is a factor:
library(tidyverse)
# Mean sepal length per species, largest first; this row order supplies the
# factor levels used for the x axis below.
iris_means <-
  iris %>%
  group_by(Species) %>%
  summarise(mean = mean(Sepal.Length)) %>%
  arrange(desc(mean))

iris %>%
  # Re-level Species so the axis follows the order of the means
  mutate(Species = factor(Species, levels = iris_means$Species)) %>%
  ggplot(mapping = aes(x = Species, y = Sepal.Length)) +
  geom_point() +
  # ymin = ymax = y collapses the crossbar into a single horizontal mean line
  geom_crossbar(
    data = iris_means,
    mapping = aes(y = mean, ymin = mean, ymax = mean),
    colour = "red"
  )
Created on 2021-09-10 by the reprex package (v2.0.1)

How can I change the colour of the column header in geom_table() in R?

I'm using the geom_table() function from the ggpmisc package to add a table legend to my figure. I want to remove the grey colour from the first row with the column headers.
library(ggpmisc)
library(tidyverse)
# Per-cyl means of wt and mpg, preformatted as text for display in the table
tb <- mtcars %>%
  group_by(cyl) %>%
  summarise(wt = mean(wt), mpg = mean(mpg)) %>%
  ungroup() %>%
  mutate(
    wt = sprintf("%.2f", wt),
    mpg = sprintf("%.1f", mpg)
  )

# One-row tibble: the table's x/y position plus the table itself in a
# list-column
df <- tibble(x = 5.45, y = 34, tb = list(tb))

ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, colour = factor(cyl))) +
  geom_point() +
  geom_table(
    data = df,
    mapping = aes(x = x, y = y, label = tb),
    table.theme = ttheme_gtbw
  )
You can set the theme using arguments that are passed from ggpmisc to the corresponding ttheme function from gridExtra (Description of some of the possible options). If I understand your question correctly you want the background of the first row in your table to be white. You can achieve this using the following code to build your plot:
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, colour = factor(cyl))) +
  geom_point() +
  geom_table(
    data = df,
    mapping = aes(x = x, y = y, label = tb),
    # colhead carries the settings for the column-header row; its background
    # fill is set to white here
    table.theme = ttheme_gtbw(
      colhead = list(bg_params = list(fill = "white"))
    )
  )

Label grouped bar plot in R

I'm trying to add labels to a grouped bar plot in R.
However, I'm using percentages on the y axis, and I want the labels to be counts.
I've tried to use the geom_text() function, but I don't know exactly which parameters I need to use.
# Share of each value within its key, drawn as dodged bars on a 0-100% axis
newdf3 %>%
  dplyr::count(key, value) %>%
  dplyr::group_by(key) %>%
  dplyr::mutate(p = n / sum(n)) %>%
  ggplot(mapping = aes(x = key, y = p, fill = value)) +
  geom_col(position = position_dodge()) +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent_format()) +
  scale_fill_manual(
    name = "",
    values = c(
      'Before' = "deepskyblue",
      'During' = "indianred1",
      'After' = "green2",
      '?' = "mediumorchid3"
    ),
    drop = FALSE
  ) +
  labs(x = "", y = "%", title = "")
Here is an example of what I need:
Here's a sample of the data I'm using:
key value
A Before
A After
A During
B Before
B Before
C After
D During
...
I also wanted to keep the bars with no value (label = 0).
Can someone help me with this?
Here is an MWE showing how to add count labels to a simple bar chart. See below for the case where the bars are grouped.
library(datasets)
library(tidyverse)
# Number of chicks per feed (n) and each feed's share of the total (p)
data <- chickwts %>%
  as_tibble() %>%
  count(feed) %>%
  mutate(p = n / sum(n))

# Bar height is the share; the label printed above each bar is the raw count
ggplot(data, mapping = aes(x = feed, y = p, fill = feed)) +
  geom_col() +
  geom_text(mapping = aes(label = n), vjust = -1)
You should be able to do the same thing on your data.
EDIT: StupidWolf points out in the comments that the original example has grouped data. Adding position = position_dodge(0.9) in geom_text deals with this.
Again, no access to the original data, but here's a different MWE using mtcars showing this:
library(datasets)
library(tidyverse)
# Number of cars for every gear/carb combination, with both as factors
data <- mtcars %>%
  as_tibble() %>%
  group_by(gear = as_factor(gear), carb = as_factor(carb)) %>%
  count()

ggplot(data, mapping = aes(x = gear, y = n, fill = carb)) +
  geom_col(position = "dodge") +
  # Dodge the labels as well so each count sits above its own bar
  geom_text(
    mapping = aes(label = n),
    position = position_dodge(0.9),
    vjust = -1
  )

How to sort bars after grouped top_n in facet_wrap with ggplot2?

I'm facing an issue with sorting bars when using facet_wrap (which is commonly reported here, here and others) after grouping variables and keeping the top values.
When I run the code without factor conversion, bars are ordered:
# Long form of iris, one facet per measurement; bars are keyed by a combined
# Species_measurement label and reordered by value.
iris %>%
  gather(key = "measurements", value = "values", -Species) %>%
  mutate(
    # Keep the measurements in their order of appearance
    kk = factor(measurements, levels = unique(measurements)),
    species_l = paste(Species, measurements, sep = "_")
  ) %>%
  ggplot(mapping = aes(x = reorder(species_l, values), y = values, fill = kk)) +
  geom_col() +
  facet_wrap(~kk, scales = "free")
But now I want to order the bars in decreasing order within facet_wrap, after applying top_n.
Here is what I've tried so far:
library(tidyverse)
# Attempt 1: re-level Species by how often each level occurs (ascending)
iris %>%
  gather(key = "measurements", value = "values", -Species) %>%
  mutate(Species = factor(Species, levels = names(sort(table(Species))))) %>%
  ggplot(mapping = aes(x = Species, y = values, fill = measurements)) +
  geom_col() +
  facet_wrap(~measurements, scales = "free")
and this:
# Attempt 2: keep the top 5 values per measurement/Species pair, then order
# Species by (negated) number of rows
iris %>%
  gather(key = "measurements", value = "values", -Species) %>%
  group_by(measurements, Species) %>%
  top_n(n = 5, wt = values) %>%
  ggplot(
    mapping = aes(
      x = reorder(Species, Species, FUN = function(x) -length(x)),
      y = values,
      fill = measurements
    )
  ) +
  geom_col() +
  facet_wrap(~measurements, scales = "free")
and this:
# Attempt 3: combined Species_measurement label, top 5 values per
# measurement/Species pair, bars reordered by value
iris %>%
  gather(key = "measurements", value = "values", -Species) %>%
  mutate(
    kk = factor(measurements, levels = unique(measurements)),
    species_l = paste(Species, measurements, sep = "_")
  ) %>%
  group_by(measurements, Species) %>%
  top_n(n = 5, wt = values) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = reorder(species_l, values), y = values, fill = kk)) +
  geom_col() +
  facet_wrap(~kk, scales = "free")
This is what I get:
As you can see Sepal.Width bars are not sorted.
Your first attempt was close -- you need to make sure you're reordering per facet, and not just reordering the factor based on the top 5 values of all measurements. Julia Silge explains thoroughly here
library(tidytext)
library(tidyverse)
library(magrittr)

# Long form of iris with Species reordered separately inside each facet.
# pivot_longer() replaces the superseded gather().
iris %>%
  pivot_longer(-Species, names_to = "measurements", values_to = "values") %>%
  mutate(
    # Keep the measurements in their order of appearance
    kk = factor(measurements, levels = unique(measurements)),
    # The '-values' below specifies descending order; `measurements` is the
    # facet within which Species is reordered
    Species = reorder_within(Species, -values, measurements)
  ) %>%
  ggplot(aes(x = Species, y = values, fill = kk)) +
  geom_col() +
  facet_wrap(~kk, scales = "free") +
  # Companion scale for reorder_within(): cleans up the axis labels
  scale_x_reordered()

Resources