create a legend with different datasets in ggplot2 - r

I am trying to create a legend in ggplot. If I use different variables from the same file, I add colour = "xx" in aes and it works. but what about if it is the same variable but different datasets?
In the example below, I plot Value ~ Year from two different datasets. How can I create a legend that says df1 with a red line, and df2 with a blue line?
# Shared year vector plus one series of values per dataset.
A <- c(2001, 2002, 2003, 2004, 2005)
B <- c(3, 5, 2, 7, 5)
C <- c(2, 7, 4, 3, 5)

# Both frames use the same column names so they can share one set of
# plot aesthetics.
df1 <- data.frame(Year = A, Value = B)
df2 <- data.frame(Year = A, Value = C)
# Original attempt from the question: both paths receive a fixed colour
# outside aes(), so nothing is mapped to colour. The surrounding parentheses
# print the plot as well as assigning it to `test`.
(test <- ggplot(data = df1, mapping = aes(x = Value, y = Year)) +
  geom_path(colour = "red", size = 1) +
  geom_path(data = df2, colour = "blue") +
  ylab("Year") +
  scale_x_continuous(position = "top") +
  scale_y_reverse(expand = c(0, 0)))

We could create a single dataset with `bind_rows` and specify `.id` to create a grouping column, which can then be passed to `aes` as `colour`:
library(ggplot2)
library(dplyr)
# Stack the two frames; `.id` stores each list name ("df1" / "df2") in `grp`,
# which is mapped to colour so both paths get a legend entry.
list(df1 = df1, df2 = df2) %>%
  bind_rows(.id = "grp") %>%
  ggplot(mapping = aes(x = Value, y = Year, colour = grp)) +
  geom_path(size = 1) +
  ylab("Year") +
  scale_x_continuous(position = "top") +
  scale_y_reverse(expand = c(0, 0))
-output

Here is a simple solution, but not a great one if you have more data.frames.
Libraries
library(tidyverse)
Code
# Each layer maps colour to a constant label inside aes(); those labels are
# the keys that scale_colour_manual() assigns the line colours to.
ggplot(data = df1, mapping = aes(x = Value, y = Year)) +
  geom_path(aes(colour = "df1"), size = 1) +
  geom_path(aes(colour = "df2"), data = df2, size = 1) +
  scale_colour_manual(values = c(df1 = "red", df2 = "blue")) +
  scale_x_continuous(position = "top") +
  scale_y_reverse(expand = c(0, 0)) +
  ylab("Year")
Output

Related

Two axis plot with ggplot2

Example I want to replicate I need to plot a two axis plot in R with ggplot2. The first y axis goes from -10 to 10, and the second from 0 to 10. I add an example. Please, let me know if there is a way to do it with ggplot2.
I used this code, but the result makes the first axis from -5 to 10, and the second, from 5 to 10. I want to get the breaks I define earlier.
# Attempt from the question, for Chile from 1973 onwards: both series are
# drawn on the primary scale, and the secondary axis is the primary one
# multiplied by -1.
df %>%
  filter(Country == "Chile", year >= 1973) %>%
  ggplot(aes(x = year)) +
  geom_line(aes(y = polity2, colour = "Polity 2")) +
  geom_line(aes(y = gee_totGDP, colour = "gee_totGDP")) +
  scale_y_continuous(sec.axis = sec_axis(~ . * -1, name = "gee_totGDP")) +
  scale_colour_manual(values = c("blue", "red"))
I generated some fake data with four rows based on your example image.
To make the plot, I set the limits for the first axis using the `limits` argument of scale_y_continuous(). Then I set up the second axis using a transformation formula, like you attempted. The transformation should be axis2 = (axis1 + 10)/2.
library(tidyverse)
# Fake data: four observations, `ed` on a 0 to 10 scale and `polity` on a
# -10 to 10 scale.
df <- tibble(
  year = seq(1985, 2000, 5),
  ed = c(6, 6, 8, 5),
  polity = c(-10, -10, -8, -8)
)

# The secondary axis only relabels the primary one: axis2 = (axis1 + 10) / 2.
# Every layer is still drawn in primary-axis units, so `ed` has to be mapped
# through the inverse, axis1 = axis2 * 2 - 10, to line up with the secondary
# axis it is meant to be read against.
df %>%
  ggplot(aes(x = year)) +
  geom_line(aes(y = polity)) +
  geom_line(aes(y = ed * 2 - 10)) +
  scale_y_continuous(
    limits = c(-10, 10),
    sec.axis = sec_axis(~ (. + 10) / 2)
  )
You can set up both axes with scale_y_continuous() as follows:
# A plot has a single y scale: adding a second scale_y_continuous() replaces
# the first one, so both axes are declared in one call. The secondary axis
# relabels the primary range -10..10 as 0..10 via (y + 10) / 2, and
# gee_totGDP is drawn through the inverse (y * 2 - 10) so that it lines up
# with that secondary axis.
ggplot(data = df, aes(x = year)) +
  geom_line(aes(y = polity2, color = "Polity 2")) +
  geom_line(aes(y = gee_totGDP * 2 - 10, color = "gee_totGDP")) +
  scale_y_continuous(
    name = "Polity 2",
    limits = c(-10, 10),
    sec.axis = sec_axis(~ (. + 10) / 2, name = "gee_totGDP")
  ) +
  scale_color_manual(values = c("blue", "red"))

Change colour in stacked plots, ggplot 2

I would like to use palette colours for my stacked plot:
# Stacked bars: heights are taken directly from `b` (stat = "identity"),
# segments are filled by `a`, and the legend is placed below the panel.
p <- ggplot() +
  theme_bw() +
  geom_bar(
    data = df,
    mapping = aes(x = c, y = b, fill = a),
    stat = "identity",
    position = "stack",
    width = 0.7
  ) +
  theme(legend.position = "bottom")
I tried the following but it didn't work:
p + scale_color_brewer(palette = "PuOr")
Furthermore, I would like to plot a line showing the mean over the barplot. Maybe somebody has an idea how to do that.
Some thoughts:
1) better to use geom_col than geom_bar for values you want the bar to represent, see the documentation
2) Used factor(...) to make continuous variables discrete
3) your code will be easier to read if you follow the order of arguments as set out in the documentation, although of course the order does not matter.
4) updated to reflect request with mean for each x value
library(ggplot2)
library(dplyr)
# Example data: `a` is the year used for the fill, `x` the bar position and
# `y` the bar height.
df <- data.frame(
  a = c(2001, 2001, 2001, 2002, 2002, 2003),
  x = c(6, 7, 8, 6, 7, 6),
  y = c(1, 258, 1, 3, 9, 11)
)

# data frame for means: one mean of `y` per `x` value
df_y_mean <- summarise(group_by(df, x), y_mean = mean(y))

# Stacked columns with the per-x mean drawn on top; group = 1 joins the
# points into a single line across the discrete x axis.
ggplot() +
  geom_col(
    mapping = aes(x = factor(x), y = y, fill = factor(a)),
    data = df,
    width = 0.7
  ) +
  geom_line(
    mapping = aes(x = factor(x), y = y_mean, colour = "red"),
    data = df_y_mean,
    group = 1,
    size = 1
  ) +
  scale_fill_brewer(name = "Year", palette = "PuOr") +
  guides(colour = guide_legend(title = "Mean", label = FALSE)) +
  theme_bw() +
  theme(legend.position = "bottom")
Created on 2020-05-20 by the reprex package (v0.3.0)
You are defining fill but using scale_colour_brewer(). Use scale_fill_brewer() to modify fill.
To draw a horizontal line add geom_hline() to your plot call.
# Stacked bars filled by `a`, with the legend placed below the panel.
p <- ggplot() +
  theme_bw() +
  geom_bar(
    aes(fill = a, y = b, x = c),
    data = df,
    width = 0.7,
    position = "stack",
    stat = "identity"
  ) +
  theme(legend.position = "bottom")

my.mean <- mean(df$b) ## can be any value, change as needed

# `fill` is the mapped aesthetic, so the palette goes on the fill scale.
# geom_hline()'s first positional argument is `mapping`, so the intercept
# has to be passed by name as `yintercept`.
p +
  scale_fill_brewer(palette = "PuOr") +
  geom_hline(yintercept = my.mean)

R barplot of count: Skewed data with columns with large counts and columns with zero or few counts

The following code draws a barplot of count of x elements with y-axis in log scale.
library(ggplot2)
library(scales)
# Heavily skewed sample: a few x values with thousands of rows and several
# with a single row.
myData <- data.frame(
  x = rep(
    c(1, 2, 3, 4, 11, 16, 31, 32, 47),
    times = c(22500, 6000, 8000, 5000, 86, 15, 1, 1, 1)
  )
)

# Count bars on a log10 y axis with power-of-ten labels; every bar is
# labelled with its count and every integer from 1 to 47 gets an x break.
ggplot(myData, aes(x = x)) +
  geom_bar(width = 0.5) +
  geom_text(aes(label = ..count..), stat = "count", vjust = -1, size = 3) +
  scale_x_continuous(breaks = seq_len(47)) +
  scale_y_log10(
    breaks = trans_breaks("log10", function(x) 10^x),
    labels = trans_format("log10", math_format(10^.x))
  )
Below is the plot:
My questions are:
How do I remove the x-axis tick marks/labels for those columns with zero count?
How do I show the values of column 31, 32, 47 better? (those with count 1)
How do I just label the count of the tallest column? (22500 of column 1 in this case)
One option would be for you to add a border color, which would help highlight that there is at least something in those parts of the graph:
library(tidyverse)
# Tally the rows for each x value.
df <- count(group_by(myData, x))

# Columns with a border colour on a log10 y axis; only the x == 1 bar is
# labelled, with the label positioned 10000 above the top of the bar.
df %>%
  ggplot(aes(x = x, y = n)) +
  geom_col(fill = "cyan3", colour = "cyan4") +
  geom_text(
    data = . %>% filter(x == 1),
    mapping = aes(y = n + 10000, label = n)
  ) +
  scale_y_log10()

How to create such a figure using ggplot2 in R?

I have a matrix with many zero elements. The column names are labeled on the horizontal axis. I'd like to show explicitly the nonzero elements as the bias from the vertical line for each column.
So how should construct a figure such as the example using ggplot2?
Example data can be generated as follows:
# Reproducible sparse example: a 40 x 5 zero matrix with one contiguous run
# of scaled normal draws in each column.
set.seed(2018)
N <- 5
p <- 40
dat <- matrix(0.0, nrow = p, ncol = N, dimnames = list(NULL, letters[1:5]))

# The draws are made column by column, so the random stream is consumed in
# the order a, b, c, d, e.
dat[2:7, "a"] <- 4 * rnorm(6)
dat[4:12, "b"] <- 2.6 * rnorm(9)
dat[25:33, "c"] <- 2.1 * rnorm(9)
dat[19:26, "d"] <- 3.3 * rnorm(8)
dat[33:38, "e"] <- 2.9 * rnorm(6)

print(dat)
Here is another option using facet_wrap and geom_col with theme_minimal.
library(tidyverse)
# Reshape the matrix to long format (one row per cell, with its row index),
# then draw one flipped bar panel per original column.
dat %>%
  as.data.frame() %>%
  rowid_to_column(var = "row") %>%
  gather(key, value, -row) %>%
  ggplot(mapping = aes(x = row, y = value, fill = key)) +
  geom_col() +
  coord_flip() +
  facet_wrap(~ key, ncol = ncol(dat)) +
  theme_minimal()
To further increase the aesthetic similarity to the plot in your original post we can
move the facet strips to the bottom,
rotate strip labels,
add "zero lines" in matching colours,
remove the fill legend, and
get rid of the x & y axis ticks/labels/title.
library(tidyverse)
# Same long-format bar panels as before, with: a zero line per panel in the
# matching colour, facet strips at the bottom with rotated labels, no fill
# legend, and no axis titles, text or ticks.
dat %>%
  as.data.frame() %>%
  rowid_to_column("row") %>%
  gather(key, value, -row) %>%
  ggplot(aes(x = row, y = value, fill = key)) +
  geom_col() +
  geom_hline(
    # one zero line per facet: a row per matrix column, all at y = 0
    data = data.frame(key = colnames(dat), y = 0),
    aes(yintercept = y, colour = key),
    show.legend = FALSE
  ) +
  facet_wrap(~ key, ncol = ncol(dat), strip.position = "bottom") +
  coord_flip() +
  # "none" is the supported way to drop a guide; passing FALSE is deprecated
  guides(fill = "none") +
  theme_minimal() +
  theme(
    strip.text.x = element_text(angle = 45),
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )
It would be much easier if you could provide some sample data. Since there was none, I had to create some myself, and there is no guarantee that this will work for your purpose.
set.seed(123)
# creating some random sample data
df <- data.frame(
  id = rep(1:100, each = 3),
  x = rnorm(300),
  # Explicit factor: later steps call as.numeric(df$group) and
  # levels(df$group), which need a factor (data.frame() has kept strings as
  # character since R 4.0).
  group = factor(rep(letters[1:3], each = 100)),
  bias = sample(0:1, 300, replace = TRUE, prob = c(0.7, 0.3))
)
# introducing bias: rows flagged 0 stay at zero, rows flagged 1 get a normal
# draw
df$bias <- df$bias * rnorm(nrow(df))
# calculate lower/upper bias for errorbar: each bar spans from 0 to the bias
df$biaslow <- pmin(0, df$bias)
df$biasupp <- pmax(0, df$bias)
Then I used a bit of a hack to draw the groups far enough apart that they do not overlap: based on the group, I shifted the bias variable as well as the lower and upper bias.
# I want to print groups in sufficient distance
# factor() yields the group codes 1, 2, 3 even when `group` is a character
# column (as.numeric() on the raw strings would give NA); each group is
# shifted to 5 * its code.
group_offset <- as.numeric(factor(df$group)) * 5
df$bias <- group_offset + df$bias
df$biaslow <- group_offset + df$biaslow
df$biasupp <- group_offset + df$biasupp
And now it is possible to plot it:
library(ggplot2)
# Zero-width error bars from biaslow to biasupp for every observation, one
# coloured baseline per group at 5 / 10 / 15, axes flipped, and the baseline
# positions labelled with the group letters.
ggplot(df, aes(x = x, colour = group)) +
  geom_errorbar(aes(ymin = biaslow, ymax = biasupp), width = 0) +
  geom_hline(aes(yintercept = 5, colour = "a")) +
  geom_hline(aes(yintercept = 10, colour = "b")) +
  geom_hline(aes(yintercept = 15, colour = "c")) +
  scale_y_continuous(breaks = c(5, 10, 15), labels = letters[1:3]) +
  coord_flip() +
  theme(legend.position = "none")
EDIT:
To incorporate special design you can add
theme_bw() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
to your plot.
EDIT2:
To incorporate several horizontal lines, you can create different dataset:
# One row per group: its level name and baseline position (5 * level code).
# factor() keeps this working when `group` is a character column, and sorting
# the codes pairs them with levels() regardless of the row order in df.
df2 <- data.frame(
  int = sort(unique(as.numeric(factor(df$group)) * 5)),
  gr = levels(factor(df$group))
)
And use
geom_hline(data = df2, aes(yintercept = int, col = gr))
instead of copy/pasting geom_hline for each group level.

plot histogram that visualizes each observation whose color is mapped to a second variable

I'm looking for a way to show a histogram of values (time2) with binwidth equal to 1, I think, and have the color of each observation ("count") be mapped to a second variable (diff).
# Toy data: 12 people, each with a value at time 1 and at time 2.
df <- data.frame(
  person = seq(from = 1, to = 12, by = 1),
  time1 = c(9, 9, 9, 8, 8, 8, 8, 7, 7, 6, 6, 5),
  time2 = c(9, 4, 3, 9, 6, 5, 4, 9, 3, 2, 1, 2)
)

# Change from time 1 to time 2 (negative means the value went down).
df[["diff"]] <- with(df, time2 - time1)
I've not come across a plot like this before, and I don't know of a way to implement this in ggplot2. Any ideas? This toy example shows the distribution of values for 12 people measured at time 1 and time 2. The color is mapped to the change in values from time 1 to time 2. I'm trying to show non-quant students how the group mean shifts down by 2.75, but the individual movement from time 1 to time 2 ranges from an increase of 2 points, to a decrease of 6 points. On average the group improves, but one person stays the same and two people get worse.
Here is a hacked solution using geom_tile(). I'm sure someone could rewrite the data manipulation code using pure dplyr/purrr. Most of the work is done by mapping each tile to an x and a y coordinate.
# The snippet relies on dplyr, tidyr and ggplot2; load them explicitly.
library(tidyverse)

# Long format: one row per person and time point.
# `y` stacks the tiles: the k-th person sharing a value at a given time point
# gets y = k. ungroup() comes before factor() so that the factor levels are
# built once over the whole column rather than group by group.
df_plot <- df %>%
  pivot_longer(time1:time2, names_to = "time", values_to = "value") %>%
  group_by(time, value) %>%
  mutate(y = row_number()) %>%
  ungroup() %>%
  mutate(diff = factor(diff))

# One tile per observation, filled by its change score, one panel per time.
ggplot(df_plot) +
  geom_tile(aes(x = value, y = y, fill = diff)) +
  facet_wrap(~time) +
  theme_classic() +
  scale_fill_brewer(type = "seq", palette = 3) +
  scale_x_continuous(breaks = 0:10) +
  xlab("") +
  ylab("")
You can fudge with the fill colors to achieve your desired output. Also need to fudge with the plot dimensions to ensure that your tiles are squares.
# load packages
library(dplyr) # provides %>%, group_by(), mutate() and row_number()
library(ggplot2)

# calculate nth occurrence of time 1 value (used as the tile's y position)
new.df <- df %>%
  group_by(time1) %>%
  mutate(time1Index = row_number()) %>%
  ungroup()

# plot time 1
p <- ggplot(new.df, aes(x = time1, y = time1Index, fill = diff)) +
  geom_tile()
p + expand_limits(x = c(0, 10)) + xlab("") + ylab("")

# calculate nth occurrence of time 2 value (used as the tile's y position)
new.df2 <- df %>%
  group_by(time2) %>%
  mutate(time2Index = row_number()) %>%
  ungroup()

# plot time 2
p2 <- ggplot(new.df2, aes(x = time2, y = time2Index, fill = diff)) +
  geom_tile()
p2 + expand_limits(x = c(0, 10)) + xlab("") + ylab("")
Here's an alternative that uses gridExtra if you want an alternative to facet_wrap- otherwise similar to Vlo's use of geom_tile. Used your example data for df:
Libraries:
library(data.table)
library(reshape2)
library(ggplot2)
library(gridExtra)
Convert to a data table, then add y values for time1 and time2 with .N and grouping for each
dt <- as.data.table(df)
# Number the rows within each time1 value, then within each time2 value;
# y1 and y2 are added to dt by reference.
dt[, y1 := seq_len(.N), by = time1]
dt[, y2 := seq_len(.N), by = time2]
Then, make a separate ggplot object for each, with particular scaling and color parameters:
# Time 1 panel: plain white tiles with black outlines, y axis stripped.
p1 <- ggplot(data = dt) +
  geom_tile(
    mapping = aes(x = time1, y = y1),
    colour = "black",
    fill = "white"
  ) +
  scale_x_continuous(breaks = 0:10) +
  coord_cartesian(xlim = c(0, 10), ylim = c(0.5, 4.5), expand = TRUE) +
  theme_classic() +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank(),
    plot.margin = unit(c(6, 1, 1, 0.5), "cm")
  )

# Time 2 panel: same layout, but tiles are filled by `diff` on a
# green - white - dark red gradient, with a horizontal legend positioned
# above the panel.
p2 <- ggplot(data = dt) +
  geom_tile(mapping = aes(x = time2, y = y2, fill = diff), colour = "black") +
  scale_x_continuous(breaks = 0:10) +
  scale_fill_gradientn(
    colours = c("#237018", "white", "red4"),
    values = c(0, 0.8, 1)
  ) +
  coord_cartesian(xlim = c(0, 10), ylim = c(0.5, 4.5), expand = TRUE) +
  theme_classic() +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank(),
    plot.margin = unit(c(6, 1, 1, 0.5), "cm"),
    legend.direction = "horizontal",
    legend.position = c(0, 1.55)
  )
Then use grid.arrange to plot them adjacent:
grid.arrange(p1, p2, nrow = 1)
Output:
Couldn't quite get the legend right, might need some more work there.

Resources