Set legend according to line colour using ggplot - r

I have a plot as
Using ggplot, how can I set the legend with two labels only, i.e, red lines with name say "prediction intervals" and blue line with name "fit line"
I used following R lines for this
dfs <- data.frame("x"=1:50,"fit" = rnorm(50,30,4),"upper"=rnorm(50,30,4)+15, "lower"=rnorm(50,30,4)-15)
df_melt <- reshape2::melt(dfs,id="x")
g <- ggplot(df_melt,aes(x=x,y=value,colour=variable)) + geom_line(linetype=5)
g <- g + scale_colour_manual(values=c("blue","red","red"))
g <- g + theme_grey(base_size = 16) + theme(axis.text=element_text(colour = "black",size = 12))
g

You can simply add a new column with 2 values ("prediction intervals" or "fit line")
# Add a new group column
df_melt$group[df_melt$variable == "fit"] <- "fit line"
df_melt$group[df_melt$variable != "fit"] <- "prediction intervals"
# don't forget `group = variable`
g <- ggplot(df_melt, aes(x = x, y = value, colour = group, group = variable)) +
geom_line(linetype=5)
g <- g + scale_colour_manual(values = c("blue" ,"red", "red"))
g <- g + theme_grey(base_size = 16) +
theme(axis.text = element_text(colour = "black", size = 12))
g

Related

overlaying points in specific manner in ggplot scatter plots

The colors are added to the ggplot scatter plot based on interaction of two variables : choice and flag (each has two values, therefore, total four combinations). I used faceting based on z value.
library(tidyverse)
x <- runif(10000)
y <- runif(10000)
z <- c(rep(0, 5000), rep(1, 5000))
flag <- c(rep(0, 500), rep(1, 4500), rep(0, 4500), rep(1, 500))
choice <- rep(c(0, 1), 5000)
tbl <- tibble(x, y, z, flag, choice)
scatterplot <- ggplot(tbl,
aes(x = x,
y = y,
color = factor(interaction(choice, flag)))
) +
geom_point(alpha = 0.7,
size = 2) +
scale_color_manual(values = c("blue3", "cyan1", "red3", "orange")) +
facet_grid(z ~ .) +
theme_bw() +
theme(legend.position = "right") +
theme(aspect.ratio = 1) +
ggtitle("Scatter plot")
scatterplot
But I have the following requirement -
z is used for facetting. For z = i, I want points with flag = i to be above, i.e. in the figure below,
for z = 0, blue points (flag = 0) should be over red/orange points.
for z = 1, red/orange points (flag = 1) should be over blue points (as shown)
If I understand you correctly, you are happy with the lower panel, but you need the blue dots in the top panel to be overlaid on the orange dots (at the moment the orange dots are overlaid on the blue dots in both panels).
If this is the case, then calling geom_point a second time with a subsetted data frame where z == 0 & flag == 0 will overlay the appropriate blue points on the top panel without affecting the lower panel.
tbl <- tbl %>%
mutate(col = interaction(choice, flag))
ggplot(tbl, aes(x, y, color = col)) +
geom_point(alpha = 0.7, size = 2) +
geom_point(data = subset(tbl, z == 0 & flag == 0),
alpha = 0.7, size = 2) +
scale_color_manual(values = c("blue3", "cyan1", "red3", "orange")) +
facet_grid(z ~ .) +
theme_bw() +
theme(legend.position = "right") +
theme(aspect.ratio = 1) +
ggtitle("Scatter plot")
Consider this as an option for you. With facets it was complex to set specific order but you can do the same plot using patchwork:
library(tidyverse)
library(patchwork)
#Data
x <- runif(10000)
y <- runif(10000)
z <- c(rep(0, 5000), rep(1, 5000))
flag <- c(rep(0, 500), rep(1, 4500), rep(0, 4500), rep(1, 500))
choice <- rep(c(0, 1), 5000)
tbl <- tibble(x, y, z, flag, choice)
Plots:
#Plot
G1 <- ggplot(subset(tbl,z==0),aes(x = x,y = y,
color = factor(interaction(choice, flag),
levels = rev(unique(interaction(choice, flag))),
ordered = T))) +
geom_point(alpha = 0.7,
size = 2) +
scale_color_manual(values = c("blue3", "cyan1", "red3", "orange")) +
facet_grid(z ~ .) +
theme_bw() +
theme(legend.position = "right") +
theme(aspect.ratio = 1) +
ggtitle("Scatter plot")+
labs(color='Color',x='')+theme(legend.position = 'none')
#Plot 2
G2 <- ggplot(subset(tbl,z==1),aes(x = x,y = y,
color = factor(interaction(choice, flag)))) +
geom_point(alpha = 0.7,
size = 2) +
scale_color_manual(values = c("blue3", "cyan1", "red3", "orange")) +
facet_grid(z ~ .) +
theme_bw() +
theme(legend.position = "right") +
theme(aspect.ratio = 1) +
labs(color='Color')
Final arrange:
#Final plot
G <- G1/G2
G <- G+plot_layout(guides = 'collect')
Output:

using y-axis values to create secondary x-axis in ggplot2

I would like to create a dot plot with percentiles, which looks something like this-
Here is the ggplot2 code I used to create the dot plot. There are two things I'd like to change:
I can plot the percentile values on the y-axis but I want these
values on the x-axis (as shown in the graph above). Note that
the coordinates are flipped.
The axes don't display label for the
minimum value (for example the percentile axis labels start at 25
when they should start at 0 instead.)
# loading needed libraries
library(tidyverse)
library(ggstatsplot)
# creating dataframe with mean mileage per manufacturer
cty_mpg <- ggplot2::mpg %>%
dplyr::group_by(.data = ., manufacturer) %>%
dplyr::summarise(.data = ., mileage = mean(cty, na.rm = TRUE)) %>%
dplyr::rename(.data = ., make = manufacturer) %>%
dplyr::arrange(.data = ., mileage) %>%
dplyr::mutate(.data = ., make = factor(x = make, levels = .$make)) %>%
dplyr::mutate(
.data = .,
percent_rank = (trunc(rank(mileage)) / length(mileage)) * 100
) %>%
tibble::as_data_frame(x = .)
# plot
ggplot2::ggplot(data = cty_mpg, mapping = ggplot2::aes(x = make, y = mileage)) +
ggplot2::geom_point(col = "tomato2", size = 3) + # Draw points
ggplot2::geom_segment(
mapping = ggplot2::aes(
x = make,
xend = make,
y = min(mileage),
yend = max(mileage)
),
linetype = "dashed",
size = 0.1
) + # Draw dashed lines
ggplot2::scale_y_continuous(sec.axis = ggplot2::sec_axis(trans = ~(trunc(rank(.)) / length(.)) * 100, name = "percentile")) +
ggplot2::coord_flip() +
ggplot2::labs(
title = "City mileage by car manufacturer",
subtitle = "Dot plot",
caption = "source: mpg dataset in ggplot2"
) +
ggstatsplot::theme_ggstatsplot()
Created on 2018-08-17 by the reprex package (v0.2.0.9000).
I am not 100% sure to have understood what you really want, but below is my attempt to reproduce the first picture with mpg data:
require(ggplot2)
data <- aggregate(cty~manufacturer, mpg, FUN = mean)
data <- data.frame(data[order(data$cty), ], rank=1:nrow(data))
g <- ggplot(data, aes(y = rank, x = cty))
g <- g + geom_point(size = 2)
g <- g + scale_y_continuous(name = "Manufacturer", labels = data$manufacturer, breaks = data$rank,
sec.axis = dup_axis(name = element_blank(),
breaks = seq(1, nrow(data), (nrow(data)-1)/4),
labels = 25 * 0:4))
g <- g + scale_x_continuous(name = "Mileage", limits = c(10, 25),
sec.axis = dup_axis(name = element_blank()))
g <- g + theme_classic()
g <- g + theme(panel.grid.major.y = element_line(color = "black", linetype = "dotted"))
print(g)
That produces:
data <- aggregate(cty~manufacturer, mpg, FUN = mean)
data <- data.frame(data[order(data$cty), ], rank=1:nrow(data))
These two lines generate the data for the graph. Basically we need the manufacturers, the mileage (average of cty by manufacturer) and the rank.
g <- g + scale_y_continuous(name = "Manufacturer", labels = data$manufacturer, breaks = data$rank,
sec.axis = dup_axis(name = element_blank(),
breaks = seq(1, nrow(data), (nrow(data)-1)/4),
labels = 25 * 0:4))
Note that here the scale is using rank and not the column manufacturer. To display the name of the manufacturers, you must use the labels property and you must force the breaks to be for every values (see property breaks).
The second y-axis is generated using the sec.axis property. This is very straight-forward using dup_axis that easily duplicate the axis. By replacing the labels and the breaks, you can display the %-value.
g <- g + theme(panel.grid.major.y = element_line(color = "black", linetype = "dotted"))
The horizontal lines are just the major grid. This is much easier to manipulate than geom_segments in my opinion.
Regarding your question 1, you can flip the coordinates easily using coord_flip, with minor adjustments. Replace the following line:
g <- g + theme(panel.grid.major.y = element_line(color = "black", linetype = "dotted")
By the following two lines:
g <- g + coord_flip()
g <- g + theme(panel.grid.major.x = element_line(color = "black", linetype = "dotted"),
axis.text.x = element_text(angle = 90, hjust = 1))
Which produces:
Regarding your question 2, the problem is that the value 0% is outside the limits. You can solve this issue by changing the way you calculate the percentage (starting from zero and not from one), or you can extend the limit of your plot to include the value zero, but then no point will be associated to 0%.

Legend does not fit well ggplot density

I am doing a plot of densities, I want to add a legend but is overlapped with the symbol. The code is hereunder:
dfGamma = data.frame(a = rgamma(100,shape = 7.1,rate= 0.0055),
b = rgamma(100, shape = 10,rate= 0.0055),
c = rgamma(100, shape = 7.1,rate= 0.0055))
dfGamma = stack(dfGamma)
p <- ggplot(dfGamma, aes(x = values)) +
stat_density(aes(group = ind, colour = ind),position="identity",geom="line",size=1)+
ggtitle("Gamma distribution")+theme(legend.position="right")+
scale_color_manual(labels = c(expression(paste(alpha,"=7.1 ",beta,"=0.0055")),
expression(paste(alpha,"= 10 ",beta,"=0.0055")),
expression(paste(alpha,"=7.1 ",beta,"=0.0055"))),
values = c('red', 'blue',"green"))
p
the plot is:
The guides option, guide_legend is what you need. You can read more about it in the ggplot reference. Does this help?
p <- ggplot(dfGamma, aes(x = values)) +
stat_density(aes(group = ind, colour = ind),position="identity",geom="line",size=1)+
ggtitle("Gamma distribution")+
theme(legend.position="right") +
scale_color_manual(labels = c(expression(paste(alpha, "=7.1 ", beta, "=0.0055")),
expression(paste(alpha,"= 10 ",beta,"=0.0055")),
expression(paste(alpha,"=7.1 ",beta,"=0.0055"))),
values = c('red', 'blue',"green")) +
guides(colour = guide_legend(label.position = "bottom"))
p

How to make a sorted geom_bar() ggplot [duplicate]

This question already has answers here:
Order discrete x scale by frequency/value
(7 answers)
Closed 5 years ago.
My dataframe is called:
d3with variable names : course_name,id,total_enrolled,total_capacity
I did:
d3a <- head(d3[order(d3$total_capacity, decreasing = T),], 15)
d3.plottable <- d3a[, c(1,3,4)]
d3.plottable <- melt(d3.plottable, id.vars = "course_name")
library(ggplot2)
g <- ggplot(d3.plottable, aes(x = course_name, y = value))
g + geom_bar(aes(fill = variable), position = position_dodge(), stat = "identity") +
coord_flip() + theme(legend.position = "top")
g <- g + labs(x = "Course Name")
g <- g+ labs(y = "Number of Students")
g
And what I get is this:
No matter what I do I can't sort the orange bar in descending order.
Is there a way to do that? I would like to sort on the variable total_enrolled.
PS:I apologize for the badly formatted code,I am still figuring out stackoverflow.
Here is a an example redefining the order of the factor levels.
Note, since you don't provide sample data I will simulate some data.
# Sample data
set.seed(2017);
df <- cbind.data.frame(
course_name = rep(LETTERS[1:6], each = 2),
value = sample(300, 12),
variable = rep(c("total_enrolled", "total_capacity"), length.out = 12)
);
# Relevel factor levels, ordered by subset(df, variable == "total_enrolled")$value
df$course_name <- factor(
df$course_name,
levels = as.character(subset(df, variable == "total_enrolled")$course_name[order(subset(df, variable == "total_enrolled")$value)]));
# Plot
require(ggplot2);
g <- ggplot(df, aes(x = course_name, y = value))
g <- g + geom_bar(aes(fill = variable), position = position_dodge(), stat = "identity");
g <- g + coord_flip() + theme(legend.position = "top");
g <- g + labs(x = "Course Name")
g <- g + labs(y = "Number of Students")
g;

qplot (ggplot2): plot of more functions with the same color

I'm plotting 11 curves and the program bellow works well. BUT I'm not able two change the wild colors to plot 11 black curves:
library(ggplot2)
#library(latex2exp)
library(reshape)
fn <- "img/plot.eps"
fct1 <- function(x0 ){
return(1/sin(x0)+1/tan(x0))
}
fct2 <- function(beta, t ){
return(2*atan(exp(t)/beta))
}
t<-seq(from=0,to=10,by=0.01)
s1<-cbind(t, fct2(fct1(-pi+0.0001),t),
fct2(fct1(-1.5),t),
fct2(fct1(-0.5),t),
fct2(fct1(-0.05),t),
fct2(fct1(-0.01),t),
fct2(fct1(0),t),
fct2(fct1(0.01),t),
fct2(fct1(0.05),t),
fct2(fct1(0.5),t),
fct2(fct1(1.5),t),
fct2(fct1(pi),t))
colnames(s1)<-c("time","y1","y2","y3","y4","y5","y6","y7","y8","y9","y10","y11")
s2 <- melt(as.data.frame(s1), id = "time")
q <- ggplot(s2, aes(x = time, y = value, color = variable))
q <- q + geom_line() + ylab("y") + xlab("t")+ ylab("x(t)")+
theme_bw(base_size = 7) + guides(colour = FALSE)
ggsave(file = fn, width = 2, height = 1)
q
EDIT Now the code should be reproducible
You need to map the variable to the grouping, and it will produce black lines by default.
q <- ggplot() +
geom_line(data = s2, aes(x = time, y = value,
group = variable)) +
xlab("t")+ ylab("x(t)") +
theme_bw(base_size = 7) + guides(colour = FALSE)
q
To be perfectly clear, it is possible to map the color to the variable, which can produce black lines, but not without changing the legend. Here is how you would amend the colors after the fact, if you wanted to, having already mapped the color to the variable.
q <- ggplot() +
geom_line(data = s2, aes(x = time, y = value,
color = variable)) +
xlab("t")+ ylab("x(t)") +
theme_bw(base_size = 7) + guides(colour = FALSE) +
scale_color_manual(values = rep("black",11))
q

Resources