Adding ggplot2 legend - r

I've been trying to find an answer to my problem, but I couldn't solve it with what I found in the forum. I know the key is to get the mapping right (or at least that is what I understood from previous messages).
Here is my code:
# One row per individual; each row supplies the start (`Year`) and the
# finish (`end`) of the segment drawn for that individual.
dat <- data.frame(
  Individuals = as.numeric(1:10),
  Year = c(0, 5, 0, 0, 0, 0, 8, 0, 0, 3),
  end = c(15, 10, 15, 6, 10, 8, 15, 6, 9, 5)
)
# Draw one horizontal segment per individual, running from Year to end.
Person_time_R <- ggplot(dat) +
geom_segment(aes(x=Year, y=Individuals, xend=end, yend=Individuals),
# The colours are a fixed vector given outside aes(), i.e. they are not mapped
# from a column of `dat`; this is why the question below asks how to get a legend.
color=c("blue","red","red","blue","red","red","blue","red","red","red"),
size=2) +
scale_y_reverse() +
ggtitle("Person-time") +
xlab("Years") +
ylab("Individuals") +
theme(
plot.title = element_text(hjust = 0.5, size=26, face="bold"),
axis.title.x = element_text(size=20),
axis.title.y = element_text(size=20)
) +
# NOTE(review): a second y scale and two more x scales are added below, after
# scale_y_reverse(); ggplot2 presumably keeps only the last scale added per
# axis, which would discard the reversal and the continuous x limits -- confirm.
scale_y_discrete(limits=c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) +
scale_x_continuous(limits = c(0,16)) +
scale_x_discrete(limits=c( 0, 1, 3, 5, 7, 9, 11, 13, 15))
I would like to have a legend to separate the "red" and "blue" lines... How could I do that?

To show the colours in a legend, add a column that holds the type, then map that column to colour inside aes() in geom_segment. Finally, use scale_color_manual to specify the legend name and the colours.
# Store the colour group as a column so that it can be mapped inside aes().
dat[["Type"]] <- c(1, 2, 2, 1, 2, 2, 1, 2, 2, 2)

# Mapping `colour` to the discrete Type is what produces the legend; the manual
# scale supplies the legend title and one colour per level.
ggplot(data = dat) +
  geom_segment(
    mapping = aes(
      x = Year, xend = end,
      y = Individuals, yend = Individuals,
      colour = factor(Type)
    ),
    size = 2
  ) +
  scale_color_manual(name = "Type", values = c("blue", "red"))

Related

How to add points to a single line amongst many lines in ggplot?

# Wide-format example data: a shared x column plus five y series (y1..y5).
gfg_data <- data.frame(
  x = as.numeric(1:10),
  y1 = c(1.1, 2.4, 3.5, 4.1, 5.9, 6.7, 7.1, 8.3, 9.4, 10.0),
  y2 = c(7, 5, 1, 7, 4, 9, 2, 3, 1, 4),
  y3 = c(5, 6, 4, 5, 1, 8, 7, 4, 5, 4),
  y4 = c(1, 4, 8, 9, 6, 1, 1, 8, 9, 1),
  y5 = c(1, 1, 1, 3, 3, 7, 7, 10, 10, 10)
)
# One line layer per y column, each with its own fixed (unmapped) colour.
gfg_plot <- ggplot(data = gfg_data, mapping = aes(x)) +
  geom_line(mapping = aes(y = y1), colour = "black") +
  geom_line(mapping = aes(y = y2), colour = "red") +
  geom_line(mapping = aes(y = y3), colour = "green") +
  geom_line(mapping = aes(y = y4), colour = "blue") +
  geom_line(mapping = aes(y = y5), colour = "purple")
I would like to add points to the red line, but simply adding geom_point() after it does not seem to work. Is there any workaround?

Changing colours of grouped bar chart in ggplot2

For a sample dataframe:
# Sample data rebuilt with structure() (appears to be dput() output): 20 rows
# with columns year, imd.quintile and average_antibiotic. The class vector
# includes "grouped_df" with vars = "year", so the dump carries grouping
# attributes in addition to the plain columns.
df <- structure(list(year = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3,
3, 3, 4, 4, 4, 4, 4), imd.quintile = c(1, 2, 3, 4, 5, 1, 2, 3,
4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5), average_antibiotic = c(1.17153515458827,
1.11592565388857, 1.09288449967773, 1.07442652168281, 1.06102887394413,
1.0560582933182, 1.00678980505929, 0.992997489072538, 0.978343676071694,
0.967900478870214, 1.02854157116164, 0.98339099101476, 0.981198852494798,
0.971392872980818, 0.962289579742817, 1.00601488964457, 0.951187417739673,
0.950706064156994, 0.939174499710836, 0.934948233015044)), .Names = c("year",
"imd.quintile", "average_antibiotic"), row.names = c(NA, -20L
), vars = "year", drop = TRUE, class = c("grouped_df", "tbl_df",
"tbl", "data.frame"))
I am producing a graph detailing the differences in antibiotic prescribing by imd.quintile by year:
# Dodged bar chart of average_antibiotic per year, one bar per imd.quintile.
# NOTE(review): the data argument is plot_data.quintiles, whereas the sample
# data above is assigned to df -- presumably the same table; confirm.
ggplot(plot_data.quintiles) +
geom_col(aes(x = year, y = average_antibiotic, group=imd.quintile, fill=imd.quintile), position = "dodge") +
ylab("Antibiotic STAR-PU") +
xlab("Year") +
theme_bw() +
ylim(0, 1.5)+
# NOTE(review): this adds a scale for the colour aesthetic, but the bars above
# are mapped through fill; "clarity" is passed as the first positional argument.
scale_colour_brewer("clarity")
The blue colour choice isn't to my taste, as the differences between the imd.quintiles aren't very distinct. I have read various posts, here, here and here, but none of them seems to answer my question.
I attempted to use the 'clarity' colours to get a wider range of colour choices. How can I correctly change the fill colour in my ggplot2 graph? what options do I have?
Is this what you want? Use factor(imd.quintile) to create discrete (categorical) data otherwise ggplot will treat numeric/integer imd.quintile as continuous.
# Same sample data as in the question, rebuilt as a plain data frame:
# four years, each with quintiles 1..5.
df <- data.frame(
  year = rep(c(1, 2, 3, 4), each = 5),
  imd.quintile = rep(c(1, 2, 3, 4, 5), times = 4),
  average_antibiotic = c(
    1.17153515458827, 1.11592565388857, 1.09288449967773, 1.07442652168281,
    1.06102887394413,
    1.0560582933182, 1.00678980505929, 0.992997489072538, 0.978343676071694,
    0.967900478870214,
    1.02854157116164, 0.98339099101476, 0.981198852494798, 0.971392872980818,
    0.962289579742817,
    1.00601488964457, 0.951187417739673, 0.950706064156994, 0.939174499710836,
    0.934948233015044
  )
)
library(ggplot2)
# Dodged bar chart with the quintile mapped to fill as a discrete factor.
p1 <- ggplot(data = df) +
  geom_col(
    mapping = aes(
      x = year,
      y = average_antibiotic,
      group = imd.quintile,
      fill = factor(imd.quintile)
    ),
    position = "dodge"
  ) +
  labs(x = "Year", y = "Antibiotic STAR-PU") +
  theme_bw() +
  ylim(0, 1.5)

# Use a scale_fill_xxx function to choose the desired colour palette.
p1 + scale_fill_brewer(palette = "Set2")
If you prefer continuous (sequential) colormaps, viridis or scico are good options:
# scale_fill_viridis_c() and scale_fill_scico() are continuous scales, so they
# need a numeric fill. p1 maps fill to factor(imd.quintile), which a continuous
# scale rejects, so build a second plot that maps the raw numeric column.
p2 <- ggplot(df) +
  geom_col(
    aes(
      x = year, y = average_antibiotic,
      group = imd.quintile, fill = imd.quintile
    ),
    position = "dodge"
  ) +
  ylab("Antibiotic STAR-PU") +
  xlab("Year") +
  theme_bw() +
  ylim(0, 1.5)

p2 +
  scale_fill_viridis_c(option = "E", direction = -1)

# install.packages('scico')
library(scico)
p2 +
  scale_fill_scico()
Created on 2018-11-29 by the reprex package (v0.2.1.9000)
scale_####_brewer uses palettes from RColorBrewer; there is no palette called "clarity".
Use RColorBrewer::display.brewer.all() to see which palettes are available, then call them by name with the palette arg. You also need to change the imd.quintile variable to be either character or factor. You're mapping your aesthetics by fill, not colour, so you need to use scale_fill_brewer.
# Brewer palettes are discrete, so the numeric imd.quintile column is converted
# with factor() before being mapped to fill; the palette is then chosen with
# scale_fill_brewer() because the bars are coloured through `fill`.
ggplot(df) +
  geom_col(
    aes(
      x = year, y = average_antibiotic,
      group = imd.quintile, fill = factor(imd.quintile)
    ),
    position = "dodge"
  ) +
  ylab("Antibiotic STAR-PU") +
  xlab("Year") +
  theme_bw() +
  ylim(0, 1.5) +
  scale_fill_brewer(palette = "Spectral")

Extra spaces between non latin characters in plots in R

When I use non-latin (Russian) characters in ggplot legend, the legend line becomes much longer. This example code shows the problem.
# Eight points whose legend labels mix Latin and Cyrillic text
# (the first label deliberately keeps its trailing space).
test <- data.frame(
  x = as.numeric(1:8),
  y = c(1, 4, 9, 16, 25, 36, 25, 16),
  label = c(
    "green / зеленый ", "зеленый", "red", "white",
    "yellow", "pink", "красный", "синий"
  )
)
# Large points coloured by label, with the legend placed at the bottom and
# wrapped onto two rows.
ggplot(test, mapping = aes(x = x, y = y)) +
  geom_point(mapping = aes(colour = label), size = 20) +
  guides(colour = guide_legend(nrow = 2)) +
  theme(legend.position = "bottom")
Does anyone know the way to cure it? Thanks in advance.

geom_vline median not consistent with true median of vector

I have a vector
# Count data to be plotted as a histogram (102 non-negative integers).
var <- c(
  5, 3, 6, 0, 1, 1, 1, 0, 4, 2, 1, 3, 3, 6, 3, 15, 1, 0, 2, 3,
  1, 0, 0, 3, 2, 3, 2, 2, 2, 4, 4, 0, 1, 0, 2, 2, 5, 3, 3, 1, 0,
  1, 1, 6, 4, 3, 0, 7, 4, 2, 3, 3, 0, 1, 1, 3, 4, 5, 2, 1, 3, 10,
  13, 3, 1, 4, 5, 3, 1, 1, 5, 4, 2, 1, 6, 1, 2, 3, 5, 8, 3, 1,
  7, 4, 0, 1, 7, 1, 3, 4, 3, 5, 3, 2, 1, 1, 9, 2, 0, 4, 3, 5
)
I am plotting the distribution as a histogram using ggplot and drawing a vertical line at the median. The median of var equals 3 (double-checked using Python's numpy).
# Question code: pre-aggregated bar chart of `var` with a vertical median line.
groupMedian <- median(var)
print(groupMedian)
# Count the occurrences of each distinct value (.N per x).
df <- data.table(x = var)
df <- df[, .N, by=x]
# x becomes a factor with levels 0..25, so the x axis below is discrete.
df$x <- factor(df$x, levels=c(0:25))
p <- ggplot(df, aes(x=x, y=N)) +
geom_bar(stat="identity", width=1.0,
colour = "darkgreen",
fill = 'lightslateblue')
p <- p + labs(title = "Var Histogram",
x = "x",
y = "Frequency") +
# drop=FALSE keeps factor levels that have no observations on the axis.
scale_x_discrete(drop=FALSE) +
# NOTE(review): groupMedian is numeric while the x axis is a discrete factor
# whose first level is "0"; a numeric intercept of 3 presumably lands on the
# third level ("2") rather than on the level labelled "3", which would match
# the misplacement reported below -- confirm.
geom_vline(xintercept=groupMedian,
colour = 'red', size = 2)
p = p + coord_cartesian(ylim=c(0, 50)) +
scale_y_continuous(breaks=seq(0, 50, 2))
p = p + theme(panel.grid.major =
element_line(colour = "black", linetype = "dotted") )
# Write the plot to barplot.png and also display it.
ggsave("barplot.png", p, width=8, height=4, dpi=120)
print(p)
The median is 3 but the line is placed at 2.
I also tried using
# Second attempt from the question.
# NOTE(review): `var` is the plain numeric vector defined above, not a data
# frame, and no variable named `median` is assigned in this snippet, so
# aes(xintercept = median) presumably refers to the median() function -- confirm.
p = p+ geom_vline(data=var,
aes(xintercept = median),
colour = 'red', size = 2 )
You can let ggplot2 do the aggregation for you and just use geom_histogram() instead. This seems to provide the gist of what you're after:
# Load data.table (used only to build the one-column table below).
library(data.table)

df <- data.table(x = var)
groupMedian <- median(var)

# Let ggplot2 do the binning on a continuous x axis, so the numeric median
# lines up with the bar for that value.
ggplot(df, aes(x)) +
  geom_histogram(
    binwidth = 1,
    # Bin edges at -0.5, 0.5, 1.5, ... centre each bar on its integer value.
    # `boundary` replaces the deprecated `origin` argument.
    boundary = -0.5,
    colour = "darkgreen",
    fill = "lightslateblue"
  ) +
  geom_vline(
    xintercept = groupMedian,
    colour = "red",
    size = 2
  ) +
  scale_x_continuous(breaks = seq(0, 25)) +
  # Zoom with coord_cartesian() instead of scale limits: limits of c(0, 25)
  # would drop the bar for 0, because its left edge sits at -0.5.
  coord_cartesian(xlim = c(-0.5, 25.5))
Giving you something like this:

Linetype and Guide options for ggplot2 when using geom_smooth with continuous variables

I am using ggplot2 and the geom_smooth function to draw predicted lines from an lm model. I am using continuous y and x variables in my ggplot code.
I am trying to create a different line type for each line in the plot (which is the variable "location" ranging from -1,0, or 1), and I would like each location listed in the guide (where -1 = Location A, 0 = Location B, and 1 = Location C). A replicable excerpt of my code is below:
# Test scores that range from 1 to 7
score <- c(
  6, 3, 5, 6, 7, 2, 4, 6, 3, 5,
  4, 3, 3, 1, 3, 3, 3, 5, 2, 3,
  2, 2, 7, 3, 7, 5, 4, 1, 3, 2,
  7, 6, 6, 3, 6
)

# Location of the school (coded -1, 0 or 1)
location <- c(
  1, 0, 0, 0, 1, 1, 1, -1, 1, -1,
  0, -1, 0, 1, 0, 0, 0, -1, 0, -1,
  0, -1, 0, 0, 0, 0, 0, -1, 0, -1,
  0, 0, 0, 1, 0
)

IQ <- c(
  0.7171425604, 0.7056850461, 1.3929736220, 0.0633936624, -0.6872828336,
  -1.3665840767, 1.4368569944, 0.7297599487, -0.5735047485, -0.6752912747,
  -0.6213572428, -0.6110533924, -0.7090921238, 0.0501744806, 1.3916802944,
  -0.0055243194, 1.3619753292, 1.4406369365, 0.6529601586, 0.0538097896,
  1.3821853866, 1.3870600993, 0.0040551996, 0.6600558495, 1.3550162100,
  1.3081187951, -1.7541949601, 1.3768167017, -0.6232446826, -1.2793074919,
  0.0560708725, -0.5993356051, -0.5857733192, -0.6005459705, -0.6659873442
)

# Combine the three vectors into one data frame, one row per observation.
df <- data.frame(score = score, location = location, IQ = IQ)
# Linear-model fit lines of score against IQ, grouped by factor(location)
# (mapped to shape), with a stripped-down black-and-white theme.
p1 <- ggplot(data = df, aes(y = score, x = IQ, shape = factor(location))) +
  # se = FALSE (spelled out; `F` is an ordinary variable that can be
  # reassigned) turns off the confidence band around each fitted line.
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_continuous("Test Score", limits = c(1, 7)) +
  scale_x_continuous("IQ level", limits = c(-2.45, 1.45)) +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = rel(1.2), color = "black"),
    axis.title.x = element_text(size = rel(1.3)),
    axis.title.y = element_text(size = rel(1.3)),
    axis.text.y = element_text(size = rel(1.2), color = "black"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.ticks.y = element_blank(),
    panel.border = element_blank(),
    plot.title = element_text(size = rel(1.4))
  )
print(p1)
Whenever I try to add linetype or the manual linetype, I keep getting errors about how linetype cannot be mapped with continuous variable (only discrete). Is there any way around this or should I be using a different function? Maybe create labels for the values in location (School 1, School 2, and School 3?) beforehand? Or should I be using some alternative to linetype? I cannot find a solution online or in Wickham's book. Thank you!
You are getting this error message because location is a continuous variable (numeric variables are treated as continuous in ggplot), and only discrete variables can be used for linetype=. To use it for linetype=, convert location to a factor. To change the labels in the legend, use scale_linetype() and provide the labels= argument.
# Map linetype to the discrete version of `location`. factor() orders the
# levels -1, 0, 1, and the labels below rename them in that order.
ggplot(data = df, aes(y = score, x = IQ, linetype = factor(location))) +
  # se = FALSE (spelled out; `F` is an ordinary variable that can be
  # reassigned) turns off the confidence band around each fitted line.
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_continuous("Test Score", limits = c(1, 7)) +
  scale_x_continuous("IQ level", limits = c(-2.45, 1.45)) +
  scale_linetype(labels = c("Location A", "Location B", "Location C"))

Resources