I want to plot quarterly data, where the information is saved in the variable yq as: YYYY Q (e..g 2007 Q1), I created by combining Year and Quarter (1, 2, 3, 4):
df$yq = as.yearqtr(paste(df$Year, df$Quarter), "%Y %q")
Looking around on the examples online, I wrote this code but the problem is that it does not let me specify the years I want to display on the x-axis - or rather I can't seem to find a way to apply the breaks for this type of data. I have 15 years of data and would like to only display the label every 3 years, so that the only labels are 2005.Q1, 2008.Q1, 2011.Q1, 2014.Q1, 2017.Q1.
Grateful for tips!
plot = ggplot(df, aes(x = yq, y = rate, color = "red")) +
geom_point() +
geom_line(aes(y = lin, color = "green"), size = 1) +
geom_line(aes(y = quad, color = "brown"), size = 1) +
geom_line(aes(y = cube, color = "blue"), size = 1) +
geom_line(aes(y = log, color = "pink"), size = 1) +
geom_line(aes(y = power_corr, color = "black"), size = 1) +
geom_line(aes(y = exp_corr, color = "yellow"), size = 1) +
scale_color_identity(name = "Models",
breaks = c("red", "green", "brown", "blue", "pink", "black", "yellow"),
labels = c("Observed", "Linear", "Quadratic", "Cubic", "Log", "Power", "Exponential"),
guide = "legend")
plot +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"))+
theme(axis.text.x = element_text(size = 8, angle=-45, hjust = 0.001),
legend.key = element_rect(color = NA, fill = NA))+
labs(title="", x = "Year/quarters", y = "Smoking prevalence")+
scale_x_yearqtr(format='%Y.Q%q')
The data was not provided so we will use a simple example of x and y vectors and use scale_x_yearqtr with the indicated breaks and format arguments.
library(ggplot2)
library(zoo)
# inputs
x <- as.yearqtr(2000) + 0:29
y <- 1:30
qplot(x, y) +
scale_x_yearqtr(breaks = seq(min(x), max(x), by = 3), format = "%Y.Q%q")
Related
Below is a simple bubble plot for three character traits (Lg_chr, Mid_chr, and Sm_chr) across three locations.
All good, except that because the range of Lg_chr is several orders of magnitude larger than the ranges for the other two traits, it swamps out the area differences between the smaller states, making the differences very difficult to see - for example, the area of the points for for Location_3's Mid_chr (70) and Sm_chr (5), look almost the same.
Is there a way to set a conditional size scale based on name in ggplot2 without having to facit wrap them? Maybe a conditional statement for scale_size_continuous(range = c(<>, <>)) separately for Lg_chr, Mid_chr, and Sm_chr?
test_df = data.frame(lg_chr = c(100000, 150000, 190000),
mid_chr = c(50, 90, 70),
sm_chr = c(15, 10, 5),
names = c("location_1", "location_2", "location_3"))
#reformat for graphing
test_df_long<- test_df %>% pivot_longer(!names,
names_to = c("category"),
values_to = "value")
#plot
ggplot(test_df_long,
aes(x = str_to_title(category),
y = str_to_title(names),
colour = str_to_title(names),
size = value)) +
geom_point() +
geom_text(aes(label = value),
colour = "white",
size = 3) +
scale_x_discrete(position = "top") +
scale_size_continuous(range = c(10, 50)) +
scale_color_manual(values = c("blue", "red",
"orange")) +
labs(x = NULL, y = NULL) +
theme(legend.position = "none",
panel.background = element_blank(),
panel.grid = element_blank(),
axis.ticks = element_blank()) ```
Edit:
You could use ggplot_build to manually modify the point layer [[1]] to specify the sizes of your points like this:
#plot
p <- ggplot(test_df_long,
aes(x = str_to_title(category),
y = str_to_title(names),
colour = str_to_title(names),
size = value)) +
geom_point() +
geom_text(aes(label = value),
colour = "white",
size = 3) +
scale_x_discrete(position = "top") +
scale_color_manual(values = c("blue", "red",
"orange")) +
labs(x = NULL, y = NULL) +
theme(legend.position = "none",
panel.background = element_blank(),
panel.grid = element_blank(),
axis.ticks = element_blank())
q <- ggplot_build(p)
q$data[[1]]$size <- c(7,4,1,8,5,2,9,6,3)*5
q <- ggplot_gtable(q)
plot(q)
Output:
You could use scale_size with a log10 scale to make the difference more visuable like this:
#plot
ggplot(test_df_long,
aes(x = str_to_title(category),
y = str_to_title(names),
colour = str_to_title(names),
size = value)) +
geom_point() +
geom_text(aes(label = value),
colour = "white",
size = 3) +
scale_size(trans="log10", range = c(10, 50)) +
scale_x_discrete(position = "top") +
scale_color_manual(values = c("blue", "red",
"orange")) +
labs(x = NULL, y = NULL) +
theme(legend.position = "none",
panel.background = element_blank(),
panel.grid = element_blank(),
axis.ticks = element_blank())
Output:
I have the following plot:
ggplot() +
geom_pointrange(data=data_FA, mapping=aes(x=snr, y=median, ymin=p25, ymax=p75, colour=factor(method), group=method), position = pd) +
geom_hline(yintercept=FA_GT, linetype="dashed", color = "blue") +
theme(legend.title = element_blank(), legend.position = "none", panel.border = element_rect(colour = "gray", fill=NA, size=1),
plot.margin = unit( c(0,0.5,0,0) , units = "lines" )) +
labs( title = "", subtitle = "")
obtained from the following dataset:
For each group (red and blue) codified by the factor method, I want to see red/blue dots and lines with different transparency according to the factor subset. Does anyone know how to do that? In addition, how can I add more separation space between the two groups (red and blue)?
Thank you!
You can just map alpha to subset inside aes:
ggplot(data_FA) +
geom_pointrange(aes(snr, median, ymin = p25, ymax = p75,
colour = factor(method), group = method,
alpha = subset),
position = pd) +
geom_hline(yintercept = FA_GT, linetype = "dashed", color = "blue") +
scale_alpha_manual(values = c(0.3, 1)) +
theme_bw() +
theme(legend.position = 'none',
panel.border = element_rect(colour = "gray", fill = NA, size = 1),
plot.margin = unit( c(0,0.5,0,0), units = "lines" )) +
labs(title = "", subtitle = "")
Data
data_FA <- data.frame(X = c("X1", "X1.7", "X1.14", "X1.21"),
snr = "snr10",
subset = c("full", "full", "subset5", "subset5"),
method= c("sc", "trunc", "sc", "trunc"),
median = c(0.4883985, 0.4883985, 0.4923685, 0.4914260),
p25 = c(0.4170183, 0.4170183, 0.4180174, 0.4187472),
p75 = c(0.5617713, 0.5617713, 0.5654203, 0.5661565))
FA_GT <- 0.513
pd <- position_dodge2(width = 1)
I'm trying to plot a 2D density plot with ggplot, with added marginal histograms. Problem is that the polygon rendering is stupid and needs to be given extra padding to render values outside your axis limits (e.g. in this case I set limits between 0 and 1, because values outside this range have no physical meaning). I still want the density estimate though, because often it's much cleaner than a blocky 2D heatmap.
Is there a way around this problem, besides scrapping ggMarginal entirely and spending another 50 lines of code trying to align histograms?
Unsightly lines:
Now rendering works, but ggMarginal ignores choord_cartesian(), which demolishes the plot:
Data here:
http://pasted.co/b581605a
dataset <- read.csv("~/Desktop/dataset.csv")
library(ggplot2)
library(ggthemes)
library(ggExtra)
plot_center <- ggplot(data = dataset, aes(x = E,
y = S)) +
stat_density2d(aes(fill=..level..),
bins= 8,
geom="polygon",
col = "black",
alpha = 0.5) +
scale_fill_continuous(low = "yellow",
high = "red") +
scale_x_continuous(limits = c(-1,2)) + # Render padding for polygon
scale_y_continuous(limits = c(-1,2)) + #
coord_cartesian(ylim = c(0, 1),
xlim = c(0, 1)) +
theme_tufte(base_size = 15, base_family = "Roboto") +
theme(axis.text = element_text(color = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
legend.text = element_text(size = 12, family = "Roboto"),
legend.title = element_blank(),
legend.position = "none")
ggMarginal(plot_center,
type = "histogram",
col = "black",
fill = "orange",
margins = "both")
You can solve this problem by using xlim() and ylim() instead of coord_cartesian.
dataset <- read.csv("~/Desktop/dataset.csv")
library(ggplot2)
library(ggthemes)
library(ggExtra)
plot_center <- ggplot(data = dataset, aes(x = E,
y = S)) +
stat_density2d(aes(fill=..level..),
bins= 8,
geom="polygon",
col = "black",
alpha = 0.5) +
scale_fill_continuous(low = "yellow",
high = "red") +
scale_x_continuous(limits = c(-1,2)) + # Render padding for polygon
scale_y_continuous(limits = c(-1,2)) + #
xlim(c(0,1)) +
ylim(c(0,1)) +
theme_tufte(base_size = 15, base_family = "Roboto") +
theme(axis.text = element_text(color = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
legend.text = element_text(size = 12, family = "Roboto"),
legend.title = element_blank(),
legend.position = "none")
ggMarginal(plot_center,
type = "histogram",
col = "black",
fill = "orange",
margins = "both")
I need to create a plot, in which a histogram gets overlayed by a density. Here is my result so far using some example data:
library("ggplot2")
set.seed(1234)
a <- round(rnorm(10000, 5, 5), 0)
b <- rnorm(10000, 5, 7)
df <- data.frame(a, b)
ggplot(df) +
geom_histogram(aes(x = a, y = ..density.., col = "histogram", linetype = "histogram"), fill = "blue") +
stat_density(aes(x = b, y = ..density.., col = "density", linetype = "density"), geom = "line") +
scale_color_manual(values = c("red", "white"),
breaks = c("density", "histogram")) +
scale_linetype_manual(values = c("solid", "solid")) +
theme(legend.title = element_blank(),
legend.position = c(.75, .75),
legend.text = element_text(size = 15))
Unfortunately I can not figure out how I can change the symbols in the legend properly. The first symbol should be a relatively thick red line and the second symbol should be a blue box without the white line in the middle.
Based on some internet research, I tried to change different things in scale_linetype_manual and further I tried to use override.aes, but I could not figure out how I would have to use it in this specific case.
EDIT - Here is the best solution based on the very helpful answers below.
ggplot(df) +
geom_histogram(aes(x = a, y = ..density.., linetype = "histogram"),
fill = "blue",
# I added the following 2 lines to keep the white colour arround the histogram.
col = "white") +
scale_linetype_manual(values = c("solid", "solid")) +
stat_density(aes(x = b, y = ..density.., linetype = "density"),
geom = "line", color = "red") +
theme(legend.title = element_blank(),
legend.position = c(.75, .75),
legend.text = element_text(size = 15),
legend.key = element_blank()) +
guides(linetype = guide_legend(override.aes = list(linetype = c(1, 0),
fill = c("white", "blue"),
size = c(1.5, 1.5))))
As you thought, most of the work can be done via override.aes for linetype.
Note I removed color from the aes of both layers to avoid some trouble I was having with the legend box outline. Doing this also avoids the need for the scale_*_* function calls. To set the color of the density line I used color outside of aes.
In override.aes I set the linetype to be solid or blank, the fill to be either white or blue, and the size to be 2 or 0 for the density box and histogram box, respectively.
ggplot(df) +
geom_histogram(aes(x = a, y = ..density.., linetype = "histogram"), fill = "blue") +
stat_density(aes(x = b, y = ..density.., linetype = "density"), geom = "line", color = "red") +
theme(legend.title = element_blank(),
legend.position = c(.75, .75),
legend.text = element_text(size = 15),
legend.key = element_blank()) +
guides(linetype = guide_legend(override.aes = list(linetype = c(1, 0),
fill = c("white", "blue"),
size = c(2, 0))))
The fill and colour aesthetics are labelled by histogram and density respectively, and their values set using scale_*_manual. Doing so maps directly to the desired legend without needing any overrides.
ggplot(df) +
geom_histogram(aes(x = a, y = ..density.., fill = "histogram")) +
stat_density(aes(x = b, y = ..density.., colour="density"), geom = "line") +
scale_fill_manual(values = c("blue")) +
scale_colour_manual(values = c("red")) +
labs(fill="", colour="") +
theme(legend.title = element_blank(),
legend.position = c(.75, .75),
legend.box.just = "left",
legend.background = element_rect(fill=NULL),
legend.key = element_rect(fill=NULL),
legend.text = element_text(size = 15))
This question is a follow-up to this post: previous post
I have 28 variables, M1, M2, ..., M28, for which I compute certain statistics x and y.
library(ggplot2)
df = data.frame(model = factor(paste("M", 1:28, sep = ""), levels=paste("M", 1:28, sep = "")), a = runif(28, 1, 1.05), b = runif(28, 1, 1.05))
levels = seq(0.8, 1.2, 0.05)
Here is the plot:
ggplot(data=df) +
geom_polygon(aes(x=model, y=a, group=1), color = "black", fill = NA) +
geom_polygon(aes(x=model, y=b, group=1), color = "blue", fill = NA) +
coord_polar() +
scale_y_continuous(limits=range(levels), breaks=levels, labels=levels) +
theme(axis.text.y = element_blank(), axis.ticks = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank())
I would like to add a point the the plot, with y-value = 1 for M1 (model1). I tried adding:
geom_point(aes(y = 1, x = "M1"), color = "red", cex = 0.5)
but it doesn't work. Any idea what I am doing wrong?
Thanks for your help!
cex is not an argument for geom_point. Try size, e.g.
geom_point(aes(y = 1, x = "M1"), color = "red", size = 10)