I am working on the Boston data set and trying to see where the 8 room data are on each graph. I have commented out the line which is giving me the error.
I have to add vertical lines at all the points corresponding to rm = 8, to see the spread of data, in every graph of the grid. I want to know:
1. what I have done wrong.
2. A better way to find/represent data points where rm = 8.
library(ggplot2)
library(reshape2)
library(MASS)
library(data.table)
data("Boston")
Boston <- as.data.table(Boston)
molten_boston <- Boston[, `:=`(rm = round(rm),
nox = nox * 100,
chas = chas * 10)]
molten_boston <- melt(data = molten_boston, id.vars = "rm")
comments_bar <- ggplot(molten_boston) +
geom_bar(binwidth = 1, aes(x = value), color = "black", fill = "salmon") +
# geom_vline(data = molten_boston[rm == 8, .SD, by = variable, .SDcols = "value"], aes(xintercept = value)) +
facet_wrap(~ variable, scales = "free")
print(comments_bar)
One other visualization would be stacked bars, it looks ok when large:
molten_boston$EightRooms <- as.factor(molten_boston$rm == 8)
molten_boston$EightRooms <- relevel(molten_boston$EightRooms, 2)
ggplot(molten_boston, aes(x = value, fill = EightRooms)) +
geom_bar(binwidth = 1, color = "black") +
facet_wrap(~ variable, scales = "free")
Using a density plot in the background would be nice, but is a bit tricky in this case because of the changing y-axis. You probably have to do some pre-calculation. Here's my best attempt:
ggplot(molten_boston, aes(x = value)) +
geom_density(data = subset(molten_boston, rm == 8), aes(y =..density.. * 300),
fill = 'blue', alpha = 0.5) +
geom_bar(binwidth = 1, color = "black", fill = "salmon", alpha = 0.5) +
facet_wrap(~ variable, scales = "free")
Another way to do this would be using a rug-plot, with the rugs at the top. This is more or less the same as using geom_vline(...) but the lines don't extend all the way down, obscuring the bars. Also, I don't see why you want to use binwidth=1.
ggplot(molten_boston) +
geom_bar(aes(x = value), color = "grey50", fill = "salmon") +
geom_rug(data=molten_boston[rm==8,value, by=variable],
aes(x=value), sides="t", color="blue") +
facet_wrap(~ variable, scales = "free")
I don't have package data.table, so I can't tell if the problem lies in the data.table part of the code or not. But you need a single value for each room size, so
Boston$rm = round(Boston$rm)
molten_boston <- melt(data =Boston, id.vars = "rm")
rm.means = aggregate.data.frame(molten_boston$value,by=molten_boston[,1:2],FUN=mean)
comments_bar <- ggplot(molten_boston) +
geom_bar(binwidth = 1, aes(x = value), color = "black", fill = "salmon") +
geom_vline(data = rm.means[rm.means$rm==8,], aes(xintercept = x)) +
facet_wrap(~ variable, scales = "free")
print(comments_bar)
seems to work.
Related
I'm trying to control the color of two separate calls to geom_crosbar, using green for the first plot, and blue for the second plot. However, I get the warning from the second geom_crossbar call Scale for 'fill' is already present:
Warning: Adding another scale for 'fill', which will replace the existing
scale.
Here's an example of my code:
my.data %>%
ggplot(aes(site, npp_nofert)) +
geom_crossbar(aes(ymin=npp_nofert-npp.sd_nofert,ymax=npp_nofert+npp.sd_nofert,
fatten=1.0,fill=period),position='dodge', alpha=0.5) +
scale_fill_brewer(palette="Greens") +
#labs(y=expression(paste("MMM %",Delta," (+/- 1",sigma,")")), x="", fill="", title="") + theme_bw() +
labs(y="",x="", fill="", title="") + theme_bw() +
theme(legend.key.size=unit(1.0,"cm"),legend.direction="horizontal",legend.position=c(0.3,0.05),
axis.text.x=element_blank(),axis.ticks.x=element_blank(),
plot.title=element_text(size=12,margin=margin(t=5,b=-20)), legend.spacing=unit(0,"cm"),
text = element_text(size=15)) +
new_scale_fill() +
geom_crossbar(aes(ymin=npp_fert-npp.sd_fert,ymax=npp_fert+npp.sd_fert, fatten=1.0,fill=period),
position='dodge',alpha=0.5) +
scale_fill_brewer(palette="Blues")
And example output:
Unfortunately, I cannot dput() the data as I do not have permission to do that.
How can I set the first plot to green and second to blue? Also, just noticed the call to alpha is in the legend. How to remove that?
Notes: The 1980 to 1999 period, there is only a single plot (i.e., no treatment), so there will not be overlaying plots for that period. The x axis represents study sites, I can fix the labels later.
The general way to go about this would be to use the ggnewscale package, which allows you to 'reset' an aesthetic at some point in the plotting process.
Since there is no data to use, I'll make up some dummy data that has a vague semblance to what you're showing above.
library(ggplot2)
library(ggnewscale)
df <- data.frame(
x = 1:5,
blue_low = 1:5,
blue_mid = 2:6,
blue_high = 3:7,
green_low = 0:4,
green_mid = 2:6,
green_high = 4:8
)
ggplot(df, aes(x = 1, group = x)) +
geom_crossbar(aes(ymin = green_low, y = green_mid, ymax = green_high,
fill = as.factor(x)),
position = "dodge", alpha = 0.5) +
scale_fill_brewer(palette = "Greens") +
new_scale_fill() + # Important to put this after you defined the first scale
geom_crossbar(aes(ymin = blue_low, y = blue_mid, ymax = blue_high,
fill = paste0(x, "_blue")), # paste to differentiate scale
position = "dodge", alpha = 0.5) +
scale_fill_brewer(palette = "Blues")
Created on 2020-06-18 by the reprex package (v0.3.0)
I'm sure it won't be too difficult to take the new_scale_fill() and put it in the correct position in your plotting code, which I think is after scale_fill_brewer(palette="Greens").
So I've decided that the approach I was using for the plot looks terrible. A better solution, IMO, is to use geom_crossbar with geom_pointrange.
Here's an example using the data that teubrand provided:
library(ggplot2)
library(ggnewscale)
df <- data.frame(
x = 1:5,
blue_low = 1:5,
blue_mid = 2:6,
blue_high = 3:7,
green_low = 0:4,
green_mid = 2:6,
green_high = 4:8
)
ggplot(df, aes(x = 1, group = x)) +
geom_crossbar(aes(ymin = green_low, y = green_mid, ymax = green_high,
fill = as.factor(x)),
position = "dodge", alpha = 0.8) +
scale_fill_brewer(palette = "Greens") +
new_scale_fill() + # Important to put this after you defined the first scale
geom_pointrange(aes(ymin = blue_low, y = blue_mid, ymax = blue_high,
fill = as.factor(x)), # paste to differentiate scale
position = position_dodge(width=0.9), color="gray30") +
scale_fill_brewer(palette = "Blues")
I came across this question the other day and tried to re-create it for myself. ggplot, facet, piechart: placing text in the middle of pie chart slices
. My data is in a very similar format, but sadly the accepted answer did not help, hence why I am re posting.
I essentially want to create the accepted answer but with my own data, yet the issue I run into is that coord_polar does not support free scale. Using the first answer:
I tried it using the second version of the answer, with the ddplyr version, but I also do not get my desired output. Using the second answer:
Clearly none of these has the desired effect. I would prefer to create one as with size pie charts, but only showed four as an example, follows: .
This I did in excel, but with one legend, and no background grid.
Code
title<-c(1,1,2,2,3,3,4,4,5,5,6,6)
type<-c('A','B','A','B','A','B','A','B','A','B','A','B')
value<-c(0.25,0.75,0.3,0.7,0.4,0.6,0.5,0.5,0.1,0.9,0.15,0.85)
piec<-data.frame(title,type,value)
library(tidyverse)
p1<-ggplot(data = piec, aes(x = "", y = value, fill = type)) +
geom_bar(stat = "identity") +
geom_text(aes(label = value), position = position_stack(vjust = 0.5)) +
coord_polar(theta = "y")
#facet_grid(title ~ ., scales = "free")
p1
piec <- piec %>% group_by(title) %>% mutate(pos=cumsum(value)-0.5*value)
p2<-ggplot(data = piec) +
geom_bar(aes(x = "", y = value, fill = type), stat = "identity") +
geom_text(aes(x = "", y = pos, label = value)) +
coord_polar(theta = "y")
#facet_grid(Channel ~ ., scales = "free")
p2
You don't have to supply different y values for geom_text and geom_bar (use y = value for both of them). Next you have to specify position in geom_text. Finally, remove scales from facets.
library(ggplot2)
title<-c(1,1,2,2,3,3,4,4,5,5,6,6)
type<-c('A','B','A','B','A','B','A','B','A','B','A','B')
value<-c(0.25,0.75,0.3,0.7,0.4,0.6,0.5,0.5,0.1,0.9,0.15,0.85)
piec<-data.frame(title,type,value)
ggplot(piec, aes("", value, fill = type)) +
geom_bar(stat = "identity", color = "white", size = 1) +
geom_text(aes(label = paste0(value * 100, "%")),
position = position_stack(vjust = 0.5),
color = "white", size = 3) +
coord_polar(theta = "y") +
facet_wrap(~ title, ncol = 3) +
scale_fill_manual(values = c("#0048cc", "#cc8400")) +
theme_void()
I using ggplot to create a bubble plot. With this code:
ggplot(df, aes(x = order, y = mean, size = n, fill = name)) +
geom_point(shape = 21) +
theme_bw() +
theme() +
scale_size(range = c(1, 50)) +
ylim(0,100)
It is working perfectly apart from 2 things:
For each name (fill) I would like to manually specify the colour used (via a dataframe that maps name to colour) - this is to provide consistency across multiple figures.
I would like to substitute the numbers on the y for text labels (for several reasons I cannot use the text labels from the outset due to ordering issues)
I have tried several methods using scale_color_manual() and scale_y_continuous respectively and I am getting nowhere! Any help would be very gratefully received!
Thanks
Since you have not specified an example df, I created one of my own.
To manually specify the color, you have to use scale_fill_manual with a named vector as the argument of values.
Edit 2
This appears to do what you want. We use scale_y_continuous. The breaks argument specifies the vector of positions, while the labels argument specifies the labels which should appear at those positions. Since we already created the vectors when creating the data frame, we simply pass those vectors as arguments.
ggplot(df, aes(x = order, y = mean, size = n, fill = name)) +
geom_point(shape = 21) +
scale_fill_manual(values = gcolors) +
scale_size(limits = c(min(df$n), max(df$n))) +
scale_y_continuous(breaks = mean, labels = order_label)
Edit 1
From your comment, it appears that you want to label the circles. One option would be to use geom_text. Code below. You may need to experiment with values of nudge_y to get the position correct.
order <- c(1, 2)
mean <- c(0.75, 0.3)
n <- c(180, 200)
name <- c("a", "b")
order_label <- c("New York", "London")
df <- data.frame(order, mean, n, name, order_label, stringsAsFactors = FALSE)
color <- c("blue", "red")
name_color <- data.frame(name, color, stringsAsFactors = FALSE)
gcolors <- name_color[, 2]
names(gcolors) <- name_color[, 1]
ggplot(df, aes(x = order, y = mean, size = n, fill = name)) +
geom_point(shape = 21) +
geom_text(aes(label = order_label), size = 3, hjust = "inward",
nudge_y = 0.03) +
scale_fill_manual(values = gcolors) +
scale_size(limits = c(min(df$n), max(df$n))) +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank()) +
ylab(NULL)
Original Answer
It is not clear what you mean by "substitute the numbers on the y for text labels". In the example below, I have formatted the y-axis as a percentage using the scales::percent_format() function. Is this similar to what you want?
order <- c(1, 2)
mean <- c(0.75, 0.3)
n <- c(180, 200)
name <- c("a", "b")
df <- data.frame(order, mean, n, name, stringsAsFactors = FALSE)
color <- c("blue", "red")
name_color <- data.frame(name, color, stringsAsFactors = FALSE)
gcolors <- name_color[, 2]
names(gcolors) <- name_color[, 1]
ggplot(df, aes(x = order, y = mean, size = n, fill = name)) +
geom_point(shape = 21) +
scale_fill_manual(values = gcolors) +
scale_size(limits = c(min(df$n), max(df$n))) +
scale_y_continuous(labels = scales::percent_format())
Thanks, for all your help, this worked perfectly:
ggplot(df, aes(x = order, y = mean, size = n, fill = name)) +
geom_point(shape = 21) +
scale_fill_manual(values = gcolors) +
scale_size(limits = c(min(df$n), max(df$n))) +
scale_x_continuous(breaks = order, labels = order_label)
I have two sets of data, which I want to present using a heat map with the viridis color scale. For the first data set, my values range from 0 to 1.2 and I can easily see the differences I want to see. However my second data set has some outliers, resulting in a range from 0 to 2. Now it's harder to see the differences in the interesting range between 0 and 1 and it's more diffucult to compare the two images directly. Is there a possibility to show the data from 0 to 1.2 using the viridis colour scale while showing the higher values in yellow ("highest" colour of the viridis scale)?
Here is an example:
library(viridis)
#Create Data
DataSet1 <- expand.grid(x = 0:5, y = 0:5)
DataSet1$z <- runif(36, 0, 1.2)
DataSet2 <- expand.grid(x = 0:5, y = 0:5)
DataSet2$z <- runif(36, 0, 2)
#Plot Data
ggplot(DataSet1, aes(x, y, fill = z)) +
geom_tile() +
scale_fill_viridis() +
geom_text(aes(label = round(z, 2)), size = 2)
DataSet1: Differences between 0.5 and 0.7 are easy to see
ggplot(DataSet2, aes(x, y, fill = z)) +
geom_tile() +
scale_fill_viridis() +
geom_text(aes(label = round(z, 2)), size = 2)
DataSet2: Differences between 0.5 and 0.7 are diffucult to see
EDIT 2022-05-03: The scale function is called scale_fill_viridis_c() these days.
#ClausWilke's solution is better because it shows in the legend, but sometimes one just needs a quick solution without having to write too much specific code. This one also relies on the scales package
ggplot(DataSet2, aes(x, y, fill = z)) +
geom_tile() +
scale_fill_viridis_c(limits = c(0.2, 1), oob = scales::squish) +
geom_text(aes(label = round(z, 2)), size = 2)
You can define an arbitrary rescaling function. Not sure this looks that great, would likely need some work with the legend, but in principle this mechanism allows you to map data values onto the scale in any way you want.
ggplot(DataSet2, aes(x, y, fill = z)) +
geom_tile() +
scale_fill_viridis(rescaler = function(x, to = c(0, 1), from = NULL) {
ifelse(x<1.2,
scales::rescale(x,
to = to,
from = c(min(x, na.rm = TRUE), 1.2)),
1)}) +
geom_text(aes(label = round(z, 2)), size = 2)
Are you looking for something like this?
ggplot(DataSet2, aes(x, y, fill = z)) +
geom_tile() +
scale_fill_gradient(low="green", high="red", limits=c(0, 1.2),
na.value = "yellow") +
geom_text(aes(label = round(z, 2)), size = 2)
Using the viridis colors, asper jazzurro recommendation.
ggplot(DataSet2, aes(x, y, fill = z)) +
geom_tile() +
scale_fill_gradientn(colors = viridis_pal()(9), limits=c(0, 1.2),
na.value = "#FDE725FF") +
geom_text(aes(label = round(z, 2)), size = 2)
It's not necessarily an improvement, but you could do something like this to show the higher values in yellow:
DataSet2A <- DataSet2 %>% filter(z <= 1.2)
DataSet2B <- DataSet2 %>% filter(z > 1.2)
ggplot(DataSet2A, aes(x, y, fill = z)) +
geom_tile() +
scale_fill_viridis(begin = 0, end = .75) +
geom_text(aes(label = round(z, 2)), size = 2) +
geom_tile(data = DataSet2B, aes(x, y), fill = "yellow")
Maybe if you play around with the cutoff as well as the begin= and end= parameters in the scale, which control the portion of the viridis scale that you're employing, you can achieve the result you want. (Note that you can only have one fill scale per plot, but you can set additional constant fills as I've done here with yellow.)
I want to plot the distribution of a variable by Class and add vertical lines denoting the means of the subsets defined by each Class and having them colored by Class. While I succeed to color the distributions by Class, the vertical lines appear gray. For a reproducible example see below:
library(data.table)
library(ggplot2)
library(ggthemes)
data(mtcars)
setDT(mtcars)
mtcars[, am := factor(am, levels = c(1, 0))]
mean_data <- mtcars[, .(mu = mean(hp)), by = am]
ggplot(mtcars, aes(x = hp, fill = am , color = am)) +
geom_histogram(aes(y=..density..), position="identity",alpha = 0.4) + guides(color = FALSE) +
geom_density (alpha = 0.5)+
geom_vline(data = mean_data, xintercept = mean_data$mu, aes(color = as.factor(mean_data$am)), size = 2, alpha = 0.5) +
ggtitle("Hp by am") + scale_fill_discrete(labels=c("am" , "no am")) +
labs(fill = "Transmission") + theme_economist()
This code renders the following plot:
Your advice will be appreciated.
You need to include the xintercept mapping in your aes call, so that ggplot properly maps all the aesthetics:
ggplot(mtcars, aes(x = hp, fill = am , color = am)) +
geom_histogram(aes(y=..density..), position="identity",alpha = 0.4) + guides(color = FALSE) +
geom_density (alpha = 0.5)+
geom_vline(data = mean_data, aes(xintercept = mu, color = as.factor(am)), size = 2, alpha = 0.5) +
ggtitle("Hp by am") + scale_fill_discrete(labels=c("am" , "no am")) +
labs(fill = "Transmission") + theme_economist()
Anything you put in a geom call that's not in aes gets treated as a one-off value, and doesn't get all the mapped aesthetics applied to it.