Below is the code for a graph I am making for an article I am working on. The plot showed the predicted probabilities along a range of values in my data set. Along the x-axis is a rug plot that shows the distribution of trade share values (I provided the code and an image of the graph):
# Predicted probability of sanctions busting across SITC 8 trade share:
# a line for the mean prediction, a ribbon between lowersd and uppersd, and a
# rug along the bottom axis showing the observed trade-share values.
sitc8 <- ggplot() +
  geom_line(
    data = plotdat8,
    aes(x = lagsitc8100, y = PredictedProbabilityMean),
    size = 2, color = "blue"
  ) +
  geom_ribbon(
    data = plotdat8,
    aes(x = lagsitc8100, ymin = lowersd, ymax = uppersd),
    fill = "grey50", alpha = .5
  ) +
  ylim(c(-0.75, 1.5)) +
  geom_hline(yintercept = 0) +
  # The rug data is the `frame` slot of the model object. This previously read
  # `...full#frame`; `#` starts a comment in R and cut the call short.
  # NOTE(review): assumes an S4 model object with a `frame` slot - confirm.
  geom_rug(
    data = multi.sanctions.bust8.full@frame,
    aes(x = lagsitc8100),
    col = "black", size = 1.0, sides = "b"
  ) +
  xlab("SITC 8 Trade Share") +
  ylab("Probability of Sanctions Busting") +
  theme(
    panel.grid.major = element_line(colour = "gray", linetype = "dotted"),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
  )
My question is: is it possible to change the color of the lines of the rugplot of trade share in which the event I am modeling occurs? In other words, I would like to add red lines or red dots along those values of trade share when my event = 1.
Is this possible?
Sure. You'd just have to add a color argument within an aes() function call within geom_rug().
Here's some code to create a dummy data frame.
library(tidyverse)
# Fix the RNG seed so the simulated data are reproducible.
set.seed(42)

# 100 simulated points. `temp_row` holds the row number as text and is used to
# label the first 50 rows "Type A" and the remaining rows "Type B".
dummy_data <- tibble(
  x_var = rnorm(100),
  y_var = abs(rnorm(100)) * x_var
) %>%
  rownames_to_column(var = "temp_row") %>%
  mutate(
    color_id = if_else(as.numeric(temp_row) <= 50, "Type A", "Type B")
  )
And here's a ggplot call where the color for geom_rug is mapped to a character column named color_id
# Linear fit of y_var on x_var, with a bottom rug whose colour is mapped to
# the color_id column.
ggplot(dummy_data, aes(x = x_var, y = y_var)) +
  geom_smooth(method = "lm") +
  geom_rug(aes(color = color_id), sides = "b")
Update:
Following OP's comment, here's an updated version. If it's a numeric vector of 0s and 1s, you have to tell ggplot to treat it as a dichotomous variable. You can do that by wrapping it in a call to factor() for instance.
For the color we can set that manually using scale_color_manual(). So the changes to the code are the following.
color_id is now a vector of 0s and 1s.
the color is now mapped to factor(color_id)
the color scale is determined using scale_color_manual
library(tidyverse)
# Fix the RNG seed so the simulated data are reproducible.
set.seed(42)

# 100 simulated points; color_id is numeric: 0 for the first 50 rows, 1 after.
dummy_data <- tibble(
  x_var = rnorm(100),
  y_var = abs(rnorm(100)) * x_var
) %>%
  rownames_to_column(var = "temp_row") %>%
  mutate(color_id = if_else(as.numeric(temp_row) <= 50, 0, 1))

ggplot(dummy_data, aes(x = x_var, y = y_var)) +
  geom_smooth(method = "lm") +
  # factor() makes ggplot treat the 0/1 column as discrete, not continuous.
  geom_rug(aes(color = factor(color_id)), sides = "b") +
  # Colours are assigned in level order: 0 -> black, 1 -> red.
  scale_color_manual(values = c("black", "red")) +
  labs(color = "This takes two values")
Definitely possible. Here's an example using iris, and a dynamic condition in the rug. You could also do two rugs, if you chose.
library(tidyverse)
# Rug coloured by a logical condition evaluated inside aes().
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point() +
  geom_rug(aes(color = Petal.Length > 3), sides = "b")

# Second example, output not shown
# Two separate rugs, one per subset of the data, each with a fixed colour.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point() +
  geom_rug(
    data = subset(iris, Petal.Length > 3),
    color = "black", sides = "b"
  ) +
  geom_rug(
    data = subset(iris, Petal.Length <= 3),
    color = "red", sides = "b"
  )
Related
I can't seem to be able to set different fill colours for geom_ribbon(), using one of the columns as input to fill
library(ggplot2)
# Example data for the question: time points A-D and two groups.
time <- as.factor(c('A','B','C','D'))
grouping <- as.factor(c('GROUP1','GROUP1','GROUP1','GROUP1',
'GROUP2','GROUP2','GROUP2','GROUP2'))
x <- c(1.00,1.03,1.03,1.06,0.5,0.43,0.2,0.1)
# Ribbon bounds: 0.05 above and below each value.
x.upper <- x+0.05
x.lower <- x-0.05
# `time` has 4 elements and the other columns have 8, so it is recycled.
df <- data.frame(time, x, x.upper, x.lower,grouping)
ggplot(data = df,aes(as.numeric(time),x,group=grouping,color=grouping)) +
# NOTE: `fill=grouping` is passed outside aes() below; this is the call the
# accompanying text reports as failing with "Unknown colour name: grouping".
geom_ribbon(data = df, aes(x=as.numeric(time), ymax=x.upper, ymin=x.lower),
fill=grouping, alpha=.5) +
geom_point() + labs(title="My ribbon plot",x="Time",y="Value") +
# Relabel the numeric positions 1-4 with the original factor levels.
scale_x_continuous(breaks = 1:4, labels = levels(df$time))
I get the error Error: Unknown colour name: grouping but fill=c("pink","blue") works fine. I don't want to specify the colours manually.
All other examples I can find simply list the column in the fill argument so I'm not sure what I'm doing incorrectly.
Move fill = grouping inside aes so that this column is mapped to the fill variable.
# `fill` is mapped inside aes(), so the ribbon colour follows the grouping
# column instead of being read as a literal colour name.
ggplot(df, aes(x = as.numeric(time), y = x, color = grouping)) +
  geom_ribbon(
    aes(ymin = x.lower, ymax = x.upper, fill = grouping),
    data = df,
    alpha = 0.5
  ) +
  geom_point() +
  labs(title = "My ribbon plot", x = "Time", y = "Value") +
  # Relabel the numeric positions 1-4 with the original factor levels.
  scale_x_continuous(breaks = 1:4, labels = levels(df$time))
library(tidyverse)
library(poliscidata)
# One row per dem_level4 value: the number of countries and the sum of oil
# (missing values ignored).
world_red <- world %>%
  group_by(dem_level4) %>%
  summarize(
    number_of_countries = n(),
    oil = sum(oil, na.rm = TRUE)
  )

colors <- c("green", "blue", "orange", "red")

# Columns ordered by decreasing oil, filled by the country count treated as a
# discrete variable.
p1 <- ggplot(world_red) +
  aes(
    x = reorder(dem_level4, -oil),
    y = oil,
    fill = factor(number_of_countries)
  ) +
  geom_col() +
  scale_fill_manual(values = colors)
The variable "world" is part of the loaded package.
There is no error message in this code, but the legend appears as "factor(number_of_countries"), but, of course, it should only be "number_of_countries". How can I achieve this?
Just writing
fill = number_of_countries) +
without calling factor () results in the error message "Continuous value supplied to discrete scale"
You can add a name argument into scale_fill_manual:
# Supplying `name` to the fill scale replaces the default legend title.
p1 <- ggplot(world_red) +
  aes(
    x = reorder(dem_level4, -oil),
    y = oil,
    fill = factor(number_of_countries)
  ) +
  geom_col() +
  scale_fill_manual(values = colors, name = "number_of_countries")
Or you can add a labs line, then give a new name to fill. You can also update the x and y axis labels here too (which I did below).
library(tidyverse)
# labs() sets the legend title through `fill` and the axis titles through
# `x` and `y`.
p1 <- ggplot(world_red) +
  aes(
    x = reorder(dem_level4, -oil),
    y = oil,
    fill = factor(number_of_countries)
  ) +
  geom_col() +
  scale_fill_manual(values = colors) +
  labs(
    x = "dem_level4",
    y = "Oil",
    fill = 'number_of_countries'
  )
Output
I have the following sample data:
library(tidyverse)
# Sample data: 12 "origin_target" pairs, repeated once for each of two levels,
# with random positive values.
df <- data.frame(
  col = rep(
    c(
      "A_B", "A_C", "A_D",
      "B_A", "C_A", "D_A",
      "B_C", "B_D",
      "C_B", "D_B",
      "C_D", "D_C"
    ),
    2
  ),
  level = rep(c("lower_level", "higher_level"), each = 12),
  value = abs(rnorm(24, mean = 5, sd = 2))
) %>%
  tibble()

# Split "X_Y" into origin = "X" and target = "Y".
df[c('origin', 'target')] <- str_split_fixed(df$col, '_', 2)

# Keep only the columns used for plotting.
df <- df %>% select(origin, target, level, value)
I now want to create horizontal stacked barplots for each target (df %>% filter(target=="A")). I do this using the following code:
# plot
# Horizontal 100%-stacked bars for target "A": one bar per level, split by
# origin.
p1 <- ggplot(
  data = filter(df, target == "A"),
  mapping = aes(x = factor(level), y = value, fill = factor(origin))
) +
  geom_bar(stat = "identity", position = "fill", width = .1) +
  scale_fill_manual(
    values = c("A" = "yellow", "B" = "green", "C" = "red", "D" = "blue")
  ) +
  coord_flip()
Since I want to combine multiple such plots later (s. below), I would like to
remove the empty space between y-axis and the bars (or manipulate it to value X)
have the fill label displayed on the right side
have one value on the left, saying "target: A"
and have fill legend and y axis shared between all plots.
See annotated plot:
For reference, I create additional plots with this code:
# Same horizontal 100%-stacked bar plot, once for each remaining target.
p2 <- ggplot(
  data = filter(df, target == "B"),
  mapping = aes(x = factor(level), y = value, fill = factor(origin))
) +
  geom_bar(stat = "identity", position = "fill", width = .1) +
  scale_fill_manual(
    values = c("A" = "yellow", "B" = "green", "C" = "red", "D" = "blue")
  ) +
  coord_flip()

p3 <- ggplot(
  data = filter(df, target == "C"),
  mapping = aes(x = factor(level), y = value, fill = factor(origin))
) +
  geom_bar(stat = "identity", position = "fill", width = .1) +
  scale_fill_manual(
    values = c("A" = "yellow", "B" = "green", "C" = "red", "D" = "blue")
  ) +
  coord_flip()

p4 <- ggplot(
  data = filter(df, target == "D"),
  mapping = aes(x = factor(level), y = value, fill = factor(origin))
) +
  geom_bar(stat = "identity", position = "fill", width = .1) +
  scale_fill_manual(
    values = c("A" = "yellow", "B" = "green", "C" = "red", "D" = "blue")
  ) +
  coord_flip()
And combine them with this code (but happy to use other ways of combining them if needed).
library("gridExtra")
# Stack the four per-target plots in a single column (4 rows, 1 column).
grid.arrange(p1, p2, p3, p4, ncol = 1, nrow = 4)
It sounds very much as though you simply want to facet by target. No need for stitching multiple plots here.
# One facet row per target instead of four separate plots. The target values
# are prefixed with "Target:" so the strip text reads e.g. "Target: A".
ggplot(
  data = mutate(df, target = paste('Target:', target)),
  mapping = aes(x = factor(level), y = value, fill = factor(origin))
) +
  geom_col(position = "fill", width = 0.9) +
  scale_fill_manual(
    values = c("A" = "yellow", "B" = "green", "C" = "red", "D" = "blue"),
    name = 'origin'
  ) +
  # switch = 'y' moves the facet strips to the left-hand side.
  facet_grid(target ~ ., switch = 'y') +
  coord_flip() +
  theme(
    strip.placement = 'outside',
    strip.background = element_blank(),
    axis.title.y = element_blank()
  )
A few suggestions:
to remove the offset between axis and bar, set the axis expansion to zero
# The bars grow along the continuous y (value) scale; with coord_flip() that
# is the scale drawn horizontally, so its expansion creates the gap next to
# the axis. x is the discrete factor(level) scale and cannot take a
# continuous scale.
scale_y_continuous(..., expand = c(0, 0))
instead of tediously subsetting the data frame, use the facet_wrap or facet_grid option of ggplot:
# Template: plot the full data frame and let facet_wrap() split it by target
# instead of subsetting the data once per plot.
ggplot(data = df,
aes(x = factor(level), y = value, fill = factor(origin))) +
## other plot instructions
facet_wrap( ~target)
see ?facet_wrap for various layout options like number of plot columns
3. the vertical spacing between bars will be adjusted to the output dimensions (here: figure height) anyway
I am plotting 2 sets of data on the same plot using ggplot. I have specified the colour for each data set, but there is no legend that comes out when the dot plot is generated.
What can i do to manually add a legend?
# Create an index to hold values of m from 1 to 100
m_index <- 1:100

# Wrap each vector in a data frame for ggplot. This previously called
# data(prob_max_abs_cor_50), which is the dataset loader and does not build a
# data frame; data.frame() matches the line for the 20 series.
# NOTE(review): assumes both prob_max_abs_cor_* vectors have length 100 - confirm.
data_frame_50 <- data.frame(prob_max_abs_cor_50)
data_frame_20 <- data.frame(prob_max_abs_cor_20)
library(ggplot2)
# Asker's attempt to draw both series in one plot.
# NOTE(review): `colour` is passed to ggplot() itself, outside aes() and not
# to geom_point() - confirm it has the intended effect.
plot1 <- ggplot(data_frame_50, mapping = aes(x = m_index,
y = prob_max_abs_cor_50),
colour = 'red') +
geom_point() +
# NOTE(review): a second ggplot() call is added with `+` here rather than a
# second geom_point() layer with its own data - confirm this runs as intended.
ggplot(data_frame_20, mapping = aes(x = m_index,
y = prob_max_abs_cor_20),
colour = 'blue') +
geom_point()
# Add axis labels and a title to the stored plot.
plot1 + labs(x = " Values of m ",
y = " Maximum Absolute Correlation ",
title = "Dot plot of probability")
First, I would suggest neatening your ggplot code a little. This is equivalent to your posted code;
# One geom_point() layer per data set. The colours are fixed constants set
# outside aes(), so nothing is mapped to colour and no legend is drawn.
# (Inside aes() the strings would be mapped as data values and a legend with
# default colours would appear.)
ggplot() +
  geom_point(
    data = data_frame_50,
    aes(x = m_index, y = prob_max_abs_cor_50),
    colour = "red"
  ) +
  geom_point(
    data = data_frame_20,
    aes(x = m_index, y = prob_max_abs_cor_20),
    colour = "blue"
  ) +
  labs(
    x = " Values of m ",
    y = " Maximum Absolute Correlation ",
    title = "Dot plot of probability"
  )
You won't get a legend here, because you are plotting different datasets with only one category in each. You need to have a single dataset with a column grouping your data (i.e. 20 or 50). So using some example data, this is the equivalent of what you are plotting and ggplot won't provide a legend;
# Two layers with constant colours set outside aes(): no colour mapping.
ggplot() +
  geom_point(
    aes(x = Sepal.Length, y = Petal.Width),
    data = iris,
    colour = 'red'
  ) +
  geom_point(
    aes(x = Sepal.Length, y = Petal.Length),
    data = iris,
    colour = 'blue'
  )
If you want to colour by category, include a colour argument inside the aes call;
# Colour is mapped to Species inside aes(), so points are coloured by category.
ggplot() +
  geom_point(
    aes(x = Sepal.Length, y = Petal.Width, colour = factor(Species)),
    data = iris
  )
Have a look at the iris dataset to get a sense of how you need to shape your data. It's hard to give precise advice, because you haven't provided an idea of what your data look like, but something like this might work;
# Build one long data frame with a Group column (20 or 50) identifying the
# series each row belongs to.
df.20 <- data.frame(m = 1:100, Group = 20, Numbers = prob_max_abs_cor_20)
df.50 <- data.frame(m = 1:100, Group = 50, Numbers = prob_max_abs_cor_50)

# Stack the two series row-wise.
df.All <- rbind(df.20, df.50)
I am using ggplot to create a bubble plot with this code:
# Bubble plot: point size mapped to n, fill mapped to name.
ggplot(data = df, mapping = aes(x = order, y = mean, size = n, fill = name)) +
  # Shape 21 is a circle with a separate fill colour.
  geom_point(shape = 21) +
  theme_bw() +
  theme() +
  scale_size(range = c(1, 50)) +
  ylim(0, 100)
It is working perfectly apart from 2 things:
For each name (fill) I would like to manually specify the colour used (via a dataframe that maps name to colour) - this is to provide consistency across multiple figures.
I would like to substitute the numbers on the y for text labels (for several reasons I cannot use the text labels from the outset due to ordering issues)
I have tried several methods using scale_color_manual() and scale_y_continuous respectively and I am getting nowhere! Any help would be very gratefully received!
Thanks
Since you have not specified an example df, I created one of my own.
To manually specify the color, you have to use scale_fill_manual with a named vector as the argument of values.
Edit 2
This appears to do what you want. We use scale_y_continuous. The breaks argument specifies the vector of positions, while the labels argument specifies the labels which should appear at those positions. Since we already created the vectors when creating the data frame, we simply pass those vectors as arguments.
# Bubble plot with manual fill colours and text labels on the y axis.
ggplot(data = df, mapping = aes(x = order, y = mean, size = n, fill = name)) +
  geom_point(shape = 21) +
  # gcolors is a named vector: names are the values of `name`.
  scale_fill_manual(values = gcolors) +
  scale_size(limits = c(min(df$n), max(df$n))) +
  # `mean` and `order_label` here are the vectors used to build df: the breaks
  # give the positions, the labels give the text shown at them.
  scale_y_continuous(breaks = mean, labels = order_label)
Edit 1
From your comment, it appears that you want to label the circles. One option would be to use geom_text. Code below. You may need to experiment with values of nudge_y to get the position correct.
# Example data: one row per bubble.
order <- c(1, 2)
mean <- c(0.75, 0.3)
n <- c(180, 200)
name <- c("a", "b")
order_label <- c("New York", "London")
df <- data.frame(
  order, mean, n, name, order_label,
  stringsAsFactors = FALSE
)

# Lookup table mapping each name to its colour.
color <- c("blue", "red")
name_color <- data.frame(name, color, stringsAsFactors = FALSE)

# Named colour vector (names = name, values = colour), the form expected by
# the `values` argument of scale_fill_manual().
gcolors <- setNames(name_color[["color"]], name_color[["name"]])
# Bubble plot with a text label next to each bubble and the y axis hidden.
ggplot(data = df, mapping = aes(x = order, y = mean, size = n, fill = name)) +
  geom_point(shape = 21) +
  # nudge_y shifts the labels vertically; adjust it to position them.
  geom_text(
    aes(label = order_label),
    size = 3,
    hjust = "inward",
    nudge_y = 0.03
  ) +
  scale_fill_manual(values = gcolors) +
  scale_size(limits = c(min(df$n), max(df$n))) +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  ) +
  ylab(NULL)
Original Answer
It is not clear what you mean by "substitute the numbers on the y for text labels". In the example below, I have formatted the y-axis as a percentage using the scales::percent_format() function. Is this similar to what you want?
# Example data: one row per bubble.
order <- c(1, 2)
mean <- c(0.75, 0.3)
n <- c(180, 200)
name <- c("a", "b")
df <- data.frame(order, mean, n, name, stringsAsFactors = FALSE)

# Lookup table mapping each name to its colour, turned into a named vector
# (names = name, values = colour) for scale_fill_manual().
color <- c("blue", "red")
name_color <- data.frame(name, color, stringsAsFactors = FALSE)
gcolors <- setNames(name_color[["color"]], name_color[["name"]])

# Bubble plot with manual fill colours and the y axis shown as percentages.
ggplot(data = df, mapping = aes(x = order, y = mean, size = n, fill = name)) +
  geom_point(shape = 21) +
  scale_fill_manual(values = gcolors) +
  scale_size(limits = c(min(df$n), max(df$n))) +
  scale_y_continuous(labels = scales::percent_format())
Thanks, for all your help, this worked perfectly:
# Final version: manual fill colours, with the x axis labelled by text.
ggplot(data = df, mapping = aes(x = order, y = mean, size = n, fill = name)) +
  geom_point(shape = 21) +
  scale_fill_manual(values = gcolors) +
  scale_size(limits = c(min(df$n), max(df$n))) +
  # `order` gives the tick positions and `order_label` the text shown there.
  scale_x_continuous(breaks = order, labels = order_label)