Highlighting particular regions of a scatterplot in a ggplot - r

I need to discuss a scatter plot and would like to refer to particular regions of the plot. Is there any way to 'highlight' particular sections of the plot? Perhaps with boxes and labels as below?
set.seed(1410)
dsmall<-diamonds[sample(nrow(diamonds), 100), ]
df<-data.frame("x"=dsmall$carat, "y"=dsmall$price)
p <-ggplot(df, aes(x, y))
p <- p + geom_point(alpha=2/10, shape=21, fill="blue", colour="black", size=5)

For a single region, it's easiest to use annotate, first with rect, then text:
p + annotate("rect", xmin=1.5, xmax=2.5, ymin=12500, ymax= 18000,
fill=NA, colour="red") +
annotate("text", x=1.75, y=17000, label="Region A", size=8)
For multiple regions, you can put the data into a data frame and use geom_text and geom_rect:
regions <- data.frame(
xmin=c(1.5, 1, 0),
xmax=c(2.5, 2, 1),
ymin=c(12500, 5000, 0),
ymax=c(17500, 12500, 5000),
x =c(2, 1.5, 0.5),
y =c(15000, 7500, 2500),
lab = paste("Region", LETTERS[1:3])
)
p +
geom_rect(data=regions, aes(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax),
fill=NA, colour="red") +
geom_text(data=regions, aes(x=x, y=y, label=lab))

Related

Error with tick marks interval and density curve in ggplot2 [duplicate]

This question already has answers here:
geom_bar bars not displaying when specifying ylim
(4 answers)
R - ggplot2: geom_area loses its fill if limits are defined to max and min values from a data.frame
(1 answer)
Closed 6 days ago.
I am using ggplot2 to draw a line chart with the following R codes:
library(ggplot2)
X <-c(0, 1, 2, 3, 4, 5)
Y <-c(1, 2, 3, 2, 1, 2)
df <- data.frame(X=X, Y=Y)
ggplot(data=df, mapping=aes(x=X, y=Y))+
geom_line(colour="violet", size=3, linetype=2)+
geom_point(shape=23, fill="blue", color="darkred", size=3)
and the output is fine like this:
However, I would like to color the under density curve and I change the codes like these:
ggplot(data=df, mapping=aes(x=X, y=Y))+geom_line(colour="violet", size=3, linetype=2)+geom_point(shape=23, fill="blue", color="darkred", size=3)+geom_area("darkseagreen1")
and I got output like this:
This is not ideal so I try to set the tick marks interval using the following code:
ggplot(data=df, mapping=aes(x=X, y=Y))+geom_line(colour="violet", size=3, linetype=2)+geom_point(shape=23, fill="blue", color="darkred", size=3)+geom_area(fill = "darkseagreen1")+scale_y_continuous(breaks = seq(71.4, 72.9, by=0.3), limits=c(71.4,72.7))
It becomes normal but the color is gone like this:
Can anyone help me with this error? Thank you.
You can use geom_area() if you set your limits using coord_cartesian() (see e.g. https://stackoverflow.com/a/3606798/12957340 for more details).
For example:
library(ggplot2)
X <-c(0, 1, 2, 3, 4, 5)
Y <-c(1, 2, 3, 2, 1, 2)
df <- data.frame(X=X, Y=Y)
# starting plot
ggplot(data=df, mapping=aes(x=X, y=Y)) +
geom_line(colour="violet", size=3, linetype=2) +
geom_point(shape=23, fill="blue", color="darkred", size=3)
#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
#> ℹ Please use `linewidth` instead.
ggsave("example.pdf")
#> Saving 7 x 5 in image
# with geom_area()
ggplot(data=df, mapping=aes(x=X, y=Y)) +
geom_line(colour="violet", size=3, linetype=2) +
geom_point(shape=23, fill="blue", color="darkred", size=3) +
geom_area(fill = "darkseagreen1")
ggsave("example2.pdf")
#> Saving 7 x 5 in image
# add y-axis limits (the problem)
ggplot(data=df, mapping=aes(x=X, y=Y)) +
geom_line(colour="violet", size=3, linetype=2) +
geom_point(shape=23, fill="blue", color="darkred", size=3) +
geom_area(fill = "darkseagreen1") +
scale_y_continuous(breaks = seq(0, 3, by=0.3), limits = c(1, 2))
ggsave("example3.pdf")
#> Saving 7 x 5 in image
# add y-axis limits without losing data (a solution)
ggplot(data=df, mapping=aes(x=X, y=Y)) +
geom_line(colour="violet", size=3, linetype=2) +
geom_point(shape=23, fill="blue", color="darkred", size=3) +
geom_area(fill = "darkseagreen1") +
scale_y_continuous(breaks = seq(0, 3, by=0.3)) +
coord_cartesian(ylim = c(1,2))
ggsave("example4.pdf")
#> Saving 7 x 5 in image
Created on 2023-02-08 with reprex v2.0.2
You want to use geom_ribbon() instead of geom_area(). Try this code instead:
ggplot(data = df, mapping = aes(x = X, y = Y)) +
geom_line(colour = "violet", size = 3, linetype = 2) +
geom_point(shape = 23, fill = "blue", color = "darkred", size = 3) +
scale_y_continuous(breaks = seq(71.4, 72.9, by = 0.3), limits = c(71.4, 72.7)) +
geom_ribbon(aes(ymin = 71.4, ymax = Y), fill = "darkseagreen1")

Adding a centred overlaid title to a ggplot2 doughnut graph

I'm hoping to create a ggplot2 title overlaying a doughnut graph, with my reprex adapted this example from https://www.r-graph-gallery.com/128-ring-or-donut-plot.html.
# load library
library(ggplot2)
# Create test data.
data <- data.frame(
category=c("A", "B", "C"),
count=c(10, 60, 30)
)
# Compute percentages
data$fraction <- data$count / sum(data$count)
# Compute the cumulative percentages (top of each rectangle)
data$ymax <- cumsum(data$fraction)
# Compute the bottom of each rectangle
data$ymin <- c(0, head(data$ymax, n=-1))
# Compute label position
data$labelPosition <- (data$ymax + data$ymin) / 2
# Compute a good label
data$label <- paste0(data$count)
# Make the plot
ggplot(data, aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=category)) +
geom_rect() +
coord_polar(theta="y") + # Try to remove that to understand how the chart is built initially
xlim(c(2, 4))+ # Try to remove that to see how to make a pie chart
theme_void()+
scale_fill_brewer(palette = 1)+
geom_label( x=3.5, aes(y=labelPosition, label=label), size=6)+
theme(legend.position = "top",
plot.title = element_text(hjust=0.5))+
ggtitle("My title")
This is what I have currently:
And this is what I want:
I haven't been able to find any documentation demonstrating how to do this in ggplot2. Any suggestions are appreciated.
You can add an annotation layer :
library(ggplot2)
ggplot(data, aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=category)) +
geom_rect() +
coord_polar(theta="y") +
xlim(c(2, 4))+
theme_void()+
scale_fill_brewer(palette = 1)+
geom_label( x=3.5, aes(y=labelPosition, label=label), size=6)+
theme(legend.position = "top") +
annotate('text', x = 2, y = 0.5, label = 'My title', color = 'blue', size = 5)

ggplot legends when plot is built from two data frames

I have data coming from two different data frames. I am trying to create legend for each data frame. I know I can combine the data frame and do it, but because of my data source it makes the most sense to plot from two different data frames.
Please find the simplified example below. I have gotten close but the 'Main Forecast' in the legend is only white color. I want to show where 'Main Forecast' is red on the outside and white on the inside.
x = seq(1,10, 1)
y = seq(10,100, 10)
df = data.frame(x=x, y=y)
df2 = data.frame(x=5, y=50)
p = ggplot(data=df) +
geom_point(data=df,aes(x=x, y=y, color="Weekly Forecast"), fill="red", size=5, shape=16) +
geom_line(data=df,aes(x=x, y=y), color="red", size=1) +
geom_point(data=df2, aes(x=x, y=y, color="Main Forecast"), size=2, shape=16) +
scale_color_manual("Legend Title", breaks=c("Weekly Forecast", "Main Forecast"), values = c("white","red"))
p
Any assistance will be greatly appreciated.
You need to use one of the symbols that takes a fill (pch = 21:25). You then need to use override.aes to get the legend right. I've moved shared data and aes into the ggplot command.
ggplot(data=df, aes(x=x, y=y)) +
geom_point(aes(color="Weekly Forecast"), shape=16, size = 5) +
geom_line(color="red", size=1) +
geom_point(data=df2, aes(color="Main Forecast"), shape=21, fill = "white", size = 5) +
scale_color_manual("Legend Title", limits=c("Weekly Forecast", "Main Forecast"), values = c("red","red")) +
guides(colour = guide_legend(override.aes = list(pch = c(16, 21), fill = c("red", "white"))))
This can also be done without override.aes:
ggplot(data=df, aes(x=x, y=y)) +
geom_line(aes(color="Main Forecast"), size=1) +
geom_point(aes(color="Weekly Forecast", fill="Weekly Forecast"), shape=21, size = 5) +
geom_point(data=df2, aes(color="Main Forecast", fill="Main Forecast"), shape=21, size = 5) +
scale_color_manual(name="", values = c("red","red")) +
scale_fill_manual(name="", values=c("white","red"))

Add multiple geom_line to ggplot

The geom_line CUR_MTH_UNEARN_REV_EUR is plotted correctly as a numeric. My goal is to add a second numeric geom_line (i.e., CUR_MTH_EARN_REV_EUR). Here's the code:
library("ggthemes")
library("gridExtra")
library("grid")
p = ggplot(f, aes(DTE_OF_REPORT_EUR, CUR_MTH_UNEARN_REV_EUR, label=(CUR_MTH_UNEARN_REV_EUR)))
+ geom_point(size=ifelse(f$CUR_MTH_UNEARN_REV_EUR<8.0, 11, 5), color=ifelse(f$CUR_MTH_UNEARN_REV_EUR<8.0, '#CC0000', 'black'))
+ geom_line(size=2,aes(group=1)) + geom_rangeframe() + theme_wsj()
+ theme(axis.text.x=element_text(angle=50, size=20, vjust=0.7))
+ geom_smooth(aes(group=1), method="loess", colour = "#CC0000", lwd=2)
+ geom_text(aes(label=CUR_MTH_UNEARN_REV_EUR), hjust=-0.5, vjust=0.5, fontface="bold")
+ ggtitle("Unearned Revenue by Service Code 'BS', in CSG Months, Jul. 2014-Aug. 2015")
+ theme(plot.title = element_text(lineheight=.8, face="bold"))
p
Text1 = textGrob("Source: Revenue Assurance and Quality Control", gp=gpar(fontsize=7))
p2 = p + annotation_custom(grob = Text1, ymin = -0.2, ymax = -30)
p2
format(round(f$CUR_MTH_UNEARN_REV_EUR, 2), nsmall = 2)
f$ScoreRounded <- round(f$CUR_MTH_UNEARN_REV_EUR, 1)
f$DTE_OF_REPORT_EUR <- factor(f$DTE_OF_REPORT_EUR, levels=unique(as.character(f$DTE_OF_REPORT_EUR)))
Hope this helps as a start. You can just add things, but you need to have the correct aes.
#data with x and two y-variables
set.seed(123)
f <- data.frame(x=1:10, var1=sample(7:10,10,T),
var2=sample(5:7,10,T))
#as you want sizing by a measure, make a flag
f$var1_threshold <- f$var1 <9
#example with adding different geoms
#not that it's unnecessary to use my data (f)
#in the call to aes, as everything I need is already
#inside f
p <- ggplot(f, aes(x=x)) +
geom_point(aes(y=var1,size=var1_threshold, color=var1_threshold))+
#colors and size in aes allows for legend generation.
scale_size_manual(values=c("FALSE"=5,"TRUE"=8)) +
scale_color_manual(values=c("FALSE"='#CC0000',"TRUE"='black')) +
geom_line(aes(y=var1),size=1) +
geom_line(aes(y=var2),size=1) +
geom_smooth(aes(y=var1), colour="#CC0000") +
geom_smooth(aes(y=var2), colour="black")

Draw lines between two facets in ggplot2

How can I draw several lines between two facets?
I attempted this by plotting points at the min value of the top graph but they are not between the two facets. See picture below.
This is my code so far:
t <- seq(1:1000)
y1 <- rexp(1000)
y2 <- cumsum(y1)
z <- rep(NA, length(t))
z[100:200] <- 1
df <- data.frame(t=t, values=c(y2,y1), type=rep(c("Bytes","Changes"), each=1000))
points <- data.frame(x=c(10:200,300:350), y=min(y2), type=rep("Bytes",242))
vline.data <- data.frame(type = c("Bytes","Bytes","Changes","Changes"), vl=c(1,5,20,5))
g <- ggplot(data=df, aes(x=t, y=values)) +
geom_line(colour=I("black")) +
facet_grid(type ~ ., scales="free") +
scale_y_continuous(trans="log10") +
ylab("Log values") +
theme(axis.text.x = element_text(angle = 90, hjust = 1), panel.margin = unit(0, "lines"))+
geom_point(data=points, aes(x = x, y = y), colour="green")
g
In order to achieve that, you have to set the margins inside the plot to zero. You can do that with expand=c(0,0). The changes I made to your code:
When you use scale_y_continuous, you can define the axis label inside that part and you don't need a seperarate ylab.
Changed colour=I("black") to colour="black" inside geom_line.
Added expand=c(0,0) to scale_x_continuous and scale_y_continuous.
The complete code:
ggplot(data=df, aes(x=t, y=values)) +
geom_line(colour="black") +
geom_point(data=points, aes(x = x, y = y), colour="green") +
facet_grid(type ~ ., scales="free") +
scale_x_continuous("t", expand=c(0,0)) +
scale_y_continuous("Log values", trans="log10", expand=c(0,0)) +
theme(axis.text.x=element_text(angle=90, vjust=0.5), panel.margin=unit(0, "lines"))
which gives:
Adding lines can also be done with geom_segment. Normally the lines (segments) will appear in both facets. If you want them to appear between the two facets, you will have to restrict that in data parameter:
ggplot(data=df, aes(x=t, y=values)) +
geom_line(colour="black") +
geom_segment(data=df[df$type=="Bytes",], aes(x=10, y=0, xend=200, yend=0), colour="green", size=2) +
geom_segment(data=df[df$type=="Bytes",], aes(x=300, y=0, xend=350, yend=0), colour="green", size=1) +
facet_grid(type ~ ., scales="free") +
scale_x_continuous("t", expand=c(0,0)) +
scale_y_continuous("Log values", trans="log10", expand=c(0,0)) +
theme(axis.text.x=element_text(angle=90, vjust=0.5), panel.margin=unit(0, "lines"))
which gives:

Resources