geom_text() to label two separate points from different plots in ggplot - r

I am trying to create individual plots facetted by 'iid' using 'facet_multiple', in the following dataset (first 3 rows of data)
iid Age iop al baseIOP baseAGE baseAL agesurg
1 1 1189 20 27.9 21 336 24.9 336
2 2 877 11 21.5 16 98 20.3 98
3 2 1198 15 21.7 16 98 20.3 98
and wrote the following code:
# Install gg_plus from GitHub
remotes::install_github("guiastrennec/ggplus")
# Load libraries
library(ggplot2)
library(ggplus)
# Generate ggplot object
p <- ggplot(data_longF1, aes(x = Age, y = al)) +
geom_point(alpha = 0.5) +
geom_point(aes(x= baseAGE, y=baseAL)) +
labs(x = 'Age (days)',
y = 'Axial length (mm)',
title = 'Individual plots of Axial length v time')
p1 <- p+geom_vline(aes(xintercept = agesurg),
linetype = "dotted",
colour = "red",
size =1.0)
p2<- p1 + geom_text(aes(label=iop ,hjust=-1, vjust=-1))
p3 <- p2 + geom_text(aes(label = baseIOP, hjust=-1, vjust=-1))
# Plot on multiple pages (output plot to R/Rstudio)
facet_multiple(plot = p3,
facets = 'iid',
ncol = 1,
nrow = 1,
scales = 'free')
The main issue I am having is labelling the points. The points corresponding to (x = Age, y = al) get labelled fine, but the labels for the second group of points (x = baseAGE, y = baseAL) get put in the wrong place (see: individual plot sample).
I have had a look at similar issues in Stack Overflow e.g. ggplot combining two plots from different data.frames
But not been able to correct my code.
Thanks for your help

You need to define the x and y coordinates for the labels or they will default to the last ones specified.
Thus the geom_text() definitions should look something like:
data_longF1 <-read.table(header=TRUE, text="iid Age iop al baseIOP baseAGE baseAL agesurg
1 1 1189 20 27.9 21 336 24.9 336
2 2 877 11 21.5 16 98 20.3 98
3 2 1198 15 21.7 16 98 20.3 98")
# Generate ggplot object
p <- ggplot(data_longF1, aes(x = Age, y = al)) +
geom_point(alpha = 0.5) +
geom_point(aes(x= baseAGE, y=baseAL)) +
labs(x = 'Age (days)',
y = 'Axial length (mm)',
title = 'Individual plots of Axial length v time')
p1 <- p+geom_vline(aes(xintercept = agesurg),
linetype = "dotted",
colour = "red",
size =1.0)
#Need to specify the x and y coordinates or will default to the last ones defined
p2<- p1 + geom_text(aes(x=Age, y= al, label=iop ,hjust=-1, vjust=-1))
p3 <- p2 + geom_text(aes(x=baseAGE, y= baseAL, label = baseIOP, hjust=-1, vjust=-1))
print(p3)

Related

Two columns on x-axis and different grids in R

I need some help with a graph in R.
This is how my dataframe looks like
Footprint
Local Number
Remote Number
Location
10.4
45
4
L1
12.5
452
78
L9
15.6
86
52
L5
85.3
12
12
L4
12.5
35
36
L2
85.9
78
78
L3
78.5
44
44
L6
4.6
10
11
L7
13.9
157
2
L8
What I want to achieve is a graph with the 'Footprint' column in the y-axis, the 'Local Number' column(in the x-axis) in the positive grid of the graph and the 'Remote Number' column(in the x-axis) in the negative grid of the graph. The data should be presented in dots and the lab name should be the label. So basically, I want to show for each location the remote and local number of employees.
I am struggling on presenting the two columns in the x-axis. I appreciate the help!
Maybe you want something like this, where you use geom_point for both columns (one plotted as negative, the other as positive) and add labels using geom_text:
df <- read.table(text = 'Footprint Local_Number Remote_Number Location
10.4 45 4 L1
12.5 452 78 L9
15.6 86 52 L5
85.3 12 12 L4
12.5 35 36 L2
85.9 78 78 L3
78.5 44 44 L6
4.6 10 11 L7
13.9 157 2 L8
', header = TRUE)
library(ggplot2)
ggplot() +
geom_point(df, mapping = aes(x = Footprint, y = Local_Number, color = '1')) +
geom_point(df, mapping = aes(x = -Remote_Number, y = Local_Number, color = '2')) +
geom_text(df, mapping = aes(x = Footprint, y = Local_Number, label = Location), hjust = 0, vjust = 0) +
geom_text(df, mapping = aes(x = -Remote_Number, y = Local_Number, label = Location), hjust = 0, vjust = 0) +
scale_color_manual('Legend', labels = c('Footprint', 'Remote number'), values = c('blue', 'red')) +
labs(y = 'Local Number')
Created on 2022-10-14 with reprex v2.0.2
If you want to show it on only a positive axis, you could remove the negative sign like this:
library(ggplot2)
ggplot() +
geom_point(df, mapping = aes(x = Footprint, y = Local_Number, color = '1')) +
geom_point(df, mapping = aes(x = Remote_Number, y = Local_Number, color = '2')) +
geom_text(df, mapping = aes(x = Footprint, y = Local_Number, label = Location), hjust = 0, vjust = 0) +
geom_text(df, mapping = aes(x = Remote_Number, y = Local_Number, label = Location), hjust = 0, vjust = 0) +
scale_color_manual('Legend', labels = c('Footprint', 'Remote number'), values = c('blue', 'red')) +
labs(y = 'Local Number')
Created on 2022-10-14 with reprex v2.0.2
Here two more suggestions for visualisation. This seems to be paired data - remote vs local number. That can be either represented as a scatter plot or as change.
Footprint can then be encoded as color. Thanks +1 to Quieten for the data.
library(tidyverse)
df <- read.table(text = 'Footprint Local_Number Remote_Number Location
10.4 45 4 L1
12.5 452 78 L9
15.6 86 52 L5
85.3 12 12 L4
12.5 35 36 L2
85.9 78 78 L3
78.5 44 44 L6
4.6 10 11 L7
13.9 157 2 L8
', header = TRUE)
df %>%
ggplot(aes(Local_Number, Remote_Number)) +
## use Number as x and y and color code by footprint value
geom_point(aes(color = Footprint), size = 3) +
## label the points, best with repel
ggrepel::geom_text_repel(aes(label = Location)) +
## optional add a line of equality to help intuitive recognition of change
## + keeping same limits helps intuitive comparison
geom_abline(intercept = 0, slope = 1, lty = 2, size = .3) +
coord_equal(xlim = range(c(df$Local_Number, df$Remote_Number)), ylim = range(c(df$Local_Number, df$Remote_Number))) +
## optional change color scale
scale_color_viridis_c(option = "magma")
## or, not to waste half of your graph (there is no positive value)
## you can show the difference instead
df %>%
mutate(change = Local_Number-Remote_Number) %>%
ggplot() +
## now use Location as x variable, therefore no labels needed any more
geom_point(aes(Location, change, color = Footprint), size = 3) +
## optional change color scale
scale_color_viridis_c(option = "magma")
Created on 2022-10-14 by the reprex package (v2.0.1)

Creating GW contours using ggplot

I am a novice at R coding and am trying to plot GW contours using X (Easting) and Y (Northing) coordinates and GW level (rswl) data in ggplot. An example of the data that I am trying to plot is:
X Obs_No Season Easting Northing rswl
1 56 ADE146 Winter 2017 275638.7 6131431 5.72
2 113 YAT099 Winter 2017 271723.0 6133405 3.16
4 227 YAT066 Winter 2017 276503.0 6135636 2.31
5 292 YAT053 Winter 2017 277780.8 6139285 -2.30
6 400 YAT129 Winter 2017 282065.1 6146759 5.60
7 509 PTA040 Winter 2017 270868.0 6150199 1.68
An example of the code I have tried is:
ggplot(data)+
aes(x = Easting, y = Northing, z = rswl, fill = rswl)+
geom_tile()+
geom_contour(colour = "white", alpha = 0.5)+
scale_fill_distiller(palette = "Spectral", na.value = "white") +
theme_bw()
but it comes up with "Not possible to generate contour data"
Something else I tried with 1 of my datasets is:
ggplot(data, aes(x = Easting, y = Northing, z = rswl)) +
geom_density_2d(colour = "black")+
geom_point(aes(color = factor(Obs_No)))+
theme(legend.title = element_blank())+
ggtitle("Tomw.T2 Winter 2017.csv")
This seems to be contours based on the distribution of points and has nothing to do with the GW level.
Any tips would be greatly appreciated.
Thanks

how to colour a funnel plot in ggplot R

I have drawn the attached funnel plot in ggplot, But I have 2 questions:
Is there any way to make the coloured green dot bigger (only that one);
is there any way to colour the upper and lower part of the confidence intervals?
This is what I am able to make so far:
Thank you!
The data set I am working on:
df <-
read.table(text = "
school_id year sdq_emotional
1060 7 4
1060 7 5
1060 7 7
1060 7 6
1060 7 4
1060 7 7
1060 7 8
1115 7 5
1115 7 9
1115 7 3
1136 7 1
1136 7 8
1136 7 5
1136 7 9
1135 7 4
1139 7 7
1139 7 3
2371 7 6
2371 7 3
2372 7 4
2372 7 1
2378 7 6
2378 7 7
2378 7 5", header=TRUE)
My code as follows:
# Format the data
df1 <- plyr::count(df, c('school_id'))
df2 <- merge(df,df1, by= c("school_id"))
df <- df2
M3 <- aggregate(df$sdq_emotional[df$freq > 10], by=list(df$school_id[df$freq > 10]),mean,na.rm=T)
S3 <- aggregate(df$sdq_emotional[df$freq > 10], by=list(df$school_id[df$freq > 10]),nona)
CG_PLOT1 <- merge(M3,S3,by="Group.1")
names(CG_PLOT1) <- c("School","Mean","Size")
LINE3 <- data.frame(M3=rep(mean(df$sdq_emotional,na.rm=T),max(CG_PLOT1$Size)+25),
SD3=rep(sd(df$sdq_emotional,na.rm=T),max(CG_PLOT1$Size)+25),
N3=sqrt(1:(max(CG_PLOT1$Size)+25)))
ID <- 1060
filling3 <- rep("white",nrow(CG_PLOT1))
filling3[CG_PLOT1$School ==ID]<-"green"
# Build the graph
# Funnel plot: dashed upper/lower limits at mean +/- qnorm(0.975) * SD / sqrt(n),
# a solid centre line at the overall mean, and one point per school.
ggplot(data = CG_PLOT1) +
  geom_line(data = LINE3, aes(x = 1:(max(CG_PLOT1$Size) + 25),
                              y = M3 + qnorm(0.975) * SD3 / N3),
            size = 1, colour = "steelblue2", linetype = 5) +
  geom_line(data = LINE3, aes(x = 1:(max(CG_PLOT1$Size) + 25),
                              y = M3 - qnorm(0.975) * SD3 / N3),
            size = 1, colour = "steelblue2", linetype = 5) +
  # aes() has to be an argument of geom_segment(); size and colour are
  # constants, so they are set outside aes() rather than mapped.
  geom_segment(aes(x = 1, y = mean(LINE3$M3, na.rm = TRUE)),
               xend = max(CG_PLOT1$Size) + 25,
               yend = mean(LINE3$M3, na.rm = TRUE),
               size = 1, colour = "steelblue2") +
  # filling3 holds one fill colour per row of CG_PLOT1 ("green" for the
  # highlighted school, "white" otherwise).
  geom_point(data = CG_PLOT1, aes(x = Size, y = Mean), size = 2,
             colour = "black", shape = 21, fill = filling3) +
  ylim(0, 8)
thank you very much!
As you didn't provide a reproducible example, I have used this question as a template for your problem:
Creating a dataset here:
library(ggplot2)
set.seed(101)
x <- runif(100, min=1, max=10)
y <- rnorm(length(x), mean=5, sd=0.1*x)
df <- data.frame(x=x*70, y=y)
m <- lm(y ~ x, data=df)
fit95 <- predict(m, interval="conf", level=.95)
fit99 <- predict(m, interval="conf", level=.999)
df <- cbind.data.frame(df,
lwr95=fit95[,"lwr"], upr95=fit95[,"upr"],
lwr99=fit99[,"lwr"], upr99=fit99[,"upr"])
To add a colour background to the funnel plot, we can use the geom_ribbon function within ggplot to fill the area between a ymin and ymax. In this case, we will use the data used to construct each of the lines:
# Funnel plot with shaded background bands delimited by the fit99 interval,
# overlaid with the points, the fitted line and the fit95/fit99 bounds.
ggplot(df, aes(x, y)) +
  # Add background. The bounds are mapped inside aes() so that each ribbon's
  # ymin/ymax are taken from the same data rows as x.
  geom_ribbon(aes(ymin = upr99, ymax = Inf), fill = "#e2a49a", alpha = 0.5) +
  geom_ribbon(aes(ymin = lwr99, ymax = upr99), fill = "#e0ba9d", alpha = 0.5) +
  geom_ribbon(aes(ymin = 0, ymax = lwr99), fill = "#8fd6c9", alpha = 0.5) +
  # Overlay points and lines
  geom_point() +
  geom_smooth(method = "lm", colour = "black", lwd = 1.1, se = FALSE) +
  geom_line(aes(y = upr95), color = "black", linetype = 2) +
  geom_line(aes(y = lwr95), color = "black", linetype = 2) +
  geom_line(aes(y = upr99), color = "red", linetype = 3) +
  geom_line(aes(y = lwr99), color = "red", linetype = 3) +
  # labs() must be chained with `+`; as a separate statement it would not
  # be applied to the plot.
  labs(x = "No. admissions...", y = "Percentage of patients...")
As for changing the size of one point, you can check out the answer here. I would recommend subsetting the data to extract that one point, adding another geom_point layer for it, and then changing the size and colour arguments of the new layer.

Creating a geom histogram that counts both positive/negative ggplot

I am having trouble plotting the positive log2_ratio count on the positive y-axis and the count of the negative log2_ratio on the negative-y axis.
In essence, I want the positive count to be above the x-axis while the negative count to be below the x-axis.
Here is the data frame and the code:
chrom chr_start chr_stop num_positions normal_depth tumor_depth log2_ratio gc_content sample
324202 1 156249804 156249858 55 12.3 4.7 -1.399 34.5 10
324203 1 156250463 156250473 11 10.0 4.6 -1.109 27.3 10
324204 1 156250664 156250705 42 12.0 7.4 -0.704 19.0 10
324205 1 156250816 156250847 32 11.7 4.6 -1.343 40.6 10
324206 1 156251108 156251132 25 10.6 3.6 -1.569 60.0 10
324207 1 156251411 156251464 54 12.3 6.8 -0.863 46.3 10
# Histogram of rows per sample: samples up to 25 in blue, samples above 25
# in grey. Both layers share the x = sample mapping and a bin width of 1.
newHist <- ggplot(resultsPileup1COMBINED[resultsPileup1COMBINED$sample <= 25, ],
                  aes(x = sample)) +
  # `binwidth` (not `bindwidth`): a misspelled argument is not used for binning
  geom_histogram(fill = "blue", binwidth = 1) +
  geom_histogram(data = resultsPileup1COMBINED[resultsPileup1COMBINED$sample > 25, ],
                 fill = "gray50", binwidth = 1) +
  scale_x_continuous(breaks = seq(from = 1, to = 50, by = 3))
Here is a current graph:
If you are asking for a completely new graph, try:
ggplot() + geom_histogram(data = resultsPileup1COMBINED[resultsPileup1COMBINED$log2_ratio > 0, ],
aes(x = log2_ratio, y = ..count..)) +
geom_histogram(data = resultsPileup1COMBINED[resultsPileup1COMBINED$log2_ratio < 0, ],
aes(x = - log2_ratio, y = - ..count..))
In this plot we have the < 0 log2_ratios on the bottom, with the x values negative to line up with the above, using the - ..count.. method
Edit: Asking for a slightly different graph, leaving the above in for posterity.
To graph the number of +/- values for each bin, we plot it out, again using the -..count.. trick:
ggplot() + geom_histogram(data =
resultsPileup1COMBINED[resultsPileup1COMBINED$log2_ratio < 0, ],
aes(x = sample, y = -..count..)) +
geom_histogram(data =
resultsPileup1COMBINED[resultsPileup1COMBINED$log2_ratio > 0, ],
aes(x = sample, y = ..count..))
Again, breaks and colours are up to you.
To make it similar to the original plot, make sure you include:
# sample is numeric and binned by geom_histogram(), so the continuous
# x scale is the one that takes these breaks.
scale_x_continuous(breaks = seq(from = 1, to = 50, by = 3))

Generate multiple x-y plots from the same data frame in the same plot using ggplot2 in R using a loop and display corresponding legend

I have a data frame which I generated using the following piece of code,
x <- c(1:10)
y <- x^3
z <- y-20
s <- z/3
t <- s*6
q <- s*y
x1 <- cbind(x,y,z,s,t,q)
x1 <- data.frame(x1)
The data frame x1 thus has the following data,
x y z s t q
1 1 1 -19 -6.333333 -38 -6.333333
2 2 8 -12 -4.000000 -24 -32.000000
3 3 27 7 2.333333 14 63.000000
4 4 64 44 14.666667 88 938.666667
5 5 125 105 35.000000 210 4375.000000
6 6 216 196 65.333333 392 14112.000000
7 7 343 323 107.666667 646 36929.666667
8 8 512 492 164.000000 984 83968.000000
9 9 729 709 236.333333 1418 172287.000000
10 10 1000 980 326.666667 1960 326666.666667
Now I want to plot columns x vs y, z vs s and t vs q in the same plot, so for this I use the following code,
p <- ggplot() +
geom_line(data = x1, aes(x = x1[,1], y = x1[,2], color = "red")) +
geom_line(data = x1, aes(x = x1[,3], y = x1[,4], color = "blue")) +
geom_line(data = x1, aes(x = x1[,5], y = x1[,6], color = "green")) +
xlab('x') +
ylab('y')
While the above piece of code works fine for a data frame of just 6 columns, I would like to perform the same operation for a data frame with many more columns. For example, if there are 20 columns in a data frame, a single plot should be generated containing the plots of col 1 vs 2, col 3 vs 4, col 5 vs 6 and so on until col 19 vs 20. To do this I use the following piece of code,
p <- ggplot() + geom_line(data = x1, aes(x = x1[,1], y = x1[,2], color = "red")) + xlab('x') + ylab('y')
ctr <- 1
for (iz in seq(3, ncol(x1), by = 2))
{
p$ctr <- p + geom_line(data = x1, aes(x = x1[,iz], y = x1[,iz+1], color = "green"))
ctr <- ctr+1
}
So the plots should be layered incrementally and the last object should contain the entire plot. Using the above code the plot gets overwritten every time when the loop runs, could some one point out how to capture the full data. I would like to display a legend for each of the plot as well.
Thanks
You don't need a loop if you put your data into the right format. You can create a long data frame based on your original data frame.
x1_long <- data.frame(x = unlist(x1[c(TRUE, FALSE)]),
y = unlist(x1[c(FALSE, TRUE)]),
ind = gl(ncol(x1) / 2, nrow(x1)))
Now, a single geom_line command is sufficient:
library(ggplot2)
ggplot(x1_long) +
geom_line(aes(x = x, y = y, colour = ind))
(Note. The red line is plotted too but its values are quite small.)
How about this?
ggplot() +
lapply(seq(1,ncol(x1),2), # every second col index
function(x){ # return the geom_line calls in a list
geom_line(aes_string(x=x1[x], # remember to use aes_string for x
y=x1[x+1]), # and y
color=factor(x), # then color
size=2) # and size
}) +
xlab('x') + ylab('y')

Resources