Text labels in scatterplot not overlapping trend line in r (ggplot) - r

I am trying to create a scatterplot using ggplot. Is there a way to stop my text labels from overlapping the trend line?
I was only able to stop the text labels from overlapping each other.
# Minimal example: ten labelled points with a fitted linear trend line.
# NOTE: the label column is called `rownames`; inside aes() that name refers to
# the data-frame column, not to base::rownames().
rownames <- rep("dummy", 10)
corr_truth <- c(-0.39, -0.13, 0.28, -0.49, -0.14, 0.52, 0.43, 0.22, -0.29, -0.02)
corr_pred <- c(-0.41, 0.01, 0.36, -0.38, -0.28, 0.44, 0.26, 0.24, -0.38, -0.23)
corr_complete <- data.frame(rownames, corr_truth, corr_pred)

plot_corr_complete <- ggplot(data = corr_complete, aes(corr_truth, corr_pred)) +
  geom_point() +
  xlim(-0.5, 0.7) +
  ylim(-0.5, 0.7) +
  # Labels are mapped from the data through aes() and nudged slightly to the
  # right of their points. check_overlap only drops labels that collide with
  # other labels of this layer; it knows nothing about the trend line.
  geom_text(aes(label = rownames), nudge_x = 0.08, nudge_y = 0.005, check_overlap = TRUE) +
  geom_smooth(method = "lm", se = FALSE, color = "black")
plot_corr_complete

An example using ggrepel. I needed to add some padding to the solution, so the labels did not overlap the trend line.
library(tidyverse);library(ggrepel)
# Same example data as in the question: ten labelled points.
# NOTE: the label column is called `rownames`; inside aes() that name refers to
# the data-frame column, not to base::rownames().
rownames <- rep("dummy", 10)
corr_truth <- c(-0.39, -0.13, 0.28, -0.49, -0.14, 0.52, 0.43, 0.22, -0.29, -0.02)
corr_pred <- c(-0.41, 0.01, 0.36, -0.38, -0.28, 0.44, 0.26, 0.24, -0.38, -0.23)
corr_complete <- data.frame(rownames, corr_truth, corr_pred)

plot_corr_complete <- ggplot(data = corr_complete, aes(corr_truth, corr_pred)) +
  geom_point() +
  xlim(-0.5, 0.7) +
  ylim(-0.5, 0.7) +
  # Map the label from the data instead of passing an external vector, so the
  # labels always stay aligned with the rows that are actually plotted.
  # point.padding plus the small nudge push the labels away from their points.
  geom_text_repel(
    aes(label = rownames),
    point.padding = 0.2,
    nudge_y = 0.005, nudge_x = 0.02
  ) +
  geom_smooth(method = "lm", se = FALSE, color = "black")
plot_corr_complete

The ggrepel package provides functions that keep text labels from overlapping.
Once you've installed the package, load it before running the following code.
This revised code worked on my machine:
# Example data: ten points that all carry the label "dummy".
rownames <- rep("dummy", 10)
corr_truth <- c(-0.39, -0.13, 0.28, -0.49, -0.14, 0.52, 0.43, 0.22, -0.29, -0.02)
corr_pred <- c(-0.41, 0.01, 0.36, -0.38, -0.28, 0.44, 0.26, 0.24, -0.38, -0.23)
corr_complete <- data.frame(rownames, corr_truth, corr_pred)

# The label is mapped once in the plot-level aes(), so geom_text_repel() needs
# no arguments of its own.
plot_corr_complete <- ggplot(
  data = corr_complete,
  mapping = aes(x = corr_truth, y = corr_pred, label = rownames)
) +
  geom_point() +
  xlim(-0.5, 0.7) +
  ylim(-0.5, 0.7) +
  geom_text_repel() +
  geom_smooth(method = "lm", se = FALSE, color = "black")
plot_corr_complete
Hope this helps

Related

Question regarding some specific labelling on plots

I have a code for a plot that I am trying to add specific labels for.
The data code is this:
# Name of each cut-off score or metric (24 entries; one row of the final table
# per entry).
CombinedMetricScore <- c(
  "zero", "5", "10", "15", "20", "25", "30", "35", "40", "45", "50", "60",
  "M11", "MICKEY", "MEANING", "MICKEYTWO", "MICKEYTHREE", "MIKE", "PASTA",
  "MCIDandPASS", "MICKDorPASS", "MIKEDOORPASS", "WOMAC20andPASS", "Ideal"
)

# False and true positive rate of each entry, in the same order as above.
FalsePositiveRate <- c(
  0, 0.05, 0.08, 0.12, 0.2, 0.28, 0.19, 0.5, 0.6, 0.7, 0.8, 0.94,
  0.11, 0.28, 0.07, 0.5, 0.08, 0.28, 0.04, 0.3, 0.03, 0.03, 0.22, 1
)
TruePositiveRate <- c(
  0, 0.31, 0.35, 0.46, 0.69, 0.73, 0.59, 0.92, 0.92, 0.96, 1, 1,
  0.46, 0.73, 0.42, 0.88, 0.35, 0.73, 0.46, 0.73, 0.46, 0.46, 0.69, 1
)

# Type of each entry, written as runs:
# 1 Metric, 11 Score, 9 Metric, 2 Score, 1 Metric.
ScoreOrMetric <- rep(
  c("Metric", "Score", "Metric", "Score", "Metric"),
  times = c(1, 11, 9, 2, 1)
)

COMBINEDSCORETABLE <- data.frame(
  CombinedMetricScore, FalsePositiveRate, TruePositiveRate, ScoreOrMetric
)
The plot code is this:
# Scatter plot of true vs. false positive rate, coloured by ScoreOrMetric.
ggplot(COMBINEDSCORETABLE, aes(x = FalsePositiveRate, y = TruePositiveRate, color = ScoreOrMetric)) +
# Grey reference line with slope 1 and intercept 0.5.
geom_abline(slope = 1, intercept = .5, lwd = 1.5, color = "grey") +
geom_point(size =2, alpha = .8) +
# NOTE(review): a plot keeps only one coordinate system, so the coord_fixed()
# call below presumably replaces this coord_cartesian() and its limits - confirm.
coord_cartesian(xlim=c(0,1), ylim=c(0, 1)) +
coord_fixed() +
# Label only the points whose TruePositiveRate exceeds FalsePositiveRate + 0.44;
# every other point gets an empty label. The vectors used here are the
# free-standing ones from the workspace, not columns looked up through aes().
geom_text_repel(label = ifelse(TruePositiveRate > .44 + FalsePositiveRate,
yes = CombinedMetricScore, no = ""),
box.padding = 0.5)
Question: I want to add labels for the following 2 points "5", "45" but I don't know how to add it to my existing plot code.
We can use an | ("OR") in your ifelse logic. In general, though, I recommend passing only the data you need to geom_text_repel instead of everything (most of which would just have "" as its label), so try this:
# Scatter plot of true vs. false positive rate, coloured by ScoreOrMetric, with
# labels only on selected points.
ggplot(COMBINEDSCORETABLE, aes(x = FalsePositiveRate, y = TruePositiveRate, color = ScoreOrMetric)) +
  # Grey reference line with slope 1 and intercept 0.5.
  geom_abline(slope = 1, intercept = 0.5, lwd = 1.5, color = "grey") +
  geom_point(size = 2, alpha = 0.8) +
  # A plot has a single coordinate system: adding coord_fixed() after
  # coord_cartesian() replaces it and silently drops the limits. Passing the
  # limits to coord_fixed() keeps both the 1:1 aspect ratio and the 0-1 window.
  coord_fixed(xlim = c(0, 1), ylim = c(0, 1)) +
  # Hand the label layer only the rows that should be labelled: points more than
  # 0.44 above the diagonal, plus the explicitly requested scores "5" and "45".
  ggrepel::geom_text_repel(
    aes(label = CombinedMetricScore),
    box.padding = 0.5,
    data = ~ subset(., TruePositiveRate > (0.44 + FalsePositiveRate) | CombinedMetricScore %in% c("5", "45"))
  )

Error: Discrete value supplied to continuous scale - stat_ma_line

This is the data frame and ggplot code I am using:
# Example data: 12 observations, with ROS taking the values 0.03, 0.07 and 0.1.
ROS<- c(0.03, 0.03, 0.03, 0.03, 0.07, 0.07, 0.07, 0.07, 0.07, 0.1, 0.1, 0.1)
wind<- c(0.84, 1.77, 3.5, 6.44, 0.84, 1.77, 3.5, 6.44, 7.55, 0.84, 1.77, 3.5)
rey <- c(31500,66375,131250,241500,31500,66375,131250,241500,283125,31500,66375,131250)
wind250_1 <- c(69.4,69.4,1,1,31.08,37.07,1,1,1,22.8,19.45,1)
lee250_1 <- c(79.84,125.56,93.34,94.42,33.78,49.6,38.95,40.9,39.32,24.2,32.95,27.46)
df<- data.frame(ROS,wind,rey,wind250_1,lee250_1)
# Both layers receive df and their own mapping; the base ggplot() call is empty.
ggplot() +
# RMA line of lee250_1 against rey (this is the layer the reported error is
# attributed to in the text below).
stat_ma_line(df, mapping=aes(rey, lee250_1), method="RMA",
range.y = "interval", range.x = "interval",
linewidth = 1,fill = "yellow") +
# Points coloured by ROS, converted to a factor so it gets a discrete scale.
geom_point(df, mapping = aes(x = rey, lee250_1, colour=factor(ROS)),
size=3)+
xlab("Re") + ylab((expression(paste(tau~"windward"))))+
# NOTE(review): scientific_10 is not defined in this snippet; presumably a
# user-defined axis label formatter - confirm.
scale_x_continuous(trans='log10', label = scientific_10) +
scale_y_continuous(trans='log10') +
scale_color_manual(values = c("#0072B2", "#000000","#E7B800","#CC79A7")) +
labs(colour = "ROS (m/s)") +
theme_bw()
When I plot using the variable "y = wind250_1", the code works with no problem. But when I try to use the variable "y = lee250_1", it gives the "Error: Discrete value supplied to continuous scale". The variable is numeric (I checked the class), and here are a few things I tried that didn't work: using y = as.numeric(lee250_1) in the ggplot code, changing the names of the variables, and running the ggplot code without the lines scale_x_continuous(), scale_y_continuous(), and scale_color_manual().
The error I am getting is probably related to the stat_ma_line() because I tried to plot using geom_line() and it did work but I need to use stat_ma_line. So any help on how to solve this error is very much appreciated!!
You probably have too few points per group (you probably need more than 7 points per group), which is why you get an error. I added some fake data and now it works:
# ggplot2 for plotting, ggpmisc for stat_ma_line().
library(ggplot2)
library(ggpmisc)

# Enlarged example data (18 rows, partly made-up values) with only two ROS
# groups: each half holds four rows of 0.03 followed by five rows of 0.07.
ROS <- rep(rep(c(0.03, 0.07), times = c(4, 5)), times = 2)
rey <- c(
  31500, 66375, 131250, 241500, 31500, 66375, 131250, 241500, 131250,
  31600, 66475, 131350, 241600, 31300, 66575, 132250, 242500, 283425
)
lee250_1 <- c(
  79.84, 125.56, 93.34, 94.42, 33.78, 49.6, 24.2, 32.95, 79.94,
  122.54, 92.34, 91.42, 32.78, 43.6, 31.95, 44.9, 32.32, 22.2
)
df <- data.frame(ROS, rey, lee250_1)

# Points coloured by ROS group plus a single RMA line over all points, both
# axes on a log10 scale.
ggplot(df, aes(x = rey, y = lee250_1)) +
  geom_point(aes(colour = factor(ROS))) +
  stat_ma_line(
    method = "RMA",
    range.x = "interval", range.y = "interval",
    linewidth = 1, fill = "yellow"
  ) +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  scale_colour_manual(values = c("#0072B2", "#000000", "#E7B800", "#CC79A7")) +
  labs(
    x = "Re",
    y = expression(paste(tau ~ "windward")),
    colour = "ROS (m/s)"
  ) +
  theme_bw()
Created on 2023-01-26 with reprex v2.0.2
It looks like the model fit returns NAs for RMA, which ggplot cannot handle. Please see the answer in more detail here: https://github.com/aphalo/ggpmisc/issues/36

Plotting of the mean in boxplot before axis log transformation in R

I want to include the mean inside the boxplot but apparently, the mean is not located at the position where it is supposed to be. If I calculate the mean from the data it is 16.2, which would equal 1.2 at the log scale. I tried various things, e.g., changing the position of the stat_summary function before or after the transformation but this does not work.
Help is much appreciated!
Yours,
Kristof
Code:
Data:
# Particle counts as a one-column tibble. The column is named `value`
# explicitly (the plot maps y = value); tibble() replaces the deprecated
# as.tibble() call.
df <- tibble(value = c(2e-05, 0.38, 0.63, 0.98, 0.04, 0.1, 0.16, 0.83, 0.17, 0.09, 0.48, 4.36, 0.83, 0.2, 0.32, 0.44, 0.22, 0.23, 0.89, 0.23, 1.1, 0.62, 5, 340, 47))
Output:
# Single boxplot of `value` at x = 0, with the mean overlaid as a grey point
# and the y axis on a log10 scale.
df %>%
ggplot(aes(x = 0, y = value)) +
# Outliers are hidden by giving them no colour.
geom_boxplot(width = .12, outlier.color = NA) +
# NOTE(review): according to the answer below, with scale_y_log10() this
# summary is computed on log10(value), which is why the point is not where the
# arithmetic mean of the raw data would be.
stat_summary(fun=mean, geom="point", shape=21, size=3, color="black", fill="grey") +
labs(
x = "",
y = "Particle counts (P/kg)"
) +
# Breaks at powers of ten, labelled as 10^k.
scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x)))
The mean calculated by stat_summary is the mean of log10(value), not of value. Below I propose to define a new function my_mean for a correct calculation of the average value.
library(ggplot2)
library(dplyr)
library(tibble)
library(scales)
# Particle counts as a one-column tibble. The column is named `value`
# explicitly (the plot maps y = value); tibble() replaces the deprecated
# as.tibble() call.
df <- tibble(
  value = c(
    2e-05, 0.38, 0.63, 0.98, 0.04, 0.1, 0.16,
    0.83, 0.17, 0.09, 0.48, 4.36, 0.83, 0.2, 0.32, 0.44,
    0.22, 0.23, 0.89, 0.23, 1.1, 0.62, 5, 340, 47
  )
)
# Mean for use on a log10-transformed axis: `x` holds log10 values, so they are
# back-transformed, averaged on the original scale, and the result is returned
# as a log10 value again.
my_mean <- function(x) {
  raw_values <- 10^x
  log10(mean(raw_values))
}
# Single boxplot of `value` at x = 0 on a log10 y axis. The grey point is the
# arithmetic mean of the raw data, computed by my_mean() from the transformed
# values that stat_summary() receives.
ggplot(df, aes(x = 0, y = value)) +
  geom_boxplot(width = 0.12, outlier.color = NA) +
  stat_summary(
    fun = my_mean,
    geom = "point",
    shape = 21, size = 3,
    color = "black", fill = "grey"
  ) +
  xlab("") +
  ylab("Particle counts (P/kg)") +
  # Breaks at powers of ten, labelled as 10^k.
  scale_y_log10(
    breaks = trans_breaks("log10", function(x) 10^x),
    labels = trans_format("log10", math_format(10^.x))
  )

Need help to display legend and in similar color code to the data

I am visualizing a time-series plot using ggplot2 and trying to combine the legend. I have tried many options but have not yet gotten my desired output. In one plot the lines are missing the color coding and in the other, the chart is missing the legend. My desired output is to have a chart with the legend and the color scheme being the same.
Here is the script where the lines are missing the color-coding;
library(tidyverse)
# Load the 2018 NDVI table (the path is specific to the asker's machine).
deviation <- read_csv("C:/Users/JohnWaweru/Documents/Thesis/Data/yearly_CSVs/Turkana_new/2018_new.csv")

# Four NDVI curves over the months of the year.
# NOTE: this is the version from the question in which the lines lose their
# colour coding: 'red' / 'green' inside aes() are treated as data values (legend
# keys), not as colours, and they do not match the names used in
# scale_color_manual(values = ...). It is kept as asked; only the stray
# Markdown fence after ggtitle() was removed so the snippet parses.
deviation %>% ggplot() +
  geom_line(aes(x = as.Date(Month), y = Upper_curve, col = 'red'), linetype = 2) +
  geom_line(aes(x = as.Date(Month), y = Lower_curve, col = 'red'), linetype = 2) +
  geom_line(aes(x = as.Date(Month), y = Mean_NDVI, col = 'red'), linetype = 1) +
  geom_line(aes(x = as.Date(Month), y = NDVI_2018, col = 'green'), linetype = 1) +
  scale_color_manual(name = 'Legend',
                     values = c('Mean_NDVI'= 'red', 'NDVI_2018' = 'green', 'Upper_curve' = 'red', 'Lower_curve' = 'red'),
                     labels = c('Mean_NDVI', 'NDVI_2018', 'Upper_curve','Lower_curve')) +
  ylim(0.2, 0.6) +
  # One tick per month, labelled with the abbreviated month name.
  scale_x_date(date_labels = "%b", date_breaks = "1 month") +
  ylab(label = "NDVI") +
  xlab(label = "Month") +
  ggtitle("NDVI Deviation 2018")
Here is the Sample data I am working with;
structure(list(Month = structure(c(18262, 18293, 18322, 18353, 18383, 18414), class = "Date"),
Mean_NDVI = c(0.26, 0.23, 0.25, 0.34, 0.36, 0.32),
NDVI_2018 = c(0.22, 0.23, 0.23, 0.41, 0.46, 0.32),
Mean_Std = c(0.01, 0.01, 0.01, 0.02, 0.02, 0.02),
Std_2018 = c(0.01, 0.01, 0.03, 0.03, 0.04, 0.03),
Upper_curve = c(0.27, 0.24, 0.26, 0.36, 0.38, 0.34),
Lower_curve = c(0.25, 0.22, 0.24, 0.32, 0.34, 0.3)),
row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
))
Setting literal colours only works outside the aes() function or when you use scale_colour_identity(). Most of the time when you want to label individual line layers, you can set aes(..., colour = "My legend label").
library(ggplot2)
# Sample data from the question, rebuilt as a six-row tibble-classed data frame.
deviation <- structure(
  list(
    Month = structure(
      c(18262, 18293, 18322, 18353, 18383, 18414),
      class = "Date"
    ),
    Mean_NDVI = c(0.26, 0.23, 0.25, 0.34, 0.36, 0.32),
    NDVI_2018 = c(0.22, 0.23, 0.23, 0.41, 0.46, 0.32),
    Mean_Std = c(0.01, 0.01, 0.01, 0.02, 0.02, 0.02),
    Std_2018 = c(0.01, 0.01, 0.03, 0.03, 0.04, 0.03),
    Upper_curve = c(0.27, 0.24, 0.26, 0.36, 0.38, 0.34),
    Lower_curve = c(0.25, 0.22, 0.24, 0.32, 0.34, 0.3)
  ),
  row.names = c(NA, -6L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Month is shared by all layers, so it is mapped once at the plot level. Each
# line maps `colour` to the text that should appear as its legend label; the
# actual colours are assigned by name in the manual scale.
ggplot(deviation, aes(x = Month)) +
  geom_line(aes(y = Upper_curve, colour = "Upper_curve"), linetype = 2) +
  geom_line(aes(y = Lower_curve, colour = "Lower_curve"), linetype = 2) +
  geom_line(aes(y = Mean_NDVI, colour = "Mean_NDVI"), linetype = 1) +
  geom_line(aes(y = NDVI_2018, colour = "NDVI_2018"), linetype = 1) +
  scale_colour_manual(
    name = "Legend",
    values = c(
      Lower_curve = "red", Mean_NDVI = "red",
      NDVI_2018 = "green", Upper_curve = "red"
    ),
    # Legend keys are listed alphabetically (Lower_curve, Mean_NDVI, NDVI_2018,
    # Upper_curve), so the two curve bounds get the dashed linetype.
    guide = guide_legend(override.aes = list(linetype = c(2, 1, 1, 2)))
  ) +
  ylim(0.2, 0.6) +
  scale_x_date(date_labels = "%b", date_breaks = "1 month") +
  labs(title = "NDVI Deviation 2018", x = "Month", y = "NDVI")
Created on 2021-08-05 by the reprex package (v1.0.0)

Repeating categories on lattice plot (likert function in R)

I am a novice R user and am trying to create a plot using the likert function from the HH package. My problem seems to come from repeating category labels. It is easier to show the issue:
library(HH)
# Example data: 12 rows in three sub-tables (Var1: 5 rows, Var2: 4, Var3: 3).
# The category label 'missing' occurs in both Var2 and Var3.
responses <- data.frame( Subtable= c(rep('Var1',5),rep('Var2',4),rep('Var3',3)),
Question=c('very low','low','average','high','very high', '<12', '12-14', '15+',
'missing', '<25','25+','missing'), Res1=as.numeric(c(0.05, 0.19, 0.38, 0.24, .07,
0.09, 0.73, 0.17, 0.02, 0.78, 0.20, 0.02)), Res2=as.numeric(c(0.19, 0.04, 0.39,
0.22, 0.06, 0.09, 0.50, 0.16, 0.02, 0.75, 0.46, 0.20)))
# Likert plot of Question against the remaining columns, conditioned on
# Subtable, with the panels laid out as layout = c(1, 3) and a free y-scale
# relation requested per panel.
# NOTE(review): the asker reports extra blank rows whenever a Question label is
# repeated across sub-tables; the cause is not visible in this snippet.
likert(Question ~ . | Subtable, responses,
scales=list(y=list(relation="free")), layout=c(1,3),
positive.order=TRUE,
between=list(y=0),
strip=FALSE, strip.left=strip.custom(bg="gray97"),
par.strip.text=list(cex=.6, lines=3),
main="Description of Sample",rightAxis=FALSE,
ylab=NULL, xlab='Percent')
Unfortunately it creates strange spaces that aren't really there, as exhibited in the bottom panel of the following plot:
This seems to come from the repeated category 'missing'. My actual data has several repeats (e.g., 'no', 'other') and whenever they are included I get these extra spaces. If I run the same code but remove the repeated categories then it runs properly. In this case that means changing 'responses' in the code above to responses[! responses$Question %in% 'missing',].
Can someone tell me how to create the graph using all the categories, without getting the 'extra' spaces? Thanks for your help and patience.
-Z
R 3.0.2
HH 3.0-3
lattice 0.20-24
latticeExtra 0.6-26
Here is a solution using ggplot2 to create the graphic
library(ggplot2)
# Example data from the question: 12 rows in three sub-tables
# (Var1: 5 rows, Var2: 4 rows, Var3: 3 rows).
responses <- data.frame(
  Subtable = rep(c("Var1", "Var2", "Var3"), times = c(5, 4, 3)),
  Question = c(
    "very low", "low", "average", "high", "very high",
    "<12", "12-14", "15+", "missing",
    "<25", "25+", "missing"
  ),
  Res1 = c(0.05, 0.19, 0.38, 0.24, 0.07, 0.09, 0.73, 0.17, 0.02, 0.78, 0.20, 0.02),
  Res2 = c(0.19, 0.04, 0.39, 0.22, 0.06, 0.09, 0.50, 0.16, 0.02, 0.75, 0.46, 0.20),
  stringsAsFactors = FALSE
)

# Fix the order of panels and categories through explicit factor levels;
# "missing" is a single level shared by Var2 and Var3.
responses$Subtable <- factor(responses$Subtable, levels = paste0("Var", 1:3))
question_order <- c(
  "missing", "25+", "<25", "<12", "12-14", "15+",
  "very low", "low", "average", "high", "very high"
)
responses$Question <- factor(responses$Question, levels = question_order)
# Diverging bar-style plot: Res1 extends to the right of zero, Res2 to the left,
# one panel per Subtable with its own set of categories on the y axis.
ggplot(responses) +
  theme_bw() +
  aes(x = 0, y = Question) +
  # The colour aesthetic carries the series name, which becomes the legend key.
  # (Mapping the literals "red"/"blue" and relying on unnamed values/labels
  # swapped both colour and label, because the keys are sorted alphabetically.)
  geom_errorbarh(aes(xmin = 0, xmax = Res1, color = "Res1")) +
  geom_errorbarh(aes(xmin = 0, xmax = -Res2, color = "Res2")) +
  # `scales` spelled out in full instead of relying on partial argument matching.
  facet_wrap(~ Subtable, ncol = 1, scales = "free_y") +
  # Named values tie each series to its colour regardless of key order.
  scale_color_manual(
    name = "",
    values = c(Res1 = "red", Res2 = "blue")
  ) +
  # Both directions are magnitudes, so the negative side is labelled with
  # positive numbers.
  scale_x_continuous(
    breaks = c(-0.5, 0, 0.5),
    labels = c("0.5", "0", "0.5")
  ) +
  ylab("") + xlab("Percent") +
  theme(legend.position = "bottom")

Resources