Related
I have three variables in my dataset:
school (School)
actual score (actual_score)
expected score (expected_score)
and need to do this graph
So far I have
data%>%
mutate(School=fct_reorder(School, actual_score)
)%>%
ggplot(aes(x=School))+
geom_point(aes(y=actual_score), colour="red")+
geom_point(aes(y= expected_score), colour="blue")
But they are just points... how to connect them?
structure(list(School = structure(c(9L,
6L, 8L, 2L, 1L), levels = c("11278", "11274", "11285", "11289",
"11280", "01424", "11290", "11272", "01206", "11286"), class = "factor"),
actual_score = c(453.4875, 423.375757575758, 441.481481481482,
375.103846153846, 363.621428571429), expected_score = c(452.489150512886,
428.002515274828, 439.209772701724, 384.917346549729, 382.216349569884
)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -5L), .rows = structure(list(
1:5), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L), .drop = TRUE))
Your dput result is slightly corrupt, so I slightly modified it.
You can use geom_linerange to connect the points.
I also included the rest of the graph as placing the labels is a bit tricky.
library(tidyverse)

# Example data rebuilt by hand from the question's (slightly corrupt) dput().
data <- tibble(
  School = structure(
    c(9L, 6L, 8L, 2L, 1L),
    levels = c(
      "11278", "11274", "11285", "11289", "11280",
      "01424", "11290", "11272", "01206", "11286"
    ),
    class = "factor"
  ),
  actual_score = c(
    453.4875, 423.375757575758, 441.481481481482,
    375.103846153846, 363.621428571429
  ),
  expected_score = c(
    452.489150512886, 428.002515274828, 439.209772701724,
    384.917346549729, 382.216349569884
  )
)

data %>%
  mutate(
    # Relabel the factor levels as "School A", "School B", ... and order the
    # schools by their actual score.
    School = fct_reorder(
      fct_relabel(School, ~ paste("School", LETTERS[seq_along(.x)])),
      actual_score
    )
  ) %>%
  ggplot(aes(x = School)) +
  # Vertical line connecting the actual and the expected score of each school.
  geom_linerange(aes(ymin = actual_score, ymax = expected_score)) +
  # Constant strings inside aes() create the legend keys; colour and shape use
  # the same keys ("Actual" / "Expected") so both end up in a single legend.
  geom_point(
    aes(y = actual_score, color = "Actual", shape = "Actual"),
    size = 3
  ) +
  # Nudge each label away from the connecting line: 5 units above the point
  # when it is the higher of the two scores, 5 units below otherwise.
  geom_text(aes(
    y = actual_score - 5 + 10 * (actual_score > expected_score),
    label = round(actual_score)
  )) +
  geom_point(
    aes(y = expected_score, color = "Expected", shape = "Expected"),
    size = 3
  ) +
  geom_text(aes(
    y = expected_score - 5 + 10 * (actual_score < expected_score),
    label = round(expected_score)
  )) +
  # Named values tie every colour/shape to its key explicitly instead of
  # relying on the alphabetical order of the keys.
  scale_color_manual(
    name = NULL,
    values = c(Actual = "blue", Expected = "red")
  ) +
  scale_shape_manual(
    name = NULL,
    values = c(Actual = 16, Expected = 17)
  ) +
  labs(y = "Average NAPLAN score", x = NULL) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    panel.grid.major.x = element_blank()
  )
Created on 2022-12-19 with reprex v2.0.2
To connect your points you could use geom_segment(). To get the different shapes, map a constant to the shape aesthetic inside aes(), and do the same for colour so that a single legend reflects both shape and colour. The rest is some styling plus two additional geom_label() layers for the labels.
library(dplyr)
library(ggplot2)
library(forcats)

# `data` is the tibble defined in the DATA section of this answer.
data %>%
  # Order the schools on the x axis by their actual score.
  mutate(School = fct_reorder(School, actual_score)) %>%
  ggplot(aes(x = School)) +
  # Grey connector between the actual and the expected score of each school.
  geom_segment(
    aes(xend = School, y = actual_score, yend = expected_score),
    colour = "grey80", linewidth = 1
  ) +
  # Constant strings inside aes() create the legend keys; colour and shape
  # share the keys so they are merged into one legend.
  geom_point(
    aes(y = actual_score, colour = "Actual", shape = "Actual"),
    size = 3
  ) +
  geom_point(
    aes(y = expected_score, colour = "Expected", shape = "Expected"),
    size = 3
  ) +
  # Border-less, transparent labels; the padding pushes the text away from the
  # point, and vjust places it above the higher / below the lower score.
  geom_label(aes(
    y = actual_score, label = round(actual_score),
    vjust = ifelse(actual_score > expected_score, 0, 1)
  ), label.size = NA, label.padding = unit(10, "pt"), fill = NA) +
  geom_label(aes(
    y = expected_score, label = round(expected_score),
    vjust = ifelse(expected_score > actual_score, 0, 1)
  ), label.size = NA, label.padding = unit(10, "pt"), fill = NA) +
  # Named values make the key -> colour/shape assignment explicit instead of
  # depending on the alphabetical order of the keys.
  scale_color_manual(values = c(Actual = "red", Expected = "blue")) +
  scale_shape_manual(values = c(Actual = 16, Expected = 17)) +
  scale_y_continuous(breaks = seq(320, 480, 40), limits = c(320, 480)) +
  labs(color = NULL, shape = NULL, x = NULL, y = "Average NAPLAN Score") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    axis.title.y = element_text(face = "bold"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  )
DATA
# Example data: five schools with their actual and expected average score.
# The stray `.rows` attribute (a leftover of a truncated dput() of a grouped
# tibble) has been dropped; it is not part of a plain tibble.
data <- structure(
  list(
    School = structure(
      c(9L, 6L, 8L, 2L, 1L),
      levels = c(
        "11278", "11274", "11285", "11289",
        "11280", "01424", "11290", "11272", "01206", "11286"
      ),
      class = "factor"
    ),
    actual_score = c(
      453.4875, 423.375757575758, 441.481481481482,
      375.103846153846, 363.621428571429
    ),
    expected_score = c(
      452.489150512886,
      428.002515274828, 439.209772701724, 384.917346549729, 382.216349569884
    )
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -5L)
)
I am plotting a linear mixed effects model using ggplot2 in R. I keep receiving this error with regards to including the mean rating per trial.
Error: Continuous value supplied to discrete scale
I have localized the problem to geom_point as geom_line and geom_ribbon work just fine. Here is the code I am currently using
p2 <- ggplot(td_mean_pref_plot_groups, aes(x, td_mean_pref_plot_groups$predicted, col = group)) +
geom_line(size=1.5) +
scale_color_manual(values = c("blue","red")) +
geom_ribbon(aes(ymin=conf.low,ymax=conf.high, fill=group),alpha=.2,colour=NA) +
scale_fill_manual(values = c("blue","red")) +
geom_point(data=summStats_td_mean,aes(trial,mean, col = condition),size=2) +
scale_color_manual(values = c("blue","red")) +
theme_bw() +
xlab('Trial') +
ylab('Prediction Error') +
ylim(1,2.2) +
ggtitle('TD learning about TD vs. TD \n learning about ASD') +
theme(text=element_text(size=20),
plot.title = element_text(hjust = 0.5),
panel.border = element_blank())
p2
geom_point reads the data below
structure(list(condition = c(1L, 1L, 1L, 1L, 1L, 1L), trial = 1:6,
n = c(80L, 93L, 92L, 94L, 94L, 94L), mean = c(1.225, 1.39784946236559,
1.25, 1.40425531914894, 1.24468085106383, 1.29787234042553
), sd = c(1.01849976541573, 1.08487411558084, 1.00137268424261,
1.11025666834073, 1.00199983058361, 1.09573746202196)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L), groups = structure(list(
condition = 1L, .rows = structure(list(1:6), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L), .drop = TRUE))
As you can see, the options for condition are 1 or 2, but R seems to be reading those as continuous. I have used this code many times before and never run into this issue, so I'm not sure why it's suddenly acting up. Thank you!
As mentioned in the comment, the condition column in your data is an integer, therefore the scale is continuous. You can easily change this by calling as.factor() on condition:
library(ggplot2)
library(ggpubr)

# `condition` mapped as-is: the panel is labelled "Continuous scale".
p1 <- ggplot() +
  geom_point(
    data = df,
    mapping = aes(trial, mean, col = condition)
  ) +
  labs(subtitle = "Continuous scale")

# `condition` wrapped in as.factor(), which allows the manual (discrete)
# colour scale to be applied.
p2 <- ggplot() +
  geom_point(
    data = df,
    mapping = aes(trial, mean, col = as.factor(condition))
  ) +
  scale_color_manual(values = c("blue", "red")) +
  labs(subtitle = "Discrete scale")

# Show both variants side by side.
ggarrange(p1, p2)
I'm trying to place count labels in a ggplot2 barplot and I haven't been able to do it. I need to display the number of pixels within each temperature range.
The dataframe was built from a raster: EneroT5cmSC
datene <- as.data.frame(EneroT5cmSC,xy=TRUE)%>%drop_na()
datene$cuts <- cut(datene$layer, breaks=seq(21, 29, length.out=12))
dput:
datene_stuc <- structure(
list(
x = c(-57.063098328,-57.021448328,-56.996458328,-56.988128328),
y = c(-30.087481664,-30.087481664,-30.087481664,-30.087481664),
layer = c(
25.6227328470624,
26.6386584334308,
26.0636709134397,
26.0580615984563
),
cuts = structure(
c(7L, 9L,
8L, 8L),
.Label = c(
"(20,20.8]",
"(20.8,21.6]",
"(21.6,22.5]",
"(22.5,23.3]",
"(23.3,24.1]",
"(24.1,24.9]",
"(24.9,25.7]",
"(25.7,26.5]",
"(26.5,27.4]",
"(27.4,28.2]",
"(28.2,29]"
),
class = "factor"
)
),
row.names = c(NA,
4L),
class = "data.frame")
Barplot code:
ggplot() +
geom_bar(data = datene, aes(cuts, fill = cuts)) +
scale_fill_viridis_d(option = "B",'Temp (Cº)') +
theme(axis.title.x=element_blank(), axis.title.y=element_blank()) +
geom_text(aes(label = ..count..), stat = "count", vjust = 1.5, colour = "black")
If you move the data and aes() arguments from geom_bar() to ggplot(), so that geom_text() inherits them as well, and change vjust to -1, it works like this:
datene_stuc <- structure(
list(
x = c(-57.063098328,-57.021448328,-56.996458328,-56.988128328),
y = c(-30.087481664,-30.087481664,-30.087481664,-30.087481664),
layer = c(
25.6227328470624,
26.6386584334308,
26.0636709134397,
26.0580615984563
),
cuts = structure(
c(7L, 9L,
8L, 8L),
.Label = c(
"(20,20.8]",
"(20.8,21.6]",
"(21.6,22.5]",
"(22.5,23.3]",
"(23.3,24.1]",
"(24.1,24.9]",
"(24.9,25.7]",
"(25.7,26.5]",
"(26.5,27.4]",
"(27.4,28.2]",
"(28.2,29]"
),
class = "factor"
)
),
row.names = c(NA,
4L),
class = "data.frame")
library(ggplot2)

# Data and aesthetics are set in ggplot() so that both geom_bar() and
# geom_text() inherit them.
ggplot(data = datene_stuc, aes(x = cuts, fill = cuts)) +
  geom_bar() +
  # stat = "count" computes the bar heights; after_stat(count) is the current
  # spelling of the deprecated `..count..` notation. The negative vjust puts
  # the label above the bar.
  geom_text(
    aes(label = after_stat(count)),
    stat = "count", vjust = -1, colour = "black"
  ) +
  # Legend title passed by name rather than positionally.
  scale_fill_viridis_d(name = "Temp (Cº)", option = "B") +
  theme(axis.title.x = element_blank(), axis.title.y = element_blank())
Created on 2022-07-17 by the reprex package (v2.0.1)
This is probably an easy question for ggplot2 experts: I want to use my own colors rather than the default colors. How to achieve that?
Here's a snippet of the data:
df <- structure(list(start = c(0, 251, 1976, 5127, 5717, 6783), end = c(251,
1976, 5127, 5717, 6783, 6830), minute = c(0L, 0L, 0L, 0L, 0L,
0L), AOI = c("*", "A", "*", "*", "A", "*"), AOI_col = c("blue",
"red", "blue", "blue", "red", "blue")), row.names = c(NA, -6L
), groups = structure(list(minute = 0L, .rows = structure(list(
1:6), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr",
"list"))), row.names = 1L, class = c("tbl_df", "tbl", "data.frame"
), .drop = TRUE), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
))
The colors I wish to plot are in column AOI_col. Here's the code so far:
library(ggplot2)
ggplot(df5, aes(x = start, xend = end,
y = minute + scale(as.numeric(as.factor(AOI)))/10,
yend = minute + scale(as.numeric(as.factor(AOI)))/10,
color = AOI)) +
geom_segment(size = 2) +
scale_y_reverse(breaks = 0:53, labels = paste0(0:53, "min"), name = NULL) +
labs(title = "Gaze activity Speaker C F01") +
theme(axis.title.x.bottom = element_blank())
I've tried using aes(fill = AOI_col) and scale_color_manual(values = AOI_col) but to no avail. Help is appreciated!
You can add a color adjustment in the geom_segment():
# Colours are taken row by row from the AOI_col column: mapping the column
# inside aes() and using the identity scale keeps every colour attached to its
# own row, whereas a bare `df$AOI_col` vector outside aes() is only matched by
# position. The identity scale draws no legend by default.
ggplot(df, aes(
  x = start, xend = end,
  y = minute + scale(as.numeric(as.factor(AOI))) / 10,
  yend = minute + scale(as.numeric(as.factor(AOI))) / 10
)) +
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0).
  geom_segment(aes(colour = AOI_col), linewidth = 2) +
  scale_colour_identity() +
  scale_y_reverse(breaks = 0:53, labels = paste0(0:53, "min"), name = NULL) +
  labs(title = "Gaze activity Speaker C F01") +
  theme(axis.title.x.bottom = element_blank())
Or again the complete example with the answer from @user438383:
df <-
structure(
list(
start = c(0, 251, 1976, 5127, 5717, 6783),
end = c(251,
1976, 5127, 5717, 6783, 6830),
minute = c(0L, 0L, 0L, 0L, 0L,
0L),
AOI = c("*", "A", "*", "*", "A", "*"),
AOI_col = c("blue",
"red", "blue", "blue", "red", "blue")
),
row.names = c(NA,-6L),
groups = structure(
list(
minute = 0L,
.rows = structure(
list(1:6),
ptype = integer(0),
class = c("vctrs_list_of", "vctrs_vctr",
"list")
)
),
row.names = 1L,
class = c("tbl_df", "tbl", "data.frame"),
.drop = TRUE
),
class = c("grouped_df", "tbl_df", "tbl", "data.frame")
)
library(ggplot2)

# Explicit AOI -> colour lookup built from the data (first colour seen for
# each AOI). A named vector makes scale_colour_manual() match colours to AOI
# values by name instead of by the order in which the colours happen to occur.
aoi_colours <- setNames(df$AOI_col, df$AOI)
aoi_colours <- aoi_colours[!duplicated(names(aoi_colours))]

ggplot(
  df,
  aes(
    x = start,
    xend = end,
    y = minute + scale(as.numeric(as.factor(AOI))) / 10,
    yend = minute + scale(as.numeric(as.factor(AOI))) / 10,
    color = AOI
  )
) +
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0).
  geom_segment(linewidth = 2) +
  scale_y_reverse(
    breaks = 0:53,
    labels = paste0(0:53, "min"),
    name = NULL
  ) +
  labs(title = "Gaze activity Speaker C F01") +
  theme(axis.title.x.bottom = element_blank()) +
  scale_colour_manual(values = aoi_colours)
I think the ideal thing to do is make AOI_col a factor and sort it alphabetically, then assign the colours to be the unique values of that column:
# Turn AOI_col into a factor whose levels are the colour names in alphabetical
# order. The same data frame (`df`) is used throughout.
df$AOI_col <- factor(df$AOI_col, levels = sort(unique(df$AOI_col)))

ggplot(df, aes(
  x = start, xend = end,
  y = minute + scale(as.numeric(as.factor(AOI))) / 10,
  yend = minute + scale(as.numeric(as.factor(AOI))) / 10,
  color = AOI
)) +
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0).
  geom_segment(linewidth = 2) +
  scale_y_reverse(breaks = 0:53, labels = paste0(0:53, "min"), name = NULL) +
  labs(title = "Gaze activity Speaker C F01") +
  theme(axis.title.x.bottom = element_blank()) +
  # levels() yields a plain character vector of colour names.
  # NOTE: unnamed values are assigned to the sorted AOI values by position;
  # this gives "*" -> "blue" and "A" -> "red" here because both sort orders
  # line up. Use a named vector if that cannot be guaranteed.
  scale_colour_manual(values = levels(df$AOI_col))
I am attempting to make a series of plots using the same code with unique coral species databases.
Databases
data_1 <- structure(list(Site_long = structure(c(1L, 1L, 2L, 2L), .Label = c("Hanauma Bay",
"Waikiki"), class = "factor"), Shelter = structure(c(1L, 2L,
1L, 2L), .Label = c("Low", "High"), class = c("ordered", "factor"
)), mean = c(1.19986885018767, 2.15593884020962, 0.369605100791602,
0.31005865611133), sd = c(2.5618758944073, 3.67786619671933,
1.0285671157698, 0.674643037178562), lower = c(0.631321215232725,
1.33972360808602, 0.141339007832154, 0.160337623931733), upper = c(1.76841648514261,
2.97215407233321, 0.59787119375105, 0.459779688290928), sample_size = c(78L,
78L, 78L, 78L)), row.names = c(NA, -4L), groups = structure(list(
Site_long = structure(1:2, .Label = c("Hanauma Bay", "Waikiki"
), class = "factor"), .rows = structure(list(1:2, 3:4), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = 1:2, class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
data_2 <- structure(list(Site_long = structure(c(2L, 2L, 1L, 1L), .Label = c("Hanauma Bay",
"Waikiki"), class = "factor"), Shelter = structure(c(1L, 2L,
1L, 2L), .Label = c("Low", "High"), class = c("ordered", "factor"
)), mean = c(0.695203162997812, 0.838720069947102, 0.76957780057238,
0.771070502382599), sd = c(1.17117437618039, 1.02766824928792,
1.43499288333539, 1.28634022958585), lower = c(0.435288768568787,
0.610653459098997, 0.451115141323908, 0.485597776371556), upper = c(0.955117557426838,
1.06678668079521, 1.08804045982085, 1.05654322839364), sample_size = c(78L,
78L, 78L, 78L)), row.names = c(NA, -4L), groups = structure(list(
Site_long = structure(1:2, .Label = c("Hanauma Bay", "Waikiki"
), class = "factor"), .rows = structure(list(3:4, 1:2), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = 1:2, class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
When I run my code on the first species database (data_1), the barplots and associated error bar annotations render correctly. Notice I also made a new variable "data" that will be the same object used in later for species 2. In order to keep this plot to make a composite of a number of plots later, I named the plot "species_1_plot" to save it to the global environment.
Code for Species 1 Plot
data <- data_1
mult_compare_recruitment <- c("A", "A", "A", "A")
data <- data[c(3, 4, 1, 2),]
data$Shelter <- factor(data$Shelter, levels = c("Low", "High"))
# reorder summary dataframe for plotting
position <- c("Waikiki", "Hanauma Bay")
# ggplot2 barplot position with Waikiki (Low-High Shelter) and Hanauma Bay
recruitment_plot_3 <- ggplot(data = data, aes(fill=Shelter, y=mean, x=Site_long)) +
geom_bar(position = "dodge", stat="identity", width = .8) +
scale_x_discrete(limits = position) +
geom_errorbar(aes(ymin = lower, ymax = upper), position = position_dodge(.8), width = .1) +
geom_text(aes(label = mult_compare_recruitment, y = data$upper), vjust = -.5, position = position_dodge(width = 0.8), size = 4) +
scale_fill_grey(name = "Shelter", start = .8, end = .2) +
labs(x = "Site", y = expression(paste("Coral recruitment per m"^"2"))) +
theme_classic(base_size = 14.5) +
theme(text = element_text(size = 18), axis.title.x = element_blank(),
legend.position = "none", axis.text.y = element_text(angle = 90))
species_1_plot <- recruitment_plot_3
species_1_plot
In order to create my next plot, I run the same code on a different species database (data_2) while once again assigning the new database to the object "data". Once again, I saved the new plot "species_2_plot" to the global environment.
Code for Species 2 Plot
data <- data_2
mult_compare_recruitment <- c("A", "A", "B", "B")
data <- data[c(3, 4, 1, 2),]
data$Shelter <- factor(data$Shelter, levels = c("Low", "High"))
# reorder summary dataframe for plotting
position <- c("Waikiki", "Hanauma Bay")
# ggplot2 barplot position with Waikiki (Low-High Shelter) and Hanauma Bay
recruitment_plot_3 <- ggplot(data = data, aes(fill=Shelter, y=mean, x=Site_long)) +
geom_bar(position = "dodge", stat="identity", width = .8) +
scale_x_discrete(limits = position) +
geom_errorbar(aes(ymin = lower, ymax = upper), position = position_dodge(.8), width = .1) +
geom_text(aes(label = mult_compare_recruitment, y = data$upper), vjust = -.5, position = position_dodge(width = 0.8), size = 4) +
scale_fill_grey(name = "Shelter", start = .8, end = .2) +
labs(x = "Site", y = expression(paste("Coral recruitment per m"^"2"))) +
theme_classic(base_size = 14.5) +
theme(text = element_text(size = 18), axis.title.x = element_blank(),
legend.position = "none", axis.text.y = element_text(angle = 90))
species_2_plot <- recruitment_plot_3
species_2_plot
The problem is, when I plot the first species plot again (species_1_plot), the data are correct (bars), but the height of text annotations and their letter values are not correct. They are in fact the values from species_2_plot.
species_1_plot
I saved each plot to the global environment with a unique name, knowing this would be an issue. Despite this, geom_text() seems to be using data from the second plot (code that is in the global environment), even though the actual data (bars) in the plot are correct (from species_1_plot). My understanding was that when you name a plot as an object (species_1_plot and species_2_plot), it's akin to saving the plot, and therefore prevents any changes to the plot and annotations unless specified. Is there any way to prevent this from happening without specifically naming the databases (data_1 and data_2)? All input is appreciated. Thanks in advance!
I would suggest using a function. ggplot2 does not evaluate the expressions inside aes() until the plot is drawn, so data$upper and mult_compare_recruitment are looked up in the global environment at print time; because you reassigned data and mult_compare_recruitment for the second species, the first plot picks up the new values when it is printed again. I have made a function with parameters for the data, the recruitment labels and the position, and I display the outputs. You have to fill them in the same way you defined those variables in your code. A function has its own internal environment, so each plot keeps the values it was built with. Here is the code, using the data you shared:
library(ggplot2)
#Function
#' Dodged bar plot of mean recruitment per site and shelter level
#'
#' Draws the bars with error bars (`lower` / `upper`) and writes one
#' annotation label above each error bar.
#'
#' @param x Summary data frame with exactly four rows and the columns
#'   `Site_long`, `Shelter`, `mean`, `lower` and `upper`.
#' @param y Character vector of annotation labels, one per row of `x`
#'   *after* the rows have been reordered to 3, 4, 1, 2.
#' @param z Character vector of site names giving the order of the x axis.
#' @return A ggplot object.
myplotfunc <- function(x, y, z) {
  # Same hard-coded reordering as the original script: rows 3-4, then 1-2.
  data <- x[c(3, 4, 1, 2), ]
  data$Shelter <- factor(data$Shelter, levels = c("Low", "High"))

  # Store the labels in the data so that every aesthetic is resolved from the
  # plot's own data frame instead of from free-standing vectors such as
  # `data$upper`, which are looked up again whenever the plot is drawn.
  stopifnot(length(y) == nrow(data))
  data$annotation_label <- y

  ggplot(data = data, aes(fill = Shelter, y = mean, x = Site_long)) +
    # geom_col() is the shorthand for geom_bar(stat = "identity").
    geom_col(position = "dodge", width = .8) +
    scale_x_discrete(limits = z) +
    geom_errorbar(
      aes(ymin = lower, ymax = upper),
      position = position_dodge(.8), width = .1
    ) +
    # Labels sit just above the upper end of each error bar.
    geom_text(
      aes(label = annotation_label, y = upper),
      vjust = -.5, position = position_dodge(width = 0.8), size = 4
    ) +
    scale_fill_grey(name = "Shelter", start = .8, end = .2) +
    labs(x = "Site", y = expression(paste("Coral recruitment per m"^"2"))) +
    theme_classic(base_size = 14.5) +
    theme(
      text = element_text(size = 18), axis.title.x = element_blank(),
      legend.position = "none", axis.text.y = element_text(angle = 90)
    )
}
#Code
o1 <- myplotfunc(x=data_1,y=c("A", "A", "A", "A"),z=c("Waikiki", "Hanauma Bay"))
o2 <- myplotfunc(x=data_2,y=c("A", "A", "B", "B"),z=c("Waikiki", "Hanauma Bay"))
Outputs: