So my data set is to analyse the effect of predation on salmon selected for growth.
Basically I have a start and ending point, 3 different strains and 2 environments (with and without predator). Does anyone know the best way to do this?
I was thinking of something like this drawing enter image description here
I have been trying, but I can only come up with either the separate time points, in which case I would have to make 2 graphs, or an average of both.
The data set is available here.
https://dryad-assetstore-merritt-west.s3.us-west-2.amazonaws.com/ark%3A/13030/m55q9wc8%7C1%7Cproducer/Salmon_Size_Data.txt?response-content-type=text%2Fplain&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEEgaCXVzLXdlc3QtMiJIMEYCIQD%2FrxcoA78DX5N86nFNROptzvNB%2Bo82OubnJESH4AQF5wIhALF9AuZuZMgV6Ik7EBd9Pje07bsANAT%2BB5R%2BBh24rjJYKswECEEQABoMNDUxODI2OTE0MTU3Igxdeqv51kC67yp3Gr0qqQTdXAWYho6s5Xrf3UFxy0BvZ%2Fm1OUwz%2BSvZS2jSWam%2BcFwyEk2gVOvcZis5PLf%2BAUk43X0wn4S5%2FpXkunbyWiWWlwoV1d%2BOlt8M%2FiyuGrg%2Bzydv2d%2FT6l5zdQ2dxa5ISKLmLHvpl5CzfCB2aChuWTwruTMsssEPZQUyxZy2ihgpbPpjV%2FM5LOfOxcunwJXrMBL4BUk6PCqZQYMpe5NiIOvv7mO58trcPKL5hQ0W4HECtiPtoslFn5Gv5v6KWG4A9VDAfgZwc0TxVmqzzbd6xnb57i6bbfgOyX7PkwFXTuNswa1VJL8Zai08%2BmlmvCXYZyhENYuVTk7K9g3N2aUWlP0nSSMyUKoJPgW45fldrgMMfl7uAH5Budh8EfoFUMQMStuse9gR0qiCHWMbohDao0YcOImNYmoCO5znwTbuDerPsGEzQbrK9YFPKbTpFtm%2Fqc5pAPWw4wWPWcj0PmG2FvNphT3IV8M8jL5Nc%2BNkCM2SbKf82XY2sBar43Xn%2BhPFlsaU%2FkeaFINCSRf29FY6mFNgoKWHfcGbiFoS6gegiFc4iyK7zMjReIFjJ9%2Bsur6HpwWVLG%2Br2JZ8OZjjwg1Uy6tWZ5LxUk%2Fm00fhjIuJyYe6vb%2BL98gKyzL9YXEOEDoEbQ6C%2FCGPsYzKs2mEJSic%2FRxGHIt7%2B4wI7ilcdVnpmoBxiQDYIjD5EYF1UYX2RzXCAb%2Ba4Feb5Y%2FnLv5Wd9lZH67KnrCl%2F%2FP80n%2FUMLmNqJsGOqgBUH4Uc6%2BmRqbTXPRp0NF%2BL6Ieni3hFJbOhhF33xQvrX0R75mGpFCUGSh15B1V%2F%2BQyoPJSJ6KpjBbmhvByzaUNzp9Tu9IRVbrAYaQjU1msReCU7%2B8T6NQnphj%2FizbzJsYEAPxVesRFiGfoH%2FcqjfDSIXDWiJU4pzwyaITjlPe2qawZ06sxXaP%2BxkrgINQ93FHpFTh6DX7kcYUG0dXkwGsDVXYln3pXlXTG&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20221108T081734Z&X-Amz-SignedHeaders=host&X-Amz-Expires=14400&X-Amz-Credential=ASIAWSMX3SNW4W5FR7N3%2F20221108%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Signature=8ab2326025376f5c9b96f2b4e31c51ba5fe15a96743794ac8d6cc48a75efe7e0
I am using this code:
# Mean body mass of each strain in each environment, with standard-error bars.
ggplot(aqua, aes(x = Env, y = mass, color = Strain, group = Strain)) +
  # Dashed line joining the per-environment means of each strain
  stat_summary(geom = "line", fun = mean, size = 1, linetype = 2) +
  # One point per strain/environment mean
  stat_summary(geom = "point", fun = mean, size = 3) +
  # Mean +/- standard error
  geom_errorbar(stat = "summary", fun.data = mean_se, width = 0.1, size = .5) +
  xlab("Environment") +
  ylab("Body mass (g)") +
  theme(
    axis.title.x.bottom = element_text(size = 20),
    axis.title.y.left = element_text(size = 20)
  )
Since you have not provided any data, I made up some example data for you. Next time, please include reproducible example data with your code.
# Load the packages this example relies on so that it runs in a fresh session.
library(ggplot2)
library(tibble)

# Toy data: one body-mass value for every strain/environment combination.
example_data <- tibble(
  strain = rep(c("A", "B", "C"), each = 3),
  env = rep(c("x", "y", "z"), times = 3),
  mass = c(1, 4, 7, 2, 6, 9, 3, 8, 10)
)

# One dashed line per strain across the environments, with a point at each
# observed value.
ggplot(example_data, mapping = aes(x = env, y = mass, group = strain, color = strain)) +
  geom_line(size = 1, linetype = 2) +
  geom_point(size = 3) +
  labs(x = "Environment", y = "Body mass (g)") +
  theme(
    axis.title.x.bottom = element_text(size = 20),
    axis.title.y.left = element_text(size = 20)
  )
Related
I'm making a scatterplot which shows a value plotted against the days since symptom onset. These patients are categorised based on disease severity, and I wanted to show how the values change over time in each severity category. I have coloured the dots based on severity score, but I prefer to use shape = 21 so I can have a border. I also draw a line to see the trend, and I want that coloured in the same way; however, this has added another legend and it looks complicated. This issue doesn't happen if I use a different shape that isn't filled, because scale_colour_manual can be used for both the lines and the dots, but I don't think it looks as nice. Any idea how I can fix this?
# Scatterplot of log2IC50 against Days_since_onset with one smoothed trend per
# Severity_score. Points are mapped to `fill`, trend lines to `colour`.
IC50SymObySS <- ggplot(data = isaric) +
# shape 21 draws a filled circle: the interior comes from `fill`, the outline is fixed to black
geom_point(mapping = aes(x = Days_since_onset, y = log2IC50, fill = Severity_score), size = 2, colour = "black", shape = 21)+
# Trend line per severity score, without the confidence ribbon
geom_smooth(mapping = aes(x = Days_since_onset, y = log2IC50, colour = Severity_score), se = FALSE)+
# The fill scale is given a name, breaks and labels ...
scale_fill_manual(breaks=c("1","2","3","4","5"),
values=c("1" = "lightblue1","2" = "lightblue3","3" = "lightblue4","4" = "lightcoral","5" = "firebrick2"),
labels=c("1","2","3","4","5"),
name = "Severity Score")+
# ... whereas the colour scale only receives the palette (no name or labels)
scale_colour_manual(values=c("1" = "lightblue1","2" = "lightblue3","3" = "lightblue4","4" = "lightcoral","5" = "firebrick2"))+
theme_minimal()+
# JTheme is not defined in this snippet; presumably a user-defined theme object
JTheme+
ylab("Serum Log2 IC50")+
xlab("Days Since Symptom Onset")+
guides(colour = guide_legend(title.position = "top", title.hjust = 0.5))
# Print the plot
IC50SymObySS
As per this answer, you need to use identical name and labels values for both fill and colour scale.
library(ggplot2)
library(dplyr)
# Stand-in for the asker's data, derived from iris so the example is
# reproducible. Severity_score bins Petal.Length into quintiles labelled 1-5.
# include.lowest = TRUE is required here: cut() intervals are right-closed, and
# the lowest break equals the minimum, so without it the smallest Petal.Length
# would be assigned NA instead of bin 1.
isaric <- transmute(iris,
  Days_since_onset = (Sepal.Length - 4)^3,
  log2IC50 = Sepal.Width * 3,
  Severity_score = cut(Petal.Length,
    breaks = quantile(Petal.Length, probs = 0:5 / 5),
    labels = 1:5,
    include.lowest = TRUE))

# One palette shared by the fill (points) and colour (lines) scales.
severity_colours <- c("1" = "lightblue1", "2" = "lightblue3", "3" = "lightblue4",
                      "4" = "lightcoral", "5" = "firebrick2")

ggplot(data = isaric) +
  geom_smooth(mapping = aes(x = Days_since_onset, y = log2IC50, colour = Severity_score), se = FALSE) +
  geom_point(mapping = aes(x = Days_since_onset, y = log2IC50, fill = Severity_score), size = 2, colour = "black", shape = 21) +
  # Both scales carry the same name and labels so that ggplot2 merges them
  # into a single legend.
  scale_colour_manual(
    name = "Severity Score",
    values = severity_colours,
    labels = c("1", "2", "3", "4", "5")) +
  scale_fill_manual(
    name = "Severity Score",
    breaks = c("1", "2", "3", "4", "5"),
    values = severity_colours,
    labels = c("1", "2", "3", "4", "5")) +
  theme_minimal() +
  ylab("Serum Log2 IC50") +
  xlab("Days Since Symptom Onset") +
  guides(colour = guide_legend(title.position = "top", title.hjust = 0.5))
Suppose I have data with both an ordinal variable and a categorical variable:
set.seed(35)
# Simulated data with 160 rows: a two-level Class, an ordinal StudyTime and a
# binary Nerd flag. StudyTime is built from 16 sorted draws, which data.frame()
# recycles to the 160 rows of the other columns.
df <- data.frame(
  Class = factor(rep(c(1, 2), times = 80), labels = c("Math", "Science")),
  StudyTime = factor(
    sort(sample(1:4, 16, prob = c(0.25, 0.3, 0.3, 0.15), replace = TRUE)),
    labels = c("<5", "5-10", "10-20", ">20")
  ),
  Nerd = factor(
    # One draw per row; the probability of "Nerd" rises from 0.1 to 0.8
    # across the four consecutive row blocks of 30, 50, 50 and 30 rows.
    vapply(
      rep(c(0.1, 0.3, 0.5, 0.8), c(30, 50, 50, 30)),
      function(p_nerd) {
        sample(c("Nerd", "NotNerd"), size = 1, prob = c(p_nerd, 1 - p_nerd))
      },
      character(1)
    ),
    levels = c("NotNerd", "Nerd")
  )
)
One could use ggplot and geom_bar with x, fill and alpha (or color) aesthetic mappings to visualize the relationship between these variables.
# Dodged bar chart of student counts: fill encodes StudyTime and transparency
# encodes the Nerd flag.
ggplot(df, aes(x = Class, fill = StudyTime, alpha = Nerd)) +
  geom_bar(color = "black", position = "dodge") +
  # Four-step blue -> white -> orange palette for the StudyTime levels
  scale_fill_manual(
    values = colorRampPalette(c("#0066CC", "#FFFFFF", "#FF8C00"))(4)
  ) +
  scale_alpha_manual(values = c(Nerd = 0.5, NotNerd = 1)) +
  labs(x = "Class", y = "Number of Students", alpha = "Nerd?") +
  theme(legend.key.height = unit(1, "cm"))
However, alpha and color are not ideal. A better alternative might be to apply a pattern such as stripes or a crosshatch.
The accepted answer to this question from over 10 years ago says to use colors, and the most upvoted answer (while clever) uses over 100 lines of code.
This question received some upvotes but no new answers.
Is there any better alternative to adding a pattern such as can be seen here?
One approach is to use the ggpattern package written by Mike FC (no affiliation):
library(ggplot2)
#remotes::install_github("coolbutuseless/ggpattern")
library(ggpattern)
# Same bar chart, but the Nerd flag is drawn as a stripe pattern instead of
# being encoded through transparency.
ggplot(df, aes(x = Class, fill = StudyTime, pattern = Nerd)) +
  geom_bar_pattern(
    position = position_dodge(preserve = "single"),
    color = "black",
    pattern_fill = "black",
    pattern_angle = 45,
    pattern_density = 0.1,
    pattern_spacing = 0.025,
    pattern_key_scale_factor = 0.6
  ) +
  scale_fill_manual(
    values = colorRampPalette(c("#0066CC", "#FFFFFF", "#FF8C00"))(4)
  ) +
  # Stripes for "Nerd", plain bars for "NotNerd"
  scale_pattern_manual(values = c(Nerd = "stripe", NotNerd = "none")) +
  labs(x = "Class", y = "Number of Students", pattern = "Nerd?") +
  guides(
    # Pattern legend keys on a white background; fill legend keys without a pattern
    pattern = guide_legend(override.aes = list(fill = "white")),
    fill = guide_legend(override.aes = list(pattern = "none"))
  )
The package appears to support a number of common geometries. Here is an example of using geom_tile to combine a continuous variable with a categorical variable:
set.seed(40)
# 9 x 9 grid with a continuous Evaporation value and a Yes/No TreeCover flag
# per cell.
df2 <- data.frame(
  Row = rep(1:9, times = 9),
  Column = rep(1:9, each = 9),
  Evaporation = runif(81, 50, 100),
  TreeCover = sample(c("Yes", "No"), 81, prob = c(0.3, 0.7), replace = TRUE)
)

# Tiles are filled by Evaporation; cells with tree cover get a circle pattern.
ggplot(
  df2,
  aes(
    x = as.factor(Row),
    y = as.factor(Column),
    pattern = TreeCover,
    fill = Evaporation
  )
) +
  geom_tile_pattern(
    pattern_color = NA,
    pattern_fill = "black",
    pattern_angle = 45,
    pattern_density = 0.5,
    pattern_spacing = 0.025,
    pattern_key_scale_factor = 1
  ) +
  scale_pattern_manual(values = c(Yes = "circle", No = "none")) +
  scale_fill_gradient(low = "#0066CC", high = "#FF8C00") +
  coord_equal() +
  labs(x = "Row", y = "Column") +
  guides(pattern = guide_legend(override.aes = list(fill = "white")))
I'm using the ggalluvial package to make multiple alluvial plots for some data that I have, but I'd like to try to order the alluvials so I can compare them across multiple plots.
Here's some example data:
set.seed(234)

# Builds one simulated panel: 10 IDs observed over 6 weeks (60 rows), the first
# 30 rows in group 1 and the last 30 in group 2, with a randomly drawn outcome.
make_panel <- function() {
  data.frame(
    ID = rep(1:10, each = 6),
    Group = rep(1:2, each = 30),
    Week = rep(1:6, times = 10),
    Y = sample(c("High", "Low", "None"), 60, replace = TRUE)
  )
}

# Two independent draws with the same design; Data1 is sampled first.
Data1 <- make_panel()
Data2 <- make_panel()
And some example code for making the two graphs:
# Shared recipe for both figures: weekly strata of Y with one alluvium per ID,
# faceted into one row per Group.
draw_alluvial <- function(panel) {
  ggplot(
    panel,
    aes(x = Week, stratum = Y, alluvium = ID, fill = Y, label = Y)
  ) +
    facet_grid(Group ~ .) +
    scale_fill_manual(values = c("red", "yellow", "green3")) +
    geom_flow(
      stat = "alluvium",
      lode.guidance = "frontback",
      color = "darkgray"
    ) +
    geom_stratum()
}

plot1 <- draw_alluvial(Data1)
plot2 <- draw_alluvial(Data2)
And finally the two output graphs
Is there a way to know which alluvial belongs to which individual? Or designate the order of alluvials so that, for example, the top group 1 alluvial is the same in the first and second graphs? I realize that might make the graph look kind of bad, but for my actual data the outcome in the first week is pretty homogenous across the board so I think it would be alright. Thanks in advance.
The way that makes the most sense to me at this point is to pass the alluvium variable (in this case, ID) to the label aesthetic in a new layer using the alluvium stat and the text geom. Provided the parameters (e.g. lode.guidance) are passed the same values, the new layer should stack the alluvia in the same order at every axis, and the text labels will coincide with the "lodes" where the alluvia intersect the strata.
Here are your examples with the additional layer. Note that it is added after the stratum layer; otherwise the text would be obscured by the filled-in rectangles.
library(ggalluvial)
#> Loading required package: ggplot2
set.seed(234)

# Design columns shared by both data sets: 10 IDs x 6 weeks, with the first 30
# rows in group 1 and the last 30 rows in group 2.
design <- data.frame(
  ID = rep(1:10, each = 6),
  Group = rep(1:2, each = 30),
  Week = rep(1:6, times = 10)
)
outcome_levels <- c("High", "Low", "None")

# Each data set gets its own random outcome column; Data1 is drawn first.
Data1 <- cbind(design, Y = sample(outcome_levels, 60, replace = TRUE))
Data2 <- cbind(design, Y = sample(outcome_levels, 60, replace = TRUE))
# Alluvial plot in which every lode is labelled with its ID. The text layer
# uses the same stat and lode.guidance as the flow layer and is added after
# geom_stratum().
draw_labelled_alluvial <- function(panel) {
  ggplot(
    panel,
    aes(x = Week, stratum = Y, alluvium = ID, fill = Y, label = Y)
  ) +
    facet_grid(Group ~ .) +
    scale_fill_manual(values = c("red", "yellow", "green3")) +
    geom_flow(
      stat = "alluvium",
      lode.guidance = "frontback",
      color = "darkgray"
    ) +
    geom_stratum() +
    geom_text(
      stat = "alluvium",
      aes(label = ID),
      lode.guidance = "frontback"
    )
}

draw_labelled_alluvial(Data1)
draw_labelled_alluvial(Data2)
Created on 2019-11-22 by the reprex package (v0.3.0)
This is the output. I have a data set which contains the unit, the weight of each unit and the compliance score for each unit in the year 2016.
I was not able to add the table but here is the screenshot for the data in csv
I have named the columns in the data as unit, weight and year(which is compliance score) .
I want to create a sunburst chart where the first ring will be the unit divided based on weight and the second ring will be the same but will have labels compliance score.
The colour for each ring will be different.
I was able to do some code with the help from an online blog and the output I have gotten is similar to what I want but I am facing difficulty in positioning of the labels and also the colour coding for each ring
#using ggplot
library(ggplot2) # Visualisation
library(dplyr) # data wrangling
library(scales) # formatting
#read file
# The file is read without a header (header = FALSE), so any header row stays
# in the data; non-numeric entries turn into NA in the conversion below and
# are then dropped by na.omit().
weight.eg <- read.csv("Dummy Data.csv", header = FALSE, sep = ";",
                      encoding = "UTF-8")
#change column names
colnames(weight.eg) <- c("unit", "weight", "year")
# Convert weight and year to numeric. as.numeric(as.character(x)) gives the
# intended values whether read.csv() produced factors (R < 4.0) or character
# vectors (R >= 4.0); indexing levels(x) only works for factors.
weight.eg$weight <- as.numeric(as.character(weight.eg$weight))
weight.eg$year <- as.numeric(as.character(weight.eg$year))
#Nas are introduced, remove
weight.eg <- na.omit(weight.eg)
#Sum of the total weight
sum_total_weight <- sum(weight.eg$weight)
#First layer
firstLevel <- weight.eg %>% summarize(total_weight = sum(weight))
sunburst_0 <- ggplot(firstLevel) # Just a foundation
#this will generate a bar chart
sunburst_1 <-
  sunburst_0 +
  geom_bar(data = firstLevel, aes(x = 1, y = total_weight),
           fill = "darkgrey", stat = "identity") +
  geom_text(aes(x = 1, y = sum_total_weight / 2,
                label = paste("Total Weight", comma(total_weight))),
            color = "black")
#View
sunburst_1
#this argument is used to rotate the plot around the y-axis which
#the total weight
sunburst_1 + coord_polar(theta = "y")
# Second ring: one segment per unit, sized and filled by weight. Columns are
# referenced by bare name inside aes() because they come from `data`.
sunburst_2 <-
  sunburst_1 +
  geom_bar(data = weight.eg,
           aes(x = 2, y = weight, fill = weight),
           color = "white", position = "stack", stat = "identity",
           size = 0.6) +
  geom_text(data = weight.eg,
            aes(label = paste(unit, weight), x = 2, y = weight),
            position = "stack")
sunburst_2 + coord_polar(theta = "y")
# Third ring: same segments, labelled with the compliance score (year column).
sunburst_3 <-
  sunburst_2 +
  geom_bar(data = weight.eg,
           aes(x = 3, y = weight, fill = weight),
           color = "white", position = "stack", stat = "identity",
           size = 0.6) +
  geom_text(data = weight.eg,
            aes(label = paste(year), x = 3, y = weight),
            position = "stack")
sunburst_3 + coord_polar(theta = "y")
sunburst_3 + scale_y_continuous(labels = comma) +
  scale_fill_continuous(low = "white", high = "darkred") +
  coord_polar("y") + theme_minimal()
Output for dput(weight.eg)
structure(list(unit = structure(2:7, .Label = c("", "A", "B",
"C", "D", "E", "F", "Unit"), class = "factor"), weight = c(30,
25, 10, 17, 5, 13), year = c(70, 80, 50, 30, 60, 40)), .Names =
c("unit",
"weight", "year"), row.names = 2:7, class = "data.frame", na.action
= structure(c(1L,
8L), .Names = c("1", "8"), class = "omit"))
output for dput(firstLevel)
structure(list(total_weight = 100), .Names = "total_weight", row.names
= c(NA,
-1L), na.action = structure(c(1L, 8L), .Names = c("1", "8"), class =
"omit"), class = "data.frame")
So I think I might have some sort of solution for you. I wasn't sure what you wanted to color-code on the outer ring; from your code it seems you wanted it to be the weight again, but it was not obvious to me. For different colour scales per ring, you could use the ggnewscale package:
library(ggnewscale)
For the centering of the labels you could write a function:
# Midpoint of each segment when the values of `x` are stacked end to end.
#
# x: numeric vector of segment sizes.
# Returns a numeric vector the same length as `x`; element i is the average of
# the cumulative totals just before and just after segment i.
cs_fun <- function(x) {
  upper <- cumsum(x)
  # Lower edge of each segment: 0 for the first one, then the previous total
  lower <- c(0, head(upper, -1))
  (upper + lower) / 2
}
Now the plotting code could look something like this:
# Sunburst chart: a centre bar with the total, a middle ring and an outer ring,
# each ring with its own fill scale.
ggplot(data = weight.eg) +
  # Centre: total weight (geom_col is geom_bar with stat = "identity")
  geom_col(
    data = firstLevel,
    mapping = aes(x = 1, y = total_weight)
  ) +
  geom_text(
    data = firstLevel,
    mapping = aes(
      x = 1,
      y = total_weight / 2,
      label = paste("Total Weight:", total_weight)
    ),
    colour = "black"
  ) +
  # Middle ring: one segment per row, filled by weight
  geom_col(
    mapping = aes(x = 2, y = weight, fill = weight),
    colour = "white",
    size = 0.6
  ) +
  scale_fill_gradient(name = "Weight", low = "white", high = "darkred") +
  # Start a fresh fill scale for the outer ring
  new_scale_fill() +
  # Labels are placed at the segment midpoints computed by cs_fun()
  geom_text(
    mapping = aes(x = 2, y = cs_fun(weight), label = paste(unit, weight))
  ) +
  # Outer ring: same segments, second fill scale
  geom_col(
    mapping = aes(x = 3, y = weight, fill = weight),
    size = 0.6,
    colour = "white"
  ) +
  scale_fill_gradient(
    name = "Another Weight?",
    low = "forestgreen",
    high = "white"
  ) +
  geom_text(
    mapping = aes(label = paste0(year), x = 3, y = cs_fun(weight))
  ) +
  # Wrap the stacked bars around the y axis
  coord_polar(theta = "y")
Which looks like this:
I have a ggplot problem. Here is the example data:
# Example data: 25 rows with five type1 groups (A-E) nested in three type2
# groups (a, b, c); value is drawn from normals with means -2..2, one mean per
# type1 group.
df <- data.frame(x = rep(1:5, 5),
                 type2 = c(rep(letters[1:2], each = 10), rep("c", 5)),
                 type1 = rep(LETTERS[1:5], each = 5),
                 value = unlist(lapply(-2:2, function(a) {
                   rnorm(5, mean = a, sd = 1)
                 })))
library(ggplot2)
plotcolor <- c("#99d8c9", "#2ca25f", "#cbc9e2", "#9e9ac8", "#e34a33")
p <- ggplot(df, aes(x, value, color = type1, fill = type1, shape = type2)) +
  geom_point(size = 5) +
  theme_light() +
  labs(title = "",
       color = "Method",
       fill = "Method",
       shape = "") +
  geom_hline(yintercept = 0) +
  # Draw the colour legend keys with hand-picked shapes and the palette above
  guides(colour = guide_legend(override.aes = list(shape = c(21, 21, 24, 24, 22),
                                                   linetype = rep("blank", 5),
                                                   fill = plotcolor,
                                                   color = plotcolor))) +
  # Hide the shape legend; "none" replaces the deprecated guide = FALSE
  scale_shape(guide = "none") +
  scale_colour_manual(values = plotcolor)
p
which gives
Now I want to split the legend into two columns, for space reasons. I tried
# Attempt 1: add a second guides() call that sets only ncol for the colour legend.
p + guides(color=guide_legend(ncol=2))
but it removes the override part of my legend, leaving just points:
# Attempt 2: request a two-column legend for each of the colour, fill and
# shape guides.
p +
  guides(
    color = guide_legend(ncol = 2),
    fill = guide_legend(ncol = 2),
    shape = guide_legend(ncol = 2)
  )
didn't work either. Does anyone have an idea on how to deal with this particular problem?
You can specify ncol within the existing guide_legend (do not use it multiple times):
guides(colour = guide_legend(override.aes = list(shape = c(24,24,22,22,21),
linetype = c(rep("blank",5)),
fill = plotcolor,
color = plotcolor),
ncol = 2))+