R ggplot2 show significance between subgroups - r

I am trying to show the significance levels within a group consisting of two factors, but I seem to always get the significance levels between groups which is not what I want.
df <- structure(list(Datum = structure(c(2L, 1L, 3L, 1L, 1L, 3L, 1L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 3L, 1L, 2L), .Label = c("2021-04-08",
"2021-05-17", "2021-07-07"), class = "factor"), Soll = c("1202",
"172", "119", "1192", "119", "1189", "1189", "552", "1189", "1192",
"2484", "119", "1189", "1189", "172", "552", "1192", "172", "1189",
"172"), Plot = c("6", "5", "3", "4", "6", "5", "4", "5", "7",
"8", "3", "6", "6", "1", "8", "3", "1", "3", "8", "4"), Entfernung = structure(c(2L,
1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L,
1L, 2L, 2L), .Label = c("2", "5"), class = "factor"), Behandlung = structure(c(1L,
1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
2L, 1L, 1L), .Label = c("a", "b"), class = "factor"), DGUnkraut = c(3.5,
0, 2.8, 3, 0.3, 2, 1, 3, 0, 0.3, 10, 0, 1.7, 2.5, 0.2, 0.3, 9,
0.3, 2.5, 0.2)), class = "data.frame", row.names = c(NA, -20L
))
This is what I have tried so far:
library(tidyverse)
library(ggsignif)
# Attempted plot: box plots of DGUnkraut per Entfernung, coloured by Datum,
# with a single significance bracket comparing Entfernung "2" against "5".
# The group_by() step is kept exactly as in the original attempt.
df %>%
  group_by(Datum, Entfernung) %>%
  ggplot(aes(Entfernung, DGUnkraut, color = Datum)) +
  geom_boxplot() +
  geom_signif(
    comparisons = list(c("2", "5")),
    # TRUE instead of T: T is an ordinary variable and can be reassigned.
    map_signif_level = TRUE
  )
So I would like to see the significant differences between "2" and "5" for each of the three dates, so for example that the significance level of the red boxplot with the date "2021-04-08" and Entfernung = "2" is compared to the one where Entfernung = "5".

Facets don't seem to work with {ggsignif}, but you could fake them, by looping over your dates, and then patching the plots together.
Below is one way:
library(ggsignif)
library(patchwork)
# Build one plot per date, each with its own "2" vs "5" significance bracket,
# then patch the individual plots together into a single figure.
df %>%
  # one data frame per level of Datum
  split(., .$Datum) %>%
  map(~ {
    ggplot(., aes(Entfernung, DGUnkraut, color = Datum)) +
      geom_boxplot() +
      geom_signif(
        comparisons = list(c("2", "5")),
        # TRUE instead of T: T is an ordinary variable and can be reassigned.
        map_signif_level = TRUE
      ) +
      # drop = FALSE keeps unused factor levels on the x axis of every panel
      scale_x_discrete(drop = FALSE)
  }) %>%
  wrap_plots() +
  # merge the per-plot legends into one shared legend
  plot_layout(guides = "collect")

Related

How can I increase room for ggplot y-axis?

My plot is cutting off a portion of the y-axis when viewed and when saved as a jpeg. I am sure this is an easy fix, but I can't seem to figure it out. Any advice? Thanks
# Stacked bar chart of `freq` per Trial, filled by Treatment, with the y axis
# labelled as percentages.
treat_freqplot <- ggplot(Treat_occur, aes(Trial, freq, fill = Treatment)) +
  geom_bar(stat = "identity", alpha = 1) +
  # facet_grid(~Trial, scales = "free") +
  scale_y_continuous(
    labels = scales::percent,
    breaks = seq(0, 100, 1 / 10),
    # no padding below or above the data range
    expand = c(0, 0)
  ) +
  labs(x = "Trial", y = "Proportional Deer Use") +
  # scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  scale_fill_manual(
    "Treatment",
    values = c("dark Green", "dark gray", "fire brick 4", "dark blue")
  ) +
  theme(
    axis.text = element_text(size = 30, colour = "black", vjust = 0.3),
    axis.title = element_text(size = 30, face = "bold")
  ) +
  theme(
    legend.position = "right",
    legend.background = element_rect(
      color = "black", linetype = "solid", fill = "gray100"
    ),
    legend.key.size = unit(1, "cm"),
    legend.text = element_text(size = 25),
    legend.title = element_text(size = 30)
  ) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(fill = NA, color = "black")
  )
treat_freqplot
# Saving is disabled. Both lines of the ggsave() call are commented out; with
# only the first line commented, the leftover argument list is a parse error.
# ggsave('C:\\Projects\\CaptiveStudy\\Analysis\\Output2\\treatfreq_plot.jpeg',
#        width = 20, height = 12, units = "in", treat_freqplot)
structure(list(Trial = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 3L, 3L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8",
"9", "10", "11", "12", "13", "14"), class = "factor"), Treatment = structure(c(1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L), .Label = c("Control", "30%Shade",
"60%Shade", "90%Shade"), class = "factor"), Use = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), n = c(2L, 7L, 5L, 30L, 1L, 2L, 6L,
45L, 9L, 3L), freq = c(0.037037037037037, 0.12962962962963, 0.0925925925925926,
0.555555555555556, 0.0185185185185185, 0.037037037037037, 0.111111111111111,
0.833333333333333, 0.166666666666667, 0.0555555555555556)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -10L), groups = structure(list(
Trial = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L
), .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9",
"10", "11", "12", "13", "14"), class = "factor"), Treatment = structure(c(1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L), .Label = c("Control",
"30%Shade", "60%Shade", "90%Shade"), class = "factor"), .rows = structure(list(
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L), .drop = TRUE))

Update row values based on condition in R

I am trying to update the values of Column C2 and C3 based on conditions:
• The variable C2 is equal to 1 if the type of cue = 2,
and 0 otherwise.
• The variable C3 is equal to 1 if the type of cue = 3,
and 0 otherwise.
Data frame Image: https://drive.google.com/file/d/1Enik09cXQ21d3cQQv0_YQDZGBb3Btm5n/view?usp=sharing
dput(Cognitive[1:6,]) =
structure(list(Subject = c(1L, 1L, 1L, 1L, 1L, 1L), Time = c(191L,
206L, 219L, 176L, 182L, 196L), W = c(0L, 0L, 0L, 1L, 1L, 1L),
Cue = c(1L, 2L, 3L, 1L, 2L, 3L), D = c(0L, 0L, 0L, 0L, 0L,
0L), Subject.f = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12",
"13", "14", "15", "16", "17", "18", "19", "20", "21", "22",
"23", "24"), class = "factor"), Cue.f = structure(c(1L, 2L,
3L, 1L, 2L, 3L), .Label = c("1", "2", "3"), class = "factor"),
D.f = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("0",
"1"), class = "factor"), C2 = structure(c(1L, 2L, 3L, 1L,
2L, 3L), .Label = c("1", "2", "3"), class = "factor"), C3 = structure(c(1L,
2L, 3L, 1L, 2L, 3L), .Label = c("1", "2", "3"), class = "factor")), row.names = c(NA,
6L), class = "data.frame")
Cognitive <- read.csv(file = 'Cognitive.csv')
View(Cognitive)
# Factor the variables Subject, Cue and D and add these variable to the Cognitive data frame.
Cognitive <- mutate(Cognitive, Subject.f = factor(Subject), Cue.f = factor(Cue), D.f = factor(D))
Cognitive <- mutate(Cognitive, C2 = Cue.f, C3 = Cue.f)
Thanks.
# Recode the cue into two 0/1 indicator columns.
# `df` and `cue` are placeholders here; the data frame shown in the question
# is `Cognitive` and its column is `Cue`.
df %>%
  mutate(
    C2 = case_when(
      cue == 2 ~ 1, # the comma separating the two cases was missing
      TRUE ~ 0
    ),
    C3 = case_when(
      cue == 3 ~ 1,
      TRUE ~ 0
    )
  )
A super easy base solution
# Example data: a random cue column (values 1 to 3) plus two placeholder
# indicator columns that are overwritten below.
df <- data.frame(
  cue = sample(1:3, 10, replace = TRUE),
  c2 = sample(c(0, 1), 10, replace = TRUE),
  c3 = sample(c(0, 1), 10, replace = TRUE)
)
# c2 is 1 where cue equals 2, c3 is 1 where cue equals 3, and 0 otherwise.
df$c2 <- ifelse(df$cue == 2, 1, 0)
df$c3 <- ifelse(df$cue == 3, 1, 0)
EDIT
to add another dplyr solution
# Same recoding with dplyr: c2 flags cue == 2 and c3 flags cue == 3 (1/0).
df <- dplyr::mutate(
  df,
  c2 = ifelse(cue == 2, 1, 0),
  c3 = ifelse(cue == 3, 1, 0)
)
We can use sapply in base R
# Compare every cue against 2 and 3 in one step. outer() always yields an
# nrow(df) x 2 logical matrix, whereas sapply() changes its return shape for
# 0- or 1-row input. Unary + turns TRUE/FALSE into 1/0, and df[-1] targets
# every column except the first.
df[-1] <- +outer(df$cue, c(2, 3), `==`)

How to change from factored data to numeric?

I have this dataframe in factored form:
Data <- structure(list(ID = c("1", "2", "3", "4", "5",
"6"), V1 = structure(c(1L, 1L, 4L, 4L, 4L, 1L), .Label = c("1",
"129", "2", "3", "76"), class = "factor"), V2 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("1", "3"), class = "factor"),
V3 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "1", class = "factor"),
V4 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"3"), class = "factor"), V5 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "1", class = "factor"), V6 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "1", class = "factor"), V7 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "1", class = "factor"), V8 = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("0", "1", "3"), class = "factor"),
V9 = structure(c(2L, 2L, 3L, 2L, 2L, 2L
), .Label = c("0", "1", "3"), class = "factor"), V10 = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("0", "1", "2", "3"), class = "factor"),
V11 = structure(c(2L, 2L, 2L, 2L,
2L, 2L), .Label = c("0", "1"), class = "factor"), V12 = structure(c(1L,
1L, 1L, 1L, 1L, 3L), .Label = c("1", "2", "3"), class = "factor"),
V13 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3"), class = "factor"), V14 = structure(c(1L,
1L, 2L, 1L, 1L, 1L), .Label = c("1", "3"), class = "factor"),
V15 = structure(c(2L, 2L, 2L, 2L, 2L,
2L), .Label = c("0", "1", "3"), class = "factor"), V17 = structure(c(3L,
1L, 3L, 1L, 1L, 3L), .Label = c("1", "2", "3"), class = "factor"),
V18 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3"), class = "factor"), V19 = structure(c(1L,
1L, 2L, 1L, 1L, 1L), .Label = c("1", "3"), class = "factor"),
V20 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"3"), class = "factor"), V21 = structure(c(1L, 3L,
1L, 1L, 3L, 1L), .Label = c("1", "2", "3"), class = "factor"),
V22 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3"), class = "factor"), V23 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("1", "3"), class = "factor"),
V24 = structure(c(1L, 1L, 1L, 1L, 1L, 1L
), .Label = "1", class = "factor"), V25 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("1", "2", "3"), class = "factor"),
V26 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2"), class = "factor"), V27 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "1", class = "factor"), V28 = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("0", "1"), class = "factor"),
V29 = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0",
"1"), class = "factor"), V30 = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("0", "1"), class = "factor"),
V31 = structure(c(2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("0", "1"), class = "factor"), V32 = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("0", "1"), class = "factor"),
Totals = structure(c(1L, 1L, 4L, 1L, 2L, 2L), .Label = c("1",
"2", "3", "5"), class = "factor")), row.names = c(NA, 6L), class = "data.frame")
It is in factored form but I need to change it to numeric form without changing the original dataframe called Data. So, I tried this method:
Data2 <- lapply(Data[c(1:33)], numeric)
This gave me the "invalid length argument" error. So I tried this method after looking up the issue:
Data2 <- lapply(Data[c(1:33)], as.numeric)
Data2 <- as.data.frame(Data2)
I do indeed get a new dataframe, but the data doesn't match what I have in my script. Some numbers change by 1 value, for example. (Where there is a 3, it is a 4. Where there is a 4, there is a 5).
Any other methods to this issue?
EDIT: earlier in my script I convert from character to factor using this method:
Data <- lapply(Data[c(2:33)], factor)
Would it be easier to instead convert to numeric and wait until I am done with all of my analyses to convert to factor?
You need to convert to character first:
# Convert every column to numeric through its labels: as.numeric() applied
# directly to a factor returns the internal level codes, not the labels.
# Assigning into Data[] keeps the data.frame structure; a bare lapply() result
# would be a plain list. To keep the original untouched, run this on a copy
# (e.g. Data2 <- Data) instead.
Data[] <- lapply(Data, function(x) as.numeric(as.character(x)))
Try this dplyr approach:
library(dplyr)
# Convert columns 2 to 33 from factor to numeric via their labels and store
# the result in a new data frame.
Data2 <- Data %>%
  mutate(across(2:33, function(col) as.numeric(as.character(col))))
We can use type.convert from base R
Data <- type.convert(Data, as.is = TRUE)

glmmLasso Various errors stating that NA and missing values present when none in the dataset

It is my first time using glmmLasso so please excuse me if this is a simple mistake. I am trying to use the function to create a binomial regression with a number of numerical and categorical factors.
A number of posts about similar errors to those I am receiving have been resolved by re-stating the categorical variables levels, but that hasn't worked in my case. My code is as follows
# Coerce the categorical columns, including the response `subclinical`, to factors.
# NOTE(review): the warning quoted after this code comes from Ops.factor(y, Mu),
# i.e. arithmetic on a factor. Presumably glmmLasso needs the binomial response
# as numeric 0/1 rather than a factor -- confirm against the glmmLasso docs.
clinical$subclinical = factor(clinical$subclinical)
clinical$Teat = factor(clinical$Teat)
clinical$drug = factor(clinical$drug)
clinical$serverity = factor(clinical$serverity)
clinical$Cow = factor(clinical$Cow)
clinical$tubes = factor(clinical$tubes)
clinical$farm = factor(clinical$farm)
clinical$Calfs = factor(clinical$Calfs)
clinical$treatement = factor(clinical$treatement)
clinical$subtreatment = factor(clinical$subtreatment)
library(glmmLasso)
# Full model: all categorical predictors wrapped in as.factor() plus the
# numeric predictors, with Cow passed as the grouping term in `rnd`.
glmmLasso(subclinical ~ as.factor(Teat) + as.factor(drug) + as.factor(serverity) + as.factor(tubes) + as.factor(farm) +
Dry_Off_Days + as.factor(Calfs) + as.factor(treatement) + as.factor(subtreatment) + age +
fat_1 + protein_1 + lactose_1 + scc_1 + yield_1 + time_1 +
max_flow_1 + conc_fed_1 + fat_2 + protein_2 + lactose_2 +
yield_2 + time_2 + max_flow_2 + conc_fed_2 + weight + BCS,
rnd = list(Cow = ~1),
data = clinical,
family = binomial(),
lambda = 100)
I receive the following errors
Error in if (group.sum[1] == 0 & sqrt(sum(score.beta[1:block[1]]^2)) > :
missing value where TRUE/FALSE needed
In addition: Warning message:
In Ops.factor(y, Mu) : ‘-’ not meaningful for factors
As a form of troubleshooting, I have simplified the model significantly, removing most of the variables to see if it was my categorical variables causing the issue,
# Reduced model: numeric predictors only, same response, grouping term,
# family and lambda as the full model.
# NOTE(review): `subclinical` is still the response here; it is a factor in the
# dput() sample, which matches the Ops.factor warning reported for this call.
glmmLasso(subclinical ~ Dry_Off_Days + age +
fat_1 + protein_1 + lactose_1 + scc_1 + conc_fed_2 + weight + BCS,
rnd = list(Cow = ~1),
data = clinical,
family = binomial(),
lambda = 100)
but I receive the error
Error in grad.lasso[b.is.0] <- score.beta[b.is.0] - lambda.b * sign(score.beta[b.is.0]) :
NAs are not allowed in subscripted assignments
In addition: Warning message:
In Ops.factor(y, Mu) : ‘-’ not meaningful for factors
Note that there are no NA values in the dataset.
Here is the dput() of a very small random subset of the dataset that still causes the same issues that I am facing, so you can see for yourself, and I'll also attach the full dataset in case that is of any help. Thanks.
structure(list(X = c(205L, 438L, 85L, 536L, 581L, 329L, 431L,
146L), subclinical = structure(c(1L, 1L, 1L, 2L, 2L, 1L, 1L,
1L), .Label = c("Clinical", "Subclinical"), class = "factor"),
Teat = structure(c(1L, 1L, 2L, 5L, 3L, 4L, 5L, 3L), .Label = c("LF_",
"LH_", "RF_", "RF_LF_", "RH_"), class = "factor"), drug = structure(c(2L,
3L, 1L, 2L, 3L, 2L, 1L, 1L), .Label = c("SYNULOX", "TEREXINE",
"TERREXINE"), class = "factor"), serverity = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 1L, 1L), .Label = c("2", "3"), class = "factor"),
tubes = structure(c(3L, 2L, 2L, 1L, 2L, 4L, 2L, 2L), .Label = c("2",
"3", "6", "7"), class = "factor"), Cow = structure(c(4L,
8L, 1L, 3L, 6L, 5L, 7L, 2L), .Label = c("IE151549212814",
"IE151549232824", "IE151549241281", "IE151549251167", "IE151549282077",
"IE151549297861", "IE151828760007", "IE151941680002"), class = "factor"),
farm = structure(c(1L, 3L, 2L, 1L, 2L, 1L, 1L, 2L), .Label = c("1",
"2", "4"), class = "factor"), Dry_Off_Days = c(3.3, 2.733333333,
1.433333333, 2.8, 4.1, 2.133333333, 3.566666667, 2), Calfs = structure(c(4L,
3L, 1L, 3L, 2L, 2L, 5L, 1L), .Label = c("2", "3", "4", "5",
"6"), class = "factor"), treatement = structure(c(1L, 1L,
3L, 1L, 4L, 2L, 1L, 1L), .Label = c("1", "10", "2", "3"), class = "factor"),
subtreatment = structure(c(1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L
), .Label = c("0", "2"), class = "factor"), age = c(7.709589041,
5.487671233, 3.18630137, 6.189041096, 4.391780822, 5.101369863,
7.802739726, 3.164383562), fat_1 = c(3.9275, 5.5275, 2.6625,
3.434, 4.5625, 3.305, 3.416666667, 2.846), protein_1 = c(3.4875,
3.8375, 3.36, 3.808, 3.7075, 3.505, 3.603333333, 3.298),
lactose_1 = c(4.4725, 4.605, 4.4675, 4.332, 4.8275, 4.67,
4.473333333, 4.77), scc_1 = c(2.477, 0.02675, 0.223, 0.58375,
0.765, 0.021, 1.681333333, 0.072), yield_1 = c(-1.5413041,
7.8806937, 0.8921023, 5.1831993, -21.2273281, -12.5422328,
-10.4103572, 1.3659622), time_1 = c(5.3, 7.266666667, 7.2,
6.666666667, 6.183333333, 5.566666667, 7.8, 9.216666667),
max_flow_1 = c(3.294925926, 0.342321429, 3.821866667, 2.296148148,
4.25825, 3.912058824, 0.8695, 3.582333333), conc_fed_1 = c(1.2,
2.7, 1.4, 1.9, 0.2, 0.1, 1.2, 1.4), fat_2 = c(4.566666667,
5.29, 3.6275, 4.7875, 6.186666667, 4.2275, 6.2, 3.8925),
protein_2 = c(3.77, 3.876666667, 3.4525, 3.7925, 3.943333333,
3.7825, 3.93, 3.505), lactose_2 = c(4.536666667, 4.563333333,
4.5325, 4.38, 4.793333333, 4.285, 4.355, 4.7375), yield_2 = c(1.9431404,
-1.4835626, -0.8854283, 4.0142704, -10.7772177, -6.3873432,
-5.9222661, 4.7830923), time_2 = c(4.3, 5.1, 6.7, 4.433333333,
5.55, 3.983333333, 4.55, 6.65), max_flow_2 = c(2.810375,
0.29775, 2.6153, 1.852137931, 2.99804, 2.8799375, 0.834,
2.840066667), conc_fed_2 = c(1.2, 2.7, 1.4, 1.9, 0.2, 2.7,
1.2, 1.4), weight = c(6.605, 6.16, 5.13, 7.053333333, 5.22,
4.813333333, 5.84, 4.596666667), BCS = c(2.125, 3, 2.75,
3.5, 3, 2, 3.25, 3)), row.names = c(205L, 436L, 85L, 534L,
579L, 328L, 429L, 146L), class = "data.frame")

Visualization of Groups of Poisson random samples using ggridges

I have two sets of data, all in one data frame. The first set is related to data collected in Location 1 and the second set is collected in Location 2. Each location has different count data (column value) for 5 months.
# DataSet
# -----------------
rp_data <- structure(list(Month = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("1",
"2", "3", "4", "5"), class = "factor"), location = c("1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2"), value = c(0L, 1L, 1L, 1L,
2L, 1L, 0L, 0L, 1L, 1L, 3L, 2L, 1L, 4L, 1L, 3L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 0L, 2L, 4L, 3L, 5L, 5L, 0L, 4L, 3L, 3L, 4L, 2L, 5L,
2L, 3L, 10L, 6L, 5L, 6L, 4L, 6L, 4L, 5L, 6L, 5L, 3L, 7L, 1L,
1L, 1L, 1L, 0L, 0L, 2L, 1L, 2L, 0L, 2L, 3L, 4L, 1L, 2L, 1L, 2L,
0L, 2L, 2L, 4L, 4L, 5L, 1L, 4L, 5L, 4L, 5L, 1L, 4L, 3L, 7L, 7L,
4L, 2L, 5L, 4L, 1L, 5L, 3L, 7L, 3L, 4L, 8L, 5L, 7L, 1L, 1L, 6L,
3L)), .Names = c("Month", "location", "value"), row.names = c(NA,
-100L), class = "data.frame")
I used this example below, as illustrated on the ggridges examples webpage, to display the various count values across different months.
# Plot 1 , filtering data related to location = 1
#---------------
# Binned ridgeline histogram of `value` per Month for location 1, with each
# non-empty bin's count printed inside its bar.
ggplot(
  rp_data[rp_data$location == "1", ],
  aes(x = value, y = Month, group = Month)
) +
  geom_density_ridges2(
    aes(fill = Month),
    stat = "binline", binwidth = 1, scale = 0.95
  ) +
  geom_text(
    stat = "bin",
    # after_stat() replaces the deprecated ..count.. notation. The label is
    # placed at the group's baseline plus the bar height, scaled like the
    # ridges above (0.95).
    aes(
      y = after_stat(group + 0.95 * (count / max(count))),
      label = after_stat(ifelse(count > 0, count, ""))
    ),
    vjust = 1.4, size = 3, color = "white", binwidth = 1
  ) +
  scale_x_continuous(
    breaks = c(0:12), limits = c(-.5, 13), expand = c(0, 0),
    name = "random value"
  ) +
  scale_y_discrete(
    expand = c(0.01, 0), name = "Month",
    labels = c("5.0", "4.0", "3.0", "2.0", "1.0")
  ) +
  scale_fill_cyclical(values = c("#0000B0", "#7070D0")) +
  labs(
    title = "Poisson random samples location 1 different Month",
    subtitle = "sample size n=10"
  ) +
  guides(y = "none") +
  theme_ridges(grid = FALSE) +
  theme(
    axis.title.x = element_text(hjust = 0.5),
    axis.title.y = element_text(hjust = 0.5)
  )
# Plot 2 , filtering data related to location = 2
#---------------
# Binned ridgeline histogram of `value` per Month for location 2, with each
# non-empty bin's count printed inside its bar.
ggplot(
  rp_data[rp_data$location == "2", ],
  aes(x = value, y = Month, group = Month)
) +
  geom_density_ridges2(
    aes(fill = Month),
    stat = "binline", binwidth = 1, scale = 0.95
  ) +
  geom_text(
    stat = "bin",
    # after_stat() replaces the deprecated ..count.. notation. The label is
    # placed at the group's baseline plus the bar height, scaled like the
    # ridges above (0.95).
    aes(
      y = after_stat(group + 0.95 * (count / max(count))),
      label = after_stat(ifelse(count > 0, count, ""))
    ),
    vjust = 1.4, size = 3, color = "white", binwidth = 1
  ) +
  scale_x_continuous(
    breaks = c(0:12), limits = c(-.5, 13), expand = c(0, 0),
    name = "random value"
  ) +
  scale_y_discrete(
    expand = c(0.01, 0), name = "Month",
    labels = c("5.0", "4.0", "3.0", "2.0", "1.0")
  ) +
  scale_fill_cyclical(values = c("#0000B0", "#7070D0")) +
  labs(
    title = "Poisson random samples location 2 different Month",
    subtitle = "sample size n=10"
  ) +
  guides(y = "none") +
  theme_ridges(grid = FALSE) +
  theme(
    axis.title.x = element_text(hjust = 0.5),
    axis.title.y = element_text(hjust = 0.5)
  )
Result for plot 1:
My question is how can I combine these two plots, sort of like an overlay plot as shown in this example:
I don't want to plot them in two separate plots.
You need to create a grouping variable that contains both Month and location. You can do that by using paste0(Month, location). For now, I'm leaving out the text labels, though they may be possible with a little more thought as well. (But I think they'd make the figure too busy.)
# Overlay both locations in one ridgeline plot: grouping and fill use the
# combined Month/location key, so each Month row holds one histogram per location.
ggplot(rp_data,
aes(x = value, y = Month,
group = paste0(Month, location),
fill = paste0(Month, location))) +
# alpha = 0.7 makes the bars semi-transparent so overlapping histograms stay visible
geom_density_ridges2(stat = "binline", binwidth = 1,
scale = 0.95, alpha = 0.7) +
scale_x_continuous(breaks = c(0:12), limits = c(-.5, 13),
expand = c(0, 0), name = "random value") +
scale_y_discrete(expand = c(0.01, 0), name = "Month",
labels = c("5.0", "4.0", "3.0", "2.0", "1.0")) +
# four fill colours cycled over the Month/location groups
scale_fill_cyclical(values = c("#0000B0", "#B00000",
"#7070D0", "#FC5E5E")) +
labs(title = "Poisson random samples location 1 different Month",
subtitle = "sample size n=10") +
guides(y = "none") +
theme_ridges(grid = FALSE, center = TRUE)
Edit: Now with text labels.
# Overlay of both locations with bin counts printed as text labels.
# Grouping and fill use the combined Month/location key.
ggplot(
  rp_data,
  aes(
    x = value, y = Month,
    group = paste0(Month, location),
    fill = paste0(Month, location)
  )
) +
  geom_density_ridges2(
    stat = "binline", binwidth = 1, scale = 0.95, alpha = 0.7
  ) +
  geom_text(
    stat = "bin",
    # after_stat() replaces the deprecated ..count.. notation.
    # ceiling(group / 2) maps each pair of consecutive group ids onto one
    # shared baseline; the bar height is added on top of that.
    aes(
      y = after_stat(ceiling(group / 2) + 0.95 * (count / max(count))),
      label = after_stat(ifelse(count > 0, count, "")),
      color = location
    ),
    vjust = 1.4, size = 3, binwidth = 1, fontface = "bold"
  ) +
  scale_x_continuous(
    breaks = c(0:12), limits = c(-.5, 13), expand = c(0, 0),
    name = "random value"
  ) +
  scale_y_discrete(
    expand = c(0.01, 0), name = "Month",
    labels = c("5.0", "4.0", "3.0", "2.0", "1.0")
  ) +
  scale_fill_cyclical(
    values = c("#0000B0", "#B00000", "#7070D0", "#FC5E5E")
  ) +
  # text colour alternates by location
  scale_color_cyclical(values = c("white", "black")) +
  labs(
    title = "Poisson random samples location 1 different Month",
    subtitle = "sample size n=10"
  ) +
  guides(y = "none") +
  theme_ridges(grid = FALSE, center = TRUE)
Again, not sure it's a good idea, but there you go.

Resources