ggplot2 - factor colour and legend adjustment - r

I have written a short script to plot trends of measured radioactivity activities on 2 separate measurement devices. The script is shown below
pkgLoad <- function(x)
{
if (!require(x,character.only = TRUE))
{
install.packages(x,dep=TRUE, repos='http://star-www.st-andrews.ac.uk/cran/')
if(!require(x,character.only = TRUE)) stop("Package not found")
}
}
pkgLoad("ggplot2")
pkgLoad("XLConnect")
pkgLoad("reshape2")
#Load the workbook
wb<-loadWorkbook("CapintecQC.xlsx")
df_blue <-readWorksheet(wb, sheet = "Blue", startCol=1, endCol=6)
#sort date format
df_blue$Date <- as.Date(df_blue$Date , "%d/%m/%y")
df_blue[order(df_blue$Date ),]
df_gold <-readWorksheet(wb, sheet = "Gold", startCol=1, endCol=6)
df_gold$Date <- as.Date(df_gold$Date , "%d/%m/%y")
df_gold[order(df_gold$Date ),]
#Reference Cs-137 details
ref_activity <- 9.3
half_life <- 30.23
ref_date <- as.Date('06/01/08',format='%d/%m/%y')
blue_melt <- melt(df_blue[,c(1,2:6)], id="Date", value.name="Activity", variable.name="Isotope")
#Add new column to data frame with expected activity
df_gold["Exp_Act"] <- round(ref_activity*exp((-0.693/half_life)*as.numeric(difftime(df_gold$Date,ref_date))/365.25),3)
df_gold["Exp_Act_0.95"] <- 0.95 * df_gold$Exp_Act
df_gold["Exp_Act_1.05"] <- 1.05 * df_gold$Exp_Act
gold_melt <- melt(df_gold[,c(1,2:6)], id="Date", value.name="Activity", variable.name="Isotope")
p <- ggplot( NULL )+geom_point(data = gold_melt, aes(x=Date,y=Activity, col=Isotope)) + geom_ribbon(data = df_gold, aes(x = Date, ymin = Exp_Act_0.95, ymax = Exp_Act_1.05), fill='blue', alpha=0.2) + geom_point(data = blue_melt, aes(x=Date,y=Activity, col=Isotope), shape=2) + theme_bw()
print(p)
I am not very competent with R/ggplot2. I would like the final plot to show the measured activity for each radionuclide to be the same color for both devices (i.e Cs-137 in red, 99mTc in Blue). How can I do this as my graph plots different colours.
Also the legend is non-pleasing.
(i) The format for each nuclide, which is picked up from the excel header changes from Cs-137 to Cs.137. How can I have Cs-137, Tc-99m etc as headers?
(ii) Each radionuclide is duplicated in the legend - one for each device. Is it possible to show just the legend for the first data frame (df_gold) or better just have text in the legend, with the text color matched to the marker color in the plot?)
df_gold structure
structure(list(Date = structure(c(15708, 15709, 15712, 15713,
15714, 15715, 15716, 15719, 15720, 15721, 15722, 15723, 15726,
15727, 15729, 15730, 15733, 15734, 15735, 15736, 15740, 15741,
15743, 15747, 15748, 15749, 15750, 15751, 15754, 15755, 15756,
15757, 15758, 15761, 15762, 15764, 15765, 15768, 15769, 15770,
15771, 15772, 15775, 15776, 15777, 15779, 15782, 15783, 15784,
15785, 15786, 15789, 15790, 15791, 15792, 15797, 15798, 15799,
15800), class = "Date"), Cs..137 = c(8.2, 8.1, 8.1, 8.1, 8.1,
8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1,
8.1, 8.1, 8.1, 8.2, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8, 8.2,
8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1,
8.1, 8.1, 8.1, 8, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1, 8.1,
8.1, 8.1), In..111 = c(6.49, 6.47, 6.48, 6.43, 6.49, 6.51, 6.5,
6.47, 6.48, 6.4, 6.48, 6.48, 6.48, 6.49, 6.49, 6.47, 6.48, 6.48,
6.5, 6.47, 6.49, 6.55, 6.46, 6.49, 6.48, 6.48, 6.46, 6.48, 6.49,
6.44, 6.49, 6.46, 6.45, 6.46, 6.46, 6.43, 6.49, 6.47, 6.45, 6.43,
6.44, 6.44, 6.44, 6.46, 6.45, 6.47, 6.45, 6.43, 6.44, 6.47, 6.45,
6.46, 6.45, 6.46, 6.39, 6.46, 6.44, 6.42, 6.41), I..123 = c(6.97,
6.94, 6.96, 6.91, 6.92, 6.95, 6.93, 6.92, 6.93, 7, 6.97, 6.96,
6.96, 6.94, 6.98, 6.97, 6.95, 6.95, 6.94, 6.96, 6.97, 7.01, 6.92,
7, 6.98, 6.97, 6.91, 6.99, 6.95, 6.88, 6.96, 6.91, 6.91, 6.93,
6.94, 6.94, 6.97, 6.93, 6.93, 6.93, 6.96, 6.94, 6.94, 6.92, 6.93,
6.91, 6.93, 6.92, 6.92, 6.91, 6.91, 6.89, 6.92, 6.9, 6.9, 6.91,
6.91, 6.9, 6.9), I..131 = c(10.5, 10.5, 10.5, 10.5, 10.5, 10.5,
10.5, 10.5, 10.5, 10.8, 10.5, 10.6, 10.5, 10.5, 10.5, 10.5, 10.5,
10.5, 10.5, 10.5, 10.5, 10.6, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5,
10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.4,
10.5, 10.4, 10.5, 10.5, 10.5, 10.4, 10.5, 10.4, 10.4, 10.5, 10.4,
10.4, 10.4, 10.4, 10.4, 10.3, 10.5, 10.5, 10.5, 10.6), Tc..99m = c(15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15.1, 15, 15, 15.1, 15,
15, 15, 15, 15.1, 15, 15.1, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 14.9, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
14.9, 14.8, 14.9, 14.9, 14.9, 14.9, 15, 15, 14.8, 15, 15, 15,
15), Exp_Act = c(8.294, 8.293, 8.292, 8.291, 8.291, 8.29, 8.29,
8.288, 8.288, 8.287, 8.287, 8.286, 8.285, 8.284, 8.283, 8.283,
8.281, 8.28, 8.28, 8.279, 8.277, 8.277, 8.276, 8.274, 8.273,
8.273, 8.272, 8.272, 8.27, 8.27, 8.269, 8.269, 8.268, 8.266,
8.266, 8.265, 8.264, 8.263, 8.262, 8.262, 8.261, 8.261, 8.259,
8.259, 8.258, 8.257, 8.256, 8.255, 8.255, 8.254, 8.254, 8.252,
8.251, 8.251, 8.25, 8.248, 8.247, 8.247, 8.246), Exp_Act_0.95 = c(7.8793,
7.87835, 7.8774, 7.87645, 7.87645, 7.8755, 7.8755, 7.8736, 7.8736,
7.87265, 7.87265, 7.8717, 7.87075, 7.8698, 7.86885, 7.86885,
7.86695, 7.866, 7.866, 7.86505, 7.86315, 7.86315, 7.8622, 7.8603,
7.85935, 7.85935, 7.8584, 7.8584, 7.8565, 7.8565, 7.85555, 7.85555,
7.8546, 7.8527, 7.8527, 7.85175, 7.8508, 7.84985, 7.8489, 7.8489,
7.84795, 7.84795, 7.84605, 7.84605, 7.8451, 7.84415, 7.8432,
7.84225, 7.84225, 7.8413, 7.8413, 7.8394, 7.83845, 7.83845, 7.8375,
7.8356, 7.83465, 7.83465, 7.8337), Exp_Act_1.05 = c(8.7087, 8.70765,
8.7066, 8.70555, 8.70555, 8.7045, 8.7045, 8.7024, 8.7024, 8.70135,
8.70135, 8.7003, 8.69925, 8.6982, 8.69715, 8.69715, 8.69505,
8.694, 8.694, 8.69295, 8.69085, 8.69085, 8.6898, 8.6877, 8.68665,
8.68665, 8.6856, 8.6856, 8.6835, 8.6835, 8.68245, 8.68245, 8.6814,
8.6793, 8.6793, 8.67825, 8.6772, 8.67615, 8.6751, 8.6751, 8.67405,
8.67405, 8.67195, 8.67195, 8.6709, 8.66985, 8.6688, 8.66775,
8.66775, 8.6667, 8.6667, 8.6646, 8.66355, 8.66355, 8.6625, 8.6604,
8.65935, 8.65935, 8.6583)), row.names = c(NA, -59L), .Names = c("Date",
"Cs..137", "In..111", "I..123", "I..131", "Tc..99m", "Exp_Act",
"Exp_Act_0.95", "Exp_Act_1.05"), class = "data.frame")
df_blue structure
structure(list(Date = structure(c(15790, 15791, 15792, 15797,
15798, 15799, 15800), class = "Date"), Cs.137 = c(8.1, 8.2, 8.2,
8.2, 8.2, 8.2, 8.2), I.123 = c(6.82, 6.85, 6.91, 6.84, 6.82,
6.82, 6.83), I.131 = c(10.5, 10.6, 10.6, 10.5, 10.6, 10.6, 10.6
), In.111 = c(6.35, 6.45, 6.43, 6.37, 6.38, 6.4, 6.37), X99m.Tc = c(15,
15, 15.1, 15.1, 15.1, 15.1, 15.1)), .Names = c("Date", "Cs.137",
"I.123", "I.131", "In.111", "X99m.Tc"), row.names = c(NA, -7L
), class = "data.frame")

My approach would be to bind together both data frames and then add new column that contains name of device (gold or blue).
df<-rbind(gold_melt,blue_melt)
df$device<-rep(c("gold","blue"),c(nrow(gold_melt),nrow(blue_melt)))
With function recode() from library car change names of Isotope as they should be.
df$Isotope<-recode(df$Isotope,"c('Cs..137','Cs.137')='Cs-137';
c('I..123','I.123')='I-123';
c('I..131','I.131')='I-131';
c('In..111','In.111')='In-111'
;c('Tc..99m','X99m.Tc')='Tc-99m'")
Now you just need one call to geom_point() using new data frame. I added also shape=device to get different shapes for each device.
ggplot(NULL) +
geom_point(data=df,aes(x=Date,y=Activity, col=Isotope,shape=device))+
geom_ribbon(data = df_gold, aes(x = Date, ymin = Exp_Act_0.95, ymax = Exp_Act_1.05), fill='blue', alpha=0.2)

Just in case you'd want to "fuse" the legend together, then building up on Didzis' answer:
df <- transform(df, device = factor(device, levels=unique(device)),
grp = paste(Isotope, device, sep="_"))
require(RColorBrewer)
ggplot() + geom_point(data = df, aes(x = Date, y = Activity,
colour=grp, shape = grp, fill=grp)) +
geom_ribbon(data = df_gold, aes(x = Date, ymin = Exp_Act_0.95,
ymax = Exp_Act_1.05), fill='blue', alpha=0.2) +
scale_shape_manual("", values=rep(c(21,24), 5)) +
scale_fill_manual("", values=rep(brewer.pal(5, "Set1"), each=2)) +
scale_colour_manual("", values=rep(brewer.pal(5, "Set1"), each=2))

Related

Using lmer for multiple dependent variables

Here is an example of my dataset:
df <- data.frame(
id = c(13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 62, 63, 64, 65, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
40, 62, 63, 64, 65, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 62, 63, 64, 65),
collection_point = c(rep(c("Baseline", "Immediate", "3M"), each=28)),
intervention = c(rep(c("B", "A", "C", "B", "C", "A", "A", "B", "A", "C", "B", "C",
"A", "A", "B", "A", "C", "B", "C", "A", "A"), each = 4)),
scale_A = c(6.5, 7.0, 6.25, 6.0, NA, 7.5, 7.5,
8.0, 7.5, 6.75, 7.5, 6.75, 6.75, 6.5,
5.75, 6.75, 7.75, 7.5, 7.75, 7.25, 7.75,
7.25, 7.25, 5.75, 6.75, NA, 6.75, 7.5,
6.75, 7.0, 6.5, 7.0, 7.5, 7.5, 7.5,
7.75, 7.25, 7.25, 7.25, 7.5, 6.5, 6.25,
6.25, 7.25, 7.5, 6.75, 7.25, 7.25, 7.5,
7.25, 7.5, 7.25, NA, 7.0, 7.5, 7.5,
6.75, 7.25, 6.5, 7.0, 7.5, 7.5, 7.5,
7.75, 7.5, 7.5, 7.5, 7.5, 6.5, 5.75,
6.25, 6.75, 7.5, 7.25, 7.25, 7.5, 7.75,
7.75, 7.75, 7.5, NA, NA, NA, NA))
scale_B = c(5.0, 6.5, 6.25, 7.0, NA, 5.5, 6.5,
6.0, 7.5, 5.75, 6.5, 5.75, 7.75, 6.5,
6.75, 7.75, 7.75, 7.5, 7.75, 5.25, 7.75,
6.25, 6.25, 6.75, 5.75, NA, 6.75, 6.5,
7.75, 6.0, 7.5, 6.0, 7.5, 7.5, 6.5,
6.75, 6.25, 6.25, 6.25, 6.5, 6.5, 7.25,
7.25, 6.25, 6.5, 7.75, 6.25, 7.25, 6.5,
6.25, 6.5, 6.25, NA, 7.0, 6.5, 7.5,
7.75, 6.25, 7.5, 6.0, 7.5, 6.5, 6.5,
6.75, 6.5, 6.5, 6.5, 7.5, 7.5, 6.75,
7.25, 7.75, 6.5, 6.25, 7.25, 6.5, 6.75,
6.75, 6.75, 6.5, 5.5, NA, NA, 6.5))
scale_C = c(5.5, 5.0, 7.25, 7.0, 8.0, 5.5, 5.5,
8.0, 5.5, 7.75, 5.5, 7.75, 7.75, 7.5,
7.75, 7.75, 5.75, 5.5, 5.75, 5.25, 5.75,
5.25, 6.25, 7.75, 7.75, NA, 7.75, 5.5,
6.75, 6.0, 7.5, 5.0, 5.5, 5.5, 7.5,
5.75, 6.25, 5.25, 5.25, 5.5, 7.5, 7.25,
7.25, 6.25, 5.5, 7.75, 5.25, 5.25, 7.5,
5.25, 6.5, 5.25, 5.0, 5.0, 5.5, 5.5,
7.75, 6.25, 7.5, 5.0, 5.5, 5.5, 7.5,
5.75, 6.5, 5.5, 5.5, 5.5, 7.5, 7.75,
7.25, 7.75, 5.5, 5.25, 5.25, 5.5, 6.75,
5.75, 5.75, 5.5, 6.75, NA, 5.75, NA))
where,
id = participant
collection_point = times data was collected from participant (repeated measure)
intervention = group each participant was randomized to (fixed effect)
scale_A = questionnaire score that each participant completed at each data collection point (outcome)
Participants were randomized to one of three interventions and completed the same scales (scales A-C) at three different time points to determine any improvements over time.
I have used the code
mixed.lmer.A1<-lmer(scale_A~intervention+(collection_point|id), control =
lmerControl(check.nobs.vs.nRE = "ignore"), data = df)
but I would like to run MANOVA as all scales measure different aspects of a cohesive theme. However, I can't run
mixed.lmer.comb<-lmer(cbind(scale_A, scale_B, scale_C)~intervention+
(collection_point|id), control = lmerControl(check.nobs.vs.nRE = "ignore"),
data = df)
like I originally thought. It does work if I run using lm but that wouldn't be super meaningful as I need to account for collection_point as a repeated measure.
Is there a way I can run multiple dependent variables using lmer?
You can do this by converting the data to long format: there are a lot of ways to do this, e.g. reshape in base R or reshape2::melt, but I find tidyr::pivot_longer the easiest:
df_long <- tidyr::pivot_longer(df, cols = starts_with("scale_"),
names_to = "scales",
values_to = "value")
The fixed effects are 0 + scales + scales:intervention: we don't want an overall intercept, we want a scale-specific intercept, plus intervention effects for each scale.
The random effects are collection_point|scales/id: this allows the effect of collection point to vary across scales and across id (as in the original model).
mm <- lmer(value ~ 0 + scales + scales:intervention + (collection_point|scales/id),
data = df_long,
control = lmerControl(check.nobs.vs.nRE = "ignore"))
This model is technically correct, but gives a singular fit (as is not surprising since we are trying to estimate a variance across only three levels of scales); see ?isSingular, or the GLMM FAQ, for advice about to handle this.
This is not the only model we could set up; a maximal model would include more terms.
Some further comments:
One principle is that, since the elements of multivariate data generally have different units, we should not have any terms in the model that apply across scales (such as an overall intercept, or an overall effect of intervention); this might not apply in your particular case, I don't know
it is unusual (and often, although not always, wrong) to have a term varying in a random effect (collection_point in this case) that does not have a corresponding fixed effect — by doing so you are assuming that the average (population-level) effect of collection point is exactly zero, which is surprising unless (1) there's something special about the experimental design (e.g. observations were somehow randomized across collection points), (2) you have pre-processed the data in some way (e.g. explicitly transformed the data to have zero variance across collection points), (3) are setting up a null model for comparison.
I'm a little concerned about your need to override the check that you have fewer random effects estimated than observations; I haven't looked into this in detail, but that usually means your model is overfitting in some way. (Maybe it's because we're looking at a subset of the data, and this doesn't come up in the full data set?)
More here.

GGplot + Shiny changing a line color based off slope of line

I am trying to have multiple lines each of which changes between red and green depending on its slope. So if one changes from positive to negative it will change from green to red at that point. I have tried splitting the data so that red lines are formed between negative points and vice versa for green, but I encounter a problem. When a line goes from positive to negative back to positive; it is red. This is because red needs to know all points in that sequence to build the red lines but I want to avoid it connecting the positive slope line with red. An example of this is the bottom line in the below graph at Mar 15 I don't know if it makes a difference but I am using shiny whit ggplot.
output$pen_performance_graphs <- renderPlotly({
#subsets data to date range selected
#five_year_disp <-subset(five_year_weekly, as.Date(Date)>=input$pen_dates[1]&date<=input$pen_dates[2])
five_year_disp <- five_year_weekly[five_year_weekly$Date >= input$pen_dates[1],]
five_year_disp <- five_year_disp[five_year_disp$Date <= input$pen_dates[2],]
ggp <- ggplot(five_year_disp, aes(x = as.Date(Date), y = Yeild,
label = Date,
label2 = Animal_ID,
label3 = Precent_Change,
label4 = Yeild,
label5 = Treatment,
group = Animal_ID,xmin = as.Date(input$pen_dates[1], "%Y-%m-%d"),
xmax = as.Date(input$pen_dates[2], "%Y-%m-%d"),)) +
geom_line() +
#GREEN LINES
geom_line(data=five_year_disp, aes(x=as.Date(Date), y=posY, col="green")) +
#RED LINES
geom_line(data=five_year_disp, aes(x=as.Date(Date), y=negY, col="red")) +
scale_color_identity() +
geom_point() +
labs(x = "Date", y = "Milk Yeild (LBS)")
#ggp <- ggp + scale_x_date(limits = as.Date(c(input$pen_dates[1], input$pen_dates[2])), date_breaks = "month")
p <- ggplotly(ggp, tooltip = c("label", "label2", "label3", "label4", "label5"))
p
})
dput(five_year_disp)
structure(list(Animal_ID = c(578L, 578L, 578L, 578L, 578L, 578L,
578L, 578L, 578L, 578L, 578L, 578L, 578L, 579L, 579L, 579L, 579L,
579L, 579L, 579L, 579L, 579L, 579L, 579L, 579L, 579L, 618L, 618L,
618L, 618L, 618L, 618L, 618L, 618L, 618L, 618L, 618L, 618L, 618L,
5082L, 5082L, 5082L, 5082L, 5082L, 5082L, 5082L, 5082L, 5082L,
5082L, 5082L, 5082L, 5082L, 5451L, 5451L, 5451L, 5451L, 5451L,
5451L, 5451L, 5451L, 5451L, 5451L, 5451L, 5451L, 5451L, 5570L,
5570L, 5570L, 5570L, 5570L, 5570L, 5570L, 5570L, 5570L, 5570L,
5570L, 5570L, 5570L, 5836L, 5836L, 5836L, 5836L, 5836L, 5836L,
5836L, 5836L, 5836L, 5836L, 5836L, 5836L, 5836L, 5842L, 5842L,
5842L, 5842L, 5842L, 5842L, 5842L, 5842L, 5842L, 5842L, 5842L,
5842L, 5842L, 5868L, 5868L, 5868L, 5868L, 5868L, 5868L, 5868L,
5868L, 5868L, 5868L, 5868L, 5868L, 5868L, 5883L, 5883L, 5883L,
5883L, 5883L, 5883L, 5883L, 5883L, 5883L, 5883L, 5883L, 5883L,
5883L), Date = c("2021/02/08", "2021/02/13", "2021/02/20", "2021/02/27",
"2021/03/01", "2021/03/08", "2021/03/13", "2021/03/20", "2021/03/27",
"2021/04/01", "2021/04/08", "2021/04/13", "2021/04/20", "2021/02/08",
"2021/02/13", "2021/02/20", "2021/02/27", "2021/03/01", "2021/03/08",
"2021/03/13", "2021/03/20", "2021/03/27", "2021/04/01", "2021/04/08",
"2021/04/13", "2021/04/20", "2021/02/08", "2021/02/13", "2021/02/20",
"2021/02/27", "2021/03/01", "2021/03/08", "2021/03/13", "2021/03/20",
"2021/03/27", "2021/04/01", "2021/04/08", "2021/04/13", "2021/04/20",
"2021/02/08", "2021/02/13", "2021/02/20", "2021/02/27", "2021/03/01",
"2021/03/08", "2021/03/13", "2021/03/20", "2021/03/27", "2021/04/01",
"2021/04/08", "2021/04/13", "2021/04/20", "2021/02/08", "2021/02/13",
"2021/02/20", "2021/02/27", "2021/03/01", "2021/03/08", "2021/03/13",
"2021/03/20", "2021/03/27", "2021/04/01", "2021/04/08", "2021/04/13",
"2021/04/20", "2021/02/08", "2021/02/13", "2021/02/20", "2021/02/27",
"2021/03/01", "2021/03/08", "2021/03/13", "2021/03/20", "2021/03/27",
"2021/04/01", "2021/04/08", "2021/04/13", "2021/04/20", "2021/02/08",
"2021/02/13", "2021/02/20", "2021/02/27", "2021/03/01", "2021/03/08",
"2021/03/13", "2021/03/20", "2021/03/27", "2021/04/01", "2021/04/08",
"2021/04/13", "2021/04/20", "2021/02/08", "2021/02/13", "2021/02/20",
"2021/02/27", "2021/03/01", "2021/03/08", "2021/03/13", "2021/03/20",
"2021/03/27", "2021/04/01", "2021/04/08", "2021/04/13", "2021/04/20",
"2021/02/08", "2021/02/13", "2021/02/20", "2021/02/27", "2021/03/01",
"2021/03/08", "2021/03/13", "2021/03/20", "2021/03/27", "2021/04/01",
"2021/04/08", "2021/04/13", "2021/04/20", "2021/02/08", "2021/02/13",
"2021/02/20", "2021/02/27", "2021/03/01", "2021/03/08", "2021/03/13",
"2021/03/20", "2021/03/27", "2021/04/01", "2021/04/08", "2021/04/13",
"2021/04/20"), Yeild_gr = c(50670, 46065, 40101, 32613, 37695,
37036, 30634, 33787, 31460, 30826, 26050, 27395, 28957, 38375,
40061, 34028, 25966, 28609, 33850, 27921, 31511, 30946, 28963,
26031, 27421, 27754, 49306, 51508, 44800, 37900, 41330, 44519,
38556, 41680, 43477, 40167, 32131, 39124, 40348, 50872, 49346,
42075, 45195, 44415, 46700, 44372, 44211, 39444, 37177, 39067,
36270, 37110, 52265, 45753, 42932, 40839, 39943, 43049, 42969,
44352, 43141, 35598, 30473, 29328, 29415, 42137, 39911, 33229,
28658, 29314, 42391, 31544, 35591, 32331, 31842, 21259, 27222,
21272, 33986, 36264, 28851, 34661, 32127, 32005, 27759, 29025,
27663, 26708, 26692, 26025, 24856, 32255, 30794, 29464, 32200,
32139, 26482, 24468, 26563, 25800, 24214, 21352, 23367, 20130,
38088, 39296, 35180, 35936, 39282, 37509, 38335, 33096, 38771,
36884, 34456, 29630, 34145, 44728, 51297, 39168, 45407, 50389,
45554, 47758, 48574, 47675, 34608, 43766, 37066, 40068), Conductivity = c(9.8,
9.6, 10.4, 10.8, 10.4, 10, 10.4, 10.2, 10.2, 10.6, 10.1, 10.5,
10, 9.9, 9.6, 11.1, 9.9, 9.9, 10.1, 10.5, 9.8, 10.3, 11.6, 10.8,
12.1, 10.7, 8.6, 8.8, 8.8, 9.1, 10.2, 8.9, 9, 8.8, 8.8, 9.8,
8.4, 10.1, 8.8, 10, 9.7, 9.7, 10.1, 10.1, 9.9, 9.7, 10, 10.2,
9.8, 9.8, 10.2, 9.9, 8.5, 8.9, 8.9, 9, 8.8, 8.7, 8.9, 8.8, 8.6,
8.5, 8.8, 8.6, 8.8, 10.1, 10.7, 10.8, 11.4, 10.5, 10.1, 10.7,
10.3, 10.2, 10.2, 10.5, 11.1, 11.4, 8.4, 9, 9, 8.6, 9.2, 8.6,
9, 9.2, 8.7, 9.6, 9.4, 9.3, 9.3, 9.9, 8.4, 10.5, 11.2, 9.7, 8.5,
9.6, 10.3, 8.9, 10.3, 10.6, 10.5, 9.9, 8.7, 8.7, 9.1, 9.4, 9.1,
8.6, 8.9, 8.9, 9.1, 9.2, 9.3, 9.6, 9.1, 10.3, 9.6, 10.3, 10.6,
10.5, 10.4, 10.8, 9.9, 9.3, 10, 10, 10.6, 10.3), FatPct = c(4.7,
4.4, 5.1, 4.5, 5.1, 4.6, 5, 5, 5.2, 4.6, 4.8, 4.8, 4.9, 4.2,
4.4, 4.5, 4.4, 4.7, 4.8, 4.8, 4.6, 3.9, 5.3, 5.2, 5.1, 4.3, 4.1,
3.9, 3.8, 4.2, 4, 4.5, 4.2, 4, 4.6, 4.2, 4.8, 4.1, 4.2, 4.2,
3.6, 4.2, 4, 3.6, 3.6, 3.4, 3.5, 3.9, 3.9, 4.2, 4.4, 4.1, 3.7,
4.7, 5.2, 4.6, 4.3, 5.1, 3.8, 4, 4.2, 6, 5.4, 5.4, 5.4, 3.2,
4.8, 3.5, 3.7, 5.1, 3.9, 4.6, 3.5, 3.7, 4, 3.9, 4.1, 3.7, 3.9,
3.9, 4.3, 4.1, 3.9, 4.9, 4.1, 4.1, 3.9, 4.1, 4.2, 4.7, 3.8, 3.6,
4.3, 4.1, 3.7, 3.4, 3.9, 3.4, 4.6, 3.6, 3.5, 4, 3.5, 4.2, 5.2,
5.7, 5.8, 4.9, 5.5, 5.5, 4.5, 5.4, 6, 4.5, 5.6, 6.5, 4.5, 4.2,
4, 3.8, 4.7, 4.3, 4, 4.9, 3.6, 4, 4.3, 4.2, 4.3, 3.9), ProPct = c(3,
3.1, 3.8, 4, 3.2, 3.9, 2.8, 3.2, 3.3, 3.1, 4, 3, 3.4, 4.3, 4.1,
3.7, 4.4, 3.5, 2.7, 3.2, 3.9, 3.8, 2.9, 2.9, 3, 3.2, 3.5, 3.7,
3.2, 3.6, 3.6, 3.1, 3.9, 3.4, 2.8, 3.9, 4.2, 4, 4.2, 3.4, 4.7,
2.8, 3.8, 3.5, 4.4, 4.2, 4.1, 3.5, 4.3, 3.9, 3.6, 3.7, 4.2, 4.6,
3.6, 3.1, 3.7, 3.4, 4.1, 4.4, 4.1, 3.6, 3.4, 3.9, 3.4, 4.3, 4,
4.7, 4.5, 4, 3.9, 4.1, 4.1, 4.5, 3.7, 4.4, 4, 4.3, 3.7, 3.8,
3.2, 2.9, 3.8, 3.5, 3.6, 4.2, 3.7, 3.8, 3, 3.3, 3.8, 4.1, 3.1,
3.9, 4.3, 4.1, 3.6, 4.2, 3.4, 3.1, 4, 4.1, 3.4, 3.3, 3.7, 4.9,
4.4, 3.7, 4.3, 2.9, 3.3, 4, 2.7, 4.3, 4.4, 4, 4.3, 3.6, 3.3,
3.7, 2.7, 3.3, 2.5, 3.1, 3.1, 3.2, 4.3, 3.5, 2.6, 3), Yeild = c(111.71,
101.56, 88.41, 71.9, 83.1, 81.65, 67.54, 74.49, 69.36, 67.96,
57.43, 60.4, 63.84, 84.6, 88.32, 75.02, 57.25, 63.07, 74.63,
61.56, 69.47, 68.22, 63.85, 57.39, 60.45, 61.19, 108.7, 113.56,
98.77, 83.56, 91.12, 98.15, 85, 91.89, 95.85, 88.55, 70.84, 86.25,
88.95, 112.15, 108.79, 92.76, 99.64, 97.92, 102.96, 97.82, 97.47,
86.96, 81.96, 86.13, 79.96, 81.81, 115.22, 100.87, 94.65, 90.03,
88.06, 94.91, 94.73, 97.78, 95.11, 78.48, 67.18, 64.66, 64.85,
92.9, 87.99, 73.26, 63.18, 64.63, 93.46, 69.54, 78.46, 71.28,
70.2, 46.87, 60.01, 46.9, 74.93, 79.95, 63.61, 76.41, 70.83,
70.56, 61.2, 63.99, 60.99, 58.88, 58.85, 57.38, 54.8, 71.11,
67.89, 64.96, 70.99, 70.85, 58.38, 53.94, 58.56, 56.88, 53.38,
47.07, 51.52, 44.38, 83.97, 86.63, 77.56, 79.23, 86.6, 82.69,
84.51, 72.96, 85.48, 81.32, 75.96, 65.32, 75.28, 98.61, 113.09,
86.35, 100.11, 111.09, 100.43, 105.29, 107.09, 105.11, 76.3,
96.49, 81.72, 88.33), Treatment = c(78, 73, 66, 59, 57, 50, 45,
38, 31, 26, 19, 14, 7, 78, 73, 66, 59, 57, 50, 45, 38, 31, 26,
19, 14, 7, 78, 73, 66, 59, 57, 50, 45, 38, 31, 26, 19, 14, 7,
78, 73, 66, 59, 57, 50, 45, 38, 31, 26, 19, 14, 7, 78, 73, 66,
59, 57, 50, 45, 38, 31, 26, 19, 14, 7, 78, 73, 66, 59, 57, 50,
45, 38, 31, 26, 19, 14, 7, 78, 73, 66, 59, 57, 50, 45, 38, 31,
26, 19, 14, 7, 78, 73, 66, 59, 57, 50, 45, 38, 31, 26, 19, 14,
7, 78, 73, 66, 59, 57, 50, 45, 38, 31, 26, 19, 14, 7, 78, 73,
66, 59, 57, 50, 45, 38, 31, 26, 19, 14, 7), Precent_Change = c("12.6",
"-9.1", "-12.9", "-18.7", "15.6", "-1.7", "-17.3", "10.3", "-6.9",
"-2.0", "-15.5", "5.2", "5.7", "-0.2", "4.4", "-15.1", "-23.7",
"10.2", "18.3", "-17.5", "12.8", "-1.8", "-6.4", "-10.1", "5.3",
"1.2", "-9.4", "4.5", "-13.0", "-15.4", "9.0", "7.7", "-13.4",
"8.1", "4.3", "-7.6", "-20.0", "21.8", "3.1", "-4.0", "-3.0",
"-14.7", "7.4", "-1.7", "5.1", "-5.0", "-0.4", "-10.8", "-5.7",
"5.1", "-7.2", "2.3", "4.7", "-12.5", "-6.2", "-4.9", "-2.2",
"7.8", "-0.2", "3.2", "-2.7", "-17.5", "-14.4", "-3.8", "0.3",
"14.7", "-5.3", "-16.7", "-13.8", "2.3", "44.6", "-25.6", "12.8",
"-9.2", "-1.5", "-33.2", "28.0", "-21.8", "3.6", "6.7", "-20.4",
"20.1", "-7.3", "-0.4", "-13.3", "4.6", "-4.7", "-3.5", "-0.1",
"-2.5", "-4.5", "4.1", "-4.5", "-4.3", "9.3", "-0.2", "-17.6",
"-7.6", "8.6", "-2.9", "-6.2", "-11.8", "9.5", "-13.9", "4.0",
"3.2", "-10.5", "2.2", "9.3", "-4.5", "2.2", "-13.7", "17.2",
"-4.9", "-6.6", "-14.0", "15.2", "-6.2", "14.7", "-23.6", "15.9",
"11.0", "-9.6", "4.8", "1.7", "-1.8", "-27.4", "26.5", "-15.3",
"8.1"), posY = c(111.71, NA, NA, 71.9, 83.1, NA, 67.54, 74.49,
NA, NA, 57.43, 60.4, 63.84, 84.6, 88.32, NA, 57.25, 63.07, 74.63,
61.56, 69.47, NA, NA, 57.39, 60.45, 61.19, 108.7, 113.56, NA,
83.56, 91.12, 98.15, 85, 91.89, 95.85, NA, 70.84, 86.25, 88.95,
NA, NA, 92.76, 99.64, 97.92, 102.96, NA, NA, NA, 81.96, 86.13,
79.96, 81.81, 115.22, NA, NA, NA, 88.06, 94.91, 94.73, 97.78,
NA, NA, NA, 64.66, 64.85, 92.9, NA, NA, 63.18, 64.63, 93.46,
69.54, 78.46, NA, NA, 46.87, 60.01, 46.9, 74.93, 79.95, 63.61,
76.41, NA, NA, 61.2, 63.99, NA, NA, NA, NA, 54.8, 71.11, NA,
64.96, 70.99, NA, NA, 53.94, 58.56, NA, NA, 47.07, 51.52, 44.38,
83.97, 86.63, 77.56, 79.23, 86.6, 82.69, 84.51, 72.96, 85.48,
NA, NA, 65.32, 75.28, 98.61, 113.09, 86.35, 100.11, 111.09, 100.43,
105.29, 107.09, NA, 76.3, 96.49, 81.72, 88.33), negY = c(111.71,
101.56, 88.41, 71.9, 83.1, 81.65, 67.54, 74.49, 69.36, 67.96,
57.43, NA, NA, 84.6, 88.32, 75.02, 57.25, NA, 74.63, 61.56, 69.47,
68.22, 63.85, 57.39, NA, NA, 108.7, 113.56, 98.77, 83.56, NA,
98.15, 85, NA, 95.85, 88.55, 70.84, NA, NA, 112.15, 108.79, 92.76,
99.64, 97.92, 102.96, 97.82, 97.47, 86.96, 81.96, 86.13, 79.96,
NA, 115.22, 100.87, 94.65, 90.03, 88.06, 94.91, 94.73, 97.78,
95.11, 78.48, 67.18, 64.66, NA, 92.9, 87.99, 73.26, 63.18, NA,
93.46, 69.54, 78.46, 71.28, 70.2, 46.87, 60.01, 46.9, NA, 79.95,
63.61, 76.41, 70.83, 70.56, 61.2, 63.99, 60.99, 58.88, 58.85,
57.38, 54.8, 71.11, 67.89, 64.96, 70.99, 70.85, 58.38, 53.94,
58.56, 56.88, 53.38, 47.07, 51.52, 44.38, NA, 86.63, 77.56, NA,
86.6, 82.69, 84.51, 72.96, 85.48, 81.32, 75.96, 65.32, NA, 98.61,
113.09, 86.35, NA, 111.09, 100.43, NA, 107.09, 105.11, 76.3,
96.49, 81.72, NA)), row.names = c(38819L, 35876L, 32913L, 28959L,
27969L, 24993L, 22013L, 19023L, 15022L, 13008L, 8983L, 6986L,
3003L, 38411L, 36285L, 32500L, 29370L, 28382L, 24579L, 22430L,
18606L, 15443L, 12587L, 9399L, 6570L, 3418L, 38818L, 35877L,
32912L, 28960L, 27970L, 24992L, 22014L, 19022L, 15023L, 13007L,
8984L, 6985L, 3004L, 38815L, 35880L, 32909L, 28963L, 27973L,
24989L, 22017L, 19019L, 15026L, 13004L, 8987L, 6982L, 3007L,
38469L, 35976L, 32798L, 29056L, 28066L, 24881L, 22109L, 18905L,
15118L, 12886L, 9076L, 6865L, 3099L, 38794L, 35901L, 32888L,
28984L, 27994L, 24968L, 22038L, 18997L, 15048L, 12982L, 9009L,
6960L, 3029L, 38775L, 35920L, 32869L, 29002L, 28012L, 24950L,
22056L, 18980L, 15065L, 12965L, 9025L, 6944L, 3047L, 38774L,
35921L, 32868L, 29003L, 28013L, 24949L, 22057L, 18979L, 15066L,
12964L, 9026L, 6943L, 3048L, 38769L, 35926L, 32863L, 29008L,
28018L, 24944L, 22062L, 18974L, 15071L, 12959L, 9031L, 6938L,
3053L, 38764L, 35931L, 32858L, 29013L, 28023L, 24939L, 22067L,
18969L, 15076L, 12954L, 9035L, 6934L, 3057L), class = "data.frame")
Here is one solution, based on duplicating the rows where the current direction of yield changes.
library(data.table)
library(ggplot2)
# Set five_year_display as data.table
setDT(five_year_display)
#Order the five year display, and create an row identifier
five_year_display[order(Animal_ID, Date),rowid:=.I]
# Create a version that duplicates rows when the next row changes direction
fyd <- rbindlist(list(
five_year_display,
five_year_display[five_year_display[,dup_row:=sign(Yeild-shift(Yeild,-1))!=sign(shift(Yeild,1)-Yeild), by = Animal_ID][dup_row==TRUE, rowid]]
),idcol = "src")[order(Animal_ID, Date, src)]
# Function to set the colors, based on yield and rowid
# This function first finds the initial direction of the yield,
# sets the color for that direction, and then
# looks at the changes in row id to determine toggle in colors
find_colors <- function(yield, rowid) {
colors=as.numeric(yield[1]>=yield[2])
for(i in seq(2,length(rowid))) {
if(rowid[i]>rowid[i-1]) colors = c(colors, colors[i-1])
else colors = c(colors, 1-colors[i-1])
}
return(colors)
}
# Use function above to assign colors to each row
fyd[,colors:=find_colors(Yeild,rowid), by=Animal_ID]
# create a colorgrp over animal and color, using rleid
fyd[,colorgrp:=rleid(Animal_ID,colors)]
# plot the fyd using the colorgrp in geom_line, and manually setting the color scale
ggplot(fyd, aes(as.Date(Date), Yeild)) +
geom_point()+
geom_line(aes(group=colorgrp,color=factor(colors, labels=c("Increasing", "Decreasing")))) +
scale_color_manual(values=c("green", "red")) +
labs(x = "Date", color="Slope") +
theme(legend.position="bottom")
Here is the resulting plot

Different result of durbinWatsonTest

I know function durbinWatsonTest in package car use bootstrap method to calculate p-value, then the results are different when i rerun this code.
Can someone know how to set seed this function?
Many tks.
df <- structure(list(y = c(-2.59, -3.42, -3.19, -3.25, -3.51, -3.18,
-3.36, -3.12, -3.13, -3.02, -2.99, -3.03, -2.9, -2.8, -2.8),
x1 = c(17.25, 9.7, 7.35, 7.93, 8.66, 10.31, 10.32, 11.4,
11.23, 13.13, 13.88, 14, 14, 14, 13),
x2 = c(NA, NA, NA, NA, 17.25, 9.7, 7.35, 7.93,
8.66, 10.31, 10.32, 11.4, 11.23, 13.13, 13.88),
x3 = c(NA, NA, 7.4, 6.5, 6.52, 5.66, 3.1,
3.9, 4.6, 5.4, 5.84, 6.16, 6.52, 6.42, 5.9)),
row.names = c(NA, -15L), class = c("tbl_df", "tbl", "data.frame"))
model <- lm(y~x1+x2+x3, data = df)
durbinWatsonTest(model)

How to identify a range of data points between a minimum and a maximum in a dataframe in R?

My measured variable V1 follows cycles: it goes up to reach a maximum and down to reach a minimum. I call "cycle" the range of data points between 2 consecutive maxima (cycle 1 is maximum 1 - minimum 1 - maximum 2, cycle 2 is maximum 2 - minimum 2 - maximum 3). The minima and maxima of each cycle are different.
My 2 questions are:
how to identify the range of data points in V1 corresponding to each cycle?
how to extract all the minima and all the maxima in V1?
I have used ggplot to identify my minima and maxima using stat_peaks() and stat_valleys(). I want to find a way of doing it without plotting it, to apply it to many data frames.
library(ggplot2)
library(ggpmisc)
#I plotted my data to visualize the minima (in yellow) and maxima (in blue) with stat_peaks and stat_valleys.
plot <- ggplot(df, aes(x=V0, y=V1))+
geom_point()+
stat_peaks(color="yellow", span=61)+
stat_valleys(color="blue", span=101)
#I used the ggplot_build function to extract the values of the highlighted peaks and valleys.
pb <- ggplot_build(plot)
I wanted to identify the 10 largest values in pb for which colour == "yellow" and the 10 lowest values in pb for which colour == "blue" but it does not work because pb is not a dataframe.
dput(df[1:200, c(1,2)])
structure(list(V0 = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8,
0.9, 1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2, 2.1,
2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3, 3.1, 3.2, 3.3, 3.4,
3.5, 3.6, 3.7, 3.8, 3.9, 4, 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7,
4.8, 4.9, 5, 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.7, 5.8, 5.9, 6,
6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9, 7, 7.1, 7.2, 7.3,
7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 8, 8.1, 8.2, 8.3, 8.4, 8.5, 8.6,
8.7, 8.8, 8.9, 9, 9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 9.7, 9.8, 9.9,
10, 10.1, 10.2, 10.3, 10.4, 10.5, 10.6, 10.7, 10.8, 10.9, 11,
11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7, 11.8, 11.9, 12, 12.1,
12.2, 12.3, 12.4, 12.5, 12.6, 12.7, 12.8, 12.9, 13, 13.1, 13.2,
13.3, 13.4, 13.5, 13.6, 13.7, 13.8, 13.9, 14, 14.1, 14.2, 14.3,
14.4, 14.5, 14.6, 14.7, 14.8, 14.9, 15, 15.1, 15.2, 15.3, 15.4,
15.5, 15.6, 15.7, 15.8, 15.9, 16, 16.1, 16.2, 16.3, 16.4, 16.5,
16.6, 16.7, 16.8, 16.9, 17, 17.1, 17.2, 17.3, 17.4, 17.5, 17.6,
17.7, 17.8, 17.9, 18, 18.1, 18.2, 18.3, 18.4, 18.5, 18.6, 18.7,
18.8, 18.9, 19, 19.1, 19.2, 19.3, 19.4, 19.5, 19.6, 19.7, 19.8,
19.9, 20), V1 = c(32.56, 31.97, 29.08, 27.34, 25.34, 22.58,
20.93, 17.93, 14.65, 12.2, 9.88, 7, 5.52, 3.96, 3.26, 2.76, 3.23,
3.38, 3.5, 3.67, 4.24, 7.1, 9.94, 14.58, 17.57, 21.64, 23.83,
27.28, 29.48, 33.13, 34.37, 36.74, 37.13, 36.52, 35.87, 36, 35.49,
33.81, 32.89, 30.47, 29.87, 27.84, 25.83, 23.31, 21.39, 18.63,
16.42, 12.9, 10.6, 7.43, 5.95, 4.52, 3.76, 2.61, 2.94, 3.42,
2.89, 3.38, 3.64, 4.2, 5.74, 9.48, 12.71, 17.46, 19.76, 23.93,
27.46, 31.99, 34.07, 40.37, 46.48, 42.89, 48.33, 56.99, 47.16,
43.53, 39.86, 37.48, 30.36, 26.01, 23.03, 20.57, 15.92, 13.87,
11.61, 8.58, 6.52, 4.79, 3.88, 2.9, 2.94, 3.22, 3.45, 3.66, 3.89,
6.01, 8.37, 12.83, 15.06, 18.68, 21.2, 24.12, 26.97, 28.48, 26.69,
37.06, 40.15, 39.36, 35.73, 35.61, 35.83, 35.14, 31.55, 30.05,
25.34, 24.24, 23.4, 21.09, 18.32, 16.04, 13.18, 10.07, 8.23,
5.78, 4.71, 3.44, 3.48, 3.71, 3.72, 3.9, 4.56, 6.93, 9.3, 14.04,
14.66, 16.25, 18.43, 20.76, 21.86, 23.87, 26.63, 24.85, 29.98,
26.67, 26.99, 27.36, 25.08, 25.24, 26.48, 24.1, 22.66, 22.28,
23.29, 21.87, 21.02, 19.53, 22.75, 22.04, 20.64, 19.05, 19.4,
21, 18.93, 25.38, 23.59, 21.48, 21.9, 23.75, 23.38, 25.06, 25.2,
26.38, 25.22, 28.62, 27.38, 34.16, 35.94, 34.03, 28.95, 24.33,
24.76, 25.56, 24.96, 21.99, 23.53, 23.76, 24.5, 22.39, 23.01,
23.42, 24, 22.65, 21.44, 22.15, 21.72, 18.46, 17.65, 15.34, 16.11,
14.93)), row.names = c(NA, 200L), class = "data.frame")
You can add a variable to your data frame that labels the maxima and minima quite easily with the following line:
df$is_min_max <- c(FALSE, diff(as.numeric(diff(df$V1) > 0)) != 0, FALSE)
I'll explain how this works:
You can find out the difference between consecutive points in your data by doing
diff(df$V1)
so you can see where your data are going up or down by doing
as.numeric(diff(df$V1) > 0)
Which will give you a 1 between two points on an upward gradient and 0 on a downward gradient. So if you do
diff(as.numeric(diff(df$V1) > 0))
You will get a +1 or -1 at the points where the direction changes.
So if you do:
diff(as.numeric(diff(df$V1) > 0)) != 0
You will get a logical vector of the points that are local maxima and minima. Note the start and end points have been removed because we have double-diffed. Therefore we need to add a FALSE on at either end:
c(FALSE, diff(as.numeric(diff(df$V1) > 0)) != 0, FALSE)
So we could add this to your data frame as
df$is_min_max <- c(FALSE, diff(as.numeric(diff(df$V1) > 0)) != 0, FALSE)
You haven't included the actual data in your example, so I will show an example here using a simple sine wave:
df <- data.frame(x = seq(1, 20, 0.1), V1 = sin(seq(1, 20, 0.1)))
plot(df$x, df$V1)
And now we can just find our local maxima and minima...
df$is_min_max <- c(FALSE, diff(as.numeric(diff(df$V1) > 0)) != 0, FALSE)
And plot them:
points(df$x[df$is_min_max], df$V1[df$is_min_max], col = "red", cex = 3 )
Note that this will show up every change in direction, so if there are local "wobbles" in your data you will find maxima and minima there too. Removing these is possible but a little more complex.
Created on 2020-02-27 by the reprex package (v0.3.0)
The coordinates of peaks and valleys are contained in pb:
The peaks are in
pb$data[[2]]
xintercept yintercept label x y PANEL group x.label y.label shape colour size fill alpha stroke
1 7.9 0.9989413 7.9 7.9 0.9989413 1 -1 7.9 0.9989 19 yellow 1.5 NA NA 0.5
2 14.1 0.9993094 14.1 14.1 0.9993094 1 -1 14.1 0.9993 19 yellow 1.5 NA NA 0.5
The valleys are in
pb$data[[3]]
xintercept yintercept label x y PANEL group x.label y.label shape colour size fill alpha stroke
1 11 -0.9999902 11 11 -0.9999902 1 -1 11 -1 19 blue 1.5 NA NA 0.5
Note that the order of list elements may change depending on the order of ggplot function calls (layers).
Also note that the sample data provided by the OP is too small with respect to the spans given in calls stat_peaks(color="yellow", span=61) and stat_valleys(color="blue", span=101), resp.
Therefore, I have used the sample data from Allan's answer:
df <- data.frame(V0 = seq(1, 20, 0.1), V1 = sin(seq(1, 20, 0.1)))
which highlights two peaks and one valley using OP's code:
library(ggplot2)
library(ggpmisc)
plot <- ggplot(df, aes(x=V0, y=V1))+
geom_point()+
stat_peaks(color="yellow", span=61)+
stat_valleys(color="blue", span=101)
plot

How to plot wind direction with lat lon and arrow in ggplot2

I have a data frame with Lat Lon mean_wind and wind_dir in each grid cells.
I am trying to make a spatial plot with mean wind in background and wind direction as arrow on each grid cells.
I have tried following on sample data-frame wind.dt
win.plt<- ggplot(wind.dt,aes(x=Lon,y=Lat))+
#Mean wind plot : OK
geom_tile(aes(fill=mean_wind),alpha=1)+
geom_tile(aes(color=mean_wind), fill=NA) +
scale_fill_gradientn(colours=(brewer.pal(9,rev("RdYlGn"))))+
scale_color_gradientn(colours=(brewer.pal(9,rev("RdYlGn"))),guide=F)
#Wind Direction : doesnot work
geom_segment(arrow = arrow(),aes(yend = Lon + wind_dir, xend = Lat + wind_dir))
win.plt
wind.dt<-structure(list(Lon = c(170.25, 171, 171.75, 172.5, 173.25, 174,
174.75, 175.5, 176.25, 177, 177.75, 178.5, 179.25, 180, 180.75,
181.5, 182.25, 183, 183.75, 184.5, 185.25, 186, 186.75, 187.5,
188.25, 189, 189.75, 190.5, 191.25, 192, 192.75, 193.5, 194.25,
170.25, 171, 171.75, 172.5, 173.25, 174, 174.75, 175.5, 176.25,
177, 177.75, 178.5, 179.25, 180, 180.75, 181.5, 182.25, 183,
183.75, 184.5, 185.25, 186, 186.75, 187.5, 188.25, 189, 189.75,
190.5, 191.25, 192, 192.75, 193.5, 194.25, 170.25, 171, 171.75,
172.5, 173.25, 174, 174.75, 175.5, 176.25, 177, 177.75, 178.5,
179.25, 180, 180.75, 181.5, 182.25, 183, 183.75, 184.5, 185.25,
186, 186.75, 187.5, 188.25, 189, 189.75, 190.5, 191.25, 192,
192.75, 193.5, 194.25, 170.25, 171, 171.75, 172.5, 173.25, 174,
174.75, 175.5, 176.25, 177, 177.75, 178.5, 179.25, 180, 180.75,
181.5, 182.25, 183, 183.75, 184.5, 185.25, 186, 186.75, 187.5,
188.25, 189, 189.75, 190.5, 191.25, 192, 192.75, 193.5, 194.25,
170.25, 171, 171.75, 172.5, 173.25, 174, 174.75, 175.5, 176.25,
177, 177.75, 178.5, 179.25, 180, 180.75, 181.5, 182.25, 183,
183.75, 184.5, 185.25, 186, 186.75, 187.5, 188.25, 189, 189.75,
190.5, 191.25, 192, 192.75, 193.5, 194.25, 170.25, 171, 171.75,
172.5, 173.25, 174, 174.75, 175.5, 176.25, 177, 177.75, 178.5,
179.25, 180, 180.75, 181.5, 182.25, 183, 183.75, 184.5, 185.25,
186, 186.75, 187.5, 188.25, 189, 189.75, 190.5, 191.25, 192,
192.75, 193.5, 194.25), Lat = c(14.25, 14.25, 14.25, 14.25, 14.25,
14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25,
14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25,
14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25,
14.25, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5,
13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5,
13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5,
13.5, 13.5, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75,
12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75,
12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75,
12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11.25,
11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25,
11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25,
11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25,
11.25, 11.25, 11.25, 11.25, 11.25, 10.5, 10.5, 10.5, 10.5, 10.5,
10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5,
10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5,
10.5, 10.5, 10.5, 10.5, 10.5, 10.5), mean_wind = c(8.34, 8.33,
8.31, 8.29, 8.27, 8.24, 8.22, 8.2, 8.19, 8.16, 8.14, 8.13, 8.1,
8.08, 8.06, 8.02, 7.99, 7.96, 7.93, 7.89, 7.85, 7.81, 7.78, 7.73,
7.7, 7.67, 7.63, 7.62, 7.6, 7.58, 7.56, 7.53, 7.54, 8.65, 8.64,
8.61, 8.59, 8.56, 8.53, 8.51, 8.48, 8.46, 8.43, 8.41, 8.39, 8.38,
8.37, 8.33, 8.31, 8.28, 8.24, 8.2, 8.15, 8.12, 8.07, 8.03, 8.01,
7.97, 7.94, 7.92, 7.89, 7.87, 7.85, 7.85, 7.83, 7.8, 8.85, 8.84,
8.81, 8.8, 8.77, 8.74, 8.72, 8.69, 8.67, 8.65, 8.63, 8.61, 8.59,
8.58, 8.55, 8.54, 8.5, 8.46, 8.44, 8.4, 8.37, 8.33, 8.29, 8.26,
8.21, 8.18, 8.16, 8.13, 8.12, 8.09, 8.06, 8.06, 8.03, 9.01, 8.99,
8.96, 8.94, 8.91, 8.89, 8.86, 8.83, 8.82, 8.79, 8.78, 8.77, 8.75,
8.75, 8.73, 8.7, 8.68, 8.66, 8.63, 8.59, 8.55, 8.52, 8.47, 8.43,
8.4, 8.38, 8.35, 8.32, 8.31, 8.29, 8.26, 8.25, 8.23, 9.07, 9.06,
9.04, 9.01, 8.99, 8.97, 8.94, 8.92, 8.91, 8.9, 8.89, 8.88, 8.88,
8.87, 8.86, 8.84, 8.83, 8.8, 8.75, 8.74, 8.7, 8.67, 8.63, 8.59,
8.57, 8.53, 8.52, 8.51, 8.47, 8.47, 8.45, 8.42, 8.41, 9.1, 9.08,
9.06, 9.04, 9.02, 9, 8.98, 8.97, 8.96, 8.96, 8.95, 8.95, 8.97,
8.96, 8.96, 8.94, 8.91, 8.89, 8.86, 8.84, 8.8, 8.76, 8.73, 8.69,
8.67, 8.64, 8.63, 8.63, 8.61, 8.59, 8.57, 8.54, 8.53), wind_dir = c(81.27,
81.34, 81.38, 81.44, 81.47, 81.34, 81.31, 81.51, 81.56, 81.46,
81.54, 81.53, 81.42, 81.53, 81.66, 81.76, 81.86, 81.96, 82.02,
82.28, 82.65, 82.77, 83.07, 83.46, 83.78, 84.15, 84.52, 84.92,
85.39, 85.87, 86.15, 86.38, 86.53, 81.34, 81.34, 81.38, 81.31,
81.2, 81.25, 81.39, 81.36, 81.31, 81.4, 81.47, 81.48, 81.59,
81.64, 81.58, 81.62, 81.75, 81.98, 82.13, 82.26, 82.52, 82.77,
82.97, 83.15, 83.49, 83.74, 84.23, 84.78, 85.04, 85.49, 85.73,
86.05, 86.35, 81.5, 81.41, 81.32, 81.28, 81.32, 81.31, 81.24,
81.17, 81.28, 81.33, 81.24, 81.3, 81.44, 81.46, 81.55, 81.76,
81.8, 81.88, 82.11, 82.31, 82.4, 82.61, 82.88, 82.95, 83.29,
83.59, 83.93, 84.46, 84.8, 85.26, 85.47, 85.78, 86.11, 81.3,
81.29, 81.29, 81.28, 81.32, 81.22, 81.24, 81.32, 81.31, 81.23,
81.34, 81.47, 81.37, 81.42, 81.5, 81.6, 81.78, 81.98, 82.06,
82.26, 82.49, 82.52, 82.7, 82.79, 83.05, 83.46, 83.79, 84.18,
84.5, 84.91, 85.23, 85.49, 85.7, 81.31, 81.33, 81.28, 81.19,
81.26, 81.29, 81.36, 81.24, 81.16, 81.18, 81.23, 81.23, 81.23,
81.47, 81.5, 81.55, 81.73, 81.99, 82.14, 82.18, 82.41, 82.46,
82.63, 82.83, 82.97, 83.27, 83.62, 84.01, 84.34, 84.64, 85.01,
85.38, 85.55, 81.14, 81.14, 81.1, 81.15, 81.2, 81.1, 81.14, 81.06,
81.21, 81.26, 81.13, 81.16, 81.17, 81.22, 81.28, 81.63, 81.71,
81.77, 82.13, 82.22, 82.37, 82.48, 82.56, 82.7, 82.92, 83.19,
83.43, 83.74, 84.15, 84.59, 84.89, 85.22, 85.39)), row.names = c(NA,
-198L), .Names = c("Lon", "Lat", "mean_wind", "wind_dir"), class = c("tbl_df",
"tbl", "data.frame"))
geom_spoke was made for this particular sort of plot. Cleaned up a little,
library(ggplot2)
ggplot(wind.dt,
aes(x = Lon ,
y = Lat,
fill = mean_wind,
angle = wind_dir,
radius = scales::rescale(mean_wind, c(.2, .8)))) +
geom_raster() +
geom_spoke(arrow = arrow(length = unit(.05, 'inches'))) +
scale_fill_distiller(palette = "RdYlGn") +
coord_equal(expand = 0) +
theme(legend.position = 'bottom',
legend.direction = 'horizontal')
Adjust scaling and sizes as desired.
Edit: Controlling the number of arrows
To adjust the number of arrows, a quick-and-dirty route is to subset one of the aesthetics passed to geom_spoke with a recycling vector that will cause some rows to be dropped, e.g.
library(ggplot2)
ggplot(wind.dt,
aes(x = Lon ,
y = Lat,
fill = mean_wind,
angle = wind_dir[c(TRUE, NA, NA, NA, NA)], # causes some values not to plot
radius = scales::rescale(mean_wind, c(.2, .8)))) +
geom_raster() +
geom_spoke(arrow = arrow(length = unit(.05, 'inches'))) +
scale_fill_distiller(palette = "RdYlGn") +
coord_equal(expand = 0) +
theme(legend.position = 'bottom',
legend.direction = 'horizontal')
#> Warning: Removed 158 rows containing missing values (geom_spoke).
This depends on your data frame being in order and is not infinitely flexible, but if it gets you a nice plot with minimal effort, can be useless nonetheless.
A more robust approach is to make a subsetted data frame for use by geom_spoke, say, selecting every other value of Lon and Lat, here using recycling subsetting on a vector of distinct values:
library(dplyr)
wind.arrows <- wind.dt %>%
filter(Lon %in% sort(unique(Lon))[c(TRUE, FALSE)],
Lat %in% sort(unique(Lat))[c(TRUE, FALSE)])
ggplot(wind.dt,
aes(x = Lon ,
y = Lat,
fill = mean_wind,
angle = wind_dir,
radius = scales::rescale(mean_wind, c(.2, .8)))) +
geom_raster() +
geom_spoke(data = wind.arrows, # this is the only difference in the plotting code
arrow = arrow(length = unit(.05, 'inches'))) +
scale_fill_distiller(palette = "RdYlGn") +
coord_equal(expand = 0) +
theme(legend.position = 'bottom',
legend.direction = 'horizontal')
This approach makes getting (and scaling) a grid fairly easy, but getting a diamond pattern will take a bit more logic:
wind.arrows <- wind.dt %>%
filter(( Lon %in% sort(unique(Lon))[c(TRUE, FALSE)] &
Lat %in% sort(unique(Lat))[c(TRUE, FALSE)] ) |
( Lon %in% sort(unique(Lon))[c(FALSE, TRUE)] &
Lat %in% sort(unique(Lat))[c(FALSE, TRUE)] ))

Resources