How to set scale_x_date for one line in plot - r

I want to plot a figure with 2 lines (inner bay and outer bay), but I have 2 columns of data for inner bay. I need to set the date limits to switch columns on a certain date. In the data below, I want to plot IB.y from 2015-09-08 to 2015-09-23 and IB.x from 2015-09-24 to 2015-10-07. And then I want to plot all of OB.
The code below plots both IB.x and IB.y for the entire date range rather than split at 2015-09-24 as desired:
AllDailyMean = ggplot(AllMean, aes(x=Date)) + geom_line(aes(y=IB.x,
color = "Inner Bay"), size = 0.5) + geom_ribbon(aes(ymin=IBMin.x,
ymax = IBMax.x), fill = "coral2", alpha = 0.2, linetype = 3) +
scale_x_date(limits = as.Date(c("2015-09-08", "2015-09-23"))) +
geom_line(aes(y=IB.y, color = "Inner Bay"), size = 0.5) +
geom_line(aes(y=OB, color = "Outer Bay"), size = 0.5) +
geom_ribbon(aes(ymin=IBMin.y, ymax=IBMax.y), fill = "coral2", alpha
= 0.2, linetype = 3) + geom_ribbon(aes(ymin=OBMin, ymax=OBMax),
fill = "skyblue4", alpha = 0.2, linetype = 3) +
scale_x_date(labels = date_format("%b '%y"), date_breaks = "2
months") + labs(y = expression(atop("Mean Daily Temp",
paste(("°C"%+-%"Max/Min")))), x = "Date")
structure(list(Date = structure(c(16686, 16687, 16688, 16689,
16690, 16691, 16692, 16693, 16694, 16695, 16696, 16697, 16698,
16699, 16700, 16701, 16702, 16703, 16704, 16705, 16706, 16707,
16708, 16709, 16710, 16711, 16712, 16713, 16714, 16715, 16716
), class = "Date"), IB.x = c(29.7916666666667, 30.0166666666667,
30.075, 30.0875, 29.3666666666667, 29.2291666666667, 28.8875,
28.6826086956522, 28.6041666666667, 28.7125, 28.7416666666667,
28.5166666666667, 28.525, 28.525, 28.5166666666667
28.3916666666667, 28.3, 28.0875, 27.9541666666667, 27.475,
27.1458333333333, 26.9166666666667, 26.85, 26.9625,
26.4041666666667, 25.95, 25.7416666666667, 25.85,
25.6875, 25.7, 25.7958333333333), IBMax.x = c(30.1, 30.3, 30.4,
30.6, 29.7, 29.4, 29.2, 29, 28.9, 29, 29.1, 28.9, 28.8, 28.7,
28.7, 28.5, 28.5, 28.4, 28.1, 27.9, 27.6, 27.1, 27.1, 27.3, 27.1,
26.3, 26.2, 26.1, 25.9, 26.2, 26.1), IBMin.x = c(29.1, 29.7,
29.8, 29.7, 29, 29, 28.4, 28.2, 28.4, 28.5, 28.6, 27.9, 28.2,
28.3, 28.2, 28.2, 28.1, 27.7, 27.8, 27, 26.8, 26.7, 26.5, 26.7,
25.7, 25.5, 25.4, 25.4, 25.2, 25.2, 25.5), IB.y = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 27.671,
27.6027395833333, 27.25271875, 26.7719895833333, 26.3682604166667,
26.3313229166667, 26.4141875, 26.2628020833333, 26.14065625,
26.1491041666667, 26.2293541666667, 25.7827604166667, 25.44615625,
25.6583854166667, 26.0718645833333), IBMax.y = c(NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 28.177, 28.151,
27.632, 27.187, 26.917, 26.843, 27.237, 26.77, 26.573, 26.622,
26.671, 26.059, 25.913, 26.279, 26.328), IBMin.y = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 27.41, 26.917,
26.77, 26.23, 25.766, 25.717, 25.644, 25.352, 25.255, 25.231,
25.498, 25.523, 25.084, 25.036, 25.766), OB = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 27.794625, 27.5463125,
27.0850208333333, 26.8009375, 26.7057083333333, 26.65728125,
26.535375, 26.46721875, 26.4802604166667, 26.7571145833333,
26.3706145833333, 26.0067395833333, 25.9274166666667,
25.8764895833333, 25.9058333333333), OBMax = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 28.147, 27.998, 27.358,
27.014, 27.136, 27.014, 27.186, 26.965, 27.038, 27.308, 26.646,
26.231, 26.256, 26.329, 26.207), OBMin = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 27.628, 27.21, 26.793,
26.524, 26.304, 26.28, 26.158, 26.158, 25.939, 26.329, 25.939,
25.768, 25.671, 25.574, 25.501)), row.names = 70:100, class =
"data.frame")

One option is to use a simple ifelse within your y aesthetic. I've removed a bunch of your code as it's superfluous to the problem at hand (it's helpful to provide a minimal reproducible example, removing any irrelevant details).
ggplot(df, aes(x=Date)) +
geom_line(aes(y=ifelse(Date <= "2015-09-23", IB.x, NA), color = "Inner Bay"), size = 0.5) +
geom_line(aes(y=ifelse(Date > "2015-09-23", IB.x, NA), color = "Outer Bay"), size = 0.5) +
labs(y = expression(atop("Mean Daily Temp", paste(("°C"%+-%"Max/Min")))), x = "Date")

Related

ggplot2 | How to customize the order of string values in the legend?

In continuation of my earlier question, I am facing issues w.r.t. to ordering the legends. The initially posted question had ordinal (ordered) values and hence worked perfectly. In real-time, the data rendered in the legend is being ordered alphabetically.
library(ggplot2)
library(tidyverse)
library(reshape2)
#Creating a dataframe with use-case specific variables.
df = data.frame(
Year = 2006:2025,
Survey = c(40.5, 39.0, NA, NA, NA, NA, 29.9, NA, NA, NA, 21.6,
NA, NA, NA, NA, NA, NA, NA, NA, NA),
Projected1 = c(NA, NA, NA, NA, NA, NA, 29.9, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 14.9),
WhatIf= c(NA, NA, NA, NA, NA, NA, 29.9, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 13.0),
Projected2 = c(NA, NA, NA, NA, NA, NA, 29.9, 27.6, 25.4, 23.4, 21.6,
19.9, 18.4, 16.9, 15.6, 14.4, 13.3, NA, 12.2, 11.3)
)
#Transforming data
df <- melt(df,id.vars = "Year")
ggplot(data = NULL, aes(x=factor(Year), y=value, group=variable)) +
geom_line(data = df[!is.na(df$value) & df$variable != "Survey",],
aes(linetype=variable, color = variable), size = 1, linetype = "dashed")+
geom_point(data = df[!is.na(df$value) & df$variable == "Survey",],
aes(color = variable), size = 4) +
scale_color_manual(values=c('#999999', 'orange2','turquoise2','blue2'))+
guides(color = guide_legend(override.aes = list(linetype = c("blank", "dashed", "dashed", "dashed"),
shape = c(16, NA, NA, NA)))) +
scale_y_continuous(
breaks=seq(0,100, 10), labels = seq(0, 100, 10), limits=c(0,70),
sec.axis = dup_axis()) +
theme(
legend.position = 'bottom', legend.direction = 'horizontal',
panel.grid.major.y = element_line(color='gray85'),
axis.title = element_text(face='bold')) +
labs(x='Year', y='measure (%)')
Created on 2020-07-11 by the reprex package (v0.3.0)
Output
Objective: Sequence in the legend and respective plots must be as follows: c("Survey", "WhatIf", "Projected1", "Projected2" )
I have tried the following methods alternatively but there's no difference in the output.
df$variable <- factor(df$variable, levels = c("Survey", "WhatIf", "Projected1", "Projected2" ))
scale_fill_discrete(breaks = c("Survey", "WhatIf", "Projected1", "Projected2" ))
I might be missing out on a trivial step and any suggestions would be greatly helpful.
You just need to add a breaks = argument to scale_color_manual and change the order of values = to match because you have the guide argument set to color =:
scale_color_manual(breaks = c("Survey", "WhatIf", "Projected1", "Projected2" ),
values=c('turquoise2','blue2','#999999', 'orange2'))+

(R) Add significance stars to correlation matrix heat map

I am looking at correlations between many variables in my data stratified by gender. I was able to create a heatmap using code I found on StackOverflow, but I'm not sure how to add stars for significance to the cells. I would also like to cut the matrix in half to avoid redundancy.
Here's the code:
# Variables to correlate
anthro <- c("Visit_age", "HeightCm", "WeightKg", "BMI",
"NeckLengthCm", "NeckCircCm", "HeadCircCm", "NeckVolumeCm")
peak <- c("ExtensorPeak_Newtons", "FlexorPeak_Newtons",
"RightPeak_Newtons", "LeftPeak_Newtons")
avg <- c("ExtensorAVG_Newtons", "FlexorAVG_Newtons",
"RightAVG_Newtons", "LeftAVG_Newtons")
# Function for creation of multiple heatmaps using
# male/female and peak/avg neck strength
heatmap <- function(gender, strength){
# Create three new variables: var1, var2, corr
# where corr is correlation between the var1 and var2
corrs <- filter(data, Gender == gender) %>%
select(anthro, strength) %>%
as.matrix() %>%
cor(use = "pairwise.complete.obs") %>%
as.data.frame() %>%
rownames_to_column(var = "var1") %>%
gather("var2", "corr", -var1)
# Plot heatmap
ggplot(corrs, aes(var1, var2)) +
geom_tile(aes(fill = corr), color = "white") +
scale_fill_gradient(low = "white", high = "steelblue") +
geom_text(aes(label = round(corr, 1))) +
ggtitle(gender) +
labs(x = "", y = "") +
theme(plot.title = element_text(hjust = 0.5),axis.text.x =
element_text(angle = 30, hjust = 1))
}
# Create heatmaps
heatmap("Male", peak)
heatmap("Female", peak)
heatmap("Male", avg)
heatmap("Female", avg)
dput(head(data, 20)):
data <- structure(list(Gender = structure(c(2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("Male",
"Female"), class = "factor"), Visit_age = c(37, 38, 39, 22, 23,
24, 24, 20, 21, 21, 22, 22, 36, 37, 38, 38, 22, 42, 42, 43),
HeightCm = c(170, 170, 170, 182, 182, 182, 182, 177.8, 177.8,
177.8, 177.8, 177.8, 168, 168, 168, 168, 162.56, 164, 164,
164), WeightKg = c(63.18181, 58.63636, 60.45454, 70.90909,
77.72727, 75.45454, 80.45454, 78.86363, 81.36363, 80, 83.18181,
82.72727, 68.18181, 69.0909, 68.18181, 65, 69.0909, 48.18181,
50.45454, 47.72727), BMI = c(21.86222, 20.28939, 20.91852,
21.40716, 23.46554, 22.77941, 24.28889, 24.94671, 25.73752,
25.30617, 26.31266, 26.16888, 24.15739, 24.47948, 24.15739,
23.03004, 26.14529, 17.91412, 18.75912, 17.74511), NeckLengthCm = c(16,
16, 16, 14, 14, 14, 14, 16, 16, 16, 16, 16, 16, 16, 16, 16,
15, 15, 15, 15), NeckCircCm = c(35, 30, 32, 35, 34, 34, 36,
38, 39, 38, 40, 41, 39, 24, 36, 38, 34, 30, 29, 30), HeadCircCm = c(58,
58, 58, 56, 56, 56, 56, 57, 57, 57, 57, 57, 58, 58, 58, 58,
55, 52, 52, 52), NeckVolumeCm = c(1559.718, 1145.915, 1303.797,
1364.753, 1287.881, 1287.881, 1443.853, 1838.557, 1936.597,
1838.557, 2037.183, 2140.315, 1936.597, 733.3859, 1650.118,
1838.557, 1379.873, 1074.295, 1003.869, 1074.295), ExtensorPeak_Newtons = c(NA,
183.34, 145.96, NA, NA, 187.79, 153.525, NA, NA, 252.76,
227.395, 192.685, NA, NA, 168.21, 230.51, NA, NA, NA, 101.015
), FlexorPeak_Newtons = c(NA, 70.755, 68.975, NA, NA, 99.68,
112.585, NA, NA, 151.3, 136.615, 145.96, NA, NA, 97.9, 105.02,
NA, NA, NA, 53.4), RightPeak_Newtons = c(NA, 93.005, 125.935,
NA, NA, 85.885, 92.56, NA, NA, 102.35, 108.135, 108.135,
NA, NA, 74.315, 97.01, NA, NA, NA, 49.395), LeftPeak_Newtons = c(NA,
125.49, 131.275, NA, NA, 89.89, 99.68, NA, NA, 113.92, 121.93,
143.29, NA, NA, 59.185, 92.56, NA, NA, NA, 50.73), ExtensorAVG_Newtons = c(NA,
179.186637, 142.5483185, NA, NA, 178.445, 136.911637, NA,
NA, 242.97, 204.106637, 167.765, NA, NA, 161.09, 214.49,
NA, NA, NA, 95.081637), FlexorAVG_Newtons = c(NA, 68.2333185,
66.75, NA, NA, 87.516637, 100.125, NA, NA, 135.131637, 128.7533185,
138.84, NA, NA, 88.406637, 95.971637, NA, NA, NA, 51.62),
RightAVG_Newtons = c(NA, 85.1433185, 120.2983185, NA, NA,
75.65, 86.4783185, NA, NA, 96.7133185, 100.866637, 106.9483185,
NA, NA, 67.046637, 88.851637, NA, NA, NA, 47.7633185), LeftAVG_Newtons = c(NA,
121.93, 120.2983185, NA, NA, 74.315, 92.56, NA, NA, 110.656637,
111.546637, 130.83, NA, NA, 54.29, 88.11, NA, NA, NA, 48.801637
)), row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame"
))
I found an alternative way to resolve your problem on http://www.sthda.com/english/wiki/visualize-correlation-matrix-using-correlogram
Try to make a correlogram
library(corrplot)
# Correlation for Male
data_male <- data[data$Gender == "Male",]
M <- cor(data_male[,-1], use = "pairwise.complete.obs")
M <- round(M, 1)
#Significant correlation
p.mat <- cor(data_male[,-1])
# Plot the correlogram
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
corrplot(M,
method="color",
col=col(200),
type="upper",
order="hclust",
addCoef.col = "black",
tl.col="black",
number.cex = 0.7,
tl.cex = 0.6,
tl.srt=45,
p.mat =p.mat,
sig.level = 0.5,
insig = "label_sig")
You can do the same thing for Female
data_female <- data[data$Gender == "Female",]
F <- cor(data_female[,-1], use = "pairwise.complete.obs")
F <- round(F, 1)
corrplot(F,
method="color",
col=col(200),
type="upper",
order="hclust",
addCoef.col = "black",
tl.col="black",
number.cex = 0.7,
tl.cex = 0.6,
tl.srt=45,
p.mat =p.mat,
sig.level = 0.5,
insig = "label_sig")
Instead of your current argument to geom_text(aes(label= ...)) use:
label = paste(round(corr,1), c(" ","*")[(abs(corr) <= .05)+1])
This will add a "*" when the absolute value of corr is below 0.05.
Look at the code of ggcorrplot::ggcorrplot to see how they handle filling only half a square tile plot.

Adding legend to ggplot with geom_line with factor color and manually added line

I can see that there are a lot of questions similar to this, but I cant find solution for my particular problem.
Data:
risk_accum <- structure(list(date = structure(c(1465948800, 1465952400, 1465956000,
1465959600, 1465963200, 1465966800, 1465970400, 1465974000, 1465977600,
1465981200, 1465984800, 1465988400, 1465992000, 1465995600, 1465999200,
1466002800, 1466006400, 1466010000, 1466013600, 1466017200, 1466020800,
1466024400, 1466028000, 1466031600, 1466035200, 1466038800, 1466042400,
1466046000, 1466049600, 1466053200, 1466056800, 1466060400, 1466064000,
1466067600, 1466071200, 1466074800, 1466078400, 1466082000, 1466085600,
1466089200, 1466092800, 1466096400, 1466100000, 1466103600, 1466107200,
1466110800, 1466114400, 1466118000, 1466121600, 1466125200, 1466128800,
1466132400, 1466136000, 1466139600, 1466143200, 1466146800, 1466150400,
1466154000, 1466157600, 1466161200, 1466164800, 1466168400, 1466172000,
1466175600, 1466179200, 1466182800, 1466186400, 1466190000, 1466193600,
1466197200, 1466200800, 1466204400, 1466208000, 1466211600, 1466215200,
1466218800, 1466222400, 1466226000, 1466229600, 1466233200, 1466236800,
1466240400, 1466244000, 1466247600, 1466251200, 1466254800, 1466258400,
1466262000, 1466265600, 1466269200, 1466272800, 1466276400, 1466280000,
1466283600, 1466287200, 1466290800, 1466294400, 1466298000, 1466301600,
1466305200, 1466308800, 1466312400, 1466316000, 1466319600, 1466323200,
1466326800, 1466330400, 1466334000, 1466337600, 1466341200, 1466344800,
1466348400, 1466352000, 1466355600, 1466359200, 1466362800, 1466366400,
1466370000, 1466373600, 1466377200), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), risk = c(NA, NA, NA, 1, 2, 3, 4, 5, 6, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, 2, 3, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, 2, 3,
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, NA, NA)), .Names = c("date",
"risk"), row.names = c(NA, -120L), class = c("tbl_df", "tbl",
"data.frame"))
And code to generate graph:
#color variable
color_var <- vector(mode = "double",length = length(risk_accum$risk))
color_var[color_var== '0']<-NA
color_var[risk_accum$risk<6] <- "green4"
color_var[risk_accum$risk>=6 & risk_accum$risk<12] <- "yellow2"
color_var[risk_accum$risk>=12] <- "red"
#plot of Effective Blight Hours accumulation
ggplot(risk_accum)+
geom_line(aes(x = date, y = risk), color = color_var)+
scale_y_continuous(name = "EBH accumulation")+
scale_colour_manual(values=c("green", "yellow", "red"))+
geom_line(aes(date, y= 12), linetype= "dotted", size = 0.1)+
theme(axis.title.x = element_blank())
I need to get a legend which would explain the traffic light system (red is danger, etc) and manually added threshold risk line.
Add your color variable to the dataset, map to that variable inside aes, and use scale_*_identity to directly use the colors.
risk_accum$color_var <- NA
risk_accum$color_var[risk_accum$risk<6] <- "green4"
risk_accum$color_var[risk_accum$risk>=6 & risk_accum$risk<12] <- "yellow2"
risk_accum$color_var[risk_accum$risk>=12] <- "red"
ggplot(risk_accum)+
geom_line(aes(x = date, y = risk, color = color_var)) +
scale_y_continuous(name = "EBH accumulation")+
scale_color_identity(guide = 'legend') +
geom_line(aes(date, y= 12), linetype= "dotted", size = 0.1)+
theme(axis.title.x = element_blank())
You can also add your threshold to the legend:
ggplot(risk_accum)+
geom_line(aes(x = date, y = risk, color = color_var)) +
geom_line(aes(date, y= 12, linetype = "threshold"), size = 0.1)+
scale_y_continuous(name = "EBH accumulation")+
scale_color_identity(guide = 'legend') +
scale_linetype_manual(values = 2) +
theme(axis.title.x = element_blank())

How to use geom ribbon when NA values are present?

What I want to do:
I want to plot few curves and fill in the areas between them
The problem
I use geom_ribbon() to fill in the areas. But it fills in more than what I want.
Incorrectly filled white area:
I want to fill in white only between "OPDV" and "SDV" lines (shown in plot).
Code and Data
I used following code for above plot:
library(ggplot2)
library(dplyr)
ggplot() +
geom_ribbon(data = ddf,aes(ymin=BX,ymax=60, x=dv), fill="green") +
geom_ribbon(data = ddf,aes(ymin=BX,ymax=s, x=SDV_1), fill="orange") +
geom_ribbon(data = ddf,aes(ymin=BX,ymax=SDX_1, x=dv), fill="white") +
geom_path(data = ddf,mapping = aes(x = CLDV_1, y = s), size=0.5)+
geom_path(data = ddf,mapping = aes(x = OPDV_1, y = s), size=0.5) +
geom_path(data = ddf,aes(x = SDV_1, y = s), size=0.5) +
#geom_path(data = ddf,aes(x = dv, y = AX), size=0.5) +
geom_path(data = ddf,aes(x = dv, y = BX), size=0.5) +
geom_path(data = ddf,aes(x = dv, y = SDX_1), size=0.5) +
annotate(geom = "text", x = -0.8, y = 29, label = "OPDV",size = 3) +
annotate(geom = "text", x = 1.5, y = 40, label = "SDV",size = 3) +
labs(y = "Spacing (m)", x = "Relative Speed (Vf - Vl), m/s") +
coord_cartesian(ylim = c(25, 50),
xlim = c(-2,3.2))
I also tried replacing the third line with following:
geom_ribbon(data = ddf %>%
filter(dv>OPDV_1 & dv<SDV_1),
aes(ymin=BX,ymax=SDX_1, x=dv), fill="white")
But, since some of the values in OPDV_1 and SDV_1 are NA, it didn't fill in white at all.
Following are my data:
structure(list(BX = c(27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5,
27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5,
27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5,
27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5,
27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.5,
27.5, 27.5, 27.5, 27.5, 27.5, 27.5, 27.4804347826087, 27.4295652173913,
27.3786956521739, 27.3278260869565, 27.2769565217391, 27.2260869565217,
27.1752173913043, 27.124347826087, 27.0734782608696, 27.0226086956522,
26.9717391304348, 26.9208695652174, 26.87, 26.8191304347826,
26.7682608695652, 26.7173913043478, 26.6665217391304, 26.615652173913,
26.5647826086957, 26.5139130434783, 26.4630434782609, 26.4121739130435,
26.3613043478261, 26.3104347826087, 26.2595652173913, 26.2086956521739,
26.1578260869565, 26.1069565217391, 26.0560869565217, 26.0052173913043,
25.954347826087, 25.9034782608696, 25.8526086956522, 25.8017391304348,
25.7508695652174, 25.7), dv = c(3.2, 3.14347826086956, 3.08695652173913,
3.03043478260869, 2.97391304347826, 2.91739130434782, 2.86086956521739,
2.80434782608696, 2.74782608695652, 2.69130434782609, 2.63478260869565,
2.57826086956522, 2.52173913043478, 2.46521739130435, 2.40869565217391,
2.35217391304348, 2.29565217391304, 2.23913043478261, 2.18260869565217,
2.12608695652174, 2.0695652173913, 2.01304347826087, 1.95652173913043,
1.9, 1.84347826086956, 1.78695652173913, 1.7304347826087, 1.67391304347826,
1.61739130434783, 1.56086956521739, 1.50434782608696, 1.44782608695652,
1.39130434782609, 1.33478260869565, 1.27826086956522, 1.22173913043478,
1.16521739130435, 1.10869565217391, 1.05217391304348, 0.995652173913044,
0.939130434782609, 0.882608695652173, 0.826086956521738, 0.769565217391303,
0.713043478260868, 0.656521739130433, 0.600000000000001, 0.543478260869566,
0.486956521739131, 0.430434782608696, 0.373913043478261, 0.317391304347826,
0.260869565217391, 0.204347826086956, 0.14782608695652, 0.0913043478260853,
0.0347826086956502, -0.0217391304347814, -0.0782608695652165,
-0.134782608695652, -0.191304347826087, -0.247826086956522, -0.304347826086957,
-0.360869565217392, -0.417391304347827, -0.473913043478262, -0.530434782608694,
-0.586956521739133, -0.643478260869564, -0.699999999999999, -0.756521739130434,
-0.81304347826087, -0.869565217391305, -0.92608695652174, -0.982608695652175,
-1.03913043478261, -1.09565217391305, -1.15217391304348, -1.20869565217392,
-1.26521739130435, -1.32173913043478, -1.37826086956522, -1.43478260869565,
-1.49130434782609, -1.54782608695652, -1.60434782608696, -1.66086956521739,
-1.71739130434783, -1.77391304347826, -1.83043478260869, -1.88695652173913,
-1.94347826086956, -2), s = 8:100, SDV_1 = c(NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
-0.0875, 0.0375, 0.1625, 0.2875, 0.4125, 0.5375, 0.6625, 0.7875,
0.9125, 1.0375, 1.1625, 1.2875, 1.4125, 1.5375, 1.6625, 1.7875,
1.9125, 2.0375, 2.1625, 2.2875, 2.4125, 2.5375, 2.6625, 2.7875,
2.9125, 3.0375, 3.1625, 3.2875, 3.4125, 3.5375, 3.6625, 3.7875,
3.9125, 4.0375, 4.1625, 4.2875, 4.4125, 4.53994565217391, 4.67130434782609,
4.80266304347826, 4.93402173913043, 5.06538043478261, 5.19673913043478,
5.32809782608696, 5.45945652173913, 5.5908152173913, 5.72217391304348,
5.85353260869565, 5.98489130434783, 6.11625, 6.24760869565217,
6.37896739130435, 6.51032608695652, 6.6416847826087, 6.77304347826087,
6.90440217391304, 7.03576086956522, 7.16711956521739, 7.29847826086957,
7.42983695652174, 7.56119565217391, 7.69255434782609, 7.82391304347826,
7.95527173913043, 8.08663043478261, 8.21798913043478, 8.34934782608696,
8.48070652173913, 8.6120652173913, 8.74342391304348, 8.87478260869565,
9.00614130434783, 9.1375), SDX_1 = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 31.5, 31.5, 31.5,
31.5, 31.5, 31.5, 31.5, 31.4804347826087, 31.4295652173913, 31.3786956521739,
31.3278260869565, 31.2769565217391, 31.2260869565217, 31.1752173913043,
31.124347826087, 31.0734782608696, 31.0226086956522, 30.9717391304348,
30.9208695652174, 30.87, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), CLDV_1 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 0.619176470588235, 0.646767058823529, 0.675703529411765,
0.705985882352941, 0.737614117647059, 0.770588235294118, 0.804908235294118,
0.840574117647059, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), OPDV_1 = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -0.619176470588235,
-0.646767058823529, -0.675703529411765, -0.705985882352941, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA)), row.names = c(NA, -93L), class = c("tbl_df",
"tbl", "data.frame"), .Names = c("BX", "dv", "s", "SDV_1", "SDX_1",
"CLDV_1", "OPDV_1"))
Please guide me if there is any other function in ggplot2 or extensions that I can use to fill in correctly. Or, if possible, how can I use geom_ribbon() effectively in this case?
Use geom_polygon to fill the white area between the OPDV, SDX, SDV and BX curves.
Create data to draw the white polygon
library(tidyr)
# Change data to a long format
ddflong <- ddf %>%
gather(key, value, -dv, -s)
# Extract data for each polygon side
sideOPDV <- ddflong %>%
filter(key =="OPDV_1" & !is.na(value)) %>%
transmute(x = value, y = s, key = key) %>%
arrange(y)
sideSDX <- ddflong %>%
filter(key =="SDX_1" & !is.na(value)) %>%
transmute(x = dv, y = value, key = key) %>%
arrange(x)
sideSDV <- ddflong %>%
filter(key =="SDV_1" & !is.na(value) & value < max(sideSDX$x)) %>%
transmute(x = value, y = s, key = key) %>%
arrange(desc(y))
sideBX <- ddflong %>%
filter(key == "BX" & dv > max(sideOPDV$x) & dv < min(sideSDV$x)) %>%
transmute(x = dv, y = value, key = key) %>%
arrange(desc(x))
# Combine all sides in one polygon
datapolygon <- rbind(sideOPDV, sideSDX, sideSDV, sideBX)
Draw the plot
ggplot(data = ddf) +
geom_ribbon(aes(ymin=BX,ymax=60, x=dv), fill="green") +
geom_ribbon(aes(ymin=BX,ymax=s, x=SDV_1), fill="orange") +
#### Here is the new instruction ####
geom_polygon(data = datapolygon, aes(x = x, y =y), fill="white") +
# Added colours to identify the lines
geom_path(aes(x = CLDV_1, y = s), colour = "yellow")+
geom_path(aes(x = OPDV_1, y = s), colour = "purple") +
geom_path(aes(x = SDV_1, y = s), colour = "dark green") +
geom_path(aes(x = dv, y = BX), colour = "blue") +
geom_path(aes(x = dv, y = SDX_1), colour = "red") +
annotate(geom = "text", x = 1, y = 30, label = "CLDV", colour = "yellow") +
annotate(geom = "text", x = -0.9, y = 29, label = "OPDV", colour = "purple") +
annotate(geom = "text", x = 1.2, y = 40, label = "SDV", colour = "dark green") +
annotate(geom = "text", x = -0.2, y = 26, label = "BX", colour = "blue") +
annotate(geom = "text", x = -0.3, y = 32, label = "SDX", colour = "red") +
labs(y = "Spacing (m)", x = "Relative Speed (Vf - Vl), m/s") +
coord_cartesian(ylim = c(25, 50),
xlim = c(-2,3.2))

Calculate percentage to total using rowPercents

I am trying to calculate a percentage to total for, lets say, the following reproducible example:
structure(c(197.95, 197.95, 197.95, 186.8, 190.51, 195.16, 199.81,
202.59, 202.59, 202.59, 92.28, 92.28, 90.07, 89.82, 87.36, 87.61,
90.56, 89.82, 90.07, 89.82, 20.43, 20.43, 20.43, 20.43, 20.43,
20.43, 20.43, 20.43, 20.43, 20.64, 24.7, 24.95, 24.54, 23.97,
23.97, 24.38, 24.38, 24.38, 24.54, 24.54, 37.4, 37.4, 37.4, 35.43,
35.43, 35.43, 35.43, 35.43, 35.43, 39.37, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 16.05,
16.05, 16.05, 16.05, 15.62, 15.62, 16.05, 15.62, 15.62, 15.62,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), index = structure(c(470620800,
470880000, 470966400, 471052800, 471139200, 471225600, 471484800,
471571200, 471657600, 471744000), tzone = "UTC", tclass = "Date"), .indexCLASS = "Date", .indexTZ = "UTC", tclass = "Date", tzone = "UTC", class = c("xts",
"zoo"), .Dim = c(10L, 9L), .Dimnames = list(NULL, c("AVON", "BA.",
"CMRG", "COB", "MGGT", "QQ.", "RR.", "SNR", "ULE")))
I need to return the same presentation of my data but each value is a percentage of the total of the row it belongs to. I did a lot of research and tried prop.table which returns a subscript error and finally I used rowPercents which is part of RcmdrMisc package. However, I could not find how to let it ignore the NA in my data set.
In the example provides there are two whole columns of NA. I can not drop them as the whole data set has some values for the subsequent rows.
Note the the class of my example is zoo and xts
You don't need any external packages for this.
dat.percent <- dat / rowSums(dat, na.rm = T) * 100
Check that it works:
> all(abs(rowSums(dat.percent, na.rm = T) - 100) < 0.0001)
[1] TRUE
prop.table does not seem to work with xts/zoo objects but this works:
library(xts)
prop.table(coredata(x), 1)
It returns all NAs which is correct since there is an NA in each row (and it is impossible to calculate the proportions without knowing every value). If you want to regard the NA values as zero then:
prop.table( na.fill(coredata(x), 0), 1)

Resources