Mean ±SE in scatterplot of three groups - r

edit: It's occurred to me that standard error cannot / will not be computed by R for the non-HG species (in particular, GB) where n is rather small (n = 4 for GB). Even if this is not the case for my failure to produce the plots, this low n is likely to invalidate standard error calculations. I'd appreciate others' advice.
I've seen a few similar answers, but nothing that closely approximates to what I want to do.
I've successfully used the code located here -
https://community.rstudio.com/t/using-stat-instead-of-dplyr-to-summarize-groups-in-a-ggplot/13916/2
- to produce a biplot showing the mean +/- s.e. whiskers for two groups of samples.
However, I've not been successful in re-working this code for use when there are three groups of samples. Can anyone tell me where I'm going wrong and how to rectify it please?
Thank you very much indeed in advance for advice; much appreciated :-)
Sample data and code (below):
Species d13C lin d34S lin
GB -20.1 8.09
HG -20.1 8.09
HG -19.51 9.46
HG -17.55 16.81
HG -23.72 8.03
HG -23.79 9.02
HG -18.09 8.64
HG -21.98 10.95
HG -18.6 8.5
HG -22.75 10.9
HG -21.7 9.08
HG -25 7.23
HG -17.61 16.56
HG -20.5 10.17
HG -18.14 15.4
GB -22.01 15.78
GB -19.62 12.62
LB -21.69 14.96
LB -25.56 8.4
LB -23.43 13.61
LB -22.92 12.68
LB -23.81 15.47
LB -25.42 8.63
HG -26.11 14.15
HG -20.61 9.27
HG -19.18 15.67
HG -19.76 14.49
HG -17.98 8.77
HG -22.71 9.26
HG -23.33 9.8
HG -22.7 10.84
HG -25.05 14.48
HG -24.63 9.98
HG -21.08 10.53
HG -24.93 8.42
HG -19.62 15.6
HG -23.82 7.7
HG -20.08 8.83
HG -24.36 11.01
HG -24.55 9.25
HG -21.89 16.59
HG -19 14.9
HG -18.96 17.69
HG -24.49 7.73
HG -19.15 8.82
HG -21.68 15.42
HG -23.6 8.03
HG -21.07 7.7
HG -19.56 14.34
HG -18.31 7.19
HG -23.39 14.24
HG -19.48 17.21
HG -18.25 8.71
HG -23.9 12.06
HG -23.19 8.59
HG -25.03 15.09
HG -21.01 11.74
HG -20.44 14.47
HG -24.4 10.5
HG -24.17 13.62
HG -22.41 15.63
HG -23.23 15.03
HG -25.22 13.36
HG -25.74 7.21
HG -18.01 12.47
HG -19.51 13.72
HG -25 14.56
HG -23.8 8.04
HG -21.99 12.24
HG -19.43 15.52
HG -24.93 9.45
HG -24.86 9.27
LB -25.27 12.94
LB -25.58 12.73
HG -26.02 10.28
HG -24.02 15.89
GB -24.85 7.14
LB -23.24 16.17
library(pillar)
library(rlang)
library (dplyr)
library(ggplot2)
library(ggstance)
fnc = function(data, group, x, y, z, adj=1) {
group=enquo(group)
x = enquo(x)
y = enquo(y)
z = enquo(z)
# Set size of whisker end caps
wv = data %>% pull(!!y) %>% range %>% diff/100*adj
wh = data %>% pull(!!x) %>% range %>% diff/100*adj
# If grouping variable is numeric, turn it into a factor
if(data %>% pull(!!group) %>% is.numeric) {
data = data %>%
mutate(!!quo_name(group) := factor(!!group))
}
# Generate column names for the x and y means that we'll calculate below
xmean = paste0(quo_text(x), "_mean")
ymean = paste0(quo_text(y), "_mean")
zmean = paste0(quo_text(z), "_mean")
left_join(data,
data %>%
group_by(!!group) %>%
summarise_at(vars(!!x, !!y, !!z) funs(mean=mean))
) %>%
ggplot(aes(colour=!!group)) +
geom_point(aes(x=!!x, y=!!y, z=!!z), size=5) +
theme_classic() +
geom_point(aes(x=!!sym(xmean), y=!!sym(ymean), z=!!sym(zmean)), shape=18, size=3) +
stat_summary(fun.data=mean_se, fun.args=list(mult=1.96),
aes(x=!!sym(xmean), y=!!y, z=!!z),
geom="errorbar", width=wh) +
stat_summaryh(fun.data=mean_se_h, fun.args=list(mult=1.96),
aes(x=!!x, y=!!y, z=!!z, sym(ymean)),
geom="errorbarh", width=wv)
}
##plot the ERROR BAR
fnc(data, Species, d13C.lin, d34S.lin) +
theme(axis.text=element_text(size=12, face = "bold", colour = "black"),
axis.title=element_text(size=17,face="bold", colour = "black")) +
theme(legend.text=element_text(size=14, face = "bold", colour = "black")) +
theme(legend.background = element_rect(fill="white",
size=0.5, linetype="solid",
colour ="black")) +
theme(legend.title = element_text(size = 14, face = "bold", colour = "black")) +
labs(y = expression(delta^{34}*"S"*" (‰)"), x = expression(delta^{13}*"C"*" (‰)"))
This is the error message I receive:
Error in is_call(expr, paren_sym) :
argument "expr" is missing, with no default
As I say, any help greatly appreciated please.

The error comes from the z argument which is not passed as argument in the function.
fnc <- function(data, group, x, y, adj=1) {
group <- enquo(group)
x <- enquo(x)
y <- enquo(y)
# Set size of whisker end caps
wv <- data %>%
pull(!!y) %>%
range %>%
diff/100*adj
wh <- data %>%
pull(!!x) %>%
range %>%
diff/100*adj
# If grouping variable is numeric, turn it into a factor
if(data %>%
pull(!!group) %>%
is.numeric) {
data <- data %>%
mutate(!!quo_name(group) := factor(!!group))
}
# Generate column names for the x and y means that we'll calculate below
xmean <- str_c(as_name(x), "_mean")
ymean <- str_c(as_name(y), "_mean")
left_join(data,
data %>%
group_by(!!group) %>%
summarise_at(vars(!!x, !!y), list(mean = ~ mean(.)))) %>%
ggplot(aes(colour=!!group)) +
geom_point(aes(x=!!x, y=!!y), size=5) +
theme_classic() +
geom_point(aes(x=!!sym(xmean), y=!!sym(ymean)), shape=18, size=3) +
stat_summary(fun.data=mean_se, fun.args=list(mult=1.96),
aes(x=!!sym(xmean), y=!!y),
geom="errorbar", width=wh) +
stat_summaryh(fun.data=mean_se_h, fun.args=list(mult=1.96),
aes(x=!!x, y = !!y),
geom="errorbarh", width=wv)
}
-testing
fnc(df1, Species, d13C.lin, d34S.lin)
data
df1 <- structure(list(Species = c("GB", "HG", "HG", "HG", "HG", "HG",
"HG", "HG", "HG", "HG", "HG", "HG", "HG", "HG", "HG", "GB", "GB",
"LB", "LB", "LB", "LB", "LB", "LB", "HG", "HG", "HG", "HG", "HG",
"HG", "HG", "HG", "HG", "HG", "HG", "HG", "HG", "HG", "HG", "HG",
"HG", "HG", "HG", "HG", "HG", "HG", "HG", "HG", "HG", "HG", "HG",
"HG", "HG", "HG", "HG", "HG", "HG", "HG", "HG", "HG", "HG", "HG",
"HG", "HG", "HG", "HG", "HG", "HG", "HG", "HG", "HG", "HG", "HG",
"LB", "LB", "HG", "HG", "GB", "LB"), d13C.lin = c(-20.1, -20.1,
-19.51, -17.55, -23.72, -23.79, -18.09, -21.98, -18.6, -22.75,
-21.7, -25, -17.61, -20.5, -18.14, -22.01, -19.62, -21.69, -25.56,
-23.43, -22.92, -23.81, -25.42, -26.11, -20.61, -19.18, -19.76,
-17.98, -22.71, -23.33, -22.7, -25.05, -24.63, -21.08, -24.93,
-19.62, -23.82, -20.08, -24.36, -24.55, -21.89, -19, -18.96,
-24.49, -19.15, -21.68, -23.6, -21.07, -19.56, -18.31, -23.39,
-19.48, -18.25, -23.9, -23.19, -25.03, -21.01, -20.44, -24.4,
-24.17, -22.41, -23.23, -25.22, -25.74, -18.01, -19.51, -25,
-23.8, -21.99, -19.43, -24.93, -24.86, -25.27, -25.58, -26.02,
-24.02, -24.85, -23.24), d34S.lin = c(8.09, 8.09, 9.46, 16.81,
8.03, 9.02, 8.64, 10.95, 8.5, 10.9, 9.08, 7.23, 16.56, 10.17,
15.4, 15.78, 12.62, 14.96, 8.4, 13.61, 12.68, 15.47, 8.63, 14.15,
9.27, 15.67, 14.49, 8.77, 9.26, 9.8, 10.84, 14.48, 9.98, 10.53,
8.42, 15.6, 7.7, 8.83, 11.01, 9.25, 16.59, 14.9, 17.69, 7.73,
8.82, 15.42, 8.03, 7.7, 14.34, 7.19, 14.24, 17.21, 8.71, 12.06,
8.59, 15.09, 11.74, 14.47, 10.5, 13.62, 15.63, 15.03, 13.36,
7.21, 12.47, 13.72, 14.56, 8.04, 12.24, 15.52, 9.45, 9.27, 12.94,
12.73, 10.28, 15.89, 7.14, 16.17)), class = "data.frame",
row.names = c(NA,
-78L))

Related

How do I remove white space from between letters and not numbers?

I have character strings that I want to convert to tables. The identifier in each row can have white spaces and I need them removed without also removing spaces between the numbers. Is it possible to use a regular expression to achieve this?
For example, the data would look like this:
A B C 5.65 7.8
DC 5.65 7.8
D AB 7.9 12.2
D AB C 7.9 1.2
A BC 13.88 2.4
AB C 7.9 12.2
And I want to get to this:
ABC 5.65 7.8
DC 5.65 7.8
DAB 7.9 12.2
DABC 7.9 1.2
ABC 13.88 2.4
ABC 7.9 12.2
EDIT: As requested, this is an example of the data type and the form in which I receive it. This has 16 rows, each with 6 columns of data, but the first column is an alphabetic identifier.
# Data as I receive it.
data <- c("A", "a", "2.07", "2.35", "39.00", "82.20", "8.8", "3.80",
"B", "2.26", "2.25", "40.00", "80.80", "8.1", "1.86", "D",
"Et", "2.07", "2.22", "41.00", "83.80", "8.8", "3.87", "F",
"2.05", "2.15", "43.00", "82.20", "8.4", "3.11", "Bc", "2.08",
"2.12", "48.00", "82.60", "8.3", "2.47", "Gf", "H", "I",
"2.08", "2.10", "46.00", "82.20", "8.1", "2.90", "J", "K",
"1.95", "2.08", "38.00", "83.40", "8.7", "1.63", "L", "M",
"1.89", "2.07", "45.00", "83.80", "9.0", "1.84", "N", "2.06",
"2.05", "41.00", "80.60", "9.0", "4.09", "O", "P", "1.86",
"2.04", "48.00", "81.60", "8.6", "2.60", "Qst", "R", "1.95",
"2.03", "44.00", "82.80", "8.8", "1.40", "S", "2.03", "2.02",
"40.00", "81.40", "8.2", "1.74", "T", "1.95", "2.01", "43.00",
"81.80", "9.0", "2.30", "Unh", "1.96", "2.00", "44.00", "82.60",
"9.2", "2.40", "V", "W", "C", "1.98", "1.97", "40.00",
"82.00", "8.1", "1.15", "Yu", "1.90", "1.96", "41.00", "82.80",
"9.6", "2.08", "Z", "a", "bi", "1.90", "1.95", "42.00",
"84.20", "9.6", "1.69")
# Required format
data2 <- c("Aa", "2.07", "2.35", "39.00", "82.20", "8.8", "3.80",
"B", "2.26", "2.25", "40.00", "80.80", "8.1", "1.86",
"DEt", "2.07", "2.22", "41.00", "83.80", "8.8", "3.87", "F",
"2.05", "2.15", "43.00", "82.20", "8.4", "3.11", "Bc", "2.08",
"2.12", "48.00", "82.60", "8.3", "2.47", "GfHI",
"2.08", "2.10", "46.00", "82.20", "8.1", "2.90", "JK",
"1.95", "2.08", "38.00", "83.40", "8.7", "1.63", "LM",
"1.89", "2.07", "45.00", "83.80", "9.0", "1.84", "N", "2.06",
"2.05", "41.00", "80.60", "9.0", "4.09", "OP", "1.86",
"2.04", "48.00", "81.60", "8.6", "2.60", "QstR", "1.95",
"2.03", "44.00", "82.80", "8.8", "1.40", "S", "2.03", "2.02",
"40.00", "81.40", "8.2", "1.74", "T", "1.95", "2.01", "43.00",
"81.80", "9.0", "2.30", "Unh", "1.96", "2.00", "44.00", "82.60",
"9.2", "2.40", "VWC", "1.98", "1.97", "40.00",
"82.00", "8.1", "1.15", "Yu", "1.90", "1.96", "41.00", "82.80",
"9.6", "2.08", "Zabi", "1.90", "1.95", "42.00",
"84.20", "9.6", "1.69")
df <- data.frame(matrix(data2, ncol=7, byrow=T))
To do as you request within your R environment, one approach is to convert the vector to a string, apply a regular expression filter to the string, then convert the string back to a vector.
See details below, hopefully this points you in the right direction.
Solution
data <- c("A", "a", "2.07", "2.35", "39.00", "82.20", "8.8", "3.80",
"B", "2.26", "2.25", "40.00", "80.80", "8.1", "1.86", "D",
"Et", "2.07", "2.22", "41.00", "83.80", "8.8", "3.87", "F",
"2.05", "2.15", "43.00", "82.20", "8.4", "3.11", "Bc", "2.08",
"2.12", "48.00", "82.60", "8.3", "2.47", "Gf", "H", "I",
"2.08", "2.10", "46.00", "82.20", "8.1", "2.90", "J", "K",
"1.95", "2.08", "38.00", "83.40", "8.7", "1.63", "L", "M",
"1.89", "2.07", "45.00", "83.80", "9.0", "1.84", "N", "2.06",
"2.05", "41.00", "80.60", "9.0", "4.09", "O", "P", "1.86",
"2.04", "48.00", "81.60", "8.6", "2.60", "Qst", "R", "1.95",
"2.03", "44.00", "82.80", "8.8", "1.40", "S", "2.03", "2.02",
"40.00", "81.40", "8.2", "1.74", "T", "1.95", "2.01", "43.00",
"81.80", "9.0", "2.30", "Unh", "1.96", "2.00", "44.00", "82.60",
"9.2", "2.40", "V", "W", "C", "1.98", "1.97", "40.00",
"82.00", "8.1", "1.15", "Yu", "1.90", "1.96", "41.00", "82.80",
"9.6", "2.08", "Z", "a", "bi", "1.90", "1.95", "42.00",
"84.20", "9.6", "1.69")
# Use stringi base regular expression engine
require(stringi)
# Convert the vector data to be a string sequence - so we can manipulate as text
data1 <- toString(data)
# Now we can apply the regular expression substitution to the data (formatted as a string...
# Here we do a:
#
# (?<!\d) - Negative look behind to prevent a digit.
# , - A literal combination of quotes, comma and space. We drop the ", " in conversion to string...
# (?!\d) - Negative look ahead to prevent a digit.
#
data3 = stri_replace_all_regex(str = data1, pattern = '(?<!\\d), (?!\\d)', replacement = '')
# OK, check the string data...
data3
# Now we convert the string back to be a vector...
newData = strsplit(data3, " ")[[1]]
newData
# Now we convert to a dataframe...
df <- data.frame(matrix(newData, ncol=7, byrow=T))
df
# Done
Output
> data <- c("A", "a", "2.07", "2.35", "39.00", "82.20", "8.8", "3.80",
+ "B", "2.26", "2.25", "40.00", "80.80", "8.1", "1.86", "D",
+ "Et", "2.07", "2.22", "41.00", "83.80", "8.8", "3.87", "F",
+ "2.05", "2.15", "43.00", "82.20", "8.4", "3.11", "Bc", "2.08",
+ "2.12", "48.00", "82.60", "8.3", "2.47", "Gf", "H", "I",
+ "2.08", "2.10", "46.00", "82.20", "8.1", "2.90", "J", "K",
+ "1.95", "2.08", "38.00", "83.40", "8.7", "1.63", "L", "M",
+ "1.89", "2.07", "45.00", "83.80", "9.0", "1.84", "N", "2.06",
+ "2.05", "41.00", "80.60", "9.0", "4.09", "O", "P", "1.86",
+ "2.04", "48.00", "81.60", "8.6", "2.60", "Qst", "R", "1.95",
+ "2.03", "44.00", "82.80", "8.8", "1.40", "S", "2.03", "2.02",
+ "40.00", "81.40", "8.2", "1.74", "T", "1.95", "2.01", "43.00",
+ "81.80", "9.0", "2.30", "Unh", "1.96", "2.00", "44.00", "82.60",
+ "9.2", "2.40", "V", "W", "C", "1.98", "1.97", "40.00",
+ "82.00", "8.1", "1.15", "Yu", "1.90", "1.96", "41.00", "82.80",
+ "9.6", "2.08", "Z", "a", "bi", "1.90", "1.95", "42.00",
+ "84.20", "9.6", "1.69")
>
> # Use stringi base regular expression engine
> require(stringi)
>
> # Convert the vector data to be a string sequence - so we can manipulate as text
> data1 <- toString(data)
>
> # Now we can apply the regular expression substitution to the data (formatted as a string...
> # Here we do a:
> #
> # (?<!\d) - Negative look behind to prevent a digit.
> # , - A literal combination of quotes, comma and space. We drop the ", " in conversion to string...
> # (?!\d) - Negative look ahead to prevent a digit.
> #
> data3 = stri_replace_all_regex(str = data1, pattern = '(?<!\\d), (?!\\d)', replacement = '')
> # OK, check the string data...
> data3
[1] "Aa, 2.07, 2.35, 39.00, 82.20, 8.8, 3.80, B, 2.26, 2.25, 40.00, 80.80, 8.1, 1.86, DEt, 2.07, 2.22, 41.00, 83.80, 8.8, 3.87, F, 2.05, 2.15, 43.00, 82.20, 8.4, 3.11, Bc, 2.08, 2.12, 48.00, 82.60, 8.3, 2.47, GfHI, 2.08, 2.10, 46.00, 82.20, 8.1, 2.90, JK, 1.95, 2.08, 38.00, 83.40, 8.7, 1.63, LM, 1.89, 2.07, 45.00, 83.80, 9.0, 1.84, N, 2.06, 2.05, 41.00, 80.60, 9.0, 4.09, OP, 1.86, 2.04, 48.00, 81.60, 8.6, 2.60, QstR, 1.95, 2.03, 44.00, 82.80, 8.8, 1.40, S, 2.03, 2.02, 40.00, 81.40, 8.2, 1.74, T, 1.95, 2.01, 43.00, 81.80, 9.0, 2.30, Unh, 1.96, 2.00, 44.00, 82.60, 9.2, 2.40, VWC, 1.98, 1.97, 40.00, 82.00, 8.1, 1.15, Yu, 1.90, 1.96, 41.00, 82.80, 9.6, 2.08, Zabi, 1.90, 1.95, 42.00, 84.20, 9.6, 1.69"
>
> # Now we convert the string back to be a vector...
> newData = strsplit(data3, " ")[[1]]
> newData
[1] "Aa," "2.07," "2.35," "39.00," "82.20," "8.8," "3.80," "B," "2.26," "2.25," "40.00," "80.80,"
[13] "8.1," "1.86," "DEt," "2.07," "2.22," "41.00," "83.80," "8.8," "3.87," "F," "2.05," "2.15,"
[25] "43.00," "82.20," "8.4," "3.11," "Bc," "2.08," "2.12," "48.00," "82.60," "8.3," "2.47," "GfHI,"
[37] "2.08," "2.10," "46.00," "82.20," "8.1," "2.90," "JK," "1.95," "2.08," "38.00," "83.40," "8.7,"
[49] "1.63," "LM," "1.89," "2.07," "45.00," "83.80," "9.0," "1.84," "N," "2.06," "2.05," "41.00,"
[61] "80.60," "9.0," "4.09," "OP," "1.86," "2.04," "48.00," "81.60," "8.6," "2.60," "QstR," "1.95,"
[73] "2.03," "44.00," "82.80," "8.8," "1.40," "S," "2.03," "2.02," "40.00," "81.40," "8.2," "1.74,"
[85] "T," "1.95," "2.01," "43.00," "81.80," "9.0," "2.30," "Unh," "1.96," "2.00," "44.00," "82.60,"
[97] "9.2," "2.40," "VWC," "1.98," "1.97," "40.00," "82.00," "8.1," "1.15," "Yu," "1.90," "1.96,"
[109] "41.00," "82.80," "9.6," "2.08," "Zabi," "1.90," "1.95," "42.00," "84.20," "9.6," "1.69"
>
> # Now we convert to a dataframe...
> df <- data.frame(matrix(newData, ncol=7, byrow=T))
> df
X1 X2 X3 X4 X5 X6 X7
1 Aa, 2.07, 2.35, 39.00, 82.20, 8.8, 3.80,
2 B, 2.26, 2.25, 40.00, 80.80, 8.1, 1.86,
3 DEt, 2.07, 2.22, 41.00, 83.80, 8.8, 3.87,
4 F, 2.05, 2.15, 43.00, 82.20, 8.4, 3.11,
5 Bc, 2.08, 2.12, 48.00, 82.60, 8.3, 2.47,
6 GfHI, 2.08, 2.10, 46.00, 82.20, 8.1, 2.90,
7 JK, 1.95, 2.08, 38.00, 83.40, 8.7, 1.63,
8 LM, 1.89, 2.07, 45.00, 83.80, 9.0, 1.84,
9 N, 2.06, 2.05, 41.00, 80.60, 9.0, 4.09,
10 OP, 1.86, 2.04, 48.00, 81.60, 8.6, 2.60,
11 QstR, 1.95, 2.03, 44.00, 82.80, 8.8, 1.40,
12 S, 2.03, 2.02, 40.00, 81.40, 8.2, 1.74,
13 T, 1.95, 2.01, 43.00, 81.80, 9.0, 2.30,
14 Unh, 1.96, 2.00, 44.00, 82.60, 9.2, 2.40,
15 VWC, 1.98, 1.97, 40.00, 82.00, 8.1, 1.15,
16 Yu, 1.90, 1.96, 41.00, 82.80, 9.6, 2.08,
17 Zabi, 1.90, 1.95, 42.00, 84.20, 9.6, 1.69
> # Done

Getting an error when using gather to create clustered bar chart

I am trying to create a clustered bar chart and am trying to use the gather function to get the correct bars to group together. When I do this, I get the error that the gather function could not be found. I have dplyr and magrittr installed. Any thoughts on how to make this work or if there is a better way to create the bar chart grouped by test and date?
Plaster <- Plaster_2019_Data %>%
gather("pH", "Temperature", "Surface", -Date)
Data:
Surface pH Temperature Date
12.08 8.56 11.16 5/13/2019
11.68 8.90 8.76 5/29/2019
8.69 9.07 14.65 6/10/2019
2.26 7.49 17.51 6/24/2019
4.54 7.77 23.82 7/8/2019
2.13 8.17 25.29 8/5/2019
6.34 8.62 26.50 8/19/2019
9.33 9.03 24.31 9/4/2019
10.98 8.58 21.02 9/16/2019
9.59 8.61 17.33 9/30/2019
16.07 8.70 10.39 10/14/2019
9.12 8.07 6.38 11/14/2019
We can use require to install and load the package tidyr as gather is from tidyr
require('tidyr')
As mentioned by #akrun, you need tidyr. Moreover, the function pivot_longer is dedicated to replace in a near future the function gather (https://tidyr.tidyverse.org/reference/gather.html).
Moreover, as the range of values between your test is quite different, I would suggest to use facet_wrap to make a nice plot.
Altogether, you can write something like that:
df$Date = as.Date(df$Date, format = "%m/%d/%Y")
library(tidyr)
library(ggplot2)
library(dplyr)
df %>% pivot_longer(., -Date, names_to = "Test", values_to = "value") %>%
ggplot(aes(x = Date, y = value, fill = Test))+
geom_bar(stat = "identity", position = position_dodge())+
facet_wrap(.~Test, scales = "free") +
scale_x_date(date_labels = "%b %d",
date_breaks = "2 weeks")+
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Data
structure(list(Surface = c(12.08, 11.68, 8.69, 2.26, 4.54, 2.13,
6.34, 9.33, 10.98, 9.59, 16.07, 9.12), pH = c(8.56, 8.9, 9.07,
7.49, 7.77, 8.17, 8.62, 9.03, 8.58, 8.61, 8.7, 8.07), Temperature = c(11.16,
8.76, 14.65, 17.51, 23.82, 25.29, 26.5, 24.31, 21.02, 17.33,
10.39, 6.38), Date = structure(c(18029, 18045, 18057, 18071,
18085, 18113, 18127, 18143, 18155, 18169, 18183, 18214), class = "Date")), row.names = c(NA,
-12L), class = "data.frame")

Problem with ggplot: labels and error bars overlap

I made a barplot with error bars and labels written on the bars.
My problem is: I want the labels to appear on the bars and also next to the error bars. That is, I don't want labels and error bars to overlap.
An example with my code:
i <- data.frame(
nbr =c(15.18 ,11.53 ,13.37 ,9.2, 10.9, 12.23 ,9.53, 9.81, 7.86, 12.79,
22.03 ,17.64 ,18.1, 16.78 ,17.53 ,16.97 ,17.76 ,18.35 ,12.82 ,20.91,
22.09 ,19.18 ,17.54 ,18.45 ,19.83 ,16.99 ,19.69 ,19.45 ,13.07 ,21.41,
12.13 ,9.76, 10.79 ,10.74 ,12.43 ,9.65, 12.18 ,11.63 ,6.74, 12.31,
17.5, 14.75 ,15.2, 13.89 ,15.24 ,17.43 ,15.22 ,14.04,9.49, 15.86,
8.09, 5.86, 6.68, 7.34, 8.01, 6.35, 8.4, 7.4, 3.88, 6.92 ),
SD = c(4.46, 4.19, 2.27, 2.19, 5.10, 7.25, 8.42, 6.47, 6.04, 7.48, 6.38, 6.05, 3.58, 3.85,
6.94, 6.87, 6.32, 4.28, 4.10, 7.34, 7.46, 6.62, 4.28, 5.24, 8.00, 8.10, 7.73, 5.18,
5.53, 7.96, 7.46, 7.05, 4.47, 4.73, 8.15, 6.95, 5.88, 3.20, 4.01, 7.34, 7.24, 6.98,
5.98, 4.53, 4.22, 7.21, 4.02, 4.30, 1.96, 2.11, 4.98, 7.16, 8.45, 6.39, 6.20, 7.03,
6.10, 6.42, 3.77, 3.53),
x2=rep(c("a", "b", "c", "d", "e", "f", "g",
"h", "i", "j"),6),
s = c(rep(c(rep(c("3"),10),
rep(c("4"),10),
rep(c("5"),10),
rep(c("6"),10),
rep(c("7"),10),
rep(c("8"),10)),1)))
ii <- i[order(i$s, i$nbr ), ]
sn <- factor(x = 1:60, labels = ii$x2)
ii$sn <- sn
scale_x_reordered <- function(..., sep = "___") {
reg <- paste0(sep, ".+$")
ggplot2::scale_x_discrete(labels = function(x) gsub(reg, "", x), ...)
}
reorder_within <- function(x, by, within, fun = mean, sep = "___", ...) {
new_x <- paste(x, within, sep = sep)
stats::reorder(new_x, by, FUN = fun)
}
dummy2 <- data.frame(s = levels(i$s)[-1], Z = c( 4,16,16,8,4))
dummy2$s <- factor(dummy2$s)
ggplot(ii, aes(reorder_within(sn, nbr, s), nbr,
label =x2)) +
geom_bar(stat = 'identity') +
geom_text(aes(y = 0,fontface=2), angle = 90, hjust = -.05, size = 4)+
scale_x_reordered() +
facet_wrap(.~ s, scales = "free_x", ncol=2)+
#geom_text(aes(label=nbr), vjust=1.6, color="white", size=3.5)+
theme(axis.text.x = element_blank(),
axis.title=element_text(size=16),
axis.text=element_text(face = "bold"),
strip.text.x = element_text(size = 14,face="bold")
)+ geom_errorbar(aes(reorder_within(sn, nbr, s),ymin=nbr-SD, ymax=nbr+SD), width=.2, position=position_dodge(.9))
Example of expected parcel:
I want all the labels to be written next to the error bars on the bars.
Thanks for your help !
I found this solution and wanted to share it with you:
geom_text(aes(y = 0,fontface=2), angle = 90, vjust = -1, hjust = -.05, size = 4)

How to Plot Time-series data on Horizontal bar in R?

R::How to Plot Single Horizontal Bar Showing different stages on Continous Time-series Data from startdate to present date and for navigating time a horizontal scrollbar in R?
This is my data:
var_events time_date event_duration veh_id
LD 17-06-2018 13:25 6.52 B33
WL 17-06-2018 13:25 14.52 B31
TL 17-06-2018 13:26 0.32 B32
TE 17-06-2018 13:26 4.58 B13
UL 17-06-2018 13:26 3.45 B12
WT 17-06-2018 13:26 5.46 B25
UL 17-06-2018 13:26 1.56 B17
TL 17-06-2018 13:26 13.6 B33
SL 17-06-2018 13:26 0.05 B32
Here is a Example of line chart of Previous code:
require(ggplot2)
require(dplyr)
df = structure(list(Event_stage = c("SE", "MN", "MN", "TE", "TE", "TE", "TE", "TE", "TE", "TE", "TE", "WL", "TE", "TE", "SE", "TE", "TE", "WL", "WT", "MN", "WL", "TE", "WL", "WL", "WT", "WL", "LD", "WT", "WL", "WT", "WT", "TE", "WL", "LD", "WT", "LD", "MN", "TL", "TE", "WL", "TL", "TL", "WT", "TE", "TE", "LD", "WT", "TL", "LD" ), event_date = structure(c(1529573704, 1529573710, 1529573713, 1529573724, 1529573855, 1529573874, 1529573880, 1529573895, 1529573906, 1529573918, 1529573925, 1529573931, 1529573931, 1529573941, 1529573947, 1529573969, 1529574006, 1529574054, 1529574088, 1529574114, 1529574120, 1529574123, 1529574134, 1529574137, 1529574148, 1529574163, 1529574164, 1529574148, 1529574169, 1529574170, 1529574178, 1529574188, 1529574189, 1529574196, 1529574178, 1529574188, 1529574203, 1529574213, 1529574214, 1529574214, 1529574215, 1529574227, 1529574231, 1529574242, 1529574244, 1529574245, 1529574248, 1529574260, 1529574262), class = c("POSIXct", "POSIXt"), tzone = "UTC"), stage_duration = c(3.78, 3.47, 2.78, 3.45, 3.32, 4.93, 4.23, 4.22, 3.85, 3.37, 5.88, 5.92, 3.97, 3.7, NA, 4.08, 3.05, 0.57, 11.18, 12.08, 2.6, 3.3, 0.23, 0.85, 0.27, 0.25, 0.82, 10.42, 0.15, 0.43, 1.4, 0.25, 0.7, 0.52, 1.12, 0.45, 12.87, 12.18, 2.92, 0.57, 14.07, 12.72, 17.12, 4.13, 3.13, 0.25, 0.33, 18.98, 1.05), veh_id = c("B35", "B05", "B04", "B08", "B14", "B13", "B04", "B17", "B41", "B05", "B26", "B08", "B35", "B19a", "B10a", "B01a", "B28", "B14", "B14", "B18", "B05", "B37", "B04", "B41", "B04", "B19a", "B04", "B17", "B35", "B13", "B35", "B02b", "B28", "B13", "B19a", "B41", "B02b", "B04", "B15", "B01a", "B41", "B13", "B28", "B27", "B33", "B19a", "B01a", "B19a", "B35")), .Names = c("Event_stage", "event_date", "stage_duration", "veh_id"), row.names = c(NA, -49L), class = c("tbl_df", "tbl", "data.frame"))
# create ggplot
ggplot(data = df %>% filter(veh_id == "B35"), aes(x = event_date,
y = stage_duration)) +
geom_point(aes(color = Event_stage), size= 3) +
geom_line(alpha = 1/2)+
labs(x = "Event date", y = "Stage duration")
enter image description here
This is Sample bar plot, Everything same as in above line chart but instead of line with spikes a Horizontal line or I just want a single bar which is interactive with a Slider/Scrollbar to navigate time ::
enter image description here
Something resembling this plot,But only a Single Horizontal bar with a scrollbar from start-time to present-time::
enter image description here
df %>% filter(veh_id == "B35") %>%
ggplot(
aes(
x = event_date,
y = stage_duration)
) +
geom_bar(stat = "identity") +
labs(x = "Event date", y = "Stage duration") +
coord_flip()

Rolling Regression Data Frame

Appreciate this may have been asked before but I have not found a clear solution to work over a data frame.
I want to run a rolling linear regression over a look back of 5 days. (small so can illustrate here)
So far I am trying:
rollingbeta <- rollapply(df,
width=5,
FUN = function(Z)
{
t = lm(formula=y_Close ~ x_Close+0, data = as.data.frame(Z));
return(t$coef)[1]
},
by.column=FALSE, align="right",fill = NA)
head(rollingbeta,100)
However, I expect to have the beta for the rolling lookback window. Instead I have and output with 10 columns.
> NCOL(rollingbeta)
[1] 10
Can anyone assist?
Here is dummy data (save to .txt and read)
df <- read.table("your_dir\df.txt",header=TRUE, sep="", stringsAsFactors=FALSE)
Date open.x high.x low.x x_Close volume.x open.y high.y low.y y_Close volume.y x.y.cor
1451 2010-01-04 57.32 58.13 57.32 57.85 442900 6.61 6.8400 6.61 6.83 833100 NA
1452 2010-01-05 57.90 58.33 57.54 58.20 436900 6.82 7.1200 6.80 7.12 904500 NA
1453 2010-01-06 58.20 58.56 58.01 58.42 850600 7.05 7.3800 7.05 7.27 759800 NA
1454 2010-01-07 58.31 58.41 57.14 57.90 463600 7.24 7.3000 7.06 7.11 557800 NA
1455 2010-01-08 57.45 58.62 57.45 58.47 206500 7.08 7.3500 6.95 7.29 588100 NA
1456 2010-01-11 58.79 59.00 57.22 57.73 331900 7.38 7.4500 7.17 7.22 450500 NA
1457 2010-01-12 57.20 57.21 56.15 56.34 428500 7.15 7.1900 6.87 7.00 694700 NA
1458 2010-01-13 56.32 56.66 54.83 56.56 577500 7.05 7.1700 6.98 7.15 528800 NA
1459 2010-01-14 56.51 57.05 55.37 55.53 368100 7.08 7.1701 7.08 7.11 279900 NA
1460 2010-01-15 56.59 56.59 55.19 55.84 417900 7.03 7.0500 6.95 7.03 407600 NA
The output should for the first rolling linear regression should be:
NA NA NA NA NA 0.1229065
A faster alternative than wibeasley's answer is to use the rollRegres package as follows
ds <- structure(list(Date = structure(
c(14613, 14614, 14615, 14616, 14617, 14620, 14621, 14622, 14623, 14624), class = "Date"),
open.x = c(57.32, 57.9, 58.2, 58.31, 57.45, 58.79, 57.2, 56.32, 56.51, 56.59),
high.x = c(58.13, 58.33, 58.56, 58.41, 58.62, 59, 57.21, 56.66, 57.05, 56.59),
low.x = c(57.32, 57.54, 58.01, 57.14, 57.45, 57.22, 56.15, 54.83, 55.37, 55.19),
x_Close = c(57.85, 58.2, 58.42, 57.9, 58.47, 57.73, 56.34, 56.56, 55.53, 55.84),
volume.x = c(442900L, 436900L, 850600L, 463600L, 206500L, 331900L, 428500L, 577500L, 368100L, 417900L),
open.y = c(6.61, 6.82, 7.05, 7.24, 7.08, 7.38, 7.15, 7.05, 7.08, 7.03),
high.y = c(6.84, 7.12, 7.38, 7.3, 7.35, 7.45, 7.19, 7.17, 7.1701, 7.05),
low.y = c(6.61, 6.8, 7.05, 7.06, 6.95, 7.17, 6.87, 6.98, 7.08, 6.95),
y_Close = c(6.83, 7.12, 7.27, 7.11, 7.29, 7.22, 7, 7.15, 7.11, 7.03),
volume.y = c(833100L, 904500L, 759800L, 557800L, 588100L, 450500L, 694700L, 528800L, 279900L, 407600L)),
row.names = c(NA, -10L), class = "data.frame")
# we get the same
library(roll)
library(rollRegres)
X <- as.matrix(ds$x_Close)
Y <- ds$y_Close
Ymat <- as.matrix(Y)
all.equal(
roll_lm(x = X, y = Ymat, intercept = FALSE, width = 5L)$coefficients[, 2],
drop(roll_regres.fit(x = X, y = Y, width = 5L)$coefs),
check.attributes = FALSE)
#R [1] TRUE
You can also fit the model with a formula as with lm using the roll_regres function
all.equal(
roll_lm(x = X, y = Ymat, intercept = FALSE, width = 5L)$coefficients[, 2],
drop(roll_regres(y_Close ~ x_Close - 1, ds, width = 5L)$coefs),
check.attributes = FALSE)
#R [1] TRUE
Here is a benchmark of the computation speed
# We add a few more observation to get an interesting example
set.seed(1)
n <- 250 * 5 # 5 years of trading data
X <- as.matrix(rnorm(n))
Y <- rnorm(n)
Ymat <- as.matrix(Y)
microbenchmark::microbenchmark(
roll_lm(x = X, y = Ymat, intercept = FALSE, width = 5L),
roll_regres.fit(x = X, y = Y, width = 5L),
times = 1e3)
#R Unit: microseconds
#R expr min lq mean median uq max neval
#R roll_lm(x = X, y = Ymat, intercept = FALSE, width = 5L) 663.7 739.9 834.2 777.1 860.2 3972.3 1000
#R roll_regres.fit(x = X, y = Y, width = 5L) 186.9 204.6 237.4 224.8 248.3 546.4 1000
Consider using the roll package.
library(magrittr); requireNamespace("roll")
ds <- readr::read_csv(
" Date, open.x, high.x, low.x, x_Close, volume.x, open.y, high.y, low.y, y_Close, volume.y
2010-01-04, 57.32, 58.13, 57.32, 57.85, 442900, 6.61, 6.8400, 6.61, 6.83, 833100
2010-01-05, 57.90, 58.33, 57.54, 58.20, 436900, 6.82, 7.1200, 6.80, 7.12, 904500
2010-01-06, 58.20, 58.56, 58.01, 58.42, 850600, 7.05, 7.3800, 7.05, 7.27, 759800
2010-01-07, 58.31, 58.41, 57.14, 57.90, 463600, 7.24, 7.3000, 7.06, 7.11, 557800
2010-01-08, 57.45, 58.62, 57.45, 58.47, 206500, 7.08, 7.3500, 6.95, 7.29, 588100
2010-01-11, 58.79, 59.00, 57.22, 57.73, 331900, 7.38, 7.4500, 7.17, 7.22, 450500
2010-01-12, 57.20, 57.21, 56.15, 56.34, 428500, 7.15, 7.1900, 6.87, 7.00, 694700
2010-01-13, 56.32, 56.66, 54.83, 56.56, 577500, 7.05, 7.1700, 6.98, 7.15, 528800
2010-01-14, 56.51, 57.05, 55.37, 55.53, 368100, 7.08, 7.1701, 7.08, 7.11, 279900
2010-01-15, 56.59, 56.59, 55.19, 55.84, 417900, 7.03, 7.0500, 6.95, 7.03, 407600"
)
runs <- roll::roll_lm(
x = as.matrix(ds$x_Close),
y = as.matrix(ds$y_Close),
width = 5,
intercept = FALSE
)
# Nested in a named-column, within a matrix, within a list.
ds$beta <- runs$coefficients[, "x1"]
ds$beta
# [1] NA NA NA NA 0.1224813
# [6] 0.1238653 0.1242478 0.1246279 0.1256553 0.1259121
Double-check the alignment of the variables in your dataset. x_Close is around 50, while y_Close is around 7. That might explain the small disparity between the expected 0.1229065 and the 0.1224813 value above.

Resources