df = data.frame(a = c(0, 0), b = c(17, 15),
c = c(35,37), d = c(55,57),
e = c(80, 85), x = c(1, 2),
w1 = c(20, 30), w2 = c(0.2, 0.3))
ggplot(df) +
geom_boxplot(aes(x = x, ymin = a, lower = b, middle = c, upper = d, ymax = e),
stat = "identity")
I have a dataframe containing the values of each quantile for a boxplot, (a-e).
Is it possible use columns w1 or w2 to define the width of the boxplots in ggplot?
My desired result is similar to using varwidth in graphics::boxplot but with custom widths.
graphics::boxplot(mpg~gear, mtcars, varwidth = T)
Don't think this is a duplicate since it seems like the weight argument doesn't work with stat = identity.
Looks like it can be done by using stat_summary.
df = data.frame(a = c(0, 0), b = c(17, 15),
c = c(35,37), d = c(55,57),
e = c(80, 85), x = factor(c(1, 2)),
w = c(0.2, 0.3))
df2 = reshape2::melt(data = df, id = "x")
ff = function(x){
data.frame(
ymin = x[1],
lower = x[2],
middle = x[3],
upper = x[4],
ymax = x[5],
width = x[6]
)
}
ggplot(df2, aes(x, value)) + stat_summary(fun.data = ff, geom = "boxplot")
But i am not sure if this is the best way to do it.
Related
I would like to plot an angle between two lines using ggplot2, meaning something similar to the bold red line in the plot below. Is there an easy solution to this?
Data and code to make the plot without the red line:
library(tidyverse)
df <- tibble(
line = c("A", "A", "B", "B"),
x = c(1, 5, 1, 3),
y = c(1, 3, 1, 5))
ggplot(
df, aes(x, y, group = line))+
geom_path()
have a look at geom_curve, e.g. :
ggplot( df, aes(x, y, group = line))+
geom_path() +
geom_curve(aes(x = 1.5, y = 2, xend = 2, yend = 1.5), curvature = -0.5, color = "red", size = 3)
You will have to tweak it a bit to use it in a more robust, automatic way, for example:
red_curve <- df %>%
group_by(line) %>%
summarise( avg_x = mean(x),
avg_y = mean(y))
ggplot( df, aes(x, y, group = line))+
geom_path() +
geom_curve( data = red_curve, aes(x = avg_x[1], y = avg_y[1], xend = avg_x[2], yend = avg_y[2]), curvature = 0.5, color = "red", size = 3)
Here is a solution with geom_arc of the ggforce package.
library(ggplot2)
library(ggforce)
angle <- function(p, c){
M <- p - c
Arg(complex(real = M[1], imaginary = M[2]))
}
O <- c(1,1)
P1 <- c(5,3)
P2 <- c(3,5)
a1 <- angle(P1, O)
a2 <- angle(P2, O)
df <- data.frame(
line = c("A", "A", "B", "B"),
x = c(1, 5, 1, 3),
y = c(1, 3, 1, 5)
)
ggplot(df, aes(x, y, group = line)) +
geom_path() +
geom_arc(aes(x0 = 1, y0 = 1, r = 1, start = a1, end = a2),
color="red", size = 2, inherit.aes = FALSE)
The arc does not look like a true arc circle. That's because the aspect ratio is not set to 1. To set the aspect ratio to 1:
ggplot(df, aes(x, y, group = line)) +
geom_path() +
geom_arc(aes(x0 = 1, y0 = 1, r = 1, start = a1, end = a2),
color="red", size = 2, inherit.aes = FALSE) +
coord_fixed()
I am trying to use the ks library to calculate the 95% home range for groups within a data set. The problem is that the "break" values which define the cut-off for the 95% contours differ between groups. So far, I have been able to get my plots, but I have to manually add the break values for each group/level and I would really like to find a solution where I can create figures in ggplot where the break values are imported automatically.
require(ks)
require(dplyr)
require(ggplot2)
# define the ks function to pass to a grouped_df
ksFUN = function(data){
H = Hpi(data[,c("x","y")], binned = TRUE) * 1
fhata = kde(data[,c("x","y")], H = H, compute.cont = TRUE,
xmin = c(minX, minY), xmax = c(maxX, maxY))
res95 = data.frame(HR = contourSizes(fhata, cont = 95, approx = TRUE))
dimnames(fhata[['estimate']]) = list(fhata[["eval.points"]][[1]],
fhata[["eval.points"]][[2]])
dat = reshape2::melt(fhata[['estimate']])
dat$breaks50 = fhata[["cont"]]["50%"]
dat$breaks95 = fhata[["cont"]]["5%"]
return(dat)
}
set.seed(100)
# create some data
df1 = data.frame(x = rnorm(100, 0, 5),
y = rnorm(100, 0, 5),
Group = "Test1")
df2 = data.frame(x = rnorm(100, 10, 5),
y = rnorm(100, 10, 5),
Group = "Test2")
df = rbind(df1, df2)
# Set the minimum and maximum x and y values outside
# of the ksFUN to keep the data on the same scale
minX = min(df$x, na.rm = T); maxX = max(df$x, na.rm = T)
minY = min(df$y, na.rm = T); maxY = max(df$y, na.rm = T)
xx = df %>%
group_by(Group) %>%
do(as.data.frame(ksFUN(.)))
# extract the break value for the 95% contour (home range) and 50% (core area)
breaks = xx %>%
group_by(Group) %>%
summarize(breaks95 = mean(breaks95),
breaks50 = mean(breaks50))
breaks
# The only way I have been able to add the breaks is to manually add them
ggplot(data = xx, aes(x = Var1, y = Var2, fill = Group)) +
geom_point(data = df, aes(x = x, y = y, col = Group)) +
stat_contour(data = xx[xx$Group == "Test1",], aes(z = value),
breaks = 0.000587, alpha = 0.3, geom = "polygon") +
stat_contour(data = xx[xx$Group == "Test2",], aes(z = value),
breaks = 0.000527, alpha = 0.3, geom = "polygon")
I would really like to find a solution where I don't have to explicitly pass the break values to the stat_contour functions
Is there a problem with using the breaks column in breaks? e.g.
# base plot
pl <- ggplot(data = xx, aes(x = Var1, y = Var2, fill = Group)) +
geom_point(data = df, aes(x = x, y = y, col = Group))
groups <- unique(xx$Group)
# loop and add for each group
for(i in groups){
pl <- pl + stat_contour(data = xx[xx$Group == i,], aes(z = value),
breaks = breaks[breaks$Group == i, ]$breaks,
alpha = 0.3, geom = "polygon")
}
pl
I get some weird plots, at the edges, especially when I remove the breaks part from stat_contour, which leads me to think there might be a bug in ksFUN
I am trying to use a log-modulus transformation in my plot. It was working fine...
library(tidyverse)
library(scales)
log_modulus_trans <- function()
trans_new(name = "log_modulus",
transform = function(x) sign(x) * log(abs(x) + 1),
inverse = function(x) sign(x) * ( exp(abs(x)) - 1 ))
# fake data
set.seed(1)
d <- data_frame(
tt = rep(1:10, 3),
cc = rep(LETTERS[1:3], each = 10),
xx = c(rnorm(10, mean = 100, sd = 10),
rnorm(10, mean = 0, sd = 10),
rnorm(10, mean = -100, sd = 10)))
ggplot(data = d,
mapping = aes(x = tt, y = xx, group = cc)) +
geom_line() +
coord_trans(y = "log_modulus")
When I tried to add a geom_vline() things got weird...
ggplot(data = d,
mapping = aes(x = tt, y = xx, group = cc)) +
geom_line() +
coord_trans(y = "log_modulus") +
geom_vline(xintercept = 5)
Any idea how to get geom_vline() to go from the top to the bottom of the plot window... or a work around hack?
Here is a solution using geom_segment
ggplot(data = d,
mapping = aes(x = tt, y = xx, group = cc)) +
geom_line() +
geom_segment(x = 5, xend = 5, y = -150, yend = 150) +
coord_trans(y = "log_modulus")
I am working on a boxplot with forecast and observations which is quite long dataset. I am providing a sample format here.
> forecasts <- data.frame(f_type = c(rep("A", 9), rep("B", 9)),
Date = c(rep(as.Date("2007-01-31"),3), rep(as.Date("2007-02-28"), 3), rep(as.Date("2007-03-31"), 3), rep(as.Date("2007-01-31"), 3), rep(as.Date("2007-02-28"), 3), rep(as.Date("2007-03-31"), 3)),
value = c(10, 50, 60, 05, 90, 20, 30, 46, 39, 69, 82, 48, 65, 99, 75, 15 ,49, 27))
>
> observation <- data.frame(Dt = c(as.Date("2007-01-31"), as.Date("2007-02-28"), as.Date("2007-03-31")),
obs = c(30,49,57))
So far I have:
ggplot() +
geom_boxplot(data = forecasts,
aes(x = as.factor(Date), y = value,
group = interaction(Date, f_type), fill = f_type)) +
geom_line(data = observations,
aes(x = as.factor(Dt), y = obs, group = 1),
size = 2)
With this the box and whiskers are set by default. I want to assign these values so that I will know the extent of the whiskers. I have tried to pass a function with stat_summary with like:
f <- function(x) {
r <- quantile(x, probs = c(0.05, 0.25, 0.5, 0.75, 0.95))
names(r) <- c("ymin", "lower", "middle", "upper", "ymax")
r
}
o <- function(x) {
subset(x, x < quantile(x,probs = 0.05) | quantile(x,probs = 0.95) < x)
}
ggplot(forecasts, aes(x = as.factor(Date), y = value)) +
stat_summary(fun.data = f, geom = "boxplot", aes(group = interaction(Date, f_type), fill = f_type)) +
stat_summary(fun.y = o, geom = "point")
But, with this the groups are messed up. This produces stacked up plots.
Does anyone how to accomplish this?
With a little preprocessing you can summarise the values by date and f_type to generate the desired ymin, lower, middle, upper and ymax arguments of geom_boxplot (the trick is to set stat = "identity"):
forecasts %>% group_by(f_type, Date) %>%
summarise( # You can set your desired values/quantiles here
y_min = quantile(value, 0.05),
low = quantile(value, 0.25),
mid = quantile(value, 0.5),
high = quantile(value, 0.75),
y_max = quantile(value, 0.95)
) %>%
ggplot() +
geom_boxplot(
aes(
ymin = y_min,
lower = low,
middle = mid,
upper = high,
ymax = y_max,
x = as.factor(Date),
fill = f_type
),
stat = "identity"
) +
geom_line(
data = observations,
aes(
x = as.factor(Dt),
y = obs, group = 1
),
size = 2
)
Just curious how can you generate the dcauchy distribution from Wikipedia:
Normally, you have
dcauchy(x, location = 0, scale = 1, log = FALSE)
for one line density p(x) v.s x
I assume in order to generate the diagram from wiki, a data.frame involves?
cauchy_dist <- data.frame(cauchy1 = rcauchy(10, location = 0, scale = 1, log = FALSE), cauchy2 = ....... , cauchy3 = ..... )
or you just need to
plot(x, P(x))
and then add lines to it?
You can use ggplot2's stat_function:
ggplot(data.frame(x = c(-5, 5)), aes(x)) +
stat_function(fun = dcauchy, n = 1e3, args = list(location = 0, scale = 0.5), aes(color = "a"), size = 2) +
stat_function(fun = dcauchy, n = 1e3, args = list(location = 0, scale = 1), aes(color = "b"), size = 2) +
stat_function(fun = dcauchy, n = 1e3, args = list(location = 0, scale = 2), aes(color = "c"), size = 2) +
stat_function(fun = dcauchy, n = 1e3, args = list(location = -2, scale = 1), aes(color = "d"), size = 2) +
scale_x_continuous(expand = c(0, 0)) +
scale_color_discrete(name = "",
labels = c("a" = expression(x[0] == 0*","~ gamma == 0.5),
"b" = expression(x[0] == 0*","~ gamma == 1),
"c" = expression(x[0] == 0*","~ gamma == 2),
"d" = expression(x[0] == -2*","~ gamma == 1))) +
ylab("P(x)") +
theme_bw(base_size = 24) +
theme(legend.position = c(0.8, 0.8),
legend.text.align = 0)
You could create the data as follows:
location <- c(0, 0, 0, -2)
scale <- c(0.5, 1, 2, 1)
x <- seq(-5, 5, by = 0.1)
cauchy_data <- Map(function(l, s) dcauchy(x, l, s), location, scale)
names(cauchy_data) <- paste0("cauchy", seq_along(location))
cauchy_tab <- data.frame(x = x, cauchy_data)
head(cauchy_tab)
## x cauchy1 cauchy2 cauchy3 cauchy4
## 1 -5.0 0.006303166 0.01224269 0.02195241 0.03183099
## 2 -4.9 0.006560385 0.01272730 0.02272830 0.03382677
## 3 -4.8 0.006833617 0.01324084 0.02354363 0.03600791
## 4 -4.7 0.007124214 0.01378562 0.02440091 0.03839685
## 5 -4.6 0.007433673 0.01436416 0.02530285 0.04101932
## 6 -4.5 0.007763656 0.01497929 0.02625236 0.04390481
Map is used to apply a function of multiple variables to just as many vectors element by element. Thus, the first list element of cauchy_data will contain the following
dcauchy(x, location[1], scale[1])
and so on. I then put the Cauchy data in a data frame together with the vector of x coordinates, x. So you have the desired data table.
There are, of course, many ways to plot this. I prefer to use ggplot and show you how to plot as an example:
library(tidyr)
library(ggplot2)
curve_labs <- paste(paste("x0 = ", location), paste("gamma = ", scale), sep = ", ")
plot_data <- gather(cauchy_tab, key = curve, value = "P", -x )
ggplot(plot_data, aes(x = x, y = P, colour = curve)) + geom_line() +
scale_colour_discrete(labels = curve_labs)
You could tweak the plot in many ways to get something that more closely resembles the plot from Wikipedia.