Evenly-spaced 1/freq for R function spec.pgram() - r

The goal is to plot the coherency between two time series (i.e. the correlation coefficient as a function of frequency). How can I get 1/freq (i.e. the period) on the x-axis to be evenly spaced?
t <- 0:99
ts1 <- ts(2*cos((2*pi)/24*t))
ts2 <- ts(2*cos((2*pi)/48*t))
ts12 <- ts.intersect(ts1, ts2)
Coh <- spec.pgram(ts12, spans=3)
plot(Coh$freq, Coh$coh, type='l')
plot(1/Coh$freq, Coh$coh, type='l') # how to get 1/freq to be evenly-space?
I have tried to modify the function spec.pgram(), but without success. More specifically, I replaced the line:
freq <- seq.int(from = xfreq/N, by = xfreq/N, length.out = Nspec)
with:
freq.tmp <- seq.int(from = xfreq/N, by = xfreq/N, length.out = Nspec)
freq <- rev(1/seq(from=1/max(freq.tmp), to=1/min(freq.tmp), length.out=Nspec))
Has anyone else had better luck? Thanks

Do you mean that you just want to relabel the x-axis with periods, rather than frequencies? That would maintain the spacing of the values on the x-axis, at the expense of a non-linear scaling for the x values. For example (using ggplot2):
library(ggplot2)
dat = as.data.frame(Coh[c("freq","spec","coh","phase")])
ggplot(dat, aes(freq, coh)) +
geom_line() +
geom_point() +
scale_x_continuous(breaks=dat$freq[seq(1,nrow(dat),3)],
minor_breaks=dat$freq,
labels=round(1/dat$freq[seq(1,nrow(dat),3)],1)) +
labs(x="Period")
You could also set the x-value labels to fall on integer periods:
breaks = c(1:10,15,25,50,100)
ggplot(dat, aes(freq, coh)) +
geom_line() +
geom_point() +
scale_x_continuous(breaks=1/breaks,
minor_breaks = 1/(breaks[-1] - 0.5 * diff(breaks)),
labels=breaks) +
labs(x="Period")

Related

Make ggplot with regression line and normal distribution overlay

I am trying to make a plot to show the intuition behind logistic (or probit) regression. How would I make a plot that looks something like this in ggplot?
(Wolf & Best, The Sage Handbook of Regression Analysis and Causal Inference, 2015, p. 155)
Actually, what I would rather even do is have one single normal distribution displayed along the y axis with mean = 0, and a specific variance, so that I can draw horizontal lines going from the linear predictor to the y axis and sideways normal distribution. Something like this:
What this is supposed to show (assuming I haven't misunderstood something) is . I haven't had much success so far...
library(ggplot2)
x <- seq(1, 11, 1)
y <- x*0.5
x <- x - mean(x)
y <- y - mean(y)
df <- data.frame(x, y)
# Probability density function of the standard logistic distribution
# (location 0, scale 1), i.e. exp(x) / (1 + exp(x))^2.
#
# Arguments:
#   x  numeric vector of points at which to evaluate the density.
# Returns a numeric vector of densities, one per element of `x`.
pdfDeltaFun <- function(x) {
  # stats::dlogis() evaluates the same formula but stays finite in the tails:
  # the hand-written ratio overflows to Inf / Inf = NaN once exp(x) is Inf.
  stats::dlogis(x)
}
# Tried switching the x and y to be able to turn the
# distribution overlay 90 degrees with coord_flip()
ggplot(df, aes(x = y, y = x)) +
geom_point() +
geom_line() +
stat_function(fun = pdfDeltaFun)+
coord_flip()
I think this comes pretty close to the first illustration you give. If this is something you don't need to repeat many times, it is probably best to compute the density curves prior to plotting and use a separate data frame to plot them.
library(ggplot2)
# Eleven equally spaced x values with a linear response (slope 0.5);
# both are then centred on their means.
x <- seq(1, 11, 1)
y <- 0.5 * x
x <- x - mean(x)
y <- y - mean(y)
df <- data.frame(x, y)

# One rotated normal density per row of `df`: the curve is evaluated on a
# 100-point grid spanning +/- 3 around that row's `y`, mirrored (negated) and
# shifted horizontally to that row's `x`. `grp` records the originating row.
curves <- Map(
  function(centre, shift, id) {
    grid <- seq(centre - 3, centre + 3, length.out = 100)
    data.frame(
      x = shift - dnorm(grid, mean = centre),
      y = grid,
      grp = id
    )
  },
  df$y, df$x, seq_len(NROW(df))
)

# Stack the per-row curves into a single data.frame
curves <- do.call(rbind, curves)
ggplot(df, aes(x, y)) +
geom_point() +
geom_line() +
# The path draws the curve
geom_path(data = curves, aes(group = grp)) +
# The polygon does the shading. We can use `oob_squish()` to set a range.
geom_polygon(data = curves, aes(y = scales::oob_squish(y, c(0, Inf)),group = grp))
The second illustration is pretty close to your code. I replaced your density function with the standard normal density function and added some extra parameters to stat_function():
library(ggplot2)
x <- seq(1, 11, 1)
y <- x*0.5
x <- x - mean(x)
y <- y - mean(y)
df <- data.frame(x, y)
ggplot(df, aes(x, y)) +
geom_point() +
geom_line() +
stat_function(fun = dnorm,
aes(x = after_stat(-y * 4 - 5), y = after_stat(x)),
xlim = range(df$y)) +
# We fill with a polygon, squishing the y-range
stat_function(fun = dnorm, geom = "polygon",
aes(x = after_stat(-y * 4 - 5),
y = after_stat(scales::oob_squish(x, c(-Inf, -1)))),
xlim = range(df$y))

A ggplot2 equivalent of the lines() function in basic plot

For reasons I won't go into I need to plot a vertical normal curve on a blank ggplot2 graph. The following code gets it done as a series of points with x,y coordinates
dfBlank <- data.frame()
g <- ggplot(dfBlank) + xlim(0.58,1) + ylim(-0.2,113.2)
hdiLo <- 31.88
hdiHi <- 73.43
yComb <- seq(hdiLo, hdiHi, length = 75)
xVals <- 0.79 - (0.06*dnorm(yComb, 52.65, 10.67))/0.05
dfVertCurve <- data.frame(x = xVals, y = yComb)
g + geom_point(data = dfVertCurve, aes(x = x, y = y), size = 0.01)
The curve is clearly discernible but is a series of points. The lines() function in basic plot would turn these points into a smooth line.
Is there a ggplot2 equivalent?
I see two different ways to do it.
geom_segment
The first uses geom_segment to 'link' each point with its next one.
hdiLo <- 31.88
hdiHi <- 73.43
yComb <- seq(hdiLo, hdiHi, length = 75)
xVals <- 0.79 - (0.06*dnorm(yComb, 52.65, 10.67))/0.05
dfVertCurve <- data.frame(x = xVals, y = yComb)
library(ggplot2)
ggplot() +
xlim(0.58, 1) +
ylim(-0.2, 113.2) +
geom_segment(data = dfVertCurve, aes(x = x, xend = dplyr::lead(x), y = y, yend = dplyr::lead(y)), size = 0.01)
#> Warning: Removed 1 rows containing missing values (geom_segment).
As you can see, it just links the points you created. The last point does not have a next one, so the last segment is removed (see the warning).
stat_function
The second one, which I think is better and more ggplot-ish, uses stat_function().
library(ggplot2)
# Curve to draw: 0.79 minus a rescaled normal density (mean 52.65, sd 10.67)
# evaluated at `x`. Vectorised over `x`.
f <- function(x) {
  density <- dnorm(x, mean = 52.65, sd = 10.67)
  0.79 - (0.06 * density) / 0.05
}
hdiLo <- 31.88
hdiHi <- 73.43
yComb <- seq(hdiLo, hdiHi, length = 75)
ggplot() +
xlim(-0.2, 113.2) +
ylim(0.58, 1) +
stat_function(data = data.frame(yComb), fun = f) +
coord_flip()
This builds a proper function (y = f(x)) and plots it. Note that it is built on the x-axis and then flipped. Because of this, the xlim and ylim are swapped.

geom_errorbar with ecdf in ggplot

I want to create an ecdf plot with two lines and I would like to add errorbars to one of them.
I am using this code
x <- c(16,16,16,16,34,35,38,42,45,1,12)
xError <- c(0,1,1,1,3,3,3,4,5,1,1)
y <- c(16,1,12)
length(x)
length(xError)
length(y)
df <- rbind(data.frame(value = x,name='x'),
data.frame(value = y,name='y'))
ggplot(df, aes(x=value,color=name,linetype=name))+ stat_ecdf()+ geom_errorbar(aes(ymax = x + xError, ymin=x - xError))
The error bars should be added to the x values, but it gives me this error:
Error: Aesthetics must either be length one, or the same length as the dataProblems: x + xError, x - xError
I don't get it - the result is of the same length.
EDIT
I changed the problem so it gets easier - I think the real problem is related to ECDF plots and error bars. Take this code as an example:
x <- c(16,16,16,16,34,35,38,42,45,1,12)
xError <- c(0,1,1,1,3,3,3,4,5,1,1)
y <- c(16,1,12)
df <- data.frame(value = x)
ggplot(df, aes(x=value))+ stat_ecdf()+ geom_errorbar(aes(ymax = x + xError, ymin=x - xError))
It prints the error bars, but the plot is completely broken.
There is a similar question here: confidence interval for ecdf
Maybe that's what you would like to achieve.
EDIT:
I think this is what you are trying to get:
dat2 <- data.frame(variable = x)
dat2 <- transform(dat2, lower = x - xError, upper = x + xError)
l <- ecdf(dat2$lower)
u <- ecdf(dat2$upper)
v <- ecdf(dat2$variable)
dat2$lower1 <- l(dat2$variable)
dat2$upper1 <- u(dat2$variable)
dat2$variable1 <- v(dat2$variable)
ggplot(dat2,aes(x = variable)) +
geom_step(aes(y = variable1)) +
geom_ribbon(aes(ymin = upper1,ymax = lower1),alpha = 0.2)

R: Calculate and plot difference between two density contours

I have two datasets with two continuous variables: duration and waiting.
library("MASS")
data(geyser)
geyser1 <- geyser[1:150,]
geyser2 <- geyser[151:299,]
geyser2$duration <- geyser2$duration - 1
geyser2$waiting <- geyser2$waiting - 20
For each dataset I output a 2D density plot
ggplot(geyser1, aes(x = duration, y = waiting)) +
xlim(0.5, 6) + ylim(40, 110) +
stat_density2d(aes(alpha=..level..),
geom="polygon", bins = 10)
ggplot(geyser2, aes(x = duration, y = waiting)) +
xlim(0.5, 6) + ylim(40, 110) +
stat_density2d(aes(alpha=..level..),
geom="polygon", bins = 10)
I now want to produce a plot which indicates the regions where the two plots have the same density (white), negative differences (gradation from white to blue where geyser2 is denser than geyser1) and positive differences (gradation from white to red where geyser1 is denser than geyser2).
How to compute and plot the difference of the densities?
You can do this by first using kde2d to calculate the densities and then subtracting them from each other. Then you do some data reshaping to get it into a form that can be fed to ggplot2.
library(reshape2) # For melt function

# Calculate the common x and y range for geyser1 and geyser2
xrng <- range(c(geyser1$duration, geyser2$duration))
yrng <- range(c(geyser1$waiting, geyser2$waiting))

# Calculate the 2d density estimate over the common range
d1 <- kde2d(geyser1$duration, geyser1$waiting, lims = c(xrng, yrng), n = 200)
d2 <- kde2d(geyser2$duration, geyser2$waiting, lims = c(xrng, yrng), n = 200)

# The subtraction below is only meaningful if both estimates share the same
# grid, so fail loudly instead of just printing TRUE/FALSE
stopifnot(identical(d1$x, d2$x), identical(d1$y, d2$y))

# Calculate the difference between the 2d density estimates
# (positive where geyser2 is denser, negative where geyser1 is denser)
diff12 <- d1
diff12$z <- d2$z - d1$z

## Melt data into long format
# First, add row and column names (x and y grid values) to the z-value matrix
rownames(diff12$z) <- diff12$x
colnames(diff12$z) <- diff12$y

# Now melt it to long format: one row per grid cell, holding the two
# dimnames and the value
diff12.m <- melt(diff12$z)
names(diff12.m) <- c("Duration", "Waiting", "z")

# Plot difference between geyser2 and geyser1 density
ggplot(diff12.m, aes(Duration, Waiting, z = z, fill = z)) +
  geom_tile() +
  stat_contour(aes(colour = after_stat(level)), binwidth = 0.001) +
  scale_fill_gradient2(low = "red", mid = "white", high = "blue", midpoint = 0) +
  # muted() lives in the scales package, which is not attached here,
  # so it is called with an explicit namespace
  scale_colour_gradient2(low = scales::muted("red"), mid = "white",
                         high = scales::muted("blue"), midpoint = 0) +
  coord_cartesian(xlim = xrng, ylim = yrng) +
  guides(colour = "none")

How do I plot lines and points with limited points?

I am trying to replot the following figure in a more legible way. Observe that I am trying to plot both lines and points. However, the number of points being printed is way too many and the line is getting covered up. Is there a way I can plot:
Different lines for different datasets
Different points shapes for different datasets but limit the number of points to say 30-50
Add the line and point information to the legend
My plotting code is here (It was too big for SO)
Do you need something like this?
# Relabel the series for the legend (two labels are supplied, so `Type` is
# expected to have two levels -- TODO confirm against the data)
transData$Type2 <- factor(transData$Type,
                          labels = c("Some Info for P", "Some Info for Q"))

# Row indices for the point layer: 30 positions spread evenly over the rows,
# so the markers do not bury the line
point_rows <- round(seq(1, nrow(transData), length.out = 30))

ggplot(transData, aes(x = Value, y = ecd)) +
  geom_line(aes(group = Type2, colour = Type2, linetype = Type2), size = 1.5) +
  geom_point(aes(shape = Type2), data = transData[point_rows, ], size = 5) +
  # opts() is defunct in ggplot2; theme() takes the same settings
  theme(legend.position = "top", legend.key.width = grid::unit(3, "line"))
You can plot large, partially transparent points: the denser areas will appear darker.
p <- ggplot(transData, aes(x=Value, y=ecd, group=Type))
p +
geom_point(size=20, colour=rgb(0,0,0,.02)) +
geom_line(aes(colour=Type), size=3)
The following code adds points more or less evenly spaced, though they're not necessarily actual data points (could be interpolated),
# Place points at (roughly) equal arc-length spacing along the polyline
# through (x, y).
#
# The vertices are sorted by `x`, the cumulative length of the polyline is
# divided into `N` equal parts, and each of the N - 1 interior division points
# is found by linear interpolation on the segment containing it. The returned
# points therefore need not coincide with input vertices.
#
# Arguments:
#   x, y  numeric vectors of equal length giving the polyline vertices.
#   N     number of equal-length parts; N - 1 points are returned.
#   ...   not used by this function.
# Returns a data.frame with numeric columns `x` and `y`. It has zero rows when
# there are fewer than two vertices, when the path has zero length, or when
# there are no interior division points (N = 1).
barbedize <- function(x, y, N = 10, ...) {
  no_points <- data.frame(x = numeric(0), y = numeric(0))
  if (length(x) < 2L) {
    return(no_points)
  }

  ind <- order(x)
  x <- x[ind]
  y <- y[ind]

  # lengths[k] is the length of the segment ending at vertex k (0 for the
  # first vertex); l[k] is the distance travelled on reaching vertex k
  lengths <- c(0, sqrt(diff(x)^2 + diff(y)^2))
  l <- cumsum(lengths)
  tl <- l[length(l)]
  if (isTRUE(tl == 0)) {
    # All vertices coincide: there is no path to interpolate along
    return(no_points)
  }

  # Target distances along the path; drop 0 and the end point itself
  el <- seq(0, to = tl, length.out = N + 1)[-1]
  targets <- el[-length(el)]

  # vapply() (unlike sapply()) always yields a 2-row matrix, even when
  # `targets` is empty
  pts <- vapply(targets, function(ii) {
    int <- findInterval(ii, l)
    ratio <- (ii - l[int]) / lengths[int + 1]
    c(x[int] + ratio * (x[int + 1] - x[int]),
      y[int] + ratio * (y[int + 1] - y[int]))
  }, numeric(2), USE.NAMES = FALSE)

  data.frame(x = pts[1, ], y = pts[2, ])
}
library(plyr)
few_points <- ddply(transData, "Type", function(d, ...)
barbedize(d$Value, d$ecd, ...), N=10)
ggplot(transData, aes(x=Value, y=ecd)) +
geom_line(aes(group=Type,colour=Type, linetype=Type), size=1) +
geom_point(aes(x=x,y=y, colour=Type, shape=Type), data=few_points, size=3)
(this is a quick and dirty proof-of-principle, barbedize should be cleaned up and written more efficiently...)

Resources