Check how many times smooth spline intersects with x-axis - r

I want to check how many times my smoothed spline intersects with the x-axis. Is there an elegant way to do this?
Example: (1 intersection in this case)
]1)

Check the number of times y values go from positive to negative
set.seed(1571933401)
x = 1:100
y = rnorm(100)
sp = smooth.spline(x, y)
with(sp, sum((sign(c(0, y)) * sign(c(y, 0))) == -1))
#6
graphics.off()
plot(sp, type = "l")
abline(h = 0, lty = 2)

Related

How to plot part of two planes

I would like to plot two planes in a 3D plot. I have tried persp3d and it generates two planes. But instead of the whole two planes, I just want to show parts of them divided by the intersection line, i.e, "left" part of the blue plane, and "upper" part of the red plane. I tried xlim, ylim, but it seems my lims are not single values, but functions.
library(rgl)
x <- seq(-10, 10, length = 30)
y <- x
region = expand.grid(x=x, y=y)
z1 = region$x+2*region$y + 2
z2=3*region$x+region$y
persp3d(x,y,z1,col="steelblue")
persp3d(x,y,z2,col="red",add=TRUE)
grid = mesh(x,y)
z = with(grid,ifelse(x+2*y>3*x+y,x+2*y,3*x+y))
persp3D(z = z, x = x, y = y,col = NULL)
for (i in 1:900){
z[i] = ifelse(region$x[i]+2*region$y[i] + 2 >
3*region$x[i]+region$y[i],region$x[i]+2*region$y[i] + 2,3*region$x[i]+region$y[i])}
persp3d(x,y,z,col="steelblue")
This is inspired by Huang Rui's suggestion

Placing arrow heads to the middle of the lines in R

I have a plot where I draw arrows from points to points. I would like to put this arrow heads not to the end of the line, but to middle. Is there a simple way to do it other than placing extra arrows with half length of the according line?
My code is this:
plot(x, y, xlim=range(x), ylim=range(y), xlab="x", ylab="y", pch=16,
main="Filled Plane")
for(i in 1:20){
arrows(x[i], y[i], x[i+1], y[i+1], length = 0.25, angle = 30, col = i)
}
Make a custom function myArrow() and add one new argument cut to control the proportion of the arrows
myArrow <- function(x0, y0, x1, y1, cut = 1, ...){
x.new <- (1 - cut) * x0 + cut * x1
y.new <- (1 - cut) * y0 + cut * y1
# segments(x0, y0, x1, y1, ...)
arrows(x0, y0, x.new, y.new, ...)
}
Note1 : The computation of x.new and y.new in this custom function uses a simple mathematical concept, i.e. the Section Formula. The value of cut must be between 0 to 1.
Note2 : The use of this function is equivalent to that of the original functionarrows() other than that it has one more new argument cut.
Note3 : If you want complete lines behind the arrows, just remove the hash(#) in the function.
Plot and try different cut value. For example, I use cut = 0.7. (If you want the arrowheads to the middle, use cut = 0.5.)
# Toy Data
x <- seq(1, 5.5, by = 0.5)
y <- rep(c(1, 5), 5)
plot(x, y, pch = 16)
for(i in 1:9){
myArrow(x[i], y[i], x[i+1], y[i+1], cut = 0.7, col = i, lwd = 2)
}
Since you do not provide your x and y, I made up some data. There is no need for the loop. arrows will handle a vector of coordinates. One way is to draw a full-length arrow with no arrowhead and another that just goes halfway but has the arrowhead.
## Some bogus data
set.seed(123)
x = runif(4)
y = runif(4)
## Compute the midpoints
midx = diff(x)/2 + x[-length(x)]
midy = diff(y)/2 + y[-length(y)]
## Draw it
plot(x,y,xlim=range(x), ylim=range(y), xlab="x", ylab="y",
main="Filled Plane",pch=16)
arrows(x[-length(x)], y[-length(y)],x[-1],y[-1],
angle = 0, col = 1:3)
arrows(x[-length(x)], y[-length(y)],midx,midy,
length = 0.25, angle = 30, col = 1:3)

How to generate values for 2 variables to draw a circle

I have been trying to generate random values so that I can construct a circle.
The values of x and y are expected to satisfy the following equation
x^2 + y^2 = 1
Here is the code that I used.
par(type = "s")
x <- runif(1000, min = -1, max = 1)
y <- sqrt(1 - x^2)
z <- NULL
z$x <- x
z$y <- y
z <- as.data.frame(z)
plot.new()
plot(z$x, z$y, type = "p")
plot.window(xlim = c(-10,10), ylim = c(-10,10), asp = 1)
But the graph I get is not quite what I expected it to be.
The graph resembles an upper half of an ellipse rather than a semicircle
Why are there no values for y where y < 0
Please find the plot here.
I am also interested in finding out, how to generate random values for x, y, z, a; where x^2 + y^2 + z^2 + a^2 = 10
Maybe you missed #thelatemail's comment:
png()
plot(z$x, z$y, type = "p", asp=1)
dev.off()
The reason passing asp=1 to plot.window would fail(if it were called first, and this is what you might have tried) is that plot itself calls plot.window again, and in the process reacquires the default values. You can see that in the code of plot.default:
> plot.default
function (x, y = NULL, type = "p", xlim = NULL, ylim = NULL,
log = "", main = NULL, sub = NULL, xlab = NULL, ylab = NULL,
ann = par("ann"), axes = TRUE, frame.plot = axes, panel.first = NULL,
panel.last = NULL, asp = NA, ...)
{
localAxis <- function(..., col, bg, pch, cex, lty, lwd) Axis(...)
localBox <- function(..., col, bg, pch, cex, lty, lwd) box(...)
localWindow <- function(..., col, bg, pch, cex, lty, lwd) plot.window(...)
#.... omitted the rest of the R code.
(Calling plot.window after that plot call should not be expected to have any favorable effect.)
The problem is within this part of your code:
x <- runif(1000, min = -1, max = 1)
y <- sqrt(1 - x^2)enter code here
This problem arises from interpreting two distinct mathematical entities as the same (functions and equations are two different things). A function f takes an input x, and returns a single output f(x). Equations don't have this limitation, so if you are encoding this equation as a function, you will lose half the points in the circle, you will generate all the points in the upper semicircle.
Since the circle equation has two y outputs for any x value you can just generate two pairs of coordinates for each point generated by your uniform distribution like this:
x1 = runif(1000, min = -1, max = 1)
x2 = x1
y1 = sqrt(1 - x1^2)
y2 = (-1)*y1
x = c(x1,x2)
y = c(y1,y2)
plot(x,y, asp=1)
As John Coleman recommended in his comment, i'd prefer using parametric/polar coordinates instead. Generate angles in radians between 0 and 2pi and then calculate the appropriate x and y positions using the generated angle and the radius you want.
radius = 1
theta = runif(1000, min = 0, max = 2*pi)
x = radius * cos(theta)
y = radius * sin(theta)
plot(x,y, asp=1)
For the last part of your question, for each value of a variable, you'd have to work out all the possible tuples that solve the equation, and if z and a are also variables, it may not be possible to represent it solely on a 2-dimensional graph.

How to draw the curves in an energy diagram in R?

I wrote following R script:
#energy diagram
x <- c(0.1, 0.3, 0.5, 0.7, 0.9 ) #chosen randomly, reaction axis
y <- c(-5.057920, -5.057859, -5.057887,-5.057674, -5.057919 ) #energy of the educt, intermediate, transtition states and product
plot(x,y, type="p",
xlim=c(0,1),
ylim=c(-5.058,-5.0575),
xlab="reaction axis",
ylab=expression(paste(E[el] ," / ",10^6," ",kJ/mol)),
xaxt="n" #hide x-axis
)
#h- and v-lines, so i can draw curves by hand
abline(v=seq(0,1,0.1),h=seq(-5.0600,-5.0500,0.00005),col="black",lty=1,lwd=1)
abline(h=c(-5.057920, -5.057859, -5.057887,-5.057674), col="blue", lty=1,lwd=0.7)
Is it possible to draw a curve through the points that would look like a energy diagram. An example of an energy diagram is here:
A lot could be done to streamline / vectorize this code, but for a smallish diagram this works pretty well:
# get that data
x <- c(0.1, 0.3, 0.5, 0.7, 0.9 ) # reaction axis
y <- c(-5.057920, -5.057859, -5.057887,-5.057674, -5.057919 ) # energies
I'm going to make a little Bezier curve to connect each point to the next---this way we can make sure the smooth line passes through the data, not just close to it. I'll give each point a single 'control point' to define the slope. By using the same y-values for a point and it's control point, the slope at the point will be 0. I'll call the offset between the point and the control point delta. We'll start with one point-pair:
library(Hmisc)
delta = 0.15
bezx = c(0.1, 0.1 + delta, 0.3 - delta, 0.3)
bezy = rep(y[1:2], each = 2)
plot(bezx, bezy, type = 'b', col = "gray80")
lines(bezier(bezx, bezy), lwd = 2, col = "firebrick4")
Here I plotted the points and control points in gray, and the smooth line in red so we can see what's going on.
It looks promising, let's turn it into a function that we can apply to each pair of points:
bezf = function(x1, x2, y1, y2, delta = 0.15) {
bezier(x = c(x1, x1 + delta, x2 - delta, x2), y = c(y1, y1, y2, y2))
}
You can play with the delta parameter, I think 0.1 looks pretty good.
plot(x, y, xlab = "Reaction coordinate", ylab = "E", axes = F)
box(bty = "L")
axis(side = 2)
for(i in 1:(length(x) - 1)) {
lines(bezf(x1 = x[i], x2 = x[i + 1], y1 = y[i], y2 = y[i + 1], delta = 0.1))
}
You can of course tweak the plot, add labels, and ablines as in your original. (Use my for loop with the lines command to draw only the smoothed lines.) I left the points on to show that we are passing through them, not just getting close.
I prefer plotting in ggplot2, if you do too you'll need to extract the data into a data.frame:
bezlist = list()
for (i in 1:(length(x) - 1)) {
bezlist[[i]] = bezf(x1 = x[i], x2 = x[i + 1], y1 = y[i], y2 = y[i + 1], delta = 0.1)
}
xx = unlist(lapply(bezlist, FUN = '[', 'y'))
yy = unlist(lapply(bezlist, FUN = '[', 'y'))
bezdat = data.frame(react = xx, E = yy)
library(ggplot2)
ggplot(bezdat, aes(x = react, y = E)) +
geom_line() +
labs(x = "Reaction coordinate")
You could use a spline fit. Define some points along the energy diagram, and then fit to them using a spline function. The more points that you provide, the better that your fit will be. You can check out the smooth.splines function in the stats package for one implementation of the spline fit.

How do I run a high pass or low pass filter on data points in R?

I am a beginner in R and I have tried to find information about the following without finding anything.
The green graph in the picture is composed by the red and yellow graphs. But let's say that I only have the data points of something like the green graph. How do I extract the low/high frequencies (i.e. approximately the red/yellow graphs) using a low pass/high pass filter?
Update: The graph was generated with
number_of_cycles = 2
max_y = 40
x = 1:500
a = number_of_cycles * 2*pi/length(x)
y = max_y * sin(x*a)
noise1 = max_y * 1/10 * sin(x*a*10)
plot(x, y, type="l", col="red", ylim=range(-1.5*max_y,1.5*max_y,5))
points(x, y + noise1, col="green", pch=20)
points(x, noise1, col="yellow", pch=20)
Update 2: Using the Butterworth filter in the signal package suggested I get the following:
library(signal)
bf <- butter(2, 1/50, type="low")
b <- filter(bf, y+noise1)
points(x, b, col="black", pch=20)
bf <- butter(2, 1/25, type="high")
b <- filter(bf, y+noise1)
points(x, b, col="black", pch=20)
The calculations was a bit work, signal.pdf gave next to no hints about what values W should have, but the original octave documentation at least mentioned radians which got me going. The values in my original graph was not chosen with any specific frequency in mind, so I ended up with the following not so simple frequencies: f_low = 1/500 * 2 = 1/250, f_high = 1/500 * 2*10 = 1/25 and the sampling frequency f_s = 500/500 = 1. Then I chose a f_c somewhere inbetween the low and high frequencies for the low/high pass filters (1/100 and 1/50 respectively).
I bumped into similar problem recently and did not find the answers here particularly helpful. Here is an alternative approach.
Let´s start by defining the example data from the question:
number_of_cycles = 2
max_y = 40
x = 1:500
a = number_of_cycles * 2*pi/length(x)
y = max_y * sin(x*a)
noise1 = max_y * 1/10 * sin(x*a*10)
y <- y + noise1
plot(x, y, type="l", ylim=range(-1.5*max_y,1.5*max_y,5), lwd = 5, col = "green")
So the green line is the dataset we want to low-pass and high-pass filter.
Side note: The line in this case could be expressed as a function by using cubic spline (spline(x,y, n = length(x))), but with real world data this would rarely be the case, so let's assume that it is not possible to express the dataset as a function.
The easiest way to smooth such data I have came across is to use loess or smooth.spline with appropriate span/spar. According to statisticians loess/smooth.spline is probably not the right approach here, as it does not really present a defined model of the data in that sense. An alternative is to use Generalized Additive Models (gam() function from package mgcv). My argument for using loess or smoothed spline here is that it is easier and does not make a difference as we are interested in the visible resulting pattern. Real world datasets are more complicated than in this example and finding a defined function for filtering several similar datasets might be difficult. If the visible fit is good, why to make it more complicated with R2 and p values? To me the application is visual for which loess/smoothed splines are appropriate methods. Both of the methods assume polynomial relationships with the difference that loess is more flexible also using higher degree polynomials, while cubic spline is always cubic (x^2). Which one to use depends on trends in a dataset. That said, the next step is to apply a low-pass filter on the dataset by using loess() or smooth.spline():
lowpass.spline <- smooth.spline(x,y, spar = 0.6) ## Control spar for amount of smoothing
lowpass.loess <- loess(y ~ x, data = data.frame(x = x, y = y), span = 0.3) ## control span to define the amount of smoothing
lines(predict(lowpass.spline, x), col = "red", lwd = 2)
lines(predict(lowpass.loess, x), col = "blue", lwd = 2)
Red line is the smoothed spline filter and blue the loess filter. As you see results differ slightly. I guess one argument of using GAM would be to find the best fit, if the trends really were this clear and consistent among datasets, but for this application both of these fits are good enough for me.
After finding a fitting low-pass filter, the high-pass filtering is as simple as subtracting the low-pass filtered values from y:
highpass <- y - predict(lowpass.loess, x)
lines(x, highpass, lwd = 2)
This answer comes late, but I hope it helps someone else struggling with similar problem.
Use filtfilt function instead of filter (package signal) to get rid of signal shift.
library(signal)
bf <- butter(2, 1/50, type="low")
b1 <- filtfilt(bf, y+noise1)
points(x, b1, col="red", pch=20)
One method is using the fast fourier transform implemented in R as fft. Here is an example of a high pass filter. From the plots above, the idea implemented in this example is to get the serie in yellow starting from the serie in green (your real data).
# I've changed the data a bit so it's easier to see in the plots
par(mfrow = c(1, 1))
number_of_cycles = 2
max_y = 40
N <- 256
x = 0:(N-1)
a = number_of_cycles * 2 * pi/length(x)
y = max_y * sin(x*a)
noise1 = max_y * 1/10 * sin(x*a*10)
plot(x, y, type="l", col="red", ylim=range(-1.5*max_y,1.5*max_y,5))
points(x, y + noise1, col="green", pch=20)
points(x, noise1, col="yellow", pch=20)
### Apply the fft to the noisy data
y_noise = y + noise1
fft.y_noise = fft(y_noise)
# Plot the series and spectrum
par(mfrow = c(1, 2))
plot(x, y_noise, type='l', main='original serie', col='green4')
plot(Mod(fft.y_noise), type='l', main='Raw serie - fft spectrum')
### The following code removes the first spike in the spectrum
### This would be the high pass filter
inx_filter = 15
FDfilter = rep(1, N)
FDfilter[1:inx_filter] = 0
FDfilter[(N-inx_filter):N] = 0
fft.y_noise_filtered = FDfilter * fft.y_noise
par(mfrow = c(2, 1))
plot(x, noise1, type='l', main='original noise')
plot(x, y=Re( fft( fft.y_noise_filtered, inverse=TRUE) / N ) , type='l',
main = 'filtered noise')
Per request of OP:
The signal package contains all kinds of filters for signal processing. Most of it is comparable to / compatible with the signal processing functions in Matlab/Octave.
Check out this link where there's R code for filtering (medical signals). It's by Matt Shotwell and the site is full of interesting R/stats info with a medical bent:
biostattmat.com
The package fftfilt contains lots of filtering algorithms that should help too.
I also struggled to figure out how the W parameter in the butter function maps on to the filter cut-off, in part because the documentation for filter and filtfilt is incorrect as of posting (it suggests that W = .1 would result in a 10 Hz lp filter when combined with filtfilt when signal sampling rate Fs = 100, but actually, it's only a 5 Hz lp filter -- the half-amplitude cut-off is 5 Hz when use filtfilt, but the half-power cut-off is 5 Hz when you only apply the filter once, using the filter function). I'm posting some demo code I wrote below that helped me confirm how this is all working, and that you could use to check a filter is doing what you want.
#Example usage of butter, filter, and filtfilt functions
#adapted from https://rdrr.io/cran/signal/man/filtfilt.html
library(signal)
Fs <- 100; #sampling rate
bf <- butter(3, 0.1);
#when apply twice with filtfilt,
#results in a 0 phase shift
#5 Hz half-amplitude cut-off LP filter
#
#W * (Fs/2) == half-amplitude cut-off when combined with filtfilt
#
#when apply only one time, using the filter function (non-zero phase shift),
#W * (Fs/2) == half-power cut-off
t <- seq(0, .99, len = 100) # 1 second sample
#generate a 5 Hz sine wave
x <- sin(2*pi*t*5)
#filter it with filtfilt
y <- filtfilt(bf, x)
#filter it with filter
z <- filter(bf, x)
#plot original and filtered signals
plot(t, x, type='l')
lines(t, y, col="red")
lines(t,z,col="blue")
#estimate signal attenuation (proportional reduction in signal amplitude)
1 - mean(abs(range(y[t > .2 & t < .8]))) #~50% attenuation at 5 Hz using filtfilt
1 - mean(abs(range(z[t > .2 & t < .8]))) #~30% attenuation at 5 Hz using filter
#demonstration that half-amplitude cut-off is 6 Hz when apply filter only once
x6hz <- sin(2*pi*t*6)
z6hz <- filter(bf, x6hz)
1 - mean(abs(range(z6hz[t > .2 & t < .8]))) #~50% attenuation at 6 Hz using filter
#plot the filter attenuation profile (for when apply one time, as with "filter" function):
hf <- freqz(bf, Fs = Fs);
plot(c(0, 20, 20, 0, 0), c(0, 0, 1, 1, 0), type = "l",
xlab = "Frequency (Hz)", ylab = "Attenuation (abs)")
lines(hf$f[hf$f<=20], abs(hf$h)[hf$f<=20])
plot(c(0, 20, 20, 0, 0), c(0, 0, -50, -50, 0),
type = "l", xlab = "Frequency (Hz)", ylab = "Attenuation (dB)")
lines(hf$f[hf$f<=20], 20*log10(abs(hf$h))[hf$f<=20])
hf$f[which(abs(hf$h) - .5 < .001)[1]] #half-amplitude cutoff, around 6 Hz
hf$f[which(20*log10(abs(hf$h))+6 < .2)[1]] #half-amplitude cutoff, around 6 Hz
hf$f[which(20*log10(abs(hf$h))+3 < .2)[1]] #half-power cutoff, around 5 Hz
there is a package on CRAN named FastICA, this computes the approximation of the independent source signals, however in order to compute both signals you need a matrix of at least 2xn mixed observations (for this example), this algorithm can't determine the two indpendent signals with just 1xn vector. See the example below. hope this can help you.
number_of_cycles = 2
max_y = 40
x = 1:500
a = number_of_cycles * 2*pi/length(x)
y = max_y * sin(x*a)
noise1 = max_y * 1/10 * sin(x*a*10)
plot(x, y, type="l", col="red", ylim=range(-1.5*max_y,1.5*max_y,5))
points(x, y + noise1, col="green", pch=20)
points(x, noise1, col="yellow", pch=20)
######################################################
library(fastICA)
S <- cbind(y,noise1)#Assuming that "y" source1 and "noise1" is source2
A <- matrix(c(0.291, 0.6557, -0.5439, 0.5572), 2, 2) #This is a mixing matrix
X <- S %*% A
a <- fastICA(X, 2, alg.typ = "parallel", fun = "logcosh", alpha = 1,
method = "R", row.norm = FALSE, maxit = 200,
tol = 0.0001, verbose = TRUE)
par(mfcol = c(2, 3))
plot(S[,1 ], type = "l", main = "Original Signals",
xlab = "", ylab = "")
plot(S[,2 ], type = "l", xlab = "", ylab = "")
plot(X[,1 ], type = "l", main = "Mixed Signals",
xlab = "", ylab = "")
plot(X[,2 ], type = "l", xlab = "", ylab = "")
plot(a$S[,1 ], type = "l", main = "ICA source estimates",
xlab = "", ylab = "")
plot(a$S[, 2], type = "l", xlab = "", ylab = "")
I am not sure if any filter is the best way for You. More useful instrument for that aim is the fast Fourier transformation.

Resources