How to plot CDF using two dataset in R - r

I'm trying to plot the below dataset using plot function. Im unable to plot both the graphs in same plot.
Using the dataset I tried to plot the graph.
m_bs = conpl$new(sample_data1$V1)
m_eq = conpl$new(sample_data2$V1)
est = estimate_xmin(m_bs, xmax=5e+5)
est_eq = estimate_xmin(m_eq, xmax=Inf)
m_bs$setXmin(est_bs)
m_eq$setXmin(est_eq)
plot(m_bs)
lines(m_bs)
d = plot(m_eq, draw =FALSE)
points(d$x, d$y, col=2)
lines(m_eq,col=2,lwd=2)
I got the below graph, it only shows one graph. Sorry for posting the question again I didnt get proper answer earlier post.

I looked up the source code of the plot function used by poweRlaw and modified it:
lines_ <- function (x, y, ...)
{
.local <- function (x, cut = FALSE, draw = TRUE, ...)
{
xmin = x$getXmin()
cut_off = cut * xmin
x_values = x$dat
if (!cut)
x$setXmin(min(x_values))
y = dist_data_cdf(x, lower_tail = FALSE, xmax = max(x_values) + 1)
cut_off_seq = (x_values >= cut_off)
x_axs = x_values[cut_off_seq]
if (is(x, "discrete_distribution"))
x_axs = unique(x_axs)
x$setXmin(xmin)
x = x_axs
if (draw)
lines(x, y, ...)
invisible(data.frame(x = x, y = y))
}
.local(x, ...)
}
#----------------------------------------------------------
points_ <- function (x, y, ...)
{
.local <- function (x, cut = FALSE, draw = TRUE, ...)
{
xmin = x$getXmin()
cut_off = cut * xmin
x_values = x$dat
if (!cut)
x$setXmin(min(x_values))
y = dist_data_cdf(x, lower_tail = FALSE, xmax = max(x_values) + 1)
cut_off_seq = (x_values >= cut_off)
x_axs = x_values[cut_off_seq]
if (is(x, "discrete_distribution"))
x_axs = unique(x_axs)
x$setXmin(xmin)
x = x_axs
if (draw)
points(x, y, ...)
invisible(data.frame(x = x, y = y))
}
.local(x, ...)
}
The functions lines_ and points_
draw the same graph as the plot function of the poweRlaw package, but
behave like the standard lines and points functions in that they don't destroy the current graph.
First m_bs and 'm_eq' separately:
> plot(m_bs, lwd=9, col="black")
> lines_(m_bs, lwd=5, col="green")
> plot(m_eq, lwd=9, col="black")
> lines_(m_eq, lwd=5, col="blue")
The x-ranges of these to graphs do not overlap. Hence xlim has to be chosen appropriately to show both graphs in the same picture.
> plot( m_eq, lwd=8, col="black",
+ xlim=c(min(m_bs$dat,m_eq$dat),max(m_bs$dat,m_eq$dat)))
> lines_(m_eq, lwd=5, col="blue")
> points_(m_bs,lwd=8,col="black")
> lines_(m_bs, lwd=5, col="green")
>

Related

Is There an Expedient Way to Color-Fill a Curve in R?

I am familiar with polygon and ggplot2, but I like working with basic plot.
I want to fill in the curve called by the command:
curve(dnorm(x, 0.5, 0.22), xlim=c(-0.5, 1.5))
When in the past I have used polygon I defined the end points based on prior calculations of x along the lines of x <- seq(-0.5, 0.5, len = 100), for example, and y <- dnorm(x, 0.5, 0.22). Next, the limits are defined within polygon, the color selected, et voilĂ ... in less than a fortnight, there is color...
OK, not exactly fast, but in this particular case, I haven't even explicitly defined x and y outside curve, which makes the whole process even more cumbersome. It's almost like starting afresh.
Is there any way of doing it fast, perhaps with something like fill, col or other plotting parameters within curve?
You can actually modify the curve function relatively easily. The only changes I've made are:
1) Add a fill parameter defaulting to "red"
2) Add a polygon plot at the end of the function (marked with a comment)
Here's the function in action. See below for the function code.
my_curve(dnorm(x, 0.5, 0.22), xlim=c(-0.5, 1.5), fill="green")
I'm not sure that this is any more expedient that just doing:
x=seq(-0.5,1.5,0.01)
plot(x, dnorm(x,0.5,0.22), xlim=c(-0.5,1.5), type="l")
polygon(x, dnorm(x,0.5,0.22), col="green")
But if you're going to be using it a lot, you can source my_curve in your scripts or create a package with the new version of curve that masks the base version. You could also make the fill optional and perhaps add a feature that allows partial filling.
If you're not wild about the ggplot2 defaults, you can also get rid of them, while still keeping the benefits of ggplot's grammar:
library(ggplot2)
df = data.frame(x=seq(-0.5,1.5,0.01), y=dnorm(x,0.5,0.22))
ggplot(df, aes(x,y)) +
geom_area(colour="black", fill="red") +
theme_bw() +
theme(panel.grid=element_blank())
Here's the modified curve function:
my_curve = function (expr, from = NULL, to = NULL, n = 101, add = FALSE,
type = "l", xname = "x", xlab = xname, ylab = NULL, log = NULL,
xlim = NULL, fill="red", ...)
{
sexpr <- substitute(expr)
if (is.name(sexpr)) {
expr <- call(as.character(sexpr), as.name(xname))
}
else {
if (!((is.call(sexpr) || is.expression(sexpr)) && xname %in%
all.vars(sexpr)))
stop(gettextf("'expr' must be a function, or a call or an expression containing '%s'",
xname), domain = NA)
expr <- sexpr
}
if (dev.cur() == 1L && !identical(add, FALSE)) {
warning("'add' will be ignored as there is no existing plot")
add <- FALSE
}
addF <- identical(add, FALSE)
if (is.null(ylab))
ylab <- deparse(expr)
if (is.null(from) || is.null(to)) {
xl <- if (!is.null(xlim))
xlim
else if (!addF) {
pu <- par("usr")[1L:2L]
if (par("xaxs") == "r")
pu <- extendrange(pu, f = -1/27)
if (par("xlog"))
10^pu
else pu
}
else c(0, 1)
if (is.null(from))
from <- xl[1L]
if (is.null(to))
to <- xl[2L]
}
lg <- if (length(log))
log
else if (!addF && par("xlog"))
"x"
else ""
if (length(lg) == 0)
lg <- ""
if (grepl("x", lg, fixed = TRUE)) {
if (from <= 0 || to <= 0)
stop("'from' and 'to' must be > 0 with log=\"x\"")
x <- exp(seq.int(log(from), log(to), length.out = n))
}
else x <- seq.int(from, to, length.out = n)
ll <- list(x = x)
names(ll) <- xname
y <- eval(expr, envir = ll, enclos = parent.frame())
if (length(y) != length(x))
stop("'expr' did not evaluate to an object of length 'n'")
if (isTRUE(add))
lines(x = x, y = y, type = type, ...)
else plot(x = x, y = y, type = type, xlab = xlab, ylab = ylab,
xlim = xlim, log = lg, ...)
polygon(x,y, col=fill) # Add filled area under curve
invisible(list(x = x, y = y))
}

Modify the limit of the y axis in plot.ACF (package nlme)

I used the function plot.ACF (package nlme) to do a graph of the autocorrelations for model residuals.The problem is that the autocorrelations in the graph are very hard to read. How can I modify the limit of the y axis in the function plot.ACF ?
Here is my graph:
The nlme:::plot.ACF function has a hard-coded ylim assignment, so you have to modify the function slightly (the comment lines below are the only places I made changes).
my_acf_plot <- function (x, alpha = 0, xlab = "Lag", ylab = "Autocorrelation",
grid = FALSE,
ylim = NULL, ## add ylim
...)
{
require("lattice") ## for xyplot, panel.* function
## (unnecessary originally because the package imports
## the required functions)
object <- x
if (is.null(ylim)) ylim <- range(object$ACF) ## set ylim if unset
if (alpha) {
assign("stdv", qnorm(1 - alpha/2)/sqrt(attr(object, "n.used")))
stMax <- max(stdv)
ylim <- c(min(c(-stMax, ylim[1])), max(c(ylim[2], stMax)))
}
assign("alpha", as.logical(alpha))
assign("grid", grid)
xyplot(ACF ~ lag, object, ylim = ylim, panel = function(x,
y, ...) {
x <- as.numeric(x)
y <- as.numeric(y)
if (grid)
panel.grid()
panel.xyplot(x, y, type = "h")
panel.abline(0, 0)
if (alpha) {
llines(x, stdv, lty = 2)
llines(x, -stdv, lty = 2)
}
}, xlab = xlab, ylab = ylab, ...)
}
Example:
library(nlme)
set.seed(101)
d <- data.frame(y=rnorm(50),x=1:50)
my_acf_plot(ACF(gls(y~x,data=d)),ylim=c(-0.3,0.5))

Visualize a function using double integration in R - Wacky Result

I am trying to visualize a curve for pollination distribution. I am very new to R so please don't be upset by my stupidity.
llim <- 0
ulim <- 6.29
f <- function(x,y) {(.156812/((2*pi)*(.000005^2)*(gamma(2/.156812)))*exp(-((sqrt(x^2+y^2))/.000005)^.156812))}
integrate(function(y) {
sapply(y, function(y) {
integrate(function(x) f(x,y), llim, ulim)$value
})
}, llim, ulim)
fv <- Vectorize(f)
curve(fv, from=0, to=1000)
And I get:
Error in y^2 : 'y' is missing
I'm not quite sure what you're asking to plot. But I know you want to visualise your scalar function of two arguments.
Here are some approaches. First we define your function.
llim <- 0
ulim <- 6.29
f <- function(x,y) {
(.156812/((2*pi)*(.000005^2)*(gamma(2/.156812)))*exp(-((sqrt(x^2+y^2))/.000005)^.156812))
}
From your title I thought of the following. The function defined below intf integrates your function over the square [0,ul] x [0,ul] and return the value. We then vectorise and plot the integral over the square as a function the length of the side of the square.
intf <- function(ul) {
integrate(function(y) {
sapply(y, function(y) {
integrate(function(x) f(x,y), 0, ul)$value
})
}, 0, ul)$value
}
fv <- Vectorize(intf)
curve(fv, from=0, to=1000)
If f is a distribution, I guess you can make your (somewhat) nice probability interpretation of this curve. (I.e. ~20 % probability of pollination(?) in the 200 by 200 meter square.)
However, you can also do a contour plot (of the log-transformed values) which illustrate the function we are integrating above:
logf <- function(x, y) log(f(x, y))
x <- y <- seq(llim, ulim, length.out = 100)
contour(x, y, outer(x, y, logf), lwd = 2, drawlabels = FALSE)
You can also plot some profiles of the surface:
plot(1, xlim = c(llim, ulim), ylim = c(0, 0.005), xlab = "x", ylab = "f")
y <- seq(llim, ulim, length.out = 6)
for (i in seq_along(y)) {
tmp <- function(x) f(x, y = y[i])
curve(tmp, llim, ulim, add = TRUE, col = i)
}
legend("topright", lty = 1, col = seq_along(y),
legend = as.expression(paste("y = ",y)))
They need to be modified a bit to make them publication worthy, but you get the idea. Lastly, you can do some 3d plots as others have suggested.
EDIT
As per your comments, you can also do something like this:
# Define the function times radius (this time with general a and b)
# The default of a and b is as before
g <- function(z, a = 5e-6, b = .156812) {
z * (b/(2*pi*a^2*gamma(2/b)))*exp(-(z/a)^b)
}
# A function that integrates g from 0 to Z and rotates
# As g is not dependent on the angle we just multiply by 2pi
intg <- function(Z, ...) {
2*pi*integrate(g, 0, Z, ...)$value
}
# Vectorize the Z argument of intg
gv <- Vectorize(intg, "Z")
# Plot
Z <- seq(0, 1000, length.out = 100)
plot(Z, gv(Z), type = "l", lwd = 2)
lines(Z, gv(Z, a = 5e-5), col = "blue", lwd = 2)
lines(Z, gv(Z, b = .150), col = "red", lwd = 2)
lines(Z, gv(Z, a = 1e-4, b = .2), col = "orange", lwd = 2)
You can then plot the curves for the a and b you want. If either is not specified, the default is used.
Disclaimer: my calculus is rusty and I just did off this top of my head. You should verify that I've done the rotation of the function around the axis properly.
The lattice package has several functions that can help you draw 3 dimensional plots, including wireframe() and persp(). If you prefer not to use a 3d-plot, you can create a contour plot using contour().
Note: I don't know if this is intentional, but your data produces a very large spike in one corner of the plot. This produces a plot that is for all intents flat, with a barely noticable spike in one corner. This is particularly problematic with the contour plot below.
library(lattice)
x <- seq(0, 1000, length.out = 50)
y <- seq(0, 1000, length.out = 50)
First the wire frame plot:
df <- expand.grid(x=x, y=y)
df$z <- with(df, f(x, y))
wireframe(z ~ x * y, data = df)
Next the perspective plot:
dm <- outer(x, y, FUN=f)
persp(x, y, dm)
The contour plot:
contour(x, y, dm)

Fill area to match the lines of with various 'type' arguments in lattice

I know I can use panel.xyarea from latticeExtra to fill the area in the plot with any colour. Without defining a type argument in xyplot, such filling will follow the route of default type="p":
library(lattice)
library(latticeExtra)
data <- data.frame(time=1:24,value=rnorm(24))
xyplot(value~time, data,
panel=function(x,y,...){
panel.xyarea(x,y,...)
panel.xyplot(x,y,...)})
This plots both panel.xyarea and the points coming from default type="p" in panel.xyplot. Now the problem arise when I want to change the type of plotting line, for example making it step function type="S":
xyplot(value~time, data, type="S",
panel=function(x,y,...){
panel.xyarea(x,y,...)
panel.xyplot(x,y,...)}
As you see on the example above, panel.xyarea doesn't fill the area underneath the new step function, but instead it plots both areas overlapping. It doesn't change anything if I move type="S" to the panel.xyarea - in fact it doesn't register type argument it at all and plots as it wouldn't be there.
Is there a way I can bypass this and have panel.xyarea fill my plots whatever type I define - be it step function (type="S"), loess (type="smooth") or regression (type="r")? Or maybe there is something better than panel.xyarea to use in such context?
For each value of type, you'll need to construct a custom panel function. Fortunately, if you model the functions closely on existing lattice code (starting out by having a look at panel.xyplot), that shouldn't be too hard. For example, the two custom panel functions below include many lines of code but only a couple of lines (marked with comments) that I had to write.
Once you've defined the panel functions (copying them in from the code blocks following the figure), use them like this:
library(lattice)
library(latticeExtra)
library(gridExtra)
set.seed(100)
data <- data.frame(time=1:24,value=rnorm(24))
## Filled version of xyplot(..., type="S")
a <- xyplot(value~time, data, panel=panel.filled_S)
## Filled version of xyplot(..., type="smooth")
b <- xyplot(value~time, data, panel=panel.filled_smooth)
grid.arrange(a, b, ncol = 2)
For a filled version of type="S":
## Modeled on code in panel.xyplot, which is called when type=S"
panel.filled_S <-
function(x,y, ...) {
horizontal <- FALSE ## Edited (may not want to hardcode)
ord <- if (horizontal)
sort.list(y)
else sort.list(x)
n <- length(x)
xx <- numeric(2 * n - 1)
yy <- numeric(2 * n - 1)
xx[2 * 1:n - 1] <- x[ord]
yy[2 * 1:n - 1] <- y[ord]
xx[2 * 1:(n - 1)] <- x[ord][-n]
yy[2 * 1:(n - 1)] <- y[ord][-1]
panel.xyarea(x = xx, y = yy, ...) ## Edited
panel.lines(x = xx, y = yy, ...) ## Edited
}
xyplot(value~time, data, panel=panel.filled_S, type="o")
For a filled version of type="smooth":
## Modeled on code in panel.loess, called by panel.xyplot when type="smooth"
panel.filled_smooth <-
function (x, y, span = 2/3, degree = 1, family = c("symmetric",
"gaussian"), evaluation = 50, lwd = plot.line$lwd, lty = plot.line$lty,
col, col.line = plot.line$col, type, horizontal = FALSE,
..., identifier = "loess")
{
x <- as.numeric(x)
y <- as.numeric(y)
ok <- is.finite(x) & is.finite(y)
if (sum(ok) < 1)
return()
if (!missing(col)) {
if (missing(col.line))
col.line <- col
}
plot.line <- trellis.par.get("plot.line")
if (horizontal) {
smooth <- loess.smooth(y[ok], x[ok], span = span, family = family,
degree = degree, evaluation = evaluation)
panel.lines(x = smooth$y, y = smooth$x, col = col.line,
lty = lty, lwd = lwd, ..., identifier = identifier)
panel.xyarea(smooth$y, smooth$x, ...) ## Edited
}
else {
smooth <- loess.smooth(x[ok], y[ok], span = span, family = family,
degree = degree, evaluation = evaluation)
panel.lines(x = smooth$x, y = smooth$y, col = col.line,
lty = lty, lwd = lwd, ..., identifier = identifier)
panel.xyarea(smooth$x, smooth$y, ...) ## Edited
}
smooth
}

ScatterPlot and ONLY one Histogram plot together

I want to visualize time series data with a 'scatter plot' and a histogram on the right side, but I haven't been able to figure out how to turn OFF the histogram on the upper side.
Code Example:
install.packages("psych")
library(psych)
data = matrix(rnorm(n=100000,mean=2,sd=1.5), nrow = 100, ncol=1000)
fs = list()
fs$p_Z = 1*(data>2)
n_p = 1;
for(i in floor(seq(1,dim(data)[2],length.out=n_p)))
{
scatter.hist(x = rep(1:length(data[,i])), y = data[,i],
xlab = 'observations',
ylab = 'log(TPM)',
title = 'Mixture Plot',
col = c("red","blue")[fs$p_Z[,i]+1],
correl = FALSE, ellipse = FALSE, smooth = FALSE)
}
Result:
Expected Result:
Same as the one I have but with no histogram on the upper side. I.e., ONLY the histogram on the right side for log(TPM).
Note: I am using psych package, scatter.hist function which seemed easy and nice to use, but couldn't find how to turn off one histogram.
Where flexibility ends, hacking begins. If you look at scatter.hist function, you will see that it is pretty basic usage of R base graphics. Following modified code would create the plot you want:
scat.hist <- function(x, y, xlab = NULL, ylab = NULL, title = "", ...) {
## Create layout
layout(matrix(c(1,2),1,2,byrow=TRUE), c(3,1), c(1,3))
## Plot scatter
par(mar=c(5,5,3,1))
plot(x= x, y = y, xlab = xlab, ylab = ylab, main = title, ...)
## Right histogram
yhist <- hist(y, plot = FALSE, breaks = 11)
par(mar=c(5,2,3,1))
mp <- barplot(yhist$density, space=0, horiz=TRUE, axes = FALSE)
## Density
d <- density(y, na.rm = TRUE, bw = "nrd", adjust = 1.2)
temp <- d$y
d$y <- (mp[length(mp)] - mp[1] + 1) * (d$x - min(yhist$breaks))/(max(yhist$breaks) - min(yhist$breaks))
d$x <- temp
lines(d)
}
Let's try it for the first row:
i = 1
scat.hist(x = seq_along(data[,i]), y = data[,i], col = c("red", "blue")[fs$p_Z[,i]+1], xlab = 'observations', ylab = 'log(TPM)', title = 'Mixture Plot')

Resources