Plotting huge number of lines in R - r

I have written a code to plot nearly 100000 lines in a graph with different colors depending on different conditions. The code is as following.
for(i in c(160000:260000)){
if(data[i,]$Char1 == 'A' & data[i,]$Char2 == 'S'){
if(data[i,]$Q1 < data[i,]$Q2){
lines(c(i,i),c(data[i,]$P + 2,data[i,]$P + 22),col="green")
}else{
lines(c(i,i),c(data[i,]$P - 2,data[i,]$P - 22),col="green")
}
}
if(data[i,]$Char1 == "B" & data[i,]$Char2 == 'S'){
lines(c(i,i),c(data[i,]$P + 2,data[i,]$P + 22),col='blue')
}
}
I have Plotted a normal graph before that. Which is
plot(data$P,type="l")
I ran the code and for more than 2-3 hours it kept on running till the point I stopped it. Is there any way to do this task easily and with less amount of time?

You may be able to save some computation time by not actually displaying the plot. Running
library(scales)
n <- 100000
m <- 20
system.time({
plot(0, 0, type = 'n', xlim = c(0, 10), ylim = c(0, 10), xlab = '', ylab = '')
for (i in 1:n) lines(sort(runif(m, max = 10)), sort(runif(m, max = 10)),
col = ifelse(i %% 10 == 0, 'red', alpha('lightblue', 0.1)),
lwd = 0.2)
})
vs.
system.time({
png('plot.png')
plot(0, 0, type = 'n', xlim = c(0, 10), ylim = c(0, 10), xlab = '', ylab = '')
for (i in 1:n) lines(sort(runif(m, max = 10)), sort(runif(m, max = 10)),
col = ifelse(i %% 10 == 0, 'red', alpha('lightblue', 0.1)),
lwd = 0.2)
dev.off()
})
gives
user system elapsed
44.415 0.704 45.435
vs.
user system elapsed
23.115 0.294 23.585
on my machine.
Update
Using CathG's answer brings down the computation time drastically when plotting lines:
n <- 100000
data <- data.frame(x0 = runif(n), y0 = runif(n), x1 = runif(n),
y1 = runif(n), col = 1:10)
system.time({
png('plot.png', 640, 640)
plot(0, 0, type = 'n', xlab = '', ylab = '', xlim = c(0, 1), ylim = c(0, 1))
for (i in 1:n) lines(data[i, c(1, 3)], data[i, c(2, 4)], col = data$col,
lwd = 0.1)
dev.off()
})
system.time({
png('plot.png', 640, 640)
plot(0, 0, type = 'n', xlab = '', ylab = '', xlim = c(0, 1), ylim = c(0, 1))
segments(data$x0, data$y0, data$x1, data$y1, col = data$col, lwd = 0.1)
dev.off()
})
gives
user system elapsed
119.682 0.822 121.525
vs.
user system elapsed
2.267 0.020 2.303

I think you should compute the different x and y (and color) first and then plot them all in one call with segments and I also think you should directly plot them using png for example and not in the window device:
data2 <- data[160000:260000, ]
data2$x0 <- data2$x1 <- 160000:260000
cond1 <- (data2$Char1=="A") & (data2$Char2 == "S") & (data2$Q1 < data2$Q2)
cond2 <- (data2$Char1=="A") & (data2$Char2 == "S") & (data2$Q1 >= data2$Q2)
cond3 <- (data2$Char1=="B") & (data2$Char2 == "S")
data2$y0[cond1] <- data2$P[cond1] + 2
data2$y0[cond2] <- data2$P[cond2] - 2
data2$y0[cond3] <- data2$P[cond3] + 2
data2$y1[cond1] <- data2$P[cond1] + 22
data2$y1[cond2] <- data2$P[cond2] - 22
data2$y1[cond3] <- data2$P[cond3] + 22
data2$color[cond1] <- "green"
data2$color[cond2] <- "green"
data2$color[cond3] <- "blue"
png("nameofyourfile.png")
plot(data$P,type="l")
segments(data2$x0, data2$y0, data2$x1, data2$y1, col=data2$color)
dev.off()

Related

Dot Plot include vertical line and dots of different colors

I needed to include in the code below, a vertical line,
for example, in position x = 5 and that all points smaller than 5 have another color,
for example blue.
The values of a variable can be read from the x-axis, and the y-axis shows the order of the observations in the variable (from bottom to top). Isolated points as the far ends, and on either side in a plot, suggest potentional outliers
Thanks
library(dplyr)
library(lattice)
n = 1000
df <- tibble(
xx1 = runif(n, min = 3, max = 10),
xx2 = runif(n, min = 3, max = 10),
xx3 = runif(n, min = 3, max = 10)
)
MyVar <- c("xx1","xx2","xx3")
MydotplotBR <- function(DataSelected){
P <- dotplot(as.matrix(as.matrix(DataSelected)),
groups=FALSE,
strip = strip.custom(bg = 'white',
par.strip.text = list(cex = 1.2)),
scales = list(x = list(relation = "same",tck = 1,
draw = TRUE, at=seq(0,10,1)),x=list(at=seq),
y = list(relation = "free", draw = FALSE),
auto.key = list(x =1)),
col=10,
axes = FALSE,
cex = 0.4, pch = 5,
xlim=c(0,10),
xlab = list(label = "Variable Value", cex = 1.5),
ylab = list(label = "Order of data in the file", cex = 1.5))
print(P)
}
(tempoi <- Sys.time())
Vertemp <- MydotplotBR(df[,MyVar])
(tempof <- Sys.time()-tempoi)
I find it weird that you want a color dependent only on the x-axis when values are also used on the y-axis of other plots.
Nevertheless, here's a homemade pairs_cutoff() function doing what you want.
pairs_cutoff <- function(data, cutoff, cols = c("red", "blue"),
only.lower = F, ...){
data <- as.data.frame(data)
cns <- colnames(data)
nc <- ncol(data)
layout(matrix(seq_len(nc^2), ncol = nc))
invisible(
sapply(seq_len(nc), function(i){
sapply(seq_len(nc), function(j){
if(i == j){
plot.new()
legend("center", bty = "n", title = cns[i], cex = 1.5, text.font = 2, legend = "")
} else {
if(j < i & only.lower)
plot.new()
else{
if(is.null(cutoff))
cols <- cols[1]
plot(data[,i], data[,j], col = cols[(data[,i] < cutoff) + 1],
xlab = cns[i], ylab = cns[j], ...)
}
}
})
})
)
}
Using your suggested data :
n = 1000
dat <- tibble(
xx1 = runif(n, min = 3, max = 10),
xx2 = runif(n, min = 3, max = 10),
xx3 = runif(n, min = 3, max = 10)
)
pairs_cutoff(dat, cutoff = 5, only.lower = T)
outputs the following plot :
You can specify extra parameters to the plot function (eg. pch) directly to pairs_cutoff.
Also, if you want the full symmetric grid of plots, set only.lower = F.

Margin for alluvial plot

I use this example
library(alluvial)
tit <- as.data.frame(Titanic)
# only two variables: class and survival status
tit2d <- aggregate( Freq ~ Class + Survived, data=tit, sum)
alluvial( tit2d[,1:2], freq=tit2d$Freq, xw=0.0, alpha=0.8,
gap.width=0.1, col= "steelblue", border="white",
layer = tit2d$Survived != "Yes" , cex.axis =8)
Pay attention I use cex.axis =8 and i get
Axis labels go beyond
I try to use par(mar=c(10, 10, 10, 10)) but no result
thanks for any idea
There is a bug in the source code of the alluvial function
The function sets par(mar=c(2,1,1,1)) hard coded so using the par() outside doesn't have any effect.
You could change locally the source code of the function to one of 2 options:
add an argument mar_ and pass the margin, and set at the right place par(mar=mar_).
just overwrite locally the line to the desired margins
I found the first option more appealing because you can then set the values from outside the function and optimise more easily.
The source code:
function (..., freq, col = "gray", border = 0, layer, hide = FALSE,
alpha = 0.5, gap.width = 0.05, xw = 0.1, cw = 0.1, blocks = TRUE,
ordering = NULL, axis_labels = NULL, cex = par("cex"), cex.axis = par("cex.axis"))
{
p <- data.frame(..., freq = freq, col, alpha, border, hide,
stringsAsFactors = FALSE)
np <- ncol(p) - 5
if (!is.null(ordering)) {
stopifnot(is.list(ordering))
if (length(ordering) != np)
stop("'ordering' argument should have ", np, " components, has ",
length(ordering))
}
n <- nrow(p)
if (missing(layer)) {
layer <- 1:n
}
p$layer <- layer
d <- p[, 1:np, drop = FALSE]
p <- p[, -c(1:np), drop = FALSE]
p$freq <- with(p, freq/sum(freq))
col <- col2rgb(p$col, alpha = TRUE)
if (!identical(alpha, FALSE)) {
col["alpha", ] <- p$alpha * 256
}
p$col <- apply(col, 2, function(x) do.call(rgb, c(as.list(x),
maxColorValue = 256)))
isch <- sapply(d, is.character)
d[isch] <- lapply(d[isch], as.factor)
if (length(blocks) == 1) {
blocks <- if (!is.na(as.logical(blocks))) {
rep(blocks, np)
}
else if (blocks == "bookends") {
c(TRUE, rep(FALSE, np - 2), TRUE)
}
}
if (is.null(axis_labels)) {
axis_labels <- names(d)
}
else {
if (length(axis_labels) != ncol(d))
stop("`axis_labels` should have length ", names(d),
", has ", length(axis_labels))
}
getp <- function(i, d, f, w = gap.width) {
a <- c(i, (1:ncol(d))[-i])
if (is.null(ordering[[i]])) {
o <- do.call(order, d[a])
}
else {
d2 <- d
d2[1] <- ordering[[i]]
o <- do.call(order, d2[a])
}
x <- c(0, cumsum(f[o])) * (1 - w)
x <- cbind(x[-length(x)], x[-1])
gap <- cumsum(c(0L, diff(as.numeric(d[o, i])) != 0))
mx <- max(gap)
if (mx == 0)
mx <- 1
gap <- gap/mx * w
(x + gap)[order(o), ]
}
dd <- lapply(seq_along(d), getp, d = d, f = p$freq)
rval <- list(endpoints = dd)
===============================================
===============Need to edit====================
op <- par(mar = c(2, 1, 1, 1))
===============================================
plot(NULL, type = "n", xlim = c(1 - cw, np + cw), ylim = c(0,
1), xaxt = "n", yaxt = "n", xaxs = "i", yaxs = "i", xlab = "",
ylab = "", frame = FALSE)
ind <- which(!p$hide)[rev(order(p[!p$hide, ]$layer))]
for (i in ind) {
for (j in 1:(np - 1)) {
xspline(c(j, j, j + xw, j + 1 - xw, j + 1, j + 1,
j + 1 - xw, j + xw, j) + rep(c(cw, -cw, cw),
c(3, 4, 2)), c(dd[[j]][i, c(1, 2, 2)], rev(dd[[j +
1]][i, c(1, 1, 2, 2)]), dd[[j]][i, c(1, 1)]),
shape = c(0, 0, 1, 1, 0, 0, 1, 1, 0, 0), open = FALSE,
col = p$col[i], border = p$border[i])
}
}
for (j in seq_along(dd)) {
ax <- lapply(split(dd[[j]], d[, j]), range)
if (blocks[j]) {
for (k in seq_along(ax)) {
rect(j - cw, ax[[k]][1], j + cw, ax[[k]][2])
}
}
else {
for (i in ind) {
x <- j + c(-1, 1) * cw
y <- t(dd[[j]][c(i, i), ])
w <- xw * (x[2] - x[1])
xspline(x = c(x[1], x[1], x[1] + w, x[2] - w,
x[2], x[2], x[2] - w, x[1] + w, x[1]), y = c(y[c(1,
2, 2), 1], y[c(2, 2, 1, 1), 2], y[c(1, 1),
1]), shape = c(0, 0, 1, 1, 0, 0, 1, 1, 0, 0),
open = FALSE, col = p$col[i], border = p$border[i])
}
}
for (k in seq_along(ax)) {
text(j, mean(ax[[k]]), labels = names(ax)[k], cex = cex)
}
}
axis(1, at = rep(c(-cw, cw), ncol(d)) + rep(seq_along(d),
each = 2), line = 0.5, col = "white", col.ticks = "black",
labels = FALSE)
axis(1, at = seq_along(d), tick = FALSE, labels = axis_labels,
cex.axis = cex.axis)
par(op)
invisible(rval)
}
I marked where the problem occur as:
================================================
==============Need to edit======================
op <- par(mar = c(2, 1, 1, 1))
================================================
After changing the line to par(mar=c(5, 5, 3, 10)) I got:

Code runs ok in R but fails in R-Fiddle, why?

I'm trying to run a piece of R code HERE on R-Fiddle with no success. The code runs very smoothly in R but doesn't run at all HERE on R-Fiddle?
Any advise is appreciated.
alt.hyp = function(N, d){
options(warn = -1) ; d = sort(d)
df = N - 1 ; d.SE = 1/sqrt(N) ; ncp.min = min(d)*sqrt(N) ; ncp.max = max(d)*sqrt(N)
min.d = d.SE*qt(1e-5, df, ncp.min) ; max.d = d.SE*qt(0.99999, df, ncp.max)
for(i in 1:length(d)){
H = curve(dt(d[i]*sqrt(N), df, x*sqrt(N)), min.d, max.d, n = 1e3, xlab = "Effect Size",
ylab = NA, ty = "n", add = i!= 1, bty = "n", yaxt = "n", font.lab = 2)
polygon(H, col = adjustcolor(i, .7), border = NA)
text(d[i], max(H$y), bquote(bolditalic(H[.(i-1)])), pos = 3, xpd = NA)
axis(1, at = d[i], col = i, col.axis = i, font = 2)
segments(d[i], 0, d[i], max(H$y), lty = 3)
}
}
# Example of use:
alt.hyp(N = 30, d = seq(0, 2, .5))
Looks like older version of R is used on the R fiddle.
Anyway, if I redo your script in old style, it works, see here. The only changes are replacement of assignment from = to <- and single statement per line.
Code
alt.hyp <- function(N, d) {
options(warn = -1)
d <- sort(d)
df <- N - 1
d.SE <- 1/sqrt(N)
ncp.min <- min(d)*sqrt(N)
ncp.max <- max(d)*sqrt(N)
min.d <- d.SE*qt(1e-5, df, ncp.min)
max.d <- d.SE*qt(0.99999, df, ncp.max)
for(i in 1:length(d)){
H <- curve(dt(d[i]*sqrt(N), df, x*sqrt(N)), min.d, max.d, n = 1e3, xlab = "Effect Size", ylab = NA, ty = "n", add = i!= 1, bty = "n", yaxt = "n", font.lab = 2)
polygon(H, col = adjustcolor(i, .7), border = NA)
text(d[i], max(H$y), bquote(bolditalic(H[.(i-1)])), pos = 3, xpd = NA)
axis(1, at = d[i], col = i, col.axis = i, font = 2)
segments(d[i], 0, d[i], max(H$y), lty = 3)
}
N
}
q <- alt.hyp(N = 30, d = seq(0, 2, .5))
print(q)
And the output in the R Fiddle

Fanplot in R with other package than fanplot

I generally create a fanplot like this:
n.ahead <- 10
m <- matrix(,nrow = 5000,ncol = 10)
library(fanplot)
m[,1] <- rnorm(5000,0.01,sd = 0.005)
m[,2] <- rnorm(5000,0.02,0.006)
m[,3] <- rnorm(5000,0.03,0.008)
m[,4] <- rnorm(5000,0.04,0.01)
m[,5] <- rnorm(5000,0.06,0.013)
m[,6] <- rnorm(5000,0.1,0.015)
m[,7] <- rnorm(5000,0.11,0.02)
m[,8] <- rnorm(5000,0.13,0.025)
m[,9] <- rnorm(5000,0.14,0.05)
m[,10] <- rnorm(5000,0.18,0.07)
n.ahead <- 10
fancol <- colorRampPalette(c('black', 'white'))
plot(
NULL, type = 'n', lwd = 3, col = 'black',
xlim = c(0, n.ahead), ylab = 'Y', ylim = c(0,max(m)), xlab = 'Year',
las = 1, xaxt = 'n', main = 'Y'
)
fan(
m, fan.col = fancol, ln.col = 'grey', txt = c('90','95', '99'),
anchor = 0, frequency = 1, probs = c(seq(1, 99, 1), 99.9, 99.95, 99.99), ln =
c(50, 90, 99, 99.9, 99.95, 99.99)
)
axis(1, at = 0:n.ahead, tcl = 0.5)
axis(1, at = seq(0, n.ahead, 0.25), labels = FALSE, tcl = 0.25)
It does the trick in the sense that a fanlplot is created. However, I don't really like it. Are there alternatives with dynamic charts like we see in dygraphs and rCharts I can use in Shiny?
Thanks.

SPI drought index [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 9 years ago.
Improve this question
I have the following data:
pcp # precipitation monthly data from 2001-01,2020-12
I have used the following to compute the SPI drought index
library(SPEI)
spi1 <- spi(pcp,1,kernel = list(type = "rectangular", shift = 0),distribution = "Gamma", fit = "ub-pwm", na.rm = FALSE,ref.start=NULL, ref.end=NULL, x=FALSE, params=NULL)
First question:
when I plot(spi1) I get SPEI on the Y axis which I don't want , what I want is SPI,
second:
how to plot each month separately for example when you call spi1 it will give you the index value for each month, and I want to plot it for each month
For the first answer, you can rewrite the function plot.spei
plot.spei <-
function (x, ...)
{
## label <- ifelse(as.character(x$call)[1] == "spei", "SPEI",
## "SPI")
ser <- ts(as.matrix(x$fitted[-c(1:x$scale), ]), end = end(x$fitted),
frequency = frequency(x$fitted))
ser[is.nan(ser - ser)] <- 0
se <- ifelse(ser == 0, ser, NA)
tit <- dimnames(x$coefficients)[2][[1]]
if (start(ser)[2] == 1) {
ns <- c(start(ser)[1] - 1, 12)
}
else {
ns <- c(start(ser)[1], start(ser)[2] - 1)
}
if (end(ser)[2] == 12) {
ne <- c(end(ser)[1] + 1, 1)
}
else {
ne <- c(end(ser)[1], end(ser)[2] + 1)
}
n <- ncol(ser)
if (is.null(n))
n <- 1
par(mar = c(4, 4, 2, 1) + 0.1)
if (n > 1 & n < 5)
par(mfrow = c(n, 1))
if (n > 1 & n >= 5)
par(mfrow = c({
n + 1
}%/%2, 2))
for (i in 1:n) {
datt <- ts(c(0, ser[, i], 0), frequency = frequency(ser),
start = ns, end = ne)
datt.pos <- ifelse(datt > 0, datt, 0)
datt.neg <- ifelse(datt <= 0, datt, 0)
plot(datt, type = "n", xlab = "", main = tit[i], ...)
if (!is.null(x$ref.period)) {
k <- ts(5, start = x$ref.period[1, ], end = x$ref.period[2,
], frequency = 12)
k[1] <- k[length(k)] <- -5
polygon(k, col = "light grey", border = NA, density = 20)
abline(v = x$ref.period[1, 1] + (x$ref.period[1,
2] - 1)/12, col = "grey")
abline(v = x$ref.period[2, 1] + (x$ref.period[2,
2] - 1)/12, col = "grey")
}
grid(col = "black")
polygon(datt.pos, col = "blue", border = NA)
polygon(datt.neg, col = "red", border = NA)
lines(datt, col = "dark grey")
abline(h = 0)
points(se, pch = 21, col = "white", bg = "black")
}
}
And then use the ylab parameters
plot(spi1, ylab = "SPI")
If you want to plot it separately, you can extract the fitted value of class ts and apply basic plotting for time series object in R.
par(mfrow = c(3, 4))
listofmonths <- split(fitted(spi1), cycle(fitted(spi1)))
names(listofmonths) <- month.abb
require(plyr)
l_ply(seq_along(listofmonths), function(x) {
plot(x = seq_along(listofmonths[[x]]), y = listofmonths[[x]],
type = "l", xlab = "", ylab = "SPI")
title(names(listofmonths)[x])
})
You can also try these types of plot
monthplot(fitted(spi1), labels = month.abb, cex.axis = 0.8)
boxplot(fitted(spi1) ~ cycle(fitted(spi1)), names = month.abb, cex.axis = 0.8)

Resources