Execute function multiple times and create matrix with results - r

The following code takes vector V1 and creates one bootstrapped sample called BV1. I would like to run that i times and place all the BVi vectors in matrix MV. Using a function of the apply family rather than a for loop if possible.
V1 <- c(0.18, 0.2, 0.24, 0.35, -0.22, -0.17, 0.28, -0.28, -0.14, 0.03, 0.87, -0.2, 0.06, -0.1, -0.72, 0.18, 0.01, 0.31, -0.36, 0.61, -0.16, -0.07, -0.13, 0.01, -0.09, 0.26, -0.14, 0.08, -0.62, -0.2, 0.3, -0.21, -0.11, 0.05, 0.06, -0.28, -0.27, 0.17, 0.42, -0.05, -0.15, 0.05, -0.07, -0.22, -0.34, 0.16, 0.34, 0.1, -0.12, 0.24, 0.45, 0.37, 0.61, 0.9, -0.25, 0.02)
BV1 <- sample(V1, length(V1), replace=TRUE)
I will then use that matrix to calculate a distribution of the bootstrapped summary statistics. Thanks for your help.

We can use replicate to repeat the sample 'n' times and output as a matrix.
replicate(4, sample(V1, length(V1), replace=TRUE))
If we look at replicate
function (n, expr, simplify = "array")
sapply(integer(n), eval.parent(substitute(function(...) expr)),
simplify = simplify)
it uses sapply (so the OP's need for apply family of functions is covered)

Related

Raytracing many objects and exporting shaded ground as a raster

I have models consisting of thousands of cylinders which are stored in a data frame with start-coordinates, end-coordinates, lengths and radii. I want to simulate the shading they would create in the real world, given a certain position of a light source. As a result, I would like to have a raster which contains the information whether the ground is shaded or not (on the xy-plane). Is there a way to do this in R? Even when handling several thousand cylinder objects at the same time?
Here a mockup for a single cylinder:
I usually draw my cylinders with the rgl package, but it would be also okay if I have to use another package. I figured I might be able to use the packages rayrender or raytracing, but I don't know to export the shaded ground from the view to an array or raster.
Edit: Code for creating & plotting some cylinders:
library(rgl)
# some cylinders
cylinder <- structure(list(radius = c(0.01, 0.01, 0.01, 0.02, 0.01, 0.01,
0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01,
0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01,
0.01, 0.01), length = c(0.07, 0.13, 0.08, 0.08, 0.1, 0.08, 0.09,
0.08, 0.07, 0.15, 0.02, 0.09, 0.12, 0.12, 0.08, 0.26, 0.1, 0.09,
0.08, 0.02, 0.12, 0.11, 0.08, 0.06, 0.06, 0.19, 0.05, 0.1, 0.09,
0.09), start_X = c(0.62, 0.61, 0.62, 0.63, 0.64, 0.65, 0.65,
0.63, 0.63, 0.63, 0.63, 0.64, 0.63, 0.69, 0.79, 0.81, 0.92, 0.97,
1.03, 1.07, 1.08, 1.15, 1.24, 1.3, 1.34, 0.61, 0.5, 0.47, 0.4,
0.37), start_Y = c(0.13, 0.11, 0.09, 0.09, 0.09, 0.08, 0.09,
0.08, 0.07, 0.08, 0.07, 0.07, 0.07, 0.05, 0.02, 0.04, 0.02, 0.01,
0.04, 0.05, 0.05, 0.1, 0.15, 0.19, 0.22, 0.07, 0.13, 0.16, 0.17,
0.26), start_Z = c(361, 361.07, 361.2, 361.29, 361.36, 361.46,
361.54, 361.62, 361.7, 361.77, 361.9, 361.92, 361.78, 361.88,
361.98, 362.04, 362.26, 362.35, 362.39, 362.46, 362.48, 362.56,
362.62, 362.65, 362.66, 361.76, 361.91, 361.95, 362.01, 362.07
), axis_X = c(-0.09, 0.05, 0.12, 0.14, 0.1, -0.03, -0.15, -0.07,
-0.07, -0.2, 0.52, -0.62, 0.43, 0.54, 0.16, 0.35, 0.53, 0.43,
0.76, 0.58, 0.63, 0.74, 0.66, 0.56, 0.79, -0.61, -0.65, -0.64,
-0.33, -0.7), axis_Y = c(-0.12, -0.09, -0.01, -0.08, -0.08, 0.01,
-0.11, -0.14, -0.04, -0.06, 0.06, 0.59, -0.14, -0.14, 0.38, -0.22,
0.1, 0, 0.14, 0.15, 0.47, 0.45, 0.46, 0.67, 0.48, 0.28, 0.2,
0, 0.55, 0.16), axis_Z = c(0.99, 0.99, 0.99, 0.99, 0.99, 1, 0.98,
0.99, 1, 0.98, 0.85, 0.53, 0.89, 0.83, 0.91, 0.91, 0.84, 0.9,
0.64, 0.8, 0.61, 0.5, 0.59, 0.48, -0.39, 0.74, 0.74, 0.77, 0.77,
0.69)), row.names = c(NA, 30L), class = "data.frame")
# calculate end points of cylinders
# (cylinders have starting coordinates, a length and a direction as unit vector)
cylinder$end_X = cylinder$start_X + cylinder$axis_X * cylinder$length
cylinder$end_Y = cylinder$start_Y + cylinder$axis_Y * cylinder$length
cylinder$end_Z = cylinder$start_Z + cylinder$axis_Z * cylinder$length
# prepare cylinders
cylinder_list <- lapply(1:nrow(cylinder), function(i) {
cyl <- cylinder3d(
center = cbind(
c(cylinder$start_X[i], cylinder$end_X[i]),
c(cylinder$start_Y[i], cylinder$end_Y[i]),
c(cylinder$start_Z[i], cylinder$end_Z[i])),
radius = cylinder$radius[i],
closed = -2)
cyl
})
# plot cylinders
open3d()
par3d(windowRect = c(50,50,650, 650))
shade3d(shapelist3d(cylinder_list, plot = FALSE), color = "blue")
I would like the light source to be a point source from infinite distance, as I would like to simulate sunlight. As the sun is so far away, I would just assume the light beams to be parallel to each other.
I believe this does what you want:
library(rgl)
# some cylinders
# ... deleted ...
# cylinder$start_Z <- cylinder$start_Z - 361.5
# calculate end points of cylinders
# (cylinders have starting coordinates, a length and a direction as unit vector)
cylinder$end_X = cylinder$start_X + cylinder$axis_X * cylinder$length
cylinder$end_Y = cylinder$start_Y + cylinder$axis_Y * cylinder$length
cylinder$end_Z = cylinder$start_Z + cylinder$axis_Z * cylinder$length
# prepare cylinders
cylinder_list <- lapply(1:nrow(cylinder), function(i) {
cyl <- cylinder3d(
center = cbind(
c(cylinder$start_X[i], cylinder$end_X[i]),
c(cylinder$start_Y[i], cylinder$end_Y[i]),
c(cylinder$start_Z[i], cylinder$end_Z[i])),
radius = cylinder$radius[i],
closed = -2)
cyl
})
cylinder_list <- shapelist3d(cylinder_list, plot = FALSE)
# plot cylinders
open3d()
#> glX
#> 1
par3d(windowRect = c(50,50,650, 650))
# shade3d(cylinder_list, color = "blue")
# Suppose Sun is in direction xyz
sun <- c(0,0,1) # straight up
# Get a matrix that projects sun to c(0,0,0),
# and leaves anything with z = 0 alone
M <- rbind( cbind( diag(2), c(-sun[1]/sun[3], -sun[2]/sun[3])), 0)
# Replot the shadows of the cylinders
shadows <- transform3d(cylinder_list, t(M))
shade3d(shadows, col = "gray", lit = FALSE)
# decorate3d() # Add axes
par3d(userMatrix = diag(4)) # Display flat.
lowlevel() # And show it in reprex
# Use snapshot3d() to get PNG file containing the final image
Created on 2022-07-12 by the reprex package (v2.0.1)
It should include your definition of the cylinders dataframe, but I left that out to make it easier to read.
If you want to show the cylinders as well as the shadows, uncomment the shade3d() call on cylinder_list. You'll probably also want to make the Z values smaller; I subtracted 361.5 to put them near zero. If you don't do this, the graph will be extremely long and thin, and you won't be able to see anything.
The idea of the code is to project the sun vector to c(0,0,0), while leaving anything with Z==0 alone. This flattens the cylinders into objects that fit in the Z == 0 plane. Setting the userMatrix to the identity matrix then displays the shadow. Use the snapshot3d() function to make this into a raster image (in a PNG file).

How to fill the area under/above the curve with different colors above and below 0?

I would like to plot a time series of the Standardized Precipitation Index (SPI). Normally this looks somewhat like this:
You can see that the area under/above the curve is colored in blue/red. This is what I would like to plot too.
I know that there were kind of similar questions, like this and that one. This could bring be a bit further, but unfortunately not to the final result yet.
To make it easier to understand, here is some code so that everybody can reproduce it:
library(data.table)
library(ggplot2)
vec1 <- 1:310
vec2 <- c(1.78, 1.88, 1.10, 0.42, 0.73, 1.35, 1.34, 0.54, 0.20, 0.72, 1.29, 1.78, 1.30, 1.37, -0.13, 0.64, -0.13, 0.87, 0.47, -0.26, -0.27,
-0.81, -0.54, -0.77, -0.29, -0.22, -0.05, 0.41, 0.45, 0.91, -0.31, 0.67, 0.28, 0.93, 0.43, -0.04, -0.80, -1.20, -0.73, -0.98, 0.47, -0.01,
1.30, 1.45, 0.72, -0.59, -1.14, -0.33, 0.22, 0.49, 0.58, 0.36, 0.66, 0.64, 0.47, -0.60, 1.01, 1.50, 1.18, 0.82, 0.02, 0.57, 0.25,
1.20, 1.19, 0.71, -0.30, -1.37, -1.50, -1.03, -0.77, -1.08, -1.92, -2.32, -2.46, -1.61, -0.39, 0.67, 0.38, 0.62, -0.34, 0.01, -0.55, -0.74,
-1.95, -1.18, -0.96, 0.36, -0.96, -1.28, -2.29, -2.67, -0.65, -0.13, 0.61, 0.21, 0.57, 0.11, 0.37, 0.20, -0.14, -0.87, -0.84, 0.87, 1.33,
0.45, -0.76, -1.27, -0.65, -0.29, 0.54, 0.14, -0.55, -0.94, -0.98, -0.44, -0.37, 0.72, 0.70, 0.95, 0.89, 1.10, 1.51, 1.11, 1.77, 1.20,
1.23, -0.72, -1.43, -2.11, -1.37, -0.80, -0.34, -0.14, 0.22, -0.65, -0.44, -0.86, -0.46, -0.67, -0.91, -0.40, -0.09, 0.22, 0.96, 0.71, 0.51,
-1.61, -1.62, -1.43, -0.27, 1.08, 1.76, 1.30, 0.78, 1.02, 1.01, 0.56, -0.32, 0.37, 0.31, 1.36, 1.49, 1.42, 0.78, -0.19, 0.64, 0.39,
0.47, -1.13, -1.45, -0.52, 0.43, -0.19, -0.97, -0.27, 0.63, 1.01, 1.01, 0.83, -0.56, -1.71, -0.29, 1.06, 1.82, 1.28, 0.88, 1.08, 1.78,
1.47, 0.74, -0.34, 0.14, 1.09, 1.49, 1.30, 0.28, -0.25, 0.24, -0.33, 0.05, -0.86, -0.69, -1.03, -0.59, 0.32, 0.61, 0.84, -0.18, -0.67,
0.46, 0.31, -0.72, -2.26, -2.85, -0.69, -0.77, 0.64, -1.49, -1.69, -1.55, -0.28, -0.80, -1.15, -0.38, 0.31, 0.18, -0.27, -0.84, -0.94, -1.23,
-0.53, -1.52, -0.73, -0.93, 0.25, -0.11, 0.38, 0.48, 0.10, -0.02, 0.26, 1.39, 1.61, 0.83, 0.09, 0.95, 1.07, 0.77, 0.23, 0.26, 0.85,
0.93, 0.91, 1.10, 0.47, 0.74, 1.42, 1.17, 0.32, -0.40, 0.76, 1.44, 1.69, 1.03, 0.01, 0.46, 0.61, 0.60, -0.09, -0.31, -0.96, -0.91,
-0.06, 0.75, 1.32, 1.29, 0.55, 0.43, -1.25, 0.12, -0.05, 0.18, -0.77, -2.19, -1.85, -2.12, -1.51, -1.14, -0.79, -0.82, -1.13, -1.72, -2.14,
-1.95, -0.63, 0.70, 0.64, 0.17, -1.04, -0.58, -0.57, -0.57, -1.05, -1.11, -0.59, -0.07, 1.22, 0.30, -0.15)
df <- data.frame(vec1, vec2)
colnames(df) <- c("ID", "SPI")
df = as.data.table(df)
There you have the entire SPI time series, one value for each month between 1980 and 2005.
So far I came to that result:
ggplot(data = df, aes(x = ID, y = SPI)) +
geom_col(data = df[SPI <= 0], fill = "red") +
geom_col(data = df[SPI >= 0], fill = "blue") +
theme_bw()
When you run the code you will see the following plot:
This is going into the right direction, but the small white gaps between values in the first half are disturbing and not supposed to be there. So there must be something wrong.
It is supposed to look like this, just with the two colors blue and red for the positive and negative values:
ggplot(data = df, aes(x = ID, y = SPI)) +
geom_area() +
theme_bw()
The code leads to this image:
You can see that there are no white gaps between the single values and I have absolutely no idea what leads to these errors.
Anybody with an idea how to solve that?
OP, what you are observing are the artifacts related to the resolution of your graphics driver and the space between columns. The areas you show are composed of many filled columns next to one another on the x axis. You do not specify the width= argument for geom_col(), so the default value leaves a space between the individual values on the x axis. It's best to illustrate if we take only a section of your data along the x axis:
ggplot(data = df, aes(x = ID, y = SPI)) +
geom_col(data = df[SPI <= 0], fill = "red") +
geom_col(data = df[SPI >= 0], fill = "blue") +
theme_bw() +
xlim(0,100) # just the first part on the left
There's your white lines - it's the space bewtween the columns. When you have the larger picture, the appearance of the white lines has to do with the resolution of your graphics device. You can test this if you save your graphic with ggsave() using different parameters for dpi=. For example, on my computer saving ggsave('filename.png', dpi=72) gives no lines, but ggsave('filename.png', dpi=600) shows the white lines in places.
There's an easy solution to this though, which is to specify the width= argument of geom_col() to be 1. Be default, it's set to 0.75 or 0.8 (not exactly sure), which leaves a gap between the next value (fills ~75 or 80% of the space). If you set this to 1, it fills 100% of the space allotted for that column, leaving no white space in-between:
ggplot(data = df, aes(x = ID, y = SPI)) +
geom_col(data = df[SPI <= 0], fill = "red", width=1) +
geom_col(data = df[SPI >= 0], fill = "blue", width=1) +
theme_bw() +
xlim(0,100)
What if you change the distance between columns?
Just added "width=1" inside "aes" of ggplot.
ggplot(data = df, aes(x = ID, y = SPI, width=1)) +
geom_col(data = df[SPI <= 0], fill = "red") +
geom_col(data = df[SPI >= 0], fill = "blue") +
theme_bw()
I get this:
image.result

Calculate derivative of Cumulative Distribution (CDF) to get Probability Density (PDF)

The following code calculates the Cumulative Distribution function (CDF) for vector VP. I would like to use the CDF to get the Probability Density function (PDF). In other words, I need to calculate the derivative of CDF. How can I do that in R?
VP <- c(0.36, 0.3, 0.36, 0.47, 0, 0.05, 0.4, 0, 0, 0.15, 0.89, 0.03,
0.45, 0.21, 0, 0.18, 0.04, 0.53, 0, 0.68, 0.06, 0.09, 0.58, 0.03,
0.23, 0.27, 0, 0.12, 0.12, 0, 0.32, 0.07, 0.04, 0.07, 0.39, 0, 0.25,
0.28, 0.42, 0.55, 0.04, 0.07, 0.18, 0.17, 0.06, 0.39, 0.65, 0.15,
0.1, 0.32, 0.52, 0.55, 0.71, 0.93, 0, 0.36)
set.seed(0)
CF <- round(sapply(1:1000, function(i) sample(VP, length(VP), replace=TRUE)),2)
Breaks <- c(max(CF,1.0), 1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0)
CDF <- round(sapply(Breaks, function(b) sum(CF<=b)/length(CF)),2)
diff is the discrete difference operator, so I think you're looking for
diff(CDF)/diff(Breaks)
note that this vector will be one shorter than the original CDF and Breaks vectors
you might have to do something about reversing your CDF and Breaks vectors to get sensible results ...
You can also try the empirical cdf function:
CDF <- ecdf(VP)
and the histogram function can also provide a sample density function
PDF <- hist(VP, freq=F)
Have a look at PDF$counts and PDF$breaks.

Error with hist function (need finite 'ylim' values) (breaks related)

Could anyone tell why running hist on matrix CHh gives error "Error in plot.window(xlim, ylim, "", ...) : need finite 'ylim' values"? If I eliminate the min function the error disappears. Yet I don't understand why that represents a problem. Thank you.
CFh <-structure(c(-0.64, 0.34, 0.65, 0.26, -0.64, 0.92, -0.64, -0.1, -0.41, -0.36, 0.16, 0.92, 1.43, -0.41, 0.65, 0.28, 0.47, 0.35, -0.54, 0.65, 0.28, -0.1, 0.92, -0.36, 0.25, 0.34, -0.34, 0.07, 0.65, 0, -0.04, 0.47, 0.78, 0.47, 1.43, -0.23, -0.41, 0.28, 0.62, 0.35, -0.34, -0.23, -0.36, 0.28, 0.26, 0.03, 0.28, 0.07, 0.47, 0.63, 0.35, 0.47, 0, -0.28, 0.34, 0.16, 0.62, -0.04, 0.03, -0.41, -0.34, -0.64, -0.32, -0.28, -0.04, -0.36, 0.34, 0.47, 0.63, 0.62, 0, -0.04, -0.23, 0.65, -0.04, 0.47, -0.64, 0, -0.34, 0.28, -0.1, -0.28, 0.35, -0.34, -0.04, 0.63, 0.92, 0.35, 0.25, 0.34, 0.25, 0.34, 0.16, -0.36, 0, 0.28, 0.28, -0.28, -0.34, -0.23, 0.78, -0.41, 0.65, -0.32, -0.54, -0.36, 0.92, 0.25, 0.47, -0.1, 0.78, -0.54, 0.63, 0.65, -0.28, 0.25, 0.07, 0.35, 0.62, -0.28, -0.36, -0.54, 0.47, 0.47, 1.43, 0.63, -0.28, 0.03, 0.92, 0.92), .Dim = c(26L, 5L))
Breaks <- c(max(CFh,1.0), 1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0,
-0.1, -0.2, -0.3, -0.4, -0.5, -0.6, -0.7, -0.8, -0.9, -1.0, min(CFh,-1.0))
h <- hist(CFh, plot=TRUE, breaks=Breaks)
You have repeated values in your Breaks vector. This causes a problem with binning. Make sure the values are unique
h <- hist(CFh, plot=TRUE, breaks=unique(Breaks))
Continuing on from the above solution,
using unique breaks,
you could also consider a simpler way to create breaks as a sequence
Breaks <- unique(c(max(CFh,1.0), seq(1, -1, by=-0.1), min(CFh,-1.0)))
h <- hist(CFh, plot=TRUE, breaks=Breaks)

Make table/matrix of probability densities and associated breaks

The following code takes vector V1, and bootstraps 10000 times a randomized rnomal sample made out of V1, creating with the results a matrix with 10000 columns. It then creates a histogram for that matrix.
V1 <- c(0.18, 0.2, 0.24, 0.35, -0.22, -0.17, 0.28, -0.28, -0.14, 0.03, 0.87, -0.2, 0.06, -0.1, -0.72, 0.18, 0.01, 0.31, -0.36, 0.61, -0.16, -0.07, -0.13, 0.01, -0.09, 0.26, -0.14, 0.08, -0.62, -0.2, 0.3, -0.21, -0.11, 0.05, 0.06, -0.28, -0.27, 0.17, 0.42, -0.05, -0.15, 0.05, -0.07, -0.22, -0.34, 0.16, 0.34, 0.1, -0.12, 0.24, 0.45, 0.37, 0.61, 0.9, -0.25, 0.02)
xxx <- round(sapply(1:10000, function(i) rnorm(length(V1), mean=sample
(V1, length(V1), replace=TRUE), sd(V1))),2)
h <- hist(xxx, plot=T)
I would like to create a printout of its probability density function in matrix or table format, i.e. get a matrix with 1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0, -0,1 -0.2, -0.3, -0.4, -0.5, -0.6, -0.7, -0.8, -0.9, -1.0 as breaks, and the associated probability densities on the next column.
My problems are two. First, specifying the breaks I want fails. Secondly, making a matrix with h$breaks and h$density also fails. Any insights would be much appreciated. Thank you.
#This works, but I want to specify the breaks
h <- hist(xxx, plot=T, breaks=20)
#This gives error "some 'x' not counted; maybe 'breaks' do not span range of 'x'"
h <- hist(xxx, plot=T, breaks=c(1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0, -0,1 -0.2, -0.3, -0.4, -0.5, -0.6, -0.7, -0.8, -0.9, -1.0))
#This gives error number of rows of result is not a multiple of vector length
ddd<- cbind(h$breaks, h$density)
At first you have a type error, you confused “.” and “,”.
At second the breaks must span the whole range of observations like this:
h <- hist(xxx, plot=F, breaks=c(max(xxx),1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0, -0.1, -0.2, -0.3, -0.4, -0.5, -0.6, -0.7, -0.8, -0.9, -1.0,min(xxx)))
And at last, you want to pick “mids” instead of breaks:
ddd<- cbind(h$mids, h$density)

Resources