R Getting numeric matrix from predict() - r

I have the following code:
fit_lm=lm(z~x+y)
mix <- 2
max <- 12
miy <- 2
may <- 12
griddf <- expand.grid(x = seq(mix,max, length.out = 10),
y = seq( miy,may,length.out = 10))
Prediction_data <- data.frame(griddf)
colnames(Prediction_data) <- c("x", "y")
coordinates(Prediction_data ) <- ~ x + y
terrain_lm <- predict(fit_lm, Prediction_data)
I want that terrain_lm is a numeric matrix, in such a way, that I can use
fig <- plot_ly()
fig <- fig %>% add_surface(terrain_lm)
but I get a 1d array with 100 elements.

The result of predict is a vector. You need to add it to the x and y values and then use xtabs to transform into a suitable matrix for a surface plot.
library(plotly)
#test data
x <- runif(20, 4, 10)
y <- runif(20, 3, 6)
z <- 3*x+y +runif(20, 0, 2)
fit_lm <- lm(z~x+y)
mix <- 2
max <- 12
miy <- 2
may <- 12
griddf <- expand.grid(x = seq(mix,max, length.out = 10),
y = seq( miy,may,length.out = 10))
terrain_lm <- data.frame(griddf)
terrain_lm$z <- predict(fit_lm, terrain_lm)
fig <- plot_ly(z = ~xtabs(z ~ x + y, data = terrain_lm))
fig <- fig %>% add_surface()

Related

Why does function return NULL?

A beginner in R over here, so apologies for the basic question.
Why does ATE return a null vector instead of saving the values of the difference of the means?
fun.cluster <- function(M, N){
set.seed(02139)
J <- 1:M # vector J_i
df <- as.data.frame(matrix(data=1:N, nrow = N, ncol = 1)) #data frame of all original values
df$cluster <- cut(df$V1, M, labels = 1:M) #breaking the dataframe into clusters
df$cluster <- as.numeric(df$cluster)
Y1 <- as.vector(sample(J, 5)) # assigning treatment
df$treatment <- ifelse(df$cluster %in% Y1, df$treatment <- 1, df$treatment <- 0)
#Inducing intracluster correlation:
mu_0j <- runif(n = 50, min = -1, max = 1)
df$V1[df$treatment==0] <- mu_0j
mu_1j <- runif(n=50, min = -0.5, max = 1.5)
df$V1[df$treatment==0] <- mu_1j
# drawing values
y_0i <- rnorm(n = 50, mean = mu_0j, sd = 1)
y_1i <- rnorm(n = 50, mean = mu_1j, sd = 1)
D_i <- as.vector(c(y_0i, y_1i))
# calculating ATE:
ATE[i] <- mean(y_1i - y_0i)
}
ATE <- c()
for(i in 1:10){
fun.cluster(M = 10, N = 100)
}

with ggplot in R

I need little help. I try to do plot with ggplot package. When I want to make plot, depends of more than 1 factor (for example here: plot changes when średnia1 and odchylenie1 change):
alpha = 0.05
N = 100
sample_l = 10
srednia1 = seq(-7, 7, by = 1)
odchylenie1 = seq(1, 10, by = 1)
srednia2 = 2
odchylenie2 = 2
prob = 0.7
params = expand.grid(sample_l, srednia1, odchylenie1, srednia2, odchylenie2, prob)
str(params)
names(params) = c("dlugość", "średnia1", "odchylenie1", "średnia2", "odchyelnie2", "prawdopodobienstwo")
set.seed(100)
now <- Sys.time()
powers <- sapply(1:nrow(params), function(p){
l <- params[p, 1]
par_1 <- c(params[p, 2],params[p, 3])
par_2 <- c(params[p, 4], params[p, 5])
p <- params[p,6]
p_sim <-sapply(rep(l, N), function(x){
my_sample <- rmix(l,"norm", par_1, "norm", par_2, p)
shapiro.test(my_sample)$p.value
})
mean(p_sim < alpha)
})
Sys.time() - now
power_df <- bind_cols(params, power = powers)
power_df %>% ggplot(aes(x = średnia1,
y = power,
col = factor(odchylenie1))) +
geom_line()
it work perfect, but now, when I want to make plot only depends of 1 factor - prob something goes wrong. I have error : Error: Aesthetics must be either length 1 or the same as the data (150): x, y. Here is a code:
alpha = 0.05
N = 100
sample_l = 10
srednia1 = 2
odchylenie1 = 2
srednia2 = 1
odchylenie2 = 1
prob = seq(0.1,0.9,by=0.1)
set.seed(100)
now <- Sys.time()
powers <- sapply(1:nrow(params), function(p){
l <- params[p, 1]
par_1 <- c(params[p, 2],params[p, 3])
par_2 <- c(params[p, 4], params[p, 5])
p <- params[p,6]
p_sim <-sapply(rep(l, N), function(x){
my_sample <- rmix(l,"norm", par_1, "norm", par_2, p)
shapiro.test(my_sample)$p.value
})
mean(p_sim < alpha)
})
Sys.time() - now
power_df <- bind_cols(params, power = powers)
power_df %>% ggplot(aes(x = prob, y = power)) + geom_line()
PLEASE HELP ME :(

Calculation in a loop in R

Consider the following data:
library(Benchmarking)
x <- c(2, 3, 8)
y <- c(1, 5, 10)
k <- 25
d <- data.frame(x,y,k)
x <- c(5, 2, 3, 4)
y <- c(3, 5, 9, 10)
k <- 30
d2 <- data.frame(x,y,k)
d3 <- replicate(3, rbind(d, d2[sample(seq_len(nrow(d2)), 3), , drop=FALSE]), simplify = FALSE)
So now I have a list of 3 data.frame. For each of these 3 data.frame I want to perform the following calculation:
e1 <- with(subset(d3[[1]], k == 25), dea(d3[[1]]$x, d3[[1]]$y, XREF = x, YREF = y))
e2 <- with(subset(d3[[1]], k == 30), dea(d3[[1]]$x, d3[[1]]$y, XREF = x, YREF = y))
we1 <- weighted.mean(eff(e1), d3[[1]]$y)
we2 <- weighted.mean(eff(e2), d3[[1]]$y)
va <- we2/we1
But instead of using that code three times, where I change [[1]] to [[2]] and [[3]], can I instead use a loop, where it does the calculation for all 3 data.frame and create a new data.frame where it list va for the three calculations?
We create a function and then loop through the list and apply the function
f1 <- function(dat) {
d1 <- subset(dat, k == 25)
e1 <- with(d1, dea(dat$x, dat$y, XREF = x, YREF = y))
d2 <- subset(dat, k == 30)
e2 <- with(d2, dea(dat$x, dat$y, XREF = x, YREF = y))
we1 <- weighted.mean(eff(e1), dat$y)
we2 <- weighted.mean(eff(e2), dat$y)
we2/we1
}
sapply(d3, f1)

IDW parameters in R

I want to perform IDW interpolation using R using the idw command from the gstat package. I have this data:
#settings
library(gstat)
library(dplyr)
library(sp)
library(tidyr)
id_rep <- rep(c(1,2), 20)
f <- rep(c(930,930.2), each=20)
perc <- rep(c(90, 80), each=10)
x <- sample(1:50, 40)
y <- sample(50:100, 40)
E <- runif(40)
df <- data.frame(id_rep, perc, x,y, f, E)
df_split <- split(df, list(df$id_rep, df$perc, df$f), drop = TRUE, sep="_")
#grid
x.range <- range(df$x)
y.range <- range(df$y)
grid <- expand.grid(x = seq(x.range[1], x.range[2], by=1),
y = seq(y.range[1], y.range[2], by=1))
coordinates(grid) <- ~x + y
#interpolation
lst_interp_idw <- lapply(df_split, function(X) {
coordinates(X) <- ~x + y
E_idw <- idw(E~ 1, X, grid, idp=1, nmax=3) %>% as.data.frame()
df_interp <- select(E_idw, x,y,E_pred=var1.pred)
df_interp
})
df_interp_idw <- bind_rows(lst_interp_idw, .id = "interact") %>%
separate(interact, c("id_rep", "perc", "f"), sep = "\\_")
Now I want to perform each run with different idp and nmax parameters within certain values​ (idp from 1 to 3 by 0.5, and nmax 3 to 6 by 1) and get out a data frame with columns for each combination of idp and nmax values. I try with two for loops but it doesn't work.
EDIT
the code that doesn't work is:
idp = seq(from = 1, to = 3, by = 0.5)
nmax = seq(from = 3, to = 6, by = 1)
...
for(i in idp) {
for(j in nmax)
{ E_idw= idw(E ~ 1, X, grid, nmax = i, idp = j)
}
}
...
Here is a way how to store the result of every iteration in a list.
#settings
#install.packages("gstat")
library(gstat)
library(dplyr)
library(sp)
library(tidyr)
id_rep <- rep(c(1,2), 20)
f <- rep(c(930,930.2), each=20)
perc <- rep(c(90, 80), each=10)
x <- sample(1:50, 40)
y <- sample(50:100, 40)
E <- runif(40)
df <- data.frame(id_rep, perc, x,y, f, E)
df_split <- split(df, list(df$id_rep, df$perc, df$f), drop = TRUE, sep="_")
#grid
x.range <- range(df$x)
y.range <- range(df$y)
grid <- expand.grid(x = seq(x.range[1], x.range[2], by=1),
y = seq(y.range[1], y.range[2], by=1))
coordinates(grid) <- ~x + y
# ==============================================
# NEW function
# ==============================================
idp = seq(from = 1, to = 3, by = 0.5)
nmax = seq(from = 3, to = 6, by = 1)
#interpolation
lst_interp_idw <- lapply(df_split, function(X) {
coordinates(X) <- ~x + y
df_interp <- vector(length(idp)*length(nmax), mode = "list" )
k <- 0
for(i in idp) {
for(j in nmax) {
print(paste(i, j))
# Iterator
k <- k + 1
E_idw= idw(E ~ 1, X, grid, nmax = i, idp = j) %>% as.data.frame()
df_interp[[k]] <- select(E_idw, x,y,E_pred=var1.pred)
}
}
return(df_interp)
})
# ==============================================
Some plausibility checks (lapply is applied to 8 list elements and 20 variations are calculated):
length(lst_interp_idw) # 8
length(lst_interp_idw[[1]]) #20
length(lst_interp_idw[[1]]) #20
It should be easy for you to adapt the last line of your code
df_interp_idw <- bind_rows(lst_interp_idw, .id = "interact") %>%
separate(interact, c("id_rep", "perc", "f"), sep = "\\_")
to format the output in the desired format. This highly depends on how you want to present the different interpolation alternatives.

Matrix version of rasterToPoints?

Anyone know of a non-raster method to achieve the following?
require(raster)
d = data.frame(rasterToPoints(raster(volcano)))
head(d)
x y layer
1 0.008196721 0.9942529 100
2 0.024590164 0.9942529 100
3 0.040983607 0.9942529 101
4 0.057377049 0.9942529 101
5 0.073770492 0.9942529 101
6 0.090163934 0.9942529 101
Cheers.
One way would be to use the row and col command:
library(raster)
data(volcano)
df <- data.frame(
x = as.vector(col(volcano)),
y = (yy <- as.vector(row(volcano)))[length(yy):1],
val = as.vector(volcano)
)
raster rescales the range to 0 - 1, if not specified differently, so we would to have to do this too:
## rescale
df$x <- with(df, (x - min(x)) / (max(x) - min(x)))
df$y <- with(df, (y - min(x)) / (max(y) - min(y)))
Finally lets check, that the results are the same:
## Using raster df1 <- data.frame(rasterToPoints(raster(volcano)))
cols <- colorRampPalette(c('white', "blue",'red')) df$col <-
cols(20)[as.numeric(cut(df$val, breaks = 20))] df1$col <-
cols(20)[as.numeric(cut(df1$layer, breaks = 20))]
par(mfrow = c(1, 2)) plot(df[, 1:2], col = df$col, pch = 20, main =
"matrix")
plot(df1[, 1:2], col = df1$col, pch = 20, main = "raster")
Note:
While the results appear the same visually, they are not. The resolution of the raster command is most likely different, and hence there are different nrows for df and df1.
Faster for large matrices:
data.frame(
x = rep(1:ncol(m), each=nrow(m)),
y = rep(nrow(m):1, ncol(m)),
val = as.vector(m)
)

Resources