Demography package issue with aggregating data - r

# Function to construct a mortality demogdata object from HMD
hmd.mx <- function(country, username, password, label=country){
path <- paste("https://www.mortality.org/hmd/", country, "/STATS/", "Mx_1x1.txt", sep = "")
}
userpwd <- paste(username, ":", password, sep = "")
txt <- RCurl::getURL(path, userpwd = userpwd)
con <- textConnection(txt)
mx <- try(utils::read.table(con, skip = 2, header = TRUE, na.strings = "."),TRUE)
close(con)
if(class(mx)=="try-error")
stop("Connection error at www.mortality.org. Please check username, password and country label.")
path <- paste("https://www.mortality.org/hmd/", country, "/STATS/", "Exposures_1x1.txt", sep = "")
userpwd <- paste(username, ":", password, sep = "")
txt <- RCurl::getURL(path, userpwd = userpwd)
con <- textConnection(txt)
pop <- try(utils::read.table(con, skip = 2, header = TRUE, na.strings = "."),TRUE)
close(con)
if(class(pop)=="try-error")
stop("Exposures file not found at www.mortality.org")
obj <- list(type="mortality",label=label,lambda=0)
obj$year <- sort(unique(mx[, 1]))
#obj$year <- ts(obj$year, start=min(obj$year))
n <- length(obj$year)
m <- length(unique(mx[, 2]))
obj$age <- mx[1:m, 2]
obj$rate <- obj$pop <- list()
for (i in 1:n.mort)
{ obj$rate[[i]] <- matrix(mx[, i + 2], nrow = m, ncol = n)
obj$rate[[i]][obj$rate[[i]] < 0] <- NA
obj$pop[[i]] <- matrix(pop[, i + 2], nrow = m, ncol = n)
obj$pop[[i]][obj$pop[[i]] < 0] <- NA
dimnames(obj$rate[[i]]) <- dimnames(obj$pop[[i]]) <- list(obj$age, obj$year)
}
names(obj$pop) = names(obj$rate) <- tolower(mnames)
obj$age <- as.numeric(as.character(obj$age))
if (is.na(obj$age[m])) {
obj$age[m] <- 2 * obj$age[m - 1] - obj$age[m - 2] }
return(structure(obj, class = "demogdata"))
}
Above is the code that we are using to import our population data into r.
NLdata <- hmd.mx(country = "NLD",username = "username",password="password")
This would be the specific code to obtain the Dutch data.
Would anyone happen to know how to add multiple countries into one, and put that data into one dataframe (same format as the demography data packages that we download)? So for example the mortality rates for the (Netherlands + France + Norway) / 3 into one package.

You can try this code. However I could not run your demography package. So you might need to edit the code a bit. Perhaps someone else can fill in the second part? I saw that no one has reacted yet.
C1 <- data.frame(Year = 1980:2018, value1 = rnorm(39), value2 = rnorm(39), Cat =rbinom(39,1,0.5), Country = "France")
C2 <- data.frame(Year = 1980:2018, value1 = rnorm(39), value2 = rnorm(39), Cat =rbinom(39,1,0.5),Country = "England")
C3 <- data.frame(Year = 1970:2018, value1 = rnorm(49), value2 = rnorm(49), Cat =rbinom(49,1,0.5),Country = "Netherlands")
C1 <- split(C1, C1$Cat)
C2 <- split(C2, C2$Cat)
C3 <- split(C3, C3$Cat)
list_all <- list(rbind(C1[[1]],C2[[1]],C3[[1]]),rbind(C1[[2]],C2[[2]],C3[[2]]))
Final_list <- lapply(list_all, function(x) x %>% group_by(Year) %>% summarise(Val1 = mean(value1), Val2 = mean(value2), Country = "All") %>% as.data.frame)

Related

How can i start this code found on github?

I'm following this code on github and in line 51 i have a problem with option[i,]<- skew.raw why? Said: object "i" not found. Why? What should i put?
It also fails to take values as after starting the get.option function I have NA values.
# Define function for formating/retrieving options data from json obj
get.options = function(symbols, date){
options = matrix(ncol = 11, nrow = length(symbols))
colnames(options) = c('Cl_price', "call_strike",
"call_lastPrice","call_vol","call_openInt", "call_ImpVoli",
"put_strike","put_lastPrice", 'put_vol',"put_openInt", 'put_ImpVoli')
rownames(options) = symbols
for(u in 1:length(symbols)){
s = symbols[u]
d = as.numeric(as.POSIXct(date, origin = '1970-01-01', tz = 'GMT'))
json_file <- sprintf('https://query2.finance.yahoo.com/v7/finance/options/%s?
date=%d&formatted=true&crumb=UNus6VhY1bn&lang=en-US&region=US&corsDomain=finance.yahoo.com',s,d)
json_data <- suppressWarnings(fromJSON(paste(readLines(json_file), collapse = "")))
# CALLS
n = length(json_data$optionChain$result[[1]]$options[[1]]$calls)
if (n < 1) next
calls = matrix(ncol = 6, nrow = n)
for(i in 1:n) calls[,2][i] = json_data$optionChain$result[[1]]$options[[1]]$calls[[i]]$strike$raw
Cl.price = json_data$optionChain$result[[1]]$quote$regularMarketPrice
x <- which.min(abs((calls[,2]/Cl.price) -1))
calls = calls[x,]
calls[1] = Cl.price
calls[3] = json_data$optionChain$result[[1]]$options[[1]]$calls[[x]]$lastPrice$raw
calls[4] = json_data$optionChain$result[[1]]$options[[1]]$calls[[x]]$volume$raw
calls[5] = json_data$optionChain$result[[1]]$options[[1]]$calls[[x]]$openInterest$raw
calls[6] = json_data$optionChain$result[[1]]$options[[1]]$calls[[x]]$impliedVolatility$raw
# PUTS
n = length(json_data$optionChain$result[[1]]$options[[1]]$puts)
if(n < 1) next
puts = matrix(ncol = 5, nrow = n)
for(i in 1:n) puts[,1][i] = json_data$optionChain$result[[1]]$options[[1]]$puts[[i]]$strike$raw
x <- which.min(abs((puts[,1]/Cl.price) - 0.95))
puts = puts[x,]
puts[2] = json_data$optionChain$result[[1]]$options[[1]]$puts[[x]]$lastPrice$raw
puts[3] = json_data$optionChain$result[[1]]$options[[1]]$puts[[x]]$volume$raw
puts[4] = json_data$optionChain$result[[1]]$options[[1]]$puts[[x]]$openInterest$raw
puts[5] = json_data$optionChain$result[[1]]$options[[1]]$puts[[x]]$impliedVolatility$raw
options[u,] = c(calls, puts)
}
return(options)
}
# Define stocks and gather options data
date = '2017-04-21'
symbols <- c('DIS','CAT','TSLA')
daily.options = as.data.frame(get.options(symbols, date))
which(is.na(daily.options))
skew.raw = daily.options$put_ImpVoli - daily.options$call_ImpVoli # SKEW(i,t)
options[i,] <- skew.raw
write.table(options, 'DISCATTSLA', sep = ",")
options = read.table('DISCATTSLA', sep = ",")**
I’m following this code because I read the paper by Rhui Zhao but in the paper I did not talk about how to implement the skew volatility on a software and then I was able to find this code on github.

multiple data frames with similar names

I needed to generate array or many data frames from other data frames which only varied in names. This required me to do a lot of copy-paste works. Is it possible that I can make it cleaner but not keep copying and pasting? Follows are two examples from many similar cases of the analysis I am doing now (I will provide codes for reproduction at the end of the question), which I think may be able to make them cleaner with the same approach.
case 1, create an array with data from per_d1,per_d1,per_d3,per_d4,per_d5
perd <- array(dim=c(7,15,5))
perd [,,1] <- as.matrix(per_d$per_d1)
perd [,,2] <- as.matrix(per_d$per_d2)
perd [,,3] <- as.matrix(per_d$per_d3)
perd [,,4] <- as.matrix(per_d$per_d4)
perd [,,5] <- as.matrix(per_d$per_d5)
case 2, create multiple data frames from data with similar names.
dataplot <- dfmak (per_d$per_d1,ge$per_d1$g1,ge$per_d1$g2,ge$per_d1$g3,ge$per_d1$g4,ge$per_d1$g5)
dataplot2 <- dfmak (per_d$per_d2,ge$per_d2$g1,ge$per_d2$g2,ge$per_d2$g3,ge$per_d2$g4,ge$per_d2$g5)
dataplot3 <- dfmak (per_d$per_d3,ge$per_d3$g1,ge$per_d3$g2,ge$per_d3$g3,ge$per_d3$g4,ge$per_d3$g5)
dataplot4 <- dfmak (per_d$per_d4,ge$per_d4$g1,ge$per_d4$g2,ge$per_d4$g3,ge$per_d4$g4,ge$per_d4$g5)
dataplot5 <- dfmak (per_d$per_d5,ge$per_d5$g1,ge$per_d5$g2,ge$per_d5$g3,ge$per_d5$g4,ge$per_d5$g5)
codes for reproduction
N <- 1
CS <- 10.141
S <- seq (7.72,13,0.807)
t <- 15
l <- length (S)
m0 <- 100
exps <- c(0.2, 0.5, 0.9, 1.5, 2)
sd <- c(0.2, 0.5, 0.8, 1.3, 1.8)
names(sd) <- paste("per", seq_along(sd), sep = "")
per <- lapply(sd, function(x){
per <- matrix(nrow = length(S)*N, ncol = t+1)
for (i in 1:dim(per)[1]) {
for (j in 1:t+1){
per [,1] <- replicate (n = N, S)
per [i,j] <- round (abs (rnorm (1, mean = per[i,1], sd =x)),digits=3)
colnames(per) <- c('physical',paste('t', 1:15, sep = ""))
per <- as.data.frame (per)
}
}
per <- per [,-1]
return(per)
}
)
per_d <- lapply(per, function(x){
per_d <- abs (x - 10.141)
}
)
names(per_d) <- paste("per_d", seq_along(sd), sep = "")
gefun <- function (i){
res <- lapply(exps, function(x){
g <- as.matrix (m0 * exp (-x * i))
for (i in 1:l) {
for (j in 1:t){
g [i,j] <- abs((round (rnorm(1,mean = g[i,j],sd=3), digits = 3)))
colnames(g) <- paste('t', 1:ncol(g), sep = "")
g <- as.data.frame(g)
}}
return(g)
}
)
}
ge <- lapply(per_d, gefun)
for (i in 1:length(ge)){
names(ge[[i]]) <- paste("g", seq_along(ge), sep = "")
}
dfmak <- function(df1,df2,df3,df4,df5,df6){
data.frame(stimulus = c (paste0('S',1:3),'CS+',paste0('S',5:7)),
phy_dis = S,
per_dis = c(df1$t1,df1$t2,df1$t3,df1$t4,df1$t5,df1$t6,df1$t7,df1$t8,df1$t9,df1$t10,df1$t11,df1$t12,df1$t13,df1$t14,df1$t15),
trials = rep(1:15, each = 7),
response_0.2 = c (df2$t1,df2$t2,df2$t3,df2$t4,df2$t5,df2$t6,df2$t7,df2$t8,df2$t9,df2$t10,df2$t11,df2$t12,df2$t13,df2$t14,df2$t15),
response_0.5 = c (df3$t1,df3$t2,df3$t3,df3$t4,df3$t5,df3$t6,df3$t7,df3$t8,df3$t9,df3$t10,df3$t11,df3$t12,df3$t13,df3$t14,df3$t15),
response_0.9 = c (df4$t1,df4$t2,df4$t3,df4$t4,df4$t5,df4$t6,df4$t7,df4$t8,df4$t9,df4$t10,df4$t11,df4$t12,df4$t13,df4$t14,df4$t15),
response_1.5 = c (df5$t1,df5$t2,df5$t3,df5$t4,df5$t5,df5$t6,df5$t7,df5$t8,df5$t9,df5$t10,df5$t11,df5$t12,df5$t13,df5$t14,df5$t15),
response_2 = c (df6$t1,df6$t2,df6$t3,df6$t4,df6$t5,df6$t6,df6$t7,df6$t8,df6$t9,df6$t10,df6$t11,df6$t12,df6$t13,df6$t14,df6$t15)
)
}
You can try the followings. But the codes, unfortunately, are not short.
Case 1
a <- lapply(per_d, as.matrix)
b <- c(a, recursive = TRUE)
pred <- array(b, dim = c(7,15,5))
Case 2
The data frames will be stored in a list. You still have to extract them using $ or [[]].
# create empty lists to store the outputs
out <- list()
name <- list()
for(i in 1:5) {
a <- per_d[[i]]
b <- ge[[i]][[1]]
c <- ge[[i]][[2]]
d <- ge[[i]][[3]]
e <- ge[[i]][[4]]
f <- ge[[i]][[5]]
arg <- list(a, b, c, d, e, f)
name[[i]] <- paste0("df_", i)
out[[i]] <- do.call(dfmak, arg)
}
out <- setNames(out, name)

issue with disag_model() function from disaggregation R package

I was trying to use the disaggregation package to evaluate if it could be used on the dataset I have. My original data are disaggregated, so I've aggregated them to use the disag_model function from disaggregation package and compare "fitted values" with actual values.
However when I run the function the R session aborts.
I tried to execute the disag_model function step by step and I saw that the problem is due to the use of nlminb() to optimize the a posteriori density function, but I cannot understand why it's happening and how to solve it.
Thanks for your help.
You can find the data I used at this link: https://www.dropbox.com/sh/au7l0e11trzfo19/AACpfRSUpd4gRCveUsh5JX6Ea?dl=0
Please download the folder to run the code.
This is the code I used:
library(tidyverse)
library(raster)
library(disaggregation)
library(sp)
path<- "yourPath/Data"
load(file.path(path, "myRS"))
load(file.path(path, "RAST"))
Data <- read.csv(file = paste(path, "/sim_data.csv", sep = ""))
Data$HasRes <- ifelse(Data$PN50 > runif(nrow(Data)), 1, 0)
for (i in 1:nlayers(myRS)) {
myRS#layers[[i]]#file#name<-file.path(path, "predStackl10")
}
DFCov <-
as.data.frame(raster::extract(myRS, Data[c("XCoord", "YCoord")]))
Data <- cbind(Data, DFCov)
# Remove NA
NAs <- which(is.na(rowSums(Data[names(myRS)])))
Data <- Data[-NAs, ]
Data$ISO3 <- as.factor(Data$ISO3)
world_shape <-
shapefile(file.path(path, "World.shp"))
lmic_shape <-
world_shape[(world_shape#data$ISO3 %in% levels(Data$ISO3)),]
plot(lmic_shape)
# I would like to convert Data in a SpatialPointsDataFrame object
PN50 <- Data
coordinates(PN50) <- c("XCoord", "YCoord")
is.projected(PN50) # see if a projection is defined
proj4string(PN50) <- CRS("+proj=longlat +datum=WGS84")
# compute the mean P50 within each state
PN50_mean <- aggregate(x = PN50,
by = list(Data$ISO3),
FUN = mean)
# compute the centroid of the observations coordinates for each state
PN50_centroid <-
Data %>% group_by(ISO3) %>% summarise(meanX = mean(XCoord), meanY = mean(YCoord))
# assign to each mean the centroid coordinates
PN50_agg <-
as.data.frame(
cbind(
PN50_mean = PN50_mean#data$PN50,
XCoord = PN50_centroid$meanX,
YCoord = PN50_centroid$meanY
)
)
PN50_agg$XCoord <- as.numeric(PN50_agg$XCoord)
PN50_agg$YCoord <- as.numeric(PN50_agg$YCoord)
PN50_agg$ISO3 <- as.character(PN50_centroid$ISO3)
samsiz <-
Data %>% group_by(ISO3) %>% summarise(sz = sum(SampleSize))
PN50_agg$sample_size <- as.numeric(samsiz$sz)
PN50_agg$case <- round(PN50_agg$PN50_mean * PN50_agg$sample_size)
# I would like having data in a SpatialPolygonsDataFrame format to use the disaggrgation package
library(sp)
coordinates(PN50_agg) <- c("XCoord", "YCoord")
proj4string(PN50_agg) <- CRS("+proj=longlat +datum=WGS84")
PN50_polyg <- lmic_shape
PN50_polyg#data <-
full_join(PN50_polyg#data, PN50_agg#data, by = "ISO3")
# covariates raster
covariate_stack <-
getCovariateRasters(path, shape = raster(x = paste0(path, '/multi.tif')))
names(covariate_stack)
covariate_stack2 <- dropLayer(covariate_stack, nlayers(covariate_stack))
names(covariate_stack2)
plot(covariate_stack2)
covariate_stack2 <- raster::stack(covariate_stack2)
covariate_stack2<-brick(covariate_stack2)
# population raster
extracted <- raster::extract(raster(x = paste0(path, '/multi.tif')), PN50_polyg)
n_cells <- sapply(extracted, length)
PN50_polyg#data$pop_per_cell <- PN50_polyg#data$sample_size / n_cells
population_raster <-
rasterize(PN50_polyg, covariate_stack2, field = 'pop_per_cell')
# prepare data for disag_model()
dis_data <- prepare_data(
polygon_shapefile = PN50_polyg,
covariate_rasters = covariate_stack2,
aggregation_raster = population_raster,
mesh.args = list(
max.edge = c(5, 40),
cut = 0.0005,
offset = 1
),
id_var = "ISO3",
response_var = "case",
sample_size_var = "sample_size",
na.action = TRUE,
ncores = 8
)
# Rho and p(Rho<Rho_min)
dist <- pointDistance(PN50_agg#coords, lonlat = F, allpairs = T)
rownames(dist) <- PN50_agg$ISO3
colnames(dist) <- PN50_agg$ISO3
flattenDist <- function(dist) {
up <- upper.tri(dist)
flat <- data_frame(row = rownames(dist)[row(dist)[up]],
column = rownames(dist)[col(dist)[up]],
dist = dist[up])
return(flat)
}
pair_dist <- flattenDist(dist)
d <- pair_dist$dist
k <- 0.036
CorMatern <- k * d * besselK(k * d, 1)
limits <- sp::bbox(PN50_polyg)
hypontenuse <-
sqrt((limits[1, 2] - limits[1, 1]) ^ 2 + (limits[2, 2] - limits[2, 1]) ^
2)
prior_rho <- hypontenuse / 3
p_rho <- sum(d[CorMatern <= 0.1] < prior_rho) / length(d[CorMatern <= 0.1])
# sigma and p(sigma>sigma_max)
sigma_boost <- function(data, i) {
sd(data[i] / mean(data[i]))
}
sigma <-
boot(data = dis_data$polygon_data$response,
statistic = sigma_boost,
10000)
prior_sigma <- sigma$t0
p_sigma <- sum(sigma$t >= sigma$t0) / length(sigma$t)
default_priors <-
list(
priormean_intercept = 0,
priorsd_intercept = 4,
priormean_slope = 0,
priorsd_slope = 2,
prior_rho_min = prior_rho,
prior_rho_prob = p_rho,
prior_sigma_max = prior_sigma,
prior_sigma_prob = p_sigma,
prior_iideffect_sd_max = 0.1,
prior_iideffect_sd_prob = 0.01
)
fitted_model <- disag_model(
data = dis_data,
iterations = 1000,
family = "binomial",
link = "logit",
# priors = default_priors,
field = TRUE,
iid = TRUE,
silent = TRUE
)
I was able to run the disag_model function using your dis_data object. There were no errors or crashes. I ran the following lines.
fitted_model <- disag_model(
data = dis_data,
iterations = 1000,
family = "binomial",
link = "logit",
field = TRUE,
iid = TRUE,
silent = TRUE
)
I am running on a Windows machine with 64GB RAM and 8 cores. It took over an hour and used all of my RAM for a while and up to 50% of my CPU, which is not surprising as you are fitting 5.5M pixels over the whole world. Therefore, I suspect it is related to your computer running out of resources. I suggest you try a smaller example to test it out first. Try fewer polygons and fewer pixels in each polygon.

r - create plots sequentially and arrange over multiple pages

I need to create plots sequentially from all files in a folder, add certain information from calculated values to the plot (e.g. Mean SOC, TNPP, ANPP, BNPP), and arrange them over as many pages as needed to array them in a 5 x 3 layout.
Please, find a sample folder with trial files here:
https://www.dropbox.com/sh/evty00a0t9a062b/AAABG-rIq3Uhtlf-yOWo2fNGa?dl=0
Following different online sources and other threads, I have tried:
path <- "C:/Users/.../trialFiles"
dfs <- dir(path, "*.csv", full.names = FALSE, ignore.case = TRUE, all.files = TRUE)
plotModel <- function(df) {
dat <- read.csv(paste(path, df, sep = "/"), header = TRUE, sep = ",")
Time <- dat$time
SOC <- dat$somtc
AGBM <- dat$agcprd
BGBM <- dat$bgcjprd
time_frame <- Time >= oT & Time <= fT
sTime <- Time[time_frame]
sSOC <- SOC[sTime]
sAGBM <- AGBM[sTime]
sBGBM <- BGBM[sTime]
iM_AGBM <- mean(sAGBM)
iM_BGBM <- mean(sBGBM)
iMSOC <- mean(sSOC)
iTNPP <- sum(iM_AGBM, iM_BGBM)
plot(Time, SOC)
legend("bottomright", bty = "n", legend = paste(df, "\n\n",
"SOC =", format(iMSOC, digits = 6), "\n",
"TNPP =", format(iTNPP, digits = 6), "\n",
"ANPP =", format(iM_AGBM, digits = 5), "\n",
"BNPP =", format(iM_BGBM, digits = 5), sep = ""))
}
eq_plot <- lapply(dfs, plotModel)
nPlot <- length(eq_plot)
cols <- 3
layout <- matrix(seq(1, cols * ceiling(nPlot/cols)),
ncol = cols, nrow = ceiling(nPlot/cols))
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
for (i in 1:nPlot) {
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(eq_plot[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
It does not give me an error, but is not showing the graphs either, just displays the text specified in the legend.
I have also tried with ggplot as follows:
dfs <- dir(period, "*.csv", full.names = FALSE, ignore.case = TRUE, all.files = TRUE)
plotModel <- function(df) {
dat <- read.csv(paste(period, df, sep = "/"), header = TRUE, sep = ",")
Time <- dat$time
SOC <- dat$somtc
AGBM <- dat$agcprd
BGBM <- dat$bgcjprd
time_frame <- Time >= oT & Time <= fT
sTime <- Time[time_frame]
sSOC <- SOC[sTime]
sAGBM <- AGBM[sTime]
sBGBM <- BGBM[sTime]
iM_AGBM <- mean(sAGBM)
iM_BGBM <- mean(sBGBM)
iMSOC <- mean(sSOC)
iTNPP <- sum(iM_AGBM, iM_BGBM)
ggplot(dat, aes(x=Time, y=SOC)) +
geom_line() +
ggtitle(df, subtitle = paste("SOC =", format(iMSOC, digits = 6), "\n",
"TNPP =", format(iTNPP, digits = 6), "\n",
"ANPP =", format(iM_AGBM, digits = 5), "\n",
"BNPP =", format(iM_BGBM, digits = 5), sep = ""))
}
eq_plot <- lapply(dfs, plotModel)
multi.page <- ggarrange(eq_plot, nrow = 5, ncol = 3)
ggexport(multi.page, filename = "diag_plots")
But I get the following error message: ggplot2 doesn't know how to deal with data of class uneval.
Please, forgive if cross posting. I have tried following the examples but there's something I am doing wrong.
Thanks
You need to save the plot within your function in an object and then return it.
Here is the solution for the first 3 files in that directory. You will need to adjust the last 2 lines of this code to handle all your plots:
library(ggplot2)
library(ggpubr)
path <- "C:/Users/.../path/to/your/dir"
dfs <- dir(path, "*.csv", full.names = FALSE, ignore.case = TRUE, all.files = TRUE)
# define oT and fT
oT=100
fT=1000
plotModel <- function(df) {
dat <- read.csv(paste(path, df, sep = "/"), header = TRUE, sep = ",")
# This part can be optimized, see below the simplified version of the function
Time <- dat$time
SOC <- dat$somtc
AGBM <- dat$agcprd
BGBM <- dat$bgcjprd
time_frame <- Time >= oT & Time <= fT
sTime <- Time[time_frame]
sSOC <- SOC[sTime]
sAGBM <- AGBM[sTime]
sBGBM <- BGBM[sTime]
iM_AGBM <- mean(sAGBM)
iM_BGBM <- mean(sBGBM)
iMSOC <- mean(sSOC)
iTNPP <- sum(iM_AGBM, iM_BGBM)
# save graph in an object
g<- ggplot(dat, aes(x=Time, y=SOC)) +
geom_line() +
ggtitle(df,
subtitle = paste("SOC =", format(iMSOC, digits=6), "\n",
"TNPP =", format(iTNPP, digits=6), "\n",
"ANPP =", format(iM_AGBM, digits=5), "\n",
"BNPP =", format(iM_BGBM, digits=5), sep = ""))
return(g)
}
eq_plot <- lapply(dfs, plotModel)
ggarrange(eq_plot[[1]], eq_plot[[2]], eq_plot[[3]], nrow = 1, ncol = 3) %>%
ggexport(filename = "diag_plots.png")
dev.off()
The body of the function can be simplified for clarity and efficiency:
plotModel <- function(df) {
dat <- read.csv(paste(path, df, sep = "/"), header = TRUE, sep = ",")
var.means <- colMeans(dat[dat$time >= oT & dat$time <= fT, c("agcprd","bgcjprd","somtc")])
# save graph in an object
g<- ggplot(dat, aes(x=time, y=somtc)) +
geom_line() +
ggtitle(df,
subtitle = paste("SOC =", format(var.means["somtc"], digits=6), "\n",
"TNPP =", format(var.means["agcprd"] + var.means["bgcjprd"], digits=6), "\n",
"ANPP =", format(var.means["agcprd"], digits=5), "\n",
"BNPP =", format(var.means["bgcjprd"], digits=5), sep = ""))
return(g)
}

R redefine a string to argument

following on from some help earlier I think all I need for this to work is a way to define the variable dimxST below as not a string as I need that to point to the data frame....
cpkstudy <- function(x,y){
dxST <- paste(x,"$",y, sep = "")
dLSL <- paste(y, "LSL", sep = "")
dUSL <- paste(y, "USL", sep = "")
dTar <- paste(y, "Target", sep = "")
dimxST <-
dimLSL <- PivSpecs[[dLSL]]
dimUSL <- PivSpecs[[dUSL]]
dimTar <- PivSpecs[[dTar]]
ss.study.ca(dimxST, LSL = dimLSL, USL = dimUSL, Target = dimTar,
alpha = 0.05, f.na.rm = TRUE, f.main = "Six Sigma Study")
}
cpkstudy("cam1","D1")
link to the previous post
This is a different direction, and you may find the learning curve a bit steeper, but it's a lot more powerful. Instead of passing everything in as strings, we pass them without quotes, and use the rlang package to figure out where to evaluate D1.
# the same dummy data frame from Katia's answer
cam1 <- data.frame(D1 = rnorm(10),
D2 = rnorm(10))
PivSpecs <- list(D1LSL = 740, D1USL = 760, D1Target = 750)
library(rlang)
cpkstudy <- function(df, y){
quo_y <- enquo(y)
dLSL <- paste0(quo_name(quo_y), "LSL")
dUSL <- paste0(quo_name(quo_y), "USL")
dTar <- paste0(quo_name(quo_y), "Target")
dimxST <- eval_tidy(quo_y, data = df)
dimLSL <- PivSpecs[[dLSL]]
dimUSL <- PivSpecs[[dUSL]]
dimTar <- PivSpecs[[dTar]]
print(dimxST)
print (paste("dimLSL=", dimLSL) )
print (paste("dimUSL=", dimUSL) )
print (paste("dimTar=", dimTar) )
# ss.study.ca(dimxST, LSL = dimLSL, USL = dimUSL, Target = dimTar,
# alpha = 0.05, f.na.rm = TRUE, f.main = "Six Sigma Study")
}
# notice that I am not quoting cam1 and D1
cpkstudy(cam1, D1)
If you want to learn more about this, I would suggest looking at https://dplyr.tidyverse.org/articles/programming.html as an overview (the dplyr package imports some of the functions used in rlang), and http://rlang.r-lib.org/index.html for a more complete list of all the functions and examples.
You can use function get() to get object value from its string representation. In this solution I did not evaluate ss.study.ca() function itself, since I do not have your real-case input data, instead I just print the values that would go there:
cpkstudy <- function(x,y){
#dxST <- paste0(x,"$",y)
dLSL <- paste0(y, "LSL")
dUSL <- paste0(y, "USL")
dTar <- paste0(y, "Target")
dimxST <- get(x)[,y]
print(dimxST)
dimLSL <- PivSpecs[[dLSL]]
dimUSL <- PivSpecs[[dUSL]]
dimTar <- PivSpecs[[dTar]]
print (paste("dimLSL=", dimLSL) )
print (paste("dimUSL=", dimUSL) )
print (paste("dimTar=", dimTar) )
#ss.study.ca(dimxST, LSL = dimLSL, USL = dimUSL, Target = dimTar,
# alpha = 0.05, f.na.rm = TRUE, f.main = "Six Sigma Study")
}
# create some dummy dataframe to test with this example
cam1 <- data.frame(D1 = rnorm(10),
D2 = rnorm(10))
# define a list that will be used within a function
PivSpecs <- list(D1LSL = 740, D1USL = 760, D1Target = 750)
#test function
cpkstudy("cam1","D1")
#[1] 1.82120625 -0.08857998 -0.08452232 -0.43263828 0.17974556 -0.91141414 #-2.30595203 -1.24014396 -1.83814577 -0.24812598
#[1] "dimLSL= 740"
#[1] "dimUSL= 760"
#[1] "dimTar= 750"
I also changed your paste() commands on paste0() which has sep="" as a default.

Resources