I´m working on a project that involves analysing the income of those who belong to the active population on a certain territorial area (an Autonomous Community). I need to create a histogram with the sample weights given to me and ggplot2. However,when I try to implement the argument "weight" to the aesthetics, it doesn´t work because whether I include the argument "weight" or not, it plots the same graph. Apart from that, I don´t know how to add the weighted mean since my graph doesn´t even take into account the weights of the sample
This is the code to generate all the data from the territorial area:
rm(list=ls(all=TRUE))
if (!require(sae)) install.packages("sae")
library(sae)
data(incomedata)
help("incomedata")
set.seed(100452840)
cual = sample(1:17,1)
(cual)
datosECV=incomedata
datosECVmas16 = subset(datosECV, (datosECV$labor>0))
datosECVmas16$age = datosECVmas16$age - 1
nrows = dim(datosECVmas16)[[1]]
datosECVmas16$horas = round(rnorm(nrows,34,3), 1)
datosECVmas16$horas[(datosECVmas16$labor==2) | (datosECVmas16$labor == 3)] = 0
datosECVmas16$income = round(jitter(datosECVmas16$income),1)
datosECVmas16$income[datosECVmas16$labor==2] = datosECVmas16$income[datosECVmas16$labor==2]*0.7
datosECVmas16$income[datosECVmas16$labor==3] = 0
datosFinal =
data.frame(ca=datosECVmas16$ac, prov=datosECVmas16$prov,
provlab=datosECVmas16$provlab, gen=datosECVmas16$gen,
edad=datosECVmas16$age, nac=datosECVmas16$nat,
neduc=datosECVmas16$educ, sitemp=datosECVmas16$labor,
ingnorm=datosECVmas16$income, horas=datosECVmas16$horas,
factorel=round(datosECVmas16$weight,1))
datos_ComValenciana = datosFinal[datosFinal[,1]==10,]
if(cual == 1) {
write.table(datos_Andalucia,"datos_Andalucia.txt",row.names=FALSE)
} else if(cual == 2) {
write.table(datos_Aragon,"datos_Aragon.txt",row.names=FALSE)
} else if(cual == 3) {
write.table(datos_Asturias,"datos_Asturias.txt",row.names=FALSE)
} else if(cual == 4) {
write.table(datos_Baleares,"datos_Baleares.txt",row.names=FALSE)
} else if(cual == 5) {
write.table(datos_Canarias,"datos_Canarias.txt",row.names=FALSE)
} else if(cual == 6) {
write.table(datos_Cantabria,"datos_Cantabria.txt",row.names=FALSE)
} else if(cual == 7) {
write.table(datos_CastillaLeon,"datos_CastillaLeon.txt",row.names=FALSE)
} else if(cual == 8) {
write.table(datos_CastillaLaMancha,"datos_CastillaLaMancha.txt",row.names=FALSE)
} else if(cual == 9) {
write.table(datos_Catalunya,"datos_Catalunya.txt",row.names=FALSE)
} else if(cual == 10) {
write.table(datos_ComValenciana,"datos_ComValenciana.txt",row.names=FALSE)
} else if(cual == 11) {
write.table(datos_Extremadura,"datos_Extremadura.txt",row.names=FALSE)
} else if(cual == 12) {
write.table(datos_Galicia,"datos_Galicia.txt",row.names=FALSE)
} else if(cual == 13) {
write.table(datos_ComMadrid,"datos_ComMadrid.txt",row.names=FALSE)
} else if(cual == 14) { write.table(datos_RegMurcia,"datos_RegMurcia.txt",row.names=FALSE)
} else if(cual == 15) {
write.table(datos_ComForalNavarra,"datos_ComForalNavarra.txt",row.names=FALSE)
} else if(cual == 16) {
write.table(datos_PaisVasco,"datos_PaisVasco.txt",row.names=FALSE)
} else {
write.table(datos_Rioja,"datos_Rioja.txt",row.names=FALSE)
}
datosFinal=datosFinal[-12086,]
datos_ComValenciana = datosFinal[datosFinal[,1]==10,]
datosFinal=datosFinal[datosFinal$sitemp<3,]
datos_ComValenciana = datosFinal[datosFinal[,1]==10,]
N <- sum(datos_ComValenciana[,"factorel"])
Important note: the weights are the ones given by the 11th column of the dataset (the dataset in this case is called datos_ComValenciana). This column is the one called "factorel". "Ingnorm" are the different incomes of different people.
This code belowe should get the job done but doesn´t:
ggplot(data = datos_ComValenciana, aes(x = ingnorm,y = ..density..,weight=factorel)) +
geom_histogram(fill="#5DC863FF",alpha=0.6,col="black",bins=18)+
xlab("Ingresos normalizados")+
ylab("Cuenta")+
scale_fill_viridis(alpha=1,discrete=TRUE, option="D")+
ggtitle("Income without the weights")`
You can directly apply any weigths in the aes() function, like this (I made a toy example with mtcars):
library(ggplot2)
library(viridis)
data("mtcars")
mtcars$wt_norm <- mtcars$wt / mean(mtcars$wt)
ggplot(data = mtcars, aes(x = mpg * wt_norm, y = after_stat(density))) +
geom_histogram(fill="#5DC863FF", alpha = 0.6, col = "black", bins = 18)+
xlab("mpg normalizado")+
ylab("Cuenta")+
scale_fill_viridis(alpha = 1, discrete = TRUE, option = "D") +
ggtitle("Consumo normalizado por peso")
Which yields a different result than:
ggplot(data = mtcars, aes(x = mpg, y = after_stat(density))) +
geom_histogram(fill="#5DC863FF", alpha = 0.6, col = "black", bins = 18)+
xlab("mpg")+
ylab("Cuenta")+
scale_fill_viridis(alpha = 1, discrete = TRUE, option = "D") +
ggtitle("Consumo")
I'm by no means an specialist in ggplot, but the argument weights, if it is valid, it seems not to be working.
Related
I want to plot two sequence distribution plots with the TraMineR package (seqplot(type = 'd')) and below them two Hidden Markov Models which correspond to the sequence plots (those are clusters).
For the reprex, I just use the build in biofam data set. Using layout() produces the desired result up to the third plot. The fourth plot is drawn to a new page, so essentially the bottom right space is left empty, but the first three panes are used accordingly to the layout() specs.
library('seqHMM')
library('TraMineR')
data("biofam")
biofam <- biofam[sample(nrow(biofam),300),]
biofam.lab <- c("Parent", "Left", "Married", "Left+Marr",
"Child", "Left+Child", "Left+Marr+Child", "Divorced")
biofam.seq <- seqdef(biofam, 10:25, labels=biofam.lab)
#> [>] 8 distinct states appear in the data:
#> 1 = 0
#> 2 = 1
#> 3 = 2
#> 4 = 3
#> 5 = 4
#> 6 = 5
#> 7 = 6
#> 8 = 7
#> [>] state coding:
#> [alphabet] [label] [long label]
#> 1 0 0 Parent
#> 2 1 1 Left
#> 3 2 2 Married
#> 4 3 3 Left+Marr
#> 5 4 4 Child
#> 6 5 5 Left+Child
#> 7 6 6 Left+Marr+Child
#> 8 7 7 Divorced
#> [>] 300 sequences in the data set
#> [>] min/max sequence length: 16/16
### Plot the sequence
seqplot(biofam.seq,type='d',with.legend=FALSE)
### Fit a Hidden Markov Model
biofam.hmm <- build_hmm(observations = biofam.seq, n_states = 3)
### Plot the HMM once
plot(biofam.hmm, with.legend=FALSE)
### Use layout to plot two sequence distribution plots and below there HMM
layout(matrix(c(1,2,3,4), ncol = 2, byrow = TRUE))
layout.show(4)
par(cex = 0.65, mar = c(2.5,4.1,2.5,1.5))
### The Sequence plots
seqplot(biofam.seq, type = 'd', with.legend = FALSE, border = NA, ylab = NA)
seqplot(biofam.seq, type = 'd', with.legend = FALSE, border = NA, ylab = NA)
### Change the par() arguments
par(cex = 0.5, mar = c(1.5,3.5,3.5,1.5))
plot(biofam.hmm, with.legend = FALSE, vertex.size = 15)
plot(biofam.hmm, with.legend = FALSE, vertex.size = 15)
Created on 2022-11-20 by the reprex package (v2.0.1)
As you can see, the layout.show() indicates the correct plotting dimensions, but the second HMM plot is drawn on the next pane. Is there anything I can do (besides from plotting one cluster after another with just the sequence distribution and the fitted HMM), to plot everything on one page?
Any help would be appreciated.
Kind regards
PS: Since I don't have enough reputation, I cannot include the rendered images. Hopefully you can recreate my issue with the given code by yourself.
The problem is that seqHMM:::plot.hmm―the method which is dispatched when calling plot on a "hmm" object―is messing around with the pars internally (bad practice). But we can modify the method by deactivating that behavior, and have a new plot_hmm function.
plot_hmm <- function(x, layout = "horizontal", pie = TRUE, vertex.size = 40,
vertex.label = "initial.probs", vertex.label.dist = "auto",
vertex.label.pos = "bottom", vertex.label.family = "sans",
loops = FALSE, edge.curved = TRUE, edge.label = "auto", edge.width = "auto",
cex.edge.width = 1, edge.arrow.size = 1.5, edge.label.family = "sans",
label.signif = 2, label.scientific = FALSE, label.max.length = 6,
trim = 1e-15, combine.slices = 0.05, combined.slice.color = "white",
combined.slice.label = "others", with.legend = "bottom",
ltext = NULL, legend.prop = 0.5, cex.legend = 1, ncol.legend = "auto",
cpal = "auto", cpal.legend = "auto", legend.order = TRUE,
main = NULL, withlegend, ...) {
seqHMM:::check_deprecated_args(match.call())
oldPar <- par(no.readonly = TRUE)
if (is.null(main)) {
par(mar = c(0.5, 0.5, 0.5, 0.5))
}
else {
par(mai = c(0, 0, 1, 0))
}
# on.exit(par(oldPar), add = TRUE)
# on.exit(par(mfrow = c(1, 1)), add = TRUE)
dots <- list(...)
labelprint <- function(z, labs) {
if (labs == TRUE && (z > 0.001 || z == 0)) {
labs <- FALSE
}
if (z < 10^-(label.max.length)) {
z <- prettyNum(signif(round(z, digits = label.max.length),
digits = label.signif), scientific = labs)
}
else {
z <- prettyNum(signif(z, digits = label.signif),
scientific = labs)
}
}
if (!is.matrix(layout) && !is.function(layout)) {
if (!(layout %in% c("horizontal", "vertical"))) {
stop("Argument layout only accepts numerical matrices, igraph layout functions, or strings \"horizontal\" and \"vertical\".")
}
}
if (!is.numeric(vertex.label.pos)) {
choices <- c("bottom", "top", "left", "right")
ind <- pmatch(vertex.label.pos, choices, duplicates.ok = TRUE)
if (any(is.na(ind))) {
stop("Argument vertex.label.pos only accepts values \"bottom\", \"top\", \"left\", \"right\" or a numerical vector.")
}
vertex.label.pos <- choices[ind]
}
choices <- c(TRUE, FALSE, "bottom", "top", "left", "right")
ind <- pmatch(with.legend, choices)
if (is.na(ind)) {
stop("Argument with.legend must be one of TRUE, FALSE, \"bottom\", \"right\", \"top\", or \"left\".")
}
with.legend <- choices[ind]
if (with.legend %in% c(TRUE, "auto")) {
with.legend <- "bottom"
}
if (x$n_channels > 1) {
x <- mc_to_sc(x)
}
if (pie == FALSE && with.legend != FALSE) {
with.legend <- FALSE
}
if (!is.numeric(vertex.label.pos)) {
vpos <- numeric(length(vertex.label.pos))
for (i in 1:length(vertex.label.pos)) {
if (vertex.label.pos[i] == "bottom") {
vpos[i] <- pi/2
}
else if (vertex.label.pos[i] == "top") {
vpos[i] <- -pi/2
}
else if (vertex.label.pos[i] == "left") {
vpos[i] <- pi
}
else {
vpos[i] <- 0
}
}
vertex.label.pos <- vpos
}
if (length(vertex.label) == 1 && !is.na(vertex.label) &&
vertex.label != FALSE) {
if (vertex.label == "initial.probs") {
vertex.label <- sapply(x$initial_probs, labelprint,
labs = label.scientific)
}
else if (vertex.label == "names") {
vertex.label <- x$state_names
}
}
else if (length(vertex.label) != length(x$state_names)) {
warning("The length of the vector provided for the argument \"vertex.label\" does not match the number of hidden states.")
vertex.label <- rep(vertex.label, length.out = length(x$state_names))
}
if (is.character(vertex.label.dist)) {
ind <- pmatch(vertex.label.dist, "auto")
if (is.na(ind)) {
stop("Argument vertex.label.dist only accepts the value \"auto\" or a numerical vector.")
}
vertex.label.dist <- vertex.size * 0.4/3.5
}
else if (length(vertex.label.dist) > 1 && length(vertex.label.dist) !=
x$n_states) {
warning("The length of the vector provided for the argument \"vertex.label.dist\" does not match the number of edges.")
vertex.label.dist <- rep(vertex.label.dist, length.out = length(x$n_states))
}
transM <- x$transition_probs
transM[transM < trim] <- 0
edges <- transM
edges[edges > 0] <- 1
if (!loops) {
diag(edges) <- 0
}
transitions <- transM
if (loops == FALSE && length(transitions) > 1) {
diag(transitions) <- 0
}
transitions <- t(transitions)[t(transitions) > 0]
if (!is.na(edge.label) && edge.label != FALSE) {
if (length(edge.label) == 1 && (edge.label == "auto" ||
edge.label == TRUE)) {
edge.label <- sapply(transitions, labelprint, labs = label.scientific)
}
else if (length(edge.label) > 1 && length(edge.label) !=
length(transitions)) {
warning("The length of the vector provided for the argument \"edge.label\" does not match the number of edges.")
edge.label <- rep(edge.label, length.out = length(transitions))
}
}
if (is.character(edge.width)) {
ind <- pmatch(edge.width, "auto")
if (is.na(ind)) {
stop("Argument edge.width only accepts the value \"auto\" or a numerical vector.")
}
edge.width <- transitions * (7/max(transitions)) * cex.edge.width
}
else if (length(edge.width) > 1 && edge.width != length(transitions)) {
warning("The length of the vector provided for the argument \"edge.width\" does not match the number of edges.")
edge.width <- rep(edge.width, length.out = length(transitions))
}
g1 <- igraph::graph.adjacency(edges, mode = "directed")
if (is.function(layout)) {
glayout <- layout(g1)
}
else if (is.matrix(layout)) {
glayout <- layout
}
else {
if (layout == "horizontal") {
glayout <- igraph::layout_on_grid(g1, width = x$n_states)
}
else if (layout == "vertical") {
glayout <- igraph::layout_on_grid(g1, width = 1)
}
}
if (length(cpal) == 1 && cpal == "auto") {
pie.colors <- attr(x$observations, "cpal")
}
else if (length(cpal) != ncol(x$emiss)) {
warning("The length of the vector provided for argument cpal does not match the number of observed states. Automatic color palette was used.")
pie.colors <- attr(x$observations, "cpal")
}
else if (!all(isColor(cpal))) {
stop(paste("Please provide a vector of colors for argument cpal or use value \"auto\" for automatic color palette."))
}
else {
pie.colors <- cpal
}
if (with.legend != FALSE) {
pie.colors.l <- pie.colors
}
if (with.legend != FALSE && pie == TRUE) {
if (!is.null(ltext)) {
if (legend.order) {
if (length(ltext) != x$n_symbols) {
stop(paste0("With legend.order = TRUE, the length of the argument ltext must match the number of (combined) observed states in the observed data (",
x$n_symbols, ")."))
}
}
else {
if ((length(cpal) == 1 && cpal != "auto") &&
length(ltext) != length(cpal.legend)) {
stop(paste0("The number of colours in cpal.legend does not match the number of labels in ltext."))
}
ltext.orig <- ltext
if (length(cpal) == 1 && cpal == "auto") {
if (length(cpal.legend) == 1 && cpal.legend ==
"auto") {
cpal.legend <- attr(x$observations, "cpal")
}
}
else {
if (length(cpal.legend) == 1 && cpal.legend ==
"auto") {
cpal.legend <- cpal
}
}
}
}
else {
ltext <- ltext.orig <- x$symbol_names
if (length(cpal) == 1 && cpal == "auto") {
if (length(cpal.legend) == 1 && cpal.legend ==
"auto") {
cpal.legend <- attr(x$observations, "cpal")
}
}
else {
cpal.legend <- cpal
}
}
if (with.legend == "bottom") {
graphics::layout(matrix(1:2, nrow = 2), heights = c(1 -
legend.prop, legend.prop))
}
else if (with.legend == "right") {
graphics::layout(matrix(1:2, nrow = 1), widths = c(1 -
legend.prop, legend.prop))
}
else if (with.legend == "left") {
graphics::layout(matrix(2:1, nrow = 1), widths = c(legend.prop,
1 - legend.prop))
}
else {
graphics::layout(matrix(2:1, nrow = 2), widths = c(legend.prop,
1 - legend.prop))
}
par(cex = 1)
}
if (!is.matrix(layout) && !is.function(layout)) {
if (layout == "horizontal") {
if (hasArg(rescale)) {
rescale <- dots$rescale
}
else {
rescale <- FALSE
}
if (hasArg(xlim)) {
xlim <- dots$xlim
}
else {
if (rescale == TRUE) {
xlim <- c(-1, 1)
}
else {
xlim <- c(-0.1, ncol(transM) - 1 + 0.1)
}
}
if (hasArg(ylim)) {
ylim <- dots$ylim
}
else {
if (rescale == TRUE) {
ylim <- c(-1, 1)
}
else {
ylim <- c(-0.5, 0.5)
}
}
dots[["xlim"]] <- NULL
dots[["ylim"]] <- NULL
dots[["rescale"]] <- NULL
}
else if (layout == "vertical") {
if (hasArg(rescale)) {
rescale <- dots$rescale
}
else {
rescale <- FALSE
}
if (hasArg(xlim)) {
xlim <- dots$xlim
}
else {
if (rescale == TRUE) {
xlim <- c(-1, 1)
}
else {
xlim <- c(-0.5, 0.5)
}
}
if (hasArg(ylim)) {
ylim <- dots$ylim
}
else {
if (rescale == TRUE) {
ylim <- c(-1, 1)
}
else {
ylim <- c(-0.1, ncol(transM) - 1 + 0.1)
}
}
dots[["xlim"]] <- NULL
dots[["ylim"]] <- NULL
dots[["rescale"]] <- NULL
}
}
if (pie == TRUE) {
pie.values <- lapply(seq_len(nrow(transM)), function(i) x$emission_probs[i,
])
if (combine.slices > 0 && !all(unlist(pie.values)[unlist(pie.values) >
0] > combine.slices)) {
if (with.legend != FALSE) {
pie.colors.l <- NULL
lt <- NULL
}
for (i in 1:x$n_states) {
cs.prob <- sum(pie.values[[i]][pie.values[[i]] <
combine.slices])
pie.values[[i]][pie.values[[i]] < combine.slices] <- 0
pie.values[[i]] <- c(pie.values[[i]], cs.prob)
if (with.legend != FALSE) {
pie.colors.l <- c(pie.colors.l, pie.colors[pie.values[[i]][1:(length(pie.values[[i]]) -
1)] >= combine.slices])
lt <- c(lt, ltext[pie.values[[i]][1:(length(pie.values[[i]]) -
1)] >= combine.slices])
}
}
if (with.legend != FALSE) {
ltext <- c(unique(lt), combined.slice.label)
pie.colors.l <- c(unique(pie.colors.l), combined.slice.color)
}
if (ncol.legend == "auto") {
if (with.legend == "bottom" || with.legend ==
"top") {
ncol.legend <- ceiling(length(pie.colors)/4)
}
else {
ncol.legend <- 1
}
}
pie.colors <- c(pie.colors, combined.slice.color)
}
else {
if (ncol.legend == "auto") {
if (with.legend == "bottom" || with.legend ==
"top") {
ncol.legend <- ceiling(ncol(x$emission_probs)/4)
}
else {
ncol.legend <- 1
}
}
}
if (!is.matrix(layout) && !is.function(layout) && (layout ==
"horizontal" || layout == "vertical")) {
do.call(igraph::plot.igraph, c(list(g1, layout = glayout,
vertex.shape = "pie", vertex.pie = pie.values,
vertex.pie.color = list(pie.colors), vertex.size = vertex.size,
vertex.label = vertex.label, vertex.label.dist = vertex.label.dist,
vertex.label.degree = vertex.label.pos, vertex.label.family = vertex.label.family,
edge.curved = edge.curved, edge.width = edge.width,
edge.label = edge.label, edge.label.family = edge.label.family,
edge.arrow.size = edge.arrow.size, xlim = xlim,
ylim = ylim, rescale = rescale, main = main),
dots))
}
else {
do.call(igraph::plot.igraph, c(list(g1, layout = glayout,
vertex.shape = "pie", vertex.pie = pie.values,
vertex.pie.color = list(pie.colors), vertex.size = vertex.size,
vertex.label = vertex.label, vertex.label.dist = vertex.label.dist,
vertex.label.degree = vertex.label.pos, vertex.label.family = vertex.label.family,
edge.curved = edge.curved, edge.width = edge.width,
edge.label = edge.label, edge.label.family = edge.label.family,
edge.arrow.size = edge.arrow.size, main = main),
dots))
}
}
else {
if (!is.matrix(layout) && !is.function(layout) && (layout ==
"horizontal" || layout == "vertical")) {
do.call(igraph::plot.igraph, c(list(g1, layout = glayout,
vertex.size = vertex.size, vertex.label = vertex.label,
vertex.label.dist = vertex.label.dist, vertex.label.degree = vertex.label.pos,
vertex.label.family = vertex.label.family, edge.curved = edge.curved,
edge.width = edge.width, edge.label = edge.label,
edge.label.family = edge.label.family, edge.arrow.size = edge.arrow.size,
xlim = xlim, ylim = ylim, rescale = rescale,
main = main), dots))
}
else {
do.call(igraph::plot.igraph, c(list(g1, layout = glayout,
vertex.size = vertex.size, vertex.label = vertex.label,
vertex.label.dist = vertex.label.dist, vertex.label.degree = vertex.label.pos,
vertex.label.family = vertex.label.family, edge.curved = edge.curved,
edge.width = edge.width, edge.label = edge.label,
edge.label.family = edge.label.family, edge.arrow.size = edge.arrow.size,
main = main), dots))
}
}
if (with.legend != FALSE && pie == TRUE) {
if (legend.order) {
seqlegend(x$observations, cpal = pie.colors.l, ltext = ltext,
position = "center", cex = cex.legend, ncol = ncol.legend,
with.missing = FALSE)
}
else {
seqlegend(x$observations, cpal = cpal.legend, ltext = ltext.orig,
position = "center", cex = cex.legend, ncol = ncol.legend,
with.missing = FALSE)
}
}
# par(mfrow = c(1, 1))
}
Now it works.
layout(matrix(c(1, 2, 3, 4), ncol=2, byrow=TRUE))
# layout.show(4)
par(cex=0.65, mar=c(2.5, 4.1, 2.5, 1.5))
seqplot(biofam.seq, type='d', with.legend=FALSE, bordplot_hmmer=NA, ylab=NA)
seqplot(biofam.seq, type='d', with.legend=FALSE, border=NA, ylab=NA)
par(cex=0.5, mar=c(1.5, 3.5, 3.5, 1.5))
plot_hmm(biofam.hmm, with.legend=FALSE, vertex.size=15)
plot_hmm(biofam.hmm, with.legend=FALSE, vertex.size=15)
I came across this code on GitHub the other day. I am going to create two species frequency maps (not a simple distribution map). One map deals with species frequency in different biomes, and the other one shows the frequency in different eco-regions. I have almost 500 species and want to combine the occurrences of these 500 in one big file and create the maps using that. The following code has been used for another project in which the researchers have developed two maps for the frequency of many plant species in various biomes and eco-regions on Earth. Can anybody tell me how I can turn this code into the one I need?
Which parts of this do I have to remove? Which codes do I need to add?
Frequency map of species in different biomes and eco-regions (Reference: Rice, A., Šmarda, P., Novosolov, M., Drori, M., Glick, L., Sabath, N., ... & Mayrose, I. (2019). The global biogeography of polyploid plants. Nature Ecology & Evolution, 3(2), 265-273.)
require("raster")
require("rgdal")
require("plyr")
# args: (1) data_file (output from previous step, final_species_data.csv). (2) ecoregions (a file of ecoregion names and ids, attributes_and_data/eco_id_with_names.csv) (3) output1 (for biomes) (4) output2 (for ecoregions)
# GLOBAL ARGUMENTS
args <- commandArgs(TRUE)
for(i in 1:length(args)){
eval(parse(text = args[[i]]))
}
get_biomes = function (data){
all_biomes = data$biome
all_biomes = gsub("[[:space:]]", "", all_biomes)
splitter = unlist(strsplit(all_biomes,","))
odd_ind = seq(1,length(unlist(strsplit(splitter,"_"))),2)
even_ind = seq(2,length(unlist(strsplit(splitter,"_"))),2)
biomes = unlist(strsplit(splitter,"_"))[odd_ind]
occurrences = unlist(strsplit(splitter,"_"))[even_ind]
return (list(biomes,occurrences))
}
get_ecoregions = function (data){
all_ecoregions = data$eco_id
all_ecoregions = gsub("[[:space:]]", "", all_ecoregions)
splitter = unlist(strsplit(all_ecoregions,","))
odd_ind = seq(1,length(unlist(strsplit(splitter,"_"))),2)
even_ind = seq(2,length(unlist(strsplit(splitter,"_"))),2)
ecoregions = unlist(strsplit(splitter,"_"))[odd_ind]
occurrences = unlist(strsplit(splitter,"_"))[even_ind]
return(list(ecoregions,occurrences))
}
data = read.csv(data_file,stringsAsFactors=F, na.strings = c(NA,""))
ecoregions_data = read.csv(ecoregions,stringsAsFactors=F)
factors = 23
eco_data = data.frame(matrix(nrow = nrow(ecoregions_data), ncol = factors))
names(eco_data) = c("eco_id","name","occurrences","sp","dp","pp","na","annual","herb_p","woody","unclass_herb","unclass_per","no_lifeform",
"mixed","unresolved","conflict","basal_m","higher_m","angiosperm","basal_d","rosids","asterids","no_tax")
eco_data$eco_id = ecoregions_data$eco_id
eco_data$name = ecoregions_data$eco_name
biome_data = data.frame(matrix(nrow = 14, ncol = factors))
names(biome_data) = c("biome","name","occurrences","sp","dp","pp","na","annual","herb_p","woody","unclass_herb","unclass_per","no_lifeform",
"mixed","unresolved","conflict","basal_m","higher_m","angiosperm","basal_d","rosids","asterids","no_tax")
biome_data$biome = seq(1,14,1)
# initialize data frames as zeros
eco_data[,3:ncol(eco_data)] = 0
biome_data[,3:ncol(biome_data)] = 0
# update count in table
update_count = function(row,original_data_row,curr_occ){
row$occurrences = row$occurrences + curr_occ
row$sp = row$sp + 1
if (is.na(original_data_row$ploidy)){ row$na = row$na + 1 }
else if (original_data_row$ploidy==0){ row$dp = row$dp + 1 }
else if (original_data_row$ploidy==1){ row$pp = row$pp + 1 }
if (is.na(original_data_row$lifeform)) { row$no_lifeform = row$no_lifeform + 1 }
else if (original_data_row$lifeform == "Annual") { row$annual = row$annual + 1 }
else if (original_data_row$lifeform == "Perennial herb") { row$herb_p = row$herb_p + 1 }
else if (original_data_row$lifeform == "Woody") { row$woody = row$woody + 1 }
else if (original_data_row$lifeform == "Unclassified herb") { row$unclass_herb = row$unclass_herb + 1 }
else if (original_data_row$lifeform == "Unclassified perennial") { row$unclass_per = row$unclass_per + 1 }
else if (original_data_row$lifeform == "mixed") { row$mixed = row$mixed + 1 }
else if (original_data_row$lifeform == "unresolved") { row$unresolved = row$unresolved + 1 }
else if (original_data_row$lifeform =="Conflict") { row$conflict = row$conflict + 1 }
if (is.na(original_data_row$Wood_major_group)) { row$no_tax = row$no_tax + 1 }
else if (original_data_row$Wood_major_group =="Basal monocots (non-commelinid monocots)") { row$basal_m = row$basal_m + 1 }
else if (original_data_row$Wood_major_group =="Higher monocots (commelinids)") { row$higher_m = row$higher_m + 1 }
else if (original_data_row$Wood_major_group =="Basal angiosperms") { row$angiosperm = row$angiosperm + 1 }
else if (original_data_row$Wood_major_group =="Basal dicots (non-asterid +non-rosid dicots)") { row$basal_d = row$basal_d + 1 }
else if (original_data_row$Wood_major_group =="Dicots - core rosids") { row$rosids = row$rosids + 1 }
else if (original_data_row$Wood_major_group =="Dicots - core asterids") { row$asterids = row$asterids + 1 }
return (row)
}
for (i in 1:nrow(data)){
print(i)
res = get_biomes(data[i,]) # get species' biomes
biomes = res[[1]]; occurrences = res[[2]]
remove_ind = NULL
if ("98" %in% biomes){ # remove "98" from biomes
remove_ind = which(biomes=="98")
biomes = biomes[-remove_ind]
occurrences = occurrences[-remove_ind]
}
if ("99" %in% biomes){ # remove "99" from biomes
remove_ind = which(biomes=="99")
biomes = biomes[-remove_ind]
occurrences = occurrences[-remove_ind]
}
# do not consider species with less than 5 occurrences
remove_ind = NULL
if (length(which(as.numeric(occurrences)<5))>0){
remove_ind = which(as.numeric(occurrences)<5)
occurrences = occurrences[-remove_ind]
biomes = biomes[-remove_ind]
}
data$biomes[i] = length(biomes)
for (b in 1:length(biomes)){ # iterate over all biomes
row_ind = which(biome_data$biome==biomes[b])
updated_row = update_count(biome_data[row_ind,],data[i,],as.numeric(occurrences[b]))
biome_data[row_ind,] = updated_row
}
remove_ind = NULL
if (is.na(data$eco_id[i])){ # possible that there are biomes without ecoregions
next
}
res = get_ecoregions(data[i,]) # get species' ecoregions
ecoregions = res[[1]]; occurrences = res[[2]]
if ("-9998" %in% ecoregions){ # remove "-9998" from biomes
remove_ind = which(ecoregions=="-9998")
ecoregions = ecoregions[-remove_ind]
occurrences = occurrences[-remove_ind]
}
if ("-9999" %in% ecoregions){ # remove "-9999" from biomes
remove_ind = which(ecoregions=="-9999")
ecoregions = ecoregions[-remove_ind]
occurrences = occurrences[-remove_ind]
}
# do not consider species with less than 5 occurrences
remove_ind = NULL
if (length(which(as.numeric(occurrences)<5))>0){
remove_ind = which(as.numeric(occurrences)<5)
occurrences = occurrences[-remove_ind]
ecoregions = ecoregions[-remove_ind]
}
data$ecoregions[i] = length(ecoregions)
for (e in 1:length(ecoregions)){
row_ind = which(eco_data$eco_id==ecoregions[e])
updated_row = update_count(eco_data[row_ind,],data[i,],as.numeric(occurrences[e]))
eco_data[row_ind,] = updated_row
}
}
# add biome number to each ecoregion
biome_data$pp_perc = biome_data$pp/biome_data$sp*100 # pp_perc in each ecoregion
biome_data$pp_perc_resolved = biome_data$pp/(biome_data$pp + biome_data$dp)*100
eco_data$pp_perc_sp = eco_data$pp/eco_data$sp*100 # pp_perc in each ecoregion
eco_data$pp_perc_resolved = eco_data$pp/(eco_data$pp + eco_data$dp)*100
write.csv(file = output1,biome_data,row.names=F)
write.csv(file = output2,eco_data,row.names=F)
I am new to coding and I am trying to set up TclArray in R, so anytime users checks the checkbutton in GUI, TclArray will get input 0 or 1. The issues in my code occurs in value2, the code does not assign 1 or 0 each checkbutton. Also, once I value is assign, how do I assign it to other regular array?
Thanks,
library(tcltk)
base2 = tktoplevel()
tkwm.title(base2,'Process data Input')
headers <- c("Files","Tool1","Tool2","Tool3","Tool4")
file_name_GUI <- c("SYS1","SYS2","SYS3","SYS4") #More system can be added
parameters <- tclArray()
nfrm2 = tkframe(base2)
fontSub <- tkfont.create(family="times",size=11.0, weight="bold")
fontSub2 <- tkfont.create(family="times",size=11.0)
for (i in 0:length(file_name_GUI))
{
if(i == 0)
{
f2 = tkframe(nfrm2)
value = tklabel(f2,text=headers[1], font = fontSub, width = 20,bg="gray64", relief = 'raised')
tkgrid(value, row = i, column=0, padx =1, pady =1)
}else
{
f2 = tkframe(nfrm2)
value1 = tklabel(f2,text=file_name_GUI[i], font = fontSub2, width = 23)
tkgrid(value1,row=i,column=0, padx = 1, pady = 1)
}
for (j in 1:4)
{
if (i == 0){
value = tklabel(f2,text=headers[j+1], font = fontSub, width = 20,bg="gray64", relief = 'raised')
tkgrid(value,row=i,column=j,padx=1,padx=1)
}
else {
value2 = tkcheckbutton(f2, variable = .Tcl(paste("set parameters(", i, ",", j, ") 1", sep = "")), width = 23) #Issue is here, I can't assign value to each checkbox.
tkgrid(value2,row = i, column = j, padx = 1, pady =1)
}
}
tkpack(f2,side ='top')
}
tkpack(nfrm2)
I am using the referenceIntervals package in R, to do some data analytics.
In particular I am using the refLimit function which calculates reference and confidence intervals. I want to edit it to remove certain functionality (for instance it runs a shapiro normalitiy test, which stops the entire code if the data larger than 5000, it wont allow you to parametrically test samples less than 120). To do this I have been typing refLimit into the terminal - copying the function definition, then saving it as a separate file (below is the full original definition of the function).
singleRefLimit =
function (data, dname = "default", out.method = "horn", out.rm = FALSE,
RI = "p", CI = "p", refConf = 0.95, limitConf = 0.9)
{
if (out.method == "dixon") {
output = dixon.outliers(data)
}
else if (out.method == "cook") {
output = cook.outliers(data)
}
else if (out.method == "vanderLoo") {
output = vanderLoo.outliers(data)
}
else {
output = horn.outliers(data)
}
if (out.rm == TRUE) {
data = output$subset
}
outliers = output$outliers
n = length(data)
mean = mean(data, na.rm = TRUE)
sd = sd(data, na.rm = TRUE)
norm = NULL
if (RI == "n") {
methodRI = "Reference Interval calculated nonparametrically"
data = sort(data)
holder = nonparRI(data, indices = 1:length(data), refConf)
lowerRefLimit = holder[1]
upperRefLimit = holder[2]
if (CI == "p") {
CI = "n"
}
}
if (RI == "r") {
methodRI = "Reference Interval calculated using Robust algorithm"
holder = robust(data, 1:length(data), refConf)
lowerRefLimit = holder[1]
upperRefLimit = holder[2]
CI = "boot"
}
if (RI == "p") {
methodRI = "Reference Interval calculated parametrically"
methodCI = "Confidence Intervals calculated parametrically"
refZ = qnorm(1 - ((1 - refConf)/2))
limitZ = qnorm(1 - ((1 - limitConf)/2))
lowerRefLimit = mean - refZ * sd
upperRefLimit = mean + refZ * sd
se = sqrt(((sd^2)/n) + (((refZ^2) * (sd^2))/(2 * n)))
lowerRefLowLimit = lowerRefLimit - limitZ * se
lowerRefUpperLimit = lowerRefLimit + limitZ * se
upperRefLowLimit = upperRefLimit - limitZ * se
upperRefUpperLimit = upperRefLimit + limitZ * se
shap_normalcy = shapiro.test(data)
shap_output = paste(c("Shapiro-Wilk: W = ", format(shap_normalcy$statistic,
digits = 6), ", p-value = ", format(shap_normalcy$p.value,
digits = 6)), collapse = "")
ks_normalcy = suppressWarnings(ks.test(data, "pnorm",
m = mean, sd = sd))
ks_output = paste(c("Kolmorgorov-Smirnov: D = ", format(ks_normalcy$statistic,
digits = 6), ", p-value = ", format(ks_normalcy$p.value,
digits = 6)), collapse = "")
if (shap_normalcy$p.value < 0.05 | ks_normalcy$p.value <
0.05) {
norm = list(shap_output, ks_output)
}
else {
norm = list(shap_output, ks_output)
}
}
if (CI == "n") {
if (n < 120) {
cat("\nSample size too small for non-parametric confidence intervals, \n \t\tbootstrapping instead\n")
CI = "boot"
}
else {
methodCI = "Confidence Intervals calculated nonparametrically"
ranks = nonparRanks[which(nonparRanks$SampleSize ==
n), ]
lowerRefLowLimit = data[ranks$Lower]
lowerRefUpperLimit = data[ranks$Upper]
upperRefLowLimit = data[(n + 1) - ranks$Upper]
upperRefUpperLimit = data[(n + 1) - ranks$Lower]
}
}
if (CI == "boot" & (RI == "n" | RI == "r")) {
methodCI = "Confidence Intervals calculated by bootstrapping, R = 5000"
if (RI == "n") {
bootresult = boot::boot(data = data, statistic = nonparRI,
refConf = refConf, R = 5000)
}
if (RI == "r") {
bootresult = boot::boot(data = data, statistic = robust,
refConf = refConf, R = 5000)
}
bootresultlower = boot::boot.ci(bootresult, conf = limitConf,
type = "basic", index = 1)
bootresultupper = boot::boot.ci(bootresult, conf = limitConf,
type = "basic", index = 2)
lowerRefLowLimit = bootresultlower$basic[4]
lowerRefUpperLimit = bootresultlower$basic[5]
upperRefLowLimit = bootresultupper$basic[4]
upperRefUpperLimit = bootresultupper$basic[5]
}
RVAL = list(size = n, dname = dname, out.method = out.method,
out.rm = out.rm, outliers = outliers, methodRI = methodRI,
methodCI = methodCI, norm = norm, refConf = refConf,
limitConf = limitConf, Ref_Int = c(lowerRefLimit = lowerRefLimit,
upperRefLimit = upperRefLimit), Conf_Int = c(lowerRefLowLimit = lowerRefLowLimit,
lowerRefUpperLimit = lowerRefUpperLimit, upperRefLowLimit = upperRefLowLimit,
upperRefUpperLimit = upperRefUpperLimit))
class(RVAL) = "interval"
return(RVAL)
}
However when I then execute this file a large number of terms end up being undefined, for instance when I use the function I get 'object 'nonparRanks' not found'.
How do I edit the function in the package? I have looked at trying to important the package namespace and environment but this has not helped. I have also tried to find the actual function in the package files in my directory, but not been able to.
I am reasonably experienced in R, but I have never had to edit a package before. I am clearly missing something about how functions are defined in packages, but I am not sure what.
In the beginning of the package there is a line
data(sysdata, envir=environment())
See here: https://github.com/cran/referenceIntervals/tree/master/data/sysdata.rda
I suspect that "nonparRanks" is defined there as I don't see it defined anywhere else. So perhaps you could download that file, write your own function, then run that same line before running your function and it may work.
EDIT:
Download the file then run:
load("C:/sysdata.rda")
With your path to the file and then your function will work.
nonparRanks is a function in the referenceIntervals package:
Table that dictate the ranks for the confidence intervals
around thecalculated reference interval
Your method of saving and editing the function is fine, but make sure you load all the necessary underlying functions to run it too.
The easiest thing to do might be to:
save your copied and pasted R function as a different name, e.g. singleRefLimit2, then
call library("referenceIntervals"), which will load all the underlying functions you need and then
load your function source("singelRefLimit2.R"), with whatever edits you choose to make.
I am using the package networkDynamic to visualise two evolving networks and I would like to add, close to each network a simple legend (a few words of text). I can't find a way of doing this.
In the networkDynamic package, the function render.animation uses plot.network (from the package network) to render each frame and then compiles the different frames into an animation.
The plot.network arguments can be passed to render.animation, so the problem seems to boils down to adding text to a plot generated with plot.network but there doesn't seem to be a way of adding text at specified coordinates.
With a normal plot I would use the text function, but is there a way of including this function into the plot.network arguments?
render.animation is a function in the ndtv package. You will have to create a custom render.animation2 function based on render.animation. In the following function, I add an extra line to the render.animation function. I add an mtext after each plot.network calls (see about 20 lines from the end). You could change it to a text instead of mtext.
render.animation2 <- function (net, render.par = list(tween.frames = 10, show.time = TRUE,
show.stats = NULL, extraPlotCmds = NULL, initial.coords = 0),
plot.par = list(bg = "white"), ani.options = list(interval = 0.1),
render.cache = c("plot.list", "none"), verbose = TRUE, ...)
{
if (!is.network(net)) {
stop("render.animation requires the first argument to be a network object")
}
if (is.null(render.par)) {
stop("render.animation is missing the 'render.par' argument (a list of rendering parameters).")
}
if (is.null(render.par$tween.frames)) {
render.par$tween.frames <- 10
}
if (is.null(render.par$show.time)) {
render.par$show.time <- TRUE
}
if (is.null(render.par$initial.coords)) {
render.par$initial.coords <- matrix(0, ncol = 2, nrow = network.size(net))
}
if (!all(c("animation.x.active", "animation.y.active") %in%
list.vertex.attributes(net))) {
net <- compute.animation(net, verbose = verbose)
}
externalDevice <- FALSE
doRStudioHack <- TRUE
if (!is.null(render.par$do_RStudio_plot_hack)) {
doRStudioHack <- render.par$do_RStudio_plot_hack
}
if (!is.function(options()$device)) {
if (names(dev.cur()) == "RStudioGD" & doRStudioHack) {
message("RStudio's graphics device is not well supported by ndtv, attempting to open another type of plot window")
if (.Platform$OS.type == "windows") {
windows()
}
else if (length(grep(R.version$platform, pattern = "apple")) >
0) {
quartz()
}
else {
x11()
}
externalDevice <- TRUE
}
}
if (par("bg") == "transparent" & is.null(plot.par$bg)) {
plot.par$bg <- "white"
}
origPar <- par(plot.par)
oopts <- ani.options(ani.options)
slice.par <- get.network.attribute(net, "slice.par")
if (is.null(slice.par)) {
stop("render.animation can not locate the 'slice.par' list of parameters in the input network object")
}
render.cache <- match.arg(render.cache)
plot_params <- list(...)
if (is.null(plot_params$label)) {
plot_params$label <- function(slice) {
network.vertex.names(slice)
}
}
if (is.null(plot_params$xlab) & render.par$show.time) {
plot_params$xlab <- function(onset, terminus) {
ifelse(onset == terminus, paste("t=", onset, sep = ""),
paste("t=", onset, "-", terminus, sep = ""))
}
}
if (!is.null(render.par$show.stats) && render.par$show.stats !=
FALSE) {
if (render.par$show.time) {
plot_params$xlab <- eval(parse(text = paste("function(slice,onset,terminus){stats<-summary.statistics.network(slice",
render.par$show.stats, ")\n return(paste('t=',onset,'-',terminus,' ',paste(rbind(names(stats),stats),collapse=':'),sep='')) }",
sep = "")))
}
else {
plot_params$xlab <- eval(parse(text = paste("function(slice){stats<-summary.statistics.network(slice",
render.par$show.stats, ")\n return(paste(rbind(names(stats),stats),collapse=':')) }",
sep = "")))
}
}
if (is.null(plot_params$jitter)) {
plot_params$jitter <- FALSE
}
interp.fun <- coord.interp.smoothstep
starts <- seq(from = slice.par$start, to = slice.par$end,
by = slice.par$interval)
ends <- seq(from = slice.par$start + slice.par$aggregate.dur,
to = slice.par$end + slice.par$aggregate.dur, by = slice.par$interval)
xmin <- aggregate.vertex.attribute.active(net, "animation.x",
min)
xmax <- aggregate.vertex.attribute.active(net, "animation.x",
max)
ymin <- aggregate.vertex.attribute.active(net, "animation.y",
min)
ymax <- aggregate.vertex.attribute.active(net, "animation.y",
max)
if (is.null(plot_params$xlim)) {
if (xmin == xmax) {
xmax <- xmin + 1
xmin <- xmin - 1
}
plot_params$xlim <- c(xmin, xmax)
}
if (is.null(plot_params$ylim)) {
if (ymin == ymax) {
ymax <- ymin + 1
ymin <- ymin - 1
}
plot_params$ylim <- c(ymin, ymax)
}
if (is.numeric(render.par$initial.coords)) {
coords <- matrix(render.par$initial.coords, ncol = 2,
nrow = network.size(net))
}
slice <- network.collapse(net, starts[1], ends[1], rule = slice.par$rule,
rm.time.info = FALSE)
activev <- is.active(net, starts[1], ends[1], v = seq_len(network.size(net)),
rule = if (slice.par$rule != "all") {
"any"
})
if (length(slice) > 0 & network.size(slice) > 0) {
coords[activev, 1] <- get.vertex.attribute(slice, "animation.x")
coords[activev, 2] <- get.vertex.attribute(slice, "animation.y")
}
coords2 <- coords
if (render.cache == "plot.list") {
ani.record(reset = TRUE)
}
for (s in 1:length(starts)) {
if (verbose) {
print(paste("rendering", render.par$tween.frames,
"frames for slice", s - 1))
}
slice <- network.collapse(net, starts[s], ends[s], rule = slice.par$rule,
rm.time.info = FALSE)
activev <- is.active(net, starts[s], ends[s], v = seq_len(network.size(net)),
rule = if (slice.par$rule != "all") {
"any"
})
if (length(slice) > 0 & network.size(slice) > 0) {
evald_params <- .evaluate_plot_params(plot_params = plot_params,
net = net, slice = slice, s = s, onset = starts[s],
terminus = ends[s])
for (t in 1:render.par$tween.frames) {
coords2[activev, 1] <- get.vertex.attribute(slice,
"animation.x")
coords2[activev, 2] <- get.vertex.attribute(slice,
"animation.y")
tweenCoords <- interp.fun(coords, coords2, t,
render.par$tween.frames)
plot_args <- list(x = slice, coord = tweenCoords[activev,
, drop = FALSE])
plot_args <- c(plot_args, evald_params)
do.call(plot.network, plot_args)
mtext("my text\n on two lines", side = 3) #my.legend
if (!is.null(render.par$extraPlotCmds)) {
eval(render.par$extraPlotCmds)
}
if (render.cache == "plot.list") {
ani.record()
}
}
coords <- coords2
}
else {
evald_params <- .evaluate_plot_params(plot_params = plot_params,
net = net, slice = slice, s = s, onset = starts[s],
terminus = ends[s])
if (render.par$show.time) {
xlab <- evald_params$xlab
}
else {
xlab <- NULL
}
singlenet <- network.initialize(1)
for (t in 1:render.par$tween.frames) {
plot.network(singlenet, vertex.cex = 0, xlab = xlab)
if (!is.null(render.par$extraPlotCmds)) {
eval(render.par$extraPlotCmds)
}
if (render.cache == "plot.list") {
ani.record()
}
}
}
}
par(origPar)
if (externalDevice) {
dev.off()
}
}
It is then important to assign your new function render.animation2 to the ndtv namespace. If you don't, it will crash because render.animation refers to functions that can only be found in its own namespace.
environment(render.animation2) <- asNamespace('ndtv')
environment(render.animation) #<environment: namespace:ndtv>
environment(render.animation2) #<environment: namespace:ndtv>
Using, render.animation2, you will then get your legend printed on each slide of the animation.
require(ndtv)
triangle <- network.initialize(3) # create a toy network
add.edge(triangle,1,2)
# add an edge between vertices 1 and 2
add.edge(triangle,2,3)
# add a more edges
activate.edges(triangle,at=1) # turn on all edges at time 1 only
activate.edges(triangle,onset=2, terminus=3,
e=get.edgeIDs(triangle,v=1,alter=2))
add.edges.active(triangle,onset=4, length=2,tail=3,head=1)
render.animation2(triangle) #custom function
ani.replay()
Here's what the last slide looks like in the animation:
If you only need to add a few lines of text, you can pass the standard plot arguments main (for the main title) or xlab (for the x-axis caption). you can separate lines with the newline escape "\n"
library(ndtv)
data(short.stergm.sim)
render.animation(short.stergm.sim,main='hello\nworld')
It is also possible to plot other graphic elements (such as legend or text or maps) using the extraPlotCmds argument to render.animation. For example, if you wanted to plot "hello world" in blue at coordiantes 0,0 using text you can wrap it in an expression and pass it in via render.par
render.animation(short.stergm.sim,
render.par=list(extraPlotCmds=expression(
text(0,0,'hello\nworld',col='blue')
))
)
the extra plot command will evaluated on each frame as the network is rendered