A boxplot for the days of the month - r

Error in RStudio barplot
I work for a research lab and last worker left without explaining how her RStudio programs work, I have to adapt one to get the data of participants in the month of September, but I get 2 errors, it should be an easy fix but I can't see it.
I have the parameter days:
dias <- c("01","02","05","06","07","08","09","12","13","14","16","19","20","21","22","23","26","27","28","29","30")
and a part that reads data from an excel and then makes a barplot but I get the error:
Error in barplot.default(Volun_citados_dias_mes, ylim = c(0, 100), names.arg = dias, :
incorrect number of names
In this part for the barplot:
jpeg("x.jpeg", width = 800, height = 800)
text(barplot(Volun_citados_dias_mes, ylim = c(0, 100), names.arg = dias, cex.names = 0.9, col = "khaki", xlab = "Días",
ylab = "Voluntarios", main = "Número de voluntarios citados en Septiembre de 2022"),
Volun_citados_dias_mes + 4, labels = round(Volun_citados_dias_mes, 1))
dev.off()
And here when putting that data on an excel i get this other error:
Error in data.frame(dias, Volun_citadosSep) :
arguments imply differing number of rows: 21, 2
write.xlsx(data.frame(dias, Volun_citadosSep), "X:/Cohorte Cantabria/2. PROYECTO/INFORMES/Exito reclutamiento/Asistencia/Septiembre/Tablas/VolunCitadosDiasMes.xlsx", row.names = FALSE)
on.exit(par(opar))
dev.off()
I tried changing that 4 (Volun_citados_dias_mes + 4) to a 21... but not really sure what else to change, i was expecting a graph with at least 2 days added 01 and 02 that are the ones added by now, I prefer fixing this before adding 20 more.
CODE resumed:
#"..." is parts that are omitted
#libraries
library(readxl)
library("stringr")
library("xlsx")
dir <- "**...**"
#days
dias <- c("01","02","05","06","07","08","09","12","13","14","16","19","20","21","22","23","26","27","28","29","30")
#Redcap
df <- file.info(list.files("**...**", pattern = "**...**", full.names = TRUE))
lastFileCallcenter <- rownames(df)[which.max(df$mtime)]
ccList <- read.csv2(lastFileCallcenter, header = TRUE, sep = ",", quote = "\"",
dec = ",", stringsAsFactors = FALSE, encoding = "UTF-8")
colnames(ccList)[[which(str_detect(colnames(ccList), "codigo_registro"))]] <- "codigo_registro"
ccList <- unified_df(ccList)
ccList$codigo_registro <- toupper(ccList$codigo_registro)
ccList$dni_nie <- toupper(ccList$dni_nie)
septiembre_22 <- read.csv2(file.path(dir, "**...**.csv"), header = TRUE, sep = ",",
quote = "\"", dec = ",", stringsAsFactors = FALSE, encoding = "UTF-8")
colnames(septiembre_22)[[which(str_detect(colnames(septiembre_22), "codigo_registro"))]] <- "codigo_registro"
septiembre_22 <- unified_df(septiembre_22)
septiembre_22$codigo_registro <- toupper(septiembre_22$codigo_registro)
septiembre_22$dni_nie <- toupper(septiembre_22$dni_nie)
#-------------------Volunteers days of the month ---------------------------------------------------
septiembre_22_01 <- dim(septiembre_22[which(septiembre_22$fecha_extraccion == "01/09/2022"),])[1] + dim(septiembre_22[which(septiembre_22$fecha_extraccion_vt == "01/09/2022"),])[1]
septiembre_22_02 <- dim(septiembre_22[which(septiembre_22$fecha_extraccion == "02/09/2022"),])[1] + dim(septiembre_22[which(septiembre_22$fecha_extraccion_vt == "02/09/2022"),])[1]
**#...TODO, other days**
Volun_citadosSep <- c(septiembre_22_01, septiembre_22_02,**#...TODO**)
jpeg("x.jpeg", width = 800, height = 800)
text(barplot(Volun_citadosSep, ylim = c(0, 100), names.arg = dias, cex.names = 0.9, col = "khaki", xlab = "Días",
ylab = "Voluntarios", main = "Número de voluntarios citados en Septiembre de 2022"),
Volun_citadosSep + 4, labels = round(Volun_citadosSep, 1))
dev.off()
write.xlsx(data.frame(dias, Volun_citadosSep), "VolunCitadosDiasMes.xlsx", row.names = FALSE)
on.exit(par(opar))
dev.off()

Related

R convert string into argument for a function

I am trying to make plots using a function in which arguments are values of dataframes.
seniorPlot <- function(validityDate, seniorTotal, color){
par(new = T)
plot(validityDate,seniorTotal,
type = "l",
lwd = 2.5,
xlim = c(date_debut$validityDate,date_fin$validityDate),
ylim = c(1,nmax$seniorTotal),
col = color,
xlab = "",
ylab = "",
xaxt = "n",
yaxt = "n",
bty = "n",
)
}
For the purpose, the threee arguments of the function are results several dataframes 'seniorList' and the seniors df named with the senior initials ('AM', 'FB', 'GM'…)
To describe, you can see the senior dataframes below:
seniorList <- data.frame(seniorValidation=c('AM', 'FB', 'GM'),seniorTotal=c(72, 154, 137))
AM <- data.frame(validationDate=c('2022-01-25', '2022-01-26'), color=c('brown','brown')
FB <- data.frame(validationDate=c('2022-01-20', '2022-01-30'), color=c('green','green')
GM <- data.frame(validationDate=c('2022-01-24', '2022-01-28'), color=c('blue','blue')
Obviously, there is more than 3 lines in the seniorList, so I want to do a loop, using the value of the first column of seniorList (ie. the senior name) to call the write dataframe.
And, here is the issue : how can I convert the result of noquote(paste(noquote(seniorList[i,1]),'$validityDate', sep = '')) to the result '2022-01-25' (if i = 1)
for (i in 1:nrow(seniorList)) {
seniorPlot(validityDate = noquote(paste(noquote(seniorList[i,1]),'$validityDate', sep = '')),
seniorTotal = noquote(paste(noquote(seniorList[i,1]),'$seniorTotal', sep = '')),
color = noquote(paste(noquote(seniorList[i,1]),'$color', sep = '')))
}
Thank you for your help, I hope my english is easy to understand.
noquote(paste(noquote(seniorList[i,1]),'$validityDate', sep = '')) give AM$validityDate and not its result
It's unclear what exactly the plot should look like but this shows how use "tidy" data easily with ggplot2.
DF <- rbind(cbind(AM, data.frame(seniorTotal = 72, seniorValidation = "AM")),
cbind(FB, data.frame(seniorTotal = 154, seniorValidation = "GB")),
cbind(GM, data.frame(seniorTotal = 137, seniorValidation = "GM")))
DF$validationDate <- as.Date(DF$validationDate)
#this is how your data should look like:
print(DF)
library(ggplot2)
ggplot(data = DF, aes(x = validationDate, y = seniorTotal, color = seniorValidation)) +
geom_line()

How to adjust column width in dataframe of foretsplotter function

I am trying to create a forestplot, using forestplotter function, am able to get a beautiful graph, but am not able to see the entire graph, the column widths in few of the columns are so big, even if the string size is less, making the width of the entire graph, so big to see, can someone help me with this and also is it possible to align the datahrame contents uniformly centre aligned......Please help me with this
The code and relevant data are
###Required packages###
library(grid)
library(forestploter)
library(rmeta)
library(gridExtra)
#Data entered#
df <- data.frame(Study=c("A","B","C","D","Summary"),
nA = c(24,187,36,26,273),
median_A = c(4.9,5.69,8.866995074,8.5,NA),
Q1A =c(3,2.86,4.495073892,2,NA),
Q3A =c(8.5,9.78,14.96305419,32,NA),
nP = c(23,193,36,26,278),
median_P = c(7.2,6.79,8.990147783,12.5,NA),
Q1P =c(3.4,3.59,4.002463054,2,NA),
Q3P =c(10.9,10.12,12.06896552,43,NA),
W = c("10.6%","80.8%","8.0%","0.70%",NA),
E=c(-2.3,-1.1,-0.123152709,-4,-1.16881587),
UL=c(1.161473203,0.156288294,3.881699516,10.02689306,-0.039791047),
LL=c(-5.761473203,-2.356288294,-4.128004935,-18.02689306,-2.297840692))
#Calculate SE for box size#
df$SE <- (df$UL-df$E)/1.96
#Column for Confidence intervals for Drug A and Placebo, with 2 significant digit#
df$IQRA <- sprintf("%.2f (%.2f to %.2f)",df$median_A,df$Q1A, df$Q3A)
df$IQRP <- sprintf("%.2f (%.2f to %.2f)",df$median_P,df$Q1P, df$Q3P)
#Column for Confidence intervals for NET EFFECT, with 2 significant digit#
df$MD <- sprintf("%.2f (%.2f to %.2f)", df$E, df$LL, df$UL)
#Create a column with space for forest plot#
df$" "<- paste(rep(" ", 16), collapse = " ")
##Forest plot theme##
#To be modified as needed#
ftn <-forest_theme(
base_size = 16,
base_family = "serif",
ci_pch = 15,
ci_col = "black",
ci_lty = 1,
ci_lwd = 1,
ci_Theight = 0.25,
legend_name = " ",
legend_position = "right",legend_value = "",
xaxis_lwd = 1,
xaxis_cex = 0.7,
refline_lwd = 1,
refline_lty = "dashed",
refline_col = "red",
summary_fill = "blue",
summary_col = "blue",
footnote_cex = 0.4,
footnote_fontface = "plain",
footnote_col = "black",
title_just = c("center"),
title_cex = 1.1,
title_fontface = "bold",
title_col = "black",
show.rownames = FALSE)
##Table in Order for Forest plot##
#First get Column names#
colnames(df)
df2 <-df[,c(1,2,15,6,16,18,17)]
#Make NA cells empty
df2[5,3] <-c(" ")
df2[5,5] <-c(" ")
##Forestplot##
plot<-forest(df2,
est = df$E,
lower = df$LL,
upper = df$UL,
sizes = (df$SE/10),
ci_column = 6,
ref_line = 0,
arrow_lab = c("Drug A Better", "Placebo Better"),
xlim = c(-7, 6),
is_summary = c(FALSE,FALSE,FALSE,FALSE,TRUE),
xlog = FALSE,
ticks_digits = 0,ticks_at = c(-6,0,6),
theme = ftn)
##Show plot
print(plot, autofit = FALSE)

Error in `V<-`(`*tmp*`, value = `*vtmp*`) : invalid indexing

I used the bibliometrix function in R, and want to plot some useful graphs.
library(bibliometrix)
??bibliometrix
D<-readFiles("E:\\RE\\savedrecs.txt")
M <- convert2df(D,dbsource = "isi", format= "plaintext")
results <- biblioAnalysis(M ,sep = ";" )
S<- summary(object=results,k=10, pause=FALSE)
plot(x=results,k=10,pause=FALSE)
options(width=100)
S <- summary(object = results, k = 10, pause = FALSE)
NetMatrix <- biblioNetwork(M1, analysis = "co-occurrences", network = "author_keywords", sep = ";")
S <- normalizeSimilarity(NetMatrix, type = "association")
net <- networkPlot(S, n = 200, Title = "co-occurrence network",type="fruchterman", labelsize = 0.7, halo = FALSE, cluster = "walktrap",remove.isolates=FALSE, remove.multiple=FALSE, noloops=TRUE, weighted=TRUE)
res <- thematicMap(net, NetMatrix, S)
plot(res$map)
But in the net <- networkPlot(S, n = 200, Title = "co-occurrence network",type="fruchterman", labelsize = 0.7, halo = FALSE, cluster = "walktrap",remove.isolates=FALSE, remove.multiple=FALSE, noloops=TRUE, weighted=TRUE), it shows error
Error in V<-(*tmp*, value = *vtmp*) : invalid indexing
. Also I cannot do the CR, it always shows unlistCR. I cannot use the NetMatrix function neither.
Some help me plsssssssss
The problem is in the data itself not in the code you presented. When I downloaded the data from bibliometrix.com and changed M1 to M (typo?) in biblioNetwork function call everything worked perfectly. Please see the code below:
library(bibliometrix)
# Plot bibliometric analysis results
D <- readFiles("http://www.bibliometrix.org/datasets/savedrecs.txt")
M <- convert2df(D, dbsource = "isi", format= "plaintext")
results <- biblioAnalysis(M, sep = ";")
S <- summary(results)
plot(x = results, k = 10, pause = FALSE)
# Plot Bibliographic Network
options(width = 100)
S <- summary(object = results, k = 10, pause = FALSE)
NetMatrix <- biblioNetwork(M, analysis = "co-occurrences", network = "author_keywords", sep = ";")
S <- normalizeSimilarity(NetMatrix, type = "association")
net <- networkPlot(S, n = 200, Title = "co-occurrence network", type = "fruchterman",
labelsize = 0.7, halo = FALSE, cluster = "walktrap",
remove.isolates = FALSE, remove.multiple = FALSE, noloops = TRUE, weighted = TRUE)
# Plot Thematic Map
res <- thematicMap(net, NetMatrix, S)
str(M)
plot(res$map)

plot_generic() function in plot_roc_components not found in R

I am using plot_roc_components function from rmda package. The definition of it has plot_generic() function. But, I am not able to find definition of this function. Why is it so?
The reason for it to see if there is an option for legend.size(). plot_roc_components gives me figure, however, I want to change the legend size. There is an option for legend.position, but not for its font size.
Could you please explain?
Thanks!
https://github.com/mdbrown/rmda/blob/57553a4cf5b6972176a0603b412260e367147619/R/plot_functions_sub.R
You were looking in one file but it was defined in another file.
plot_generic<- function(xx, predictors, value, plotNew,
standardize, confidence.intervals,
cost.benefit.axis = TRUE, cost.benefits, n.cost.benefits,
cost.benefit.xlab, xlab, ylab,
col, lty, lwd,
xlim, ylim, legend.position,
lty.fpr = 2, lty.tpr = 1,
tpr.fpr.legend = FALSE,
impact.legend = FALSE,
impact.legend.2 = FALSE,
population.size = 1000,
policy = policy, ...){
## xx is output from get_DecisionCurve,
## others are directly from the function call
#save old par parameters and reset them once the function exits.
old.par<- par("mar"); on.exit(par(mar = old.par))
xx.wide <- reshape::cast(xx, thresholds~model, value = value, add.missing = TRUE, fill = NA)
xx.wide$thresholds <- as.numeric(as.character(xx.wide$thresholds))
if(is.numeric(confidence.intervals)){
val_lower <- paste(value, "lower", sep = "_")
val_upper <- paste(value, "upper", sep = "_")
xx.lower <- cast(xx, thresholds~model, value = val_lower, add.missing = TRUE, fill = NA)
xx.upper <- cast(xx, thresholds~model, value = val_upper, add.missing = TRUE, fill = NA)
xx.lower$thresholds <- as.numeric(as.character(xx.lower$thresholds))
xx.upper$thresholds <- as.numeric(as.character(xx.upper$thresholds))
}
# adjust margins to add extra x-axis
if(cost.benefit.axis) par(mar = c(7.5, 4, 3, 2) + 0.1)
#set default ylim if not provided
#initial call to plot and add gridlines

R script that generates pdf in SSIS

I have SSIS package that gets data into database and then executes R Script. R script creates new folder (names it based on the current date) and generate some pdf files into this folder. I have deployed this package on server and created Job that executes it every night. The problem is that each morning I am finding only empty folders (with correct date name) without any pdf files. However, If I execute that package manually in Visual Studio it works fine and pdfs are there. Am I missing something here? I appreciate every answer.
EDIT
When I execute manually it is directly on the server
Package looks like this
and here is my R script
dir.create(file.path(output.path, date))
library(RODBC)
conn <- odbcConnect("Azure", uid = "aaaaa", pwd = "aaaaa")
etldata <- sqlFetch(conn,"dbo.EtlLogsData", stringsAsFactors = FALSE)
pdf(paste('ETL_Duration_For_Effective_Date_', date,'.pdf',sep = ""),
width = 12,
height = 8,
paper = 'special')
par(mar = c(5, 17, 5, 3))
plot(c(min(etldata_day$st_sec), max(etldata_day$et_sec)),
c(sn[1], sn[1]),
ylim = c(0, n),
yaxt = 'n',
xaxt = 'n',
ylab = '',
xlab = 'Time',
main = paste('ETL Duration With Effective Date ', date, sep = ""))
abline(h = sn, untf = FALSE, col = "gray90")
for (i in 1:n){
lines(c(etldata_day$st_sec[i], etldata_day$et_sec[i]),
c(sn[i], sn[i]),
type = "l", lwd = 2)
arrows(etldata_day$st_sec[i], sn[i],
etldata_day$et_sec[i], sn[i],
length = 0.025, angle = 90, lwd = 2)
arrows(etldata_day$et_sec[i], sn[i],
etldata_day$st_sec[i], sn[i],
length = 0.025, angle = 90, lwd = 2)
}
# Print y axis labels
axis(2, at = sn, labels = etldata_day$TaskName, las = 1, cex.axis = 1)
# Print x axis labels
xat <- seq(from = min(etldata_day$st_sec), to = max(etldata_day$et_sec), length.out = 10)
xlabels <- secondsToString(xat)
axis(1, at = xat, labels = substr(xlabels,1,8), cex.axis = 1)
dev.off()
After plot() I use some FOR cycles, and LINES(),

Resources