R markdown v2 and Hmisc tables - r

How can I take the output from summary in Hmisc and have it rendered in knitr with the correct formatting and preferably transferred to word as a table for my collaborators?
The following chunk produces a table but the formatting is off (all the value labels and numbers for the variables are on the same line, not beneath each other)
---
output: word_document
---
```{r table, results='asis'}
library(Hmisc)
options(digits=3)
set.seed(173)
sex <- factor(sample(c("m","f"), 500, rep=TRUE))
age <- rnorm(500, 50, 5)
treatment <- factor(sample(c("Drug","Placebo"), 500, rep=TRUE))
# Generate a 3-choice variable; each of 3 variables has 5 possible levels
symp <- c('Headache','Stomach Ache','Hangnail',
'Muscle Ache','Depressed')
symptom1 <- sample(symp, 500,TRUE)
symptom2 <- sample(symp, 500,TRUE)
symptom3 <- sample(symp, 500,TRUE)
Symptoms <- mChoice(symptom1, symptom2, symptom3, label='Primary Symptoms')
table(Symptoms)
# Note: In this example, some subjects have the same symptom checked
# multiple times; in practice these redundant selections would be NAs
# mChoice will ignore these redundant selections
#Frequency table sex*treatment, sex*Symptoms
summary(sex ~ treatment + Symptoms, fun=table)
```

My main focus was to get the summary.formula.reverse table from Hmisc into word for submission. I tend to use it a lot so I ended up with a quick hack that gets the table into word - although not using knitr. Feel free to improve and apply the same logic to the other summary.formula tables...
library(stringr)
library(Hmisc)
library(rtf)
tabl<-function(x,filename="tab.doc"){
u<-capture.output(print(x,exclude1=F,long=T,pctdig=1,))
col<-max(str_count(string=u,"\\|"))
row<-sum(as.numeric(str_detect(u,"\\|")==T))
su<-which(str_detect(u,"\\|")==T)
i<-str_trim(unlist(str_split(u[su[1]],"\\|")))
i2<-str_trim(unlist(str_split(u[su[2]],"\\|")))
i3<-paste(i,i2,sep="\n")
i3<-i3[-c(1,col+1)]
uo<-u[su[-c(1:2)]]
val<-lapply(uo,function(x) str_trim(unlist(str_split(x,"\\|"))))
misd<-lapply(val,function(x) ifelse(x[3]=="",paste("\\tab",x[2],sep=" "),paste("\\ql",x[2],sep=" ")))
f<-t(matrix(unlist(val),col+1))
f[,-c(1,col+1)]->f2
f2[,1]<-unlist(misd)
colnames(f2)<-i3
which(str_detect(f2,"\\ql")==T)->blank
inser<-function(df,place,vector){
df1<-rbind(df[1:place-1,],vector,df[place:length(df[,1]),])
df1
}
f3<-as.data.frame(f2)
lapply(c(1:length(names(f3))),function(x) levels(f3[[x]])<<-c(levels(f3[[x]]),""))
g<-1
for (i in blank[-1]) {
f3<-inser(f3,i-1+g,c(rep("",col-1)))
g<-g+1
}
y<-as.data.frame(f3)
di<-apply(y,2,function(x) max(nchar(x)))/12 ##12 char/inch
di[di<.5]<-.5
u<-RTF(file=filename,width=8.5, height=11, omi=c(1, 1, 1, 1), font.size=10)
addHeader(u,title="Table",subtitle=paste(date(),"\n",sep=""))
addTable(u,y,font.size=10,row.names=FALSE,NA.string="-",col.justify = c("L",rep("C",col-2)),header.col.justify = c("L",rep("C",col-2)),col.widths=di)
done(u)
return(u)
}

Related

Recursive / Expanding Window forecasts

I am having a small issue with my Rstudio code. I will try to replicate my code but unfortunately there is no easy data for me to show. This is about the package forecast. What I am looking for is somehwat simpler for what is in the manual. But unfortunately, I am not able to work round it.
so the issue is with an expanding window forecast. So I have a dependent variable Y and 3 regressors (X). I am trying to build a recursive one steap ahead forecast for each X.
Here is my code.
library(forecast)
library(zoo)
library(timeDate)
library(xts)
## Load data
data = Dataset[,2:ncol(Dataset)]
st <- as.Date("1990-1-1")
en <- as.Date("2020-12-1")
tt <- seq(st, en, by = "1 month")
data = xts(data, order.by=tt)
##########################################################################
RECFORECAST=function (Y,X,h,window){
st <- as.Date("1990-1-1")
en <- as.Date("2020-12-1")
tt <- seq(st, en, by = "1 month")
datas= cbind(Y,X)
newfcast= matrix(0,nrow(datas),h)
for (k in 1:nrow(datas)){
sample =datas[1:(window+k-1),]
# print(sample)
v= window+k
# print(v)
# fit = Arima(sample[,1], order=c(0,0,0),xreg=sample[,2])
fit = lm(sample[,1]~sample[,2], data = sample)
# fcast=forecast(fit,xreg=rep(sample[v,2],h))$mean
fcast = forecast.lm(fit,sample[v,2],h=1)$mean
print(fcast)
# print(fcast)
# newfcast[k+window+1,]=fcast
}
print(newfcast)
return(newfcast)
}
## Code to send the loop into forecasts
StoreMatrix = data$growth ## This is the first column data[,1]
for (i in 2:4)
{
try({
X=data[,i]
Y=data[,1]
RecModel=RECFORECAST(Y,X,h=1,window=60) ##Here the initial window is 60 obs
StoreMatrix=cbind(StoreMatrix,RecModel)
print(StoreMatrix)
}, silent=T)
}
The bits # were different ways I tried to crosscheck my data and they may not be useful. I have tried so many things but I don't seem to be able to get my head through it. At the end I want to have a matrix (StoreMatrix) with the first variable being the realization, and each of the columns with the corresponding 1 step ahead forecast.
The main lines where there seems to be an issue are these ones:
# fcast=forecast(fit,xreg=rep(sample[v,2],h))$mean
fcast = forecast.lm(fit,sample[v,2],h=1)$mean
Note sure how to solve this. Thank you very much.

Using R Hmisc summary/summaryM latex command within Knitr Markdown pdf

I have been trying to get the Hmisc latex.summary and latex.summaryM examples to work within a pdf document created using Knitr in RStudio. But keep getting error messages. The example data is:
options(digits=3)
set.seed(173)
sex <- factor(sample(c("m","f"), 500, rep=TRUE))
country <- factor(sample(c('US', 'Canada'), 500, rep=TRUE))
age <- rnorm(500, 50, 5)
sbp <- rnorm(500, 120, 12)
label(sbp) <- 'Systolic BP'
units(sbp) <- "mmHg"
treatment <- factor(sample(c("Drug","Placebo"), 500, rep=TRUE))
sbp[1] <- NA
# Generate a 3-choice variable; each of 3 variables has 5 possible levels
symp <- c('Headache','Stomach Ache','Hangnail',
'Muscle Ache','Depressed')
symptom1 <- sample(symp, 500,TRUE)
symptom2 <- sample(symp, 500,TRUE)
symptom3 <- sample(symp, 500,TRUE)
Symptoms <- mChoice(symptom1, symptom2, symptom3, label='Primary Symptoms')
And I want to create a pdf document that contains the tables
tab1 <- summary(sex ~ treatment + Symptoms, fun=table)
tab2 <- summaryM(age + sex + sbp + Symptoms ~ treatment,
groups='treatment', test=TRUE)
I'm running R version 3.5.2 (2018-12-20), RStudio 1.1.463, Hmisc_4.2-0, and have installed tinytex using tinytex::install_tinytex().
After a few hours trial and error I discovered how, and am posting the code below in case it helps others.
The following code works for me, note;
Requirement for relsize latex package when Hmisc::units attribute is used to prevent the following failed to compile error.
! Undefined control sequence.
<recently read> \smaller
The mylatex function is taken from https://stackoverflow.com/a/31443576/4241780, and is required for removing unwanted output.
The option file = "" is needed to prevent the error
Error in system(comd, intern = TRUE, wait = TRUE) : 'yap' not found
Calls: <Anonymous> ... print -> print.latex -> show.latex -> show.dvi -> system
The use of the where = "!htbp" option ensures that the tables remain where they are placed and do not float to the top of the page (by default where = "!tbp") https://tex.stackexchange.com/a/2282.
---
title: "Untitled"
author: "Author"
date: "15 April 2019"
output:
pdf_document:
extra_dependencies: ["relsize"]
---
```{r setup, include=FALSE}
library(Hmisc)
library(dplyr)
mylatex <- function (...) {
o <- capture.output(latex(file = "", where = "!htbp", ...))
# this will strip /all/ line-only comments; or if you're only
# interested in stripping the first such comment you could
# adjust accordingly
o <- grep('^%', o, inv=T, value=T)
cat(o, sep='\n')
}
```
```{r data}
# As in question above ...
```
Here is the first table
```{r tab1, results = "asis"}
tab1 <- summary(sex ~ treatment + Symptoms, fun=table)
mylatex(tab1)
```
Here is the second table
```{r tab2, results = "asis"}
tab2 <- summaryM(age + sex + sbp + Symptoms ~ treatment, test=TRUE)
mylatex(tab2)
```

R: Package topicmodels: LDA: Error: invalid argument

I have a question regarding LDA in topicmodels in R.
I created a matrix with documents as rows, terms as columns, and the number of terms in a document as respective values from a data frame. While I wanted to start LDA, I got an Error Message stating "Error in !all.equal(x$v, as.integer(x$v)) : invalid argument type" . The data contains 1675 documents of 368 terms. What can I do to make the code work?
library("tm")
library("topicmodels")
data_matrix <- data %>%
group_by(documents, terms) %>%
tally %>%
spread(terms, n, fill=0)
doctermmatrix <- as.DocumentTermMatrix(data_matrix, weightTf("data_matrix"))
lda_head <- topicmodels::LDA(doctermmatrix, 10, method="Gibbs")
Help is much appreciated!
edit
# Toy Data
documentstoy <- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)
meta1toy <- c(3,4,1,12,1,2,3,5,1,4,2,1,1,1,1,1)
meta2toy <- c(10,0,10,1,1,0,1,1,3,3,0,0,18,1,10,10)
termstoy <- c("cus","cus","bill","bill","tube","tube","coa","coa","un","arc","arc","yib","yib","yib","dar","dar")
toydata <- data.frame(documentstoy,meta1toy,meta2toy,termstoy)
So I looked inside the code and apparently the lda() function only accepts integers as the input so you have to convert your categorical variables as below:
library('tm')
library('topicmodels')
documentstoy <- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)
meta1toy <- c(3,4,1,12,1,2,3,5,1,4,2,1,1,1,1,1)
meta2toy <- c(10,0,10,1,1,0,1,1,3,3,0,0,18,1,10,10)
toydata <- data.frame(documentstoy,meta1toy,meta2toy)
termstoy <- c("cus","cus","bill","bill","tube","tube","coa","coa","un","arc","arc","yib","yib","yib","dar","dar")
toy_unique = unique(termstoy)
for (i in 1:length(toy_unique)){
A = as.integer(termstoy == toy_unique[i])
toydata[toy_unique[i]] = A
}
lda_head <- topicmodels::LDA(toydata, 10, method="Gibbs")

linear disriminant function error - arguments must be same length

My example dataset:
year <- c("2002","2002","2002","2004","2005","2005","2005","2006", "2006")
FA1 <- c(0.7975030, 1.5032768, 0.8805000, 1.0505961, 1.1379715, 1.1334510, 1.1359434, 0.9614926, 1.2631387)
FA2 <- c(0.7930153, 1.2862355, 0.5633592, 1.0396431, 0.9446277, 1.1944455, 1.086171, 0.767955, 1.2385361)
FA3 <- c(-0.7825210, 0.56415672, -0.9294417, 0.21485071, -0.447953,0.037978, 0.038363, -0.495383, 0.509704)
FA4 <- c(0.38829957,0.34638035,-0.06783007, 0.505020, 0.3158221,0.55505411, 0.42822783, 0.36399347, 0.51352115)
df <- data.frame(year,FA1,FA2,FA3,FA4)
I then select the data I want to use and run a DFA
library(magrittr)
library(DiscriMiner)
yeardf <- df[df$year %in% c(2002, 2005, 2006),]
yeardfd <- linDA(yeardf[,2:4],yeardf$year, validation = "crossval")
But now i get an error telling me the arguments are different lengths.
"Error in table(original = y[test], predicted = pred_class) :
all arguments must have the same length"
I looked at
length(yeardf$year)
dim(yeardf)
And it looks like they are the same.
I also checked for spelling mistakes as that seems to cause this error sometimes.
following up on answer.
The suggested answer works on my example data (which does give me the same error), but I can't quite make it work on my real code.
I first apply the transformation to selected columns in my data.frame. And then I combine the transformed columns with the variables I want to use as groups in my DFA
library(robCompositions)
tFA19 <- cenLR(fadata.PIZ[names(FA19)])[1]
tFA19 <- cbind(fadata.PIZ[1:16],tFA19)
So I think creating my data.frame this way must be leading to my error. I tried to insert stringsAsFactors into my cbind statement, but no luck.
You need ,stringsAsFactors = FALSE in data.frame:
year <- c("2002","2002","2002","2004","2005","2005","2005","2006", "2006")
FA1 <- c(0.7975030, 1.5032768, 0.8805000, 1.0505961, 1.1379715, 1.1334510, 1.1359434, 0.9614926, 1.2631387)
FA2 <- c(0.7930153, 1.2862355, 0.5633592, 1.0396431, 0.9446277, 1.1944455, 1.086171, 0.767955, 1.2385361)
FA3 <- c(-0.7825210, 0.56415672, -0.9294417, 0.21485071, -0.447953,0.037978, 0.038363, -0.495383, 0.509704)
FA4 <- c(0.38829957,0.34638035,-0.06783007, 0.505020, 0.3158221,0.55505411, 0.42822783, 0.36399347, 0.51352115)
df <- data.frame(year,FA1,FA2,FA3,FA4,stringsAsFactors = FALSE)
library(magrittr)
library(DiscriMiner)
yeardf <- df[df$year %in% c(2002, 2005, 2006),]
yeardfd <- linDA(yeardf[,2:4],yeardf$year, validation = "crossval")
yeardfd
Linear Discriminant Analysis
-------------------------------------------
$functions discrimination functions
$confusion confusion matrix
$scores discriminant scores
$classification assigned class
$error_rate error rate
-------------------------------------------
$functions
2002 2005 2006
constant -345 -371 -305
FA1 228 231 213
...

Annotated correlation tables with stargazer

I want to report correlation tables in a latex report and I'm using 'stargazer' to transform my R objects into tex-code. The correlational data is currently stored in a data frame.
I would like to print rownames and possibly add an annotation under the table. I couldn't find a 'print rownames'-argument and the 'notes'-argument doesn't seem to work.
Any Ideas?
## create object
x <- matrix(1:4, 2, byrow = TRUE)
dimnames(x) <- list(c("A", "B"), c("A", "B"))
x <- data.frame(x)
## create Tex-Code
stargazer(x, summary = FALSE, title = "2x2 Matrix",
notes = "This is a two by two Matrix")
As of version 5.0, stargazer can directly output the content of matrices/vectors. The following code should provide an easy and intuitive resolution to your problem:
## create object
x <- matrix(1:4, 2, byrow = TRUE)
dimnames(x) <- list(c("A", "B"), c("A", "B"))
## create Tex-Code
stargazer(x, title = "2x2 Matrix",
notes = "This is a two by two Matrix")
This is rather a markdown solution that can be converted to LaTeX with e.g. Pandoc:
> require(pander)
> pander(x, caption = 'Annotation')
---------------
A B
------- --- ---
**A** 1 2
**B** 3 4
---------------
Table: Annotation
To get the 'rownames', try this hackish solution:
## create object
x <- matrix(1:4, 2, byrow = TRUE)
x <- data.frame(x)
x <- cbind(c("A","B"),x)
colnames(x) <- c("","A", "B")
## create Tex-Code
stargazer(x, summary = FALSE, title = "2x2 Matrix",
notes = "This is a two by two Matrix", type="text")
At the moment (v. 4.5.1), 'stargazer' is best suited to working with regression tables and data frames. Your question, however, suggests that users might be interested in better support for matrices. Expect this in future releases (next few months).
As for notes, these really only work for regression tables at the moment. However, they will be available for summary statistics and data frame tables in the next release. If you're willing to edit the source, you can get something very close (although not quite perfect) to the future implementation by replacing the following line(s):
.format.s.stat.parts <<- c("-!","stat names","-!","statistics1","-!")
by:
.format.s.stat.parts <<- c("-!","stat names","-!","statistics1","-!","notes")

Resources