Generating a timeline with the R package timelineS

My R script looks like this:
if (!require("timelineS")) install.packages("timelineS")
library('timelineS')
myevents <- read.csv(
text = "MS1.1.1,2021-12-01
MS1.1.2,2022-03-01
MS1.1.3,2022-09-01
MS1.1.4,2022-09-01
MS1.1.5,2022-09-01
MS1.1.6,2022-11-01
MS1.1.7,2023-03-01
MS1.1.8,2023-09-01
MS1.1.9,2023-09-01",
sep=",",header=F)
head(myevents)
class(myevents)
timelineS(myevents)
and I get the error "Fehler in min(event.dates) - buffer.days :
nicht-numerisches Argument für binären Operator".
I cannot figure out where is my mistake.
Thank you for any hint!

The dates are read in as character strings, so timelineS() cannot subtract buffer.days from min(event.dates). Converting the second column to class Date fixes it (see ?timelineS for the expected input):
myevents$V2 <- as.Date(myevents$V2)
timelineS(myevents)

I had a similar issue. In my case, my data frame had the event date in the first column and the event name in the second; timelineS() needs the reverse order (event names first, dates second). I fixed my data frame this way:
library(readr)
raw_df <- read_tsv("2022_12.tsv")
# put the event names first and the dates second
my_df <- data.frame(Events = raw_df$Events, Event_Dates = raw_df$Event_Dates)
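Putting both answers together, a minimal sketch (the column names and the shortened event list are my own choices):
library(timelineS)

myevents <- read.csv(
  text = "MS1.1.1,2021-12-01
MS1.1.2,2022-03-01
MS1.1.3,2022-09-01",
  header = FALSE,
  col.names = c("Event", "Event_Date"),
  stringsAsFactors = FALSE
)
# timelineS() expects labels in column 1 and dates of class Date in column 2
myevents$Event_Date <- as.Date(myevents$Event_Date)
timelineS(myevents)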

Related

R: Package topicmodels: LDA: Error: invalid argument

I have a question regarding LDA in the topicmodels package in R.
From a data frame, I created a matrix with documents as rows, terms as columns, and the number of times each term occurs in a document as values. When I try to run LDA, I get the error message "Error in !all.equal(x$v, as.integer(x$v)) : invalid argument type". The data contains 1675 documents and 368 terms. What can I do to make the code work?
library("tm")
library("topicmodels")
data_matrix <- data %>%
group_by(documents, terms) %>%
tally %>%
spread(terms, n, fill=0)
doctermmatrix <- as.DocumentTermMatrix(data_matrix, weightTf("data_matrix"))
lda_head <- topicmodels::LDA(doctermmatrix, 10, method="Gibbs")
Help is much appreciated!
Edit:
# Toy Data
documentstoy <- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)
meta1toy <- c(3,4,1,12,1,2,3,5,1,4,2,1,1,1,1,1)
meta2toy <- c(10,0,10,1,1,0,1,1,3,3,0,0,18,1,10,10)
termstoy <- c("cus","cus","bill","bill","tube","tube","coa","coa","un","arc","arc","yib","yib","yib","dar","dar")
toydata <- data.frame(documentstoy,meta1toy,meta2toy,termstoy)
So I looked inside the code, and apparently the LDA() function only accepts integers as input, so you have to convert your categorical variables as below:
library('tm')
library('topicmodels')
documentstoy <- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)
meta1toy <- c(3,4,1,12,1,2,3,5,1,4,2,1,1,1,1,1)
meta2toy <- c(10,0,10,1,1,0,1,1,3,3,0,0,18,1,10,10)
toydata <- data.frame(documentstoy, meta1toy, meta2toy)
termstoy <- c("cus","cus","bill","bill","tube","tube","coa","coa","un","arc","arc","yib","yib","yib","dar","dar")
# turn each distinct term into its own 0/1 integer indicator column
toy_unique <- unique(termstoy)
for (i in 1:length(toy_unique)) {
  A <- as.integer(termstoy == toy_unique[i])
  toydata[toy_unique[i]] <- A
}
lda_head <- topicmodels::LDA(toydata, 10, method = "Gibbs")
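For reference, a minimal sketch of the more direct route (my own reconstruction, not the poster's code): build an integer-count DocumentTermMatrix from the toy terms and fit LDA on that, so the matrix holds only integer counts and should not trip the all.equal check.
library(tm)
library(topicmodels)

documentstoy <- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)
termstoy <- c("cus","cus","bill","bill","tube","tube","coa","coa","un","arc","arc","yib","yib","yib","dar","dar")

# cross-tabulate documents x terms; the cells are integer term counts
count_matrix <- as.matrix(table(documentstoy, termstoy))

# convert to a DocumentTermMatrix with plain term-frequency weighting
dtm_toy <- as.DocumentTermMatrix(count_matrix, weighting = weightTf)

# fit LDA; k is kept small because the toy data has only 8 distinct terms
lda_toy <- topicmodels::LDA(dtm_toy, k = 3, method = "Gibbs")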

"object not found" when running a function in R

I have created the following function
FilterIndi <- function(infile,name, date){
sub_file <- infile[,c("NUMBER","CREATE_DTTM_NEW", name)]
sub_file <- subset(sub_file, name==1)
library(data.table)
sub_file <- setDT(sub_file)[, .SD[which.max(CREATE_DTTM_NEW)], NUMBER]
sub_file$date <- sub_file$CREATE_DTTM_NEW
sub_file$CREATE_DTTM_NEW <- NULL
library(dplyr) #to do left_join
Unique <- left_join(Unique,sub_file, by =c("NUMBER"="NUMBER"))
Unique$name[is.na(Unique$name)] <-0
return(Unique)
}
FilterIndi(allfile, pde, pde_date )
pde is a column in the data frame allfile, but I get the following error:
Error in '[.data.frame'(infile, c("NUMBER", "CREATE_DTTM_NEW", :
object 'pde' not found
I can't figure out how to make it work.
Can someone please help me? Thanks a lot in advance.
EDIT: I have attached an image of allfile:
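The error usually means that R is trying to evaluate pde as an object rather than as a column name. A sketch of one way around it (my reading of the problem, not a confirmed fix): pass the column names as quoted strings and index with [[ inside the function.
library(data.table)
library(dplyr)

FilterIndi <- function(infile, name, date) {
  sub_file <- infile[, c("NUMBER", "CREATE_DTTM_NEW", name)]
  sub_file <- sub_file[sub_file[[name]] == 1, ]    # keep rows where the indicator is 1
  # keep the latest record per NUMBER
  sub_file <- as.data.frame(setDT(sub_file)[, .SD[which.max(CREATE_DTTM_NEW)], NUMBER])
  sub_file[[date]] <- sub_file$CREATE_DTTM_NEW     # store the timestamp under the supplied name
  sub_file$CREATE_DTTM_NEW <- NULL
  Unique <- left_join(Unique, sub_file, by = "NUMBER")   # Unique is taken from the calling environment
  Unique[[name]][is.na(Unique[[name]])] <- 0
  return(Unique)
}

# called with quoted column names:
# Unique <- FilterIndi(allfile, "pde", "pde_date")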

Error in DESeq formula nbinomTest

I'm using the DESeq package to analyze RNA sequencing data. I have only one replicate and two treatments. My code is:
data <- read.csv()
metadata <- data.frame(row.names = colnames(data), condition =c("treated", "untreated"))
cds2 <- newCountDataSet( countData = data, conditions = metadata )
cds2 <- estimateSizeFactors(cds2)
counts( cds2, normalized=TRUE )
cds2 <- estimateDispersions(cds2, method="blind", sharingMode="fit-only")
res <- nbinomTest(cds2, "treated", "untreated" )
Everything works fine up to and including the call to estimateDispersions. However, the function nbinomTest gives me this error:
Error in if (dispTable(cds)[condA] == "blind" || dispTable(cds)[condB] == :
missing value where TRUE/FALSE needed
I found some documentation on this error, but the answers are not helpful for me. I work with R version 3.1.2 (2014-10-31).
Can someone help me with my problem, please?
Cheers!
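One likely cause (an assumption on my part, not a confirmed answer): conditions is being passed as a data frame, so nbinomTest cannot look up "treated" and "untreated" in the dispersion table and the comparison returns NA. A sketch that passes the conditions as a plain factor instead:
library(DESeq)

data <- read.csv("counts.csv", row.names = 1)    # hypothetical file name
conds <- factor(c("treated", "untreated"))       # one label per column of data

cds2 <- newCountDataSet(countData = data, conditions = conds)
cds2 <- estimateSizeFactors(cds2)
cds2 <- estimateDispersions(cds2, method = "blind", sharingMode = "fit-only")
res  <- nbinomTest(cds2, "treated", "untreated")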

Linking CRSP and Compustat in R via WRDS

I am using R to connect to WRDS. Now I would like to link the Compustat and CRSP tables. In SAS, this would be achieved using macros and the CCM link table. What would be the best way to approach this in R?
PROGRESS UPDATE:
I downloaded the CRSP, Compustat, and CCM link tables from WRDS:
sql <- "select * from CRSP.CCMXPF_LINKTABLE"
res <- dbSendQuery(wrds, sql)
ccmxpf_linktable <- fetch(res, n = -1)
ccm.dt <- data.table(ccmxpf_linktable)
rm(ccmxpf_linktable)
I then converted the matching routine suggested in the WRDS event-study SAS file into R:
ccm.dt[,typeflag:=linktype %in% c("LU","LC","LD","LN","LS","LX") & USEDFLAG=="1"]
setkey(ccm.dt, gvkey, typeflag)
for (i in 1:nrow(compu.dt)) {
gvkey.comp = compu.dt[i, gvkey]
endfyr.comp = compu.dt[i,endfyr]
PERMNO.val <- ccm.dt[.(gvkey.comp, TRUE),][linkdt<=endfyr.comp & endfyr.comp<=linkenddt,lpermno]
if (length(PERMNO.val)==0) PERMNO.val <- NA
suppressWarnings(compu.dt[i, "PERMNO"] <- PERMNO.val)
}
However, this code is fantastically inefficient. I started out with data.table but do not really understand how to express the logic of the for-loop with it. I am hoping that someone could point me to a way to improve the for-loop.
Matching the fields in stages works better; maybe someone finds this useful. Any suggestions for further improvement are of course very welcome!
# filter on ccm.dt
ccm.dt <- ccm.dt[linktype %in% c("LU","LC","LD","LN","LS","LX") & USEDFLAG=="1"]
setkey(ccm.dt, gvkey)
setkey(compu.dt, gvkey)
compu.merged <- merge(compu.dt, ccm.dt, all.x = TRUE, allow.cartesian = TRUE)
# deal with NAs in linkenddt - set NAs to today's date, assuming those links are still active
today <- as.character(Sys.Date())
compu.merged[is.na(linkenddt), "linkenddt":=today]
# filter out date mismatches
compu <- compu.merged[linkdt <= endfyr & endfyr<=linkenddt]
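A further improvement, sketched under the assumption of data.table >= 1.9.8 and Date-class linkdt/linkenddt/endfyr columns: a non-equi join does the merge and the date filter in one step and avoids the cartesian merge entirely.
library(data.table)

# keep only usable link records
ccm.dt <- ccm.dt[linktype %in% c("LU","LC","LD","LN","LS","LX") & USEDFLAG == "1"]
# open-ended links run to today (assumes linkenddt is stored as a Date)
ccm.dt[is.na(linkenddt), linkenddt := Sys.Date()]

# for each Compustat row, find the link whose window contains endfyr
matched <- ccm.dt[compu.dt,
                  .(gvkey, endfyr = i.endfyr, PERMNO = x.lpermno),
                  on = .(gvkey, linkdt <= endfyr, linkenddt >= endfyr)]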

Error in table(x, y) : attempt to make a table with >= 2^31 elements

I have a problem with plotting my results. Previously (about two weeks ago) I could use the same code below to plot my data, but now I am getting an error.
data <- read.table("my_step.odt", header = FALSE, sep = "", quote = "\"'", dec = ".", as.is = FALSE, strip.white = FALSE, col.names = c(.......))
mgn_my <- data[1:49999,18]
sim <- data[1:49999, 21]
plot(sim , mgn_my , type="l",xlab="Time (ns)",ylab="mx")
The error:
Error in table(x, y) : attempt to make a table with >= 2^31 elements
Any suggestions?
I have had a similar problem before. Based on my response to another post, here's what I would suggest before you run plot:
Option 1: Use droplevels
mgn_my <- droplevels(data[1:49999,18])
Option 2: Use an apply-family function to drop unused levels from every factor column before plotting. This approach may feel "friendlier" if you are familiar with the apply family in R. For example:
data[] <- lapply(data, function(col) if (is.factor(col)) droplevels(col) else col)
mgn_my <- data[1:49999, 18]
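Another likely cause worth checking (my assumption, not part of the original answer): with as.is = FALSE, read.table turns numeric-looking columns into factors, and calling plot() on two factors builds a contingency table via table(x, y), which blows past 2^31 cells here. Converting the two columns to numeric avoids that path entirely:
mgn_my <- as.numeric(as.character(data[1:49999, 18]))
sim <- as.numeric(as.character(data[1:49999, 21]))
plot(sim, mgn_my, type = "l", xlab = "Time (ns)", ylab = "mx")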
