Lexis function not found in R - r

I am using this code from the R help guide in the Epi
package:
# A small bogus cohort
xcoh <- structure( list( id = c("A", "B", "C"),
birth = c("14/07/1952", "01/04/1954",
"10/06/1987"),
entry = c("04/08/1965", "08/09/1972",
"23/12/1991"),
exit = c("27/06/1997", "23/05/1995",
"24/07/1998"),
fail = c(1, 0, 1) ),
.Names = c("id", "birth", "entry", "exit",
"fail"),
row.names = c("1", "2", "3"),
class = "data.frame" )
# Define a Lexis object with timescales calendar time and
age
Lcoh <- Lexis( entry = list( per=entry ),
exit = list( per=exit,
age=exit-birth ),
exit.status = fail,
data = xcoh )
But I get this error:
Error in Lexis(entry = list(per = entry), exit = list(per = exit, age = exit - :
could not find function "Lexis"
Any thoughts?

The Epi package first needs to be installed in your R environment using:
install.packages("Epi")
And then the library for Epi needs to be loaded.
library(Epi)
Hence your code should be modified as follows:
install.packages("Epi")
library(Epi)
xcoh <- structure( list( id = c("A", "B", "C"),
birth = c("14/07/1952", "01/04/1954",
"10/06/1987"),
entry = c("04/08/1965", "08/09/1972",
"23/12/1991"),
exit = c("27/06/1997", "23/05/1995",
"24/07/1998"),
fail = c(1, 0, 1) ),
.Names = c("id", "birth", "entry", "exit",
"fail"),
row.names = c("1", "2", "3"),
class = "data.frame" )
# Define a Lexis object with timescales calendar time and
Lcoh <- Lexis( entry = list( per=entry ),
exit = list( per=exit,
age=exit-birth ),
exit.status = fail,
data = xcoh )
Note: I have removed the line that contains only the word "age" (it is the wrapped tail of the preceding comment, not code), assuming it is not relevant to the question posted here.

Related

How to highlight specific cells in a dataframe in R markdown HTML

I have a dataframe as shown below (the data is listed at the end of question):
As can be seen, I want to highlight a few cells in the final R Markdown report for the presentation. My current code is only able to show the table:
cluster_summary%>% kbl(caption = '<b>Clustering results</b>') %>%
kable_classic(full_width = F, html_font = "Cambria")
How can I highlight those cells?
DATA
structure(list(cluster = structure(1:7, .Label = c("1", "2",
"3", "4", "5", "6", "7"), class = "factor"), n = c(512L, 1048L,
662L, 1968L, 576L, 1738L, 1188L), ave_price_per_sqft_adjusted = c(5.16299733157459,
3.32371811588978, 3.96858531607868, 3.32922072520205, 3.42896017156734,
4.16418851265888, 4.08627345683475), ave_age = c(12.0393129995492,
12.6062546474121, 9.32033699503113, 25.5092197801581, 19.1151284494788,
12.2180810585854, 12.0248580167839), ave_DOM = c(47.706537201211,
42.0442099665614, 49.9960193152193, 34.2190863941281, 44.5416652882415,
37.1891219996921, 33.3872422432855), ave_activity_rate = c(1.20118970114087,
1.14598100690658, 1.47458159497434, 1.58286371628597, 1.31320615630511,
1.32586511589676, 2.90376115653893), topic_1 = c(0.0873152283441761,
0.0402887288191615, 0.0671677410154403, 0.0658325530416239, 0.0486383977595131,
0.678477957074527, 0.124182893709105), topic_2 = c(0.0432613598954236,
0.0696506982126008, 0.0443719103703934, 0.714018587278257, 0.106997881943579,
0.0858546713546651, 0.123859196751554), topic_3 = c(0.734165987470995,
0.0151590853651532, 0.0274370600921245, 0.0267196491438714, 0.0186524676995082,
0.0422361263557554, 0.0476136227502999), topic_4 = c(0.0268470362758521,
0.0222984614059603, 0.035088529448869, 0.0682401425738628, 0.733361959255753,
0.0345517467883103, 0.0701685629335576), topic_5 = c(0.0236832387869678,
0.0195300786802868, 0.681931511958987, 0.01084326403663, 0.00780696913319592,
0.0271831270677069, 0.0256968988305932), topic_6 = c(0.00241582961309524,
0.00512777524684262, 0.043572436212494, 0.00284832693741011,
0.00466231684981685, 0.00447461706422522, 0.00578628373290925
), topic_7 = c(0.0293156710834479, 0.0165055511133993, 0.0243384949312766,
0.0479052429538088, 0.0240980295134035, 0.035084908174513, 0.531063470492252
), topic_8 = c(0.0519347465414063, 0.808840100571256, 0.0730651082702796,
0.0592810817199474, 0.0538401481417729, 0.0805723035106479, 0.0664648058614109
)), class = "data.frame", row.names = c(NA, -7L))
You could use formattable, see these examples.
The color_tile formatter combined with the area option allows you to change the color of a specific row and column.
library(formattable)
highlight <- color_tile("yellow","yellow")
formattable(data, list(
area(col = 3, row = 1 ) ~ highlight,
area(col = 4, row = 4 ) ~ highlight
))

Write a Data.Table as a csv file

I have a data.table that has list values within the columns. Below is the dput:
dput(df2)
structure(list(a = list(structure(5594.05118603497, .Names = "a"),
structure(8877.42723091876, .Names = "a"), structure(2948.95666065332,
.Names = "a"),
structure(5312.77623937465, .Names = "a"), structure(676.637044992807,
.Names = "a"),
structure(323.104243007498, .Names = "a")), b =
list(structure(3.90258318853593e-06, .Names = "b"),
structure(3.89772483584672e-06, .Names = "b"), structure(3.91175458242421e-
06, .Names = "b"),
structure(3.90169532031545e-06, .Names = "b"), structure(6.54536728417568e-
06, .Names = "b"),
structure(6.59087917747312e-06, .Names = "b")), id = 1:6), .Names = c("a",
"b", "id"), class = c("data.table", "data.frame"), row.names = c(NA,
-6L), .internal.selfref = <pointer: 0x0000000000220788>)
Here is what the output looks like:
head(df2)
a b id
1: 5594.051 3.902583e-06 1
2: 8877.427 3.897725e-06 2
3: 2948.957 3.911755e-06 3
4: 5312.776 3.901695e-06 4
5: 676.637 6.545367e-06 5
6: 323.1042 6.590879e-06 6
This looks OK at first, but if you look further into it, this is what it looks like when I select a column:
How do I change df2 to just be a normal dataframe where it doesn't have these extra values within a and b like this? I am trying to write this file to a csv but it will not allow me to because it is saying there are vectors as the values.
Thanks!
Edit:
This was the code that generated the lists:
test<-sapply( split( df , df$ID),
function(d){ dat <- list2env(d)
nlsfit <- nls( form = y ~ a * (1-exp(-b * x)), data=dat,
start= list( a=max(dat$y), b=b.start),
control= control1)
list(a = coef(nlsfit)[1], b = coef(nlsfit)[2])} )
df1<-as.data.frame(t(test))
Load the right package, look at its help page, search for "csv", follow the Usage section:
library(data.table)
help(pac=data.table)
fwrite(df2, file="~/test.csv") # for mac, need changing for other OS
Another approach might be:
as.data.frame( lapply(df2, unlist) )

Reading multiple excel files into R using the map function

I know there are similar questions to this, but I haven't come across ones using the map function from the purrr package. I am having a difficult time trying to read in some Excel files (.xlsx) using purrr::map(). I would like each one to be its own data frame. I tried the approach in this similar question: How can I reading multiple (excel) files into R?.
However, I keep getting this error:
Error: path does not exist: "tab3_DOfinal_HUClevel_assessment.xlsx"
I know for sure I have the right path. Not sure why I am getting this error. I have about 9 excel spreadsheets that I want to read in.
Code I tried:
# load necessary package
library(purrr)
file.list <- list.files(path="2016_Data_Tables",pattern='*.xlsx')
file.list <- setNames(file.list, file.list)
# store all .xlsx files as individual data frames inside of one list
df <- map(file.list, read_xlsx)
The file name pattern goes as follows:
tab3_DOfinal_HUClevel_assessment.xlsx
The only thing that changes is the DOfinal part.
Some sample data:
structure(list(ID = 1, WMA = 15, Number = "02040302020030-01",
HUC14 = "HUC02040302020030", Name = "Absecon Creek (AC Reserviors) (gage to SB)",
Region = "Atlantic Coast", NumofStations = "2", ListofStations = "01410455, R32",
ListofAssessment = "2, 2", HUCTier = "2", swqs = "PL, SE1",
TotalNumSamples5yrs = "NA", flgusgsprelim = "NA, 0", auassess = 2,
auassesstrout = -999, finalauassess = 2, finalauassesstrout = -999,
Changefrom2014 = "No Change-2", Changetroutfrom2014 = "No Change",
listHUC14assess5 = "NA", listHUC14assess3 = "NA", listHUC14assess2 = "01410455, R32",
His2014 = "Attaining", His2014trout = "-999", Notes = NA_character_,
OldStations2014 = "01410455", OldStationsAssess2014 = "2",
Error = NA_character_), .Names = c("ID", "WMA", "Number",
"HUC14", "Name", "Region", "NumofStations", "ListofStations",
"ListofAssessment", "HUCTier", "swqs", "TotalNumSamples5yrs",
"flgusgsprelim", "auassess", "auassesstrout", "finalauassess",
"finalauassesstrout", "Changefrom2014", "Changetroutfrom2014",
"listHUC14assess5", "listHUC14assess3", "listHUC14assess2", "His2014",
"His2014trout", "Notes", "OldStations2014", "OldStationsAssess2014",
"Error"), row.names = c(NA, -1L), class = c("tbl_df", "tbl",
"data.frame"))
structure(list(WMA = 15, Number = "02040302020030-01", HUC14 = "HUC02040302020030",
Name = "Absecon Creek (AC Reserviors) (gage to SB)", Region = "Atlantic Coast",
NumofStations = "1", ListofStations = "01410455", ListofAssessment = "2",
MaxStaAssessment = "2", MinStaAssessment = "2", TotalNumSamples5yrs = "NA",
auassess = "2", ChangeFrom2014 = "No Change-2", liststaassess2 = "01410455",
liststaassess3 = "NA", liststaassess5 = "NA", Assessment2014 = "Attaining",
Comments = NA_character_), .Names = c("WMA", "Number", "HUC14",
"Name", "Region", "NumofStations", "ListofStations", "ListofAssessment",
"MaxStaAssessment", "MinStaAssessment", "TotalNumSamples5yrs",
"auassess", "ChangeFrom2014", "liststaassess2", "liststaassess3",
"liststaassess5", "Assessment2014", "Comments"), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame"))
structure(list(WMA = 15, Number = "02040302020030-01", HUC14 = "HUC02040302020030",
Name = "Absecon Creek (AC Reserviors) (gage to SB)", Region = "Atlantic Coast",
NumofStations = "1", ListofStations = "R32", ListofAssessment = "3",
MaxStaAssessment = "3", MinStaAssessment = "3", TotalNumSamples5yrs = "9",
auassess = "3", ChangeFrom2014 = "No Change-3", liststaassess2 = "NA",
liststaassess3 = "R32", liststaassess5 = "NA", Assessment2014 = "N/A",
Comments = NA_character_), .Names = c("WMA", "Number", "HUC14",
"Name", "Region", "NumofStations", "ListofStations", "ListofAssessment",
"MaxStaAssessment", "MinStaAssessment", "TotalNumSamples5yrs",
"auassess", "ChangeFrom2014", "liststaassess2", "liststaassess3",
"liststaassess5", "Assessment2014", "Comments"), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame"))
Aurèle makes a really good point regarding your file paths.
I would like each one to be it's own data frame
If this is the goal, then a combination of purrr::iwalk and assign could easily get you there. The process goes as follows:
Get a list of all of the .xlsx files located in 2016_Data_Tables/.
Then use purrr::set_names to name each element in this list with its filename sans the .xlsx extension.
Then use purrr::iwalk to apply the assign function to each element in the list. Specifically, use read_xlsx to read each .xlsx file from disk into a data frame and then assign that data frame as a named object to R's global environment
list.files('data/mpg', pattern = '.xlsx', full.names = T) %>%
purrr::set_names(stringr::str_remove(basename(.), '.xlsx$')) %>%
purrr::iwalk(function(x, i) assign(i, readxl::read_xlsx(x), .GlobalEnv))

Using literal month names with year in ramcharts

Here is my code to generate a bar plot using rAmCharts:
library(rAmCharts)
amBarplot(x = "month", y = "value", data = dataset,
dataDateFormat = "MM/YYYY", minPeriod = "MM",
show_values = FALSE, labelRotation = -90, depth = 0.1)
However, is there a way to use month names and the year on my x-axis? I am trying to use the MMM-YY format.
Sample dataset,
structure(list(value = c(11544, 9588, 9411, 10365, 11154, 12688
), month = c("05/2012", "06/2012", "07/2012", "08/2012", "09/2012",
"10/2012")), .Names = c("value", "month"), row.names = c(NA,
6L), class = "data.frame")
Thanks.
It appears that rAmCharts doesn't expose AmCharts' dateFormats setting in the categoryAxis, so you have to access it through the init event and create your own dateFormats array with a modified format string for the MM period. I'm not very experienced with R, but here's how I managed to make it work using R 3.4.2 and rAmCharts 2.1.5
chart <- amBarplot( ... settings omitted ... )
addListener(.Object = chart,
name = 'init',
expression = paste(
"function(e) {",
"e.chart.categoryAxis.dateFormats = ",
'[{"period":"fff","format":"JJ:NN:SS"},{"period":"ss","format":"JJ:NN:SS"},',
'{"period":"mm","format":"JJ:NN"},{"period":"hh","format":"JJ:NN"},{"period":"DD","format":"MMM DD"},',
'{"period":"WW","format":"MMM DD"},',
'{"period":"MM","format":"MMM-YY"},', # "add YY to default MM format
'{"period":"YYYY","format":"YYYY"}]; ',
'e.chart.validateData();',
"}")
)
Here is a different solution:
library(rAmCharts)
dataset <- structure(list(value = c(11544, 9588, 9411, 10365, 11154, 12688
), month = c("05/2012", "06/2012", "07/2012", "08/2012", "09/2012",
"10/2012")), .Names = c("value", "month"), row.names = c(NA,
6L), class = "data.frame")
dataset$month <- as.character(
format(
as.Date(paste0("01/",dataset$month), "%d/%m/%Y"),
"%B %Y"))
amBarplot(x = "month", y = "value", data = dataset,
show_values = FALSE, labelRotation = -90, depth = 0.1)

R crashes while using data.table

sac[,treatment_days := as.character(seq(from = SACDPDAT, to = SACRTDAT, by = "1 day")), by = PACKID]
I have a data.table named sac; dput(sac[1:2,]) gives the following:
structure(list(SUBJECT_Blinded = c(1201001, 1101001), LINE = c(8,
4), MODULE = c("SAC", "SAC"), CENTRE_Blinded = c(1201, 1201),
STUDYPER = c(7, 4), PACKID = c(10096, 10595), SACDPDAT = structure(c(1335304800,
1325545200), class = c("POSIXct", "POSIXt"), tzone = ""),
SACDP1 = c(35, 35), C_SACDP = c(NA_character_, NA_character_
), SACRTDAT = structure(c(1340316000, 1327964400), class = c("POSIXct",
"POSIXt"), tzone = ""), SACRT1 = c(0, 9), C_SACRT = c(NA_character_,
NA_character_)), .Names = c("SUBJECT_Blinded", "LINE", "MODULE",
"CENTRE_Blinded", "STUDYPER", "PACKID", "SACDPDAT", "SACDP1",
"C_SACDP", "SACRTDAT", "SACRT1", "C_SACRT"), sorted = c("SUBJECT_Blinded",
"PACKID"), class = c("data.table", "data.frame"), row.names = c(NA,
-2L))
When I run the code:
sac[,treatment_days := list(format(seq(from = SACDPDAT, to = SACRTDAT, by = "1 day"),"%Y-%m-%d")), by = PACKID]
RStudio crashes and returns this info:
Problem signature:
Problem Event Name: APPCRASH
Application Name: rsession.exe
Application Version: 0.98.501.0
Application Timestamp: 52e8371d
Fault Module Name: R.dll
Fault Module Version: 3.3.65126.0
Fault Module Timestamp: 53185fd3
Exception Code: c0000005
Exception Offset: 0000000000028c36
OS Version: 6.1.7601.2.1.0.256.48
Locale ID: 1045
Additional Information 1: 4fc0
Additional Information 2: 4fc0e6e5b53a870c89fb6e37a38d7e6b
Additional Information 3: 9d6e
Additional Information 4: 9d6e8f79167930945e5a5d06afac680e
It's the same with pure R. Any ideas how to do it another way?
There are a couple of problems with your new code:
"1 day" is incorrect, if you run seq on a date object, the number you pass to by will be interpreted as days, so:
seq(from = SACDPDAT, to = SACRTDAT, by = 1)
You also cannot create a new column from this sequence, because there can only be one value for each row. Instead, you can generate the sequence of days by PACKID, and then join this onto the old data.table
So try:
setkey(sac, PACKID)
sac <- sac[sac[,seq(from = SACDPDAT, to = SACRTDAT, by = 1), by=PACKID]]

Resources