Related
I have an object called 'data' containing information about real estate property:
> class(data)
[1] "data.frame"
> dim(data)
[1] 351 13
Within its columns, there is one called 'addresses':
> class(data$addresses)
[1] "character"
I used the function cartociudad_geocode() to get the maximum amount of addresses geolocalized:
geo_info<-sapply(data$addresses, cartociudad_geocode, on.error="warn")
Which resulted in a large list of 351 elements, with the geographical info of all the address strings that DID get geolocalized. The ones that DIDN'T appear as NULL:
> dput(head(geo_info, 4))
list(`Valencia, Avenida Nº S.n. Planta 1 Castellon De La Plana/Castello De La Plana` = structure(list(
id = "2061380170886", province = "Badajoz", comunidadAutonoma = "Extremadura",
muni = "Valdetorres", type = "portal", address = "PLAN PARCIAL Nº 1",
postalCode = "06474", poblacion = "Valdetorres", geom = "POINT(-6.07339711488708 38.91670317033)",
tip_via = "BARRIO", lat = 38.91670317033, lng = -6.07339711488708,
portalNumber = "0", stateMsg = "Resultado exacto de la búsqueda",
extension = "", state = "1", countryCode = "011"), row.names = c(NA,
-1L), class = "data.frame"), `Mayor Nº S.n. Planta 1 Castellon De La Plana/Castello De La Plana` = structure(list(
id = "2061380170886", province = "Badajoz", comunidadAutonoma = "Extremadura",
muni = "Valdetorres", type = "portal", address = "PLAN PARCIAL Nº 1",
postalCode = "06474", poblacion = "Valdetorres", geom = "POINT(-6.07339711488708 38.91670317033)",
tip_via = "BARRIO", lat = 38.91670317033, lng = -6.07339711488708,
portalNumber = "0", stateMsg = "Resultado exacto de la búsqueda",
extension = "", state = "1", countryCode = "011"), row.names = c(NA,
-1L), class = "data.frame"), `Notario Mas, Plaza Nº 3 Piso 1º Castellon De La Plana` = structure(list(
id = "120400001216", province = "Castellón/Castelló", comunidadAutonoma = "Comunitat Valenciana",
muni = "Castelló de la Plana", type = "portal", address = "NOTARIO MAS",
geom = "POINT(-0.0414310339999702 39.9877939630001)", tip_via = "PLAZA",
lat = 39.9877939630001, lng = -0.0414310339999702, portalNumber = "5",
stateMsg = "Resultado exacto de la búsqueda", state = "1",
countryCode = "011"), row.names = c(NA, -1L), class = "data.frame"),
`Mayor Nº 56 Piso 1º Castellon De La Plana` = NULL)
My objective is: transforming geo_info into a data frame while KEEPING the NULL rows.
I have a dataset with the following structure:
structure(list(mes = c(7, 7, 7, 4, 4), ano = c(2021, 2021, 2021,
2021, 2021), nacionalidad = c("Venezuela", "Venezuela", "Venezuela",
"Venezuela", "Venezuela"), centro = c("Aeropuerto Eldorado",
"Aeropuerto Eldorado", "Aeropuerto Eldorado", "Aeropuerto Eldorado",
"Aeropuerto Eldorado"), puesto = c("Aeropuerto Eldorado de Bogotá",
"Aeropuerto Eldorado de Bogotá", "Aeropuerto Eldorado de Bogotá",
"Aeropuerto Eldorado de Bogotá", "Aeropuerto Eldorado de Bogotá"
), transporte = c("Air", "Air", "Air", "Air", "Air"), ciudad = c("Arauca",
"Bogotá", "Pereira", "Bogotá", "Bogotá"), flujo = c("Entries",
"Entries", "Entries", "Entries", "Entries"), motivo = c("Tourism",
"Tourism", "Tourism", "Transit", "Transit"), edad = c("0-17",
"0-17", "0-17", "18-29", "18-29"), colombiano = c("Extranjeros",
"Extranjeros", "Extranjeros", "Extranjeros", "Extranjeros"),
departamento = c("Arauca", "Bogota D.C.", "Risaralda", "Bogota D.C.",
"Bogota D.C."), region = c("América del Sur", "América del Sur",
"América del Sur", "América del Sur", "América del Sur"
), status = c("Permiso de Turismo", "Permiso de Turismo",
"Permiso de Turismo", "Permiso Otras Actividades", "Permiso Otras Actividades"
), departamento_2 = c("Bogotá", "Bogotá", "Bogotá", "Bogotá",
"Bogotá"), destino_procedencia = c("Emiratos Árabes", "Israel",
"Emiratos Árabes", "Panamá", "Panamá"), region_destino = c("Asia",
"Asia", "Asia", "América Central y el Caribe", "América Central y el Caribe"
), sexo = c("Male", "Male", "Male", "Male", "Male"), numero = c(1,
1, 1, 5, 5), date = structure(c(18809, 18809, 18809, 18718,
18718), class = "Date"), date2 = structure(c(18809, 18809,
18809, 18718, 18718), class = "Date")), row.names = c(NA,
5L), class = "data.frame")
and I would like to plot a chart by date and other variables (e.g. mode of transport) in ggplotly. The chart is correct, but the labels that appear in the dates show numbers instead of date format. The variable in the database is in date format, and I already tried changing it to different formats and still does not work.
I would also like to add minor date breaks, but can't seem to get it right.
Here is the code I am using for the chart:
# Line chart of the number of rows in `Flow` per date, one line per mode of
# transport. stat = "count" makes ggplot2 tally the rows for each x value.
chart9 <- ggplot() +
  geom_line(
    data = Flow,
    mapping = aes(x = date, color = transporte),
    stat = "count"
  ) +
  scale_x_date(
    date_labels = "%Y (%b)",
    date_minor_breaks = "1 month"
  ) +
  labs(
    title = "Number of Entrances, by Month and Mode of Transportation, 2017-2021",
    x = "Date",
    y = "Number or People",
    color = "Type of Flow"
  )

# Turn the static ggplot into an interactive plotly widget.
ggplotly(chart9)
This is the chart plotted
Any help would be greatly appreciated! :)
Looks like the date class gets dropped when using stat="count". Hence, one option to achieve your desired result would be to aggregate your dataset before passing it to ggplot using e.g. dplyr::count(Flow, date, transporte):
# Aggregate before plotting: one row per (date, transporte) with the number of
# records in `count`. Plotting the pre-computed counts instead of relying on
# stat = "count" keeps `date` a Date on the x axis.
# The counts are stored under a new name so the raw `Flow` data is not
# overwritten; re-running this snippet (or the stat = "count" variant) on an
# already-aggregated `Flow` would silently count the wrong rows.
flow_counts <- dplyr::count(Flow, date, transporte, name = "count")

chart_counts <- ggplot() +
  geom_line(
    data = flow_counts,
    aes(x = date, y = count, color = transporte)
  ) +
  scale_x_date(
    date_minor_breaks = "1 month",
    date_labels = "%Y (%b)"
  ) +
  labs(color = "Type of Flow") +
  ggtitle("Number of Entrances, by Month and Mode of Transportation, 2017-2021") +
  xlab("Date") +
  ylab("Number or People")

# Pass the plot explicitly rather than depending on ggplot2's "last plot".
ggplotly(chart_counts)
A second option, which additionally allows for setting the date format, would be to make use of the text "aesthetic" and convert your numbers back to proper dates:
# plotly supplies ggplotly(), used at the end to make the chart interactive.
library(plotly)
# Count-per-date line chart (stat = "count") with a custom "text" aesthetic
# that builds the tooltip shown by plotly.
ggplot() +
geom_line(data = Flow, aes(x = date, color = transporte, text = paste(
"count:", ..count..,
# With stat = "count" the x value arrives here as a plain number, so it is
# converted back to a Date (days since 1970-01-01) and then formatted.
"<br>Date: ", format(as.Date(..x.., origin = "1970-01-01"), "%Y (%b)"),
"<br>transporte: ", ..color..
)), stat = "count") +
scale_x_date(
date_minor_breaks = "1 month",
date_labels = "%Y (%b)"
) +
labs(color = "Type of Flow") +
ggtitle("Number of Entrances, by Month and Mode of Transportation, 2017-2021") +
xlab("Date") +
ylab("Number or People")
# Show only the custom "text" aesthetic in the hover tooltip.
ggplotly(tooltip = c("text"))
I am trying to merge two dataframes, one containing variables like Date, Author, Paper, and IDs, the other containing texts and their IDs.
I add, because it might have some importance, that the dataframe containing the texts has been obtained by converting a Vcorpus into a dataframe with the following code :
# Flatten the "content" element of every document in `corpus` into a single
# character column. FALSE is spelled out because `F` is an ordinary variable
# that can be reassigned.
factivadf <- data.frame(text = unlist(sapply(corpus, `[`, "content")), stringsAsFactors = FALSE)
To achieve the merging, I use the following code :
factivaclean <- full_join(corpusVars, factiva, by = "doc_id")
And I get the following error :
Error in UseMethod("tbl_vars") : no applicable method for 'tbl_vars' applied to an object of class "list"
My two original dataframes were regular dataframes, and I thought at first that the error required to apply tibble() , so I applied the function to them, but I keep getting the same error.
Here is the dput of the head of my first dataframe, corpusVars :
structure(list(corpusVars = structure(list(doc_id = c("LEPARI0020120304e833000v5",
"HUMAN00020120301e8320001e", "LACRX00020120228e82s00017", "HUMAN00020120223e82o0001h",
"HUMAN00020120223e82o0001g", "HUMAN00020120223e82o0000n"), Origine = c("Le Parisien-Aujourd'hui en France",
"L'Humanité", "La Croix", "L'Humanité", "L'Humanité", "L'Humanité"
), Date = structure(c(15402, 15401, 15398, 15394, 15394, 15394
), class = "Date"), Auteur = c(NA, NA, NA, "Entretien réalisé par <U+2028>Fara C",
"V. H.", NA)), .internal.selfref = <pointer: 0x0000024403b11ef0>, row.names = c(NA,
6L), class = c("data.table", "data.frame"))), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
Here is the dput of the second dataframe, factivadf :
structure(list(factivadf = structure(list(doc_id = c("ECHOS00020110523e75n0004j.content",
"ECHOS00020110525e75p0000o.content1", "ECHOS00020110525e75p0000o.content2",
"ECHOS00020110525e75p0000o.content3", "ECHOS00020110525e75p0000o.content4",
"ECHOS00020110530e75u00019.content1"), text = c("Environ 500 personnes s'étaient donné rendez-vous hier devant le Centre Georges-Pompidou pour condamner le « sexisme » exprimé par de nombreux responsables politiques et médiatiques autour de l'affaire DSK. Une initiative portée notamment par les associations Paroles de femmes et Osez le féminisme, qui ont rappelé que 75.000 femmes sont chaque année en France victimes de viol.",
"Le propos. La collection « Les 50 grandes idées que vous devez connaître » s'enrichit d'un nouveau titre, signé par un enseignant britannique de littérature qui se consacre maintenant à la vulgarisation des savoirs. Comme dans un dictionnaire de science politique (mais sans pontifier, ni tourner des pages autour du pot), 50 entrées promènent le lecteur de la théorie politique (liberté, égalité, tyrannie, utopie, etc.) aux matières de la politique (pauvreté, sécurité, racisme, corruption, etc.) en passant par les idéologies (anarchisme, capitalisme, socialisme, multiculturalisme, féminisme, etc.). Un petit glossaire complète le tout, avec de rapides définitions, qui auraient pu aussi constituer des idées à développer (laisser-faire, lobbying, réforme, etc.).",
"Conçu comme un outil agréable de travail, ce livre original dispose d'un index permettant de retrouver nombre de personnages (Nicolas Sarkozy, Aristote, Aristide Briand ou bien encore la reine Victoria) à travers des pages dédiées aussi à la différence, la tyrannie, la laïcité ou le droit divin.",
"L'intérêt. Mêlant citations et proverbes (plus ou moins célèbres), encadrés descriptifs, chronologies thématiques, tout en alternant ton sérieux (le plus souvent) et piques ironiques (voir la notice sur le politiquement correct), l'ouvrage permet, en se feuilletant, de passer un bon moment. Il a, au-delà, toute sa place dans une bibliothèque, à portée de main, pour une présentation rapide et claire de thèmes tout à fait sérieux.",
"La citation.« La politique est supposée être la seconde plus vieille profession. J'ai fini par réaliser qu'elle ressemblait beaucoup à la première. » (Ronald Reagan)",
"Le décret que prépare le gouvernement pour favoriser l'égalité salariale entre les hommes et les femmes est violemment critiqué par les syndicats, ce qui est courant, mais aussi par une partie de la majorité, ce qui l'est moins. Députée UMP et présidente de la délégation aux droits des femmes de l'Assemblée nationale, Marie-Jo Zimmermann ne mâche pas ses mots. « Ce décret, c'est de l'eau tiède, il ne réglera rien au problème», assure-t-elle."
)), .internal.selfref = <pointer: 0x0000024403b11ef0>, row.names = c(NA,
6L), class = "data.frame")), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
Do you know how to merge these dataframes without getting this error ?
Thank you in advance !
EDIT :
When opening it with read.table("corpusVars.csv", header = TRUE, sep = ";", na.strings = " ") , I get the following error (same with the other file, just another line being incriminated) :
Error in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
line 102 did not have 4 elements
When opening it with read.csv2, here is a subset of the dput of the head of corpusVars :
structure(list(doc_id = structure(c(898L, 434L, 702L, 433L, 432L,
431L), .Label = c("ECHOS00020110523e75n0004j", "ECHOS00020110525e75p0000o",
"ECHOS00020110530e75u00019", "ECHOS00020110603e76300003", "ECHOS00020110615e76f0003l",
"ECHOS00020110621e76l00021"), class = "factor"),
Origine = structure(c(5L, 1L, 2L, 1L, 1L, 1L), .Label = c("L'Humanité",
"La Croix", "La Tribune", "Le Figaro", "Le Parisien-Aujourd'hui en France",
"Les Echos"), class = "factor"), Date = structure(c(30L,
16L, 368L, 313L, 313L, 313L), .Label = c("01/02/2012", "01/02/2019",
"01/03/2019", "01/04/2019", "01/06/2011", "01/07/2011"), class = "factor"),
Auteur = structure(c(NA, NA, NA, 150L, 463L, NA), .Label = c("A.DA.",
"A.F.", "Adam Arroudj; 0", "Adèle Smith; adelesmith100#gmail.com",
"ADRIEN GOMBEAUD", "Adrien Jaulmes; ajaulmes#lefigaro.fr"), class = "factor")), row.names = c(NA,
6L), class = "data.frame")
There seems to be a problem with your reading function.
The output is not a common dataframe object, but rather some sort of list containing only a dataframe object.
Indeed, this line seems to work and give a proper merged dataframe:
full_join(corpusVars$corpusVars, factivadf$factivadf, by = "doc_id")
Of note, as Chris said, .internal.selfref = <pointer: 0x0000024403b11ef0> should not be included and I had to remove it from your dput output for the example to work. This indeed seems to be related to fread:
Warning: 'Invalid .internal.selfref detected' when adding a column to a data.table returned from a function
Recently I have been using the survival package in R to better measure the waiting time of patients in the Emergency Department of my hospital, which I have achieved. However, it is hard to present the results from RStudio in a way that third parties can understand, so I'm developing a Shiny app to show the results without having to show the code, and to be able to modify certain elements quickly.
When I create the app, I have the problem that when I ask it to build a data frame with the quantiles, I get the error: "Error in rep: invalid 'times' argument" in R#200. I have reviewed the code several times, but I still can't find a solution. The complete code is enclosed below.
Thank you
# UI definition: a sidebar with the filters (triage category, time interval to
# analyse, care area, discharge type, date range) and a main panel holding the
# results table "tabla1".
# `listadotriage` and `listadoatencion` are defined elsewhere and must exist
# before this runs.
# The values of "tiempo" and "alta" are read by the server with as.integer()
# and used as column positions of `dat1`.
ui <- fluidPage(
  titlePanel("Prototipo Tiempos de Urgencia"),
  sidebarLayout(
    sidebarPanel(
      selectInput(
        inputId = "triage",
        label = "Triage",
        choices = c("Todos", listadotriage),
        selectize = FALSE,
        selected = "Todos"
      ),
      selectInput(
        inputId = "tiempo",
        label = "Tiempo a analizar",
        choices = c(
          "Diferencia entre Hora de atención y hora de triage" = "60",
          "Diferencia ente hora de atencón y hora de admisión" = "61",
          "Diferencia ente hora de atencón y hora de alta" = "62",
          "Diferencia ente hora de admision y hora de alta" = "63",
          "Diferencia ente hora de admision y hora de triage" = "64"
        ),
        selectize = FALSE,
        # `selected` must be a choice *value*, not its label.
        selected = "60"
      ),
      selectInput(
        inputId = "atencion",
        label = "Área Atención",
        choices = c("Todos", listadoatencion),
        selectize = FALSE,
        selected = "Todos"
      ),
      selectInput(
        inputId = "alta",
        label = "Tipo de Alta",
        choices = c(
          "General" = "74",
          "Hospitalizado" = "72",
          "Altas" = "73"
        ),
        multiple = FALSE,
        # `selected` must be a choice *value*, not its label.
        selected = "74"
      ),
      dateRangeInput(
        inputId = "fecha",
        label = "Intervalo de fechas",
        format = "dd-mm-yyyy",
        start = "2019-11-01",
        end = "2019-11-30",
        weekstart = 1,
        language = "es",
        separator = "a"
      )
    ),
    # Wrap the output in mainPanel() so sidebarLayout() receives a proper
    # main-panel container instead of a bare output element.
    mainPanel(
      dataTableOutput(outputId = "tabla1")
    )
  )
)
# Server logic: renders "tabla1", a per-physician table of waiting-time
# quantiles (25%, 50%, 75%, 90%, 100%) estimated with survfit() on the rows of
# the global data frame `dat1` that match the selected filters.
#
# Inputs read: input$triage, input$atencion, input$fecha (length-2 date range),
# input$tiempo and input$alta (column positions of `dat1`, sent as strings).
server <- function(input, output) {
  output$tabla1 <- renderDataTable({
    # Build the row filter with plain `if`s: the inputs are scalars, and
    # ifelse() with a scalar test would return a single value instead of one
    # logical per row.
    filas <- dat1$fechanormal >= input$fecha[1] &
      dat1$fechanormal <= input$fecha[2]
    if (input$atencion != "Todos") {
      filas <- filas & dat1$AREA_ATENCION == input$atencion
    }
    if (input$triage != "Todos") {
      filas <- filas & dat1$Triage == input$triage
    }
    # which() drops rows whose comparison is NA, as subset() would.
    filtrado <- dat1[which(filas), , drop = FALSE]

    # Show a message instead of a survfit() error when nothing matches.
    shiny::validate(
      shiny::need(
        nrow(filtrado) > 0,
        "No hay registros para los filtros seleccionados."
      )
    )

    # Column 30 is used as the grouping variable `Medico` in the model below
    # (assumes that is its name in `dat1` -- TODO confirm); the other two
    # columns are the selected time difference and the event indicator.
    timesevent <- as.integer(input$tiempo)
    event <- as.integer(input$alta)
    dat3 <- filtrado[, c(30, timesevent, event)]

    # `gral` is the Surv response only; the formula is built in survfit().
    gral <- Surv(dat3[[2]], event = as.numeric(dat3[[3]]))
    ajuste <- survfit(gral ~ Medico, data = dat3)

    # Quantiles of the waiting time per physician, one row per stratum.
    cuantiles <- as.data.frame(
      quantile(ajuste, c(0.25, 0.5, 0.75, 0.9, 1), conf.int = FALSE)
    )
    setDT(cuantiles, keep.rownames = TRUE)
    cuantiles$rn <- gsub("Medico=", "", cuantiles$rn)
    colnames(cuantiles) <- c("Médico", "P.25", "P.50", "P.75", "P.90", "P.100")

    # NOTE(review): `strata` may hold the number of time points per curve
    # rather than the number of patients -- confirm that "Consultas" is the
    # intended label (the per-curve subject count is in `ajuste$n`).
    consultas <- as.data.frame(ajuste$strata)
    setDT(consultas, keep.rownames = TRUE)
    consultas$rn <- gsub("Medico=", "", consultas$rn)
    colnames(consultas) <- c("Médico", "Consultas")

    tabla <- merge(consultas, cuantiles, by = "Médico")

    # The caption mentions the triage category only when one is selected.
    encabezado <- if (input$triage == "Todos") {
      "Tiempos de atención en Servicio de Urgencia por Médico, por Atención en Servicio"
    } else {
      paste(
        "Tiempos de atención en Servicio de Urgencia por Médico, por Categorización",
        input$triage,
        "por Atención en Servicio",
        sep = " "
      )
    }
    titulo <- paste(
      encabezado,
      input$atencion,
      "durante", "el período",
      format(input$fecha[1], format = "%d-%m-%Y"),
      "a",
      format(input$fecha[2], format = "%d-%m-%Y"),
      sep = " "
    )

    datatable(
      data = tabla,
      caption = titulo,
      rownames = FALSE,
      extensions = "Buttons",
      options = list(pageLength = 40, dom = "Bfrtip", buttons = c("print"))
    )
  })
}
shinyApp(ui = ui, server = server)```
I solved it!
I changed the following lines
dat1<-subset(dat1,ifelse(input$atencion!="Todos",dat1$AREA_ATENCION==input$atencion & dat1$fechanormal>=input$fecha[1] & dat1$fechanormal<=input$fecha[2],
dat1$fechanormal>=input$fecha[1] & dat1$fechanormal<=input$fecha[2]))
```
for the next lines:
```
# Keep only the rows whose date falls inside the selected range.
dat3 <- subset(
  dat3,
  dat3$fechanormal >= input$fecha[1] & dat3$fechanormal <= input$fecha[2]
)
# `input$atencion` is a single value, so a plain `if` is used instead of
# calling ifelse() only for its assignment side effect.
if (input$atencion != "Todos") {
  dat3 <- subset(dat3, dat3$AREA_ATENCION == input$atencion)
}
```
Regards!!!
I need to put the min date value and the max date value in ggplot subtitle.
I've found a similar question but for the axis labels, I need to apply this to the subtitle argument:
subtitle = paste0("Del ", vitocho_likes_min_date, " al ", vitocho_likes_max_date)
Min date looks like:
"2010-10-14" #Expect: "14 de octubre del 2010"
dput(vitocho_likes_min_date)
structure(14896, class = "Date")
Max date looks like:
"2019-04-29" #Expect: "29 de abril del 2019"
dput(vitocho_likes_max_date)
structure(18015, class = "Date")
This is my ggplot chart:
# Flipped bar chart of the rows of `t_kids_faves` belonging to user
# "VictorAndresGB": one bar per screen_name (ordered by n), the value of n
# printed as a label, and the min/max like dates shown in the subtitle.
vitocho_chart <- t_kids_faves %>%
  filter(user == "VictorAndresGB") %>%
  ggplot(aes(x = fct_reorder(screen_name, n), y = n)) +
  geom_col(fill = "#494A4F") +
  # Label with the count, placed 15 units below the bar's value.
  geom_text(
    aes(x = screen_name, y = n - 15, label = n),
    color = "gray95",
    size = 4
  ) +
  labs(
    title = "Cuentas de Twitter con más likes de Victor Andrés García Belaunde.",
    subtitle = paste0("Del ", vitocho_likes_min_date, " al ", vitocho_likes_max_date),
    x = "",
    y = ""
  ) +
  coord_flip() +
  theme_tweets()
use:
# Switch the date/time locale to Spanish so that %B yields Spanish month names.
# Locale names differ by platform ("Spanish" is the Windows spelling,
# "es_ES.UTF-8" the usual one elsewhere), so try each until one is accepted;
# Sys.setlocale() returns "" (with a warning) when a name is not available.
for (locale_name in c("es_ES.UTF-8", "Spanish")) {
  if (nzchar(suppressWarnings(Sys.setlocale("LC_TIME", locale_name)))) {
    break
  }
}

# Format the dates as e.g. "14 de octubre del 2010"; format() already returns
# a character string, so no extra as.character() is needed.
vitocho_likes_min_date <- format(
  as.Date(14896, origin = "1970-01-01"),
  "%d de %B del %Y"
)
vitocho_likes_max_date <- format(
  as.Date(18015, origin = "1970-01-01"),
  "%d de %B del %Y"
)