I have a data frame with columns A and B listing coordinates, but they are in DMS (degrees, minutes, seconds) format. I wonder if there is a simple way to convert them into decimal latitudes and longitudes with an R package.
Here is a sample of my database:
structure(list(Nom = c("Pont de Normandie", "Pont de Tancarville",
"Pont de Saint-Nazaire", "Pont de l’Iroise", "Pont d'Aquitaine",
"Viaduc de Millau", "Pont de Brotonne", "Viaduc du Chavanon",
"Pont de Térénez", "Pont du Bras de la Plaine"), Portée = c("856",
"608", "404", "400", "394", "342 (×6)", "320", "300", "285",
"281"), Long. = c(2141, 1420, 3356, 800, 1767, 2460, 1278, 360,
515, 305), Type = c("Haubané\nacier, béton précontraint",
"Suspendu\nacier, béton précontraint, béton armé", "Haubané\nacier, béton précontraint",
"Haubané\nacier, béton précontraint", "Suspendu\nacier, béton précontraint, pylônes en béton armé",
"Haubané\nMultihaubané, 7 piles béton, tablier et pylônes caissons acier, suspension axiale",
"Haubané\nTablier caisson béton, pylônes béton, suspension axiale",
"Suspendu\nTablier caisson mixte acier/béton, pylônes béton, suspension axiale",
"Haubané\nTablier courbe dalle béton, pylônes béton", "Treillis mixte acier/béton, précontrainte extérieure"
), `Voie portée
Voie franchie` = c("Autoroute A29\nRoute européenne 44\nSeine",
"Autoroute A131\nRoute européenne 5\nN182\nSeine", "RD 213\nLoire",
"RN 165\nRoute européenne 60\nÉlorn", "Autoroute A630\nRoute européenne 5\nRocade de Bordeaux\nGaronne",
"Autoroute A75\nRoute européenne 11\nGorges du Tarn", "RD 490\nSeine",
"Autoroute A89\nRoute européenne 70\nChavanon", "RD 791\nAulne",
"RD 26\nBras de la Plaine"), Date = c("1995", "1959", "1975",
"1994", "1967", "2004", "1977", "2000", "2011", "2001"), Localisation = c("Le Havre - Honfleur\n",
"Tancarville - Marais-Vernier\n", "Saint-Nazaire - Saint-Brevin-les-Pins\n",
"Plougastel-Daoulas - Le Relecq-Kerhuon\n", "Bordeaux\n", "Millau - Creissels\n",
"Caudebec-en-Caux\n", "Merlines - Messeix\n", "Landévennec - Rosnoën\n",
"Saint-Pierre - Entre-Deux\n"), A = c("49° 25′ 56,1″ N",
"49° 28′ 21,6″ N", "47° 17′ 06,3″ N", "48° 23′ 18,1″ N",
"44° 52′ 47,2″ N", "44° 04′ 48″ N", "49° 31′ 13,9″ N",
"45° 37′ 26,5″ N", "48° 16′ 07,9″ N", "21° 16′ 35,5″ S"
), B = c("0° 16′ 26,3″ E", "0° 27′ 52,8″ E",
"2° 10′ 13,8″ O", "4° 23′ 55,6″ O", "0° 32′ 09,7″ O",
"3° 01′ 20,6″ E", "0° 44′ 49,8″ E", "2° 28′ 47,5″ E",
"4° 15′ 48,2″ O", "55° 27′ 56,7″ E"), Département = c("Seine-Maritime\nCalvados",
"Seine-Maritime\nEure", "Loire-Atlantique", "Finistère", "Gironde",
"Aveyron", "Seine-Maritime", "Corrèze\nPuy-de-Dôme", "Finistère",
"La Réunion")), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame"))
You don't really need a package for this. A simple function should do the trick:
#' Convert degree-minute-second strings to signed decimal degrees
#'
#' @param dms Character vector of coordinates such as "49° 25′ 56,1″ N":
#'   degrees, minutes and seconds (decimal comma or decimal point), each
#'   followed by its mark and a space, then a hemisphere letter. `N`/`E` are
#'   positive; `S`, `O` (French "ouest") and `W` are negative.
#' @return Numeric vector with one decimal-degree value per element of `dms`.
#'   An unrecognised hemisphere letter yields `NA`.
dms_to_degrees <- function(dms) {
  # The minute and second marks in the data are the typographic primes
  # (′ and ″), not ASCII quotes; accept both spellings so neither silently
  # produces NA.
  parts <- strsplit(dms, "(° )|(′ )|(' )|(″ )|(\" )")
  hemispheres <- c("N", "S", "E", "O", "W")
  signs <- c(1, -1, 1, -1, -1)
  # vapply() guarantees a numeric result, including numeric(0) for empty input.
  vapply(parts, function(x) {
    # Decimal commas become points before the numeric conversion.
    magnitude <- sum(
      as.numeric(gsub(",", ".", x[1:3])) * c(1, 1 / 60, 1 / 3600)
    )
    magnitude * signs[match(x[4], hemispheres)]
  }, numeric(1))
}
Which allows:
dms_to_degrees(df$A)
#> [1] 49.43225 49.47267 47.28508 48.38836 44.87978 44.08000
#> [7] 49.52053 45.62403 48.26886 -21.27653
dms_to_degrees(df$B)
#> [1] 0.2739722 0.4646667 -2.1705000 -4.3987778 -0.5360278 3.0223889
#> [7] 0.7471667 2.4798611 -4.2633889 55.4657500
And we can confirm this by drawing a map:
library(ggplot2)
# Sanity check: draw the converted coordinates as red points on a world map.
world <- map_data("world")
# Add decimal `lat` / `long` columns derived from the DMS columns A and B.
bridges <- within(df, {
  lat <- dms_to_degrees(A)
  long <- dms_to_degrees(B)
})
ggplot(data = world) +
  geom_map(
    map = world,
    mapping = aes(long, lat, map_id = region),
    fill = "#d0e890",
    color = "gray50"
  ) +
  geom_point(
    data = bridges,
    mapping = aes(x = long, y = lat),
    color = "red"
  )
Related
I have an object called 'data' containing information about real estate property:
> class(data)
[1] "data.frame"
> dim(data)
[1] 351 13
Within its columns, there is one called 'addresses'
> class(data$addresses)
[1] "character"
I used the function cartociudad_geocode() to get the maximum amount of addresses geolocalized:
geo_info<-sapply(data$addresses, cartociudad_geocode, on.error="warn")
This resulted in a large list of 351 elements, with geographical info for all of the address strings that DID get geolocalized. The ones that DIDN'T appear as NULL:
> dput(head(geo_info, 4))
list(`Valencia, Avenida Nº S.n. Planta 1 Castellon De La Plana/Castello De La Plana` = structure(list(
id = "2061380170886", province = "Badajoz", comunidadAutonoma = "Extremadura",
muni = "Valdetorres", type = "portal", address = "PLAN PARCIAL Nº 1",
postalCode = "06474", poblacion = "Valdetorres", geom = "POINT(-6.07339711488708 38.91670317033)",
tip_via = "BARRIO", lat = 38.91670317033, lng = -6.07339711488708,
portalNumber = "0", stateMsg = "Resultado exacto de la búsqueda",
extension = "", state = "1", countryCode = "011"), row.names = c(NA,
-1L), class = "data.frame"), `Mayor Nº S.n. Planta 1 Castellon De La Plana/Castello De La Plana` = structure(list(
id = "2061380170886", province = "Badajoz", comunidadAutonoma = "Extremadura",
muni = "Valdetorres", type = "portal", address = "PLAN PARCIAL Nº 1",
postalCode = "06474", poblacion = "Valdetorres", geom = "POINT(-6.07339711488708 38.91670317033)",
tip_via = "BARRIO", lat = 38.91670317033, lng = -6.07339711488708,
portalNumber = "0", stateMsg = "Resultado exacto de la búsqueda",
extension = "", state = "1", countryCode = "011"), row.names = c(NA,
-1L), class = "data.frame"), `Notario Mas, Plaza Nº 3 Piso 1º Castellon De La Plana` = structure(list(
id = "120400001216", province = "Castellón/Castelló", comunidadAutonoma = "Comunitat Valenciana",
muni = "Castelló de la Plana", type = "portal", address = "NOTARIO MAS",
geom = "POINT(-0.0414310339999702 39.9877939630001)", tip_via = "PLAZA",
lat = 39.9877939630001, lng = -0.0414310339999702, portalNumber = "5",
stateMsg = "Resultado exacto de la búsqueda", state = "1",
countryCode = "011"), row.names = c(NA, -1L), class = "data.frame"),
`Mayor Nº 56 Piso 1º Castellon De La Plana` = NULL)`
My objective is: transforming geo_info into a data frame while KEEPING the NULL rows.
I have a dataset with the following structure:
structure(list(mes = c(7, 7, 7, 4, 4), ano = c(2021, 2021, 2021,
2021, 2021), nacionalidad = c("Venezuela", "Venezuela", "Venezuela",
"Venezuela", "Venezuela"), centro = c("Aeropuerto Eldorado",
"Aeropuerto Eldorado", "Aeropuerto Eldorado", "Aeropuerto Eldorado",
"Aeropuerto Eldorado"), puesto = c("Aeropuerto Eldorado de Bogotá",
"Aeropuerto Eldorado de Bogotá", "Aeropuerto Eldorado de Bogotá",
"Aeropuerto Eldorado de Bogotá", "Aeropuerto Eldorado de Bogotá"
), transporte = c("Air", "Air", "Air", "Air", "Air"), ciudad = c("Arauca",
"Bogotá", "Pereira", "Bogotá", "Bogotá"), flujo = c("Entries",
"Entries", "Entries", "Entries", "Entries"), motivo = c("Tourism",
"Tourism", "Tourism", "Transit", "Transit"), edad = c("0-17",
"0-17", "0-17", "18-29", "18-29"), colombiano = c("Extranjeros",
"Extranjeros", "Extranjeros", "Extranjeros", "Extranjeros"),
departamento = c("Arauca", "Bogota D.C.", "Risaralda", "Bogota D.C.",
"Bogota D.C."), region = c("América del Sur", "América del Sur",
"América del Sur", "América del Sur", "América del Sur"
), status = c("Permiso de Turismo", "Permiso de Turismo",
"Permiso de Turismo", "Permiso Otras Actividades", "Permiso Otras Actividades"
), departamento_2 = c("Bogotá", "Bogotá", "Bogotá", "Bogotá",
"Bogotá"), destino_procedencia = c("Emiratos Árabes", "Israel",
"Emiratos Árabes", "Panamá", "Panamá"), region_destino = c("Asia",
"Asia", "Asia", "América Central y el Caribe", "América Central y el Caribe"
), sexo = c("Male", "Male", "Male", "Male", "Male"), numero = c(1,
1, 1, 5, 5), date = structure(c(18809, 18809, 18809, 18718,
18718), class = "Date"), date2 = structure(c(18809, 18809,
18809, 18718, 18718), class = "Date")), row.names = c(NA,
5L), class = "data.frame")
and I would like to plot a chart by date and other variables (e.g. mode of transport) in ggplotly. The chart is correct, but the labels that appear in the dates show numbers instead of date format. The variable in the database is in date format, and I already tried changing it to different formats and still does not work.
I would also like to add minor date breaks, but can't seem to get it right.
Here is the code I am using for the chart:
chart9<-ggplot()+
geom_line(data=Flow,
aes(x=date,
color=transporte), stat="count") +
scale_x_date(date_minor_breaks = "1 month",
date_labels = "%Y (%b)")+
labs(color="Type of Flow")+
ggtitle("Number of Entrances, by Month and Mode of Transportation, 2017-2021")+
xlab("Date")+
ylab("Number or People")
ggplotly(chart9)
This is the chart plotted
Any help would be greatly appreciated! :)
Looks like the date class gets dropped when using stat="count". Hence, one option to achieve your desired result would be to aggregate your dataset before passing it to ggplot using e.g. dplyr::count(Flow, date, transporte):
# Count rows per date and transport mode up front so that `date` reaches
# ggplot as a real Date column. The result goes into its own object: `Flow`
# itself is left untouched so the raw rows stay available afterwards.
flow_counts <- dplyr::count(Flow, date, transporte, name = "count")
chart <- ggplot() +
  geom_line(data = flow_counts, aes(x = date, y = count, color = transporte)) +
  scale_x_date(
    date_minor_breaks = "1 month",
    date_labels = "%Y (%b)"
  ) +
  labs(color = "Type of Flow") +
  ggtitle("Number of Entrances, by Month and Mode of Transportation, 2017-2021") +
  xlab("Date") +
  ylab("Number or People")
# Namespaced call so the snippet does not depend on plotly being attached.
plotly::ggplotly(chart)
A second option, which additionally allows setting the date format, would be to make use of the text "aesthetic" and convert the numbers back to proper dates:
library(plotly)
# Keep stat = "count" and build the hover text by hand from the values the
# stat computes (..count.., ..x.., ..color..); the string is passed through
# the `text` mapping and picked up by ggplotly() below.
# NOTE(review): presumably `Flow` must be the raw, un-aggregated data here,
# since the counting is done by the stat - confirm.
ggplot() +
geom_line(data = Flow, aes(x = date, color = transporte, text = paste(
"count:", ..count..,
# ..x.. is turned back into a Date and formatted as "year (month)"
"<br>Date: ", format(as.Date(..x.., origin = "1970-01-01"), "%Y (%b)"),
"<br>transporte: ", ..color..
)), stat = "count") +
scale_x_date(
date_minor_breaks = "1 month",
date_labels = "%Y (%b)"
) +
labs(color = "Type of Flow") +
ggtitle("Number of Entrances, by Month and Mode of Transportation, 2017-2021") +
xlab("Date") +
ylab("Number or People")
# Show only the hand-built `text` string in the tooltip
ggplotly(tooltip = c("text"))
I am doing this graph with this code
ggplot(c_clinicos) +
aes(x = Condición, fill = Estado, weight = Conteo) +
geom_bar() +
scale_fill_manual(values = list(
Ausente = "#FF1100", Presente = "#538FF6")) +
labs(x = "Condición clínica", y = "Nº Personas. ",
title = "Distribución de la presencia por enfermedad", subtitle = "Muestra de 810 pacientes", fill = "Estado:") +
coord_flip() +
theme_linedraw()
I want to get a little box on each bar segment (blue and red) that tells me how many people there are and the percentage. I have been trying to use geom_label but I couldn't make it work.
I am using this data:
structure(list(Condición = c("Cianosis Aguda", "Cianosis Aguda",
"Gassping", "Gassping", "FR mayor de 40 o menor de 6 rpm", "FR mayor de 40 o menor de 6 rpm",
"Oliguria que no responde a volumen y uso de diuréticos", "Oliguria que no responde a volumen y uso de diuréticos",
"Transtornos de la coagulación", "Transtornos de la coagulación",
"Pérdida de la conciencia mayor de 12 horas", "Pérdida de la conciencia mayor de 12 horas",
"Pérdida de la conciencia y ausencia de pulso y latidos cardíacos.",
"Pérdida de la conciencia y ausencia de pulso y latidos cardíacos.",
"Stroke", "Stroke", "Parálisis total o convulsiones incontrolables",
"Parálisis total o convulsiones incontrolables", "Ictericia más preeclampsia",
"Ictericia más preeclampsia"), Estado = c("Presente", "Ausente",
"Presente", "Ausente", "Presente", "Ausente", "Presente", "Ausente",
"Presente", "Ausente", "Presente", "Ausente", "Presente", "Ausente",
"Presente", "Ausente", "Presente", "Ausente", "Presente", "Ausente"
), Conteo = c(13, 797, 0, 810, 520, 290, 314, 496, 150, 659,
1, 809, 1, 809, 9, 801, 49, 761, 114, 696)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
Thanks in Advance.
We can calculate the labels that we want to display and use it in geom_label.
library(dplyr)
library(ggplot2)
# Step 1: per condition, build a "count (percent)" label for each state;
# segments with a zero count get an empty label.
etiquetas <- c_clinicos %>%
  group_by(Condición) %>%
  mutate(
    label = sprintf("%d \n(%.2f %%)", Conteo, prop.table(Conteo) * 100),
    label = replace(label, Conteo == 0, "")
  )

# Step 2: stacked horizontal bars with each label centred in its segment.
ggplot(
  etiquetas,
  aes(x = Condición, fill = Estado, y = Conteo, label = label)
) +
  geom_col() +
  geom_label(position = position_stack(vjust = 0.5), color = "white") +
  scale_fill_manual(
    values = list(Ausente = "#FF1100", Presente = "#538FF6")
  ) +
  labs(
    x = "Condición clínica",
    y = "Nº Personas. ",
    title = "Distribución de la presencia por enfermedad",
    subtitle = "Muestra de 810 pacientes",
    fill = "Estado:"
  ) +
  coord_flip() +
  theme_linedraw()
Recently I have been using the survival package in R to better measure the waiting time of patients in the Emergency Department of my hospital, which I have achieved. However, it is hard to present the results from RStudio in a way that third parties can understand, so I'm developing a Shiny app to show the results without having to show the code, and to be able to modify certain elements quickly.
When I run the app and it tries to build a data frame with the quantiles, I get the error: "Error in rep: invalid 'times' argument" in R#200. I have reviewed the code several times, but I still can't find a solution. The complete code is below.
Thank you
ui <- fluidPage(
titlePanel("Prototipo Tiempos de Urgencia"),
sidebarLayout(
sidebarPanel(
selectInput(inputId = "triage",
label = "Triage",
choices =c("Todos",listadotriage),
selectize = FALSE,
selected = "Todos"),
selectInput(inputId = "tiempo",
label = "Tiempo a analizar",
choices = c("Diferencia entre Hora de atención y hora de triage" = "60",
"Diferencia ente hora de atencón y hora de admisión" = "61",
"Diferencia ente hora de atencón y hora de alta" = "62",
"Diferencia ente hora de admision y hora de alta" = "63",
"Diferencia ente hora de admision y hora de triage" = "64"),
selectize = FALSE,
selected = "Diferencia entre Hora de atención y hora de triage"),
selectInput(inputId = "atencion",
label = "Área Atención",
choices = c("Todos",listadoatencion),
selectize = FALSE,
selected = "Todos"),
selectInput(inputId = "alta",
label = "Tipo de Alta",
choices = c("General" = "74",
"Hospitalizado" = "72",
"Altas" = "73"),
multiple = FALSE,
selected = "General"),
dateRangeInput(inputId = "fecha",
label = "Intervalo de fechas",
format = "dd-mm-yyyy",
start = "2019-11-01",
end = "2019-11-30",
weekstart = 1,
language= "es",
separator = "a")
),
mainPanel = dataTableOutput(outputId = "tabla1")
))
server <- function(input, output) {
output$tabla1 <- renderDataTable({
if (input$triage=="Todos") {
dat1<-subset(dat1,ifelse(input$atencion!="Todos",dat1$AREA_ATENCION==input$atencion & dat1$fechanormal>=input$fecha[1] & dat1$fechanormal<=input$fecha[2],
dat1$fechanormal>=input$fecha[1] & dat1$fechanormal<=input$fecha[2]))
timesevent<-as.integer(input$tiempo)
event<-as.integer(input$alta)
dat3<-dat1[,c(30,timesevent,event)]
gral<-Surv(dat3[[2]] ,event= as.numeric(dat3[[3]]))
fit1a<-survfit(gral ~ Medico, data = dat3)
d1<-as.data.frame(quantile(fit1a,c(0.25,0.5,0.75,0.9,1),conf.int = FALSE)) # estadísticas sobre tiempo de demora según modelo
data1<-as.data.frame(fit1a$strata)
setDT(data1,keep.rownames = TRUE)
data1$rn<-gsub("Medico=","",data1$rn)
colnames(data1)<-c("Médico","Consultas")
setDT(d1,keep.rownames = TRUE)[]
d1$rn<-gsub("Medico=","",d1$rn)
colnames(d1)<-c("Médico","P.25","P.50","P.75","P.90","P.100")
datatotal1<-merge(data1,d1,by="Médico")
datatable(data=datatota1l,caption =paste("Tiempos de atención en Servicio de Urgencia por Médico, por Atención en Servicio",input$atencion,"durante","el período",format(input$fecha[1], format= "%d-%m-%Y"),"a",format(input$fecha[2], format= "%d-%m-%Y"), sep=" "),
rownames = FALSE,extensions= "Buttons",options=list(pageLength=40,dom="Bfrtip", buttons = c("print")) )
} else {
dat1<-subset(dat1,dat1$Triage==input$triage)
dat1<-subset(dat1,ifelse(input$atencion!="Todos",dat1$AREA_ATENCION==input$atencion & dat1$fechanormal>=input$fecha[1] & dat1$fechanormal<=input$fecha[2],
dat1$fechanormal>=input$fecha[1] & dat1$fechanormal<=input$fecha[2]))
timesevent<-as.integer(input$tiempo)
event<-as.integer(input$alta)
dat3<-dat1[,c(30,timesevent,event)]
gral<-Surv(dat3[[2]] ,event= as.numeric(dat3[[3]])) ~ Medico
fit1b<-survfit(gral ~ Medico, data = dat3)
d2<-as.data.frame(quantile(fit1b,c(0.25,0.5,0.75,0.9,1),conf.int = FALSE)) # estadísticas sobre tiempo de demora según modelo
data2<-as.data.frame(fit1b$strata)
setDT(data2,keep.rownames = TRUE)
data2$rn<-gsub("Medico=","",data2$rn)
colnames(data2)<-c("Médico","Consultas")
setDT(d2,keep.rownames = TRUE)[]
d2$rn<-gsub("Medico=","",d2$rn)
colnames(d2)<-c("Médico","P.25","P.50","P.75","P.90","P.100")
datatotal2<-merge(data2,d1,by="Médico")
datatable(data=datatotal2,caption =paste("Tiempos de atención en Servicio de Urgencia por Médico, por Categorización",input$triage,"por Atención en Servicio",input$atencion,"durante","el período",format(input$fecha[1], format= "%d-%m-%Y"),"a",format(input$fecha[2], format= "%d-%m-%Y"), sep=" "),
rownames = FALSE,extensions= "Buttons",options=list(pageLength=40,dom="Bfrtip", buttons = c("print")) )
}
})
}
shinyApp(ui = ui, server = server)```
I resolved!!!!
I changed the following lines
dat1<-subset(dat1,ifelse(input$atencion!="Todos",dat1$AREA_ATENCION==input$atencion & dat1$fechanormal>=input$fecha[1] & dat1$fechanormal<=input$fecha[2],
dat1$fechanormal>=input$fecha[1] & dat1$fechanormal<=input$fecha[2]))
```
for the next lines:
```
# Always restrict to the selected date range first.
dat3 <- subset(
  dat3,
  dat3$fechanormal >= input$fecha[1] & dat3$fechanormal <= input$fecha[2]
)
# `input$atencion` is a single value, so branch with a plain `if` rather than
# the vectorised ifelse(); "Todos" means no filtering by care area.
if (input$atencion != "Todos") {
  dat3 <- subset(dat3, dat3$AREA_ATENCION == input$atencion)
}
```
Regards!!!
I am trying to separate (tidyr) line_text into separate words so it's one word per column.
Data:
structure(list(ID = c(140L, 233L, 233L),
pdf_name = structure(c(1L,
2L, 2L), .
Label = c("GBD2016_2_1255_Venezuela_MoH_Epi_2012_9.pdf",
"GBD2016_2_1351_Venezuela_MoH_Epi_2014_44.pdf"),
class = "factor"),
keyword = c("SEGÚN GRUPOS", "SEGÚN GRUPOS", "SEGÚN GRUPOS"
), line_text = list("2000 Gráfico 2 . CASOS DE MALARIA SEGÚN GRUPOS DE EDAD Y SEXO, EPIDEMIOLÓGICA 9 Año 2012",
"GRÁFICO 2. CASOS DE MALARIA SEGÚN GRUPOS DE EDAD Y SEXO, HASTA",
"GRÁFICO 2. CASOS DE SEGÚN GRUPOS"),
.Names = c("ID", "pdf_name", "keyword",
"page_num", "line_num", "line_text", "token_text"), row.names = c(NA,
-3L), class = "data.frame")
Code used:
numcols<- make.unique(c(rep("word",10, sep = " ")) )
df<- reportdiagn%>%
(separate(reportdiagn$line_text,
into = numcols,
sep = ("")))
I get the following error and can't work out how to fix it.
`Error in UseMethod("separate_") :
no applicable method for 'separate_' applied to an object of class "factor
The data you pasted isn't quite right. It might be good to try that again - but I have tried to reproduce your data anyway. It might not be exactly the same. I have set line_text to be a character string - but I think the code below works with either character or factor.
In separate(), you don't need to reference the data frame - the %>% already does that, you just need the name of the variable without quotes. Also, your sep needs to be a space or \\b for a word boundary.
# tidyr provides separate() and re-exports the %>% pipe used below.
library(tidyr)

# Rebuild a small version of the question's data.
ID <- c(140, 233, 233)
pdf_name <- factor(
  c(1, 2, 2),
  labels = c(
    "GBD2016_2_1255_Venezuela_MoH_Epi_2012_9.pdf",
    "GBD2016_2_1351_Venezuela_MoH_Epi_2014_44.pdf"
  )
)
keyword <- c("SEGÚN GRUPOS", "SEGÚN GRUPOS", "SEGÚN GRUPOS")
line_text <- c(
  "2000 Gráfico 2 . CASOS DE MALARIA SEGÚN GRUPOS DE EDAD Y SEXO, EPIDEMIOLÓGICA 9 Año 2012",
  "GRÁFICO 2. CASOS DE MALARIA SEGÚN GRUPOS DE EDAD Y SEXO, HASTA",
  "GRÁFICO 2. CASOS DE SEGÚN GRUPOS."
)
reportdiagn <- data.frame(ID, pdf_name, keyword, line_text)

# Ten target columns: "word", "word.1", ..., "word.9".
numcols <- make.unique(rep("word", 10))

# One word per column, split on single spaces. Rows with more than ten words
# are truncated (extra = "drop") and shorter rows are padded with NA on the
# right (fill = "right"); stating both explicitly avoids the warnings.
df <- reportdiagn %>%
  separate(
    line_text,
    into = numcols,
    sep = " ",
    extra = "drop",
    fill = "right"
  )
This produces some NA values where there's less than 10 words and truncates when there's more. I assume you're expecting that.