Converting lat/long into correct format - r

I have some data which looks like:
long_bnk lat_bnk
[1,] "3<U+00B0> 52' 30.1\" W" "40<U+00B0> 44' 3.7\" N"
[2,] "2<U+00B0> 44' 54.4\" E" "42<U+00B0> 7' 18.1\" N"
[3,] NA NA
[4,] "2<U+00B0> 7' 54.2\" E" "41<U+00B0> 31' 21.9\" N"
[5,] "0<U+00B0> 1' 54.5\" W" "39<U+00B0> 58' 59.3\" N"
[6,] "3<U+00B0> 41' 15.5\" W" "40<U+00B0> 27' 47.2\" N"
I am trying to put the data into the correct lat/long format. I am running the following:
pts_bnk[pts_bnk==""] <- NA
pts_bnk <- pts_bnk[complete.cases(pts_bnk),]
pts_bnk <- matrix(as.numeric(sp::char2dms(as.vector(pts_bnk), "°")), ncol=2)
However, I keep getting:
Error in if (any(abs(object@deg) > 90)) return("abs(degree) > 90") :
missing value where TRUE/FALSE needed In addition: Warning message: In
asMethod(object) : NAs introduced by coercion
Where am I going wrong in the conversion to the correct lat/long format?
Data:
pts_bnk <- structure(c("3<U+00B0> 52' 30.1\" W", "2<U+00B0> 44' 54.4\" E",
NA, "2<U+00B0> 7' 54.2\" E", "0<U+00B0> 1' 54.5\" W", "3<U+00B0> 41' 15.5\" W",
"40<U+00B0> 44' 3.7\" N", "42<U+00B0> 7' 18.1\" N", NA, "41<U+00B0> 31' 21.9\" N",
"39<U+00B0> 58' 59.3\" N", "40<U+00B0> 27' 47.2\" N"), .Dim = c(6L,
2L), .Dimnames = list(NULL, c("long_bnk", "lat_bnk")))
EDIT:
Essentially I would like to plot the data using:
library(ggrepel)
library(ggmap)
register_google(key = "MyKey")
spain <- get_map("Spain", zoom = 6)
ggmap(spain, extent = "normal") +
geom_point()
EDIT 2:
The original data I had (which works) was the following:
dms_lat <- readLines(n=5)
1 40° 25' 35.8" N
2 40° 26' 28.4" N
3 40° 28' 39.8" N
4
5 38° 59' 15.0" N
dms_long <-readLines(n=5)
1 3° 41' 19.9" W
2 3° 47' 42.2" W
3 3° 41' 11.7" W
4
5 3° 55' 29.6" W
pts <- cbind(dms_long, dms_lat)
pts <- sub("^\\d+\\s+", "", pts)
pts[pts==""] <- NA
pts <- pts[complete.cases(pts),]
pts <- matrix(as.numeric(sp::char2dms(as.vector(pts), "°")), ncol=2)
library(rworldmap)
plot(subset(getMap(resolution = "low"), NAME=="Spain"))
points(pts[,1], pts[,2], col = "red", pch=3, cex = 0.6)
The current data I have (which does not work) is:
x <- structure(c("3<U+00B0> 52' 30.1\" W", "2<U+00B0> 44' 54.4\" E",
NA, "2<U+00B0> 7' 54.2\" E", "0<U+00B0> 1' 54.5\" W", "3<U+00B0> 41' 15.5\" W",
"40<U+00B0> 44' 3.7\" N", "42<U+00B0> 7' 18.1\" N", NA, "41<U+00B0> 31' 21.9\" N",
"39<U+00B0> 58' 59.3\" N", "40<U+00B0> 27' 47.2\" N"), .Dim = c(6L,
2L), .Dimnames = list(NULL, c("long_bnk", "lat_bnk")))
x %>%
data.frame() %>%
mutate(
lat = sub("<U\\+00B0>", "\u00B0", lat_bnk),
long = sub("<U\\+00B0>", "\u00B0", long_bnk)
) %>%
select(lat, long) %>%
drop_na()
I am trying to make this second data be equal to the first so it can be plotted using ggmap().

Let me know if this works for you based on our comments:
library(rworldmap)
library(sp)
library(dplyr)
library(tidyr) # drop_na() is exported by tidyr, not dplyr

# Replace the literal "<U+00B0>" degree marker with "d", the default degree
# terminator of char2dms(), and discard rows with missing coordinates.
pts <- x %>%
  data.frame() %>%
  mutate(
    lat = sub("<U\\+00B0>", "d", lat_bnk),
    long = sub("<U\\+00B0>", "d", long_bnk)
  ) %>%
  select(lat, long) %>%
  drop_na()

# Parse the DMS strings and coerce them to signed decimal degrees.
pts_long <- as.numeric(char2dms(pts[["long"]]))
pts_lat <- as.numeric(char2dms(pts[["lat"]]))

# Draw the outline of Spain and overlay the points.
plot(subset(getMap(resolution = "low"), NAME == "Spain"))
points(pts_long, pts_lat, col = "red", pch = 3, cex = 0.6)
Note that the char2dms is used as follows:
char2dms(from, chd = "d", chm = "'", chs = "\"")
where the default for degree character terminator is the letter d (an alternative to the degree symbol, if chd not specified).
Using ggmap you can pass longitude and latitude in geom_point:
library(ggrepel)
library(ggmap)
library(ggthemes)

# Put the decimal longitudes and latitudes side by side so ggplot can map them.
pts_data <- data.frame(pts_long, pts_lat)

# Note requires Google key
spain <- ggmap::get_map(location = "Madrid, Spain", zoom = 6)

# Base map first, then the points, then a map-style theme.
ggmap(spain, extent = "normal") +
  geom_point(
    mapping = aes(x = pts_long, y = pts_lat),
    data = pts_data
  ) +
  theme_map()

We can add mutate_at at the end
library(dplyr)
library(tidyr) # drop_na() is exported by tidyr, not dplyr

# Restore the real degree sign, drop incomplete rows, then convert both
# coordinate columns from DMS strings to decimal degrees in one step.
x %>%
  data.frame() %>%
  mutate(
    lat = sub("<U\\+00B0>", "\u00B0", lat_bnk),
    long = sub("<U\\+00B0>", "\u00B0", long_bnk)
  ) %>%
  select(lat, long) %>%
  drop_na() %>%
  mutate_at(vars(matches("^(lat|long)")), ~ as.numeric(sp::char2dms(., "°")))

Related

Is there a way to create a network of word associations using a bi-partite network analysis in R?

I have a text file with words from historical accounts and I want to visualise the species and frequency of words associated with them.
So far I have tried using the following code with a txt file of all the historical documents in one doc but want to ask if there is specific formatting of a csv to then input into R for a bipartite network graph:
"""library(ggraph)
library(ggplot2)
library(dplyr)
library(pdftools)
library(tm)
library(readtext)
library(tidytext)
library(igraph)
library(tidyr)
library(FactoMineR)
library(factoextra)
library(flextable)
library(GGally)
library(ggdendro)
library(network)
library(Matrix)
library(quanteda)
library(stringr)
library(quanteda.textstats)
options(stringsAsFactors = F)
options(scipen = 999)
options(max.print=1000)
# Read in text--------
wordbase <- readtext("mq_bird_stories.txt")
# List of extra words to remove---------
extrawords <- c("the", "can", "get", "Ccchants", "make", "making", "house", "torn", "tree", "man", "however", "upon", "instructs", "wife", "coming","without", "mother", "versions","variant", "version", "thus", "got","throws", "are", "has", "already", "asks", "sacra", "can", "brings", "one", "look", "sees", "tonaheiee", "wants", "later",
"dont", "even", "may", "but", "will", "turn", "sing", "swallows", "alba", "gives", "find", "other","tonaheieee", "away","day","comes","another",
"much", "first", "but", "see", "new", "back","goes", "go","songs", "returns", "take","takes","come",
"many", "less", "now", "well", "taught", "like", "puts", "slits", "sends", "tell","tells","open","mentions",
"often", "every", "said", "two", "and", "handsome", "husband", "bring", "lives","gets", "von", "den", "steinen", "handy")
# Clean the data-------
darwin <- wordbase %>%
paste0(collapse = " ") %>%
stringr::str_squish() %>%
stringr::str_remove_all("\\(") %>%
stringr::str_remove_all("\\)") %>%
stringr::str_remove_all("!") %>%
stringr::str_remove_all(",") %>%
stringr::str_remove_all(";") %>%
stringr::str_remove_all("\\?") %>%
stringr::str_split(fixed(".")) %>%
unlist() %>%
tm :: removeWords(extrawords) %>%
paste0(collapse = " ")
# One method for calculating frequencies of bigrams------
# Process into a table of words
darwin_split <- darwin %>%
as_tibble() %>%
tidytext::unnest_tokens(words, value)
# Create data frame of bigrams-------
darwin_words <- darwin_split %>%
dplyr::rename(word1 = words) %>%
dplyr::mutate(word2 = c(word1[2:length(word1)], NA)) %>%
na.omit()
# Calculate frequency of bigrams-----
darwin2grams <- darwin_words %>%
dplyr::mutate(bigram = paste(word1, word2, sep = " ")) %>%
dplyr::group_by(bigram) %>%
dplyr::summarise(frequency = n()) %>%
dplyr::arrange(-frequency)
# Define stopwords
stps <- paste0(tm::stopwords(kind = "en"), collapse = "\\b|\\b")
# Remove stopwords from bigram table
darwin2grams_clean <- darwin2grams %>%
dplyr::filter(!str_detect(bigram, stps))
# Another method for calculating frequencies of bigrams
# Clean corpus
darwin_clean <- darwin %>%
stringr::str_to_title()
# Tokenize corpus----
darwin_tokzd <- quanteda::tokens(darwin_clean)
# Extract bigrams------
BiGrams <- darwin_tokzd %>%
quanteda::tokens_remove(stopwords("en")) %>%
quanteda::tokens_select(pattern = "^[A-Z]",
valuetype = "regex",
case_insensitive = FALSE,
padding = TRUE) %>%
quanteda.textstats::textstat_collocations(min_count = 1, tolower = FALSE)
# read in and process text
darwinsentences <- darwin %>%
stringr::str_squish() %>%
tokenizers::tokenize_sentences(.) %>%
unlist() %>%
stringr::str_remove_all("- ") %>%
stringr::str_replace_all("\\W", " ") %>%
stringr::str_squish()
# inspect data
head(darwinsentences)
darwincorpus <- Corpus(VectorSource(darwinsentences))
# clean corpus-----
darwincorpusclean <- darwincorpus %>%
tm::tm_map(removeNumbers) %>%
tm::tm_map(tolower) %>%
tm::tm_map(removeWords, stopwords()) %>%
tm::tm_map(removeWords, extrawords)
# create document term matrix
darwindtm <- DocumentTermMatrix(darwincorpusclean, control=list(bounds = list(global=c(1, Inf)), weighting = weightBin))
# convert dtm into sparse matrix
darwinsdtm <- Matrix::sparseMatrix(i = darwindtm$i, j = darwindtm$j,
x = darwindtm$v,
dims = c(darwindtm$nrow, darwindtm$ncol),
dimnames = dimnames(darwindtm))
# calculate co-occurrence counts
coocurrences <- t(darwinsdtm) %*% darwinsdtm
# convert into matrix
collocates <- as.matrix(coocurrences)
# inspect size of matrix
ncol(collocates)
#provide some summary stats
summary(rowSums(collocates))
#visualising collocations
# load function for co-occurrence calculation
source("https://slcladal.github.io/rscripts/calculateCoocStatistics.R")
# define term
coocTerm <- "pigeon"
# calculate co-occurrence statistics
coocs <- calculateCoocStatistics(coocTerm, darwinsdtm, measure="LOGLIK")
# inspect results
coocs[1:50]
coocdf <- coocs %>%
as.data.frame() %>%
dplyr::mutate(CollStrength = coocs,
Term = names(coocs)) %>%
dplyr::filter(CollStrength > 0)
###Make graph - visualize association strengths------
ggplot(coocdf, aes(x = reorder(Term, CollStrength, mean), y = CollStrength)) +
geom_point() +
coord_flip() +
theme_bw() +
labs(y = "")
##network
net = network::network(collocates_redux,
directed = FALSE,
ignore.eval = FALSE,
names.eval = "weights")
# vertex names
network.vertex.names(net) = rownames(collocates_redux)
# inspect object
net
ggnet2(net,label = TRUE,
label.size = 4,
alpha = 0.2,
size.cut = 3,
edge.alpha = 0.3) +
guides(color = FALSE, size = FALSE)"""
I'd suggest taking a look at the netCoin package. If you can transform your data into nodes and links data frames, then you can easily get a high quality network visualization:
# Example of links data frame: one row per edge, built column by column.
links <- data.frame(
  Source = c(
    "Person A", "Person A",
    "Person B", "Person B", "Person B",
    "Person C"
  ),
  Target = c(
    "Account 1", "Account 2",
    "Account 2", "Account 3", "Account 4",
    "Account 4"
  ),
  other_links_column = c(
    "not link", "link",
    "link", "not link", "link",
    "link"
  ),
  stringsAsFactors = FALSE
)
# Example of nodes data frame: one row per unique node, built column by column.
nodes <- data.frame(
  name = c(
    paste("Person", c("A", "B", "C")),
    paste("Account", 1:4)
  ),
  other_nodes_column = rep(c("person", "account"), times = c(3L, 4L)),
  stringsAsFactors = FALSE
)
# Install netCoin only when it is missing, so re-running the script does not
# reinstall the package every time.
if (!requireNamespace("netCoin", quietly = TRUE)) {
  install.packages("netCoin")
}
library(netCoin)
?netCoin # displays netCoin Help to see all the function options

graph_df <- netCoin(
  # Data frame of unique nodes and their attributes; must contain name column
  nodes = nodes,
  # Data frame of links and their attributes; must contain Source and Target
  # columns
  links = links,
  # Font size
  cex = 1.25,
  # Column in node data frame to determine node color
  color = "other_nodes_column",
  # Column in node data frame to determine node shape
  shape = "other_nodes_column",
  # Visualization title
  main = "This is the title of my visualization",
  # Controls that will be shown in the visualization (maximum of 5)
  controls = 1:5,
  # Output folder for the visualization; entire folder should be exported as a
  # zip file
  dir = "folder-with-viz-output"
)
plot(graph_df) # Command to display the visualization

Scatterpie pie plot: circles not properly positioned over map

I am trying to create a map where I show the amount and category of Exports in every European country, using a scatterpie plot. This is the data I am trying to represent:
Country A B C D E F G Total
1 FR 48208727011 129696846358 34574295963 99154544367 87056475894 104059261659 391086898 50.3141238
2 BE 30008344795 130642251666 27315419464 48966420544 51351672841 57686707705 875915760 34.6846733
3 NL 53815652300 126965690773 52604259051 164935573324 43089183110 79607329056 516212340 52.1533900
4 DE 79643366705 285793555191 66579801287 230961697801 160598853461 167790359814 13590821673 100.4958456
5 IT 35306881277 124880125091 31042897909 65051137874 44481779280 65707113992 307508636 36.6777444
6 UK 4190569134 14226329164 4343541388 8299777138 7863823675 8191378024 177728913 4.7293147
7 IE 8049979989 25547263228 3324685081 15609577840 18293778082 13299495081 284077060 8.4408856
8 DK 10844794488 22366273732 3669934507 20904821209 8871184551 17364886109 1104100358 8.5125995
9 EL 5548998459 14199041489 9684405892 6969942717 2877634605 8740624663 9513713 4.8030162
10 PT 9302893141 19921174761 5742487970 12183620710 9794141959 10889202370 59025653 6.7892547
11 ES 29087706350 79136960848 26777114009 45807156391 43316950993 54577475375 225619825 27.8928984
12 LU 2103037221 5485541709 1274451840 3165573258 3448812873 2685200517 23828895 1.8186446
13 SE 14297019504 32367817406 10023929115 31082425639 18504243058 21520786963 251825497 12.8048047
14 FI 4368941438 17924135085 6424290821 13268574752 7679357024 7759601514 87932852 5.7512833
15 AT 11108739001 47969735941 8282060600 36180768764 20761302493 26060191499 319396555 15.0682195
16 MT 529547453 748570490 789405002 772157398 939286493 808546088 1179489 0.4588692
17 EE 1387220092 4797469841 1253135597 3127037067 1483571375 2251847940 315884341 1.4616166
18 LV 2714038229 4237027490 958962478 3158721396 1479290893 2931423023 89667330 1.5569131
19 LT 3408636288 8761053696 3263941940 5534705815 2630113004 4477086678 348351748 2.8423889
20 PL 17264039729 70678231411 11723435712 53284056901 28214023352 41438947683 319384835 22.2922120
21 CZ 7664643659 38573705210 5359209173 54059163460 20745595183 22423687496 216009863 14.9042014
22 SK 4193310193 17229538594 3771900263 19251595573 18415022178 10092362707 163300267 7.3117030
23 HU 5067726212 26282833327 5807291521 31406620462 16576651093 12918544146 456905984 9.8516573
24 RO 7210065674 24768518425 3986448288 20279628790 10274528929 13490373296 213856837 8.0223420
25 BG 3364866564 11098005470 2490021719 5767532283 2282959524 4540599434 289425842 2.9833411
26 SI 2226481542 11769625979 2186097710 5986840366 6169533307 8453642146 32927930 3.6825149
27 HR 2664219116 7204053277 2281750708 4155735739 2094082503 4970586651 14826478 2.3385254
28 CY 847756088 1467939342 983937418 824244195 1900124484 1375465594 47109886 0.7446577
Using the following code:
library(giscoR)
borders <- gisco_get_countries(
epsg = "3035",
year = "2020",
resolution = "3",
country = idf$Country
)
merged <- merge(borders,
idf,
by.x = "CNTR_ID",
by.y = "Country",
all.x = TRUE
)
library(tidyverse)
symbol_pos <- st_centroid(merged, of_largest_polygon = TRUE)
separate_coords = symbol_pos %>% mutate(lat = unlist(map(symbol_pos$geometry, 1)), long = unlist(map(symbol_pos$geometry, 2)))
sympos = data.frame(Country = separate_coords$CNTR_ID, lat = separate_coords$lat, long = separate_coords$long)
merged <- merge(merged,
sympos,
by.x = "CNTR_ID",
by.y = "Country",
all.x = TRUE
)
ggplot() +
geom_sf(data = merged, size = 0.1) +
geom_scatterpie(data = merged, aes(x = long, y = lat, r = Total), cols = LETTERS[1:7])+
coord_sf(xlim = c(2377294, 6500000), ylim = c(1413597, 5228510))
And it gives me this error:
Error in rowSums(data[, cols]) : 'x' must be numeric
I am trying to create a map similar to this one:
And I would be grateful if someone can provide some hint as to how to fix the error. Thanks.
Edit: below is the dput(idf) output:
structure(list(Country = c("FR", "BE", "NL", "DE", "IT", "UK",
"IE", "DK", "EL", "PT", "ES", "LU", "SE", "FI", "AT", "MT", "EE",
"LV", "LT", "PL", "CZ", "SK", "HU", "RO", "BG", "SI", "HR", "CY"
), A = c(48208727011, 30008344795, 53815652300, 79643366705,
35306881277, 4190569134, 8049979989, 10844794488, 5548998459,
9302893141, 29087706350, 2103037221, 14297019504, 4368941438,
11108739001, 529547453, 1387220092, 2714038229, 3408636288,
17264039729,
7664643659, 4193310193, 5067726212, 7210065674, 3364866564,
2226481542,
2664219116, 847756088), B = c(129696846358, 130642251666,
126965690773,
285793555191, 124880125091, 14226329164, 25547263228,
22366273732,
14199041489, 19921174761, 79136960848, 5485541709, 32367817406,
17924135085, 47969735941, 748570490, 4797469841, 4237027490,
8761053696, 70678231411, 38573705210, 17229538594, 26282833327,
24768518425, 11098005470, 11769625979, 7204053277, 1467939342
), C = c(34574295963, 27315419464, 52604259051, 66579801287,
31042897909, 4343541388, 3324685081, 3669934507, 9684405892,
5742487970, 26777114009, 1274451840, 10023929115, 6424290821,
8282060600, 789405002, 1253135597, 958962478, 3263941940,
11723435712,
5359209173, 3771900263, 5807291521, 3986448288, 2490021719,
2186097710,
2281750708, 983937418), D = c(99154544367, 48966420544,
164935573324,
230961697801, 65051137874, 8299777138, 15609577840, 20904821209,
6969942717, 12183620710, 45807156391, 3165573258, 31082425639,
13268574752, 36180768764, 772157398, 3127037067, 3158721396,
5534705815, 53284056901, 54059163460, 19251595573, 31406620462,
20279628790, 5767532283, 5986840366, 4155735739, 824244195),
E = c(87056475894, 51351672841, 43089183110, 160598853461,
44481779280, 7863823675, 18293778082, 8871184551, 2877634605,
9794141959, 43316950993, 3448812873, 18504243058, 7679357024,
20761302493, 939286493, 1483571375, 1479290893, 2630113004,
28214023352, 20745595183, 18415022178, 16576651093, 10274528929,
2282959524, 6169533307, 2094082503, 1900124484), F =
c(104059261659,
57686707705, 79607329056, 167790359814, 65707113992, 8191378024,
13299495081, 17364886109, 8740624663, 10889202370, 54577475375,
2685200517, 21520786963, 7759601514, 26060191499, 808546088,
2251847940, 2931423023, 4477086678, 41438947683, 22423687496,
10092362707, 12918544146, 13490373296, 4540599434, 8453642146,
4970586651, 1375465594), G = c(391086898, 875915760, 516212340,
13590821673, 307508636, 177728913, 284077060, 1104100358,
9513713, 59025653, 225619825, 23828895, 251825497, 87932852,
319396555, 1179489, 315884341, 89667330, 348351748, 319384835,
216009863, 163300267, 456905984, 213856837, 289425842, 32927930,
14826478, 47109886), Total = c(50.314123815, 34.6846732775,
52.1533899954, 100.4958455932, 36.6777444059, 4.7293147436,
8.4408856361, 8.5125994954, 4.8030161538, 6.7892546564,
27.8928983791,
1.8186446313, 12.8048047182, 5.7512833486, 15.0682194853,
0.4588692413, 1.4616166253, 1.5569130839, 2.8423889169,
22.2922119623,
14.9042014044, 7.3117029775, 9.8516572745, 8.0223420239,
2.9833410836, 3.682514898, 2.3385254472, 0.7446577007)),
row.names = c(NA,
-28L), class = "data.frame")
Please find below one possible solution to your request. The main problem was that geom_scatterpie() expects a dataframe and not an sf object. So you need to use as.data.frame() inside geom_scatterpie(). I also took the opportunity to simplify your code a bit.
Reprex
Code
library(giscoR)
library(sf)
library(dplyr)
library(ggplot2)
library(scatterpie)

# Country outlines (EPSG 3035) for exactly the countries listed in idf.
borders <- gisco_get_countries(
  country = idf$Country,
  year = "2020",
  epsg = "3035",
  resolution = "3"
)

# Attach the export figures to the outlines, keeping every outline.
merged <- merge(
  x = borders, y = idf,
  by.x = "CNTR_ID", by.y = "Country",
  all.x = TRUE
)

# One centroid per country, taken from its largest polygon.
symbol_pos <- st_centroid(merged, of_largest_polygon = TRUE)

# Plain lookup table: country code plus centroid X/Y renamed to long/lat.
sympos <- symbol_pos %>%
  st_drop_geometry() %>%
  as.data.frame() %>%
  cbind(st_coordinates(symbol_pos)) %>%
  select(Country = CNTR_ID, long = X, lat = Y)

# Bring the centroid coordinates back onto the sf object.
merged <- merge(
  x = merged, y = sympos,
  by.x = "CNTR_ID", by.y = "Country",
  all.x = TRUE
)
Visualization
# geom_scatterpie() needs a plain data frame rather than an sf object, hence
# the as.data.frame() conversion. The factor 2200 rescales Total into a pie
# radius (NOTE(review): presumably tuned by eye for this map -- confirm).
ggplot() +
  geom_sf(data = merged, size = 0.1) +
  geom_scatterpie(
    data = as.data.frame(merged),
    aes(x = long, y = lat, r = Total*2200),
    cols = LETTERS[1:7]
  ) +
  coord_sf(
    xlim = c(2377294, 6500000),
    ylim = c(1413597, 5228510)
  )
Created on 2022-01-23 by the reprex package (v2.0.1)

Converting coordinates from degree with unconventional format to decimal degree

I am trying to convert my data so that it can be plotted on a map. For example, the data looks like:
# A tibble: 2 x 2
Latitud Longitud
<chr> <chr>
1 10º 35' 28.98'' N 3º 41' 33.91'' O
2 10º 35' 12.63'' N 3º 45' 46.22'' O
I am trying to mutate it using the following:
df %>%
mutate(
Latitud = str_replace_all(Latitud, "''", ""),
lat_edit = sp::char2dms(Latitud), "°")
Which returns an error:
Error in if (any(abs(object@deg) > 90)) return("abs(degree) > 90") :
missing value where TRUE/FALSE needed
In addition: Warning message:
In asMethod(object) : NAs introduced by coercion
I would like to plot these two points on a map in ggplot (or another spatial package)
Data:
structure(list(Latitud = c("40º 25' 25.98'' N", "40º 25' 17.63'' N"
), Longitud = c("3º 42' 43.91'' O", "3º 40' 56.22'' O")), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -2L))
You can use the following custom function (I am assuming N, S, W, E. Not sure what O means in longitude):
#' Convert degree-minute-second strings to signed decimal degrees
#'
#' Every character other than digits, decimal points and blanks is stripped
#' before parsing, so the degree/minute/second markers may be any symbols.
#'
#' @param angle Vector coercible to character, e.g. `"40d 25' 25.98'' N"`.
#'   Components must be separated by blanks. A string containing `S` or `W`,
#'   or starting with a minus sign, is treated as negative.
#' @return Numeric vector of decimal degrees, the same length as `angle`.
#'   Missing minutes or seconds count as zero; `NA` or empty input gives `NA`.
angle2dec <- function(angle) {
  angle <- as.character(angle)

  # Decide the sign separately from the degree value: deriving it from
  # sign(degrees) collapses every coordinate with 0 degrees to exactly 0.
  is_negative <- grepl("S|W", angle) | grepl("^\\s*-", angle)

  # Keep only digits, decimal points and the blanks separating the components.
  cleaned <- trimws(gsub("[^ .0-9]", "", angle))
  # Split on runs of blanks so repeated spaces do not create empty components.
  parts <- strsplit(cleaned, " +")

  vapply(seq_along(parts), function(i) {
    dms <- as.numeric(parts[[i]])
    if (length(dms) == 0L) {
      return(NA_real_)
    }
    # Pad absent minutes/seconds with zero.
    dms <- c(dms, 0, 0)[1:3]
    magnitude <- dms[1] + dms[2] / 60 + dms[3] / 3600
    if (is_negative[i]) -magnitude else magnitude
  }, numeric(1))
}
Applying on the data:
df1[] <- lapply(df1, angle2dec)
df1
#> Latitud Longitud
#> 1 -40.42388 3.712197
#> 2 40.42156 -3.682283
Plotting:
library(ggplot2)
ggplot(df1, aes(x = Longitud, y = Latitud)) +
geom_point()
Slightly Modified Data to Show for Different Hemispheres:
df1 <- structure(list(Latitud = c("40<U+623C><U+3E61> 25' 25.98'' S",
"40<U+623C><U+3E61> 25' 17.63'' N"),
Longitud = c("3<U+623C><U+3E61> 42' 43.91'' E",
"3<U+623C><U+3E61> 40' 56.22'' W")),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA, -2L))
In reference to Converting geo coordinates from degree to decimal .
I'll preface this by saying I hadn't used char2dms until right now, so there may be intricacies I missed (such as my question above about "O" as a direction). Looking at the docs and examples, you need to give the characters used to demarcate degrees, minutes, and seconds. In your case, these are "º", "'", and "''", respectively. I skipped the step of removing the third of these, because it's necessary to see where the seconds are written. (Update: added a step to replace the regex "O$" (oeste) with "W" (west)). That gets you what's below:
library(dplyr)
library(ggplot2)
library(sp)

# Sample coordinates; a trailing "O" (oeste) is rewritten to "W" so that
# char2dms() recognises the hemisphere.
dat <- structure(
  list(
    Latitud = c("40º 25' 25.98'' N", "40º 25' 17.63'' N"),
    Longitud = c("3º 42' 43.91'' O", "3º 40' 56.22'' O")
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -2L)
) %>%
  mutate(
    Latitud = stringr::str_replace_all(Latitud, "O$", "W"),
    Longitud = stringr::str_replace_all(Longitud, "O$", "W")
  )

# Parse with the degree, minute and second markers used in this data.
char2dms(dat$Latitud, chd = "º", chm = "'", chs = "''")
#> [1] 40d25'25.98"N 40d25'17.63"N
This is a DMS S3 object, not a vector (here's where my knowledge of this ends), so you can't put it directly into the data frame columns. Instead, convert to a numeric vector, and you've got numeric coordinates in your data frame.
# Parse each DMS column and coerce the DMS objects to plain numeric vectors so
# they can be stored as ordinary data frame columns.
dat_numeric <- dat %>%
  mutate(
    lat_edit = as.numeric(
      char2dms(Latitud, chd = "º", chm = "'", chs = "''")
    ),
    lon_edit = as.numeric(
      char2dms(Longitud, chd = "º", chm = "'", chs = "''")
    )
  )
dat_numeric
#> # A tibble: 2 x 4
#> Latitud Longitud lat_edit lon_edit
#> <chr> <chr> <dbl> <dbl>
#> 1 40º 25' 25.98'' N 3º 42' 43.91'' W 40.4 -3.71
#> 2 40º 25' 17.63'' N 3º 40' 56.22'' W 40.4 -3.68
Plot like normal numbers:
ggplot(dat_numeric, aes(x = lon_edit, y = lat_edit)) +
geom_point()
Or convert to an sf object and plot with the appropriate aspect ratio, projection, etc.
sf::st_as_sf(dat_numeric, coords = c("lon_edit", "lat_edit")) %>%
ggplot() +
geom_sf()

using duplicate factor to plot using ggplot2

I am trying to plot a ggplot_dumbbell with the following code:
library(ggplot2)
library(ggalt)
theme_set(theme_classic())
df_senPhi <- structure(list(phi = c(0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7,
0.8, 0.9, 0.9, 1), W = c(7833625.7334, 8291583.0188, 8762978.0131,
8169317.158, 8460793.8918, 8765222.8718, 8266025.5499, 8311199.2075,
8265304.816, 8289392.5799, 8273733.0523, 8284554.5615), Type = c("A, B, C",
"A, B, C", "A, B, C", "D, E", "D, E", "D, E", "F, G", "F, G",
"H, I", "H, I", "I, J", "I, J"), pChange = c(-0.0533144181552553,
0.00202924695507283, 0.0589968453118437, -0.0127464560859453,
0.0224782062508261, 0.0592681341679742, -0.00105934677399903,
0.00439984310620854, -0.00114644672167306, 0.00176453467558519,
-0.000127903066776307, 0.00117986514708678)), class = "data.frame", row.names = c(NA,
-12L), .Names = c("phi", "W", "Type", "pChange"))
df_senPhi$phi <- factor(df_senPhi$phi, levels=as.character(df_senPhi$phi)) # for right ordering of the dumbells
gg <- ggplot(df_senPhi, aes(x=0, xend=pChange, y=phi, color = Type)) +
geom_dumbbell(#colour="#a3c4dc",
size=0.75,
colour_xend="#0e668b") +
scale_x_continuous(label=scales::percent)
plot(gg)
If you run this code, you will get a warning saying "duplicate levels in factors are deprecated".
If you look closely in the df_senPhi you can see 12 records. However while plotting, only 11 records are plotted. Also the 10th and the 11th records have the same phi value in the data frame which are associated in to the same level. That is also causing the overlapping of the two phi bars in the plot (probably that's why I'm seeing only 11 dumbbells).
I want all 12 records to be plotted such that the second 0.9 phi's dumbbell appears just above the first just like they were two different values.
Is there a way to achieve this ?
used a bit of dplyr
but it seems to get what you are looking for
library(dplyr) # provides %>%, mutate() and row_number()

# Give every record its own y position (its row index) so that repeated phi
# values no longer share one line, then label each position with its phi.
df_senPhi %>%
  mutate(row = row_number()) %>%
  ggplot(aes(x = 0, y = row, color = Type)) +
  geom_dumbbell(aes(xend = pChange)) +
  scale_y_continuous(
    # One break per record, whatever the number of rows.
    breaks = seq_len(nrow(df_senPhi)),
    labels = as.character(df_senPhi$phi)
  )

R barplot - keep same colours after sorting

I want to plot percentages for 3 variables (a,b,c) one after the others. So I have a matrix (%) for a set of activities for variable a, b and c.
dta = structure(c(0.0073, 0.1467, 0.0111, 0.0294, 0.0451, 0.0031, 0.1823,
0.0452, 0.2212, 0.1123, 7e-04, 0.1138, 0.0723, 0.1649, 0.0634),
.Dim = c(5L, 3L),
.Dimnames = list(c("c Work", "e Travel/Commute",
"f Cooking", "g Housework", "h Odd jobs"),
c("a", "b", "c")))
However, I would like to plot each variable sorted, but keeping the same colours for the set of activities.
So this is the colours of the activities.
library(RColorBrewer)
rc = c(brewer.pal(n = 5, name = 'Set2'))
kol = list()
kol$act <- c("c Work", "e Travel/Commute", "f Cooking", "g Housework", "h Odd jobs" )
kol$colours <- rc
kol = as.data.frame(kol)
act colours
1 c Work #66C2A5
2 e Travel/Commute #FC8D62
3 f Cooking #8DA0CB
4 g Housework #E78AC3
5 h Odd jobs #A6D854
So here are my barplots
par(mfrow = c(2,2))
barplot(dta[,1], horiz = T, las = 2, col = kol$colours)
barplot(dta[,2], horiz = T, las = 2, col = kol$colours)
barplot(dta[,3], horiz = T, las = 2, col = kol$colours)
So what I want is to sort but keep the same colours for the activities
par(mfrow = c(2,2))
barplot(sort(dta[,1]), horiz = T, las = 2)
barplot(sort(dta[,2]), horiz = T, las = 2)
barplot(sort(dta[,3]), horiz = T, las = 2)
How can I make it "match" ?
You can use the function match to match the names of the "entities" and the desired colours, for example, for the first column:
kol$colours[match(names(sort(dta[,1])), kol$act)]
so, to obtain your barplot, just do:
# 2 x 2 panel layout; the wider left margin makes room for the activity names.
par(mfrow = c(2, 2), mar = c(5, 8, 4, 1))
for (i in seq_len(ncol(dta))) {
  # Sort once and reuse the result for both the bars and the colour lookup.
  sorted_col <- sort(dta[, i])
  # Look each activity up by name so it keeps its colour after sorting.
  # as.character() guards against the columns of kol being factors, which
  # would otherwise be used as integer palette codes.
  bar_cols <- as.character(kol$colours)[
    match(names(sorted_col), as.character(kol$act))
  ]
  barplot(sorted_col, horiz = TRUE, las = 2, col = bar_cols)
}

Resources