I am new to R. I found a script online which is used to batch geocode a list of addresses.
http://www.storybench.org/geocode-csv-addresses-r/
However, I keep getting the error message 'Error: is.character(location) is not TRUE'. Does anyone have any ideas on how to resolve the issue?
# Geocoding script for large list of addresses.
# Finbar Gillen 25/07/2018
# Install ggmap only when it is missing, then load it
if (!requireNamespace("ggmap", quietly = TRUE)) {
  install.packages("ggmap")
}
library(ggmap)
# Select the file from the file chooser
fileToLoad <- file.choose(new = TRUE)
# Read in the CSV data and store it in a variable
origAddress <- read.csv(fileToLoad, stringsAsFactors = FALSE)
# Initialize the data frame
geocoded <- data.frame(stringsAsFactors = FALSE)
# Loop through the addresses to get the latitude and longitude of each
# address and add it to the origAddress data frame in new columns lat and lon.
# NOTE: `addresses` must be the name of the address column in the CSV. If the
# file has no such column, origAddress$addresses is NULL instead of a
# character vector.
for (i in seq_len(nrow(origAddress))) {
  # Print("Working...")
  result <- geocode(origAddress$addresses[i], output = "latlona", source = "google")
  origAddress$lon[i] <- as.numeric(result[1])
  origAddress$lat[i] <- as.numeric(result[2])
  origAddress$geoAddress[i] <- as.character(result[3])
}
# Write a CSV file containing origAddress to the working directory
write.csv(origAddress, "geocoded.csv", row.names = FALSE)
After the `# Print("Working...")` line, the `geocode()` call must use the name of the address column in your input file/data frame, not `addresses`:
# Replace `addresses` with the name of the address column in your own data
result <- geocode(origAddress$addresses[i], output =
"latlona", source = "google")
Related
I have 44 doc files. From each file, I need to extract the customer name and amount. I am able to do this for one file using the read_document function and grep to extract the amount and customer name. When I do this for all 44 files, I get an error. I am not sure where I am going wrong:
# Show the objects in the workspace, then remove all of them
ls()
rm(list = ls())
# Collect the names of files in ~/experiment that match the regex ".doc"
# NOTE(review): full.names is not set, so list.files() returns bare file names
# without the "~/experiment" prefix; read_document() will only find them if
# the working directory is that folder — confirm.
files <- list.files("~/experiment", ".doc")
files
length(files)
# NOTE(review): length(files) is a single number, so this loop body runs once,
# with i equal to the index of the last file. 1:length(files) or
# seq_along(files) would visit every file.
for (i in length(files)){
library(textreadr)
# The value returned by read_document() is neither stored nor printed here
read_document(files[i])
}
Here is the full code that I run on one file:
# Load every package up front; library() stops with an error if one is missing
library(textreadr)
library(tm)
library(stringr)
file <- "~/customer_full_file.docx"
full_customer_file <- read_document(file, skip = 0, remove.empty = TRUE, trim = TRUE)
# checking file is read correctly
head(full_customer_file)
tail(full_customer_file)
# Keep only elements 1 and 4 of the text that was read
full_customer_file <- full_customer_file[c(1, 4)]
# Extracting amount: keep entries containing "Amount", collapse runs of
# whitespace, then drop the "Amount" label itself
amount_extract <- grep("Amount", full_customer_file, value = TRUE)
amount_extract_2 <- lapply(amount_extract, stripWhitespace)
# Was `marks_extract_2`, which is never defined in this script
amount_extract_2 <- str_remove(amount_extract_2, "Amount")
# Extracting name, using the same steps as for the amount
name_extract <- grep("Customer Name and ID: ", full_customer_file, value = TRUE)
name_extract
name_extract_2 <- lapply(name_extract, stripWhitespace)
name_extract_2 <- str_remove(name_extract_2, "Customer Name and ID: ")
# Turn both results into one-column data frames with readable column names
name_extract_2 <- as.data.frame(name_extract_2)
names(name_extract_2)[1] <- "customer_full_name"
amount_extract_2 <- as.data.frame(amount_extract_2)
names(amount_extract_2)[1] <- "amount"
amount_extract_2
# Combine name and amount side by side and append them to the output CSV
customer_final_file <- cbind(name_extract_2, amount_extract_2)
write.table(customer_final_file, "~/customer_amount.csv", sep = ",", col.names = TRUE, append = TRUE)
Here is the code that I run on the 44 files:
# Show the objects in the workspace, then remove all of them
ls()
rm(list = ls())
# Collect the names of files in ~/experiment that match the regex ".doc"
# NOTE(review): without full.names = TRUE the returned names carry no
# directory prefix, so read_document() resolves them against the current
# working directory. If that is not ~/experiment the files cannot be opened,
# which may explain the unzip warnings — confirm.
files <- list.files("~/experiment", ".doc")
files
length(files)
library(textreadr)
# Visit every file; the value returned by read_document() is discarded
for (i in 1:length(files)){
read_document(files[i])
}
Here is the error that I am getting:
> library(textreadr)
> for (i in 1:length(files)){
+ read_document(files[i])
+ }
Warning messages:
1: In utils::unzip(file, exdir = tmp) :
error 1 in extracting from zip file
2: In utils::unzip(file, exdir = tmp) :
error 1 in extracting from zip file
3: In utils::unzip(file, exdir = tmp) :
error 1 in extracting from zip file
4: In utils::unzip(file, exdir = tmp) :
error 1 in extracting from zip file
5: In utils::unzip(file, exdir = tmp) :
error 1 in extracting from zip file
I could give you my code, which I used to analyze different Word files with the sentimentr package in R. I think you can keep the same structure and just change the body of the for loop so that it performs your extraction for every docx file.
And this is the code:
# officer supplies read_docx() and docx_summary(); sentimentr supplies
# sentiment_by()
library(officer)
library(sentimentr)
folder_path <- "C:\\Users\\yourname\\Documents\\R\\"
# Get a list of all the docx files in the folder (full paths)
docx_files <- list.files(path = folder_path, pattern = "\\.docx$", full.names = TRUE)
# Empty data frame that fixes the column layout of the results
results <- data.frame(file = character(0), sentiment = numeric(0))
# One slot per file; filled in the loop and bound together once afterwards
# instead of growing `results` with rbind() on every iteration
results_list <- vector("list", length(docx_files))
# Loop over the list of files
for (k in seq_along(docx_files)) {
  file <- docx_files[k]
  # Read the docx file
  sample_data <- read_docx(file)
  # Extract the content and create a summary
  content <- docx_summary(sample_data)
  # Keep only rows whose text is longer than 5 characters; which() drops
  # rows where the text is NA
  law <- content[which(nchar(as.character(content$text)) > 5), ]
  # Calculate the sentiment of the summary (or in your case extraction)
  sentiment <- sentiment_by(as.character(law$text))
  # Store the rows for this file
  results_list[[k]] <- data.frame(file = file, sentiment = sentiment$ave_sentiment)
}
# Bind everything in one call; with no files this leaves the empty data frame
results <- do.call(rbind, c(list(results), results_list))
# View the results data frame
View(results)
I hope that is near enough to your problem to solve it
I ran the code below in R:
# Pick the CSV interactively and read it, keeping strings as character
CLCLT_Homes <- file.choose(new = TRUE)
origAddress <- read.csv(CLCLT_Homes, header = TRUE, stringsAsFactors = FALSE)
geocoded <- data.frame(stringsAsFactors = FALSE)
# Geocode the Address column one row at a time
for (i in 1:nrow(origAddress))
{
result <- geocode(origAddress$Address[i], output = "latlona", source = "google")
# NOTE(review): the next two lines always write to row 1 (`[1]`), whereas
# geoAddress below is written to row i. On the first iteration the lon/lat
# columns do not exist yet, so the single value is recycled to every row;
# later iterations then overwrite row 1 only. `[i]` was presumably intended —
# confirm.
origAddress$lon[1] <- as.numeric(result[1])
origAddress$lat[1] <- as.numeric(result[2])
origAddress$geoAddress[i] <- as.character(result[3])
}
# Write the augmented data frame to a CSV file without row names
write.csv(origAddress, "where I put the file.csv", row.names = FALSE)
and when I went to look at the file, it had created columns for long and lat for every address, but each address had the exact same longitude and latitude (oddly, except for the address at the very top; it had its own coordinates while all others had different coordinates that matched). Did I forget to include something in the code? Is it only reading the first two lines correctly and then not rotating?
I have collected data of different users' location from twitter. I am trying to plot those data in a map in R. The problem is users have given invalid/incorrect addresses which causes geocode function to fail. How can I avoid this failure? Is there any way to check for this error case and not proceed? For example the user location data is something like this for any file geocode9.csv.
available locations,
Buffalo,
New York,
thsjf,
Washington, USA
Michigan,
nkjnt,
basketball,
ejhrbvw
# Load ggmap, which supplies geocode()
library(ggmap)
# Ask for the input CSV and read it with strings kept as character
fileToLoad <- file.choose(new = TRUE)
origAddress <- read.csv(file = fileToLoad, stringsAsFactors = FALSE)
geocoded <- data.frame(stringsAsFactors = FALSE)
# Geocode row by row, storing longitude, latitude and the matched address
for (i in 1:nrow(origAddress)) {
  result <- geocode(
    location = origAddress$available_locations[i],
    output = "latlona",
    source = "google"
  )
  origAddress$lon[i] <- as.numeric(result[1])
  origAddress$lat[i] <- as.numeric(result[2])
  origAddress$geoAddress[i] <- as.character(result[3])
}
# Save the augmented data frame without row names
write.csv(x = origAddress, file = "geocoded.csv", row.names = FALSE)
When the code runs through "thsjf" of the locations list, it throws an error. How can I get past this error? I want something like,
if(false){ # do not run geocode function}
I'm not sure how to geocode those addresses if they are actually wrong. How would the machine even figure it out if it was wrong? I think you need to get the addresses corrected, and THEN geocode everything. Here is some sample code.
# load ggmap
library(ggmap)
# Record the start time so the total processing time can be reported
startTime <- Sys.time()
# Select the file from the file chooser
fileToLoad <- file.choose(new = TRUE)
# Read in the CSV data and store it in a variable
origAddress <- read.csv(fileToLoad, stringsAsFactors = FALSE)
# Fail early with a clear message when the address column is missing;
# otherwise geocode() would be handed NULL instead of a character value
if (is.null(origAddress$addresses)) {
  stop("The input CSV has no column named 'addresses'.", call. = FALSE)
}
# Initialize the data frame
geocoded <- data.frame(stringsAsFactors = FALSE)
# Create the output columns up front. Without this, the first assignment
# inside the loop creates each column by recycling row 1's value to all rows.
origAddress$lon <- rep(NA_real_, nrow(origAddress))
origAddress$lat <- rep(NA_real_, nrow(origAddress))
origAddress$geoAddress <- rep(NA_character_, nrow(origAddress))
# Loop through the addresses to get the latitude and longitude of each
# address and add it to the origAddress data frame in columns lat and lon.
# seq_len() makes the loop a no-op for an empty file.
for (i in seq_len(nrow(origAddress))) {
  # Print("Working...")
  result <- geocode(origAddress$addresses[i], output = "latlona", source = "google")
  origAddress$lon[i] <- as.numeric(result[1])
  origAddress$lat[i] <- as.numeric(result[2])
  origAddress$geoAddress[i] <- as.character(result[3])
}
# Write a CSV file containing origAddress to the working directory
write.csv(origAddress, "geocoded.csv", row.names = FALSE)
# Report how long the whole run took
endTime <- Sys.time()
processingTime <- endTime - startTime
processingTime
Check this for more info.
http://www.storybench.org/geocode-csv-addresses-r/
I am trying to use ggmap to get the fields in administrative_area_level_3 from the google maps api. The single call returns the correct data. The below code returns a '1' for every entry from administrative_area_level_3.
# Geocoding a csv column of "addresses" in R
#load ggmap
library(ggmap)
# Select the file from the file chooser
fileToLoad <- file.choose(new = TRUE)
# Read in the CSV data and store it in a variable
origAddress <- read.csv(fileToLoad, stringsAsFactors = FALSE)
# Initialize the data frame
geocoded <- data.frame(stringsAsFactors = FALSE)
# Loop through the addresses to get the latitude and longitude of each address and add it to the
# origAddress data frame in new columns lat and lon
for(i in 1:nrow(origAddress))
{
# Print("Working...")
# output = "more" is requested here, and fields are then picked by position
result <- geocode(origAddress$addresses[i], output = "more", source = "google")
origAddress$lon[i] <- as.character(result[1])
origAddress$lat[i] <- as.character(result[2])
# NOTE(review): assuming geocode() returns a data frame, result[5] and
# result[13] are one-column data frames, not vectors. as.character() on such
# an object ignores a factor column's labels and yields its integer code,
# which would explain the "1" — result[[13]] extracts the column itself.
# Confirm against the column types geocode() actually returns.
origAddress$geoAddress[i] <- as.character(result[5])
origAddress$district[i] <- as.character(result[13])
}
# Write a CSV file containing origAddress to the working directory
write.csv(origAddress, "geocoded.csv", row.names=FALSE)
I modified these lines to get those fields:
# Columns 5 and 13 of the geocode result are selected by position with `[`
# NOTE(review): `[` keeps the data-frame wrapper; `[[` would extract the
# column values themselves — confirm which is intended.
origAddress$geoAddress[i] <- as.character(result[5])
origAddress$district[i] <- as.character(result[13])
When I run this I get the correct administrative_area_level_3
# Geocode one literal address with output = "more" and keep the result in adr
adr <- geocode("35880 WIDENER VALLEY RD Glade Spring VA", output = "more", source = "google")
Here is my CSV:
ID,addresses
1,35880 WIDENER VALLEY RD Glade Spring VA
I have 32K lines of addresses for which I have to find long/latitude values.
I'm using the code found here. I'm very thankful to the person who created it, but I have a question:
I'd like to edit it so that if the loop runs into an issue with the current row's address, it simply states NA in the Lat/Long fields and moves to the next one. Does anyone know how that may be accomplished? The code is below:
# Batch-geocode the "addresses" column of a CSV file
# ggmap provides geocode()
library(ggmap)
# Let the user pick the input file
fileToLoad <- file.choose(new = TRUE)
# Load the CSV, keeping text columns as character
origAddress <- read.csv(fileToLoad, stringsAsFactors = FALSE)
# Empty data frame
geocoded <- data.frame(stringsAsFactors = FALSE)
# For every row, look up the address and record longitude, latitude and the
# address string returned by the geocoder
for (i in 1:nrow(origAddress)) {
  # Print("Working...")
  current_address <- origAddress$addresses[i]
  result <- geocode(current_address, output = "latlona", source = "google")
  origAddress$lon[i] <- as.numeric(result[1])
  origAddress$lat[i] <- as.numeric(result[2])
  origAddress$geoAddress[i] <- as.character(result[3])
}
# Save the augmented data frame as geocoded.csv in the working directory
write.csv(origAddress, "geocoded.csv", row.names = FALSE)
You can use tryCatch() to isolate the geocode warning and return a data.frame with the same structure (lon, lat, address) as geocode() would return.
Your code would then be
# Geocoding a csv column of "addresses" in R
# load ggmap
library(ggmap)
# Select the file from the file chooser
fileToLoad <- file.choose(new = TRUE)
# Read in the CSV data and store it in a variable
origAddress <- read.csv(fileToLoad, stringsAsFactors = FALSE)
# Placeholder returned when a lookup fails; it has the same three columns
# (lon, lat, address) that the loop reads from a successful result
failed_lookup <- function(cond) data.frame(lon = NA, lat = NA, address = NA)
# Loop through the addresses to get the latitude and longitude of each
# address and add it to the origAddress data frame in new columns lat and lon.
# seq_len() makes the loop a no-op for an empty file.
for (i in seq_len(nrow(origAddress))) {
  # Handle errors as well as warnings, so that a failure on one row yields
  # NA values for that row instead of aborting the whole run
  result <- tryCatch(
    geocode(origAddress$addresses[i], output = "latlona", source = "google"),
    warning = failed_lookup,
    error = failed_lookup
  )
  origAddress$lon[i] <- as.numeric(result[1])
  origAddress$lat[i] <- as.numeric(result[2])
  origAddress$geoAddress[i] <- as.character(result[3])
}
# Write a CSV file containing origAddress to the working directory
write.csv(origAddress, "geocoded.csv", row.names = FALSE)
Alternatively, you can do this faster and more cleanly without the loop and error checking. However, without a reproducible example of your data there is no way to know if this will retain all of the information you need.
# Substituted for for loop: geocode the whole address column in one call
result <- geocode(origAddress$addresses, output = "latlona", source = "google")
# Bind the result onto the full data frame so every original column is kept.
# Binding only origAddress$addresses would drop the other columns and give
# the address column the literal name "origAddress$addresses".
origAddress <- cbind(origAddress, result)