geopy returns latitude and longitude for None

I'm setting my address to None, but geopy still returns a value for latitude and longitude.
from geopy.geocoders import Nominatim

geolocator = Nominatim()
addr = None
location = geolocator.geocode(addr)
print(location.latitude, location.longitude)

Result:

44.933143 7.540121
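Those coordinates land in Piedmont, Italy, which is consistent with the query being turned into the string "None" and matching the Italian town actually named None. A minimal defensive sketch (the user_agent value is a placeholder; newer geopy releases require one):

from geopy.geocoders import Nominatim

# hypothetical guard: skip geocoding entirely when the address is missing
geolocator = Nominatim(user_agent="address-checker")

addr = None
if addr is not None:
    location = geolocator.geocode(addr)
    if location is not None:
        print(location.latitude, location.longitude)
else:
    print("No address supplied")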

How can I use Google Cloud Functions to run a web scraper?

I'm currently running a web scraper; this is the first time I've ever done something like this. It pulls addresses from the URL and then matches each address against the user's input. This will be going into a chat bot, and I'm wondering how I can make it run on Google Cloud Functions. What's the process for doing this, and is there a tutorial anywhere?
This is my code so far. There is a small items file too.
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from ..items import DataItem
from fuzzywuzzy import fuzz
from urllib.parse import urljoin
import scrapy


class AddressesSpider(scrapy.Spider):
    name = 'Addresses'
    allowed_domains = ['find-energy-certificate.service.gov.uk']
    postcode = "bh10+4ah"
    start_urls = ['https://find-energy-certificate.service.gov.uk/find-a-certificate/search-by-postcode?postcode=' + postcode]

    ## def start_requests(self):
    ##     self.first = input("Please enter the address you would like to match: ")
    ##     yield scrapy.Request(url=self.start_urls[0], callback=self.parse)

    def parse(self, response):
        first = input("Please enter the address you would like to match: ")
        highest_ratios = []
        highest_item = None
        for row in response.xpath('//table[@class="govuk-table"]//tr'):
            address = row.xpath("normalize-space(.//a[@class='govuk-link']/text())").extract()[0].lower()
            address = address.rsplit(',', 2)[0]
            link = row.xpath('.//a[@class="govuk-link"]/@href').extract()
            details = row.xpath("normalize-space(.//td/following-sibling::td)").extract()
            ratio = fuzz.token_set_ratio(address, first)

            item = DataItem()
            item['link'] = link
            item['details'] = details
            item['address'] = address
            item['ratioresult'] = ratio

            # keep the three best-scoring rows seen so far
            if len(highest_ratios) < 3:
                highest_ratios.append(item)
            elif ratio > min(highest_ratios, key=lambda x: x['ratioresult'])['ratioresult']:
                highest_ratios.remove(min(highest_ratios, key=lambda x: x['ratioresult']))
                highest_ratios.append(item)

        highest_ratios_100 = [item for item in highest_ratios if item['ratioresult'] == 100]
        if highest_ratios_100:
            for item in highest_ratios_100:
                yield item
        else:
            yield max(highest_ratios, key=lambda x: x['ratioresult'])

        if len(highest_ratios_100) > 1:
            for i, item in enumerate(highest_ratios_100):
                print(f"{i+1}: {item['address']}")
            selected = int(input("Please select the correct address by entering the number corresponding to the address: ")) - 1
            selected_item = highest_ratios_100[selected]
        else:
            selected_item = highest_ratios_100[0] if highest_ratios_100 else max(highest_ratios, key=lambda x: x['ratioresult'])

        new_url = selected_item['link'][0]
        new_url = str(new_url)
        if new_url:
            base_url = 'https://find-energy-certificate.service.gov.uk'
            print(f'Base URL: {base_url}')
            print(f'New URL: {new_url}')
            new_url = urljoin(base_url, new_url)
            print(f'Combined URL: {new_url}')
            yield scrapy.Request(new_url, callback=self.parse_new_page)

    def parse_new_page(self, response):
        Postcode = response.xpath('normalize-space((//p[@class="epc-address govuk-body"]/text())[last()])').extract()
        Town = response.xpath('normalize-space((//p[@class="epc-address govuk-body"]/text())[last()-1])').extract()
        First = response.xpath(".//p[@class='epc-address govuk-body']").extract()
        Type = response.xpath('normalize-space(//dd[1]/text())').extract_first()
        Walls = response.xpath("//th[contains(text(), 'Wall')]/following-sibling::td[1]/text()").extract()
        Roof = response.xpath("//th[contains(text(), 'Roof')]/following-sibling::td[1]/text()").extract()
        Heating = response.xpath("//th[text()='Main heating']/following-sibling::td[1]/text()").extract_first()
        CurrentScore = response.xpath('//body[1]/div[2]/main[1]/div[1]/div[3]/div[3]/svg[1]/svg[1]/text[1]/text()').re_first("[0-9+]{1,2}")
        Maxscore = response.xpath('//body[1]/div[2]/main[1]/div[1]/div[3]/div[3]/svg[1]/svg[2]/text[1]/text()').re_first("[0-9+]{2}")
        Expiry = response.xpath('normalize-space(//b)').extract_first()
        FloorArea = response.xpath('//dt[contains(text(), "floor area")]/following-sibling::dd/text()').re_first("[0-9+]{2,3}")
        Steps = response.xpath("//h3[contains(text(),'Step')]/text()").extract()
        yield {
            'Postcode': Postcode,
            'Town': Town,
            'First': First,
            'Type': Type,
            'Walls': Walls,
            'Roof': Roof,
            'Heating': Heating,
            'CurrentScore': CurrentScore,
            'Maxscore': Maxscore,
            'Expiry': Expiry,
            'FloorArea': FloorArea,
            'Steps': Steps
        }
I've tried googling and having a look around, but I can't work out how to deploy this as a project to run on Google Cloud Functions. Or can I just copy the code into the console somewhere? Thanks in advance for your help.
You can try running your spider from a script. However, a better solution would be to wrap scrapy in its own child process.
For example:
from multiprocessing import Process, Queue

from ... import MySpider
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings


def my_cloud_function(event, context):
    def script(queue):
        try:
            settings = get_project_settings()
            settings.setdict({
                'LOG_LEVEL': 'ERROR',
                'LOG_ENABLED': True,
            })

            process = CrawlerProcess(settings)
            process.crawl(MySpider)
            process.start()
            queue.put(None)
        except Exception as e:
            queue.put(e)

    queue = Queue()

    # wrap the spider in a child process
    main_process = Process(target=script, args=(queue,))
    main_process.start()  # start the process
    main_process.join()   # block until the spider finishes

    result = queue.get()  # check the process did not return an error
    if result is not None:
        raise result

    return 'ok'
You can refer to this tutorial for more info.
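On the deployment side: Cloud Functions does not take pasted code as a project. You deploy a directory containing a main.py (with my_cloud_function as the entry point), your Scrapy project files, and a requirements.txt listing scrapy and fuzzywuzzy, for example with gcloud functions deploy my_cloud_function --runtime python39 --trigger-http. The runtime name and trigger here are illustrative; check the current Cloud Functions documentation for supported runtimes.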

Extract one band's data from a GeoTIFF file and plot a colored image based on the band values on a folium map

I am new to extracting data from GeoTIFF files, so this question will sound obvious. I have a .tif file which I got from an API. It has 12 bands; the definition of what each band contains was shown in an attached image.
What I want
After extensive searching for days, this is as far as I have got (the output screenshot is not reproduced here).
Code which I have tried:
from osgeo import gdal
import folium

driver = gdal.GetDriverByName('GTiff')  # note: the driver name is 'GTiff', not 'GTiFF'
driver.Register()

ds = gdal.Open('...../286d7a11-4abd-449e-856b-79959dfd1396.tif')
if ds is None:
    print('Could not open')

geotransform = ds.GetGeoTransform()
proj = ds.GetProjection()
cols = ds.RasterXSize
rows = ds.RasterYSize

xmin = geotransform[0]
ymax = geotransform[3]
xmax = xmin + cols * geotransform[1]
ymin = ymax + rows * geotransform[5]
centerx = (xmin + xmax) / 2
centery = (ymin + ymax) / 2

bands = ds.RasterCount
band10 = ds.GetRasterBand(10)
array = band10.ReadAsArray()

map = folium.Map(location=[centery, centerx], zoom_start=7, tiles='openstreetmap')
image = folium.raster_layers.ImageOverlay(
    array,  # 'dataimage' was undefined in the original; the band array read above is what gets overlaid
    opacity=0.8, bounds=[[ymin, xmin], [ymax, xmax]]
)
image.add_to(map)
map.fit_bounds(map.get_bounds(), padding=(10, 10))
return map  # this snippet appears to be the body of a function that returns the folium map
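To get a colored image from a single band, folium's ImageOverlay accepts a colormap callable for mono (2-D) arrays, provided the values are scaled to [0, 1]. A sketch continuing from the code above (the nodata handling and the choice of viridis are assumptions, not part of the original question):

import numpy as np
import matplotlib.cm as cm

# scale band 10 to [0, 1], masking the band's nodata value if one is set
nodata = band10.GetNoDataValue()
data = np.ma.masked_equal(array, nodata) if nodata is not None else np.ma.masked_invalid(array)
normed = (data - data.min()) / (data.max() - data.min())

colored = folium.raster_layers.ImageOverlay(
    image=normed.filled(0),   # fill masked pixels with 0 before handing to folium
    opacity=0.8,
    bounds=[[ymin, xmin], [ymax, xmax]],
    colormap=cm.viridis,      # any callable mapping a value in [0, 1] to an (r, g, b, a) tuple
)
colored.add_to(map)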

Asynchronous Loop mixing meta item

I'm trying to scrape the following website:
https://institucional.xpi.com.br/sobre-a-xp/encontre-um-escritorio/
There is a dropdown list to select a state; from that, I get a dropdown list of the available cities.
After submitting, I get the list of all offices (address, e-mail, phone number) for that city.
With this code, I'm not getting all the results, and I'm also getting repeated city names; it looks like the meta item from one loop iteration is getting mixed into another.
I tried to debug, and here is what happens:
I start in the first parse function. When I enter the loop over the states, I get the first state ("AC"), and when I arrive at the yield line, I expected it to go to the parseStates function, but it starts the loop again.
The thing is, it doesn't do the whole loop: it runs through the first five states, then it jumps to the parseStates function.
def parse(self, response):
    statesList = ["AC", "AL", "AM", "BA", "CE", "DF", "ES", "GO", "MA", "MG", "MS", "MT",
                  "PA", "PB", "PE", "PR", "RJ", "RN", "RO", "RS", "SC", "SE", "SP"]
    for state in statesList:
        linkState = 'https://institucional.xpi.com.br/api/Escritorios/FilialListarCidadesV2?vSiglaEstado=' + state
        location = LocationItem()
        location['state'] = state
        yield scrapy.Request(url=linkState, callback=self.parseStates, meta={'item': location})

def parseStates(self, response):
    location = response.meta['item']
    root = ET.fromstring(response.body)
    cityList = [city.text for city in root.iter('{http://schemas.datacontract.org/2004/07/XP.Portal.Entities}Nome')]
    for city in cityList:
        location['city'] = city
        state = location['state']
        linkCity = 'https://institucional.xpi.com.br/api/Escritorios/FilialListarPorEstadoCidadeV2?vSiglaEstado=' + state + '&vNomeCidade=' + city.replace(' ', '%20')
        yield scrapy.Request(url=linkCity, callback=self.parseCities, meta={'item': location})

def parseCities(self, response):
    location = response.meta['item']
    state = location['state']
    city = location['city']
    root = ET.fromstring(response.body)
    mailList = [elem.text for elem in root.iter('{http://schemas.datacontract.org/2004/07/XP.Portal.Entities}EmailPadronizadoSocioResponsavel')]
    companyList = [elem.text for elem in root.iter('{http://schemas.datacontract.org/2004/07/XP.Portal.Entities}RazaoSocial')]
    contactList = [elem.text for elem in root.iter('{http://schemas.datacontract.org/2004/07/XP.Portal.Entities}SocioResponsavel')]
    telList = [elem.text for elem in root.iter('{http://schemas.datacontract.org/2004/07/XP.Portal.Entities}Telefone')]
    for i in range(len(mailList)):
        write(state, city, companyList[i], contactList[i], mailList[i], telList[i])
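The symptoms described are consistent with every request sharing one mutable LocationItem: Scrapy schedules requests asynchronously, so by the time parseCities runs, location['city'] may already have been overwritten by a later loop iteration (the first five states going out before any callback fires is just the scheduler filling its concurrency window). A sketch of one likely fix, assuming LocationItem is a standard scrapy.Item (which can be copy-constructed from another item): give each request its own copy.

def parseStates(self, response):
    location = response.meta['item']
    root = ET.fromstring(response.body)
    cityList = [city.text for city in root.iter('{http://schemas.datacontract.org/2004/07/XP.Portal.Entities}Nome')]
    for city in cityList:
        cityLocation = LocationItem(location)  # fresh copy per request, so 'city' is never overwritten
        cityLocation['city'] = city
        linkCity = ('https://institucional.xpi.com.br/api/Escritorios/FilialListarPorEstadoCidadeV2'
                    '?vSiglaEstado=' + cityLocation['state'] + '&vNomeCidade=' + city.replace(' ', '%20'))
        yield scrapy.Request(url=linkCity, callback=self.parseCities, meta={'item': cityLocation})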

Python ArcPy - Print Layer with highest field value

I have some Python code that goes through the layers in my ArcGIS project and prints out each layer name and the corresponding highest value within the field "SUM_USER_VisitCount".
Output Picture
What I want the code to do is print only the layer name and SUM_USER_VisitCount field value for the one layer with the absolute highest value.
Desired Output
I have been unable to figure out how to achieve this and can't find anything online either. Can someone help me achieve my desired output?
Sorry if the code layout is a little weird; it got messed up when I pasted it into the "code sample".
Here is my code:
import arcpy
import datetime
from datetime import timedelta
import time

# Document start time in order to calculate run time
# (note: time.clock() was removed in Python 3.8; time.perf_counter() is the modern equivalent)
time1 = time.clock()

# assign project and map frame
p = arcpy.mp.ArcGISProject(r'E:\arcGIS_Shared\Python\CumulativeHeatMaps.aprx')
m = p.listMaps('Map')[0]

Markets = [3000]

### Centers to loop through
CA_Centers = ['Castro', 'ColeValley', 'Excelsior', 'GlenPark',
              'LowerPacificHeights', 'Marina', 'NorthBeach', 'RedwoodCity',
              'SanBruno', 'DalyCity']

for Market in Markets:
    print(Market)
    for CA_Center in CA_Centers:
        Layers = m.listLayers("CumulativeSumWithin{0}_{1}_Jun2018".format(Market, CA_Center))
        fields = ['SUM_USER_VisitCount']
        for Layer in Layers:
            print(Layer)
            sqlClause = (None, 'ORDER BY SUM_USER_VisitCount')  # append ' DESC' for descending order
            with arcpy.da.SearchCursor(in_table=Layer, field_names=fields,
                                       sql_clause=sqlClause) as searchCursor:
                print(max(searchCursor))
You can create a dictionary that stores the result from each query and then print out the highest one at the end.
results_dict = {}

for Market in Markets:
    print(Market)
    for CA_Center in CA_Centers:
        Layers = m.listLayers("CumulativeSumWithin{0}_{1}_Jun2018".format(Market, CA_Center))
        fields = ['SUM_USER_VisitCount']
        for Layer in Layers:
            print(Layer)
            sqlClause = (None, 'ORDER BY SUM_USER_VisitCount')  # append ' DESC' for descending order
            with arcpy.da.SearchCursor(in_table=Layer, field_names=fields,
                                       sql_clause=sqlClause) as searchCursor:
                layer_max = max(searchCursor)  # take max once; a cursor can only be iterated once
                print(layer_max)
                results_dict[Layer] = layer_max

# get the key of the dictionary item with the highest value
highest_count_layer = max(results_dict, key=results_dict.get)
print(highest_count_layer)
print(results_dict[highest_count_layer])
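If you would rather not keep every result around, the same thing can be done with a running maximum inside the loops (a sketch reusing the Markets, CA_Centers, and m variables from the question):

best_layer = None
best_count = None
for Market in Markets:
    for CA_Center in CA_Centers:
        for Layer in m.listLayers("CumulativeSumWithin{0}_{1}_Jun2018".format(Market, CA_Center)):
            with arcpy.da.SearchCursor(in_table=Layer, field_names=['SUM_USER_VisitCount']) as searchCursor:
                layer_max = max(row[0] for row in searchCursor)  # highest visit count in this layer
            if best_count is None or layer_max > best_count:
                best_layer, best_count = Layer, layer_max
print(best_layer)
print(best_count)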

How to set a keyword to write fully to the CSV file

This script works in so far as the printed output is correct. However, it is not fully populating the CSV file for me; only the last iteration of the loop ends up in the file. Being new to IDL, I need to grasp this concept of the keyword.
I believe I need a keyword, but my attempts at inserting one have all failed.
Can someone amend the script so that the CSV file populates fully, please?
PRO Lat_Lon_Alt_Array
  ; This program extracts the Latitude, Longitude & Altitude
  ; along with the site name and file code.
  ; The purpose is to output the above dimensions from the station files
  ; into a csv file.
  COMPILE_OPT IDL2

  the_file_list = FILE_SEARCH('D:/Rwork/Project/25_Files/', '*.nc')

  FOR filein = 0, N_ELEMENTS(the_file_list) - 1 DO BEGIN
    station = NCDF_OPEN(the_file_list[filein])
    NCDF_VARGET, station, 'station_name', St_Name
    NCDF_VARGET, station, 'lat', latitude
    NCDF_VARGET, station, 'lon', longitude
    NCDF_VARGET, station, 'alt', height

    latitude = REFORM(latitude, 1)
    longitude = REFORM(longitude, 1)
    height = REFORM(height, 1)

    PRINT, the_file_list[filein]
    PRINT, 'name'
    PRINT, St_Name
    PRINT, 'lat'
    PRINT, latitude
    PRINT, 'lon'
    PRINT, longitude
    PRINT, 'alt'
    PRINT, height

    ; Add each station's data to the file
    WRITE_CSV, 'LatLon.csv', the_file_list[filein], latitude, longitude, height
  ENDFOR

  RETURN
END
WRITE_CSV overwrites the file every time it is called, hence you only ever see the last entry.
Create arrays to hold all the values before the for loop:
n_files = N_ELEMENTS(the_file_list)
latitude_arr = DBLARR(n_files)   ; assuming the type is double
longitude_arr = DBLARR(n_files)
height_arr = DBLARR(n_files)
In your for loop, fill them with:
latitude_arr[filein] = latitude
longitude_arr[filein] = longitude
height_arr[filein] = height
Then after the for loop, write them with:
WRITE_CSV, 'LatLon.csv', the_file_list, latitude_arr, longitude_arr, height_arr
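As a side note, if you also want column names in the file, WRITE_CSV accepts a HEADER keyword in IDL 8.x, e.g. HEADER=['File', 'Lat', 'Lon', 'Alt'] added to the call above; that is optional and separate from the overwrite fix.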
