You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
200 lines
4.9 KiB
200 lines
4.9 KiB
![]()
1 year ago
|
import geocoder
|
||
|
import re
|
||
|
import json
|
||
|
|
||
|
|
||
|
def parse_number(chars):
    """Parse a number given as digits or as Spanish/Catalan number words.

    Args:
        chars: The raw number text, e.g. ``"12"``, ``"veintiuno"``,
            ``"doscientos"`` or ``"dos mil"``. Matching is case-insensitive
            and only considers the ASCII letters ``a-z`` as word characters.

    Returns:
        ``""`` when ``chars`` is empty or ``None``; otherwise an ``int``.
        Text that is neither an integer literal nor contains a known number
        word yields ``0``.
    """
    if not chars:
        return ""

    chars = chars.lower()
    try:
        return int(chars)
    except ValueError:
        # Not a plain integer literal; fall through to the word parser.
        pass

    decimals = {
        "uno": 1,
        "un": 1,
        "dos": 2,
        "tres": 3,
        "cuatro": 4,
        "quatre": 4,
        "cinco": 5,
        "cinc": 5,
        "seis": 6,
        "sis": 6,
        "siete": 7,
        "set": 7,
        "ocho": 8,
        "vuit": 8,
        "nueve": 9,
        "nou": 9
    }

    teens = {
        "once": 11,
        "onze": 11,
        "doce": 12,
        "dotze": 12,
        "trece": 13,
        "tretze": 13,
        "catorce": 14,
        "catorze": 14,
        "quince": 15,
        "quinze": 15,
        "dieziseis": 16,
        "setze": 16,
        "diezisiete": 17,
        "diset": 17,
        "dieziocho": 18,
        "divuit": 18,
        "diezinueve": 19,
        "dinou": 19
    }

    tenths = {
        "diez": 10,
        "deu": 10,
        "veinte": 20,
        "veint": 20,
        "vint": 20,
        "treinta": 30,
        "trenta": 30,
        "cuarenta": 40,
        "quaranta": 40,
        "cincuenta": 50,
        "cinquanta": 50,
        "sesenta": 60,
        "seixanta": 60,
        "setenta": 70,
        "ochenta": 80,
        "vuitanta": 80,
        "noventa": 90,
        "noranta": 90,
    }

    hundreds = {
        "cien": 100,
        "cent": 100,
    }

    thousands = {
        "mil": 1000
    }

    number = 0
    # Working copy of the text. Words that have been fully accounted for are
    # blanked out so that a later pattern cannot count them a second time.
    remaining = chars

    # Multipliers, largest first. An optional unit word in front of the
    # multiplier (glued, or separated by spaces/hyphens) scales it:
    # "doscientos" -> 200, "dos-cents" -> 200, "dos mil" -> 2000.
    multipliers = (
        (thousands, r"(?<![a-z])(?:([a-z]+)[ -]*)?%s(?![a-z])"),
        (hundreds, r"(?<![a-z])(?:([a-z]+)[ -]*)?%s(?:tos?|s?)?(?![a-z])"),
    )
    for table, template in multipliers:
        for word, scale in table.items():
            match = re.search(template % word, remaining)
            if not match:
                continue
            factor = decimals.get(match.group(1))
            # Parenthesised on purpose: `factor or 1 * scale` would bind as
            # `factor or (1 * scale)` and add the bare unit instead.
            number += (factor or 1) * scale
            if factor:
                # The unit was consumed as a multiplier; hide it from the
                # units pass below so it is not added again.
                remaining = remaining[:match.start()] + " " + remaining[match.end():]

    # Teens are whole words. They are blanked once counted because several
    # of them start with a tens word ("dieziseis" begins with "diez").
    for teen, value in teens.items():
        pattern = r"(?<![a-z])%s(?![a-z])" % teen
        if re.search(pattern, remaining):
            number += value
            remaining = re.sub(pattern, " ", remaining)

    # Tens: the word must stand alone or be followed by a glued connector
    # ("y"/"i") plus an optional unit, e.g. "veintiuno" = veint + i + uno.
    # Requiring the connector keeps the stem "veint" from also matching
    # inside "veinte" and counting 20 twice.
    for tenth, value in tenths.items():
        match = re.search(r"(?<![a-z])(%s)(?:[yi]([a-z]*))?(?![a-z])" % tenth, remaining)
        if match:
            # A glued unit is invisible to the units pass (it is preceded by
            # a letter), so it is added here.
            number += value + decimals.get(match.group(2), 0)

    # Stand-alone units, optionally introduced by a connector.
    for decimal, value in decimals.items():
        if re.search(r"(?<![a-z])(y|i|-)? *(%s)(?![a-z])" % decimal, remaining):
            number += value

    return number
|
||
|
|
||
|
|
||
|
def parse_street(chars):
    """Normalise a street name into lower-case, space-separated words.

    The text is split on single spaces, and each resulting token is further
    split around every "capital letter followed by lower-case letters" run,
    so ``"CalleMayor"`` becomes ``"calle mayor"``.

    Args:
        chars: The raw street text, or a falsy value.

    Returns:
        The normalised street string, or ``""`` for falsy input.
    """
    if not chars:
        return ""

    pieces = []
    for token in chars.split(" "):
        # The capturing group makes re.split keep each "Xxxx" run as its own
        # piece; the empty strings it leaves between runs are dropped.
        for piece in re.split(r"([A-Z][a-z]+)", token):
            if piece:
                pieces.append(piece.lower())
    return " ".join(pieces)
|
||
|
|
||
|
|
||
|
def parse_town(chars):
    """Normalise a town name and strip the "villa"/"vila" (de) marker.

    The text is split on single spaces and around CamelCase runs, lower-cased
    and re-joined with single spaces. Every occurrence of ``villa`` or
    ``vila``, together with an optional following ``de``, is then removed.

    NOTE(review): the removal is not anchored to word boundaries, so it also
    fires inside longer names ("Vilanova" -> "nova"); confirm this is wanted.

    Args:
        chars: The raw town text, or a falsy value.

    Returns:
        The normalised town string, or ``""`` for falsy input.
    """
    if not chars:
        return ""

    words = []
    for token in chars.split(" "):
        # Keep each "Xxxx" run as a separate word; skip the empty fragments
        # re.split produces around the captured runs.
        words.extend(
            part.lower()
            for part in re.split(r"([A-Z][a-z]+)", token)
            if part
        )

    normalised = " ".join(words)
    return re.sub(r"(villa|vila)( *de *)?", "", normalised)
|
||
|
|
||
|
|
||
|
def build_address(record):
    """Build the free-text address string used for geocoding a record.

    Reads the ``number``, ``street`` and ``town`` entries from
    ``record.description.data``, normalises each one with ``parse_number``,
    ``parse_street`` and ``parse_town``, and joins them with the fixed
    region/country suffix.

    Args:
        record: Object exposing ``description.data`` with a ``get`` method.

    Returns:
        The assembled address string with empty components collapsed.
    """
    fields = record.description.data

    number = parse_number(fields.get("number"))
    street = parse_street(fields.get("street"))
    town = parse_town(fields.get("town"))

    raw = "{number} {street}, {town}, cataluña, españa".format(
        number=number,
        street=street,
        town=town
    )

    # An empty town leaves ", ," behind: drop the second of two consecutive
    # commas (and the spaces between them).
    collapsed = re.sub(r"(?<=,) *,", "", raw)
    # An empty number and street leave a leading " , ": strip it.
    return re.sub(r"^ *, *", "", collapsed)
|
||
|
|
||
|
|
||
|
class GeoLocation(object):
    """Geocoding result for a parsed record, exposed through read-only fields.

    On construction the record held in ``parser.data`` is turned into an
    address with ``build_address`` and looked up with ``geocoder.osm``. The
    ``json`` attribute of the response (or an empty dict when it is falsy) is
    stored, and the instance attaches itself to the parser as
    ``parser.geolocation``.
    """

    # Order of the keys in `data`; it is also the key order of `str(self)`.
    _FIELDS = (
        "latlng",
        "street",
        "postcode",
        "town",
        "county",
        "region",
        "country",
        "address",
    )

    def __init__(self, parser):
        response = self.geolocate(parser.data)
        self._data = response.json or {}
        parser.geolocation = self

    def geolocate(self, record):
        """Return the ``geocoder.osm`` response for the record's address."""
        return geocoder.osm(build_address(record))

    @property
    def latlng(self):
        """Two-item list ``[lat, lng]``; an entry is ``None`` when missing."""
        lookup = self._data.get
        return [lookup("lat"), lookup("lng")]

    @property
    def address(self):
        """The ``address`` entry of the stored result, or ``None``."""
        return self._data.get("address")

    @property
    def street(self):
        """The ``street`` entry of the stored result, or ``None``."""
        return self._data.get("street")

    @property
    def postcode(self):
        """The ``postcode`` entry of the stored result, or ``None``."""
        return self._data.get("postcode")

    @property
    def town(self):
        """The ``town`` entry of the stored result, or ``None``."""
        return self._data.get("town")

    @property
    def county(self):
        """The ``county`` entry of the stored result, or ``None``."""
        return self._data.get("county")

    @property
    def region(self):
        """The ``region`` entry of the stored result, or ``None``."""
        return self._data.get("region")

    @property
    def country(self):
        """The ``country`` entry of the stored result, or ``None``."""
        return self._data.get("country")

    @property
    def data(self):
        """Dict of all exposed fields, keyed by property name."""
        snapshot = {}
        for name in self._FIELDS:
            snapshot[name] = getattr(self, name)
        return snapshot

    def __str__(self):
        """Return the ``data`` dict serialised as JSON."""
        return json.dumps(self.data)
|