You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

200 lines
4.9 KiB

1 year ago
import json
import re
import unicodedata

import geocoder
# Spanish and Catalan number words understood by ``parse_number``.
# Each row lists the Spanish spelling(s) first, then the Catalan one(s).
_NUMBER_WORDS = {
    # Units.
    "uno": 1, "un": 1,
    "dos": 2,
    "tres": 3,
    "cuatro": 4, "quatre": 4,
    "cinco": 5, "cinc": 5,
    "seis": 6, "sis": 6,
    "siete": 7, "set": 7,
    "ocho": 8, "vuit": 8,
    "nueve": 9, "nou": 9,
    # Teens. The "diezi..." and "diset" spellings are kept only because
    # earlier versions of this table accepted them.
    "once": 11, "onze": 11,
    "doce": 12, "dotze": 12,
    "trece": 13, "tretze": 13,
    "catorce": 14, "catorze": 14,
    "quince": 15, "quinze": 15,
    "dieciseis": 16, "dieziseis": 16, "setze": 16,
    "diecisiete": 17, "diezisiete": 17, "disset": 17, "diset": 17,
    "dieciocho": 18, "dieziocho": 18, "divuit": 18,
    "diecinueve": 19, "diezinueve": 19, "dinou": 19,
    # Tens. "veint" is the stem used in glued forms such as "veintidos".
    "diez": 10, "deu": 10,
    "veinte": 20, "veint": 20, "vint": 20,
    "treinta": 30, "trenta": 30,
    "cuarenta": 40, "quaranta": 40,
    "cincuenta": 50, "cinquanta": 50,
    "sesenta": 60, "seixanta": 60,
    "setenta": 70, "setanta": 70,
    "ochenta": 80, "vuitanta": 80,
    "noventa": 90, "noranta": 90,
    # Hundreds: every stem/suffix combination the old pattern accepted.
    "cien": 100, "ciens": 100, "ciento": 100, "cientos": 100,
    "cent": 100, "cents": 100, "cento": 100, "centos": 100,
    # Thousands.
    "mil": 1000,
}

# Upper bound for the longest-match scan in ``_split_number_word``.
_LONGEST_NUMBER_WORD = max(len(word) for word in _NUMBER_WORDS)

# Conjunctions that may be glued between two number words ("veint-i-dos").
_GLUED_CONNECTORS = ("y", "i")


def _split_number_word(word):
    """Split one lowercase ASCII token into the values of its number words.

    Glued spellings such as "veintidos" or "doscientos" are cut with a
    longest-match scan, skipping a single "y"/"i" connector between two
    parts. Returns an empty list when any part of *word* is not a known
    number word, so unrelated text contributes nothing.
    """
    values = []
    start = 0
    while start < len(word):
        furthest = min(len(word), start + _LONGEST_NUMBER_WORD)
        for end in range(furthest, start, -1):
            value = _NUMBER_WORDS.get(word[start:end])
            if value is not None:
                break
        else:
            return []
        values.append(value)
        start = end
        # A connector only counts when something still follows it.
        if word[start:start + 1] in _GLUED_CONNECTORS and start + 1 < len(word):
            start += 1
    return values


def parse_number(chars):
    """Convert a number given as digits or as Spanish/Catalan words to an int.

    Args:
        chars: The raw number text, e.g. "12", "Treinta y cinco",
            "trenta-cinc" or "doscientos". May be empty or None.

    Returns:
        "" when *chars* is empty or None; otherwise an int. Text in which no
        number word is recognised yields 0. Words that are not number words
        (including the connectors "y" and "i") are ignored.
    """
    if not chars:
        return ""
    chars = chars.lower()
    try:
        return int(chars)
    except ValueError:
        pass  # Not a plain digit string: fall through to the word parser.

    # Drop diacritics so "veintidós" and "veintidos" are read alike.
    decomposed = unicodedata.normalize("NFD", chars)
    plain = "".join(ch for ch in decomposed if not unicodedata.combining(ch))

    total = 0  # Completed thousands.
    group = 0  # Value accumulated since the last "mil".
    for token in re.findall(r"[a-z]+", plain):
        for value in _split_number_word(token):
            if value == 1000:
                # "dos mil" -> 2000; a bare "mil" -> 1000.
                total += (group or 1) * 1000
                group = 0
            elif value == 100:
                # A preceding unit multiplies ("dos cientos" -> 200);
                # anything else is simply added.
                group = group * 100 if 0 < group < 10 else group + 100
            else:
                group += value
    return total + group
def parse_street(chars):
    """Normalise a street name to lowercase, space-separated words.

    The text is split on single spaces, and each word is further split
    around runs of one ASCII capital followed by lowercase letters, so a
    camel-cased "CalleMayor" becomes "calle mayor". Empty pieces are dropped.

    Returns "" when *chars* is empty or None.
    """
    if not chars:
        return ""
    pieces = []
    for word in chars.split(" "):
        for piece in re.split(r"([A-Z][a-z]+)", word):
            if piece:
                pieces.append(piece.lower())
    return " ".join(pieces)
def parse_town(chars):
    """Normalise a town name and strip any "villa"/"vila" (de) marker.

    The text is lowercased and camel-cased words are separated the same way
    street names are ("VilaDeGracia" -> "vila de gracia"). Every occurrence
    of "villa" or "vila", together with an optional following "de", is then
    removed.

    Returns "" when *chars* is empty or None.
    """
    if not chars:
        return ""
    pieces = []
    for word in chars.split(" "):
        for piece in re.split(r"([A-Z][a-z]+)", word):
            if piece:
                pieces.append(piece.lower())
    normalised = " ".join(pieces)
    # NOTE(review): the pattern has no word boundary, so it also strips
    # "vila" inside longer names ("Vilanova" -> "nova") -- confirm intended.
    return re.sub(r"(villa|vila)( *de *)?", "", normalised)
def build_address(record):
    """Compose the free-text address string used for geocoding *record*.

    Reads the "number", "street" and "town" entries of
    ``record.description.data``, normalises each with the matching
    ``parse_*`` helper and joins them as
    "<number> <street>, <town>, cataluña, españa". A comma left directly
    after another comma by an empty part is removed, as is a leading comma.
    """
    fields = record.description.data
    number = parse_number(fields.get("number"))
    street = parse_street(fields.get("street"))
    town = parse_town(fields.get("town"))
    raw = "{number} {street}, {town}, cataluña, españa".format(
        number=number,
        street=street,
        town=town,
    )
    # Collapse ", ," left by an empty part before trimming a leading comma.
    collapsed = re.sub(r"(?<=,) *,", "", raw)
    return re.sub(r"^ *, *", "", collapsed)
class GeoLocation(object):
    """Geocoding result for a parser's record, exposed as read-only fields.

    On construction the address built from ``parser.data`` is looked up with
    ``geocoder.osm`` and the instance is attached to the parser as
    ``parser.geolocation``. Every field is None when the lookup returned no
    data.
    """

    def __init__(self, parser):
        result = self.geolocate(parser.data)
        # A falsy ``json`` payload is replaced by an empty mapping.
        self._data = result.json or {}
        parser.geolocation = self

    def geolocate(self, record):
        """Return the ``geocoder.osm`` result for the record's address."""
        return geocoder.osm(build_address(record))

    @property
    def latlng(self):
        """Two-item list ``[lat, lng]`` taken from the result."""
        lookup = self._data.get
        return [lookup("lat"), lookup("lng")]

    @property
    def address(self):
        """The "address" entry of the result."""
        return self._data.get("address")

    @property
    def street(self):
        """The "street" entry of the result."""
        return self._data.get("street")

    @property
    def postcode(self):
        """The "postcode" entry of the result."""
        return self._data.get("postcode")

    @property
    def town(self):
        """The "town" entry of the result."""
        return self._data.get("town")

    @property
    def county(self):
        """The "county" entry of the result."""
        return self._data.get("county")

    @property
    def region(self):
        """The "region" entry of the result."""
        return self._data.get("region")

    @property
    def country(self):
        """The "country" entry of the result."""
        return self._data.get("country")

    @property
    def data(self):
        """All exposed fields as a dict, in a fixed key order."""
        field_names = (
            "latlng",
            "street",
            "postcode",
            "town",
            "county",
            "region",
            "country",
            "address",
        )
        summary = {}
        for field_name in field_names:
            summary[field_name] = getattr(self, field_name)
        return summary

    def __str__(self):
        """Serialise ``data`` as a JSON string."""
        return json.dumps(self.data)