You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
200 lines
4.9 KiB
200 lines
4.9 KiB
import geocoder |
|
import re |
|
import json |
|
|
|
|
|
# Lookup tables for Spanish and Catalan number words (lower case, no accents).
_UNIT_WORDS = {
    "uno": 1, "un": 1,
    "dos": 2,
    "tres": 3,
    "cuatro": 4, "quatre": 4,
    "cinco": 5, "cinc": 5,
    "seis": 6, "sis": 6,
    "siete": 7, "set": 7,
    "ocho": 8, "vuit": 8,
    "nueve": 9, "nou": 9,
}

# Words that stand for a complete value on their own.  The historical
# spellings ("dieziseis", "diset", ...) are kept so inputs that relied on them
# still resolve; the standard spellings are accepted as well.
_STANDALONE_WORDS = {
    "diez": 10, "deu": 10,
    "once": 11, "onze": 11,
    "doce": 12, "dotze": 12,
    "trece": 13, "tretze": 13,
    "catorce": 14, "catorze": 14,
    "quince": 15, "quinze": 15,
    "dieziseis": 16, "dieciseis": 16, "setze": 16,
    "diezisiete": 17, "diecisiete": 17, "diset": 17, "disset": 17,
    "dieziocho": 18, "dieciocho": 18, "divuit": 18,
    "diezinueve": 19, "diecinueve": 19, "dinou": 19,
    # Spanish hundreds whose stem is not a plain unit word.
    "quinientos": 500, "setecientos": 700, "novecientos": 900,
}

_TENS_WORDS = {
    "veinte": 20, "veint": 20, "vint": 20,
    "treinta": 30, "trenta": 30,
    "cuarenta": 40, "quaranta": 40,
    "cincuenta": 50, "cinquanta": 50,
    "sesenta": 60, "seixanta": 60,
    "setenta": 70, "setanta": 70,
    "ochenta": 80, "vuitanta": 80,
    "noventa": 90, "noranta": 90,
}

_THOUSAND_WORDS = frozenset({"mil"})

# "cien", "ciento", "cientos", "cent", "cents", optionally fused to a prefix
# ("doscientos" -> prefix "dos").
_HUNDRED_RE = re.compile(r"([a-z]*?)(?:cien|cent)(?:tos?|s)?")

# Fold accented vowels so "veintidós" and "veintidos" are treated alike.
_ACCENT_TABLE = str.maketrans("áàéèíïóòúü", "aaeeiioouu")


def _number_word_value(word):
    """Return the value of one number word, or None if it is not recognised."""
    for table in (_UNIT_WORDS, _STANDALONE_WORDS, _TENS_WORDS):
        if word in table:
            return table[word]

    # Fused tens + unit, with an optional "i"/"y" link: "veintidos", "treintaydos".
    for tens_word, tens_value in _TENS_WORDS.items():
        if word.startswith(tens_word):
            rest = word[len(tens_word):]
            if rest[:1] in ("i", "y"):
                rest = rest[1:]
            if rest in _UNIT_WORDS:
                return tens_value + _UNIT_WORDS[rest]

    # Fused unit + hundred: "doscientos", "trescientos".
    hundred = _HUNDRED_RE.fullmatch(word)
    if hundred and hundred.group(1) in _UNIT_WORDS:
        return _UNIT_WORDS[hundred.group(1)] * 100

    return None


def parse_number(chars):
    """Convert a house number written in digits or Spanish/Catalan words to an int.

    Args:
        chars: The raw number text, e.g. "12", "treinta y dos", "vint-i-dos",
            "doscientos".  May be empty or None.

    Returns:
        "" when `chars` is empty or None; otherwise an int.  Text that
        contains no recognised number word yields 0.
    """
    if not chars:
        return ""

    chars = chars.lower()
    try:
        return int(chars)
    except ValueError:
        # Not a plain integer literal: fall through to the word parser.
        pass

    total = 0  # value of the completed "thousands" groups
    group = 0  # value accumulated since the last thousands word
    for word in re.findall(r"[a-z]+", chars.translate(_ACCENT_TABLE)):
        hundred = _HUNDRED_RE.fullmatch(word)
        if word in _THOUSAND_WORDS:
            # "mil" alone is 1000; "dos mil" multiplies the pending group.
            total += (group or 1) * 1000
            group = 0
        elif hundred and not hundred.group(1):
            # A bare hundred word multiplies a preceding unit ("dos cents")
            # and otherwise simply adds 100 ("ciento veinte").
            group = group * 100 if 0 < group < 10 else group + 100
        else:
            # Connectors ("y", "i") and unknown words contribute nothing.
            group += _number_word_value(word) or 0

    return total + group
|
|
|
|
|
def parse_street(chars):
    """Normalise a street name to lower-case, space-separated words.

    Each space-separated word is further split wherever a capital letter
    followed by lower-case letters begins, so "CalleMayor" becomes
    "calle mayor".  Empty pieces (including those caused by repeated spaces)
    are dropped.

    Args:
        chars: The raw street text; may be empty or None.

    Returns:
        The normalised street string, or "" when `chars` is empty or None.
    """
    if not chars:
        return ""

    pieces = []
    for word in chars.split(" "):
        # The capturing group keeps the matched "Capitalised" runs in the result.
        for piece in re.split(r"([A-Z][a-z]+)", word):
            if piece:
                pieces.append(piece.lower())

    return " ".join(pieces)
|
|
|
|
|
def parse_town(chars):
    """Normalise a town name and strip the "villa"/"vila (de)" prefix text.

    The text is lower-cased and split into space-separated words, breaking
    fused capitalised words apart ("SantAndreu" -> "sant andreu").  Every
    occurrence of "villa" or "vila", together with an optional following
    "de", is then removed.

    Args:
        chars: The raw town text; may be empty or None.

    Returns:
        The normalised town string, or "" when `chars` is empty or None.
    """
    if not chars:
        return ""

    pieces = []
    for word in chars.split(" "):
        pieces.extend(
            piece.lower()
            for piece in re.split(r"([A-Z][a-z]+)", word)
            if piece
        )
    spaced = " ".join(pieces)

    # NOTE(review): the pattern is not anchored to word boundaries, so it also
    # removes "vila" inside longer names ("vilanova" -> "nova") - confirm that
    # this is intended.
    return re.sub(r"(villa|vila)( *de *)?", "", spaced)
|
|
|
|
|
def build_address(record):
    """Build a free-text address string for `record`.

    Reads the "number", "street" and "town" entries from
    `record.description.data` (only its `.get` method is used), normalises
    each with `parse_number`, `parse_street` and `parse_town`, and joins them
    with the fixed "cataluña, españa" suffix.

    Args:
        record: Object exposing `description.data` as described above.

    Returns:
        The assembled address with empty comma-separated segments removed.
    """
    fields = record.description.data
    number = parse_number(fields.get("number"))
    street = parse_street(fields.get("street"))
    town = parse_town(fields.get("town"))

    raw = "{number} {street}, {town}, cataluña, españa".format(
        number=number,
        street=street,
        town=town,
    )

    # Drop a comma that directly follows another comma (empty segment) ...
    without_empty_segments = re.sub(r"(?<=,) *,", "", raw)
    # ... and a comma left dangling at the very start of the string.
    return re.sub(r"^ *, *", "", without_empty_segments)
|
|
|
|
|
class GeoLocation(object):
    """Geocoding result for a parser's record, exposed through read-only properties.

    On construction the instance geocodes `parser.data`, keeps the JSON
    payload of the result (or an empty dict when there is none) and attaches
    itself to the parser as `parser.geolocation`.
    """

    # Property names reported by `data`, in output order.
    _FIELDS = (
        "latlng",
        "street",
        "postcode",
        "town",
        "county",
        "region",
        "country",
        "address",
    )

    def __init__(self, parser):
        """Geocode `parser.data` and register this object on `parser`."""
        result = self.geolocate(parser.data)
        payload = result.json
        # A falsy payload is replaced by an empty dict so lookups return None.
        self._data = payload if payload else {}
        parser.geolocation = self

    def geolocate(self, record):
        """Return the `geocoder.osm` result for the address built from `record`."""
        return geocoder.osm(build_address(record))

    @property
    def latlng(self):
        """Two-element list `[lat, lng]`; entries are None when missing."""
        return [self._data.get(key) for key in ("lat", "lng")]

    @property
    def address(self):
        """The "address" entry of the payload, or None."""
        return self._data.get("address")

    @property
    def street(self):
        """The "street" entry of the payload, or None."""
        return self._data.get("street")

    @property
    def postcode(self):
        """The "postcode" entry of the payload, or None."""
        return self._data.get("postcode")

    @property
    def town(self):
        """The "town" entry of the payload, or None."""
        return self._data.get("town")

    @property
    def county(self):
        """The "county" entry of the payload, or None."""
        return self._data.get("county")

    @property
    def region(self):
        """The "region" entry of the payload, or None."""
        return self._data.get("region")

    @property
    def country(self):
        """The "country" entry of the payload, or None."""
        return self._data.get("country")

    @property
    def data(self):
        """Dict of all exposed properties, keyed by property name."""
        return {name: getattr(self, name) for name in self._FIELDS}

    def __str__(self):
        """Return `data` serialised as a JSON string."""
        return json.dumps(self.data)