You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
34 lines
998 B
34 lines
998 B
![]()
1 year ago
|
# BUILT-INS
|
||
|
import json
|
||
|
|
||
|
# SOURCE
|
||
|
from parsers import PdfParser
|
||
|
from parsers.geocoding import GeoLocation
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
    import os

    # Parse every file found in the "pdfs/T1" folder next to this script and
    # write one report section per file into "log.txt" (created in the current
    # working directory, overwritten on each run).
    script_dir = os.path.abspath(os.path.dirname(__file__))
    directory = os.path.realpath(os.path.join(script_dir, "pdfs/T1"))

    # The report contains non-ASCII text (the "imàtge" label and JSON dumped
    # with ensure_ascii=False), so the encoding is pinned to UTF-8 instead of
    # relying on the platform's locale default.
    with open("log.txt", "wt", encoding="utf-8") as conn:
        for file_name in os.listdir(directory):
            file_path = os.path.join(directory, file_name)
            parser = PdfParser(file_path)
            # The return value is discarded; parser.geolocation is read below,
            # so presumably this call attaches it to the parser -- TODO confirm.
            GeoLocation(parser)
            file_format = parser.format  # renamed: must not shadow builtin format()
            data = parser.data

            format_label = (
                "Format imàtge" if file_format == "img" else "Format vectorial"
            )
            read_status = "" if data.success else "(UNABLE TO READ)"

            # The template lines are flush-left on purpose: every character
            # inside the triple-quoted literal is written to the log verbatim.
            conn.write(f"""
# FILENAME
{file_name.upper()}
{format_label} {read_status}

## PLAIN_TEXT
{data}
## STRUCTURED
description: {json.dumps(data.description.data, indent=4, ensure_ascii=False)}
ownership: {json.dumps(data.ownership.data, indent=4, ensure_ascii=False)}

## GEOLOCATION
{json.dumps(parser.geolocation.data, indent=4, ensure_ascii=False)}
""")