You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
33 lines
1.1 KiB
33 lines
1.1 KiB
# BUILT-INS |
|
import json |
|
|
|
# SOURCE |
|
from src.parsers.pdf import PdfParser |
|
from src.parsers.geocoding import GeoLocation |
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run PdfParser (and GeoLocation) on every entry of
    # ./pdfs/T1, resolved relative to this file, and write one report section
    # per entry to ./log/log.txt (the log is truncated on every run).
    import os

    base_dir = os.path.dirname(__file__)
    directory = os.path.realpath(os.path.join(base_dir, "pdfs", "T1"))
    log_path = os.path.realpath(os.path.join(base_dir, "log", "log.txt"))

    # Explicit UTF-8: the report contains non-ASCII text (the literal
    # "imàtge" and JSON dumped with ensure_ascii=False), so relying on the
    # platform's locale encoding could raise UnicodeEncodeError.
    with open(log_path, "wt", encoding="utf-8") as conn:
        for file_name in os.listdir(directory):
            file_path = os.path.join(directory, file_name)
            parser = PdfParser(file_path)
            # NOTE(review): GeoLocation is instantiated only for its effect on
            # `parser` (the instance is discarded); parser.geolocation is read
            # below and is presumably populated by this call -- confirm.
            GeoLocation(parser)

            data = parser.data
            # Conditional expressions instead of `cond and a or b`; also avoids
            # binding a local named `format`, which shadowed the builtin.
            format_label = (
                "Format imàtge" if parser.format == "img" else "Format vectorial"
            )
            status_note = "" if data.success else "(UNABLE TO READ)"

            # Template lines are kept flush-left so the written report has no
            # leading indentation.
            conn.write(f"""
# FILENAME
{file_name.upper()}
{format_label} {status_note}

## PLAIN_TEXT
{data}
## STRUCTURED
description: {json.dumps(data.description.data, indent=4, ensure_ascii=False)}
ownership: {json.dumps(data.ownership.data, indent=4, ensure_ascii=False)}

## GEOLOCATION
{json.dumps(parser.geolocation.data, indent=4, ensure_ascii=False)}
""")
|
|
|