You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

33 lines
998 B

# BUILT-INS
import json
# SOURCE
from parsers import PdfParser
from parsers.geocoding import GeoLocation
def render_log_entry(file_name, parser):
    """Return the log section describing one parsed PDF.

    Args:
        file_name: Name of the PDF file; written upper-cased under the
            ``# FILENAME`` heading.
        parser: Object exposing ``format``, ``data`` and ``geolocation``.
            ``data`` must provide ``success``, ``description.data`` and
            ``ownership.data``, and ``geolocation`` must provide ``data``;
            the three ``.data`` payloads must be JSON-serialisable.

    Returns:
        The section text. It starts with a blank line and ends with a
        newline so consecutive sections can be written back to back.
    """
    data = parser.data
    format_label = "Format imàtge" if parser.format == "img" else "Format vectorial"
    # Left empty on success; the separating space before it is still emitted.
    read_status = "" if data.success else "(UNABLE TO READ)"

    lines = [
        "",
        "# FILENAME",
        file_name.upper(),
        f"{format_label} {read_status}",
        "## PLAIN_TEXT",
        f"{data}",
        "## STRUCTURED",
        "description: "
        + json.dumps(data.description.data, indent=4, ensure_ascii=False),
        "ownership: "
        + json.dumps(data.ownership.data, indent=4, ensure_ascii=False),
        "## GEOLOCATION",
        json.dumps(parser.geolocation.data, indent=4, ensure_ascii=False),
        "",
    ]
    return "\n".join(lines)


def main():
    """Parse every file in ``pdfs/T1`` next to this script and write ``log.txt``.

    ``log.txt`` is created (or truncated) in the current working directory.
    """
    # Imported locally: the original script also imported os only when run
    # as a program.
    import os

    directory = os.path.realpath(
        os.path.join(os.path.abspath(os.path.dirname(__file__)), "pdfs/T1")
    )
    # Explicit UTF-8: the report contains non-ASCII text (ensure_ascii=False
    # JSON and accented labels) that a locale-dependent default encoding may
    # be unable to encode.
    with open("log.txt", "wt", encoding="utf-8") as log_file:
        # os.listdir returns entries in arbitrary order; sort so the log is
        # reproducible between runs.
        for file_name in sorted(os.listdir(directory)):
            parser = PdfParser(os.path.join(directory, file_name))
            # Return value is unused; presumably this call populates
            # parser.geolocation as a side effect -- TODO confirm.
            GeoLocation(parser)
            log_file.write(render_log_entry(file_name, parser))


if __name__ == "__main__":
    main()