"""Dump the parse results of every PDF under ``pdfs/T1`` into ``log.txt``.

Run as a script. For each entry of the ``pdfs/T1`` directory (resolved
relative to this file) a ``PdfParser`` is built, ``GeoLocation`` is applied
to it, and a plain-text report section is appended to ``log.txt`` in the
current working directory.
"""

# BUILT-INS
import json

# SOURCE
from parsers import PdfParser
from parsers.geocoding import GeoLocation


if __name__ == "__main__":
    import os

    # Folder holding the sample PDFs, resolved relative to this script so the
    # result does not depend on the current working directory.
    directory = os.path.realpath(
        os.path.join(os.path.abspath(os.path.dirname(__file__)), "pdfs/T1")
    )

    # Shared serialisation options: pretty-printed, non-ASCII kept readable.
    json_options = {"indent": 4, "ensure_ascii": False}

    # The report contains non-ASCII text (and ensure_ascii=False JSON), so the
    # encoding is pinned to UTF-8 instead of relying on the platform default.
    with open("log.txt", "wt", encoding="utf-8") as conn:
        # os.listdir() yields entries in arbitrary order; sort them so that
        # successive runs produce comparable logs.
        for file_name in sorted(os.listdir(directory)):
            file_path = os.path.join(directory, file_name)
            parser = PdfParser(file_path)
            # Called for its side effect only; presumably this is what
            # populates ``parser.geolocation`` read below -- TODO confirm.
            GeoLocation(parser)

            data = parser.data
            format_label = (
                "Format imàtge" if parser.format == "img" else "Format vectorial"
            )
            read_status = "" if data.success else "(UNABLE TO READ)"

            description_json = json.dumps(data.description.data, **json_options)
            ownership_json = json.dumps(data.ownership.data, **json_options)
            geolocation_json = json.dumps(parser.geolocation.data, **json_options)

            conn.write(f"""
# FILENAME
{file_name.upper()}
{format_label} {read_status}

## PLAIN_TEXT
{data}

## STRUCTURED
description: {description_json}
ownership: {ownership_json}

## GEOLOCATION
{geolocation_json}

""")