From 1387f3b998f9b9cd71bd6dd7a0ab6d141cea71b3 Mon Sep 17 00:00:00 2001
From: Christian Heller
Date: Sun, 24 Sep 2023 05:39:13 +0200
Subject: [PATCH] Bookmaker: add more input validations.

---
Review notes (text between the three-dash line and the diff is not part of
the commit):
- The one-line-per-diff-line structure of the hunks was restored; the
  indentation of pre-existing context lines was reconstructed from the
  Python block structure and should be checked against the pre-image.
- The -m error message referenced the undefined name `arg`; it now uses
  `args`, so that error path raises the intended ValueError instead of a
  NameError. The blob ids on the `index` line were not recomputed for
  this change.

 bookmaker.py | 75 ++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 52 insertions(+), 23 deletions(-)

diff --git a/bookmaker.py b/bookmaker.py
index 48d4af2..646877a 100755
--- a/bookmaker.py
+++ b/bookmaker.py
@@ -51,6 +51,8 @@ Same as -n, but draw lines marking printable-region margins, page quarts, spine
 
 NOTES:
 
+For arguments like -p, page numbers are assumed to start with 1 (not 0, which is treated as an invalid page number value).
+
 The target page shape so far is assumed to be A4 in portrait orientation; bookmaker.py normalizes all pages to this format before applying crops, and removes any source PDF /Rotate commands (for their production of landscape orientations).
 
 The --nup4 quartering puts pages into a specific order optimized for no-tumble duplex print-outs that can easily be folded and cut into pages of a small A6 book. Each unit of 8 pages from the source PDF is mapped thus onto two subsequent pages (i.e. front and back of a printed A4 paper):
@@ -72,7 +74,7 @@ parser = argparse.ArgumentParser(description=desc, epilog=epilogue, formatter_cl
 parser._optionals.title = "OPTIONS"
 parser.add_argument("-i", "--input_file", action="append", required=True, help="input PDF file")
 parser.add_argument("-o", "--output_file", required=True, help="output PDF file")
-parser.add_argument("-p", "--page_range", action="append", help="page range, e.g., '3-end'")
+parser.add_argument("-p", "--page_range", action="append", help="page range, e.g., '2-9' or '3-end' or 'start-14'")
 parser.add_argument("-c", "--crops", action="append", help="cm crops left, bottom, right, top – e.g., '10,10,10,10'; prefix with ':'-delimited page range to limit effect")
 parser.add_argument("-r", "--rotate_page", type=int, action="append", help="rotate page of number by 90° (usable multiple times on same page!)")
 parser.add_argument("-s", "--symmetry", action="store_true", help="alternate horizontal crops between odd and even pages")
@@ -82,21 +84,13 @@ parser.add_argument("-m", "--print_margin", type=float, default=0.43, help="prin
 args = parser.parse_args()
 
 # some basic input validation
-for filename in args.input_file:
-    if not os.path.isfile(filename):
-        raise ValueError("-i: %s is not a file" % filename)
-    try:
-        with open(filename, 'rb') as file:
-            pypdf.PdfReader(file)
-    except pypdf.errors.PdfStreamError:
-        raise ValueError("-i: cannot interpret %s as PDF file" % filename)
 def validate_page_range(p_string, err_msg_prefix):
     err_msg = "%s: invalid page range string: %s" % (err_msg_prefix, p_string)
     if '-' not in p_string:
-        raise ValueError(err_msg)
+        raise ValueError("%s: page range string lacks '-': %s" % (err_msg_prefix, p_string))
     tokens = p_string.split("-")
     if len(tokens) > 2:
-        raise ValueError(err_msg)
+        raise ValueError("%s: page range string has too many '-': %s" % (err_msg_prefix, p_string))
     for i, token in enumerate(tokens):
         if token == "":
             continue
@@ -107,7 +101,26 @@ def validate_page_range(p_string, err_msg_prefix):
         try:
             int(token)
         except:
-            raise ValueError(err_msg)
+            raise ValueError("%s: page range string carries values that are neither integer, nor 'start', nor 'end': %s" % (err_msg_prefix, p_string))
+        if int(token) < 1:
+            raise ValueError("%s: page range string may not carry page numbers <1: %s" % (err_msg_prefix, p_string))
+    start = -1
+    end = -1
+    try:
+        start = int(tokens[0])
+        end = int(tokens[1])
+    except:
+        pass
+    if start > 0 and end > 0 and start > end:
+        raise ValueError("%s: page range starts higher than it ends: %s" % (err_msg_prefix, p_string))
+for filename in args.input_file:
+    if not os.path.isfile(filename):
+        raise ValueError("-i: %s is not a file" % filename)
+    try:
+        with open(filename, 'rb') as file:
+            pypdf.PdfReader(file)
+    except pypdf.errors.PdfStreamError:
+        raise ValueError("-i: cannot interpret %s as PDF file" % filename)
 if args.page_range:
     for p_string in args.page_range:
         validate_page_range(p_string, "-p")
@@ -132,17 +145,18 @@ if args.crops:
             except:
                 raise ValueError("-c: non-number crop in %s" % c_string)
 if args.rotate_page:
-    for r in arg.rotate_page:
+    for r in args.rotate_page:
         try:
             int(r)
         except:
             raise ValueError("-r: non-integer value: %s" % r)
+        if r < 1:
+            raise ValueError("-r: value must not be <1: %s" % r)
 try:
     float(args.print_margin)
 except:
-    raise ValueError("-m: non-float value: %s" % arg.print_margin)
+    raise ValueError("-m: non-float value: %s" % args.print_margin)
 
-
 # select pages from input files
 def parse_page_range(range_string, pages):
     start_page = 0
@@ -165,20 +179,26 @@ for i, input_file in enumerate(args.input_file):
     if args.page_range and len(args.page_range) > i:
         range_string = args.page_range[i]
     start_page, end_page = parse_page_range(range_string, reader.pages)
+    if end_page > len(reader.pages):  # no need to test start_page cause start_page > end_page is checked above
+        raise ValueError("-p: page range goes beyond pages of input file: %s" % range_string)
     for old_page_num in range(start_page, end_page):
         new_page_num += 1
         page = reader.pages[old_page_num]
         pages_to_add += [page]
         print("-i, -p: read in %s page number %d as new page %d" % (input_file, old_page_num+1, new_page_num))
 
-# if necessary, pad pages to multiple of 8
-if args.nup4:
-    mod_to_8 = len(pages_to_add) % 8
-    if mod_to_8 > 0:
-        print("-n: number of input pages %d not multiple of 8, padding to that" % len(pages_to_add))
-        for _ in range(8 - mod_to_8):
-            new_page = pypdf.PageObject.create_blank_page(width=a4_width, height=a4_height)
-            pages_to_add += [new_page]
+# we can do some more input validations now that we know how many pages output should have
+if args.crops:
+    for c_string in args.crops:
+        initial_split = c_string.split(':')
+        if len(initial_split) > 1:
+            start, end = parse_page_range(initial_split[0], pages_to_add)
+            if end > len(pages_to_add):
+                raise ValueError("-c: page range goes beyond number of pages we're building: %s" % initial_split[0])
+if args.rotate_page:
+    for r in args.rotate_page:
+        if r > len(pages_to_add):
+            raise ValueError("-r: page number beyond number of pages we're building: %d" % r)
 
 # rotate page canvas
 if args.rotate_page:
@@ -189,6 +209,15 @@ if args.rotate_page:
         page.add_transformation(pypdf.Transformation().translate(tx=a4_width/2, ty=a4_height/2))
         print("-r: rotating (by 90°) page", rotate_page)
 
+# if necessary, pad pages to multiple of 8
+if args.nup4:
+    mod_to_8 = len(pages_to_add) % 8
+    if mod_to_8 > 0:
+        print("-n: number of input pages %d not multiple of 8, padding to that" % len(pages_to_add))
+        for _ in range(8 - mod_to_8):
+            new_page = pypdf.PageObject.create_blank_page(width=a4_width, height=a4_height)
+            pages_to_add += [new_page]
+
 # normalize all pages to portrait A4
 for page in pages_to_add:
     if "/Rotate" in page:
-- 
2.30.2