From: Christian Heller <c.heller@plomlompom.de>
Date: Sun, 24 Sep 2023 03:39:13 +0000 (+0200)
Subject: Bookmaker: add more input validations.
X-Git-Url: https://plomlompom.com/repos/%7B%7Bdb.prefix%7D%7D/static/%7B%7B%20web_path%20%7D%7D/%7B%7Bdb.prefix%7D%7D/todo?a=commitdiff_plain;h=1387f3b998f9b9cd71bd6dd7a0ab6d141cea71b3;p=misc

Bookmaker: add more input validations.
---

diff --git a/bookmaker.py b/bookmaker.py
index 48d4af2..646877a 100755
--- a/bookmaker.py
+++ b/bookmaker.py
@@ -51,6 +51,8 @@ Same as -n, but draw lines marking printable-region margins, page quarts, spine
 
 NOTES:
 
+For arguments like -p, page numbers are assumed to start with 1 (not 0, which is treated as an invalid page number value).
+
 The target page shape so far is assumed to be A4 in portrait orientation; bookmaker.py normalizes all pages to this format before applying crops, and removes any source PDF /Rotate commands (for their production of landscape orientations).
 
 The --nup4 quartering puts pages into a specific order optimized for no-tumble duplex print-outs that can easily be folded and cut into pages of a small A6 book.  Each unit of 8 pages from the source PDF is mapped thus onto two subsequent pages (i.e. front and back of a printed A4 paper):
@@ -72,7 +74,7 @@ parser = argparse.ArgumentParser(description=desc, epilog=epilogue, formatter_cl
 parser._optionals.title = "OPTIONS"
 parser.add_argument("-i", "--input_file", action="append", required=True, help="input PDF file")
 parser.add_argument("-o", "--output_file", required=True, help="output PDF file")
-parser.add_argument("-p", "--page_range", action="append", help="page range, e.g., '3-end'")
+parser.add_argument("-p", "--page_range", action="append", help="page range, e.g., '2-9' or '3-end' or 'start-14'")
 parser.add_argument("-c", "--crops", action="append", help="cm crops left, bottom, right, top – e.g., '10,10,10,10'; prefix with ':'-delimited page range to limit effect")
 parser.add_argument("-r", "--rotate_page", type=int, action="append", help="rotate page of number by 90° (usable multiple times on same page!)")
 parser.add_argument("-s", "--symmetry", action="store_true", help="alternate horizontal crops between odd and even pages")
@@ -82,21 +84,13 @@ parser.add_argument("-m", "--print_margin", type=float, default=0.43, help="prin
 args = parser.parse_args()
 
 # some basic input validation
-for filename in args.input_file:
-    if not os.path.isfile(filename):
-        raise ValueError("-i: %s is not a file" % filename)
-    try:
-        with open(filename, 'rb') as file:
-            pypdf.PdfReader(file)
-    except pypdf.errors.PdfStreamError:
-        raise ValueError("-i: cannot interpret %s as PDF file" % filename)
 def validate_page_range(p_string, err_msg_prefix):
     err_msg = "%s: invalid page range string: %s" % (err_msg_prefix, p_string)
     if '-' not in p_string:
-        raise ValueError(err_msg)
+        raise ValueError("%s: page range string lacks '-': %s" % (err_msg_prefix, p_string))
     tokens = p_string.split("-")
     if len(tokens) > 2:
-        raise ValueError(err_msg)
+        raise ValueError("%s: page range string has too many '-': %s" % (err_msg_prefix, p_string))
     for i, token in enumerate(tokens):
         if token == "":
             continue
@@ -107,7 +101,26 @@ def validate_page_range(p_string, err_msg_prefix):
         try:
             int(token)
         except:
-            raise ValueError(err_msg)
+            raise ValueError("%s: page range string carries values that are neither integer, nor 'start', nor 'end': %s" % (err_msg_prefix, p_string))
+        if int(token) < 1:
+            raise ValueError("%s: page range string may not carry page numbers <1: %s" % (err_msg_prefix, p_string))
+    start = -1
+    end = -1
+    try:
+        start = int(tokens[0])
+        end = int(tokens[1])
+    except:
+        pass
+    if start > 0 and end > 0 and start > end:
+        raise ValueError("%s: page range starts higher than it ends: %s" % (err_msg_prefix, p_string))
+for filename in args.input_file:
+    if not os.path.isfile(filename):
+        raise ValueError("-i: %s is not a file" % filename)
+    try:
+        with open(filename, 'rb') as file:
+            pypdf.PdfReader(file)
+    except pypdf.errors.PdfStreamError:
+        raise ValueError("-i: cannot interpret %s as PDF file" % filename)
 if args.page_range:
     for p_string in args.page_range:
         validate_page_range(p_string, "-p")
@@ -132,17 +145,18 @@ if args.crops:
             except:
                 raise ValueError("-c: non-number crop in %s" % c_string)
 if args.rotate_page:
-    for r in arg.rotate_page:
+    for r in args.rotate_page:
         try:
             int(r)
         except:
             raise ValueError("-r: non-integer value: %s" % r)
+        if r < 1:
+            raise ValueError("-r: value must not be <1: %s" % r)
 try:
     float(args.print_margin)
 except:
     raise ValueError("-m: non-float value: %s" % arg.print_margin)
 
-
 # select pages from input files
 def parse_page_range(range_string, pages):
     start_page = 0
@@ -165,20 +179,26 @@ for i, input_file in enumerate(args.input_file):
     if args.page_range and len(args.page_range) > i:
         range_string = args.page_range[i]
     start_page, end_page = parse_page_range(range_string, reader.pages)
+    if end_page > len(reader.pages):  # no need to test start_page, because start_page > end_page is already checked above
+        raise ValueError("-p: page range goes beyond pages of input file: %s" % range_string)
     for old_page_num in range(start_page, end_page):
         new_page_num += 1
         page = reader.pages[old_page_num]
         pages_to_add += [page]
         print("-i, -p: read in %s page number %d as new page %d" % (input_file, old_page_num+1, new_page_num))
 
-# if necessary, pad pages to multiple of 8
-if args.nup4:
-    mod_to_8 = len(pages_to_add) % 8
-    if mod_to_8 > 0:
-        print("-n: number of input pages %d not multiple of 8, padding to that" % len(pages_to_add))
-        for _ in range(8 - mod_to_8):
-            new_page = pypdf.PageObject.create_blank_page(width=a4_width, height=a4_height)
-            pages_to_add += [new_page]
+# we can do some more input validations now that we know how many pages output should have
+if args.crops:
+    for c_string in args.crops:
+        initial_split = c_string.split(':')
+        if len(initial_split) > 1:
+            start, end = parse_page_range(initial_split[0], pages_to_add)
+            if end > len(pages_to_add):
+                 raise ValueError("-c: page range goes beyond number of pages we're building: %s" % initial_split[0])
+if args.rotate_page:
+    for r in args.rotate_page:
+        if r > len(pages_to_add):
+             raise ValueError("-r: page number beyond number of pages we're building: %d" % r)
 
 # rotate page canvas
 if args.rotate_page:
@@ -189,6 +209,15 @@ if args.rotate_page:
         page.add_transformation(pypdf.Transformation().translate(tx=a4_width/2, ty=a4_height/2))
         print("-r: rotating (by 90°) page", rotate_page)
 
+# if necessary, pad pages to multiple of 8
+if args.nup4:
+    mod_to_8 = len(pages_to_add) % 8
+    if mod_to_8 > 0:
+        print("-n: number of input pages %d not multiple of 8, padding to that" % len(pages_to_add))
+        for _ in range(8 - mod_to_8):
+            new_page = pypdf.PageObject.create_blank_page(width=a4_width, height=a4_height)
+            pages_to_add += [new_page]
+
 # normalize all pages to portrait A4
 for page in pages_to_add:
     if "/Rotate" in page: