From 877731d064ed31edb8e343b98b6fd91fab12dee9 Mon Sep 17 00:00:00 2001
From: Christian Heller <c.heller@plomlompom.de>
Date: Sun, 24 Sep 2023 05:15:31 +0200
Subject: [PATCH] Bookmaker: add some basic input validation.

---
 bookmaker.py | 134 +++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 98 insertions(+), 36 deletions(-)

diff --git a/bookmaker.py b/bookmaker.py
index a82f986..48d4af2 100755
--- a/bookmaker.py
+++ b/bookmaker.py
@@ -2,6 +2,7 @@
 import pypdf
 import argparse
 import io
+import os
 from reportlab.lib.pagesizes import A4
 a4_width, a4_height = A4
 points_per_cm = 10 * 72 / 25.4
@@ -78,9 +79,70 @@ parser.add_argument("-s", "--symmetry", action="store_true", help="alternate hor
 parser.add_argument("-n", "--nup4", action='store_true', help="puts 4 input pages onto 1 output page, adds binding cut stencil")
 parser.add_argument("-a", "--analyze", action="store_true", help="in --nup4, print lines identifying spine, page borders")
 parser.add_argument("-m", "--print_margin", type=float, default=0.43, help="print margin for --nup4 in cm (default 0.43)")
-parser.add_argument("-H", "--long_help", action="store_true", help="show examples, explanations, additional usage notes")
 args = parser.parse_args()
 
+# some basic input validation
+for filename in args.input_file:  # fail early on unusable -i arguments
+    if not os.path.isfile(filename):
+        raise ValueError("-i: %s is not a file" % filename)
+    try:
+        with open(filename, 'rb') as file:
+            pypdf.PdfReader(file)  # parsed only to check readability; result discarded
+    except pypdf.errors.PdfReadError:  # parent class, so PdfStreamError is still caught
+        raise ValueError("-i: cannot interpret %s as PDF file" % filename)
+def validate_page_range(p_string, err_msg_prefix):
+    """Check that p_string has the form "[START]-[END]"; raise ValueError if not.
+
+    START may be empty, "start", or an integer; END may be empty, "end", or
+    an integer. err_msg_prefix (e.g. "-p") is prepended to the error text.
+    """
+    err_msg = "%s: invalid page range string: %s" % (err_msg_prefix, p_string)
+    tokens = p_string.split("-")
+    # exactly one "-" must separate the two (possibly empty) bounds
+    if len(tokens) != 2:
+        raise ValueError(err_msg)
+    for token, keyword in zip(tokens, ("start", "end")):
+        if token in ("", keyword):
+            continue
+        try:
+            int(token)
+        except ValueError:
+            raise ValueError(err_msg) from None
+if args.page_range:
+    for p_string in args.page_range:
+        validate_page_range(p_string, "-p")  # raises ValueError on a malformed range
+    if len(args.page_range) > len(args.input_file):  # -p entries may not outnumber -i entries
+        raise ValueError("more -p arguments than -i arguments")
+if args.crops:
+    for c_string in args.crops:  # each entry: "[PAGE_RANGE:]LEFT,BOTTOM,RIGHT,TOP"
+        initial_split = c_string.split(':')
+        if len(initial_split) > 2:
+            raise ValueError("-c: cropping string has multiple ':': %s" % c_string)
+        if len(initial_split) > 1:
+            # optional page range prefix in front of the ':'
+            validate_page_range(initial_split[0], "-c")
+            crops = initial_split[1].split(",")
+        else:
+            crops = initial_split[0].split(",")
+        if len(crops) != 4:
+            raise ValueError("-c: cropping should contain three ',': %s" % c_string)
+        for crop in crops:
+            try:
+                float(crop)
+            except ValueError:
+                raise ValueError("-c: non-number crop in %s" % c_string) from None
+if args.rotate_page:
+    for r in args.rotate_page:  # every -r value must be convertible to int
+        try:
+            int(r)
+        except (TypeError, ValueError):
+            raise ValueError("-r: non-integer value: %s" % r) from None
+try:
+    float(args.print_margin)  # argparse already applies type=float; kept as a safeguard
+except (TypeError, ValueError):
+    raise ValueError("-m: non-float value: %s" % args.print_margin) from None
+
+
 # select pages from input files
 def parse_page_range(range_string, pages):
     start_page = 0
@@ -141,41 +203,41 @@ for page in pages_to_add:
 crops_at_page = [(0,0,0,0)]*len(pages_to_add)
 zoom_at_page = [1]*len(pages_to_add)
 if args.crops:
-  for crops in args.crops:
-      initial_split = crops.split(':')
-      if len(initial_split) > 1:
-          page_range = initial_split[0]
-          crops = initial_split[1]
-      else:
-          page_range = None
-          crops = initial_split[0]
-      start_page, end_page = parse_page_range(page_range, pages_to_add)
-      crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm = [float(x) for x in  crops.split(',')]
-      crop_left = crop_left_cm * points_per_cm
-      crop_bottom = crop_bottom_cm * points_per_cm
-      crop_right = crop_right_cm * points_per_cm
-      crop_top = crop_top_cm * points_per_cm
-      if args.symmetry:
-          print("-c, -t: to pages %d to %d applying crops: left %.2fcm, bottom %.2fcm, right %.2fcm, top %.2fcm (but alternating left and right crop between even and odd pages)" % (start_page + 1, end_page, crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm))
-      else:
-          print("-c: to pages %d to %d applying crops: left %.2fcm, bottom %.2fcm, right %.2fcm, top %.2fcm" % (start_page + 1, end_page, crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm))
-      cropped_width  = a4_width - crop_left - crop_right
-      cropped_height = a4_height - crop_bottom - crop_top
-      zoom = 1
-      zoom_horizontal = a4_width / (a4_width - crop_left - crop_right)
-      zoom_vertical = a4_height / (a4_height - crop_bottom - crop_top)
-      if (zoom_horizontal > 1 and zoom_vertical < 1) or (zoom_horizontal < 1 and zoom_vertical > 1):
-          raise ValueError("crops would create opposing zoom directions")
-      elif zoom_horizontal + zoom_vertical > 2:
-          zoom = min(zoom_horizontal, zoom_vertical)
-      else:
-          zoom = max(zoom_horizontal, zoom_vertical)
-      for page_num in range(start_page, end_page):
-          if args.symmetry and page_num % 2:
-              crops_at_page[page_num] = (crop_right, crop_bottom, crop_left, crop_top)
-          else:
-              crops_at_page[page_num] = (crop_left, crop_bottom, crop_right, crop_top)
-          zoom_at_page[page_num] = zoom
+    for crops in args.crops:  # each entry: "[PAGE_RANGE:]LEFT,BOTTOM,RIGHT,TOP", crop values in cm
+        initial_split = crops.split(':')
+        if len(initial_split) > 1:
+            page_range = initial_split[0]
+            crops = initial_split[1]
+        else:
+            page_range = None  # no explicit range; parse_page_range receives None
+            crops = initial_split[0]
+        start_page, end_page = parse_page_range(page_range, pages_to_add)
+        crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm = [float(x) for x in  crops.split(',')]
+        crop_left = crop_left_cm * points_per_cm  # cm -> PDF points
+        crop_bottom = crop_bottom_cm * points_per_cm
+        crop_right = crop_right_cm * points_per_cm
+        crop_top = crop_top_cm * points_per_cm
+        if args.symmetry:
+            print("-c, -t: to pages %d to %d applying crops: left %.2fcm, bottom %.2fcm, right %.2fcm, top %.2fcm (but alternating left and right crop between even and odd pages)" % (start_page + 1, end_page, crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm))
+        else:
+            print("-c: to pages %d to %d applying crops: left %.2fcm, bottom %.2fcm, right %.2fcm, top %.2fcm" % (start_page + 1, end_page, crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm))
+        cropped_width  = a4_width - crop_left - crop_right  # NOTE(review): not read in the visible code; the zoom math below recomputes it
+        cropped_height = a4_height - crop_bottom - crop_top
+        zoom = 1  # placeholder; every non-raising branch below overwrites it
+        zoom_horizontal = a4_width / (a4_width - crop_left - crop_right)  # factor scaling the cropped width back to A4 width
+        zoom_vertical = a4_height / (a4_height - crop_bottom - crop_top)  # same for the height
+        if (zoom_horizontal > 1 and zoom_vertical < 1) or (zoom_horizontal < 1 and zoom_vertical > 1):
+            raise ValueError("crops would create opposing zoom directions")
+        elif zoom_horizontal + zoom_vertical > 2:  # both factors >= 1 here: use the smaller one
+            zoom = min(zoom_horizontal, zoom_vertical)
+        else:  # both factors <= 1 here: use the larger one
+            zoom = max(zoom_horizontal, zoom_vertical)
+        for page_num in range(start_page, end_page):
+            if args.symmetry and page_num % 2:  # odd 0-based index: swap left and right crop
+                crops_at_page[page_num] = (crop_right, crop_bottom, crop_left, crop_top)
+            else:
+                crops_at_page[page_num] = (crop_left, crop_bottom, crop_right, crop_top)
+            zoom_at_page[page_num] = zoom
 
 writer = pypdf.PdfWriter()
 if not args.nup4:
-- 
2.30.2