import pypdf
import argparse
import io
+import os
from reportlab.lib.pagesizes import A4
a4_width, a4_height = A4
points_per_cm = 10 * 72 / 25.4
NOTES:
+For arguments like -p, page numbers are assumed to start with 1 (not 0, which is treated as an invalid page number value).
+
The target page shape so far is assumed to be A4 in portrait orientation; bookmaker.py normalizes all pages to this format before applying crops, and removes any source PDF /Rotate commands (for their production of landscape orientations).
The --nup4 quartering puts pages into a specific order optimized for no-tumble duplex print-outs that can easily be folded and cut into pages of a small A6 book. Each unit of 8 pages from the source PDF is mapped thus onto two subsequent pages (i.e. front and back of a printed A4 paper):
parser._optionals.title = "OPTIONS"
parser.add_argument("-i", "--input_file", action="append", required=True, help="input PDF file")
parser.add_argument("-o", "--output_file", required=True, help="output PDF file")
-parser.add_argument("-p", "--page_range", action="append", help="page range, e.g., '3-end'")
+parser.add_argument("-p", "--page_range", action="append", help="page range, e.g., '2-9' or '3-end' or 'start-14'")
parser.add_argument("-c", "--crops", action="append", help="cm crops left, bottom, right, top – e.g., '10,10,10,10'; prefix with ':'-delimited page range to limit effect")
parser.add_argument("-r", "--rotate_page", type=int, action="append", help="rotate page of number by 90° (usable multiple times on same page!)")
parser.add_argument("-s", "--symmetry", action="store_true", help="alternate horizontal crops between odd and even pages")
parser.add_argument("-n", "--nup4", action='store_true', help="puts 4 input pages onto 1 output page, adds binding cut stencil")
parser.add_argument("-a", "--analyze", action="store_true", help="in --nup4, print lines identifying spine, page borders")
parser.add_argument("-m", "--print_margin", type=float, default=0.43, help="print margin for --nup4 in cm (default 0.43)")
-parser.add_argument("-H", "--long_help", action="store_true", help="show examples, explanations, additional usage notes")
args = parser.parse_args()
+# some basic input validation
def validate_page_range(p_string, err_msg_prefix):
    """Check that p_string is a syntactically valid page range.

    A valid range has the form "<start>-<end>". Each side may be empty, a
    positive integer (page numbers start at 1), or the keyword "start" on the
    left side / "end" on the right side.

    Args:
        p_string: the page range string to check, e.g. "2-9" or "3-end".
        err_msg_prefix: text put in front of every error message (the callers
            pass the option flag, e.g. "-p").

    Raises:
        ValueError: if the string has no '-' or more than one, if a side is
            neither an integer nor its allowed keyword, if a page number is
            below 1, or if both sides are numbers and the start exceeds the
            end.
    """
    if '-' not in p_string:
        raise ValueError("%s: page range string lacks '-': %s" % (err_msg_prefix, p_string))
    tokens = p_string.split("-")
    if len(tokens) > 2:
        raise ValueError("%s: page range string has too many '-': %s" % (err_msg_prefix, p_string))
    # Exactly two tokens remain here; pair each with the keyword allowed on its side.
    bounds = []
    for token, keyword in zip(tokens, ("start", "end")):
        if token in ("", keyword):
            bounds.append(None)  # open-ended side, nothing numeric to check
            continue
        try:
            value = int(token)
        except ValueError as err:
            raise ValueError("%s: page range string carries values that are neither integer, nor 'start', nor 'end': %s" % (err_msg_prefix, p_string)) from err
        if value < 1:
            raise ValueError("%s: page range string may not carry page numbers <1: %s" % (err_msg_prefix, p_string))
        bounds.append(value)
    start, end = bounds
    # The ordering can only be checked when both sides are explicit numbers.
    if start is not None and end is not None and start > end:
        raise ValueError("%s: page range starts higher than it ends: %s" % (err_msg_prefix, p_string))
# Validate the command-line input up front, so that bad arguments fail with a
# clear message before any page processing starts.

# -i: every input must be an existing file that pypdf can open.
for filename in args.input_file:
    if not os.path.isfile(filename):
        raise ValueError("-i: %s is not a file" % filename)
    try:
        with open(filename, 'rb') as file:
            pypdf.PdfReader(file)
    except pypdf.errors.PdfStreamError as err:
        raise ValueError("-i: cannot interpret %s as PDF file" % filename) from err

# -p: syntactically valid ranges, and no more ranges than input files.
if args.page_range:
    for p_string in args.page_range:
        validate_page_range(p_string, "-p")
    if len(args.page_range) > len(args.input_file):
        raise ValueError("more -p arguments than -i arguments")

# -c: optional "<page range>:" prefix followed by exactly four numeric crops.
if args.crops:
    for c_string in args.crops:
        initial_split = c_string.split(':')
        if len(initial_split) > 2:
            raise ValueError("-c: cropping string has multiple ':': %s" % c_string)
        if len(initial_split) > 1:
            validate_page_range(initial_split[0], "-c")
            crops = initial_split[1].split(",")
        else:
            crops = initial_split[0].split(",")
        if len(crops) != 4:
            raise ValueError("-c: cropping should contain three ',': %s" % c_string)
        for crop in crops:
            try:
                float(crop)
            except ValueError as err:
                raise ValueError("-c: non-number crop in %s" % c_string) from err

# -r: page numbers are 1-based integers.
if args.rotate_page:
    for r in args.rotate_page:
        try:
            int(r)
        except (TypeError, ValueError) as err:
            raise ValueError("-r: non-integer value: %s" % r) from err
        if r < 1:
            raise ValueError("-r: value must not be <1: %s" % r)

# -m: must be usable as a float.
try:
    float(args.print_margin)
except (TypeError, ValueError) as err:
    # The original message referenced the undefined name `arg` here, which
    # would have raised NameError instead of the intended ValueError.
    raise ValueError("-m: non-float value: %s" % args.print_margin) from err
# select pages from input files
def parse_page_range(range_string, pages):
if args.page_range and len(args.page_range) > i:
range_string = args.page_range[i]
start_page, end_page = parse_page_range(range_string, reader.pages)
+ if end_page > len(reader.pages): # no need to test start_page cause start_page > end_page is checked above
+ raise ValueError("-p: page range goes beyond pages of input file: %s" % range_string)
for old_page_num in range(start_page, end_page):
new_page_num += 1
page = reader.pages[old_page_num]
pages_to_add += [page]
print("-i, -p: read in %s page number %d as new page %d" % (input_file, old_page_num+1, new_page_num))
-# if necessary, pad pages to multiple of 8
-if args.nup4:
- mod_to_8 = len(pages_to_add) % 8
- if mod_to_8 > 0:
- print("-n: number of input pages %d not multiple of 8, padding to that" % len(pages_to_add))
- for _ in range(8 - mod_to_8):
- new_page = pypdf.PageObject.create_blank_page(width=a4_width, height=a4_height)
- pages_to_add += [new_page]
# Now that the number of selected pages is known, reject -c page ranges and -r
# page numbers that point beyond the pages being built.
for c_string in args.crops or ():
    initial_split = c_string.split(':')
    if len(initial_split) <= 1:
        continue  # no page range prefix, nothing to bound-check
    start, end = parse_page_range(initial_split[0], pages_to_add)
    if end > len(pages_to_add):
        raise ValueError("-c: page range goes beyond number of pages we're building: %s" % initial_split[0])
for r in args.rotate_page or ():
    if r > len(pages_to_add):
        raise ValueError("-r: page number beyond number of pages we're building: %d" % r)
# rotate page canvas
if args.rotate_page:
page.add_transformation(pypdf.Transformation().translate(tx=a4_width/2, ty=a4_height/2))
print("-r: rotating (by 90°) page", rotate_page)
# For --nup4, append blank A4 pages until the page count is a multiple of 8.
if args.nup4:
    pages_missing = -len(pages_to_add) % 8
    if pages_missing:
        print("-n: number of input pages %d not multiple of 8, padding to that" % len(pages_to_add))
        pages_to_add.extend(
            pypdf.PageObject.create_blank_page(width=a4_width, height=a4_height)
            for _ in range(pages_missing)
        )
+
# normalize all pages to portrait A4
for page in pages_to_add:
if "/Rotate" in page:
# Per-page crop tuples (left, bottom, right, top; in points) and zoom factors.
# The defaults mean "no crop, no zoom"; pages matched by a -c argument are
# overridden below.
crops_at_page = [(0,0,0,0)]*len(pages_to_add)
zoom_at_page = [1]*len(pages_to_add)
if args.crops:
    for c_string in args.crops:
        # A -c value is "[<page range>:]<left>,<bottom>,<right>,<top>" in cm.
        initial_split = c_string.split(':')
        if len(initial_split) > 1:
            page_range = initial_split[0]
            crops = initial_split[1]
        else:
            page_range = None  # presumably selects all pages; see parse_page_range
            crops = initial_split[0]
        # start_page is a 0-based index, end_page an exclusive bound (see the
        # range() below and the "+ 1" in the log output).
        start_page, end_page = parse_page_range(page_range, pages_to_add)
        crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm = [float(x) for x in crops.split(',')]
        crop_left = crop_left_cm * points_per_cm
        crop_bottom = crop_bottom_cm * points_per_cm
        crop_right = crop_right_cm * points_per_cm
        crop_top = crop_top_cm * points_per_cm
        if args.symmetry:
            print("-c, -s: to pages %d to %d applying crops: left %.2fcm, bottom %.2fcm, right %.2fcm, top %.2fcm (but alternating left and right crop between even and odd pages)" % (start_page + 1, end_page, crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm))
        else:
            print("-c: to pages %d to %d applying crops: left %.2fcm, bottom %.2fcm, right %.2fcm, top %.2fcm" % (start_page + 1, end_page, crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm))
        cropped_width = a4_width - crop_left - crop_right
        cropped_height = a4_height - crop_bottom - crop_top
        # Crops that consume the whole page would divide by zero or produce a
        # negative (mirroring) zoom factor below.
        if cropped_width <= 0 or cropped_height <= 0:
            raise ValueError("-c: crops leave no page area: %s" % c_string)
        zoom_horizontal = a4_width / cropped_width
        zoom_vertical = a4_height / cropped_height
        if (zoom_horizontal > 1 and zoom_vertical < 1) or (zoom_horizontal < 1 and zoom_vertical > 1):
            raise ValueError("crops would create opposing zoom directions")
        elif zoom_horizontal + zoom_vertical > 2:
            # Both factors enlarge: use the smaller one (the one nearer to 1).
            zoom = min(zoom_horizontal, zoom_vertical)
        else:
            # Both factors shrink or are neutral: use the one nearer to 1.
            zoom = max(zoom_horizontal, zoom_vertical)
        for page_num in range(start_page, end_page):
            # With --symmetry, odd 0-based indices get left and right swapped.
            if args.symmetry and page_num % 2:
                crops_at_page[page_num] = (crop_right, crop_bottom, crop_left, crop_top)
            else:
                crops_at_page[page_num] = (crop_left, crop_bottom, crop_right, crop_top)
            zoom_at_page[page_num] = zoom
writer = pypdf.PdfWriter()
if not args.nup4: