home · contact · privacy
Allow page ranges beyond input, pad output with blank pages.
[bookmaker] / bookmaker.py
1 #!/usr/bin/env python3
2 """
3 bookmaker.py is a helper for optimizing PDFs for the production of small self-printed, self-bound physical books.  Towards this goal it offers various PDF manipulation options that may also be used independently and for other purposes.
4 """
5 import argparse
6 import io
7 import os
8 import sys
9
def handled_error_exit(msg):
    """Print ``msg`` behind an "ERROR: " prefix and terminate with exit status 1."""
    error_line = f"ERROR: {msg}"
    print(error_line)
    sys.exit(1)
13
# pypdf is a hard requirement: if it cannot be imported, report that as a
# one-line error and exit instead of showing an import traceback.
try:
    import pypdf
except ImportError:
    handled_error_exit("Can't run at all without pypdf installed.")
18
# some general paper geometry constants
# (all lengths below are expressed in points; 72 points per inch, 25.4 mm per inch)
POINTS_PER_CM = 10 * 72 / 25.4
A4_WIDTH = 21 * POINTS_PER_CM  # A4 is 21cm wide
A4_HEIGHT = 29.7 * POINTS_PER_CM  # A4 is 29.7cm high
A4 = (A4_WIDTH, A4_HEIGHT)  # passed as pagesize= to the canvas class in ornate_nup4

# constants specifically for --nup4
A4_HALF_WIDTH = A4_WIDTH / 2
A4_HALF_HEIGHT = A4_HEIGHT / 2
# dimensions of the cut stencil drawn by draw_cut
CUT_DEPTH = 1.95 * POINTS_PER_CM
CUT_WIDTH = 1.05 * POINTS_PER_CM
MIDDLE_POINT_DEPTH = 0.4 * POINTS_PER_CM
# spine margin per page, defined un-shrunk (see Nup4Geometry)
INNER_SPINE_MARGIN_PER_PAGE = 1 * POINTS_PER_CM
# final scale applied to every input page in nup4_outer_page_transform
QUARTER_SCALE_FACTOR = 0.5
# order in which resort_pages_for_nup4 picks pages out of each pack of eight
PAGE_ORDER_FOR_NUP4 = (3,0,7,4,1,2,5,6)
34
35
class PageCrop:
    """Crop margins for one A4 page, plus the zoom factor derived from them.

    The crops are kept both as given (``*_cm`` attributes, used verbatim for
    display) and converted to points (``left``, ``bottom``, ``right``,
    ``top``).  ``zoom`` is the single scale factor that fits the remaining
    area back onto an A4 page without overflowing in either direction.

    Raises:
        HandledException: if the crops leave no page area at all, or if the
            horizontal and vertical crops would zoom in opposite directions.
        ValueError: if a crop value cannot be converted by ``float()``.
    """

    def __init__(self, left_cm=0, bottom_cm=0, right_cm=0, top_cm=0):
        self.left_cm = left_cm
        self.bottom_cm = bottom_cm
        self.right_cm = right_cm
        self.top_cm = top_cm
        self.left = float(self.left_cm) * POINTS_PER_CM
        self.bottom = float(self.bottom_cm) * POINTS_PER_CM
        self.right = float(self.right_cm) * POINTS_PER_CM
        self.top = float(self.top_cm) * POINTS_PER_CM
        # Crops that eat up a whole page dimension would divide by zero (or
        # produce a negative zoom) below, so reject them as a user error.
        if self.remaining_width <= 0 or self.remaining_height <= 0:
            raise HandledException(f"-c: crops leave no page area: {self.format_in_cm}")
        zoom_horizontal = A4_WIDTH / self.remaining_width
        zoom_vertical = A4_HEIGHT / self.remaining_height
        if (zoom_horizontal > 1 and zoom_vertical < 1) or (zoom_horizontal < 1 and zoom_vertical > 1):
            raise HandledException("-c: crops would create opposing zoom directions")
        elif zoom_horizontal + zoom_vertical > 2:
            # Zooming in: take the smaller factor so neither dimension overflows.
            self.zoom = min(zoom_horizontal, zoom_vertical)
        else:
            # Zooming out (or no zoom): take the factor closer to 1.
            self.zoom = max(zoom_horizontal, zoom_vertical)

    def __str__(self):
        return str(vars(self))

    @property
    def format_in_cm(self):
        """Human-readable listing of the four crops as they were passed in."""
        return f"left {self.left_cm}cm, bottom {self.bottom_cm}cm, right {self.right_cm}cm, top {self.top_cm}cm"

    @property
    def remaining_width(self):
        """Page width in points left after the left and right crops."""
        return A4_WIDTH - self.left - self.right

    @property
    def remaining_height(self):
        """Page height in points left after the bottom and top crops."""
        return A4_HEIGHT - self.bottom - self.top

    def give_mirror(self):
        """Return a new PageCrop with the left and right crops swapped."""
        return PageCrop(left_cm=self.right_cm, bottom_cm=self.bottom_cm, right_cm=self.left_cm, top_cm=self.top_cm)
73
74
class Nup4Geometry:
    """Shrink factors for --nup4 derived from the printable-area margin.

    Attributes set by the constructor:
        margin: the margin converted from cm to points.
        shrink_for_margin: share of the A4 width left between both margins.
        shrink_for_spine: scale factor that leaves room for the inner spine
            margin of each page.
    """

    def __init__(self, margin_cm):
        margin_in_points = margin_cm * POINTS_PER_CM
        self.margin = margin_in_points
        self.shrink_for_margin = (A4_WIDTH - 2 * margin_in_points)/A4_WIDTH
        # NB: The spine size is defined un-shrunk, but .shrink_for_spine gets
        # applied to values already shrunk for the margin, so that shrinking
        # is undone here by dividing through it.
        unshrunk_spine_share = INNER_SPINE_MARGIN_PER_PAGE / A4_HALF_WIDTH
        self.shrink_for_spine = 1 - unshrunk_spine_share / self.shrink_for_margin
83
84
class HandledException(Exception):
    """Expected failure carrying a message meant to be shown to the user."""
87
88
def parse_args():
    """Define the command line interface and return the parsed arguments.

    The module docstring is used as the help description.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        epilog="See README.txt for detailed usage instructions, command examples, etc.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    add = parser.add_argument
    # input/output selection
    add("-i", "--input_file", action="append", required=True, help="input PDF file")
    add("-o", "--output_file", required=True, help="output PDF file")
    add("-p", "--page_range", action="append", help="page range, e.g., '2-9' or '3-end' or 'start-14'")
    # per-page manipulations
    add("-c", "--crops", action="append", help="cm crops left, bottom, right, top – e.g., '10,10,10,10'; prefix with ':'-delimited page range to limit effect")
    add("-r", "--rotate_page", type=int, action="append", help="rotate page of number by 90° (usable multiple times on same page!)")
    add("-s", "--symmetry", action="store_true", help="alternate horizontal crops between odd and even pages")
    # --nup4 and its companions
    add("-n", "--nup4", action="store_true", help="puts 4 input pages onto 1 output page, adds binding cut stencil")
    add("-a", "--analyze", action="store_true", help="in --nup4, print lines identifying spine, page borders")
    add("-m", "--print_margin", type=float, default=0.43, help="print margin for --nup4 in cm (default 0.43)")
    return parser.parse_args()
102
103
def validate_inputs_first_pass(args):
    """Check all arguments that can be judged before any pages are read in.

    Verifies that every input file exists and can be opened by pypdf, that
    page ranges and crop strings are well-formed, that rotation targets are
    integers >= 1, and that the print margin is a number.

    Args:
        args: the namespace returned by parse_args().

    Raises:
        HandledException: on the first invalid argument found, with a message
            prefixed by the option it belongs to.
    """
    for filename in args.input_file:
        if not os.path.isfile(filename):
            raise HandledException(f"-i: {filename} is not a file")
        try:
            with open(filename, 'rb') as file:
                pypdf.PdfReader(file)
        except pypdf.errors.PdfStreamError:
            raise HandledException(f"-i: cannot interpret {filename} as PDF file")
    if args.page_range:
        for p_string in args.page_range:
            validate_page_range(p_string, "-p")
        if len(args.page_range) > len(args.input_file):
            raise HandledException("-p: more --page_range arguments than --input_file arguments")
    if args.crops:
        for c_string in args.crops:
            # split_crops_string silently ignores anything after a second ':',
            # so a surplus of ':' has to be rejected here.
            initial_split = c_string.split(':')
            if len(initial_split) > 2:
                raise HandledException(f"-c: cropping string has multiple ':': {c_string}")
            page_range, crops = split_crops_string(c_string)
            crops = crops.split(",")
            if page_range:
                validate_page_range(page_range, "-c")
            if len(crops) != 4:
                raise HandledException(f"-c: cropping does not contain exactly three ',': {c_string}")
            for crop in crops:
                try:
                    float(crop)
                except ValueError:
                    raise HandledException(f"-c: non-number crop in: {c_string}")
    if args.rotate_page:
        for r in args.rotate_page:
            try:
                int(r)
            except ValueError:
                raise HandledException(f"-r: non-integer value: {r}")
            if r < 1:
                raise HandledException(f"-r: value must not be <1: {r}")
    try:
        float(args.print_margin)
    except (TypeError, ValueError):
        # The message used to reference an undefined name ("arg"), which turned
        # this error path into a NameError.
        raise HandledException(f"-m: non-float value: {args.print_margin}")
146
147
def validate_page_range(p_string, err_msg_prefix):
    """Ensure ``p_string`` is a well-formed page range of the form 'A-B'.

    Each side may be empty, a page number >= 1, or the keyword 'start' (left
    side only) respectively 'end' (right side only).  If both sides are
    numbers, the left one must not exceed the right one.

    Args:
        p_string: the page range string to check.
        err_msg_prefix: option name to put in front of error messages.

    Raises:
        HandledException: if the string violates any of the rules above.
    """
    prefix = f"{err_msg_prefix}: page range string"
    dash_count = p_string.count("-")
    if dash_count == 0:
        raise HandledException(f"{prefix} lacks '-': {p_string}")
    if dash_count > 1:
        raise HandledException(f"{prefix} has too many '-': {p_string}")
    keyword_for_side = ("start", "end")
    numeric_bounds = []
    for side, token in enumerate(p_string.split("-")):
        if token in ("", keyword_for_side[side]):
            # open-ended side, nothing numeric to compare later
            numeric_bounds.append(None)
            continue
        try:
            page_number = int(token)
        except ValueError:
            raise HandledException(f"{prefix} carries value neither integer, nor 'start', nor 'end': {p_string}")
        if page_number < 1:
            raise HandledException(f"{prefix} carries page number <1: {p_string}")
        numeric_bounds.append(page_number)
    lower, upper = numeric_bounds
    if lower is not None and upper is not None and lower > upper:
        raise HandledException(f"{prefix} has higher start than end value: {p_string}")
177
178
def split_crops_string(c_string):
    """Split a -c argument into its optional page range and its crops part.

    Returns a tuple ``(page_range, crops)``; ``page_range`` is None when the
    string contains no ':'.  Anything after a second ':' is dropped.
    """
    parts = c_string.split(':')
    if len(parts) == 1:
        return None, parts[0]
    return parts[0], parts[1]
188
189
def parse_page_range(range_string, pages):
    """Translate a page range string into a 0-based, end-exclusive index pair.

    An empty or missing ``range_string`` selects all of ``pages``.  An empty
    or 'start' left side means index 0, an empty or 'end' right side means
    ``len(pages)``.  Numbers are taken as 1-based, inclusive page numbers.

    Returns:
        Tuple ``(start_page, end_page)`` suitable for ``range()``.
    """
    page_total = len(pages)
    if not range_string:
        return 0, page_total
    first_token, last_token = range_string.split('-')
    start_page = 0 if first_token in ("", "start") else int(first_token) - 1
    end_page = page_total if last_token in ("", "end") else int(last_token)
    return start_page, end_page
200
201
def read_inputs_to_pagelist(args_input_file, args_page_range):
    """Collect the pages selected from every input file, in order.

    The n-th page range applies to the n-th input file; files without a page
    range contribute all their pages.  Page numbers that lie beyond the end
    of a file yield blank A4 pages.

    Returns:
        Tuple ``(pages, opened_files)``.  The files are left open and have to
        be closed by the caller.
    """
    collected_pages = []
    file_handles = []
    output_number = 0
    for file_index, path in enumerate(args_input_file):
        handle = open(path, 'rb')
        file_handles.append(handle)
        reader = pypdf.PdfReader(handle)
        has_own_range = bool(args_page_range) and file_index < len(args_page_range)
        range_string = args_page_range[file_index] if has_own_range else None
        first_index, stop_index = parse_page_range(range_string, reader.pages)
        for source_index in range(first_index, stop_index):
            output_number += 1
            if source_index < len(reader.pages):
                page = reader.pages[source_index]
            else:
                # requested range runs past the input: fill up with a blank page
                page = pypdf.PageObject.create_blank_page(width=A4_WIDTH, height=A4_HEIGHT)
            collected_pages.append(page)
            print(f"-i, -p: read in {path} page number {source_index+1} as new page {output_number}")
    return collected_pages, file_handles
223
224
def validate_inputs_second_pass(args, pages_to_add):
    """Check the arguments that can only be judged once the page count is known.

    Raises:
        HandledException: if a -c page range ends, or a -r page number lies,
            beyond the number of pages in ``pages_to_add``.
    """
    page_total = len(pages_to_add)
    for c_string in args.crops or ():
        page_range, _ = split_crops_string(c_string)
        if not page_range:
            continue
        _, end = parse_page_range(page_range, pages_to_add)
        if end > page_total:
            raise HandledException(f"-c: page range goes beyond number of pages we're building: {page_range}")
    for r in args.rotate_page or ():
        if r > page_total:
            raise HandledException(f"-r: page number beyond number of pages we're building: {r}")
237
238
def rotate_pages(args_rotate_page, pages_to_add):
    """Rotate each page named in ``args_rotate_page`` (1-based) around the A4 centre.

    A page number listed several times is rotated once per occurrence.
    """
    if not args_rotate_page:
        return
    centre_x = A4_WIDTH/2
    centre_y = A4_HEIGHT/2
    for page_number in args_rotate_page:
        target = pages_to_add[page_number - 1]
        # move the page centre onto the origin, rotate there, then move back
        target.add_transformation(pypdf.Transformation().translate(tx=-centre_x, ty=-centre_y))
        target.add_transformation(pypdf.Transformation().rotate(-90))
        target.add_transformation(pypdf.Transformation().translate(tx=centre_x, ty=centre_y))
        print(f"-r: rotating (by 90°) page {page_number}")
247
248
def pad_pages_to_multiple_of_8(pages_to_add):
    """Append blank A4 pages in place until the page count is a multiple of 8."""
    original_count = len(pages_to_add)
    surplus = original_count % 8
    if surplus == 0:
        return
    pages_to_add.extend(
        pypdf.PageObject.create_blank_page(width=A4_WIDTH, height=A4_HEIGHT)
        for _ in range(8 - surplus)
    )
    print(f"-n: number of input pages {original_count} not required multiple of 8, padded to {len(pages_to_add)}")
257
258
259 def normalize_pages_to_A4(pages_to_add):
260     for page in pages_to_add:
261         if "/Rotate" in page:  # TODO: preserve rotation, but in canvas?
262             page.rotate(360 - page["/Rotate"])
263         page.mediabox.left = 0
264         page.mediabox.bottom = 0
265         page.mediabox.top = A4_HEIGHT
266         page.mediabox.right = A4_WIDTH
267         page.cropbox = page.mediabox
268
269
def collect_per_page_crops_and_zooms(args_crops, args_symmetry, pages_to_add):
    """Determine the PageCrop that applies to each page.

    Every -c argument is applied to its page range (or to all pages if it has
    none); later arguments override earlier ones where ranges overlap.  With
    ``args_symmetry`` set, pages at odd 0-based indices receive the mirrored
    crop (left and right swapped).

    Args:
        args_crops: list of -c strings, or None.
        args_symmetry: whether --symmetry was given.
        pages_to_add: the pages being built; only its length is used.

    Returns:
        List of PageCrop objects with one entry per page.
    """
    # Pages without any crop share a single no-op PageCrop; entries are only
    # ever replaced below, never modified, so sharing is safe.
    crop_at_page = [PageCrop()] * len(pages_to_add)
    if args_crops:
        # The symmetry option is -s; the log prefix used to name a non-existent -t.
        prefix = "-c, -s" if args_symmetry else "-c"
        suffix = " (but alternating left and right crop between even and odd pages)" if args_symmetry else ""
        for c_string in args_crops:
            page_range, crops = split_crops_string(c_string)
            start_page, end_page = parse_page_range(page_range, pages_to_add)
            page_crop = PageCrop(*crops.split(','))
            print(f"{prefix}: to pages {start_page + 1} to {end_page} applying crop: {page_crop.format_in_cm}{suffix}")
            for page_num in range(start_page, end_page):
                if args_symmetry and page_num % 2:
                    crop_at_page[page_num] = page_crop.give_mirror()
                else:
                    crop_at_page[page_num] = page_crop
    return crop_at_page
286
287
def build_single_pages_output(writer, pages_to_add, crop_at_page):
    """Add every page to ``writer`` as its own output page, cropped and zoomed.

    Args:
        writer: object whose add_page() receives each finished page.
        pages_to_add: the pages to output, in order.
        crop_at_page: PageCrop per page, same indexing as ``pages_to_add``.
    """
    print("building 1-input-page-per-output-page book")
    for i, page in enumerate(pages_to_add):
        crop = crop_at_page[i]
        # shift the cropped-away left/bottom strips off the page, then zoom
        page.add_transformation(pypdf.Transformation().translate(tx=-crop.left, ty=-crop.bottom))
        page.add_transformation(pypdf.Transformation().scale(crop.zoom, crop.zoom))
        # cut the media box down to what remains after cropping and zooming
        page.mediabox.right = crop.remaining_width * crop.zoom
        page.mediabox.top = crop.remaining_height * crop.zoom
        writer.add_page(page)
        print(f"built page number {i+1} (of {len(pages_to_add)})")
299
300
def build_nup4_output(writer, pages_to_add, crop_at_page, args_print_margin, args_analyze, canvas_class):
    """Merge the input pages four at a time onto A4 output pages for ``writer``.

    The pages are first re-sorted by resort_pages_for_nup4; each output page
    is decorated by ornate_nup4 before it is added, with output pages
    alternating between front and back, starting with a front page.
    """
    print("-n: building 4-input-pages-per-output-page book")
    print(f"-m: applying printable-area margin of {args_print_margin}cm")
    if args_analyze:
        print("-a: drawing page borders, spine limits")
    nup4_geometry = Nup4Geometry(args_print_margin)
    pages_to_add, new_i_order = resort_pages_for_nup4(pages_to_add)
    total = len(pages_to_add)
    is_front_page = True
    for position, page in enumerate(pages_to_add):
        quarter = position % 4  # slot of this page on the current output page
        if quarter == 0:
            new_page = pypdf.PageObject.create_blank_page(width=A4_WIDTH, height=A4_HEIGHT)
        # crops are indexed by the page's position before re-sorting
        crop = crop_at_page[new_i_order[position]]
        nup4_inner_page_transform(page, crop, nup4_geometry, quarter)
        nup4_outer_page_transform(page, nup4_geometry, quarter)
        new_page.merge_page(page)
        print(f"merged page number {position + 1} (of {total})")
        if quarter == 3:
            ornate_nup4(writer, args_analyze, is_front_page, new_page, nup4_geometry, canvas_class)
            writer.add_page(new_page)
            is_front_page = not is_front_page
326
327
def resort_pages_for_nup4(pages_to_add):
    """Re-order the pages, pack of eight by pack of eight, per PAGE_ORDER_FOR_NUP4.

    Pages beyond the last complete pack of eight are left out.

    Returns:
        Tuple ``(pages, indices)``: the re-ordered pages, and for each of
        them its index in the original list.
    """
    reordered_pages = []
    original_indices = []
    complete_packs = len(pages_to_add) // 8
    for pack_number in range(complete_packs):
        pack_start = 8 * pack_number
        for offset in PAGE_ORDER_FOR_NUP4:
            original_indices.append(pack_start + offset)
            reordered_pages.append(pages_to_add[pack_start + offset])
    return reordered_pages, original_indices
346
347
def nup4_inner_page_transform(page, crop, nup4_geometry, nup4_i):
    """Apply crop, zoom and spine shrink to a page headed for quarter ``nup4_i``.

    Quarters 0 and 2 get shifted by the left crop, quarters 1 and 3 by a
    shift derived from the right crop; quarters 2 and 3 additionally get
    moved down by twice the (un-shrunk) print margin.
    """
    def shift(**offsets):
        page.add_transformation(pypdf.Transformation().translate(**offsets))

    shift(ty=(A4_HEIGHT / crop.zoom - (A4_HEIGHT - crop.top)))
    if nup4_i in (0, 2):
        shift(tx=-crop.left)
    elif nup4_i in (1, 3):
        shift(tx=(A4_WIDTH / crop.zoom - (A4_WIDTH - crop.right)))
    page.add_transformation(pypdf.Transformation().scale(crop.zoom * nup4_geometry.shrink_for_spine, crop.zoom * nup4_geometry.shrink_for_spine))
    if nup4_i in (2, 3):
        shift(ty=-2*nup4_geometry.margin/nup4_geometry.shrink_for_margin)
357
358
def nup4_outer_page_transform(page, nup4_geometry, nup4_i):
    """Place a page into quarter ``nup4_i`` of the output page and halve it.

    Quarters 0 and 1 form the upper half, 2 and 3 the lower half; 0 and 2
    the left half, 1 and 3 the right half.  The page's media box is set to
    the chosen quarter.
    """
    spine_share = 1-nup4_geometry.shrink_for_spine
    page.add_transformation(pypdf.Transformation().translate(ty=spine_share*A4_HEIGHT))
    # vertical placement
    if nup4_i in (0, 1):
        y_section = A4_HEIGHT
        page.mediabox.bottom = A4_HALF_HEIGHT
        page.mediabox.top = A4_HEIGHT
    elif nup4_i in (2, 3):
        y_section = 0
        page.mediabox.bottom = 0
        page.mediabox.top = A4_HALF_HEIGHT
    # horizontal placement
    if nup4_i in (0, 2):
        x_section = 0
        page.mediabox.left = 0
        page.mediabox.right = A4_HALF_WIDTH
    elif nup4_i in (1, 3):
        # right-hand pages are additionally pushed away from the spine
        page.add_transformation(pypdf.Transformation().translate(tx=spine_share*A4_WIDTH))
        x_section = A4_WIDTH
        page.mediabox.left = A4_HALF_WIDTH
        page.mediabox.right = A4_WIDTH
    page.add_transformation(pypdf.Transformation().translate(tx=x_section, ty=y_section))
    page.add_transformation(pypdf.Transformation().scale(QUARTER_SCALE_FACTOR, QUARTER_SCALE_FACTOR))
380
381
def ornate_nup4(writer, args_analyze, is_front_page, new_page, nup4_geometry, canvas_class):
    """Shrink a finished 4-up page into the printable area and decorate it.

    With ``args_analyze`` set, page borders are drawn before the shrink and
    spine limit lines after it.  Front pages additionally receive the two
    cut stencils at the spine limits.  ``writer`` is not used here.
    """
    if args_analyze:
        # borders
        border_buffer = io.BytesIO()
        border_canvas = canvas_class(border_buffer, pagesize=A4)
        border_canvas.setLineWidth(0.1)
        border_segments = (
            (0, A4_HEIGHT, A4_WIDTH, A4_HEIGHT),
            (0, A4_HALF_HEIGHT, A4_WIDTH, A4_HALF_HEIGHT),
            (0, 0, A4_WIDTH, 0),
            (0, A4_HEIGHT, 0, 0),
            (A4_HALF_WIDTH, A4_HEIGHT, A4_HALF_WIDTH, 0),
            (A4_WIDTH, A4_HEIGHT, A4_WIDTH, 0),
        )
        for x_from, y_from, x_to, y_to in border_segments:
            border_canvas.line(x_from, y_from, x_to, y_to)
        border_canvas.save()
        new_page.merge_page(pypdf.PdfReader(border_buffer).pages[0])
    # shrink everything merged so far and move it inside the print margin
    shrink = nup4_geometry.shrink_for_margin
    new_page.add_transformation(pypdf.Transformation().scale(shrink, shrink))
    new_page.add_transformation(pypdf.Transformation().translate(
        tx=nup4_geometry.margin,
        ty=nup4_geometry.margin * A4_HEIGHT / A4_WIDTH,
    ))
    x_left_spine_limit = A4_HALF_WIDTH * nup4_geometry.shrink_for_spine
    x_right_spine_limit = A4_WIDTH - x_left_spine_limit
    if not (args_analyze or is_front_page):
        return
    overlay_buffer = io.BytesIO()
    overlay_canvas = canvas_class(overlay_buffer, pagesize=A4)
    if args_analyze:
        # spine lines
        overlay_canvas.setLineWidth(0.1)
        for x_limit in (x_left_spine_limit, x_right_spine_limit):
            overlay_canvas.line(x_limit, A4_HEIGHT, x_limit, 0)
    if is_front_page:
        overlay_canvas.setLineWidth(0.2)
        draw_cut(overlay_canvas, x_left_spine_limit, 1)
        draw_cut(overlay_canvas, x_right_spine_limit, -1)
    overlay_canvas.save()
    new_page.merge_page(pypdf.PdfReader(overlay_buffer).pages[0])
419
420
def draw_cut(canvas, x_spine_limit, direction):
    """Draw one three-segment cut stencil at ``x_spine_limit``.

    The stencil starts and ends on the horizontal middle line of the page;
    ``direction`` (1 or -1) selects towards which side it extends, both
    horizontally and vertically.
    """
    half_cut = 0.5 * CUT_WIDTH * direction
    base_y = A4_HALF_HEIGHT
    tip = (x_spine_limit, base_y + CUT_DEPTH * direction)
    notch = (x_spine_limit, base_y + MIDDLE_POINT_DEPTH * direction)
    inner_start = (x_spine_limit + half_cut, base_y)
    outer_start = (x_spine_limit - half_cut, base_y)
    canvas.line(*inner_start, *tip)
    canvas.line(*tip, *notch)
    canvas.line(*notch, *outer_start)
429
430
def main():
    """Run the whole pipeline: parse and validate arguments, build and write the PDF.

    Raises:
        HandledException: for any expected failure (invalid arguments, missing
            reportlab when --nup4 is requested).
    """
    args = parse_args()
    validate_inputs_first_pass(args)
    if args.nup4:
        # reportlab is only needed for --nup4, so it is only imported then
        try:
            from reportlab.pdfgen.canvas import Canvas
        except ImportError:
            raise HandledException("-n: need reportlab.pdfgen.canvas installed for --nup4")
    pages_to_add, opened_files = read_inputs_to_pagelist(args.input_file, args.page_range)
    try:
        validate_inputs_second_pass(args, pages_to_add)
        rotate_pages(args.rotate_page, pages_to_add)
        if args.nup4:
            pad_pages_to_multiple_of_8(pages_to_add)
        normalize_pages_to_A4(pages_to_add)
        crop_at_page = collect_per_page_crops_and_zooms(args.crops, args.symmetry, pages_to_add)
        writer = pypdf.PdfWriter()
        if args.nup4:
            build_nup4_output(writer, pages_to_add, crop_at_page, args.print_margin, args.analyze, Canvas)
        else:
            build_single_pages_output(writer, pages_to_add, crop_at_page)
    finally:
        # close the inputs even when one of the steps above fails
        for file in opened_files:
            file.close()
    with open(args.output_file, 'wb') as output_file:
        writer.write(output_file)
455
456
# Script entry point: run main() and hand any HandledException over to
# handled_error_exit instead of letting it surface as a traceback.
if __name__ == "__main__":
    try:
        main()
    except HandledException as e:
        handled_error_exit(e)
461         handled_error_exit(e)