home · contact · privacy
Move majority of documentation into README.
[bookmaker] / bookmaker.py
1 #!/usr/bin/env python3
2 """
3 bookmaker.py is a helper for optimizing PDFs for the production of small self-printed, self-bound physical books.  Towards this goal it offers various PDF manipulation options that may also be used independently and for other purposes.
4 """
5 import argparse
6 import io
7 import os
8 import sys
9
def handled_error_exit(msg):
    """Print *msg* prefixed with ``ERROR: `` to stdout and exit with status 1."""
    print("ERROR: {}".format(msg))
    raise SystemExit(1)
13
14 try:
15     import pypdf
16 except ImportError:
17     handled_error_exit("Can't run at all without pypdf installed.")
18
# General paper geometry. Lengths are expressed in points:
# 72 points per inch and 25.4 mm per inch give 10 * 72 / 25.4 points per cm.
POINTS_PER_CM = 10 * 72 / 25.4
A4_WIDTH = 21 * POINTS_PER_CM      # A4 is 21cm wide ...
A4_HEIGHT = 29.7 * POINTS_PER_CM   # ... and 29.7cm high
A4 = (A4_WIDTH, A4_HEIGHT)

# Constants used only by --nup4.
A4_HALF_WIDTH = A4_WIDTH / 2
A4_HALF_HEIGHT = A4_HEIGHT / 2
# Dimensions of the binding cut stencil drawn by draw_cut().
CUT_DEPTH = 1.95 * POINTS_PER_CM
CUT_WIDTH = 1.05 * POINTS_PER_CM
MIDDLE_POINT_DEPTH = 0.4 * POINTS_PER_CM
# Spine margin reserved per quarter page (1cm), defined un-shrunk.
INNER_SPINE_MARGIN_PER_PAGE = 1 * POINTS_PER_CM
# Each input page is scaled by this factor when placed on the output sheet.
QUARTER_SCALE_FACTOR = 0.5
# Within every group of eight input pages: which page index goes into each
# successive output slot (four slots per output page, two output pages).
PAGE_ORDER_FOR_NUP4 = (3, 0, 7, 4, 1, 2, 5, 6)
34
35
class PageCrop:
    """Crop margins for one A4 page, plus the zoom that rescales what remains.

    The margins are kept both as passed in (``*_cm``, used for display and
    for building the mirrored crop) and converted to points (``left``,
    ``bottom``, ``right``, ``top``).
    """

    def __init__(self, left_cm=0, bottom_cm=0, right_cm=0, top_cm=0):
        """Store the crop margins and derive the zoom factor.

        Args:
            left_cm, bottom_cm, right_cm, top_cm: crop per page side in cm;
                anything accepted by ``float()`` (numbers or numeric strings).

        Raises:
            HandledException: if the crops leave no page area, or if they
                would zoom in on one axis while zooming out on the other.
        """
        self.left_cm = left_cm
        self.bottom_cm = bottom_cm
        self.right_cm = right_cm
        self.top_cm = top_cm
        self.left = float(self.left_cm) * POINTS_PER_CM
        self.bottom = float(self.bottom_cm) * POINTS_PER_CM
        self.right = float(self.right_cm) * POINTS_PER_CM
        self.top = float(self.top_cm) * POINTS_PER_CM
        # Without this guard, crops that eat the whole page divide by zero
        # (or yield a negative zoom) in the calculation below.
        if self.remaining_width <= 0 or self.remaining_height <= 0:
            raise HandledException(f"-c: crops leave no page area: {self.format_in_cm}")
        zoom_horizontal = A4_WIDTH / self.remaining_width
        zoom_vertical = A4_HEIGHT / self.remaining_height
        if (zoom_horizontal > 1 and zoom_vertical < 1) or (zoom_horizontal < 1 and zoom_vertical > 1):
            raise HandledException("-c: crops would create opposing zoom directions")
        elif zoom_horizontal + zoom_vertical > 2:
            # Enlarging: the smaller factor is used for both axes.
            self.zoom = min(zoom_horizontal, zoom_vertical)
        else:
            # Shrinking (or no change): the larger factor is used for both axes.
            self.zoom = max(zoom_horizontal, zoom_vertical)

    def __str__(self):
        return str(vars(self))

    @property
    def format_in_cm(self):
        """Human-readable listing of the four crops as originally passed in."""
        return f"left {self.left_cm}cm, bottom {self.bottom_cm}cm, right {self.right_cm}cm, top {self.top_cm}cm"

    @property
    def remaining_width(self):
        """A4 width in points minus the left and right crops."""
        return A4_WIDTH - self.left - self.right

    @property
    def remaining_height(self):
        """A4 height in points minus the bottom and top crops."""
        return A4_HEIGHT - self.bottom - self.top

    def give_mirror(self):
        """Return a new PageCrop with the left and right crops swapped."""
        return PageCrop(left_cm=self.right_cm, bottom_cm=self.bottom_cm, right_cm=self.left_cm, top_cm=self.top_cm)
73
74
class Nup4Geometry:
    """Margin size and shrink factors used when laying out --nup4 sheets."""

    def __init__(self, margin_cm):
        """Derive the geometry from the printable-area margin given in cm."""
        margin_points = margin_cm * POINTS_PER_CM
        self.margin = margin_points
        # Scale that makes an A4-wide sheet narrower by one margin on each side.
        self.shrink_for_margin = (A4_WIDTH - 2 * margin_points) / A4_WIDTH
        # NB: The spine size is defined un-shrunk, but .shrink_for_spine gets
        # applied to values already shrunk for the margin, so that shrink is
        # divided out again here.
        spine_fraction = INNER_SPINE_MARGIN_PER_PAGE / A4_HALF_WIDTH
        self.shrink_for_spine = 1 - spine_fraction / self.shrink_for_margin
83
84
class HandledException(Exception):
    """Error with a user-facing message; caught at script level and printed."""
87
88
def parse_args():
    """Define the command line interface and return the parsed arguments.

    -i, -p, -c and -r may be given repeatedly and are collected into lists.
    """
    arg_parser = argparse.ArgumentParser(
        description=__doc__,
        epilog="See README.txt for detailed usage instructions, command examples, etc.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    add = arg_parser.add_argument
    add("-i", "--input_file", action="append", required=True,
        help="input PDF file")
    add("-o", "--output_file", required=True,
        help="output PDF file")
    add("-p", "--page_range", action="append",
        help="page range, e.g., '2-9' or '3-end' or 'start-14'")
    add("-c", "--crops", action="append",
        help="cm crops left, bottom, right, top – e.g., '10,10,10,10'; prefix with ':'-delimited page range to limit effect")
    add("-r", "--rotate_page", type=int, action="append",
        help="rotate page of number by 90° (usable multiple times on same page!)")
    add("-s", "--symmetry", action="store_true",
        help="alternate horizontal crops between odd and even pages")
    add("-n", "--nup4", action="store_true",
        help="puts 4 input pages onto 1 output page, adds binding cut stencil")
    add("-a", "--analyze", action="store_true",
        help="in --nup4, print lines identifying spine, page borders")
    add("-m", "--print_margin", type=float, default=0.43,
        help="print margin for --nup4 in cm (default 0.43)")
    return arg_parser.parse_args()
102
103
def validate_inputs_first_pass(args):
    """Check everything about the arguments that can be checked before reading pages.

    Verifies that every input file exists and can be opened by pypdf, that
    page range and crop strings are well-formed, that rotation targets are
    positive integers, and that the print margin is a number.

    Args:
        args: the namespace returned by parse_args().

    Raises:
        HandledException: describing the first invalid argument found.
    """
    for filename in args.input_file:
        if not os.path.isfile(filename):
            raise HandledException(f"-i: {filename} is not a file")
        try:
            with open(filename, 'rb') as file:
                pypdf.PdfReader(file)
        except pypdf.errors.PdfStreamError as err:
            raise HandledException(f"-i: cannot interpret {filename} as PDF file") from err
    if args.page_range:
        for p_string in args.page_range:
            validate_page_range(p_string, "-p")
        if len(args.page_range) > len(args.input_file):
            raise HandledException("-p: more --page_range arguments than --input_file arguments")
    if args.crops:
        for c_string in args.crops:
            # At most one ':' is allowed: it separates an optional page range from the crops.
            if len(c_string.split(':')) > 2:
                raise HandledException(f"-c: cropping string has multiple ':': {c_string}")
            page_range, crops = split_crops_string(c_string)
            crops = crops.split(",")
            if page_range:
                validate_page_range(page_range, "-c")
            if len(crops) != 4:
                raise HandledException(f"-c: cropping does not contain exactly three ',': {c_string}")
            for crop in crops:
                try:
                    float(crop)
                except ValueError as err:
                    raise HandledException(f"-c: non-number crop in: {c_string}") from err
    if args.rotate_page:
        for r in args.rotate_page:
            try:
                int(r)
            except ValueError as err:
                raise HandledException(f"-r: non-integer value: {r}") from err
            if r < 1:
                raise HandledException(f"-r: value must not be <1: {r}")
    try:
        float(args.print_margin)
    except ValueError as err:
        # The original referenced the undefined name `arg` here, which turned
        # this error path into a NameError.
        raise HandledException(f"-m: non-float value: {args.print_margin}") from err
146
147
def validate_page_range(p_string, err_msg_prefix):
    """Raise HandledException unless *p_string* is a well-formed page range.

    A valid range is "<start>-<end>" where either side may be empty, the left
    side may be the word "start", the right side may be the word "end", and
    any numbers are integers >= 1 with start not greater than end.
    *err_msg_prefix* (e.g. "-p") is prepended to every error message.
    """
    prefix = f"{err_msg_prefix}: page range string"
    if '-' not in p_string:
        raise HandledException(f"{prefix} lacks '-': {p_string}")
    tokens = p_string.split("-")
    if len(tokens) > 2:
        raise HandledException(f"{prefix} has too many '-': {p_string}")
    keyword_at = ("start", "end")
    bounds = []  # one entry per token: the page number, or None if left open
    for position, token in enumerate(tokens):
        if token in ("", keyword_at[position]):
            bounds.append(None)
            continue
        try:
            number = int(token)
        except ValueError:
            raise HandledException(f"{prefix} carries value neither integer, nor 'start', nor 'end': {p_string}")
        if number < 1:
            raise HandledException(f"{prefix} carries page number <1: {p_string}")
        bounds.append(number)
    first, last = bounds
    # Ordering can only be judged when both sides are explicit numbers.
    if first is not None and last is not None and first > last:
        raise HandledException(f"{prefix} has higher start than end value: {p_string}")
177
178
def split_crops_string(c_string):
    """Split a -c argument into ``(page_range, crops)``.

    The page range is the part before the first ':' and is None when the
    string contains no ':'. Anything after a second ':' is ignored.
    """
    parts = c_string.split(':')
    if len(parts) == 1:
        return None, parts[0]
    return parts[0], parts[1]
188
189
def parse_page_range(range_string, pages):
    """Translate a page range string into ``(start_page, end_page)``.

    The result is a zero-based start index and an exclusive end, suitable for
    ``range()``. An empty or missing *range_string* covers all of *pages*; an
    open or "start"/"end" side falls back to the respective limit of *pages*.
    """
    page_count = len(pages)
    if not range_string:
        return 0, page_count
    start, end = range_string.split('-')
    start_page = 0 if start in ("", "start") else int(start) - 1
    end_page = page_count if end in ("", "end") else int(end)
    return start_page, end_page
200
201
def read_inputs_to_pagelist(args_input_file, args_page_range):
    """Open all input PDFs and collect the selected pages into one list.

    Args:
        args_input_file: list of input PDF paths.
        args_page_range: optional list of page range strings; the i-th range
            applies to the i-th input file, files without one are read fully.

    Returns:
        Tuple ``(pages_to_add, opened_files)``. The file objects are returned
        still open and must be closed by the caller once the pages are no
        longer needed.

    Raises:
        HandledException: if a page range reaches beyond the pages of its
            input file. Files opened so far are closed before re-raising.
    """
    pages_to_add = []
    opened_files = []
    new_page_num = 0
    try:
        for i, input_file in enumerate(args_input_file):
            file = open(input_file, 'rb')
            opened_files.append(file)
            reader = pypdf.PdfReader(file)
            range_string = None
            if args_page_range and len(args_page_range) > i:
                range_string = args_page_range[i]
            n_pages = len(reader.pages)
            start_page, end_page = parse_page_range(range_string, reader.pages)
            # The earlier validation only compares start and end when both are
            # numbers, so an open-ended range such as "20-end" on a shorter
            # file has to be caught here; otherwise it silently selects nothing.
            start_beyond_file = start_page > 0 and start_page >= n_pages
            if end_page > n_pages or start_beyond_file:
                raise HandledException(f"-p: page range goes beyond pages of input file: {range_string}")
            for old_page_num in range(start_page, end_page):
                new_page_num += 1
                pages_to_add.append(reader.pages[old_page_num])
                print(f"-i, -p: read in {input_file} page number {old_page_num+1} as new page {new_page_num}")
    except Exception:
        # The caller never receives opened_files on failure, so close them here.
        for file in opened_files:
            file.close()
        raise
    return pages_to_add, opened_files
222
223
def validate_inputs_second_pass(args, pages_to_add):
    """Check the arguments that can only be judged once the page count is known.

    Raises HandledException if a -c page range ends beyond, or a -r page
    number lies beyond, the number of pages in *pages_to_add*.
    """
    total_pages = len(pages_to_add)
    for c_string in args.crops or ():
        page_range, _ = split_crops_string(c_string)
        if not page_range:
            continue
        _, end = parse_page_range(page_range, pages_to_add)
        if end > total_pages:
            raise HandledException(f"-c: page range goes beyond number of pages we're building: {page_range}")
    for r in args.rotate_page or ():
        if r > total_pages:
            raise HandledException(f"-r: page number beyond number of pages we're building: {r}")
236
237
def rotate_pages(args_rotate_page, pages_to_add):
    """Rotate the content of each listed page by 90° around the A4 centre.

    *args_rotate_page* holds one-based page numbers into *pages_to_add*; a
    number listed several times is rotated once per occurrence.
    """
    if not args_rotate_page:
        return
    for page_number in args_rotate_page:
        target = pages_to_add[page_number - 1]
        # Move the page centre to the origin, rotate there, then move it back.
        steps = (
            pypdf.Transformation().translate(tx=-A4_WIDTH/2, ty=-A4_HEIGHT/2),
            pypdf.Transformation().rotate(-90),
            pypdf.Transformation().translate(tx=A4_WIDTH/2, ty=A4_HEIGHT/2),
        )
        for step in steps:
            target.add_transformation(step)
        print(f"-r: rotating (by 90°) page {page_number}")
246
247
def pad_pages_to_multiple_of_8(pages_to_add):
    """Append blank A4 pages in place until the page count is a multiple of 8."""
    remainder = len(pages_to_add) % 8
    if remainder == 0:
        return
    original_count = len(pages_to_add)
    pages_to_add.extend(
        pypdf.PageObject.create_blank_page(width=A4_WIDTH, height=A4_HEIGHT)
        for _ in range(8 - remainder)
    )
    print(f"-n: number of input pages {original_count} not required multiple of 8, padded to {len(pages_to_add)}")
256
257
def normalize_pages_to_A4(pages_to_add):
    """Give every page an A4 media box and crop box anchored at the origin."""
    for pdf_page in pages_to_add:
        # TODO: preserve rotation, but in canvas?
        if "/Rotate" in pdf_page:
            pdf_page.rotate(360 - pdf_page["/Rotate"])
        pdf_page.mediabox.left = 0
        pdf_page.mediabox.bottom = 0
        pdf_page.mediabox.top = A4_HEIGHT
        pdf_page.mediabox.right = A4_WIDTH
        pdf_page.cropbox = pdf_page.mediabox
267
268
def collect_per_page_crops_and_zooms(args_crops, args_symmetry, pages_to_add):
    """Build the list of PageCrop objects to apply, one entry per page.

    Args:
        args_crops: optional list of crop strings "[<page range>:]l,b,r,t";
            later strings override earlier ones on the pages they cover.
        args_symmetry: if true, pages at odd zero-based index get the crop
            with left and right swapped.
        pages_to_add: the pages being built; only its length is used.

    Returns:
        List as long as *pages_to_add*; pages not covered by any crop string
        get a default (zero) PageCrop.
    """
    crop_at_page = [PageCrop()] * len(pages_to_add)
    if not args_crops:
        return crop_at_page
    # The symmetry flag is -s; the message previously named a non-existent -t.
    prefix = "-c, -s" if args_symmetry else "-c"
    suffix = " (but alternating left and right crop between even and odd pages)" if args_symmetry else ""
    for c_string in args_crops:
        page_range, crops = split_crops_string(c_string)
        start_page, end_page = parse_page_range(page_range, pages_to_add)
        page_crop = PageCrop(*crops.split(','))
        # One mirrored instance per crop string is enough; entries are never mutated.
        mirrored_crop = page_crop.give_mirror() if args_symmetry else None
        print(f"{prefix}: to pages {start_page + 1} to {end_page} applying crop: {page_crop.format_in_cm}{suffix}")
        for page_num in range(start_page, end_page):
            if args_symmetry and page_num % 2:
                crop_at_page[page_num] = mirrored_crop
            else:
                crop_at_page[page_num] = page_crop
    return crop_at_page
285
286
def build_single_pages_output(writer, pages_to_add, crop_at_page):
    """Crop and zoom each page individually and add it to *writer*.

    Args:
        writer: object with an ``add_page(page)`` method (a pypdf.PdfWriter in main()).
        pages_to_add: pages to output, in order.
        crop_at_page: PageCrop for each page, indexed like *pages_to_add*.
    """
    print("building 1-input-page-per-output-page book")
    total = len(pages_to_add)
    for i, page in enumerate(pages_to_add):
        crop = crop_at_page[i]
        # Shift the uncropped area to the origin, then scale it up by the zoom.
        page.add_transformation(pypdf.Transformation().translate(tx=-crop.left, ty=-crop.bottom))
        page.add_transformation(pypdf.Transformation().scale(crop.zoom, crop.zoom))
        page.mediabox.right = crop.remaining_width * crop.zoom
        page.mediabox.top = crop.remaining_height * crop.zoom
        writer.add_page(page)
        print(f"built page number {i+1} (of {total})")
298
299
def build_nup4_output(writer, pages_to_add, crop_at_page, args_print_margin, args_analyze, canvas_class):
    """Merge four input pages onto each output page and add those to *writer*.

    The pages are first re-sorted by resort_pages_for_nup4(); every completed
    output page is decorated by ornate_nup4(), with output pages alternating
    between front and back starting with a front page.
    """
    print("-n: building 4-input-pages-per-output-page book")
    print(f"-m: applying printable-area margin of {args_print_margin}cm")
    if args_analyze:
        print("-a: drawing page borders, spine limits")
    geometry = Nup4Geometry(args_print_margin)
    pages_to_add, new_i_order = resort_pages_for_nup4(pages_to_add)
    total = len(pages_to_add)
    is_front_page = True
    for i, page in enumerate(pages_to_add):
        slot = i % 4  # position of this page on the current output page
        if slot == 0:
            new_page = pypdf.PageObject.create_blank_page(width=A4_WIDTH, height=A4_HEIGHT)
        # Crops are indexed by the page's position before re-sorting.
        original_index = new_i_order[i]
        nup4_inner_page_transform(page, crop_at_page[original_index], geometry, slot)
        nup4_outer_page_transform(page, geometry, slot)
        new_page.merge_page(page)
        print(f"merged page number {i + 1} (of {total})")
        if slot == 3:
            ornate_nup4(writer, args_analyze, is_front_page, new_page, geometry, canvas_class)
            writer.add_page(new_page)
            is_front_page = not is_front_page
325
326
def resort_pages_for_nup4(pages_to_add):
    """Reorder pages in groups of eight according to PAGE_ORDER_FOR_NUP4.

    Returns ``(new_page_order, new_i_order)``: the reordered pages and, in
    parallel, each page's index in the original list. Pages beyond the last
    complete group of eight are left out.
    """
    new_page_order = []
    new_i_order = []
    complete_packs = len(pages_to_add) // 8
    for pack_number in range(complete_packs):
        offset = 8 * pack_number
        eight_pack = pages_to_add[offset:offset + 8]
        for n in PAGE_ORDER_FOR_NUP4:
            new_i_order.append(offset + n)
            new_page_order.append(eight_pack[n])
    return new_page_order, new_i_order
345
346
def nup4_inner_page_transform(page, crop, nup4_geometry, nup4_i):
    """Apply the crop, zoom and spine shrink to a page, depending on its slot.

    *nup4_i* is the slot on the output page (0-3): slots 0 and 2 are shifted
    by the left crop, slots 1 and 3 by the right crop, and slots 2 and 3 get
    an additional downward shift derived from the print margin.
    """
    def shift(**offsets):
        page.add_transformation(pypdf.Transformation().translate(**offsets))

    shift(ty=(A4_HEIGHT / crop.zoom - (A4_HEIGHT - crop.top)))
    if nup4_i in (0, 2):
        shift(tx=-crop.left)
    elif nup4_i in (1, 3):
        shift(tx=(A4_WIDTH / crop.zoom - (A4_WIDTH - crop.right)))
    factor = crop.zoom * nup4_geometry.shrink_for_spine
    page.add_transformation(pypdf.Transformation().scale(factor, factor))
    if nup4_i in (2, 3):
        shift(ty=-2*nup4_geometry.margin/nup4_geometry.shrink_for_margin)
356
357
def nup4_outer_page_transform(page, nup4_geometry, nup4_i):
    """Move a page into its quarter of the output page and halve its size.

    *nup4_i* selects the quarter: 0 and 1 use the upper half, 2 and 3 the
    lower half; 0 and 2 the left half, 1 and 3 the right half. The page's
    media box is restricted to that quarter.
    """
    spine_gap = 1 - nup4_geometry.shrink_for_spine
    page.add_transformation(pypdf.Transformation().translate(ty=spine_gap*A4_HEIGHT))
    in_upper_half = nup4_i in (0, 1)
    in_lower_half = nup4_i in (2, 3)
    in_left_half = nup4_i in (0, 2)
    in_right_half = nup4_i in (1, 3)
    if in_upper_half:
        y_section = A4_HEIGHT
        page.mediabox.bottom = A4_HALF_HEIGHT
        page.mediabox.top = A4_HEIGHT
    if in_lower_half:
        y_section = 0
        page.mediabox.bottom = 0
        page.mediabox.top = A4_HALF_HEIGHT
    if in_left_half:
        x_section = 0
        page.mediabox.left = 0
        page.mediabox.right = A4_HALF_WIDTH
    if in_right_half:
        # Right-hand pages are additionally pushed right by the spine gap.
        page.add_transformation(pypdf.Transformation().translate(tx=spine_gap*A4_WIDTH))
        x_section = A4_WIDTH
        page.mediabox.left = A4_HALF_WIDTH
        page.mediabox.right = A4_WIDTH
    # Offsets are given at full size; the final scale halves them with the content.
    page.add_transformation(pypdf.Transformation().translate(tx=x_section, ty=y_section))
    page.add_transformation(pypdf.Transformation().scale(QUARTER_SCALE_FACTOR, QUARTER_SCALE_FACTOR))
379
380
def ornate_nup4(writer, args_analyze, is_front_page, new_page, nup4_geometry, canvas_class):
    """Shrink a finished 4-up page into the print margin and draw helper lines.

    With *args_analyze*, quarter-page borders are merged in before the
    shrink and spine limit lines after it. Front pages additionally get the
    binding cut stencil at both spine limits. *canvas_class* is used to draw
    the lines into an in-memory PDF that is merged onto *new_page*.
    *writer* is not used here.
    """
    if args_analyze:
        # borders
        border_buffer = io.BytesIO()
        border_canvas = canvas_class(border_buffer, pagesize=A4)
        border_canvas.setLineWidth(0.1)
        for y in (A4_HEIGHT, A4_HALF_HEIGHT, 0):
            border_canvas.line(0, y, A4_WIDTH, y)
        for x in (0, A4_HALF_WIDTH, A4_WIDTH):
            border_canvas.line(x, A4_HEIGHT, x, 0)
        border_canvas.save()
        new_page.merge_page(pypdf.PdfReader(border_buffer).pages[0])

    shrink = nup4_geometry.shrink_for_margin
    printable_offset_x = nup4_geometry.margin
    printable_offset_y = nup4_geometry.margin * A4_HEIGHT / A4_WIDTH
    new_page.add_transformation(pypdf.Transformation().scale(shrink, shrink))
    new_page.add_transformation(pypdf.Transformation().translate(tx=printable_offset_x, ty=printable_offset_y))

    if not (args_analyze or is_front_page):
        return
    x_left_spine_limit = A4_HALF_WIDTH * nup4_geometry.shrink_for_spine
    x_right_spine_limit = A4_WIDTH - x_left_spine_limit
    overlay_buffer = io.BytesIO()
    overlay = canvas_class(overlay_buffer, pagesize=A4)
    if args_analyze:
        # spine lines
        overlay.setLineWidth(0.1)
        for x in (x_left_spine_limit, x_right_spine_limit):
            overlay.line(x, A4_HEIGHT, x, 0)
    if is_front_page:
        overlay.setLineWidth(0.2)
        draw_cut(overlay, x_left_spine_limit, (1))
        draw_cut(overlay, x_right_spine_limit, (-1))
    overlay.save()
    new_page.merge_page(pypdf.PdfReader(overlay_buffer).pages[0])
418
419
def draw_cut(canvas, x_spine_limit, direction):
    """Draw one three-line cut stencil starting from the page's horizontal middle.

    *direction* (1 or -1 in the callers) flips the stencil both horizontally
    around *x_spine_limit* and vertically around the middle line.
    """
    half_width = 0.5 * CUT_WIDTH * direction
    inner_x = x_spine_limit + half_width
    outer_x = x_spine_limit - half_width
    tip_y = A4_HALF_HEIGHT + CUT_DEPTH * direction
    notch_y = A4_HALF_HEIGHT + MIDDLE_POINT_DEPTH * direction
    segments = (
        (inner_x, A4_HALF_HEIGHT, x_spine_limit, tip_y),
        (x_spine_limit, tip_y, x_spine_limit, notch_y),
        (x_spine_limit, notch_y, outer_x, A4_HALF_HEIGHT),
    )
    for x1, y1, x2, y2 in segments:
        canvas.line(x1, y1, x2, y2)
428
429
def main():
    """Run the whole pipeline: parse and validate arguments, read, transform, write.

    Raises:
        HandledException: for any invalid input detected along the way, or
            when --nup4 is requested without reportlab being installed.
    """
    args = parse_args()
    validate_inputs_first_pass(args)
    if args.nup4:
        # reportlab is only needed for the lines drawn in --nup4 mode.
        try:
            from reportlab.pdfgen.canvas import Canvas
        except ImportError as err:
            raise HandledException("-n: need reportlab.pdfgen.canvas installed for --nup4") from err
    pages_to_add, opened_files = read_inputs_to_pagelist(args.input_file, args.page_range)
    try:
        validate_inputs_second_pass(args, pages_to_add)
        rotate_pages(args.rotate_page, pages_to_add)
        if args.nup4:
            pad_pages_to_multiple_of_8(pages_to_add)
        normalize_pages_to_A4(pages_to_add)
        crop_at_page = collect_per_page_crops_and_zooms(args.crops, args.symmetry, pages_to_add)
        writer = pypdf.PdfWriter()
        if args.nup4:
            build_nup4_output(writer, pages_to_add, crop_at_page, args.print_margin, args.analyze, Canvas)
        else:
            build_single_pages_output(writer, pages_to_add, crop_at_page)
    finally:
        # Close the inputs even when a step above fails; on success this still
        # happens before the output file is opened, as before.
        for file in opened_files:
            file.close()
    with open(args.output_file, 'wb') as output_file:
        writer.write(output_file)
454
455
if __name__ == "__main__":
    # Expected failures surface as HandledException and are reported as a
    # plain one-line error with exit status 1.
    try:
        main()
    except HandledException as handled:
        handled_error_exit(handled)