bookmaker.py is a helper for optimizing PDFs for the production of small self-printed, self-bound physical books. Towards this goal it offers various PDF manipulation options that may also be used independently and for other purposes.
10 def handled_error_exit(msg):
11 print(f"ERROR: {msg}")
17 handled_error_exit("Can't run at all without pypdf installed.")
# general paper geometry; lengths are derived from 72 points per 25.4 mm
POINTS_PER_CM = 72 * 10 / 25.4
A4_WIDTH, A4_HEIGHT = 21 * POINTS_PER_CM, 29.7 * POINTS_PER_CM
A4 = A4_WIDTH, A4_HEIGHT

# geometry used only by --nup4 (quarter-page layout, spine margin, cut marks)
A4_HALF_WIDTH, A4_HALF_HEIGHT = A4_WIDTH / 2, A4_HEIGHT / 2
CUT_DEPTH = POINTS_PER_CM * 1.95
CUT_WIDTH = POINTS_PER_CM * 1.05
MIDDLE_POINT_DEPTH = POINTS_PER_CM * 0.4
INNER_SPINE_MARGIN_PER_PAGE = POINTS_PER_CM * 1
QUARTER_SCALE_FACTOR = 1 / 2
# order in which each group of 8 input pages is placed onto the sheets
PAGE_ORDER_FOR_NUP4 = 3, 0, 7, 4, 1, 2, 5, 6
def __init__(self, left_cm=0, bottom_cm=0, right_cm=0, top_cm=0):
    """Store the four crops (given in cm) and derive point values and zoom.

    The crops are kept as passed in (``*_cm`` attributes) and additionally
    converted to points (``left``, ``bottom``, ``right``, ``top``).
    ``zoom`` is the factor by which the area remaining after cropping has
    to be scaled to fill an A4 page again along one axis.

    Raises:
        HandledException: if one axis would have to be enlarged while the
            other would have to be shrunk.
    """
    self.left_cm, self.bottom_cm, self.right_cm, self.top_cm = (
        left_cm, bottom_cm, right_cm, top_cm)
    for side in ("left", "bottom", "right", "top"):
        in_cm = getattr(self, f"{side}_cm")
        setattr(self, side, float(in_cm) * POINTS_PER_CM)

    zoom_horizontal = A4_WIDTH / (A4_WIDTH - self.left - self.right)
    zoom_vertical = A4_HEIGHT / (A4_HEIGHT - self.bottom - self.top)
    grows_x_shrinks_y = zoom_horizontal > 1 and zoom_vertical < 1
    shrinks_x_grows_y = zoom_horizontal < 1 and zoom_vertical > 1
    if grows_x_shrinks_y or shrinks_x_grows_y:
        raise HandledException("-c: crops would create opposing zoom directions")

    # A sum above 2 means the page gets enlarged, so the smaller factor is
    # taken; otherwise the larger of the two factors is taken.
    choose = min if zoom_horizontal + zoom_vertical > 2 else max
    self.zoom = choose(zoom_horizontal, zoom_vertical)
57 return str(vars(self))
def format_in_cm(self):
    """Return the four crop values, as originally given in cm, as one readable string."""
    left, bottom = self.left_cm, self.bottom_cm
    right, top = self.right_cm, self.top_cm
    return f"left {left}cm, bottom {bottom}cm, right {right}cm, top {top}cm"
def remaining_width(self):
    """Return the A4 width (in points) left over after the left and right crops."""
    width = A4_WIDTH
    width -= self.left
    width -= self.right
    return width
def remaining_height(self):
    """Return the A4 height (in points) left over after the bottom and top crops."""
    height = A4_HEIGHT
    height -= self.bottom
    height -= self.top
    return height
def give_mirror(self):
    """Return a new PageCrop with left and right crops swapped; bottom and top are kept."""
    mirrored_cm = {
        "left_cm": self.right_cm,
        "bottom_cm": self.bottom_cm,
        "right_cm": self.left_cm,
        "top_cm": self.top_cm,
    }
    return PageCrop(**mirrored_cm)
def __init__(self, margin_cm):
    """Derive the --nup4 scale factors from the print margin given in cm.

    Sets ``margin`` (points), ``shrink_for_margin`` (share of the A4 width
    left between a margin on both sides) and ``shrink_for_spine`` (share of
    a page left after reserving the inner spine margin).
    """
    margin_in_points = margin_cm * POINTS_PER_CM
    self.margin = margin_in_points
    width_inside_margins = A4_WIDTH - 2 * margin_in_points
    self.shrink_for_margin = width_inside_margins / A4_WIDTH
    # NB: The spine size is defined un-shrunk, but .shrink_for_spine is
    # applied to values already shrunk for the margin, so that shrink is
    # divided out again here.
    unshrunk_spine_share = INNER_SPINE_MARGIN_PER_PAGE / A4_HALF_WIDTH
    spine_part_of_page = unshrunk_spine_share / self.shrink_for_margin
    self.shrink_for_spine = 1 - spine_part_of_page
85 class HandledException(Exception):
90 help_epilogue = "See README.txt for detailed usage instructions, command examples, etc."
91 parser = argparse.ArgumentParser(description=__doc__, epilog=help_epilogue, formatter_class=argparse.RawDescriptionHelpFormatter)
92 parser.add_argument("-i", "--input_file", action="append", required=True, help="input PDF file")
93 parser.add_argument("-o", "--output_file", required=True, help="output PDF file")
94 parser.add_argument("-p", "--page_range", action="append", help="page range, e.g., '2-9' or '3-end' or 'start-14'")
95 parser.add_argument("-c", "--crops", action="append", help="cm crops left, bottom, right, top – e.g., '10,10,10,10'; prefix with ':'-delimited page range to limit effect")
96 parser.add_argument("-r", "--rotate_page", type=int, action="append", help="rotate page of number by 90° (usable multiple times on same page!)")
97 parser.add_argument("-s", "--symmetry", action="store_true", help="alternate horizontal crops between odd and even pages")
98 parser.add_argument("-n", "--nup4", action='store_true', help="puts 4 input pages onto 1 output page, adds binding cut stencil")
99 parser.add_argument("-a", "--analyze", action="store_true", help="in --nup4, print lines identifying spine, page borders")
100 parser.add_argument("-m", "--print_margin", type=float, default=0.43, help="print margin for --nup4 in cm (default 0.43)")
101 return parser.parse_args()
def validate_inputs_first_pass(args):
    """Check all command line arguments that can be judged without page data.

    Verifies that every input file exists and can be opened by pypdf, that
    page range and crop strings are well-formed, that rotation targets are
    integers >= 1, and that the print margin is a number.

    Args:
        args: the parsed argparse namespace.

    Raises:
        HandledException: on the first invalid argument found; the message
            is prefixed with the offending option.
    """
    for filename in args.input_file:
        if not os.path.isfile(filename):
            raise HandledException(f"-i: {filename} is not a file")
        try:
            with open(filename, 'rb') as file:
                pypdf.PdfReader(file)
        except pypdf.errors.PdfStreamError as err:
            raise HandledException(f"-i: cannot interpret {filename} as PDF file") from err

    # "append" options are None when the option was never given.
    if args.page_range:
        for p_string in args.page_range:
            validate_page_range(p_string, "-p")
        if len(args.page_range) > len(args.input_file):
            raise HandledException("-p: more --page_range arguments than --input_file arguments")

    if args.crops:
        for c_string in args.crops:
            initial_split = c_string.split(':')
            if len(initial_split) > 2:
                raise HandledException(f"-c: cropping string has multiple ':': {c_string}")
            page_range, crops = split_crops_string(c_string)
            crops = crops.split(",")
            if page_range:
                validate_page_range(page_range, "-c")
            if len(crops) != 4:
                raise HandledException(f"-c: cropping does not contain exactly three ',': {c_string}")
            for crop in crops:
                try:
                    float(crop)
                except ValueError as err:
                    raise HandledException(f"-c: non-number crop in: {c_string}") from err

    if args.rotate_page:
        for r in args.rotate_page:
            try:
                page_number = int(r)
            except ValueError as err:
                raise HandledException(f"-r: non-integer value: {r}") from err
            if page_number < 1:
                raise HandledException(f"-r: value must not be <1: {r}")

    try:
        float(args.print_margin)
    except ValueError as err:
        # The message previously referenced the undefined name `arg`, which
        # would have raised NameError instead of the intended error.
        raise HandledException(f"-m: non-float value: {args.print_margin}") from err
148 def validate_page_range(p_string, err_msg_prefix):
149 prefix = f"{err_msg_prefix}: page range string"
150 if '-' not in p_string:
151 raise HandledException(f"{prefix} lacks '-': {p_string}")
152 tokens = p_string.split("-")
154 raise HandledException(f"{prefix} has too many '-': {p_string}")
155 for i, token in enumerate(tokens):
158 if i == 0 and token == "start":
160 if i == 1 and token == "end":
165 raise HandledException(f"{prefix} carries value neither integer, nor 'start', nor 'end': {p_string}")
167 raise HandledException(f"{prefix} carries page number <1: {p_string}")
171 start = int(tokens[0])
175 if start > 0 and end > 0 and start > end:
176 raise HandledException(f"{prefix} has higher start than end value: {p_string}")
179 def split_crops_string(c_string):
180 initial_split = c_string.split(':')
181 if len(initial_split) > 1:
182 page_range = initial_split[0]
183 crops = initial_split[1]
186 crops = initial_split[0]
187 return page_range, crops
190 def parse_page_range(range_string, pages):
192 end_page = len(pages)
194 start, end = range_string.split('-')
195 if not (len(start) == 0 or start == "start"):
196 start_page = int(start) - 1
197 if not (len(end) == 0 or end == "end"):
199 return start_page, end_page
202 def read_inputs_to_pagelist(args_input_file, args_page_range):
206 for i, input_file in enumerate(args_input_file):
207 file = open(input_file, 'rb')
208 opened_files += [file]
209 reader = pypdf.PdfReader(file)
211 if args_page_range and len(args_page_range) > i:
212 range_string = args_page_range[i]
213 start_page, end_page = parse_page_range(range_string, reader.pages)
214 if end_page > len(reader.pages): # no need to test start_page cause start_page > end_page is checked above
215 raise HandledException(f"-p: page range goes beyond pages of input file: {range_string}")
216 for old_page_num in range(start_page, end_page):
218 page = reader.pages[old_page_num]
219 pages_to_add += [page]
220 print(f"-i, -p: read in {input_file} page number {old_page_num+1} as new page {new_page_num}")
221 return pages_to_add, opened_files
224 def validate_inputs_second_pass(args, pages_to_add):
226 for c_string in args.crops:
227 page_range, _= split_crops_string(c_string)
229 start, end = parse_page_range(page_range, pages_to_add)
230 if end > len(pages_to_add):
231 raise HandledException(f"-c: page range goes beyond number of pages we're building: {page_range}")
233 for r in args.rotate_page:
234 if r > len(pages_to_add):
235 raise HandledException(f"-r: page number beyond number of pages we're building: {r}")
def rotate_pages(args_rotate_page, pages_to_add):
    """Turn each listed page by 90° around the centre of an A4 page.

    Args:
        args_rotate_page: 1-based page numbers, or None/empty for no-op; a
            number listed several times is rotated several times.
        pages_to_add: list of pages, transformed in place.
    """
    if not args_rotate_page:
        return
    centre_x, centre_y = A4_WIDTH/2, A4_HEIGHT/2
    for page_number in args_rotate_page:
        target = pages_to_add[page_number - 1]
        # Move the page centre to the origin, rotate, then move it back.
        steps = (
            pypdf.Transformation().translate(tx=-centre_x, ty=-centre_y),
            pypdf.Transformation().rotate(-90),
            pypdf.Transformation().translate(tx=centre_x, ty=centre_y),
        )
        for step in steps:
            target.add_transformation(step)
        print(f"-r: rotating (by 90°) page {page_number}")
def pad_pages_to_multiple_of_8(pages_to_add):
    """Append blank A4 pages in place until the page count is a multiple of 8."""
    remainder = len(pages_to_add) % 8
    if remainder == 0:
        return
    count_before = len(pages_to_add)
    missing = 8 - remainder
    pages_to_add.extend(
        pypdf.PageObject.create_blank_page(width=A4_WIDTH, height=A4_HEIGHT)
        for _ in range(missing)
    )
    print(f"-n: number of input pages {count_before} not required multiple of 8, padded to {len(pages_to_add)}")
def normalize_pages_to_A4(pages_to_add):
    """Give every page an A4 media box and crop box anchored at the origin.

    Pages carrying a "/Rotate" entry are additionally rotated by 360 minus
    that value (presumably to cancel the stored rotation).
    """
    a4_edges = (
        ("left", 0),
        ("bottom", 0),
        ("top", A4_HEIGHT),
        ("right", A4_WIDTH),
    )
    for page in pages_to_add:
        if "/Rotate" in page:  # TODO: preserve rotation, but in canvas?
            stored_rotation = page["/Rotate"]
            page.rotate(360 - stored_rotation)
        for edge, value in a4_edges:
            setattr(page.mediabox, edge, value)
        page.cropbox = page.mediabox
def collect_per_page_crops_and_zooms(args_crops, args_symmetry, pages_to_add):
    """Build the list of crops to apply, one PageCrop per page.

    Args:
        args_crops: crop strings from -c (optionally prefixed with a
            ':'-delimited page range), or None/empty for no cropping.
        args_symmetry: if true, odd page indices (0-based) receive the
            left/right mirrored version of each crop.
        pages_to_add: the pages the crops refer to.

    Returns:
        A list as long as pages_to_add; pages not covered by any crop
        string share one default (zero) PageCrop. Later crop strings
        override earlier ones on the pages they cover.
    """
    crop_at_page = [PageCrop()] * len(pages_to_add)
    if not args_crops:
        return crop_at_page

    # The symmetry option is -s; the log prefix used to name a non-existent -t.
    prefix = "-c, -s" if args_symmetry else "-c"
    suffix = " (but alternating left and right crop between even and odd pages)" if args_symmetry else ""
    for c_string in args_crops:
        page_range, crops = split_crops_string(c_string)
        start_page, end_page = parse_page_range(page_range, pages_to_add)
        page_crop = PageCrop(*crops.split(','))
        print(f"{prefix}: to pages {start_page + 1} to {end_page} applying crop: {page_crop.format_in_cm}{suffix}")
        for page_num in range(start_page, end_page):
            if args_symmetry and page_num % 2:
                crop_at_page[page_num] = page_crop.give_mirror()
            else:
                crop_at_page[page_num] = page_crop
    return crop_at_page
def build_single_pages_output(writer, pages_to_add, crop_at_page):
    """Add every page to the writer, cropped and zoomed as its PageCrop says.

    Each page is shifted so the cropped-off left/bottom area leaves the
    page, scaled by the crop's zoom, and its media box is resized to the
    zoomed remaining area.

    Args:
        writer: object receiving the finished pages via add_page().
        pages_to_add: pages to transform (in place) and add.
        crop_at_page: one PageCrop per page, same order as pages_to_add.
    """
    print("building 1-input-page-per-output-page book")
    total_pages = len(pages_to_add)
    for i, page in enumerate(pages_to_add):
        crop = crop_at_page[i]
        page.add_transformation(pypdf.Transformation().translate(tx=-crop.left, ty=-crop.bottom))
        page.add_transformation(pypdf.Transformation().scale(crop.zoom, crop.zoom))
        page.mediabox.right = crop.remaining_width * crop.zoom
        page.mediabox.top = crop.remaining_height * crop.zoom
        writer.add_page(page)
        print(f"built page number {i+1} (of {total_pages})")
300 def build_nup4_output(writer, pages_to_add, crop_at_page, args_print_margin, args_analyze, canvas_class):
301 print("-n: building 4-input-pages-per-output-page book")
302 print(f"-m: applying printable-area margin of {args_print_margin}cm")
304 print("-a: drawing page borders, spine limits")
305 nup4_geometry = Nup4Geometry(args_print_margin)
306 pages_to_add, new_i_order = resort_pages_for_nup4(pages_to_add)
310 for i, page in enumerate(pages_to_add):
312 new_page = pypdf.PageObject.create_blank_page(width=A4_WIDTH, height=A4_HEIGHT)
313 corrected_i = new_i_order[i]
314 nup4_inner_page_transform(page, crop_at_page[corrected_i], nup4_geometry, nup4_i)
315 nup4_outer_page_transform(page, nup4_geometry, nup4_i)
316 new_page.merge_page(page)
318 print(f"merged page number {page_count} (of {len(pages_to_add)})")
321 ornate_nup4(writer, args_analyze, is_front_page, new_page, nup4_geometry, canvas_class)
322 writer.add_page(new_page)
324 is_front_page = not is_front_page
327 def resort_pages_for_nup4(pages_to_add):
333 for page in pages_to_add:
340 for n in PAGE_ORDER_FOR_NUP4:
341 new_i_order += [8 * n_eights + n]
342 new_page_order += [eight_pack[n]]
344 return new_page_order, new_i_order
def nup4_inner_page_transform(page, crop, nup4_geometry, nup4_i):
    """Apply crop, zoom and spine shrink to one page before it is placed on a sheet.

    Args:
        page: the page to transform in place.
        crop: the PageCrop of this page.
        nup4_geometry: provides shrink_for_spine, margin and shrink_for_margin.
        nup4_i: position index 0-3 on the output sheet; 0 and 2 take the
            left-crop branch, 1 and 3 the right-crop branch, 2 and 3 get an
            extra downward shift.
    """
    def shift(**offsets):
        page.add_transformation(pypdf.Transformation().translate(**offsets))

    in_left_crop_group = nup4_i in (0, 2)
    in_right_crop_group = nup4_i in (1, 3)

    shift(ty=(A4_HEIGHT / crop.zoom - (A4_HEIGHT - crop.top)))
    if in_left_crop_group:
        shift(tx=-crop.left)
    elif in_right_crop_group:
        shift(tx=(A4_WIDTH / crop.zoom - (A4_WIDTH - crop.right)))

    factor = crop.zoom * nup4_geometry.shrink_for_spine
    page.add_transformation(pypdf.Transformation().scale(factor, factor))

    if nup4_i in (2, 3):
        shift(ty=-2*nup4_geometry.margin/nup4_geometry.shrink_for_margin)
358 def nup4_outer_page_transform(page, nup4_geometry, nup4_i):
359 page.add_transformation(pypdf.Transformation().translate(ty=(1-nup4_geometry.shrink_for_spine)*A4_HEIGHT))
360 if nup4_i == 0 or nup4_i == 1:
361 y_section = A4_HEIGHT
362 page.mediabox.bottom = A4_HALF_HEIGHT
363 page.mediabox.top = A4_HEIGHT
364 if nup4_i == 2 or nup4_i == 3:
366 page.mediabox.bottom = 0
367 page.mediabox.top = A4_HALF_HEIGHT
368 if nup4_i == 0 or nup4_i == 2:
370 page.mediabox.left = 0
371 page.mediabox.right = A4_HALF_WIDTH
372 if nup4_i == 1 or nup4_i == 3:
373 page.add_transformation(pypdf.Transformation().translate(tx=(1-nup4_geometry.shrink_for_spine)*A4_WIDTH))
375 page.mediabox.left = A4_HALF_WIDTH
376 page.mediabox.right = A4_WIDTH
377 page.add_transformation(pypdf.Transformation().translate(tx=x_section, ty=y_section))
378 page.add_transformation(pypdf.Transformation().scale(QUARTER_SCALE_FACTOR, QUARTER_SCALE_FACTOR))
381 def ornate_nup4(writer, args_analyze, is_front_page, new_page, nup4_geometry, canvas_class):
384 packet = io.BytesIO()
385 c = canvas_class(packet, pagesize=A4)
387 c.line(0, A4_HEIGHT, A4_WIDTH, A4_HEIGHT)
388 c.line(0, A4_HALF_HEIGHT, A4_WIDTH, A4_HALF_HEIGHT)
389 c.line(0, 0, A4_WIDTH, 0)
390 c.line(0, A4_HEIGHT, 0, 0)
391 c.line(A4_HALF_WIDTH, A4_HEIGHT, A4_HALF_WIDTH, 0)
392 c.line(A4_WIDTH, A4_HEIGHT, A4_WIDTH, 0)
394 new_pdf = pypdf.PdfReader(packet)
395 new_page.merge_page(new_pdf.pages[0])
396 printable_offset_x = nup4_geometry.margin
397 printable_offset_y = nup4_geometry.margin * A4_HEIGHT / A4_WIDTH
398 new_page.add_transformation(pypdf.Transformation().scale(nup4_geometry.shrink_for_margin, nup4_geometry.shrink_for_margin))
399 new_page.add_transformation(pypdf.Transformation().translate(tx=printable_offset_x, ty=printable_offset_y))
400 x_left_spine_limit = A4_HALF_WIDTH * nup4_geometry.shrink_for_spine
401 x_right_spine_limit = A4_WIDTH - x_left_spine_limit
402 if args_analyze or is_front_page:
403 packet = io.BytesIO()
404 c = canvas_class(packet, pagesize=A4)
408 c.line(x_left_spine_limit, A4_HEIGHT, x_left_spine_limit, 0)
409 c.line(x_right_spine_limit, A4_HEIGHT, x_right_spine_limit, 0)
412 draw_cut(c, x_left_spine_limit, (1))
413 draw_cut(c, x_right_spine_limit, (-1))
414 if args_analyze or is_front_page:
416 new_pdf = pypdf.PdfReader(packet)
417 new_page.merge_page(new_pdf.pages[0])
def draw_cut(canvas, x_spine_limit, direction):
    """Draw one three-segment cut mark at a spine limit.

    Args:
        canvas: drawing target offering line(x1, y1, x2, y2).
        x_spine_limit: x position of the spine limit the mark is centred on.
        direction: 1 or -1; flips the mark both horizontally and vertically.
    """
    half_cut = 0.5 * CUT_WIDTH * direction
    outer_x = x_spine_limit - half_cut
    inner_x = x_spine_limit + half_cut
    middle_y = A4_HALF_HEIGHT + MIDDLE_POINT_DEPTH * direction
    tip_y = A4_HALF_HEIGHT + CUT_DEPTH * direction
    # Polyline: inner start on the half-height line, out to the tip on the
    # spine limit, back along the spine limit to the middle point, then to
    # the outer start on the half-height line.
    outline = [
        (inner_x, A4_HALF_HEIGHT),
        (x_spine_limit, tip_y),
        (x_spine_limit, middle_y),
        (outer_x, A4_HALF_HEIGHT),
    ]
    for (from_x, from_y), (to_x, to_y) in zip(outline, outline[1:]):
        canvas.line(from_x, from_y, to_x, to_y)
432 validate_inputs_first_pass(args)
435 from reportlab.pdfgen.canvas import Canvas
437 raise HandledException("-n: need reportlab.pdfgen.canvas installed for --nup4")
438 pages_to_add, opened_files = read_inputs_to_pagelist(args.input_file, args.page_range)
439 validate_inputs_second_pass(args, pages_to_add)
440 rotate_pages(args.rotate_page, pages_to_add)
442 pad_pages_to_multiple_of_8(pages_to_add)
443 normalize_pages_to_A4(pages_to_add)
444 crop_at_page = collect_per_page_crops_and_zooms(args.crops, args.symmetry, pages_to_add)
445 writer = pypdf.PdfWriter()
447 build_nup4_output(writer, pages_to_add, crop_at_page, args.print_margin, args.analyze, Canvas)
449 build_single_pages_output(writer, pages_to_add, crop_at_page)
450 for file in opened_files:
452 with open(args.output_file, 'wb') as output_file:
453 writer.write(output_file)
456 if __name__ == "__main__":
459 except HandledException as e:
460 handled_error_exit(e)