3 bookmaker.py is a helper for optimizing PDFs of books for the production of small self-printed, self-bound physical books. Towards this goal it offers various PDF manipulation options that can potentially also be used independently and for other purposes.
9 def fail_with_msg(msg):
15 fail_with_msg("Can't run without pypdf installed.")
17 from reportlab.lib.pagesizes import A4
19 fail_with_msg("Can't run without reportlab installed.")
22 A4_WIDTH, A4_HEIGHT = A4
23 POINTS_PER_CM = 10 * 72 / 25.4
24 CUT_DEPTH = 1.95 * POINTS_PER_CM
25 CUT_WIDTH = 1.05 * POINTS_PER_CM
26 MIDDLE_POINT_DEPTH = 0.4 * POINTS_PER_CM
27 SPINE_LIMIT = 1 * POINTS_PER_CM
31 Concatenate two PDFs A.pdf and B.pdf to COMBINED.pdf:
32 bookmaker.py --input_file A.pdf --input_file B.pdf --output_file COMBINED.pdf
34 Produce OUTPUT.pdf containing all pages of (inclusive) page number range 3-7 from INPUT.pdf:
35 bookmaker.py -i INPUT.pdf --page_range 3-7 -o OUTPUT.pdf
37 Produce COMBINED.pdf from A.pdf's first 7 pages, B.pdf's pages except its first two, and all pages of C.pdf:
38 bookmaker.py -i A.pdf -p start-7 -i B.pdf -p 3-end -i C.pdf -o COMBINED.pdf
40 Crop each page 5cm from the left, 10cm from the bottom, 2cm from the right, and 0cm from the top:
41 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf --crops "5,10,2,0"
43 Include all pages from INPUT.pdf, but crop pages 10-20 by 5cm each from bottom and top:
44 bookmaker.py -i INPUT.pdf -c "10-20:0,5,0,5" -o OUTPUT.pdf
46 Same crops on pages 10-20, but also crop all pages 30 and later by 3cm each from left and right:
47 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf -c "10-20:0,5,0,5" -c "30-end:3,0,3,0"
49 Rotate by 90° pages 3, 5, 7; rotate page 7 once more by 90° (i.e. 180° in total):
50 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf --rotate 3 -r 5 -r 7 -r 7
52 Initially declare 5cm crop from the left and 1cm crop from right, but alternate direction between even and odd pages:
53 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf -c "5,0,1,0" -s
55 Quarter each OUTPUT.pdf page to carry 4 pages from INPUT.pdf, draw stencils into inner margins for cuts to carry binding strings:
56 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf --nup4
58 Same as --nup4, but define a printable-region margin of 1.3cm to limit the space for the INPUT.pdf pages in OUTPUT.pdf page quarters:
59 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf -n --print_margin 1.3
61 Same as -n, but draw lines marking printable-region margins, page quarters, spine margins:
62 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf -n --analyze
66 For arguments like -p, page numbers are assumed to start with 1 (not 0, which is treated as an invalid page number value).
68 The target page shape so far is assumed to be A4 in portrait orientation; bookmaker.py normalizes all pages to this format before applying crops, and removes any source PDF /Rotate commands (for their production of landscape orientations).
70 The --nup4 quartering puts pages into a specific order optimized for no-tumble duplex print-outs that can easily be folded and cut into pages of a small A6 book. Each unit of 8 pages from the source PDF is mapped thus onto two subsequent pages (i.e. front and back of a printed A4 paper):
79 To facilitate this layout, --nup4 also pads the input PDF pages to a total number that is a multiple of 8, by adding empty pages.
81 (To turn this page into a tiny 8-page book, cut the paper in two on its horizontal middle line. Fold the two halves by their vertical middle lines, with pages 3-2 and 7-6 on the folds' insides. This creates two 4-page books of pages 1-4 and pages 5-8. Fold them both closed and (counter-intuitively) put the book of pages 5-8 on top of the other one (creating a temporary page order of 5,6,7,8,1,2,3,4). A binding cut stencil should be visible on the top left of this stack – cut it out (with all pages folded together) to add the same inner-margin upper cut to each page. Turn around your 8-page stack to find the mirror image of the aforementioned stencil on the stack's back's bottom, and cut it out too. Each page now has binding cuts on top and bottom of its inner margins. Swap the order of both books (back to the final page order of 1,2,3,4,5,6,7,8), and you now have an 8-page book that can be "bound" in its binding cuts through a rubber band or the like. Repeat with the next 8-page double-page, et cetera. (Actually, with just 8 pages, the paper may curl under the pressure of a rubber band – but go up to 32 pages or so, and the result will become quite stable.))
85 def validate_page_range(p_string, err_msg_prefix):
86 err_msg = "%s: invalid page range string: %s" % (err_msg_prefix, p_string)
87 if '-' not in p_string:
88 raise ValueError("%s: page range string lacks '-': %s" % (err_msg_prefix, p_string))
89 tokens = p_string.split("-")
91 raise ValueError("%s: page range string has too many '-': %s" % (err_msg_prefix, p_string))
92 for i, token in enumerate(tokens):
95 if i == 0 and token == "start":
97 if i == 1 and token == "end":
102 raise ValueError("%s: page range string carries values that are neither integer, nor 'start', nor 'end': %s" % (err_msg_prefix, p_string))
104 raise ValueError("%s: page range string may not carry page numbers <1: %s" % (err_msg_prefix, p_string))
108 start = int(tokens[0])
112 if start > 0 and end > 0 and start > end:
113 raise ValueError("%s: page range starts higher than it ends: %s" % (err_msg_prefix, p_string))
115 def split_crops_string(c_string):
116 initial_split = c_string.split(':')
117 if len(initial_split) > 1:
118 page_range = initial_split[0]
119 crops = initial_split[1]
122 crops = initial_split[0]
123 return page_range, crops
125 def parse_page_range(range_string, pages):
127 end_page = len(pages)
129 start, end = range_string.split('-')
130 if not (len(start) == 0 or start == "start"):
131 start_page = int(start) - 1
132 if not (len(end) == 0 or end == "end"):
134 return start_page, end_page
137 parser = argparse.ArgumentParser(description=__doc__, epilog=help_epilogue, formatter_class=argparse.RawDescriptionHelpFormatter)
138 parser._optionals.title = "OPTIONS"
139 parser.add_argument("-i", "--input_file", action="append", required=True, help="input PDF file")
140 parser.add_argument("-o", "--output_file", required=True, help="output PDF file")
141 parser.add_argument("-p", "--page_range", action="append", help="page range, e.g., '2-9' or '3-end' or 'start-14'")
142 parser.add_argument("-c", "--crops", action="append", help="cm crops left, bottom, right, top – e.g., '10,10,10,10'; prefix with ':'-delimited page range to limit effect")
143 parser.add_argument("-r", "--rotate_page", type=int, action="append", help="rotate page of number by 90° (usable multiple times on same page!)")
144 parser.add_argument("-s", "--symmetry", action="store_true", help="alternate horizontal crops between odd and even pages")
145 parser.add_argument("-n", "--nup4", action='store_true', help="puts 4 input pages onto 1 output page, adds binding cut stencil")
146 parser.add_argument("-a", "--analyze", action="store_true", help="in --nup4, print lines identifying spine, page borders")
147 parser.add_argument("-m", "--print_margin", type=float, default=0.43, help="print margin for --nup4 in cm (default 0.43)")
148 args = parser.parse_args()
150 # some basic input validation
151 for filename in args.input_file:
152 if not os.path.isfile(filename):
153 raise ValueError("-i: %s is not a file" % filename)
155 with open(filename, 'rb') as file:
156 pypdf.PdfReader(file)
157 except pypdf.errors.PdfStreamError:
158 raise ValueError("-i: cannot interpret %s as PDF file" % filename)
160 for p_string in args.page_range:
161 validate_page_range(p_string, "-p")
162 if len(args.page_range) > len(args.input_file):
163 raise ValueError("more -p arguments than -i arguments")
165 for c_string in args.crops:
166 initial_split = c_string.split(':')
167 if len(initial_split) > 2:
168 raise ValueError("-c: cropping string has multiple ':': %s" % c_string)
169 page_range, crops = split_crops_string(c_string)
170 crops = crops.split(",")
172 validate_page_range(page_range, "-c")
174 raise ValueError("-c: cropping should contain three ',': %s" % c_string)
179 raise ValueError("-c: non-number crop in %s" % c_string)
181 for r in args.rotate_page:
185 raise ValueError("-r: non-integer value: %s" % r)
187 raise ValueError("-r: value must not be <1: %s" % r)
189 float(args.print_margin)
191 raise ValueError("-m: non-float value: %s" % arg.print_margin)
198 # select pages from input files
202 for i, input_file in enumerate(args.input_file):
203 file = open(input_file, 'rb')
204 opened_files += [file]
205 reader = pypdf.PdfReader(file)
207 if args.page_range and len(args.page_range) > i:
208 range_string = args.page_range[i]
209 start_page, end_page = parse_page_range(range_string, reader.pages)
210 if end_page > len(reader.pages): # no need to test start_page cause start_page > end_page is checked above
211 raise ValueError("-p: page range goes beyond pages of input file: %s" % range_string)
212 for old_page_num in range(start_page, end_page):
214 page = reader.pages[old_page_num]
215 pages_to_add += [page]
216 print("-i, -p: read in %s page number %d as new page %d" % (input_file, old_page_num+1, new_page_num))
218 # we can do some more input validations now that we know how many pages output should have
220 for c_string in args.crops:
221 page_range, _= split_crops_string(c_string)
223 start, end = parse_page_range(page_range, pages_to_add)
224 if end > len(pages_to_add):
225 raise ValueError("-c: page range goes beyond number of pages we're building: %s" % page_range)
227 for r in args.rotate_page:
228 if r > len(pages_to_add):
229 raise ValueError("-r: page number beyond number of pages we're building: %d" % r)
231 # rotate page canvas (as opposed to using PDF's /Rotate command)
233 for rotate_page in args.rotate_page:
234 page = pages_to_add[rotate_page - 1]
235 page.add_transformation(pypdf.Transformation().translate(tx=-A4_WIDTH/2, ty=-A4_HEIGHT/2))
236 page.add_transformation(pypdf.Transformation().rotate(-90))
237 page.add_transformation(pypdf.Transformation().translate(tx=A4_WIDTH/2, ty=A4_HEIGHT/2))
238 print("-r: rotating (by 90°) page", rotate_page)
240 # if necessary, pad pages to multiple of 8
242 mod_to_8 = len(pages_to_add) % 8
244 print("-n: number of input pages %d not multiple of 8, padding to that" % len(pages_to_add))
245 for _ in range(8 - mod_to_8):
246 new_page = pypdf.PageObject.create_blank_page(width=A4_WIDTH, height=A4_HEIGHT)
247 pages_to_add += [new_page]
249 # normalize all pages to portrait A4
250 for page in pages_to_add:
251 if "/Rotate" in page:
252 page.rotate(360 - page["/Rotate"])
253 page.mediabox.left = 0
254 page.mediabox.bottom = 0
255 page.mediabox.top = A4_HEIGHT
256 page.mediabox.right = A4_WIDTH
257 page.cropbox = page.mediabox
259 # determine page crops, zooms, crop symmetry
260 crops_at_page = [(0,0,0,0)]*len(pages_to_add)
261 zoom_at_page = [1]*len(pages_to_add)
263 for c_string in args.crops:
264 page_range, crops = split_crops_string(c_string)
265 start_page, end_page = parse_page_range(page_range, pages_to_add)
266 crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm = [float(x) for x in crops.split(',')]
267 crop_left = crop_left_cm * POINTS_PER_CM
268 crop_bottom = crop_bottom_cm * POINTS_PER_CM
269 crop_right = crop_right_cm * POINTS_PER_CM
270 crop_top = crop_top_cm * POINTS_PER_CM
272 print("-c, -t: to pages %d to %d applying crops: left %.2fcm, bottom %.2fcm, right %.2fcm, top %.2fcm (but alternating left and right crop between even and odd pages)" % (start_page + 1, end_page, crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm))
274 print("-c: to pages %d to %d applying crops: left %.2fcm, bottom %.2fcm, right %.2fcm, top %.2fcm" % (start_page + 1, end_page, crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm))
275 cropped_width = A4_WIDTH - crop_left - crop_right
276 cropped_height = A4_HEIGHT - crop_bottom - crop_top
278 zoom_horizontal = A4_WIDTH / (A4_WIDTH - crop_left - crop_right)
279 zoom_vertical = A4_HEIGHT / (A4_HEIGHT - crop_bottom - crop_top)
280 if (zoom_horizontal > 1 and zoom_vertical < 1) or (zoom_horizontal < 1 and zoom_vertical > 1):
281 raise ValueError("crops would create opposing zoom directions")
282 elif zoom_horizontal + zoom_vertical > 2:
283 zoom = min(zoom_horizontal, zoom_vertical)
285 zoom = max(zoom_horizontal, zoom_vertical)
286 for page_num in range(start_page, end_page):
287 if args.symmetry and page_num % 2:
288 crops_at_page[page_num] = (crop_right, crop_bottom, crop_left, crop_top)
290 crops_at_page[page_num] = (crop_left, crop_bottom, crop_right, crop_top)
291 zoom_at_page[page_num] = zoom
293 writer = pypdf.PdfWriter()
296 print("building 1-input-page-per-output-page book")
298 for i, page in enumerate(pages_to_add):
299 crop_left, crop_bottom, crop_right, crop_top = crops_at_page[i]
300 zoom = zoom_at_page[i]
301 page.add_transformation(pypdf.Transformation().translate(tx=-crop_left, ty=-crop_bottom))
302 page.add_transformation(pypdf.Transformation().scale(zoom, zoom))
303 cropped_width = A4_WIDTH - crop_left - crop_right
304 cropped_height = A4_HEIGHT - crop_bottom - crop_top
305 page.mediabox.right = cropped_width * zoom
306 page.mediabox.top = cropped_height * zoom
307 writer.add_page(page)
308 odd_page = not odd_page
309 print("built page number %d (of %d)" % (i+1, len(pages_to_add)))
312 print("-n: building 4-input-pages-per-output-page book")
313 print("-m: applying printable-area margin of %.2fcm" % args.print_margin)
315 print("-a: drawing page borders, spine limits")
317 printable_margin = args.print_margin * POINTS_PER_CM
318 printable_scale = (A4_WIDTH - 2*printable_margin)/A4_WIDTH
319 half_width = A4_WIDTH / n_pages_per_axis
320 half_height = A4_HEIGHT / n_pages_per_axis
321 section_scale_factor = 1 / n_pages_per_axis
322 spine_part_of_page = (SPINE_LIMIT / half_width) / printable_scale
323 bonus_shrink_factor = 1 - spine_part_of_page
329 for page in pages_to_add:
336 new_i_order += [8 * n_eights + 3,
345 new_page_order += [eight_pack[3]] # page front, upper left
346 new_page_order += [eight_pack[0]] # page front, upper right
347 new_page_order += [eight_pack[7]] # page front, lower left
348 new_page_order += [eight_pack[4]] # page front, lower right
349 new_page_order += [eight_pack[1]] # page back, upper left
350 new_page_order += [eight_pack[2]] # page back, upper right
351 new_page_order += [eight_pack[5]] # page back, lower left
352 new_page_order += [eight_pack[6]] # page back, lower right
356 for j, page in enumerate(new_page_order):
358 new_page = pypdf.PageObject.create_blank_page(width=A4_WIDTH, height=A4_HEIGHT)
360 # in-section transformations: align pages on top, left-hand pages to left, right-hand to right
361 new_i = new_i_order[j]
362 crop_left, crop_bottom, crop_right, crop_top = crops_at_page[new_i]
363 zoom = zoom_at_page[new_i]
364 page.add_transformation(pypdf.Transformation().translate(ty=(A4_HEIGHT / zoom - (A4_HEIGHT - crop_top))))
366 page.add_transformation(pypdf.Transformation().translate(tx=-crop_left))
367 elif i == 1 or i == 3:
368 page.add_transformation(pypdf.Transformation().translate(tx=(A4_WIDTH / zoom - (A4_WIDTH - crop_right))))
369 page.add_transformation(pypdf.Transformation().scale(zoom * bonus_shrink_factor, zoom * bonus_shrink_factor))
371 page.add_transformation(pypdf.Transformation().translate(ty=-2*printable_margin/printable_scale))
373 # outer section transformations
374 page.add_transformation(pypdf.Transformation().translate(ty=(1-bonus_shrink_factor)*A4_HEIGHT))
376 y_section = A4_HEIGHT
377 page.mediabox.bottom = half_height
378 page.mediabox.top = A4_HEIGHT
381 page.mediabox.bottom = 0
382 page.mediabox.top = half_height
385 page.mediabox.left = 0
386 page.mediabox.right = half_width
388 page.add_transformation(pypdf.Transformation().translate(tx=(1-bonus_shrink_factor)*A4_WIDTH))
390 page.mediabox.left = half_width
391 page.mediabox.right = A4_WIDTH
392 page.add_transformation(pypdf.Transformation().translate(tx=x_section, ty=y_section))
393 page.add_transformation(pypdf.Transformation().scale(section_scale_factor, section_scale_factor))
394 new_page.merge_page(page)
396 print("merged page number %d (of %d)" % (page_count, len(pages_to_add)))
399 from reportlab.pdfgen import canvas
402 packet = io.BytesIO()
403 c = canvas.Canvas(packet, pagesize=A4)
405 c.line(0, A4_HEIGHT, A4_WIDTH, A4_HEIGHT)
406 c.line(0, half_height, A4_WIDTH, half_height)
407 c.line(0, 0, A4_WIDTH, 0)
408 c.line(0, A4_HEIGHT, 0, 0)
409 c.line(half_width, A4_HEIGHT, half_width, 0)
410 c.line(A4_WIDTH, A4_HEIGHT, A4_WIDTH, 0)
412 new_pdf = pypdf.PdfReader(packet)
413 new_page.merge_page(new_pdf.pages[0])
414 printable_offset_x = printable_margin
415 printable_offset_y = printable_margin * A4_HEIGHT / A4_WIDTH
416 new_page.add_transformation(pypdf.Transformation().scale(printable_scale, printable_scale))
417 new_page.add_transformation(pypdf.Transformation().translate(tx=printable_offset_x, ty=printable_offset_y))
418 x_left_SPINE_LIMIT = half_width * bonus_shrink_factor
419 x_right_SPINE_LIMIT = A4_WIDTH - x_left_SPINE_LIMIT
420 if args.analyze or front_page:
421 packet = io.BytesIO()
422 c = canvas.Canvas(packet, pagesize=A4)
426 c.line(x_left_SPINE_LIMIT, A4_HEIGHT, x_left_SPINE_LIMIT, 0)
427 c.line(x_right_SPINE_LIMIT, A4_HEIGHT, x_right_SPINE_LIMIT, 0)
431 start_up_left_left_x = x_left_SPINE_LIMIT - 0.5 * CUT_WIDTH
432 start_up_left_right_x = x_left_SPINE_LIMIT + 0.5 * CUT_WIDTH
433 middle_point_up_left_y = half_height + MIDDLE_POINT_DEPTH
434 end_point_up_left_y = half_height + CUT_DEPTH
435 c.line(start_up_left_right_x, half_height, x_left_SPINE_LIMIT, end_point_up_left_y)
436 c.line(x_left_SPINE_LIMIT, end_point_up_left_y, x_left_SPINE_LIMIT, middle_point_up_left_y)
437 c.line(x_left_SPINE_LIMIT, middle_point_up_left_y, start_up_left_left_x, half_height)
439 start_down_right_left_x = x_right_SPINE_LIMIT - 0.5 * CUT_WIDTH
440 start_down_right_right_x = x_right_SPINE_LIMIT + 0.5 * CUT_WIDTH
441 middle_point_down_right_y = half_height - MIDDLE_POINT_DEPTH
442 end_point_down_right_y = half_height - CUT_DEPTH
443 c.line(start_down_right_left_x, half_height, x_right_SPINE_LIMIT, end_point_down_right_y)
444 c.line(x_right_SPINE_LIMIT, end_point_down_right_y, x_right_SPINE_LIMIT, middle_point_down_right_y)
445 c.line(x_right_SPINE_LIMIT, middle_point_down_right_y, start_down_right_right_x, half_height)
446 if args.analyze or front_page:
448 new_pdf = pypdf.PdfReader(packet)
449 new_page.merge_page(new_pdf.pages[0])
450 writer.add_page(new_page)
452 front_page = not front_page
455 for file in opened_files:
457 with open(args.output_file, 'wb') as output_file:
458 writer.write(output_file)
461 if __name__ == "__main__":
464 except ValueError as e: