5 def fail_with_msg(msg):
11 fail_with_msg("Can't run without pypdf installed.")
13 from reportlab.lib.pagesizes import A4
15 fail_with_msg("Can't run without reportlab installed.")
17 a4_width, a4_height = A4
18 points_per_cm = 10 * 72 / 25.4
19 cut_depth = 1.95 * points_per_cm
20 cut_width = 1.05 * points_per_cm
21 middle_point_depth = 0.4 * points_per_cm
22 spine_limit = 1 * points_per_cm
23 desc = """bookmaker.py is a helper for optimizing PDFs of books for the production of small self-printed, self-bound physical books. To this goal it offers various PDF manipulation options that can potentially also be used independently and for other purposes.
28 Concatenate two PDFs A.pdf and B.pdf to COMBINED.pdf:
29 bookmaker.py --input_file A.pdf --input_file B.pdf --output_file COMBINED.pdf
31 Produce OUTPUT.pdf containing all pages of (inclusive) page number range 3-7 from INPUT.pdf:
32 bookmaker.py -i INPUT.pdf --page_range 3-7 -o OUTPUT.pdf
34 Produce COMBINED.pdf from A.pdf's first 7 pages, B.pdf's pages except its first two, and all pages of C.pdf:
35 bookmaker.py -i A.pdf -p start-7 -i B.pdf -p 3-end -i C.pdf -o COMBINED.pdf
37 Crop each page 5cm from the left, 10cm from the bottom, 2cm from the right, and 0cm from the top:
38 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf --crops "5,10,2,0"
40 Include all pages from INPUT.pdf, but crop pages 10-20 by 5cm each from bottom and top:
41 bookmaker.py -i INPUT.pdf -c "10-20:0,5,0,5" -o OUTPUT.pdf
43 Same crops on pages 10-20, but also crop all pages 30 and later by 3cm each from left and right:
44 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf -c "10-20:0,5,0,5" -c "30-end:3,0,3,0"
46 Rotate by 90° pages 3, 5, 7; rotate page 7 once more by 90° (i.e. 180° in total):
47 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf --rotate 3 -r 5 -r 7 -r 7
49 Initially declare 5cm crop from the left and 1cm crop from right, but alternate direction between even and odd pages:
50 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf -c "5,0,1,0" -s
52 Quarter each OUTPUT.pdf page to carry 4 pages from INPUT.pdf, draw stencils into inner margins for cuts to carry binding strings:
53 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf --nup4
55 Same as --nup4, but define a printable-region margin of 1.3cm to limit the space for the INPUT.pdf pages in OUTPUT.pdf page quarters:
56 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf -n --print_margin 1.3
58 Same as -n, but draw lines marking printable-region margins, page quarters, spine margins:
59 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf -n --analyze
63 For arguments like -p, page numbers are assumed to start with 1 (not 0, which is treated as an invalid page number value).
65 The target page shape so far is assumed to be A4 in portrait orientation; bookmaker.py normalizes all pages to this format before applying crops, and removes any source PDF /Rotate commands (for their production of landscape orientations).
67 The --nup4 quartering puts pages into a specific order optimized for no-tumble duplex print-outs that can easily be folded and cut into pages of a small A6 book. Each unit of 8 pages from the source PDF is mapped thus onto two subsequent pages (i.e. front and back of a printed A4 paper):
76 To facilitate this layout, --nup4 also pads the input PDF pages to a total number that is a multiple of 8, by adding empty pages.
78 (To turn this page into a tiny 8-page book, cut the paper in two on its horizontal middle line. Fold the two halves by their vertical middle lines, with pages 3-2 and 7-6 on the folds' insides. This creates two 4-page books of pages 1-4 and pages 5-8. Fold them both closed and (counter-intuitively) put the book of pages 5-8 on top of the other one (creating a temporary page order of 5,6,7,8,1,2,3,4). A binding cut stencil should be visible on the top left of this stack – cut it out (with all pages folded together) to add the same inner-margin upper cut to each page. Turn around your 8-pages stack to find the mirror image of aforementioned stencil on the stack's back's bottom, and cut it out too. Each page now has binding cuts on top and bottom of its inner margins. Swap the order of both books (back to the final page order of 1,2,3,4,5,6,7,8), and you now have an 8-pages book that can be "bound" in its binding cuts through a rubber band or the like. Repeat with the next 8-pages double-page, et cetera. (Actually, with just 8 pages, the paper may curl under the pressure of a rubber band – but go up to 32 pages or so, and the result will become quite stable.)
81 parser = argparse.ArgumentParser(description=desc, epilog=epilogue, formatter_class=argparse.RawDescriptionHelpFormatter)
82 parser._optionals.title = "OPTIONS"
83 parser.add_argument("-i", "--input_file", action="append", required=True, help="input PDF file")
84 parser.add_argument("-o", "--output_file", required=True, help="output PDF file")
85 parser.add_argument("-p", "--page_range", action="append", help="page range, e.g., '2-9' or '3-end' or 'start-14'")
86 parser.add_argument("-c", "--crops", action="append", help="cm crops left, bottom, right, top – e.g., '10,10,10,10'; prefix with ':'-delimited page range to limit effect")
87 parser.add_argument("-r", "--rotate_page", type=int, action="append", help="rotate page of number by 90° (usable multiple times on same page!)")
88 parser.add_argument("-s", "--symmetry", action="store_true", help="alternate horizontal crops between odd and even pages")
89 parser.add_argument("-n", "--nup4", action='store_true', help="puts 4 input pages onto 1 output page, adds binding cut stencil")
90 parser.add_argument("-a", "--analyze", action="store_true", help="in --nup4, print lines identifying spine, page borders")
91 parser.add_argument("-m", "--print_margin", type=float, default=0.43, help="print margin for --nup4 in cm (default 0.43)")
92 args = parser.parse_args()
94 # some basic input validation
95 def validate_page_range(p_string, err_msg_prefix):
96 err_msg = "%s: invalid page range string: %s" % (err_msg_prefix, p_string)
97 if '-' not in p_string:
98 fail_with_msg("%s: page range string lacks '-': %s" % (err_msg_prefix, p_string))
99 tokens = p_string.split("-")
101 fail_with_msg("%s: page range string has too many '-': %s" % (err_msg_prefix, p_string))
102 for i, token in enumerate(tokens):
105 if i == 0 and token == "start":
107 if i == 1 and token == "end":
112 fail_with_msg("%s: page range string carries values that are neither integer, nor 'start', nor 'end': %s" % (err_msg_prefix, p_string))
114 fail_with_msg("%s: page range string may not carry page numbers <1: %s" % (err_msg_prefix, p_string))
118 start = int(tokens[0])
122 if start > 0 and end > 0 and start > end:
123 fail_with_msg("%s: page range starts higher than it ends: %s" % (err_msg_prefix, p_string))
125 for filename in args.input_file:
126 if not os.path.isfile(filename):
127 fail_with_msg("-i: %s is not a file" % filename)
129 with open(filename, 'rb') as file:
130 pypdf.PdfReader(file)
131 except pypdf.errors.PdfStreamError:
132 fail_with_msg("-i: cannot interpret %s as PDF file" % filename)
134 for p_string in args.page_range:
135 validate_page_range(p_string, "-p")
136 if len(args.page_range) > len(args.input_file):
137 fail_with_msg("more -p arguments than -i arguments")
139 for c_string in args.crops:
140 initial_split = c_string.split(':')
141 if len(initial_split) > 2:
142 fail_with_msg("-c: cropping string has multiple ':': %s" % c_string)
143 if len(initial_split) > 1:
144 validate_page_range(initial_split[0], "-c")
145 crops = initial_split[1].split(",")
148 crops = initial_split[0].split(",")
150 fail_with_msg("-c: cropping should contain three ',': %s" % c_string)
155 fail_with_msg("-c: non-number crop in %s" % c_string)
157 for r in args.rotate_page:
161 fail_with_msg("-r: non-integer value: %s" % r)
163 fail_with_msg("-r: value must not be <1: %s" % r)
165 float(args.print_margin)
167 fail_with_msg("-m: non-float value: %s" % arg.print_margin)
169 # select pages from input files
170 def parse_page_range(range_string, pages):
172 end_page = len(pages)
174 start, end = range_string.split('-')
175 if not (len(start) == 0 or start == "start"):
176 start_page = int(start) - 1
177 if not (len(end) == 0 or end == "end"):
179 return start_page, end_page
184 for i, input_file in enumerate(args.input_file):
185 file = open(input_file, 'rb')
186 opened_files += [file]
187 reader = pypdf.PdfReader(file)
189 if args.page_range and len(args.page_range) > i:
190 range_string = args.page_range[i]
191 start_page, end_page = parse_page_range(range_string, reader.pages)
192 if end_page > len(reader.pages): # no need to test start_page cause start_page > end_page is checked above
193 fail_with_msg("-p: page range goes beyond pages of input file: %s" % range_string)
194 for old_page_num in range(start_page, end_page):
196 page = reader.pages[old_page_num]
197 pages_to_add += [page]
198 print("-i, -p: read in %s page number %d as new page %d" % (input_file, old_page_num+1, new_page_num))
200 # we can do some more input validations now that we know how many pages output should have
202 for c_string in args.crops:
203 initial_split = c_string.split(':')
204 if len(initial_split) > 1:
205 start, end = parse_page_range(initial_split[0], pages_to_add)
206 if end > len(pages_to_add):
207 fail_with_msg("-c: page range goes beyond number of pages we're building: %s" % initial_split[0])
209 for r in args.rotate_page:
210 if r > len(pages_to_add):
211 fail_with_msg("-r: page number beyond number of pages we're building: %d" % r)
215 for rotate_page in args.rotate_page:
216 page = pages_to_add[rotate_page - 1]
217 page.add_transformation(pypdf.Transformation().translate(tx=-a4_width/2, ty=-a4_height/2))
218 page.add_transformation(pypdf.Transformation().rotate(-90))
219 page.add_transformation(pypdf.Transformation().translate(tx=a4_width/2, ty=a4_height/2))
220 print("-r: rotating (by 90°) page", rotate_page)
222 # if necessary, pad pages to multiple of 8
224 mod_to_8 = len(pages_to_add) % 8
226 print("-n: number of input pages %d not multiple of 8, padding to that" % len(pages_to_add))
227 for _ in range(8 - mod_to_8):
228 new_page = pypdf.PageObject.create_blank_page(width=a4_width, height=a4_height)
229 pages_to_add += [new_page]
231 # normalize all pages to portrait A4
232 for page in pages_to_add:
233 if "/Rotate" in page:
234 page.rotate(360 - page["/Rotate"])
235 page.mediabox.left = 0
236 page.mediabox.bottom = 0
237 page.mediabox.top = a4_height
238 page.mediabox.right = a4_width
239 page.cropbox = page.mediabox
241 # determine page crops, zooms, crop symmetry
242 crops_at_page = [(0,0,0,0)]*len(pages_to_add)
243 zoom_at_page = [1]*len(pages_to_add)
245 for crops in args.crops:
246 initial_split = crops.split(':')
247 if len(initial_split) > 1:
248 page_range = initial_split[0]
249 crops = initial_split[1]
252 crops = initial_split[0]
253 start_page, end_page = parse_page_range(page_range, pages_to_add)
254 crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm = [float(x) for x in crops.split(',')]
255 crop_left = crop_left_cm * points_per_cm
256 crop_bottom = crop_bottom_cm * points_per_cm
257 crop_right = crop_right_cm * points_per_cm
258 crop_top = crop_top_cm * points_per_cm
260 print("-c, -t: to pages %d to %d applying crops: left %.2fcm, bottom %.2fcm, right %.2fcm, top %.2fcm (but alternating left and right crop between even and odd pages)" % (start_page + 1, end_page, crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm))
262 print("-c: to pages %d to %d applying crops: left %.2fcm, bottom %.2fcm, right %.2fcm, top %.2fcm" % (start_page + 1, end_page, crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm))
263 cropped_width = a4_width - crop_left - crop_right
264 cropped_height = a4_height - crop_bottom - crop_top
266 zoom_horizontal = a4_width / (a4_width - crop_left - crop_right)
267 zoom_vertical = a4_height / (a4_height - crop_bottom - crop_top)
268 if (zoom_horizontal > 1 and zoom_vertical < 1) or (zoom_horizontal < 1 and zoom_vertical > 1):
269 fail_with_msg("crops would create opposing zoom directions")
270 elif zoom_horizontal + zoom_vertical > 2:
271 zoom = min(zoom_horizontal, zoom_vertical)
273 zoom = max(zoom_horizontal, zoom_vertical)
274 for page_num in range(start_page, end_page):
275 if args.symmetry and page_num % 2:
276 crops_at_page[page_num] = (crop_right, crop_bottom, crop_left, crop_top)
278 crops_at_page[page_num] = (crop_left, crop_bottom, crop_right, crop_top)
279 zoom_at_page[page_num] = zoom
281 writer = pypdf.PdfWriter()
284 print("building 1-input-page-per-output-page book")
286 for i, page in enumerate(pages_to_add):
287 crop_left, crop_bottom, crop_right, crop_top = crops_at_page[i]
288 zoom = zoom_at_page[i]
289 page.add_transformation(pypdf.Transformation().translate(tx=-crop_left, ty=-crop_bottom))
290 page.add_transformation(pypdf.Transformation().scale(zoom, zoom))
291 cropped_width = a4_width - crop_left - crop_right
292 cropped_height = a4_height - crop_bottom - crop_top
293 page.mediabox.right = cropped_width * zoom
294 page.mediabox.top = cropped_height * zoom
295 writer.add_page(page)
296 odd_page = not odd_page
297 print("built page number %d (of %d)" % (i+1, len(pages_to_add)))
300 print("-n: building 4-input-pages-per-output-page book")
301 print("-m: applying printable-area margin of %.2fcm" % args.print_margin)
303 print("-a: drawing page borders, spine limits")
305 printable_margin = args.print_margin * points_per_cm
306 printable_scale = (a4_width - 2*printable_margin)/a4_width
307 half_width = a4_width / n_pages_per_axis
308 half_height = a4_height / n_pages_per_axis
309 section_scale_factor = 1 / n_pages_per_axis
310 spine_part_of_page = (spine_limit / half_width) / printable_scale
311 bonus_shrink_factor = 1 - spine_part_of_page
317 for page in pages_to_add:
324 new_i_order += [8 * n_eights + 3,
333 new_page_order += [eight_pack[3]] # page front, upper left
334 new_page_order += [eight_pack[0]] # page front, upper right
335 new_page_order += [eight_pack[7]] # page front, lower left
336 new_page_order += [eight_pack[4]] # page front, lower right
337 new_page_order += [eight_pack[1]] # page back, upper left
338 new_page_order += [eight_pack[2]] # page back, upper right
339 new_page_order += [eight_pack[5]] # page back, lower left
340 new_page_order += [eight_pack[6]] # page back, lower right
344 for j, page in enumerate(new_page_order):
346 new_page = pypdf.PageObject.create_blank_page(width=a4_width, height=a4_height)
348 # in-section transformations: align pages on top, left-hand pages to left, right-hand to right
349 new_i = new_i_order[j]
350 crop_left, crop_bottom, crop_right, crop_top = crops_at_page[new_i]
351 zoom = zoom_at_page[new_i]
352 page.add_transformation(pypdf.Transformation().translate(ty=(a4_height / zoom - (a4_height - crop_top))))
354 page.add_transformation(pypdf.Transformation().translate(tx=-crop_left))
355 elif i == 1 or i == 3:
356 page.add_transformation(pypdf.Transformation().translate(tx=(a4_width / zoom - (a4_width - crop_right))))
357 page.add_transformation(pypdf.Transformation().scale(zoom * bonus_shrink_factor, zoom * bonus_shrink_factor))
359 page.add_transformation(pypdf.Transformation().translate(ty=-2*printable_margin/printable_scale))
361 # outer section transformations
362 page.add_transformation(pypdf.Transformation().translate(ty=(1-bonus_shrink_factor)*a4_height))
364 y_section = a4_height
365 page.mediabox.bottom = half_height
366 page.mediabox.top = a4_height
369 page.mediabox.bottom = 0
370 page.mediabox.top = half_height
373 page.mediabox.left = 0
374 page.mediabox.right = half_width
376 page.add_transformation(pypdf.Transformation().translate(tx=(1-bonus_shrink_factor)*a4_width))
378 page.mediabox.left = half_width
379 page.mediabox.right = a4_width
380 page.add_transformation(pypdf.Transformation().translate(tx=x_section, ty=y_section))
381 page.add_transformation(pypdf.Transformation().scale(section_scale_factor, section_scale_factor))
382 new_page.merge_page(page)
384 print("merged page number %d (of %d)" % (page_count, len(pages_to_add)))
387 from reportlab.pdfgen import canvas
390 packet = io.BytesIO()
391 c = canvas.Canvas(packet, pagesize=A4)
393 c.line(0, a4_height, a4_width, a4_height)
394 c.line(0, half_height, a4_width, half_height)
395 c.line(0, 0, a4_width, 0)
396 c.line(0, a4_height, 0, 0)
397 c.line(half_width, a4_height, half_width, 0)
398 c.line(a4_width, a4_height, a4_width, 0)
400 new_pdf = pypdf.PdfReader(packet)
401 new_page.merge_page(new_pdf.pages[0])
402 printable_offset_x = printable_margin
403 printable_offset_y = printable_margin * a4_height / a4_width
404 new_page.add_transformation(pypdf.Transformation().scale(printable_scale, printable_scale))
405 new_page.add_transformation(pypdf.Transformation().translate(tx=printable_offset_x, ty=printable_offset_y))
406 x_left_spine_limit = half_width * bonus_shrink_factor
407 x_right_spine_limit = a4_width - x_left_spine_limit
408 if args.analyze or front_page:
409 packet = io.BytesIO()
410 c = canvas.Canvas(packet, pagesize=A4)
414 c.line(x_left_spine_limit, a4_height, x_left_spine_limit, 0)
415 c.line(x_right_spine_limit, a4_height, x_right_spine_limit, 0)
419 start_up_left_left_x = x_left_spine_limit - 0.5 * cut_width
420 start_up_left_right_x = x_left_spine_limit + 0.5 * cut_width
421 middle_point_up_left_y = half_height + middle_point_depth
422 end_point_up_left_y = half_height + cut_depth
423 c.line(start_up_left_right_x, half_height, x_left_spine_limit, end_point_up_left_y)
424 c.line(x_left_spine_limit, end_point_up_left_y, x_left_spine_limit, middle_point_up_left_y)
425 c.line(x_left_spine_limit, middle_point_up_left_y, start_up_left_left_x, half_height)
427 start_down_right_left_x = x_right_spine_limit - 0.5 * cut_width
428 start_down_right_right_x = x_right_spine_limit + 0.5 * cut_width
429 middle_point_down_right_y = half_height - middle_point_depth
430 end_point_down_right_y = half_height - cut_depth
431 c.line(start_down_right_left_x, half_height, x_right_spine_limit, end_point_down_right_y)
432 c.line(x_right_spine_limit, end_point_down_right_y, x_right_spine_limit, middle_point_down_right_y)
433 c.line(x_right_spine_limit, middle_point_down_right_y, start_down_right_right_x, half_height)
435 if args.analyze or front_page:
437 new_pdf = pypdf.PdfReader(packet)
438 new_page.merge_page(new_pdf.pages[0])
439 writer.add_page(new_page)
441 front_page = not front_page
444 for file in opened_files:
446 with open(args.output_file, 'wb') as output_file:
447 writer.write(output_file)