6 from reportlab.lib.pagesizes import A4
7 a4_width, a4_height = A4
8 points_per_cm = 10 * 72 / 25.4
9 cut_depth = 1.95 * points_per_cm
10 cut_width = 1.05 * points_per_cm
11 middle_point_depth = 0.4 * points_per_cm
12 spine_limit = 1 * points_per_cm
14 desc = """bookmaker.py is a helper for optimizing PDFs of books for the production of small self-printed, self-bound physical books. To this end it offers various PDF manipulation options that can potentially also be used independently and for other purposes.
19 Concatenate two PDFs A.pdf and B.pdf to COMBINED.pdf:
20 bookmaker.py --input_file A.pdf --input_file B.pdf --output_file COMBINED.pdf
22 Produce OUTPUT.pdf containing all pages of (inclusive) page number range 3-7 from INPUT.pdf:
23 bookmaker.py -i INPUT.pdf --page_range 3-7 -o OUTPUT.pdf
25 Produce COMBINED.pdf from A.pdf's first 7 pages, B.pdf's pages except its first two, and all pages of C.pdf:
26 bookmaker.py -i A.pdf -p start-7 -i B.pdf -p 3-end -i C.pdf -o COMBINED.pdf
28 Crop each page 5cm from the left, 10cm from the bottom, 2cm from the right, and 0cm from the top:
29 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf --crops "5,10,2,0"
31 Include all pages from INPUT.pdf, but crop pages 10-20 by 5cm each from bottom and top:
32 bookmaker.py -i INPUT.pdf -c "10-20:0,5,0,5" -o OUTPUT.pdf
34 Same crops on pages 10-20, but also crop all pages 30 and later by 3cm each from left and right:
35 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf -c "10-20:0,5,0,5" -c "30-end:3,0,3,0"
37 Rotate by 90° pages 3, 5, 7; rotate page 7 once more by 90° (i.e. 180° in total):
38 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf --rotate 3 -r 5 -r 7 -r 7
40 Initially declare 5cm crop from the left and 1cm crop from right, but alternate direction between even and odd pages:
41 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf -c "5,0,1,0" -s
43 Quarter each OUTPUT.pdf page to carry 4 pages from INPUT.pdf, draw stencils into inner margins for cuts to carry binding strings:
44 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf --nup4
46 Same as --nup4, but define a printable-region margin of 1.3cm to limit the space for the INPUT.pdf pages in OUTPUT.pdf page quarters:
47 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf -n --print_margin 1.3
49 Same as -n, but draw lines marking printable-region margins, page quarters, spine margins:
50 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf -n --analyze
54 For arguments like -p, page numbers are assumed to start with 1 (not 0, which is treated as an invalid page number value).
56 The target page shape so far is assumed to be A4 in portrait orientation; bookmaker.py normalizes all pages to this format before applying crops, and removes any source PDF /Rotate commands (for their production of landscape orientations).
58 The --nup4 quartering puts pages into a specific order optimized for no-tumble duplex print-outs that can easily be folded and cut into pages of a small A6 book. Each unit of 8 pages from the source PDF is mapped thus onto two subsequent pages (i.e. front and back of a printed A4 paper):
67 To facilitate this layout, --nup4 also pads the input PDF pages to a total number that is a multiple of 8, by adding empty pages.
69 (To turn this page into a tiny 8-page book, cut the paper in two on its horizontal middle line. Fold the two halves by their vertical middle lines, with pages 3-2 and 7-6 on the folds' insides. This creates two 4-page books of pages 1-4 and pages 5-8. Fold them both closed and (counter-intuitively) put the book of pages 5-8 on top of the other one (creating a temporary page order of 5,6,7,8,1,2,3,4). A binding cut stencil should be visible on the top left of this stack – cut it out (with all pages folded together) to add the same inner-margin upper cut to each page. Turn around your 8-pages stack to find the mirror image of aforementioned stencil on the stack's back's bottom, and cut it out too. Each page now has binding cuts on top and bottom of its inner margins. Swap the order of both books (back to the final page order of 1,2,3,4,5,6,7,8), and you now have an 8-pages book that can be "bound" in its binding cuts through a rubber band or the like. Repeat with the next 8-pages double-page, et cetera. (Actually, with just 8 pages, the paper may curl under the pressure of a rubber band – but go up to 32 pages or so, and the result will become quite stable.)
72 # parser = argparse.ArgumentParser(description="build print-ready book PDF")
73 parser = argparse.ArgumentParser(description=desc, epilog=epilogue, formatter_class=argparse.RawDescriptionHelpFormatter)
74 parser._optionals.title = "OPTIONS"
75 parser.add_argument("-i", "--input_file", action="append", required=True, help="input PDF file")
76 parser.add_argument("-o", "--output_file", required=True, help="output PDF file")
77 parser.add_argument("-p", "--page_range", action="append", help="page range, e.g., '2-9' or '3-end' or 'start-14'")
78 parser.add_argument("-c", "--crops", action="append", help="cm crops left, bottom, right, top – e.g., '10,10,10,10'; prefix with ':'-delimited page range to limit effect")
79 parser.add_argument("-r", "--rotate_page", type=int, action="append", help="rotate page of number by 90° (usable multiple times on same page!)")
80 parser.add_argument("-s", "--symmetry", action="store_true", help="alternate horizontal crops between odd and even pages")
81 parser.add_argument("-n", "--nup4", action='store_true', help="puts 4 input pages onto 1 output page, adds binding cut stencil")
82 parser.add_argument("-a", "--analyze", action="store_true", help="in --nup4, print lines identifying spine, page borders")
83 parser.add_argument("-m", "--print_margin", type=float, default=0.43, help="print margin for --nup4 in cm (default 0.43)")
84 args = parser.parse_args()
86 # some basic input validation
# Check that p_string is a well-formed page range string and raise ValueError if not.
# err_msg_prefix is put in front of every error message; the callers in this file
# pass the option name being validated ("-p" or "-c").
# The checks visible here reject: a string without '-', a string with too many '-',
# tokens that are neither integers nor 'start' / 'end', page numbers below 1,
# and a range whose start is higher than its end.
87 def validate_page_range(p_string, err_msg_prefix):
# NOTE(review): err_msg is not referenced in any line visible here – confirm it is still needed.
88 err_msg = "%s: invalid page range string: %s" % (err_msg_prefix, p_string)
89 if '-' not in p_string:
90 raise ValueError("%s: page range string lacks '-': %s" % (err_msg_prefix, p_string))
91 tokens = p_string.split("-")
93 raise ValueError("%s: page range string has too many '-': %s" % (err_msg_prefix, p_string))
# Inspect each token together with its position in the range string.
94 for i, token in enumerate(tokens):
# "start" is only recognised as the first token ...
97 if i == 0 and token == "start":
# ... and "end" only as the second token.
99 if i == 1 and token == "end":
104 raise ValueError("%s: page range string carries values that are neither integer, nor 'start', nor 'end': %s" % (err_msg_prefix, p_string))
106 raise ValueError("%s: page range string may not carry page numbers <1: %s" % (err_msg_prefix, p_string))
110 start = int(tokens[0])
# The ordering check only applies when both bounds are positive numbers;
# how start/end are set for non-numeric tokens is not visible here.
114 if start > 0 and end > 0 and start > end:
115 raise ValueError("%s: page range starts higher than it ends: %s" % (err_msg_prefix, p_string))
116 for filename in args.input_file:
117 if not os.path.isfile(filename):
118 raise ValueError("-i: %s is not a file" % filename)
120 with open(filename, 'rb') as file:
121 pypdf.PdfReader(file)
122 except pypdf.errors.PdfStreamError:
123 raise ValueError("-i: cannot interpret %s as PDF file" % filename)
125 for p_string in args.page_range:
126 validate_page_range(p_string, "-p")
127 if len(args.page_range) > len(args.input_file):
128 raise ValueError("more -p arguments than -i arguments")
130 for c_string in args.crops:
131 initial_split = c_string.split(':')
132 if len(initial_split) > 2:
133 raise ValueError("-c: cropping string has multiple ':': %s" % c_string)
134 if len(initial_split) > 1:
135 validate_page_range(initial_split[0], "-c")
136 crops = initial_split[1].split(",")
139 crops = initial_split[0].split(",")
141 raise ValueError("-c: cropping should contain three ',': %s" % c_string)
146 raise ValueError("-c: non-number crop in %s" % c_string)
148 for r in args.rotate_page:
152 raise ValueError("-r: non-integer value: %s" % r)
154 raise ValueError("-r: value must not be <1: %s" % r)
156 float(args.print_margin)
158 raise ValueError("-m: non-float value: %s" % arg.print_margin)
160 # select pages from input files
# Translate a "START-END" page range string into a (start_page, end_page) pair.
# start_page is zero-based (the given page number minus 1), while end_page defaults
# to the number of entries in pages; the callers in this file feed the pair
# to range(start_page, end_page).
161 def parse_page_range(range_string, pages):
# Default end: one past the last page index.
163 end_page = len(pages)
165 start, end = range_string.split('-')
# An empty or "start" token keeps the default start_page (its assignment is not visible here).
166 if not (len(start) == 0 or start == "start"):
# Page numbers on the command line are 1-based, page indices are 0-based.
167 start_page = int(start) - 1
# An empty or "end" token keeps the default end_page.
168 if not (len(end) == 0 or end == "end"):
170 return start_page, end_page
174 for i, input_file in enumerate(args.input_file):
175 file = open(input_file, 'rb')
176 opened_files += [file]
177 reader = pypdf.PdfReader(file)
179 if args.page_range and len(args.page_range) > i:
180 range_string = args.page_range[i]
181 start_page, end_page = parse_page_range(range_string, reader.pages)
182 if end_page > len(reader.pages): # no need to test start_page cause start_page > end_page is checked above
183 raise ValueError("-p: page range goes beyond pages of input file: %s" % range_string)
184 for old_page_num in range(start_page, end_page):
186 page = reader.pages[old_page_num]
187 pages_to_add += [page]
188 print("-i, -p: read in %s page number %d as new page %d" % (input_file, old_page_num+1, new_page_num))
190 # we can do some more input validations now that we know how many pages output should have
192 for c_string in args.crops:
193 initial_split = c_string.split(':')
194 if len(initial_split) > 1:
195 start, end = parse_page_range(initial_split[0], pages_to_add)
196 if end > len(pages_to_add):
197 raise ValueError("-c: page range goes beyond number of pages we're building: %s" % initial_split[0])
199 for r in args.rotate_page:
200 if r > len(pages_to_add):
201 raise ValueError("-r: page number beyond number of pages we're building: %d" % r)
205 for rotate_page in args.rotate_page:
206 page = pages_to_add[rotate_page - 1]
207 page.add_transformation(pypdf.Transformation().translate(tx=-a4_width/2, ty=-a4_height/2))
208 page.add_transformation(pypdf.Transformation().rotate(-90))
209 page.add_transformation(pypdf.Transformation().translate(tx=a4_width/2, ty=a4_height/2))
210 print("-r: rotating (by 90°) page", rotate_page)
212 # if necessary, pad pages to multiple of 8
214 mod_to_8 = len(pages_to_add) % 8
216 print("-n: number of input pages %d not multiple of 8, padding to that" % len(pages_to_add))
217 for _ in range(8 - mod_to_8):
218 new_page = pypdf.PageObject.create_blank_page(width=a4_width, height=a4_height)
219 pages_to_add += [new_page]
221 # normalize all pages to portrait A4
222 for page in pages_to_add:
223 if "/Rotate" in page:
224 page.rotate(360 - page["/Rotate"])
225 page.mediabox.left = 0
226 page.mediabox.bottom = 0
227 page.mediabox.top = a4_height
228 page.mediabox.right = a4_width
229 page.cropbox = page.mediabox
231 # determine page crops, zooms, crop symmetry
232 crops_at_page = [(0,0,0,0)]*len(pages_to_add)
233 zoom_at_page = [1]*len(pages_to_add)
235 for crops in args.crops:
236 initial_split = crops.split(':')
237 if len(initial_split) > 1:
238 page_range = initial_split[0]
239 crops = initial_split[1]
242 crops = initial_split[0]
243 start_page, end_page = parse_page_range(page_range, pages_to_add)
244 crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm = [float(x) for x in crops.split(',')]
245 crop_left = crop_left_cm * points_per_cm
246 crop_bottom = crop_bottom_cm * points_per_cm
247 crop_right = crop_right_cm * points_per_cm
248 crop_top = crop_top_cm * points_per_cm
250 print("-c, -t: to pages %d to %d applying crops: left %.2fcm, bottom %.2fcm, right %.2fcm, top %.2fcm (but alternating left and right crop between even and odd pages)" % (start_page + 1, end_page, crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm))
252 print("-c: to pages %d to %d applying crops: left %.2fcm, bottom %.2fcm, right %.2fcm, top %.2fcm" % (start_page + 1, end_page, crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm))
253 cropped_width = a4_width - crop_left - crop_right
254 cropped_height = a4_height - crop_bottom - crop_top
256 zoom_horizontal = a4_width / (a4_width - crop_left - crop_right)
257 zoom_vertical = a4_height / (a4_height - crop_bottom - crop_top)
258 if (zoom_horizontal > 1 and zoom_vertical < 1) or (zoom_horizontal < 1 and zoom_vertical > 1):
259 raise ValueError("crops would create opposing zoom directions")
260 elif zoom_horizontal + zoom_vertical > 2:
261 zoom = min(zoom_horizontal, zoom_vertical)
263 zoom = max(zoom_horizontal, zoom_vertical)
264 for page_num in range(start_page, end_page):
265 if args.symmetry and page_num % 2:
266 crops_at_page[page_num] = (crop_right, crop_bottom, crop_left, crop_top)
268 crops_at_page[page_num] = (crop_left, crop_bottom, crop_right, crop_top)
269 zoom_at_page[page_num] = zoom
271 writer = pypdf.PdfWriter()
274 print("building 1-input-page-per-output-page book")
276 for i, page in enumerate(pages_to_add):
277 crop_left, crop_bottom, crop_right, crop_top = crops_at_page[i]
278 zoom = zoom_at_page[i]
279 page.add_transformation(pypdf.Transformation().translate(tx=-crop_left, ty=-crop_bottom))
280 page.add_transformation(pypdf.Transformation().scale(zoom, zoom))
281 cropped_width = a4_width - crop_left - crop_right
282 cropped_height = a4_height - crop_bottom - crop_top
283 page.mediabox.right = cropped_width * zoom
284 page.mediabox.top = cropped_height * zoom
285 writer.add_page(page)
286 odd_page = not odd_page
287 print("built page number %d (of %d)" % (i+1, len(pages_to_add)))
290 print("-n: building 4-input-pages-per-output-page book")
291 print("-m: applying printable-area margin of %.2fcm" % args.print_margin)
293 print("-a: drawing page borders, spine limits")
295 printable_margin = args.print_margin * points_per_cm
296 printable_scale = (a4_width - 2*printable_margin)/a4_width
297 half_width = a4_width / n_pages_per_axis
298 half_height = a4_height / n_pages_per_axis
299 section_scale_factor = 1 / n_pages_per_axis
300 spine_part_of_page = (spine_limit / half_width) / printable_scale
301 bonus_shrink_factor = 1 - spine_part_of_page
307 for page in pages_to_add:
314 new_i_order += [8 * n_eights + 3,
323 new_page_order += [eight_pack[3]] # page front, upper left
324 new_page_order += [eight_pack[0]] # page front, upper right
325 new_page_order += [eight_pack[7]] # page front, lower left
326 new_page_order += [eight_pack[4]] # page front, lower right
327 new_page_order += [eight_pack[1]] # page back, upper left
328 new_page_order += [eight_pack[2]] # page back, upper right
329 new_page_order += [eight_pack[5]] # page back, lower left
330 new_page_order += [eight_pack[6]] # page back, lower right
334 for j, page in enumerate(new_page_order):
336 new_page = pypdf.PageObject.create_blank_page(width=a4_width, height=a4_height)
338 # in-section transformations: align pages on top, left-hand pages to left, right-hand to right
339 new_i = new_i_order[j]
340 crop_left, crop_bottom, crop_right, crop_top = crops_at_page[new_i]
341 zoom = zoom_at_page[new_i]
342 page.add_transformation(pypdf.Transformation().translate(ty=(a4_height / zoom - (a4_height - crop_top))))
344 page.add_transformation(pypdf.Transformation().translate(tx=-crop_left))
345 elif i == 1 or i == 3:
346 page.add_transformation(pypdf.Transformation().translate(tx=(a4_width / zoom - (a4_width - crop_right))))
347 page.add_transformation(pypdf.Transformation().scale(zoom * bonus_shrink_factor, zoom * bonus_shrink_factor))
349 page.add_transformation(pypdf.Transformation().translate(ty=-2*printable_margin/printable_scale))
351 # outer section transformations
352 page.add_transformation(pypdf.Transformation().translate(ty=(1-bonus_shrink_factor)*a4_height))
354 y_section = a4_height
355 page.mediabox.bottom = half_height
356 page.mediabox.top = a4_height
359 page.mediabox.bottom = 0
360 page.mediabox.top = half_height
363 page.mediabox.left = 0
364 page.mediabox.right = half_width
366 page.add_transformation(pypdf.Transformation().translate(tx=(1-bonus_shrink_factor)*a4_width))
368 page.mediabox.left = half_width
369 page.mediabox.right = a4_width
370 page.add_transformation(pypdf.Transformation().translate(tx=x_section, ty=y_section))
371 page.add_transformation(pypdf.Transformation().scale(section_scale_factor, section_scale_factor))
372 new_page.merge_page(page)
374 print("merged page number %d (of %d)" % (page_count, len(pages_to_add)))
377 from reportlab.pdfgen import canvas
380 packet = io.BytesIO()
381 c = canvas.Canvas(packet, pagesize=A4)
383 c.line(0, a4_height, a4_width, a4_height)
384 c.line(0, half_height, a4_width, half_height)
385 c.line(0, 0, a4_width, 0)
386 c.line(0, a4_height, 0, 0)
387 c.line(half_width, a4_height, half_width, 0)
388 c.line(a4_width, a4_height, a4_width, 0)
390 new_pdf = pypdf.PdfReader(packet)
391 new_page.merge_page(new_pdf.pages[0])
392 printable_offset_x = printable_margin
393 printable_offset_y = printable_margin * a4_height / a4_width
394 new_page.add_transformation(pypdf.Transformation().scale(printable_scale, printable_scale))
395 new_page.add_transformation(pypdf.Transformation().translate(tx=printable_offset_x, ty=printable_offset_y))
396 x_left_spine_limit = half_width * bonus_shrink_factor
397 x_right_spine_limit = a4_width - x_left_spine_limit
398 if args.analyze or front_page:
399 packet = io.BytesIO()
400 c = canvas.Canvas(packet, pagesize=A4)
404 c.line(x_left_spine_limit, a4_height, x_left_spine_limit, 0)
405 c.line(x_right_spine_limit, a4_height, x_right_spine_limit, 0)
409 start_up_left_left_x = x_left_spine_limit - 0.5 * cut_width
410 start_up_left_right_x = x_left_spine_limit + 0.5 * cut_width
411 middle_point_up_left_y = half_height + middle_point_depth
412 end_point_up_left_y = half_height + cut_depth
413 c.line(start_up_left_right_x, half_height, x_left_spine_limit, end_point_up_left_y)
414 c.line(x_left_spine_limit, end_point_up_left_y, x_left_spine_limit, middle_point_up_left_y)
415 c.line(x_left_spine_limit, middle_point_up_left_y, start_up_left_left_x, half_height)
417 start_down_right_left_x = x_right_spine_limit - 0.5 * cut_width
418 start_down_right_right_x = x_right_spine_limit + 0.5 * cut_width
419 middle_point_down_right_y = half_height - middle_point_depth
420 end_point_down_right_y = half_height - cut_depth
421 c.line(start_down_right_left_x, half_height, x_right_spine_limit, end_point_down_right_y)
422 c.line(x_right_spine_limit, end_point_down_right_y, x_right_spine_limit, middle_point_down_right_y)
423 c.line(x_right_spine_limit, middle_point_down_right_y, start_down_right_right_x, half_height)
425 if args.analyze or front_page:
427 new_pdf = pypdf.PdfReader(packet)
428 new_page.merge_page(new_pdf.pages[0])
429 writer.add_page(new_page)
431 front_page = not front_page
434 for file in opened_files:
436 with open(args.output_file, 'wb') as output_file:
437 writer.write(output_file)