6 from reportlab.lib.pagesizes import A4
7 a4_width, a4_height = A4
8 points_per_cm = 10 * 72 / 25.4
9 cut_depth = 1.95 * points_per_cm
10 cut_width = 1.05 * points_per_cm
11 middle_point_depth = 0.4 * points_per_cm
12 spine_limit = 1 * points_per_cm
14 desc = """bookmaker.py is a helper for optimizing PDFs of books for the production of small self-printed, self-bound physical books. To this end it offers various PDF manipulation options that can potentially also be used independently and for other purposes.
19 Concatenate two PDFs A.pdf and B.pdf to COMBINED.pdf:
20 bookmaker.py --input_file A.pdf --input_file B.pdf --output_file COMBINED.pdf
22 Produce OUTPUT.pdf containing all pages of (inclusive) page number range 3-7 from INPUT.pdf:
23 bookmaker.py -i INPUT.pdf --page_range 3-7 -o OUTPUT.pdf
25 Produce COMBINED.pdf from A.pdf's first 7 pages, B.pdf's pages except its first two, and all pages of C.pdf:
26 bookmaker.py -i A.pdf -p start-7 -i B.pdf -p 3-end -i C.pdf -o COMBINED.pdf
28 Crop each page 5cm from the left, 10cm from the bottom, 2cm from the right, and 0cm from the top:
29 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf --crops "5,10,2,0"
31 Include all pages from INPUT.pdf, but crop pages 10-20 by 5cm each from bottom and top:
32 bookmaker.py -i INPUT.pdf -c "10-20:0,5,0,5" -o OUTPUT.pdf
34 Same crops as above on pages 10-20, but also crop all pages 30 and later by 3cm each from left and right:
35 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf -c "10-20:0,5,0,5" -c "30-end:3,0,3,0"
37 Rotate by 90° pages 3, 5, 7; rotate page 7 once more by 90° (i.e. 180° in total):
38 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf --rotate 3 -r 5 -r 7 -r 7
40 Initially declare 5cm crop from the left and 1cm crop from right, but alternate direction between even and odd pages:
41 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf -c "5,0,1,0" -s
43 Quarter each OUTPUT.pdf page to carry 4 pages from INPUT.pdf, draw stencils into inner margins for cuts to carry binding strings:
44 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf --nup4
46 Same as --nup4, but define a printable-region margin of 1.3cm to limit the space for the INPUT.pdf pages in OUTPUT.pdf page quarters:
47 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf -n --print_margin 1.3
49 Same as -n, but draw lines marking printable-region margins, page quarters, spine margins:
50 bookmaker.py -i INPUT.pdf -o OUTPUT.pdf -n --analyze
54 The target page shape so far is assumed to be A4 in portrait orientation; bookmaker.py normalizes all pages to this format before applying crops, and removes any source PDF /Rotate commands (for their production of landscape orientations).
56 The --nup4 quartering puts pages into a specific order optimized for no-tumble duplex print-outs that can easily be folded and cut into pages of a small A6 book. Each unit of 8 pages from the source PDF is mapped thus onto two subsequent pages (i.e. front and back of a printed A4 paper):
65 To facilitate this layout, --nup4 also pads the input PDF pages to a total number that is a multiple of 8, by adding empty pages.
67 (To turn this page into a tiny 8-page book, cut the paper in two on its horizontal middle line. Fold the two halves by their vertical middle lines, with pages 3-2 and 7-6 on the folds' insides. This creates two 4-page books of pages 1-4 and pages 5-8. Fold them both closed and (counter-intuitively) put the book of pages 5-8 on top of the other one (creating a temporary page order of 5,6,7,8,1,2,3,4). A binding cut stencil should be visible on the top left of this stack – cut it out (with all pages folded together) to add the same inner-margin upper cut to each page. Turn around your 8-page stack to find the mirror image of the aforementioned stencil on the stack's back's bottom, and cut it out too. Each page now has binding cuts on top and bottom of its inner margins. Swap the order of both books (back to the final page order of 1,2,3,4,5,6,7,8), and you now have an 8-page book that can be "bound" in its binding cuts through a rubber band or the like. Repeat with the next 8-page double-page, et cetera. (Actually, with just 8 pages, the paper may curl under the pressure of a rubber band – but go up to 32 pages or so, and the result will become quite stable.)
# Command-line interface. RawDescriptionHelpFormatter keeps the line breaks of
# the description and epilogue texts as written instead of re-wrapping them.
parser = argparse.ArgumentParser(description=desc, epilog=epilogue, formatter_class=argparse.RawDescriptionHelpFormatter)
# Rename the heading of the options group in the --help output (this reaches
# into a private argparse attribute).
parser._optionals.title = "OPTIONS"
parser.add_argument("-i", "--input_file", action="append", required=True, help="input PDF file")
parser.add_argument("-o", "--output_file", required=True, help="output PDF file")
parser.add_argument("-p", "--page_range", action="append", help="page range, e.g., '3-end'")
parser.add_argument("-c", "--crops", action="append", help="cm crops left, bottom, right, top – e.g., '10,10,10,10'; prefix with ':'-delimited page range to limit effect")
parser.add_argument("-r", "--rotate_page", type=int, action="append", help="rotate page of number by 90° (usable multiple times on same page!)")
parser.add_argument("-s", "--symmetry", action="store_true", help="alternate horizontal crops between odd and even pages")
parser.add_argument("-n", "--nup4", action='store_true', help="puts 4 input pages onto 1 output page, adds binding cut stencil")
parser.add_argument("-a", "--analyze", action="store_true", help="in --nup4, print lines identifying spine, page borders")
parser.add_argument("-m", "--print_margin", type=float, default=0.43, help="print margin for --nup4 in cm (default 0.43)")
args = parser.parse_args()

# some basic input validation
for filename in args.input_file:
    if not os.path.isfile(filename):
        raise ValueError("-i: %s is not a file" % filename)
    try:
        # Opening the file with a reader is only a probe: the reader is
        # discarded, we just want to know whether pypdf accepts the file.
        with open(filename, 'rb') as file:
            pypdf.PdfReader(file)
    except pypdf.errors.PdfReadError as err:
        # PdfReadError is the parent of PdfStreamError, so this also reports
        # other unreadable inputs (e.g. empty files) with the same message.
        raise ValueError("-i: cannot interpret %s as PDF file" % filename) from err
def validate_page_range(p_string, err_msg_prefix):
    """Check that p_string is a well-formed page range, raising if it is not.

    A valid range consists of exactly two '-'-separated tokens. The first
    token may be empty, "start" or a page number; the second may be empty,
    "end" or a page number. Page numbers are 1-based, so 0 is rejected, and
    a numeric start must not lie after a numeric end.

    Args:
        p_string: the page range string to check, e.g. "3-7" or "3-end".
        err_msg_prefix: option name (e.g. "-p") prepended to the error message.

    Raises:
        ValueError: if p_string is not a valid page range.
    """
    err_msg = "%s: invalid page range string: %s" % (err_msg_prefix, p_string)
    tokens = p_string.split("-")
    # Exactly one '-' is required: no '-' yields one token, several yield 3+.
    if len(tokens) != 2:
        raise ValueError(err_msg)
    bounds = []
    # Each position accepts its own keyword only: "start" first, "end" second.
    for token, keyword in zip(tokens, ("start", "end")):
        if token in ("", keyword):
            bounds.append(None)  # open-ended on this side
            continue
        try:
            number = int(token)
        except ValueError:
            raise ValueError(err_msg) from None
        # Page 0 would later turn into index -1, i.e. silently the last page.
        if number < 1:
            raise ValueError(err_msg)
        bounds.append(number)
    first, last = bounds
    # A reversed range would silently select no pages at all.
    if first is not None and last is not None and first > last:
        raise ValueError(err_msg)
# -p: every page range must be well-formed, and since ranges are matched to
# input files by position there must not be more ranges than input files.
if args.page_range:
    for p_string in args.page_range:
        validate_page_range(p_string, "-p")
    if len(args.page_range) > len(args.input_file):
        raise ValueError("more -p arguments than -i arguments")

# -c: an optional "PAGE_RANGE:" prefix followed by exactly four
# comma-separated numbers (left, bottom, right, top).
if args.crops:
    for c_string in args.crops:
        initial_split = c_string.split(':')
        if len(initial_split) > 2:
            raise ValueError("-c: cropping string has multiple ':': %s" % c_string)
        if len(initial_split) > 1:
            validate_page_range(initial_split[0], "-c")
            crops = initial_split[1].split(",")
        else:
            crops = initial_split[0].split(",")
        if len(crops) != 4:
            raise ValueError("-c: cropping should contain three ',': %s" % c_string)
        for crop in crops:
            try:
                float(crop)
            except ValueError:
                raise ValueError("-c: non-number crop in %s" % c_string)

# -r: page numbers to rotate (read from args, the parsed argument namespace).
if args.rotate_page:
    for r in args.rotate_page:
        try:
            int(r)
        except ValueError:
            raise ValueError("-r: non-integer value: %s" % r)

# -m: print margin in cm.
try:
    float(args.print_margin)
except ValueError:
    raise ValueError("-m: non-float value: %s" % args.print_margin)
146 # select pages from input files
def parse_page_range(range_string, pages):
    """Translate a page range string into 0-based, end-exclusive bounds.

    Page numbers in range_string are 1-based and inclusive, so "3-7" becomes
    (2, 7), which can be passed straight to range() to index into pages.

    Args:
        range_string: "START-END", where START may be empty or "start" and
            END may be empty or "end"; a falsy value (None) selects all pages.
        pages: sized collection of pages the range refers to.

    Returns:
        Tuple (start_page, end_page) suitable for range(start_page, end_page).

    Raises:
        ValueError: if range_string does not contain exactly one '-', holds a
            non-integer page number, or refers to pages outside of pages.
    """
    n_pages = len(pages)
    start_page, end_page = 0, n_pages
    if range_string:
        start, end = range_string.split('-')
        if start not in ("", "start"):
            start_page = int(start) - 1  # 1-based page number to 0-based index
        if end not in ("", "end"):
            end_page = int(end)  # inclusive 1-based end equals exclusive 0-based end
    # Fail here with a clear message instead of letting a negative start wrap
    # around to the last page or an oversized end hit an IndexError later on.
    if start_page < 0 or end_page > n_pages:
        raise ValueError("page range %s exceeds available pages 1-%d" % (range_string, n_pages))
    return start_page, end_page
160 for i, input_file in enumerate(args.input_file):
161 file = open(input_file, 'rb')
162 opened_files += [file]
163 reader = pypdf.PdfReader(file)
165 if args.page_range and len(args.page_range) > i:
166 range_string = args.page_range[i]
167 start_page, end_page = parse_page_range(range_string, reader.pages)
168 for old_page_num in range(start_page, end_page):
170 page = reader.pages[old_page_num]
171 pages_to_add += [page]
172 print("-i, -p: read in %s page number %d as new page %d" % (input_file, old_page_num+1, new_page_num))
174 # if necessary, pad pages to multiple of 8
176 mod_to_8 = len(pages_to_add) % 8
178 print("-n: number of input pages %d not multiple of 8, padding to that" % len(pages_to_add))
179 for _ in range(8 - mod_to_8):
180 new_page = pypdf.PageObject.create_blank_page(width=a4_width, height=a4_height)
181 pages_to_add += [new_page]
185 for rotate_page in args.rotate_page:
186 page = pages_to_add[rotate_page - 1]
187 page.add_transformation(pypdf.Transformation().translate(tx=-a4_width/2, ty=-a4_height/2))
188 page.add_transformation(pypdf.Transformation().rotate(-90))
189 page.add_transformation(pypdf.Transformation().translate(tx=a4_width/2, ty=a4_height/2))
190 print("-r: rotating (by 90°) page", rotate_page)
192 # normalize all pages to portrait A4
193 for page in pages_to_add:
194 if "/Rotate" in page:
195 page.rotate(360 - page["/Rotate"])
196 page.mediabox.left = 0
197 page.mediabox.bottom = 0
198 page.mediabox.top = a4_height
199 page.mediabox.right = a4_width
200 page.cropbox = page.mediabox
202 # determine page crops, zooms, crop symmetry
203 crops_at_page = [(0,0,0,0)]*len(pages_to_add)
204 zoom_at_page = [1]*len(pages_to_add)
206 for crops in args.crops:
207 initial_split = crops.split(':')
208 if len(initial_split) > 1:
209 page_range = initial_split[0]
210 crops = initial_split[1]
213 crops = initial_split[0]
214 start_page, end_page = parse_page_range(page_range, pages_to_add)
215 crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm = [float(x) for x in crops.split(',')]
216 crop_left = crop_left_cm * points_per_cm
217 crop_bottom = crop_bottom_cm * points_per_cm
218 crop_right = crop_right_cm * points_per_cm
219 crop_top = crop_top_cm * points_per_cm
221 print("-c, -t: to pages %d to %d applying crops: left %.2fcm, bottom %.2fcm, right %.2fcm, top %.2fcm (but alternating left and right crop between even and odd pages)" % (start_page + 1, end_page, crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm))
223 print("-c: to pages %d to %d applying crops: left %.2fcm, bottom %.2fcm, right %.2fcm, top %.2fcm" % (start_page + 1, end_page, crop_left_cm, crop_bottom_cm, crop_right_cm, crop_top_cm))
224 cropped_width = a4_width - crop_left - crop_right
225 cropped_height = a4_height - crop_bottom - crop_top
227 zoom_horizontal = a4_width / (a4_width - crop_left - crop_right)
228 zoom_vertical = a4_height / (a4_height - crop_bottom - crop_top)
229 if (zoom_horizontal > 1 and zoom_vertical < 1) or (zoom_horizontal < 1 and zoom_vertical > 1):
230 raise ValueError("crops would create opposing zoom directions")
231 elif zoom_horizontal + zoom_vertical > 2:
232 zoom = min(zoom_horizontal, zoom_vertical)
234 zoom = max(zoom_horizontal, zoom_vertical)
235 for page_num in range(start_page, end_page):
236 if args.symmetry and page_num % 2:
237 crops_at_page[page_num] = (crop_right, crop_bottom, crop_left, crop_top)
239 crops_at_page[page_num] = (crop_left, crop_bottom, crop_right, crop_top)
240 zoom_at_page[page_num] = zoom
242 writer = pypdf.PdfWriter()
245 print("building 1-input-page-per-output-page book")
247 for i, page in enumerate(pages_to_add):
248 crop_left, crop_bottom, crop_right, crop_top = crops_at_page[i]
249 zoom = zoom_at_page[i]
250 page.add_transformation(pypdf.Transformation().translate(tx=-crop_left, ty=-crop_bottom))
251 page.add_transformation(pypdf.Transformation().scale(zoom, zoom))
252 cropped_width = a4_width - crop_left - crop_right
253 cropped_height = a4_height - crop_bottom - crop_top
254 page.mediabox.right = cropped_width * zoom
255 page.mediabox.top = cropped_height * zoom
256 writer.add_page(page)
257 odd_page = not odd_page
258 print("built page number %d (of %d)" % (i+1, len(pages_to_add)))
261 print("-n: building 4-input-pages-per-output-page book")
262 print("-m: applying printable-area margin of %.2fcm" % args.print_margin)
264 print("-a: drawing page borders, spine limits")
266 printable_margin = args.print_margin * points_per_cm
267 printable_scale = (a4_width - 2*printable_margin)/a4_width
268 half_width = a4_width / n_pages_per_axis
269 half_height = a4_height / n_pages_per_axis
270 section_scale_factor = 1 / n_pages_per_axis
271 spine_part_of_page = (spine_limit / half_width) / printable_scale
272 bonus_shrink_factor = 1 - spine_part_of_page
278 for page in pages_to_add:
285 new_i_order += [8 * n_eights + 3,
294 new_page_order += [eight_pack[3]] # page front, upper left
295 new_page_order += [eight_pack[0]] # page front, upper right
296 new_page_order += [eight_pack[7]] # page front, lower left
297 new_page_order += [eight_pack[4]] # page front, lower right
298 new_page_order += [eight_pack[1]] # page back, upper left
299 new_page_order += [eight_pack[2]] # page back, upper right
300 new_page_order += [eight_pack[5]] # page back, lower left
301 new_page_order += [eight_pack[6]] # page back, lower right
305 for j, page in enumerate(new_page_order):
307 new_page = pypdf.PageObject.create_blank_page(width=a4_width, height=a4_height)
309 # in-section transformations: align pages on top, left-hand pages to left, right-hand to right
310 new_i = new_i_order[j]
311 crop_left, crop_bottom, crop_right, crop_top = crops_at_page[new_i]
312 zoom = zoom_at_page[new_i]
313 page.add_transformation(pypdf.Transformation().translate(ty=(a4_height / zoom - (a4_height - crop_top))))
315 page.add_transformation(pypdf.Transformation().translate(tx=-crop_left))
316 elif i == 1 or i == 3:
317 page.add_transformation(pypdf.Transformation().translate(tx=(a4_width / zoom - (a4_width - crop_right))))
318 page.add_transformation(pypdf.Transformation().scale(zoom * bonus_shrink_factor, zoom * bonus_shrink_factor))
320 page.add_transformation(pypdf.Transformation().translate(ty=-2*printable_margin/printable_scale))
322 # outer section transformations
323 page.add_transformation(pypdf.Transformation().translate(ty=(1-bonus_shrink_factor)*a4_height))
325 y_section = a4_height
326 page.mediabox.bottom = half_height
327 page.mediabox.top = a4_height
330 page.mediabox.bottom = 0
331 page.mediabox.top = half_height
334 page.mediabox.left = 0
335 page.mediabox.right = half_width
337 page.add_transformation(pypdf.Transformation().translate(tx=(1-bonus_shrink_factor)*a4_width))
339 page.mediabox.left = half_width
340 page.mediabox.right = a4_width
341 page.add_transformation(pypdf.Transformation().translate(tx=x_section, ty=y_section))
342 page.add_transformation(pypdf.Transformation().scale(section_scale_factor, section_scale_factor))
343 new_page.merge_page(page)
345 print("merged page number %d (of %d)" % (page_count, len(pages_to_add)))
348 from reportlab.pdfgen import canvas
351 packet = io.BytesIO()
352 c = canvas.Canvas(packet, pagesize=A4)
354 c.line(0, a4_height, a4_width, a4_height)
355 c.line(0, half_height, a4_width, half_height)
356 c.line(0, 0, a4_width, 0)
357 c.line(0, a4_height, 0, 0)
358 c.line(half_width, a4_height, half_width, 0)
359 c.line(a4_width, a4_height, a4_width, 0)
361 new_pdf = pypdf.PdfReader(packet)
362 new_page.merge_page(new_pdf.pages[0])
363 printable_offset_x = printable_margin
364 printable_offset_y = printable_margin * a4_height / a4_width
365 new_page.add_transformation(pypdf.Transformation().scale(printable_scale, printable_scale))
366 new_page.add_transformation(pypdf.Transformation().translate(tx=printable_offset_x, ty=printable_offset_y))
367 x_left_spine_limit = half_width * bonus_shrink_factor
368 x_right_spine_limit = a4_width - x_left_spine_limit
369 if args.analyze or front_page:
370 packet = io.BytesIO()
371 c = canvas.Canvas(packet, pagesize=A4)
375 c.line(x_left_spine_limit, a4_height, x_left_spine_limit, 0)
376 c.line(x_right_spine_limit, a4_height, x_right_spine_limit, 0)
380 start_up_left_left_x = x_left_spine_limit - 0.5 * cut_width
381 start_up_left_right_x = x_left_spine_limit + 0.5 * cut_width
382 middle_point_up_left_y = half_height + middle_point_depth
383 end_point_up_left_y = half_height + cut_depth
384 c.line(start_up_left_right_x, half_height, x_left_spine_limit, end_point_up_left_y)
385 c.line(x_left_spine_limit, end_point_up_left_y, x_left_spine_limit, middle_point_up_left_y)
386 c.line(x_left_spine_limit, middle_point_up_left_y, start_up_left_left_x, half_height)
388 start_down_right_left_x = x_right_spine_limit - 0.5 * cut_width
389 start_down_right_right_x = x_right_spine_limit + 0.5 * cut_width
390 middle_point_down_right_y = half_height - middle_point_depth
391 end_point_down_right_y = half_height - cut_depth
392 c.line(start_down_right_left_x, half_height, x_right_spine_limit, end_point_down_right_y)
393 c.line(x_right_spine_limit, end_point_down_right_y, x_right_spine_limit, middle_point_down_right_y)
394 c.line(x_right_spine_limit, middle_point_down_right_y, start_down_right_right_x, half_height)
396 if args.analyze or front_page:
398 new_pdf = pypdf.PdfReader(packet)
399 new_page.merge_page(new_pdf.pages[0])
400 writer.add_page(new_page)
402 front_page = not front_page
405 for file in opened_files:
407 with open(args.output_file, 'wb') as output_file:
408 writer.write(output_file)