From: Plom Heller Date: Sat, 4 Apr 2026 10:21:55 +0000 (+0200) Subject: Reorganize command args validation/parsing and files reading. X-Git-Url: https://plomlompom.com/repos/booking/%22https:/validator.w3.org/balance?a=commitdiff_plain;h=e572cfa58e22a3f03f4493af6052f11e3d826272;p=bookmaker Reorganize command args validation/parsing and files reading. --- diff --git a/bookmaker.py b/bookmaker.py index 36fe62b..edd90bb 100755 --- a/bookmaker.py +++ b/bookmaker.py @@ -162,15 +162,43 @@ def parse_args(): return parser.parse_args() -def validate_inputs_first_pass(args): - for filename in args.input_file: - if not os.path.isfile(filename): - raise ArgFail('i', f'{filename} is not a file') +def validate_args_syntax(args): + 'Check command args against general syntax expectations.' + def validate_page_range(pr_string, arg_char): + prefix = 'page range string' + if '-' not in pr_string: + raise ArgFail(arg_char, f'{prefix} lacks "-": {pr_string}') + tokens = pr_string.split('-') + if len(tokens) > 2: + raise ArgFail(arg_char, f'{prefix} has too many "-": {pr_string}') + for i, token in enumerate(tokens): + if token == '': + continue + if i == 0 and token == 'start': + continue + if i == 1 and token == 'end': + continue + try: + int(token) + except ValueError: + raise ArgFail(arg_char, + f'{prefix} carries value neither integer, ' + f'nor "start", nor "end": {pr_string}') + if int(token) < 1: + raise ArgFail(arg_char, + f'{prefix} carries page number <1: {pr_string}') + start = -1 + end = -1 try: - with open(filename, 'rb') as file: - pypdf.PdfReader(file) - except pypdf.errors.PdfStreamError: - raise ArgFail('i', f'cannot interpret {filename} as PDF file') + start = int(tokens[0]) + end = int(tokens[1]) + except ValueError: + pass + if start > end > 0: + raise ArgFail( + arg_char, + f'{prefix} has higher start than end value: {pr_string}') + if args.page_range: for p_string in args.page_range: validate_page_range(p_string, 'p') @@ -211,43 +239,9 @@ def validate_inputs_first_pass(args): raise ArgFail('m', f'non-float value: {args.print_margin}') -def validate_page_range(p_string, arg_char): - prefix = 'page range string' - if '-' not in p_string: - raise ArgFail(arg_char, f'{prefix} lacks "-": {p_string}') - tokens = p_string.split('-') - if len(tokens) > 2: - raise ArgFail(arg_char, f'{prefix} has too many "-": {p_string}') - for i, token in enumerate(tokens): - if token == '': - continue - if i == 0 and token == 'start': - continue - if i == 1 and token == 'end': - continue - try: - int(token) - except ValueError: - raise ArgFail(arg_char, - f'{prefix} carries value neither integer, ' - f'nor "start", nor "end": {p_string}') - if int(token) < 1: - raise ArgFail(arg_char, - f'{prefix} carries page number <1: {p_string}') - start = -1 - end = -1 - try: - start = int(tokens[0]) - end = int(tokens[1]) - except ValueError: - pass - if start > end > 0: - raise ArgFail(arg_char, - f'{prefix} has higher start than end value: {p_string}') - - def split_crops_string(c_string): - initial_split = c_string.split(':') + 'If c_string contains ":" return before and after, else None and c_string.' + initial_split = c_string.split(':', maxsplit=1) if len(initial_split) > 1: page_range = initial_split[0] crops = initial_split[1] @@ -258,30 +252,39 @@ def split_crops_string(c_string): def parse_page_range(range_string, pages): - start_page = 0 - end_page = len(pages) + 'Based on actual pages size read range_string into range limit indices.' + idx_start = 0 + idx_after = len(pages) if range_string: start, end = range_string.split('-') if not (len(start) == 0 or start == 'start'): - start_page = int(start) - 1 - if not (len(end) == 0 or end == 'ebd'): - end_page = int(end) - return start_page, end_page + idx_start = int(start) - 1 + if not (len(end) == 0 or end == 'end'): + idx_after = int(end) + return idx_start, idx_after -def read_inputs_to_pagelist(args_input_file, args_page_range): +def args_to_pagelist(args_input_file, args_page_range): + 'Follow args_input_file ranged by args_page_range into pages, open files.' pages_to_add = [] opened_files = [] new_page_num = 0 - for i, input_file in enumerate(args_input_file): - file = open(input_file, 'rb') + for i, filename in enumerate(args_input_file): + if not os.path.isfile(filename): + raise ArgFail('i', f'{filename} is not a file') + file = open(filename, 'rb') opened_files += [file] - reader = pypdf.PdfReader(file) + try: + reader = pypdf.PdfReader(file) + except pypdf.errors.PdfStreamError: + for file in opened_files: + file.close() + raise ArgFail('i', f'cannot interpret {filename} as PDF file') range_string = None if args_page_range and len(args_page_range) > i: range_string = args_page_range[i] - start_page, end_page = parse_page_range(range_string, reader.pages) - for old_page_num in range(start_page, end_page): + for old_page_num in range(*parse_page_range(range_string, + reader.pages)): new_page_num += 1 if old_page_num >= len(reader.pages): page = pypdf.PageObject.create_blank_page(width=A4_WIDTH, @@ -289,21 +292,21 @@ def read_inputs_to_pagelist(args_input_file, args_page_range): else: page = reader.pages[old_page_num] pages_to_add += [page] - print(f'-i, -p: read in {input_file} page number {old_page_num+1} ' + print(f'-i, -p: read in {filename} page number {old_page_num+1} ' f'as new page {new_page_num}') return pages_to_add, opened_files -def validate_inputs_second_pass(args, pages_to_add): +def validate_ranges(args, pages_to_add): + 'Check command args\' ranges fit into pages_to_add count.' if args.crops: for c_string in args.crops: - page_range, _ = split_crops_string(c_string) - if page_range: - _, end = parse_page_range(page_range, pages_to_add) - if end > len(pages_to_add): - raise ArgFail('c', - 'page range goes beyond number of pages ' - f'we\'re building: {page_range}') + if (page_range := split_crops_string(c_string)[0])\ + and parse_page_range(page_range, + pages_to_add)[1] > len(pages_to_add): + raise ArgFail('c', + 'page range goes beyond number of pages ' + f'we\'re building: {page_range}') if args.rotate_page: for r in args.rotate_page: if r > len(pages_to_add): @@ -355,14 +358,14 @@ def collect_per_page_crops_and_zooms(args_crops, if args_crops: for c_string in args_crops: page_range, crops = split_crops_string(c_string) - start_page, end_page = parse_page_range(page_range, pages_to_add) + idx_start, idx_after = parse_page_range(page_range, pages_to_add) prefix = '-c, -t' if args_symmetry else '-c' suffix = (' (but alternating left and right crop ' 'between even and odd pages)') if args_symmetry else '' page_crop = PageCrop(*crops.split(',')) - print(f'{prefix}: to pages {start_page + 1} to {end_page} ' + print(f'{prefix}: to pages {idx_start + 1}:{idx_after} ' f'applying crop: {page_crop.format_in_cm}{suffix}') - for page_num in range(start_page, end_page): + for page_num in range(idx_start, idx_after): if args_symmetry and page_num % 2: crop_at_page[page_num] = page_crop.give_mirror() else: @@ -556,16 +559,16 @@ def draw_cut(canvas, x_spine_limit, direction): def main(): args = parse_args() - validate_inputs_first_pass(args) + validate_args_syntax(args) if args.nup4: try: from reportlab.pdfgen.canvas import Canvas except ImportError: raise ArgFail('n', 'need reportlab.pdfgen.canvas installed for --nup4') - pages_to_add, opened_files = read_inputs_to_pagelist(args.input_file, - args.page_range) - validate_inputs_second_pass(args, pages_to_add) + pages_to_add, opened_files = args_to_pagelist(args.input_file, + args.page_range) + validate_ranges(args, pages_to_add) rotate_pages(args.rotate_page, pages_to_add) if args.nup4: pad_pages_to_multiple_of_8(pages_to_add)