From 17cf4888e2a440fb97456c8b5fd771dc5d5d9eae Mon Sep 17 00:00:00 2001 From: Christian Heller <c.heller@plomlompom.de> Date: Sat, 4 Jan 2025 22:12:16 +0100 Subject: [PATCH] Add table schema validation. --- src/migrations/0_init.sql | 41 +++++++------ src/migrations/1_add_files_last_updated.sql | 2 +- src/migrations/2_add_files_sha512.sql | 2 +- src/migrations/3_files_redo.sql | 20 +++---- src/migrations/4_add_files_sha512_blob.sql | 2 +- src/migrations/5_files_redo.sql | 12 ++-- src/migrations/6_add_files_tags.sql | 2 +- src/migrations/new_init.sql | 46 +++++++-------- src/ytplom/db.py | 64 ++++++++++++++++++--- src/ytplom/migrations.py | 2 +- 10 files changed, 120 insertions(+), 73 deletions(-) diff --git a/src/migrations/0_init.sql b/src/migrations/0_init.sql index 5b45769..24c4a3f 100644 --- a/src/migrations/0_init.sql +++ b/src/migrations/0_init.sql @@ -1,9 +1,27 @@ -CREATE TABLE yt_queries ( +CREATE TABLE "files" ( + rel_path TEXT PRIMARY KEY, + yt_id TEXT NOT NULL DEFAULT "", + flags INTEGER NOT NULL DEFAULT 0, + FOREIGN KEY (yt_id) REFERENCES "yt_videos"(id) +); +CREATE TABLE "quota_costs" ( + id TEXT PRIMARY KEY, + timestamp TEXT NOT NULL, + cost INT NOT NULL +); +CREATE TABLE "yt_queries" ( id TEXT PRIMARY KEY, text TEXT NOT NULL, retrieved_at TEXT NOT NULL ); -CREATE TABLE yt_videos ( +CREATE TABLE "yt_query_results" ( + query_id TEXT NOT NULL, + video_id TEXT NOT NULL, + PRIMARY KEY (query_id, video_id), + FOREIGN KEY (query_id) REFERENCES "yt_queries"(id), + FOREIGN KEY (video_id) REFERENCES "yt_videos"(id) +); +CREATE TABLE "yt_videos" ( id TEXT PRIMARY KEY, title TEXT NOT NULL, description TEXT NOT NULL, @@ -11,22 +29,3 @@ CREATE TABLE yt_videos ( duration TEXT NOT NULL, definition TEXT NOT NULL ); -CREATE TABLE yt_query_results ( - query_id TEXT NOT NULL, - video_id TEXT NOT NULL, - PRIMARY KEY (query_id, video_id), - FOREIGN KEY (query_id) REFERENCES yt_queries(id), - FOREIGN KEY (video_id) REFERENCES yt_videos(id) -); -CREATE TABLE quota_costs ( - id TEXT PRIMARY KEY, - timestamp TEXT NOT NULL, - cost INT NOT NULL -); -CREATE TABLE files ( - rel_path TEXT PRIMARY KEY, - yt_id TEXT NOT NULL DEFAULT "", - flags INTEGER NOT NULL DEFAULT 0, - FOREIGN KEY (yt_id) REFERENCES yt_videos(id) -); - diff --git a/src/migrations/1_add_files_last_updated.sql b/src/migrations/1_add_files_last_updated.sql index 678d843..4d2427e 100644 --- a/src/migrations/1_add_files_last_updated.sql +++ b/src/migrations/1_add_files_last_updated.sql @@ -1 +1 @@ -ALTER TABLE files ADD COLUMN last_update TEXT NOT NULL DEFAULT "2000-01-01 12:00:00.123456"; +ALTER TABLE "files" ADD COLUMN last_update TEXT NOT NULL DEFAULT "2000-01-01 12:00:00.123456"; diff --git a/src/migrations/2_add_files_sha512.sql b/src/migrations/2_add_files_sha512.sql index 36d99e1..33f08d8 100644 --- a/src/migrations/2_add_files_sha512.sql +++ b/src/migrations/2_add_files_sha512.sql @@ -1 +1 @@ -ALTER TABLE files ADD COLUMN sha512_digest TEXT NOT NULL DEFAULT ""; +ALTER TABLE "files" ADD COLUMN sha512_digest TEXT NOT NULL DEFAULT ""; diff --git a/src/migrations/3_files_redo.sql b/src/migrations/3_files_redo.sql index bf485bb..9c0ff87 100644 --- a/src/migrations/3_files_redo.sql +++ b/src/migrations/3_files_redo.sql @@ -1,32 +1,32 @@ -CREATE TEMPORARY TABLE files_backup ( +CREATE TEMPORARY TABLE "files_backup" ( rel_path TEXT PRIMARY KEY, yt_id TEXT NOT NULL DEFAULT "", flags INTEGER NOT NULL DEFAULT 0, last_update TEXT NOT NULL DEFAULT "2000-01-01 12:00:00.123456", sha512_digest TEXT NOT NULL, - FOREIGN KEY (yt_id) REFERENCES yt_videos(id) + FOREIGN KEY (yt_id) REFERENCES "yt_videos"(id) ); -INSERT INTO files_backup SELECT +INSERT INTO "files_backup" SELECT rel_path, yt_id, flags, last_update, sha512_digest -FROM files; -DROP TABLE files; -CREATE TABLE files ( +FROM "files"; +DROP TABLE "files"; +CREATE TABLE "files" ( sha512_digest TEXT PRIMARY KEY, rel_path TEXT NOT NULL, flags INTEGER NOT NULL DEFAULT 0, yt_id TEXT, last_update TEXT NOT NULL, - FOREIGN KEY (yt_id) REFERENCES yt_videos(id) + FOREIGN KEY (yt_id) REFERENCES "yt_videos"(id) ); -INSERT INTO files SELECT +INSERT INTO "files" SELECT sha512_digest, rel_path, flags, yt_id, last_update -FROM files_backup; -DROP TABLE files_backup; +FROM "files_backup"; +DROP TABLE "files_backup"; diff --git a/src/migrations/4_add_files_sha512_blob.sql b/src/migrations/4_add_files_sha512_blob.sql index c382740..4ed2299 100644 --- a/src/migrations/4_add_files_sha512_blob.sql +++ b/src/migrations/4_add_files_sha512_blob.sql @@ -1 +1 @@ -ALTER TABLE files ADD COLUMN sha512_blob BLOB; +ALTER TABLE "files" ADD COLUMN sha512_blob BLOB; diff --git a/src/migrations/5_files_redo.sql b/src/migrations/5_files_redo.sql index a9c30fc..44bbb92 100644 --- a/src/migrations/5_files_redo.sql +++ b/src/migrations/5_files_redo.sql @@ -1,17 +1,17 @@ -CREATE TABLE files_new ( +ALTER TABLE "files" RENAME TO "files_old"; +CREATE TABLE "files" ( digest BLOB PRIMARY KEY, rel_path TEXT NOT NULL, flags INTEGER NOT NULL DEFAULT 0, yt_id TEXT, last_update TEXT NOT NULL, - FOREIGN KEY (yt_id) REFERENCES yt_videos(id) + FOREIGN KEY (yt_id) REFERENCES "yt_videos"(id) ); -INSERT INTO files_new SELECT +INSERT INTO "files" SELECT sha512_blob, rel_path, flags, yt_id, last_update -FROM files; -DROP TABLE files; -ALTER TABLE files_new RENAME TO files; +FROM "files_old"; +DROP TABLE "files_old"; diff --git a/src/migrations/6_add_files_tags.sql b/src/migrations/6_add_files_tags.sql index 7d70c18..4077830 100644 --- a/src/migrations/6_add_files_tags.sql +++ b/src/migrations/6_add_files_tags.sql @@ -1 +1 @@ -ALTER TABLE files ADD COLUMN tags TEXT NOT NULL DEFAULT ""; +ALTER TABLE "files" ADD COLUMN tags TEXT NOT NULL DEFAULT ""; diff --git a/src/migrations/new_init.sql b/src/migrations/new_init.sql index ad11670..ae5e871 100644 --- a/src/migrations/new_init.sql +++ b/src/migrations/new_init.sql @@ -1,9 +1,30 @@ -CREATE TABLE yt_queries ( +CREATE TABLE "files" ( + digest BLOB PRIMARY KEY, + rel_path TEXT NOT NULL, + flags INTEGER NOT NULL DEFAULT 0, + yt_id TEXT, + last_update TEXT NOT NULL, + tags TEXT NOT NULL DEFAULT "", + FOREIGN KEY (yt_id) REFERENCES "yt_videos"(id) +); +CREATE TABLE "quota_costs" ( + id TEXT PRIMARY KEY, + timestamp TEXT NOT NULL, + cost INT NOT NULL +); +CREATE TABLE "yt_queries" ( id TEXT PRIMARY KEY, text TEXT NOT NULL, retrieved_at TEXT NOT NULL ); -CREATE TABLE yt_videos ( +CREATE TABLE "yt_query_results" ( + query_id TEXT NOT NULL, + video_id TEXT NOT NULL, + PRIMARY KEY (query_id, video_id), + FOREIGN KEY (query_id) REFERENCES "yt_queries"(id), + FOREIGN KEY (video_id) REFERENCES "yt_videos"(id) +); +CREATE TABLE "yt_videos" ( id TEXT PRIMARY KEY, title TEXT NOT NULL, description TEXT NOT NULL, @@ -11,24 +32,3 @@ CREATE TABLE yt_videos ( duration TEXT NOT NULL, definition TEXT NOT NULL ); -CREATE TABLE yt_query_results ( - query_id TEXT NOT NULL, - video_id TEXT NOT NULL, - PRIMARY KEY (query_id, video_id), - FOREIGN KEY (query_id) REFERENCES yt_queries(id), - FOREIGN KEY (video_id) REFERENCES yt_videos(id) -); -CREATE TABLE quota_costs ( - id TEXT PRIMARY KEY, - timestamp TEXT NOT NULL, - cost INT NOT NULL -); -CREATE TABLE files ( - digest BLOB PRIMARY KEY, - rel_path TEXT NOT NULL, - flags INTEGER NOT NULL DEFAULT 0, - yt_id TEXT, - last_update TEXT NOT NULL, - tags TEXT NOT NULL DEFAULT "", - FOREIGN KEY (yt_id) REFERENCES yt_videos(id) -); diff --git a/src/ytplom/db.py b/src/ytplom/db.py index e30e2ad..51e94ac 100644 --- a/src/ytplom/db.py +++ b/src/ytplom/db.py @@ -2,6 +2,7 @@ # included libs from base64 import urlsafe_b64decode, urlsafe_b64encode +from difflib import Differ from hashlib import file_digest from pathlib import Path from sqlite3 import (connect as sql_connect, Connection as SqlConnection, @@ -63,16 +64,63 @@ class DbFile: def __init__(self, path: Path = PATH_DB, - expected_version: int = EXPECTED_DB_VERSION + version_to_validate: int = EXPECTED_DB_VERSION ) -> None: self._path = path - if not path.is_file(): - raise HandledException(f'no DB file at {path}') - if expected_version >= 0: - user_version = self._get_user_version() - if user_version != expected_version: - raise HandledException( - f'wrong DB version {user_version} (!= {expected_version})') + if not self._path.is_file(): + raise HandledException(f'no DB file at {self._path}') + + if version_to_validate < 0: + return + # ensure version + if (user_version := self._get_user_version()) != version_to_validate: + raise HandledException( + f'wrong DB version {user_version} (!= {version_to_validate})') + + # ensure schema + with sql_connect(self._path) as conn: + schema_rows = [ + r[0] for r in + conn.execute('SELECT sql FROM sqlite_master ORDER BY sql') + if r[0]] + schema_rows_normed = [] + indent = ' ' + for row in schema_rows: + row_normed = [] + for subrow in row.split('\n'): + subrow = subrow.rstrip() + in_parentheses = 0 + split_at = [] + for i, c in enumerate(subrow): + if '(' == c: + in_parentheses += 1 + elif ')' == c: + in_parentheses -= 1 + elif ',' == c and 0 == in_parentheses: + split_at += [i + 1] + prev_split = 0 + for i in split_at: + segment = subrow[prev_split:i].strip() + if len(segment) > 0: + row_normed += [f'{indent}{segment}'] + prev_split = i + segment = subrow[prev_split:].strip() + if len(segment) > 0: + row_normed += [f'{indent}{segment}'] + row_normed[0] = row_normed[0].lstrip() + row_normed[-1] = row_normed[-1].lstrip() + if row_normed[-1] != ')' and row_normed[-3][-1] != ',': + row_normed[-3] = row_normed[-3] + ',' + row_normed[-2:] = [indent + row_normed[-1][:-1]] + [')'] + schema_rows_normed += ['\n'.join(row_normed)] + retrieved_schema = ';\n'.join(schema_rows_normed) + ';' + with open(_PATH_DB_SCHEMA, 'r', encoding='utf-8') as f: + stored_schema = f.read().rstrip() + if stored_schema != retrieved_schema: + diff_msg = Differ().compare(retrieved_schema.splitlines(), + stored_schema.splitlines()) + raise HandledException('DB has wrong tables schema. Diff:\n' + + '\n'.join(diff_msg)) def _get_user_version(self) -> int: with sql_connect(self._path) as conn: diff --git a/src/ytplom/migrations.py b/src/ytplom/migrations.py index cb038ef..5cacc95 100644 --- a/src/ytplom/migrations.py +++ b/src/ytplom/migrations.py @@ -68,4 +68,4 @@ _MIGRATIONS: MigrationsList = [ def migrate(): """Migrate DB file at expected default path to most recent version.""" - DbFile(expected_version=-1).migrate(_MIGRATIONS) + DbFile(version_to_validate=-1).migrate(_MIGRATIONS) -- 2.30.2