X-Git-Url: https://plomlompom.com/repos/berlin_corona.txt?a=blobdiff_plain;f=plomtask%2Fdb.py;h=13cdaef5b9c7d3e992f8c92730a9979b9eee2d73;hb=HEAD;hp=bf2b79f758b313525816581620a792330fdbd9a2;hpb=23c7ff7f9833ff5b0e547c19a4ad85325a5d3400;p=plomtask diff --git a/plomtask/db.py b/plomtask/db.py index bf2b79f..13cdaef 100644 --- a/plomtask/db.py +++ b/plomtask/db.py @@ -1,67 +1,163 @@ """Database management.""" from __future__ import annotations +from os import listdir from os.path import isfile from difflib import Differ from sqlite3 import connect as sql_connect, Cursor, Row -from typing import Any, Self, TypeVar, Generic -from plomtask.exceptions import HandledException +from typing import Any, Self, TypeVar, Generic, Callable +from plomtask.exceptions import HandledException, NotFoundException +from plomtask.dating import valid_date -PATH_DB_SCHEMA = 'scripts/init.sql' -EXPECTED_DB_VERSION = 0 +EXPECTED_DB_VERSION = 5 +MIGRATIONS_DIR = 'migrations' +FILENAME_DB_SCHEMA = f'init_{EXPECTED_DB_VERSION}.sql' +PATH_DB_SCHEMA = f'{MIGRATIONS_DIR}/{FILENAME_DB_SCHEMA}' -class DatabaseFile: # pylint: disable=too-few-public-methods +class UnmigratedDbException(HandledException): + """To identify case of unmigrated DB file.""" + + +class DatabaseFile: """Represents the sqlite3 database's file.""" + # pylint: disable=too-few-public-methods def __init__(self, path: str) -> None: self.path = path self._check() - def remake(self) -> None: - """Create tables in self.path file as per PATH_DB_SCHEMA sql file.""" - with sql_connect(self.path) as conn: + @classmethod + def create_at(cls, path: str) -> DatabaseFile: + """Make new DB file at path.""" + with sql_connect(path) as conn: with open(PATH_DB_SCHEMA, 'r', encoding='utf-8') as f: conn.executescript(f.read()) - self._check() + conn.execute(f'PRAGMA user_version = {EXPECTED_DB_VERSION}') + return cls(path) + + @classmethod + def migrate(cls, path: str) -> DatabaseFile: + """Apply migrations from_version to EXPECTED_DB_VERSION.""" + migrations = cls._available_migrations() + from_version = cls._get_version_of_db(path) + migrations_todo = migrations[from_version+1:] + for j, filename in enumerate(migrations_todo): + with sql_connect(path) as conn: + with open(f'{MIGRATIONS_DIR}/{filename}', 'r', + encoding='utf-8') as f: + conn.executescript(f.read()) + user_version = from_version + j + 1 + with sql_connect(path) as conn: + conn.execute(f'PRAGMA user_version = {user_version}') + return cls(path) def _check(self) -> None: """Check file exists, and is of proper DB version and schema.""" - self.exists = isfile(self.path) - if self.exists: - self._validate_user_version() - self._validate_schema() + if not isfile(self.path): + raise NotFoundException + if self._user_version != EXPECTED_DB_VERSION: + raise UnmigratedDbException() + self._validate_schema() + + @staticmethod + def _available_migrations() -> list[str]: + """Validate migrations directory and return sorted entries.""" + msg_too_big = 'Migration directory points beyond expected DB version.' + msg_bad_entry = 'Migration directory contains unexpected entry: ' + msg_missing = 'Migration directory misses migration of number: ' + migrations = {} + for entry in listdir(MIGRATIONS_DIR): + if entry == FILENAME_DB_SCHEMA: + continue + toks = entry.split('_', 1) + if len(toks) < 2: + raise HandledException(msg_bad_entry + entry) + try: + i = int(toks[0]) + except ValueError as e: + raise HandledException(msg_bad_entry + entry) from e + if i > EXPECTED_DB_VERSION: + raise HandledException(msg_too_big) + migrations[i] = toks[1] + migrations_list = [] + for i in range(EXPECTED_DB_VERSION + 1): + if i not in migrations: + raise HandledException(msg_missing + str(i)) + migrations_list += [f'{i}_{migrations[i]}'] + return migrations_list - def _validate_user_version(self) -> None: - """Compare DB user_version with EXPECTED_DB_VERSION.""" + @staticmethod + def _get_version_of_db(path: str) -> int: + """Get DB user_version, fail if outside expected range.""" sql_for_db_version = 'PRAGMA user_version' - with sql_connect(self.path) as conn: + with sql_connect(path) as conn: db_version = list(conn.execute(sql_for_db_version))[0][0] - if db_version != EXPECTED_DB_VERSION: - msg = f'Wrong DB version, expected '\ - f'{EXPECTED_DB_VERSION}, got {db_version}.' - raise HandledException(msg) + if db_version > EXPECTED_DB_VERSION: + msg = f'Wrong DB version, expected '\ + f'{EXPECTED_DB_VERSION}, got unknown {db_version}.' + raise HandledException(msg) + assert isinstance(db_version, int) + return db_version + + @property + def _user_version(self) -> int: + """Get DB user_version.""" + return self._get_version_of_db(self.path) def _validate_schema(self) -> None: """Compare found schema with what's stored at PATH_DB_SCHEMA.""" + + def reformat_rows(rows: list[str]) -> list[str]: + new_rows = [] + for row in rows: + new_row = [] + for subrow in row.split('\n'): + subrow = subrow.rstrip() + in_parentheses = 0 + split_at = [] + for i, c in enumerate(subrow): + if '(' == c: + in_parentheses += 1 + elif ')' == c: + in_parentheses -= 1 + elif ',' == c and 0 == in_parentheses: + split_at += [i + 1] + prev_split = 0 + for i in split_at: + segment = subrow[prev_split:i].strip() + if len(segment) > 0: + new_row += [f' {segment}'] + prev_split = i + segment = subrow[prev_split:].strip() + if len(segment) > 0: + new_row += [f' {segment}'] + new_row[0] = new_row[0].lstrip() + new_row[-1] = new_row[-1].lstrip() + if new_row[-1] != ')' and new_row[-3][-1] != ',': + new_row[-3] = new_row[-3] + ',' + new_row[-2:] = [' ' + new_row[-1][:-1]] + [')'] + new_rows += ['\n'.join(new_row)] + return new_rows + sql_for_schema = 'SELECT sql FROM sqlite_master ORDER BY sql' msg_err = 'Database has wrong tables schema. Diff:\n' with sql_connect(self.path) as conn: schema_rows = [r[0] for r in conn.execute(sql_for_schema) if r[0]] - retrieved_schema = ';\n'.join(schema_rows) + ';' - with open(PATH_DB_SCHEMA, 'r', encoding='utf-8') as f: - stored_schema = f.read().rstrip() - if stored_schema != retrieved_schema: - diff_msg = Differ().compare(retrieved_schema.splitlines(), - stored_schema.splitlines()) - raise HandledException(msg_err + '\n'.join(diff_msg)) + schema_rows = reformat_rows(schema_rows) + retrieved_schema = ';\n'.join(schema_rows) + ';' + with open(PATH_DB_SCHEMA, 'r', encoding='utf-8') as f: + stored_schema = f.read().rstrip() + if stored_schema != retrieved_schema: + diff_msg = Differ().compare(retrieved_schema.splitlines(), + stored_schema.splitlines()) + raise HandledException(msg_err + '\n'.join(diff_msg)) class DatabaseConnection: """A single connection to the database.""" def __init__(self, db_file: DatabaseFile) -> None: - self.file = db_file - self.conn = sql_connect(self.file.path) + self.conn = sql_connect(db_file.path) def commit(self) -> None: """Commit SQL transaction.""" @@ -71,18 +167,28 @@ class DatabaseConnection: """Add commands to SQL transaction.""" return self.conn.execute(code, inputs) + def exec_on_vals(self, code: str, inputs: tuple[Any, ...]) -> Cursor: + """Wrapper around .exec appending adequate " (?, …)" to code.""" + q_marks_from_values = '(' + ','.join(['?'] * len(inputs)) + ')' + return self.exec(f'{code} {q_marks_from_values}', inputs) + def close(self) -> None: """Close DB connection.""" self.conn.close() - def rewrite_relations(self, table_name: str, key: str, target: int, - rows: list[list[Any]]) -> None: - """Rewrite relations in table_name to target, with rows values.""" + def rewrite_relations(self, table_name: str, key: str, target: int | str, + rows: list[list[Any]], key_index: int = 0) -> None: + # pylint: disable=too-many-arguments + """Rewrite relations in table_name to target, with rows values. + + Note that single rows are expected without the column and value + identified by key and target, which are inserted inside the function + at key_index. + """ self.delete_where(table_name, key, target) for row in rows: - values = tuple([target] + row) - q_marks = self.__class__.q_marks_from_values(values) - self.exec(f'INSERT INTO {table_name} VALUES {q_marks}', values) + values = tuple(row[:key_index] + [target] + row[key_index:]) + self.exec_on_vals(f'INSERT INTO {table_name} VALUES', values) def row_where(self, table_name: str, key: str, target: int | str) -> list[Row]: @@ -90,6 +196,17 @@ class DatabaseConnection: return list(self.exec(f'SELECT * FROM {table_name} WHERE {key} = ?', (target,))) + # def column_where_pattern(self, + # table_name: str, + # column: str, + # pattern: str, + # keys: list[str]) -> list[Any]: + # """Return column of rows where one of keys matches pattern.""" + # targets = tuple([f'%{pattern}%'] * len(keys)) + # haystack = ' OR '.join([f'{k} LIKE ?' for k in keys]) + # sql = f'SELECT {column} FROM {table_name} WHERE {haystack}' + # return [row[0] for row in self.exec(sql, targets)] + def column_where(self, table_name: str, column: str, key: str, target: int | str) -> list[Any]: """Return column of table where key == target.""" @@ -102,96 +219,367 @@ class DatabaseConnection: return [row[0] for row in self.exec(f'SELECT {column} FROM {table_name}')] - def delete_where(self, table_name: str, key: str, target: int) -> None: + def delete_where(self, table_name: str, key: str, + target: int | str) -> None: """Delete from table where key == target.""" self.exec(f'DELETE FROM {table_name} WHERE {key} = ?', (target,)) - @staticmethod - def q_marks_from_values(values: tuple[Any]) -> str: - """Return placeholder to insert values into SQL code.""" - return '(' + ','.join(['?'] * len(values)) + ')' - -X = TypeVar('X', int, str) -T = TypeVar('T', bound='BaseModel[Any]') +BaseModelId = TypeVar('BaseModelId', int, str) +BaseModelInstance = TypeVar('BaseModelInstance', bound='BaseModel[Any]') -class BaseModel(Generic[X]): +class BaseModel(Generic[BaseModelId]): """Template for most of the models we use/derive from the DB.""" table_name = '' to_save: list[str] = [] - id_: None | X - cache_: dict[X, Self] = {} + to_save_versioned: list[str] = [] + to_save_relations: list[tuple[str, str, str, int]] = [] + add_to_dict: list[str] = [] + id_: None | BaseModelId + cache_: dict[BaseModelId, Self] + to_search: list[str] = [] + can_create_by_id = False + _exists = True + sorters: dict[str, Callable[..., Any]] = {} + + def __init__(self, id_: BaseModelId | None) -> None: + if isinstance(id_, int) and id_ < 1: + msg = f'illegal {self.__class__.__name__} ID, must be >=1: {id_}' + raise HandledException(msg) + if isinstance(id_, str) and "" == id_: + msg = f'illegal {self.__class__.__name__} ID, must be non-empty' + raise HandledException(msg) + self.id_ = id_ + + def __hash__(self) -> int: + hashable = [self.id_] + [getattr(self, name) for name in self.to_save] + for definition in self.to_save_relations: + attr = getattr(self, definition[2]) + hashable += [tuple(rel.id_ for rel in attr)] + for name in self.to_save_versioned: + hashable += [hash(getattr(self, name))] + return hash(tuple(hashable)) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, self.__class__): + return False + return hash(self) == hash(other) + + def __lt__(self, other: Any) -> bool: + if not isinstance(other, self.__class__): + msg = 'cannot compare to object of different class' + raise HandledException(msg) + assert isinstance(self.id_, int) + assert isinstance(other.id_, int) + return self.id_ < other.id_ + + @property + def as_dict(self) -> dict[str, object]: + """Return self as (json.dumps-compatible) dict.""" + library: dict[str, dict[str | int, object]] = {} + d: dict[str, object] = {'id': self.id_, '_library': library} + for to_save in self.to_save: + attr = getattr(self, to_save) + if hasattr(attr, 'as_dict_into_reference'): + d[to_save] = attr.as_dict_into_reference(library) + else: + d[to_save] = attr + if len(self.to_save_versioned) > 0: + d['_versioned'] = {} + for k in self.to_save_versioned: + attr = getattr(self, k) + assert isinstance(d['_versioned'], dict) + d['_versioned'][k] = attr.history + for r in self.to_save_relations: + attr_name = r[2] + l: list[int | str] = [] + for rel in getattr(self, attr_name): + l += [rel.as_dict_into_reference(library)] + d[attr_name] = l + for k in self.add_to_dict: + d[k] = [x.as_dict_into_reference(library) + for x in getattr(self, k)] + return d + + def as_dict_into_reference(self, + library: dict[str, dict[str | int, object]] + ) -> int | str: + """Return self.id_ while writing .as_dict into library.""" + def into_library(library: dict[str, dict[str | int, object]], + cls_name: str, + id_: str | int, + d: dict[str, object] + ) -> None: + if cls_name not in library: + library[cls_name] = {} + if id_ in library[cls_name]: + if library[cls_name][id_] != d: + msg = 'Unexpected inequality of entries for ' +\ + f'_library at: {cls_name}/{id_}' + raise HandledException(msg) + else: + library[cls_name][id_] = d + as_dict = self.as_dict + assert isinstance(as_dict['_library'], dict) + for cls_name, dict_of_objs in as_dict['_library'].items(): + for id_, obj in dict_of_objs.items(): + into_library(library, cls_name, id_, obj) + del as_dict['_library'] + assert self.id_ is not None + into_library(library, self.__class__.__name__, self.id_, as_dict) + assert isinstance(as_dict['id'], (int, str)) + return as_dict['id'] @classmethod - def from_table_row(cls: type[T], - # pylint: disable=unused-argument - db_conn: DatabaseConnection, - row: Row | list[Any]) -> T: - """Make from DB row, write to DB cache.""" - obj = cls(*row) - obj.cache() - return obj + def name_lowercase(cls) -> str: + """Convenience method to return cls' name in lowercase.""" + return cls.__name__.lower() @classmethod - def _by_id(cls, - db_conn: DatabaseConnection, - id_: X) -> tuple[Self | None, bool]: - """Return instance found by ID, or None, and if from cache or not.""" - from_cache = False - obj = cls.get_cached(id_) - if obj: - from_cache = True - else: - for row in db_conn.row_where(cls.table_name, 'id', id_): - obj = cls.from_table_row(db_conn, row) - obj.cache() - break - return obj, from_cache - - def set_int_id(self, id_: int | None) -> None: - """Set id_ if >= 1 or None, else fail.""" - if (id_ is not None) and id_ < 1: - msg = f'illegal {self.__class__.__name__} ID, must be >=1: {id_}' - raise HandledException(msg) - self.id_ = id_ # type: ignore[assignment] + def sort_by(cls, seq: list[Any], sort_key: str, default: str = 'title' + ) -> str: + """Sort cls list by cls.sorters[sort_key] (reverse if '-'-prefixed).""" + reverse = False + if len(sort_key) > 1 and '-' == sort_key[0]: + sort_key = sort_key[1:] + reverse = True + if sort_key not in cls.sorters: + sort_key = default + sorter: Callable[..., Any] = cls.sorters[sort_key] + seq.sort(key=sorter, reverse=reverse) + if reverse: + sort_key = f'-{sort_key}' + return sort_key + + # cache management + # (we primarily use the cache to ensure we work on the same object in + # memory no matter where and how we retrieve it, e.g. we don't want + # .by_id() calls to create a new object each time, but rather a pointer + # to the one already instantiated) + + def __getattribute__(self, name: str) -> Any: + """Ensure fail if ._disappear() was called, except to check ._exists""" + if name != '_exists' and not super().__getattribute__('_exists'): + raise HandledException('Object does not exist.') + return super().__getattribute__(name) + + def _disappear(self) -> None: + """Invalidate object, make future use raise exceptions.""" + assert self.id_ is not None + if self._get_cached(self.id_): + self._uncache() + to_kill = list(self.__dict__.keys()) + for attr in to_kill: + delattr(self, attr) + self._exists = False - def save_core(self, db_conn: DatabaseConnection, - update_with_lastrowid: bool = True) -> None: - """Write bare-bones self (sans connected items), ensuring self.id_.""" - values = tuple([self.id_] + [getattr(self, key) - for key in self.to_save]) - q_marks = DatabaseConnection.q_marks_from_values(values) - table_name = self.table_name - cursor = db_conn.exec(f'REPLACE INTO {table_name} VALUES {q_marks}', - values) - if update_with_lastrowid: - self.id_ = cursor.lastrowid # type: ignore[assignment] - self.cache() + @classmethod + def empty_cache(cls) -> None: + """Empty class's cache, and disappear all former inhabitants.""" + # pylint: disable=protected-access + # (cause we remain within the class) + if hasattr(cls, 'cache_'): + to_disappear = list(cls.cache_.values()) + for item in to_disappear: + item._disappear() + cls.cache_ = {} + + @classmethod + def get_cache(cls: type[BaseModelInstance]) -> dict[Any, BaseModel[Any]]: + """Get cache dictionary, create it if not yet existing.""" + if not hasattr(cls, 'cache_'): + d: dict[Any, BaseModel[Any]] = {} + cls.cache_ = d + return cls.cache_ @classmethod - def get_cached(cls: type[T], id_: X) -> T | None: + def _get_cached(cls: type[BaseModelInstance], + id_: BaseModelId) -> BaseModelInstance | None: """Get object of id_ from class's cache, or None if not found.""" - # pylint: disable=consider-iterating-dictionary - if id_ in cls.cache_.keys(): - obj = cls.cache_[id_] + cache = cls.get_cache() + if id_ in cache: + obj = cache[id_] assert isinstance(obj, cls) return obj return None def cache(self) -> None: - """Update object in class's cache.""" + """Update object in class's cache. + + Also calls ._disappear if cache holds older reference to object of same + ID, but different memory address, to avoid doing anything with + dangling leftovers. + """ if self.id_ is None: raise HandledException('Cannot cache object without ID.') - self.__class__.cache_[self.id_] = self - - def uncache(self) -> None: + cache = self.get_cache() + old_cached = self._get_cached(self.id_) + if old_cached and id(old_cached) != id(self): + # pylint: disable=protected-access + # (cause we remain within the class) + old_cached._disappear() + cache[self.id_] = self + + def _uncache(self) -> None: """Remove self from cache.""" if self.id_ is None: raise HandledException('Cannot un-cache object without ID.') - del self.__class__.cache_[self.id_] + cache = self.get_cache() + del cache[self.id_] + + # object retrieval and generation @classmethod - def empty_cache(cls) -> None: - """Empty class's cache.""" - cls.cache_ = {} + def from_table_row(cls: type[BaseModelInstance], + # pylint: disable=unused-argument + db_conn: DatabaseConnection, + row: Row | list[Any]) -> BaseModelInstance: + """Make from DB row (sans relations), update DB cache with it.""" + obj = cls(*row) + assert obj.id_ is not None + for attr_name in cls.to_save_versioned: + attr = getattr(obj, attr_name) + table_name = attr.table_name + for row_ in db_conn.row_where(table_name, 'parent', obj.id_): + attr.history_from_row(row_) + obj.cache() + return obj + + @classmethod + def by_id(cls, db_conn: DatabaseConnection, id_: BaseModelId) -> Self: + """Retrieve by id_, on failure throw NotFoundException. + + First try to get from cls.cache_, only then check DB; if found, + put into cache. + """ + obj = None + if id_ is not None: + obj = cls._get_cached(id_) + if not obj: + for row in db_conn.row_where(cls.table_name, 'id', id_): + obj = cls.from_table_row(db_conn, row) + break + if obj: + return obj + raise NotFoundException(f'found no object of ID {id_}') + + @classmethod + def by_id_or_create(cls, db_conn: DatabaseConnection, + id_: BaseModelId | None + ) -> Self: + """Wrapper around .by_id, creating (not caching/saving) if not find.""" + if not cls.can_create_by_id: + raise HandledException('Class cannot .by_id_or_create.') + if id_ is None: + return cls(None) + try: + return cls.by_id(db_conn, id_) + except NotFoundException: + return cls(id_) + + @classmethod + def all(cls: type[BaseModelInstance], + db_conn: DatabaseConnection) -> list[BaseModelInstance]: + """Collect all objects of class into list. + + Note that this primarily returns the contents of the cache, and only + _expands_ that by additional findings in the DB. This assumes the + cache is always instantly cleaned of any items that would be removed + from the DB. + """ + items: dict[BaseModelId, BaseModelInstance] = {} + for k, v in cls.get_cache().items(): + assert isinstance(v, cls) + items[k] = v + already_recorded = items.keys() + for id_ in db_conn.column_all(cls.table_name, 'id'): + if id_ not in already_recorded: + item = cls.by_id(db_conn, id_) + assert item.id_ is not None + items[item.id_] = item + return list(items.values()) + + @classmethod + def by_date_range_with_limits(cls: type[BaseModelInstance], + db_conn: DatabaseConnection, + date_range: tuple[str, str], + date_col: str = 'day' + ) -> tuple[list[BaseModelInstance], str, + str]: + """Return list of items in database within (open) date_range interval. + + If no range values provided, defaults them to 'yesterday' and + 'tomorrow'. Knows to properly interpret these and 'today' as value. + """ + start_str = date_range[0] if date_range[0] else 'yesterday' + end_str = date_range[1] if date_range[1] else 'tomorrow' + start_date = valid_date(start_str) + end_date = valid_date(end_str) + items = [] + sql = f'SELECT id FROM {cls.table_name} ' + sql += f'WHERE {date_col} >= ? AND {date_col} <= ?' + for row in db_conn.exec(sql, (start_date, end_date)): + items += [cls.by_id(db_conn, row[0])] + return items, start_date, end_date + + @classmethod + def matching(cls: type[BaseModelInstance], db_conn: DatabaseConnection, + pattern: str) -> list[BaseModelInstance]: + """Return all objects whose .to_search match pattern.""" + items = cls.all(db_conn) + if pattern: + filtered = [] + for item in items: + for attr_name in cls.to_search: + toks = attr_name.split('.') + parent = item + for tok in toks: + attr = getattr(parent, tok) + parent = attr + if pattern in attr: + filtered += [item] + break + return filtered + return items + + # database writing + + def save(self, db_conn: DatabaseConnection) -> None: + """Write self to DB and cache and ensure .id_. + + Write both to DB, and to cache. To DB, write .id_ and attributes + listed in cls.to_save[_versioned|_relations]. + + Ensure self.id_ by setting it to what the DB command returns as the + last saved row's ID (cursor.lastrowid), EXCEPT if self.id_ already + exists as a 'str', which implies we do our own ID creation (so far + only the case with the Day class, where it's to be a date string. + """ + values = tuple([self.id_] + [getattr(self, key) + for key in self.to_save]) + table_name = self.table_name + cursor = db_conn.exec_on_vals(f'REPLACE INTO {table_name} VALUES', + values) + if not isinstance(self.id_, str): + self.id_ = cursor.lastrowid # type: ignore[assignment] + self.cache() + for attr_name in self.to_save_versioned: + getattr(self, attr_name).save(db_conn) + for table, column, attr_name, key_index in self.to_save_relations: + assert isinstance(self.id_, (int, str)) + db_conn.rewrite_relations(table, column, self.id_, + [[i.id_] for i + in getattr(self, attr_name)], key_index) + + def remove(self, db_conn: DatabaseConnection) -> None: + """Remove from DB and cache, including dependencies.""" + if self.id_ is None or self._get_cached(self.id_) is None: + raise HandledException('cannot remove unsaved item') + for attr_name in self.to_save_versioned: + getattr(self, attr_name).remove(db_conn) + for table, column, attr_name, _ in self.to_save_relations: + db_conn.delete_where(table, column, self.id_) + self._uncache() + db_conn.delete_where(self.table_name, 'id', self.id_) + self._disappear()