"""Database management."""
+from __future__ import annotations
+from os import listdir
from os.path import isfile
from difflib import Differ
-from sqlite3 import connect as sql_connect, Cursor
-from typing import Any, Dict
-from plomtask.exceptions import HandledException
+from sqlite3 import connect as sql_connect, Cursor, Row
+from typing import Any, Self, TypeVar, Generic
+from plomtask.exceptions import HandledException, NotFoundException
# DB schema version this code expects; compared against the DB's user_version.
EXPECTED_DB_VERSION = 4
# Directory that holds the numbered migration scripts and the schema file.
MIGRATIONS_DIR = 'migrations'
# Name of the full-schema file for EXPECTED_DB_VERSION inside MIGRATIONS_DIR.
FILENAME_DB_SCHEMA = 'init_' + str(EXPECTED_DB_VERSION) + '.sql'
# Relative path of that schema file.
PATH_DB_SCHEMA = '/'.join((MIGRATIONS_DIR, FILENAME_DB_SCHEMA))
+
+
class UnmigratedDbException(HandledException):
    """Signals a DB file whose user_version is not the expected one."""
class DatabaseFile: # pylint: disable=too-few-public-methods
self.path = path
self._check()
@classmethod
def create_at(cls, path: str) -> DatabaseFile:
    """Make new DB file at path and return a DatabaseFile for it.

    Runs the SQL script stored at PATH_DB_SCHEMA against the file at path
    and stamps it with user_version = EXPECTED_DB_VERSION.

    The schema script is read before the DB is touched, so an unreadable
    schema file does not leave an empty DB file behind.  The connection is
    closed explicitly: sqlite3's connection context manager only commits
    or rolls back, it never closes.
    """
    with open(PATH_DB_SCHEMA, 'r', encoding='utf-8') as f:
        schema_sql = f.read()
    conn = sql_connect(path)
    try:
        conn.executescript(schema_sql)
        conn.execute(f'PRAGMA user_version = {EXPECTED_DB_VERSION}')
        conn.commit()
    finally:
        conn.close()
    return cls(path)
+
@classmethod
def migrate(cls, path: str) -> DatabaseFile:
    """Apply migrations from DB's current version to EXPECTED_DB_VERSION.

    Reads the current user_version of the DB at path, then runs every
    migration script with a higher number in ascending order, bumping
    user_version after each one.  Returns a DatabaseFile for path.

    Each migration and its version bump share one connection, which is
    closed explicitly afterwards (sqlite3's connection context manager
    would only commit, not close).
    """
    migrations = cls._available_migrations()
    from_version = cls.get_version_of_db(path)
    first_todo = from_version + 1
    for user_version, filename in enumerate(migrations[first_todo:],
                                            start=first_todo):
        with open(f'{MIGRATIONS_DIR}/{filename}', 'r',
                  encoding='utf-8') as f:
            migration_sql = f.read()
        conn = sql_connect(path)
        try:
            conn.executescript(migration_sql)
            conn.execute(f'PRAGMA user_version = {user_version}')
            conn.commit()
        finally:
            conn.close()
    return cls(path)
def _check(self) -> None:
    """Ensure the file exists and carries the expected version and schema.

    Raises NotFoundException if nothing is found at self.path, and
    UnmigratedDbException if the DB's user_version differs from
    EXPECTED_DB_VERSION; finally delegates to self._validate_schema().
    """
    file_found = isfile(self.path)
    if not file_found:
        raise NotFoundException
    version_matches = self.user_version == EXPECTED_DB_VERSION
    if not version_matches:
        raise UnmigratedDbException()
    self._validate_schema()
+
@staticmethod
def _available_migrations() -> list[str]:
    """Validate migrations directory and return entries sorted by number.

    Every entry of MIGRATIONS_DIR other than FILENAME_DB_SCHEMA must be
    named '<number>_<rest>'.  Returns the file names for the numbers
    0..EXPECTED_DB_VERSION, in that order.

    Raises HandledException if an entry is malformed, if a number exceeds
    EXPECTED_DB_VERSION, if a number occurs more than once, or if a number
    in the expected range is missing.
    """
    msg_too_big = 'Migration directory points beyond expected DB version.'
    msg_bad_entry = 'Migration directory contains unexpected entry: '
    msg_missing = 'Migration directory misses migration of number: '
    msg_duplicate = 'Migration directory has several migrations of number: '
    migrations: dict[int, str] = {}
    for entry in listdir(MIGRATIONS_DIR):
        if entry == FILENAME_DB_SCHEMA:
            continue
        number, separator, _ = entry.partition('_')
        if not separator:
            raise HandledException(msg_bad_entry + entry)
        try:
            i = int(number)
        except ValueError as e:
            raise HandledException(msg_bad_entry + entry) from e
        if i > EXPECTED_DB_VERSION:
            raise HandledException(msg_too_big)
        # listdir order is arbitrary, so two files claiming the same number
        # would otherwise make the chosen migration non-deterministic.
        if i in migrations:
            raise HandledException(msg_duplicate + str(i))
        # Keep the real file name: rebuilding it from the parsed int would
        # point at a non-existent file for names like '01_foo.sql'.
        migrations[i] = entry
    migrations_list = []
    for i in range(EXPECTED_DB_VERSION + 1):
        if i not in migrations:
            raise HandledException(msg_missing + str(i))
        migrations_list.append(migrations[i])
    return migrations_list
+
@staticmethod
def get_version_of_db(path: str) -> int:
    """Get DB user_version, fail if outside expected range.

    Opens the SQLite file at path, reads 'PRAGMA user_version' and closes
    the connection again (sqlite3's connection context manager would only
    commit, not close).

    Raises HandledException if the stored version is greater than
    EXPECTED_DB_VERSION.
    """
    conn = sql_connect(path)
    try:
        db_version = conn.execute('PRAGMA user_version').fetchone()[0]
    finally:
        conn.close()
    # Narrow the type before comparing, not after.
    assert isinstance(db_version, int)
    if db_version > EXPECTED_DB_VERSION:
        msg = f'Wrong DB version, expected '\
                f'{EXPECTED_DB_VERSION}, got unknown {db_version}.'
        raise HandledException(msg)
    return db_version
+
@property
def user_version(self) -> int:
    """Return the user_version stored in the DB file at self.path."""
    owner = type(self)
    return owner.get_version_of_db(self.path)
def _validate_schema(self) -> None:
"""Compare found schema with what's stored at PATH_DB_SCHEMA."""
+
# Helper: re-flows each SQL statement read from sqlite_master so that every
# comma outside of parentheses ends a line; presumably this mirrors the
# layout used in the schema file -- TODO confirm against PATH_DB_SCHEMA.
+ def reformat_rows(rows: list[str]) -> list[str]:
+ new_rows = []
+ for row in rows:
+ new_row = []
+ for subrow in row.split('\n'):
+ subrow = subrow.rstrip()
# Track parenthesis depth so that only commas at depth 0 become split
# points; the recorded index is the position just past the comma.
+ in_parentheses = 0
+ split_at = []
+ for i, c in enumerate(subrow):
+ if '(' == c:
+ in_parentheses += 1
+ elif ')' == c:
+ in_parentheses -= 1
+ elif ',' == c and 0 == in_parentheses:
+ split_at += [i + 1]
# Cut the line at the recorded positions; empty segments are dropped and
# each kept segment is prefixed with padding.
+ prev_split = 0
+ for i in split_at:
+ segment = subrow[prev_split:i].strip()
+ if len(segment) > 0:
+ new_row += [f' {segment}']
+ prev_split = i
+ segment = subrow[prev_split:].strip()
+ if len(segment) > 0:
+ new_row += [f' {segment}']
# First and last segment of a statement carry no leading padding.
+ new_row[0] = new_row[0].lstrip()
+ new_row[-1] = new_row[-1].lstrip()
# If the statement does not end on a line consisting only of ')', move the
# last character of the final segment onto a line of its own.
# NOTE(review): new_row[-3] needs at least three segments, otherwise this
# raises IndexError -- confirm all stored statements are long enough.
+ if new_row[-1] != ')' and new_row[-3][-1] != ',':
+ new_row[-3] = new_row[-3] + ','
+ new_row[-2:] = [' ' + new_row[-1][:-1]] + [')']
+ new_rows += ['\n'.join(new_row)]
+ return new_rows
+
# Entries of sqlite_master whose sql column is empty/NULL are skipped.
sql_for_schema = 'SELECT sql FROM sqlite_master ORDER BY sql'
msg_err = 'Database has wrong tables schema. Diff:\n'
with sql_connect(self.path) as conn:
schema_rows = [r[0] for r in conn.execute(sql_for_schema) if r[0]]
- retrieved_schema = ';\n'.join(schema_rows) + ';'
- with open(PATH_DB_SCHEMA, 'r', encoding='utf-8') as f:
- stored_schema = f.read().rstrip()
- if stored_schema != retrieved_schema:
- diff_msg = Differ().compare(retrieved_schema.splitlines(),
- stored_schema.splitlines())
- raise HandledException(msg_err + '\n'.join(diff_msg))
# Normalise the retrieved statements, join them with ';' and compare with
# the schema file's text (trailing whitespace stripped).
+ schema_rows = reformat_rows(schema_rows)
+ retrieved_schema = ';\n'.join(schema_rows) + ';'
+ with open(PATH_DB_SCHEMA, 'r', encoding='utf-8') as f:
+ stored_schema = f.read().rstrip()
# On mismatch, raise with a line-by-line diff of retrieved vs. stored schema.
+ if stored_schema != retrieved_schema:
+ diff_msg = Differ().compare(retrieved_schema.splitlines(),
+ stored_schema.splitlines())
+ raise HandledException(msg_err + '\n'.join(diff_msg))
class DatabaseConnection:
def __init__(self, db_file: DatabaseFile) -> None:
    """Keep db_file and open an SQLite connection to its path."""
    self.file = db_file
    self.conn = sql_connect(db_file.path)
def commit(self) -> None:
"""Commit SQL transaction."""
def close(self) -> None:
    """Shut down the underlying SQLite connection."""
    connection = self.conn
    connection.close()
+
def rewrite_relations(self, table_name: str, key: str, target: int | str,
                      rows: list[list[Any]]) -> None:
    """Replace target's rows in table_name with the ones given in rows.

    First deletes every row where key == target, then inserts one row per
    entry of rows, each with target prepended as its first value.
    """
    self.delete_where(table_name, key, target)
    for relation in rows:
        to_insert = tuple([target] + relation)
        placeholders = self.__class__.q_marks_from_values(to_insert)
        sql = f'INSERT INTO {table_name} VALUES {placeholders}'
        self.exec(sql, to_insert)
+
def row_where(self, table_name: str, key: str,
              target: int | str) -> list[Row]:
    """Collect all rows of table_name whose key column equals target."""
    sql = f'SELECT * FROM {table_name} WHERE {key} = ?'
    cursor = self.exec(sql, (target,))
    return [*cursor]
+
def column_where(self, table_name: str, column: str, key: str,
                 target: int | str) -> list[Any]:
    """Collect column's values from rows of table_name where key == target."""
    sql = f'SELECT {column} FROM {table_name} WHERE {key} = ?'
    found = []
    for row in self.exec(sql, (target,)):
        found.append(row[0])
    return found
+
def column_all(self, table_name: str, column: str) -> list[Any]:
    """Collect column's value from every row of table_name."""
    sql = f'SELECT {column} FROM {table_name}'
    values = []
    for row in self.exec(sql):
        values.append(row[0])
    return values
+
def delete_where(self, table_name: str, key: str,
                 target: int | str) -> None:
    """Remove every row of table_name whose key column equals target."""
    sql = f'DELETE FROM {table_name} WHERE {key} = ?'
    self.exec(sql, (target,))
+
+ @staticmethod
+ def q_marks_from_values(values: tuple[Any]) -> str:
+ """Return placeholder to insert values into SQL code."""
+ return '(' + ','.join(['?'] * len(values)) + ')'
+
+
+BaseModelId = TypeVar('BaseModelId', int, str)
+BaseModelInstance = TypeVar('BaseModelInstance', bound='BaseModel[Any]')
+
+
+class BaseModel(Generic[BaseModelId]):
+ """Template for most of the models we use/derive from the DB."""
+ table_name = ''
+ to_save: list[str] = []
+ to_save_versioned: list[str] = []
+ to_save_relations: list[tuple[str, str, str]] = []
+ id_: None | BaseModelId
+ cache_: dict[BaseModelId, Self]
+
+ def __init__(self, id_: BaseModelId | None) -> None:
+ if isinstance(id_, int) and id_ < 1:
+ msg = f'illegal {self.__class__.__name__} ID, must be >=1: {id_}'
+ raise HandledException(msg)
+ self.id_ = id_
+
+ def __eq__(self, other: object) -> bool:
+ if not isinstance(other, self.__class__):
+ return False
+ to_hash_me = tuple([self.id_] +
+ [getattr(self, name) for name in self.to_save])
+ to_hash_other = tuple([other.id_] +
+ [getattr(other, name) for name in other.to_save])
+ return hash(to_hash_me) == hash(to_hash_other)
+
+ def __lt__(self, other: Any) -> bool:
+ if not isinstance(other, self.__class__):
+ msg = 'cannot compare to object of different class'
+ raise HandledException(msg)
+ assert isinstance(self.id_, int)
+ assert isinstance(other.id_, int)
+ return self.id_ < other.id_
+
+ @classmethod
+ def get_cached(cls: type[BaseModelInstance],
+ id_: BaseModelId) -> BaseModelInstance | None:
+ """Get object of id_ from class's cache, or None if not found."""
+ # pylint: disable=consider-iterating-dictionary
+ cache = cls.get_cache()
+ if id_ in cache.keys():
+ obj = cache[id_]
+ assert isinstance(obj, cls)
+ return obj
+ return None
+
+ @classmethod
+ def empty_cache(cls) -> None:
+ """Empty class's cache."""
+ cls.cache_ = {}
+
+ @classmethod
+ def get_cache(cls: type[BaseModelInstance]) -> dict[Any, BaseModel[Any]]:
+ """Get cache dictionary, create it if not yet existing."""
+ if not hasattr(cls, 'cache_'):
+ d: dict[Any, BaseModel[Any]] = {}
+ cls.cache_ = d
+ return cls.cache_
+
+ def cache(self) -> None:
+ """Update object in class's cache."""
+ if self.id_ is None:
+ raise HandledException('Cannot cache object without ID.')
+ cache = self.__class__.get_cache()
+ cache[self.id_] = self
+
+ def uncache(self) -> None:
+ """Remove self from cache."""
+ if self.id_ is None:
+ raise HandledException('Cannot un-cache object without ID.')
+ cache = self.__class__.get_cache()
+ del cache[self.id_]
+
+ @classmethod
+ def from_table_row(cls: type[BaseModelInstance],
+ # pylint: disable=unused-argument
+ db_conn: DatabaseConnection,
+ row: Row | list[Any]) -> BaseModelInstance:
+ """Make from DB row, write to DB cache."""
+ obj = cls(*row)
+ obj.cache()
+ return obj
+
+ @classmethod
+ def by_id(cls, db_conn: DatabaseConnection,
+ id_: BaseModelId | None,
+ # pylint: disable=unused-argument
+ create: bool = False) -> Self:
+ """Retrieve by id_, on failure throw NotFoundException.
+
+ First try to get from cls.cache_, only then check DB; if found,
+ put into cache.
+
+ If create=True, make anew (but do not cache yet).
+ """
+ obj = None
+ if id_ is not None:
+ obj = cls.get_cached(id_)
+ if not obj:
+ for row in db_conn.row_where(cls.table_name, 'id', id_):
+ obj = cls.from_table_row(db_conn, row)
+ obj.cache()
+ break
+ if obj:
+ return obj
+ if create:
+ obj = cls(id_)
+ return obj
+ raise NotFoundException(f'found no object of ID {id_}')
+
+ @classmethod
+ def all(cls: type[BaseModelInstance],
+ db_conn: DatabaseConnection) -> list[BaseModelInstance]:
+ """Collect all objects of class into list.
+
+ Note that this primarily returns the contents of the cache, and only
+ _expands_ that by additional findings in the DB. This assumes the
+ cache is always instantly cleaned of any items that would be removed
+ from the DB.
+ """
+ items: dict[BaseModelId, BaseModelInstance] = {}
+ for k, v in cls.get_cache().items():
+ assert isinstance(v, cls)
+ items[k] = v
+ already_recorded = items.keys()
+ for id_ in db_conn.column_all(cls.table_name, 'id'):
+ if id_ not in already_recorded:
+ item = cls.by_id(db_conn, id_)
+ assert item.id_ is not None
+ items[item.id_] = item
+ return list(items.values())
+
+ def save(self, db_conn: DatabaseConnection) -> None:
+ """Write self to DB and cache and ensure .id_.
+
+ Write both to DB, and to cache. To DB, write .id_ and attributes
+ listed in cls.to_save[_versioned|_relations].
+
+ Ensure self.id_ by setting it to what the DB command returns as the
+ last saved row's ID (cursor.lastrowid), EXCEPT if self.id_ already
+ exists as a 'str', which implies we do our own ID creation (so far
+ only the case with the Day class, where it's to be a date string.
+ """
+ values = tuple([self.id_] + [getattr(self, key)
+ for key in self.to_save])
+ q_marks = DatabaseConnection.q_marks_from_values(values)
+ table_name = self.table_name
+ cursor = db_conn.exec(f'REPLACE INTO {table_name} VALUES {q_marks}',
+ values)
+ if not isinstance(self.id_, str):
+ self.id_ = cursor.lastrowid # type: ignore[assignment]
+ self.cache()
+ for attr_name in self.to_save_versioned:
+ getattr(self, attr_name).save(db_conn)
+ for table, column, attr_name in self.to_save_relations:
+ assert isinstance(self.id_, (int, str))
+ db_conn.rewrite_relations(table, column, self.id_,
+ [[i.id_] for i
+ in getattr(self, attr_name)])
+
+ def remove(self, db_conn: DatabaseConnection) -> None:
+ """Remove from DB and cache, including dependencies."""
+ if self.id_ is None or self.__class__.get_cached(self.id_) is None:
+ raise HandledException('cannot remove unsaved item')
+ for attr_name in self.to_save_versioned:
+ getattr(self, attr_name).remove(db_conn)
+ for table, column, attr_name in self.to_save_relations:
+ db_conn.delete_where(table, column, self.id_)
+ self.uncache()
+ db_conn.delete_where(self.table_name, 'id', self.id_)