plomlompom.com Git - plomtask/blob - plomtask/db.py

   1 """Database management."""
from __future__ import annotations
from contextlib import closing
from difflib import Differ
from os import listdir
from os.path import isfile
from sqlite3 import connect as sql_connect, Cursor, Row
from typing import Any, Self, TypeVar, Generic

from plomtask.exceptions import HandledException, NotFoundException
from plomtask.dating import valid_date
  10
# user_version a DB file must carry to pass DatabaseFile._check
EXPECTED_DB_VERSION = 5
# directory listed for migration scripts; also holds the schema file
MIGRATIONS_DIR = 'migrations'
# file name of the full schema matching EXPECTED_DB_VERSION
FILENAME_DB_SCHEMA = f'init_{EXPECTED_DB_VERSION}.sql'
# path of that schema file, relative to the working directory
PATH_DB_SCHEMA = f'{MIGRATIONS_DIR}/{FILENAME_DB_SCHEMA}'
  15
  16
class UnmigratedDbException(HandledException):
    """To identify case of unmigrated DB file.

    Raised by DatabaseFile._check when the file's user_version differs from
    EXPECTED_DB_VERSION.
    """
  19
  20
  21 class DatabaseFile:
  22     """Represents the sqlite3 database's file."""
  23     # pylint: disable=too-few-public-methods
  24
  25     def __init__(self, path: str) -> None:
  26         self.path = path
  27         self._check()
  28
  29     @classmethod
  30     def create_at(cls, path: str) -> DatabaseFile:
  31         """Make new DB file at path."""
  32         with sql_connect(path) as conn:
  33             with open(PATH_DB_SCHEMA, 'r', encoding='utf-8') as f:
  34                 conn.executescript(f.read())
  35             conn.execute(f'PRAGMA user_version = {EXPECTED_DB_VERSION}')
  36         return cls(path)
  37
  38     @classmethod
  39     def migrate(cls, path: str) -> DatabaseFile:
  40         """Apply migrations from_version to EXPECTED_DB_VERSION."""
  41         migrations = cls._available_migrations()
  42         from_version = cls._get_version_of_db(path)
  43         migrations_todo = migrations[from_version+1:]
  44         for j, filename in enumerate(migrations_todo):
  45             with sql_connect(path) as conn:
  46                 with open(f'{MIGRATIONS_DIR}/{filename}', 'r',
  47                           encoding='utf-8') as f:
  48                     conn.executescript(f.read())
  49             user_version = from_version + j + 1
  50             with sql_connect(path) as conn:
  51                 conn.execute(f'PRAGMA user_version = {user_version}')
  52         return cls(path)
  53
  54     def _check(self) -> None:
  55         """Check file exists, and is of proper DB version and schema."""
  56         if not isfile(self.path):
  57             raise NotFoundException
  58         if self._user_version != EXPECTED_DB_VERSION:
  59             raise UnmigratedDbException()
  60         self._validate_schema()
  61
  62     @staticmethod
  63     def _available_migrations() -> list[str]:
  64         """Validate migrations directory and return sorted entries."""
  65         msg_too_big = 'Migration directory points beyond expected DB version.'
  66         msg_bad_entry = 'Migration directory contains unexpected entry: '
  67         msg_missing = 'Migration directory misses migration of number: '
  68         migrations = {}
  69         for entry in listdir(MIGRATIONS_DIR):
  70             if entry == FILENAME_DB_SCHEMA:
  71                 continue
  72             toks = entry.split('_', 1)
  73             if len(toks) < 2:
  74                 raise HandledException(msg_bad_entry + entry)
  75             try:
  76                 i = int(toks[0])
  77             except ValueError as e:
  78                 raise HandledException(msg_bad_entry + entry) from e
  79             if i > EXPECTED_DB_VERSION:
  80                 raise HandledException(msg_too_big)
  81             migrations[i] = toks[1]
  82         migrations_list = []
  83         for i in range(EXPECTED_DB_VERSION + 1):
  84             if i not in migrations:
  85                 raise HandledException(msg_missing + str(i))
  86             migrations_list += [f'{i}_{migrations[i]}']
  87         return migrations_list
  88
  89     @staticmethod
  90     def _get_version_of_db(path: str) -> int:
  91         """Get DB user_version, fail if outside expected range."""
  92         sql_for_db_version = 'PRAGMA user_version'
  93         with sql_connect(path) as conn:
  94             db_version = list(conn.execute(sql_for_db_version))[0][0]
  95         if db_version > EXPECTED_DB_VERSION:
  96             msg = f'Wrong DB version, expected '\
  97                     f'{EXPECTED_DB_VERSION}, got unknown {db_version}.'
  98             raise HandledException(msg)
  99         assert isinstance(db_version, int)
 100         return db_version
 101
 102     @property
 103     def _user_version(self) -> int:
 104         """Get DB user_version."""
 105         return self._get_version_of_db(self.path)
 106
 107     def _validate_schema(self) -> None:
 108         """Compare found schema with what's stored at PATH_DB_SCHEMA."""
 109
 110         def reformat_rows(rows: list[str]) -> list[str]:
 111             new_rows = []
 112             for row in rows:
 113                 new_row = []
 114                 for subrow in row.split('\n'):
 115                     subrow = subrow.rstrip()
 116                     in_parentheses = 0
 117                     split_at = []
 118                     for i, c in enumerate(subrow):
 119                         if '(' == c:
 120                             in_parentheses += 1
 121                         elif ')' == c:
 122                             in_parentheses -= 1
 123                         elif ',' == c and 0 == in_parentheses:
 124                             split_at += [i + 1]
 125                     prev_split = 0
 126                     for i in split_at:
 127                         segment = subrow[prev_split:i].strip()
 128                         if len(segment) > 0:
 129                             new_row += [f'    {segment}']
 130                         prev_split = i
 131                     segment = subrow[prev_split:].strip()
 132                     if len(segment) > 0:
 133                         new_row += [f'    {segment}']
 134                 new_row[0] = new_row[0].lstrip()
 135                 new_row[-1] = new_row[-1].lstrip()
 136                 if new_row[-1] != ')' and new_row[-3][-1] != ',':
 137                     new_row[-3] = new_row[-3] + ','
 138                     new_row[-2:] = ['    ' + new_row[-1][:-1]] + [')']
 139                 new_rows += ['\n'.join(new_row)]
 140             return new_rows
 141
 142         sql_for_schema = 'SELECT sql FROM sqlite_master ORDER BY sql'
 143         msg_err = 'Database has wrong tables schema. Diff:\n'
 144         with sql_connect(self.path) as conn:
 145             schema_rows = [r[0] for r in conn.execute(sql_for_schema) if r[0]]
 146         schema_rows = reformat_rows(schema_rows)
 147         retrieved_schema = ';\n'.join(schema_rows) + ';'
 148         with open(PATH_DB_SCHEMA, 'r', encoding='utf-8') as f:
 149             stored_schema = f.read().rstrip()
 150         if stored_schema != retrieved_schema:
 151             diff_msg = Differ().compare(retrieved_schema.splitlines(),
 152                                         stored_schema.splitlines())
 153             raise HandledException(msg_err + '\n'.join(diff_msg))
 154
 155
 156 class DatabaseConnection:
 157     """A single connection to the database."""
 158
 159     def __init__(self, db_file: DatabaseFile) -> None:
 160         self.conn = sql_connect(db_file.path)
 161
 162     def commit(self) -> None:
 163         """Commit SQL transaction."""
 164         self.conn.commit()
 165
 166     def exec(self, code: str, inputs: tuple[Any, ...] = tuple()) -> Cursor:
 167         """Add commands to SQL transaction."""
 168         return self.conn.execute(code, inputs)
 169
 170     def exec_on_vals(self, code: str, inputs: tuple[Any, ...]) -> Cursor:
 171         """Wrapper around .exec appending adequate " (?, …)" to code."""
 172         q_marks_from_values = '(' + ','.join(['?'] * len(inputs)) + ')'
 173         return self.exec(f'{code} {q_marks_from_values}', inputs)
 174
 175     def close(self) -> None:
 176         """Close DB connection."""
 177         self.conn.close()
 178
 179     def rewrite_relations(self, table_name: str, key: str, target: int | str,
 180                           rows: list[list[Any]], key_index: int = 0) -> None:
 181         # pylint: disable=too-many-arguments
 182         """Rewrite relations in table_name to target, with rows values.
 183
 184         Note that single rows are expected without the column and value
 185         identified by key and target, which are inserted inside the function
 186         at key_index.
 187         """
 188         self.delete_where(table_name, key, target)
 189         for row in rows:
 190             values = tuple(row[:key_index] + [target] + row[key_index:])
 191             self.exec_on_vals(f'INSERT INTO {table_name} VALUES', values)
 192
 193     def row_where(self, table_name: str, key: str,
 194                   target: int | str) -> list[Row]:
 195         """Return list of Rows at table where key == target."""
 196         return list(self.exec(f'SELECT * FROM {table_name} WHERE {key} = ?',
 197                               (target,)))
 198
 199     # def column_where_pattern(self,
 200     #                          table_name: str,
 201     #                          column: str,
 202     #                          pattern: str,
 203     #                          keys: list[str]) -> list[Any]:
 204     #     """Return column of rows where one of keys matches pattern."""
 205     #     targets = tuple([f'%{pattern}%'] * len(keys))
 206     #     haystack = ' OR '.join([f'{k} LIKE ?' for k in keys])
 207     #     sql = f'SELECT {column} FROM {table_name} WHERE {haystack}'
 208     #     return [row[0] for row in self.exec(sql, targets)]
 209
 210     def column_where(self, table_name: str, column: str, key: str,
 211                      target: int | str) -> list[Any]:
 212         """Return column of table where key == target."""
 213         return [row[0] for row in
 214                 self.exec(f'SELECT {column} FROM {table_name} '
 215                           f'WHERE {key} = ?', (target,))]
 216
 217     def column_all(self, table_name: str, column: str) -> list[Any]:
 218         """Return complete column of table."""
 219         return [row[0] for row in
 220                 self.exec(f'SELECT {column} FROM {table_name}')]
 221
 222     def delete_where(self, table_name: str, key: str,
 223                      target: int | str) -> None:
 224         """Delete from table where key == target."""
 225         self.exec(f'DELETE FROM {table_name} WHERE {key} = ?', (target,))
 226
 227
# type of a model's ID: constrained to either int or str
BaseModelId = TypeVar('BaseModelId', int, str)
# stands for whatever BaseModel (sub)class a classmethod gets called on
BaseModelInstance = TypeVar('BaseModelInstance', bound='BaseModel[Any]')
 230
 231
class BaseModel(Generic[BaseModelId]):
    """Template for most of the models we use/derive from the DB."""
    # name of DB table that .save, .remove, .by_id and .all operate on
    table_name = ''
    # names of attributes that .save writes (after .id_) into table_name
    to_save: list[str] = []
    # names of attributes that .save/.remove delegate to via the
    # attribute's own .save/.remove, and whose .history .as_dict exports
    to_save_versioned: list[str] = []
    # per relation: (table, column, attribute name, key index), as unpacked
    # by .save for DatabaseConnection.rewrite_relations
    to_save_relations: list[tuple[str, str, str, int]] = []
    id_: None | BaseModelId
    # per-class cache of instances by ID, created lazily by .get_cache
    cache_: dict[BaseModelId, Self]
    # (dot-separated) attribute paths that .matching searches for a pattern
    to_search: list[str] = []
    # whether .by_id_or_create is allowed for the class
    can_create_by_id = False
    # set to False by ._disappear to block any further attribute access
    _exists = True
 243
 244     def __init__(self, id_: BaseModelId | None) -> None:
 245         if isinstance(id_, int) and id_ < 1:
 246             msg = f'illegal {self.__class__.__name__} ID, must be >=1: {id_}'
 247             raise HandledException(msg)
 248         if isinstance(id_, str) and "" == id_:
 249             msg = f'illegal {self.__class__.__name__} ID, must be non-empty'
 250             raise HandledException(msg)
 251         self.id_ = id_
 252
 253     def __hash__(self) -> int:
 254         hashable = [self.id_] + [getattr(self, name) for name in self.to_save]
 255         for definition in self.to_save_relations:
 256             attr = getattr(self, definition[2])
 257             hashable += [tuple(rel.id_ for rel in attr)]
 258         for name in self.to_save_versioned:
 259             hashable += [hash(getattr(self, name))]
 260         return hash(tuple(hashable))
 261
 262     def __eq__(self, other: object) -> bool:
 263         if not isinstance(other, self.__class__):
 264             return False
 265         return hash(self) == hash(other)
 266
 267     def __lt__(self, other: Any) -> bool:
 268         if not isinstance(other, self.__class__):
 269             msg = 'cannot compare to object of different class'
 270             raise HandledException(msg)
 271         assert isinstance(self.id_, int)
 272         assert isinstance(other.id_, int)
 273         return self.id_ < other.id_
 274
 275     @property
 276     def as_dict(self) -> dict[str, object]:
 277         """Return self as (json.dumps-coompatible) dict."""
 278         d: dict[str, object] = {'id': self.id_}
 279         if len(self.to_save_versioned) > 0:
 280             d['_versioned'] = {}
 281         for k in self.to_save:
 282             attr = getattr(self, k)
 283             if hasattr(attr, 'as_dict'):
 284                 d[k] = attr.as_dict
 285             d[k] = attr
 286         for k in self.to_save_versioned:
 287             attr = getattr(self, k)
 288             assert isinstance(d['_versioned'], dict)
 289             d['_versioned'][k] = attr.history
 290         for r in self.to_save_relations:
 291             attr_name = r[2]
 292             d[attr_name] = [x.as_dict for x in getattr(self, attr_name)]
 293         return d
 294
 295     # cache management
 296     # (we primarily use the cache to ensure we work on the same object in
 297     # memory no matter where and how we retrieve it, e.g. we don't want
 298     # .by_id() calls to create a new object each time, but rather a pointer
 299     # to the one already instantiated)
 300
 301     def __getattribute__(self, name: str) -> Any:
 302         """Ensure fail if ._disappear() was called, except to check ._exists"""
 303         if name != '_exists' and not super().__getattribute__('_exists'):
 304             raise HandledException('Object does not exist.')
 305         return super().__getattribute__(name)
 306
 307     def _disappear(self) -> None:
 308         """Invalidate object, make future use raise exceptions."""
 309         assert self.id_ is not None
 310         if self._get_cached(self.id_):
 311             self._uncache()
 312         to_kill = list(self.__dict__.keys())
 313         for attr in to_kill:
 314             delattr(self, attr)
 315         self._exists = False
 316
 317     @classmethod
 318     def empty_cache(cls) -> None:
 319         """Empty class's cache, and disappear all former inhabitants."""
 320         # pylint: disable=protected-access
 321         # (cause we remain within the class)
 322         if hasattr(cls, 'cache_'):
 323             to_disappear = list(cls.cache_.values())
 324             for item in to_disappear:
 325                 item._disappear()
 326         cls.cache_ = {}
 327
 328     @classmethod
 329     def get_cache(cls: type[BaseModelInstance]) -> dict[Any, BaseModel[Any]]:
 330         """Get cache dictionary, create it if not yet existing."""
 331         if not hasattr(cls, 'cache_'):
 332             d: dict[Any, BaseModel[Any]] = {}
 333             cls.cache_ = d
 334         return cls.cache_
 335
 336     @classmethod
 337     def _get_cached(cls: type[BaseModelInstance],
 338                     id_: BaseModelId) -> BaseModelInstance | None:
 339         """Get object of id_ from class's cache, or None if not found."""
 340         # pylint: disable=consider-iterating-dictionary
 341         cache = cls.get_cache()
 342         if id_ in cache.keys():
 343             obj = cache[id_]
 344             assert isinstance(obj, cls)
 345             return obj
 346         return None
 347
 348     def cache(self) -> None:
 349         """Update object in class's cache.
 350
 351         Also calls ._disappear if cache holds older reference to object of same
 352         ID, but different memory address, to avoid doing anything with
 353         dangling leftovers.
 354         """
 355         if self.id_ is None:
 356             raise HandledException('Cannot cache object without ID.')
 357         cache = self.get_cache()
 358         old_cached = self._get_cached(self.id_)
 359         if old_cached and id(old_cached) != id(self):
 360             # pylint: disable=protected-access
 361             # (cause we remain within the class)
 362             old_cached._disappear()
 363         cache[self.id_] = self
 364
 365     def _uncache(self) -> None:
 366         """Remove self from cache."""
 367         if self.id_ is None:
 368             raise HandledException('Cannot un-cache object without ID.')
 369         cache = self.get_cache()
 370         del cache[self.id_]
 371
 372     # object retrieval and generation
 373
 374     @classmethod
 375     def from_table_row(cls: type[BaseModelInstance],
 376                        # pylint: disable=unused-argument
 377                        db_conn: DatabaseConnection,
 378                        row: Row | list[Any]) -> BaseModelInstance:
 379         """Make from DB row (sans relations), update DB cache with it."""
 380         obj = cls(*row)
 381         assert obj.id_ is not None
 382         for attr_name in cls.to_save_versioned:
 383             attr = getattr(obj, attr_name)
 384             table_name = attr.table_name
 385             for row_ in db_conn.row_where(table_name, 'parent', obj.id_):
 386                 attr.history_from_row(row_)
 387         obj.cache()
 388         return obj
 389
 390     @classmethod
 391     def by_id(cls, db_conn: DatabaseConnection, id_: BaseModelId) -> Self:
 392         """Retrieve by id_, on failure throw NotFoundException.
 393
 394         First try to get from cls.cache_, only then check DB; if found,
 395         put into cache.
 396         """
 397         obj = None
 398         if id_ is not None:
 399             obj = cls._get_cached(id_)
 400             if not obj:
 401                 for row in db_conn.row_where(cls.table_name, 'id', id_):
 402                     obj = cls.from_table_row(db_conn, row)
 403                     break
 404         if obj:
 405             return obj
 406         raise NotFoundException(f'found no object of ID {id_}')
 407
 408     @classmethod
 409     def by_id_or_create(cls, db_conn: DatabaseConnection,
 410                         id_: BaseModelId | None
 411                         ) -> Self:
 412         """Wrapper around .by_id, creating (not caching/saving) if not find."""
 413         if not cls.can_create_by_id:
 414             raise HandledException('Class cannot .by_id_or_create.')
 415         if id_ is None:
 416             return cls(None)
 417         try:
 418             return cls.by_id(db_conn, id_)
 419         except NotFoundException:
 420             return cls(id_)
 421
 422     @classmethod
 423     def all(cls: type[BaseModelInstance],
 424             db_conn: DatabaseConnection) -> list[BaseModelInstance]:
 425         """Collect all objects of class into list.
 426
 427         Note that this primarily returns the contents of the cache, and only
 428         _expands_ that by additional findings in the DB. This assumes the
 429         cache is always instantly cleaned of any items that would be removed
 430         from the DB.
 431         """
 432         items: dict[BaseModelId, BaseModelInstance] = {}
 433         for k, v in cls.get_cache().items():
 434             assert isinstance(v, cls)
 435             items[k] = v
 436         already_recorded = items.keys()
 437         for id_ in db_conn.column_all(cls.table_name, 'id'):
 438             if id_ not in already_recorded:
 439                 item = cls.by_id(db_conn, id_)
 440                 assert item.id_ is not None
 441                 items[item.id_] = item
 442         return list(items.values())
 443
 444     @classmethod
 445     def by_date_range_with_limits(cls: type[BaseModelInstance],
 446                                   db_conn: DatabaseConnection,
 447                                   date_range: tuple[str, str],
 448                                   date_col: str = 'day'
 449                                   ) -> tuple[list[BaseModelInstance], str,
 450                                              str]:
 451         """Return list of items in database within (open) date_range interval.
 452
 453         If no range values provided, defaults them to 'yesterday' and
 454         'tomorrow'. Knows to properly interpret these and 'today' as value.
 455         """
 456         start_str = date_range[0] if date_range[0] else 'yesterday'
 457         end_str = date_range[1] if date_range[1] else 'tomorrow'
 458         start_date = valid_date(start_str)
 459         end_date = valid_date(end_str)
 460         items = []
 461         sql = f'SELECT id FROM {cls.table_name} '
 462         sql += f'WHERE {date_col} >= ? AND {date_col} <= ?'
 463         for row in db_conn.exec(sql, (start_date, end_date)):
 464             items += [cls.by_id(db_conn, row[0])]
 465         return items, start_date, end_date
 466
 467     @classmethod
 468     def matching(cls: type[BaseModelInstance], db_conn: DatabaseConnection,
 469                  pattern: str) -> list[BaseModelInstance]:
 470         """Return all objects whose .to_search match pattern."""
 471         items = cls.all(db_conn)
 472         if pattern:
 473             filtered = []
 474             for item in items:
 475                 for attr_name in cls.to_search:
 476                     toks = attr_name.split('.')
 477                     parent = item
 478                     for tok in toks:
 479                         attr = getattr(parent, tok)
 480                         parent = attr
 481                     if pattern in attr:
 482                         filtered += [item]
 483                         break
 484             return filtered
 485         return items
 486
 487     # database writing
 488
 489     def save(self, db_conn: DatabaseConnection) -> None:
 490         """Write self to DB and cache and ensure .id_.
 491
 492         Write both to DB, and to cache. To DB, write .id_ and attributes
 493         listed in cls.to_save[_versioned|_relations].
 494
 495         Ensure self.id_ by setting it to what the DB command returns as the
 496         last saved row's ID (cursor.lastrowid), EXCEPT if self.id_ already
 497         exists as a 'str', which implies we do our own ID creation (so far
 498         only the case with the Day class, where it's to be a date string.
 499         """
 500         values = tuple([self.id_] + [getattr(self, key)
 501                                      for key in self.to_save])
 502         table_name = self.table_name
 503         cursor = db_conn.exec_on_vals(f'REPLACE INTO {table_name} VALUES',
 504                                       values)
 505         if not isinstance(self.id_, str):
 506             self.id_ = cursor.lastrowid  # type: ignore[assignment]
 507         self.cache()
 508         for attr_name in self.to_save_versioned:
 509             getattr(self, attr_name).save(db_conn)
 510         for table, column, attr_name, key_index in self.to_save_relations:
 511             assert isinstance(self.id_, (int, str))
 512             db_conn.rewrite_relations(table, column, self.id_,
 513                                       [[i.id_] for i
 514                                        in getattr(self, attr_name)], key_index)
 515
 516     def remove(self, db_conn: DatabaseConnection) -> None:
 517         """Remove from DB and cache, including dependencies."""
 518         if self.id_ is None or self._get_cached(self.id_) is None:
 519             raise HandledException('cannot remove unsaved item')
 520         for attr_name in self.to_save_versioned:
 521             getattr(self, attr_name).remove(db_conn)
 522         for table, column, attr_name, _ in self.to_save_relations:
 523             db_conn.delete_where(table, column, self.id_)
 524         self._uncache()
 525         db_conn.delete_where(self.table_name, 'id', self.id_)
 526         self._disappear()