From: Christian Heller Date: Sun, 2 Mar 2025 15:21:12 +0000 (+0100) Subject: Greatly simplify file deletion and deletion sync mechanics. X-Git-Url: https://plomlompom.com/repos/%7B%7B%20web_path%20%7D%7D/%7B%7Bdb.prefix%7D%7D/static/index.html?a=commitdiff_plain;ds=inline;p=ytplom Greatly simplify file deletion and deletion sync mechanics. --- diff --git a/src/migrations/8_add_file_deletion_requests.sql b/src/migrations/8_add_file_deletion_requests.sql new file mode 100644 index 0000000..e646fd4 --- /dev/null +++ b/src/migrations/8_add_file_deletion_requests.sql @@ -0,0 +1,5 @@ +CREATE TABLE "file_deletion_requests" ( + digest BLOB PRIMARY KEY, + last_update TEXT NOT NULL +); + diff --git a/src/migrations/new_init.sql b/src/migrations/new_init.sql index 1cbc4a1..ac92df6 100644 --- a/src/migrations/new_init.sql +++ b/src/migrations/new_init.sql @@ -1,3 +1,7 @@ +CREATE TABLE "file_deletion_requests" ( + digest BLOB PRIMARY KEY, + last_update TEXT NOT NULL +); CREATE TABLE "files" ( digest BLOB PRIMARY KEY, rel_path TEXT NOT NULL, diff --git a/src/ytplom/db.py b/src/ytplom/db.py index fba0f92..afec3f6 100644 --- a/src/ytplom/db.py +++ b/src/ytplom/db.py @@ -15,7 +15,7 @@ from ytplom.primitives import ( PATH_DB = PATH_APP_DATA.joinpath('db.sql') -_EXPECTED_DB_VERSION = 7 +_EXPECTED_DB_VERSION = 8 _PATH_MIGRATIONS = PATH_APP_DATA.joinpath('migrations') _HASH_ALGO = 'sha512' _PATH_DB_SCHEMA = _PATH_MIGRATIONS.joinpath('new_init.sql') diff --git a/src/ytplom/http.py b/src/ytplom/http.py index 3785b07..ae645dc 100644 --- a/src/ytplom/http.py +++ b/src/ytplom/http.py @@ -123,11 +123,8 @@ class _TaskHandler(PlomHttpHandler): self.send_http(b'no way', code=403) with DbConn() as conn: file = VideoFile.get_one(conn, Hash.from_b64(self.path_toks[2])) - self.server.player.remove_by_digest(file.digest) - file.set_flags({FILE_FLAGS[FlagName('delete')]}) - file.save(conn) + file.delete(conn) conn.commit() - file.ensure_unlinked_if_no_living_owners(conn) self._redirect(Path('/')) def _update_file(self) -> None: @@ -178,9 +175,8 @@ class _TaskHandler(PlomHttpHandler): def _purge_deleted_files(self) -> None: with DbConn() as conn: - for file in VideoFile.get_all_deleted(conn): - self.server.player.remove_by_digest(file.digest) - VideoFile.purge_deleteds(conn) + for digest in VideoFile.purge_deleteds(conn): + self.server.player.remove_by_digest(digest) conn.commit() self.send_http(b'OK') @@ -244,7 +240,7 @@ class _TaskHandler(PlomHttpHandler): try: with DbConn() as conn: file_data = VideoFile.get_by_yt_id(conn, video_id) - if file_data.missing: + if not file_data.present: raise NotFoundException self._redirect(Path('/') .joinpath(PAGE_NAMES['file']) @@ -355,10 +351,8 @@ class _TaskHandler(PlomHttpHandler): def _send_missing_json(self) -> None: with DbConn() as conn: - missing = [f.digest.b64 for - f in VideoFile.get_all_non_deleted(conn) - if f.missing] - self._send_json(missing) + self._send_json([f.digest.b64 for f in VideoFile.get_all(conn) + if not f.present]) def _send_tags_index(self) -> None: with DbConn() as conn: diff --git a/src/ytplom/migrations.py b/src/ytplom/migrations.py index 2d93f5e..0933749 100644 --- a/src/ytplom/migrations.py +++ b/src/ytplom/migrations.py @@ -76,4 +76,5 @@ MIGRATIONS: set[DbMigration] = { DbMigration(5, Path('5_files_redo.sql'), None), DbMigration(6, Path('6_add_files_tags.sql'), None), DbMigration(7, Path('7_add_files_duration_ms.sql'), _mig_7_resave_files), + DbMigration(8, Path('8_add_file_deletion_requests.sql'), None), } diff --git a/src/ytplom/misc.py b/src/ytplom/misc.py index ff7b588..aade5de 100644 --- a/src/ytplom/misc.py +++ b/src/ytplom/misc.py @@ -72,8 +72,7 @@ ENVIRON_PREFIX = 'YTPLOM_' TIMESTAMP_FMT = '%Y-%m-%d %H:%M:%S.%f' LEGAL_EXTENSIONS = {'webm', 'mp4', 'mkv'} FILE_FLAGS: dict[FlagName, FlagsInt] = { - FlagName('do not sync'): FlagsInt(1 << 62), - FlagName('delete'): FlagsInt(-(1 << 63)) + FlagName('do not sync'): FlagsInt(1 << 62) } ONE_MILLION = 1000 * 1000 @@ -387,46 +386,14 @@ class VideoFile(DbData): self.last_update = _now_string() return super().save(conn) - @property - def deleted(self) -> bool: - """Return if 'delete' flag set.""" - return self.is_flag_set(FlagName('delete')) - - @classmethod - def get_one(cls, conn: DbConn, id_: str | Hash) -> Self: - """Extend super by .test_deletion.""" - file = super().get_one(conn, id_) - if file.deleted: # pylint: disable=no-member - raise NotFoundException('not showing entry marked as deleted') - # NB: mypy recognizes file as VideoFile without below assert and - # if-isinstance-else, yet less type-smart pylint only does due to the - # latter (also the reason for the disable=no-member above; but wouldn't - # suffice here, pylint would still identify function's return falsely); - # the assert isn't needed by mypy and not enough for pylint, but is - # included just so no future code change would trigger the else result. - assert isinstance(file, VideoFile) - return file if isinstance(file, VideoFile) else cls(None, Path('')) - - @classmethod - def get_all_non_deleted(cls, conn: DbConn) -> list[Self]: - """Extend super by excluding deleteds.""" - return [f for f in super().get_all(conn) if not f.deleted] - - @classmethod - def get_all_deleted(cls, conn: DbConn) -> list[Self]: - """Get only deleteds.""" - return [f for f in super().get_all(conn) if f.deleted] - @classmethod def get_by_yt_id(cls, conn: DbConn, yt_id: YoutubeId) -> Self: - """Return (non-deleted) VideoFile of .yt_id.""" + """Return VideoFile of .yt_id.""" rows = conn.exec(f'SELECT * FROM {cls._table_name} WHERE yt_id =', (yt_id,)).fetchall() for file in [cls._from_table_row(row) for row in rows]: - if not file.deleted: - return file - raise NotFoundException( - f'no undeleted entry for file to Youtube ID {yt_id}') + return file + raise NotFoundException(f'no entry for file to Youtube ID {yt_id}') @classmethod def get_filtered(cls, @@ -435,9 +402,9 @@ class VideoFile(DbData): needed_tags_seen: TagSet = TagSet(set()), show_absent: bool = False ) -> list[Self]: - """Return cls.get_all_non_deleted matching provided filter criteria.""" + """Return all matching provided filter criteria.""" return [ - f for f in cls.get_all_non_deleted(conn) + f for f in cls.get_all(conn) if (show_absent or f.present) and str(filter_path).lower() in str(f.rel_path).lower() and (cls.tags_prefilter_needed.are_all_in(f.tags) @@ -458,7 +425,7 @@ class VideoFile(DbData): def all_tags_showable(cls, conn) -> TagSet: """Show all used tags passing .tags_display_whitelist.""" tags = TagSet() - for file in cls.get_all_non_deleted(conn): + for file in cls.get_all(conn): tags.add(file.tags.whitelisted(cls.tags_display_whitelist)) return tags @@ -470,7 +437,7 @@ class VideoFile(DbData): def unused_tags(self, conn: DbConn) -> TagSet: """Return tags used among other VideoFiles, not in self.""" tags = TagSet() - for file in self.get_all_non_deleted(conn): + for file in self.get_all(conn): tags.add(file.tags.all_not_in(self.tags).whitelisted( self.tags_display_whitelist)) return tags @@ -505,11 +472,6 @@ class VideoFile(DbData): """Return if file exists in filesystem.""" return self.full_path.exists() - @property - def missing(self) -> bool: - """Return if file absent despite absence of 'delete' flag.""" - return not (self.is_flag_set(FlagName('delete')) or self.present) - @property def flags_as_str_list(self) -> list[str]: """Return all set flags.""" @@ -525,28 +487,57 @@ class VideoFile(DbData): """Return if flag of flag_name is set in flags field.""" return bool(self.flags & FILE_FLAGS[flag_name]) - def ensure_unlinked_if_no_living_owners(self, conn: DbConn) -> None: - """If 'delete' flag set and no undeleted owner, unlink.""" - if self.full_path in [f.full_path - for f in self.get_all_non_deleted(conn)]: - return - if self.present: - self.unlink_locally() - def unlink_locally(self) -> None: """Remove actual file from local filesystem.""" print(f'SYNC: removing from filesystem: {self.rel_path}') self.full_path.unlink() + def purge(self, conn) -> None: + """Remove self from database, and in filesystem if no other owners.""" + if self.present and self.full_path not in [ + f.full_path for f in self.get_all(conn) if self != f]: + self.unlink_locally() + print(f'SYNC: purging off DB: {self.digest.b64} ({self.rel_path})') + conn.exec(f'DELETE FROM {self._table_name} WHERE digest =', + (self.digest.bytes,)) + @classmethod - def purge_deleteds(cls, conn: DbConn) -> None: - """For all of .is_flag_set("deleted"), remove file _and_ DB entry.""" - for file in cls.get_all_deleted(conn): - if file.present: - file.unlink_locally() - print(f'SYNC: purging off DB: {file.digest.b64} ({file.rel_path})') - conn.exec(f'DELETE FROM {cls._table_name} WHERE digest =', - (file.digest.bytes,)) + def purge_deleteds(cls, conn: DbConn) -> list[Hash]: + """Purge all with .last_update < a respective entry in delete table.""" + too_early = '2000-01-01 00:00:00.0' + del_req_by_timestamp: dict[Hash, DatetimeStr] = {} + for del_req in FileDeletionRequest.get_all(conn): + if del_req_by_timestamp.get(del_req.digest, too_early + ) < del_req.last_update: + del_req_by_timestamp[del_req.digest] = del_req.last_update + deleteds: list[Hash] = [] + for file in [file for file in cls.get_all(conn) + if del_req_by_timestamp.get(file.digest, too_early + ) > file.last_update]: + deleteds += [file.digest] + file.purge(conn) + return deleteds + + def delete(self, conn: DbConn) -> None: + """Add/update into deletion request table, then purge locally.""" + FileDeletionRequest(self.digest, _now_string()).save(conn) + self.purge(conn) + + +class FileDeletionRequest(DbData): + """Collect file deletion requests in their own table, for syncability.""" + id_name = 'digest' + _table_name = 'file_deletion_requests' + _cols = ('digest', 'last_update') + digest: Hash + last_update: DatetimeStr + + def __init__(self, digest: Hash, last_update: DatetimeStr) -> None: + self.digest = digest + self.last_update = last_update + + def __str__(self) -> str: + return f'{self.digest.b64}:{self.last_update}' class QuotaLog(DbData): @@ -801,8 +792,8 @@ class DownloadsManager: def _sync_db(self): with DbConn() as conn: - known_paths = [file.rel_path for - file in VideoFile.get_all_non_deleted(conn)] + VideoFile.purge_deleteds(conn) + known_paths = [file.rel_path for file in VideoFile.get_all(conn)] old_cwd = Path.cwd() chdir(PATH_DOWNLOADS) for path in [p for p in Path('.').iterdir() if p.is_file()]: @@ -822,9 +813,7 @@ class DownloadsManager: 'present', str(path), VideoFile.get_by_yt_id(conn, yt_id).digest.b64) - for file in VideoFile.get_all_deleted(conn): - file.ensure_unlinked_if_no_living_owners(conn) - self._files = VideoFile.get_all_non_deleted(conn) + self._files = VideoFile.get_all(conn) chdir(old_cwd) def last_update_for(self, diff --git a/src/ytplom/sync.py b/src/ytplom/sync.py index 13e49db..d7bc0c5 100644 --- a/src/ytplom/sync.py +++ b/src/ytplom/sync.py @@ -9,8 +9,8 @@ from paramiko import SSHClient # type: ignore from scp import SCPClient # type: ignore # ourselves from ytplom.db import DbConn, DbFile, Hash, PATH_DB -from ytplom.misc import (PATH_TEMP, Config, FlagName, QuotaLog, VideoFile, - YoutubeQuery, YoutubeVideo) +from ytplom.misc import (PATH_TEMP, Config, FileDeletionRequest, FlagName, + QuotaLog, VideoFile, YoutubeQuery, YoutubeVideo) from ytplom.http import PAGE_NAMES @@ -76,7 +76,8 @@ def _sync_dbs(scp: SCPClient) -> None: """Download remote DB, run sync_(objects|relations), put remote DB back.""" scp.get(PATH_DB, _PATH_DB_REMOTE) with DbConn() as db_local, DbConn(DbFile(_PATH_DB_REMOTE)) as db_remote: - for cls in (QuotaLog, YoutubeQuery, YoutubeVideo, VideoFile): + for cls in (FileDeletionRequest, QuotaLog, YoutubeQuery, YoutubeVideo, + VideoFile): _back_and_forth(_sync_objects, (db_local, db_remote), cls) for yt_video_local in YoutubeVideo.get_all(db_local): _back_and_forth(_sync_relations, (db_local, db_remote),