From f1956f80ff874a30424856fb23be9a6f4b308389 Mon Sep 17 00:00:00 2001
From: Christian Heller <c.heller@plomlompom.de>
Date: Sun, 2 Mar 2025 16:21:12 +0100
Subject: [PATCH] Greatly simplify file deletion and deletion sync mechanics.

---
 .../8_add_file_deletion_requests.sql          |   5 +
 src/migrations/new_init.sql                   |   4 +
 src/ytplom/db.py                              |   2 +-
 src/ytplom/http.py                            |  18 +--
 src/ytplom/migrations.py                      |   1 +
 src/ytplom/misc.py                            | 123 ++++++++----------
 src/ytplom/sync.py                            |   7 +-
 7 files changed, 77 insertions(+), 83 deletions(-)
 create mode 100644 src/migrations/8_add_file_deletion_requests.sql

diff --git a/src/migrations/8_add_file_deletion_requests.sql b/src/migrations/8_add_file_deletion_requests.sql
new file mode 100644
index 0000000..e646fd4
--- /dev/null
+++ b/src/migrations/8_add_file_deletion_requests.sql
@@ -0,0 +1,5 @@
+CREATE TABLE "file_deletion_requests" (
+  digest BLOB PRIMARY KEY,
+  last_update TEXT NOT NULL
+);
+
diff --git a/src/migrations/new_init.sql b/src/migrations/new_init.sql
index 1cbc4a1..ac92df6 100644
--- a/src/migrations/new_init.sql
+++ b/src/migrations/new_init.sql
@@ -1,3 +1,7 @@
+CREATE TABLE "file_deletion_requests" (
+  digest BLOB PRIMARY KEY,
+  last_update TEXT NOT NULL
+);
 CREATE TABLE "files" (
   digest BLOB PRIMARY KEY,
   rel_path TEXT NOT NULL,
diff --git a/src/ytplom/db.py b/src/ytplom/db.py
index fba0f92..afec3f6 100644
--- a/src/ytplom/db.py
+++ b/src/ytplom/db.py
@@ -15,7 +15,7 @@ from ytplom.primitives import (
 
 PATH_DB = PATH_APP_DATA.joinpath('db.sql')
 
-_EXPECTED_DB_VERSION = 7
+_EXPECTED_DB_VERSION = 8
 _PATH_MIGRATIONS = PATH_APP_DATA.joinpath('migrations')
 _HASH_ALGO = 'sha512'
 _PATH_DB_SCHEMA = _PATH_MIGRATIONS.joinpath('new_init.sql')
diff --git a/src/ytplom/http.py b/src/ytplom/http.py
index 3785b07..ae645dc 100644
--- a/src/ytplom/http.py
+++ b/src/ytplom/http.py
@@ -123,11 +123,8 @@ class _TaskHandler(PlomHttpHandler):
             self.send_http(b'no way', code=403)
         with DbConn() as conn:
             file = VideoFile.get_one(conn, Hash.from_b64(self.path_toks[2]))
-            self.server.player.remove_by_digest(file.digest)
-            file.set_flags({FILE_FLAGS[FlagName('delete')]})
-            file.save(conn)
+            file.delete(conn)
             conn.commit()
-            file.ensure_unlinked_if_no_living_owners(conn)
         self._redirect(Path('/'))
 
     def _update_file(self) -> None:
@@ -178,9 +175,8 @@ class _TaskHandler(PlomHttpHandler):
 
     def _purge_deleted_files(self) -> None:
         with DbConn() as conn:
-            for file in VideoFile.get_all_deleted(conn):
-                self.server.player.remove_by_digest(file.digest)
-            VideoFile.purge_deleteds(conn)
+            for digest in VideoFile.purge_deleteds(conn):
+                self.server.player.remove_by_digest(digest)
             conn.commit()
         self.send_http(b'OK')
 
@@ -244,7 +240,7 @@ class _TaskHandler(PlomHttpHandler):
         try:
             with DbConn() as conn:
                 file_data = VideoFile.get_by_yt_id(conn, video_id)
-                if file_data.missing:
+                if not file_data.present:
                     raise NotFoundException
                 self._redirect(Path('/')
                                .joinpath(PAGE_NAMES['file'])
@@ -355,10 +351,8 @@ class _TaskHandler(PlomHttpHandler):
 
     def _send_missing_json(self) -> None:
         with DbConn() as conn:
-            missing = [f.digest.b64 for
-                       f in VideoFile.get_all_non_deleted(conn)
-                       if f.missing]
-        self._send_json(missing)
+            self._send_json([f.digest.b64 for f in VideoFile.get_all(conn)
+                             if not f.present])
 
     def _send_tags_index(self) -> None:
         with DbConn() as conn:
diff --git a/src/ytplom/migrations.py b/src/ytplom/migrations.py
index 2d93f5e..0933749 100644
--- a/src/ytplom/migrations.py
+++ b/src/ytplom/migrations.py
@@ -76,4 +76,5 @@ MIGRATIONS: set[DbMigration] = {
     DbMigration(5, Path('5_files_redo.sql'), None),
     DbMigration(6, Path('6_add_files_tags.sql'), None),
     DbMigration(7, Path('7_add_files_duration_ms.sql'), _mig_7_resave_files),
+    DbMigration(8, Path('8_add_file_deletion_requests.sql'), None),
 }
diff --git a/src/ytplom/misc.py b/src/ytplom/misc.py
index ff7b588..aade5de 100644
--- a/src/ytplom/misc.py
+++ b/src/ytplom/misc.py
@@ -72,8 +72,7 @@ ENVIRON_PREFIX = 'YTPLOM_'
 TIMESTAMP_FMT = '%Y-%m-%d %H:%M:%S.%f'
 LEGAL_EXTENSIONS = {'webm', 'mp4', 'mkv'}
 FILE_FLAGS: dict[FlagName, FlagsInt] = {
-  FlagName('do not sync'): FlagsInt(1 << 62),
-  FlagName('delete'): FlagsInt(-(1 << 63))
+  FlagName('do not sync'): FlagsInt(1 << 62)
 }
 ONE_MILLION = 1000 * 1000
 
@@ -387,46 +386,14 @@ class VideoFile(DbData):
             self.last_update = _now_string()
         return super().save(conn)
 
-    @property
-    def deleted(self) -> bool:
-        """Return if 'delete' flag set."""
-        return self.is_flag_set(FlagName('delete'))
-
-    @classmethod
-    def get_one(cls, conn: DbConn, id_: str | Hash) -> Self:
-        """Extend super by .test_deletion."""
-        file = super().get_one(conn, id_)
-        if file.deleted:  # pylint: disable=no-member
-            raise NotFoundException('not showing entry marked as deleted')
-        # NB: mypy recognizes file as VideoFile without below assert and
-        # if-isinstance-else, yet less type-smart pylint only does due to the
-        # latter (also the reason for the disable=no-member above; but wouldn't
-        # suffice here, pylint would still identify function's return falsely);
-        # the assert isn't needed by mypy and not enough for pylint, but is
-        # included just so no future code change would trigger the else result.
-        assert isinstance(file, VideoFile)
-        return file if isinstance(file, VideoFile) else cls(None, Path(''))
-
-    @classmethod
-    def get_all_non_deleted(cls, conn: DbConn) -> list[Self]:
-        """Extend super by excluding deleteds."""
-        return [f for f in super().get_all(conn) if not f.deleted]
-
-    @classmethod
-    def get_all_deleted(cls, conn: DbConn) -> list[Self]:
-        """Get only deleteds."""
-        return [f for f in super().get_all(conn) if f.deleted]
-
     @classmethod
     def get_by_yt_id(cls, conn: DbConn, yt_id: YoutubeId) -> Self:
-        """Return (non-deleted) VideoFile of .yt_id."""
+        """Return VideoFile of .yt_id."""
         rows = conn.exec(f'SELECT * FROM {cls._table_name} WHERE yt_id =',
                          (yt_id,)).fetchall()
         for file in [cls._from_table_row(row) for row in rows]:
-            if not file.deleted:
-                return file
-        raise NotFoundException(
-                f'no undeleted entry for file to Youtube ID {yt_id}')
+            return file
+        raise NotFoundException(f'no entry for file to Youtube ID {yt_id}')
 
     @classmethod
     def get_filtered(cls,
@@ -435,9 +402,9 @@ class VideoFile(DbData):
                      needed_tags_seen: TagSet = TagSet(set()),
                      show_absent: bool = False
                      ) -> list[Self]:
-        """Return cls.get_all_non_deleted matching provided filter criteria."""
+        """Return all matching provided filter criteria."""
         return [
-            f for f in cls.get_all_non_deleted(conn)
+            f for f in cls.get_all(conn)
             if (show_absent or f.present)
             and str(filter_path).lower() in str(f.rel_path).lower()
             and (cls.tags_prefilter_needed.are_all_in(f.tags)
@@ -458,7 +425,7 @@ class VideoFile(DbData):
     def all_tags_showable(cls, conn) -> TagSet:
         """Show all used tags passing .tags_display_whitelist."""
         tags = TagSet()
-        for file in cls.get_all_non_deleted(conn):
+        for file in cls.get_all(conn):
             tags.add(file.tags.whitelisted(cls.tags_display_whitelist))
         return tags
 
@@ -470,7 +437,7 @@ class VideoFile(DbData):
     def unused_tags(self, conn: DbConn) -> TagSet:
         """Return tags used among other VideoFiles, not in self."""
         tags = TagSet()
-        for file in self.get_all_non_deleted(conn):
+        for file in self.get_all(conn):
             tags.add(file.tags.all_not_in(self.tags).whitelisted(
                 self.tags_display_whitelist))
         return tags
@@ -505,11 +472,6 @@ class VideoFile(DbData):
         """Return if file exists in filesystem."""
         return self.full_path.exists()
 
-    @property
-    def missing(self) -> bool:
-        """Return if file absent despite absence of 'delete' flag."""
-        return not (self.is_flag_set(FlagName('delete')) or self.present)
-
     @property
     def flags_as_str_list(self) -> list[str]:
         """Return all set flags."""
@@ -525,28 +487,57 @@ class VideoFile(DbData):
         """Return if flag of flag_name is set in flags field."""
         return bool(self.flags & FILE_FLAGS[flag_name])
 
-    def ensure_unlinked_if_no_living_owners(self, conn: DbConn) -> None:
-        """If 'delete' flag set and no undeleted owner, unlink."""
-        if self.full_path in [f.full_path
-                              for f in self.get_all_non_deleted(conn)]:
-            return
-        if self.present:
-            self.unlink_locally()
-
     def unlink_locally(self) -> None:
         """Remove actual file from local filesystem."""
         print(f'SYNC: removing from filesystem: {self.rel_path}')
         self.full_path.unlink()
 
+    def purge(self, conn) -> None:
+        """Remove self from database, and in filesystem if no other owners."""
+        if self.present and self.full_path not in [
+                f.full_path for f in self.get_all(conn) if self != f]:
+            self.unlink_locally()
+        print(f'SYNC: purging off DB: {self.digest.b64} ({self.rel_path})')
+        conn.exec(f'DELETE FROM {self._table_name} WHERE digest =',
+                  (self.digest.bytes,))
+
     @classmethod
-    def purge_deleteds(cls, conn: DbConn) -> None:
-        """For all of .is_flag_set("deleted"), remove file _and_ DB entry."""
-        for file in cls.get_all_deleted(conn):
-            if file.present:
-                file.unlink_locally()
-            print(f'SYNC: purging off DB: {file.digest.b64} ({file.rel_path})')
-            conn.exec(f'DELETE FROM {cls._table_name} WHERE digest =',
-                      (file.digest.bytes,))
+    def purge_deleteds(cls, conn: DbConn) -> list[Hash]:
+        """Purge all with .last_update < a respective entry in delete table."""
+        too_early = '2000-01-01 00:00:00.0'
+        del_req_by_timestamp: dict[Hash, DatetimeStr] = {}
+        for del_req in FileDeletionRequest.get_all(conn):
+            if del_req_by_timestamp.get(del_req.digest, too_early
+                                        ) < del_req.last_update:
+                del_req_by_timestamp[del_req.digest] = del_req.last_update
+        deleteds: list[Hash] = []
+        for file in [file for file in cls.get_all(conn)
+                     if del_req_by_timestamp.get(file.digest, too_early
+                                                 ) > file.last_update]:
+            deleteds += [file.digest]
+            file.purge(conn)
+        return deleteds
+
+    def delete(self, conn: DbConn) -> None:
+        """Add/update into deletion request table, then purge locally."""
+        FileDeletionRequest(self.digest, _now_string()).save(conn)
+        self.purge(conn)
+
+
+class FileDeletionRequest(DbData):
+    """Collect file deletion requests in their own table, for syncability."""
+    id_name = 'digest'
+    _table_name = 'file_deletion_requests'
+    _cols = ('digest', 'last_update')
+    digest: Hash
+    last_update: DatetimeStr
+
+    def __init__(self, digest: Hash, last_update: DatetimeStr) -> None:
+        self.digest = digest
+        self.last_update = last_update
+
+    def __str__(self) -> str:
+        return f'{self.digest.b64}:{self.last_update}'
 
 
 class QuotaLog(DbData):
@@ -801,8 +792,8 @@ class DownloadsManager:
 
     def _sync_db(self):
         with DbConn() as conn:
-            known_paths = [file.rel_path for
-                           file in VideoFile.get_all_non_deleted(conn)]
+            VideoFile.purge_deleteds(conn)
+            known_paths = [file.rel_path for file in VideoFile.get_all(conn)]
             old_cwd = Path.cwd()
             chdir(PATH_DOWNLOADS)
             for path in [p for p in Path('.').iterdir() if p.is_file()]:
@@ -822,9 +813,7 @@ class DownloadsManager:
                             'present',
                             str(path),
                             VideoFile.get_by_yt_id(conn, yt_id).digest.b64)
-            for file in VideoFile.get_all_deleted(conn):
-                file.ensure_unlinked_if_no_living_owners(conn)
-            self._files = VideoFile.get_all_non_deleted(conn)
+            self._files = VideoFile.get_all(conn)
             chdir(old_cwd)
 
     def last_update_for(self,
diff --git a/src/ytplom/sync.py b/src/ytplom/sync.py
index 13e49db..d7bc0c5 100644
--- a/src/ytplom/sync.py
+++ b/src/ytplom/sync.py
@@ -9,8 +9,8 @@ from paramiko import SSHClient  # type: ignore
 from scp import SCPClient  # type: ignore
 # ourselves
 from ytplom.db import DbConn, DbFile, Hash, PATH_DB
-from ytplom.misc import (PATH_TEMP, Config, FlagName, QuotaLog, VideoFile,
-                         YoutubeQuery, YoutubeVideo)
+from ytplom.misc import (PATH_TEMP, Config, FileDeletionRequest, FlagName,
+                         QuotaLog, VideoFile, YoutubeQuery, YoutubeVideo)
 from ytplom.http import PAGE_NAMES
 
 
@@ -76,7 +76,8 @@ def _sync_dbs(scp: SCPClient) -> None:
     """Download remote DB, run sync_(objects|relations), put remote DB back."""
     scp.get(PATH_DB, _PATH_DB_REMOTE)
     with DbConn() as db_local, DbConn(DbFile(_PATH_DB_REMOTE)) as db_remote:
-        for cls in (QuotaLog, YoutubeQuery, YoutubeVideo, VideoFile):
+        for cls in (FileDeletionRequest, QuotaLog, YoutubeQuery, YoutubeVideo,
+                    VideoFile):
             _back_and_forth(_sync_objects, (db_local, db_remote), cls)
         for yt_video_local in YoutubeVideo.get_all(db_local):
             _back_and_forth(_sync_relations, (db_local, db_remote),
-- 
2.30.2