From: Christian Heller <c.heller@plomlompom.de>
Date: Fri, 22 Nov 2024 23:34:46 +0000 (+0100)
Subject: Add script for syncing between local and remote instances.
X-Git-Url: https://plomlompom.com/repos/%7B%7Bprefix%7D%7D/static/%7B%7B%20web_path%20%7D%7D/decks/ledger?a=commitdiff_plain;h=867b867d04f796c42b37447f4736f5d085a5fc43;p=ytplom

Add script for syncing between local and remote instances.
---

diff --git a/requirements.txt b/requirements.txt
index 437ab2d..6303919 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 google-api-python-client==2.154.0
 Jinja2==3.1.4
 python-mpv==1.0.7
+scp==0.15.0
 yt-dlp==2024.11.18
diff --git a/sync.py b/sync.py
new file mode 100755
index 0000000..fb02e05
--- /dev/null
+++ b/sync.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+"""Script to sync between local and remote instances."""
+
+# included libs
+from typing import Callable, TypeAlias
+from json import loads as json_loads
+from os import environ, remove as os_remove
+from os.path import join as path_join
+from urllib.request import urlopen
+# non-included libs
+from paramiko import SSHClient  # type: ignore
+from scp import SCPClient  # type: ignore
+from ytplom.misc import (
+        PATH_DB, PATH_DOWNLOADS,
+        DatabaseConnection, PathStr, QueryId, QuotaLog, VideoFile,
+        YoutubeQuery, YoutubeVideo)
+
+
+# settings read from the environment; each is None if its variable is unset
+YTPLOM_REMOTE = environ.get('YTPLOM_REMOTE')
+YTPLOM_PORT = environ.get('YTPLOM_PORT')
+
+# type definitions for mypy; tuples of (label for prints, objects, their DB)
+RowData: TypeAlias = tuple[str,
+                           list[QuotaLog] | list[YoutubeQuery]
+                           | list[YoutubeVideo] | list[VideoFile],
+                           DatabaseConnection]
+RelationData: TypeAlias = tuple[str, list[YoutubeVideo], DatabaseConnection]
+
+# temporary local copy of remote DB, and URL listing files to send to remote
+PATH_DB_REMOTE = PathStr('remote_db.sql')
+URL_MISSING_JSON = f'http://{YTPLOM_REMOTE}:{YTPLOM_PORT}/missing.json'
+
+
+def back_and_forth(sync_func: Callable,
+                   arg_twins: tuple[RowData, RowData],
+                   shared: str) -> None:
+    """Call sync_func on both arg_twins orderings, passing shared to each."""
+    first, second = arg_twins[0], arg_twins[1]
+    for pair in ((first, second), (second, first)):
+        sync_func(*pair, shared)
+
+
+def sync_objects(row_data_1st: RowData,
+                 row_data_2nd: RowData,
+                 id_name: str) -> None:
+    """Save to 2nd DB all row_data_1st objects absent from row_data_2nd's."""
+    src_name, src_objs, _ = row_data_1st
+    dst_name, dst_objs, dst_db = row_data_2nd
+    absent = [item for item in src_objs if item not in dst_objs]
+    for obj in absent:
+        print(f'SYNC: adding {src_name}->{dst_name} {getattr(obj, id_name)}')
+        obj.save(dst_db)
+
+
+def sync_relations(relation_1st: RelationData,
+                   relation_2nd: RelationData,
+                   yt_query_id: QueryId
+                   ) -> None:
+    """Map to yt_query_id in 1st DB each relation_2nd video the 1st lacks."""
+    name_into, videos_into, db_into = relation_1st
+    name_from, videos_from, _ = relation_2nd
+    unmapped = [vid for vid in videos_from if vid not in videos_into]
+    for video in unmapped:
+        print(f'SYNC: adding {name_from}->{name_into} mapping '
+              f'of {video.id_} to {yt_query_id}')
+        video.save_to_query(db_into, yt_query_id)
+
+
+if __name__ == '__main__':
+    # fail early: if unset, these are None and URL_MISSING_JSON is bogus
+    if not (YTPLOM_REMOTE and YTPLOM_PORT):
+        raise SystemExit('need YTPLOM_REMOTE and YTPLOM_PORT in environment')
+    with SSHClient() as ssh:  # closes the connection even on failure
+        ssh.load_system_host_keys()
+        ssh.connect(YTPLOM_REMOTE)
+        scp = SCPClient(ssh.get_transport())
+        # fetch remote DB (requested under same path as local one) as a copy
+        scp.get(PATH_DB, PATH_DB_REMOTE)
+        local_db = DatabaseConnection(PATH_DB)
+        remote_db = DatabaseConnection(PATH_DB_REMOTE)
+        for cls in (QuotaLog, YoutubeQuery, YoutubeVideo, VideoFile):
+            obj_arg_twins = (('local', cls.get_all(local_db), local_db),
+                             ('remote', cls.get_all(remote_db), remote_db))
+            back_and_forth(sync_objects, obj_arg_twins,
+                           'rel_path' if cls is VideoFile else 'id_')
+        # sync video mappings of each query that a local video is mapped to
+        for yt_video_local in YoutubeVideo.get_all(local_db):
+            for yt_query_local in YoutubeQuery.get_all_for_video(
+                    local_db, yt_video_local.id_):
+                q_id = yt_query_local.id_
+                vids_remote = YoutubeVideo.get_all_for_query(remote_db, q_id)
+                vids_local = YoutubeVideo.get_all_for_query(local_db, q_id)
+                rel_arg_twins = (('local', vids_local, local_db),
+                                 ('remote', vids_remote, remote_db))
+                back_and_forth(sync_relations, rel_arg_twins, q_id)
+        # upload the files whose paths the remote's missing.json lists
+        with urlopen(URL_MISSING_JSON) as response:
+            missing = json_loads(response.read())
+        for path in missing:
+            print(f'SYNC: sending local->remote file {path}')
+            file_path = path_join(PATH_DOWNLOADS, path)
+            scp.put(file_path, file_path)
+        local_db.commit_close()
+        remote_db.commit_close()
+        # replace remote DB with the synced copy, then drop the temporary file
+        scp.put(PATH_DB_REMOTE, PATH_DB)
+        os_remove(PATH_DB_REMOTE)
+        scp.close()