#!/usr/bin/env python3
"""Script to sync between local and remote instances."""

# Provenance: reconstructed from the plain-text commit diff
#   Subject: Add script for syncing between local and remote instances.
#   From: Christian Heller
#   Date: Fri, 22 Nov 2024 23:34:46 +0000 (+0100)
#   X-Git-Url: https://plomlompom.com/repos/bar%20baz.html?a=commitdiff_plain;h=867b867d04f796c42b37447f4736f5d085a5fc43;p=ytplom
# That commit created this file as sync.py (mode 100755) and also added the
# line "scp==0.15.0" to requirements.txt (between python-mpv==1.0.7 and
# yt-dlp==2024.11.18).

# included libs
from typing import Callable, TypeAlias
from json import loads as json_loads
from os import environ, remove as os_remove
from os.path import join as path_join
from urllib.request import urlopen
# non-included libs
from paramiko import SSHClient  # type: ignore
from scp import SCPClient  # type: ignore
from ytplom.misc import (
    PATH_DB, PATH_DOWNLOADS,
    DatabaseConnection, PathStr, QueryId, QuotaLog, VideoFile,
    YoutubeQuery, YoutubeVideo)


# what we might want to manually define per environs
# (both are None when the respective environment variable is unset)
YTPLOM_REMOTE = environ.get('YTPLOM_REMOTE')
YTPLOM_PORT = environ.get('YTPLOM_PORT')

# type definitions for mypy
# RowData: (side name used in log output, objects of one class, that side's DB)
RowData: TypeAlias = tuple[str,
                           list[QuotaLog] | list[YoutubeQuery]
                           | list[YoutubeVideo] | list[VideoFile],
                           DatabaseConnection]
# RelationData: (side name, videos linked to one query, that side's DB)
RelationData: TypeAlias = tuple[str, list[YoutubeVideo], DatabaseConnection]


# local file name under which the remote's DB is stored while syncing
PATH_DB_REMOTE = PathStr('remote_db.sql')
# URL queried for the paths of files to send local->remote
URL_MISSING_JSON = f'http://{YTPLOM_REMOTE}:{YTPLOM_PORT}/missing.json'


def back_and_forth(sync_func: Callable,
                   arg_twins: tuple[RowData, RowData],
                   shared: str
                   ) -> None:
    """Apply sync_func twice, once reversing the two arg_twins items.

    sync_func is called as sync_func(first, second, shared) and then as
    sync_func(second, first, shared), so a one-directional sync function
    ends up being applied in both directions.
    """
    first, second = arg_twins[0], arg_twins[1]
    sync_func(first, second, shared)
    sync_func(second, first, shared)


def sync_objects(row_data_1st: RowData,
                 row_data_2nd: RowData,
                 id_name: str
                 ) -> None:
    """Ensure objects from row_data_1st are in row_data_2nd objects, DB.

    Every object of row_data_1st's list that is not contained in
    row_data_2nd's list is announced on stdout (identified by its attribute
    named id_name) and saved via its .save() into row_data_2nd's DB.
    """
    name_1st, objs_1st, _ = row_data_1st
    name_2nd, objs_2nd, db_2nd = row_data_2nd
    # collect first, then save, so membership is tested against objs_2nd
    # before any .save() call has run
    only_in_1st = [obj for obj in objs_1st if obj not in objs_2nd]
    for obj in only_in_1st:
        print(f'SYNC: adding {name_1st}->{name_2nd} {getattr(obj, id_name)}')
        obj.save(db_2nd)


def sync_relations(relation_1st: RelationData,
                   relation_2nd: RelationData,
                   yt_query_id: QueryId
                   ) -> None:
    """Ensure query<->video relation_2nd in relation_1st DB.

    Note the direction: videos listed in relation_2nd but not in
    relation_1st get their mapping to yt_query_id saved into relation_1st's
    DB via .save_to_query(), i.e. data flows from 2nd to 1st.
    """
    name_1st, videos_for_query_1st, db_1st = relation_1st
    name_2nd, videos_for_query_2nd, _ = relation_2nd
    only_in_2nd = [v for v in videos_for_query_2nd
                   if v not in videos_for_query_1st]
    for yt_video_2nd in only_in_2nd:
        print(f'SYNC: adding {name_2nd}->{name_1st} mapping '
              f'of {yt_video_2nd.id_} to {yt_query_id}')
        yt_video_2nd.save_to_query(db_1st, yt_query_id)


def _sync_over(scp: SCPClient) -> None:
    """Run the whole sync using the already opened SCP client."""
    # work on a local copy of the remote's DB file
    scp.get(PATH_DB, PATH_DB_REMOTE)
    local_db = DatabaseConnection(PATH_DB)
    remote_db = DatabaseConnection(PATH_DB_REMOTE)

    # 1. make both DBs know the same objects of each class
    for cls in (QuotaLog, YoutubeQuery, YoutubeVideo, VideoFile):
        objs_local, objs_remote = cls.get_all(local_db), cls.get_all(remote_db)
        obj_arg_twins = (('local', objs_local, local_db),
                         ('remote', objs_remote, remote_db))
        # VideoFile objects are reported by rel_path, all others by id_
        back_and_forth(sync_objects, obj_arg_twins,
                       'rel_path' if cls is VideoFile else 'id_')

    # 2. make both DBs know the same query<->video mappings, for every
    #    query that the local DB links to some video
    for yt_video_local in YoutubeVideo.get_all(local_db):
        for yt_query_local in YoutubeQuery.get_all_for_video(
                local_db, yt_video_local.id_):
            videos_for_query_remote = YoutubeVideo.get_all_for_query(
                remote_db, yt_query_local.id_)
            videos_for_query_local = YoutubeVideo.get_all_for_query(
                local_db, yt_query_local.id_)
            rel_arg_twins = (('local', videos_for_query_local, local_db),
                             ('remote', videos_for_query_remote, remote_db))
            back_and_forth(sync_relations, rel_arg_twins, yt_query_local.id_)

    # 3. send every file listed by the remote's missing.json
    #    (presumably download files the remote lacks -- the endpoint is
    #    defined outside this script)
    with urlopen(URL_MISSING_JSON) as response:
        missing = json_loads(response.read())
    for path in missing:
        print(f'SYNC: sending local->remote file {path}')
        # same path below PATH_DOWNLOADS on both sides
        scp.put(path_join(PATH_DOWNLOADS, path),
                path_join(PATH_DOWNLOADS, path))

    # 4. persist both DBs, push the updated copy back, drop the temp file
    local_db.commit_close()
    remote_db.commit_close()
    scp.put(PATH_DB_REMOTE, PATH_DB)
    os_remove(PATH_DB_REMOTE)


def main() -> None:
    """Sync DB contents and download files between local and remote.

    Connects by SSH to YTPLOM_REMOTE, runs the sync over SCP, and closes the
    SCP channel and the SSH connection again whether or not the sync
    succeeded.

    Raises SystemExit if YTPLOM_REMOTE or YTPLOM_PORT is unset.
    """
    # fail early and readably instead of deep inside ssh.connect/urlopen
    unset = [name for name, value in (('YTPLOM_REMOTE', YTPLOM_REMOTE),
                                      ('YTPLOM_PORT', YTPLOM_PORT))
             if value is None]
    if unset:
        raise SystemExit(
            f'SYNC: missing environment variable(s): {", ".join(unset)}')

    ssh = SSHClient()
    try:
        ssh.load_system_host_keys()
        ssh.connect(YTPLOM_REMOTE)
        scp = SCPClient(ssh.get_transport())
        try:
            _sync_over(scp)
        finally:
            scp.close()
    finally:
        # the SSH connection must be closed explicitly; closing the SCP
        # client alone leaves it open
        ssh.close()


if __name__ == '__main__':
    main()