From: Christian Heller Date: Tue, 26 Nov 2024 14:15:28 +0000 (+0100) Subject: Update sync script, prefer rows with newer .last_update. X-Git-Url: https://plomlompom.com/repos/%7B%7B%20web_path%20%7D%7D/cards/%7B%7Bcard_id%7D%7D/%7B%7Bprefix%7D%7D/copy_structured?a=commitdiff_plain;p=ytplom Update sync script, prefer rows with newer .last_update. --- diff --git a/src/sync.py b/src/sync.py index 27980cd..1c7e522 100755 --- a/src/sync.py +++ b/src/sync.py @@ -2,7 +2,7 @@ """Script to sync between local and remote instances.""" # included libs -from typing import Callable, TypeAlias +from typing import Any, Callable from json import loads as json_loads from os import environ, remove as os_remove from os.path import join as path_join @@ -12,7 +12,7 @@ from paramiko import SSHClient # type: ignore from scp import SCPClient # type: ignore from ytplom.misc import ( PATH_DB, PATH_DOWNLOADS, PATH_TEMP, - DatabaseConnection, PathStr, QueryId, QuotaLog, VideoFile, + DatabaseConnection, PathStr, QuotaLog, VideoFile, YoutubeQuery, YoutubeVideo) @@ -20,90 +20,88 @@ from ytplom.misc import ( YTPLOM_REMOTE = environ.get('YTPLOM_REMOTE') YTPLOM_PORT = environ.get('YTPLOM_PORT') -# type definitions for mypy -RowData: TypeAlias = tuple[str, - list[QuotaLog] | list[YoutubeQuery] - | list[YoutubeVideo] | list[VideoFile], - DatabaseConnection] -RelationData: TypeAlias = tuple[str, list[YoutubeVideo], DatabaseConnection] - - PATH_DB_REMOTE = PathStr(path_join(PATH_TEMP, 'remote_db.sql')) -URL_MISSING_JSON = f'http://{YTPLOM_REMOTE}:{YTPLOM_PORT}/missing.json' +MISSING_API = PathStr('/missing.json') +ATTR_NAME_LAST_UPDATE = 'last_update' def back_and_forth(sync_func: Callable, - arg_twins: tuple[RowData, RowData], - shared: str + dbs: tuple[DatabaseConnection, DatabaseConnection], + shared: YoutubeVideo | tuple[Any, str] ) -> None: - """Apply sync_func twice, once reversing the two arg_twins items.""" - sync_func(arg_twins[0], arg_twins[1], shared) - sync_func(arg_twins[1], arg_twins[0], shared) + """Run sync_func on arg pairs + shared, and again with pairs switched.""" + host_names = 'local', 'remote' + sync_func(host_names, dbs, shared) + sync_func(*(tuple(reversed(list(t))) for t in (host_names, dbs)), shared) -def sync_objects(row_data_1st: RowData, - row_data_2nd: RowData, - id_name: str +def sync_objects(host_names: tuple[str, str], + dbs: tuple[DatabaseConnection, DatabaseConnection], + shared: tuple[Any, str] ) -> None: - """Ensure objects from row_data_1st are in row_data_2nd objects, DB.""" - name_1st, objs_1st, _ = row_data_1st - name_2nd, objs_2nd, db_2nd = row_data_2nd - for obj in [obj for obj in objs_1st if obj not in objs_2nd]: - print(f'SYNC: adding {name_1st}->{name_2nd} {getattr(obj, id_name)}') - obj.save(db_2nd) - - -def sync_relations(relation_1st: RelationData, - relation_2nd: RelationData, - yt_query_id: QueryId + """Equalize both DB's object collections; prefer newer states to older.""" + cls, id_name = shared + obj_colls = cls.get_all(dbs[0]), cls.get_all(dbs[1]) + for obj_0 in [obj for obj in obj_colls[0] if obj not in obj_colls[1]]: + id_ = getattr(obj_0, id_name) + sync_down = True + msg_verb = 'adding' + for obj_1 in [obj for obj in obj_colls[1] # other collection has … + if id_ == getattr(obj, id_name)]: # … obj, but non-equal + msg_verb = 'updating' + if hasattr(obj_0, ATTR_NAME_LAST_UPDATE): + last_update_0 = getattr(obj_0, ATTR_NAME_LAST_UPDATE) + last_update_1 = getattr(obj_1, ATTR_NAME_LAST_UPDATE) + sync_down = last_update_0 > last_update_1 + if not sync_down: + print(f'SYNC: {msg_verb} ' + f'{host_names[1]}->{host_names[0]} {id_}') + obj_1.save(dbs[0]) + if sync_down: + print(f'SYNC: {msg_verb} {host_names[0]}->{host_names[1]} {id_}') + obj_0.save(dbs[1]) + + +def sync_relations(host_names: tuple[str, str], + dbs: tuple[DatabaseConnection, DatabaseConnection], + yt_result: YoutubeVideo ) -> None: - """Ensure query<->video relation_2nd in relation_1st DB.""" - name_1st, videos_for_query_1st, db_1st = relation_1st - name_2nd, videos_for_query_2nd, _ = relation_2nd - for yt_video_2nd in [v for v in videos_for_query_2nd - if v not in videos_for_query_1st]: - print(f'SYNC: adding {name_2nd}->{name_1st} mapping ' - f'of {yt_video_2nd.id_} to {yt_query_id}') - yt_video_2nd.save_to_query(db_1st, yt_query_id) + """To dbs[1] add YT yt_video->yt_q_colls[0] mapping not in yt_q_colls[1]""" + yt_q_colls = tuple(YoutubeQuery.get_all_for_video(db, yt_result.id_) + for db in dbs) + for q in [q for q in yt_q_colls[0] if q not in yt_q_colls[1]]: + print(f'SYNC: adding {host_names[1]}->{host_names[0]} mapping ' + f'of result {yt_result.id_} to query {q.id_}') + yt_result.save_to_query(dbs[1], q.id_) -if __name__ == '__main__': - +def main(): + """Connect to remote, sync local+remote DBs, + downloads where missing.""" ssh = SSHClient() ssh.load_system_host_keys() ssh.connect(YTPLOM_REMOTE) scp = SCPClient(ssh.get_transport()) - scp.get(PATH_DB, PATH_DB_REMOTE) local_db = DatabaseConnection(PATH_DB) remote_db = DatabaseConnection(PATH_DB_REMOTE) - for cls in (QuotaLog, YoutubeQuery, YoutubeVideo, VideoFile): - objs_local, objs_remote = cls.get_all(local_db), cls.get_all(remote_db) - obj_arg_twins = (('local', objs_local, local_db), - ('remote', objs_remote, remote_db)) - back_and_forth(sync_objects, obj_arg_twins, - 'rel_path' if cls is VideoFile else 'id_') + back_and_forth(sync_objects, (local_db, remote_db), + (cls, 'rel_path' if cls is VideoFile else 'id_')) for yt_video_local in YoutubeVideo.get_all(local_db): - for yt_query_local in YoutubeQuery.get_all_for_video( - local_db, yt_video_local.id_): - videos_for_query_remote = YoutubeVideo.get_all_for_query( - remote_db, yt_query_local.id_) - videos_for_query_local = YoutubeVideo.get_all_for_query( - local_db, yt_query_local.id_) - rel_arg_twins = (('local', videos_for_query_local, local_db), - ('remote', videos_for_query_remote, remote_db)) - back_and_forth(sync_relations, rel_arg_twins, yt_query_local.id_) - with urlopen(URL_MISSING_JSON) as response: - missing = json_loads(response.read()) - for path in missing: - print(f'SYNC: sending local->remote file {path}') - scp.put(path_join(PATH_DOWNLOADS, path), - path_join(PATH_DOWNLOADS, path)) - + back_and_forth(sync_relations, (local_db, remote_db), yt_video_local) local_db.commit_close() remote_db.commit_close() scp.put(PATH_DB_REMOTE, PATH_DB) os_remove(PATH_DB_REMOTE) - + for host, direction, mover in ((YTPLOM_REMOTE, 'local->remote', scp.put), + ('localhost', 'remote->local', scp.get)): + with urlopen(f'http://{host}:{YTPLOM_PORT}{MISSING_API}') as response: + missing = json_loads(response.read()) + for path in (path_join(PATH_DOWNLOADS, path) for path in missing): + print(f'SYNC: sending {direction} file {path}') + mover(path, path) scp.close() + + +if __name__ == '__main__': + main()